From dd4fb927c5a38c7cb311bb8e1a203e504a1d794e Mon Sep 17 00:00:00 2001 From: B-CE Date: Tue, 28 May 2019 09:06:48 +0200 Subject: [PATCH] Add script helper for i18n checks --- tools/getI18n.sh | 534 +++++++++++++++++++++++++++++++++ tools/patchLokalizeWrapping.sh | 58 ++++ 2 files changed, 592 insertions(+) create mode 100755 tools/getI18n.sh create mode 100755 tools/patchLokalizeWrapping.sh diff --git a/tools/getI18n.sh b/tools/getI18n.sh new file mode 100755 index 00000000..d793bf05 --- /dev/null +++ b/tools/getI18n.sh @@ -0,0 +1,534 @@ +#!/bin/bash +# Author : BCE - 19/05. +# extractions from i18n po files. (visualisazion only, no modifications) +# extract all language translations for checks, grep, identify typo... +# extract also links or tags +# permit word i18n control (non case sensitive, with regexp) +# identify fuzzy areas +# (to launch in main directory colobot or into any directory containing po files) +# +# designed for Colobot, some parts like -K may be specific, rest should be usable in other projects +# +# +version=V1.0.0 #thanks to upgrade this tag in case of change. +authorList="BCE" #thanks to upgrade this tag in case of change. + +usage(){ + echo " Purpose: extract language translations for checks." >&2 + echo " from every po files into subdirectories" >&2 + echo "" >&2 + echo " Usage: ${0##*/} [-l language_digram] -[AaTtKZf] [-s] [-O] [-hH] [-w pattern] [dir]" >&2 + echo " (-l xx and -lxx are equivalent, options can be put together)" >&2 + if [[ $# -lt 1 ]]; then + echo " dir : optionnal directory and subdirectories where looking for xx.po files, current one by default" >&2 + echo " ==== processing types ===" >&2 + echo " : full wanted language extraction from every lg.po into subfolders" >&2 + echo " -a/A : tags link (zzz) extraction only (table presentation)" >&2 + echo " -t/T : tags extraction only (zz<>/tt>) (table presentation)" >&2 + echo " A/T : like a/t, without inside part (zz) (for ref/link comparisons)" >&2 + echo " -K : key tags only (\\\\key option;) (table presentation)" >&2 + echo " -Z : identify fuzzy parts (non took in account translations...)" >&2 + echo " -f : identify filenames references alterations" >&2 + echo " -w pattern: non case sensitive translation extraction for i18n checks (ignoring codes parts)" >&2 + # non case sensitive... + # support regexp .*[] + # sample : getI18n.sh -Ow "d[ée]fi" + # support also regexp +{} but escaping them + # color can be disabled with C option + echo " -h : this help page" >&2 + echo " -H : samples page" >&2 + echo " ==== specializations ===" >&2 + echo " -l lg : set language : {cs,de,fr,pl,ru,pt,..}.po sources files. (default is fr)" >&2 + echo " -e : corresponding english i18n origins instead" >&2 + echo " -O : origin from each line (usefull with sort)" >&2 + echo " -s : sort & count" >&2 + #echo " -C : disable color highlightment for -w" >&2 # further batch treatments? + echo " dir : directory to analyse with its subdirectories" >&2 + echo " (last opt argument) (default:current)" >&2 + echo "" >&2 + echo " Designed for i18n Colobot'uses - $version - $authorList" >&2 + else + echo " -h : usage help page" >&2 + echo " ==== samples ===" >&2 + echo " full brazilian portuguese i18n extraction" >&2 + echo " $0 -l pt" >&2 + echo " full fr i18n extraction with origin into a file having current date" >&2 + echo " $0 -O -lfr > i18nFr.$(date +%y%m%d).txt" >&2 + echo " every german tags (sorted)" >&2 + echo " ${0##*/} -l de -ts" >&2 + echo " compare 'a' tags between wanted lg & corresponding english (constumize spaces to be ignore)" >&2 + echo " ${0##*/} -Asl pl> s.tmp && ${0##*/} -Asel pl> e.tmp && meld [se].tmp && rm [se].tmp" >&2 + echo " every key-tags from russian (sorted) (e:original english part) (from there)" >&2 + echo " ${0##*/} -KsOel ru ../.." >&2 + echo " every fuzzy (ignored translations) parts into russian" >&2 + echo " ${0##*/} -Zl ru" >&2 + echo " search for \"cell\" translations into czech" >&2 + echo " ${0##*/} -ewlcz \"cell\"" >&2 + echo " search for \"powerplant\"/\"lowerplant\" translations into polish files" >&2 + echo " ${0##*/}"' -lpl -Oew "[PL]ower.\{0,2\}plant"' >&2 + echo " every key translated (from Colobot root)" >&2 + echo " ${0##*/}"' -Ksl..' >&2 + echo " every instruction tags translated (from Colobot root)" >&2 + echo " ${0##*/}"' -Tsl.. |grep " cbot"' >&2 + echo " every fr files containing a pattern (from Colobot root)" >&2 + echo " ${0##*/} -Olfr | grep -i tireur | sed 's/^.*\.\///' | uniq" >&2 + echo " =====================" >&2 + fi +} + +KO(){ + usage + echo "" >&2 + echo " -f, -t, -T, -K & -Z are incompatible options" >&2 + echo " -e , -f & -Z are also incompatible options" >&2 + exit 1 +} + +# internals vars & default values +language=fr #language_digram +bSort=false +bOrig=false +bEnglish=false +bTags=false +bTagsLinkOnly=false +bTagsOnly=false +bKey=false +bFuzzy=false +bWord=false +sWord="" +bColor=true # hided option, only for -w, word highlightment +bFilesNameCheck=false +sSrc="" + +# check params +while getopts ":esOtTaAhHl:KZvw:Cf" option ; do #IndevW + # echo "option $option" >&2 + case $option in + e) $bFuzzy && KO + bEnglish=true ;; + s) bSort=true ;; + O) bOrig=true ;; + t) $bTags && KO + bTags=true ;; + T) $bTags && KO + bTags=true ; bTagsOnly=true ;; + a) $bTags && KO + bTags=true ; bTagsLinkOnly=true ;; + A) $bTags && KO + bTags=true ; bTagsLinkOnly=true ; bTagsOnly=true ;; + K) $bTags && KO + bTags=true ; bKey=true ;; + l) language=$OPTARG + case $language in + "cs") ;; + "de") ;; + "fr") ;; + "pl") ;; + "ru") ;; + "pt") ;; + "..") language='??' ;; #every ones + *) usage + echo "Unknown language : <$language> : not into {cs,de,fr,pl,ru,pt,..}" >&2 + exit 1;; + esac ;; + Z) $bTags || $bEnglish && KO + bTags=true; bFuzzy=true ;; + f) $bTags || $bEnglish && KO + bTags=true; bFilesNameCheck=true ;; + h) usage; exit 1 ;; + H) usage 2; exit 1 ;; + v) echo $version ; exit 0;; + \?) usage + echo " $OPTARG : invalid option" >&2 + exit 1 ;; + w) sWord=$OPTARG + bWord=true ;; + C) bColor=false;; + esac +done +shift $(($OPTIND - 1)) + +if [[ $# -gt 1 ]]; then + usage + echo " Unsupported arguments list (or option provide after the folder): $*" >&2 + exit 1 +fi + +dir=. +if [[ $# -eq 1 ]]; then + [[ ! -d $1 ]] && + usage && + echo " Unsupported arguments : not a directory : $*" >&2 && + exit 1 + dir=$1 +fi + +if $bWord ; then + [[ "${#sWord}" -lt 1 ]] && + usage && + echo " No argument provided for research of translations : $*" >&2 && + exit 1 + + $bSort && usage && echo " argument -s incompatible with -w" >&2 && exit 1 + $bTags && usage && echo " arguments -KtTZ are incompatible with -w" >&2 && exit 1 + bTags=true +fi + +#trace +sTags="language" +$bTags && sTags="tags from language" +$bKey && sTags="special keys from language" +$bWord && $bEnglish && sTags="Translations for <$sWord>" #IndevW +$bWord && ! $bEnglish && sTags="Translations sources for <$sWord>" #IndevW + +if $bFuzzy ; then + echo "~~Extraction of fuzzy translations from $language.po" >&2 +elif $bFilesNameCheck ; then + echo "~~Extraction of alterations translations of filenames from $language.po" >&2 +elif ! $bEnglish ; then + echo "~~Extraction of $sTags : $language" >&2 + sLangFilter='/msgstr/,/msgid/p' + sLangFilter2="s/^msgid .*//" + sLangFilter3='s/^msgstr //' +else + echo "~~Extraction of $sTags : english from $language files" >&2 + sLangFilter='/msgid/,/msgstr/p' + sLangFilter2="s/^msgstr .*//" + sLangFilter3="s/^msgid //" +fi + +# corpus +if ! $bTags ; then # full wanted language extraction + for fic in $(find $dir -name "$language.po" |sort) ; do + # echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " #>&2 #(not synchro) + [[ $bOrig == true ]] && + sSrc="s:$:\t${fic}:" + sed '1,17d' $fic <(echo "msgid ") | + sed '/^#/d' | + sed -n "$sLangFilter" | + sed -e "$sLangFilter2" \ + -e "$sLangFilter3" \ + -e 's/^\"//' \ + -e 's/\"$//' \ + -e '/^$/d' \ + -e "$sSrc" + done | if $bSort ; then sort -i |uniq -ci ; else cat; fi + +else # {$bTags == true} (+bTagsLinkOnly or not) + # t/T : tags extraction or a/A : a tags extraction + # zz<>/tt> or zz<>/a> + sInside="" + $bTagsOnly && + sInside="s:>:>\n:g" + if ! $bWord && ! $bKey && ! $bFuzzy && ! $bFilesNameCheck ; then #tags + for fic in $(find $dir -name "$language.po" |sort) ; do + $bOrig && + sSrc="s:>:¯${fic}>:g" + # echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " >&2 + sed '1,17d' $fic <(echo "msgid ") | + sed '/^#/d' | + sed -n "$sLangFilter" | + sed -e "$sLangFilter2" \ + -e "$sLangFilter3" \ + -e '/^$/d' \ + -e '/^""$/d' \ + -e 's::\n:g' \ + -e 's:\([^<]\)/:\1¯:' \ + -e "$sSrc" \ + -e "$sInside" \ + -e 's:|:¯:g' \ + -e 's:\\\\:¯:' | + if $bTagsLinkOnly ; then + grep '' | sed -e "s/' | sed -e 's/^' | sed -e "s/<\([^>\/]*\)/\n<\1/g" -e "s/<\//\n/g" | + grep -i '<.*>' | sed -e 's/^.*:>:' #| + #grep -ve '^a[ ¯]' | grep -ve '^button[ ¯]' | grep -ve '^code[ ¯]' | grep -ve '^c[ ¯]' #tmp for other checks + fi | + if $bTagsOnly ; then + sed -e 's/>//' + else + sed -e 's/>/¯/' + fi | + sed -e '/^$/d' | + if $bTagsOnly && ! $bOrig ; then # t , TO, a, AO + cat + else + sed -e 's:^\([^¯]*\)¯\([^¯]*\)$:\1¯ ¯\2:' + fi | + if ! $bTagsOnly && $bOrig ; then # tO , aO + sed -e 's:^\([^¯]*\)¯\([^¯]*\)¯\([^¯]*\)$:\1¯ ¯\2¯\3:' + else + cat + fi + done | + if $bSort ; then + sort -i | column -t -s "¯" |uniq -c + else + column -t -s "¯" | uniq + fi + + elif ! $bWord && ! $bFuzzy && ! $bFilesNameCheck ; then #bKey + # -K : key tags only (\\\\key option;) (table presentation) + sSrc="s:;:;\n:g" + for fic in $(find $dir -name "$language.po" |sort) ; do + $bOrig && + sSrc="s:;:¯${fic};\n:g" + # echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " >&2 + sed '1,17d' $fic <(echo "msgid ") | + sed '/^#/d' | + sed -n "$sLangFilter" | + sed -e "$sLangFilter2" \ + -e "$sLangFilter3" \ + -e "$sSrc" \ + -e '/^$/d' \ + -e '/^""$/d' + done | + grep -e '\\\\[^;]*;' | + sed -e 's/.*\\\\/\\\\/' \ + -e 's/;//' \ + -e 's:^\([^¯ ]*\)[¯ ]\([^¯ ]*\)$:\1¯ ¯\2:' | + if $bSort ; then + sort -i | column -t -s "¯ " | uniq -ci + else + column -t -s "¯ " | uniq + fi + + elif ! $bWord && ! $bFilesNameCheck ; then #bFuzzy + # extract fuzzy part en & translation to be validate + sLangFilter='/fuzzy/,/^$/p' + for fic in $(find $dir -name "$language.po" |sort) ; do + $bOrig && + sSrc="s:\(fuzzy.*\)$:\1¯${fic}:" + sed '1,17d' $fic <(echo "msgid ") | + sed -n "$sLangFilter" | + sed -e "$sSrc" \ + -e 's/^#/\n~\n#/' \ + -e '/^$/d' \ + -e '/^""$/d' + done | column -t -s "¯" | uniq + + elif $bFilesNameCheck ; then #bFilesNameCheck + # pattern to check : + # #. type: Image filename + # ... + # msgid "xxx" + # msgstr "xxx" + # or + # msgstr "" + # other msgstr are displayed... + # : possible specific i18n image with translated legends + + sLangFilter='/#\..*type:.*filename/,/^$/p' + sTrslFrom="" + sTrslTo="" + sPlomp="" + for fic in $(find $dir -name "$language.po" |sort) ; do + $bOrig && + sSrc="s:\(filename.*\)$:\1¯${fic}:" + sed '1,17d' $fic <(echo -e "\n\n#\. type: filename\nmsgid zz12 \nmsgstr zz1\n\n") | + sed -n "$sLangFilter" | + grep -E "msgid|msgstr|filename|\#:" | + sed -e "$sSrc" \ + -e 's/^#/\n~\n#/' \ + -e '/^$/d' \ + -e '/^""$/d' + done | + while read -r sLine ; do + if [[ "${sLine::6}" == "msgstr" ]] ; then #begin with msgstr + sTrslTo="${sLine:6}" + [[ $sTrslTo == $sTrslFrom ]] && + sPlomp="" && + continue + [[ $sTrslTo == ' ""' ]] && + sPlomp="" && + continue + # [[ "${sTrslFrom%\"}_cs\"" == "$sTrslTo" ]] && # those alterate images exist ! + # sPlomp="" && + # continue + sPlomp="$sPlomp\n$sLine" + echo -e "$sPlomp" + sPlomp="" + elif [[ "${sLine::5}" == "msgid" ]] ; then #begin with msgid + sTrslFrom="${sLine:5}" + sPlomp="$sPlomp\n$sLine" + elif [[ "${sLine::2}" == "#:" ]] ; then #begin with msgid + sPlomp="${sPlomp%%\\n\\n~}^?¯(->${sLine:2})" + else + sPlomp="$sPlomp\n$sLine" + fi + done | column -t -s "¯" | + # if 0 output, next lines + prev pipe are commentable + # thoses few lines just to get an output in case of success + tee /dev/tty | + ( + nb=0 + while read -r sLine ; do + (( nb++ )) + break + done + [[ nb -lt 1 ]] && + echo " => No alterations found !" >&2 + ) + + else #bWord .. -non case senstive research- manage plural forms + # + ignore most of detectable "code parts" + # even grep -i is more efficient, it detects codes and only matched line. + sTrslFrom="" + sTrslTo="" + sSrc="" + bLastIsId=false + $bColor && + sColor="s/\($sWord\)/$(tput setaf 2)\1$(tput sgr 0)/gI" + shopt -s nocasematch + for fic in $(find $dir -name "$language.po" |sort) ; do + # echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " >&2 + $bOrig && + sSrc="s,^$,\n~~~~ ${fic}," + bInCode=false + bInCodeComment=false + sed '1,17d' $fic <(echo -e "\n\nmsgid EOF1\nmsgstr EOF2\n\n") | + sed -e 's/^#.*$//' \ + -e '/^$/d' \ + -e 's/]*>//g' \ + -e 's/[^/]*<\/code>/ ~~SOME_CODES~~ /g' \ + -e 's/[^><]*\(\/\/[^><]*\)<\/code>/ ~~SOME_CODES~~ \1 ~EOC~ /g' \ + -e 's// /g' \ + -e 's/[^/]*/ ~SOME_CODES~ /g' \ + -e 's:[^/]*//\([^/><]*\): ~SOME_CODES~ \1 ~EOC~ :g' \ + -e 's/\([^\"]\)/<\/code>"\n"/g' \ + -e 's/\\/\\\\/g' | + # -e 's/<[stb]\/>/ /g' + while read -r sLine ; do + # ignoring codes without comments (cleaning sLine) + ! $bInCode && + if $( echo "$sLine" | grep -q "^\"[^/]*//" ) ; then + sLine=$( echo $sLine | sed "s/[^/]*/ ~SomeCodes~ /" ) + bInCode=true + bInCodeComment=true + fi + ! $bInCode && + if $( echo "$sLine" | grep -qe "^\"[^/]*$" ) ; then + if $( echo "$sLine" | grep -q "" ) ; then + bInCode=false + sLine=$( echo $sLine | sed "s/.*<\/code>/ ~PeaceOfCode~ /" ) + bInCode=false + else + sLine=" ~SomeCodes~ " + bInCode=true + fi + fi + ! $bInCode && + if [[ "${sLine::6}" == "" ]] ; then + #check if slash are from other xml peaces... or from a comment... + sBeg=$( echo $sLine | sed "s:]*>::g" ) && + sBeg="$sEnd" + if [[ $sBeg == $sEnd ]] ; then #no comment nor divide + if $( echo "$sLine" | grep -q "" ) ; then + sLine=$( echo $sLine | sed "s/.*<\/code>/ ~PeaceOfCode2~ /" ) + bInCode=false + else + sLine=" ~SomeCodes2~ " + bInCode=true + fi + #else + # ; #TODO : manage++ lines with multine code, slashes & comments + fi + fi + $bInCode && + if $( echo "$sLine" | grep -q "" ) ; then + bInCode=false + + sBeg=$( echo $sLine | sed "s/<\/code>$//" ) + sEnd="" + if $( echo "$sBeg" | grep -q "//" ) ; then + sBeg=$( echo $sLine | sed "s:^.*\(//.*\)<\/code>.*$:\1:" ) + else + sBeg="" + fi + bInCode=false + bInCodeComment=false + sLine="$sBeg ~Codes~ $sEnd" + elif $( echo "$sLine" | grep -q "//" ) ; then + sLine=$( echo $sLine | sed "s:^.*\(//.*\)$: ~~ALineOfCodeWithCom~~ \1:" ) + else + sLine=" ~~A_LINE_OF_CODES~~ " + fi + # std treatments + if [[ "${sLine::6}" == "msgstr" ]] ; then #begin with msgstr + $bEnglish && + [[ ${#sTrslFrom} -gt 1 ]] && + ! $( echo "$sTrslFrom"| grep -iq "$sWord" ) && + sTrslFrom="" && + sTrslTo="" && + bLastIsId=false && + continue + $bLastIsId && + sTrslTo="${sLine:7}" || + sTrslTo="$sTrslTo\n${sLine:7}" + bLastIsId=false; + elif [[ "${sLine::5}" == "msgid" ]] ; then #begin with msgid + if ! $bLastIsId ; then + ! $bEnglish && + [[ ${#sTrslTo} -gt 1 ]] && + ! $( echo "$sTrslTo"| grep -iq "$sWord" ) && + sTrslFrom="" && + sTrslTo="" + # clean before display + [[ ${#sTrslFrom} -gt 0 ]] && + sTrslFrom=$(echo $sTrslFrom | + sed -e 's/\"\"\n//' \ + -e 's/\"\"//' \ + -e 's/\"$//' \ + -e 's/^"//' + ) + if [[ ${#sTrslTo} -gt 0 ]] ; then + [[ ${#sTrslFrom} -gt 0 ]] && + echo -e "\n${sTrslFrom//\\\\n}\n${sTrslTo//\\\\n}" + # echo -n + else + [[ ${#sTrslFrom} -gt 0 ]] && + echo -e "\n${sTrslFrom//\\\\n}" + fi | sed -e "$sColor" \ + -e "$sSrc" + fi + #new ... = > prepare next + sTrslTo="" + ! $bLastIsId && + sTrslFrom="${sLine:6}" || + sTrslFrom="$sTrslFrom\n${sLine:6}" + bLastIsId=true; + else + #next... or empty + [[ ${#sLine} -lt 2 ]] && + continue + if $bLastIsId ; then + [[ ${#sTrslFrom} -gt 2 ]] && + sTrslFrom="$sTrslFrom\n$sLine" || + sTrslFrom="$sLine" + else + [[ ${#sTrslTo} -gt 2 ]] && + sTrslTo="$sTrslTo\n$sLine" || + sTrslTo="$sLine" + fi + fi + done # every sLines + done # every xx.po files + fi +fi + +echo "!! $0 END" >&2 diff --git a/tools/patchLokalizeWrapping.sh b/tools/patchLokalizeWrapping.sh new file mode 100755 index 00000000..d285154d --- /dev/null +++ b/tools/patchLokalizeWrapping.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Author : BCE - 1905 - 1906 +# +# correction into i18n po files. +# restablish fake wrapping done by lokalize... +# checking non existing save +# preventively creating a save before eventual modifications +# and removing it if no alteration +# +# Histo +# V0.1 +# base +# non treatment for po commented lines +# ... +# V1.0 +# checking save don't exist => checking non existing save +# + trace only for modified files +# issue : sometimes to launch twice... + + +# +lg=fr # language digramme xx.po + +# forcing a save name (v 0.2) +# sav=sav0 # permit indental saves use sav, sav1, sav2, ... (v 0.2) +# for fic in $(find . -name $lg.po |sort) ; do +# [[ -e $fic.$sav ]] && +# echo -e "=== $fic.$sav preexist\n\t\t ABANDON" >&2 && +# exit -1 +# done + +# autofinding a non used savename (v 1.0) +echo "Seeking for a non used save name..." >&2 +i=0 +while : ; do + j=$i + sav=sav$i # permit indental saves use sav1, sav2, ... + nb=$(find . -name $lg.po.$sav -print -quit 2> /dev/null | wc -l) + [[ $nb -eq 0 ]] && break + (( ++i )) +done + +echo "OK, no save $sav preexist ! => processing saving $lg.po into $lg.po.$sav" >&2 +sleep 1 + + # -exec echo " == restablishing {} ==" >&2 \; \ +find . -name $lg.po -type f \ + -exec sed -i.$sav \ + -e '/^[^#]/ s/\\n\([^"]\)/\\n"\n"\1/g' \ + -e '/^msgid "[^"]/ N;s/^msgid \("[^\n]\+"\)\n"/msgid ""\n\1\n"/' \ + -e '/^msgstr "[^"]/ N;s/^msgstr \(".\+"\)\n"/msgstr ""\n\1\n"/' \ + -e '/^[^#]/ s/\\n\([^"]\)/\\n"\n"\1/g' \ + {} \; \ + -exec bash -c '[[ $(diff {} {}.'$sav' 2>/dev/null | wc -l ) == 0 ]] && rm {}.'$sav' || echo " cleaning {} !!" >&2' \; + + # note : doubling s/\\n\([^"]\)/../g to cover the case : ".*\\n\\n.*" + +echo "!! END $0 !" >&2