#!/bin/bash
# Author : BCE - 19/05.
# Extractions from i18n po files (read-only: the po files are never modified).
# Extracts all translations of a language for checks, grep, typo hunting...
# Also extracts links or tags.
# Allows checking how a word is translated (case-insensitive, regexp supported).
# Identifies fuzzy areas.
# (To be launched from the Colobot main directory or from any directory containing po files.)
#
# designed for Colobot, some parts like -K may be specific, rest should be usable in other projects
#
#
# Script identity: printed by the -v option and in the help page footer.
# Please update both values whenever the script changes.
authorList='BCE'
version='V1.0.0'
# usage [ANY]
# Print the help text on stderr (nothing is written on stdout).
#   no argument  : full help page (processing types, specializations, footer)
#   any argument : samples page (this is what -H requests through "usage 2")
# Reads the globals $version and $authorList for the help page footer.
usage(){
  echo " Purpose: extract language translations for checks." >&2
  echo " from every po files into subdirectories" >&2
  echo "" >&2
  # every option group is optional; -e is listed because the option parser accepts it
  echo " Usage: ${0##*/} [-l language_digram] [-AaTtKZf] [-e] [-s] [-O] [-hH] [-w pattern] [dir]" >&2
  echo " (-l xx and -lxx are equivalent, options can be put together)" >&2
  if [[ $# -lt 1 ]]; then
    echo " dir : optionnal directory and subdirectories where looking for xx.po files, current one by default" >&2
    echo " ==== processing types ===" >&2
    echo " : full wanted language extraction from every lg.po into subfolders" >&2
    echo " -a/A : tags link (zzz) extraction only (table presentation)" >&2
    echo " -t/T : tags extraction only (zz<>/tt>) (table presentation)" >&2
    echo " A/T : like a/t, without inside part (zz) (for ref/link comparisons)" >&2
    echo " -K : key tags only (\\\\key option;) (table presentation)" >&2
    echo " -Z : identify fuzzy parts (non took in account translations...)" >&2
    echo " -f : identify filenames references alterations" >&2
    echo " -w pattern: non case sensitive translation extraction for i18n checks (ignoring codes parts)" >&2
    # non case sensitive...
    # support regexp .*[]
    # sample : getI18n.sh -Ow "d[ée]fi"
    # support also regexp +{} but escaping them
    # color can be disabled with C option
    echo " -h : this help page" >&2
    echo " -H : samples page" >&2
    echo " ==== specializations ===" >&2
    echo " -l lg : set language : {cs,de,fr,pl,ru,pt,..}.po sources files. (default is fr)" >&2
    echo " -e : corresponding english i18n origins instead" >&2
    echo " -O : origin from each line (usefull with sort)" >&2
    echo " -s : sort & count" >&2
    #echo " -C : disable color highlightment for -w" >&2 # further batch treatments?
    echo " dir : directory to analyse with its subdirectories" >&2
    echo " (last opt argument) (default:current)" >&2
    echo "" >&2
    echo " Designed for i18n Colobot'uses - $version - $authorList" >&2
  else
    echo " -h : usage help page" >&2
    echo " ==== samples ===" >&2
    echo " full brazilian portuguese i18n extraction" >&2
    echo " $0 -l pt" >&2
    echo " full fr i18n extraction with origin into a file having current date" >&2
    echo " $0 -O -lfr > i18nFr.$(date +%y%m%d).txt" >&2
    echo " every german tags (sorted)" >&2
    echo " ${0##*/} -l de -ts" >&2
    echo " compare 'a' tags between wanted lg & corresponding english (constumize spaces to be ignore)" >&2
    echo " ${0##*/} -Asl pl> s.tmp && ${0##*/} -Asel pl> e.tmp && meld [se].tmp && rm [se].tmp" >&2
    echo " every key-tags from russian (sorted) (e:original english part) (from there)" >&2
    echo " ${0##*/} -KsOel ru ../.." >&2
    echo " every fuzzy (ignored translations) parts into russian" >&2
    echo " ${0##*/} -Zl ru" >&2
    echo " search for \"cell\" translations into czech" >&2
    # -w takes an argument, so it must end its option cluster (or stand alone),
    # and the czech po files are selected with "cs"
    echo " ${0##*/} -elcs -w \"cell\"" >&2
    echo " search for \"powerplant\"/\"lowerplant\" translations into polish files" >&2
    echo " ${0##*/}"' -lpl -Oew "[PL]ower.\{0,2\}plant"' >&2
    echo " every key translated (from Colobot root)" >&2
    echo " ${0##*/}"' -Ksl..' >&2
    echo " every instruction tags translated (from Colobot root)" >&2
    echo " ${0##*/}"' -Tsl.. |grep " cbot"' >&2
    echo " every fr files containing a pattern (from Colobot root)" >&2
    echo " ${0##*/} -Olfr | grep -i tireur | sed 's/^.*\.\///' | uniq" >&2
    echo " =====================" >&2
  fi
}
# KO : abort on an incompatible option combination.
# Shows the usage text, then the incompatibility rules (all on stderr),
# and terminates the script with status 1.
KO(){
  usage
  printf '\n%s\n%s\n' \
    " -f, -t, -T, -K & -Z are incompatible options" \
    " -e , -f & -Z are also incompatible options" >&2
  exit 1
}
# Internal state and default values; the command-line options override them.
language=fr # language digram: selects the <language>.po files
# output modifiers
bSort=false bOrig=false bEnglish=false
# processing type selectors
bTags=false bTagsLinkOnly=false bTagsOnly=false
bKey=false bFuzzy=false bFilesNameCheck=false
# word search (-w)
bWord=false sWord=""
bColor=true # hidden option, only for -w: highlight the matched word
# sed expression carrying the origin file name when -O is given
sSrc=""
# check params
# parseOptions ARGS...
# Decode the command-line options into the global flags (language, bSort,
# bOrig, bEnglish, bTags, bTagsOnly, bTagsLinkOnly, bKey, bFuzzy,
# bFilesNameCheck, bWord, sWord, bColor).
# Exits with status 1 (through usage/KO) on an unknown option, a missing option
# argument, an unknown language or an incompatible combination; -v prints the
# version and exits 0.
# OPTIND is deliberately left global: the caller shifts the parsed options away
# with it afterwards.
parseOptions(){
  local option
  while getopts ":esOtTaAhHl:KZvw:Cf" option ; do #IndevW
    # echo "option $option" >&2
    case $option in
      # -e is incompatible with -Z and -f, whatever the order on the command line
      e) { $bFuzzy || $bFilesNameCheck ; } && KO
         bEnglish=true ;;
      s) bSort=true ;;
      O) bOrig=true ;;
      # only one processing type may be selected: bTags marks "a type is already set"
      t) $bTags && KO
         bTags=true ;;
      T) $bTags && KO
         bTags=true ; bTagsOnly=true ;;
      a) $bTags && KO
         bTags=true ; bTagsLinkOnly=true ;;
      A) $bTags && KO
         bTags=true ; bTagsLinkOnly=true ; bTagsOnly=true ;;
      K) $bTags && KO
         bTags=true ; bKey=true ;;
      l) language=$OPTARG
         case $language in
           "cs"|"de"|"fr"|"pl"|"ru"|"pt") ;;
           "..") language='??' ;; #every ones: glob matching any two-letter po file
           *) usage
              echo "Unknown language : <$language> : not into {cs,de,fr,pl,ru,pt,..}" >&2
              exit 1 ;;
         esac ;;
      Z) { $bTags || $bEnglish ; } && KO
         bTags=true ; bFuzzy=true ;;
      f) { $bTags || $bEnglish ; } && KO
         bTags=true ; bFilesNameCheck=true ;;
      h) usage ; exit 1 ;;
      H) usage 2 ; exit 1 ;;
      v) echo "$version" ; exit 0 ;;
      # silent getopts mode reports a missing option argument as ':' (OPTARG = the option)
      :) usage
         echo " $OPTARG : missing argument" >&2
         exit 1 ;;
      \?) usage
          echo " $OPTARG : invalid option" >&2
          exit 1 ;;
      w) sWord=$OPTARG
         bWord=true ;;
      C) bColor=false ;;
    esac
  done
}
parseOptions "$@"
# Drop the options already parsed: only the optional directory may remain.
shift $((OPTIND - 1))
if (( $# > 1 )); then
  usage
  echo " Unsupported arguments list (or option provide after the folder): $*" >&2
  exit 1
fi
# Directory to analyse: current one unless a valid directory is given.
dir=.
if (( $# == 1 )); then
  if [[ ! -d $1 ]]; then
    usage &&
      echo " Unsupported arguments : not a directory : $*" >&2 &&
      exit 1
  fi
  dir=$1
fi
# -w needs a non-empty pattern and excludes -s and every processing type.
if $bWord ; then
  if (( ${#sWord} < 1 )); then
    usage &&
      echo " No argument provided for research of translations : $*" >&2 &&
      exit 1
  fi
  if $bSort ; then
    usage && echo " argument -s incompatible with -w" >&2 && exit 1
  fi
  if $bTags ; then
    usage && echo " arguments -KtTZ are incompatible with -w" >&2 && exit 1
  fi
  # from here on the word search is handled as one of the "tags" processing types
  bTags=true
fi
#trace
# Build the label of what is extracted, announce it on stderr and, for the
# msgid/msgstr based extractions, prepare the three sed filters used later:
#   sLangFilter  : range of lines to keep
#   sLangFilter2 : blanks the line of the language that is not wanted
#   sLangFilter3 : strips the keyword of the wanted language
sTags="language"
if $bTags ; then
  sTags="tags from language"
fi
if $bKey ; then
  sTags="special keys from language"
fi
if $bWord ; then #IndevW
  if $bEnglish ; then
    sTags="Translations for <$sWord>"
  else
    sTags="Translations sources for <$sWord>"
  fi
fi
if $bFuzzy ; then
  echo "~~Extraction of fuzzy translations from $language.po" >&2
elif $bFilesNameCheck ; then
  echo "~~Extraction of alterations translations of filenames from $language.po" >&2
elif $bEnglish ; then
  # english side: keep from msgid up to msgstr
  echo "~~Extraction of $sTags : english from $language files" >&2
  sLangFilter='/msgid/,/msgstr/p'
  sLangFilter2="s/^msgstr .*//"
  sLangFilter3="s/^msgid //"
else
  # translated side: keep from msgstr up to the next msgid
  echo "~~Extraction of $sTags : $language" >&2
  sLangFilter='/msgstr/,/msgid/p'
  sLangFilter2="s/^msgid .*//"
  sLangFilter3='s/^msgstr //'
fi
# corpus : run the extraction selected by the flags set above.
# Every branch walks the "$language.po" files found under $dir (sorted by path)
# and feeds each of them through a sed/grep pipeline.
# NOTE(review): several sed/grep patterns below (tag handling in the -a/-t
# branch, code-part handling in the -w branch) look truncated, as if markup
# had been stripped from them — verify against the upstream script before
# relying on them or editing them.
if ! $bTags ; then # full wanted language extraction
for fic in $(find $dir -name "$language.po" |sort) ; do
# echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " #>&2 #(not synchro)
# with -O, append a tab and the source file path to every output line
[[ $bOrig == true ]] &&
sSrc="s:$:\t${fic}:"
# Drop the first 17 lines (presumably the po header — TODO confirm), add a
# trailing "msgid " sentinel line, remove comment lines, keep the wanted
# msgid/msgstr ranges, then strip keyword, surrounding quotes and empty lines.
sed '1,17d' $fic <(echo "msgid ") |
sed '/^#/d' |
sed -n "$sLangFilter" |
sed -e "$sLangFilter2" \
-e "$sLangFilter3" \
-e 's/^\"//' \
-e 's/\"$//' \
-e '/^$/d' \
-e "$sSrc"
done | if $bSort ; then sort -i |uniq -ci ; else cat; fi
else # {$bTags == true} (+bTagsLinkOnly or not)
# t/T : tags extraction or a/A : a tags extraction
# zz<>/tt> or zz<>/a>
# with -T/-A, break the line after every '>' so that only the tag itself is kept
sInside=""
$bTagsOnly &&
sInside="s:>:>\n:g"
if ! $bWord && ! $bKey && ! $bFuzzy && ! $bFilesNameCheck ; then #tags
# '¯' is used as the column separator, turned into a table by "column" below
for fic in $(find $dir -name "$language.po" |sort) ; do
# with -O, insert the source file path before every '>'
$bOrig &&
sSrc="s:>:¯${fic}>:g"
# echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " >&2
sed '1,17d' $fic <(echo "msgid ") |
sed '/^#/d' |
sed -n "$sLangFilter" |
sed -e "$sLangFilter2" \
-e "$sLangFilter3" \
-e '/^$/d' \
-e '/^""$/d' \
-e 's::\n:g' \
-e 's:\([^<]\)/:\1¯:' \
-e "$sSrc" \
-e "$sInside" \
-e 's:|:¯:g' \
-e 's:\\\\:¯:' |
if $bTagsLinkOnly ; then
grep '' | sed -e "s/' | sed -e 's/^' | sed -e "s/<\([^>\/]*\)/\n<\1/g" -e "s/<\//\n/g" |
grep -i '<.*>' | sed -e 's/^/' -e 's:/>.*:>:' #|
#grep -ve '^a[ ¯]' | grep -ve '^button[ ¯]' | grep -ve '^code[ ¯]' | grep -ve '^c[ ¯]' #tmp for other checks
fi |
if $bTagsOnly ; then
sed -e 's/>//'
else
sed -e 's/>/¯/'
fi |
sed -e '/^$/d' |
if $bTagsOnly && ! $bOrig ; then # t , TO, a, AO
cat
else
sed -e 's:^\([^¯]*\)¯\([^¯]*\)$:\1¯ ¯\2:'
fi |
if ! $bTagsOnly && $bOrig ; then # tO , aO
sed -e 's:^\([^¯]*\)¯\([^¯]*\)¯\([^¯]*\)$:\1¯ ¯\2¯\3:'
else
cat
fi
done |
if $bSort ; then
sort -i | column -t -s "¯" |uniq -c
else
column -t -s "¯" | uniq
fi
elif ! $bWord && ! $bFuzzy && ! $bFilesNameCheck ; then #bKey
# -K : key tags only (\\\\key option;) (table presentation)
# break the line after every ';' (with -O, also insert the source file path)
sSrc="s:;:;\n:g"
for fic in $(find $dir -name "$language.po" |sort) ; do
$bOrig &&
sSrc="s:;:¯${fic};\n:g"
# echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " >&2
sed '1,17d' $fic <(echo "msgid ") |
sed '/^#/d' |
sed -n "$sLangFilter" |
sed -e "$sLangFilter2" \
-e "$sLangFilter3" \
-e "$sSrc" \
-e '/^$/d' \
-e '/^""$/d'
done |
grep -e '\\\\[^;]*;' |
sed -e 's/.*\\\\/\\\\/' \
-e 's/;//' \
-e 's:^\([^¯ ]*\)[¯ ]\([^¯ ]*\)$:\1¯ ¯\2:' |
if $bSort ; then
sort -i | column -t -s "¯ " | uniq -ci
else
column -t -s "¯ " | uniq
fi
elif ! $bWord && ! $bFilesNameCheck ; then #bFuzzy
# extract the fuzzy entries: english part and translation still to be validated
# (each range runs from a line containing "fuzzy" to the next empty line)
sLangFilter='/fuzzy/,/^$/p'
for fic in $(find $dir -name "$language.po" |sort) ; do
# with -O, append the source file path to the "fuzzy" line
$bOrig &&
sSrc="s:\(fuzzy.*\)$:\1¯${fic}:"
sed '1,17d' $fic <(echo "msgid ") |
sed -n "$sLangFilter" |
sed -e "$sSrc" \
-e 's/^#/\n~\n#/' \
-e '/^$/d' \
-e '/^""$/d'
done | column -t -s "¯" | uniq
elif $bFilesNameCheck ; then #bFilesNameCheck
# pattern to check :
# #. type: Image filename
# ...
# msgid "xxx"
# msgstr "xxx"
# or
# msgstr ""
# other msgstr are displayed...
# : possible specific i18n image with translated legends
sLangFilter='/#\..*type:.*filename/,/^$/p'
# sTrslFrom / sTrslTo : text following the last msgid / msgstr keyword read
# sPlomp : lines buffered for the entry being examined
sTrslFrom=""
sTrslTo=""
sPlomp=""
for fic in $(find $dir -name "$language.po" |sort) ; do
# with -O, append the source file path to the "filename" type line
$bOrig &&
sSrc="s:\(filename.*\)$:\1¯${fic}:"
# a dummy filename entry is appended after each file as an end sentinel
sed '1,17d' $fic <(echo -e "\n\n#\. type: filename\nmsgid zz12 \nmsgstr zz1\n\n") |
sed -n "$sLangFilter" |
grep -E "msgid|msgstr|filename|\#:" |
sed -e "$sSrc" \
-e 's/^#/\n~\n#/' \
-e '/^$/d' \
-e '/^""$/d'
done |
while read -r sLine ; do
if [[ "${sLine::6}" == "msgstr" ]] ; then #begin with msgstr
sTrslTo="${sLine:6}"
# unchanged (same as msgid) or empty translation: nothing to report
[[ $sTrslTo == $sTrslFrom ]] &&
sPlomp="" &&
continue
[[ $sTrslTo == ' ""' ]] &&
sPlomp="" &&
continue
# [[ "${sTrslFrom%\"}_cs\"" == "$sTrslTo" ]] && # those alterate images exist !
# sPlomp="" &&
# continue
# altered filename: print the buffered entry followed by this msgstr line
sPlomp="$sPlomp\n$sLine"
echo -e "$sPlomp"
sPlomp=""
elif [[ "${sLine::5}" == "msgid" ]] ; then #begin with msgid
sTrslFrom="${sLine:5}"
sPlomp="$sPlomp\n$sLine"
elif [[ "${sLine::2}" == "#:" ]] ; then #begin with "#:"
sPlomp="${sPlomp%%\\n\\n~}^?¯(->${sLine:2})"
else
sPlomp="$sPlomp\n$sLine"
fi
done | column -t -s "¯" |
# if there is no output, the next lines + the previous pipe can be commented out
# these few lines are only there to print a message in case of success:
# tee copies the table to /dev/tty, the subshell reports when no line was read
tee /dev/tty |
(
nb=0
while read -r sLine ; do
(( nb++ ))
break
done
[[ nb -lt 1 ]] &&
echo " => No alterations found !" >&2
)
else #bWord .. -case-insensitive search- manages plural forms
# + ignores most of the detectable "code parts"
# even if grep -i is more efficient, it detects codes and only the matched line.
# sTrslFrom / sTrslTo : msgid / msgstr text accumulated for the current entry
# bLastIsId : true while the lines being read belong to a msgid
sTrslFrom=""
sTrslTo=""
sSrc=""
bLastIsId=false
# highlight the searched word in green (GNU sed 'I' flag: case-insensitive)
$bColor &&
sColor="s/\($sWord\)/$(tput setaf 2)\1$(tput sgr 0)/gI"
shopt -s nocasematch
for fic in $(find $dir -name "$language.po" |sort) ; do
# echo "~~~~~~~ Extraction from : $fic ~~~~~~~ " >&2
# with -O, turn the empty line printed before each entry into a "~~~~ <file>" line
$bOrig &&
sSrc="s,^$,\n~~~~ ${fic},"
bInCode=false
bInCodeComment=false
# a dummy msgid/msgstr pair is appended so that the last real entry gets flushed
sed '1,17d' $fic <(echo -e "\n\nmsgid EOF1\nmsgstr EOF2\n\n") |
sed -e 's/^#.*$//' \
-e '/^$/d' \
-e 's/]*>//g' \
-e 's/[^/]*<\/code>/ ~~SOME_CODES~~ /g' \
-e 's/[^><]*\(\/\/[^><]*\)<\/code>/ ~~SOME_CODES~~ \1 ~EOC~ /g' \
-e 's// /g' \
-e 's/[^/]*/ ~SOME_CODES~ /g' \
-e 's:[^/]*//\([^/><]*\): ~SOME_CODES~ \1 ~EOC~ :g' \
-e 's/\([^\"]\)/<\/code>"\n"/g' \
-e 's/\\/\\\\/g' |
# -e 's/<[stb]\/>/ /g'
while read -r sLine ; do
# ignoring codes without comments (cleaning sLine)
! $bInCode &&
if $( echo "$sLine" | grep -q "^\"[^/]*//" ) ; then
sLine=$( echo $sLine | sed "s/[^/]*/ ~SomeCodes~ /" )
bInCode=true
bInCodeComment=true
fi
! $bInCode &&
if $( echo "$sLine" | grep -qe "^\"[^/]*$" ) ; then
if $( echo "$sLine" | grep -q "
" ) ; then
bInCode=false
sLine=$( echo $sLine | sed "s/.*<\/code>/ ~PeaceOfCode~ /" )
bInCode=false
else
sLine=" ~SomeCodes~ "
bInCode=true
fi
fi
! $bInCode &&
if [[ "${sLine::6}" == "" ]] ; then
#check if the slashes come from other xml pieces... or from a comment...
sBeg=$( echo $sLine | sed "s:]*>::g" ) &&
sBeg="$sEnd"
if [[ $sBeg == $sEnd ]] ; then #no comment nor divide
if $( echo "$sLine" | grep -q "
" ) ; then
sLine=$( echo $sLine | sed "s/.*<\/code>/ ~PeaceOfCode2~ /" )
bInCode=false
else
sLine=" ~SomeCodes2~ "
bInCode=true
fi
#else
# ; #TODO : manage++ lines with multiline code, slashes & comments
fi
fi
# inside a code part: keep only a possible // comment, replace the rest by a marker
$bInCode &&
if $( echo "$sLine" | grep -q "
" ) ; then
bInCode=false
sBeg=$( echo $sLine | sed "s/<\/code>$//" )
sEnd=""
if $( echo "$sBeg" | grep -q "//" ) ; then
sBeg=$( echo $sLine | sed "s:^.*\(//.*\)<\/code>.*$:\1:" )
else
sBeg=""
fi
bInCode=false
bInCodeComment=false
sLine="$sBeg ~Codes~ $sEnd"
elif $( echo "$sLine" | grep -q "//" ) ; then
sLine=$( echo $sLine | sed "s:^.*\(//.*\)$: ~~ALineOfCodeWithCom~~ \1:" )
else
sLine=" ~~A_LINE_OF_CODES~~ "
fi
# std treatments
if [[ "${sLine::6}" == "msgstr" ]] ; then #begin with msgstr
# with -e the word is searched in the english text: drop the entry if it is absent
$bEnglish &&
[[ ${#sTrslFrom} -gt 1 ]] &&
! $( echo "$sTrslFrom"| grep -iq "$sWord" ) &&
sTrslFrom="" &&
sTrslTo="" &&
bLastIsId=false &&
continue
$bLastIsId &&
sTrslTo="${sLine:7}" ||
sTrslTo="$sTrslTo\n${sLine:7}"
bLastIsId=false;
elif [[ "${sLine::5}" == "msgid" ]] ; then #begin with msgid
# a new msgid closes the previous entry: filter it, clean it, print it
if ! $bLastIsId ; then
# without -e the word is searched in the translation: drop the entry if it is absent
! $bEnglish &&
[[ ${#sTrslTo} -gt 1 ]] &&
! $( echo "$sTrslTo"| grep -iq "$sWord" ) &&
sTrslFrom="" &&
sTrslTo=""
# clean before display
[[ ${#sTrslFrom} -gt 0 ]] &&
sTrslFrom=$(echo $sTrslFrom |
sed -e 's/\"\"\n//' \
-e 's/\"\"//' \
-e 's/\"$//' \
-e 's/^"/'
)
[[ ${#sTrslTo} -gt 0 ]] &&
sTrslTo=$(echo $sTrslTo |
sed -e 's/\"\"\n//' \
-e 's/\"\"//' \
-e 's/\"$//' \
-e 's/^"/>/'
)
if [[ ${#sTrslTo} -gt 0 ]] ; then
[[ ${#sTrslFrom} -gt 0 ]] &&
echo -e "\n${sTrslFrom//\\\\n}\n${sTrslTo//\\\\n}"
# echo -n
else
[[ ${#sTrslFrom} -gt 0 ]] &&
echo -e "\n${sTrslFrom//\\\\n}"
fi | sed -e "$sColor" \
-e "$sSrc"
fi
#new ... = > prepare next
sTrslTo=""
! $bLastIsId &&
sTrslFrom="${sLine:6}" ||
sTrslFrom="$sTrslFrom\n${sLine:6}"
bLastIsId=true;
else
#next... or empty
# continuation line: append it to the msgid or msgstr text being accumulated
[[ ${#sLine} -lt 2 ]] &&
continue
if $bLastIsId ; then
[[ ${#sTrslFrom} -gt 2 ]] &&
sTrslFrom="$sTrslFrom\n$sLine" ||
sTrslFrom="$sLine"
else
[[ ${#sTrslTo} -gt 2 ]] &&
sTrslTo="$sTrslTo\n$sLine" ||
sTrslTo="$sLine"
fi
fi
done # every sLines
done # every xx.po files
fi
fi
# end-of-run marker, on stderr so that it never mixes with the extracted data
echo "!! $0 END" >&2