diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82c3d0e2..9f28554f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,11 @@
cmake_minimum_required(VERSION 2.8)
+find_package(PythonInterp 2.6)
+
+if(NOT PYTHONINTERP_FOUND)
+ message(WARNING "Python not found, help and level files will NOT be translated!")
+endif()
+
if(NOT DEFINED COLOBOT_INSTALL_DATA_DIR)
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
set(COLOBOT_INSTALL_DATA_DIR ${CMAKE_INSTALL_PREFIX}/data CACHE PATH "Colobot shared data directory")
diff --git a/help/CMakeLists.txt b/help/CMakeLists.txt
index 69470bb8..3ff52821 100644
--- a/help/CMakeLists.txt
+++ b/help/CMakeLists.txt
@@ -1,28 +1,34 @@
cmake_minimum_required(VERSION 2.8)
-include(../i18n-tools/CommonI18N.cmake)
-include(../i18n-tools/HelpI18N.cmake)
+include(../i18n-tools/I18NTools.cmake)
set(HELP_INSTALL_DATA_DIR ${COLOBOT_INSTALL_DATA_DIR}/help)
##
# Add help category directory
##
-function(add_help_category help_category_dir install_dest_dir)
- file(GLOB help_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${help_category_dir}/E/*.txt)
- list(SORT help_files)
- if(PO4A AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${help_category_dir}/po/)
- generate_help_i18n(translated_help_dirs
- "${help_files}"
- ${help_category_dir}/po
- ${DATA_BINARY_DIR}/help-po/${help_category_dir})
- else()
- set(translated_help_dirs "")
+function(add_help_category help_category_dir install_subdir)
+
+ message(STATUS "Adding translation targets for help/${help_category_dir}")
+
+ file(GLOB english_help_files ${help_category_dir}/E/*)
+ install(FILES ${english_help_files} DESTINATION ${HELP_INSTALL_DATA_DIR}/E/${install_subdir})
+
+ if(PYTHONINTERP_FOUND)
+ set(work_dir ${DATA_BINARY_DIR}/help-po/${help_category_dir})
+ generate_translations(translated_help_files
+ "help"
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${help_category_dir}
+ ${help_category_dir}/po
+ ${work_dir}
+ "${install_subdir}")
+
+ install_preserving_relative_paths("${translated_help_files}"
+ ${work_dir}
+ ${HELP_INSTALL_DATA_DIR})
endif()
- install(DIRECTORY ${help_category_dir}/E/ DESTINATION ${HELP_INSTALL_DATA_DIR}/E/${install_dest_dir})
- foreach(translated_help_dir ${translated_help_dirs})
- install(DIRECTORY ${DATA_BINARY_DIR}/help-po/${help_category_dir}/${translated_help_dir}/ DESTINATION ${HELP_INSTALL_DATA_DIR}/${translated_help_dir}/${install_dest_dir})
- endforeach()
+
endfunction()
diff --git a/i18n-tools/.gitignore b/i18n-tools/.gitignore
new file mode 100644
index 00000000..7a60b85e
--- /dev/null
+++ b/i18n-tools/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*.pyc
diff --git a/i18n-tools/CommonI18N.cmake b/i18n-tools/CommonI18N.cmake
deleted file mode 100644
index 65c0ad8c..00000000
--- a/i18n-tools/CommonI18N.cmake
+++ /dev/null
@@ -1,22 +0,0 @@
-##
-# Common function used in other I18N CMake modules
-##
-
-##
-# Get language code from *.po file name e.g. "de.po" -> "de"
-##
-function(get_language_code result_language_code po_file)
- get_filename_component(po_file_name ${po_file} NAME)
- string(REPLACE ".po" "" language_code ${po_file_name})
- set(${result_language_code} ${language_code} PARENT_SCOPE)
-endfunction()
-
-##
-# Get language char from *.po file name e.g. "de.po" -> "D"
-##
-function(get_language_char result_language_char po_file)
- get_filename_component(po_file_name ${po_file} NAME)
- string(REGEX REPLACE ".\\.po" "" language_char ${po_file_name})
- string(TOUPPER ${language_char} language_char)
- set(${result_language_char} ${language_char} PARENT_SCOPE)
-endfunction()
diff --git a/i18n-tools/HelpI18N.cmake b/i18n-tools/HelpI18N.cmake
deleted file mode 100644
index 10840928..00000000
--- a/i18n-tools/HelpI18N.cmake
+++ /dev/null
@@ -1,83 +0,0 @@
-##
-# Meta-infrastructure to allow po-based translation of Colobot help files
-##
-
-find_program(PO4A po4a)
-
-if(NOT PO4A)
- message(WARNING "PO4A not found, help files will NOT be translated!")
-endif()
-
-##
-# Generate translated help files in separate directories per language
-##
-function(generate_help_i18n
- result_generated_help_dirs # output variable to return names of directories with translated files
- source_help_files # input help files
- po_dir # directory with translations
- work_dir) # directory where to save generated files
-
- # generated config file for po4a
- set(po4a_cfg_file ${work_dir}/help_po4a.cfg)
-
- # get translations from po directory
- get_filename_component(abs_po_dir ${po_dir} ABSOLUTE)
- file(WRITE ${po4a_cfg_file} "[po_directory] ${abs_po_dir}\n")
-
- # prepare output directories
- set(output_help_subdirs "")
- file(GLOB po_files ${po_dir}/*.po)
- foreach(po_file ${po_files})
- get_language_char(language_char ${po_file})
- #set(language_help_subdir ${work_dir}/${language_char})
- list(APPEND output_help_subdirs ${language_char})
- endforeach()
-
- # add translation rules for help files
- foreach(source_help_file ${source_help_files})
- get_filename_component(abs_source_help_file ${source_help_file} ABSOLUTE)
- get_filename_component(help_file_name ${source_help_file} NAME)
-
- file(APPEND ${po4a_cfg_file} "\n[type:colobothelp] ${abs_source_help_file}")
- foreach(po_file ${po_files})
- # generated file for single language
- get_language_code(language_code ${po_file})
- get_language_char(language_char ${po_file})
- set(generated_help_file ${work_dir}/${language_char}/${help_file_name})
- file(APPEND ${po4a_cfg_file} " \\\n ${language_code}:${generated_help_file}")
- endforeach()
- endforeach()
-
- # dummy files to signal that scripts have finished running
- set(translation_signalfile ${work_dir}/translations)
- set(po_clean_signalfile ${work_dir}/po_clean)
-
- # script to run po4a and generate translated files
- add_custom_command(OUTPUT ${translation_signalfile}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/run_po4a.sh
- ${po4a_cfg_file}
- ${translation_signalfile}
- DEPENDS ${po_files})
-
- file(GLOB pot_file ${po_dir}/*.pot)
- set(po_files ${po_files} ${pot_file})
-
- # script to do some cleanups in updated *.po and *.pot files
- string(REPLACE ";" ":" escaped_po_files "${po_files}")
- add_custom_command(OUTPUT ${po_clean_signalfile}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/clean_po_files.sh
- ${escaped_po_files}
- ${translation_signalfile}
- ${po_clean_signalfile}
- DEPENDS ${translation_signalfile}
- )
-
- # generate some unique string for target name
- string(REGEX REPLACE "[/\\]" "_" target_suffix ${po_dir})
-
- # target to run both scripts
- add_custom_target(i18n_${target_suffix} ALL DEPENDS ${translation_signalfile} ${po_clean_signalfile})
-
- # return the translated files
- set(${result_generated_help_dirs} ${output_help_subdirs} PARENT_SCOPE)
-endfunction()
diff --git a/i18n-tools/I18NTools.cmake b/i18n-tools/I18NTools.cmake
new file mode 100644
index 00000000..00f20591
--- /dev/null
+++ b/i18n-tools/I18NTools.cmake
@@ -0,0 +1,85 @@
+##
+# Meta-infrastructure to allow po-based translation of Colobot data files (help and level files)
+##
+
+##
+# Generate translated files with Python script
+##
+function(generate_translations
+ result_output_files # output variable to return file names of translated files
+ type # type of files to process
+ working_dir # working directory for the commands to run
+ input_dir # directory with source files
+ po_dir # directory with translations
+ output_dir # directory where to save generated files
+ output_subdir) # optional installation subdirectory
+
+ if(output_subdir STREQUAL "")
+ set(output_subdir_opt "")
+ else()
+ set(output_subdir_opt "--output_subdir")
+ endif()
+
+ # first command is used to get list of input and output files when running CMake to
+ # execute appropriate CMake install commands and set up dependencies properly
+ execute_process(COMMAND ${PYTHON_EXECUTABLE}
+ ${DATA_SOURCE_DIR}/i18n-tools/scripts/process_translations.py
+ --mode print_files
+ --type ${type}
+ --input_dir ${input_dir}
+ --po_dir ${po_dir}
+ --output_dir ${output_dir}
+ ${output_subdir_opt} ${output_subdir}
+ WORKING_DIRECTORY ${working_dir}
+ OUTPUT_VARIABLE files_list)
+
+ string(REGEX REPLACE "(.*)\n(.*)" "\\1" input_files "${files_list}")
+ string(REGEX REPLACE "(.*)\n(.*)" "\\2" output_files "${files_list}")
+
+ # return the list of output files to parent
+ set(${result_output_files} ${output_files} PARENT_SCOPE)
+
+ # dummy file to indicate success
+ set(signal_file ${output_dir}/translation)
+
+ # po files are also dependency
+ file(GLOB po_files ${po_dir}/*)
+
+ # actual command used to generate translations executed when building project
+ add_custom_command(OUTPUT ${signal_file}
+ COMMAND ${PYTHON_EXECUTABLE}
+ ${DATA_SOURCE_DIR}/i18n-tools/scripts/process_translations.py
+ --mode generate
+ --type ${type}
+ --input_dir ${input_dir}
+ --po_dir ${po_dir}
+ --output_dir ${output_dir}
+ ${output_subdir_opt} ${output_subdir}
+ --signal_file ${signal_file}
+ WORKING_DIRECTORY ${working_dir}
+ DEPENDS ${input_files} ${po_files})
+
+ # generate some unique string for target name
+ string(REGEX REPLACE "[/\\]" "_" target_suffix ${po_dir})
+
+ # target to run the command
+ add_custom_target(i18n_${target_suffix} ALL DEPENDS ${signal_file})
+
+endfunction()
+
+##
+# Convenience function for installing generated files while keeping
+# their relative paths from the output directory
+##
+function(install_preserving_relative_paths
+ output_files # list of output files
+ output_dir # output directory
+ destination_dir) # install destination directory
+
+ foreach(output_file ${output_files})
+ file(RELATIVE_PATH rel_output_file ${output_dir} ${output_file})
+ get_filename_component(rel_output_file_dir ${rel_output_file} DIRECTORY)
+ install(FILES ${output_file} DESTINATION ${destination_dir}/${rel_output_file_dir})
+ endforeach()
+
+endfunction()
diff --git a/i18n-tools/LevelsI18N.cmake b/i18n-tools/LevelsI18N.cmake
deleted file mode 100644
index 8d18fe27..00000000
--- a/i18n-tools/LevelsI18N.cmake
+++ /dev/null
@@ -1,193 +0,0 @@
-##
-# Meta-infrastructure to allow po-based translation of Colobot level files
-##
-
-find_program(PO4A po4a)
-
-if(NOT PO4A)
- message(WARNING "PO4A not found, level files will NOT be translated!")
-endif()
-
-##
-# Generate translated chaptertitle files using po4a
-##
-function(generate_chaptertitles_i18n
- result_translated_chaptertitle_files # output variable to return names of translated chaptertitle files
- source_chaptertitle_prefix_dir # prefix directory for chaptertitle files
- source_chaptertitle_files # input chaptertitle files relative to prefix dir
- po_dir # directory with translations (*.po, *.pot files)
- work_dir) # directory where to save generated files
-
- # generated dummy file for translation of "E", "D", "F", "P", etc. language letters
- # TODO find a better way to provide translations than this hack
- set(langchar_file ${work_dir}/chaptertitles_langchar.txt)
- file(WRITE ${langchar_file} "E")
-
- # generated config file for po4a
- set(po4a_cfg_file ${work_dir}/chaptertitles_po4a.cfg)
-
- # get translations from po directory
- get_filename_component(abs_po_dir ${po_dir} ABSOLUTE)
- file(WRITE ${po4a_cfg_file} "[po_directory] ${abs_po_dir}\n")
-
- # add content of dummy language file to translation
- file(APPEND ${po4a_cfg_file} "[type:text] ${langchar_file}")
-
- set(abs_source_chaptertitle_files "")
- set(translated_chaptertitle_files "")
- file(GLOB po_files ${po_dir}/*.po)
-
- foreach(source_chaptertitle_file ${source_chaptertitle_files})
- get_filename_component(abs_source_chaptertitle_file ${source_chaptertitle_prefix_dir}/${source_chaptertitle_file} ABSOLUTE)
- set(output_chaptertitle_file ${work_dir}/${source_chaptertitle_file})
-
- # translation rule for chaptertitle file
- file(APPEND ${po4a_cfg_file} "\n[type:colobotlevel] ${abs_source_chaptertitle_file}")
-
- foreach(po_file ${po_files})
- # generated file for single language
- get_language_code(language_code ${po_file})
- set(generated_language_file ${output_chaptertitle_file}.${language_code})
- file(APPEND ${po4a_cfg_file} " \\\n ${language_code}:${generated_language_file}")
- endforeach()
-
- list(APPEND abs_source_chaptertitle_files ${abs_source_chaptertitle_file})
- list(APPEND translated_chaptertitle_files ${output_chaptertitle_file})
- endforeach()
-
- # dummy files to signal that scripts have finished running
- set(translation_signalfile ${work_dir}/translations)
- set(po_clean_signalfile ${work_dir}/po_clean)
-
- # script to run po4a and consolidate the translations
- string(REPLACE ";" ":" escaped_abs_source_chaptertitle_files "${abs_source_chaptertitle_files}")
- string(REPLACE ";" ":" escaped_translated_chaptertitle_files "${translated_chaptertitle_files}")
- add_custom_command(OUTPUT ${translation_signalfile}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/run_po4a.sh ${po4a_cfg_file}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/create_level_translations.sh
- ${escaped_abs_source_chaptertitle_files}
- ${escaped_translated_chaptertitle_files}
- ${translation_signalfile}
- DEPENDS ${po_files})
-
- file(GLOB pot_file ${po_dir}/*.pot)
- set(po_files ${po_files} ${pot_file})
-
- # script to do some cleanups in updated *.po and *.pot files
- string(REPLACE ";" ":" escaped_po_files "${po_files}")
- add_custom_command(OUTPUT ${po_clean_signalfile}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/clean_po_files.sh
- ${escaped_po_files}
- ${translation_signalfile}
- ${po_clean_signalfile}
- DEPENDS ${translation_signalfile}
- )
-
- # generate some unique string for target name
- string(REGEX REPLACE "[/\\]" "_" target_suffix ${po_dir})
-
- # target to run both scripts
- add_custom_target(i18n_${target_suffix} ALL DEPENDS ${translation_signalfile} ${po_clean_signalfile})
-
- # return the translated files
- set(${result_translated_chaptertitle_files} ${translated_chaptertitle_files} PARENT_SCOPE)
-endfunction()
-
-##
-# Generate translated level and help files using po4a
-##
-function(generate_level_i18n
- result_translated_level_file # output variable to return names of translaed level files
- result_translated_help_files # output variable to return names of translated help files
- source_level_file # input scene.txt files
- source_help_files # input help files
- po_dir # directory with translations (*.po, *.pot files)
- work_dir) # directory where to save generated files
-
- # generated dummy file for translation of "E", "D", "F", "P", etc. language letters
- # TODO find a better way to provide translations than this hack
- set(langchar_file ${work_dir}/scene_langchar.txt)
- file(WRITE ${langchar_file} "E")
-
- # generated config file for po4a
- set(po4a_cfg_file ${work_dir}/scene_po4a.cfg)
-
- # get translations from po directory
- get_filename_component(abs_po_dir ${po_dir} ABSOLUTE)
- file(WRITE ${po4a_cfg_file} "[po_directory] ${abs_po_dir}\n")
-
- # add content of dummy language file to translation
- file(APPEND ${po4a_cfg_file} "[type:text] ${langchar_file}")
-
- # translation rule for scene file
- get_filename_component(abs_source_level_file ${source_level_file} ABSOLUTE)
- file(APPEND ${po4a_cfg_file} "\n[type:colobotlevel] ${abs_source_level_file}")
-
- get_filename_component(source_level_file_name ${source_level_file} NAME)
- set(output_level_file ${work_dir}/${source_level_file_name})
-
- file(GLOB po_files ${po_dir}/*.po)
- foreach(po_file ${po_files})
- get_language_code(language_code ${po_file})
- # generated file for single language
- set(generated_language_file ${output_level_file}.${language_code})
- file(APPEND ${po4a_cfg_file} " \\\n ${language_code}:${generated_language_file}")
- endforeach()
-
- # translation rules for help files
- set(output_help_dir ${work_dir}/help)
- set(translated_help_files "")
-
- foreach(source_help_file ${source_help_files})
- get_filename_component(help_file_name ${source_help_file} NAME)
-
- file(APPEND ${po4a_cfg_file} "\n[type:colobothelp] ${source_help_file}")
- foreach(po_file ${po_files})
- # generated file for single language
- get_language_code(language_code ${po_file})
- get_language_char(language_char ${po_file})
- string(REPLACE ".E." ".${language_char}." generated_help_file_name ${help_file_name})
- set(generated_help_file ${output_help_dir}/${generated_help_file_name})
- file(APPEND ${po4a_cfg_file} " \\\n ${language_code}:${generated_help_file}")
-
- list(APPEND translated_help_files ${generated_help_file})
- endforeach()
- endforeach()
-
- # dummy files to signal that scripts have finished running
- set(translation_signalfile ${work_dir}/translations)
- set(po_clean_signalfile ${work_dir}/po_clean)
-
- # script to run po4a and consolidate the translations
- add_custom_command(OUTPUT ${translation_signalfile}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/run_po4a.sh ${po4a_cfg_file}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/create_level_translations.sh
- ${abs_source_level_file}
- ${output_level_file}
- ${translation_signalfile}
- DEPENDS ${po_files})
-
- file(GLOB pot_file ${po_dir}/*.pot)
- set(po_files ${po_files} ${pot_file})
-
- # script to do some cleanups in updated *.po and *.pot files
- string(REPLACE ";" ":" escaped_po_files "${po_files}")
- add_custom_command(OUTPUT ${po_clean_signalfile}
- COMMAND ${DATA_SOURCE_DIR}/i18n-tools/scripts/clean_po_files.sh
- ${escaped_po_files}
- ${translation_signalfile}
- ${po_clean_signalfile}
- DEPENDS ${translation_signalfile}
- )
-
- # generate some unique string for target name
- string(REGEX REPLACE "[/\\]" "_" target_suffix ${po_dir})
-
- # target to run both scripts
- add_custom_target(i18n_${target_suffix} ALL DEPENDS ${translation_signalfile} ${po_clean_signalfile})
-
- # return the translated files
- set(${result_translated_level_file} ${output_level_file} PARENT_SCOPE)
- set(${result_translated_help_files} ${translated_help_files} PARENT_SCOPE)
-endfunction()
-
diff --git a/i18n-tools/scripts/clean_po_files.sh b/i18n-tools/scripts/clean_po_files.sh
deleted file mode 100755
index decd88d7..00000000
--- a/i18n-tools/scripts/clean_po_files.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-##
-# Script to do some cleaning up of merged/generated *.po and *.pot files
-#
-# It is basically a sed wrapper that does two things:
-# - remove information about absolute filenames which were used to generate translations
-# - remove modification date of file
-#
-# By doing these two things, it makes sure that *.po and *.pot files do not change
-# compared to versions stored in repository when building the project
-#
-# The arguments are a colon-separated list of *.po or *.pot files and
-# two dummy signal files used by build system that must be updated
-##
-
-# stop on errors
-set -e
-
-if [ $# -ne 3 ]; then
- echo "Invalid arguments!" >&2
- echo "Usage: $0 po_file1[:po_file2;...] translation_signalfile po_clean_signalfile" >&2
- exit 1
-fi
-
-PO_FILES="$1"
-TRANSLATION_SIGNALFILE="$2"
-PO_CLEAN_SIGNALFILE="$3"
-
-IFS=':' read -a po_files_array <<< "$PO_FILES"
-
-for po_file in "${po_files_array[@]}"; do
- # strip unnecessary part of file names
- sed -i -e 's|^#: .*data/\(.*\)$|#: \1|' "$po_file"
- # remove the creation date
- sed -i -e 's|^\("POT-Creation-Date:\).*$|\1 DATE\\n"|' "$po_file"
-done
-
-# update the dummy signal files to indicate success
-# we also have to touch translation signalfile because it's supposed to be modified later than po files
-touch "$TRANSLATION_SIGNALFILE"
-touch "$PO_CLEAN_SIGNALFILE"
\ No newline at end of file
diff --git a/i18n-tools/scripts/common.py b/i18n-tools/scripts/common.py
new file mode 100644
index 00000000..3fab2607
--- /dev/null
+++ b/i18n-tools/scripts/common.py
@@ -0,0 +1,229 @@
+import errno
+import io
+import os
+import polib
+
+"""
+ Works like shell's "mkdir -p" and also behaves nicely if given None argument
+"""
+def nice_mkdir(path):
+ if path is None:
+ return
+
+ try:
+ os.makedirs(path)
+ except OSError as exc:
+ if exc.errno == errno.EEXIST and os.path.isdir(path):
+ pass
+ else: raise
+
+"""
+ Works as os.path.join, but behaves nicely if given None argument
+"""
+def nice_path_join(*paths):
+ for path in paths:
+ if path is None:
+ return None
+
+ return os.path.join(*paths)
+
+"""
+ Wrapper class over POFile, acting as translation template file
+
+ It actually hold two POFile instances:
+ previous_catalog is the content of PO file read from disk
+ current_catalog is created empty and filled with entries from input files
+
+ Once all processing is done, the content of previous_catalog is merged with current_catalog
+ and the result is saved to disk.
+"""
+class TemplateFile:
+ def __init__(self, file_name):
+ self.file_name = file_name
+ self.dir_name = os.path.dirname(file_name)
+ self.language = 'en'
+ self.current_catalog = polib.POFile(wrapwidth = 0)
+ if os.path.exists(file_name):
+ self.previous_catalog = polib.pofile(file_name, wrapwidth = 0)
+ else:
+ self.previous_catalog = polib.POFile(wrapwidth = 0)
+
+ """
+ Wrapper over inserting template file entry
+ If entry does not exist, it is created;
+ otherwise it is modified to indicate multiple occurrences
+ """
+ def insert_entry(self, text, occurrence, type_comment):
+ entry = self.current_catalog.find(text)
+ relative_file_name = os.path.relpath(occurrence.file_name, self.dir_name)
+ occurrence = (relative_file_name, occurrence.line_number)
+ if entry:
+ entry.comment = self._merge_comment(entry.comment, type_comment)
+ if occurrence not in entry.occurrences:
+ entry.occurrences.append(occurrence)
+ else:
+ comment = 'type: ' + type_comment
+ new_entry = polib.POEntry(msgid = text,
+ comment = comment,
+ occurrences = [occurrence],
+ flags = ['no-wrap'])
+
+ self.current_catalog.append(new_entry)
+
+ def _merge_comment(self, previous_comment, type_comment):
+ new_comment = previous_comment
+
+ previous_types = previous_comment.replace('type: ', '')
+ previous_types_list = previous_types.split(', ')
+
+ if type_comment not in previous_types_list:
+ new_comment += ', ' + type_comment
+
+ return new_comment
+
+ """
+    Merges previous_catalog with current_catalog and saves the result to disk
+ """
+ def merge_and_save(self):
+ self.previous_catalog.merge(self.current_catalog)
+ self.previous_catalog.save(self.file_name)
+
+"""
+ Wrapper class over POFile, acting as language translation file
+"""
+class LanguageFile:
+ def __init__(self, file_name):
+ self.file_name = file_name
+ # get language from file name e.g. "/foo/de.po" -> "de"
+ (self.language, _) = os.path.splitext(os.path.basename(file_name))
+ if os.path.exists(file_name):
+ self.catalog = polib.pofile(file_name, wrapwidth = 0)
+ else:
+ self.catalog = polib.POFile(wrapwidth = 0)
+
+ """
+ Return single language character e.g. "de" -> "D"
+ """
+ def language_char(self):
+ return self.language[0].upper()
+
+ """
+ Try to translate given text; if not found among translations,
+ return the original
+ """
+ def translate(self, text):
+ entry = self.catalog.find(text)
+ if entry and entry.msgstr != '':
+ return entry.msgstr
+ return text
+
+ """
+    Merges in entries from the template file's current_catalog and saves the result to disk
+ """
+ def merge_and_save(self, template_file):
+ self.catalog.merge(template_file.current_catalog)
+ self.catalog.save(self.file_name)
+
+"""
+ Locates the translation files in po_dir
+"""
+def find_translation_file_names(po_dir):
+ pot_file_name = os.path.join(po_dir, 'translations.pot') # default
+ po_file_names = []
+ for file_name in os.listdir(po_dir):
+ if file_name.endswith('.pot'):
+ pot_file_name = os.path.join(po_dir, file_name)
+ elif file_name.endswith('.po'):
+ po_file_names.append(os.path.join(po_dir, file_name))
+
+ return (pot_file_name, po_file_names)
+
+"""
+ Creates template and language files by reading po_dir
+"""
+def create_template_and_language_files(po_dir):
+ (pot_file_name, po_file_names) = find_translation_file_names(po_dir)
+
+ template_file = TemplateFile(pot_file_name)
+ language_files = []
+ for po_file_name in po_file_names:
+ language_files.append(LanguageFile(po_file_name))
+
+ return (template_file, language_files)
+
+"""
+ Structure representing occurrence of text
+"""
+class Occurrence:
+ def __init__(self, file_name, line_number):
+ self.file_name = file_name
+ self.line_number = line_number
+
+"""
+ Structure representing line read from input file
+"""
+class InputLine:
+ def __init__(self, text, occurrence):
+ self.text = text
+ self.occurrence = occurrence
+
+
+"""
+ Base class for single translation process,
+ translating one input file into one output file
+
+ It provides wrapper code for reading consecutive lines of text and saving the result
+"""
+class TranslationJob:
+ def __init__(self, **kwargs):
+ self._input_line_counter = 0
+ self._input_file_name = kwargs['input_file']
+ self._input_file = None
+
+ self._output_file_name = kwargs['output_file']
+ self._output_file = None
+
+ """
+ Launch translation process
+ Actual processing is done in process_file() function which must be implemented by subclasses
+ """
+ def run(self):
+ try:
+ self._open_files()
+ self.process_file()
+ finally:
+ self._close_files()
+
+ def _open_files(self):
+ self._input_file = io.open(self._input_file_name, 'r', encoding='utf-8')
+ if self._output_file_name:
+ self._output_file = io.open(self._output_file_name, 'w', encoding='utf-8')
+
+ def _close_files(self):
+ self._input_file.close()
+ if self._output_file:
+ self._output_file.close()
+
+ """
+    Return the next (line, occurrence) pair from the input file, or None at end of input
+ """
+ def read_input_line(self):
+ line = self._input_file.readline()
+ if line == '':
+ return None
+
+ self._input_line_counter += 1
+ return InputLine(line.rstrip('\n'), Occurrence(self._input_file_name, self._input_line_counter))
+
+ """
+ Write line to output file, if present
+ """
+ def write_output_line(self, line):
+ if self._output_file:
+ self._output_file.write(line + '\n')
+
+ def get_input_file_name(self):
+ return self._input_file_name
+
+ def get_output_file_name(self):
+ return self._output_file_name
diff --git a/i18n-tools/scripts/create_level_translations.sh b/i18n-tools/scripts/create_level_translations.sh
deleted file mode 100755
index 88d9f9ba..00000000
--- a/i18n-tools/scripts/create_level_translations.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/bash
-
-##
-# Script to consolidate multiple translated level files (scene.txt or chaptertitle.txt),
-# generated previously by PO4A, into a single all-in-one output file
-#
-# It supports multiple pairs of source and output files and makes the assumption that
-# each source file was processed by PO4A to yield output files named like $output_file.$language_code
-#
-# Basically, it is a simple sed wrapper that uses the source file and the translated files to copy-paste
-# content into resulting output file
-#
-# The arugments are list of source files as a colon-separated list, list of output files also as colon-separated list
-# and dummy signal file used by build system
-##
-
-# Stop on errors
-set -e
-
-if [ $# -ne 3 ]; then
- echo "Invalid arguments!" >&2
- echo "Usage: $0 source_file1[:source_file2:...] output_file1[:output_file2:...] translation_signalfile" >&2
- exit 1
-fi
-
-SOURCE_FILES="$1"
-OUTPUT_FILES="$2"
-TRANSLATION_SIGNALFILE="$3"
-
-IFS=':' read -a source_files_array <<< "$SOURCE_FILES"
-IFS=':' read -a output_files_array <<< "$OUTPUT_FILES"
-
-for index in "${!source_files_array[@]}"; do
- source_file="${source_files_array[index]}"
- output_file="${output_files_array[index]}"
-
- # generate output file
- echo -n "" > "$output_file"
-
- # first, write original English headers
- sed -n '/^Title/p;/^Resume/p;/^ScriptName/p' "$source_file" >> "$output_file"
-
- # now, copy translated headers from translated files
- # (translated files are named output file + suffix with language code)
- for translated_file in $output_file.*; do
- sed -n '/^Title/p;/^Resume/p;/^ScriptName/p' "$translated_file" >> "$output_file"
- done
- echo "// End of level headers translations" >> "$output_file"
- echo "" >> "$output_file"
-
- # copy the rest of source file, excluding headers
- sed -e '/^Title/d;/^Resume/d;/^ScriptName/d' "$source_file" >> "$output_file"
-done
-
-# update the dummy signal file to indicate success
-touch "$TRANSLATION_SIGNALFILE"
\ No newline at end of file
diff --git a/i18n-tools/scripts/perllib/Locale/Po4a/Colobothelp.pm b/i18n-tools/scripts/perllib/Locale/Po4a/Colobothelp.pm
deleted file mode 100644
index c2a683c6..00000000
--- a/i18n-tools/scripts/perllib/Locale/Po4a/Colobothelp.pm
+++ /dev/null
@@ -1,194 +0,0 @@
-# Locale::Po4a::Colobothelp -- Convert Colobot help files
-#
-# This program is free software; you may redistribute it and/or modify it
-# under the terms of GPLv3.
-#
-
-use Locale::Po4a::TransTractor qw(process new);
-use Locale::Po4a::Common;
-use Locale::Po4a::Text;
-
-package Locale::Po4a::Colobothelp;
-
-use 5.006;
-use strict;
-use warnings;
-
-require Exporter;
-
-use vars qw(@ISA @EXPORT $AUTOLOAD);
-@ISA = qw(Locale::Po4a::TransTractor);
-@EXPORT = qw();
-
-my @comments = ();
-
-sub initialize {}
-
-sub parse {
- my $self = shift;
- my ($line,$ref);
- my $paragraph="";
- my $wrapped_mode = 1;
- my $s_mode = 0;
- my $expect_header = 1;
- my $end_of_paragraph = 0;
- ($line,$ref)=$self->shiftline();
- while (defined($line)) {
- chomp($line);
- $self->{ref}="$ref";
- ($paragraph,$wrapped_mode,$s_mode,$expect_header,$end_of_paragraph) = parse_colobothelp($self,$line,$ref,$paragraph,$wrapped_mode,$s_mode,$expect_header,$end_of_paragraph);
- if ($end_of_paragraph) {
- do_paragraph($self,offlink($paragraph),$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- $end_of_paragraph = 0;
- }
- ($line,$ref)=$self->shiftline();
- }
- if (length $paragraph) {
- $paragraph =~ s/\n$//;
- do_paragraph($self,$paragraph,$wrapped_mode);
- $self->pushline("\n");
- }
-}
-
-sub parse_colobothelp {
- my ($self,$line,$ref,$paragraph,$wrapped_mode,$s_mode,$expect_header,$end_of_paragraph) = @_;
-
- if (($s_mode == 1) and ($line !~ /^\\s;/)) {
- # Process the end of \s; blocks
- $s_mode = 0;
- # substr removes the last superfluous \n
- my $s_block = onlink($self->translate(substr(offlink($paragraph),0,-1),$ref,"\\s; block (usually verbatim code)"));
- $s_block =~ s/(\n|^)/$1\\s;/g;
- $self->pushline($s_block."\n");
- $paragraph="";
- $wrapped_mode = 0;
- }
-
- if ( $line =~ /^\s*$/
- or $line =~ m/^\\[nctr];$/) {
- # Break paragraphs on lines containing only spaces or any of \n; \c; \t; \r; (alone)
-
- # Drop the latest EOL to avoid having it in the translation
- my $dropped_eol = ($paragraph =~ s/\n$//);
- do_paragraph($self,$paragraph,$wrapped_mode);
- $self->pushline("\n") if $dropped_eol; # Therefore only add it back if it was removed
- $paragraph="";
- $wrapped_mode = 0;
- $self->pushline($line."\n");
- } elsif ($line =~ s/^(\\s;)//) {
- # Lines starting with \s; are special (yellow-background, usually code-block)
- # Break paragraph before them
- if($s_mode == 0) {
- $s_mode = 1;
- my $dropped_eol = ($paragraph =~ s/\n$//);
- do_paragraph($self,$paragraph,$wrapped_mode);
- $self->pushline("\n") if $dropped_eol; # Therefore only add it back if it was removed
- $paragraph="";
- $wrapped_mode = 0;
- }
- $paragraph .= $line."\n";
- } elsif ($line =~ s/^(\\[bt];)//) {
- # Break paragraphs on \b; or \t; headers
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
-
- $self->pushline($1.onlink($self->translate(offlink($line),$ref,"$1 header")."\n"));
- } elsif ($line =~ /^\\image (.*) (\d*) (\d*);$/) {
- # Discard lines with \image name lx ly; tags
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
-
- $self->pushline("\\image ".$self->translate($1,$ref,'Image filename')." $2 $3;\n");
- } elsif ( $line =~ /^=+$/
- or $line =~ /^_+$/
- or $line =~ /^-+$/) {
- $wrapped_mode = 0;
- $paragraph .= $line."\n";
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
- } elsif ($line =~ s/^(\s*)([0-9]\)|[o-])(\s*)//) {
- # Break paragraphs on lines starting with either number + parenthesis or any of o- + space
- do_paragraph($self,$paragraph,$wrapped_mode);
- $paragraph="";
- $wrapped_mode = 1;
-
- $self->pushline("$1$2$3".onlink($self->translate(offlink($line),$ref,"Bullet: '$2'")."\n"));
- } else {
- # All paragraphs are non-wrap paragraphs by default
- $wrapped_mode = 0;
- undef $self->{bullet};
- undef $self->{indent};
- $paragraph .= $line."\n";
- }
- return ($paragraph,$wrapped_mode,$s_mode,$expect_header,$end_of_paragraph);
-}
-
-sub offlink {
- my ($paragraph) = @_;
- # Replace \button $id; as pseudo xHTML tags
- $paragraph =~ s#\\(button|key) ([^;]*?);#<$1 $2/>#g;
- # Put \const;Code\norm; sequences into pseudo-HTML tags
- $paragraph =~ s#\\(const|type|token|key);([^\\;]*?)\\norm;#$2#g;
- # Transform CBot links \l;text\u target; into pseudo-HTML text
- $paragraph =~ s#\\l;(.*?)\\u ([^;]*?);#$1#g;
- # Cleanup pseudo-html targets separated by \\ to have a single character |
- $paragraph =~ s###g;
- # Replace remnants of \const; \type; \token or \norm; as pseudo xHTML tags
- $paragraph =~ s#\\(const|type|token|norm|key);#<$1/>#g;
- # Put \c;Code\n; sequences into pseudo-HTML tags
- $paragraph =~ s#\\c;([^\\;]*?)\\n;#$1
#g;
- # Replace remnants of \s; \c; \b; or \n; as pseudo xHTML tags
- $paragraph =~ s#\\([scbn]);#<$1/>#g;
- return ($paragraph);
-}
-
-sub onlink {
- my ($paragraph) = @_;
- # Invert the replace remnants of \s; \c; \b; or \n; as pseudo xHTML tagsyy
- $paragraph =~ s#<([scbn])/>#\\$1;#g;
- # Inverse the put of \c;Code\n; sequences into pseudo-HTML tags
- $paragraph =~ s#([^\\;]*?)
#\\c;$1\\n;#g;
- # Invert the replace remnants of \const; \type; \token or \norm; as pseudo xHTML tags
- $paragraph =~ s#<(const|type|token|norm|key)/>#\\$1;#g;
- # Inverse of the cleanup of pseudo-html targets separated by \\ to have a single character |
- $paragraph =~ s###g;
- # Inverse of the transform of CBot links \l;text\u target; into pseudo-HTML text
- $paragraph =~ s#(.*?)#\\l;$2\\u $1;#g;
- # Invert the put \const;Code\norm; sequences into pseudo-HTML tags
- $paragraph =~ s#([^\\;]*?)#\\$1;$2\\norm;#g;
- # Invert the replace of \button $id; as pseudo xHTML tags
- $paragraph =~ s#<(button|key) ([^;]*?)/>#\\$1 $2;#g;
- return ($paragraph);
-}
-
-sub do_paragraph {
- my ($self, $paragraph, $wrap) = (shift, shift, shift);
- my $type = shift || $self->{type} || "Plain text";
- return if ($paragraph eq "");
-
- my $end = "";
- if ($wrap) {
- $paragraph =~ s/^(.*?)(\n*)$/$1/s;
- $end = $2 || "";
- }
- my $t = onlink($self->translate(offlink($paragraph),
- $self->{ref},
- $type,
- "wrap" => $wrap));
- if (defined $self->{bullet}) {
- my $bullet = $self->{bullet};
- my $indent1 = $self->{indent};
- my $indent2 = $indent1.(' ' x length($bullet));
- $t =~ s/^/$indent1$bullet/s;
- $t =~ s/\n(.)/\n$indent2$1/sg;
- }
- $self->pushline( $t.$end );
-}
-
-1;
-__END__
diff --git a/i18n-tools/scripts/perllib/Locale/Po4a/Colobotlevel.pm b/i18n-tools/scripts/perllib/Locale/Po4a/Colobotlevel.pm
deleted file mode 100644
index df64498e..00000000
--- a/i18n-tools/scripts/perllib/Locale/Po4a/Colobotlevel.pm
+++ /dev/null
@@ -1,86 +0,0 @@
-# Locale::Po4a::ColobotLevels -- Convert Colobot levels
-#
-# This program is free software; you may redistribute it and/or modify it
-# under the terms of GPLv3.
-#
-
-use Locale::Po4a::TransTractor qw(process new);
-use Locale::Po4a::Common;
-
-package Locale::Po4a::Colobotlevel;
-
-use 5.006;
-use strict;
-use warnings;
-
-require Exporter;
-
-use vars qw(@ISA @EXPORT $AUTOLOAD);
-@ISA = qw(Locale::Po4a::TransTractor);
-@EXPORT = qw();
-
-my $debug=0;
-
-sub initialize {}
-
-
-sub parse {
- my $self=shift;
- my ($line,$line_source);
- my $language_char;
-
- LINE:
- ($line,$line_source)=$self->shiftline();
-
- while (defined($line)) {
- chomp($line);
-
- if ($line =~ /^(Title|Resume|ScriptName)/) {
- # Text before the first dot
- $line =~ m/(^[^"\r\n]*)\./;
- my $type = $1;
-
- # One char just after the .
- $line =~ m/\.(.)/;
- my $E = $1;
- if (not $language_char) {
- # Take this one-char only once
- $language_char = $self->translate($E, '', 'One-char language identifier');
- }
-
- # The text between .E and first quote
- $line =~ m/\.$E([^\r\n"]*?)(text|resume)="([^\r\n"]*?)"([^\r\n"]*)((text|resume)="([^\r\n"]*?)"([^\r\n"]*))?$/;
- my $spacing_1 = $1;
- my $subtype_1 = $2;
- my $quoted_1 = $3;
- my $spacing_2 = $4;
- my $secondpart = $5;
- my $subtype_2 = $6;
- my $quoted_2 = $7;
- my $spacing_3 = $8;
-
- my $par_1 = $self->translate($quoted_1, $line_source, $type."-".$subtype_1);
- $par_1 =~ s/^\D*\d*://;
- if ($secondpart) {
- my $par_2 = $self->translate($quoted_2, $line_source, $type."-".$subtype_2);
- $par_2 =~ s/^\D*\d*://;
-
- # This is awkward, but works
- $spacing_2 = $spacing_2.$subtype_2.'="'.$par_2.'"'.$spacing_3;
- }
- $par_1 =~ s/\n/\\n/g;
- $spacing_2 =~ s/\n/\\n/g;
-
- # Now push the result
- $self->pushline($type.'.'.$language_char.$spacing_1.$subtype_1.'="'.$par_1.'"'.$spacing_2."\n");
- }
- else {
- $self->pushline("$line\n");
- }
- # Reinit the loop
- ($line,$line_source)=$self->shiftline();
- }
-}
-
-1;
-__END__
diff --git a/i18n-tools/scripts/polib.py b/i18n-tools/scripts/polib.py
new file mode 100644
index 00000000..5f0f727d
--- /dev/null
+++ b/i18n-tools/scripts/polib.py
@@ -0,0 +1,1822 @@
+# -* coding: utf-8 -*-
+#
+# License: MIT (see LICENSE file provided)
+# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
+
+"""
+**polib** allows you to manipulate, create, modify gettext files (pot, po and
+mo files). You can load existing files, iterate through it's entries, add,
+modify entries, comments or metadata, etc. or create new po files from scratch.
+
+**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
+:func:`~polib.mofile` convenience functions.
+"""
+
+__author__ = 'David Jean Louis '
+__version__ = '1.0.6'
+__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+ 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
+
+import array
+import codecs
+import os
+import re
+import struct
+import sys
+import textwrap
+
+try:
+ import io
+except ImportError:
+ # replacement of io.open() for python < 2.6
+ # we use codecs instead
+ class io(object):
+ @staticmethod
+ def open(fpath, mode='r', encoding=None):
+ return codecs.open(fpath, mode, encoding)
+
+
+# the default encoding to use when encoding cannot be detected
+default_encoding = 'utf-8'
+
+# python 2/3 compatibility helpers {{{
+
+
+if sys.version_info[:2] < (3, 0):
+ PY3 = False
+ text_type = unicode
+
+ def b(s):
+ return s
+
+ def u(s):
+ return unicode(s, "unicode_escape")
+
+else:
+ PY3 = True
+ text_type = str
+
+ def b(s):
+ return s.encode("latin-1")
+
+ def u(s):
+ return s
+# }}}
+# _pofile_or_mofile {{{
+
+
+def _pofile_or_mofile(f, type, **kwargs):
+ """
+ Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
+ honor the DRY concept.
+ """
+ # get the file encoding
+ enc = kwargs.get('encoding')
+ if enc is None:
+ enc = detect_encoding(f, type == 'mofile')
+
+ # parse the file
+ kls = type == 'pofile' and _POFileParser or _MOFileParser
+ parser = kls(
+ f,
+ encoding=enc,
+ check_for_duplicates=kwargs.get('check_for_duplicates', False),
+ klass=kwargs.get('klass')
+ )
+ instance = parser.parse()
+ instance.wrapwidth = kwargs.get('wrapwidth', 78)
+ return instance
+# }}}
+# _is_file {{{
+
+
+def _is_file(filename_or_contents):
+ """
+ Safely returns the value of os.path.exists(filename_or_contents).
+
+ Arguments:
+
+ ``filename_or_contents``
+ either a filename, or a string holding the contents of some file.
+ In the latter case, this function will always return False.
+ """
+ try:
+ return os.path.exists(filename_or_contents)
+ except (ValueError, UnicodeEncodeError):
+ return False
+# }}}
+# function pofile() {{{
+
+
+def pofile(pofile, **kwargs):
+ """
+ Convenience function that parses the po or pot file ``pofile`` and returns
+ a :class:`~polib.POFile` instance.
+
+ Arguments:
+
+ ``pofile``
+ string, full or relative path to the po/pot file or its content (data).
+
+ ``wrapwidth``
+ integer, the wrap width, only useful when the ``-w`` option was passed
+ to xgettext (optional, default: ``78``).
+
+ ``encoding``
+ string, the encoding to use (e.g. "utf-8") (default: ``None``, the
+ encoding will be auto-detected).
+
+ ``check_for_duplicates``
+ whether to check for duplicate entries when adding entries to the
+ file (optional, default: ``False``).
+
+ ``klass``
+ class which is used to instantiate the return value (optional,
+ default: ``None``, the return value with be a :class:`~polib.POFile`
+ instance).
+ """
+ return _pofile_or_mofile(pofile, 'pofile', **kwargs)
+# }}}
+# function mofile() {{{
+
+
+def mofile(mofile, **kwargs):
+ """
+ Convenience function that parses the mo file ``mofile`` and returns a
+ :class:`~polib.MOFile` instance.
+
+ Arguments:
+
+ ``mofile``
+ string, full or relative path to the mo file or its content (data).
+
+ ``wrapwidth``
+ integer, the wrap width, only useful when the ``-w`` option was passed
+ to xgettext to generate the po file that was used to format the mo file
+ (optional, default: ``78``).
+
+ ``encoding``
+ string, the encoding to use (e.g. "utf-8") (default: ``None``, the
+ encoding will be auto-detected).
+
+ ``check_for_duplicates``
+ whether to check for duplicate entries when adding entries to the
+ file (optional, default: ``False``).
+
+ ``klass``
+ class which is used to instantiate the return value (optional,
+ default: ``None``, the return value with be a :class:`~polib.POFile`
+ instance).
+ """
+ return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+# }}}
+# function detect_encoding() {{{
+
+
+def detect_encoding(file, binary_mode=False):
+ """
+ Try to detect the encoding used by the ``file``. The ``file`` argument can
+ be a PO or MO file path or a string containing the contents of the file.
+ If the encoding cannot be detected, the function will return the value of
+ ``default_encoding``.
+
+ Arguments:
+
+ ``file``
+ string, full or relative path to the po/mo file or its content.
+
+ ``binary_mode``
+ boolean, set this to True if ``file`` is a mo file.
+ """
+ PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
+ rxt = re.compile(u(PATTERN))
+ rxb = re.compile(b(PATTERN))
+
+ def charset_exists(charset):
+ """Check whether ``charset`` is valid or not."""
+ try:
+ codecs.lookup(charset)
+ except LookupError:
+ return False
+ return True
+
+ if not _is_file(file):
+ match = rxt.search(file)
+ if match:
+ enc = match.group(1).strip()
+ if charset_exists(enc):
+ return enc
+ else:
+ # For PY3, always treat as binary
+ if binary_mode or PY3:
+ mode = 'rb'
+ rx = rxb
+ else:
+ mode = 'r'
+ rx = rxt
+ f = open(file, mode)
+ for l in f.readlines():
+ match = rx.search(l)
+ if match:
+ f.close()
+ enc = match.group(1).strip()
+ if not isinstance(enc, text_type):
+ enc = enc.decode('utf-8')
+ if charset_exists(enc):
+ return enc
+ f.close()
+ return default_encoding
+# }}}
+# function escape() {{{
+
+
+def escape(st):
+ """
+ Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
+ the given string ``st`` and returns it.
+ """
+ return st.replace('\\', r'\\')\
+ .replace('\t', r'\t')\
+ .replace('\r', r'\r')\
+ .replace('\n', r'\n')\
+ .replace('\"', r'\"')
+# }}}
+# function unescape() {{{
+
+
+def unescape(st):
+ """
+ Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
+ the given string ``st`` and returns it.
+ """
+ def unescape_repl(m):
+ m = m.group(1)
+ if m == 'n':
+ return '\n'
+ if m == 't':
+ return '\t'
+ if m == 'r':
+ return '\r'
+ if m == '\\':
+ return '\\'
+ return m # handles escaped double quote
+ return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
+# }}}
+# class _BaseFile {{{
+
+
+class _BaseFile(list):
+ """
+ Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
+ classes. This class should **not** be instanciated directly.
+ """
+
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor, accepts the following keyword arguments:
+
+ ``pofile``
+ string, the path to the po or mo file, or its content as a string.
+
+ ``wrapwidth``
+ integer, the wrap width, only useful when the ``-w`` option was
+ passed to xgettext (optional, default: ``78``).
+
+ ``encoding``
+ string, the encoding to use, defaults to ``default_encoding``
+ global variable (optional).
+
+ ``check_for_duplicates``
+ whether to check for duplicate entries when adding entries to the
+ file, (optional, default: ``False``).
+ """
+ list.__init__(self)
+ # the opened file handle
+ pofile = kwargs.get('pofile', None)
+ if pofile and _is_file(pofile):
+ self.fpath = pofile
+ else:
+ self.fpath = kwargs.get('fpath')
+ # the width at which lines should be wrapped
+ self.wrapwidth = kwargs.get('wrapwidth', 78)
+ # the file encoding
+ self.encoding = kwargs.get('encoding', default_encoding)
+ # whether to check for duplicate entries or not
+ self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
+ # header
+ self.header = ''
+ # both po and mo files have metadata
+ self.metadata = {}
+ self.metadata_is_fuzzy = 0
+
+ def __unicode__(self):
+ """
+ Returns the unicode representation of the file.
+ """
+ ret = []
+ entries = [self.metadata_as_entry()] + \
+ [e for e in self if not e.obsolete]
+ for entry in entries:
+ ret.append(entry.__unicode__(self.wrapwidth))
+ for entry in self.obsolete_entries():
+ ret.append(entry.__unicode__(self.wrapwidth))
+ ret = u('\n').join(ret)
+
+ assert isinstance(ret, text_type)
+ #if type(ret) != text_type:
+ # return unicode(ret, self.encoding)
+ return ret
+
+ if PY3:
+ def __str__(self):
+ return self.__unicode__()
+ else:
+ def __str__(self):
+ """
+ Returns the string representation of the file.
+ """
+ return unicode(self).encode(self.encoding)
+
+ def __contains__(self, entry):
+ """
+ Overriden ``list`` method to implement the membership test (in and
+ not in).
+ The method considers that an entry is in the file if it finds an entry
+ that has the same msgid (the test is **case sensitive**) and the same
+ msgctxt (or none for both entries).
+
+ Argument:
+
+ ``entry``
+ an instance of :class:`~polib._BaseEntry`.
+ """
+ return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+ is not None
+
+ def __eq__(self, other):
+ return str(self) == str(other)
+
+ def append(self, entry):
+ """
+ Overriden method to check for duplicates entries, if a user tries to
+ add an entry that is already in the file, the method will raise a
+ ``ValueError`` exception.
+
+ Argument:
+
+ ``entry``
+ an instance of :class:`~polib._BaseEntry`.
+ """
+ if self.check_for_duplicates and entry in self:
+ raise ValueError('Entry "%s" already exists' % entry.msgid)
+ super(_BaseFile, self).append(entry)
+
+ def insert(self, index, entry):
+ """
+ Overriden method to check for duplicates entries, if a user tries to
+ add an entry that is already in the file, the method will raise a
+ ``ValueError`` exception.
+
+ Arguments:
+
+ ``index``
+ index at which the entry should be inserted.
+
+ ``entry``
+ an instance of :class:`~polib._BaseEntry`.
+ """
+ if self.check_for_duplicates and entry in self:
+ raise ValueError('Entry "%s" already exists' % entry.msgid)
+ super(_BaseFile, self).insert(index, entry)
+
+ def metadata_as_entry(self):
+ """
+ Returns the file metadata as a :class:`~polib.POFile` instance.
+ """
+ e = POEntry(msgid='')
+ mdata = self.ordered_metadata()
+ if mdata:
+ strs = []
+ for name, value in mdata:
+ # Strip whitespace off each line in a multi-line entry
+ strs.append('%s: %s' % (name, value))
+ e.msgstr = '\n'.join(strs) + '\n'
+ if self.metadata_is_fuzzy:
+ e.flags.append('fuzzy')
+ return e
+
+ def save(self, fpath=None, repr_method='__unicode__'):
+ """
+ Saves the po file to ``fpath``.
+ If it is an existing file and no ``fpath`` is provided, then the
+ existing file is rewritten with the modified data.
+
+ Keyword arguments:
+
+ ``fpath``
+ string, full or relative path to the file.
+
+ ``repr_method``
+ string, the method to use for output.
+ """
+ if self.fpath is None and fpath is None:
+ raise IOError('You must provide a file path to save() method')
+ contents = getattr(self, repr_method)()
+ if fpath is None:
+ fpath = self.fpath
+ if repr_method == 'to_binary':
+ fhandle = open(fpath, 'wb')
+ else:
+ fhandle = io.open(fpath, 'w', encoding=self.encoding)
+ if not isinstance(contents, text_type):
+ contents = contents.decode(self.encoding)
+ fhandle.write(contents)
+ fhandle.close()
+ # set the file path if not set
+ if self.fpath is None and fpath:
+ self.fpath = fpath
+
+ def find(self, st, by='msgid', include_obsolete_entries=False,
+ msgctxt=False):
+ """
+ Find the entry which msgid (or property identified by the ``by``
+ argument) matches the string ``st``.
+
+ Keyword arguments:
+
+ ``st``
+ string, the string to search for.
+
+ ``by``
+ string, the property to use for comparison (default: ``msgid``).
+
+ ``include_obsolete_entries``
+ boolean, whether to also search in entries that are obsolete.
+
+ ``msgctxt``
+ string, allows to specify a specific message context for the
+ search.
+ """
+ if include_obsolete_entries:
+ entries = self[:]
+ else:
+ entries = [e for e in self if not e.obsolete]
+ for e in entries:
+ if getattr(e, by) == st:
+ if msgctxt is not False and e.msgctxt != msgctxt:
+ continue
+ return e
+ return None
+
+ def ordered_metadata(self):
+ """
+ Convenience method that returns an ordered version of the metadata
+ dictionary. The return value is list of tuples (metadata name,
+ metadata_value).
+ """
+ # copy the dict first
+ metadata = self.metadata.copy()
+ data_order = [
+ 'Project-Id-Version',
+ 'Report-Msgid-Bugs-To',
+ 'POT-Creation-Date',
+ 'PO-Revision-Date',
+ 'Last-Translator',
+ 'Language-Team',
+ 'Language',
+ 'MIME-Version',
+ 'Content-Type',
+ 'Content-Transfer-Encoding',
+ 'Plural-Forms'
+ ]
+ ordered_data = []
+ for data in data_order:
+ try:
+ value = metadata.pop(data)
+ ordered_data.append((data, value))
+ except KeyError:
+ pass
+ # the rest of the metadata will be alphabetically ordered since there
+ # are no specs for this AFAIK
+ for data in sorted(metadata.keys()):
+ value = metadata[data]
+ ordered_data.append((data, value))
+ return ordered_data
+
+ def to_binary(self):
+ """
+ Return the binary representation of the file.
+ """
+ offsets = []
+ entries = self.translated_entries()
+
+ # the keys are sorted in the .mo file
+ def cmp(_self, other):
+ # msgfmt compares entries with msgctxt if it exists
+ self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
+ other_msgid = other.msgctxt and other.msgctxt or other.msgid
+ if self_msgid > other_msgid:
+ return 1
+ elif self_msgid < other_msgid:
+ return -1
+ else:
+ return 0
+ # add metadata entry
+ entries.sort(key=lambda o: o.msgctxt or o.msgid)
+ mentry = self.metadata_as_entry()
+ #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+ entries = [mentry] + entries
+ entries_len = len(entries)
+ ids, strs = b(''), b('')
+ for e in entries:
+ # For each string, we need size and file offset. Each string is
+ # NUL terminated; the NUL does not count into the size.
+ msgid = b('')
+ if e.msgctxt:
+ # Contexts are stored by storing the concatenation of the
+ # context, a byte, and the original string
+ msgid = self._encode(e.msgctxt + '\4')
+ if e.msgid_plural:
+ msgstr = []
+ for index in sorted(e.msgstr_plural.keys()):
+ msgstr.append(e.msgstr_plural[index])
+ msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
+ msgstr = self._encode('\0'.join(msgstr))
+ else:
+ msgid += self._encode(e.msgid)
+ msgstr = self._encode(e.msgstr)
+ offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
+ ids += msgid + b('\0')
+ strs += msgstr + b('\0')
+
+ # The header is 7 32-bit unsigned integers.
+ keystart = 7 * 4 + 16 * entries_len
+ # and the values start after the keys
+ valuestart = keystart + len(ids)
+ koffsets = []
+ voffsets = []
+ # The string table first has the list of keys, then the list of values.
+ # Each entry has first the size of the string, then the file offset.
+ for o1, l1, o2, l2 in offsets:
+ koffsets += [l1, o1 + keystart]
+ voffsets += [l2, o2 + valuestart]
+ offsets = koffsets + voffsets
+
+ output = struct.pack(
+ "Iiiiiii",
+ # Magic number
+ MOFile.MAGIC,
+ # Version
+ 0,
+ # number of entries
+ entries_len,
+ # start of key index
+ 7 * 4,
+ # start of value index
+ 7 * 4 + entries_len * 8,
+ # size and offset of hash table, we don't use hash tables
+ 0, keystart
+
+ )
+ if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
+ output += array.array("i", offsets).tobytes()
+ else:
+ output += array.array("i", offsets).tostring()
+ output += ids
+ output += strs
+ return output
+
+ def _encode(self, mixed):
+ """
+ Encodes the given ``mixed`` argument with the file encoding if and
+ only if it's an unicode string and returns the encoded string.
+ """
+ if isinstance(mixed, text_type):
+ mixed = mixed.encode(self.encoding)
+ return mixed
+# }}}
+# class POFile {{{
+
+
+class POFile(_BaseFile):
+ """
+ Po (or Pot) file reader/writer.
+ This class inherits the :class:`~polib._BaseFile` class and, by extension,
+ the python ``list`` type.
+ """
+
+ def __unicode__(self):
+ """
+ Returns the unicode representation of the po file.
+ """
+ ret, headers = '', self.header.split('\n')
+ for header in headers:
+ if header[:1] in [',', ':']:
+ ret += '#%s\n' % header
+ else:
+ ret += '# %s\n' % header
+
+ if not isinstance(ret, text_type):
+ ret = ret.decode(self.encoding)
+
+ return ret + _BaseFile.__unicode__(self)
+
+ def save_as_mofile(self, fpath):
+ """
+ Saves the binary representation of the file to given ``fpath``.
+
+ Keyword argument:
+
+ ``fpath``
+ string, full or relative path to the mo file.
+ """
+ _BaseFile.save(self, fpath, 'to_binary')
+
+ def percent_translated(self):
+ """
+ Convenience method that returns the percentage of translated
+ messages.
+ """
+ total = len([e for e in self if not e.obsolete])
+ if total == 0:
+ return 100
+ translated = len(self.translated_entries())
+ return int(translated * 100 / float(total))
+
+ def translated_entries(self):
+ """
+ Convenience method that returns the list of translated entries.
+ """
+ return [e for e in self if e.translated()]
+
+ def untranslated_entries(self):
+ """
+ Convenience method that returns the list of untranslated entries.
+ """
+ return [e for e in self if not e.translated() and not e.obsolete
+ and not 'fuzzy' in e.flags]
+
+ def fuzzy_entries(self):
+ """
+ Convenience method that returns the list of fuzzy entries.
+ """
+ return [e for e in self if 'fuzzy' in e.flags]
+
+ def obsolete_entries(self):
+ """
+ Convenience method that returns the list of obsolete entries.
+ """
+ return [e for e in self if e.obsolete]
+
+ def merge(self, refpot):
+ """
+ Convenience method that merges the current pofile with the pot file
+ provided. It behaves exactly as the gettext msgmerge utility:
+
+ * comments of this file will be preserved, but extracted comments and
+ occurrences will be discarded;
+ * any translations or comments in the file will be discarded, however,
+ dot comments and file positions will be preserved;
+ * the fuzzy flags are preserved.
+
+ Keyword argument:
+
+ ``refpot``
+ object POFile, the reference catalog.
+ """
+ # Store entries in dict/set for faster access
+ self_entries = dict((entry.msgid, entry) for entry in self)
+ refpot_msgids = set(entry.msgid for entry in refpot)
+ # Merge entries that are in the refpot
+ for entry in refpot:
+ e = self_entries.get(entry.msgid)
+ if e is None:
+ e = POEntry()
+ self.append(e)
+ e.merge(entry)
+ # ok, now we must "obsolete" entries that are not in the refpot anymore
+ for entry in self:
+ if entry.msgid not in refpot_msgids:
+ entry.obsolete = True
+# }}}
+# class MOFile {{{
+
+
+class MOFile(_BaseFile):
+ """
+ Mo file reader/writer.
+ This class inherits the :class:`~polib._BaseFile` class and, by
+ extension, the python ``list`` type.
+ """
+ MAGIC = 0x950412de
+ MAGIC_SWAPPED = 0xde120495
+
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor, accepts all keywords arguments accepted by
+ :class:`~polib._BaseFile` class.
+ """
+ _BaseFile.__init__(self, *args, **kwargs)
+ self.magic_number = None
+ self.version = 0
+
+ def save_as_pofile(self, fpath):
+ """
+ Saves the mofile as a pofile to ``fpath``.
+
+ Keyword argument:
+
+ ``fpath``
+ string, full or relative path to the file.
+ """
+ _BaseFile.save(self, fpath)
+
+ def save(self, fpath=None):
+ """
+ Saves the mofile to ``fpath``.
+
+ Keyword argument:
+
+ ``fpath``
+ string, full or relative path to the file.
+ """
+ _BaseFile.save(self, fpath, 'to_binary')
+
+ def percent_translated(self):
+ """
+ Convenience method to keep the same interface with POFile instances.
+ """
+ return 100
+
+ def translated_entries(self):
+ """
+ Convenience method to keep the same interface with POFile instances.
+ """
+ return self
+
+ def untranslated_entries(self):
+ """
+ Convenience method to keep the same interface with POFile instances.
+ """
+ return []
+
+ def fuzzy_entries(self):
+ """
+ Convenience method to keep the same interface with POFile instances.
+ """
+ return []
+
+ def obsolete_entries(self):
+ """
+ Convenience method to keep the same interface with POFile instances.
+ """
+ return []
+# }}}
+# class _BaseEntry {{{
+
+
+class _BaseEntry(object):
+ """
+ Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
+ This class should **not** be instanciated directly.
+ """
+
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor, accepts the following keyword arguments:
+
+ ``msgid``
+ string, the entry msgid.
+
+ ``msgstr``
+ string, the entry msgstr.
+
+ ``msgid_plural``
+ string, the entry msgid_plural.
+
+ ``msgstr_plural``
+ list, the entry msgstr_plural lines.
+
+ ``msgctxt``
+ string, the entry context (msgctxt).
+
+ ``obsolete``
+ bool, whether the entry is "obsolete" or not.
+
+ ``encoding``
+ string, the encoding to use, defaults to ``default_encoding``
+ global variable (optional).
+ """
+ self.msgid = kwargs.get('msgid', '')
+ self.msgstr = kwargs.get('msgstr', '')
+ self.msgid_plural = kwargs.get('msgid_plural', '')
+ self.msgstr_plural = kwargs.get('msgstr_plural', {})
+ self.msgctxt = kwargs.get('msgctxt', None)
+ self.obsolete = kwargs.get('obsolete', False)
+ self.encoding = kwargs.get('encoding', default_encoding)
+
+ def __unicode__(self, wrapwidth=78):
+ """
+ Returns the unicode representation of the entry.
+ """
+ if self.obsolete:
+ delflag = '#~ '
+ else:
+ delflag = ''
+ ret = []
+ # write the msgctxt if any
+ if self.msgctxt is not None:
+ ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
+ wrapwidth)
+ # write the msgid
+ ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
+ # write the msgid_plural if any
+ if self.msgid_plural:
+ ret += self._str_field("msgid_plural", delflag, "",
+ self.msgid_plural, wrapwidth)
+ if self.msgstr_plural:
+ # write the msgstr_plural if any
+ msgstrs = self.msgstr_plural
+ keys = list(msgstrs)
+ keys.sort()
+ for index in keys:
+ msgstr = msgstrs[index]
+ plural_index = '[%s]' % index
+ ret += self._str_field("msgstr", delflag, plural_index, msgstr,
+ wrapwidth)
+ else:
+ # otherwise write the msgstr
+ ret += self._str_field("msgstr", delflag, "", self.msgstr,
+ wrapwidth)
+ ret.append('')
+ ret = u('\n').join(ret)
+ return ret
+
+ if PY3:
+ def __str__(self):
+ return self.__unicode__()
+ else:
+ def __str__(self):
+ """
+ Returns the string representation of the entry.
+ """
+ return unicode(self).encode(self.encoding)
+
+ def __eq__(self, other):
+ return str(self) == str(other)
+
+ def _str_field(self, fieldname, delflag, plural_index, field,
+ wrapwidth=78):
+ lines = field.splitlines(True)
+ if len(lines) > 1:
+ lines = [''] + lines # start with initial empty line
+ else:
+ escaped_field = escape(field)
+ specialchars_count = 0
+ for c in ['\\', '\n', '\r', '\t', '"']:
+ specialchars_count += field.count(c)
+ # comparison must take into account fieldname length + one space
+ # + 2 quotes (eg. msgid "")
+ flength = len(fieldname) + 3
+ if plural_index:
+ flength += len(plural_index)
+ real_wrapwidth = wrapwidth - flength + specialchars_count
+ if wrapwidth > 0 and len(field) > real_wrapwidth:
+ # Wrap the line but take field name into account
+ lines = [''] + [unescape(item) for item in wrap(
+ escaped_field,
+ wrapwidth - 2, # 2 for quotes ""
+ drop_whitespace=False,
+ break_long_words=False
+ )]
+ else:
+ lines = [field]
+ if fieldname.startswith('previous_'):
+ # quick and dirty trick to get the real field name
+ fieldname = fieldname[9:]
+
+ ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
+ escape(lines.pop(0)))]
+ for mstr in lines:
+ #import pdb; pdb.set_trace()
+ ret.append('%s"%s"' % (delflag, escape(mstr)))
+ return ret
+# }}}
+# class POEntry {{{
+
+
+class POEntry(_BaseEntry):
+ """
+ Represents a po file entry.
+ """
+
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor, accepts the following keyword arguments:
+
+ ``comment``
+ string, the entry comment.
+
+ ``tcomment``
+ string, the entry translator comment.
+
+ ``occurrences``
+ list, the entry occurrences.
+
+ ``flags``
+ list, the entry flags.
+
+ ``previous_msgctxt``
+ string, the entry previous context.
+
+ ``previous_msgid``
+ string, the entry previous msgid.
+
+ ``previous_msgid_plural``
+ string, the entry previous msgid_plural.
+
+ ``linenum``
+ integer, the line number of the entry
+ """
+ _BaseEntry.__init__(self, *args, **kwargs)
+ self.comment = kwargs.get('comment', '')
+ self.tcomment = kwargs.get('tcomment', '')
+ self.occurrences = kwargs.get('occurrences', [])
+ self.flags = kwargs.get('flags', [])
+ self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
+ self.previous_msgid = kwargs.get('previous_msgid', None)
+ self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
+ self.linenum = kwargs.get('linenum', None)
+
+ def __unicode__(self, wrapwidth=78):
+ """
+ Returns the unicode representation of the entry.
+ """
+ if self.obsolete:
+ return _BaseEntry.__unicode__(self, wrapwidth)
+
+ ret = []
+ # comments first, if any (with text wrapping as xgettext does)
+ comments = [('comment', '#. '), ('tcomment', '# ')]
+ for c in comments:
+ val = getattr(self, c[0])
+ if val:
+ for comment in val.split('\n'):
+ if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
+ ret += wrap(
+ comment,
+ wrapwidth,
+ initial_indent=c[1],
+ subsequent_indent=c[1],
+ break_long_words=False
+ )
+ else:
+ ret.append('%s%s' % (c[1], comment))
+
+ # occurrences (with text wrapping as xgettext does)
+ if self.occurrences:
+ filelist = []
+ for fpath, lineno in self.occurrences:
+ if lineno:
+ filelist.append('%s:%s' % (fpath, lineno))
+ else:
+ filelist.append(fpath)
+ filestr = ' '.join(filelist)
+ if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
+ # textwrap split words that contain hyphen, this is not
+ # what we want for filenames, so the dirty hack is to
+ # temporally replace hyphens with a char that a file cannot
+ # contain, like "*"
+ ret += [l.replace('*', '-') for l in wrap(
+ filestr.replace('-', '*'),
+ wrapwidth,
+ initial_indent='#: ',
+ subsequent_indent='#: ',
+ break_long_words=False
+ )]
+ else:
+ ret.append('#: ' + filestr)
+
+ # flags (TODO: wrapping ?)
+ if self.flags:
+ ret.append('#, %s' % ', '.join(self.flags))
+
+ # previous context and previous msgid/msgid_plural
+ fields = ['previous_msgctxt', 'previous_msgid',
+ 'previous_msgid_plural']
+ for f in fields:
+ val = getattr(self, f)
+ if val:
+ ret += self._str_field(f, "#| ", "", val, wrapwidth)
+
+ ret.append(_BaseEntry.__unicode__(self, wrapwidth))
+ ret = u('\n').join(ret)
+
+ assert isinstance(ret, text_type)
+ #if type(ret) != types.UnicodeType:
+ # return unicode(ret, self.encoding)
+ return ret
+
+ def __cmp__(self, other):
+ """
+ Called by comparison operations if rich comparison is not defined.
+ """
+
+ # First: Obsolete test
+ if self.obsolete != other.obsolete:
+ if self.obsolete:
+ return -1
+ else:
+ return 1
+ # Work on a copy to protect original
+ occ1 = sorted(self.occurrences[:])
+ occ2 = sorted(other.occurrences[:])
+ pos = 0
+ for entry1 in occ1:
+ try:
+ entry2 = occ2[pos]
+ except IndexError:
+ return 1
+ pos = pos + 1
+ if entry1[0] != entry2[0]:
+ if entry1[0] > entry2[0]:
+ return 1
+ else:
+ return -1
+ if entry1[1] != entry2[1]:
+ if entry1[1] > entry2[1]:
+ return 1
+ else:
+ return -1
+ # Finally: Compare message ID
+ if self.msgid > other.msgid:
+ return 1
+ elif self.msgid < other.msgid:
+ return -1
+ return 0
+
+ def __gt__(self, other):
+ return self.__cmp__(other) > 0
+
+ def __lt__(self, other):
+ return self.__cmp__(other) < 0
+
+ def __ge__(self, other):
+ return self.__cmp__(other) >= 0
+
+ def __le__(self, other):
+ return self.__cmp__(other) <= 0
+
+ def __eq__(self, other):
+ return self.__cmp__(other) == 0
+
+ def __ne__(self, other):
+ return self.__cmp__(other) != 0
+
+ def translated(self):
+ """
+ Returns ``True`` if the entry has been translated or ``False``
+ otherwise.
+ """
+ if self.obsolete or 'fuzzy' in self.flags:
+ return False
+ if self.msgstr != '':
+ return True
+ if self.msgstr_plural:
+ for pos in self.msgstr_plural:
+ if self.msgstr_plural[pos] == '':
+ return False
+ return True
+ return False
+
+ def merge(self, other):
+ """
+ Merge the current entry with the given pot entry.
+ """
+ self.msgid = other.msgid
+ self.msgctxt = other.msgctxt
+ self.occurrences = other.occurrences
+ self.comment = other.comment
+ fuzzy = 'fuzzy' in self.flags
+ self.flags = other.flags[:] # clone flags
+ if fuzzy:
+ self.flags.append('fuzzy')
+ self.msgid_plural = other.msgid_plural
+ self.obsolete = other.obsolete
+ self.previous_msgctxt = other.previous_msgctxt
+ self.previous_msgid = other.previous_msgid
+ self.previous_msgid_plural = other.previous_msgid_plural
+ if other.msgstr_plural:
+ for pos in other.msgstr_plural:
+ try:
+ # keep existing translation at pos if any
+ self.msgstr_plural[pos]
+ except KeyError:
+ self.msgstr_plural[pos] = ''
+
+ def __hash__(self):
+ return hash((self.msgid, self.msgstr))
+# }}}
+# class MOEntry {{{
+
+
+class MOEntry(_BaseEntry):
+ """
+ Represents a mo file entry.
+ """
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor, accepts the following keyword arguments,
+ for consistency with :class:`~polib.POEntry`:
+
+ ``comment``
+ ``tcomment``
+ ``occurrences``
+ ``flags``
+ ``previous_msgctxt``
+ ``previous_msgid``
+ ``previous_msgid_plural``
+
+ Note: even though these keyword arguments are accepted,
+ they hold no real meaning in the context of MO files
+ and are simply ignored.
+ """
+ _BaseEntry.__init__(self, *args, **kwargs)
+ self.comment = ''
+ self.tcomment = ''
+ self.occurrences = []
+ self.flags = []
+ self.previous_msgctxt = None
+ self.previous_msgid = None
+ self.previous_msgid_plural = None
+
+ def __hash__(self):
+ return hash((self.msgid, self.msgstr))
+
+# }}}
+# class _POFileParser {{{
+
+
+class _POFileParser(object):
+ """
+ A finite state machine to parse efficiently and correctly po
+ file format.
+ """
+
+ def __init__(self, pofile, *args, **kwargs):
+ """
+ Constructor.
+
+ Keyword arguments:
+
+ ``pofile``
+ string, path to the po file or its content
+
+ ``encoding``
+ string, the encoding to use, defaults to ``default_encoding``
+ global variable (optional).
+
+ ``check_for_duplicates``
+ whether to check for duplicate entries when adding entries to the
+ file (optional, default: ``False``).
+ """
+ enc = kwargs.get('encoding', default_encoding)
+ if _is_file(pofile):
+ try:
+ self.fhandle = io.open(pofile, 'rt', encoding=enc)
+ except LookupError:
+ enc = default_encoding
+ self.fhandle = io.open(pofile, 'rt', encoding=enc)
+ else:
+ self.fhandle = pofile.splitlines()
+
+ klass = kwargs.get('klass')
+ if klass is None:
+ klass = POFile
+ self.instance = klass(
+ pofile=pofile,
+ encoding=enc,
+ check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ )
+ self.transitions = {}
+ self.current_line = 0
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_state = 'st'
+ self.current_token = None
+ # two memo flags used in handlers
+ self.msgstr_index = 0
+ self.entry_obsolete = 0
+ # Configure the state machine, by adding transitions.
+ # Signification of symbols:
+ # * ST: Beginning of the file (start)
+ # * HE: Header
+ # * TC: a translation comment
+ # * GC: a generated comment
+        # * OC: a file/line occurrence
+ # * FL: a flags line
+ # * CT: a message context
+ # * PC: a previous msgctxt
+ # * PM: a previous msgid
+ # * PP: a previous msgid_plural
+ # * MI: a msgid
+ # * MP: a msgid plural
+ # * MS: a msgstr
+ # * MX: a msgstr plural
+ # * MC: a msgid or msgstr continuation line
+ all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
+ 'ms', 'mp', 'mx', 'mi']
+
+ self.add('tc', ['st', 'he'], 'he')
+ self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
+ 'mp', 'mx', 'mi'], 'tc')
+ self.add('gc', all, 'gc')
+ self.add('oc', all, 'oc')
+ self.add('fl', all, 'fl')
+ self.add('pc', all, 'pc')
+ self.add('pm', all, 'pm')
+ self.add('pp', all, 'pp')
+ self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
+ 'pp', 'ms', 'mx'], 'ct')
+ self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
+ 'pm', 'pp', 'ms', 'mx'], 'mi')
+ self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
+ self.add('ms', ['mi', 'mp', 'tc'], 'ms')
+ self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
+ self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
+
+ def parse(self):
+ """
+ Run the state machine, parse the file line by line and call process()
+ with the current matched symbol.
+ """
+
+ keywords = {
+ 'msgctxt': 'ct',
+ 'msgid': 'mi',
+ 'msgstr': 'ms',
+ 'msgid_plural': 'mp',
+ }
+ prev_keywords = {
+ 'msgid_plural': 'pp',
+ 'msgid': 'pm',
+ 'msgctxt': 'pc',
+ }
+ tokens = []
+ for line in self.fhandle:
+ self.current_line += 1
+ line = line.strip()
+ if line == '':
+ continue
+
+ tokens = line.split(None, 2)
+ nb_tokens = len(tokens)
+
+ if tokens[0] == '#~|':
+ continue
+
+ if tokens[0] == '#~' and nb_tokens > 1:
+ line = line[3:].strip()
+ tokens = tokens[1:]
+ nb_tokens -= 1
+ self.entry_obsolete = 1
+ else:
+ self.entry_obsolete = 0
+
+ # Take care of keywords like
+ # msgid, msgid_plural, msgctxt & msgstr.
+ if tokens[0] in keywords and nb_tokens > 1:
+ line = line[len(tokens[0]):].lstrip()
+ if re.search(r'([^\\]|^)"', line[1:-1]):
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'unescaped double quote found' %
+ (self.instance.fpath, self.current_line))
+ self.current_token = line
+ self.process(keywords[tokens[0]])
+ continue
+
+ self.current_token = line
+
+ if tokens[0] == '#:':
+ if nb_tokens <= 1:
+ continue
+ # we are on a occurrences line
+ self.process('oc')
+
+ elif line[:1] == '"':
+ # we are on a continuation line
+ if re.search(r'([^\\]|^)"', line[1:-1]):
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'unescaped double quote found' %
+ (self.instance.fpath, self.current_line))
+ self.process('mc')
+
+ elif line[:7] == 'msgstr[':
+ # we are on a msgstr plural
+ self.process('mx')
+
+ elif tokens[0] == '#,':
+ if nb_tokens <= 1:
+ continue
+ # we are on a flags line
+ self.process('fl')
+
+ elif tokens[0] == '#' or tokens[0].startswith('##'):
+ if line == '#':
+ line += ' '
+ # we are on a translator comment line
+ self.process('tc')
+
+ elif tokens[0] == '#.':
+ if nb_tokens <= 1:
+ continue
+ # we are on a generated comment line
+ self.process('gc')
+
+ elif tokens[0] == '#|':
+ if nb_tokens <= 1:
+ raise IOError('Syntax error in po file %s (line %s)' %
+ (self.instance.fpath, self.current_line))
+
+ # Remove the marker and any whitespace right after that.
+ line = line[2:].lstrip()
+ self.current_token = line
+
+ if tokens[1].startswith('"'):
+ # Continuation of previous metadata.
+ self.process('mc')
+ continue
+
+ if nb_tokens == 2:
+ # Invalid continuation line.
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'invalid continuation line' %
+ (self.instance.fpath, self.current_line))
+
+ # we are on a "previous translation" comment line,
+ if tokens[1] not in prev_keywords:
+ # Unknown keyword in previous translation comment.
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'unknown keyword %s' %
+ (self.instance.fpath, self.current_line,
+ tokens[1]))
+
+ # Remove the keyword and any whitespace
+ # between it and the starting quote.
+ line = line[len(tokens[1]):].lstrip()
+ self.current_token = line
+ self.process(prev_keywords[tokens[1]])
+
+ else:
+ raise IOError('Syntax error in po file %s (line %s)' %
+ (self.instance.fpath, self.current_line))
+
+ if self.current_entry and len(tokens) > 0 and \
+ not tokens[0].startswith('#'):
+ # since entries are added when another entry is found, we must add
+ # the last entry here (only if there are lines). Trailing comments
+ # are ignored
+ self.instance.append(self.current_entry)
+
+ # before returning the instance, check if there's metadata and if
+ # so extract it in a dict
+ metadataentry = self.instance.find('')
+ if metadataentry: # metadata found
+ # remove the entry
+ self.instance.remove(metadataentry)
+ self.instance.metadata_is_fuzzy = metadataentry.flags
+ key = None
+ for msg in metadataentry.msgstr.splitlines():
+ try:
+ key, val = msg.split(':', 1)
+ self.instance.metadata[key] = val.strip()
+ except (ValueError, KeyError):
+ if key is not None:
+ self.instance.metadata[key] += '\n' + msg.strip()
+ # close opened file
+ if not isinstance(self.fhandle, list): # must be file
+ self.fhandle.close()
+ return self.instance
+
+ def add(self, symbol, states, next_state):
+ """
+ Add a transition to the state machine.
+
+ Keywords arguments:
+
+ ``symbol``
+ string, the matched token (two chars symbol).
+
+ ``states``
+ list, a list of states (two chars symbols).
+
+ ``next_state``
+ the next state the fsm will have after the action.
+ """
+ for state in states:
+ action = getattr(self, 'handle_%s' % next_state)
+ self.transitions[(symbol, state)] = (action, next_state)
+
+ def process(self, symbol):
+ """
+ Process the transition corresponding to the current state and the
+ symbol provided.
+
+ Keywords arguments:
+
+ ``symbol``
+ string, the matched token (two chars symbol).
+
+ ``linenum``
+ integer, the current line number of the parsed file.
+ """
+ try:
+ (action, state) = self.transitions[(symbol, self.current_state)]
+ if action():
+ self.current_state = state
+ except Exception:
+ raise IOError('Syntax error in po file (line %s)' %
+ self.current_line)
+
+ # state handlers
+
+ def handle_he(self):
+ """Handle a header comment."""
+ if self.instance.header != '':
+ self.instance.header += '\n'
+ self.instance.header += self.current_token[2:]
+ return 1
+
+ def handle_tc(self):
+ """Handle a translator comment."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ if self.current_entry.tcomment != '':
+ self.current_entry.tcomment += '\n'
+ tcomment = self.current_token.lstrip('#')
+ if tcomment.startswith(' '):
+ tcomment = tcomment[1:]
+ self.current_entry.tcomment += tcomment
+ return True
+
+ def handle_gc(self):
+ """Handle a generated comment."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ if self.current_entry.comment != '':
+ self.current_entry.comment += '\n'
+ self.current_entry.comment += self.current_token[3:]
+ return True
+
+ def handle_oc(self):
+ """Handle a file:num occurence."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ occurrences = self.current_token[3:].split()
+ for occurrence in occurrences:
+ if occurrence != '':
+ try:
+ fil, line = occurrence.split(':')
+ if not line.isdigit():
+ fil = fil + line
+ line = ''
+ self.current_entry.occurrences.append((fil, line))
+ except (ValueError, AttributeError):
+ self.current_entry.occurrences.append((occurrence, ''))
+ return True
+
+ def handle_fl(self):
+ """Handle a flags line."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.flags += [c.strip() for c in
+ self.current_token[3:].split(',')]
+ return True
+
+ def handle_pp(self):
+ """Handle a previous msgid_plural line."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.previous_msgid_plural = \
+ unescape(self.current_token[1:-1])
+ return True
+
+ def handle_pm(self):
+ """Handle a previous msgid line."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.previous_msgid = \
+ unescape(self.current_token[1:-1])
+ return True
+
+ def handle_pc(self):
+ """Handle a previous msgctxt line."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.previous_msgctxt = \
+ unescape(self.current_token[1:-1])
+ return True
+
+ def handle_ct(self):
+ """Handle a msgctxt."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.msgctxt = unescape(self.current_token[1:-1])
+ return True
+
+ def handle_mi(self):
+ """Handle a msgid."""
+ if self.current_state in ['mc', 'ms', 'mx']:
+ self.instance.append(self.current_entry)
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.obsolete = self.entry_obsolete
+ self.current_entry.msgid = unescape(self.current_token[1:-1])
+ return True
+
+ def handle_mp(self):
+ """Handle a msgid plural."""
+ self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
+ return True
+
+ def handle_ms(self):
+ """Handle a msgstr."""
+ self.current_entry.msgstr = unescape(self.current_token[1:-1])
+ return True
+
+ def handle_mx(self):
+ """Handle a msgstr plural."""
+ index, value = self.current_token[7], self.current_token[11:-1]
+ self.current_entry.msgstr_plural[int(index)] = unescape(value)
+ self.msgstr_index = int(index)
+ return True
+
+ def handle_mc(self):
+ """Handle a msgid or msgstr continuation line."""
+ token = unescape(self.current_token[1:-1])
+ if self.current_state == 'ct':
+ self.current_entry.msgctxt += token
+ elif self.current_state == 'mi':
+ self.current_entry.msgid += token
+ elif self.current_state == 'mp':
+ self.current_entry.msgid_plural += token
+ elif self.current_state == 'ms':
+ self.current_entry.msgstr += token
+ elif self.current_state == 'mx':
+ self.current_entry.msgstr_plural[self.msgstr_index] += token
+ elif self.current_state == 'pp':
+ self.current_entry.previous_msgid_plural += token
+ elif self.current_state == 'pm':
+ self.current_entry.previous_msgid += token
+ elif self.current_state == 'pc':
+ self.current_entry.previous_msgctxt += token
+ # don't change the current state
+ return False
+# }}}
+# class _MOFileParser {{{
+
+
+class _MOFileParser(object):
+ """
+ A class to parse binary mo files.
+ """
+
+ def __init__(self, mofile, *args, **kwargs):
+ """
+ Constructor.
+
+ Keyword arguments:
+
+ ``mofile``
+ string, path to the mo file or its content
+
+ ``encoding``
+ string, the encoding to use, defaults to ``default_encoding``
+ global variable (optional).
+
+ ``check_for_duplicates``
+ whether to check for duplicate entries when adding entries to the
+ file (optional, default: ``False``).
+ """
+ self.fhandle = open(mofile, 'rb')
+
+ klass = kwargs.get('klass')
+ if klass is None:
+ klass = MOFile
+ self.instance = klass(
+ fpath=mofile,
+ encoding=kwargs.get('encoding', default_encoding),
+ check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ )
+
+ def __del__(self):
+ """
+ Make sure the file is closed, this prevents warnings on unclosed file
+ when running tests with python >= 3.2.
+ """
+ if self.fhandle:
+ self.fhandle.close()
+
+ def parse(self):
+ """
+ Build the instance with the file handle provided in the
+ constructor.
+ """
+ # parse magic number
+        magic_number = self._readbinary('<I', 4)
+        if magic_number == MOFile.MAGIC:
+            ii = '<II'
+        elif magic_number == MOFile.MAGIC_SWAPPED:
+            ii = '>II'
+        else:
+            raise IOError('Invalid mo file, magic number is incorrect !')
+        self.instance.magic_number = magic_number
+        # parse the version number and the number of strings
+        version, numofstrings = self._readbinary(ii, 8)
+        # from MO file format specs: "A program seeing an unexpected major
+        # revision number should stop reading the MO file entirely."
+        if version >> 16 not in (0, 1):
+            raise IOError('Invalid mo file, unexpected major revision number')
+        self.instance.version = version
+        # original strings and translation strings hash table offset
+        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
+        # move to msgid hash table and read length and offset of msgids
+        self.fhandle.seek(msgids_hash_offset)
+        msgids_index = []
+        for i in range(numofstrings):
+            msgids_index.append(self._readbinary(ii, 8))
+        # move to msgstr hash table and read length and offset of msgstrs
+        self.fhandle.seek(msgstrs_hash_offset)
+        msgstrs_index = []
+        for i in range(numofstrings):
+            msgstrs_index.append(self._readbinary(ii, 8))
+        # build entries
+        encoding = self.instance.encoding
+        for i in range(numofstrings):
+            self.fhandle.seek(msgids_index[i][1])
+            msgid = self.fhandle.read(msgids_index[i][0])
+            self.fhandle.seek(msgstrs_index[i][1])
+            msgstr = self.fhandle.read(msgstrs_index[i][0])
+            if i == 0 and not msgid:  # metadata
+                raw_metadata, metadata = msgstr.split(b('\n')), {}
+                for line in raw_metadata:
+                    tokens = line.split(b(':'), 1)
+                    if len(tokens) == 2:
+                        try:
+                            k = tokens[0].decode(encoding)
+                            v = tokens[1].decode(encoding)
+                            metadata[k] = v.strip()
+                        except UnicodeDecodeError:
+                            continue
+                self.instance.metadata = metadata
+                continue
+            # test if we have a plural entry
+            msgid_tokens = msgid.split(b('\0'))
+            if len(msgid_tokens) > 1:
+ entry = self._build_entry(
+ msgid=msgid_tokens[0],
+ msgid_plural=msgid_tokens[1],
+ msgstr_plural=dict((k, v) for k, v in
+ enumerate(msgstr.split(b('\0'))))
+ )
+ else:
+ entry = self._build_entry(msgid=msgid, msgstr=msgstr)
+ self.instance.append(entry)
+ # close opened file
+ self.fhandle.close()
+ return self.instance
+
+ def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
+ msgstr_plural=None):
+ msgctxt_msgid = msgid.split(b('\x04'))
+ encoding = self.instance.encoding
+ if len(msgctxt_msgid) > 1:
+ kwargs = {
+ 'msgctxt': msgctxt_msgid[0].decode(encoding),
+ 'msgid': msgctxt_msgid[1].decode(encoding),
+ }
+ else:
+ kwargs = {'msgid': msgid.decode(encoding)}
+ if msgstr:
+ kwargs['msgstr'] = msgstr.decode(encoding)
+ if msgid_plural:
+ kwargs['msgid_plural'] = msgid_plural.decode(encoding)
+ if msgstr_plural:
+ for k in msgstr_plural:
+ msgstr_plural[k] = msgstr_plural[k].decode(encoding)
+ kwargs['msgstr_plural'] = msgstr_plural
+ return MOEntry(**kwargs)
+
+ def _readbinary(self, fmt, numbytes):
+ """
+        Private method that unpacks n bytes of data using format <fmt>.
+ It returns a tuple or a mixed value if the tuple length is 1.
+ """
+ bytes = self.fhandle.read(numbytes)
+ tup = struct.unpack(fmt, bytes)
+ if len(tup) == 1:
+ return tup[0]
+ return tup
+# }}}
+# class TextWrapper {{{
+
+
+class TextWrapper(textwrap.TextWrapper):
+ """
+ Subclass of textwrap.TextWrapper that backport the
+ drop_whitespace option.
+ """
+ def __init__(self, *args, **kwargs):
+ drop_whitespace = kwargs.pop('drop_whitespace', True)
+ textwrap.TextWrapper.__init__(self, *args, **kwargs)
+ self.drop_whitespace = drop_whitespace
+
+ def _wrap_chunks(self, chunks):
+ """_wrap_chunks(chunks : [string]) -> [string]
+
+ Wrap a sequence of text chunks and return a list of lines of
+ length 'self.width' or less. (If 'break_long_words' is false,
+ some lines may be longer than this.) Chunks correspond roughly
+ to words and the whitespace between them: each chunk is
+ indivisible (modulo 'break_long_words'), but a line break can
+ come between any two chunks. Chunks should not have internal
+ whitespace; ie. a chunk is either all whitespace or a "word".
+ Whitespace chunks will be removed from the beginning and end of
+ lines, but apart from that whitespace is preserved.
+ """
+ lines = []
+ if self.width <= 0:
+ raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+ # Arrange in reverse order so items can be efficiently popped
+ # from a stack of chucks.
+ chunks.reverse()
+
+ while chunks:
+
+ # Start the list of chunks that will make up the current line.
+ # cur_len is just the length of all the chunks in cur_line.
+ cur_line = []
+ cur_len = 0
+
+ # Figure out which static string will prefix this line.
+ if lines:
+ indent = self.subsequent_indent
+ else:
+ indent = self.initial_indent
+
+ # Maximum width for this line.
+ width = self.width - len(indent)
+
+ # First chunk on line is whitespace -- drop it, unless this
+ # is the very beginning of the text (ie. no lines started yet).
+ if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+ del chunks[-1]
+
+ while chunks:
+ l = len(chunks[-1])
+
+ # Can at least squeeze this chunk onto the current line.
+ if cur_len + l <= width:
+ cur_line.append(chunks.pop())
+ cur_len += l
+
+ # Nope, this line is full.
+ else:
+ break
+
+ # The current line is full, and the next chunk is too big to
+ # fit on *any* line (not just this one).
+ if chunks and len(chunks[-1]) > width:
+ self._handle_long_word(chunks, cur_line, cur_len, width)
+
+ # If the last chunk on this line is all whitespace, drop it.
+ if self.drop_whitespace and cur_line and not cur_line[-1].strip():
+ del cur_line[-1]
+
+ # Convert current line back to a string and store it in list
+ # of all lines (return value).
+ if cur_line:
+ lines.append(indent + ''.join(cur_line))
+
+ return lines
+# }}}
+# function wrap() {{{
+
+
+def wrap(text, width=70, **kwargs):
+ """
+ Wrap a single paragraph of text, returning a list of wrapped lines.
+ """
+ if sys.version_info < (2, 6):
+ return TextWrapper(width=width, **kwargs).wrap(text)
+ return textwrap.wrap(text, width=width, **kwargs)
+
+# }}}
diff --git a/i18n-tools/scripts/process_translations.py b/i18n-tools/scripts/process_translations.py
new file mode 100755
index 00000000..984e3ba8
--- /dev/null
+++ b/i18n-tools/scripts/process_translations.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+import sys
+
+from common import create_template_and_language_files, nice_mkdir
+from translate_help import create_help_translation_jobs
+from translate_level import create_level_translation_jobs
+from translate_chaptertitles import create_chaptertitles_translation_jobs
+
+def parse_args():
+ parser = argparse.ArgumentParser(description = 'Generate translations of Colobot data files')
+
+ parser.add_argument('--mode',
+ choices = ['generate', 'print_files'],
+ required = True,
+ help = 'Mode of operation: run generation process or only print input and output files')
+ parser.add_argument('--type',
+ choices = ['help', 'level', 'chaptertitles'],
+ required = True,
+ help = 'Type of translation: help file or level file')
+ parser.add_argument('--input_dir',
+ required = True,
+ help = 'Input file(s) or directory to translate')
+ parser.add_argument('--po_dir',
+ required = True,
+ help = 'Translations directory (with *.pot and *.po files)')
+ parser.add_argument('--output_dir',
+ help = 'Output directory for translated files')
+ parser.add_argument('--output_subdir',
+ help = 'Install subdirectory (only for help files)')
+ parser.add_argument('--signal_file',
+ help = 'Signal file to indicate successful operation')
+
+ return parser.parse_args()
+
+def preprocess_args(args):
+ if not os.path.isdir(args.input_dir):
+ sys.stderr.write('Expected existing input directory!\n')
+ sys.exit(1)
+
+ if not os.path.isdir(args.po_dir):
+ sys.stderr.write('Expected existing translations directory!\n')
+ sys.exit(1)
+
+ if args.output_dir:
+ nice_mkdir(args.output_dir)
+
+def create_translation_jobs(args, template_file, language_files):
+ translation_jobs = []
+
+ if args.type == 'help':
+ translation_jobs = create_help_translation_jobs(args.input_dir,
+ args.output_dir,
+ args.output_subdir,
+ template_file,
+ language_files)
+ elif args.type == 'level':
+ translation_jobs = create_level_translation_jobs(args.input_dir,
+ args.output_dir,
+ template_file,
+ language_files)
+ elif args.type == 'chaptertitles':
+ translation_jobs = create_chaptertitles_translation_jobs(args.input_dir,
+ args.output_dir,
+ template_file,
+ language_files)
+
+ return translation_jobs
+
+def print_files(translation_jobs):
+ input_files = []
+ output_files = []
+ for translation_job in translation_jobs:
+ input_files.append(translation_job.get_input_file_name())
+ output_files.append(translation_job.get_output_file_name())
+
+ sys.stdout.write(';'.join(input_files))
+ sys.stdout.write('\n')
+ sys.stdout.write(';'.join(output_files))
+
+def generate_translations(translation_jobs, template_file, language_files):
+ for translation_job in translation_jobs:
+ translation_job.run()
+
+ template_file.merge_and_save()
+ for language_file in language_files:
+ language_file.merge_and_save(template_file)
+
+def save_signalfile(signal_file_name):
+ if signal_file_name:
+ nice_mkdir(os.path.dirname(signal_file_name))
+ with open(signal_file_name, 'w') as signal_file:
+ signal_file.close()
+
+def main():
+ args = parse_args()
+ preprocess_args(args)
+
+ (template_file, language_files) = create_template_and_language_files(args.po_dir)
+ translation_jobs = create_translation_jobs(args, template_file, language_files)
+
+ if args.mode == 'print_files':
+ print_files(translation_jobs)
+
+ elif args.mode == 'generate':
+ generate_translations(translation_jobs, template_file, language_files)
+
+ save_signalfile(args.signal_file)
+
+if __name__ == '__main__':
+ main()
diff --git a/i18n-tools/scripts/run_po4a.sh b/i18n-tools/scripts/run_po4a.sh
deleted file mode 100755
index c6babea9..00000000
--- a/i18n-tools/scripts/run_po4a.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-##
-# Script to execute PO4A with proper enviroment and commandline options
-#
-# The arguments are config file which is assumed to be already present and
-# optional dummy signal file which is used by build system
-##
-
-# stop on errors
-set -e
-
-if [ $# -ne 1 -a $# -ne 2 ]; then
- echo "Invalid arguments!" >&2
- echo "Usage: $0 po4a_config_file [po4a_signalfile]" >&2
- exit 1
-fi
-
-PO4A_CONFIG_FILE="$1"
-PO4A_SIGNALFILE="$2"
-
-# get the directory where the script is in
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-# run po4a
-if [ -n "$VERBOSE" ]; then
- verbosity="-v"
-else
- verbosity="-q"
-fi
-PERL5LIB="${SCRIPT_DIR}/perllib${PERL5LIB+:}$PERL5LIB" po4a -k0 $verbosity -f --msgmerge-opt=--no-wrap --master-charset=UTF-8 --localized-charset=UTF-8 "$PO4A_CONFIG_FILE"
-
-# if applicable, touch dummy signal file to indicate success
-if [ -n "$PO4A_SIGNALFILE" ]; then
- touch "$PO4A_SIGNALFILE"
-fi
diff --git a/i18n-tools/scripts/translate_chaptertitles.py b/i18n-tools/scripts/translate_chaptertitles.py
new file mode 100644
index 00000000..2becf40d
--- /dev/null
+++ b/i18n-tools/scripts/translate_chaptertitles.py
@@ -0,0 +1,42 @@
+import os
+
+from translate_level import LevelTranslationJob
+from common import nice_mkdir, nice_path_join
+
+"""
+ Create jobs for chaptertile files translation
+
+ Assumes that input_dir has structure like so:
+ ${input_dir}/dir1/chaptertitle.txt
+ ...
+ ${input_dir}/dirN/chaptertitle.txt
+
+ The output files will be saved in:
+ ${input_dir}/dir1/chaptertitle.txt
+ ...
+ ${input_dir}/dirN/chaptertitle.txt
+
+ The actual translation is done using the same jobs as level files
+"""
+def create_chaptertitles_translation_jobs(input_dir, output_dir, template_file, language_files):
+ translation_jobs = []
+
+ for subdirectory in sorted(os.listdir(input_dir)):
+ input_subdirectory = os.path.join(input_dir, subdirectory)
+ if not os.path.isdir(input_subdirectory):
+ continue
+
+ input_file = os.path.join(input_subdirectory, 'chaptertitle.txt')
+ if not os.path.isfile(input_file):
+ continue
+
+ output_subdirectory = nice_path_join(output_dir, subdirectory)
+ nice_mkdir(output_subdirectory)
+
+ translation_jobs.append(LevelTranslationJob(
+ input_file = input_file,
+ output_file = nice_path_join(output_subdirectory, 'chaptertitle.txt'),
+ template_file = template_file,
+ language_files = language_files))
+
+ return translation_jobs
diff --git a/i18n-tools/scripts/translate_help.py b/i18n-tools/scripts/translate_help.py
new file mode 100644
index 00000000..22af47c5
--- /dev/null
+++ b/i18n-tools/scripts/translate_help.py
@@ -0,0 +1,229 @@
+import os
+import re
+
+from common import TranslationJob, nice_mkdir, nice_path_join
+
+"""
+ Types of input lines
+"""
+TYPE_WHITESPACE = 1 # whitespace only
+TYPE_HEADER = 2 # header (beginning with \b or \t)
+TYPE_BULLET = 3 # bullet point
+TYPE_IMAGE = 4 # image (beginning with \image)
+TYPE_CODE = 5 # code (beginning with \s;)
+TYPE_PLAIN = 6 # plain text
+
+class HelpTranslationJob(TranslationJob):
+ def __init__(self, **kwargs):
+ TranslationJob.__init__(self, **kwargs)
+ self.template_file = kwargs['template_file']
+ self.language_file = kwargs['language_file']
+ self.line_buffer = None
+
+ def process_file(self):
+ while True:
+ (paragraph, paragraph_type) = self.read_input_paragraph()
+ if not paragraph:
+ break
+
+ if paragraph_type == TYPE_WHITESPACE:
+ self.process_whitespace(paragraph[0])
+ elif paragraph_type == TYPE_HEADER:
+ self.process_header(paragraph[0])
+ elif paragraph_type == TYPE_BULLET:
+ self.process_bullet(paragraph[0])
+ elif paragraph_type == TYPE_IMAGE:
+ self.process_image(paragraph[0])
+ elif paragraph_type == TYPE_CODE:
+ self.process_code(paragraph)
+ elif paragraph_type == TYPE_PLAIN:
+ self.process_plain(paragraph)
+
+ """
+ Read one or more lines of input with same line type and return the list as paragraph
+ Exception is types which are processed as single lines, giving only paragraph with one line
+ """
+ def read_input_paragraph(self):
+ paragraph = None
+ paragraph_type = None
+ while True:
+ line = None
+ line_type = None
+ if self.line_buffer:
+ (line, line_type) = self.line_buffer
+ self.line_buffer = None
+ else:
+ line = self.read_input_line()
+ if line:
+ line_type = self.check_line_type(line.text)
+
+ if not line:
+ break
+
+ if not paragraph_type:
+ paragraph_type = line_type
+
+ if paragraph_type == line_type:
+ if not paragraph:
+ paragraph = []
+ paragraph.append(line)
+ else:
+ self.line_buffer = (line, line_type)
+ break
+
+ if line_type in [TYPE_WHITESPACE, TYPE_HEADER, TYPE_BULLET, TYPE_IMAGE]:
+ break
+
+ return (paragraph, paragraph_type)
+
+ def check_line_type(self, line):
+ if re.match(r'^\s*$', line) or re.match(r'^\\[nctr];$', line):
+ return TYPE_WHITESPACE
+ elif re.match(r'^\\[bt];', line):
+ return TYPE_HEADER
+ elif re.match(r'^\s*([0-9]\)|[o-])', line):
+ return TYPE_BULLET
+ elif re.match(r'^\\image.*;$', line):
+ return TYPE_IMAGE
+ elif re.match(r'^\\s;', line):
+ return TYPE_CODE
+ else:
+ return TYPE_PLAIN
+
+ def process_whitespace(self, line):
+ self.write_output_line(line.text)
+
+ def process_header(self, line):
+ match = re.match(r'^(\\[bt];)(.*)', line.text)
+ header_type = match.group(1)
+ header_text = match.group(2)
+ translated_header_text = self.translate_text(header_text, line.occurrence, header_type + ' header')
+ self.write_output_line(header_type + translated_header_text)
+
+ def process_bullet(self, line):
+ match = re.match(r'^(\s*)([0-9]\)|[o-])(\s*)(.*)', line.text)
+ spacing_before_bullet = match.group(1)
+ bullet_point = match.group(2)
+ spacing_after_bullet = match.group(3)
+ text = match.group(4)
+ translated_text = self.translate_text(
+ text, line.occurrence, "Bullet: '{0}'".format(bullet_point))
+ self.write_output_line(spacing_before_bullet + bullet_point + spacing_after_bullet + translated_text)
+
+ def process_image(self, line):
+ match = re.match(r'^(\\image )(.*)( \d* \d*;)$', line.text)
+ image_command = match.group(1)
+ image_source = match.group(2)
+ image_coords = match.group(3)
+ translated_image_source = self.translate_text(image_source, line.occurrence, 'Image filename')
+ self.write_output_line(image_command + translated_image_source + image_coords)
+
+ def process_code(self, paragraph):
+ text_lines = []
+ for line in paragraph:
+ match = re.match(r'^\\s;(.*)', line.text)
+ code_line = match.group(1)
+ text_lines.append(code_line)
+
+ joined_text_lines = '\n'.join(text_lines)
+ translated_text_lines = self.translate_text(joined_text_lines, paragraph[0].occurrence, 'Source code')
+ for line in translated_text_lines.split('\n'):
+ self.write_output_line(r'\s;' + line)
+
+ def process_plain(self, paragraph):
+ text_lines = []
+ for line in paragraph:
+ text_lines.append(line.text)
+
+ joined_text_lines = '\n'.join(text_lines)
+ translated_text_lines = self.translate_text(joined_text_lines, paragraph[0].occurrence, 'Plain text')
+ for line in translated_text_lines.split('\n'):
+ self.write_output_line(line)
+
+ def translate_text(self, text, occurrence, type_comment):
+ converted_text = convert_escape_syntax_to_tag_syntax(text)
+ self.template_file.insert_entry(converted_text, occurrence, type_comment)
+
+ if not self.language_file:
+ return text
+
+ translated_text = self.language_file.translate(converted_text)
+ return convert_tag_syntax_to_escape_syntax(translated_text)
+
+def convert_escape_syntax_to_tag_syntax(text):
+    # Replace \button $id; and \key $id; as pseudo xHTML tags <button $id/> and <key $id/>
+    text = re.sub(r'\\(button|key) ([^;]*?);', r'<\1 \2/>', text)
+    # Put \const;Code\norm; sequences into pseudo-HTML tags <format const>Code</format>
+    text = re.sub(r'\\(const|type|token|key);([^\\;]*?)\\norm;', r'<format \1>\2</format>', text)
+    # Transform CBot links \l;text\u target; into pseudo-HTML <a target>text</a>
+    text = re.sub(r'\\l;(.*?)\\u (.*?);', r'<a \2>\1</a>', text)
+    # Cleanup pseudo-html targets separated by \\ to have a single character |
+    text = re.sub(r'<a ([^>]*?)\\\\([^>]*?)>', r'<a \1|\2>', text)
+    # Replace remnants of \const; \type; \token, \norm; or \key; as pseudo xHTML tags
+    text = re.sub(r'\\(const|type|token|norm|key);', r'<\1/>', text)
+    # Put \c;Code\n; sequences into pseudo-HTML tags <code>Code</code>
+    text = re.sub(r'\\c;([^\\;]*?)\\n;', r'<code>\1</code>', text)
+    # Replace remnants of \s; \c; \b; or \n; as pseudo xHTML tags
+    text = re.sub(r'\\([scbn]);', r'<\1/>', text)
+    return text
+
+def convert_tag_syntax_to_escape_syntax(text):
+    # Invert the replace remnants of \s; \c; \b; or \n; as pseudo xHTML tags
+    text = re.sub(r'<([scbn])/>', r'\\\1;', text)
+    # Invert the put of \c;Code\n; sequences into pseudo-HTML tags
+    text = re.sub(r'<code>([^\\;]*?)</code>', r'\\c;\1\\n;', text)
+    # Invert the replace remnants of \const; \type; \token or \norm; as pseudo xHTML tags
+    text = re.sub(r'<(const|type|token|norm|key)/>', r'\\\1;', text)
+    # Invert the cleanup of pseudo-html targets separated by \\ to have a single character |
+    text = re.sub(r'<a ([^>]*?)\|([^>]*?)>', r'<a \1\\\\\2>', text)
+    # Invert the transform of CBot links \l;text\u target; into pseudo-HTML <a target>text</a>
+    text = re.sub(r'<a ([^>]*?)>(.*?)</a>', r'\\l;\2\\u \1;', text)
+    # Invert the put \const;Code\norm; sequences into pseudo-HTML tags
+    text = re.sub(r'<format (const|type|token|key)>([^\\;]*?)</format>', r'\\\1;\2\\norm;', text)
+    # Invert the replace of \button $id; as pseudo xHTML tags
+    text = re.sub(r'<(button|key) (.*?)/>', r'\\\1 \2;', text)
+    return text
+
+"""
+ Create jobs for help translation
+
+ Assumes that input_dir has structure like so:
+ ${input_dir}/E/help_file1.txt
+ ...
+ ${input_dir}/E/help_fileN.txt
+
+ The output files will be saved in:
+ ${output_dir}/${language_char1}/${install_subdir}/help_file1.txt
+ ...
+ ${output_dir}/${language_charM}/${install_subdir}/help_fileN.txt
+"""
+def create_help_translation_jobs(input_dir, output_dir, install_subdir, template_file, language_files):
+ translation_jobs = []
+
+ e_dir = os.path.join(input_dir, 'E')
+ input_files = sorted(os.listdir(e_dir))
+
+ if not install_subdir:
+ install_subdir = ''
+
+ language_files_list = []
+ if len(language_files) > 0:
+ language_files_list = language_files
+ else:
+ # We need at least one dummy language file to create any jobs
+ language_files_list = [None]
+
+ for language_file in language_files_list:
+ output_translation_dir = None
+ if language_file:
+ output_translation_dir = nice_path_join(output_dir, language_file.language_char(), install_subdir)
+ nice_mkdir(output_translation_dir)
+
+ for input_file in input_files:
+ translation_jobs.append(HelpTranslationJob(
+ input_file = os.path.join(e_dir, input_file),
+ output_file = nice_path_join(output_translation_dir, input_file),
+ template_file = template_file,
+ language_file = language_file))
+
+ return translation_jobs
diff --git a/i18n-tools/scripts/translate_level.py b/i18n-tools/scripts/translate_level.py
new file mode 100644
index 00000000..5ec03066
--- /dev/null
+++ b/i18n-tools/scripts/translate_level.py
@@ -0,0 +1,98 @@
+import os
+import re
+
+from translate_help import HelpTranslationJob
+from common import TranslationJob, nice_mkdir, nice_path_join
+
+class LevelTranslationJob(TranslationJob):
+    def __init__(self, **kwargs):
+        TranslationJob.__init__(self, **kwargs)
+        self.template_file = kwargs['template_file']
+        self.language_files = kwargs['language_files']
+        self.line_buffer = None
+
+    def process_file(self):
+        while True:
+            line = self.read_input_line()
+            if not line:
+                break
+
+            # English version is always written
+            self.write_output_line(line.text)
+            # Title.E/Resume.E/ScriptName.E lines additionally get one translated copy per language
+            match = re.match(r'^(Title|Resume|ScriptName)\.E(.*)', line.text)
+            if match:
+                for language_file in self.language_files:
+                    self.add_translated_line(match, line.occurrence, language_file)
+
+    def add_translated_line(self, command_match, occurrence, language_file):
+        command = command_match.group(1)
+        arguments = command_match.group(2)
+        # Register every text="..."/resume="..." attribute in the po template and substitute its translation
+        translated_arguments = arguments
+        for attribute_match in re.finditer('(text|resume)="([^"]*)"', arguments):
+            whole_attribute_match = attribute_match.group(0)
+            attribute = attribute_match.group(1)
+            text = attribute_match.group(2)
+
+            self.template_file.insert_entry(text, occurrence, command + '-' + attribute)
+
+            translated_arguments = translated_arguments.replace(
+                whole_attribute_match,
+                u'{0}="{1}"'.format(attribute, language_file.translate(text)))
+        # Write e.g. Title.<language_char> followed by the translated arguments
+        self.write_output_line(u'{0}.{1}{2}'.format(
+            command,
+            language_file.language_char(),
+            translated_arguments))
+
+
+"""
+    Create jobs for level translation
+
+    Assumes that input_dir has structure like so:
+    ${input_dir}/scene.txt
+    ${input_dir}/help/help_file1.E.txt
+    ...
+    ${input_dir}/help/help_fileN.E.txt
+
+    The output files will be saved in:
+    ${output_dir}/scene.txt
+    ${output_dir}/help/help_file1.${language_char1}.txt
+    ...
+    ${output_dir}/help/help_fileN.${language_charM}.txt
+"""
+def create_level_translation_jobs(input_dir, output_dir, template_file, language_files):
+    translation_jobs = []
+
+    input_file = os.path.join(input_dir, 'scene.txt')
+    translation_jobs.append(LevelTranslationJob(
+        input_file = input_file,
+        output_file = nice_path_join(output_dir, 'scene.txt'),
+        template_file = template_file,
+        language_files = language_files))
+
+    input_help_dir = os.path.join(input_dir, 'help')
+    if os.path.isdir(input_help_dir):
+        output_help_dir = nice_path_join(output_dir, 'help')
+        nice_mkdir(output_help_dir)
+
+        language_files_list = []
+        if len(language_files) > 0:
+            language_files_list = language_files
+        else:
+            # We need at least one dummy language file to create any jobs
+            language_files_list = [None]
+
+        for language_file in language_files_list:
+            for help_file in sorted(os.listdir(input_help_dir)):
+                # Default to the original name so a dummy (None) language file cannot leave this undefined (NameError)
+                translated_help_file = help_file
+                if language_file:
+                    translated_help_file = help_file.replace('.E.txt', '.{0}.txt'.format(language_file.language_char()))
+                translation_jobs.append(HelpTranslationJob(
+                    input_file = os.path.join(input_help_dir, help_file),
+                    output_file = nice_path_join(output_help_dir, translated_help_file),
+                    template_file = template_file,
+                    language_file = language_file))
+    return translation_jobs
diff --git a/levels/CMakeLists.txt b/levels/CMakeLists.txt
index 6a094ba5..c5aa9802 100644
--- a/levels/CMakeLists.txt
+++ b/levels/CMakeLists.txt
@@ -1,7 +1,6 @@
cmake_minimum_required(VERSION 2.8)
-include(../i18n-tools/CommonI18N.cmake)
-include(../i18n-tools/LevelsI18N.cmake)
+include(../i18n-tools/I18NTools.cmake)
set(LEVEL_INSTALL_DATA_DIR ${COLOBOT_INSTALL_DATA_DIR}/levels)
@@ -9,63 +8,65 @@ set(LEVEL_INSTALL_DATA_DIR ${COLOBOT_INSTALL_DATA_DIR}/levels)
# Add level category directory with all chapters inside
##
function(add_level_category level_category_dir)
- file(GLOB chaptertitle_files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/${level_category_dir} ${level_category_dir}/chapter*/chaptertitle.txt)
- list(SORT chaptertitle_files)
- if(PO4A AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${level_category_dir}/po/)
- generate_chaptertitles_i18n(translated_chaptertitle_files
- ${level_category_dir}
- "${chaptertitle_files}"
- ${level_category_dir}/po
- ${DATA_BINARY_DIR}/levels-po/${level_category_dir})
- else()
- file(GLOB translated_chaptertitle_files ${level_category_dir}/chapter*/chaptertitle.txt)
+ # Generate and install translations for this category's chaptertitles and levels
+ message(STATUS "Adding translation targets for level/${level_category_dir}")
+
+ # Without Python, just install all files as they are
+ if(NOT PYTHONINTERP_FOUND)
+ install(DIRECTORY ${level_category_dir} DESTINATION ${LEVEL_INSTALL_DATA_DIR})
+ return()
endif()
- file(GLOB chapter_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${level_category_dir}/chapter*)
- list(SORT chapter_dirs)
- list(LENGTH chapter_dirs chapter_dirs_count)
- math(EXPR iterate_range "${chapter_dirs_count} - 1")
- foreach(index RANGE ${iterate_range})
- list(GET chapter_dirs ${index} chapter_dir)
- list(GET translated_chaptertitle_files ${index} translated_chaptertitle_file)
- install(FILES ${translated_chaptertitle_file} DESTINATION ${LEVEL_INSTALL_DATA_DIR}/${chapter_dir})
- endforeach()
+ add_chaptertitles(${level_category_dir})
- file(GLOB chapter_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${level_category_dir}/chapter*)
- foreach(chapter_dir ${chapter_dirs})
- add_chapter(${chapter_dir})
- endforeach()
-endfunction()
-
-##
-# Add chapter directory with all levels inside
-##
-function(add_chapter chapter_dir)
- file(GLOB level_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${chapter_dir}/level*)
+ file(GLOB level_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${level_category_dir}/chapter*/level*)
foreach(level_dir ${level_dirs})
add_level(${level_dir})
endforeach()
+
+endfunction()
+
+##
+# Generate and install translated chaptertitle files for a level category
+##
+function(add_chaptertitles level_category_dir)
+ # Translations are generated in the build tree, then installed into the category dir
+ set(work_dir ${DATA_BINARY_DIR}/levels-po/${level_category_dir})
+ generate_translations(translated_chaptertitle_files
+ "chaptertitles"
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${level_category_dir}
+ ${level_category_dir}/po
+ ${work_dir}
+ "")
+
+ install_preserving_relative_paths("${translated_chaptertitle_files}"
+ ${work_dir}
+ ${LEVEL_INSTALL_DATA_DIR}/${level_category_dir})
+
endfunction()
##
# Add level directory
##
function(add_level level_dir)
- file(GLOB original_help_files ${level_dir}/help/*.txt)
- list(SORT original_help_files)
- if(PO4A AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${level_dir}/po/)
- generate_level_i18n(translated_level_file
- translated_help_files
- ${level_dir}/scene.txt
- "${original_help_files}"
- ${level_dir}/po
- ${DATA_BINARY_DIR}/levels-po/${level_dir})
- else()
- set(translated_level_file ${level_dir}/scene.txt)
- endif()
- install(FILES ${translated_level_file} DESTINATION ${LEVEL_INSTALL_DATA_DIR}/${level_dir})
- install(FILES ${original_help_files} DESTINATION ${LEVEL_INSTALL_DATA_DIR}/${level_dir}/help)
- install(FILES ${translated_help_files} DESTINATION ${LEVEL_INSTALL_DATA_DIR}/${level_dir}/help)
+ # Generate translated scene.txt and help files from this level's po files
+ set(work_dir ${DATA_BINARY_DIR}/levels-po/${level_dir})
+ generate_translations(translated_level_files
+ "level"
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${level_dir}
+ ${level_dir}/po
+ ${work_dir}
+ "")
+ # The original English help files are installed as-is
+ file(GLOB english_help_files ${level_dir}/help/*)
+ install(FILES ${english_help_files} DESTINATION ${LEVEL_INSTALL_DATA_DIR}/${level_dir}/help)
+ # Install generated files preserving their paths relative to the work dir
+ install_preserving_relative_paths("${translated_level_files}"
+ ${work_dir}
+ ${LEVEL_INSTALL_DATA_DIR}/${level_dir})
+
endfunction()