diff --git a/localization/preprocess.py b/localization/preprocess.py index 06a2269..b0f8ffe 100644 --- a/localization/preprocess.py +++ b/localization/preprocess.py @@ -2,52 +2,52 @@ import os import re import glob import json +from typing import Generator, List, Dict, Any from parse_rc_file import parse_rc_file -base_lang = "en" -available_langs = [dir for dir in os.listdir(os.path.dirname(__file__)) if re.match(r"^\w+(-\w+)?$", dir)] -target_langs = [lang for lang in available_langs if lang != base_lang] +base_lang: str = "en" +available_langs: List[str] = [dir for dir in os.listdir(os.path.dirname(__file__)) if re.match(r"^\w+(-\w+)?$", dir)] +target_langs: List[str] = [lang for lang in available_langs if lang != base_lang] print("Target languages:", target_langs) # & defines accelerators (hotkeys) in menus and buttons and things, which get underlined in the UI. # & can be escaped by doubling it, e.g. "&Taskbar && Start Menu" -def index_of_hotkey(text): - # Returns the index of the ampersand that defines a hotkey, or -1 if not present. - # The space here handles beginning-of-string matching and counteracts the offset for the [^&] so it acts like a negative lookbehind +def index_of_hotkey(text: str) -> int: + # Returns the index of the ampersand that defines a hotkey, or -1 if not present. 
+ # The space here handles beginning-of-string matching and counteracts the offset for the [^&] so it acts like a negative lookbehind m = re.search(r"[^&]&[^&\s]", f" {text}") return m.start() if m else -1 -def has_hotkey(text): +def has_hotkey(text: str) -> bool: return index_of_hotkey(text) != -1 -def remove_hotkey(text): +def remove_hotkey(text: str) -> str: text = re.sub(r"\s?\(&.\)", "", text) text = re.sub(r"([^&]|^)&([^&\s])", r"\1\2", text) return text -def remove_ellipsis(string): +def remove_ellipsis(string: str) -> str: return string.replace("...", "") -def get_strings(lang): - rc_files = glob.glob(f"{os.path.dirname(__file__)}/{lang}/**/*.rc", recursive=True) +def get_strings(lang: str) -> Generator[str, None, None]: + rc_files: List[str] = glob.glob(f"{os.path.dirname(__file__)}/{lang}/**/*.rc", recursive=True) for rc_file in rc_files: with open(rc_file, "r", encoding="utf16") as f: yield from parse_rc_file(f.read().replace("\ufeff", "")) - -base_strings = list(get_strings(base_lang)) +base_strings: List[str] = list(get_strings(base_lang)) for target_lang in target_langs: - target_strings = list(get_strings(target_lang)) - localizations = {} + target_strings: List[str] = list(get_strings(target_lang)) + localizations: Dict[str, Any] = {} - def add_localization(base_string, target_string, fudgedness): + def add_localization(base_string: str, target_string: str, fudgedness: int) -> None: localizations[base_string] = localizations.get(base_string, []) localizations[base_string].append({"target_string": target_string, "fudgedness": fudgedness}) - def add_localizations(base_strings, target_strings): + def add_localizations(base_strings: List[str], target_strings: List[str]) -> None: for i, target_string in enumerate(target_strings): if len(base_strings) <= i: break @@ -66,7 +66,9 @@ for target_lang in target_langs: add_localizations(base_strings, target_strings) for base_string, options in localizations.items(): - options.sort(key=lambda x: x["fudgedness"]) 
+ def get_fudgedness(translation_option: Dict[str, Any]) -> int: + return translation_option["fudgedness"] + options.sort(key=get_fudgedness) unique_strings = list(set(option["target_string"] for option in options)) if len(unique_strings) > 1: unique_strings_json = json.dumps(unique_strings, ensure_ascii=False, indent="\t")