From 37eeccbf4cd7c25239b78d6c3747fccb1bca519c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 27 Oct 2021 10:07:19 +0200 Subject: [PATCH] ICU: use normalization from config in PHP The TERM_NORMALIZATION config option is no longer applicable. That was already documented but not yet implemented. --- nominatim/tokenizer/icu_tokenizer.py | 14 ++------------ test/python/test_tokenizer_icu.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 3331a321..ea6e5d3c 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -9,7 +9,6 @@ import re from textwrap import dedent from nominatim.db.connection import connect -from nominatim.db.properties import set_property, get_property from nominatim.db.utils import CopyBuffer from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.indexer.place_info import PlaceInfo @@ -36,7 +35,6 @@ class LegacyICUTokenizer(AbstractTokenizer): self.dsn = dsn self.data_dir = data_dir self.loader = None - self.term_normalization = None def init_new_db(self, config, init_db=True): @@ -47,8 +45,6 @@ class LegacyICUTokenizer(AbstractTokenizer): """ self.loader = ICURuleLoader(config) - self.term_normalization = config.TERM_NORMALIZATION - self._install_php(config.lib_dir.php) self._save_config() @@ -64,7 +60,6 @@ class LegacyICUTokenizer(AbstractTokenizer): with connect(self.dsn) as conn: self.loader.load_config_from_db(conn) - self.term_normalization = get_property(conn, DBCFG_TERM_NORMALIZATION) def finalize_import(self, config): @@ -87,13 +82,9 @@ class LegacyICUTokenizer(AbstractTokenizer): def check_database(self, config): """ Check that the tokenizer is set up correctly. """ + # Will throw an error if there is an issue. self.init_from_project(config) - if self.term_normalization is None: - return "Configuration for tokenizer 'icu' are missing." - - return None - def update_statistics(self): """ Recompute frequencies for all name words. @@ -141,7 +132,7 @@ class LegacyICUTokenizer(AbstractTokenizer): php_file.write_text(dedent(f"""\