From d8b7a51ab6d47da79c7e281465c9f5a7bfd74a5a Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann
Date: Thu, 20 Jan 2022 20:18:15 +0100
Subject: [PATCH] add actual removal of housenumber tokens

---
Review notes (ignored when the patch is applied):
 * The progress messages are logged with LOG.warning(); Logger.warn() is a
   deprecated alias of Logger.warning() and must not be used in new code.
 * The hunk's indentation was restored from the Python block structure
   after the patch had been collapsed onto one line -- TODO confirm against
   nominatim/tokenizer/icu_tokenizer.py (blob da07897b) before applying.

 nominatim/tokenizer/icu_tokenizer.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py
index da07897b..0841300a 100644
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -134,15 +134,21 @@ class LegacyICUTokenizer(AbstractTokenizer):
                 for row in cur:
                     for hnr in row[0].split(';'):
                         candidates.pop(hnr, None)
-                LOG.info("There are %s outdated housenumbers.", len(candidates))
+            LOG.info("There are %s outdated housenumbers.", len(candidates))
+            if candidates:
+                with conn.cursor() as cur:
+                    cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
+                                (list(candidates.values()), ))
+                conn.commit()
+
 
 
     def update_word_tokens(self):
         """ Remove unused tokens.
         """
-        LOG.info("Cleaning up housenumber tokens.")
+        LOG.warning("Cleaning up housenumber tokens.")
         self._cleanup_housenumbers()
-        LOG.info("Tokenizer house-keeping done.")
+        LOG.warning("Tokenizer house-keeping done.")
 
 
     def name_analyzer(self):