add actual removal of housenumber tokens

This commit is contained in:
Sarah Hoffmann 2022-01-20 20:18:15 +01:00
parent 344a2bfc1a
commit d8b7a51ab6

View File

@ -134,15 +134,21 @@ class LegacyICUTokenizer(AbstractTokenizer):
for row in cur: for row in cur:
for hnr in row[0].split(';'): for hnr in row[0].split(';'):
candidates.pop(hnr, None) candidates.pop(hnr, None)
LOG.info("There are %s outdated housenumbers.", len(candidates)) LOG.info("There are %s outdated housenumbers.", len(candidates))
if candidates:
with conn.cursor() as cur:
cur.execute("""DELETE FROM word WHERE word_id = any(%s)""",
(list(candidates.values()), ))
conn.commit()
def update_word_tokens(self):
    """ Remove unused tokens.

        Delegates the actual work to `_cleanup_housenumbers()` and
        reports the start and the end of the run at warning level.
    """
    # Use Logger.warning(): Logger.warn() is a deprecated alias that is
    # no longer available in recent Python versions.
    LOG.warning("Cleaning up housenumber tokens.")
    self._cleanup_housenumbers()
    LOG.warning("Tokenizer house-keeping done.")
def name_analyzer(self): def name_analyzer(self):