From 24c986c842a20e85961706c0f5bf4b0ce3d7e478 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 24 May 2021 10:29:21 +0200 Subject: [PATCH] add tests for new full name computation with ICU --- nominatim/tokenizer/legacy_icu_tokenizer.py | 2 -- test/Makefile | 4 ++-- test/python/test_tokenizer_legacy_icu.py | 17 +++++------------ 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index b4d85356..2bd22c72 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -537,8 +537,6 @@ class _TokenInfo: """ # Start with all partial names terms = set((part for ns in names for part in ns.split())) - # Add partials for the full terms (TO BE REMOVED) - terms.update((n for n in names)) # Add the full names terms.update((' ' + n for n in names)) diff --git a/test/Makefile b/test/Makefile index 613b974d..b8afdf9b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,8 +4,8 @@ no-test-db: bdd-no-test-db php bdd: cd bdd && behave -DREMOVE_TEMPLATE=1 -bdd-no-test-db: - cd bdd && behave -DREMOVE_TEMPLATE=1 db osm2pgsql +icu: + cd bdd && behave -DREMOVE_TEMPLATE=1 -DTOKENIZER=legacy_icu php: cd php && phpunit ./ diff --git a/test/python/test_tokenizer_legacy_icu.py b/test/python/test_tokenizer_legacy_icu.py index ebce7218..d8ca2f22 100644 --- a/test/python/test_tokenizer_legacy_icu.py +++ b/test/python/test_tokenizer_legacy_icu.py @@ -232,23 +232,16 @@ def test_process_place_names(analyzer, getorcreate_term_id): @pytest.mark.parametrize('sep', [',' , ';']) def test_full_names_with_separator(analyzer, getorcreate_term_id, sep): with analyzer() as anl: - full_names = - anl._compute_full_names({'name' : sep.join(('New York', 'Big Apple'))}) + names = anl._compute_full_names({'name' : sep.join(('New York', 'Big Apple'))}) - expect = set((anl.make_standard_word(w) for w in ('New York', 'Big Apple'))) - - assert full_names == expect + assert names == set(('NEW YORK', 'BIG APPLE')) -def test_process_place_names_with_bracket(analyzer, getorcreate_term_id): +def test_full_names_with_bracket(analyzer, getorcreate_term_id): with analyzer() as anl: - info = anl.process_place({'name' : - {'name' : 'Houseboat (left)'}}) + names = anl._compute_full_names({'name' : 'Houseboat (left)'}) - expect = set((anl.make_standard_word(w) for w in - (' houseboat', ' houseboat left', 'houseboat', 'left'))) - - assert eval(info['names']) == expect + assert names == set(('HOUSEBOAT (LEFT)', 'HOUSEBOAT')) @pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])