From 24c986c842a20e85961706c0f5bf4b0ce3d7e478 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Mon, 24 May 2021 10:29:21 +0200
Subject: [PATCH] add tests for new full name computation with ICU

---
 nominatim/tokenizer/legacy_icu_tokenizer.py |  2 --
 test/Makefile                               |  4 ++--
 test/python/test_tokenizer_legacy_icu.py    | 17 +++++------------
 3 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py
index b4d85356..2bd22c72 100644
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@ -537,8 +537,6 @@ class _TokenInfo:
         """
         # Start with all partial names
         terms = set((part for ns in names for part in ns.split()))
-        # Add partials for the full terms (TO BE REMOVED)
-        terms.update((n for n in names))
         # Add the full names
         terms.update((' ' + n for n in names))
 
diff --git a/test/Makefile b/test/Makefile
index 613b974d..b8afdf9b 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -4,8 +4,8 @@ no-test-db: bdd-no-test-db php
 bdd:
 	cd bdd && behave -DREMOVE_TEMPLATE=1
 
-bdd-no-test-db:
-	cd bdd && behave -DREMOVE_TEMPLATE=1 db osm2pgsql
+icu:
+	cd bdd && behave -DREMOVE_TEMPLATE=1 -DTOKENIZER=legacy_icu
 
 php:
 	cd php && phpunit ./
diff --git a/test/python/test_tokenizer_legacy_icu.py b/test/python/test_tokenizer_legacy_icu.py
index ebce7218..d8ca2f22 100644
--- a/test/python/test_tokenizer_legacy_icu.py
+++ b/test/python/test_tokenizer_legacy_icu.py
@@ -232,23 +232,16 @@ def test_process_place_names(analyzer, getorcreate_term_id):
 @pytest.mark.parametrize('sep', [',' , ';'])
 def test_full_names_with_separator(analyzer, getorcreate_term_id, sep):
     with analyzer() as anl:
-        full_names =
-            anl._compute_full_names({'name' : sep.join(('New York', 'Big Apple'))})
+        names = anl._compute_full_names({'name' : sep.join(('New York', 'Big Apple'))})
 
-        expect = set((anl.make_standard_word(w) for w in ('New York', 'Big Apple')))
-
-    assert full_names == expect
+    assert names == set(('NEW YORK', 'BIG APPLE'))
 
 
-def test_process_place_names_with_bracket(analyzer, getorcreate_term_id):
+def test_full_names_with_bracket(analyzer, getorcreate_term_id):
     with analyzer() as anl:
-        info = anl.process_place({'name' :
-                                   {'name' : 'Houseboat (left)'}})
+        names = anl._compute_full_names({'name' : 'Houseboat (left)'})
 
-        expect = set((anl.make_standard_word(w) for w in
-                       (' houseboat', ' houseboat left', 'houseboat', 'left')))
-
-    assert eval(info['names']) == expect
+    assert names == set(('HOUSEBOAT (LEFT)', 'HOUSEBOAT'))
 
 
 @pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])