Added --no-replace command for special phrases importation and added corresponding tests

2024-12-24 13:31:37 +03:00 · 2021-05-17 12:40:50 +02:00 · 2021-05-17 12:40:50 +02:00 · 8b8dfc46eb
commit 8b8dfc46eb
parent 06aab389ed
11 changed files with 76 additions and 31 deletions
--- a/nominatim/clicmd/special_phrases.py
+++ b/nominatim/clicmd/special_phrases.py
@ -27,6 +27,8 @@ class ImportSpecialPhrases:
                           help='Import special phrases from the OSM wiki to the database.')
        group.add_argument('--import-from-csv', metavar='FILE',
                           help='Import special phrases from a CSV file.')
+        group.add_argument('--no-replace', action='store_true',
+                           help='Keep the old phrases and only add the new ones.')

    @staticmethod
    def run(args):
@ -51,7 +53,8 @@ class ImportSpecialPhrases:
        from ..tokenizer import factory as tokenizer_factory

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+        should_replace = not args.no_replace
        with connect(args.config.get_libpq_dsn()) as db_connection:
            SPImporter(
                args.config, args.phplib_dir, db_connection, loader
-            ).import_phrases(tokenizer)
+            ).import_phrases(tokenizer, should_replace)
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@ -306,7 +306,7 @@ class LegacyICUNameAnalyzer:
            #                WHERE word_id is null and type = 'postcode'""")


-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
        """ Replace the search index for special phrases with the new phrases.
        """
        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@ -345,7 +345,7 @@ class LegacyICUNameAnalyzer:
                              columns=['word', 'word_token', 'class', 'type',
                                       'operator', 'search_name_count'])

-            if to_delete:
+            if to_delete and should_replace:
                psycopg2.extras.execute_values(
                    cur,
                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@ -314,7 +314,7 @@ class LegacyNameAnalyzer:
                                 FROM location_postcode) x""")


-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
        """ Replace the search index for special phrases with the new phrases.
        """
        norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@ -343,7 +343,7 @@ class LegacyNameAnalyzer:
                           FROM (VALUES %s) as v(name, class, type, op))""",
                    to_add)

-            if to_delete:
+            if to_delete and should_replace:
                psycopg2.extras.execute_values(
                    cur,
                    """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@ -23,7 +23,7 @@ LOG = logging.getLogger()
 class SPImporter():
    # pylint: disable-msg=too-many-instance-attributes
    """
-        Class handling the process of special phrases importations into the database.
+        Class handling the process of special phrases importation into the database.

        Take a sp loader which load the phrases from an external source.
    """
@ -42,10 +42,14 @@ class SPImporter():
        #special phrases class/type on the wiki.
        self.table_phrases_to_delete = set()

-    def import_phrases(self, tokenizer):
+    def import_phrases(self, tokenizer, should_replace):
        """
-            Iterate through all specified languages and
-            extract corresponding special phrases from the wiki.
+            Iterate through all SpecialPhrases extracted from the
+            loader and import them into the database.
+
+            If should_replace is set to True only the loaded phrases
+            will be kept into the database. All other phrases already
+            in the database will be removed.
        """
        LOG.warning('Special phrases importation starting')
        self._fetch_existing_place_classtype_tables()
@ -60,11 +64,12 @@ class SPImporter():
                    class_type_pairs.update(result)

        self._create_place_classtype_table_and_indexes(class_type_pairs)
-        self._remove_non_existent_tables_from_db()
+        if should_replace:
+            self._remove_non_existent_tables_from_db()
        self.db_connection.commit()

        with tokenizer.name_analyzer() as analyzer:
-            analyzer.update_special_phrases(self.word_phrases)
+            analyzer.update_special_phrases(self.word_phrases, should_replace)

        LOG.warning('Import done.')
        self.statistics_handler.notify_import_done()
--- a/test/python/dummy_tokenizer.py
+++ b/test/python/dummy_tokenizer.py
@ -54,7 +54,7 @@ class DummyNameAnalyzer:
    def add_postcodes_from_db(self):
        pass

-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
        self.analyser_cache['special_phrases'] = phrases

    def add_country_names(self, code, names):
--- a/test/python/test_cli.py
+++ b/test/python/test_cli.py
@ -255,18 +255,27 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
    assert bnd_mock.called == do_bnds
    assert rank_mock.called == do_ranks

-def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock):
+@pytest.mark.parametrize("no_replace", [(True), (False)])
+def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
    func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')

-    call_nominatim('special-phrases', '--import-from-wiki')
+    if no_replace:
+        call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
+    else:
+        call_nominatim('special-phrases', '--import-from-wiki')

    assert func.called == 1

-def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock):
+@pytest.mark.parametrize("no_replace", [(True), (False)])
+def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
    func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
-    testdata = Path('__file__') / '..' / '..' / 'testdb'
+    testdata = SRC_DIR / 'test' / 'testdb'
    csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
-    call_nominatim('special-phrases', '--import-from-csv', csv_path)
+
+    if no_replace:
+        call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
+    else:
+        call_nominatim('special-phrases', '--import-from-csv', csv_path)

    assert func.called == 1

--- a/test/python/test_tokenizer_legacy.py
+++ b/test/python/test_tokenizer_legacy.py
@ -209,7 +209,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
        ("König bei", "amenity", "royal", "near"),
        ("Könige", "amenity", "royal", "-"),
        ("strasse", "highway", "primary", "in")
-    ])
+    ], True)

    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
@ -226,11 +226,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,

    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")

-    analyzer.update_special_phrases([])
+    analyzer.update_special_phrases([], True)

    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")


+def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
+                                          make_standard_name):
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
+                                     (' bar', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+    analyzer.update_special_phrases([], False)
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+
 def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
                                      make_standard_name):
    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
@ -243,7 +256,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
      ('prison', 'amenity', 'prison', 'in'),
      ('bar', 'highway', 'road', '-'),
      ('garden', 'leisure', 'garden', 'near')
-    ])
+    ], True)

    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
--- a/test/python/test_tokenizer_legacy_icu.py
+++ b/test/python/test_tokenizer_legacy_icu.py
@ -159,7 +159,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor)
            ("König bei", "amenity", "royal", "near"),
            ("Könige", "amenity", "royal", "-"),
            ("street", "highway", "primary", "in")
-        ])
+        ], True)

    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
@ -176,11 +176,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")

    with analyzer() as a:
-        a.update_special_phrases([])
+        a.update_special_phrases([], True)

    assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")


+def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,):
+    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
+                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
+                                     (' BAR', 'bar', 'highway', 'road', null)""")
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+    with analyzer() as a:
+        a.update_special_phrases([], False)
+
+    assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
+
+
 def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
    temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
                              VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
@ -193,7 +206,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
          ('prison', 'amenity', 'prison', 'in'),
          ('bar', 'highway', 'road', '-'),
          ('garden', 'leisure', 'garden', 'near')
-        ])
+        ], True)

    assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
                                     FROM word WHERE class != 'place'""") \
--- a/test/python/test_tools_import_special_phrases.py
+++ b/test/python/test_tools_import_special_phrases.py
@ -185,8 +185,9 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
            tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
        )

+@pytest.mark.parametrize("should_replace", [(True), (False)])
 def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
-                          placex_table, tokenizer_mock):
+                        placex_table, tokenizer_mock, should_replace):
    """
        Check that the main import_phrases() method is well executed.
        It should create the place_classtype table, the place_id and centroid indexes,
@ -202,10 +203,10 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
            CREATE TABLE place_classtype_wrongclass_wrongtype();""")
    
    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
-                    mock_get_wiki_content)
+                        mock_get_wiki_content)

    tokenizer = tokenizer_mock()
-    sp_importer.import_phrases(tokenizer)
+    sp_importer.import_phrases(tokenizer, should_replace)

    assert len(tokenizer.analyser_cache['special_phrases']) == 18

@ -216,7 +217,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
    assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
    assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
    assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
-    assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
+    if should_replace:
+        assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')

    #Format (query, should_return_something_bool) use to easily execute all asserts
    queries_tests = set()
@ -237,7 +239,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
        WHERE table_schema='public'
        AND table_name = 'place_classtype_wrongclass_wrongtype';
    """
-    queries_tests.add((query_wrong_table, False))
+    if should_replace:
+        queries_tests.add((query_wrong_table, False))

    with temp_db_conn.cursor() as temp_db_cursor:
        for query in queries_tests:
@ -247,7 +250,7 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
            else:
                assert not temp_db_cursor.fetchone()

-def mock_get_wiki_content(lang):
+def mock_get_wiki_content(self, lang):
    """
        Mock the _get_wiki_content() method to return
        static xml test file content.
--- a/test/python/test_tools_sp_csv_loader.py
+++ b/test/python/test_tools_sp_csv_loader.py
@ -16,7 +16,6 @@ def test_parse_csv(sp_csv_loader):
    phrases = sp_csv_loader.parse_csv()
    assert check_phrases_content(phrases)

-
 def test_next(sp_csv_loader):
    """
        Test objects returned from the next() method.
--- a/test/python/test_tools_sp_wiki_loader.py
+++ b/test/python/test_tools_sp_wiki_loader.py
@ -47,7 +47,7 @@ def sp_wiki_loader(monkeypatch, def_config):
                        mock_get_wiki_content)
    return loader

-def mock_get_wiki_content(lang):
+def mock_get_wiki_content(self, lang):
    """
        Mock the _get_wiki_content() method to return
        static xml test file content.