Added a --no-replace option to the special phrases import command and added corresponding tests

This commit is contained in:
AntoJvlt 2021-05-17 12:40:50 +02:00
parent 06aab389ed
commit 8b8dfc46eb
11 changed files with 76 additions and 31 deletions

View File

@ -27,6 +27,8 @@ class ImportSpecialPhrases:
help='Import special phrases from the OSM wiki to the database.')
group.add_argument('--import-from-csv', metavar='FILE',
help='Import special phrases from a CSV file.')
group.add_argument('--no-replace', action='store_true',
help='Keep the old phrases and only add the new ones.')
@staticmethod
def run(args):
@ -51,7 +53,8 @@ class ImportSpecialPhrases:
from ..tokenizer import factory as tokenizer_factory
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
should_replace = not args.no_replace
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
args.config, args.phplib_dir, db_connection, loader
).import_phrases(tokenizer)
).import_phrases(tokenizer, should_replace)

View File

@ -306,7 +306,7 @@ class LegacyICUNameAnalyzer:
# WHERE word_id is null and type = 'postcode'""")
def update_special_phrases(self, phrases):
def update_special_phrases(self, phrases, should_replace):
""" Replace the search index for special phrases with the new phrases.
"""
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@ -345,7 +345,7 @@ class LegacyICUNameAnalyzer:
columns=['word', 'word_token', 'class', 'type',
'operator', 'search_name_count'])
if to_delete:
if to_delete and should_replace:
psycopg2.extras.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)

View File

@ -314,7 +314,7 @@ class LegacyNameAnalyzer:
FROM location_postcode) x""")
def update_special_phrases(self, phrases):
def update_special_phrases(self, phrases, should_replace):
""" Replace the search index for special phrases with the new phrases.
"""
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@ -343,7 +343,7 @@ class LegacyNameAnalyzer:
FROM (VALUES %s) as v(name, class, type, op))""",
to_add)
if to_delete:
if to_delete and should_replace:
psycopg2.extras.execute_values(
cur,
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)

View File

@ -23,7 +23,7 @@ LOG = logging.getLogger()
class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
Class handling the process of special phrases importations into the database.
Class handling the process of special phrases importation into the database.
Take a sp loader which load the phrases from an external source.
"""
@ -42,10 +42,14 @@ class SPImporter():
#special phrases class/type on the wiki.
self.table_phrases_to_delete = set()
def import_phrases(self, tokenizer):
def import_phrases(self, tokenizer, should_replace):
"""
Iterate through all specified languages and
extract corresponding special phrases from the wiki.
Iterate through all SpecialPhrases extracted from the
loader and import them into the database.
If should_replace is set to True only the loaded phrases
will be kept into the database. All other phrases already
in the database will be removed.
"""
LOG.warning('Special phrases importation starting')
self._fetch_existing_place_classtype_tables()
@ -60,11 +64,12 @@ class SPImporter():
class_type_pairs.update(result)
self._create_place_classtype_table_and_indexes(class_type_pairs)
self._remove_non_existent_tables_from_db()
if should_replace:
self._remove_non_existent_tables_from_db()
self.db_connection.commit()
with tokenizer.name_analyzer() as analyzer:
analyzer.update_special_phrases(self.word_phrases)
analyzer.update_special_phrases(self.word_phrases, should_replace)
LOG.warning('Import done.')
self.statistics_handler.notify_import_done()

View File

@ -54,7 +54,7 @@ class DummyNameAnalyzer:
def add_postcodes_from_db(self):
pass
def update_special_phrases(self, phrases):
def update_special_phrases(self, phrases, should_replace):
self.analyser_cache['special_phrases'] = phrases
def add_country_names(self, code, names):

View File

@ -255,18 +255,27 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock):
@pytest.mark.parametrize("no_replace", [(True), (False)])
def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
call_nominatim('special-phrases', '--import-from-wiki')
if no_replace:
call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
else:
call_nominatim('special-phrases', '--import-from-wiki')
assert func.called == 1
def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock):
@pytest.mark.parametrize("no_replace", [(True), (False)])
def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
testdata = Path('__file__') / '..' / '..' / 'testdb'
testdata = SRC_DIR / 'test' / 'testdb'
csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
call_nominatim('special-phrases', '--import-from-csv', csv_path)
if no_replace:
call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
else:
call_nominatim('special-phrases', '--import-from-csv', csv_path)
assert func.called == 1

View File

@ -209,7 +209,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("strasse", "highway", "primary", "in")
])
], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
@ -226,11 +226,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
analyzer.update_special_phrases([])
analyzer.update_special_phrases([], True)
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
# Check that update_special_phrases() leaves existing special phrases in the
# word table untouched when it is called with False as its second argument
# (should_replace), even though the new phrase list is empty.
def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
make_standard_name):
# Seed the word table with two pre-existing special phrases; neither has
# class 'place', so both are counted by the queries below.
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
(' bar', 'bar', 'highway', 'road', null)""")
# Sanity check: both seeded rows are present before the update.
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
# Empty phrase list + should_replace=False: nothing to add, nothing may be deleted.
analyzer.update_special_phrases([], False)
# Both seeded rows must still be there after the update.
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
make_standard_name):
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
@ -243,7 +256,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
])
], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \

View File

@ -159,7 +159,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor)
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("street", "highway", "primary", "in")
])
], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \
@ -176,11 +176,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
with analyzer() as a:
a.update_special_phrases([])
a.update_special_phrases([], True)
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
# Check that update_special_phrases() leaves existing special phrases in the
# word table untouched when it is called with False as its second argument
# (should_replace), even though the new phrase list is empty.
def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,):
# Seed the word table with two pre-existing special phrases; neither has
# class 'place', so both are counted by the queries below.
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
(' BAR', 'bar', 'highway', 'road', null)""")
# Sanity check: both seeded rows are present before the update.
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
# The analyzer fixture is called and used as a context manager here.
with analyzer() as a:
# Empty phrase list + should_replace=False: nothing to add, nothing may be deleted.
a.update_special_phrases([], False)
# Both seeded rows must still be there after the update.
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
@ -193,7 +206,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
])
], True)
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
FROM word WHERE class != 'place'""") \

View File

@ -185,8 +185,9 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
)
@pytest.mark.parametrize("should_replace", [(True), (False)])
def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
placex_table, tokenizer_mock):
placex_table, tokenizer_mock, should_replace):
"""
Check that the main import_phrases() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
@ -202,10 +203,10 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
mock_get_wiki_content)
mock_get_wiki_content)
tokenizer = tokenizer_mock()
sp_importer.import_phrases(tokenizer)
sp_importer.import_phrases(tokenizer, should_replace)
assert len(tokenizer.analyser_cache['special_phrases']) == 18
@ -216,7 +217,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
if should_replace:
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
#Format (query, should_return_something_bool) use to easily execute all asserts
queries_tests = set()
@ -237,7 +239,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
WHERE table_schema='public'
AND table_name = 'place_classtype_wrongclass_wrongtype';
"""
queries_tests.add((query_wrong_table, False))
if should_replace:
queries_tests.add((query_wrong_table, False))
with temp_db_conn.cursor() as temp_db_cursor:
for query in queries_tests:
@ -247,7 +250,7 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
else:
assert not temp_db_cursor.fetchone()
def mock_get_wiki_content(lang):
def mock_get_wiki_content(self, lang):
"""
Mock the _get_wiki_content() method to return
static xml test file content.

View File

@ -16,7 +16,6 @@ def test_parse_csv(sp_csv_loader):
phrases = sp_csv_loader.parse_csv()
assert check_phrases_content(phrases)
def test_next(sp_csv_loader):
"""
Test objects returned from the next() method.

View File

@ -47,7 +47,7 @@ def sp_wiki_loader(monkeypatch, def_config):
mock_get_wiki_content)
return loader
def mock_get_wiki_content(lang):
def mock_get_wiki_content(self, lang):
"""
Mock the _get_wiki_content() method to return
static xml test file content.