mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-24 13:31:37 +03:00
Added --no-replace option to the special phrases import command and added corresponding tests
This commit is contained in:
parent
06aab389ed
commit
8b8dfc46eb
@ -27,6 +27,8 @@ class ImportSpecialPhrases:
|
||||
help='Import special phrases from the OSM wiki to the database.')
|
||||
group.add_argument('--import-from-csv', metavar='FILE',
|
||||
help='Import special phrases from a CSV file.')
|
||||
group.add_argument('--no-replace', action='store_true',
|
||||
help='Keep the old phrases and only add the new ones.')
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
@ -51,7 +53,8 @@ class ImportSpecialPhrases:
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
should_replace = not args.no_replace
|
||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||
SPImporter(
|
||||
args.config, args.phplib_dir, db_connection, loader
|
||||
).import_phrases(tokenizer)
|
||||
).import_phrases(tokenizer, should_replace)
|
||||
|
@ -306,7 +306,7 @@ class LegacyICUNameAnalyzer:
|
||||
# WHERE word_id is null and type = 'postcode'""")
|
||||
|
||||
|
||||
def update_special_phrases(self, phrases):
|
||||
def update_special_phrases(self, phrases, should_replace):
|
||||
""" Replace the search index for special phrases with the new phrases.
|
||||
"""
|
||||
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
||||
@ -345,7 +345,7 @@ class LegacyICUNameAnalyzer:
|
||||
columns=['word', 'word_token', 'class', 'type',
|
||||
'operator', 'search_name_count'])
|
||||
|
||||
if to_delete:
|
||||
if to_delete and should_replace:
|
||||
psycopg2.extras.execute_values(
|
||||
cur,
|
||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||
|
@ -314,7 +314,7 @@ class LegacyNameAnalyzer:
|
||||
FROM location_postcode) x""")
|
||||
|
||||
|
||||
def update_special_phrases(self, phrases):
|
||||
def update_special_phrases(self, phrases, should_replace):
|
||||
""" Replace the search index for special phrases with the new phrases.
|
||||
"""
|
||||
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
||||
@ -343,7 +343,7 @@ class LegacyNameAnalyzer:
|
||||
FROM (VALUES %s) as v(name, class, type, op))""",
|
||||
to_add)
|
||||
|
||||
if to_delete:
|
||||
if to_delete and should_replace:
|
||||
psycopg2.extras.execute_values(
|
||||
cur,
|
||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||
|
@ -23,7 +23,7 @@ LOG = logging.getLogger()
|
||||
class SPImporter():
|
||||
# pylint: disable-msg=too-many-instance-attributes
|
||||
"""
|
||||
Class handling the process of special phrases importations into the database.
|
||||
Class handling the process of special phrases importation into the database.
|
||||
|
||||
Take a sp loader which load the phrases from an external source.
|
||||
"""
|
||||
@ -42,10 +42,14 @@ class SPImporter():
|
||||
#special phrases class/type on the wiki.
|
||||
self.table_phrases_to_delete = set()
|
||||
|
||||
def import_phrases(self, tokenizer):
|
||||
def import_phrases(self, tokenizer, should_replace):
|
||||
"""
|
||||
Iterate through all specified languages and
|
||||
extract corresponding special phrases from the wiki.
|
||||
Iterate through all SpecialPhrases extracted from the
|
||||
loader and import them into the database.
|
||||
|
||||
If should_replace is set to True only the loaded phrases
|
||||
will be kept into the database. All other phrases already
|
||||
in the database will be removed.
|
||||
"""
|
||||
LOG.warning('Special phrases importation starting')
|
||||
self._fetch_existing_place_classtype_tables()
|
||||
@ -60,11 +64,12 @@ class SPImporter():
|
||||
class_type_pairs.update(result)
|
||||
|
||||
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
||||
self._remove_non_existent_tables_from_db()
|
||||
if should_replace:
|
||||
self._remove_non_existent_tables_from_db()
|
||||
self.db_connection.commit()
|
||||
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
analyzer.update_special_phrases(self.word_phrases)
|
||||
analyzer.update_special_phrases(self.word_phrases, should_replace)
|
||||
|
||||
LOG.warning('Import done.')
|
||||
self.statistics_handler.notify_import_done()
|
||||
|
@ -54,7 +54,7 @@ class DummyNameAnalyzer:
|
||||
def add_postcodes_from_db(self):
|
||||
pass
|
||||
|
||||
def update_special_phrases(self, phrases):
|
||||
def update_special_phrases(self, phrases, should_replace):
|
||||
self.analyser_cache['special_phrases'] = phrases
|
||||
|
||||
def add_country_names(self, code, names):
|
||||
|
@ -255,18 +255,27 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
|
||||
assert bnd_mock.called == do_bnds
|
||||
assert rank_mock.called == do_ranks
|
||||
|
||||
def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock):
|
||||
@pytest.mark.parametrize("no_replace", [(True), (False)])
|
||||
def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
|
||||
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
|
||||
|
||||
call_nominatim('special-phrases', '--import-from-wiki')
|
||||
if no_replace:
|
||||
call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
|
||||
else:
|
||||
call_nominatim('special-phrases', '--import-from-wiki')
|
||||
|
||||
assert func.called == 1
|
||||
|
||||
def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock):
|
||||
@pytest.mark.parametrize("no_replace", [(True), (False)])
|
||||
def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
|
||||
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
|
||||
testdata = Path('__file__') / '..' / '..' / 'testdb'
|
||||
testdata = SRC_DIR / 'test' / 'testdb'
|
||||
csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
|
||||
call_nominatim('special-phrases', '--import-from-csv', csv_path)
|
||||
|
||||
if no_replace:
|
||||
call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
|
||||
else:
|
||||
call_nominatim('special-phrases', '--import-from-csv', csv_path)
|
||||
|
||||
assert func.called == 1
|
||||
|
||||
|
@ -209,7 +209,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
|
||||
("König bei", "amenity", "royal", "near"),
|
||||
("Könige", "amenity", "royal", "-"),
|
||||
("strasse", "highway", "primary", "in")
|
||||
])
|
||||
], True)
|
||||
|
||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""") \
|
||||
@ -226,11 +226,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
analyzer.update_special_phrases([])
|
||||
analyzer.update_special_phrases([], True)
|
||||
|
||||
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
|
||||
def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
|
||||
make_standard_name):
|
||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||
VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
|
||||
(' bar', 'bar', 'highway', 'road', null)""")
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
analyzer.update_special_phrases([], False)
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
|
||||
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
|
||||
make_standard_name):
|
||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||
@ -243,7 +256,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
|
||||
('prison', 'amenity', 'prison', 'in'),
|
||||
('bar', 'highway', 'road', '-'),
|
||||
('garden', 'leisure', 'garden', 'near')
|
||||
])
|
||||
], True)
|
||||
|
||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""") \
|
||||
|
@ -159,7 +159,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor)
|
||||
("König bei", "amenity", "royal", "near"),
|
||||
("Könige", "amenity", "royal", "-"),
|
||||
("street", "highway", "primary", "in")
|
||||
])
|
||||
], True)
|
||||
|
||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""") \
|
||||
@ -176,11 +176,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
with analyzer() as a:
|
||||
a.update_special_phrases([])
|
||||
a.update_special_phrases([], True)
|
||||
|
||||
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
|
||||
def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,):
|
||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
|
||||
(' BAR', 'bar', 'highway', 'road', null)""")
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
with analyzer() as a:
|
||||
a.update_special_phrases([], False)
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
|
||||
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
|
||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
|
||||
@ -193,7 +206,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
|
||||
('prison', 'amenity', 'prison', 'in'),
|
||||
('bar', 'highway', 'road', '-'),
|
||||
('garden', 'leisure', 'garden', 'near')
|
||||
])
|
||||
], True)
|
||||
|
||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""") \
|
||||
|
@ -185,8 +185,9 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
|
||||
tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("should_replace", [(True), (False)])
|
||||
def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
||||
placex_table, tokenizer_mock):
|
||||
placex_table, tokenizer_mock, should_replace):
|
||||
"""
|
||||
Check that the main import_phrases() method is well executed.
|
||||
It should create the place_classtype table, the place_id and centroid indexes,
|
||||
@ -202,10 +203,10 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
||||
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
|
||||
|
||||
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
||||
mock_get_wiki_content)
|
||||
mock_get_wiki_content)
|
||||
|
||||
tokenizer = tokenizer_mock()
|
||||
sp_importer.import_phrases(tokenizer)
|
||||
sp_importer.import_phrases(tokenizer, should_replace)
|
||||
|
||||
assert len(tokenizer.analyser_cache['special_phrases']) == 18
|
||||
|
||||
@ -216,7 +217,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
||||
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
|
||||
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
|
||||
assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
|
||||
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
|
||||
if should_replace:
|
||||
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
|
||||
|
||||
#Format (query, should_return_something_bool) use to easily execute all asserts
|
||||
queries_tests = set()
|
||||
@ -237,7 +239,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
||||
WHERE table_schema='public'
|
||||
AND table_name = 'place_classtype_wrongclass_wrongtype';
|
||||
"""
|
||||
queries_tests.add((query_wrong_table, False))
|
||||
if should_replace:
|
||||
queries_tests.add((query_wrong_table, False))
|
||||
|
||||
with temp_db_conn.cursor() as temp_db_cursor:
|
||||
for query in queries_tests:
|
||||
@ -247,7 +250,7 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
||||
else:
|
||||
assert not temp_db_cursor.fetchone()
|
||||
|
||||
def mock_get_wiki_content(lang):
|
||||
def mock_get_wiki_content(self, lang):
|
||||
"""
|
||||
Mock the _get_wiki_content() method to return
|
||||
static xml test file content.
|
||||
|
@ -16,7 +16,6 @@ def test_parse_csv(sp_csv_loader):
|
||||
phrases = sp_csv_loader.parse_csv()
|
||||
assert check_phrases_content(phrases)
|
||||
|
||||
|
||||
def test_next(sp_csv_loader):
|
||||
"""
|
||||
Test objects returned from the next() method.
|
||||
|
@ -47,7 +47,7 @@ def sp_wiki_loader(monkeypatch, def_config):
|
||||
mock_get_wiki_content)
|
||||
return loader
|
||||
|
||||
def mock_get_wiki_content(lang):
|
||||
def mock_get_wiki_content(self, lang):
|
||||
"""
|
||||
Mock the _get_wiki_content() method to return
|
||||
static xml test file content.
|
||||
|
Loading…
Reference in New Issue
Block a user