mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-27 15:05:28 +03:00
fix insertion of special terms and countries into word table
Special terms need to be prefixed with a space because they are full terms. For countries, avoid inserting duplicate word token entries. Also adds tests for adding country terms.
This commit is contained in:
parent
b1d33e6b49
commit
bc981d0261
@ -375,7 +375,7 @@ class LegacyNameAnalyzer:
|
||||
cur,
|
||||
""" INSERT INTO word (word_id, word_token, word, class, type,
|
||||
search_name_count, operator)
|
||||
(SELECT nextval('seq_word'), make_standard_name(name), name,
|
||||
(SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
|
||||
class, type, 0,
|
||||
CASE WHEN op in ('in', 'near') THEN op ELSE null END
|
||||
FROM (VALUES %s) as v(name, class, type, op))""",
|
||||
@ -400,7 +400,7 @@ class LegacyNameAnalyzer:
|
||||
cur.execute(
|
||||
"""INSERT INTO word (word_id, word_token, country_code)
|
||||
(SELECT nextval('seq_word'), lookup_token, %s
|
||||
FROM (SELECT ' ' || make_standard_name(n) as lookup_token
|
||||
FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
|
||||
FROM unnest(%s)n) y
|
||||
WHERE NOT EXISTS(SELECT * FROM word
|
||||
WHERE word_token = lookup_token and country_code = %s))
|
||||
|
@ -49,6 +49,13 @@ class MockWordTable:
|
||||
self.conn.commit()
|
||||
|
||||
|
||||
def add_country(self, country_code, word_token):
    """Insert one country row into the word table and commit.

    Only the `word_token` and `country_code` columns are set; the
    values are passed as query parameters.
    """
    insert_sql = "INSERT INTO word (word_token, country_code) VALUES(%s, %s)"
    params = (word_token, country_code)
    with self.conn.cursor() as cur:
        cur.execute(insert_sql, params)
    self.conn.commit()
|
||||
|
||||
|
||||
def add_postcode(self, word_token, postcode):
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""INSERT INTO word (word_token, word, class, type)
|
||||
@ -71,7 +78,18 @@ class MockWordTable:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""")
|
||||
return set((tuple(row) for row in cur))
|
||||
result = set((tuple(row) for row in cur))
|
||||
assert len(result) == cur.rowcount, "Word table has duplicates."
|
||||
return result
|
||||
|
||||
|
||||
def get_country(self):
    """Return all country rows of the word table as a set.

    Each element is a `(country_code, word_token)` tuple taken from
    the rows where `country_code` is not null.

    Raises:
        AssertionError: if the number of distinct tuples differs from
            the number of rows reported by the cursor.
    """
    with self.conn.cursor() as cur:
        cur.execute("""SELECT country_code, word_token
                       FROM word WHERE country_code is not null""")
        entries = {tuple(row) for row in cur}
        # Identical rows collapse inside the set, so a size mismatch
        # against the cursor's row count reveals duplicated entries.
        assert len(entries) == cur.rowcount, "Word table has duplicates."
        return entries
|
||||
|
||||
|
||||
def get_postcodes(self):
|
||||
|
@ -73,7 +73,7 @@ def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
|
||||
@pytest.fixture
|
||||
def make_standard_name(temp_db_cursor):
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
|
||||
RETURNS TEXT AS $$ SELECT ' ' || name; $$ LANGUAGE SQL""")
|
||||
RETURNS TEXT AS $$ SELECT '#' || lower(name) || '#'; $$ LANGUAGE SQL""")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -209,18 +209,19 @@ def test_update_special_phrase_empty_table(analyzer, word_table, make_standard_n
|
||||
analyzer.update_special_phrases([
|
||||
("König bei", "amenity", "royal", "near"),
|
||||
("Könige", "amenity", "royal", "-"),
|
||||
("könige", "amenity", "royal", "-"),
|
||||
("strasse", "highway", "primary", "in")
|
||||
], True)
|
||||
|
||||
assert word_table.get_special() \
|
||||
== set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'),
|
||||
(' könige', 'könige', 'amenity', 'royal', None),
|
||||
(' strasse', 'strasse', 'highway', 'primary', 'in')))
|
||||
== set(((' #könig bei#', 'könig bei', 'amenity', 'royal', 'near'),
|
||||
(' #könige#', 'könige', 'amenity', 'royal', None),
|
||||
(' #strasse#', 'strasse', 'highway', 'primary', 'in')))
|
||||
|
||||
|
||||
def test_update_special_phrase_delete_all(analyzer, word_table, make_standard_name):
|
||||
word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
|
||||
word_table.add_special(' bar', 'bar', 'highway', 'road', None)
|
||||
word_table.add_special(' #foo#', 'foo', 'amenity', 'prison', 'in')
|
||||
word_table.add_special(' #bar#', 'bar', 'highway', 'road', None)
|
||||
|
||||
assert word_table.count_special() == 2
|
||||
|
||||
@ -230,8 +231,8 @@ def test_update_special_phrase_delete_all(analyzer, word_table, make_standard_na
|
||||
|
||||
|
||||
def test_update_special_phrases_no_replace(analyzer, word_table, make_standard_name):
|
||||
word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
|
||||
word_table.add_special(' bar', 'bar', 'highway', 'road', None)
|
||||
word_table.add_special(' #foo#', 'foo', 'amenity', 'prison', 'in')
|
||||
word_table.add_special(' #bar#', 'bar', 'highway', 'road', None)
|
||||
|
||||
assert word_table.count_special() == 2
|
||||
|
||||
@ -241,8 +242,8 @@ def test_update_special_phrases_no_replace(analyzer, word_table, make_standard_n
|
||||
|
||||
|
||||
def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
|
||||
word_table.add_special(' foo', 'foo', 'amenity', 'prison', 'in')
|
||||
word_table.add_special(' bar', 'bar', 'highway', 'road', None)
|
||||
word_table.add_special(' #foo#', 'foo', 'amenity', 'prison', 'in')
|
||||
word_table.add_special(' #bar#', 'bar', 'highway', 'road', None)
|
||||
|
||||
assert word_table.count_special() == 2
|
||||
|
||||
@ -253,9 +254,31 @@ def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
|
||||
], True)
|
||||
|
||||
assert word_table.get_special() \
|
||||
== set(((' prison', 'prison', 'amenity', 'prison', 'in'),
|
||||
(' bar', 'bar', 'highway', 'road', None),
|
||||
(' garden', 'garden', 'leisure', 'garden', 'near')))
|
||||
== set(((' #prison#', 'prison', 'amenity', 'prison', 'in'),
|
||||
(' #bar#', 'bar', 'highway', 'road', None),
|
||||
(' #garden#', 'garden', 'leisure', 'garden', 'near')))
|
||||
|
||||
|
||||
def test_add_country_names(analyzer, word_table, make_standard_name):
    """Adding country names to an empty table yields one row per token.

    'Germany' and 'germany' are both passed in, yet only a single
    ' #germany#' entry is expected next to ' #deutschland#'.
    """
    names = ['Germany', 'Deutschland', 'germany']
    analyzer.add_country_names('de', names)

    expected = {
        ('de', ' #germany#'),
        ('de', ' #deutschland#'),
    }
    assert word_table.get_country() == expected
|
||||
|
||||
|
||||
def test_add_more_country_names(analyzer, word_table, make_standard_name):
    """Adding names for a country that already has entries only adds new tokens.

    The table is seeded with rows for 'fr' and 'it'.  After adding
    'Italy' and 'IT' for 'it', the seeded rows must still be present
    exactly once and a new ' #it#' row must have appeared.
    """
    seeded = (
        ('fr', ' #france#'),
        ('it', ' #italy#'),
        ('it', ' #itala#'),
    )
    for country_code, token in seeded:
        word_table.add_country(country_code, token)

    analyzer.add_country_names('it', ['Italy', 'IT'])

    expected = {
        ('fr', ' #france#'),
        ('it', ' #italy#'),
        ('it', ' #itala#'),
        ('it', ' #it#'),
    }
    assert word_table.get_country() == expected
|
||||
|
||||
|
||||
def test_process_place_names(analyzer, make_keywords):
|
||||
|
Loading…
Reference in New Issue
Block a user