Merge pull request #2553 from lonvia/revert-street-matching-to-full-names

Revert street matching to full names
This commit is contained in:
Sarah Hoffmann 2021-12-14 15:52:34 +01:00 committed by GitHub
commit ab6f35d83a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 140 additions and 18 deletions

View File

@@ -51,7 +51,7 @@ $$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
RETURNS BOOLEAN RETURNS BOOLEAN
AS $$ AS $$
SELECT (info->>'street')::INTEGER[] <@ street_tokens SELECT (info->>'street')::INTEGER[] && street_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT; $$ LANGUAGE SQL IMMUTABLE STRICT;

View File

@@ -409,16 +409,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
def _process_place_address(self, token_info, address): def _process_place_address(self, token_info, address):
hnrs = [] hnrs = []
addr_terms = [] addr_terms = []
streets = []
for item in address: for item in address:
if item.kind == 'postcode': if item.kind == 'postcode':
self._add_postcode(item.name) self._add_postcode(item.name)
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'): elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(item.name) hnrs.append(item.name)
elif item.kind == 'street': elif item.kind == 'street':
token_info.add_street(self._compute_partial_tokens(item.name)) streets.extend(self._retrieve_full_tokens(item.name))
elif item.kind == 'place': elif item.kind == 'place':
token_info.add_place(self._compute_partial_tokens(item.name)) if not item.suffix:
elif not item.kind.startswith('_') and \ token_info.add_place(self._compute_partial_tokens(item.name))
elif not item.kind.startswith('_') and not item.suffix and \
item.kind not in ('country', 'full'): item.kind not in ('country', 'full'):
addr_terms.append((item.kind, self._compute_partial_tokens(item.name))) addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
@@ -429,6 +431,9 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
if addr_terms: if addr_terms:
token_info.add_address_terms(addr_terms) token_info.add_address_terms(addr_terms)
if streets:
token_info.add_street(streets)
def _compute_partial_tokens(self, name): def _compute_partial_tokens(self, name):
""" Normalize the given term, split it into partial words and return """ Normalize the given term, split it into partial words and return
@@ -458,6 +463,26 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
return tokens return tokens
def _retrieve_full_tokens(self, name):
""" Get the full name token for the given name, if it exists.
The name is only retrieved for the standard analyser.
"""
norm_name = self._search_normalized(name)
# return cached if possible
if norm_name in self._cache.fulls:
return self._cache.fulls[norm_name]
with self.conn.cursor() as cur:
cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
(norm_name, ))
full = [row[0] for row in cur]
self._cache.fulls[norm_name] = full
return full
def _compute_name_tokens(self, names): def _compute_name_tokens(self, names):
""" Computes the full name and partial name tokens for the given """ Computes the full name and partial name tokens for the given
dictionary of names. dictionary of names.
@@ -561,8 +586,7 @@ class _TokenInfo:
def add_street(self, tokens): def add_street(self, tokens):
""" Add addr:street match terms. """ Add addr:street match terms.
""" """
if tokens: self.data['street'] = self._mk_array(tokens)
self.data['street'] = self._mk_array(tokens)
def add_place(self, tokens): def add_place(self, tokens):
@@ -591,6 +615,7 @@ class _TokenCache:
def __init__(self): def __init__(self):
self.names = {} self.names = {}
self.partials = {} self.partials = {}
self.fulls = {}
self.postcodes = set() self.postcodes = set()
self.housenumbers = {} self.housenumbers = {}

View File

@@ -11,8 +11,9 @@
} }
}, },
{ {
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*", "keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*wikidata"], "name:etymology", "name:signed", "name:botanical", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : { "values" : {
"" : "skip" "" : "skip"
} }

View File

@@ -5,8 +5,9 @@
} }
}, },
{ {
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*", "keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*wikidata"], "name:etymology", "name:signed", "name:botanical", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : { "values" : {
"" : "skip" "" : "skip"
} }

View File

@@ -6,8 +6,9 @@
} }
}, },
{ {
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*", "keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"], "name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : { "values" : {
"" : "extra" "" : "extra"
} }

View File

@@ -6,8 +6,9 @@
} }
}, },
{ {
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*", "keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"], "name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : { "values" : {
"" : "extra" "" : "extra"
} }

View File

@@ -5,8 +5,9 @@
} }
}, },
{ {
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*", "keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*wikidata"], "name:etymology", "name:signed", "name:botanical", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : { "values" : {
"" : "skip" "" : "skip"
} }

View File

@@ -1,3 +1,3 @@
[behave] [behave]
show_skipped=False show_skipped=False
tags=~@Fail default_tags=~@Fail

View File

@@ -87,6 +87,52 @@ Feature: Parenting of objects
| N3 | W2 | | N3 | W2 |
| N4 | W1 | | N4 | W1 |
@fail-legacy
Scenario: addr:street tag parents to appropriately named street, locale names
Given the scene roads-with-pois
And the places
| osm | class | type | street| addr+street:de | geometry |
| N1 | place | house | south | Süd | :p-N1 |
| N2 | place | house | north | Nord | :p-N2 |
| N3 | place | house | south | Süd | :p-S1 |
| N4 | place | house | north | Nord | :p-S2 |
And the places
| osm | class | type | name | geometry |
| W1 | highway | residential | Nord | :w-north |
| W2 | highway | residential | Süd | :w-south |
And the places
| osm | class | type | name | name+name:old |
| N5 | place | hamlet | south | north |
When importing
Then placex contains
| object | parent_place_id |
| N1 | W2 |
| N2 | W1 |
| N3 | W2 |
| N4 | W1 |
Scenario: addr:street tag parents to appropriately named street with abbreviation
Given the scene roads-with-pois
And the places
| osm | class | type | street| geometry |
| N1 | place | house | south st | :p-N1 |
| N2 | place | house | north st | :p-N2 |
| N3 | place | house | south st | :p-S1 |
| N4 | place | house | north st | :p-S2 |
And the places
| osm | class | type | name+name:en | geometry |
| W1 | highway | residential | north street | :w-north |
| W2 | highway | residential | south street | :w-south |
When importing
Then placex contains
| object | parent_place_id |
| N1 | W2 |
| N2 | W1 |
| N3 | W2 |
| N4 | W1 |
Scenario: addr:street tag parents to next named street Scenario: addr:street tag parents to next named street
Given the scene roads-with-pois Given the scene roads-with-pois
And the places And the places

View File

@@ -49,3 +49,9 @@ def before_scenario(context, scenario):
def after_scenario(context, scenario): def after_scenario(context, scenario):
if 'DB' in context.tags: if 'DB' in context.tags:
context.nominatim.teardown_db(context) context.nominatim.teardown_db(context)
def before_tag(context, tag):
if tag == 'fail-legacy':
if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
context.scenario.skip("Not implemented in legacy tokenizer")

View File

@@ -471,9 +471,25 @@ class TestPlaceAddress:
def test_process_place_street(self): def test_process_place_street(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
info = self.process_address(street='Grand Road') info = self.process_address(street='Grand Road')
assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD') assert eval(info['street']) == self.name_token_set('#Grand Road')
def test_process_place_nonexisting_street(self):
info = self.process_address(street='Grand Road')
assert 'street' not in info
def test_process_place_multiple_street_tags(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
'ref': '05989'}}))
info = self.process_address(**{'street': 'Grand Road',
'street:sym_ul': '05989'})
assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
def test_process_place_street_empty(self): def test_process_place_street_empty(self):
@@ -482,12 +498,28 @@ class TestPlaceAddress:
assert 'street' not in info assert 'street' not in info
def test_process_place_street_from_cache(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
self.process_address(street='Grand Road')
# request address again
info = self.process_address(street='Grand Road')
assert eval(info['street']) == self.name_token_set('#Grand Road')
def test_process_place_place(self): def test_process_place_place(self):
info = self.process_address(place='Honu Lulu') info = self.process_address(place='Honu Lulu')
assert eval(info['place']) == self.name_token_set('HONU', 'LULU') assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
def test_process_place_place_extra(self):
info = self.process_address(**{'place:en': 'Honu Lulu'})
assert 'place' not in info
def test_process_place_place_empty(self): def test_process_place_place_empty(self):
info = self.process_address(place='🜵') info = self.process_address(place='🜵')
@@ -507,6 +539,14 @@ class TestPlaceAddress:
assert result == {'city': city, 'suburb': city, 'state': state} assert result == {'city': city, 'suburb': city, 'state': state}
def test_process_place_multiple_address_terms(self):
info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'})
result = {k: eval(v) for k,v in info['addr'].items()}
assert result == {'city': self.name_token_set('Bruxelles')}
def test_process_place_address_terms_empty(self): def test_process_place_address_terms_empty(self):
info = self.process_address(country='de', city=' ', street='Hauptstr', info = self.process_address(country='de', city=' ', street='Hauptstr',
full='right behind the church') full='right behind the church')