Merge pull request #2553 from lonvia/revert-street-matching-to-full-names

Revert street matching to full names
This commit is contained in:
Sarah Hoffmann 2021-12-14 15:52:34 +01:00 committed by GitHub
commit ab6f35d83a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 140 additions and 18 deletions

View File

@@ -51,7 +51,7 @@ $$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'street')::INTEGER[] <@ street_tokens
SELECT (info->>'street')::INTEGER[] && street_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;

View File

@@ -409,16 +409,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
def _process_place_address(self, token_info, address):
hnrs = []
addr_terms = []
streets = []
for item in address:
if item.kind == 'postcode':
self._add_postcode(item.name)
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(item.name)
elif item.kind == 'street':
token_info.add_street(self._compute_partial_tokens(item.name))
streets.extend(self._retrieve_full_tokens(item.name))
elif item.kind == 'place':
token_info.add_place(self._compute_partial_tokens(item.name))
elif not item.kind.startswith('_') and \
if not item.suffix:
token_info.add_place(self._compute_partial_tokens(item.name))
elif not item.kind.startswith('_') and not item.suffix and \
item.kind not in ('country', 'full'):
addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
@@ -429,6 +431,9 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
if addr_terms:
token_info.add_address_terms(addr_terms)
if streets:
token_info.add_street(streets)
def _compute_partial_tokens(self, name):
""" Normalize the given term, split it into partial words and return
@@ -458,6 +463,26 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
return tokens
def _retrieve_full_tokens(self, name):
""" Get the full name token for the given name, if it exists.
The name is only retrieved for the standard analyser.
"""
norm_name = self._search_normalized(name)
# return cached if possible
if norm_name in self._cache.fulls:
return self._cache.fulls[norm_name]
with self.conn.cursor() as cur:
cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
(norm_name, ))
full = [row[0] for row in cur]
self._cache.fulls[norm_name] = full
return full
def _compute_name_tokens(self, names):
""" Computes the full name and partial name tokens for the given
dictionary of names.
@@ -561,8 +586,7 @@ class _TokenInfo:
def add_street(self, tokens):
""" Add addr:street match terms.
"""
if tokens:
self.data['street'] = self._mk_array(tokens)
self.data['street'] = self._mk_array(tokens)
def add_place(self, tokens):
@@ -591,6 +615,7 @@ class _TokenCache:
def __init__(self):
self.names = {}
self.partials = {}
self.fulls = {}
self.postcodes = set()
self.housenumbers = {}

View File

@@ -11,8 +11,9 @@
}
},
{
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : {
"" : "skip"
}

View File

@@ -5,8 +5,9 @@
}
},
{
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : {
"" : "skip"
}

View File

@@ -6,8 +6,9 @@
}
},
{
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"],
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : {
"" : "extra"
}

View File

@@ -6,8 +6,9 @@
}
},
{
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"],
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : {
"" : "extra"
}

View File

@@ -5,8 +5,9 @@
}
},
{
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
"addr:street:name", "addr:street:type"],
"values" : {
"" : "skip"
}

View File

@@ -1,3 +1,3 @@
[behave]
show_skipped=False
tags=~@Fail
default_tags=~@Fail

View File

@@ -87,6 +87,52 @@ Feature: Parenting of objects
| N3 | W2 |
| N4 | W1 |
@fail-legacy
Scenario: addr:street tag parents to appropriately named street, locale names
Given the scene roads-with-pois
And the places
| osm | class | type | street| addr+street:de | geometry |
| N1 | place | house | south | Süd | :p-N1 |
| N2 | place | house | north | Nord | :p-N2 |
| N3 | place | house | south | Süd | :p-S1 |
| N4 | place | house | north | Nord | :p-S2 |
And the places
| osm | class | type | name | geometry |
| W1 | highway | residential | Nord | :w-north |
| W2 | highway | residential | Süd | :w-south |
And the places
| osm | class | type | name | name+name:old |
| N5 | place | hamlet | south | north |
When importing
Then placex contains
| object | parent_place_id |
| N1 | W2 |
| N2 | W1 |
| N3 | W2 |
| N4 | W1 |
Scenario: addr:street tag parents to appropriately named street with abbreviation
Given the scene roads-with-pois
And the places
| osm | class | type | street| geometry |
| N1 | place | house | south st | :p-N1 |
| N2 | place | house | north st | :p-N2 |
| N3 | place | house | south st | :p-S1 |
| N4 | place | house | north st | :p-S2 |
And the places
| osm | class | type | name+name:en | geometry |
| W1 | highway | residential | north street | :w-north |
| W2 | highway | residential | south street | :w-south |
When importing
Then placex contains
| object | parent_place_id |
| N1 | W2 |
| N2 | W1 |
| N3 | W2 |
| N4 | W1 |
Scenario: addr:street tag parents to next named street
Given the scene roads-with-pois
And the places

View File

@@ -49,3 +49,9 @@ def before_scenario(context, scenario):
def after_scenario(context, scenario):
if 'DB' in context.tags:
context.nominatim.teardown_db(context)
def before_tag(context, tag):
if tag == 'fail-legacy':
if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
context.scenario.skip("Not implemented in legacy tokenizer")

View File

@@ -471,9 +471,25 @@ class TestPlaceAddress:
def test_process_place_street(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
info = self.process_address(street='Grand Road')
assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD')
assert eval(info['street']) == self.name_token_set('#Grand Road')
def test_process_place_nonexisting_street(self):
info = self.process_address(street='Grand Road')
assert 'street' not in info
def test_process_place_multiple_street_tags(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
'ref': '05989'}}))
info = self.process_address(**{'street': 'Grand Road',
'street:sym_ul': '05989'})
assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
def test_process_place_street_empty(self):
@@ -482,12 +498,28 @@ class TestPlaceAddress:
assert 'street' not in info
def test_process_place_street_from_cache(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
self.process_address(street='Grand Road')
# request address again
info = self.process_address(street='Grand Road')
assert eval(info['street']) == self.name_token_set('#Grand Road')
def test_process_place_place(self):
info = self.process_address(place='Honu Lulu')
assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
def test_process_place_place_extra(self):
info = self.process_address(**{'place:en': 'Honu Lulu'})
assert 'place' not in info
def test_process_place_place_empty(self):
info = self.process_address(place='🜵')
@@ -507,6 +539,14 @@ class TestPlaceAddress:
assert result == {'city': city, 'suburb': city, 'state': state}
def test_process_place_multiple_address_terms(self):
info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'})
result = {k: eval(v) for k,v in info['addr'].items()}
assert result == {'city': self.name_token_set('Bruxelles')}
def test_process_place_address_terms_empty(self):
info = self.process_address(country='de', city=' ', street='Hauptstr',
full='right behind the church')