mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-26 13:27:52 +03:00
Merge pull request #2553 from lonvia/revert-street-matching-to-full-names
Revert street matching to full names
This commit is contained in:
commit
ab6f35d83a
@ -51,7 +51,7 @@ $$ LANGUAGE SQL IMMUTABLE;
|
|||||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||||
RETURNS BOOLEAN
|
RETURNS BOOLEAN
|
||||||
AS $$
|
AS $$
|
||||||
SELECT (info->>'street')::INTEGER[] <@ street_tokens
|
SELECT (info->>'street')::INTEGER[] && street_tokens
|
||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
@ -409,16 +409,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
def _process_place_address(self, token_info, address):
|
def _process_place_address(self, token_info, address):
|
||||||
hnrs = []
|
hnrs = []
|
||||||
addr_terms = []
|
addr_terms = []
|
||||||
|
streets = []
|
||||||
for item in address:
|
for item in address:
|
||||||
if item.kind == 'postcode':
|
if item.kind == 'postcode':
|
||||||
self._add_postcode(item.name)
|
self._add_postcode(item.name)
|
||||||
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
||||||
hnrs.append(item.name)
|
hnrs.append(item.name)
|
||||||
elif item.kind == 'street':
|
elif item.kind == 'street':
|
||||||
token_info.add_street(self._compute_partial_tokens(item.name))
|
streets.extend(self._retrieve_full_tokens(item.name))
|
||||||
elif item.kind == 'place':
|
elif item.kind == 'place':
|
||||||
token_info.add_place(self._compute_partial_tokens(item.name))
|
if not item.suffix:
|
||||||
elif not item.kind.startswith('_') and \
|
token_info.add_place(self._compute_partial_tokens(item.name))
|
||||||
|
elif not item.kind.startswith('_') and not item.suffix and \
|
||||||
item.kind not in ('country', 'full'):
|
item.kind not in ('country', 'full'):
|
||||||
addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
|
addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
|
||||||
|
|
||||||
@ -429,6 +431,9 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
if addr_terms:
|
if addr_terms:
|
||||||
token_info.add_address_terms(addr_terms)
|
token_info.add_address_terms(addr_terms)
|
||||||
|
|
||||||
|
if streets:
|
||||||
|
token_info.add_street(streets)
|
||||||
|
|
||||||
|
|
||||||
def _compute_partial_tokens(self, name):
|
def _compute_partial_tokens(self, name):
|
||||||
""" Normalize the given term, split it into partial words and return
|
""" Normalize the given term, split it into partial words and return
|
||||||
@ -458,6 +463,26 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
|
||||||
|
def _retrieve_full_tokens(self, name):
|
||||||
|
""" Get the full name token for the given name, if it exists.
|
||||||
|
The name is only retrieved for the standard analyser.
|
||||||
|
"""
|
||||||
|
norm_name = self._search_normalized(name)
|
||||||
|
|
||||||
|
# return cached if possible
|
||||||
|
if norm_name in self._cache.fulls:
|
||||||
|
return self._cache.fulls[norm_name]
|
||||||
|
|
||||||
|
with self.conn.cursor() as cur:
|
||||||
|
cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
|
||||||
|
(norm_name, ))
|
||||||
|
full = [row[0] for row in cur]
|
||||||
|
|
||||||
|
self._cache.fulls[norm_name] = full
|
||||||
|
|
||||||
|
return full
|
||||||
|
|
||||||
|
|
||||||
def _compute_name_tokens(self, names):
|
def _compute_name_tokens(self, names):
|
||||||
""" Computes the full name and partial name tokens for the given
|
""" Computes the full name and partial name tokens for the given
|
||||||
dictionary of names.
|
dictionary of names.
|
||||||
@ -561,8 +586,7 @@ class _TokenInfo:
|
|||||||
def add_street(self, tokens):
|
def add_street(self, tokens):
|
||||||
""" Add addr:street match terms.
|
""" Add addr:street match terms.
|
||||||
"""
|
"""
|
||||||
if tokens:
|
self.data['street'] = self._mk_array(tokens)
|
||||||
self.data['street'] = self._mk_array(tokens)
|
|
||||||
|
|
||||||
|
|
||||||
def add_place(self, tokens):
|
def add_place(self, tokens):
|
||||||
@ -591,6 +615,7 @@ class _TokenCache:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.names = {}
|
self.names = {}
|
||||||
self.partials = {}
|
self.partials = {}
|
||||||
|
self.fulls = {}
|
||||||
self.postcodes = set()
|
self.postcodes = set()
|
||||||
self.housenumbers = {}
|
self.housenumbers = {}
|
||||||
|
|
||||||
|
@ -11,8 +11,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||||
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
|
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
|
||||||
|
"addr:street:name", "addr:street:type"],
|
||||||
"values" : {
|
"values" : {
|
||||||
"" : "skip"
|
"" : "skip"
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||||
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
|
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
|
||||||
|
"addr:street:name", "addr:street:type"],
|
||||||
"values" : {
|
"values" : {
|
||||||
"" : "skip"
|
"" : "skip"
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||||
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"],
|
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
|
||||||
|
"addr:street:name", "addr:street:type"],
|
||||||
"values" : {
|
"values" : {
|
||||||
"" : "extra"
|
"" : "extra"
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||||
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"],
|
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
|
||||||
|
"addr:street:name", "addr:street:type"],
|
||||||
"values" : {
|
"values" : {
|
||||||
"" : "extra"
|
"" : "extra"
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||||
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
|
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
|
||||||
|
"addr:street:name", "addr:street:type"],
|
||||||
"values" : {
|
"values" : {
|
||||||
"" : "skip"
|
"" : "skip"
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
[behave]
|
[behave]
|
||||||
show_skipped=False
|
show_skipped=False
|
||||||
tags=~@Fail
|
default_tags=~@Fail
|
||||||
|
@ -87,6 +87,52 @@ Feature: Parenting of objects
|
|||||||
| N3 | W2 |
|
| N3 | W2 |
|
||||||
| N4 | W1 |
|
| N4 | W1 |
|
||||||
|
|
||||||
|
@fail-legacy
|
||||||
|
Scenario: addr:street tag parents to appropriately named street, locale names
|
||||||
|
Given the scene roads-with-pois
|
||||||
|
And the places
|
||||||
|
| osm | class | type | street| addr+street:de | geometry |
|
||||||
|
| N1 | place | house | south | Süd | :p-N1 |
|
||||||
|
| N2 | place | house | north | Nord | :p-N2 |
|
||||||
|
| N3 | place | house | south | Süd | :p-S1 |
|
||||||
|
| N4 | place | house | north | Nord | :p-S2 |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name | geometry |
|
||||||
|
| W1 | highway | residential | Nord | :w-north |
|
||||||
|
| W2 | highway | residential | Süd | :w-south |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name | name+name:old |
|
||||||
|
| N5 | place | hamlet | south | north |
|
||||||
|
When importing
|
||||||
|
Then placex contains
|
||||||
|
| object | parent_place_id |
|
||||||
|
| N1 | W2 |
|
||||||
|
| N2 | W1 |
|
||||||
|
| N3 | W2 |
|
||||||
|
| N4 | W1 |
|
||||||
|
|
||||||
|
Scenario: addr:street tag parents to appropriately named street with abbreviation
|
||||||
|
Given the scene roads-with-pois
|
||||||
|
And the places
|
||||||
|
| osm | class | type | street| geometry |
|
||||||
|
| N1 | place | house | south st | :p-N1 |
|
||||||
|
| N2 | place | house | north st | :p-N2 |
|
||||||
|
| N3 | place | house | south st | :p-S1 |
|
||||||
|
| N4 | place | house | north st | :p-S2 |
|
||||||
|
And the places
|
||||||
|
| osm | class | type | name+name:en | geometry |
|
||||||
|
| W1 | highway | residential | north street | :w-north |
|
||||||
|
| W2 | highway | residential | south street | :w-south |
|
||||||
|
When importing
|
||||||
|
Then placex contains
|
||||||
|
| object | parent_place_id |
|
||||||
|
| N1 | W2 |
|
||||||
|
| N2 | W1 |
|
||||||
|
| N3 | W2 |
|
||||||
|
| N4 | W1 |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Scenario: addr:street tag parents to next named street
|
Scenario: addr:street tag parents to next named street
|
||||||
Given the scene roads-with-pois
|
Given the scene roads-with-pois
|
||||||
And the places
|
And the places
|
||||||
|
@ -49,3 +49,9 @@ def before_scenario(context, scenario):
|
|||||||
def after_scenario(context, scenario):
|
def after_scenario(context, scenario):
|
||||||
if 'DB' in context.tags:
|
if 'DB' in context.tags:
|
||||||
context.nominatim.teardown_db(context)
|
context.nominatim.teardown_db(context)
|
||||||
|
|
||||||
|
|
||||||
|
def before_tag(context, tag):
|
||||||
|
if tag == 'fail-legacy':
|
||||||
|
if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
|
||||||
|
context.scenario.skip("Not implemented in legacy tokenizer")
|
||||||
|
@ -471,9 +471,25 @@ class TestPlaceAddress:
|
|||||||
|
|
||||||
|
|
||||||
def test_process_place_street(self):
|
def test_process_place_street(self):
|
||||||
|
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
|
||||||
info = self.process_address(street='Grand Road')
|
info = self.process_address(street='Grand Road')
|
||||||
|
|
||||||
assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD')
|
assert eval(info['street']) == self.name_token_set('#Grand Road')
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_nonexisting_street(self):
|
||||||
|
info = self.process_address(street='Grand Road')
|
||||||
|
|
||||||
|
assert 'street' not in info
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_multiple_street_tags(self):
|
||||||
|
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
|
||||||
|
'ref': '05989'}}))
|
||||||
|
info = self.process_address(**{'street': 'Grand Road',
|
||||||
|
'street:sym_ul': '05989'})
|
||||||
|
|
||||||
|
assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
|
||||||
|
|
||||||
|
|
||||||
def test_process_place_street_empty(self):
|
def test_process_place_street_empty(self):
|
||||||
@ -482,12 +498,28 @@ class TestPlaceAddress:
|
|||||||
assert 'street' not in info
|
assert 'street' not in info
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_street_from_cache(self):
|
||||||
|
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
|
||||||
|
self.process_address(street='Grand Road')
|
||||||
|
|
||||||
|
# request address again
|
||||||
|
info = self.process_address(street='Grand Road')
|
||||||
|
|
||||||
|
assert eval(info['street']) == self.name_token_set('#Grand Road')
|
||||||
|
|
||||||
|
|
||||||
def test_process_place_place(self):
|
def test_process_place_place(self):
|
||||||
info = self.process_address(place='Honu Lulu')
|
info = self.process_address(place='Honu Lulu')
|
||||||
|
|
||||||
assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
|
assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_place_extra(self):
|
||||||
|
info = self.process_address(**{'place:en': 'Honu Lulu'})
|
||||||
|
|
||||||
|
assert 'place' not in info
|
||||||
|
|
||||||
|
|
||||||
def test_process_place_place_empty(self):
|
def test_process_place_place_empty(self):
|
||||||
info = self.process_address(place='🜵')
|
info = self.process_address(place='🜵')
|
||||||
|
|
||||||
@ -507,6 +539,14 @@ class TestPlaceAddress:
|
|||||||
assert result == {'city': city, 'suburb': city, 'state': state}
|
assert result == {'city': city, 'suburb': city, 'state': state}
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_multiple_address_terms(self):
|
||||||
|
info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'})
|
||||||
|
|
||||||
|
result = {k: eval(v) for k,v in info['addr'].items()}
|
||||||
|
|
||||||
|
assert result == {'city': self.name_token_set('Bruxelles')}
|
||||||
|
|
||||||
|
|
||||||
def test_process_place_address_terms_empty(self):
|
def test_process_place_address_terms_empty(self):
|
||||||
info = self.process_address(country='de', city=' ', street='Hauptstr',
|
info = self.process_address(country='de', city=' ', street='Hauptstr',
|
||||||
full='right behind the church')
|
full='right behind the church')
|
||||||
|
Loading…
Reference in New Issue
Block a user