mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-22 12:06:27 +03:00
Merge pull request #2553 from lonvia/revert-street-matching-to-full-names
Revert street matching to full names
This commit is contained in:
commit
ab6f35d83a
@ -51,7 +51,7 @@ $$ LANGUAGE SQL IMMUTABLE;
|
||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[] <@ street_tokens
|
||||
SELECT (info->>'street')::INTEGER[] && street_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
|
@ -409,16 +409,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
||||
def _process_place_address(self, token_info, address):
|
||||
hnrs = []
|
||||
addr_terms = []
|
||||
streets = []
|
||||
for item in address:
|
||||
if item.kind == 'postcode':
|
||||
self._add_postcode(item.name)
|
||||
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
||||
hnrs.append(item.name)
|
||||
elif item.kind == 'street':
|
||||
token_info.add_street(self._compute_partial_tokens(item.name))
|
||||
streets.extend(self._retrieve_full_tokens(item.name))
|
||||
elif item.kind == 'place':
|
||||
token_info.add_place(self._compute_partial_tokens(item.name))
|
||||
elif not item.kind.startswith('_') and \
|
||||
if not item.suffix:
|
||||
token_info.add_place(self._compute_partial_tokens(item.name))
|
||||
elif not item.kind.startswith('_') and not item.suffix and \
|
||||
item.kind not in ('country', 'full'):
|
||||
addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
|
||||
|
||||
@ -429,6 +431,9 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
||||
if addr_terms:
|
||||
token_info.add_address_terms(addr_terms)
|
||||
|
||||
if streets:
|
||||
token_info.add_street(streets)
|
||||
|
||||
|
||||
def _compute_partial_tokens(self, name):
|
||||
""" Normalize the given term, split it into partial words and return
|
||||
@ -458,6 +463,26 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
||||
return tokens
|
||||
|
||||
|
||||
def _retrieve_full_tokens(self, name):
|
||||
""" Get the full name token for the given name, if it exists.
|
||||
The name is only retrieved for the standard analyser.
|
||||
"""
|
||||
norm_name = self._search_normalized(name)
|
||||
|
||||
# return cached if possible
|
||||
if norm_name in self._cache.fulls:
|
||||
return self._cache.fulls[norm_name]
|
||||
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'",
|
||||
(norm_name, ))
|
||||
full = [row[0] for row in cur]
|
||||
|
||||
self._cache.fulls[norm_name] = full
|
||||
|
||||
return full
|
||||
|
||||
|
||||
def _compute_name_tokens(self, names):
|
||||
""" Computes the full name and partial name tokens for the given
|
||||
dictionary of names.
|
||||
@ -561,8 +586,7 @@ class _TokenInfo:
|
||||
def add_street(self, tokens):
|
||||
""" Add addr:street match terms.
|
||||
"""
|
||||
if tokens:
|
||||
self.data['street'] = self._mk_array(tokens)
|
||||
self.data['street'] = self._mk_array(tokens)
|
||||
|
||||
|
||||
def add_place(self, tokens):
|
||||
@ -591,6 +615,7 @@ class _TokenCache:
|
||||
def __init__(self):
|
||||
self.names = {}
|
||||
self.partials = {}
|
||||
self.fulls = {}
|
||||
self.postcodes = set()
|
||||
self.housenumbers = {}
|
||||
|
||||
|
@ -11,8 +11,9 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
|
||||
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
|
||||
"addr:street:name", "addr:street:type"],
|
||||
"values" : {
|
||||
"" : "skip"
|
||||
}
|
||||
|
@ -5,8 +5,9 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
|
||||
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
|
||||
"addr:street:name", "addr:street:type"],
|
||||
"values" : {
|
||||
"" : "skip"
|
||||
}
|
||||
|
@ -6,8 +6,9 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"],
|
||||
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
|
||||
"addr:street:name", "addr:street:type"],
|
||||
"values" : {
|
||||
"" : "extra"
|
||||
}
|
||||
|
@ -6,8 +6,9 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata"],
|
||||
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "wikidata", "*:wikidata",
|
||||
"addr:street:name", "addr:street:type"],
|
||||
"values" : {
|
||||
"" : "extra"
|
||||
}
|
||||
|
@ -5,8 +5,9 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "*wikidata"],
|
||||
"keys" : ["*:prefix", "*:suffix", "name:prefix:*", "name:suffix:*",
|
||||
"name:etymology", "name:signed", "name:botanical", "*:wikidata",
|
||||
"addr:street:name", "addr:street:type"],
|
||||
"values" : {
|
||||
"" : "skip"
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
[behave]
|
||||
show_skipped=False
|
||||
tags=~@Fail
|
||||
default_tags=~@Fail
|
||||
|
@ -87,6 +87,52 @@ Feature: Parenting of objects
|
||||
| N3 | W2 |
|
||||
| N4 | W1 |
|
||||
|
||||
@fail-legacy
|
||||
Scenario: addr:street tag parents to appropriately named street, locale names
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
| osm | class | type | street| addr+street:de | geometry |
|
||||
| N1 | place | house | south | Süd | :p-N1 |
|
||||
| N2 | place | house | north | Nord | :p-N2 |
|
||||
| N3 | place | house | south | Süd | :p-S1 |
|
||||
| N4 | place | house | north | Nord | :p-S2 |
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| W1 | highway | residential | Nord | :w-north |
|
||||
| W2 | highway | residential | Süd | :w-south |
|
||||
And the places
|
||||
| osm | class | type | name | name+name:old |
|
||||
| N5 | place | hamlet | south | north |
|
||||
When importing
|
||||
Then placex contains
|
||||
| object | parent_place_id |
|
||||
| N1 | W2 |
|
||||
| N2 | W1 |
|
||||
| N3 | W2 |
|
||||
| N4 | W1 |
|
||||
|
||||
Scenario: addr:street tag parents to appropriately named street with abbreviation
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
| osm | class | type | street| geometry |
|
||||
| N1 | place | house | south st | :p-N1 |
|
||||
| N2 | place | house | north st | :p-N2 |
|
||||
| N3 | place | house | south st | :p-S1 |
|
||||
| N4 | place | house | north st | :p-S2 |
|
||||
And the places
|
||||
| osm | class | type | name+name:en | geometry |
|
||||
| W1 | highway | residential | north street | :w-north |
|
||||
| W2 | highway | residential | south street | :w-south |
|
||||
When importing
|
||||
Then placex contains
|
||||
| object | parent_place_id |
|
||||
| N1 | W2 |
|
||||
| N2 | W1 |
|
||||
| N3 | W2 |
|
||||
| N4 | W1 |
|
||||
|
||||
|
||||
|
||||
Scenario: addr:street tag parents to next named street
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
|
@ -49,3 +49,9 @@ def before_scenario(context, scenario):
|
||||
def after_scenario(context, scenario):
|
||||
if 'DB' in context.tags:
|
||||
context.nominatim.teardown_db(context)
|
||||
|
||||
|
||||
def before_tag(context, tag):
|
||||
if tag == 'fail-legacy':
|
||||
if context.config.userdata['TOKENIZER'] in (None, 'legacy'):
|
||||
context.scenario.skip("Not implemented in legacy tokenizer")
|
||||
|
@ -471,9 +471,25 @@ class TestPlaceAddress:
|
||||
|
||||
|
||||
def test_process_place_street(self):
|
||||
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
|
||||
info = self.process_address(street='Grand Road')
|
||||
|
||||
assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD')
|
||||
assert eval(info['street']) == self.name_token_set('#Grand Road')
|
||||
|
||||
|
||||
def test_process_place_nonexisting_street(self):
|
||||
info = self.process_address(street='Grand Road')
|
||||
|
||||
assert 'street' not in info
|
||||
|
||||
|
||||
def test_process_place_multiple_street_tags(self):
|
||||
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
|
||||
'ref': '05989'}}))
|
||||
info = self.process_address(**{'street': 'Grand Road',
|
||||
'street:sym_ul': '05989'})
|
||||
|
||||
assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
|
||||
|
||||
|
||||
def test_process_place_street_empty(self):
|
||||
@ -482,12 +498,28 @@ class TestPlaceAddress:
|
||||
assert 'street' not in info
|
||||
|
||||
|
||||
def test_process_place_street_from_cache(self):
|
||||
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
|
||||
self.process_address(street='Grand Road')
|
||||
|
||||
# request address again
|
||||
info = self.process_address(street='Grand Road')
|
||||
|
||||
assert eval(info['street']) == self.name_token_set('#Grand Road')
|
||||
|
||||
|
||||
def test_process_place_place(self):
|
||||
info = self.process_address(place='Honu Lulu')
|
||||
|
||||
assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
|
||||
|
||||
|
||||
def test_process_place_place_extra(self):
|
||||
info = self.process_address(**{'place:en': 'Honu Lulu'})
|
||||
|
||||
assert 'place' not in info
|
||||
|
||||
|
||||
def test_process_place_place_empty(self):
|
||||
info = self.process_address(place='🜵')
|
||||
|
||||
@ -507,6 +539,14 @@ class TestPlaceAddress:
|
||||
assert result == {'city': city, 'suburb': city, 'state': state}
|
||||
|
||||
|
||||
def test_process_place_multiple_address_terms(self):
|
||||
info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'})
|
||||
|
||||
result = {k: eval(v) for k,v in info['addr'].items()}
|
||||
|
||||
assert result == {'city': self.name_token_set('Bruxelles')}
|
||||
|
||||
|
||||
def test_process_place_address_terms_empty(self):
|
||||
info = self.process_address(country='de', city=' ', street='Hauptstr',
|
||||
full='right behind the church')
|
||||
|
Loading…
Reference in New Issue
Block a user