mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-25 05:52:32 +03:00
revert to using full names for street name matching
Using partial names turned out to not work well because there are often similarly named streets next to each other. It also prevents us from being able to take into account all addr:street:* tags. This change gets all the full term tokens for the addr:street tags from the DB. As they are used for matching only, we can assume that the term must already be there or there will be no match. This avoids creating unused full name tags.
This commit is contained in:
parent
bb175cc958
commit
44cfce1ca4
@ -409,13 +409,16 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
def _process_place_address(self, token_info, address):
|
def _process_place_address(self, token_info, address):
|
||||||
hnrs = []
|
hnrs = []
|
||||||
addr_terms = []
|
addr_terms = []
|
||||||
|
streets = []
|
||||||
for item in address:
|
for item in address:
|
||||||
if item.kind == 'postcode':
|
if item.kind == 'postcode':
|
||||||
self._add_postcode(item.name)
|
self._add_postcode(item.name)
|
||||||
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
||||||
hnrs.append(item.name)
|
hnrs.append(item.name)
|
||||||
elif item.kind == 'street':
|
elif item.kind == 'street':
|
||||||
token_info.add_street(self._compute_partial_tokens(item.name))
|
token = self._retrieve_full_token(item.name)
|
||||||
|
if token:
|
||||||
|
streets.append(token)
|
||||||
elif item.kind == 'place':
|
elif item.kind == 'place':
|
||||||
token_info.add_place(self._compute_partial_tokens(item.name))
|
token_info.add_place(self._compute_partial_tokens(item.name))
|
||||||
elif not item.kind.startswith('_') and \
|
elif not item.kind.startswith('_') and \
|
||||||
@ -429,6 +432,9 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
if addr_terms:
|
if addr_terms:
|
||||||
token_info.add_address_terms(addr_terms)
|
token_info.add_address_terms(addr_terms)
|
||||||
|
|
||||||
|
if streets:
|
||||||
|
token_info.add_street(streets)
|
||||||
|
|
||||||
|
|
||||||
def _compute_partial_tokens(self, name):
|
def _compute_partial_tokens(self, name):
|
||||||
""" Normalize the given term, split it into partial words and return
|
""" Normalize the given term, split it into partial words and return
|
||||||
@ -458,6 +464,31 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
|
||||||
|
def _retrieve_full_token(self, name):
|
||||||
|
""" Get the full name token for the given name, if it exists.
|
||||||
|
The name is only retrieved for the standard analyser.
|
||||||
|
"""
|
||||||
|
norm_name = self._normalized(name)
|
||||||
|
|
||||||
|
# return cached if possible
|
||||||
|
if norm_name in self._cache.fulls:
|
||||||
|
return self._cache.fulls[norm_name]
|
||||||
|
|
||||||
|
# otherwise compute
|
||||||
|
full, _ = self._cache.names.get(norm_name, (None, None))
|
||||||
|
|
||||||
|
if full is None:
|
||||||
|
with self.conn.cursor() as cur:
|
||||||
|
cur.execute("SELECT word_id FROM word WHERE word = %s and type = 'W' LIMIT 1",
|
||||||
|
(norm_name, ))
|
||||||
|
if cur.rowcount > 0:
|
||||||
|
full = cur.fetchone()[0]
|
||||||
|
|
||||||
|
self._cache.fulls[norm_name] = full
|
||||||
|
|
||||||
|
return full
|
||||||
|
|
||||||
|
|
||||||
def _compute_name_tokens(self, names):
|
def _compute_name_tokens(self, names):
|
||||||
""" Computes the full name and partial name tokens for the given
|
""" Computes the full name and partial name tokens for the given
|
||||||
dictionary of names.
|
dictionary of names.
|
||||||
@ -561,8 +592,7 @@ class _TokenInfo:
|
|||||||
def add_street(self, tokens):
|
def add_street(self, tokens):
|
||||||
""" Add addr:street match terms.
|
""" Add addr:street match terms.
|
||||||
"""
|
"""
|
||||||
if tokens:
|
self.data['street'] = self._mk_array(tokens)
|
||||||
self.data['street'] = self._mk_array(tokens)
|
|
||||||
|
|
||||||
|
|
||||||
def add_place(self, tokens):
|
def add_place(self, tokens):
|
||||||
@ -591,6 +621,7 @@ class _TokenCache:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.names = {}
|
self.names = {}
|
||||||
self.partials = {}
|
self.partials = {}
|
||||||
|
self.fulls = {}
|
||||||
self.postcodes = set()
|
self.postcodes = set()
|
||||||
self.housenumbers = {}
|
self.housenumbers = {}
|
||||||
|
|
||||||
|
@ -471,9 +471,25 @@ class TestPlaceAddress:
|
|||||||
|
|
||||||
|
|
||||||
def test_process_place_street(self):
|
def test_process_place_street(self):
|
||||||
|
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
|
||||||
info = self.process_address(street='Grand Road')
|
info = self.process_address(street='Grand Road')
|
||||||
|
|
||||||
assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD')
|
assert eval(info['street']) == self.name_token_set('#Grand Road')
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_nonexisting_street(self):
|
||||||
|
info = self.process_address(street='Grand Road')
|
||||||
|
|
||||||
|
assert 'street' not in info
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_multiple_street_tags(self):
|
||||||
|
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
|
||||||
|
'ref': '05989'}}))
|
||||||
|
info = self.process_address(**{'street': 'Grand Road',
|
||||||
|
'street:sym_ul': '05989'})
|
||||||
|
|
||||||
|
assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
|
||||||
|
|
||||||
|
|
||||||
def test_process_place_street_empty(self):
|
def test_process_place_street_empty(self):
|
||||||
@ -482,6 +498,16 @@ class TestPlaceAddress:
|
|||||||
assert 'street' not in info
|
assert 'street' not in info
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_place_street_from_cache(self):
|
||||||
|
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
|
||||||
|
self.process_address(street='Grand Road')
|
||||||
|
|
||||||
|
# request address again
|
||||||
|
info = self.process_address(street='Grand Road')
|
||||||
|
|
||||||
|
assert eval(info['street']) == self.name_token_set('#Grand Road')
|
||||||
|
|
||||||
|
|
||||||
def test_process_place_place(self):
|
def test_process_place_place(self):
|
||||||
info = self.process_address(place='Honu Lulu')
|
info = self.process_address(place='Honu Lulu')
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user