mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-23 05:35:13 +03:00
reorganise address iteration in tokenizer
This commit is contained in:
parent
0da481f207
commit
0ba93e5ba9
@ -37,7 +37,7 @@ $$ LANGUAGE SQL IMMUTABLE STRICT;
|
|||||||
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
||||||
RETURNS INTEGER[]
|
RETURNS INTEGER[]
|
||||||
AS $$
|
AS $$
|
||||||
SELECT (info->>'street_match')::INTEGER[]
|
SELECT (info->>'street')::INTEGER[]
|
||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,11 +236,26 @@ class LegacyNameAnalyzer:
|
|||||||
address = place.get('address')
|
address = place.get('address')
|
||||||
|
|
||||||
if address:
|
if address:
|
||||||
self._add_postcode(address.get('postcode'))
|
hnrs = []
|
||||||
token_info.add_housenumbers(self.conn, address)
|
addr_terms = []
|
||||||
token_info.add_address_parent(self.conn, address.get('street'),
|
for key, value in address.items():
|
||||||
address.get('place'))
|
if key == 'postcode':
|
||||||
token_info.add_address_parts(self.conn, address)
|
self._add_postcode(value)
|
||||||
|
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
||||||
|
hnrs.append(value)
|
||||||
|
elif key == 'street':
|
||||||
|
token_info.add_street(self.conn, value)
|
||||||
|
elif key == 'place':
|
||||||
|
token_info.add_place(self.conn, value)
|
||||||
|
elif not key.startswith('_') and \
|
||||||
|
key not in ('country', 'full'):
|
||||||
|
addr_terms.append((key, value))
|
||||||
|
|
||||||
|
if hnrs:
|
||||||
|
token_info.add_housenumbers(self.conn, hnrs)
|
||||||
|
|
||||||
|
if addr_terms:
|
||||||
|
token_info.add_address_terms(self.conn, addr_terms)
|
||||||
|
|
||||||
return token_info.data
|
return token_info.data
|
||||||
|
|
||||||
@ -248,14 +263,12 @@ class LegacyNameAnalyzer:
|
|||||||
def _add_postcode(self, postcode):
|
def _add_postcode(self, postcode):
|
||||||
""" Make sure the normalized postcode is present in the word table.
|
""" Make sure the normalized postcode is present in the word table.
|
||||||
"""
|
"""
|
||||||
if not postcode or re.search(r'[:,;]', postcode) is not None:
|
|
||||||
return
|
|
||||||
|
|
||||||
def _create_postcode_from_db(pcode):
|
def _create_postcode_from_db(pcode):
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
|
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
|
||||||
|
|
||||||
self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
|
if re.search(r'[:,;]', postcode) is None:
|
||||||
|
self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
|
||||||
|
|
||||||
|
|
||||||
class _TokenInfo:
|
class _TokenInfo:
|
||||||
@ -283,15 +296,9 @@ class _TokenInfo:
|
|||||||
(names, country_feature.lower()))
|
(names, country_feature.lower()))
|
||||||
|
|
||||||
|
|
||||||
def add_housenumbers(self, conn, address):
|
def add_housenumbers(self, conn, hnrs):
|
||||||
""" Extract housenumber information from the address.
|
""" Extract housenumber information from the address.
|
||||||
"""
|
"""
|
||||||
hnrs = [v for k, v in address.items()
|
|
||||||
if k in ('housenumber', 'streetnumber', 'conscriptionnumber')]
|
|
||||||
|
|
||||||
if not hnrs:
|
|
||||||
return
|
|
||||||
|
|
||||||
if len(hnrs) == 1:
|
if len(hnrs) == 1:
|
||||||
token = self.cache.get_housenumber(hnrs[0])
|
token = self.cache.get_housenumber(hnrs[0])
|
||||||
if token is not None:
|
if token is not None:
|
||||||
@ -312,27 +319,32 @@ class _TokenInfo:
|
|||||||
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
|
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
|
||||||
|
|
||||||
|
|
||||||
def add_address_parent(self, conn, street, place):
|
def add_street(self, conn, street):
|
||||||
""" Extract the tokens for street and place terms.
|
""" Add addr:street match terms.
|
||||||
"""
|
"""
|
||||||
def _get_streetplace(name):
|
def _get_street(name):
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
return cur.scalar("SELECT word_ids_from_name(%s)::text", (name, ))
|
||||||
|
|
||||||
|
self.data['street'] = self.cache.streets.get(street, _get_street)
|
||||||
|
|
||||||
|
|
||||||
|
def add_place(self, conn, place):
|
||||||
|
""" Add addr:place search and match terms.
|
||||||
|
"""
|
||||||
|
def _get_place(name):
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("""SELECT (addr_ids_from_name(%s) || getorcreate_name_id(make_standard_name(%s), ''))::text,
|
cur.execute("""SELECT (addr_ids_from_name(%s) || getorcreate_name_id(make_standard_name(%s), ''))::text,
|
||||||
word_ids_from_name(%s)::text""",
|
word_ids_from_name(%s)::text""",
|
||||||
(name, name, name))
|
(name, name, name))
|
||||||
return cur.fetchone()
|
return cur.fetchone()
|
||||||
|
|
||||||
if street:
|
self.data['place_search'], self.data['place_match'] = \
|
||||||
self.data['street_search'], self.data['street_match'] = \
|
self.cache.places.get(place, _get_place)
|
||||||
self.cache.streets.get(street, _get_streetplace)
|
|
||||||
|
|
||||||
if place:
|
|
||||||
self.data['place_search'], self.data['place_match'] = \
|
|
||||||
self.cache.streets.get(place, _get_streetplace)
|
|
||||||
|
|
||||||
|
|
||||||
def add_address_parts(self, conn, address):
|
def add_address_terms(self, conn, terms):
|
||||||
""" Extract address terms.
|
""" Add additional address terms.
|
||||||
"""
|
"""
|
||||||
def _get_address_term(name):
|
def _get_address_term(name):
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
@ -342,14 +354,10 @@ class _TokenInfo:
|
|||||||
return cur.fetchone()
|
return cur.fetchone()
|
||||||
|
|
||||||
tokens = {}
|
tokens = {}
|
||||||
for key, value in address.items():
|
for key, value in terms:
|
||||||
if not key.startswith('_') and \
|
tokens[key] = self.cache.address_terms.get(value, _get_address_term)
|
||||||
key not in ('country', 'street', 'place', 'postcode', 'full',
|
|
||||||
'housenumber', 'streetnumber', 'conscriptionnumber'):
|
|
||||||
tokens[key] = self.cache.address_terms.get(value, _get_address_term)
|
|
||||||
|
|
||||||
if tokens:
|
self.data['addr'] = tokens
|
||||||
self.data['addr'] = tokens
|
|
||||||
|
|
||||||
|
|
||||||
class _LRU:
|
class _LRU:
|
||||||
|
Loading…
Reference in New Issue
Block a user