add support for search by houenumber

This commit is contained in:
Sarah Hoffmann 2023-05-26 14:10:57 +02:00
parent 964bc7fbe0
commit 146a0b29c0
3 changed files with 43 additions and 4 deletions

View File

@ -11,7 +11,7 @@ from typing import Optional, List, Tuple, Iterator
import heapq
from nominatim.api.types import SearchDetails, DataLayer
from nominatim.api.search.query import QueryStruct, TokenType, TokenRange, BreakType
from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange, BreakType
from nominatim.api.search.token_assignment import TokenAssignment
import nominatim.api.search.db_search_fields as dbf
import nominatim.api.search.db_searches as dbs
@ -97,6 +97,10 @@ class SearchBuilder:
sdata.qualifiers = categories
categories = None
builder = self.build_poi_search(sdata)
elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber,
TokenType.HOUSENUMBER)
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else:
builder = self.build_special_search(sdata, assignment.address,
bool(categories))
@ -128,8 +132,8 @@ class SearchBuilder:
""" Build abstract search queries for searches that do not involve
a named place.
"""
if sdata.qualifiers or sdata.housenumbers:
# No special searches over housenumbers or qualifiers supported.
if sdata.qualifiers:
# No special searches over qualifiers supported.
return
if sdata.countries and not address and not sdata.postcodes \
@ -145,6 +149,21 @@ class SearchBuilder:
yield dbs.PostcodeSearch(0.4, sdata)
def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]:
""" Build a simple address search for special entries where the
housenumber is the main name token.
"""
partial_tokens: List[int] = []
for trange in address:
partial_tokens.extend(t.token for t in self.query.get_partials_list(trange))
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any'),
dbf.FieldLookup('nameaddress_vector', partial_tokens, 'lookup_all')
]
yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs))
def build_name_search(self, sdata: dbf.SearchData,
name: TokenRange, address: List[TokenRange],
is_category: bool) -> Iterator[dbs.AbstractSearch]:

View File

@ -292,6 +292,10 @@ class _TokenSequence:
penalty=self.penalty,
address=base.address[:-1])
# variant for special housenumber searches
if base.housenumber:
yield dataclasses.replace(base, penalty=self.penalty)
# Use beginning of first word as name
if self.direction != -1:
first = base.address[0]

View File

@ -125,6 +125,8 @@ def test_housenumber_and_street():
check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(1, 2),
housenumber=TokenRange(0, 1)),
TokenAssignment(address=[TokenRange(1, 2)],
housenumber=TokenRange(0, 1)))
@ -134,6 +136,8 @@ def test_housenumber_and_street_backwards():
check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1),
housenumber=TokenRange(1, 2)),
TokenAssignment(address=[TokenRange(0, 1)],
housenumber=TokenRange(1, 2)))
@ -148,6 +152,10 @@ def test_housenumber_and_postcode():
name=TokenRange(0, 1),
housenumber=TokenRange(1, 2),
address=[TokenRange(2, 3)],
postcode=TokenRange(3, 4)),
TokenAssignment(penalty=pytest.approx(0.3),
housenumber=TokenRange(1, 2),
address=[TokenRange(0, 1), TokenRange(2, 3)],
postcode=TokenRange(3, 4)))
def test_postcode_and_housenumber():
@ -161,6 +169,10 @@ def test_postcode_and_housenumber():
name=TokenRange(2, 3),
housenumber=TokenRange(3, 4),
address=[TokenRange(0, 1)],
postcode=TokenRange(1, 2)),
TokenAssignment(penalty=pytest.approx(0.3),
housenumber=TokenRange(3, 4),
address=[TokenRange(0, 1), TokenRange(2, 3)],
postcode=TokenRange(1, 2)))
@ -203,7 +215,11 @@ def test_housenumber_many_phrases():
name=TokenRange(4, 5),
housenumber=TokenRange(3, 4),\
address=[TokenRange(0, 1), TokenRange(1, 2),
TokenRange(2, 3)]))
TokenRange(2, 3)]),
TokenAssignment(penalty=0.1,
housenumber=TokenRange(3, 4),\
address=[TokenRange(0, 1), TokenRange(1, 2),
TokenRange(2, 3), TokenRange(4, 5)]))
def test_country_at_beginning():