Merge pull request #3099 from lonvia/determine-place-address-from-tokenizer

Use information from tokenizer to determine street vs. place address
This commit is contained in:
Sarah Hoffmann 2023-06-30 21:47:57 +02:00 committed by GitHub
commit b45f761227
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 51 additions and 14 deletions

View File

@ -189,6 +189,28 @@ a house number token text. If a place has multiple house numbers they must
be listed with a semicolon as delimiter. Must be NULL when the place has no
house numbers.
```sql
FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN
```
Return true if this object should be parented against a street rather than
against a place. Only relevant for objects with address rank 30.
```sql
FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN
```
Return true if there are street names to match against for finding the
parent of the object.
```sql
FUNCTION token_has_addr_place(info JSONB) RETURNS BOOLEAN
```
Return true if there are place names to match against for finding the
parent of the object.
```sql
FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
```

View File

@ -996,7 +996,7 @@ BEGIN
{% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
NEW.parent_place_id := null;
is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
is_place_address := not token_is_street_address(NEW.token_info);
-- We have to find our parent road.
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
@ -1013,7 +1013,7 @@ BEGIN
SELECT p.country_code, p.postcode, p.name FROM placex p
WHERE p.place_id = NEW.parent_place_id INTO location;
IF is_place_address THEN
IF is_place_address and NEW.address ? 'place' THEN
-- Check if the addr:place tag is part of the parent name
SELECT count(*) INTO i
FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';

View File

@ -41,10 +41,17 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Tell whether an object is to be parented against a street.
-- This is the case when the token info has a 'street' entry or when it
-- has no 'place' entry at all. Not declared STRICT, so a NULL info
-- yields true.
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
  RETURNS BOOLEAN
AS $$
  SELECT (info->>'place') IS NULL OR (info->>'street') IS NOT NULL;
$$ LANGUAGE SQL IMMUTABLE;
-- Return true when the token info carries street tokens that can be
-- matched against when looking for the parent of the object.
-- A 'street' entry holding the empty array literal '{}' means that a
-- street was given but produced no tokens, so there is nothing to match.
-- The leading IS NOT NULL test makes a missing entry yield false
-- instead of NULL.
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
  RETURNS BOOLEAN
AS $$
  SELECT info->>'street' is not null and info->>'street' != '{}';
$$ LANGUAGE SQL IMMUTABLE;

View File

@ -41,10 +41,17 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Tell whether an object is to be parented against a street.
-- This is the case when the token info has a 'street' entry or when it
-- has no 'place_search' entry at all. Not declared STRICT, so a NULL
-- info yields true.
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
  RETURNS BOOLEAN
AS $$
  SELECT (info->>'place_search') IS NULL OR (info->>'street') IS NOT NULL;
$$ LANGUAGE SQL IMMUTABLE;
-- Return true when the token info carries street tokens that can be
-- matched against when looking for the parent of the object.
-- A 'street' entry holding the empty array literal '{}' means that a
-- street was given but produced no tokens, so there is nothing to match.
-- The leading IS NOT NULL test makes a missing entry yield false
-- instead of NULL.
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
  RETURNS BOOLEAN
AS $$
  SELECT info->>'street' is not null and info->>'street' != '{}';
$$ LANGUAGE SQL IMMUTABLE;

View File

@ -34,7 +34,7 @@ class PlaceName:
def __repr__(self) -> str:
    """ Return a debug representation listing name, kind and suffix.

        Every field is rendered with `!r`, so strings that contain quotes
        stay unambiguous and a None value shows up as None rather than
        as the string 'None'.
    """
    return f"PlaceName(name={self.name!r},kind={self.kind!r},suffix={self.suffix!r})"
def clone(self, name: Optional[str] = None,

View File

@ -720,7 +720,7 @@ class _TokenInfo:
self.names: Optional[str] = None
self.housenumbers: Set[str] = set()
self.housenumber_tokens: Set[int] = set()
self.street_tokens: Set[int] = set()
self.street_tokens: Optional[Set[int]] = None
self.place_tokens: Set[int] = set()
self.address_tokens: Dict[str, str] = {}
self.postcode: Optional[str] = None
@ -742,7 +742,7 @@ class _TokenInfo:
out['hnr'] = ';'.join(self.housenumbers)
out['hnr_tokens'] = self._mk_array(self.housenumber_tokens)
if self.street_tokens:
if self.street_tokens is not None:
out['street'] = self._mk_array(self.street_tokens)
if self.place_tokens:
@ -776,6 +776,8 @@ class _TokenInfo:
def add_street(self, tokens: Iterable[int]) -> None:
    """ Register match terms for addr:street.

        The token set is created on the first call, so it ends up as a
        (possibly empty) set even when `tokens` yields nothing.
    """
    street_tokens = self.street_tokens
    if street_tokens is None:
        street_tokens = set()
        self.street_tokens = street_tokens
    street_tokens.update(tokens)

View File

@ -564,14 +564,13 @@ class _TokenInfo:
def add_street(self, conn: Connection, street: str) -> None:
    """ Add addr:street match terms.

        The tokens for `street` are fetched through the street cache,
        which falls back to a database lookup. The 'street' entry is
        always written: when the lookup yields nothing, the empty array
        literal '{}' is stored so that the presence of a street tag is
        still recorded.
    """
    def _get_street(name: str) -> Optional[str]:
        # The query casts its result to text, hence a string (or None)
        # comes back rather than a list of integers.
        with conn.cursor() as cur:
            return cast(Optional[str],
                        cur.scalar("SELECT word_ids_from_name(%s)::text", (name, )))

    tokens = self.cache.streets.get(street, _get_street)
    self.data['street'] = tokens or '{}'
def add_place(self, conn: Connection, place: str) -> None:

View File

@ -523,7 +523,7 @@ class TestPlaceAddress:
def test_process_place_nonexisting_street(self):
    # A street tag must still show up in the token info, as the empty
    # array literal '{}'. The former "'street' not in info" check
    # contradicted this and has been dropped.
    info = self.process_address(street='Grand Road')

    assert info['street'] == '{}'
def test_process_place_multiple_street_tags(self):
@ -538,7 +538,7 @@ class TestPlaceAddress:
def test_process_place_street_empty(self):
    # A street tag must still show up in the token info, as the empty
    # array literal '{}'. The former "'street' not in info" check
    # contradicted this and has been dropped.
    info = self.process_address(street='🜵')

    assert info['street'] == '{}'
def test_process_place_street_from_cache(self):

View File

@ -549,7 +549,7 @@ class TestPlaceAddress:
def test_process_place_street_empty(self):
    # A street tag must still show up in the token info, as the empty
    # array literal '{}'. The former "'street' not in info" check
    # contradicted this and has been dropped.
    info = self.process_address(street='🜵')

    assert info['street'] == '{}'
def test_process_place_place(self):