merge addr tags into search_name table

When a place of rank 30 has addr tags that are not covered by the
search terms of the parent, add a separate entry for the POI in
the search_name table that includes the addr tags. We can only
do that with named places. For POIs without a name the housenumber
is used as name. If that is not available either, searching still
won't work.
This commit is contained in:
Sarah Hoffmann 2020-09-03 10:38:33 +02:00
parent c5fc12e04b
commit caea14d035
4 changed files with 282 additions and 26 deletions

View File

@ -412,3 +412,99 @@ BEGIN
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION create_poi_search_terms(parent_place_id BIGINT,
address HSTORE,
housenumber TEXT,
initial_name_vector INTEGER[],
OUT name_vector INTEGER[],
OUT nameaddress_vector INTEGER[])
AS $$
DECLARE
parent_name_vector INTEGER[];
parent_address_vector INTEGER[];
addr_place_ids INTEGER[];
addr_item RECORD;
BEGIN
-- Compute all search terms from the addr: tags.
nameaddress_vector := '{}'::INTEGER[];
IF address IS NOT NULL THEN
FOR addr_item IN SELECT * FROM each(address)
LOOP
IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
'district', 'region', 'county', 'municipality',
'hamlet', 'village', 'subdistrict', 'town',
'neighbourhood', 'quarter', 'parish')
THEN
nameaddress_vector := array_merge(nameaddress_vector,
addr_ids_from_name(addr_item.value));
END IF;
END LOOP;
END IF;
-- If the POI is named, simply mix in all address terms and be done.
IF array_length(initial_name_vector, 1) is not NULL THEN
-- Cheating here by not recomputing all terms but simply using the ones
-- from the parent object.
SELECT array_merge(s.name_vector, s.nameaddress_vector)
INTO parent_address_vector
FROM search_name s
WHERE s.place_id = parent_place_id;
name_vector := initial_name_vector;
nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
IF not address ? 'street' and address ? 'place' THEN
-- make sure addr:place terms are always searchable
nameaddress_vector := array_merge(nameaddress_vector,
addr_ids_from_name(address->'place'));
END IF;
RETURN;
END IF;
----- unnamed POIS
IF (array_length(nameaddress_vector, 1) is null
and (address ? 'street'or not address ? 'place'))
or housenumber is null
THEN
RETURN;
END IF;
SELECT s.name_vector, s.nameaddress_vector
INTO parent_name_vector, parent_address_vector
FROM search_name s
WHERE s.place_id = parent_place_id;
-- Check if the parent covers all address terms.
-- If not, create a search name entry with the house number as the name.
-- This is unusual for the search_name table but prevents that the place
-- is returned when we only search for the street/place.
IF not nameaddress_vector <@ parent_address_vector THEN
name_vector := ARRAY[getorcreate_name_id(housenumber)];
END IF;
IF not address ? 'street' and address ? 'place' THEN
addr_place_ids := addr_ids_from_name(address->'place');
IF not addr_place_ids <@ parent_name_vector THEN
-- addr:place tag exists without a corresponding place. Mix in addr:place
-- in the address and drop the name from the parent. This would only be
-- the street name of the nearest street.
nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
name_vector := ARRAY[getorcreate_name_id(housenumber)];
END IF;
ELSE
nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
END IF;
-- The address vector always gets merged in.
nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
END;
$$
LANGUAGE plpgsql;

View File

@ -224,7 +224,7 @@ LANGUAGE plpgsql STABLE;
-- \param maxrank Rank of the place. All address features must have
-- a search rank lower than the given rank.
-- \param address Address terms for the place.
-- \param geoemtry Geometry to which the address objects should be close.
-- \param geometry Geometry to which the address objects should be close.
--
-- \retval parent_place_id Place_id of the address object that is the direct
-- ancestor.
@ -541,6 +541,9 @@ DECLARE
name_vector INTEGER[];
nameaddress_vector INTEGER[];
addr_nameaddress_vector INTEGER[];
inherited_address HSTORE;
linked_node_id BIGINT;
linked_importance FLOAT;
@ -710,6 +713,7 @@ BEGIN
-- if we have a POI and there is no address information,
-- see if we can get it from a surrounding building
inherited_address := ''::HSTORE;
IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL
AND NEW.housenumber IS NULL THEN
FOR location IN
@ -724,6 +728,7 @@ BEGIN
NEW.housenumber := location.address->'housenumber';
addr_street := location.address->'street';
addr_place := location.address->'place';
inherited_address := location.address;
END LOOP;
END IF;
@ -754,28 +759,26 @@ BEGIN
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
END IF;
-- If there is no name it isn't searchable, don't bother to create a search record
IF NEW.name is NULL THEN
--DEBUG: RAISE WARNING 'Not a searchable place % %', NEW.osm_type, NEW.osm_id;
return NEW;
IF NEW.name is not NULL THEN
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
name_vector := make_keywords(NEW.name);
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
name_vector, NEW.rank_search, NEW.rank_address,
upper(trim(NEW.address->'postcode')), NEW.geometry);
--DEBUG: RAISE WARNING 'Place added to location table';
END IF;
END IF;
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
name_vector := make_keywords(NEW.name);
-- Performance, it would be more acurate to do all the rest of the import
-- process but it takes too long
-- Just be happy with inheriting from parent road only
result := insertSearchName(NEW.partition, NEW.place_id, name_vector,
NEW.rank_search, NEW.rank_address, NEW.geometry);
IF NOT %REVERSE-ONLY% THEN
-- Merge address from parent
SELECT array_merge(s.name_vector, s.nameaddress_vector)
INTO nameaddress_vector
FROM search_name s
WHERE s.place_id = NEW.parent_place_id;
SELECT * INTO name_vector, nameaddress_vector
FROM create_poi_search_terms(NEW.parent_place_id,
inherited_address || NEW.address,
NEW.housenumber, name_vector);
IF array_length(name_vector, 1) is not NULL THEN
INSERT INTO search_name (place_id, search_rank, address_rank,
importance, country_code, name_vector,
nameaddress_vector, centroid)
@ -784,8 +787,9 @@ BEGIN
nameaddress_vector, NEW.centroid);
--DEBUG: RAISE WARNING 'Place added to search table';
END IF;
END IF;
return NEW;
RETURN NEW;
END IF;
END IF;

View File

@ -2,7 +2,7 @@
Feature: Creation of search terms
Tests that search_name table is filled correctly
Scenario: POIs without a name have no search entry
Scenario: Unnamed POIs have no search entry
Given the scene roads-with-pois
And the places
| osm | class | type | geometry |
@ -13,6 +13,152 @@ Feature: Creation of search terms
When importing
Then search_name has no entry for N1
Scenario: Unnamed POI has a search entry when it has unknown addr: tags
Given the scene roads-with-pois
And the places
| osm | class | type | housenr | addr+city | geometry |
| N1 | place | house | 23 | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
| N1 | #23 | Rose Street, Walltown |
When searching for "23 Rose Street, Walltown"
Then results contain
| osm_type | osm_id |
| N | 1 |
Scenario: Unnamed POI has no search entry when it has known addr: tags
Given the scene roads-with-pois
And the places
| osm | class | type | housenr | addr+city | geometry |
| N1 | place | house | 23 | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | addr+city | geometry |
| W1 | highway | residential | Rose Street | Walltown | :w-north |
When importing
Then search_name has no entry for N1
When searching for "23 Rose Street, Walltown"
Then results contain
| osm_type | osm_id |
| N | 1 |
Scenario: Unnamed POI must have a house number to get a search entry
Given the scene roads-with-pois
And the places
| osm | class | type | addr+city | geometry |
| N1 | place | house | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name has no entry for N1
Scenario: Unnamed POIs doesn't inherit parent name when unknown addr:place is present
Given the scene roads-with-pois
And the places
| osm | class | type | housenr | addr+place | geometry |
| N1 | place | house | 23 | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
| N1 | #23 | Walltown |
When searching for "23 Rose Street, Walltown"
Then exactly 0 results are returned
# XXX Need to change parenting of POis without addr:street and with addr:place
Scenario: Unnamed POIs doesn't inherit parent name when addr:place is present only in parent address
Given the scene roads-with-pois
And the places
| osm | class | type | housenr | addr+place | geometry |
| N1 | place | house | 23 | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | addr+city | geometry |
| W1 | highway | residential | Rose Street | Walltown | :w-north |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
| N1 | #23 | Walltown |
When searching for "23 Rose Street, Walltown"
Then exactly 1 result is returned
And results contain
| osm_type | osm_id |
| W | 1 |
Scenario: Unnamed POIs does inherit parent name when unknown addr:place and addr:street is present
Given the scene roads-with-pois
And the places
| osm | class | type | housenr | addr+place | addr+street | geometry |
| N1 | place | house | 23 | Walltown | Lily Street | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name has no entry for N1
When searching for "23 Rose Street"
Then results contain
| osm_type | osm_id |
| N | 1 |
When searching for "23 Lily Street"
Then exactly 0 results are returned
Scenario: An unknown addr:street is ignored
Given the scene roads-with-pois
And the places
| osm | class | type | housenr | addr+street | geometry |
| N1 | place | house | 23 | Lily Street | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name has no entry for N1
When searching for "23 Rose Street"
Then results contain
| osm_type | osm_id |
| N | 1 |
When searching for "23 Lily Street"
Then exactly 0 results are returned
Scenario: Named POIs have unknown address tags added in the search_name table
Given the scene roads-with-pois
And the places
| osm | class | type | name+name | addr+city | geometry |
| N1 | place | house | Green Moss | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
| N1 | #Green Moss | Rose Street, Walltown |
When searching for "Green Moss, Rose Street, Walltown"
Then results contain
| osm_type | osm_id |
| N | 1 |
Scenario: Named POI doesn't inherit parent name when addr:place is present only in parent address
Given the scene roads-with-pois
And the places
| osm | class | type | name+name | addr+place | geometry |
| N1 | place | house | Green Moss | Walltown | :p-N1 |
And the places
| osm | class | type | name+name | geometry |
| W1 | highway | residential | Rose Street | :w-north |
When importing
Then search_name contains
| object | name_vector | nameaddress_vector |
| N1 | #Green Moss | Walltown |
When searching for "Green Moss, Rose Street, Walltown"
Then exactly 1 result is returned
And results contain
| osm_type | osm_id |
| W | 1 |
Scenario: Named POIs inherit address from parent
Given the scene roads-with-pois
And the places

View File

@ -470,12 +470,22 @@ def check_search_name_contents(context, exclude):
for res in cur:
for h in row.headings:
if h in ('name_vector', 'nameaddress_vector'):
terms = [x.strip().replace('#', ' ') for x in row[h].split(',')]
terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')]
words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')]
subcur = context.db.cursor()
subcur.execute("""SELECT word_id, word_token
FROM word, (SELECT unnest(%s) as term) t
WHERE word_token = make_standard_name(t.term)""",
(terms,))
subcur.execute(""" SELECT word_id, word_token
FROM word, (SELECT unnest(%s::TEXT[]) as term) t
WHERE word_token = make_standard_name(t.term)
and class is null and country_code is null
and operator is null
UNION
SELECT word_id, word_token
FROM word, (SELECT unnest(%s::TEXT[]) as term) t
WHERE word_token = ' ' || make_standard_name(t.term)
and class is null and country_code is null
and operator is null
""",
(terms, words))
if not exclude:
ok_(subcur.rowcount >= len(terms),
"No word entry found for " + row[h])