From caea14d0357a1e0a901dfac82febc834aec95f0f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 3 Sep 2020 10:38:33 +0200 Subject: [PATCH] merge addr tags into search_name table When a place of rank 30 has addr tags that are not covered by the search terms of the parent, add a separate entry for the POI in the search_name table that includes the addr tags. We can only do that with named places. For POIs without a name the housenumber is used as name. If that is not available either, searching still won't work. --- sql/functions/normalization.sql | 96 ++++++++++++++++ sql/functions/placex_triggers.sql | 44 ++++---- test/bdd/db/import/search_name.feature | 148 ++++++++++++++++++++++++- test/bdd/steps/db_ops.py | 20 +++- 4 files changed, 282 insertions(+), 26 deletions(-) diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql index 08087172..71b14ee5 100644 --- a/sql/functions/normalization.sql +++ b/sql/functions/normalization.sql @@ -412,3 +412,99 @@ BEGIN END; $$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION create_poi_search_terms(parent_place_id BIGINT, + address HSTORE, + housenumber TEXT, + initial_name_vector INTEGER[], + OUT name_vector INTEGER[], + OUT nameaddress_vector INTEGER[]) + AS $$ +DECLARE + parent_name_vector INTEGER[]; + parent_address_vector INTEGER[]; + addr_place_ids INTEGER[]; + + addr_item RECORD; +BEGIN + -- Compute all search terms from the addr: tags. + nameaddress_vector := '{}'::INTEGER[]; + + IF address IS NOT NULL THEN + FOR addr_item IN SELECT * FROM each(address) + LOOP + IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', + 'district', 'region', 'county', 'municipality', + 'hamlet', 'village', 'subdistrict', 'town', + 'neighbourhood', 'quarter', 'parish') + THEN + nameaddress_vector := array_merge(nameaddress_vector, + addr_ids_from_name(addr_item.value)); + END IF; + END LOOP; + END IF; + + -- If the POI is named, simply mix in all address terms and be done. + IF array_length(initial_name_vector, 1) is not NULL THEN + -- Cheating here by not recomputing all terms but simply using the ones + -- from the parent object. + SELECT array_merge(s.name_vector, s.nameaddress_vector) + INTO parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + name_vector := initial_name_vector; + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + + IF not address ? 'street' and address ? 'place' THEN + -- make sure addr:place terms are always searchable + nameaddress_vector := array_merge(nameaddress_vector, + addr_ids_from_name(address->'place')); + END IF; + + RETURN; + END IF; + + ----- unnamed POIS + + IF (array_length(nameaddress_vector, 1) is null + and (address ? 'street'or not address ? 'place')) + or housenumber is null + THEN + RETURN; + END IF; + + SELECT s.name_vector, s.nameaddress_vector + INTO parent_name_vector, parent_address_vector + FROM search_name s + WHERE s.place_id = parent_place_id; + + -- Check if the parent covers all address terms. + -- If not, create a search name entry with the house number as the name. + -- This is unusual for the search_name table but prevents that the place + -- is returned when we only search for the street/place. + + IF not nameaddress_vector <@ parent_address_vector THEN + name_vector := ARRAY[getorcreate_name_id(housenumber)]; + END IF; + + IF not address ? 'street' and address ? 'place' THEN + addr_place_ids := addr_ids_from_name(address->'place'); + IF not addr_place_ids <@ parent_name_vector THEN + -- addr:place tag exists without a corresponding place. Mix in addr:place + -- in the address and drop the name from the parent. This would only be + -- the street name of the nearest street. + nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids); + name_vector := ARRAY[getorcreate_name_id(housenumber)]; + END IF; + ELSE + nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); + END IF; + + -- The address vector always gets merged in. + nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); + +END; +$$ +LANGUAGE plpgsql; diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index 0bfefbb0..9c2777a9 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -224,7 +224,7 @@ LANGUAGE plpgsql STABLE; -- \param maxrank Rank of the place. All address features must have -- a search rank lower than the given rank. -- \param address Address terms for the place. --- \param geoemtry Geometry to which the address objects should be close. +-- \param geometry Geometry to which the address objects should be close. -- -- \retval parent_place_id Place_id of the address object that is the direct -- ancestor. @@ -541,6 +541,9 @@ DECLARE name_vector INTEGER[]; nameaddress_vector INTEGER[]; + addr_nameaddress_vector INTEGER[]; + + inherited_address HSTORE; linked_node_id BIGINT; linked_importance FLOAT; @@ -710,6 +713,7 @@ BEGIN -- if we have a POI and there is no address information, -- see if we can get it from a surrounding building + inherited_address := ''::HSTORE; IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL AND NEW.housenumber IS NULL THEN FOR location IN @@ -724,6 +728,7 @@ BEGIN NEW.housenumber := location.address->'housenumber'; addr_street := location.address->'street'; addr_place := location.address->'place'; + inherited_address := location.address; END LOOP; END IF; @@ -754,28 +759,26 @@ BEGIN NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); END IF; - -- If there is no name it isn't searchable, don't bother to create a search record - IF NEW.name is NULL THEN - --DEBUG: RAISE WARNING 'Not a searchable place % %', NEW.osm_type, NEW.osm_id; - return NEW; + IF NEW.name is not NULL THEN + NEW.name := add_default_place_name(NEW.country_code, NEW.name); + name_vector := make_keywords(NEW.name); + + IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, + name_vector, NEW.rank_search, NEW.rank_address, + upper(trim(NEW.address->'postcode')), NEW.geometry); + --DEBUG: RAISE WARNING 'Place added to location table'; + END IF; + END IF; - NEW.name := add_default_place_name(NEW.country_code, NEW.name); - name_vector := make_keywords(NEW.name); - - -- Performance, it would be more acurate to do all the rest of the import - -- process but it takes too long - -- Just be happy with inheriting from parent road only - result := insertSearchName(NEW.partition, NEW.place_id, name_vector, - NEW.rank_search, NEW.rank_address, NEW.geometry); - IF NOT %REVERSE-ONLY% THEN - -- Merge address from parent - SELECT array_merge(s.name_vector, s.nameaddress_vector) - INTO nameaddress_vector - FROM search_name s - WHERE s.place_id = NEW.parent_place_id; + SELECT * INTO name_vector, nameaddress_vector + FROM create_poi_search_terms(NEW.parent_place_id, + inherited_address || NEW.address, + NEW.housenumber, name_vector); + IF array_length(name_vector, 1) is not NULL THEN INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) @@ -784,8 +787,9 @@ BEGIN nameaddress_vector, NEW.centroid); --DEBUG: RAISE WARNING 'Place added to search table'; END IF; + END IF; - return NEW; + RETURN NEW; END IF; END IF; diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 8006045f..5514e7de 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -2,7 +2,7 @@ Feature: Creation of search terms Tests that search_name table is filled correctly - Scenario: POIs without a name have no search entry + Scenario: Unnamed POIs have no search entry Given the scene roads-with-pois And the places | osm | class | type | geometry | @@ -13,6 +13,152 @@ Feature: Creation of search terms When importing Then search_name has no entry for N1 + Scenario: Unnamed POI has a search entry when it has unknown addr: tags + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+city | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #23 | Rose Street, Walltown | + When searching for "23 Rose Street, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Unnamed POI has no search entry when it has known addr: tags + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+city | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | addr+city | geometry | + | W1 | highway | residential | Rose Street | Walltown | :w-north | + When importing + Then search_name has no entry for N1 + When searching for "23 Rose Street, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Unnamed POI must have a house number to get a search entry + Given the scene roads-with-pois + And the places + | osm | class | type | addr+city | geometry | + | N1 | place | house | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name has no entry for N1 + + Scenario: Unnamed POIs doesn't inherit parent name when unknown addr:place is present + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+place | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #23 | Walltown | + When searching for "23 Rose Street, Walltown" + Then exactly 0 results are returned + + # XXX Need to change parenting of POis without addr:street and with addr:place + Scenario: Unnamed POIs doesn't inherit parent name when addr:place is present only in parent address + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+place | geometry | + | N1 | place | house | 23 | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | addr+city | geometry | + | W1 | highway | residential | Rose Street | Walltown | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #23 | Walltown | + When searching for "23 Rose Street, Walltown" + Then exactly 1 result is returned + And results contain + | osm_type | osm_id | + | W | 1 | + + Scenario: Unnamed POIs does inherit parent name when unknown addr:place and addr:street is present + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+place | addr+street | geometry | + | N1 | place | house | 23 | Walltown | Lily Street | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name has no entry for N1 + When searching for "23 Rose Street" + Then results contain + | osm_type | osm_id | + | N | 1 | + When searching for "23 Lily Street" + Then exactly 0 results are returned + + Scenario: An unknown addr:street is ignored + Given the scene roads-with-pois + And the places + | osm | class | type | housenr | addr+street | geometry | + | N1 | place | house | 23 | Lily Street | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name has no entry for N1 + When searching for "23 Rose Street" + Then results contain + | osm_type | osm_id | + | N | 1 | + When searching for "23 Lily Street" + Then exactly 0 results are returned + + Scenario: Named POIs have unknown address tags added in the search_name table + Given the scene roads-with-pois + And the places + | osm | class | type | name+name | addr+city | geometry | + | N1 | place | house | Green Moss | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #Green Moss | Rose Street, Walltown | + When searching for "Green Moss, Rose Street, Walltown" + Then results contain + | osm_type | osm_id | + | N | 1 | + + Scenario: Named POI doesn't inherit parent name when addr:place is present only in parent address + Given the scene roads-with-pois + And the places + | osm | class | type | name+name | addr+place | geometry | + | N1 | place | house | Green Moss | Walltown | :p-N1 | + And the places + | osm | class | type | name+name | geometry | + | W1 | highway | residential | Rose Street | :w-north | + When importing + Then search_name contains + | object | name_vector | nameaddress_vector | + | N1 | #Green Moss | Walltown | + When searching for "Green Moss, Rose Street, Walltown" + Then exactly 1 result is returned + And results contain + | osm_type | osm_id | + | W | 1 | + Scenario: Named POIs inherit address from parent Given the scene roads-with-pois And the places diff --git a/test/bdd/steps/db_ops.py b/test/bdd/steps/db_ops.py index 9604fd10..377c977d 100644 --- a/test/bdd/steps/db_ops.py +++ b/test/bdd/steps/db_ops.py @@ -470,12 +470,22 @@ def check_search_name_contents(context, exclude): for res in cur: for h in row.headings: if h in ('name_vector', 'nameaddress_vector'): - terms = [x.strip().replace('#', ' ') for x in row[h].split(',')] + terms = [x.strip() for x in row[h].split(',') if not x.strip().startswith('#')] + words = [x.strip()[1:] for x in row[h].split(',') if x.strip().startswith('#')] subcur = context.db.cursor() - subcur.execute("""SELECT word_id, word_token - FROM word, (SELECT unnest(%s) as term) t - WHERE word_token = make_standard_name(t.term)""", - (terms,)) + subcur.execute(""" SELECT word_id, word_token + FROM word, (SELECT unnest(%s::TEXT[]) as term) t + WHERE word_token = make_standard_name(t.term) + and class is null and country_code is null + and operator is null + UNION + SELECT word_id, word_token + FROM word, (SELECT unnest(%s::TEXT[]) as term) t + WHERE word_token = ' ' || make_standard_name(t.term) + and class is null and country_code is null + and operator is null + """, + (terms, words)) if not exclude: ok_(subcur.rowcount >= len(terms), "No word entry found for " + row[h])