From e7266b52ae4b9e543513df95ad853ba30a7aefb3 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 9 Apr 2021 21:10:00 +0200 Subject: [PATCH 1/3] simplify name matching between boundary and place node Instead of normalising the names simply compare them in lower case. This removes the dependency on the tokenizer for linking boundaries and nodes. When looking up the linked places by place type also allow that one name is simply contained in the other. This catches the frequent case where one of the names has an addendum (e.g. Newport vs. City of Newport). Drops the special index for the name lookup and instead relies on a slightly extended version of the geometry index used for reverse lookup. Saves around 100MB on a planet. --- lib-sql/functions/placex_triggers.sql | 16 ++++++++++------ lib-sql/indices.sql | 6 ------ lib-sql/tables.sql | 5 ++++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 6998224e..812bc79f 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -169,7 +169,7 @@ BEGIN END IF; IF bnd.name ? 'name' THEN - bnd_name := make_standard_name(bnd.name->'name'); + bnd_name := lower(bnd.name->'name'); IF bnd_name = '' THEN bnd_name := NULL; END IF; @@ -180,12 +180,14 @@ BEGIN IF bnd.extratags ? 
'place' and bnd_name is not null THEN FOR linked_placex IN SELECT * FROM placex - WHERE make_standard_name(name->'name') = bnd_name + WHERE (position(lower(name->'name') in bnd_name) > 0 + OR position(bnd_name in lower(name->'name')) > 0) AND placex.class = 'place' AND placex.type = bnd.extratags->'place' AND placex.osm_type = 'N' AND placex.linked_place_id is null AND placex.rank_search < 26 -- needed to select the right index - AND _st_covers(bnd.geometry, placex.geometry) + AND placex.type != 'postcode' + AND ST_Covers(bnd.geometry, placex.geometry) LOOP {% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; @@ -201,7 +203,7 @@ BEGIN AND placex.linked_place_id is null AND placex.rank_search < 26 AND _st_covers(bnd.geometry, placex.geometry) - ORDER BY make_standard_name(name->'name') = bnd_name desc + ORDER BY lower(name->'name') = bnd_name desc LOOP {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; @@ -213,7 +215,7 @@ BEGIN {% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %} FOR linked_placex IN SELECT placex.* from placex - WHERE make_standard_name(name->'name') = bnd_name + WHERE lower(name->'name') = bnd_name AND ((bnd.rank_address > 0 and bnd.rank_address = (compute_place_rank(placex.country_code, 'N', placex.class, @@ -221,9 +223,11 @@ BEGIN false, placex.postcode)).address_rank) OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search)) AND placex.osm_type = 'N' + AND placex.class = 'place' AND placex.linked_place_id is null AND placex.rank_search < 26 -- needed to select the right index - AND _st_covers(bnd.geometry, placex.geometry) + AND placex.type != 'postcode' + AND ST_Covers(bnd.geometry, placex.geometry) LOOP {% if debug %}RAISE WARNING 'Found matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql 
index c121a963..a6f7cf95 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -23,12 +23,6 @@ CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_lookupPolyg AND rank_address between 4 and 25 AND type != 'postcode' AND name is not null AND indexed_status = 0 AND linked_place_id is null; -CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode - ON placex USING gist (geometry) {{db.tablespace.search_index}} - WHERE osm_type = 'N' AND rank_search between 5 and 25 - AND class = 'place' AND type != 'postcode' - AND name is not null AND indexed_status = 0 AND linked_place_id is null; - CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}; diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index 329eb7a1..aa213dba 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -184,7 +184,10 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tabl CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL; CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}; CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {{db.tablespace.address_index}} WHERE osm_type='N' and rank_search < 26; +CREATE INDEX idx_placex_geometry_placenode ON placex + USING GIST (geometry) {{db.tablespace.search_index}} + WHERE osm_type = 'N' and rank_search < 26 + and class = 'place' and type != 'postcode' and linked_place_id is null; CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 
'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26; DROP SEQUENCE IF EXISTS seq_place; From 16267dc021f945f808442df116229bd4abac372b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 9 Apr 2021 21:24:35 +0200 Subject: [PATCH 2/3] add migration for new placenode geometry index --- nominatim/tools/migration.py | 17 +++++++++++++++++ nominatim/version.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index b5f0b80e..54848341 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -156,3 +156,20 @@ def change_housenumber_transliteration(conn, **_): cur.execute("""UPDATE placex SET housenumber = create_housenumber_id(housenumber) WHERE housenumber is not null""") + + +@_migration(3, 7, 0, 0) +def switch_placenode_geometry_index(conn, **_): + """ Replace idx_placex_geometry_reverse_placeNode index. + + Make the index slightly more permissive, so that it can also be used + when matching up boundaries and place nodes. It makes the + idx_placex_adminname index unnecessary. + """ + with conn.cursor() as cur: + cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex + USING GIST (geometry) + WHERE osm_type = 'N' and rank_search < 26 + and class = 'place' and type != 'postcode' + and linked_place_id is null""") + cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """) diff --git a/nominatim/version.py b/nominatim/version.py index 52550d19..9670ea60 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -10,7 +10,7 @@ Version information for Nominatim. # and must always be increased when there is a change to the database or code # that requires a migration. # Released versions always have a database patch level of 0. 
-NOMINATIM_VERSION = (3, 7, 0, 0) +NOMINATIM_VERSION = (3, 7, 0, 1) POSTGRESQL_REQUIRED_VERSION = (9, 3) POSTGIS_REQUIRED_VERSION = (2, 2) From e90adfc7c3a9a990df084da1f9b80769c458737b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 14 Apr 2021 09:58:14 +0200 Subject: [PATCH 3/3] adapt database check to new index layout --- nominatim/tools/check_database.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index d8ab08cc..5b39085d 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -84,7 +84,8 @@ def _get_indexes(conn): 'idx_placex_rank_address', 'idx_placex_parent_place_id', 'idx_placex_geometry_reverse_lookuppolygon', - 'idx_placex_geometry_reverse_placenode', + 'idx_placex_geometry_placenode', + 'idx_placex_housenumber', 'idx_osmline_parent_place_id', 'idx_osmline_parent_osm_id', 'idx_postcode_id',