From 981e9700bee115e8686f0bdf5bae2c683c1da4a5 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 4 Oct 2022 15:02:19 +0200 Subject: [PATCH 01/16] add osm2pgsql gazetteer tests This ports the gazetteer tests from osm2pgsql to BDD tests. --- test/bdd/osm2pgsql/import/tags.feature | 205 +++++++++++++++++++++++ test/bdd/osm2pgsql/update/tags.feature | 223 +++++++++++++++++++++++++ test/bdd/steps/steps_db_ops.py | 5 +- 3 files changed, 432 insertions(+), 1 deletion(-) create mode 100644 test/bdd/osm2pgsql/import/tags.feature create mode 100644 test/bdd/osm2pgsql/update/tags.feature diff --git a/test/bdd/osm2pgsql/import/tags.feature b/test/bdd/osm2pgsql/import/tags.feature new file mode 100644 index 00000000..8c3d5c13 --- /dev/null +++ b/test/bdd/osm2pgsql/import/tags.feature @@ -0,0 +1,205 @@ +@DB +Feature: Tag evaluation + Tests if tags are correctly imported into the place table + + Scenario: Main tags as fallback + When loading osm data + """ + n100 Tjunction=yes,highway=bus_stop + n101 Tjunction=yes,name=Bar + n200 Tbuilding=yes,amenity=cafe + n201 Tbuilding=yes,name=Intersting + n202 Tbuilding=yes + """ + Then place contains exactly + | object | class | type | + | N100 | highway | bus_stop | + | N101 | junction | yes | + | N200 | amenity | cafe | + | N201 | building | yes | + + + Scenario: Name and reg tags + When loading osm data + """ + n2001 Thighway=road,name=Foo,alt_name:de=Bar,ref=45 + n2002 Thighway=road,name:prefix=Pre,name:suffix=Post,ref:de=55 + n2003 Thighway=yes,name:%20%de=Foo,name=real1 + n2004 Thighway=yes,name:%a%de=Foo,name=real2 + n2005 Thighway=yes,name:%9%de=Foo,name:\\=real3 + n2006 Thighway=yes,name:%9%de=Foo,name=rea\l3 + """ + Then place contains exactly + | object | class | type | name | + | N2001 | highway | road | 'name': 'Foo', 'alt_name:de': 'Bar', 'ref': '45' | + | N2002 | highway | road | - | + | N2003 | highway | yes | 'name: de': 'Foo', 'name': 'real1' | + | N2004 | highway | yes | 'name:\nde': 'Foo', 'name': 'real2' | + | N2005 | highway | yes | 'name:\tde': 'Foo', 'name:\\\\': 'real3' | + | N2006 | highway | yes | 'name:\tde': 'Foo', 'name': 'rea\\l3' | + + And place contains + | object | extratags | + | N2002 | 'name:prefix': 'Pre', 'name:suffix': 'Post', 'ref:de': '55' | + + + Scenario: Name when using with_name flag + When loading osm data + """ + n3001 Tbridge=yes,bridge:name=GoldenGate + n3002 Tbridge=yes,bridge:name:en=Rainbow + """ + Then place contains exactly + | object | class | type | name | + | N3001 | bridge | yes | 'name': 'GoldenGate' | + | N3002 | bridge | yes | 'name:en': 'Rainbow' | + + + Scenario: Address tags + When loading osm data + """ + n4001 Taddr:housenumber=34,addr:city=Esmarald,addr:county=Land + n4002 Taddr:streetnumber=10,is_in:city=Rootoo,is_in=Gold + """ + Then place contains exactly + | object | class | address | + | N4001 | place | 'housenumber': '34', 'city': 'Esmarald', 'county': 'Land' | + | N4002 | place | 'streetnumber': '10', 'city': 'Rootoo' | + + + Scenario: Country codes + When loading osm data + """ + n5001 Tshop=yes,country_code=DE + n5002 Tshop=yes,country_code=toolong + n5003 Tshop=yes,country_code=x + n5004 Tshop=yes,addr:country=us + n5005 Tshop=yes,country=be + """ + Then place contains exactly + | object | class | address | + | N5001 | shop | 'country': 'DE' | + | N5002 | shop | - | + | N5003 | shop | - | + | N5004 | shop | 'country': 'us' | + | N5005 | shop | - | + + + Scenario: Postcodes + When loading osm data + """ + n6001 Tshop=bank,addr:postcode=12345 + n6002 Tshop=bank,tiger:zip_left=34343 + n6003 Tshop=bank,is_in:postcode=9009 + """ + Then place contains exactly + | object | class | address | + | N6001 | shop | 'postcode': '12345' | + | N6002 | shop | 'postcode': '34343' | + | N6003 | shop | - | + + + Scenario: Main with extra + When loading osm data + """ + n7001 Thighway=primary,bridge=yes,name=1 + n7002 Thighway=primary,bridge=yes,bridge:name=1 + """ + Then place contains exactly + | object | class | type | name | extratags | + | N7001 | highway | primary | 'name': '1' | - | + | N7002:highway | highway | primary | - | 'bridge:name': '1'| + | N7002:bridge | bridge | yes | 'name': '1' | 'bridge:name': '1'| + + + Scenario: Global fallback and skipping + When loading osm data + """ + n8001 Tshop=shoes,note:de=Nein,xx=yy + n8002 Tshop=shoes,building=no,ele=234 + n8003 Tshop=shoes,name:source=survey + """ + Then place contains exactly + | object | class | extratags | + | N8001 | shop | 'xx': 'yy' | + | N8002 | shop | 'ele': '234' | + | N8003 | shop | - | + + + Scenario: Admin levels + When loading osm data + """ + n9001 Tplace=city + n9002 Tplace=city,admin_level=16 + n9003 Tplace=city,admin_level=x + n9004 Tplace=city,admin_level=1 + n9005 Tplace=city,admin_level=0 + n9006 Tplace=city,admin_level=2.5 + """ + Then place contains exactly + | object | class | admin_level | + | N9001 | place | 15 | + | N9002 | place | 15 | + | N9003 | place | 15 | + | N9004 | place | 1 | + | N9005 | place | 15 | + | N9006 | place | 15 | + + + Scenario: Administrative boundaries with place tags + When loading osm data + """ + n10001 Tboundary=administrative,place=city,name=A + n10002 Tboundary=natural,place=city,name=B + n10003 Tboundary=administrative,place=island,name=C + """ + Then place contains exactly + | object | class | type | extratags | + | N10001 | boundary | administrative | 'place': 'city' | + | N10002:boundary | boundary | natural | - | + | N10002:place | place | city | - | + | N10003:boundary | boundary | administrative | - | + | N10003:place | place | island | - | + + + Scenario: Shorten tiger:county tags + When loading osm data + """ + n11001 Tplace=village,tiger:county=Feebourgh%2c%%20%AL + n11002 Tplace=village,addr:state=Alabama,tiger:county=Feebourgh%2c%%20%AL + n11003 Tplace=village,tiger:county=Feebourgh + """ + Then place contains exactly + | object | class | address | + | N11001 | place | 'tiger:county': 'Feebourgh county' | + | N11002 | place | 'tiger:county': 'Feebourgh county', 'state': 'Alabama' | + | N11003 | place | 'tiger:county': 'Feebourgh county' | + + + Scenario: Building fallbacks + When loading osm data + """ + n12001 Ttourism=hotel,building=yes + n12002 Tbuilding=house + n12003 Tbuilding=shed,addr:housenumber=1 + n12004 Tbuilding=yes,name=Das-Haus + n12005 Tbuilding=yes,addr:postcode=12345 + """ + Then place contains exactly + | object | class | type | + | N12001 | tourism | hotel | + | N12003 | building | shed | + | N12004 | building | yes | + | N12005 | place | postcode | + + + Scenario: Address interpolations + When loading osm data + """ + n13001 Taddr:interpolation=odd + n13002 Taddr:interpolation=even,place=city + """ + Then place contains exactly + | object | class | type | extratags | address | + | N13001 | place | houses | - | 'interpolation': 'odd' | + | N13002 | place | houses | 'place': 'city' | 'interpolation': 'even' | diff --git a/test/bdd/osm2pgsql/update/tags.feature b/test/bdd/osm2pgsql/update/tags.feature new file mode 100644 index 00000000..0b6c2a62 --- /dev/null +++ b/test/bdd/osm2pgsql/update/tags.feature @@ -0,0 +1,223 @@ +@DB +Feature: Tag evaluation + Tests if tags are correctly updated in the place table + + + Scenario: Main tag deleted + When loading osm data + """ + n1 Tamenity=restaurant + n2 Thighway=bus_stop,railway=stop,name=X + n3 Tamenity=prison + """ + Then place contains exactly + | object | class | type | + | N1 | amenity | restaurant | + | N2:highway | highway | bus_stop | + | N2:railway | railway | stop | + | N3 | amenity | prison | + + When updating osm data + """ + n1 Tnot_a=restaurant + n2 Thighway=bus_stop,name=X + """ + Then place contains exactly + | object | class | type | + | N2:highway | highway | bus_stop | + | N3 | amenity | prison | + + + Scenario: Main tag added + When loading osm data + """ + n1 Tatity=restaurant + n2 Thighway=bus_stop,name=X + """ + Then place contains exactly + | object | class | type | + | N2:highway | highway | bus_stop | + + When updating osm data + """ + n1 Tamenity=restaurant + n2 Thighway=bus_stop,railway=stop,name=X + """ + Then place contains exactly + | object | class | type | + | N1 | amenity | restaurant | + | N2:highway | highway | bus_stop | + | N2:railway | railway | stop | + + + Scenario: Main tag modified + When loading osm data + """ + n10 Thighway=footway,name=X + n11 Tamenity=atm + """ + Then place contains exactly + | object | class | type | + | N10 | highway | footway | + | N11 | amenity | atm | + + When updating osm data + """ + n10 Thighway=path,name=X + n11 Thighway=primary + """ + Then place contains exactly + | object | class | type | + | N10 | highway | path | + | N11 | highway | primary | + + + Scenario: Main tags with name, name added + When loading osm data + """ + n45 Tlanduse=cemetry + n46 Tbuilding=yes + """ + Then place contains exactly + | object | class | type | + + When updating osm data + """ + n45 Tlanduse=cemetry,name=TODO + n46 Tbuilding=yes,addr:housenumber=1 + """ + Then place contains exactly + | object | class | type | + | N45 | landuse | cemetry | + | N46 | building| yes | + + + Scenario: Main tags with name, name removed + When loading osm data + """ + n45 Tlanduse=cemetry,name=TODO + n46 Tbuilding=yes,addr:housenumber=1 + """ + Then place contains exactly + | object | class | type | + | N45 | landuse | cemetry | + | N46 | building| yes | + + When updating osm data + """ + n45 Tlanduse=cemetry + n46 Tbuilding=yes + """ + Then place contains exactly + | object | class | type | + + + Scenario: Main tags with name, name modified + When loading osm data + """ + n45 Tlanduse=cemetry,name=TODO + n46 Tbuilding=yes,addr:housenumber=1 + """ + Then place contains exactly + | object | class | type | name | address | + | N45 | landuse | cemetry | 'name' : 'TODO' | - | + | N46 | building| yes | - | 'housenumber': '1'| + + When updating osm data + """ + n45 Tlanduse=cemetry,name=DONE + n46 Tbuilding=yes,addr:housenumber=10 + """ + Then place contains exactly + | object | class | type | name | address | + | N45 | landuse | cemetry | 'name' : 'DONE' | - | + | N46 | building| yes | - | 'housenumber': '10'| + + + Scenario: Main tag added to address only node + When loading osm data + """ + n1 Taddr:housenumber=345 + """ + Then place contains exactly + | object | class | type | address | + | N1 | place | house | 'housenumber': '345'| + + When updating osm data + """ + n1 Taddr:housenumber=345,building=yes + """ + Then place contains exactly + | object | class | type | address | + | N1 | building | yes | 'housenumber': '345'| + + + Scenario: Main tag removed from address only node + When loading osm data + """ + n1 Taddr:housenumber=345,building=yes + """ + Then place contains exactly + | object | class | type | address | + | N1 | building | yes | 'housenumber': '345'| + + When updating osm data + """ + n1 Taddr:housenumber=345 + """ + Then place contains exactly + | object | class | type | address | + | N1 | place | house | 'housenumber': '345'| + + + Scenario: Main tags with name key, adding key name + When loading osm data + """ + n22 Tbridge=yes + """ + Then place contains exactly + | object | class | type | + + When updating osm data + """ + n22 Tbridge=yes,bridge:name=high + """ + Then place contains exactly + | object | class | type | name | + | N22 | bridge | yes | 'name': 'high' | + + + Scenario: Main tags with name key, deleting key name + When loading osm data + """ + n22 Tbridge=yes,bridge:name=high + """ + Then place contains exactly + | object | class | type | name | + | N22 | bridge | yes | 'name': 'high' | + + When updating osm data + """ + n22 Tbridge=yes + """ + Then place contains exactly + | object | class | type | + + + Scenario: Main tags with name key, changing key name + When loading osm data + """ + n22 Tbridge=yes,bridge:name=high + """ + Then place contains exactly + | object | class | type | name | + | N22 | bridge | yes | 'name': 'high' | + + When updating osm data + """ + n22 Tbridge=yes,bridge:name:en=high + """ + Then place contains exactly + | object | class | type | name | + | N22 | bridge | yes | 'name:en': 'high' | + diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 2f598f3d..83333cb5 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -185,7 +185,10 @@ def check_place_contents(context, table, exact): if exact: cur.execute('SELECT osm_type, osm_id, class from {}'.format(table)) - assert expected_content == set([(r[0], r[1], r[2]) for r in cur]) + actual = set([(r[0], r[1], r[2]) for r in cur]) + assert expected_content == actual, \ + f"Missing entries: {expected_content - actual}\n" \ + f"Not expected in table: {actual - expected_content}" @then("(?Pplacex|place) has no entry for (?P.*)") From de2a3bd5f8c2a4a40da1f2153afdb69e387dbcd2 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 4 Oct 2022 17:01:25 +0200 Subject: [PATCH 02/16] bdd tests: make import style configurable The switch is for development. Tests are not guaranteed to still work when run with anything but the 'extratags' style. --- test/bdd/environment.py | 1 + test/bdd/steps/nominatim_environment.py | 3 +++ test/bdd/steps/steps_osm_data.py | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/bdd/environment.py b/test/bdd/environment.py index c11a2c8c..7a7b943d 100644 --- a/test/bdd/environment.py +++ b/test/bdd/environment.py @@ -27,6 +27,7 @@ userconfig = { 'API_TEST_FILE' : (TEST_BASE_DIR / 'testdb' / 'apidb-test-data.pbf').resolve(), 'SERVER_MODULE_PATH' : None, 'TOKENIZER' : None, # Test with a custom tokenizer + 'STYLE' : 'extratags', 'PHPCOV' : False, # set to output directory to enable code coverage } diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 1feafd75..5145327c 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -36,6 +36,7 @@ class NominatimEnvironment: self.api_test_db = config['API_TEST_DB'] self.api_test_file = config['API_TEST_FILE'] self.tokenizer = config['TOKENIZER'] + self.import_style = config['STYLE'] self.server_module_path = config['SERVER_MODULE_PATH'] self.reuse_template = not config['REMOVE_TEMPLATE'] self.keep_scenario_db = config['KEEP_TEST_DB'] @@ -107,6 +108,8 @@ class NominatimEnvironment: self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve()) if self.tokenizer is not None: self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer + if self.import_style is not None: + self.test_env['NOMINATIM_IMPORT_STYLE'] = self.import_style if self.server_module_path: self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 94f72796..6271f6b8 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -17,7 +17,7 @@ def get_osm2pgsql_options(nominatim_env, fname, append): return dict(import_file=fname, osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'), osm2pgsql_cache=50, - osm2pgsql_style=str(nominatim_env.src_dir / 'settings' / 'import-extratags.style'), + osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()), threads=1, dsn=nominatim_env.get_libpq_dsn(), flatnode_file='', From 51ed55cc32580644544b8e38c570bbfdaf09b5a2 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 3 Nov 2022 17:15:01 +0100 Subject: [PATCH 03/16] initial flex import scripts Only implements the extratags style for the moment. Tests pass for the same behaviour as the gazetteer output. Updates still need to be done. --- CMakeLists.txt | 1 - nominatim/clicmd/args.py | 1 + nominatim/tools/exec_utils.py | 10 +- settings/flex-base.lua | 382 +++++++++++++++++++++++++ settings/import-extratags.lua | 130 +++++++++ test/bdd/osm2pgsql/import/tags.feature | 26 +- test/bdd/steps/steps_osm_data.py | 1 + 7 files changed, 537 insertions(+), 14 deletions(-) create mode 100644 settings/flex-base.lua create mode 100644 settings/import-extratags.lua diff --git a/CMakeLists.txt b/CMakeLists.txt index 036dda31..f5f776a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL) endif() set(BUILD_TESTS_SAVED "${BUILD_TESTS}") set(BUILD_TESTS off) - set(WITH_LUA off CACHE BOOL "") add_subdirectory(osm2pgsql) set(BUILD_TESTS ${BUILD_TESTS_SAVED}) endif() diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index 2f8273d6..b120ee73 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -184,6 +184,7 @@ class NominatimArgs: return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path, osm2pgsql_cache=self.osm2pgsql_cache or default_cache, osm2pgsql_style=self.config.get_import_style_file(), + osm2pgsql_style_path=self.config.config_dir, threads=self.threads or default_threads, dsn=self.config.get_libpq_dsn(), flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''), diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 610e2182..675e070b 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -10,6 +10,7 @@ Helper functions for executing external programs. from typing import Any, Union, Optional, Mapping, IO from pathlib import Path import logging +import os import subprocess import urllib.request as urlrequest from urllib.parse import urlencode @@ -120,9 +121,16 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None: '--log-progress', 'true', '--number-processes', str(options['threads']), '--cache', str(options['osm2pgsql_cache']), - '--output', 'gazetteer', '--style', str(options['osm2pgsql_style']) ] + + if str(options['osm2pgsql_style']).endswith('.lua'): + env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / 'flex-base.lua'), + os.environ.get('LUAPATH', ';'))) + cmd.extend(('--output', 'flex')) + else: + cmd.extend(('--output', 'gazetteer')) + if options['append']: cmd.append('--append') else: diff --git a/settings/flex-base.lua b/settings/flex-base.lua new file mode 100644 index 00000000..d91299ad --- /dev/null +++ b/settings/flex-base.lua @@ -0,0 +1,382 @@ +-- Core functions for Nominatim import flex style. +-- + + +-- The single place table. +place_table = osm2pgsql.define_table{ + name = "place", + ids = { type = 'any', id_column = 'osm_id', type_column = 'osm_type' }, + columns = { + { column = 'class', type = 'text', not_null = true }, + { column = 'type', type = 'text', not_null = true }, + { column = 'admin_level', type = 'smallint' }, + { column = 'name', type = 'hstore' }, + { column = 'address', type = 'hstore' }, + { column = 'extratags', type = 'hstore' }, + { column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true }, + } +} + +------------- Place class ------------------------------------------ + +local Place = {} +Place.__index = Place + +function Place.new(object, geom_func) + local self = setmetatable({}, Place) + self.object = object + self.geom_func = geom_func + + self.admin_level = tonumber(self.object:grab_tag('admin_level')) + if self.admin_level == nil + or self.admin_level <= 0 or self.admin_level > 15 + or math.floor(self.admin_level) ~= self.admin_level then + self.admin_level = 15 + end + + self.num_entries = 0 + self.has_name = false + self.names = {} + self.address = {} + self.extratags = {} + + return self +end + +function Place:delete(data) + if data.match ~= nil then + for k, v in pairs(self.object.tags) do + if data.match(k, v) then + self.object.tags[k] = nil + end + end + end +end + +function Place:grab_extratags(data) + local count = 0 + + if data.match ~= nil then + for k, v in pairs(self.object.tags) do + if data.match(k, v) then + self.object.tags[k] = nil + self.extratags[k] = v + count = count + 1 + end + end + end + + return count +end + +function Place:grab_address(data) + local count = 0 + + if data.match ~= nil then + for k, v in pairs(self.object.tags) do + if data.match(k, v) then + self.object.tags[k] = nil + + if data.include_on_name == true then + self.has_name = true + end + + if data.out_key ~= nil then + self.address[data.out_key] = v + return 1 + end + + if k:sub(1, 5) == 'addr:' then + self.address[k:sub(6)] = v + elseif k:sub(1, 6) == 'is_in:' then + self.address[k:sub(7)] = v + else + self.address[k] = v + end + count = count + 1 + end + end + end + + return count +end + +function Place:set_address(key, value) + self.address[key] = value +end + +function Place:grab_name(data) + local count = 0 + + if data.match ~= nil then + for k, v in pairs(self.object.tags) do + if data.match(k, v) then + self.object.tags[k] = nil + self.names[k] = v + if data.include_on_name ~= false then + self.has_name = true + end + count = count + 1 + end + end + end + + return count +end + +function Place:grab_tag(key) + return self.object:grab_tag(key) +end + +function Place:tags() + return self.object.tags +end + +function Place:write_place(k, v, mtype, save_extra_mains) + if mtype == nil then + return 0 + end + + v = v or self.object.tags[k] + if v == nil then + return 0 + end + + if type(mtype) == 'table' then + mtype = mtype[v] or mtype[1] + end + + if mtype == 'always' or (self.has_name and mtype == 'named') then + return self:write_row(k, v, save_extra_mains) + end + + if mtype == 'named_with_key' then + local names = {} + local prefix = k .. ':name' + for namek, namev in pairs(self.object.tags) do + if namek:sub(1, #prefix) == prefix + and (#namek == #prefix + or namek:sub(#prefix + 1, #prefix + 1) == ':') then + names[namek:sub(#k + 2)] = namev + end + end + + if next(names) ~= nil then + local saved_names = self.names + self.names = names + + local results = self:write_row(k, v, save_extra_mains) + + self.names = saved_names + + return results + end + end + + return 0 +end + +function Place:write_row(k, v, save_extra_mains) + if self.geometry == nil then + self.geometry = self.geom_func(self.object) + end + if self.geometry:is_null() then + return 0 + end + + if save_extra_mains then + for extra_k, extra_v in pairs(self.object.tags) do + if extra_k ~= k then + self.extratags[extra_k] = extra_v + end + end + end + + place_table:insert{ + class = k, + type = v, + admin_level = self.admin_level, + name = next(self.names) and self.names, + address = next(self.address) and self.address, + extratags = next(self.extratags) and self.extratags, + geometry = self.geometry + } + + if save_extra_mains then + for k, v in pairs(self.object.tags) do + self.extratags[k] = nil + end + end + + self.num_entries = self.num_entries + 1 + + return 1 +end + + +function tag_match(data) + if data == nil or next(data) == nil then + return nil + end + + local tests = {} + + if data.keys ~= nil then + for _, key in pairs(data.keys) do + if key:sub(1, 1) == '*' then + if #key > 1 then + local suffix = key:sub(2) + tests[#tests + 1] = function (k, v) + return k:sub(-#suffix) == suffix + end + end + elseif key:sub(#key, #key) == '*' then + local prefix = key:sub(1, #key - 1) + tests[#tests + 1] = function (k, v) + return k:sub(1, #prefix) == prefix + end + else + tests[#tests + 1] = function (k, v) + return k == key + end + end + end + end + + if data.tags ~= nil then + local tags = {} + for k, vlist in pairs(data.tags) do + tags[k] = {} + for _, v in pairs(vlist) do + tags[k][v] = true + end + end + tests[#tests + 1] = function (k, v) + return tags[k] ~= nil and tags[k][v] ~= nil + end + end + + return function (k, v) + for _, func in pairs(tests) do + if func(k, v) then + return true + end + end + return false + end +end + + +-- Process functions for all data types +function osm2pgsql.process_node(object) + + local function geom_func(o) + return o:as_point() + end + + process_tags(Place.new(object, geom_func)) +end + +function osm2pgsql.process_way(object) + + local function geom_func(o) + local geom = o:as_polygon() + + if geom:is_null() then + geom = o:as_linestring() + end + + return geom + end + + process_tags(Place.new(object, geom_func)) +end + +function relation_as_multipolygon(o) + return o:as_multipolygon() +end + +function relation_as_multiline(o) + return o:as_multilinestring():line_merge() +end + +function osm2pgsql.process_relation(object) + local geom_func = RELATION_TYPES[object.tags.type] + + if geom_func ~= nil then + process_tags(Place.new(object, geom_func)) + end +end + +function process_tags(o) + local fallback + + o:delete{match = PRE_DELETE} + o:grab_extratags{match = PRE_EXTRAS} + + -- Exception for boundary/place double tagging + if o.object.tags.boundary == 'administrative' then + o:grab_extratags{match = function (k, v) + return k == 'place' and v:sub(1,3) ~= 'isl' + end} + end + + -- address keys + o:grab_address{match=function (k, v) return COUNTRY_TAGS(k, v) and #v == 2 end, + out_key='country'} + if o:grab_name{match=HOUSENAME_TAGS} > 0 then + fallback = {'place', 'house'} + end + if o:grab_address{match=HOUSENUMBER_TAGS, include_on_name = true} > 0 and fallback == nil then + fallback = {'place', 'house'} + end + if o:grab_address{match=POSTCODES, out_key='postcode'} > 0 and fallback == nil then + fallback = {'place', 'postcode'} + end + + local is_interpolation = o:grab_address{match=INTERPOLATION_TAGS} > 0 + + if ADD_TIGER_COUNTY then + local v = o:grab_tag('tiger:county') + if v ~= nil then + v, num = v:gsub(',.*', ' county') + if num == 0 then + v = v .. ' county' + end + o:set_address('tiger:county', v) + end + end + o:grab_address{match=ADDRESS_TAGS} + + if is_interpolation then + o:write_place('place', 'houses', 'always', SAVE_EXTRA_MAINS) + return + end + + -- name keys + o:grab_name{match = NAMES} + o:grab_name{match = REFS, include_on_name = false} + + o:delete{match = POST_DELETE} + o:grab_extratags{match = POST_EXTRAS} + + -- collect main keys + local num_mains = 0 + for k, v in pairs(o:tags()) do + num_mains = num_mains + o:write_place(k, v, MAIN_KEYS[k], SAVE_EXTRA_MAINS) + end + + if num_mains == 0 then + for tag, mtype in pairs(MAIN_FALLBACK_KEYS) do + if o:write_place(tag, nil, mtype, SAVE_EXTRA_MAINS) > 0 then + return + end + end + + if fallback ~= nil then + o:write_place(fallback[1], fallback[2], 'always', SAVE_EXTRA_MAINS) + end + end +end + + diff --git a/settings/import-extratags.lua b/settings/import-extratags.lua new file mode 100644 index 00000000..535af3c8 --- /dev/null +++ b/settings/import-extratags.lua @@ -0,0 +1,130 @@ +require('flex-base') + +RELATION_TYPES = { + multipolygon = relation_as_multipolygon, + boundary = relation_as_multipolygon, + waterway = relation_as_multiline +} + +MAIN_KEYS = { + emergency = 'always', + historic = 'always', + military = 'always', + natural = 'named', + landuse = 'named', + highway = {'always', + street_lamp = 'named', + traffic_signals = 'named', + service = 'named', + cycleway = 'named', + path = 'named', + footway = 'named', + steps = 'named', + bridleway = 'named', + track = 'named', + motorway_link = 'named', + trunk_link = 'named', + primary_link = 'named', + secondary_link = 'named', + tertiary_link = 'named'}, + railway = 'named', + man_made = 'always', + aerialway = 'always', + boundary = {'named', + postal_code = 'named'}, + aeroway = 'always', + amenity = 'always', + club = 'always', + craft = 'always', + leisure = 'always', + office = 'always', + mountain_pass = 'always', + shop = 'always', + tourism = 'always', + bridge = 'named_with_key', + tunnel = 'named_with_key', + waterway = 'named', + place = 'always' +} + +MAIN_FALLBACK_KEYS = { + building = 'named', + landuse = 'named', + junction = 'named', + healthcare = 'named' +} + + +PRE_DELETE = tag_match{keys = {'note', 'note:*', 'source', 'source*', 'attribution', + 'comment', 'fixme', 'FIXME', 'created_by', 'NHD:*', + 'nhd:*', 'gnis:*', 'geobase:*', 'KSJ2:*', 'yh:*', + 'osak:*', 'naptan:*', 'CLC:*', 'import', 'it:fvg:*', + 'type', 'lacounty:*', 'ref:ruian:*', 'building:ruian:type', + 'ref:linz:*', 'is_in:postcode'}, + tags = {emergency = {'yes', 'no', 'fire_hydrant'}, + historic = {'yes', 'no'}, + military = {'yes', 'no'}, + natural = {'yes', 'no', 'coastline'}, + highway = {'no', 'turning_circle', 'mini_roundabout', + 'noexit', 'crossing', 'give_way', 'stop'}, + railway = {'level_crossing', 'no', 'rail'}, + man_made = {'survey_point', 'cutline'}, + aerialway = {'pylon', 'no'}, + aeroway = {'no'}, + amenity = {'no'}, + club = {'no'}, + craft = {'no'}, + leisure = {'no'}, + office = {'no'}, + mountain_pass = {'no'}, + shop = {'no'}, + tourism = {'yes', 'no'}, + bridge = {'no'}, + tunnel = {'no'}, + waterway = {'riverbank'}, + building = {'no'}, + boundary = {'place'}} + } + +POST_DELETE = tag_match{keys = {'tiger:*'}} + +PRE_EXTRAS = tag_match{keys = {'*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*', + 'name:etymology', 'name:signed', 'name:botanical', + 'wikidata', '*:wikidata', + 'addr:street:name', 'addr:street:type'} + } + + +NAMES = tag_match{keys = {'name', 'name:*', + 'int_name', 'int_name:*', + 'nat_name', 'nat_name:*', + 'reg_name', 'reg_name:*', + 'loc_name', 'loc_name:*', + 'old_name', 'old_name:*', + 'alt_name', 'alt_name:*', 'alt_name_*', + 'official_name', 'official_name:*', + 'place_name', 'place_name:*', + 'short_name', 'short_name:*', 'brand'}} + +REFS = tag_match{keys = {'ref', 'int_ref', 'nat_ref', 'reg_ref', 'loc_ref', 'old_ref', + 'iata', 'icao', 'pcode', 'pcode:*', 'ISO3166-2'}} + +POSTCODES = tag_match{keys = {'postal_code', 'postcode', 'addr:postcode', + 'tiger:zip_left', 'tiger:zip_right'}} + +COUNTRY_TAGS = tag_match{keys = {'country_code', 'ISO3166-1', + 'addr:country_code', 'is_in:country_code', + 'addr:country', 'is_in:country'}} + +HOUSENAME_TAGS = tag_match{keys = {'addr:housename'}} + +HOUSENUMBER_TAGS = tag_match{keys = {'addr:housenumber', 'addr:conscriptionnumber', + 'addr:streetnumber'}} + +INTERPOLATION_TAGS = tag_match{keys = {'addr:interpolation'}} + +ADDRESS_TAGS = tag_match{keys = {'addr:*', 'is_in:*'}} +ADD_TIGER_COUNTY = true + +SAVE_EXTRA_MAINS = true + diff --git a/test/bdd/osm2pgsql/import/tags.feature b/test/bdd/osm2pgsql/import/tags.feature index 8c3d5c13..83d7fe52 100644 --- a/test/bdd/osm2pgsql/import/tags.feature +++ b/test/bdd/osm2pgsql/import/tags.feature @@ -106,10 +106,10 @@ Feature: Tag evaluation n7002 Thighway=primary,bridge=yes,bridge:name=1 """ Then place contains exactly - | object | class | type | name | extratags | - | N7001 | highway | primary | 'name': '1' | - | - | N7002:highway | highway | primary | - | 'bridge:name': '1'| - | N7002:bridge | bridge | yes | 'name': '1' | 'bridge:name': '1'| + | object | class | type | name | extratags+bridge:name | + | N7001 | highway | primary | 'name': '1' | - | + | N7002:highway | highway | primary | - | 1 | + | N7002:bridge | bridge | yes | 'name': '1' | 1 | Scenario: Global fallback and skipping @@ -153,13 +153,15 @@ Feature: Tag evaluation n10002 Tboundary=natural,place=city,name=B n10003 Tboundary=administrative,place=island,name=C """ - Then place contains exactly + Then place contains | object | class | type | extratags | | N10001 | boundary | administrative | 'place': 'city' | - | N10002:boundary | boundary | natural | - | - | N10002:place | place | city | - | - | N10003:boundary | boundary | administrative | - | - | N10003:place | place | island | - | + And place contains + | object | class | type | + | N10002:boundary | boundary | natural | + | N10002:place | place | city | + | N10003:boundary | boundary | administrative | + | N10003:place | place | island | Scenario: Shorten tiger:county tags @@ -200,6 +202,6 @@ Feature: Tag evaluation n13002 Taddr:interpolation=even,place=city """ Then place contains exactly - | object | class | type | extratags | address | - | N13001 | place | houses | - | 'interpolation': 'odd' | - | N13002 | place | houses | 'place': 'city' | 'interpolation': 'even' | + | object | class | type | address | + | N13001 | place | houses | 'interpolation': 'odd' | + | N13002 | place | houses | 'interpolation': 'even' | diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 6271f6b8..0082bd08 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -18,6 +18,7 @@ def get_osm2pgsql_options(nominatim_env, fname, append): osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'), osm2pgsql_cache=50, osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()), + osm2pgsql_style_path=nominatim_env.get_test_config().config_dir, threads=1, dsn=nominatim_env.get_libpq_dsn(), flatnode_file='', From 2fac507453ca5d50ecc8ccd4f8d66aa0b8954c18 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 4 Nov 2022 14:50:07 +0100 Subject: [PATCH 04/16] change updates to handle delete/insert workflow This makes Nominatim compatible with osm2pgsql's default update modus operandi of deleting and reinserting data. Deletes are diverted into a TODO table instead of executing them. When data is reinserted, the corresponding entry in the TODO table is deleted. After updates are finished, the remaining entries in the TODO table are executed, doing the same work as the delete trigger did before. The new behaviour also works against the gazetteer output with its insert-only mechanism. --- lib-sql/functions/place_triggers.sql | 79 ++++++++++++++++++++-------- lib-sql/indices.sql | 10 ++++ nominatim/tools/replication.py | 24 +++++++-- test/bdd/steps/place_inserter.py | 6 +++ test/bdd/steps/steps_db_ops.py | 7 ++- test/bdd/steps/steps_osm_data.py | 4 +- 6 files changed, 103 insertions(+), 27 deletions(-) diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index 9b968c3e..5b2642e7 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -34,6 +34,11 @@ BEGIN RETURN null; END IF; + -- Remove the place from the list of places to be deleted + DELETE FROM place_to_be_deleted pdel + WHERE pdel.osm_type = NEW.osm_type and pdel.osm_id = NEW.osm_id + and pdel.class = NEW.class; + -- Have we already done this place? SELECT * INTO existing FROM place @@ -321,35 +326,67 @@ BEGIN END; $$ LANGUAGE plpgsql; - CREATE OR REPLACE FUNCTION place_delete() RETURNS TRIGGER AS $$ DECLARE - has_rank BOOLEAN; + deferred BOOLEAN; BEGIN + {% if debug %}RAISE WARNING 'Delete for % % %/%', OLD.osm_type, OLD.osm_id, OLD.class, OLD.type;{% endif %} - {% if debug %}RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type;{% endif %} - - -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through - IF st_area(OLD.geometry) > 2 and st_isvalid(OLD.geometry) THEN - SELECT bool_or(not (rank_address = 0 or rank_address > 25)) as ranked FROM placex WHERE osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type INTO has_rank; - IF has_rank THEN - insert into import_polygon_delete (osm_type, osm_id, class, type) values (OLD.osm_type,OLD.osm_id,OLD.class,OLD.type); - RETURN NULL; - END IF; + deferred := ST_IsValid(OLD.geometry) and ST_Area(OLD.geometry) > 2; + IF deferred THEN + SELECT bool_or(not (rank_address = 0 or rank_address > 25)) INTO deferred + FROM placex + WHERE osm_type = OLD.osm_type and osm_id = OLD.osm_id + and class = OLD.class and type = OLD.type; END IF; - -- mark for delete - UPDATE placex set indexed_status = 100 where osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type; + INSERT INTO place_to_be_deleted (osm_type, osm_id, class, type, deferred) + VALUES(OLD.osm_type, OLD.osm_id, OLD.class, OLD.type, deferred); - -- interpolations are special - IF OLD.osm_type='W' and OLD.class = 'place' and OLD.type = 'houses' THEN - UPDATE location_property_osmline set indexed_status = 100 where osm_id = OLD.osm_id; -- osm_id = wayid (=old.osm_id) - END IF; - - RETURN OLD; + RETURN NULL; END; -$$ -LANGUAGE plpgsql; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION flush_deleted_places() + RETURNS INTEGER + AS $$ +BEGIN + -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through + INSERT INTO import_polygon_delete (osm_type, osm_id, class, type) + SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred; + + -- delete from place table + ALTER TABLE place DISABLE TRIGGER place_before_delete; + DELETE FROM place USING place_to_be_deleted + WHERE place.osm_type = place_to_be_deleted.osm_type + and place.osm_id = place_to_be_deleted.osm_id + and place.class = place_to_be_deleted.class + and place.type = place_to_be_deleted.type + and not deferred; + ALTER TABLE place ENABLE TRIGGER place_before_delete; + + -- Mark for delete in the placex table + UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted + WHERE placex.osm_type = place_to_be_deleted.osm_type + and placex.osm_id = place_to_be_deleted.osm_id + and placex.class = place_to_be_deleted.class + and placex.type = place_to_be_deleted.type + and not deferred; + + -- Mark for delete in interpolations + UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted + WHERE place_to_be_deleted.osm_type = 'W' + and place_to_be_deleted.class = 'place' + and place_to_be_deleted.type = 'houses' + and location_property_osmline.osm_id = place_to_be_deleted.osm_id + and not deferred; + + -- Clear todo list. + TRUNCATE TABLE place_to_be_deleted; + + RETURN NULL; +END; +$$ LANGUAGE plpgsql; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index b1396034..54e88fd8 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -82,4 +82,14 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}} WHERE startnumber is not null; {% endif %} +--- +-- Table needed for running updates with osm2pgsql on place. + CREATE TABLE IF NOT EXISTS place_to_be_deleted ( + osm_type CHAR(1), + osm_id BIGINT, + class TEXT, + type TEXT, + deferred BOOLEAN + ); + {% endif %} diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py index 443a9577..846b9c34 100644 --- a/nominatim/tools/replication.py +++ b/nominatim/tools/replication.py @@ -130,10 +130,7 @@ def update(conn: Connection, options: MutableMapping[str, Any], if endseq is None: return UpdateState.NO_CHANGES - # Consume updates with osm2pgsql. - options['append'] = True - options['disable_jit'] = conn.server_version_tuple() >= (11, 0) - run_osm2pgsql(options) + run_osm2pgsql_updates(conn, options) # Write the current status to the file endstate = repl.get_state_info(endseq) @@ -143,6 +140,25 @@ def update(conn: Connection, options: MutableMapping[str, Any], return UpdateState.UP_TO_DATE +def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) -> None: + """ Run osm2pgsql in append mode. + """ + # Remove any stale deletion marks. + with conn.cursor() as cur: + cur.execute('TRUNCATE place_to_be_deleted') + conn.commit() + + # Consume updates with osm2pgsql. + options['append'] = True + options['disable_jit'] = conn.server_version_tuple() >= (11, 0) + run_osm2pgsql(options) + + # Handle deletions + with conn.cursor() as cur: + cur.execute('SELECT flush_deleted_places()') + conn.commit() + + def _make_replication_server(url: str, timeout: int) -> ContextManager[ReplicationServer]: """ Returns a ReplicationServer in form of a context manager. diff --git a/test/bdd/steps/place_inserter.py b/test/bdd/steps/place_inserter.py index 6e7e6a75..c033ac17 100644 --- a/test/bdd/steps/place_inserter.py +++ b/test/bdd/steps/place_inserter.py @@ -92,6 +92,12 @@ class PlaceColumn: else: self.columns[column] = {key: value} + def db_delete(self, cursor): + """ Issue a delete for the given OSM object. + """ + cursor.execute('DELETE FROM place WHERE osm_type = %s and osm_id = %s', + (self.columns['osm_type'] , self.columns['osm_id'])) + def db_insert(self, cursor): """ Insert the collected data into the database. """ diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 83333cb5..84379cbe 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -118,7 +118,10 @@ def update_place_table(context): context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: for row in context.table: - PlaceColumn(context).add_row(row, False).db_insert(cur) + col = PlaceColumn(context).add_row(row, False) + col.db_delete(cur) + col.db_insert(cur) + cur.execute('SELECT flush_deleted_places()') context.nominatim.reindex_placex(context.db) check_database_integrity(context) @@ -143,8 +146,10 @@ def delete_places(context, oids): """ context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: + cur.execute('TRUNCATE place_to_be_deleted') for oid in oids.split(','): NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}') + cur.execute('SELECT flush_deleted_places()') context.nominatim.reindex_placex(context.db) diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 0082bd08..20d327df 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -10,6 +10,7 @@ import os from pathlib import Path from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.tools.replication import run_osm2pgsql_updates from geometry_alias import ALIASES @@ -118,6 +119,7 @@ def update_from_osm_file(context): # create an OSM file and import it fname = write_opl_file(context.text, context.osm) try: - run_osm2pgsql(get_osm2pgsql_options(context.nominatim, fname, append=True)) + run_osm2pgsql_updates(context.db, + get_osm2pgsql_options(context.nominatim, fname, append=True)) finally: os.remove(fname) From 74405e96849da87173106989f786e056818cbb92 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 4 Nov 2022 15:20:43 +0100 Subject: [PATCH 05/16] add migration for place_to_be_deleted table --- nominatim/tools/migration.py | 17 +++++++++++++++++ nominatim/version.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 7854154c..d5806097 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -315,3 +315,20 @@ def mark_internal_country_names(conn: Connection, config: Configuration, **_: An names = {} names['countrycode'] = country_code analyzer.add_country_names(country_code, names) + + +@_migration(4, 1, 99, 0) +def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None: + """ Add helper table for deleting data on updates. + + The table is only necessary when updates are possible, i.e. + the database is not in freeze mode. + """ + if conn.table_exists('place'): + with conn.cursor() as cur: + cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted ( + osm_type CHAR(1), + osm_id BIGINT, + class TEXT, + type TEXT, + deferred BOOLEAN)""") diff --git a/nominatim/version.py b/nominatim/version.py index e7e750b0..36573040 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -25,7 +25,7 @@ from typing import Optional, Tuple # patch level when cherry-picking the commit with the migration. # # Released versions always have a database patch level of 0. -NOMINATIM_VERSION = (4, 1, 0, 0) +NOMINATIM_VERSION = (4, 1, 99, 0) POSTGRESQL_REQUIRED_VERSION = (9, 6) POSTGIS_REQUIRED_VERSION = (2, 2) From a1da14921150b8432fb92ec260597cef7a7d7ceb Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 4 Nov 2022 16:03:04 +0100 Subject: [PATCH 06/16] CI: require lua libraries --- .github/actions/build-nominatim/action.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index 125a0f8b..acc9b8b6 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -9,6 +9,10 @@ inputs: description: 'Additional options to hand to cmake' required: false default: '' + lua: + description: 'Version of Lua to use' + required: false + default: '5.3' runs: using: "composite" @@ -21,7 +25,7 @@ runs: shell: bash - name: Install prerequisites run: | - sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev + sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} if [ "x$UBUNTUVER" == "x18" ]; then pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 datrie else @@ -31,6 +35,7 @@ runs: env: UBUNTUVER: ${{ inputs.ubuntu }} CMAKE_ARGS: ${{ inputs.cmake-args }} + LUA_VERSION: ${{ inputs.lua }} - name: Configure run: mkdir build && cd build && cmake $CMAKE_ARGS ../Nominatim From 84e5e601e1bddd4babe2efd1b302593635ddcd5d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 4 Nov 2022 22:43:10 +0100 Subject: [PATCH 07/16] add lua requirements for vagrant scripts --- vagrant/Install-on-Ubuntu-18.sh | 2 +- vagrant/Install-on-Ubuntu-20.sh | 2 +- vagrant/Install-on-Ubuntu-22.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vagrant/Install-on-Ubuntu-18.sh b/vagrant/Install-on-Ubuntu-18.sh index 53284ea8..621e4473 100755 --- a/vagrant/Install-on-Ubuntu-18.sh +++ b/vagrant/Install-on-Ubuntu-18.sh @@ -24,7 +24,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: sudo apt install -y php-cgi sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev\ - libbz2-dev libpq-dev \ + libbz2-dev libpq-dev liblua5.3-dev lua5.3\ postgresql-10-postgis-2.4 \ postgresql-contrib-10 postgresql-10-postgis-scripts \ php-cli php-pgsql php-intl libicu-dev python3-pip \ diff --git a/vagrant/Install-on-Ubuntu-20.sh b/vagrant/Install-on-Ubuntu-20.sh index a8654490..87aad5ef 100755 --- a/vagrant/Install-on-Ubuntu-20.sh +++ b/vagrant/Install-on-Ubuntu-20.sh @@ -23,7 +23,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: sudo apt install -y php-cgi sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev \ - libbz2-dev libpq-dev \ + libbz2-dev libpq-dev liblua5.3-dev lua5.3 \ postgresql-12-postgis-3 \ postgresql-contrib-12 postgresql-12-postgis-3-scripts \ php-cli php-pgsql php-intl libicu-dev python3-dotenv \ diff --git a/vagrant/Install-on-Ubuntu-22.sh b/vagrant/Install-on-Ubuntu-22.sh index a7ba6866..83cd5350 100755 --- a/vagrant/Install-on-Ubuntu-22.sh +++ b/vagrant/Install-on-Ubuntu-22.sh @@ -23,7 +23,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: sudo apt install -y php-cgi sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev \ - libbz2-dev libpq-dev \ + libbz2-dev libpq-dev liblua5.3-dev lua5.3 \ postgresql-server-dev-14 postgresql-14-postgis-3 \ postgresql-contrib-14 postgresql-14-postgis-3-scripts \ php-cli php-pgsql php-intl libicu-dev python3-dotenv \ From 3683cf7ddc72b57c0990430ca5b03e370b29f8c3 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 8 Nov 2022 10:21:44 +0100 Subject: [PATCH 08/16] optimise tag match function --- settings/flex-base.lua | 47 +++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/settings/flex-base.lua b/settings/flex-base.lua index d91299ad..d9462588 100644 --- a/settings/flex-base.lua +++ b/settings/flex-base.lua @@ -219,49 +219,58 @@ function tag_match(data) return nil end - local tests = {} + local fullmatches = {} + local key_prefixes = {} + local key_suffixes = {} if data.keys ~= nil then for _, key in pairs(data.keys) do if key:sub(1, 1) == '*' then if #key > 1 then - local suffix = key:sub(2) - tests[#tests + 1] = function (k, v) - return k:sub(-#suffix) == suffix + if key_suffixes[#key - 1] == nil then + key_suffixes[#key - 1] = {} end + key_suffixes[#key - 1][key:sub(2)] = true end elseif key:sub(#key, #key) == '*' then - local prefix = key:sub(1, #key - 1) - tests[#tests + 1] = function (k, v) - return k:sub(1, #prefix) == prefix + if key_prefixes[#key - 1] == nil then + key_prefixes[#key - 1] = {} end + key_prefixes[#key - 1][key:sub(1, #key - 1)] = true else - tests[#tests + 1] = function (k, v) - return k == key - end + fullmatches[key] = true end end end if data.tags ~= nil then - local tags = {} for k, vlist in pairs(data.tags) do - tags[k] = {} - for _, v in pairs(vlist) do - tags[k][v] = true + if fullmatches[k] == nil then + fullmatches[k] = {} + for _, v in pairs(vlist) do + fullmatches[k][v] = true + end end end - tests[#tests + 1] = function (k, v) - return tags[k] ~= nil and tags[k][v] ~= nil - end end return function (k, v) - for _, func in pairs(tests) do - if func(k, v) then + if fullmatches[k] ~= nil and (fullmatches[k] == true or fullmatches[k][v] ~= nil) then + return true + end + + for slen, slist in pairs(key_suffixes) do + if #k >= slen and slist[k:sub(-slen)] ~= nil then return true end end + + for slen, slist in pairs(key_prefixes) do + if #k >= slen and slist[k:sub(1, slen)] ~= nil then + return true + end + end + return false end end From b98d3d3f005cb1a55261f87bc057d99cc4ba2a5b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 8 Nov 2022 14:06:18 +0100 Subject: [PATCH 09/16] bdd: extend osm2pgsql update tests Now also checks for correct indexing state of placex table. --- test/bdd/osm2pgsql/update/simple.feature | 86 ++++++-------- test/bdd/osm2pgsql/update/tags.feature | 140 +++++++++++++++++++++++ 2 files changed, 177 insertions(+), 49 deletions(-) diff --git a/test/bdd/osm2pgsql/update/simple.feature b/test/bdd/osm2pgsql/update/simple.feature index 072f83fa..5a86917c 100644 --- a/test/bdd/osm2pgsql/update/simple.feature +++ b/test/bdd/osm2pgsql/update/simple.feature @@ -2,60 +2,48 @@ Feature: Update of simple objects by osm2pgsql Testing basic update functions of osm2pgsql. - Scenario: Import object with two main tags + Scenario: Adding a new object When loading osm data """ - n1 Ttourism=hotel,amenity=restaurant,name=foo - n2 Tplace=locality,name=spotty + n1 Tplace=town,name=Middletown """ - Then place contains - | object | type | name+name | - | N1:tourism | hotel | foo | - | N1:amenity | restaurant | foo | - | N2:place | locality | spotty | - When updating osm data - """ - n1 dV Ttourism=hotel,name=foo - n2 dD - """ - Then place has no entry for N1:amenity - And place has no entry for N2 - And place contains - | object | class | type | name | - | N1:tourism | tourism | hotel | 'name' : 'foo' | + Then place contains exactly + | object | type | name+name | + | N1:place | town | Middletown | - Scenario: Downgrading a highway to one that is dropped without name - When loading osm data - """ - n100 x0 y0 - n101 x0.0001 y0.0001 - w1 Thighway=residential Nn100,n101 - """ - Then place contains - | object | - | W1:highway | - When updating osm data - """ - w1 Thighway=service Nn100,n101 - """ - Then place has no entry for W1 + When updating osm data + """ + n2 Tamenity=hotel,name=Posthotel + """ + Then place contains exactly + | object | type | name+name | + | N1:place | town | Middletown | + | N2:amenity | hotel | Posthotel | + And placex contains exactly + | object | type | name+name | indexed_status | + | N1:place | town | Middletown | 0 | + | N2:amenity | hotel | Posthotel | 1 | - Scenario: Downgrading a highway when a second tag is present + + Scenario: Deleting an existing object When loading osm data """ - n100 x0 y0 - n101 x0.0001 y0.0001 - w1 Thighway=residential,tourism=hotel Nn100,n101 + n1 Tplace=town,name=Middletown + n2 Tamenity=hotel,name=Posthotel """ - Then place contains - | object | - | W1:highway | - | W1:tourism | - When updating osm data - """ - w1 Thighway=service,tourism=hotel Nn100,n101 - """ - Then place has no entry for W1:highway - And place contains - | object | - | W1:tourism | + Then place contains exactly + | object | type | name+name | + | N1:place | town | Middletown | + | N2:amenity | hotel | Posthotel | + + When updating osm data + """ + n2 dD + """ + Then place contains exactly + | object | type | name+name | + | N1:place | town | Middletown | + And placex contains exactly + | object | type | name+name | indexed_status | + | N1:place | town | Middletown | 0 | + | N2:amenity | hotel | Posthotel | 100 | diff --git a/test/bdd/osm2pgsql/update/tags.feature b/test/bdd/osm2pgsql/update/tags.feature index 0b6c2a62..bfd3c546 100644 --- a/test/bdd/osm2pgsql/update/tags.feature +++ b/test/bdd/osm2pgsql/update/tags.feature @@ -26,6 +26,12 @@ Feature: Tag evaluation | object | class | type | | N2:highway | highway | bus_stop | | N3 | amenity | prison | + And placex contains exactly + | object | indexed_status | + | N1:amenity | 100 | + | N2:highway | 2 | + | N2:railway | 100 | + | N3:amenity | 0 | Scenario: Main tag added @@ -48,6 +54,11 @@ Feature: Tag evaluation | N1 | amenity | restaurant | | N2:highway | highway | bus_stop | | N2:railway | railway | stop | + And placex contains exactly + | object | indexed_status | + | N1:amenity | 1 | + | N2:highway | 2 | + | N2:railway | 1 | Scenario: Main tag modified @@ -70,6 +81,10 @@ Feature: Tag evaluation | object | class | type | | N10 | highway | path | | N11 | highway | primary | + And placex contains + | object | indexed_status | + | N11:amenity | 100 | + | N11:highway | 1 | Scenario: Main tags with name, name added @@ -90,6 +105,10 @@ Feature: Tag evaluation | object | class | type | | N45 | landuse | cemetry | | N46 | building| yes | + And placex contains exactly + | object | indexed_status | + | N45:landuse | 1 | + | N46:building | 1 | Scenario: Main tags with name, name removed @@ -110,6 +129,10 @@ Feature: Tag evaluation """ Then place contains exactly | object | class | type | + And placex contains exactly + | object | indexed_status | + | N45:landuse | 100 | + | N46:building | 100 | Scenario: Main tags with name, name modified @@ -132,6 +155,10 @@ Feature: Tag evaluation | object | class | type | name | address | | N45 | landuse | cemetry | 'name' : 'DONE' | - | | N46 | building| yes | - | 'housenumber': '10'| + And placex contains exactly + | object | indexed_status | + | N45:landuse | 2 | + | N46:building | 2 | Scenario: Main tag added to address only node @@ -150,6 +177,10 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | address | | N1 | building | yes | 'housenumber': '345'| + And placex contains exactly + | object | indexed_status | + | N1:place | 100 | + | N1:building | 1 | Scenario: Main tag removed from address only node @@ -168,6 +199,10 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | address | | N1 | place | house | 'housenumber': '345'| + And placex contains exactly + | object | indexed_status | + | N1:place | 1 | + | N1:building | 100 | Scenario: Main tags with name key, adding key name @@ -185,6 +220,9 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | name | | N22 | bridge | yes | 'name': 'high' | + And placex contains exactly + | object | indexed_status | + | N22:bridge | 1 | Scenario: Main tags with name key, deleting key name @@ -202,6 +240,9 @@ Feature: Tag evaluation """ Then place contains exactly | object | class | type | + And placex contains exactly + | object | indexed_status | + | N22:bridge | 100 | Scenario: Main tags with name key, changing key name @@ -220,4 +261,103 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | name | | N22 | bridge | yes | 'name:en': 'high' | + And placex contains exactly + | object | indexed_status | + | N22:bridge | 2 | + + Scenario: Downgrading a highway to one that is dropped without name + When loading osm data + """ + n100 x0 y0 + n101 x0.0001 y0.0001 + w1 Thighway=residential Nn100,n101 + """ + Then place contains exactly + | object | + | W1:highway | + + When updating osm data + """ + w1 Thighway=service Nn100,n101 + """ + Then place contains exactly + | object | + And placex contains exactly + | object | indexed_status | + | W1:highway | 100 | + + + Scenario: Upgrading a highway to one that is not dropped without name + When loading osm data + """ + n100 x0 y0 + n101 x0.0001 y0.0001 + w1 Thighway=service Nn100,n101 + """ + Then place contains exactly + | object | + + When updating osm data + """ + w1 Thighway=unclassified Nn100,n101 + """ + Then place contains exactly + | object | + | W1:highway | + And placex contains exactly + | object | indexed_status | + | W1:highway | 1 | + + + Scenario: Downgrading a highway when a second tag is present + When loading osm data + """ + n100 x0 y0 + n101 x0.0001 y0.0001 + w1 Thighway=residential,tourism=hotel Nn100,n101 + """ + Then place contains exactly + | object | + | W1:highway | + | W1:tourism | + + When updating osm data + """ + w1 Thighway=service,tourism=hotel Nn100,n101 + """ + Then place contains exactly + | object | + | W1:tourism | + And placex contains exactly + | object | + | W1:tourism | + | W1:highway | + And placex contains + | object | indexed_status | + | W1:highway | 100 | + + + Scenario: Upgrading a highway when a second tag is present + When loading osm data + """ + n100 x0 y0 + n101 x0.0001 y0.0001 + w1 Thighway=service,tourism=hotel Nn100,n101 + """ + Then place contains exactly + | object | + | W1:tourism | + + When updating osm data + """ + w1 Thighway=residential,tourism=hotel Nn100,n101 + """ + Then place contains exactly + | object | + | W1:highway | + | W1:tourism | + And placex contains exactly + | object | indexed_status | + | W1:tourism | 2 | + | W1:highway | 1 | From 68d09f9cadc016ac0b98485dbea087463832e00a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Nov 2022 11:11:45 +0100 Subject: [PATCH 10/16] node locations must be stable for osm2pgsql update tests --- test/bdd/osm2pgsql/update/tags.feature | 41 +++++++++++++++----------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/test/bdd/osm2pgsql/update/tags.feature b/test/bdd/osm2pgsql/update/tags.feature index bfd3c546..ed14429c 100644 --- a/test/bdd/osm2pgsql/update/tags.feature +++ b/test/bdd/osm2pgsql/update/tags.feature @@ -2,6 +2,11 @@ Feature: Tag evaluation Tests if tags are correctly updated in the place table + Background: + Given the grid + | 1 | 2 | 3 | + | 10 | 11 | | + | 45 | 46 | | Scenario: Main tag deleted When loading osm data @@ -29,7 +34,7 @@ Feature: Tag evaluation And placex contains exactly | object | indexed_status | | N1:amenity | 100 | - | N2:highway | 2 | + | N2:highway | 0 | | N2:railway | 100 | | N3:amenity | 0 | @@ -57,7 +62,7 @@ Feature: Tag evaluation And placex contains exactly | object | indexed_status | | N1:amenity | 1 | - | N2:highway | 2 | + | N2:highway | 0 | | N2:railway | 1 | @@ -208,62 +213,62 @@ Feature: Tag evaluation Scenario: Main tags with name key, adding key name When loading osm data """ - n22 Tbridge=yes + n2 Tbridge=yes """ Then place contains exactly | object | class | type | When updating osm data """ - n22 Tbridge=yes,bridge:name=high + n2 Tbridge=yes,bridge:name=high """ Then place contains exactly | object | class | type | name | - | N22 | bridge | yes | 'name': 'high' | + | N2 | bridge | yes | 'name': 'high' | And placex contains exactly - | object | indexed_status | - | N22:bridge | 1 | + | object | indexed_status | + | N2:bridge | 1 | Scenario: Main tags with name key, deleting key name When loading osm data """ - n22 Tbridge=yes,bridge:name=high + n2 Tbridge=yes,bridge:name=high """ Then place contains exactly | object | class | type | name | - | N22 | bridge | yes | 'name': 'high' | + | N2 | bridge | yes | 'name': 'high' | When updating osm data """ - n22 Tbridge=yes + n2 Tbridge=yes """ Then place contains exactly | object | class | type | And placex contains exactly - | object | indexed_status | - | N22:bridge | 100 | + | object | indexed_status | + | N2:bridge | 100 | Scenario: Main tags with name key, changing key name When loading osm data """ - n22 Tbridge=yes,bridge:name=high + n2 Tbridge=yes,bridge:name=high """ Then place contains exactly | object | class | type | name | - | N22 | bridge | yes | 'name': 'high' | + | N2 | bridge | yes | 'name': 'high' | When updating osm data """ - n22 Tbridge=yes,bridge:name:en=high + n2 Tbridge=yes,bridge:name:en=high """ Then place contains exactly | object | class | type | name | - | N22 | bridge | yes | 'name:en': 'high' | + | N2 | bridge | yes | 'name:en': 'high' | And placex contains exactly - | object | indexed_status | - | N22:bridge | 2 | + | object | indexed_status | + | N2:bridge | 2 | Scenario: Downgrading a highway to one that is dropped without name From 2dafc4cf4fc46ea4be293a458d565fbff645ac28 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Nov 2022 15:51:55 +0100 Subject: [PATCH 11/16] remove tests that differ between lua and gazetteer versions --- test/bdd/osm2pgsql/update/tags.feature | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/bdd/osm2pgsql/update/tags.feature b/test/bdd/osm2pgsql/update/tags.feature index ed14429c..b94f2564 100644 --- a/test/bdd/osm2pgsql/update/tags.feature +++ b/test/bdd/osm2pgsql/update/tags.feature @@ -31,10 +31,9 @@ Feature: Tag evaluation | object | class | type | | N2:highway | highway | bus_stop | | N3 | amenity | prison | - And placex contains exactly + And placex contains | object | indexed_status | | N1:amenity | 100 | - | N2:highway | 0 | | N2:railway | 100 | | N3:amenity | 0 | @@ -59,10 +58,9 @@ Feature: Tag evaluation | N1 | amenity | restaurant | | N2:highway | highway | bus_stop | | N2:railway | railway | stop | - And placex contains exactly + And placex contains | object | indexed_status | | N1:amenity | 1 | - | N2:highway | 0 | | N2:railway | 1 | From 63a9bc94f767bc6aea2ba0eac52c018a9d09443a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 10 Nov 2022 15:52:13 +0100 Subject: [PATCH 12/16] fix country handling in flex style If the country tag does not match a 2-letter code, it needs to be dropped. --- settings/flex-base.lua | 6 ++++-- test/bdd/osm2pgsql/import/tags.feature | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/settings/flex-base.lua b/settings/flex-base.lua index d9462588..19f4e27b 100644 --- a/settings/flex-base.lua +++ b/settings/flex-base.lua @@ -331,8 +331,10 @@ function process_tags(o) end -- address keys - o:grab_address{match=function (k, v) return COUNTRY_TAGS(k, v) and #v == 2 end, - out_key='country'} + o:grab_address{match=COUNTRY_TAGS, out_key='country'} + if o.address.country ~= nil and #o.address.country ~= 2 then + o.address['country'] = nil + end if o:grab_name{match=HOUSENAME_TAGS} > 0 then fallback = {'place', 'house'} end diff --git a/test/bdd/osm2pgsql/import/tags.feature b/test/bdd/osm2pgsql/import/tags.feature index 83d7fe52..1f6857f2 100644 --- a/test/bdd/osm2pgsql/import/tags.feature +++ b/test/bdd/osm2pgsql/import/tags.feature @@ -75,6 +75,7 @@ Feature: Tag evaluation n5003 Tshop=yes,country_code=x n5004 Tshop=yes,addr:country=us n5005 Tshop=yes,country=be + n5006 Tshop=yes,addr:country=France """ Then place contains exactly | object | class | address | @@ -83,6 +84,7 @@ Feature: Tag evaluation | N5003 | shop | - | | N5004 | shop | 'country': 'us' | | N5005 | shop | - | + | N5006 | shop | - | Scenario: Postcodes From 36cf0eb922d4c03e73a3a3a5505e46d5cee061b9 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 14 Nov 2022 13:57:26 +0100 Subject: [PATCH 13/16] reorganize handling of place type changes Always replace existing entries in place, never delete them because a direct delete will cause conflicts. --- lib-sql/functions/place_triggers.sql | 37 ++++++++++++++------ test/bdd/osm2pgsql/update/tags.feature | 47 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index 5b2642e7..ca6ba690 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -47,8 +47,6 @@ BEGIN {% if debug %}RAISE WARNING 'Existing: %',existing.osm_id;{% endif %} - -- Handle a place changing type by removing the old data. - -- (This trigger is executed BEFORE INSERT of the NEW tuple.) IF existing.osm_type IS NULL THEN DELETE FROM place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class; END IF; @@ -192,15 +190,11 @@ BEGIN END IF; {% endif %} - IF existing.osm_type IS NOT NULL THEN - -- Pathological case caused by the triggerless copy into place during initial import - -- force delete even for large areas, it will be reinserted later - UPDATE place SET geometry = ST_SetSRID(ST_Point(0,0), 4326) - WHERE osm_type = NEW.osm_type and osm_id = NEW.osm_id - and class = NEW.class and type = NEW.type; - DELETE FROM place - WHERE osm_type = NEW.osm_type and osm_id = NEW.osm_id - and class = NEW.class and type = NEW.type; + IF existingplacex.osm_type is not NULL THEN + -- Mark any existing place for delete in the placex table + UPDATE placex SET indexed_status = 100 + WHERE placex.osm_type = NEW.osm_type and placex.osm_id = NEW.osm_id + and placex.class = 'boundary' and placex.type = 'administrative'; END IF; -- Process it as a new insertion @@ -211,6 +205,27 @@ BEGIN {% if debug %}RAISE WARNING 'insert done % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,NEW.name;{% endif %} + IF existing.osm_type is not NULL THEN + -- If there is already an entry in place, just update that, if necessary. + IF coalesce(existing.name, ''::hstore) != coalesce(NEW.name, ''::hstore) + or coalesce(existing.address, ''::hstore) != coalesce(NEW.address, ''::hstore) + or coalesce(existing.extratags, ''::hstore) != coalesce(NEW.extratags, ''::hstore) + or coalesce(existing.admin_level, 15) != coalesce(NEW.admin_level, 15) + or existing.geometry::text != NEW.geometry::text + THEN + UPDATE place + SET name = NEW.name, + address = NEW.address, + extratags = NEW.extratags, + admin_level = NEW.admin_level, + geometry = NEW.geometry + WHERE osm_type = NEW.osm_type and osm_id = NEW.osm_id + and class = NEW.class and type = NEW.type; + END IF; + + RETURN NULL; + END IF; + RETURN NEW; END IF; diff --git a/test/bdd/osm2pgsql/update/tags.feature b/test/bdd/osm2pgsql/update/tags.feature index b94f2564..f617c38f 100644 --- a/test/bdd/osm2pgsql/update/tags.feature +++ b/test/bdd/osm2pgsql/update/tags.feature @@ -364,3 +364,50 @@ Feature: Tag evaluation | object | indexed_status | | W1:tourism | 2 | | W1:highway | 1 | + + + Scenario: Replay on administrative boundary + When loading osm data + """ + n10 x34.0 y-4.23 + n11 x34.1 y-4.23 + n12 x34.2 y-4.13 + w10 Tboundary=administrative,waterway=river,name=Border,admin_level=2 Nn12,n11,n10 + """ + Then place contains exactly + | object | + | W10:waterway | + | W10:boundary | + + When updating osm data + """ + w10 Tboundary=administrative,waterway=river,name=Border,admin_level=2 Nn12,n11,n10 + """ + Then place contains exactly + | object | + | W10:waterway | + | W10:boundary | + And placex contains exactly + | object | + | W10:waterway | + + + Scenario: Change admin_level on administrative boundary + When loading osm data + """ + n10 x34.0 y-4.23 + n11 x34.1 y-4.23 + n12 x34.2 y-4.13 + w10 Tboundary=administrative,name=Border,admin_level=2 Nn12,n11,n10 + """ + Then place contains exactly + | object | admin_level | + | W10:boundary | 2 | + + When updating osm data + """ + w10 Tboundary=administrative,name=Border,admin_level=4 Nn12,n11,n10 + """ + Then place contains exactly + | object | admin_level | + | W10:boundary | 4 | From a46348da3833cd6a6fe07a982a83a5bdac13f10f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 14 Nov 2022 14:48:44 +0100 Subject: [PATCH 14/16] bdd: test placex content when updating with osm2pgsql --- lib-sql/functions/place_triggers.sql | 2 +- test/bdd/osm2pgsql/update/tags.feature | 251 ++++++++++++++++--------- test/bdd/steps/steps_osm_data.py | 8 + 3 files changed, 173 insertions(+), 88 deletions(-) diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index ca6ba690..489ecb35 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -194,7 +194,7 @@ BEGIN -- Mark any existing place for delete in the placex table UPDATE placex SET indexed_status = 100 WHERE placex.osm_type = NEW.osm_type and placex.osm_id = NEW.osm_id - and placex.class = 'boundary' and placex.type = 'administrative'; + and placex.class = NEW.class and placex.type = NEW.type; END IF; -- Process it as a new insertion diff --git a/test/bdd/osm2pgsql/update/tags.feature b/test/bdd/osm2pgsql/update/tags.feature index f617c38f..3d083040 100644 --- a/test/bdd/osm2pgsql/update/tags.feature +++ b/test/bdd/osm2pgsql/update/tags.feature @@ -33,9 +33,12 @@ Feature: Tag evaluation | N3 | amenity | prison | And placex contains | object | indexed_status | - | N1:amenity | 100 | - | N2:railway | 100 | | N3:amenity | 0 | + When indexing + Then placex contains exactly + | object | type | name | + | N2:highway | bus_stop | 'name': 'X' | + | N3:amenity | prison | - | Scenario: Main tag added @@ -58,10 +61,12 @@ Feature: Tag evaluation | N1 | amenity | restaurant | | N2:highway | highway | bus_stop | | N2:railway | railway | stop | - And placex contains - | object | indexed_status | - | N1:amenity | 1 | - | N2:railway | 1 | + When indexing + Then placex contains exactly + | object | type | name | + | N1:amenity | restaurant | - | + | N2:highway | bus_stop | 'name': 'X' | + | N2:railway | stop | 'name': 'X' | Scenario: Main tag modified @@ -84,10 +89,11 @@ Feature: Tag evaluation | object | class | type | | N10 | highway | path | | N11 | highway | primary | - And placex contains - | object | indexed_status | - | N11:amenity | 100 | - | N11:highway | 1 | + When indexing + Then placex contains exactly + | object | type | name | + | N10:highway | path | 'name': 'X' | + | N11:highway | primary | - | Scenario: Main tags with name, name added @@ -108,10 +114,11 @@ Feature: Tag evaluation | object | class | type | | N45 | landuse | cemetry | | N46 | building| yes | - And placex contains exactly - | object | indexed_status | - | N45:landuse | 1 | - | N46:building | 1 | + When indexing + Then placex contains exactly + | object | type | name | address | + | N45:landuse | cemetry | 'name': 'TODO' | - | + | N46:building| yes | - | 'housenumber': '1' | Scenario: Main tags with name, name removed @@ -132,11 +139,9 @@ Feature: Tag evaluation """ Then place contains exactly | object | class | type | - And placex contains exactly - | object | indexed_status | - | N45:landuse | 100 | - | N46:building | 100 | - + When indexing + Then placex contains exactly + | object | Scenario: Main tags with name, name modified When loading osm data @@ -158,10 +163,11 @@ Feature: Tag evaluation | object | class | type | name | address | | N45 | landuse | cemetry | 'name' : 'DONE' | - | | N46 | building| yes | - | 'housenumber': '10'| - And placex contains exactly - | object | indexed_status | - | N45:landuse | 2 | - | N46:building | 2 | + When indexing + Then placex contains exactly + | object | class | type | name | address | + | N45 | landuse | cemetry | 'name' : 'DONE' | - | + | N46 | building| yes | - | 'housenumber': '10'| Scenario: Main tag added to address only node @@ -180,10 +186,10 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | address | | N1 | building | yes | 'housenumber': '345'| - And placex contains exactly - | object | indexed_status | - | N1:place | 100 | - | N1:building | 1 | + When indexing + Then placex contains exactly + | object | class | type | address | + | N1 | building | yes | 'housenumber': '345'| Scenario: Main tag removed from address only node @@ -202,10 +208,10 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | address | | N1 | place | house | 'housenumber': '345'| - And placex contains exactly - | object | indexed_status | - | N1:place | 1 | - | N1:building | 100 | + When indexing + Then placex contains exactly + | object | class | type | address | + | N1 | place | house | 'housenumber': '345'| Scenario: Main tags with name key, adding key name @@ -223,9 +229,10 @@ Feature: Tag evaluation Then place contains exactly | object | class | type | name | | N2 | bridge | yes | 'name': 'high' | - And placex contains exactly - | object | indexed_status | - | N2:bridge | 1 | + When indexing + Then placex contains exactly + | object | class | type | name | + | N2 | bridge | yes | 'name': 'high' | Scenario: Main tags with name key, deleting key name @@ -242,10 +249,10 @@ Feature: Tag evaluation n2 Tbridge=yes """ Then place contains exactly - | object | class | type | - And placex contains exactly - | object | indexed_status | - | N2:bridge | 100 | + | object | + When indexing + Then placex contains exactly + | object | Scenario: Main tags with name key, changing key name @@ -262,11 +269,12 @@ Feature: Tag evaluation n2 Tbridge=yes,bridge:name:en=high """ Then place contains exactly - | object | class | type | name | - | N2 | bridge | yes | 'name:en': 'high' | - And placex contains exactly - | object | indexed_status | - | N2:bridge | 2 | + | object | class | type | name | + | N2 | bridge | yes | 'name:en': 'high' | + When indexing + Then placex contains exactly + | object | class | type | name | + | N2 | bridge | yes | 'name:en': 'high' | Scenario: Downgrading a highway to one that is dropped without name @@ -286,9 +294,9 @@ Feature: Tag evaluation """ Then place contains exactly | object | - And placex contains exactly - | object | indexed_status | - | W1:highway | 100 | + When indexing + Then placex contains exactly + | object | Scenario: Upgrading a highway to one that is not dropped without name @@ -308,9 +316,10 @@ Feature: Tag evaluation Then place contains exactly | object | | W1:highway | - And placex contains exactly - | object | indexed_status | - | W1:highway | 1 | + When indexing + Then placex contains exactly + | object | + | W1:highway | Scenario: Downgrading a highway when a second tag is present @@ -321,24 +330,21 @@ Feature: Tag evaluation w1 Thighway=residential,tourism=hotel Nn100,n101 """ Then place contains exactly - | object | - | W1:highway | - | W1:tourism | + | object | type | + | W1:highway | residential | + | W1:tourism | hotel | When updating osm data """ w1 Thighway=service,tourism=hotel Nn100,n101 """ Then place contains exactly - | object | - | W1:tourism | - And placex contains exactly - | object | - | W1:tourism | - | W1:highway | - And placex contains - | object | indexed_status | - | W1:highway | 100 | + | object | type | + | W1:tourism | hotel | + When indexing + Then placex contains exactly + | object | type | + | W1:tourism | hotel | Scenario: Upgrading a highway when a second tag is present @@ -349,21 +355,22 @@ Feature: Tag evaluation w1 Thighway=service,tourism=hotel Nn100,n101 """ Then place contains exactly - | object | - | W1:tourism | + | object | type | + | W1:tourism | hotel | When updating osm data """ w1 Thighway=residential,tourism=hotel Nn100,n101 """ Then place contains exactly - | object | - | W1:highway | - | W1:tourism | - And placex contains exactly - | object | indexed_status | - | W1:tourism | 2 | - | W1:highway | 1 | + | object | type | + | W1:highway | residential | + | W1:tourism | hotel | + When indexing + Then placex contains exactly + | object | type | + | W1:highway | residential | + | W1:tourism | hotel | Scenario: Replay on administrative boundary @@ -375,39 +382,109 @@ Feature: Tag evaluation w10 Tboundary=administrative,waterway=river,name=Border,admin_level=2 Nn12,n11,n10 """ Then place contains exactly - | object | - | W10:waterway | - | W10:boundary | + | object | type | admin_level | name | + | W10:waterway | river | 2 | 'name': 'Border' | + | W10:boundary | administrative | 2 | 'name': 'Border' | When updating osm data """ w10 Tboundary=administrative,waterway=river,name=Border,admin_level=2 Nn12,n11,n10 """ Then place contains exactly - | object | - | W10:waterway | - | W10:boundary | - And placex contains exactly - | object | - | W10:waterway | + | object | type | admin_level | name | + | W10:waterway | river | 2 | 'name': 'Border' | + | W10:boundary | administrative | 2 | 'name': 'Border' | + When indexing + Then placex contains exactly + | object | type | admin_level | name | + | W10:waterway | river | 2 | 'name': 'Border' | Scenario: Change admin_level on administrative boundary + Given the grid + | 10 | 11 | + | 13 | 12 | When loading osm data """ - n10 x34.0 y-4.23 - n11 x34.1 y-4.23 - n12 x34.2 y-4.13 - w10 Tboundary=administrative,name=Border,admin_level=2 Nn12,n11,n10 + n10 + n11 + n12 + n13 + w10 Nn10,n11,n12,n13,n10 + r10 Ttype=multipolygon,boundary=administrative,name=Border,admin_level=2 Mw10@ """ Then place contains exactly | object | admin_level | - | W10:boundary | 2 | + | R10:boundary | 2 | When updating osm data """ - w10 Tboundary=administrative,name=Border,admin_level=4 Nn12,n11,n10 + r10 Ttype=multipolygon,boundary=administrative,name=Border,admin_level=4 Mw10@ """ Then place contains exactly - | object | admin_level | - | W10:boundary | 4 | + | object | type | admin_level | + | R10:boundary | administrative | 4 | + When indexing + Then placex contains exactly + | object | type | admin_level | + | R10:boundary | administrative | 4 | + + + Scenario: Change boundary to administrative + Given the grid + | 10 | 11 | + | 13 | 12 | + When loading osm data + """ + n10 + n11 + n12 + n13 + w10 Nn10,n11,n12,n13,n10 + r10 Ttype=multipolygon,boundary=informal,name=Border,admin_level=4 Mw10@ + """ + Then place contains exactly + | object | type | admin_level | + | R10:boundary | informal | 4 | + + When updating osm data + """ + r10 Ttype=multipolygon,boundary=administrative,name=Border,admin_level=4 Mw10@ + """ + Then place contains exactly + | object | type | admin_level | + | R10:boundary | administrative | 4 | + When indexing + Then placex contains exactly + | object | type | admin_level | + | R10:boundary | administrative | 4 | + + + Scenario: Change boundary away from administrative + Given the grid + | 10 | 11 | + | 13 | 12 | + When loading osm data + """ + n10 + n11 + n12 + n13 + w10 Nn10,n11,n12,n13,n10 + r10 Ttype=multipolygon,boundary=administrative,name=Border,admin_level=4 Mw10@ + """ + Then place contains exactly + | object | type | admin_level | + | R10:boundary | administrative | 4 | + + When updating osm data + """ + r10 Ttype=multipolygon,boundary=informal,name=Border,admin_level=4 Mw10@ + """ + Then place contains exactly + | object | type | admin_level | + | R10:boundary | informal | 4 | + When indexing + Then placex contains exactly + | object | type | admin_level | + | R10:boundary | informal | 4 | diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 20d327df..7590b17c 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -123,3 +123,11 @@ def update_from_osm_file(context): get_osm2pgsql_options(context.nominatim, fname, append=True)) finally: os.remove(fname) + +@when('indexing') +def index_database(context): + """ + Run the Nominatim indexing step. This will process data previously + loaded with 'updating osm data' + """ + context.nominatim.run_nominatim('index') From d8e3ba3b54f07fcc89e5b257bf3183f21bde8a7a Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 14 Nov 2022 16:57:31 +0100 Subject: [PATCH 15/16] bdd: add osm2pgsql tests for updating interpolations --- .../osm2pgsql/update/interpolations.feature | 133 ++++++++++++++++++ test/bdd/steps/steps_db_ops.py | 45 ++++++ 2 files changed, 178 insertions(+) create mode 100644 test/bdd/osm2pgsql/update/interpolations.feature diff --git a/test/bdd/osm2pgsql/update/interpolations.feature b/test/bdd/osm2pgsql/update/interpolations.feature new file mode 100644 index 00000000..dae5168b --- /dev/null +++ b/test/bdd/osm2pgsql/update/interpolations.feature @@ -0,0 +1,133 @@ +@DB +Feature: Updates of address interpolation objects + Test that changes to address interpolation objects are correctly + propagated. + + Background: + Given the grid + | 1 | 2 | + + + Scenario: Adding a new interpolation + When loading osm data + """ + n1 Taddr:housenumber=3 + n2 Taddr:housenumber=17 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + + When updating osm data + """ + w99 Taddr:interpolation=odd Nn1,n2 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + | W99:place | houses | + When indexing + Then placex contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + Then location_property_osmline contains exactly + | object | + | 99:5 | + + + Scenario: Delete an existing interpolation + When loading osm data + """ + n1 Taddr:housenumber=2 + n2 Taddr:housenumber=7 + w99 Taddr:interpolation=odd Nn1,n2 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + | W99:place | houses | + + When updating osm data + """ + w99 v2 dD + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + When indexing + Then placex contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + Then location_property_osmline contains exactly + | object | indexed_status | + + + Scenario: Changing an object to an interpolation + When loading osm data + """ + n1 Taddr:housenumber=3 + n2 Taddr:housenumber=17 + w99 Thighway=residential Nn1,n2 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + | W99:highway | residential | + + When updating osm data + """ + w99 Taddr:interpolation=odd Nn1,n2 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + | W99:place | houses | + When indexing + Then placex contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + And location_property_osmline contains exactly + | object | + | 99:5 | + + + Scenario: Changing an interpolation to something else + When loading osm data + """ + n1 Taddr:housenumber=3 + n2 Taddr:housenumber=17 + w99 Taddr:interpolation=odd Nn1,n2 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + | W99:place | houses | + + When updating osm data + """ + w99 Thighway=residential Nn1,n2 + """ + Then place contains + | object | type | + | N1:place | house | + | N2:place | house | + | W99:highway | residential | + When indexing + Then placex contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + | W99:highway | residential | + And location_property_osmline contains exactly + | object | + diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index 84379cbe..14ae5d52 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -380,4 +380,49 @@ def check_location_property_osmline(context, oid, neg): assert not todo, f"Unmatched lines in table: {list(context.table[i] for i in todo)}" +@then("location_property_osmline contains(?P exactly)?") +def check_place_contents(context, exact): + """ Check contents of the interpolation table. Each row represents a table row + and all data must match. Data not present in the expected table, may + be arbitry. The rows are identified via the 'object' column which must + have an identifier of the form '[:]'. When multiple + rows match (for example because 'startnumber' was left out and there are + multiple entries for the given OSM object) then all must match. All + expected rows are expected to be present with at least one database row. + When 'exactly' is given, there must not be additional rows in the database. + """ + with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + expected_content = set() + for row in context.table: + if ':' in row['object']: + nid, start = row['object'].split(':', 2) + start = int(start) + else: + nid, start = row['object'], None + + query = """SELECT *, ST_AsText(linegeo) as geomtxt, + ST_GeometryType(linegeo) as geometrytype + FROM location_property_osmline WHERE osm_id=%s""" + + if ':' in row['object']: + query += ' and startnumber = %s' + params = [int(val) for val in row['object'].split(':', 2)] + else: + params = (int(row['object']), ) + + cur.execute(query, params) + assert cur.rowcount > 0, "No rows found for " + row['object'] + + for res in cur: + if exact: + expected_content.add((res['osm_id'], res['startnumber'])) + + DBRow(nid, res, context).assert_row(row, ['object']) + + if exact: + cur.execute('SELECT osm_id, startnumber from location_property_osmline') + actual = set([(r[0], r[1]) for r in cur]) + assert expected_content == actual, \ + f"Missing entries: {expected_content - actual}\n" \ + f"Not expected in table: {actual - expected_content}" From 93ada250f774d8bc521af56cedf7e34b548d64dd Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 14 Nov 2022 17:27:04 +0100 Subject: [PATCH 16/16] bdd: add tests for osm2pgsql update of postcode nodes --- test/bdd/osm2pgsql/update/postcodes.feature | 163 ++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 test/bdd/osm2pgsql/update/postcodes.feature diff --git a/test/bdd/osm2pgsql/update/postcodes.feature b/test/bdd/osm2pgsql/update/postcodes.feature new file mode 100644 index 00000000..23f86f76 --- /dev/null +++ b/test/bdd/osm2pgsql/update/postcodes.feature @@ -0,0 +1,163 @@ +@DB +Feature: Update of postcode only objects + Tests that changes to objects containing only a postcode are + propagated correctly. + + + Scenario: Adding a postcode-only node + When loading osm data + """ + """ + Then place contains exactly + | object | + + When updating osm data + """ + n34 Tpostcode=4456 + """ + Then place contains exactly + | object | type | + | N34:place | postcode | + When indexing + Then placex contains exactly + | object | + + + Scenario: Deleting a postcode-only node + When loading osm data + """ + n34 Tpostcode=4456 + """ + Then place contains exactly + | object | type | + | N34:place | postcode | + + When updating osm data + """ + n34 v2 dD + """ + Then place contains exactly + | object | + When indexing + Then placex contains exactly + | object | + + + Scenario Outline: Converting a regular object into a postcode-only node + When loading osm data + """ + n34 T= + """ + Then place contains exactly + | object | type | + | N34: | | + + When updating osm data + """ + n34 Tpostcode=4456 + """ + Then place contains exactly + | object | type | + | N34:place | postcode | + When indexing + Then placex contains exactly + | object | + + Examples: + | class | type | + | amenity | restaurant | + | place | hamlet | + + + Scenario Outline: Converting a postcode-only node into a regular object + When loading osm data + """ + n34 Tpostcode=4456 + """ + Then place contains exactly + | object | type | + | N34:place | postcode | + + When updating osm data + """ + n34 T= + """ + Then place contains exactly + | object | type | + | N34: | | + When indexing + Then placex contains exactly + | object | type | + | N34: | | + + Examples: + | class | type | + | amenity | restaurant | + | place | hamlet | + + + Scenario: Converting na interpolation into a postcode-only node + Given the grid + | 1 | 2 | + When loading osm data + """ + n1 Taddr:housenumber=3 + n2 Taddr:housenumber=17 + w34 Taddr:interpolation=odd Nn1,n2 + """ + Then place contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + | W34:place | houses | + + When updating osm data + """ + w34 Tpostcode=4456 Nn1,n2 + """ + Then place contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + | W34:place | postcode | + When indexing + Then location_property_osmline contains exactly + | object | + And placex contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + + + Scenario: Converting a postcode-only node into an interpolation + Given the grid + | 1 | 2 | + When loading osm data + """ + n1 Taddr:housenumber=3 + n2 Taddr:housenumber=17 + w34 Tpostcode=4456 Nn1,n2 + """ + Then place contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + | W34:place | postcode | + + When updating osm data + """ + w34 Taddr:interpolation=odd Nn1,n2 + """ + Then place contains exactly + | object | type | + | N1:place | house | + | N2:place | house | + | W34:place | houses | + When indexing + Then location_property_osmline contains exactly + | object | + | 34:5 | + And placex contains exactly + | object | type | + | N1:place | house | + | N2:place | house |