Merge branch 'clean-deleted-relations' of https://github.com/lujoh/Nominatim into lujoh-clean-deleted-relations

This commit is contained in:
Sarah Hoffmann 2023-10-23 10:47:31 +02:00
commit 6ad397d4a9
8 changed files with 176 additions and 62 deletions

View File

@ -60,16 +60,13 @@ to finish the recomputation.
## Removing large deleted objects
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
There is currently no command to do that. You can use the following SQL
query to force a deletion on all objects that have been deleted more than
a certain timespan ago (here: 1 month):
To run this command you will need to pass a PostgreSQL time interval. For example to
delete any objects that have been deleted more than a month ago you would run:
`nominatim admin --clean-deleted '1 month'`
```sql
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
and age(p.indexed_date) > '1 month'::interval
```

View File

@ -363,57 +363,3 @@ BEGIN
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;
-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;
-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

View File

@ -487,3 +487,56 @@ BEGIN
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;
-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;
-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

View File

@ -29,6 +29,7 @@ class AdminFuncs:
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
self.parser = parser
group = parser.add_argument_group('Admin tasks')
objs = group.add_mutually_exclusive_group(required=True)
objs.add_argument('--warm', action='store_true',
@ -41,6 +42,8 @@ class AdminFuncs:
help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information")
objs.add_argument('--clean-deleted', action='store', metavar='AGE',
help='Clean up deleted relations')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
@ -54,8 +57,11 @@ class AdminFuncs:
help='Analyse indexing of the given OSM object')
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
group = parser.add_argument_group('Arguments for cleaning deleted')
def run(self, args: NominatimArgs) -> int:
# pylint: disable=too-many-return-statements
if args.warm:
return self._warm(args)
@ -81,6 +87,12 @@ class AdminFuncs:
collect_os_info.report_system_information(args.config)
return 0
if args.clean_deleted:
LOG.warning('Cleaning up deleted relations')
from ..tools import admin
admin.clean_deleted_relations(args.config, age=args.clean_deleted)
return 0
return 1

View File

@ -72,6 +72,7 @@ class NominatimArgs:
check_database: bool
migrate: bool
collect_os_info: bool
clean_deleted: str
analyse_indexing: bool
target: Optional[str]
osm_id: Optional[str]

View File

@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
import logging
from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError
from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor
@ -87,3 +88,20 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
for msg in conn.notices:
print(msg)
def clean_deleted_relations(config: Configuration, age: str) -> None:
""" Clean deleted relations older than a given age
"""
with connect(config.get_libpq_dsn()) as conn:
with conn.cursor() as cur:
try:
cur.execute("""SELECT place_force_delete(p.place_id)
FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
AND age(p.indexed_date) > %s::interval""",
(age, ))
except DataError as exc:
raise UsageError('Invalid PostgreSQL time interval format') from exc
conn.commit()

View File

@ -33,6 +33,17 @@ def test_admin_migrate(cli_call, mock_func_factory):
assert mock.called == 1
def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
assert cli_call('admin', '--clean-deleted', '1 month') == 0
assert mock.called == 1
def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
assert cli_call('admin', '--clean-deleted') == 1
class TestCliAdminWithDb:
@pytest.fixture(autouse=True)

View File

@ -12,6 +12,7 @@ import pytest
from nominatim.errors import UsageError
from nominatim.tools import admin
from nominatim.tokenizer import factory
from nominatim.db.sql_preprocessor import SQLPreprocessor
@pytest.fixture(autouse=True)
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
@ -70,3 +71,78 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(project_env, osm_id='N10000')
class TestAdminCleanDeleted:
@pytest.fixture(autouse=True)
def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
""" Set up place_force_delete function and related tables
"""
self.project_env = project_env
self.temp_db_cursor = temp_db_cursor
table_factory('import_polygon_delete',
"""osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL""",
((100, 'N', 'boundary', 'administrative'),
(145, 'N', 'boundary', 'administrative'),
(175, 'R', 'landcover', 'grass')))
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
(2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
(3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
# set up tables and triggers for utils function
table_factory('place_to_be_deleted',
"""osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL,
deferred BOOLEAN""")
table_factory('country_name', 'partition INT')
table_factory('import_polygon_error', """osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL""")
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
RETURNS TRIGGER AS $$
BEGIN RETURN NULL; END;
$$ LANGUAGE plpgsql;""")
temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
orig_sql = def_config.lib_dir.sql
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
def_config.lib_dir.sql = orig_sql
def test_admin_clean_deleted_no_records(self):
admin.clean_deleted_relations(self.project_env, age='1 year')
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
(145, 'N', 'boundary', 'administrative', 1),
(175, 'R', 'landcover', 'grass', 1)}
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3
@pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
def test_admin_clean_deleted_bad_age(self, test_age):
with pytest.raises(UsageError):
admin.clean_deleted_relations(self.project_env, age = test_age)
def test_admin_clean_deleted_partial(self):
admin.clean_deleted_relations(self.project_env, age = '2 months')
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
(145, 'N', 'boundary', 'administrative', 100),
(175, 'R', 'landcover', 'grass', 100)}
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1
@pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
def test_admin_clean_deleted(self, test_age):
admin.clean_deleted_relations(self.project_env, age = test_age)
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
(145, 'N', 'boundary', 'administrative', 100),
(175, 'R', 'landcover', 'grass', 100)}
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0