# Patch summary: analyse_indexing() in nominatim/tools/admin.py now takes the
# configuration instead of an open connection. It opens its own connection,
# loads the place through the new helper _get_place_info() (which selects
# from placex_indexing_prepare()), and runs the traced UPDATE with address,
# token_info, name and linked_place_id before rolling back. The admin
# subcommand and the tests are adapted to the new signature.
#
# NOTE(review): recovered from a whitespace-collapsed copy. Line breaks are
# pinned by the hunk line counts; indentation is inferred from Python syntax
# and must be confirmed against the repository before applying.
# NOTE(review): '<N|W|R><id>' in the OSM ID log message was missing from the
# collapsed copy and has been restored on both sides - confirm upstream text.
#
# Deviations from the recovered patch (nominatim/tools/admin.py only; the
# new-side hunk count grows from 75 to 79 and the post-image blob hash on the
# 'index' line no longer matches):
#  * _get_place_info() gets a docstring.
#  * str.isdecimal() replaces str.isdigit(), so every accepted ID is also
#    accepted by int() (isdigit() lets e.g. superscript digits through).
#  * the "not found" log line reports the place ID when no OSM ID was given
#    instead of printing 'None'.
diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py
index c020e224..1ed0ac9b 100644
--- a/nominatim/clicmd/admin.py
+++ b/nominatim/clicmd/admin.py
@@ -10,7 +10,6 @@ Implementation of the 'admin' subcommand.
 import logging
 
 from nominatim.tools.exec_utils import run_legacy_script
-from nominatim.db.connection import connect
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -63,8 +62,7 @@ class AdminFuncs:
         if args.analyse_indexing:
             LOG.warning('Analysing performance of indexing function')
             from ..tools import admin
-            with connect(args.config.get_libpq_dsn()) as conn:
-                admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
+            admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
             return 0
 
         if args.migrate:
diff --git a/nominatim/tools/admin.py b/nominatim/tools/admin.py
index 1886bf24..1bf217e2 100644
--- a/nominatim/tools/admin.py
+++ b/nominatim/tools/admin.py
@@ -9,47 +9,79 @@ Functions for database analysis and maintenance.
 """
 import logging
 
+from psycopg2.extras import Json, register_hstore
+
+from nominatim.db.connection import connect
+from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
+from nominatim.data.place_info import PlaceInfo
 
 LOG = logging.getLogger()
 
-def analyse_indexing(conn, osm_id=None, place_id=None):
+def _get_place_info(cursor, osm_id, place_id):
+    """ Fetch place_id and the output of placex_indexing_prepare() for one
+        placex row, selected by OSM ID or by place ID. Raises a UsageError
+        when the OSM ID is malformed, no ID is given or no row matches.
+    """
+    sql = """SELECT place_id, extra.*
+             FROM placex, LATERAL placex_indexing_prepare(placex) as extra
+          """
+
+    if osm_id:
+        osm_type = osm_id[0].upper()
+        if osm_type not in 'NWR' or not osm_id[1:].isdecimal():
+            LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
+            raise UsageError("OSM ID parameter badly formatted")
+
+        sql += ' WHERE placex.osm_type = %s AND placex.osm_id = %s'
+        values = (osm_type, int(osm_id[1:]))
+    elif place_id is not None:
+        sql += ' WHERE placex.place_id = %s'
+        values = (place_id, )
+    else:
+        LOG.fatal("No OSM object given to index.")
+        raise UsageError("OSM object not found")
+
+    cursor.execute(sql + ' LIMIT 1', values)
+
+    if cursor.rowcount < 1:
+        LOG.fatal("OSM object %s not found in database.", osm_id or place_id)
+        raise UsageError("OSM object not found")
+
+    return cursor.fetchone()
+
+
+def analyse_indexing(config, osm_id=None, place_id=None):
     """ Analyse indexing of a single Nominatim object.
     """
-    with conn.cursor() as cur:
-        if osm_id:
-            osm_type = osm_id[0].upper()
-            if osm_type not in 'NWR' or not osm_id[1:].isdigit():
-                LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
-                raise UsageError("OSM ID parameter badly formatted")
-            cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s',
-                        (osm_type, osm_id[1:]))
+    with connect(config.get_libpq_dsn()) as conn:
+        register_hstore(conn)
+        with conn.cursor() as cur:
+            place = _get_place_info(cur, osm_id, place_id)
 
-            if cur.rowcount < 1:
-                LOG.fatal("OSM object %s not found in database.", osm_id)
-                raise UsageError("OSM object not found")
+            cur.execute("update placex set indexed_status = 2 where place_id = %s",
+                        (place['place_id'], ))
 
-            place_id = cur.fetchone()[0]
+            cur.execute("""SET auto_explain.log_min_duration = '0';
+                           SET auto_explain.log_analyze = 'true';
+                           SET auto_explain.log_nested_statements = 'true';
+                           LOAD 'auto_explain';
+                           SET client_min_messages = LOG;
+                           SET log_min_messages = FATAL""")
 
-        if place_id is None:
-            LOG.fatal("No OSM object given to index.")
-            raise UsageError("OSM object not found")
+            tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
 
-        cur.execute("update placex set indexed_status = 2 where place_id = %s",
-                    (place_id, ))
+            with tokenizer.name_analyzer() as analyzer:
+                cur.execute("""UPDATE placex
+                               SET indexed_status = 0, address = %s, token_info = %s,
+                               name = %s, linked_place_id = %s
+                               WHERE place_id = %s""",
+                            (place['address'],
+                             Json(analyzer.process_place(PlaceInfo(place))),
+                             place['name'], place['linked_place_id'], place['place_id']))
 
-        cur.execute("""SET auto_explain.log_min_duration = '0';
-                       SET auto_explain.log_analyze = 'true';
-                       SET auto_explain.log_nested_statements = 'true';
-                       LOAD 'auto_explain';
-                       SET client_min_messages = LOG;
-                       SET log_min_messages = FATAL""")
+                # we do not want to keep the results
+                conn.rollback()
 
-        cur.execute("update placex set indexed_status = 0 where place_id = %s",
-                    (place_id, ))
-
-        # we do not want to keep the results
-        conn.rollback()
-
-        for msg in conn.notices:
-            print(msg)
+            for msg in conn.notices:
+                print(msg)
diff --git a/test/python/mocks.py b/test/python/mocks.py
index 9c6ef532..a2fff677 100644
--- a/test/python/mocks.py
+++ b/test/python/mocks.py
@@ -42,6 +42,7 @@ class MockPlacexTable:
                                admin_level smallint,
                                address hstore,
                                extratags hstore,
+                               token_info jsonb,
                                geometry Geometry(Geometry,4326),
                                wikipedia TEXT,
                                country_code varchar(2),
diff --git a/test/python/tools/test_admin.py b/test/python/tools/test_admin.py
index e53d35c4..9c010b9d 100644
--- a/test/python/tools/test_admin.py
+++ b/test/python/tools/test_admin.py
@@ -11,37 +11,62 @@ import pytest
 
 from nominatim.errors import UsageError
 from nominatim.tools import admin
+from nominatim.tokenizer import factory
 
 @pytest.fixture(autouse=True)
-def create_placex_table(placex_table):
+def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
     """ All tests in this module require the placex table to be set up.
     """
+    temp_db_cursor.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
+    temp_db_cursor.execute("""CREATE TYPE prepare_update_info AS (
+                             name HSTORE,
+                             address HSTORE,
+                             rank_address SMALLINT,
+                             country_code TEXT,
+                             class TEXT,
+                             type TEXT,
+                             linked_place_id BIGINT
+                           )""")
+    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
+                                                     OUT result prepare_update_info)
+                           AS $$
+                           BEGIN
+                             result.address := p.address;
+                             result.name := p.name;
+                             result.class := p.class;
+                             result.type := p.type;
+                             result.country_code := p.country_code;
+                             result.rank_address := p.rank_address;
+                           END;
+                           $$ LANGUAGE plpgsql STABLE;
+                        """)
+    factory.create_tokenizer(project_env)
 
 
-def test_analyse_indexing_no_objects(temp_db_conn):
+def test_analyse_indexing_no_objects(project_env):
     with pytest.raises(UsageError):
-        admin.analyse_indexing(temp_db_conn)
+        admin.analyse_indexing(project_env)
 
 
 @pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123'])
-def test_analyse_indexing_bad_osmid(temp_db_conn, oid):
+def test_analyse_indexing_bad_osmid(project_env, oid):
     with pytest.raises(UsageError):
-        admin.analyse_indexing(temp_db_conn, osm_id=oid)
+        admin.analyse_indexing(project_env, osm_id=oid)
 
 
-def test_analyse_indexing_unknown_osmid(temp_db_conn):
+def test_analyse_indexing_unknown_osmid(project_env):
     with pytest.raises(UsageError):
-        admin.analyse_indexing(temp_db_conn, osm_id='W12345674')
+        admin.analyse_indexing(project_env, osm_id='W12345674')
 
 
-def test_analyse_indexing_with_place_id(temp_db_conn, temp_db_cursor):
+def test_analyse_indexing_with_place_id(project_env, temp_db_cursor):
     temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
 
-    admin.analyse_indexing(temp_db_conn, place_id=12345)
+    admin.analyse_indexing(project_env, place_id=12345)
 
 
-def test_analyse_indexing_with_osm_id(temp_db_conn, temp_db_cursor):
+def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
     temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
                               VALUES(9988, 'N', 10000)""")
 
-    admin.analyse_indexing(temp_db_conn, osm_id='N10000')
+    admin.analyse_indexing(project_env, osm_id='N10000')