Merge pull request #2761 from lonvia/repair-index-analysis

Repair `admin --analyse-indexing`
This commit is contained in:
Sarah Hoffmann 2022-07-18 09:38:08 +02:00 committed by GitHub
commit b1903f0fbf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 98 additions and 46 deletions

View File

@ -10,7 +10,6 @@ Implementation of the 'admin' subcommand.
import logging import logging
from nominatim.tools.exec_utils import run_legacy_script from nominatim.tools.exec_utils import run_legacy_script
from nominatim.db.connection import connect
# Do not repeat documentation of subcommand classes. # Do not repeat documentation of subcommand classes.
# pylint: disable=C0111 # pylint: disable=C0111
@ -63,8 +62,7 @@ class AdminFuncs:
if args.analyse_indexing: if args.analyse_indexing:
LOG.warning('Analysing performance of indexing function') LOG.warning('Analysing performance of indexing function')
from ..tools import admin from ..tools import admin
with connect(args.config.get_libpq_dsn()) as conn: admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
return 0 return 0
if args.migrate: if args.migrate:

View File

@ -9,34 +9,54 @@ Functions for database analysis and maintenance.
""" """
import logging import logging
from psycopg2.extras import Json, register_hstore
from nominatim.db.connection import connect
from nominatim.tokenizer import factory as tokenizer_factory
from nominatim.errors import UsageError from nominatim.errors import UsageError
from nominatim.data.place_info import PlaceInfo
LOG = logging.getLogger() LOG = logging.getLogger()
def analyse_indexing(conn, osm_id=None, place_id=None): def _get_place_info(cursor, osm_id, place_id):
""" Analyse indexing of a single Nominatim object. sql = """SELECT place_id, extra.*
FROM placex, LATERAL placex_indexing_prepare(placex) as extra
""" """
with conn.cursor() as cur:
if osm_id: if osm_id:
osm_type = osm_id[0].upper() osm_type = osm_id[0].upper()
if osm_type not in 'NWR' or not osm_id[1:].isdigit(): if osm_type not in 'NWR' or not osm_id[1:].isdigit():
LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id) LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
raise UsageError("OSM ID parameter badly formatted") raise UsageError("OSM ID parameter badly formatted")
cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s',
(osm_type, osm_id[1:]))
if cur.rowcount < 1: sql += ' WHERE placex.osm_type = %s AND placex.osm_id = %s'
LOG.fatal("OSM object %s not found in database.", osm_id) values = (osm_type, int(osm_id[1:]))
raise UsageError("OSM object not found") elif place_id is not None:
sql += ' WHERE placex.place_id = %s'
place_id = cur.fetchone()[0] values = (place_id, )
else:
if place_id is None:
LOG.fatal("No OSM object given to index.") LOG.fatal("No OSM object given to index.")
raise UsageError("OSM object not found") raise UsageError("OSM object not found")
cursor.execute(sql + ' LIMIT 1', values)
if cursor.rowcount < 1:
LOG.fatal("OSM object %s not found in database.", osm_id)
raise UsageError("OSM object not found")
return cursor.fetchone()
def analyse_indexing(config, osm_id=None, place_id=None):
""" Analyse indexing of a single Nominatim object.
"""
with connect(config.get_libpq_dsn()) as conn:
register_hstore(conn)
with conn.cursor() as cur:
place = _get_place_info(cur, osm_id, place_id)
cur.execute("update placex set indexed_status = 2 where place_id = %s", cur.execute("update placex set indexed_status = 2 where place_id = %s",
(place_id, )) (place['place_id'], ))
cur.execute("""SET auto_explain.log_min_duration = '0'; cur.execute("""SET auto_explain.log_min_duration = '0';
SET auto_explain.log_analyze = 'true'; SET auto_explain.log_analyze = 'true';
@ -45,8 +65,16 @@ def analyse_indexing(conn, osm_id=None, place_id=None):
SET client_min_messages = LOG; SET client_min_messages = LOG;
SET log_min_messages = FATAL""") SET log_min_messages = FATAL""")
cur.execute("update placex set indexed_status = 0 where place_id = %s", tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
(place_id, ))
with tokenizer.name_analyzer() as analyzer:
cur.execute("""UPDATE placex
SET indexed_status = 0, address = %s, token_info = %s,
name = %s, linked_place_id = %s
WHERE place_id = %s""",
(place['address'],
Json(analyzer.process_place(PlaceInfo(place))),
place['name'], place['linked_place_id'], place['place_id']))
# we do not want to keep the results # we do not want to keep the results
conn.rollback() conn.rollback()

View File

@ -42,6 +42,7 @@ class MockPlacexTable:
admin_level smallint, admin_level smallint,
address hstore, address hstore,
extratags hstore, extratags hstore,
token_info jsonb,
geometry Geometry(Geometry,4326), geometry Geometry(Geometry,4326),
wikipedia TEXT, wikipedia TEXT,
country_code varchar(2), country_code varchar(2),

View File

@ -11,37 +11,62 @@ import pytest
from nominatim.errors import UsageError from nominatim.errors import UsageError
from nominatim.tools import admin from nominatim.tools import admin
from nominatim.tokenizer import factory
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def create_placex_table(placex_table): def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
""" All tests in this module require the placex table to be set up. """ All tests in this module require the placex table to be set up.
""" """
temp_db_cursor.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
temp_db_cursor.execute("""CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
)""")
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info)
AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
factory.create_tokenizer(project_env)
def test_analyse_indexing_no_objects(temp_db_conn): def test_analyse_indexing_no_objects(project_env):
with pytest.raises(UsageError): with pytest.raises(UsageError):
admin.analyse_indexing(temp_db_conn) admin.analyse_indexing(project_env)
@pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123']) @pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123'])
def test_analyse_indexing_bad_osmid(temp_db_conn, oid): def test_analyse_indexing_bad_osmid(project_env, oid):
with pytest.raises(UsageError): with pytest.raises(UsageError):
admin.analyse_indexing(temp_db_conn, osm_id=oid) admin.analyse_indexing(project_env, osm_id=oid)
def test_analyse_indexing_unknown_osmid(temp_db_conn): def test_analyse_indexing_unknown_osmid(project_env):
with pytest.raises(UsageError): with pytest.raises(UsageError):
admin.analyse_indexing(temp_db_conn, osm_id='W12345674') admin.analyse_indexing(project_env, osm_id='W12345674')
def test_analyse_indexing_with_place_id(temp_db_conn, temp_db_cursor): def test_analyse_indexing_with_place_id(project_env, temp_db_cursor):
temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)") temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
admin.analyse_indexing(temp_db_conn, place_id=12345) admin.analyse_indexing(project_env, place_id=12345)
def test_analyse_indexing_with_osm_id(temp_db_conn, temp_db_cursor): def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id) temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
VALUES(9988, 'N', 10000)""") VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(temp_db_conn, osm_id='N10000') admin.analyse_indexing(project_env, osm_id='N10000')