mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-24 21:44:45 +03:00
convert admin --analyse-indexing to new indexing method
A proper run of indexing requires the place information from the analyzer. Add the pre-processing of place data, so the right information is handed into the update function.
This commit is contained in:
parent
300612c5a8
commit
4b12d52ef5
@ -10,7 +10,6 @@ Implementation of the 'admin' subcommand.
|
||||
import logging
|
||||
|
||||
from nominatim.tools.exec_utils import run_legacy_script
|
||||
from nominatim.db.connection import connect
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
@ -63,8 +62,7 @@ class AdminFuncs:
|
||||
if args.analyse_indexing:
|
||||
LOG.warning('Analysing performance of indexing function')
|
||||
from ..tools import admin
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
|
||||
admin.analyse_indexing(args.config, osm_id=args.osm_id, place_id=args.place_id)
|
||||
return 0
|
||||
|
||||
if args.migrate:
|
||||
|
@ -9,47 +9,75 @@ Functions for database analysis and maintenance.
|
||||
"""
|
||||
import logging
|
||||
|
||||
from psycopg2.extras import Json, register_hstore
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.data.place_info import PlaceInfo
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def analyse_indexing(conn, osm_id=None, place_id=None):
|
||||
def _get_place_info(cursor, osm_id, place_id):
    """ Fetch the indexing pre-processing data for a single placex row.

        The row is selected by `osm_id` (a string of the form <N|W|R><id>)
        when it is given, otherwise by `place_id`. The query returns the
        place_id together with all columns produced by the SQL function
        placex_indexing_prepare() and the first matching row is handed back
        as delivered by `cursor.fetchone()`.
        NOTE(review): callers access the result by column name, so the
        cursor is presumably expected to yield dict-like rows - confirm.

        Raises a UsageError when the OSM ID is malformed, when neither an
        OSM ID nor a place ID is given or when no matching row exists.
    """
    sql = """SELECT place_id, extra.*
             FROM placex, LATERAL placex_indexing_prepare(placex) as extra
          """

    if osm_id:
        osm_type = osm_id[0].upper()
        # Use isdecimal() rather than isdigit(): the latter also accepts
        # characters like superscript digits, which int() cannot convert
        # and which would surface as a ValueError instead of a UsageError.
        if osm_type not in ('N', 'W', 'R') or not osm_id[1:].isdecimal():
            LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
            raise UsageError("OSM ID parameter badly formatted")

        sql += ' WHERE placex.osm_type = %s AND placex.osm_id = %s'
        values = (osm_type, int(osm_id[1:]))
        requested = osm_id
    elif place_id is not None:
        sql += ' WHERE placex.place_id = %s'
        values = (place_id, )
        requested = f"with place ID {place_id}"
    else:
        LOG.fatal("No OSM object given to index.")
        raise UsageError("OSM object not found")

    cursor.execute(sql + ' LIMIT 1', values)

    if cursor.rowcount < 1:
        # Report the identifier that was actually used for the lookup;
        # osm_id is None when the place was requested by place ID.
        LOG.fatal("OSM object %s not found in database.", requested)
        raise UsageError("OSM object not found")

    return cursor.fetchone()
|
||||
|
||||
|
||||
def analyse_indexing(config, osm_id=None, place_id=None):
|
||||
""" Analyse indexing of a single Nominatim object.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
if osm_id:
|
||||
osm_type = osm_id[0].upper()
|
||||
if osm_type not in 'NWR' or not osm_id[1:].isdigit():
|
||||
LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
|
||||
raise UsageError("OSM ID parameter badly formatted")
|
||||
cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s',
|
||||
(osm_type, osm_id[1:]))
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
register_hstore(conn)
|
||||
with conn.cursor() as cur:
|
||||
place = _get_place_info(cur, osm_id, place_id)
|
||||
|
||||
if cur.rowcount < 1:
|
||||
LOG.fatal("OSM object %s not found in database.", osm_id)
|
||||
raise UsageError("OSM object not found")
|
||||
cur.execute("update placex set indexed_status = 2 where place_id = %s",
|
||||
(place['place_id'], ))
|
||||
|
||||
place_id = cur.fetchone()[0]
|
||||
cur.execute("""SET auto_explain.log_min_duration = '0';
|
||||
SET auto_explain.log_analyze = 'true';
|
||||
SET auto_explain.log_nested_statements = 'true';
|
||||
LOAD 'auto_explain';
|
||||
SET client_min_messages = LOG;
|
||||
SET log_min_messages = FATAL""")
|
||||
|
||||
if place_id is None:
|
||||
LOG.fatal("No OSM object given to index.")
|
||||
raise UsageError("OSM object not found")
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
|
||||
|
||||
cur.execute("update placex set indexed_status = 2 where place_id = %s",
|
||||
(place_id, ))
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
cur.execute("""UPDATE placex
|
||||
SET indexed_status = 0, address = %s, token_info = %s,
|
||||
name = %s, linked_place_id = %s
|
||||
WHERE place_id = %s""",
|
||||
(place['address'],
|
||||
Json(analyzer.process_place(PlaceInfo(place))),
|
||||
place['name'], place['linked_place_id'], place['place_id']))
|
||||
|
||||
cur.execute("""SET auto_explain.log_min_duration = '0';
|
||||
SET auto_explain.log_analyze = 'true';
|
||||
SET auto_explain.log_nested_statements = 'true';
|
||||
LOAD 'auto_explain';
|
||||
SET client_min_messages = LOG;
|
||||
SET log_min_messages = FATAL""")
|
||||
# we do not want to keep the results
|
||||
conn.rollback()
|
||||
|
||||
cur.execute("update placex set indexed_status = 0 where place_id = %s",
|
||||
(place_id, ))
|
||||
|
||||
# we do not want to keep the results
|
||||
conn.rollback()
|
||||
|
||||
for msg in conn.notices:
|
||||
print(msg)
|
||||
for msg in conn.notices:
|
||||
print(msg)
|
||||
|
@ -42,6 +42,7 @@ class MockPlacexTable:
|
||||
admin_level smallint,
|
||||
address hstore,
|
||||
extratags hstore,
|
||||
token_info jsonb,
|
||||
geometry Geometry(Geometry,4326),
|
||||
wikipedia TEXT,
|
||||
country_code varchar(2),
|
||||
|
@ -11,37 +11,62 @@ import pytest
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.tools import admin
|
||||
from nominatim.tokenizer import factory
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def create_placex_table(placex_table):
|
||||
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
|
||||
""" All tests in this module require the placex table to be set up.
|
||||
"""
|
||||
temp_db_cursor.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
|
||||
temp_db_cursor.execute("""CREATE TYPE prepare_update_info AS (
|
||||
name HSTORE,
|
||||
address HSTORE,
|
||||
rank_address SMALLINT,
|
||||
country_code TEXT,
|
||||
class TEXT,
|
||||
type TEXT,
|
||||
linked_place_id BIGINT
|
||||
)""")
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
|
||||
OUT result prepare_update_info)
|
||||
AS $$
|
||||
BEGIN
|
||||
result.address := p.address;
|
||||
result.name := p.name;
|
||||
result.class := p.class;
|
||||
result.type := p.type;
|
||||
result.country_code := p.country_code;
|
||||
result.rank_address := p.rank_address;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE;
|
||||
""")
|
||||
factory.create_tokenizer(project_env)
|
||||
|
||||
|
||||
def test_analyse_indexing_no_objects(temp_db_conn):
|
||||
def test_analyse_indexing_no_objects(project_env):
|
||||
with pytest.raises(UsageError):
|
||||
admin.analyse_indexing(temp_db_conn)
|
||||
admin.analyse_indexing(project_env)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123'])
|
||||
def test_analyse_indexing_bad_osmid(temp_db_conn, oid):
|
||||
def test_analyse_indexing_bad_osmid(project_env, oid):
|
||||
with pytest.raises(UsageError):
|
||||
admin.analyse_indexing(temp_db_conn, osm_id=oid)
|
||||
admin.analyse_indexing(project_env, osm_id=oid)
|
||||
|
||||
|
||||
def test_analyse_indexing_unknown_osmid(temp_db_conn):
|
||||
def test_analyse_indexing_unknown_osmid(project_env):
|
||||
with pytest.raises(UsageError):
|
||||
admin.analyse_indexing(temp_db_conn, osm_id='W12345674')
|
||||
admin.analyse_indexing(project_env, osm_id='W12345674')
|
||||
|
||||
|
||||
def test_analyse_indexing_with_place_id(temp_db_conn, temp_db_cursor):
|
||||
def test_analyse_indexing_with_place_id(project_env, temp_db_cursor):
|
||||
temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
|
||||
|
||||
admin.analyse_indexing(temp_db_conn, place_id=12345)
|
||||
admin.analyse_indexing(project_env, place_id=12345)
|
||||
|
||||
|
||||
def test_analyse_indexing_with_osm_id(temp_db_conn, temp_db_cursor):
|
||||
def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
|
||||
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
|
||||
VALUES(9988, 'N', 10000)""")
|
||||
|
||||
admin.analyse_indexing(temp_db_conn, osm_id='N10000')
|
||||
admin.analyse_indexing(project_env, osm_id='N10000')
|
||||
|
Loading…
Reference in New Issue
Block a user