integrate analyse of indexing into nominatim tool

This commit is contained in:
Sarah Hoffmann 2021-02-08 22:21:57 +01:00
parent 0cbf98c020
commit d81e152804
6 changed files with 149 additions and 119 deletions

View File

@ -2,6 +2,7 @@
Implementation of the 'admin' subcommand.
"""
from ..tools.exec_utils import run_legacy_script
from ..db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
@ -20,6 +21,8 @@ class AdminFuncs:
help='Warm database caches for search and reverse queries.')
group.add_argument('--check-database', action='store_true',
help='Check that the database is complete and operational.')
group.add_argument('--analyse-indexing', action='store_true',
help='Print performance analysis of the indexing process.')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
@ -27,15 +30,27 @@ class AdminFuncs:
group.add_argument('--reverse-only', action='store_const', dest='target',
const='reverse',
help="Only pre-warm tables for reverse queries")
group = parser.add_argument_group('Arguments for index anaysis')
mgroup = group.add_mutually_exclusive_group()
mgroup.add_argument('--osm-id', type=str,
help='Analyse indexing of the given OSM object')
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
@staticmethod
def run(args):
from ..tools import admin
if args.warm:
AdminFuncs._warm(args)
if args.check_database:
run_legacy_script('check_import_finished.php', nominatim_env=args)
if args.analyse_indexing:
conn = connect(args.config.get_libpq_dsn())
admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
conn.close()
return 0

49
nominatim/tools/admin.py Normal file
View File

@ -0,0 +1,49 @@
"""
Functions for database analysis and maintenance.
"""
import logging
from ..errors import UsageError
LOG = logging.getLogger()
def analyse_indexing(conn, osm_id=None, place_id=None):
""" Analyse indexing of a single Nominatim object.
"""
with conn.cursor() as cur:
if osm_id:
osm_type = osm_id[0].upper()
if osm_type not in 'NWR' or not osm_id[1:].isdigit():
LOG.fatal('OSM ID must be of form <N|W|R><id>. Got: %s', osm_id)
raise UsageError("OSM ID parameter badly formatted")
cur.execute('SELECT place_id FROM placex WHERE osm_type = %s AND osm_id = %s',
(osm_type, osm_id[1:]))
if cur.rowcount < 1:
LOG.fatal("OSM object %s not found in database.", osm_id)
raise UsageError("OSM object not found")
place_id = cur.fetchone()[0]
if place_id is None:
LOG.fatal("No OSM object given to index.")
raise UsageError("OSM object not found")
cur.execute("update placex set indexed_status = 2 where place_id = %s",
(place_id, ))
cur.execute("""SET auto_explain.log_min_duration = '0';
SET auto_explain.log_analyze = 'true';
SET auto_explain.log_nested_statements = 'true';
LOAD 'auto_explain';
SET client_min_messages = LOG;
SET log_min_messages = FATAL""")
cur.execute("update placex set indexed_status = 0 where place_id = %s",
(place_id, ))
# we do not want to keep the results
conn.rollback()
for msg in conn.notices:
print(msg)

View File

@ -153,3 +153,39 @@ def place_row(place_table, temp_db_cursor):
geom or 'SRID=4326;POINT(0 0 )'))
return _insert
@pytest.fixture
def placex_table(temp_db_with_extensions, temp_db_conn):
""" Create an empty version of the place table.
"""
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER,
rank_address SMALLINT,
rank_search SMALLINT,
partition SMALLINT,
indexed_status SMALLINT,
osm_id int8,
osm_type char(1),
class text,
type text,
name hstore,
admin_level smallint,
address hstore,
extratags hstore,
geometry Geometry(Geometry,4326),
wikipedia TEXT,
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326))
""")
temp_db_conn.commit()

View File

@ -85,6 +85,13 @@ def test_admin_command_legacy(monkeypatch, params):
assert mock_run_legacy.called == 1
@pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
def test_admin_command_tool(monkeypatch, func, params):
mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.admin, func, mock)
assert 0 == call_nominatim('admin', *params)
assert mock.called == 1
@pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
('node', 12), ('way', 8), ('relation', 32)])

View File

@ -0,0 +1,42 @@
"""
Tests for maintenance and analysis functions.
"""
import pytest
from nominatim.db.connection import connect
from nominatim.errors import UsageError
from nominatim.tools import admin
@pytest.fixture
def db(temp_db, placex_table):
conn = connect('dbname=' + temp_db)
yield conn
conn.close()
def test_analyse_indexing_no_objects(db):
with pytest.raises(UsageError):
admin.analyse_indexing(db)
@pytest.mark.parametrize("oid", ['1234', 'N123a', 'X123'])
def test_analyse_indexing_bad_osmid(db, oid):
with pytest.raises(UsageError):
admin.analyse_indexing(db, osm_id=oid)
def test_analyse_indexing_unknown_osmid(db):
with pytest.raises(UsageError):
admin.analyse_indexing(db, osm_id='W12345674')
def test_analyse_indexing_with_place_id(db, temp_db_cursor):
temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
admin.analyse_indexing(db, place_id=12345)
def test_analyse_indexing_with_osm_id(db, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(db, osm_id='N10000')

View File

@ -1,119 +0,0 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim.
# Copyright (C) 2020 Sarah Hoffmann
"""
Script for analysing the indexing process.
The script enables detailed logging for nested statements and then
runs the indexing process for teh given object. Detailed 'EXPLAIN ANALYSE'
information is printed for each executed query in the trigger. The
transaction is then rolled back, so that no actual changes to the database
happen. It also disables logging into the system log, so that the
log files are not cluttered.
"""
from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError
import psycopg2
import getpass
import re
class Analyser(object):
def __init__(self, options):
password = None
if options.password_prompt:
password = getpass.getpass("Database password: ")
self.options = options
self.conn = psycopg2.connect(dbname=options.dbname,
user=options.user,
password=password,
host=options.host,
port=options.port)
def run(self):
c = self.conn.cursor()
if self.options.placeid:
place_id = self.options.placeid
else:
if self.options.rank:
c.execute(f"""select place_id from placex
where rank_address = {self.options.rank}
and linked_place_id is null
limit 1""")
objinfo = f"rank {self.options.rank}"
if self.options.osmid:
osm_type = self.options.osmid[0].upper()
if osm_type not in ('N', 'W', 'R'):
raise RuntimeError("OSM ID must be of form <N|W|R><id>")
try:
osm_id = int(self.options.osmid[1:])
except ValueError:
raise RuntimeError("OSM ID must be of form <N|W|R><id>")
c.execute(f"""SELECT place_id FROM placex
WHERE osm_type = '{osm_type}' AND osm_id = {osm_id}""")
objinfo = f"OSM object {self.options.osmid}"
if c.rowcount < 1:
raise RuntimeError(f"Cannot find a place for {objinfo}.")
place_id = c.fetchone()[0]
c.execute(f"""update placex set indexed_status = 2 where
place_id = {place_id}""")
c.execute("""SET auto_explain.log_min_duration = '0';
SET auto_explain.log_analyze = 'true';
SET auto_explain.log_nested_statements = 'true';
LOAD 'auto_explain';
SET client_min_messages = LOG;
SET log_min_messages = FATAL""");
c.execute(f"""update placex set indexed_status = 0 where
place_id = {place_id}""")
c.close() # automatic rollback
for l in self.conn.notices:
print(l)
if __name__ == '__main__':
def h(s):
return re.sub("\s\s+" , " ", s)
p = ArgumentParser(description=__doc__,
formatter_class=RawDescriptionHelpFormatter)
group = p.add_mutually_exclusive_group(required=True)
group.add_argument('--rank', dest='rank', type=int,
help='Analyse indexing of the given address rank')
group.add_argument('--osm-id', dest='osmid', type=str,
help='Analyse indexing of the given OSM object')
group.add_argument('--place-id', dest='placeid', type=int,
help='Analyse indexing of the given Nominatim object')
p.add_argument('-d', '--database',
dest='dbname', action='store', default='nominatim',
help='Name of the PostgreSQL database to connect to.')
p.add_argument('-U', '--username',
dest='user', action='store',
help='PostgreSQL user name.')
p.add_argument('-W', '--password',
dest='password_prompt', action='store_true',
help='Force password prompt.')
p.add_argument('-H', '--host',
dest='host', action='store',
help='PostgreSQL server hostname or socket location.')
p.add_argument('-P', '--port',
dest='port', action='store',
help='PostgreSQL server port')
Analyser(p.parse_args()).run()