Merge pull request #2143 from lonvia/integrate-indexer-into-nominatim-tool

Integrate indexer into nominatim tool
This commit is contained in:
Sarah Hoffmann 2021-01-19 08:42:22 +01:00 committed by GitHub
commit 3475e1dfd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 116 additions and 172 deletions

View File

@ -8,5 +8,6 @@ require('@CMAKE_SOURCE_DIR@/lib/dotenv_loader.php');
@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@');
loadDotEnv();
$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
require_once('@CMAKE_SOURCE_DIR@/lib/admin/@script_source@');

View File

@ -1,8 +1,11 @@
#!/usr/bin/env python3
import sys
import os
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = __file__
from nominatim import cli
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',

View File

@ -7,7 +7,7 @@ class Shell
public function __construct($sBaseCmd, ...$aParams)
{
if (!$sBaseCmd) {
throw new Exception('Command missing in new() call');
throw new \Exception('Command missing in new() call');
}
$this->baseCmd = $sBaseCmd;
$this->aParams = array();

View File

@ -105,25 +105,14 @@ if ($fPostgresVersion >= 11.0) {
}
$oIndexCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
->addParams('--database', $aDSNInfo['database'])
->addParams('--port', $aDSNInfo['port'])
->addParams('--threads', $aResult['index-instances']);
if (!$aResult['quiet']) {
$oIndexCmd->addParams('--verbose');
$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('index');
if ($aResult['quiet']) {
$oIndexCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
$oIndexCmd->addParams('--verbose');
}
if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
$oIndexCmd->addParams('--host', $aDSNInfo['hostspec']);
}
if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
$oIndexCmd->addParams('--username', $aDSNInfo['username']);
}
if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
$oIndexCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
}
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
$sBaseURL = getSetting('REPLICATION_URL');
@ -288,15 +277,9 @@ if ($aResult['recompute-word-counts']) {
}
if ($aResult['index']) {
$oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank'], '-b');
$oCmd->run();
$oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank']);
$oCmd->run();
$oDB->exec('update import_status set indexed = true');
}
if ($aResult['update-address-levels']) {
@ -438,15 +421,6 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
if (!$aResult['no-index']) {
$fCMDStartTime = time();
$oThisIndexCmd = clone($oIndexCmd);
$oThisIndexCmd->addParams('-b');
echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run();
if ($iErrorLevel) {
echo "Error: $iErrorLevel\n";
exit($iErrorLevel);
}
$oThisIndexCmd = clone($oIndexCmd);
echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run();
@ -463,9 +437,6 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
var_Dump($sSQL);
$oDB->exec($sSQL);
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
$sSQL = 'update import_status set indexed = true';
$oDB->exec($sSQL);
} else {
if ($aResult['import-osmosis-all']) {
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";

View File

@ -549,26 +549,15 @@ class SetupFunctions
{
$this->checkModulePresence(); // raises exception on failure
$oBaseCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
->addParams('--database', $this->aDSNInfo['database'])
->addParams('--port', $this->aDSNInfo['port'])
->addParams('--threads', $this->iInstances);
$oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('index');
if (!$this->bQuiet) {
$oBaseCmd->addParams('-v');
if ($this->bQuiet) {
$oBaseCmd->addParams('-q');
}
if ($this->bVerbose) {
$oBaseCmd->addParams('-v');
}
if (isset($this->aDSNInfo['hostspec'])) {
$oBaseCmd->addParams('--host', $this->aDSNInfo['hostspec']);
}
if (isset($this->aDSNInfo['username'])) {
$oBaseCmd->addParams('--user', $this->aDSNInfo['username']);
}
if (isset($this->aDSNInfo['password'])) {
$oBaseCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
}
info('Index ranks 0 - 4');
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
@ -581,14 +570,14 @@ class SetupFunctions
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index administrative boundaries');
$oCmd = (clone $oBaseCmd)->addParams('-b');
$oCmd = (clone $oBaseCmd)->addParams('--boundaries-only');
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
info('Index ranks 5 - 25');
$oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25);
$oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
@ -597,7 +586,7 @@ class SetupFunctions
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index ranks 26 - 30');
$oCmd = (clone $oBaseCmd)->addParams('--minrank', 26);
$oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26);
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');

View File

@ -11,6 +11,17 @@ from pathlib import Path
from .config import Configuration
from .admin.exec_utils import run_legacy_script
from .indexer.indexer import Indexer
def _num_system_cpus():
    """ Return the number of CPUs that can be used by this process.

        The size of the scheduler affinity set of the current process is
        preferred. When it cannot be determined, the result of
        `os.cpu_count()` is returned instead, which may be None.
    """
    try:
        cpus = len(os.sched_getaffinity(0))
    except (AttributeError, NotImplementedError):
        # os.sched_getaffinity() is only provided on some platforms. Where
        # it is missing, the attribute lookup itself fails.
        cpus = None

    return cpus or os.cpu_count()
class CommandlineParser:
""" Wraps some of the common functions for parsing the command line
and setting up subcommands.
@ -67,7 +78,7 @@ class CommandlineParser:
args.project_dir = Path(args.project_dir)
logging.basicConfig(stream=sys.stderr,
format='%(asctime)s %(levelname)s: %(message)s',
format='%(asctime)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=max(4 - args.verbose, 1) * 10)
@ -297,11 +308,30 @@ class UpdateIndex:
@staticmethod
def add_args(parser):
pass
group = parser.add_argument_group('Filter arguments')
group.add_argument('--boundaries-only', action='store_true',
help="""Index only administrative boundaries.""")
group.add_argument('--no-boundaries', action='store_true',
help="""Index everything except administrative boundaries.""")
group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
help='Minimum/starting rank')
group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
help='Maximum/finishing rank')
@staticmethod
def run(args):
return run_legacy_script('update.php', '--index', nominatim_env=args)
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or _num_system_cpus() or 1)
if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)
if not args.boundaries_only:
indexer.index_by_rank(args.minrank, args.maxrank)
if not args.no_boundaries and not args.boundaries_only:
indexer.update_status_table()
return 0
class UpdateRefresh:

View File

@ -29,6 +29,18 @@ class Configuration:
return os.environ.get(name) or self._config[name]
def get_libpq_dsn(self):
    """ Get configured database DSN converted into the key/value format
        understood by libpq and psycopg.

        A DSN in the old PHP format ('pgsql:key=value;key=value') is
        converted into a space-separated list of key=value pairs. Values
        are escaped and quoted where the libpq syntax requires it. Any
        other DSN is returned unchanged.
    """
    dsn = self.DATABASE_DSN

    if not dsn.startswith('pgsql:'):
        return dsn

    def quote_param(param):
        key, sep, val = param.partition('=')
        if not sep:
            # Not a key/value pair (e.g. empty part after a trailing ';').
            return param

        # libpq wants backslashes and single quotes escaped and values
        # that are empty or contain whitespace wrapped in single quotes.
        val = val.replace('\\', '\\\\').replace("'", "\\'")
        if not val or any(char.isspace() for char in val):
            val = "'" + val + "'"

        return key + '=' + val

    # Old PHP DSN format. Convert before returning.
    return ' '.join(quote_param(part) for part in dsn[6:].split(';'))
def get_os_env(self):
""" Return a copy of the OS environment with the Nominatim configuration
merged in.

0
nominatim/db/__init__.py Normal file
View File

View File

@ -11,26 +11,14 @@ from psycopg2.extras import wait_select
LOG = logging.getLogger()
def make_connection(options, asynchronous=False):
    """ Open a psycopg2 connection using the connection settings
        (dbname, user, password, host, port) found on `options`.
    """
    connect_args = dict(dbname=options.dbname,
                        user=options.user,
                        password=options.password,
                        host=options.host,
                        port=options.port)
    # 'async' cannot be written as a keyword argument, so it is added
    # through the dictionary.
    connect_args['async'] = asynchronous

    return psycopg2.connect(**connect_args)
class DBConnection:
""" A single non-blocking database connection.
"""
def __init__(self, options):
def __init__(self, dsn):
self.current_query = None
self.current_params = None
self.options = options
self.dsn = dsn
self.conn = None
self.cursor = None
@ -46,7 +34,9 @@ class DBConnection:
self.cursor.close()
self.conn.close()
self.conn = make_connection(self.options, asynchronous=True)
# Use a dict to hand in the parameters because async is a reserved
# word in Python3.
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
self.wait()
self.cursor = self.conn.cursor()

124
nominatim/nominatim.py → nominatim/indexer/indexer.py Executable file → Normal file
View File

@ -1,35 +1,14 @@
#! /usr/bin/env python3
#-----------------------------------------------------------------------------
# nominatim - [description]
#-----------------------------------------------------------------------------
#
# Indexing tool for the Nominatim database.
#
# Based on C version by Brian Quinion
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#-----------------------------------------------------------------------------
"""
Main work horse for indexing (computing addresses) the database.
"""
# pylint: disable=C0111
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import logging
import sys
import getpass
import select
from indexer.progress import ProgressLogger # pylint: disable=E0401
from indexer.db import DBConnection, make_connection # pylint: disable=E0401
import psycopg2
from .progress import ProgressLogger
from ..db.async_connection import DBConnection
LOG = logging.getLogger()
@ -117,34 +96,40 @@ class Indexer:
""" Main indexing routine.
"""
def __init__(self, opts):
self.minrank = max(1, opts.minrank)
self.maxrank = min(30, opts.maxrank)
self.conn = make_connection(opts)
self.threads = [DBConnection(opts) for _ in range(opts.threads)]
def __init__(self, dsn, num_threads):
self.conn = psycopg2.connect(dsn)
self.threads = [DBConnection(dsn) for _ in range(num_threads)]
def index_boundaries(self):
def index_boundaries(self, minrank, maxrank):
LOG.warning("Starting indexing boundaries using %s threads",
len(self.threads))
for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
for rank in range(max(minrank, 5), min(maxrank, 26)):
self.index(BoundaryRunner(rank))
def index_by_rank(self):
def index_by_rank(self, minrank, maxrank):
""" Run classic indexing by rank.
"""
maxrank = min(maxrank, 30)
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
self.minrank, self.maxrank, len(self.threads))
minrank, maxrank, len(self.threads))
for rank in range(max(1, self.minrank), self.maxrank):
for rank in range(max(1, minrank), maxrank):
self.index(RankRunner(rank))
if self.maxrank == 30:
if maxrank == 30:
self.index(RankRunner(0))
self.index(InterpolationRunner(), 20)
self.index(RankRunner(self.maxrank), 20)
self.index(RankRunner(30), 20)
else:
self.index(RankRunner(self.maxrank))
self.index(RankRunner(maxrank))
def update_status_table(self):
    """ Flag the import as indexed in the import_status table.
    """
    connection = self.conn

    with connection.cursor() as cursor:
        cursor.execute('UPDATE import_status SET indexed = true')

    connection.commit()
def index(self, obj, batch=1):
""" Index a single rank or table. `obj` describes the SQL to use
@ -212,60 +197,3 @@ class Indexer:
ready, _, _ = select.select(self.threads, [], [])
assert False, "Unreachable code"
def nominatim_arg_parser():
    """ Build the command-line parser for the indexing tool.
    """
    # One entry per option: (flags, keyword arguments for add_argument()).
    # The order determines the order in the help output.
    option_table = (
        (('-d', '--database'),
         dict(dest='dbname', action='store', default='nominatim',
              help='Name of the PostgreSQL database to connect to.')),
        (('-U', '--username'),
         dict(dest='user', action='store',
              help='PostgreSQL user name.')),
        (('-W', '--password'),
         dict(dest='password_prompt', action='store_true',
              help='Force password prompt.')),
        (('-H', '--host'),
         dict(dest='host', action='store',
              help='PostgreSQL server hostname or socket location.')),
        (('-P', '--port'),
         dict(dest='port', action='store',
              help='PostgreSQL server port')),
        (('-b', '--boundary-only'),
         dict(dest='boundary_only', action='store_true',
              help='Only index administrative boundaries (ignores min/maxrank).')),
        (('-r', '--minrank'),
         dict(dest='minrank', type=int, metavar='RANK', default=0,
              help='Minimum/starting rank.')),
        (('-R', '--maxrank'),
         dict(dest='maxrank', type=int, metavar='RANK', default=30,
              help='Maximum/finishing rank.')),
        (('-t', '--threads'),
         dict(dest='threads', type=int, metavar='NUM', default=1,
              help='Number of threads to create for indexing.')),
        (('-v', '--verbose'),
         dict(dest='loglevel', action='count', default=0,
              help='Increase verbosity')),
    )

    parser = ArgumentParser(description="Indexing tool for Nominatim.",
                            formatter_class=RawDescriptionHelpFormatter)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser
if __name__ == '__main__':
    # Script entry point: set up logging, read the command line and run
    # the requested kind of indexing.
    logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s')

    OPTIONS = nominatim_arg_parser().parse_args(sys.argv[1:])

    # Each -v lowers the log threshold by one level, starting at 30.
    LOG.setLevel(max(3 - OPTIONS.loglevel, 0) * 10)

    # The password is only ever asked for interactively.
    OPTIONS.password = None
    if OPTIONS.password_prompt:
        PASSWORD = getpass.getpass("Database password: ")
        OPTIONS.password = PASSWORD

    if not OPTIONS.boundary_only:
        Indexer(OPTIONS).index_by_rank()
    else:
        Indexer(OPTIONS).index_boundaries()

View File

@ -26,7 +26,7 @@ class ProgressLogger:
self.done_places = 0
self.rank_start_time = datetime.now()
self.log_interval = log_interval
self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.INFO) else total + 1
self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.WARNING) else total + 1
def add(self, num=1):
""" Mark `num` places as processed. Print a log message if the
@ -47,9 +47,9 @@ class ProgressLogger:
places_per_sec = self.done_places / done_time
eta = (self.total_places - self.done_places) / places_per_sec
LOG.info("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
self.done_places, int(done_time),
places_per_sec, self.name, eta)
LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
self.done_places, int(done_time),
places_per_sec, self.name, eta)
self.next_info += int(places_per_sec) * self.log_interval

View File

@ -91,6 +91,7 @@ class NominatimEnvironment:
self.test_env['NOMINATIM_BINDIR'] = self.src_dir / 'utils'
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path

View File

@ -54,3 +54,22 @@ def test_get_os_env_prefer_os_environ():
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
del os.environ['NOMINATIM_DATABASE_WEBUSER']
def test_get_libpq_dsn_convert_default():
    # Checks the DSN obtained from the configuration in DEFCFG_DIR.
    dsn = Configuration(None, DEFCFG_DIR).get_libpq_dsn()

    assert dsn == 'dbname=nominatim'
def test_get_libpq_dsn_convert_php():
    """ A DSN in the old PHP format is converted into libpq key/value
        format.
    """
    config = Configuration(None, DEFCFG_DIR)

    os.environ['NOMINATIM_DATABASE_DSN'] = 'pgsql:dbname=gis;password=foo;host=localhost'
    try:
        assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
    finally:
        # Remove the variable again, even when the assertion fails, so
        # that the setting does not leak into tests running afterwards.
        os.environ.pop('NOMINATIM_DATABASE_DSN', None)
def test_get_libpq_dsn_convert_libpq():
    """ A DSN that is not in the old PHP format is handed through
        unchanged.
    """
    config = Configuration(None, DEFCFG_DIR)

    os.environ['NOMINATIM_DATABASE_DSN'] = 'host=localhost dbname=gis password=foo'
    try:
        assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
    finally:
        # Remove the variable again, even when the assertion fails, so
        # that the setting does not leak into tests running afterwards.
        os.environ.pop('NOMINATIM_DATABASE_DSN', None)