mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-11 06:24:45 +03:00
Merge pull request #2143 from lonvia/integrate-indexer-into-nominatim-tool
Integrate indexer into nominatim tool
This commit is contained in:
commit
3475e1dfd6
@ -8,5 +8,6 @@ require('@CMAKE_SOURCE_DIR@/lib/dotenv_loader.php');
|
||||
@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@');
|
||||
|
||||
loadDotEnv();
|
||||
$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
|
||||
|
||||
require_once('@CMAKE_SOURCE_DIR@/lib/admin/@script_source@');
|
||||
|
@ -1,8 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
|
||||
|
||||
os.environ['NOMINATIM_NOMINATIM_TOOL'] = __file__
|
||||
|
||||
from nominatim import cli
|
||||
|
||||
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
|
||||
|
@ -7,7 +7,7 @@ class Shell
|
||||
public function __construct($sBaseCmd, ...$aParams)
|
||||
{
|
||||
if (!$sBaseCmd) {
|
||||
throw new Exception('Command missing in new() call');
|
||||
throw new \Exception('Command missing in new() call');
|
||||
}
|
||||
$this->baseCmd = $sBaseCmd;
|
||||
$this->aParams = array();
|
||||
|
@ -105,25 +105,14 @@ if ($fPostgresVersion >= 11.0) {
|
||||
}
|
||||
|
||||
|
||||
$oIndexCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
|
||||
->addParams('--database', $aDSNInfo['database'])
|
||||
->addParams('--port', $aDSNInfo['port'])
|
||||
->addParams('--threads', $aResult['index-instances']);
|
||||
if (!$aResult['quiet']) {
|
||||
$oIndexCmd->addParams('--verbose');
|
||||
$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
|
||||
->addParams('index');
|
||||
if ($aResult['quiet']) {
|
||||
$oIndexCmd->addParams('--quiet');
|
||||
}
|
||||
if ($aResult['verbose']) {
|
||||
$oIndexCmd->addParams('--verbose');
|
||||
}
|
||||
if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
|
||||
$oIndexCmd->addParams('--host', $aDSNInfo['hostspec']);
|
||||
}
|
||||
if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
|
||||
$oIndexCmd->addParams('--username', $aDSNInfo['username']);
|
||||
}
|
||||
if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
|
||||
$oIndexCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
|
||||
}
|
||||
|
||||
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
|
||||
$sBaseURL = getSetting('REPLICATION_URL');
|
||||
@ -288,15 +277,9 @@ if ($aResult['recompute-word-counts']) {
|
||||
}
|
||||
|
||||
if ($aResult['index']) {
|
||||
$oCmd = (clone $oIndexCmd)
|
||||
->addParams('--minrank', $aResult['index-rank'], '-b');
|
||||
$oCmd->run();
|
||||
|
||||
$oCmd = (clone $oIndexCmd)
|
||||
->addParams('--minrank', $aResult['index-rank']);
|
||||
$oCmd->run();
|
||||
|
||||
$oDB->exec('update import_status set indexed = true');
|
||||
}
|
||||
|
||||
if ($aResult['update-address-levels']) {
|
||||
@ -438,15 +421,6 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
|
||||
if (!$aResult['no-index']) {
|
||||
$fCMDStartTime = time();
|
||||
|
||||
$oThisIndexCmd = clone($oIndexCmd);
|
||||
$oThisIndexCmd->addParams('-b');
|
||||
echo $oThisIndexCmd->escapedCmd()."\n";
|
||||
$iErrorLevel = $oThisIndexCmd->run();
|
||||
if ($iErrorLevel) {
|
||||
echo "Error: $iErrorLevel\n";
|
||||
exit($iErrorLevel);
|
||||
}
|
||||
|
||||
$oThisIndexCmd = clone($oIndexCmd);
|
||||
echo $oThisIndexCmd->escapedCmd()."\n";
|
||||
$iErrorLevel = $oThisIndexCmd->run();
|
||||
@ -463,9 +437,6 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
|
||||
|
||||
$sSQL = 'update import_status set indexed = true';
|
||||
$oDB->exec($sSQL);
|
||||
} else {
|
||||
if ($aResult['import-osmosis-all']) {
|
||||
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
|
||||
|
@ -549,26 +549,15 @@ class SetupFunctions
|
||||
{
|
||||
$this->checkModulePresence(); // raises exception on failure
|
||||
|
||||
$oBaseCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
|
||||
->addParams('--database', $this->aDSNInfo['database'])
|
||||
->addParams('--port', $this->aDSNInfo['port'])
|
||||
->addParams('--threads', $this->iInstances);
|
||||
$oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
|
||||
->addParams('index');
|
||||
|
||||
if (!$this->bQuiet) {
|
||||
$oBaseCmd->addParams('-v');
|
||||
if ($this->bQuiet) {
|
||||
$oBaseCmd->addParams('-q');
|
||||
}
|
||||
if ($this->bVerbose) {
|
||||
$oBaseCmd->addParams('-v');
|
||||
}
|
||||
if (isset($this->aDSNInfo['hostspec'])) {
|
||||
$oBaseCmd->addParams('--host', $this->aDSNInfo['hostspec']);
|
||||
}
|
||||
if (isset($this->aDSNInfo['username'])) {
|
||||
$oBaseCmd->addParams('--user', $this->aDSNInfo['username']);
|
||||
}
|
||||
if (isset($this->aDSNInfo['password'])) {
|
||||
$oBaseCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
|
||||
}
|
||||
|
||||
info('Index ranks 0 - 4');
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
|
||||
@ -581,14 +570,14 @@ class SetupFunctions
|
||||
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
|
||||
|
||||
info('Index administrative boundaries');
|
||||
$oCmd = (clone $oBaseCmd)->addParams('-b');
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--boundaries-only');
|
||||
$iStatus = $oCmd->run();
|
||||
if ($iStatus != 0) {
|
||||
fail('error status ' . $iStatus . ' running nominatim!');
|
||||
}
|
||||
|
||||
info('Index ranks 5 - 25');
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25);
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25);
|
||||
$iStatus = $oCmd->run();
|
||||
if ($iStatus != 0) {
|
||||
fail('error status ' . $iStatus . ' running nominatim!');
|
||||
@ -597,7 +586,7 @@ class SetupFunctions
|
||||
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
|
||||
|
||||
info('Index ranks 26 - 30');
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--minrank', 26);
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26);
|
||||
$iStatus = $oCmd->run();
|
||||
if ($iStatus != 0) {
|
||||
fail('error status ' . $iStatus . ' running nominatim!');
|
||||
|
@ -11,6 +11,17 @@ from pathlib import Path
|
||||
from .config import Configuration
|
||||
from .admin.exec_utils import run_legacy_script
|
||||
|
||||
from .indexer.indexer import Indexer
|
||||
|
||||
def _num_system_cpus():
    """ Return the number of CPUs usable by the current process.

        The scheduler affinity mask of the process is preferred. When it
        cannot be determined (or is empty), the result of `os.cpu_count()`
        is returned instead, which may be None.
    """
    try:
        cpus = len(os.sched_getaffinity(0))
    except (AttributeError, NotImplementedError):
        # os.sched_getaffinity() only exists on some Unix platforms. Where
        # it is missing, the attribute lookup itself fails.
        cpus = None

    return cpus or os.cpu_count()
|
||||
|
||||
|
||||
class CommandlineParser:
|
||||
""" Wraps some of the common functions for parsing the command line
|
||||
and setting up subcommands.
|
||||
@ -67,7 +78,7 @@ class CommandlineParser:
|
||||
args.project_dir = Path(args.project_dir)
|
||||
|
||||
logging.basicConfig(stream=sys.stderr,
|
||||
format='%(asctime)s %(levelname)s: %(message)s',
|
||||
format='%(asctime)s: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S',
|
||||
level=max(4 - args.verbose, 1) * 10)
|
||||
|
||||
@ -297,11 +308,30 @@ class UpdateIndex:
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
pass
|
||||
group = parser.add_argument_group('Filter arguments')
|
||||
group.add_argument('--boundaries-only', action='store_true',
|
||||
help="""Index only administrative boundaries.""")
|
||||
group.add_argument('--no-boundaries', action='store_true',
|
||||
help="""Index everything except administrative boundaries.""")
|
||||
group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
|
||||
help='Minimum/starting rank')
|
||||
group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
|
||||
help='Maximum/finishing rank')
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
return run_legacy_script('update.php', '--index', nominatim_env=args)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
args.threads or _num_system_cpus() or 1)
|
||||
|
||||
if not args.no_boundaries:
|
||||
indexer.index_boundaries(args.minrank, args.maxrank)
|
||||
if not args.boundaries_only:
|
||||
indexer.index_by_rank(args.minrank, args.maxrank)
|
||||
|
||||
if not args.no_boundaries and not args.boundaries_only:
|
||||
indexer.update_status_table()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
class UpdateRefresh:
|
||||
|
@ -29,6 +29,18 @@ class Configuration:
|
||||
|
||||
return os.environ.get(name) or self._config[name]
|
||||
|
||||
def get_libpq_dsn(self):
    """ Get configured database DSN converted into the key/value format
        understood by libpq and psycopg.

        A DSN in the old PHP format ('pgsql:key=value;key=value') is
        converted, with values quoted and escaped where libpq requires
        it. Any other DSN is returned unchanged.
    """
    dsn = self.DATABASE_DSN

    if not dsn.startswith('pgsql:'):
        return dsn

    def quote_param(param):
        # libpq wants values that are empty or contain spaces wrapped in
        # single quotes; backslashes and single quotes inside a value
        # must be escaped with a backslash.
        key, sep, val = param.partition('=')
        if not sep:
            return param
        val = val.replace('\\', '\\\\').replace("'", "\\'")
        if not val or ' ' in val:
            val = "'" + val + "'"
        return key + '=' + val

    # Old PHP DSN format. Convert before returning.
    return ' '.join(quote_param(p) for p in dsn[6:].split(';') if p)
|
||||
|
||||
def get_os_env(self):
|
||||
""" Return a copy of the OS environment with the Nominatim configuration
|
||||
merged in.
|
||||
|
0
nominatim/db/__init__.py
Normal file
0
nominatim/db/__init__.py
Normal file
@ -11,26 +11,14 @@ from psycopg2.extras import wait_select
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def make_connection(options, asynchronous=False):
|
||||
""" Create a psycopg2 connection from the given options.
|
||||
"""
|
||||
params = {'dbname' : options.dbname,
|
||||
'user' : options.user,
|
||||
'password' : options.password,
|
||||
'host' : options.host,
|
||||
'port' : options.port,
|
||||
'async' : asynchronous}
|
||||
|
||||
return psycopg2.connect(**params)
|
||||
|
||||
class DBConnection:
|
||||
""" A single non-blocking database connection.
|
||||
"""
|
||||
|
||||
def __init__(self, options):
|
||||
def __init__(self, dsn):
|
||||
self.current_query = None
|
||||
self.current_params = None
|
||||
self.options = options
|
||||
self.dsn = dsn
|
||||
|
||||
self.conn = None
|
||||
self.cursor = None
|
||||
@ -46,7 +34,9 @@ class DBConnection:
|
||||
self.cursor.close()
|
||||
self.conn.close()
|
||||
|
||||
self.conn = make_connection(self.options, asynchronous=True)
|
||||
# Use a dict to hand in the parameters because async is a reserved
|
||||
# word in Python3.
|
||||
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
|
||||
self.wait()
|
||||
|
||||
self.cursor = self.conn.cursor()
|
124
nominatim/nominatim.py → nominatim/indexer/indexer.py
Executable file → Normal file
124
nominatim/nominatim.py → nominatim/indexer/indexer.py
Executable file → Normal file
@ -1,35 +1,14 @@
|
||||
#! /usr/bin/env python3
|
||||
#-----------------------------------------------------------------------------
|
||||
# nominatim - [description]
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Indexing tool for the Nominatim database.
|
||||
#
|
||||
# Based on C version by Brian Quinion
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#-----------------------------------------------------------------------------
|
||||
"""
|
||||
Main work horse for indexing (computing addresses) the database.
|
||||
"""
|
||||
# pylint: disable=C0111
|
||||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
import logging
|
||||
import sys
|
||||
import getpass
|
||||
import select
|
||||
|
||||
from indexer.progress import ProgressLogger # pylint: disable=E0401
|
||||
from indexer.db import DBConnection, make_connection # pylint: disable=E0401
|
||||
import psycopg2
|
||||
|
||||
from .progress import ProgressLogger
|
||||
from ..db.async_connection import DBConnection
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@ -117,34 +96,40 @@ class Indexer:
|
||||
""" Main indexing routine.
|
||||
"""
|
||||
|
||||
def __init__(self, opts):
|
||||
self.minrank = max(1, opts.minrank)
|
||||
self.maxrank = min(30, opts.maxrank)
|
||||
self.conn = make_connection(opts)
|
||||
self.threads = [DBConnection(opts) for _ in range(opts.threads)]
|
||||
def __init__(self, dsn, num_threads):
|
||||
self.conn = psycopg2.connect(dsn)
|
||||
self.threads = [DBConnection(dsn) for _ in range(num_threads)]
|
||||
|
||||
def index_boundaries(self):
|
||||
def index_boundaries(self, minrank, maxrank):
|
||||
LOG.warning("Starting indexing boundaries using %s threads",
|
||||
len(self.threads))
|
||||
|
||||
for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
|
||||
for rank in range(max(minrank, 5), min(maxrank, 26)):
|
||||
self.index(BoundaryRunner(rank))
|
||||
|
||||
def index_by_rank(self):
|
||||
def index_by_rank(self, minrank, maxrank):
|
||||
""" Run classic indexing by rank.
|
||||
"""
|
||||
maxrank = min(maxrank, 30)
|
||||
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
|
||||
self.minrank, self.maxrank, len(self.threads))
|
||||
minrank, maxrank, len(self.threads))
|
||||
|
||||
for rank in range(max(1, self.minrank), self.maxrank):
|
||||
for rank in range(max(1, minrank), maxrank):
|
||||
self.index(RankRunner(rank))
|
||||
|
||||
if self.maxrank == 30:
|
||||
if maxrank == 30:
|
||||
self.index(RankRunner(0))
|
||||
self.index(InterpolationRunner(), 20)
|
||||
self.index(RankRunner(self.maxrank), 20)
|
||||
self.index(RankRunner(30), 20)
|
||||
else:
|
||||
self.index(RankRunner(self.maxrank))
|
||||
self.index(RankRunner(maxrank))
|
||||
|
||||
def update_status_table(self):
    """ Mark the import as indexed: set the `indexed` flag in the
        import_status table and commit the change.
    """
    connection = self.conn

    with connection.cursor() as cursor:
        cursor.execute('UPDATE import_status SET indexed = true')

    connection.commit()
|
||||
|
||||
def index(self, obj, batch=1):
|
||||
""" Index a single rank or table. `obj` describes the SQL to use
|
||||
@ -212,60 +197,3 @@ class Indexer:
|
||||
ready, _, _ = select.select(self.threads, [], [])
|
||||
|
||||
assert False, "Unreachable code"
|
||||
|
||||
|
||||
def nominatim_arg_parser():
|
||||
""" Setup the command-line parser for the tool.
|
||||
"""
|
||||
parser = ArgumentParser(description="Indexing tool for Nominatim.",
|
||||
formatter_class=RawDescriptionHelpFormatter)
|
||||
|
||||
parser.add_argument('-d', '--database',
|
||||
dest='dbname', action='store', default='nominatim',
|
||||
help='Name of the PostgreSQL database to connect to.')
|
||||
parser.add_argument('-U', '--username',
|
||||
dest='user', action='store',
|
||||
help='PostgreSQL user name.')
|
||||
parser.add_argument('-W', '--password',
|
||||
dest='password_prompt', action='store_true',
|
||||
help='Force password prompt.')
|
||||
parser.add_argument('-H', '--host',
|
||||
dest='host', action='store',
|
||||
help='PostgreSQL server hostname or socket location.')
|
||||
parser.add_argument('-P', '--port',
|
||||
dest='port', action='store',
|
||||
help='PostgreSQL server port')
|
||||
parser.add_argument('-b', '--boundary-only',
|
||||
dest='boundary_only', action='store_true',
|
||||
help='Only index administrative boundaries (ignores min/maxrank).')
|
||||
parser.add_argument('-r', '--minrank',
|
||||
dest='minrank', type=int, metavar='RANK', default=0,
|
||||
help='Minimum/starting rank.')
|
||||
parser.add_argument('-R', '--maxrank',
|
||||
dest='maxrank', type=int, metavar='RANK', default=30,
|
||||
help='Maximum/finishing rank.')
|
||||
parser.add_argument('-t', '--threads',
|
||||
dest='threads', type=int, metavar='NUM', default=1,
|
||||
help='Number of threads to create for indexing.')
|
||||
parser.add_argument('-v', '--verbose',
|
||||
dest='loglevel', action='count', default=0,
|
||||
help='Increase verbosity')
|
||||
|
||||
return parser
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s')
|
||||
|
||||
OPTIONS = nominatim_arg_parser().parse_args(sys.argv[1:])
|
||||
|
||||
LOG.setLevel(max(3 - OPTIONS.loglevel, 0) * 10)
|
||||
|
||||
OPTIONS.password = None
|
||||
if OPTIONS.password_prompt:
|
||||
PASSWORD = getpass.getpass("Database password: ")
|
||||
OPTIONS.password = PASSWORD
|
||||
|
||||
if OPTIONS.boundary_only:
|
||||
Indexer(OPTIONS).index_boundaries()
|
||||
else:
|
||||
Indexer(OPTIONS).index_by_rank()
|
@ -26,7 +26,7 @@ class ProgressLogger:
|
||||
self.done_places = 0
|
||||
self.rank_start_time = datetime.now()
|
||||
self.log_interval = log_interval
|
||||
self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.INFO) else total + 1
|
||||
self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.WARNING) else total + 1
|
||||
|
||||
def add(self, num=1):
|
||||
""" Mark `num` places as processed. Print a log message if the
|
||||
@ -47,9 +47,9 @@ class ProgressLogger:
|
||||
places_per_sec = self.done_places / done_time
|
||||
eta = (self.total_places - self.done_places) / places_per_sec
|
||||
|
||||
LOG.info("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
|
||||
self.done_places, int(done_time),
|
||||
places_per_sec, self.name, eta)
|
||||
LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
|
||||
self.done_places, int(done_time),
|
||||
places_per_sec, self.name, eta)
|
||||
|
||||
self.next_info += int(places_per_sec) * self.log_interval
|
||||
|
||||
|
@ -91,6 +91,7 @@ class NominatimEnvironment:
|
||||
self.test_env['NOMINATIM_BINDIR'] = self.src_dir / 'utils'
|
||||
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module'
|
||||
self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql'
|
||||
self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim'
|
||||
|
||||
if self.server_module_path:
|
||||
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
|
||||
|
@ -54,3 +54,22 @@ def test_get_os_env_prefer_os_environ():
|
||||
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
|
||||
|
||||
del os.environ['NOMINATIM_DATABASE_WEBUSER']
|
||||
|
||||
def test_get_libpq_dsn_convert_default():
    """ The DSN from the default configuration is returned in libpq form.
    """
    dsn = Configuration(None, DEFCFG_DIR).get_libpq_dsn()

    assert dsn == 'dbname=nominatim'
|
||||
|
||||
def test_get_libpq_dsn_convert_php():
    """ A DSN in the old PHP format is converted to libpq key/value form.
    """
    config = Configuration(None, DEFCFG_DIR)

    os.environ['NOMINATIM_DATABASE_DSN'] = 'pgsql:dbname=gis;password=foo;host=localhost'
    try:
        assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
    finally:
        # Do not leak the override into tests that run afterwards.
        del os.environ['NOMINATIM_DATABASE_DSN']
|
||||
|
||||
def test_get_libpq_dsn_convert_libpq():
    """ A DSN that is already in libpq format is passed through unchanged.
    """
    config = Configuration(None, DEFCFG_DIR)

    os.environ['NOMINATIM_DATABASE_DSN'] = 'host=localhost dbname=gis password=foo'
    try:
        assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
    finally:
        # Do not leak the override into tests that run afterwards.
        del os.environ['NOMINATIM_DATABASE_DSN']
|
||||
|
Loading…
Reference in New Issue
Block a user