From b79c79fa730621140003690966f95de9704d3318 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 17 Jan 2021 17:06:18 +0100 Subject: [PATCH 1/4] add function to get a DSN for psycopg Converts the PHP DSN syntax into psycopg syntax when necessary. --- nominatim/config.py | 12 ++++++++++++ test/python/test_config.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/nominatim/config.py b/nominatim/config.py index 911c7ddf..458c828f 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -29,6 +29,18 @@ class Configuration: return os.environ.get(name) or self._config[name] + def get_libpq_dsn(self): + """ Get configured database DSN converted into the key/value format + understood by libpq and psycopg. + """ + dsn = self.DATABASE_DSN + + if dsn.startswith('pgsql:'): + # Old PHP DSN format. Convert before returning. + return dsn[6:].replace(';', ' ') + + return dsn + def get_os_env(self): """ Return a copy of the OS environment with the Nominatim configuration merged in. 
diff --git a/test/python/test_config.py b/test/python/test_config.py index 03e4a800..e5d18f91 100644 --- a/test/python/test_config.py +++ b/test/python/test_config.py @@ -54,3 +54,22 @@ def test_get_os_env_prefer_os_environ(): assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody' del os.environ['NOMINATIM_DATABASE_WEBUSER'] + +def test_get_libpq_dsn_convert_default(): + config = Configuration(None, DEFCFG_DIR) + + assert config.get_libpq_dsn() == 'dbname=nominatim' + +def test_get_libpq_dsn_convert_php(): + config = Configuration(None, DEFCFG_DIR) + + os.environ['NOMINATIM_DATABASE_DSN'] = 'pgsql:dbname=gis;password=foo;host=localhost' + + assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost' + +def test_get_libpq_dsn_convert_libpq(): + config = Configuration(None, DEFCFG_DIR) + + os.environ['NOMINATIM_DATABASE_DSN'] = 'host=localhost dbname=gis password=foo' + + assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo' From 27977411e958a6bfc9037db728d61d296c5487c0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 17 Jan 2021 17:19:17 +0100 Subject: [PATCH 2/4] move indexing function into its own Python module This makes it now a standard function of our new Python library instead of a stand-alone program. 
--- nominatim/db/__init__.py | 0 .../{indexer/db.py => db/async_connection.py} | 0 nominatim/indexer/indexer.py | 191 +++++++++++++++++ nominatim/nominatim.py | 193 +----------------- 4 files changed, 194 insertions(+), 190 deletions(-) create mode 100644 nominatim/db/__init__.py rename nominatim/{indexer/db.py => db/async_connection.py} (100%) create mode 100644 nominatim/indexer/indexer.py diff --git a/nominatim/db/__init__.py b/nominatim/db/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nominatim/indexer/db.py b/nominatim/db/async_connection.py similarity index 100% rename from nominatim/indexer/db.py rename to nominatim/db/async_connection.py diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py new file mode 100644 index 00000000..52046456 --- /dev/null +++ b/nominatim/indexer/indexer.py @@ -0,0 +1,191 @@ +""" +Main work horse for indexing (computing addresses) the database. +""" +# pylint: disable=C0111 +import logging +import select + +from .progress import ProgressLogger +from db.async_connection import DBConnection, make_connection + +LOG = logging.getLogger() + +class RankRunner: + """ Returns SQL commands for indexing one rank within the placex table. + """ + + def __init__(self, rank): + self.rank = rank + + def name(self): + return "rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """.format(self.rank) + + def sql_get_objects(self): + return """SELECT place_id FROM placex + WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector""".format(self.rank) + + @staticmethod + def sql_index_place(ids): + return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ + .format(','.join((str(i) for i in ids))) + + +class InterpolationRunner: + """ Returns SQL commands for indexing the address interpolation table + location_property_osmline. 
+ """ + + @staticmethod + def name(): + return "interpolation lines (location_property_osmline)" + + @staticmethod + def sql_count_objects(): + return """SELECT count(*) FROM location_property_osmline + WHERE indexed_status > 0""" + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_property_osmline + WHERE indexed_status > 0 + ORDER BY geometry_sector""" + + @staticmethod + def sql_index_place(ids): + return """UPDATE location_property_osmline + SET indexed_status = 0 WHERE place_id IN ({})"""\ + .format(','.join((str(i) for i in ids))) + +class BoundaryRunner: + """ Returns SQL commands for indexing the administrative boundaries + of a certain rank. + """ + + def __init__(self, rank): + self.rank = rank + + def name(self): + return "boundaries rank {}".format(self.rank) + + def sql_count_objects(self): + return """SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative'""".format(self.rank) + + def sql_get_objects(self): + return """SELECT place_id FROM placex + WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level""".format(self.rank) + + @staticmethod + def sql_index_place(ids): + return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ + .format(','.join((str(i) for i in ids))) + +class Indexer: + """ Main indexing routine. + """ + + def __init__(self, opts): + self.minrank = max(1, opts.minrank) + self.maxrank = min(30, opts.maxrank) + self.conn = make_connection(opts) + self.threads = [DBConnection(opts) for _ in range(opts.threads)] + + def index_boundaries(self): + LOG.warning("Starting indexing boundaries using %s threads", + len(self.threads)) + + for rank in range(max(self.minrank, 5), min(self.maxrank, 26)): + self.index(BoundaryRunner(rank)) + + def index_by_rank(self): + """ Run classic indexing by rank. 
+ """ + LOG.warning("Starting indexing rank (%i to %i) using %i threads", + self.minrank, self.maxrank, len(self.threads)) + + for rank in range(max(1, self.minrank), self.maxrank): + self.index(RankRunner(rank)) + + if self.maxrank == 30: + self.index(RankRunner(0)) + self.index(InterpolationRunner(), 20) + self.index(RankRunner(self.maxrank), 20) + else: + self.index(RankRunner(self.maxrank)) + + def index(self, obj, batch=1): + """ Index a single rank or table. `obj` describes the SQL to use + for indexing. `batch` describes the number of objects that + should be processed with a single SQL statement + """ + LOG.warning("Starting %s (using batch size %s)", obj.name(), batch) + + cur = self.conn.cursor() + cur.execute(obj.sql_count_objects()) + + total_tuples = cur.fetchone()[0] + LOG.debug("Total number of rows: %i", total_tuples) + + cur.close() + + progress = ProgressLogger(obj.name(), total_tuples) + + if total_tuples > 0: + cur = self.conn.cursor(name='places') + cur.execute(obj.sql_get_objects()) + + next_thread = self.find_free_thread() + while True: + places = [p[0] for p in cur.fetchmany(batch)] + if not places: + break + + LOG.debug("Processing places: %s", str(places)) + thread = next(next_thread) + + thread.perform(obj.sql_index_place(places)) + progress.add(len(places)) + + cur.close() + + for thread in self.threads: + thread.wait() + + progress.done() + + def find_free_thread(self): + """ Generator that returns the next connection that is free for + sending a query. + """ + ready = self.threads + command_stat = 0 + + while True: + for thread in ready: + if thread.is_done(): + command_stat += 1 + yield thread + + # refresh the connections occasionaly to avoid potential + # memory leaks in Postgresql. 
+ if command_stat > 100000: + for thread in self.threads: + while not thread.is_done(): + thread.wait() + thread.connect() + command_stat = 0 + ready = self.threads + else: + ready, _, _ = select.select(self.threads, [], []) + + assert False, "Unreachable code" diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py index 8cac583e..fdc2bcba 100755 --- a/nominatim/nominatim.py +++ b/nominatim/nominatim.py @@ -21,198 +21,12 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #----------------------------------------------------------------------------- -# pylint: disable=C0111 from argparse import ArgumentParser, RawDescriptionHelpFormatter import logging import sys import getpass -import select - -from indexer.progress import ProgressLogger # pylint: disable=E0401 -from indexer.db import DBConnection, make_connection # pylint: disable=E0401 - -LOG = logging.getLogger() - -class RankRunner: - """ Returns SQL commands for indexing one rank within the placex table. - """ - - def __init__(self, rank): - self.rank = rank - - def name(self): - return "rank {}".format(self.rank) - - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) - - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector""".format(self.rank) - - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) - - -class InterpolationRunner: - """ Returns SQL commands for indexing the address interpolation table - location_property_osmline. 
- """ - - @staticmethod - def name(): - return "interpolation lines (location_property_osmline)" - - @staticmethod - def sql_count_objects(): - return """SELECT count(*) FROM location_property_osmline - WHERE indexed_status > 0""" - - @staticmethod - def sql_get_objects(): - return """SELECT place_id FROM location_property_osmline - WHERE indexed_status > 0 - ORDER BY geometry_sector""" - - @staticmethod - def sql_index_place(ids): - return """UPDATE location_property_osmline - SET indexed_status = 0 WHERE place_id IN ({})"""\ - .format(','.join((str(i) for i in ids))) - -class BoundaryRunner: - """ Returns SQL commands for indexing the administrative boundaries - of a certain rank. - """ - - def __init__(self, rank): - self.rank = rank - - def name(self): - return "boundaries rank {}".format(self.rank) - - def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative'""".format(self.rank) - - def sql_get_objects(self): - return """SELECT place_id FROM placex - WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level""".format(self.rank) - - @staticmethod - def sql_index_place(ids): - return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ - .format(','.join((str(i) for i in ids))) - -class Indexer: - """ Main indexing routine. - """ - - def __init__(self, opts): - self.minrank = max(1, opts.minrank) - self.maxrank = min(30, opts.maxrank) - self.conn = make_connection(opts) - self.threads = [DBConnection(opts) for _ in range(opts.threads)] - - def index_boundaries(self): - LOG.warning("Starting indexing boundaries using %s threads", - len(self.threads)) - - for rank in range(max(self.minrank, 5), min(self.maxrank, 26)): - self.index(BoundaryRunner(rank)) - - def index_by_rank(self): - """ Run classic indexing by rank. 
- """ - LOG.warning("Starting indexing rank (%i to %i) using %i threads", - self.minrank, self.maxrank, len(self.threads)) - - for rank in range(max(1, self.minrank), self.maxrank): - self.index(RankRunner(rank)) - - if self.maxrank == 30: - self.index(RankRunner(0)) - self.index(InterpolationRunner(), 20) - self.index(RankRunner(self.maxrank), 20) - else: - self.index(RankRunner(self.maxrank)) - - def index(self, obj, batch=1): - """ Index a single rank or table. `obj` describes the SQL to use - for indexing. `batch` describes the number of objects that - should be processed with a single SQL statement - """ - LOG.warning("Starting %s (using batch size %s)", obj.name(), batch) - - cur = self.conn.cursor() - cur.execute(obj.sql_count_objects()) - - total_tuples = cur.fetchone()[0] - LOG.debug("Total number of rows: %i", total_tuples) - - cur.close() - - progress = ProgressLogger(obj.name(), total_tuples) - - if total_tuples > 0: - cur = self.conn.cursor(name='places') - cur.execute(obj.sql_get_objects()) - - next_thread = self.find_free_thread() - while True: - places = [p[0] for p in cur.fetchmany(batch)] - if not places: - break - - LOG.debug("Processing places: %s", str(places)) - thread = next(next_thread) - - thread.perform(obj.sql_index_place(places)) - progress.add(len(places)) - - cur.close() - - for thread in self.threads: - thread.wait() - - progress.done() - - def find_free_thread(self): - """ Generator that returns the next connection that is free for - sending a query. - """ - ready = self.threads - command_stat = 0 - - while True: - for thread in ready: - if thread.is_done(): - command_stat += 1 - yield thread - - # refresh the connections occasionaly to avoid potential - # memory leaks in Postgresql. 
- if command_stat > 100000: - for thread in self.threads: - while not thread.is_done(): - thread.wait() - thread.connect() - command_stat = 0 - ready = self.threads - else: - ready, _, _ = select.select(self.threads, [], []) - - assert False, "Unreachable code" +from indexer.indexer import Indexer def nominatim_arg_parser(): """ Setup the command-line parser for the tool. @@ -254,11 +68,10 @@ def nominatim_arg_parser(): return parser if __name__ == '__main__': - logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s') - OPTIONS = nominatim_arg_parser().parse_args(sys.argv[1:]) - LOG.setLevel(max(3 - OPTIONS.loglevel, 0) * 10) + logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s', + level=max(3 - OPTIONS.loglevel, 0) * 10) OPTIONS.password = None if OPTIONS.password_prompt: From c77877a93401dd2f87e3caefb7aa6f04d05f7c95 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 17 Jan 2021 20:05:41 +0100 Subject: [PATCH 3/4] implementation of 'nominatim index' --- nominatim/cli.py | 31 +++++++++++++++++++++++++++++-- nominatim/db/async_connection.py | 20 +++++--------------- nominatim/indexer/indexer.py | 29 +++++++++++++++-------------- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/nominatim/cli.py b/nominatim/cli.py index 8d4071db..acb6839f 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -11,6 +11,17 @@ from pathlib import Path from .config import Configuration from .admin.exec_utils import run_legacy_script +from .indexer.indexer import Indexer + +def _num_system_cpus(): + try: + cpus = len(os.sched_getaffinity(0)) + except NotImplementedError: + cpus = None + + return cpus or os.cpu_count() + + class CommandlineParser: """ Wraps some of the common functions for parsing the command line and setting up subcommands. 
@@ -297,11 +308,27 @@ class UpdateIndex: @staticmethod def add_args(parser): - pass + group = parser.add_argument_group('Filter arguments') + group.add_argument('--boundaries-only', action='store_true', + help="""Index only administrative boundaries.""") + group.add_argument('--no-boundaries', action='store_true', + help="""Index everything except administrative boundaries.""") + group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0, + help='Minimum/starting rank') + group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30, + help='Maximum/finishing rank') @staticmethod def run(args): - return run_legacy_script('update.php', '--index', nominatim_env=args) + indexer = Indexer(args.config.get_libpq_dsn(), + args.threads or _num_system_cpus() or 1) + + if not args.no_boundaries: + indexer.index_boundaries(args.minrank, args.maxrank) + if not args.boundaries_only: + indexer.index_by_rank(args.minrank, args.maxrank) + + return 0 class UpdateRefresh: diff --git a/nominatim/db/async_connection.py b/nominatim/db/async_connection.py index 85b84431..45e83664 100644 --- a/nominatim/db/async_connection.py +++ b/nominatim/db/async_connection.py @@ -11,26 +11,14 @@ from psycopg2.extras import wait_select LOG = logging.getLogger() -def make_connection(options, asynchronous=False): - """ Create a psycopg2 connection from the given options. - """ - params = {'dbname' : options.dbname, - 'user' : options.user, - 'password' : options.password, - 'host' : options.host, - 'port' : options.port, - 'async' : asynchronous} - - return psycopg2.connect(**params) - class DBConnection: """ A single non-blocking database connection. 
""" - def __init__(self, options): + def __init__(self, dsn): self.current_query = None self.current_params = None - self.options = options + self.dsn = dsn self.conn = None self.cursor = None @@ -46,7 +34,9 @@ class DBConnection: self.cursor.close() self.conn.close() - self.conn = make_connection(self.options, asynchronous=True) + # Use a dict to hand in the parameters because async is a reserved + # word in Python3. + self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True}) self.wait() self.cursor = self.conn.cursor() diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 52046456..d86303c4 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -5,8 +5,10 @@ Main work horse for indexing (computing addresses) the database. import logging import select +import psycopg2 + from .progress import ProgressLogger -from db.async_connection import DBConnection, make_connection +from ..db.async_connection import DBConnection LOG = logging.getLogger() @@ -94,34 +96,33 @@ class Indexer: """ Main indexing routine. """ - def __init__(self, opts): - self.minrank = max(1, opts.minrank) - self.maxrank = min(30, opts.maxrank) - self.conn = make_connection(opts) - self.threads = [DBConnection(opts) for _ in range(opts.threads)] + def __init__(self, dsn, num_threads): + self.conn = psycopg2.connect(dsn) + self.threads = [DBConnection(dsn) for _ in range(num_threads)] - def index_boundaries(self): + def index_boundaries(self, minrank, maxrank): LOG.warning("Starting indexing boundaries using %s threads", len(self.threads)) - for rank in range(max(self.minrank, 5), min(self.maxrank, 26)): + for rank in range(max(minrank, 5), min(maxrank, 26)): self.index(BoundaryRunner(rank)) - def index_by_rank(self): + def index_by_rank(self, minrank, maxrank): """ Run classic indexing by rank. 
""" + maxrank = min(maxrank, 30) LOG.warning("Starting indexing rank (%i to %i) using %i threads", - self.minrank, self.maxrank, len(self.threads)) + minrank, maxrank, len(self.threads)) - for rank in range(max(1, self.minrank), self.maxrank): + for rank in range(max(1, minrank), maxrank): self.index(RankRunner(rank)) - if self.maxrank == 30: + if maxrank == 30: self.index(RankRunner(0)) self.index(InterpolationRunner(), 20) - self.index(RankRunner(self.maxrank), 20) + self.index(RankRunner(30), 20) else: - self.index(RankRunner(self.maxrank)) + self.index(RankRunner(maxrank)) def index(self, obj, batch=1): """ Index a single rank or table. `obj` describes the SQL to use From 504922ffbecd42eed01dfb9da6bbf2c7aae9a094 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 17 Jan 2021 21:02:50 +0100 Subject: [PATCH 4/4] remove old nominatim.py in favour of 'nominatim index' The PHP scripts need to know the position of the nominatim tool in order to call it. This is handed in as environment variable, so it can be set by the Python script. 
--- cmake/script.tmpl | 1 + cmake/tool.tmpl | 3 + lib/Shell.php | 2 +- lib/admin/update.php | 37 ++--------- lib/setup/SetupClass.php | 25 +++----- nominatim/cli.py | 5 +- nominatim/indexer/indexer.py | 7 +++ nominatim/indexer/progress.py | 8 +-- nominatim/nominatim.py | 84 ------------------------- test/bdd/steps/nominatim_environment.py | 1 + 10 files changed, 32 insertions(+), 141 deletions(-) delete mode 100755 nominatim/nominatim.py diff --git a/cmake/script.tmpl b/cmake/script.tmpl index 30b8717b..aa25a124 100755 --- a/cmake/script.tmpl +++ b/cmake/script.tmpl @@ -8,5 +8,6 @@ require('@CMAKE_SOURCE_DIR@/lib/dotenv_loader.php'); @define('CONST_DataDir', '@CMAKE_SOURCE_DIR@'); loadDotEnv(); +$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim'; require_once('@CMAKE_SOURCE_DIR@/lib/admin/@script_source@'); diff --git a/cmake/tool.tmpl b/cmake/tool.tmpl index 40f2b8ea..43646792 100755 --- a/cmake/tool.tmpl +++ b/cmake/tool.tmpl @@ -1,8 +1,11 @@ #!/usr/bin/env python3 import sys +import os sys.path.insert(1, '@CMAKE_SOURCE_DIR@') +os.environ['NOMINATIM_NOMINATIM_TOOL'] = __file__ + from nominatim import cli exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module', diff --git a/lib/Shell.php b/lib/Shell.php index 59c4473b..72f90735 100644 --- a/lib/Shell.php +++ b/lib/Shell.php @@ -7,7 +7,7 @@ class Shell public function __construct($sBaseCmd, ...$aParams) { if (!$sBaseCmd) { - throw new Exception('Command missing in new() call'); + throw new \Exception('Command missing in new() call'); } $this->baseCmd = $sBaseCmd; $this->aParams = array(); diff --git a/lib/admin/update.php b/lib/admin/update.php index 50f611d7..fe9658b5 100644 --- a/lib/admin/update.php +++ b/lib/admin/update.php @@ -105,25 +105,14 @@ if ($fPostgresVersion >= 11.0) { } -$oIndexCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py')) - ->addParams('--database', $aDSNInfo['database']) - ->addParams('--port', $aDSNInfo['port']) - ->addParams('--threads', 
$aResult['index-instances']); -if (!$aResult['quiet']) { - $oIndexCmd->addParams('--verbose'); +$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) + ->addParams('index'); +if ($aResult['quiet']) { + $oIndexCmd->addParams('--quiet'); } if ($aResult['verbose']) { $oIndexCmd->addParams('--verbose'); } -if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) { - $oIndexCmd->addParams('--host', $aDSNInfo['hostspec']); -} -if (isset($aDSNInfo['username']) && $aDSNInfo['username']) { - $oIndexCmd->addParams('--username', $aDSNInfo['username']); -} -if (isset($aDSNInfo['password']) && $aDSNInfo['password']) { - $oIndexCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']); -} $sPyosmiumBin = getSetting('PYOSMIUM_BINARY'); $sBaseURL = getSetting('REPLICATION_URL'); @@ -288,15 +277,9 @@ if ($aResult['recompute-word-counts']) { } if ($aResult['index']) { - $oCmd = (clone $oIndexCmd) - ->addParams('--minrank', $aResult['index-rank'], '-b'); - $oCmd->run(); - $oCmd = (clone $oIndexCmd) ->addParams('--minrank', $aResult['index-rank']); $oCmd->run(); - - $oDB->exec('update import_status set indexed = true'); } if ($aResult['update-address-levels']) { @@ -438,15 +421,6 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { if (!$aResult['no-index']) { $fCMDStartTime = time(); - $oThisIndexCmd = clone($oIndexCmd); - $oThisIndexCmd->addParams('-b'); - echo $oThisIndexCmd->escapedCmd()."\n"; - $iErrorLevel = $oThisIndexCmd->run(); - if ($iErrorLevel) { - echo "Error: $iErrorLevel\n"; - exit($iErrorLevel); - } - $oThisIndexCmd = clone($oIndexCmd); echo $oThisIndexCmd->escapedCmd()."\n"; $iErrorLevel = $oThisIndexCmd->run(); @@ -463,9 +437,6 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { var_Dump($sSQL); $oDB->exec($sSQL); echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." 
minutes\n"; - - $sSQL = 'update import_status set indexed = true'; - $oDB->exec($sSQL); } else { if ($aResult['import-osmosis-all']) { echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n"; diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index 77b14a8a..d17fdca7 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -549,26 +549,15 @@ class SetupFunctions { $this->checkModulePresence(); // raises exception on failure - $oBaseCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py')) - ->addParams('--database', $this->aDSNInfo['database']) - ->addParams('--port', $this->aDSNInfo['port']) - ->addParams('--threads', $this->iInstances); + $oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) + ->addParams('index'); - if (!$this->bQuiet) { - $oBaseCmd->addParams('-v'); + if ($this->bQuiet) { + $oBaseCmd->addParams('-q'); } if ($this->bVerbose) { $oBaseCmd->addParams('-v'); } - if (isset($this->aDSNInfo['hostspec'])) { - $oBaseCmd->addParams('--host', $this->aDSNInfo['hostspec']); - } - if (isset($this->aDSNInfo['username'])) { - $oBaseCmd->addParams('--user', $this->aDSNInfo['username']); - } - if (isset($this->aDSNInfo['password'])) { - $oBaseCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']); - } info('Index ranks 0 - 4'); $oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4); @@ -581,14 +570,14 @@ class SetupFunctions if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE'); info('Index administrative boundaries'); - $oCmd = (clone $oBaseCmd)->addParams('-b'); + $oCmd = (clone $oBaseCmd)->addParams('--boundaries-only'); $iStatus = $oCmd->run(); if ($iStatus != 0) { fail('error status ' . $iStatus . 
' running nominatim!'); } info('Index ranks 5 - 25'); - $oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25); + $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25); $iStatus = $oCmd->run(); if ($iStatus != 0) { fail('error status ' . $iStatus . ' running nominatim!'); @@ -597,7 +586,7 @@ class SetupFunctions if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE'); info('Index ranks 26 - 30'); - $oCmd = (clone $oBaseCmd)->addParams('--minrank', 26); + $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26); $iStatus = $oCmd->run(); if ($iStatus != 0) { fail('error status ' . $iStatus . ' running nominatim!'); diff --git a/nominatim/cli.py b/nominatim/cli.py index acb6839f..65ea90bb 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -78,7 +78,7 @@ class CommandlineParser: args.project_dir = Path(args.project_dir) logging.basicConfig(stream=sys.stderr, - format='%(asctime)s %(levelname)s: %(message)s', + format='%(asctime)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=max(4 - args.verbose, 1) * 10) @@ -328,6 +328,9 @@ class UpdateIndex: if not args.boundaries_only: indexer.index_by_rank(args.minrank, args.maxrank) + if not args.no_boundaries and not args.boundaries_only: + indexer.update_status_table() + return 0 diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index d86303c4..094d1279 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -124,6 +124,13 @@ class Indexer: else: self.index(RankRunner(maxrank)) + def update_status_table(self): + """ Update the status in the status table to 'indexed'. + """ + with self.conn.cursor() as cur: + cur.execute('UPDATE import_status SET indexed = true') + self.conn.commit() + def index(self, obj, batch=1): """ Index a single rank or table. `obj` describes the SQL to use for indexing. 
`batch` describes the number of objects that diff --git a/nominatim/indexer/progress.py b/nominatim/indexer/progress.py index 99120673..c9d8816b 100644 --- a/nominatim/indexer/progress.py +++ b/nominatim/indexer/progress.py @@ -26,7 +26,7 @@ class ProgressLogger: self.done_places = 0 self.rank_start_time = datetime.now() self.log_interval = log_interval - self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.INFO) else total + 1 + self.next_info = INITIAL_PROGRESS if LOG.isEnabledFor(logging.WARNING) else total + 1 def add(self, num=1): """ Mark `num` places as processed. Print a log message if the @@ -47,9 +47,9 @@ class ProgressLogger: places_per_sec = self.done_places / done_time eta = (self.total_places - self.done_places) / places_per_sec - LOG.info("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f", - self.done_places, int(done_time), - places_per_sec, self.name, eta) + LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f", + self.done_places, int(done_time), + places_per_sec, self.name, eta) self.next_info += int(places_per_sec) * self.log_interval diff --git a/nominatim/nominatim.py b/nominatim/nominatim.py deleted file mode 100755 index fdc2bcba..00000000 --- a/nominatim/nominatim.py +++ /dev/null @@ -1,84 +0,0 @@ -#! /usr/bin/env python3 -#----------------------------------------------------------------------------- -# nominatim - [description] -#----------------------------------------------------------------------------- -# -# Indexing tool for the Nominatim database. -# -# Based on C version by Brian Quinion -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -#----------------------------------------------------------------------------- -from argparse import ArgumentParser, RawDescriptionHelpFormatter -import logging -import sys -import getpass - -from indexer.indexer import Indexer - -def nominatim_arg_parser(): - """ Setup the command-line parser for the tool. - """ - parser = ArgumentParser(description="Indexing tool for Nominatim.", - formatter_class=RawDescriptionHelpFormatter) - - parser.add_argument('-d', '--database', - dest='dbname', action='store', default='nominatim', - help='Name of the PostgreSQL database to connect to.') - parser.add_argument('-U', '--username', - dest='user', action='store', - help='PostgreSQL user name.') - parser.add_argument('-W', '--password', - dest='password_prompt', action='store_true', - help='Force password prompt.') - parser.add_argument('-H', '--host', - dest='host', action='store', - help='PostgreSQL server hostname or socket location.') - parser.add_argument('-P', '--port', - dest='port', action='store', - help='PostgreSQL server port') - parser.add_argument('-b', '--boundary-only', - dest='boundary_only', action='store_true', - help='Only index administrative boundaries (ignores min/maxrank).') - parser.add_argument('-r', '--minrank', - dest='minrank', type=int, metavar='RANK', default=0, - help='Minimum/starting rank.') - parser.add_argument('-R', '--maxrank', - dest='maxrank', type=int, metavar='RANK', default=30, - help='Maximum/finishing rank.') - parser.add_argument('-t', '--threads', - dest='threads', type=int, 
metavar='NUM', default=1, - help='Number of threads to create for indexing.') - parser.add_argument('-v', '--verbose', - dest='loglevel', action='count', default=0, - help='Increase verbosity') - - return parser - -if __name__ == '__main__': - OPTIONS = nominatim_arg_parser().parse_args(sys.argv[1:]) - - logging.basicConfig(stream=sys.stderr, format='%(levelname)s: %(message)s', - level=max(3 - OPTIONS.loglevel, 0) * 10) - - OPTIONS.password = None - if OPTIONS.password_prompt: - PASSWORD = getpass.getpass("Database password: ") - OPTIONS.password = PASSWORD - - if OPTIONS.boundary_only: - Indexer(OPTIONS).index_boundaries() - else: - Indexer(OPTIONS).index_by_rank() diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 68d7b2f4..0ee92137 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -91,6 +91,7 @@ class NominatimEnvironment: self.test_env['NOMINATIM_BINDIR'] = self.src_dir / 'utils' self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module' self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql' + self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim' if self.server_module_path: self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path