mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-26 13:27:52 +03:00
port replication update function to python
This commit is contained in:
parent
8f0885f6cb
commit
4cb6dc01f3
@ -111,9 +111,6 @@ if ($aResult['verbose']) {
|
||||
$oNominatimCmd->addParams('--verbose');
|
||||
}
|
||||
|
||||
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
|
||||
$sBaseURL = getSetting('REPLICATION_URL');
|
||||
|
||||
|
||||
if ($aResult['init-updates']) {
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
|
||||
@ -203,7 +200,10 @@ if ($aResult['recompute-word-counts']) {
|
||||
}
|
||||
|
||||
if ($aResult['index']) {
|
||||
(clone $oNominatimCmd)->addParams('index', '--minrank', $aResult['index-rank'])->run();
|
||||
(clone $oNominatimCmd)
|
||||
->addParams('index', '--minrank', $aResult['index-rank'])
|
||||
->addParams('--threads', $aResult['index-instances'])
|
||||
->run();
|
||||
}
|
||||
|
||||
if ($aResult['update-address-levels']) {
|
||||
@ -228,146 +228,17 @@ if ($aResult['recompute-importance']) {
|
||||
}
|
||||
|
||||
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
|
||||
//
|
||||
if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) {
|
||||
fail('Error: Update interval too low for download.geofabrik.de. ' .
|
||||
"Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
|
||||
$oCmd = (clone($oNominatimCmd))
|
||||
->addParams('replication')
|
||||
->addParams('--threads', $aResult['index-instances']);
|
||||
|
||||
if (!$aResult['import-osmosis-all']) {
|
||||
$oCmd->addParams('--once');
|
||||
}
|
||||
|
||||
$sImportFile = CONST_InstallDir.'/osmosischange.osc';
|
||||
|
||||
$oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin))
|
||||
->addParams('--server', $sBaseURL)
|
||||
->addParams('--outfile', $sImportFile)
|
||||
->addParams('--size', getSetting('REPLICATION_MAX_DIFF'));
|
||||
|
||||
$oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
|
||||
|
||||
while (true) {
|
||||
$fStartTime = time();
|
||||
$aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
|
||||
|
||||
if (!$aLastState['sequence_id']) {
|
||||
echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
|
||||
|
||||
$sBatchEnd = $aLastState['lastimportdate'];
|
||||
$iEndSequence = $aLastState['sequence_id'];
|
||||
|
||||
if ($aLastState['indexed']) {
|
||||
// Sleep if the update interval has not yet been reached.
|
||||
$fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL');
|
||||
if ($fNextUpdate > $fStartTime) {
|
||||
$iSleepTime = $fNextUpdate - $fStartTime;
|
||||
echo "Waiting for next update for $iSleepTime sec.";
|
||||
sleep($iSleepTime);
|
||||
}
|
||||
|
||||
// Download the next batch of changes.
|
||||
do {
|
||||
$fCMDStartTime = time();
|
||||
$iNextSeq = (int) $aLastState['sequence_id'];
|
||||
unset($aOutput);
|
||||
|
||||
$oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
|
||||
echo $oCMD->escapedCmd()."\n";
|
||||
if (file_exists($sImportFile)) {
|
||||
unlink($sImportFile);
|
||||
}
|
||||
exec($oCMD->escapedCmd(), $aOutput, $iResult);
|
||||
|
||||
if ($iResult == 3) {
|
||||
$sSleep = getSetting('REPLICATION_RECHECK_INTERVAL');
|
||||
echo 'No new updates. Sleeping for '.$sSleep." sec.\n";
|
||||
sleep($sSleep);
|
||||
} elseif ($iResult != 0) {
|
||||
echo 'ERROR: updates failed.';
|
||||
exit($iResult);
|
||||
} else {
|
||||
$iEndSequence = (int)$aOutput[0];
|
||||
}
|
||||
} while ($iResult);
|
||||
|
||||
// get the newest object from the diff file
|
||||
$sBatchEnd = 0;
|
||||
$iRet = 0;
|
||||
$oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
|
||||
exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
|
||||
if ($iRet == 5) {
|
||||
echo "Diff file is empty. skipping import.\n";
|
||||
if (!$aResult['import-osmosis-all']) {
|
||||
exit(0);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if ($iRet != 0) {
|
||||
fail('Error getting date from diff file.');
|
||||
}
|
||||
$sBatchEnd = $sBatchEnd[0];
|
||||
|
||||
// Import the file
|
||||
$fCMDStartTime = time();
|
||||
|
||||
|
||||
echo $oCMDImport->escapedCmd()."\n";
|
||||
unset($sJunk);
|
||||
$iErrorLevel = $oCMDImport->run();
|
||||
if ($iErrorLevel) {
|
||||
echo "Error executing osm2pgsql: $iErrorLevel\n";
|
||||
exit($iErrorLevel);
|
||||
}
|
||||
|
||||
// write the update logs
|
||||
$iFileSize = filesize($sImportFile);
|
||||
$sSQL = 'INSERT INTO import_osmosis_log';
|
||||
$sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
|
||||
$sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
|
||||
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
|
||||
$sSQL .= date('Y-m-d H:i:s')."','import')";
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
|
||||
// update the status
|
||||
$sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
|
||||
}
|
||||
|
||||
// Index file
|
||||
if (!$aResult['no-index']) {
|
||||
$fCMDStartTime = time();
|
||||
|
||||
$oThisIndexCmd = clone($oNominatimCmd);
|
||||
$oThisIndexCmd->addParams('index');
|
||||
echo $oThisIndexCmd->escapedCmd()."\n";
|
||||
$iErrorLevel = $oThisIndexCmd->run();
|
||||
if ($iErrorLevel) {
|
||||
echo "Error: $iErrorLevel\n";
|
||||
exit($iErrorLevel);
|
||||
}
|
||||
|
||||
$sSQL = 'INSERT INTO import_osmosis_log';
|
||||
$sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
|
||||
$sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
|
||||
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
|
||||
$sSQL .= date('Y-m-d H:i:s')."','index')";
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
|
||||
} else {
|
||||
if ($aResult['import-osmosis-all']) {
|
||||
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
$fDuration = time() - $fStartTime;
|
||||
echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
|
||||
if (!$aResult['import-osmosis-all']) exit(0);
|
||||
if ($aResult['no-index']) {
|
||||
$oCmd->addParams('--no-index');
|
||||
}
|
||||
|
||||
exit($oCmd->run());
|
||||
}
|
||||
|
145
nominatim/cli.py
145
nominatim/cli.py
@ -2,8 +2,10 @@
|
||||
Command-line interface to the Nominatim functions for import, update,
|
||||
database administration and querying.
|
||||
"""
|
||||
import sys
|
||||
import datetime as dt
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
@ -11,6 +13,7 @@ from pathlib import Path
|
||||
from .config import Configuration
|
||||
from .tools.exec_utils import run_legacy_script, run_api_script
|
||||
from .db.connection import connect
|
||||
from .db import status
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@ -88,6 +91,17 @@ class CommandlineParser:
|
||||
|
||||
return args.command.run(args)
|
||||
|
||||
|
||||
def _osm2pgsql_options_from_args(args, default_cache, default_threads):
|
||||
""" Set up the stanadrd osm2pgsql from the command line arguments.
|
||||
"""
|
||||
return dict(osm2pgsql=args.osm2pgsql_path,
|
||||
osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
|
||||
osm2pgsql_style=args.config.get_import_style_file(),
|
||||
threads=args.threads or default_threads,
|
||||
dsn=args.config.get_libpq_dsn(),
|
||||
flatnode_file=args.config.FLATNODE_FILE)
|
||||
|
||||
##### Subcommand classes
|
||||
#
|
||||
# Each class needs to implement two functions: add_args() adds the CLI parameters
|
||||
@ -231,6 +245,88 @@ class UpdateReplication:
|
||||
group.add_argument('--no-index', action='store_false', dest='do_index',
|
||||
help="""Do not index the new data. Only applicable
|
||||
together with --once""")
|
||||
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
|
||||
help='Size of cache to be used by osm2pgsql (in MB)')
|
||||
|
||||
@staticmethod
|
||||
def _init_replication(args):
|
||||
from .tools import replication, refresh
|
||||
|
||||
LOG.warning("Initialising replication updates")
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
|
||||
if args.update_functions:
|
||||
LOG.warning("Create functions")
|
||||
refresh.create_functions(conn, args.config, args.data_dir,
|
||||
True, False)
|
||||
conn.close()
|
||||
return 0
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _check_for_updates(args):
|
||||
from .tools import replication
|
||||
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
|
||||
conn.close()
|
||||
return ret
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _update(args):
|
||||
from .tools import replication
|
||||
from .indexer.indexer import Indexer
|
||||
|
||||
params = _osm2pgsql_options_from_args(args, 2000, 1)
|
||||
params.update(base_url=args.config.REPLICATION_URL,
|
||||
update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
|
||||
import_file=args.project_dir / 'osmosischange.osc',
|
||||
max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
|
||||
indexed_only=not args.once)
|
||||
|
||||
# Sanity check to not overwhelm the Geofabrik servers.
|
||||
if 'download.geofabrik.de'in params['base_url']\
|
||||
and params['update_interval'] < 86400:
|
||||
LOG.fatal("Update interval too low for download.geofabrik.de.\n"
|
||||
"Please check install documentation "
|
||||
"(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
|
||||
"setting-up-the-update-process).")
|
||||
raise RuntimeError("Invalid replication update interval setting.")
|
||||
|
||||
if not args.once:
|
||||
if not args.do_index:
|
||||
LOG.fatal("Indexing cannot be disabled when running updates continuously.")
|
||||
raise RuntimeError("Bad arguments.")
|
||||
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
||||
|
||||
while True:
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
start = dt.datetime.now(dt.timezone.utc)
|
||||
state = replication.update(conn, params)
|
||||
status.log_status(conn, start, 'import')
|
||||
conn.close()
|
||||
|
||||
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
|
||||
start = dt.datetime.now(dt.timezone.utc)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
args.threads or 1)
|
||||
indexer.index_boundaries(0, 30)
|
||||
indexer.index_by_rank(0, 30)
|
||||
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
status.set_indexed(conn, True)
|
||||
status.log_status(conn, start, 'index')
|
||||
conn.close()
|
||||
|
||||
if args.once:
|
||||
break
|
||||
|
||||
if state is replication.UpdateState.NO_CHANGES:
|
||||
LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
|
||||
time.sleep(recheck_interval)
|
||||
|
||||
return state.value
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
@ -241,35 +337,13 @@ class UpdateReplication:
|
||||
"To install pyosmium via pip: pip3 install osmium")
|
||||
return 1
|
||||
|
||||
from .tools import replication, refresh
|
||||
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
|
||||
params = ['update.php']
|
||||
if args.init:
|
||||
LOG.warning("Initialising replication updates")
|
||||
replication.init_replication(conn, args.config.REPLICATION_URL)
|
||||
if args.update_functions:
|
||||
LOG.warning("Create functions")
|
||||
refresh.create_functions(conn, args.config, args.data_dir,
|
||||
True, False)
|
||||
conn.close()
|
||||
return 0
|
||||
return UpdateReplication._init_replication(args)
|
||||
|
||||
if args.check_for_updates:
|
||||
ret = replication.check_for_updates(conn, args.config.REPLICATION_URL)
|
||||
conn.close()
|
||||
return ret
|
||||
|
||||
if args.once:
|
||||
params.append('--import-osmosis')
|
||||
else:
|
||||
params.append('--import-osmosis-all')
|
||||
if not args.do_index:
|
||||
params.append('--no-index')
|
||||
|
||||
return run_legacy_script(*params, nominatim_env=args)
|
||||
return UpdateReplication._check_for_updates(args)
|
||||
|
||||
return UpdateReplication._update(args)
|
||||
|
||||
class UpdateAddData:
|
||||
"""\
|
||||
@ -350,8 +424,11 @@ class UpdateIndex:
|
||||
if not args.boundaries_only:
|
||||
indexer.index_by_rank(args.minrank, args.maxrank)
|
||||
|
||||
if not args.no_boundaries and not args.boundaries_only:
|
||||
indexer.update_status_table()
|
||||
if not args.no_boundaries and not args.boundaries_only \
|
||||
and args.minrank == 0 and args.maxrank == 30:
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
status.set_indexed(conn, True)
|
||||
conn.close()
|
||||
|
||||
return 0
|
||||
|
||||
@ -390,25 +467,31 @@ class UpdateRefresh:
|
||||
def run(args):
|
||||
from .tools import refresh
|
||||
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
|
||||
if args.postcodes:
|
||||
LOG.warning("Update postcodes centroid")
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.update_postcodes(conn, args.data_dir)
|
||||
conn.close()
|
||||
|
||||
if args.word_counts:
|
||||
LOG.warning('Recompute frequency of full-word search terms')
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.recompute_word_counts(conn, args.data_dir)
|
||||
conn.close()
|
||||
|
||||
if args.address_levels:
|
||||
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
|
||||
LOG.warning('Updating address levels from %s', cfg)
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.load_address_levels_from_file(conn, cfg)
|
||||
conn.close()
|
||||
|
||||
if args.functions:
|
||||
LOG.warning('Create functions')
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.create_functions(conn, args.config, args.data_dir,
|
||||
args.diffs, args.enable_debug_statements)
|
||||
conn.close()
|
||||
|
||||
if args.wiki_data:
|
||||
run_legacy_script('setup.php', '--import-wikipedia-articles',
|
||||
@ -421,8 +504,6 @@ class UpdateRefresh:
|
||||
run_legacy_script('setup.php', '--setup-website',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
|
||||
conn.close()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
|
@ -1,10 +1,14 @@
|
||||
"""
|
||||
Nominatim configuration accessor.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import dotenv_values
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class Configuration:
|
||||
""" Load and manage the project configuration.
|
||||
|
||||
@ -21,6 +25,7 @@ class Configuration:
|
||||
|
||||
def __init__(self, project_dir, config_dir):
|
||||
self.project_dir = project_dir
|
||||
self.config_dir = config_dir
|
||||
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
|
||||
if project_dir is not None:
|
||||
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
|
||||
@ -38,24 +43,56 @@ class Configuration:
|
||||
return os.environ.get(name) or self._config[name]
|
||||
|
||||
def get_bool(self, name):
|
||||
""" Return the given configuration parameters as a boolean.
|
||||
""" Return the given configuration parameter as a boolean.
|
||||
Values of '1', 'yes' and 'true' are accepted as truthy values,
|
||||
everything else is interpreted as false.
|
||||
"""
|
||||
return self.__getattr__(name).lower() in ('1', 'yes', 'true')
|
||||
|
||||
|
||||
def get_int(self, name):
|
||||
""" Return the given configuration parameter as an int.
|
||||
"""
|
||||
try:
|
||||
return int(self.__getattr__(name))
|
||||
except ValueError:
|
||||
LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
|
||||
raise
|
||||
|
||||
|
||||
def get_libpq_dsn(self):
|
||||
""" Get configured database DSN converted into the key/value format
|
||||
understood by libpq and psycopg.
|
||||
"""
|
||||
dsn = self.DATABASE_DSN
|
||||
|
||||
def quote_param(param):
|
||||
key, val = param.split('=')
|
||||
val = val.replace('\\', '\\\\').replace("'", "\\'")
|
||||
if ' ' in val:
|
||||
val = "'" + val + "'"
|
||||
return key + '=' + val
|
||||
|
||||
if dsn.startswith('pgsql:'):
|
||||
# Old PHP DSN format. Convert before returning.
|
||||
return dsn[6:].replace(';', ' ')
|
||||
return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
|
||||
|
||||
return dsn
|
||||
|
||||
|
||||
def get_import_style_file(self):
|
||||
""" Return the import style file as a path object. Translates the
|
||||
name of the standard styles automatically into a file in the
|
||||
config style.
|
||||
"""
|
||||
style = self.__getattr__('IMPORT_STYLE')
|
||||
|
||||
if style in ('admin', 'street', 'address', 'full', 'extratags'):
|
||||
return self.config_dir / 'import-{}.style'.format(style)
|
||||
|
||||
return Path(style)
|
||||
|
||||
|
||||
def get_os_env(self):
|
||||
""" Return a copy of the OS environment with the Nominatim configuration
|
||||
merged in.
|
||||
|
@ -1,5 +1,5 @@
|
||||
"""
|
||||
Access and helper functions for the status table.
|
||||
Access and helper functions for the status and status log table.
|
||||
"""
|
||||
import datetime as dt
|
||||
import logging
|
||||
@ -61,3 +61,21 @@ def get_status(conn):
|
||||
|
||||
row = cur.fetchone()
|
||||
return row['lastimportdate'], row['sequence_id'], row['indexed']
|
||||
|
||||
|
||||
def set_indexed(conn, state):
|
||||
""" Set the indexed flag in the status table to the given state.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("UPDATE import_status SET indexed = %s", (state, ))
|
||||
conn.commit()
|
||||
|
||||
|
||||
def log_status(conn, start, event, batchsize=None):
|
||||
""" Write a new status line to the `import_osmosis_log` table.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""INSERT INTO import_osmosis_log
|
||||
(batchend, batchseq, batchsize, starttime, endtime, event)
|
||||
SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
|
||||
(batchsize, start, event))
|
||||
|
@ -2,10 +2,13 @@
|
||||
Helper functions for executing external programs.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import urllib.request as urlrequest
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from psycopg2.extensions import parse_dsn
|
||||
|
||||
from ..version import NOMINATIM_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
@ -87,6 +90,41 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
|
||||
return 0
|
||||
|
||||
|
||||
def run_osm2pgsql(options):
|
||||
""" Run osm2pgsql with the given options.
|
||||
"""
|
||||
env = os.environ
|
||||
cmd = [options['osm2pgsql'],
|
||||
'--hstore', '--latlon', '--slim',
|
||||
'--with-forward-dependencies', 'false',
|
||||
'--log-progress', 'true',
|
||||
'--number-processes', str(options['threads']),
|
||||
'--cache', str(options['osm2pgsql_cache']),
|
||||
'--output', 'gazetteer',
|
||||
'--style', str(options['osm2pgsql_style'])
|
||||
]
|
||||
if options['append']:
|
||||
cmd.append('--append')
|
||||
|
||||
if options['flatnode_file']:
|
||||
cmd.extend(('--flat-nodes', options['flatnode_file']))
|
||||
|
||||
dsn = parse_dsn(options['dsn'])
|
||||
if 'password' in dsn:
|
||||
env['PGPASSWORD'] = dsn['password']
|
||||
if 'dbname' in dsn:
|
||||
cmd.extend(('-d', dsn['dbname']))
|
||||
if 'user' in dsn:
|
||||
cmd.extend(('--username', dsn['user']))
|
||||
for param in ('host', 'port'):
|
||||
if param in dsn:
|
||||
cmd.extend(('--' + param, dsn[param]))
|
||||
|
||||
cmd.append(str(options['import_file']))
|
||||
|
||||
subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
|
||||
|
||||
|
||||
def get_url(url):
|
||||
""" Get the contents from the given URL and return it as a UTF-8 string.
|
||||
"""
|
||||
|
@ -1,12 +1,16 @@
|
||||
"""
|
||||
Functions for updating a database from a replication source.
|
||||
"""
|
||||
import datetime
|
||||
import datetime as dt
|
||||
from enum import Enum
|
||||
import logging
|
||||
import time
|
||||
|
||||
from osmium.replication.server import ReplicationServer
|
||||
from osmium import WriteHandler
|
||||
|
||||
from ..db import status
|
||||
from .exec_utils import run_osm2pgsql
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@ -17,7 +21,7 @@ def init_replication(conn, base_url):
|
||||
date = status.compute_database_date(conn)
|
||||
|
||||
# margin of error to make sure we get all data
|
||||
date -= datetime.timedelta(hours=3)
|
||||
date -= dt.timedelta(hours=3)
|
||||
|
||||
repl = ReplicationServer(base_url)
|
||||
|
||||
@ -53,7 +57,62 @@ def check_for_updates(conn, base_url):
|
||||
|
||||
if state.sequence <= seq:
|
||||
LOG.warning("Database is up to date.")
|
||||
return 1
|
||||
return 2
|
||||
|
||||
LOG.warning("New data available (%i => %i).", seq, state.sequence)
|
||||
return 0
|
||||
|
||||
class UpdateState(Enum):
|
||||
""" Possible states after an update has run.
|
||||
"""
|
||||
|
||||
UP_TO_DATE = 0
|
||||
MORE_PENDING = 2
|
||||
NO_CHANGES = 3
|
||||
|
||||
|
||||
def update(conn, options):
|
||||
""" Update database from the next batch of data. Returns the state of
|
||||
updates according to `UpdateState`.
|
||||
"""
|
||||
startdate, startseq, indexed = status.get_status(conn)
|
||||
|
||||
if startseq is None:
|
||||
LOG.error("Replication not set up. "
|
||||
"Please run 'nominatim replication --init' first.")
|
||||
raise RuntimeError("Replication not set up.")
|
||||
|
||||
if not indexed and options['indexed_only']:
|
||||
LOG.info("Skipping update. There is data that needs indexing.")
|
||||
return UpdateState.MORE_PENDING
|
||||
|
||||
last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
|
||||
update_interval = dt.timedelta(seconds=options['update_interval'])
|
||||
if last_since_update < update_interval:
|
||||
duration = (update_interval - last_since_update).seconds
|
||||
LOG.warning("Sleeping for %s sec before next update.", duration)
|
||||
time.sleep(duration)
|
||||
|
||||
if options['import_file'].exists():
|
||||
options['import_file'].unlink()
|
||||
|
||||
# Read updates into file.
|
||||
repl = ReplicationServer(options['base_url'])
|
||||
|
||||
outhandler = WriteHandler(str(options['import_file']))
|
||||
endseq = repl.apply_diffs(outhandler, startseq,
|
||||
max_size=options['max_diff_size'] * 1024)
|
||||
outhandler.close()
|
||||
|
||||
if endseq is None:
|
||||
return UpdateState.NO_CHANGES
|
||||
|
||||
# Consume updates with osm2pgsql.
|
||||
options['append'] = True
|
||||
run_osm2pgsql(options)
|
||||
|
||||
# Write the current status to the file
|
||||
endstate = repl.get_state_info(endseq)
|
||||
status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False)
|
||||
|
||||
return UpdateState.UP_TO_DATE
|
||||
|
@ -101,7 +101,8 @@ def def_config():
|
||||
|
||||
@pytest.fixture
|
||||
def status_table(temp_db_conn):
|
||||
""" Create an empty version of the status table.
|
||||
""" Create an empty version of the status table and
|
||||
the status logging table.
|
||||
"""
|
||||
with temp_db_conn.cursor() as cur:
|
||||
cur.execute("""CREATE TABLE import_status (
|
||||
@ -109,6 +110,14 @@ def status_table(temp_db_conn):
|
||||
sequence_id integer,
|
||||
indexed boolean
|
||||
)""")
|
||||
cur.execute("""CREATE TABLE import_osmosis_log (
|
||||
batchend timestamp,
|
||||
batchseq integer,
|
||||
batchsize bigint,
|
||||
starttime timestamp,
|
||||
endtime timestamp,
|
||||
event text
|
||||
)""")
|
||||
temp_db_conn.commit()
|
||||
|
||||
|
||||
|
@ -1,8 +1,13 @@
|
||||
"""
|
||||
Tests for command line interface wrapper.
|
||||
|
||||
These tests just check that the various command line parameters route to the
|
||||
correct functionionality. They use a lot of monkeypatching to avoid executing
|
||||
the actual functions.
|
||||
"""
|
||||
import psycopg2
|
||||
import pytest
|
||||
import time
|
||||
|
||||
import nominatim.cli
|
||||
import nominatim.indexer.indexer
|
||||
@ -21,9 +26,9 @@ class MockParamCapture:
|
||||
""" Mock that records the parameters with which a function was called
|
||||
as well as the number of calls.
|
||||
"""
|
||||
def __init__(self):
|
||||
def __init__(self, retval=0):
|
||||
self.called = 0
|
||||
self.return_value = 0
|
||||
self.return_value = retval
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
self.called += 1
|
||||
@ -142,6 +147,69 @@ def test_replication_command(monkeypatch, temp_db, params, func):
|
||||
assert func_mock.called == 1
|
||||
|
||||
|
||||
def test_replication_update_bad_interval(monkeypatch, temp_db):
|
||||
monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
call_nominatim('replication')
|
||||
|
||||
|
||||
def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
|
||||
monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
|
||||
'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
|
||||
|
||||
with pytest.raises(RuntimeError, match='Invalid replication.*'):
|
||||
call_nominatim('replication')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("state, retval", [
|
||||
(nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
|
||||
(nominatim.tools.replication.UpdateState.NO_CHANGES, 3)
|
||||
])
|
||||
def test_replication_update_once_no_index(monkeypatch, temp_db, status_table, state, retval):
|
||||
func_mock = MockParamCapture(retval=state)
|
||||
monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
|
||||
|
||||
assert retval == call_nominatim('replication', '--once', '--no-index')
|
||||
|
||||
|
||||
def test_replication_update_continuous(monkeypatch, status_table):
|
||||
states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
|
||||
nominatim.tools.replication.UpdateState.UP_TO_DATE]
|
||||
monkeypatch.setattr(nominatim.tools.replication, 'update',
|
||||
lambda *args, **kwargs: states.pop())
|
||||
|
||||
index_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
|
||||
|
||||
with pytest.raises(IndexError):
|
||||
call_nominatim('replication')
|
||||
|
||||
assert index_mock.called == 4
|
||||
|
||||
|
||||
def test_replication_update_continuous_no_change(monkeypatch, status_table):
|
||||
states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
|
||||
nominatim.tools.replication.UpdateState.UP_TO_DATE]
|
||||
monkeypatch.setattr(nominatim.tools.replication, 'update',
|
||||
lambda *args, **kwargs: states.pop())
|
||||
|
||||
index_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
|
||||
|
||||
sleep_mock = MockParamCapture()
|
||||
monkeypatch.setattr(time, 'sleep', sleep_mock)
|
||||
|
||||
with pytest.raises(IndexError):
|
||||
call_nominatim('replication')
|
||||
|
||||
assert index_mock.called == 2
|
||||
assert sleep_mock.called == 1
|
||||
assert sleep_mock.last_args[0] == 60
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params", [
|
||||
('search', '--query', 'new'),
|
||||
('reverse', '--lat', '0', '--lon', '0'),
|
||||
|
@ -69,6 +69,18 @@ def test_get_libpq_dsn_convert_php(monkeypatch):
|
||||
assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"),
|
||||
("xy'z", "xy\\'z"),
|
||||
])
|
||||
def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
|
||||
'pgsql:dbname=gis;password={}'.format(val))
|
||||
|
||||
assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect)
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_libpq(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@ -93,3 +105,51 @@ def test_get_bool_empty():
|
||||
|
||||
assert config.DATABASE_MODULE_PATH == ''
|
||||
assert config.get_bool('DATABASE_MODULE_PATH') == False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
|
||||
('85762513444', 85762513444)])
|
||||
def test_get_int_success(monkeypatch, value, result):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
|
||||
|
||||
assert config.get_int('FOOBAR') == result
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ['1b', 'fg', '0x23'])
|
||||
def test_get_int_bad_values(monkeypatch, value):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
config.get_int('FOOBAR')
|
||||
|
||||
|
||||
def test_get_int_empty():
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
assert config.DATABASE_MODULE_PATH == ''
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
config.get_int('DATABASE_MODULE_PATH')
|
||||
|
||||
|
||||
def test_get_import_style_intern(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street')
|
||||
|
||||
expected = DEFCFG_DIR / 'import-street.style'
|
||||
|
||||
assert config.get_import_style_file() == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye'])
|
||||
def test_get_import_style_intern(monkeypatch, value):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
|
||||
|
||||
assert str(config.get_import_style_file()) == value
|
||||
|
@ -84,3 +84,30 @@ def test_get_status_success(status_table, temp_db_conn):
|
||||
|
||||
assert nominatim.db.status.get_status(temp_db_conn) == \
|
||||
(date, 667, False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("old_state", [True, False])
|
||||
@pytest.mark.parametrize("new_state", [True, False])
|
||||
def test_set_indexed(status_table, temp_db_conn, temp_db_cursor, old_state, new_state):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date, indexed=old_state)
|
||||
nominatim.db.status.set_indexed(temp_db_conn, new_state)
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT indexed FROM import_status") == new_state
|
||||
|
||||
|
||||
def test_set_indexed_empty_status(status_table, temp_db_conn, temp_db_cursor):
|
||||
nominatim.db.status.set_indexed(temp_db_conn, True)
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT count(*) FROM import_status") == 0
|
||||
|
||||
|
||||
def text_log_status(status_table, temp_db_conn):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
start = dt.datetime.now() - dt.timedelta(hours=1)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date, seq=56)
|
||||
nominatim.db.status.log_status(temp_db_conn, start, 'index')
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT count(*) FROM import_osmosis_log") == 1
|
||||
assert temp_db_cursor.scalar("SELECT seq FROM import_osmosis_log") == 56
|
||||
assert temp_db_cursor.scalar("SELECT date FROM import_osmosis_log") == date
|
||||
|
@ -99,3 +99,12 @@ def test_run_api_with_extra_env(tmp_project_dir):
|
||||
extra_env = dict(SCRIPT_FILENAME=str(tmp_project_dir / 'website' / 'test.php'))
|
||||
assert 0 == exec_utils.run_api_script('badname', tmp_project_dir,
|
||||
extra_env=extra_env)
|
||||
|
||||
|
||||
### run_osm2pgsql
|
||||
|
||||
def test_run_osm2pgsql():
|
||||
exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
|
||||
dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
|
||||
osm2pgsql_style='./my.style',
|
||||
import_file='foo.bar'))
|
||||
|
@ -2,6 +2,7 @@
|
||||
Tests for replication functionality.
|
||||
"""
|
||||
import datetime as dt
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from osmium.replication.server import OsmosisState
|
||||
@ -16,6 +17,7 @@ OSM_NODE_DATA = """\
|
||||
</osm>
|
||||
"""
|
||||
|
||||
### init replication
|
||||
|
||||
def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
|
||||
place_row(osm_type='N', osm_id=100)
|
||||
@ -43,20 +45,20 @@ def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_
|
||||
assert temp_db_cursor.fetchone() == [expected_date, 234, True]
|
||||
|
||||
|
||||
### checking for updates
|
||||
|
||||
def test_check_for_updates_empty_status_table(status_table, temp_db_conn):
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
|
||||
|
||||
|
||||
def test_check_for_updates_seq_not_set(status_table, temp_db_conn):
|
||||
status.set_status(temp_db_conn, dt.datetime.now().replace(tzinfo=dt.timezone.utc))
|
||||
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))
|
||||
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
|
||||
|
||||
|
||||
def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
|
||||
status.set_status(temp_db_conn,
|
||||
dt.datetime.now().replace(tzinfo=dt.timezone.utc),
|
||||
seq=345)
|
||||
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"get_state_info", lambda self: None)
|
||||
@ -64,10 +66,10 @@ def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
|
||||
|
||||
|
||||
@pytest.mark.parametrize("server_sequence,result", [(344, 1), (345, 1), (346, 0)])
|
||||
@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
|
||||
def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
|
||||
server_sequence, result):
|
||||
date = dt.datetime.now().replace(tzinfo=dt.timezone.utc)
|
||||
date = dt.datetime.now(dt.timezone.utc)
|
||||
status.set_status(temp_db_conn, date, seq=345)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
@ -75,3 +77,61 @@ def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
|
||||
lambda self: OsmosisState(server_sequence, date))
|
||||
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
|
||||
|
||||
|
||||
### updating
|
||||
|
||||
@pytest.fixture
|
||||
def update_options(tmpdir):
|
||||
return dict(base_url='https://test.io',
|
||||
indexed_only=False,
|
||||
update_interval=3600,
|
||||
import_file=tmpdir / 'foo.osm',
|
||||
max_diff_size=1)
|
||||
|
||||
def test_update_empty_status_table(status_table, temp_db_conn):
|
||||
with pytest.raises(RuntimeError):
|
||||
nominatim.tools.replication.update(temp_db_conn, {})
|
||||
|
||||
|
||||
def test_update_already_indexed(status_table, temp_db_conn):
|
||||
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)
|
||||
|
||||
assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
|
||||
== nominatim.tools.replication.UpdateState.MORE_PENDING
|
||||
|
||||
|
||||
def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options):
|
||||
date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
|
||||
status.set_status(temp_db_conn, date, seq=34)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"apply_diffs",
|
||||
lambda *args, **kwargs: None)
|
||||
|
||||
sleeptime = []
|
||||
monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
|
||||
|
||||
assert nominatim.tools.replication.update(temp_db_conn, update_options) \
|
||||
== nominatim.tools.replication.UpdateState.NO_CHANGES
|
||||
|
||||
assert not sleeptime
|
||||
|
||||
|
||||
def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
|
||||
date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
|
||||
status.set_status(temp_db_conn, date, seq=34)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"apply_diffs",
|
||||
lambda *args, **kwargs: None)
|
||||
|
||||
sleeptime = []
|
||||
monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
|
||||
|
||||
assert nominatim.tools.replication.update(temp_db_conn, update_options) \
|
||||
== nominatim.tools.replication.UpdateState.NO_CHANGES
|
||||
|
||||
assert len(sleeptime) == 1
|
||||
assert sleeptime[0] < 3600
|
||||
assert sleeptime[0] > 0
|
||||
|
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import osmium
|
||||
import sys
|
||||
import datetime
|
||||
|
||||
|
||||
class Datecounter(osmium.SimpleHandler):
|
||||
|
||||
filedate = None
|
||||
|
||||
def date(self, o):
|
||||
ts = o.timestamp
|
||||
if self.filedate is None or ts > self.filedate:
|
||||
self.filedate = ts
|
||||
|
||||
node = date
|
||||
way = date
|
||||
relation = date
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: python osm_file_date.py <osmfile>")
|
||||
sys.exit(-1)
|
||||
|
||||
h = Datecounter()
|
||||
|
||||
h.apply_file(sys.argv[1])
|
||||
|
||||
if h.filedate is None:
|
||||
exit(5)
|
||||
|
||||
print(h.filedate)
|
Loading…
Reference in New Issue
Block a user