Merge pull request #2155 from lonvia/port-regresh-to-python

Port replication and part of the refrsh function to native Python
This commit is contained in:
Sarah Hoffmann 2021-02-01 11:50:05 +01:00 committed by GitHub
commit 0ad1b28497
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 1634 additions and 519 deletions

View File

@ -52,7 +52,7 @@ jobs:
run: phpcs --report-width=120 .
- name: Python linting
run: pylint nominatim
run: pylint --extension-pkg-whitelist=osmium nominatim
- name: PHP unit tests
run: phpunit ./

View File

@ -191,7 +191,7 @@ if (BUILD_TESTS)
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
COMMAND ${PYLINT} nominatim
COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")

View File

@ -53,7 +53,7 @@ The coding style is enforced with PHPCS and pylint. It can be tested with:
```
phpcs --report-width=120 --colors .
pylint3 nominatim
pylint3 --extension-pkg-whitelist=osmium nominatim
```
## Testing

View File

@ -19,15 +19,6 @@ Run (as the same user who will later run the updates):
pip3 install --user osmium
```
Nominatim needs a tool called `pyosmium-get-changes` which comes with
Pyosmium. You need to tell Nominatim where to find it. Add the
following line to your `.env`:
NOMINATIM_PYOSMIUM_BINARY=/home/user/.local/bin/pyosmium-get-changes
The path above is fine if you used the `--user` parameter with pip.
Replace `user` with your user name.
#### Setting up the update process
Next the update needs to be initialised. By default Nominatim is configured

View File

@ -4,7 +4,6 @@
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup_functions.php');
require_once(CONST_LibDir.'/setup/SetupClass.php');
require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
ini_set('memory_limit', '800M');
@ -104,99 +103,27 @@ if ($fPostgresVersion >= 11.0) {
);
}
$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('index');
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
if ($aResult['quiet']) {
$oIndexCmd->addParams('--quiet');
$oNominatimCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
$oIndexCmd->addParams('--verbose');
$oNominatimCmd->addParams('--verbose');
}
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
$sBaseURL = getSetting('REPLICATION_URL');
if ($aResult['init-updates']) {
// sanity check that the replication URL is correct
$sBaseState = file_get_contents($sBaseURL.'/state.txt');
if ($sBaseState === false) {
echo "\nCannot find state.txt file at the configured replication URL.\n";
echo "Does the URL point to a directory containing OSM update data?\n\n";
fail('replication URL not reachable.');
}
// sanity check for pyosmium-get-changes
if (!$sPyosmiumBin) {
echo "\nNOMINATIM_PYOSMIUM_BINARY not configured.\n";
echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
echo "in your local .env file.\n\n";
fail('NOMINATIM_PYOSMIUM_BINARY not configured');
$oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
if ($aResult['no-update-functions']) {
$oCmd->addParams('--no-update-functions');
}
$aOutput = 0;
$oCMD = new \Nominatim\Shell($sPyosmiumBin, '--help');
exec($oCMD->escapedCmd(), $aOutput, $iRet);
if ($iRet != 0) {
echo "Cannot execute pyosmium-get-changes.\n";
echo "Make sure you have pyosmium installed correctly\n";
echo "and have set up NOMINATIM_PYOSMIUM_BINARY to point to pyosmium-get-changes.\n";
fail('pyosmium-get-changes not found or not usable');
}
if (!$aResult['no-update-functions']) {
// instantiate setupClass to use the function therein
$cSetup = new SetupFunctions(array(
'enable-diff-updates' => true,
'verbose' => $aResult['verbose']
));
$cSetup->createFunctions();
}
$sDatabaseDate = getDatabaseDate($oDB);
if (!$sDatabaseDate) {
fail('Cannot determine date of database.');
}
$sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
// get the appropriate state id
$aOutput = 0;
$oCMD = (new \Nominatim\Shell($sPyosmiumBin))
->addParams('--start-date', $sWindBack)
->addParams('--server', $sBaseURL);
exec($oCMD->escapedCmd(), $aOutput, $iRet);
if ($iRet != 0 || $aOutput[0] == 'None') {
fail('Error running pyosmium tools');
}
$oDB->exec('TRUNCATE import_status');
$sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
$sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
try {
$oDB->exec($sSQL);
} catch (\Nominatim\DatabaseError $e) {
fail('Could not enter sequence into database.');
}
echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
$oCmd->run();
}
if ($aResult['check-for-updates']) {
$aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
if (!$aLastState['sequence_id']) {
fail('Updates not set up. Please run ./utils/update.php --init-updates.');
}
$oCmd = (new \Nominatim\Shell(CONST_BinDir.'/check_server_for_updates.py'))
->addParams($sBaseURL)
->addParams($aLastState['sequence_id']);
$iRet = $oCmd->run();
exit($iRet);
exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
}
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
@ -220,9 +147,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
}
if ($aResult['calculate-postcodes']) {
info('Update postcodes centroids');
$sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
runSQLScript($sTemplate, true, true);
(clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
}
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@ -271,22 +196,18 @@ if ($bHaveDiff) {
}
if ($aResult['recompute-word-counts']) {
info('Recompute frequency of full-word search terms');
$sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
runSQLScript($sTemplate, true, true);
(clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
}
if ($aResult['index']) {
$oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank']);
$oCmd->run();
(clone $oNominatimCmd)
->addParams('index', '--minrank', $aResult['index-rank'])
->addParams('--threads', $aResult['index-instances'])
->run();
}
if ($aResult['update-address-levels']) {
$sAddressLevelConfig = getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json');
echo 'Updating address levels from '.$sAddressLevelConfig.".\n";
$oAlParser = new \Nominatim\Setup\AddressLevelParser($sAddressLevelConfig);
$oAlParser->createTable($oDB, 'address_levels');
(clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
}
if ($aResult['recompute-importance']) {
@ -307,145 +228,17 @@ if ($aResult['recompute-importance']) {
}
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
//
if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) {
fail('Error: Update interval too low for download.geofabrik.de. ' .
"Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
$oCmd = (clone($oNominatimCmd))
->addParams('replication')
->addParams('--threads', $aResult['index-instances']);
if (!$aResult['import-osmosis-all']) {
$oCmd->addParams('--once');
}
$sImportFile = CONST_InstallDir.'/osmosischange.osc';
$oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin))
->addParams('--server', $sBaseURL)
->addParams('--outfile', $sImportFile)
->addParams('--size', getSetting('REPLICATION_MAX_DIFF'));
$oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
while (true) {
$fStartTime = time();
$aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
if (!$aLastState['sequence_id']) {
echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
exit(1);
}
echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
$sBatchEnd = $aLastState['lastimportdate'];
$iEndSequence = $aLastState['sequence_id'];
if ($aLastState['indexed']) {
// Sleep if the update interval has not yet been reached.
$fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL');
if ($fNextUpdate > $fStartTime) {
$iSleepTime = $fNextUpdate - $fStartTime;
echo "Waiting for next update for $iSleepTime sec.";
sleep($iSleepTime);
}
// Download the next batch of changes.
do {
$fCMDStartTime = time();
$iNextSeq = (int) $aLastState['sequence_id'];
unset($aOutput);
$oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
echo $oCMD->escapedCmd()."\n";
if (file_exists($sImportFile)) {
unlink($sImportFile);
}
exec($oCMD->escapedCmd(), $aOutput, $iResult);
if ($iResult == 3) {
$sSleep = getSetting('REPLICATION_RECHECK_INTERVAL');
echo 'No new updates. Sleeping for '.$sSleep." sec.\n";
sleep($sSleep);
} elseif ($iResult != 0) {
echo 'ERROR: updates failed.';
exit($iResult);
} else {
$iEndSequence = (int)$aOutput[0];
}
} while ($iResult);
// get the newest object from the diff file
$sBatchEnd = 0;
$iRet = 0;
$oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
if ($iRet == 5) {
echo "Diff file is empty. skipping import.\n";
if (!$aResult['import-osmosis-all']) {
exit(0);
} else {
continue;
}
}
if ($iRet != 0) {
fail('Error getting date from diff file.');
}
$sBatchEnd = $sBatchEnd[0];
// Import the file
$fCMDStartTime = time();
echo $oCMDImport->escapedCmd()."\n";
unset($sJunk);
$iErrorLevel = $oCMDImport->run();
if ($iErrorLevel) {
echo "Error executing osm2pgsql: $iErrorLevel\n";
exit($iErrorLevel);
}
// write the update logs
$iFileSize = filesize($sImportFile);
$sSQL = 'INSERT INTO import_osmosis_log';
$sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
$sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
$sSQL .= date('Y-m-d H:i:s')."','import')";
var_Dump($sSQL);
$oDB->exec($sSQL);
// update the status
$sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
var_Dump($sSQL);
$oDB->exec($sSQL);
echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
}
// Index file
if (!$aResult['no-index']) {
$fCMDStartTime = time();
$oThisIndexCmd = clone($oIndexCmd);
echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run();
if ($iErrorLevel) {
echo "Error: $iErrorLevel\n";
exit($iErrorLevel);
}
$sSQL = 'INSERT INTO import_osmosis_log';
$sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
$sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
$sSQL .= date('Y-m-d H:i:s')."','index')";
var_Dump($sSQL);
$oDB->exec($sSQL);
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
} else {
if ($aResult['import-osmosis-all']) {
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
exit(1);
}
}
$fDuration = time() - $fStartTime;
echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
if (!$aResult['import-osmosis-all']) exit(0);
if ($aResult['no-index']) {
$oCmd->addParams('--no-index');
}
exit($oCmd->run());
}

View File

@ -1,98 +0,0 @@
<?php
namespace Nominatim\Setup;
/**
* Parses an address level description.
*/
class AddressLevelParser
{
private $aLevels;
public function __construct($sDescriptionFile)
{
$sJson = file_get_contents($sDescriptionFile);
$this->aLevels = json_decode($sJson, true);
if (!$this->aLevels) {
switch (json_last_error()) {
case JSON_ERROR_NONE:
break;
case JSON_ERROR_DEPTH:
fail('JSON error - Maximum stack depth exceeded');
break;
case JSON_ERROR_STATE_MISMATCH:
fail('JSON error - Underflow or the modes mismatch');
break;
case JSON_ERROR_CTRL_CHAR:
fail('JSON error - Unexpected control character found');
break;
case JSON_ERROR_SYNTAX:
fail('JSON error - Syntax error, malformed JSON');
break;
case JSON_ERROR_UTF8:
fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded');
break;
default:
fail('JSON error - Unknown error');
break;
}
}
}
/**
* Dump the description into a database table.
*
* @param object $oDB Database conneciton to use.
* @param string $sTable Name of table to create.
*
* @return null
*
* A new table is created. Any previously existing table is dropped.
* The table has the following columns:
* country, class, type, rank_search, rank_address.
*/
public function createTable($oDB, $sTable)
{
$oDB->exec('DROP TABLE IF EXISTS '.$sTable);
$sSql = 'CREATE TABLE '.$sTable;
$sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
$sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
$oDB->exec($sSql);
$sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)';
$oDB->exec($sSql);
$sSql = 'INSERT INTO '.$sTable.' VALUES ';
foreach ($this->aLevels as $aLevel) {
$aCountries = array();
if (isset($aLevel['countries'])) {
foreach ($aLevel['countries'] as $sCountry) {
$aCountries[$sCountry] = $oDB->getDBQuoted($sCountry);
}
} else {
$aCountries['NULL'] = 'NULL';
}
foreach ($aLevel['tags'] as $sKey => $aValues) {
foreach ($aValues as $sValue => $mRanks) {
$aFields = array(
$oDB->getDBQuoted($sKey),
$sValue ? $oDB->getDBQuoted($sValue) : 'NULL'
);
if (is_array($mRanks)) {
$aFields[] = (string) $mRanks[0];
$aFields[] = (string) $mRanks[1];
} else {
$aFields[] = (string) $mRanks;
$aFields[] = (string) $mRanks;
}
$sLine = ','.join(',', $aFields).'),';
foreach ($aCountries as $sCountries) {
$sSql .= '('.$sCountries.$sLine;
}
}
}
}
$oDB->exec(rtrim($sSql, ','));
}
}

View File

@ -2,7 +2,6 @@
namespace Nominatim\Setup;
require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
require_once(CONST_LibDir.'/Shell.php');
class SetupFunctions
@ -19,6 +18,7 @@ class SetupFunctions
protected $bNoPartitions;
protected $bDrop;
protected $oDB = null;
protected $oNominatimCmd;
public function __construct(array $aCMDResult)
{
@ -81,6 +81,14 @@ class SetupFunctions
}
$this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
$this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
if ($this->bQuiet) {
$this->oNominatimCmd->addParams('--quiet');
}
if ($this->bVerbose) {
$this->oNominatimCmd->addParams('--verbose');
}
}
public function createDB()
@ -256,8 +264,7 @@ class SetupFunctions
$this->dropTable('search_name');
}
$oAlParser = new AddressLevelParser(getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json'));
$oAlParser->createTable($this->db(), 'address_levels');
(clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
}
public function createTableTriggers()
@ -283,9 +290,7 @@ class SetupFunctions
public function createPartitionFunctions()
{
info('Create Partition Functions');
$sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql');
$this->pgsqlRunPartitionScript($sTemplate);
$this->createSqlFunctions(); // also create partition functions
}
public function importWikipediaArticles()
@ -549,19 +554,10 @@ class SetupFunctions
{
$this->checkModulePresence(); // raises exception on failure
$oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('index');
if ($this->bQuiet) {
$oBaseCmd->addParams('-q');
}
if ($this->bVerbose) {
$oBaseCmd->addParams('-v');
}
$oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');
info('Index ranks 0 - 4');
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
echo $oCmd->escapedCmd();
$iStatus = $oCmd->run();
if ($iStatus != 0) {
@ -790,43 +786,18 @@ class SetupFunctions
private function createSqlFunctions()
{
$sBasePath = CONST_DataDir.'/sql/functions/';
$sTemplate = file_get_contents($sBasePath.'utils.sql');
$sTemplate .= file_get_contents($sBasePath.'normalization.sql');
$sTemplate .= file_get_contents($sBasePath.'ranking.sql');
$sTemplate .= file_get_contents($sBasePath.'importance.sql');
$sTemplate .= file_get_contents($sBasePath.'address_lookup.sql');
$sTemplate .= file_get_contents($sBasePath.'interpolation.sql');
if ($this->db()->tableExists('place')) {
$sTemplate .= file_get_contents($sBasePath.'place_triggers.sql');
}
if ($this->db()->tableExists('placex')) {
$sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql');
}
if ($this->db()->tableExists('location_postcode')) {
$sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql');
}
$sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate);
if ($this->bEnableDiffUpdates) {
$sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate);
$oCmd = (clone($this->oNominatimCmd))
->addParams('refresh', '--functions');
if (!$this->bEnableDiffUpdates) {
$oCmd->addParams('--no-diff-updates');
}
if ($this->bEnableDebugStatements) {
$sTemplate = str_replace('--DEBUG:', '', $sTemplate);
}
if (getSettingBool('LIMIT_REINDEXING')) {
$sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
}
if (!getSettingBool('USE_US_TIGER_DATA')) {
$sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate);
}
if (!getSettingBool('USE_AUX_LOCATION_DATA')) {
$sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate);
$oCmd->addParams('--enable-debug-statements');
}
$sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false';
$sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate);
$this->pgsqlRunScript($sTemplate);
$oCmd->run();
}
private function pgsqlRunPartitionScript($sTemplate)

View File

@ -2,16 +2,21 @@
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
import sys
import datetime as dt
import os
import sys
import time
import argparse
import logging
from pathlib import Path
from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_api_script
from .db.connection import connect
from .db import status
from .errors import UsageError
from .indexer.indexer import Indexer
LOG = logging.getLogger()
def _num_system_cpus():
try:
@ -85,7 +90,27 @@ class CommandlineParser:
args.config = Configuration(args.project_dir, args.data_dir / 'settings')
return args.command.run(args)
try:
return args.command.run(args)
except UsageError as exception:
log = logging.getLogger()
if log.isEnabledFor(logging.DEBUG):
raise # use Python's exception printing
log.fatal('FATAL: %s', exception)
# If we get here, then execution has failed in some way.
return 1
def _osm2pgsql_options_from_args(args, default_cache, default_threads):
""" Set up the stanadrd osm2pgsql from the command line arguments.
"""
return dict(osm2pgsql=args.osm2pgsql_path,
osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
osm2pgsql_style=args.config.get_import_style_file(),
threads=args.threads or default_threads,
dsn=args.config.get_libpq_dsn(),
flatnode_file=args.config.FLATNODE_FILE)
##### Subcommand classes
#
@ -98,6 +123,8 @@ class CommandlineParser:
#
# No need to document the functions each time.
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415
class SetupAll:
@ -230,26 +257,123 @@ class UpdateReplication:
group.add_argument('--no-index', action='store_false', dest='do_index',
help="""Do not index the new data. Only applicable
together with --once""")
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
@staticmethod
def _init_replication(args):
from .tools import replication, refresh
LOG.warning("Initialising replication updates")
conn = connect(args.config.get_libpq_dsn())
replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
if args.update_functions:
LOG.warning("Create functions")
refresh.create_functions(conn, args.config, args.data_dir,
True, False)
conn.close()
return 0
@staticmethod
def _check_for_updates(args):
from .tools import replication
conn = connect(args.config.get_libpq_dsn())
ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
conn.close()
return ret
@staticmethod
def _report_update(batchdate, start_import, start_index):
def round_time(delta):
return dt.timedelta(seconds=int(delta.total_seconds()))
end = dt.datetime.now(dt.timezone.utc)
LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
round_time((start_index or end) - start_import),
"Indexing: {} ".format(round_time(end - start_index))
if start_index else '',
round_time(end - start_import),
round_time(end - batchdate))
@staticmethod
def _update(args):
from .tools import replication
from .indexer.indexer import Indexer
params = _osm2pgsql_options_from_args(args, 2000, 1)
params.update(base_url=args.config.REPLICATION_URL,
update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
import_file=args.project_dir / 'osmosischange.osc',
max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
indexed_only=not args.once)
# Sanity check to not overwhelm the Geofabrik servers.
if 'download.geofabrik.de'in params['base_url']\
and params['update_interval'] < 86400:
LOG.fatal("Update interval too low for download.geofabrik.de.\n"
"Please check install documentation "
"(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
"setting-up-the-update-process).")
raise UsageError("Invalid replication update interval setting.")
if not args.once:
if not args.do_index:
LOG.fatal("Indexing cannot be disabled when running updates continuously.")
raise UsageError("Bad argument '--no-index'.")
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
while True:
conn = connect(args.config.get_libpq_dsn())
start = dt.datetime.now(dt.timezone.utc)
state = replication.update(conn, params)
status.log_status(conn, start, 'import')
batchdate, _, _ = status.get_status(conn)
conn.close()
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or 1)
indexer.index_boundaries(0, 30)
indexer.index_by_rank(0, 30)
conn = connect(args.config.get_libpq_dsn())
status.set_indexed(conn, True)
status.log_status(conn, index_start, 'index')
conn.close()
else:
index_start = None
if LOG.isEnabledFor(logging.WARNING):
UpdateReplication._report_update(batchdate, start, index_start)
if args.once:
break
if state is replication.UpdateState.NO_CHANGES:
LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
time.sleep(recheck_interval)
return state.value
@staticmethod
def run(args):
params = ['update.php']
try:
import osmium # pylint: disable=W0611
except ModuleNotFoundError:
LOG.fatal("pyosmium not installed. Replication functions not available.\n"
"To install pyosmium via pip: pip3 install osmium")
return 1
if args.init:
params.append('--init-updates')
if not args.update_functions:
params.append('--no-update-functions')
elif args.check_for_updates:
params.append('--check-for-updates')
else:
if args.once:
params.append('--import-osmosis')
else:
params.append('--import-osmosis-all')
if not args.do_index:
params.append('--no-index')
return UpdateReplication._init_replication(args)
return run_legacy_script(*params, nominatim_env=args)
if args.check_for_updates:
return UpdateReplication._check_for_updates(args)
return UpdateReplication._update(args)
class UpdateAddData:
"""\
@ -320,6 +444,8 @@ class UpdateIndex:
@staticmethod
def run(args):
from .indexer.indexer import Indexer
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or _num_system_cpus() or 1)
@ -328,8 +454,11 @@ class UpdateIndex:
if not args.boundaries_only:
indexer.index_by_rank(args.minrank, args.maxrank)
if not args.no_boundaries and not args.boundaries_only:
indexer.update_status_table()
if not args.no_boundaries and not args.boundaries_only \
and args.minrank == 0 and args.maxrank == 30:
conn = connect(args.config.get_libpq_dsn())
status.set_indexed(conn, True)
conn.close()
return 0
@ -366,22 +495,34 @@ class UpdateRefresh:
@staticmethod
def run(args):
from .tools import refresh
if args.postcodes:
run_legacy_script('update.php', '--calculate-postcodes',
nominatim_env=args, throw_on_fail=True)
LOG.warning("Update postcodes centroid")
conn = connect(args.config.get_libpq_dsn())
refresh.update_postcodes(conn, args.data_dir)
conn.close()
if args.word_counts:
run_legacy_script('update.php', '--recompute-word-counts',
nominatim_env=args, throw_on_fail=True)
LOG.warning('Recompute frequency of full-word search terms')
conn = connect(args.config.get_libpq_dsn())
refresh.recompute_word_counts(conn, args.data_dir)
conn.close()
if args.address_levels:
run_legacy_script('update.php', '--update-address-levels',
nominatim_env=args, throw_on_fail=True)
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
LOG.warning('Updating address levels from %s', cfg)
conn = connect(args.config.get_libpq_dsn())
refresh.load_address_levels_from_file(conn, cfg)
conn.close()
if args.functions:
params = ['setup.php', '--create-functions', '--create-partition-functions']
if args.diffs:
params.append('--enable-diff-updates')
if args.enable_debug_statements:
params.append('--enable-debug-statements')
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
LOG.warning('Create functions')
conn = connect(args.config.get_libpq_dsn())
refresh.create_functions(conn, args.config, args.data_dir,
args.diffs, args.enable_debug_statements)
conn.close()
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
nominatim_env=args, throw_on_fail=True)
@ -392,6 +533,7 @@ class UpdateRefresh:
if args.website:
run_legacy_script('setup.php', '--setup-website',
nominatim_env=args, throw_on_fail=True)
return 0

View File

@ -1,10 +1,16 @@
"""
Nominatim configuration accessor.
"""
import logging
import os
from pathlib import Path
from dotenv import dotenv_values
from .errors import UsageError
LOG = logging.getLogger()
class Configuration:
""" Load and manage the project configuration.
@ -20,27 +26,75 @@ class Configuration:
"""
def __init__(self, project_dir, config_dir):
self.project_dir = project_dir
self.config_dir = config_dir
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
if project_dir is not None:
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
# Add defaults for variables that are left empty to set the default.
# They may still be overwritten by environment variables.
if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']:
self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
str(config_dir / 'address-levels.json')
def __getattr__(self, name):
name = 'NOMINATIM_' + name
return os.environ.get(name) or self._config[name]
def get_bool(self, name):
""" Return the given configuration parameter as a boolean.
Values of '1', 'yes' and 'true' are accepted as truthy values,
everything else is interpreted as false.
"""
return self.__getattr__(name).lower() in ('1', 'yes', 'true')
def get_int(self, name):
""" Return the given configuration parameter as an int.
"""
try:
return int(self.__getattr__(name))
except ValueError:
LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
raise UsageError("Configuration error.")
def get_libpq_dsn(self):
""" Get configured database DSN converted into the key/value format
understood by libpq and psycopg.
"""
dsn = self.DATABASE_DSN
def quote_param(param):
key, val = param.split('=')
val = val.replace('\\', '\\\\').replace("'", "\\'")
if ' ' in val:
val = "'" + val + "'"
return key + '=' + val
if dsn.startswith('pgsql:'):
# Old PHP DSN format. Convert before returning.
return dsn[6:].replace(';', ' ')
return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
return dsn
def get_import_style_file(self):
""" Return the import style file as a path object. Translates the
name of the standard styles automatically into a file in the
config style.
"""
style = self.__getattr__('IMPORT_STYLE')
if style in ('admin', 'street', 'address', 'full', 'extratags'):
return self.config_dir / 'import-{}.style'.format(style)
return Path(style)
def get_os_env(self):
""" Return a copy of the OS environment with the Nominatim configuration
merged in.

View File

@ -0,0 +1,58 @@
"""
Specialised connection and cursor functions.
"""
import logging
import psycopg2
import psycopg2.extensions
import psycopg2.extras
class _Cursor(psycopg2.extras.DictCursor):
""" A cursor returning dict-like objects and providing specialised
execution functions.
"""
def execute(self, query, args=None): # pylint: disable=W0221
""" Query execution that logs the SQL query when debugging is enabled.
"""
logger = logging.getLogger()
logger.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
def scalar(self, sql, args=None):
""" Execute query that returns a single value. The value is returned.
If the query yields more than one row, a ValueError is raised.
"""
self.execute(sql, args)
if self.rowcount != 1:
raise RuntimeError("Query did not return a single row.")
return self.fetchone()[0]
class _Connection(psycopg2.extensions.connection):
""" A connection that provides the specialised cursor by default and
adds convenience functions for administrating the database.
"""
def cursor(self, cursor_factory=_Cursor, **kwargs):
""" Return a new cursor. By default the specialised cursor is returned.
"""
return super().cursor(cursor_factory=cursor_factory, **kwargs)
def table_exists(self, table):
""" Check that a table with the given name exists in the database.
"""
with self.cursor() as cur:
num = cur.scalar("""SELECT count(*) FROM pg_tables
WHERE tablename = %s""", (table, ))
return num == 1
def connect(dsn):
""" Open a connection to the database using the specialised connection
factory.
"""
return psycopg2.connect(dsn, connection_factory=_Connection)

82
nominatim/db/status.py Normal file
View File

@ -0,0 +1,82 @@
"""
Access and helper functions for the status and status log table.
"""
import datetime as dt
import logging
import re
from ..tools.exec_utils import get_url
from ..errors import UsageError
LOG = logging.getLogger()
def compute_database_date(conn):
""" Determine the date of the database from the newest object in the
data base.
"""
# First, find the node with the highest ID in the database
with conn.cursor() as cur:
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
if osmid is None:
LOG.fatal("No data found in the database.")
raise UsageError("No data found in the database.")
LOG.info("Using node id %d for timestamp lookup", osmid)
# Get the node from the API to find the timestamp when it was created.
node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
data = get_url(node_url)
match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
if match is None:
LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
"URL used: %s", node_url)
raise UsageError("Bad API data.")
LOG.debug("Found timestamp %s", match[1])
return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
def set_status(conn, date, seq=None, indexed=True):
""" Replace the current status with the given status.
"""
assert date.tzinfo == dt.timezone.utc
with conn.cursor() as cur:
cur.execute("TRUNCATE TABLE import_status")
cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
VALUES (%s, %s, %s)""", (date, seq, indexed))
conn.commit()
def get_status(conn):
""" Return the current status as a triple of (date, sequence, indexed).
If status has not been set up yet, a triple of None is returned.
"""
with conn.cursor() as cur:
cur.execute("SELECT * FROM import_status LIMIT 1")
if cur.rowcount < 1:
return None, None, None
row = cur.fetchone()
return row['lastimportdate'], row['sequence_id'], row['indexed']
def set_indexed(conn, state):
""" Set the indexed flag in the status table to the given state.
"""
with conn.cursor() as cur:
cur.execute("UPDATE import_status SET indexed = %s", (state, ))
conn.commit()
def log_status(conn, start, event, batchsize=None):
""" Write a new status line to the `import_osmosis_log` table.
"""
with conn.cursor() as cur:
cur.execute("""INSERT INTO import_osmosis_log
(batchend, batchseq, batchsize, starttime, endtime, event)
SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
(batchsize, start, event))

12
nominatim/db/utils.py Normal file
View File

@ -0,0 +1,12 @@
"""
Helper functions for handling DB accesses.
"""
def execute_file(conn, fname):
""" Read an SQL file and run its contents against the given connection.
"""
with fname.open('r') as fdesc:
sql = fdesc.read()
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()

8
nominatim/errors.py Normal file
View File

@ -0,0 +1,8 @@
"""
Custom exception and error classes for Nominatim.
"""
class UsageError(Exception):
""" An error raised because of bad user input. This error will usually
not cause a stack trace to be printed unless debugging is enabled.
"""

View File

@ -2,9 +2,17 @@
Helper functions for executing external programs.
"""
import logging
import os
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
from psycopg2.extensions import parse_dsn
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()
def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
""" Run a Nominatim PHP script with the given arguments.
@ -80,3 +88,51 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
print(result[content_start + 4:].replace('\\n', '\n'))
return 0
def run_osm2pgsql(options):
""" Run osm2pgsql with the given options.
"""
env = os.environ
cmd = [options['osm2pgsql'],
'--hstore', '--latlon', '--slim',
'--with-forward-dependencies', 'false',
'--log-progress', 'true',
'--number-processes', str(options['threads']),
'--cache', str(options['osm2pgsql_cache']),
'--output', 'gazetteer',
'--style', str(options['osm2pgsql_style'])
]
if options['append']:
cmd.append('--append')
if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))
dsn = parse_dsn(options['dsn'])
if 'password' in dsn:
env['PGPASSWORD'] = dsn['password']
if 'dbname' in dsn:
cmd.extend(('-d', dsn['dbname']))
if 'user' in dsn:
cmd.extend(('--username', dsn['user']))
for param in ('host', 'port'):
if param in dsn:
cmd.extend(('--' + param, dsn[param]))
cmd.append(str(options['import_file']))
subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
def get_url(url):
""" Get the contents from the given URL and return it as a UTF-8 string.
"""
headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION}
try:
with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
return response.read().decode('utf-8')
except:
LOG.fatal('Failed to load URL: %s', url)
raise

169
nominatim/tools/refresh.py Normal file
View File

@ -0,0 +1,169 @@
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
import json
import re
from psycopg2.extras import execute_values
from ..db.utils import execute_file
def update_postcodes(conn, datadir):
""" Recalculate postcode centroids and add, remove and update entries in the
location_postcode table. `conn` is an opne connection to the database.
"""
execute_file(conn, datadir / 'sql' / 'update-postcodes.sql')
def recompute_word_counts(conn, datadir):
""" Compute the frequency of full-word search terms.
"""
execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
def _add_address_level_rows_from_entry(rows, entry):
""" Converts a single entry from the JSON format for address rank
descriptions into a flat format suitable for inserting into a
PostgreSQL table and adds these lines to `rows`.
"""
countries = entry.get('countries') or (None, )
for key, values in entry['tags'].items():
for value, ranks in values.items():
if isinstance(ranks, list):
rank_search, rank_address = ranks
else:
rank_search = rank_address = ranks
if not value:
value = None
for country in countries:
rows.append((country, key, value, rank_search, rank_address))
def load_address_levels(conn, table, levels):
""" Replace the `address_levels` table with the contents of `levels'.
A new table is created any previously existing table is dropped.
The table has the following columns:
country, class, type, rank_search, rank_address
"""
rows = []
for entry in levels:
_add_address_level_rows_from_entry(rows, entry)
with conn.cursor() as cur:
cur.execute('DROP TABLE IF EXISTS {}'.format(table))
cur.execute("""CREATE TABLE {} (country_code varchar(2),
class TEXT,
type TEXT,
rank_search SMALLINT,
rank_address SMALLINT)""".format(table))
execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
conn.commit()
def load_address_levels_from_file(conn, config_file):
""" Replace the `address_levels` table with the contents of the config
file.
"""
with config_file.open('r') as fdesc:
load_address_levels(conn, 'address_levels', json.load(fdesc))
PLPGSQL_BASE_MODULES = (
'utils.sql',
'normalization.sql',
'ranking.sql',
'importance.sql',
'address_lookup.sql',
'interpolation.sql'
)
PLPGSQL_TABLE_MODULES = (
('place', 'place_triggers.sql'),
('placex', 'placex_triggers.sql'),
('location_postcode', 'postcode_triggers.sql')
)
def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
""" Read all applicable SQLs containing PL/pgSQL functions, replace
placefolders and execute them.
"""
sql_func_dir = sql_dir / 'functions'
sql = ''
# Get the basic set of functions that is always imported.
for sql_file in PLPGSQL_BASE_MODULES:
with (sql_func_dir / sql_file).open('r') as fdesc:
sql += fdesc.read()
# Some files require the presence of a certain table
for table, fname in PLPGSQL_TABLE_MODULES:
if conn.table_exists(table):
with (sql_func_dir / fname).open('r') as fdesc:
sql += fdesc.read()
# Replace placeholders.
sql = sql.replace('{modulepath}',
config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve()))
if enable_diff_updates:
sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--')
if enable_debug:
sql = sql.replace('--DEBUG:', '')
if config.get_bool('LIMIT_REINDEXING'):
sql = sql.replace('--LIMIT INDEXING:', '')
if not config.get_bool('USE_US_TIGER_DATA'):
sql = sql.replace('-- %NOTIGERDATA% ', '')
if not config.get_bool('USE_AUX_LOCATION_DATA'):
sql = sql.replace('-- %NOAUXDATA% ', '')
reverse_only = 'false' if conn.table_exists('search_name') else 'true'
return sql.replace('%REVERSE-ONLY%', reverse_only)
def replace_partition_string(sql, partitions):
""" Replace a partition template with the actual partition code.
"""
for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S):
repl = ''
for part in partitions:
repl += match.replace('-partition-', str(part))
sql = sql.replace(match, repl)
return sql
def _get_partition_function_sql(conn, sql_dir):
""" Create functions that work on partition tables.
"""
with conn.cursor() as cur:
cur.execute('SELECT distinct partition FROM country_name')
partitions = set([0])
for row in cur:
partitions.add(row[0])
with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc:
sql = fdesc.read()
return replace_partition_string(sql, sorted(partitions))
def create_functions(conn, config, data_dir,
enable_diff_updates=True, enable_debug=False):
""" (Re)create the PL/pgSQL functions.
"""
sql_dir = data_dir / 'sql'
sql = _get_standard_function_sql(conn, config, sql_dir,
enable_diff_updates, enable_debug)
sql += _get_partition_function_sql(conn, sql_dir)
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()

View File

@ -0,0 +1,119 @@
"""
Functions for updating a database from a replication source.
"""
import datetime as dt
from enum import Enum
import logging
import time
from osmium.replication.server import ReplicationServer
from osmium import WriteHandler
from ..db import status
from .exec_utils import run_osm2pgsql
from ..errors import UsageError
LOG = logging.getLogger()
def init_replication(conn, base_url):
""" Set up replication for the server at the given base URL.
"""
LOG.info("Using replication source: %s", base_url)
date = status.compute_database_date(conn)
# margin of error to make sure we get all data
date -= dt.timedelta(hours=3)
repl = ReplicationServer(base_url)
seq = repl.timestamp_to_sequence(date)
if seq is None:
LOG.fatal("Cannot reach the configured replication service '%s'.\n"
"Does the URL point to a directory containing OSM update data?",
base_url)
raise UsageError("Failed to reach replication service")
status.set_status(conn, date=date, seq=seq)
LOG.warning("Updates intialised at sequence %s (%s)", seq, date)
def check_for_updates(conn, base_url):
""" Check if new data is available from the replication service at the
given base URL.
"""
_, seq, _ = status.get_status(conn)
if seq is None:
LOG.error("Replication not set up. "
"Please run 'nominatim replication --init' first.")
return 254
state = ReplicationServer(base_url).get_state_info()
if state is None:
LOG.error("Cannot get state for URL %s.", base_url)
return 253
if state.sequence <= seq:
LOG.warning("Database is up to date.")
return 2
LOG.warning("New data available (%i => %i).", seq, state.sequence)
return 0
class UpdateState(Enum):
""" Possible states after an update has run.
"""
UP_TO_DATE = 0
MORE_PENDING = 2
NO_CHANGES = 3
def update(conn, options):
""" Update database from the next batch of data. Returns the state of
updates according to `UpdateState`.
"""
startdate, startseq, indexed = status.get_status(conn)
if startseq is None:
LOG.error("Replication not set up. "
"Please run 'nominatim replication --init' first.")
raise UsageError("Replication not set up.")
if not indexed and options['indexed_only']:
LOG.info("Skipping update. There is data that needs indexing.")
return UpdateState.MORE_PENDING
last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
update_interval = dt.timedelta(seconds=options['update_interval'])
if last_since_update < update_interval:
duration = (update_interval - last_since_update).seconds
LOG.warning("Sleeping for %s sec before next update.", duration)
time.sleep(duration)
if options['import_file'].exists():
options['import_file'].unlink()
# Read updates into file.
repl = ReplicationServer(options['base_url'])
outhandler = WriteHandler(str(options['import_file']))
endseq = repl.apply_diffs(outhandler, startseq,
max_size=options['max_diff_size'] * 1024)
outhandler.close()
if endseq is None:
return UpdateState.NO_CHANGES
# Consume updates with osm2pgsql.
options['append'] = True
run_osm2pgsql(options)
# Write the current status to the file
endstate = repl.get_state_info(endseq)
status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False)
return UpdateState.UP_TO_DATE

5
nominatim/version.py Normal file
View File

@ -0,0 +1,5 @@
"""
Version information for Nominatim.
"""
NOMINATIM_VERSION = "3.6.0"

View File

@ -57,6 +57,9 @@ NOMINATIM_HTTP_PROXY_HOST=proxy.mydomain.com
NOMINATIM_HTTP_PROXY_PORT=3128
NOMINATIM_HTTP_PROXY_LOGIN=
NOMINATIM_HTTP_PROXY_PASSWORD=
# Also set these standard environment variables.
# HTTP_PROXY="http://user:pass@10.10.1.10:1080"
# HTTPS_PROXY="http://user:pass@10.10.1.10:1080"
# Location of the osm2pgsql binary.
# When empty, osm2pgsql is expected to reside in the osm2pgsql directory in
@ -64,10 +67,6 @@ NOMINATIM_HTTP_PROXY_PASSWORD=
# EXPERT ONLY. You should usually use the supplied osm2pgsql.
NOMINATIM_OSM2PGSQL_BINARY=
# Location of pyosmium-get-changes.
# Only needed when running updates.
NOMINATIM_PYOSMIUM_BINARY=
# Directory where to find US Tiger data files to import.
# Used with setup.php --import-tiger-data. When unset, the data is expected
# to be located under 'data/tiger' in the source tree.

View File

@ -1,6 +1,6 @@
drop table if exists import_status;
CREATE TABLE import_status (
lastimportdate timestamp NOT NULL,
lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean
);

View File

@ -1,26 +1,155 @@
import itertools
import sys
from pathlib import Path
import psycopg2
import psycopg2.extras
import pytest
SRC_DIR = Path(__file__) / '..' / '..' / '..'
# always test against the source
sys.path.insert(0, str((Path(__file__) / '..' / '..' / '..').resolve()))
sys.path.insert(0, str(SRC_DIR.resolve()))
from nominatim.config import Configuration
from nominatim.db import connection
class _TestingCursor(psycopg2.extras.DictCursor):
""" Extension to the DictCursor class that provides execution
short-cuts that simplify writing assertions.
"""
def scalar(self, sql, params=None):
""" Execute a query with a single return value and return this value.
Raises an assertion when not exactly one row is returned.
"""
self.execute(sql, params)
assert self.rowcount == 1
return self.fetchone()[0]
def row_set(self, sql, params=None):
""" Execute a query and return the result as a set of tuples.
"""
self.execute(sql, params)
if self.rowcount == 1:
return set(tuple(self.fetchone()))
return set((tuple(row) for row in self))
@pytest.fixture
def temp_db(monkeypatch):
""" Create an empty database for the test. The database name is also
exported into NOMINATIM_DATABASE_DSN.
"""
name = 'test_nominatim_python_unittest'
with psycopg2.connect(database='postgres') as conn:
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
cur.execute('CREATE DATABASE {}'.format(name))
conn = psycopg2.connect(database='postgres')
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
cur.execute('CREATE DATABASE {}'.format(name))
conn.close()
monkeypatch.setenv('NOMINATIM_DATABASE_DSN' , 'dbname=' + name)
yield name
with psycopg2.connect(database='postgres') as conn:
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
conn = psycopg2.connect(database='postgres')
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
conn.close()
@pytest.fixture
def temp_db_with_extensions(temp_db):
conn = psycopg2.connect(database=temp_db)
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;')
conn.commit()
conn.close()
return temp_db
@pytest.fixture
def temp_db_conn(temp_db):
""" Connection to the test database.
"""
conn = connection.connect('dbname=' + temp_db)
yield conn
conn.close()
@pytest.fixture
def temp_db_cursor(temp_db):
""" Connection and cursor towards the test database. The connection will
be in auto-commit mode.
"""
conn = psycopg2.connect('dbname=' + temp_db)
conn.set_isolation_level(0)
with conn.cursor(cursor_factory=_TestingCursor) as cur:
yield cur
conn.close()
@pytest.fixture
def def_config():
return Configuration(None, SRC_DIR.resolve() / 'settings')
@pytest.fixture
def status_table(temp_db_conn):
""" Create an empty version of the status table and
the status logging table.
"""
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE import_status (
lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean
)""")
cur.execute("""CREATE TABLE import_osmosis_log (
batchend timestamp,
batchseq integer,
batchsize bigint,
starttime timestamp,
endtime timestamp,
event text
)""")
temp_db_conn.commit()
@pytest.fixture
def place_table(temp_db_with_extensions, temp_db_conn):
""" Create an empty version of the place table.
"""
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE place (
osm_id int8 NOT NULL,
osm_type char(1) NOT NULL,
class text NOT NULL,
type text NOT NULL,
name hstore,
admin_level smallint,
address hstore,
extratags hstore,
geometry Geometry(Geometry,4326) NOT NULL)""")
temp_db_conn.commit()
@pytest.fixture
def place_row(place_table, temp_db_cursor):
""" A factory for rows in the place table. The table is created as a
prerequisite to the fixture.
"""
idseq = itertools.count(1001)
def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom=None):
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
(osm_id or next(idseq), osm_type, cls, typ, names,
admin_level, address, extratags,
geom or 'SRID=4326;POINT(0 0 )'))
return _insert

View File

@ -1,10 +1,21 @@
"""
Tests for command line interface wrapper.
These tests just check that the various command line parameters route to the
correct functionionality. They use a lot of monkeypatching to avoid executing
the actual functions.
"""
import datetime as dt
import psycopg2
import pytest
import time
import nominatim.cli
import nominatim.indexer.indexer
import nominatim.tools.refresh
import nominatim.tools.replication
from nominatim.errors import UsageError
from nominatim.db import status
def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module',
@ -18,9 +29,9 @@ class MockParamCapture:
""" Mock that records the parameters with which a function was called
as well as the number of calls.
"""
def __init__(self):
def __init__(self, retval=0):
self.called = 0
self.return_value = 0
self.return_value = retval
def __call__(self, *args, **kwargs):
self.called += 1
@ -54,7 +65,6 @@ def test_cli_help(capsys):
(('import', '--continue', 'load-data'), 'setup'),
(('freeze',), 'setup'),
(('special-phrases',), 'specialphrases'),
(('replication',), 'update'),
(('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'),
(('check-database',), 'check_import_finished'),
@ -82,14 +92,12 @@ def test_add_data_command(mock_run_legacy, name, oid):
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
with psycopg2.connect(database=temp_db) as conn:
with conn.cursor() as cur:
cur.execute("CREATE TABLE import_status (indexed bool)")
def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = MockParamCapture()
monkeypatch.setattr(nominatim.cli.Indexer, 'index_boundaries', bnd_mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
rank_mock = MockParamCapture()
monkeypatch.setattr(nominatim.cli.Indexer, 'index_by_rank', rank_mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
assert 0 == call_nominatim('index', *params)
@ -98,29 +106,115 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
@pytest.mark.parametrize("command,params", [
('postcodes', ('update.php', '--calculate-postcodes')),
('word-counts', ('update.php', '--recompute-word-counts')),
('address-levels', ('update.php', '--update-address-levels')),
('functions', ('setup.php',)),
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')),
])
def test_refresh_command(mock_run_legacy, command, params):
def test_refresh_legacy_command(mock_run_legacy, temp_db, command, params):
assert 0 == call_nominatim('refresh', '--' + command)
assert mock_run_legacy.called == 1
assert len(mock_run_legacy.last_args) >= len(params)
assert mock_run_legacy.last_args[:len(params)] == params
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
('word-counts', 'recompute_word_counts'),
('address-levels', 'load_address_levels_from_file'),
('functions', 'create_functions'),
])
def test_refresh_command(monkeypatch, temp_db, command, func):
func_mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
assert 0 == call_nominatim('refresh', '--' + command)
assert func_mock.called == 1
def test_refresh_importance_computed_after_wiki_import(mock_run_legacy, temp_db):
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
assert mock_run_legacy.called == 2
assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
@pytest.mark.parametrize("params,func", [
(('--init', '--no-update-functions'), 'init_replication'),
(('--check-for-updates',), 'check_for_updates')
])
def test_replication_command(monkeypatch, temp_db, params, func):
func_mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
assert 0 == call_nominatim('replication', *params)
assert func_mock.called == 1
def test_replication_update_bad_interval(monkeypatch, temp_db):
monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
assert call_nominatim('replication') == 1
def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
assert call_nominatim('replication') == 1
@pytest.mark.parametrize("state, retval", [
(nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
(nominatim.tools.replication.UpdateState.NO_CHANGES, 3)
])
def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
status_table, state, retval):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
func_mock = MockParamCapture(retval=state)
monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
assert retval == call_nominatim('replication', '--once', '--no-index')
def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
nominatim.tools.replication.UpdateState.UP_TO_DATE]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
index_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
with pytest.raises(IndexError):
call_nominatim('replication')
assert index_mock.called == 4
def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
nominatim.tools.replication.UpdateState.UP_TO_DATE]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
index_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
sleep_mock = MockParamCapture()
monkeypatch.setattr(time, 'sleep', sleep_mock)
with pytest.raises(IndexError):
call_nominatim('replication')
assert index_mock.called == 2
assert sleep_mock.called == 1
assert sleep_mock.last_args[0] == 60
@pytest.mark.parametrize("params", [
('search', '--query', 'new'),
('reverse', '--lat', '0', '--lon', '0'),

View File

@ -7,6 +7,7 @@ import tempfile
import pytest
from nominatim.config import Configuration
from nominatim.errors import UsageError
DEFCFG_DIR = Path(__file__) / '..' / '..' / '..' / 'settings'
@ -15,6 +16,7 @@ def test_no_project_dir():
assert config.DATABASE_WEBUSER == 'www-data'
def test_prefer_project_setting_over_default():
with tempfile.TemporaryDirectory() as project_dir:
with open(project_dir + '/.env', 'w') as envfile:
@ -24,6 +26,7 @@ def test_prefer_project_setting_over_default():
assert config.DATABASE_WEBUSER == 'apache'
def test_prefer_os_environ_over_project_setting(monkeypatch):
with tempfile.TemporaryDirectory() as project_dir:
with open(project_dir + '/.env', 'w') as envfile:
@ -35,6 +38,7 @@ def test_prefer_os_environ_over_project_setting(monkeypatch):
assert config.DATABASE_WEBUSER == 'nobody'
def test_get_os_env_add_defaults(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@ -42,6 +46,7 @@ def test_get_os_env_add_defaults(monkeypatch):
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data'
def test_get_os_env_prefer_os_environ(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@ -49,11 +54,13 @@ def test_get_os_env_prefer_os_environ(monkeypatch):
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
def test_get_libpq_dsn_convert_default():
config = Configuration(None, DEFCFG_DIR)
assert config.get_libpq_dsn() == 'dbname=nominatim'
def test_get_libpq_dsn_convert_php(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@ -62,6 +69,19 @@ def test_get_libpq_dsn_convert_php(monkeypatch):
assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"),
("xy'z", "xy\\'z"),
])
def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect):
config = Configuration(None, DEFCFG_DIR)
monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
'pgsql:dbname=gis;password={}'.format(val))
assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect)
def test_get_libpq_dsn_convert_libpq(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
@ -69,3 +89,68 @@ def test_get_libpq_dsn_convert_libpq(monkeypatch):
'host=localhost dbname=gis password=foo')
assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
@pytest.mark.parametrize("value,result",
[(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] +
[(x, False) for x in ('0', 'false', 'no', 'NO', 'x')])
def test_get_bool(monkeypatch, value, result):
config = Configuration(None, DEFCFG_DIR)
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
assert config.get_bool('FOOBAR') == result
def test_get_bool_empty():
config = Configuration(None, DEFCFG_DIR)
assert config.DATABASE_MODULE_PATH == ''
assert config.get_bool('DATABASE_MODULE_PATH') == False
@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
('85762513444', 85762513444)])
def test_get_int_success(monkeypatch, value, result):
config = Configuration(None, DEFCFG_DIR)
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
assert config.get_int('FOOBAR') == result
@pytest.mark.parametrize("value", ['1b', 'fg', '0x23'])
def test_get_int_bad_values(monkeypatch, value):
config = Configuration(None, DEFCFG_DIR)
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
with pytest.raises(UsageError):
config.get_int('FOOBAR')
def test_get_int_empty():
config = Configuration(None, DEFCFG_DIR)
assert config.DATABASE_MODULE_PATH == ''
with pytest.raises(UsageError):
config.get_int('DATABASE_MODULE_PATH')
def test_get_import_style_intern(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street')
expected = DEFCFG_DIR / 'import-street.style'
assert config.get_import_style_file() == expected
@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye'])
def test_get_import_style_intern(monkeypatch, value):
config = Configuration(None, DEFCFG_DIR)
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
assert str(config.get_import_style_file()) == value

View File

@ -0,0 +1,32 @@
"""
Tests for specialised conenction and cursor classes.
"""
import pytest
from nominatim.db.connection import connect
@pytest.fixture
def db(temp_db):
conn = connect('dbname=' + temp_db)
yield conn
conn.close()
def test_connection_table_exists(db, temp_db_cursor):
assert db.table_exists('foobar') == False
temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
assert db.table_exists('foobar') == True
def test_cursor_scalar(db, temp_db_cursor):
temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
with db.cursor() as cur:
assert cur.scalar('SELECT count(*) FROM dummy') == 0
def test_cursor_scalar_many_rows(db):
with db.cursor() as cur:
with pytest.raises(RuntimeError):
cur.scalar('SELECT * FROM pg_tables')

View File

@ -0,0 +1,114 @@
"""
Tests for status table manipulation.
"""
import datetime as dt
import pytest
import nominatim.db.status
from nominatim.errors import UsageError
def test_compute_database_date_place_empty(status_table, place_table, temp_db_conn):
with pytest.raises(UsageError):
nominatim.db.status.compute_database_date(temp_db_conn)
OSM_NODE_DATA = """\
<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
<node id="45673" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
</node>
</osm>
"""
def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
place_row(osm_type='N', osm_id=45673)
requested_url = []
def mock_url(url):
requested_url.append(url)
return OSM_NODE_DATA
monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
date = nominatim.db.status.compute_database_date(temp_db_conn)
assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
place_row(osm_type='N', osm_id=45673)
requested_url = []
def mock_url(url):
requested_url.append(url)
return '<osm version="0.6" generator="OpenStre'
monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
with pytest.raises(UsageError):
date = nominatim.db.status.compute_database_date(temp_db_conn)
def test_set_status_empty_table(status_table, temp_db_conn, temp_db_cursor):
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
nominatim.db.status.set_status(temp_db_conn, date=date)
temp_db_cursor.execute("SELECT * FROM import_status")
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone() == [date, None, True]
def test_set_status_filled_table(status_table, temp_db_conn, temp_db_cursor):
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
nominatim.db.status.set_status(temp_db_conn, date=date)
assert 1 == temp_db_cursor.scalar("SELECT count(*) FROM import_status")
date = dt.datetime.fromordinal(1000100).replace(tzinfo=dt.timezone.utc)
nominatim.db.status.set_status(temp_db_conn, date=date, seq=456, indexed=False)
temp_db_cursor.execute("SELECT * FROM import_status")
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone() == [date, 456, False]
def test_get_status_empty_table(status_table, temp_db_conn):
assert nominatim.db.status.get_status(temp_db_conn) == (None, None, None)
def test_get_status_success(status_table, temp_db_conn):
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
nominatim.db.status.set_status(temp_db_conn, date=date, seq=667, indexed=False)
assert nominatim.db.status.get_status(temp_db_conn) == \
(date, 667, False)
@pytest.mark.parametrize("old_state", [True, False])
@pytest.mark.parametrize("new_state", [True, False])
def test_set_indexed(status_table, temp_db_conn, temp_db_cursor, old_state, new_state):
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
nominatim.db.status.set_status(temp_db_conn, date=date, indexed=old_state)
nominatim.db.status.set_indexed(temp_db_conn, new_state)
assert temp_db_cursor.scalar("SELECT indexed FROM import_status") == new_state
def test_set_indexed_empty_status(status_table, temp_db_conn, temp_db_cursor):
nominatim.db.status.set_indexed(temp_db_conn, True)
assert temp_db_cursor.scalar("SELECT count(*) FROM import_status") == 0
def text_log_status(status_table, temp_db_conn):
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
start = dt.datetime.now() - dt.timedelta(hours=1)
nominatim.db.status.set_status(temp_db_conn, date=date, seq=56)
nominatim.db.status.log_status(temp_db_conn, start, 'index')
assert temp_db_cursor.scalar("SELECT count(*) FROM import_osmosis_log") == 1
assert temp_db_cursor.scalar("SELECT seq FROM import_osmosis_log") == 56
assert temp_db_cursor.scalar("SELECT date FROM import_osmosis_log") == date

View File

@ -0,0 +1,30 @@
"""
Tests for DB utility functions in db.utils
"""
import psycopg2
import pytest
import nominatim.db.utils as db_utils
def test_execute_file_success(temp_db_conn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
db_utils.execute_file(temp_db_conn, tmpfile)
with temp_db_conn.cursor() as cur:
cur.execute('SELECT * FROM test')
assert cur.rowcount == 1
assert cur.fetchone()[0] == 56
def test_execute_file_bad_file(temp_db_conn, tmp_path):
with pytest.raises(FileNotFoundError):
db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql')
def test_execute_file_bad_sql(temp_db_conn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
with pytest.raises(psycopg2.ProgrammingError):
db_utils.execute_file(temp_db_conn, tmpfile)

View File

@ -82,10 +82,8 @@ class IndexerTestDB:
@pytest.fixture
def test_db(temp_db):
conn = psycopg2.connect(database=temp_db)
yield IndexerTestDB(conn)
conn.close()
def test_db(temp_db_conn):
yield IndexerTestDB(temp_db_conn)
@pytest.mark.parametrize("threads", [1, 15])

View File

@ -7,7 +7,6 @@ import tempfile
import pytest
from nominatim.config import Configuration
import nominatim.tools.exec_utils as exec_utils
@pytest.fixture
@ -18,9 +17,9 @@ def tmp_phplib_dir():
yield Path(phpdir)
@pytest.fixture
def nominatim_env(tmp_phplib_dir):
def nominatim_env(tmp_phplib_dir, def_config):
class _NominatimEnv:
config = Configuration(None, Path(__file__) / '..' / '..' / '..' / 'settings')
config = def_config
phplib_dir = tmp_phplib_dir
data_dir = Path('data')
project_dir = Path('.')
@ -100,3 +99,12 @@ def test_run_api_with_extra_env(tmp_project_dir):
extra_env = dict(SCRIPT_FILENAME=str(tmp_project_dir / 'website' / 'test.php'))
assert 0 == exec_utils.run_api_script('badname', tmp_project_dir,
extra_env=extra_env)
### run_osm2pgsql
def test_run_osm2pgsql():
exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
osm2pgsql_style='./my.style',
import_file='foo.bar'))

View File

@ -0,0 +1,85 @@
"""
Tests for function for importing address ranks.
"""
import json
import pytest
from pathlib import Path
from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file
def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
load_address_levels_from_file(temp_db_conn, Path(def_config.ADDRESS_LEVEL_CONFIG))
assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
def test_load_ranks_from_file(temp_db_conn, temp_db_cursor, tmp_path):
test_file = tmp_path / 'test_levels.json'
test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
load_address_levels_from_file(temp_db_conn, test_file)
assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
def test_load_ranks_from_broken_file(temp_db_conn, tmp_path):
test_file = tmp_path / 'test_levels.json'
test_file.write_text('[{"tags":"place":{"sea":2}}}]')
with pytest.raises(json.decoder.JSONDecodeError):
load_address_levels_from_file(temp_db_conn, test_file)
def test_load_ranks_country(temp_db_conn, temp_db_cursor):
load_address_levels(temp_db_conn, 'levels',
[{"tags": {"place": {"village": 14}}},
{"countries": ['de'],
"tags": {"place": {"village": 15}}},
{"countries": ['uk', 'us' ],
"tags": {"place": {"village": 16}}}
])
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
set([(None, 'place', 'village', 14, 14),
('de', 'place', 'village', 15, 15),
('uk', 'place', 'village', 16, 16),
('us', 'place', 'village', 16, 16),
])
def test_load_ranks_default_value(temp_db_conn, temp_db_cursor):
load_address_levels(temp_db_conn, 'levels',
[{"tags": {"boundary": {"": 28}}},
{"countries": ['hu'],
"tags": {"boundary": {"": 29}}}
])
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
set([(None, 'boundary', None, 28, 28),
('hu', 'boundary', None, 29, 29),
])
def test_load_ranks_multiple_keys(temp_db_conn, temp_db_cursor):
load_address_levels(temp_db_conn, 'levels',
[{"tags":
{"place": {"city": 14},
"boundary": {"administrative2" : 4}}
}])
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
set([(None, 'place', 'city', 14, 14),
(None, 'boundary', 'administrative2', 4, 4),
])
def test_load_ranks_address(temp_db_conn, temp_db_cursor):
load_address_levels(temp_db_conn, 'levels',
[{"tags":
{"place": {"city": 14,
"town" : [14, 13]}}
}])
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
set([(None, 'place', 'city', 14, 14),
(None, 'place', 'town', 14, 13),
])

View File

@ -0,0 +1,99 @@
"""
Tests for creating PL/pgSQL functions for Nominatim.
"""
from pathlib import Path
import pytest
from nominatim.db.connection import connect
from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql
SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve()
@pytest.fixture
def db(temp_db):
conn = connect('dbname=' + temp_db)
yield conn
conn.close()
@pytest.fixture
def db_with_tables(db):
with db.cursor() as cur:
for table in ('place', 'placex', 'location_postcode'):
cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table))
return db
def test_standard_functions_replace_module_default(db, def_config):
def_config.project_dir = Path('.')
sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
assert sql
assert sql.find('{modulepath}') < 0
assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0
def test_standard_functions_replace_module_custom(monkeypatch, db, def_config):
monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom')
sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
assert sql
assert sql.find('{modulepath}') < 0
assert sql.find("'custom/nominatim.so'") >= 0
@pytest.mark.parametrize("enabled", (True, False))
def test_standard_functions_enable_diff(db_with_tables, def_config, enabled):
def_config.project_dir = Path('.')
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False)
assert sql
assert (sql.find('%DIFFUPDATES%') < 0) == enabled
@pytest.mark.parametrize("enabled", (True, False))
def test_standard_functions_enable_debug(db_with_tables, def_config, enabled):
def_config.project_dir = Path('.')
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled)
assert sql
assert (sql.find('--DEBUG') < 0) == enabled
@pytest.mark.parametrize("enabled", (True, False))
def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled):
def_config.project_dir = Path('.')
monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no')
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
assert sql
assert (sql.find('--LIMIT INDEXING') < 0) == enabled
@pytest.mark.parametrize("enabled", (True, False))
def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled):
def_config.project_dir = Path('.')
monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no')
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
assert sql
assert (sql.find('%NOTIGERDATA%') >= 0) == enabled
@pytest.mark.parametrize("enabled", (True, False))
def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled):
def_config.project_dir = Path('.')
monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no')
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
assert sql
assert (sql.find('%NOAUXDATA%') >= 0) == enabled
def test_partition_function(temp_db_cursor, db, def_config):
temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)")
sql = _get_partition_function_sql(db, SQL_DIR)
assert sql
assert sql.find('-partition-') < 0

View File

@ -0,0 +1,138 @@
"""
Tests for replication functionality.
"""
import datetime as dt
import time
import pytest
from osmium.replication.server import OsmosisState
import nominatim.tools.replication
import nominatim.db.status as status
from nominatim.errors import UsageError
OSM_NODE_DATA = """\
<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
<node id="100" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
</node>
</osm>
"""
### init replication
def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
place_row(osm_type='N', osm_id=100)
monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
with pytest.raises(UsageError, match="Failed to reach replication service"):
nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
place_row(osm_type='N', osm_id=100)
monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
"timestamp_to_sequence",
lambda self, date: 234)
nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
temp_db_cursor.execute("SELECT * FROM import_status")
expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
assert temp_db_cursor.rowcount == 1
assert temp_db_cursor.fetchone() == [expected_date, 234, True]
### checking for updates
def test_check_for_updates_empty_status_table(status_table, temp_db_conn):
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
def test_check_for_updates_seq_not_set(status_table, temp_db_conn):
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
"get_state_info", lambda self: None)
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
server_sequence, result):
date = dt.datetime.now(dt.timezone.utc)
status.set_status(temp_db_conn, date, seq=345)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
"get_state_info",
lambda self: OsmosisState(server_sequence, date))
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
### updating
@pytest.fixture
def update_options(tmpdir):
return dict(base_url='https://test.io',
indexed_only=False,
update_interval=3600,
import_file=tmpdir / 'foo.osm',
max_diff_size=1)
def test_update_empty_status_table(status_table, temp_db_conn):
with pytest.raises(UsageError):
nominatim.tools.replication.update(temp_db_conn, {})
def test_update_already_indexed(status_table, temp_db_conn):
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)
assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
== nominatim.tools.replication.UpdateState.MORE_PENDING
def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options):
date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
status.set_status(temp_db_conn, date, seq=34)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
"apply_diffs",
lambda *args, **kwargs: None)
sleeptime = []
monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
assert nominatim.tools.replication.update(temp_db_conn, update_options) \
== nominatim.tools.replication.UpdateState.NO_CHANGES
assert not sleeptime
def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
status.set_status(temp_db_conn, date, seq=34)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
"apply_diffs",
lambda *args, **kwargs: None)
sleeptime = []
monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
assert nominatim.tools.replication.update(temp_db_conn, update_options) \
== nominatim.tools.replication.UpdateState.NO_CHANGES
assert len(sleeptime) == 1
assert sleeptime[0] < 3600
assert sleeptime[0] > 0

View File

@ -1,24 +0,0 @@
#!/usr/bin/env python3
import sys
from osmium.replication import server
if __name__ == '__main__':
if len(sys.argv) != 3:
print("Usage: python check_server_for_updates.py <server url> <sequence id>")
sys.exit(254)
seqid = int(sys.argv[2])
state = server.ReplicationServer(sys.argv[1]).get_state_info()
if state is None:
print("ERROR: Cannot get state from URL %s." % (sys.argv[1], ))
sys.exit(253)
if state.sequence <= seqid:
print("Database up to date.")
sys.exit(1)
print("New data available (%i => %i)." % (seqid, state.sequence))
sys.exit(0)

View File

@ -1,34 +0,0 @@
#!/usr/bin/env python3
import osmium
import sys
import datetime
class Datecounter(osmium.SimpleHandler):
filedate = None
def date(self, o):
ts = o.timestamp
if self.filedate is None or ts > self.filedate:
self.filedate = ts
node = date
way = date
relation = date
if __name__ == '__main__':
if len(sys.argv) != 2:
print("Usage: python osm_file_date.py <osmfile>")
sys.exit(-1)
h = Datecounter()
h.apply_file(sys.argv[1])
if h.filedate is None:
exit(5)
print(h.filedate)