mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-10 01:29:28 +03:00
Merge pull request #2155 from lonvia/port-regresh-to-python
Port replication and part of the refresh function to native Python
This commit is contained in:
commit
0ad1b28497
2
.github/workflows/ci-tests.yml
vendored
2
.github/workflows/ci-tests.yml
vendored
@ -52,7 +52,7 @@ jobs:
|
||||
run: phpcs --report-width=120 .
|
||||
|
||||
- name: Python linting
|
||||
run: pylint nominatim
|
||||
run: pylint --extension-pkg-whitelist=osmium nominatim
|
||||
|
||||
- name: PHP unit tests
|
||||
run: phpunit ./
|
||||
|
@ -191,7 +191,7 @@ if (BUILD_TESTS)
|
||||
if (PYLINT)
|
||||
message(STATUS "Using pylint binary ${PYLINT}")
|
||||
add_test(NAME pylint
|
||||
COMMAND ${PYLINT} nominatim
|
||||
COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
else()
|
||||
message(WARNING "pylint not found. Python linting tests disabled.")
|
||||
|
@ -53,7 +53,7 @@ The coding style is enforced with PHPCS and pylint. It can be tested with:
|
||||
|
||||
```
|
||||
phpcs --report-width=120 --colors .
|
||||
pylint3 nominatim
|
||||
pylint3 --extension-pkg-whitelist=osmium nominatim
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
@ -19,15 +19,6 @@ Run (as the same user who will later run the updates):
|
||||
pip3 install --user osmium
|
||||
```
|
||||
|
||||
Nominatim needs a tool called `pyosmium-get-changes` which comes with
|
||||
Pyosmium. You need to tell Nominatim where to find it. Add the
|
||||
following line to your `.env`:
|
||||
|
||||
NOMINATIM_PYOSMIUM_BINARY=/home/user/.local/bin/pyosmium-get-changes
|
||||
|
||||
The path above is fine if you used the `--user` parameter with pip.
|
||||
Replace `user` with your user name.
|
||||
|
||||
#### Setting up the update process
|
||||
|
||||
Next the update needs to be initialised. By default Nominatim is configured
|
||||
|
@ -4,7 +4,6 @@
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/setup_functions.php');
|
||||
require_once(CONST_LibDir.'/setup/SetupClass.php');
|
||||
require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
|
||||
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
@ -104,99 +103,27 @@ if ($fPostgresVersion >= 11.0) {
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
|
||||
->addParams('index');
|
||||
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
|
||||
if ($aResult['quiet']) {
|
||||
$oIndexCmd->addParams('--quiet');
|
||||
$oNominatimCmd->addParams('--quiet');
|
||||
}
|
||||
if ($aResult['verbose']) {
|
||||
$oIndexCmd->addParams('--verbose');
|
||||
$oNominatimCmd->addParams('--verbose');
|
||||
}
|
||||
|
||||
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
|
||||
$sBaseURL = getSetting('REPLICATION_URL');
|
||||
|
||||
|
||||
if ($aResult['init-updates']) {
|
||||
// sanity check that the replication URL is correct
|
||||
$sBaseState = file_get_contents($sBaseURL.'/state.txt');
|
||||
if ($sBaseState === false) {
|
||||
echo "\nCannot find state.txt file at the configured replication URL.\n";
|
||||
echo "Does the URL point to a directory containing OSM update data?\n\n";
|
||||
fail('replication URL not reachable.');
|
||||
}
|
||||
// sanity check for pyosmium-get-changes
|
||||
if (!$sPyosmiumBin) {
|
||||
echo "\nNOMINATIM_PYOSMIUM_BINARY not configured.\n";
|
||||
echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
|
||||
echo "in your local .env file.\n\n";
|
||||
fail('NOMINATIM_PYOSMIUM_BINARY not configured');
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
|
||||
|
||||
if ($aResult['no-update-functions']) {
|
||||
$oCmd->addParams('--no-update-functions');
|
||||
}
|
||||
|
||||
$aOutput = 0;
|
||||
$oCMD = new \Nominatim\Shell($sPyosmiumBin, '--help');
|
||||
exec($oCMD->escapedCmd(), $aOutput, $iRet);
|
||||
|
||||
if ($iRet != 0) {
|
||||
echo "Cannot execute pyosmium-get-changes.\n";
|
||||
echo "Make sure you have pyosmium installed correctly\n";
|
||||
echo "and have set up NOMINATIM_PYOSMIUM_BINARY to point to pyosmium-get-changes.\n";
|
||||
fail('pyosmium-get-changes not found or not usable');
|
||||
}
|
||||
|
||||
if (!$aResult['no-update-functions']) {
|
||||
// instantiate setupClass to use the function therein
|
||||
$cSetup = new SetupFunctions(array(
|
||||
'enable-diff-updates' => true,
|
||||
'verbose' => $aResult['verbose']
|
||||
));
|
||||
$cSetup->createFunctions();
|
||||
}
|
||||
|
||||
$sDatabaseDate = getDatabaseDate($oDB);
|
||||
if (!$sDatabaseDate) {
|
||||
fail('Cannot determine date of database.');
|
||||
}
|
||||
$sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
|
||||
|
||||
// get the appropriate state id
|
||||
$aOutput = 0;
|
||||
$oCMD = (new \Nominatim\Shell($sPyosmiumBin))
|
||||
->addParams('--start-date', $sWindBack)
|
||||
->addParams('--server', $sBaseURL);
|
||||
|
||||
exec($oCMD->escapedCmd(), $aOutput, $iRet);
|
||||
if ($iRet != 0 || $aOutput[0] == 'None') {
|
||||
fail('Error running pyosmium tools');
|
||||
}
|
||||
|
||||
$oDB->exec('TRUNCATE import_status');
|
||||
$sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
|
||||
$sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
|
||||
|
||||
try {
|
||||
$oDB->exec($sSQL);
|
||||
} catch (\Nominatim\DatabaseError $e) {
|
||||
fail('Could not enter sequence into database.');
|
||||
}
|
||||
|
||||
echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
|
||||
$oCmd->run();
|
||||
}
|
||||
|
||||
if ($aResult['check-for-updates']) {
|
||||
$aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
|
||||
|
||||
if (!$aLastState['sequence_id']) {
|
||||
fail('Updates not set up. Please run ./utils/update.php --init-updates.');
|
||||
}
|
||||
|
||||
$oCmd = (new \Nominatim\Shell(CONST_BinDir.'/check_server_for_updates.py'))
|
||||
->addParams($sBaseURL)
|
||||
->addParams($aLastState['sequence_id']);
|
||||
$iRet = $oCmd->run();
|
||||
|
||||
exit($iRet);
|
||||
exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
|
||||
}
|
||||
|
||||
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
|
||||
@ -220,9 +147,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
|
||||
}
|
||||
|
||||
if ($aResult['calculate-postcodes']) {
|
||||
info('Update postcodes centroids');
|
||||
$sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
|
||||
runSQLScript($sTemplate, true, true);
|
||||
(clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
|
||||
}
|
||||
|
||||
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
|
||||
@ -271,22 +196,18 @@ if ($bHaveDiff) {
|
||||
}
|
||||
|
||||
if ($aResult['recompute-word-counts']) {
|
||||
info('Recompute frequency of full-word search terms');
|
||||
$sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
|
||||
runSQLScript($sTemplate, true, true);
|
||||
(clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
|
||||
}
|
||||
|
||||
if ($aResult['index']) {
|
||||
$oCmd = (clone $oIndexCmd)
|
||||
->addParams('--minrank', $aResult['index-rank']);
|
||||
$oCmd->run();
|
||||
(clone $oNominatimCmd)
|
||||
->addParams('index', '--minrank', $aResult['index-rank'])
|
||||
->addParams('--threads', $aResult['index-instances'])
|
||||
->run();
|
||||
}
|
||||
|
||||
if ($aResult['update-address-levels']) {
|
||||
$sAddressLevelConfig = getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json');
|
||||
echo 'Updating address levels from '.$sAddressLevelConfig.".\n";
|
||||
$oAlParser = new \Nominatim\Setup\AddressLevelParser($sAddressLevelConfig);
|
||||
$oAlParser->createTable($oDB, 'address_levels');
|
||||
(clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
|
||||
}
|
||||
|
||||
if ($aResult['recompute-importance']) {
|
||||
@ -307,145 +228,17 @@ if ($aResult['recompute-importance']) {
|
||||
}
|
||||
|
||||
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
|
||||
//
|
||||
if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) {
|
||||
fail('Error: Update interval too low for download.geofabrik.de. ' .
|
||||
"Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
|
||||
$oCmd = (clone($oNominatimCmd))
|
||||
->addParams('replication')
|
||||
->addParams('--threads', $aResult['index-instances']);
|
||||
|
||||
if (!$aResult['import-osmosis-all']) {
|
||||
$oCmd->addParams('--once');
|
||||
}
|
||||
|
||||
$sImportFile = CONST_InstallDir.'/osmosischange.osc';
|
||||
|
||||
$oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin))
|
||||
->addParams('--server', $sBaseURL)
|
||||
->addParams('--outfile', $sImportFile)
|
||||
->addParams('--size', getSetting('REPLICATION_MAX_DIFF'));
|
||||
|
||||
$oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
|
||||
|
||||
while (true) {
|
||||
$fStartTime = time();
|
||||
$aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
|
||||
|
||||
if (!$aLastState['sequence_id']) {
|
||||
echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
|
||||
|
||||
$sBatchEnd = $aLastState['lastimportdate'];
|
||||
$iEndSequence = $aLastState['sequence_id'];
|
||||
|
||||
if ($aLastState['indexed']) {
|
||||
// Sleep if the update interval has not yet been reached.
|
||||
$fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL');
|
||||
if ($fNextUpdate > $fStartTime) {
|
||||
$iSleepTime = $fNextUpdate - $fStartTime;
|
||||
echo "Waiting for next update for $iSleepTime sec.";
|
||||
sleep($iSleepTime);
|
||||
}
|
||||
|
||||
// Download the next batch of changes.
|
||||
do {
|
||||
$fCMDStartTime = time();
|
||||
$iNextSeq = (int) $aLastState['sequence_id'];
|
||||
unset($aOutput);
|
||||
|
||||
$oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
|
||||
echo $oCMD->escapedCmd()."\n";
|
||||
if (file_exists($sImportFile)) {
|
||||
unlink($sImportFile);
|
||||
}
|
||||
exec($oCMD->escapedCmd(), $aOutput, $iResult);
|
||||
|
||||
if ($iResult == 3) {
|
||||
$sSleep = getSetting('REPLICATION_RECHECK_INTERVAL');
|
||||
echo 'No new updates. Sleeping for '.$sSleep." sec.\n";
|
||||
sleep($sSleep);
|
||||
} elseif ($iResult != 0) {
|
||||
echo 'ERROR: updates failed.';
|
||||
exit($iResult);
|
||||
} else {
|
||||
$iEndSequence = (int)$aOutput[0];
|
||||
}
|
||||
} while ($iResult);
|
||||
|
||||
// get the newest object from the diff file
|
||||
$sBatchEnd = 0;
|
||||
$iRet = 0;
|
||||
$oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
|
||||
exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
|
||||
if ($iRet == 5) {
|
||||
echo "Diff file is empty. skipping import.\n";
|
||||
if (!$aResult['import-osmosis-all']) {
|
||||
exit(0);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if ($iRet != 0) {
|
||||
fail('Error getting date from diff file.');
|
||||
}
|
||||
$sBatchEnd = $sBatchEnd[0];
|
||||
|
||||
// Import the file
|
||||
$fCMDStartTime = time();
|
||||
|
||||
|
||||
echo $oCMDImport->escapedCmd()."\n";
|
||||
unset($sJunk);
|
||||
$iErrorLevel = $oCMDImport->run();
|
||||
if ($iErrorLevel) {
|
||||
echo "Error executing osm2pgsql: $iErrorLevel\n";
|
||||
exit($iErrorLevel);
|
||||
}
|
||||
|
||||
// write the update logs
|
||||
$iFileSize = filesize($sImportFile);
|
||||
$sSQL = 'INSERT INTO import_osmosis_log';
|
||||
$sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
|
||||
$sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
|
||||
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
|
||||
$sSQL .= date('Y-m-d H:i:s')."','import')";
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
|
||||
// update the status
|
||||
$sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
|
||||
}
|
||||
|
||||
// Index file
|
||||
if (!$aResult['no-index']) {
|
||||
$fCMDStartTime = time();
|
||||
|
||||
$oThisIndexCmd = clone($oIndexCmd);
|
||||
echo $oThisIndexCmd->escapedCmd()."\n";
|
||||
$iErrorLevel = $oThisIndexCmd->run();
|
||||
if ($iErrorLevel) {
|
||||
echo "Error: $iErrorLevel\n";
|
||||
exit($iErrorLevel);
|
||||
}
|
||||
|
||||
$sSQL = 'INSERT INTO import_osmosis_log';
|
||||
$sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
|
||||
$sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
|
||||
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
|
||||
$sSQL .= date('Y-m-d H:i:s')."','index')";
|
||||
var_Dump($sSQL);
|
||||
$oDB->exec($sSQL);
|
||||
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
|
||||
} else {
|
||||
if ($aResult['import-osmosis-all']) {
|
||||
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
$fDuration = time() - $fStartTime;
|
||||
echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
|
||||
if (!$aResult['import-osmosis-all']) exit(0);
|
||||
if ($aResult['no-index']) {
|
||||
$oCmd->addParams('--no-index');
|
||||
}
|
||||
|
||||
exit($oCmd->run());
|
||||
}
|
||||
|
@ -1,98 +0,0 @@
|
||||
<?php

namespace Nominatim\Setup;

/**
 * Parses an address level description.
 *
 * The description is a JSON file assigning search and address ranks to
 * OSM tags, optionally restricted to a list of countries.
 */
class AddressLevelParser
{
    // Decoded contents of the JSON description file.
    private $aLevels;

    public function __construct($sDescriptionFile)
    {
        $sJson = file_get_contents($sDescriptionFile);
        $this->aLevels = json_decode($sJson, true);
        if (!$this->aLevels) {
            // Translate the JSON parser error code into a fatal error message.
            switch (json_last_error()) {
                case JSON_ERROR_NONE:
                    break;
                case JSON_ERROR_DEPTH:
                    fail('JSON error - Maximum stack depth exceeded');
                    break;
                case JSON_ERROR_STATE_MISMATCH:
                    fail('JSON error - Underflow or the modes mismatch');
                    break;
                case JSON_ERROR_CTRL_CHAR:
                    fail('JSON error - Unexpected control character found');
                    break;
                case JSON_ERROR_SYNTAX:
                    fail('JSON error - Syntax error, malformed JSON');
                    break;
                case JSON_ERROR_UTF8:
                    fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded');
                    break;
                default:
                    fail('JSON error - Unknown error');
                    break;
            }
        }
    }

    /**
     * Dump the description into a database table.
     *
     * @param object $oDB    Database connection to use.
     * @param string $sTable Name of table to create.
     *
     * @return null
     *
     * A new table is created. Any previously existing table is dropped.
     * The table has the following columns:
     * country, class, type, rank_search, rank_address.
     */
    public function createTable($oDB, $sTable)
    {
        $oDB->exec('DROP TABLE IF EXISTS '.$sTable);
        $sSql = 'CREATE TABLE '.$sTable;
        $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
        $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
        $oDB->exec($sSql);

        $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)';
        $oDB->exec($sSql);

        $sSql = 'INSERT INTO '.$sTable.' VALUES ';
        foreach ($this->aLevels as $aLevel) {
            // Collect quoted country restrictions; no restriction yields a
            // single row with a NULL country.
            $aCountries = array();
            if (isset($aLevel['countries'])) {
                foreach ($aLevel['countries'] as $sCountry) {
                    $aCountries[$sCountry] = $oDB->getDBQuoted($sCountry);
                }
            } else {
                $aCountries['NULL'] = 'NULL';
            }
            foreach ($aLevel['tags'] as $sKey => $aValues) {
                foreach ($aValues as $sValue => $mRanks) {
                    $aFields = array(
                        $oDB->getDBQuoted($sKey),
                        $sValue ? $oDB->getDBQuoted($sValue) : 'NULL'
                    );
                    // A scalar rank is used for both search and address rank.
                    if (is_array($mRanks)) {
                        $aFields[] = (string) $mRanks[0];
                        $aFields[] = (string) $mRanks[1];
                    } else {
                        $aFields[] = (string) $mRanks;
                        $aFields[] = (string) $mRanks;
                    }
                    $sLine = ','.join(',', $aFields).'),';

                    // Emit one row per country restriction.
                    foreach ($aCountries as $sCountries) {
                        $sSql .= '('.$sCountries.$sLine;
                    }
                }
            }
        }
        $oDB->exec(rtrim($sSql, ','));
    }
}
|
@ -2,7 +2,6 @@
|
||||
|
||||
namespace Nominatim\Setup;
|
||||
|
||||
require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
|
||||
require_once(CONST_LibDir.'/Shell.php');
|
||||
|
||||
class SetupFunctions
|
||||
@ -19,6 +18,7 @@ class SetupFunctions
|
||||
protected $bNoPartitions;
|
||||
protected $bDrop;
|
||||
protected $oDB = null;
|
||||
protected $oNominatimCmd;
|
||||
|
||||
public function __construct(array $aCMDResult)
|
||||
{
|
||||
@ -81,6 +81,14 @@ class SetupFunctions
|
||||
}
|
||||
|
||||
$this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
|
||||
|
||||
$this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
|
||||
if ($this->bQuiet) {
|
||||
$this->oNominatimCmd->addParams('--quiet');
|
||||
}
|
||||
if ($this->bVerbose) {
|
||||
$this->oNominatimCmd->addParams('--verbose');
|
||||
}
|
||||
}
|
||||
|
||||
public function createDB()
|
||||
@ -256,8 +264,7 @@ class SetupFunctions
|
||||
$this->dropTable('search_name');
|
||||
}
|
||||
|
||||
$oAlParser = new AddressLevelParser(getSettingConfig('ADDRESS_LEVEL_CONFIG', 'address-levels.json'));
|
||||
$oAlParser->createTable($this->db(), 'address_levels');
|
||||
(clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
|
||||
}
|
||||
|
||||
public function createTableTriggers()
|
||||
@ -283,9 +290,7 @@ class SetupFunctions
|
||||
public function createPartitionFunctions()
|
||||
{
|
||||
info('Create Partition Functions');
|
||||
|
||||
$sTemplate = file_get_contents(CONST_DataDir.'/sql/partition-functions.src.sql');
|
||||
$this->pgsqlRunPartitionScript($sTemplate);
|
||||
$this->createSqlFunctions(); // also create partition functions
|
||||
}
|
||||
|
||||
public function importWikipediaArticles()
|
||||
@ -549,19 +554,10 @@ class SetupFunctions
|
||||
{
|
||||
$this->checkModulePresence(); // raises exception on failure
|
||||
|
||||
$oBaseCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
|
||||
->addParams('index');
|
||||
|
||||
if ($this->bQuiet) {
|
||||
$oBaseCmd->addParams('-q');
|
||||
}
|
||||
if ($this->bVerbose) {
|
||||
$oBaseCmd->addParams('-v');
|
||||
}
|
||||
$oBaseCmd = (clone $this->oNominatimCmd)->addParams('index');
|
||||
|
||||
info('Index ranks 0 - 4');
|
||||
$oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4);
|
||||
echo $oCmd->escapedCmd();
|
||||
|
||||
$iStatus = $oCmd->run();
|
||||
if ($iStatus != 0) {
|
||||
@ -790,43 +786,18 @@ class SetupFunctions
|
||||
|
||||
private function createSqlFunctions()
|
||||
{
|
||||
$sBasePath = CONST_DataDir.'/sql/functions/';
|
||||
$sTemplate = file_get_contents($sBasePath.'utils.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'normalization.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'ranking.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'importance.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'address_lookup.sql');
|
||||
$sTemplate .= file_get_contents($sBasePath.'interpolation.sql');
|
||||
if ($this->db()->tableExists('place')) {
|
||||
$sTemplate .= file_get_contents($sBasePath.'place_triggers.sql');
|
||||
}
|
||||
if ($this->db()->tableExists('placex')) {
|
||||
$sTemplate .= file_get_contents($sBasePath.'placex_triggers.sql');
|
||||
}
|
||||
if ($this->db()->tableExists('location_postcode')) {
|
||||
$sTemplate .= file_get_contents($sBasePath.'postcode_triggers.sql');
|
||||
}
|
||||
$sTemplate = str_replace('{modulepath}', $this->sModulePath, $sTemplate);
|
||||
if ($this->bEnableDiffUpdates) {
|
||||
$sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate);
|
||||
$oCmd = (clone($this->oNominatimCmd))
|
||||
->addParams('refresh', '--functions');
|
||||
|
||||
if (!$this->bEnableDiffUpdates) {
|
||||
$oCmd->addParams('--no-diff-updates');
|
||||
}
|
||||
|
||||
if ($this->bEnableDebugStatements) {
|
||||
$sTemplate = str_replace('--DEBUG:', '', $sTemplate);
|
||||
}
|
||||
if (getSettingBool('LIMIT_REINDEXING')) {
|
||||
$sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
|
||||
}
|
||||
if (!getSettingBool('USE_US_TIGER_DATA')) {
|
||||
$sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate);
|
||||
}
|
||||
if (!getSettingBool('USE_AUX_LOCATION_DATA')) {
|
||||
$sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate);
|
||||
$oCmd->addParams('--enable-debug-statements');
|
||||
}
|
||||
|
||||
$sReverseOnly = $this->dbReverseOnly() ? 'true' : 'false';
|
||||
$sTemplate = str_replace('%REVERSE-ONLY%', $sReverseOnly, $sTemplate);
|
||||
|
||||
$this->pgsqlRunScript($sTemplate);
|
||||
$oCmd->run();
|
||||
}
|
||||
|
||||
private function pgsqlRunPartitionScript($sTemplate)
|
||||
|
204
nominatim/cli.py
204
nominatim/cli.py
@ -2,16 +2,21 @@
|
||||
Command-line interface to the Nominatim functions for import, update,
|
||||
database administration and querying.
|
||||
"""
|
||||
import sys
|
||||
import datetime as dt
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from .config import Configuration
|
||||
from .tools.exec_utils import run_legacy_script, run_api_script
|
||||
from .db.connection import connect
|
||||
from .db import status
|
||||
from .errors import UsageError
|
||||
|
||||
from .indexer.indexer import Indexer
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _num_system_cpus():
|
||||
try:
|
||||
@ -85,7 +90,27 @@ class CommandlineParser:
|
||||
|
||||
args.config = Configuration(args.project_dir, args.data_dir / 'settings')
|
||||
|
||||
return args.command.run(args)
|
||||
try:
|
||||
return args.command.run(args)
|
||||
except UsageError as exception:
|
||||
log = logging.getLogger()
|
||||
if log.isEnabledFor(logging.DEBUG):
|
||||
raise # use Python's exception printing
|
||||
log.fatal('FATAL: %s', exception)
|
||||
|
||||
# If we get here, then execution has failed in some way.
|
||||
return 1
|
||||
|
||||
|
||||
def _osm2pgsql_options_from_args(args, default_cache, default_threads):
    """ Set up the standard osm2pgsql options from the command line arguments.

        `default_cache` and `default_threads` are used when the
        corresponding command-line options were not given.
    """
    return dict(osm2pgsql=args.osm2pgsql_path,
                osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
                osm2pgsql_style=args.config.get_import_style_file(),
                threads=args.threads or default_threads,
                dsn=args.config.get_libpq_dsn(),
                flatnode_file=args.config.FLATNODE_FILE)
|
||||
|
||||
##### Subcommand classes
|
||||
#
|
||||
@ -98,6 +123,8 @@ class CommandlineParser:
|
||||
#
|
||||
# No need to document the functions each time.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to make pyosmium optional for replication only.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
|
||||
class SetupAll:
|
||||
@ -230,26 +257,123 @@ class UpdateReplication:
|
||||
group.add_argument('--no-index', action='store_false', dest='do_index',
|
||||
help="""Do not index the new data. Only applicable
|
||||
together with --once""")
|
||||
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
|
||||
help='Size of cache to be used by osm2pgsql (in MB)')
|
||||
|
||||
@staticmethod
def _init_replication(args):
    """ Set up the database for consuming replication diffs and,
        unless disabled, recreate the update functions.
    """
    # Imported lazily so that pyosmium stays an optional dependency.
    from .tools import refresh, replication

    LOG.warning("Initialising replication updates")
    db_connection = connect(args.config.get_libpq_dsn())
    replication.init_replication(db_connection,
                                 base_url=args.config.REPLICATION_URL)
    if args.update_functions:
        LOG.warning("Create functions")
        refresh.create_functions(db_connection, args.config,
                                 args.data_dir, True, False)
    db_connection.close()
    return 0
|
||||
|
||||
|
||||
@staticmethod
def _check_for_updates(args):
    """ Query the replication server for pending updates and return
        the result of the check as the exit status.
    """
    # Imported lazily so that pyosmium stays an optional dependency.
    from .tools import replication

    conn = connect(args.config.get_libpq_dsn())
    state = replication.check_for_updates(conn,
                                          base_url=args.config.REPLICATION_URL)
    conn.close()

    return state
|
||||
|
||||
@staticmethod
def _report_update(batchdate, start_import, start_index):
    """ Log a one-line summary of a finished update cycle. """
    def round_time(delta):
        # Strip sub-second noise from the reported durations.
        return dt.timedelta(seconds=int(delta.total_seconds()))

    end = dt.datetime.now(dt.timezone.utc)
    import_duration = round_time((start_index or end) - start_import)
    if start_index:
        index_part = "Indexing: {} ".format(round_time(end - start_index))
    else:
        index_part = ''
    LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
                import_duration,
                index_part,
                round_time(end - start_import),
                round_time(end - batchdate))
|
||||
|
||||
@staticmethod
def _update(args):
    """ Download, import and index replication diffs — once when
        `--once` was given, otherwise in an endless loop.

        Returns the value of the final replication state.
    """
    # Non-top-level imports keep pyosmium optional.
    from .tools import replication
    from .indexer.indexer import Indexer

    # osm2pgsql defaults: 2000MB cache, a single thread.
    params = _osm2pgsql_options_from_args(args, 2000, 1)
    params.update(base_url=args.config.REPLICATION_URL,
                  update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
                  import_file=args.project_dir / 'osmosischange.osc',
                  max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
                  indexed_only=not args.once)

    # Sanity check to not overwhelm the Geofabrik servers.
    if 'download.geofabrik.de'in params['base_url']\
       and params['update_interval'] < 86400:
        LOG.fatal("Update interval too low for download.geofabrik.de.\n"
                  "Please check install documentation "
                  "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
                  "setting-up-the-update-process).")
        raise UsageError("Invalid replication update interval setting.")

    if not args.once:
        if not args.do_index:
            LOG.fatal("Indexing cannot be disabled when running updates continuously.")
            raise UsageError("Bad argument '--no-index'.")
        # Only needed in continuous mode: pause between server polls.
        recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')

    while True:
        conn = connect(args.config.get_libpq_dsn())
        start = dt.datetime.now(dt.timezone.utc)
        state = replication.update(conn, params)
        status.log_status(conn, start, 'import')
        batchdate, _, _ = status.get_status(conn)
        conn.close()

        if state is not replication.UpdateState.NO_CHANGES and args.do_index:
            index_start = dt.datetime.now(dt.timezone.utc)
            indexer = Indexer(args.config.get_libpq_dsn(),
                              args.threads or 1)
            indexer.index_boundaries(0, 30)
            indexer.index_by_rank(0, 30)

            # Record completed indexing in the status table.
            conn = connect(args.config.get_libpq_dsn())
            status.set_indexed(conn, True)
            status.log_status(conn, index_start, 'index')
            conn.close()
        else:
            index_start = None

        if LOG.isEnabledFor(logging.WARNING):
            UpdateReplication._report_update(batchdate, start, index_start)

        if args.once:
            break

        if state is replication.UpdateState.NO_CHANGES:
            LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
            time.sleep(recheck_interval)

    return state.value
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
params = ['update.php']
|
||||
try:
|
||||
import osmium # pylint: disable=W0611
|
||||
except ModuleNotFoundError:
|
||||
LOG.fatal("pyosmium not installed. Replication functions not available.\n"
|
||||
"To install pyosmium via pip: pip3 install osmium")
|
||||
return 1
|
||||
|
||||
if args.init:
|
||||
params.append('--init-updates')
|
||||
if not args.update_functions:
|
||||
params.append('--no-update-functions')
|
||||
elif args.check_for_updates:
|
||||
params.append('--check-for-updates')
|
||||
else:
|
||||
if args.once:
|
||||
params.append('--import-osmosis')
|
||||
else:
|
||||
params.append('--import-osmosis-all')
|
||||
if not args.do_index:
|
||||
params.append('--no-index')
|
||||
return UpdateReplication._init_replication(args)
|
||||
|
||||
return run_legacy_script(*params, nominatim_env=args)
|
||||
if args.check_for_updates:
|
||||
return UpdateReplication._check_for_updates(args)
|
||||
|
||||
return UpdateReplication._update(args)
|
||||
|
||||
class UpdateAddData:
|
||||
"""\
|
||||
@ -320,6 +444,8 @@ class UpdateIndex:
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
from .indexer.indexer import Indexer
|
||||
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
args.threads or _num_system_cpus() or 1)
|
||||
|
||||
@ -328,8 +454,11 @@ class UpdateIndex:
|
||||
if not args.boundaries_only:
|
||||
indexer.index_by_rank(args.minrank, args.maxrank)
|
||||
|
||||
if not args.no_boundaries and not args.boundaries_only:
|
||||
indexer.update_status_table()
|
||||
if not args.no_boundaries and not args.boundaries_only \
|
||||
and args.minrank == 0 and args.maxrank == 30:
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
status.set_indexed(conn, True)
|
||||
conn.close()
|
||||
|
||||
return 0
|
||||
|
||||
@ -366,22 +495,34 @@ class UpdateRefresh:
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
from .tools import refresh
|
||||
|
||||
if args.postcodes:
|
||||
run_legacy_script('update.php', '--calculate-postcodes',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
LOG.warning("Update postcodes centroid")
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.update_postcodes(conn, args.data_dir)
|
||||
conn.close()
|
||||
|
||||
if args.word_counts:
|
||||
run_legacy_script('update.php', '--recompute-word-counts',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
LOG.warning('Recompute frequency of full-word search terms')
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.recompute_word_counts(conn, args.data_dir)
|
||||
conn.close()
|
||||
|
||||
if args.address_levels:
|
||||
run_legacy_script('update.php', '--update-address-levels',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
|
||||
LOG.warning('Updating address levels from %s', cfg)
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.load_address_levels_from_file(conn, cfg)
|
||||
conn.close()
|
||||
|
||||
if args.functions:
|
||||
params = ['setup.php', '--create-functions', '--create-partition-functions']
|
||||
if args.diffs:
|
||||
params.append('--enable-diff-updates')
|
||||
if args.enable_debug_statements:
|
||||
params.append('--enable-debug-statements')
|
||||
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
|
||||
LOG.warning('Create functions')
|
||||
conn = connect(args.config.get_libpq_dsn())
|
||||
refresh.create_functions(conn, args.config, args.data_dir,
|
||||
args.diffs, args.enable_debug_statements)
|
||||
conn.close()
|
||||
|
||||
if args.wiki_data:
|
||||
run_legacy_script('setup.php', '--import-wikipedia-articles',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
@ -392,6 +533,7 @@ class UpdateRefresh:
|
||||
if args.website:
|
||||
run_legacy_script('setup.php', '--setup-website',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
|
@ -1,10 +1,16 @@
|
||||
"""
|
||||
Nominatim configuration accessor.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import dotenv_values
|
||||
|
||||
from .errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class Configuration:
|
||||
""" Load and manage the project configuration.
|
||||
|
||||
@ -20,27 +26,75 @@ class Configuration:
|
||||
"""
|
||||
|
||||
def __init__(self, project_dir, config_dir):
    """ Load the default settings and overlay them with the project's
        own `.env` file, if a project directory is given.
    """
    self.project_dir = project_dir
    self.config_dir = config_dir
    defaults_file = (config_dir / 'env.defaults').resolve()
    self._config = dotenv_values(str(defaults_file))
    if project_dir is not None:
        project_env_file = (project_dir / '.env').resolve()
        self._config.update(dotenv_values(str(project_env_file)))

    # Add defaults for variables that are left empty to set the default.
    # They may still be overwritten by environment variables.
    if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']:
        self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
            str(config_dir / 'address-levels.json')
|
||||
|
||||
|
||||
def __getattr__(self, name):
    """ Look up a configuration value. An OS environment variable of the
        same name (with NOMINATIM_ prefix) takes precedence over the
        value from the configuration files.
    """
    name = 'NOMINATIM_' + name

    env_value = os.environ.get(name)
    if env_value:
        return env_value

    return self._config[name]
|
||||
|
||||
def get_bool(self, name):
    """ Return the given configuration parameter as a boolean.
        Values of '1', 'yes' and 'true' are accepted as truthy values,
        everything else is interpreted as false.
    """
    raw_value = self.__getattr__(name)
    return raw_value.lower() in ('1', 'yes', 'true')
|
||||
|
||||
|
||||
def get_int(self, name):
    """ Return the given configuration parameter as an int.

        Raises `UsageError` when the value cannot be parsed as a number.
    """
    try:
        return int(self.__getattr__(name))
    except ValueError as exc:
        LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
        # Chain the original error so the cause is visible when debugging.
        raise UsageError("Configuration error.") from exc
|
||||
|
||||
|
||||
def get_libpq_dsn(self):
|
||||
""" Get configured database DSN converted into the key/value format
|
||||
understood by libpq and psycopg.
|
||||
"""
|
||||
dsn = self.DATABASE_DSN
|
||||
|
||||
def quote_param(param):
|
||||
key, val = param.split('=')
|
||||
val = val.replace('\\', '\\\\').replace("'", "\\'")
|
||||
if ' ' in val:
|
||||
val = "'" + val + "'"
|
||||
return key + '=' + val
|
||||
|
||||
if dsn.startswith('pgsql:'):
|
||||
# Old PHP DSN format. Convert before returning.
|
||||
return dsn[6:].replace(';', ' ')
|
||||
return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
|
||||
|
||||
return dsn
|
||||
|
||||
|
||||
def get_import_style_file(self):
    """ Return the import style file as a path object. Translates the
        name of the standard styles automatically into a file in the
        config style.
    """
    style = self.__getattr__('IMPORT_STYLE')

    standard_styles = ('admin', 'street', 'address', 'full', 'extratags')
    if style in standard_styles:
        # Built-in style: resolve relative to the configuration directory.
        return self.config_dir / 'import-{}.style'.format(style)

    # Anything else is taken to be a custom style file.
    return Path(style)
|
||||
|
||||
|
||||
def get_os_env(self):
|
||||
""" Return a copy of the OS environment with the Nominatim configuration
|
||||
merged in.
|
||||
|
58
nominatim/db/connection.py
Normal file
58
nominatim/db/connection.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""
|
||||
Specialised connection and cursor functions.
|
||||
"""
|
||||
import logging
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extensions
|
||||
import psycopg2.extras
|
||||
|
||||
class _Cursor(psycopg2.extras.DictCursor):
    """ A cursor returning dict-like objects and providing specialised
        execution functions.
    """

    def execute(self, query, args=None): # pylint: disable=W0221
        """ Query execution that logs the SQL query when debugging is enabled.
        """
        logger = logging.getLogger()
        # mogrify() interpolates the parameters, so the log contains the
        # exact statement as sent to the server.
        logger.debug(self.mogrify(query, args).decode('utf-8'))

        super().execute(query, args)

    def scalar(self, sql, args=None):
        """ Execute query that returns a single value. The value is returned.
            If the query does not yield exactly one row, a RuntimeError is
            raised.
        """
        self.execute(sql, args)

        if self.rowcount != 1:
            raise RuntimeError("Query did not return a single row.")

        return self.fetchone()[0]
|
||||
|
||||
|
||||
class _Connection(psycopg2.extensions.connection):
    """ A connection that provides the specialised cursor by default and
        adds convenience functions for administrating the database.
    """

    def cursor(self, cursor_factory=_Cursor, **kwargs):
        """ Return a new cursor. By default the specialised cursor is returned.
        """
        return super().cursor(cursor_factory=cursor_factory, **kwargs)

    def table_exists(self, table):
        """ Check that a table with the given name exists in the database.
        """
        with self.cursor() as cur:
            found = cur.scalar("""SELECT count(*) FROM pg_tables
                               WHERE tablename = %s""", (table, ))
            return found == 1
|
||||
|
||||
|
||||
def connect(dsn):
    """ Open a connection to the database using the specialised connection
        factory.

        `dsn` is a libpq-style connection string. The returned connection
        yields `_Cursor` cursors by default.
    """
    return psycopg2.connect(dsn, connection_factory=_Connection)
|
82
nominatim/db/status.py
Normal file
82
nominatim/db/status.py
Normal file
@ -0,0 +1,82 @@
|
||||
"""
|
||||
Access and helper functions for the status and status log table.
|
||||
"""
|
||||
import datetime as dt
|
||||
import logging
|
||||
import re
|
||||
|
||||
from ..tools.exec_utils import get_url
|
||||
from ..errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def compute_database_date(conn):
    """ Determine the date of the database from the newest object in the
        data base.
    """
    # First, find the node with the highest ID in the database.
    with conn.cursor() as cur:
        osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")

    if osmid is None:
        LOG.fatal("No data found in the database.")
        raise UsageError("No data found in the database.")

    LOG.info("Using node id %d for timestamp lookup", osmid)
    # Get the node from the API to find the timestamp when it was created.
    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
    data = get_url(node_url)

    match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)

    if match is None:
        LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
                  "URL used: %s", node_url)
        raise UsageError("Bad API data.")

    timestamp = match.group(1)
    LOG.debug("Found timestamp %s", timestamp)

    # The API timestamp is in UTC; make the returned datetime timezone-aware.
    return dt.datetime.fromisoformat(timestamp).replace(tzinfo=dt.timezone.utc)
|
||||
|
||||
|
||||
def set_status(conn, date, seq=None, indexed=True):
    """ Replace the current status with the given status.

        `date` must be a timezone-aware datetime in UTC. `seq` is the
        replication sequence id, if known; `indexed` states whether the
        data up to this point has been indexed. The change is committed
        before the function returns.
    """
    assert date.tzinfo == dt.timezone.utc
    # The status table only ever holds a single row.
    with conn.cursor() as cur:
        cur.execute("TRUNCATE TABLE import_status")
        cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
                       VALUES (%s, %s, %s)""", (date, seq, indexed))

    conn.commit()
|
||||
|
||||
|
||||
def get_status(conn):
    """ Return the current status as a triple of (date, sequence, indexed).
        If status has not been set up yet, a triple of None is returned.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT * FROM import_status LIMIT 1")
        if cur.rowcount < 1:
            # Table is empty: replication has not been initialised yet.
            return None, None, None

        row = cur.fetchone()
        return row['lastimportdate'], row['sequence_id'], row['indexed']
|
||||
|
||||
|
||||
def set_indexed(conn, state):
    """ Set the indexed flag in the status table to the given state.

        The change is committed before the function returns.
    """
    with conn.cursor() as cur:
        cur.execute("UPDATE import_status SET indexed = %s", (state, ))
    conn.commit()
|
||||
|
||||
|
||||
def log_status(conn, start, event, batchsize=None):
    """ Write a new status line to the `import_osmosis_log` table.

        `start` is the timestamp when processing started and `event` a
        short description of what was done. Batch end date and sequence
        are copied from the current `import_status` row.

        Note: the insert is not committed; the caller must commit.
    """
    with conn.cursor() as cur:
        cur.execute("""INSERT INTO import_osmosis_log
                       (batchend, batchseq, batchsize, starttime, endtime, event)
                       SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
                    (batchsize, start, event))
|
12
nominatim/db/utils.py
Normal file
12
nominatim/db/utils.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""
|
||||
Helper functions for handling DB accesses.
|
||||
"""
|
||||
|
||||
def execute_file(conn, fname):
    """ Read an SQL file and run its contents against the given connection.

        `fname` is a Path to the SQL file. The transaction is committed
        after execution.
    """
    sql = fname.read_text()
    with conn.cursor() as cur:
        cur.execute(sql)
    conn.commit()
|
8
nominatim/errors.py
Normal file
8
nominatim/errors.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""
|
||||
Custom exception and error classes for Nominatim.
|
||||
"""
|
||||
|
||||
class UsageError(Exception):
    """ An error raised because of bad user input. This error will usually
        not cause a stack trace to be printed unless debugging is enabled.
    """
|
@ -2,9 +2,17 @@
|
||||
Helper functions for executing external programs.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import urllib.request as urlrequest
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from psycopg2.extensions import parse_dsn
|
||||
|
||||
from ..version import NOMINATIM_VERSION
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
|
||||
""" Run a Nominatim PHP script with the given arguments.
|
||||
|
||||
@ -80,3 +88,51 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
|
||||
print(result[content_start + 4:].replace('\\n', '\n'))
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def run_osm2pgsql(options):
    """ Run osm2pgsql with the given options.

        `options` is a dict providing the binary location ('osm2pgsql'),
        thread count, cache size, style file, database DSN, optional
        flatnode file, the 'append' flag and the input file.

        Raises `subprocess.CalledProcessError` when osm2pgsql exits with
        a non-zero return code.
    """
    # Work on a copy of the environment so that setting PGPASSWORD below
    # does not leak into the environment of the current process.
    env = dict(os.environ)
    cmd = [options['osm2pgsql'],
           '--hstore', '--latlon', '--slim',
           '--with-forward-dependencies', 'false',
           '--log-progress', 'true',
           '--number-processes', str(options['threads']),
           '--cache', str(options['osm2pgsql_cache']),
           '--output', 'gazetteer',
           '--style', str(options['osm2pgsql_style'])
          ]
    if options['append']:
        cmd.append('--append')

    if options['flatnode_file']:
        cmd.extend(('--flat-nodes', options['flatnode_file']))

    # Translate the DSN into osm2pgsql command-line parameters. The
    # password can only be handed over via the environment.
    dsn = parse_dsn(options['dsn'])
    if 'password' in dsn:
        env['PGPASSWORD'] = dsn['password']
    if 'dbname' in dsn:
        cmd.extend(('-d', dsn['dbname']))
    if 'user' in dsn:
        cmd.extend(('--username', dsn['user']))
    for param in ('host', 'port'):
        if param in dsn:
            cmd.extend(('--' + param, dsn[param]))

    cmd.append(str(options['import_file']))

    subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
|
||||
|
||||
|
||||
def get_url(url):
    """ Get the contents from the given URL and return it as a UTF-8 string.

        Any failure is logged before the exception is re-raised.
    """
    headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION}

    try:
        with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
            return response.read().decode('utf-8')
    except Exception:
        # Narrowed from a bare 'except:' so that KeyboardInterrupt and
        # SystemExit are not logged as fatal download errors.
        LOG.fatal('Failed to load URL: %s', url)
        raise
|
||||
|
169
nominatim/tools/refresh.py
Normal file
169
nominatim/tools/refresh.py
Normal file
@ -0,0 +1,169 @@
|
||||
"""
|
||||
Functions for bringing auxiliary data in the database up-to-date.
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
|
||||
from psycopg2.extras import execute_values
|
||||
|
||||
from ..db.utils import execute_file
|
||||
|
||||
def update_postcodes(conn, datadir):
    """ Recalculate postcode centroids and add, remove and update entries in the
        location_postcode table. `conn` is an open connection to the database.
    """
    execute_file(conn, datadir / 'sql' / 'update-postcodes.sql')
|
||||
|
||||
|
||||
def recompute_word_counts(conn, datadir):
    """ Compute the frequency of full-word search terms.

        Runs the `words_from_search_name.sql` script from the data
        directory against the given open connection.
    """
    execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
|
||||
|
||||
|
||||
def _add_address_level_rows_from_entry(rows, entry):
|
||||
""" Converts a single entry from the JSON format for address rank
|
||||
descriptions into a flat format suitable for inserting into a
|
||||
PostgreSQL table and adds these lines to `rows`.
|
||||
"""
|
||||
countries = entry.get('countries') or (None, )
|
||||
for key, values in entry['tags'].items():
|
||||
for value, ranks in values.items():
|
||||
if isinstance(ranks, list):
|
||||
rank_search, rank_address = ranks
|
||||
else:
|
||||
rank_search = rank_address = ranks
|
||||
if not value:
|
||||
value = None
|
||||
for country in countries:
|
||||
rows.append((country, key, value, rank_search, rank_address))
|
||||
|
||||
def load_address_levels(conn, table, levels):
    """ Replace the `address_levels` table with the contents of `levels`.

        A new table is created; any previously existing table is dropped.
        The table has the following columns:
        country, class, type, rank_search, rank_address
    """
    rows = []
    for entry in levels:
        _add_address_level_rows_from_entry(rows, entry)

    with conn.cursor() as cur:
        cur.execute('DROP TABLE IF EXISTS {}'.format(table))

        cur.execute("""CREATE TABLE {} (country_code varchar(2),
                                        class TEXT,
                                        type TEXT,
                                        rank_search SMALLINT,
                                        rank_address SMALLINT)""".format(table))

        execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)

        # Each (country, class, type) combination must be unique.
        cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))

    conn.commit()
|
||||
|
||||
def load_address_levels_from_file(conn, config_file):
    """ Replace the `address_levels` table with the contents of the config
        file.

        `config_file` is a Path to a JSON file in the address-rank
        description format.
    """
    with config_file.open('r') as fdesc:
        load_address_levels(conn, 'address_levels', json.load(fdesc))
|
||||
|
||||
PLPGSQL_BASE_MODULES = (
|
||||
'utils.sql',
|
||||
'normalization.sql',
|
||||
'ranking.sql',
|
||||
'importance.sql',
|
||||
'address_lookup.sql',
|
||||
'interpolation.sql'
|
||||
)
|
||||
|
||||
PLPGSQL_TABLE_MODULES = (
|
||||
('place', 'place_triggers.sql'),
|
||||
('placex', 'placex_triggers.sql'),
|
||||
('location_postcode', 'postcode_triggers.sql')
|
||||
)
|
||||
|
||||
def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug):
    """ Read all applicable SQL files containing PL/pgSQL functions, replace
        placeholders and return the combined SQL source.

        Marker comments in the SQL sources act as feature switches that
        are enabled by stripping the comment prefix.
    """
    sql_func_dir = sql_dir / 'functions'
    sql = ''

    # Get the basic set of functions that is always imported.
    for sql_file in PLPGSQL_BASE_MODULES:
        with (sql_func_dir / sql_file).open('r') as fdesc:
            sql += fdesc.read()

    # Some files require the presence of a certain table
    for table, fname in PLPGSQL_TABLE_MODULES:
        if conn.table_exists(table):
            with (sql_func_dir / fname).open('r') as fdesc:
                sql += fdesc.read()

    # Replace placeholders.
    sql = sql.replace('{modulepath}',
                      config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve()))

    if enable_diff_updates:
        sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--')

    if enable_debug:
        sql = sql.replace('--DEBUG:', '')

    if config.get_bool('LIMIT_REINDEXING'):
        sql = sql.replace('--LIMIT INDEXING:', '')

    if not config.get_bool('USE_US_TIGER_DATA'):
        sql = sql.replace('-- %NOTIGERDATA% ', '')

    if not config.get_bool('USE_AUX_LOCATION_DATA'):
        sql = sql.replace('-- %NOAUXDATA% ', '')

    # Functions work in reverse-only mode when no search_name table exists.
    reverse_only = 'false' if conn.table_exists('search_name') else 'true'

    return sql.replace('%REVERSE-ONLY%', reverse_only)
|
||||
|
||||
|
||||
def replace_partition_string(sql, partitions):
    """ Replace a partition template with the actual partition code.

        Every section between '-- start' and '-- end' markers is expanded
        once per partition, substituting the '-partition-' placeholder.
    """
    for template in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S):
        expanded = ''.join(template.replace('-partition-', str(part))
                           for part in partitions)
        sql = sql.replace(template, expanded)

    return sql
|
||||
|
||||
def _get_partition_function_sql(conn, sql_dir):
    """ Create functions that work on partition tables.

        Returns the expanded SQL source for the partition functions.
    """
    # Partition 0 is always present in addition to the ones in use.
    partitions = {0}
    with conn.cursor() as cur:
        cur.execute('SELECT distinct partition FROM country_name')
        for row in cur:
            partitions.add(row[0])

    with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc:
        sql = fdesc.read()

    return replace_partition_string(sql, sorted(partitions))
|
||||
|
||||
def create_functions(conn, config, data_dir,
                     enable_diff_updates=True, enable_debug=False):
    """ (Re)create the PL/pgSQL functions.

        Combines the standard function SQL with the partition function
        SQL, executes it and commits the transaction.
    """
    sql_dir = data_dir / 'sql'

    function_sql = _get_standard_function_sql(conn, config, sql_dir,
                                              enable_diff_updates, enable_debug) \
                   + _get_partition_function_sql(conn, sql_dir)

    with conn.cursor() as cur:
        cur.execute(function_sql)

    conn.commit()
|
119
nominatim/tools/replication.py
Normal file
119
nominatim/tools/replication.py
Normal file
@ -0,0 +1,119 @@
|
||||
"""
|
||||
Functions for updating a database from a replication source.
|
||||
"""
|
||||
import datetime as dt
|
||||
from enum import Enum
|
||||
import logging
|
||||
import time
|
||||
|
||||
from osmium.replication.server import ReplicationServer
|
||||
from osmium import WriteHandler
|
||||
|
||||
from ..db import status
|
||||
from .exec_utils import run_osm2pgsql
|
||||
from ..errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def init_replication(conn, base_url):
    """ Set up replication for the server at the given base URL.

        Determines the database date from the newest node, subtracts a
        safety margin and records the corresponding replication sequence
        in the status table. Raises `UsageError` when the replication
        service cannot be reached.
    """
    LOG.info("Using replication source: %s", base_url)
    date = status.compute_database_date(conn)

    # margin of error to make sure we get all data
    date -= dt.timedelta(hours=3)

    repl = ReplicationServer(base_url)

    seq = repl.timestamp_to_sequence(date)

    if seq is None:
        LOG.fatal("Cannot reach the configured replication service '%s'.\n"
                  "Does the URL point to a directory containing OSM update data?",
                  base_url)
        raise UsageError("Failed to reach replication service")

    status.set_status(conn, date=date, seq=seq)

    # Fix typo in the original log message ('intialised').
    LOG.warning("Updates initialised at sequence %s (%s)", seq, date)
|
||||
|
||||
|
||||
def check_for_updates(conn, base_url):
    """ Check if new data is available from the replication service at the
        given base URL.

        Returns 0 when new data is available, 2 when up to date, and
        253/254 for error conditions.
    """
    _, seq, _ = status.get_status(conn)

    if seq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        return 254

    remote_state = ReplicationServer(base_url).get_state_info()

    if remote_state is None:
        LOG.error("Cannot get state for URL %s.", base_url)
        return 253

    if remote_state.sequence <= seq:
        LOG.warning("Database is up to date.")
        return 2

    LOG.warning("New data available (%i => %i).", seq, remote_state.sequence)
    return 0
|
||||
|
||||
class UpdateState(Enum):
    """ Possible states after an update has run.
    """

    UP_TO_DATE = 0     # diffs were downloaded and applied
    MORE_PENDING = 2   # update skipped: previous data still needs indexing
    NO_CHANGES = 3     # replication server returned no new diffs
|
||||
|
||||
def update(conn, options):
    """ Update database from the next batch of data. Returns the state of
        updates according to `UpdateState`.

        `options` is a dict providing the replication base URL, the
        update interval, the maximum diff size, the path for the diff
        file and the osm2pgsql settings. Raises `UsageError` when
        replication has not been initialised.
    """
    startdate, startseq, indexed = status.get_status(conn)

    if startseq is None:
        LOG.error("Replication not set up. "
                  "Please run 'nominatim replication --init' first.")
        raise UsageError("Replication not set up.")

    # Optionally give the indexer a chance to catch up first.
    if not indexed and options['indexed_only']:
        LOG.info("Skipping update. There is data that needs indexing.")
        return UpdateState.MORE_PENDING

    # Throttle to the configured update interval.
    last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
    update_interval = dt.timedelta(seconds=options['update_interval'])
    if last_since_update < update_interval:
        duration = (update_interval - last_since_update).seconds
        LOG.warning("Sleeping for %s sec before next update.", duration)
        time.sleep(duration)

    # Remove any stale diff file from a previous run.
    if options['import_file'].exists():
        options['import_file'].unlink()

    # Read updates into file.
    repl = ReplicationServer(options['base_url'])

    outhandler = WriteHandler(str(options['import_file']))
    endseq = repl.apply_diffs(outhandler, startseq,
                              max_size=options['max_diff_size'] * 1024)
    outhandler.close()

    if endseq is None:
        return UpdateState.NO_CHANGES

    # Consume updates with osm2pgsql.
    options['append'] = True
    run_osm2pgsql(options)

    # Write the current status to the file
    endstate = repl.get_state_info(endseq)
    status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False)

    return UpdateState.UP_TO_DATE
|
5
nominatim/version.py
Normal file
5
nominatim/version.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""
|
||||
Version information for Nominatim.
|
||||
"""
|
||||
|
||||
NOMINATIM_VERSION = "3.6.0"
|
@ -57,6 +57,9 @@ NOMINATIM_HTTP_PROXY_HOST=proxy.mydomain.com
|
||||
NOMINATIM_HTTP_PROXY_PORT=3128
|
||||
NOMINATIM_HTTP_PROXY_LOGIN=
|
||||
NOMINATIM_HTTP_PROXY_PASSWORD=
|
||||
# Also set these standard environment variables.
|
||||
# HTTP_PROXY="http://user:pass@10.10.1.10:1080"
|
||||
# HTTPS_PROXY="http://user:pass@10.10.1.10:1080"
|
||||
|
||||
# Location of the osm2pgsql binary.
|
||||
# When empty, osm2pgsql is expected to reside in the osm2pgsql directory in
|
||||
@ -64,10 +67,6 @@ NOMINATIM_HTTP_PROXY_PASSWORD=
|
||||
# EXPERT ONLY. You should usually use the supplied osm2pgsql.
|
||||
NOMINATIM_OSM2PGSQL_BINARY=
|
||||
|
||||
# Location of pyosmium-get-changes.
|
||||
# Only needed when running updates.
|
||||
NOMINATIM_PYOSMIUM_BINARY=
|
||||
|
||||
# Directory where to find US Tiger data files to import.
|
||||
# Used with setup.php --import-tiger-data. When unset, the data is expected
|
||||
# to be located under 'data/tiger' in the source tree.
|
||||
|
@ -1,6 +1,6 @@
|
||||
drop table if exists import_status;
|
||||
CREATE TABLE import_status (
|
||||
lastimportdate timestamp NOT NULL,
|
||||
lastimportdate timestamp with time zone NOT NULL,
|
||||
sequence_id integer,
|
||||
indexed boolean
|
||||
);
|
||||
|
@ -1,26 +1,155 @@
|
||||
import itertools
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import pytest
|
||||
|
||||
SRC_DIR = Path(__file__) / '..' / '..' / '..'
|
||||
|
||||
# always test against the source
|
||||
sys.path.insert(0, str((Path(__file__) / '..' / '..' / '..').resolve()))
|
||||
sys.path.insert(0, str(SRC_DIR.resolve()))
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db import connection
|
||||
|
||||
class _TestingCursor(psycopg2.extras.DictCursor):
|
||||
""" Extension to the DictCursor class that provides execution
|
||||
short-cuts that simplify writing assertions.
|
||||
"""
|
||||
|
||||
def scalar(self, sql, params=None):
|
||||
""" Execute a query with a single return value and return this value.
|
||||
Raises an assertion when not exactly one row is returned.
|
||||
"""
|
||||
self.execute(sql, params)
|
||||
assert self.rowcount == 1
|
||||
return self.fetchone()[0]
|
||||
|
||||
def row_set(self, sql, params=None):
|
||||
""" Execute a query and return the result as a set of tuples.
|
||||
"""
|
||||
self.execute(sql, params)
|
||||
if self.rowcount == 1:
|
||||
return set(tuple(self.fetchone()))
|
||||
|
||||
return set((tuple(row) for row in self))
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db(monkeypatch):
|
||||
""" Create an empty database for the test. The database name is also
|
||||
exported into NOMINATIM_DATABASE_DSN.
|
||||
"""
|
||||
name = 'test_nominatim_python_unittest'
|
||||
with psycopg2.connect(database='postgres') as conn:
|
||||
conn.set_isolation_level(0)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
|
||||
cur.execute('CREATE DATABASE {}'.format(name))
|
||||
conn = psycopg2.connect(database='postgres')
|
||||
|
||||
conn.set_isolation_level(0)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
|
||||
cur.execute('CREATE DATABASE {}'.format(name))
|
||||
|
||||
conn.close()
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_DSN' , 'dbname=' + name)
|
||||
|
||||
yield name
|
||||
|
||||
with psycopg2.connect(database='postgres') as conn:
|
||||
conn.set_isolation_level(0)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
|
||||
conn = psycopg2.connect(database='postgres')
|
||||
|
||||
conn.set_isolation_level(0)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
|
||||
|
||||
conn.close()
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db_with_extensions(temp_db):
|
||||
conn = psycopg2.connect(database=temp_db)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;')
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
return temp_db
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db_conn(temp_db):
|
||||
""" Connection to the test database.
|
||||
"""
|
||||
conn = connection.connect('dbname=' + temp_db)
|
||||
yield conn
|
||||
conn.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_db_cursor(temp_db):
|
||||
""" Connection and cursor towards the test database. The connection will
|
||||
be in auto-commit mode.
|
||||
"""
|
||||
conn = psycopg2.connect('dbname=' + temp_db)
|
||||
conn.set_isolation_level(0)
|
||||
with conn.cursor(cursor_factory=_TestingCursor) as cur:
|
||||
yield cur
|
||||
conn.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def def_config():
|
||||
return Configuration(None, SRC_DIR.resolve() / 'settings')
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def status_table(temp_db_conn):
|
||||
""" Create an empty version of the status table and
|
||||
the status logging table.
|
||||
"""
|
||||
with temp_db_conn.cursor() as cur:
|
||||
cur.execute("""CREATE TABLE import_status (
|
||||
lastimportdate timestamp with time zone NOT NULL,
|
||||
sequence_id integer,
|
||||
indexed boolean
|
||||
)""")
|
||||
cur.execute("""CREATE TABLE import_osmosis_log (
|
||||
batchend timestamp,
|
||||
batchseq integer,
|
||||
batchsize bigint,
|
||||
starttime timestamp,
|
||||
endtime timestamp,
|
||||
event text
|
||||
)""")
|
||||
temp_db_conn.commit()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def place_table(temp_db_with_extensions, temp_db_conn):
|
||||
""" Create an empty version of the place table.
|
||||
"""
|
||||
with temp_db_conn.cursor() as cur:
|
||||
cur.execute("""CREATE TABLE place (
|
||||
osm_id int8 NOT NULL,
|
||||
osm_type char(1) NOT NULL,
|
||||
class text NOT NULL,
|
||||
type text NOT NULL,
|
||||
name hstore,
|
||||
admin_level smallint,
|
||||
address hstore,
|
||||
extratags hstore,
|
||||
geometry Geometry(Geometry,4326) NOT NULL)""")
|
||||
temp_db_conn.commit()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def place_row(place_table, temp_db_cursor):
|
||||
""" A factory for rows in the place table. The table is created as a
|
||||
prerequisite to the fixture.
|
||||
"""
|
||||
idseq = itertools.count(1001)
|
||||
def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
|
||||
admin_level=None, address=None, extratags=None, geom=None):
|
||||
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
|
||||
(osm_id or next(idseq), osm_type, cls, typ, names,
|
||||
admin_level, address, extratags,
|
||||
geom or 'SRID=4326;POINT(0 0 )'))
|
||||
|
||||
return _insert
|
||||
|
@ -1,10 +1,21 @@
|
||||
"""
|
||||
Tests for command line interface wrapper.
|
||||
|
||||
These tests just check that the various command line parameters route to the
|
||||
correct functionionality. They use a lot of monkeypatching to avoid executing
|
||||
the actual functions.
|
||||
"""
|
||||
import datetime as dt
|
||||
import psycopg2
|
||||
import pytest
|
||||
import time
|
||||
|
||||
import nominatim.cli
|
||||
import nominatim.indexer.indexer
|
||||
import nominatim.tools.refresh
|
||||
import nominatim.tools.replication
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.db import status
|
||||
|
||||
def call_nominatim(*args):
|
||||
return nominatim.cli.nominatim(module_dir='build/module',
|
||||
@ -18,9 +29,9 @@ class MockParamCapture:
|
||||
""" Mock that records the parameters with which a function was called
|
||||
as well as the number of calls.
|
||||
"""
|
||||
def __init__(self):
|
||||
def __init__(self, retval=0):
|
||||
self.called = 0
|
||||
self.return_value = 0
|
||||
self.return_value = retval
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
self.called += 1
|
||||
@ -54,7 +65,6 @@ def test_cli_help(capsys):
|
||||
(('import', '--continue', 'load-data'), 'setup'),
|
||||
(('freeze',), 'setup'),
|
||||
(('special-phrases',), 'specialphrases'),
|
||||
(('replication',), 'update'),
|
||||
(('add-data', '--tiger-data', 'tiger'), 'setup'),
|
||||
(('add-data', '--file', 'foo.osm'), 'update'),
|
||||
(('check-database',), 'check_import_finished'),
|
||||
@ -82,14 +92,12 @@ def test_add_data_command(mock_run_legacy, name, oid):
|
||||
(['--boundaries-only'], 1, 0),
|
||||
(['--no-boundaries'], 0, 1),
|
||||
(['--boundaries-only', '--no-boundaries'], 0, 0)])
|
||||
def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
|
||||
with psycopg2.connect(database=temp_db) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("CREATE TABLE import_status (indexed bool)")
|
||||
def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
|
||||
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
|
||||
bnd_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.cli.Indexer, 'index_boundaries', bnd_mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
|
||||
rank_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.cli.Indexer, 'index_by_rank', rank_mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
|
||||
|
||||
assert 0 == call_nominatim('index', *params)
|
||||
|
||||
@ -98,29 +106,115 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("command,params", [
|
||||
('postcodes', ('update.php', '--calculate-postcodes')),
|
||||
('word-counts', ('update.php', '--recompute-word-counts')),
|
||||
('address-levels', ('update.php', '--update-address-levels')),
|
||||
('functions', ('setup.php',)),
|
||||
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
|
||||
('importance', ('update.php', '--recompute-importance')),
|
||||
('website', ('setup.php', '--setup-website')),
|
||||
])
|
||||
def test_refresh_command(mock_run_legacy, command, params):
|
||||
def test_refresh_legacy_command(mock_run_legacy, temp_db, command, params):
|
||||
assert 0 == call_nominatim('refresh', '--' + command)
|
||||
|
||||
assert mock_run_legacy.called == 1
|
||||
assert len(mock_run_legacy.last_args) >= len(params)
|
||||
assert mock_run_legacy.last_args[:len(params)] == params
|
||||
|
||||
@pytest.mark.parametrize("command,func", [
|
||||
('postcodes', 'update_postcodes'),
|
||||
('word-counts', 'recompute_word_counts'),
|
||||
('address-levels', 'load_address_levels_from_file'),
|
||||
('functions', 'create_functions'),
|
||||
])
|
||||
def test_refresh_command(monkeypatch, temp_db, command, func):
|
||||
func_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
|
||||
|
||||
def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
|
||||
assert 0 == call_nominatim('refresh', '--' + command)
|
||||
assert func_mock.called == 1
|
||||
|
||||
|
||||
def test_refresh_importance_computed_after_wiki_import(mock_run_legacy, temp_db):
|
||||
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
|
||||
|
||||
assert mock_run_legacy.called == 2
|
||||
assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params,func", [
|
||||
(('--init', '--no-update-functions'), 'init_replication'),
|
||||
(('--check-for-updates',), 'check_for_updates')
|
||||
])
|
||||
def test_replication_command(monkeypatch, temp_db, params, func):
|
||||
func_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
|
||||
|
||||
assert 0 == call_nominatim('replication', *params)
|
||||
assert func_mock.called == 1
|
||||
|
||||
|
||||
def test_replication_update_bad_interval(monkeypatch, temp_db):
|
||||
monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
|
||||
|
||||
assert call_nominatim('replication') == 1
|
||||
|
||||
|
||||
def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
|
||||
monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
|
||||
'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
|
||||
|
||||
assert call_nominatim('replication') == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("state, retval", [
|
||||
(nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
|
||||
(nominatim.tools.replication.UpdateState.NO_CHANGES, 3)
|
||||
])
|
||||
def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
|
||||
status_table, state, retval):
|
||||
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
|
||||
func_mock = MockParamCapture(retval=state)
|
||||
monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
|
||||
|
||||
assert retval == call_nominatim('replication', '--once', '--no-index')
|
||||
|
||||
|
||||
def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table):
|
||||
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
|
||||
states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
|
||||
nominatim.tools.replication.UpdateState.UP_TO_DATE]
|
||||
monkeypatch.setattr(nominatim.tools.replication, 'update',
|
||||
lambda *args, **kwargs: states.pop())
|
||||
|
||||
index_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
|
||||
|
||||
with pytest.raises(IndexError):
|
||||
call_nominatim('replication')
|
||||
|
||||
assert index_mock.called == 4
|
||||
|
||||
|
||||
def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table):
|
||||
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
|
||||
states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
|
||||
nominatim.tools.replication.UpdateState.UP_TO_DATE]
|
||||
monkeypatch.setattr(nominatim.tools.replication, 'update',
|
||||
lambda *args, **kwargs: states.pop())
|
||||
|
||||
index_mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
|
||||
|
||||
sleep_mock = MockParamCapture()
|
||||
monkeypatch.setattr(time, 'sleep', sleep_mock)
|
||||
|
||||
with pytest.raises(IndexError):
|
||||
call_nominatim('replication')
|
||||
|
||||
assert index_mock.called == 2
|
||||
assert sleep_mock.called == 1
|
||||
assert sleep_mock.last_args[0] == 60
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params", [
|
||||
('search', '--query', 'new'),
|
||||
('reverse', '--lat', '0', '--lon', '0'),
|
||||
|
@ -7,6 +7,7 @@ import tempfile
|
||||
import pytest
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
DEFCFG_DIR = Path(__file__) / '..' / '..' / '..' / 'settings'
|
||||
|
||||
@ -15,6 +16,7 @@ def test_no_project_dir():
|
||||
|
||||
assert config.DATABASE_WEBUSER == 'www-data'
|
||||
|
||||
|
||||
def test_prefer_project_setting_over_default():
|
||||
with tempfile.TemporaryDirectory() as project_dir:
|
||||
with open(project_dir + '/.env', 'w') as envfile:
|
||||
@ -24,6 +26,7 @@ def test_prefer_project_setting_over_default():
|
||||
|
||||
assert config.DATABASE_WEBUSER == 'apache'
|
||||
|
||||
|
||||
def test_prefer_os_environ_over_project_setting(monkeypatch):
|
||||
with tempfile.TemporaryDirectory() as project_dir:
|
||||
with open(project_dir + '/.env', 'w') as envfile:
|
||||
@ -35,6 +38,7 @@ def test_prefer_os_environ_over_project_setting(monkeypatch):
|
||||
|
||||
assert config.DATABASE_WEBUSER == 'nobody'
|
||||
|
||||
|
||||
def test_get_os_env_add_defaults(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@ -42,6 +46,7 @@ def test_get_os_env_add_defaults(monkeypatch):
|
||||
|
||||
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'www-data'
|
||||
|
||||
|
||||
def test_get_os_env_prefer_os_environ(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@ -49,11 +54,13 @@ def test_get_os_env_prefer_os_environ(monkeypatch):
|
||||
|
||||
assert config.get_os_env()['NOMINATIM_DATABASE_WEBUSER'] == 'nobody'
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_default():
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
assert config.get_libpq_dsn() == 'dbname=nominatim'
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_php(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@ -62,6 +69,19 @@ def test_get_libpq_dsn_convert_php(monkeypatch):
|
||||
|
||||
assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"),
|
||||
("xy'z", "xy\\'z"),
|
||||
])
|
||||
def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
|
||||
'pgsql:dbname=gis;password={}'.format(val))
|
||||
|
||||
assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect)
|
||||
|
||||
|
||||
def test_get_libpq_dsn_convert_libpq(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
@ -69,3 +89,68 @@ def test_get_libpq_dsn_convert_libpq(monkeypatch):
|
||||
'host=localhost dbname=gis password=foo')
|
||||
|
||||
assert config.get_libpq_dsn() == 'host=localhost dbname=gis password=foo'
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value,result",
|
||||
[(x, True) for x in ('1', 'true', 'True', 'yes', 'YES')] +
|
||||
[(x, False) for x in ('0', 'false', 'no', 'NO', 'x')])
|
||||
def test_get_bool(monkeypatch, value, result):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
|
||||
|
||||
assert config.get_bool('FOOBAR') == result
|
||||
|
||||
def test_get_bool_empty():
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
assert config.DATABASE_MODULE_PATH == ''
|
||||
assert config.get_bool('DATABASE_MODULE_PATH') == False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
|
||||
('85762513444', 85762513444)])
|
||||
def test_get_int_success(monkeypatch, value, result):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
|
||||
|
||||
assert config.get_int('FOOBAR') == result
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ['1b', 'fg', '0x23'])
|
||||
def test_get_int_bad_values(monkeypatch, value):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_FOOBAR', value)
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
config.get_int('FOOBAR')
|
||||
|
||||
|
||||
def test_get_int_empty():
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
assert config.DATABASE_MODULE_PATH == ''
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
config.get_int('DATABASE_MODULE_PATH')
|
||||
|
||||
|
||||
def test_get_import_style_intern(monkeypatch):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street')
|
||||
|
||||
expected = DEFCFG_DIR / 'import-street.style'
|
||||
|
||||
assert config.get_import_style_file() == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye'])
|
||||
def test_get_import_style_intern(monkeypatch, value):
|
||||
config = Configuration(None, DEFCFG_DIR)
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
|
||||
|
||||
assert str(config.get_import_style_file()) == value
|
||||
|
32
test/python/test_db_connection.py
Normal file
32
test/python/test_db_connection.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""
|
||||
Tests for specialised conenction and cursor classes.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
|
||||
@pytest.fixture
|
||||
def db(temp_db):
|
||||
conn = connect('dbname=' + temp_db)
|
||||
yield conn
|
||||
conn.close()
|
||||
|
||||
|
||||
def test_connection_table_exists(db, temp_db_cursor):
|
||||
assert db.table_exists('foobar') == False
|
||||
|
||||
temp_db_cursor.execute('CREATE TABLE foobar (id INT)')
|
||||
|
||||
assert db.table_exists('foobar') == True
|
||||
|
||||
|
||||
def test_cursor_scalar(db, temp_db_cursor):
|
||||
temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
|
||||
|
||||
with db.cursor() as cur:
|
||||
assert cur.scalar('SELECT count(*) FROM dummy') == 0
|
||||
|
||||
def test_cursor_scalar_many_rows(db):
|
||||
with db.cursor() as cur:
|
||||
with pytest.raises(RuntimeError):
|
||||
cur.scalar('SELECT * FROM pg_tables')
|
114
test/python/test_db_status.py
Normal file
114
test/python/test_db_status.py
Normal file
@ -0,0 +1,114 @@
|
||||
"""
|
||||
Tests for status table manipulation.
|
||||
"""
|
||||
import datetime as dt
|
||||
|
||||
import pytest
|
||||
|
||||
import nominatim.db.status
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
def test_compute_database_date_place_empty(status_table, place_table, temp_db_conn):
|
||||
with pytest.raises(UsageError):
|
||||
nominatim.db.status.compute_database_date(temp_db_conn)
|
||||
|
||||
OSM_NODE_DATA = """\
|
||||
<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
|
||||
<node id="45673" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
|
||||
</node>
|
||||
</osm>
|
||||
"""
|
||||
|
||||
def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
|
||||
place_row(osm_type='N', osm_id=45673)
|
||||
|
||||
requested_url = []
|
||||
def mock_url(url):
|
||||
requested_url.append(url)
|
||||
return OSM_NODE_DATA
|
||||
|
||||
monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
|
||||
|
||||
date = nominatim.db.status.compute_database_date(temp_db_conn)
|
||||
|
||||
assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
|
||||
assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
|
||||
|
||||
|
||||
def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
|
||||
place_row(osm_type='N', osm_id=45673)
|
||||
|
||||
requested_url = []
|
||||
def mock_url(url):
|
||||
requested_url.append(url)
|
||||
return '<osm version="0.6" generator="OpenStre'
|
||||
|
||||
monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
date = nominatim.db.status.compute_database_date(temp_db_conn)
|
||||
|
||||
|
||||
def test_set_status_empty_table(status_table, temp_db_conn, temp_db_cursor):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date)
|
||||
|
||||
temp_db_cursor.execute("SELECT * FROM import_status")
|
||||
|
||||
assert temp_db_cursor.rowcount == 1
|
||||
assert temp_db_cursor.fetchone() == [date, None, True]
|
||||
|
||||
|
||||
def test_set_status_filled_table(status_table, temp_db_conn, temp_db_cursor):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date)
|
||||
|
||||
assert 1 == temp_db_cursor.scalar("SELECT count(*) FROM import_status")
|
||||
|
||||
|
||||
date = dt.datetime.fromordinal(1000100).replace(tzinfo=dt.timezone.utc)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date, seq=456, indexed=False)
|
||||
|
||||
temp_db_cursor.execute("SELECT * FROM import_status")
|
||||
|
||||
assert temp_db_cursor.rowcount == 1
|
||||
assert temp_db_cursor.fetchone() == [date, 456, False]
|
||||
|
||||
|
||||
def test_get_status_empty_table(status_table, temp_db_conn):
|
||||
assert nominatim.db.status.get_status(temp_db_conn) == (None, None, None)
|
||||
|
||||
|
||||
def test_get_status_success(status_table, temp_db_conn):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date, seq=667, indexed=False)
|
||||
|
||||
assert nominatim.db.status.get_status(temp_db_conn) == \
|
||||
(date, 667, False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("old_state", [True, False])
|
||||
@pytest.mark.parametrize("new_state", [True, False])
|
||||
def test_set_indexed(status_table, temp_db_conn, temp_db_cursor, old_state, new_state):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date, indexed=old_state)
|
||||
nominatim.db.status.set_indexed(temp_db_conn, new_state)
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT indexed FROM import_status") == new_state
|
||||
|
||||
|
||||
def test_set_indexed_empty_status(status_table, temp_db_conn, temp_db_cursor):
|
||||
nominatim.db.status.set_indexed(temp_db_conn, True)
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT count(*) FROM import_status") == 0
|
||||
|
||||
|
||||
def text_log_status(status_table, temp_db_conn):
|
||||
date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
|
||||
start = dt.datetime.now() - dt.timedelta(hours=1)
|
||||
nominatim.db.status.set_status(temp_db_conn, date=date, seq=56)
|
||||
nominatim.db.status.log_status(temp_db_conn, start, 'index')
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT count(*) FROM import_osmosis_log") == 1
|
||||
assert temp_db_cursor.scalar("SELECT seq FROM import_osmosis_log") == 56
|
||||
assert temp_db_cursor.scalar("SELECT date FROM import_osmosis_log") == date
|
30
test/python/test_db_utils.py
Normal file
30
test/python/test_db_utils.py
Normal file
@ -0,0 +1,30 @@
|
||||
"""
|
||||
Tests for DB utility functions in db.utils
|
||||
"""
|
||||
import psycopg2
|
||||
import pytest
|
||||
|
||||
import nominatim.db.utils as db_utils
|
||||
|
||||
def test_execute_file_success(temp_db_conn, tmp_path):
|
||||
tmpfile = tmp_path / 'test.sql'
|
||||
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
|
||||
|
||||
db_utils.execute_file(temp_db_conn, tmpfile)
|
||||
|
||||
with temp_db_conn.cursor() as cur:
|
||||
cur.execute('SELECT * FROM test')
|
||||
|
||||
assert cur.rowcount == 1
|
||||
assert cur.fetchone()[0] == 56
|
||||
|
||||
def test_execute_file_bad_file(temp_db_conn, tmp_path):
|
||||
with pytest.raises(FileNotFoundError):
|
||||
db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql')
|
||||
|
||||
def test_execute_file_bad_sql(temp_db_conn, tmp_path):
|
||||
tmpfile = tmp_path / 'test.sql'
|
||||
tmpfile.write_text('CREATE STABLE test (id INT)')
|
||||
|
||||
with pytest.raises(psycopg2.ProgrammingError):
|
||||
db_utils.execute_file(temp_db_conn, tmpfile)
|
@ -82,10 +82,8 @@ class IndexerTestDB:
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_db(temp_db):
|
||||
conn = psycopg2.connect(database=temp_db)
|
||||
yield IndexerTestDB(conn)
|
||||
conn.close()
|
||||
def test_db(temp_db_conn):
|
||||
yield IndexerTestDB(temp_db_conn)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
|
@ -7,7 +7,6 @@ import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from nominatim.config import Configuration
|
||||
import nominatim.tools.exec_utils as exec_utils
|
||||
|
||||
@pytest.fixture
|
||||
@ -18,9 +17,9 @@ def tmp_phplib_dir():
|
||||
yield Path(phpdir)
|
||||
|
||||
@pytest.fixture
|
||||
def nominatim_env(tmp_phplib_dir):
|
||||
def nominatim_env(tmp_phplib_dir, def_config):
|
||||
class _NominatimEnv:
|
||||
config = Configuration(None, Path(__file__) / '..' / '..' / '..' / 'settings')
|
||||
config = def_config
|
||||
phplib_dir = tmp_phplib_dir
|
||||
data_dir = Path('data')
|
||||
project_dir = Path('.')
|
||||
@ -100,3 +99,12 @@ def test_run_api_with_extra_env(tmp_project_dir):
|
||||
extra_env = dict(SCRIPT_FILENAME=str(tmp_project_dir / 'website' / 'test.php'))
|
||||
assert 0 == exec_utils.run_api_script('badname', tmp_project_dir,
|
||||
extra_env=extra_env)
|
||||
|
||||
|
||||
### run_osm2pgsql
|
||||
|
||||
def test_run_osm2pgsql():
|
||||
exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
|
||||
dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
|
||||
osm2pgsql_style='./my.style',
|
||||
import_file='foo.bar'))
|
||||
|
85
test/python/test_tools_refresh_address_levels.py
Normal file
85
test/python/test_tools_refresh_address_levels.py
Normal file
@ -0,0 +1,85 @@
|
||||
"""
|
||||
Tests for function for importing address ranks.
|
||||
"""
|
||||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file
|
||||
|
||||
def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
|
||||
load_address_levels_from_file(temp_db_conn, Path(def_config.ADDRESS_LEVEL_CONFIG))
|
||||
|
||||
assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
|
||||
|
||||
def test_load_ranks_from_file(temp_db_conn, temp_db_cursor, tmp_path):
|
||||
test_file = tmp_path / 'test_levels.json'
|
||||
test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
|
||||
|
||||
load_address_levels_from_file(temp_db_conn, test_file)
|
||||
|
||||
assert temp_db_cursor.scalar('SELECT count(*) FROM address_levels') > 0
|
||||
|
||||
|
||||
def test_load_ranks_from_broken_file(temp_db_conn, tmp_path):
|
||||
test_file = tmp_path / 'test_levels.json'
|
||||
test_file.write_text('[{"tags":"place":{"sea":2}}}]')
|
||||
|
||||
with pytest.raises(json.decoder.JSONDecodeError):
|
||||
load_address_levels_from_file(temp_db_conn, test_file)
|
||||
|
||||
|
||||
def test_load_ranks_country(temp_db_conn, temp_db_cursor):
|
||||
load_address_levels(temp_db_conn, 'levels',
|
||||
[{"tags": {"place": {"village": 14}}},
|
||||
{"countries": ['de'],
|
||||
"tags": {"place": {"village": 15}}},
|
||||
{"countries": ['uk', 'us' ],
|
||||
"tags": {"place": {"village": 16}}}
|
||||
])
|
||||
|
||||
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
|
||||
set([(None, 'place', 'village', 14, 14),
|
||||
('de', 'place', 'village', 15, 15),
|
||||
('uk', 'place', 'village', 16, 16),
|
||||
('us', 'place', 'village', 16, 16),
|
||||
])
|
||||
|
||||
|
||||
def test_load_ranks_default_value(temp_db_conn, temp_db_cursor):
|
||||
load_address_levels(temp_db_conn, 'levels',
|
||||
[{"tags": {"boundary": {"": 28}}},
|
||||
{"countries": ['hu'],
|
||||
"tags": {"boundary": {"": 29}}}
|
||||
])
|
||||
|
||||
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
|
||||
set([(None, 'boundary', None, 28, 28),
|
||||
('hu', 'boundary', None, 29, 29),
|
||||
])
|
||||
|
||||
|
||||
def test_load_ranks_multiple_keys(temp_db_conn, temp_db_cursor):
|
||||
load_address_levels(temp_db_conn, 'levels',
|
||||
[{"tags":
|
||||
{"place": {"city": 14},
|
||||
"boundary": {"administrative2" : 4}}
|
||||
}])
|
||||
|
||||
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
|
||||
set([(None, 'place', 'city', 14, 14),
|
||||
(None, 'boundary', 'administrative2', 4, 4),
|
||||
])
|
||||
|
||||
|
||||
def test_load_ranks_address(temp_db_conn, temp_db_cursor):
|
||||
load_address_levels(temp_db_conn, 'levels',
|
||||
[{"tags":
|
||||
{"place": {"city": 14,
|
||||
"town" : [14, 13]}}
|
||||
}])
|
||||
|
||||
assert temp_db_cursor.row_set('SELECT * FROM levels') == \
|
||||
set([(None, 'place', 'city', 14, 14),
|
||||
(None, 'place', 'town', 14, 13),
|
||||
])
|
99
test/python/test_tools_refresh_create_functions.py
Normal file
99
test/python/test_tools_refresh_create_functions.py
Normal file
@ -0,0 +1,99 @@
|
||||
"""
|
||||
Tests for creating PL/pgSQL functions for Nominatim.
|
||||
"""
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql
|
||||
|
||||
SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'sql').resolve()
|
||||
|
||||
@pytest.fixture
|
||||
def db(temp_db):
|
||||
conn = connect('dbname=' + temp_db)
|
||||
yield conn
|
||||
conn.close()
|
||||
|
||||
@pytest.fixture
|
||||
def db_with_tables(db):
|
||||
with db.cursor() as cur:
|
||||
for table in ('place', 'placex', 'location_postcode'):
|
||||
cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table))
|
||||
|
||||
return db
|
||||
|
||||
|
||||
def test_standard_functions_replace_module_default(db, def_config):
|
||||
def_config.project_dir = Path('.')
|
||||
sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert sql.find('{modulepath}') < 0
|
||||
assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0
|
||||
|
||||
|
||||
def test_standard_functions_replace_module_custom(monkeypatch, db, def_config):
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom')
|
||||
sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert sql.find('{modulepath}') < 0
|
||||
assert sql.find("'custom/nominatim.so'") >= 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_diff(db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('%DIFFUPDATES%') < 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_debug(db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('--DEBUG') < 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('--LIMIT INDEXING') < 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('%NOTIGERDATA%') >= 0) == enabled
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enabled", (True, False))
|
||||
def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled):
|
||||
def_config.project_dir = Path('.')
|
||||
monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no')
|
||||
sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False)
|
||||
|
||||
assert sql
|
||||
assert (sql.find('%NOAUXDATA%') >= 0) == enabled
|
||||
|
||||
|
||||
def test_partition_function(temp_db_cursor, db, def_config):
|
||||
temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)")
|
||||
|
||||
sql = _get_partition_function_sql(db, SQL_DIR)
|
||||
|
||||
assert sql
|
||||
assert sql.find('-partition-') < 0
|
138
test/python/test_tools_replication.py
Normal file
138
test/python/test_tools_replication.py
Normal file
@ -0,0 +1,138 @@
|
||||
"""
|
||||
Tests for replication functionality.
|
||||
"""
|
||||
import datetime as dt
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from osmium.replication.server import OsmosisState
|
||||
|
||||
import nominatim.tools.replication
|
||||
import nominatim.db.status as status
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
OSM_NODE_DATA = """\
|
||||
<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
|
||||
<node id="100" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
|
||||
</node>
|
||||
</osm>
|
||||
"""
|
||||
|
||||
### init replication
|
||||
|
||||
def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
|
||||
place_row(osm_type='N', osm_id=100)
|
||||
|
||||
monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
|
||||
|
||||
with pytest.raises(UsageError, match="Failed to reach replication service"):
|
||||
nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
|
||||
|
||||
|
||||
def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
|
||||
place_row(osm_type='N', osm_id=100)
|
||||
|
||||
monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"timestamp_to_sequence",
|
||||
lambda self, date: 234)
|
||||
|
||||
nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
|
||||
|
||||
temp_db_cursor.execute("SELECT * FROM import_status")
|
||||
|
||||
expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
|
||||
assert temp_db_cursor.rowcount == 1
|
||||
assert temp_db_cursor.fetchone() == [expected_date, 234, True]
|
||||
|
||||
|
||||
### checking for updates
|
||||
|
||||
def test_check_for_updates_empty_status_table(status_table, temp_db_conn):
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
|
||||
|
||||
|
||||
def test_check_for_updates_seq_not_set(status_table, temp_db_conn):
|
||||
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))
|
||||
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
|
||||
|
||||
|
||||
def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
|
||||
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"get_state_info", lambda self: None)
|
||||
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
|
||||
|
||||
|
||||
@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
|
||||
def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
|
||||
server_sequence, result):
|
||||
date = dt.datetime.now(dt.timezone.utc)
|
||||
status.set_status(temp_db_conn, date, seq=345)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"get_state_info",
|
||||
lambda self: OsmosisState(server_sequence, date))
|
||||
|
||||
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
|
||||
|
||||
|
||||
### updating
|
||||
|
||||
@pytest.fixture
|
||||
def update_options(tmpdir):
|
||||
return dict(base_url='https://test.io',
|
||||
indexed_only=False,
|
||||
update_interval=3600,
|
||||
import_file=tmpdir / 'foo.osm',
|
||||
max_diff_size=1)
|
||||
|
||||
def test_update_empty_status_table(status_table, temp_db_conn):
|
||||
with pytest.raises(UsageError):
|
||||
nominatim.tools.replication.update(temp_db_conn, {})
|
||||
|
||||
|
||||
def test_update_already_indexed(status_table, temp_db_conn):
|
||||
status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)
|
||||
|
||||
assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
|
||||
== nominatim.tools.replication.UpdateState.MORE_PENDING
|
||||
|
||||
|
||||
def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options):
|
||||
date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
|
||||
status.set_status(temp_db_conn, date, seq=34)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"apply_diffs",
|
||||
lambda *args, **kwargs: None)
|
||||
|
||||
sleeptime = []
|
||||
monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
|
||||
|
||||
assert nominatim.tools.replication.update(temp_db_conn, update_options) \
|
||||
== nominatim.tools.replication.UpdateState.NO_CHANGES
|
||||
|
||||
assert not sleeptime
|
||||
|
||||
|
||||
def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
|
||||
date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
|
||||
status.set_status(temp_db_conn, date, seq=34)
|
||||
|
||||
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
|
||||
"apply_diffs",
|
||||
lambda *args, **kwargs: None)
|
||||
|
||||
sleeptime = []
|
||||
monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
|
||||
|
||||
assert nominatim.tools.replication.update(temp_db_conn, update_options) \
|
||||
== nominatim.tools.replication.UpdateState.NO_CHANGES
|
||||
|
||||
assert len(sleeptime) == 1
|
||||
assert sleeptime[0] < 3600
|
||||
assert sleeptime[0] > 0
|
@ -1,24 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
from osmium.replication import server
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: python check_server_for_updates.py <server url> <sequence id>")
|
||||
sys.exit(254)
|
||||
|
||||
seqid = int(sys.argv[2])
|
||||
|
||||
state = server.ReplicationServer(sys.argv[1]).get_state_info()
|
||||
|
||||
if state is None:
|
||||
print("ERROR: Cannot get state from URL %s." % (sys.argv[1], ))
|
||||
sys.exit(253)
|
||||
|
||||
if state.sequence <= seqid:
|
||||
print("Database up to date.")
|
||||
sys.exit(1)
|
||||
|
||||
print("New data available (%i => %i)." % (seqid, state.sequence))
|
||||
sys.exit(0)
|
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import osmium
|
||||
import sys
|
||||
import datetime
|
||||
|
||||
|
||||
class Datecounter(osmium.SimpleHandler):
|
||||
|
||||
filedate = None
|
||||
|
||||
def date(self, o):
|
||||
ts = o.timestamp
|
||||
if self.filedate is None or ts > self.filedate:
|
||||
self.filedate = ts
|
||||
|
||||
node = date
|
||||
way = date
|
||||
relation = date
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: python osm_file_date.py <osmfile>")
|
||||
sys.exit(-1)
|
||||
|
||||
h = Datecounter()
|
||||
|
||||
h.apply_file(sys.argv[1])
|
||||
|
||||
if h.filedate is None:
|
||||
exit(5)
|
||||
|
||||
print(h.filedate)
|
Loading…
Reference in New Issue
Block a user