Merge branch 'updatePHP' of https://github.com/ThomasBarris/Nominatim into ThomasBarris-updatePHP

This commit is contained in:
Sarah Hoffmann 2018-10-02 22:46:53 +02:00
commit f45b3fa3f2
5 changed files with 1062 additions and 948 deletions

0
CMakeLists.txt Normal file → Executable file
View File

870
lib/setup/SetupClass.php Executable file
View File

@ -0,0 +1,870 @@
<?php
namespace Nominatim\Setup;
class SetupFunctions
{
// Cache size handed to osm2pgsql via -C; taken from the 'osm2pgsql-cache'
// option or from getCacheMemoryMB() in the constructor.
protected $iCacheMemory; // set in constructor
// Number of parallel workers/connections; taken from the 'threads' option
// or derived from getProcessorCount() in the constructor (never below 1).
protected $iInstances; // set in constructor
// Copy of CONST_Database_Module_Path, substituted for {modulepath} in functions.sql.
protected $sModulePath; // set in constructor
// Parsed CONST_Database_DSN; 'port' is defaulted to 5432 when missing or empty.
protected $aDSNInfo; // set in constructor = DB::parseDSN(CONST_Database_DSN);
// Value of the 'verbose' command line option.
protected $sVerbose; // set in constructor
// Value of the 'ignore-errors' command line option (false when not given).
protected $sIgnoreErrors; // set in constructor
// True when instantiated for update.php, otherwise the 'enable-diff-updates' option.
protected $bEnableDiffUpdates; // set in constructor
// Value of the 'enable-debug-statements' command line option (false when not given).
protected $bEnableDebugStatements; // set in constructor
// Value of the 'no-partitions' command line option (false when not given).
protected $bNoPartitions; // set in constructor
// Shared database handle obtained from getDB(). Assigned in setupDB and,
// if still unset, lazily in importData, loadData, calculatePostcodes, index,
// drop and pgsqlRunPartitionScript.
protected $oDB = null; // set in setupDB (earliest) or later in loadData, importData, drop, createSqlFunctions, importTigerData
// pgsqlRunPartitionScript, calculatePostcodes, ... if not already set
/**
 * Collect the settings shared by all setup steps.
 *
 * Every key of $aCMDResult is optional: update.php instantiates this class
 * without any command line array, so missing options fall back to defaults
 * instead of triggering undefined-index notices.
 *
 * @param string $callingFunction 'update' forces diff updates on; 'setup'
 *                                takes the value of the 'enable-diff-updates'
 *                                option; anything else leaves them off.
 * @param array  $aCMDResult      Parsed command line options. Keys read here:
 *                                'threads', 'osm2pgsql-cache', 'verbose',
 *                                'ignore-errors', 'enable-debug-statements',
 *                                'no-partitions', 'enable-diff-updates'.
 */
public function __construct($callingFunction, array $aCMDResult = array())
{
    // by default, use all but one processor, but never more than 15.
    $this->iInstances = isset($aCMDResult['threads'])
        ? $aCMDResult['threads']
        : (min(16, getProcessorCount()) - 1);

    if ($this->iInstances < 1) {
        $this->iInstances = 1;
        warn('resetting threads to '.$this->iInstances);
    }

    // Assume we can steal all the cache memory in the box (unless told otherwise)
    if (isset($aCMDResult['osm2pgsql-cache'])) {
        $this->iCacheMemory = $aCMDResult['osm2pgsql-cache'];
    } else {
        $this->iCacheMemory = getCacheMemoryMB();
    }

    $this->sModulePath = CONST_Database_Module_Path;
    info('module path: ' . $this->sModulePath);

    // prepares DB for import or update, sets the Data Source Name
    $this->aDSNInfo = \DB::parseDSN(CONST_Database_DSN);
    if (!isset($this->aDSNInfo['port']) || !$this->aDSNInfo['port']) {
        $this->aDSNInfo['port'] = 5432;
    }

    // Options that the calling script may not provide default to false.
    $this->sVerbose = isset($aCMDResult['verbose'])
        ? $aCMDResult['verbose']
        : false;
    $this->sIgnoreErrors = isset($aCMDResult['ignore-errors'])
        ? $aCMDResult['ignore-errors']
        : false;
    $this->bEnableDebugStatements = isset($aCMDResult['enable-debug-statements'])
        ? $aCMDResult['enable-debug-statements']
        : false;
    $this->bNoPartitions = isset($aCMDResult['no-partitions'])
        ? $aCMDResult['no-partitions']
        : false;

    // if class is instantiated by update.php, we have to set EnableDiffUpdates to true
    // otherwise set to value provided by setup.php's command line arg array
    if ($callingFunction == 'update') {
        $this->bEnableDiffUpdates = true;
    } elseif ($callingFunction == 'setup' && isset($aCMDResult['enable-diff-updates'])) {
        $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates'];
    } else {
        // never leave the flag uninitialised
        $this->bEnableDiffUpdates = false;
    }
}
/**
 * Create the database named in CONST_Database_DSN with the `createdb` tool.
 *
 * fail() is called when a connection to the database already succeeds and
 * when createdb returns a non-zero status. The password, if any, is passed
 * to the child process through the PGPASSWORD environment variable.
 */
public function createDB()
{
    info('Create DB');

    $oProbe = \DB::connect(CONST_Database_DSN, false);
    if (!\PEAR::isError($oProbe)) {
        fail('database already exists ('.CONST_Database_DSN.')');
    }

    $aDSN = $this->aDSNInfo;

    $sCommand = 'createdb -E UTF-8 -p '.$aDSN['port'].' '.$aDSN['database'];
    if (!empty($aDSN['username'])) {
        $sCommand .= ' -U '.$aDSN['username'];
    }
    if (!empty($aDSN['hostspec'])) {
        $sCommand .= ' -h '.$aDSN['hostspec'];
    }

    $aEnvironment = empty($aDSN['password'])
        ? null
        : array_merge(array('PGPASSWORD' => $aDSN['password']), $_ENV);

    if (runWithEnv($sCommand, $aEnvironment) != 0) {
        fail('Error executing external command: '.$sCommand);
    }
}
/**
 * Prepare a freshly created database for the import.
 *
 * Checks the Postgresql/Postgis versions, installs the hstore and postgis
 * extensions, adds compatibility shims for old versions, verifies that the
 * web user and the nominatim.so module are available and loads the static
 * country and postcode data files.
 *
 * The country grid file is looked up AND loaded from CONST_ExtraDataPath;
 * previously the existence check used CONST_ExtraDataPath while the load
 * used CONST_BasePath.'/data', which broke setups where the two differ.
 */
public function setupDB()
{
    info('Setup DB');
    $this->oDB =& getDB();

    $fPostgresVersion = getPostgresVersion($this->oDB);
    echo 'Postgres version found: '.$fPostgresVersion."\n";

    if ($fPostgresVersion < 9.1) {
        fail('Minimum supported version of Postgresql is 9.1.');
    }

    $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
    $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');

    // For extratags and namedetails the hstore_to_json converter is
    // needed which is only available from Postgresql 9.3+. For older
    // versions add a dummy function that returns nothing.
    $iNumFunc = chksql($this->oDB->getOne("select count(*) from pg_proc where proname = 'hstore_to_json'"));

    if ($iNumFunc == 0) {
        $this->pgsqlRunScript("create function hstore_to_json(dummy hstore) returns text AS 'select null::text' language sql immutable");
        warn('Postgresql is too old. extratags and namedetails API not available.');
    }

    $fPostgisVersion = getPostgisVersion($this->oDB);
    echo 'Postgis version found: '.$fPostgisVersion."\n";

    if ($fPostgisVersion < 2.1) {
        // Functions were renamed in 2.1 and throw an annoying deprecation warning
        $this->pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
        $this->pgsqlRunScript('ALTER FUNCTION ST_Line_Locate_Point(geometry, geometry) RENAME TO ST_LineLocatePoint');
    }
    if ($fPostgisVersion < 2.2) {
        $this->pgsqlRunScript('ALTER FUNCTION ST_Distance_Spheroid(geometry, geometry, spheroid) RENAME TO ST_DistanceSpheroid');
    }

    $i = chksql($this->oDB->getOne("select count(*) from pg_user where usename = '".CONST_Database_Web_User."'"));
    if ($i == 0) {
        echo "\nERROR: Web user '".CONST_Database_Web_User."' does not exist. Create it with:\n";
        echo "\n createuser ".CONST_Database_Web_User."\n\n";
        exit(1);
    }

    // Try accessing the C module, so we know early if something is wrong
    if (!checkModulePresence()) {
        fail('error loading nominatim.so module');
    }

    // Single source of truth for the grid file location: the same path is
    // checked, shown in the download hint and loaded below.
    $sCountryGridFile = CONST_ExtraDataPath.'/country_osm_grid.sql.gz';
    if (!file_exists($sCountryGridFile)) {
        echo 'Error: you need to download the country_osm_grid first:';
        echo "\n wget -O ".$sCountryGridFile." https://www.nominatim.org/data/country_grid.sql.gz\n";
        exit(1);
    }

    $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
    $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
    $this->pgsqlRunScriptFile($sCountryGridFile);
    $this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');

    if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz')) {
        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
    } else {
        warn('external UK postcode table not found.');
    }

    if (CONST_Use_Extra_US_Postcodes) {
        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
    }

    if ($this->bNoPartitions) {
        $this->pgsqlRunScript('update country_name set partition = 0');
    }

    // the following will be needed by createFunctions later but
    // is only defined in the subsequently called createTables
    // Create dummies here that will be overwritten by the proper
    // versions in create-tables.
    $this->pgsqlRunScript('CREATE TABLE IF NOT EXISTS place_boundingbox ()');
    // non-fatal: the type may already exist
    $this->pgsqlRunScript('CREATE TYPE wikipedia_article_match AS ()', false);
}
/**
 * Run osm2pgsql (gazetteer output) on the given OSM file.
 *
 * The command line is assembled from CONST_Osm2pgsql_Binary, the optional
 * flatnode file and tablespace constants, the cache size and the DSN. After
 * the run the `place` table is probed; an empty result is reported through
 * fail() unless errors are being ignored.
 *
 * @param string $sOSMFile Path of the OSM file, appended to the command line.
 */
public function importData($sOSMFile)
{
    info('Import data');

    $sCommand = CONST_Osm2pgsql_Binary;
    if (!file_exists($sCommand)) {
        echo "Check CONST_Osm2pgsql_Binary in your local settings file.\n";
        echo "Normally you should not need to set this manually.\n";
        fail("osm2pgsql not found in '".$sCommand."'");
    }

    // a null constant is falsy, so a plain truth test covers "unset" as well
    if (CONST_Osm2pgsql_Flatnode_File) {
        $sCommand .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
    }

    // optional tablespace switches, in the order osm2pgsql receives them
    $aTablespaceFlags = array(
        ' --tablespace-slim-data ' => CONST_Tablespace_Osm2pgsql_Data,
        ' --tablespace-slim-index ' => CONST_Tablespace_Osm2pgsql_Index,
        ' --tablespace-main-data ' => CONST_Tablespace_Place_Data,
        ' --tablespace-main-index ' => CONST_Tablespace_Place_Index
    );
    foreach ($aTablespaceFlags as $sFlag => $sTablespace) {
        if ($sTablespace) {
            $sCommand .= $sFlag.$sTablespace;
        }
    }

    $aDSN = $this->aDSNInfo;

    $sCommand .= ' -lsc -O gazetteer --hstore --number-processes 1';
    $sCommand .= ' -C '.$this->iCacheMemory;
    $sCommand .= ' -P '.$aDSN['port'];
    if (!empty($aDSN['username'])) {
        $sCommand .= ' -U '.$aDSN['username'];
    }
    if (!empty($aDSN['hostspec'])) {
        $sCommand .= ' -H '.$aDSN['hostspec'];
    }
    $sCommand .= ' -d '.$aDSN['database'].' '.$sOSMFile;

    $aEnvironment = empty($aDSN['password'])
        ? null
        : array_merge(array('PGPASSWORD' => $aDSN['password']), $_ENV);

    // the exit status is not inspected; success is judged by the table probe below
    runWithEnv($sCommand, $aEnvironment);

    if ($this->oDB == null) {
        $this->oDB =& getDB();
    }
    if (!$this->sIgnoreErrors) {
        $aFirstRow = chksql($this->oDB->getRow('select * from place limit 1'));
        if (!$aFirstRow) {
            fail('No Data');
        }
    }
}
/**
 * Install the SQL functions after verifying the C module can be loaded.
 *
 * Also used by update.php, hence the module check is repeated here.
 */
public function createFunctions()
{
    info('Create Functions');

    // Probe the C module first so that a broken installation is reported
    // before any SQL function is (re)created.
    $bModuleAvailable = checkModulePresence();
    if (!$bModuleAvailable) {
        fail('error loading nominatim.so module');
    }

    $this->createSqlFunctions();
}
/**
 * Create the main tables from sql/tables.sql.
 *
 * The {www-user} placeholder and all {ts:*} tablespace placeholders are
 * substituted before the script is run in non-fatal mode.
 */
public function createTables()
{
    info('Create Tables');

    $sSql = file_get_contents(CONST_BasePath.'/sql/tables.sql');
    $sSql = str_replace('{www-user}', CONST_Database_Web_User, $sSql);

    // placeholder => configured tablespace (may be empty)
    $aTablespaces = array(
        '{ts:address-data}' => CONST_Tablespace_Address_Data,
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-data}' => CONST_Tablespace_Search_Data,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-data}' => CONST_Tablespace_Aux_Data,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index
    );
    foreach ($aTablespaces as $sPlaceholder => $sTablespace) {
        $sSql = $this->replaceTablespace($sPlaceholder, $sTablespace, $sSql);
    }

    $this->pgsqlRunScript($sSql, false);
}
/**
 * Create the per-partition tables from sql/partition-tables.src.sql.
 *
 * Tablespace placeholders are substituted first; the result is then
 * expanded once per partition by pgsqlRunPartitionScript().
 */
public function createPartitionTables()
{
    info('Create Partition Tables');

    $sSql = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql');

    // placeholder => configured tablespace (may be empty)
    $aTablespaces = array(
        '{ts:address-data}' => CONST_Tablespace_Address_Data,
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-data}' => CONST_Tablespace_Search_Data,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-data}' => CONST_Tablespace_Aux_Data,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index
    );
    foreach ($aTablespaces as $sPlaceholder => $sTablespace) {
        $sSql = $this->replaceTablespace($sPlaceholder, $sTablespace, $sSql);
    }

    $this->pgsqlRunPartitionScript($sSql);
}
/**
 * Create the per-partition SQL functions from sql/partition-functions.src.sql.
 */
public function createPartitionFunctions()
{
    info('Create Partition Functions');

    // the template is expanded once per partition by the runner
    $this->pgsqlRunPartitionScript(
        file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql')
    );
}
/**
 * Restore the wikipedia article and redirect dumps, if present.
 *
 * Both dumps are looked up in CONST_Wikipedia_Data_Path. A missing dump is
 * not an error: a warning is printed and the import carries on.
 */
public function importWikipediaArticles()
{
    // file name, message when importing, warning when the file is missing
    $aDumps = array(
        array(
            '/wikipedia_article.sql.bin',
            'Importing wikipedia articles',
            'wikipedia article dump file not found - places will have default importance'
        ),
        array(
            '/wikipedia_redirect.sql.bin',
            'Importing wikipedia redirects',
            'wikipedia redirect dump file not found - some place importance values may be missing'
        )
    );

    foreach ($aDumps as $aDump) {
        list($sFileName, $sImportMessage, $sMissingMessage) = $aDump;
        $sDumpPath = CONST_Wikipedia_Data_Path.$sFileName;

        if (!file_exists($sDumpPath)) {
            warn($sMissingMessage);
            continue;
        }

        info($sImportMessage);
        $this->pgsqlRunDropAndRestore($sDumpPath);
    }
}
/**
 * Empty the search tables and copy the imported `place` rows into them.
 *
 * Steps, in order: truncate the working tables and recreate seq_place,
 * truncate the per-partition road tables, (re)define get_maxwordfreq(),
 * optionally pre-load data/words.sql, fill placex in parallel on several
 * connections plus one extra connection for interpolation lines, run
 * ANALYSE and record the data date in import_status.
 *
 * @param bool $bDisableTokenPrecalc When true, data/words.sql is not loaded.
 */
public function loadData($bDisableTokenPrecalc)
{
    info('Drop old Data');

    if ($this->oDB == null) {
        $this->oDB =& getDB();
    }
    $hConnection = $this->oDB->connection;

    // statements that reset the database, each acknowledged with a dot
    $aResetStatements = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE location_property_osmline',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000'
    );
    foreach ($aResetStatements as $sStatement) {
        if (!pg_query($hConnection, $sStatement)) {
            fail(pg_last_error($hConnection));
        }
        echo '.';
    }

    $aPartitions = chksql($this->oDB->getCol('select distinct partition from country_name'));
    if (!$this->bNoPartitions) {
        $aPartitions[] = 0;
    }
    foreach ($aPartitions as $sPartition) {
        if (!pg_query($hConnection, 'TRUNCATE location_road_'.$sPartition)) {
            fail(pg_last_error($hConnection));
        }
        echo '.';
    }

    // used by getorcreate_word_id to ignore frequent partial words
    $sMaxFreqSQL = 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS '
        .'$$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE';
    if (!pg_query($hConnection, $sMaxFreqSQL)) {
        fail(pg_last_error($hConnection));
    }
    echo ".\n";

    // pre-create the word list
    if (!$bDisableTokenPrecalc) {
        info('Loading word list');
        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
    }

    info('Load Data');
    $sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry';

    // One connection per load thread; rows are split by osm_id modulo the
    // thread count so that each connection copies a disjoint slice.
    $aWorkers = array();
    $iLoadThreads = max(1, $this->iInstances - 1);
    for ($iWorker = 0; $iWorker < $iLoadThreads; $iWorker++) {
        $aWorkers[$iWorker] =& getDB(true);

        $sInsert = 'INSERT INTO placex ('.$sColumns.') SELECT '.$sColumns
            .' FROM place WHERE osm_id % '.$iLoadThreads.' = '.$iWorker;
        $sInsert .= " and not (class='place' and type='houses' and osm_type='W'";
        $sInsert .= " and ST_GeometryType(geometry) = 'ST_LineString')";
        $sInsert .= ' and ST_IsValid(geometry)';

        if ($this->sVerbose) {
            echo $sInsert."\n";
        }
        if (!pg_send_query($aWorkers[$iWorker]->connection, $sInsert)) {
            fail(pg_last_error($aWorkers[$iWorker]->connection));
        }
    }

    // last thread for interpolation lines
    $aWorkers[$iLoadThreads] =& getDB(true);
    $sInterpolationSQL = 'insert into location_property_osmline'
        .' (osm_id, address, linegeo)'
        .' SELECT osm_id, address, geometry from place where '
        ."class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'";
    if ($this->sVerbose) {
        echo $sInterpolationSQL."\n";
    }
    if (!pg_send_query($aWorkers[$iLoadThreads]->connection, $sInterpolationSQL)) {
        fail(pg_last_error($aWorkers[$iLoadThreads]->connection));
    }

    // Collect the results of all asynchronous queries; every failure is
    // printed before the import is aborted.
    $bAnyFailure = false;
    for ($iWorker = 0; $iWorker <= $iLoadThreads; $iWorker++) {
        while (($hResult = pg_get_result($aWorkers[$iWorker]->connection)) !== false) {
            $iStatus = pg_result_status($hResult);
            $bSucceeded = ($iStatus == PGSQL_COMMAND_OK || $iStatus == PGSQL_TUPLES_OK);
            if (!$bSucceeded) {
                echo '-- error text ' . $iWorker . ': ' . pg_result_error($hResult) . "\n";
                $bAnyFailure = true;
            }
        }
    }
    if ($bAnyFailure) {
        fail('SQL errors loading placex and/or location_property_osmline tables');
    }
    echo "\n";

    info('Reanalysing database');
    $this->pgsqlRunScript('ANALYSE');

    $sDatabaseDate = getDatabaseDate($this->oDB);
    pg_query($hConnection, 'TRUNCATE import_status');
    if ($sDatabaseDate === false) {
        warn('could not determine database date.');
        return;
    }

    pg_query($hConnection, "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')");
    echo 'Latest data imported from '.$sDatabaseDate.".\n";
}
/**
 * Import the TIGER data files found in CONST_Tiger_Data_Path.
 *
 * Runs sql/tiger_import_start.sql, feeds every line of each *.sql file as
 * one asynchronous query to a pool of connections, waits for the pool to
 * drain and finally runs sql/tiger_import_finish.sql.
 *
 * Fixes over the previous version:
 *  - a failed pg_send_query() now reports the error of the connection that
 *    failed (it used $this->oDB, which may not even be set in this method);
 *  - glob() returning false and files that cannot be opened are handled.
 */
public function importTigerData()
{
    info('Import Tiger data');

    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
    $sTemplate = $this->replaceTablespace(
        '{ts:aux-data}',
        CONST_Tablespace_Aux_Data,
        $sTemplate
    );
    $sTemplate = $this->replaceTablespace(
        '{ts:aux-index}',
        CONST_Tablespace_Aux_Index,
        $sTemplate
    );
    $this->pgsqlRunScript($sTemplate, false);

    $aDBInstances = array();
    for ($i = 0; $i < $this->iInstances; $i++) {
        $aDBInstances[$i] =& getDB(true);
    }

    // glob() may return false instead of an empty array on error
    $aFiles = glob(CONST_Tiger_Data_Path.'/*.sql');
    if ($aFiles === false) {
        $aFiles = array();
    }

    foreach ($aFiles as $sFile) {
        $hFile = fopen($sFile, 'r');
        if ($hFile === false) {
            warn('unable to open '.$sFile);
            continue;
        }
        echo $sFile.': ';

        // NOTE(review): this first line is read but never sent, because the
        // loop below reads the next line before sending. Kept unchanged;
        // confirm whether skipping the first line of each file is intended.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;
        while (true) {
            for ($i = 0; $i < $this->iInstances; $i++) {
                $hConnection = $aDBInstances[$i]->connection;
                if (!pg_connection_busy($hConnection)) {
                    // drain pending results so the connection accepts a new query
                    while (pg_get_result($hConnection));
                    $sSQL = fgets($hFile, 100000);
                    if (!$sSQL) break 2;
                    if (!pg_send_query($hConnection, $sSQL)) {
                        fail(pg_last_error($hConnection));
                    }
                    $iLines++;
                    if ($iLines == 1000) {
                        echo '.';
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }
        fclose($hFile);

        // wait until every connection has finished its last query
        $bAnyBusy = true;
        while ($bAnyBusy) {
            $bAnyBusy = false;
            for ($i = 0; $i < $this->iInstances; $i++) {
                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
            }
            usleep(10);
        }
        echo "\n";
    }

    info('Creating indexes on Tiger data');
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_finish.sql');
    $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
    $sTemplate = $this->replaceTablespace(
        '{ts:aux-data}',
        CONST_Tablespace_Aux_Data,
        $sTemplate
    );
    $sTemplate = $this->replaceTablespace(
        '{ts:aux-index}',
        CONST_Tablespace_Aux_Index,
        $sTemplate
    );
    $this->pgsqlRunScript($sTemplate, false);
}
/**
 * Rebuild the location_postcode table.
 *
 * Postcodes are collected from placex, then topped up from us_postcode
 * (when CONST_Use_Extra_US_Postcodes is set) and gb_postcode for codes not
 * yet present. Finally a search word is created for every postcode.
 *
 * @param bool $bCMDResultAll When false, postcode words that no longer
 *                            appear in location_postcode are deleted first.
 */
public function calculatePostcodes($bCMDResultAll)
{
    info('Calculate Postcodes');
    if ($this->oDB == null) {
        $this->oDB =& getDB();
    }

    // The statements are assembled first and executed in order afterwards.
    $aStatements = array('TRUNCATE location_postcode');

    $sFromPlacex = 'INSERT INTO location_postcode';
    $sFromPlacex .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
    $sFromPlacex .= "SELECT nextval('seq_place'), 1, country_code,";
    $sFromPlacex .= " upper(trim (both ' ' from address->'postcode')) as pc,";
    $sFromPlacex .= ' ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
    $sFromPlacex .= ' FROM placex';
    $sFromPlacex .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
    $sFromPlacex .= ' AND geometry IS NOT null';
    $sFromPlacex .= ' GROUP BY country_code, pc';
    $aStatements[] = $sFromPlacex;

    if (CONST_Use_Extra_US_Postcodes) {
        // only add postcodes that are not yet available in OSM
        $sFromUS = 'INSERT INTO location_postcode';
        $sFromUS .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
        $sFromUS .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
        $sFromUS .= ' ST_SetSRID(ST_Point(x,y),4326)';
        $sFromUS .= ' FROM us_postcode WHERE postcode NOT IN';
        $sFromUS .= ' (SELECT postcode FROM location_postcode';
        $sFromUS .= " WHERE country_code = 'us')";
        $aStatements[] = $sFromUS;
    }

    // add missing postcodes for GB (if available)
    $sFromGB = 'INSERT INTO location_postcode';
    $sFromGB .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
    $sFromGB .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
    $sFromGB .= ' FROM gb_postcode WHERE postcode NOT IN';
    $sFromGB .= ' (SELECT postcode FROM location_postcode';
    $sFromGB .= " WHERE country_code = 'gb')";
    $aStatements[] = $sFromGB;

    if (!$bCMDResultAll) {
        $sPurgeWords = "DELETE FROM word WHERE class='place' and type='postcode'";
        $sPurgeWords .= 'and word NOT IN (SELECT postcode FROM location_postcode)';
        $aStatements[] = $sPurgeWords;
    }

    $sCreateWords = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
    $sCreateWords .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
    $aStatements[] = $sCreateWords;

    foreach ($aStatements as $sStatement) {
        if (!pg_query($this->oDB->connection, $sStatement)) {
            fail(pg_last_error($this->oDB->connection));
        }
    }
}
/**
 * Index the imported data with the external nominatim indexer.
 *
 * The indexer is run three times (ranks up to 4, 5 to 25, 26 and above);
 * ANALYSE is run after the first two passes unless disabled. Afterwards
 * all rows of location_postcode are marked as indexed.
 *
 * @param bool $bIndexNoanalyse When true, the intermediate ANALYSE runs are skipped.
 */
public function index($bIndexNoanalyse)
{
    $aDSN = $this->aDSNInfo;

    $sIndexer = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSN['database']
        .' -P '.$aDSN['port'].' -t '.$this->iInstances;
    if (!empty($aDSN['hostspec'])) {
        $sIndexer .= ' -H '.$aDSN['hostspec'];
    }
    if (!empty($aDSN['username'])) {
        $sIndexer .= ' -U '.$aDSN['username'];
    }

    $aEnvironment = empty($aDSN['password'])
        ? null
        : array_merge(array('PGPASSWORD' => $aDSN['password']), $_ENV);

    // progress message, rank arguments, whether ANALYSE may follow the pass
    $aPasses = array(
        array('Index ranks 0 - 4', ' -R 4', true),
        array('Index ranks 5 - 25', ' -r 5 -R 25', true),
        array('Index ranks 26 - 30', ' -r 26', false)
    );
    foreach ($aPasses as $aPass) {
        list($sMessage, $sRankArgs, $bAnalyseAfterwards) = $aPass;

        info($sMessage);
        $iStatus = runWithEnv($sIndexer.$sRankArgs, $aEnvironment);
        if ($iStatus != 0) {
            fail('error status ' . $iStatus . ' running nominatim!');
        }
        if ($bAnalyseAfterwards && !$bIndexNoanalyse) {
            $this->pgsqlRunScript('ANALYSE');
        }
    }

    info('Index postcodes');
    if ($this->oDB == null) {
        $this->oDB =& getDB();
    }
    if (!pg_query($this->oDB->connection, 'UPDATE location_postcode SET indexed_status = 0')) {
        fail(pg_last_error($this->oDB->connection));
    }
}
/**
 * Create the search indices from sql/indices.src.sql.
 *
 * The {www-user} placeholder and the index tablespace placeholders are
 * substituted before the script is run.
 */
public function createSearchIndices()
{
    info('Create Search indices');

    $sSql = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');
    $sSql = str_replace('{www-user}', CONST_Database_Web_User, $sSql);

    // placeholder => configured tablespace (may be empty)
    $aIndexTablespaces = array(
        '{ts:address-index}' => CONST_Tablespace_Address_Index,
        '{ts:search-index}' => CONST_Tablespace_Search_Index,
        '{ts:aux-index}' => CONST_Tablespace_Aux_Index
    );
    foreach ($aIndexTablespaces as $sPlaceholder => $sTablespace) {
        $sSql = $this->replaceTablespace($sPlaceholder, $sTablespace, $sSql);
    }

    $this->pgsqlRunScript($sSql);
}
/**
 * Register the default country names as search terms.
 *
 * Adds the fixed aliases 'uk' and 'united states', every country code, the
 * default name of each country and the translated names. Translations are
 * limited to the languages in CONST_Languages when that constant is set,
 * otherwise every 'name:*' key is used.
 */
public function createCountryNames()
{
    info('Create search index for default country names');

    $aFixedStatements = array(
        "select getorcreate_country(make_standard_name('uk'), 'gb')",
        "select getorcreate_country(make_standard_name('united states'), 'us')",
        'select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x',
        "select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x"
    );
    foreach ($aFixedStatements as $sStatement) {
        $this->pgsqlRunScript($sStatement);
    }

    if (CONST_Languages) {
        // restrict to the configured languages: k in ('name:xx','name:yy',...)
        $aNameKeys = array_map(
            function ($sLang) {
                return "'name:".$sLang."'";
            },
            explode(',', CONST_Languages)
        );
        $sKeyFilter = 'in ('.implode(',', $aNameKeys).')';
    } else {
        // all include all simple name tags
        $sKeyFilter = "like 'name:%'";
    }

    $sTranslated = 'select count(*) from (select getorcreate_country(make_standard_name(v),'
        .'country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k '
        .$sKeyFilter
        .') v';
    $this->pgsqlRunScript($sTranslated);
}
/**
 * Drop every table that is only needed for updates.
 *
 * The implementation is potentially a bit dangerous because it uses
 * a positive selection of tables to keep, and deletes everything else.
 * Including any tables that the unsuspecting user might have manually
 * created. USE AT YOUR OWN PERIL.
 *
 * Also removes the osm2pgsql flatnode file when one is configured.
 *
 * Fixes over the previous version:
 *  - the lazy database initialisation used `=` instead of `==`, which
 *    reset $this->oDB to null and made the following getCol() call fail;
 *  - the flatnode message tested an undefined local $sVerbose instead of
 *    $this->sVerbose.
 */
public function drop()
{
    info('Drop tables only required for updates');

    // tables we want to keep. everything else goes.
    $aKeepTables = array(
        '*columns',
        'import_polygon_*',
        'import_status',
        'place_addressline',
        'location_postcode',
        'location_property*',
        'placex',
        'search_name',
        'seq_*',
        'word',
        'query_log',
        'new_query_log',
        'spatial_ref_sys',
        'country_name',
        'place_classtype_*'
    );

    if ($this->oDB == null) {
        $this->oDB =& getDB();
    }

    $aDropTables = array();
    $aHaveTables = chksql($this->oDB->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'"));

    foreach ($aHaveTables as $sTable) {
        $bFound = false;
        foreach ($aKeepTables as $sKeep) {
            if (fnmatch($sKeep, $sTable)) {
                $bFound = true;
                break;
            }
        }
        if (!$bFound) {
            array_push($aDropTables, $sTable);
        }
    }

    foreach ($aDropTables as $sDrop) {
        if ($this->sVerbose) {
            echo "dropping table $sDrop\n";
        }
        @pg_query($this->oDB->connection, "DROP TABLE $sDrop CASCADE");
        // ignore warnings/errors as they might be caused by a table having
        // been deleted already by CASCADE
    }

    if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
        if ($this->sVerbose) {
            echo 'deleting '.CONST_Osm2pgsql_Flatnode_File."\n";
        }
        unlink(CONST_Osm2pgsql_Flatnode_File);
    }
}
/**
 * Restore a custom-format dump with `pg_restore --clean`.
 *
 * The exit status of pg_restore is not evaluated.
 *
 * @param string $sDumpFile Path of the dump file, appended to the command line.
 */
private function pgsqlRunDropAndRestore($sDumpFile)
{
    if (empty($this->aDSNInfo['port'])) {
        $this->aDSNInfo['port'] = 5432;
    }
    $aDSN = $this->aDSNInfo;

    $sCommand = 'pg_restore -p '.$aDSN['port'].' -d '.$aDSN['database'].' -Fc --clean '.$sDumpFile;
    if (!empty($aDSN['hostspec'])) {
        $sCommand .= ' -h '.$aDSN['hostspec'];
    }
    if (!empty($aDSN['username'])) {
        $sCommand .= ' -U '.$aDSN['username'];
    }

    // hand the password to the child process via its environment
    $aEnvironment = empty($aDSN['password'])
        ? null
        : array_merge(array('PGPASSWORD' => $aDSN['password']), $_ENV);

    runWithEnv($sCommand, $aEnvironment);
}
/**
 * Run an SQL script through runSQLScript() with this object's
 * verbose and ignore-errors settings.
 *
 * @param string $sScript SQL text to execute.
 * @param bool   $bfatal  Passed through to runSQLScript() unchanged.
 */
private function pgsqlRunScript($sScript, $bfatal = true)
{
    $bVerbose = $this->sVerbose;
    $bIgnoreErrors = $this->sIgnoreErrors;

    runSQLScript($sScript, $bfatal, $bVerbose, $bIgnoreErrors);
}
/**
 * Load sql/functions.sql after applying the configuration switches.
 *
 * The template carries marker comments; depending on the settings a marker
 * is replaced so that the SQL following it becomes active (or, for diff
 * updates, so that the early `RETURN NEW` is commented out).
 */
private function createSqlFunctions()
{
    $sSql = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    $sSql = str_replace('{modulepath}', $this->sModulePath, $sSql);

    // condition, marker to look for, replacement text
    $aSwitches = array(
        array($this->bEnableDiffUpdates, 'RETURN NEW; -- %DIFFUPDATES%', '--'),
        array($this->bEnableDebugStatements, '--DEBUG:', ''),
        array(CONST_Limit_Reindexing, '--LIMIT INDEXING:', ''),
        array(!CONST_Use_US_Tiger_Data, '-- %NOTIGERDATA% ', ''),
        array(!CONST_Use_Aux_Location_data, '-- %NOAUXDATA% ', '')
    );
    foreach ($aSwitches as $aSwitch) {
        list($bApply, $sMarker, $sReplacement) = $aSwitch;
        if ($bApply) {
            $sSql = str_replace($sMarker, $sReplacement, $sSql);
        }
    }

    $this->pgsqlRunScript($sSql);
}
/**
 * Expand a partition template and run the resulting script.
 *
 * Every section between a line starting with `-- start` and a line
 * starting with `-- end` is repeated once per partition, with the text
 * `-partition-` replaced by the partition value.
 *
 * @param string $sTemplate SQL template containing the partition sections.
 */
private function pgsqlRunPartitionScript($sTemplate)
{
    if ($this->oDB == null) {
        $this->oDB =& getDB();
    }

    $aPartitions = chksql($this->oDB->getCol('select distinct partition from country_name'));
    if (!$this->bNoPartitions) {
        $aPartitions[] = 0;
    }

    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aSections, PREG_SET_ORDER);
    foreach ($aSections as $aSection) {
        // $aSection[0] is the whole section, $aSection[1] its body
        $aCopies = array_map(
            function ($sPartition) use ($aSection) {
                return str_replace('-partition-', $sPartition, $aSection[1]);
            },
            $aPartitions
        );
        $sTemplate = str_replace($aSection[0], implode('', $aCopies), $sTemplate);
    }

    $this->pgsqlRunScript($sTemplate);
}
/**
 * Execute an SQL file with the `psql` command line client.
 *
 * Plain files are passed to psql with -f. Files ending in .gz are
 * decompressed by a separate `zcat` process whose standard output is
 * connected to psql's standard input. The standard output of psql is
 * echoed; standard error of both processes goes to /dev/null.
 *
 * fail() is called when the file is missing, when a process cannot be
 * started or when psql returns a positive exit code.
 *
 * @param string $sFilename Path of the (optionally gzipped) SQL file.
 */
private function pgsqlRunScriptFile($sFilename)
{
if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
$sCMD = 'psql -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'];
// -q keeps psql quiet unless verbose output was requested
if (!$this->sVerbose) {
$sCMD .= ' -q';
}
if (isset($this->aDSNInfo['hostspec']) && $this->aDSNInfo['hostspec']) {
$sCMD .= ' -h '.$this->aDSNInfo['hostspec'];
}
if (isset($this->aDSNInfo['username']) && $this->aDSNInfo['username']) {
$sCMD .= ' -U '.$this->aDSNInfo['username'];
}
// the password is handed to psql through the PGPASSWORD environment variable
$aProcEnv = null;
if (isset($this->aDSNInfo['password']) && $this->aDSNInfo['password']) {
$aProcEnv = array_merge(array('PGPASSWORD' => $this->aDSNInfo['password']), $_ENV);
}
// stays null for plain files; used at the end to decide whether zcat needs cleaning up
$ahGzipPipes = null;
if (preg_match('/\\.gz$/', $sFilename)) {
// gzipped input: start zcat and use its stdout as stdin of psql
$aDescriptors = array(
0 => array('pipe', 'r'),
1 => array('pipe', 'w'),
2 => array('file', '/dev/null', 'a')
);
$hGzipProcess = proc_open('zcat '.$sFilename, $aDescriptors, $ahGzipPipes);
if (!is_resource($hGzipProcess)) fail('unable to start zcat');
$aReadPipe = $ahGzipPipes[1];
// zcat reads the file itself, so its stdin is not needed
fclose($ahGzipPipes[0]);
} else {
// plain input: psql reads the file itself
$sCMD .= ' -f '.$sFilename;
$aReadPipe = array('pipe', 'r');
}
$aDescriptors = array(
0 => $aReadPipe,
1 => array('pipe', 'w'),
2 => array('file', '/dev/null', 'a')
);
$ahPipes = null;
$hProcess = proc_open($sCMD, $aDescriptors, $ahPipes, null, $aProcEnv);
if (!is_resource($hProcess)) fail('unable to start pgsql');
// TODO: error checking
// forward everything psql prints until it closes its stdout
while (!feof($ahPipes[1])) {
echo fread($ahPipes[1], 4096);
}
fclose($ahPipes[1]);
$iReturn = proc_close($hProcess);
if ($iReturn > 0) {
fail("pgsql returned with error code ($iReturn)");
}
// clean up the decompressor, if one was started
if ($ahGzipPipes) {
fclose($ahGzipPipes[1]);
proc_close($hGzipProcess);
}
}
/**
 * Substitute a tablespace placeholder in an SQL text.
 *
 * @param string $sTemplate   Placeholder to look for, e.g. '{ts:aux-data}'.
 * @param string $sTablespace Tablespace name; when empty the placeholder is
 *                            simply removed.
 * @param string $sSql        SQL text containing the placeholder.
 *
 * @return string SQL text with the placeholder replaced by
 *                `TABLESPACE "<name>"` or by nothing.
 */
private function replaceTablespace($sTemplate, $sTablespace, $sSql)
{
    $sClause = $sTablespace ? 'TABLESPACE "'.$sTablespace.'"' : '';

    return str_replace($sTemplate, $sClause, $sSql);
}
}

38
lib/setup_functions.php Executable file
View File

@ -0,0 +1,38 @@
<?php
/**
 * Validate the file given with --osm-file.
 *
 * fail() is called when no file was given, when the path does not exist or
 * when the file cannot be read.
 *
 * @param string|null $sOSMFile Path supplied on the command line.
 */
function checkInFile($sOSMFile)
{
    if (!isset($sOSMFile)) {
        fail('missing --osm-file for data import');
    }

    if (!file_exists($sOSMFile)) {
        fail('the path supplied to --osm-file does not exist');
    }

    if (!is_readable($sOSMFile)) {
        // report the path that was actually checked; the previous message
        // read an undefined $aCMDResult array and printed an empty name
        fail('osm-file "' . $sOSMFile . '" not readable');
    }
}
/**
 * Check that the nominatim.so C module can be loaded by the database.
 *
 * A throw-away SQL function bound to the module's 'transliteration' symbol
 * is created and dropped again. On failure the reason is printed.
 *
 * @return bool True when the statement succeeded, false otherwise.
 */
function checkModulePresence()
{
    // create and immediately drop a function backed by the module
    $sProbeSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '"
        .CONST_Database_Module_Path
        ."/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT"
        .';DROP FUNCTION nominatim_test_import_func(text);';

    $oDB = &getDB();
    $oOutcome = $oDB->query($sProbeSQL);

    if (!PEAR::isError($oOutcome)) {
        return true;
    }

    echo "\nERROR: Failed to load nominatim module. Reason:\n";
    echo $oOutcome->userinfo . "\n\n";

    return false;
}

File diff suppressed because it is too large Load Diff

View File

@ -3,43 +3,48 @@
require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
require_once(CONST_BasePath.'/lib/init-cmd.php');
require_once(CONST_BasePath.'/lib/setup_functions.php');
require_once(CONST_BasePath.'/lib/setup/SetupClass.php');
ini_set('memory_limit', '800M');
# (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
use Nominatim\Setup\SetupFunctions as SetupFunctions;
// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
$aCMDOptions
= array(
'Import / update / index osm data',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
'Import / update / index osm data',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
);
array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
@ -98,13 +103,11 @@ if ($aResult['init-updates']) {
echo "and have set up CONST_Pyosmium_Binary to point to pyosmium-get-changes.\n";
fail('pyosmium-get-changes not found or not usable');
}
if (!$aResult['no-update-functions']) {
$sSetup ='@PHP_BIN@ '. CONST_InstallPath.'/utils/setup.php';
$iRet = -1;
passthru($sSetup.' --create-functions --enable-diff-updates', $iRet);
if ($iRet != 0) {
fail('Error running setup script');
}
// instantiate setupClass to use the function therein
$cSetup = new SetupFunctions('update');
$cSetup->createFunctions();
}
$sDatabaseDate = getDatabaseDate($oDB);