Merge pull request #803 from lonvia/update-postcodes

Add script to update table with artifical postcode centroids
This commit is contained in:
Sarah Hoffmann 2017-10-03 16:28:22 +02:00 committed by GitHub
commit e276ec2e94
4 changed files with 108 additions and 31 deletions

View File

@ -136,15 +136,53 @@ function info($sMsg)
$aWarnings = []; $aWarnings = [];
function warn($sMsg) function warn($sMsg)
{ {
$GLOBALS['aWarnings'][] = $sMsg; $GLOBALS['aWarnings'][] = $sMsg;
echo date('Y-m-d H:i:s == ').'WARNING: '.$sMsg."\n"; echo date('Y-m-d H:i:s == ').'WARNING: '.$sMsg."\n";
} }
function repeatWarnings() function repeatWarnings()
{ {
foreach ($GLOBALS['aWarnings'] as $sMsg) { foreach ($GLOBALS['aWarnings'] as $sMsg) {
echo ' * ',$sMsg."\n"; echo ' * ',$sMsg."\n";
} }
} }
function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false)
{
// Convert database DSN to psql parameters
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
$sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
if (!$bVerbose) {
$sCMD .= ' -q';
}
if ($bfatal && !$bIgnoreErrors) {
$sCMD .= ' -v ON_ERROR_STOP=1';
}
$aDescriptors = array(
0 => array('pipe', 'r'),
1 => STDOUT,
2 => STDERR
);
$ahPipes = null;
$hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
if (!is_resource($hProcess)) {
fail('unable to start pgsql');
}
while (strlen($sScript)) {
$written = fwrite($ahPipes[0], $sScript);
if ($written <= 0) break;
$sScript = substr($sScript, $written);
}
fclose($ahPipes[0]);
$iReturn = proc_close($hProcess);
if ($bfatal && $iReturn > 0) {
fail("pgsql returned with error code ($iReturn)");
}
}

52
sql/update-postcodes.sql Normal file
View File

@ -0,0 +1,52 @@
-- Create a temporary table with postcodes from placex.
CREATE TEMP TABLE tmp_new_postcode_locations AS
SELECT country_code,
upper(trim (both ' ' from address->'postcode')) as pc,
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
FROM placex
WHERE address ? 'postcode'
AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
AND geometry IS NOT null
GROUP BY country_code, pc;
CREATE INDEX idx_tmp_new_postcode_locations
ON tmp_new_postcode_locations (pc, country_code);
-- add extra US postcodes
INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid)
SELECT 'us', postcode, ST_SetSRID(ST_Point(x,y),4326)
FROM us_postcode u
WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new
WHERE new.country_code = 'us' AND new.pc = u.postcode);
-- add extra UK postcodes
INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid)
SELECT 'gb', postcode, geometry FROM gb_postcode g
WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new
WHERE new.country_code = 'gb' and new.pc = g.postcode);
-- Remove all postcodes that are no longer valid
DELETE FROM location_postcode old
WHERE NOT EXISTS(SELECT 0 FROM tmp_new_postcode_locations new
WHERE old.postcode = new.pc
AND old.country_code = new.country_code);
-- Update geometries where necessary
UPDATE location_postcode old SET geometry = new.centroid, indexed_status = 1
FROM tmp_new_postcode_locations new
WHERE old.postcode = new.pc AND old.country_code = new.country_code
AND ST_AsText(old.geometry) != ST_AsText(new.centroid);
-- Remove all postcodes that already exist from the temporary table
DELETE FROM tmp_new_postcode_locations new
WHERE EXISTS(SELECT 0 FROM location_postcode old
WHERE old.postcode = new.pc AND old.country_code = new.country_code);
-- Add newly added postcode
INSERT INTO location_postcode
(place_id, indexed_status, country_code, postcode, geometry)
SELECT nextval('seq_place'), 1, country_code, pc, centroid
FROM tmp_new_postcode_locations new;
-- Finally index the newly inserted postcodes
UPDATE location_postcode SET indexed_status = 0 WHERE indexed_status > 0;

View File

@ -541,8 +541,10 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
$sSQL .= " (SELECT postcode FROM location_postcode"; $sSQL .= " (SELECT postcode FROM location_postcode";
$sSQL .= " WHERE country_code = 'us')"; $sSQL .= " WHERE country_code = 'us')";
if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } else {
$sSQL .= "TRUNCATE TABLE us_postcode";
} }
if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
// add missing postcodes for GB (if available) // add missing postcodes for GB (if available)
$sSQL = "INSERT INTO location_postcode"; $sSQL = "INSERT INTO location_postcode";
@ -765,34 +767,12 @@ function pgsqlRunScriptFile($sFilename)
function pgsqlRunScript($sScript, $bfatal = true) function pgsqlRunScript($sScript, $bfatal = true)
{ {
global $aCMDResult; global $aCMDResult;
// Convert database DSN to psql parameters runSQLScript(
$aDSNInfo = DB::parseDSN(CONST_Database_DSN); $sScript,
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; $bfatal,
$sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database']; $aCMDResult['verbose'],
if (!$aCMDResult['verbose']) { $aCMDResult['ignore-errors']
$sCMD .= ' -q'; );
}
if ($bfatal && !$aCMDResult['ignore-errors'])
$sCMD .= ' -v ON_ERROR_STOP=1';
$aDescriptors = array(
0 => array('pipe', 'r'),
1 => STDOUT,
2 => STDERR
);
$ahPipes = null;
$hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
if (!is_resource($hProcess)) fail('unable to start pgsql');
while (strlen($sScript)) {
$written = fwrite($ahPipes[0], $sScript);
if ($written <= 0) break;
$sScript = substr($sScript, $written);
}
fclose($ahPipes[0]);
$iReturn = proc_close($hProcess);
if ($bfatal && $iReturn > 0) {
fail("pgsql returned with error code ($iReturn)");
}
} }
function pgsqlRunPartitionScript($sTemplate) function pgsqlRunPartitionScript($sTemplate)

View File

@ -15,10 +15,9 @@ $aCMDOptions
array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'), array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'), array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'), array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'),
array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'), array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
array('import-all', '', 0, 1, 0, 0, 'bool', 'Import all available files'), array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'), array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'), array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
@ -34,6 +33,7 @@ $aCMDOptions
array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'), array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'), array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
); );
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
@ -101,6 +101,7 @@ if ($aResult['init-updates']) {
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
// import diffs and files directly (e.g. from osmosis --rri) // import diffs and files directly (e.g. from osmosis --rri)
$sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
if (!file_exists($sNextFile)) { if (!file_exists($sNextFile)) {
fail("Cannot open $sNextFile\n"); fail("Cannot open $sNextFile\n");
} }
@ -117,6 +118,12 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
// Don't update the import status - we don't know what this file contains // Don't update the import status - we don't know what this file contains
} }
if ($aResult['calculate-postcodes']) {
info("Update postcodes centroids");
$sTemplate = file_get_contents(CONST_BasePath.'/sql/update-postcodes.sql');
runSQLScript($sTemplate, true, true);
}
$sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc'; $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';
$bHaveDiff = false; $bHaveDiff = false;
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api']; $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];