mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-25 05:52:32 +03:00
changed export.php to work with current master
This commit is contained in:
parent
dfb9579a73
commit
073221d321
187
utils/export.php
187
utils/export.php
@ -1,79 +1,75 @@
|
||||
#!/usr/bin/php -Cq
|
||||
<?php
|
||||
# Script to extract structured city and street data
|
||||
# from a running nominatim instance as CSV data
|
||||
// Script to extract structured city and street data
|
||||
// from a running nominatim instance as CSV data
|
||||
|
||||
|
||||
require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
|
||||
ini_set('memory_limit', '800M');
|
||||
require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
|
||||
require_once(CONST_BasePath.'/lib/init-cmd.php');
|
||||
require_once(CONST_BasePath.'/lib/ParameterParser.php');
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
$aCMDOptions = array(
|
||||
"Export addresses as CSV file from a Nominatim database",
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
$aCMDOptions = array(
|
||||
'Export addresses as CSV file from a Nominatim database',
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
|
||||
array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
|
||||
array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
|
||||
array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
|
||||
array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
|
||||
array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
|
||||
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
|
||||
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
|
||||
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
|
||||
"\nAddress ranks: continent, country, state, county, city, suburb, street, path",
|
||||
"Additional output types: postcode, placeid (placeid for each object)",
|
||||
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
|
||||
"can be merged into one column by simply using a comma-separated list.",
|
||||
"\nDefault output-type: street",
|
||||
"Default output format: street;suburb;city;county;state;country"
|
||||
);
|
||||
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||
array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
|
||||
array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
|
||||
array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
|
||||
array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
|
||||
array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
|
||||
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
|
||||
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
|
||||
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
|
||||
"\nAddress ranks: continent, country, state, county, city, suburb, street, path",
|
||||
'Additional output types: postcode, placeid (placeid for each object)',
|
||||
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
|
||||
'can be merged into one column by simply using a comma-separated list.',
|
||||
"\nDefault output-type: street",
|
||||
'Default output format: street;suburb;city;county;state;country'
|
||||
);
|
||||
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||
|
||||
$aRankmap = array( 'continent' => 1,
|
||||
'country' => 4,
|
||||
'state' => 8,
|
||||
'county' => 12,
|
||||
'city' => 16,
|
||||
'suburb' => 20,
|
||||
'street' => 26,
|
||||
'path' => 27
|
||||
);
|
||||
$aRankmap = array(
|
||||
'continent' => 1,
|
||||
'country' => 4,
|
||||
'state' => 8,
|
||||
'county' => 12,
|
||||
'city' => 16,
|
||||
'suburb' => 20,
|
||||
'street' => 26,
|
||||
'path' => 27
|
||||
);
|
||||
|
||||
$oDB =& getDB();
|
||||
|
||||
if (isset($aCMDResult['output-type']))
|
||||
{
|
||||
if (isset($aCMDResult['output-type'])) {
|
||||
if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']);
|
||||
$iOutputRank = $aRankmap[$aCMDResult['output-type']];
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
$iOutputRank = $aRankmap['street'];
|
||||
}
|
||||
|
||||
|
||||
// Preferred language
|
||||
$oParams = new Nominatim\ParameterParser();
|
||||
if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx';
|
||||
$aLangPrefOrder = getPreferredLanguages($aCMDResult['language']);
|
||||
$sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
|
||||
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
|
||||
$sLanguagePrefArraySQL = 'ARRAY['.join(',', array_map('getDBQuoted', $aLangPrefOrder)).']';
|
||||
|
||||
// output formatting: build up a lookup table that maps address ranks to columns
|
||||
$aColumnMapping = array();
|
||||
$iNumCol = 0;
|
||||
If (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
|
||||
foreach (preg_split('/\s*;\s*/',$aCMDResult['output-format']) as $sColumn)
|
||||
{
|
||||
if (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
|
||||
foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
|
||||
$bHasData = false;
|
||||
foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank)
|
||||
{
|
||||
if ($sRank == 'postcode' || $sRank == 'placeid')
|
||||
{
|
||||
foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
|
||||
if ($sRank == 'postcode' || $sRank == 'placeid') {
|
||||
$aColumnMapping[$sRank] = $iNumCol;
|
||||
$bHasData = true;
|
||||
}
|
||||
elseif (isset($aRankmap[$sRank]))
|
||||
{
|
||||
} elseif (isset($aRankmap[$sRank])) {
|
||||
$iRank = $aRankmap[$sRank];
|
||||
if ($iRank <= $iOutputRank) {
|
||||
$aColumnMapping[(string)$iRank] = $iNumCol;
|
||||
@ -87,39 +83,34 @@
|
||||
// build the query for objects
|
||||
$sPlacexSQL = 'select min(place_id) as place_id, ';
|
||||
$sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
|
||||
$sPlacexSQL .= 'calculated_country_code as cc, ';
|
||||
$sPlacexSQL .= 'country_code as cc, ';
|
||||
// get the address places excluding postcodes
|
||||
$sPlacexSQL .= 'array(select address_place_id from place_addressline a where a.place_id = placex.place_id and isaddress and address_place_id != placex.place_id and not cached_rank_address in (5,11) and cached_rank_address > 2 order by cached_rank_address) as address';
|
||||
$sPlacexSQL .= " from placex where name is not null and linked_place_id is null";
|
||||
$sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
|
||||
|
||||
$sPlacexSQL .= ' and rank_address = '.$iOutputRank;
|
||||
|
||||
if (isset($aCMDResult['restrict-to-country']))
|
||||
{
|
||||
$sPlacexSQL .= ' and calculated_country_code = '.getDBQuoted($aCMDResult['restrict-to-country']);
|
||||
if (isset($aCMDResult['restrict-to-country'])) {
|
||||
$sPlacexSQL .= ' and country_code = '.getDBQuoted($aCMDResult['restrict-to-country']);
|
||||
}
|
||||
|
||||
// restriction to parent place id
|
||||
$sParentId = false;
|
||||
$sOsmType = false;
|
||||
|
||||
if (isset($aCMDResult['restrict-to-osm-node']))
|
||||
{
|
||||
if (isset($aCMDResult['restrict-to-osm-node'])) {
|
||||
$sOsmType = 'N';
|
||||
$sOsmId = $aCMDResult['restrict-to-osm-node'];
|
||||
}
|
||||
if (isset($aCMDResult['restrict-to-osm-way']))
|
||||
{
|
||||
if (isset($aCMDResult['restrict-to-osm-way'])) {
|
||||
$sOsmType = 'W';
|
||||
$sOsmId = $aCMDResult['restrict-to-osm-way'];
|
||||
}
|
||||
if (isset($aCMDResult['restrict-to-osm-relation']))
|
||||
{
|
||||
if (isset($aCMDResult['restrict-to-osm-relation'])) {
|
||||
$sOsmType = 'R';
|
||||
$sOsmId = $aCMDResult['restrict-to-osm-relation'];
|
||||
}
|
||||
if ($sOsmType)
|
||||
{
|
||||
if ($sOsmType) {
|
||||
$sSQL = 'select place_id from placex where';
|
||||
$sSQL .= ' osm_type = '.getDBQuoted($sOsmType);
|
||||
$sSQL .= ' and osm_id = '.$sOsmId;
|
||||
@ -127,67 +118,55 @@
|
||||
if (PEAR::isError($sParentId)) fail(pg_last_error($oDB->connection));
|
||||
if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId);
|
||||
}
|
||||
if ($sParentId)
|
||||
{
|
||||
if ($sParentId) {
|
||||
$sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
|
||||
}
|
||||
|
||||
$sPlacexSQL .= " group by name->'name', address, calculated_country_code";
|
||||
$sPlacexSQL .= " group by name->'name', address, country_code, placex.place_id";
|
||||
|
||||
# Iterate over placeids
|
||||
# to get further hierarchical information
|
||||
// Iterate over placeids
|
||||
// to get further hierarchical information
|
||||
//var_dump($sPlacexSQL);
|
||||
$aRes =& $oDB->query($sPlacexSQL);
|
||||
if (PEAR::isError($aRes)) fail(pg_last_error($oDB->connection));
|
||||
$fOutstream = fopen("php://output", 'w');
|
||||
while ($aRes->fetchInto($aRow))
|
||||
{
|
||||
//var_dump($aRow);
|
||||
$iPlaceID = $aRow['place_id'];
|
||||
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID)";
|
||||
$sSQL .= " WHERE isaddress";
|
||||
$sSQL .= " order by rank_address desc,isaddress desc";
|
||||
$aAddressLines = $oDB->getAll($sSQL);
|
||||
$fOutstream = fopen('php://output', 'w');
|
||||
while ($aRes->fetchInto($aRow)) {
|
||||
//var_dump($aRow);
|
||||
$iPlaceID = $aRow['place_id'];
|
||||
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, -1)";
|
||||
$sSQL .= ' WHERE isaddress';
|
||||
$sSQL .= ' order by rank_address desc,isaddress desc';
|
||||
$aAddressLines = $oDB->getAll($sSQL);
|
||||
if (PEAR::IsError($aAddressLines)) fail(pg_last_error($oDB->connection));
|
||||
|
||||
|
||||
$aOutput = array_fill(0, $iNumCol, '');
|
||||
# output address parts
|
||||
foreach ($aAddressLines as $aAddress)
|
||||
{
|
||||
if (isset($aColumnMapping[$aAddress['rank_address']]))
|
||||
{
|
||||
// output address parts
|
||||
foreach ($aAddressLines as $aAddress) {
|
||||
if (isset($aColumnMapping[$aAddress['rank_address']])) {
|
||||
$aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
|
||||
}
|
||||
}
|
||||
# output postcode
|
||||
if (isset($aColumnMapping['postcode']))
|
||||
{
|
||||
if ($aCMDResult['output-all-postcodes'])
|
||||
{
|
||||
$sSQL = "select array_agg(px.postcode) from placex px join place_addressline pa ";
|
||||
// output postcode
|
||||
if (isset($aColumnMapping['postcode'])) {
|
||||
if ($aCMDResult['output-all-postcodes']) {
|
||||
$sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
|
||||
} else {
|
||||
$sSQL = 'select px.postcode from placex px join place_addressline pa ';
|
||||
}
|
||||
else
|
||||
{
|
||||
$sSQL = "select px.postcode from placex px join place_addressline pa ";
|
||||
}
|
||||
$sSQL .= "on px.place_id = pa.address_place_id ";
|
||||
$sSQL .= "where pa.cached_rank_address in (5,11) ";
|
||||
$sSQL .= "and pa.place_id in (select place_id from place_addressline where address_place_id in (".substr($aRow['place_ids'], 1, -1).")) ";
|
||||
$sSQL .= "group by postcode order by count(*) desc limit 1";
|
||||
$sSQL .= 'on px.place_id = pa.address_place_id ';
|
||||
$sSQL .= 'where pa.cached_rank_address in (5,11) ';
|
||||
$sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in ('.substr($aRow['place_ids'], 1, -1).')) ';
|
||||
$sSQL .= 'group by postcode order by count(*) desc limit 1';
|
||||
$sRes = $oDB->getOne($sSQL);
|
||||
if (PEAR::IsError($sRes)) fail(pg_last_error($oDB->connection));
|
||||
if ($aCMDResult['output-all-postcodes'])
|
||||
{
|
||||
if ($aCMDResult['output-all-postcodes']) {
|
||||
$aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
$aOutput[$aColumnMapping['postcode']] = $sRes;
|
||||
}
|
||||
}
|
||||
if (isset($aColumnMapping['placeid']))
|
||||
{
|
||||
if (isset($aColumnMapping['placeid'])) {
|
||||
$aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
|
||||
}
|
||||
fputcsv($fOutstream, $aOutput);
|
||||
|
Loading…
Reference in New Issue
Block a user