#!/usr/bin/php -Cq
<?php
// Script to extract structured city and street data
// from a running nominatim instance as CSV data
require_once ( dirname ( dirname ( __FILE__ )) . '/settings/settings.php' );
require_once ( CONST_BasePath . '/lib/init-cmd.php' );
require_once ( CONST_BasePath . '/lib/ParameterParser.php' );
// Raise PHP's memory limit for this run.
ini_set('memory_limit', '800M');

// Command-line specification handed to getCmdOpt(). Plain strings appear to be
// free-form help text and each nested array declares one switch.
// NOTE(review): the meaning of the numeric fields and of the type field is
// defined by getCmdOpt(), which is not visible in this file - confirm there.
$aCMDOptions = array(
    'Export addresses as CSV file from a Nominatim database',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
    array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
    array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
    array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
    array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
    array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
    array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
    array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),

    " \n Address ranks: continent, country, state, county, city, suburb, street, path ",
    'Additional output types: postcode, placeid (placeid for each object)',
    " \n output-format must be a semicolon-separated list of address ranks. Multiple ranks ",
    'can be merged into one column by simply using a comma-separated list.',
    " \n Default output-type: street ",
    'Default output format: street;suburb;city;county;state;country'
);

// Parse the script arguments into $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
// Address level names accepted by --output-type / --output-format, mapped to
// the numeric rank that is compared against rank_address further down.
$aRankmap = array(
    'continent' => 1,
    'country'   => 4,
    'state'     => 8,
    'county'    => 12,
    'city'      => 16,
    'suburb'    => 20,
    'street'    => 26,
    'path'      => 27
);
// Database handle used by every query in this script.
$oDB =& getDB();

// Resolve the requested output type to its address rank. 'street' is used when
// --output-type was not given; a name missing from $aRankmap is reported via fail().
$sOutputType = isset($aCMDResult['output-type']) ? $aCMDResult['output-type'] : 'street';
if (!isset($aRankmap[$sOutputType])) fail('unknown output-type: ' . $sOutputType);
$iOutputRank = $aRankmap[$sOutputType];
// Preferred language: without --language the placeholder code 'xx' is passed
// on (the option help describes this case as "local name, if omitted").
if (!isset($aCMDResult['language'])) {
    $aCMDResult['language'] = 'xx';
}

$oParams = new Nominatim\ParameterParser();
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);

// Render the preference list as an SQL ARRAY[...] literal, with every entry
// passed through getDBQuoted().
$aQuotedLanguages = array_map('getDBQuoted', $aLangPrefOrder);
$sLanguagePrefArraySQL = 'ARRAY[' . implode(',', $aQuotedLanguages) . ']';
// Output formatting: build a lookup table that maps an address rank (or the
// special keys 'postcode' / 'placeid') to the index of its CSV column.
// $iNumCol ends up as the number of columns that received at least one key.
if (!isset($aCMDResult['output-format'])) {
    $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
}

$aColumnMapping = array();
$iNumCol = 0;

// ';' separates columns, ',' separates the ranks merged into one column.
foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
    $bHasData = false;

    foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
        if (in_array($sRank, array('postcode', 'placeid'), true)) {
            $aColumnMapping[$sRank] = $iNumCol;
            $bHasData = true;
            continue;
        }

        // Names that are not in the rank map contribute nothing.
        if (!isset($aRankmap[$sRank])) continue;

        // Ranks with a larger number than the output rank are skipped as well.
        $iRank = $aRankmap[$sRank];
        if ($iRank > $iOutputRank) continue;

        $aColumnMapping[(string) $iRank] = $iNumCol;
        $bHasData = true;
    }

    // A column that received no key does not consume an index.
    if ($bHasData) $iNumCol++;
}
// Build the query for the objects to export.
$aSelectColumns = array(
    'min(place_id) as place_id',
    'array_agg(place_id) as place_ids',
    'country_code as cc',
    // get the address places excluding postcodes
    'array(select address_place_id from place_addressline a where a.place_id = placex.place_id and isaddress and address_place_id != placex.place_id and not cached_rank_address in (5,11) and cached_rank_address > 2 order by cached_rank_address) as address'
);

$sPlacexSQL = 'select ' . implode(', ', $aSelectColumns);
$sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
$sPlacexSQL .= ' and rank_address = ' . $iOutputRank;

// Optional restriction to a single country code.
if (isset($aCMDResult['restrict-to-country'])) {
    $sPlacexSQL .= ' and country_code = ' . getDBQuoted($aCMDResult['restrict-to-country']);
}
// Restriction to a parent place: an OSM node, way or relation id is looked up
// in placex and the export is narrowed to places that reference the resulting
// place_id as an address part.
$sParentId = false;
$sOsmType = false;

// Option name => osm_type code. When several of these options are given, the
// later table entry overrides the earlier one.
$aOsmRestrictions = array(
    'restrict-to-osm-node'     => 'N',
    'restrict-to-osm-way'      => 'W',
    'restrict-to-osm-relation' => 'R'
);
foreach ($aOsmRestrictions as $sOptionName => $sTypeCode) {
    if (isset($aCMDResult[$sOptionName])) {
        $sOsmType = $sTypeCode;
        $sOsmId = $aCMDResult[$sOptionName];
    }
}

if ($sOsmType) {
    $sSQL = 'select place_id from placex where'
        . ' osm_type = ' . getDBQuoted($sOsmType)
        . ' and osm_id = ' . $sOsmId;
    $sParentId = $oDB->getOne($sSQL);
    if (PEAR::isError($sParentId)) fail(pg_last_error($oDB->connection));
    if (!$sParentId) fail('Could not find place ' . $sOsmType . ' ' . $sOsmId);
}

if ($sParentId) {
    $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = ' . $sParentId . ' and isaddress)';
}
// Finish the place query with its grouping clause.
$sPlacexSQL .= ' group by name->\'name\', address, country_code, placex.place_id ';

// Run it; the rows are iterated afterwards to collect the hierarchical
// address information for each place id.
$aRes =& $oDB->query($sPlacexSQL);
if (PEAR::isError($aRes)) {
    fail(pg_last_error($oDB->connection));
}
// Write the export as CSV to standard output: one line per row of the place
// query, with the address parts sorted into the columns described by
// $aColumnMapping ($iNumCol columns in total).
$fOutstream = fopen('php://output', 'w');
if ($fOutstream === false) fail('Could not open output stream');

// Loop-invariant switches. The postcode flag is read with !empty() because the
// other optional switches in this script are isset()-guarded, i.e. an option
// that was not given may be missing from $aCMDResult altogether.
$bAllPostcodes = !empty($aCMDResult['output-all-postcodes']);
$bWantPostcode = isset($aColumnMapping['postcode']);
$bWantPlaceId = isset($aColumnMapping['placeid']);

// fetchInto() may hand back an error object, which is truthy. It is checked
// explicitly so that a failed fetch aborts instead of looping forever.
while ($mFetchStatus = $aRes->fetchInto($aRow)) {
    if (PEAR::isError($mFetchStatus)) fail(pg_last_error($oDB->connection));

    $iPlaceID = $aRow['place_id'];
    // place_ids is the text form of the array_agg() column; strip the first and
    // last character (presumably the surrounding '{' and '}') to get a plain
    // comma-separated id list.
    $sPlaceIds = substr($aRow['place_ids'], 1, -1);

    // Address lines of the place, with names in the preferred language.
    $sSQL = "select rank_address,get_name_by_language(name, $sLanguagePrefArraySQL) as localname";
    $sSQL .= " from get_addressdata($iPlaceID, -1)";
    $sSQL .= ' WHERE isaddress';
    $sSQL .= ' order by rank_address desc,isaddress desc';
    $aAddressLines = $oDB->getAll($sSQL);
    if (PEAR::isError($aAddressLines)) fail(pg_last_error($oDB->connection));

    // Start from an all-empty row; array_fill() rejects a zero count on older
    // PHP versions, so the no-column case is handled separately.
    $aOutput = ($iNumCol > 0) ? array_fill(0, $iNumCol, '') : array();

    // output address parts
    foreach ($aAddressLines as $aAddress) {
        if (isset($aColumnMapping[$aAddress['rank_address']])) {
            $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
        }
    }

    // output postcode
    if ($bWantPostcode) {
        $sPostcodeSource = 'from placex px join place_addressline pa ';
        $sPostcodeSource .= 'on px.place_id = pa.address_place_id ';
        $sPostcodeSource .= 'where pa.cached_rank_address in (5,11) ';
        $sPostcodeSource .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (' . $sPlaceIds . ')) ';

        if ($bAllPostcodes) {
            // Aggregate over the per-postcode groups, most frequent first.
            // Aggregating inside a "group by postcode ... limit 1" query would
            // only return copies of the single most frequent postcode.
            $sSQL = 'select array_agg(postcode order by cnt desc) from (';
            $sSQL .= 'select px.postcode as postcode, count(*) as cnt ' . $sPostcodeSource;
            $sSQL .= 'group by px.postcode) as pc';
        } else {
            // Only the most frequent postcode.
            $sSQL = 'select px.postcode ' . $sPostcodeSource;
            $sSQL .= 'group by postcode order by count(*) desc limit 1';
        }
        $sRes = $oDB->getOne($sSQL);
        if (PEAR::isError($sRes)) fail(pg_last_error($oDB->connection));

        if ($bAllPostcodes) {
            // Strip the array braces; an empty or missing result becomes ''.
            $aOutput[$aColumnMapping['postcode']] = (string) substr((string) $sRes, 1, -1);
        } else {
            $aOutput[$aColumnMapping['postcode']] = $sRes;
        }
    }

    // output the place ids merged into this row
    if ($bWantPlaceId) {
        $aOutput[$aColumnMapping['placeid']] = $sPlaceIds;
    }

    fputcsv($fOutstream, $aOutput);
}
fclose($fOutstream);