hstore names / refactoring

This commit is contained in:
Brian Quinion 2010-10-27 14:05:42 +00:00
parent 32fc283d37
commit 57583f09e3
6 changed files with 1185 additions and 90 deletions

View File

@ -1,15 +1,19 @@
<?php
require_once('DB.php');
// Get the database object
$oDB =& DB::connect(CONST_Database_DSN, false);
if (PEAR::IsError($oDB))
function &getDB()
{
fail($oDB->getMessage(), 'Unable to connect to the database');
// Get the database object
$oDB =& DB::connect(CONST_Database_DSN, false);
if (PEAR::IsError($oDB))
{
fail($oDB->getMessage(), 'Unable to connect to the database');
}
$oDB->setFetchMode(DB_FETCHMODE_ASSOC);
$oDB->query("SET DateStyle TO 'sql,european'");
$oDB->query("SET client_encoding TO 'utf-8'");
return $oDB;
}
$oDB->setFetchMode(DB_FETCHMODE_ASSOC);
$oDB->query("SET DateStyle TO 'sql,european'");
$oDB->query("SET client_encoding TO 'utf-8'");
function getDBQuoted($s)
{

View File

@ -1,5 +1,8 @@
<?php
if (file_exists(CONST_BasePath.'/settings/local.php')) require_once(CONST_BasePath.'/settings/local.php');
if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
// General settings
@define('CONST_Debug', false);
@define('CONST_Database_DSN', 'pgsql://@/nominatim');
@ -23,5 +26,3 @@
@define('CONST_Search_AreaPolygons_Enabled', true);
@define('CONST_Suggestions_Enabled', false);

View File

@ -106,11 +106,11 @@ $$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
AS '/home/brian/nominatim/live/osm2pgsql/gazetteer/gazetteer.so', 'transliteration'
AS '{modulepath}/nominatim.so', 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
AS '/home/brian/nominatim/live/osm2pgsql/gazetteer/gazetteer.so', 'gettokenstring'
AS '{modulepath}/nominatim.so', 'gettokenstring'
LANGUAGE c IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
@ -1004,12 +1004,14 @@ BEGIN
NEW.rank_search := 26;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('house','building') THEN
NEW.rank_search := 28;
NEW.rank_search := 30;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('houses') THEN
-- can't guarantee all required nodes loaded yet due to caching in osm2pgsql
-- insert new point into place for each derived building
--i := create_interpolation(NEW.osm_id, NEW.housenumber);
NEW.rank_search := 28;
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'boundary' THEN
@ -1075,10 +1077,10 @@ BEGIN
-- mark items within the geometry for re-indexing
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
-- work around bug in postgis
update placex set indexed = false where indexed and (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND rank_search > NEW.rank_search and ST_geometrytype(placex.geometry) = 'ST_Point';
update placex set indexed = false where indexed and (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND rank_search > NEW.rank_search and ST_geometrytype(placex.geometry) != 'ST_Point';
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND rank_search > NEW.rank_search and indexed = 0 and ST_geometrytype(placex.geometry) = 'ST_Point';
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND rank_search > NEW.rank_search and indexed = 0 and ST_geometrytype(placex.geometry) != 'ST_Point';
END IF;
ELSE
-- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :(
@ -1103,7 +1105,7 @@ BEGIN
END IF;
IF diameter > 0 THEN
-- RAISE WARNING 'placex point insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,diameter;
update placex set indexed = false where indexed and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter);
update placex set indexed = 2 where indexed and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter);
END IF;
END IF;
@ -1272,6 +1274,7 @@ BEGIN
-- Try and find a way that is close and roughly parallel to this line
FOR relation IN SELECT place_id FROM placex
WHERE ST_DWithin(location.geometry, placex.geometry, 0.001) and placex.rank_search = 26
and st_geometrytype(location.geometry) in ('ST_LineString')
ORDER BY (ST_distance(placex.geometry, ST_Line_Interpolate_Point(location.geometry,0))+
ST_distance(placex.geometry, ST_Line_Interpolate_Point(location.geometry,0.5))+
ST_distance(placex.geometry, ST_Line_Interpolate_Point(location.geometry,1))) ASC limit 1
@ -1460,38 +1463,11 @@ BEGIN
--END IF;
-- mark everything linked to this place for re-indexing
UPDATE placex set indexed = false from place_addressline where address_place_id = OLD.place_id and placex.place_id = place_addressline.place_id and indexed;
UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = OLD.place_id
and placex.place_id = place_addressline.place_id and indexed_status = 0;
-- do the actual delete
DELETE FROM location_area where place_id = OLD.place_id;
DELETE FROM location_point where place_id = OLD.place_id;
DELETE FROM location_point_0 where place_id = OLD.place_id;
DELETE FROM location_point_1 where place_id = OLD.place_id;
DELETE FROM location_point_2 where place_id = OLD.place_id;
DELETE FROM location_point_3 where place_id = OLD.place_id;
DELETE FROM location_point_4 where place_id = OLD.place_id;
DELETE FROM location_point_5 where place_id = OLD.place_id;
DELETE FROM location_point_6 where place_id = OLD.place_id;
DELETE FROM location_point_7 where place_id = OLD.place_id;
DELETE FROM location_point_8 where place_id = OLD.place_id;
DELETE FROM location_point_9 where place_id = OLD.place_id;
DELETE FROM location_point_10 where place_id = OLD.place_id;
DELETE FROM location_point_11 where place_id = OLD.place_id;
DELETE FROM location_point_12 where place_id = OLD.place_id;
DELETE FROM location_point_13 where place_id = OLD.place_id;
DELETE FROM location_point_14 where place_id = OLD.place_id;
DELETE FROM location_point_15 where place_id = OLD.place_id;
DELETE FROM location_point_16 where place_id = OLD.place_id;
DELETE FROM location_point_17 where place_id = OLD.place_id;
DELETE FROM location_point_18 where place_id = OLD.place_id;
DELETE FROM location_point_19 where place_id = OLD.place_id;
DELETE FROM location_point_20 where place_id = OLD.place_id;
DELETE FROM location_point_21 where place_id = OLD.place_id;
DELETE FROM location_point_22 where place_id = OLD.place_id;
DELETE FROM location_point_23 where place_id = OLD.place_id;
DELETE FROM location_point_24 where place_id = OLD.place_id;
DELETE FROM location_point_25 where place_id = OLD.place_id;
DELETE FROM location_point_26 where place_id = OLD.place_id;
DELETE FROM search_name where place_id = OLD.place_id;
DELETE FROM place_addressline where place_id = OLD.place_id;
DELETE FROM place_addressline where address_place_id = OLD.place_id;
@ -1663,12 +1639,12 @@ BEGIN
IF st_area(NEW.geometry) < 1 AND st_area(existinggeometry) < 1 THEN
-- re-index points that have moved in / out of the polygon, could be done as a single query but postgres gets the index usage wrong
update placex set indexed = false where indexed and
update placex set indexed_status = 2 where indexed_status = 0 and
(ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND NOT (ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
AND rank_search > NEW.rank_search;
update placex set indexed = false where indexed and
update placex set indexed_status = 2 where indexed_status = 0 and
(ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
AND NOT (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND rank_search > NEW.rank_search;
@ -1694,8 +1670,8 @@ BEGIN
IF NOT update_location_nameonly(existingplacex.place_id, NEW.name) THEN
IF st_area(NEW.geometry) < 0.5 THEN
UPDATE placex set indexed = false from place_addressline where address_place_id = existingplacex.place_id
and placex.place_id = place_addressline.place_id and indexed;
UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = existingplacex.place_id
and placex.place_id = place_addressline.place_id and indexed_status = 0;
END IF;
END IF;
@ -1716,8 +1692,8 @@ BEGIN
-- performance, can't take the load of re-indexing a whole country / huge area
IF st_area(NEW.geometry) < 0.5 THEN
UPDATE placex set indexed = false from place_addressline where address_place_id = existingplacex.place_id
and placex.place_id = place_addressline.place_id and indexed;
UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = existingplacex.place_id
and placex.place_id = place_addressline.place_id and indexed_status = 0;
END IF;
END IF;
@ -1773,14 +1749,14 @@ DECLARE
found BOOLEAN;
BEGIN
IF (array_upper(name, 1) is null) THEN
return null;
IF name is null THEN
RETURN null;
END IF;
search := languagepref;
FOR j IN 1..array_upper(search, 1) LOOP
IF name ? search[j] AND trim(name->search[j] != '') THEN
IF name ? search[j] AND trim(name->search[j]) != '' THEN
return trim(name->search[j]);
END IF;
END LOOP;
@ -1869,28 +1845,24 @@ BEGIN
search := languagepref;
result := '{}';
-- UPDATE placex set indexed = false where indexed = true and place_id = for_place_id;
UPDATE placex set indexed = true where indexed = false and place_id = for_place_id;
select country_code,housenumber,rank_address from placex where place_id = for_place_id into searchcountrycode,searchhousenumber,searchrankaddress;
FOR location IN
select CASE WHEN address_place_id = for_place_id AND rank_address = 0 THEN 100 ELSE rank_address END as rank_address,
CASE WHEN type = 'postcode' THEN 'name'->postcode ELSE name END as name,
CASE WHEN type = 'postcode' THEN 'name' => postcode ELSE name END as name,
distance,length(name::text) as namelength
from place_addressline join placex on (address_place_id = placex.place_id)
where place_addressline.place_id = for_place_id and ((rank_address > 0 AND rank_address < searchrankaddress) OR address_place_id = for_place_id)
and (placex.country_code IS NULL OR searchcountrycode IS NULL OR placex.country_code = searchcountrycode OR rank_address < 4)
order by rank_address desc,fromarea desc,distance asc,rank_search desc,namelength desc
LOOP
IF array_upper(search, 1) IS NOT NULL AND array_upper(location.name, 1) IS NOT NULL THEN
IF array_upper(search, 1) IS NOT NULL AND location.name IS NOT NULL THEN
FOR j IN 1..array_upper(search, 1) LOOP
FOR k IN 1..array_upper(location.name, 1) LOOP
IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result && ARRAY[trim(location.name[k].value)] THEN
result[(100 - location.rank_address)] := trim(location.name[k].value);
found := location.rank_address;
END IF;
END LOOP;
IF (found > location.rank_address AND location.name ? search[j] AND location.name -> search[j] != ''
AND NOT result && ARRAY[location.name -> search[j]]) THEN
result[(100 - location.rank_address)] := trim(location.name -> search[j]);
found := location.rank_address;
END IF;
END LOOP;
END IF;
END LOOP;
@ -1931,8 +1903,7 @@ BEGIN
search := languagepref;
result := '{}';
-- UPDATE placex set indexed = false where indexed = true and place_id = for_place_id;
UPDATE placex set indexed = true where indexed = false and place_id = for_place_id;
UPDATE placex set indexed_status = 0 where indexed_status > 0 and place_id = for_place_id;
select country_code,housenumber from placex where place_id = for_place_id into searchcountrycode,searchhousenumber;

View File

@ -63,7 +63,8 @@ CREATE TABLE word (
class text,
type text,
country_code varchar(2),
search_name_count INTEGER
search_name_count INTEGER,
operator TEXT
);
SELECT AddGeometryColumn('word', 'location', 4326, 'GEOMETRY', 2);
CREATE INDEX idx_word_word_id on word USING BTREE (word_id);

View File

@ -10,43 +10,75 @@
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('create-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
array('load-data', '', 0, 1, 1, 1, 'realpath', 'Import a osm file'),
array('all', '', 0, 1, 1, 1, 'realpath', 'Do the complete process'),
array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
array('import-data', '', 0, 1, 1, 1, 'realpath', 'Import a osm file'),
array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
$bDidSomething = false;
if ($aCMDResult['create-db'])
if ($aCMDResult['create-db'] || isset($aCMDResult['all']))
{
$bDidSomething = true;
$oDB =& DB::connect(CONST_Database_DSN, false);
if (!PEAR::isError($oDB))
{
fail('database already exists');
}
passthru('createdb nominatim');
}
if ($aCMDResult['create-db'] || isset($aCMDResult['all']))
{
$bDidSomething = true;
// TODO: path detection, detection memory, etc.
// passthru('createdb nominatim');
$oDB =& getDB();
passthru('createlang plpgsql nominatim');
passthru('psql -f '.CONST_Path_Postgresql_Contrib.'/_int.sql nominatim');
passthru('psql -f '.CONST_Path_Postgresql_Contrib.'/hstore.sql nominatim');
passthru('psql -f '.CONST_Path_Postgresql_Postgis.'/postgis.sql nominatim');
passthru('psql -f '.CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/data/country_name.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/data/country_osm_grid.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/data/gb_postcode.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/data/us_statecounty.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/data/us_state.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/data/worldboundaries.sql nominatim');
pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/_int.sql');
pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
}
if (isset($aCMDResult['load-data']) && $aCMDResult['load-data'])
if (isset($aCMDResult['all']) && !isset($aCMDResult['import-data'])) $aCMDResult['import-data'] = $aCMDResult['all'];
if (isset($aCMDResult['import-data']) && $aCMDResult['import-data'])
{
$bDidSomething = true;
passthru(CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['load-data']);
passthru('psql -f '.CONST_BasePath.'/sql/functions.sql nominatim');
passthru('psql -f '.CONST_BasePath.'/sql/tables.sql nominatim');
passthru(CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['import-data']);
}
if ($aCMDResult['create-partitions'])
if ($aCMDResult['create-functions'] || isset($aCMDResult['all']))
{
$bDidSomething = true;
$sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
$sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
pgsqlRunScript($sTemplate);
}
if ($aCMDResult['create-tables'] || isset($aCMDResult['all']))
{
$bDidSomething = true;
pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
}
if ($aCMDResult['create-partitions'] || isset($aCMDResult['all']))
{
$bDidSomething = true;
$oDB =& getDB();
$sSQL = 'select distinct country_code from country_name order by country_code';
$aPartitions = $oDB->getCol($sSQL);
if (PEAR::isError($aPartitions))
@ -66,11 +98,73 @@
}
$sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
}
echo $sTemplate;
exit;
pgsqlRunScript($sTemplate);
}
if ($aCMDResult['load-data'] || isset($aCMDResult['all']))
{
$bDidSomething = true;
pgsqlRunScriptFile(CONST_BasePath.'/sql/loaddata.sql');
}
if (!$bDidSomething)
{
showUsage($aCMDOptions, true);
}
/**
 * Run an SQL script file through the psql command line client.
 *
 * The target database name is taken from CONST_Database_DSN. Everything psql
 * writes to stdout is echoed; stderr is discarded.
 *
 * Note: without ON_ERROR_STOP psql still exits with 0 when individual SQL
 * statements fail, so the exit status check below only catches fatal
 * problems (e.g. psql could not connect or could not read the file).
 *
 * @param string $sFilename Path of the SQL file handed to `psql -f`.
 *
 * @return void Calls fail() if the file does not exist, psql cannot be
 *              started, or psql terminates with a non-zero exit status.
 */
function pgsqlRunScriptFile($sFilename)
{
	if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

	// Convert database DSN to psql parameters
	$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
	$sDatabase = (string)$aDSNInfo['database'];

	// Quote every argument: an install path containing spaces or shell
	// metacharacters would otherwise split into several arguments.
	$sCMD = 'psql -f '.escapeshellarg($sFilename);
	if ($sDatabase !== '') $sCMD .= ' '.escapeshellarg($sDatabase);

	$aDescriptors = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$ahPipes = null;
	$hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
	if (!is_resource($hProcess)) fail('unable to start pgsql');

	// Nothing is sent on stdin; close it so psql never waits for input.
	fclose($ahPipes[0]);

	// Relay psql's stdout until it closes its end of the pipe.
	while(!feof($ahPipes[1]))
	{
		echo fread($ahPipes[1], 4096);
	}
	fclose($ahPipes[1]);

	// A positive exit status means psql itself failed. Negative values are
	// ignored because proc_close() can return -1 when the status is unknown.
	$iStatus = proc_close($hProcess);
	if ($iStatus > 0) fail('psql exited with status '.$iStatus.' while running '.$sFilename);
}
/**
 * Feed an SQL script, given as a string, to the psql command line client.
 *
 * The target database name is taken from CONST_Database_DSN. The script is
 * written to psql's stdin; everything psql writes to stdout is echoed and
 * stderr is discarded.
 *
 * Note: without ON_ERROR_STOP psql still exits with 0 when individual SQL
 * statements fail, so the exit status check below only catches fatal
 * problems (e.g. psql could not connect).
 *
 * @param string $sScript SQL text to execute.
 *
 * @return void Calls fail() if psql cannot be started or terminates with a
 *              non-zero exit status.
 */
function pgsqlRunScript($sScript)
{
	// Convert database DSN to psql parameters
	$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
	$sDatabase = (string)$aDSNInfo['database'];

	// Quote the database name so unusual names cannot be misparsed by the shell.
	$sCMD = 'psql';
	if ($sDatabase !== '') $sCMD .= ' '.escapeshellarg($sDatabase);

	$aDescriptors = array(
		0 => array('pipe', 'r'),
		1 => array('pipe', 'w'),
		2 => array('file', '/dev/null', 'a')
	);
	$ahPipes = null;
	$hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
	if (!is_resource($hProcess)) fail('unable to start pgsql');

	// NOTE(review): the whole script is written before any output is read.
	// If psql produces more output than the stdout pipe can buffer while
	// the script is still being written, both sides could block - confirm
	// whether scripts passed here can get large enough for that to matter.
	fwrite($ahPipes[0], $sScript);
	// Closing stdin signals end of input so psql finishes and exits.
	fclose($ahPipes[0]);

	// Relay psql's stdout until it closes its end of the pipe.
	while(!feof($ahPipes[1]))
	{
		echo fread($ahPipes[1], 4096);
	}
	fclose($ahPipes[1]);

	// A positive exit status means psql itself failed. Negative values are
	// ignored because proc_close() can return -1 when the status is unknown.
	$iStatus = proc_close($hProcess);
	if ($iStatus > 0) fail('psql exited with status '.$iStatus);
}

1024
website/search.php Executable file

File diff suppressed because it is too large Load Diff