remove the road buffer code - made things slower. Plus some more tiger import code

This commit is contained in:
Brian Quinion 2010-11-17 15:19:25 +00:00
parent 1663dd49a4
commit a6453a808d
4 changed files with 109 additions and 117 deletions

View File

@ -5,7 +5,7 @@
// General settings
@define('CONST_Debug', false);
@define('CONST_Database_DSN', 'pgsql://brian@/nominatim');
@define('CONST_Database_DSN', 'pgsql://@/nominatim');
// Paths
@define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/9.0/contrib');

View File

@ -575,8 +575,8 @@ DECLARE
x BOOLEAN;
BEGIN
IF rank_search > 26 THEN
RAISE EXCEPTION 'Adding location with rank > 26 (% rank %)', place_id, rank_search;
IF rank_search > 25 THEN
RAISE EXCEPTION 'Adding location with rank > 25 (% rank %)', place_id, rank_search;
END IF;
x := deleteLocationArea(partition, place_id);
@ -1318,10 +1318,8 @@ BEGIN
IF NEW.parent_place_id IS NULL AND NEW.street IS NOT NULL THEN
address_street_word_id := get_name_id(make_standard_name(NEW.street));
--RAISE WARNING 'street: % %', NEW.street, address_street_word_id;
IF address_street_word_id IS NOT NULL THEN
FOR location IN SELECT * from getNearestNamedRoadFeature(NEW.partition, place_centroid, address_street_word_id) LOOP
--RAISE WARNING 'streetname found nearby %',location;
NEW.parent_place_id := location.place_id;
END LOOP;
END IF;
@ -1330,12 +1328,6 @@ BEGIN
--RAISE WARNING 'x4';
-- Still nothing, just use the nearest road
-- IF NEW.parent_place_id IS NULL THEN
-- FOR location IN SELECT place_id FROM getNearRoads(NEW.partition, place_centroid) LOOP
-- NEW.parent_place_id := location.place_id;
-- END LOOP;
-- END IF;
search_diameter := 0.00005;
WHILE NEW.parent_place_id IS NULL AND search_diameter < 0.1 LOOP
FOR location IN SELECT place_id FROM placex
@ -1465,15 +1457,25 @@ DECLARE
b BOOLEAN;
BEGIN
-- mark everything linked to this place for re-indexing
UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = OLD.place_id
and placex.place_id = place_addressline.place_id and indexed_status = 0;
IF OLD.rank_address < 30 THEN
-- mark everything linked to this place for re-indexing
UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = OLD.place_id
and placex.place_id = place_addressline.place_id and indexed_status = 0;
DELETE FROM place_addressline where address_place_id = OLD.place_id;
END IF;
IF OLD.rank_address < 26 THEN
b := deleteLocationArea(OLD.partition, OLD.place_id);
END IF;
IF OLD.name is not null THEN
b := deleteSearchName(OLD.partition, OLD.place_id);
END IF;
-- do the actual delete
b := deleteLocationArea(OLD.partition, OLD.place_id);
b := deleteSearchName(OLD.partition, OLD.place_id);
DELETE FROM place_addressline where place_id = OLD.place_id;
DELETE FROM place_addressline where address_place_id = OLD.place_id;
RETURN OLD;
@ -2212,37 +2214,49 @@ CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_star
DECLARE
startnumber INTEGER;
endnumber INTEGER;
stepsize INTEGER;
housenum INTEGER;
newpoints INTEGER;
numberrange INTEGER;
rangestartnumber INTEGER;
BEGIN
numberrange := in_endnumber - in_startnumber;
IF in_endnumber > in_startnumber THEN
startnumber = in_startnumber;
endnumber = in_endnumber;
ELSE
startnumber = in_endnumber;
endnumber = in_startnumber;
END IF;
numberrange := endnumber - startnumber;
rangestartnumber := startnumber;
IF (interpolationtype = 'odd' AND startnumber%2 = 0) OR (interpolationtype = 'even' AND startnumber%2 = 1) THEN
startnumber := in_startnumber + 1;
startnumber := startnumber + 1;
stepsize := 2;
ELSE
IF (interpolationtype = 'odd' OR interpolationtype = 'even') THEN
startnumber := in_startnumber;
stepsize := 2;
ELSE -- everything else assumed to be 'all'
startnumber := in_startnumber;
stepsize := 1;
END IF;
END IF;
--this is a one time operation - skip the delete
--delete from placex where osm_type = 'N' and osm_id = prevnode.osm_id and type = 'house' and place_id != prevnode.place_id;
-- Filter out really broken tiger data
IF numberrange > 0 AND (numberrange::float/stepsize::float > 500) AND ST_length(linegeo)/(numberrange::float/stepsize::float) < 0.000001 THEN
RAISE WARNING 'Road too short for number range % to % on %, % (%)',startnumber,endnumber,in_street,in_isin,ST_length(linegeo)/(numberrange::float/stepsize::float);
RETURN 0;
END IF;
newpoints := 0;
FOR housenum IN startnumber..in_endnumber BY stepsize LOOP
FOR housenum IN startnumber..endnumber BY stepsize LOOP
insert into placex (osm_type, osm_id, class, type, admin_level, housenumber, street, isin, postcode,
country_code, parent_place_id, rank_address, rank_search, indexed_status, geometry)
values ('T', nextval('seq_tigger_house'), 'place', 'house', null, housenum, in_street, in_isin, in_postcode,
'us', null, 30, 30, 1, ST_Line_Interpolate_Point(linegeo, (housenum::float-in_startnumber::float)/numberrange::float));
'us', null, 30, 30, 1, ST_Line_Interpolate_Point(linegeo, (housenum::float-rangestartnumber::float)/numberrange::float));
newpoints := newpoints + 1;
END LOOP;

View File

@ -25,14 +25,6 @@ CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large);
CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id);
CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry);
CREATE TABLE location_area_roadnear_-partition- () INHERITS (location_area_roadnear);
CREATE INDEX idx_location_area_roadnear_-partition-_place_id ON location_area_roadnear_-partition- USING BTREE (place_id);
CREATE INDEX idx_location_area_roadnear_-partition-_geometry ON location_area_roadnear_-partition- USING GIST (geometry);
CREATE TABLE location_area_roadfar_-partition- () INHERITS (location_area_roadfar);
CREATE INDEX idx_location_area_roadfar_-partition-_place_id ON location_area_roadfar_-partition- USING BTREE (place_id);
CREATE INDEX idx_location_area_roadfar_-partition-_geometry ON location_area_roadfar_-partition- USING GIST (geometry);
CREATE TABLE search_name_-partition- () INHERITS (search_name_blank);
CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id);
CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid);
@ -41,33 +33,6 @@ CREATE INDEX idx_search_name_-partition-_nameaddress_vector ON search_name_-part
-- end
create or replace function getNearRoads(in_partition INTEGER, point GEOMETRY) RETURNS setof nearplace AS $$
DECLARE
r nearplace%rowtype;
a BOOLEAN;
BEGIN
-- start
IF in_partition = -partition- THEN
a := FALSE;
FOR r IN SELECT place_id FROM location_area_roadnear_-partition- WHERE ST_Contains(geometry, point) ORDER BY ST_Distance(point, centroid) ASC LIMIT 1 LOOP
a := TRUE;
RETURN NEXT r;
RETURN;
END LOOP;
FOR r IN SELECT place_id FROM location_area_roadfar_-partition- WHERE ST_Contains(geometry, point) ORDER BY ST_Distance(point, centroid) ASC LOOP
RETURN NEXT r;
RETURN;
END LOOP;
RETURN;
END IF;
-- end
RAISE EXCEPTION 'Unknown partition %', in_partition;
END
$$
LANGUAGE plpgsql;
create or replace function getNearFeatures(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_tokens INT[]) RETURNS setof nearfeature AS $$
DECLARE
r nearfeature%rowtype;
@ -100,9 +65,7 @@ BEGIN
-- start
IF in_partition = -partition- THEN
-- DELETE from location_area_large_-partition- WHERE place_id = in_place_id;
-- DELETE from location_area_roadnear_-partition- WHERE place_id = in_place_id;
-- DELETE from location_area_roadfar_-partition- WHERE place_id = in_place_id;
DELETE from location_area_large_-partition- WHERE place_id = in_place_id;
RETURN TRUE;
END IF;
-- end
@ -139,46 +102,6 @@ END
$$
LANGUAGE plpgsql;
create or replace function insertLocationAreaRoadNear(
in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), in_keywords INTEGER[],
in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN,
in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$
DECLARE
BEGIN
-- start
IF in_partition = -partition- THEN
INSERT INTO location_area_roadnear_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry);
RETURN TRUE;
END IF;
-- end
RAISE EXCEPTION 'Unknown partition %', in_partition;
RETURN FALSE;
END
$$
LANGUAGE plpgsql;
create or replace function insertLocationAreaRoadFar(
in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), in_keywords INTEGER[],
in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN,
in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$
DECLARE
BEGIN
-- start
IF in_partition = -partition- THEN
INSERT INTO location_area_roadfar_-partition- values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry);
RETURN TRUE;
END IF;
-- end
RAISE EXCEPTION 'Unknown partition %', in_partition;
RETURN FALSE;
END
$$
LANGUAGE plpgsql;
create or replace function getNearestNamedFeature(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_token INTEGER) RETURNS setof nearfeature AS $$
DECLARE
r nearfeature%rowtype;

View File

@ -22,11 +22,25 @@
array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
$bDidSomething = false;
// This is a pretty hard core defult - the number of processors in the box - 1
$iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
if ($iInstances < 1)
{
$iInstances = 1;
echo "WARNING: resetting threads to $iInstances\n";
}
if ($iInstances > getProcessorCount())
{
$iInstances = getProcessorCount();
echo "WARNING: resetting threads to $iInstances\n";
}
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
$bDidSomething = true;
@ -134,19 +148,6 @@
if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
echo '.';
// This is a pretty hard core defult - the number of processors in the box - 1
$iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
if ($iInstances < 1)
{
$iInstances = 1;
echo "WARNING: reseting threads to $iInstances\n";
}
if ($iInstances > getProcessorCount())
{
$iInstances = getProcessorCount();
echo "WARNING: reseting threads to $iInstances\n";
}
$aDBInstances = array();
for($i = 0; $i < $iInstances; $i++)
{
@ -171,6 +172,60 @@
echo "\n";
}
if ($aCMDResult['import-tiger-data'] || $aCMDResult['all'])
{
$bDidSomething = true;
$aDBInstances = array();
for($i = 0; $i < $iInstances; $i++)
{
$aDBInstances[$i] =& getDB(true);
}
foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile)
{
echo $sFile.': ';
$hFile = fopen($sFile, "r");
$sSQL = fgets($hFile, 100000);
$iLines = 0;
while(true)
{
for($i = 0; $i < $iInstances; $i++)
{
if (!pg_connection_busy($aDBInstances[$i]->connection))
{
while(pg_get_result($aDBInstances[$i]->connection));
$sSQL = fgets($hFile, 100000);
if (!$sSQL) break 2;
if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
$iLines++;
if ($iLines == 1000)
{
echo ".";
$iLines = 0;
}
}
}
usleep(10);
}
fclose($hFile);
$bAnyBusy = true;
while($bAnyBusy)
{
$bAnyBusy = false;
for($i = 0; $i < $iInstances; $i++)
{
if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
}
usleep(10);
}
echo "\n";
}
}
if (!$bDidSomething)
{
showUsage($aCMDOptions, true);