From 4d16677d2ee4e57cef44d7a208b15b390d5994f4 Mon Sep 17 00:00:00 2001 From: Brian Quinion Date: Tue, 14 Jun 2011 13:42:46 +0000 Subject: [PATCH] update place_id to BIGINT --- lib/db.php | 4 +-- nominatim/export.c | 4 +-- nominatim/import.c | 14 +++++++-- nominatim/index.c | 20 ++++++------ sql/functions.sql | 71 ++++++++++++++++++++++++------------------ sql/partitions.src.sql | 58 ++++++++++++++++++++++++---------- sql/tables-minimal.sql | 18 +++++------ sql/tables.sql | 22 ++++++------- utils/setup.php | 2 +- 9 files changed, 129 insertions(+), 84 deletions(-) diff --git a/lib/db.php b/lib/db.php index acdb31c4..b6dc89c4 100644 --- a/lib/db.php +++ b/lib/db.php @@ -1,10 +1,10 @@ res, *thread_data->count, 0))); + place_id = PGint64(*((uint64_t *)PQgetvalue(thread_data->res, *thread_data->count, 0))); (*thread_data->count)++; pthread_mutex_unlock( thread_data->count_mutex ); - if (verbose) fprintf(stderr, " Processing place_id %d\n", place_id); + if (verbose) fprintf(stderr, " Processing place_id %ld\n", place_id); updateStartTime = time(0); int done = 0; @@ -351,8 +350,7 @@ void *nominatim_indexThread(void * thread_data_in) while(!done) { - - paramPlaceID = PGint32(place_id); + paramPlaceID = PGint64(place_id); paramValues[0] = (char *)&paramPlaceID; paramLengths[0] = sizeof(paramPlaceID); paramFormats[0] = 1; @@ -363,7 +361,7 @@ void *nominatim_indexThread(void * thread_data_in) { if (strncmp(PQerrorMessage(thread_data->conn), "ERROR: deadlock detected", 25)) { - fprintf(stderr, "index_placex: UPDATE failed - deadlock, retrying\n"); + fprintf(stderr, "index_placex: UPDATE failed - deadlock, retrying (%ld)\n", place_id); PQclear(res); sleep(rand() % 10); } @@ -377,7 +375,7 @@ void *nominatim_indexThread(void * thread_data_in) } } PQclear(res); - if (difftime(time(0), updateStartTime) > 1) fprintf(stderr, " Slow place_id %d\n", place_id); + if (difftime(time(0), updateStartTime) > 1) fprintf(stderr, " Slow place_id %ld\n", place_id); if (thread_data->writer) { diff 
--git a/sql/functions.sql b/sql/functions.sql index c6b76ad1..5ce5d5dd 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -537,7 +537,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION delete_location(OLD_place_id INTEGER) RETURNS BOOLEAN +CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -549,7 +549,7 @@ $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION add_location( - place_id INTEGER, + place_id BIGINT, country_code varchar(2), partition INTEGER, keywords INTEGER[], @@ -654,7 +654,7 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION update_location( partition INTEGER, - place_id INTEGER, + place_id BIGINT, place_country_code varchar(2), name hstore, rank_search INTEGER, @@ -673,7 +673,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION search_name_add_words(parent_place_id INTEGER, to_add INTEGER[]) +CREATE OR REPLACE FUNCTION search_name_add_words(parent_place_id BIGINT, to_add INTEGER[]) RETURNS BOOLEAN AS $$ DECLARE @@ -704,7 +704,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION update_location_nameonly(partition INTEGER, OLD_place_id INTEGER, name hstore) RETURNS BOOLEAN +CREATE OR REPLACE FUNCTION update_location_nameonly(partition INTEGER, OLD_place_id BIGINT, name hstore) RETURNS BOOLEAN AS $$ DECLARE newkeywords INTEGER[]; @@ -753,7 +753,7 @@ DECLARE originalnumberrange INTEGER; housenum INTEGER; linegeo GEOMETRY; - search_place_id INTEGER; + search_place_id BIGINT; defpostalcode TEXT; havefirstpoint BOOLEAN; @@ -1176,7 +1176,7 @@ DECLARE search_maxrank INTEGER; address_maxrank INTEGER; address_street_word_id INTEGER; - parent_place_id_rank INTEGER; + parent_place_id_rank BIGINT; isin TEXT[]; isin_tokens INT[]; @@ -1450,7 +1450,7 @@ BEGIN -- Process area matches location_rank_search := 100; location_distance := 0; ---RAISE WARNING ' getNearFeatures(%,%,%,%)',NEW.partition, place_centroid, search_maxrank, isin_tokens; +--RAISE WARNING ' 
getNearFeatures(%,''%'',%,''%'')',NEW.partition, place_centroid, search_maxrank, isin_tokens; FOR location IN SELECT * from getNearFeatures(NEW.partition, place_centroid, search_maxrank, isin_tokens) LOOP --RAISE WARNING ' AREA: %',location; @@ -1460,13 +1460,15 @@ BEGIN location_distance := location.distance * 1.5; END IF; - IF location.distance < location_distance THEN + IF location.distance < location_distance OR NOT location.isguess THEN -- Add it to the list of search terms nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); INSERT INTO place_addressline VALUES (NEW.place_id, location.place_id, true, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); address_havelevel[location.rank_address] := true; +--RAISE WARNING ' Terms: (%) %',location, nameaddress_vector; + IF location.rank_address > parent_place_id_rank THEN NEW.parent_place_id = location.place_id; parent_place_id_rank = location.rank_address; @@ -1586,7 +1588,7 @@ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION place_delete() RETURNS TRIGGER AS $$ DECLARE - placeid INTEGER; + placeid BIGINT; BEGIN -- RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type; @@ -1614,7 +1616,7 @@ DECLARE existing RECORD; existingplacex RECORD; existinggeometry GEOMETRY; - existingplace_id INTEGER; + existingplace_id BIGINT; result BOOLEAN; partition INTEGER; BEGIN @@ -1653,11 +1655,11 @@ BEGIN select * from placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO existingplacex; -- Handle a place changing type by removing the old data - -- My generated 'place' types are causing havok because they overlap with real tags + -- My generated 'place' types are causing havok because they overlap with real keys -- TODO: move them to their own special purpose key/class to avoid collisions --- IF existing.osm_type IS NULL AND (NEW.type not in ('postcode','house','houses')) THEN --- DELETE FROM 
place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type not in ('postcode','house','houses'); --- END IF; + IF existing.osm_type IS NULL AND (NEW.type not in ('postcode','house','houses')) THEN + DELETE FROM place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type not in ('postcode','house','houses'); + END IF; -- RAISE WARNING 'Existing: %',existing.place_id; @@ -1905,7 +1907,7 @@ END; $$ LANGUAGE plpgsql IMMUTABLE; -CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id INTEGER) RETURNS TEXT +CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id BIGINT) RETURNS TEXT AS $$ DECLARE result TEXT[]; @@ -1944,7 +1946,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id INTEGER, languagepref TEXT[]) RETURNS TEXT +CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, languagepref TEXT[]) RETURNS TEXT AS $$ DECLARE result TEXT[]; @@ -1971,7 +1973,7 @@ LANGUAGE plpgsql; DROP TYPE addressline CASCADE; create type addressline as ( - place_id INTEGER, + place_id BIGINT, osm_type CHAR(1), osm_id INTEGER, name HSTORE, @@ -1984,10 +1986,10 @@ create type addressline as ( distance FLOAT ); -CREATE OR REPLACE FUNCTION get_addressdata(in_place_id INTEGER) RETURNS setof addressline +CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT) RETURNS setof addressline AS $$ DECLARE - for_place_id INTEGER; + for_place_id BIGINT; result TEXT[]; search TEXT[]; found INTEGER; @@ -2135,7 +2137,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_place_boundingbox(search_place_id INTEGER) RETURNS place_boundingbox +CREATE OR REPLACE FUNCTION get_place_boundingbox(search_place_id BIGINT) RETURNS place_boundingbox AS $$ DECLARE result place_boundingbox; @@ -2172,7 +2174,7 @@ $$ LANGUAGE plpgsql; -- don't do the operation if it would be slow -CREATE OR REPLACE FUNCTION get_place_boundingbox_quick(search_place_id INTEGER) RETURNS 
place_boundingbox +CREATE OR REPLACE FUNCTION get_place_boundingbox_quick(search_place_id BIGINT) RETURNS place_boundingbox AS $$ DECLARE result place_boundingbox; @@ -2211,7 +2213,7 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION update_place(search_place_id INTEGER) RETURNS BOOLEAN +CREATE OR REPLACE FUNCTION update_place(search_place_id BIGINT) RETURNS BOOLEAN AS $$ DECLARE result place_boundingbox; @@ -2384,7 +2386,7 @@ DECLARE rangestartnumber INTEGER; place_centroid GEOMETRY; partition INTEGER; - parent_place_id INTEGER; + parent_place_id BIGINT; location RECORD; address_street_word_id INTEGER; @@ -2464,32 +2466,41 @@ DECLARE newpoints INTEGER; place_centroid GEOMETRY; partition INTEGER; - parent_place_id INTEGER; + out_parent_place_id BIGINT; location RECORD; address_street_word_id INTEGER; + out_postcode TEXT; BEGIN place_centroid := ST_Centroid(pointgeo); partition := get_partition(place_centroid, in_countrycode); - parent_place_id := null; + out_parent_place_id := null; address_street_word_id := get_name_id(make_standard_name(in_street)); IF address_street_word_id IS NOT NULL THEN FOR location IN SELECT * from getNearestNamedRoadFeature(partition, place_centroid, address_street_word_id) LOOP - parent_place_id := location.place_id; + out_parent_place_id := location.place_id; END LOOP; END IF; - IF parent_place_id IS NULL THEN + IF out_parent_place_id IS NULL THEN FOR location IN SELECT place_id FROM getNearestRoadFeature(partition, place_centroid) LOOP - parent_place_id := location.place_id; + out_parent_place_id := location.place_id; END LOOP; END IF; + out_postcode := in_postcode; + IF out_postcode IS NULL THEN + SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode; + END IF; + IF out_postcode IS NULL THEN + out_postcode := getNearestPostcode(partition, place_centroid); + END IF; + newpoints := 0; insert into location_property_aux (place_id, partition, parent_place_id, housenumber, postcode, centroid) - values 
(nextval('seq_place'), partition, parent_place_id, in_housenumber, in_postcode, place_centroid); + values (nextval('seq_place'), partition, out_parent_place_id, in_housenumber, out_postcode, place_centroid); newpoints := newpoints + 1; RETURN newpoints; diff --git a/sql/partitions.src.sql b/sql/partitions.src.sql index e31eadff..f710cfb1 100644 --- a/sql/partitions.src.sql +++ b/sql/partitions.src.sql @@ -1,13 +1,16 @@ +drop type nearplace cascade; create type nearplace as ( - place_id integer + place_id BIGINT ); +drop type nearfeature cascade; create type nearfeature as ( - place_id integer, + place_id BIGINT, keywords int[], rank_address integer, rank_search integer, - distance float + distance float, + isguess boolean ); CREATE TABLE location_area_country () INHERITS (location_area_large); @@ -39,7 +42,7 @@ CREATE INDEX idx_location_property_-partition-_housenumber_parent_place_id ON lo CREATE TABLE location_road_-partition- ( partition integer, - place_id INTEGER, + place_id BIGINT, country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); @@ -56,7 +59,7 @@ BEGIN -- start IF in_partition = -partition- THEN FOR r IN - SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(point, centroid)) as distance FROM ( + SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(point, centroid)) as distance, isguess FROM ( SELECT * FROM location_area_large_-partition- WHERE ST_Contains(geometry, point) and rank_search < maxrank UNION ALL SELECT * FROM location_area_country WHERE ST_Contains(geometry, point) and rank_search < maxrank @@ -81,7 +84,7 @@ END $$ LANGUAGE plpgsql; -create or replace function deleteLocationArea(in_partition INTEGER, in_place_id integer) RETURNS BOOLEAN AS $$ +create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -100,7 +103,7 @@ $$ LANGUAGE plpgsql; create or replace function 
insertLocationAreaLarge( - in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), in_keywords INTEGER[], + in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_keywords INTEGER[], in_rank_search INTEGER, in_rank_address INTEGER, in_estimate BOOLEAN, in_centroid GEOMETRY, in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE @@ -133,13 +136,13 @@ BEGIN IF in_partition = -partition- THEN FOR r IN SELECT place_id, name_vector, address_rank, search_rank, - ST_Distance(centroid, point) as distance + ST_Distance(centroid, point) as distance, null as isguess FROM search_name_-partition- WHERE name_vector @> ARRAY[isin_token] AND search_rank < maxrank UNION ALL SELECT place_id, name_vector, address_rank, search_rank, - ST_Distance(centroid, point) as distance + ST_Distance(centroid, point) as distance, null as isguess FROM search_name_country WHERE name_vector @> ARRAY[isin_token] AND search_rank < maxrank @@ -166,7 +169,7 @@ BEGIN IF in_partition = -partition- THEN FOR r IN SELECT place_id, name_vector, address_rank, search_rank, - ST_Distance(centroid, point) as distance + ST_Distance(centroid, point) as distance, null as isguess FROM search_name_-partition- WHERE name_vector @> ARRAY[isin_token] AND ST_DWithin(centroid, point, 0.01) @@ -184,8 +187,31 @@ END $$ LANGUAGE plpgsql; +create or replace function getNearestPostcode(in_partition INTEGER, point GEOMETRY) + RETURNS TEXT AS $$ +DECLARE + out_postcode TEXT; +BEGIN + +-- start + IF in_partition = -partition- THEN + SELECT postcode + FROM location_area_large_-partition- join placex using (place_id) + WHERE st_contains(location_area_large_-partition-.geometry, point) + AND class = 'place' and type = 'postcode' + ORDER BY st_distance(location_area_large_-partition-.centroid, point) ASC limit 1 + INTO out_postcode; + RETURN out_postcode; + END IF; +-- end + + RAISE EXCEPTION 'Unknown partition %', in_partition; +END +$$ +LANGUAGE plpgsql; + create or replace function insertSearchName( - 
in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), + in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_name_vector INTEGER[], in_nameaddress_vector INTEGER[], in_rank_search INTEGER, in_rank_address INTEGER, in_importance FLOAT, in_centroid GEOMETRY) RETURNS BOOLEAN AS $$ @@ -218,7 +244,7 @@ END $$ LANGUAGE plpgsql; -create or replace function deleteSearchName(in_partition INTEGER, in_place_id integer) RETURNS BOOLEAN AS $$ +create or replace function deleteSearchName(in_partition INTEGER, in_place_id BIGINT) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -240,7 +266,7 @@ $$ LANGUAGE plpgsql; create or replace function insertLocationRoad( - in_partition INTEGER, in_place_id integer, in_country_code VARCHAR(2), in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ + in_partition INTEGER, in_place_id BIGINT, in_country_code VARCHAR(2), in_geometry GEOMETRY) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -258,7 +284,7 @@ END $$ LANGUAGE plpgsql; -create or replace function deleteRoad(in_partition INTEGER, in_place_id integer) RETURNS BOOLEAN AS $$ +create or replace function deleteRoad(in_partition INTEGER, in_place_id BIGINT) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -288,7 +314,7 @@ BEGIN WHILE search_diameter < 0.1 LOOP FOR r IN SELECT place_id, null, null, null, - ST_Distance(geometry, point) as distance + ST_Distance(geometry, point) as distance, null as isguess FROM location_road_-partition- WHERE ST_DWithin(geometry, point, search_diameter) ORDER BY distance ASC limit 1 @@ -330,7 +356,7 @@ BEGIN WHILE search_diameter < 0.01 LOOP FOR r IN SELECT place_id, null, null, null, - ST_Distance(geometry, line) as distance + ST_Distance(geometry, line) as distance, null as isguess FROM location_road_-partition- WHERE ST_DWithin(line, geometry, search_diameter) ORDER BY (ST_distance(geometry, p1)+ diff --git a/sql/tables-minimal.sql b/sql/tables-minimal.sql index eb529252..3dad8aa5 100644 --- a/sql/tables-minimal.sql +++ b/sql/tables-minimal.sql @@ -29,9 
+29,9 @@ CREATE SEQUENCE seq_word start 1; drop table IF EXISTS location_property CASCADE; CREATE TABLE location_property ( - place_id INTEGER, + place_id BIGINT, partition integer, - parent_place_id INTEGER, + parent_place_id BIGINT, housenumber TEXT, postcode TEXT ); @@ -49,7 +49,7 @@ CREATE INDEX idx_location_property_tiger_housenumber_parent_place_id ON location drop table IF EXISTS search_name_blank CASCADE; CREATE TABLE search_name_blank ( - place_id INTEGER, + place_id BIGINT, search_rank integer, address_rank integer, importance FLOAT, @@ -68,8 +68,8 @@ CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( - place_id INTEGER, - address_place_id INTEGER, + place_id BIGINT, + address_place_id BIGINT, fromarea boolean, isaddress boolean, distance float, @@ -80,7 +80,7 @@ CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING B drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( - place_id INTEGER, + place_id BIGINT, minlat float, maxlat float, minlon float, @@ -108,7 +108,7 @@ CREATE INDEX idx_country_geometry ON country USING GIST (geometry); drop table placex; CREATE TABLE placex ( - place_id INTEGER NOT NULL, + place_id BIGINT NOT NULL, partition integer, osm_type char(1), osm_id INTEGER, @@ -122,8 +122,8 @@ CREATE TABLE placex ( postcode TEXT, country_code varchar(2), extratags HSTORE, - parent_place_id INTEGER, - linked_place_id INTEGER, + parent_place_id BIGINT, + linked_place_id BIGINT, rank_address INTEGER, rank_search INTEGER, importance FLOAT, diff --git a/sql/tables.sql b/sql/tables.sql index 1abf530a..5d9c60b2 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -87,7 +87,7 @@ CREATE SEQUENCE seq_word start 1; drop table IF EXISTS location_area CASCADE; CREATE TABLE location_area ( partition integer, - place_id INTEGER, + place_id BIGINT, country_code VARCHAR(2), keywords INTEGER[], rank_search INTEGER 
NOT NULL, @@ -103,9 +103,9 @@ CREATE TABLE location_area_roadfar () INHERITS (location_area); drop table IF EXISTS location_property CASCADE; CREATE TABLE location_property ( - place_id INTEGER, + place_id BIGINT, partition integer, - parent_place_id INTEGER, + parent_place_id BIGINT, housenumber TEXT, postcode TEXT ); @@ -123,7 +123,7 @@ CREATE INDEX idx_location_property_tiger_housenumber_parent_place_id ON location drop table IF EXISTS search_name_blank CASCADE; CREATE TABLE search_name_blank ( - place_id INTEGER, + place_id BIGINT, search_rank integer, address_rank integer, importance FLOAT, @@ -142,8 +142,8 @@ CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( - place_id INTEGER, - address_place_id INTEGER, + place_id BIGINT, + address_place_id BIGINT, fromarea boolean, isaddress boolean, distance float, @@ -154,7 +154,7 @@ CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING B drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( - place_id INTEGER, + place_id BIGINT, minlat float, maxlat float, minlon float, @@ -172,7 +172,7 @@ drop table IF EXISTS reverse_cache; CREATE TABLE reverse_cache ( latlonzoomid integer, country_code varchar(2), - place_id INTEGER + place_id BIGINT ); GRANT SELECT on reverse_cache to "www-data" ; GRANT INSERT on reverse_cache to "www-data" ; @@ -192,7 +192,7 @@ CREATE INDEX idx_country_geometry ON country USING GIST (geometry); drop table placex; CREATE TABLE placex ( - place_id INTEGER NOT NULL, + place_id BIGINT NOT NULL, partition integer, osm_type char(1), osm_id INTEGER, @@ -206,8 +206,8 @@ CREATE TABLE placex ( postcode TEXT, country_code varchar(2), extratags HSTORE, - parent_place_id INTEGER, - linked_place_id INTEGER, + parent_place_id BIGINT, + linked_place_id BIGINT, rank_address INTEGER, rank_search INTEGER, importance FLOAT, diff --git a/utils/setup.php b/utils/setup.php 
index 5a54ef77..6ef22b2d 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -424,7 +424,7 @@ 2 => STDERR ); $ahPipes = null; - $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes); + $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes); if (!is_resource($hProcess)) fail('unable to start pgsql'); while(strlen($sScript))