mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-10-27 11:42:46 +03:00
Merge pull request #1693 from lonvia/reorganize-addressline-computation
Reorganize addressline computation
This commit is contained in:
commit
65df218f91
@ -252,11 +252,172 @@ END;
|
|||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql STABLE;
|
LANGUAGE plpgsql STABLE;
|
||||||
|
|
||||||
|
|
||||||
|
-- Insert address of a place into the place_addressline table.
--
-- Collects the search tokens from the place's own address tags, then walks
-- the features returned by getNearFeatures() and writes one
-- place_addressline row for every feature that is accepted.
--
-- \param obj_place_id  Place_id of the place to compute the address for.
-- \param partition     Partition number where the place is in.
-- \param maxrank       Rank of the place. All address features must have
--                      a search rank lower than the given rank.
-- \param address       Address terms for the place. May be NULL.
-- \param geometry      Geometry to which the address objects should be close.
--
-- \retval parent_place_id     Place_id of the address object that is the
--                             direct ancestor. 0 when no address object
--                             was accepted.
-- \retval postcode            Postcode computed from the address. This is the
--                             addr:postcode of one of the address objects. If
--                             more than one of them has a postcode, the
--                             highest ranking one is used. May be NULL.
-- \retval nameaddress_vector  Search terms for the address. This is the sum
--                             of name terms of all address objects.
CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
                                               partition SMALLINT,
                                               maxrank SMALLINT,
                                               address HSTORE,
                                               geometry GEOMETRY,
                                               OUT parent_place_id BIGINT,
                                               OUT postcode TEXT,
                                               OUT nameaddress_vector INT[])
  AS $$
DECLARE
  -- State carried across iterations of the feature loop.
  current_rank_address INTEGER := 0;
  location_distance FLOAT := 0;
  location_parent GEOMETRY := NULL;
  parent_place_id_rank SMALLINT := 0;

  location_isaddress BOOLEAN;

  -- address_havelevel[r] is true once a feature of address rank r has been
  -- accepted as part of the address.
  address_havelevel BOOLEAN[];
  -- Keywords of the feature accepted last. Stays NULL until then.
  location_keywords INT[];

  location RECORD;
  addr_item RECORD;

  isin_tokens INT[];
  isin TEXT[];
BEGIN
  parent_place_id := 0;
  nameaddress_vector := '{}'::int[];
  isin_tokens := '{}'::int[];

  ---- convert address store to array of tokenids
  IF address IS NOT NULL THEN
    FOR addr_item IN SELECT key, value FROM each(address)
    LOOP
      IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
                           'district', 'region', 'county', 'municipality',
                           'hamlet', 'village', 'subdistrict', 'town',
                           'neighbourhood', 'quarter', 'parish')
      THEN
        isin_tokens := array_merge(isin_tokens,
                                   word_ids_from_name(addr_item.value));
        -- NOTE(review): %REVERSE-ONLY% looks like a placeholder that is
        -- substituted before this file is loaded - confirm with the installer.
        IF NOT %REVERSE-ONLY% THEN
          nameaddress_vector := array_merge(nameaddress_vector,
                                            addr_ids_from_name(addr_item.value));
        END IF;
      END IF;
    END LOOP;

    IF address ? 'is_in' THEN
      -- is_in items need splitting
      isin := regexp_split_to_array(address->'is_in', E'[;,]');
      IF array_upper(isin, 1) IS NOT NULL THEN
        FOR i IN 1..array_upper(isin, 1) LOOP
          isin_tokens := array_merge(isin_tokens,
                                     word_ids_from_name(isin[i]));

          -- merge word into address vector
          IF NOT %REVERSE-ONLY% THEN
            nameaddress_vector := array_merge(nameaddress_vector,
                                              addr_ids_from_name(isin[i]));
          END IF;
        END LOOP;
      END IF;
    END IF;
  END IF;
  IF NOT %REVERSE-ONLY% THEN
    nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
  END IF;

  ---- now compute the address terms
  FOR i IN 1..28 LOOP
    address_havelevel[i] := false;
  END LOOP;

  FOR location IN
    SELECT * FROM getNearFeatures(partition, geometry, maxrank, isin_tokens)
  LOOP
    IF location.rank_address != current_rank_address THEN
      -- First feature of a new address rank: recompute the distance limit
      -- that guessed features of this rank are checked against below.
      current_rank_address := location.rank_address;
      IF location.isguess THEN
        location_distance := location.distance * 1.5;
      ELSE
        IF location.rank_address <= 12 THEN
          -- for county and above, if we have an area consider that exact
          -- (It would be nice to relax the constraint for places close to
          --  the boundary but we'd need the exact geometry for that. Too
          --  expensive.)
          location_distance := 0;
        ELSE
          -- Below county level remain slightly fuzzy.
          location_distance := location.distance * 0.5;
        END IF;
      END IF;
    ELSE
      -- Same rank as the previous feature: skip it when all of its keywords
      -- are already contained in those of the feature accepted last.
      CONTINUE WHEN location.keywords <@ location_keywords;
    END IF;

    IF location.distance < location_distance OR NOT location.isguess THEN
      location_keywords := location.keywords;

      -- Only the first accepted feature of a rank can be part of the address.
      location_isaddress := NOT address_havelevel[location.rank_address];
      IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
        -- A guessed feature additionally has to lie inside the geometry of
        -- the non-guessed address feature remembered last.
        location_isaddress := ST_Contains(location_parent, location.centroid);
      END IF;

      -- RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
      -- Add it to the list of search terms
      IF NOT %REVERSE-ONLY% THEN
        nameaddress_vector := array_merge(nameaddress_vector,
                                          location.keywords::integer[]);
      END IF;

      INSERT INTO place_addressline (place_id, address_place_id, fromarea,
                                     isaddress, distance, cached_rank_address)
        VALUES (obj_place_id, location.place_id, true,
                location_isaddress, location.distance, location.rank_address);

      IF location_isaddress THEN
        -- add postcode if we have one
        -- (If multiple postcodes are available, we end up with the highest ranking one.)
        IF location.postcode IS NOT NULL THEN
          postcode := location.postcode;
        END IF;

        address_havelevel[location.rank_address] := true;
        IF NOT location.isguess THEN
          -- Remember the geometry of this address object. The lookup must
          -- filter on the placex.place_id column; comparing the obj_place_id
          -- parameter instead would not restrict the rows of placex at all.
          SELECT placex.geometry FROM placex
            WHERE placex.place_id = location.place_id INTO location_parent;
        END IF;

        -- The accepted address object with the highest address rank becomes
        -- the parent.
        IF location.rank_address > parent_place_id_rank THEN
          parent_place_id := location.place_id;
          parent_place_id_rank := location.rank_address;
        END IF;
      END IF;
      --DEBUG: RAISE WARNING '  Terms: (%) %',location, nameaddress_vector;
    END IF;

  END LOOP;
END;
$$
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION placex_insert()
|
CREATE OR REPLACE FUNCTION placex_insert()
|
||||||
RETURNS TRIGGER
|
RETURNS TRIGGER
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
i INTEGER;
|
|
||||||
postcode TEXT;
|
postcode TEXT;
|
||||||
result BOOLEAN;
|
result BOOLEAN;
|
||||||
is_area BOOLEAN;
|
is_area BOOLEAN;
|
||||||
@ -428,33 +589,13 @@ CREATE OR REPLACE FUNCTION placex_update()
|
|||||||
RETURNS TRIGGER
|
RETURNS TRIGGER
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
search_maxdistance FLOAT[];
|
|
||||||
search_mindistance FLOAT[];
|
|
||||||
address_havelevel BOOLEAN[];
|
|
||||||
|
|
||||||
i INTEGER;
|
i INTEGER;
|
||||||
location RECORD;
|
location RECORD;
|
||||||
relation_members TEXT[];
|
relation_members TEXT[];
|
||||||
addr_item RECORD;
|
|
||||||
search_diameter FLOAT;
|
|
||||||
search_prevdiameter FLOAT;
|
|
||||||
search_maxrank INTEGER;
|
|
||||||
address_maxrank INTEGER;
|
|
||||||
address_street_word_ids INTEGER[];
|
|
||||||
parent_place_id_rank BIGINT;
|
|
||||||
|
|
||||||
addr_street TEXT;
|
addr_street TEXT;
|
||||||
addr_place TEXT;
|
addr_place TEXT;
|
||||||
|
|
||||||
isin TEXT[];
|
|
||||||
isin_tokens INT[];
|
|
||||||
|
|
||||||
location_rank_search INTEGER;
|
|
||||||
location_distance FLOAT;
|
|
||||||
location_parent GEOMETRY;
|
|
||||||
location_isaddress BOOLEAN;
|
|
||||||
location_keywords INTEGER[];
|
|
||||||
|
|
||||||
name_vector INTEGER[];
|
name_vector INTEGER[];
|
||||||
nameaddress_vector INTEGER[];
|
nameaddress_vector INTEGER[];
|
||||||
|
|
||||||
@ -711,13 +852,9 @@ BEGIN
|
|||||||
END IF;
|
END IF;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
-- What level are we searching from
|
|
||||||
search_maxrank := NEW.rank_search;
|
|
||||||
|
|
||||||
-- Initialise the name vector using our name
|
-- Initialise the name vector using our name
|
||||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||||
name_vector := make_keywords(NEW.name);
|
name_vector := make_keywords(NEW.name);
|
||||||
nameaddress_vector := '{}'::int[];
|
|
||||||
|
|
||||||
-- make sure all names are in the word table
|
-- make sure all names are in the word table
|
||||||
IF NEW.admin_level = 2
|
IF NEW.admin_level = 2
|
||||||
@ -728,142 +865,14 @@ BEGIN
|
|||||||
--DEBUG: RAISE WARNING 'Country names updated';
|
--DEBUG: RAISE WARNING 'Country names updated';
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
FOR i IN 1..28 LOOP
|
SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition,
|
||||||
address_havelevel[i] := false;
|
NEW.rank_search, NEW.address,
|
||||||
END LOOP;
|
CASE WHEN NEW.rank_search >= 26
|
||||||
|
|
||||||
NEW.parent_place_id = 0;
|
|
||||||
parent_place_id_rank = 0;
|
|
||||||
|
|
||||||
|
|
||||||
-- convert address store to array of tokenids
|
|
||||||
--DEBUG: RAISE WARNING 'Starting address search';
|
|
||||||
isin_tokens := '{}'::int[];
|
|
||||||
IF NEW.address IS NOT NULL THEN
|
|
||||||
FOR addr_item IN SELECT * FROM each(NEW.address)
|
|
||||||
LOOP
|
|
||||||
IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province',
|
|
||||||
'district', 'region', 'county', 'municipality',
|
|
||||||
'hamlet', 'village', 'subdistrict', 'town',
|
|
||||||
'neighbourhood', 'quarter', 'parish')
|
|
||||||
THEN
|
|
||||||
address_street_word_ids := word_ids_from_name(addr_item.value);
|
|
||||||
IF address_street_word_ids is not null THEN
|
|
||||||
isin_tokens := array_merge(isin_tokens, address_street_word_ids);
|
|
||||||
END IF;
|
|
||||||
IF NOT %REVERSE-ONLY% THEN
|
|
||||||
address_street_word_ids := addr_ids_from_name(addr_item.value);
|
|
||||||
IF address_street_word_ids is not null THEN
|
|
||||||
nameaddress_vector := array_merge(nameaddress_vector,
|
|
||||||
address_street_word_ids);
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
IF addr_item.key = 'is_in' THEN
|
|
||||||
-- is_in items need splitting
|
|
||||||
isin := regexp_split_to_array(addr_item.value, E'[;,]');
|
|
||||||
IF array_upper(isin, 1) IS NOT NULL THEN
|
|
||||||
FOR i IN 1..array_upper(isin, 1) LOOP
|
|
||||||
address_street_word_ids := word_ids_from_name(isin[i]);
|
|
||||||
IF address_street_word_ids is not null THEN
|
|
||||||
isin_tokens := array_merge(isin_tokens, address_street_word_ids);
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
-- merge word into address vector
|
|
||||||
IF NOT %REVERSE-ONLY% THEN
|
|
||||||
address_street_word_ids := addr_ids_from_name(isin[i]);
|
|
||||||
IF address_street_word_ids is not null THEN
|
|
||||||
nameaddress_vector := array_merge(nameaddress_vector,
|
|
||||||
address_street_word_ids);
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
END LOOP;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
END LOOP;
|
|
||||||
END IF;
|
|
||||||
IF NOT %REVERSE-ONLY% THEN
|
|
||||||
nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
-- RAISE WARNING 'ISIN: %', isin_tokens;
|
|
||||||
|
|
||||||
-- Process area matches
|
|
||||||
location_rank_search := 0;
|
|
||||||
location_distance := 0;
|
|
||||||
location_parent := NULL;
|
|
||||||
-- added ourself as address already
|
|
||||||
address_havelevel[NEW.rank_address] := true;
|
|
||||||
--DEBUG: RAISE WARNING ' getNearFeatures(%,''%'',%,''%'')',NEW.partition, NEW.centroid, search_maxrank, isin_tokens;
|
|
||||||
FOR location IN
|
|
||||||
SELECT * from getNearFeatures(NEW.partition,
|
|
||||||
CASE WHEN NEW.rank_search >= 26
|
|
||||||
AND NEW.rank_search < 30
|
AND NEW.rank_search < 30
|
||||||
THEN NEW.geometry
|
THEN NEW.geometry ELSE NEW.centroid END)
|
||||||
ELSE NEW.centroid END,
|
INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
|
||||||
search_maxrank, isin_tokens)
|
|
||||||
LOOP
|
|
||||||
IF location.rank_address != location_rank_search THEN
|
|
||||||
location_rank_search := location.rank_address;
|
|
||||||
IF location.isguess THEN
|
|
||||||
location_distance := location.distance * 1.5;
|
|
||||||
ELSE
|
|
||||||
IF location.rank_address <= 12 THEN
|
|
||||||
-- for county and above, if we have an area consider that exact
|
|
||||||
-- (It would be nice to relax the constraint for places close to
|
|
||||||
-- the boundary but we'd need the exact geometry for that. Too
|
|
||||||
-- expensive.)
|
|
||||||
location_distance = 0;
|
|
||||||
ELSE
|
|
||||||
-- Below county level remain slightly fuzzy.
|
|
||||||
location_distance := location.distance * 0.5;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
ELSE
|
|
||||||
CONTINUE WHEN location.keywords <@ location_keywords;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
IF location.distance < location_distance OR NOT location.isguess THEN
|
--DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;
|
||||||
location_keywords := location.keywords;
|
|
||||||
|
|
||||||
location_isaddress := NOT address_havelevel[location.rank_address];
|
|
||||||
IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN
|
|
||||||
location_isaddress := ST_Contains(location_parent,location.centroid);
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
-- RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
|
|
||||||
-- Add it to the list of search terms
|
|
||||||
IF NOT %REVERSE-ONLY% THEN
|
|
||||||
nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]);
|
|
||||||
END IF;
|
|
||||||
INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address)
|
|
||||||
VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address);
|
|
||||||
|
|
||||||
IF location_isaddress THEN
|
|
||||||
-- add postcode if we have one
|
|
||||||
-- (If multiple postcodes are available, we end up with the highest ranking one.)
|
|
||||||
IF location.postcode is not null THEN
|
|
||||||
NEW.postcode = location.postcode;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
address_havelevel[location.rank_address] := true;
|
|
||||||
IF NOT location.isguess THEN
|
|
||||||
SELECT geometry FROM placex WHERE place_id = location.place_id INTO location_parent;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
IF location.rank_address > parent_place_id_rank THEN
|
|
||||||
NEW.parent_place_id = location.place_id;
|
|
||||||
parent_place_id_rank = location.rank_address;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
--DEBUG: RAISE WARNING ' Terms: (%) %',location, nameaddress_vector;
|
|
||||||
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
END LOOP;
|
|
||||||
--DEBUG: RAISE WARNING 'address computed';
|
|
||||||
|
|
||||||
IF NEW.address is not null AND NEW.address ? 'postcode'
|
IF NEW.address is not null AND NEW.address ? 'postcode'
|
||||||
AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
||||||
|
@ -27,6 +27,7 @@ Feature: Search queries
|
|||||||
| suburb | Eilbek |
|
| suburb | Eilbek |
|
||||||
| postcode | 22089 |
|
| postcode | 22089 |
|
||||||
| city_district | Wandsbek |
|
| city_district | Wandsbek |
|
||||||
|
| city | Hamburg |
|
||||||
| country | Deutschland |
|
| country | Deutschland |
|
||||||
| country_code | de |
|
| country_code | de |
|
||||||
|
|
||||||
@ -42,6 +43,7 @@ Feature: Search queries
|
|||||||
| suburb | Eilbek |
|
| suburb | Eilbek |
|
||||||
| postcode | 22089 |
|
| postcode | 22089 |
|
||||||
| city_district | Wandsbek |
|
| city_district | Wandsbek |
|
||||||
|
| city | Hamburg |
|
||||||
| country | Deutschland |
|
| country | Deutschland |
|
||||||
| country_code | de |
|
| country_code | de |
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user