mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-27 19:07:55 +03:00
Merge pull request #1024 from lonvia/reduce-address-search-terms
Reduce address search terms
This commit is contained in:
commit
080ba00956
@ -1234,6 +1234,7 @@ DECLARE
|
||||
relation_members TEXT[];
|
||||
relMember RECORD;
|
||||
linkedplacex RECORD;
|
||||
addr_item RECORD;
|
||||
search_diameter FLOAT;
|
||||
search_prevdiameter FLOAT;
|
||||
search_maxrank INTEGER;
|
||||
@ -1766,44 +1767,43 @@ BEGIN
|
||||
parent_place_id_rank = 0;
|
||||
|
||||
|
||||
-- convert isin to array of tokenids
|
||||
-- convert address store to array of tokenids
|
||||
--DEBUG: RAISE WARNING 'Starting address search';
|
||||
isin_tokens := '{}'::int[];
|
||||
IF NEW.address IS NOT NULL THEN
|
||||
isin := avals(NEW.address);
|
||||
IF array_upper(isin, 1) IS NOT NULL THEN
|
||||
FOR i IN 1..array_upper(isin, 1) LOOP
|
||||
-- TODO further split terms with comma and semicolon
|
||||
address_street_word_id := get_name_id(make_standard_name(isin[i]));
|
||||
FOR addr_item IN SELECT * FROM each(NEW.address)
|
||||
LOOP
|
||||
IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', 'district', 'region', 'county', 'municipality', 'hamlet', 'village', 'subdistrict', 'town', 'neighbourhood', 'quarter', 'parish') THEN
|
||||
address_street_word_id := get_name_id(make_standard_name(addr_item.value));
|
||||
IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
|
||||
isin_tokens := isin_tokens || address_street_word_id;
|
||||
END IF;
|
||||
|
||||
-- merge word into address vector
|
||||
address_street_word_id := get_word_id(make_standard_name(isin[i]));
|
||||
address_street_word_id := get_word_id(make_standard_name(addr_item.value));
|
||||
IF address_street_word_id IS NOT NULL THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
IF addr_item.key = 'is_in' THEN
|
||||
-- is_in items need splitting
|
||||
isin := regexp_split_to_array(addr_item.value, E'[;,]');
|
||||
IF array_upper(isin, 1) IS NOT NULL THEN
|
||||
FOR i IN 1..array_upper(isin, 1) LOOP
|
||||
address_street_word_id := get_name_id(make_standard_name(isin[i]));
|
||||
IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN
|
||||
isin_tokens := isin_tokens || address_street_word_id;
|
||||
END IF;
|
||||
|
||||
-- %NOTIGERDATA% IF 0 THEN
|
||||
-- for the USA we have an additional address table. Merge in zip codes from there too
|
||||
IF NEW.rank_search = 26 AND NEW.country_code = 'us' THEN
|
||||
FOR location IN SELECT distinct postcode from location_property_tiger where parent_place_id = NEW.place_id LOOP
|
||||
address_street_word_id := get_name_id(make_standard_name(location.postcode));
|
||||
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
|
||||
isin_tokens := isin_tokens || address_street_word_id;
|
||||
|
||||
-- also merge in the single word version
|
||||
address_street_word_id := get_word_id(make_standard_name(location.postcode));
|
||||
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
|
||||
-- merge word into address vector
|
||||
address_street_word_id := get_word_id(make_standard_name(isin[i]));
|
||||
IF address_street_word_id IS NOT NULL THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
--DEBUG: RAISE WARNING 'Tiger postcodes collected';
|
||||
-- %NOTIGERDATA% END IF;
|
||||
nameaddress_vector := array_merge(nameaddress_vector, isin_tokens);
|
||||
|
||||
-- RAISE WARNING 'ISIN: %', isin_tokens;
|
||||
|
||||
@ -1884,40 +1884,6 @@ BEGIN
|
||||
END LOOP;
|
||||
--DEBUG: RAISE WARNING 'address computed';
|
||||
|
||||
-- try using the isin value to find parent places
|
||||
IF array_upper(isin_tokens, 1) IS NOT NULL THEN
|
||||
FOR i IN 1..array_upper(isin_tokens, 1) LOOP
|
||||
--RAISE WARNING ' getNearestNamedFeature: % % % %',NEW.partition, place_centroid, search_maxrank, isin_tokens[i];
|
||||
IF NOT ARRAY[isin_tokens[i]] <@ nameaddress_vector THEN
|
||||
|
||||
FOR location IN SELECT * from getNearestNamedFeature(NEW.partition, place_centroid, search_maxrank, isin_tokens[i]) LOOP
|
||||
|
||||
--RAISE WARNING ' ISIN: %',location;
|
||||
|
||||
IF location.rank_search > 4 THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]);
|
||||
INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address)
|
||||
VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address);
|
||||
IF NEW.postcode is null AND location.postcode is not null
|
||||
AND NOT address_havelevel[location.rank_address] THEN
|
||||
NEW.postcode := location.postcode;
|
||||
END IF;
|
||||
|
||||
address_havelevel[location.rank_address] := true;
|
||||
|
||||
IF location.rank_address > parent_place_id_rank THEN
|
||||
NEW.parent_place_id = location.place_id;
|
||||
parent_place_id_rank = location.rank_address;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
END IF;
|
||||
|
||||
END LOOP;
|
||||
END IF;
|
||||
--DEBUG: RAISE WARNING 'isin tokens processed';
|
||||
|
||||
-- for long ways we should add search terms for the entire length
|
||||
IF st_length(NEW.geometry) > 0.05 THEN
|
||||
|
||||
|
@ -19,8 +19,6 @@ CREATE INDEX idx_location_area_country_place_id ON location_area_country USING B
|
||||
|
||||
CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index};
|
||||
|
||||
CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) {ts:address-index};
|
||||
|
||||
DROP INDEX IF EXISTS place_id_idx;
|
||||
CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index};
|
||||
|
||||
|
@ -84,38 +84,6 @@ END
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
create or replace function getNearestNamedFeature(in_partition INTEGER, point GEOMETRY, maxrank INTEGER, isin_token INTEGER) RETURNS setof nearfeature AS $$
|
||||
DECLARE
|
||||
r nearfeature%rowtype;
|
||||
BEGIN
|
||||
|
||||
-- start
|
||||
IF in_partition = -partition- THEN
|
||||
FOR r IN
|
||||
SELECT place_id, name_vector, address_rank, search_rank,
|
||||
ST_Distance(centroid, point) as distance, null as isguess
|
||||
FROM search_name_-partition-
|
||||
WHERE name_vector @> ARRAY[isin_token]
|
||||
AND search_rank < maxrank
|
||||
UNION ALL
|
||||
SELECT place_id, name_vector, address_rank, search_rank,
|
||||
ST_Distance(centroid, point) as distance, null as isguess
|
||||
FROM search_name_country
|
||||
WHERE name_vector @> ARRAY[isin_token]
|
||||
AND search_rank < maxrank
|
||||
ORDER BY distance ASC limit 1
|
||||
LOOP
|
||||
RETURN NEXT r;
|
||||
END LOOP;
|
||||
RETURN;
|
||||
END IF;
|
||||
-- end
|
||||
|
||||
RAISE EXCEPTION 'Unknown partition %', in_partition;
|
||||
END
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
create or replace function getNearestNamedRoadFeature(in_partition INTEGER, point GEOMETRY, isin_token INTEGER[])
|
||||
RETURNS setof nearfeature AS $$
|
||||
DECLARE
|
||||
@ -185,15 +153,6 @@ BEGIN
|
||||
INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid)
|
||||
values (in_place_id, in_rank_search, in_rank_address, in_importance, in_country_code, in_name_vector, in_nameaddress_vector, in_centroid);
|
||||
|
||||
IF in_rank_search <= 4 THEN
|
||||
DELETE FROM search_name_country WHERE place_id = in_place_id;
|
||||
IF in_rank_address > 0 THEN
|
||||
INSERT INTO search_name_country (place_id, search_rank, address_rank, name_vector, centroid)
|
||||
values (in_place_id, in_rank_search, in_rank_address, in_name_vector, in_geometry);
|
||||
END IF;
|
||||
RETURN TRUE;
|
||||
END IF;
|
||||
|
||||
-- start
|
||||
IF in_partition = -partition- THEN
|
||||
DELETE FROM search_name_-partition- values WHERE place_id = in_place_id;
|
||||
@ -216,7 +175,6 @@ DECLARE
|
||||
BEGIN
|
||||
|
||||
DELETE from search_name WHERE place_id = in_place_id;
|
||||
DELETE from search_name_country WHERE place_id = in_place_id;
|
||||
|
||||
-- start
|
||||
IF in_partition = -partition- THEN
|
||||
|
@ -38,10 +38,6 @@ CREATE TABLE search_name_blank (
|
||||
CREATE TABLE location_area_country () INHERITS (location_area_large) {ts:address-data};
|
||||
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {ts:address-index};
|
||||
|
||||
CREATE TABLE search_name_country () INHERITS (search_name_blank) {ts:address-data};
|
||||
CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) {ts:address-index};
|
||||
CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) {ts:address-index};
|
||||
|
||||
-- start
|
||||
CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large) {ts:address-data};
|
||||
CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) {ts:address-index};
|
||||
|
@ -23,3 +23,57 @@ Feature: Creation of search terms
|
||||
Then search_name contains
|
||||
| object | name_vector | nameaddress_vector |
|
||||
| N1 | foo | the road |
|
||||
|
||||
Scenario: Some addr: tags are added to address when the name exists
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| N1 | place | state | new york | 80 80 |
|
||||
| N1 | place | city | bonn | 81 81 |
|
||||
| N1 | place | suburb | smalltown| 80 81 |
|
||||
And the named places
|
||||
| osm | class | type | addr+city | addr+state | addr+suburb | geometry |
|
||||
| W1 | highway | service | bonn | New York | Smalltown | :w-north |
|
||||
When importing
|
||||
Then search_name contains
|
||||
| object | nameaddress_vector |
|
||||
| W1 | bonn, new york, smalltown |
|
||||
|
||||
Scenario: A known addr:* tag is not added if the name is unknown
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
| osm | class | type | name | addr+city | geometry |
|
||||
| W1 | highway | residential | Road | Nandu | :w-north |
|
||||
When importing
|
||||
Then search_name contains not
|
||||
| object | nameaddress_vector |
|
||||
| W1 | nandu |
|
||||
|
||||
Scenario: addr:postcode is not added to the address terms
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
| osm | class | type | name+ref | geometry |
|
||||
| N1 | place | state | 12345 | 80 80 |
|
||||
And the named places
|
||||
| osm | class | type | addr+postcode | geometry |
|
||||
| W1 | highway | residential | 12345 | :w-north |
|
||||
When importing
|
||||
Then search_name contains not
|
||||
| object | nameaddress_vector |
|
||||
| W1 | 12345 |
|
||||
|
||||
Scenario: is_in is split and added to the address search terms
|
||||
Given the scene roads-with-pois
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| N1 | place | state | new york | 80 80 |
|
||||
| N1 | place | city | bonn | 81 81 |
|
||||
| N1 | place | suburb | smalltown| 80 81 |
|
||||
And the named places
|
||||
| osm | class | type | addr+is_in | geometry |
|
||||
| W1 | highway | service | bonn, New York, Smalltown | :w-north |
|
||||
When importing
|
||||
Then search_name contains
|
||||
| object | nameaddress_vector |
|
||||
| W1 | bonn, new york, smalltown |
|
||||
|
||||
|
@ -427,8 +427,8 @@ def check_placex_contents(context, exact):
|
||||
|
||||
context.db.commit()
|
||||
|
||||
@then("search_name contains")
|
||||
def check_search_name_contents(context):
|
||||
@then("search_name contains(?P<exclude> not)?")
|
||||
def check_search_name_contents(context, exclude):
|
||||
cur = context.db.cursor(cursor_factory=psycopg2.extras.DictCursor)
|
||||
|
||||
for row in context.table:
|
||||
@ -446,11 +446,16 @@ def check_search_name_contents(context):
|
||||
FROM word, (SELECT unnest(%s) as term) t
|
||||
WHERE word_token = make_standard_name(t.term)""",
|
||||
(terms,))
|
||||
ok_(subcur.rowcount >= len(terms),
|
||||
"No word entry found for " + row[h])
|
||||
if not exclude:
|
||||
ok_(subcur.rowcount >= len(terms),
|
||||
"No word entry found for " + row[h])
|
||||
for wid in subcur:
|
||||
assert_in(wid[0], res[h],
|
||||
"Missing term for %s/%s: %s" % (pid, h, wid[1]))
|
||||
if exclude:
|
||||
assert_not_in(wid[0], res[h],
|
||||
"Found term for %s/%s: %s" % (pid, h, wid[1]))
|
||||
else:
|
||||
assert_in(wid[0], res[h],
|
||||
"Missing term for %s/%s: %s" % (pid, h, wid[1]))
|
||||
else:
|
||||
assert_db_column(res, h, row[h], context)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user