mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-22 21:28:10 +03:00
move name matching into tokenizer module
Instead of requesting the match tokens from the tokenizer when looking for parent streets/places and address parts, hand in the saved tokens and ask if they match. This gives the tokenizer more freedom to decide how name matching should be done.
This commit is contained in:
parent
d562f11298
commit
59fe74ddf6
@ -43,7 +43,7 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- find the parent road of the cut road parts
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(token_info JSONB,
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY, geom GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
@ -52,7 +52,7 @@ DECLARE
|
||||
parent_place_id BIGINT;
|
||||
location RECORD;
|
||||
BEGIN
|
||||
parent_place_id := find_parent_for_address(street, place, partition, centroid);
|
||||
parent_place_id := find_parent_for_address(token_info, partition, centroid);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
FOR location IN SELECT place_id FROM placex
|
||||
@ -155,9 +155,8 @@ BEGIN
|
||||
NEW.interpolationtype = NEW.address->'interpolation';
|
||||
|
||||
place_centroid := ST_PointOnSurface(NEW.linegeo);
|
||||
NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
|
||||
token_addr_place_match_tokens(NEW.token_info),
|
||||
NEW.partition, place_centroid, NEW.linegeo);
|
||||
NEW.parent_place_id = get_interpolation_parent(NEW.token_info, NEW.partition,
|
||||
place_centroid, NEW.linegeo);
|
||||
|
||||
interpol_postcode := token_normalized_postcode(NEW.address->'postcode');
|
||||
|
||||
|
@ -66,7 +66,7 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
|
||||
from_rank SMALLINT, to_rank SMALLINT,
|
||||
extent FLOAT, tokens INT[])
|
||||
extent FLOAT, token_info JSONB, key TEXT)
|
||||
RETURNS nearfeaturecentr
|
||||
AS $$
|
||||
DECLARE
|
||||
@ -80,7 +80,7 @@ BEGIN
|
||||
FROM location_area_large_{{ partition }}
|
||||
WHERE geometry && ST_Expand(feature, extent)
|
||||
AND rank_address between from_rank and to_rank
|
||||
AND tokens && keywords
|
||||
AND token_matches_address(token_info, key, keywords)
|
||||
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
|
||||
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
|
||||
RETURN r;
|
||||
@ -148,18 +148,21 @@ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceId(in_partition INTEGER,
|
||||
point GEOMETRY,
|
||||
isin_token INTEGER[])
|
||||
token_info JSONB)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent BIGINT;
|
||||
BEGIN
|
||||
IF not token_has_addr_street(token_info) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
{% for partition in db.partitions %}
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id FROM search_name_{{ partition }}
|
||||
INTO parent
|
||||
WHERE name_vector && isin_token
|
||||
WHERE token_matches_street(token_info, name_vector)
|
||||
AND centroid && ST_Expand(point, 0.015)
|
||||
AND address_rank between 26 and 27
|
||||
ORDER BY ST_Distance(centroid, point) ASC limit 1;
|
||||
@ -174,19 +177,22 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getNearestNamedPlacePlaceId(in_partition INTEGER,
|
||||
point GEOMETRY,
|
||||
isin_token INTEGER[])
|
||||
token_info JSONB)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent BIGINT;
|
||||
BEGIN
|
||||
IF not token_has_addr_place(token_info) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
{% for partition in db.partitions %}
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id
|
||||
INTO parent
|
||||
FROM search_name_{{ partition }}
|
||||
WHERE name_vector && isin_token
|
||||
WHERE token_matches_place(token_info, name_vector)
|
||||
AND centroid && ST_Expand(point, 0.04)
|
||||
AND address_rank between 16 and 25
|
||||
ORDER BY ST_Distance(centroid, point) ASC limit 1;
|
||||
|
@ -104,8 +104,7 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT,
|
||||
poi_partition SMALLINT,
|
||||
bbox GEOMETRY,
|
||||
addr_street INTEGER[],
|
||||
addr_place INTEGER[],
|
||||
token_info JSONB,
|
||||
is_place_addr BOOLEAN)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
@ -119,8 +118,7 @@ BEGIN
|
||||
parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
||||
poi_partition, bbox);
|
||||
parent_place_id := find_parent_for_address(token_info, poi_partition, bbox);
|
||||
END IF;
|
||||
|
||||
IF parent_place_id is null and poi_osm_type = 'N' THEN
|
||||
@ -333,13 +331,14 @@ BEGIN
|
||||
WHERE s.place_id = parent_place_id;
|
||||
|
||||
FOR addr_item IN
|
||||
SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)
|
||||
WHERE not search_tokens <@ parent_address_vector
|
||||
SELECT (get_addr_tag_rank(key, country)).*, key,
|
||||
token_get_address_search_tokens(token_info, key) as search_tokens
|
||||
FROM token_get_address_keys(token_info) as key
|
||||
WHERE not token_get_address_search_tokens(token_info, key) <@ parent_address_vector
|
||||
LOOP
|
||||
addr_place := get_address_place(in_partition, geometry,
|
||||
addr_item.from_rank, addr_item.to_rank,
|
||||
addr_item.extent, addr_item.match_tokens);
|
||||
addr_item.extent, token_info, addr_item.key);
|
||||
|
||||
IF addr_place is null THEN
|
||||
-- No place found in OSM that matches. Make it at least searchable.
|
||||
@ -447,14 +446,16 @@ BEGIN
|
||||
|
||||
FOR location IN
|
||||
SELECT (get_address_place(partition, geometry, from_rank, to_rank,
|
||||
extent, match_tokens)).*, search_tokens
|
||||
FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)) x
|
||||
extent, token_info, key)).*, key
|
||||
FROM (SELECT (get_addr_tag_rank(key, country)).*, key
|
||||
FROM token_get_address_keys(token_info) as key) x
|
||||
ORDER BY rank_address, distance, isguess desc
|
||||
LOOP
|
||||
IF location.place_id is null THEN
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
|
||||
nameaddress_vector := array_merge(nameaddress_vector,
|
||||
token_get_address_search_tokens(token_info,
|
||||
location.key));
|
||||
{% endif %}
|
||||
ELSE
|
||||
{% if not db.reverse_only %}
|
||||
@ -689,9 +690,6 @@ DECLARE
|
||||
parent_address_level SMALLINT;
|
||||
place_address_level SMALLINT;
|
||||
|
||||
addr_street INTEGER[];
|
||||
addr_place INTEGER[];
|
||||
|
||||
max_rank SMALLINT;
|
||||
|
||||
name_vector INTEGER[];
|
||||
@ -860,8 +858,6 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
NEW.housenumber := token_normalized_housenumber(NEW.token_info);
|
||||
addr_street := token_addr_street_match_tokens(NEW.token_info);
|
||||
addr_place := token_addr_place_match_tokens(NEW.token_info);
|
||||
|
||||
NEW.postcode := null;
|
||||
|
||||
@ -907,7 +903,7 @@ BEGIN
|
||||
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
|
||||
NEW.partition,
|
||||
ST_Envelope(NEW.geometry),
|
||||
addr_street, addr_place,
|
||||
NEW.token_info,
|
||||
is_place_address);
|
||||
|
||||
-- If we found the road take a shortcut here.
|
||||
|
@ -215,13 +215,12 @@ LANGUAGE plpgsql STABLE;
|
||||
|
||||
-- Find the parent of an address with addr:street/addr:place tag.
|
||||
--
|
||||
-- \param street Value of addr:street or NULL if tag is missing.
|
||||
-- \param place Value of addr:place or NULL if tag is missing.
|
||||
-- \param token_info Naming info with the address information.
|
||||
-- \param partition Partition where to search the parent.
|
||||
-- \param centroid Location of the address.
|
||||
--
|
||||
-- \return Place ID of the parent if one was found, NULL otherwise.
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(token_info JSONB,
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
@ -229,30 +228,22 @@ CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEG
|
||||
DECLARE
|
||||
parent_place_id BIGINT;
|
||||
BEGIN
|
||||
IF street is not null THEN
|
||||
-- Check for addr:street attributes
|
||||
-- Note that addr:street links can only be indexed, once the street itself is indexed
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
-- Check for addr:street attributes
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, token_info);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent from addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
|
||||
-- Check for addr:place attributes.
|
||||
IF place is not null THEN
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN NULL;
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, token_info);
|
||||
{% if debug %}RAISE WARNING 'Get parent from addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
|
@ -14,7 +14,6 @@ DECLARE
|
||||
out_partition INTEGER;
|
||||
out_parent_place_id BIGINT;
|
||||
location RECORD;
|
||||
address_street_word_ids INTEGER[];
|
||||
|
||||
BEGIN
|
||||
|
||||
@ -54,13 +53,9 @@ BEGIN
|
||||
|
||||
place_centroid := ST_Centroid(linegeo);
|
||||
out_partition := get_partition('us');
|
||||
out_parent_place_id := null;
|
||||
|
||||
address_street_word_ids := token_addr_street_match_tokens(token_info);
|
||||
IF address_street_word_ids IS NOT NULL THEN
|
||||
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
||||
address_street_word_ids);
|
||||
END IF;
|
||||
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
||||
token_info);
|
||||
|
||||
IF out_parent_place_id IS NULL THEN
|
||||
SELECT getNearestParallelRoadFeature(out_partition, linegeo)
|
||||
|
@ -34,17 +34,31 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[]
|
||||
SELECT info->>'street' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'place_match' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[] && street_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'place_match')::INTEGER[]
|
||||
SELECT (info->>'place_match')::INTEGER[] && place_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
@ -55,19 +69,24 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
DROP TYPE IF EXISTS token_addresstoken CASCADE;
|
||||
CREATE TYPE token_addresstoken AS (
|
||||
key TEXT,
|
||||
match_tokens INT[],
|
||||
search_tokens INT[]
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
|
||||
RETURNS SETOF token_addresstoken
|
||||
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
|
||||
RETURNS SETOF TEXT
|
||||
AS $$
|
||||
SELECT key, (value->>1)::int[] as match_tokens,
|
||||
(value->>0)::int[] as search_tokens
|
||||
FROM jsonb_each(info->'addr');
|
||||
SELECT * FROM jsonb_object_keys(info->'addr');
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->'addr'->key->>0)::INTEGER[];
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
|
@ -34,17 +34,31 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[]
|
||||
SELECT info->>'street' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'place_match' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[] && street_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'place_match')::INTEGER[]
|
||||
SELECT (info->>'place_match')::INTEGER[] && place_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
@ -55,19 +69,24 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
DROP TYPE IF EXISTS token_addresstoken CASCADE;
|
||||
CREATE TYPE token_addresstoken AS (
|
||||
key TEXT,
|
||||
match_tokens INT[],
|
||||
search_tokens INT[]
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
|
||||
RETURNS SETOF token_addresstoken
|
||||
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
|
||||
RETURNS SETOF TEXT
|
||||
AS $$
|
||||
SELECT key, (value->>1)::int[] as match_tokens,
|
||||
(value->>0)::int[] as search_tokens
|
||||
FROM jsonb_each(info->'addr');
|
||||
SELECT * FROM jsonb_object_keys(info->'addr');
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->'addr'->key->>0)::INTEGER[];
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user