move name matching into tokenizer module

Instead of requesting the match tokens from the tokenizer
when looking for parent streets/places and address parts,
hand in the saved tokens and ask if they match. This gives
the tokenizer more freedom to decide how name matching
should be done.
This commit is contained in:
Sarah Hoffmann 2021-09-22 22:20:02 +02:00
parent d562f11298
commit 59fe74ddf6
7 changed files with 117 additions and 92 deletions

View File

@ -43,7 +43,7 @@ LANGUAGE plpgsql STABLE;
-- find the parent road of the cut road parts
CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
CREATE OR REPLACE FUNCTION get_interpolation_parent(token_info JSONB,
partition SMALLINT,
centroid GEOMETRY, geom GEOMETRY)
RETURNS BIGINT
@ -52,7 +52,7 @@ DECLARE
parent_place_id BIGINT;
location RECORD;
BEGIN
parent_place_id := find_parent_for_address(street, place, partition, centroid);
parent_place_id := find_parent_for_address(token_info, partition, centroid);
IF parent_place_id is null THEN
FOR location IN SELECT place_id FROM placex
@ -155,9 +155,8 @@ BEGIN
NEW.interpolationtype = NEW.address->'interpolation';
place_centroid := ST_PointOnSurface(NEW.linegeo);
NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
token_addr_place_match_tokens(NEW.token_info),
NEW.partition, place_centroid, NEW.linegeo);
NEW.parent_place_id = get_interpolation_parent(NEW.token_info, NEW.partition,
place_centroid, NEW.linegeo);
interpol_postcode := token_normalized_postcode(NEW.address->'postcode');

View File

@ -66,7 +66,7 @@ LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
from_rank SMALLINT, to_rank SMALLINT,
extent FLOAT, tokens INT[])
extent FLOAT, token_info JSONB, key TEXT)
RETURNS nearfeaturecentr
AS $$
DECLARE
@ -80,7 +80,7 @@ BEGIN
FROM location_area_large_{{ partition }}
WHERE geometry && ST_Expand(feature, extent)
AND rank_address between from_rank and to_rank
AND tokens && keywords
AND token_matches_address(token_info, key, keywords)
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
RETURN r;
@ -148,18 +148,21 @@ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceId(in_partition INTEGER,
point GEOMETRY,
isin_token INTEGER[])
token_info JSONB)
RETURNS BIGINT
AS $$
DECLARE
parent BIGINT;
BEGIN
IF not token_has_addr_street(token_info) THEN
RETURN NULL;
END IF;
{% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN
SELECT place_id FROM search_name_{{ partition }}
INTO parent
WHERE name_vector && isin_token
WHERE token_matches_street(token_info, name_vector)
AND centroid && ST_Expand(point, 0.015)
AND address_rank between 26 and 27
ORDER BY ST_Distance(centroid, point) ASC limit 1;
@ -174,19 +177,22 @@ LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION getNearestNamedPlacePlaceId(in_partition INTEGER,
point GEOMETRY,
isin_token INTEGER[])
token_info JSONB)
RETURNS BIGINT
AS $$
DECLARE
parent BIGINT;
BEGIN
IF not token_has_addr_place(token_info) THEN
RETURN NULL;
END IF;
{% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN
SELECT place_id
INTO parent
FROM search_name_{{ partition }}
WHERE name_vector && isin_token
WHERE token_matches_place(token_info, name_vector)
AND centroid && ST_Expand(point, 0.04)
AND address_rank between 16 and 25
ORDER BY ST_Distance(centroid, point) ASC limit 1;

View File

@ -104,8 +104,7 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
poi_osm_id BIGINT,
poi_partition SMALLINT,
bbox GEOMETRY,
addr_street INTEGER[],
addr_place INTEGER[],
token_info JSONB,
is_place_addr BOOLEAN)
RETURNS BIGINT
AS $$
@ -119,8 +118,7 @@ BEGIN
parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
IF parent_place_id is null THEN
parent_place_id := find_parent_for_address(addr_street, addr_place,
poi_partition, bbox);
parent_place_id := find_parent_for_address(token_info, poi_partition, bbox);
END IF;
IF parent_place_id is null and poi_osm_type = 'N' THEN
@ -333,13 +331,14 @@ BEGIN
WHERE s.place_id = parent_place_id;
FOR addr_item IN
SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
FROM token_get_address_tokens(token_info)
WHERE not search_tokens <@ parent_address_vector
SELECT (get_addr_tag_rank(key, country)).*, key,
token_get_address_search_tokens(token_info, key) as search_tokens
FROM token_get_address_keys(token_info) as key
WHERE not token_get_address_search_tokens(token_info, key) <@ parent_address_vector
LOOP
addr_place := get_address_place(in_partition, geometry,
addr_item.from_rank, addr_item.to_rank,
addr_item.extent, addr_item.match_tokens);
addr_item.extent, token_info, addr_item.key);
IF addr_place is null THEN
-- No place found in OSM that matches. Make it at least searchable.
@ -447,14 +446,16 @@ BEGIN
FOR location IN
SELECT (get_address_place(partition, geometry, from_rank, to_rank,
extent, match_tokens)).*, search_tokens
FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
FROM token_get_address_tokens(token_info)) x
extent, token_info, key)).*, key
FROM (SELECT (get_addr_tag_rank(key, country)).*, key
FROM token_get_address_keys(token_info) as key) x
ORDER BY rank_address, distance, isguess desc
LOOP
IF location.place_id is null THEN
{% if not db.reverse_only %}
nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
nameaddress_vector := array_merge(nameaddress_vector,
token_get_address_search_tokens(token_info,
location.key));
{% endif %}
ELSE
{% if not db.reverse_only %}
@ -689,9 +690,6 @@ DECLARE
parent_address_level SMALLINT;
place_address_level SMALLINT;
addr_street INTEGER[];
addr_place INTEGER[];
max_rank SMALLINT;
name_vector INTEGER[];
@ -860,8 +858,6 @@ BEGIN
END IF;
NEW.housenumber := token_normalized_housenumber(NEW.token_info);
addr_street := token_addr_street_match_tokens(NEW.token_info);
addr_place := token_addr_place_match_tokens(NEW.token_info);
NEW.postcode := null;
@ -907,7 +903,7 @@ BEGIN
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
NEW.partition,
ST_Envelope(NEW.geometry),
addr_street, addr_place,
NEW.token_info,
is_place_address);
-- If we found the road take a shortcut here.

View File

@ -215,13 +215,12 @@ LANGUAGE plpgsql STABLE;
-- Find the parent of an address with addr:street/addr:place tag.
--
-- \param street Value of addr:street or NULL if tag is missing.
-- \param place Value of addr:place or NULL if tag is missing.
-- \param token_info Naming info with the address information.
-- \param partition Partition where to search the parent.
-- \param centroid Location of the address.
--
-- \return Place ID of the parent if one was found, NULL otherwise.
CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
CREATE OR REPLACE FUNCTION find_parent_for_address(token_info JSONB,
partition SMALLINT,
centroid GEOMETRY)
RETURNS BIGINT
@ -229,30 +228,22 @@ CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEG
DECLARE
parent_place_id BIGINT;
BEGIN
IF street is not null THEN
-- Check for addr:street attributes
-- Note that addr:street links can only be indexed, once the street itself is indexed
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
IF parent_place_id is not null THEN
{% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END IF;
-- Check for addr:street attributes
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, token_info);
IF parent_place_id is not null THEN
{% if debug %}RAISE WARNING 'Get parent from addr:street: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END IF;
-- Check for addr:place attributes.
IF place is not null THEN
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
IF parent_place_id is not null THEN
{% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END IF;
END IF;
RETURN NULL;
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, token_info);
{% if debug %}RAISE WARNING 'Get parent from addr:place: %', parent_place_id;{% endif %}
RETURN parent_place_id;
END;
$$
LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT)
RETURNS BOOLEAN
AS $$

View File

@ -14,7 +14,6 @@ DECLARE
out_partition INTEGER;
out_parent_place_id BIGINT;
location RECORD;
address_street_word_ids INTEGER[];
BEGIN
@ -54,13 +53,9 @@ BEGIN
place_centroid := ST_Centroid(linegeo);
out_partition := get_partition('us');
out_parent_place_id := null;
address_street_word_ids := token_addr_street_match_tokens(token_info);
IF address_street_word_ids IS NOT NULL THEN
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
address_street_word_ids);
END IF;
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
token_info);
IF out_parent_place_id IS NULL THEN
SELECT getNearestParallelRoadFeature(out_partition, linegeo)

View File

@ -34,17 +34,31 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
RETURNS INTEGER[]
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT (info->>'street')::INTEGER[]
SELECT info->>'street' is not null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'place_match' is not null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'street')::INTEGER[] && street_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
RETURNS INTEGER[]
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'place_match')::INTEGER[]
SELECT (info->>'place_match')::INTEGER[] && place_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;
@ -55,19 +69,24 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
DROP TYPE IF EXISTS token_addresstoken CASCADE;
CREATE TYPE token_addresstoken AS (
key TEXT,
match_tokens INT[],
search_tokens INT[]
);
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
RETURNS SETOF token_addresstoken
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
RETURNS SETOF TEXT
AS $$
SELECT key, (value->>1)::int[] as match_tokens,
(value->>0)::int[] as search_tokens
FROM jsonb_each(info->'addr');
SELECT * FROM jsonb_object_keys(info->'addr');
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
RETURNS INTEGER[]
AS $$
SELECT (info->'addr'->key->>0)::INTEGER[];
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
$$ LANGUAGE SQL IMMUTABLE STRICT;

View File

@ -34,17 +34,31 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
RETURNS INTEGER[]
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT (info->>'street')::INTEGER[]
SELECT info->>'street' is not null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'place_match' is not null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'street')::INTEGER[] && street_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
RETURNS INTEGER[]
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'place_match')::INTEGER[]
SELECT (info->>'place_match')::INTEGER[] && place_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;
@ -55,19 +69,24 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
DROP TYPE IF EXISTS token_addresstoken CASCADE;
CREATE TYPE token_addresstoken AS (
key TEXT,
match_tokens INT[],
search_tokens INT[]
);
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
RETURNS SETOF token_addresstoken
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
RETURNS SETOF TEXT
AS $$
SELECT key, (value->>1)::int[] as match_tokens,
(value->>0)::int[] as search_tokens
FROM jsonb_each(info->'addr');
SELECT * FROM jsonb_object_keys(info->'addr');
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
RETURNS INTEGER[]
AS $$
SELECT (info->'addr'->key->>0)::INTEGER[];
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
$$ LANGUAGE SQL IMMUTABLE STRICT;