Nominatim/lib-sql/functions/ranking.sql
Sarah Hoffmann c41f2fed21 simplify weigh_search() function
Use JSON arrays which can have mixed types and therefore have
a more logical structure than separate arrays. Avoid JSON dicts
because of their verboseness.
2023-12-07 09:31:00 +01:00

308 lines
8.9 KiB
PL/PgSQL

-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Functions related to search and address ranks
-- Return an approximate search radius according to the search rank.
--
-- \param rank_search  Search rank of the place.
-- \returns Radius taken from a fixed table of rank thresholds; the lower
--          the rank, the larger the radius. A NULL rank yields the
--          smallest radius (0.02).
CREATE OR REPLACE FUNCTION reverse_place_diameter(rank_search SMALLINT)
  RETURNS FLOAT
  AS $$
BEGIN
  -- Branches are tested top-down, so every threshold only applies to ranks
  -- that were not already caught by the one before it.
  RETURN CASE
           WHEN rank_search <= 4  THEN 5.0
           WHEN rank_search <= 8  THEN 1.8
           WHEN rank_search <= 12 THEN 0.6
           WHEN rank_search <= 17 THEN 0.16
           WHEN rank_search <= 18 THEN 0.08
           WHEN rank_search <= 19 THEN 0.04
           ELSE 0.02
         END;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Return an approximate update radius according to the search rank.
--
-- \param rank_search  Search rank of the place.
-- \returns Radius taken from a fixed table of rank thresholds; 0 for
--          ranks without an entry (including NULL).
CREATE OR REPLACE FUNCTION update_place_diameter(rank_search SMALLINT)
  RETURNS FLOAT
  AS $$
BEGIN
  -- Order matters: the special cases (postcode ranks, rank 21) must be
  -- tested before the range checks that would otherwise swallow them.
  RETURN CASE
           -- postcodes
           WHEN rank_search IN (5, 11) THEN 0.05
           -- anything higher than city is effectively ignored (polygon required)
           WHEN rank_search < 16 THEN 0
           WHEN rank_search < 18 THEN 0.1
           WHEN rank_search < 20 THEN 0.05
           WHEN rank_search = 21 THEN 0.001
           WHEN rank_search < 24 THEN 0.02
           WHEN rank_search < 26 THEN 0.002
           WHEN rank_search < 28 THEN 0.001
           ELSE 0
         END;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Compute a base address rank from the extent of the given geometry.
--
-- This is rough guesswork; particularly good estimates are not needed.
-- The only goal is to avoid very high-ranked address parts for features
-- that span very large areas (or vice versa).
--
-- \param search_rank   Rank returned unchanged when the geometry is neither
--                      a (multi)polygon nor a (multi)linestring.
-- \param geometry      Geometry whose extent is measured.
-- \param country_code  Country code used to scale the measured extent.
-- \returns Rank between 7 and 23 derived from the scaled extent, or
--          search_rank for other geometry types.
CREATE OR REPLACE FUNCTION geometry_to_rank(search_rank SMALLINT, geometry GEOMETRY, country_code TEXT)
  RETURNS SMALLINT
  AS $$
DECLARE
  geom_type TEXT;
  extent FLOAT;
BEGIN
  geom_type := ST_GeometryType(geometry);

  IF geom_type IN ('ST_Polygon', 'ST_MultiPolygon') THEN
    extent := ST_Area(geometry);
  ELSIF geom_type IN ('ST_LineString', 'ST_MultiLineString') THEN
    -- lines have no area: a tenth of the squared length stands in for it
    extent := (ST_Length(geometry)^2) * 0.1;
  ELSE
    RETURN search_rank;
  END IF;

  -- adjust for the fact that countries come in different sizes
  extent := extent / CASE
                       WHEN country_code IN ('ca', 'au', 'ru')
                         THEN 5
                       WHEN country_code IN ('br', 'kz', 'cn', 'us', 'ne', 'gb',
                                             'za', 'sa', 'id', 'eh', 'ml', 'tm')
                         THEN 3
                       WHEN country_code IN ('bo', 'ar', 'sd', 'mn', 'in', 'et',
                                             'cd', 'mz', 'ly', 'cl', 'zm')
                         THEN 2
                       ELSE 1
                     END;

  -- the larger the extent, the lower (more important) the rank
  RETURN CASE
           WHEN extent > 1        THEN 7
           WHEN extent > 0.1      THEN 9
           WHEN extent > 0.01     THEN 13
           WHEN extent > 0.001    THEN 17
           WHEN extent > 0.0001   THEN 19
           WHEN extent > 0.000005 THEN 21
           ELSE 23
         END;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Guess a ranking for postcodes from country and postcode format.
--
-- \param country_code  Two-letter country code of the postcode.
-- \param postcode      Postcode to classify. It is upper-cased before
--                      matching, so matching is case-insensitive.
-- \param[out] rank_search   Guessed search rank.
-- \param[out] rank_address  Guessed address rank.
--
-- Both ranks are 30 when the postcode matches none of the formats
-- expected for the country (or is NULL).
CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT,
                                             OUT rank_search SMALLINT,
                                             OUT rank_address SMALLINT)
  AS $$
BEGIN
  rank_search := 30;
  rank_address := 30;
  postcode := upper(postcode);

  IF country_code = 'gb' THEN
    -- full postcode: outward code + inward code (e.g. 'EC1A 1BB', 'E1 6AN')
    IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
      rank_search := 25;
      rank_address := 5;
    -- outward code + sector digit (e.g. 'EC1A 1')
    ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
      rank_search := 23;
      rank_address := 5;
    -- outward code only (e.g. 'EC1A', 'SW1', 'E1'). The last character is
    -- optional, exactly as in the two patterns above, so that short
    -- outward codes are recognised as well.
    ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]?)$' THEN
      rank_search := 21;
      rank_address := 5;
    END IF;

  ELSEIF country_code = 'sg' THEN
    IF postcode ~ '^([0-9]{6})$' THEN
      rank_search := 25;
      rank_address := 11;
    END IF;

  ELSEIF country_code = 'de' THEN
    IF postcode ~ '^([0-9]{5})$' THEN
      rank_search := 21;
      rank_address := 11;
    END IF;

  ELSE
    -- Guess at the postcode format and coverage (!)
    IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
      rank_search := 21;
      rank_address := 11;
    -- Does it look splittable into an area and a local code?
    ELSEIF postcode ~ '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$' THEN
      rank_search := 25;
      rank_address := 11;
    -- Long code without a usable separator.
    ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN
      rank_search := 21;
      rank_address := 11;
    END IF;
  END IF;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Get standard search and address rank for an object.
--
-- \param country        Two-letter country code where the object is in.
-- \param extended_type  OSM type (N, W, R) or area type (A).
-- \param place_class    Class (or tag key) of object.
-- \param place_type     Type (or tag value) of object.
-- \param admin_level    Value of admin_level tag.
-- \param is_major       If true, boost search rank by one.
-- \param postcode       Value of addr:postcode tag.
-- \param[out] search_rank   Computed search rank.
-- \param[out] address_rank  Computed address rank.
--
-- NOTE(review): the function is declared IMMUTABLE although it reads the
-- address_levels table - confirm that this is intentional.
CREATE OR REPLACE FUNCTION compute_place_rank(country VARCHAR(2),
                                              extended_type VARCHAR(1),
                                              place_class TEXT, place_type TEXT,
                                              admin_level SMALLINT,
                                              is_major BOOLEAN,
                                              postcode TEXT,
                                              OUT search_rank SMALLINT,
                                              OUT address_rank SMALLINT)
  AS $$
DECLARE
  lookup_type TEXT;
BEGIN
  -- Postcodes are ranked purely by country and postcode format.
  IF place_class IN ('place', 'boundary')
     AND place_type IN ('postcode', 'postal_code')
  THEN
    SELECT pc.rank_search, pc.rank_address
      INTO search_rank, address_rank
      FROM get_postcode_rank(country, postcode) pc;
    RETURN;
  END IF;

  -- Highway nodes and landuse objects that are not areas always get the
  -- lowest rank; no further corrections are applied to them.
  IF (extended_type = 'N' AND place_class = 'highway')
     OR (place_class = 'landuse' AND extended_type != 'A')
  THEN
    search_rank := 30;
    address_rank := 30;
    RETURN;
  END IF;

  -- Administrative boundaries are looked up by type + admin level,
  -- e.g. 'administrative8'.
  lookup_type := CASE
                   WHEN place_class = 'boundary' AND place_type = 'administrative'
                     THEN place_type || admin_level::TEXT
                   ELSE place_type
                 END;

  -- NULLs sort last, so an entry for the given country wins over the
  -- country-independent one and an entry for the exact type wins over
  -- the catch-all entry of the class.
  SELECT l.rank_search, l.rank_address
    INTO search_rank, address_rank
    FROM address_levels l
   WHERE (l.country_code = country OR l.country_code IS NULL)
         AND l.class = place_class
         AND (l.type = lookup_type OR l.type IS NULL)
   ORDER BY l.country_code, l.class, l.type
   LIMIT 1;

  -- No (complete) entry found: fall back to the lowest rank.
  IF search_rank IS NULL OR address_rank IS NULL THEN
    search_rank := 30;
    address_rank := 30;
  END IF;

  -- some postcorrections
  IF place_class = 'waterway' AND extended_type = 'R' THEN
    -- Slightly promote waterway relations so that they are processed
    -- before their members.
    search_rank := search_rank - 1;
  END IF;

  IF is_major THEN
    search_rank := search_rank - 1;
  END IF;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Look up the address rank range and search extent for an address tag.
--
-- \param key      Place type to look up in address_levels (class 'place').
-- \param country  Country code. An entry for this country is preferred
--                 over the country-independent (NULL) entry.
-- \param[out] from_rank  Lower bound of the address rank range.
-- \param[out] to_rank    Upper bound of the address rank range.
-- \param[out] extent     reverse_place_diameter() of the entry's search rank.
--
-- All outputs remain NULL when no entry exists or when the selected entry
-- does not have a positive address rank.
CREATE OR REPLACE FUNCTION get_addr_tag_rank(key TEXT, country TEXT,
                                             OUT from_rank SMALLINT,
                                             OUT to_rank SMALLINT,
                                             OUT extent FLOAT)
  AS $$
DECLARE
  addr_level RECORD;
BEGIN
  from_rank := null;

  -- Pick the single best entry first (NULL country sorts last) and only
  -- then reject it if it carries no address rank.
  SELECT r.rank_search, r.rank_address
    INTO addr_level
    FROM (SELECT l.rank_search, l.rank_address
            FROM address_levels l
           WHERE (l.country_code = country OR l.country_code IS NULL)
                 AND l.class = 'place' AND l.type = key
           ORDER BY l.country_code
           LIMIT 1) r
   WHERE r.rank_address > 0;

  IF NOT FOUND THEN
    RETURN;
  END IF;

  extent := reverse_place_diameter(addr_level.rank_search);

  -- Map the address rank onto the range of ranks it belongs to.
  CASE
    WHEN addr_level.rank_address <= 4 THEN
      from_rank := 4;
      to_rank := 4;
    WHEN addr_level.rank_address <= 9 THEN
      from_rank := 5;
      to_rank := 9;
    WHEN addr_level.rank_address <= 12 THEN
      from_rank := 10;
      to_rank := 12;
    WHEN addr_level.rank_address <= 16 THEN
      from_rank := 13;
      to_rank := 16;
    WHEN addr_level.rank_address <= 21 THEN
      from_rank := 17;
      to_rank := 21;
    WHEN addr_level.rank_address <= 24 THEN
      from_rank := 22;
      to_rank := 24;
    ELSE
      from_rank := 25;
      to_rank := 25;
  END CASE;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Return the weight of the first ranking whose ids are all contained in
-- the search vector.
--
-- \param search_vector  Integer ids to test the rankings against.
-- \param rankings       JSON array (as text) of two-element arrays of the
--                       form [weight, [id, ...]], tested in order.
-- \param def_weight     Weight returned when no ranking matches.
--
-- A ranking with an empty id list always matches.
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
                                        rankings TEXT,
                                        def_weight FLOAT)
  RETURNS FLOAT
  AS $$
DECLARE
  entry JSON;
BEGIN
  FOR entry IN
    SELECT value FROM json_array_elements(rankings::JSON)
  LOOP
    -- The ranking matches when no id fails the membership test. A test
    -- with a NULL result (e.g. NULL search vector) counts as a failure.
    IF NOT EXISTS (SELECT 1
                     FROM json_array_elements_text(entry->1) AS token_id
                    WHERE (token_id::int = ANY(search_vector)) IS NOT TRUE)
    THEN
      RETURN (entry->>0)::float;
    END IF;
  END LOOP;

  RETURN def_weight;
END;
$$
LANGUAGE plpgsql IMMUTABLE;