simplify use of secondary importance

The values in the raster are already normalized to the range 0 to 2**16,
so a simple conversion to [0, 1] will do.

Check for the existence of the secondary_importance table statically when
creating the SQL function. For that to work, the importance tables need
to be created before the functions.
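
As a back-of-the-envelope check (a sketch only; 52000 is an arbitrary sample
raster value, not taken from the data): dividing by the maximum raster value
65535 maps into [0, 1], and folding in the 0.0001 tie-breaker weight used in
compute_importance below collapses into the single division by 655350000 seen
in the diff.

-- Illustration only; 52000 is a made-up sample value.
SELECT 52000::float / 65535          AS normalized,    -- ~0.7935, in [0, 1]
       52000::float / 65535 / 10000  AS weighted,      -- normalized * 0.0001
       52000::float / 655350000      AS one_division;  -- identical result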
Author: Sarah Hoffmann
Date: 2022-09-27 22:12:48 +02:00
Parent: 3185fad918
Commit: abf349fb0d
4 changed files with 34 additions and 69 deletions

@@ -98,46 +98,9 @@ $$
 LANGUAGE plpgsql STABLE;
 
-CREATE OR REPLACE FUNCTION get_osm_views(centroid GEOMETRY)
-  RETURNS BIGINT
-  AS $$
-DECLARE
-  result BIGINT;
-BEGIN
-  SELECT ST_Value(osm_views.rast, centroid)
-  FROM osm_views
-  WHERE ST_Intersects(ST_ConvexHull(osm_views.rast), centroid) LIMIT 1 INTO result;
-
-  return COALESCE(result, 0);
-END;
-$$
-LANGUAGE plpgsql STABLE;
-
-CREATE OR REPLACE FUNCTION normalize_osm_views(views BIGINT)
-  RETURNS FLOAT
-  AS $$
-DECLARE
-  normalized_osm_views FLOAT;
-  max_views BIGINT;
-BEGIN
-  IF views > 0 THEN
-    -- Get the highest view count to use it in normalizing the data
-    SELECT max_views_count FROM osm_views_stat INTO max_views;
-    normalized_osm_views := (LOG(views))/(LOG(max_views));
-  ELSE
-    normalized_osm_views := 0.0;
-  END IF;
-
-  RETURN normalized_osm_views;
-END;
-$$
-LANGUAGE plpgsql;
-
 CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
                                               country_code varchar(2),
-                                              osm_type varchar(1), osm_id BIGINT,
+                                              rank_search SMALLINT,
                                               centroid GEOMETRY)
   RETURNS place_importance
   AS $$
@@ -147,39 +110,44 @@ DECLARE
   osm_views_exists BIGINT;
   views BIGINT;
 BEGIN
-  -- check if osm_views table exists
-  SELECT COUNT(table_name)
-  INTO osm_views_exists
-  FROM information_schema.tables
-  WHERE table_schema LIKE 'public' AND
-        table_type LIKE 'BASE TABLE' AND
-        table_name = 'osm_views';
-
-  -- add importance by OSM views if osm_views table exists
-  IF osm_views_exists THEN
-    views := get_osm_views(centroid);
-    result.importance := normalize_osm_views(views) * 0.35;
-  END IF;
-
-  -- add importance by wiki data if the place has one
-  FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
-               WHERE language is not NULL
+  -- add importance by wikipedia article if the place has one
+  FOR match IN
+    SELECT * FROM get_wikipedia_match(extratags, country_code)
+    WHERE language is not NULL
   LOOP
-    result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
+    result.importance := match.importance;
     result.wikipedia := match.language || ':' || match.title;
     RETURN result;
   END LOOP;
 
-  IF extratags ? 'wikidata' THEN
+  -- Nothing? Then try with the wikidata tag.
+  IF result.importance is null AND extratags ? 'wikidata' THEN
     FOR match IN SELECT * FROM wikipedia_article
                   WHERE wd_page_title = extratags->'wikidata'
-                  ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
-      result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
+                  ORDER BY language = 'en' DESC, langcount DESC LIMIT 1
+    LOOP
+      result.importance := match.importance;
       result.wikipedia := match.language || ':' || match.title;
       RETURN result;
     END LOOP;
   END IF;
 
+  -- Still nothing? Fall back to a default.
+  IF result.importance is null THEN
+    result.importance := 0.75001 - (rank_search::float / 40);
+  END IF;
+
+{% if 'secondary_importance' in db.tables %}
+  FOR match IN
+    SELECT ST_Value(rast, centroid) as importance
+    FROM secondary_importance
+    WHERE ST_Intersects(ST_ConvexHull(rast), centroid) LIMIT 1
+  LOOP
+    -- Secondary importance as tie breaker with 0.0001 weight.
+    result.importance := result.importance + match.importance::float / 655350000;
+  END LOOP;
+{% endif %}
+
   RETURN result;
 END;
 $$
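
A worked example of the new rank-based fallback (ranks follow Nominatim's
usual convention, e.g. 4 for countries and 30 for house-level objects):
adjacent ranks differ by 1/40 = 0.025, so the secondary-importance tie
breaker, which adds at most 0.0001, can only reorder places of equal rank,
never across ranks.

-- Illustration of the fallback formula above.
SELECT rank_search, 0.75001 - (rank_search::float / 40) AS importance
FROM (VALUES (4), (12), (30)) AS t(rank_search);
--  4 -> 0.65001   (country level)
-- 12 -> 0.45001
-- 30 -> 0.00001   (house level)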


@@ -965,7 +965,7 @@ BEGIN
   NEW.importance := null;
   SELECT wikipedia, importance
-    FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id, NEW.centroid)
+    FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid)
     INTO NEW.wikipedia,NEW.importance;
 
 {% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
@@ -1047,7 +1047,7 @@ BEGIN
   IF linked_place is not null THEN
     -- Recompute the ranks here as the ones from the linked place might
     -- have been shifted to accommodate surrounding boundaries.
-    SELECT place_id, osm_id, class, type, extratags,
+    SELECT place_id, osm_id, class, type, extratags, rank_search,
            centroid, geometry,
            (compute_place_rank(country_code, osm_type, class, type, admin_level,
                                (extratags->'capital') = 'yes', null)).*
@@ -1088,7 +1088,7 @@ BEGIN
     SELECT wikipedia, importance
       FROM compute_importance(location.extratags, NEW.country_code,
-                              'N', location.osm_id, NEW.centroid)
+                              location.rank_search, NEW.centroid)
       INTO linked_wikipedia,linked_importance;
 
     -- Use the maximum importance if one could be computed from the linked object.
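
For reference, the call shape after this change takes the precomputed search
rank instead of the osm_type/osm_id pair, which compute_importance no longer
needs once the osm_views lookup is gone. A hypothetical ad-hoc query (the
place_id is made up):

-- Sketch: calling the new signature directly against placex.
SELECT (compute_importance(extratags, country_code, rank_search, centroid)).*
  FROM placex
 WHERE place_id = 12345;  -- hypothetical id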


@@ -276,7 +276,7 @@ CREATE SEQUENCE file start 1;
 
 -- null table so it won't error
 -- deliberately no drop - importing the table is expensive and static, if it is already there better to avoid removing it
-CREATE TABLE wikipedia_article (
+CREATE TABLE IF NOT EXISTS wikipedia_article (
     language text NOT NULL,
     title text NOT NULL,
     langcount integer,
@@ -290,15 +290,12 @@ CREATE TABLE wikipedia_article (
     wd_page_title text,
     instance_of text
 );
-ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title);
-CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id);
 
-CREATE TABLE wikipedia_redirect (
+CREATE TABLE IF NOT EXISTS wikipedia_redirect (
     language text,
     from_title text,
     to_title text
 );
-ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title);
 
 -- osm2pgsql does not create indexes on the middle tables for Nominatim
 -- Add one for lookup of associated street relations.
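
The switch to CREATE TABLE IF NOT EXISTS matters because the wikipedia tables
may already have been filled by the import step that now runs before
_setup_tables; the unconditional constraint and index statements had to go
with it, since ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS form and
would abort a rerun. A sketch of the rerun behaviour, assuming the table
already exists with its primary key:

-- Second run: skipped with a notice instead of failing.
CREATE TABLE IF NOT EXISTS wikipedia_article (language text NOT NULL, title text NOT NULL);
-- Second run: fails with "multiple primary keys ... are not allowed".
ALTER TABLE ONLY wikipedia_article
  ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title);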


@@ -96,8 +96,6 @@ class SetupAll:
                                             drop=args.no_updates,
                                             ignore_errors=args.ignore_errors)
 
-            self._setup_tables(args.config, args.reverse_only)
-
             LOG.warning('Importing wikipedia importance data')
             data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
             if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
@@ -112,6 +110,8 @@ class SetupAll:
                 LOG.error('Secondary importance file not imported. '
                           'Falling back to default ranking.')
 
+        self._setup_tables(args.config, args.reverse_only)
+
         if args.continue_at is None or args.continue_at == 'load-data':
             LOG.warning('Initialise tables')
             with connect(args.config.get_libpq_dsn()) as conn:
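
The reordering above exists because the {% if 'secondary_importance' in
db.tables %} guard in the importance function is resolved once, when the
function DDL is rendered, not at query time. A sketch of the two possible
rendered tails of compute_importance (assuming the templating behaves as
shown in the first file):

-- Rendered when secondary_importance existed at function-creation time:
  FOR match IN
    SELECT ST_Value(rast, centroid) as importance
      FROM secondary_importance
     WHERE ST_Intersects(ST_ConvexHull(rast), centroid) LIMIT 1
  LOOP
    result.importance := result.importance + match.importance::float / 655350000;
  END LOOP;
  RETURN result;

-- Rendered when it did not: the block vanishes, so the functions must be
-- recreated after the table is imported for the tie breaker to take effect.
  RETURN result;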