Integrated OSM views into importance computation

This commit is contained in:
Tareq Al-Ahdal 2022-08-25 09:45:18 +02:00 committed by Sarah Hoffmann
parent ac467c7a2d
commit 0ab0f0ea44
8 changed files with 113 additions and 37 deletions

View File

@ -98,19 +98,74 @@ $$
LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION get_osm_views(centroid GEOMETRY)
RETURNS BIGINT
AS $$
DECLARE
result BIGINT;
BEGIN
SELECT ST_Value(osm_views.rast, centroid)
FROM osm_views
WHERE ST_Intersects(ST_ConvexHull(osm_views.rast), centroid) LIMIT 1 INTO result;
return COALESCE(result, 0);
END;
$$
LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION normalize_osm_views(views BIGINT)
RETURNS FLOAT
AS $$
DECLARE
normalized_osm_views FLOAT;
max_views BIGINT;
BEGIN
IF views > 0 THEN
-- Get the highest view count to use it in normalizing the data
SELECT max_views_count FROM osm_views_stat INTO max_views;
normalized_osm_views := (LOG(views))/(LOG(max_views));
ELSE
normalized_osm_views := 0.0;
END IF;
RETURN normalized_osm_views;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2),
osm_type varchar(1), osm_id BIGINT)
osm_type varchar(1), osm_id BIGINT,
centroid GEOMETRY)
RETURNS place_importance
AS $$
DECLARE
match RECORD;
result place_importance;
osm_views_exists BIGINT;
views BIGINT;
BEGIN
-- check if osm_views table exists
SELECT COUNT(table_name)
INTO osm_views_exists
FROM information_schema.tables
WHERE table_schema LIKE 'public' AND
table_type LIKE 'BASE TABLE' AND
table_name = 'osm_views';
-- add importance by OSM views if osm_views table exists
IF osm_views_exists THEN
views := get_osm_views(centroid);
result.importance := normalize_osm_views(views) * 0.35;
END IF;
-- add importance by wiki data if the place has one
FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
WHERE language is not NULL
LOOP
result.importance := match.importance;
result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
result.wikipedia := match.language || ':' || match.title;
RETURN result;
END LOOP;
@ -119,13 +174,13 @@ BEGIN
FOR match IN SELECT * FROM wikipedia_article
WHERE wd_page_title = extratags->'wikidata'
ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
result.importance := match.importance;
result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
result.wikipedia := match.language || ':' || match.title;
RETURN result;
END LOOP;
END IF;
RETURN null;
RETURN result;
END;
$$
LANGUAGE plpgsql;

View File

@ -965,7 +965,7 @@ BEGIN
NEW.importance := null;
SELECT wikipedia, importance
FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id)
FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id, NEW.centroid)
INTO NEW.wikipedia,NEW.importance;
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
@ -1088,7 +1088,7 @@ BEGIN
SELECT wikipedia, importance
FROM compute_importance(location.extratags, NEW.country_code,
'N', location.osm_id)
'N', location.osm_id, NEW.centroid)
INTO linked_wikipedia,linked_importance;
-- Use the maximum importance if one could be computed from the linked object.

View File

@ -136,13 +136,13 @@ class UpdateRefresh:
if args.osm_views:
data_path = Path(args.project_dir)
LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
with connect(args.config.get_libpq_dsn()) as conn:
if refresh.import_osm_views_geotiff(conn, data_path) == 1:
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
return 1
if refresh.import_osm_views_geotiff(conn, data_path) == 2:
LOG.fatal('FATAL: PostGIS version number is less than 3')
return 1
num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
if num == 1:
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
return 1
if num == 2:
LOG.fatal('FATAL: PostGIS version number is less than 3')
return 1
# Attention: importance MUST come after wiki data import.
if args.importance:

View File

@ -105,18 +105,18 @@ class SetupAll:
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
'Calculating importance values of locations will not \
use Wikipedia importance data.')
'Calculating importance values of locations will not '
'use Wikipedia importance data.')
LOG.warning('Importing OSM views GeoTIFF data')
data_path = Path(args.project_dir)
with connect(args.config.get_libpq_dsn()) as conn:
if refresh.import_osm_views_geotiff(conn, data_path) == 1:
LOG.error('OSM views GeoTIFF file not found. '
'Calculating importance values of locations will not use OSM views data.')
elif refresh.import_osm_views_geotiff(conn, data_path) == 2:
LOG.error('PostGIS version number is less than 3. '
'Calculating importance values of locations will not use OSM views data.')
num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
if num == 1:
LOG.error('OSM views GeoTIFF file not found. '
'Calculating importance values of locations will not use OSM views data.')
elif num == 2:
LOG.error('PostGIS version number is less than 3. '
'Calculating importance values of locations will not use OSM views data.')
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables')

View File

@ -16,7 +16,7 @@ from pathlib import Path
from psycopg2 import sql as pysql
from nominatim.config import Configuration
from nominatim.db.connection import Connection
from nominatim.db.connection import Connection, connect
from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import version_str
@ -147,28 +147,48 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
return 0
def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int:
def import_osm_views_geotiff(dsn: str, data_path: Path) -> int:
""" Replaces the OSM views table with new data.
Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not
be found. Throws an exception if there was an error reading the file.
"""
datafile = data_path / 'osmviews.tiff'
if not datafile.exists():
return 1
with connect(dsn) as conn:
postgis_version = conn.postgis_version_tuple()
if postgis_version[0] < 3:
return 2
postgis_version = conn.postgis_version_tuple()
if postgis_version[0] < 3:
return 2
with conn.cursor() as cur:
cur.execute('DROP TABLE IF EXISTS "osm_views"')
conn.commit()
with conn.cursor() as cur:
cur.drop_table("osm_views")
cur.drop_table("osm_views_stat")
cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \
public.osm_views | psql nominatim > /dev/null"
subprocess.run(["/bin/bash", "-c" , cmd], check=True)
# -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15
reproject_geotiff = f"gdalwarp -q -multi -ovr 3 -overwrite \
-co COMPRESS=LZW -tr 0.01 0.01 -t_srs EPSG:4326 {datafile} raster2import.tiff"
subprocess.run(["/bin/bash", "-c" , reproject_geotiff], check=True)
tile_size = 256
import_geotiff = f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} raster2import.tiff \
public.osm_views | psql {dsn} > /dev/null"
subprocess.run(["/bin/bash", "-c" , import_geotiff], check=True)
cleanup = "rm raster2import.tiff"
subprocess.run(["/bin/bash", "-c" , cleanup], check=True)
# To normalize osm views data, the max view value is needed
cur.execute(f"""
CREATE TABLE osm_views_stat AS (
SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count
FROM osm_views CROSS JOIN
generate_series(1, {tile_size}) As x
CROSS JOIN generate_series(1, {tile_size}) As y
WHERE x <= ST_Width(rast) AND y <= ST_Height(rast));
""")
conn.commit()
return 0
@ -182,7 +202,7 @@ def recompute_importance(conn: Connection) -> None:
cur.execute("""
UPDATE placex SET (wikipedia, importance) =
(SELECT wikipedia, importance
FROM compute_importance(extratags, country_code, osm_type, osm_id))
FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid))
""")
cur.execute("""
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance

View File

@ -69,7 +69,7 @@ class TestCliImportWithDb:
assert cf_mock.called > 1
for mock in mocks:
assert mock.called > 0, "Mock '{}' not called".format(mock.func_name)
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_import_continue_load_data(self, mock_func_factory):

View File

@ -32,7 +32,7 @@ class TestRefresh:
func_mock = mock_func_factory(nominatim.tools.refresh, func)
assert self.call_nominatim('refresh', '--' + command) == 0
assert func_mock.called > 0
assert func_mock.called == 1
def test_refresh_word_count(self):

View File

@ -38,6 +38,7 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2),
osm_type varchar(1), osm_id BIGINT,
centroid GEOMETRY,
OUT importance FLOAT,
OUT wikipedia TEXT)
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")