From 98c1b923fc090f830b7b3a575e3d08ba399bd870 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 18 Nov 2024 10:11:09 +0100 Subject: [PATCH] remove code only needed for older PostgreSQL/PostGIS versions --- lib-sql/indices.sql | 20 ++-- lib-sql/tables.sql | 6 +- lib-sql/tiger_import_finish.sql | 2 - src/nominatim_api/core.py | 18 ++-- src/nominatim_db/db/sql_preprocessor.py | 11 +- src/nominatim_db/tokenizer/icu_tokenizer.py | 107 ++++++-------------- src/nominatim_db/tools/check_database.py | 9 +- src/nominatim_db/tools/database_import.py | 5 +- src/nominatim_db/tools/refresh.py | 9 +- src/nominatim_db/tools/replication.py | 4 +- 10 files changed, 61 insertions(+), 130 deletions(-) diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index 4d92452d..8a4e91cd 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -97,18 +97,14 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode --- CREATE INDEX IF NOT EXISTS idx_search_name_centroid ON search_name USING GIST (centroid) {{db.tablespace.search_index}}; - - {% if postgres.has_index_non_key_column %} --- - CREATE INDEX IF NOT EXISTS idx_placex_housenumber - ON placex USING btree (parent_place_id) - INCLUDE (housenumber) {{db.tablespace.search_index}} - WHERE housenumber is not null; + CREATE INDEX IF NOT EXISTS idx_placex_housenumber + ON placex USING btree (parent_place_id) + INCLUDE (housenumber) {{db.tablespace.search_index}} + WHERE housenumber is not null; --- - CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr - ON location_property_osmline USING btree(parent_place_id) - INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}} - WHERE startnumber is not null; - {% endif %} - + CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr + ON location_property_osmline USING btree(parent_place_id) + INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}} + WHERE startnumber is not null; {% endif %} diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index d3bc972a..cde33952 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -184,21 +184,21 @@ CREATE INDEX idx_placex_geometry_address_area_candidates ON placex -- Usage: - POI is within building with housenumber CREATE INDEX idx_placex_geometry_buildings ON placex - USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}} + USING SPGIST (geometry) {{db.tablespace.address_index}} WHERE address is not null and rank_search = 30 and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon'); -- Usage: - linking of similar named places to boundaries -- - linking of place nodes with same type to boundaries CREATE INDEX idx_placex_geometry_placenode ON placex - USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}} + USING SPGIST (geometry) {{db.tablespace.address_index}} WHERE osm_type = 'N' and rank_search < 26 and class = 'place' and type != 'postcode'; -- Usage: - is node part of a way? -- - find parent of interpolation spatially CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex - USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}} + USING SPGIST (geometry) {{db.tablespace.address_index}} WHERE osm_type = 'W' and rank_search >= 26; -- Usage: - linking place nodes by wikidata tag to boundaries diff --git a/lib-sql/tiger_import_finish.sql b/lib-sql/tiger_import_finish.sql index c02ce2a3..b7c32d72 100644 --- a/lib-sql/tiger_import_finish.sql +++ b/lib-sql/tiger_import_finish.sql @@ -8,9 +8,7 @@ --index only on parent_place_id CREATE INDEX IF NOT EXISTS idx_location_property_tiger_parent_place_id_imp ON location_property_tiger_import (parent_place_id) -{% if postgres.has_index_non_key_column %} INCLUDE (startnumber, endnumber, step) -{% endif %} {{db.tablespace.aux_index}}; CREATE UNIQUE INDEX IF NOT EXISTS idx_location_property_tiger_place_id_imp ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}}; diff --git a/src/nominatim_api/core.py b/src/nominatim_api/core.py index c45b24de..3cf9e989 100644 --- a/src/nominatim_api/core.py +++ b/src/nominatim_api/core.py @@ -138,19 +138,17 @@ class NominatimAPIAsync: async with engine.begin() as conn: result = await conn.scalar(sa.text('SHOW server_version_num')) server_version = int(result) - if server_version >= 110000: - await conn.execute(sa.text("SET jit_above_cost TO '-1'")) - await conn.execute(sa.text( - "SET max_parallel_workers_per_gather TO '0'")) + await conn.execute(sa.text("SET jit_above_cost TO '-1'")) + await conn.execute(sa.text( + "SET max_parallel_workers_per_gather TO '0'")) except (PGCORE_ERROR, sa.exc.OperationalError): server_version = 0 - if server_version >= 110000: - @sa.event.listens_for(engine.sync_engine, "connect") - def _on_connect(dbapi_con: Any, _: Any) -> None: - cursor = dbapi_con.cursor() - cursor.execute("SET jit_above_cost TO '-1'") - cursor.execute("SET max_parallel_workers_per_gather TO '0'") + @sa.event.listens_for(engine.sync_engine, "connect") + def _on_connect(dbapi_con: Any, _: Any) -> None: + cursor = dbapi_con.cursor() + cursor.execute("SET jit_above_cost TO '-1'") + cursor.execute("SET max_parallel_workers_per_gather TO '0'") self._property_cache['DB:server_version'] = server_version diff --git a/src/nominatim_db/db/sql_preprocessor.py b/src/nominatim_db/db/sql_preprocessor.py index 85dbaabc..4424b3d8 100644 --- a/src/nominatim_db/db/sql_preprocessor.py +++ b/src/nominatim_db/db/sql_preprocessor.py @@ -11,7 +11,7 @@ from typing import Set, Dict, Any, cast import jinja2 -from .connection import Connection, server_version_tuple, postgis_version_tuple +from .connection import Connection from ..config import Configuration from ..db.query_pool import QueryPool @@ -69,14 +69,7 @@ def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]: """ Set up a dictionary with various optional Postgresql/Postgis features that depend on the database version. """ - pg_version = server_version_tuple(conn) - postgis_version = postgis_version_tuple(conn) - pg11plus = pg_version >= (11, 0, 0) - ps3 = postgis_version >= (3, 0) - return { - 'has_index_non_key_column': pg11plus, - 'spgist_geom': 'SPGIST' if pg11plus and ps3 else 'GIST' - } + return {} class SQLPreprocessor: diff --git a/src/nominatim_db/tokenizer/icu_tokenizer.py b/src/nominatim_db/tokenizer/icu_tokenizer.py index 83928644..5595fcb2 100644 --- a/src/nominatim_db/tokenizer/icu_tokenizer.py +++ b/src/nominatim_db/tokenizer/icu_tokenizer.py @@ -17,7 +17,7 @@ from pathlib import Path from psycopg.types.json import Jsonb from psycopg import sql as pysql -from ..db.connection import connect, Connection, Cursor, server_version_tuple, \ +from ..db.connection import connect, Connection, Cursor, \ drop_tables, table_exists, execute_scalar from ..config import Configuration from ..db.sql_preprocessor import SQLPreprocessor @@ -110,80 +110,37 @@ class ICUTokenizer(AbstractTokenizer): cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}') .format(pysql.Literal(min(threads, 6),))) - if server_version_tuple(conn) < (12, 0): - LOG.info('Computing word frequencies') - drop_tables(conn, 'word_frequencies', 'addressword_frequencies') - cur.execute("""CREATE TEMP TABLE word_frequencies AS - SELECT unnest(name_vector) as id, count(*) - FROM search_name GROUP BY id""") - cur.execute('CREATE INDEX ON word_frequencies(id)') - cur.execute("""CREATE TEMP TABLE addressword_frequencies AS - SELECT unnest(nameaddress_vector) as id, count(*) - FROM search_name GROUP BY id""") - cur.execute('CREATE INDEX ON addressword_frequencies(id)') - cur.execute(""" - CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER, - INOUT info JSONB) - AS $$ - DECLARE rec RECORD; - BEGIN - IF info is null THEN - info = '{}'::jsonb; - END IF; - FOR rec IN SELECT count FROM word_frequencies WHERE id = wid - LOOP - info = info || jsonb_build_object('count', rec.count); - END LOOP; - FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid - LOOP - info = info || jsonb_build_object('addr_count', rec.count); - END LOOP; - IF info = '{}'::jsonb THEN - info = null; - END IF; - END; - $$ LANGUAGE plpgsql IMMUTABLE; - """) - LOG.info('Update word table with recomputed frequencies') - drop_tables(conn, 'tmp_word') - cur.execute("""CREATE TABLE tmp_word AS - SELECT word_id, word_token, type, word, - word_freq_update(word_id, info) as info - FROM word - """) - drop_tables(conn, 'word_frequencies', 'addressword_frequencies') - else: - LOG.info('Computing word frequencies') - drop_tables(conn, 'word_frequencies') - cur.execute(""" - CREATE TEMP TABLE word_frequencies AS - WITH word_freq AS MATERIALIZED ( - SELECT unnest(name_vector) as id, count(*) - FROM search_name GROUP BY id), - addr_freq AS MATERIALIZED ( - SELECT unnest(nameaddress_vector) as id, count(*) - FROM search_name GROUP BY id) - SELECT coalesce(a.id, w.id) as id, - (CASE WHEN w.count is null THEN '{}'::JSONB - ELSE jsonb_build_object('count', w.count) END - || - CASE WHEN a.count is null THEN '{}'::JSONB - ELSE jsonb_build_object('addr_count', a.count) END) as info - FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id; - """) - cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)') - cur.execute('ANALYSE word_frequencies') - LOG.info('Update word table with recomputed frequencies') - drop_tables(conn, 'tmp_word') - cur.execute("""CREATE TABLE tmp_word AS - SELECT word_id, word_token, type, word, - (CASE WHEN wf.info is null THEN word.info - ELSE coalesce(word.info, '{}'::jsonb) || wf.info - END) as info - FROM word LEFT JOIN word_frequencies wf - ON word.word_id = wf.id - """) - drop_tables(conn, 'word_frequencies') + LOG.info('Computing word frequencies') + drop_tables(conn, 'word_frequencies') + cur.execute(""" + CREATE TEMP TABLE word_frequencies AS + WITH word_freq AS MATERIALIZED ( + SELECT unnest(name_vector) as id, count(*) + FROM search_name GROUP BY id), + addr_freq AS MATERIALIZED ( + SELECT unnest(nameaddress_vector) as id, count(*) + FROM search_name GROUP BY id) + SELECT coalesce(a.id, w.id) as id, + (CASE WHEN w.count is null THEN '{}'::JSONB + ELSE jsonb_build_object('count', w.count) END + || + CASE WHEN a.count is null THEN '{}'::JSONB + ELSE jsonb_build_object('addr_count', a.count) END) as info + FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id; + """) + cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)') + cur.execute('ANALYSE word_frequencies') + LOG.info('Update word table with recomputed frequencies') + drop_tables(conn, 'tmp_word') + cur.execute("""CREATE TABLE tmp_word AS + SELECT word_id, word_token, type, word, + (CASE WHEN wf.info is null THEN word.info + ELSE coalesce(word.info, '{}'::jsonb) || wf.info + END) as info + FROM word LEFT JOIN word_frequencies wf + ON word.word_id = wf.id + """) + drop_tables(conn, 'word_frequencies') with conn.cursor() as cur: cur.execute('SET max_parallel_workers_per_gather TO 0') diff --git a/src/nominatim_db/tools/check_database.py b/src/nominatim_db/tools/check_database.py index 79770142..4c6f0331 100644 --- a/src/nominatim_db/tools/check_database.py +++ b/src/nominatim_db/tools/check_database.py @@ -12,7 +12,7 @@ from enum import Enum from textwrap import dedent from ..config import Configuration -from ..db.connection import connect, Connection, server_version_tuple, \ +from ..db.connection import connect, Connection, \ index_exists, table_exists, execute_scalar from ..db import properties from ..errors import UsageError @@ -121,10 +121,9 @@ def _get_indexes(conn: Connection) -> List[str]: if table_exists(conn, 'search_name'): indexes.extend(('idx_search_name_nameaddress_vector', 'idx_search_name_name_vector', - 'idx_search_name_centroid')) - if server_version_tuple(conn) >= (11, 0, 0): - indexes.extend(('idx_placex_housenumber', - 'idx_osmline_parent_osm_id_with_hnr')) + 'idx_search_name_centroid', + 'idx_placex_housenumber', + 'idx_osmline_parent_osm_id_with_hnr')) # These won't exist if --no-updates import was used if table_exists(conn, 'place'): diff --git a/src/nominatim_db/tools/database_import.py b/src/nominatim_db/tools/database_import.py index 415e9d24..a7ee807e 100644 --- a/src/nominatim_db/tools/database_import.py +++ b/src/nominatim_db/tools/database_import.py @@ -98,10 +98,7 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None: with conn.cursor() as cur: cur.execute('CREATE EXTENSION IF NOT EXISTS hstore') cur.execute('CREATE EXTENSION IF NOT EXISTS postgis') - - postgis_version = postgis_version_tuple(conn) - if postgis_version[0] >= 3: - cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster') + cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster') conn.commit() diff --git a/src/nominatim_db/tools/refresh.py b/src/nominatim_db/tools/refresh.py index dc98fe41..80d0a74d 100644 --- a/src/nominatim_db/tools/refresh.py +++ b/src/nominatim_db/tools/refresh.py @@ -16,8 +16,7 @@ from pathlib import Path from psycopg import sql as pysql from ..config import Configuration -from ..db.connection import Connection, connect, postgis_version_tuple, \ - drop_tables +from ..db.connection import Connection, connect, drop_tables from ..db.utils import execute_file from ..db.sql_preprocessor import SQLPreprocessor @@ -190,12 +189,6 @@ def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = if not datafile.exists(): return 1 - with connect(dsn) as conn: - postgis_version = postgis_version_tuple(conn) - if postgis_version[0] < 3: - LOG.error('PostGIS version is too old for using OSM raster data.') - return 2 - execute_file(dsn, datafile, ignore_errors=ignore_errors) return 0 diff --git a/src/nominatim_db/tools/replication.py b/src/nominatim_db/tools/replication.py index 082523cd..e50eedee 100644 --- a/src/nominatim_db/tools/replication.py +++ b/src/nominatim_db/tools/replication.py @@ -18,7 +18,7 @@ import urllib.request as urlrequest from ..errors import UsageError from ..db import status -from ..db.connection import Connection, connect, server_version_tuple +from ..db.connection import Connection, connect from .exec_utils import run_osm2pgsql try: @@ -156,7 +156,7 @@ def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) - # Consume updates with osm2pgsql. options['append'] = True - options['disable_jit'] = server_version_tuple(conn) >= (11, 0) + options['disable_jit'] = True run_osm2pgsql(options) # Handle deletions