From 4bb4db0668a37979725678a1690f6163d5cae03f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 25 Jun 2023 09:38:44 +0200 Subject: [PATCH] introduce slim Geometry database type --- nominatim/api/connection.py | 4 +- nominatim/api/reverse.py | 53 ++++++++---------- nominatim/api/types.py | 9 ++- nominatim/db/sqlalchemy_schema.py | 17 +++--- nominatim/db/sqlalchemy_types.py | 93 +++++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 45 deletions(-) create mode 100644 nominatim/db/sqlalchemy_types.py diff --git a/nominatim/api/connection.py b/nominatim/api/connection.py index e157d062..49dad664 100644 --- a/nominatim/api/connection.py +++ b/nominatim/api/connection.py @@ -10,11 +10,11 @@ Extended SQLAlchemy connection class that also includes access to the schema. from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set import sqlalchemy as sa -from geoalchemy2 import Geometry from sqlalchemy.ext.asyncio import AsyncConnection from nominatim.typing import SaFromClause from nominatim.db.sqlalchemy_schema import SearchTables +from nominatim.db.sqlalchemy_types import Geometry from nominatim.api.logging import log class SearchConnection: @@ -112,4 +112,4 @@ class SearchConnection: return sa.Table(tablename, self.t.meta, sa.Column('place_id', sa.BigInteger), - sa.Column('centroid', Geometry(srid=4326, spatial_index=False))) + sa.Column('centroid', Geometry)) diff --git a/nominatim/api/reverse.py b/nominatim/api/reverse.py index 10c97cad..b8687b42 100644 --- a/nominatim/api/reverse.py +++ b/nominatim/api/reverse.py @@ -10,7 +10,6 @@ Implementation of reverse geocoding. 
from typing import Optional, List, Callable, Type, Tuple import sqlalchemy as sa -from geoalchemy2 import WKTElement from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow from nominatim.api.connection import SearchConnection @@ -33,11 +32,8 @@ def _select_from_placex(t: SaFromClause, wkt: Optional[str] = None) -> SaSelect: centroid = t.c.centroid else: distance = t.c.geometry.ST_Distance(wkt) - centroid = sa.case( - (t.c.geometry.ST_GeometryType().in_(('ST_LineString', - 'ST_MultiLineString')), - t.c.geometry.ST_ClosestPoint(wkt)), - else_=t.c.centroid).label('centroid') + centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(wkt)), + else_=t.c.centroid).label('centroid') return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, @@ -66,11 +62,10 @@ def _interpolated_position(table: SaFromClause) -> SaLabel: else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid') -def _locate_interpolation(table: SaFromClause, wkt: WKTElement) -> SaLabel: +def _locate_interpolation(table: SaFromClause, wkt: str) -> SaLabel: """ Given a position, locate the closest point on the line. 
""" - return sa.case((table.c.linegeo.ST_GeometryType() == 'ST_LineString', - sa.func.ST_LineLocatePoint(table.c.linegeo, wkt)), + return sa.case((table.c.linegeo.is_line_like(), table.c.linegeo.ST_LineLocatePoint(wkt)), else_=0).label('position') @@ -129,16 +124,16 @@ class ReverseGeocoder: out = [] if self.params.geometry_simplification > 0.0: - col = col.ST_SimplifyPreserveTopology(self.params.geometry_simplification) + col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification) if self.params.geometry_output & GeometryFormat.GEOJSON: - out.append(col.ST_AsGeoJSON().label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson')) if self.params.geometry_output & GeometryFormat.TEXT: - out.append(col.ST_AsText().label('geometry_text')) + out.append(sa.func.ST_AsText(col).label('geometry_text')) if self.params.geometry_output & GeometryFormat.KML: - out.append(col.ST_AsKML().label('geometry_kml')) + out.append(sa.func.ST_AsKML(col).label('geometry_kml')) if self.params.geometry_output & GeometryFormat.SVG: - out.append(col.ST_AsSVG().label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col).label('geometry_svg')) return sql.add_columns(*out) @@ -160,7 +155,7 @@ class ReverseGeocoder: return table.c.class_.in_(tuple(include)) - async def _find_closest_street_or_poi(self, wkt: WKTElement, + async def _find_closest_street_or_poi(self, wkt: str, distance: float) -> Optional[SaRow]: """ Look up the closest rank 26+ place in the database, which is closer than the given distance. 
@@ -171,8 +166,7 @@ class ReverseGeocoder: .where(t.c.geometry.ST_DWithin(wkt, distance))\ .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ - .where(sa.or_(t.c.geometry.ST_GeometryType() - .not_in(('ST_Polygon', 'ST_MultiPolygon')), + .where(sa.or_(sa.not_(t.c.geometry.is_area()), t.c.centroid.ST_Distance(wkt) < distance))\ .order_by('distance')\ .limit(1) @@ -189,7 +183,7 @@ class ReverseGeocoder: if self.layer_enabled(DataLayer.POI) and self.max_rank == 30: restrict.append(sa.and_(t.c.rank_search == 30, t.c.class_.not_in(('place', 'building')), - t.c.geometry.ST_GeometryType() != 'ST_LineString')) + sa.not_(t.c.geometry.is_line_like()))) if self.has_feature_layers(): restrict.append(sa.and_(t.c.rank_search.between(26, self.max_rank), t.c.rank_address == 0, @@ -202,7 +196,7 @@ class ReverseGeocoder: async def _find_housenumber_for_street(self, parent_place_id: int, - wkt: WKTElement) -> Optional[SaRow]: + wkt: str) -> Optional[SaRow]: t = self.conn.t.placex sql = _select_from_placex(t, wkt)\ @@ -220,7 +214,7 @@ class ReverseGeocoder: async def _find_interpolation_for_street(self, parent_place_id: Optional[int], - wkt: WKTElement, + wkt: str, distance: float) -> Optional[SaRow]: t = self.conn.t.osmline @@ -253,7 +247,7 @@ class ReverseGeocoder: async def _find_tiger_number_for_street(self, parent_place_id: int, parent_type: str, parent_id: int, - wkt: WKTElement) -> Optional[SaRow]: + wkt: str) -> Optional[SaRow]: t = self.conn.t.tiger inner = sa.select(t, @@ -282,7 +276,7 @@ class ReverseGeocoder: async def lookup_street_poi(self, - wkt: WKTElement) -> Tuple[Optional[SaRow], RowFunc]: + wkt: str) -> Tuple[Optional[SaRow], RowFunc]: """ Find a street or POI/address for the given WKT point.
""" log().section('Reverse lookup on street/address level') @@ -337,7 +331,7 @@ class ReverseGeocoder: return row, row_func - async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]: + async def _lookup_area_address(self, wkt: str) -> Optional[SaRow]: """ Lookup large addressable areas for the given WKT point. """ log().comment('Reverse lookup by larger address area features') @@ -348,7 +342,7 @@ class ReverseGeocoder: inner = sa.select(t, sa.literal(0.0).label('distance'))\ .where(t.c.rank_search.between(5, self.max_rank))\ .where(t.c.rank_address.between(5, 25))\ - .where(t.c.geometry.ST_GeometryType().in_(('ST_Polygon', 'ST_MultiPolygon')))\ + .where(t.c.geometry.is_area())\ .where(t.c.geometry.intersects(wkt))\ .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ @@ -406,7 +400,7 @@ class ReverseGeocoder: return address_row - async def _lookup_area_others(self, wkt: WKTElement) -> Optional[SaRow]: + async def _lookup_area_others(self, wkt: str) -> Optional[SaRow]: t = self.conn.t.placex inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\ @@ -424,8 +418,7 @@ class ReverseGeocoder: .subquery() sql = _select_from_placex(inner)\ - .where(sa.or_(inner.c.geometry.ST_GeometryType() - .not_in(('ST_Polygon', 'ST_MultiPolygon')), + .where(sa.or_(sa.not_(inner.c.geometry.is_area()), inner.c.geometry.ST_Contains(wkt)))\ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\ .limit(1) @@ -438,7 +431,7 @@ class ReverseGeocoder: return row - async def lookup_area(self, wkt: WKTElement) -> Optional[SaRow]: + async def lookup_area(self, wkt: str) -> Optional[SaRow]: """ Lookup large areas for the given WKT point. """ log().section('Reverse lookup by larger area features') @@ -456,7 +449,7 @@ class ReverseGeocoder: return _get_closest(address_row, other_row) - async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]: + async def lookup_country(self, wkt: str) -> Optional[SaRow]: """ Lookup the country for the given WKT point.
""" log().section('Reverse lookup by country code') @@ -528,7 +521,7 @@ class ReverseGeocoder: log().function('reverse_lookup', coord=coord, params=self.params) - wkt = WKTElement(f'POINT({coord[0]} {coord[1]})', srid=4326) + wkt = f'POINT({coord[0]} {coord[1]})' row: Optional[SaRow] = None row_func: RowFunc = nres.create_from_placex_row diff --git a/nominatim/api/types.py b/nominatim/api/types.py index 87568a09..9d19b75d 100644 --- a/nominatim/api/types.py +++ b/nominatim/api/types.py @@ -15,8 +15,7 @@ import enum import math from struct import unpack -from geoalchemy2 import WKTElement -import geoalchemy2.functions +import sqlalchemy as sa from nominatim.errors import UsageError @@ -122,10 +121,10 @@ class Point(NamedTuple): return Point(x, y) - def sql_value(self) -> WKTElement: + def sql_value(self) -> str: """ Create an SQL expression for the point. """ - return WKTElement(f'POINT({self.x} {self.y})', srid=4326) + return f'POINT({self.x} {self.y})' @@ -182,7 +181,7 @@ class Bbox: def sql_value(self) -> Any: """ Create an SQL expression for the box. """ - return geoalchemy2.functions.ST_MakeEnvelope(*self.coords, 4326) + return sa.func.ST_MakeEnvelope(*self.coords, 4326) def contains(self, pt: Point) -> bool: diff --git a/nominatim/db/sqlalchemy_schema.py b/nominatim/db/sqlalchemy_schema.py index 550f1f12..7af3d44c 100644 --- a/nominatim/db/sqlalchemy_schema.py +++ b/nominatim/db/sqlalchemy_schema.py @@ -10,10 +10,11 @@ SQLAlchemy definitions for all tables used by the frontend. from typing import Any import sqlalchemy as sa -from geoalchemy2 import Geometry from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB from sqlalchemy.dialects.sqlite import JSON as sqlite_json +from nominatim.db.sqlalchemy_types import Geometry + class PostgresTypes: """ Type definitions for complex types as used in Postgres variants. 
""" @@ -72,12 +73,12 @@ class SearchTables: sa.Column('name', self.types.Composite), sa.Column('address', self.types.Composite), sa.Column('extratags', self.types.Composite), - sa.Column('geometry', Geometry(srid=4326), nullable=False), + sa.Column('geometry', Geometry, nullable=False), sa.Column('wikipedia', sa.Text), sa.Column('country_code', sa.String(2)), sa.Column('housenumber', sa.Text), sa.Column('postcode', sa.Text), - sa.Column('centroid', Geometry(srid=4326, spatial_index=False))) + sa.Column('centroid', Geometry)) self.addressline = sa.Table('place_addressline', meta, sa.Column('place_id', sa.BigInteger, index=True), @@ -96,7 +97,7 @@ class SearchTables: sa.Column('indexed_date', sa.DateTime), sa.Column('country_code', sa.String(2)), sa.Column('postcode', sa.Text, index=True), - sa.Column('geometry', Geometry(srid=4326))) + sa.Column('geometry', Geometry)) self.osmline = sa.Table('location_property_osmline', meta, sa.Column('place_id', sa.BigInteger, nullable=False, unique=True), @@ -108,7 +109,7 @@ class SearchTables: sa.Column('step', sa.SmallInteger), sa.Column('partition', sa.SmallInteger), sa.Column('indexed_status', sa.SmallInteger), - sa.Column('linegeo', Geometry(srid=4326)), + sa.Column('linegeo', Geometry), sa.Column('address', self.types.Composite), sa.Column('postcode', sa.Text), sa.Column('country_code', sa.String(2))) @@ -123,7 +124,7 @@ class SearchTables: self.country_grid = sa.Table('country_osm_grid', meta, sa.Column('country_code', sa.String(2)), sa.Column('area', sa.Float), - sa.Column('geometry', Geometry(srid=4326))) + sa.Column('geometry', Geometry)) # The following tables are not necessarily present. 
self.search_name = sa.Table('search_name', meta, @@ -134,7 +135,7 @@ class SearchTables: sa.Column('name_vector', self.types.IntArray, index=True), sa.Column('nameaddress_vector', self.types.IntArray, index=True), sa.Column('country_code', sa.String(2)), - sa.Column('centroid', Geometry(srid=4326))) + sa.Column('centroid', Geometry)) self.tiger = sa.Table('location_property_tiger', meta, sa.Column('place_id', sa.BigInteger), @@ -143,5 +144,5 @@ class SearchTables: sa.Column('endnumber', sa.Integer), sa.Column('step', sa.SmallInteger), sa.Column('partition', sa.SmallInteger), - sa.Column('linegeo', Geometry(srid=4326, spatial_index=False)), + sa.Column('linegeo', Geometry), sa.Column('postcode', sa.Text)) diff --git a/nominatim/db/sqlalchemy_types.py b/nominatim/db/sqlalchemy_types.py new file mode 100644 index 00000000..88cae29f --- /dev/null +++ b/nominatim/db/sqlalchemy_types.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Custom types for SQLAlchemy. +""" +from typing import Callable, Any + +import sqlalchemy as sa +import sqlalchemy.types as types + +from nominatim.typing import SaColumn + +class Geometry(types.UserDefinedType[Any]): + """ Simplified type decorator for PostGIS geometry. This type + only supports geometries in 4326 projection. 
+ """ + cache_ok = True + + def __init__(self, subtype: str = 'Geometry'): + self.subtype = subtype + + + def get_col_spec(self) -> str: + return f'GEOMETRY({self.subtype}, 4326)' + + + def bind_processor(self, dialect: sa.Dialect) -> Callable[[Any], str]: + def process(value: Any) -> str: + assert isinstance(value, str) + return value + return process + + + def result_processor(self, dialect: sa.Dialect, coltype: object) -> Callable[[Any], str]: + def process(value: Any) -> str: + assert isinstance(value, str) + return value + return process + + + def bind_expression(self, bindvalue: sa.BindParameter[Any]) -> SaColumn: + return sa.func.ST_GeomFromText(bindvalue, type_=self) + + + class comparator_factory(types.UserDefinedType.Comparator): + + def is_line_like(self) -> SaColumn: + return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString', + 'ST_MultiLineString')) + + def is_area(self) -> SaColumn: + return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon', + 'ST_MultiPolygon')) + + + def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn: + return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean) + + + def ST_Distance(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Distance(self, other, type_=sa.Float) + + + def ST_Contains(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Contains(self, other, type_=sa.Boolean) + + + def ST_ClosestPoint(self, other: SaColumn) -> SaColumn: + return sa.func.ST_ClosestPoint(self, other, type_=Geometry) + + + def ST_Buffer(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Buffer(self, other, type_=Geometry) + + + def ST_Expand(self, other: SaColumn) -> SaColumn: + return sa.func.ST_Expand(self, other, type_=Geometry) + + + def ST_Centroid(self) -> SaColumn: + return sa.func.ST_Centroid(self, type_=Geometry) + + + def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn: + return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry) + + + def 
ST_LineLocatePoint(self, other: SaColumn) -> SaColumn: + return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)