make SQL statements in reverse lambda functions

Further improves internal caching of statements by SQLAlchemy.
This commit is contained in:
Sarah Hoffmann 2023-07-12 11:46:07 +02:00
parent 35fd74af6d
commit f264eaeda2
2 changed files with 115 additions and 90 deletions

View File

@ -11,7 +11,8 @@ from typing import Optional, List, Callable, Type, Tuple, Dict, Any
import sqlalchemy as sa
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, SaBind
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
SaBind, SaLambdaSelect
from nominatim.api.connection import SearchConnection
import nominatim.api.results as nres
from nominatim.api.logging import log
@ -135,10 +136,7 @@ class ReverseGeocoder:
return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect:
if not self.has_geometries():
return sql
def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
out = []
if self.params.geometry_simplification > 0.0:
@ -182,18 +180,19 @@ class ReverseGeocoder:
# PostgreSQL must not get the distance as a parameter because
# there is a danger it won't be able to proberly estimate index use
# when used with prepared statements
dist_param = sa.text(f"{distance}")
diststr = sa.text(f"{distance}")
sql = _select_from_placex(t)\
.where(t.c.geometry.ST_DWithin(WKT_PARAM, dist_param))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
t.c.centroid.ST_Distance(WKT_PARAM) < dist_param))\
.order_by('distance')\
.limit(1)
t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
.order_by('distance')
.limit(1))
sql = self._add_geometry_columns(sql, t.c.geometry)
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
restrict: List[SaColumn] = []
@ -221,16 +220,17 @@ class ReverseGeocoder:
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex
sql = _select_from_placex(t)\
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.where(_is_address_point(t))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
.where(t.c.parent_place_id == parent_place_id)
.where(_is_address_point(t))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.order_by('distance')
.limit(1))
sql = self._add_geometry_columns(sql, t.c.geometry)
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
@ -239,25 +239,29 @@ class ReverseGeocoder:
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
sql = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\
.where(t.c.startnumber != None)\
.order_by('distance')\
.limit(1)
sql: Any = sa.lambda_stmt(lambda:
sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
.where(t.c.startnumber != None)
.order_by('distance')
.limit(1))
if parent_place_id is not None:
sql = sql.where(t.c.parent_place_id == parent_place_id)
sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
inner = sql.subquery('ipol')
def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
inner = base_sql.subquery('ipol')
sql = sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode, inner.c.country_code,
inner.c.distance)
return sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode, inner.c.country_code,
inner.c.distance)
sql += _wrap_query
if self.has_geometries():
sub = sql.subquery('geom')
@ -271,23 +275,26 @@ class ReverseGeocoder:
parent_id: int) -> Optional[SaRow]:
t = self.conn.t.tiger
inner = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\
.subquery('tiger')
def _base_query() -> SaSelect:
inner = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\
.subquery('tiger')
sql = sa.select(inner.c.place_id,
inner.c.parent_place_id,
sa.literal(parent_type).label('osm_type'),
sa.literal(parent_id).label('osm_id'),
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode,
inner.c.distance)
return sa.select(inner.c.place_id,
inner.c.parent_place_id,
sa.sql.expression.label('osm_type', parent_type),
sa.sql.expression.label('osm_id', parent_id),
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode,
inner.c.distance)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries():
sub = sql.subquery('geom')
@ -356,31 +363,40 @@ class ReverseGeocoder:
log().comment('Reverse lookup by larger address area features')
t = self.conn.t.placex
# The inner SQL brings results in the right order, so that
# later only a minimum of results needs to be checked with ST_Contains.
inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.geometry.intersects(WKT_PARAM))\
.where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
def _base_query() -> SaSelect:
# The inner SQL brings results in the right order, so that
# later only a minimum of results needs to be checked with ST_Contains.
inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.geometry.intersects(WKT_PARAM))\
.where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
sql = _select_from_placex(inner, False)\
.where(inner.c.geometry.ST_Contains(WKT_PARAM))\
.order_by(sa.desc(inner.c.rank_search))\
.limit(1)
return _select_from_placex(inner, False)\
.where(inner.c.geometry.ST_Contains(WKT_PARAM))\
.order_by(sa.desc(inner.c.rank_search))\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area')
inner = sa.select(t,
address_rank = address_row.rank_search
address_id = address_row.place_id
def _place_inside_area_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > address_row.rank_search)\
.where(t.c.rank_search > address_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
@ -391,15 +407,17 @@ class ReverseGeocoder:
.limit(50)\
.subquery('places')
touter = self.conn.t.placex.alias('outer')
sql = _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_row.place_id)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
touter = t.alias('outer')
return _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_id)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
sql = sa.lambda_stmt(_place_inside_area_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row)
@ -433,7 +451,8 @@ class ReverseGeocoder:
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
if self.has_geometries():
sql = self._add_geometry_columns(sql, inner.c.geometry)
row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (non-address feature)', row)
@ -464,7 +483,7 @@ class ReverseGeocoder:
"""
log().section('Reverse lookup by country code')
t = self.conn.t.country_grid
sql = sa.select(t.c.country_code).distinct()\
sql: SaLambdaSelect = sa.select(t.c.country_code).distinct()\
.where(t.c.geometry.ST_Contains(WKT_PARAM))
ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
@ -477,7 +496,9 @@ class ReverseGeocoder:
if self.max_rank > 4:
log().comment('Search for place nodes in country')
inner = sa.select(t,
def _base_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > 4)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
@ -489,14 +510,16 @@ class ReverseGeocoder:
.intersects(WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery()
.subquery('area')
sql = _select_from_placex(inner, False)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
return _select_from_placex(inner, False)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = self._add_geometry_columns(sql, inner.c.geometry)
sql = sa.lambda_stmt(_base_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row)
@ -505,15 +528,16 @@ class ReverseGeocoder:
if address_row is None:
# Still nothing, then return a country with the appropriate country code.
sql = _select_from_placex(t)\
sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
.limit(1))
sql = self._add_geometry_columns(sql, t.c.geometry)
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()

View File

@ -62,6 +62,7 @@ if TYPE_CHECKING:
else:
TypeAlias = str
SaLambdaSelect: TypeAlias = 'Union[sa.Select[Any], sa.StatementLambdaElement]'
SaSelect: TypeAlias = 'sa.Select[Any]'
SaScalarSelect: TypeAlias = 'sa.ScalarSelect[Any]'
SaRow: TypeAlias = 'sa.Row[Any]'