diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 3fe0b9d8..7dfb3f1d 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -74,6 +74,8 @@ jobs: php-version: ${{ matrix.php }} tools: phpunit:9, phpcs, composer ini-values: opcache.jit=disable + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - uses: actions/setup-python@v4 with: @@ -138,7 +140,7 @@ jobs: working-directory: Nominatim/test/bdd - name: Install mypy and typechecking info - run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions + run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions if: matrix.flavour != 'oldstuff' - name: Python static typechecking diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md index 7284e99e..3234b8cb 100644 --- a/docs/develop/Development-Environment.md +++ b/docs/develop/Development-Environment.md @@ -65,7 +65,7 @@ sudo apt install php-cgi phpunit php-codesniffer \ pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \ mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \ - types-ujson types-requests typing-extensions\ + types-ujson types-requests types-Pygments typing-extensions\ sanic-testing httpx asgi-lifespan ``` diff --git a/nominatim/api/connection.py b/nominatim/api/connection.py index 79a5e347..efa4490e 100644 --- a/nominatim/api/connection.py +++ b/nominatim/api/connection.py @@ -13,6 +13,7 @@ import sqlalchemy as sa from sqlalchemy.ext.asyncio import AsyncConnection from nominatim.db.sqlalchemy_schema import SearchTables +from nominatim.api.logging import log class SearchConnection: """ An extended SQLAlchemy connection class, that also contains @@ -34,14 +35,16 @@ class SearchConnection: ) -> Any: """ Execute a 'scalar()' query on the connection. """ + log().sql(self.connection, sql) return await self.connection.scalar(sql, params) - async def execute(self, sql: sa.sql.base.Executable, + async def execute(self, sql: 'sa.Executable', params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None - ) -> 'sa.engine.Result[Any]': + ) -> 'sa.Result[Any]': """ Execute a 'execute()' query on the connection. """ + log().sql(self.connection, sql) return await self.connection.execute(sql, params) diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py new file mode 100644 index 00000000..e9c88470 --- /dev/null +++ b/nominatim/api/logging.py @@ -0,0 +1,240 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Functions for specialised logging with HTML output. +""" +from typing import Any, cast +from contextvars import ContextVar +import textwrap +import io + +import sqlalchemy as sa +from sqlalchemy.ext.asyncio import AsyncConnection + +try: + from pygments import highlight + from pygments.lexers import PythonLexer, PostgresLexer + from pygments.formatters import HtmlFormatter + CODE_HIGHLIGHT = True +except ModuleNotFoundError: + CODE_HIGHLIGHT = False + + +class BaseLogger: + """ Interface for logging function. + + The base implementation does nothing. Overwrite the functions + in derived classes which implement logging functionality. + """ + def get_buffer(self) -> str: + """ Return the current content of the log buffer. + """ + return '' + + def function(self, func: str, **kwargs: Any) -> None: + """ Start a new debug chapter for the given function and its parameters. + """ + + + def section(self, heading: str) -> None: + """ Start a new section with the given title. + """ + + + def comment(self, text: str) -> None: + """ Add a simple comment to the debug output. + """ + + + def var_dump(self, heading: str, var: Any) -> None: + """ Print the content of the variable to the debug output prefixed by + the given heading. + """ + + + def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None: + """ Print the SQL for the given statement. + """ + + +class HTMLLogger(BaseLogger): + """ Logger that formats messages in HTML. + """ + def __init__(self) -> None: + self.buffer = io.StringIO() + + + def get_buffer(self) -> str: + return HTML_HEADER + self.buffer.getvalue() + HTML_FOOTER + + + def function(self, func: str, **kwargs: Any) -> None: + self._write(f"

Debug output for {func}()

\n

Parameters:

") + for name, value in kwargs.items(): + self._write(f'
{name}
{self._python_var(value)}
') + self._write('

') + + + def section(self, heading: str) -> None: + self._write(f"

{heading}

") + + + def comment(self, text: str) -> None: + self._write(f"

{text}

") + + + def var_dump(self, heading: str, var: Any) -> None: + self._write(f'
{heading}
{self._python_var(var)}') + + + def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None: + sqlstr = str(cast('sa.ClauseElement', statement) + .compile(conn.sync_engine, compile_kwargs={"literal_binds": True})) + if CODE_HIGHLIGHT: + sqlstr = highlight(sqlstr, PostgresLexer(), + HtmlFormatter(nowrap=True, lineseparator='
')) + self._write(f'
{sqlstr}
') + else: + self._write(f'{sqlstr}') + + + def _python_var(self, var: Any) -> str: + if CODE_HIGHLIGHT: + fmt = highlight(repr(var), PythonLexer(), HtmlFormatter(nowrap=True)) + return f'
{fmt}
' + + return f'{str(var)}' + + + def _write(self, text: str) -> None: + """ Add the raw text to the debug output. + """ + self.buffer.write(text) + + +class TextLogger(BaseLogger): + """ Logger creating output suitable for the console. + """ + def __init__(self) -> None: + self.buffer = io.StringIO() + + + def get_buffer(self) -> str: + return self.buffer.getvalue() + + + def function(self, func: str, **kwargs: Any) -> None: + self._write(f"#### Debug output for {func}()\n\nParameters:\n") + for name, value in kwargs.items(): + self._write(f' {name}: {self._python_var(value)}\n') + self._write('\n') + + + def section(self, heading: str) -> None: + self._write(f"\n# {heading}\n\n") + + + def comment(self, text: str) -> None: + self._write(f"{text}\n") + + + def var_dump(self, heading: str, var: Any) -> None: + self._write(f'{heading}:\n {self._python_var(var)}\n\n') + + + def sql(self, conn: AsyncConnection, statement: 'sa.Executable') -> None: + sqlstr = str(cast('sa.ClauseElement', statement) + .compile(conn.sync_engine, compile_kwargs={"literal_binds": True})) + sqlstr = '\n| '.join(textwrap.wrap(sqlstr, width=78)) + self._write(f"| {sqlstr}\n\n") + + + def _python_var(self, var: Any) -> str: + return str(var) + + + def _write(self, text: str) -> None: + self.buffer.write(text) + + +logger: ContextVar[BaseLogger] = ContextVar('logger', default=BaseLogger()) + + +def set_log_output(fmt: str) -> None: + """ Enable collecting debug information. + """ + if fmt == 'html': + logger.set(HTMLLogger()) + elif fmt == 'text': + logger.set(TextLogger()) + else: + logger.set(BaseLogger()) + + +def log() -> BaseLogger: + """ Return the logger for the current context. + """ + return logger.get() + + +def get_and_disable() -> str: + """ Return the current content of the debug buffer and disable logging. + """ + buf = logger.get().get_buffer() + logger.set(BaseLogger()) + return buf + + +HTML_HEADER: str = """ + + + Nominatim - Debug + + + +""" + +HTML_FOOTER: str = "" diff --git a/nominatim/api/lookup.py b/nominatim/api/lookup.py index 56a41310..27706ff3 100644 --- a/nominatim/api/lookup.py +++ b/nominatim/api/lookup.py @@ -15,6 +15,7 @@ from nominatim.typing import SaColumn, SaLabel, SaRow from nominatim.api.connection import SearchConnection import nominatim.api.types as ntyp import nominatim.api.results as nres +from nominatim.api.logging import log def _select_column_geometry(column: SaColumn, geometry_output: ntyp.GeometryFormat) -> SaLabel: @@ -36,6 +37,7 @@ async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef, """ Search for the given place in the placex table and return the base information. """ + log().section("Find in placex table") t = conn.t.placex sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, t.c.admin_level, @@ -69,6 +71,7 @@ async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef, """ Search for the given place in the osmline table and return the base information. """ + log().section("Find in interpolation table") t = conn.t.osmline sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id, t.c.indexed_date, t.c.startnumber, t.c.endnumber, @@ -98,6 +101,7 @@ async def find_in_tiger(conn: SearchConnection, place: ntyp.PlaceRef, """ Search for the given place in the table of Tiger addresses and return the base information. Only lookup by place ID is supported. """ + log().section("Find in TIGER table") t = conn.t.tiger sql = sa.select(t.c.place_id, t.c.parent_place_id, t.c.startnumber, t.c.endnumber, t.c.step, @@ -119,6 +123,7 @@ async def find_in_postcode(conn: SearchConnection, place: ntyp.PlaceRef, """ Search for the given place in the postcode table and return the base information. Only lookup by place ID is supported. """ + log().section("Find in postcode table") t = conn.t.postcode sql = sa.select(t.c.place_id, t.c.parent_place_id, t.c.rank_search, t.c.rank_address, @@ -139,30 +144,36 @@ async def get_place_by_id(conn: SearchConnection, place: ntyp.PlaceRef, details: ntyp.LookupDetails) -> Optional[nres.SearchResult]: """ Retrieve a place with additional details from the database. """ + log().function('get_place_by_id', place=place, details=details) + if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON: raise ValueError("lookup only supports geojosn polygon output.") row = await find_in_placex(conn, place, details) if row is not None: result = nres.create_from_placex_row(row) + log().var_dump('Result', result) await nres.add_result_details(conn, result, details) return result row = await find_in_osmline(conn, place, details) if row is not None: result = nres.create_from_osmline_row(row) + log().var_dump('Result', result) await nres.add_result_details(conn, result, details) return result row = await find_in_postcode(conn, place, details) if row is not None: result = nres.create_from_postcode_row(row) + log().var_dump('Result', result) await nres.add_result_details(conn, result, details) return result row = await find_in_tiger(conn, place, details) if row is not None: result = nres.create_from_tiger_row(row) + log().var_dump('Result', result) await nres.add_result_details(conn, result, details) return result diff --git a/nominatim/api/results.py b/nominatim/api/results.py index 63c9cf12..23cb47f4 100644 --- a/nominatim/api/results.py +++ b/nominatim/api/results.py @@ -21,6 +21,7 @@ import sqlalchemy as sa from nominatim.typing import SaSelect, SaRow from nominatim.api.types import Point, LookupDetails from nominatim.api.connection import SearchConnection +from nominatim.api.logging import log # This file defines complex result data classes. # pylint: disable=too-many-instance-attributes @@ -228,13 +229,18 @@ async def add_result_details(conn: SearchConnection, result: SearchResult, """ Retrieve more details from the database according to the parameters specified in 'details'. """ + log().section('Query details for result') if details.address_details: + log().comment('Query address details') await complete_address_details(conn, result) if details.linked_places: + log().comment('Query linked places') await complete_linked_places(conn, result) if details.parented_places: + log().comment('Query parent places') await complete_parented_places(conn, result) if details.keywords: + log().comment('Query keywords') await complete_keywords(conn, result) diff --git a/nominatim/api/v1/server_glue.py b/nominatim/api/v1/server_glue.py index 8aa28cfe..35028526 100644 --- a/nominatim/api/v1/server_glue.py +++ b/nominatim/api/v1/server_glue.py @@ -13,12 +13,14 @@ import abc from nominatim.config import Configuration import nominatim.api as napi +import nominatim.api.logging as loglib from nominatim.api.v1.format import dispatch as formatting CONTENT_TYPE = { 'text': 'text/plain; charset=utf-8', 'xml': 'text/xml; charset=utf-8', - 'jsonp': 'application/javascript' + 'jsonp': 'application/javascript', + 'debug': 'text/html; charset=utf-8' } @@ -131,6 +133,18 @@ class ASGIAdaptor(abc.ABC): or self.config().DEFAULT_LANGUAGE + def setup_debugging(self) -> bool: + """ Set up collection of debug information if requested. + + Return True when debugging was requested. + """ + if self.get_bool('debug', False): + loglib.set_log_output('html') + return True + + return False + + def parse_format(params: ASGIAdaptor, result_type: Type[Any], default: str) -> str: """ Get and check the 'format' parameter and prepare the formatter. `fmtter` is a formatter and `default` the @@ -175,6 +189,8 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> raise params.error("Missing ID parameter 'place_id' or 'osmtype'.") place = napi.OsmID(osmtype, params.get_int('osmid'), params.get('class')) + debug = params.setup_debugging() + details = napi.LookupDetails(address_details=params.get_bool('addressdetails', False), linked_places=params.get_bool('linkedplaces', False), parented_places=params.get_bool('hierarchy', False), @@ -184,10 +200,12 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> details.geometry_output = napi.GeometryFormat.GEOJSON locales = napi.Locales.from_accept_languages(params.get_accepted_languages()) - print(locales.languages) result = await api.lookup(place, details) + if debug: + return params.build_response(loglib.get_and_disable(), 'debug') + if result is None: raise params.error('No place with that OSM ID found.', status=404) diff --git a/test/python/api/conftest.py b/test/python/api/conftest.py index 29424016..0275e275 100644 --- a/test/python/api/conftest.py +++ b/test/python/api/conftest.py @@ -14,6 +14,7 @@ import datetime as dt import nominatim.api as napi from nominatim.db.sql_preprocessor import SQLPreprocessor +import nominatim.api.logging as loglib class APITester: @@ -138,6 +139,8 @@ def apiobj(temp_db_with_extensions, temp_db_conn, monkeypatch): SQLPreprocessor(temp_db_conn, testapi.api.config)\ .run_sql_file(temp_db_conn, 'functions/address_lookup.sql') + loglib.set_log_output('text') yield testapi + print(loglib.get_and_disable()) testapi.api.close()