2022-01-03 18:23:58 +03:00
|
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
#
|
|
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
|
|
#
|
|
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
|
|
# For a full list of authors see the git log.
|
2021-01-13 20:25:15 +03:00
|
|
|
"""
|
|
|
|
Helper functions for executing external programs.
|
|
|
|
"""
|
2022-07-03 18:38:11 +03:00
|
|
|
from typing import Any, Union, Optional, Mapping, IO
|
2022-07-03 15:48:15 +03:00
|
|
|
from pathlib import Path
|
2021-01-19 13:05:02 +03:00
|
|
|
import logging
|
2022-11-03 19:15:01 +03:00
|
|
|
import os
|
2021-01-13 20:25:15 +03:00
|
|
|
import subprocess
|
2021-01-27 00:45:24 +03:00
|
|
|
import urllib.request as urlrequest
|
2021-01-19 13:05:02 +03:00
|
|
|
from urllib.parse import urlencode
|
2021-01-13 20:25:15 +03:00
|
|
|
|
2022-11-27 00:00:43 +03:00
|
|
|
from nominatim.config import Configuration
|
2022-07-03 15:48:15 +03:00
|
|
|
from nominatim.typing import StrPath
|
2022-12-13 12:36:19 +03:00
|
|
|
from nominatim.version import NOMINATIM_VERSION
|
2021-04-16 15:20:09 +03:00
|
|
|
from nominatim.db.connection import get_pg_env
|
2021-01-27 00:45:24 +03:00
|
|
|
|
|
|
|
# Logger shared by the helpers in this module. getLogger() is called without
# a name, so this is the root logger.
LOG = logging.getLogger()
|
|
|
|
|
2022-07-03 15:48:15 +03:00
|
|
|
def run_legacy_script(script: StrPath, *args: Union[int, str],
                      config: Configuration,
                      throw_on_fail: bool = False) -> int:
    """ Execute one of the PHP scripts from the 'admin' directory of the
        PHP library with the given command line arguments.

        The script runs inside the project directory with the environment
        supplied by `config`, extended by the library paths the PHP code
        expects.

        Returns the exit code of the script. When `throw_on_fail` is set,
        a non-zero exit raises `CalledProcessError` instead.
    """
    script_path = config.lib_dir.php / 'admin' / script
    command = ['/usr/bin/env', 'php', '-Cq', str(script_path)]
    command += [str(argument) for argument in args]

    environment = config.get_os_env()
    for name, value in (('NOMINATIM_DATADIR', config.lib_dir.data),
                        ('NOMINATIM_SQLDIR', config.lib_dir.sql),
                        ('NOMINATIM_CONFIGDIR', config.config_dir),
                        ('NOMINATIM_DATABASE_MODULE_SRC_PATH', config.lib_dir.module)):
        environment[name] = str(value)

    # Only fall back to the bundled osm2pgsql when nothing was configured.
    if not environment['NOMINATIM_OSM2PGSQL_BINARY']:
        environment['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql)

    return subprocess.run(command, cwd=str(config.project_dir),
                          env=environment, check=throw_on_fail).returncode
|
2021-01-19 13:05:02 +03:00
|
|
|
|
2022-07-03 15:48:15 +03:00
|
|
|
def run_api_script(endpoint: str, project_dir: Path,
                   extra_env: Optional[Mapping[str, str]] = None,
                   phpcgi_bin: Optional[Path] = None,
                   params: Optional[Mapping[str, Any]] = None) -> int:
    """ Execute a Nominatim API function.

        The function needs a project directory that contains the website
        directory with the scripts to be executed. The scripts will be run
        using php-cgi, or with the binary given in `phpcgi_bin` when set.
        Query parameters are taken from the `params` mapping. Additional
        environment variables for the script may be given in `extra_env`;
        they override the CGI variables set up here.

        On success the body of the response is printed to stdout and 0
        is returned. When the script exits with a non-zero code or writes
        anything to stderr, its output is logged as an error and the exit
        code of the script is returned (1 if the exit code was 0).
    """
    log = logging.getLogger()
    webdir = str(project_dir / 'website')
    query_string = urlencode(params or {})

    # Minimal CGI environment emulating a GET request against the endpoint.
    env = dict(QUERY_STRING=query_string,
               SCRIPT_NAME=f'/{endpoint}.php',
               REQUEST_URI=f'/{endpoint}.php?{query_string}',
               CONTEXT_DOCUMENT_ROOT=webdir,
               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
               HTTP_HOST='localhost',
               HTTP_USER_AGENT='nominatim-tool',
               REMOTE_ADDR='0.0.0.0',
               DOCUMENT_ROOT=webdir,
               REQUEST_METHOD='GET',
               SERVER_PROTOCOL='HTTP/1.1',
               GATEWAY_INTERFACE='CGI/1.1',
               REDIRECT_STATUS='CGI')

    if extra_env:
        env.update(extra_env)

    if phpcgi_bin is None:
        cmd = ['/usr/bin/env', 'php-cgi']
    else:
        cmd = [str(phpcgi_bin)]

    proc = subprocess.run(cmd, cwd=str(project_dir), env=env,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          check=False)

    if proc.returncode != 0 or proc.stderr:
        # Prefer stderr for the error message, fall back to whatever the
        # script printed on stdout.
        error_output = proc.stderr if proc.stderr else proc.stdout
        log.error(error_output.decode('utf-8').replace('\\n', '\n'))
        return proc.returncode or 1

    result = proc.stdout.decode('utf-8')

    # The response headers are separated from the body by an empty line.
    # When there is no such separator, treat the whole output as body
    # instead of cutting off its first characters.
    _, separator, body = result.partition('\r\n\r\n')
    if not separator:
        body = result

    print(body.replace('\\n', '\n'))

    return 0
|
2021-01-27 00:45:24 +03:00
|
|
|
|
|
|
|
|
2022-07-03 15:48:15 +03:00
|
|
|
def run_php_server(server_address: str, base_dir: StrPath) -> None:
    """ Start the web server built into PHP on `server_address`, using
        `base_dir` as its working directory.

        The call blocks until the server process ends. A non-zero exit
        of the process raises `CalledProcessError`.
    """
    command = ['/usr/bin/env', 'php', '-S', server_address]
    subprocess.run(command, check=True, cwd=str(base_dir))
|
|
|
|
|
|
|
|
|
2022-07-03 15:48:15 +03:00
|
|
|
def run_osm2pgsql(options: Mapping[str, Any]) -> None:
    """ Run osm2pgsql with the given options.

        The following keys are read from `options`:

        * 'dsn' - database connection string, handed to `get_pg_env()`
        * 'osm2pgsql' - the binary to execute
        * 'append' - update an existing import (single process) instead
          of creating a new one
        * 'threads' - number of processes (only read when not appending)
        * 'osm2pgsql_cache', 'osm2pgsql_style' - cache size and style file
        * 'osm2pgsql_style_path' - directory added to the Lua search path
          (only read when the style file ends in '.lua')
        * 'flatnode_file' - flat node file, skipped when empty
        * 'tablespaces' - mapping with the keys 'slim_data', 'slim_index',
          'main_data' and 'main_index', empty values are skipped
        * 'import_data' (optional) - data fed to osm2pgsql via stdin,
          read as XML
        * 'import_file' - a file name or list of file names to import
          (only read when 'import_data' is absent)
        * 'disable_jit' (optional) - set PGOPTIONS to switch off JIT and
          parallel workers
        * 'cwd' (optional) - working directory, defaults to '.'

        Raises `CalledProcessError` when osm2pgsql exits with a non-zero
        exit code.
    """
    env = get_pg_env(options['dsn'])
    cmd = [str(options['osm2pgsql']),
           '--hstore', '--latlon', '--slim',
           '--log-progress', 'true',
           '--number-processes', '1' if options['append'] else str(options['threads']),
           '--cache', str(options['osm2pgsql_cache']),
           '--style', str(options['osm2pgsql_style'])
          ]

    if str(options['osm2pgsql_style']).endswith('.lua'):
        # Lua reads its module search path from LUA_PATH. Keep a path the
        # caller has already set instead of overwriting it. 'LUAPATH' is
        # still honoured as a fallback because it was the name read here
        # before. The default ';' yields a trailing ';;', which Lua expands
        # to its built-in default path.
        lua_path = os.environ.get('LUA_PATH') or os.environ.get('LUAPATH', ';')
        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
                                    lua_path))
        cmd.extend(('--output', 'flex'))
    else:
        cmd.extend(('--output', 'gazetteer'))

    cmd.append('--append' if options['append'] else '--create')

    if options['flatnode_file']:
        cmd.extend(('--flat-nodes', options['flatnode_file']))

    for key, param in (('slim_data', '--tablespace-slim-data'),
                       ('slim_index', '--tablespace-slim-index'),
                       ('main_data', '--tablespace-main-data'),
                       ('main_index', '--tablespace-main-index')):
        if options['tablespaces'][key]:
            cmd.extend((param, options['tablespaces'][key]))

    if options.get('disable_jit', False):
        env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'

    if 'import_data' in options:
        # Data comes in via stdin (see `input` below) and is read as XML.
        cmd.extend(('-r', 'xml', '-'))
    elif isinstance(options['import_file'], list):
        cmd.extend(str(fname) for fname in options['import_file'])
    else:
        cmd.append(str(options['import_file']))

    subprocess.run(cmd, cwd=options.get('cwd', '.'),
                   input=options.get('import_data'),
                   env=env, check=True)
|
2021-01-30 17:50:34 +03:00
|
|
|
|
|
|
|
|
2022-07-03 15:48:15 +03:00
|
|
|
def get_url(url: str) -> str:
    """ Fetch the document behind the given URL and hand back its
        content decoded as UTF-8.

        The request is sent with a Nominatim user agent. Any failure is
        logged and the causing exception is re-raised.
    """
    user_agent = f"Nominatim/{NOMINATIM_VERSION!s}"

    try:
        req = urlrequest.Request(url, headers={"User-Agent": user_agent})
        with urlrequest.urlopen(req) as resp: # type: IO[bytes]
            payload = resp.read()
        return payload.decode('utf-8')
    except Exception:
        LOG.fatal('Failed to load URL: %s', url)
        raise
|