2022-01-03 18:23:58 +03:00
|
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
#
|
|
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
|
|
#
|
|
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
|
|
# For a full list of authors see the git log.
|
2021-01-13 20:25:15 +03:00
|
|
|
"""
|
|
|
|
Nominatim configuration accessor.
|
|
|
|
"""
|
2022-07-02 12:59:19 +03:00
|
|
|
from typing import Dict, Any, List, Mapping, Optional
|
2022-07-25 16:17:20 +03:00
|
|
|
import importlib.util
|
2021-01-30 17:50:34 +03:00
|
|
|
import logging
|
2021-01-13 20:25:15 +03:00
|
|
|
import os
|
2022-07-25 16:17:20 +03:00
|
|
|
import sys
|
2021-01-30 17:50:34 +03:00
|
|
|
from pathlib import Path
|
2021-10-22 15:41:14 +03:00
|
|
|
import json
|
2021-09-03 19:16:12 +03:00
|
|
|
import yaml
|
2021-01-13 20:25:15 +03:00
|
|
|
|
|
|
|
from dotenv import dotenv_values
|
2022-11-18 18:11:31 +03:00
|
|
|
from psycopg2.extensions import parse_dsn
|
2021-01-13 20:25:15 +03:00
|
|
|
|
2022-07-02 12:59:19 +03:00
|
|
|
from nominatim.typing import StrPath
|
2021-04-16 15:20:09 +03:00
|
|
|
from nominatim.errors import UsageError
|
2022-11-27 00:00:43 +03:00
|
|
|
import nominatim.paths
|
2021-01-30 18:20:10 +03:00
|
|
|
|
2021-01-30 17:50:34 +03:00
|
|
|
# Module logger. Calling getLogger() without a name yields the root logger.
LOG = logging.getLogger()

# Parsed sub-configuration files. Entries are keyed by the string form of
# the resolved configuration file path and are shared by the whole process.
CONFIG_CACHE: Dict[str, Any] = {}
|
2021-10-04 12:56:54 +03:00
|
|
|
|
2022-06-30 16:43:18 +03:00
|
|
|
def flatten_config_list(content: Any, section: str = '') -> List[Any]:
    """ Turn an arbitrarily nested list into a single flat list.

        YAML configuration lists may contain include sections which are
        lists themselves. The nested lists are expanded in place, so that
        the order of the elements is preserved.

        Parameters:
          content: The value read from the configuration. Any falsy value
                   (for example `None` or an empty list) yields an empty
                   list.
          section: Name of the configuration section the content came
                   from. Only used for the error message.

        Returns:
          A new list with all non-list elements of `content`.

        Raises:
          UsageError: when `content` is non-empty and not a list.
    """
    if not content:
        return []

    if not isinstance(content, list):
        raise UsageError(f"List expected in section '{section}'.")

    flat: List[Any] = []
    for item in content:
        # Sub-lists are expanded recursively, everything else is kept as is.
        flat += flatten_config_list(item, section) if isinstance(item, list) else [item]

    return flat
|
|
|
|
|
|
|
|
|
2021-01-13 20:25:15 +03:00
|
|
|
class Configuration:
    """ This class wraps access to the configuration settings
        for the Nominatim instance in use.

        All Nominatim configuration options are prefixed with 'NOMINATIM_' to
        avoid conflicts with other environment variables. All settings can
        be accessed as properties of the class under the same name as the
        setting but with the `NOMINATIM_` prefix removed. In addition, there
        are accessor functions that convert the setting values to types
        other than string.

        Settings are looked up in the environment mapping first and then in
        the values read from the `env.defaults` file and the project's
        `.env` file.
    """

    def __init__(self, project_dir: Optional[Path],
                 environ: Optional[Mapping[str, str]] = None) -> None:
        """ Load the default settings and, if available, the project
            settings.

            Parameters:
              project_dir: Directory of the project. May be `None` when
                           there is no project directory. When it contains
                           a `.env` file, the directory is resolved to an
                           absolute path and the file is merged over the
                           defaults.
              environ: Mapping to use instead of the OS environment.
                       Note that an empty mapping is falsy and therefore
                       also falls back to `os.environ`.
        """
        self.environ = environ or os.environ
        self.project_dir = project_dir
        self.config_dir = nominatim.paths.CONFIG_DIR
        self._config = dotenv_values(str(self.config_dir / 'env.defaults'))
        if self.project_dir is not None and (self.project_dir / '.env').is_file():
            self.project_dir = self.project_dir.resolve()
            self._config.update(dotenv_values(str(self.project_dir / '.env')))

        class _LibDirs:
            # 'module' and 'osm2pgsql' have no default. They only exist
            # once they have been set through set_libdirs().
            module: Path
            osm2pgsql: Path
            php = nominatim.paths.PHPLIB_DIR
            sql = nominatim.paths.SQLLIB_DIR
            data = nominatim.paths.DATA_DIR

        self.lib_dir = _LibDirs()
        # Modules loaded from files in the project directory, keyed by the
        # file name they were requested with.
        self._private_plugins: Dict[str, object] = {}


    def set_libdirs(self, **kwargs: StrPath) -> None:
        """ Set paths to library functions and data.

            Each keyword names an attribute of `lib_dir`. The value is
            converted into a Path object before it is assigned.
        """
        for key, value in kwargs.items():
            setattr(self.lib_dir, key, Path(value))


    def __getattr__(self, name: str) -> str:
        """ Return the setting `NOMINATIM_<name>` as a string.

            This function is only called by Python when normal attribute
            lookup has failed. The environment takes precedence over the
            values from the configuration files. A setting without a value
            is returned as an empty string.

            Raises:
              KeyError: when the setting is known neither to the
                        environment nor to the configuration files.
        """
        name = 'NOMINATIM_' + name

        if name in self.environ:
            return self.environ[name]

        return self._config[name] or ''


    def get_bool(self, name: str) -> bool:
        """ Return the given configuration parameter as a boolean.

            Parameters:
              name: Name of the configuration parameter with the NOMINATIM_
                prefix removed.

            Returns:
              `True` for values of '1', 'yes' and 'true' (compared
              case-insensitively), `False` otherwise.
        """
        return getattr(self, name).lower() in ('1', 'yes', 'true')


    def get_int(self, name: str) -> int:
        """ Return the given configuration parameter as an int.

            Parameters:
              name: Name of the configuration parameter with the NOMINATIM_
                prefix removed.

            Returns:
              The configuration value converted to int.

            Raises:
              UsageError: when the value is not a number. The offending
                          setting is logged before the error is raised.
        """
        try:
            return int(getattr(self, name))
        except ValueError as exp:
            LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
            raise UsageError("Configuration error.") from exp


    def get_str_list(self, name: str) -> Optional[List[str]]:
        """ Return the given configuration parameter as a list of strings.
            The values are assumed to be given as a comma-separated list and
            will be stripped before returning them.

            Parameters:
              name: Name of the configuration parameter with the NOMINATIM_
                prefix removed.

            Returns:
              (List[str]): The comma-split parameter as a list. The
                elements are stripped of leading and final spaces before
                being returned.
              (None): The configuration parameter was unset or empty.
        """
        raw = getattr(self, name)

        return [v.strip() for v in raw.split(',')] if raw else None


    def get_path(self, name: str) -> Optional[Path]:
        """ Return the given configuration parameter as a Path.

            Parameters:
              name: Name of the configuration parameter with the NOMINATIM_
                prefix removed.

            Returns:
              (Path): A Path object of the parameter value.
                  If a relative path is configured, then the function converts this
                  into an absolute path with the project directory as root path.
              (None): The configuration parameter was unset or empty.
        """
        value = getattr(self, name)
        if not value:
            return None

        cfgpath = Path(value)

        if not cfgpath.is_absolute():
            # Relative paths can only be resolved against a project directory.
            assert self.project_dir is not None
            cfgpath = self.project_dir / cfgpath

        return cfgpath.resolve()


    def get_libpq_dsn(self) -> str:
        """ Get configured database DSN converted into the key/value format
            understood by libpq and psycopg.

            A DSN in the old PHP format ('pgsql:key=value;key=value') is
            converted. Any other DSN is returned unchanged.
        """
        dsn = self.DATABASE_DSN

        def quote_param(param: str) -> str:
            # Split on the first '=' only, the value itself may contain
            # further equal signs (e.g. in passwords).
            key, val = param.split('=', 1)
            val = val.replace('\\', '\\\\').replace("'", "\\'")
            # libpq requires quotes around empty values and around values
            # with spaces. An unquoted empty value would swallow the
            # parameter that follows.
            if not val or ' ' in val:
                val = "'" + val + "'"
            return key + '=' + val

        if dsn.startswith('pgsql:'):
            # Old PHP DSN format. Convert before returning.
            # Empty segments (e.g. from a trailing ';') carry no information.
            return ' '.join([quote_param(p) for p in dsn[6:].split(';') if p])

        return dsn


    def get_database_params(self) -> Mapping[str, str]:
        """ Get the configured parameters for the database connection
            as a mapping.

            A DSN in the old PHP format is split by hand, any other DSN is
            handed to psycopg's `parse_dsn()`.
        """
        dsn = self.DATABASE_DSN

        if dsn.startswith('pgsql:'):
            # Skip empty segments so that a trailing ';' does not cause
            # an error.
            return dict(p.split('=', 1) for p in dsn[6:].split(';') if p)

        return parse_dsn(dsn)


    def get_import_style_file(self) -> Path:
        """ Return the import style file as a path object. Translates the
            name of the standard styles automatically into a file in the
            config style.

            Any other value of IMPORT_STYLE is resolved with the rules
            of `find_config_file()`.
        """
        style = getattr(self, 'IMPORT_STYLE')

        if style in ('admin', 'street', 'address', 'full', 'extratags'):
            return self.config_dir / f'import-{style}.lua'

        return self.find_config_file('', 'IMPORT_STYLE')


    def get_os_env(self) -> Dict[str, str]:
        """ Return a copy of the OS environment with the Nominatim configuration
            merged in.

            Settings without a value are left out. Values from the
            environment win over values from the configuration files.
        """
        env = {k: v for k, v in self._config.items() if v is not None}
        env.update(self.environ)

        return env


    def load_sub_configuration(self, filename: StrPath,
                               config: Optional[str] = None) -> Any:
        """ Load additional configuration from a file. `filename` is the name
            of the configuration file. The file is first searched in the
            project directory and then in the global settings directory.

            If `config` is set, then the name of the configuration file can
            be additionally given through a .env configuration option. When
            the option is set, then the file will be exclusively loaded as set:
            if the name is an absolute path, the file name is taken as is,
            if the name is relative, it is searched in the project directory
            and the global settings directory like a regular file name.

            The format of the file is determined from the filename suffix.
            Files with the extensions '.yaml', '.yml' and '.json' are
            supported.

            YAML files support a special '!include' construct. When the
            directive is given, the value is taken to be a filename, the file
            is loaded using this function and added at the position in the
            configuration tree.

            The parsed content is cached per file in a module-wide cache.
            Repeated calls return the very same object.

            Raises:
              UsageError: when the file cannot be found or has an
                          unsupported suffix.
        """
        configfile = self.find_config_file(filename, config)

        if str(configfile) in CONFIG_CACHE:
            return CONFIG_CACHE[str(configfile)]

        if configfile.suffix in ('.yaml', '.yml'):
            result = self._load_from_yaml(configfile)
        elif configfile.suffix == '.json':
            with configfile.open('r', encoding='utf-8') as cfg:
                result = json.load(cfg)
        else:
            raise UsageError(f"Config file '{configfile}' has unknown format.")

        CONFIG_CACHE[str(configfile)] = result
        return result


    def load_plugin_module(self, module_name: str, internal_path: str) -> Any:
        """ Load a Python module as a plugin.

            The module_name may have three variants:

            * A name without any '.' is assumed to be an internal module
              and will be searched relative to `internal_path`. Dashes in
              the name are replaced with underscores.
            * If the name ends in `.py`, module_name is assumed to be a
              file name relative to the project directory.
            * Any other name is assumed to be an absolute module name.

            In either of the variants the module name must start with a
            letter or an underscore.

            Raises:
              UsageError: when the name is invalid or the file cannot be
                          found in the project directory.
        """
        if not module_name or not module_name[0].isidentifier():
            raise UsageError(f'Invalid module name {module_name}')

        if '.' not in module_name:
            module_name = module_name.replace('-', '_')
            full_module = f'{internal_path}.{module_name}'
            return sys.modules.get(full_module) or importlib.import_module(full_module)

        if module_name.endswith('.py'):
            if self.project_dir is None or not (self.project_dir / module_name).exists():
                raise UsageError(f"Cannot find module '{module_name}' in project directory.")

            if module_name in self._private_plugins:
                return self._private_plugins[module_name]

            file_path = str(self.project_dir / module_name)
            spec = importlib.util.spec_from_file_location(module_name, file_path)
            if spec:
                module = importlib.util.module_from_spec(spec)
                # Do not add to global modules because there is no standard
                # module name that Python can resolve.
                self._private_plugins[module_name] = module
                assert spec.loader is not None
                try:
                    spec.loader.exec_module(module)
                except BaseException:
                    # Same as the import system does for sys.modules: never
                    # keep a module around whose execution has failed.
                    del self._private_plugins[module_name]
                    raise

                return module

        return sys.modules.get(module_name) or importlib.import_module(module_name)


    def find_config_file(self, filename: StrPath,
                         config: Optional[str] = None) -> Path:
        """ Resolve the location of a configuration file given a filename and
            an optional configuration option with the file name.

            When the configuration option `config` has a value, it replaces
            `filename`. An absolute path from the option is resolved and
            returned directly. All other names are searched first in the
            project directory and then in the global settings directory.

            Raises a UsageError when the file cannot be found or is not
            a regular file.
        """
        if config is not None:
            cfg_value = getattr(self, config)
            if cfg_value:
                cfg_filename = Path(cfg_value)

                if cfg_filename.is_absolute():
                    cfg_filename = cfg_filename.resolve()

                    if not cfg_filename.is_file():
                        LOG.fatal("Cannot find config file '%s'.", cfg_filename)
                        raise UsageError("Config file not found.")

                    return cfg_filename

                filename = cfg_filename

        search_paths = [self.project_dir, self.config_dir]
        for path in search_paths:
            # The project directory is optional and may be None.
            if path is not None and (path / filename).is_file():
                return path / filename

        LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s",
                  filename, search_paths)
        raise UsageError("Config file not found.")


    def _load_from_yaml(self, cfgfile: Path) -> Any:
        """ Load a YAML configuration file. This installs a special handler that
            allows to include other YAML files using the '!include' operator.

            The handler is registered on `yaml.SafeLoader` itself and is
            bound to this Configuration object.
        """
        yaml.add_constructor('!include', self._yaml_include_representer,
                             Loader=yaml.SafeLoader)
        return yaml.safe_load(cfgfile.read_text(encoding='utf-8'))


    def _yaml_include_representer(self, loader: Any, node: yaml.Node) -> Any:
        """ Handler for the '!include' operator in YAML files.

            When the filename is relative, then the file is first searched in the
            project directory and then in the global settings directory.

            Only YAML files may be included. The same suffixes are accepted
            as for the top-level configuration file: '.yaml' and '.yml'.

            Raises:
              UsageError: when the included file cannot be found or is
                          not a YAML file.
        """
        fname = loader.construct_scalar(node)

        if Path(fname).is_absolute():
            configfile = Path(fname)
        else:
            configfile = self.find_config_file(fname)

        if configfile.suffix not in ('.yaml', '.yml'):
            LOG.fatal("Format error while reading '%s': only YAML format supported.",
                      configfile)
            raise UsageError("Cannot handle config file format.")

        return yaml.safe_load(configfile.read_text(encoding='utf-8'))
|