replace NOMINATIM_PHRASE_CONFIG with command line option

This commit is contained in:
Sarah Hoffmann 2021-10-22 14:41:14 +02:00
parent cefae021db
commit c77df2d1eb
9 changed files with 43 additions and 146 deletions

View File

@ -15,6 +15,20 @@ breaking changes. **Please read them before running the migration.**
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
## 3.7.0 -> master
### NOMINATIM_PHRASE_CONFIG removed
Custom blacklist configurations for special phrases now need to be handed
with the `--config` parameter to `nominatim special-phrases`. Alternatively
you can put your custom configuration in the project directory in a file
named `phrase-settings.json`.
Version 3.8 also removes the automatic converter for the php format of
the configuration in older versions. If you are updating from Nominatim < 3.7
and still work with a custom `phrase-settings.php`, you need to manually
convert it into a json format.
## 3.6.0 -> 3.7.0
### New format and name of configuration file

View File

@ -303,19 +303,6 @@ Set a custom location for the
[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
unset, Nominatim expects the data to be saved in the project directory.
#### NOMINATIM_PHRASE_CONFIG
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Configuration file for special phrase imports |
| **Format:** | path |
| **Default:** | _empty_ (use default settings) |
The _phrase_config_ file configures black and white lists of tag types,
so that some of them can be ignored, when loading special phrases from
the OSM wiki. The default settings can be found in the configuration
directory as `phrase-settings.json`.
#### NOMINATIM_ADDRESS_LEVEL_CONFIG
| Summary | |

View File

@ -1,21 +0,0 @@
<?php
$phpPhraseSettingsFile = $argv[1];
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
include $phpPhraseSettingsFile;
$data = array();
if (isset($aTagsBlacklist)) {
$data['blackList'] = $aTagsBlacklist;
}
if (isset($aTagsWhitelist)) {
$data['whiteList'] = $aTagsWhitelist;
}
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
fwrite($jsonFile, json_encode($data));
fclose($jsonFile);
}

View File

@ -35,6 +35,13 @@ class ImportSpecialPhrases:
An example file can be found in the Nominatim sources at
'test/testdb/full_en_phrases_test.csv'.
The import can be further configured to ignore specific key/value pairs.
This is particularly useful when importing phrases from the wiki. The
default configuration excludes some very common tags like building=yes.
The configuration can be customized by putting a file `phrase-settings.json`
with custom rules into the project directory or by using the `--config`
option to point to another configuration file.
"""
@staticmethod
def add_args(parser):
@ -45,6 +52,9 @@ class ImportSpecialPhrases:
help='Import special phrases from a CSV file')
group.add_argument('--no-replace', action='store_true',
help='Keep the old phrases and only add the new ones')
group.add_argument('--config', action='store',
help='Configuration file for black/white listing '
'(default: phrase-settings.json)')
@staticmethod
def run(args):
@ -72,5 +82,5 @@ class ImportSpecialPhrases:
should_replace = not args.no_replace
with connect(args.config.get_libpq_dsn()) as db_connection:
SPImporter(
args.config, args.phplib_dir, db_connection, loader
args.config, db_connection, loader
).import_phrases(tokenizer, should_replace)

View File

@ -4,6 +4,7 @@ Nominatim configuration accessor.
import logging
import os
from pathlib import Path
import json
import yaml
from dotenv import dotenv_values
@ -161,14 +162,19 @@ class Configuration:
is loaded using this function and added at the position in the
configuration tree.
"""
assert Path(filename).suffix == '.yaml'
configfile = self._find_config_file(filename, config)
configfile = self.find_config_file(filename, config)
if configfile.suffix in ('.yaml', '.yml'):
return self._load_from_yaml(configfile)
if configfile.suffix == '.json':
with configfile.open('r') as cfg:
return json.load(cfg)
def _find_config_file(self, filename, config=None):
raise UsageError(f"Config file '{configfile}' has unknown format.")
def find_config_file(self, filename, config=None):
""" Resolve the location of a configuration file given a filename and
an optional configuration option with the file name.
Raises a UsageError when the file cannot be found or is not
@ -221,7 +227,7 @@ class Configuration:
if Path(fname).is_absolute():
configfile = Path(fname)
else:
configfile = self._find_config_file(loader.construct_scalar(node))
configfile = self.find_config_file(loader.construct_scalar(node))
if configfile.suffix != '.yaml':
LOG.fatal("Format error while reading '%s': only YAML format supported.",

View File

@ -8,15 +8,9 @@
valids anymore are removed.
"""
import logging
import os
from os.path import isfile
from pathlib import Path
import re
import subprocess
import json
from psycopg2.sql import Identifier, Literal, SQL
from nominatim.errors import UsageError
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
@ -33,9 +27,8 @@ class SPImporter():
Take a sp loader which load the phrases from an external source.
"""
def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None:
def __init__(self, config, db_connection, sp_loader) -> None:
self.config = config
self.phplib_dir = phplib_dir
self.db_connection = db_connection
self.sp_loader = sp_loader
self.statistics_handler = SpecialPhrasesImporterStatistics()
@ -101,13 +94,8 @@ class SPImporter():
"""
Load white and black lists from phrases-settings.json.
"""
settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
settings = self.config.load_sub_configuration('phrase-settings.json')
if self.config.PHRASE_CONFIG:
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
with settings_path.open("r") as json_settings:
settings = json.load(json_settings)
return settings['blackList'], settings['whiteList']
def _check_sanity(self, phrase):
@ -255,29 +243,3 @@ class SPImporter():
for table in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_deleted()
db_cursor.drop_table(table)
def _convert_php_settings_if_needed(self, file_path):
"""
Convert php settings file of special phrases to json file if it is still in php format.
"""
if not isfile(file_path):
raise UsageError(str(file_path) + ' is not a valid file.')
file, extension = os.path.splitext(file_path)
json_file_path = Path(file + '.json').resolve()
if extension not in ('.php', '.json'):
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
if extension == '.php' and not isfile(json_file_path):
try:
subprocess.run(['/usr/bin/env', 'php', '-Cq',
(self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
file_path], check=True)
LOG.warning('special_phrase configuration file has been converted to json.')
except subprocess.CalledProcessError:
LOG.error('Error while converting %s to json.', file_path)
raise
return json_file_path

View File

@ -89,8 +89,8 @@ NOMINATIM_TIGER_DATA_PATH=
NOMINATIM_WIKIPEDIA_DATA_PATH=
# Configuration file for special phrase import.
# When unset, the internal default settings from 'settings/phrase-settings.json'
# are used.
# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
# a custom phrase-settings.json into your project directory.
NOMINATIM_PHRASE_CONFIG=
# Configuration file for rank assignments.

View File

@ -6,7 +6,7 @@
"Also use this list to exclude an entire class from special phrases."
],
"blackList": {
"bounday": [
"boundary": [
"administrative"
],
"place": [

View File

@ -17,30 +17,12 @@ def testfile_dir(src_dir):
@pytest.fixture
def sp_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
def sp_importer(temp_db_conn, def_config):
"""
Return an instance of SPImporter.
"""
loader = SPWikiLoader(def_config, ['en'])
return SPImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn, loader)
@pytest.fixture
def temp_phplib_dir_with_migration(src_dir, tmp_path):
"""
Return temporary phpdir with migration subdirectory and
PhraseSettingsToJson.php script inside.
"""
migration_file = (src_dir / 'lib-php' / 'migration' / 'PhraseSettingsToJson.php').resolve()
phpdir = tmp_path / 'tempphp'
phpdir.mkdir()
(phpdir / 'migration').mkdir()
migration_dest_path = (phpdir / 'migration' / 'PhraseSettingsToJson.php').resolve()
copyfile(str(migration_file), str(migration_dest_path))
return phpdir
return SPImporter(def_config, temp_db_conn, loader)
@pytest.fixture
@ -90,49 +72,6 @@ def test_load_white_and_black_lists(sp_importer):
assert isinstance(black_list, dict) and isinstance(white_list, dict)
def test_convert_php_settings(sp_importer, testfile_dir, tmp_path):
"""
Test that _convert_php_settings_if_needed() convert the given
php file to a json file.
"""
php_file = (testfile_dir / 'phrase_settings.php').resolve()
temp_settings = (tmp_path / 'phrase_settings.php').resolve()
copyfile(php_file, temp_settings)
sp_importer._convert_php_settings_if_needed(temp_settings)
assert (tmp_path / 'phrase_settings.json').is_file()
def test_convert_settings_wrong_file(sp_importer):
"""
Test that _convert_php_settings_if_needed() raise an exception
if the given file is not a valid file.
"""
with pytest.raises(UsageError, match='random_file is not a valid file.'):
sp_importer._convert_php_settings_if_needed('random_file')
def test_convert_settings_json_already_exist(sp_importer, testfile_dir):
"""
Test that if we give to '_convert_php_settings_if_needed' a php file path
and that a the corresponding json file already exists, it is returned.
"""
php_file = (testfile_dir / 'phrase_settings.php').resolve()
json_file = (testfile_dir / 'phrase_settings.json').resolve()
returned = sp_importer._convert_php_settings_if_needed(php_file)
assert returned == json_file
def test_convert_settings_giving_json(sp_importer, testfile_dir):
"""
Test that if we give to '_convert_php_settings_if_needed' a json file path
the same path is directly returned
"""
json_file = (testfile_dir / 'phrase_settings.json').resolve()
returned = sp_importer._convert_php_settings_if_needed(json_file)
assert returned == json_file
def test_create_place_classtype_indexes(temp_db_with_extensions, temp_db_conn,
table_factory, sp_importer):