mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-25 08:52:52 +03:00
replace NOMINATIM_PHRASE_CONFIG with command line option
This commit is contained in:
parent
cefae021db
commit
c77df2d1eb
@ -15,6 +15,20 @@ breaking changes. **Please read them before running the migration.**
|
||||
If you are migrating from a version <3.6, then you still have to follow
|
||||
the manual migration steps up to 3.6.
|
||||
|
||||
## 3.7.0 -> master
|
||||
|
||||
### NOMINATIM_PHRASE_CONFIG removed
|
||||
|
||||
Custom blacklist configurations for special phrases now need to be passed
|
||||
with the `--config` parameter to `nominatim special-phrases`. Alternatively
|
||||
you can put your custom configuration in the project directory in a file
|
||||
named `phrase-settings.json`.
|
||||
|
||||
Version 3.8 also removes the automatic converter for the php format of
|
||||
the configuration in older versions. If you are updating from Nominatim < 3.7
|
||||
and still work with a custom `phrase-settings.php`, you need to manually
|
||||
convert it into JSON format.
|
||||
|
||||
## 3.6.0 -> 3.7.0
|
||||
|
||||
### New format and name of configuration file
|
||||
|
@ -303,19 +303,6 @@ Set a custom location for the
|
||||
[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
|
||||
unset, Nominatim expects the data to be saved in the project directory.
|
||||
|
||||
#### NOMINATIM_PHRASE_CONFIG
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Configuration file for special phrase imports |
|
||||
| **Format:** | path |
|
||||
| **Default:** | _empty_ (use default settings) |
|
||||
|
||||
The _phrase_config_ file configures black and white lists of tag types,
|
||||
so that some of them can be ignored, when loading special phrases from
|
||||
the OSM wiki. The default settings can be found in the configuration
|
||||
directory as `phrase-settings.json`.
|
||||
|
||||
#### NOMINATIM_ADDRESS_LEVEL_CONFIG
|
||||
|
||||
| Summary | |
|
||||
|
@ -1,21 +0,0 @@
|
||||
<?php
|
||||
|
||||
$phpPhraseSettingsFile = $argv[1];
|
||||
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
|
||||
|
||||
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
|
||||
include $phpPhraseSettingsFile;
|
||||
|
||||
$data = array();
|
||||
|
||||
if (isset($aTagsBlacklist)) {
|
||||
$data['blackList'] = $aTagsBlacklist;
|
||||
}
|
||||
if (isset($aTagsWhitelist)) {
|
||||
$data['whiteList'] = $aTagsWhitelist;
|
||||
}
|
||||
|
||||
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
||||
fwrite($jsonFile, json_encode($data));
|
||||
fclose($jsonFile);
|
||||
}
|
@ -35,6 +35,13 @@ class ImportSpecialPhrases:
|
||||
|
||||
An example file can be found in the Nominatim sources at
|
||||
'test/testdb/full_en_phrases_test.csv'.
|
||||
|
||||
The import can be further configured to ignore specific key/value pairs.
|
||||
This is particularly useful when importing phrases from the wiki. The
|
||||
default configuration excludes some very common tags like building=yes.
|
||||
The configuration can be customized by putting a file `phrase-settings.json`
|
||||
with custom rules into the project directory or by using the `--config`
|
||||
option to point to another configuration file.
|
||||
"""
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
@ -45,6 +52,9 @@ class ImportSpecialPhrases:
|
||||
help='Import special phrases from a CSV file')
|
||||
group.add_argument('--no-replace', action='store_true',
|
||||
help='Keep the old phrases and only add the new ones')
|
||||
group.add_argument('--config', action='store',
|
||||
help='Configuration file for black/white listing '
|
||||
'(default: phrase-settings.json)')
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
@ -72,5 +82,5 @@ class ImportSpecialPhrases:
|
||||
should_replace = not args.no_replace
|
||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||
SPImporter(
|
||||
args.config, args.phplib_dir, db_connection, loader
|
||||
args.config, db_connection, loader
|
||||
).import_phrases(tokenizer, should_replace)
|
||||
|
@ -4,6 +4,7 @@ Nominatim configuration accessor.
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
import json
|
||||
import yaml
|
||||
|
||||
from dotenv import dotenv_values
|
||||
@ -161,14 +162,19 @@ class Configuration:
|
||||
is loaded using this function and added at the position in the
|
||||
configuration tree.
|
||||
"""
|
||||
assert Path(filename).suffix == '.yaml'
|
||||
configfile = self.find_config_file(filename, config)
|
||||
|
||||
configfile = self._find_config_file(filename, config)
|
||||
if configfile.suffix in ('.yaml', '.yml'):
|
||||
return self._load_from_yaml(configfile)
|
||||
|
||||
return self._load_from_yaml(configfile)
|
||||
if configfile.suffix == '.json':
|
||||
with configfile.open('r') as cfg:
|
||||
return json.load(cfg)
|
||||
|
||||
raise UsageError(f"Config file '{configfile}' has unknown format.")
|
||||
|
||||
|
||||
def _find_config_file(self, filename, config=None):
|
||||
def find_config_file(self, filename, config=None):
|
||||
""" Resolve the location of a configuration file given a filename and
|
||||
an optional configuration option with the file name.
|
||||
Raises a UsageError when the file cannot be found or is not
|
||||
@ -221,7 +227,7 @@ class Configuration:
|
||||
if Path(fname).is_absolute():
|
||||
configfile = Path(fname)
|
||||
else:
|
||||
configfile = self._find_config_file(loader.construct_scalar(node))
|
||||
configfile = self.find_config_file(loader.construct_scalar(node))
|
||||
|
||||
if configfile.suffix != '.yaml':
|
||||
LOG.fatal("Format error while reading '%s': only YAML format supported.",
|
||||
|
@ -8,15 +8,9 @@
|
||||
valid anymore are removed.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from os.path import isfile
|
||||
from pathlib import Path
|
||||
import re
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
from psycopg2.sql import Identifier, Literal, SQL
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
|
||||
|
||||
LOG = logging.getLogger()
|
||||
@ -33,9 +27,8 @@ class SPImporter():
|
||||
|
||||
Takes an SP loader which loads the phrases from an external source.
|
||||
"""
|
||||
def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None:
|
||||
def __init__(self, config, db_connection, sp_loader) -> None:
|
||||
self.config = config
|
||||
self.phplib_dir = phplib_dir
|
||||
self.db_connection = db_connection
|
||||
self.sp_loader = sp_loader
|
||||
self.statistics_handler = SpecialPhrasesImporterStatistics()
|
||||
@ -101,13 +94,8 @@ class SPImporter():
|
||||
"""
|
||||
Load white and black lists from phrase-settings.json.
|
||||
"""
|
||||
settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
|
||||
settings = self.config.load_sub_configuration('phrase-settings.json')
|
||||
|
||||
if self.config.PHRASE_CONFIG:
|
||||
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
|
||||
|
||||
with settings_path.open("r") as json_settings:
|
||||
settings = json.load(json_settings)
|
||||
return settings['blackList'], settings['whiteList']
|
||||
|
||||
def _check_sanity(self, phrase):
|
||||
@ -255,29 +243,3 @@ class SPImporter():
|
||||
for table in self.table_phrases_to_delete:
|
||||
self.statistics_handler.notify_one_table_deleted()
|
||||
db_cursor.drop_table(table)
|
||||
|
||||
|
||||
def _convert_php_settings_if_needed(self, file_path):
|
||||
"""
|
||||
Convert php settings file of special phrases to json file if it is still in php format.
|
||||
"""
|
||||
if not isfile(file_path):
|
||||
raise UsageError(str(file_path) + ' is not a valid file.')
|
||||
|
||||
file, extension = os.path.splitext(file_path)
|
||||
json_file_path = Path(file + '.json').resolve()
|
||||
|
||||
if extension not in ('.php', '.json'):
|
||||
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
|
||||
|
||||
if extension == '.php' and not isfile(json_file_path):
|
||||
try:
|
||||
subprocess.run(['/usr/bin/env', 'php', '-Cq',
|
||||
(self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
|
||||
file_path], check=True)
|
||||
LOG.warning('special_phrase configuration file has been converted to json.')
|
||||
except subprocess.CalledProcessError:
|
||||
LOG.error('Error while converting %s to json.', file_path)
|
||||
raise
|
||||
|
||||
return json_file_path
|
||||
|
@ -89,8 +89,8 @@ NOMINATIM_TIGER_DATA_PATH=
|
||||
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
||||
|
||||
# Configuration file for special phrase import.
|
||||
# When unset, the internal default settings from 'settings/phrase-settings.json'
|
||||
# are used.
|
||||
# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
|
||||
# a custom phrase-settings.json into your project directory.
|
||||
NOMINATIM_PHRASE_CONFIG=
|
||||
|
||||
# Configuration file for rank assignments.
|
||||
|
@ -6,7 +6,7 @@
|
||||
"Also use this list to exclude an entire class from special phrases."
|
||||
],
|
||||
"blackList": {
|
||||
"bounday": [
|
||||
"boundary": [
|
||||
"administrative"
|
||||
],
|
||||
"place": [
|
||||
|
@ -17,30 +17,12 @@ def testfile_dir(src_dir):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sp_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
|
||||
def sp_importer(temp_db_conn, def_config):
|
||||
"""
|
||||
Return an instance of SPImporter.
|
||||
"""
|
||||
loader = SPWikiLoader(def_config, ['en'])
|
||||
return SPImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn, loader)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_phplib_dir_with_migration(src_dir, tmp_path):
|
||||
"""
|
||||
Return temporary phpdir with migration subdirectory and
|
||||
PhraseSettingsToJson.php script inside.
|
||||
"""
|
||||
migration_file = (src_dir / 'lib-php' / 'migration' / 'PhraseSettingsToJson.php').resolve()
|
||||
|
||||
phpdir = tmp_path / 'tempphp'
|
||||
phpdir.mkdir()
|
||||
|
||||
(phpdir / 'migration').mkdir()
|
||||
migration_dest_path = (phpdir / 'migration' / 'PhraseSettingsToJson.php').resolve()
|
||||
copyfile(str(migration_file), str(migration_dest_path))
|
||||
|
||||
return phpdir
|
||||
return SPImporter(def_config, temp_db_conn, loader)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -90,49 +72,6 @@ def test_load_white_and_black_lists(sp_importer):
|
||||
|
||||
assert isinstance(black_list, dict) and isinstance(white_list, dict)
|
||||
|
||||
def test_convert_php_settings(sp_importer, testfile_dir, tmp_path):
|
||||
"""
|
||||
Test that _convert_php_settings_if_needed() convert the given
|
||||
php file to a json file.
|
||||
"""
|
||||
php_file = (testfile_dir / 'phrase_settings.php').resolve()
|
||||
|
||||
temp_settings = (tmp_path / 'phrase_settings.php').resolve()
|
||||
copyfile(php_file, temp_settings)
|
||||
sp_importer._convert_php_settings_if_needed(temp_settings)
|
||||
|
||||
assert (tmp_path / 'phrase_settings.json').is_file()
|
||||
|
||||
def test_convert_settings_wrong_file(sp_importer):
|
||||
"""
|
||||
Test that _convert_php_settings_if_needed() raise an exception
|
||||
if the given file is not a valid file.
|
||||
"""
|
||||
with pytest.raises(UsageError, match='random_file is not a valid file.'):
|
||||
sp_importer._convert_php_settings_if_needed('random_file')
|
||||
|
||||
def test_convert_settings_json_already_exist(sp_importer, testfile_dir):
|
||||
"""
|
||||
Test that if we give to '_convert_php_settings_if_needed' a php file path
|
||||
and the corresponding json file already exists, it is returned.
|
||||
"""
|
||||
php_file = (testfile_dir / 'phrase_settings.php').resolve()
|
||||
json_file = (testfile_dir / 'phrase_settings.json').resolve()
|
||||
|
||||
returned = sp_importer._convert_php_settings_if_needed(php_file)
|
||||
|
||||
assert returned == json_file
|
||||
|
||||
def test_convert_settings_giving_json(sp_importer, testfile_dir):
|
||||
"""
|
||||
Test that if we give to '_convert_php_settings_if_needed' a json file path
|
||||
the same path is directly returned
|
||||
"""
|
||||
json_file = (testfile_dir / 'phrase_settings.json').resolve()
|
||||
|
||||
returned = sp_importer._convert_php_settings_if_needed(json_file)
|
||||
|
||||
assert returned == json_file
|
||||
|
||||
def test_create_place_classtype_indexes(temp_db_with_extensions, temp_db_conn,
|
||||
table_factory, sp_importer):
|
||||
|
Loading…
Reference in New Issue
Block a user