mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-25 08:52:52 +03:00
replace NOMINATIM_PHRASE_CONFIG with command line option
This commit is contained in:
parent
cefae021db
commit
c77df2d1eb
@ -15,6 +15,20 @@ breaking changes. **Please read them before running the migration.**
|
|||||||
If you are migrating from a version <3.6, then you still have to follow
|
If you are migrating from a version <3.6, then you still have to follow
|
||||||
the manual migration steps up to 3.6.
|
the manual migration steps up to 3.6.
|
||||||
|
|
||||||
|
## 3.7.0 -> master
|
||||||
|
|
||||||
|
### NOMINATIM_PHRASE_CONFIG removed
|
||||||
|
|
||||||
|
Custom blacklist configurations for special phrases now need to be passed
|
||||||
|
with the `--config` parameter to `nominatim special-phrases`. Alternatively
|
||||||
|
you can put your custom configuration in the project directory in a file
|
||||||
|
named `phrase-settings.json`.
|
||||||
|
|
||||||
|
Version 3.8 also removes the automatic converter for the PHP format of
|
||||||
|
the configuration in older versions. If you are updating from Nominatim < 3.7
|
||||||
|
and still work with a custom `phrase-settings.php`, you need to manually
|
||||||
|
convert it into JSON format.
|
||||||
|
|
||||||
## 3.6.0 -> 3.7.0
|
## 3.6.0 -> 3.7.0
|
||||||
|
|
||||||
### New format and name of configuration file
|
### New format and name of configuration file
|
||||||
|
@ -303,19 +303,6 @@ Set a custom location for the
|
|||||||
[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
|
[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
|
||||||
unset, Nominatim expects the data to be saved in the project directory.
|
unset, Nominatim expects the data to be saved in the project directory.
|
||||||
|
|
||||||
#### NOMINATIM_PHRASE_CONFIG
|
|
||||||
|
|
||||||
| Summary | |
|
|
||||||
| -------------- | --------------------------------------------------- |
|
|
||||||
| **Description:** | Configuration file for special phrase imports |
|
|
||||||
| **Format:** | path |
|
|
||||||
| **Default:** | _empty_ (use default settings) |
|
|
||||||
|
|
||||||
The _phrase_config_ file configures black and white lists of tag types,
|
|
||||||
so that some of them can be ignored, when loading special phrases from
|
|
||||||
the OSM wiki. The default settings can be found in the configuration
|
|
||||||
directory as `phrase-settings.json`.
|
|
||||||
|
|
||||||
#### NOMINATIM_ADDRESS_LEVEL_CONFIG
|
#### NOMINATIM_ADDRESS_LEVEL_CONFIG
|
||||||
|
|
||||||
| Summary | |
|
| Summary | |
|
||||||
|
@ -1,21 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
$phpPhraseSettingsFile = $argv[1];
|
|
||||||
$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
|
|
||||||
|
|
||||||
if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
|
|
||||||
include $phpPhraseSettingsFile;
|
|
||||||
|
|
||||||
$data = array();
|
|
||||||
|
|
||||||
if (isset($aTagsBlacklist)) {
|
|
||||||
$data['blackList'] = $aTagsBlacklist;
|
|
||||||
}
|
|
||||||
if (isset($aTagsWhitelist)) {
|
|
||||||
$data['whiteList'] = $aTagsWhitelist;
|
|
||||||
}
|
|
||||||
|
|
||||||
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
|
||||||
fwrite($jsonFile, json_encode($data));
|
|
||||||
fclose($jsonFile);
|
|
||||||
}
|
|
@ -35,6 +35,13 @@ class ImportSpecialPhrases:
|
|||||||
|
|
||||||
An example file can be found in the Nominatim sources at
|
An example file can be found in the Nominatim sources at
|
||||||
'test/testdb/full_en_phrases_test.csv'.
|
'test/testdb/full_en_phrases_test.csv'.
|
||||||
|
|
||||||
|
The import can be further configured to ignore specific key/value pairs.
|
||||||
|
This is particularly useful when importing phrases from the wiki. The
|
||||||
|
default configuration excludes some very common tags like building=yes.
|
||||||
|
The configuration can be customized by putting a file `phrase-settings.json`
|
||||||
|
with custom rules into the project directory or by using the `--config`
|
||||||
|
option to point to another configuration file.
|
||||||
"""
|
"""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_args(parser):
|
def add_args(parser):
|
||||||
@ -45,6 +52,9 @@ class ImportSpecialPhrases:
|
|||||||
help='Import special phrases from a CSV file')
|
help='Import special phrases from a CSV file')
|
||||||
group.add_argument('--no-replace', action='store_true',
|
group.add_argument('--no-replace', action='store_true',
|
||||||
help='Keep the old phrases and only add the new ones')
|
help='Keep the old phrases and only add the new ones')
|
||||||
|
group.add_argument('--config', action='store',
|
||||||
|
help='Configuration file for black/white listing '
|
||||||
|
'(default: phrase-settings.json)')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run(args):
|
def run(args):
|
||||||
@ -72,5 +82,5 @@ class ImportSpecialPhrases:
|
|||||||
should_replace = not args.no_replace
|
should_replace = not args.no_replace
|
||||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||||
SPImporter(
|
SPImporter(
|
||||||
args.config, args.phplib_dir, db_connection, loader
|
args.config, db_connection, loader
|
||||||
).import_phrases(tokenizer, should_replace)
|
).import_phrases(tokenizer, should_replace)
|
||||||
|
@ -4,6 +4,7 @@ Nominatim configuration accessor.
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import json
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from dotenv import dotenv_values
|
from dotenv import dotenv_values
|
||||||
@ -161,14 +162,19 @@ class Configuration:
|
|||||||
is loaded using this function and added at the position in the
|
is loaded using this function and added at the position in the
|
||||||
configuration tree.
|
configuration tree.
|
||||||
"""
|
"""
|
||||||
assert Path(filename).suffix == '.yaml'
|
configfile = self.find_config_file(filename, config)
|
||||||
|
|
||||||
configfile = self._find_config_file(filename, config)
|
if configfile.suffix in ('.yaml', '.yml'):
|
||||||
|
return self._load_from_yaml(configfile)
|
||||||
|
|
||||||
return self._load_from_yaml(configfile)
|
if configfile.suffix == '.json':
|
||||||
|
with configfile.open('r') as cfg:
|
||||||
|
return json.load(cfg)
|
||||||
|
|
||||||
|
raise UsageError(f"Config file '{configfile}' has unknown format.")
|
||||||
|
|
||||||
|
|
||||||
def _find_config_file(self, filename, config=None):
|
def find_config_file(self, filename, config=None):
|
||||||
""" Resolve the location of a configuration file given a filename and
|
""" Resolve the location of a configuration file given a filename and
|
||||||
an optional configuration option with the file name.
|
an optional configuration option with the file name.
|
||||||
Raises a UsageError when the file cannot be found or is not
|
Raises a UsageError when the file cannot be found or is not
|
||||||
@ -221,7 +227,7 @@ class Configuration:
|
|||||||
if Path(fname).is_absolute():
|
if Path(fname).is_absolute():
|
||||||
configfile = Path(fname)
|
configfile = Path(fname)
|
||||||
else:
|
else:
|
||||||
configfile = self._find_config_file(loader.construct_scalar(node))
|
configfile = self.find_config_file(loader.construct_scalar(node))
|
||||||
|
|
||||||
if configfile.suffix != '.yaml':
|
if configfile.suffix != '.yaml':
|
||||||
LOG.fatal("Format error while reading '%s': only YAML format supported.",
|
LOG.fatal("Format error while reading '%s': only YAML format supported.",
|
||||||
|
@ -8,15 +8,9 @@
|
|||||||
valid anymore are removed.
|
valid anymore are removed.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
from os.path import isfile
|
|
||||||
from pathlib import Path
|
|
||||||
import re
|
import re
|
||||||
import subprocess
|
|
||||||
import json
|
|
||||||
|
|
||||||
from psycopg2.sql import Identifier, Literal, SQL
|
from psycopg2.sql import Identifier, Literal, SQL
|
||||||
from nominatim.errors import UsageError
|
|
||||||
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
|
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
@ -33,9 +27,8 @@ class SPImporter():
|
|||||||
|
|
||||||
Takes an sp loader which loads the phrases from an external source.
|
Takes an sp loader which loads the phrases from an external source.
|
||||||
"""
|
"""
|
||||||
def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None:
|
def __init__(self, config, db_connection, sp_loader) -> None:
|
||||||
self.config = config
|
self.config = config
|
||||||
self.phplib_dir = phplib_dir
|
|
||||||
self.db_connection = db_connection
|
self.db_connection = db_connection
|
||||||
self.sp_loader = sp_loader
|
self.sp_loader = sp_loader
|
||||||
self.statistics_handler = SpecialPhrasesImporterStatistics()
|
self.statistics_handler = SpecialPhrasesImporterStatistics()
|
||||||
@ -101,13 +94,8 @@ class SPImporter():
|
|||||||
"""
|
"""
|
||||||
Load white and black lists from phrase-settings.json.
|
Load white and black lists from phrase-settings.json.
|
||||||
"""
|
"""
|
||||||
settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
|
settings = self.config.load_sub_configuration('phrase-settings.json')
|
||||||
|
|
||||||
if self.config.PHRASE_CONFIG:
|
|
||||||
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
|
|
||||||
|
|
||||||
with settings_path.open("r") as json_settings:
|
|
||||||
settings = json.load(json_settings)
|
|
||||||
return settings['blackList'], settings['whiteList']
|
return settings['blackList'], settings['whiteList']
|
||||||
|
|
||||||
def _check_sanity(self, phrase):
|
def _check_sanity(self, phrase):
|
||||||
@ -255,29 +243,3 @@ class SPImporter():
|
|||||||
for table in self.table_phrases_to_delete:
|
for table in self.table_phrases_to_delete:
|
||||||
self.statistics_handler.notify_one_table_deleted()
|
self.statistics_handler.notify_one_table_deleted()
|
||||||
db_cursor.drop_table(table)
|
db_cursor.drop_table(table)
|
||||||
|
|
||||||
|
|
||||||
def _convert_php_settings_if_needed(self, file_path):
|
|
||||||
"""
|
|
||||||
Convert php settings file of special phrases to json file if it is still in php format.
|
|
||||||
"""
|
|
||||||
if not isfile(file_path):
|
|
||||||
raise UsageError(str(file_path) + ' is not a valid file.')
|
|
||||||
|
|
||||||
file, extension = os.path.splitext(file_path)
|
|
||||||
json_file_path = Path(file + '.json').resolve()
|
|
||||||
|
|
||||||
if extension not in ('.php', '.json'):
|
|
||||||
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
|
|
||||||
|
|
||||||
if extension == '.php' and not isfile(json_file_path):
|
|
||||||
try:
|
|
||||||
subprocess.run(['/usr/bin/env', 'php', '-Cq',
|
|
||||||
(self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
|
|
||||||
file_path], check=True)
|
|
||||||
LOG.warning('special_phrase configuration file has been converted to json.')
|
|
||||||
except subprocess.CalledProcessError:
|
|
||||||
LOG.error('Error while converting %s to json.', file_path)
|
|
||||||
raise
|
|
||||||
|
|
||||||
return json_file_path
|
|
||||||
|
@ -89,8 +89,8 @@ NOMINATIM_TIGER_DATA_PATH=
|
|||||||
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
||||||
|
|
||||||
# Configuration file for special phrase import.
|
# Configuration file for special phrase import.
|
||||||
# When unset, the internal default settings from 'settings/phrase-settings.json'
|
# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
|
||||||
# are used.
|
# a custom phrase-settings.json into your project directory.
|
||||||
NOMINATIM_PHRASE_CONFIG=
|
NOMINATIM_PHRASE_CONFIG=
|
||||||
|
|
||||||
# Configuration file for rank assignments.
|
# Configuration file for rank assignments.
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
"Also use this list to exclude an entire class from special phrases."
|
"Also use this list to exclude an entire class from special phrases."
|
||||||
],
|
],
|
||||||
"blackList": {
|
"blackList": {
|
||||||
"bounday": [
|
"boundary": [
|
||||||
"administrative"
|
"administrative"
|
||||||
],
|
],
|
||||||
"place": [
|
"place": [
|
||||||
|
@ -17,30 +17,12 @@ def testfile_dir(src_dir):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sp_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
|
def sp_importer(temp_db_conn, def_config):
|
||||||
"""
|
"""
|
||||||
Return an instance of SPImporter.
|
Return an instance of SPImporter.
|
||||||
"""
|
"""
|
||||||
loader = SPWikiLoader(def_config, ['en'])
|
loader = SPWikiLoader(def_config, ['en'])
|
||||||
return SPImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn, loader)
|
return SPImporter(def_config, temp_db_conn, loader)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def temp_phplib_dir_with_migration(src_dir, tmp_path):
|
|
||||||
"""
|
|
||||||
Return temporary phpdir with migration subdirectory and
|
|
||||||
PhraseSettingsToJson.php script inside.
|
|
||||||
"""
|
|
||||||
migration_file = (src_dir / 'lib-php' / 'migration' / 'PhraseSettingsToJson.php').resolve()
|
|
||||||
|
|
||||||
phpdir = tmp_path / 'tempphp'
|
|
||||||
phpdir.mkdir()
|
|
||||||
|
|
||||||
(phpdir / 'migration').mkdir()
|
|
||||||
migration_dest_path = (phpdir / 'migration' / 'PhraseSettingsToJson.php').resolve()
|
|
||||||
copyfile(str(migration_file), str(migration_dest_path))
|
|
||||||
|
|
||||||
return phpdir
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@ -90,49 +72,6 @@ def test_load_white_and_black_lists(sp_importer):
|
|||||||
|
|
||||||
assert isinstance(black_list, dict) and isinstance(white_list, dict)
|
assert isinstance(black_list, dict) and isinstance(white_list, dict)
|
||||||
|
|
||||||
def test_convert_php_settings(sp_importer, testfile_dir, tmp_path):
|
|
||||||
"""
|
|
||||||
Test that _convert_php_settings_if_needed() convert the given
|
|
||||||
php file to a json file.
|
|
||||||
"""
|
|
||||||
php_file = (testfile_dir / 'phrase_settings.php').resolve()
|
|
||||||
|
|
||||||
temp_settings = (tmp_path / 'phrase_settings.php').resolve()
|
|
||||||
copyfile(php_file, temp_settings)
|
|
||||||
sp_importer._convert_php_settings_if_needed(temp_settings)
|
|
||||||
|
|
||||||
assert (tmp_path / 'phrase_settings.json').is_file()
|
|
||||||
|
|
||||||
def test_convert_settings_wrong_file(sp_importer):
|
|
||||||
"""
|
|
||||||
Test that _convert_php_settings_if_needed() raise an exception
|
|
||||||
if the given file is not a valid file.
|
|
||||||
"""
|
|
||||||
with pytest.raises(UsageError, match='random_file is not a valid file.'):
|
|
||||||
sp_importer._convert_php_settings_if_needed('random_file')
|
|
||||||
|
|
||||||
def test_convert_settings_json_already_exist(sp_importer, testfile_dir):
|
|
||||||
"""
|
|
||||||
Test that if we give to '_convert_php_settings_if_needed' a php file path
|
|
||||||
and that a the corresponding json file already exists, it is returned.
|
|
||||||
"""
|
|
||||||
php_file = (testfile_dir / 'phrase_settings.php').resolve()
|
|
||||||
json_file = (testfile_dir / 'phrase_settings.json').resolve()
|
|
||||||
|
|
||||||
returned = sp_importer._convert_php_settings_if_needed(php_file)
|
|
||||||
|
|
||||||
assert returned == json_file
|
|
||||||
|
|
||||||
def test_convert_settings_giving_json(sp_importer, testfile_dir):
|
|
||||||
"""
|
|
||||||
Test that if we give to '_convert_php_settings_if_needed' a json file path
|
|
||||||
the same path is directly returned
|
|
||||||
"""
|
|
||||||
json_file = (testfile_dir / 'phrase_settings.json').resolve()
|
|
||||||
|
|
||||||
returned = sp_importer._convert_php_settings_if_needed(json_file)
|
|
||||||
|
|
||||||
assert returned == json_file
|
|
||||||
|
|
||||||
def test_create_place_classtype_indexes(temp_db_with_extensions, temp_db_conn,
|
def test_create_place_classtype_indexes(temp_db_with_extensions, temp_db_conn,
|
||||||
table_factory, sp_importer):
|
table_factory, sp_importer):
|
||||||
|
Loading…
Reference in New Issue
Block a user