Mirror of https://github.com/osm-search/Nominatim.git, synced 2024-12-26 22:44:44 +03:00
Merge pull request #2641 from lonvia/reinit-tokenizer-dir
Transparently reinitialize tokenizer directory when necessary
Commit d33c82cb66
@@ -198,11 +198,10 @@ target machine.
of a full database.

Next install Nominatim on the target machine by following the standard installation
instructions. Again make sure to use the same version as the source machine.
instructions. Again, make sure to use the same version as the source machine.

You can now copy the project directory from the source machine to the new machine.
If necessary, edit the `.env` file to point it to the restored database.
Finally run
Create a project directory on your destination machine and set up the `.env`
file to match the configuration on the source machine. Finally run

    nominatim refresh --website
@@ -210,6 +209,8 @@ to make sure that the local installation of Nominatim will be used.

If you are using the legacy tokenizer you might also have to switch to the
PostgreSQL module that was compiled on your target machine. If you get errors
that PostgreSQL cannot find or access `nominatim.so` then copy the installed
version into the `module` directory of your project directory. The installed
copy can usually be found under `/usr/local/lib/nominatim/module/nominatim.so`.
that PostgreSQL cannot find or access `nominatim.so` then rerun

    nominatim refresh --functions

on the target machine to update the location of the module.
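As an aside, the post-copy refresh steps described above can be scripted. A minimal sketch in Python, assuming the `nominatim` CLI is on the PATH and using a purely hypothetical project directory path:

    # Minimal sketch of the post-copy steps from the documentation above;
    # the project directory path is a hypothetical example.
    import subprocess
    from pathlib import Path

    project_dir = Path('/srv/nominatim-project')

    # Recreate the website scripts (and, with this change, the tokenizer directory).
    subprocess.run(['nominatim', 'refresh', '--website'], cwd=project_dir, check=True)

    # Only needed with the legacy tokenizer, to update the module location.
    subprocess.run(['nominatim', 'refresh', '--functions'], cwd=project_dir, check=True)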
@@ -117,6 +117,10 @@ class UpdateRefresh:

        if args.website:
            webdir = args.project_dir / 'website'
            LOG.warning('Setting up website directory at %s', webdir)
            # This is a little bit hacky: call the tokenizer setup, so that
            # the tokenizer directory gets repopulated as well, in case it
            # wasn't there yet.
            self._get_tokenizer(args.config)
            with connect(args.config.get_libpq_dsn()) as conn:
                refresh.setup_website(webdir, args.config, conn)
@@ -18,7 +18,7 @@ from dotenv import dotenv_values

from nominatim.errors import UsageError

LOG = logging.getLogger()

CONFIG_CACHE = {}

def flatten_config_list(content, section=''):
    """ Flatten YAML configuration lists that contain include sections
@@ -181,14 +181,19 @@ class Configuration:

        """
        configfile = self.find_config_file(filename, config)

        if str(configfile) in CONFIG_CACHE:
            return CONFIG_CACHE[str(configfile)]

        if configfile.suffix in ('.yaml', '.yml'):
            return self._load_from_yaml(configfile)

        if configfile.suffix == '.json':
            result = self._load_from_yaml(configfile)
        elif configfile.suffix == '.json':
            with configfile.open('r') as cfg:
                return json.load(cfg)
                result = json.load(cfg)
        else:
            raise UsageError(f"Config file '{configfile}' has unknown format.")

        raise UsageError(f"Config file '{configfile}' has unknown format.")
        CONFIG_CACHE[str(configfile)] = result
        return result


    def find_config_file(self, filename, config=None):
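The caching added here keys loaded sub-configurations by the resolved file path, so repeated lookups reuse the already parsed content. A standalone sketch of the same idea, with purely illustrative names rather than the Nominatim API:

    # Standalone sketch of path-keyed configuration caching; names are illustrative only.
    import json
    from pathlib import Path

    _CACHE = {}

    def load_config(path: Path):
        key = str(path)
        if key in _CACHE:
            # Return the previously parsed content instead of re-reading the file.
            return _CACHE[key]
        if path.suffix == '.json':
            result = json.loads(path.read_text())
        else:
            raise ValueError(f"Unknown config format: {path}")
        _CACHE[key] = result
        return result

Note that such a cache hands out the same parsed object to every caller, so callers must treat it as read-only.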
@@ -27,6 +27,9 @@ def get_property(conn, name):

    """ Return the current value of the given property or None if the property
        is not set.
    """
    if not conn.table_exists('nominatim_properties'):
        return None

    with conn.cursor() as cur:
        cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                    (name, ))
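With the new guard, callers may query properties even before the `nominatim_properties` table has been created. A minimal usage sketch, assuming `conn` offers the same `table_exists()`/`cursor()` interface as Nominatim's connection wrapper:

    # Minimal usage sketch for the guarded property lookup.
    from nominatim.db import properties

    def read_tokenizer_name(conn):
        # Returns None both when the property is unset and when the
        # nominatim_properties table does not exist yet.
        return properties.get_property(conn, 'tokenizer')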
@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):

    """
    basedir = config.project_dir / 'tokenizer'
    if not basedir.is_dir():
        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
        raise UsageError('Cannot initialize tokenizer.')
        # Directory will be repopulated by tokenizer below.
        basedir.mkdir()

    with connect(config.get_libpq_dsn()) as conn:
        name = properties.get_property(conn, 'tokenizer')
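This is the core of the change: a missing tokenizer directory is now recreated and left for the tokenizer to repopulate from the database, instead of aborting with a usage error. A simplified sketch of that flow, using a hypothetical helper name rather than the actual factory code:

    # Simplified sketch of the reinitialisation flow; the helper name is hypothetical.
    from pathlib import Path

    def ensure_tokenizer_dir(project_dir: Path) -> Path:
        basedir = project_dir / 'tokenizer'
        if not basedir.is_dir():
            # Previously a fatal error; now the directory is simply recreated
            # and repopulated from the configuration stored in the database.
            basedir.mkdir()
        return basedir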
@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):

        """
        self.loader = ICURuleLoader(config)

        self._install_php(config.lib_dir.php)
        self._install_php(config.lib_dir.php, overwrite=True)
        self._save_config()

        if init_db:
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):

        with connect(self.dsn) as conn:
            self.loader.load_config_from_db(conn)

        self._install_php(config.lib_dir.php, overwrite=False)


    def finalize_import(self, config):
        """ Do any required postprocessing to make the tokenizer data ready
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):

                                     self.loader.make_token_analysis())


    def _install_php(self, phpdir):
    def _install_php(self, phpdir, overwrite=True):
        """ Install the php script for the tokenizer.
        """
        php_file = self.data_dir / "tokenizer.php"
        php_file.write_text(dedent(f"""\
            <?php
            @define('CONST_Max_Word_Frequency', 10000000);
            @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
            @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
            require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))

        if not php_file.exists() or overwrite:
            php_file.write_text(dedent(f"""\
                <?php
                @define('CONST_Max_Word_Frequency', 10000000);
                @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
                @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))


    def _save_config(self):
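The new `overwrite` flag is a simple write-if-missing-or-forced idiom: a fresh setup always rewrites `tokenizer.php`, while reinitialisation from an existing project only creates the file when it is missing. A generic sketch of the pattern:

    # Generic sketch of the write-if-missing-or-forced idiom used above.
    from pathlib import Path

    def install_file(target: Path, content: str, overwrite: bool = True) -> None:
        # Write `content` unless the file already exists and overwriting
        # was not requested (overwrite=False keeps an existing copy intact).
        if not target.exists() or overwrite:
            target.write_text(content)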
@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):

        self.normalization = config.TERM_NORMALIZATION

        self._install_php(config)
        self._install_php(config, overwrite=True)

        with connect(self.dsn) as conn:
            _check_module(module_dir, conn)
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):

        self._init_db_tables(config)


    def init_from_project(self, _):
    def init_from_project(self, config):
        """ Initialise the tokenizer from the project directory.
        """
        with connect(self.dsn) as conn:
            self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)

        if not (config.project_dir / 'module' / 'nominatim.so').exists():
            _install_module(config.DATABASE_MODULE_PATH,
                            config.lib_dir.module,
                            config.project_dir / 'module')

        self._install_php(config, overwrite=False)


    def finalize_import(self, config):
        """ Do any required postprocessing to make the tokenizer data ready
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):

        return LegacyNameAnalyzer(self.dsn, normalizer)


    def _install_php(self, config):
    def _install_php(self, config, overwrite=True):
        """ Install the php script for the tokenizer.
        """
        php_file = self.data_dir / "tokenizer.php"
        php_file.write_text(dedent("""\
            <?php
            @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
            @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
            require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
            """.format(config)))

        if not php_file.exists() or overwrite:
            php_file.write_text(dedent("""\
                <?php
                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
                """.format(config)))


    def _init_db_tables(self, config):
@@ -217,7 +217,7 @@ class NominatimEnvironment:

                self.db_drop_database(self.api_test_db)
                raise

        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())


    def setup_unknown_db(self):
@@ -63,13 +63,13 @@ class TestFactory:

        assert tokenizer.init_state == "loaded"


    def test_load_no_tokenizer_dir(self):
    def test_load_repopulate_tokenizer_dir(self):
        factory.create_tokenizer(self.config)

        self.config.project_dir = self.config.project_dir / 'foo'
        self.config.project_dir = self.config.project_dir

        with pytest.raises(UsageError):
            factory.get_tokenizer_for_db(self.config)
        factory.get_tokenizer_for_db(self.config)

        assert (self.config.project_dir / 'tokenizer').exists()


    def test_load_missing_property(self, temp_db_cursor):