mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-24 13:31:37 +03:00
add migration for configurable tokenizer
Adds a migration that initialises a legacy tokenizer for an existing database. The migration is not active yet as it will need completion when more functionality is added to the legacy tokenizer.
This commit is contained in:
parent
296a66558f
commit
b5540dc35c
@ -33,12 +33,15 @@ def _import_tokenizer(name):
|
||||
raise UsageError('Tokenizer not found') from exp
|
||||
|
||||
|
||||
def create_tokenizer(config):
|
||||
def create_tokenizer(config, init_db=True, module_name=None):
|
||||
""" Create a new tokenizer as defined by the given configuration.
|
||||
|
||||
The tokenizer data and code are copied into the 'tokenizer' directory
|
||||
of the project directory and the tokenizer loaded from its new location.
|
||||
"""
|
||||
if module_name is None:
|
||||
module_name = config.TOKENIZER
|
||||
|
||||
# Create the directory for the tokenizer data
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.exists():
|
||||
@ -47,13 +50,15 @@ def create_tokenizer(config):
|
||||
LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
|
||||
raise UsageError("Tokenizer setup failed.")
|
||||
|
||||
tokenizer_module = _import_tokenizer(config.TOKENIZER)
|
||||
# Import and initialize the tokenizer.
|
||||
tokenizer_module = _import_tokenizer(module_name)
|
||||
|
||||
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
|
||||
tokenizer.init_new_db(config)
|
||||
if init_db:
|
||||
tokenizer.init_new_db(config)
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
properties.set_property(conn, 'tokenizer', config.TOKENIZER)
|
||||
properties.set_property(conn, 'tokenizer', module_name)
|
||||
|
||||
return tokenizer
|
||||
|
||||
|
@ -20,7 +20,7 @@ def create(dsn, data_dir):
|
||||
return LegacyTokenizer(dsn, data_dir)
|
||||
|
||||
|
||||
def _install_module(src_dir, module_dir):
|
||||
def _install_module(config_module_path, src_dir, module_dir):
|
||||
""" Copies the PostgreSQL normalisation module into the project
|
||||
directory if necessary. For historical reasons the module is
|
||||
saved in the '/module' subdirectory and not with the other tokenizer
|
||||
@ -29,10 +29,17 @@ def _install_module(src_dir, module_dir):
|
||||
The function detects when the installation is run from the
|
||||
build directory. It doesn't touch the module in that case.
|
||||
"""
|
||||
# Custom module locations are simply used as is.
|
||||
if config_module_path:
|
||||
LOG.info("Using custom path for database module at '%s'", config_module_path)
|
||||
return config_module_path
|
||||
|
||||
# Compatibility mode for builddir installations.
|
||||
if module_dir.exists() and src_dir.samefile(module_dir):
|
||||
LOG.info('Running from build directory. Leaving database module as is.')
|
||||
return
|
||||
return module_dir
|
||||
|
||||
# In any other case install the module in the project directory.
|
||||
if not module_dir.exists():
|
||||
module_dir.mkdir()
|
||||
|
||||
@ -42,6 +49,8 @@ def _install_module(src_dir, module_dir):
|
||||
|
||||
LOG.info('Database module installed at %s', str(destfile))
|
||||
|
||||
return module_dir
|
||||
|
||||
|
||||
def _check_module(module_dir, conn):
|
||||
with conn.cursor() as cur:
|
||||
@ -74,24 +83,15 @@ class LegacyTokenizer:
|
||||
This copies all necessary data in the project directory to make
|
||||
sure the tokenizer remains stable even over updates.
|
||||
"""
|
||||
# Find and optionally install the PostgreSQL normalization module.
|
||||
if config.DATABASE_MODULE_PATH:
|
||||
LOG.info("Using custom path for database module at '%s'",
|
||||
config.DATABASE_MODULE_PATH)
|
||||
module_dir = config.DATABASE_MODULE_PATH
|
||||
else:
|
||||
_install_module(config.lib_dir.module, config.project_dir / 'module')
|
||||
module_dir = config.project_dir / 'module'
|
||||
module_dir = _install_module(config.DATABASE_MODULE_PATH,
|
||||
config.lib_dir.module,
|
||||
config.project_dir / 'module')
|
||||
|
||||
self.normalization = config.TERM_NORMALIZATION
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
_check_module(module_dir, conn)
|
||||
|
||||
# Stable configuration is saved in the database.
|
||||
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
|
||||
|
||||
conn.commit()
|
||||
self._save_config(conn)
|
||||
|
||||
|
||||
def init_from_project(self):
|
||||
@ -99,3 +99,26 @@ class LegacyTokenizer:
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
||||
|
||||
|
||||
def migrate_database(self, config):
    """ Initialise the project directory of an existing database for
        use with this tokenizer.

        This is a special migration function for updating existing databases
        to new software versions.

        `config` is the project configuration. It is read for the optional
        custom module path, the module source directory, the project
        directory and the term normalization setting.
    """
    module_dir = _install_module(config.DATABASE_MODULE_PATH,
                                 config.lib_dir.module,
                                 config.project_dir / 'module')

    # A tokenizer that is being migrated has not gone through
    # init_new_db(), so the normalization setting has to be taken from
    # the configuration here. Otherwise _save_config() would persist
    # whatever value the attribute happened to hold.
    self.normalization = config.TERM_NORMALIZATION

    with connect(self.dsn) as conn:
        _check_module(module_dir, conn)
        self._save_config(conn)
|
||||
|
||||
|
||||
def _save_config(self, conn):
    """ Persist the tokenizer settings that have to stay unchanged for
        the given database.

        The settings are written as database properties through the
        connection `conn`.
    """
    stable_settings = ((DBCFG_NORMALIZATION, self.normalization),)

    for key, value in stable_settings:
        properties.set_property(conn, key, value)
|
||||
|
@ -6,7 +6,8 @@ import logging
|
||||
from nominatim.db import properties
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.version import NOMINATIM_VERSION
|
||||
from nominatim.tools import refresh, database_import
|
||||
from nominatim.tools import refresh
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
@ -108,17 +109,6 @@ def import_status_timestamp_change(conn, **_):
|
||||
TYPE timestamp with time zone;""")
|
||||
|
||||
|
||||
@_migration(3, 5, 0, 99)
def install_database_module_in_project_directory(conn, config, paths, **_):
    """ Install database module in project directory.

        Project directories expect the database module to be available
        inside them, so databases that predate project directories get
        the module installed there.
    """
    # Attributes are read in the same order as the call arguments.
    source_dir = paths.module_dir
    project_dir = paths.project_dir
    custom_module_path = config.DATABASE_MODULE_PATH

    database_import.install_module(source_dir, project_dir,
                                   custom_module_path, conn=conn)
|
||||
|
||||
|
||||
@_migration(3, 5, 0, 99)
|
||||
def add_nominatim_property_table(conn, config, **_):
|
||||
""" Add nominatim_property table.
|
||||
@ -173,3 +163,17 @@ def switch_placenode_geometry_index(conn, **_):
|
||||
and class = 'place' and type != 'postcode'
|
||||
and linked_place_id is null""")
|
||||
cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
|
||||
|
||||
|
||||
@_migration(3, 7, 0, 1)
def install_legacy_tokenizer(conn, config, **_):
    """ Setup legacy tokenizer.

        A database that has no 'tokenizer' property yet receives the
        configuration for the backwards-compatible legacy tokenizer.
        A database where the property is already set is left untouched.
    """
    if properties.get_property(conn, 'tokenizer') is not None:
        return

    # Only register the tokenizer; the database content already exists,
    # so the initialisation for new databases is skipped.
    factory_options = {'init_db': False, 'module_name': 'legacy'}
    legacy = tokenizer_factory.create_tokenizer(config, **factory_options)

    legacy.migrate_database(config)
|
||||
|
Loading…
Reference in New Issue
Block a user