diff --git a/docs/admin/Migration.md b/docs/admin/Migration.md index 17019da4..13e6d7f5 100644 --- a/docs/admin/Migration.md +++ b/docs/admin/Migration.md @@ -1,6 +1,6 @@ # Database Migrations -Nominatim offers automatic migrations since version 3.7. Please follow +Nominatim offers automatic migrations for versions 4.3+. Please follow the following steps: * Stop any updates that are potentially running @@ -17,8 +17,9 @@ Below you find additional migrations and hints about other structural and breaking changes. **Please read them before running the migration.** !!! note - If you are migrating from a version <3.6, then you still have to follow - the manual migration steps up to 3.6. + If you are migrating from a version <4.3, you need to install 4.3 + first and migrate the database to 4.3. Then you can migrate to the + current version. It is strongly recommended to do a reimport instead. ## 4.5.0 -> master diff --git a/src/nominatim_db/tools/migration.py b/src/nominatim_db/tools/migration.py index 54836532..12abe7fe 100644 --- a/src/nominatim_db/tools/migration.py +++ b/src/nominatim_db/tools/migration.py @@ -10,13 +10,11 @@ Functions for database migration to newer software versions. from typing import List, Tuple, Callable, Any import logging -from psycopg import sql as pysql - from ..errors import UsageError from ..config import Configuration from ..db import properties -from ..db.connection import connect, Connection, server_version_tuple,\ - table_has_column, table_exists, execute_scalar, register_hstore +from ..db.connection import connect, Connection,\ + table_exists, register_hstore from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version from ..tokenizer import factory as tokenizer_factory from . 
import refresh @@ -38,19 +36,24 @@ def migrate(config: Configuration, paths: Any) -> int: if db_version_str is not None: db_version = parse_version(db_version_str) - - if db_version == NOMINATIM_VERSION: - LOG.warning("Database already at latest version (%s)", db_version_str) - return 0 - - LOG.info("Detected database version: %s", db_version_str) else: - db_version = _guess_version(conn) + db_version = None + if db_version is None or db_version < (4, 3, 0, 0): + LOG.fatal('Your database version is older than 4.3. ' + 'Direct migration is not possible.\n' + 'You should strongly consider a reimport. If that is not possible\n' + 'please upgrade to 4.3 first and then to the newest version.') + raise UsageError('Migration not possible.') + + if db_version == NOMINATIM_VERSION: + LOG.warning("Database already at latest version (%s)", db_version_str) + return 0 + + LOG.info("Detected database version: %s", db_version_str) for version, func in _MIGRATION_FUNCTIONS: - if db_version < version or \ - (db_version == (3, 5, 0, 99) and version == (3, 5, 0, 99)): + if db_version < version: title = func.__doc__ or '' LOG.warning("Running: %s (%s)", title.split('\n', 1)[0], version) kwargs = dict(conn=conn, config=config, paths=paths) @@ -69,25 +72,6 @@ def migrate(config: Configuration, paths: Any) -> int: return 0 -def _guess_version(conn: Connection) -> NominatimVersion: - """ Guess a database version when there is no property table yet. - Only migrations for 3.6 and later are supported, so bail out - when the version seems older. - """ - # In version 3.6, the country_name table was updated. Check for that. - cnt = execute_scalar(conn, """SELECT count(*) FROM - (SELECT svals(name) FROM country_name - WHERE country_code = 'gb')x; - """) - if cnt < 100: - LOG.fatal('It looks like your database was imported with a version ' - 'prior to 3.6.0. 
Automatic migration not possible.') - raise UsageError('Migration not possible.') - - return NominatimVersion(3, 5, 0, 99) - - - def _migration(major: int, minor: int, patch: int = 0, dbpatch: int = 0) -> Callable[[Callable[..., None]], Callable[..., None]]: """ Decorator for a single migration step. The parameters describe the @@ -110,273 +94,6 @@ def _migration(major: int, minor: int, patch: int = 0, return decorator -@_migration(3, 5, 0, 99) -def import_status_timestamp_change(conn: Connection, **_: Any) -> None: - """ Add timezone to timestamp in status table. - - The import_status table has been changed to include timezone information - with the time stamp. - """ - with conn.cursor() as cur: - cur.execute("""ALTER TABLE import_status ALTER COLUMN lastimportdate - TYPE timestamp with time zone;""") - - -@_migration(3, 5, 0, 99) -def add_nominatim_property_table(conn: Connection, config: Configuration, **_: Any) -> None: - """ Add nominatim_property table. - """ - if not table_exists(conn, 'nominatim_properties'): - with conn.cursor() as cur: - cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties ( - property TEXT, - value TEXT); - GRANT SELECT ON TABLE nominatim_properties TO {}; - """).format(pysql.Identifier(config.DATABASE_WEBUSER))) - -@_migration(3, 6, 0, 0) -def change_housenumber_transliteration(conn: Connection, **_: Any) -> None: - """ Transliterate housenumbers. - - The database schema switched from saving raw housenumbers in - placex.housenumber to saving transliterated ones. - - Note: the function create_housenumber_id() has been dropped in later - versions. 
- """ - with conn.cursor() as cur: - cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT) - RETURNS TEXT AS $$ - DECLARE - normtext TEXT; - BEGIN - SELECT array_to_string(array_agg(trans), ';') - INTO normtext - FROM (SELECT lookup_word as trans, - getorcreate_housenumber_id(lookup_word) - FROM (SELECT make_standard_name(h) as lookup_word - FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; - return normtext; - END; - $$ LANGUAGE plpgsql STABLE STRICT;""") - cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'") - cur.execute("""UPDATE placex - SET housenumber = create_housenumber_id(housenumber) - WHERE housenumber is not null""") - - -@_migration(3, 7, 0, 0) -def switch_placenode_geometry_index(conn: Connection, **_: Any) -> None: - """ Replace idx_placex_geometry_reverse_placeNode index. - - Make the index slightly more permissive, so that it can also be used - when matching up boundaries and place nodes. It makes the index - idx_placex_adminname index unnecessary. - """ - with conn.cursor() as cur: - cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex - USING GIST (geometry) - WHERE osm_type = 'N' and rank_search < 26 - and class = 'place' and type != 'postcode' - and linked_place_id is null""") - cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """) - - -@_migration(3, 7, 0, 1) -def install_legacy_tokenizer(conn: Connection, config: Configuration, **_: Any) -> None: - """ Setup legacy tokenizer. 
- - If no other tokenizer has been configured yet, then create the - configuration for the backwards-compatible legacy tokenizer - """ - if properties.get_property(conn, 'tokenizer') is None: - for table in ('placex', 'location_property_osmline'): - if not table_has_column(conn, table, 'token_info'): - with conn.cursor() as cur: - cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB') - .format(pysql.Identifier(table))) - tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False, - module_name='legacy') - - tokenizer.migrate_database(config) # type: ignore[attr-defined] - - -@_migration(4, 0, 99, 0) -def create_tiger_housenumber_index(conn: Connection, **_: Any) -> None: - """ Create idx_location_property_tiger_parent_place_id with included - house number. - - The inclusion is needed for efficient lookup of housenumbers in - full address searches. - """ - if server_version_tuple(conn) >= (11, 0, 0): - with conn.cursor() as cur: - cur.execute(""" CREATE INDEX IF NOT EXISTS - idx_location_property_tiger_housenumber_migrated - ON location_property_tiger - USING btree(parent_place_id) - INCLUDE (startnumber, endnumber) """) - - -@_migration(4, 0, 99, 1) -def create_interpolation_index_on_place(conn: Connection, **_: Any) -> None: - """ Create idx_place_interpolations for lookup of interpolation lines - on updates. - """ - with conn.cursor() as cur: - cur.execute("""CREATE INDEX IF NOT EXISTS idx_place_interpolations - ON place USING gist(geometry) - WHERE osm_type = 'W' and address ? 'interpolation'""") - - -@_migration(4, 0, 99, 2) -def add_step_column_for_interpolation(conn: Connection, **_: Any) -> None: - """ Add a new column 'step' to the interpolations table. - - Also converts the data into the stricter format which requires that - startnumbers comply with the odd/even requirements. 
- """ - if table_has_column(conn, 'location_property_osmline', 'step'): - return - - with conn.cursor() as cur: - # Mark invalid all interpolations with no intermediate numbers. - cur.execute("""UPDATE location_property_osmline SET startnumber = null - WHERE endnumber - startnumber <= 1 """) - # Align the start numbers where odd/even does not match. - cur.execute("""UPDATE location_property_osmline - SET startnumber = startnumber + 1, - linegeo = ST_LineSubString(linegeo, - 1.0 / (endnumber - startnumber)::float, - 1) - WHERE (interpolationtype = 'odd' and startnumber % 2 = 0) - or (interpolationtype = 'even' and startnumber % 2 = 1) - """) - # Mark invalid odd/even interpolations with no intermediate numbers. - cur.execute("""UPDATE location_property_osmline SET startnumber = null - WHERE interpolationtype in ('odd', 'even') - and endnumber - startnumber = 2""") - # Finally add the new column and populate it. - cur.execute("ALTER TABLE location_property_osmline ADD COLUMN step SMALLINT") - cur.execute("""UPDATE location_property_osmline - SET step = CASE WHEN interpolationtype = 'all' - THEN 1 ELSE 2 END - """) - - -@_migration(4, 0, 99, 3) -def add_step_column_for_tiger(conn: Connection, **_: Any) -> None: - """ Add a new column 'step' to the tiger data table. - """ - if table_has_column(conn, 'location_property_tiger', 'step'): - return - - with conn.cursor() as cur: - cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT") - cur.execute("""UPDATE location_property_tiger - SET step = CASE WHEN interpolationtype = 'all' - THEN 1 ELSE 2 END - """) - - -@_migration(4, 0, 99, 4) -def add_derived_name_column_for_country_names(conn: Connection, **_: Any) -> None: - """ Add a new column 'derived_name' which in the future takes the - country names as imported from OSM data. 
- """ - if not table_has_column(conn, 'country_name', 'derived_name'): - with conn.cursor() as cur: - cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE") - - -@_migration(4, 0, 99, 5) -def mark_internal_country_names(conn: Connection, config: Configuration, **_: Any) -> None: - """ Names from the country table should be marked as internal to prevent - them from being deleted. Only necessary for ICU tokenizer. - """ - tokenizer = tokenizer_factory.get_tokenizer_for_db(config) - with tokenizer.name_analyzer() as analyzer: - with conn.cursor() as cur: - cur.execute("SELECT country_code, name FROM country_name") - - for country_code, names in cur: - if not names: - names = {} - names['countrycode'] = country_code - analyzer.add_country_names(country_code, names) - - -@_migration(4, 1, 99, 0) -def add_place_deletion_todo_table(conn: Connection, **_: Any) -> None: - """ Add helper table for deleting data on updates. - - The table is only necessary when updates are possible, i.e. - the database is not in freeze mode. - """ - if table_exists(conn, 'place'): - with conn.cursor() as cur: - cur.execute("""CREATE TABLE IF NOT EXISTS place_to_be_deleted ( - osm_type CHAR(1), - osm_id BIGINT, - class TEXT, - type TEXT, - deferred BOOLEAN)""") - - -@_migration(4, 1, 99, 1) -def split_pending_index(conn: Connection, **_: Any) -> None: - """ Reorganise indexes for pending updates. 
- """ - if table_exists(conn, 'place'): - with conn.cursor() as cur: - cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_address_sector - ON placex USING BTREE (rank_address, geometry_sector) - WHERE indexed_status > 0""") - cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_rank_boundaries_sector - ON placex USING BTREE (rank_search, geometry_sector) - WHERE class = 'boundary' and type = 'administrative' - and indexed_status > 0""") - cur.execute("DROP INDEX IF EXISTS idx_placex_pendingsector") - - -@_migration(4, 2, 99, 0) -def enable_forward_dependencies(conn: Connection, **_: Any) -> None: - """ Create indexes for updates with forward dependency tracking (long-running). - """ - if table_exists(conn, 'planet_osm_ways'): - with conn.cursor() as cur: - cur.execute("""SELECT * FROM pg_indexes - WHERE tablename = 'planet_osm_ways' - and indexdef LIKE '%nodes%'""") - if cur.rowcount == 0: - cur.execute("""CREATE OR REPLACE FUNCTION public.planet_osm_index_bucket(bigint[]) - RETURNS bigint[] - LANGUAGE sql IMMUTABLE - AS $function$ - SELECT ARRAY(SELECT DISTINCT unnest($1) >> 5) - $function$""") - cur.execute("""CREATE INDEX planet_osm_ways_nodes_bucket_idx - ON planet_osm_ways - USING gin (planet_osm_index_bucket(nodes)) - WITH (fastupdate=off)""") - cur.execute("""CREATE INDEX planet_osm_rels_parts_idx - ON planet_osm_rels USING gin (parts) - WITH (fastupdate=off)""") - cur.execute("ANALYZE planet_osm_ways") - - -@_migration(4, 2, 99, 1) -def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> None: - """ Create improved index for reverse lookup of place nodes. 
- """ - with conn.cursor() as cur: - cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode - ON placex - USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search))) - WHERE rank_address between 4 and 25 AND type != 'postcode' - AND name is not null AND linked_place_id is null AND osm_type = 'N' - """) - @_migration(4, 4, 99, 0) def create_postcode_area_lookup_index(conn: Connection, **_: Any) -> None: """ Create index needed for looking up postcode areas from postocde points. diff --git a/test/python/tools/test_migration.py b/test/python/tools/test_migration.py index fd790f75..0b4d2ec6 100644 --- a/test/python/tools/test_migration.py +++ b/test/python/tools/test_migration.py @@ -27,35 +27,13 @@ def postprocess_mock(monkeypatch): lambda *args: DummyTokenizer()) -def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config): - table_factory('country_name', 'name HSTORE, country_code TEXT') +def test_no_migration_old_versions(temp_db_with_extensions, def_config, property_table): + property_table.set('database_version', '4.2.99-0') with pytest.raises(UsageError, match='Migration not possible'): migration.migrate(def_config, {}) -def test_set_up_migration_for_36(temp_db_with_extensions, temp_db_cursor, - table_factory, def_config, monkeypatch, - postprocess_mock): - # don't actually run any migration, except the property table creation - monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS', - [((3, 5, 0, 99), migration.add_nominatim_property_table)]) - # Use a r/o user name that always exists - monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'postgres') - - table_factory('country_name', 'name HSTORE, country_code TEXT', - (({str(x): 'a' for x in range(200)}, 'gb'),)) - - assert not temp_db_cursor.table_exists('nominatim_properties') - - assert migration.migrate(def_config, {}) == 0 - - assert temp_db_cursor.table_exists('nominatim_properties') - - assert 1 == temp_db_cursor.scalar(""" SELECT count(*) 
FROM nominatim_properties - WHERE property = 'database_version'""") - - def test_already_at_version(temp_db_with_extensions, def_config, property_table): property_table.set('database_version', @@ -66,8 +44,7 @@ def test_already_at_version(temp_db_with_extensions, def_config, property_table) def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_cursor, property_table, monkeypatch, postprocess_mock): - oldversion = [x for x in nominatim_db.version.NOMINATIM_VERSION] - oldversion[0] -= 1 + oldversion = [4, 4, 99, 0] property_table.set('database_version', str(nominatim_db.version.NominatimVersion(*oldversion))) @@ -80,7 +57,7 @@ def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_curso """ Dummy migration""" done['old'] = True - oldversion[0] = 0 + oldversion[1] = 0 monkeypatch.setattr(migration, '_MIGRATION_FUNCTIONS', [(tuple(oldversion), _old_migration), (nominatim_db.version.NOMINATIM_VERSION, _migration)]) @@ -97,111 +74,3 @@ def test_run_single_migration(temp_db_with_extensions, def_config, temp_db_curso # Each migration should come with two tests: # 1. Test that migration from old to new state works as expected. # 2. Test that the migration can be rerun on the new state without side effects. 
- - -@pytest.mark.parametrize('in_attr', ('', 'with time zone')) -def test_import_status_timestamp_change(temp_db_conn, temp_db_cursor, - table_factory, in_attr): - table_factory('import_status', - f"""lastimportdate timestamp {in_attr}, - sequence_id integer, - indexed boolean""") - - migration.import_status_timestamp_change(temp_db_conn) - temp_db_conn.commit() - - assert temp_db_cursor.scalar("""SELECT data_type FROM information_schema.columns - WHERE table_name = 'import_status' - and column_name = 'lastimportdate'""")\ - == 'timestamp with time zone' - - -def test_add_nominatim_property_table(temp_db_conn, temp_db_cursor, - def_config, monkeypatch): - # Use a r/o user name that always exists - monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'postgres') - - assert not temp_db_cursor.table_exists('nominatim_properties') - - migration.add_nominatim_property_table(temp_db_conn, def_config) - temp_db_conn.commit() - - assert temp_db_cursor.table_exists('nominatim_properties') - - -def test_add_nominatim_property_table_repeat(temp_db_conn, temp_db_cursor, - def_config, property_table): - assert temp_db_cursor.table_exists('nominatim_properties') - - migration.add_nominatim_property_table(temp_db_conn, def_config) - temp_db_conn.commit() - - assert temp_db_cursor.table_exists('nominatim_properties') - - -def test_switch_placenode_geometry_index(temp_db_conn, temp_db_cursor, placex_table): - temp_db_cursor.execute("""CREATE INDEX idx_placex_adminname - ON placex (place_id)""") - - migration.switch_placenode_geometry_index(temp_db_conn) - temp_db_conn.commit() - - assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry_placenode') - assert not temp_db_cursor.index_exists('placex', 'idx_placex_adminname') - - -def test_switch_placenode_geometry_index_repeat(temp_db_conn, temp_db_cursor, placex_table): - temp_db_cursor.execute("""CREATE INDEX idx_placex_geometry_placenode - ON placex (place_id)""") - - migration.switch_placenode_geometry_index(temp_db_conn) - 
temp_db_conn.commit() - - assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry_placenode') - assert not temp_db_cursor.index_exists('placex', 'idx_placex_adminname') - assert temp_db_cursor.scalar("""SELECT indexdef from pg_indexes - WHERE tablename = 'placex' - and indexname = 'idx_placex_geometry_placenode' - """).endswith('(place_id)') - - -def test_install_legacy_tokenizer(temp_db_conn, temp_db_cursor, project_env, - property_table, table_factory, monkeypatch, - tmp_path): - table_factory('placex', 'place_id BIGINT') - table_factory('location_property_osmline', 'place_id BIGINT') - - # Setting up the tokenizer is problematic - class MiniTokenizer: - def migrate_database(self, config): - pass - - monkeypatch.setattr(migration.tokenizer_factory, 'create_tokenizer', - lambda cfg, **kwargs: MiniTokenizer()) - - migration.install_legacy_tokenizer(temp_db_conn, project_env) - temp_db_conn.commit() - - - -def test_install_legacy_tokenizer_repeat(temp_db_conn, temp_db_cursor, - def_config, property_table): - - property_table.set('tokenizer', 'dummy') - migration.install_legacy_tokenizer(temp_db_conn, def_config) - temp_db_conn.commit() - - -def test_create_tiger_housenumber_index(temp_db_conn, temp_db_cursor, table_factory): - table_factory('location_property_tiger', - 'parent_place_id BIGINT, startnumber INT, endnumber INT') - - migration.create_tiger_housenumber_index(temp_db_conn) - temp_db_conn.commit() - - if server_version_tuple(temp_db_conn) >= (11, 0, 0): - assert temp_db_cursor.index_exists('location_property_tiger', - 'idx_location_property_tiger_housenumber_migrated') - - migration.create_tiger_housenumber_index(temp_db_conn) - temp_db_conn.commit()