Merge pull request #2279 from lonvia/add-index-for-continued-indexing

Add index for continued indexing
This commit is contained in:
Sarah Hoffmann 2021-04-17 11:51:21 +02:00 committed by GitHub
commit 7aeae9da81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 82 additions and 2 deletions

View File

@ -105,11 +105,11 @@ class SetupAll:
LOG.error('Wikipedia importance dump file not found. '
'Will be using default importances.')
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(),
args.data_dir,
@ -119,6 +119,9 @@ class SetupAll:
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
if args.continue_at is not None and args.continue_at != 'load-data':
with connect(args.config.get_libpq_dsn()) as conn:
SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or psutil.cpu_count() or 1)
@ -148,3 +151,25 @@ class SetupAll:
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
return 0
@staticmethod
def _create_pending_index(conn, tablespace):
    """ Add a supporting index for finding places still to be indexed.

        This index is normally created at the end of the import process
        for later updates. When indexing was partially done, then this
        index can greatly improve speed going through already indexed data.

        `conn` is the database connection to work on. `tablespace` names
        the tablespace to create the index in; when it is empty (an empty
        string or None), no TABLESPACE clause is added to the statement.

        Nothing is done when the index already exists.
    """
    if conn.index_exists('idx_placex_pendingsector'):
        return

    # Keep the optional clause in its own variable instead of rebinding
    # the parameter, so that an unset tablespace (including None) is never
    # formatted verbatim into the SQL statement.
    tablespace_clause = 'TABLESPACE ' + tablespace if tablespace else ''

    with conn.cursor() as cur:
        LOG.warning('Creating support index')
        cur.execute("""CREATE INDEX idx_placex_pendingsector
                       ON placex USING BTREE (rank_address,geometry_sector)
                       {} WHERE indexed_status > 0
                    """.format(tablespace_clause))
    conn.commit()

View File

@ -49,6 +49,7 @@ def mock_run_legacy(monkeypatch):
def mock_func_factory(monkeypatch):
def get_mock(module, func):
mock = MockParamCapture()
mock.func_name = func
monkeypatch.setattr(module, func, mock)
return mock
@ -110,7 +111,61 @@ def test_import_full(temp_db, mock_func_factory):
assert cf_mock.called > 1
for mock in mocks:
assert mock.called == 1
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_import_continue_load_data(temp_db, mock_func_factory):
    """ Run `import --continue load-data` with the listed functions patched
        and check that each of them was called exactly once.
    """
    patch_targets = (
        (nominatim.tools.database_import, 'truncate_data_tables'),
        (nominatim.tools.database_import, 'load_data'),
        (nominatim.tools.database_import, 'create_search_indices'),
        (nominatim.tools.database_import, 'create_country_names'),
        (nominatim.tools.postcodes, 'import_postcodes'),
        (nominatim.indexer.indexer.Indexer, 'index_full'),
        (nominatim.tools.refresh, 'setup_website'),
        (nominatim.db.properties, 'set_property'),
    )
    mocks = [mock_func_factory(module, func) for module, func in patch_targets]

    exit_code = call_nominatim('import', '--continue', 'load-data')
    assert 0 == exit_code

    for mock in mocks:
        assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
    """ Run `import --continue indexing` with the listed functions patched.

        Each patched function must be called exactly once and the index
        'idx_placex_pendingsector' must exist afterwards. Running the
        command a second time must succeed as well and leave the index
        in place.
    """
    patch_targets = (
        (nominatim.tools.database_import, 'create_search_indices'),
        (nominatim.tools.database_import, 'create_country_names'),
        (nominatim.indexer.indexer.Indexer, 'index_full'),
        (nominatim.tools.refresh, 'setup_website'),
        (nominatim.db.properties, 'set_property'),
    )
    mocks = [mock_func_factory(module, func) for module, func in patch_targets]

    first_result = call_nominatim('import', '--continue', 'indexing')
    assert 0 == first_result

    for mock in mocks:
        assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)

    assert temp_db_conn.index_exists('idx_placex_pendingsector')

    # A second run finds the index already present and must still succeed.
    second_result = call_nominatim('import', '--continue', 'indexing')
    assert 0 == second_result
    assert temp_db_conn.index_exists('idx_placex_pendingsector')
def test_import_continue_postprocess(temp_db, mock_func_factory):
    """ Run `import --continue db-postprocess` with the listed functions
        patched and check that each of them was called exactly once.
    """
    patch_targets = (
        (nominatim.tools.database_import, 'create_search_indices'),
        (nominatim.tools.database_import, 'create_country_names'),
        (nominatim.tools.refresh, 'setup_website'),
        (nominatim.db.properties, 'set_property'),
    )
    mocks = [mock_func_factory(module, func) for module, func in patch_targets]

    exit_code = call_nominatim('import', '--continue', 'db-postprocess')
    assert 0 == exit_code

    for mock in mocks:
        assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_freeze_command(mock_func_factory, temp_db):
mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')