Move generation of country tables into its own module

This commit is contained in:
Sarah Hoffmann 2021-09-01 16:02:10 +02:00
parent 0b349761a8
commit 284645f505
5 changed files with 109 additions and 130 deletions

View File

@ -52,17 +52,21 @@ class SetupAll:
@staticmethod
def run(args):
from ..tools import database_import, refresh, postcodes, freeze
from ..tools import database_import, refresh, postcodes, freeze, country_info
from ..indexer.indexer import Indexer
if args.continue_at is None:
files = args.get_osm_file_list()
LOG.warning('Creating database')
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
args.data_dir,
args.no_partitions,
rouser=args.config.DATABASE_WEBUSER)
LOG.warning('Setting up country tables')
country_info.setup_country_tables(args.config.get_libpq_dsn(),
args.data_dir,
args.no_partitions)
LOG.warning('Importing OSM data file')
database_import.import_osm_data(files,
args.osm2pgsql_options(0, 1),

View File

@ -0,0 +1,18 @@
"""
Functions for importing and managing static country information.
"""
from nominatim.db import utils as db_utils
from nominatim.db.connection import connect
def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
    """ Create and fill the tables holding the basic static country data
        that serves as background for geocoding. Data is assumed to not
        yet exist.

        The files `country_name.sql` and `country_osm_grid.sql.gz` are
        looked up in `sql_dir` and handed, together with `dsn`, to
        `db_utils.execute_file()` in that order.

        When `ignore_partitions` is true, the `partition` column of
        all rows in `country_name` is set to 0 afterwards.
    """
    for sql_file in ('country_name.sql', 'country_osm_grid.sql.gz'):
        db_utils.execute_file(dsn, sql_dir / sql_file)

    if not ignore_partitions:
        return

    # Collapse all countries into the single partition 0.
    with connect(dsn) as conn:
        with conn.cursor() as cur:
            cur.execute('UPDATE country_name SET partition = 0')
        conn.commit()

View File

@ -12,7 +12,6 @@ import psycopg2.extras
from psycopg2 import sql as pysql
from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils
from nominatim.db.async_connection import DBConnection
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.tools.exec_utils import run_osm2pgsql
@ -21,24 +20,24 @@ from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERS
LOG = logging.getLogger()
def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None):
""" Create a new database for Nominatim and populate it with the
essential extensions and data.
def _require_version(module, actual, expected):
""" Compares the version for the given module and raises an exception
if the actual version is too old.
"""
LOG.warning('Creating database')
create_db(dsn, rouser)
LOG.warning('Setting up database')
with connect(dsn) as conn:
setup_extensions(conn)
LOG.warning('Loading basic data')
import_base_data(dsn, data_dir, no_partitions)
if actual < expected:
LOG.fatal('Minimum supported version of %s is %d.%d. '
'Found version %d.%d.',
module, expected[0], expected[1], actual[0], actual[1])
raise UsageError(f'{module} is too old.')
def create_db(dsn, rouser=None):
""" Create a new database for the given DSN. Fails when the database
already exists or the PostgreSQL version is too old.
def setup_database_skeleton(dsn, rouser=None):
""" Create a new database for Nominatim and populate it with the
essential extensions.
The function fails when the database already exists or Postgresql or
PostGIS versions are too old.
Uses `createdb` to create the database.
If 'rouser' is given, then the function also checks that the user
@ -52,13 +51,9 @@ def create_db(dsn, rouser=None):
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
postgres_version = conn.server_version_tuple()
if postgres_version < POSTGRESQL_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
'Found version %d.%d.',
POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1],
postgres_version[0], postgres_version[1])
raise UsageError('PostgreSQL server is too old.')
_require_version('PostgreSQL server',
conn.server_version_tuple(),
POSTGRESQL_REQUIRED_VERSION)
if rouser is not None:
with conn.cursor() as cur:
@ -69,38 +64,15 @@ def create_db(dsn, rouser=None):
"\n createuser %s", rouser, rouser)
raise UsageError('Missing read-only user.')
# Create extensions.
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
conn.commit()
def setup_extensions(conn):
""" Set up all extensions needed for Nominatim. Also checks that the
versions of the extensions are sufficient.
"""
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
conn.commit()
postgis_version = conn.postgis_version_tuple()
if postgis_version < POSTGIS_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of PostGIS is %d.%d. '
'Found version %d.%d.',
POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1],
postgis_version[0], postgis_version[1])
raise UsageError('PostGIS version is too old.')
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding. Data is assumed to not yet exist.
"""
db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
if ignore_partitions:
with connect(dsn) as conn:
with conn.cursor() as cur:
cur.execute('UPDATE country_name SET partition = 0')
conn.commit()
_require_version('PostGIS',
conn.postgis_version_tuple(),
POSTGIS_REQUIRED_VERSION)
def import_osm_data(osm_files, options, drop=False, ignore_errors=False):

View File

@ -18,6 +18,7 @@ import nominatim.tools.admin
import nominatim.tools.add_osm_data
import nominatim.tools.check_database
import nominatim.tools.database_import
import nominatim.tools.country_info
import nominatim.tools.freeze
import nominatim.tools.refresh
import nominatim.tools.postcodes
@ -170,6 +171,7 @@ class TestCliWithDb:
def test_import_full(self, mock_func_factory):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
mock_func_factory(nominatim.tools.country_info, 'setup_country_tables'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),

View File

@ -2,6 +2,7 @@
Tests for functions to import a new database.
"""
from pathlib import Path
from contextlib import closing
import pytest
import psycopg2
@ -9,90 +10,72 @@ import psycopg2
from nominatim.tools import database_import
from nominatim.errors import UsageError
@pytest.fixture
def nonexistant_db():
dbname = 'test_nominatim_python_unittest'
class TestDatabaseSetup:
DBNAME = 'test_nominatim_python_unittest'
conn = psycopg2.connect(database='postgres')
@pytest.fixture(autouse=True)
def setup_nonexistant_db(self):
conn = psycopg2.connect(database='postgres')
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
try:
conn.set_isolation_level(0)
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
yield dbname
yield True
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
finally:
conn.close()
@pytest.mark.parametrize("no_partitions", (True, False))
def test_setup_skeleton(src_dir, nonexistant_db, no_partitions):
database_import.setup_database_skeleton('dbname=' + nonexistant_db,
src_dir / 'data', no_partitions)
@pytest.fixture
def cursor(self):
conn = psycopg2.connect(database=self.DBNAME)
conn = psycopg2.connect(database=nonexistant_db)
try:
with conn.cursor() as cur:
cur.execute("SELECT distinct partition FROM country_name")
partitions = set((r[0] for r in list(cur)))
if no_partitions:
assert partitions == set((0, ))
else:
assert len(partitions) > 10
finally:
conn.close()
try:
with conn.cursor() as cur:
yield cur
finally:
conn.close()
def test_create_db_success(nonexistant_db):
database_import.create_db('dbname=' + nonexistant_db, rouser='www-data')
conn = psycopg2.connect(database=nonexistant_db)
conn.close()
def conn(self):
return closing(psycopg2.connect(database=self.DBNAME))
def test_create_db_already_exists(temp_db):
def test_setup_skeleton(self):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}')
# Check that all extensions are set up.
with self.conn() as conn:
with conn.cursor() as cur:
cur.execute('CREATE TABLE t (h HSTORE, geom GEOMETRY(Geometry, 4326))')
def test_unsupported_pg_version(self, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGRESQL_REQUIRED_VERSION', (100, 4))
with pytest.raises(UsageError, match='PostgreSQL server is too old.'):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}')
def test_create_db_missing_ro_user(self):
with pytest.raises(UsageError, match='Missing read-only user.'):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}',
rouser='sdfwkjkjgdugu2;jgsafkljas;')
def test_setup_extensions_old_postgis(self, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
with pytest.raises(UsageError, match='PostGIS is too old.'):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}')
def test_setup_skeleton_already_exists(temp_db):
with pytest.raises(UsageError):
database_import.create_db('dbname=' + temp_db)
def test_create_db_unsupported_version(nonexistant_db, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGRESQL_REQUIRED_VERSION', (100, 4))
with pytest.raises(UsageError, match='PostgreSQL server is too old.'):
database_import.create_db('dbname=' + nonexistant_db)
def test_create_db_missing_ro_user(nonexistant_db):
with pytest.raises(UsageError, match='Missing read-only user.'):
database_import.create_db('dbname=' + nonexistant_db, rouser='sdfwkjkjgdugu2;jgsafkljas;')
def test_setup_extensions(temp_db_conn, table_factory):
database_import.setup_extensions(temp_db_conn)
# Use table creation to check that hstore and geometry types are available.
table_factory('t', 'h HSTORE, geom GEOMETRY(Geometry, 4326)')
def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
with pytest.raises(UsageError, match='PostGIS version is too old.'):
database_import.setup_extensions(temp_db_conn)
def test_import_base_data(dsn, src_dir, temp_db_with_extensions, temp_db_cursor):
database_import.import_base_data(dsn, src_dir / 'data')
assert temp_db_cursor.table_rows('country_name') > 0
def test_import_base_data_ignore_partitions(dsn, src_dir, temp_db_with_extensions,
temp_db_cursor):
database_import.import_base_data(dsn, src_dir / 'data', ignore_partitions=True)
assert temp_db_cursor.table_rows('country_name') > 0
assert temp_db_cursor.table_rows('country_name', where='partition != 0') == 0
database_import.setup_database_skeleton(f'dbname={temp_db}')
def test_import_osm_data_simple(table_factory, osm2pgsql_options):