move import-data option to native python

This adds a new dependecy to the Python psutil package.
This commit is contained in:
Sarah Hoffmann 2021-02-24 17:21:45 +01:00
parent 7222235579
commit 32683f73c7
21 changed files with 205 additions and 93 deletions

11
.pylintrc Normal file
View File

@ -0,0 +1,11 @@
[MASTER]
extension-pkg-whitelist=osmium
[MESSAGES CONTROL]
[TYPECHECK]
# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing

View File

@ -177,7 +177,7 @@ if (BUILD_TESTS)
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
COMMAND ${PYLINT} nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")

View File

@ -41,10 +41,11 @@ For running Nominatim:
* [Python 3](https://www.python.org/) (3.5+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil] (https://github.com/giampaolo/psutil)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
( PHP-cgi (for running queries from the command line)
* PHP-cgi (for running queries from the command line)
For running continuous updates:

View File

@ -48,7 +48,7 @@ class Shell
return join(' ', $aEscaped);
}
public function run($bExitOnFail = False)
public function run($bExitOnFail = false)
{
$sCmd = $this->escapedCmd();
// $aEnv does not need escaping, proc_open seems to handle it fine

View File

@ -86,12 +86,25 @@ if ($aCMDResult['create-db'] || $aCMDResult['all']) {
if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
$bDidSomething = true;
(clone($oNominatimCmd))->addParams('transition', '--setup-db')->run(true);
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');
if ($aCMDResult['no-partitions'] ?? false) {
$oCmd->addParams('--no-partitions');
}
$oCmd->run(true);
}
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->importData($aCMDResult['osm-file']);
$oCmd = (clone($oNominatimCmd))
->addParams('transition', '--import-data')
->addParams('--osm-file', $aCMDResult['osm-file']);
if ($aCMDResult['drop'] ?? false) {
$oCmd->addParams('--drop');
}
$oCmd->run(true);
}
if ($aCMDResult['create-functions'] || $aCMDResult['all']) {

View File

@ -84,66 +84,6 @@ class SetupFunctions
}
}
public function importData($sOSMFile)
{
info('Import data');
if (!file_exists(getOsm2pgsqlBinary())) {
echo "Check NOMINATIM_OSM2PGSQL_BINARY in your local .env file.\n";
echo "Normally you should not need to set this manually.\n";
fail("osm2pgsql not found in '".getOsm2pgsqlBinary()."'");
}
$oCmd = new \Nominatim\Shell(getOsm2pgsqlBinary());
$oCmd->addParams('--style', getImportStyle());
if (getSetting('FLATNODE_FILE')) {
$oCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
}
if (getSetting('TABLESPACE_OSM_DATA')) {
$oCmd->addParams('--tablespace-slim-data', getSetting('TABLESPACE_OSM_DATA'));
}
if (getSetting('TABLESPACE_OSM_INDEX')) {
$oCmd->addParams('--tablespace-slim-index', getSetting('TABLESPACE_OSM_INDEX'));
}
if (getSetting('TABLESPACE_PLACE_DATA')) {
$oCmd->addParams('--tablespace-main-data', getSetting('TABLESPACE_PLACE_DATA'));
}
if (getSetting('TABLESPACE_PLACE_INDEX')) {
$oCmd->addParams('--tablespace-main-index', getSetting('TABLESPACE_PLACE_INDEX'));
}
$oCmd->addParams('--latlong', '--slim', '--create');
$oCmd->addParams('--output', 'gazetteer');
$oCmd->addParams('--hstore');
$oCmd->addParams('--number-processes', 1);
$oCmd->addParams('--with-forward-dependencies', 'false');
$oCmd->addParams('--log-progress', 'true');
$oCmd->addParams('--cache', $this->iCacheMemory);
$oCmd->addParams('--port', $this->aDSNInfo['port']);
if (isset($this->aDSNInfo['username'])) {
$oCmd->addParams('--username', $this->aDSNInfo['username']);
}
if (isset($this->aDSNInfo['password'])) {
$oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
}
if (isset($this->aDSNInfo['hostspec'])) {
$oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
}
$oCmd->addParams('--database', $this->aDSNInfo['database']);
$oCmd->addParams($sOSMFile);
$oCmd->run();
if (!$this->sIgnoreErrors && !$this->db()->getRow('select * from place limit 1')) {
fail('No Data');
}
if ($this->bDrop) {
$this->dropTable('planet_osm_nodes');
$this->removeFlatnodeFile();
}
}
public function createFunctions()
{
info('Create Functions');

View File

@ -3,7 +3,7 @@ Provides custom functions over command-line arguments.
"""
class NominatimArgs:
class NominatimArgs: # pylint: disable=too-few-public-methods
""" Customized namespace class for the nominatim command line tool
to receive the command-line arguments.
"""
@ -18,5 +18,10 @@ class NominatimArgs:
osm2pgsql_style=self.config.get_import_style_file(),
threads=self.threads or default_threads,
dsn=self.config.get_libpq_dsn(),
flatnode_file=self.config.FLATNODE_FILE)
flatnode_file=self.config.FLATNODE_FILE,
tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
slim_index=self.config.TABLESPACE_OSM_INDEX,
main_data=self.config.TABLESPACE_PLACE_DATA,
main_index=self.config.TABLESPACE_PLACE_INDEX
)
)

View File

@ -1,7 +1,7 @@
"""
Implementation of the 'index' subcommand.
"""
import os
import psutil
from ..db import status
from ..db.connection import connect
@ -11,14 +11,6 @@ from ..db.connection import connect
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
def _num_system_cpus():
try:
cpus = len(os.sched_getaffinity(0))
except NotImplementedError:
cpus = None
return cpus or os.cpu_count()
class UpdateIndex:
"""\
@ -42,7 +34,7 @@ class UpdateIndex:
from ..indexer.indexer import Indexer
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or _num_system_cpus() or 1)
args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)

View File

@ -6,8 +6,10 @@ through the PHP scripts but are now no longer directly accessible.
This module will be removed as soon as the transition phase is over.
"""
import logging
from pathlib import Path
from ..db.connection import connect
from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
@ -28,9 +30,17 @@ class AdminTransition:
help='Create nominatim db')
group.add_argument('--setup-db', action='store_true',
help='Build a blank nominatim db')
group.add_argument('--import-data', action='store_true',
help='Import a osm file')
group = parser.add_argument_group('Options')
group.add_argument('--no-partitions', action='store_true',
help='Do not partition search indices')
group.add_argument('--osm-file', metavar='FILE',
help='File to import')
group.add_argument('--drop', action='store_true',
help='Drop tables needed for updates, making the database readonly')
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
@staticmethod
def run(args):
@ -51,3 +61,11 @@ class AdminTransition:
database_import.import_base_data(args.config.get_libpq_dsn(),
args.data_dir, args.no_partitions)
if args.import_data:
LOG.warning('Import data')
if not args.osm_file:
raise UsageError('Missing required --osm-file argument')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
drop=args.drop)

View File

@ -75,6 +75,17 @@ class _Connection(psycopg2.extensions.connection):
return True
def drop_table(self, name, if_exists=True):
""" Drop the table with the given name.
Set `if_exists` to False if a non-existant table should raise
an exception instead of just being ignored.
"""
with self.cursor() as cur:
cur.execute("""DROP TABLE {} "{}"
""".format('IF EXISTS' if if_exists else '', name))
self.commit()
def server_version_tuple(self):
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.

View File

@ -2,11 +2,16 @@
Functions for setting up and importing a new Nominatim database.
"""
import logging
import os
import subprocess
import shutil
from pathlib import Path
import psutil
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
from .exec_utils import run_osm2pgsql
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
@ -28,7 +33,7 @@ def create_db(dsn, rouser=None):
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
postgres_version = conn.server_version_tuple() # pylint: disable=E1101
postgres_version = conn.server_version_tuple()
if postgres_version < POSTGRESQL_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
'Found version %d.%d.',
@ -37,7 +42,7 @@ def create_db(dsn, rouser=None):
raise UsageError('PostgreSQL server is too old.')
if rouser is not None:
with conn.cursor() as cur: # pylint: disable=E1101
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
@ -109,13 +114,45 @@ def check_module_dir_path(conn, path):
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding.
the background for geocoding. Data is assumed to not yet exist.
"""
db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
if ignore_partitions:
with connect(dsn) as conn:
with conn.cursor() as cur: # pylint: disable=E1101
with conn.cursor() as cur:
cur.execute('UPDATE country_name SET partition = 0')
conn.commit() # pylint: disable=E1101
conn.commit()
def import_osm_data(osm_file, options, drop=False):
""" Import the given OSM file. 'options' contains the list of
default settings for osm2pgsql.
"""
options['import_file'] = osm_file
options['append'] = False
options['threads'] = 1
if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
# Make some educated guesses about cache size based on the size
# of the import file and the available memory.
mem = psutil.virtual_memory()
fsize = os.stat(str(osm_file)).st_size
options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
fsize * 2) / 1024 / 1024) + 1
run_osm2pgsql(options)
with connect(options['dsn']) as conn:
with conn.cursor() as cur:
cur.execute('SELECT * FROM place LIMIT 1')
if cur.rowcount == 0:
raise UsageError('No data imported by osm2pgsql.')
if drop:
conn.drop_table('planet_osm_nodes')
if drop:
if options['flatnode_file']:
Path(options['flatnode_file']).unlink()

View File

@ -110,10 +110,19 @@ def run_osm2pgsql(options):
]
if options['append']:
cmd.append('--append')
else:
cmd.append('--create')
if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))
for key, param in (('slim_data', '--tablespace-slim-data'),
('slim_index', '--tablespace-slim-index'),
('main_data', '--tablespace-main-data'),
('main_index', '--tablespace-main-index')):
if options['tablespaces'][key]:
cmd.extend((param, options['tablespaces'][key]))
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'

View File

@ -198,4 +198,13 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
temp_db_conn.commit()
@pytest.fixture
def osm2pgsql_options(temp_db):
return dict(osm2pgsql='echo',
osm2pgsql_cache=10,
osm2pgsql_style='style.file',
threads=1,
dsn='dbname=' + temp_db,
flatnode_file='',
tablespaces=dict(slim_data='', slim_index='',
main_data='', main_index=''))

View File

@ -40,6 +40,7 @@ def mock_run_legacy(monkeypatch):
monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
return mock
@pytest.fixture
def mock_func_factory(monkeypatch):
def get_mock(module, func):
@ -49,6 +50,7 @@ def mock_func_factory(monkeypatch):
return get_mock
def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help.
"""

View File

@ -2,6 +2,7 @@
Tests for specialised conenction and cursor classes.
"""
import pytest
import psycopg2
from nominatim.db.connection import connect, get_pg_env
@ -30,6 +31,22 @@ def test_connection_index_exists(db, temp_db_cursor):
assert db.index_exists('some_index', table='bar') == False
def test_drop_table_existing(db, temp_db_cursor):
temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
assert db.table_exists('dummy')
db.drop_table('dummy')
assert not db.table_exists('dummy')
def test_drop_table_non_existsing(db):
db.drop_table('dfkjgjriogjigjgjrdghehtre')
def test_drop_table_non_existing_force(db):
with pytest.raises(psycopg2.ProgrammingError, match='.*does not exist.*'):
db.drop_table('dfkjgjriogjigjgjrdghehtre', if_exists=False)
def test_connection_server_version_tuple(db):
ver = db.server_version_tuple()

View File

@ -4,6 +4,7 @@ Tests for functions to import a new database.
import pytest
import psycopg2
import sys
from pathlib import Path
from nominatim.tools import database_import
from nominatim.errors import UsageError
@ -94,3 +95,42 @@ def test_import_base_data_ignore_partitions(src_dir, temp_db, temp_db_cursor):
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name WHERE partition != 0') == 0
def test_import_osm_data_simple(temp_db_cursor,osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
temp_db_cursor.execute('INSERT INTO place values (1)')
database_import.import_osm_data('file.pdf', osm2pgsql_options)
def test_import_osm_data_simple_no_data(temp_db_cursor,osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
with pytest.raises(UsageError, match='No data.*'):
database_import.import_osm_data('file.pdf', osm2pgsql_options)
def test_import_osm_data_drop(temp_db_conn, temp_db_cursor, tmp_path, osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
temp_db_cursor.execute('CREATE TABLE planet_osm_nodes (id INT)')
temp_db_cursor.execute('INSERT INTO place values (1)')
flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True)
assert not flatfile.exists()
assert not temp_db_conn.table_exists('planet_osm_nodes')
def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
temp_db_cursor.execute('CREATE TABLE place (id INT)')
temp_db_cursor.execute('INSERT INTO place values (1)')
osm2pgsql_options['osm2pgsql_cache'] = 0
database_import.import_osm_data(Path(__file__), osm2pgsql_options)

View File

@ -105,8 +105,15 @@ def test_run_api_with_extra_env(tmp_project_dir):
### run_osm2pgsql
def test_run_osm2pgsql():
exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
osm2pgsql_style='./my.style',
import_file='foo.bar'))
def test_run_osm2pgsql(osm2pgsql_options):
osm2pgsql_options['append'] = False
osm2pgsql_options['import_file'] = 'foo.bar'
osm2pgsql_options['tablespaces']['osm_data'] = 'extra'
exec_utils.run_osm2pgsql(osm2pgsql_options)
def test_run_osm2pgsql_disable_jit(osm2pgsql_options):
osm2pgsql_options['append'] = True
osm2pgsql_options['import_file'] = 'foo.bar'
osm2pgsql_options['disable_jit'] = True
exec_utils.run_osm2pgsql(osm2pgsql_options)

View File

@ -42,7 +42,7 @@
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
pip3 install --user psycopg2 python-dotenv
pip3 install --user psycopg2 python-dotenv psutil
#

View File

@ -35,7 +35,7 @@
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
pip3 install --user psycopg2 python-dotenv
pip3 install --user psycopg2 python-dotenv psutil
#

View File

@ -30,7 +30,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl python3-pip \
python3-psycopg2 git
python3-psycopg2 python3-psutil git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:

View File

@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl python3-dotenv \
python3-psycopg2 git
python3-psycopg2 python3-psutil git
#
# System Configuration