allow multiple files for the import command

The files are forwarded to osm2pgsql, which is now able to merge
them correctly.
This commit is contained in:
Sarah Hoffmann 2021-08-14 21:42:21 +02:00
parent bf4f05fff3
commit 87dedde5d6
4 changed files with 36 additions and 13 deletions

View File

@ -27,8 +27,9 @@ class SetupAll:
def add_args(parser): def add_args(parser):
group_name = parser.add_argument_group('Required arguments') group_name = parser.add_argument_group('Required arguments')
group = group_name.add_mutually_exclusive_group(required=True) group = group_name.add_mutually_exclusive_group(required=True)
group.add_argument('--osm-file', metavar='FILE', group.add_argument('--osm-file', metavar='FILE', action='append',
help='OSM file to be imported.') help='OSM file to be imported'
' (repeat for importing multiple files.')
group.add_argument('--continue', dest='continue_at', group.add_argument('--continue', dest='continue_at',
choices=['load-data', 'indexing', 'db-postprocess'], choices=['load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted') help='Continue an import that was interrupted')
@ -56,9 +57,12 @@ class SetupAll:
from ..indexer.indexer import Indexer from ..indexer.indexer import Indexer
from ..tokenizer import factory as tokenizer_factory from ..tokenizer import factory as tokenizer_factory
if args.osm_file and not Path(args.osm_file).is_file(): if args.osm_file:
LOG.fatal("OSM file '%s' does not exist.", args.osm_file) files = [Path(f) for f in args.osm_file]
raise UsageError('Cannot access file.') for fname in files:
if not fname.is_file():
LOG.fatal("OSM file '%s' does not exist.", fname)
raise UsageError('Cannot access file.')
if args.continue_at is None: if args.continue_at is None:
database_import.setup_database_skeleton(args.config.get_libpq_dsn(), database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
@ -67,7 +71,7 @@ class SetupAll:
rouser=args.config.DATABASE_WEBUSER) rouser=args.config.DATABASE_WEBUSER)
LOG.warning('Importing OSM data file') LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file), database_import.import_osm_data(files,
args.osm2pgsql_options(0, 1), args.osm2pgsql_options(0, 1),
drop=args.no_updates, drop=args.no_updates,
ignore_errors=args.ignore_errors) ignore_errors=args.ignore_errors)

View File

@ -103,11 +103,11 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False):
conn.commit() conn.commit()
def import_osm_data(osm_file, options, drop=False, ignore_errors=False): def import_osm_data(osm_files, options, drop=False, ignore_errors=False):
""" Import the given OSM file. 'options' contains the list of """ Import the given OSM files. 'options' contains the list of
default settings for osm2pgsql. default settings for osm2pgsql.
""" """
options['import_file'] = osm_file options['import_file'] = osm_files
options['append'] = False options['append'] = False
options['threads'] = 1 options['threads'] = 1
@ -115,7 +115,12 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
# Make some educated guesses about cache size based on the size # Make some educated guesses about cache size based on the size
# of the import file and the available memory. # of the import file and the available memory.
mem = psutil.virtual_memory() mem = psutil.virtual_memory()
fsize = os.stat(str(osm_file)).st_size fsize = 0
if isinstance(osm_files, list):
for fname in osm_files:
fsize += os.stat(str(fname)).st_size
else:
fsize = os.stat(str(osm_files)).st_size
options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75, options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
fsize * 2) / 1024 / 1024) + 1 fsize * 2) / 1024 / 1024) + 1

View File

@ -130,6 +130,9 @@ def run_osm2pgsql(options):
if 'import_data' in options: if 'import_data' in options:
cmd.extend(('-r', 'xml', '-')) cmd.extend(('-r', 'xml', '-'))
elif isinstance(options['import_file'], list):
for fname in options['import_file']:
cmd.append(str(fname))
else: else:
cmd.append(str(options['import_file'])) cmd.append(str(options['import_file']))

View File

@ -98,14 +98,25 @@ def test_import_base_data_ignore_partitions(dsn, src_dir, temp_db_with_extension
def test_import_osm_data_simple(table_factory, osm2pgsql_options): def test_import_osm_data_simple(table_factory, osm2pgsql_options):
table_factory('place', content=((1, ), )) table_factory('place', content=((1, ), ))
database_import.import_osm_data('file.pdf', osm2pgsql_options) database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options):
table_factory('place', content=((1, ), ))
osm2pgsql_options['osm2pgsql_cache'] = 0
files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files:
f.write_text('test')
database_import.import_osm_data(files, osm2pgsql_options)
def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options): def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options):
table_factory('place') table_factory('place')
with pytest.raises(UsageError, match='No data.*'): with pytest.raises(UsageError, match='No data.*'):
database_import.import_osm_data('file.pdf', osm2pgsql_options) database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_options): def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_options):
@ -117,7 +128,7 @@ def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_o
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve()) osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True) database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options, drop=True)
assert not flatfile.exists() assert not flatfile.exists()
assert not temp_db_conn.table_exists('planet_osm_nodes') assert not temp_db_conn.table_exists('planet_osm_nodes')