allow multiple files for the import command

The files are forwarded to osm2pgsql, which is now able to merge
them correctly.
This commit is contained in:
Sarah Hoffmann 2021-08-14 21:42:21 +02:00
parent bf4f05fff3
commit 87dedde5d6
4 changed files with 36 additions and 13 deletions

View File

@ -27,8 +27,9 @@ class SetupAll:
def add_args(parser):
group_name = parser.add_argument_group('Required arguments')
group = group_name.add_mutually_exclusive_group(required=True)
group.add_argument('--osm-file', metavar='FILE',
help='OSM file to be imported.')
# action='append' collects every occurrence of --osm-file into a list,
# so the option may be given several times to import multiple files.
group.add_argument('--osm-file', metavar='FILE', action='append',
                   help='OSM file to be imported'
                        ' (repeat for importing multiple files).')
group.add_argument('--continue', dest='continue_at',
choices=['load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted')
@ -56,9 +57,12 @@ class SetupAll:
from ..indexer.indexer import Indexer
from ..tokenizer import factory as tokenizer_factory
if args.osm_file and not Path(args.osm_file).is_file():
LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
raise UsageError('Cannot access file.')
if args.osm_file:
files = [Path(f) for f in args.osm_file]
for fname in files:
if not fname.is_file():
LOG.fatal("OSM file '%s' does not exist.", fname)
raise UsageError('Cannot access file.')
if args.continue_at is None:
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
@ -67,7 +71,7 @@ class SetupAll:
rouser=args.config.DATABASE_WEBUSER)
LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file),
database_import.import_osm_data(files,
args.osm2pgsql_options(0, 1),
drop=args.no_updates,
ignore_errors=args.ignore_errors)

View File

@ -103,11 +103,11 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False):
conn.commit()
def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
""" Import the given OSM file. 'options' contains the list of
def import_osm_data(osm_files, options, drop=False, ignore_errors=False):
""" Import the given OSM files. 'options' contains the list of
default settings for osm2pgsql.
"""
options['import_file'] = osm_file
options['import_file'] = osm_files
options['append'] = False
options['threads'] = 1
@ -115,7 +115,12 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
# Make some educated guesses about cache size based on the size
# of the import file and the available memory.
mem = psutil.virtual_memory()
fsize = os.stat(str(osm_file)).st_size
fsize = 0
if isinstance(osm_files, list):
for fname in osm_files:
fsize += os.stat(str(fname)).st_size
else:
fsize = os.stat(str(osm_files)).st_size
options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
fsize * 2) / 1024 / 1024) + 1

View File

@ -130,6 +130,9 @@ def run_osm2pgsql(options):
if 'import_data' in options:
cmd.extend(('-r', 'xml', '-'))
elif isinstance(options['import_file'], list):
for fname in options['import_file']:
cmd.append(str(fname))
else:
cmd.append(str(options['import_file']))

View File

@ -98,14 +98,25 @@ def test_import_base_data_ignore_partitions(dsn, src_dir, temp_db_with_extension
def test_import_osm_data_simple(table_factory, osm2pgsql_options):
table_factory('place', content=((1, ), ))
database_import.import_osm_data('file.pdf', osm2pgsql_options)
database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options):
    """ Check that import_osm_data() accepts a list of OSM files.
    """
    table_factory('place', content=((1, ), ))
    # NOTE(review): presumably a cache size of 0 makes import_osm_data()
    # work out the cache size on its own - confirm against the function.
    osm2pgsql_options['osm2pgsql_cache'] = 0

    # Create real files on disk: import_osm_data() calls os.stat() on
    # every entry of a list argument to add up the file sizes.
    osm_files = []
    for name in ('file1.osm', 'file2.osm'):
        path = tmp_path / name
        path.write_text('test')
        osm_files.append(path)

    database_import.import_osm_data(osm_files, osm2pgsql_options)
def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options):
table_factory('place')
with pytest.raises(UsageError, match='No data.*'):
database_import.import_osm_data('file.pdf', osm2pgsql_options)
database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_options):
@ -117,7 +128,7 @@ def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_o
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True)
database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options, drop=True)
assert not flatfile.exists()
assert not temp_db_conn.table_exists('planet_osm_nodes')