Merge pull request #2708 from lonvia/use-format-literals

Assorted fixes for new pylint warnings
This commit is contained in:
Sarah Hoffmann 2022-05-11 14:29:56 +02:00 committed by GitHub
commit a7a5f0161f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 178 additions and 150 deletions

View File

@ -11,6 +11,6 @@ ignored-modules=icu,datrie
# 'with' statements.
ignored-classes=NominatimArgs,closing
# 'too-many-ancestors' is triggered already by deriving from UserDict
disable=too-few-public-methods,duplicate-code,too-many-ancestors
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use
good-names=i,x,y,fd,db

View File

@ -60,9 +60,9 @@ class CommandlineParser:
def nominatim_version_text():
""" Program name and version number as string
"""
text = 'Nominatim version %s.%s.%s.%s' % version.NOMINATIM_VERSION
text = f'Nominatim version {version.version_str()}'
if version.GIT_COMMIT_HASH is not None:
text += ' (%s)' % version.GIT_COMMIT_HASH
text += f' ({version.GIT_COMMIT_HASH})'
return text
def add_subcommand(self, name, cmd):

View File

@ -21,7 +21,7 @@ LOG = logging.getLogger()
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415
# pylint: disable=C0415
class UpdateReplication:
"""\
@ -96,8 +96,7 @@ class UpdateReplication:
end = dt.datetime.now(dt.timezone.utc)
LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
round_time((start_index or end) - start_import),
"Indexing: {} ".format(round_time(end - start_index))
if start_index else '',
f"Indexing: {round_time(end - start_index)} " if start_index else '',
round_time(end - start_import),
round_time(end - batchdate))

View File

@ -14,12 +14,12 @@ import psutil
from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION
from nominatim.version import version_str
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
# pylint: disable=C0415
LOG = logging.getLogger()
@ -194,10 +194,10 @@ class SetupAll:
LOG.warning('Creating support index')
if tablespace:
tablespace = 'TABLESPACE ' + tablespace
cur.execute("""CREATE INDEX idx_placex_pendingsector
ON placex USING BTREE (rank_address,geometry_sector)
{} WHERE indexed_status > 0
""".format(tablespace))
cur.execute(f"""CREATE INDEX idx_placex_pendingsector
ON placex USING BTREE (rank_address,geometry_sector)
{tablespace} WHERE indexed_status > 0
""")
conn.commit()
@ -213,5 +213,4 @@ class SetupAll:
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)
properties.set_property(conn, 'database_version',
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
properties.set_property(conn, 'database_version', version_str())

View File

@ -144,7 +144,7 @@ class Configuration:
style = self.__getattr__('IMPORT_STYLE')
if style in ('admin', 'street', 'address', 'full', 'extratags'):
return self.config_dir / 'import-{}.style'.format(style)
return self.config_dir / f'import-{style}.style'
return self.find_config_file('', 'IMPORT_STYLE')

View File

@ -25,7 +25,8 @@ class _Cursor(psycopg2.extras.DictCursor):
execution functions.
"""
def execute(self, query, args=None): # pylint: disable=W0221
# pylint: disable=arguments-renamed,arguments-differ
def execute(self, query, args=None):
""" Query execution that logs the SQL query when debugging is enabled.
"""
LOG.debug(self.mogrify(query, args).decode('utf-8'))
@ -163,7 +164,7 @@ def connect(dsn):
ctxmgr.connection = conn
return ctxmgr
except psycopg2.OperationalError as err:
raise UsageError("Cannot connect to database: {}".format(err)) from err
raise UsageError(f"Cannot connect to database: {err}") from err
# Translation from PG connection string parameters to PG environment variables.

View File

@ -39,10 +39,10 @@ def _setup_tablespace_sql(config):
out = {}
for subset in ('ADDRESS', 'SEARCH', 'AUX'):
for kind in ('DATA', 'INDEX'):
tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
if tspace:
tspace = 'TABLESPACE "{}"'.format(tspace)
out['{}_{}'.format(subset.lower(), kind.lower())] = tspace
tspace = f'TABLESPACE "{tspace}"'
out[f'{subset.lower()}_{kind.lower()}'] = tspace
return out

View File

@ -34,7 +34,7 @@ def compute_database_date(conn):
LOG.info("Using node id %d for timestamp lookup", osmid)
# Get the node from the API to find the timestamp when it was created.
node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
data = get_url(node_url)
match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)

View File

@ -40,36 +40,36 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
cmd.extend(('-v', 'ON_ERROR_STOP=1'))
if not LOG.isEnabledFor(logging.INFO):
cmd.append('--quiet')
proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)
try:
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
try:
if not LOG.isEnabledFor(logging.INFO):
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
if pre_code:
proc.stdin.write((pre_code + ';').encode('utf-8'))
if pre_code:
proc.stdin.write((pre_code + ';').encode('utf-8'))
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
else:
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
if fname.suffix == '.gz':
with gzip.open(str(fname), 'rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
else:
with fname.open('rb') as fdesc:
remain = _pipe_to_proc(proc, fdesc)
if remain == 0 and post_code:
proc.stdin.write((';' + post_code).encode('utf-8'))
finally:
proc.stdin.close()
ret = proc.wait()
if remain == 0 and post_code:
proc.stdin.write((';' + post_code).encode('utf-8'))
finally:
proc.stdin.close()
ret = proc.wait()
if ret != 0 or remain > 0:
raise UsageError("Failed to execute SQL file.")
# List of characters that need to be quoted for the copy command.
_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
ord(u'\t'): u'\\t',
ord(u'\n'): u'\\n'}
_SQL_TRANSLATION = {ord('\\'): '\\\\',
ord('\t'): '\\t',
ord('\n'): '\\n'}
class CopyBuffer:

View File

@ -66,7 +66,7 @@ class RankRunner(AbstractPlacexRunner):
"""
def name(self):
return "rank {}".format(self.rank)
return f"rank {self.rank}"
def sql_count_objects(self):
return pysql.SQL("""SELECT count(*) FROM placex
@ -86,7 +86,7 @@ class BoundaryRunner(AbstractPlacexRunner):
"""
def name(self):
return "boundaries rank {}".format(self.rank)
return f"boundaries rank {self.rank}"
def sql_count_objects(self):
return pysql.SQL("""SELECT count(*) FROM placex

View File

@ -278,8 +278,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
+ [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
@staticmethod
def normalize_postcode(postcode):
def normalize_postcode(self, postcode):
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function

View File

@ -74,10 +74,10 @@ def _check_module(module_dir, conn):
with conn.cursor() as cur:
try:
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
RETURNS text AS '{}/nominatim.so', 'transliteration'
RETURNS text AS %s, 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
DROP FUNCTION nominatim_test_import_func(text)
""".format(module_dir))
""", (f'{module_dir}/nominatim.so', ))
except psycopg2.DatabaseError as err:
LOG.fatal("Error accessing database module: %s", err)
raise UsageError("Database module cannot be accessed.") from err
@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer):
php_file = self.data_dir / "tokenizer.php"
if not php_file.exists() or overwrite:
php_file.write_text(dedent("""\
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
""".format(config)), encoding='utf-8')
@define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
"""), encoding='utf-8')
def _init_db_tables(self, config):
@ -337,8 +337,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
return self.normalizer.transliterate(phrase)
@staticmethod
def normalize_postcode(postcode):
def normalize_postcode(self, postcode):
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function

View File

@ -66,7 +66,7 @@ class _VariantMaker:
"""
parts = re.split(r'(\|)?([=-])>', rule)
if len(parts) != 4:
raise UsageError("Syntax error in variant rule: " + rule)
raise UsageError(f"Syntax error in variant rule: {rule}")
decompose = parts[1] is None
src_terms = [self._parse_variant_word(t) for t in parts[0].split(',')]
@ -89,7 +89,7 @@ class _VariantMaker:
name = name.strip()
match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name)
if match is None or (match.group(1) == '~' and match.group(3) == '~'):
raise UsageError("Invalid variant word descriptor '{}'".format(name))
raise UsageError(f"Invalid variant word descriptor '{name}'")
norm_name = self.norm.transliterate(match.group(2)).strip()
if not norm_name:
return None

View File

@ -234,7 +234,7 @@ def create_search_indices(conn, config, drop=False):
bad_indices = [row[0] for row in list(cur)]
for idx in bad_indices:
LOG.info("Drop invalid index %s.", idx)
cur.execute('DROP INDEX "{}"'.format(idx))
cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
conn.commit()
sql = SQLPreprocessor(conn, config)

View File

@ -12,7 +12,7 @@ import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
from nominatim.version import NOMINATIM_VERSION
from nominatim.version import version_str
from nominatim.db.connection import get_pg_env
LOG = logging.getLogger()
@ -55,10 +55,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
query_string = urlencode(params or {})
env = dict(QUERY_STRING=query_string,
SCRIPT_NAME='/{}.php'.format(endpoint),
REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
SCRIPT_NAME=f'/{endpoint}.php',
REQUEST_URI=f'/{endpoint}.php?{query_string}',
CONTEXT_DOCUMENT_ROOT=webdir,
SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
HTTP_HOST='localhost',
HTTP_USER_AGENT='nominatim-tool',
REMOTE_ADDR='0.0.0.0',
@ -150,7 +150,7 @@ def run_osm2pgsql(options):
def get_url(url):
""" Get the contents from the given URL and return it as a UTF-8 string.
"""
headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
headers = {"User-Agent": f"Nominatim/{version_str()}"}
try:
with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:

View File

@ -9,9 +9,11 @@ Functions for database migration to newer software versions.
"""
import logging
from psycopg2 import sql as pysql
from nominatim.db import properties
from nominatim.db.connection import connect
from nominatim.version import NOMINATIM_VERSION
from nominatim.version import NOMINATIM_VERSION, version_str
from nominatim.tools import refresh
from nominatim.tokenizer import factory as tokenizer_factory
from nominatim.errors import UsageError
@ -47,7 +49,7 @@ def migrate(config, paths):
for version, func in _MIGRATION_FUNCTIONS:
if db_version <= version:
LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
version_str(version))
kwargs = dict(conn=conn, config=config, paths=paths)
func(**kwargs)
conn.commit()
@ -59,8 +61,7 @@ def migrate(config, paths):
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
tokenizer.update_sql_functions(config)
properties.set_property(conn, 'database_version',
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
properties.set_property(conn, 'database_version', version_str())
conn.commit()
@ -125,11 +126,11 @@ def add_nominatim_property_table(conn, config, **_):
"""
if not conn.table_exists('nominatim_properties'):
with conn.cursor() as cur:
cur.execute("""CREATE TABLE nominatim_properties (
property TEXT,
value TEXT);
GRANT SELECT ON TABLE nominatim_properties TO "{}";
""".format(config.DATABASE_WEBUSER))
cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
property TEXT,
value TEXT);
GRANT SELECT ON TABLE nominatim_properties TO {};
""").format(pysql.Identifier(config.DATABASE_WEBUSER)))
@_migration(3, 6, 0, 0)
def change_housenumber_transliteration(conn, **_):
@ -194,7 +195,8 @@ def install_legacy_tokenizer(conn, config, **_):
and column_name = 'token_info'""",
(table, ))
if has_column == 0:
cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
.format(pysql.Identifier(table)))
tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
module_name='legacy')

View File

@ -36,7 +36,7 @@ class _CountryPostcodesCollector:
def __init__(self, country):
self.country = country
self.collected = dict()
self.collected = {}
def add(self, postcode, x, y):
@ -136,13 +136,13 @@ class _CountryPostcodesCollector:
def _open_external(self, project_dir):
fname = project_dir / '{}_postcodes.csv'.format(self.country)
fname = project_dir / f'{self.country}_postcodes.csv'
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)
return open(fname, 'r')
return open(fname, 'r', encoding='utf-8')
fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
fname = project_dir / f'{self.country}_postcodes.csv.gz'
if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname)

View File

@ -15,7 +15,7 @@ from psycopg2 import sql as pysql
from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import NOMINATIM_VERSION
from nominatim.version import version_str
LOG = logging.getLogger()
@ -52,16 +52,19 @@ def load_address_levels(conn, table, levels):
with conn.cursor() as cur:
cur.drop_table(table)
cur.execute("""CREATE TABLE {} (country_code varchar(2),
cur.execute(pysql.SQL("""CREATE TABLE {} (
country_code varchar(2),
class TEXT,
type TEXT,
rank_search SMALLINT,
rank_address SMALLINT)""".format(table))
rank_address SMALLINT)
""").format(pysql.Identifier(table)))
cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
.format(pysql.Identifier(table)), rows)
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
.format(pysql.Identifier(table)))
conn.commit()
@ -186,16 +189,15 @@ def setup_website(basedir, config, conn):
LOG.info('Creating website directory.')
basedir.mkdir()
template = dedent("""\
template = dedent(f"""\
<?php
@define('CONST_Debug', $_GET['debug'] ?? false);
@define('CONST_LibDir', '{0}');
@define('CONST_TokenizerDir', '{2}');
@define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
@define('CONST_LibDir', '{config.lib_dir.php}');
@define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
@define('CONST_NominatimVersion', '{version_str()}');
""".format(config.lib_dir.php, NOMINATIM_VERSION,
config.project_dir / 'tokenizer'))
""")
for php_name, conf_name, var_type in PHP_CONST_DEFS:
varout = _quote_php_variable(var_type, config, conf_name)

View File

@ -39,8 +39,8 @@ class SPCsvLoader(Iterator):
"""
phrases = set()
with open(self.csv_path) as file:
reader = csv.DictReader(file, delimiter=',')
with open(self.csv_path, encoding='utf-8') as fd:
reader = csv.DictReader(fd, delimiter=',')
for row in reader:
phrases.add(
SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
@ -54,4 +54,4 @@ class SPCsvLoader(Iterator):
_, extension = os.path.splitext(self.csv_path)
if extension != '.csv':
raise UsageError('The file {} is not a csv file.'.format(self.csv_path))
raise UsageError(f'The file {self.csv_path} is not a csv file.')

View File

@ -16,7 +16,7 @@
import logging
import re
from psycopg2.sql import Identifier, Literal, SQL
from psycopg2.sql import Identifier, SQL
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
@ -191,39 +191,41 @@ class SPImporter():
def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
"""
Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
Create table place_classtype of the given phrase_class/phrase_type
if doesn't exit.
"""
table_name = _classtype_table(phrase_class, phrase_type)
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL("""
CREATE TABLE IF NOT EXISTS {{}} {}
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
.format(Identifier(table_name), Literal(phrase_class),
Literal(phrase_type)))
with self.db_connection.cursor() as cur:
cur.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
SELECT place_id AS place_id,
st_centroid(geometry) AS centroid
FROM placex
WHERE class = %s AND type = %s
""").format(Identifier(table_name), SQL(sql_tablespace)),
(phrase_class, phrase_type))
def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
"""
Create indexes on centroid and place_id for the place_classtype table.
"""
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
index_prefix = f'idx_place_classtype_{phrase_class}_{phrase_type}_'
base_table = _classtype_table(phrase_class, phrase_type)
# Index on centroid
if not self.db_connection.index_exists(index_prefix + 'centroid'):
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL("""
CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
.format(Identifier(index_prefix + 'centroid'),
Identifier(base_table)), sql_tablespace)
Identifier(base_table),
SQL(sql_tablespace)))
# Index on place_id
if not self.db_connection.index_exists(index_prefix + 'place_id'):
with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(
"""CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
db_cursor.execute(SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")
.format(Identifier(index_prefix + 'place_id'),
Identifier(base_table)))
Identifier(base_table),
SQL(sql_tablespace)))
def _grant_access_to_webuser(self, phrase_class, phrase_type):

View File

@ -21,33 +21,57 @@ from nominatim.indexer.place_info import PlaceInfo
LOG = logging.getLogger()
def handle_tarfile_or_directory(data_dir):
""" Handles tarfile or directory for importing tiger data
class TigerInput:
""" Context manager that goes through Tiger input files which may
either be in a directory or gzipped together in a tar file.
"""
tar = None
if data_dir.endswith('.tar.gz'):
try:
tar = tarfile.open(data_dir)
except tarfile.ReadError as err:
LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
raise UsageError("Cannot open Tiger data file.") from err
def __init__(self, data_dir):
self.tar_handle = None
self.files = []
csv_files = [i for i in tar.getmembers() if i.name.endswith('.csv')]
LOG.warning("Found %d CSV files in tarfile with path %s", len(csv_files), data_dir)
if not csv_files:
LOG.warning("Tiger data import selected but no files in tarfile's path %s", data_dir)
return None, None
else:
files = os.listdir(data_dir)
csv_files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
LOG.warning("Found %d CSV files in path %s", len(csv_files), data_dir)
if not csv_files:
LOG.warning("Tiger data import selected but no files found in path %s", data_dir)
return None, None
if data_dir.endswith('.tar.gz'):
try:
self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
except tarfile.ReadError as err:
LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
raise UsageError("Cannot open Tiger data file.") from err
return csv_files, tar
self.files = [i for i in self.tar_handle.getmembers() if i.name.endswith('.csv')]
LOG.warning("Found %d CSV files in tarfile with path %s", len(self.files), data_dir)
else:
files = os.listdir(data_dir)
self.files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
LOG.warning("Found %d CSV files in path %s", len(self.files), data_dir)
if not self.files:
LOG.warning("Tiger data import selected but no files found at %s", data_dir)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if self.tar_handle:
self.tar_handle.close()
self.tar_handle = None
def next_file(self):
""" Return a file handle to the next file to be processed.
Raises an IndexError if there is no file left.
"""
fname = self.files.pop(0)
if self.tar_handle is not None:
return io.TextIOWrapper(self.tar_handle.extractfile(fname))
return open(fname, encoding='utf-8')
def __len__(self):
return len(self.files)
def handle_threaded_sql_statements(pool, fd, analyzer):
@ -79,34 +103,27 @@ def add_tiger_data(data_dir, config, threads, tokenizer):
""" Import tiger data from directory or tar file `data dir`.
"""
dsn = config.get_libpq_dsn()
files, tar = handle_tarfile_or_directory(data_dir)
if not files:
return
with TigerInput(data_dir) as tar:
if not tar:
return
with connect(dsn) as conn:
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'tiger_import_start.sql')
with connect(dsn) as conn:
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'tiger_import_start.sql')
# Reading files and then for each file line handling
# sql_query in <threads - 1> chunks.
place_threads = max(1, threads - 1)
# Reading files and then for each file line handling
# sql_query in <threads - 1> chunks.
place_threads = max(1, threads - 1)
with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
with tokenizer.name_analyzer() as analyzer:
for fname in files:
if not tar:
fd = open(fname)
else:
fd = io.TextIOWrapper(tar.extractfile(fname))
with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
with tokenizer.name_analyzer() as analyzer:
while tar:
with tar.next_file() as fd:
handle_threaded_sql_statements(pool, fd, analyzer)
handle_threaded_sql_statements(pool, fd, analyzer)
print('\n')
fd.close()
if tar:
tar.close()
print('\n')
LOG.warning("Creating indexes on Tiger data")
with connect(dsn) as conn:
sql = SQLPreprocessor(conn, config)

View File

@ -34,3 +34,11 @@ POSTGIS_REQUIRED_VERSION = (2, 2)
# cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside
# there is a call to set the variable value below.
GIT_COMMIT_HASH = None
def version_str(version=NOMINATIM_VERSION):
    """
    Return a human-readable string of the version.

    The version is expected to be a sequence of at least four parts
    (major, minor, patch, database patch); extra elements are ignored.
    """
    major, minor, patch, db_patch = version[0], version[1], version[2], version[3]
    return f'{major}.{minor}.{patch}-{db_patch}'