Merge pull request #2708 from lonvia/use-format-literals

Assorted fixes for new pylint warnings
Sarah Hoffmann 2022-05-11 14:29:56 +02:00 committed by GitHub
commit a7a5f0161f
22 changed files with 178 additions and 150 deletions

View File

@@ -11,6 +11,6 @@ ignored-modules=icu,datrie
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
 # 'too-many-ancestors' is triggered already by deriving from UserDict
-disable=too-few-public-methods,duplicate-code,too-many-ancestors
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use
 good-names=i,x,y,fd,db
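
Two of the new disable entries work together. no-self-use used to demand that methods which never touch self become static methods; newer pylint releases drop the check from their default set, so several @staticmethod conversions are reverted further down. bad-option-value in turn keeps pylint versions that no longer recognise no-self-use from rejecting the option itself, which is also why the two per-file E0012 disables below become redundant and are removed. A minimal illustration of the silenced warning (names made up for the example):

    class Analyzer:
        def normalize_postcode(self, postcode):  # R0201 (no-self-use): 'self' is unused
            return postcode.strip().upper()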

View File

@@ -60,9 +60,9 @@ class CommandlineParser:
     def nominatim_version_text():
         """ Program name and version number as string
         """
-        text = 'Nominatim version %s.%s.%s.%s' % version.NOMINATIM_VERSION
+        text = f'Nominatim version {version.version_str()}'
         if version.GIT_COMMIT_HASH is not None:
-            text += ' (%s)' % version.GIT_COMMIT_HASH
+            text += f' ({version.GIT_COMMIT_HASH})'
         return text

     def add_subcommand(self, name, cmd):
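
This is the pattern repeated throughout the PR: printf-style % interpolation (and str.format() in later hunks) becomes an f-string, which newer pylint flags via consider-using-f-string. A minimal sketch with an illustrative version tuple:

    version = (4, 0, 99, 5)   # illustrative (major, minor, patch, db patch)

    # before: printf-style interpolation with a fixed number of %s slots
    text = 'Nominatim version %s.%s.%s.%s' % version

    # after: an f-string evaluates any expression inline, here a join
    text = f"Nominatim version {'.'.join(str(part) for part in version)}"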

View File

@@ -21,7 +21,7 @@ LOG = logging.getLogger()
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to make pyosmium optional for replication only.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415

 class UpdateReplication:
     """\
@@ -96,8 +96,7 @@ class UpdateReplication:
         end = dt.datetime.now(dt.timezone.utc)
         LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
                     round_time((start_index or end) - start_import),
-                    "Indexing: {} ".format(round_time(end - start_index))
-                    if start_index else '',
+                    f"Indexing: {round_time(end - start_index)} " if start_index else '',
                     round_time(end - start_import),
                     round_time(end - batchdate))
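
Note what stays untouched: the LOG.warning template keeps its %s placeholders, since logging interpolates lazily and pylint's logging checks prefer it that way; only the argument that has to be computed up front becomes an f-string. The distinction, sketched with illustrative values:

    import logging

    LOG = logging.getLogger()
    secs = 42

    # lazy: the template is only rendered if the record is actually emitted
    LOG.warning("Indexing took %s seconds", secs)

    # eager: fine here, because the argument is evaluated either way
    LOG.warning("Update completed. %s", f"Indexing: {secs} " if secs else '')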

View File

@@ -14,12 +14,12 @@ import psutil
 from nominatim.db.connection import connect
 from nominatim.db import status, properties
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str

 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415

 LOG = logging.getLogger()
@@ -194,10 +194,10 @@ class SetupAll:
             LOG.warning('Creating support index')
             if tablespace:
                 tablespace = 'TABLESPACE ' + tablespace
-            cur.execute("""CREATE INDEX idx_placex_pendingsector
+            cur.execute(f"""CREATE INDEX idx_placex_pendingsector
                            ON placex USING BTREE (rank_address,geometry_sector)
-                           {} WHERE indexed_status > 0
-                        """.format(tablespace))
+                           {tablespace} WHERE indexed_status > 0
+                        """)
         conn.commit()
@@ -213,5 +213,4 @@ class SetupAll:
         except Exception as exc: # pylint: disable=broad-except
             LOG.error('Cannot determine date of database: %s', exc)

-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())

View File

@@ -144,7 +144,7 @@ class Configuration:
         style = self.__getattr__('IMPORT_STYLE')

         if style in ('admin', 'street', 'address', 'full', 'extratags'):
-            return self.config_dir / 'import-{}.style'.format(style)
+            return self.config_dir / f'import-{style}.style'

         return self.find_config_file('', 'IMPORT_STYLE')

View File

@@ -25,7 +25,8 @@ class _Cursor(psycopg2.extras.DictCursor):
         execution functions.
     """

-    def execute(self, query, args=None): # pylint: disable=W0221
+    # pylint: disable=arguments-renamed,arguments-differ
+    def execute(self, query, args=None):
         """ Query execution that logs the SQL query when debugging is enabled.
         """
         LOG.debug(self.mogrify(query, args).decode('utf-8'))
@@ -163,7 +164,7 @@ def connect(dsn):
         ctxmgr.connection = conn
         return ctxmgr
     except psycopg2.OperationalError as err:
-        raise UsageError("Cannot connect to database: {}".format(err)) from err
+        raise UsageError(f"Cannot connect to database: {err}") from err

 # Translation from PG connection string parameters to PG environment variables.

View File

@@ -39,10 +39,10 @@ def _setup_tablespace_sql(config):
     out = {}
     for subset in ('ADDRESS', 'SEARCH', 'AUX'):
         for kind in ('DATA', 'INDEX'):
-            tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
+            tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
             if tspace:
-                tspace = 'TABLESPACE "{}"'.format(tspace)
-            out['{}_{}'.format(subset.lower(), kind.lower())] = tspace
+                tspace = f'TABLESPACE "{tspace}"'
+            out[f'{subset.lower()}_{kind.lower()}'] = tspace
     return out

View File

@@ -34,7 +34,7 @@ def compute_database_date(conn):
     LOG.info("Using node id %d for timestamp lookup", osmid)
     # Get the node from the API to find the timestamp when it was created.
-    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+    node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
    data = get_url(node_url)

     match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)

View File

@@ -40,8 +40,8 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
         cmd.extend(('-v', 'ON_ERROR_STOP=1'))
     if not LOG.isEnabledFor(logging.INFO):
         cmd.append('--quiet')
-    proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)
+    with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
         try:
             if not LOG.isEnabledFor(logging.INFO):
                 proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
@@ -67,9 +67,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)

 # List of characters that need to be quoted for the copy command.
-_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
-                    ord(u'\t'): u'\\t',
-                    ord(u'\n'): u'\\n'}
+_SQL_TRANSLATION = {ord('\\'): '\\\\',
+                    ord('\t'): '\\t',
+                    ord('\n'): '\\n'}

 class CopyBuffer:
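
The Popen change answers pylint's consider-using-with (R1732): subprocess.Popen has been a context manager since Python 3.2, and its __exit__ closes the pipes and waits for the child, which is exactly the cleanup the old code had to do by hand. A self-contained sketch, using a generic Unix command for illustration:

    import subprocess

    # flagged by R1732: cleanup is the caller's problem
    proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.DEVNULL)
    proc.communicate(b'hello')

    # preferred: __exit__ closes the pipes and waits for the process
    with subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.DEVNULL) as proc:
        proc.communicate(b'hello')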

View File

@@ -66,7 +66,7 @@ class RankRunner(AbstractPlacexRunner):
     """

     def name(self):
-        return "rank {}".format(self.rank)
+        return f"rank {self.rank}"

     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
@@ -86,7 +86,7 @@ class BoundaryRunner(AbstractPlacexRunner):
     """

     def name(self):
-        return "boundaries rank {}".format(self.rank)
+        return f"boundaries rank {self.rank}"

     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex

View File

@@ -278,8 +278,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
                + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]

-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.

             This function must yield exactly the same result as the SQL function

View File

@@ -74,10 +74,10 @@ def _check_module(module_dir, conn):
     with conn.cursor() as cur:
         try:
             cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           RETURNS text AS %s, 'transliteration'
                            LANGUAGE c IMMUTABLE STRICT;
                            DROP FUNCTION nominatim_test_import_func(text)
-                        """.format(module_dir))
+                        """, (f'{module_dir}/nominatim.so', ))
         except psycopg2.DatabaseError as err:
             LOG.fatal("Error accessing database module: %s", err)
             raise UsageError("Database module cannot be accessed.") from err
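
This hunk is more than an f-string conversion: the module path now travels as a bound parameter instead of being pasted into the SQL text, so psycopg2 quotes it as a proper string literal. The general rule, sketched with an assumed connection:

    import psycopg2

    conn = psycopg2.connect('dbname=nominatim')   # assumed DSN, for illustration
    with conn.cursor() as cur:
        # values always go through placeholders, never through string formatting
        cur.execute("SELECT count(*) FROM placex WHERE class = %s AND type = %s",
                    ('amenity', 'restaurant'))
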
@@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer):
         php_file = self.data_dir / "tokenizer.php"

         if not php_file.exists() or overwrite:
-            php_file.write_text(dedent("""\
+            php_file.write_text(dedent(f"""\
                 <?php
-                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                """.format(config)), encoding='utf-8')
+                @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
+                require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """), encoding='utf-8')

     def _init_db_tables(self, config):
@@ -337,8 +337,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
         return self.normalizer.transliterate(phrase)

-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.

             This function must yield exactly the same result as the SQL function

View File

@@ -66,7 +66,7 @@ class _VariantMaker:
         """
         parts = re.split(r'(\|)?([=-])>', rule)
         if len(parts) != 4:
-            raise UsageError("Syntax error in variant rule: " + rule)
+            raise UsageError(f"Syntax error in variant rule: {rule}")
         decompose = parts[1] is None
         src_terms = [self._parse_variant_word(t) for t in parts[0].split(',')]
@@ -89,7 +89,7 @@ class _VariantMaker:
         name = name.strip()
         match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name)
         if match is None or (match.group(1) == '~' and match.group(3) == '~'):
-            raise UsageError("Invalid variant word descriptor '{}'".format(name))
+            raise UsageError(f"Invalid variant word descriptor '{name}'")
         norm_name = self.norm.transliterate(match.group(2)).strip()
         if not norm_name:
             return None

View File

@@ -234,7 +234,7 @@ def create_search_indices(conn, config, drop=False):
             bad_indices = [row[0] for row in list(cur)]
             for idx in bad_indices:
                 LOG.info("Drop invalid index %s.", idx)
-                cur.execute('DROP INDEX "{}"'.format(idx))
+                cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
         conn.commit()

     sql = SQLPreprocessor(conn, config)
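
Identifiers cannot be sent as bound parameters, so the DROP INDEX statement is now composed with psycopg2's sql module instead of hand-quoting: Identifier() escapes and double-quotes the name, and SQL.format() splices it into the statement. A minimal sketch:

    from psycopg2 import sql as pysql

    idx = 'idx_placex_geometry'   # illustrative index name
    stmt = pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx))
    # stmt.as_string(conn) would render: DROP INDEX "idx_placex_geometry"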

View File

@@ -12,7 +12,7 @@ import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode

-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str
 from nominatim.db.connection import get_pg_env

 LOG = logging.getLogger()
@@ -55,10 +55,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     query_string = urlencode(params or {})

     env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME='/{}.php'.format(endpoint),
-               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
+               SCRIPT_NAME=f'/{endpoint}.php',
+               REQUEST_URI=f'/{endpoint}.php?{query_string}',
                CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
+               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
                HTTP_HOST='localhost',
                HTTP_USER_AGENT='nominatim-tool',
                REMOTE_ADDR='0.0.0.0',
@@ -150,7 +150,7 @@ def run_osm2pgsql(options):
 def get_url(url):
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": f"Nominatim/{version_str()}"}

     try:
         with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:

View File

@@ -9,9 +9,11 @@ Functions for database migration to newer software versions.
 """
 import logging

+from psycopg2 import sql as pysql
+
 from nominatim.db import properties
 from nominatim.db.connection import connect
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import NOMINATIM_VERSION, version_str
 from nominatim.tools import refresh
 from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
@@ -47,7 +49,7 @@ def migrate(config, paths):
         for version, func in _MIGRATION_FUNCTIONS:
             if db_version <= version:
                 LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
+                            version_str(version))
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
                 conn.commit()
@@ -59,8 +61,7 @@ def migrate(config, paths):
             tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
             tokenizer.update_sql_functions(config)

-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())

         conn.commit()
@@ -125,11 +126,11 @@ def add_nominatim_property_table(conn, config, **_):
     """
     if not conn.table_exists('nominatim_properties'):
         with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE nominatim_properties (
-                               property TEXT,
-                               value TEXT);
-                           GRANT SELECT ON TABLE nominatim_properties TO "{}";
-                        """.format(config.DATABASE_WEBUSER))
+            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
+                                         property TEXT,
+                                         value TEXT);
+                                     GRANT SELECT ON TABLE nominatim_properties TO {};
+                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))

 @_migration(3, 6, 0, 0)
 def change_housenumber_transliteration(conn, **_):
@@ -194,7 +195,8 @@ def install_legacy_tokenizer(conn, config, **_):
                                    and column_name = 'token_info'""",
                                 (table, ))
                 if has_column == 0:
-                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
+                                .format(pysql.Identifier(table)))
             tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                            module_name='legacy')

View File

@@ -36,7 +36,7 @@ class _CountryPostcodesCollector:

     def __init__(self, country):
         self.country = country
-        self.collected = dict()
+        self.collected = {}

     def add(self, postcode, x, y):
@@ -136,13 +136,13 @@ class _CountryPostcodesCollector:

     def _open_external(self, project_dir):
-        fname = project_dir / '{}_postcodes.csv'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv'

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
-            return open(fname, 'r')
+            return open(fname, 'r', encoding='utf-8')

-        fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv.gz'

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
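
The added encoding='utf-8' addresses pylint's unspecified-encoding warning: a bare open() falls back to the locale's preferred encoding, so the same postcode file could decode differently depending on the machine's configuration. The fix is mechanical (file name illustrative):

    # locale-dependent: may be cp1252 on Windows, UTF-8 elsewhere
    fd = open('de_postcodes.csv')

    # deterministic, and what unspecified-encoding asks for
    fd = open('de_postcodes.csv', encoding='utf-8')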

View File

@@ -15,7 +15,7 @@ from psycopg2 import sql as pysql

 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str

 LOG = logging.getLogger()
@@ -52,16 +52,19 @@ def load_address_levels(conn, table, levels):
     with conn.cursor() as cur:
         cur.drop_table(table)

-        cur.execute("""CREATE TABLE {} (country_code varchar(2),
-                                        class TEXT,
-                                        type TEXT,
-                                        rank_search SMALLINT,
-                                        rank_address SMALLINT)""".format(table))
+        cur.execute(pysql.SQL("""CREATE TABLE {} (
+                                     country_code varchar(2),
+                                     class TEXT,
+                                     type TEXT,
+                                     rank_search SMALLINT,
+                                     rank_address SMALLINT)
+                              """).format(pysql.Identifier(table)))

         cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                            .format(pysql.Identifier(table)), rows)

-        cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
+                    .format(pysql.Identifier(table)))

     conn.commit()
@@ -186,16 +189,15 @@ def setup_website(basedir, config, conn):
         LOG.info('Creating website directory.')
         basedir.mkdir()

-    template = dedent("""\
+    template = dedent(f"""\
                       <?php
                       @define('CONST_Debug', $_GET['debug'] ?? false);
-                      @define('CONST_LibDir', '{0}');
-                      @define('CONST_TokenizerDir', '{2}');
-                      @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
-                      """.format(config.lib_dir.php, NOMINATIM_VERSION,
-                                 config.project_dir / 'tokenizer'))
+                      @define('CONST_LibDir', '{config.lib_dir.php}');
+                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
+                      @define('CONST_NominatimVersion', '{version_str()}');
+                      """)

     for php_name, conf_name, var_type in PHP_CONST_DEFS:
         varout = _quote_php_variable(var_type, config, conf_name)
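
Worth noting in the template rewrite: an f-string replacement field takes an arbitrary expression, so even the Path division config.project_dir / 'tokenizer' moves inline. The only constraint (before Python 3.12) is that inner quotes must differ from the string's own delimiter, which the triple-quoted template satisfies. A sketch with a hypothetical path:

    from pathlib import Path

    project_dir = Path('/srv/nominatim/project')   # hypothetical
    template = f"""\
    @define('CONST_TokenizerDir', '{project_dir / 'tokenizer'}');
    """
    # -> @define('CONST_TokenizerDir', '/srv/nominatim/project/tokenizer');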

View File

@@ -39,8 +39,8 @@ class SPCsvLoader(Iterator):
         """
         phrases = set()

-        with open(self.csv_path) as file:
-            reader = csv.DictReader(file, delimiter=',')
+        with open(self.csv_path, encoding='utf-8') as fd:
+            reader = csv.DictReader(fd, delimiter=',')
             for row in reader:
                 phrases.add(
                     SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
@@ -54,4 +54,4 @@ class SPCsvLoader(Iterator):
         _, extension = os.path.splitext(self.csv_path)

         if extension != '.csv':
-            raise UsageError('The file {} is not a csv file.'.format(self.csv_path))
+            raise UsageError(f'The file {self.csv_path} is not a csv file.')

View File

@@ -16,7 +16,7 @@
 import logging
 import re

-from psycopg2.sql import Identifier, Literal, SQL
+from psycopg2.sql import Identifier, SQL

 from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics

 LOG = logging.getLogger()
@@ -191,39 +191,41 @@ class SPImporter():
     def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
         """
-            Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
+            Create table place_classtype of the given phrase_class/phrase_type
+            if doesn't exit.
         """
         table_name = _classtype_table(phrase_class, phrase_type)
-        with self.db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL("""
-                    CREATE TABLE IF NOT EXISTS {{}} {}
-                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
-                    WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
-                              .format(Identifier(table_name), Literal(phrase_class),
-                                      Literal(phrase_type)))
+        with self.db_connection.cursor() as cur:
+            cur.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
+                                 SELECT place_id AS place_id,
+                                        st_centroid(geometry) AS centroid
+                                 FROM placex
+                                 WHERE class = %s AND type = %s
+                            """).format(Identifier(table_name), SQL(sql_tablespace)),
+                        (phrase_class, phrase_type))

     def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
         """
             Create indexes on centroid and place_id for the place_classtype table.
         """
-        index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+        index_prefix = f'idx_place_classtype_{phrase_class}_{phrase_type}_'
         base_table = _classtype_table(phrase_class, phrase_type)
         # Index on centroid
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL("""
-                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
                                   .format(Identifier(index_prefix + 'centroid'),
-                                          Identifier(base_table)), sql_tablespace)
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))
         # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL(
-                    """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")
                                   .format(Identifier(index_prefix + 'place_id'),
-                                          Identifier(base_table)))
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))

     def _grant_access_to_webuser(self, phrase_class, phrase_type):
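
The rewrite shows all three composition mechanisms side by side: Identifier() for the table name, an SQL() wrapper for the trusted tablespace clause from the configuration, and %s placeholders for the class/type values, which previously went in as Literal() objects baked into the statement. (The old centroid-index call also passed sql_tablespace as a stray second argument to execute(); that disappears with the rewrite.) A condensed sketch with illustrative names:

    from psycopg2 import sql

    table_name = 'place_classtype_amenity_restaurant'   # illustrative
    tablespace = 'TABLESPACE "ssd"'                      # trusted config fragment

    stmt = sql.SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
                        SELECT place_id, st_centroid(geometry) AS centroid
                        FROM placex WHERE class = %s AND type = %s
                   """).format(sql.Identifier(table_name), sql.SQL(tablespace))
    # cur.execute(stmt, ('amenity', 'restaurant'))  -- values still bound separately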

View File

@@ -21,33 +21,57 @@ from nominatim.indexer.place_info import PlaceInfo

 LOG = logging.getLogger()

-def handle_tarfile_or_directory(data_dir):
-    """ Handles tarfile or directory for importing tiger data
-    """
-    tar = None
-    if data_dir.endswith('.tar.gz'):
-        try:
-            tar = tarfile.open(data_dir)
-        except tarfile.ReadError as err:
-            LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
-            raise UsageError("Cannot open Tiger data file.") from err
-
-        csv_files = [i for i in tar.getmembers() if i.name.endswith('.csv')]
-        LOG.warning("Found %d CSV files in tarfile with path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files in tarfile's path %s", data_dir)
-            return None, None
-    else:
-        files = os.listdir(data_dir)
-        csv_files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
-        LOG.warning("Found %d CSV files in path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files found in path %s", data_dir)
-            return None, None
-
-    return csv_files, tar
+class TigerInput:
+    """ Context manager that goes through Tiger input files which may
+        either be in a directory or gzipped together in a tar file.
+    """
+
+    def __init__(self, data_dir):
+        self.tar_handle = None
+        self.files = []
+
+        if data_dir.endswith('.tar.gz'):
+            try:
+                self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
+            except tarfile.ReadError as err:
+                LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
+                raise UsageError("Cannot open Tiger data file.") from err
+
+            self.files = [i for i in self.tar_handle.getmembers() if i.name.endswith('.csv')]
+            LOG.warning("Found %d CSV files in tarfile with path %s", len(self.files), data_dir)
+        else:
+            files = os.listdir(data_dir)
+            self.files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
+            LOG.warning("Found %d CSV files in path %s", len(self.files), data_dir)
+
+        if not self.files:
+            LOG.warning("Tiger data import selected but no files found at %s", data_dir)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.tar_handle:
+            self.tar_handle.close()
+            self.tar_handle = None
+
+    def next_file(self):
+        """ Return a file handle to the next file to be processed.
+            Raises an IndexError if there is no file left.
+        """
+        fname = self.files.pop(0)
+
+        if self.tar_handle is not None:
+            return io.TextIOWrapper(self.tar_handle.extractfile(fname))
+
+        return open(fname, encoding='utf-8')
+
+    def __len__(self):
+        return len(self.files)

 def handle_threaded_sql_statements(pool, fd, analyzer):
@@ -79,9 +103,9 @@ def add_tiger_data(data_dir, config, threads, tokenizer):
     """ Import tiger data from directory or tar file `data dir`.
     """
     dsn = config.get_libpq_dsn()
-    files, tar = handle_tarfile_or_directory(data_dir)
-    if not files:
-        return
+    with TigerInput(data_dir) as tar:
+        if not tar:
+            return

         with connect(dsn) as conn:
@@ -94,19 +118,12 @@ def add_tiger_data(data_dir, config, threads, tokenizer):
         with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
             with tokenizer.name_analyzer() as analyzer:
-                for fname in files:
-                    if not tar:
-                        fd = open(fname)
-                    else:
-                        fd = io.TextIOWrapper(tar.extractfile(fname))
-
-                    handle_threaded_sql_statements(pool, fd, analyzer)
-
-                    fd.close()
-
-                if tar:
-                    tar.close()
+                while tar:
+                    with tar.next_file() as fd:
+                        handle_threaded_sql_statements(pool, fd, analyzer)

     print('\n')

     LOG.warning("Creating indexes on Tiger data")
     with connect(dsn) as conn:
         sql = SQLPreprocessor(conn, config)
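
The while tar: loop leans on a small Python idiom: TigerInput defines no __bool__, so truthiness falls back to __len__, and the object turns falsy once next_file() has popped the last entry. A stripped-down sketch of the mechanism:

    class FileQueue:
        def __init__(self, items):
            self.items = list(items)

        def next_file(self):
            return self.items.pop(0)

        def __len__(self):
            return len(self.items)

    queue = FileQueue(['a.csv', 'b.csv'])
    while queue:                   # calls len(queue) under the hood
        print(queue.next_file())   # a.csv, then b.csv, then the loop ends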

View File

@@ -34,3 +34,11 @@ POSTGIS_REQUIRED_VERSION = (2, 2)
 # cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside
 # there is a call to set the variable value below.
 GIT_COMMIT_HASH = None
+
+
+# pylint: disable=consider-using-f-string
+def version_str(version=NOMINATIM_VERSION):
+    """
+    Return a human-readable string of the version.
+    """
+    return '{}.{}.{}-{}'.format(*version)
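
version_str() is the one place where .format() deliberately survives, silenced by the disable above: f-strings cannot unpack a tuple with *, and the helper replaces the '{0[0]}.{0[1]}.{0[2]}-{0[3]}' incantation removed in four of the hunks above. The default argument also lets migrations render an arbitrary threshold version:

    NOMINATIM_VERSION = (4, 1, 0, 0)   # illustrative value

    def version_str(version=NOMINATIM_VERSION):
        return '{}.{}.{}-{}'.format(*version)

    print(version_str())               # 4.1.0-0  -- the running software
    print(version_str((3, 6, 0, 0)))   # 3.6.0-0  -- e.g. a migration threshold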