Mirror of https://github.com/osm-search/Nominatim.git (synced 2024-11-24 06:22:08 +03:00)

Commit a7a5f0161f: Merge pull request #2708 from lonvia/use-format-literals

Assorted fixes for new pylint warnings
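
Nearly every hunk below applies the same mechanical rewrite: `%`-interpolation and `str.format()` calls become f-strings, which is what pylint's consider-using-f-string check (C0209, added around pylint 2.11) asks for. A minimal standalone sketch of the pattern, with illustrative values rather than code taken from the patch:

    endpoint = 'search'
    query_string = 'q=Berlin'

    # Before: both of these trigger pylint's consider-using-f-string.
    script_name = '/{}.php'.format(endpoint)
    request_uri = '/%s.php?%s' % (endpoint, query_string)

    # After: the same strings as f-string literals.
    assert script_name == f'/{endpoint}.php'
    assert request_uri == f'/{endpoint}.php?{query_string}'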
@@ -11,6 +11,6 @@ ignored-modules=icu,datrie
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
 # 'too-many-ancestors' is triggered already by deriving from UserDict
-disable=too-few-public-methods,duplicate-code,too-many-ancestors
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use

 good-names=i,x,y,fd,db
@@ -60,9 +60,9 @@ class CommandlineParser:
 def nominatim_version_text():
     """ Program name and version number as string
     """
-    text = 'Nominatim version %s.%s.%s.%s' % version.NOMINATIM_VERSION
+    text = f'Nominatim version {version.version_str()}'
     if version.GIT_COMMIT_HASH is not None:
-        text += ' (%s)' % version.GIT_COMMIT_HASH
+        text += f' ({version.GIT_COMMIT_HASH})'
     return text


 def add_subcommand(self, name, cmd):
@@ -21,7 +21,7 @@ LOG = logging.getLogger()
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to make pyosmium optional for replication only.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415

 class UpdateReplication:
     """\
@@ -96,8 +96,7 @@ class UpdateReplication:
         end = dt.datetime.now(dt.timezone.utc)
         LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
                     round_time((start_index or end) - start_import),
-                    "Indexing: {} ".format(round_time(end - start_index))
-                    if start_index else '',
+                    f"Indexing: {round_time(end - start_index)} " if start_index else '',
                     round_time(end - start_import),
                     round_time(end - batchdate))

@@ -14,12 +14,12 @@ import psutil

 from nominatim.db.connection import connect
 from nominatim.db import status, properties
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str

 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415

 LOG = logging.getLogger()

@@ -194,10 +194,10 @@ class SetupAll:
             LOG.warning('Creating support index')
             if tablespace:
                 tablespace = 'TABLESPACE ' + tablespace
-            cur.execute("""CREATE INDEX idx_placex_pendingsector
-                           ON placex USING BTREE (rank_address,geometry_sector)
-                           {} WHERE indexed_status > 0
-                        """.format(tablespace))
+            cur.execute(f"""CREATE INDEX idx_placex_pendingsector
+                            ON placex USING BTREE (rank_address,geometry_sector)
+                            {tablespace} WHERE indexed_status > 0
+                         """)
         conn.commit()

@@ -213,5 +213,4 @@ class SetupAll:
         except Exception as exc: # pylint: disable=broad-except
             LOG.error('Cannot determine date of database: %s', exc)

-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())
@@ -144,7 +144,7 @@ class Configuration:
         style = self.__getattr__('IMPORT_STYLE')

         if style in ('admin', 'street', 'address', 'full', 'extratags'):
-            return self.config_dir / 'import-{}.style'.format(style)
+            return self.config_dir / f'import-{style}.style'

         return self.find_config_file('', 'IMPORT_STYLE')

@@ -25,7 +25,8 @@ class _Cursor(psycopg2.extras.DictCursor):
         execution functions.
     """

-    def execute(self, query, args=None): # pylint: disable=W0221
+    # pylint: disable=arguments-renamed,arguments-differ
+    def execute(self, query, args=None):
         """ Query execution that logs the SQL query when debugging is enabled.
         """
         LOG.debug(self.mogrify(query, args).decode('utf-8'))
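
A note on the disables above: psycopg2's cursor.execute signature names its parameters (query, vars), so an override that keeps the arity but renames vars to args is what newer pylint reports as arguments-renamed, while older releases only knew the numeric id W0221 (arguments-differ). The bad-option-value disable added to the pylintrc in the first hunk keeps older pylint versions from rejecting the newer message name. A reduced sketch, with illustrative class names:

    class Base:
        def execute(self, query, vars=None):
            pass

    class Cursor(Base):
        # Same arity, renamed parameter: newer pylint emits arguments-renamed,
        # older ones group this under arguments-differ (W0221).
        # pylint: disable=arguments-renamed,arguments-differ
        def execute(self, query, args=None):
            pass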
@@ -163,7 +164,7 @@ def connect(dsn):
         ctxmgr.connection = conn
         return ctxmgr
     except psycopg2.OperationalError as err:
-        raise UsageError("Cannot connect to database: {}".format(err)) from err
+        raise UsageError(f"Cannot connect to database: {err}") from err


 # Translation from PG connection string parameters to PG environment variables.
@@ -39,10 +39,10 @@ def _setup_tablespace_sql(config):
     out = {}
     for subset in ('ADDRESS', 'SEARCH', 'AUX'):
         for kind in ('DATA', 'INDEX'):
-            tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
+            tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
             if tspace:
-                tspace = 'TABLESPACE "{}"'.format(tspace)
-            out['{}_{}'.format(subset.lower(), kind.lower())] = tspace
+                tspace = f'TABLESPACE "{tspace}"'
+            out[f'{subset.lower()}_{kind.lower()}'] = tspace

     return out
@@ -34,7 +34,7 @@ def compute_database_date(conn):

     LOG.info("Using node id %d for timestamp lookup", osmid)
     # Get the node from the API to find the timestamp when it was created.
-    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+    node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
    data = get_url(node_url)

     match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
@@ -40,36 +40,36 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
         cmd.extend(('-v', 'ON_ERROR_STOP=1'))
     if not LOG.isEnabledFor(logging.INFO):
         cmd.append('--quiet')
-    proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)

-    try:
-        if not LOG.isEnabledFor(logging.INFO):
-            proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
+    with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
+        try:
+            if not LOG.isEnabledFor(logging.INFO):
+                proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))

-        if pre_code:
-            proc.stdin.write((pre_code + ';').encode('utf-8'))
+            if pre_code:
+                proc.stdin.write((pre_code + ';').encode('utf-8'))

-        if fname.suffix == '.gz':
-            with gzip.open(str(fname), 'rb') as fdesc:
-                remain = _pipe_to_proc(proc, fdesc)
-        else:
-            with fname.open('rb') as fdesc:
-                remain = _pipe_to_proc(proc, fdesc)
+            if fname.suffix == '.gz':
+                with gzip.open(str(fname), 'rb') as fdesc:
+                    remain = _pipe_to_proc(proc, fdesc)
+            else:
+                with fname.open('rb') as fdesc:
+                    remain = _pipe_to_proc(proc, fdesc)

-        if remain == 0 and post_code:
-            proc.stdin.write((';' + post_code).encode('utf-8'))
-    finally:
-        proc.stdin.close()
-        ret = proc.wait()
+            if remain == 0 and post_code:
+                proc.stdin.write((';' + post_code).encode('utf-8'))
+        finally:
+            proc.stdin.close()
+            ret = proc.wait()

     if ret != 0 or remain > 0:
         raise UsageError("Failed to execute SQL file.")


 # List of characters that need to be quoted for the copy command.
-_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
-                    ord(u'\t'): u'\\t',
-                    ord(u'\n'): u'\\n'}
+_SQL_TRANSLATION = {ord('\\'): '\\\\',
+                    ord('\t'): '\\t',
+                    ord('\n'): '\\n'}


 class CopyBuffer:
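
The restructuring above answers pylint's consider-using-with: subprocess.Popen has supported the context-manager protocol since Python 3.2, and the with block guarantees the pipes are closed and the child is waited on even when the body raises. A reduced, runnable sketch of the shape, using cat as a stand-in for the psql call:

    import subprocess

    # Popen.__exit__ closes the streams and waits for the child process,
    # so the try/finally only needs to flush and close stdin.
    with subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                          stdout=subprocess.DEVNULL) as proc:
        try:
            proc.stdin.write(b'set client_min_messages to WARNING;')
        finally:
            proc.stdin.close()
            ret = proc.wait()
    assert ret == 0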
@@ -66,7 +66,7 @@ class RankRunner(AbstractPlacexRunner):
     """

     def name(self):
-        return "rank {}".format(self.rank)
+        return f"rank {self.rank}"

     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
@@ -86,7 +86,7 @@ class BoundaryRunner(AbstractPlacexRunner):
     """

     def name(self):
-        return "boundaries rank {}".format(self.rank)
+        return f"boundaries rank {self.rank}"

     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
@@ -278,8 +278,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
                + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]


-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.

             This function must yield exactly the same result as the SQL function
@@ -74,10 +74,10 @@ def _check_module(module_dir, conn):
     with conn.cursor() as cur:
         try:
             cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           RETURNS text AS %s, 'transliteration'
                            LANGUAGE c IMMUTABLE STRICT;
                            DROP FUNCTION nominatim_test_import_func(text)
-                        """.format(module_dir))
+                        """, (f'{module_dir}/nominatim.so', ))
         except psycopg2.DatabaseError as err:
             LOG.fatal("Error accessing database module: %s", err)
             raise UsageError("Database module cannot be accessed.") from err
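
Beyond the f-string cleanups, several hunks move values out of interpolated SQL strings and into psycopg2 parameter binding, as with the module path here. A sketch of the difference, assuming an open psycopg2 connection; the DSN and path are illustrative:

    import psycopg2

    conn = psycopg2.connect('dbname=nominatim')  # assumed DSN, illustrative
    module_dir = '/usr/lib/nominatim/module'     # illustrative path

    with conn.cursor() as cur:
        # Before, the path was spliced into the SQL text by .format() and had
        # to be quoted by hand. After, the %s placeholder is filled in by the
        # driver from the parameter tuple, which quotes the value correctly.
        cur.execute('SELECT %s', (f'{module_dir}/nominatim.so', ))
        print(cur.fetchone()[0])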
@@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer):
         php_file = self.data_dir / "tokenizer.php"

         if not php_file.exists() or overwrite:
-            php_file.write_text(dedent("""\
+            php_file.write_text(dedent(f"""\
                 <?php
-                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                """.format(config)), encoding='utf-8')
+                @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
+                require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """), encoding='utf-8')


     def _init_db_tables(self, config):
@@ -337,8 +337,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
         return self.normalizer.transliterate(phrase)


-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.

             This function must yield exactly the same result as the SQL function
@@ -66,7 +66,7 @@ class _VariantMaker:
         """
         parts = re.split(r'(\|)?([=-])>', rule)
         if len(parts) != 4:
-            raise UsageError("Syntax error in variant rule: " + rule)
+            raise UsageError(f"Syntax error in variant rule: {rule}")

         decompose = parts[1] is None
         src_terms = [self._parse_variant_word(t) for t in parts[0].split(',')]
@@ -89,7 +89,7 @@ class _VariantMaker:
         name = name.strip()
         match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name)
         if match is None or (match.group(1) == '~' and match.group(3) == '~'):
-            raise UsageError("Invalid variant word descriptor '{}'".format(name))
+            raise UsageError(f"Invalid variant word descriptor '{name}'")
         norm_name = self.norm.transliterate(match.group(2)).strip()
         if not norm_name:
             return None
@@ -234,7 +234,7 @@ def create_search_indices(conn, config, drop=False):
             bad_indices = [row[0] for row in list(cur)]
             for idx in bad_indices:
                 LOG.info("Drop invalid index %s.", idx)
-                cur.execute('DROP INDEX "{}"'.format(idx))
+                cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
         conn.commit()

     sql = SQLPreprocessor(conn, config)
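
Identifiers such as index or table names cannot go through %s parameters, which carry only values; the psycopg2.sql module composes them instead, double-quoting and escaping the name. The same pattern recurs below for GRANT, ALTER TABLE and CREATE TABLE statements. A sketch, again assuming an open connection conn and an illustrative index name:

    from psycopg2 import sql as pysql

    idx = 'idx_placex_geometry'  # illustrative index name

    # Identifier() renders as a double-quoted, escaped name; SQL() wraps text
    # that is trusted as-is. Composed statements are accepted by cur.execute().
    stmt = pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx))
    print(stmt.as_string(conn))  # DROP INDEX "idx_placex_geometry"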
@@ -12,7 +12,7 @@ import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode

-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str
 from nominatim.db.connection import get_pg_env

 LOG = logging.getLogger()
@@ -55,10 +55,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     query_string = urlencode(params or {})

     env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME='/{}.php'.format(endpoint),
-               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
+               SCRIPT_NAME=f'/{endpoint}.php',
+               REQUEST_URI=f'/{endpoint}.php?{query_string}',
                CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
+               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
                HTTP_HOST='localhost',
                HTTP_USER_AGENT='nominatim-tool',
                REMOTE_ADDR='0.0.0.0',
@@ -150,7 +150,7 @@ def run_osm2pgsql(options):
 def get_url(url):
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": f"Nominatim/{version_str()}"}

     try:
         with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
@@ -9,9 +9,11 @@ Functions for database migration to newer software versions.
 """
 import logging

+from psycopg2 import sql as pysql
+
 from nominatim.db import properties
 from nominatim.db.connection import connect
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import NOMINATIM_VERSION, version_str
 from nominatim.tools import refresh
 from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
@@ -47,7 +49,7 @@ def migrate(config, paths):
         for version, func in _MIGRATION_FUNCTIONS:
             if db_version <= version:
                 LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
+                            version_str(version))
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
                 conn.commit()
@@ -59,8 +61,7 @@ def migrate(config, paths):
             tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
             tokenizer.update_sql_functions(config)

-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())

         conn.commit()
@@ -125,11 +126,11 @@ def add_nominatim_property_table(conn, config, **_):
     """
     if not conn.table_exists('nominatim_properties'):
         with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE nominatim_properties (
-                               property TEXT,
-                               value TEXT);
-                           GRANT SELECT ON TABLE nominatim_properties TO "{}";
-                        """.format(config.DATABASE_WEBUSER))
+            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
+                                         property TEXT,
+                                         value TEXT);
+                                     GRANT SELECT ON TABLE nominatim_properties TO {};
+                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))

 @_migration(3, 6, 0, 0)
 def change_housenumber_transliteration(conn, **_):
@@ -194,7 +195,8 @@ def install_legacy_tokenizer(conn, config, **_):
                                and column_name = 'token_info'""",
                             (table, ))
                 if has_column == 0:
-                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
+                                .format(pysql.Identifier(table)))
         tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                        module_name='legacy')
|
@ -36,7 +36,7 @@ class _CountryPostcodesCollector:
|
||||
|
||||
def __init__(self, country):
|
||||
self.country = country
|
||||
self.collected = dict()
|
||||
self.collected = {}
|
||||
|
||||
|
||||
def add(self, postcode, x, y):
|
||||
@@ -136,13 +136,13 @@ class _CountryPostcodesCollector:


     def _open_external(self, project_dir):
-        fname = project_dir / '{}_postcodes.csv'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv'

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
-            return open(fname, 'r')
+            return open(fname, 'r', encoding='utf-8')

-        fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv.gz'

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
@@ -15,7 +15,7 @@ from psycopg2 import sql as pysql

 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str

 LOG = logging.getLogger()

@@ -52,16 +52,19 @@ def load_address_levels(conn, table, levels):
     with conn.cursor() as cur:
         cur.drop_table(table)

-        cur.execute("""CREATE TABLE {} (country_code varchar(2),
+        cur.execute(pysql.SQL("""CREATE TABLE {} (
+                                    country_code varchar(2),
                                     class TEXT,
                                     type TEXT,
                                     rank_search SMALLINT,
-                                    rank_address SMALLINT)""".format(table))
+                                    rank_address SMALLINT)
+                              """).format(pysql.Identifier(table)))

         cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                            .format(pysql.Identifier(table)), rows)

-        cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
+                    .format(pysql.Identifier(table)))

     conn.commit()
@@ -186,16 +189,15 @@ def setup_website(basedir, config, conn):
         LOG.info('Creating website directory.')
         basedir.mkdir()

-    template = dedent("""\
+    template = dedent(f"""\
                       <?php

                       @define('CONST_Debug', $_GET['debug'] ?? false);
-                      @define('CONST_LibDir', '{0}');
-                      @define('CONST_TokenizerDir', '{2}');
-                      @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
+                      @define('CONST_LibDir', '{config.lib_dir.php}');
+                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
+                      @define('CONST_NominatimVersion', '{version_str()}');

-                      """.format(config.lib_dir.php, NOMINATIM_VERSION,
-                                 config.project_dir / 'tokenizer'))
+                      """)

     for php_name, conf_name, var_type in PHP_CONST_DEFS:
         varout = _quote_php_variable(var_type, config, conf_name)
@@ -39,8 +39,8 @@ class SPCsvLoader(Iterator):
         """
         phrases = set()

-        with open(self.csv_path) as file:
-            reader = csv.DictReader(file, delimiter=',')
+        with open(self.csv_path, encoding='utf-8') as fd:
+            reader = csv.DictReader(fd, delimiter=',')
             for row in reader:
                 phrases.add(
                     SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
@@ -54,4 +54,4 @@ class SPCsvLoader(Iterator):
         _, extension = os.path.splitext(self.csv_path)

         if extension != '.csv':
-            raise UsageError('The file {} is not a csv file.'.format(self.csv_path))
+            raise UsageError(f'The file {self.csv_path} is not a csv file.')
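
The encoding='utf-8' arguments added here and in the postcode hunk above answer pylint's unspecified-encoding warning (W1514, added around pylint 2.10): without the argument, open() falls back to the platform default, so a UTF-8 CSV can decode differently on Windows than on Linux. A small sketch with an illustrative file name:

    import csv

    # Naming the encoding makes the read independent of the locale the
    # process happens to run under ('phrases.csv' is illustrative).
    with open('phrases.csv', encoding='utf-8') as fd:
        for row in csv.DictReader(fd):
            print(row['phrase'])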
@@ -16,7 +16,7 @@
 import logging
 import re

-from psycopg2.sql import Identifier, Literal, SQL
+from psycopg2.sql import Identifier, SQL
 from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics

 LOG = logging.getLogger()
@@ -191,39 +191,41 @@ class SPImporter():

     def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
         """
-            Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
+            Create table place_classtype of the given phrase_class/phrase_type
+            if doesn't exit.
         """
         table_name = _classtype_table(phrase_class, phrase_type)
-        with self.db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL("""
-                CREATE TABLE IF NOT EXISTS {{}} {}
-                AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
-                WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
-                              .format(Identifier(table_name), Literal(phrase_class),
-                                      Literal(phrase_type)))
+        with self.db_connection.cursor() as cur:
+            cur.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
+                                 SELECT place_id AS place_id,
+                                        st_centroid(geometry) AS centroid
+                                 FROM placex
+                                 WHERE class = %s AND type = %s
+                            """).format(Identifier(table_name), SQL(sql_tablespace)),
+                        (phrase_class, phrase_type))


     def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
         """
         Create indexes on centroid and place_id for the place_classtype table.
         """
-        index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+        index_prefix = f'idx_place_classtype_{phrase_class}_{phrase_type}_'
         base_table = _classtype_table(phrase_class, phrase_type)
         # Index on centroid
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL("""
-                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
                                   .format(Identifier(index_prefix + 'centroid'),
-                                          Identifier(base_table)), sql_tablespace)
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))

         # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL(
-                    """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")
                                   .format(Identifier(index_prefix + 'place_id'),
-                                          Identifier(base_table)))
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))


     def _grant_access_to_webuser(self, phrase_class, phrase_type):
@@ -21,33 +21,57 @@ from nominatim.indexer.place_info import PlaceInfo

 LOG = logging.getLogger()


-def handle_tarfile_or_directory(data_dir):
-    """ Handles tarfile or directory for importing tiger data
+class TigerInput:
+    """ Context manager that goes through Tiger input files which may
+        either be in a directory or gzipped together in a tar file.
     """

-    tar = None
-    if data_dir.endswith('.tar.gz'):
-        try:
-            tar = tarfile.open(data_dir)
-        except tarfile.ReadError as err:
-            LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
-            raise UsageError("Cannot open Tiger data file.") from err
+    def __init__(self, data_dir):
+        self.tar_handle = None
+        self.files = []

-        csv_files = [i for i in tar.getmembers() if i.name.endswith('.csv')]
-        LOG.warning("Found %d CSV files in tarfile with path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files in tarfile's path %s", data_dir)
-            return None, None
-    else:
-        files = os.listdir(data_dir)
-        csv_files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
-        LOG.warning("Found %d CSV files in path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files found in path %s", data_dir)
-            return None, None
+        if data_dir.endswith('.tar.gz'):
+            try:
+                self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
+            except tarfile.ReadError as err:
+                LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
+                raise UsageError("Cannot open Tiger data file.") from err

-    return csv_files, tar
+            self.files = [i for i in self.tar_handle.getmembers() if i.name.endswith('.csv')]
+            LOG.warning("Found %d CSV files in tarfile with path %s", len(self.files), data_dir)
+        else:
+            files = os.listdir(data_dir)
+            self.files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
+            LOG.warning("Found %d CSV files in path %s", len(self.files), data_dir)
+
+        if not self.files:
+            LOG.warning("Tiger data import selected but no files found at %s", data_dir)
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.tar_handle:
+            self.tar_handle.close()
+            self.tar_handle = None
+
+
+    def next_file(self):
+        """ Return a file handle to the next file to be processed.
+            Raises an IndexError if there is no file left.
+        """
+        fname = self.files.pop(0)
+
+        if self.tar_handle is not None:
+            return io.TextIOWrapper(self.tar_handle.extractfile(fname))
+
+        return open(fname, encoding='utf-8')
+
+
+    def __len__(self):
+        return len(self.files)


 def handle_threaded_sql_statements(pool, fd, analyzer):
@@ -79,34 +103,27 @@ def add_tiger_data(data_dir, config, threads, tokenizer):
     """ Import tiger data from directory or tar file `data dir`.
     """
     dsn = config.get_libpq_dsn()
-    files, tar = handle_tarfile_or_directory(data_dir)

-    if not files:
-        return
+    with TigerInput(data_dir) as tar:
+        if not tar:
+            return

-    with connect(dsn) as conn:
-        sql = SQLPreprocessor(conn, config)
-        sql.run_sql_file(conn, 'tiger_import_start.sql')
+        with connect(dsn) as conn:
+            sql = SQLPreprocessor(conn, config)
+            sql.run_sql_file(conn, 'tiger_import_start.sql')

-    # Reading files and then for each file line handling
-    # sql_query in <threads - 1> chunks.
-    place_threads = max(1, threads - 1)
+        # Reading files and then for each file line handling
+        # sql_query in <threads - 1> chunks.
+        place_threads = max(1, threads - 1)

-    with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
-        with tokenizer.name_analyzer() as analyzer:
-            for fname in files:
-                if not tar:
-                    fd = open(fname)
-                else:
-                    fd = io.TextIOWrapper(tar.extractfile(fname))
+        with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
+            with tokenizer.name_analyzer() as analyzer:
+                while tar:
+                    with tar.next_file() as fd:
+                        handle_threaded_sql_statements(pool, fd, analyzer)

-                handle_threaded_sql_statements(pool, fd, analyzer)
+        print('\n')

-                fd.close()
-
-    if tar:
-        tar.close()
-    print('\n')
     LOG.warning("Creating indexes on Tiger data")
     with connect(dsn) as conn:
         sql = SQLPreprocessor(conn, config)
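
One subtlety in the rewritten loop: `if not tar:` and `while tar:` work on a TigerInput instance because the class defines __len__ and no __bool__, and Python falls back to __len__ for truth testing. The loop therefore ends exactly when next_file() has drained self.files. A standalone illustration of that fallback, with illustrative names:

    class Queue:
        def __init__(self, items):
            self.items = list(items)

        def pop(self):
            return self.items.pop(0)

        def __len__(self):  # drives truth testing: empty means False
            return len(self.items)

    q = Queue(['a.csv', 'b.csv'])
    while q:                # same idiom as 'while tar:' above
        print(q.pop())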
@@ -34,3 +34,11 @@ POSTGIS_REQUIRED_VERSION = (2, 2)
 # cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside
 # there is a call to set the variable value below.
 GIT_COMMIT_HASH = None
+
+
+# pylint: disable=consider-using-f-string
+def version_str(version=NOMINATIM_VERSION):
+    """
+    Return a human-readable string of the version.
+    """
+    return '{}.{}.{}-{}'.format(*version)
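
The new version_str() helper is the pivot of the commit: NOMINATIM_VERSION is a 4-tuple, so rather than repeating the index-based format string everywhere, every version print now funnels through this one remaining .format() call, hence its local pylint disable. Expected behaviour, with an illustrative tuple value:

    NOMINATIM_VERSION = (4, 1, 0, 0)  # illustrative value

    def version_str(version=NOMINATIM_VERSION):
        return '{}.{}.{}-{}'.format(*version)

    assert version_str() == '4.1.0-0'
    assert version_str((3, 7, 0, 1)) == '3.7.0-1'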