Mirror of https://github.com/osm-search/Nominatim.git
Synced 2024-11-30 22:09:31 +03:00

Merge pull request #2708 from lonvia/use-format-literals

Assorted fixes for new pylint warnings

commit a7a5f0161f
@@ -11,6 +11,6 @@ ignored-modules=icu,datrie
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
 # 'too-many-ancestors' is triggered already by deriving from UserDict
-disable=too-few-public-methods,duplicate-code,too-many-ancestors
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use

 good-names=i,x,y,fd,db
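A note on the two new entries, as far as pylint's documented behavior goes: `no-self-use` stops warning about methods that never touch `self` (several such methods lose their `@staticmethod` in later hunks), and `bad-option-value` keeps pylint from erroring out on that very entry when run under a version that does not know the check name. It also makes the per-file `E0012` guards redundant — `E0012` is the numeric id of `bad-option-value` — which is why the next hunks drop it from the inline disables.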
@@ -60,9 +60,9 @@ class CommandlineParser:
     def nominatim_version_text():
         """ Program name and version number as string
         """
-        text = 'Nominatim version %s.%s.%s.%s' % version.NOMINATIM_VERSION
+        text = f'Nominatim version {version.version_str()}'
         if version.GIT_COMMIT_HASH is not None:
-            text += ' (%s)' % version.GIT_COMMIT_HASH
+            text += f' ({version.GIT_COMMIT_HASH})'
         return text

     def add_subcommand(self, name, cmd):
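Most hunks in this PR are the same mechanical change: `%`-interpolation and `str.format()` rewritten as f-strings, which is what pylint's `consider-using-f-string` check suggests. A self-contained before/after sketch with illustrative values (the variable names are borrowed from a later hunk):

    endpoint = 'search'
    query_string = 'q=berlin'

    # before
    old = '/{}.php?{}'.format(endpoint, query_string)
    # after - the expression sits inline and is evaluated at runtime
    new = f'/{endpoint}.php?{query_string}'

    assert old == new == '/search.php?q=berlin'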
@@ -21,7 +21,7 @@ LOG = logging.getLogger()
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to make pyosmium optional for replication only.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415


 class UpdateReplication:
     """\
@@ -96,8 +96,7 @@ class UpdateReplication:
             end = dt.datetime.now(dt.timezone.utc)
             LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
                         round_time((start_index or end) - start_import),
-                        "Indexing: {} ".format(round_time(end - start_index))
-                        if start_index else '',
+                        f"Indexing: {round_time(end - start_index)} " if start_index else '',
                         round_time(end - start_import),
                         round_time(end - batchdate))
@@ -14,12 +14,12 @@ import psutil

 from nominatim.db.connection import connect
 from nominatim.db import status, properties
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str

 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415

 LOG = logging.getLogger()
@@ -194,10 +194,10 @@ class SetupAll:
             LOG.warning('Creating support index')
             if tablespace:
                 tablespace = 'TABLESPACE ' + tablespace
-            cur.execute("""CREATE INDEX idx_placex_pendingsector
+            cur.execute(f"""CREATE INDEX idx_placex_pendingsector
                            ON placex USING BTREE (rank_address,geometry_sector)
-                           {} WHERE indexed_status > 0
-                           """.format(tablespace))
+                           {tablespace} WHERE indexed_status > 0
+                           """)
         conn.commit()
@@ -213,5 +213,4 @@ class SetupAll:
         except Exception as exc: # pylint: disable=broad-except
             LOG.error('Cannot determine date of database: %s', exc)

-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())
@@ -144,7 +144,7 @@ class Configuration:
         style = self.__getattr__('IMPORT_STYLE')

         if style in ('admin', 'street', 'address', 'full', 'extratags'):
-            return self.config_dir / 'import-{}.style'.format(style)
+            return self.config_dir / f'import-{style}.style'

         return self.find_config_file('', 'IMPORT_STYLE')
@@ -25,7 +25,8 @@ class _Cursor(psycopg2.extras.DictCursor):
         execution functions.
     """

-    def execute(self, query, args=None): # pylint: disable=W0221
+    # pylint: disable=arguments-renamed,arguments-differ
+    def execute(self, query, args=None):
         """ Query execution that logs the SQL query when debugging is enabled.
         """
         LOG.debug(self.mogrify(query, args).decode('utf-8'))
@@ -163,7 +164,7 @@ def connect(dsn):
         ctxmgr.connection = conn
         return ctxmgr
     except psycopg2.OperationalError as err:
-        raise UsageError("Cannot connect to database: {}".format(err)) from err
+        raise UsageError(f"Cannot connect to database: {err}") from err


 # Translation from PG connection string parameters to PG environment variables.
@@ -39,10 +39,10 @@ def _setup_tablespace_sql(config):
     out = {}
     for subset in ('ADDRESS', 'SEARCH', 'AUX'):
         for kind in ('DATA', 'INDEX'):
-            tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
+            tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
             if tspace:
-                tspace = 'TABLESPACE "{}"'.format(tspace)
-            out['{}_{}'.format(subset.lower(), kind.lower())] = tspace
+                tspace = f'TABLESPACE "{tspace}"'
+            out[f'{subset.lower()}_{kind.lower()}'] = tspace

     return out
@@ -34,7 +34,7 @@ def compute_database_date(conn):

     LOG.info("Using node id %d for timestamp lookup", osmid)
     # Get the node from the API to find the timestamp when it was created.
-    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+    node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
     data = get_url(node_url)

     match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
@@ -40,36 +40,36 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
         cmd.extend(('-v', 'ON_ERROR_STOP=1'))
     if not LOG.isEnabledFor(logging.INFO):
         cmd.append('--quiet')
-    proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)

-    try:
-        if not LOG.isEnabledFor(logging.INFO):
-            proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
+    with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
+        try:
+            if not LOG.isEnabledFor(logging.INFO):
+                proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))

-        if pre_code:
-            proc.stdin.write((pre_code + ';').encode('utf-8'))
+            if pre_code:
+                proc.stdin.write((pre_code + ';').encode('utf-8'))

-        if fname.suffix == '.gz':
-            with gzip.open(str(fname), 'rb') as fdesc:
-                remain = _pipe_to_proc(proc, fdesc)
-        else:
-            with fname.open('rb') as fdesc:
-                remain = _pipe_to_proc(proc, fdesc)
+            if fname.suffix == '.gz':
+                with gzip.open(str(fname), 'rb') as fdesc:
+                    remain = _pipe_to_proc(proc, fdesc)
+            else:
+                with fname.open('rb') as fdesc:
+                    remain = _pipe_to_proc(proc, fdesc)

-        if remain == 0 and post_code:
-            proc.stdin.write((';' + post_code).encode('utf-8'))
-    finally:
-        proc.stdin.close()
-        ret = proc.wait()
+            if remain == 0 and post_code:
+                proc.stdin.write((';' + post_code).encode('utf-8'))
+        finally:
+            proc.stdin.close()
+            ret = proc.wait()

     if ret != 0 or remain > 0:
         raise UsageError("Failed to execute SQL file.")


 # List of characters that need to be quoted for the copy command.
-_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
-                    ord(u'\t'): u'\\t',
-                    ord(u'\n'): u'\\n'}
+_SQL_TRANSLATION = {ord('\\'): '\\\\',
+                    ord('\t'): '\\t',
+                    ord('\n'): '\\n'}


 class CopyBuffer:
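Since Python 3.2, `subprocess.Popen` is itself a context manager: `__exit__` closes the standard streams and waits for the child, which is what pylint's `consider-using-with` asks for. A runnable sketch of the pattern, with `cat` standing in for the `psql` invocation above (requires a Unix-like system):

    import subprocess

    with subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE) as proc:
        out, _ = proc.communicate(b'SELECT 1;\n')

    # __exit__ has already closed the pipes and waited for the child.
    print(out, proc.returncode)   # b'SELECT 1;\n' 0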
@@ -66,7 +66,7 @@ class RankRunner(AbstractPlacexRunner):
     """

     def name(self):
-        return "rank {}".format(self.rank)
+        return f"rank {self.rank}"

     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
@@ -86,7 +86,7 @@ class BoundaryRunner(AbstractPlacexRunner):
     """

     def name(self):
-        return "boundaries rank {}".format(self.rank)
+        return f"boundaries rank {self.rank}"

     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
|
@ -278,8 +278,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
+ [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
|
+ [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
def normalize_postcode(self, postcode):
|
||||||
def normalize_postcode(postcode):
|
|
||||||
""" Convert the postcode to a standardized form.
|
""" Convert the postcode to a standardized form.
|
||||||
|
|
||||||
This function must yield exactly the same result as the SQL function
|
This function must yield exactly the same result as the SQL function
|
||||||
|
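This change runs opposite to the usual refactoring direction: the `@staticmethod` is dropped, not added. With `no-self-use` disabled in `.pylintrc` (first hunk), a method that never touches `self` no longer draws a warning, so the analyzer classes can keep one uniform instance-method signature. An illustrative sketch (class name and body are not from the codebase):

    class Analyzer:
        def normalize_postcode(self, postcode):
            # Ignores self today; a subclass may use instance state later
            # without changing the call signature for callers.
            return postcode.strip().upper()

    print(Analyzer().normalize_postcode(' ab1 2cd '))   # AB1 2CD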
@@ -74,10 +74,10 @@ def _check_module(module_dir, conn):
     with conn.cursor() as cur:
         try:
             cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           RETURNS text AS %s, 'transliteration'
                            LANGUAGE c IMMUTABLE STRICT;
                            DROP FUNCTION nominatim_test_import_func(text)
-                        """.format(module_dir))
+                        """, (f'{module_dir}/nominatim.so', ))
         except psycopg2.DatabaseError as err:
             LOG.fatal("Error accessing database module: %s", err)
             raise UsageError("Database module cannot be accessed.") from err
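The library path now travels as a bound parameter instead of being spliced into the SQL text with `str.format()`. A minimal sketch of the same style, assuming a reachable database (the DSN and path are placeholders):

    import psycopg2

    conn = psycopg2.connect('dbname=nominatim')   # placeholder DSN
    with conn.cursor() as cur:
        # psycopg2 quotes and escapes the value; the SQL text stays constant.
        cur.execute('SELECT %s', ('/usr/lib/nominatim/nominatim.so', ))
        print(cur.fetchone()[0])
    conn.close()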
@@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer):
         php_file = self.data_dir / "tokenizer.php"

         if not php_file.exists() or overwrite:
-            php_file.write_text(dedent("""\
+            php_file.write_text(dedent(f"""\
                 <?php
-                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                """.format(config)), encoding='utf-8')
+                @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
+                require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """), encoding='utf-8')


     def _init_db_tables(self, config):
@@ -337,8 +337,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
         return self.normalizer.transliterate(phrase)


-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.

         This function must yield exactly the same result as the SQL function
@@ -66,7 +66,7 @@ class _VariantMaker:
         """
         parts = re.split(r'(\|)?([=-])>', rule)
         if len(parts) != 4:
-            raise UsageError("Syntax error in variant rule: " + rule)
+            raise UsageError(f"Syntax error in variant rule: {rule}")

         decompose = parts[1] is None
         src_terms = [self._parse_variant_word(t) for t in parts[0].split(',')]
@@ -89,7 +89,7 @@ class _VariantMaker:
         name = name.strip()
         match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name)
         if match is None or (match.group(1) == '~' and match.group(3) == '~'):
-            raise UsageError("Invalid variant word descriptor '{}'".format(name))
+            raise UsageError(f"Invalid variant word descriptor '{name}'")
         norm_name = self.norm.transliterate(match.group(2)).strip()
         if not norm_name:
             return None
@@ -234,7 +234,7 @@ def create_search_indices(conn, config, drop=False):
         bad_indices = [row[0] for row in list(cur)]
         for idx in bad_indices:
             LOG.info("Drop invalid index %s.", idx)
-            cur.execute('DROP INDEX "{}"'.format(idx))
+            cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
     conn.commit()

     sql = SQLPreprocessor(conn, config)
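Several hunks switch hand-rolled identifier interpolation to `psycopg2.sql` composition: `Identifier()` applies proper double-quote escaping, so a name can never break out of the statement. A small sketch of how the pieces compose; composition itself needs no connection, only rendering with `as_string()` does:

    from psycopg2 import sql as pysql

    stmt = pysql.SQL('DROP INDEX {}').format(pysql.Identifier('idx_"odd"_name'))
    # Passed to cur.execute(), this renders as:
    #   DROP INDEX "idx_""odd""_name"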
@@ -12,7 +12,7 @@ import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode

-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str
 from nominatim.db.connection import get_pg_env

 LOG = logging.getLogger()
@@ -55,10 +55,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     query_string = urlencode(params or {})

     env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME='/{}.php'.format(endpoint),
-               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
+               SCRIPT_NAME=f'/{endpoint}.php',
+               REQUEST_URI=f'/{endpoint}.php?{query_string}',
                CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
+               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
                HTTP_HOST='localhost',
                HTTP_USER_AGENT='nominatim-tool',
                REMOTE_ADDR='0.0.0.0',
@@ -150,7 +150,7 @@ def run_osm2pgsql(options):
 def get_url(url):
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": f"Nominatim/{version_str()}"}

     try:
         with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
@@ -9,9 +9,11 @@ Functions for database migration to newer software versions.
 """
 import logging

+from psycopg2 import sql as pysql
+
 from nominatim.db import properties
 from nominatim.db.connection import connect
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import NOMINATIM_VERSION, version_str
 from nominatim.tools import refresh
 from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
@@ -47,7 +49,7 @@ def migrate(config, paths):
         for version, func in _MIGRATION_FUNCTIONS:
             if db_version <= version:
                 LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
+                            version_str(version))
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
         conn.commit()
@@ -59,8 +61,7 @@ def migrate(config, paths):
             tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
             tokenizer.update_sql_functions(config)

-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())

         conn.commit()
@@ -125,11 +126,11 @@ def add_nominatim_property_table(conn, config, **_):
     """
     if not conn.table_exists('nominatim_properties'):
         with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE nominatim_properties (
+            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
                                property TEXT,
                                value TEXT);
-                           GRANT SELECT ON TABLE nominatim_properties TO "{}";
-                        """.format(config.DATABASE_WEBUSER))
+                           GRANT SELECT ON TABLE nominatim_properties TO {};
+                        """).format(pysql.Identifier(config.DATABASE_WEBUSER)))

 @_migration(3, 6, 0, 0)
 def change_housenumber_transliteration(conn, **_):
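Note the GRANT target: the old code hand-quoted the role name with literal double quotes inside the SQL string; `pysql.Identifier` now does the quoting and escaping as part of composing the statement, the same pattern as the DROP INDEX hunk earlier.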
@@ -194,7 +195,8 @@ def install_legacy_tokenizer(conn, config, **_):
                                and column_name = 'token_info'""",
                             (table, ))
             if has_column == 0:
-                cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+                cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
+                            .format(pysql.Identifier(table)))
     tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                    module_name='legacy')
@@ -36,7 +36,7 @@ class _CountryPostcodesCollector:

     def __init__(self, country):
         self.country = country
-        self.collected = dict()
+        self.collected = {}


     def add(self, postcode, x, y):
@@ -136,13 +136,13 @@ class _CountryPostcodesCollector:


     def _open_external(self, project_dir):
-        fname = project_dir / '{}_postcodes.csv'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv'

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
-            return open(fname, 'r')
+            return open(fname, 'r', encoding='utf-8')

-        fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv.gz'

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
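The explicit `encoding='utf-8'` here (and in the open() calls below) answers pylint's `unspecified-encoding` warning: without it, `open()` falls back to the platform's locale-preferred encoding, so the same import could parse differently on different machines.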
@@ -15,7 +15,7 @@ from psycopg2 import sql as pysql

 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str

 LOG = logging.getLogger()
@@ -52,16 +52,19 @@ def load_address_levels(conn, table, levels):
     with conn.cursor() as cur:
         cur.drop_table(table)

-        cur.execute("""CREATE TABLE {} (country_code varchar(2),
+        cur.execute(pysql.SQL("""CREATE TABLE {} (
+                                    country_code varchar(2),
                                     class TEXT,
                                     type TEXT,
                                     rank_search SMALLINT,
-                                    rank_address SMALLINT)""".format(table))
+                                    rank_address SMALLINT)
+                              """).format(pysql.Identifier(table)))

         cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                            .format(pysql.Identifier(table)), rows)

-        cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
+                    .format(pysql.Identifier(table)))

     conn.commit()
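The `execute_values` call in the middle of the hunk already used composed SQL; it is Nominatim's cursor wrapper around `psycopg2.extras.execute_values`. A sketch of the underlying call, assuming a live psycopg2 connection `conn` and an illustrative table name; recent psycopg2 releases accept the composed object directly, older ones need it rendered with `.as_string(cur)` first:

    from psycopg2 import sql as pysql
    from psycopg2.extras import execute_values

    rows = [('de', 'place', 'city', 16, 16),
            ('de', 'place', 'town', 18, 18)]

    with conn.cursor() as cur:
        # %s marks where the VALUES lists are spliced in; the table name
        # is composed safely as an identifier.
        execute_values(cur,
                       pysql.SQL('INSERT INTO {} VALUES %s')
                            .format(pysql.Identifier('address_levels')),
                       rows)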
@@ -186,16 +189,15 @@ def setup_website(basedir, config, conn):
         LOG.info('Creating website directory.')
         basedir.mkdir()

-    template = dedent("""\
+    template = dedent(f"""\
                       <?php

                       @define('CONST_Debug', $_GET['debug'] ?? false);
-                      @define('CONST_LibDir', '{0}');
-                      @define('CONST_TokenizerDir', '{2}');
-                      @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
+                      @define('CONST_LibDir', '{config.lib_dir.php}');
+                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
+                      @define('CONST_NominatimVersion', '{version_str()}');

-                      """.format(config.lib_dir.php, NOMINATIM_VERSION,
-                                 config.project_dir / 'tokenizer'))
+                      """)

     for php_name, conf_name, var_type in PHP_CONST_DEFS:
         varout = _quote_php_variable(var_type, config, conf_name)
@@ -39,8 +39,8 @@ class SPCsvLoader(Iterator):
         """
         phrases = set()

-        with open(self.csv_path) as file:
-            reader = csv.DictReader(file, delimiter=',')
+        with open(self.csv_path, encoding='utf-8') as fd:
+            reader = csv.DictReader(fd, delimiter=',')
             for row in reader:
                 phrases.add(
                     SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
@@ -54,4 +54,4 @@ class SPCsvLoader(Iterator):
         _, extension = os.path.splitext(self.csv_path)

         if extension != '.csv':
-            raise UsageError('The file {} is not a csv file.'.format(self.csv_path))
+            raise UsageError(f'The file {self.csv_path} is not a csv file.')
@@ -16,7 +16,7 @@
 import logging
 import re

-from psycopg2.sql import Identifier, Literal, SQL
+from psycopg2.sql import Identifier, SQL
 from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics

 LOG = logging.getLogger()
@@ -191,39 +191,41 @@ class SPImporter():

     def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
         """
-            Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
+            Create table place_classtype of the given phrase_class/phrase_type
+            if doesn't exit.
         """
         table_name = _classtype_table(phrase_class, phrase_type)
-        with self.db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL("""
-                    CREATE TABLE IF NOT EXISTS {{}} {}
-                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
-                    WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
-                              .format(Identifier(table_name), Literal(phrase_class),
-                                      Literal(phrase_type)))
+        with self.db_connection.cursor() as cur:
+            cur.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
+                                 SELECT place_id AS place_id,
+                                        st_centroid(geometry) AS centroid
+                                 FROM placex
+                                 WHERE class = %s AND type = %s
+                            """).format(Identifier(table_name), SQL(sql_tablespace)),
+                        (phrase_class, phrase_type))


     def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
         """
         Create indexes on centroid and place_id for the place_classtype table.
         """
-        index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+        index_prefix = f'idx_place_classtype_{phrase_class}_{phrase_type}_'
         base_table = _classtype_table(phrase_class, phrase_type)
         # Index on centroid
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL("""
-                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
-                                  .format(Identifier(index_prefix + 'centroid'),
-                                          Identifier(base_table)), sql_tablespace)
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
+                                  .format(Identifier(index_prefix + 'centroid'),
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))

         # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL(
-                    """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
-                                  .format(Identifier(index_prefix + 'place_id'),
-                                          Identifier(base_table)))
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")
+                                  .format(Identifier(index_prefix + 'place_id'),
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))


     def _grant_access_to_webuser(self, phrase_class, phrase_type):
@@ -21,33 +21,57 @@ from nominatim.indexer.place_info import PlaceInfo

 LOG = logging.getLogger()


-def handle_tarfile_or_directory(data_dir):
-    """ Handles tarfile or directory for importing tiger data
-    """
-
-    tar = None
-    if data_dir.endswith('.tar.gz'):
-        try:
-            tar = tarfile.open(data_dir)
-        except tarfile.ReadError as err:
-            LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
-            raise UsageError("Cannot open Tiger data file.") from err
-
-        csv_files = [i for i in tar.getmembers() if i.name.endswith('.csv')]
-        LOG.warning("Found %d CSV files in tarfile with path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files in tarfile's path %s", data_dir)
-            return None, None
-    else:
-        files = os.listdir(data_dir)
-        csv_files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
-        LOG.warning("Found %d CSV files in path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files found in path %s", data_dir)
-            return None, None
-
-    return csv_files, tar
+class TigerInput:
+    """ Context manager that goes through Tiger input files which may
+        either be in a directory or gzipped together in a tar file.
+    """
+
+    def __init__(self, data_dir):
+        self.tar_handle = None
+        self.files = []
+
+        if data_dir.endswith('.tar.gz'):
+            try:
+                self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
+            except tarfile.ReadError as err:
+                LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
+                raise UsageError("Cannot open Tiger data file.") from err
+
+            self.files = [i for i in self.tar_handle.getmembers() if i.name.endswith('.csv')]
+            LOG.warning("Found %d CSV files in tarfile with path %s", len(self.files), data_dir)
+        else:
+            files = os.listdir(data_dir)
+            self.files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
+            LOG.warning("Found %d CSV files in path %s", len(self.files), data_dir)
+
+        if not self.files:
+            LOG.warning("Tiger data import selected but no files found at %s", data_dir)
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.tar_handle:
+            self.tar_handle.close()
+            self.tar_handle = None
+
+
+    def next_file(self):
+        """ Return a file handle to the next file to be processed.
+            Raises an IndexError if there is no file left.
+        """
+        fname = self.files.pop(0)
+
+        if self.tar_handle is not None:
+            return io.TextIOWrapper(self.tar_handle.extractfile(fname))
+
+        return open(fname, encoding='utf-8')
+
+
+    def __len__(self):
+        return len(self.files)


 def handle_threaded_sql_statements(pool, fd, analyzer):
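Because `TigerInput` defines `__len__`, an instance is falsy once all files have been consumed; that is what the `while tar:` loop and the `if not tar:` guard in the next hunk rely on. A usage sketch of the class defined above (the directory path is illustrative):

    with TigerInput('tiger-csv') as tiger:
        if not tiger:                    # no CSV files were found
            raise SystemExit(1)
        while tiger:                     # truthiness comes from __len__
            with tiger.next_file() as fd:
                header = fd.readline()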
@@ -79,34 +103,27 @@ def add_tiger_data(data_dir, config, threads, tokenizer):
     """ Import tiger data from directory or tar file `data dir`.
     """
     dsn = config.get_libpq_dsn()
-    files, tar = handle_tarfile_or_directory(data_dir)

-    if not files:
-        return
+    with TigerInput(data_dir) as tar:
+        if not tar:
+            return

-    with connect(dsn) as conn:
-        sql = SQLPreprocessor(conn, config)
-        sql.run_sql_file(conn, 'tiger_import_start.sql')
+        with connect(dsn) as conn:
+            sql = SQLPreprocessor(conn, config)
+            sql.run_sql_file(conn, 'tiger_import_start.sql')

-    # Reading files and then for each file line handling
-    # sql_query in <threads - 1> chunks.
-    place_threads = max(1, threads - 1)
+        # Reading files and then for each file line handling
+        # sql_query in <threads - 1> chunks.
+        place_threads = max(1, threads - 1)

-    with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
-        with tokenizer.name_analyzer() as analyzer:
-            for fname in files:
-                if not tar:
-                    fd = open(fname)
-                else:
-                    fd = io.TextIOWrapper(tar.extractfile(fname))
-
-                handle_threaded_sql_statements(pool, fd, analyzer)
-
-                fd.close()
-
-    if tar:
-        tar.close()
-    print('\n')
+        with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
+            with tokenizer.name_analyzer() as analyzer:
+                while tar:
+                    with tar.next_file() as fd:
+                        handle_threaded_sql_statements(pool, fd, analyzer)
+
+        print('\n')

     LOG.warning("Creating indexes on Tiger data")
     with connect(dsn) as conn:
         sql = SQLPreprocessor(conn, config)
@@ -34,3 +34,11 @@ POSTGIS_REQUIRED_VERSION = (2, 2)
 # cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside
 # there is a call to set the variable value below.
 GIT_COMMIT_HASH = None
+
+
+# pylint: disable=consider-using-f-string
+def version_str(version=NOMINATIM_VERSION):
+    """
+    Return a human-readable string of the version.
+    """
+    return '{}.{}.{}-{}'.format(*version)
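The new helper deliberately keeps `str.format` (hence the local pylint override): unpacking the 4-tuple with `*` has no direct f-string equivalent. A quick check of its behavior, with an illustrative version tuple:

    NOMINATIM_VERSION = (4, 0, 99, 1)   # illustrative, not a real release

    def version_str(version=NOMINATIM_VERSION):
        return '{}.{}.{}-{}'.format(*version)

    print(version_str())               # 4.0.99-1
    print(version_str((3, 6, 0, 0)))   # 3.6.0-0 - also used for migration labels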