From 4e1e166c6a3f4b83c114eb22babca0a4eee3f96e Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 10 May 2022 23:00:18 +0200
Subject: [PATCH 1/7] add a function to return a formatted version

Replaces the various repeated format strings throughout the code.
---
 .pylintrc                     |  2 +-
 nominatim/cli.py              |  4 ++--
 nominatim/clicmd/setup.py     |  5 ++---
 nominatim/tools/exec_utils.py |  4 ++--
 nominatim/tools/migration.py  |  7 +++----
 nominatim/tools/refresh.py    | 13 ++++++-------
 nominatim/version.py          |  8 ++++++++
 7 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index d5c4514f..89135138 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -11,6 +11,6 @@ ignored-modules=icu,datrie
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
 # 'too-many-ancestors' is triggered already by deriving from UserDict
-disable=too-few-public-methods,duplicate-code,too-many-ancestors
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value
 
 good-names=i,x,y,fd,db
diff --git a/nominatim/cli.py b/nominatim/cli.py
index 01eb6119..f911023b 100644
--- a/nominatim/cli.py
+++ b/nominatim/cli.py
@@ -60,9 +60,9 @@ class CommandlineParser:
     def nominatim_version_text():
         """ Program name and version number as string
         """
-        text = 'Nominatim version %s.%s.%s.%s' % version.NOMINATIM_VERSION
+        text = f'Nominatim version {version.version_str()}'
         if version.GIT_COMMIT_HASH is not None:
-            text += ' (%s)' % version.GIT_COMMIT_HASH
+            text += f' ({version.GIT_COMMIT_HASH})'
         return text
 
     def add_subcommand(self, name, cmd):
diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py
index e822cbe5..7968fbce 100644
--- a/nominatim/clicmd/setup.py
+++ b/nominatim/clicmd/setup.py
@@ -14,7 +14,7 @@ import psutil
 
 from nominatim.db.connection import connect
 from nominatim.db import status, properties
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -213,5 +213,4 @@ class SetupAll:
             except Exception as exc: # pylint: disable=broad-except
                 LOG.error('Cannot determine date of database: %s', exc)
 
-            properties.set_property(conn, 'database_version',
-                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+            properties.set_property(conn, 'database_version', version_str())
diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py
index b06b8533..db2a4c2e 100644
--- a/nominatim/tools/exec_utils.py
+++ b/nominatim/tools/exec_utils.py
@@ -12,7 +12,7 @@ import subprocess
 import urllib.request as urlrequest
 from urllib.parse import urlencode
 
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str
 from nominatim.db.connection import get_pg_env
 
 LOG = logging.getLogger()
@@ -150,7 +150,7 @@ def run_osm2pgsql(options):
 def get_url(url):
     """ Get the contents from the given URL and return it as a UTF-8 string.
     """
-    headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
+    headers = {"User-Agent": f"Nominatim/{version_str()}"}
 
     try:
         with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py
index 76726e8c..290016ad 100644
--- a/nominatim/tools/migration.py
+++ b/nominatim/tools/migration.py
@@ -11,7 +11,7 @@ import logging
 
 from nominatim.db import properties
 from nominatim.db.connection import connect
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import NOMINATIM_VERSION, version_str
 from nominatim.tools import refresh
 from nominatim.tokenizer import factory as tokenizer_factory
 from nominatim.errors import UsageError
@@ -47,7 +47,7 @@ def migrate(config, paths):
         for version, func in _MIGRATION_FUNCTIONS:
             if db_version <= version:
                 LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
+                            '{}.{}.{}-{}'.format(*version))
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
                 conn.commit()
@@ -59,8 +59,7 @@ def migrate(config, paths):
             tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
             tokenizer.update_sql_functions(config)
 
-        properties.set_property(conn, 'database_version',
-                                '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
+        properties.set_property(conn, 'database_version', version_str())
 
         conn.commit()
 
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
index aacc622b..b9a926f2 100644
--- a/nominatim/tools/refresh.py
+++ b/nominatim/tools/refresh.py
@@ -15,7 +15,7 @@ from psycopg2 import sql as pysql
 
 from nominatim.db.utils import execute_file
 from nominatim.db.sql_preprocessor import SQLPreprocessor
-from nominatim.version import NOMINATIM_VERSION
+from nominatim.version import version_str
 
 LOG = logging.getLogger()
 
@@ -186,16 +186,15 @@ def setup_website(basedir, config, conn):
         LOG.info('Creating website directory.')
         basedir.mkdir()
 
-    template = dedent("""\
+    template = dedent(f"""\
                       <?php
 
                       @define('CONST_Debug', $_GET['debug'] ?? false);
-                      @define('CONST_LibDir', '{0}');
-                      @define('CONST_TokenizerDir', '{2}');
-                      @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
+                      @define('CONST_LibDir', '{config.lib_dir.php}');
+                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
+                      @define('CONST_NominatimVersion', '{version_str()}');
 
-                      """.format(config.lib_dir.php, NOMINATIM_VERSION,
-                                 config.project_dir / 'tokenizer'))
+                      """)
 
     for php_name, conf_name, var_type in PHP_CONST_DEFS:
         varout = _quote_php_variable(var_type, config, conf_name)
diff --git a/nominatim/version.py b/nominatim/version.py
index 47fe3b30..d4f69da1 100644
--- a/nominatim/version.py
+++ b/nominatim/version.py
@@ -34,3 +34,11 @@ POSTGIS_REQUIRED_VERSION = (2, 2)
 # cmake/tool-installed.tmpl is used to build the binary 'nominatim'. Inside
 # there is a call to set the variable value below.
 GIT_COMMIT_HASH = None
+
+
+# pylint: disable=consider-using-f-string
+def version_str():
+    """
+    Return a human-readable string of the version.
+    """
+    return '{}.{}.{}-{}'.format(*NOMINATIM_VERSION)

From bb2bd76f91ed6e4b1530a7fb66553c7fd92afd9d Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 11 May 2022 08:59:28 +0200
Subject: [PATCH 2/7] pylint: avoid explicit use of format() function

Use psycopg2 SQL formatters for SQL and formatted string literals
everywhere else.
---
 nominatim/clicmd/replication.py               |  5 ++-
 nominatim/clicmd/setup.py                     | 10 +++---
 nominatim/config.py                           |  2 +-
 nominatim/db/connection.py                    |  2 +-
 nominatim/db/sql_preprocessor.py              |  6 ++--
 nominatim/db/status.py                        |  2 +-
 nominatim/indexer/runners.py                  |  4 +--
 nominatim/tokenizer/legacy_tokenizer.py       | 14 ++++-----
 .../token_analysis/config_variants.py         |  4 +--
 nominatim/tools/database_import.py            |  2 +-
 nominatim/tools/exec_utils.py                 |  6 ++--
 nominatim/tools/migration.py                  | 18 ++++++-----
 nominatim/tools/postcodes.py                  |  4 +--
 nominatim/tools/refresh.py                    |  9 ++++--
 .../tools/special_phrases/sp_csv_loader.py    |  2 +-
 .../tools/special_phrases/sp_importer.py      | 31 ++++++++++---------
 nominatim/version.py                          |  4 +--
 17 files changed, 66 insertions(+), 59 deletions(-)

diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py
index 849a0e49..9d946304 100644
--- a/nominatim/clicmd/replication.py
+++ b/nominatim/clicmd/replication.py
@@ -21,7 +21,7 @@ LOG = logging.getLogger()
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to make pyosmium optional for replication only.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415
 
 class UpdateReplication:
     """\
@@ -96,8 +96,7 @@ class UpdateReplication:
         end = dt.datetime.now(dt.timezone.utc)
         LOG.warning("Update completed. Import: %s. %sTotal: %s. Remaining backlog: %s.",
                     round_time((start_index or end) - start_import),
-                    "Indexing: {} ".format(round_time(end - start_index))
-                    if start_index else '',
+                    f"Indexing: {round_time(end - start_index)} " if start_index else '',
                     round_time(end - start_import),
                     round_time(end - batchdate))
 
diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py
index 7968fbce..b643c5ba 100644
--- a/nominatim/clicmd/setup.py
+++ b/nominatim/clicmd/setup.py
@@ -19,7 +19,7 @@ from nominatim.version import version_str
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 # Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
+# pylint: disable=C0415
 
 LOG = logging.getLogger()
 
@@ -194,10 +194,10 @@ class SetupAll:
             LOG.warning('Creating support index')
             if tablespace:
                 tablespace = 'TABLESPACE ' + tablespace
-            cur.execute("""CREATE INDEX idx_placex_pendingsector
-                           ON placex USING BTREE (rank_address,geometry_sector)
-                           {} WHERE indexed_status > 0
-                        """.format(tablespace))
+            cur.execute(f"""CREATE INDEX idx_placex_pendingsector
+                            ON placex USING BTREE (rank_address,geometry_sector)
+                            {tablespace} WHERE indexed_status > 0
+                         """)
         conn.commit()
 
 
diff --git a/nominatim/config.py b/nominatim/config.py
index a3f91055..ef261079 100644
--- a/nominatim/config.py
+++ b/nominatim/config.py
@@ -144,7 +144,7 @@ class Configuration:
         style = self.__getattr__('IMPORT_STYLE')
 
         if style in ('admin', 'street', 'address', 'full', 'extratags'):
-            return self.config_dir / 'import-{}.style'.format(style)
+            return self.config_dir / f'import-{style}.style'
 
         return self.find_config_file('', 'IMPORT_STYLE')
 
diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py
index 45bc173d..90194e63 100644
--- a/nominatim/db/connection.py
+++ b/nominatim/db/connection.py
@@ -163,7 +163,7 @@ def connect(dsn):
         ctxmgr.connection = conn
         return ctxmgr
     except psycopg2.OperationalError as err:
-        raise UsageError("Cannot connect to database: {}".format(err)) from err
+        raise UsageError(f"Cannot connect to database: {err}") from err
 
 
 # Translation from PG connection string parameters to PG environment variables.
diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py
index 10d93666..4de53886 100644
--- a/nominatim/db/sql_preprocessor.py
+++ b/nominatim/db/sql_preprocessor.py
@@ -39,10 +39,10 @@ def _setup_tablespace_sql(config):
     out = {}
     for subset in ('ADDRESS', 'SEARCH', 'AUX'):
         for kind in ('DATA', 'INDEX'):
-            tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
+            tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
             if tspace:
-                tspace = 'TABLESPACE "{}"'.format(tspace)
-            out['{}_{}'.format(subset.lower(), kind.lower())] = tspace
+                tspace = f'TABLESPACE "{tspace}"'
+            out[f'{subset.lower()}_{kind.lower()}'] = tspace
 
     return out
 
diff --git a/nominatim/db/status.py b/nominatim/db/status.py
index 12b24a83..d31196b3 100644
--- a/nominatim/db/status.py
+++ b/nominatim/db/status.py
@@ -34,7 +34,7 @@ def compute_database_date(conn):
 
     LOG.info("Using node id %d for timestamp lookup", osmid)
     # Get the node from the API to find the timestamp when it was created.
-    node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+    node_url = f'https://www.openstreetmap.org/api/0.6/node/{osmid}/1'
     data = get_url(node_url)
 
     match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py
index ac7a0015..9a30ffe6 100644
--- a/nominatim/indexer/runners.py
+++ b/nominatim/indexer/runners.py
@@ -66,7 +66,7 @@ class RankRunner(AbstractPlacexRunner):
     """
 
     def name(self):
-        return "rank {}".format(self.rank)
+        return f"rank {self.rank}"
 
     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
@@ -86,7 +86,7 @@ class BoundaryRunner(AbstractPlacexRunner):
     """
 
     def name(self):
-        return "boundaries rank {}".format(self.rank)
+        return f"boundaries rank {self.rank}"
 
     def sql_count_objects(self):
         return pysql.SQL("""SELECT count(*) FROM placex
diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py
index 97ce6d16..299e2fde 100644
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -74,10 +74,10 @@ def _check_module(module_dir, conn):
     with conn.cursor() as cur:
         try:
             cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
-                           RETURNS text AS '{}/nominatim.so', 'transliteration'
+                           RETURNS text AS %s, 'transliteration'
                            LANGUAGE c IMMUTABLE STRICT;
                            DROP FUNCTION nominatim_test_import_func(text)
-                        """.format(module_dir))
+                        """, (f'{module_dir}/nominatim.so', ))
         except psycopg2.DatabaseError as err:
             LOG.fatal("Error accessing database module: %s", err)
             raise UsageError("Database module cannot be accessed.") from err
@@ -250,12 +250,12 @@ class LegacyTokenizer(AbstractTokenizer):
         php_file = self.data_dir / "tokenizer.php"
 
         if not php_file.exists() or overwrite:
-            php_file.write_text(dedent("""\
+            php_file.write_text(dedent(f"""\
                 <?php
-                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                """.format(config)), encoding='utf-8')
+                @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
+                require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """), encoding='utf-8')
 
 
     def _init_db_tables(self, config):
diff --git a/nominatim/tokenizer/token_analysis/config_variants.py b/nominatim/tokenizer/token_analysis/config_variants.py
index 59ceeb22..067c4b5b 100644
--- a/nominatim/tokenizer/token_analysis/config_variants.py
+++ b/nominatim/tokenizer/token_analysis/config_variants.py
@@ -66,7 +66,7 @@ class _VariantMaker:
         """
         parts = re.split(r'(\|)?([=-])>', rule)
         if len(parts) != 4:
-            raise UsageError("Syntax error in variant rule: " + rule)
+            raise UsageError(f"Syntax error in variant rule: {rule}")
 
         decompose = parts[1] is None
         src_terms = [self._parse_variant_word(t) for t in parts[0].split(',')]
@@ -89,7 +89,7 @@ class _VariantMaker:
         name = name.strip()
         match = re.fullmatch(r'([~^]?)([^~$^]*)([~$]?)', name)
         if match is None or (match.group(1) == '~' and match.group(3) == '~'):
-            raise UsageError("Invalid variant word descriptor '{}'".format(name))
+            raise UsageError(f"Invalid variant word descriptor '{name}'")
         norm_name = self.norm.transliterate(match.group(2)).strip()
         if not norm_name:
             return None
diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py
index caec9035..50938c19 100644
--- a/nominatim/tools/database_import.py
+++ b/nominatim/tools/database_import.py
@@ -234,7 +234,7 @@ def create_search_indices(conn, config, drop=False):
         bad_indices = [row[0] for row in list(cur)]
         for idx in bad_indices:
             LOG.info("Drop invalid index %s.", idx)
-            cur.execute('DROP INDEX "{}"'.format(idx))
+            cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
     conn.commit()
 
     sql = SQLPreprocessor(conn, config)
diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py
index db2a4c2e..a81a8d6b 100644
--- a/nominatim/tools/exec_utils.py
+++ b/nominatim/tools/exec_utils.py
@@ -55,10 +55,10 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None,
     query_string = urlencode(params or {})
 
     env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME='/{}.php'.format(endpoint),
-               REQUEST_URI='/{}.php?{}'.format(endpoint, query_string),
+               SCRIPT_NAME=f'/{endpoint}.php',
+               REQUEST_URI=f'/{endpoint}.php?{query_string}',
                CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME='{}/{}.php'.format(webdir, endpoint),
+               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
                HTTP_HOST='localhost',
                HTTP_USER_AGENT='nominatim-tool',
                REMOTE_ADDR='0.0.0.0',
diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py
index 290016ad..8eaad2f7 100644
--- a/nominatim/tools/migration.py
+++ b/nominatim/tools/migration.py
@@ -9,6 +9,8 @@ Functions for database migration to newer software versions.
 """
 import logging
 
+from psycopg2 import sql as pysql
+
 from nominatim.db import properties
 from nominatim.db.connection import connect
 from nominatim.version import NOMINATIM_VERSION, version_str
@@ -47,7 +49,7 @@ def migrate(config, paths):
         for version, func in _MIGRATION_FUNCTIONS:
             if db_version <= version:
                 LOG.warning("Runnning: %s (%s)", func.__doc__.split('\n', 1)[0],
-                            '{}.{}.{}-{}'.format(*version))
+                            version_str(version))
                 kwargs = dict(conn=conn, config=config, paths=paths)
                 func(**kwargs)
                 conn.commit()
@@ -124,11 +126,12 @@ def add_nominatim_property_table(conn, config, **_):
     """
     if not conn.table_exists('nominatim_properties'):
         with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE nominatim_properties (
-                               property TEXT,
-                               value TEXT);
-                           GRANT SELECT ON TABLE nominatim_properties TO "{}";
-                        """.format(config.DATABASE_WEBUSER))
+            cur.execute(pysql.SQL("""CREATE TABLE nominatim_properties (
+                                        property TEXT,
+                                        value TEXT);
+                                     GRANT SELECT ON TABLE nominatim_properties TO {};
+                                  """)
+                             .format(pysql.Identifier(config.DATABASE_WEBUSER)))
 
 @_migration(3, 6, 0, 0)
 def change_housenumber_transliteration(conn, **_):
@@ -193,7 +196,8 @@ def install_legacy_tokenizer(conn, config, **_):
                                            and column_name = 'token_info'""",
                                         (table, ))
                 if has_column == 0:
-                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
+                    cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
+                                     .format(pysql.Identifier(table)))
         tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                        module_name='legacy')
 
diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py
index adc58ec5..cd778e5f 100644
--- a/nominatim/tools/postcodes.py
+++ b/nominatim/tools/postcodes.py
@@ -136,13 +136,13 @@ class _CountryPostcodesCollector:
 
 
     def _open_external(self, project_dir):
-        fname = project_dir / '{}_postcodes.csv'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv'
 
         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
             return open(fname, 'r')
 
-        fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
+        fname = project_dir / f'{self.country}_postcodes.csv.gz'
 
         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
index b9a926f2..ca6f5c62 100644
--- a/nominatim/tools/refresh.py
+++ b/nominatim/tools/refresh.py
@@ -52,16 +52,19 @@ def load_address_levels(conn, table, levels):
     with conn.cursor() as cur:
         cur.drop_table(table)
 
-        cur.execute("""CREATE TABLE {} (country_code varchar(2),
+        cur.execute(pysql.SQL("""CREATE TABLE {} (
+                                        country_code varchar(2),
                                         class TEXT,
                                         type TEXT,
                                         rank_search SMALLINT,
-                                        rank_address SMALLINT)""".format(table))
+                                        rank_address SMALLINT)""")
+                         .format(pysql.Identifier(table)))
 
         cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                            .format(pysql.Identifier(table)), rows)
 
-        cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
+        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
+                         .format(pysql.Identifier(table)))
 
     conn.commit()
 
diff --git a/nominatim/tools/special_phrases/sp_csv_loader.py b/nominatim/tools/special_phrases/sp_csv_loader.py
index a32a4b39..2a67687f 100644
--- a/nominatim/tools/special_phrases/sp_csv_loader.py
+++ b/nominatim/tools/special_phrases/sp_csv_loader.py
@@ -54,4 +54,4 @@ class SPCsvLoader(Iterator):
         _, extension = os.path.splitext(self.csv_path)
 
         if extension != '.csv':
-            raise UsageError('The file {} is not a csv file.'.format(self.csv_path))
+            raise UsageError(f'The file {self.csv_path} is not a csv file.')
diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py
index 195f8387..9eefaa19 100644
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@@ -16,7 +16,7 @@
 import logging
 import re
 
-from psycopg2.sql import Identifier, Literal, SQL
+from psycopg2.sql import Identifier, SQL
 from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
 
 LOG = logging.getLogger()
@@ -195,35 +195,36 @@ class SPImporter():
         """
         table_name = _classtype_table(phrase_class, phrase_type)
         with self.db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL("""
-                    CREATE TABLE IF NOT EXISTS {{}} {}
-                    AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
-                    WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
-                              .format(Identifier(table_name), Literal(phrase_class),
-                                      Literal(phrase_type)))
+            db_cursor.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
+                                       SELECT place_id AS place_id,
+                                              st_centroid(geometry) AS centroid
+                                       FROM placex
+                                       WHERE class = %s AND type = %s""")
+                                 .format(Identifier(table_name), SQL(sql_tablespace)),
+                              (phrase_class, phrase_type))
 
 
     def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
         """
             Create indexes on centroid and place_id for the place_classtype table.
         """
-        index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+        index_prefix = f'idx_place_classtype_{phrase_class}_{phrase_type}_'
         base_table = _classtype_table(phrase_class, phrase_type)
         # Index on centroid
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL("""
-                    CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
-                                  .format(Identifier(index_prefix + 'centroid'),
-                                          Identifier(base_table)), sql_tablespace)
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
+                                     .format(Identifier(index_prefix + 'centroid'),
+                                             Identifier(base_table),
+                                             SQL(sql_tablespace)))
 
         # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):
             with self.db_connection.cursor() as db_cursor:
-                db_cursor.execute(SQL(
-                    """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
+                db_cursor.execute(SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")
                                   .format(Identifier(index_prefix + 'place_id'),
-                                          Identifier(base_table)))
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))
 
 
     def _grant_access_to_webuser(self, phrase_class, phrase_type):
diff --git a/nominatim/version.py b/nominatim/version.py
index d4f69da1..88d42af9 100644
--- a/nominatim/version.py
+++ b/nominatim/version.py
@@ -37,8 +37,8 @@ GIT_COMMIT_HASH = None
 
 
 # pylint: disable=consider-using-f-string
-def version_str():
+def version_str(version=NOMINATIM_VERSION):
     """
     Return a human-readable string of the version.
     """
-    return '{}.{}.{}-{}'.format(*NOMINATIM_VERSION)
+    return '{}.{}.{}-{}'.format(*version)

From ae6b029543f7cacd9a491ae5965981a610ca424e Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 11 May 2022 09:06:32 +0200
Subject: [PATCH 3/7] remove redundant 'u' prefixes for unicode strings

---
 nominatim/db/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py
index 87b0545f..7d58a668 100644
--- a/nominatim/db/utils.py
+++ b/nominatim/db/utils.py
@@ -67,9 +67,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
 
 
 # List of characters that need to be quoted for the copy command.
-_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
-                    ord(u'\t'): u'\\t',
-                    ord(u'\n'): u'\\n'}
+_SQL_TRANSLATION = {ord('\\'): '\\\\',
+                    ord('\t'): '\\t',
+                    ord('\n'): '\\n'}
 
 
 class CopyBuffer:

From 5d5f40a82f07e7e2a341435aeb45d8619d252525 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 11 May 2022 09:45:15 +0200
Subject: [PATCH 4/7] use context management when processing Tiger data

---
 nominatim/tools/tiger_data.py | 107 ++++++++++++++++++++--------------
 1 file changed, 62 insertions(+), 45 deletions(-)

diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py
index 8610880f..6e37df5e 100644
--- a/nominatim/tools/tiger_data.py
+++ b/nominatim/tools/tiger_data.py
@@ -21,33 +21,57 @@ from nominatim.indexer.place_info import PlaceInfo
 
 LOG = logging.getLogger()
 
-
-def handle_tarfile_or_directory(data_dir):
-    """ Handles tarfile or directory for importing tiger data
+class TigerInput:
+    """ Context manager that goes through Tiger input files which may
+        either be in a directory or gzipped together in a tar file.
     """
 
-    tar = None
-    if data_dir.endswith('.tar.gz'):
-        try:
-            tar = tarfile.open(data_dir)
-        except tarfile.ReadError as err:
-            LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
-            raise UsageError("Cannot open Tiger data file.") from err
+    def __init__(self, data_dir):
+        self.tar_handle = None
+        self.files = []
 
-        csv_files = [i for i in tar.getmembers() if i.name.endswith('.csv')]
-        LOG.warning("Found %d CSV files in tarfile with path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files in tarfile's path %s", data_dir)
-            return None, None
-    else:
-        files = os.listdir(data_dir)
-        csv_files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
-        LOG.warning("Found %d CSV files in path %s", len(csv_files), data_dir)
-        if not csv_files:
-            LOG.warning("Tiger data import selected but no files found in path %s", data_dir)
-            return None, None
+        if data_dir.endswith('.tar.gz'):
+            try:
+                self.tar_handle = tarfile.open(data_dir) # pylint: disable=consider-using-with
+            except tarfile.ReadError as err:
+                LOG.fatal("Cannot open '%s'. Is this a tar file?", data_dir)
+                raise UsageError("Cannot open Tiger data file.") from err
 
-    return csv_files, tar
+            self.files = [i for i in self.tar_handle.getmembers() if i.name.endswith('.csv')]
+            LOG.warning("Found %d CSV files in tarfile with path %s", len(self.files), data_dir)
+        else:
+            files = os.listdir(data_dir)
+            self.files = [os.path.join(data_dir, i) for i in files if i.endswith('.csv')]
+            LOG.warning("Found %d CSV files in path %s", len(self.files), data_dir)
+
+        if not self.files:
+            LOG.warning("Tiger data import selected but no files found at %s", data_dir)
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.tar_handle:
+            self.tar_handle.close()
+            self.tar_handle = None
+
+
+    def next_file(self):
+        """ Return a file handle to the next file to be processed.
+            Raises an IndexError if there is no file left.
+        """
+        fname = self.files.pop(0)
+
+        if self.tar_handle is not None:
+            return io.TextIOWrapper(self.tar_handle.extractfile(fname))
+
+        return open(fname, encoding='utf-8')
+
+
+    def __len__(self):
+        return len(self.files)
 
 
 def handle_threaded_sql_statements(pool, fd, analyzer):
@@ -79,34 +103,27 @@ def add_tiger_data(data_dir, config, threads, tokenizer):
     """ Import tiger data from directory or tar file `data dir`.
     """
     dsn = config.get_libpq_dsn()
-    files, tar = handle_tarfile_or_directory(data_dir)
 
-    if not files:
-        return
+    with TigerInput(data_dir) as tar:
+        if not tar:
+            return
 
-    with connect(dsn) as conn:
-        sql = SQLPreprocessor(conn, config)
-        sql.run_sql_file(conn, 'tiger_import_start.sql')
+        with connect(dsn) as conn:
+            sql = SQLPreprocessor(conn, config)
+            sql.run_sql_file(conn, 'tiger_import_start.sql')
 
-    # Reading files and then for each file line handling
-    # sql_query in <threads - 1> chunks.
-    place_threads = max(1, threads - 1)
+        # Reading files and then for each file line handling
+        # sql_query in <threads - 1> chunks.
+        place_threads = max(1, threads - 1)
 
-    with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
-        with tokenizer.name_analyzer() as analyzer:
-            for fname in files:
-                if not tar:
-                    fd = open(fname)
-                else:
-                    fd = io.TextIOWrapper(tar.extractfile(fname))
+        with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
+            with tokenizer.name_analyzer() as analyzer:
+                while tar:
+                    with tar.next_file() as fd:
+                        handle_threaded_sql_statements(pool, fd, analyzer)
 
-                handle_threaded_sql_statements(pool, fd, analyzer)
+        print('\n')
 
-                fd.close()
-
-    if tar:
-        tar.close()
-    print('\n')
     LOG.warning("Creating indexes on Tiger data")
     with connect(dsn) as conn:
         sql = SQLPreprocessor(conn, config)

From 7f7a7df3a2dfb7ae97bafb2c2e83209a87b75f24 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 11 May 2022 10:22:14 +0200
Subject: [PATCH 5/7] solve assorted issue with newer pylint versions

Includes more use of 'with', adding encodings to open statements
and a couple of issues with parameter renaming.
---
 nominatim/db/connection.py                    |  3 +-
 nominatim/db/utils.py                         | 34 +++++++++----------
 nominatim/tools/postcodes.py                  |  4 +--
 .../tools/special_phrases/sp_csv_loader.py    |  4 +--
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py
index 90194e63..c60bcfdd 100644
--- a/nominatim/db/connection.py
+++ b/nominatim/db/connection.py
@@ -25,7 +25,8 @@ class _Cursor(psycopg2.extras.DictCursor):
         execution functions.
     """
 
-    def execute(self, query, args=None): # pylint: disable=W0221
+    # pylint: disable=arguments-renamed,arguments-differ
+    def execute(self, query, args=None):
         """ Query execution that logs the SQL query when debugging is enabled.
         """
         LOG.debug(self.mogrify(query, args).decode('utf-8'))
diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py
index 7d58a668..b859afa8 100644
--- a/nominatim/db/utils.py
+++ b/nominatim/db/utils.py
@@ -40,27 +40,27 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
         cmd.extend(('-v', 'ON_ERROR_STOP=1'))
     if not LOG.isEnabledFor(logging.INFO):
         cmd.append('--quiet')
-    proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE)
 
-    try:
-        if not LOG.isEnabledFor(logging.INFO):
-            proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
+    with subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) as proc:
+        try:
+            if not LOG.isEnabledFor(logging.INFO):
+                proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
 
-        if pre_code:
-            proc.stdin.write((pre_code + ';').encode('utf-8'))
+            if pre_code:
+                proc.stdin.write((pre_code + ';').encode('utf-8'))
 
-        if fname.suffix == '.gz':
-            with gzip.open(str(fname), 'rb') as fdesc:
-                remain = _pipe_to_proc(proc, fdesc)
-        else:
-            with fname.open('rb') as fdesc:
-                remain = _pipe_to_proc(proc, fdesc)
+            if fname.suffix == '.gz':
+                with gzip.open(str(fname), 'rb') as fdesc:
+                    remain = _pipe_to_proc(proc, fdesc)
+            else:
+                with fname.open('rb') as fdesc:
+                    remain = _pipe_to_proc(proc, fdesc)
 
-        if remain == 0 and post_code:
-            proc.stdin.write((';' + post_code).encode('utf-8'))
-    finally:
-        proc.stdin.close()
-        ret = proc.wait()
+            if remain == 0 and post_code:
+                proc.stdin.write((';' + post_code).encode('utf-8'))
+        finally:
+            proc.stdin.close()
+            ret = proc.wait()
 
     if ret != 0 or remain > 0:
         raise UsageError("Failed to execute SQL file.")
diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py
index cd778e5f..2b7027e7 100644
--- a/nominatim/tools/postcodes.py
+++ b/nominatim/tools/postcodes.py
@@ -36,7 +36,7 @@ class _CountryPostcodesCollector:
 
     def __init__(self, country):
         self.country = country
-        self.collected = dict()
+        self.collected = {}
 
 
     def add(self, postcode, x, y):
@@ -140,7 +140,7 @@ class _CountryPostcodesCollector:
 
         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
-            return open(fname, 'r')
+            return open(fname, 'r', encoding='utf-8')
 
         fname = project_dir / f'{self.country}_postcodes.csv.gz'
 
diff --git a/nominatim/tools/special_phrases/sp_csv_loader.py b/nominatim/tools/special_phrases/sp_csv_loader.py
index 2a67687f..55a9d8d0 100644
--- a/nominatim/tools/special_phrases/sp_csv_loader.py
+++ b/nominatim/tools/special_phrases/sp_csv_loader.py
@@ -39,8 +39,8 @@ class SPCsvLoader(Iterator):
         """
         phrases = set()
 
-        with open(self.csv_path) as file:
-            reader = csv.DictReader(file, delimiter=',')
+        with open(self.csv_path, encoding='utf-8') as fd:
+            reader = csv.DictReader(fd, delimiter=',')
             for row in reader:
                 phrases.add(
                     SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])

From d14a585cc92c1558c23ca86855474f14fd711cb2 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 11 May 2022 10:25:00 +0200
Subject: [PATCH 6/7] pylint: disable no-self-use check

This checker encourages bad behaviour (namely changing the static
status of a function during inheritence) and will be made optional
in upcoming versions of pylint.
---
 .pylintrc                               | 2 +-
 nominatim/tokenizer/icu_tokenizer.py    | 3 +--
 nominatim/tokenizer/legacy_tokenizer.py | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index 89135138..fef53872 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -11,6 +11,6 @@ ignored-modules=icu,datrie
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
 # 'too-many-ancestors' is triggered already by deriving from UserDict
-disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use
 
 good-names=i,x,y,fd,db
diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py
index 9c7138ce..bf5544ed 100644
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -278,8 +278,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
                + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]
 
 
-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.
 
             This function must yield exactly the same result as the SQL function
diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py
index 299e2fde..7b78b22a 100644
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -337,8 +337,7 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
         return self.normalizer.transliterate(phrase)
 
 
-    @staticmethod
-    def normalize_postcode(postcode):
+    def normalize_postcode(self, postcode):
         """ Convert the postcode to a standardized form.
 
             This function must yield exactly the same result as the SQL function

From 3ba975466c166e9d9eceee34189d22c1561e69c1 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Wed, 11 May 2022 10:36:09 +0200
Subject: [PATCH 7/7] fix spacing

Some versions of pylint are oddly picky.
---
 nominatim/tools/migration.py                  |  5 ++--
 nominatim/tools/refresh.py                    |  6 ++---
 .../tools/special_phrases/sp_importer.py      | 25 ++++++++++---------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py
index 8eaad2f7..28a14455 100644
--- a/nominatim/tools/migration.py
+++ b/nominatim/tools/migration.py
@@ -130,8 +130,7 @@ def add_nominatim_property_table(conn, config, **_):
                                         property TEXT,
                                         value TEXT);
                                      GRANT SELECT ON TABLE nominatim_properties TO {};
-                                  """)
-                             .format(pysql.Identifier(config.DATABASE_WEBUSER)))
+                                  """).format(pysql.Identifier(config.DATABASE_WEBUSER)))
 
 @_migration(3, 6, 0, 0)
 def change_housenumber_transliteration(conn, **_):
@@ -197,7 +196,7 @@ def install_legacy_tokenizer(conn, config, **_):
                                         (table, ))
                 if has_column == 0:
                     cur.execute(pysql.SQL('ALTER TABLE {} ADD COLUMN token_info JSONB')
-                                     .format(pysql.Identifier(table)))
+                                .format(pysql.Identifier(table)))
         tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                        module_name='legacy')
 
diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py
index ca6f5c62..561bcf83 100644
--- a/nominatim/tools/refresh.py
+++ b/nominatim/tools/refresh.py
@@ -57,14 +57,14 @@ def load_address_levels(conn, table, levels):
                                         class TEXT,
                                         type TEXT,
                                         rank_search SMALLINT,
-                                        rank_address SMALLINT)""")
-                         .format(pysql.Identifier(table)))
+                                        rank_address SMALLINT)
+                              """).format(pysql.Identifier(table)))
 
         cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                            .format(pysql.Identifier(table)), rows)
 
         cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
-                         .format(pysql.Identifier(table)))
+                    .format(pysql.Identifier(table)))
 
     conn.commit()
 
diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py
index 9eefaa19..8137142b 100644
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@@ -191,17 +191,18 @@ class SPImporter():
 
     def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
         """
-            Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
+            Create table place_classtype of the given phrase_class/phrase_type
+            if doesn't exit.
         """
         table_name = _classtype_table(phrase_class, phrase_type)
-        with self.db_connection.cursor() as db_cursor:
-            db_cursor.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
-                                       SELECT place_id AS place_id,
-                                              st_centroid(geometry) AS centroid
-                                       FROM placex
-                                       WHERE class = %s AND type = %s""")
-                                 .format(Identifier(table_name), SQL(sql_tablespace)),
-                              (phrase_class, phrase_type))
+        with self.db_connection.cursor() as cur:
+            cur.execute(SQL("""CREATE TABLE IF NOT EXISTS {} {} AS
+                                 SELECT place_id AS place_id,
+                                        st_centroid(geometry) AS centroid
+                                 FROM placex
+                                 WHERE class = %s AND type = %s
+                             """).format(Identifier(table_name), SQL(sql_tablespace)),
+                        (phrase_class, phrase_type))
 
 
     def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
@@ -214,9 +215,9 @@ class SPImporter():
         if not self.db_connection.index_exists(index_prefix + 'centroid'):
             with self.db_connection.cursor() as db_cursor:
                 db_cursor.execute(SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")
-                                     .format(Identifier(index_prefix + 'centroid'),
-                                             Identifier(base_table),
-                                             SQL(sql_tablespace)))
+                                  .format(Identifier(index_prefix + 'centroid'),
+                                          Identifier(base_table),
+                                          SQL(sql_tablespace)))
 
         # Index on place_id
         if not self.db_connection.index_exists(index_prefix + 'place_id'):