diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 9c2a67a1..8ae8cf39 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -1,30 +1,33 @@ -- Trigger functions for the placex table. +-- Information returned by update preparation. +DROP TYPE IF EXISTS prepare_update_info CASCADE; +CREATE TYPE prepare_update_info AS ( + name HSTORE, + address HSTORE, + rank_address SMALLINT, + country_code TEXT, + class TEXT, + type TEXT, + linked_place_id BIGINT +); + -- Retrieve the data needed by the indexer for updating the place. --- --- Return parameters: --- name list of names --- address list of address tags, either from the object or a surrounding --- building --- country_feature If the place is a country feature, this contains the --- country code, otherwise it is null. -CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, - OUT name HSTORE, - OUT address HSTORE, - OUT country_feature VARCHAR, - OUT linked_place_id BIGINT) +CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex) + RETURNS prepare_update_info AS $$ DECLARE location RECORD; + result prepare_update_info; BEGIN -- For POI nodes, check if the address should be derived from a surrounding -- building. IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN - address := p.address; + result.address := p.address; ELSE -- The additional && condition works around the misguided query -- planner of postgis 3.0. - SELECT placex.address || hstore('_inherited', '') INTO address + SELECT placex.address || hstore('_inherited', '') INTO result.address FROM placex WHERE ST_Covers(geometry, p.centroid) and geometry && p.centroid @@ -34,27 +37,26 @@ BEGIN LIMIT 1; END IF; - address := address - '_unlisted_place'::TEXT; - name := p.name; + result.address := result.address - '_unlisted_place'::TEXT; + result.name := p.name; + result.class := p.class; + result.type := p.type; + result.country_code := p.country_code; + result.rank_address := p.rank_address; -- Names of linked places need to be merged in, so search for a linkable -- place already here. SELECT * INTO location FROM find_linked_place(p); IF location.place_id is not NULL THEN - linked_place_id := location.place_id; + result.linked_place_id := location.place_id; IF NOT location.name IS NULL THEN - name := location.name || name; + result.name := location.name || result.name; END IF; END IF; - country_feature := CASE WHEN p.admin_level = 2 - and p.class = 'boundary' and p.type = 'administrative' - and p.osm_type = 'R' - THEN p.country_code - ELSE null - END; + RETURN result; END; $$ LANGUAGE plpgsql STABLE; diff --git a/nominatim/indexer/place_info.py b/nominatim/indexer/place_info.py new file mode 100644 index 00000000..06d730e0 --- /dev/null +++ b/nominatim/indexer/place_info.py @@ -0,0 +1,68 @@ +""" +Wrapper around place information the indexer gets from the database and hands to +the tokenizer. +""" + +import psycopg2.extras + +class PlaceInfo: + """ Data class containing all information the tokenizer gets about a + place it should process the names for. + """ + + def __init__(self, info): + self._info = info + + + def analyze(self, analyzer): + """ Process this place with the given tokenizer and return the + result in psycopg2-compatible Json. + """ + return psycopg2.extras.Json(analyzer.process_place(self)) + + + @property + def name(self): + """ A dictionary with the names of the place or None if the place + has no names. 
+ """ + return self._info.get('name') + + + @property + def address(self): + """ A dictionary with the address elements of the place + or None if no address information is available. + """ + return self._info.get('address') + + + @property + def country_code(self): + """ The country code of the country the place is in. Guaranteed + to be a two-letter lower-case string or None, if no country + could be found. + """ + return self._info.get('country_code') + + + @property + def rank_address(self): + """ The computed rank address before rank correction. + """ + return self._info.get('rank_address') + + + def is_a(self, key, value): + """ Check if the place's primary tag corresponds to the given + key and value. + """ + return self._info.get('class') == key and self._info.get('type') == value + + + def is_country(self): + """ Check if the place is a valid country boundary. + """ + return self.rank_address == 4 \ + and self.is_a('boundary', 'administrative') \ + and self.country_code is not None diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index 29261ee5..70536a71 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -4,14 +4,16 @@ tasks. """ import functools -import psycopg2.extras from psycopg2 import sql as pysql +from nominatim.indexer.place_info import PlaceInfo + # pylint: disable=C0111 def _mk_valuelist(template, num): return pysql.SQL(',').join([pysql.SQL(template)] * num) + class AbstractPlacexRunner: """ Returns SQL commands for indexing of the placex table. """ @@ -37,7 +39,7 @@ class AbstractPlacexRunner: @staticmethod def get_place_details(worker, ids): - worker.perform("""SELECT place_id, (placex_prepare_update(placex)).* + worker.perform("""SELECT place_id, (placex_indexing_prepare(placex)).* FROM placex WHERE place_id IN %s""", (tuple((p[0] for p in ids)), )) @@ -47,7 +49,7 @@ class AbstractPlacexRunner: for place in places: for field in ('place_id', 'name', 'address', 'linked_place_id'): values.append(place[field]) - values.append(psycopg2.extras.Json(self.analyzer.process_place(place))) + values.append(PlaceInfo(place).analyze(self.analyzer)) worker.perform(self._index_sql(len(places)), values) @@ -141,7 +143,7 @@ class InterpolationRunner: values = [] for place in places: values.extend((place[x] for x in ('place_id', 'address'))) - values.append(psycopg2.extras.Json(self.analyzer.process_place(place))) + values.append(PlaceInfo(place).analyze(self.analyzer)) worker.perform(self._index_sql(len(places)), values) diff --git a/nominatim/tokenizer/base.py b/nominatim/tokenizer/base.py index 00ecae44..53289c78 100644 --- a/nominatim/tokenizer/base.py +++ b/nominatim/tokenizer/base.py @@ -6,6 +6,7 @@ from abc import ABC, abstractmethod from typing import List, Tuple, Dict, Any from nominatim.config import Configuration +from nominatim.indexer.place_info import PlaceInfo # pylint: disable=unnecessary-pass @@ -105,20 +106,13 @@ class AbstractAnalyzer(ABC): @abstractmethod - def process_place(self, place: Dict) -> Any: + def process_place(self, place: PlaceInfo) -> Any: """ Extract tokens for the given place and compute the information to be handed to the PL/pgSQL processor for building the search index. Arguments: - place: Dictionary with the information about the place. Currently - the following fields may be present: - - - *name* is a dictionary of names for the place together - with the designation of the name. - - *address* is a dictionary of address terms. 
- - *country_feature* is set to a country code when the - place describes a country. + place: Place information retrived from the database. Returns: A JSON-serialisable structure that will be handed into @@ -142,7 +136,7 @@ class AbstractTokenizer(ABC): the tokenizer remains stable over updates. Arguments: - config: Read-only object with configuration obtions. + config: Read-only object with configuration options. init_db: When set to False, then initialisation of database tables should be skipped. This option is only required for @@ -155,11 +149,14 @@ class AbstractTokenizer(ABC): @abstractmethod - def init_from_project(self) -> None: + def init_from_project(self, config: Configuration) -> None: """ Initialise the tokenizer from an existing database setup. The function should load all previously saved configuration from the project directory and/or the property table. + + Arguments: + config: Read-only object with configuration options. """ pass @@ -172,7 +169,7 @@ class AbstractTokenizer(ABC): during query time. Arguments: - config: Read-only object with configuration obtions. + config: Read-only object with configuration options. """ pass @@ -187,13 +184,13 @@ class AbstractTokenizer(ABC): data structures or data itself must not be changed by this function. Arguments: - config: Read-only object with configuration obtions. + config: Read-only object with configuration options. """ pass @abstractmethod - def check_database(self) -> str: + def check_database(self, config: Configuration) -> str: """ Check that the database is set up correctly and ready for being queried. @@ -202,6 +199,9 @@ class AbstractTokenizer(ABC): description of the issue as well as hints for the user on how to resolve the issue. + Arguments: + config: Read-only object with configuration options. + Return `None`, if no issue was found. """ pass diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py index 069672d4..dc3e7411 100644 --- a/nominatim/tokenizer/factory.py +++ b/nominatim/tokenizer/factory.py @@ -85,6 +85,6 @@ def get_tokenizer_for_db(config): tokenizer_module = _import_tokenizer(name) tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir) - tokenizer.init_from_project() + tokenizer.init_from_project(config) return tokenizer diff --git a/nominatim/tokenizer/icu_name_processor.py b/nominatim/tokenizer/icu_name_processor.py index 93d2b0ff..544f5ebc 100644 --- a/nominatim/tokenizer/icu_name_processor.py +++ b/nominatim/tokenizer/icu_name_processor.py @@ -8,67 +8,25 @@ import itertools from icu import Transliterator import datrie -from nominatim.db.properties import set_property, get_property -from nominatim.tokenizer import icu_variants as variants - -DBCFG_IMPORT_NORM_RULES = "tokenizer_import_normalisation" -DBCFG_IMPORT_TRANS_RULES = "tokenizer_import_transliteration" -DBCFG_IMPORT_REPLACEMENTS = "tokenizer_import_replacements" -DBCFG_SEARCH_STD_RULES = "tokenizer_search_standardization" - - -class ICUNameProcessorRules: - """ Data object that saves the rules needed for the name processor. - - The rules can either be initialised through an ICURuleLoader or - be loaded from a database when a connection is given. 
- """ - def __init__(self, loader=None, conn=None): - if loader is not None: - self.norm_rules = loader.get_normalization_rules() - self.trans_rules = loader.get_transliteration_rules() - self.replacements = loader.get_replacement_pairs() - self.search_rules = loader.get_search_rules() - elif conn is not None: - self.norm_rules = get_property(conn, DBCFG_IMPORT_NORM_RULES) - self.trans_rules = get_property(conn, DBCFG_IMPORT_TRANS_RULES) - self.replacements = \ - variants.unpickle_variant_set(get_property(conn, DBCFG_IMPORT_REPLACEMENTS)) - self.search_rules = get_property(conn, DBCFG_SEARCH_STD_RULES) - else: - assert False, "Parameter loader or conn required." - - - def save_rules(self, conn): - """ Save the rules in the property table of the given database. - the rules can be loaded again by handing in a connection into - the constructor of the class. - """ - set_property(conn, DBCFG_IMPORT_NORM_RULES, self.norm_rules) - set_property(conn, DBCFG_IMPORT_TRANS_RULES, self.trans_rules) - set_property(conn, DBCFG_IMPORT_REPLACEMENTS, - variants.pickle_variant_set(self.replacements)) - set_property(conn, DBCFG_SEARCH_STD_RULES, self.search_rules) - class ICUNameProcessor: """ Collects the different transformation rules for normalisation of names - and provides the functions to aply the transformations. + and provides the functions to apply the transformations. """ - def __init__(self, rules): + def __init__(self, norm_rules, trans_rules, replacements): self.normalizer = Transliterator.createFromRules("icu_normalization", - rules.norm_rules) + norm_rules) self.to_ascii = Transliterator.createFromRules("icu_to_ascii", - rules.trans_rules + + trans_rules + ";[:Space:]+ > ' '") self.search = Transliterator.createFromRules("icu_search", - rules.search_rules) + norm_rules + trans_rules) # Intermediate reorder by source. Also compute required character set. immediate = defaultdict(list) chars = set() - for variant in rules.replacements: + for variant in replacements: if variant.source[-1] == ' ' and variant.replacement[-1] == ' ': replstr = variant.replacement[:-1] else: diff --git a/nominatim/tokenizer/icu_rule_loader.py b/nominatim/tokenizer/icu_rule_loader.py index 0e6e40b4..330179bb 100644 --- a/nominatim/tokenizer/icu_rule_loader.py +++ b/nominatim/tokenizer/icu_rule_loader.py @@ -2,17 +2,26 @@ Helper class to create ICU rules from a configuration file. """ import io +import json import logging import itertools import re from icu import Transliterator +from nominatim.db.properties import set_property, get_property from nominatim.errors import UsageError +from nominatim.tokenizer.icu_name_processor import ICUNameProcessor +from nominatim.tokenizer.place_sanitizer import PlaceSanitizer import nominatim.tokenizer.icu_variants as variants LOG = logging.getLogger() +DBCFG_IMPORT_NORM_RULES = "tokenizer_import_normalisation" +DBCFG_IMPORT_TRANS_RULES = "tokenizer_import_transliteration" +DBCFG_IMPORT_ANALYSIS_RULES = "tokenizer_import_analysis_rules" + + def _flatten_config_list(content): if not content: return [] @@ -46,12 +55,52 @@ class ICURuleLoader: """ Compiler for ICU rules from a tokenizer configuration file. 
""" - def __init__(self, rules): + def __init__(self, config): + rules = config.load_sub_configuration('icu_tokenizer.yaml', + config='TOKENIZER_CONFIG') + self.variants = set() self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization') self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration') - self._parse_variant_list(self._get_section(rules, 'variants')) + self.analysis_rules = self._get_section(rules, 'variants') + self._parse_variant_list() + + # Load optional sanitizer rule set. + self.sanitizer_rules = rules.get('sanitizers', []) + + + def load_config_from_db(self, conn): + """ Get previously saved parts of the configuration from the + database. + """ + self.normalization_rules = get_property(conn, DBCFG_IMPORT_NORM_RULES) + self.transliteration_rules = get_property(conn, DBCFG_IMPORT_TRANS_RULES) + self.analysis_rules = json.loads(get_property(conn, DBCFG_IMPORT_ANALYSIS_RULES)) + self._parse_variant_list() + + + def save_config_to_db(self, conn): + """ Save the part of the configuration that cannot be changed into + the database. + """ + set_property(conn, DBCFG_IMPORT_NORM_RULES, self.normalization_rules) + set_property(conn, DBCFG_IMPORT_TRANS_RULES, self.transliteration_rules) + set_property(conn, DBCFG_IMPORT_ANALYSIS_RULES, json.dumps(self.analysis_rules)) + + + def make_sanitizer(self): + """ Create a place sanitizer from the configured rules. + """ + return PlaceSanitizer(self.sanitizer_rules) + + + def make_token_analysis(self): + """ Create a token analyser from the reviouly loaded rules. + """ + return ICUNameProcessor(self.normalization_rules, + self.transliteration_rules, + self.variants) def get_search_rules(self): @@ -112,7 +161,9 @@ class ICURuleLoader: return ';'.join(_flatten_config_list(content)) + ';' - def _parse_variant_list(self, rules): + def _parse_variant_list(self): + rules = self.analysis_rules + self.variants.clear() if not rules: diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 5768fd35..2ece10f2 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -13,8 +13,8 @@ from nominatim.db.connection import connect from nominatim.db.properties import set_property, get_property from nominatim.db.utils import CopyBuffer from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.indexer.place_info import PlaceInfo from nominatim.tokenizer.icu_rule_loader import ICURuleLoader -from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules from nominatim.tokenizer.base import AbstractAnalyzer, AbstractTokenizer DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization" @@ -36,7 +36,7 @@ class LegacyICUTokenizer(AbstractTokenizer): def __init__(self, dsn, data_dir): self.dsn = dsn self.data_dir = data_dir - self.naming_rules = None + self.loader = None self.term_normalization = None @@ -46,9 +46,8 @@ class LegacyICUTokenizer(AbstractTokenizer): This copies all necessary data in the project directory to make sure the tokenizer remains stable even over updates. 
""" - loader = ICURuleLoader(config.load_sub_configuration('icu_tokenizer.yaml', - config='TOKENIZER_CONFIG')) - self.naming_rules = ICUNameProcessorRules(loader=loader) + self.loader = ICURuleLoader(config) + self.term_normalization = config.TERM_NORMALIZATION self._install_php(config.lib_dir.php) @@ -59,11 +58,13 @@ class LegacyICUTokenizer(AbstractTokenizer): self._init_db_tables(config) - def init_from_project(self): + def init_from_project(self, config): """ Initialise the tokenizer from the project directory. """ + self.loader = ICURuleLoader(config) + with connect(self.dsn) as conn: - self.naming_rules = ICUNameProcessorRules(conn=conn) + self.loader.load_config_from_db(conn) self.term_normalization = get_property(conn, DBCFG_TERM_NORMALIZATION) @@ -81,12 +82,12 @@ class LegacyICUTokenizer(AbstractTokenizer): sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql') - def check_database(self): + def check_database(self, config): """ Check that the tokenizer is set up correctly. """ - self.init_from_project() + self.init_from_project(config) - if self.naming_rules is None: + if self.term_normalization is None: return "Configuration for tokenizer 'icu' are missing." return None @@ -107,7 +108,8 @@ class LegacyICUTokenizer(AbstractTokenizer): Analyzers are not thread-safe. You need to instantiate one per thread. """ - return LegacyICUNameAnalyzer(self.dsn, ICUNameProcessor(self.naming_rules)) + return LegacyICUNameAnalyzer(self.dsn, self.loader.make_sanitizer(), + self.loader.make_token_analysis()) def _install_php(self, phpdir): @@ -118,7 +120,7 @@ class LegacyICUTokenizer(AbstractTokenizer): = 0: - full_names.add(name[:brace_idx].strip()) - - return full_names - - def _add_postcode(self, postcode): """ Make sure the normalized postcode is present in the word table. """ @@ -511,7 +519,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer): postcode = self.normalize_postcode(postcode) if postcode not in self._cache.postcodes: - term = self.name_processor.get_search_normalized(postcode) + term = self._search_normalized(postcode) if not term: return diff --git a/nominatim/tokenizer/icu_variants.py b/nominatim/tokenizer/icu_variants.py index 9ebe3684..93272f58 100644 --- a/nominatim/tokenizer/icu_variants.py +++ b/nominatim/tokenizer/icu_variants.py @@ -2,7 +2,6 @@ Data structures for saving variant expansions for ICU tokenizer. """ from collections import namedtuple -import json _ICU_VARIANT_PORPERTY_FIELDS = ['lang'] @@ -24,34 +23,3 @@ class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORP ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties']) - - -def pickle_variant_set(variants): - """ Serializes an iterable of variant rules to a string. - """ - # Create a list of property sets. So they don't need to be duplicated - properties = {} - pid = 1 - for variant in variants: - if variant.properties not in properties: - properties[variant.properties] = pid - pid += 1 - - # Convert the variants into a simple list. - variants = [(v.source, v.replacement, properties[v.properties]) for v in variants] - - # Convert everythin to json. - return json.dumps({'properties': {v: k._asdict() for k, v in properties.items()}, - 'variants': variants}) - - -def unpickle_variant_set(variant_string): - """ Deserializes a variant string that was previously created with - pickle_variant_set() into a set of ICUVariants. 
- """ - data = json.loads(variant_string) - - properties = {int(k): ICUVariantProperties.from_rules(v) - for k, v in data['properties'].items()} - - return set((ICUVariant(src, repl, properties[pid]) for src, repl, pid in data['variants'])) diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 8957426b..c935f20d 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -113,7 +113,7 @@ class LegacyTokenizer(AbstractTokenizer): self._init_db_tables(config) - def init_from_project(self): + def init_from_project(self, _): """ Initialise the tokenizer from the project directory. """ with connect(self.dsn) as conn: @@ -142,7 +142,7 @@ class LegacyTokenizer(AbstractTokenizer): modulepath=modulepath) - def check_database(self): + def check_database(self, _): """ Check that the tokenizer is set up correctly. """ hint = """\ @@ -405,16 +405,15 @@ class LegacyNameAnalyzer(AbstractAnalyzer): """ token_info = _TokenInfo(self._cache) - names = place.get('name') + names = place.name if names: token_info.add_names(self.conn, names) - country_feature = place.get('country_feature') - if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature): - self.add_country_names(country_feature.lower(), names) + if place.is_country(): + self.add_country_names(place.country_code, names) - address = place.get('address') + address = place.address if address: self._process_place_address(token_info, address) diff --git a/nominatim/tokenizer/place_sanitizer.py b/nominatim/tokenizer/place_sanitizer.py new file mode 100644 index 00000000..5961dcf0 --- /dev/null +++ b/nominatim/tokenizer/place_sanitizer.py @@ -0,0 +1,127 @@ +""" +Handler for cleaning name and address tags in place information before it +is handed to the token analysis. +""" +import importlib + +from nominatim.errors import UsageError + +class PlaceName: + """ A searchable name for a place together with properties. + Every name object saves the name proper and two basic properties: + * 'kind' describes the name of the OSM key used without any suffixes + (i.e. the part after the colon removed) + * 'suffix' contains the suffix of the OSM tag, if any. The suffix + is the part of the key after the first colon. + In addition to that, the name may have arbitrary additional attributes. + Which attributes are used, depends on the token analyser. + """ + + def __init__(self, name, kind, suffix): + self.name = name + self.kind = kind + self.suffix = suffix + self.attr = {} + + + def __repr__(self): + return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')" + + + def clone(self, name=None, kind=None, suffix=None, attr=None): + """ Create a deep copy of the place name, optionally with the + given parameters replaced. In the attribute list only the given + keys are updated. The list is not replaced completely. + In particular, the function cannot to be used to remove an + attribute from a place name. + """ + newobj = PlaceName(name or self.name, + kind or self.kind, + suffix or self.suffix) + + newobj.attr.update(self.attr) + if attr: + newobj.attr.update(attr) + + return newobj + + + def set_attr(self, key, value): + """ Add the given property to the name. If the property was already + set, then the value is overwritten. + """ + self.attr[key] = value + + + def get_attr(self, key, default=None): + """ Return the given property or the value of 'default' if it + is not set. 
+ """ + return self.attr.get(key, default) + + + def has_attr(self, key): + """ Check if the given attribute is set. + """ + return key in self.attr + + +class _ProcessInfo: + """ Container class for information handed into to handler functions. + The 'names' and 'address' members are mutable. A handler must change + them by either modifying the lists place or replacing the old content + with a new list. + """ + + def __init__(self, place): + self.place = place + self.names = self._convert_name_dict(place.name) + self.address = self._convert_name_dict(place.address) + + + @staticmethod + def _convert_name_dict(names): + """ Convert a dictionary of names into a list of PlaceNames. + The dictionary key is split into the primary part of the key + and the suffix (the part after an optional colon). + """ + out = [] + + if names: + for key, value in names.items(): + parts = key.split(':', 1) + out.append(PlaceName(value.strip(), + parts[0].strip(), + parts[1].strip() if len(parts) > 1 else None)) + + return out + + +class PlaceSanitizer: + """ Controller class which applies sanitizer functions on the place + names and address before they are used by the token analysers. + """ + + def __init__(self, rules): + self.handlers = [] + + if rules: + for func in rules: + if 'step' not in func: + raise UsageError("Sanitizer rule is missing the 'step' attribute.") + module_name = 'nominatim.tokenizer.sanitizers.' + func['step'].replace('-', '_') + handler_module = importlib.import_module(module_name) + self.handlers.append(handler_module.create(func)) + + + def process_names(self, place): + """ Extract a sanitized list of names and address parts from the + given place. The function returns a tuple + (list of names, list of address names) + """ + obj = _ProcessInfo(place) + + for func in self.handlers: + func(obj) + + return obj.names, obj.address diff --git a/nominatim/tokenizer/sanitizers/__init__.py b/nominatim/tokenizer/sanitizers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nominatim/tokenizer/sanitizers/split_name_list.py b/nominatim/tokenizer/sanitizers/split_name_list.py new file mode 100644 index 00000000..f1514203 --- /dev/null +++ b/nominatim/tokenizer/sanitizers/split_name_list.py @@ -0,0 +1,35 @@ +""" +Name processor that splits name values with multiple values into their components. +""" +import re + +from nominatim.errors import UsageError + +def create(func): + """ Create a name processing function that splits name values with + multiple values into their components. The optional parameter + 'delimiters' can be used to define the characters that should be used + for splitting. The default is ',;'. 
+ """ + delimiter_set = set(func.get('delimiters', ',;')) + if not delimiter_set: + raise UsageError("Set of delimiters in split-name-list sanitizer is empty.") + + regexp = re.compile('\\s*[{}]\\s*'.format(''.join('\\' + d for d in delimiter_set))) + + def _process(obj): + if not obj.names: + return + + new_names = [] + for name in obj.names: + split_names = regexp.split(name.name) + print(split_names) + if len(split_names) == 1: + new_names.append(name) + else: + new_names.extend(name.clone(name=n) for n in split_names if n) + + obj.names = new_names + + return _process diff --git a/nominatim/tokenizer/sanitizers/strip_brace_terms.py b/nominatim/tokenizer/sanitizers/strip_brace_terms.py new file mode 100644 index 00000000..ec91bac9 --- /dev/null +++ b/nominatim/tokenizer/sanitizers/strip_brace_terms.py @@ -0,0 +1,22 @@ +""" +Sanitizer handling names with addendums in braces. +""" + +def create(_): + """ Create a name processing function that creates additional name variants + when a name has an addendum in brackets (e.g. "Halle (Saale)"). The + additional variant only contains the main name without the bracket part. + """ + def _process(obj): + """ Add variants for names that have a bracket extension. + """ + if obj.names: + new_names = [] + for name in (n for n in obj.names if '(' in n.name): + new_name = name.name.split('(')[0].strip() + if new_name: + new_names.append(name.clone(name=new_name)) + + obj.names.extend(new_names) + + return _process diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index d116554f..30b27d1f 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -166,7 +166,7 @@ def check_tokenizer(_, config): return CheckState.FAIL, dict(msg="""\ Cannot load tokenizer. Did the import finish sucessfully?""") - result = tokenizer.check_database() + result = tokenizer.check_database(config) if result is None: return CheckState.OK diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py index ff498f77..19a12682 100644 --- a/nominatim/tools/tiger_data.py +++ b/nominatim/tools/tiger_data.py @@ -7,12 +7,11 @@ import logging import os import tarfile -import psycopg2.extras - from nominatim.db.connection import connect from nominatim.db.async_connection import WorkerPool from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.errors import UsageError +from nominatim.indexer.place_info import PlaceInfo LOG = logging.getLogger() @@ -58,7 +57,7 @@ def handle_threaded_sql_statements(pool, fd, analyzer): address = dict(street=row['street'], postcode=row['postcode']) args = ('SRID=4326;' + row['geometry'], int(row['from']), int(row['to']), row['interpolation'], - psycopg2.extras.Json(analyzer.process_place(dict(address=address))), + PlaceInfo({'address': address}).analyze(analyzer), analyzer.normalize_postcode(row['postcode'])) except ValueError: continue diff --git a/settings/icu_tokenizer.yaml b/settings/icu_tokenizer.yaml index c0c8c043..08b7a7ff 100644 --- a/settings/icu_tokenizer.yaml +++ b/settings/icu_tokenizer.yaml @@ -24,6 +24,9 @@ transliteration: - "[^[:Ascii:]] >" - ":: lower ()" - ":: NFC ()" +sanitizers: + - step: split-name-list + - step: strip-brace-terms variants: - !include icu-rules/variants-bg.yaml - !include icu-rules/variants-ca.yaml diff --git a/test/python/dummy_tokenizer.py b/test/python/dummy_tokenizer.py index 69202bc3..6029eb7c 100644 --- a/test/python/dummy_tokenizer.py +++ b/test/python/dummy_tokenizer.py @@ -1,6 +1,8 @@ """ Tokenizer for testing. 
""" +from nominatim.indexer.place_info import PlaceInfo +from nominatim.config import Configuration def create(dsn, data_dir): """ Create a new instance of the tokenizer provided by this module. @@ -21,7 +23,8 @@ class DummyTokenizer: self.init_state = "new" - def init_from_project(self): + def init_from_project(self, config): + assert isinstance(config, Configuration) assert self.init_state is None self.init_state = "loaded" @@ -68,4 +71,5 @@ class DummyNameAnalyzer: @staticmethod def process_place(place): + assert isinstance(place, PlaceInfo) return {} diff --git a/test/python/test_db_connection.py b/test/python/test_db_connection.py index 41978e59..00c29a43 100644 --- a/test/python/test_db_connection.py +++ b/test/python/test_db_connection.py @@ -100,6 +100,6 @@ def test_get_pg_env_overwrite_variable(monkeypatch): def test_get_pg_env_ignore_unknown(): - env = get_pg_env('tty=stuff', base_env={}) + env = get_pg_env('client_encoding=stuff', base_env={}) assert env == {} diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py index 60ad0bc4..4c9d940d 100644 --- a/test/python/test_indexing.py +++ b/test/python/test_indexing.py @@ -29,6 +29,7 @@ class IndexerTestDB: indexed_date TIMESTAMP, partition SMALLINT, admin_level SMALLINT, + country_code TEXT, address HSTORE, token_info JSONB, geometry_sector INTEGER)""") @@ -54,15 +55,26 @@ class IndexerTestDB: END IF; RETURN NEW; END; $$ LANGUAGE plpgsql;""") - cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, - OUT name HSTORE, - OUT address HSTORE, - OUT country_feature VARCHAR, - OUT linked_place_id BIGINT) + cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE") + cur.execute("""CREATE TYPE prepare_update_info AS ( + name HSTORE, + address HSTORE, + rank_address SMALLINT, + country_code TEXT, + class TEXT, + type TEXT, + linked_place_id BIGINT + )""") + cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex, + OUT result prepare_update_info) AS $$ BEGIN - address := p.address; - name := p.name; + result.address := p.address; + result.name := p.name; + result.class := p.class; + result.type := p.type; + result.country_code := p.country_code; + result.rank_address := p.rank_address; END; $$ LANGUAGE plpgsql STABLE; """) diff --git a/test/python/test_tokenizer_icu.py b/test/python/test_tokenizer_icu.py index ed079269..9a6f5a94 100644 --- a/test/python/test_tokenizer_icu.py +++ b/test/python/test_tokenizer_icu.py @@ -7,10 +7,10 @@ import yaml import pytest from nominatim.tokenizer import icu_tokenizer -from nominatim.tokenizer.icu_name_processor import ICUNameProcessorRules from nominatim.tokenizer.icu_rule_loader import ICURuleLoader from nominatim.db import properties from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.indexer.place_info import PlaceInfo from mock_icu_word_table import MockIcuWordTable @@ -67,11 +67,14 @@ def analyzer(tokenizer_factory, test_config, monkeypatch, monkeypatch.undo() def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',), - variants=('~gasse -> gasse', 'street => st', )): + variants=('~gasse -> gasse', 'street => st', ), + sanitizers=[]): cfgstr = {'normalization' : list(norm), - 'transliteration' : list(trans), - 'variants' : [ {'words': list(variants)}]} - tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgstr)) + 'sanitizers' : sanitizers, + 'transliteration' : list(trans), + 'variants' : [ {'words': list(variants)}]} + (test_config.project_dir / 
'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr)) + tok.loader = ICURuleLoader(test_config) return tok.name_analyzer() @@ -177,9 +180,9 @@ def test_init_from_project(monkeypatch, test_config, tokenizer_factory): monkeypatch.undo() tok = tokenizer_factory() - tok.init_from_project() + tok.init_from_project(test_config) - assert tok.naming_rules is not None + assert tok.loader is not None assert tok.term_normalization == ':: lower();' @@ -308,44 +311,54 @@ class TestPlaceNames: @pytest.fixture(autouse=True) def setup(self, analyzer, sql_functions): - with analyzer() as anl: + sanitizers = [{'step': 'split-name-list'}, + {'step': 'strip-brace-terms'}] + with analyzer(sanitizers=sanitizers) as anl: self.analyzer = anl yield anl def expect_name_terms(self, info, *expected_terms): tokens = self.analyzer.get_word_token_info(expected_terms) - print (tokens) for token in tokens: assert token[2] is not None, "No token for {0}".format(token) assert eval(info['names']) == set((t[2] for t in tokens)) + def process_named_place(self, names): + return self.analyzer.process_place(PlaceInfo({'name': names})) + + def test_simple_names(self): - info = self.analyzer.process_place({'name': {'name': 'Soft bAr', 'ref': '34'}}) + info = self.process_named_place({'name': 'Soft bAr', 'ref': '34'}) self.expect_name_terms(info, '#Soft bAr', '#34', 'Soft', 'bAr', '34') @pytest.mark.parametrize('sep', [',' , ';']) def test_names_with_separator(self, sep): - info = self.analyzer.process_place({'name': {'name': sep.join(('New York', 'Big Apple'))}}) + info = self.process_named_place({'name': sep.join(('New York', 'Big Apple'))}) self.expect_name_terms(info, '#New York', '#Big Apple', 'new', 'york', 'big', 'apple') def test_full_names_with_bracket(self): - info = self.analyzer.process_place({'name': {'name': 'Houseboat (left)'}}) + info = self.process_named_place({'name': 'Houseboat (left)'}) self.expect_name_terms(info, '#Houseboat (left)', '#Houseboat', 'houseboat', 'left') def test_country_name(self, word_table): - info = self.analyzer.process_place({'name': {'name': 'Norge'}, - 'country_feature': 'no'}) + place = PlaceInfo({'name' : {'name': 'Norge'}, + 'country_code': 'no', + 'rank_address': 4, + 'class': 'boundary', + 'type': 'administrative'}) + + info = self.analyzer.process_place(place) self.expect_name_terms(info, '#norge', 'norge') assert word_table.get_country() == {('no', 'NORGE')} @@ -361,7 +374,7 @@ class TestPlaceAddress: def process_address(self, **kwargs): - return self.analyzer.process_place({'address': kwargs}) + return self.analyzer.process_place(PlaceInfo({'address': kwargs})) def name_token_set(self, *expected_terms): diff --git a/test/python/test_tokenizer_icu_name_processor.py b/test/python/test_tokenizer_icu_name_processor.py index ae05988a..d0ed21ec 100644 --- a/test/python/test_tokenizer_icu_name_processor.py +++ b/test/python/test_tokenizer_icu_name_processor.py @@ -4,15 +4,17 @@ Tests for import name normalisation and variant generation. 
from textwrap import dedent import pytest -import yaml from nominatim.tokenizer.icu_rule_loader import ICURuleLoader -from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules from nominatim.errors import UsageError @pytest.fixture -def cfgfile(): +def cfgfile(def_config, tmp_path): + project_dir = tmp_path / 'project_dir' + project_dir.mkdir() + def_config.project_dir = project_dir + def _create_config(*variants, **kwargs): content = dedent("""\ normalization: @@ -30,7 +32,9 @@ def cfgfile(): content += '\n'.join((" - " + s for s in variants)) + '\n' for k, v in kwargs: content += " {}: {}\n".format(k, v) - return yaml.safe_load(content) + (project_dir / 'icu_tokenizer.yaml').write_text(content) + + return def_config return _create_config @@ -40,10 +44,9 @@ def get_normalized_variants(proc, name): def test_variants_empty(cfgfile): - fpath = cfgfile('saint -> 🜵', 'street -> st') + config = cfgfile('saint -> 🜵', 'street -> st') - rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath)) - proc = ICUNameProcessor(rules) + proc = ICURuleLoader(config).make_token_analysis() assert get_normalized_variants(proc, '🜵') == [] assert get_normalized_variants(proc, '🜳') == [] @@ -83,8 +86,8 @@ VARIANT_TESTS = [ @pytest.mark.parametrize("rules,name,variants", VARIANT_TESTS) def test_variants(cfgfile, rules, name, variants): - fpath = cfgfile(*rules) - proc = ICUNameProcessor(ICUNameProcessorRules(loader=ICURuleLoader(fpath))) + config = cfgfile(*rules) + proc = ICURuleLoader(config).make_token_analysis() result = get_normalized_variants(proc, name) @@ -93,10 +96,8 @@ def test_variants(cfgfile, rules, name, variants): def test_search_normalized(cfgfile): - fpath = cfgfile('~street => s,st', 'master => mstr') - - rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath)) - proc = ICUNameProcessor(rules) + config = cfgfile('~street => s,st', 'master => mstr') + proc = ICURuleLoader(config).make_token_analysis() assert proc.get_search_normalized('Master Street') == 'master street' assert proc.get_search_normalized('Earnes St') == 'earnes st' diff --git a/test/python/test_tokenizer_icu_rule_loader.py b/test/python/test_tokenizer_icu_rule_loader.py index c3480de8..6ec53edc 100644 --- a/test/python/test_tokenizer_icu_rule_loader.py +++ b/test/python/test_tokenizer_icu_rule_loader.py @@ -12,7 +12,16 @@ from nominatim.errors import UsageError from icu import Transliterator @pytest.fixture -def cfgrules(): +def test_config(def_config, tmp_path): + project_dir = tmp_path / 'project_dir' + project_dir.mkdir() + def_config.project_dir = project_dir + + return def_config + + +@pytest.fixture +def cfgrules(test_config): def _create_config(*variants, **kwargs): content = dedent("""\ normalization: @@ -29,19 +38,21 @@ def cfgrules(): content += '\n'.join((" - " + s for s in variants)) + '\n' for k, v in kwargs: content += " {}: {}\n".format(k, v) - return yaml.safe_load(content) + (test_config.project_dir / 'icu_tokenizer.yaml').write_text(content) + + return test_config return _create_config -def test_empty_rule_set(): - rule_cfg = yaml.safe_load(dedent("""\ +def test_empty_rule_set(test_config): + (test_config.project_dir / 'icu_tokenizer.yaml').write_text(dedent("""\ normalization: transliteration: variants: """)) - rules = ICURuleLoader(rule_cfg) + rules = ICURuleLoader(test_config) assert rules.get_search_rules() == '' assert rules.get_normalization_rules() == '' assert rules.get_transliteration_rules() == '' @@ -50,11 +61,12 @@ def test_empty_rule_set(): CONFIG_SECTIONS = 
('normalization', 'transliteration', 'variants') @pytest.mark.parametrize("section", CONFIG_SECTIONS) -def test_missing_section(section): +def test_missing_section(section, test_config): rule_cfg = { s: {} for s in CONFIG_SECTIONS if s != section} + (test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(rule_cfg)) with pytest.raises(UsageError): - ICURuleLoader(rule_cfg) + ICURuleLoader(test_config) def test_get_search_rules(cfgrules): @@ -88,9 +100,8 @@ def test_get_transliteration_rules(cfgrules): assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt " -def test_transliteration_rules_from_file(def_config, tmp_path): - def_config.project_dir = tmp_path - cfgpath = tmp_path / ('test_config.yaml') +def test_transliteration_rules_from_file(test_config): + cfgpath = test_config.project_dir / ('icu_tokenizer.yaml') cfgpath.write_text(dedent("""\ normalization: transliteration: @@ -98,10 +109,10 @@ def test_transliteration_rules_from_file(def_config, tmp_path): - !include transliteration.yaml variants: """)) - transpath = tmp_path / ('transliteration.yaml') + transpath = test_config.project_dir / ('transliteration.yaml') transpath.write_text('- "x > y"') - loader = ICURuleLoader(def_config.load_sub_configuration('test_config.yaml')) + loader = ICURuleLoader(test_config) rules = loader.get_transliteration_rules() trans = Transliterator.createFromRules("test", rules) diff --git a/test/python/test_tokenizer_legacy.py b/test/python/test_tokenizer_legacy.py index 4dd3a141..53d45c1c 100644 --- a/test/python/test_tokenizer_legacy.py +++ b/test/python/test_tokenizer_legacy.py @@ -5,6 +5,7 @@ import shutil import pytest +from nominatim.indexer.place_info import PlaceInfo from nominatim.tokenizer import legacy_tokenizer from nominatim.db import properties from nominatim.errors import UsageError @@ -131,10 +132,10 @@ def test_init_module_custom(tokenizer_factory, test_config, assert not (test_config.project_dir / 'module').exists() -def test_init_from_project(tokenizer_setup, tokenizer_factory): +def test_init_from_project(tokenizer_setup, tokenizer_factory, test_config): tok = tokenizer_factory() - tok.init_from_project() + tok.init_from_project(test_config) assert tok.normalization is not None @@ -284,21 +285,21 @@ def test_add_more_country_names(analyzer, word_table, make_standard_name): def test_process_place_names(analyzer, make_keywords): - info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}}) + info = analyzer.process_place(PlaceInfo({'name' : {'name' : 'Soft bAr', 'ref': '34'}})) assert info['names'] == '{1,2,3}' @pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345']) def test_process_place_postcode(analyzer, create_postcode_id, word_table, pcode): - analyzer.process_place({'address': {'postcode' : pcode}}) + analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}})) assert word_table.get_postcodes() == {pcode, } @pytest.mark.parametrize('pcode', ['12:23', 'ab;cd;f', '123;836']) def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table, pcode): - analyzer.process_place({'address': {'postcode' : pcode}}) + analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}})) assert not word_table.get_postcodes() @@ -319,7 +320,7 @@ class TestHousenumberName: @staticmethod @pytest.mark.parametrize('hnr', ['123a', '1', '101']) def test_process_place_housenumbers_simple(analyzer, hnr): - info = analyzer.process_place({'address': {'housenumber' : hnr}}) + info = analyzer.process_place(PlaceInfo({'address': 
{'housenumber' : hnr}})) assert info['hnr'] == hnr assert info['hnr_tokens'].startswith("{") @@ -327,15 +328,15 @@ class TestHousenumberName: @staticmethod def test_process_place_housenumbers_lists(analyzer): - info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}}) + info = analyzer.process_place(PlaceInfo({'address': {'conscriptionnumber' : '1; 2;3'}})) assert set(info['hnr'].split(';')) == set(('1', '2', '3')) @staticmethod def test_process_place_housenumbers_duplicates(analyzer): - info = analyzer.process_place({'address': {'housenumber' : '134', + info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : '134', 'conscriptionnumber' : '134', - 'streetnumber' : '99a'}}) + 'streetnumber' : '99a'}})) assert set(info['hnr'].split(';')) == set(('134', '99a')) diff --git a/test/python/test_tools_check_database.py b/test/python/test_tools_check_database.py index aed5cb7e..edba3236 100644 --- a/test/python/test_tools_check_database.py +++ b/test/python/test_tools_check_database.py @@ -53,7 +53,7 @@ def test_check_tokenizer(temp_db_conn, def_config, monkeypatch, check_result, state): class _TestTokenizer: @staticmethod - def check_database(): + def check_database(_): return check_result monkeypatch.setattr(chkdb.tokenizer_factory, 'get_tokenizer_for_db', diff --git a/test/python/tokenizer/sanitizers/test_split_name_list.py b/test/python/tokenizer/sanitizers/test_split_name_list.py new file mode 100644 index 00000000..ee745469 --- /dev/null +++ b/test/python/tokenizer/sanitizers/test_split_name_list.py @@ -0,0 +1,65 @@ +""" +Tests for the sanitizer that splitts multivalue lists. +""" +import pytest + +from nominatim.tokenizer.place_sanitizer import PlaceSanitizer +from nominatim.indexer.place_info import PlaceInfo + +from nominatim.errors import UsageError + +def run_sanitizer_on(**kwargs): + place = PlaceInfo({'name': kwargs}) + name, _ = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place) + + return sorted([(p.name, p.kind, p.suffix) for p in name]) + + +def sanitize_with_delimiter(delimiter, name): + place = PlaceInfo({'name': {'name': name}}) + san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}]) + name, _ = san.process_names(place) + + return sorted([p.name for p in name]) + + +def test_simple(): + assert run_sanitizer_on(name='ABC') == [('ABC', 'name', None)] + assert run_sanitizer_on(name='') == [('', 'name', None)] + + +def test_splits(): + assert run_sanitizer_on(name='A;B;C') == [('A', 'name', None), + ('B', 'name', None), + ('C', 'name', None)] + assert run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None), + ('boat', 'short_name', None)] + + +def test_empty_fields(): + assert run_sanitizer_on(name='A;;B') == [('A', 'name', None), + ('B', 'name', None)] + assert run_sanitizer_on(name='A; ,B') == [('A', 'name', None), + ('B', 'name', None)] + assert run_sanitizer_on(name=' ;B') == [('B', 'name', None)] + assert run_sanitizer_on(name='B,') == [('B', 'name', None)] + + +def test_custom_delimiters(): + assert sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3'] + assert sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@'] + assert sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to'] + assert sanitize_with_delimiter(' ', 'morning sun') == ['morning', 'sun'] + + +def test_empty_delimiter_set(): + with pytest.raises(UsageError): + sanitize_with_delimiter('', 'abc') + + +def test_no_name_list(): + place = PlaceInfo({'address': {'housenumber': '3'}}) + name, address = 
PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place) + + assert not name + assert len(address) == 1 diff --git a/test/python/tokenizer/sanitizers/test_strip_brace_terms.py b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py new file mode 100644 index 00000000..50af2449 --- /dev/null +++ b/test/python/tokenizer/sanitizers/test_strip_brace_terms.py @@ -0,0 +1,44 @@ +""" +Tests for the sanitizer that handles braced suffixes. +""" +import pytest + +from nominatim.tokenizer.place_sanitizer import PlaceSanitizer +from nominatim.indexer.place_info import PlaceInfo + +def run_sanitizer_on(**kwargs): + place = PlaceInfo({'name': kwargs}) + name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place) + + return sorted([(p.name, p.kind, p.suffix) for p in name]) + + +def test_no_braces(): + assert run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None), + ('foo', 'name', None)] + + +def test_simple_braces(): + assert run_sanitizer_on(name='Halle (Saale)', ref='3')\ + == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)] + assert run_sanitizer_on(name='ack ( bar')\ + == [('ack', 'name', None), ('ack ( bar', 'name', None)] + + +def test_only_braces(): + assert run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)] + + +def test_double_braces(): + assert run_sanitizer_on(name='a((b))') == [('a', 'name', None), + ('a((b))', 'name', None)] + assert run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None), + ('a (b) (c)', 'name', None)] + + +def test_no_names(): + place = PlaceInfo({'address': {'housenumber': '3'}}) + name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place) + + assert not name + assert len(address) == 1 diff --git a/test/python/tokenizer/test_place_sanitizer.py b/test/python/tokenizer/test_place_sanitizer.py new file mode 100644 index 00000000..389b068c --- /dev/null +++ b/test/python/tokenizer/test_place_sanitizer.py @@ -0,0 +1,71 @@ +""" +Tests for execution of the sanitztion step. 
+""" +import pytest + +from nominatim.errors import UsageError +import nominatim.tokenizer.place_sanitizer as sanitizer +from nominatim.indexer.place_info import PlaceInfo + + +def test_placeinfo_clone_new_name(): + place = sanitizer.PlaceName('foo', 'ki', 'su') + + newplace = place.clone(name='bar') + + assert place.name == 'foo' + assert newplace.name == 'bar' + assert newplace.kind == 'ki' + assert newplace.suffix == 'su' + + +def test_placeinfo_clone_merge_attr(): + place = sanitizer.PlaceName('foo', 'ki', 'su') + place.set_attr('a1', 'v1') + place.set_attr('a2', 'v2') + + newplace = place.clone(attr={'a2': 'new', 'b2': 'foo'}) + + assert place.get_attr('a2') == 'v2' + assert place.get_attr('b2') is None + assert newplace.get_attr('a1') == 'v1' + assert newplace.get_attr('a2') == 'new' + assert newplace.get_attr('b2') == 'foo' + + +def test_placeinfo_has_attr(): + place = sanitizer.PlaceName('foo', 'ki', 'su') + place.set_attr('a1', 'v1') + + assert place.has_attr('a1') + assert not place.has_attr('whatever') + + +def test_sanitizer_default(): + san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}]) + + name, address = san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}, + 'address': {'street': 'Bald'}})) + + assert len(name) == 3 + assert all(isinstance(n, sanitizer.PlaceName) for n in name) + assert all(n.kind == 'name' for n in name) + assert all(n.suffix == 'de:de' for n in name) + + assert len(address) == 1 + assert all(isinstance(n, sanitizer.PlaceName) for n in address) + + +@pytest.mark.parametrize('rules', [None, []]) +def test_sanitizer_empty_list(rules): + san = sanitizer.PlaceSanitizer(rules) + + name, address = san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}})) + + assert len(name) == 1 + assert all(isinstance(n, sanitizer.PlaceName) for n in name) + + +def test_sanitizer_missing_step_definition(): + with pytest.raises(UsageError): + san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}])