mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-24 06:22:08 +03:00
0bb59b2e22
When changing something in the default configuration of the sanitizers that refers to an analyzer that is not yet loaded, there shouldn't be any errors.
37 lines
1.4 KiB
Python
37 lines
1.4 KiB
Python
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Container class collecting all components required to transform an OSM name
|
|
into a Nominatim token.
|
|
"""
|
|
|
|
from icu import Transliterator
|
|
|
|
class ICUTokenAnalysis:
    """ Bundle of the ICU transliterators and the token analysis modules
        that belong to one NameAnalyser instance.

        Attributes set up by the constructor:
          normalizer - transliterator built from the normalization rules
          to_ascii   - transliterator built from the transliteration rules
          search     - transliterator built from both rule sets concatenated
          analysis   - dictionary mapping analyzer names to analyzer objects
    """

    def __init__(self, norm_rules, trans_rules, analysis_rules):
        """ Create the three transliterators and instantiate one analyzer
            for every entry of `analysis_rules`.

            `norm_rules` and `trans_rules` are rule strings handed to
            Transliterator.createFromRules(). `analysis_rules` is a mapping
            from analyzer name to an object that offers a `create()`
            function and a `config` attribute.
        """
        make_transliterator = Transliterator.createFromRules

        self.normalizer = make_transliterator("icu_normalization", norm_rules)

        # Extend the transliteration rules with a final rule that maps
        # runs of [:Space:] characters to a single blank. The extended
        # rules are used for 'to_ascii' as well as for 'search'.
        trans_rules += ";[:Space:]+ > ' '"
        self.to_ascii = make_transliterator("icu_to_ascii", trans_rules)

        search_rules = norm_rules + trans_rules
        self.search = make_transliterator("icu_search", search_rules)

        # Collect the analyzers locally first, so that the attribute is
        # only assigned once all of them have been created successfully.
        analyzers = {}
        for analyzer_name, rules in analysis_rules.items():
            analyzers[analyzer_name] = rules.create(self.normalizer,
                                                    self.to_ascii,
                                                    rules.config)
        self.analysis = analyzers

    def get_analyzer(self, name):
        """ Look up the analyzer registered under `name`. When there is
            no such analyzer (or the entry is empty), the default analyzer
            is returned instead, i.e. the one registered under the
            key None. Raises a KeyError when no default analyzer exists
            either.
        """
        analyzer = self.analysis.get(name)
        if analyzer:
            return analyzer

        return self.analysis[None]
|