mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-23 21:54:10 +03:00
d35400a7d7
Implements per-name choice of analyzer. If a non-default analyzer is chosen, then the 'word' identifier is extended with the name of the analyzer, so that we still have unique items.
24 lines
985 B
Python
24 lines
985 B
Python
"""
|
|
Container class collecting all components required to transform an OSM name
|
|
into a Nominatim token.
|
|
"""
|
|
|
|
from icu import Transliterator
|
|
|
|
class ICUTokenAnalysis:
    """ Bundle of the ICU transliterators and the per-name token analysis
        modules that belong to a single NameAnalyser instance.

        Attributes set by the constructor:
          normalizer - transliterator built from the normalization rules only.
          to_ascii   - transliterator built from the transliteration rules.
          search     - transliterator built from the normalization rules
                       followed by the transliteration rules.
          analysis   - dict mapping each analyzer name to the object returned
                       by the `create()` call of its rule set.
    """

    def __init__(self, norm_rules, trans_rules, analysis_rules):
        """ Create the transliterators and instantiate the analysis modules.

            `norm_rules` and `trans_rules` are concatenated with strings
            below, so they are expected to be ICU rule strings.
            `analysis_rules` must offer `items()` yielding (name, rule set)
            pairs, where each rule set has a `create()` method and a
            `config` attribute.
        """
        build = Transliterator.createFromRules

        self.normalizer = build("icu_normalization", norm_rules)

        # Every transliteration rule set gets a trailing rule that rewrites
        # runs of whitespace into a single blank. The extended rules are
        # shared by the 'to_ascii' and the 'search' transliterators.
        ascii_rules = trans_rules + ";[:Space:]+ > ' '"
        self.to_ascii = build("icu_to_ascii", ascii_rules)
        self.search = build("icu_search", norm_rules + ascii_rules)

        # Each analysis module receives the to_ascii transliterator together
        # with the configuration stored on its own rule set.
        modules = {}
        for label, rule_set in analysis_rules.items():
            modules[label] = rule_set.create(self.to_ascii, rule_set.config)
        self.analysis = modules
|