mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-23 21:54:10 +03:00
1e86dc1d93
This is only available in Python 3.7.
58 lines
1.8 KiB
Python
58 lines
1.8 KiB
Python
"""
|
|
Data structures for saving variant expansions for ICU tokenizer.
|
|
"""
|
|
from collections import namedtuple
|
|
import json
|
|
|
|
_ICU_VARIANT_PORPERTY_FIELDS = ['lang']
|
|
|
|
|
|
class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)):
|
|
""" Data container for saving properties that describe when a variant
|
|
should be applied.
|
|
|
|
Property instances are hashable.
|
|
"""
|
|
@classmethod
|
|
def from_rules(cls, _):
|
|
""" Create a new property type from a generic dictionary.
|
|
|
|
The function only takes into account the properties that are
|
|
understood presently and ignores all others.
|
|
"""
|
|
return cls(lang=None)
|
|
|
|
|
|
ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties'])
|
|
|
|
|
|
def pickle_variant_set(variants):
|
|
""" Serializes an iterable of variant rules to a string.
|
|
"""
|
|
# Create a list of property sets. So they don't need to be duplicated
|
|
properties = {}
|
|
pid = 1
|
|
for variant in variants:
|
|
if variant.properties not in properties:
|
|
properties[variant.properties] = pid
|
|
pid += 1
|
|
|
|
# Convert the variants into a simple list.
|
|
variants = [(v.source, v.replacement, properties[v.properties]) for v in variants]
|
|
|
|
# Convert everythin to json.
|
|
return json.dumps({'properties': {v: k._asdict() for k, v in properties.items()},
|
|
'variants': variants})
|
|
|
|
|
|
def unpickle_variant_set(variant_string):
|
|
""" Deserializes a variant string that was previously created with
|
|
pickle_variant_set() into a set of ICUVariants.
|
|
"""
|
|
data = json.loads(variant_string)
|
|
|
|
properties = {int(k): ICUVariantProperties.from_rules(v)
|
|
for k, v in data['properties'].items()}
|
|
|
|
return set((ICUVariant(src, repl, properties[pid]) for src, repl, pid in data['variants']))
|