2022-05-24 22:45:06 +03:00
|
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
#
|
|
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
|
|
#
|
|
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
|
|
# For a full list of authors see the git log.
|
|
|
|
"""
|
|
|
|
Specialized processor for postcodes. Supports a 'lookup' variant of the
|
|
|
|
token, which produces variants with optional spaces.
|
|
|
|
"""
|
2022-07-13 18:18:53 +03:00
|
|
|
from typing import Mapping, Any, List
|
2022-05-24 22:45:06 +03:00
|
|
|
|
|
|
|
from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
|
|
|
|
|
|
|
|
### Configuration section
|
|
|
|
|
2022-07-13 18:18:53 +03:00
|
|
|
def configure(rules: Mapping[str, Any], normalization_rules: str) -> None: # pylint: disable=W0613
    """ Produce the configuration for this analysis module.

        Neither `rules` nor `normalization_rules` are inspected because
        the behaviour of the module is currently hard-coded. The
        resulting configuration is therefore always None.
    """
|
|
|
|
|
|
|
|
### Analysis section
|
|
|
|
|
2022-07-13 18:18:53 +03:00
|
|
|
def create(normalizer: Any, transliterator: Any, config: None) -> 'PostcodeTokenAnalysis': # pylint: disable=W0613
    """ Build the token analysis object provided by this module.

        `normalizer` and `transliterator` are handed on unchanged to the
        new analysis instance. `config` is not used.
    """
    analysis = PostcodeTokenAnalysis(normalizer, transliterator)
    return analysis
|
|
|
|
|
2022-06-21 23:05:35 +03:00
|
|
|
|
2022-05-24 22:45:06 +03:00
|
|
|
class PostcodeTokenAnalysis:
    """ Postcode-specific normalization and variant generation.

        Only use this analyser for postcodes. Its contract differs from
        the usual one: `normalize` is not required to produce a form that
        follows the normalization rules. It has to produce the canonical
        form of the postcode instead, which is also the form used for
        output. `get_variants_ascii` is then responsible for applying the
        standard normalization and transliteration to the variants it
        generates, so that the search algorithm recognises the postcodes
        correctly.
    """

    def __init__(self, norm: Any, trans: Any) -> None:
        # Objects providing `transliterate()`; only used when variants
        # are computed.
        self.norm = norm
        self.trans = trans

        # Generator for the variants in which spaces are optional.
        self.mutator = MutationVariantGenerator(' ', (' ', ''))


    def normalize(self, name: str) -> str:
        """ Return the canonical form of the postcode: the name without
            surrounding whitespace and converted to upper case.
        """
        trimmed = name.strip()
        return trimmed.upper()


    def get_variants_ascii(self, norm_name: str) -> List[str]:
        """ Return the list of spelling variants for a postcode.

            `norm_name` is the canonical form of the postcode. It is first
            run through the standard normalization. The mutator then
            produces the variants of that result in which all spaces are
            optional. Each non-empty variant is transliterated before it
            is added to the result.
        """
        # The canonical form follows postcode-specific rules. Bring it
        # into a form that can be found with the standard rules before
        # any variants are derived from it.
        searchable = self.norm.transliterate(norm_name)

        variants: List[str] = []
        for candidate in self.mutator.generate([searchable]):
            # Empty terms are dropped.
            if candidate:
                variants.append(self.trans.transliterate(candidate))

        return variants
|