mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-10-27 03:29:24 +03:00
fbe40e005d
Include postcode pattern in postcode normalisation regex, instead of removing it from postcode pattern in config. It properly handles postcode validation and normalization when country code is part of the postcode, e.g. for Isle of Man, Jersey, Anguilla, Andorra, Cayman Islands and more. Fixes #3227.
115 lines
4.3 KiB
Python
115 lines
4.3 KiB
Python
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Functions for formatting postcodes according to their country-specific
|
|
format.
|
|
"""
|
|
from typing import Any, Mapping, Optional, Set, Match
|
|
import re
|
|
|
|
from nominatim.errors import UsageError
|
|
from nominatim.data import country_info
|
|
|
|
class CountryPostcodeMatcher:
    """ Matches and formats a postcode according to a format definition
        of the given country.

        The format definition is a mapping with a mandatory field
        'pattern' and an optional field 'output'. Within the pattern
        every 'd' is replaced by the character class `[0-9]` and every
        'l' by `[A-Z]` before it is compiled as a regular expression.
        'output' is a replacement template that is expanded against a
        successful match. It defaults to the complete matched text.
    """

    def __init__(self, country_code: str, config: Mapping[str, Any]) -> None:
        """ Compile the matching expressions for the country with the
            given country code from the format definition `config`.

            Raises a UsageError when `config` has no field 'pattern'.
        """
        if 'pattern' not in config:
            raise UsageError("Field 'pattern' required for 'postcode' "
                             f"for country '{country_code}'")

        pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')

        # The country code is matched literally, so make sure it can never
        # be interpreted as regular expression syntax.
        cc_prefix = re.escape(country_code.upper())

        # Accepts the postcode with an optional leading country code that
        # may be separated by a single space or dash. The postcode pattern
        # itself is part of the expression, so that a country code which
        # belongs to the postcode proper is not cut off.
        self.norm_pattern = re.compile(f'\\s*(?:{cc_prefix}[ -]?)?({pc_pattern})\\s*')
        self.pattern = re.compile(pc_pattern)

        self.output = config.get('output', r'\g<0>')

    def match(self, postcode: str) -> Optional[Match[str]]:
        """ Match the given postcode against the postcode pattern for this
            matcher. Returns a `re.Match` object if the match was successful
            and None otherwise.

            The postcode is upper-cased and surrounding whitespace as well
            as an optional leading country code are removed before it is
            compared against the pattern.
        """
        # Strip the whitespace explicitly instead of relying on the '\s*'
        # of the normalisation expression: a greedy postcode pattern
        # (for example the default '.*') would otherwise swallow trailing
        # whitespace into the postcode.
        normalized = self.norm_pattern.fullmatch(postcode.upper().strip())

        if normalized:
            # Match again with the bare pattern, so that the group numbers
            # used in the output template refer to the configured pattern.
            return self.pattern.fullmatch(normalized.group(1))

        return None

    def normalize(self, match: Match[str]) -> str:
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
            `match()`
        """
        return match.expand(self.output)
|
|
|
|
|
|
class PostcodeFormatter:
    """ Registry of the postcode formats of the countries together with
        the functions to look up, match and normalise postcodes.
    """

    def __init__(self) -> None:
        """ Set up the matchers from the 'postcode' entries of the
            country information.

            Raises a UsageError for an entry that is neither False nor
            a dictionary.
        """
        # Objects without a country code can't have a postcode per definition.
        self.country_without_postcode: Set[Optional[str]] = {None}
        self.country_matcher = {}
        self.default_matcher = CountryPostcodeMatcher('', {'pattern': '.*'})

        for ccode, prop in country_info.iterate('postcode'):
            if isinstance(prop, dict):
                # A dictionary holds the format definition of the country.
                self.country_matcher[ccode] = CountryPostcodeMatcher(ccode, prop)
                continue

            if prop is not False:
                raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")

            # An explicit False marks a country as having no postcodes.
            self.country_without_postcode.add(ccode)

    def set_default_pattern(self, pattern: str) -> None:
        """ Replace the matcher that is used for countries without a
            matcher of their own with one for the given pattern.
        """
        fallback_config = {'pattern': pattern}
        self.default_matcher = CountryPostcodeMatcher('', fallback_config)

    def get_matcher(self, country_code: Optional[str]) -> Optional[CountryPostcodeMatcher]:
        """ Return the CountryPostcodeMatcher responsible for the given
            country: the country-specific matcher when one is configured
            and the default matcher otherwise. Returns None when the
            country has no postcodes.
        """
        if country_code not in self.country_without_postcode:
            # None is always part of the set of countries without postcode.
            assert country_code is not None
            return self.country_matcher.get(country_code, self.default_matcher)

        return None

    def match(self, country_code: Optional[str], postcode: str) -> Optional[Match[str]]:
        """ Match the given postcode against the pattern of the given
            country. Returns a `re.Match` object when the country has a
            pattern and the postcode conforms to it and None otherwise.
        """
        matcher = self.get_matcher(country_code)

        return None if matcher is None else matcher.match(postcode)

    def normalize(self, country_code: str, match: Match[str]) -> str:
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
            `match()`
        """
        matcher = self.country_matcher.get(country_code, self.default_matcher)

        return matcher.normalize(match)
|