remove postcodes from countries that don't have them

The postcodes will only be removed as a 'computed postcode'; they
are still searchable for the given object.
This commit is contained in:
Sarah Hoffmann 2022-05-12 11:43:47 +02:00
parent 21fb501699
commit 8080625747
6 changed files with 45 additions and 6 deletions

View File

@ -13,4 +13,4 @@ ignored-classes=NominatimArgs,closing
# 'too-many-ancestors' is triggered already by deriving from UserDict
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use
good-names=i,x,y,fd,db
good-names=i,x,y,fd,db,cc

View File

@ -44,6 +44,20 @@ class SanitizerConfig(UserDict):
return values
def get_bool(self, param, default=None):
    """ Extract a configuration parameter as a boolean.

        The parameter must be one of the yaml boolean values or a
        user error will be raised. If `default` is given, then the
        parameter may also be missing or empty (i.e. None), in which
        case `default` is returned instead.

        Raises a UsageError when the resulting value is not a boolean.
        That includes a missing or empty parameter when no boolean
        `default` was supplied.
    """
    value = self.data.get(param)
    # A key that is present but has no value yields None. Handle it
    # like a missing key so that the documented "missing or empty"
    # fallback to `default` really applies in both situations.
    if value is None:
        value = default

    if not isinstance(value, bool):
        raise UsageError(f"Parameter '{param}' must be a boolean value ('yes' or 'no').")

    return value
def get_delimiter(self, default=',;'):
""" Return the 'delimiter' parameter in the configuration as a
compiled regular expression that can be used to split the names on the

View File

@ -48,8 +48,7 @@ class _AnalyzerByLanguage:
self.deflangs = {}
if use_defaults in ('mono', 'all'):
for ccode, prop in country_info.iterate():
clangs = prop['languages']
for ccode, clangs in country_info.iterate('languages'):
if len(clangs) == 1 or use_defaults == 'all':
if self.whitelist:
self.deflangs[ccode] = [l for l in clangs if l in self.whitelist]

View File

@ -84,10 +84,20 @@ def setup_country_config(config):
_COUNTRY_INFO.load(config)
def iterate(prop=None):
    """ Iterate over country code and properties.

        When `prop` is None, all countries are returned with their complete
        set of properties (the result of `_COUNTRY_INFO.items()` is handed
        back unchanged).

        If `prop` is given, then only countries are returned where the
        given property is set. The second item of the tuple contains only
        the content of the given property. In this case the result is a
        lazy generator, so it can be consumed only once.
    """
    if prop is None:
        return _COUNTRY_INFO.items()

    # Countries lacking the property are skipped rather than reported
    # with an empty value.
    return ((c, p[prop]) for c, p in _COUNTRY_INFO.items() if prop in p)
def setup_country_tables(dsn, sql_dir, ignore_partitions=False):

View File

@ -32,6 +32,8 @@ sanitizers:
- streetnumber
convert-to-name:
- (\A|.*,)[^\d,]{3,}(,.*|\Z)
- step: clean-postcodes
convert-to-address: yes
- step: split-name-list
- step: strip-brace-terms
- step: tag-analyzer-by-language

View File

@ -246,4 +246,18 @@ Feature: Import of postcodes
| 12 445 4 | ca | 25 | 11 |
| A1:BC10 | ca | 25 | 11 |
Scenario: Postcodes outside all countries are not added to the postcode and word table
Given the places
| osm | class | type | addr+postcode | addr+housenumber | addr+place | geometry |
| N34 | place | house | 01982 | 111 | Null Island | 0 0.00001 |
And the places
| osm | class | type | name | geometry |
| N1 | place | hamlet | Null Island | 0 0 |
When importing
Then location_postcode contains exactly
| country | postcode | geometry |
And there are no word tokens for postcodes 01982
When sending search query "111, 01982 Null Island"
Then results contain
| osm | display_name |
| N34 | 111, Null Island, 01982 |