move PlaceName into the generic data module

This commit is contained in:
Sarah Hoffmann 2022-07-29 11:39:55 +02:00
parent 094100bbf6
commit 34d27ed45c
6 changed files with 87 additions and 74 deletions

View File

@ -77,7 +77,7 @@ adding extra attributes) or completely replace the list with a different one.
#### PlaceName - extended naming information
::: nominatim.tokenizer.sanitizers.base.PlaceName
::: nominatim.data.place_name.PlaceName
rendering:
show_source: no
heading_level: 6
@ -94,7 +94,7 @@ functions:
heading_level: 6
::: nominatim.tokenizer.token_analysis.base.Analyser
::: nominatim.tokenizer.token_analysis.base.Analyzer
rendering:
show_source: no
heading_level: 6

View File

@ -0,0 +1,78 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Data class for a single name of a place.
"""
from typing import Optional, Dict, Mapping
class PlaceName:
    """ A single name or address part of a place.

        Besides the name proper, an object of this class describes the
        kind of name through two properties:

        * `kind` is the name of the OSM key used, without any suffixes
          (i.e. with the part after the colon removed)
        * `suffix` is the suffix of the OSM tag, if any. The suffix
          is the part of the key after the first colon.

        On top of that, a name may carry arbitrary additional attributes.
        How attributes are used depends on the sanitizers and token
        analysers. The exception is the 'analyzer' attribute. This
        attribute determines which token analysis module will be used to
        finalize the treatment of names.
    """

    def __init__(self, name: str, kind: str, suffix: Optional[str]):
        self.name, self.kind, self.suffix = name, kind, suffix
        # Free-form extra attributes; starts out empty for every new name.
        self.attr: Dict[str, str] = {}

    def __repr__(self) -> str:
        return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')"

    def clone(self, name: Optional[str] = None,
              kind: Optional[str] = None,
              suffix: Optional[str] = None,
              attr: Optional[Mapping[str, str]] = None) -> 'PlaceName':
        """ Return a copy of this place name, optionally with the given
            parameters replaced.

            A parameter that is None or empty leaves the corresponding
            value of this object in place. Attributes are merged: the
            keys given in `attr` are added or overwritten, all other
            attributes are carried over. Consequently, this function
            cannot be used to remove an attribute from a place name.
        """
        duplicate = PlaceName(name if name else self.name,
                              kind if kind else self.kind,
                              suffix if suffix else self.suffix)

        # Start from a private copy so the original's attributes stay untouched.
        merged = dict(self.attr)
        if attr:
            merged.update(attr)
        duplicate.attr = merged

        return duplicate

    def set_attr(self, key: str, value: str) -> None:
        """ Set the property `key` to `value`, overwriting any value
            that was set before.
        """
        self.attr[key] = value

    def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """ Return the value of the property `key`, or `default` when
            the property is not set.
        """
        try:
            return self.attr[key]
        except KeyError:
            return default

    def has_attr(self, key: str) -> bool:
        """ Return True when the attribute `key` is set.
        """
        return key in self.attr

View File

@ -23,7 +23,7 @@ from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.data.place_info import PlaceInfo
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim.tokenizer.sanitizers.base import PlaceName
from nominatim.data.place_name import PlaceName
from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis
from nominatim.tokenizer.base import AbstractAnalyzer, AbstractTokenizer

View File

@ -13,7 +13,8 @@ from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
from nominatim.errors import UsageError
from nominatim.config import Configuration
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo, PlaceName
from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo
from nominatim.data.place_name import PlaceName
from nominatim.data.place_info import PlaceInfo

View File

@ -7,80 +7,13 @@
"""
Common data types and protocols for sanitizers.
"""
from typing import Optional, Dict, List, Mapping, Callable
from typing import Optional, List, Mapping, Callable
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
from nominatim.data.place_info import PlaceInfo
from nominatim.data.place_name import PlaceName
from nominatim.typing import Protocol, Final
class PlaceName:
    """ Each name and address part of a place is encapsulated in an object of
        this class. It saves not only the name proper but also describes the
        kind of name with two properties:

        * `kind` describes the name of the OSM key used without any suffixes
          (i.e. the part after the colon removed)
        * `suffix` contains the suffix of the OSM tag, if any. The suffix
          is the part of the key after the first colon.

        In addition to that, a name may have arbitrary additional attributes.
        How attributes are used depends on the sanitizers and token analysers.
        The exception is the 'analyzer' attribute. This attribute determines
        which token analysis module will be used to finalize the treatment of
        names.
    """

    def __init__(self, name: str, kind: str, suffix: Optional[str]):
        self.name = name
        self.kind = kind
        self.suffix = suffix
        # Additional free-form attributes; empty for a newly created name.
        self.attr: Dict[str, str] = {}

    def __repr__(self) -> str:
        return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')"

    def clone(self, name: Optional[str] = None,
              kind: Optional[str] = None,
              suffix: Optional[str] = None,
              attr: Optional[Mapping[str, str]] = None) -> 'PlaceName':
        """ Create a copy of the place name, optionally with the
            given parameters replaced. In the attribute list only the given
            keys are updated. The list is not replaced completely.
            In particular, the function cannot be used to remove an
            attribute from a place name.
        """
        # `or` makes None and empty values alike fall back to the current value.
        newobj = PlaceName(name or self.name,
                           kind or self.kind,
                           suffix or self.suffix)

        # Carry over the existing attributes first, then apply the overrides.
        newobj.attr.update(self.attr)
        if attr:
            newobj.attr.update(attr)

        return newobj

    def set_attr(self, key: str, value: str) -> None:
        """ Add the given property to the name. If the property was already
            set, then the value is overwritten.
        """
        self.attr[key] = value

    def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """ Return the given property or the value of 'default' if it
            is not set.
        """
        return self.attr.get(key, default)

    def has_attr(self, key: str) -> bool:
        """ Check if the given attribute is set.
        """
        return key in self.attr
class ProcessInfo:
""" Container class for information handed into to handler functions.

View File

@ -27,7 +27,8 @@ Arguments:
from typing import Callable, Iterator, List
import re
from nominatim.tokenizer.sanitizers.base import ProcessInfo, PlaceName
from nominatim.tokenizer.sanitizers.base import ProcessInfo
from nominatim.data.place_name import PlaceName
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
class _HousenumberSanitizer: