From e828d0d3f79400b3f3541b38d3a7d4de5d9cfc35 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 30 May 2022 14:32:36 +0200 Subject: [PATCH] move quoting hack to wiki loader The bad quotes around the type for special phrases specifically occur in the Wiki pages, so they should be removed by the loader and not in the generic SpecialPhrase object. --- .../tools/special_phrases/sp_wiki_loader.py | 7 +++++- .../tools/special_phrases/special_phrase.py | 4 +--- test/python/tools/test_sp_wiki_loader.py | 23 ++++++------------- .../testdata/special_phrases_test_content.txt | 2 +- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 6093fa45..ca4758ac 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -37,6 +37,8 @@ class SPWikiLoader: self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) + # Hack around a bug where building=yes was imported with quotes into the wiki + self.type_fix_pattern = re.compile(r'\"|"') self._load_languages() @@ -52,7 +54,10 @@ class SPWikiLoader: matches = self.occurence_pattern.findall(loaded_xml) for match in matches: - yield SpecialPhrase(match[0], match[1], match[2], match[3]) + yield SpecialPhrase(match[0], + match[1], + self.type_fix_pattern.sub('', match[2]), + match[3]) def _load_languages(self): diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py index dc7f69fe..d9bf9e58 100644 --- a/nominatim/tools/special_phrases/special_phrase.py +++ b/nominatim/tools/special_phrases/special_phrase.py @@ -10,8 +10,6 @@ This class is a model used to transfer a special phrase through the process of load and importation. """ -import re - class SpecialPhrase(): """ Model representing a special phrase. 
@@ -20,7 +18,7 @@ class SpecialPhrase(): self.p_label = p_label.strip() self.p_class = p_class.strip() # Hack around a bug where building=yes was imported with quotes into the wiki - self.p_type = re.sub(r'\"|"', '', p_type.strip()) + self.p_type = p_type.strip() # Needed if some operator in the wiki are not written in english p_operator = p_operator.strip().lower() self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py index 5bd45de3..2f47734e 100644 --- a/test/python/tools/test_sp_wiki_loader.py +++ b/test/python/tools/test_sp_wiki_loader.py @@ -10,24 +10,21 @@ import pytest from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader -@pytest.fixture -def xml_wiki_content(src_dir): - """ - return the content of the static xml test file. - """ - xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' - return xml_test_content.read_text() - @pytest.fixture -def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content): +def sp_wiki_loader(src_dir, monkeypatch, def_config): """ Return an instance of SPWikiLoader. """ monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en') loader = SPWikiLoader(def_config) + + def _mock_wiki_content(lang): + xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' + return xml_test_content.read_text() + monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content', - lambda lang: xml_wiki_content) + _mock_wiki_content) return loader @@ -38,13 +35,7 @@ def test_generate_phrases(sp_wiki_loader): the 'en' special phrases. """ phrases = list(sp_wiki_loader.generate_phrases()) - check_phrases_content(phrases) -def check_phrases_content(phrases): - """ - Asserts that the given phrases list contains - the right phrases of the 'en' special phrases. 
- """ assert set((p.p_label, p.p_class, p.p_type, p.p_operator) for p in phrases) ==\ {('Zip Line', 'aerialway', 'zip_line', '-'), ('Zip Lines', 'aerialway', 'zip_line', '-'), diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt index e790ca58..e5f340b9 100644 --- a/test/testdata/special_phrases_test_content.txt +++ b/test/testdata/special_phrases_test_content.txt @@ -70,7 +70,7 @@ wikitext text/x-wiki -== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] +== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! 
Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] cst5x7tt58izti1pxzgljf27tx8qjcj