From cce0e5ea38fe3466e157651e789554d99fbdc8fe Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 30 May 2022 14:12:46 +0200 Subject: [PATCH] convert special phrase loaders to generators Generators simplify the code quite a bit compared to the previous Iterator approach. --- .../tools/special_phrases/sp_csv_loader.py | 26 ++----- .../tools/special_phrases/sp_importer.py | 9 +-- .../tools/special_phrases/sp_wiki_loader.py | 61 +++++++-------- .../tools/test_import_special_phrases.py | 4 +- test/python/tools/test_sp_csv_loader.py | 76 ++++++++----------- test/python/tools/test_sp_wiki_loader.py | 17 +---- 6 files changed, 76 insertions(+), 117 deletions(-) diff --git a/nominatim/tools/special_phrases/sp_csv_loader.py b/nominatim/tools/special_phrases/sp_csv_loader.py index 55a9d8d0..0bd93c00 100644 --- a/nominatim/tools/special_phrases/sp_csv_loader.py +++ b/nominatim/tools/special_phrases/sp_csv_loader.py @@ -11,43 +11,31 @@ """ import csv import os -from collections.abc import Iterator from nominatim.tools.special_phrases.special_phrase import SpecialPhrase from nominatim.errors import UsageError -class SPCsvLoader(Iterator): +class SPCsvLoader: """ Handles loading of special phrases from external csv file. """ def __init__(self, csv_path): super().__init__() self.csv_path = csv_path - self.has_been_read = False - def __next__(self): - if self.has_been_read: - raise StopIteration() - self.has_been_read = True - self.check_csv_validity() - return self.parse_csv() - - def parse_csv(self): - """ - Open and parse the given csv file. + def generate_phrases(self): + """ Open and parse the given csv file. Create the corresponding SpecialPhrases. """ - phrases = set() + self._check_csv_validity() with open(self.csv_path, encoding='utf-8') as fd: reader = csv.DictReader(fd, delimiter=',') for row in reader: - phrases.add( - SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator']) - ) - return phrases + yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator']) - def check_csv_validity(self): + + def _check_csv_validity(self): """ Check that the csv file has the right extension. """ diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py index 8137142b..31bbc355 100644 --- a/nominatim/tools/special_phrases/sp_importer.py +++ b/nominatim/tools/special_phrases/sp_importer.py @@ -62,11 +62,10 @@ class SPImporter(): # Store pairs of class/type for further processing class_type_pairs = set() - for loaded_phrases in self.sp_loader: - for phrase in loaded_phrases: - result = self._process_phrase(phrase) - if result: - class_type_pairs.add(result) + for phrase in self.sp_loader.generate_phrases(): + result = self._process_phrase(phrase) + if result: + class_type_pairs.add(result) self._create_place_classtype_table_and_indexes(class_type_pairs) if should_replace: diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index b5f8db83..6093fa45 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -9,12 +9,24 @@ """ import re import logging -from collections.abc import Iterator from nominatim.tools.special_phrases.special_phrase import SpecialPhrase from nominatim.tools.exec_utils import get_url LOG = logging.getLogger() -class SPWikiLoader(Iterator): + +def _get_wiki_content(lang): + """ + Request and return the wiki page's content + corresponding to special phrases for a given lang. + Requested URL Example : + https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN + """ + url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ + + lang.upper() + return get_url(url) + + +class SPWikiLoader: """ Handles loading of special phrases from the wiki. """ @@ -27,28 +39,21 @@ class SPWikiLoader(Iterator): ) self._load_languages() - def __next__(self): - if not self.languages: - raise StopIteration - lang = self.languages.pop(0) - loaded_xml = self._get_wiki_content(lang) - LOG.warning('Importing phrases for lang: %s...', lang) - return self.parse_xml(loaded_xml) + def generate_phrases(self): + """ Download the wiki pages for the configured languages + and extract the phrases from the page. + """ + for lang in self.languages: + LOG.warning('Importing phrases for lang: %s...', lang) + loaded_xml = _get_wiki_content(lang) + + # One match will be of format [label, class, type, operator, plural] + matches = self.occurence_pattern.findall(loaded_xml) + + for match in matches: + yield SpecialPhrase(match[0], match[1], match[2], match[3]) - def parse_xml(self, xml): - """ - Parses XML content and extracts special phrases from it. - Return a list of SpecialPhrase. - """ - # One match will be of format [label, class, type, operator, plural] - matches = self.occurence_pattern.findall(xml) - returned_phrases = set() - for match in matches: - returned_phrases.add( - SpecialPhrase(match[0], match[1], match[2], match[3]) - ) - return returned_phrases def _load_languages(self): """ @@ -64,15 +69,3 @@ class SPWikiLoader(Iterator): 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu', 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl', 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi'] - - @staticmethod - def _get_wiki_content(lang): - """ - Request and return the wiki page's content - corresponding to special phrases for a given lang. - Requested URL Example : - https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN - """ - url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ - + lang.upper() - return get_url(url) diff --git a/test/python/tools/test_import_special_phrases.py b/test/python/tools/test_import_special_phrases.py index 57664586..7026a549 100644 --- a/test/python/tools/test_import_special_phrases.py +++ b/test/python/tools/test_import_special_phrases.py @@ -187,8 +187,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer, table_factory('place_classtype_amenity_animal_shelter') table_factory('place_classtype_wrongclass_wrongtype') - monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content', - lambda self, lang: xml_wiki_content) + monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content', + lambda lang: xml_wiki_content) tokenizer = tokenizer_mock() sp_importer.import_phrases(tokenizer, should_replace) diff --git a/test/python/tools/test_sp_csv_loader.py b/test/python/tools/test_sp_csv_loader.py index 6f5670f0..b5069a52 100644 --- a/test/python/tools/test_sp_csv_loader.py +++ b/test/python/tools/test_sp_csv_loader.py @@ -12,50 +12,6 @@ import pytest from nominatim.errors import UsageError from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader -def test_parse_csv(sp_csv_loader): - """ - Test method parse_csv() - Should return the right SpecialPhrase objects. - """ - phrases = sp_csv_loader.parse_csv() - assert check_phrases_content(phrases) - -def test_next(sp_csv_loader): - """ - Test objects returned from the next() method. - It should return all SpecialPhrases objects of - the sp_csv_test.csv special phrases. - """ - phrases = next(sp_csv_loader) - assert check_phrases_content(phrases) - -def test_check_csv_validity(sp_csv_loader): - """ - Test method check_csv_validity() - It should raise an exception when file with a - different exception than .csv is given. - """ - sp_csv_loader.csv_path = 'test.csv' - sp_csv_loader.check_csv_validity() - sp_csv_loader.csv_path = 'test.wrong' - with pytest.raises(UsageError): - assert sp_csv_loader.check_csv_validity() - -def check_phrases_content(phrases): - """ - Asserts that the given phrases list contains - the right phrases of the sp_csv_test.csv special phrases. - """ - return len(phrases) > 1 \ - and any(p.p_label == 'Billboard' - and p.p_class == 'advertising' - and p.p_type == 'billboard' - and p.p_operator == '-' for p in phrases) \ - and any(p.p_label == 'Zip Lines' - and p.p_class == 'aerialway' - and p.p_type == 'zip_line' - and p.p_operator == '-' for p in phrases) - @pytest.fixture def sp_csv_loader(src_dir): """ @@ -64,3 +20,35 @@ def sp_csv_loader(src_dir): csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve() loader = SPCsvLoader(csv_path) return loader + + +def test_generate_phrases(sp_csv_loader): + """ + Test method parse_csv() + Should return the right SpecialPhrase objects. + """ + phrases = list(sp_csv_loader.generate_phrases()) + + assert len(phrases) == 41 + assert len(set(phrases)) == 41 + + assert any(p.p_label == 'Billboard' + and p.p_class == 'advertising' + and p.p_type == 'billboard' + and p.p_operator == '-' for p in phrases) + assert any(p.p_label == 'Zip Lines' + and p.p_class == 'aerialway' + and p.p_type == 'zip_line' + and p.p_operator == '-' for p in phrases) + + +def test_invalid_cvs_file(): + """ + Test method check_csv_validity() + It should raise an exception when file with a + different exception than .csv is given. + """ + loader = SPCsvLoader('test.wrong') + + with pytest.raises(UsageError, match='not a csv file'): + next(loader.generate_phrases()) diff --git a/test/python/tools/test_sp_wiki_loader.py b/test/python/tools/test_sp_wiki_loader.py index 0a64cd56..5bd45de3 100644 --- a/test/python/tools/test_sp_wiki_loader.py +++ b/test/python/tools/test_sp_wiki_loader.py @@ -26,27 +26,18 @@ def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content): """ monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en') loader = SPWikiLoader(def_config) - monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content', - lambda self, lang: xml_wiki_content) + monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content', + lambda lang: xml_wiki_content) return loader -def test_parse_xml(sp_wiki_loader, xml_wiki_content): - """ - Test method parse_xml() - Should return the right SpecialPhrase objects. - """ - phrases = sp_wiki_loader.parse_xml(xml_wiki_content) - check_phrases_content(phrases) - - -def test_next(sp_wiki_loader): +def test_generate_phrases(sp_wiki_loader): """ Test objects returned from the next() method. It should return all SpecialPhrases objects of the 'en' special phrases. """ - phrases = next(sp_wiki_loader) + phrases = list(sp_wiki_loader.generate_phrases()) check_phrases_content(phrases) def check_phrases_content(phrases):