convert special phrase loaders to generators

Generators simplify the code quite a bit compared to the previous Iterator approach.
2024-12-26 22:44:44 +03:00 · 2022-05-30 14:12:46 +02:00 · 2022-05-30 14:12:46 +02:00 · cce0e5ea38
commit cce0e5ea38
parent 042e314589
6 changed files with 76 additions and 117 deletions
--- a/nominatim/tools/special_phrases/sp_csv_loader.py
+++ b/nominatim/tools/special_phrases/sp_csv_loader.py
@ -11,43 +11,31 @@
 """
 import csv
 import os
 from collections.abc import Iterator
 from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
 from nominatim.errors import UsageError
-class SPCsvLoader(Iterator):
+class SPCsvLoader:
    """
        Handles loading of special phrases from external csv file.
    """
    def __init__(self, csv_path):
        super().__init__()
        self.csv_path = csv_path
        self.has_been_read = False
    def __next__(self):
        if self.has_been_read:
            raise StopIteration()
-        self.has_been_read = True
+    def generate_phrases(self):
-        self.check_csv_validity()
+        """ Open and parse the given csv file.
        return self.parse_csv()
    def parse_csv(self):
        """
            Open and parse the given csv file.
            Create the corresponding SpecialPhrases.
        """
-        phrases = set()
+        self._check_csv_validity()
        with open(self.csv_path, encoding='utf-8') as fd:
            reader = csv.DictReader(fd, delimiter=',')
            for row in reader:
-                phrases.add(
+                yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
                    SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
                )
        return phrases
-    def check_csv_validity(self):
+
    def _check_csv_validity(self):
        """
            Check that the csv file has the right extension.
        """
--- a/nominatim/tools/special_phrases/sp_importer.py
+++ b/nominatim/tools/special_phrases/sp_importer.py
@ -62,8 +62,7 @@ class SPImporter():
        # Store pairs of class/type for further processing
        class_type_pairs = set()
-        for loaded_phrases in self.sp_loader:
+        for phrase in self.sp_loader.generate_phrases():
            for phrase in loaded_phrases:
            result = self._process_phrase(phrase)
            if result:
                class_type_pairs.add(result)
--- a/nominatim/tools/special_phrases/sp_wiki_loader.py
+++ b/nominatim/tools/special_phrases/sp_wiki_loader.py
@ -9,12 +9,24 @@
 """
 import re
 import logging
 from collections.abc import Iterator
 from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
 from nominatim.tools.exec_utils import get_url
 LOG = logging.getLogger()
-class SPWikiLoader(Iterator):
+
 def _get_wiki_content(lang):
    """
        Request and return the wiki page's content
        corresponding to special phrases for a given lang.
        Requested URL Example :
            https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
    """
    url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
          + lang.upper()
    return get_url(url)
 class SPWikiLoader:
    """
        Handles loading of special phrases from the wiki.
    """
@ -27,28 +39,21 @@ class SPWikiLoader(Iterator):
        )
        self._load_languages()
    def __next__(self):
        if not self.languages:
            raise StopIteration
-        lang = self.languages.pop(0)
+    def generate_phrases(self):
-        loaded_xml = self._get_wiki_content(lang)
+        """ Download the wiki pages for the configured languages
            and extract the phrases from the page.
        """
        for lang in self.languages:
            LOG.warning('Importing phrases for lang: %s...', lang)
-        return self.parse_xml(loaded_xml)
+            loaded_xml = _get_wiki_content(lang)
    def parse_xml(self, xml):
        """
            Parses XML content and extracts special phrases from it.
            Return a list of SpecialPhrase.
        """
            # One match will be of format [label, class, type, operator, plural]
-        matches = self.occurence_pattern.findall(xml)
+            matches = self.occurence_pattern.findall(loaded_xml)
-        returned_phrases = set()
+
            for match in matches:
-            returned_phrases.add(
+                yield SpecialPhrase(match[0], match[1], match[2], match[3])
-                SpecialPhrase(match[0], match[1], match[2], match[3])
+
            )
        return returned_phrases
    def _load_languages(self):
        """
@ -64,15 +69,3 @@ class SPWikiLoader(Iterator):
            'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
            'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
            'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
    @staticmethod
    def _get_wiki_content(lang):
        """
            Request and return the wiki page's content
            corresponding to special phrases for a given lang.
            Requested URL Example :
                https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
        """
        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
              + lang.upper()
        return get_url(url)
--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@ -187,8 +187,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
    table_factory('place_classtype_amenity_animal_shelter')
    table_factory('place_classtype_wrongclass_wrongtype')
-    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
+    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
-                        lambda self, lang: xml_wiki_content)
+                        lambda lang: xml_wiki_content)
    tokenizer = tokenizer_mock()
    sp_importer.import_phrases(tokenizer, should_replace)
--- a/test/python/tools/test_sp_csv_loader.py
+++ b/test/python/tools/test_sp_csv_loader.py
@ -12,50 +12,6 @@ import pytest
 from nominatim.errors import UsageError
 from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
 def test_parse_csv(sp_csv_loader):
    """
        Test method parse_csv()
        Should return the right SpecialPhrase objects.
    """
    phrases = sp_csv_loader.parse_csv()
    assert check_phrases_content(phrases)
 def test_next(sp_csv_loader):
    """
        Test objects returned from the next() method.
        It should return all SpecialPhrases objects of
        the sp_csv_test.csv special phrases.
    """
    phrases = next(sp_csv_loader)
    assert check_phrases_content(phrases)
 def test_check_csv_validity(sp_csv_loader):
    """
        Test method check_csv_validity()
        It should raise an exception when file with a
        different exception than .csv is given.
    """
    sp_csv_loader.csv_path = 'test.csv'
    sp_csv_loader.check_csv_validity()
    sp_csv_loader.csv_path = 'test.wrong'
    with pytest.raises(UsageError):
        assert sp_csv_loader.check_csv_validity()
 def check_phrases_content(phrases):
    """
        Asserts that the given phrases list contains
        the right phrases of the sp_csv_test.csv special phrases.
    """
    return  len(phrases) > 1 \
            and any(p.p_label == 'Billboard'
                    and p.p_class == 'advertising'
                    and p.p_type == 'billboard'
                    and p.p_operator == '-' for p in phrases) \
            and any(p.p_label == 'Zip Lines'
                    and p.p_class == 'aerialway'
                    and p.p_type == 'zip_line'
                    and p.p_operator == '-' for p in phrases)
@pytest.fixture
 def sp_csv_loader(src_dir):
    """
@ -64,3 +20,35 @@ def sp_csv_loader(src_dir):
    csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
    loader = SPCsvLoader(csv_path)
    return loader
 def test_generate_phrases(sp_csv_loader):
    """
        Test method parse_csv()
        Should return the right SpecialPhrase objects.
    """
    phrases = list(sp_csv_loader.generate_phrases())
    assert len(phrases) == 41
    assert len(set(phrases)) == 41
    assert any(p.p_label == 'Billboard'
               and p.p_class == 'advertising'
               and p.p_type == 'billboard'
               and p.p_operator == '-' for p in phrases)
    assert any(p.p_label == 'Zip Lines'
               and p.p_class == 'aerialway'
               and p.p_type == 'zip_line'
               and p.p_operator == '-' for p in phrases)
 def test_invalid_cvs_file():
    """
        Test method check_csv_validity()
        It should raise an exception when file with a
        different exception than .csv is given.
    """
    loader = SPCsvLoader('test.wrong')
    with pytest.raises(UsageError, match='not a csv file'):
        next(loader.generate_phrases())
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@ -26,27 +26,18 @@ def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
    """
    monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
    loader = SPWikiLoader(def_config)
-    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
+    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
-                        lambda self, lang: xml_wiki_content)
+                        lambda lang: xml_wiki_content)
    return loader
-def test_parse_xml(sp_wiki_loader, xml_wiki_content):
+def test_generate_phrases(sp_wiki_loader):
    """
        Test method parse_xml()
        Should return the right SpecialPhrase objects.
    """
    phrases = sp_wiki_loader.parse_xml(xml_wiki_content)
    check_phrases_content(phrases)
 def test_next(sp_wiki_loader):
    """
        Test objects returned from the next() method.
        It should return all SpecialPhrases objects of
        the 'en' special phrases.
    """
-    phrases = next(sp_wiki_loader)
+    phrases = list(sp_wiki_loader.generate_phrases())
    check_phrases_content(phrases)
 def check_phrases_content(phrases):