mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-26 22:44:44 +03:00
convert special phrase loaders to generators
Generators simplify the code quite a bit compared to the previous Iterator approach.
This commit is contained in:
parent
042e314589
commit
cce0e5ea38
@ -11,43 +11,31 @@
|
|||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
import os
|
import os
|
||||||
from collections.abc import Iterator
|
|
||||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
|
|
||||||
class SPCsvLoader(Iterator):
|
class SPCsvLoader:
|
||||||
"""
|
"""
|
||||||
Handles loading of special phrases from external csv file.
|
Handles loading of special phrases from external csv file.
|
||||||
"""
|
"""
|
||||||
def __init__(self, csv_path):
|
def __init__(self, csv_path):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.csv_path = csv_path
|
self.csv_path = csv_path
|
||||||
self.has_been_read = False
|
|
||||||
|
|
||||||
def __next__(self):
|
|
||||||
if self.has_been_read:
|
|
||||||
raise StopIteration()
|
|
||||||
|
|
||||||
self.has_been_read = True
|
def generate_phrases(self):
|
||||||
self.check_csv_validity()
|
""" Open and parse the given csv file.
|
||||||
return self.parse_csv()
|
|
||||||
|
|
||||||
def parse_csv(self):
|
|
||||||
"""
|
|
||||||
Open and parse the given csv file.
|
|
||||||
Create the corresponding SpecialPhrases.
|
Create the corresponding SpecialPhrases.
|
||||||
"""
|
"""
|
||||||
phrases = set()
|
self._check_csv_validity()
|
||||||
|
|
||||||
with open(self.csv_path, encoding='utf-8') as fd:
|
with open(self.csv_path, encoding='utf-8') as fd:
|
||||||
reader = csv.DictReader(fd, delimiter=',')
|
reader = csv.DictReader(fd, delimiter=',')
|
||||||
for row in reader:
|
for row in reader:
|
||||||
phrases.add(
|
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
|
||||||
SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
|
|
||||||
)
|
|
||||||
return phrases
|
|
||||||
|
|
||||||
def check_csv_validity(self):
|
|
||||||
|
def _check_csv_validity(self):
|
||||||
"""
|
"""
|
||||||
Check that the csv file has the right extension.
|
Check that the csv file has the right extension.
|
||||||
"""
|
"""
|
||||||
|
@ -62,8 +62,7 @@ class SPImporter():
|
|||||||
# Store pairs of class/type for further processing
|
# Store pairs of class/type for further processing
|
||||||
class_type_pairs = set()
|
class_type_pairs = set()
|
||||||
|
|
||||||
for loaded_phrases in self.sp_loader:
|
for phrase in self.sp_loader.generate_phrases():
|
||||||
for phrase in loaded_phrases:
|
|
||||||
result = self._process_phrase(phrase)
|
result = self._process_phrase(phrase)
|
||||||
if result:
|
if result:
|
||||||
class_type_pairs.add(result)
|
class_type_pairs.add(result)
|
||||||
|
@ -9,12 +9,24 @@
|
|||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from collections.abc import Iterator
|
|
||||||
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
|
||||||
from nominatim.tools.exec_utils import get_url
|
from nominatim.tools.exec_utils import get_url
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
class SPWikiLoader(Iterator):
|
|
||||||
|
def _get_wiki_content(lang):
|
||||||
|
"""
|
||||||
|
Request and return the wiki page's content
|
||||||
|
corresponding to special phrases for a given lang.
|
||||||
|
Requested URL Example :
|
||||||
|
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
||||||
|
"""
|
||||||
|
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
|
||||||
|
+ lang.upper()
|
||||||
|
return get_url(url)
|
||||||
|
|
||||||
|
|
||||||
|
class SPWikiLoader:
|
||||||
"""
|
"""
|
||||||
Handles loading of special phrases from the wiki.
|
Handles loading of special phrases from the wiki.
|
||||||
"""
|
"""
|
||||||
@ -27,28 +39,21 @@ class SPWikiLoader(Iterator):
|
|||||||
)
|
)
|
||||||
self._load_languages()
|
self._load_languages()
|
||||||
|
|
||||||
def __next__(self):
|
|
||||||
if not self.languages:
|
|
||||||
raise StopIteration
|
|
||||||
|
|
||||||
lang = self.languages.pop(0)
|
def generate_phrases(self):
|
||||||
loaded_xml = self._get_wiki_content(lang)
|
""" Download the wiki pages for the configured languages
|
||||||
|
and extract the phrases from the page.
|
||||||
|
"""
|
||||||
|
for lang in self.languages:
|
||||||
LOG.warning('Importing phrases for lang: %s...', lang)
|
LOG.warning('Importing phrases for lang: %s...', lang)
|
||||||
return self.parse_xml(loaded_xml)
|
loaded_xml = _get_wiki_content(lang)
|
||||||
|
|
||||||
def parse_xml(self, xml):
|
|
||||||
"""
|
|
||||||
Parses XML content and extracts special phrases from it.
|
|
||||||
Return a list of SpecialPhrase.
|
|
||||||
"""
|
|
||||||
# One match will be of format [label, class, type, operator, plural]
|
# One match will be of format [label, class, type, operator, plural]
|
||||||
matches = self.occurence_pattern.findall(xml)
|
matches = self.occurence_pattern.findall(loaded_xml)
|
||||||
returned_phrases = set()
|
|
||||||
for match in matches:
|
for match in matches:
|
||||||
returned_phrases.add(
|
yield SpecialPhrase(match[0], match[1], match[2], match[3])
|
||||||
SpecialPhrase(match[0], match[1], match[2], match[3])
|
|
||||||
)
|
|
||||||
return returned_phrases
|
|
||||||
|
|
||||||
def _load_languages(self):
|
def _load_languages(self):
|
||||||
"""
|
"""
|
||||||
@ -64,15 +69,3 @@ class SPWikiLoader(Iterator):
|
|||||||
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||||
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||||
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _get_wiki_content(lang):
|
|
||||||
"""
|
|
||||||
Request and return the wiki page's content
|
|
||||||
corresponding to special phrases for a given lang.
|
|
||||||
Requested URL Example :
|
|
||||||
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
|
||||||
"""
|
|
||||||
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
|
|
||||||
+ lang.upper()
|
|
||||||
return get_url(url)
|
|
||||||
|
@ -187,8 +187,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
|||||||
table_factory('place_classtype_amenity_animal_shelter')
|
table_factory('place_classtype_amenity_animal_shelter')
|
||||||
table_factory('place_classtype_wrongclass_wrongtype')
|
table_factory('place_classtype_wrongclass_wrongtype')
|
||||||
|
|
||||||
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
|
||||||
lambda self, lang: xml_wiki_content)
|
lambda lang: xml_wiki_content)
|
||||||
|
|
||||||
tokenizer = tokenizer_mock()
|
tokenizer = tokenizer_mock()
|
||||||
sp_importer.import_phrases(tokenizer, should_replace)
|
sp_importer.import_phrases(tokenizer, should_replace)
|
||||||
|
@ -12,50 +12,6 @@ import pytest
|
|||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
|
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
|
||||||
|
|
||||||
def test_parse_csv(sp_csv_loader):
|
|
||||||
"""
|
|
||||||
Test method parse_csv()
|
|
||||||
Should return the right SpecialPhrase objects.
|
|
||||||
"""
|
|
||||||
phrases = sp_csv_loader.parse_csv()
|
|
||||||
assert check_phrases_content(phrases)
|
|
||||||
|
|
||||||
def test_next(sp_csv_loader):
|
|
||||||
"""
|
|
||||||
Test objects returned from the next() method.
|
|
||||||
It should return all SpecialPhrases objects of
|
|
||||||
the sp_csv_test.csv special phrases.
|
|
||||||
"""
|
|
||||||
phrases = next(sp_csv_loader)
|
|
||||||
assert check_phrases_content(phrases)
|
|
||||||
|
|
||||||
def test_check_csv_validity(sp_csv_loader):
|
|
||||||
"""
|
|
||||||
Test method check_csv_validity()
|
|
||||||
It should raise an exception when file with a
|
|
||||||
different extension than .csv is given.
|
|
||||||
"""
|
|
||||||
sp_csv_loader.csv_path = 'test.csv'
|
|
||||||
sp_csv_loader.check_csv_validity()
|
|
||||||
sp_csv_loader.csv_path = 'test.wrong'
|
|
||||||
with pytest.raises(UsageError):
|
|
||||||
assert sp_csv_loader.check_csv_validity()
|
|
||||||
|
|
||||||
def check_phrases_content(phrases):
|
|
||||||
"""
|
|
||||||
Asserts that the given phrases list contains
|
|
||||||
the right phrases of the sp_csv_test.csv special phrases.
|
|
||||||
"""
|
|
||||||
return len(phrases) > 1 \
|
|
||||||
and any(p.p_label == 'Billboard'
|
|
||||||
and p.p_class == 'advertising'
|
|
||||||
and p.p_type == 'billboard'
|
|
||||||
and p.p_operator == '-' for p in phrases) \
|
|
||||||
and any(p.p_label == 'Zip Lines'
|
|
||||||
and p.p_class == 'aerialway'
|
|
||||||
and p.p_type == 'zip_line'
|
|
||||||
and p.p_operator == '-' for p in phrases)
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sp_csv_loader(src_dir):
|
def sp_csv_loader(src_dir):
|
||||||
"""
|
"""
|
||||||
@ -64,3 +20,35 @@ def sp_csv_loader(src_dir):
|
|||||||
csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
|
csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
|
||||||
loader = SPCsvLoader(csv_path)
|
loader = SPCsvLoader(csv_path)
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_phrases(sp_csv_loader):
|
||||||
|
"""
|
||||||
|
Test method generate_phrases()
|
||||||
|
Should return the right SpecialPhrase objects.
|
||||||
|
"""
|
||||||
|
phrases = list(sp_csv_loader.generate_phrases())
|
||||||
|
|
||||||
|
assert len(phrases) == 41
|
||||||
|
assert len(set(phrases)) == 41
|
||||||
|
|
||||||
|
assert any(p.p_label == 'Billboard'
|
||||||
|
and p.p_class == 'advertising'
|
||||||
|
and p.p_type == 'billboard'
|
||||||
|
and p.p_operator == '-' for p in phrases)
|
||||||
|
assert any(p.p_label == 'Zip Lines'
|
||||||
|
and p.p_class == 'aerialway'
|
||||||
|
and p.p_type == 'zip_line'
|
||||||
|
and p.p_operator == '-' for p in phrases)
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_cvs_file():
|
||||||
|
"""
|
||||||
|
Test the csv validity check triggered by generate_phrases()
|
||||||
|
It should raise an exception when file with a
|
||||||
|
different extension than .csv is given.
|
||||||
|
"""
|
||||||
|
loader = SPCsvLoader('test.wrong')
|
||||||
|
|
||||||
|
with pytest.raises(UsageError, match='not a csv file'):
|
||||||
|
next(loader.generate_phrases())
|
||||||
|
@ -26,27 +26,18 @@ def sp_wiki_loader(monkeypatch, def_config, xml_wiki_content):
|
|||||||
"""
|
"""
|
||||||
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
|
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
|
||||||
loader = SPWikiLoader(def_config)
|
loader = SPWikiLoader(def_config)
|
||||||
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
|
||||||
lambda self, lang: xml_wiki_content)
|
lambda lang: xml_wiki_content)
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
|
|
||||||
def test_parse_xml(sp_wiki_loader, xml_wiki_content):
|
def test_generate_phrases(sp_wiki_loader):
|
||||||
"""
|
|
||||||
Test method parse_xml()
|
|
||||||
Should return the right SpecialPhrase objects.
|
|
||||||
"""
|
|
||||||
phrases = sp_wiki_loader.parse_xml(xml_wiki_content)
|
|
||||||
check_phrases_content(phrases)
|
|
||||||
|
|
||||||
|
|
||||||
def test_next(sp_wiki_loader):
|
|
||||||
"""
|
"""
|
||||||
Test objects returned from the next() method.
|
Test objects returned from the generate_phrases() method.
|
||||||
It should return all SpecialPhrases objects of
|
It should return all SpecialPhrases objects of
|
||||||
the 'en' special phrases.
|
the 'en' special phrases.
|
||||||
"""
|
"""
|
||||||
phrases = next(sp_wiki_loader)
|
phrases = list(sp_wiki_loader.generate_phrases())
|
||||||
check_phrases_content(phrases)
|
check_phrases_content(phrases)
|
||||||
|
|
||||||
def check_phrases_content(phrases):
|
def check_phrases_content(phrases):
|
||||||
|
Loading…
Reference in New Issue
Block a user