mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-22 21:28:10 +03:00
complete tests for rule loader
This commit is contained in:
parent
a0a7b05c9f
commit
2e81084f35
@@ -3,7 +3,6 @@ Tokenizer implementing normalisation as used before Nominatim 4 but using
|
||||
libICU instead of the PostgreSQL module.
|
||||
"""
|
||||
from collections import Counter
|
||||
import io
|
||||
import itertools
|
||||
import logging
|
||||
import re
|
||||
@@ -178,7 +177,7 @@ class LegacyICUTokenizer:
|
||||
|
||||
with conn.cursor() as cur:
|
||||
copystr.copy_out(cur, 'word',
|
||||
columns=['word_token', 'search_name_count'])
|
||||
columns=['word_token', 'search_name_count'])
|
||||
cur.execute("""UPDATE word SET word_id = nextval('seq_word')
|
||||
WHERE word_id is null""")
|
||||
|
||||
|
@@ -63,6 +63,22 @@ def test_missing_normalization(tmp_path, section):
|
||||
with pytest.raises(UsageError):
|
||||
ICURuleLoader(fpath)
|
||||
|
||||
@pytest.mark.parametrize("abbr", ["simple",
|
||||
"double => arrow => bad",
|
||||
"bad = > arrow"])
|
||||
def test_bad_abbreviation_syntax(tmp_path, abbr):
|
||||
fpath = tmp_path / ('test_config.yaml')
|
||||
fpath.write_text(dedent("""\
|
||||
normalization:
|
||||
transliteration:
|
||||
compound_suffixes:
|
||||
abbreviations:
|
||||
- {}
|
||||
""".format(abbr)))
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
rules = ICURuleLoader(fpath)
|
||||
|
||||
|
||||
def test_get_search_rules(cfgfile):
|
||||
fpath = cfgfile(['strasse', 'straße', 'weg'],
|
||||
@@ -105,18 +121,54 @@ def test_get_transliteration_rules(cfgfile):
|
||||
assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt "
|
||||
|
||||
|
||||
def test_get_synonym_pairs(cfgfile):
|
||||
fpath = cfgfile(['Weg', 'Strasse'],
|
||||
def test_get_replacement_pairs_multi_to(cfgfile):
|
||||
fpath = cfgfile(['Pfad', 'Strasse'],
|
||||
['Strasse => str,st'])
|
||||
|
||||
loader = ICURuleLoader(fpath)
|
||||
repl = ICURuleLoader(fpath).get_replacement_pairs()
|
||||
|
||||
repl = loader.get_replacement_pairs()
|
||||
assert [(a, sorted(b)) for a, b in repl] == \
|
||||
[(' strasse ', [' st ', ' str ', ' strasse ']),
|
||||
('strasse ', [' st ', ' str ', ' strasse ']),
|
||||
('pfad ', [' pfad ']),
|
||||
('str ' , [' str ']),
|
||||
('st ' , [' st '])]
|
||||
|
||||
assert sorted(((a, sorted(b)) for a, b in repl)) == \
|
||||
sorted([(' strasse ', [' st ', ' str ', ' strasse ']),
|
||||
('strasse ', [' st ', ' str ', ' strasse ']),
|
||||
('st ' , [' st ']),
|
||||
('str ' , [' str ']),
|
||||
('weg ', [' weg '])])
|
||||
|
||||
def test_get_replacement_pairs_multi_from(cfgfile):
|
||||
fpath = cfgfile([], ['saint,Sainte => st'])
|
||||
|
||||
repl = ICURuleLoader(fpath).get_replacement_pairs()
|
||||
|
||||
assert [(a, sorted(b)) for a, b in repl] == \
|
||||
[(' sainte ', [' sainte ', ' st ']),
|
||||
(' saint ', [' saint ', ' st '])]
|
||||
|
||||
|
||||
def test_get_replacement_pairs_cross_abbreviations(cfgfile):
|
||||
fpath = cfgfile([], ['saint,Sainte => st',
|
||||
'sainte => ste'])
|
||||
|
||||
repl = ICURuleLoader(fpath).get_replacement_pairs()
|
||||
|
||||
assert [(a, sorted(b)) for a, b in repl] == \
|
||||
[(' sainte ', [' sainte ', ' st ', ' ste ']),
|
||||
(' saint ', [' saint ', ' st '])]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("abbr", ["missing to =>",
|
||||
" => missing from",
|
||||
"=>"])
|
||||
def test_bad_abbreviation_syntax(tmp_path, abbr):
|
||||
fpath = tmp_path / ('test_config.yaml')
|
||||
fpath.write_text(dedent("""\
|
||||
normalization:
|
||||
transliteration:
|
||||
compound_suffixes:
|
||||
abbreviations:
|
||||
- {}
|
||||
""".format(abbr)))
|
||||
|
||||
repl = ICURuleLoader(fpath).get_replacement_pairs()
|
||||
|
||||
assert repl == []
|
||||
|
Loading…
Reference in New Issue
Block a user