implement token assignment

This commit is contained in:
Sarah Hoffmann 2023-05-22 15:49:03 +02:00
parent d8240f9ee4
commit 3bf489cd7c
3 changed files with 690 additions and 0 deletions

View File

@ -114,6 +114,24 @@ class TokenRange(NamedTuple):
start: int
end: int
def replace_start(self, new_start: int) -> 'TokenRange':
    """ Build a copy of this range that begins at `new_start` and
        keeps the current end.
    """
    return TokenRange(start=new_start, end=self.end)
def replace_end(self, new_end: int) -> 'TokenRange':
    """ Build a copy of this range that finishes at `new_end` and
        keeps the current start.
    """
    return TokenRange(start=self.start, end=new_end)
def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
    """ Cut the range at `index` and return both pieces.

        The first piece ends at `index`, the second one starts there.
        The index must be within the span; this is not verified here.
    """
    head = self.replace_end(index)
    tail = self.replace_start(index)
    return head, tail
@dataclasses.dataclass
class TokenList:

View File

@ -0,0 +1,345 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Create query interpretations where each vertex in the query is assigned
a specific function (expressed as a token type).
"""
from typing import Optional, List, Iterator
import dataclasses
import nominatim.api.search.query as qmod
from nominatim.api.logging import log
# pylint: disable=too-many-return-statements,too-many-branches
@dataclasses.dataclass
class TypedRange:
    """ A token range for a specific type of tokens.

        Couples a token type with the range it has been assigned to.
    """
    # Type that all tokens within the range are interpreted as.
    ttype: qmod.TokenType
    # Start and end position of the range.
    trange: qmod.TokenRange
# Penalty per break type. It is added when a new typed range is started
# at a break of that kind and when a name is split off an address term
# at such a break.
PENALTY_TOKENCHANGE = {
    qmod.BreakType.START: 0.0,
    qmod.BreakType.END: 0.0,
    qmod.BreakType.PHRASE: 0.0,
    qmod.BreakType.WORD: 0.1,
    qmod.BreakType.PART: 0.2,
    qmod.BreakType.TOKEN: 0.4
}

# Shorthand for a list of typed token ranges.
TypedRangeSeq = List[TypedRange]
@dataclasses.dataclass
class TokenAssignment: # pylint: disable=too-many-instance-attributes
    """ One possible way of assigning token types to the tokens of a
        tokenized query.

        Each field holds the token range that was assigned the respective
        role. `address` may hold any number of ranges, all other roles
        hold at most one.
    """
    penalty: float = 0.0
    name: Optional[qmod.TokenRange] = None
    address: List[qmod.TokenRange] = dataclasses.field(default_factory=list)
    housenumber: Optional[qmod.TokenRange] = None
    postcode: Optional[qmod.TokenRange] = None
    country: Optional[qmod.TokenRange] = None
    category: Optional[qmod.TokenRange] = None
    qualifier: Optional[qmod.TokenRange] = None


    @staticmethod
    def from_ranges(ranges: TypedRangeSeq) -> 'TokenAssignment':
        """ Build an assignment from a sequence of typed ranges.

            PARTIAL ranges are collected in `address` in the order they
            appear. For all other known types the range is stored in the
            field of the same name, a later range replacing an earlier one.
            Ranges of any other type are ignored. Name and penalty are
            left at their defaults.
        """
        result = TokenAssignment()

        for typed in ranges:
            ttype, trange = typed.ttype, typed.trange

            if ttype == qmod.TokenType.PARTIAL:
                result.address.append(trange)
                continue
            if ttype == qmod.TokenType.HOUSENUMBER:
                result.housenumber = trange
                continue
            if ttype == qmod.TokenType.POSTCODE:
                result.postcode = trange
                continue
            if ttype == qmod.TokenType.COUNTRY:
                result.country = trange
                continue
            if ttype == qmod.TokenType.CATEGORY:
                result.category = trange
                continue
            if ttype == qmod.TokenType.QUALIFIER:
                result.qualifier = trange

        return result
class _TokenSequence:
    """ Working state used to put together the token assignments.

        Represents an intermediate state while traversing the tokenized
        query.
    """

    def __init__(self, seq: TypedRangeSeq,
                 direction: int = 0, penalty: float = 0.0) -> None:
        # The typed ranges collected so far, the reading direction of
        # the sequence and the penalty accumulated up to this point.
        self.seq, self.direction, self.penalty = seq, direction, penalty
def __str__(self) -> str:
    """ Render all typed ranges followed by direction and penalty.
    """
    parts = []
    for typed in self.seq:
        span = typed.trange
        parts.append(f'[{span.start} - {span.end}: {typed.ttype.name}]')
    ranges = ''.join(parts)

    return f'{ranges} (dir: {self.direction}, penalty: {self.penalty})'
@property
def end_pos(self) -> int:
    """ Position where the last range of the sequence ends, or 0 when
        the sequence is still empty.
    """
    if not self.seq:
        return 0

    return self.seq[-1].trange.end
def has_types(self, *ttypes: qmod.TokenType) -> bool:
    """ Return True when at least one range of the sequence has one of
        the given token types.
    """
    for typed in self.seq:
        if typed.ttype in ttypes:
            return True

    return False
def is_final(self) -> bool:
    """ Tell if the sequence is complete in the sense that no token
        of any kind can be added anymore.
    """
    # A sequence of at most one range is never final.
    if len(self.seq) <= 1:
        return False

    # Country and category must be the final term for left-to-right
    closing_types = (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY)
    return self.seq[-1].ttype in closing_types
def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
    """ Check if the given token type is appendable to the existing sequence.

        Returns None if the token type is not appendable, otherwise the
        new direction of the sequence after adding such a type. The
        token is not added.

        Going by the comments in the housenumber branch, direction 1
        stands for a left-to-right reading and -1 for right-to-left;
        0 is presumably 'not yet decided' (TODO confirm).
    """
    # Tokens of type WORD are never appendable.
    if ttype == qmod.TokenType.WORD:
        return None

    if not self.seq:
        # Append unconditionally to the empty list
        if ttype == qmod.TokenType.COUNTRY:
            return -1
        if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
            return 1
        return self.direction

    # Name tokens are always acceptable and don't change direction
    if ttype == qmod.TokenType.PARTIAL:
        return self.direction

    # Other tokens may only appear once
    if self.has_types(ttype):
        return None

    if ttype == qmod.TokenType.HOUSENUMBER:
        if self.direction == 1:
            # Not directly after a sequence consisting of a qualifier only.
            if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER:
                return None
            if len(self.seq) > 2 \
               or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                return None # direction left-to-right: housenumber must come before anything
        elif self.direction == -1 \
             or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
            return -1 # force direction right-to-left if after other terms

        return self.direction

    if ttype == qmod.TokenType.POSTCODE:
        if self.direction == -1:
            # Rejected once a housenumber or qualifier is in the sequence.
            if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return None
            return -1
        if self.direction == 1:
            # Rejected when a country is already in the sequence.
            return None if self.has_types(qmod.TokenType.COUNTRY) else 1
        # Direction is 0 here: a preceding housenumber or qualifier
        # fixes it to 1.
        if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
            return 1
        return self.direction

    if ttype == qmod.TokenType.COUNTRY:
        # A country after other terms is rejected for direction -1 and
        # sets direction 1 otherwise.
        return None if self.direction == -1 else 1

    if ttype == qmod.TokenType.CATEGORY:
        # A category never changes the direction.
        return self.direction

    if ttype == qmod.TokenType.QUALIFIER:
        if self.direction == 1:
            # Only accepted directly after a single PARTIAL or CATEGORY
            # range or after a CATEGORY range followed by a PARTIAL range.
            if (len(self.seq) == 1
                and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \
               or (len(self.seq) == 2
                   and self.seq[0].ttype == qmod.TokenType.CATEGORY
                   and self.seq[1].ttype == qmod.TokenType.PARTIAL):
                return 1
            return None
        if self.direction == -1:
            return -1

        # Direction is 0 from here on. A leading category is left out
        # of the checks below.
        tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq
        if len(tempseq) == 0:
            return 1
        # NOTE(review): this looks at self.seq[0], not tempseq[0]. The two
        # differ when a leading category was cut off above - confirm that
        # this is intended.
        if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
            return None
        if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
            return -1
        return 0

    # Any other token type is not appendable.
    return None
def advance(self, ttype: qmod.TokenType, end_pos: int,
            btype: qmod.BreakType) -> Optional['_TokenSequence']:
    """ Create the follow-up state that results from adding a token of
        type `ttype` which ends at `end_pos`. `btype` is the kind of
        break in front of the token.

        Returns None when the type cannot be appended. The current
        state is left untouched.
    """
    direction = self.appendable(ttype)
    if direction is None:
        return None

    # Only starting a new range after an existing one costs a penalty.
    extra_penalty = 0.0

    if not self.seq:
        # The very first range always begins at position 0.
        ranges = [TypedRange(ttype, qmod.TokenRange(0, end_pos))]
    else:
        previous = self.seq[-1]
        if btype != qmod.BreakType.PHRASE and previous.ttype == ttype:
            # Same type and no phrase break: extend the existing range.
            ranges = [*self.seq[:-1],
                      TypedRange(ttype, previous.trange.replace_end(end_pos))]
        else:
            # Start a new range right behind the previous one.
            ranges = [*self.seq,
                      TypedRange(ttype, qmod.TokenRange(previous.trange.end, end_pos))]
            extra_penalty = PENALTY_TOKENCHANGE[btype]

    return _TokenSequence(ranges, direction, self.penalty + extra_penalty)
def _adapt_penalty_from_priors(self, priors: int, new_dir: int) -> bool:
    """ Adjust the state for `priors` name terms on one side of the
        housenumber.

        Exactly two terms add a penalty of 1.0. More than two terms are
        only acceptable while the direction is still 0, which is then
        changed to `new_dir`. Returns False when the sequence has to be
        rejected.
    """
    if priors < 2:
        return True

    if priors == 2:
        self.penalty += 1.0
        return True

    if self.direction != 0:
        return False

    self.direction = new_dir
    return True
def recheck_sequence(self) -> bool:
    """ Verify that the completed sequence makes up a valid token
        assignment and adapt direction and penalty where needed.

        The checks done here need forward context, which is why such
        sequences cannot be excluded while the sequence is still
        being built. Returns False for an impossible assignment.
    """
    # housenumbers may not be further than 2 words from the beginning.
    # If there are two words in front, give it a penalty.
    hnrpos = None
    for pos, typed in enumerate(self.seq):
        if typed.ttype == qmod.TokenType.HOUSENUMBER:
            hnrpos = pos
            break

    if hnrpos is None:
        return True

    if self.direction != -1:
        before = [t for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL]
        if not self._adapt_penalty_from_priors(len(before), -1):
            return False

    # The direction may just have been changed by the check above.
    if self.direction != 1:
        after = [t for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL]
        if not self._adapt_penalty_from_priors(len(after), 1):
            return False

    return True
def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
    """ Yield possible assignments for the current sequence.

        This function splits up general name assignments into name
        and address and yields all possible variants of that.
        Every variant is a copy of the base assignment built from the
        sequence; the sequence itself is not changed.
    """
    base = TokenAssignment.from_ranges(self.seq)

    # Postcode search (postcode-only search is covered in next case)
    if base.postcode is not None and base.address:
        # Only when the postcode is the very first or very last term
        # and the direction does not contradict that position.
        if (base.postcode.start == 0 and self.direction != -1)\
           or (base.postcode.end == query.num_token_slots() and self.direction != 1):
            log().comment('postcode search')
            yield dataclasses.replace(base, penalty=self.penalty)

    # Postcode or country-only search
    if not base.address:
        if not base.housenumber and (base.postcode or base.country or base.category):
            log().comment('postcode/country search')
            yield dataclasses.replace(base, penalty=self.penalty)
    else:
        # Use entire first word as name
        if self.direction != -1:
            log().comment('first word = name')
            yield dataclasses.replace(base, name=base.address[0],
                                      penalty=self.penalty,
                                      address=base.address[1:])

        # Use entire last word as name
        # (With direction 0 and a single address term this would repeat
        # the 'first word' variant from above.)
        if self.direction == -1 or (self.direction == 0 and len(base.address) > 1):
            log().comment('last word = name')
            yield dataclasses.replace(base, name=base.address[-1],
                                      penalty=self.penalty,
                                      address=base.address[:-1])

        # Use beginning of first word as name
        if self.direction != -1:
            first = base.address[0]
            if (not base.housenumber or first.end >= base.housenumber.start)\
               and (not base.qualifier or first.start >= base.qualifier.end):
                # Try every position strictly inside the first term.
                for i in range(first.start + 1, first.end):
                    name, addr = first.split(i)
                    # The split costs the penalty of the break at position i.
                    penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]
                    log().comment(f'split first word = name ({i - first.start})')
                    yield dataclasses.replace(base, name=name, penalty=penalty,
                                              address=[addr] + base.address[1:])

        # Use end of last word as name
        if self.direction != 1:
            last = base.address[-1]
            if (not base.housenumber or last.start <= base.housenumber.end)\
               and (not base.qualifier or last.end <= base.qualifier.start):
                # Try every position strictly inside the last term.
                for i in range(last.start + 1, last.end):
                    addr, name = last.split(i)
                    # The split costs the penalty of the break at position i.
                    penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]
                    log().comment(f'split last word = name ({i - last.start})')
                    yield dataclasses.replace(base, name=name, penalty=penalty,
                                              address=base.address[:-1] + [addr])
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
    """ Yield the possible assignments of word types to word positions
        for the given tokenized query.

        The assignments are derived from the concrete tokens listed in
        the query. Their penalty covers the transitions from one word
        type to another but not the transitions within a type.
    """
    # The direction starts out as 0 only when the first phrase has
    # phrase type NONE, otherwise as 1.
    if query.source[0].ptype == qmod.PhraseType.NONE:
        initial_direction = 0
    else:
        initial_direction = 1

    pending = [_TokenSequence([], direction=initial_direction)]

    while pending:
        current = pending.pop()
        node = query.nodes[current.end_pos]

        for tlist in node.starting:
            candidate = current.advance(tlist.ttype, tlist.end, node.btype)
            if candidate is None:
                continue

            if candidate.end_pos != query.num_token_slots():
                # Not at the end of the query yet: keep extending unless
                # the sequence cannot take further tokens.
                if not candidate.is_final():
                    pending.append(candidate)
                continue

            # The sequence covers the whole query.
            if candidate.recheck_sequence():
                log().var_dump('Assignment', candidate)
                yield from candidate.get_assignments(query)

View File

@ -0,0 +1,327 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for the creation of token assignments from tokenized queries.
"""
import pytest
from nominatim.api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token
from nominatim.api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
class MyToken(Token):
    """ Token subclass used to fill the test queries.
    """

    def get_category(self):
        """ Return a fixed placeholder category.
        """
        category = ('this', 'that')
        return category
def make_query(*args):
    """ Assemble a query structure from a list of node descriptions.

        Each argument is a tuple of break type, phrase type and a list
        of (end position, token type) pairs for the tokens starting at
        that node. The first tuple describes the start node, so its
        break type is not used. A closing END node is added at the end.
    """
    token = MyToken(3.0, 45, 1, 'foo', True)
    query = None

    for btype, ptype, tlist in args:
        if query is not None:
            query.add_node(btype, ptype)
        else:
            query = QueryStruct([Phrase(ptype, '')])

        # Tokens start at the node that was created last.
        start = len(query.nodes) - 1
        for end, ttype in tlist:
            query.add_token(TokenRange(start, end), ttype, [token])

    query.add_node(BreakType.END, PhraseType.NONE)

    return query
def check_assignments(actual, *expected):
    """ Assert that `actual` produces exactly the assignments given in
        `expected`, in any order.

        Every produced assignment is matched against one expected
        entry, so duplicates must be listed as often as they occur.
        Raises an AssertionError naming the first unexpected assignment
        or listing the expected assignments that were never produced.
    """
    todo = list(expected)

    for assignment in actual:
        assert assignment in todo, f"Unexpected assignment: {assignment}"
        todo.remove(assignment)

    # Report only what is still outstanding, not the complete expectation.
    assert not todo, f"Missing assignments: {todo}"
def test_query_with_missing_tokens():
    """ A query without any tokens yields no assignment at all.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    query.add_node(BreakType.END, PhraseType.NONE)

    assignments = list(yield_token_assignments(query))

    assert assignments == []
def test_one_word_query():
    """ A single word with PARTIAL, WORD and HOUSENUMBER tokens is only
        assigned as a name.
    """
    tokens = [(1, TokenType.PARTIAL),
              (1, TokenType.WORD),
              (1, TokenType.HOUSENUMBER)]
    query = make_query((BreakType.START, PhraseType.NONE, tokens))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(name=TokenRange(0, 1))]
def test_single_postcode():
    """ A lone postcode token results in a postcode-only assignment.
    """
    tokens = [(1, TokenType.POSTCODE)]
    query = make_query((BreakType.START, PhraseType.NONE, tokens))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(postcode=TokenRange(0, 1))]
def test_single_country_name():
    """ A lone country token results in a country-only assignment.
    """
    tokens = [(1, TokenType.COUNTRY)]
    query = make_query((BreakType.START, PhraseType.NONE, tokens))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(country=TokenRange(0, 1))]
def test_single_word_poi_search():
    """ A single word with CATEGORY and QUALIFIER tokens is only
        assigned as a category.
    """
    tokens = [(1, TokenType.CATEGORY),
              (1, TokenType.QUALIFIER)]
    query = make_query((BreakType.START, PhraseType.NONE, tokens))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(category=TokenRange(0, 1))]
@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
def test_multiple_simple_words(btype):
    """ Three partial words form one name or are split into name and
        address at any inner break, at the cost of that break's penalty.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    split_penalty = PENALTY_TOKENCHANGE[btype]

    expected = [
        TokenAssignment(name=TokenRange(0, 3)),
        TokenAssignment(penalty=split_penalty, name=TokenRange(0, 2),
                        address=[TokenRange(2, 3)]),
        TokenAssignment(penalty=split_penalty, name=TokenRange(0, 1),
                        address=[TokenRange(1, 3)]),
        TokenAssignment(penalty=split_penalty, name=TokenRange(1, 3),
                        address=[TokenRange(0, 1)]),
        TokenAssignment(penalty=split_penalty, name=TokenRange(2, 3),
                        address=[TokenRange(0, 2)]),
    ]

    check_assignments(yield_token_assignments(query), *expected)
def test_multiple_words_respect_phrase_break():
    """ Partial words separated by a phrase break are not merged into
        a single name. Either side may be the name, without penalty.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    first_is_name = TokenAssignment(name=TokenRange(0, 1),
                                    address=[TokenRange(1, 2)])
    second_is_name = TokenAssignment(name=TokenRange(1, 2),
                                     address=[TokenRange(0, 1)])

    check_assignments(yield_token_assignments(query),
                      first_is_name, second_is_name)
def test_housenumber_and_street():
    """ A housenumber followed by a word makes the word the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    expected = TokenAssignment(name=TokenRange(1, 2),
                               housenumber=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_housenumber_and_street_backwards():
    """ A word followed by a housenumber makes the word the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]))

    expected = TokenAssignment(name=TokenRange(0, 1),
                               housenumber=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_housenumber_and_postcode():
    """ For 'name housenumber word postcode' only the reading with the
        first word as name is produced.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]))

    expected = TokenAssignment(penalty=pytest.approx(0.3),
                               name=TokenRange(0, 1),
                               housenumber=TokenRange(1, 2),
                               address=[TokenRange(2, 3)],
                               postcode=TokenRange(3, 4))

    check_assignments(yield_token_assignments(query), expected)
def test_postcode_and_housenumber():
    """ For 'word postcode name housenumber' only the reading with the
        word next to the housenumber as name is produced.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]))

    expected = TokenAssignment(penalty=pytest.approx(0.3),
                               name=TokenRange(2, 3),
                               housenumber=TokenRange(3, 4),
                               address=[TokenRange(0, 1)],
                               postcode=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_country_housenumber_postcode():
    """ 'country word housenumber postcode' has no valid assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]),
                       (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
                                   TokenType.CATEGORY, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype):
    """ A housenumber followed only by a special term has no valid
        assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, ttype)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY])
def test_multiple_special_tokens(ttype):
    """ The same special token type on both sides of a word has no
        valid assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, ttype)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(3, ttype)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))
def test_housenumber_many_phrases():
    """ With three phrases in front of the housenumber, only the word
        after the housenumber can be the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
                       (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(4, 5),
                               housenumber=TokenRange(3, 4),
                               address=[TokenRange(0, 1), TokenRange(1, 2),
                                        TokenRange(2, 3)])

    check_assignments(yield_token_assignments(query), expected)
def test_country_at_beginning():
    """ A country in front of a word is accepted, the word being the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    expected = TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                               country=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_country_at_end():
    """ A country after a word is accepted, the word being the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]))

    expected = TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
                               country=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_country_in_middle():
    """ A country between two words has no valid assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))
def test_postcode_with_designation():
    """ A postcode followed by a word is read as a named place with a
        postcode and as a postcode search with an address term.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.POSTCODE)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    named_place = TokenAssignment(name=TokenRange(1, 2),
                                  postcode=TokenRange(0, 1))
    postcode_search = TokenAssignment(postcode=TokenRange(0, 1),
                                      address=[TokenRange(1, 2)])

    check_assignments(yield_token_assignments(query),
                      named_place, postcode_search)
def test_postcode_with_designation_backwards():
    """ A word followed by a postcode is read as a named place with a
        postcode and as a postcode search with an address term.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)]))

    named_place = TokenAssignment(name=TokenRange(0, 1),
                                  postcode=TokenRange(1, 2))
    postcode_search = TokenAssignment(postcode=TokenRange(1, 2),
                                      address=[TokenRange(0, 1)])

    check_assignments(yield_token_assignments(query),
                      named_place, postcode_search)
def test_category_at_beginning():
    """ A category in front of a word is accepted, the word being the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.CATEGORY)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))

    expected = TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                               category=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_category_at_end():
    """ A category after a word is accepted, the word being the name.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]))

    expected = TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
                               category=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_category_in_middle():
    """ A category between two words has no valid assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))
def test_qualifier_at_beginning():
    """ After a leading qualifier the name is either both following
        words or only the first of them.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    full_name = TokenAssignment(penalty=0.1, name=TokenRange(1, 3),
                                qualifier=TokenRange(0, 1))
    split_name = TokenAssignment(penalty=0.2, name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1),
                                 address=[TokenRange(2, 3)])

    check_assignments(yield_token_assignments(query), full_name, split_name)
def test_qualifier_after_name():
    """ With a qualifier between two word pairs, either complete pair
        may be the name. The pairs themselves are not split.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
                       (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
                       (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    name_in_front = TokenAssignment(penalty=0.2, name=TokenRange(0, 2),
                                    qualifier=TokenRange(2, 3),
                                    address=[TokenRange(3, 5)])
    name_behind = TokenAssignment(penalty=0.2, name=TokenRange(3, 5),
                                  qualifier=TokenRange(2, 3),
                                  address=[TokenRange(0, 2)])

    check_assignments(yield_token_assignments(query),
                      name_in_front, name_behind)
def test_qualifier_before_housenumber():
    """ A qualifier directly in front of a housenumber has no valid
        assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))
def test_qualifier_after_housenumber():
    """ A qualifier directly after a leading housenumber has no valid
        assignment.
    """
    query = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
                       (BreakType.WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]),
                       (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    # No expected assignments: the generator must stay empty.
    check_assignments(yield_token_assignments(query))