Merge pull request #3290 from lonvia/near-vs-quaifier-words

Do not run near queries on qualifier words
This commit is contained in:
Sarah Hoffmann 2024-01-07 15:23:00 +01:00 committed by GitHub
commit 2592bf1954
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 7 deletions

View File

@ -8,7 +8,6 @@
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from copy import copy
from collections import defaultdict
import dataclasses
import difflib
@ -188,10 +187,6 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)

View File

@ -148,9 +148,9 @@ async def test_qualifier_words(conn):
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER}
@pytest.mark.asyncio