Remove the token string from the explicit inputs of the SearchDescription extend functions

The token string is only required by the Partial token type, so
that type can simply store the token string internally. There is
no need to pass it along for every token type.

Also moves the check for multi-word partials to the token loader
code in the tokenizer. Multi-word partials can only occur with
the legacy tokenizer, and only when the database was loaded with
an older version of Nominatim, so there is no need to run the
check for every tokenizer.
This commit is contained in:
Sarah Hoffmann 2021-07-15 14:48:20 +02:00
parent ec3f6c9c42
commit 3cd85eaaf1
5 changed files with 13 additions and 11 deletions

View File

@ -363,7 +363,6 @@ class Geocode
foreach ($aWordsetSearches as $oCurrentSearch) {
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
$aNewSearches = $oCurrentSearch->extendWithSearchTerm(
$sToken,
$oSearchTerm,
$oPosition
);

View File

@ -152,14 +152,13 @@ class SearchDescription
/**
* Derive new searches by adding a full term to the existing search.
*
* @param string $sToken Term for the token.
* @param object $oSearchTerm Description of the token.
* @param object $oPosition Description of the token position within
the query.
*
* @return SearchDescription[] List of derived search descriptions.
*/
public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition)
public function extendWithSearchTerm($oSearchTerm, $oPosition)
{
$aNewSearches = array();
@ -315,10 +314,8 @@ class SearchDescription
}
} elseif (!$oPosition->isPhrase('country')
&& is_a($oSearchTerm, '\Nominatim\Token\Partial')
&& strpos($sToken, ' ') === false
) {
$aNewSearches = $this->extendWithPartialTerm(
$sToken,
$oSearchTerm,
$oPosition
);
@ -330,14 +327,13 @@ class SearchDescription
/**
* Derive new searches by adding a partial term to the existing search.
*
* @param string $sToken Term for the token.
* @param object $oSearchTerm Description of the token.
* @param object $oPosition Description of the token position within
the query.
*
* @return SearchDescription[] List of derived search descriptions.
*/
private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition)
private function extendWithPartialTerm($oSearchTerm, $oPosition)
{
$aNewSearches = array();
$iWordID = $oSearchTerm->iId;
@ -347,7 +343,7 @@ class SearchDescription
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
if (preg_match('#^[0-9 ]+$#', $sToken)) {
if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) {
$oSearch->iSearchRank++;
}
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
@ -367,7 +363,7 @@ class SearchDescription
if (empty($this->aName) && empty($this->aNameNonSearch)) {
$oSearch->iSearchRank++;
}
if (preg_match('#^[0-9 ]+$#', $sToken)) {
if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) {
$oSearch->iSearchRank++;
}
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {

View File

@ -11,10 +11,13 @@ class Partial
public $iId;
/// Number of appearances in the database.
public $iSearchNameCount;
/// Normalised version of the partial word.
public $sToken;
public function __construct($iId, $iSearchNameCount)
public function __construct($iId, $sToken, $iSearchNameCount)
{
$this->iId = $iId;
$this->sToken = $sToken;
$this->iSearchNameCount = $iSearchNameCount;
}

View File

@ -205,6 +205,7 @@ class Tokenizer
} else {
$oToken = new Token\Partial(
$iId,
$aWord['word_token'],
(int) $aWord['count']
);
}

View File

@ -218,9 +218,12 @@ class Tokenizer
(int) $aWord['count'],
substr_count($aWord['word_token'], ' ')
);
} else {
// For backward compatibility: ignore all partial tokens with more
// than one word.
} elseif (strpos($aWord['word_token'], ' ') === false) {
$oToken = new Token\Partial(
$iId,
$aWord['word_token'],
(int) $aWord['count']
);
}