factor out query position

Moves token and phrase position and phrase type into a separate
class that is handed in when assembling the search description.
This drastically reduces the number of parameters for the function
to extend the search descriptions and gives us more flexibility
in the future for more complex positional analysis.
This commit is contained in:
Sarah Hoffmann 2021-07-15 14:12:59 +02:00
parent 143ff14466
commit ec3f6c9c42
3 changed files with 122 additions and 34 deletions

View File

@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/Phrase.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');
require_once(CONST_LibDir.'/SearchDescription.php');
require_once(CONST_LibDir.'/SearchContext.php');
require_once(CONST_LibDir.'/SearchPosition.php');
require_once(CONST_LibDir.'/TokenList.php');
require_once(CONST_TokenizerDir.'/tokenizer.php');
@ -345,7 +346,11 @@ class Geocode
*/
foreach ($aPhrases as $iPhrase => $oPhrase) {
$aNewPhraseSearches = array();
$sPhraseType = $oPhrase->getPhraseType();
$oPosition = new SearchPosition(
$oPhrase->getPhraseType(),
$iPhrase,
count($aPhrases)
);
foreach ($oPhrase->getWordSets() as $aWordset) {
$aWordsetSearches = $aSearches;
@ -353,17 +358,14 @@ class Geocode
// Add all words from this wordset
foreach ($aWordset as $iToken => $sToken) {
$aNewWordsetSearches = array();
$oPosition->setTokenPosition($iToken, count($aWordset));
foreach ($aWordsetSearches as $oCurrentSearch) {
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
$aNewSearches = $oCurrentSearch->extendWithSearchTerm(
$sToken,
$oSearchTerm,
$sPhraseType,
$iToken == 0 && $iPhrase == 0,
$iToken + 1 == count($aWordset)
&& $iPhrase + 1 == count($aPhrases),
$iPhrase
$oPosition
);
foreach ($aNewSearches as $oSearch) {

View File

@ -154,19 +154,16 @@ class SearchDescription
*
* @param string $sToken Term for the token.
* @param object $oSearchTerm Description of the token.
* @param string $sPhraseType Type of phrase the token is contained in.
* @param bool $bFirstToken True if the token is at the beginning of the
* query.
* @param bool $bLastToken True if the token is at the end of the query.
* @param integer $iPhrase Number of the phrase the token is in.
* @param object $oPosition Description of the token position within
* the query.
*
* @return SearchDescription[] List of derived search descriptions.
*/
public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase)
public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition)
{
$aNewSearches = array();
if (($sPhraseType == '' || $sPhraseType == 'country')
if ($oPosition->maybePhrase('country')
&& is_a($oSearchTerm, '\Nominatim\Token\Country')
) {
if (!$this->sCountryCode) {
@ -175,19 +172,19 @@ class SearchDescription
$oSearch->sCountryCode = $oSearchTerm->sCountryCode;
// Country is almost always at the end of the string
// - increase score for finding it anywhere else (optimisation)
if (!$bLastToken) {
if (!$oPosition->isLastToken()) {
$oSearch->iSearchRank += 5;
$oSearch->iNamePhrase = -1;
}
$aNewSearches[] = $oSearch;
}
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
} elseif ($oPosition->maybePhrase('postalcode')
&& is_a($oSearchTerm, '\Nominatim\Token\Postcode')
) {
if (!$this->sPostcode) {
// If we have structured search or this is the first term,
// make the postcode the primary search element.
if ($this->iOperator == Operator::NONE && $bFirstToken) {
if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->iOperator = Operator::POSTCODE;
@ -200,7 +197,7 @@ class SearchDescription
// If we have a structured search or this is not the first term,
// add the postcode as an addendum.
if ($this->iOperator != Operator::POSTCODE
&& ($sPhraseType == 'postalcode' || !empty($this->aName))
&& ($oPosition->isPhrase('postalcode') || !empty($this->aName))
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
@ -212,7 +209,7 @@ class SearchDescription
$aNewSearches[] = $oSearch;
}
}
} elseif (($sPhraseType == '' || $sPhraseType == 'street')
} elseif ($oPosition->maybePhrase('street')
&& is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
) {
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
@ -257,7 +254,7 @@ class SearchDescription
$aNewSearches[] = $oSearch;
}
}
} elseif ($sPhraseType == ''
} elseif ($oPosition->isPhrase('')
&& is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
) {
if ($this->iOperator == Operator::NONE) {
@ -273,7 +270,7 @@ class SearchDescription
$iOp = Operator::NEAR;
}
$oSearch->iSearchRank += 2;
} elseif (!$bFirstToken && !$bLastToken) {
} elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
$oSearch->iSearchRank += 2;
}
if ($this->sHouseNumber) {
@ -287,7 +284,7 @@ class SearchDescription
);
$aNewSearches[] = $oSearch;
}
} elseif ($sPhraseType != 'country'
} elseif (!$oPosition->isPhrase('country')
&& is_a($oSearchTerm, '\Nominatim\Token\Word')
) {
$iWordID = $oSearchTerm->iId;
@ -295,8 +292,10 @@ class SearchDescription
// of the phrase. In structured search the name must necessarily be in
// the first phrase. In unstructured search it may be in a later
// phrase when the first phrase is a house number.
if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) {
if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) {
if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) {
if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
&& $oSearchTerm->iTermCount > 1
) {
$oSearch = clone $this;
$oSearch->iNamePhrase = -1;
$oSearch->iSearchRank += 1;
@ -314,15 +313,14 @@ class SearchDescription
}
$aNewSearches[] = $oSearch;
}
} elseif ($sPhraseType != 'country'
} elseif (!$oPosition->isPhrase('country')
&& is_a($oSearchTerm, '\Nominatim\Token\Partial')
&& strpos($sToken, ' ') === false
) {
$aNewSearches = $this->extendWithPartialTerm(
$sToken,
$oSearchTerm,
(bool) $sPhraseType,
$iPhrase
$oPosition
);
}
@ -334,17 +332,17 @@ class SearchDescription
*
* @param string $sToken Term for the token.
* @param object $oSearchTerm Description of the token.
* @param bool $bStructuredPhrases True if the search is structured.
* @param integer $iPhrase Number of the phrase the token is in.
* @param object $oPosition Description of the token position within
* the query.
*
* @return SearchDescription[] List of derived search descriptions.
*/
private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase)
private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition)
{
$aNewSearches = array();
$iWordID = $oSearchTerm->iId;
if ((!$bStructuredPhrases || $iPhrase > 0)
if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
&& (!empty($this->aName))
) {
$oSearch = clone $this;
@ -361,7 +359,8 @@ class SearchDescription
}
if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
&& ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
&& ((empty($this->aName) && empty($this->aNameNonSearch))
|| $this->iNamePhrase == $oPosition->getPhrase())
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
@ -385,7 +384,7 @@ class SearchDescription
} else {
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
}
$oSearch->iNamePhrase = $iPhrase;
$oSearch->iNamePhrase = $oPosition->getPhrase();
$aNewSearches[] = $oSearch;
}

View File

@ -0,0 +1,87 @@
<?php
namespace Nominatim;
/**
 * Records where a token is located inside a search query.
 *
 * Phrase type, phrase index and the total number of phrases are fixed
 * when the object is created. The token index and the number of tokens
 * are supplied separately through setTokenPosition().
 */
class SearchPosition
{
    // Type of the phrase; the empty string stands for an untyped phrase.
    private $sPhraseType;
    // Index of the phrase within the query; 0 is the first phrase.
    private $iPhrase;
    // Total number of phrases in the query.
    private $iNumPhrases;
    // Index of the token; unset until setTokenPosition() has been called.
    private $iToken;
    // Number of tokens; unset until setTokenPosition() has been called.
    private $iNumTokens;

    /**
     * Create a position description for one phrase of a query.
     *
     * @param string  $sPhraseType Type of the phrase ('' for untyped).
     * @param integer $iPhrase     Index of the phrase within the query.
     * @param integer $iNumPhrases Total number of phrases in the query.
     */
    public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
    {
        $this->sPhraseType = $sPhraseType;
        $this->iPhrase = $iPhrase;
        $this->iNumPhrases = $iNumPhrases;
    }

    /**
     * Update the token part of the position.
     *
     * @param integer $iToken     Index of the current token.
     * @param integer $iNumTokens Number of tokens the index refers to.
     *
     * @return void
     */
    public function setTokenPosition($iToken, $iNumTokens)
    {
        $this->iToken = $iToken;
        $this->iNumTokens = $iNumTokens;
    }

    /**
     * Check if the phrase can be of the given type.
     *
     * @param string $sType Type of phrase requested.
     *
     * @return bool True if the phrase is untyped or of the given type.
     */
    public function maybePhrase($sType)
    {
        return $this->isPhrase('') || $this->isPhrase($sType);
    }

    /**
     * Check if the phrase is exactly of the given type.
     *
     * @param string $sType Type of phrase requested.
     *
     * @return bool True if the phrase is of the given type.
     */
    public function isPhrase($sType)
    {
        return $this->sPhraseType == $sType;
    }

    /**
     * Check if the token is the very first one in the query.
     *
     * @return bool True if both the phrase index and the token index are 0.
     */
    public function isFirstToken()
    {
        return $this->isFirstPhrase() && $this->iToken == 0;
    }

    /**
     * Check if the token is the final one in the query.
     *
     * @return bool True if the token index is the last of its token count
     *              and the phrase index is the last of the phrase count.
     */
    public function isLastToken()
    {
        // A token outside the last phrase can never be the last token.
        if ($this->iPhrase + 1 != $this->iNumPhrases) {
            return false;
        }

        return $this->iToken + 1 == $this->iNumTokens;
    }

    /**
     * Check if the current token is part of the first phrase in the query.
     *
     * @return bool True if the phrase index is 0.
     */
    public function isFirstPhrase()
    {
        return $this->iPhrase == 0;
    }

    /**
     * Get the phrase position in the query.
     *
     * @return integer Phrase index as handed to the constructor.
     */
    public function getPhrase()
    {
        return $this->iPhrase;
    }
}