mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-09 16:26:02 +03:00
factor out query position
Moves token and phrase position and phrase type into a separate class that is handed in when assembling the search description. This drastically reduces the number of parameters for the function to extend the search descriptions and gives us more flexibility in the future for more complex positional analysis.
This commit is contained in:
parent
143ff14466
commit
ec3f6c9c42
@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/Phrase.php');
|
||||
require_once(CONST_LibDir.'/ReverseGeocode.php');
|
||||
require_once(CONST_LibDir.'/SearchDescription.php');
|
||||
require_once(CONST_LibDir.'/SearchContext.php');
|
||||
require_once(CONST_LibDir.'/SearchPosition.php');
|
||||
require_once(CONST_LibDir.'/TokenList.php');
|
||||
require_once(CONST_TokenizerDir.'/tokenizer.php');
|
||||
|
||||
@ -345,7 +346,11 @@ class Geocode
|
||||
*/
|
||||
foreach ($aPhrases as $iPhrase => $oPhrase) {
|
||||
$aNewPhraseSearches = array();
|
||||
$sPhraseType = $oPhrase->getPhraseType();
|
||||
$oPosition = new SearchPosition(
|
||||
$oPhrase->getPhraseType(),
|
||||
$iPhrase,
|
||||
count($aPhrases)
|
||||
);
|
||||
|
||||
foreach ($oPhrase->getWordSets() as $aWordset) {
|
||||
$aWordsetSearches = $aSearches;
|
||||
@ -353,17 +358,14 @@ class Geocode
|
||||
// Add all words from this wordset
|
||||
foreach ($aWordset as $iToken => $sToken) {
|
||||
$aNewWordsetSearches = array();
|
||||
$oPosition->setTokenPosition($iToken, count($aWordset));
|
||||
|
||||
foreach ($aWordsetSearches as $oCurrentSearch) {
|
||||
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithSearchTerm(
|
||||
$sToken,
|
||||
$oSearchTerm,
|
||||
$sPhraseType,
|
||||
$iToken == 0 && $iPhrase == 0,
|
||||
$iToken + 1 == count($aWordset)
|
||||
&& $iPhrase + 1 == count($aPhrases),
|
||||
$iPhrase
|
||||
$oPosition
|
||||
);
|
||||
|
||||
foreach ($aNewSearches as $oSearch) {
|
||||
|
@ -154,19 +154,16 @@ class SearchDescription
|
||||
*
|
||||
* @param string $sToken Term for the token.
|
||||
* @param object $oSearchTerm Description of the token.
|
||||
* @param string $sPhraseType Type of phrase the token is contained in.
|
||||
* @param bool $bFirstToken True if the token is at the beginning of the
|
||||
* query.
|
||||
* @param bool $bLastToken True if the token is at the end of the query.
|
||||
* @param integer $iPhrase Number of the phrase the token is in.
|
||||
* @param object $oPosition Description of the token position within
|
||||
*                        the query.
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
*/
|
||||
public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase)
|
||||
public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition)
|
||||
{
|
||||
$aNewSearches = array();
|
||||
|
||||
if (($sPhraseType == '' || $sPhraseType == 'country')
|
||||
if ($oPosition->maybePhrase('country')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Country')
|
||||
) {
|
||||
if (!$this->sCountryCode) {
|
||||
@ -175,19 +172,19 @@ class SearchDescription
|
||||
$oSearch->sCountryCode = $oSearchTerm->sCountryCode;
|
||||
// Country is almost always at the end of the string
|
||||
// - increase score for finding it anywhere else (optimisation)
|
||||
if (!$bLastToken) {
|
||||
if (!$oPosition->isLastToken()) {
|
||||
$oSearch->iSearchRank += 5;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
|
||||
} elseif ($oPosition->maybePhrase('postalcode')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Postcode')
|
||||
) {
|
||||
if (!$this->sPostcode) {
|
||||
// If we have structured search or this is the first term,
|
||||
// make the postcode the primary search element.
|
||||
if ($this->iOperator == Operator::NONE && $bFirstToken) {
|
||||
if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->iOperator = Operator::POSTCODE;
|
||||
@ -200,7 +197,7 @@ class SearchDescription
|
||||
// If we have a structured search or this is not the first term,
|
||||
// add the postcode as an addendum.
|
||||
if ($this->iOperator != Operator::POSTCODE
|
||||
&& ($sPhraseType == 'postalcode' || !empty($this->aName))
|
||||
&& ($oPosition->isPhrase('postalcode') || !empty($this->aName))
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
@ -212,7 +209,7 @@ class SearchDescription
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
} elseif (($sPhraseType == '' || $sPhraseType == 'street')
|
||||
} elseif ($oPosition->maybePhrase('street')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
|
||||
) {
|
||||
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
|
||||
@ -257,7 +254,7 @@ class SearchDescription
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
} elseif ($sPhraseType == ''
|
||||
} elseif ($oPosition->isPhrase('')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
|
||||
) {
|
||||
if ($this->iOperator == Operator::NONE) {
|
||||
@ -273,7 +270,7 @@ class SearchDescription
|
||||
$iOp = Operator::NEAR;
|
||||
}
|
||||
$oSearch->iSearchRank += 2;
|
||||
} elseif (!$bFirstToken && !$bLastToken) {
|
||||
} elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
|
||||
$oSearch->iSearchRank += 2;
|
||||
}
|
||||
if ($this->sHouseNumber) {
|
||||
@ -287,7 +284,7 @@ class SearchDescription
|
||||
);
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif ($sPhraseType != 'country'
|
||||
} elseif (!$oPosition->isPhrase('country')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Word')
|
||||
) {
|
||||
$iWordID = $oSearchTerm->iId;
|
||||
@ -295,8 +292,10 @@ class SearchDescription
|
||||
// of the phrase. In structured search the name must necessarily be in
|
||||
// the first phrase. In unstructured search it may be in a later
|
||||
// phrase when the first phrase is a house number.
|
||||
if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) {
|
||||
if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) {
|
||||
if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) {
|
||||
if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
|
||||
&& $oSearchTerm->iTermCount > 1
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
$oSearch->iSearchRank += 1;
|
||||
@ -314,15 +313,14 @@ class SearchDescription
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif ($sPhraseType != 'country'
|
||||
} elseif (!$oPosition->isPhrase('country')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Partial')
|
||||
&& strpos($sToken, ' ') === false
|
||||
) {
|
||||
$aNewSearches = $this->extendWithPartialTerm(
|
||||
$sToken,
|
||||
$oSearchTerm,
|
||||
(bool) $sPhraseType,
|
||||
$iPhrase
|
||||
$oPosition
|
||||
);
|
||||
}
|
||||
|
||||
@ -334,17 +332,17 @@ class SearchDescription
|
||||
*
|
||||
* @param string $sToken Term for the token.
|
||||
* @param object $oSearchTerm Description of the token.
|
||||
* @param bool $bStructuredPhrases True if the search is structured.
|
||||
* @param integer $iPhrase Number of the phrase the token is in.
|
||||
* @param object $oPosition Description of the token position within
|
||||
*                        the query.
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
*/
|
||||
private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase)
|
||||
private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition)
|
||||
{
|
||||
$aNewSearches = array();
|
||||
$iWordID = $oSearchTerm->iId;
|
||||
|
||||
if ((!$bStructuredPhrases || $iPhrase > 0)
|
||||
if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
|
||||
&& (!empty($this->aName))
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
@ -361,7 +359,8 @@ class SearchDescription
|
||||
}
|
||||
|
||||
if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
|
||||
&& ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
|
||||
&& ((empty($this->aName) && empty($this->aNameNonSearch))
|
||||
|| $this->iNamePhrase == $oPosition->getPhrase())
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
@ -385,7 +384,7 @@ class SearchDescription
|
||||
} else {
|
||||
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
|
||||
}
|
||||
$oSearch->iNamePhrase = $iPhrase;
|
||||
$oSearch->iNamePhrase = $oPosition->getPhrase();
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
|
||||
|
87
lib-php/SearchPosition.php
Normal file
87
lib-php/SearchPosition.php
Normal file
@ -0,0 +1,87 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
/**
 * Description of the position of a token within a query.
 *
 * Holds the type and index of the phrase currently being processed plus
 * the index of the current token inside that phrase. The phrase data is
 * fixed at construction; the token data is updated via setTokenPosition().
 */
class SearchPosition
{
    /** Type of the phrase; the empty string marks an untyped phrase. */
    private $sPhraseType;

    /** Zero-based index of the phrase within the query. */
    private $iPhrase;
    /** Total number of phrases in the query. */
    private $iNumPhrases;

    /**
     * Zero-based index of the token within the phrase.
     *
     * Explicitly starts at 0 so that the position checks do not depend on
     * loose comparisons against null before setTokenPosition() is called.
     */
    private $iToken = 0;
    /** Total number of tokens in the phrase (0 until a token position is set). */
    private $iNumTokens = 0;


    /**
     * Create a position for the given phrase.
     *
     * @param string  $sPhraseType Type of the phrase ('' when untyped).
     * @param integer $iPhrase     Zero-based index of the phrase.
     * @param integer $iNumPhrases Total number of phrases in the query.
     */
    public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
    {
        $this->sPhraseType = $sPhraseType;
        $this->iPhrase = $iPhrase;
        $this->iNumPhrases = $iNumPhrases;
    }

    /**
     * Set the position of the current token within the phrase.
     *
     * @param integer $iToken     Zero-based index of the token.
     * @param integer $iNumTokens Total number of tokens in the phrase.
     *
     * @return void
     */
    public function setTokenPosition($iToken, $iNumTokens)
    {
        $this->iToken = $iToken;
        $this->iNumTokens = $iNumTokens;
    }

    /**
     * Check if the phrase can be of the given type.
     *
     * @param string $sType Type of phrase requested.
     *
     * @return bool True if the phrase is untyped or of the given type.
     */
    public function maybePhrase($sType)
    {
        return $this->sPhraseType == '' || $this->sPhraseType == $sType;
    }

    /**
     * Check if the phrase is exactly of the given type.
     *
     * @param string $sType Type of phrase requested.
     *
     * @return bool True if the phrase is of the given type.
     */
    public function isPhrase($sType)
    {
        return $this->sPhraseType == $sType;
    }

    /**
     * Check if the token is the very first in the query.
     *
     * @return bool True for the first token of the first phrase.
     */
    public function isFirstToken()
    {
        return $this->iPhrase == 0 && $this->iToken == 0;
    }

    /**
     * Check if the token is the final one in the query.
     *
     * @return bool True for the last token of the last phrase.
     */
    public function isLastToken()
    {
        return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases;
    }

    /**
     * Check if the current token is part of the first phrase in the query.
     *
     * @return bool True if the phrase index is 0.
     */
    public function isFirstPhrase()
    {
        return $this->iPhrase == 0;
    }

    /**
     * Get the phrase position in the query.
     *
     * @return integer Zero-based index of the phrase, as given to the constructor.
     */
    public function getPhrase()
    {
        return $this->iPhrase;
    }
}
|
Loading…
Reference in New Issue
Block a user