mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-09-19 23:17:21 +03:00
convert phrase array to class
This commit is contained in:
parent
7ea1ef3feb
commit
023f94b066
@ -3,6 +3,7 @@
|
||||
namespace Nominatim;
|
||||
|
||||
require_once(CONST_BasePath.'/lib/PlaceLookup.php');
|
||||
require_once(CONST_BasePath.'/lib/Phrase.php');
|
||||
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
|
||||
require_once(CONST_BasePath.'/lib/SearchDescription.php');
|
||||
require_once(CONST_BasePath.'/lib/SearchContext.php');
|
||||
@ -668,7 +669,7 @@ class Geocode
|
||||
return $aSearchResults;
|
||||
}
|
||||
|
||||
public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
|
||||
public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bIsStructured, $sNormQuery)
|
||||
{
|
||||
/*
|
||||
Calculate all searches using aValidTokens i.e.
|
||||
@ -683,15 +684,11 @@ class Geocode
|
||||
*/
|
||||
$iGlobalRank = 0;
|
||||
|
||||
foreach ($aPhrases as $iPhrase => $aPhrase) {
|
||||
foreach ($aPhrases as $iPhrase => $oPhrase) {
|
||||
$aNewPhraseSearches = array();
|
||||
if ($bStructuredPhrases) {
|
||||
$sPhraseType = $aPhraseTypes[$iPhrase];
|
||||
} else {
|
||||
$sPhraseType = '';
|
||||
}
|
||||
$sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
|
||||
|
||||
foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) {
|
||||
foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) {
|
||||
// Too many permutations - too expensive
|
||||
if ($iWordSet > 120) break;
|
||||
|
||||
@ -746,7 +743,7 @@ class Geocode
|
||||
foreach ($aValidTokens[$sToken] as $aSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
|
||||
$aSearchTerm,
|
||||
$bStructuredPhrases,
|
||||
$bIsStructured,
|
||||
$iPhrase,
|
||||
$aWordFrequencyScores,
|
||||
isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array()
|
||||
@ -955,10 +952,10 @@ class Geocode
|
||||
// Split query into phrases
|
||||
// Commas are used to reduce the search space by indicating where phrases split
|
||||
if ($this->aStructuredQuery) {
|
||||
$aPhrases = $this->aStructuredQuery;
|
||||
$aInPhrases = $this->aStructuredQuery;
|
||||
$bStructuredPhrases = true;
|
||||
} else {
|
||||
$aPhrases = explode(',', $sQuery);
|
||||
$aInPhrases = explode(',', $sQuery);
|
||||
$bStructuredPhrases = false;
|
||||
}
|
||||
|
||||
@ -967,25 +964,19 @@ class Geocode
|
||||
// Get all 'sets' of words
|
||||
// Generate a complete list of all
|
||||
$aTokens = array();
|
||||
foreach ($aPhrases as $iPhrase => $sPhrase) {
|
||||
$aPhrase = chksql(
|
||||
$this->oDB->getRow("SELECT make_standard_name('".pg_escape_string($sPhrase)."') as string"),
|
||||
$aPhrases = array();
|
||||
foreach ($aInPhrases as $iPhrase => $sPhrase) {
|
||||
$sPhrase = chksql(
|
||||
$this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'),
|
||||
"Cannot normalize query string (is it a UTF-8 string?)"
|
||||
);
|
||||
if (trim($aPhrase['string'])) {
|
||||
$aPhrases[$iPhrase] = $aPhrase;
|
||||
$aPhrases[$iPhrase]['words'] = explode(' ', $aPhrases[$iPhrase]['string']);
|
||||
$aPhrases[$iPhrase]['wordsets'] = getWordSets($aPhrases[$iPhrase]['words'], 0);
|
||||
$aTokens = array_merge($aTokens, getTokensFromSets($aPhrases[$iPhrase]['wordsets']));
|
||||
} else {
|
||||
unset($aPhrases[$iPhrase]);
|
||||
if (trim($sPhrase)) {
|
||||
$oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
|
||||
$oPhrase->addTokens($aTokens);
|
||||
$aPhrases[] = $oPhrase;
|
||||
}
|
||||
}
|
||||
|
||||
// Reindex phrases - we make assumptions later on that they are numerically keyed in order
|
||||
$aPhraseTypes = array_keys($aPhrases);
|
||||
$aPhrases = array_values($aPhrases);
|
||||
|
||||
if (sizeof($aTokens)) {
|
||||
// Check which tokens we have, get the ID numbers
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
|
||||
@ -1046,19 +1037,18 @@ class Geocode
|
||||
// Any words that have failed completely?
|
||||
// TODO: suggestions
|
||||
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
|
||||
|
||||
if ($this->bReverseInPlan) {
|
||||
// Reverse phrase array and also reverse the order of the wordsets in
|
||||
// the first and final phrase. Don't bother about phrases in the middle
|
||||
// because order in the address doesn't matter.
|
||||
$aPhrases = array_reverse($aPhrases);
|
||||
$aPhrases[0]['wordsets'] = getInverseWordSets($aPhrases[0]['words'], 0);
|
||||
$aPhrases[0]->invertWordSets();
|
||||
if (sizeof($aPhrases) > 1) {
|
||||
$aFinalPhrase = end($aPhrases);
|
||||
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
|
||||
$aPhrases[sizeof($aPhrases)-1]->invertWordSets();
|
||||
}
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
|
||||
|
||||
foreach ($aGroupedSearches as $aSearches) {
|
||||
foreach ($aSearches as $aSearch) {
|
||||
|
92
lib/Phrase.php
Normal file
92
lib/Phrase.php
Normal file
@ -0,0 +1,92 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
/**
 * Segment of a query string.
 *
 * The parts of a query string are usually separated by commas.
 */
class Phrase
{
    // Maximum recursion depth used when computing the word sets.
    const MAX_DEPTH = 7;

    // Complete phrase as a string.
    private $sPhrase;
    // Element type for structured searches.
    private $sPhraseType;
    // Space-separated words of the phrase.
    private $aWords;
    // Possible segmentations of the phrase.
    private $aWordSets;


    /**
     * Create a phrase and compute its forward word sets.
     *
     * @param string $sPhrase     Phrase text; surrounding whitespace is trimmed.
     * @param string $sPhraseType Element type for structured searches.
     */
    public function __construct($sPhrase, $sPhraseType)
    {
        $this->sPhrase = trim($sPhrase);
        $this->sPhraseType = $sPhraseType;
        $this->aWords = explode(' ', $this->sPhrase);
        $this->aWordSets = $this->createWordSets($this->aWords, 0);
    }

    /**
     * Return the element type handed to the constructor.
     *
     * @return string
     */
    public function getPhraseType()
    {
        return $this->sPhraseType;
    }

    /**
     * Return the currently stored segmentations of the phrase.
     *
     * @return array[] List of word sets, each one a list of strings.
     */
    public function getWordSets()
    {
        return $this->aWordSets;
    }

    /**
     * Add every term of the current word sets to the given token list.
     *
     * Each term is stored twice, keyed by its own value: once as-is and
     * once prefixed with a single space.
     *
     * @param array $aTokens Token list, modified in place.
     *
     * @return void
     */
    public function addTokens(&$aTokens)
    {
        foreach ($this->aWordSets as $aSet) {
            foreach ($aSet as $sWord) {
                $aTokens[' '.$sWord] = ' '.$sWord;
                $aTokens[$sWord] = $sWord;
            }
        }
    }

    /**
     * Replace the stored word sets with the inverse segmentations,
     * i.e. the ones built starting from the end of the phrase.
     *
     * @return void
     */
    public function invertWordSets()
    {
        $this->aWordSets = $this->createInverseWordSets($this->aWords, 0);
    }

    /**
     * Compute the segmentations of a word list, splitting from the front.
     *
     * The first entry of the result is always the complete list joined
     * into a single term. Recursion stops at MAX_DEPTH.
     *
     * @param string[] $aWords Words to segment.
     * @param int      $iDepth Current recursion depth.
     *
     * @return array[] List of word sets.
     */
    private function createWordSets($aWords, $iDepth)
    {
        $aResult = array(array(join(' ', $aWords)));
        $sFirstToken = '';
        if ($iDepth < self::MAX_DEPTH) {
            while (sizeof($aWords) > 1) {
                $sWord = array_shift($aWords);
                // Compare against '' explicitly: a plain truthiness test
                // treats the token '0' as empty and would drop the space.
                $sFirstToken .= ($sFirstToken === '' ? '' : ' ').$sWord;
                $aRest = $this->createWordSets($aWords, $iDepth + 1);
                foreach ($aRest as $aSet) {
                    $aResult[] = array_merge(array($sFirstToken), $aSet);
                }
            }
        }

        return $aResult;
    }

    /**
     * Compute the segmentations of a word list, splitting from the back.
     *
     * The first entry of the result is always the complete list joined
     * into a single term. Recursion stops at MAX_DEPTH.
     *
     * @param string[] $aWords Words to segment.
     * @param int      $iDepth Current recursion depth.
     *
     * @return array[] List of word sets.
     */
    public function createInverseWordSets($aWords, $iDepth)
    {
        $aResult = array(array(join(' ', $aWords)));
        $sFirstToken = '';
        if ($iDepth < self::MAX_DEPTH) {
            while (sizeof($aWords) > 1) {
                $sWord = array_pop($aWords);
                // Compare against '' explicitly: a plain truthiness test
                // treats the token '0' as empty and would drop the space.
                $sFirstToken = $sWord.($sFirstToken === '' ? '' : ' ').$sFirstToken;
                $aRest = $this->createInverseWordSets($aWords, $iDepth + 1);
                foreach ($aRest as $aSet) {
                    $aResult[] = array_merge(array($sFirstToken), $aSet);
                }
            }
        }

        return $aResult;
    }
}
|
48
lib/lib.php
48
lib/lib.php
@ -60,54 +60,6 @@ function byImportance($a, $b)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Compute the segmentations of a word list, splitting from the front.
 *
 * The first entry of the result is always the complete list joined into
 * a single term. Recursion stops once $iDepth reaches 7.
 *
 * @param string[] $aWords Words to segment.
 * @param int      $iDepth Current recursion depth (callers start at 0).
 *
 * @return array[] List of word sets, each one a list of strings.
 */
function getWordSets($aWords, $iDepth)
{
    $aResult = array(array(join(' ', $aWords)));
    $sFirstToken = '';
    if ($iDepth < 7) {
        while (sizeof($aWords) > 1) {
            $sWord = array_shift($aWords);
            // Compare against '' explicitly: a plain truthiness test
            // treats the token '0' as empty and would drop the space.
            $sFirstToken .= ($sFirstToken === '' ? '' : ' ').$sWord;
            $aRest = getWordSets($aWords, $iDepth+1);
            foreach ($aRest as $aSet) {
                $aResult[] = array_merge(array($sFirstToken), $aSet);
            }
        }
    }
    return $aResult;
}
|
||||
|
||||
/**
 * Compute the segmentations of a word list, splitting from the back.
 *
 * The first entry of the result is always the complete list joined into
 * a single term. Recursion stops once $iDepth reaches 8.
 *
 * @param string[] $aWords Words to segment.
 * @param int      $iDepth Current recursion depth (callers start at 0).
 *
 * @return array[] List of word sets, each one a list of strings.
 */
function getInverseWordSets($aWords, $iDepth)
{
    $aResult = array(array(join(' ', $aWords)));
    $sFirstToken = '';
    if ($iDepth < 8) {
        while (sizeof($aWords) > 1) {
            $sWord = array_pop($aWords);
            // Compare against '' explicitly: a plain truthiness test
            // treats the token '0' as empty and would drop the space.
            $sFirstToken = $sWord.($sFirstToken === '' ? '' : ' ').$sFirstToken;
            $aRest = getInverseWordSets($aWords, $iDepth+1);
            foreach ($aRest as $aSet) {
                $aResult[] = array_merge(array($sFirstToken), $aSet);
            }
        }
    }
    return $aResult;
}
|
||||
|
||||
|
||||
/**
 * Collect the distinct terms found in a list of word sets.
 *
 * Every term is recorded twice, keyed by its own value: once prefixed
 * with a single space and once unchanged.
 *
 * @param array[] $aSets List of word sets, each one a list of strings.
 *
 * @return array Map of token => token.
 */
function getTokensFromSets($aSets)
{
    $aCollected = array();
    foreach ($aSets as $aWordList) {
        foreach ($aWordList as $sTerm) {
            $sSpaced = ' '.$sTerm;
            $aCollected[$sSpaced] = $sSpaced;
            $aCollected[$sTerm] = $sTerm;
        }
    }
    return $aCollected;
}
|
||||
|
||||
|
||||
function getClassTypes()
|
||||
{
|
||||
return array(
|
||||
|
Loading…
Reference in New Issue
Block a user