Nominatim/lib/Phrase.php

127 lines
3.3 KiB
PHP
Raw Normal View History

2017-10-12 23:37:44 +03:00
<?php
namespace Nominatim;
/**
* Segment of a query string.
*
* The parts of a query string are usually separated by commas.
*/
class Phrase
{
    // Maximum recursion depth when computing segmentations. Once this depth
    // is reached the remaining words are no longer split, so a segmentation
    // consists of at most MAX_DEPTH + 1 terms.
    const MAX_DEPTH = 7;

    // Complete phrase as a string.
    private $sPhrase;
    // Element type for structured searches.
    private $sPhraseType;
    // Space-separated words of the phrase.
    private $aWords;
    // Possible segmentations of the phrase.
    private $aWordSets;

    /**
     * Create a new phrase and compute its possible segmentations.
     *
     * The phrase is trimmed and then split at single space characters.
     *
     * @param string $sPhrase     Text of the phrase.
     * @param string $sPhraseType Element type of the phrase.
     */
    public function __construct($sPhrase, $sPhraseType)
    {
        $this->sPhrase = trim($sPhrase);
        $this->sPhraseType = $sPhraseType;
        $this->aWords = explode(' ', $this->sPhrase);
        $this->aWordSets = $this->createWordSets($this->aWords, 0);
    }

    /**
     * Return the element type of the phrase.
     *
     * @return string Phrase type if the phrase comes from a structured query
     *                or empty string otherwise.
     */
    public function getPhraseType()
    {
        return $this->sPhraseType;
    }

    /**
     * Return the array of possible segmentations of the phrase.
     *
     * @return string[][] Array of segmentations, each consisting of an
     *                    array of terms.
     */
    public function getWordSets()
    {
        return $this->aWordSets;
    }

    /**
     * Add the tokens from this phrase to the given list of tokens.
     *
     * Every term of every segmentation is added twice: once as is and once
     * prefixed with a single space. Each token is used as its own key, so
     * tokens already present in the list are simply overwritten.
     *
     * @param string[] $aTokens List of tokens to append.
     *
     * @return void
     */
    public function addTokens(&$aTokens)
    {
        foreach ($this->aWordSets as $aSet) {
            foreach ($aSet as $sWord) {
                $aTokens[' '.$sWord] = ' '.$sWord;
                $aTokens[$sWord] = $sWord;
            }
        }
    }

    /**
     * Invert the set of possible segmentations.
     *
     * The segmentations are recomputed from the original words, this time
     * splitting off terms from the end of the phrase.
     *
     * @return void
     */
    public function invertWordSets()
    {
        $this->aWordSets = $this->createInverseWordSets($this->aWords, 0);
    }

    /**
     * Compute the segmentations of a word list, splitting from the front.
     *
     * The first entry of the result is always the unsplit word list.
     *
     * @param string[] $aWords List of words to segment.
     * @param integer  $iDepth Current recursion depth.
     *
     * @return string[][] Array of segmentations.
     */
    private function createWordSets($aWords, $iDepth)
    {
        $aResult = array(array(join(' ', $aWords)));
        $sFirstToken = '';
        if ($iDepth < self::MAX_DEPTH) {
            while (count($aWords) > 1) {
                $sWord = array_shift($aWords);
                // Compare against the empty string explicitly: a plain
                // truthiness check treats the token '0' as empty and would
                // glue the next word onto it without a separating space.
                $sFirstToken .= ($sFirstToken === '' ? '' : ' ').$sWord;
                $aRest = $this->createWordSets($aWords, $iDepth + 1);
                foreach ($aRest as $aSet) {
                    $aResult[] = array_merge(array($sFirstToken), $aSet);
                }
            }
        }

        return $aResult;
    }

    /**
     * Compute the segmentations of a word list, splitting from the back.
     *
     * Words are taken off the end of the list and the resulting term is put
     * in front of the segmentations of the remaining words. The first entry
     * of the result is always the unsplit word list.
     *
     * @param string[] $aWords List of words to segment.
     * @param integer  $iDepth Current recursion depth.
     *
     * @return string[][] Array of segmentations.
     */
    private function createInverseWordSets($aWords, $iDepth)
    {
        $aResult = array(array(join(' ', $aWords)));
        $sFirstToken = '';
        if ($iDepth < self::MAX_DEPTH) {
            while (count($aWords) > 1) {
                $sWord = array_pop($aWords);
                // Explicit comparison for the same reason as in
                // createWordSets(): the token '0' must not count as empty.
                $sFirstToken = $sWord.($sFirstToken === '' ? '' : ' ').$sFirstToken;
                $aRest = $this->createInverseWordSets($aWords, $iDepth + 1);
                foreach ($aRest as $aSet) {
                    $aResult[] = array_merge(array($sFirstToken), $aSet);
                }
            }
        }

        return $aResult;
    }

    /**
     * Return the internal state of the phrase for debugging output.
     *
     * @return array Array with the keys 'Type', 'Phrase', 'Words' and
     *               'WordSets'.
     */
    public function debugInfo()
    {
        return array(
                'Type' => $this->sPhraseType,
                'Phrase' => $this->sPhrase,
                'Words' => $this->aWords,
                'WordSets' => $this->aWordSets
               );
    }
}