mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-26 06:22:13 +03:00
Avoid dropping tokens completely just because they are expensive. Use the ' '-prefixed token in preference to just dropping them.
This commit is contained in:
parent
4bc40d2c0b
commit
3852096c80
20
lib/lib.php
20
lib/lib.php
@ -646,7 +646,7 @@
|
||||
}
|
||||
}
|
||||
echo "<table border=\"1\">";
|
||||
echo "<tr><th>rank</th><th>Name Tokens</th><th>Address Tokens</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
|
||||
echo "<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th><th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
|
||||
foreach($aData as $iRank => $aRankedSet)
|
||||
{
|
||||
foreach($aRankedSet as $aRow)
|
||||
@ -663,6 +663,15 @@
|
||||
}
|
||||
echo "</td>";
|
||||
|
||||
echo "<td>";
|
||||
$sSep = '';
|
||||
foreach($aRow['aNameNonSearch'] as $iWordID)
|
||||
{
|
||||
echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
|
||||
$sSep = ', ';
|
||||
}
|
||||
echo "</td>";
|
||||
|
||||
echo "<td>";
|
||||
$sSep = '';
|
||||
foreach($aRow['aAddress'] as $iWordID)
|
||||
@ -672,6 +681,15 @@
|
||||
}
|
||||
echo "</td>";
|
||||
|
||||
echo "<td>";
|
||||
$sSep = '';
|
||||
foreach($aRow['aAddressNonSearch'] as $iWordID)
|
||||
{
|
||||
echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
|
||||
$sSep = ', ';
|
||||
}
|
||||
echo "</td>";
|
||||
|
||||
echo "<td>".$aRow['sCountryCode']."</td>";
|
||||
|
||||
echo "<td>".$aRow['sOperator']."</td>";
|
||||
|
@ -638,10 +638,30 @@
|
||||
$aSearch = $aCurrentSearch;
|
||||
$aSearch['iSearchRank'] += 1;
|
||||
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
||||
{
|
||||
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
}
|
||||
elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version?
|
||||
{
|
||||
foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
|
||||
{
|
||||
if (empty($aSearchTermToken['country_code'])
|
||||
&& empty($aSearchTermToken['lat'])
|
||||
&& empty($aSearchTermToken['class']))
|
||||
{
|
||||
$aSearch = $aCurrentSearch;
|
||||
$aSearch['iSearchRank'] += 1;
|
||||
$aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
|
||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
}
|
||||
}
|
||||
|
||||
if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
|
||||
@ -900,6 +920,7 @@
|
||||
// TODO: filter out the pointless search terms (2 letter name tokens and less)
|
||||
// they might be right - but they are just too darned expensive to run
|
||||
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
|
||||
if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
|
||||
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress'])
|
||||
{
|
||||
// For infrequent name terms disable index usage for address
|
||||
@ -907,11 +928,12 @@
|
||||
sizeof($aSearch['aName']) == 1 &&
|
||||
$aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
|
||||
{
|
||||
$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
|
||||
$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
|
||||
}
|
||||
else
|
||||
{
|
||||
$aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
|
||||
if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
|
||||
}
|
||||
}
|
||||
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
|
||||
|
Loading…
Reference in New Issue
Block a user