calculate search position based on 'importance' rather than address rank

This commit is contained in:
Brian Quinion 2011-03-18 09:52:16 +00:00
parent cc6b90b490
commit 2148d81474
7 changed files with 120 additions and 47 deletions

View File

@ -40,6 +40,9 @@
function byImportance($a, $b)
{
/*
if ($a['importance'] != $b['importance'])
return ($a['importance'] > $b['importance']?-1:1);
if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures'])
return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1);
if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area'])
@ -48,8 +51,7 @@
// return ($a['levenshtein'] < $b['levenshtein']?-1:1);
if ($a['rank_search'] != $b['rank_search'])
return ($a['rank_search'] < $b['rank_search']?-1:1);
if ($a['importance'] != $b['importance'])
return ($a['importance'] < $b['importance']?-1:1);
*/
return ($a['foundorder'] < $b['foundorder']?-1:1);
}

View File

@ -140,6 +140,13 @@ void StartElement(xmlTextReaderPtr reader, const xmlChar *name)
feature.rankSearch = xmlTextReaderGetAttribute(reader, BAD_CAST "importance");
feature.parentPlaceID = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_place_id");
/*
if (strlen(feature.parentPlaceID) == 0)
{
xmlFree(feature.parentPlaceID);
feature.parentPlaceID = NULL;
}
*/
feature.parentType = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_type");
feature.parentID = xmlTextReaderGetAttribute(reader, BAD_CAST "parent_id");
@ -320,7 +327,7 @@ void StartElement(xmlTextReaderPtr reader, const xmlChar *name)
void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
{
PGresult * res;
const char * paramValues[11];
const char * paramValues[14];
char * place_id;
char * partionQueryName;
int i, namePos, lineTypeLen, lineValueLen;
@ -438,6 +445,8 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
}
paramValues[5] = (const char *)featureNameString;
paramValues[6] = (const char *)feature.countryCode;
featureExtraTagString[0] = 0;
if (featureExtraTagLines)
{
@ -464,18 +473,21 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
strcpy(featureExtraTagString+(namePos++), "\"");
}
}
paramValues[6] = (const char *)featureExtraTagString;
paramValues[7] = (const char *)featureExtraTagString;
paramValues[7] = (const char *)feature.parentPlaceID;
if (strlen(feature.parentPlaceID) == 0)
paramValues[8] = "0";
else
paramValues[8] = (const char *)feature.parentPlaceID;
paramValues[8] = (const char *)feature.adminLevel;
paramValues[9] = (const char *)feature.houseNumber;
paramValues[10] = (const char *)feature.rankAddress;
paramValues[11] = (const char *)feature.rankSearch;
paramValues[12] = (const char *)feature.geometry;
paramValues[9] = (const char *)feature.adminLevel;
paramValues[10] = (const char *)feature.houseNumber;
paramValues[11] = (const char *)feature.rankAddress;
paramValues[12] = (const char *)feature.rankSearch;
paramValues[13] = (const char *)feature.geometry;
if (strlen(paramValues[3]))
{
res = PQexecPrepared(conn, "placex_insert", 13, paramValues, NULL, NULL, 0);
res = PQexecPrepared(conn, "placex_insert", 14, paramValues, NULL, NULL, 0);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
{
fprintf(stderr, "index_placex: INSERT failed: %s", PQerrorMessage(conn));
@ -561,6 +573,9 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
xmlFree(feature.value);
xmlFree(feature.rankAddress);
xmlFree(feature.rankSearch);
if (feature.parentPlaceID) xmlFree(feature.parentPlaceID);
if (feature.parentType) xmlFree(feature.parentType);
if (feature.parentID) xmlFree(feature.parentID);
// if (feature.name) xmlFree(feature.name);
if (feature.countryCode) xmlFree(feature.countryCode);
if (feature.adminLevel) xmlFree(feature.adminLevel);
@ -704,8 +719,8 @@ int nominatim_import(const char *conninfo, const char *partionTagsFilename, cons
}
res = PQprepare(conn, "placex_insert",
"insert into placex (place_id,osm_type,osm_id,class,type,name,extratags,parent_place_id,admin_level,housenumber,rank_address,rank_search,geometry) "
"values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, st_setsrid($13, 4326))",
"insert into placex (place_id,osm_type,osm_id,class,type,name,country_code,extratags,parent_place_id,admin_level,housenumber,rank_address,rank_search,geometry) "
"values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, st_setsrid($14, 4326))",
12, NULL);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
{

View File

@ -1220,7 +1220,9 @@ BEGIN
DELETE FROM place_boundingbox where place_id = NEW.place_id;
result := deleteRoad(NEW.partition, NEW.place_id);
result := deleteLocationArea(NEW.partition, NEW.place_id);
-- recalculate country and partition (should probably have a country_code and calculated_country_code as separate fields)
SELECT country_code from place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type INTO NEW.country_code;
NEW.country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
NEW.partition := get_partition(NEW.geometry, NEW.country_code);
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
@ -1589,6 +1591,12 @@ BEGIN
-- RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type;
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
IF st_area(OLD.geometry) > 2 THEN
insert into import_polygon_delete values (OLD.osm_type,OLD.osm_id,OLD.class,OLD.type);
RETURN NULL;
END IF;
-- mark for delete
UPDATE placex set indexed_status = 100 where osm_type = OLD.osm_type and osm_id = OLD.osm_id and class = OLD.class and type = OLD.type;
@ -1665,6 +1673,7 @@ BEGIN
END IF;
DELETE from import_polygon_error where osm_type = NEW.osm_type and osm_id = NEW.osm_id;
DELETE from import_polygon_delete where osm_type = NEW.osm_type and osm_id = NEW.osm_id;
-- To paraphrase, if there isn't an existing item, OR if the admin level has changed, OR if it is a major change in geometry
IF existing.osm_type IS NULL

View File

@ -299,3 +299,12 @@ CREATE TABLE import_polygon_error (
SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2);
SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2);
CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
-- import_polygon_delete: holds deletions of large polygons that were held back
-- for manual review. A row identifies the affected object by
-- (osm_type, osm_id, class, type); it is written when a place with
-- st_area(geometry) > 2 is deleted, and removed again when the same
-- osm_type/osm_id is re-imported.
--
-- IF EXISTS lets this script run on a database where the table has not been
-- created yet, instead of raising an error on the DROP.
DROP TABLE IF EXISTS import_polygon_delete;
CREATE TABLE import_polygon_delete (
  osm_type char(1),
  osm_id INTEGER,
  class TEXT NOT NULL,
  type TEXT NOT NULL
);
-- Supports the lookups and deletes that filter on (osm_type, osm_id).
CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);

View File

@ -84,6 +84,11 @@
{
preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
}
elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['outlinestring'],$aMatch))
{
// TODO: this just takes the first ring
preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
}
elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['outlinestring'],$aMatch))
{
$fRadius = 0.01;

View File

@ -25,6 +25,10 @@
$sOutputFormat = $_GET['format'];
}
// Show address breakdown
$bShowAddressDetails = true;
if (isset($_GET['addressdetails'])) $bShowAddressDetails = (bool)$_GET['addressdetails'];
// Preferred language
$aLangPrefOrder = getPrefferedLangauges();
$sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
@ -143,7 +147,10 @@
$sSQL .= " from placex where place_id = $iPlaceID ";
$aPlace = $oDB->getRow($sSQL);
$aAddress = getAddressDetails($oDB, $sLanguagePrefArraySQL, $iPlaceID, $aPlace['country_code']);
if ($bShowAddressDetails)
{
$aAddress = getAddressDetails($oDB, $sLanguagePrefArraySQL, $iPlaceID, $aPlace['country_code']);
}
$aClassType = getClassTypes();
$sAddressType = '';

View File

@ -37,7 +37,12 @@
// Preferred language
$aLangPrefOrder = getPrefferedLangauges();
// if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true;
if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true;
$bReverseInPlan = true;
$sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
if (isset($_GET['exclude_place_ids']) && $_GET['exclude_place_ids'])
@ -62,7 +67,11 @@
break;
case 'city':
$iMinAddressRank = 14;
$iMaxAddressRank = 18;
$iMaxAddressRank = 16;
break;
case 'settlement':
$iMinAddressRank = 8;
$iMaxAddressRank = 20;
break;
}
}
@ -283,6 +292,7 @@
var_dump($sSQL, $aDatabaseWords);
exit;
}
$aPossibleMainWordIDs = array();
foreach($aDatabaseWords as $aToken)
{
if (isset($aValidTokens[$aToken['word_token']]))
@ -293,6 +303,7 @@
{
$aValidTokens[$aToken['word_token']] = array($aToken);
}
if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1;
}
if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
@ -572,18 +583,24 @@
if (CONST_Debug) var_Dump($aGroupedSearches);
if ($bReverseInPlan && false)
if ($bReverseInPlan)
{
foreach($aGroupedSearches as $iGroup => $aSearches)
$aCopyGroupedSearches = $aGroupedSearches;
foreach($aCopyGroupedSearches as $iGroup => $aSearches)
{
foreach($aSearches as $iSearch => $aSearch)
{
if (sizeof($aSearch['aAddress']))
{
$aReverseSearch = $aSearch;
$iReverseItem = array_pop($aSearch['aAddress']);
$aReverseSearch['aName'][$iReverseItem] = $iReverseItem;
$aGroupedSearches[$iGroup][] = $aReverseSearch;
if (isset($aPossibleMainWordIDs[$iReverseItem]))
{
$aSearch['aAddress'] = array_merge($aSearch['aAddress'], $aSearch['aName']);
$aSearch['aName'] = array($iReverseItem);
$aGroupedSearches[$iGroup][] = $aSearch;
}
// $aReverseSearch['aName'][$iReverseItem] = $iReverseItem;
// $aGroupedSearches[$iGroup][] = $aReverseSearch;
}
}
}
@ -692,17 +709,16 @@
}
if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL";
if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc";
if ($sViewboxSmallSQL) $aOrder[] = "ST_Contains($sViewboxSmallSQL, centroid) desc";
if ($sViewboxLargeSQL) $aOrder[] = "ST_Contains($sViewboxLargeSQL, centroid) desc";
$aOrder[] = "search_rank ASC";
$sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.92-(search_rank::float/33) else importance end';
if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END";
if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END";
$aOrder[] = "$sImportanceSQL DESC";
if (sizeof($aTerms))
{
$sSQL = "select place_id";
if ($sViewboxSmallSQL) $sSQL .= ",ST_Contains($sViewboxSmallSQL, centroid) as in_small";
else $sSQL .= ",false as in_small";
if ($sViewboxLargeSQL) $sSQL .= ",ST_Contains($sViewboxLargeSQL, centroid) as in_large";
else $sSQL .= ",false as in_large";
$sSQL .= " from search_name";
$sSQL .= " where ".join(' and ',$aTerms);
$sSQL .= " order by ".join(', ',$aOrder);
@ -720,19 +736,21 @@
var_dump($sSQL, $aViewBoxPlaceIDs);
exit;
}
//var_dump($aViewBoxPlaceIDs);
// Did we have any viewbox matches?
$aPlaceIDs = array();
$bViewBoxMatch = false;
foreach($aViewBoxPlaceIDs as $aViewBoxRow)
{
if ($bViewBoxMatch == 1 && $aViewBoxRow['in_small'] == 'f') break;
if ($bViewBoxMatch == 2 && $aViewBoxRow['in_large'] == 'f') break;
if ($aViewBoxRow['in_small'] == 't') $bViewBoxMatch = 1;
else if ($aViewBoxRow['in_large'] == 't') $bViewBoxMatch = 2;
// if ($bViewBoxMatch == 1 && $aViewBoxRow['in_small'] == 'f') break;
// if ($bViewBoxMatch == 2 && $aViewBoxRow['in_large'] == 'f') break;
// if ($aViewBoxRow['in_small'] == 't') $bViewBoxMatch = 1;
// else if ($aViewBoxRow['in_large'] == 't') $bViewBoxMatch = 2;
$aPlaceIDs[] = $aViewBoxRow['place_id'];
}
}
//var_Dump($aPlaceIDs);
//exit;
if ($aSearch['sHouseNumber'] && sizeof($aPlaceIDs))
{
@ -807,6 +825,9 @@
$aPlaceIDs = $oDB->getCol($sSQL);
$sPlaceIDs = join(',',$aPlaceIDs);
if ($sPlaceIDs)
{
$fRange = 0.01;
$sSQL = "select count(*) from pg_tables where tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'";
if ($oDB->getOne($sSQL))
@ -830,7 +851,7 @@
{
if (isset($aSearch['fRadius']) && $aSearch['fRadius']) $fRange = $aSearch['fRadius'];
$sSQL = "select l.place_id from placex as l,placex as f where ";
$sSQL .= "f.place_id in ($sPlaceIDs) and ST_DWithin(l.geometry, st_centroid(f.geometry), $fRange) ";
$sSQL .= "f.place_id in ( $sPlaceIDs) and ST_DWithin(l.geometry, st_centroid(f.geometry), $fRange) ";
$sSQL .= "and l.class='".$aSearch['sClass']."' and l.type='".$aSearch['sType']."' ";
if (sizeof($aExcludePlaceIDs))
{
@ -842,6 +863,7 @@
if (CONST_Debug) var_dump($sSQL);
$aPlaceIDs = $oDB->getCol($sSQL);
}
}
}
}
@ -884,10 +906,11 @@
$sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,";
$sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
$sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
$sSQL .= $sOrderSQL." as porder ";
$sSQL .= $sOrderSQL." as porder, ";
$sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
$sSQL .= "from placex where place_id in ($sPlaceIDs) ";
$sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
$sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code";
$sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
if (!$bDeDupe) $sSQL .= ",place_id";
$sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) ";
$sSQL .= ",get_name_by_language(name, $sLanguagePrefArraySQL) ";
@ -898,24 +921,27 @@
$sSQL .= "null as placename,";
$sSQL .= "null as ref,";
$sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
$sSQL .= $sOrderSQL." as porder ";
$sSQL .= $sOrderSQL." as porder, ";
$sSQL .= "-0.15 as importance ";
$sSQL .= "from location_property_tiger where place_id in ($sPlaceIDs) ";
$sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank ";
$sSQL .= "group by place_id";
if (!$bDeDupe) $sSQL .= ",place_id";
$sSQL .= " union ";
$sSQL .= "select 'T' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,";
$sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,";
$sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,";
$sSQL .= "null as placename,";
$sSQL .= "null as ref,";
$sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
$sSQL .= $sOrderSQL." as porder ";
$sSQL .= $sOrderSQL." as porder, ";
$sSQL .= "-0.15 as importance ";
$sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) ";
$sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank ";
$sSQL .= "group by place_id";
if (!$bDeDupe) $sSQL .= ",place_id";
$sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) ";
$sSQL .= "order by rank_search,rank_address,porder asc";
$sSQL .= "order by porder asc";
// $sSQL .= "order by rank_search,rank_address,porder asc";
if (CONST_Debug) var_dump('<hr>',$sSQL);
$aSearchResults = $oDB->getAll($sSQL);
//var_dump($sSQL,$aSearchResults);exit;
@ -934,9 +960,9 @@
{
$sSearchResult = 'No Results Found';
}
//var_Dump($aSearchResults);
//exit;
$aClassType = getClassTypesWithImportance();
foreach($aSearchResults as $iResNum => $aResult)
{
if (CONST_Search_AreaPolygons || true)
@ -1049,7 +1075,7 @@
}
//if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']);
/*
if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance'])
&& $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance'])
{
@ -1064,15 +1090,15 @@
{
$aResult['importance'] = 1000000000000000;
}
*/
$aResult['name'] = $aResult['langaddress'];
$aResult['foundorder'] = $iResNum;
$aSearchResults[$iResNum] = $aResult;
}
//var_dump($aSearchResults);exit;
uasort($aSearchResults, 'byImportance');
//var_dump($aSearchResults);exit;
$aOSMIDDone = array();
$aClassTypeNameDone = array();