mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-23 05:35:13 +03:00
calculate importance from wikipedia
This commit is contained in:
parent
709ab1bb03
commit
1c0fa81d88
@ -1225,6 +1225,10 @@ DECLARE
|
||||
name_vector INTEGER[];
|
||||
nameaddress_vector INTEGER[];
|
||||
|
||||
wiki_article TEXT;
|
||||
wiki_article_title TEXT;
|
||||
wiki_article_language TEXT;
|
||||
|
||||
result BOOLEAN;
|
||||
BEGIN
|
||||
|
||||
@ -1308,6 +1312,34 @@ BEGIN
|
||||
address_havelevel[i] := false;
|
||||
END LOOP;
|
||||
|
||||
NEW.importance := null;
|
||||
-- WARNING: see duplicate of code below (yuk!)
|
||||
IF NEW.extratags?'wikipedia' THEN
|
||||
wiki_article := replace(regexp_replace(NEW.extratags->'wikipedia',E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_');
|
||||
wiki_article_title := split_part(wiki_article, ':', 2);
|
||||
IF wiki_article_title IS NULL OR wiki_article_title = '' THEN
|
||||
wiki_article_title := wiki_article;
|
||||
wiki_article_language := 'en';
|
||||
ELSE
|
||||
wiki_article_language := lower(split_part(wiki_article, ':', 1));
|
||||
END IF;
|
||||
--RAISE WARNING '% %', wiki_article_language, wiki_article_title;
|
||||
|
||||
select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
|
||||
from wikipedia_article
|
||||
where language = wiki_article_language and
|
||||
(title = wiki_article_title OR title = decode_url_part(wiki_article_title) OR title = replace(decode_url_part(wiki_article_title),E'\\',''))
|
||||
UNION ALL
|
||||
select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
|
||||
from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title)
|
||||
where wikipedia_redirect.language = wiki_article_language and
|
||||
(from_title = wiki_article_title OR from_title = decode_url_part(wiki_article_title) OR from_title = replace(decode_url_part(wiki_article_title),E'\\',''))
|
||||
order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia;
|
||||
|
||||
ELSE
|
||||
select importance,language||':'||title from wikipedia_article where osm_type = NEW.osm_type and osm_id = NEW.osm_id order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia;
|
||||
END IF;
|
||||
|
||||
--RAISE WARNING '% %', NEW.place_id, NEW.rank_search;
|
||||
|
||||
-- For low level elements we inherit from our parent road
|
||||
@ -1535,7 +1567,7 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
-- not found one yet? how about doing a name search
|
||||
IF NEW.centroid IS NULL AND NEW.name->'name' is not null and make_standard_name(NEW.name->'name') != '' THEN
|
||||
IF NEW.centroid IS NULL AND (NEW.name->'name') is not null and make_standard_name(NEW.name->'name') != '' THEN
|
||||
|
||||
FOR linkedPlacex IN select placex.* from placex WHERE
|
||||
make_standard_name(name->'name') = make_standard_name(NEW.name->'name')
|
||||
@ -1567,6 +1599,31 @@ BEGIN
|
||||
place_centroid := NEW.centroid;
|
||||
END IF;
|
||||
|
||||
-- Did we gain a wikipedia tag in the process? then we need to recalculate our importance
|
||||
-- WARNING: duplicate of code above (yuk!)
|
||||
IF NEW.importance is null AND NEW.extratags?'wikipedia' THEN
|
||||
wiki_article := replace(regexp_replace(NEW.extratags->'wikipedia',E'(.*?)([a-z]+).wikipedia.org/wiki/',E'\\2:'),' ','_');
|
||||
wiki_article_title := split_part(wiki_article, ':', 2);
|
||||
IF wiki_article_title IS NULL OR wiki_article_title = '' THEN
|
||||
wiki_article_title := wiki_article;
|
||||
wiki_article_language := 'en';
|
||||
ELSE
|
||||
wiki_article_language := lower(split_part(wiki_article, ':', 1));
|
||||
END IF;
|
||||
|
||||
select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
|
||||
from wikipedia_article
|
||||
where language = wiki_article_language and
|
||||
(title = wiki_article_title OR title = decode_url_part(wiki_article_title) OR title = replace(decode_url_part(wiki_article_title),E'\\',''))
|
||||
UNION ALL
|
||||
select wikipedia_article.importance,wikipedia_article.language||':'||wikipedia_article.title
|
||||
from wikipedia_redirect join wikipedia_article on (wikipedia_redirect.language = wikipedia_article.language and wikipedia_redirect.to_title = wikipedia_article.title)
|
||||
where wikipedia_redirect.language = wiki_article_language and
|
||||
(from_title = wiki_article_title OR from_title = decode_url_part(wiki_article_title) OR from_title = replace(decode_url_part(wiki_article_title),E'\\',''))
|
||||
order by importance asc limit 1 INTO NEW.importance,NEW.wikipedia;
|
||||
|
||||
END IF;
|
||||
|
||||
END IF;
|
||||
|
||||
NEW.parent_place_id = 0;
|
||||
@ -2706,4 +2763,10 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- See: http://stackoverflow.com/questions/6410088/how-can-i-mimic-the-php-urldecode-function-in-postgresql
|
||||
CREATE OR REPLACE FUNCTION decode_url_part(p varchar) RETURNS varchar
|
||||
AS $$
|
||||
SELECT convert_from(CAST(E'\\x' || string_agg(CASE WHEN length(r.m[1]) = 1 THEN encode(convert_to(r.m[1], 'SQL_ASCII'), 'hex') ELSE substring(r.m[1] from 2 for 2) END, '') AS bytea), 'UTF8')
|
||||
FROM regexp_matches($1, '%[0-9a-f][0-9a-f]|.', 'gi') AS r(m);
|
||||
$$
|
||||
LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
@ -210,6 +210,7 @@ CREATE TABLE placex (
|
||||
importance FLOAT,
|
||||
indexed_status INTEGER,
|
||||
indexed_date TIMESTAMP,
|
||||
wikipedia TEXT, -- calculated wikipedia article name (language:title)
|
||||
geometry_sector INTEGER
|
||||
);
|
||||
SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2);
|
||||
@ -301,3 +302,28 @@ CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTRE
|
||||
|
||||
drop sequence file;
|
||||
CREATE SEQUENCE file start 1;
|
||||
|
||||
-- null table so it won't error
|
||||
-- deliberately no drop - importing the table is expensive and static, if it is already there better to avoid removing it
|
||||
CREATE TABLE wikipedia_article (
|
||||
language text NOT NULL,
|
||||
title text NOT NULL,
|
||||
langcount integer,
|
||||
othercount integer,
|
||||
totalcount integer,
|
||||
lat double precision,
|
||||
lon double precision,
|
||||
importance double precision,
|
||||
osm_type character(1),
|
||||
osm_id bigint
|
||||
);
|
||||
ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title);
|
||||
CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id);
|
||||
|
||||
CREATE TABLE wikipedia_redirect (
|
||||
language text,
|
||||
from_title text,
|
||||
to_title text
|
||||
);
|
||||
ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title);
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
|
||||
array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
|
||||
array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
|
||||
array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
|
||||
array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
|
||||
array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
|
||||
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
|
||||
@ -207,6 +208,34 @@
|
||||
pgsqlRunScript($sTemplate);
|
||||
}
|
||||
|
||||
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all'])
|
||||
{
|
||||
$bDidSomething = true;
|
||||
$sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
|
||||
$sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';
|
||||
if (file_exists($sWikiArticlesFile))
|
||||
{
|
||||
echo "Importing wikipedia articles...";
|
||||
pgsqlRunRestoreData($sWikiArticlesFile);
|
||||
echo "...done\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
echo "WARNING: wikipedia article dump file not found - places will have default importance\n";
|
||||
}
|
||||
if (file_exists($sWikiRedirectsFile))
|
||||
{
|
||||
echo "Importing wikipedia redirects...";
|
||||
pgsqlRunRestoreData($sWikiRedirectsFile);
|
||||
echo "...done\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ($aCMDResult['load-data'] || $aCMDResult['all'])
|
||||
{
|
||||
echo "Load Data\n";
|
||||
@ -326,7 +355,7 @@
|
||||
}
|
||||
|
||||
fclose($hFile);
|
||||
|
||||
|
||||
$bAnyBusy = true;
|
||||
while($bAnyBusy)
|
||||
{
|
||||
@ -451,7 +480,8 @@
|
||||
|
||||
// Convert database DSN to psql paramaters
|
||||
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
|
||||
$sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database'];
|
||||
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
|
||||
$sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -f '.$sFilename;
|
||||
|
||||
$aDescriptors = array(
|
||||
0 => array('pipe', 'r'),
|
||||
@ -479,7 +509,7 @@
|
||||
// Convert database DSN to psql paramaters
|
||||
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
|
||||
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
|
||||
$sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
|
||||
$sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
|
||||
$aDescriptors = array(
|
||||
0 => array('pipe', 'r'),
|
||||
1 => STDOUT,
|
||||
@ -497,3 +527,31 @@
|
||||
fclose($ahPipes[0]);
|
||||
proc_close($hProcess);
|
||||
}
|
||||
|
||||
function pgsqlRunRestoreData($sDumpFile)
|
||||
{
|
||||
// Convert database DSN to psql paramaters
|
||||
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
|
||||
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
|
||||
$sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile;
|
||||
|
||||
$aDescriptors = array(
|
||||
0 => array('pipe', 'r'),
|
||||
1 => array('pipe', 'w'),
|
||||
2 => array('file', '/dev/null', 'a')
|
||||
);
|
||||
$ahPipes = null;
|
||||
$hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
|
||||
if (!is_resource($hProcess)) fail('unable to start pg_restore');
|
||||
|
||||
fclose($ahPipes[0]);
|
||||
|
||||
// TODO: error checking
|
||||
while(!feof($ahPipes[1]))
|
||||
{
|
||||
echo fread($ahPipes[1], 4096);
|
||||
}
|
||||
fclose($ahPipes[1]);
|
||||
|
||||
proc_close($hProcess);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user