move country search term creation into setup script

Search results can become odd without the country search
terms, so make their inclusion a mandatory part of the
setup.

Also adds a new configuration variable to restrict the
languages taken into account by Nominatim.
This commit is contained in:
Sarah Hoffmann 2016-09-28 23:30:44 +02:00
parent 6fd2887543
commit 1982978f74
5 changed files with 40 additions and 53 deletions

View File

@ -67,10 +67,8 @@ avoid swapping, never give more than 2/3 of RAM to osm2pgsql.
Loading additional datasets
---------------------------
The following commands will create additional entries for countries and POI searches:
The following commands will create additional entries for POI searches:
./utils/specialphrases.php --countries > specialphrases_countries.sql
psql -d nominatim -f specialphrases_countries.sql
./utils/specialphrases.php --wiki-import > specialphrases.sql
psql -d nominatim -f specialphrases.sql

View File

@ -11,6 +11,13 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
@define('CONST_Database_Web_User', 'www-data');
@define('CONST_Max_Word_Frequency', '50000');
@define('CONST_Limit_Reindexing', true);
// Restrict search languages.
// Normally Nominatim will include all language variants of name:XX
// in the search index. Set this to a comma-separated list of language
// codes to restrict the import to a subset of languages.
// Currently only affects the import of country names and special phrases.
@define('CONST_Languages', false);
// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);
/* Set to true after importing Tiger house number data for the US.

View File

@ -2,42 +2,6 @@
// These settings control the import of special phrases from the wiki.
// Language codes for which special phrases are downloaded. The same list
// also drives the per-language country name statements.
$aLanguageIn = array(
    'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
    'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
    'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
    'ps', 'pt', 'ru', 'sk', 'sv', 'uk', 'vi',
);
// class/type combinations to exclude
$aTagsBlacklist
= array(

View File

@ -38,6 +38,7 @@ $aCMDOptions
array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'),
array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
@ -590,6 +591,32 @@ if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
pgsqlRunScript($sTemplate);
}
// Create the search terms for country names. Search results can become odd
// without them, so this step is also run as part of --all.
if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
    echo "Creating search index for default country names\n";
    $bDidSomething = true;

    // Common aliases that are not derived from the country_name table.
    pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')");
    pgsqlRunScript("select getorcreate_country(make_standard_name('united states'), 'us')");
    // The country code itself is a search term.
    pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x");
    // The default name stored under the plain 'name' key.
    pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x");

    // Collect the quoted 'name:XX' keys for the configured languages.
    $aNameKeys = array();
    if (CONST_Languages) {
        foreach (explode(',', CONST_Languages) as $sLang) {
            // Tolerate lists written as 'en, de' and stray commas; an
            // untrimmed entry would produce a key that never matches.
            $sLang = trim($sLang);
            if ($sLang !== '') {
                // Double any single quote so the value cannot terminate
                // the SQL string literal early.
                $aNameKeys[] = "'name:".str_replace("'", "''", $sLang)."'";
            }
        }
    }

    // Language-specific names: every name:XX key, or only the configured ones.
    $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v), country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k ';
    if ($aNameKeys) {
        $sSQL .= 'in ('.join(',', $aNameKeys).')';
    } else {
        // No usable language restriction: include all simple name:XX tags.
        $sSQL .= "like 'name:%'";
    }
    $sSQL .= ') v';
    pgsqlRunScript($sSQL);
}
if ($aCMDResult['drop']) {
// The implementation is potentially a bit dangerous because it uses
// a positive selection of tables to keep, and deletes everything else.

View File

@ -12,29 +12,20 @@ $aCMDOptions
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('countries', '', 0, 1, 0, 0, 'bool', 'Create import script for country codes and names'),
array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
include(CONST_InstallPath.'/settings/phrase_settings.php');
// Emit an SQL script on stdout that registers the searchable country names.
if ($aCMDResult['countries']) {
    // Fixed aliases, then the bare country codes.
    echo "select getorcreate_country(make_standard_name('uk'), 'gb');\n";
    echo "select getorcreate_country(make_standard_name('united states'), 'us');\n";
    echo "select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x;\n";

    // The default name and every configured language variant share one
    // statement template; only the looked-up name key differs.
    $aNameKeys = array('name');
    foreach ($aLanguageIn as $sLanguage) {
        $aNameKeys[] = 'name:'.$sLanguage;
    }
    foreach ($aNameKeys as $sNameKey) {
        $sKeyArray = "ARRAY['".$sNameKey."']";
        echo 'select count(*) from (select getorcreate_country(make_standard_name(get_name_by_language(country_name.name,'.$sKeyArray.')), country_code) from country_name where get_name_by_language(country_name.name, '.$sKeyArray.") is not null) as x;\n";
    }
}
if ($aCMDResult['wiki-import']) {
$aPairs = array();
foreach ($aLanguageIn as $sLanguage) {
$sLanguageIn = CONST_Languages ? CONST_Languages :
('af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi');
foreach (explode(',', $sLanguageIn) as $sLanguage) {
$sURL = 'http://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
$sWikiPageXML = file_get_contents($sURL);
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {