Merge pull request #742 from lonvia/compare-normalized

Require more exact match for special search terms
This commit is contained in:
Sarah Hoffmann 2017-06-05 08:44:04 +02:00 committed by GitHub
commit 6db110f5cb
10 changed files with 55 additions and 21 deletions

View File

@ -19,6 +19,7 @@ script:
- cd $TRAVIS_BUILD_DIR/build
- if [[ $TEST_SUITE == "monaco" ]]; then wget --no-verbose --output-document=../data/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf; fi
- if [[ $TEST_SUITE == "monaco" ]]; then ./utils/setup.php --osm-file ../data/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | grep -v 'ETA (seconds)'; fi
- if [[ $TEST_SUITE == "monaco" ]]; then ./utils/specialphrases.php --wiki-import | psql -d test_api_nominatim >/dev/null; fi
- cd $TRAVIS_BUILD_DIR/test/php
- if [[ $TEST_SUITE == "tests" ]]; then phpunit ./ ; fi
- if [[ $TEST_SUITE == "tests" ]]; then phpcs --report-width=120 */**.php ; fi

11
Vagrantfile vendored
View File

@ -23,7 +23,16 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "centos" do |sub|
config.vm.define "travis" do |sub|
sub.vm.box = "bento/ubuntu-14.04"
sub.vm.provision :shell do |s|
s.path = "vagrant/install-on-travis-ci.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "centos" do |sub|
sub.vm.box = "bento/centos-7.2"
sub.vm.provision :shell do |s|
s.path = "vagrant/install-on-centos-7.sh"

View File

@ -39,6 +39,7 @@ For running Nominatim:
* [PostGIS](http://postgis.refractions.net) (2.0 or later)
* [PHP](http://php.net) (5.4 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
* [PEAR::DB](http://pear.php.net/package/DB)
* a webserver (apache or nginx are recommended)

View File

@ -653,7 +653,7 @@ class Geocode
return $aSearchResults;
}
public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
{
/*
Calculate all searches using aValidTokens i.e.
@ -752,13 +752,19 @@ class Geocode
*/
}
} elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
if ($aSearch['sClass'] === '') {
$aSearch['sOperator'] = $aSearchTerm['operator'];
// require a normalized exact match of the term
// if we have the normalizer version of the query
// available
if ($aSearch['sClass'] === ''
&& ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
$aSearch['sClass'] = $aSearchTerm['class'];
$aSearch['sType'] = $aSearchTerm['type'];
if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
else $aSearch['sOperator'] = 'near'; // near = in for the moment
if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
if ($aSearchTerm['operator'] == '') {
$aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near';
$aSearch['iSearchRank'] += 2;
} else {
$aSearch['sOperator'] = 'near'; // near = in for the moment
}
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
@ -913,6 +919,13 @@ class Geocode
{
if (!$this->sQuery && !$this->aStructuredQuery) return array();
$oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
if ($oNormalizer !== null) {
$sNormQuery = $oNormalizer->transliterate($this->sQuery);
} else {
$sNormQuery = null;
}
$sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
$sCountryCodesSQL = false;
if ($this->aCountryCodes) {
@ -1139,7 +1152,7 @@ class Geocode
// array with: placeid => -1 | tiger-housenumber
$aResultPlaceIDs = array();
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
if ($this->bReverseInPlan) {
// Reverse phrase array and also reverse the order of the wordsets in
@ -1151,7 +1164,7 @@ class Geocode
$aFinalPhrase = end($aPhrases);
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
}
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {

View File

@ -17,6 +17,10 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
// codes, to restrict import to a subset of languages.
// Currently only affects the import of country names and special phrases.
@define('CONST_Languages', false);
// Rules for normalizing terms for comparison before doing comparisons.
// The default is to remove accents and punctuation and to lower-case the
// term. Spaces are kept but collapsed to one standard space.
@define('CONST_Term_Normalization_Rules', ":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);

View File

@ -101,7 +101,7 @@ END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text)
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text)
RETURNS INTEGER
AS $$
DECLARE
@ -109,17 +109,17 @@ DECLARE
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id;
SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0);
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text)
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text)
RETURNS INTEGER
AS $$
DECLARE
@ -127,10 +127,10 @@ DECLARE
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id;
SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op);
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op);
END IF;
RETURN return_word_id;
END;

View File

@ -19,6 +19,7 @@ getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
include(CONST_InstallPath.'/settings/phrase_settings.php');
if ($aCMDResult['wiki-import']) {
$oNormalizer = Transliterator::createFromRules(CONST_Term_Normalization_Rules);
$aPairs = array();
$sLanguageIn = CONST_Languages ? CONST_Languages :
@ -31,6 +32,11 @@ if ($aCMDResult['wiki-import']) {
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
if ($oNormalizer !== null) {
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
} else {
$sTrans = null;
}
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with
@ -57,13 +63,13 @@ if ($aCMDResult['wiki-import']) {
switch (trim($aMatch[4])) {
case 'near':
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'near');\n";
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'near');\n";
break;
case 'in':
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'in');\n";
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'in');\n";
break;
default:
echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType');\n";
echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType');\n";
break;
}
}

View File

@ -21,7 +21,7 @@
sudo yum install -y postgresql-server postgresql-contrib postgresql-devel postgis postgis-utils \
git cmake make gcc gcc-c++ libtool policycoreutils-python \
php-pgsql php php-pear php-pear-DB libpqxx-devel proj-epsg \
php-pgsql php php-pear php-pear-DB php-intl libpqxx-devel proj-epsg \
bzip2-devel proj-devel geos-devel libxml2-devel boost-devel expat-devel zlib-devel
# If you want to run the test suite, you need to install the following

View File

@ -16,7 +16,7 @@ sudo apt-get install -y -qq libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev libxml2-dev\
libbz2-dev libpq-dev libgeos-c1 libgeos++-dev libproj-dev \
postgresql-server-dev-9.6 postgresql-9.6-postgis-2.3 postgresql-contrib-9.6 \
apache2 php5 php5-pgsql php-pear php-db
apache2 php5 php5-pgsql php-pear php-db php5-intl
sudo apt-get install -y -qq python3-dev python3-pip python3-psycopg2 phpunit php5-cgi

View File

@ -28,7 +28,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
libbz2-dev libpq-dev libgeos-dev libgeos++-dev libproj-dev \
postgresql-server-dev-9.5 postgresql-9.5-postgis-2.2 postgresql-contrib-9.5 \
apache2 php php-pgsql libapache2-mod-php php-pear php-db \
git
php-intl git
# If you want to run the test suite, you need to install the following
# additional packages: