mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-26 06:22:13 +03:00
Merge pull request #742 from lonvia/compare-normalized
Require more exact match for special search terms
This commit is contained in:
commit
6db110f5cb
@ -19,6 +19,7 @@ script:
|
||||
- cd $TRAVIS_BUILD_DIR/build
|
||||
- if [[ $TEST_SUITE == "monaco" ]]; then wget --no-verbose --output-document=../data/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf; fi
|
||||
- if [[ $TEST_SUITE == "monaco" ]]; then ./utils/setup.php --osm-file ../data/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | grep -v 'ETA (seconds)'; fi
|
||||
- if [[ $TEST_SUITE == "monaco" ]]; then ./utils/specialphrases.php --wiki-import | psql -d test_api_nominatim >/dev/null; fi
|
||||
- cd $TRAVIS_BUILD_DIR/test/php
|
||||
- if [[ $TEST_SUITE == "tests" ]]; then phpunit ./ ; fi
|
||||
- if [[ $TEST_SUITE == "tests" ]]; then phpcs --report-width=120 */**.php ; fi
|
||||
|
11
Vagrantfile
vendored
11
Vagrantfile
vendored
@ -23,7 +23,16 @@ Vagrant.configure("2") do |config|
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "centos" do |sub|
|
||||
config.vm.define "travis" do |sub|
|
||||
sub.vm.box = "bento/ubuntu-14.04"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/install-on-travis-ci.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "centos" do |sub|
|
||||
sub.vm.box = "bento/centos-7.2"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/install-on-centos-7.sh"
|
||||
|
@ -39,6 +39,7 @@ For running Nominatim:
|
||||
* [PostGIS](http://postgis.refractions.net) (2.0 or later)
|
||||
* [PHP](http://php.net) (5.4 or later)
|
||||
* PHP-pgsql
|
||||
* PHP-intl (bundled with PHP)
|
||||
* [PEAR::DB](http://pear.php.net/package/DB)
|
||||
* a webserver (apache or nginx are recommended)
|
||||
|
||||
|
@ -653,7 +653,7 @@ class Geocode
|
||||
return $aSearchResults;
|
||||
}
|
||||
|
||||
public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
|
||||
public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
|
||||
{
|
||||
/*
|
||||
Calculate all searches using aValidTokens i.e.
|
||||
@ -752,13 +752,19 @@ class Geocode
|
||||
*/
|
||||
}
|
||||
} elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
|
||||
if ($aSearch['sClass'] === '') {
|
||||
$aSearch['sOperator'] = $aSearchTerm['operator'];
|
||||
// require a normalized exact match of the term
|
||||
// if we have the normalized version of the query
|
||||
// available
|
||||
if ($aSearch['sClass'] === ''
|
||||
&& ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
|
||||
$aSearch['sClass'] = $aSearchTerm['class'];
|
||||
$aSearch['sType'] = $aSearchTerm['type'];
|
||||
if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
|
||||
else $aSearch['sOperator'] = 'near'; // near = in for the moment
|
||||
if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
|
||||
if ($aSearchTerm['operator'] == '') {
|
||||
$aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near';
|
||||
$aSearch['iSearchRank'] += 2;
|
||||
} else {
|
||||
$aSearch['sOperator'] = 'near'; // near = in for the moment
|
||||
}
|
||||
|
||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
}
|
||||
@ -913,6 +919,13 @@ class Geocode
|
||||
{
|
||||
if (!$this->sQuery && !$this->aStructuredQuery) return array();
|
||||
|
||||
$oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
|
||||
if ($oNormalizer !== null) {
|
||||
$sNormQuery = $oNormalizer->transliterate($this->sQuery);
|
||||
} else {
|
||||
$sNormQuery = null;
|
||||
}
|
||||
|
||||
$sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
|
||||
$sCountryCodesSQL = false;
|
||||
if ($this->aCountryCodes) {
|
||||
@ -1139,7 +1152,7 @@ class Geocode
|
||||
// array with: placeid => -1 | tiger-housenumber
|
||||
$aResultPlaceIDs = array();
|
||||
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
|
||||
|
||||
if ($this->bReverseInPlan) {
|
||||
// Reverse phrase array and also reverse the order of the wordsets in
|
||||
@ -1151,7 +1164,7 @@ class Geocode
|
||||
$aFinalPhrase = end($aPhrases);
|
||||
$aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
|
||||
}
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
|
||||
|
||||
foreach ($aGroupedSearches as $aSearches) {
|
||||
foreach ($aSearches as $aSearch) {
|
||||
|
@ -17,6 +17,10 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
|
||||
// codes, to restrict import to a subset of languages.
|
||||
// Currently only affects the import of country names and special phrases.
|
||||
@define('CONST_Languages', false);
|
||||
// Rules for normalizing terms before they are compared.
|
||||
// The default is to remove accents and punctuation and to lower-case the
|
||||
// term. Spaces are kept but collapsed to one standard space.
|
||||
@define('CONST_Term_Normalization_Rules', ":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
|
||||
|
||||
// Set to false to avoid importing extra postcodes for the US.
|
||||
@define('CONST_Use_Extra_US_Postcodes', true);
|
||||
|
@ -101,7 +101,7 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, lookup_class text, lookup_type text)
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
@ -109,17 +109,17 @@ DECLARE
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type into return_word_id;
|
||||
SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type into return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0);
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, lookup_class text, lookup_type text, op text)
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text, op text)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
@ -127,10 +127,10 @@ DECLARE
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word WHERE word_token = lookup_token and class=lookup_class and type = lookup_type and operator = op into return_word_id;
|
||||
SELECT min(word_id) FROM word WHERE word_token = lookup_token and word=normalized_word and class=lookup_class and type = lookup_type and operator = op into return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null, lookup_class, lookup_type, null, 0, op);
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word, lookup_class, lookup_type, null, 0, op);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
|
@ -19,6 +19,7 @@ getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||
include(CONST_InstallPath.'/settings/phrase_settings.php');
|
||||
|
||||
if ($aCMDResult['wiki-import']) {
|
||||
$oNormalizer = Transliterator::createFromRules(CONST_Term_Normalization_Rules);
|
||||
$aPairs = array();
|
||||
|
||||
$sLanguageIn = CONST_Languages ? CONST_Languages :
|
||||
@ -31,6 +32,11 @@ if ($aCMDResult['wiki-import']) {
|
||||
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
|
||||
foreach ($aMatches as $aMatch) {
|
||||
$sLabel = trim($aMatch[1]);
|
||||
if ($oNormalizer !== null) {
|
||||
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
|
||||
} else {
|
||||
$sTrans = null;
|
||||
}
|
||||
$sClass = trim($aMatch[2]);
|
||||
$sType = trim($aMatch[3]);
|
||||
// hack around a bug where building=yes was imported with
|
||||
@ -57,13 +63,13 @@ if ($aCMDResult['wiki-import']) {
|
||||
|
||||
switch (trim($aMatch[4])) {
|
||||
case 'near':
|
||||
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'near');\n";
|
||||
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'near');\n";
|
||||
break;
|
||||
case 'in':
|
||||
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType', 'in');\n";
|
||||
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'in');\n";
|
||||
break;
|
||||
default:
|
||||
echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sClass', '$sType');\n";
|
||||
echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType');\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
sudo yum install -y postgresql-server postgresql-contrib postgresql-devel postgis postgis-utils \
|
||||
git cmake make gcc gcc-c++ libtool policycoreutils-python \
|
||||
php-pgsql php php-pear php-pear-DB libpqxx-devel proj-epsg \
|
||||
php-pgsql php php-pear php-pear-DB php-intl libpqxx-devel proj-epsg \
|
||||
bzip2-devel proj-devel geos-devel libxml2-devel boost-devel expat-devel zlib-devel
|
||||
|
||||
# If you want to run the test suite, you need to install the following
|
||||
|
@ -16,7 +16,7 @@ sudo apt-get install -y -qq libboost-dev libboost-system-dev \
|
||||
libboost-filesystem-dev libexpat1-dev zlib1g-dev libxml2-dev\
|
||||
libbz2-dev libpq-dev libgeos-c1 libgeos++-dev libproj-dev \
|
||||
postgresql-server-dev-9.6 postgresql-9.6-postgis-2.3 postgresql-contrib-9.6 \
|
||||
apache2 php5 php5-pgsql php-pear php-db
|
||||
apache2 php5 php5-pgsql php-pear php-db php5-intl
|
||||
|
||||
sudo apt-get install -y -qq python3-dev python3-pip python3-psycopg2 phpunit php5-cgi
|
||||
|
||||
|
@ -28,7 +28,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
|
||||
libbz2-dev libpq-dev libgeos-dev libgeos++-dev libproj-dev \
|
||||
postgresql-server-dev-9.5 postgresql-9.5-postgis-2.2 postgresql-contrib-9.5 \
|
||||
apache2 php php-pgsql libapache2-mod-php php-pear php-db \
|
||||
git
|
||||
php-intl git
|
||||
|
||||
# If you want to run the test suite, you need to install the following
|
||||
# additional packages:
|
||||
|
Loading…
Reference in New Issue
Block a user