mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-10 11:42:13 +03:00
Merge pull request #2305 from lonvia/tokenizer
Factor out normalization into a separate module
This commit is contained in:
commit
8b1a509442
@ -8,12 +8,14 @@ require_once(CONST_LibDir.'/ReverseGeocode.php');
|
||||
require_once(CONST_LibDir.'/SearchDescription.php');
|
||||
require_once(CONST_LibDir.'/SearchContext.php');
|
||||
require_once(CONST_LibDir.'/TokenList.php');
|
||||
require_once(CONST_TokenizerDir.'/tokenizer.php');
|
||||
|
||||
class Geocode
|
||||
{
|
||||
protected $oDB;
|
||||
|
||||
protected $oPlaceLookup;
|
||||
protected $oTokenizer;
|
||||
|
||||
protected $aLangPrefOrder = array();
|
||||
|
||||
@ -41,23 +43,12 @@ class Geocode
|
||||
protected $sQuery = false;
|
||||
protected $aStructuredQuery = false;
|
||||
|
||||
protected $oNormalizer = null;
|
||||
|
||||
|
||||
public function __construct(&$oDB)
|
||||
{
|
||||
$this->oDB =& $oDB;
|
||||
$this->oPlaceLookup = new PlaceLookup($this->oDB);
|
||||
$this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
|
||||
}
|
||||
|
||||
private function normTerm($sTerm)
|
||||
{
|
||||
if ($this->oNormalizer === null) {
|
||||
return $sTerm;
|
||||
}
|
||||
|
||||
return $this->oNormalizer->transliterate($sTerm);
|
||||
$this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
|
||||
}
|
||||
|
||||
public function setLanguagePreference($aLangPref)
|
||||
@ -510,12 +501,10 @@ class Geocode
|
||||
if ($this->aCountryCodes) {
|
||||
$oCtx->setCountryList($this->aCountryCodes);
|
||||
}
|
||||
$this->oTokenizer->setCountryRestriction($this->aCountryCodes);
|
||||
|
||||
Debug::newSection('Query Preprocessing');
|
||||
|
||||
$sNormQuery = $this->normTerm($this->sQuery);
|
||||
Debug::printVar('Normalized query', $sNormQuery);
|
||||
|
||||
$sLanguagePrefArraySQL = $this->oDB->getArraySQL(
|
||||
$this->oDB->getDBQuotedList($this->aLangPrefOrder)
|
||||
);
|
||||
@ -569,108 +558,55 @@ class Geocode
|
||||
}
|
||||
|
||||
if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
|
||||
$sSpecialTerm = pg_escape_string($sSpecialTerm);
|
||||
$sToken = $this->oDB->getOne(
|
||||
'SELECT make_standard_name(:term)',
|
||||
array(':term' => $sSpecialTerm),
|
||||
'Cannot decode query. Wrong encoding?'
|
||||
);
|
||||
$sSQL = 'SELECT class, type FROM word ';
|
||||
$sSQL .= ' WHERE word_token in (\' '.$sToken.'\')';
|
||||
$sSQL .= ' AND class is not null AND class not in (\'place\')';
|
||||
$aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
$aSearchWords = $this->oDB->getAll($sSQL);
|
||||
if (!empty($aTokens)) {
|
||||
$aNewSearches = array();
|
||||
foreach ($aSearches as $oSearch) {
|
||||
foreach ($aSearchWords as $aSearchTerm) {
|
||||
foreach ($aTokens as $oToken) {
|
||||
$oNewSearch = clone $oSearch;
|
||||
$oNewSearch->setPoiSearch(
|
||||
Operator::TYPE,
|
||||
$aSearchTerm['class'],
|
||||
$aSearchTerm['type']
|
||||
$oToken->iOperator,
|
||||
$oToken->sClass,
|
||||
$oToken->sType
|
||||
);
|
||||
$aNewSearches[] = $oNewSearch;
|
||||
}
|
||||
}
|
||||
$aSearches = $aNewSearches;
|
||||
}
|
||||
}
|
||||
|
||||
// Split query into phrases
|
||||
// Commas are used to reduce the search space by indicating where phrases split
|
||||
$aPhrases = array();
|
||||
if ($this->aStructuredQuery) {
|
||||
$aInPhrases = $this->aStructuredQuery;
|
||||
foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
|
||||
$aPhrases[] = new Phrase($sPhrase, $iPhrase);
|
||||
}
|
||||
} else {
|
||||
$aInPhrases = explode(',', $sQuery);
|
||||
foreach (explode(',', $sQuery) as $sPhrase) {
|
||||
$aPhrases[] = new Phrase($sPhrase, '');
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printDebugArray('Search context', $oCtx);
|
||||
Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
|
||||
Debug::printVar('Final query phrases', $aInPhrases);
|
||||
|
||||
// Convert each phrase to standard form
|
||||
// Create a list of standard words
|
||||
// Get all 'sets' of words
|
||||
// Generate a complete list of all
|
||||
Debug::newSection('Tokenization');
|
||||
$aTokens = array();
|
||||
$aPhrases = array();
|
||||
foreach ($aInPhrases as $iPhrase => $sPhrase) {
|
||||
$sPhrase = $this->oDB->getOne(
|
||||
'SELECT make_standard_name(:phrase)',
|
||||
array(':phrase' => $sPhrase),
|
||||
'Cannot normalize query string (is it a UTF-8 string?)'
|
||||
);
|
||||
if (trim($sPhrase)) {
|
||||
$oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
|
||||
$oPhrase->addTokens($aTokens);
|
||||
$aPhrases[] = $oPhrase;
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printVar('Tokens', $aTokens);
|
||||
|
||||
$oValidTokens = new TokenList();
|
||||
|
||||
if (!empty($aTokens)) {
|
||||
$oValidTokens->addTokensFromDB(
|
||||
$this->oDB,
|
||||
$aTokens,
|
||||
$this->aCountryCodes,
|
||||
$sNormQuery,
|
||||
$this->oNormalizer
|
||||
);
|
||||
$oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);
|
||||
|
||||
if ($oValidTokens->count() > 0) {
|
||||
$oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
|
||||
|
||||
// Try more interpretations for Tokens that could not be matched.
|
||||
foreach ($aTokens as $sToken) {
|
||||
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
|
||||
if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
|
||||
// US ZIP+4 codes - merge in the 5-digit ZIP code
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\Postcode(null, $aData[1], 'us')
|
||||
);
|
||||
} elseif (preg_match('/^ [0-9]+$/', $sToken)) {
|
||||
// Unknown single word token with a number.
|
||||
// Assume it is a house number.
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\HouseNumber(null, trim($sToken))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
$aPhrases = array_filter($aPhrases, function ($oPhrase) {
|
||||
return $oPhrase->getWordSets() !== null;
|
||||
});
|
||||
|
||||
// Any words that have failed completely?
|
||||
// TODO: suggestions
|
||||
|
||||
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
|
||||
|
||||
foreach ($aPhrases as $oPhrase) {
|
||||
$oPhrase->computeWordSets($oValidTokens);
|
||||
}
|
||||
Debug::printDebugTable('Phrases', $aPhrases);
|
||||
|
||||
Debug::newSection('Search candidates');
|
||||
|
@ -16,8 +16,6 @@ class Phrase
|
||||
private $sPhrase;
|
||||
// Element type for structured searches.
|
||||
private $sPhraseType;
|
||||
// Space-separated words of the phrase.
|
||||
private $aWords;
|
||||
// Possible segmentations of the phrase.
|
||||
private $aWordSets;
|
||||
|
||||
@ -38,7 +36,14 @@ class Phrase
|
||||
{
|
||||
$this->sPhrase = trim($sPhrase);
|
||||
$this->sPhraseType = $sPhraseType;
|
||||
$this->aWords = explode(' ', $this->sPhrase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the orginal phrase of the string.
|
||||
*/
|
||||
public function getPhrase()
|
||||
{
|
||||
return $this->sPhrase;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -63,30 +68,6 @@ class Phrase
|
||||
return $this->aWordSets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the tokens from this phrase to the given list of tokens.
|
||||
*
|
||||
* @param string[] $aTokens List of tokens to append.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function addTokens(&$aTokens)
|
||||
{
|
||||
$iNumWords = count($this->aWords);
|
||||
|
||||
for ($i = 0; $i < $iNumWords; $i++) {
|
||||
$sPhrase = $this->aWords[$i];
|
||||
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
|
||||
$aTokens[$sPhrase] = $sPhrase;
|
||||
|
||||
for ($j = $i + 1; $j < $iNumWords; $j++) {
|
||||
$sPhrase .= ' '.$this->aWords[$j];
|
||||
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
|
||||
$aTokens[$sPhrase] = $sPhrase;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Invert the set of possible segmentations.
|
||||
*
|
||||
@ -99,21 +80,27 @@ class Phrase
|
||||
}
|
||||
}
|
||||
|
||||
public function computeWordSets($oTokens)
|
||||
public function computeWordSets($aWords, $oTokens)
|
||||
{
|
||||
$iNumWords = count($this->aWords);
|
||||
$iNumWords = count($aWords);
|
||||
|
||||
if ($iNumWords == 0) {
|
||||
$this->aWordSets = null;
|
||||
return;
|
||||
}
|
||||
|
||||
// Caches the word set for the partial phrase up to word i.
|
||||
$aSetCache = array_fill(0, $iNumWords, array());
|
||||
|
||||
// Initialise first element of cache. There can only be the word.
|
||||
if ($oTokens->containsAny($this->aWords[0])) {
|
||||
$aSetCache[0][] = array($this->aWords[0]);
|
||||
if ($oTokens->containsAny($aWords[0])) {
|
||||
$aSetCache[0][] = array($aWords[0]);
|
||||
}
|
||||
|
||||
// Now do the next elements using what we already have.
|
||||
for ($i = 1; $i < $iNumWords; $i++) {
|
||||
for ($j = $i; $j > 0; $j--) {
|
||||
$sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
|
||||
$sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
|
||||
if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
|
||||
$aPartial = array($sPartial);
|
||||
foreach ($aSetCache[$j - 1] as $aSet) {
|
||||
@ -136,7 +123,7 @@ class Phrase
|
||||
}
|
||||
|
||||
// finally the current full phrase
|
||||
$sPartial = $this->aWords[0].' '.$sPartial;
|
||||
$sPartial = $aWords[0].' '.$sPartial;
|
||||
if ($oTokens->containsAny($sPartial)) {
|
||||
$aSetCache[$i][] = array($sPartial);
|
||||
}
|
||||
@ -153,7 +140,6 @@ class Phrase
|
||||
return array(
|
||||
'Type' => $this->sPhraseType,
|
||||
'Phrase' => $this->sPhrase,
|
||||
'Words' => $this->aWords,
|
||||
'WordSets' => $this->aWordSets
|
||||
);
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
require_once(CONST_TokenizerDir.'/tokenizer.php');
|
||||
|
||||
use Exception;
|
||||
|
||||
class Status
|
||||
@ -25,24 +27,8 @@ class Status
|
||||
throw new Exception('Database connection failed', 700);
|
||||
}
|
||||
|
||||
$sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
|
||||
if ($sStandardWord === false) {
|
||||
throw new Exception('Module failed', 701);
|
||||
}
|
||||
|
||||
if ($sStandardWord != 'a') {
|
||||
throw new Exception('Module call failed', 702);
|
||||
}
|
||||
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, ';
|
||||
$sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')";
|
||||
$iWordID = $this->oDB->getOne($sSQL);
|
||||
if ($iWordID === false) {
|
||||
throw new Exception('Query failed', 703);
|
||||
}
|
||||
if (!$iWordID) {
|
||||
throw new Exception('No value', 704);
|
||||
}
|
||||
$oTokenizer = new \Nominatim\Tokenizer($this->oDB);
|
||||
$oTokenizer->checkStatus();
|
||||
}
|
||||
|
||||
public function dataDate()
|
||||
|
@ -95,88 +95,6 @@ class TokenList
|
||||
return $ids;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add token information from the word table in the database.
|
||||
*
|
||||
* @param object $oDB Nominatim::DB instance.
|
||||
* @param string[] $aTokens List of tokens to look up in the database.
|
||||
* @param string[] $aCountryCodes List of country restrictions.
|
||||
* @param string $sNormQuery Normalized query string.
|
||||
* @param object $oNormalizer Normalizer function to use on tokens.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
|
||||
{
|
||||
// Check which tokens we have, get the ID numbers
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
|
||||
$sSQL .= ' operator, coalesce(search_name_count, 0) as count';
|
||||
$sSQL .= ' FROM word WHERE word_token in (';
|
||||
$sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
|
||||
$aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
|
||||
|
||||
foreach ($aDBWords as $aWord) {
|
||||
$oToken = null;
|
||||
$iId = (int) $aWord['word_id'];
|
||||
|
||||
if ($aWord['class']) {
|
||||
// Special terms need to appear in their normalized form.
|
||||
if ($aWord['word']) {
|
||||
$sNormWord = $aWord['word'];
|
||||
if ($oNormalizer != null) {
|
||||
$sNormWord = $oNormalizer->transliterate($aWord['word']);
|
||||
}
|
||||
if (strpos($sNormQuery, $sNormWord) === false) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
|
||||
$oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
|
||||
} elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
|
||||
if ($aWord['word']
|
||||
&& pg_escape_string($aWord['word']) == $aWord['word']
|
||||
) {
|
||||
$oToken = new Token\Postcode(
|
||||
$iId,
|
||||
$aWord['word'],
|
||||
$aWord['country_code']
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// near and in operator the same at the moment
|
||||
$oToken = new Token\SpecialTerm(
|
||||
$iId,
|
||||
$aWord['class'],
|
||||
$aWord['type'],
|
||||
$aWord['operator'] ? Operator::NEAR : Operator::NONE
|
||||
);
|
||||
}
|
||||
} elseif ($aWord['country_code']) {
|
||||
// Filter country tokens that do not match restricted countries.
|
||||
if (!$aCountryCodes
|
||||
|| in_array($aWord['country_code'], $aCountryCodes)
|
||||
) {
|
||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||
}
|
||||
} else {
|
||||
$oToken = new Token\Word(
|
||||
$iId,
|
||||
$aWord['word_token'][0] != ' ',
|
||||
(int) $aWord['count'],
|
||||
substr_count($aWord['word_token'], ' ')
|
||||
);
|
||||
}
|
||||
|
||||
if ($oToken) {
|
||||
$this->addToken($aWord['word_token'], $oToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new token for the given word.
|
||||
*
|
||||
|
@ -2,7 +2,6 @@
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
require_once(CONST_LibDir.'/ParameterParser.php');
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
@ -41,16 +40,16 @@ loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
|
||||
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
|
||||
@define('CONST_Log_File', getSetting('LOG_FILE', false));
|
||||
@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
|
||||
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
|
||||
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
|
||||
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
|
||||
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
|
||||
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
|
||||
@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
|
||||
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
|
||||
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
|
||||
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
|
||||
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
|
||||
$oDB = new Nominatim\DB;
|
||||
$oDB->connect();
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/log.php');
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
require_once(CONST_LibDir.'/PlaceLookup.php');
|
||||
require_once(CONST_LibDir.'/ReverseGeocode.php');
|
||||
|
||||
@ -26,16 +25,16 @@ loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
|
||||
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
|
||||
@define('CONST_Log_File', getSetting('LOG_FILE', false));
|
||||
@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
|
||||
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
|
||||
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
|
||||
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
|
||||
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
|
||||
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
|
||||
@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
|
||||
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
|
||||
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
|
||||
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
|
||||
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
|
||||
$oDB = new Nominatim\DB();
|
||||
$oDB->connect();
|
||||
|
254
lib-php/tokenizer/legacy_tokenizer.php
Normal file
254
lib-php/tokenizer/legacy_tokenizer.php
Normal file
@ -0,0 +1,254 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
class Tokenizer
|
||||
{
|
||||
private $oDB;
|
||||
|
||||
private $oNormalizer = null;
|
||||
private $aCountryRestriction = null;
|
||||
|
||||
public function __construct(&$oDB)
|
||||
{
|
||||
$this->oDB =& $oDB;
|
||||
$this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
|
||||
}
|
||||
|
||||
public function checkStatus()
|
||||
{
|
||||
$sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
|
||||
if ($sStandardWord === false) {
|
||||
throw new Exception('Module failed', 701);
|
||||
}
|
||||
|
||||
if ($sStandardWord != 'a') {
|
||||
throw new Exception('Module call failed', 702);
|
||||
}
|
||||
|
||||
$sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
|
||||
$iWordID = $this->oDB->getOne($sSQL);
|
||||
if ($iWordID === false) {
|
||||
throw new Exception('Query failed', 703);
|
||||
}
|
||||
if (!$iWordID) {
|
||||
throw new Exception('No value', 704);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public function setCountryRestriction($aCountries)
|
||||
{
|
||||
$this->aCountryRestriction = $aCountries;
|
||||
}
|
||||
|
||||
|
||||
public function normalizeString($sTerm)
|
||||
{
|
||||
if ($this->oNormalizer === null) {
|
||||
return $sTerm;
|
||||
}
|
||||
|
||||
return $this->oNormalizer->transliterate($sTerm);
|
||||
}
|
||||
|
||||
|
||||
public function tokensForSpecialTerm($sTerm)
|
||||
{
|
||||
$aResults = array();
|
||||
|
||||
$sSQL = 'SELECT word_id, class, type FROM word ';
|
||||
$sSQL .= ' WHERE word_token = \' \' || make_standard_name(:term)';
|
||||
$sSQL .= ' AND class is not null AND class not in (\'place\')';
|
||||
|
||||
Debug::printVar('Term', $sTerm);
|
||||
Debug::printSQL($sSQL);
|
||||
$aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $sTerm));
|
||||
|
||||
Debug::printVar('Results', $aSearchWords);
|
||||
|
||||
foreach ($aSearchWords as $aSearchTerm) {
|
||||
$aResults[] = new \Nominatim\Token\SpecialTerm(
|
||||
$aSearchTerm['word_id'],
|
||||
$aSearchTerm['class'],
|
||||
$aSearchTerm['type'],
|
||||
\Nominatim\Operator::TYPE
|
||||
);
|
||||
}
|
||||
|
||||
Debug::printVar('Special term tokens', $aResults);
|
||||
|
||||
return $aResults;
|
||||
}
|
||||
|
||||
|
||||
public function extractTokensFromPhrases(&$aPhrases)
|
||||
{
|
||||
// First get the normalized version of all phrases
|
||||
$sNormQuery = '';
|
||||
$sSQL = 'SELECT ';
|
||||
$aParams = array();
|
||||
foreach ($aPhrases as $iPhrase => $oPhrase) {
|
||||
$sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
|
||||
$sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
|
||||
$aParams[':'.$iPhrase] = $oPhrase->getPhrase();
|
||||
}
|
||||
$sSQL = substr($sSQL, 0, -1);
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
Debug::printVar('SQL parameters', $aParams);
|
||||
|
||||
$aNormPhrases = $this->oDB->getRow($sSQL, $aParams);
|
||||
|
||||
Debug::printVar('SQL result', $aNormPhrases);
|
||||
|
||||
// now compute all possible tokens
|
||||
$aWordLists = array();
|
||||
$aTokens = array();
|
||||
foreach ($aNormPhrases as $sTitle => $sPhrase) {
|
||||
if (strlen($sPhrase) > 0) {
|
||||
$aWords = explode(' ', $sPhrase);
|
||||
Tokenizer::addTokens($aTokens, $aWords);
|
||||
$aWordLists[] = $aWords;
|
||||
} else {
|
||||
$aWordLists[] = array();
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printVar('Tokens', $aTokens);
|
||||
Debug::printVar('WordLists', $aWordLists);
|
||||
|
||||
$oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
|
||||
|
||||
foreach ($aPhrases as $iPhrase => $oPhrase) {
|
||||
$oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
|
||||
}
|
||||
|
||||
return $oValidTokens;
|
||||
}
|
||||
|
||||
|
||||
private function computeValidTokens($aTokens, $sNormQuery)
|
||||
{
|
||||
$oValidTokens = new TokenList();
|
||||
|
||||
if (!empty($aTokens)) {
|
||||
$this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
|
||||
|
||||
// Try more interpretations for Tokens that could not be matched.
|
||||
foreach ($aTokens as $sToken) {
|
||||
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
|
||||
if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
|
||||
// US ZIP+4 codes - merge in the 5-digit ZIP code
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\Postcode(null, $aData[1], 'us')
|
||||
);
|
||||
} elseif (preg_match('/^ [0-9]+$/', $sToken)) {
|
||||
// Unknown single word token with a number.
|
||||
// Assume it is a house number.
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\HouseNumber(null, trim($sToken))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $oValidTokens;
|
||||
}
|
||||
|
||||
|
||||
private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
|
||||
{
|
||||
// Check which tokens we have, get the ID numbers
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
|
||||
$sSQL .= ' operator, coalesce(search_name_count, 0) as count';
|
||||
$sSQL .= ' FROM word WHERE word_token in (';
|
||||
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
|
||||
$aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
|
||||
|
||||
foreach ($aDBWords as $aWord) {
|
||||
$oToken = null;
|
||||
$iId = (int) $aWord['word_id'];
|
||||
|
||||
if ($aWord['class']) {
|
||||
// Special terms need to appear in their normalized form.
|
||||
// (postcodes are not normalized in the word table)
|
||||
$sNormWord = $this->normalizeString($aWord['word']);
|
||||
if ($aWord['word'] && strpos($sNormQuery, $sNormWord) === false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
|
||||
$oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
|
||||
} elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
|
||||
if ($aWord['word']
|
||||
&& pg_escape_string($aWord['word']) == $aWord['word']
|
||||
) {
|
||||
$oToken = new Token\Postcode(
|
||||
$iId,
|
||||
$aWord['word'],
|
||||
$aWord['country_code']
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// near and in operator the same at the moment
|
||||
$oToken = new Token\SpecialTerm(
|
||||
$iId,
|
||||
$aWord['class'],
|
||||
$aWord['type'],
|
||||
$aWord['operator'] ? Operator::NEAR : Operator::NONE
|
||||
);
|
||||
}
|
||||
} elseif ($aWord['country_code']) {
|
||||
// Filter country tokens that do not match restricted countries.
|
||||
if (!$this->aCountryRestriction
|
||||
|| in_array($aWord['country_code'], $this->aCountryRestriction)
|
||||
) {
|
||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||
}
|
||||
} else {
|
||||
$oToken = new Token\Word(
|
||||
$iId,
|
||||
$aWord['word_token'][0] != ' ',
|
||||
(int) $aWord['count'],
|
||||
substr_count($aWord['word_token'], ' ')
|
||||
);
|
||||
}
|
||||
|
||||
if ($oToken) {
|
||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add the tokens from this phrase to the given list of tokens.
|
||||
*
|
||||
* @param string[] $aTokens List of tokens to append.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
private static function addTokens(&$aTokens, $aWords)
|
||||
{
|
||||
$iNumWords = count($aWords);
|
||||
|
||||
for ($i = 0; $i < $iNumWords; $i++) {
|
||||
$sPhrase = $aWords[$i];
|
||||
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
|
||||
$aTokens[$sPhrase] = $sPhrase;
|
||||
|
||||
for ($j = $i + 1; $j < $iNumWords; $j++) {
|
||||
$sPhrase .= ' '.$aWords[$j];
|
||||
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
|
||||
$aTokens[$sPhrase] = $sPhrase;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,5 +1,4 @@
|
||||
{% include('functions/utils.sql') %}
|
||||
{% include('functions/normalization.sql') %}
|
||||
{% include('functions/ranking.sql') %}
|
||||
{% include('functions/importance.sql') %}
|
||||
{% include('functions/address_lookup.sql') %}
|
||||
|
@ -12,39 +12,47 @@ $$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
|
||||
RETURNS HSTORE
|
||||
AS $$
|
||||
DECLARE
|
||||
location RECORD;
|
||||
waynodes BIGINT[];
|
||||
BEGIN
|
||||
IF akeys(in_address) != ARRAY['interpolation'] THEN
|
||||
RETURN in_address;
|
||||
END IF;
|
||||
|
||||
SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid;
|
||||
FOR location IN
|
||||
SELECT placex.address, placex.osm_id FROM placex
|
||||
WHERE osm_type = 'N' and osm_id = ANY(waynodes)
|
||||
and placex.address is not null
|
||||
and (placex.address ? 'street' or placex.address ? 'place')
|
||||
and indexed_status < 100
|
||||
LOOP
|
||||
-- mark it as a derived address
|
||||
RETURN location.address || in_address || hstore('_inherited', '');
|
||||
END LOOP;
|
||||
|
||||
RETURN in_address;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
|
||||
-- find the parent road of the cut road parts
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
|
||||
place TEXT, partition SMALLINT,
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY, geom GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
addr_street TEXT;
|
||||
addr_place TEXT;
|
||||
parent_place_id BIGINT;
|
||||
|
||||
waynodes BIGINT[];
|
||||
|
||||
location RECORD;
|
||||
BEGIN
|
||||
addr_street = street;
|
||||
addr_place = place;
|
||||
|
||||
IF addr_street is null and addr_place is null THEN
|
||||
select nodes from planet_osm_ways where id = wayid INTO waynodes;
|
||||
FOR location IN SELECT placex.address from placex
|
||||
where osm_type = 'N' and osm_id = ANY(waynodes)
|
||||
and placex.address is not null
|
||||
and (placex.address ? 'street' or placex.address ? 'place')
|
||||
and indexed_status < 100
|
||||
limit 1 LOOP
|
||||
addr_street = location.address->'street';
|
||||
addr_place = location.address->'place';
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
||||
partition, centroid);
|
||||
parent_place_id := find_parent_for_address(street, place, partition, centroid);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
FOR location IN SELECT place_id FROM placex
|
||||
@ -147,15 +155,15 @@ BEGIN
|
||||
NEW.interpolationtype = NEW.address->'interpolation';
|
||||
|
||||
place_centroid := ST_PointOnSurface(NEW.linegeo);
|
||||
NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
|
||||
NEW.address->'place',
|
||||
NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
|
||||
token_addr_place_match_tokens(NEW.token_info),
|
||||
NEW.partition, place_centroid, NEW.linegeo);
|
||||
|
||||
IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
||||
interpol_postcode := NEW.address->'postcode';
|
||||
housenum := getorcreate_postcode_id(NEW.address->'postcode');
|
||||
ELSE
|
||||
interpol_postcode := NULL;
|
||||
interpol_postcode := token_normalized_postcode(NEW.address->'postcode');
|
||||
|
||||
NEW.token_info := token_strip_info(NEW.token_info);
|
||||
IF NEW.address ? '_inherited' THEN
|
||||
NEW.address := hstore('interpolation', NEW.interpolationtype);
|
||||
END IF;
|
||||
|
||||
-- if the line was newly inserted, split the line as necessary
|
||||
@ -202,12 +210,13 @@ BEGIN
|
||||
|
||||
-- determine postcode
|
||||
postcode := coalesce(interpol_postcode,
|
||||
prevnode.address->'postcode',
|
||||
nextnode.address->'postcode',
|
||||
token_normalized_postcode(prevnode.address->'postcode'),
|
||||
token_normalized_postcode(nextnode.address->'postcode'),
|
||||
postcode);
|
||||
|
||||
IF postcode is NULL THEN
|
||||
SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
|
||||
SELECT token_normalized_postcode(placex.postcode)
|
||||
FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
|
||||
END IF;
|
||||
IF postcode is NULL THEN
|
||||
postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry);
|
||||
@ -217,7 +226,7 @@ BEGIN
|
||||
NEW.startnumber := startnumber;
|
||||
NEW.endnumber := endnumber;
|
||||
NEW.linegeo := sectiongeo;
|
||||
NEW.postcode := upper(trim(postcode));
|
||||
NEW.postcode := postcode;
|
||||
ELSE
|
||||
insert into location_property_osmline
|
||||
(linegeo, partition, osm_id, parent_place_id,
|
||||
|
@ -1,525 +0,0 @@
|
||||
-- Functions for term normalisation and access to the 'word' table.
|
||||
|
||||
CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
|
||||
AS '{{ modulepath }}/nominatim.so', 'transliteration'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
|
||||
AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
|
||||
AS $$
|
||||
DECLARE
|
||||
o TEXT;
|
||||
BEGIN
|
||||
o := public.gettokenstring(public.transliteration(name));
|
||||
RETURN trim(substr(o,1,length(o)));
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- returns NULL if the word is too common
|
||||
CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
count INTEGER;
|
||||
BEGIN
|
||||
lookup_token := trim(lookup_word);
|
||||
SELECT min(word_id), max(search_name_count) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_id, count;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
|
||||
ELSE
|
||||
IF count > get_maxwordfreq() THEN
|
||||
return_word_id := NULL;
|
||||
END IF;
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-- Create housenumber tokens from an OSM addr:housenumber.
|
||||
-- The housnumber is split at comma and semicolon as necessary.
|
||||
-- The function returns the normalized form of the housenumber suitable
|
||||
-- for comparison.
|
||||
CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
|
||||
RETURNS TEXT
|
||||
AS $$
|
||||
DECLARE
|
||||
normtext TEXT;
|
||||
BEGIN
|
||||
SELECT array_to_string(array_agg(trans), ';')
|
||||
INTO normtext
|
||||
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
|
||||
FROM (SELECT make_standard_name(h) as lookup_word
|
||||
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
|
||||
|
||||
return normtext;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE STRICT;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' ' || trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class='place' and type='house'
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null,
|
||||
'place', 'house', null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
lookup_word TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_word := upper(trim(postcode));
|
||||
lookup_token := ' ' || make_standard_name(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and word = lookup_word
|
||||
and class='place' and type='postcode'
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
|
||||
'place', 'postcode', null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
|
||||
lookup_country_code varchar(2))
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and country_code=lookup_country_code
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null,
|
||||
null, null, lookup_country_code, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
|
||||
lookup_class text, lookup_type text)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and word = normalized_word
|
||||
and class = lookup_class and type = lookup_type
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
|
||||
lookup_class, lookup_type, null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
|
||||
normalized_word TEXT,
|
||||
lookup_class text,
|
||||
lookup_type text,
|
||||
op text)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and word = normalized_word
|
||||
and class = lookup_class and type = lookup_type and operator = op
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
|
||||
lookup_class, lookup_type, null, 0, op);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
nospace_lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
|
||||
null, null, null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
BEGIN
|
||||
RETURN getorcreate_name_id(lookup_word, '');
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-- Normalize a string and lookup its word ids (partial words).
|
||||
CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
words TEXT[];
|
||||
id INTEGER;
|
||||
return_word_id INTEGER[];
|
||||
word_ids INTEGER[];
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
words := string_to_array(make_standard_name(lookup_word), ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
SELECT array_agg(word_id) INTO word_ids
|
||||
FROM word
|
||||
WHERE word_token = words[j] and class is null and type is null;
|
||||
|
||||
IF word_ids IS NULL THEN
|
||||
id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
|
||||
return_word_id := return_word_id || id;
|
||||
ELSE
|
||||
return_word_id := array_merge(return_word_id, word_ids);
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Normalize a string and look up its name ids (full words).
|
||||
CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_ids INTEGER[];
|
||||
BEGIN
|
||||
lookup_token := ' '|| make_standard_name(lookup_word);
|
||||
SELECT array_agg(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_ids;
|
||||
RETURN return_word_ids;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2))
|
||||
RETURNS VOID
|
||||
AS $$
|
||||
DECLARE
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
item RECORD;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
FOR item IN SELECT (each(src)).* LOOP
|
||||
|
||||
s := make_standard_name(item.value);
|
||||
w := getorcreate_country(s, country_code);
|
||||
|
||||
words := regexp_split_to_array(item.value, E'[,;()]');
|
||||
IF array_upper(words, 1) != 1 THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_country(s, country_code);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
result INTEGER[];
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
item RECORD;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
result := '{}'::INTEGER[];
|
||||
|
||||
FOR item IN SELECT (each(src)).* LOOP
|
||||
|
||||
s := make_standard_name(item.value);
|
||||
w := getorcreate_name_id(s, item.value);
|
||||
|
||||
IF not(ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
w := getorcreate_word_id(s);
|
||||
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w = getorcreate_word_id(words[j]);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
words := regexp_split_to_array(item.value, E'[,;()]');
|
||||
IF array_upper(words, 1) != 1 THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_word_id(s);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
s := regexp_replace(item.value, '市$', '');
|
||||
IF s != item.value THEN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, item.value);
|
||||
IF NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
END LOOP;
|
||||
|
||||
RETURN result;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
i INTEGER;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
s := make_standard_name(src);
|
||||
w := getorcreate_name_id(s, src);
|
||||
|
||||
w := getorcreate_word_id(s);
|
||||
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w := getorcreate_word_id(words[j]);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
words := regexp_split_to_array(src, E'[,;()]');
|
||||
IF array_upper(words, 1) != 1 THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_word_id(s);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
s := regexp_replace(src, '市$', '');
|
||||
IF s != src THEN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, src);
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN 1;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
|
||||
in_partition SMALLINT,
|
||||
parent_place_id BIGINT,
|
||||
address HSTORE,
|
||||
country TEXT,
|
||||
housenumber TEXT,
|
||||
initial_name_vector INTEGER[],
|
||||
geometry GEOMETRY,
|
||||
OUT name_vector INTEGER[],
|
||||
OUT nameaddress_vector INTEGER[])
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_name_vector INTEGER[];
|
||||
parent_address_vector INTEGER[];
|
||||
addr_place_ids INTEGER[];
|
||||
|
||||
addr_item RECORD;
|
||||
parent_address_place_ids BIGINT[];
|
||||
filtered_address HSTORE;
|
||||
BEGIN
|
||||
nameaddress_vector := '{}'::INTEGER[];
|
||||
|
||||
SELECT s.name_vector, s.nameaddress_vector
|
||||
INTO parent_name_vector, parent_address_vector
|
||||
FROM search_name s
|
||||
WHERE s.place_id = parent_place_id;
|
||||
|
||||
-- Find all address tags that don't appear in the parent search names.
|
||||
SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address
|
||||
FROM (SELECT skeys(address) as k, svals(address) as v) a
|
||||
WHERE not addr_ids_from_name(v) && parent_address_vector
|
||||
AND k not in ('country', 'street', 'place', 'postcode',
|
||||
'housenumber', 'streetnumber', 'conscriptionnumber');
|
||||
|
||||
-- Compute all search terms from the addr: tags.
|
||||
IF filtered_address IS NOT NULL THEN
|
||||
FOR addr_item IN
|
||||
SELECT * FROM
|
||||
get_places_for_addr_tags(in_partition, geometry, filtered_address, country)
|
||||
LOOP
|
||||
IF addr_item.place_id is null THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector,
|
||||
addr_item.keywords);
|
||||
CONTINUE;
|
||||
END IF;
|
||||
|
||||
IF parent_address_place_ids is null THEN
|
||||
SELECT array_agg(parent_place_id) INTO parent_address_place_ids
|
||||
FROM place_addressline
|
||||
WHERE place_id = parent_place_id;
|
||||
END IF;
|
||||
|
||||
IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector,
|
||||
addr_item.keywords);
|
||||
|
||||
INSERT INTO place_addressline (place_id, address_place_id, fromarea,
|
||||
isaddress, distance, cached_rank_address)
|
||||
VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess,
|
||||
true, addr_item.distance, addr_item.rank_address);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
name_vector := initial_name_vector;
|
||||
|
||||
-- Check if the parent covers all address terms.
|
||||
-- If not, create a search name entry with the house number as the name.
|
||||
-- This is unusual for the search_name table but prevents that the place
|
||||
-- is returned when we only search for the street/place.
|
||||
|
||||
IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN
|
||||
name_vector := array_merge(name_vector,
|
||||
ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]);
|
||||
END IF;
|
||||
|
||||
IF not address ? 'street' and address ? 'place' THEN
|
||||
addr_place_ids := addr_ids_from_name(address->'place');
|
||||
IF not addr_place_ids <@ parent_name_vector THEN
|
||||
-- make sure addr:place terms are always searchable
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
|
||||
-- If there is a housenumber, also add the place name as a name,
|
||||
-- so we can search it by the usual housenumber+place algorithms.
|
||||
IF housenumber is not null THEN
|
||||
name_vector := array_merge(name_vector,
|
||||
ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]);
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Cheating here by not recomputing all terms but simply using the ones
|
||||
-- from the parent object.
|
||||
nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
|
||||
nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
|
||||
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
@ -63,54 +63,36 @@ END
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT,
|
||||
feature GEOMETRY,
|
||||
address HSTORE, country TEXT)
|
||||
RETURNS SETOF nearfeaturecentr
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
|
||||
from_rank SMALLINT, to_rank SMALLINT,
|
||||
extent FLOAT, tokens INT[])
|
||||
RETURNS nearfeaturecentr
|
||||
AS $$
|
||||
DECLARE
|
||||
r nearfeaturecentr%rowtype;
|
||||
item RECORD;
|
||||
BEGIN
|
||||
FOR item IN
|
||||
SELECT (get_addr_tag_rank(key, country)).*, key, name FROM
|
||||
(SELECT skeys(address) as key, svals(address) as name) x
|
||||
LOOP
|
||||
IF item.from_rank is null THEN
|
||||
CONTINUE;
|
||||
END IF;
|
||||
|
||||
{% for partition in db.partitions %}
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id, keywords, rank_address, rank_search,
|
||||
min(ST_Distance(feature, centroid)) as distance,
|
||||
isguess, postcode, centroid INTO r
|
||||
FROM location_area_large_{{ partition }}
|
||||
WHERE geometry && ST_Expand(feature, item.extent)
|
||||
AND rank_address between item.from_rank and item.to_rank
|
||||
AND word_ids_from_name(item.name) && keywords
|
||||
WHERE geometry && ST_Expand(feature, extent)
|
||||
AND rank_address between from_rank and to_rank
|
||||
AND tokens && keywords
|
||||
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
|
||||
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
|
||||
IF r.place_id is null THEN
|
||||
-- If we cannot find a place for the term, just return the
|
||||
-- search term for the given name. That ensures that the address
|
||||
-- element can still be searched for, even though it will not be
|
||||
-- displayed.
|
||||
RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null,
|
||||
null, null, null, null)::nearfeaturecentr;
|
||||
ELSE
|
||||
RETURN NEXT r;
|
||||
END IF;
|
||||
CONTINUE;
|
||||
RETURN r;
|
||||
END IF;
|
||||
{% endfor %}
|
||||
|
||||
RAISE EXCEPTION 'Unknown partition %', in_partition;
|
||||
END LOOP;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$
|
||||
DECLARE
|
||||
BEGIN
|
||||
|
@ -1,28 +1,58 @@
|
||||
-- Trigger functions for the placex table.
|
||||
|
||||
-- Find the parent road of a POI.
|
||||
-- Retrieve the data needed by the indexer for updating the place.
|
||||
--
|
||||
-- \returns Place ID of parent object or NULL if none
|
||||
--
|
||||
-- Copy data from linked items (POIs on ways, addr:street links, relations).
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT,
|
||||
poi_partition SMALLINT,
|
||||
bbox GEOMETRY,
|
||||
addr_street TEXT,
|
||||
addr_place TEXT,
|
||||
fallback BOOL = true)
|
||||
-- Return parameters:
|
||||
-- name list of names
|
||||
-- address list of address tags, either from the object or a surrounding
|
||||
-- building
|
||||
-- country_feature If the place is a country feature, this contains the
|
||||
-- country code, otherwise it is null.
|
||||
CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
|
||||
OUT name HSTORE,
|
||||
OUT address HSTORE,
|
||||
OUT country_feature VARCHAR)
|
||||
AS $$
|
||||
BEGIN
|
||||
-- For POI nodes, check if the address should be derived from a surrounding
|
||||
-- building.
|
||||
IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
|
||||
address := p.address;
|
||||
ELSE
|
||||
-- The additional && condition works around the misguided query
|
||||
-- planner of postgis 3.0.
|
||||
SELECT placex.address || hstore('_inherited', '') INTO address
|
||||
FROM placex
|
||||
WHERE ST_Covers(geometry, p.centroid)
|
||||
and geometry && p.centroid
|
||||
and placex.address is not null
|
||||
and (placex.address ? 'housenumber' or placex.address ? 'street' or placex.address ? 'place')
|
||||
and rank_search = 30 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
LIMIT 1;
|
||||
END IF;
|
||||
|
||||
address := address - '_unlisted_place'::TEXT;
|
||||
name := p.name;
|
||||
|
||||
country_feature := CASE WHEN p.admin_level = 2
|
||||
and p.class = 'boundary' and p.type = 'administrative'
|
||||
and p.osm_type = 'R'
|
||||
THEN p.country_code
|
||||
ELSE null
|
||||
END;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_place_id BIGINT DEFAULT NULL;
|
||||
location RECORD;
|
||||
parent RECORD;
|
||||
BEGIN
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
|
||||
|
||||
-- Is this object part of an associatedStreet relation?
|
||||
FOR location IN
|
||||
SELECT members FROM planet_osm_rels
|
||||
WHERE parts @> ARRAY[poi_osm_id]
|
||||
@ -43,26 +73,54 @@ BEGIN
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- Find the parent road of a POI.
|
||||
--
|
||||
-- \returns Place ID of parent object or NULL if none
|
||||
--
|
||||
-- Copy data from linked items (POIs on ways, addr:street links, relations).
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT,
|
||||
poi_partition SMALLINT,
|
||||
bbox GEOMETRY,
|
||||
addr_street INTEGER[],
|
||||
addr_place INTEGER[],
|
||||
is_place_addr BOOLEAN)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_place_id BIGINT DEFAULT NULL;
|
||||
location RECORD;
|
||||
BEGIN
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
|
||||
|
||||
-- Is this object part of an associatedStreet relation?
|
||||
parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
||||
poi_partition, bbox);
|
||||
IF parent_place_id is not null THEN
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
|
||||
IF poi_osm_type = 'N' THEN
|
||||
IF parent_place_id is null and poi_osm_type = 'N' THEN
|
||||
-- Is this node part of an interpolation?
|
||||
FOR parent IN
|
||||
FOR location IN
|
||||
SELECT q.parent_place_id
|
||||
FROM location_property_osmline q, planet_osm_ways x
|
||||
WHERE q.linegeo && bbox and x.id = q.osm_id
|
||||
and poi_osm_id = any(x.nodes)
|
||||
LIMIT 1
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id;{% endif %}
|
||||
RETURN parent.parent_place_id;
|
||||
{% if debug %}RAISE WARNING 'Get parent from interpolation: %', location.parent_place_id;{% endif %}
|
||||
RETURN location.parent_place_id;
|
||||
END LOOP;
|
||||
|
||||
-- Is this node part of any other way?
|
||||
FOR location IN
|
||||
SELECT p.place_id, p.osm_id, p.rank_search, p.address,
|
||||
coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
|
||||
@ -76,23 +134,15 @@ BEGIN
|
||||
-- Way IS a road then we are on it - that must be our road
|
||||
IF location.rank_search < 28 THEN
|
||||
{% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
|
||||
return location.place_id;
|
||||
RETURN location.place_id;
|
||||
END IF;
|
||||
|
||||
SELECT find_parent_for_poi('W', location.osm_id, poi_partition,
|
||||
location.centroid,
|
||||
location.address->'street',
|
||||
location.address->'place',
|
||||
false)
|
||||
INTO parent_place_id;
|
||||
IF parent_place_id is not null THEN
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
parent_place_id := find_associated_street('W', location.osm_id);
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
IF fallback THEN
|
||||
IF addr_street is null and addr_place is not null THEN
|
||||
IF parent_place_id is NULL THEN
|
||||
IF is_place_addr THEN
|
||||
-- The address is attached to a place we don't know.
|
||||
-- Instead simply use the containing area with the largest rank.
|
||||
FOR location IN
|
||||
@ -240,6 +290,101 @@ $$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
|
||||
in_partition SMALLINT,
|
||||
parent_place_id BIGINT,
|
||||
is_place_addr BOOLEAN,
|
||||
country TEXT,
|
||||
token_info JSONB,
|
||||
geometry GEOMETRY,
|
||||
OUT name_vector INTEGER[],
|
||||
OUT nameaddress_vector INTEGER[])
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_name_vector INTEGER[];
|
||||
parent_address_vector INTEGER[];
|
||||
addr_place_ids INTEGER[];
|
||||
hnr_vector INTEGER[];
|
||||
|
||||
addr_item RECORD;
|
||||
addr_place RECORD;
|
||||
parent_address_place_ids BIGINT[];
|
||||
BEGIN
|
||||
nameaddress_vector := '{}'::INTEGER[];
|
||||
|
||||
SELECT s.name_vector, s.nameaddress_vector
|
||||
INTO parent_name_vector, parent_address_vector
|
||||
FROM search_name s
|
||||
WHERE s.place_id = parent_place_id;
|
||||
|
||||
FOR addr_item IN
|
||||
SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)
|
||||
WHERE not search_tokens <@ parent_address_vector
|
||||
LOOP
|
||||
addr_place := get_address_place(in_partition, geometry,
|
||||
addr_item.from_rank, addr_item.to_rank,
|
||||
addr_item.extent, addr_item.match_tokens);
|
||||
|
||||
IF addr_place is null THEN
|
||||
-- No place found in OSM that matches. Make it at least searchable.
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_item.search_tokens);
|
||||
ELSE
|
||||
IF parent_address_place_ids is null THEN
|
||||
SELECT array_agg(parent_place_id) INTO parent_address_place_ids
|
||||
FROM place_addressline
|
||||
WHERE place_id = parent_place_id;
|
||||
END IF;
|
||||
|
||||
-- If the parent already lists the place in place_address line, then we
|
||||
-- are done. Otherwise, add its own place_address line.
|
||||
IF not parent_address_place_ids @> ARRAY[addr_place.place_id] THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_place.keywords);
|
||||
|
||||
INSERT INTO place_addressline (place_id, address_place_id, fromarea,
|
||||
isaddress, distance, cached_rank_address)
|
||||
VALUES (obj_place_id, addr_place.place_id, not addr_place.isguess,
|
||||
true, addr_place.distance, addr_place.rank_address);
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
name_vector := token_get_name_search_tokens(token_info);
|
||||
|
||||
-- Check if the parent covers all address terms.
|
||||
-- If not, create a search name entry with the house number as the name.
|
||||
-- This is unusual for the search_name table but prevents the place from being
-- returned when we only search for the street/place.
|
||||
|
||||
hnr_vector := token_get_housenumber_search_tokens(token_info);
|
||||
|
||||
IF hnr_vector is not null and not nameaddress_vector <@ parent_address_vector THEN
|
||||
name_vector := array_merge(name_vector, hnr_vector);
|
||||
END IF;
|
||||
|
||||
IF is_place_addr THEN
|
||||
addr_place_ids := token_addr_place_search_tokens(token_info);
|
||||
IF not addr_place_ids <@ parent_name_vector THEN
|
||||
-- make sure addr:place terms are always searchable
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
|
||||
-- If there is a housenumber, also add the place name as a name,
|
||||
-- so we can search it by the usual housenumber+place algorithms.
|
||||
IF hnr_vector is not null THEN
|
||||
name_vector := array_merge(name_vector, addr_place_ids);
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Cheating here by not recomputing all terms but simply using the ones
|
||||
-- from the parent object.
|
||||
nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
|
||||
nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
|
||||
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
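The vector composition above can be summarised in plain Python. This is only an illustrative sketch, not project code: token vectors are modelled as sets of integer ids, the tokens collected by the address loop are assumed to be handed in already as addr_tokens, and the place_addressline bookkeeping is left out.

    def poi_search_terms(token_info, parent_name, parent_address, is_place_addr, addr_tokens):
        # token_info mirrors the JSONB column; parent_* are the parent's search_name vectors
        name = set(token_info.get('names', []))
        hnr = set(token_info.get('hnr_tokens', []))
        nameaddress = set(addr_tokens)
        if hnr and not nameaddress <= parent_address:
            # parent does not cover all address terms: make the housenumber searchable on its own
            name |= hnr
        if is_place_addr:
            place = set(token_info.get('place_search', []))
            if not place <= parent_name:
                nameaddress |= place          # keep addr:place terms searchable
                if hnr:
                    name |= place             # allow housenumber+place searches
        # reuse the parent's terms instead of recomputing the full address
        nameaddress |= parent_name | parent_address
        return name, nameaddress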
|
||||
|
||||
|
||||
-- Insert address of a place into the place_addressline table.
|
||||
--
|
||||
-- \param obj_place_id Place_id of the place to compute the address for.
|
||||
@ -260,7 +405,7 @@ LANGUAGE plpgsql STABLE;
|
||||
CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
|
||||
partition SMALLINT,
|
||||
maxrank SMALLINT,
|
||||
address HSTORE,
|
||||
token_info JSONB,
|
||||
geometry GEOMETRY,
|
||||
country TEXT,
|
||||
OUT parent_place_id BIGINT,
|
||||
@ -275,7 +420,8 @@ DECLARE
|
||||
current_node_area GEOMETRY := NULL;
|
||||
|
||||
parent_place_rank INT := 0;
|
||||
addr_place_ids BIGINT[];
|
||||
addr_place_ids BIGINT[] := '{}'::int[];
|
||||
new_address_vector INT[];
|
||||
|
||||
location RECORD;
|
||||
BEGIN
|
||||
@ -285,16 +431,21 @@ BEGIN
|
||||
address_havelevel := array_fill(false, ARRAY[maxrank]);
|
||||
|
||||
FOR location IN
|
||||
SELECT * FROM get_places_for_addr_tags(partition, geometry,
|
||||
address, country)
|
||||
SELECT (get_address_place(partition, geometry, from_rank, to_rank,
|
||||
extent, match_tokens)).*, search_tokens
|
||||
FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)) x
|
||||
ORDER BY rank_address, distance, isguess desc
|
||||
LOOP
|
||||
IF location.place_id is null THEN
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector,
|
||||
location.keywords::int[]);
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
|
||||
{% endif %}
|
||||
ELSE
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.keywords::INTEGER[]);
|
||||
{% endif %}
|
||||
|
||||
IF location.place_id is not null THEN
|
||||
location_isaddress := not address_havelevel[location.rank_address];
|
||||
IF not address_havelevel[location.rank_address] THEN
|
||||
address_havelevel[location.rank_address] := true;
|
||||
@ -309,13 +460,13 @@ BEGIN
|
||||
VALUES (obj_place_id, location.place_id, not location.isguess,
|
||||
true, location.distance, location.rank_address);
|
||||
|
||||
addr_place_ids := array_append(addr_place_ids, location.place_id);
|
||||
addr_place_ids := addr_place_ids || location.place_id;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
FOR location IN
|
||||
SELECT * FROM getNearFeatures(partition, geometry, maxrank)
|
||||
WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id]
|
||||
WHERE not addr_place_ids @> ARRAY[place_id]
|
||||
ORDER BY rank_address, isguess asc,
|
||||
distance *
|
||||
CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2
|
||||
@ -397,10 +548,11 @@ BEGIN
|
||||
NEW.place_id := nextval('seq_place');
|
||||
NEW.indexed_status := 1; --STATUS_NEW
|
||||
|
||||
NEW.country_code := lower(get_country_code(NEW.geometry));
|
||||
NEW.centroid := ST_PointOnSurface(NEW.geometry);
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
|
||||
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.centroid);
|
||||
|
||||
IF NEW.osm_type = 'X' THEN
|
||||
-- E'X'ternal records should already be in the right format so do nothing
|
||||
@ -522,8 +674,8 @@ DECLARE
|
||||
parent_address_level SMALLINT;
|
||||
place_address_level SMALLINT;
|
||||
|
||||
addr_street TEXT;
|
||||
addr_place TEXT;
|
||||
addr_street INTEGER[];
|
||||
addr_place INTEGER[];
|
||||
|
||||
max_rank SMALLINT;
|
||||
|
||||
@ -531,12 +683,11 @@ DECLARE
|
||||
nameaddress_vector INTEGER[];
|
||||
addr_nameaddress_vector INTEGER[];
|
||||
|
||||
inherited_address HSTORE;
|
||||
|
||||
linked_node_id BIGINT;
|
||||
linked_importance FLOAT;
|
||||
linked_wikipedia TEXT;
|
||||
|
||||
is_place_address BOOLEAN;
|
||||
result BOOLEAN;
|
||||
BEGIN
|
||||
-- deferred delete
|
||||
@ -566,9 +717,9 @@ BEGIN
|
||||
-- update not necessary for osmline because linked_place_id does not exist
|
||||
|
||||
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
|
||||
NEW.address := NEW.address - '_unlisted_place'::TEXT;
|
||||
|
||||
IF NEW.linked_place_id is not null THEN
|
||||
NEW.token_info := null;
|
||||
{% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %}
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
@ -579,13 +730,34 @@ BEGIN
|
||||
-- imported as place=postcode. That's why relations are allowed to pass here.
|
||||
-- This can go away in a couple of versions.
|
||||
IF NEW.class = 'place' and NEW.type = 'postcode' and NEW.osm_type != 'R' THEN
|
||||
NEW.token_info := null;
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
-- Speed up searches - just use the centroid of the feature
|
||||
-- cheaper but less acurate
|
||||
-- Compute a preliminary centroid.
|
||||
NEW.centroid := ST_PointOnSurface(NEW.geometry);
|
||||
{% if debug %}RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);{% endif %}
|
||||
|
||||
-- recalculate country and partition
|
||||
IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
|
||||
-- for countries, believe the mapped country code,
|
||||
-- so that we remain in the right partition if the boundaries
|
||||
-- suddenly expand.
|
||||
NEW.country_code := lower(NEW.address->'country');
|
||||
NEW.partition := get_partition(lower(NEW.country_code));
|
||||
IF NEW.partition = 0 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
ELSE
|
||||
IF NEW.rank_search >= 4 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
ELSE
|
||||
NEW.country_code := NULL;
|
||||
END IF;
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
{% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
|
||||
|
||||
|
||||
-- recompute the ranks, they might change when linking changes
|
||||
SELECT * INTO NEW.rank_search, NEW.rank_address
|
||||
@ -665,54 +837,12 @@ BEGIN
|
||||
parent_address_level := 3;
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'Copy over address tags';{% endif %}
|
||||
-- housenumber is a computed field, so start with an empty value
|
||||
NEW.housenumber := NULL;
|
||||
IF NEW.address is not NULL THEN
|
||||
IF NEW.address ? 'conscriptionnumber' THEN
|
||||
IF NEW.address ? 'streetnumber' THEN
|
||||
NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
|
||||
ELSE
|
||||
NEW.housenumber := NEW.address->'conscriptionnumber';
|
||||
END IF;
|
||||
ELSEIF NEW.address ? 'streetnumber' THEN
|
||||
NEW.housenumber := NEW.address->'streetnumber';
|
||||
ELSEIF NEW.address ? 'housenumber' THEN
|
||||
NEW.housenumber := NEW.address->'housenumber';
|
||||
END IF;
|
||||
NEW.housenumber := create_housenumber_id(NEW.housenumber);
|
||||
|
||||
addr_street := NEW.address->'street';
|
||||
addr_place := NEW.address->'place';
|
||||
|
||||
IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
|
||||
i := getorcreate_postcode_id(NEW.address->'postcode');
|
||||
END IF;
|
||||
END IF;
|
||||
NEW.housenumber := token_normalized_housenumber(NEW.token_info);
|
||||
addr_street := token_addr_street_match_tokens(NEW.token_info);
|
||||
addr_place := token_addr_place_match_tokens(NEW.token_info);
|
||||
|
||||
NEW.postcode := null;
|
||||
|
||||
-- recalculate country and partition
|
||||
IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
|
||||
-- for countries, believe the mapped country code,
|
||||
-- so that we remain in the right partition if the boundaries
|
||||
-- suddenly expand.
|
||||
NEW.country_code := lower(NEW.address->'country');
|
||||
NEW.partition := get_partition(lower(NEW.country_code));
|
||||
IF NEW.partition = 0 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
ELSE
|
||||
IF NEW.rank_search >= 4 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
ELSE
|
||||
NEW.country_code := NULL;
|
||||
END IF;
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
{% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
|
||||
|
||||
-- waterway ways are linked when they are part of a relation and have the same class/type
|
||||
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
|
||||
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
|
||||
@ -749,33 +879,14 @@ BEGIN
|
||||
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
|
||||
NEW.parent_place_id := null;
|
||||
|
||||
-- if we have a POI and there is no address information,
|
||||
-- see if we can get it from a surrounding building
|
||||
inherited_address := ''::HSTORE;
|
||||
IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL
|
||||
AND NEW.housenumber IS NULL THEN
|
||||
FOR location IN
|
||||
-- The additional && condition works around the misguided query
|
||||
-- planner of postgis 3.0.
|
||||
SELECT address from placex where ST_Covers(geometry, NEW.centroid)
|
||||
and geometry && NEW.centroid
|
||||
and (address ? 'housenumber' or address ? 'street' or address ? 'place')
|
||||
and rank_search > 28 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
limit 1
|
||||
LOOP
|
||||
NEW.housenumber := location.address->'housenumber';
|
||||
addr_street := location.address->'street';
|
||||
addr_place := location.address->'place';
|
||||
inherited_address := location.address;
|
||||
END LOOP;
|
||||
END IF;
|
||||
is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
|
||||
|
||||
-- We have to find our parent road.
|
||||
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
|
||||
NEW.partition,
|
||||
ST_Envelope(NEW.geometry),
|
||||
addr_street, addr_place);
|
||||
addr_street, addr_place,
|
||||
is_place_address);
|
||||
|
||||
-- If we found the road take a shortcut here.
|
||||
-- Otherwise fall back to the full address getting method below.
|
||||
@ -785,12 +896,12 @@ BEGIN
|
||||
SELECT p.country_code, p.postcode, p.name FROM placex p
|
||||
WHERE p.place_id = NEW.parent_place_id INTO location;
|
||||
|
||||
IF addr_street is null and addr_place is not null THEN
|
||||
IF is_place_address THEN
|
||||
-- Check if the addr:place tag is part of the parent name
|
||||
SELECT count(*) INTO i
|
||||
FROM svals(location.name) AS pname WHERE pname = addr_place;
|
||||
FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';
|
||||
IF i = 0 THEN
|
||||
NEW.address = NEW.address || hstore('_unlisted_place', addr_place);
|
||||
NEW.address = NEW.address || hstore('_unlisted_place', NEW.address->'place');
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
@ -798,39 +909,21 @@ BEGIN
|
||||
{% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %}
|
||||
|
||||
-- determine postcode
|
||||
IF NEW.address is not null AND NEW.address ? 'postcode' THEN
|
||||
NEW.postcode = upper(trim(NEW.address->'postcode'));
|
||||
ELSE
|
||||
NEW.postcode := location.postcode;
|
||||
END IF;
|
||||
IF NEW.postcode is null THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
END IF;
|
||||
NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
|
||||
location.postcode,
|
||||
get_nearest_postcode(NEW.country_code, NEW.geometry));
|
||||
|
||||
IF NEW.name is not NULL THEN
|
||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||
name_vector := make_keywords(NEW.name);
|
||||
|
||||
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
|
||||
result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
|
||||
name_vector, NEW.rank_search, NEW.rank_address,
|
||||
upper(trim(NEW.address->'postcode')), NEW.geometry,
|
||||
NEW.centroid);
|
||||
{% if debug %}RAISE WARNING 'Place added to location table';{% endif %}
|
||||
END IF;
|
||||
|
||||
END IF;
|
||||
|
||||
{% if not db.reverse_only %}
|
||||
IF array_length(name_vector, 1) is not NULL
|
||||
OR inherited_address is not NULL OR NEW.address is not NULL
|
||||
THEN
|
||||
IF NEW.name is not NULL OR NEW.address is not NULL THEN
|
||||
SELECT * INTO name_vector, nameaddress_vector
|
||||
FROM create_poi_search_terms(NEW.place_id,
|
||||
NEW.partition, NEW.parent_place_id,
|
||||
inherited_address || NEW.address,
|
||||
NEW.country_code, NEW.housenumber,
|
||||
name_vector, NEW.centroid);
|
||||
is_place_address, NEW.country_code,
|
||||
NEW.token_info, NEW.centroid);
|
||||
|
||||
IF array_length(name_vector, 1) is not NULL THEN
|
||||
INSERT INTO search_name (place_id, search_rank, address_rank,
|
||||
@ -844,6 +937,17 @@ BEGIN
|
||||
END IF;
|
||||
{% endif %}
|
||||
|
||||
NEW.token_info := token_strip_info(NEW.token_info);
|
||||
-- If the address was inherited from a surrounding building,
|
||||
-- do not add it permanently to the table.
|
||||
IF NEW.address ? '_inherited' THEN
|
||||
IF NEW.address ? '_unlisted_place' THEN
|
||||
NEW.address := hstore('_unlisted_place', NEW.address->'_unlisted_place');
|
||||
ELSE
|
||||
NEW.address := null;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
@ -914,19 +1018,11 @@ BEGIN
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Initialise the name vector using our name
|
||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||
name_vector := make_keywords(NEW.name);
|
||||
|
||||
-- make sure all names are in the word table
|
||||
IF NEW.admin_level = 2
|
||||
AND NEW.class = 'boundary' AND NEW.type = 'administrative'
|
||||
AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R'
|
||||
THEN
|
||||
PERFORM create_country(NEW.name, lower(NEW.country_code));
|
||||
{% if debug %}RAISE WARNING 'Country names updated';{% endif %}
|
||||
|
||||
-- Also update the list of country names. Adding an additional sanity
|
||||
-- Update the list of country names. Adding an additional sanity
|
||||
-- check here: make sure the country does overlap with the area where
|
||||
-- we expect it to be as per static country grid.
|
||||
FOR location IN
|
||||
@ -959,29 +1055,28 @@ BEGIN
|
||||
ELSEIF NEW.rank_address > 25 THEN
|
||||
max_rank := 25;
|
||||
ELSE
|
||||
max_rank = NEW.rank_address;
|
||||
max_rank := NEW.rank_address;
|
||||
END IF;
|
||||
|
||||
SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank,
|
||||
NEW.address, geom, NEW.country_code)
|
||||
NEW.token_info, geom, NEW.country_code)
|
||||
INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
|
||||
|
||||
{% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %}
|
||||
|
||||
IF NEW.address is not null AND NEW.address ? 'postcode'
|
||||
AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
||||
NEW.postcode := upper(trim(NEW.address->'postcode'));
|
||||
END IF;
|
||||
|
||||
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
END IF;
|
||||
NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
|
||||
NEW.postcode);
|
||||
|
||||
-- if we have a name add this to the name search table
|
||||
IF NEW.name IS NOT NULL THEN
|
||||
-- Initialise the name vector using our name
|
||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||
name_vector := token_get_name_search_tokens(NEW.token_info);
|
||||
|
||||
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
|
||||
result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid);
|
||||
result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
|
||||
name_vector, NEW.rank_search, NEW.rank_address,
|
||||
NEW.postcode, NEW.geometry, NEW.centroid);
|
||||
{% if debug %}RAISE WARNING 'added to location (full)';{% endif %}
|
||||
END IF;
|
||||
|
||||
@ -990,8 +1085,11 @@ BEGIN
|
||||
{% if debug %}RAISE WARNING 'insert into road location table (full)';{% endif %}
|
||||
END IF;
|
||||
|
||||
result := insertSearchName(NEW.partition, NEW.place_id, name_vector,
|
||||
IF NEW.rank_address between 16 and 27 THEN
|
||||
result := insertSearchName(NEW.partition, NEW.place_id,
|
||||
token_get_name_match_tokens(NEW.token_info),
|
||||
NEW.rank_search, NEW.rank_address, NEW.geometry);
|
||||
END IF;
|
||||
{% if debug %}RAISE WARNING 'added to search name (full)';{% endif %}
|
||||
|
||||
{% if not db.reverse_only %}
|
||||
@ -1002,11 +1100,15 @@ BEGIN
|
||||
NEW.importance, NEW.country_code, name_vector,
|
||||
nameaddress_vector, NEW.centroid);
|
||||
{% endif %}
|
||||
END IF;
|
||||
|
||||
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'place update % % finished.', NEW.osm_type, NEW.osm_id;{% endif %}
|
||||
|
||||
NEW.token_info := token_strip_info(NEW.token_info);
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$
|
||||
|
@ -221,39 +221,32 @@ LANGUAGE plpgsql STABLE;
|
||||
-- \param centroid Location of the address.
|
||||
--
|
||||
-- \return Place ID of the parent if one was found, NULL otherwise.
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(street TEXT, place TEXT,
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_place_id BIGINT;
|
||||
word_ids INTEGER[];
|
||||
BEGIN
|
||||
IF street is not null THEN
|
||||
-- Check for addr:street attributes
|
||||
-- Note that addr:street links can only be indexed once the street itself is indexed
|
||||
word_ids := word_ids_from_name(street);
|
||||
IF word_ids is not null THEN
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids);
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent from addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Check for addr:place attributes.
|
||||
IF place is not null THEN
|
||||
word_ids := word_ids_from_name(place);
|
||||
IF word_ids is not null THEN
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids);
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent from addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
|
@ -1,9 +1,6 @@
|
||||
-- Indices used only during search and update.
|
||||
-- These indices are created only after the indexing process is done.
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
|
||||
ON word USING BTREE (word_id) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
|
||||
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
|
||||
|
||||
|
@ -43,22 +43,6 @@ CREATE TABLE nominatim_properties (
|
||||
);
|
||||
GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
|
||||
|
||||
drop table IF EXISTS word;
|
||||
CREATE TABLE word (
|
||||
word_id INTEGER,
|
||||
word_token text,
|
||||
word text,
|
||||
class text,
|
||||
type text,
|
||||
country_code varchar(2),
|
||||
search_name_count INTEGER,
|
||||
operator TEXT
|
||||
) {{db.tablespace.search_data}};
|
||||
CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {{db.tablespace.search_index}};
|
||||
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}" ;
|
||||
DROP SEQUENCE IF EXISTS seq_word;
|
||||
CREATE SEQUENCE seq_word start 1;
|
||||
|
||||
drop table IF EXISTS location_area CASCADE;
|
||||
CREATE TABLE location_area (
|
||||
place_id BIGINT,
|
||||
@ -109,6 +93,7 @@ CREATE TABLE location_property_osmline (
|
||||
linegeo GEOMETRY,
|
||||
interpolationtype TEXT,
|
||||
address HSTORE,
|
||||
token_info JSONB, -- custom column for tokenizer use only
|
||||
postcode TEXT,
|
||||
country_code VARCHAR(2)
|
||||
){{db.tablespace.search_data}};
|
||||
@ -158,6 +143,7 @@ CREATE TABLE placex (
|
||||
indexed_status SMALLINT,
|
||||
LIKE place INCLUDING CONSTRAINTS,
|
||||
wikipedia TEXT, -- calculated wikipedia article name (language:title)
|
||||
token_info JSONB, -- custom column for tokenizer use only
|
||||
country_code varchar(2),
|
||||
housenumber TEXT,
|
||||
postcode TEXT,
|
||||
@ -168,6 +154,10 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tabl
|
||||
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
|
||||
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
|
||||
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
|
||||
CREATE INDEX idx_placex_geometry_buildings ON placex
|
||||
USING GIST (geometry) {{db.tablespace.search_index}}
|
||||
WHERE address is not null and rank_search = 30
|
||||
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
|
||||
CREATE INDEX idx_placex_geometry_placenode ON placex
|
||||
USING GIST (geometry) {{db.tablespace.search_index}}
|
||||
WHERE osm_type = 'N' and rank_search < 26
|
||||
@ -178,7 +168,6 @@ DROP SEQUENCE IF EXISTS seq_place;
|
||||
CREATE SEQUENCE seq_place start 1;
|
||||
GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ;
|
||||
|
lib-sql/tokenizer/legacy_tokenizer.sql (new file, 399 lines)
@ -0,0 +1,399 @@
|
||||
-- Get tokens used for searching the given place.
|
||||
--
|
||||
-- These are the tokens that will be saved in the search_name table.
|
||||
CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'names')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Get tokens for matching the place name against others.
|
||||
--
|
||||
-- This should usually be restricted to full name tokens.
|
||||
CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'names')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Return the housenumber tokens applicable for the place.
|
||||
CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'hnr_tokens')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Return the housenumber in the form that it can be matched during search.
|
||||
CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
|
||||
RETURNS TEXT
|
||||
AS $$
|
||||
SELECT info->>'hnr';
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'place_match')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'place_search')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
DROP TYPE IF EXISTS token_addresstoken CASCADE;
|
||||
CREATE TYPE token_addresstoken AS (
|
||||
key TEXT,
|
||||
match_tokens INT[],
|
||||
search_tokens INT[]
|
||||
);
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
|
||||
RETURNS SETOF token_addresstoken
|
||||
AS $$
|
||||
SELECT key, (value->>1)::int[] as match_tokens,
|
||||
(value->>0)::int[] as search_tokens
|
||||
FROM jsonb_each(info->'addr');
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
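Read together, the accessor functions above imply a token_info layout roughly like the following. This is a reconstruction from the accessors, not a documented format; the keys are the ones read above, the ids are invented, and the values are Postgres array literals stored as JSON strings, which is what the ->> extraction plus cast expects:

    token_info = {
        'names': '{1,2,3}',             # name tokens (search and match use the same field here)
        'hnr_tokens': '{4}',            # housenumber tokens for searching
        'hnr': '34;36a',                # normalized housenumber form
        'street': '{5,6}',              # match tokens for addr:street
        'place_match': '{7}',           # match tokens for addr:place
        'place_search': '{7,8}',        # search tokens for addr:place
        'addr': {                       # per address key: [search tokens, match tokens]
            'city': ['{9,10}', '{9}'],
            'suburb': ['{11}', '{11}'],
        },
    }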
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
|
||||
RETURNS TEXT
|
||||
AS $$
|
||||
SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode)) END;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
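The same rule in Python, as a direct transcription of the SQL above for readability (STRICT makes a NULL input return NULL):

    def token_normalized_postcode(postcode):
        # multi-value postcodes (separated by ',' or ';') are rejected, the rest trimmed and upper-cased
        if postcode is None or ',' in postcode or ';' in postcode:
            return None
        return postcode.strip().upper()

    assert token_normalized_postcode(' ab1 2cd ') == 'AB1 2CD'
    assert token_normalized_postcode('12345;67890') is None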
|
||||
|
||||
|
||||
-- Return token info that should be saved permanently in the database.
|
||||
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
|
||||
RETURNS JSONB
|
||||
AS $$
|
||||
SELECT NULL::JSONB;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
--------------- private functions ----------------------------------------------
|
||||
|
||||
-- Functions for term normalisation and access to the 'word' table.
|
||||
|
||||
CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
|
||||
AS '{{ modulepath }}/nominatim.so', 'transliteration'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
|
||||
AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
|
||||
AS $$
|
||||
DECLARE
|
||||
o TEXT;
|
||||
BEGIN
|
||||
o := public.gettokenstring(public.transliteration(name));
|
||||
RETURN trim(substr(o,1,length(o)));
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- returns NULL if the word is too common
|
||||
CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
count INTEGER;
|
||||
BEGIN
|
||||
lookup_token := trim(lookup_word);
|
||||
SELECT min(word_id), max(search_name_count) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_id, count;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
|
||||
ELSE
|
||||
IF count > {{ max_word_freq }} THEN
|
||||
return_word_id := NULL;
|
||||
END IF;
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
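The same lookup-or-create logic expressed in Python, as a rough sketch only (cur is assumed to be a psycopg2 cursor and max_word_freq the value substituted for the {{ max_word_freq }} template parameter):

    def getorcreate_word_id(cur, lookup_word, max_word_freq):
        token = lookup_word.strip()
        cur.execute("SELECT min(word_id), max(search_name_count) FROM word"
                    " WHERE word_token = %s AND class is null AND type is null", (token,))
        word_id, count = cur.fetchone()
        if word_id is None:
            cur.execute("SELECT nextval('seq_word')")
            word_id = cur.fetchone()[0]
            cur.execute("INSERT INTO word VALUES (%s, %s, null, null, null, null, 0)",
                        (word_id, token))
        elif count is not None and count > max_word_freq:
            word_id = None      # word is too common to be useful for searching
        return word_id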
|
||||
|
||||
|
||||
-- Create housenumber tokens from an OSM addr:housenumber.
|
||||
-- The housenumber is split at comma and semicolon as necessary.
|
||||
-- The function returns the normalized form of the housenumber suitable
|
||||
-- for comparison.
|
||||
CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
|
||||
OUT tokens TEXT,
|
||||
OUT normtext TEXT)
|
||||
AS $$
|
||||
BEGIN
|
||||
SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
|
||||
INTO normtext, tokens
|
||||
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
|
||||
FROM (SELECT make_standard_name(h) as lookup_word
|
||||
FROM unnest(housenumbers) h) x) y;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE STRICT;
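As a worked example (token ids invented, and assuming make_standard_name() leaves plain numbers unchanged): create_housenumbers(ARRAY['34', '36a']) would return normtext = '34;36a', the normalized forms joined with ';', and tokens = '{1812,1813}', the text form of the array of token ids handed out by getorcreate_housenumber_id() below.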
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' ' || trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class='place' and type='house'
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, null,
|
||||
'place', 'house', null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
DECLARE
|
||||
r RECORD;
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' ' || make_standard_name(postcode);
|
||||
FOR r IN
|
||||
SELECT word_id FROM word
|
||||
WHERE word_token = lookup_token and word = postcode
|
||||
and class='place' and type='postcode'
|
||||
LOOP
|
||||
RETURN false;
|
||||
END LOOP;
|
||||
|
||||
INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
|
||||
'place', 'postcode', null, 0);
|
||||
RETURN true;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
nospace_lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
|
||||
null, null, null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Normalize a string and lookup its word ids (partial words).
|
||||
CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
words TEXT[];
|
||||
id INTEGER;
|
||||
return_word_id INTEGER[];
|
||||
word_ids INTEGER[];
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
words := string_to_array(make_standard_name(lookup_word), ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
SELECT array_agg(word_id) INTO word_ids
|
||||
FROM word
|
||||
WHERE word_token = words[j] and class is null and type is null;
|
||||
|
||||
IF word_ids IS NULL THEN
|
||||
id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
|
||||
return_word_id := return_word_id || id;
|
||||
ELSE
|
||||
return_word_id := array_merge(return_word_id, word_ids);
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Normalize a string and look up its name ids (full words).
|
||||
CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_ids INTEGER[];
|
||||
BEGIN
|
||||
lookup_token := ' '|| make_standard_name(lookup_word);
|
||||
SELECT array_agg(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_ids;
|
||||
RETURN return_word_ids;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
result INTEGER[];
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
item RECORD;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
result := '{}'::INTEGER[];
|
||||
|
||||
FOR item IN SELECT (each(src)).* LOOP
|
||||
|
||||
s := make_standard_name(item.value);
|
||||
w := getorcreate_name_id(s, item.value);
|
||||
|
||||
IF not(ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
w := getorcreate_word_id(s);
|
||||
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w = getorcreate_word_id(words[j]);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
words := regexp_split_to_array(item.value, E'[,;()]');
|
||||
IF array_upper(words, 1) != 1 THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_word_id(s);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
s := regexp_replace(item.value, '市$', '');
|
||||
IF s != item.value THEN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, item.value);
|
||||
IF NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
END LOOP;
|
||||
|
||||
RETURN result;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
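As an illustration of what make_keywords() collects (assuming make_standard_name('Rue de la Paix') yields 'rue de la paix'): the result contains a full-name token for ' rue de la paix' via getorcreate_name_id(), the matching partial token for 'rue de la paix' via getorcreate_word_id() if it is not too frequent, and partial tokens for each word 'rue', 'de', 'la' and 'paix'. Name values containing ',', ';' or parentheses additionally get a token per part, and a trailing '市' (the CJK 'city' suffix) is stripped to create one extra name token.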
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
i INTEGER;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
s := make_standard_name(src);
|
||||
w := getorcreate_name_id(s, src);
|
||||
|
||||
w := getorcreate_word_id(s);
|
||||
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w := getorcreate_word_id(words[j]);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
words := regexp_split_to_array(src, E'[,;()]');
|
||||
IF array_upper(words, 1) != 1 THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_word_id(s);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
s := regexp_replace(src, '市$', '');
|
||||
IF s != src THEN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, src);
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN 1;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
lib-sql/tokenizer/legacy_tokenizer_indices.sql (new file, 2 lines)
@ -0,0 +1,2 @@
CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
  ON word USING BTREE (word_id) {{db.tablespace.search_index}};
|
lib-sql/tokenizer/legacy_tokenizer_tables.sql (new file, 19 lines)
@ -0,0 +1,19 @@
DROP TABLE IF EXISTS word;
CREATE TABLE word (
  word_id INTEGER,
  word_token text NOT NULL,
  word text,
  class text,
  type text,
  country_code varchar(2),
  search_name_count INTEGER,
  operator TEXT
) {{db.tablespace.search_data}};

CREATE INDEX idx_word_word_token ON word
  USING BTREE (word_token) {{db.tablespace.search_index}};
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";

DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
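The functions in legacy_tokenizer.sql above store every kind of token in this single table and use word_token, class and type to tell them apart. A rough sketch of typical rows, following the column order above (ids and values invented for illustration; full names and special tokens carry a leading space in word_token, partial words do not):

    (311,  ' hauptstrasse', 'Hauptstraße', null,    null,       null, 0, null)   -- full name
    (312,  'hauptstrasse',  null,          null,    null,       null, 0, null)   -- partial word
    (1812, ' 34',           null,          'place', 'house',    null, 0, null)   -- housenumber
    (2033, ' 52074',        '52074',       'place', 'postcode', null, 0, null)   -- postcode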
|
@ -32,8 +32,11 @@ class UpdateIndex:
|
||||
@staticmethod
|
||||
def run(args):
|
||||
from ..indexer.indexer import Indexer
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or psutil.cpu_count() or 1)
|
||||
|
||||
if not args.no_boundaries:
|
||||
|
@ -46,6 +46,7 @@ class UpdateRefresh:
|
||||
@staticmethod
|
||||
def run(args):
|
||||
from ..tools import refresh
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
if args.postcodes:
|
||||
LOG.warning("Update postcodes centroid")
|
||||
@ -66,6 +67,8 @@ class UpdateRefresh:
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
refresh.create_functions(conn, args.config,
|
||||
args.diffs, args.enable_debug_statements)
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
tokenizer.update_sql_functions(args.config)
|
||||
|
||||
if args.wiki_data:
|
||||
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
|
||||
|
@ -83,6 +83,7 @@ class UpdateReplication:
|
||||
def _update(args):
|
||||
from ..tools import replication
|
||||
from ..indexer.indexer import Indexer
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
|
||||
params.update(base_url=args.config.REPLICATION_URL,
|
||||
@ -106,6 +107,8 @@ class UpdateReplication:
|
||||
raise UsageError("Bad argument '--no-index'.")
|
||||
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
||||
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
|
||||
while True:
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
start = dt.datetime.now(dt.timezone.utc)
|
||||
@ -116,7 +119,7 @@ class UpdateReplication:
|
||||
|
||||
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
|
||||
index_start = dt.datetime.now(dt.timezone.utc)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or 1)
|
||||
indexer.index_boundaries(0, 30)
|
||||
indexer.index_by_rank(0, 30)
|
||||
|
@ -56,6 +56,7 @@ class SetupAll:
|
||||
from ..tools import refresh
|
||||
from ..indexer.indexer import Indexer
|
||||
from ..tools import postcodes
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
if args.osm_file and not Path(args.osm_file).is_file():
|
||||
LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
|
||||
@ -67,12 +68,6 @@ class SetupAll:
|
||||
args.no_partitions,
|
||||
rouser=args.config.DATABASE_WEBUSER)
|
||||
|
||||
LOG.warning('Installing database module')
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
database_import.install_module(args.module_dir, args.project_dir,
|
||||
args.config.DATABASE_MODULE_PATH,
|
||||
conn=conn)
|
||||
|
||||
LOG.warning('Importing OSM data file')
|
||||
database_import.import_osm_data(Path(args.osm_file),
|
||||
args.osm2pgsql_options(0, 1),
|
||||
@ -105,22 +100,31 @@ class SetupAll:
|
||||
if args.continue_at is None or args.continue_at == 'load-data':
|
||||
LOG.warning('Initialise tables')
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
|
||||
database_import.truncate_data_tables(conn)
|
||||
|
||||
LOG.warning('Load data into placex table')
|
||||
database_import.load_data(args.config.get_libpq_dsn(),
|
||||
args.data_dir,
|
||||
args.threads or psutil.cpu_count() or 1)
|
||||
|
||||
LOG.warning("Setting up tokenizer")
|
||||
if args.continue_at is None or args.continue_at == 'load-data':
|
||||
# (re)initialise the tokenizer data
|
||||
tokenizer = tokenizer_factory.create_tokenizer(args.config)
|
||||
else:
|
||||
# just load the tokenizer
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
|
||||
if args.continue_at is None or args.continue_at == 'load-data':
|
||||
LOG.warning('Calculate postcodes')
|
||||
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
|
||||
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
|
||||
tokenizer)
|
||||
|
||||
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
||||
if args.continue_at is not None and args.continue_at != 'load-data':
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
|
||||
LOG.warning('Indexing places')
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or psutil.cpu_count() or 1)
|
||||
indexer.index_full(analyse=not args.index_noanalyse)
|
||||
|
||||
@ -129,7 +133,9 @@ class SetupAll:
|
||||
database_import.create_search_indices(conn, args.config,
|
||||
drop=args.no_updates)
|
||||
LOG.warning('Create search index for default country names.')
|
||||
database_import.create_country_names(conn, args.config)
|
||||
database_import.create_country_names(conn, tokenizer,
|
||||
args.config.LANGUAGES)
|
||||
tokenizer.finalize_import(args.config)
|
||||
|
||||
webdir = args.project_dir / 'website'
|
||||
LOG.warning('Setup website at %s', webdir)
|
||||
|
@ -9,6 +9,8 @@ LOG = logging.getLogger()
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid potentially unused imports.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
class ImportSpecialPhrases:
|
||||
"""\
|
||||
@ -22,10 +24,13 @@ class ImportSpecialPhrases:
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
if args.import_from_wiki:
|
||||
LOG.warning('Special phrases importation starting')
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||
SpecialPhrasesImporter(
|
||||
args.config, args.phplib_dir, db_connection
|
||||
).import_from_wiki()
|
||||
).import_from_wiki(tokenizer)
|
||||
return 0
|
||||
|
@ -48,14 +48,14 @@ class DBConnection:
|
||||
""" A single non-blocking database connection.
|
||||
"""
|
||||
|
||||
def __init__(self, dsn):
|
||||
def __init__(self, dsn, cursor_factory=None):
|
||||
self.current_query = None
|
||||
self.current_params = None
|
||||
self.dsn = dsn
|
||||
|
||||
self.conn = None
|
||||
self.cursor = None
|
||||
self.connect()
|
||||
self.connect(cursor_factory=cursor_factory)
|
||||
|
||||
def close(self):
|
||||
""" Close all open connections. Does not wait for pending requests.
|
||||
@ -66,7 +66,7 @@ class DBConnection:
|
||||
|
||||
self.conn = None
|
||||
|
||||
def connect(self):
|
||||
def connect(self, cursor_factory=None):
|
||||
""" (Re)connect to the database. Creates an asynchronous connection
|
||||
with JIT and parallel processing disabled. If a connection was
|
||||
already open, it is closed and a new connection established.
|
||||
@ -79,7 +79,7 @@ class DBConnection:
|
||||
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
|
||||
self.wait()
|
||||
|
||||
self.cursor = self.conn.cursor()
|
||||
self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
|
||||
# Disable JIT and parallel workers as they are known to cause problems.
|
||||
# Update pg_settings instead of using SET because it does not yield
|
||||
# errors on older versions of Postgres where the settings are not
|
||||
|
@ -89,8 +89,6 @@ class SQLPreprocessor:
|
||||
self.env.globals['db'] = db_info
|
||||
self.env.globals['sql'] = _setup_postgres_sql(conn)
|
||||
self.env.globals['postgres'] = _setup_postgresql_features(conn)
|
||||
self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
|
||||
str((config.project_dir / 'module').resolve())
|
||||
|
||||
|
||||
def run_sql_file(self, conn, name, **kwargs):
|
||||
|
@ -3,6 +3,9 @@ Main work horse for indexing (computing addresses) the database.
|
||||
"""
|
||||
import logging
|
||||
import select
|
||||
import time
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim.indexer.progress import ProgressLogger
|
||||
from nominatim.indexer import runners
|
||||
@ -11,6 +14,73 @@ from nominatim.db.connection import connect
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
|
||||
class PlaceFetcher:
|
||||
""" Asynchronous connection that fetches place details for processing.
|
||||
"""
|
||||
def __init__(self, dsn, setup_conn):
|
||||
self.wait_time = 0
|
||||
self.current_ids = None
|
||||
self.conn = DBConnection(dsn, cursor_factory=psycopg2.extras.DictCursor)
|
||||
|
||||
with setup_conn.cursor() as cur:
|
||||
# need to fetch those manually because register_hstore cannot
|
||||
# fetch them on an asynchronous connection below.
|
||||
hstore_oid = cur.scalar("SELECT 'hstore'::regtype::oid")
|
||||
hstore_array_oid = cur.scalar("SELECT 'hstore[]'::regtype::oid")
|
||||
|
||||
psycopg2.extras.register_hstore(self.conn.conn, oid=hstore_oid,
|
||||
array_oid=hstore_array_oid)
|
||||
|
||||
def close(self):
|
||||
""" Close the underlying asynchronous connection.
|
||||
"""
|
||||
if self.conn:
|
||||
self.conn.close()
|
||||
self.conn = None
|
||||
|
||||
|
||||
def fetch_next_batch(self, cur, runner):
|
||||
""" Send a request for the next batch of places.
|
||||
If details for the places are required, they will be fetched
|
||||
asynchronously.
|
||||
|
||||
Returns true if there is still data available.
|
||||
"""
|
||||
ids = cur.fetchmany(100)
|
||||
|
||||
if not ids:
|
||||
self.current_ids = None
|
||||
return False
|
||||
|
||||
if hasattr(runner, 'get_place_details'):
|
||||
runner.get_place_details(self.conn, ids)
|
||||
self.current_ids = []
|
||||
else:
|
||||
self.current_ids = ids
|
||||
|
||||
return True
|
||||
|
||||
def get_batch(self):
|
||||
""" Get the next batch of data, previously requested with
|
||||
`fetch_next_batch`.
|
||||
"""
|
||||
if self.current_ids is not None and not self.current_ids:
|
||||
tstart = time.time()
|
||||
self.conn.wait()
|
||||
self.wait_time += time.time() - tstart
|
||||
self.current_ids = self.conn.cursor.fetchall()
|
||||
|
||||
return self.current_ids
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.conn.wait()
|
||||
self.close()
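The intended use is a simple double-buffering loop, the same pattern Indexer._index() below implements (sketch only; dsn, conn, cur, runner and process() stand in for the objects set up there):

    with PlaceFetcher(dsn, conn) as fetcher:
        has_more = fetcher.fetch_next_batch(cur, runner)       # request batch 1
        while has_more:
            places = fetcher.get_batch()                       # wait for batch 1
            has_more = fetcher.fetch_next_batch(cur, runner)   # batch 2 loads in the background
            process(places)                                    # while batch 1 is processed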
|
||||
|
||||
class WorkerPool:
|
||||
""" A pool of asynchronous database connections.
|
||||
|
||||
@ -21,6 +91,7 @@ class WorkerPool:
|
||||
def __init__(self, dsn, pool_size):
|
||||
self.threads = [DBConnection(dsn) for _ in range(pool_size)]
|
||||
self.free_workers = self._yield_free_worker()
|
||||
self.wait_time = 0
|
||||
|
||||
|
||||
def finish_all(self):
|
||||
@ -64,7 +135,9 @@ class WorkerPool:
|
||||
ready = self.threads
|
||||
command_stat = 0
|
||||
else:
|
||||
tstart = time.time()
|
||||
_, ready, _ = select.select([], self.threads, [])
|
||||
self.wait_time += time.time() - tstart
|
||||
|
||||
|
||||
def __enter__(self):
|
||||
@ -72,6 +145,7 @@ class WorkerPool:
|
||||
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.finish_all()
|
||||
self.close()
|
||||
|
||||
|
||||
@ -79,8 +153,9 @@ class Indexer:
|
||||
""" Main indexing routine.
|
||||
"""
|
||||
|
||||
def __init__(self, dsn, num_threads):
|
||||
def __init__(self, dsn, tokenizer, num_threads):
|
||||
self.dsn = dsn
|
||||
self.tokenizer = tokenizer
|
||||
self.num_threads = num_threads
|
||||
|
||||
|
||||
@ -123,8 +198,9 @@ class Indexer:
|
||||
LOG.warning("Starting indexing boundaries using %s threads",
|
||||
self.num_threads)
|
||||
|
||||
with self.tokenizer.name_analyzer() as analyzer:
|
||||
for rank in range(max(minrank, 4), min(maxrank, 26)):
|
||||
self._index(runners.BoundaryRunner(rank))
|
||||
self._index(runners.BoundaryRunner(rank, analyzer))
|
||||
|
||||
def index_by_rank(self, minrank, maxrank):
|
||||
""" Index all entries of placex in the given rank range (inclusive)
|
||||
@ -137,15 +213,16 @@ class Indexer:
|
||||
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
|
||||
minrank, maxrank, self.num_threads)
|
||||
|
||||
with self.tokenizer.name_analyzer() as analyzer:
|
||||
for rank in range(max(1, minrank), maxrank):
|
||||
self._index(runners.RankRunner(rank))
|
||||
self._index(runners.RankRunner(rank, analyzer))
|
||||
|
||||
if maxrank == 30:
|
||||
self._index(runners.RankRunner(0))
|
||||
self._index(runners.InterpolationRunner(), 20)
|
||||
self._index(runners.RankRunner(30), 20)
|
||||
self._index(runners.RankRunner(0, analyzer))
|
||||
self._index(runners.InterpolationRunner(analyzer), 20)
|
||||
self._index(runners.RankRunner(30, analyzer), 20)
|
||||
else:
|
||||
self._index(runners.RankRunner(maxrank))
|
||||
self._index(runners.RankRunner(maxrank, analyzer))
|
||||
|
||||
|
||||
def index_postcodes(self):
|
||||
@ -173,6 +250,7 @@ class Indexer:
|
||||
LOG.warning("Starting %s (using batch size %s)", runner.name(), batch)
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
psycopg2.extras.register_hstore(conn)
|
||||
with conn.cursor() as cur:
|
||||
total_tuples = cur.scalar(runner.sql_count_objects())
|
||||
LOG.debug("Total number of rows: %i", total_tuples)
|
||||
@ -185,19 +263,24 @@ class Indexer:
|
||||
with conn.cursor(name='places') as cur:
|
||||
cur.execute(runner.sql_get_objects())
|
||||
|
||||
with PlaceFetcher(self.dsn, conn) as fetcher:
|
||||
with WorkerPool(self.dsn, self.num_threads) as pool:
|
||||
while True:
|
||||
places = [p[0] for p in cur.fetchmany(batch)]
|
||||
if not places:
|
||||
break
|
||||
has_more = fetcher.fetch_next_batch(cur, runner)
|
||||
while has_more:
|
||||
places = fetcher.get_batch()
|
||||
|
||||
LOG.debug("Processing places: %s", str(places))
|
||||
worker = pool.next_free_worker()
|
||||
# asynchronously get the next batch
|
||||
has_more = fetcher.fetch_next_batch(cur, runner)
|
||||
|
||||
worker.perform(runner.sql_index_place(places))
|
||||
progress.add(len(places))
|
||||
# And insert the current batch
|
||||
for idx in range(0, len(places), batch):
|
||||
part = places[idx:idx+batch]
|
||||
LOG.debug("Processing places: %s", str(part))
|
||||
runner.index_places(pool.next_free_worker(), part)
|
||||
progress.add(len(part))
|
||||
|
||||
pool.finish_all()
|
||||
LOG.info("Wait time: fetcher: %.2fs, pool: %.2fs",
|
||||
fetcher.wait_time, pool.wait_time)
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
@ -2,15 +2,52 @@
|
||||
Mix-ins that provide the actual commands for the indexer for various indexing
|
||||
tasks.
|
||||
"""
|
||||
import functools
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
# pylint: disable=C0111
|
||||
|
||||
class RankRunner:
|
||||
class AbstractPlacexRunner:
|
||||
""" Returns SQL commands for indexing of the placex table.
|
||||
"""
|
||||
SELECT_SQL = 'SELECT place_id FROM placex'
|
||||
|
||||
def __init__(self, rank, analyzer):
|
||||
self.rank = rank
|
||||
self.analyzer = analyzer
|
||||
|
||||
|
||||
@staticmethod
|
||||
@functools.lru_cache(maxsize=1)
|
||||
def _index_sql(num_places):
|
||||
return """ UPDATE placex
|
||||
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||
FROM (VALUES {}) as v(id, addr, ti)
|
||||
WHERE place_id = v.id
|
||||
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get_place_details(worker, ids):
|
||||
worker.perform("""SELECT place_id, (placex_prepare_update(placex)).*
|
||||
FROM placex WHERE place_id IN %s""",
|
||||
(tuple((p[0] for p in ids)), ))
|
||||
|
||||
|
||||
def index_places(self, worker, places):
|
||||
values = []
|
||||
for place in places:
|
||||
values.extend((place[x] for x in ('place_id', 'address')))
|
||||
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
|
||||
|
||||
worker.perform(self._index_sql(len(places)), values)
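For a batch of two places, _index_sql(2) renders to the following statement (whitespace trimmed), and index_places() supplies the parameters as one flat list [id1, addr1, Json(token_info1), id2, addr2, Json(token_info2)]; lru_cache keeps one rendered statement per batch size:

    UPDATE placex
      SET indexed_status = 0, address = v.addr, token_info = v.ti
      FROM (VALUES (%s, %s::hstore, %s::jsonb),(%s, %s::hstore, %s::jsonb)) as v(id, addr, ti)
      WHERE place_id = v.id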
|
||||
|
||||
|
||||
class RankRunner(AbstractPlacexRunner):
|
||||
""" Returns SQL commands for indexing one rank within the placex table.
|
||||
"""
|
||||
|
||||
def __init__(self, rank):
|
||||
self.rank = rank
|
||||
|
||||
def name(self):
|
||||
return "rank {}".format(self.rank)
|
||||
|
||||
@ -20,24 +57,16 @@ class RankRunner:
|
||||
""".format(self.rank)
|
||||
|
||||
def sql_get_objects(self):
|
||||
return """SELECT place_id FROM placex
|
||||
WHERE indexed_status > 0 and rank_address = {}
|
||||
ORDER BY geometry_sector""".format(self.rank)
|
||||
|
||||
@staticmethod
|
||||
def sql_index_place(ids):
|
||||
return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
|
||||
.format(','.join((str(i) for i in ids)))
|
||||
return """{} WHERE indexed_status > 0 and rank_address = {}
|
||||
ORDER BY geometry_sector
|
||||
""".format(self.SELECT_SQL, self.rank)
|
||||
|
||||
|
||||
class BoundaryRunner:
|
||||
class BoundaryRunner(AbstractPlacexRunner):
|
||||
""" Returns SQL commands for indexing the administrative boundaries
|
||||
of a certain rank.
|
||||
"""
|
||||
|
||||
def __init__(self, rank):
|
||||
self.rank = rank
|
||||
|
||||
def name(self):
|
||||
return "boundaries rank {}".format(self.rank)
|
||||
|
||||
@ -49,16 +78,10 @@ class BoundaryRunner:
|
||||
""".format(self.rank)
|
||||
|
||||
def sql_get_objects(self):
|
||||
return """SELECT place_id FROM placex
|
||||
WHERE indexed_status > 0 and rank_search = {}
|
||||
return """{} WHERE indexed_status > 0 and rank_search = {}
|
||||
and class = 'boundary' and type = 'administrative'
|
||||
ORDER BY partition, admin_level
|
||||
""".format(self.rank)
|
||||
|
||||
@staticmethod
|
||||
def sql_index_place(ids):
|
||||
return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
|
||||
.format(','.join((str(i) for i in ids)))
|
||||
""".format(self.SELECT_SQL, self.rank)
|
||||
|
||||
|
||||
class InterpolationRunner:
|
||||
@ -66,6 +89,10 @@ class InterpolationRunner:
|
||||
location_property_osmline.
|
||||
"""
|
||||
|
||||
def __init__(self, analyzer):
|
||||
self.analyzer = analyzer
|
||||
|
||||
|
||||
@staticmethod
|
||||
def name():
|
||||
return "interpolation lines (location_property_osmline)"
|
||||
@ -77,15 +104,37 @@ class InterpolationRunner:
|
||||
|
||||
@staticmethod
|
||||
def sql_get_objects():
|
||||
return """SELECT place_id FROM location_property_osmline
|
||||
return """SELECT place_id
|
||||
FROM location_property_osmline
|
||||
WHERE indexed_status > 0
|
||||
ORDER BY geometry_sector"""
|
||||
|
||||
|
||||
@staticmethod
|
||||
def sql_index_place(ids):
|
||||
return """UPDATE location_property_osmline
|
||||
SET indexed_status = 0 WHERE place_id IN ({})
|
||||
""".format(','.join((str(i) for i in ids)))
|
||||
def get_place_details(worker, ids):
|
||||
worker.perform("""SELECT place_id, get_interpolation_address(address, osm_id) as address
|
||||
FROM location_property_osmline WHERE place_id IN %s""",
|
||||
(tuple((p[0] for p in ids)), ))
|
||||
|
||||
|
||||
@staticmethod
|
||||
@functools.lru_cache(maxsize=1)
|
||||
def _index_sql(num_places):
|
||||
return """ UPDATE location_property_osmline
|
||||
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||
FROM (VALUES {}) as v(id, addr, ti)
|
||||
WHERE place_id = v.id
|
||||
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
|
||||
|
||||
|
||||
def index_places(self, worker, places):
|
||||
values = []
|
||||
for place in places:
|
||||
values.extend((place[x] for x in ('place_id', 'address')))
|
||||
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
|
||||
|
||||
worker.perform(self._index_sql(len(places)), values)
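The `_index_sql()` above prepares a single `UPDATE ... FROM (VALUES ...)` statement so a whole batch of interpolation lines is written back in one round trip. A minimal standalone sketch of that pattern; the helper name `build_index_sql` and the demo call are illustrative and not part of this change:

```python
# Sketch of the batched-update pattern used by _index_sql() above.
# psycopg2 later substitutes the %s placeholders with the flat value list
# built in index_places() (place_id, address, token_info per place).
def build_index_sql(num_places):
    groups = ','.join(['(%s, %s::hstore, %s::jsonb)'] * num_places)
    return ("UPDATE location_property_osmline"
            " SET indexed_status = 0, address = v.addr, token_info = v.ti"
            " FROM (VALUES {}) as v(id, addr, ti)"
            " WHERE place_id = v.id".format(groups))

# For three places the parameter list passed to worker.perform() has
# 3 * 3 = 9 entries: [id1, addr1, ti1, id2, addr2, ti2, id3, addr3, ti3].
print(build_index_sql(3))
```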
class PostcodeRunner:
|
||||
@ -107,7 +156,7 @@ class PostcodeRunner:
|
||||
ORDER BY country_code, postcode"""
|
||||
|
||||
@staticmethod
|
||||
def sql_index_place(ids):
|
||||
return """UPDATE location_postcode SET indexed_status = 0
|
||||
def index_places(worker, ids):
|
||||
worker.perform(""" UPDATE location_postcode SET indexed_status = 0
|
||||
WHERE place_id IN ({})
|
||||
""".format(','.join((str(i) for i in ids)))
|
||||
""".format(','.join((str(i[0]) for i in ids))))
|
||||
|
0
nominatim/tokenizer/__init__.py
Normal file
88
nominatim/tokenizer/factory.py
Normal file
@ -0,0 +1,88 @@
|
||||
"""
|
||||
Functions for creating a tokenizer or initialising the right one for an
|
||||
existing database.
|
||||
|
||||
A tokenizer is something that is bound to the lifetime of a database. It
|
||||
can be chosen and configured before the initial import but then needs to
|
||||
be used consistently when querying and updating the database.
|
||||
|
||||
This module provides the functions to create and configure a new tokenizer
|
||||
as well as instantiating the appropriate tokenizer for updating an existing
|
||||
database.
|
||||
|
||||
A tokenizer usually also includes PHP code for querying. The appropriate PHP
|
||||
normalizer module is installed when the tokenizer is created.
|
||||
"""
|
||||
import logging
|
||||
import importlib
|
||||
|
||||
from ..errors import UsageError
|
||||
from ..db import properties
|
||||
from ..db.connection import connect
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _import_tokenizer(name):
|
||||
""" Load the tokenizer.py module from project directory.
|
||||
"""
|
||||
try:
|
||||
return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
|
||||
except ModuleNotFoundError as exp:
|
||||
LOG.fatal("No tokenizer named '%s' available. "
|
||||
"Check the setting of NOMINATIM_TOKENIZER.", name)
|
||||
raise UsageError('Tokenizer not found') from exp
|
||||
|
||||
|
||||
def create_tokenizer(config, init_db=True, module_name=None):
|
||||
""" Create a new tokenizer as defined by the given configuration.
|
||||
|
||||
The tokenizer data and code is copied into the 'tokenizer' directory
|
||||
of the project directory and the tokenizer loaded from its new location.
|
||||
"""
|
||||
if module_name is None:
|
||||
module_name = config.TOKENIZER
|
||||
|
||||
# Create the directory for the tokenizer data
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.exists():
|
||||
basedir.mkdir()
|
||||
elif not basedir.is_dir():
|
||||
LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
|
||||
raise UsageError("Tokenizer setup failed.")
|
||||
|
||||
# Import and initialize the tokenizer.
|
||||
tokenizer_module = _import_tokenizer(module_name)
|
||||
|
||||
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
|
||||
tokenizer.init_new_db(config, init_db=init_db)
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
properties.set_property(conn, 'tokenizer', module_name)
|
||||
|
||||
return tokenizer
|
||||
|
||||
|
||||
def get_tokenizer_for_db(config):
|
||||
""" Instantiate a tokenizer for an existing database.
|
||||
|
||||
The function looks up the appropriate tokenizer in the database
|
||||
and initialises it.
|
||||
"""
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.is_dir():
|
||||
LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
|
||||
raise UsageError('Cannot initialize tokenizer.')
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
name = properties.get_property(conn, 'tokenizer')
|
||||
|
||||
if name is None:
|
||||
LOG.fatal("Tokenizer was not set up properly. Database property missing.")
|
||||
raise UsageError('Cannot initialize tokenizer.')
|
||||
|
||||
tokenizer_module = _import_tokenizer(name)
|
||||
|
||||
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
|
||||
tokenizer.init_from_project()
|
||||
|
||||
return tokenizer
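factory.py only relies on a small implicit interface: the module `nominatim.tokenizer.<name>_tokenizer` must expose a `create(dsn, data_dir)` entry point, and the returned object must offer `init_new_db()` for fresh imports and `init_from_project()` for existing databases. A hypothetical skeleton inferred from the calls above (the name `MyTokenizer` is made up):

```python
# Hypothetical minimal tokenizer module satisfying the calls in factory.py.
class MyTokenizer:
    def __init__(self, dsn, data_dir):
        self.dsn = dsn            # libpq DSN handed in by the factory
        self.data_dir = data_dir  # <project_dir>/tokenizer

    def init_new_db(self, config, init_db=True):
        # called by create_tokenizer() right after instantiation
        pass

    def init_from_project(self):
        # called by get_tokenizer_for_db() for an already imported database
        pass


def create(dsn, data_dir):
    # module-level entry point looked up via _import_tokenizer()
    return MyTokenizer(dsn, data_dir)
```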
540
nominatim/tokenizer/legacy_tokenizer.py
Normal file
@ -0,0 +1,540 @@
|
||||
"""
|
||||
Tokenizer implementing normalisation as used before Nominatim 4.
|
||||
"""
|
||||
from collections import OrderedDict
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from textwrap import dedent
|
||||
|
||||
from icu import Transliterator
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.db import properties
|
||||
from nominatim.db import utils as db_utils
|
||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
DBCFG_NORMALIZATION = "tokenizer_normalization"
|
||||
DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def create(dsn, data_dir):
|
||||
""" Create a new instance of the tokenizer provided by this module.
|
||||
"""
|
||||
return LegacyTokenizer(dsn, data_dir)
|
||||
|
||||
|
||||
def _install_module(config_module_path, src_dir, module_dir):
|
||||
""" Copies the PostgreSQL normalisation module into the project
|
||||
directory if necessary. For historical reasons the module is
|
||||
saved in the '/module' subdirectory and not with the other tokenizer
|
||||
data.
|
||||
|
||||
The function detects when the installation is run from the
|
||||
build directory. It doesn't touch the module in that case.
|
||||
"""
|
||||
# Custom module locations are simply used as is.
|
||||
if config_module_path:
|
||||
LOG.info("Using custom path for database module at '%s'", config_module_path)
|
||||
return config_module_path
|
||||
|
||||
# Compatibility mode for builddir installations.
|
||||
if module_dir.exists() and src_dir.samefile(module_dir):
|
||||
LOG.info('Running from build directory. Leaving database module as is.')
|
||||
return module_dir
|
||||
|
||||
# In any other case install the module in the project directory.
|
||||
if not module_dir.exists():
|
||||
module_dir.mkdir()
|
||||
|
||||
destfile = module_dir / 'nominatim.so'
|
||||
shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
|
||||
destfile.chmod(0o755)
|
||||
|
||||
LOG.info('Database module installed at %s', str(destfile))
|
||||
|
||||
return module_dir
|
||||
|
||||
|
||||
def _check_module(module_dir, conn):
|
||||
""" Try to use the PostgreSQL module to confirm that it is correctly
|
||||
installed and accessible from PostgreSQL.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
try:
|
||||
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
|
||||
RETURNS text AS '{}/nominatim.so', 'transliteration'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
DROP FUNCTION nominatim_test_import_func(text)
|
||||
""".format(module_dir))
|
||||
except psycopg2.DatabaseError as err:
|
||||
LOG.fatal("Error accessing database module: %s", err)
|
||||
raise UsageError("Database module cannot be accessed.") from err
|
||||
|
||||
|
||||
class LegacyTokenizer:
|
||||
""" The legacy tokenizer uses a special PostgreSQL module to normalize
|
||||
names and queries. The tokenizer thus implements normalization through
|
||||
calls to the database.
|
||||
"""
|
||||
|
||||
def __init__(self, dsn, data_dir):
|
||||
self.dsn = dsn
|
||||
self.data_dir = data_dir
|
||||
self.normalization = None
|
||||
|
||||
|
||||
def init_new_db(self, config, init_db=True):
|
||||
""" Set up a new tokenizer for the database.
|
||||
|
||||
This copies all necessary data in the project directory to make
|
||||
sure the tokenizer remains stable even over updates.
|
||||
"""
|
||||
module_dir = _install_module(config.DATABASE_MODULE_PATH,
|
||||
config.lib_dir.module,
|
||||
config.project_dir / 'module')
|
||||
|
||||
self.normalization = config.TERM_NORMALIZATION
|
||||
|
||||
self._install_php(config)
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
_check_module(module_dir, conn)
|
||||
self._save_config(conn, config)
|
||||
conn.commit()
|
||||
|
||||
if init_db:
|
||||
self.update_sql_functions(config)
|
||||
self._init_db_tables(config)
|
||||
|
||||
|
||||
def init_from_project(self):
|
||||
""" Initialise the tokenizer from the project directory.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
|
||||
|
||||
|
||||
def finalize_import(self, config):
|
||||
""" Do any required postprocessing to make the tokenizer data ready
|
||||
for use.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
sqlp = SQLPreprocessor(conn, config)
|
||||
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
|
||||
|
||||
|
||||
def update_sql_functions(self, config):
|
||||
""" Reimport the SQL functions for this tokenizer.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
|
||||
modulepath = config.DATABASE_MODULE_PATH or \
|
||||
str((config.project_dir / 'module').resolve())
|
||||
sqlp = SQLPreprocessor(conn, config)
|
||||
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer.sql',
|
||||
max_word_freq=max_word_freq,
|
||||
modulepath=modulepath)
|
||||
|
||||
|
||||
def check_database(self):
|
||||
""" Check that the tokenizer is set up correctly.
|
||||
"""
|
||||
hint = """\
|
||||
The Postgresql extension nominatim.so was not correctly loaded.
|
||||
|
||||
Error: {error}
|
||||
|
||||
Hints:
|
||||
* Check the output of the CMake/make installation step
|
||||
* Does nominatim.so exist?
|
||||
* Does nominatim.so exist on the database server?
|
||||
* Can nominatim.so be accessed by the database user?
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
with conn.cursor() as cur:
|
||||
try:
|
||||
out = cur.scalar("SELECT make_standard_name('a')")
|
||||
except psycopg2.Error as err:
|
||||
return hint.format(error=str(err))
|
||||
|
||||
if out != 'a':
|
||||
return hint.format(error='Unexpected result for make_standard_name()')
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def migrate_database(self, config):
|
||||
""" Initialise the project directory of an existing database for
|
||||
use with this tokenizer.
|
||||
|
||||
This is a special migration function for updating existing databases
|
||||
to new software versions.
|
||||
"""
|
||||
self.normalization = config.TERM_NORMALIZATION
|
||||
module_dir = _install_module(config.DATABASE_MODULE_PATH,
|
||||
config.lib_dir.module,
|
||||
config.project_dir / 'module')
|
||||
|
||||
with connect(self.dsn) as conn:
|
||||
_check_module(module_dir, conn)
|
||||
self._save_config(conn, config)
|
||||
|
||||
|
||||
def name_analyzer(self):
|
||||
""" Create a new analyzer for tokenizing names and queries
|
||||
using this tokenizer. Analyzers are context managers and should
|
||||
be used accordingly:
|
||||
|
||||
```
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
analyzer.tokenize()
|
||||
```
|
||||
|
||||
When used outside the with construct, the caller must ensure to
|
||||
call the close() function before destructing the analyzer.
|
||||
|
||||
Analyzers are not thread-safe. You need to instantiate one per thread.
|
||||
"""
|
||||
normalizer = Transliterator.createFromRules("phrase normalizer",
|
||||
self.normalization)
|
||||
return LegacyNameAnalyzer(self.dsn, normalizer)
|
||||
|
||||
|
||||
def _install_php(self, config):
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
php_file = self.data_dir / "tokenizer.php"
|
||||
php_file.write_text(dedent("""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
|
||||
@define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
|
||||
require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
|
||||
""".format(config)))
|
||||
|
||||
|
||||
def _init_db_tables(self, config):
|
||||
""" Set up the word table and fill it with pre-computed word
|
||||
frequencies.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
sqlp = SQLPreprocessor(conn, config)
|
||||
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
|
||||
conn.commit()
|
||||
|
||||
LOG.warning("Precomputing word tokens")
|
||||
db_utils.execute_file(self.dsn, config.lib_dir.data / 'words.sql')
|
||||
|
||||
|
||||
def _save_config(self, conn, config):
|
||||
""" Save the configuration that needs to remain stable for the given
|
||||
database as database properties.
|
||||
"""
|
||||
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
|
||||
properties.set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
|
||||
|
||||
|
||||
class LegacyNameAnalyzer:
|
||||
""" The legacy analyzer uses the special Postgresql module for
|
||||
splitting names.
|
||||
|
||||
Each instance opens a connection to the database to request the
|
||||
normalization.
|
||||
"""
|
||||
|
||||
def __init__(self, dsn, normalizer):
|
||||
self.conn = connect(dsn).connection
|
||||
self.conn.autocommit = True
|
||||
self.normalizer = normalizer
|
||||
psycopg2.extras.register_hstore(self.conn)
|
||||
|
||||
self._cache = _TokenCache(self.conn)
|
||||
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.close()
|
||||
|
||||
|
||||
def close(self):
|
||||
""" Free all resources used by the analyzer.
|
||||
"""
|
||||
if self.conn:
|
||||
self.conn.close()
|
||||
self.conn = None
|
||||
|
||||
|
||||
def normalize(self, phrase):
|
||||
""" Normalize the given phrase, i.e. remove all properties that
|
||||
are irrelevant for search.
|
||||
"""
|
||||
return self.normalizer.transliterate(phrase)
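`normalize()` simply applies the ICU transliterator built from the configured normalisation rules. A toy example with illustrative rules (the real rules come from the `NOMINATIM_TERM_NORMALIZATION` setting, not from this snippet):

```python
from icu import Transliterator

# Illustrative rules: lowercase, decompose, strip combining marks, recompose.
rules = ":: lower (); :: NFD (); [:Nonspacing Mark:] >; :: NFC ();"
demo = Transliterator.createFromRules("demo normalizer", rules)
print(demo.transliterate("Åkersberga Café"))   # -> "akersberga cafe"
```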
def add_postcodes_from_db(self):
|
||||
""" Add postcodes from the location_postcode table to the word table.
|
||||
"""
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""SELECT count(create_postcode_id(pc))
|
||||
FROM (SELECT distinct(postcode) as pc
|
||||
FROM location_postcode) x""")
|
||||
|
||||
|
||||
def update_special_phrases(self, phrases):
|
||||
""" Replace the search index for special phrases with the new phrases.
|
||||
"""
|
||||
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
||||
for p in phrases))
|
||||
|
||||
with self.conn.cursor() as cur:
|
||||
# Get the old phrases.
|
||||
existing_phrases = set()
|
||||
cur.execute("""SELECT word, class, type, operator FROM word
|
||||
WHERE class != 'place'
|
||||
OR (type != 'house' AND type != 'postcode')""")
|
||||
for label, cls, typ, oper in cur:
|
||||
existing_phrases.add((label, cls, typ, oper or '-'))
|
||||
|
||||
to_add = norm_phrases - existing_phrases
|
||||
to_delete = existing_phrases - norm_phrases
|
||||
|
||||
if to_add:
|
||||
psycopg2.extras.execute_values(
|
||||
cur,
|
||||
""" INSERT INTO word (word_id, word_token, word, class, type,
|
||||
search_name_count, operator)
|
||||
(SELECT nextval('seq_word'), make_standard_name(name), name,
|
||||
class, type, 0,
|
||||
CASE WHEN op in ('in', 'near') THEN op ELSE null END
|
||||
FROM (VALUES %s) as v(name, class, type, op))""",
|
||||
to_add)
|
||||
|
||||
if to_delete:
|
||||
psycopg2.extras.execute_values(
|
||||
cur,
|
||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||
WHERE word = name and class = in_class and type = in_type
|
||||
and ((op = '-' and operator is null) or op = operator)""",
|
||||
to_delete)
|
||||
|
||||
LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
|
||||
len(norm_phrases), len(to_add), len(to_delete))
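The update above is driven purely by set arithmetic on `(word, class, type, operator)` tuples: only the difference between the incoming phrases and what is already in the word table gets inserted or deleted. A toy illustration with made-up phrases:

```python
# Made-up data illustrating the to_add/to_delete bookkeeping above.
existing = {('bar', 'amenity', 'bar', '-'), ('pub', 'amenity', 'pub', 'in')}
incoming = {('bar', 'amenity', 'bar', '-'), ('cafe', 'amenity', 'cafe', '-')}

to_add = incoming - existing      # {('cafe', 'amenity', 'cafe', '-')}
to_delete = existing - incoming   # {('pub', 'amenity', 'pub', 'in')}
print(sorted(to_add), sorted(to_delete))
```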
def add_country_names(self, country_code, names):
|
||||
""" Add names for the given country to the search index.
|
||||
"""
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""INSERT INTO word (word_id, word_token, country_code)
|
||||
(SELECT nextval('seq_word'), lookup_token, %s
|
||||
FROM (SELECT ' ' || make_standard_name(n) as lookup_token
|
||||
FROM unnest(%s)n) y
|
||||
WHERE NOT EXISTS(SELECT * FROM word
|
||||
WHERE word_token = lookup_token and country_code = %s))
|
||||
""", (country_code, names, country_code))
|
||||
|
||||
|
||||
def process_place(self, place):
|
||||
""" Determine tokenizer information about the given place.
|
||||
|
||||
Returns a JSON-serialisable structure that will be handed into
|
||||
the database via the token_info field.
|
||||
"""
|
||||
token_info = _TokenInfo(self._cache)
|
||||
|
||||
names = place.get('name')
|
||||
|
||||
if names:
|
||||
token_info.add_names(self.conn, names)
|
||||
|
||||
country_feature = place.get('country_feature')
|
||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
||||
self.add_country_names(country_feature.lower(), list(names.values()))
|
||||
|
||||
address = place.get('address')
|
||||
|
||||
if address:
|
||||
hnrs = []
|
||||
addr_terms = []
|
||||
for key, value in address.items():
|
||||
if key == 'postcode':
|
||||
self._add_postcode(value)
|
||||
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
||||
hnrs.append(value)
|
||||
elif key == 'street':
|
||||
token_info.add_street(self.conn, value)
|
||||
elif key == 'place':
|
||||
token_info.add_place(self.conn, value)
|
||||
elif not key.startswith('_') and \
|
||||
key not in ('country', 'full'):
|
||||
addr_terms.append((key, value))
|
||||
|
||||
if hnrs:
|
||||
token_info.add_housenumbers(self.conn, hnrs)
|
||||
|
||||
if addr_terms:
|
||||
token_info.add_address_terms(self.conn, addr_terms)
|
||||
|
||||
return token_info.data
|
||||
|
||||
|
||||
def _add_postcode(self, postcode):
|
||||
""" Make sure the normalized postcode is present in the word table.
|
||||
"""
|
||||
def _create_postcode_from_db(pcode):
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
|
||||
|
||||
if re.search(r'[:,;]', postcode) is None:
|
||||
self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
|
||||
|
||||
|
||||
class _TokenInfo:
|
||||
""" Collect token information to be sent back to the database.
|
||||
"""
|
||||
def __init__(self, cache):
|
||||
self.cache = cache
|
||||
self.data = {}
|
||||
|
||||
|
||||
def add_names(self, conn, names):
|
||||
""" Add token information for the names of the place.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
# Create the token IDs for all names.
|
||||
self.data['names'] = cur.scalar("SELECT make_keywords(%s)::text",
|
||||
(names, ))
|
||||
|
||||
|
||||
def add_housenumbers(self, conn, hnrs):
|
||||
""" Extract housenumber information from the address.
|
||||
"""
|
||||
if len(hnrs) == 1:
|
||||
token = self.cache.get_housenumber(hnrs[0])
|
||||
if token is not None:
|
||||
self.data['hnr_tokens'] = token
|
||||
self.data['hnr'] = hnrs[0]
|
||||
return
|
||||
|
||||
# split numbers if necessary
|
||||
simple_list = []
|
||||
for hnr in hnrs:
|
||||
simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
|
||||
|
||||
if len(simple_list) > 1:
|
||||
simple_list = list(set(simple_list))
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT (create_housenumbers(%s)).* ", (simple_list, ))
|
||||
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
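Multi-value house numbers are split on `;` and `,` and de-duplicated before `create_housenumbers()` is called. A toy run of that splitting step (sample values only):

```python
import re

hnrs = ['12;14', '14, 16']          # sample multi-value housenumbers
simple_list = []
for hnr in hnrs:
    simple_list.extend(x.strip() for x in re.split(r'[;,]', hnr))
if len(simple_list) > 1:
    simple_list = list(set(simple_list))  # drop the duplicate '14'
print(sorted(simple_list))               # ['12', '14', '16']
```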
def add_street(self, conn, street):
|
||||
""" Add addr:street match terms.
|
||||
"""
|
||||
def _get_street(name):
|
||||
with conn.cursor() as cur:
|
||||
return cur.scalar("SELECT word_ids_from_name(%s)::text", (name, ))
|
||||
|
||||
self.data['street'] = self.cache.streets.get(street, _get_street)
|
||||
|
||||
|
||||
def add_place(self, conn, place):
|
||||
""" Add addr:place search and match terms.
|
||||
"""
|
||||
def _get_place(name):
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT (addr_ids_from_name(%s)
|
||||
|| getorcreate_name_id(make_standard_name(%s), ''))::text,
|
||||
word_ids_from_name(%s)::text""",
|
||||
(name, name, name))
|
||||
return cur.fetchone()
|
||||
|
||||
self.data['place_search'], self.data['place_match'] = \
|
||||
self.cache.places.get(place, _get_place)
|
||||
|
||||
|
||||
def add_address_terms(self, conn, terms):
|
||||
""" Add additional address terms.
|
||||
"""
|
||||
def _get_address_term(name):
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT addr_ids_from_name(%s)::text,
|
||||
word_ids_from_name(%s)::text""",
|
||||
(name, name))
|
||||
return cur.fetchone()
|
||||
|
||||
tokens = {}
|
||||
for key, value in terms:
|
||||
tokens[key] = self.cache.address_terms.get(value, _get_address_term)
|
||||
|
||||
self.data['addr'] = tokens
|
||||
|
||||
|
||||
class _LRU:
|
||||
""" Least recently used cache that accepts a generator function to
|
||||
produce the item when there is a cache miss.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize=128, init_data=None):
|
||||
self.data = init_data or OrderedDict()
|
||||
self.maxsize = maxsize
|
||||
if init_data is not None and len(init_data) > maxsize:
|
||||
self.maxsize = len(init_data)
|
||||
|
||||
def get(self, key, generator):
|
||||
""" Get the item with the given key from the cache. If nothing
|
||||
is found in the cache, generate the value through the
|
||||
generator function and store it in the cache.
|
||||
"""
|
||||
value = self.data.get(key)
|
||||
if value is not None:
|
||||
self.data.move_to_end(key)
|
||||
else:
|
||||
value = generator(key)
|
||||
if len(self.data) >= self.maxsize:
|
||||
self.data.popitem(last=False)
|
||||
self.data[key] = value
|
||||
|
||||
return value
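A short usage sketch for the `_LRU` helper above, assuming the class definition is in scope; the generator here is a stand-in for the database lookups the analyzer actually performs:

```python
cache = _LRU(maxsize=2)

def lookup(key):
    print('computing', key)   # only printed on a cache miss
    return key.upper()

print(cache.get('a', lookup))  # miss: computes and stores 'A'
print(cache.get('a', lookup))  # hit: returned straight from the cache
cache.get('b', lookup)
cache.get('c', lookup)         # cache full: evicts 'a', the oldest entry
```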
class _TokenCache:
|
||||
""" Cache for token information to avoid repeated database queries.
|
||||
|
||||
This cache is not thread-safe and needs to be instantiated per
|
||||
analyzer.
|
||||
"""
|
||||
def __init__(self, conn):
|
||||
# various LRU caches
|
||||
self.streets = _LRU(maxsize=256)
|
||||
self.places = _LRU(maxsize=128)
|
||||
self.address_terms = _LRU(maxsize=1024)
|
||||
|
||||
# Look up housenumbers up to 100 and cache them
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
|
||||
FROM generate_series(1, 100) as i""")
|
||||
self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
|
||||
|
||||
# Get postcodes that are already saved
|
||||
postcodes = OrderedDict()
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT word FROM word
|
||||
WHERE class ='place' and type = 'postcode'""")
|
||||
for row in cur:
|
||||
postcodes[row[0]] = None
|
||||
self.postcodes = _LRU(maxsize=32, init_data=postcodes)
|
||||
|
||||
def get_housenumber(self, number):
|
||||
""" Get a housenumber token from the cache.
|
||||
"""
|
||||
return self._cached_housenumbers.get(number)
|
@ -4,10 +4,9 @@ Collection of functions that check if the database is complete and functional.
|
||||
from enum import Enum
|
||||
from textwrap import dedent
|
||||
|
||||
import psycopg2
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
|
||||
CHECKLIST = []
|
||||
|
||||
@ -78,8 +77,7 @@ def check_database(config):
|
||||
|
||||
|
||||
def _get_indexes(conn):
|
||||
indexes = ['idx_word_word_id',
|
||||
'idx_place_addressline_address_place_id',
|
||||
indexes = ['idx_place_addressline_address_place_id',
|
||||
'idx_placex_rank_search',
|
||||
'idx_placex_rank_address',
|
||||
'idx_placex_parent_place_id',
|
||||
@ -149,7 +147,7 @@ def check_placex_table(conn, config):
|
||||
|
||||
|
||||
@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
|
||||
def check_placex_size(conn, config): # pylint: disable=W0613
|
||||
def check_placex_size(conn, _):
|
||||
""" Checking for placex content
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
@ -158,38 +156,30 @@ def check_placex_size(conn, config): # pylint: disable=W0613
|
||||
return CheckState.OK if cnt > 0 else CheckState.FATAL
|
||||
|
||||
|
||||
@_check(hint="""\
|
||||
The Postgresql extension nominatim.so was not correctly loaded.
|
||||
|
||||
Error: {error}
|
||||
|
||||
Hints:
|
||||
* Check the output of the CMake/make installation step
|
||||
* Does nominatim.so exist?
|
||||
* Does nominatim.so exist on the database server?
|
||||
* Can nominatim.so be accessed by the database user?
|
||||
""")
|
||||
def check_module(conn, config): # pylint: disable=W0613
|
||||
""" Checking that nominatim.so module is installed
|
||||
@_check(hint="""{msg}""")
|
||||
def check_tokenizer(_, config):
|
||||
""" Checking that tokenizer works
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
try:
|
||||
out = cur.scalar("SELECT make_standard_name('a')")
|
||||
except psycopg2.ProgrammingError as err:
|
||||
return CheckState.FAIL, dict(error=str(err))
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
|
||||
except UsageError:
|
||||
return CheckState.FAIL, dict(msg="""\
|
||||
Cannot load tokenizer. Did the import finish successfully?""")
|
||||
|
||||
if out != 'a':
|
||||
return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
|
||||
result = tokenizer.check_database()
|
||||
|
||||
if result is None:
|
||||
return CheckState.OK
|
||||
|
||||
return CheckState.FAIL, dict(msg=result)
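The check relies on a simple contract: the tokenizer's `check_database()` returns `None` when everything is healthy and a ready-to-print hint string otherwise. A minimal sketch with hypothetical objects:

```python
class DummyTokenizer:
    """ Stand-in for a real tokenizer; `problem` is None when healthy. """
    def __init__(self, problem=None):
        self.problem = problem

    def check_database(self):
        return self.problem

def run_check(tokenizer):
    result = tokenizer.check_database()
    return 'OK' if result is None else 'FAIL: ' + result

print(run_check(DummyTokenizer()))
print(run_check(DummyTokenizer('make_standard_name() returned an unexpected result')))
```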
@_check(hint="""\
|
||||
The indexing didn't finish. {count} entries are not yet indexed.
|
||||
|
||||
To index the remaining entries, run: {index_cmd}
|
||||
""")
|
||||
def check_indexing(conn, config): # pylint: disable=W0613
|
||||
def check_indexing(conn, _):
|
||||
""" Checking indexing status
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
@ -198,7 +188,7 @@ def check_indexing(conn, config): # pylint: disable=W0613
|
||||
if cnt == 0:
|
||||
return CheckState.OK
|
||||
|
||||
if conn.index_exists('idx_word_word_id'):
|
||||
if conn.index_exists('idx_placex_rank_search'):
|
||||
# Likely just an interrupted update.
|
||||
index_cmd = 'nominatim index'
|
||||
else:
|
||||
@ -214,7 +204,7 @@ def check_indexing(conn, config): # pylint: disable=W0613
|
||||
|
||||
Rerun the index creation with: nominatim import --continue db-postprocess
|
||||
""")
|
||||
def check_database_indexes(conn, config): # pylint: disable=W0613
|
||||
def check_database_indexes(conn, _):
|
||||
""" Checking that database indexes are complete
|
||||
"""
|
||||
missing = []
|
||||
@ -236,7 +226,7 @@ def check_database_indexes(conn, config): # pylint: disable=W0613
|
||||
Invalid indexes:
|
||||
{indexes}
|
||||
""")
|
||||
def check_database_index_valid(conn, config): # pylint: disable=W0613
|
||||
def check_database_index_valid(conn, _):
|
||||
""" Checking that all database indexes are valid
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
|
@ -5,11 +5,10 @@ import logging
|
||||
import os
|
||||
import selectors
|
||||
import subprocess
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim.db.connection import connect, get_pg_env
|
||||
from nominatim.db import utils as db_utils
|
||||
@ -89,49 +88,6 @@ def setup_extensions(conn):
|
||||
raise UsageError('PostGIS version is too old.')
|
||||
|
||||
|
||||
def install_module(src_dir, project_dir, module_dir, conn=None):
|
||||
""" Copy the normalization module from src_dir into the project
|
||||
directory under the '/module' directory. If 'module_dir' is set, then
|
||||
use the module from there instead and check that it is accessible
|
||||
for Postgresql.
|
||||
|
||||
The function detects when the installation is run from the
|
||||
build directory. It doesn't touch the module in that case.
|
||||
|
||||
If 'conn' is given, then the function also tests if the module
|
||||
can be access via the given database.
|
||||
"""
|
||||
if not module_dir:
|
||||
module_dir = project_dir / 'module'
|
||||
|
||||
if not module_dir.exists() or not src_dir.samefile(module_dir):
|
||||
|
||||
if not module_dir.exists():
|
||||
module_dir.mkdir()
|
||||
|
||||
destfile = module_dir / 'nominatim.so'
|
||||
shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
|
||||
destfile.chmod(0o755)
|
||||
|
||||
LOG.info('Database module installed at %s', str(destfile))
|
||||
else:
|
||||
LOG.info('Running from build directory. Leaving database module as is.')
|
||||
else:
|
||||
LOG.info("Using custom path for database module at '%s'", module_dir)
|
||||
|
||||
if conn is not None:
|
||||
with conn.cursor() as cur:
|
||||
try:
|
||||
cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
|
||||
RETURNS text AS '{}/nominatim.so', 'transliteration'
|
||||
LANGUAGE c IMMUTABLE STRICT;
|
||||
DROP FUNCTION nominatim_test_import_func(text)
|
||||
""".format(module_dir))
|
||||
except psycopg2.DatabaseError as err:
|
||||
LOG.fatal("Error accessing database module: %s", err)
|
||||
raise UsageError("Database module cannot be accessed.") from err
|
||||
|
||||
|
||||
def import_base_data(dsn, sql_dir, ignore_partitions=False):
|
||||
""" Create and populate the tables with basic static data that provides
|
||||
the background for geocoding. Data is assumed to not yet exist.
|
||||
@ -205,11 +161,10 @@ def create_partition_tables(conn, config):
|
||||
sql.run_sql_file(conn, 'partition-tables.src.sql')
|
||||
|
||||
|
||||
def truncate_data_tables(conn, max_word_frequency=None):
|
||||
def truncate_data_tables(conn):
|
||||
""" Truncate all data tables to prepare for a fresh load.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('TRUNCATE word')
|
||||
cur.execute('TRUNCATE placex')
|
||||
cur.execute('TRUNCATE place_addressline')
|
||||
cur.execute('TRUNCATE location_area')
|
||||
@ -228,23 +183,13 @@ def truncate_data_tables(conn, max_word_frequency=None):
|
||||
for table in [r[0] for r in list(cur)]:
|
||||
cur.execute('TRUNCATE ' + table)
|
||||
|
||||
if max_word_frequency is not None:
|
||||
# Used by getorcreate_word_id to ignore frequent partial words.
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq()
|
||||
RETURNS integer AS $$
|
||||
SELECT {} as maxwordfreq;
|
||||
$$ LANGUAGE SQL IMMUTABLE
|
||||
""".format(max_word_frequency))
|
||||
conn.commit()
|
||||
|
||||
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
|
||||
|
||||
def load_data(dsn, data_dir, threads):
|
||||
def load_data(dsn, threads):
|
||||
""" Copy data into the word and placex table.
|
||||
"""
|
||||
# Pre-calculate the most important terms in the word list.
|
||||
db_utils.execute_file(dsn, data_dir / 'words.sql')
|
||||
|
||||
sel = selectors.DefaultSelector()
|
||||
# Then copy data from place to placex in <threads - 1> chunks.
|
||||
place_threads = max(1, threads - 1)
|
||||
@ -306,34 +251,37 @@ def create_search_indices(conn, config, drop=False):
|
||||
|
||||
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
||||
|
||||
def create_country_names(conn, config):
|
||||
""" Create search index for default country names.
|
||||
def create_country_names(conn, tokenizer, languages=None):
|
||||
""" Add default country names to search index. `languages` is a comma-
|
||||
separated list of language codes as used in OSM. If `languages` is not
|
||||
empty then only name translations for the given languages are added
|
||||
to the index.
|
||||
"""
|
||||
if languages:
|
||||
languages = languages.split(',')
|
||||
|
||||
def _include_key(key):
|
||||
return key == 'name' or \
|
||||
(key.startswith('name:') \
|
||||
and (not languages or key[5:] in languages))
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""")
|
||||
cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""")
|
||||
cur.execute("""SELECT COUNT(*) FROM
|
||||
(SELECT getorcreate_country(make_standard_name(country_code),
|
||||
country_code) FROM country_name WHERE country_code is not null) AS x""")
|
||||
cur.execute("""SELECT COUNT(*) FROM
|
||||
(SELECT getorcreate_country(make_standard_name(name->'name'), country_code)
|
||||
FROM country_name WHERE name ? 'name') AS x""")
|
||||
sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v),
|
||||
country_code) FROM (SELECT country_code, skeys(name)
|
||||
AS k, svals(name) AS v FROM country_name) x WHERE k"""
|
||||
psycopg2.extras.register_hstore(cur)
|
||||
cur.execute("""SELECT country_code, name FROM country_name
|
||||
WHERE country_code is not null""")
|
||||
|
||||
languages = config.LANGUAGES
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
for code, name in cur:
|
||||
names = [code]
|
||||
if code == 'gb':
|
||||
names.append('UK')
|
||||
if code == 'us':
|
||||
names.append('United States')
|
||||
|
||||
# country names (only in languages as provided)
|
||||
if name:
|
||||
names.extend((v for k, v in name.items() if _include_key(k)))
|
||||
|
||||
analyzer.add_country_names(code, names)
|
||||
|
||||
if languages:
|
||||
sql_statement = "{} IN (".format(sql_statement)
|
||||
delim = ''
|
||||
for language in languages.split(','):
|
||||
sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language)
|
||||
delim = ', '
|
||||
sql_statement = '{})'.format(sql_statement)
|
||||
else:
|
||||
sql_statement = "{} LIKE 'name:%'".format(sql_statement)
|
||||
sql_statement = "{}) v".format(sql_statement)
|
||||
cur.execute(sql_statement)
|
||||
conn.commit()
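The `_include_key()` filter above decides which translated names end up in the index: `name` always qualifies, `name:<lang>` only when the language is requested. A toy run with made-up data:

```python
languages = ['de', 'en']   # as produced by languages.split(',')

def _include_key(key):
    return key == 'name' or \
           (key.startswith('name:') and (not languages or key[5:] in languages))

name = {'name': 'Deutschland', 'name:de': 'Deutschland',
        'name:fr': 'Allemagne', 'int_name': 'Germany'}
# keeps 'name' and 'name:de', drops 'name:fr' and 'int_name'
print([v for k, v in name.items() if _include_key(k)])
```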
@ -6,7 +6,8 @@ import logging
|
||||
from nominatim.db import properties
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.version import NOMINATIM_VERSION
|
||||
from nominatim.tools import refresh, database_import
|
||||
from nominatim.tools import refresh
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
LOG = logging.getLogger()
|
||||
@ -43,11 +44,14 @@ def migrate(config, paths):
|
||||
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(version))
|
||||
kwargs = dict(conn=conn, config=config, paths=paths)
|
||||
func(**kwargs)
|
||||
conn.commit()
|
||||
has_run_migration = True
|
||||
|
||||
if has_run_migration:
|
||||
LOG.warning('Updating SQL functions.')
|
||||
refresh.create_functions(conn, config)
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
|
||||
tokenizer.update_sql_functions(config)
|
||||
|
||||
properties.set_property(conn, 'database_version',
|
||||
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
|
||||
@ -108,17 +112,6 @@ def import_status_timestamp_change(conn, **_):
|
||||
TYPE timestamp with time zone;""")
|
||||
|
||||
|
||||
@_migration(3, 5, 0, 99)
|
||||
def install_database_module_in_project_directory(conn, config, paths, **_):
|
||||
""" Install database module in project directory.
|
||||
|
||||
The database module needs to be present in the project directory
|
||||
since those were introduced.
|
||||
"""
|
||||
database_import.install_module(paths.module_dir, paths.project_dir,
|
||||
config.DATABASE_MODULE_PATH, conn=conn)
|
||||
|
||||
|
||||
@_migration(3, 5, 0, 99)
|
||||
def add_nominatim_property_table(conn, config, **_):
|
||||
""" Add nominatim_property table.
|
||||
@ -137,6 +130,9 @@ def change_housenumber_transliteration(conn, **_):
|
||||
|
||||
The database schema switched from saving raw housenumbers in
|
||||
placex.housenumber to saving transliterated ones.
|
||||
|
||||
Note: the function create_housenumber_id() has been dropped in later
|
||||
versions.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
|
||||
@ -173,3 +169,25 @@ def switch_placenode_geometry_index(conn, **_):
|
||||
and class = 'place' and type != 'postcode'
|
||||
and linked_place_id is null""")
|
||||
cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
|
||||
|
||||
|
||||
@_migration(3, 7, 0, 1)
|
||||
def install_legacy_tokenizer(conn, config, **_):
|
||||
""" Setup legacy tokenizer.
|
||||
|
||||
If no other tokenizer has been configured yet, then create the
|
||||
configuration for the backwards-compatible legacy tokenizer
|
||||
"""
|
||||
if properties.get_property(conn, 'tokenizer') is None:
|
||||
with conn.cursor() as cur:
|
||||
for table in ('placex', 'location_property_osmline'):
|
||||
has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
|
||||
WHERE table_name = %s
|
||||
and column_name = 'token_info'""",
|
||||
(table, ))
|
||||
if has_column == 0:
|
||||
cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
|
||||
tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
|
||||
module_name='legacy')
|
||||
|
||||
tokenizer.migrate_database(config)
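The migration above probes `information_schema.columns` before adding `token_info`, so it is safe to run against databases that already have the column. A standalone sketch of that check with a hypothetical helper (`cur.scalar()` in the migration is a Nominatim cursor extension; plain psycopg2 calls are used here):

```python
def has_column(conn, table, column):
    """ Return True if `table` already has `column` (psycopg2 connection). """
    with conn.cursor() as cur:
        cur.execute("""SELECT count(*) FROM information_schema.columns
                       WHERE table_name = %s AND column_name = %s""",
                    (table, column))
        return cur.fetchone()[0] > 0

# e.g. add the column only when it is missing:
#   if not has_column(conn, 'placex', 'token_info'):
#       cur.execute('ALTER TABLE placex ADD COLUMN token_info JSONB')
```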
@ -6,7 +6,7 @@ of artificial postcode centroids.
|
||||
from nominatim.db.utils import execute_file
|
||||
from nominatim.db.connection import connect
|
||||
|
||||
def import_postcodes(dsn, project_dir):
|
||||
def import_postcodes(dsn, project_dir, tokenizer):
|
||||
""" Set up the initial list of postcodes.
|
||||
"""
|
||||
|
||||
@ -41,10 +41,11 @@ def import_postcodes(dsn, project_dir):
|
||||
INSERT INTO location_postcode
|
||||
(place_id, indexed_status, country_code, postcode, geometry)
|
||||
SELECT nextval('seq_place'), 1, country_code,
|
||||
upper(trim (both ' ' from address->'postcode')) as pc,
|
||||
token_normalized_postcode(address->'postcode') as pc,
|
||||
ST_Centroid(ST_Collect(ST_Centroid(geometry)))
|
||||
FROM placex
|
||||
WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
|
||||
WHERE address ? 'postcode'
|
||||
and token_normalized_postcode(address->'postcode') is not null
|
||||
AND geometry IS NOT null
|
||||
GROUP BY country_code, pc
|
||||
""")
|
||||
@ -52,9 +53,10 @@ def import_postcodes(dsn, project_dir):
|
||||
cur.execute("""
|
||||
INSERT INTO location_postcode
|
||||
(place_id, indexed_status, country_code, postcode, geometry)
|
||||
SELECT nextval('seq_place'), 1, 'us', postcode,
|
||||
SELECT nextval('seq_place'), 1, 'us',
|
||||
token_normalized_postcode(postcode),
|
||||
ST_SetSRID(ST_Point(x,y),4326)
|
||||
FROM us_postcode WHERE postcode NOT IN
|
||||
FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
|
||||
(SELECT postcode FROM location_postcode
|
||||
WHERE country_code = 'us')
|
||||
""")
|
||||
@ -62,8 +64,9 @@ def import_postcodes(dsn, project_dir):
|
||||
cur.execute("""
|
||||
INSERT INTO location_postcode
|
||||
(place_id, indexed_status, country_code, postcode, geometry)
|
||||
SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
|
||||
FROM gb_postcode WHERE postcode NOT IN
|
||||
SELECT nextval('seq_place'), 1, 'gb',
|
||||
token_normalized_postcode(postcode), geometry
|
||||
FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
|
||||
(SELECT postcode FROM location_postcode
|
||||
WHERE country_code = 'gb')
|
||||
""")
|
||||
@ -72,9 +75,7 @@ def import_postcodes(dsn, project_dir):
|
||||
DELETE FROM word WHERE class='place' and type='postcode'
|
||||
and word NOT IN (SELECT postcode FROM location_postcode)
|
||||
""")
|
||||
|
||||
cur.execute("""
|
||||
SELECT count(getorcreate_postcode_id(v)) FROM
|
||||
(SELECT distinct(postcode) as v FROM location_postcode) p
|
||||
""")
|
||||
conn.commit()
|
||||
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
analyzer.add_postcodes_from_db()
|
||||
|
@ -104,13 +104,11 @@ PHP_CONST_DEFS = (
|
||||
('Default_Language', 'DEFAULT_LANGUAGE', str),
|
||||
('Log_DB', 'LOG_DB', bool),
|
||||
('Log_File', 'LOG_FILE', str),
|
||||
('Max_Word_Frequency', 'MAX_WORD_FREQUENCY', int),
|
||||
('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
|
||||
('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
|
||||
('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
|
||||
('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
|
||||
('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
|
||||
('Term_Normalization_Rules', 'TERM_NORMALIZATION', str),
|
||||
('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
|
||||
('MapIcon_URL', 'MAPICON_URL', str),
|
||||
)
|
||||
@ -175,9 +173,11 @@ def setup_website(basedir, config):
|
||||
|
||||
@define('CONST_Debug', $_GET['debug'] ?? false);
|
||||
@define('CONST_LibDir', '{0}');
|
||||
@define('CONST_TokenizerDir', '{2}');
|
||||
@define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}');
|
||||
|
||||
""".format(config.lib_dir.php, NOMINATIM_VERSION))
|
||||
""".format(config.lib_dir.php, NOMINATIM_VERSION,
|
||||
config.project_dir / 'tokenizer'))
|
||||
|
||||
for php_name, conf_name, var_type in PHP_CONST_DEFS:
|
||||
if var_type == bool:
|
||||
|
@ -24,9 +24,6 @@ class SpecialPhrasesImporterStatistics():
|
||||
self.tables_deleted = 0
|
||||
self.tables_ignored = 0
|
||||
self.global_phrases_invalid = 0
|
||||
self.global_phrases_added = 0
|
||||
self.global_phrases_ignored = 0
|
||||
self.global_phrases_deleted = 0
|
||||
|
||||
def _set_lang_values_to_0(self):
|
||||
"""
|
||||
@ -34,8 +31,6 @@ class SpecialPhrasesImporterStatistics():
|
||||
lang to 0.
|
||||
"""
|
||||
self.lang_phrases_invalid = 0
|
||||
self.lang_phrases_added = 0
|
||||
self.lang_phrases_ignored = 0
|
||||
|
||||
def notify_one_phrase_invalid(self):
|
||||
"""
|
||||
@ -45,29 +40,6 @@ class SpecialPhrasesImporterStatistics():
|
||||
self.lang_phrases_invalid += 1
|
||||
self.global_phrases_invalid += 1
|
||||
|
||||
def notify_one_phrase_added(self):
|
||||
"""
|
||||
Add +1 to the count of entries
|
||||
added to the db.
|
||||
"""
|
||||
self.lang_phrases_added += 1
|
||||
self.global_phrases_added += 1
|
||||
|
||||
def notify_one_phrase_ignored(self):
|
||||
"""
|
||||
Add +1 to the count of ignored
|
||||
entries as it was already in the db.
|
||||
"""
|
||||
self.lang_phrases_ignored += 1
|
||||
self.global_phrases_ignored += 1
|
||||
|
||||
def notify_one_phrase_deleted(self):
|
||||
"""
|
||||
Add +1 to the count of phrases deleted
|
||||
from the database.
|
||||
"""
|
||||
self.global_phrases_deleted += 1
|
||||
|
||||
def notify_one_table_created(self):
|
||||
"""
|
||||
Add +1 to the count of created tables.
|
||||
@ -97,12 +69,6 @@ class SpecialPhrasesImporterStatistics():
|
||||
LOG.info('- %s phrases were invalid.', self.global_phrases_invalid)
|
||||
if self.global_phrases_invalid > 0:
|
||||
LOG.info(' Those invalid phrases have been skipped.')
|
||||
LOG.info('- %s phrases were ignored as they are already in the database',
|
||||
self.global_phrases_ignored)
|
||||
LOG.info('- %s phrases were added to the database', self.global_phrases_added)
|
||||
LOG.info('- %s phrases were deleted from the database', self.global_phrases_deleted)
|
||||
if self.global_phrases_deleted > 0:
|
||||
LOG.info(' They were deleted as they are not valid anymore.')
|
||||
LOG.info('- %s tables were ignored as they already exist on the database',
|
||||
self.tables_ignored)
|
||||
LOG.info('- %s tables were created', self.tables_created)
|
||||
@ -126,9 +92,6 @@ class SpecialPhrasesImporterStatistics():
|
||||
LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid)
|
||||
if self.lang_phrases_invalid > 0:
|
||||
LOG.info(' Those invalid phrases have been skipped.')
|
||||
LOG.info('- %s phrases were ignored as they are already in the database',
|
||||
self.lang_phrases_ignored)
|
||||
LOG.info('- %s phrases were added to the database', self.lang_phrases_added)
|
||||
LOG.info('====================================================================')
|
||||
|
||||
if self.lang_phrases_invalid > 0:
|
||||
|
@ -9,7 +9,6 @@ import re
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
from icu import Transliterator
|
||||
from psycopg2.sql import Identifier, Literal, SQL
|
||||
|
||||
from nominatim.tools.exec_utils import get_url
|
||||
@ -33,21 +32,14 @@ class SpecialPhrasesImporter():
|
||||
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
|
||||
)
|
||||
self.sanity_check_pattern = re.compile(r'^\w+$')
|
||||
self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
|
||||
self.config.TERM_NORMALIZATION)
|
||||
#This set will contain all existing phrases from the word table which
|
||||
#no longer exist on the wiki.
|
||||
#It contain tuples with the following format: (normalized_word, class, type, operator)
|
||||
self.words_phrases_to_delete = set()
|
||||
#This set will contain the phrases which still exist from the wiki.
|
||||
#It is used to prevent duplicates on the wiki by removing them from
|
||||
#the word_phrases_to_delete only at the end.
|
||||
self.words_phrases_still_exist = set()
|
||||
# This set will contain all existing phrases to be added.
|
||||
# It contains tuples with the following format: (label, class, type, operator)
|
||||
self.word_phrases = set()
|
||||
#This set will contain all existing place_classtype tables which doesn't match any
|
||||
#special phrases class/type on the wiki.
|
||||
self.table_phrases_to_delete = set()
|
||||
|
||||
def import_from_wiki(self, languages=None):
|
||||
def import_from_wiki(self, tokenizer, languages=None):
|
||||
"""
|
||||
Iterate through all specified languages and
|
||||
extract corresponding special phrases from the wiki.
|
||||
@ -55,7 +47,6 @@ class SpecialPhrasesImporter():
|
||||
if languages is not None and not isinstance(languages, list):
|
||||
raise TypeError('The \'languages\' argument should be of type list.')
|
||||
|
||||
self._fetch_existing_words_phrases()
|
||||
self._fetch_existing_place_classtype_tables()
|
||||
|
||||
#Get all languages to process.
|
||||
@ -71,30 +62,15 @@ class SpecialPhrasesImporter():
|
||||
self.statistics_handler.notify_current_lang_done(lang)
|
||||
|
||||
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
||||
self._remove_non_existent_phrases_from_db()
|
||||
self._remove_non_existent_tables_from_db()
|
||||
self.db_connection.commit()
|
||||
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
analyzer.update_special_phrases(self.word_phrases)
|
||||
|
||||
LOG.warning('Import done.')
|
||||
self.statistics_handler.notify_import_done()
|
||||
|
||||
def _fetch_existing_words_phrases(self):
|
||||
"""
|
||||
Fetch existing special phrases from the word table.
|
||||
Fill the word_phrases_to_delete set of the class.
|
||||
"""
|
||||
#Only extract special phrases terms:
|
||||
#If class=place and type=house then it is a housenumber term.
|
||||
#If class=place and type=postcode then it is a postcode term.
|
||||
word_query = """
|
||||
SELECT word, class, type, operator FROM word
|
||||
WHERE class != 'place' OR (type != 'house' AND type != 'postcode')
|
||||
"""
|
||||
with self.db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute(SQL(word_query))
|
||||
for row in db_cursor:
|
||||
row[3] = '-' if row[3] is None else row[3]
|
||||
self.words_phrases_to_delete.add(
|
||||
(row[0], row[1], row[2], row[3])
|
||||
)
|
||||
|
||||
def _fetch_existing_place_classtype_tables(self):
|
||||
"""
|
||||
@ -176,7 +152,6 @@ class SpecialPhrasesImporter():
|
||||
|
||||
for match in matches:
|
||||
phrase_label = match[0].strip()
|
||||
normalized_label = self.transliterator.transliterate(phrase_label)
|
||||
phrase_class = match[1].strip()
|
||||
phrase_type = match[2].strip()
|
||||
phrase_operator = match[3].strip()
|
||||
@ -198,20 +173,6 @@ class SpecialPhrasesImporter():
|
||||
):
|
||||
continue
|
||||
|
||||
#Check if the phrase already exists in the database.
|
||||
if (
|
||||
(normalized_label, phrase_class, phrase_type, phrase_operator)
|
||||
in self.words_phrases_to_delete
|
||||
):
|
||||
#Remove this phrase from the ones to delete as it still exist on the wiki.
|
||||
self.words_phrases_still_exist.add(
|
||||
(normalized_label, phrase_class, phrase_type, phrase_operator)
|
||||
)
|
||||
class_type_pairs.add((phrase_class, phrase_type))
|
||||
self.statistics_handler.notify_one_phrase_ignored()
|
||||
#Dont need to add this phrase as it already exists in the word table.
|
||||
continue
|
||||
|
||||
#sanity check, in case somebody added garbage in the wiki
|
||||
if not self._check_sanity(lang, phrase_class, phrase_type):
|
||||
self.statistics_handler.notify_one_phrase_invalid()
|
||||
@ -219,35 +180,11 @@ class SpecialPhrasesImporter():
|
||||
|
||||
class_type_pairs.add((phrase_class, phrase_type))
|
||||
|
||||
self._process_amenity(
|
||||
phrase_label, normalized_label, phrase_class,
|
||||
phrase_type, phrase_operator
|
||||
)
|
||||
self.statistics_handler.notify_one_phrase_added()
|
||||
self.word_phrases.add((phrase_label, phrase_class,
|
||||
phrase_type, phrase_operator))
|
||||
|
||||
return class_type_pairs
|
||||
|
||||
def _process_amenity(self, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, phrase_operator):
|
||||
# pylint: disable-msg=too-many-arguments
|
||||
"""
|
||||
Add phrase lookup and corresponding class and
|
||||
type to the word table based on the operator.
|
||||
"""
|
||||
with self.db_connection.cursor() as db_cursor:
|
||||
if phrase_operator == 'near':
|
||||
db_cursor.execute("""SELECT getorcreate_amenityoperator(
|
||||
make_standard_name(%s), %s, %s, %s, 'near')""",
|
||||
(phrase_label, normalized_label, phrase_class, phrase_type))
|
||||
elif phrase_operator == 'in':
|
||||
db_cursor.execute("""SELECT getorcreate_amenityoperator(
|
||||
make_standard_name(%s), %s, %s, %s, 'in')""",
|
||||
(phrase_label, normalized_label, phrase_class, phrase_type))
|
||||
else:
|
||||
db_cursor.execute("""SELECT getorcreate_amenity(
|
||||
make_standard_name(%s), %s, %s, %s)""",
|
||||
(phrase_label, normalized_label, phrase_class, phrase_type))
|
||||
|
||||
|
||||
def _create_place_classtype_table_and_indexes(self, class_type_pairs):
|
||||
"""
|
||||
@ -339,33 +276,15 @@ class SpecialPhrasesImporter():
|
||||
.format(Identifier(table_name),
|
||||
Identifier(self.config.DATABASE_WEBUSER)))
|
||||
|
||||
def _remove_non_existent_phrases_from_db(self):
|
||||
def _remove_non_existent_tables_from_db(self):
|
||||
"""
|
||||
Remove special phrases which doesn't exist on the wiki anymore.
|
||||
Delete from the word table and delete the place_classtype tables.
|
||||
Delete the place_classtype tables.
|
||||
"""
|
||||
LOG.warning('Cleaning database...')
|
||||
self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist
|
||||
#Array containing all queries to execute. Contain tuples of format (query, parameters)
|
||||
queries_parameters = []
|
||||
|
||||
#Delete phrases from the word table which are not on the wiki anymore.
|
||||
for phrase_to_delete in self.words_phrases_to_delete:
|
||||
self.statistics_handler.notify_one_phrase_deleted()
|
||||
if phrase_to_delete[3] == '-':
|
||||
query = """
|
||||
DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
|
||||
"""
|
||||
parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], )
|
||||
queries_parameters.append((query, parameters))
|
||||
else:
|
||||
query = """
|
||||
DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s
|
||||
"""
|
||||
parameters = (phrase_to_delete[0], phrase_to_delete[1],
|
||||
phrase_to_delete[2], phrase_to_delete[3], )
|
||||
queries_parameters.append((query, parameters))
|
||||
|
||||
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
|
||||
for table in self.table_phrases_to_delete:
|
||||
self.statistics_handler.notify_one_table_deleted()
|
||||
|
@ -10,7 +10,7 @@ Version information for Nominatim.
|
||||
# and must always be increased when there is a change to the database or code
|
||||
# that requires a migration.
|
||||
# Released versions always have a database patch level of 0.
|
||||
NOMINATIM_VERSION = (3, 7, 0, 1)
|
||||
NOMINATIM_VERSION = (3, 7, 0, 2)
|
||||
|
||||
POSTGRESQL_REQUIRED_VERSION = (9, 3)
|
||||
POSTGIS_REQUIRED_VERSION = (2, 2)
|
||||
|
@ -18,6 +18,12 @@ NOMINATIM_DATABASE_WEBUSER="www-data"
|
||||
# Changing this value requires to run 'nominatim refresh --functions'.
|
||||
NOMINATIM_DATABASE_MODULE_PATH=
|
||||
|
||||
# Tokenizer used for normalizing and parsing queries and names.
|
||||
# The tokenizer is set up during import and cannot be changed afterwards
|
||||
# without a reimport.
|
||||
# Currently available tokenizers: legacy
|
||||
NOMINATIM_TOKENIZER="legacy"
|
||||
|
||||
# Number of occurrences of a word before it is considered frequent.
|
||||
# Similar to the concept of stop words. Frequent partial words get ignored
|
||||
# or handled differently during search.
|
||||
|
@ -24,7 +24,7 @@ Feature: Creation of search terms
|
||||
When importing
|
||||
Then search_name contains
|
||||
| object | nameaddress_vector |
|
||||
| N1 | Rose, Street, Walltown |
|
||||
| N1 | #Rose Street, Walltown |
|
||||
When searching for "23 Rose Street, Walltown"
|
||||
Then results contain
|
||||
| osm_type | osm_id | name |
|
||||
@ -248,7 +248,7 @@ Feature: Creation of search terms
|
||||
When importing
|
||||
Then search_name contains
|
||||
| object | name_vector | nameaddress_vector |
|
||||
| N1 | #Green Moss | Rose, Street, Walltown |
|
||||
| N1 | #Green Moss | #Rose Street, Walltown |
|
||||
When searching for "Green Moss, Rose Street, Walltown"
|
||||
Then results contain
|
||||
| osm_type | osm_id | name |
|
||||
@ -299,7 +299,7 @@ Feature: Creation of search terms
|
||||
When importing
|
||||
Then search_name contains
|
||||
| object | name_vector | nameaddress_vector |
|
||||
| N1 | foo | the road |
|
||||
| N1 | foo | #the road |
|
||||
|
||||
Scenario: Some addr: tags are added to address
|
||||
Given the scene roads-with-pois
|
||||
|
@ -10,6 +10,7 @@ sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
|
||||
from nominatim import cli
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.tools import refresh
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
from steps.utils import run_script
|
||||
|
||||
class NominatimEnvironment:
|
||||
@ -106,9 +107,19 @@ class NominatimEnvironment:
|
||||
self.website_dir.cleanup()
|
||||
|
||||
self.website_dir = tempfile.TemporaryDirectory()
|
||||
cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env)
|
||||
cfg.lib_dir.php = self.src_dir / 'lib-php'
|
||||
refresh.setup_website(Path(self.website_dir.name) / 'website', cfg)
|
||||
refresh.setup_website(Path(self.website_dir.name) / 'website',
|
||||
self.get_test_config())
|
||||
|
||||
|
||||
def get_test_config(self):
|
||||
cfg = Configuration(Path(self.website_dir.name), self.src_dir / 'settings',
|
||||
environ=self.test_env)
|
||||
cfg.set_libdirs(module=self.build_dir / 'module',
|
||||
osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql',
|
||||
php=self.src_dir / 'lib-php',
|
||||
sql=self.src_dir / 'lib-sql',
|
||||
data=self.src_dir / 'data')
|
||||
return cfg
|
||||
|
||||
def get_libpq_dsn(self):
|
||||
dsn = self.test_env['NOMINATIM_DATABASE_DSN']
|
||||
@ -169,14 +180,10 @@ class NominatimEnvironment:
|
||||
"""
|
||||
self.write_nominatim_config(self.api_test_db)
|
||||
|
||||
if self.api_db_done:
|
||||
return
|
||||
|
||||
if not self.api_db_done:
|
||||
self.api_db_done = True
|
||||
|
||||
if self._reuse_or_drop_db(self.api_test_db):
|
||||
return
|
||||
|
||||
if not self._reuse_or_drop_db(self.api_test_db):
|
||||
testdata = Path('__file__') / '..' / '..' / 'testdb'
|
||||
self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
|
||||
|
||||
@ -191,11 +198,23 @@ class NominatimEnvironment:
|
||||
self.db_drop_database(self.api_test_db)
|
||||
raise
|
||||
|
||||
tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
|
||||
|
||||
|
||||
def setup_unknown_db(self):
|
||||
""" Setup a test against a non-existing database.
|
||||
"""
|
||||
self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
|
||||
# The tokenizer needs an existing database to function.
|
||||
# So start with the usual database
|
||||
class _Context:
|
||||
db = None
|
||||
|
||||
context = _Context()
|
||||
self.setup_db(context)
|
||||
tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
|
||||
|
||||
# Then drop the DB again
|
||||
self.teardown_db(context, force_drop=True)
|
||||
|
||||
def setup_db(self, context):
|
||||
""" Setup a test against a fresh, empty test database.
|
||||
@ -212,13 +231,13 @@ class NominatimEnvironment:
|
||||
context.db.autocommit = True
|
||||
psycopg2.extras.register_hstore(context.db, globally=False)
|
||||
|
||||
def teardown_db(self, context):
|
||||
def teardown_db(self, context, force_drop=False):
|
||||
""" Remove the test database, if it exists.
|
||||
"""
|
||||
if 'db' in context:
|
||||
if hasattr(context, 'db'):
|
||||
context.db.close()
|
||||
|
||||
if not self.keep_scenario_db:
|
||||
if force_drop or not self.keep_scenario_db:
|
||||
self.db_drop_database(self.test_db)
|
||||
|
||||
def _reuse_or_drop_db(self, name):
|
||||
|
@ -7,6 +7,7 @@ from place_inserter import PlaceColumn
|
||||
from table_compare import NominatimID, DBRow
|
||||
|
||||
from nominatim.indexer import indexer
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
|
||||
def check_database_integrity(context):
|
||||
""" Check some generic constraints on the tables.
|
||||
@ -86,6 +87,9 @@ def add_data_to_planet_ways(context):
|
||||
def import_and_index_data_from_place_table(context):
|
||||
""" Import data previously set up in the place table.
|
||||
"""
|
||||
nctx = context.nominatim
|
||||
|
||||
tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
|
||||
context.nominatim.copy_from_place(context.db)
|
||||
|
||||
# XXX use tool function as soon as it is ported
|
||||
@ -105,7 +109,7 @@ def import_and_index_data_from_place_table(context):
|
||||
|
||||
# Call directly as the refresh function does not include postcodes.
|
||||
indexer.LOG.setLevel(logging.ERROR)
|
||||
indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False)
|
||||
indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
|
||||
|
||||
check_database_integrity(context)
|
||||
|
||||
@ -230,7 +234,7 @@ def check_search_name_contents(context, exclude):
|
||||
if exclude:
|
||||
assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
|
||||
else:
|
||||
assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
|
||||
assert present, "Missing term for {}/{}: {}".format(row['object'], name, wid[1])
|
||||
elif name != 'object':
|
||||
assert db_row.contains(name, value), db_row.assert_msg(name, value)
|
||||
|
||||
|
@ -44,19 +44,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testEmptyPhrase()
|
||||
{
|
||||
$oPhrase = new Phrase('', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array(), new TokensFullSet());
|
||||
|
||||
$this->assertEquals(
|
||||
array(array('')),
|
||||
$oPhrase->getWordSets()
|
||||
);
|
||||
$this->assertNull($oPhrase->getWordSets());
|
||||
}
|
||||
|
||||
|
||||
public function testSingleWordPhrase()
|
||||
{
|
||||
$oPhrase = new Phrase('a', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a'), new TokensFullSet());
|
||||
|
||||
$this->assertEquals(
|
||||
'(a)',
|
||||
@ -68,21 +65,21 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testMultiWordPhrase()
|
||||
{
|
||||
$oPhrase = new Phrase('a b', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b'), new TokensFullSet());
|
||||
$this->assertEquals(
|
||||
'(a b),(a|b)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
);
|
||||
|
||||
$oPhrase = new Phrase('a b c', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
|
||||
$this->assertEquals(
|
||||
'(a b c),(a|b c),(a b|c),(a|b|c)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
);
|
||||
|
||||
$oPhrase = new Phrase('a b c d', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensFullSet());
|
||||
$this->assertEquals(
|
||||
'(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
@ -93,7 +90,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testInverseWordSets()
|
||||
{
|
||||
$oPhrase = new Phrase('a b c', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
|
||||
$oPhrase->invertWordSets();
|
||||
|
||||
$this->assertEquals(
|
||||
@ -105,14 +102,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
|
||||
public function testMaxWordSets()
|
||||
{
|
||||
$oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$aWords = array_fill(0, 4, 'a');
|
||||
$oPhrase = new Phrase(join(' ', $aWords), '');
|
||||
$oPhrase->computeWordSets($aWords, new TokensFullSet());
|
||||
$this->assertEquals(8, count($oPhrase->getWordSets()));
|
||||
$oPhrase->invertWordSets();
|
||||
$this->assertEquals(8, count($oPhrase->getWordSets()));
|
||||
|
||||
$oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$aWords = array_fill(0, 18, 'a');
|
||||
$oPhrase = new Phrase(join(' ', $aWords), '');
|
||||
$oPhrase->computeWordSets($aWords, new TokensFullSet());
|
||||
$this->assertEquals(100, count($oPhrase->getWordSets()));
|
||||
$oPhrase->invertWordSets();
|
||||
$this->assertEquals(100, count($oPhrase->getWordSets()));
|
||||
@ -122,7 +121,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testPartialTokensShortTerm()
|
||||
{
|
||||
$oPhrase = new Phrase('a b c d', '');
|
||||
$oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
|
||||
$this->assertEquals(
|
||||
'(a|b c d),(a|b c|d)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
@ -132,8 +131,9 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
|
||||
public function testPartialTokensLongTerm()
|
||||
{
|
||||
$oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
|
||||
$oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a')));
|
||||
$aWords = array_fill(0, 18, 'a');
|
||||
$oPhrase = new Phrase(join(' ', $aWords), '');
|
||||
$oPhrase->computeWordSets($aWords, new TokensPartialSet(array('a', 'a a a a a')));
|
||||
$this->assertEquals(80, count($oPhrase->getWordSets()));
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
@define('CONST_TokenizerDir', dirname(__FILE__));
|
||||
|
||||
require_once(CONST_LibDir.'/DB.php');
|
||||
require_once(CONST_LibDir.'/Status.php');
|
||||
|
||||
@ -40,45 +42,6 @@ class StatusTest extends \PHPUnit\Framework\TestCase
|
||||
$this->assertEquals('No database', $oStatus->status());
|
||||
}
|
||||
|
||||
|
||||
public function testModuleFail()
|
||||
{
|
||||
$this->expectException(\Exception::class);
|
||||
$this->expectExceptionMessage('Module call failed');
|
||||
$this->expectExceptionCode(702);
|
||||
|
||||
// stub has getOne method but doesn't return anything
|
||||
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
|
||||
->setMethods(array('connect', 'getOne'))
|
||||
->getMock();
|
||||
|
||||
$oStatus = new Status($oDbStub);
|
||||
$this->assertNull($oStatus->status());
|
||||
}
|
||||
|
||||
|
||||
public function testWordIdQueryFail()
|
||||
{
|
||||
$this->expectException(\Exception::class);
|
||||
$this->expectExceptionMessage('No value');
|
||||
$this->expectExceptionCode(704);
|
||||
|
||||
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
|
||||
->setMethods(array('connect', 'getOne'))
|
||||
->getMock();
|
||||
|
||||
// return no word_id
|
||||
$oDbStub->method('getOne')
|
||||
->will($this->returnCallback(function ($sql) {
|
||||
if (preg_match("/make_standard_name\('a'\)/", $sql)) return 'a';
|
||||
if (preg_match('/SELECT word_id, word_token/', $sql)) return null;
|
||||
}));
|
||||
|
||||
$oStatus = new Status($oDbStub);
|
||||
$this->assertNull($oStatus->status());
|
||||
}
|
||||
|
||||
|
||||
public function testOK()
|
||||
{
|
||||
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
|
||||
|
@ -49,88 +49,4 @@ class TokenTest extends \PHPUnit\Framework\TestCase
|
||||
$this->assertFalse($TL->contains('unknownword'));
|
||||
$this->assertEquals(array(), $TL->get('unknownword'));
|
||||
}
|
||||
|
||||
public function testAddress()
|
||||
{
|
||||
$this->expectOutputRegex('/<p><tt>/');
|
||||
|
||||
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
|
||||
->setMethods(array('getAll', 'getDBQuotedList'))
|
||||
->getMock();
|
||||
|
||||
$oDbStub->method('getDBQuotedList')
|
||||
->will($this->returnCallback(function ($aVals) {
|
||||
return array_map(function ($sVal) {
|
||||
return "'".$sVal."'";
|
||||
}, $aVals);
|
||||
}));
|
||||
|
||||
|
||||
$oDbStub->method('getAll')
|
||||
->will($this->returnCallback(function ($sql) {
|
||||
$aResults = array();
|
||||
if (preg_match('/1051/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => '1051',
|
||||
'class' => 'place',
|
||||
'type' => 'house'
|
||||
));
|
||||
}
|
||||
if (preg_match('/hauptstr/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'hauptstr',
|
||||
'class' => 'place',
|
||||
'type' => 'street',
|
||||
'operator' => true
|
||||
));
|
||||
}
|
||||
if (preg_match('/64286/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => '64286',
|
||||
'word' => '64286',
|
||||
'class' => 'place',
|
||||
'type' => 'postcode'
|
||||
));
|
||||
}
|
||||
if (preg_match('/darmstadt/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'darmstadt',
|
||||
'count' => 533
|
||||
));
|
||||
}
|
||||
if (preg_match('/alemagne/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'alemagne',
|
||||
'country_code' => 'de',
|
||||
));
|
||||
}
|
||||
if (preg_match('/mexico/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'mexico',
|
||||
'country_code' => 'mx',
|
||||
));
|
||||
}
|
||||
return $aResults;
|
||||
}));
|
||||
|
||||
$aCountryCodes = array('de', 'fr');
|
||||
$sNormQuery = '1051 hauptstr 64286 darmstadt alemagne mexico';
|
||||
$aTokens = explode(' ', $sNormQuery);
|
||||
|
||||
$TL = new TokenList;
|
||||
$TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer);
|
||||
$this->assertEquals(5, $TL->count());
|
||||
|
||||
$this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
|
||||
$this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
|
||||
$this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
|
||||
$this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
|
||||
$this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr'));
|
||||
}
|
||||
}
|
||||
|
17
test/php/Nominatim/tokenizer.php
Normal file
@ -0,0 +1,17 @@
<?php

namespace Nominatim;

class Tokenizer
{
    private $oDB;

    public function __construct(&$oDB)
    {
        $this->oDB =& $oDB;
    }

    public function checkStatus()
    {
    }
}
@ -1,3 +1,4 @@
|
||||
import importlib
|
||||
import itertools
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@ -15,6 +16,9 @@ sys.path.insert(0, str(SRC_DIR.resolve()))
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db import connection
|
||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||
from nominatim.db import properties
|
||||
|
||||
import dummy_tokenizer
|
||||
|
||||
class _TestingCursor(psycopg2.extras.DictCursor):
|
||||
""" Extension to the DictCursor class that provides execution
|
||||
@ -117,9 +121,8 @@ def table_factory(temp_db_cursor):
|
||||
def mk_table(name, definition='id INT', content=None):
|
||||
temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
|
||||
if content is not None:
|
||||
if not isinstance(content, str):
|
||||
content = '),('.join([str(x) for x in content])
|
||||
temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))
|
||||
psycopg2.extras.execute_values(
|
||||
temp_db_cursor, "INSERT INTO {} VALUES %s".format(name), content)
|
||||
|
||||
return mk_table
|
||||
|
||||
@ -144,6 +147,11 @@ def tmp_phplib_dir():
|
||||
|
||||
yield Path(phpdir)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def property_table(table_factory):
|
||||
table_factory('nominatim_properties', 'property TEXT, value TEXT')
|
||||
|
||||
@pytest.fixture
|
||||
def status_table(temp_db_conn):
|
||||
""" Create an empty version of the status table and
|
||||
@ -281,10 +289,29 @@ def osm2pgsql_options(temp_db):
|
||||
|
||||
@pytest.fixture
|
||||
def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
|
||||
table_factory('country_name', 'partition INT', (0, 1, 2))
|
||||
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
|
||||
cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
|
||||
cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
|
||||
sql=tmp_path, data=SRC_DIR / 'data')
|
||||
|
||||
return SQLPreprocessor(temp_db_conn, cfg)
|
||||
|
||||
|
||||
@pytest.fixture
def tokenizer_mock(monkeypatch, property_table, temp_db_conn, tmp_path):
    """ Sets up the configuration so that the test dummy tokenizer will be
        loaded when the tokenizer factory is used. Also returns a factory
        with which a new dummy tokenizer may be created.
    """
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    def _import_dummy(module, *args, **kwargs):
        return dummy_tokenizer

    monkeypatch.setattr(importlib, "import_module", _import_dummy)
    properties.set_property(temp_db_conn, 'tokenizer', 'dummy')

    def _create_tokenizer():
        return dummy_tokenizer.DummyTokenizer(None, None)

    return _create_tokenizer
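
A minimal sketch of how a test can consume this fixture (the test name and the
phrase data are illustrative, not part of the commit):

    def test_dummy_tokenizer_records_phrases(tokenizer_mock):
        # The fixture returns a factory; calling it yields a fresh dummy tokenizer.
        tok = tokenizer_mock()
        with tok.name_analyzer() as analyzer:
            analyzer.update_special_phrases([('bar', 'highway', 'road', '-')])
        # The dummy analyzer records what it was asked to do instead of touching a database.
        assert tok.analyser_cache['special_phrases'] == [('bar', 'highway', 'road', '-')]
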
64
test/python/dummy_tokenizer.py
Normal file
@ -0,0 +1,64 @@
"""
Tokenizer for testing.
"""

def create(dsn, data_dir):
    """ Create a new instance of the tokenizer provided by this module.
    """
    return DummyTokenizer(dsn, data_dir)

class DummyTokenizer:

    def __init__(self, dsn, data_dir):
        self.dsn = dsn
        self.data_dir = data_dir
        self.init_state = None
        self.analyser_cache = {}


    def init_new_db(self, *args, **kwargs):
        assert self.init_state == None
        self.init_state = "new"


    def init_from_project(self):
        assert self.init_state == None
        self.init_state = "loaded"


    def finalize_import(self, _):
        pass


    def name_analyzer(self):
        return DummyNameAnalyzer(self.analyser_cache)


class DummyNameAnalyzer:

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()


    def __init__(self, cache):
        self.analyser_cache = cache
        cache['countries'] = []


    def close(self):
        pass

    def add_postcodes_from_db(self):
        pass

    def update_special_phrases(self, phrases):
        self.analyser_cache['special_phrases'] = phrases

    def add_country_names(self, code, names):
        self.analyser_cache['countries'].append((code, names))

    def process_place(self, place):
        return {}
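
The dummy mirrors the tokenizer interface the tools under test expect, but it
records calls instead of touching the database. A short usage sketch (the DSN
and country names are illustrative, not part of the commit):

    tok = DummyTokenizer('dbname=test', None)
    with tok.name_analyzer() as analyzer:
        analyzer.add_country_names('de', ['Deutschland', 'Germany'])
    # Everything the analyzer was asked to index ends up in the shared cache,
    # so tests can assert on it directly.
    assert tok.analyser_cache['countries'] == [('de', ['Deutschland', 'Germany'])]
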
@ -22,6 +22,7 @@ import nominatim.tools.database_import
|
||||
import nominatim.tools.freeze
|
||||
import nominatim.tools.refresh
|
||||
import nominatim.tools.postcodes
|
||||
import nominatim.tokenizer.factory
|
||||
|
||||
from mocks import MockParamCapture
|
||||
|
||||
@ -56,6 +57,28 @@ def mock_func_factory(monkeypatch):
|
||||
return get_mock
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def tokenizer_mock(monkeypatch):
|
||||
class DummyTokenizer:
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.update_sql_functions_called = False
|
||||
self.finalize_import_called = False
|
||||
|
||||
def update_sql_functions(self, *args):
|
||||
self.update_sql_functions_called = True
|
||||
|
||||
def finalize_import(self, *args):
|
||||
self.finalize_import_called = True
|
||||
|
||||
tok = DummyTokenizer()
|
||||
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
|
||||
lambda *args: tok)
|
||||
monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer' ,
|
||||
lambda *args: tok)
|
||||
|
||||
return tok
|
||||
|
||||
|
||||
def test_cli_help(capsys):
|
||||
""" Running nominatim tool without arguments prints help.
|
||||
"""
|
||||
@ -84,10 +107,9 @@ def test_import_bad_file(temp_db):
|
||||
assert 1 == call_nominatim('import', '--osm-file', '.')
|
||||
|
||||
|
||||
def test_import_full(temp_db, mock_func_factory):
|
||||
def test_import_full(temp_db, mock_func_factory, tokenizer_mock):
|
||||
mocks = [
|
||||
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
|
||||
mock_func_factory(nominatim.tools.database_import, 'install_module'),
|
||||
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
|
||||
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
|
||||
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
||||
@ -107,6 +129,7 @@ def test_import_full(temp_db, mock_func_factory):
|
||||
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
|
||||
|
||||
assert 0 == call_nominatim('import', '--osm-file', __file__)
|
||||
assert tokenizer_mock.finalize_import_called
|
||||
|
||||
assert cf_mock.called > 1
|
||||
|
||||
@ -114,7 +137,7 @@ def test_import_full(temp_db, mock_func_factory):
|
||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||
|
||||
|
||||
def test_import_continue_load_data(temp_db, mock_func_factory):
|
||||
def test_import_continue_load_data(temp_db, mock_func_factory, tokenizer_mock):
|
||||
mocks = [
|
||||
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
||||
mock_func_factory(nominatim.tools.database_import, 'load_data'),
|
||||
@ -127,12 +150,14 @@ def test_import_continue_load_data(temp_db, mock_func_factory):
|
||||
]
|
||||
|
||||
assert 0 == call_nominatim('import', '--continue', 'load-data')
|
||||
assert tokenizer_mock.finalize_import_called
|
||||
|
||||
for mock in mocks:
|
||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||
|
||||
|
||||
def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
|
||||
def test_import_continue_indexing(temp_db, mock_func_factory, placex_table,
|
||||
temp_db_conn, tokenizer_mock):
|
||||
mocks = [
|
||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
||||
@ -153,7 +178,7 @@ def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp
|
||||
assert temp_db_conn.index_exists('idx_placex_pendingsector')
|
||||
|
||||
|
||||
def test_import_continue_postprocess(temp_db, mock_func_factory):
|
||||
def test_import_continue_postprocess(temp_db, mock_func_factory, tokenizer_mock):
|
||||
mocks = [
|
||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
||||
@ -163,6 +188,8 @@ def test_import_continue_postprocess(temp_db, mock_func_factory):
|
||||
|
||||
assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
|
||||
|
||||
assert tokenizer_mock.finalize_import_called
|
||||
|
||||
for mock in mocks:
|
||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||
|
||||
@ -217,7 +244,8 @@ def test_add_data_command(mock_run_legacy, name, oid):
|
||||
(['--boundaries-only'], 1, 0),
|
||||
(['--no-boundaries'], 0, 1),
|
||||
(['--boundaries-only', '--no-boundaries'], 0, 0)])
|
||||
def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
|
||||
def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
|
||||
params, do_bnds, do_ranks):
|
||||
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
|
||||
bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
|
||||
rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
|
||||
@ -227,7 +255,7 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra
|
||||
assert bnd_mock.called == do_bnds
|
||||
assert rank_mock.called == do_ranks
|
||||
|
||||
def test_special_phrases_command(temp_db, mock_func_factory):
|
||||
def test_special_phrases_command(temp_db, mock_func_factory, tokenizer_mock):
|
||||
func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki')
|
||||
|
||||
call_nominatim('special-phrases', '--import-from-wiki')
|
||||
@ -238,7 +266,6 @@ def test_special_phrases_command(temp_db, mock_func_factory):
|
||||
('postcodes', 'update_postcodes'),
|
||||
('word-counts', 'recompute_word_counts'),
|
||||
('address-levels', 'load_address_levels_from_file'),
|
||||
('functions', 'create_functions'),
|
||||
('wiki-data', 'import_wikipedia_articles'),
|
||||
('importance', 'recompute_importance'),
|
||||
('website', 'setup_website'),
|
||||
@ -250,6 +277,14 @@ def test_refresh_command(mock_func_factory, temp_db, command, func):
|
||||
assert func_mock.called == 1
|
||||
|
||||
|
||||
def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
|
||||
func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
|
||||
|
||||
assert 0 == call_nominatim('refresh', '--functions')
|
||||
assert func_mock.called == 1
|
||||
assert tokenizer_mock.update_sql_functions_called
|
||||
|
||||
|
||||
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
|
||||
calls = []
|
||||
monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
|
||||
|
@ -27,7 +27,29 @@ def call_nominatim(*args):
|
||||
cli_args=['replication'] + list(args))
|
||||
|
||||
@pytest.fixture
|
||||
def index_mock(monkeypatch):
|
||||
def tokenizer_mock(monkeypatch):
|
||||
class DummyTokenizer:
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.update_sql_functions_called = False
|
||||
self.finalize_import_called = False
|
||||
|
||||
def update_sql_functions(self, *args):
|
||||
self.update_sql_functions_called = True
|
||||
|
||||
def finalize_import(self, *args):
|
||||
self.finalize_import_called = True
|
||||
|
||||
tok = DummyTokenizer()
|
||||
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
|
||||
lambda *args: tok)
|
||||
monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer' ,
|
||||
lambda *args: tok)
|
||||
|
||||
return tok
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def index_mock(monkeypatch, tokenizer_mock):
|
||||
mock = MockParamCapture()
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
|
||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
|
||||
@ -52,7 +74,7 @@ def init_status(temp_db_conn, status_table):
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def update_mock(mock_func_factory, init_status):
|
||||
def update_mock(mock_func_factory, init_status, tokenizer_mock):
|
||||
return mock_func_factory(nominatim.tools.replication, 'update')
|
||||
|
||||
@pytest.mark.parametrize("params,func", [
|
||||
|
@ -24,7 +24,6 @@ def sql_factory(tmp_path):
|
||||
("'{{db.partitions|join}}'", '012'),
|
||||
("{% if 'country_name' in db.tables %}'yes'{% else %}'no'{% endif %}", "yes"),
|
||||
("{% if 'xxx' in db.tables %}'yes'{% else %}'no'{% endif %}", "no"),
|
||||
("'{{config.DATABASE_MODULE_PATH}}'", '.')
|
||||
])
|
||||
def test_load_file_simple(sql_preprocessor, sql_factory, temp_db_conn, temp_db_cursor, expr, ret):
|
||||
sqlfile = sql_factory("RETURN {};".format(expr))
|
||||
|
@ -6,6 +6,7 @@ import psycopg2
|
||||
import pytest
|
||||
|
||||
from nominatim.indexer import indexer
|
||||
from nominatim.tokenizer import factory
|
||||
|
||||
class IndexerTestDB:
|
||||
|
||||
@ -17,6 +18,7 @@ class IndexerTestDB:
|
||||
self.conn = conn
|
||||
self.conn.set_isolation_level(0)
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute('CREATE EXTENSION hstore')
|
||||
cur.execute("""CREATE TABLE placex (place_id BIGINT,
|
||||
class TEXT,
|
||||
type TEXT,
|
||||
@ -26,9 +28,14 @@ class IndexerTestDB:
|
||||
indexed_date TIMESTAMP,
|
||||
partition SMALLINT,
|
||||
admin_level SMALLINT,
|
||||
address HSTORE,
|
||||
token_info JSONB,
|
||||
geometry_sector INTEGER)""")
|
||||
cur.execute("""CREATE TABLE location_property_osmline (
|
||||
place_id BIGINT,
|
||||
osm_id BIGINT,
|
||||
address HSTORE,
|
||||
token_info JSONB,
|
||||
indexed_status SMALLINT,
|
||||
indexed_date TIMESTAMP,
|
||||
geometry_sector INTEGER)""")
|
||||
@ -46,6 +53,25 @@ class IndexerTestDB:
|
||||
END IF;
|
||||
RETURN NEW;
|
||||
END; $$ LANGUAGE plpgsql;""")
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
|
||||
OUT name HSTORE,
|
||||
OUT address HSTORE,
|
||||
OUT country_feature VARCHAR)
|
||||
AS $$
|
||||
BEGIN
|
||||
address := p.address;
|
||||
name := p.address;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE;
|
||||
""")
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
|
||||
RETURNS HSTORE AS $$
|
||||
BEGIN
|
||||
RETURN in_address;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE;
|
||||
""")
|
||||
|
||||
for table in ('placex', 'location_property_osmline', 'location_postcode'):
|
||||
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
|
||||
FOR EACH ROW EXECUTE PROCEDURE date_update()
|
||||
@ -76,9 +102,9 @@ class IndexerTestDB:
|
||||
next_id = next(self.osmline_id)
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute("""INSERT INTO location_property_osmline
|
||||
(place_id, indexed_status, geometry_sector)
|
||||
VALUES (%s, 1, %s)""",
|
||||
(next_id, sector))
|
||||
(place_id, osm_id, indexed_status, geometry_sector)
|
||||
VALUES (%s, %s, 1, %s)""",
|
||||
(next_id, next_id, sector))
|
||||
return next_id
|
||||
|
||||
def add_postcode(self, country, postcode):
|
||||
@ -102,8 +128,14 @@ def test_db(temp_db_conn):
|
||||
yield IndexerTestDB(temp_db_conn)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_tokenizer(tokenizer_mock, def_config, tmp_path):
|
||||
def_config.project_dir = tmp_path
|
||||
return factory.create_tokenizer(def_config)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_all_by_rank(test_db, threads):
|
||||
def test_index_all_by_rank(test_db, threads, test_tokenizer):
|
||||
for rank in range(31):
|
||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||
test_db.add_osmline()
|
||||
@ -111,7 +143,7 @@ def test_index_all_by_rank(test_db, threads):
|
||||
assert 31 == test_db.placex_unindexed()
|
||||
assert 1 == test_db.osmline_unindexed()
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||
idx.index_by_rank(0, 30)
|
||||
|
||||
assert 0 == test_db.placex_unindexed()
|
||||
@ -142,7 +174,7 @@ def test_index_all_by_rank(test_db, threads):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_partial_without_30(test_db, threads):
|
||||
def test_index_partial_without_30(test_db, threads, test_tokenizer):
|
||||
for rank in range(31):
|
||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||
test_db.add_osmline()
|
||||
@ -150,7 +182,8 @@ def test_index_partial_without_30(test_db, threads):
|
||||
assert 31 == test_db.placex_unindexed()
|
||||
assert 1 == test_db.osmline_unindexed()
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
|
||||
test_tokenizer, threads)
|
||||
idx.index_by_rank(4, 15)
|
||||
|
||||
assert 19 == test_db.placex_unindexed()
|
||||
@ -162,7 +195,7 @@ def test_index_partial_without_30(test_db, threads):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_partial_with_30(test_db, threads):
|
||||
def test_index_partial_with_30(test_db, threads, test_tokenizer):
|
||||
for rank in range(31):
|
||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||
test_db.add_osmline()
|
||||
@ -170,7 +203,7 @@ def test_index_partial_with_30(test_db, threads):
|
||||
assert 31 == test_db.placex_unindexed()
|
||||
assert 1 == test_db.osmline_unindexed()
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||
idx.index_by_rank(28, 30)
|
||||
|
||||
assert 27 == test_db.placex_unindexed()
|
||||
@ -181,7 +214,7 @@ def test_index_partial_with_30(test_db, threads):
|
||||
WHERE indexed_status = 0 AND rank_address between 1 and 27""")
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_boundaries(test_db, threads):
|
||||
def test_index_boundaries(test_db, threads, test_tokenizer):
|
||||
for rank in range(4, 10):
|
||||
test_db.add_admin(rank_address=rank, rank_search=rank)
|
||||
for rank in range(31):
|
||||
@ -191,7 +224,7 @@ def test_index_boundaries(test_db, threads):
|
||||
assert 37 == test_db.placex_unindexed()
|
||||
assert 1 == test_db.osmline_unindexed()
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||
idx.index_boundaries(0, 30)
|
||||
|
||||
assert 31 == test_db.placex_unindexed()
|
||||
@ -203,13 +236,13 @@ def test_index_boundaries(test_db, threads):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_postcodes(test_db, threads):
|
||||
def test_index_postcodes(test_db, threads, test_tokenizer):
|
||||
for postcode in range(1000):
|
||||
test_db.add_postcode('de', postcode)
|
||||
for postcode in range(32000, 33000):
|
||||
test_db.add_postcode('us', postcode)
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||
idx.index_postcodes()
|
||||
|
||||
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
|
||||
@ -217,7 +250,7 @@ def test_index_postcodes(test_db, threads):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("analyse", [True, False])
|
||||
def test_index_full(test_db, analyse):
|
||||
def test_index_full(test_db, analyse, test_tokenizer):
|
||||
for rank in range(4, 10):
|
||||
test_db.add_admin(rank_address=rank, rank_search=rank)
|
||||
for rank in range(31):
|
||||
@ -226,7 +259,7 @@ def test_index_full(test_db, analyse):
|
||||
for postcode in range(1000):
|
||||
test_db.add_postcode('de', postcode)
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 4)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
|
||||
idx.index_full(analyse=analyse)
|
||||
|
||||
assert 0 == test_db.placex_unindexed()
|
||||
@ -236,13 +269,13 @@ def test_index_full(test_db, analyse):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("threads", [1, 15])
|
||||
def test_index_reopen_connection(test_db, threads, monkeypatch):
|
||||
def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
|
||||
monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
|
||||
|
||||
for _ in range(1000):
|
||||
test_db.add_place(rank_address=30, rank_search=30)
|
||||
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||
idx.index_by_rank(28, 30)
|
||||
|
||||
assert 0 == test_db.placex_unindexed()
|
||||
|
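
The change repeated throughout these indexer tests is the new Indexer
signature: the tokenizer is now passed between the DSN and the thread count. A
condensed sketch of the pattern (DSN and thread count are illustrative):

    tokenizer = factory.create_tokenizer(config)    # or a test dummy as above
    idx = indexer.Indexer('dbname=nominatim', tokenizer, 2)
    idx.index_full(analyse=False)
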
77
test/python/test_tokenizer_factory.py
Normal file
@ -0,0 +1,77 @@
|
||||
"""
|
||||
Tests for creating new tokenizers.
|
||||
"""
|
||||
import importlib
|
||||
import pytest
|
||||
|
||||
from nominatim.db import properties
|
||||
from nominatim.tokenizer import factory
|
||||
from nominatim.errors import UsageError
|
||||
from dummy_tokenizer import DummyTokenizer
|
||||
|
||||
@pytest.fixture
|
||||
def test_config(def_config, tmp_path):
|
||||
def_config.project_dir = tmp_path
|
||||
return def_config
|
||||
|
||||
|
||||
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
|
||||
tokenizer_mock, property_table):
|
||||
tokenizer = factory.create_tokenizer(test_config)
|
||||
|
||||
assert isinstance(tokenizer, DummyTokenizer)
|
||||
assert tokenizer.init_state == "new"
|
||||
assert (test_config.project_dir / 'tokenizer').is_dir()
|
||||
|
||||
assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
|
||||
|
||||
|
||||
def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table):
|
||||
(test_config.project_dir / 'tokenizer').mkdir()
|
||||
|
||||
tokenizer = factory.create_tokenizer(test_config)
|
||||
|
||||
assert isinstance(tokenizer, DummyTokenizer)
|
||||
assert tokenizer.init_state == "new"
|
||||
|
||||
|
||||
def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table):
|
||||
(test_config.project_dir / 'tokenizer').write_text("foo")
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
factory.create_tokenizer(test_config)
|
||||
|
||||
|
||||
def test_setup_bad_tokenizer_name(test_config, monkeypatch):
|
||||
monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
factory.create_tokenizer(test_config)
|
||||
|
||||
def test_load_tokenizer(temp_db_conn, test_config,
|
||||
tokenizer_mock, property_table):
|
||||
factory.create_tokenizer(test_config)
|
||||
|
||||
tokenizer = factory.get_tokenizer_for_db(test_config)
|
||||
|
||||
assert isinstance(tokenizer, DummyTokenizer)
|
||||
assert tokenizer.init_state == "loaded"
|
||||
|
||||
|
||||
def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table):
|
||||
factory.create_tokenizer(test_config)
|
||||
|
||||
test_config.project_dir = test_config.project_dir / 'foo'
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
factory.get_tokenizer_for_db(test_config)
|
||||
|
||||
|
||||
def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_mock, property_table):
|
||||
factory.create_tokenizer(test_config)
|
||||
|
||||
temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
factory.get_tokenizer_for_db(test_config)
|
||||
|
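
Taken together, these tests pin down the factory contract: create_tokenizer()
prepares a 'tokenizer' directory in the project directory and records the
chosen tokenizer in the nominatim_properties table, while get_tokenizer_for_db()
reads that property back and raises a UsageError when the database was never
set up. A rough sketch of the expected call flow (configuration handling
omitted):

    tokenizer = factory.create_tokenizer(config)        # during import
    ...
    tokenizer = factory.get_tokenizer_for_db(config)    # on every later run
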
298
test/python/test_tokenizer_legacy.py
Normal file
@ -0,0 +1,298 @@
|
||||
"""
|
||||
Test for legacy tokenizer.
|
||||
"""
|
||||
import shutil
|
||||
|
||||
import pytest
|
||||
|
||||
from nominatim.tokenizer import legacy_tokenizer
|
||||
from nominatim.db import properties
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
@pytest.fixture
|
||||
def test_config(def_config, tmp_path):
|
||||
def_config.project_dir = tmp_path / 'project'
|
||||
def_config.project_dir.mkdir()
|
||||
|
||||
module_dir = tmp_path / 'module_src'
|
||||
module_dir.mkdir()
|
||||
(module_dir / 'nominatim.so').write_text('TEST nomiantim.so')
|
||||
|
||||
def_config.lib_dir.module = module_dir
|
||||
|
||||
sqldir = tmp_path / 'sql'
|
||||
sqldir.mkdir()
|
||||
(sqldir / 'tokenizer').mkdir()
|
||||
(sqldir / 'tokenizer' / 'legacy_tokenizer.sql').write_text("SELECT 'a'")
|
||||
(sqldir / 'words.sql').write_text("SELECT 'a'")
|
||||
shutil.copy(str(def_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'),
|
||||
str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql'))
|
||||
|
||||
def_config.lib_dir.sql = sqldir
|
||||
def_config.lib_dir.data = sqldir
|
||||
|
||||
return def_config
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def tokenizer_factory(dsn, tmp_path, monkeypatch, property_table):
|
||||
(tmp_path / 'tokenizer').mkdir()
|
||||
|
||||
def _maker():
|
||||
return legacy_tokenizer.create(dsn, tmp_path / 'tokenizer')
|
||||
|
||||
return _maker
|
||||
|
||||
@pytest.fixture
|
||||
def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
|
||||
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||
tok = tokenizer_factory()
|
||||
tok.init_new_db(test_config)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
|
||||
word_table, temp_db_with_extensions, tmp_path):
|
||||
sql = tmp_path / 'sql' / 'tokenizer' / 'legacy_tokenizer.sql'
|
||||
sql.write_text("""
|
||||
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
|
||||
RETURNS INTEGER AS $$ SELECT 342; $$ LANGUAGE SQL;
|
||||
""")
|
||||
|
||||
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
|
||||
tok = tokenizer_factory()
|
||||
tok.init_new_db(test_config)
|
||||
monkeypatch.undo()
|
||||
|
||||
with tok.name_analyzer() as analyzer:
|
||||
yield analyzer
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def make_standard_name(temp_db_cursor):
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
|
||||
RETURNS TEXT AS $$ SELECT ' ' || name; $$ LANGUAGE SQL""")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def create_postcode_id(table_factory, temp_db_cursor):
|
||||
table_factory('out_postcode_table', 'postcode TEXT')
|
||||
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
|
||||
RETURNS BOOLEAN AS $$
|
||||
INSERT INTO out_postcode_table VALUES (postcode) RETURNING True;
|
||||
$$ LANGUAGE SQL""")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def create_housenumbers(temp_db_cursor):
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_housenumbers(
|
||||
housenumbers TEXT[],
|
||||
OUT tokens TEXT, OUT normtext TEXT)
|
||||
AS $$
|
||||
SELECT housenumbers::TEXT, array_to_string(housenumbers, ';')
|
||||
$$ LANGUAGE SQL""")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def make_keywords(temp_db_cursor, temp_db_with_extensions):
|
||||
temp_db_cursor.execute(
|
||||
"""CREATE OR REPLACE FUNCTION make_keywords(names HSTORE)
|
||||
RETURNS INTEGER[] AS $$ SELECT ARRAY[1, 2, 3] $$ LANGUAGE SQL""")
|
||||
|
||||
def test_init_new(tokenizer_factory, test_config, monkeypatch,
|
||||
temp_db_conn, sql_preprocessor):
|
||||
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
|
||||
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||
|
||||
tok = tokenizer_factory()
|
||||
tok.init_new_db(test_config)
|
||||
|
||||
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
|
||||
|
||||
outfile = test_config.project_dir / 'module' / 'nominatim.so'
|
||||
|
||||
assert outfile.exists()
|
||||
assert outfile.read_text() == 'TEST nomiantim.so'
|
||||
assert outfile.stat().st_mode == 33261
|
||||
|
||||
|
||||
def test_init_module_load_failed(tokenizer_factory, test_config,
|
||||
monkeypatch, temp_db_conn):
|
||||
tok = tokenizer_factory()
|
||||
|
||||
with pytest.raises(UsageError):
|
||||
tok.init_new_db(test_config)
|
||||
|
||||
|
||||
def test_init_module_custom(tokenizer_factory, test_config,
|
||||
monkeypatch, tmp_path, sql_preprocessor):
|
||||
module_dir = (tmp_path / 'custom').resolve()
|
||||
module_dir.mkdir()
|
||||
(module_dir/ 'nominatim.so').write_text('CUSTOM nomiantim.so')
|
||||
|
||||
monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(module_dir))
|
||||
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||
|
||||
tok = tokenizer_factory()
|
||||
tok.init_new_db(test_config)
|
||||
|
||||
assert not (test_config.project_dir / 'module').exists()
|
||||
|
||||
|
||||
def test_init_from_project(tokenizer_setup, tokenizer_factory):
|
||||
tok = tokenizer_factory()
|
||||
|
||||
tok.init_from_project()
|
||||
|
||||
assert tok.normalization is not None
|
||||
|
||||
|
||||
def test_update_sql_functions(sql_preprocessor, temp_db_conn,
|
||||
tokenizer_factory, test_config, table_factory,
|
||||
monkeypatch, temp_db_cursor):
|
||||
monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133')
|
||||
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||
tok = tokenizer_factory()
|
||||
tok.init_new_db(test_config)
|
||||
monkeypatch.undo()
|
||||
|
||||
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) == '1133'
|
||||
|
||||
table_factory('test', 'txt TEXT')
|
||||
|
||||
func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer.sql'
|
||||
func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}'),
|
||||
('{{modulepath}}')""")
|
||||
|
||||
tok.update_sql_functions(test_config)
|
||||
|
||||
test_content = temp_db_cursor.row_set('SELECT * FROM test')
|
||||
assert test_content == set((('1133', ), (str(test_config.project_dir / 'module'), )))
|
||||
|
||||
|
||||
def test_migrate_database(tokenizer_factory, test_config, temp_db_conn, monkeypatch):
|
||||
monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
|
||||
tok = tokenizer_factory()
|
||||
tok.migrate_database(test_config)
|
||||
|
||||
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) is not None
|
||||
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) is not None
|
||||
|
||||
outfile = test_config.project_dir / 'module' / 'nominatim.so'
|
||||
|
||||
assert outfile.exists()
|
||||
assert outfile.read_text() == 'TEST nomiantim.so'
|
||||
assert outfile.stat().st_mode == 33261
|
||||
|
||||
|
||||
def test_normalize(analyzer):
|
||||
assert analyzer.normalize('TEsT') == 'test'
|
||||
|
||||
|
||||
def test_add_postcodes_from_db(analyzer, table_factory, temp_db_cursor,
|
||||
create_postcode_id):
|
||||
table_factory('location_postcode', 'postcode TEXT',
|
||||
content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
|
||||
|
||||
analyzer.add_postcodes_from_db()
|
||||
|
||||
assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
|
||||
== set((('1234', ), ('12 34', ), ('AB23',)))
|
||||
|
||||
|
||||
def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
|
||||
make_standard_name):
|
||||
analyzer.update_special_phrases([
|
||||
("König bei", "amenity", "royal", "near"),
|
||||
("Könige", "amenity", "royal", "-"),
|
||||
("strasse", "highway", "primary", "in")
|
||||
])
|
||||
|
||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""") \
|
||||
== set(((' könig bei', 'könig bei', 'amenity', 'royal', 'near'),
|
||||
(' könige', 'könige', 'amenity', 'royal', None),
|
||||
(' strasse', 'strasse', 'highway', 'primary', 'in')))
|
||||
|
||||
|
||||
def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
|
||||
make_standard_name):
|
||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||
VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
|
||||
(' bar', 'bar', 'highway', 'road', null)""")
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
analyzer.update_special_phrases([])
|
||||
|
||||
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
|
||||
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
|
||||
make_standard_name):
|
||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||
VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
|
||||
(' bar', 'bar', 'highway', 'road', null)""")
|
||||
|
||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||
|
||||
analyzer.update_special_phrases([
|
||||
('prison', 'amenity', 'prison', 'in'),
|
||||
('bar', 'highway', 'road', '-'),
|
||||
('garden', 'leisure', 'garden', 'near')
|
||||
])
|
||||
|
||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||
FROM word WHERE class != 'place'""") \
|
||||
== set(((' prison', 'prison', 'amenity', 'prison', 'in'),
|
||||
(' bar', 'bar', 'highway', 'road', None),
|
||||
(' garden', 'garden', 'leisure', 'garden', 'near')))
|
||||
|
||||
|
||||
def test_process_place_names(analyzer, make_keywords):
|
||||
|
||||
info = analyzer.process_place({'name' : {'name' : 'Soft bAr', 'ref': '34'}})
|
||||
|
||||
assert info['names'] == '{1,2,3}'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('pc', ['12345', 'AB 123', '34-345'])
|
||||
def test_process_place_postcode(analyzer, temp_db_cursor, create_postcode_id, pc):
|
||||
|
||||
info = analyzer.process_place({'address': {'postcode' : pc}})
|
||||
|
||||
assert temp_db_cursor.row_set("SELECT * from out_postcode_table") \
|
||||
== set(((pc, ),))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('pc', ['12:23', 'ab;cd;f', '123;836'])
|
||||
def test_process_place_bad_postcode(analyzer, temp_db_cursor, create_postcode_id,
|
||||
pc):
|
||||
|
||||
info = analyzer.process_place({'address': {'postcode' : pc}})
|
||||
|
||||
assert 0 == temp_db_cursor.scalar("SELECT count(*) from out_postcode_table")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
|
||||
def test_process_place_housenumbers_simple(analyzer, create_housenumbers, hnr):
|
||||
info = analyzer.process_place({'address': {'housenumber' : hnr}})
|
||||
|
||||
assert info['hnr'] == hnr
|
||||
assert info['hnr_tokens'].startswith("{")
|
||||
|
||||
|
||||
def test_process_place_housenumbers_lists(analyzer, create_housenumbers):
|
||||
info = analyzer.process_place({'address': {'conscriptionnumber' : '1; 2;3'}})
|
||||
|
||||
assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
|
||||
|
||||
|
||||
def test_process_place_housenumbers_duplicates(analyzer, create_housenumbers):
|
||||
info = analyzer.process_place({'address': {'housenumber' : '134',
|
||||
'conscriptionnumber' : '134',
|
||||
'streetnumber' : '99a'}})
|
||||
|
||||
assert set(info['hnr'].split(';')) == set(('134', '99a'))
|
@ -43,8 +43,22 @@ def test_check_placex_table_size_bad(temp_db_cursor, temp_db_conn, def_config):
|
||||
assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL
|
||||
|
||||
|
||||
def test_check_module_bad(temp_db_conn, def_config):
|
||||
assert chkdb.check_module(temp_db_conn, def_config) == chkdb.CheckState.FAIL
|
||||
def test_check_tokenizer_missing(temp_db_conn, def_config, tmp_path):
|
||||
def_config.project_dir = tmp_path
|
||||
assert chkdb.check_tokenizer(temp_db_conn, def_config) == chkdb.CheckState.FAIL
|
||||
|
||||
|
||||
@pytest.mark.parametrize("check_result,state", [(None, chkdb.CheckState.OK),
|
||||
("Something wrong", chkdb.CheckState.FAIL)])
|
||||
def test_check_tokenizer(tokenizer_mock, temp_db_conn, def_config, monkeypatch,
|
||||
check_result, state):
|
||||
class _TestTokenizer:
|
||||
def check_database(self):
|
||||
return check_result
|
||||
|
||||
monkeypatch.setattr(chkdb.tokenizer_factory, 'get_tokenizer_for_db',
|
||||
lambda *a, **k: _TestTokenizer())
|
||||
assert chkdb.check_tokenizer(temp_db_conn, def_config) == state
|
||||
|
||||
|
||||
def test_check_indexing_good(temp_db_cursor, temp_db_conn, def_config):
|
||||
|
@ -80,39 +80,6 @@ def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
|
||||
database_import.setup_extensions(temp_db_conn)
|
||||
|
||||
|
||||
def test_install_module(tmp_path):
|
||||
src_dir = tmp_path / 'source'
|
||||
src_dir.mkdir()
|
||||
(src_dir / 'nominatim.so').write_text('TEST nomiantim.so')
|
||||
|
||||
project_dir = tmp_path / 'project'
|
||||
project_dir.mkdir()
|
||||
|
||||
database_import.install_module(src_dir, project_dir, '')
|
||||
|
||||
outfile = project_dir / 'module' / 'nominatim.so'
|
||||
|
||||
assert outfile.exists()
|
||||
assert outfile.read_text() == 'TEST nomiantim.so'
|
||||
assert outfile.stat().st_mode == 33261
|
||||
|
||||
|
||||
def test_install_module_custom(tmp_path):
|
||||
(tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
|
||||
|
||||
database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
|
||||
|
||||
assert not (tmp_path / 'module').exists()
|
||||
|
||||
|
||||
def test_install_module_fail_access(temp_db_conn, tmp_path):
|
||||
(tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
|
||||
|
||||
with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
|
||||
database_import.install_module(tmp_path, tmp_path, '',
|
||||
conn=temp_db_conn)
|
||||
|
||||
|
||||
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
|
||||
temp_db_cursor.execute('CREATE EXTENSION hstore')
|
||||
temp_db_cursor.execute('CREATE EXTENSION postgis')
|
||||
@ -171,14 +138,15 @@ def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
|
||||
|
||||
|
||||
def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
|
||||
tables = ('word', 'placex', 'place_addressline', 'location_area',
|
||||
tables = ('placex', 'place_addressline', 'location_area',
|
||||
'location_area_country',
|
||||
'location_property_tiger', 'location_property_osmline',
|
||||
'location_postcode', 'search_name', 'location_road_23')
|
||||
for table in tables:
|
||||
table_factory(table, content=(1, 2, 3))
|
||||
table_factory(table, content=((1, ), (2, ), (3, )))
|
||||
assert temp_db_cursor.table_rows(table) == 3
|
||||
|
||||
database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23)
|
||||
database_import.truncate_data_tables(temp_db_conn)
|
||||
|
||||
for table in tables:
|
||||
assert temp_db_cursor.table_rows(table) == 0
|
||||
@ -196,36 +164,33 @@ def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_ta
|
||||
place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
|
||||
geom='SRID=4326;LINESTRING(0 0, 10 10)')
|
||||
|
||||
database_import.load_data(dsn, src_dir / 'data', threads)
|
||||
database_import.load_data(dsn, threads)
|
||||
|
||||
assert temp_db_cursor.table_rows('placex') == 30
|
||||
assert temp_db_cursor.table_rows('location_property_osmline') == 1
|
||||
|
||||
@pytest.mark.parametrize("languages", (False, True))
|
||||
def test_create_country_names(temp_db_conn, temp_db_cursor, def_config,
|
||||
temp_db_with_extensions, monkeypatch, languages):
|
||||
|
||||
@pytest.mark.parametrize("languages", (None, ' fr,en'))
|
||||
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
|
||||
table_factory, tokenizer_mock, languages):
|
||||
|
||||
table_factory('country_name', 'country_code varchar(2), name hstore',
|
||||
content=(('us', '"name"=>"us1","name:af"=>"us2"'),
|
||||
('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
|
||||
|
||||
assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2
|
||||
|
||||
tokenizer = tokenizer_mock()
|
||||
|
||||
database_import.create_country_names(temp_db_conn, tokenizer, languages)
|
||||
|
||||
assert len(tokenizer.analyser_cache['countries']) == 2
|
||||
|
||||
result_set = {k: set(v) for k, v in tokenizer.analyser_cache['countries']}
|
||||
|
||||
if languages:
|
||||
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'fr,en')
|
||||
temp_db_cursor.execute("""CREATE FUNCTION make_standard_name (name TEXT)
|
||||
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
|
||||
""")
|
||||
temp_db_cursor.execute('CREATE TABLE country_name (country_code varchar(2), name hstore)')
|
||||
temp_db_cursor.execute('CREATE TABLE word (code varchar(2))')
|
||||
temp_db_cursor.execute("""INSERT INTO country_name VALUES ('us',
|
||||
'"name"=>"us","name:af"=>"us"')""")
|
||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
|
||||
lookup_country_code varchar(2))
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
BEGIN
|
||||
INSERT INTO word VALUES (lookup_country_code);
|
||||
RETURN 5;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
""")
|
||||
database_import.create_country_names(temp_db_conn, def_config)
|
||||
if languages:
|
||||
assert temp_db_cursor.table_rows('word') == 4
|
||||
assert result_set == {'us' : set(('us', 'us1', 'United States')),
|
||||
'fr' : set(('fr', 'Fra', 'Fren'))}
|
||||
else:
|
||||
assert temp_db_cursor.table_rows('word') == 5
|
||||
assert result_set == {'us' : set(('us', 'us1', 'us2', 'United States')),
|
||||
'fr' : set(('fr', 'Fra', 'Fren'))}
|
||||
|
@ -11,41 +11,6 @@ from nominatim.tools import SpecialPhrasesImporter
|
||||
|
||||
TEST_BASE_DIR = Path(__file__) / '..' / '..'
|
||||
|
||||
def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table,
|
||||
temp_db_cursor):
|
||||
"""
|
||||
Check for the fetch_existing_words_phrases() method.
|
||||
It should return special phrase term added to the word
|
||||
table.
|
||||
"""
|
||||
query ="""
|
||||
INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
|
||||
'class', 'type', null, 0, 'near');
|
||||
"""
|
||||
temp_db_cursor.execute(query)
|
||||
|
||||
assert not special_phrases_importer.words_phrases_to_delete
|
||||
special_phrases_importer._fetch_existing_words_phrases()
|
||||
contained_phrase = special_phrases_importer.words_phrases_to_delete.pop()
|
||||
assert contained_phrase == ('normalized_word', 'class', 'type', 'near')
|
||||
|
||||
@pytest.mark.parametrize("house_type", ['house', 'postcode'])
|
||||
def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table,
|
||||
house_type, temp_db_cursor):
|
||||
"""
|
||||
Check for the fetch_existing_words_phrases() method.
|
||||
It should return nothing as the terms added correspond
|
||||
to a housenumber and postcode term.
|
||||
"""
|
||||
query ="""
|
||||
INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
|
||||
'place', %s, null, 0, 'near');
|
||||
"""
|
||||
temp_db_cursor.execute(query, (house_type,))
|
||||
|
||||
special_phrases_importer._fetch_existing_words_phrases()
|
||||
assert not special_phrases_importer.words_phrases_to_delete
|
||||
|
||||
def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor):
|
||||
"""
|
||||
Check for the fetch_existing_place_classtype_tables() method.
|
||||
@ -123,36 +88,6 @@ def test_convert_settings_giving_json(special_phrases_importer):

    assert returned == json_file

def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
                                       temp_db_conn, word_table):
    """
    Test that _process_amenity() execute well the
    getorcreate_amenityoperator() SQL function and that
    the 2 differents operators are well handled.
    """
    special_phrases_importer._process_amenity('', '', '', '', 'near')
    special_phrases_importer._process_amenity('', '', '', '', 'in')

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'")
        results = temp_db_cursor.fetchall()

    assert len(results) == 2

def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
                                          temp_db_conn, word_table):
    """
    Test that _process_amenity() execute well the
    getorcreate_amenity() SQL function.
    """
    special_phrases_importer._process_amenity('', '', '', '', '')

    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'")
        result = temp_db_cursor.fetchone()

    assert result

def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
    """
    Test that _create_place_classtype_indexes() create the
@ -215,8 +150,7 @@ def test_create_place_classtype_table_and_indexes(
        assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
        assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])

def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table,
                             getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer):
    """
    Test that _process_xml_content() process the given xml content right
    by executing the right SQL functions for amenities and
@ -228,11 +162,9 @@ def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
    #Converted output set to a dict for easy assert further.
    results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))

    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)
    assert results[class_test] and type_test in results.values()

def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
def test_remove_non_existent_tables_from_db(special_phrases_importer, default_phrases,
                                            temp_db_conn):
    """
    Check for the remove_non_existent_phrases_from_db() method.
@ -245,22 +177,10 @@ def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_p
    be deleted.
    """
    with temp_db_conn.cursor() as temp_db_cursor:
        to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near')
        to_keep_phrase_tuple = (
            'normalized_word_exists', 'class_exists', 'type_exists', 'near'
        )
        special_phrases_importer.words_phrases_to_delete = {
            to_delete_phrase_tuple,
            to_keep_phrase_tuple
        }
        special_phrases_importer.words_phrases_still_exist = {
            to_keep_phrase_tuple
        }
        special_phrases_importer.table_phrases_to_delete = {
            'place_classtype_testclasstypetable_to_delete'
        }

        query_words = 'SELECT word, class, type, operator FROM word;'
        query_tables = """
            SELECT table_name
            FROM information_schema.tables
@ -268,21 +188,16 @@ def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_p
            AND table_name like 'place_classtype_%';
        """

        special_phrases_importer._remove_non_existent_phrases_from_db()
        special_phrases_importer._remove_non_existent_tables_from_db()

        temp_db_cursor.execute(query_words)
        words_result = temp_db_cursor.fetchall()
        temp_db_cursor.execute(query_tables)
        tables_result = temp_db_cursor.fetchall()
        assert len(words_result) == 1 and words_result[0] == [
            'normalized_word_exists', 'class_exists', 'type_exists', 'near'
        ]
        assert (len(tables_result) == 1 and
                tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
        )

def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
                          getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table):
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer,
                          placex_table, tokenizer_mock):
    """
    Check that the main import_from_wiki() method is well executed.
    It should create the place_classtype table, the place_id and centroid indexes,
@ -294,17 +209,14 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
    #what is deleted and what is preserved.
    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("""
            INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter',
            'amenity', 'animal_shelter', null, 0, null);

            INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word',
            'wrong_class', 'wrong_type', null, 0, 'near');

            CREATE TABLE place_classtype_amenity_animal_shelter();
            CREATE TABLE place_classtype_wrongclass_wrongtype();""")

    monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
    special_phrases_importer.import_from_wiki(['en'])
    tokenizer = tokenizer_mock()
    special_phrases_importer.import_from_wiki(tokenizer, ['en'])

    assert len(tokenizer.analyser_cache['special_phrases']) == 18

    class_test = 'aerialway'
    type_test = 'zip_line'
@ -312,22 +224,12 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
    assert check_table_exist(temp_db_conn, class_test, type_test)
    assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
    assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)
    assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
    assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')

    #Format (query, should_return_something_bool) use to easily execute all asserts
    queries_tests = set()

    #Used to check that the correct phrase already in the word table before is still there.
    query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'"
    queries_tests.add((query_correct_word, True))

    #Used to check if wrong phrase was deleted from the word table of the database.
    query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'"
    queries_tests.add((query_wrong_word, False))

    #Used to check that correct place_classtype table already in the datase before is still there.
    query_existing_table = """
        SELECT table_name
@ -412,24 +314,6 @@ def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
            temp_db_conn.index_exists(index_prefix + 'place_id')
    )

def check_amenities_with_op(temp_db_conn):
    """
    Check that the test table for the SQL function getorcreate_amenityoperator()
    contains more than one value (so that the SQL function was call more than one time).
    """
    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'")
        return len(temp_db_cursor.fetchall()) > 1

def check_amenities_without_op(temp_db_conn):
    """
    Check that the test table for the SQL function getorcreate_amenity()
    contains more than one value (so that the SQL function was call more than one time).
    """
    with temp_db_conn.cursor() as temp_db_cursor:
        temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'")
        return len(temp_db_cursor.fetchall()) > 1

@pytest.fixture
def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
    """
@ -453,48 +337,7 @@ def temp_phplib_dir_with_migration():
    yield Path(phpdir)

@pytest.fixture
def default_phrases(word_table, temp_db_cursor):
def default_phrases(temp_db_cursor):
    temp_db_cursor.execute("""
        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
        'class', 'type', null, 0, 'near');

        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists',
        'class_exists', 'type_exists', null, 0, 'near');

        CREATE TABLE place_classtype_testclasstypetable_to_delete();
        CREATE TABLE place_classtype_testclasstypetable_to_keep();""")

@pytest.fixture
def make_strandard_name_func(temp_db_cursor):
    temp_db_cursor.execute("""
        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
        BEGIN
        RETURN trim(name); --Basically return only the trimed name for the tests
        END;
        $$ LANGUAGE plpgsql IMMUTABLE;""")

@pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
    temp_db_cursor.execute("""
        CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
        lookup_class text, lookup_type text)
        RETURNS void as $$
        BEGIN
            INSERT INTO word VALUES(null, lookup_word, normalized_word,
            lookup_class, lookup_type, null, 0, 'no_operator');
        END;
        $$ LANGUAGE plpgsql""")

@pytest.fixture
def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
    temp_db_cursor.execute("""
        CREATE TABLE temp_with_operator(op TEXT);

        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
        lookup_class text, lookup_type text, op text)
        RETURNS void as $$
        BEGIN
            INSERT INTO word VALUES(null, lookup_word, normalized_word,
            lookup_class, lookup_type, null, 0, op);
        END;
        $$ LANGUAGE plpgsql""")
@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
import pytest

from nominatim.tools import postcodes
import dummy_tokenizer

@pytest.fixture
def tokenizer():
    return dummy_tokenizer.DummyTokenizer(None, None)

@pytest.fixture
def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
                          postcode TEXT,
                          geometry GEOMETRY(Geometry, 4326)""")
    temp_db_cursor.execute('CREATE SEQUENCE seq_place')
    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
                              RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
    temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
                              RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                           """)


def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
    postcodes.import_postcodes(dsn, tmp_path)
def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
    postcodes.import_postcodes(dsn, tmp_path, tokenizer)

    assert temp_db_cursor.table_exists('gb_postcode')
    assert temp_db_cursor.table_exists('us_postcode')
    assert temp_db_cursor.table_rows('location_postcode') == 0


def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
    temp_db_cursor.execute("""
        INSERT INTO placex (place_id, country_code, address, geometry)
        VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
        """)

    postcodes.import_postcodes(dsn, tmp_path)
    postcodes.import_postcodes(dsn, tmp_path, tokenizer)

    rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
                                      ST_X(geometry), ST_Y(geometry)

@ -11,9 +11,7 @@ def sql_tmp_path(tmp_path, def_config):
    return tmp_path

@pytest.fixture
def conn(temp_db_conn, table_factory, monkeypatch):
    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
    table_factory('country_name', 'partition INT', (0, 1, 2))
def conn(sql_preprocessor, temp_db_conn):
    return temp_db_conn


@ -26,6 +26,7 @@ def test_script(envdir):

def run_website_script(envdir, config):
    config.lib_dir.php = envdir / 'php'
    config.project_dir = envdir
    refresh.setup_website(envdir, config)

    proc = subprocess.run(['/usr/bin/env', 'php', '-Cq',

@ -1,120 +1,170 @@
SELECT getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
SELECT getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
SELECT getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
SELECT getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
SELECT getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
SELECT getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
SELECT getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
SELECT getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
SELECT getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
SELECT getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
SELECT getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
SELECT getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
SELECT getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
SELECT getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
SELECT getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
SELECT getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
SELECT getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
SELECT getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
SELECT getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
SELECT getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
SELECT getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
SELECT getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
SELECT getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
SELECT getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
SELECT getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
SELECT getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
SELECT getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
SELECT getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
SELECT getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
SELECT getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
SELECT getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
SELECT getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
SELECT getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
SELECT getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
SELECT getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
SELECT getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
SELECT getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
SELECT getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
SELECT getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
SELECT getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
SELECT getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
SELECT getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
SELECT getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
SELECT getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
SELECT getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
SELECT getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
SELECT getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
CREATE OR REPLACE FUNCTION test_getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
                                                    lookup_class text, lookup_type text)
  RETURNS INTEGER
  AS $$
DECLARE
  lookup_token TEXT;
  return_word_id INTEGER;
BEGIN
  lookup_token := ' '||trim(lookup_word);
  SELECT min(word_id) FROM word
  WHERE word_token = lookup_token and word = normalized_word
        and class = lookup_class and type = lookup_type
  INTO return_word_id;
  IF return_word_id IS NULL THEN
    return_word_id := nextval('seq_word');
    INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
                             lookup_class, lookup_type, null, 0);
  END IF;
  RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION test_getorcreate_amenityoperator(lookup_word TEXT,
                                                            normalized_word TEXT,
                                                            lookup_class text,
                                                            lookup_type text,
                                                            op text)
  RETURNS INTEGER
  AS $$
DECLARE
  lookup_token TEXT;
  return_word_id INTEGER;
BEGIN
  lookup_token := ' '||trim(lookup_word);
  SELECT min(word_id) FROM word
  WHERE word_token = lookup_token and word = normalized_word
        and class = lookup_class and type = lookup_type and operator = op
  INTO return_word_id;
  IF return_word_id IS NULL THEN
    return_word_id := nextval('seq_word');
    INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
                             lookup_class, lookup_type, null, 0, op);
  END IF;
  RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;

SELECT test_getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
SELECT test_getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
SELECT test_getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
SELECT test_getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
SELECT test_getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
SELECT test_getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
SELECT test_getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
SELECT test_getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
SELECT test_getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
SELECT test_getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
SELECT test_getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
SELECT test_getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
SELECT test_getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
SELECT test_getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
SELECT test_getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
SELECT test_getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
CREATE INDEX idx_placex_classtype ON placex (class, type);
CREATE TABLE place_classtype_aeroway_aerodrome AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'aeroway' AND type = 'aerodrome';
CREATE INDEX idx_place_classtype_aeroway_aerodrome_centroid ON place_classtype_aeroway_aerodrome USING GIST (centroid);
CREATE INDEX idx_place_classtype_aeroway_aerodrome_place_id ON place_classtype_aeroway_aerodrome USING btree(place_id);
@ -176,3 +226,6 @@ CREATE INDEX idx_place_classtype_memorial_war_memorial_centroid ON place_classty
CREATE INDEX idx_place_classtype_memorial_war_memorial_place_id ON place_classtype_memorial_war_memorial USING btree(place_id);
GRANT SELECT ON place_classtype_memorial_war_memorial TO "www-data";
DROP INDEX idx_placex_classtype;

DROP FUNCTION test_getorcreate_amenity;
DROP FUNCTION test_getorcreate_amenityoperator;