mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-27 00:49:55 +03:00
move warm script to python code
This commit is contained in:
parent
261e0cfd5a
commit
faeee7528f
@ -1,115 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* SPDX-License-Identifier: GPL-2.0-only
|
||||
*
|
||||
* This file is part of Nominatim. (https://nominatim.org)
|
||||
*
|
||||
* Copyright (C) 2022 by the Nominatim developer community.
|
||||
* For a full list of authors see the git log.
|
||||
*/
|
||||
// Root of the PHP library: the parent of the directory that holds this script.
@define('CONST_LibDir', dirname(dirname(__FILE__)));

require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/log.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');

ini_set('memory_limit', '800M');

// Command line definition: a description followed by one entry per option.
$aCMDOptions = array(
                'Tools to warm nominatim db',
                array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
                array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
                array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
                array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
                array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
                array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
               );
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);

// getCmdOpt() is handed $aResult to fill in, so the project directory has to
// be read from there. Reading it from a variable that is never assigned made
// the ?? fallback always win and --project-dir was silently ignored.
loadSettings($aResult['project-dir'] ?? getcwd());

// Publish the loaded settings as constants before Geocode.php is pulled in.
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
// NOTE(review): CONST_InstallDir is not defined in this script; presumably
// one of the files required above provides it - confirm.
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
@define('CONST_Search_WithinCountries', getSetting('SEARCH_WITHIN_COUNTRIES', false));

require_once(CONST_LibDir.'/Geocode.php');

// One database connection is shared by the reverse and the search warm-up.
$oDB = new Nominatim\DB();
$oDB->connect();

$bVerbose = $aResult['verbose'];
|
||||
|
||||
/**
 * Report the outcome of one warm-up query on standard output.
 *
 * In verbose mode the 'langaddress' of the first result is printed on a
 * line of its own, or "<not found>" when there is no result. In non-verbose
 * mode a single dot is printed as a progress marker.
 *
 * @param array|null $aResults Results of the query, may be empty or null.
 * @param bool       $bVerbose Whether verbose output was requested.
 */
function print_results($aResults, $bVerbose)
{
    if (!$bVerbose) {
        echo '.';
        return;
    }

    $bFound = $aResults && count($aResults);
    echo $bFound ? $aResults[0]['langaddress']."\n" : "<not found>\n";
}
|
||||
|
||||
// Warm the reverse lookup path unless only search warming was requested.
if (!$aResult['search-only']) {
    $oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
    $oReverseGeocode->setZoom(20);

    $oPlaceLookup = new Nominatim\PlaceLookup($oDB);
    $oPlaceLookup->setIncludeAddressDetails(true);
    $oPlaceLookup->setLanguagePreference(array('en'));

    // In verbose mode every lookup gets its own line below the heading.
    echo $bVerbose ? "Warm reverse: \n" : 'Warm reverse: ';

    // 1000 lookups at random coordinates with two decimals of precision.
    $iRemaining = 1000;
    while ($iRemaining-- > 0) {
        $fLat = rand(-9000, 9000) / 100;
        $fLon = rand(-18000, 18000) / 100;
        if ($bVerbose) {
            echo $fLat.', '.$fLon.' = ';
        }

        $oLookup = $oReverseGeocode->lookup($fLat, $fLon);
        if ($oLookup) {
            // Resolve the place that was found into a full result.
            $aSearchResults = $oPlaceLookup->lookup(array($oLookup->iId => $oLookup));
        } else {
            $aSearchResults = null;
        }
        print_results($aSearchResults, $bVerbose);
    }
    echo "\n";
}
|
||||
|
||||
// Warm the search path unless only reverse warming was requested.
if (!$aResult['reverse-only']) {
    $oGeocode = new Nominatim\Geocode($oDB);

    echo 'Warm search: ';
    if ($bVerbose) {
        echo "\n";
    }

    // The tokenizer supplies the 1000 most frequent words; each of them is
    // run as a search query. (A hard-coded SQL string that used to be
    // assigned here was never executed and has been dropped.)
    $oTokenizer = new \Nominatim\Tokenizer($oDB);
    $aWords = $oTokenizer->mostFrequentWords(1000);

    foreach ($aWords as $sWord) {
        if ($bVerbose) {
            echo "$sWord = ";
        }

        $oGeocode->setLanguagePreference(array('en'));
        $oGeocode->setQuery($sWord);
        $aSearchResults = $oGeocode->lookup();
        print_results($aSearchResults, $bVerbose);
    }
    echo "\n";
}
|
@ -9,9 +9,11 @@ Implementation of the 'admin' subcommand.
|
||||
"""
|
||||
import logging
|
||||
import argparse
|
||||
import random
|
||||
|
||||
from nominatim.tools.exec_utils import run_legacy_script
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.clicmd.args import NominatimArgs
|
||||
import nominatim.api as napi
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
@ -81,11 +83,25 @@ class AdminFuncs:
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
def _warm(self, args: NominatimArgs) -> int:
    """ Warm the database caches by running typical queries.

        `args.target` selects what is exercised: 'reverse' runs only
        reverse lookups, 'search' runs only searches and any other
        value runs both. Always returns 0.
    """
    LOG.warning('Warming database caches')

    api = napi.NominatimAPI(args.project_dir)

    # A target of 'search' means search-only, so that is the one case
    # where the reverse lookups must be skipped.
    if args.target != 'search':
        for _ in range(1000):
            api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
                        address_details=True)

    # Likewise a target of 'reverse' means reverse-only.
    if args.target != 'reverse':
        from ..tokenizer import factory as tokenizer_factory

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        # The connection is only needed to fetch the word list; the
        # searches themselves go through the API object.
        with connect(args.config.get_libpq_dsn()) as conn:
            words = tokenizer.most_frequent_words(conn, 1000)

        for word in words:
            api.search(word)

    return 0
|
||||
|
@ -13,6 +13,7 @@ from typing import List, Tuple, Dict, Any, Optional, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db.connection import Connection
|
||||
from nominatim.data.place_info import PlaceInfo
|
||||
from nominatim.typing import Protocol
|
||||
|
||||
@ -233,6 +234,13 @@ class AbstractTokenizer(ABC):
|
||||
"""
|
||||
|
||||
|
||||
@abstractmethod
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
    """ Return the `num` full words that appear most frequently
        in the database as a list.

        The words are looked up through the database connection `conn`.
    """
|
||||
|
||||
|
||||
class TokenizerModule(Protocol):
|
||||
""" Interface that must be exported by modules that implement their
|
||||
own tokenizer.
|
||||
|
@ -183,6 +183,18 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
self.loader.make_token_analysis())
|
||||
|
||||
|
||||
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
    """ Return a list of the `num` most frequent full words
        in the database.

        Full words are the rows of type 'W' in the word table. They are
        ranked by the sum of the 'count' entries in their info column.
        Of each word only the part in front of the first '@' is returned.
    """
    with conn.cursor() as cur:
        # ->> yields the count as text, which can be cast to int on any
        # PostgreSQL version (a direct jsonb-to-int cast cannot).
        # NULLS LAST is needed because DESC order puts NULLs first by
        # default, which would rank words without a count at the top.
        cur.execute("""SELECT word, sum((info->>'count')::int) as count
                         FROM word WHERE type = 'W'
                         GROUP BY word
                         ORDER BY count DESC NULLS LAST LIMIT %s""", (num,))
        return [row[0].split('@')[0] for row in cur]
|
||||
|
||||
|
||||
def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
|
@ -256,6 +256,16 @@ class LegacyTokenizer(AbstractTokenizer):
|
||||
return LegacyNameAnalyzer(self.dsn, normalizer)
|
||||
|
||||
|
||||
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
    """ Return a list of the `num` most frequent full words
        in the database.

        The words are taken from the word table, ranked by their
        search_name_count. Rows without a word are ignored.
    """
    with conn.cursor() as cur:
        # DESC order puts NULLs first by default in PostgreSQL; NULLS LAST
        # makes sure rows without a count cannot displace counted words.
        cur.execute(""" SELECT word FROM word WHERE word is not null
                          ORDER BY search_name_count DESC NULLS LAST LIMIT %s""", (num,))
        return [row[0] for row in cur]
|
||||
|
||||
|
||||
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
|
@ -19,17 +19,6 @@ import nominatim.tools.migration
|
||||
import nominatim.clicmd.admin
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params", [('--warm', ),
                                    ('--warm', '--reverse-only'),
                                    ('--warm', '--search-only')])
def test_admin_command_legacy(cli_call, mock_func_factory, params):
    """ Every variant of `admin --warm` must succeed and call the
        legacy script runner.
    """
    legacy_runner = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')

    exit_code = cli_call('admin', *params)

    assert exit_code == 0
    assert legacy_runner.called == 1
|
||||
|
||||
|
||||
def test_admin_command_check_database(cli_call, mock_func_factory):
|
||||
mock = mock_func_factory(nominatim.tools.check_database, 'check_database')
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user