switch CLI search command to python implementation

This commit is contained in:
Sarah Hoffmann 2023-05-24 22:54:54 +02:00
parent f335e78d1e
commit 0608cf1476
5 changed files with 124 additions and 68 deletions

View File

@ -9,6 +9,8 @@ Helper function for parsing parameters and and outputting data
specifically for the v1 version of the API.
"""
from nominatim.api.results import SearchResult, SearchResults, SourceTable
REVERSE_MAX_RANKS = [2, 2, 2, # 0-2 Continent/Sea
4, 4, # 3-4 Country
8, # 5 State
@ -29,3 +31,41 @@ def zoom_to_rank(zoom: int) -> int:
""" Convert a zoom parameter into a rank according to the v1 API spec.
"""
return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
""" Remove results that look like duplicates.
Two results are considered the same if they have the same OSM ID
or if they have the same category, display name and rank.
"""
osm_ids_done = set()
classification_done = set()
deduped = SearchResults()
for result in results:
if result.source_table == SourceTable.POSTCODE:
assert result.names and 'ref' in result.names
if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
continue
classification = (result.osm_object[0] if result.osm_object else None,
result.category,
result.display_name,
result.rank_address)
if result.osm_object not in osm_ids_done \
and classification not in classification_done:
deduped.append(result)
osm_ids_done.add(result.osm_object)
classification_done.add(classification)
if len(deduped) >= max_results:
break
return deduped
def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool:
return result.source_table == SourceTable.PLACEX \
and result.osm_object is not None \
and result.osm_object[0] == 'R' \
and result.category == ('boundary', 'postal_code') \
and result.names is not None \
and result.names.get('ref') == postcode

View File

@ -273,14 +273,11 @@ def get_set_parser(**kwargs: Any) -> CommandlineParser:
parser.add_subcommand('export', QueryExport())
parser.add_subcommand('serve', AdminServe())
if kwargs.get('phpcgi_path'):
parser.add_subcommand('search', clicmd.APISearch())
parser.add_subcommand('reverse', clicmd.APIReverse())
parser.add_subcommand('lookup', clicmd.APILookup())
parser.add_subcommand('details', clicmd.APIDetails())
parser.add_subcommand('status', clicmd.APIStatus())
else:
parser.parser.epilog = 'php-cgi not found. Query commands not available.'
parser.add_subcommand('search', clicmd.APISearch())
parser.add_subcommand('reverse', clicmd.APIReverse())
parser.add_subcommand('lookup', clicmd.APILookup())
parser.add_subcommand('details', clicmd.APIDetails())
parser.add_subcommand('status', clicmd.APIStatus())
return parser

View File

@ -7,7 +7,7 @@
"""
Subcommand definitions for API calls from the command line.
"""
from typing import Mapping, Dict
from typing import Mapping, Dict, Any
import argparse
import logging
import json
@ -18,7 +18,7 @@ from nominatim.errors import UsageError
from nominatim.clicmd.args import NominatimArgs
import nominatim.api as napi
import nominatim.api.v1 as api_output
from nominatim.api.v1.helpers import zoom_to_rank
from nominatim.api.v1.helpers import zoom_to_rank, deduplicate_results
import nominatim.api.logging as loglib
# Do not repeat documentation of subcommand classes.
@ -27,6 +27,7 @@ import nominatim.api.logging as loglib
LOG = logging.getLogger()
STRUCTURED_QUERY = (
('amenity', 'name and/or type of POI'),
('street', 'housenumber and street'),
('city', 'city, town or village'),
('county', 'county'),
@ -97,7 +98,7 @@ class APISearch:
help='Limit search results to one or more countries')
group.add_argument('--exclude_place_ids', metavar='ID,..',
help='List of search object to be excluded')
group.add_argument('--limit', type=int,
group.add_argument('--limit', type=int, default=10,
help='Limit the number of returned results')
group.add_argument('--viewbox', metavar='X1,Y1,X2,Y2',
help='Preferred area to find search results')
@ -110,30 +111,58 @@ class APISearch:
def run(self, args: NominatimArgs) -> int:
params: Dict[str, object]
if args.format == 'debug':
loglib.set_log_output('text')
api = napi.NominatimAPI(args.project_dir)
params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
'address_details': True, # needed for display name
'geometry_output': args.get_geometry_output(),
'geometry_simplification': args.polygon_threshold,
'countries': args.countrycodes,
'excluded': args.exclude_place_ids,
'viewbox': args.viewbox,
'bounded_viewbox': args.bounded
}
if args.query:
params = dict(q=args.query)
results = api.search(args.query, **params)
else:
params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
results = api.search_address(amenity=args.amenity,
street=args.street,
city=args.city,
county=args.county,
state=args.state,
postalcode=args.postalcode,
country=args.country,
**params)
for param, _ in EXTRADATA_PARAMS:
if getattr(args, param):
params[param] = '1'
for param in ('format', 'countrycodes', 'exclude_place_ids', 'limit', 'viewbox'):
if getattr(args, param):
params[param] = getattr(args, param)
if args.lang:
params['accept-language'] = args.lang
if args.polygon_output:
params['polygon_' + args.polygon_output] = '1'
if args.polygon_threshold:
params['polygon_threshold'] = args.polygon_threshold
if args.bounded:
params['bounded'] = '1'
if not args.dedupe:
params['dedupe'] = '0'
for result in results:
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
if args.dedupe and len(results) > 1:
results = deduplicate_results(results, args.limit)
if args.format == 'debug':
print(loglib.get_and_disable())
return 0
output = api_output.format_result(
results,
args.format,
{'extratags': args.extratags,
'namedetails': args.namedetails,
'addressdetails': args.addressdetails})
if args.format != 'xml':
# reformat the result, so it is pretty-printed
json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
else:
sys.stdout.write(output)
sys.stdout.write('\n')
return 0
return _run_api('search', args, params)
class APIReverse:
"""\

View File

@ -147,6 +147,7 @@ class NominatimArgs:
# Arguments to 'search'
query: Optional[str]
amenity: Optional[str]
street: Optional[str]
city: Optional[str]
county: Optional[str]
@ -155,7 +156,7 @@ class NominatimArgs:
postalcode: Optional[str]
countrycodes: Optional[str]
exclude_place_ids: Optional[str]
limit: Optional[int]
limit: int
viewbox: Optional[str]
bounded: bool
dedupe: bool

View File

@ -14,42 +14,6 @@ import nominatim.clicmd.api
import nominatim.api as napi
@pytest.mark.parametrize("endpoint", (('search', 'reverse', 'lookup', 'details', 'status')))
def test_no_api_without_phpcgi(endpoint):
assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
phpcgi_path=None,
cli_args=[endpoint]) == 1
@pytest.mark.parametrize("params", [('search', '--query', 'new'),
('search', '--city', 'Berlin')])
class TestCliApiCallPhp:
@pytest.fixture(autouse=True)
def setup_cli_call(self, params, cli_call, mock_func_factory, tmp_path):
self.mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
def _run():
return cli_call(*params, '--project-dir', str(tmp_path))
self.run_nominatim = _run
def test_api_commands_simple(self, tmp_path, params):
(tmp_path / 'website').mkdir()
(tmp_path / 'website' / (params[0] + '.php')).write_text('')
assert self.run_nominatim() == 0
assert self.mock_run_api.called == 1
assert self.mock_run_api.last_args[0] == params[0]
def test_bad_project_dir(self):
assert self.run_nominatim() == 1
class TestCliStatusCall:
@pytest.fixture(autouse=True)
@ -181,6 +145,31 @@ class TestCliLookupCall:
assert 'namedetails' not in out[0]
@pytest.mark.parametrize('endpoint, params', [('search', ('--query', 'Berlin')),
('search_address', ('--city', 'Berlin'))
])
def test_search(cli_call, tmp_path, capsys, monkeypatch, endpoint, params):
result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'),
napi.Point(1.0, -3.0),
names={'name':'Name', 'name:fr': 'Nom'},
extratags={'extra':'Extra'})
monkeypatch.setattr(napi.NominatimAPI, endpoint,
lambda *args, **kwargs: napi.SearchResults([result]))
result = cli_call('search', '--project-dir', str(tmp_path), *params)
assert result == 0
out = json.loads(capsys.readouterr().out)
assert len(out) == 1
assert out[0]['name'] == 'Name'
assert 'address' not in out[0]
assert 'extratags' not in out[0]
assert 'namedetails' not in out[0]
class TestCliApiCommonParameters:
@pytest.fixture(autouse=True)