mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-23 13:44:36 +03:00
7fcbe13669
The pgsql function get_addressdata() does a lookup of a lot of data that is already available in Python.
201 lines
7.1 KiB
Python
201 lines
7.1 KiB
Python
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2023 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Implementation of the 'export' subcommand.
|
|
"""
|
|
from typing import Optional, List, cast
|
|
import logging
|
|
import argparse
|
|
import asyncio
|
|
import csv
|
|
import sys
|
|
|
|
import sqlalchemy as sa
|
|
|
|
from nominatim.clicmd.args import NominatimArgs
|
|
import nominatim.api as napi
|
|
from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
|
|
from nominatim.api.types import LookupDetails
|
|
from nominatim.errors import UsageError
|
|
|
|
# Do not repeat documentation of subcommand classes.
|
|
# pylint: disable=C0111
|
|
# Using non-top-level imports to avoid eventually unused imports.
|
|
# pylint: disable=E0012,C0415
|
|
# Needed for SQLAlchemy
|
|
# pylint: disable=singleton-comparison
|
|
|
|
LOG = logging.getLogger()

# Inclusive (lowest, highest) address-rank interval for each place type
# that can be selected for output.
RANK_RANGE_MAP = dict(
    country=(4, 4),
    state=(5, 9),
    county=(10, 12),
    city=(13, 16),
    suburb=(17, 21),
    street=(26, 26),
    path=(27, 27),
)

# Reverse lookup: address rank -> name of the output column it belongs to.
# Expanded from the intervals above; ranks outside every interval
# (e.g. 22-25) have no entry.
RANK_TO_OUTPUT_MAP = {
    rank: place_type
    for place_type, (lowest, highest) in RANK_RANGE_MAP.items()
    for rank in range(lowest, highest + 1)
}
|
|
|
|
class QueryExport:
    """\
    Export places as CSV file from the database.


    """
    # NOTE(review): the class docstring is presumably displayed as the
    # subcommand's help text by the CLI framework - confirm before rewording.

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        """ Register the command line options of the subcommand on `parser`.

            Two option groups are added: 'Output arguments', which select
            what is written and in which columns, and 'Filter arguments',
            which narrow down the set of exported places.
        """
        group = parser.add_argument_group('Output arguments')
        group.add_argument('--output-type', default='street',
                           choices=('country', 'state', 'county',
                                    'city', 'suburb', 'street', 'path'),
                           help='Type of places to output (default: street)')
        # The help text used to be concatenated without separators and
        # read "accepts:placeid,postcode"; spaces make it legible.
        group.add_argument('--output-format',
                           default='street;suburb;city;county;state;country',
                           help=("Semicolon-separated list of address types "
                                 "(see --output-type). Additionally accepts: "
                                 "placeid, postcode"))
        group.add_argument('--language',
                           help=("Preferred language for output "
                                 "(use local name, if omitted)"))

        group = parser.add_argument_group('Filter arguments')
        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
                           help='Export only objects within country')
        # The three OSM restrictions are stored under the short
        # destinations 'node', 'way' and 'relation'.
        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
                           dest='node',
                           help='Export only children of this OSM node')
        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
                           dest='way',
                           help='Export only children of this OSM way')
        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
                           dest='relation',
                           help='Export only children of this OSM relation')

    def run(self, args: 'NominatimArgs') -> int:
        """ Execute the export synchronously and return its exit code.

            Drives the asynchronous export() coroutine to completion with
            asyncio.run().
        """
        return asyncio.run(export(args))
|
|
|
|
|
|
async def export(args: NominatimArgs) -> int:
    """ Run the export as an asynchronous function.

        Selects all rows of the placex table that have no linked place and
        whose address rank lies in the range for `args.output_type`.
        The selection may be narrowed to places that have the given
        OSM node/way/relation in their address and to a single country.
        Matching places are written as CSV in batches. Always returns 0.
    """
    batch_size = 1000
    api = napi.NominatimAPIAsync(args.project_dir)

    try:
        lowest_rank, highest_rank = RANK_RANGE_MAP[args.output_type]
        writer = init_csv_writer(args.output_format)

        # The first connection runs the main query, the second one is
        # handed to dump_results() for the detail lookups.
        async with api.begin() as conn, api.begin() as detail_conn:
            placex = conn.t.placex

            columns = (
                placex.c.place_id, placex.c.parent_place_id,
                placex.c.osm_type, placex.c.osm_id, placex.c.name,
                placex.c.class_, placex.c.type, placex.c.admin_level,
                placex.c.address, placex.c.extratags,
                placex.c.housenumber, placex.c.postcode, placex.c.country_code,
                placex.c.importance, placex.c.wikipedia, placex.c.indexed_date,
                placex.c.rank_address, placex.c.rank_search,
                placex.c.centroid,
            )
            query = (sa.select(*columns)
                       .where(placex.c.linked_place_id.is_(None))
                       .where(placex.c.rank_address.between(lowest_rank, highest_rank)))

            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
            if parent_place_id:
                # Keep only places that list the parent as part of their address.
                addressline = conn.t.addressline
                query = (query.join(addressline,
                                    addressline.c.place_id == placex.c.place_id)
                              .where(addressline.c.address_place_id == parent_place_id)
                              .where(addressline.c.isaddress))

            if args.restrict_to_country:
                country = args.restrict_to_country.lower()
                query = query.where(placex.c.country_code == country)

            pending = []
            for row in await conn.execute(query):
                place = create_from_placex_row(row, ReverseResult)
                if place is None:
                    continue

                pending.append(place)
                # Flush whenever a full batch has been collected.
                if len(pending) == batch_size:
                    await dump_results(detail_conn, pending, writer, args.language)
                    pending = []

            # Write out the final, incomplete batch.
            if pending:
                await dump_results(detail_conn, pending, writer, args.language)
    finally:
        await api.close()

    return 0
|
|
|
|
|
|
def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
    """ Create a CSV writer on stdout and emit the header line.

        `output_format` is a semicolon-separated list of column names.
        Keys in a row dictionary that are not among these columns are
        silently dropped when the row is written.
    """
    columns = output_format.split(';')

    csv_out = csv.DictWriter(sys.stdout, columns, extrasaction='ignore')
    csv_out.writeheader()

    return csv_out
|
|
|
|
|
|
async def dump_results(conn: napi.SearchConnection,
                       results: List[ReverseResult],
                       writer: 'csv.DictWriter[str]',
                       lang: Optional[str]) -> None:
    """ Look up the address details for a batch of results and write
        one CSV row per result.

        Every row starts out with the place ID and postcode of the result.
        Address rows that are flagged as part of the address and have a
        local name then fill in the remaining columns: a row of type
        'postcode' overwrites the postcode, all others go into the
        column that RANK_TO_OUTPUT_MAP assigns to their address rank.
    """
    locales = napi.Locales([lang] if lang else None)
    details = LookupDetails(address_details=True, locales=locales)
    # presumably this is what populates `address_rows` on each result - verify
    await add_result_details(conn, results, details)

    for result in results:
        row = {'placeid': result.place_id,
               'postcode': result.postcode}

        for addr in result.address_rows or ():
            if not (addr.isaddress and addr.local_name):
                continue

            if addr.category[1] == 'postcode':
                row['postcode'] = addr.local_name
                continue

            # Ranks without a mapping have no output column and are skipped.
            column = RANK_TO_OUTPUT_MAP.get(addr.rank_address)
            if column is not None:
                row[column] = addr.local_name

        writer.writerow(row)
|
|
|
|
|
|
async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
                        way_id: Optional[int],
                        relation_id: Optional[int]) -> Optional[int]:
    """ Look up the place ID that belongs to the given OSM object.

        The first ID that is not None is used, checked in the order
        node, way, relation. Returns None when no ID was given at all.
        Among the placex rows for the object with an address rank above 0,
        the one with the lowest rank is chosen. Raises a UsageError when
        there is no such row.
    """
    candidates = (('N', node_id), ('W', way_id), ('R', relation_id))
    chosen = next(((kind, ident) for kind, ident in candidates
                   if ident is not None), None)
    if chosen is None:
        return None

    osm_type, osm_id = chosen

    placex = conn.t.placex
    query = (sa.select(placex.c.place_id)
               .where(placex.c.osm_type == osm_type)
               .where(placex.c.osm_id == osm_id)
               .where(placex.c.rank_address > 0)
               .order_by(placex.c.rank_address)
               .limit(1))

    # The query yields at most one row; return on the first one seen.
    for row in await conn.execute(query):
        return cast(int, row[0])

    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
|