From 27a07b02a6c747d48e9b653e34b04d2450c4aa72 Mon Sep 17 00:00:00 2001 From: Dustin Carlino Date: Sat, 13 Mar 2021 10:06:19 -0800 Subject: [PATCH] Use the osmio crate to convert OSM pbf to xml, clipping it in the process. #523 Tested really quickly, might not be working right yet. The .xml output seems to have a duplicate '' for some reason. --- Cargo.lock | 65 +++++++++++++++++++++++++- importer/Cargo.toml | 1 + importer/src/bin/clip_osm.rs | 90 ++++++++++++++++++++++++++++++++++++ importer/src/seattle.rs | 4 +- 4 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 importer/src/bin/clip_osm.rs diff --git a/Cargo.lock b/Cargo.lock index da125e95d5..105dd3e03b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -850,6 +850,31 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_builder" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2658621297f2cf68762a6f7dc0bb7e1ff2cfd6583daef8ee0fed6f7ec468ec0" +dependencies = [ + "darling", + "derive_builder_core", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2791ea3e372c8495c0bc2033991d76b512cd799d07491fbd6890124db9458bef" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "discard" version = "1.0.4" @@ -1756,6 +1781,7 @@ dependencies = [ "kml", "log", "map_model", + "osmio", "popdat", "rand", "rand_xorshift", @@ -2603,6 +2629,22 @@ dependencies = [ "shared_library", ] +[[package]] +name = "osmio" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03a3b636ff74392dae71bb18787ff7b1d7cfab2b58d3d750361641bd17ea5279" +dependencies = [ + "byteorder", + "chrono", + "derive_builder", + "flate2", + "protobuf", + "quick-xml 0.18.1", + "separator", + "xml-rs", +] + [[package]] name = "owned_ttf_parser" version = "0.6.0" @@ -2858,6 +2900,12 @@ 
dependencies = [ "unicode-xid", ] +[[package]] +name = "protobuf" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70731852eec72c56d11226c8a5f96ad5058a3dab73647ca5f7ee351e464f2571" + [[package]] name = "publicsuffix" version = "1.5.4" @@ -2880,6 +2928,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "quick-xml" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cc440ee4802a86e357165021e3e255a9143724da31db1e2ea540214c96a0f82" +dependencies = [ + "memchr", +] + [[package]] name = "quick-xml" version = "0.21.0" @@ -3305,6 +3362,12 @@ dependencies = [ "pest", ] +[[package]] +name = "separator" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5" + [[package]] name = "serde" version = "1.0.123" @@ -3579,7 +3642,7 @@ dependencies = [ "anyhow", "geom", "map_model", - "quick-xml", + "quick-xml 0.21.0", "serde", ] diff --git a/importer/Cargo.toml b/importer/Cargo.toml index fc054e310c..10a9f2c0c0 100644 --- a/importer/Cargo.toml +++ b/importer/Cargo.toml @@ -23,6 +23,7 @@ gdal = { version = "0.7.2", optional = true } kml = { path = "../kml" } log = "0.4.14" map_model = { path = "../map_model" } +osmio = "0.3.0" popdat = { path = "../popdat" } rand = "0.8.3" rand_xorshift = "0.3.0" diff --git a/importer/src/bin/clip_osm.rs b/importer/src/bin/clip_osm.rs new file mode 100644 index 0000000000..5715b58946 --- /dev/null +++ b/importer/src/bin/clip_osm.rs @@ -0,0 +1,90 @@ +use std::collections::HashMap; + +use anyhow::Result; +use geo::prelude::Contains; +use geo::{LineString, Point, Polygon}; +use osmio::obj_types::{RcNode, RcOSMObj, RcRelation, RcWay}; +use osmio::{Node, OSMObj, OSMObjBase, OSMObjectType, OSMReader, OSMWriter, Relation, Way}; + +use abstutil::CmdArgs; +use geom::LonLat; + +/// Clips an .osm.pbf specified by `--pbf` using the Osmosis 
boundary polygon specified by +/// `--clip`, writing the result as .osm.xml to `--out`. This is a simple Rust port of `osmconvert +/// large_map.osm -B=clipping.poly --complete-ways -o=smaller_map.osm`. +fn main() -> Result<()> { + let mut args = CmdArgs::new(); + let pbf_path = args.required("--pbf"); + let clip_path = args.required("--clip"); + let out_path = args.required("--out"); + args.done(); + + let boundary_pts = LonLat::read_osmosis_polygon(&clip_path)?; + let raw_pts: Vec<(f64, f64)> = boundary_pts + .into_iter() + .map(|pt| (pt.x(), pt.y())) + .collect(); + let boundary = Polygon::new(LineString::from(raw_pts), Vec::new()); + clip(&pbf_path, &boundary, &out_path) +} + +fn clip(pbf_path: &str, boundary: &Polygon<f64>, out_path: &str) -> Result<()> { + // TODO Maybe just have a single map with RcOSMObj. But then the order we write will be wrong. + let mut nodes: HashMap<i64, RcNode> = HashMap::new(); + let mut ways: HashMap<i64, RcWay> = HashMap::new(); + let mut relations: HashMap<i64, RcRelation> = HashMap::new(); + + // TODO Buffer? + let mut reader = osmio::pbf::PBFReader::new(std::fs::File::open(pbf_path)?); + for obj in reader.objects() { + match obj.object_type() { + OSMObjectType::Node => { + let node = obj.into_node().unwrap(); + if let Some(pt) = node.lat_lon() { + // TODO Include all nodes belonging to ways that're partly in-bounds. + if boundary.contains(&to_pt(pt)) { + nodes.insert(node.id(), node); + } + } + } + OSMObjectType::Way => { + // Assume all nodes appear before any way. 
+ let way = obj.into_way().unwrap(); + if way.nodes().iter().any(|id| nodes.contains_key(id)) { + ways.insert(way.id(), way); + } + } + OSMObjectType::Relation => { + let relation = obj.into_relation().unwrap(); + if relation.members().any(|(obj_type, id, _)| { + (obj_type == OSMObjectType::Node && nodes.contains_key(&id)) + || (obj_type == OSMObjectType::Way && ways.contains_key(&id)) + || (obj_type == OSMObjectType::Relation && relations.contains_key(&id)) + }) { + relations.insert(relation.id(), relation); + } + } + } + } + + // TODO Buffer? + let mut writer = osmio::xml::XMLWriter::new(std::fs::File::create(out_path)?); + // TODO Nondeterministic output because of HashMap! + for (_, node) in nodes { + writer.write_obj(&RcOSMObj::Node(node))?; + } + for (_, way) in ways { + writer.write_obj(&RcOSMObj::Way(way))?; + } + for (_, relation) in relations { + writer.write_obj(&RcOSMObj::Relation(relation))?; + } + + writer.close()?; + Ok(()) +} + +fn to_pt(pair: (osmio::Lat, osmio::Lon)) -> Point<f64> { + // Note our polygon uses (lon, lat) + (pair.1.into(), pair.0.into()).into() +} diff --git a/importer/src/seattle.rs b/importer/src/seattle.rs index 958256c76d..ce35ad8c1c 100644 --- a/importer/src/seattle.rs +++ b/importer/src/seattle.rs @@ -158,7 +158,7 @@ pub fn ensure_popdat_exists( println!("- {} exists, not regenerating it", abstio::path_popdat()); return ( abstio::read_binary(abstio::path_popdat(), timer), - map_model::Map::new(huge_name.path(), timer), + map_model::Map::load_synchronously(huge_name.path(), timer), ); } @@ -166,7 +166,7 @@ pub fn ensure_popdat_exists( osm_to_raw("huge_seattle", timer, config); } let huge_map = if abstio::file_exists(huge_name.path()) { - map_model::Map::new(huge_name.path(), timer) + map_model::Map::load_synchronously(huge_name.path(), timer) } else { crate::utils::raw_to_map(&huge_name, true, false, timer) };