Use the osmio crate to convert OSM pbf to xml, clipping it in the process. #523

Tested really quickly, might not be working right yet. The .xml output
seems to have a duplicate '</osm>' for some reason.
This commit is contained in:
Dustin Carlino 2021-03-13 10:06:19 -08:00
parent 0d8cc27961
commit 27a07b02a6
4 changed files with 157 additions and 3 deletions

65
Cargo.lock generated
View File

@ -850,6 +850,31 @@ dependencies = [
"syn",
]
[[package]]
name = "derive_builder"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2658621297f2cf68762a6f7dc0bb7e1ff2cfd6583daef8ee0fed6f7ec468ec0"
dependencies = [
"darling",
"derive_builder_core",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_core"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2791ea3e372c8495c0bc2033991d76b512cd799d07491fbd6890124db9458bef"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "discard"
version = "1.0.4"
@ -1756,6 +1781,7 @@ dependencies = [
"kml",
"log",
"map_model",
"osmio",
"popdat",
"rand",
"rand_xorshift",
@ -2603,6 +2629,22 @@ dependencies = [
"shared_library",
]
[[package]]
name = "osmio"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a3b636ff74392dae71bb18787ff7b1d7cfab2b58d3d750361641bd17ea5279"
dependencies = [
"byteorder",
"chrono",
"derive_builder",
"flate2",
"protobuf",
"quick-xml 0.18.1",
"separator",
"xml-rs",
]
[[package]]
name = "owned_ttf_parser"
version = "0.6.0"
@ -2858,6 +2900,12 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "protobuf"
version = "2.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70731852eec72c56d11226c8a5f96ad5058a3dab73647ca5f7ee351e464f2571"
[[package]]
name = "publicsuffix"
version = "1.5.4"
@ -2880,6 +2928,15 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "quick-xml"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cc440ee4802a86e357165021e3e255a9143724da31db1e2ea540214c96a0f82"
dependencies = [
"memchr",
]
[[package]]
name = "quick-xml"
version = "0.21.0"
@ -3305,6 +3362,12 @@ dependencies = [
"pest",
]
[[package]]
name = "separator"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5"
[[package]]
name = "serde"
version = "1.0.123"
@ -3579,7 +3642,7 @@ dependencies = [
"anyhow",
"geom",
"map_model",
"quick-xml",
"quick-xml 0.21.0",
"serde",
]

View File

@ -23,6 +23,7 @@ gdal = { version = "0.7.2", optional = true }
kml = { path = "../kml" }
log = "0.4.14"
map_model = { path = "../map_model" }
osmio = "0.3.0"
popdat = { path = "../popdat" }
rand = "0.8.3"
rand_xorshift = "0.3.0"

View File

@ -0,0 +1,90 @@
use std::collections::HashMap;
use anyhow::Result;
use geo::prelude::Contains;
use geo::{LineString, Point, Polygon};
use osmio::obj_types::{RcNode, RcOSMObj, RcRelation, RcWay};
use osmio::{Node, OSMObj, OSMObjBase, OSMObjectType, OSMReader, OSMWriter, Relation, Way};
use abstutil::CmdArgs;
use geom::LonLat;
/// Command-line entry point for clipping OpenStreetMap data.
///
/// Required flags:
/// - `--pbf`: the input .osm.pbf file
/// - `--clip`: an Osmosis boundary polygon file describing the area to keep
/// - `--out`: where to write the clipped result, as .osm.xml
///
/// Intended as a simple Rust port of `osmconvert large_map.osm -B=clipping.poly --complete-ways
/// -o=smaller_map.osm`.
fn main() -> Result<()> {
    let mut cli = CmdArgs::new();
    let input_pbf = cli.required("--pbf");
    let polygon_file = cli.required("--clip");
    let output_xml = cli.required("--out");
    cli.done();

    // Build the exterior ring of the clipping polygon from the boundary points' (x, y) pairs.
    let mut ring: Vec<(f64, f64)> = Vec::new();
    for pt in LonLat::read_osmosis_polygon(&polygon_file)? {
        ring.push((pt.x(), pt.y()));
    }
    // The polygon has no interior rings (holes).
    let boundary = Polygon::new(LineString::from(ring), Vec::new());

    clip(&input_pbf, &boundary, &output_xml)
}
/// Clips the OSM data in `pbf_path` to `boundary` and writes the result as XML to `out_path`.
///
/// An object is kept when:
/// - it is a node with a position that `boundary` contains;
/// - it is a way referencing at least one kept node;
/// - it is a relation with at least one member (node, way, or relation) that was already kept.
///
/// Output is grouped by type (nodes, then ways, then relations) and sorted by ID within each
/// group, so repeated runs over the same input produce the same file.
///
/// # Errors
/// Fails if `pbf_path` cannot be opened, `out_path` cannot be created, or writing an object
/// fails.
fn clip(pbf_path: &str, boundary: &Polygon<f64>, out_path: &str) -> Result<()> {
    // Kept objects, keyed by ID. The maps also serve as the membership test for objects that
    // show up later in the input.
    // TODO Maybe just have a single map with RcOSMObj. But then the order we write will be wrong.
    let mut nodes: HashMap<i64, RcNode> = HashMap::new();
    let mut ways: HashMap<i64, RcWay> = HashMap::new();
    let mut relations: HashMap<i64, RcRelation> = HashMap::new();

    // TODO Buffer?
    let mut reader = osmio::pbf::PBFReader::new(std::fs::File::open(pbf_path)?);
    for obj in reader.objects() {
        match obj.object_type() {
            OSMObjectType::Node => {
                let node = obj
                    .into_node()
                    .expect("object_type() reported a node");
                if let Some(pt) = node.lat_lon() {
                    // TODO Include all nodes belonging to ways that're partly in-bounds.
                    if boundary.contains(&to_pt(pt)) {
                        nodes.insert(node.id(), node);
                    }
                }
            }
            OSMObjectType::Way => {
                // Assume all nodes appear before any way.
                let way = obj.into_way().expect("object_type() reported a way");
                if way.nodes().iter().any(|id| nodes.contains_key(id)) {
                    ways.insert(way.id(), way);
                }
            }
            OSMObjectType::Relation => {
                let relation = obj
                    .into_relation()
                    .expect("object_type() reported a relation");
                // Relation members are only matched against relations seen earlier in the file.
                if relation.members().any(|(obj_type, id, _)| {
                    (obj_type == OSMObjectType::Node && nodes.contains_key(&id))
                        || (obj_type == OSMObjectType::Way && ways.contains_key(&id))
                        || (obj_type == OSMObjectType::Relation && relations.contains_key(&id))
                }) {
                    relations.insert(relation.id(), relation);
                }
            }
        }
    }

    // TODO Buffer?
    let mut writer = osmio::xml::XMLWriter::new(std::fs::File::create(out_path)?);
    // HashMap iteration order is arbitrary, so each group is sorted by ID before writing.
    for node in sorted_by_id(nodes) {
        writer.write_obj(&RcOSMObj::Node(node))?;
    }
    for way in sorted_by_id(ways) {
        writer.write_obj(&RcOSMObj::Way(way))?;
    }
    for relation in sorted_by_id(relations) {
        writer.write_obj(&RcOSMObj::Relation(relation))?;
    }

    // Deliberately no explicit `writer.close()`. The output used to end with a duplicate
    // `</osm>`: the writer also closes the document when it is dropped, and close() does not
    // appear to be idempotent (NOTE(review): confirm against the osmio version in Cargo.lock).
    // Trade-off: an error while writing the closing tag is no longer returned through this
    // function's Result.
    drop(writer);
    Ok(())
}

/// Consumes a map keyed by OSM ID and returns its values ordered by ascending ID.
fn sorted_by_id<T>(objs: HashMap<i64, T>) -> Vec<T> {
    let mut pairs: Vec<(i64, T)> = objs.into_iter().collect();
    // IDs are unique map keys, so an unstable sort cannot reorder equal elements.
    pairs.sort_unstable_by_key(|(id, _)| *id);
    pairs.into_iter().map(|(_, obj)| obj).collect()
}
fn to_pt(pair: (osmio::Lat, osmio::Lon)) -> Point<f64> {
// Note our polygon uses (lon, lat)
(pair.1.into(), pair.0.into()).into()
}

View File

@ -158,7 +158,7 @@ pub fn ensure_popdat_exists(
println!("- {} exists, not regenerating it", abstio::path_popdat());
return (
abstio::read_binary(abstio::path_popdat(), timer),
map_model::Map::new(huge_name.path(), timer),
map_model::Map::load_synchronously(huge_name.path(), timer),
);
}
@ -166,7 +166,7 @@ pub fn ensure_popdat_exists(
osm_to_raw("huge_seattle", timer, config);
}
let huge_map = if abstio::file_exists(huge_name.path()) {
map_model::Map::new(huge_name.path(), timer)
map_model::Map::load_synchronously(huge_name.path(), timer)
} else {
crate::utils::raw_to_map(&huge_name, true, false, timer)
};