Cleanup: Use roxmltree instead of quick-xml to parse kml

This commit is contained in:
Dustin Carlino 2020-10-05 16:25:11 -07:00
parent 5706226790
commit ac342445ed
3 changed files with 74 additions and 97 deletions

11
Cargo.lock generated
View File

@ -1592,7 +1592,7 @@ version = "0.1.0"
dependencies = [
"abstutil 0.1.0",
"geom 0.1.0",
"quick-xml 0.18.1 (registry+https://github.com/rust-lang/crates.io-index)",
"roxmltree 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.116 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -2365,14 +2365,6 @@ dependencies = [
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quick-xml"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "0.3.15"
@ -4170,7 +4162,6 @@ dependencies = [
"checksum proc-macro-hack 0.5.15 (registry+https://github.com/rust-lang/crates.io-index)" = "0d659fe7c6d27f25e9d80a1a094c223f5246f6a6596453e09d7229bf42750b63"
"checksum proc-macro-nested 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a"
"checksum proc-macro2 1.0.21 (registry+https://github.com/rust-lang/crates.io-index)" = "36e28516df94f3dd551a587da5357459d9b36d945a7c37c3557928c1c2ff2a2c"
"checksum quick-xml 0.18.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3cc440ee4802a86e357165021e3e255a9143724da31db1e2ea540214c96a0f82"
"checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
"checksum quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"

View File

@ -7,5 +7,5 @@ edition = "2018"
[dependencies]
abstutil = { path = "../abstutil" }
geom = { path = "../geom" }
quick-xml = "0.18.1"
roxmltree = "0.13.0"
serde = "1.0.116"

View File

@ -1,7 +1,5 @@
use abstutil::{prettyprint_usize, Timer};
use geom::{GPSBounds, LonLat};
use quick_xml::events::Event;
use quick_xml::Reader;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::error::Error;
@ -17,109 +15,97 @@ pub struct ExtraShape {
pub attributes: BTreeMap<String, String>,
}
/// Loads the KML file at `path` and returns the shapes found in it as `ExtraShapes`.
///
/// NOTE(review): this span comes from a rendered diff. The removed quick-xml streaming
/// reader (the `Reader` / `read_event` loop) and the added roxmltree implementation (the
/// `Document::parse` + `recurse` calls) appear interleaved below; only one of them exists in
/// any real revision of the file.
///
/// * `gps_bounds` / `require_all_pts_in_bounds`: a shape is kept only if at least one of its
///   points is inside `gps_bounds` and, when `require_all_pts_in_bounds` is set, none of its
///   points is outside. Every other shape is counted as skipped.
/// * `timer`: used to time the read and scrape phases and to record the final summary.
///
/// Errors are returned when the file cannot be opened or read, is not valid UTF-8, fails to
/// parse as XML (roxmltree variant), or contains a coordinate pair that `parse_pt` rejects.
/// The quick-xml variant panics on an XML read error instead of returning it.
#[cfg(not(target_arch = "wasm32"))]
pub fn load(
path: &str,
gps_bounds: &GPSBounds,
require_all_pts_in_bounds: bool,
timer: &mut Timer,
) -> Result<ExtraShapes, Box<dyn Error>> {
println!("Opening {}", path);
let (f, done) = abstutil::FileWithProgress::new(path)?;
// TODO FileWithProgress should implement BufRead, so we don't have to double wrap like this
let mut reader = Reader::from_reader(std::io::BufReader::new(f));
reader.trim_text(true);
// roxmltree variant: the document is parsed from a single in-memory string.
// (presumably `slurp_file` reads the whole file into memory -- confirm in abstutil)
timer.start(format!("read {}", path));
let bytes = abstutil::slurp_file(path)?;
let raw_string = std::str::from_utf8(&bytes)?;
let tree = roxmltree::Document::parse(raw_string)?;
timer.stop(format!("read {}", path));
let mut buf = Vec::new();
// TODO uncomfortably stateful
let mut shapes = Vec::new();
let mut scanned_schema = false;
let mut attributes: BTreeMap<String, String> = BTreeMap::new();
let mut attrib_key: Option<String> = None;
let mut skipped_count = 0;
let mut kv = BTreeMap::new();
loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(e)) => {
let name = e.unescape_and_decode(&reader).unwrap();
if name == "Placemark" {
// Text events are ignored until the first Placemark start tag has been seen.
scanned_schema = true;
} else if name.starts_with("SimpleData name=\"") {
// Slice out the attribute value between the opening quote and the final character.
attrib_key = Some(name["SimpleData name=\"".len()..name.len() - 1].to_string());
} else if name == "coordinates" {
attrib_key = Some(name);
} else {
attrib_key = None;
}
}
Ok(Event::Text(e)) => {
if scanned_schema {
if let Some(ref key) = attrib_key {
let text = e.unescape_and_decode(&reader).unwrap();
if key == "coordinates" {
let mut any_oob = false;
let mut any_ok = false;
let mut pts: Vec<LonLat> = Vec::new();
for pair in text.split(' ') {
if let Some(pt) = parse_pt(pair) {
pts.push(pt);
if gps_bounds.contains(pt) {
any_ok = true;
} else {
any_oob = true;
}
} else {
return Err(format!("Malformed coordinates: {}", pair).into());
}
}
// Keep the shape only if some point is in bounds and, when all points are required to
// be in bounds, no point is out of bounds.
if any_ok && (!any_oob || !require_all_pts_in_bounds) {
shapes.push(ExtraShape {
points: pts,
attributes: attributes.clone(),
});
} else {
skipped_count += 1;
}
// The collected attributes are dropped after every coordinates element, whether the
// shape was kept or skipped.
attributes.clear();
} else {
attributes.insert(key.to_string(), text);
}
}
}
}
Ok(Event::Eof) => break,
Err(e) => panic!(
"XML error at position {}: {:?}",
reader.buffer_position(),
e
),
_ => (),
}
buf.clear();
}
// roxmltree variant: walk the parsed tree from its root, filling `shapes` and
// `skipped_count`; `kv` carries the attributes collected so far between elements.
timer.start("scrape objects");
recurse(
tree.root(),
&mut shapes,
&mut skipped_count,
&mut kv,
gps_bounds,
require_all_pts_in_bounds,
)?;
timer.stop("scrape objects");
println!(
timer.note(format!(
"Got {} shapes from {} and skipped {} shapes",
prettyprint_usize(shapes.len()),
path,
prettyprint_usize(skipped_count)
);
done(timer);
));
Ok(ExtraShapes { shapes })
}
// TODO Handle FileWithProgress on web
#[cfg(target_arch = "wasm32")]
pub fn load(
_path: &str,
_gps_bounds: &GPSBounds,
_require_all_pts_in_bounds: bool,
_timer: &mut Timer,
) -> Result<ExtraShapes, Box<dyn Error>> {
Ok(ExtraShapes { shapes: Vec::new() })
/// Walks the XML subtree rooted at `node`, visiting children before the node itself, and
/// collects KML attributes and shapes along the way.
///
/// * A `SimpleData` element stores its `name` attribute and its text (empty when the element
///   has no text) in `kv`.
/// * A `coordinates` element is parsed as a whitespace-separated list of points. The shape is
///   pushed onto `shapes` when at least one point lies inside `gps_bounds` and, if
///   `require_all_pts_in_bounds` is set, no point lies outside; otherwise `skipped_count` is
///   incremented. Either way the attributes accumulated in `kv` are consumed, so they never
///   leak from a skipped shape into the next one.
///
/// # Errors
///
/// Returns an error when a `SimpleData` element has no `name` attribute or when a coordinate
/// entry is rejected by `parse_pt`.
fn recurse(
    node: roxmltree::Node,
    shapes: &mut Vec<ExtraShape>,
    skipped_count: &mut usize,
    kv: &mut BTreeMap<String, String>,
    gps_bounds: &GPSBounds,
    require_all_pts_in_bounds: bool,
) -> Result<(), Box<dyn Error>> {
    for child in node.children() {
        recurse(
            child,
            shapes,
            skipped_count,
            kv,
            gps_bounds,
            require_all_pts_in_bounds,
        )?;
    }

    match node.tag_name().name() {
        "SimpleData" => {
            // Malformed input is reported to the caller instead of panicking.
            let key = node
                .attribute("name")
                .ok_or("SimpleData element is missing its name attribute")?;
            let value = node.text().unwrap_or("");
            kv.insert(key.to_string(), value.to_string());
        }
        "coordinates" => {
            let mut any_oob = false;
            let mut any_ok = false;
            let mut pts: Vec<LonLat> = Vec::new();
            // roxmltree does not trim text, and KML separates coordinate tuples with arbitrary
            // whitespace (spaces, tabs, newlines), so split on any whitespace run rather than
            // on single spaces only.
            for pair in node.text().unwrap_or("").split_whitespace() {
                let pt = parse_pt(pair)
                    .ok_or_else(|| format!("Malformed coordinates: {}", pair))?;
                pts.push(pt);
                if gps_bounds.contains(pt) {
                    any_ok = true;
                } else {
                    any_oob = true;
                }
            }

            // Always consume the pending attributes: they belong to this shape whether or not
            // it is kept.
            let attributes = std::mem::take(kv);
            if any_ok && (!any_oob || !require_all_pts_in_bounds) {
                shapes.push(ExtraShape {
                    points: pts,
                    attributes,
                });
            } else {
                *skipped_count += 1;
            }
        }
        _ => {}
    }
    Ok(())
}
fn parse_pt(input: &str) -> Option<LonLat> {