use itertools in gtfs

This commit is contained in:
Dustin Carlino 2018-09-07 10:46:55 -07:00
parent 1b30dafb03
commit 9c317137d7
2 changed files with 21 additions and 45 deletions

View File

@ -7,3 +7,4 @@ authors = ["Dustin Carlino <dabreegster@gmail.com>"]
csv = "1.0.1"
failure = "0.1.2"
geom = { path = "../geom" }
itertools = "0.7.8"

View File

@ -1,9 +1,11 @@
extern crate csv;
extern crate failure;
extern crate geom;
extern crate itertools;
use failure::Error;
use geom::LonLat;
use itertools::Itertools;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::time::Instant;
@ -39,59 +41,32 @@ pub fn load(dir_path: &str) -> Result<Vec<Route>, Error> {
}
// Each route has many trips. Ignore all but the first and assume the list of stops is the
// same.
// same. Also assume that records with the same trip ID are contiguous in the file and that
// stop_sequence increases monotonically within each trip.
let mut route_ids_used: HashSet<String> = HashSet::new();
let mut results: Vec<Route> = Vec::new();
// TODO This isn't simple or fast. :(
// Try implementing an iterator that groups adjacent records matching a predicate.
let mut reader = csv::Reader::from_reader(File::open(format!("{}/stop_times.txt", dir_path))?);
let mut iter = reader.records();
let mut records: Vec<csv::StringRecord> = Vec::new();
loop {
if let Some(rec) = iter.next() {
records.push(rec?);
} else {
// If the file is exhausted, a single leftover record carried over from next_rec would
// mean a trip with just one stop, which shouldn't happen.
assert!(records.is_empty());
break;
}
let route_id = trip_id_to_route_id[&records[0][0]].to_string();
let keep_records = !route_ids_used.contains(&route_id);
// Slurp all records with the same trip ID. Assume they're contiguous.
let mut next_rec: Option<csv::StringRecord> = None;
loop {
if let Some(rec) = iter.next() {
let rec = rec?;
if records[0][0] == rec[0] {
if keep_records {
records.push(rec);
}
for (key, group) in reader
.records()
.group_by(|rec| rec.as_ref().unwrap()[0].to_string())
.into_iter()
{
let route_id = trip_id_to_route_id[&key].to_string();
if route_ids_used.contains(&route_id) {
continue;
} else {
next_rec = Some(rec);
}
}
break;
}
if keep_records {
route_ids_used.insert(route_id.clone());
results.push(Route {
name: route_id_to_name[&route_id].to_string(),
stops: records.iter().map(|rec| stop_id_to_pt[&rec[3]]).collect(),
stops: group
.into_iter()
.map(|rec| stop_id_to_pt[&rec.unwrap()[3]])
.collect(),
});
}
records.clear();
if let Some(rec) = next_rec {
records.push(rec);
}
}
let elapsed = timer.elapsed();
let dt = elapsed.as_secs() as f64 + f64::from(elapsed.subsec_nanos()) * 1e-9;
println!("Loading GTFS took {}s", dt);