Fix the bug with grouping PSRC trips by unique person.

Thanks to the folks at PSRC for helping track this down.
This commit is contained in:
Dustin Carlino 2020-03-05 11:19:32 -08:00
parent 39ff3b1310
commit f97918ff04
5 changed files with 84 additions and 46 deletions

View File

@ -46,6 +46,10 @@ where
self.map.get(&key).unwrap_or(&self.empty)
}
/// Returns the number of keys stored in the underlying map -- not the total number of values
/// across all keys.
// NOTE(review): a key whose value set has become empty would still be counted if the map keeps
// the entry around; confirm against the removal logic, which is not visible here.
pub fn len(&self) -> usize {
self.map.len()
}
/// Borrows the underlying map from each key to its set of values. Only visible within this
/// crate.
pub(crate) fn raw_map(&self) -> &BTreeMap<K, BTreeSet<V>> {
&self.map
}

View File

@ -51,6 +51,7 @@ pub fn load(dir_path: &str) -> Vec<Route> {
GTFSIterator::<_, transitfeed::StopTime>::from_path(&format!("{}/stop_times.txt", dir_path))
.unwrap()
.map(|rec| rec.unwrap())
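// TODO group_by only groups consecutive records sharing a trip ID. If one trip's records aren't
// contiguous in stop_times.txt, that trip gets split into multiple groups. Might be a bug.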
.group_by(|rec| rec.trip_id.clone())
.into_iter()
{

View File

@ -9,7 +9,6 @@ abstutil = { path = "../abstutil" }
csv = "1.0.1"
failure = "0.1.2"
geom = { path = "../geom" }
itertools = "0.8.0"
kml = { path = "../kml" }
map_model = { path = "../map_model" }
serde = "1.0.98"

View File

@ -2,7 +2,7 @@ use abstutil::{prettyprint_usize, FileWithProgress, Timer};
use geom::{Distance, Duration, FindClosest, LonLat, Pt2D, Time};
use map_model::Map;
use serde_derive::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
@ -65,9 +65,20 @@ pub fn import_trips(
) -> Result<(Vec<Trip>, BTreeMap<i64, Parcel>), failure::Error> {
let (parcels, metadata, oob_parcels) = import_parcels(parcels_path, timer)?;
if false {
timer.start("recording parcel IDs");
let mut f = File::create("parcels.csv")?;
writeln!(f, "parcel_id")?;
for id in parcels.keys() {
writeln!(f, "{}", id)?;
}
timer.stop("recording parcel IDs");
}
let mut trips = Vec::new();
let (reader, done) = FileWithProgress::new(trips_path)?;
let mut total_records = 0;
let mut people: HashSet<(usize, usize)> = HashSet::new();
for rec in csv::Reader::from_reader(reader).deserialize() {
total_records += 1;
@ -91,7 +102,7 @@ pub fn import_trips(
};
if from.osm_building == to.osm_building {
// TODO Plumb along pass-through trips later
// TODO Losing some people here.
if from.osm_building.is_some() {
/*timer.warn(format!(
"Skipping trip from parcel {} to {}; both match OSM building {:?}",
@ -106,6 +117,7 @@ pub fn import_trips(
let mode = if let Some(m) = get_mode(&rec.mode) {
m
} else {
// TODO Losing some people here.
continue;
};
@ -115,6 +127,7 @@ pub fn import_trips(
let trip_dist = Distance::miles(rec.travdist);
let person = (rec.hhno as usize, rec.pno as usize);
people.insert(person);
let seq = (rec.tour as usize, rec.half == 2.0, rec.tseg as usize);
trips.push(Trip {
@ -132,8 +145,9 @@ pub fn import_trips(
done(timer);
timer.note(format!(
"{} trips total. {} records filtered out",
"{} trips total, over {} people. {} records filtered out",
prettyprint_usize(trips.len()),
prettyprint_usize(people.len()),
prettyprint_usize(total_records - trips.len())
));
@ -195,10 +209,21 @@ fn import_parcels(
"run cs2cs on {} points",
prettyprint_usize(parcel_metadata.len())
));
// If you have an ancient version of cs2cs (like from Ubuntu's proj-bin package), the command
// should instead be:
let mut output = std::process::Command::new("cs2cs")
.args(vec![
"esri:102748",
"+to",
"epsg:4326",
"-f",
"%.5f",
"/tmp/parcels",
])
.output()?;
if !output.status.success() {
// If you have an ancient version of cs2cs (like from Ubuntu's proj-bin package), the
// command should instead be:
// cs2cs +init=esri:102748 +to +init=epsg:4326 -f '%.5f' foo
let output = std::process::Command::new("cs2cs")
output = std::process::Command::new("cs2cs")
.args(vec![
"+init=esri:102748",
"+to",
@ -208,6 +233,7 @@ fn import_parcels(
"/tmp/parcels",
])
.output()?;
}
assert!(output.status.success());
timer.stop(format!(
"run cs2cs on {} points",

View File

@ -1,9 +1,7 @@
use crate::psrc::{Endpoint, Mode, Parcel, Purpose};
use crate::PopDat;
use abstutil::prettyprint_usize;
use abstutil::Timer;
use abstutil::{prettyprint_usize, MultiMap, Timer};
use geom::{Distance, Duration, LonLat, Polygon, Pt2D, Time};
use itertools::Itertools;
use map_model::{BuildingID, IntersectionID, Map, PathConstraints, Position};
use sim::{DrivingGoal, Scenario, SidewalkSpot, SpawnTrip, TripSpec};
use std::collections::{BTreeMap, HashMap};
@ -226,6 +224,7 @@ pub fn clip_trips(map: &Map, timer: &mut Timer) -> (Vec<Trip>, HashMap<BuildingI
.filter_map(|i| i.polygon.center().to_gps(bounds).map(|pt| (i.id, pt)))
.collect();
let total_trips = popdat.trips.len();
let maybe_results: Vec<Option<Trip>> = timer.parallelize("clip trips", popdat.trips, |trip| {
let from = TripEndpt::new(
&trip.from,
@ -275,7 +274,13 @@ pub fn clip_trips(map: &Map, timer: &mut Timer) -> (Vec<Trip>, HashMap<BuildingI
Some(trip)
});
let trips = maybe_results.into_iter().flatten().collect();
let trips: Vec<Trip> = maybe_results.into_iter().flatten().collect();
timer.note(format!(
"{} trips clipped down to just {}",
prettyprint_usize(total_trips),
prettyprint_usize(trips.len())
));
let mut bldgs = HashMap::new();
for (osm_id, metadata) in popdat.parcels {
@ -288,39 +293,52 @@ pub fn clip_trips(map: &Map, timer: &mut Timer) -> (Vec<Trip>, HashMap<BuildingI
pub fn trips_to_scenario(map: &Map, timer: &mut Timer) -> Scenario {
let (trips, _) = clip_trips(map, timer);
let mut individ_trips: Vec<SpawnTrip> = Vec::new();
// TODO Don't clone trips for parallelize
let mut num_ppl = 0;
let orig_trips = trips.len();
for (person, list) in timer
.parallelize("turn PSRC trips into SpawnTrips", trips.clone(), |trip| {
let individ_parked_cars = count_cars(&trips, map);
let mut individ_trips: Vec<SpawnTrip> = Vec::new();
// person -> (trip seq, index into individ_trips)
let mut trips_per_person: MultiMap<(usize, usize), ((usize, bool, usize), usize)> =
MultiMap::new();
for (trip, person, seq) in timer
.parallelize("turn PSRC trips into SpawnTrips", trips, |trip| {
trip.to_spawn_trip(map)
.map(|spawn| (spawn, trip.person, trip.seq))
})
.into_iter()
.flatten()
.group_by(|(_, person, _)| *person)
.into_iter()
{
// TODO Try doing the grouping earlier, before we filter out cases where to_spawn_trip
// fails
num_ppl += 1;
let mut seqs = Vec::new();
for (spawn, _, seq) in list {
seqs.push(seq);
individ_trips.push(spawn);
}
if seqs.len() > 1 {
println!("{:?} takes {} trips: {:?}", person, seqs.len(), seqs);
}
let idx = individ_trips.len();
individ_trips.push(trip);
trips_per_person.insert(person, (seq, idx));
}
timer.note(format!(
"{} trips over {} people. {} trips filtered out",
"{} clipped trips down to {}, over {} people",
prettyprint_usize(orig_trips),
prettyprint_usize(individ_trips.len()),
prettyprint_usize(num_ppl),
prettyprint_usize(orig_trips - individ_trips.len())
prettyprint_usize(trips_per_person.len())
));
// TODO Just debugging for now, but plumb through a full representation of population into
// Scenario later. Track when there are gaps in the sequence, to explain the person warping.
/*for (person, seq_trips) in trips_per_person.consume() {
println!("{:?} takes {} trips", person, seq_trips.len());
}*/
Scenario {
scenario_name: "weekday".to_string(),
map_name: map.get_name().to_string(),
only_seed_buses: None,
seed_parked_cars: Vec::new(),
spawn_over_time: Vec::new(),
border_spawn_over_time: Vec::new(),
individ_trips,
individ_parked_cars,
}
}
fn count_cars(trips: &Vec<Trip>, map: &Map) -> BTreeMap<BuildingID, usize> {
// How many parked cars do we need to spawn near each building?
// TODO This assumes trips are instantaneous. At runtime, somebody might try to use a parked
// car from a building, but one hasn't been delivered yet.
@ -345,15 +363,5 @@ pub fn trips_to_scenario(map: &Map, timer: &mut Timer) -> Scenario {
*avail_per_bldg.get_mut(&b).unwrap() += 1;
}
}
Scenario {
scenario_name: "weekday".to_string(),
map_name: map.get_name().to_string(),
only_seed_buses: None,
seed_parked_cars: Vec::new(),
spawn_over_time: Vec::new(),
border_spawn_over_time: Vec::new(),
individ_trips,
individ_parked_cars,
}
individ_parked_cars
}