From acadad8bdf7077a095b27a8fcaf2b2c07438cc15 Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Thu, 31 Dec 2020 16:20:32 -0800 Subject: [PATCH] fetch US census data from a hosted FlatGeoBuf (#432) --- Cargo.lock | 204 ++++++++++++++++++++++- Cargo.toml | 5 +- README.md | 1 + book/src/trafficsim/travel_demand.md | 14 ++ game/Cargo.toml | 1 + game/src/sandbox/gameplay/mod.rs | 42 ++++- game/src/sandbox/mod.rs | 22 +++ geom/src/polygon.rs | 2 +- map_gui/Cargo.toml | 10 +- map_gui/src/load.rs | 125 +++++++++++++- popdat/Cargo.toml | 4 + popdat/scripts/build_population_areas.sh | 124 ++++++++++++++ popdat/src/distribute_people.rs | 14 +- popdat/src/import_census.rs | 176 ++++++++----------- popdat/src/lib.rs | 16 +- 15 files changed, 623 insertions(+), 137 deletions(-) create mode 100755 popdat/scripts/build_population_areas.sh diff --git a/Cargo.lock b/Cargo.lock index 2128f7ae91..55b66772e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,9 +123,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.35" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c0df63cb2955042487fad3aefd2c6e3ae7389ac5dc1beb28921de0b69f779d4" +checksum = "ee67c11feeac938fae061b232e38e0b6d94f97a9df10e6271319325ac4c56a86" [[package]] name = "approx" @@ -160,6 +160,17 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eab1c04a571841102f5345a8fc0f6bb3d31c315dec879b5c6e42e40ce7ffa34e" +[[package]] +name = "async-trait" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atty" version = "0.2.14" @@ -1003,6 +1014,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "flatbuffers" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a788f068dd10687940565bf4b5480ee943176cbd114b12e811074bcf7c04e4b9" +dependencies = [ + "smallvec", +] + [[package]] name = "flate2" version = "1.0.19" @@ -1015,6 +1035,21 @@ dependencies = [ "miniz_oxide 0.4.3", ] +[[package]] +name = "flatgeobuf" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bee1d16f8b1c0d3617e79167384c81b321af70ee98cceec763d9265fd8ae979" +dependencies = [ + "async-trait", + "byteorder", + "bytes", + "flatbuffers", + "geozero", + "log", + "reqwest", +] + [[package]] name = "float-cmp" version = "0.5.3" @@ -1189,6 +1224,7 @@ version = "0.1.0" dependencies = [ "aabb-quadtree", "abstutil", + "anyhow", "built", "chrono", "collisions", @@ -1351,7 +1387,7 @@ dependencies = [ [[package]] name = "geojson" version = "0.21.0" -source = "git+https://github.com/georust/geojson?branch=mkirk/try_from#ac0429218756af96c379a8701383ef4d36d78779" +source = "git+https://github.com/georust/geojson#7f1de484ae0e3a640a9539f6adec14b17f8f6fc1" dependencies = [ "geo-types 0.6.2", "num-traits 0.2.14", @@ -1377,6 +1413,30 @@ dependencies = [ "serde", ] +[[package]] +name = "geozero" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b6299285fa39bde0802a8926affab1b4555663b970e2c6289aa77248da68233" +dependencies = [ + "async-trait", + "seek_bufread", + "thiserror", +] + +[[package]] +name = "geozero-core" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33413ff58db476c26e398142bcfc861fa285b78a0d0d7a03192b7e385797cd35" +dependencies = [ + "bytes", + "geo-types 0.6.2", + "geojson", + "geozero", + "scroll", +] + [[package]] name = "getrandom" version = "0.1.15" @@ -1665,6 +1725,19 @@ dependencies = [ "webpki", ] +[[package]] +name = "hyper-tls" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d979acc56dcb5b8dddba3917601745e877576475aa046df3226eabdecef78eed" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-tls", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -2089,6 +2162,7 @@ version = "0.1.0" dependencies = [ "aabb-quadtree", "abstutil", + "anyhow", "colorous", "contour", "flate2", @@ -2103,6 +2177,7 @@ dependencies = [ "reqwest", "serde", "sim", + "tokio", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", @@ -2279,6 +2354,24 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "native-tls" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "nbez" version = "0.1.0" @@ -2542,6 +2635,39 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" +[[package]] +name = "openssl" +version = "0.10.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038d43985d1ddca7a9900630d8cd031b56e4794eecc2e9ea39dd17aa04399a70" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "foreign-types", + "lazy_static", + "libc", + "openssl-sys", +] + +[[package]] +name = "openssl-probe" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" + +[[package]] +name = "openssl-sys" +version = "0.9.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "921fc71883267538946025deffb622905ecad223c28efbfdef9bb59a0175f3e6" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "ordered-float" version = "2.0.1" @@ -2766,9 +2892,13 @@ version = "0.1.0" dependencies = [ "abstutil", "anyhow", + "flatgeobuf", + "futures", "geo 0.16.0", + "geo-booleanop", "geojson", "geom", + "geozero-core", "log", "map_model", "rand", @@ -2982,12 +3112,14 @@ dependencies = [ "http-body", "hyper", "hyper-rustls", + "hyper-tls", "ipnet", "js-sys", "lazy_static", "log", "mime", "mime_guess", + "native-tls", "percent-encoding 2.1.0", "pin-project-lite 0.2.0", "rustls 0.18.1", @@ -2995,6 +3127,7 @@ dependencies = [ "serde_urlencoded", "tokio", "tokio-rustls", + "tokio-tls", "url 2.2.0", "wasm-bindgen", "wasm-bindgen-futures", @@ -3165,6 +3298,16 @@ dependencies = [ "widgetry", ] +[[package]] +name = "schannel" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" +dependencies = [ + "lazy_static", + "winapi 0.3.9", +] + [[package]] name = "scoped-tls" version = "1.0.0" @@ -3183,6 +3326,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "scroll" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda28d4b4830b807a8b43f7b0e6b5df875311b3e7621d84577188c175b6ec1ec" + [[package]] name = "sct" version = "0.6.0" @@ -3193,6 +3342,35 @@ dependencies = [ "untrusted", ] +[[package]] +name = "security-framework" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1759c2e3c8580017a484a7ac56d3abc5a6c1feadf88db2f3633f12ae4268c69" +dependencies = [ + "bitflags", + "core-foundation 0.9.1", + "core-foundation-sys 0.8.2", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f99b9d5e26d2a71633cc4f2ebae7cc9f874044e0c351a27e17892d76dce5678b" +dependencies = [ + "core-foundation-sys 0.8.2", + "libc", +] + +[[package]] +name = "seek_bufread" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a248bca144497822c75e8a12d26b7c0bfd9240fc4baa0b2f64f81619ddd58d" + [[package]] name = "semver" version = "0.9.0" @@ -3279,9 +3457,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.2.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce32ea0c6c56d5eacaeb814fbed9960547021d3edd010ded1425f180536b20ab" +checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6" dependencies = [ "libc", ] @@ -3634,6 +3812,16 @@ dependencies = [ "webpki", ] +[[package]] +name = "tokio-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a70f4fcd7b3b24fb194f837560168208f669ca8cb70d0c4b862944452396343" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-util" version = "0.3.1" @@ -3909,6 +4097,12 @@ dependencies = [ "xmlwriter", ] +[[package]] +name = "vcpkg" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb" + [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 37acf09553..8f5394eeda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,4 +34,7 @@ members = [ opt-level = 3 [patch.crates-io] -geojson = { git = "https://github.com/georust/geojson", branch = "mkirk/try_from" } +# Some niceties for parsing geojson feature properties. +# Upstreaming at https://github.com/georust/geojson/pull/155 +geojson = { git = "https://github.com/georust/geojson" } + diff --git a/README.md b/README.md index d7de96201e..110b9fbc35 100644 --- a/README.md +++ b/README.md @@ -128,3 +128,4 @@ Data: - [King County GIS](https://www.kingcounty.gov/services/gis.aspx) - [Seattle Open Data](https://data.seattle.gov/) - [Puget Sound Regional Council](https://www.psrc.org/) +- [IPUMS NHGIS, University of Minnesota](https://www.nhgis.org) diff --git a/book/src/trafficsim/travel_demand.md b/book/src/trafficsim/travel_demand.md index 8476ffc71d..ee2f39b464 100644 --- a/book/src/trafficsim/travel_demand.md +++ b/book/src/trafficsim/travel_demand.md @@ -60,6 +60,20 @@ different types of people (students, workers), give them a set of activities with durations (go to school for 7 hours, 1 hour lunch break), and then further pick specfic buildings to travel to using more OSM tags. +### Census Based + +Trips are distributed based on where we believe people live. For the US, this +information comes from the US Census. To take advantage of this model for areas +outside the US, you'll need to add your data to the global `population_areas` +file. This is one huge file that is shared across regions. This is more work up +front, but makes adding individual cities relatively straight forward. + +#### Preparing the `population_areas` file + +See `popdat/scripts/build_population_areas.sh` for updating or adding to the +existing population areas. Once rebuilt, you'll need to upload the file so that +popdat can find it. + ### Custom import If you have your own data, you can import it. The input format is JSON -- an diff --git a/game/Cargo.toml b/game/Cargo.toml index 1781eead7d..85583ff995 100644 --- a/game/Cargo.toml +++ b/game/Cargo.toml @@ -17,6 +17,7 @@ wasm = ["map_gui/wasm", "wasm-bindgen", "widgetry/wasm-backend"] [dependencies] aabb-quadtree = "0.1.0" abstutil = { path = "../abstutil" } +anyhow = "1.0.37" built = { version = "0.4.3", optional = true, features=["chrono"] } chrono = "0.4.15" collisions = { path = "../collisions" } diff --git a/game/src/sandbox/gameplay/mod.rs b/game/src/sandbox/gameplay/mod.rs index 88e24879d8..b3b44e8b4d 100644 --- a/game/src/sandbox/gameplay/mod.rs +++ b/game/src/sandbox/gameplay/mod.rs @@ -1,3 +1,6 @@ +use core::future::Future; +use core::pin::Pin; + use rand_xorshift::XorShiftRng; use abstutil::{MapName, Timer}; @@ -81,6 +84,19 @@ pub enum LoadScenario { Nothing, Path(String), Scenario(Scenario), + // wasm futures are not `Send`, since they all ultimately run on the browser's single threaded + // runloop + #[cfg(target_arch = "wasm32")] + Future(Pin Scenario>>>>>), + #[cfg(not(target_arch = "wasm32"))] + Future( + Pin< + Box< + dyn Send + + Future Scenario>>>, + >, + >, + ), } impl GameplayMode { @@ -118,11 +134,27 @@ impl GameplayMode { } else if name == "home_to_work" { LoadScenario::Scenario(ScenarioGenerator::proletariat_robot(map, &mut rng, timer)) } else if name == "census" { - let config = popdat::Config::default(); - LoadScenario::Scenario( - popdat::generate_scenario("typical monday", config, map, &mut rng) - .expect("unable to build census scenario"), - ) + let map_area = map.get_boundary_polygon().clone(); + let map_bounds = map.get_gps_bounds().clone(); + let mut rng = sim::fork_rng(&mut rng); + + LoadScenario::Future(Box::pin(async move { + let areas = popdat::CensusArea::fetch_all_for_map(&map_area, &map_bounds).await?; + + let scenario_from_app: Box Scenario> = + Box::new(move |app: &App| { + let config = popdat::Config::default(); + popdat::generate_scenario( + "typical monday", + areas, + config, + &app.primary.map, + &mut rng, + ) + }); + + Ok(scenario_from_app) + })) } else { LoadScenario::Path(abstutil::path_scenario(map.get_name(), &name)) } diff --git a/game/src/sandbox/mod.rs b/game/src/sandbox/mod.rs index 22a3f4102b..85c3642e2f 100644 --- a/game/src/sandbox/mod.rs +++ b/game/src/sandbox/mod.rs @@ -678,6 +678,28 @@ impl State for SandboxLoader { self.stage = Some(LoadStage::GotScenario(scenario)); continue; } + gameplay::LoadScenario::Future(future) => { + use map_gui::load::FutureLoader; + return Transition::Push(FutureLoader::::new( + ctx, + Box::pin(future), + "Loading Scenario", + Box::new(|_, _, scenario| { + // TODO show error/retry alert? + let scenario = + scenario.expect("failed to load scenario from future"); + Transition::Multi(vec![ + Transition::Pop, + Transition::ModifyState(Box::new(|state, _, app| { + let loader = + state.downcast_mut::().unwrap(); + app.primary.scenario = Some(scenario.clone()); + loader.stage = Some(LoadStage::GotScenario(scenario)); + })), + ]) + }), + )); + } gameplay::LoadScenario::Path(path) => { // Reuse the cached scenario, if possible. if let Some(ref scenario) = app.primary.scenario { diff --git a/geom/src/polygon.rs b/geom/src/polygon.rs index 012835cbc3..929d322402 100644 --- a/geom/src/polygon.rs +++ b/geom/src/polygon.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{Angle, Bounds, Distance, HashablePt2D, PolyLine, Pt2D, Ring}; -#[derive(Serialize, Deserialize, Clone, Debug)] +#[derive(PartialEq, Serialize, Deserialize, Clone, Debug)] pub struct Polygon { points: Vec, /// Groups of three indices make up the triangles diff --git a/map_gui/Cargo.toml b/map_gui/Cargo.toml index 3b57af669f..bfeb0553a8 100644 --- a/map_gui/Cargo.toml +++ b/map_gui/Cargo.toml @@ -5,8 +5,8 @@ authors = ["Dustin Carlino "] edition = "2018" [features] -native = ["reqwest"] -wasm = ["futures", "futures-channel", "js-sys", "wasm-bindgen", "wasm-bindgen-futures", "web-sys"] +native = ["reqwest", "tokio"] +wasm = ["js-sys", "wasm-bindgen", "wasm-bindgen-futures", "web-sys"] # Just a marker to not use localhost URLs wasm_s3 = [] # A marker to use a named release from S3 instead of dev for updating files @@ -15,11 +15,12 @@ release_s3 = [] [dependencies] aabb-quadtree = "0.1.0" abstutil = { path = "../abstutil" } +anyhow = "1.0.37" colorous = "1.0.3" contour = "0.3.0" flate2 = "1.0.19" -futures = { version = "0.3.8", optional = true } -futures-channel = { version = "0.3.8", optional = true } +futures = { version = "0.3.8" } +futures-channel = { version = "0.3.8"} geojson = "0.21.0" geom = { path = "../geom" } instant = "0.1.7" @@ -29,6 +30,7 @@ map_model = { path = "../map_model" } reqwest = { version = "0.10.8", optional = true, default-features=false, features=["blocking", "rustls-tls"] } serde = "1.0.116" sim = { path = "../sim" } +tokio = { version ="0.2", features=["rt-core"], optional = true } wasm-bindgen = { version = "0.2.68", optional = true } wasm-bindgen-futures = { version = "0.4.18", optional = true } webbrowser = "0.5.5" diff --git a/map_gui/src/load.rs b/map_gui/src/load.rs index b7ab9c93a5..d3072ed93c 100644 --- a/map_gui/src/load.rs +++ b/map_gui/src/load.rs @@ -1,10 +1,18 @@ //! Loading large resources (like maps, scenarios, and prebaked data) requires different strategies //! on native and web. Both cases are wrapped up as a State that runs a callback when done. +use std::future::Future; +use std::pin::Pin; + +use futures_channel::oneshot; +use instant::Instant; use serde::de::DeserializeOwned; +#[cfg(not(target_arch = "wasm32"))] +use tokio::runtime::Runtime; use abstutil::{MapName, Timer}; -use widgetry::{Color, EventCtx, GfxCtx, State, Transition}; +use geom::Duration; +use widgetry::{Color, EventCtx, GfxCtx, Line, Panel, State, Text, Transition, UpdateType}; use crate::tools::PopupMsg; use crate::AppLike; @@ -244,3 +252,118 @@ mod wasm_loader { } } } + +pub struct FutureLoader +where + A: AppLike, +{ + loading_title: String, + started: Instant, + panel: Panel, + receiver: oneshot::Receiver T>>>, + on_load: Option) -> Transition>>, + + // If Runtime is dropped, any active tasks will be canceled, so we retain it here even + // though we never access it. It might make more sense for Runtime to live on App if we're + // going to be doing more background spawning. + #[cfg(not(target_arch = "wasm32"))] + #[allow(dead_code)] + runtime: Runtime, +} + +impl FutureLoader +where + A: 'static + AppLike, + T: 'static, +{ + #[cfg(target_arch = "wasm32")] + pub fn new( + ctx: &mut EventCtx, + future: Pin T>>>>>, + loading_title: &str, + on_load: Box) -> Transition>, + ) -> Box> { + let (tx, receiver) = oneshot::channel(); + wasm_bindgen_futures::spawn_local(async move { + tx.send(future.await).ok().unwrap(); + }); + Box::new(FutureLoader { + loading_title: loading_title.to_string(), + started: Instant::now(), + panel: ctx.make_loading_screen(Text::from(Line(loading_title))), + receiver, + on_load: Some(on_load), + }) + } + + #[cfg(not(target_arch = "wasm32"))] + pub fn new( + ctx: &mut EventCtx, + future: Pin< + Box T>>>>, + >, + loading_title: &str, + on_load: Box) -> Transition>, + ) -> Box> { + let runtime = Runtime::new().unwrap(); + let (tx, receiver) = oneshot::channel(); + runtime.spawn(async move { + tx.send(future.await).ok().unwrap(); + }); + + Box::new(FutureLoader { + loading_title: loading_title.to_string(), + started: Instant::now(), + panel: ctx.make_loading_screen(Text::from(Line(loading_title))), + receiver, + on_load: Some(on_load), + runtime, + }) + } +} + +impl State for FutureLoader +where + A: 'static + AppLike, + T: 'static, +{ + fn event(&mut self, ctx: &mut EventCtx, app: &mut A) -> Transition { + match self.receiver.try_recv() { + Err(e) => { + error!("channel failed: {:?}", e); + let on_load = self.on_load.take().unwrap(); + return on_load(ctx, app, Err(anyhow::anyhow!("channel canceled"))); + } + Ok(None) => { + self.panel = ctx.make_loading_screen(Text::from_multiline(vec![ + Line(&self.loading_title), + Line(format!( + "Time spent: {}", + Duration::realtime_elapsed(self.started) + )), + ])); + + // Until the response is received, just ask winit to regularly call event(), so we + // can keep polling the channel. + ctx.request_update(UpdateType::Game); + return Transition::Keep; + } + Ok(Some(Err(e))) => { + error!("error in fetching data"); + let on_load = self.on_load.take().unwrap(); + return on_load(ctx, app, Err(e)); + } + Ok(Some(Ok(builder))) => { + debug!("future complete"); + let t = builder(app); + let on_load = self.on_load.take().unwrap(); + return on_load(ctx, app, Ok(t)); + } + } + } + + fn draw(&self, g: &mut GfxCtx, _: &A) { + g.clear(Color::BLACK); + self.panel.draw(g); + } +} diff --git a/popdat/Cargo.toml b/popdat/Cargo.toml index 7a59ec0a35..f4452cc93c 100644 --- a/popdat/Cargo.toml +++ b/popdat/Cargo.toml @@ -7,12 +7,16 @@ edition = "2018" [dependencies] abstutil = { path = "../abstutil" } anyhow = "1.0.35" +flatgeobuf = { version = "0.4" } +futures = "0.3.8" geo = "0.16.0" geojson = { version = "0.21.0", features = ["geo-types"] } geom = { path = "../geom" } +geozero-core = { version = "0.5.1" } log = "0.4.11" map_model = { path = "../map_model" } rand = "0.7.0" rand_xorshift = "0.2.0" +geo-booleanop = "0.3.2" serde_json = "1.0.60" sim = { path = "../sim" } diff --git a/popdat/scripts/build_population_areas.sh b/popdat/scripts/build_population_areas.sh new file mode 100755 index 0000000000..b43e76815e --- /dev/null +++ b/popdat/scripts/build_population_areas.sh @@ -0,0 +1,124 @@ +#!/bin/bash -e + +# Outputs a FlatGeoBuf of geometries and their corresponding population, +# suitable for use in the popdat crate. +# +# You'll need to update the `configuration` section to get this to work on your +# machine. +# +# Note: I've only ever used this for US data, with "Census Blocks" as areas. In +# theory other countries could also massage their data to fit into the combined +# FlatGeoBuf. +# +# ## Input +# +# I wasn't able to find a reasonable solution using the official census.gov +# files or API's. Instead I batch-downloaded all the census blocks and their +# populations from the very nice nhgis.org +# +# Specifically, at the time of writing the steps were: +# - create an account for nhgis.org and log in +# - select "Get Data" +# - Under "Apply Filters": +# - Geographic Level: choose "Block", then "submit" +# - Years: "2010" (hopefully 2020 will be available before long) +# - Topics: choose "General: Total Population", then "submit" +# - Under "Select Data": +# - Select "Total Population" +# - Click "Continue" towards top right +# - Click "Continue" again towards top right +# - Source Tables: +# - You should see that you've selected "1 source table" +# - This corresponds to a CSV file of the populations and GISJOIN id but +# no geometries +# - Click "Geographic Extents" > "Select All" > Submit +# - This corresponds to shapefiles for every state + DC + Puerto Rico +# with a GISJOIN attribute to join to the population CSV +# - Enter a "Description" which is memorable to you like (2010 all US block populations) +# - Submit and wait for the batch to be prepared +# +# Input files: +# - a csv of populations with a GISJOIN field +# - geometry shapefiles with the area boundaries and a GISJOIN field +# +# Output: +# An single FGB with a feature for each area which includes its population. + +# Configuration + +## Inputs + +geometry_shapefiles=$(ls source/nhgis_us_block_population/*.shp) +population_csv=source/nhgis_us_block_population/nhgis0004_ds172_2010_block.csv +population_column_name=H7V001 + +# ## SpatiaLite +# +# Even though we're not doing spatial changes in sqlite, making *any* changes +# to spatial tables, fires db triggers, which require you have the spatialite +# lib linked. +spatialite_bin=/usr/local/Cellar/sqlite/3.34.0/bin/sqlite3 +spatialite_lib=/usr/local/lib/mod_spatialite.dylib + +# Main program follows + +gpkg_output=generated/population_areas.gpkg + +rm -f $gpkg_output + +echo "Importing geometries - start $(date)" + +for i in $geometry_shapefiles; do + filename=$(basename -- "$i") + extension="${filename##*.}" + layer_name="${filename%.*}" + if [ ! -f "$gpkg_output" ]; then + echo "starting with $i" + # first file - create the consolidated output file + + # I tried selecting only the columns we needed here, but I got a "table + # not found" error, so instead we filter later when building the FGB + # ogr2ogr -f "GPKG" -nln areas -t_srs "WGS84" -dialect SQLite -sql "SELECT GISJOIN FROM $layer_name" $gpkg_output $i + + # Note the `nlt` option addresses this warning: + # > Warning 1: A geometry of type MULTIPOLYGON is inserted into layer areas of geometry type POLYGON, which is not normally allowed by the GeoPackage specification, but the driver will however do it. To create a conformant GeoPackage, if using ogr2ogr, the -nlt option can be used to override the layer geometry type. This warning will no longer be emitted for this combination of layer and feature geometry type. + ogr2ogr -f "GPKG" -nlt MULTIPOLYGON -nln areas -t_srs "WGS84" $gpkg_output $i + else + echo "merging $i" + # update the output file with new file content + #ogr2ogr -f "GPKG" -nln areas -t_srs "WGS84" -dialect SQLite -sql "SELECT GISJOIN FROM $layer_name" -update -append $gpkg_output $i + ogr2ogr -f "GPKG" -nlt MULTIPOLYGON -nln areas -t_srs "WGS84" -update -append $gpkg_output $i + fi +done + +echo "### Indexing geometries - start $(date)" +echo " +PRAGMA journal_mode = off; +CREATE INDEX index_areas_on_gisjoin on areas(GISJOIN) +" | $spatialite_bin $gpkg_output + +echo "## Importing and indexing populations data - start $(date)" + +echo " +PRAGMA journal_mode = off; +DROP TABLE IF EXISTS populations; +.import --csv $population_csv populations +ALTER TABLE populations RENAME COLUMN $population_column_name TO population; +CREATE INDEX index_populations_on_GISJOIN ON populations(GISJOIN); +" | $spatialite_bin $gpkg_output + +echo "## join populations and geometries - start $(date)" + +echo " +PRAGMA journal_mode = off; +.load $spatialite_lib +ALTER TABLE areas ADD COLUMN population; +update areas set population=(select population from populations where populations.GISJOIN=areas.GISJOIN); +" | $spatialite_bin $gpkg_output + +echo "## outputting fgb - start $(date)" + +ogr2ogr generated/population_areas.fgb $gpkg_output -dialect SQLite -sql "SELECT geom, population, GISJOIN FROM areas" + +echo "## Done - $(date)" + diff --git a/popdat/src/distribute_people.rs b/popdat/src/distribute_people.rs index 9fc50ba83c..d1f45c286f 100644 --- a/popdat/src/distribute_people.rs +++ b/popdat/src/distribute_people.rs @@ -1,8 +1,8 @@ +use geo::algorithm::{area::Area, contains::Contains}; use rand::Rng; use rand_xorshift::XorShiftRng; use abstutil::prettyprint_usize; -use geom::Polygon; use map_model::Map; use crate::{CensusArea, CensusPerson, Config}; @@ -15,19 +15,23 @@ pub fn assign_people_to_houses( ) -> Vec { let mut people = Vec::new(); + let map_boundary = geo::Polygon::from(map.get_boundary_polygon().clone()); for area in areas { let bldgs: Vec = map .all_buildings() .into_iter() - .filter(|b| area.polygon.contains_pt(b.label_center) && b.bldg_type.has_residents()) + .filter(|b| { + area.polygon.contains(&geo::Point::from(b.label_center)) + && b.bldg_type.has_residents() + }) .map(|b| b.id) .collect(); // If the area is partly out-of-bounds, then scale down the number of residents linearly // based on area of the overlapping part of the polygon. - let pct_overlap = Polygon::union_all(area.polygon.intersection(map.get_boundary_polygon())) - .area() - / area.polygon.area(); + use geo_booleanop::boolean::BooleanOp; + let pct_overlap = + area.polygon.intersection(&map_boundary).unsigned_area() / area.polygon.unsigned_area(); let num_residents = (pct_overlap * (area.population as f64)) as usize; debug!( "Distributing {} residents to {} buildings. {}% of this area overlapped with the map, \ diff --git a/popdat/src/import_census.rs b/popdat/src/import_census.rs index 4dc200c5c6..e759799dec 100644 --- a/popdat/src/import_census.rs +++ b/popdat/src/import_census.rs @@ -1,128 +1,92 @@ -use std::convert::TryFrom; - use geo::algorithm::intersects::Intersects; -use geojson::GeoJson; -use abstutil::Timer; -use map_model::Map; +use geom::{GPSBounds, Polygon}; use crate::CensusArea; impl CensusArea { - pub fn find_data_for_map(map: &Map, timer: &mut Timer) -> anyhow::Result> { - // TODO eventually it'd be nice to lazily download the info needed. For now we expect a - // prepared geojson file to exist in data/system//population_areas.geojson - // - // expected geojson formatted contents like: - // { - // "type": "FeatureCollection", - // "features": [ - // { - // "type": "Feature", - // "properties": { "population": 123 }, - // "geometry": { - // "type": "MultiPolygon", - // "coordinates": [ [ [ [ -73.7, 40.8 ], [ -73.7, 40.8 ], ...] ] ] ] - // } - // }, - // { - // "type": "Feature", - // "properties": { "population": 249 }, - // "geometry": { - // "type": "MultiPolygon", - // "coordinates": [ [ [ [ -73.8, 40.8 ], [ -73.8, 40.8 ], ...] ] ] - // } - // }, - // ... - // ] - // } - // - // Note: intuitively you might expect a collection of Polygon's rather than MultiPolygons, - // but anecdotally, the census data I've seen uses MultiPolygons. In practice almost - // all are MultiPoly's with just one element, but some truly have multiple polygons. - // - // When we implement downloading, importer/src/utils.rs has a download() helper that we - // could copy here. (And later dedupe, after deciding how this crate will integrate with - // the importer) - let path = abstutil::path(format!( - "system/{}/population_areas.geojson", - map.get_name().city - )); - let bytes = abstutil::slurp_file(&path).map_err(|s| anyhow!(s))?; - debug!("parsing geojson at path: {}", &path); - timer.start("parsing geojson"); - let geojson = GeoJson::from_reader(&*bytes)?; - timer.stop("parsing geojson"); - let mut results = vec![]; - let collection = geojson::FeatureCollection::try_from(geojson)?; + pub async fn fetch_all_for_map( + map_area: &Polygon, + bounds: &GPSBounds, + ) -> anyhow::Result> { + use flatgeobuf::HttpFgbReader; + use geozero_core::geo_types::Geo; - let map_area = map.get_boundary_polygon(); - let bounds = map.get_gps_bounds(); - - use geo::algorithm::map_coords::MapCoordsInplace; + use geo::algorithm::{bounding_rect::BoundingRect, map_coords::MapCoordsInplace}; let mut geo_map_area: geo::Polygon<_> = map_area.clone().into(); geo_map_area.map_coords_inplace(|c| { let projected = geom::Pt2D::new(c.0, c.1).to_gps(bounds); (projected.x(), projected.y()) }); - debug!("collection.features: {}", &collection.features.len()); - timer.start("converting to `CensusArea`s"); - for feature in collection.features.into_iter() { - let population = match feature - .property("population") - .ok_or(anyhow!("missing 'population' property"))? - { - serde_json::Value::Number(n) => n - .as_u64() - .ok_or(anyhow!("unexpected population number: {:?}", n))? - as usize, - _ => { - bail!( - "unexpected format for 'population': {:?}", - feature.property("population") - ); - } - }; + // See the import handbook for how to prepare this file. + let mut fgb = + HttpFgbReader::open("https://abstreet.s3.amazonaws.com/population_areas.fgb").await?; - let geometry = feature - .geometry - .ok_or(anyhow!("geojson feature missing geometry"))?; - let mut multi_poly = geo::MultiPolygon::::try_from(geometry.value)?; - let mut geo_polygon = multi_poly - .0 - .pop() - .ok_or(anyhow!("multipolygon was unexpectedly empty"))?; - if !multi_poly.0.is_empty() { - // I haven't looked into why this is happening - but intuitively a census area could - // include separate polygons - e.g. across bodies of water. In practice they are a - // vast minority, so we naively just take the first one for now. - warn!( - "dropping {} polygons from census area with multiple polygons", - multi_poly.0.len() - ); - } + let bounding_rect = geo_map_area + .bounding_rect() + .ok_or(anyhow!("missing bound rect"))?; + fgb.select_bbox( + bounding_rect.min().x, + bounding_rect.min().y, + bounding_rect.max().x, + bounding_rect.max().y, + ) + .await?; - if !geo_polygon.intersects(&geo_map_area) { - debug!( - "skipping polygon outside of map area. polygon: {:?}, map_area: {:?}", - geo_polygon, geo_map_area - ); + let mut results = vec![]; + while let Some(feature) = fgb.next().await? { + // PERF TODO: how to parse into usize directly? And avoid parsing entire props dict? + let props = feature.properties()?; + if !props.contains_key("population") { + warn!("skipping feature with missing population"); continue; } + let population: usize = props["population"].parse()?; + let geometry = match feature.geometry() { + Some(g) => g, + None => { + warn!("skipping feature with missing geometry"); + continue; + } + }; + let mut geo = Geo::new(); + geometry.process(&mut geo, flatgeobuf::GeometryType::MultiPolygon)?; + if let geo::Geometry::MultiPolygon(multi_poly) = geo.geometry() { + let geo_polygon = multi_poly + .0 + .first() + .ok_or(anyhow!("multipolygon was unexpectedly empty"))?; + if multi_poly.0.len() > 1 { + warn!( + "dropping {} extra polygons from census area: {:?}", + multi_poly.0.len() - 1, + props + ); + } - geo_polygon.map_coords_inplace(|(x, y)| { - let point = geom::LonLat::new(*x, *y).to_pt(bounds); - (point.x(), point.y()) - }); - let polygon = geom::Polygon::from(geo_polygon); - results.push(CensusArea { - polygon, - population, - }); + if !geo_polygon.intersects(&geo_map_area) { + debug!( + "skipping polygon outside of map area. polygon: {:?}, map_area: {:?}", + geo_polygon, geo_map_area + ); + continue; + } + + let mut polygon = geo_polygon.clone(); + polygon.map_coords_inplace(|(x, y)| { + let point = geom::LonLat::new(*x, *y).to_pt(bounds); + (point.x(), point.y()) + }); + results.push(CensusArea { + polygon, + population, + }); + } else { + warn!("skipping unexpected geometry"); + continue; + } } - debug!("built {} CensusAreas within map bounds", results.len()); - timer.stop("converting to `CensusArea`s"); Ok(results) } diff --git a/popdat/src/lib.rs b/popdat/src/lib.rs index 1ef1e875bf..447d64e099 100644 --- a/popdat/src/lib.rs +++ b/popdat/src/lib.rs @@ -25,7 +25,6 @@ extern crate log; use rand_xorshift::XorShiftRng; use abstutil::Timer; -use geom::Polygon; use geom::{Distance, Time}; use map_model::{BuildingID, Map}; use sim::Scenario; @@ -39,8 +38,9 @@ mod make_person; /// blocks, depending what data we find. All of the areas should roughly partition the map -- we /// probably don't need to guarantee we cover every single building, but we definitely shouldn't /// have two overlapping areas. +#[derive(Debug, PartialEq)] pub struct CensusArea { - pub polygon: Polygon, + pub polygon: geo::Polygon, pub population: usize, // TODO Not sure what goes here, whatever census data actually has that could be useful } @@ -102,17 +102,15 @@ impl Config { /// appropriate census data, and use it to produce a Scenario. pub fn generate_scenario( scenario_name: &str, + areas: Vec, config: Config, map: &Map, rng: &mut XorShiftRng, -) -> Result { +) -> Scenario { + let mut timer = Timer::new("building scenario"); + // find_data_for_map may return an error. If so, just plumb it back to the caller using the ? // operator - let mut timer = Timer::new("generate census scenario"); - timer.start("building population areas for map"); - let areas = CensusArea::find_data_for_map(map, &mut timer).map_err(|e| e.to_string())?; - timer.stop("building population areas for map"); - timer.start("assigning people to houses"); let people = distribute_people::assign_people_to_houses(areas, map, rng, &config); timer.stop("assigning people to houses"); @@ -128,5 +126,5 @@ pub fn generate_scenario( scenario = scenario.remove_weird_schedules(); timer.stop("removing weird schedules"); - Ok(scenario) + scenario }