diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 667c3ef007..c59e65d793 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,14 +6,15 @@ jobs: test: name: fmt, clippy, test, test --release runs-on: ubuntu-latest + timeout-minutes: 60 steps: - uses: actions/checkout@v1 - name: Verify compiler/builtin/bitcode/regenerate.sh was run if necessary run: pushd compiler/builtins/bitcode && ./regenerate.sh && git diff --exit-code ../../gen/src/llvm/builtins.bc && popd - - name: Install LLVM - run: sudo ./ci/install-llvm.sh 10 + - name: Install CI Libraries + run: sudo ./ci/install-ci-libraries.sh 10 - name: Enable LLD run: sudo ./ci/enable-lld.sh diff --git a/BUILDING_FROM_SOURCE.md b/BUILDING_FROM_SOURCE.md index ad547620b4..78e8faf718 100644 --- a/BUILDING_FROM_SOURCE.md +++ b/BUILDING_FROM_SOURCE.md @@ -1,9 +1,21 @@ # Building the Roc compiler from source -## Installing LLVM and libc++abi +## Installing LLVM, valgrind, libunwind, and libc++-dev -To build the compiler, you need both `libc++abi` and a particular version of LLVM installed on your system. Some systems may already have `libc++abi` on them, but if not, you may need to install it. (On Ubuntu, this can be done with `apt-get install libc++abi-dev`.) +To build the compiler, you need these installed: + +* `libunwind` (macOS should already have this one installed) +* `libc++-dev` +* a particular version of LLVM + +To run the test suite (via `cargo test`), you additionally need to install: + +* [`valgrind`](https://www.valgrind.org/) + +Some systems may already have `libc++-dev` on them, but if not, you may need to install it. (On Ubuntu, this can be done with `sudo apt-get install libc++-dev`.) macOS systems +should already have `libunwind`, but other systems will need to install it +(e.g. with `sudo apt-get install libunwind-dev`). To see which version of LLVM you need, take a look at `Cargo.toml`, in particular the `branch` section of the `inkwell` dependency. It should have something like `llvmX-Y` where X and Y are the major and minor revisions of LLVM you need. 
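(For Ubuntu users, the non-LLVM prerequisites described above boil down to roughly the following; this is an illustrative sketch based only on the package names mentioned in this document, and LLVM itself comes from apt.llvm.org via `ci/install-ci-libraries.sh`.)

```sh
# Build prerequisites (macOS already ships libunwind)
sudo apt-get install libc++-dev libunwind-dev

# Needed to run the test suite via `cargo test`
sudo apt-get install valgrind
```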
diff --git a/Cargo.lock b/Cargo.lock index dc61b0d4b1..b2ad755281 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -16,6 +16,21 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2692800d602527d2b8fea50036119c37df74ab565b10e285706a3dcec0ec3e16" +[[package]] +name = "addr2line" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6a2d3371669ab3ca9797670853d61402b03d0b4b9ebf33d677dfa720203072" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" + [[package]] name = "aho-corasick" version = "0.7.13" @@ -120,6 +135,20 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "backtrace" +version = "0.3.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f813291114c186a042350e787af10c26534601062603d888be110f59f85ef8fa" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "bitflags" version = "1.2.1" @@ -279,6 +308,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "cloudabi" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4344512281c643ae7638bbabc3af17a11307803ec8f0fcad9fae512a8bf36467" +dependencies = [ + "bitflags", +] + [[package]] name = "cocoa" version = "0.20.2" @@ -634,6 +672,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +[[package]] +name = "fixedbitset" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" + [[package]] name = "fnv" version = "1.0.7" @@ -829,7 +873,7 @@ dependencies = [ "gfx-hal", "libloading", "log", - "parking_lot", + "parking_lot 0.10.2", "range-alloc", "raw-window-handle", "smallvec", @@ -885,7 +929,7 @@ dependencies = [ "log", "metal", "objc", - "parking_lot", + "parking_lot 0.10.2", "range-alloc", "raw-window-handle", "smallvec", @@ -947,6 +991,12 @@ dependencies = [ "slab", ] +[[package]] +name = "gimli" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf91faf136cb47367fa430cd46e37a788775e7fa104f8b4bcb3861dc389b724" + [[package]] name = "glyph_brush" version = "0.7.0" @@ -1119,7 +1169,7 @@ dependencies = [ "libc", "llvm-sys", "once_cell", - "parking_lot", + "parking_lot 0.10.2", "regex", ] @@ -1259,6 +1309,15 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "lock_api" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.11" @@ -1328,6 +1387,16 @@ dependencies = [ "objc", ] +[[package]] +name = "miniz_oxide" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d" +dependencies = [ + "adler", + "autocfg 1.0.1", +] + [[package]] name = "mio" version = "0.6.22" @@ -1519,6 +1588,12 @@ dependencies = [ "cc", ] +[[package]] +name = "object" 
+version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab52be62400ca80aa00285d25253d7f7c437b7375c4de678f5405d3afe82ca5" + [[package]] name = "once_cell" version = "1.4.1" @@ -1561,8 +1636,19 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3a704eb390aafdc107b0e392f56a82b668e3a71366993b5340f5833fd62505e" dependencies = [ - "lock_api", - "parking_lot_core", + "lock_api 0.3.4", + "parking_lot_core 0.7.2", +] + +[[package]] +name = "parking_lot" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4893845fa2ca272e647da5d0e46660a314ead9c2fdd9a883aabc32e481a8733" +dependencies = [ + "instant", + "lock_api 0.4.1", + "parking_lot_core 0.8.0", ] [[package]] @@ -1572,13 +1658,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d58c7c768d4ba344e3e8d72518ac13e259d7c7ade24167003b8488e10b6740a3" dependencies = [ "cfg-if", - "cloudabi", + "cloudabi 0.0.3", "libc", "redox_syscall", "smallvec", "winapi 0.3.9", ] +[[package]] +name = "parking_lot_core" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b" +dependencies = [ + "backtrace", + "cfg-if", + "cloudabi 0.1.0", + "instant", + "libc", + "petgraph", + "redox_syscall", + "smallvec", + "thread-id", + "winapi 0.3.9", +] + [[package]] name = "peek-poke" version = "0.2.0" @@ -1650,6 +1754,16 @@ dependencies = [ "sha-1", ] +[[package]] +name = "petgraph" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "pin-project" version = "0.4.23" @@ -1965,7 +2079,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" dependencies = [ - "cloudabi", + "cloudabi 0.0.3", "fuchsia-cprng", "libc", "rand_core 0.4.2", @@ -2203,6 +2317,8 @@ dependencies = [ "roc_types", "roc_unify", "roc_uniq", + "serde", + "serde-xml-rs", "strip-ansi-escapes", "target-lexicon", "tokio", @@ -2319,6 +2435,7 @@ dependencies = [ "roc_can", "roc_collections", "roc_constrain", + "roc_load", "roc_module", "roc_mono", "roc_parse", @@ -2343,6 +2460,7 @@ dependencies = [ "inlinable_string", "maplit", "num_cpus", + "parking_lot 0.11.0", "pretty_assertions", "quickcheck", "quickcheck_macros", @@ -2351,6 +2469,7 @@ dependencies = [ "roc_collections", "roc_constrain", "roc_module", + "roc_mono", "roc_parse", "roc_problem", "roc_region", @@ -2394,6 +2513,7 @@ dependencies = [ "roc_solve", "roc_types", "roc_unify", + "ven_ena", "ven_pretty", ] @@ -2548,6 +2668,12 @@ dependencies = [ "roc_types", ] +[[package]] +name = "rustc-demangle" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2610b7f643d18c87dff3b489950269617e6601a51f1f05aa5daefee36f64f0b" + [[package]] name = "rustc-hash" version = "1.1.0" @@ -2624,6 +2750,21 @@ name = "serde" version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde-xml-rs" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"efe415925cf3d0bbb2fc47d09b56ce03eef51c5d56846468a39bcc293c7a846c" +dependencies = [ + "log", + "serde", + "thiserror", + "xml-rs", +] [[package]] name = "serde_cbor" @@ -2755,7 +2896,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd0a4829a5c591dc24a944a736d6b1e4053e51339a79fd5d4702c4c999a9c45e" dependencies = [ - "lock_api", + "lock_api 0.3.4", ] [[package]] @@ -2856,6 +2997,37 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" +dependencies = [ + "proc-macro2 1.0.21", + "quote 1.0.7", + "syn 1.0.40", +] + +[[package]] +name = "thread-id" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1" +dependencies = [ + "libc", + "redox_syscall", + "winapi 0.3.9", +] + [[package]] name = "thread_local" version = "1.0.1" @@ -3183,7 +3355,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5dece29f3cd403aabf4056595eabe4b9af56b8bfae12445f097cf8666a41829" dependencies = [ "arrayvec", - "parking_lot", + "parking_lot 0.10.2", "raw-window-handle", "smallvec", "wgpu-core", @@ -3210,7 +3382,7 @@ dependencies = [ "gfx-hal", "gfx-memory", "log", - "parking_lot", + "parking_lot 0.10.2", "peek-poke", "smallvec", "vec_map", @@ -3227,7 +3399,7 @@ dependencies = [ "lazy_static", "libc", "objc", - "parking_lot", + "parking_lot 0.10.2", "raw-window-handle", "wgpu-core", "wgpu-types", @@ -3320,7 +3492,7 @@ dependencies = [ "ndk-glue", "ndk-sys", "objc", - "parking_lot", + "parking_lot 0.10.2", "percent-encoding", "raw-window-handle", "smithay-client-toolkit", diff --git a/ci/install-llvm.sh b/ci/install-ci-libraries.sh similarity index 97% rename from ci/install-llvm.sh rename to ci/install-ci-libraries.sh index 0c327914ee..5a0bcb88a0 100755 --- a/ci/install-llvm.sh +++ b/ci/install-ci-libraries.sh @@ -59,4 +59,4 @@ esac wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - add-apt-repository "${REPO_NAME}" apt-get update -apt-get install -y clang-$LLVM_VERSION lldb-$LLVM_VERSION lld-$LLVM_VERSION clangd-$LLVM_VERSION +apt-get install -y clang-$LLVM_VERSION lldb-$LLVM_VERSION lld-$LLVM_VERSION clangd-$LLVM_VERSION libc++abi-dev libunwind-dev valgrind diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ac1cb022dd..38aed29460 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -86,3 +86,5 @@ indoc = "0.3.3" quickcheck = "0.8" quickcheck_macros = "0.8" strip-ansi-escapes = "0.1" +serde = { version = "1.0", features = ["derive"] } +serde-xml-rs = "0.4" diff --git a/cli/src/build.rs b/cli/src/build.rs new file mode 100644 index 0000000000..4b5797a310 --- /dev/null +++ b/cli/src/build.rs @@ -0,0 +1,117 @@ +use bumpalo::Bump; +use roc_build::{link::link, program}; +use roc_collections::all::MutMap; +use roc_gen::llvm::build::OptLevel; +use roc_load::file::LoadingProblem; +use std::fs; +use std::path::PathBuf; +use std::time::{Duration, SystemTime}; +use target_lexicon::Triple; + +fn report_timing(buf: &mut String, label: &str, duration: Duration) { + buf.push_str(&format!( + " {:.3} ms {}\n", + 
duration.as_secs_f64() * 1000.0, + label, + )); +} + +pub fn build_file( + target: &Triple, + src_dir: PathBuf, + filename: PathBuf, + opt_level: OptLevel, +) -> Result { + let compilation_start = SystemTime::now(); + let arena = Bump::new(); + + // Step 1: compile the app and generate the .o file + let subs_by_module = MutMap::default(); + + // Release builds use uniqueness optimizations + let stdlib = match opt_level { + OptLevel::Normal => roc_builtins::std::standard_stdlib(), + OptLevel::Optimize => roc_builtins::unique::uniq_stdlib(), + }; + let loaded = roc_load::file::load_and_monomorphize( + &arena, + filename.clone(), + stdlib, + src_dir.as_path(), + subs_by_module, + )?; + let dest_filename = filename.with_file_name("roc_app.o"); + let buf = &mut String::with_capacity(1024); + + for (module_id, module_timing) in loaded.timings.iter() { + let module_name = loaded.interns.module_name(*module_id); + + buf.push_str(" "); + buf.push_str(module_name); + buf.push_str("\n"); + + report_timing(buf, "Read .roc file from disk", module_timing.read_roc_file); + report_timing(buf, "Parse header", module_timing.parse_header); + report_timing(buf, "Parse body", module_timing.parse_body); + report_timing(buf, "Canonicalize", module_timing.canonicalize); + report_timing(buf, "Constrain", module_timing.constrain); + report_timing(buf, "Solve", module_timing.solve); + report_timing(buf, "Other", module_timing.other()); + buf.push('\n'); + report_timing(buf, "Total", module_timing.total()); + } + + println!( + "\n\nCompilation finished! Here's how long each module took to compile:\n\n{}", + buf + ); + + program::gen_from_mono_module( + &arena, + loaded, + filename, + Triple::host(), + &dest_filename, + opt_level, + ); + + let compilation_end = compilation_start.elapsed().unwrap(); + + println!( + "Finished compilation and code gen in {} ms\n", + compilation_end.as_millis() + ); + + let cwd = dest_filename.parent().unwrap(); + + // Step 2: link the precompiled host and compiled app + let host_input_path = cwd.join("platform").join("host.o"); + let binary_path = cwd.join("app"); // TODO should be app.exe on Windows + + // TODO try to move as much of this linking as possible to the precompiled + // host, to minimize the amount of host-application linking required. + let cmd_result = // TODO use lld + link( + target, + binary_path.as_path(), + host_input_path.as_path(), + dest_filename.as_path(), + ) + .map_err(|_| { + todo!("gracefully handle `rustc` failing to spawn."); + })? + .wait() + .map_err(|_| { + todo!("gracefully handle error after `rustc` spawned"); + }); + + // Clean up the leftover .o file from the Roc, if possible. + // (If cleaning it up fails, that's fine. No need to take action.) + // TODO compile the dest_filename to a tmpdir, as an extra precaution. + let _ = fs::remove_file(dest_filename); + + // If the cmd errored out, return the Err. 
+ cmd_result?; + + Ok(binary_path) +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs index b84b6ed76a..e821a13017 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,20 +1,16 @@ #[macro_use] extern crate clap; -use bumpalo::Bump; use clap::ArgMatches; use clap::{App, Arg}; -use roc_build::program::gen; -use roc_collections::all::MutMap; use roc_gen::llvm::build::OptLevel; -use roc_load::file::LoadingProblem; -use std::io::{self, ErrorKind}; -use std::path::{Path, PathBuf}; +use std::io; +use std::path::Path; use std::process; use std::process::Command; -use std::time::{Duration, SystemTime}; use target_lexicon::Triple; +pub mod build; pub mod repl; pub static FLAG_OPTIMIZE: &str = "optimize"; @@ -66,7 +62,7 @@ pub fn build_app<'a>() -> App<'a> { ) } -pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> { +pub fn build(target: &Triple, matches: &ArgMatches, run_after_build: bool) -> io::Result<()> { let filename = matches.value_of(FLAG_ROC_FILE).unwrap(); let opt_level = if matches.is_present(FLAG_OPTIMIZE) { OptLevel::Optimize @@ -78,7 +74,7 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> { // Spawn the root task let path = path.canonicalize().unwrap_or_else(|err| { - use ErrorKind::*; + use io::ErrorKind::*; match err.kind() { NotFound => { @@ -95,8 +91,8 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> { } }); - let binary_path = - build_file(src_dir, path, opt_level).expect("TODO gracefully handle build_file failing"); + let binary_path = build::build_file(target, src_dir, path, opt_level) + .expect("TODO gracefully handle build_file failing"); if run_after_build { // Run the compiled app @@ -109,123 +105,3 @@ pub fn build(matches: &ArgMatches, run_after_build: bool) -> io::Result<()> { Ok(()) } - -fn report_timing(buf: &mut String, label: &str, duration: Duration) { - buf.push_str(&format!( - " {:.3} ms {}\n", - duration.as_secs_f64() * 1000.0, - label, - )); -} - -fn build_file( - src_dir: PathBuf, - filename: PathBuf, - opt_level: OptLevel, -) -> Result { - let compilation_start = SystemTime::now(); - let arena = Bump::new(); - - // Step 1: compile the app and generate the .o file - let subs_by_module = MutMap::default(); - - // Release builds use uniqueness optimizations - let stdlib = match opt_level { - OptLevel::Normal => roc_builtins::std::standard_stdlib(), - OptLevel::Optimize => roc_builtins::unique::uniq_stdlib(), - }; - let loaded = - roc_load::file::load(filename.clone(), &stdlib, src_dir.as_path(), subs_by_module)?; - let dest_filename = filename.with_extension("o"); - - let buf = &mut String::with_capacity(1024); - - for (module_id, module_timing) in loaded.timings.iter() { - let module_name = loaded.interns.module_name(*module_id); - - buf.push_str(" "); - buf.push_str(module_name); - buf.push_str("\n"); - - report_timing(buf, "Read .roc file from disk", module_timing.read_roc_file); - report_timing(buf, "Parse header", module_timing.parse_header); - report_timing(buf, "Parse body", module_timing.parse_body); - report_timing(buf, "Canonicalize", module_timing.canonicalize); - report_timing(buf, "Constrain", module_timing.constrain); - report_timing(buf, "Solve", module_timing.solve); - report_timing(buf, "Other", module_timing.other()); - buf.push('\n'); - report_timing(buf, "Total", module_timing.total()); - } - - println!( - "\n\nCompilation finished! 
Here's how long each module took to compile:\n\n{}", - buf - ); - - gen( - &arena, - loaded, - filename, - Triple::host(), - &dest_filename, - opt_level, - ); - - let compilation_end = compilation_start.elapsed().unwrap(); - - println!( - "Finished compilation and code gen in {} ms\n", - compilation_end.as_millis() - ); - - let cwd = dest_filename.parent().unwrap(); - let lib_path = dest_filename.with_file_name("libroc_app.a"); - - // Step 2: turn the .o file into a .a static library - Command::new("ar") // TODO on Windows, use `link` - .args(&[ - "rcs", - lib_path.to_str().unwrap(), - dest_filename.to_str().unwrap(), - ]) - .spawn() - .map_err(|_| { - todo!("gracefully handle `ar` failing to spawn."); - })? - .wait() - .map_err(|_| { - todo!("gracefully handle error after `ar` spawned"); - })?; - - // Step 3: have rustc compile the host and link in the .a file - let binary_path = cwd.join("app"); - - Command::new("rustc") - .args(&[ - "-L", - ".", - "--crate-type", - "bin", - "host.rs", - "-o", - binary_path.as_path().to_str().unwrap(), - // ensure we don't make a position-independent executable - "-C", - "link-arg=-no-pie", - // explicitly link in the c++ stdlib, for exceptions - "-C", - "link-arg=-lc++", - ]) - .current_dir(cwd) - .spawn() - .map_err(|_| { - todo!("gracefully handle `rustc` failing to spawn."); - })? - .wait() - .map_err(|_| { - todo!("gracefully handle error after `rustc` spawned"); - })?; - - Ok(binary_path) -} diff --git a/cli/src/main.rs b/cli/src/main.rs index dd92325b72..3a07efd95a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,14 +1,23 @@ use roc_cli::{build, build_app, repl, DIRECTORY_OR_FILES}; use std::io; use std::path::Path; +use target_lexicon::Triple; fn main() -> io::Result<()> { let matches = build_app().get_matches(); match matches.subcommand_name() { None => roc_editor::launch(&[]), - Some("build") => build(matches.subcommand_matches("build").unwrap(), false), - Some("run") => build(matches.subcommand_matches("run").unwrap(), true), + Some("build") => build( + &Triple::host(), + matches.subcommand_matches("build").unwrap(), + false, + ), + Some("run") => build( + &Triple::host(), + matches.subcommand_matches("run").unwrap(), + true, + ), Some("repl") => repl::main(), Some("edit") => { match matches diff --git a/cli/src/repl.rs b/cli/src/repl.rs index efafca3cec..82c184a061 100644 --- a/cli/src/repl.rs +++ b/cli/src/repl.rs @@ -12,12 +12,9 @@ use roc_collections::all::{ImMap, ImSet, MutMap, MutSet, SendMap, SendSet}; use roc_constrain::expr::constrain_expr; use roc_constrain::module::{constrain_imported_values, load_builtin_aliases, Import}; use roc_fmt::annotation::{Formattable, Newlines, Parens}; -use roc_gen::layout_id::LayoutIds; use roc_gen::llvm::build::{build_proc, build_proc_header, OptLevel}; use roc_module::ident::Ident; use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds, Symbol}; -use roc_mono::ir::Procs; -use roc_mono::layout::{Layout, LayoutCache}; use roc_parse::ast::{self, Attempting}; use roc_parse::blankspace::space0_before; use roc_parse::parser::{loc, Fail, FailReason, Parser, State}; @@ -29,7 +26,7 @@ use roc_types::subs::{Content, Subs, VarStore, Variable}; use roc_types::types::Type; use std::hash::Hash; use std::io::{self, Write}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::str::from_utf8_unchecked; use target_lexicon::Triple; @@ -158,61 +155,135 @@ pub fn repl_home() -> ModuleId { ModuleIds::default().get_or_insert(&"REPL".into()) } +fn promote_expr_to_module(src: &str) -> String { + 
let mut buffer = String::from("app Repl provides [ replOutput ] imports []\n\nreplOutput =\n"); + + for line in src.lines() { + // indent the body! + buffer.push_str(" "); + buffer.push_str(line); + buffer.push('\n'); + } + + buffer +} + fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result { - use roc_reporting::report::{can_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE}; + use roc_reporting::report::{ + can_problem, mono_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE, + }; - // Look up the types and expressions of the `provided` values - let ptr_bytes = target.pointer_width().unwrap().bytes() as u32; let arena = Bump::new(); - let CanExprOut { - loc_expr, - var_store, - var, - constraint, - home, - interns, - problems: can_problems, - .. - } = can_expr(src)?; // IMPORTANT: we must bail out here if there were UTF-8 errors! - - let subs = Subs::new(var_store.into()); - let mut type_problems = Vec::new(); - let (content, mut subs) = infer_expr(subs, &mut type_problems, &constraint, var); // SAFETY: we've already verified that this is valid UTF-8 during parsing. - let src_lines: Vec<&str> = unsafe { from_utf8_unchecked(src).split('\n').collect() }; + let src_str: &str = unsafe { from_utf8_unchecked(src) }; - // Report problems - let palette = DEFAULT_PALETTE; + let stdlib = roc_builtins::std::standard_stdlib(); + let stdlib_mode = stdlib.mode; + let filename = PathBuf::from("REPL.roc"); + let src_dir = Path::new("fake/test/path"); - // Report parsing and canonicalization problems - let alloc = RocDocAllocator::new(&src_lines, home, &interns); + let module_src = promote_expr_to_module(src_str); - // Used for reporting where an error came from. - // - // TODO: maybe Reporting should have this be an Option? - let path = PathBuf::new(); - let total_problems = can_problems.len() + type_problems.len(); + let exposed_types = MutMap::default(); + let loaded = roc_load::file::load_and_monomorphize_from_str( + &arena, + filename, + &module_src, + stdlib, + src_dir, + exposed_types, + ); - if total_problems == 0 { + let loaded = loaded.expect("failed to load module"); + + use roc_load::file::MonomorphizedModule; + let MonomorphizedModule { + can_problems, + type_problems, + mono_problems, + mut procedures, + interns, + exposed_to_host, + mut subs, + module_id: home, + .. + } = loaded; + + let error_count = can_problems.len() + type_problems.len() + mono_problems.len(); + + if error_count > 0 { + // There were problems; report them and return. + let src_lines: Vec<&str> = module_src.split('\n').collect(); + + // Used for reporting where an error came from. + // + // TODO: maybe Reporting should have this be an Option? 
+ let path = PathBuf::new(); + + // Report problems + let palette = DEFAULT_PALETTE; + + // Report parsing and canonicalization problems + let alloc = RocDocAllocator::new(&src_lines, home, &interns); + + let mut lines = Vec::with_capacity(error_count); + + for problem in can_problems.into_iter() { + let report = can_problem(&alloc, path.clone(), problem); + let mut buf = String::new(); + + report.render_color_terminal(&mut buf, &alloc, &palette); + + lines.push(buf); + } + + for problem in type_problems.into_iter() { + let report = type_problem(&alloc, path.clone(), problem); + let mut buf = String::new(); + + report.render_color_terminal(&mut buf, &alloc, &palette); + + lines.push(buf); + } + + for problem in mono_problems.into_iter() { + let report = mono_problem(&alloc, path.clone(), problem); + let mut buf = String::new(); + + report.render_color_terminal(&mut buf, &alloc, &palette); + + lines.push(buf); + } + + Ok(ReplOutput::Problems(lines)) + } else { let context = Context::create(); let module = arena.alloc(roc_gen::llvm::build::module_from_builtins(&context, "app")); let builder = context.create_builder(); - // pretty-print the expr type string for later. - name_all_type_vars(var, &mut subs); + debug_assert_eq!(exposed_to_host.len(), 1); + let (main_fn_symbol, main_fn_var) = exposed_to_host.iter().next().unwrap(); + let main_fn_symbol = *main_fn_symbol; + let main_fn_var = *main_fn_var; + // pretty-print the expr type string for later. + name_all_type_vars(main_fn_var, &mut subs); + let content = subs.get(main_fn_var).content; let expr_type_str = content_to_string(content.clone(), &subs, home, &interns); + + let (_, main_fn_layout) = procedures + .keys() + .find(|(s, _)| *s == main_fn_symbol) + .unwrap() + .clone(); + + let ptr_bytes = target.pointer_width().unwrap().bytes() as u32; + + let module = arena.alloc(module); let (module_pass, function_pass) = roc_gen::llvm::build::construct_optimization_passes(module, opt_level); - // Compute main_fn_type before moving subs to Env - let main_ret_layout = Layout::new(&arena, content.clone(), &subs).unwrap_or_else(|err| { - panic!( - "Code gen error in test: could not convert Content to main_layout. Err was {:?}", - err - ) - }); let execution_engine = module .create_jit_execution_engine(OptimizationLevel::None) .expect("Error creating JIT execution engine for test"); @@ -222,7 +293,7 @@ fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result Result map.len(), - None => 0, - }; - - Vec::with_capacity(num_headers) - }; - let mut layout_cache = LayoutCache::default(); - let procs = roc_mono::ir::specialize_all(&mut mono_env, procs, &mut layout_cache); - - assert_eq!( - procs.runtime_errors, - roc_collections::all::MutMap::default() - ); - - let (mut procs, param_map) = procs.get_specialized_procs_help(mono_env.arena); - let main_body = roc_mono::inc_dec::visit_declaration( - mono_env.arena, - param_map, - mono_env.arena.alloc(main_body), - ); - - // Put this module's ident_ids back in the interns, so we can use them in env. - // This must happen *after* building the headers, because otherwise there's - // a conflicting mutable borrow on ident_ids. - env.interns.all_ident_ids.insert(home, ident_ids); + let mut layout_ids = roc_gen::layout_id::LayoutIds::default(); + let mut headers = Vec::with_capacity(procedures.len()); // Add all the Proc headers to the module. // We have to do this in a separate pass first, // because their bodies may reference each other. 
- for ((symbol, layout), proc) in procs.drain() { + let mut scope = roc_gen::llvm::build::Scope::default(); + for ((symbol, layout), proc) in procedures.drain() { let fn_val = build_proc_header(&env, &mut layout_ids, symbol, &layout, &proc); + if proc.args.is_empty() { + // this is a 0-argument thunk, i.e. a top-level constant definition + // it must be in-scope everywhere in the module! + scope.insert_top_level_thunk(symbol, layout, fn_val); + } + headers.push((proc, fn_val)); } // Build each proc using its header info. for (proc, fn_val) in headers { - // NOTE: This is here to be uncommented in case verification fails. - // (This approach means we don't have to defensively clone name here.) - // - // println!("\n\nBuilding and then verifying function {}\n\n", name); - build_proc(&env, &mut layout_ids, proc, fn_val); + let mut current_scope = scope.clone(); + + // only have top-level thunks for this proc's module in scope + // this retain is not needed for correctness, but will cause less confusion when debugging + let home = proc.name.module_id(); + current_scope.retain_top_level_thunks_for_module(home); + + build_proc(&env, &mut layout_ids, scope.clone(), proc, fn_val); if fn_val.verify(true) { function_pass.run_on(&fn_val); } else { + use roc_builtins::std::Mode; + + let mode = match stdlib_mode { + Mode::Uniqueness => "OPTIMIZED", + Mode::Standard => "NON-OPTIMIZED", + }; + eprintln!( - "\n\nFunction {:?} failed LLVM verification in build. Its content was:\n", - fn_val.get_name().to_str().unwrap() + "\n\nFunction {:?} failed LLVM verification in {} build. Its content was:\n", + fn_val.get_name().to_str().unwrap(), + mode, ); fn_val.print_to_stderr(); panic!( - "The preceding code was from {:?}, which failed LLVM verification in build.", - fn_val.get_name().to_str().unwrap() + "The preceding code was from {:?}, which failed LLVM verification in {} build.", + fn_val.get_name().to_str().unwrap(), + mode, ); } } - - let (main_fn_name, main_fn) = roc_gen::llvm::build::make_main_function( + let (main_fn_name, main_fn) = roc_gen::llvm::build::promote_to_main_function( &env, &mut layout_ids, - &main_ret_layout, - &main_body, + main_fn_symbol, + &main_fn_layout, ); // Uncomment this to see the module's un-optimized LLVM instruction output: @@ -347,7 +391,7 @@ fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result Result Out { } } +#[allow(dead_code)] +pub fn run_with_valgrind(args: &[&str]) -> Out { + //TODO: figure out if there is a better way to get the valgrind executable. 
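(The helper below runs valgrind with machine-readable XML written to file descriptor 2, i.e. stderr, which `extract_valgrind_errors` later parses. As a rough illustration — the binary path is a placeholder for whatever the test passes in — the command line it builds looks like:)

```sh
valgrind --tool=memcheck --xml=yes --xml-fd=2 ./some-compiled-roc-app
```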
+ let mut cmd = Command::new("valgrind"); + + cmd.arg("--tool=memcheck"); + cmd.arg("--xml=yes"); + cmd.arg("--xml-fd=2"); + for arg in args { + cmd.arg(arg); + } + + let output = cmd + .output() + .expect("failed to execute compiled `valgrind` binary in CLI test"); + + Out { + stdout: String::from_utf8(output.stdout).unwrap(), + stderr: String::from_utf8(output.stderr).unwrap(), + status: output.status, + } +} + +#[derive(Debug, Deserialize)] +struct ValgrindOutput { + #[serde(rename = "$value")] + pub fields: Vec<ValgrindField>, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "lowercase")] +enum ValgrindField { + ProtocolVersion(isize), + ProtocolTool(String), + Preamble(ValgrindDummyStruct), + Pid(isize), + PPid(isize), + Tool(String), + Args(ValgrindDummyStruct), + Error(ValgrindError), + Status(ValgrindDummyStruct), + ErrorCounts(ValgrindDummyStruct), + SuppCounts(ValgrindDummyStruct), +} + +#[derive(Debug, Deserialize)] +struct ValgrindDummyStruct {} + +#[derive(Debug, Deserialize, Clone)] +pub struct ValgrindError { + kind: String, + #[serde(default)] + what: Option<String>, + #[serde(default)] + xwhat: Option<ValgrindErrorXWhat>, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct ValgrindErrorXWhat { + text: String, + #[serde(default)] + leakedbytes: Option<isize>, + #[serde(default)] + leakedblocks: Option<isize>, +} + +#[allow(dead_code)] +pub fn extract_valgrind_errors(xml: &str) -> Vec<ValgrindError> { + let parsed_xml: ValgrindOutput = + from_str(xml).expect("failed to parse the `valgrind` xml output"); + parsed_xml + .fields + .iter() + .filter_map(|field| match field { + ValgrindField::Error(err) => Some(err.clone()), + _ => None, + }) + .collect() +} + #[allow(dead_code)] pub fn example_dir(dir_name: &str) -> PathBuf { let mut path = env::current_exe().ok().unwrap(); diff --git a/cli/tests/repl_eval.rs b/cli/tests/repl_eval.rs index ffadf96e7f..e7801decef 100644 --- a/cli/tests/repl_eval.rs +++ b/cli/tests/repl_eval.rs @@ -1,12 +1,17 @@ #[macro_use] extern crate pretty_assertions; +#[macro_use] +extern crate indoc; + mod helpers; #[cfg(test)] mod repl_eval { use crate::helpers; + const ERROR_MESSAGE_START: char = '─'; + fn expect_success(input: &str, expected: &str) { let out = helpers::repl_eval(input); @@ -15,6 +20,28 @@ mod repl_eval { assert!(out.status.success()); } + fn expect_failure(input: &str, expected: &str) { + let out = helpers::repl_eval(input); + + // there may be some other stuff printed (e.g.
unification errors) + // so skip till the header of the first error + match out.stdout.find(ERROR_MESSAGE_START) { + Some(index) => { + assert_eq!(&out.stderr, ""); + assert_eq!(&out.stdout[index..], expected); + assert!(out.status.success()); + } + None => { + assert_eq!(&out.stderr, ""); + assert!(out.status.success()); + panic!( + "I expected a failure, but there is no error message in stdout:\n\n{}", + &out.stdout + ); + } + } + } + #[test] fn literal_0() { expect_success("0", "0 : Num *"); @@ -256,13 +283,46 @@ mod repl_eval { // expect_success(r#""\n\nhi!\n\n""#, "\"\"\"\n\nhi!\n\n\"\"\""); // } - // TODO uncomment this once https://github.com/rtfeldman/roc/issues/295 is done + #[test] + fn list_of_3_field_records() { + expect_success( + "[ { foo: 4.1, bar: 2, baz: 0x3 } ]", + "[ { bar: 2, baz: 3, foo: 4.1 } ] : List { bar : Num *, baz : Int, foo : Float }", + ); + } + + #[test] + fn type_problem() { + expect_failure( + "1 + \"\"", + indoc!( + r#" + ── TYPE MISMATCH ─────────────────────────────────────────────────────────────── + + The 2nd argument to add is not what I expect: + + 4│ 1 + "" + ^^ + + This argument is a string of type: + + Str + + But add needs the 2nd argument to be: + + Num a + "# + ), + ); + } + + // #[test] + // fn parse_problem() { + // // can't find something that won't parse currently + // } // - // #[test] - // fn list_of_3_field_records() { - // expect_success( - // "[ { foo: 4.1, bar: 2, baz: 0x3 } ]", - // "[ { foo: 4.1, bar: 2, baz: 0x3 } ] : List { foo : Float, bar : Num *, baz : Int }", - // ); - // } + // #[test] + // fn mono_problem() { + // // can't produce a mono error (non-exhaustive pattern) yet + // } } diff --git a/compiler/build/src/lib.rs b/compiler/build/src/lib.rs index 15ced922ef..896bcc6c21 100644 --- a/compiler/build/src/lib.rs +++ b/compiler/build/src/lib.rs @@ -10,4 +10,6 @@ // and encouraging shortcuts here creates bad incentives. I would rather temporarily // re-enable this when working on performance optimizations than have it block PRs. #![allow(clippy::large_enum_variant)] +pub mod link; pub mod program; +pub mod target; diff --git a/compiler/build/src/link.rs b/compiler/build/src/link.rs new file mode 100644 index 0000000000..8e2b98c5b9 --- /dev/null +++ b/compiler/build/src/link.rs @@ -0,0 +1,204 @@ +use crate::target::arch_str; +use std::io; +use std::path::Path; +use std::process::{Child, Command}; +use target_lexicon::{Architecture, OperatingSystem, Triple}; + +pub fn link( + target: &Triple, + binary_path: &Path, + host_input_path: &Path, + dest_filename: &Path, +) -> io::Result { + // TODO we should no longer need to do this once we have platforms on + // a package repository, as we can then get precompiled hosts from there. + rebuild_host(host_input_path); + + match target { + Triple { + architecture: Architecture::X86_64, + operating_system: OperatingSystem::Linux, + .. + } => link_linux(target, binary_path, host_input_path, dest_filename), + Triple { + architecture: Architecture::X86_64, + operating_system: OperatingSystem::Darwin, + .. 
+ } => link_macos(target, binary_path, host_input_path, dest_filename), + _ => panic!("TODO gracefully handle unsupported target: {:?}", target), + } +} + +fn rebuild_host(host_input_path: &Path) { + let c_host_src = host_input_path.with_file_name("host.c"); + let c_host_dest = host_input_path.with_file_name("c_host.o"); + let rust_host_src = host_input_path.with_file_name("host.rs"); + let rust_host_dest = host_input_path.with_file_name("rust_host.o"); + let cargo_host_src = host_input_path.with_file_name("Cargo.toml"); + let host_dest = host_input_path.with_file_name("host.o"); + + // Compile host.c + Command::new("clang") + .env_clear() + .args(&[ + "-c", + c_host_src.to_str().unwrap(), + "-o", + c_host_dest.to_str().unwrap(), + ]) + .output() + .unwrap(); + + if cargo_host_src.exists() { + // Compile and link Cargo.toml, if it exists + let cargo_dir = host_input_path.parent().unwrap(); + let libhost_dir = cargo_dir.join("target").join("release"); + + Command::new("cargo") + .args(&["build", "--release"]) + .current_dir(cargo_dir) + .output() + .unwrap(); + + Command::new("ld") + .env_clear() + .args(&[ + "-r", + "-L", + libhost_dir.to_str().unwrap(), + c_host_dest.to_str().unwrap(), + "-lhost", + "-o", + host_dest.to_str().unwrap(), + ]) + .output() + .unwrap(); + } else if rust_host_src.exists() { + // Compile and link host.rs, if it exists + Command::new("rustc") + .args(&[ + rust_host_src.to_str().unwrap(), + "-o", + rust_host_dest.to_str().unwrap(), + ]) + .output() + .unwrap(); + + Command::new("ld") + .env_clear() + .args(&[ + "-r", + c_host_dest.to_str().unwrap(), + rust_host_dest.to_str().unwrap(), + "-o", + host_dest.to_str().unwrap(), + ]) + .output() + .unwrap(); + + // Clean up rust_host.o + Command::new("rm") + .env_clear() + .args(&[ + "-f", + rust_host_dest.to_str().unwrap(), + c_host_dest.to_str().unwrap(), + ]) + .output() + .unwrap(); + } else { + // Clean up rust_host.o + Command::new("mv") + .env_clear() + .args(&[c_host_dest, host_dest]) + .output() + .unwrap(); + } +} + +fn link_linux( + target: &Triple, + binary_path: &Path, + host_input_path: &Path, + dest_filename: &Path, +) -> io::Result { + let libcrt_path = if Path::new("/usr/lib/x86_64-linux-gnu").exists() { + Path::new("/usr/lib/x86_64-linux-gnu") + } else { + Path::new("/usr/lib") + }; + let libgcc_path = if Path::new("/lib/x86_64-linux-gnu/libgcc_s.so.1").exists() { + Path::new("/lib/x86_64-linux-gnu/libgcc_s.so.1") + } else if Path::new("/usr/lib/x86_64-linux-gnu/libgcc_s.so.1").exists() { + Path::new("/usr/lib/x86_64-linux-gnu/libgcc_s.so.1") + } else { + Path::new("/usr/lib/libgcc_s.so.1") + }; + // NOTE: order of arguments to `ld` matters here! 
+ // The `-l` flags should go after the `.o` arguments + Command::new("ld") + // Don't allow LD_ env vars to affect this + .env_clear() + .args(&[ + "-arch", + arch_str(target), + libcrt_path.join("crti.o").to_str().unwrap(), + libcrt_path.join("crtn.o").to_str().unwrap(), + libcrt_path.join("Scrt1.o").to_str().unwrap(), + "-dynamic-linker", + "/lib64/ld-linux-x86-64.so.2", + // Inputs + host_input_path.to_str().unwrap(), // host.o + dest_filename.to_str().unwrap(), // app.o + // Libraries - see https://github.com/rtfeldman/roc/pull/554#discussion_r496365925 + // for discussion and further references + "-lc", + "-lm", + "-lpthread", + "-ldl", + "-lrt", + "-lutil", + "-lc_nonshared", + "-lc++", + "-lunwind", + libgcc_path.to_str().unwrap(), + // Output + "-o", + binary_path.to_str().unwrap(), // app + ]) + .spawn() +} + +fn link_macos( + target: &Triple, + binary_path: &Path, + host_input_path: &Path, + dest_filename: &Path, +) -> io::Result { + // NOTE: order of arguments to `ld` matters here! + // The `-l` flags should go after the `.o` arguments + Command::new("ld") + // Don't allow LD_ env vars to affect this + .env_clear() + .args(&[ + "-arch", + target.architecture.to_string().as_str(), + // Inputs + host_input_path.to_str().unwrap(), // host.o + dest_filename.to_str().unwrap(), // roc_app.o + // Libraries - see https://github.com/rtfeldman/roc/pull/554#discussion_r496392274 + // for discussion and further references + "-lSystem", + "-lresolv", + "-lpthread", + // "-lrt", // TODO shouldn't we need this? + // "-lc_nonshared", // TODO shouldn't we need this? + // "-lgcc", // TODO will eventually need compiler_rt from gcc or something - see https://github.com/rtfeldman/roc/pull/554#discussion_r496370840 + // "-lunwind", // TODO will eventually need this, see https://github.com/rtfeldman/roc/pull/554#discussion_r496370840 + "-lc++", // TODO shouldn't we need this? + // Output + "-o", + binary_path.to_str().unwrap(), // app + ]) + .spawn() +} diff --git a/compiler/build/src/program.rs b/compiler/build/src/program.rs index 3464df53b4..d8c4126daa 100644 --- a/compiler/build/src/program.rs +++ b/compiler/build/src/program.rs @@ -1,26 +1,21 @@ +use crate::target; use bumpalo::Bump; use inkwell::context::Context; -use inkwell::targets::{ - CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetTriple, -}; +use inkwell::targets::{CodeModel, FileType, RelocMode}; use inkwell::OptimizationLevel; -use roc_collections::all::default_hasher; use roc_gen::layout_id::LayoutIds; -use roc_gen::llvm::build::{build_proc, build_proc_header, module_from_builtins, OptLevel}; -use roc_load::file::LoadedModule; -use roc_mono::ir::{Env, PartialProc, Procs}; -use roc_mono::layout::{Layout, LayoutCache}; -use std::collections::HashSet; +use roc_gen::llvm::build::{build_proc, build_proc_header, module_from_builtins, OptLevel, Scope}; +use roc_load::file::MonomorphizedModule; use std::path::{Path, PathBuf}; -use target_lexicon::{Architecture, OperatingSystem, Triple, Vendor}; +use target_lexicon::Triple; // TODO how should imported modules factor into this? What if those use builtins too? // TODO this should probably use more helper functions // TODO make this polymorphic in the llvm functions so it can be reused for another backend. 
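(For reference, on an Ubuntu-style filesystem layout the `link_linux` function above amounts to roughly this `ld` invocation; a sketch only — the crt and libgcc paths are whichever of the candidate paths checked above actually exist, and `platform/host.o`, `roc_app.o`, and `app` are the names constructed in `build_file`.)

```sh
ld -arch x86-64 \
   /usr/lib/x86_64-linux-gnu/crti.o \
   /usr/lib/x86_64-linux-gnu/crtn.o \
   /usr/lib/x86_64-linux-gnu/Scrt1.o \
   -dynamic-linker /lib64/ld-linux-x86-64.so.2 \
   platform/host.o roc_app.o \
   -lc -lm -lpthread -ldl -lrt -lutil -lc_nonshared -lc++ -lunwind \
   /lib/x86_64-linux-gnu/libgcc_s.so.1 \
   -o app
```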
#[allow(clippy::cognitive_complexity)] -pub fn gen( +pub fn gen_from_mono_module( arena: &Bump, - mut loaded: LoadedModule, + loaded: MonomorphizedModule, filename: PathBuf, target: Triple, dest_filename: &Path, @@ -54,14 +49,6 @@ pub fn gen( println!("\n{}\n", buf); } - // Look up the types and expressions of the `provided` values - let mut decls_by_id = loaded.declarations_by_id; - let home_decls = decls_by_id - .remove(&loaded.module_id) - .expect("Root module ID not found in loaded declarations_by_id"); - - let mut subs = loaded.solved.into_inner(); - // Generate the binary let context = Context::create(); @@ -71,157 +58,8 @@ pub fn gen( let ptr_bytes = target.pointer_width().unwrap().bytes() as u32; - let mut exposed_to_host = - HashSet::with_capacity_and_hasher(loaded.exposed_vars_by_symbol.len(), default_hasher()); - - for (symbol, _) in loaded.exposed_vars_by_symbol { - exposed_to_host.insert(symbol); - } - - let mut ident_ids = loaded.interns.all_ident_ids.remove(&home).unwrap(); - let mut layout_ids = LayoutIds::default(); - let mut procs = Procs::default(); - let mut mono_problems = std::vec::Vec::new(); - let mut layout_cache = LayoutCache::default(); - let mut mono_env = Env { - arena, - subs: &mut subs, - problems: &mut mono_problems, - home, - ident_ids: &mut ident_ids, - }; - - // Add modules' decls to Procs - for (_, mut decls) in decls_by_id - .drain() - .chain(std::iter::once((loaded.module_id, home_decls))) - { - for decl in decls.drain(..) { - use roc_can::def::Declaration::*; - use roc_can::expr::Expr::*; - use roc_can::pattern::Pattern::*; - - match decl { - Declare(def) | Builtin(def) => match def.loc_pattern.value { - Identifier(symbol) => { - match def.loc_expr.value { - Closure { - function_type: annotation, - return_type: ret_var, - recursive: recursivity, - arguments: loc_args, - loc_body: boxed_body, - .. - } => { - let is_tail_recursive = - matches!(recursivity, roc_can::expr::Recursive::TailRecursive); - - let loc_body = *boxed_body; - - // If this is an exposed symbol, we need to - // register it as such. Otherwise, since it - // never gets called by Roc code, it will never - // get specialized! - if exposed_to_host.contains(&symbol) { - let mut pattern_vars = - bumpalo::collections::Vec::with_capacity_in( - loc_args.len(), - arena, - ); - - for (var, _) in loc_args.iter() { - pattern_vars.push(*var); - } - - let layout = layout_cache.from_var(mono_env.arena, annotation, mono_env.subs).unwrap_or_else(|err| - todo!("TODO gracefully handle the situation where we expose a function to the host which doesn't have a valid layout (e.g. maybe the function wasn't monomorphic): {:?}", err) - ); - - procs.insert_exposed( - symbol, - layout, - pattern_vars, //: Vec<'a, Variable>, - annotation, - ret_var, - ); - } - - procs.insert_named( - &mut mono_env, - &mut layout_cache, - symbol, - annotation, - loc_args, - loc_body, - is_tail_recursive, - ret_var, - ); - } - body => { - let annotation = def.expr_var; - let proc = PartialProc { - annotation, - // This is a 0-arity thunk, so it has no arguments. - pattern_symbols: bumpalo::collections::Vec::new_in( - mono_env.arena, - ), - is_self_recursive: false, - body, - }; - - // If this is an exposed symbol, we need to - // register it as such. Otherwise, since it - // never gets called by Roc code, it will never - // get specialized! 
- if exposed_to_host.contains(&symbol) { - let pattern_vars = bumpalo::collections::Vec::new_in(arena); - let ret_layout = layout_cache.from_var(mono_env.arena, annotation, mono_env.subs).unwrap_or_else(|err| - todo!("TODO gracefully handle the situation where we expose a function to the host which doesn't have a valid layout (e.g. maybe the function wasn't monomorphic): {:?}", err) - ); - let layout = - Layout::FunctionPointer(&[], arena.alloc(ret_layout)); - - procs.insert_exposed( - symbol, - layout, - pattern_vars, - // It seems brittle that we're passing - // annotation twice - especially since - // in both cases we're giving the - // annotation to the top-level value, - // not the thunk function it will code - // gen to. It seems to work, but that - // may only be because at present we - // only use the function annotation - // variable during specialization, and - // exposed values are never specialized - // because they must be monomorphic. - annotation, - annotation, - ); - } - - procs.partial_procs.insert(symbol, proc); - procs.module_thunks.insert(symbol); - } - }; - } - other => { - todo!("TODO gracefully handle Declare({:?})", other); - } - }, - DeclareRec(_defs) => { - todo!("TODO support DeclareRec"); - } - InvalidCycle(_loc_idents, _regions) => { - todo!("TODO handle InvalidCycle"); - } - } - } - } - // Compile and add all the Procs before adding main - let mut env = roc_gen::llvm::build::Env { + let env = roc_gen::llvm::build::Env { arena: &arena, builder: &builder, context: &context, @@ -229,36 +67,27 @@ pub fn gen( module, ptr_bytes, leak: false, - exposed_to_host, + exposed_to_host: loaded.exposed_to_host.keys().copied().collect(), }; // Populate Procs further and get the low-level Expr from the canonical Expr - let mut headers = { - let num_headers = match &procs.pending_specializations { - Some(map) => map.len(), - None => 0, - }; - - Vec::with_capacity(num_headers) - }; - let procs = roc_mono::ir::specialize_all(&mut mono_env, procs, &mut layout_cache); - - assert_eq!( - procs.runtime_errors, - roc_collections::all::MutMap::default() - ); - - // Put this module's ident_ids back in the interns, so we can use them in env. - // This must happen *after* building the headers, because otherwise there's - // a conflicting mutable borrow on ident_ids. - env.interns.all_ident_ids.insert(home, ident_ids); + let mut headers = Vec::with_capacity(loaded.procedures.len()); // Add all the Proc headers to the module. // We have to do this in a separate pass first, // because their bodies may reference each other. - for ((symbol, layout), proc) in procs.get_specialized_procs(arena) { + let mut layout_ids = LayoutIds::default(); + + let mut scope = Scope::default(); + for ((symbol, layout), proc) in loaded.procedures { let fn_val = build_proc_header(&env, &mut layout_ids, symbol, &layout, &proc); + if proc.args.is_empty() { + // this is a 0-argument thunk, i.e. a top-level constant definition + // it must be in-scope everywhere in the module! + scope.insert_top_level_thunk(symbol, layout, fn_val); + } + headers.push((proc, fn_val)); } @@ -268,11 +97,13 @@ pub fn gen( // (This approach means we don't have to defensively clone name here.) 
// // println!("\n\nBuilding and then verifying function {:?}\n\n", proc); - build_proc(&env, &mut layout_ids, proc, fn_val); + build_proc(&env, &mut layout_ids, scope.clone(), proc, fn_val); if fn_val.verify(true) { fpm.run_on(&fn_val); } else { + // fn_val.print_to_stderr(); + // env.module.print_to_stderr(); // NOTE: If this fails, uncomment the above println to debug. panic!( "Non-main function failed LLVM verification. Uncomment the above println to debug!" @@ -295,80 +126,10 @@ pub fn gen( // Emit the .o file - // NOTE: arch_str is *not* the same as the beginning of the magic target triple - // string! For example, if it's "x86-64" here, the magic target triple string - // will begin with "x86_64" (with an underscore) instead. - let arch_str = match target.architecture { - Architecture::X86_64 => { - Target::initialize_x86(&InitializationConfig::default()); - - "x86-64" - } - Architecture::Arm(_) if cfg!(feature = "target-arm") => { - // NOTE: why not enable arm and wasm by default? - // - // We had some trouble getting them to link properly. This may be resolved in the - // future, or maybe it was just some weird configuration on one machine. - Target::initialize_arm(&InitializationConfig::default()); - - "arm" - } - Architecture::Wasm32 if cfg!(feature = "target-webassembly") => { - Target::initialize_webassembly(&InitializationConfig::default()); - - "wasm32" - } - _ => panic!( - "TODO gracefully handle unsupported target architecture: {:?}", - target.architecture - ), - }; - let opt = OptimizationLevel::Aggressive; let reloc = RelocMode::Default; let model = CodeModel::Default; - - // Best guide I've found on how to determine these magic strings: - // - // https://stackoverflow.com/questions/15036909/clang-how-to-list-supported-target-architectures - let target_triple_str = match target { - Triple { - architecture: Architecture::X86_64, - vendor: Vendor::Unknown, - operating_system: OperatingSystem::Linux, - .. - } => "x86_64-unknown-linux-gnu", - Triple { - architecture: Architecture::X86_64, - vendor: Vendor::Pc, - operating_system: OperatingSystem::Linux, - .. - } => "x86_64-pc-linux-gnu", - Triple { - architecture: Architecture::X86_64, - vendor: Vendor::Unknown, - operating_system: OperatingSystem::Darwin, - .. - } => "x86_64-unknown-darwin10", - Triple { - architecture: Architecture::X86_64, - vendor: Vendor::Apple, - operating_system: OperatingSystem::Darwin, - .. 
- } => "x86_64-apple-darwin10", - _ => panic!("TODO gracefully handle unsupported target: {:?}", target), - }; - let target_machine = Target::from_name(arch_str) - .unwrap() - .create_target_machine( - &TargetTriple::create(target_triple_str), - arch_str, - "+avx2", // TODO this string was used uncritically from an example, and should be reexamined - opt, - reloc, - model, - ) - .unwrap(); + let target_machine = target::target_machine(&target, opt, reloc, model).unwrap(); target_machine .write_to_file(&env.module, FileType::Object, &dest_filename) diff --git a/compiler/build/src/target.rs b/compiler/build/src/target.rs new file mode 100644 index 0000000000..48aad60072 --- /dev/null +++ b/compiler/build/src/target.rs @@ -0,0 +1,76 @@ +use inkwell::targets::{ + CodeModel, InitializationConfig, RelocMode, Target, TargetMachine, TargetTriple, +}; +use inkwell::OptimizationLevel; +use target_lexicon::{Architecture, OperatingSystem, Triple}; + +pub fn target_triple_str(target: &Triple) -> &'static str { + // Best guide I've found on how to determine these magic strings: + // + // https://stackoverflow.com/questions/15036909/clang-how-to-list-supported-target-architectures + match target { + Triple { + architecture: Architecture::X86_64, + operating_system: OperatingSystem::Linux, + .. + } => "x86_64-unknown-linux-gnu", + Triple { + architecture: Architecture::X86_64, + operating_system: OperatingSystem::Darwin, + .. + } => "x86_64-unknown-darwin10", + _ => panic!("TODO gracefully handle unsupported target: {:?}", target), + } +} + +/// NOTE: arch_str is *not* the same as the beginning of the magic target triple +/// string! For example, if it's "x86-64" here, the magic target triple string +/// will begin with "x86_64" (with an underscore) instead. +pub fn arch_str(target: &Triple) -> &'static str { + // Best guide I've found on how to determine these magic strings: + // + // https://stackoverflow.com/questions/15036909/clang-how-to-list-supported-target-architectures + match target.architecture { + Architecture::X86_64 => { + Target::initialize_x86(&InitializationConfig::default()); + + "x86-64" + } + Architecture::Arm(_) if cfg!(feature = "target-arm") => { + // NOTE: why not enable arm and wasm by default? + // + // We had some trouble getting them to link properly. This may be resolved in the + // future, or maybe it was just some weird configuration on one machine. + Target::initialize_arm(&InitializationConfig::default()); + + "arm" + } + Architecture::Wasm32 if cfg!(feature = "target-webassembly") => { + Target::initialize_webassembly(&InitializationConfig::default()); + + "wasm32" + } + _ => panic!( + "TODO gracefully handle unsupported target architecture: {:?}", + target.architecture + ), + } +} + +pub fn target_machine( + target: &Triple, + opt: OptimizationLevel, + reloc: RelocMode, + model: CodeModel, +) -> Option { + let arch = arch_str(target); + + Target::from_name(arch).unwrap().create_target_machine( + &TargetTriple::create(target_triple_str(target)), + arch, + "+avx2", // TODO this string was used uncritically from an example, and should be reexamined + opt, + reloc, + model, + ) +} diff --git a/compiler/builtins/bitcode/src/lib.rs b/compiler/builtins/bitcode/src/lib.rs index 57dfd48ba8..b64bbf6580 100644 --- a/compiler/builtins/bitcode/src/lib.rs +++ b/compiler/builtins/bitcode/src/lib.rs @@ -6,8 +6,8 @@ mod libm; -/// TODO replace this with a normal Inkwell build_cast call - this was just -/// used as a proof of concept for getting bitcode importing working! 
+/// TODO this is no longer used. Feel free to delete it the next time +/// we need to rebuild builtins.bc! #[no_mangle] pub fn i64_to_f64_(num: i64) -> f64 { num as f64 diff --git a/compiler/builtins/src/std.rs b/compiler/builtins/src/std.rs index 95e580d013..55a8a80bc5 100644 --- a/compiler/builtins/src/std.rs +++ b/compiler/builtins/src/std.rs @@ -12,6 +12,7 @@ pub enum Mode { Uniqueness, } +#[derive(Debug, Clone)] pub struct StdLib { pub mode: Mode, pub types: MutMap, diff --git a/compiler/can/src/env.rs b/compiler/can/src/env.rs index c9dd72b3c5..962906a8fd 100644 --- a/compiler/can/src/env.rs +++ b/compiler/can/src/env.rs @@ -25,7 +25,7 @@ pub struct Env<'a> { pub tailcallable_symbol: Option<Symbol>, /// Symbols which were referenced by qualified lookups. - pub referenced_symbols: MutSet<Symbol>, + pub qualified_lookups: MutSet<Symbol>, pub ident_ids: IdentIds, pub exposed_ident_ids: IdentIds, @@ -46,7 +46,7 @@ impl<'a> Env<'a> { exposed_ident_ids, problems: Vec::new(), closures: MutMap::default(), - referenced_symbols: MutSet::default(), + qualified_lookups: MutSet::default(), tailcallable_symbol: None, } } @@ -77,7 +77,7 @@ impl<'a> Env<'a> { Some(ident_id) => { let symbol = Symbol::new(module_id, *ident_id); - self.referenced_symbols.insert(symbol); + self.qualified_lookups.insert(symbol); Ok(symbol) } @@ -101,7 +101,7 @@ impl<'a> Env<'a> { Some(ident_id) => { let symbol = Symbol::new(module_id, *ident_id); - self.referenced_symbols.insert(symbol); + self.qualified_lookups.insert(symbol); Ok(symbol) } diff --git a/compiler/can/src/expr.rs b/compiler/can/src/expr.rs index b4e73bebe7..0660e274de 100644 --- a/compiler/can/src/expr.rs +++ b/compiler/can/src/expr.rs @@ -126,6 +126,7 @@ pub enum Expr { }, /// field accessor as a function, e.g. (.foo) expr Accessor { + function_var: Variable, record_var: Variable, closure_var: Variable, ext_var: Variable, @@ -550,6 +551,7 @@ pub fn canonicalize_expr<'a>( } ast::Expr::AccessorFunction(field) => ( Accessor { + function_var: var_store.fresh(), record_var: var_store.fresh(), ext_var: var_store.fresh(), closure_var: var_store.fresh(), diff --git a/compiler/can/src/module.rs b/compiler/can/src/module.rs index a6230251b4..ab6a2c9a4b 100644 --- a/compiler/can/src/module.rs +++ b/compiler/can/src/module.rs @@ -1,4 +1,3 @@ -use crate::builtins::builtin_defs; use crate::def::{canonicalize_defs, sort_can_defs, Declaration}; use crate::env::Env; use crate::expr::Output; @@ -115,7 +114,7 @@ pub fn canonicalize_module_defs<'a>( } } - let (mut defs, _scope, output, symbols_introduced) = canonicalize_defs( + let (defs, _scope, output, symbols_introduced) = canonicalize_defs( &mut env, Output::default(), var_store, @@ -149,17 +148,12 @@ pub fn canonicalize_module_defs<'a>( } // Gather up all the symbols that were referenced from other modules. - for symbol in env.referenced_symbols.iter() { + for symbol in env.qualified_lookups.iter() { references.insert(*symbol); } - // Add defs for any referenced builtins. - for (symbol, def) in builtin_defs(var_store) { - if output.references.lookups.contains(&symbol) || output.references.calls.contains(&symbol) - { - defs.can_defs_by_symbol.insert(symbol, def); - } - } + // NOTE previously we inserted builtin defs into the list of defs here + // this is now done later, in file.rs. match sort_can_defs(&mut env, defs, Output::default()) { (Ok(declarations), output) => { @@ -250,6 +244,11 @@ pub fn canonicalize_module_defs<'a>( references.insert(symbol); } + // Gather up all the symbols that were referenced from other modules.
+ for symbol in env.qualified_lookups.iter() { + references.insert(*symbol); + } + Ok(ModuleOutput { aliases, rigid_variables, diff --git a/compiler/constrain/src/expr.rs b/compiler/constrain/src/expr.rs index 685bf37f1b..928ef9d1a0 100644 --- a/compiler/constrain/src/expr.rs +++ b/compiler/constrain/src/expr.rs @@ -675,6 +675,7 @@ pub fn constrain_expr( ) } Accessor { + function_var, field, record_var, closure_var, @@ -701,16 +702,19 @@ pub fn constrain_expr( region, ); + let function_type = Type::Function( + vec![record_type], + Box::new(Type::Variable(*closure_var)), + Box::new(field_type), + ); + exists( - vec![*record_var, *closure_var, field_var, ext_var], + vec![*record_var, *function_var, *closure_var, field_var, ext_var], And(vec![ + Eq(function_type.clone(), expected, category.clone(), region), Eq( - Type::Function( - vec![record_type], - Box::new(Type::Variable(*closure_var)), - Box::new(field_type), - ), - expected, + function_type, + NoExpectation(Variable(*function_var)), category, region, ), diff --git a/compiler/constrain/src/module.rs b/compiler/constrain/src/module.rs index c51100c9fe..509f3bd7b6 100644 --- a/compiler/constrain/src/module.rs +++ b/compiler/constrain/src/module.rs @@ -174,7 +174,11 @@ pub struct FreeVars { pub wildcards: Vec, } -fn to_type(solved_type: &SolvedType, free_vars: &mut FreeVars, var_store: &mut VarStore) -> Type { +pub fn to_type( + solved_type: &SolvedType, + free_vars: &mut FreeVars, + var_store: &mut VarStore, +) -> Type { use roc_types::solved_types::SolvedType::*; match solved_type { diff --git a/compiler/constrain/src/uniq.rs b/compiler/constrain/src/uniq.rs index 417dc068a1..ef6ed59689 100644 --- a/compiler/constrain/src/uniq.rs +++ b/compiler/constrain/src/uniq.rs @@ -67,6 +67,7 @@ pub fn constrain_decls( // perform usage analysis on the whole file let mut var_usage = VarUsage::default(); + for decl in decls.iter().rev() { // NOTE: rigids are empty because they are not shared between top-level definitions match decl { @@ -1445,6 +1446,7 @@ pub fn constrain_expr( } Accessor { + function_var, field, record_var, closure_var, @@ -1490,6 +1492,7 @@ pub fn constrain_expr( exists( vec![ *record_var, + *function_var, *closure_var, *field_var, *ext_var, @@ -1497,7 +1500,16 @@ pub fn constrain_expr( field_uniq_var, record_uniq_var, ], - And(vec![Eq(fn_type, expected, category, region), record_con]), + And(vec![ + Eq(fn_type.clone(), expected, category.clone(), region), + Eq( + fn_type, + Expected::NoExpectation(Variable(*function_var)), + category, + region, + ), + record_con, + ]), ) } RuntimeError(_) => True, diff --git a/compiler/gen/Cargo.toml b/compiler/gen/Cargo.toml index f132dca97b..00b197c742 100644 --- a/compiler/gen/Cargo.toml +++ b/compiler/gen/Cargo.toml @@ -44,6 +44,7 @@ target-lexicon = "0.10" [dev-dependencies] roc_can = { path = "../can" } roc_parse = { path = "../parse" } +roc_load = { path = "../load" } pretty_assertions = "0.5.1" maplit = "1.0.1" indoc = "0.3.3" diff --git a/compiler/gen/src/layout_id.rs b/compiler/gen/src/layout_id.rs index 06133558b4..263f011087 100644 --- a/compiler/gen/src/layout_id.rs +++ b/compiler/gen/src/layout_id.rs @@ -10,7 +10,7 @@ impl LayoutId { // Returns something like "foo#1" when given a symbol that interns to "foo" // and a LayoutId of 1. 
pub fn to_symbol_string(self, symbol: Symbol, interns: &Interns) -> String { - format!("{}#{}", symbol.ident_string(interns), self.0) + format!("{}_{}", symbol.ident_string(interns), self.0) } } diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index 2d422b8f79..4a5a8b0a64 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -31,7 +31,7 @@ use inkwell::OptimizationLevel; use inkwell::{AddressSpace, IntPredicate}; use roc_collections::all::{ImMap, MutSet}; use roc_module::low_level::LowLevel; -use roc_module::symbol::{Interns, Symbol}; +use roc_module::symbol::{Interns, ModuleId, Symbol}; use roc_mono::ir::{JoinPointId, Wrapped}; use roc_mono::layout::{Builtin, Layout, MemoryMode}; use target_lexicon::CallingConvention; @@ -53,6 +53,7 @@ pub enum OptLevel { #[derive(Default, Debug, Clone, PartialEq)] pub struct Scope<'a, 'ctx> { symbols: ImMap, PointerValue<'ctx>)>, + pub top_level_thunks: ImMap, FunctionValue<'ctx>)>, join_points: ImMap, &'a [PointerValue<'ctx>])>, } @@ -63,23 +64,23 @@ impl<'a, 'ctx> Scope<'a, 'ctx> { pub fn insert(&mut self, symbol: Symbol, value: (Layout<'a>, PointerValue<'ctx>)) { self.symbols.insert(symbol, value); } + pub fn insert_top_level_thunk( + &mut self, + symbol: Symbol, + layout: Layout<'a>, + function_value: FunctionValue<'ctx>, + ) { + self.top_level_thunks + .insert(symbol, (layout, function_value)); + } fn remove(&mut self, symbol: &Symbol) { self.symbols.remove(symbol); } - /* - fn get_join_point(&self, symbol: &JoinPointId) -> Option<&PhiValue<'ctx>> { - self.join_points.get(symbol) + + pub fn retain_top_level_thunks_for_module(&mut self, module_id: ModuleId) { + self.top_level_thunks + .retain(|s, _| s.module_id() == module_id); } - fn remove_join_point(&mut self, symbol: &JoinPointId) { - self.join_points.remove(symbol); - } - fn get_mut_join_point(&mut self, symbol: &JoinPointId) -> Option<&mut PhiValue<'ctx>> { - self.join_points.get_mut(symbol) - } - fn insert_join_point(&mut self, symbol: JoinPointId, value: PhiValue<'ctx>) { - self.join_points.insert(symbol, value); - } - */ } pub struct Env<'a, 'ctx, 'env> { @@ -416,26 +417,47 @@ pub fn build_roc_main<'a, 'ctx, 'env>( env.arena.alloc(roc_main_fn) } +pub fn promote_to_main_function<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + layout_ids: &mut LayoutIds<'a>, + symbol: Symbol, + layout: &Layout<'a>, +) -> (&'static str, &'a FunctionValue<'ctx>) { + let fn_name = layout_ids + .get(symbol, layout) + .to_symbol_string(symbol, &env.interns); + + let wrapped = env.module.get_function(&fn_name).unwrap(); + + make_main_function_help(env, layout, wrapped) +} + pub fn make_main_function<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, layout_ids: &mut LayoutIds<'a>, layout: &Layout<'a>, main_body: &roc_mono::ir::Stmt<'a>, ) -> (&'static str, &'a FunctionValue<'ctx>) { + // internal main function + let roc_main_fn = *build_roc_main(env, layout_ids, layout, main_body); + + make_main_function_help(env, layout, roc_main_fn) +} + +fn make_main_function_help<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + layout: &Layout<'a>, + roc_main_fn: FunctionValue<'ctx>, +) -> (&'static str, &'a FunctionValue<'ctx>) { + // build the C calling convention wrapper use inkwell::types::BasicType; use PassVia::*; let context = env.context; let builder = env.builder; - let u8_ptr = context.i8_type().ptr_type(AddressSpace::Generic); - - // internal main function - let roc_main_fn = *build_roc_main(env, layout_ids, layout, main_body); - - // build the C calling convention 
wrapper - let main_fn_name = "$Test.main"; + let u8_ptr = env.context.i8_type().ptr_type(AddressSpace::Generic); let fields = [Layout::Builtin(Builtin::Int64), layout.clone()]; let main_return_layout = Layout::Struct(&fields); @@ -1136,17 +1158,32 @@ pub fn build_exp_expr<'a, 'ctx, 'env>( list_literal(env, inplace, scope, elem_layout, elems) } FunctionPointer(symbol, layout) => { - let fn_name = layout_ids - .get(*symbol, layout) - .to_symbol_string(*symbol, &env.interns); - let ptr = env - .module - .get_function(fn_name.as_str()) - .unwrap_or_else(|| panic!("Could not get pointer to unknown function {:?}", symbol)) - .as_global_value() - .as_pointer_value(); + match scope.top_level_thunks.get(symbol) { + Some((_layout, function_value)) => { + // this is a 0-argument thunk, evaluate it! + let call = + env.builder + .build_call(*function_value, &[], "evaluate_top_level_thunk"); - BasicValueEnum::PointerValue(ptr) + call.try_as_basic_value().left().unwrap() + } + None => { + // this is a function pointer, store it + let fn_name = layout_ids + .get(*symbol, layout) + .to_symbol_string(*symbol, &env.interns); + let ptr = env + .module + .get_function(fn_name.as_str()) + .unwrap_or_else(|| { + panic!("Could not get pointer to unknown function {:?}", symbol) + }) + .as_global_value() + .as_pointer_value(); + + BasicValueEnum::PointerValue(ptr) + } + } } RuntimeErrorFunction(_) => todo!(), } @@ -1511,16 +1548,6 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>( increment_refcount_layout(env, parent, layout_ids, value, &layout); } - /* - match layout { - Layout::Builtin(Builtin::List(MemoryMode::Refcounted, _)) => { - increment_refcount_list(env, parent, value.into_struct_value()); - build_exp_stmt(env, layout_ids, scope, parent, cont) - } - _ => build_exp_stmt(env, layout_ids, scope, parent, cont), - } - */ - build_exp_stmt(env, layout_ids, scope, parent, cont) } Dec(symbol, cont) => { @@ -1836,6 +1863,7 @@ pub fn build_proc_header<'a, 'ctx, 'env>( pub fn build_proc<'a, 'ctx, 'env>( env: &'a Env<'a, 'ctx, 'env>, layout_ids: &mut LayoutIds<'a>, + mut scope: Scope<'a, 'ctx>, proc: roc_mono::ir::Proc<'a>, fn_val: FunctionValue<'ctx>, ) { @@ -1848,8 +1876,6 @@ pub fn build_proc<'a, 'ctx, 'env>( builder.position_at_end(entry); - let mut scope = Scope::default(); - // Add args to scope for (arg_val, (layout, arg_symbol)) in fn_val.get_param_iter().zip(args) { set_name(arg_val, arg_symbol.ident_string(&env.interns)); @@ -1899,17 +1925,18 @@ fn call_with_args<'a, 'ctx, 'env>( let fn_name = layout_ids .get(symbol, layout) .to_symbol_string(symbol, &env.interns); + let fn_name = fn_name.as_str(); - let fn_val = env - .module - .get_function(fn_name.as_str()) - .unwrap_or_else(|| { - if symbol.is_builtin() { - panic!("Unrecognized builtin function: {:?}", symbol) - } else { - panic!("Unrecognized non-builtin function: {:?}", symbol) - } - }); + let fn_val = env.module.get_function(fn_name).unwrap_or_else(|| { + if symbol.is_builtin() { + panic!("Unrecognized builtin function: {:?}", fn_name) + } else { + panic!( + "Unrecognized non-builtin function: {:?} {:?}", + fn_name, layout + ) + } + }); let call = env.builder.build_call(fn_val, args, "call"); @@ -2619,8 +2646,13 @@ fn build_int_unary_op<'a, 'ctx, 'env>( )) } NumToFloat => { - // TODO specialize this to be not just for i64! - call_bitcode_fn(NumToFloat, env, &[arg.into()], "i64_to_f64_") + // This is an Int, so we need to convert it. 
+ bd.build_cast( + InstructionOpcode::SIToFP, + arg, + env.context.f64_type(), + "i64_to_f64", + ) } _ => { unreachable!("Unrecognized int unary operation: {:?}", op); diff --git a/compiler/gen/tests/gen_list.rs b/compiler/gen/tests/gen_list.rs index ab6e7b9a0a..48348c1a8b 100644 --- a/compiler/gen/tests/gen_list.rs +++ b/compiler/gen/tests/gen_list.rs @@ -1019,8 +1019,7 @@ mod gen_list { assert_evals_to!( indoc!( r#" - main = \shared -> - + wrapper = \shared -> # This should not mutate the original x = when List.get (List.set shared 1 7.7) 1 is @@ -1034,7 +1033,7 @@ mod gen_list { { x, y } - main [ 2.1, 4.3 ] + wrapper [ 2.1, 4.3 ] "# ), (7.7, 4.3), @@ -1047,23 +1046,20 @@ mod gen_list { assert_evals_to!( indoc!( r#" - main = \{} -> - shared = [ 2, 4 ] + shared = [ 2, 4 ] - # This List.set is out of bounds, and should have no effect - x = - when List.get (List.set shared 422 0) 1 is - Ok num -> num - Err _ -> 0 + # This List.set is out of bounds, and should have no effect + x = + when List.get (List.set shared 422 0) 1 is + Ok num -> num + Err _ -> 0 - y = - when List.get shared 1 is - Ok num -> num - Err _ -> 0 + y = + when List.get shared 1 is + Ok num -> num + Err _ -> 0 - { x, y } - - main {} + { x, y } "# ), (4, 4), @@ -1149,16 +1145,21 @@ mod gen_list { assert_evals_to!( indoc!( r#" - swap : Int, Int, List a -> List a - swap = \i, j, list -> - when Pair (List.get list i) (List.get list j) is - Pair (Ok atI) (Ok atJ) -> - list - |> List.set i atJ - |> List.set j atI + app Quicksort provides [ main ] imports [] - _ -> - [] + + swap : Int, Int, List a -> List a + swap = \i, j, list -> + when Pair (List.get list i) (List.get list j) is + Pair (Ok atI) (Ok atJ) -> + list + |> List.set i atJ + |> List.set j atI + + _ -> + [] + + main = swap 0 1 [ 1, 2 ] "# ), diff --git a/compiler/gen/tests/gen_num.rs b/compiler/gen/tests/gen_num.rs index 16129a03f3..795c26d375 100644 --- a/compiler/gen/tests/gen_num.rs +++ b/compiler/gen/tests/gen_num.rs @@ -482,12 +482,12 @@ mod gen_num { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> when 10 is x if x == 5 -> 0 _ -> 42 - main {} + wrapper {} "# ), 42, @@ -500,12 +500,12 @@ mod gen_num { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> when 10 is x if x == 10 -> 42 _ -> 0 - main {} + wrapper {} "# ), 42, diff --git a/compiler/gen/tests/gen_primitives.rs b/compiler/gen/tests/gen_primitives.rs index 455bc504c2..0e3b527a21 100644 --- a/compiler/gen/tests/gen_primitives.rs +++ b/compiler/gen/tests/gen_primitives.rs @@ -276,10 +276,10 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> (\a -> a) 5 - main {} + wrapper {} "# ), 5, @@ -292,14 +292,14 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> alwaysFloatIdentity : Int -> (Float -> Float) alwaysFloatIdentity = \num -> (\a -> a) (alwaysFloatIdentity 2) 3.14 - main {} + wrapper {} "# ), 3.14, @@ -402,8 +402,9 @@ mod gen_primitives { i64 ); } + #[test] - fn gen_nested_defs() { + fn gen_nested_defs_old() { assert_evals_to!( indoc!( r#" @@ -443,6 +444,28 @@ mod gen_primitives { ); } + #[test] + fn let_x_in_x() { + assert_evals_to!( + indoc!( + r#" + x = 5 + + answer = + 1337 + + unused = + nested = 17 + nested + + answer + "# + ), + 1337, + i64 + ); + } + #[test] fn factorial() { assert_evals_to!( @@ -469,15 +492,15 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Peano : [ S Peano, Z ] + Peano : [ S Peano, Z ] - three : Peano - three = S (S (S Z)) + three : Peano + three = S (S (S Z)) - when 
three is - Z -> 2 - S _ -> 1 - "# + when three is + Z -> 2 + S _ -> 1 + "# ), 1, i64 @@ -489,31 +512,51 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Peano : [ S Peano, Z ] + Peano : [ S Peano, Z ] - three : Peano - three = S (S (S Z)) + three : Peano + three = S (S (S Z)) - when three is - S (S _) -> 1 - S (_) -> 0 - Z -> 0 - "# + when three is + S (S _) -> 1 + S (_) -> 0 + Z -> 0 + "# ), 1, i64 ); } + #[test] + fn top_level_constant() { + assert_evals_to!( + indoc!( + r#" + app LinkedListLen0 provides [ main ] imports [] + + pi = 3.1415 + + main = + pi + pi + "# + ), + 3.1415 + 3.1415, + f64 + ); + } + #[test] fn linked_list_len_0() { assert_evals_to!( indoc!( r#" + app LinkedListLen0 provides [ main ] imports [] + LinkedList a : [ Nil, Cons a (LinkedList a) ] nil : LinkedList Int - nil = Nil + nil = Nil length : LinkedList a -> Int length = \list -> @@ -522,13 +565,12 @@ mod gen_primitives { Cons _ rest -> 1 + length rest - length nil + main = + length nil "# ), 0, - i64, - |x| x, - false + i64 ); } @@ -537,10 +579,12 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" + app LinkedListLenTwice0 provides [ main ] imports [] + LinkedList a : [ Nil, Cons a (LinkedList a) ] nil : LinkedList Int - nil = Nil + nil = Nil length : LinkedList a -> Int length = \list -> @@ -548,13 +592,12 @@ mod gen_primitives { Nil -> 0 Cons _ rest -> 1 + length rest - length nil + length nil + main = + length nil + length nil "# ), 0, - i64, - |x| x, - false + i64 ); } @@ -563,10 +606,12 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" + app Test provides [ main ] imports [] + LinkedList a : [ Nil, Cons a (LinkedList a) ] one : LinkedList Int - one = Cons 1 Nil + one = Cons 1 Nil length : LinkedList a -> Int length = \list -> @@ -574,14 +619,12 @@ mod gen_primitives { Nil -> 0 Cons _ rest -> 1 + length rest - - length one + main = + length one "# ), 1, - i64, - |x| x, - false + i64 ); } @@ -590,10 +633,12 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" + app Test provides [ main ] imports [] + LinkedList a : [ Nil, Cons a (LinkedList a) ] one : LinkedList Int - one = Cons 1 Nil + one = Cons 1 Nil length : LinkedList a -> Int length = \list -> @@ -601,14 +646,12 @@ mod gen_primitives { Nil -> 0 Cons _ rest -> 1 + length rest - - length one + length one - "# + main = + length one + length one + "# ), 2, - i64, - |x| x, - false + i64 ); } @@ -617,10 +660,12 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" + app Test provides [ main ] imports [] + LinkedList a : [ Nil, Cons a (LinkedList a) ] three : LinkedList Int - three = Cons 3 (Cons 2 (Cons 1 Nil)) + three = Cons 3 (Cons 2 (Cons 1 Nil)) length : LinkedList a -> Int length = \list -> @@ -629,52 +674,83 @@ mod gen_primitives { Cons _ rest -> 1 + length rest - length three + main = + length three "# ), 3, - i64, - |x| x, - false - ); - } - - #[test] - fn linked_list_sum() { - assert_evals_to!( - indoc!( - r#" - LinkedList a : [ Nil, Cons a (LinkedList a) ] - - three : LinkedList Int - three = Cons 3 (Cons 2 (Cons 1 Nil)) - - sum : LinkedList a -> Int - sum = \list -> - when list is - Nil -> 0 - Cons x rest -> x + sum rest - - sum three - "# - ), - 3 + 2 + 1, i64 ); } #[test] - fn linked_list_map() { - // `f` is not actually a function, so the call to it fails currently + fn linked_list_sum_num_a() { assert_evals_to!( indoc!( r#" + app Test provides [ main ] imports [] + LinkedList a : [ Nil, Cons a (LinkedList a) ] three : LinkedList Int - three = Cons 3 (Cons 2 (Cons 1 Nil)) + three = Cons 3 (Cons 2 (Cons 1 Nil)) - sum : LinkedList a 
-> Int + + sum : LinkedList (Num a) -> Num a + sum = \list -> + when list is + Nil -> 0 + Cons x rest -> x + sum rest + + main = + sum three + "# + ), + 3 + 2 + 1, + i64 + ) + } + + #[test] + fn linked_list_sum_int() { + assert_evals_to!( + indoc!( + r#" + app Test provides [ main ] imports [] + + LinkedList a : [ Nil, Cons a (LinkedList a) ] + + zero : LinkedList Int + zero = Nil + + sum : LinkedList Int -> Int + sum = \list -> + when list is + Nil -> 0 + Cons x rest -> x + sum rest + + main = + sum zero + "# + ), + 0, + i64 + ) + } + + #[test] + fn linked_list_map() { + assert_evals_to!( + indoc!( + r#" + app Test provides [ main ] imports [] + + LinkedList a : [ Nil, Cons a (LinkedList a) ] + + three : LinkedList Int + three = Cons 3 (Cons 2 (Cons 1 Nil)) + + sum : LinkedList (Num a) -> Num a sum = \list -> when list is Nil -> 0 @@ -686,7 +762,8 @@ mod gen_primitives { Nil -> Nil Cons x rest -> Cons (f x) (map f rest) - sum (map (\_ -> 1) three) + main = + sum (map (\_ -> 1) three) "# ), 3, @@ -699,15 +776,15 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Maybe a : [ Nothing, Just a ] + Maybe a : [ Nothing, Just a ] - x : Maybe (Maybe Int) - x = Just (Just 41) + x : Maybe (Maybe Int) + x = Just (Just 41) - when x is - Just (Just v) -> v + 0x1 - _ -> 0x1 - "# + when x is + Just (Just v) -> v + 0x1 + _ -> 0x1 + "# ), 42, i64 @@ -716,16 +793,16 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Maybe a : [ Nothing, Just a ] + Maybe a : [ Nothing, Just a ] - x : Maybe (Maybe Int) - x = Just Nothing + x : Maybe (Maybe Int) + x = Just Nothing - when x is - Just (Just v) -> v + 0x1 - Just Nothing -> 0x2 - Nothing -> 0x1 - "# + when x is + Just (Just v) -> v + 0x1 + Just Nothing -> 0x2 + Nothing -> 0x1 + "# ), 2, i64 @@ -734,16 +811,16 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Maybe a : [ Nothing, Just a ] + Maybe a : [ Nothing, Just a ] - x : Maybe (Maybe Int) - x = Nothing + x : Maybe (Maybe Int) + x = Nothing - when x is - Just (Just v) -> v + 0x1 - Just Nothing -> 0x2 - Nothing -> 0x1 - "# + when x is + Just (Just v) -> v + 0x1 + Just Nothing -> 0x2 + Nothing -> 0x1 + "# ), 1, i64 @@ -755,16 +832,16 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Peano : [ S Peano, Z ] + Peano : [ S Peano, Z ] - three : Peano - three = S (S (S Z)) + three : Peano + three = S (S (S Z)) - when three is - S (S _) -> 1 - S (_) -> 2 - Z -> 3 - "# + when three is + S (S _) -> 1 + S (_) -> 2 + Z -> 3 + "# ), 1, i64 @@ -773,16 +850,16 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Peano : [ S Peano, Z ] + Peano : [ S Peano, Z ] - three : Peano - three = S Z + three : Peano + three = S Z - when three is - S (S _) -> 1 - S (_) -> 2 - Z -> 3 - "# + when three is + S (S _) -> 1 + S (_) -> 2 + Z -> 3 + "# ), 2, i64 @@ -791,16 +868,16 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - Peano : [ S Peano, Z ] + Peano : [ S Peano, Z ] - three : Peano - three = Z + three : Peano + three = Z - when three is - S (S _) -> 1 - S (_) -> 2 - Z -> 3 - "# + when three is + S (S _) -> 1 + S (_) -> 2 + Z -> 3 + "# ), 3, i64 @@ -813,11 +890,11 @@ mod gen_primitives { assert_evals_to!( indoc!( r#" - if True then - x + z - else - y + z - "# + if True then + x + z + else + y + z + "# ), 3, i64 diff --git a/compiler/gen/tests/gen_records.rs b/compiler/gen/tests/gen_records.rs index ffcb65cca3..d9e5dcf9cd 100644 --- a/compiler/gen/tests/gen_records.rs +++ b/compiler/gen/tests/gen_records.rs @@ -410,9 +410,9 @@ mod gen_records { { x: Blue, y ? 3 } -> y { x: Red, y ? 
5 } -> y - a = f { x: Blue, y: 7 } + a = f { x: Blue, y: 7 } b = f { x: Blue } - c = f { x: Red, y: 11 } + c = f { x: Red, y: 11 } d = f { x: Red } a * b * c * d @@ -617,7 +617,7 @@ mod gen_records { assert_evals_to!( indoc!( r#" - { a: 3.14, b: 0x1 } + { a: 3.14, b: 0x1 } "# ), (3.14, 0x1), @@ -678,15 +678,58 @@ mod gen_records { } #[test] - fn just_to_be_sure() { + fn accessor() { assert_evals_to!( indoc!( r#" - { a: 1, b : 2, c : 3 } + .foo { foo: 4 } + .foo { bar: 6.28, foo: 3 } "# ), - [1, 2, 3], - [i64; 3] + 7, + i64 + ); + } + + #[test] + fn accessor_single_element_record() { + assert_evals_to!( + indoc!( + r#" + .foo { foo: 4 } + "# + ), + 4, + i64 + ); + } + + #[test] + fn update_record() { + assert_evals_to!( + indoc!( + r#" + rec = { foo: 42, bar: 6.28 } + + { rec & foo: rec.foo + 1 } + "# + ), + (6.28, 43), + (f64, i64) + ); + } + + #[test] + fn update_single_element_record() { + assert_evals_to!( + indoc!( + r#" + rec = { foo: 42} + + { rec & foo: rec.foo + 1 } + "# + ), + 43, + i64 ); } } diff --git a/compiler/gen/tests/gen_tags.rs b/compiler/gen/tests/gen_tags.rs index 67bf2e8830..01242bda23 100644 --- a/compiler/gen/tests/gen_tags.rs +++ b/compiler/gen/tests/gen_tags.rs @@ -455,12 +455,12 @@ mod gen_tags { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> when 2 is 2 if False -> 0 _ -> 42 - main {} + wrapper {} "# ), 42, @@ -473,12 +473,12 @@ mod gen_tags { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> when 2 is 2 if True -> 42 _ -> 0 - main {} + wrapper {} "# ), 42, @@ -491,12 +491,12 @@ mod gen_tags { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> when 2 is _ if False -> 0 _ -> 42 - main {} + wrapper {} "# ), 42, @@ -674,7 +674,7 @@ mod gen_tags { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> x : [ Red, White, Blue ] x = Blue @@ -686,7 +686,7 @@ mod gen_tags { y - main {} + wrapper {} "# ), 3.1, @@ -699,7 +699,7 @@ mod gen_tags { assert_evals_to!( indoc!( r#" - main = \{} -> + wrapper = \{} -> y = when 1 + 2 is 3 -> 3 @@ -708,7 +708,7 @@ mod gen_tags { y - main {} + wrapper {} "# ), 3, diff --git a/compiler/gen/tests/helpers/eval.rs b/compiler/gen/tests/helpers/eval.rs index a12e284202..4268255912 100644 --- a/compiler/gen/tests/helpers/eval.rs +++ b/compiler/gen/tests/helpers/eval.rs @@ -1,9 +1,22 @@ -use roc_collections::all::MutSet; -use roc_types::subs::Subs; +use roc_collections::all::{MutMap, MutSet}; -pub fn helper_without_uniqueness<'a>( +fn promote_expr_to_module(src: &str) -> String { + let mut buffer = String::from("app Test provides [ main ] imports []\n\nmain =\n"); + + for line in src.lines() { + // indent the body! 
+ buffer.push_str(" "); + buffer.push_str(line); + buffer.push('\n'); + } + + buffer +} + +pub fn helper<'a>( arena: &'a bumpalo::Bump, src: &str, + stdlib: roc_builtins::std::StdLib, leak: bool, context: &'a inkwell::context::Context, ) -> ( @@ -11,26 +24,62 @@ pub fn helper_without_uniqueness<'a>( Vec, inkwell::execution_engine::ExecutionEngine<'a>, ) { - use crate::helpers::{can_expr, infer_expr, CanExprOut}; use inkwell::OptimizationLevel; - use roc_gen::llvm::build::{build_proc, build_proc_header}; - use roc_mono::layout::Layout; + use roc_gen::llvm::build::{build_proc, build_proc_header, Scope}; + use std::path::{Path, PathBuf}; + + let stdlib_mode = stdlib.mode; + let filename = PathBuf::from("Test.roc"); + let src_dir = Path::new("fake/test/path"); + + let module_src; + let temp; + if src.starts_with("app") { + // this is already a module + module_src = src; + } else { + // this is an expression, promote it to a module + temp = promote_expr_to_module(src); + module_src = &temp; + } + + let exposed_types = MutMap::default(); + let loaded = roc_load::file::load_and_monomorphize_from_str( + arena, + filename, + &module_src, + stdlib, + src_dir, + exposed_types, + ); + + let loaded = loaded.expect("failed to load module"); + + use roc_load::file::MonomorphizedModule; + let MonomorphizedModule { + can_problems, + type_problems, + mono_problems, + mut procedures, + interns, + exposed_to_host, + .. + } = loaded; + + debug_assert_eq!(exposed_to_host.len(), 1); + let main_fn_symbol = exposed_to_host.keys().copied().nth(0).unwrap(); + + let (_, main_fn_layout) = procedures + .keys() + .find(|(s, _)| *s == main_fn_symbol) + .unwrap() + .clone(); let target = target_lexicon::Triple::host(); let ptr_bytes = target.pointer_width().unwrap().bytes() as u32; - let CanExprOut { - loc_expr, - var_store, - var, - constraint, - home, - interns, - problems, - .. - } = can_expr(src); // don't panic based on the errors here, so we can test that RuntimeError generates the correct code - let errors = problems + let errors = can_problems .into_iter() .filter(|problem| { use roc_problem::can::Problem::*; @@ -43,15 +92,18 @@ pub fn helper_without_uniqueness<'a>( }) .collect::>(); - let subs = Subs::new(var_store.into()); - let mut unify_problems = Vec::new(); - let (content, mut subs, solve_env) = infer_expr(subs, &mut unify_problems, &constraint, var); - assert_eq!( - unify_problems, + type_problems, Vec::new(), "Encountered type mismatches: {:?}", - unify_problems + type_problems, + ); + + assert_eq!( + mono_problems, + Vec::new(), + "Encountered monomorphization errors: {:?}", + mono_problems, ); let module = roc_gen::llvm::build::module_from_builtins(context, "app"); @@ -66,19 +118,12 @@ pub fn helper_without_uniqueness<'a>( let (module_pass, function_pass) = roc_gen::llvm::build::construct_optimization_passes(module, opt_level); - // Compute main_fn_type before moving subs to Env - let return_layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| { - panic!( - "Code gen error in NON-OPTIMIZED test: could not convert to layout. Err was {:?}", - err - ) - }); let execution_engine = module .create_jit_execution_engine(OptimizationLevel::None) .expect("Error creating JIT execution engine for test"); // Compile and add all the Procs before adding main - let mut env = roc_gen::llvm::build::Env { + let env = roc_gen::llvm::build::Env { arena: &arena, builder: &builder, context, @@ -86,85 +131,71 @@ pub fn helper_without_uniqueness<'a>( module, ptr_bytes, leak, + // important! 
we don't want any procedures to get the C calling convention exposed_to_host: MutSet::default(), }; - let mut procs = roc_mono::ir::Procs::default(); - let mut ident_ids = env.interns.all_ident_ids.remove(&home).unwrap(); + let mut layout_ids = roc_gen::layout_id::LayoutIds::default(); - - // Populate Procs and get the low-level Expr from the canonical Expr - let mut mono_problems = Vec::new(); - let mut mono_env = roc_mono::ir::Env { - arena: &arena, - subs: &mut subs, - problems: &mut mono_problems, - home, - ident_ids: &mut ident_ids, - }; - - // infer the size of any closures - roc_mono::closures::infer_closure_size(&loc_expr.value, mono_env.subs, &solve_env); - - let main_body = roc_mono::ir::Stmt::new(&mut mono_env, loc_expr.value, &mut procs); - - let mut headers = { - let num_headers = match &procs.pending_specializations { - Some(map) => map.len(), - None => 0, - }; - - Vec::with_capacity(num_headers) - }; - let mut layout_cache = roc_mono::layout::LayoutCache::default(); - let procs = roc_mono::ir::specialize_all(&mut mono_env, procs, &mut layout_cache); - - assert_eq!( - procs.runtime_errors, - roc_collections::all::MutMap::default() - ); - - let (mut procs, param_map) = procs.get_specialized_procs_help(mono_env.arena); - let main_body = roc_mono::inc_dec::visit_declaration( - mono_env.arena, - param_map, - mono_env.arena.alloc(main_body), - ); - - // Put this module's ident_ids back in the interns, so we can use them in env. - // This must happen *after* building the headers, because otherwise there's - // a conflicting mutable borrow on ident_ids. - env.interns.all_ident_ids.insert(home, ident_ids); + let mut headers = Vec::with_capacity(procedures.len()); // Add all the Proc headers to the module. // We have to do this in a separate pass first, // because their bodies may reference each other. - for ((symbol, layout), proc) in procs.drain() { + let mut scope = Scope::default(); + for ((symbol, layout), proc) in procedures.drain() { let fn_val = build_proc_header(&env, &mut layout_ids, symbol, &layout, &proc); + if proc.args.is_empty() { + // this is a 0-argument thunk, i.e. a top-level constant definition + // it must be in-scope everywhere in the module! + scope.insert_top_level_thunk(symbol, layout, fn_val); + } + headers.push((proc, fn_val)); } // Build each proc using its header info. for (proc, fn_val) in headers { - build_proc(&env, &mut layout_ids, proc, fn_val); + let mut current_scope = scope.clone(); + + // only have top-level thunks for this proc's module in scope + // this retain is not needed for correctness, but will cause less confusion when debugging + let home = proc.name.module_id(); + current_scope.retain_top_level_thunks_for_module(home); + + build_proc(&env, &mut layout_ids, scope.clone(), proc, fn_val); if fn_val.verify(true) { function_pass.run_on(&fn_val); } else { + use roc_builtins::std::Mode; + + let mode = match stdlib_mode { + Mode::Uniqueness => "OPTIMIZED", + Mode::Standard => "NON-OPTIMIZED", + }; + eprintln!( - "\n\nFunction {:?} failed LLVM verification in NON-OPTIMIZED build. Its content was:\n", fn_val.get_name().to_str().unwrap() - ); + "\n\nFunction {:?} failed LLVM verification in {} build. 
Its content was:\n", + fn_val.get_name().to_str().unwrap(), + mode, + ); fn_val.print_to_stderr(); panic!( - "The preceding code was from {:?}, which failed LLVM verification in NON-OPTIMIZED build.", fn_val.get_name().to_str().unwrap() - ); + "The preceding code was from {:?}, which failed LLVM verification in {} build.", + fn_val.get_name().to_str().unwrap(), + mode, + ); } } - - let (main_fn_name, main_fn) = - roc_gen::llvm::build::make_main_function(&env, &mut layout_ids, &return_layout, &main_body); + let (main_fn_name, main_fn) = roc_gen::llvm::build::promote_to_main_function( + &env, + &mut layout_ids, + main_fn_symbol, + &main_fn_layout, + ); // Uncomment this to see the module's un-optimized LLVM instruction output: // env.module.print_to_stderr(); @@ -188,180 +219,6 @@ pub fn helper_without_uniqueness<'a>( (main_fn_name, errors, execution_engine.clone()) } -pub fn helper_with_uniqueness<'a>( - arena: &'a bumpalo::Bump, - src: &str, - leak: bool, - context: &'a inkwell::context::Context, -) -> (&'static str, inkwell::execution_engine::ExecutionEngine<'a>) { - use crate::helpers::{infer_expr, uniq_expr}; - use inkwell::OptimizationLevel; - use roc_gen::llvm::build::{build_proc, build_proc_header}; - use roc_mono::layout::Layout; - - let target = target_lexicon::Triple::host(); - let ptr_bytes = target.pointer_width().unwrap().bytes() as u32; - let (loc_expr, _output, problems, subs, var, constraint, home, interns) = uniq_expr(src); - - let errors = problems - .into_iter() - .filter(|problem| { - use roc_problem::can::Problem::*; - - // Ignore "unused" problems - match problem { - UnusedDef(_, _) | UnusedArgument(_, _, _) | UnusedImport(_, _) => false, - _ => true, - } - }) - .collect::>(); - - assert_eq!(errors, Vec::new(), "Encountered errors: {:?}", errors); - - let mut unify_problems = Vec::new(); - let (content, mut subs, solve_env) = infer_expr(subs, &mut unify_problems, &constraint, var); - - assert_eq!( - unify_problems, - Vec::new(), - "Encountered one or more type mismatches: {:?}", - unify_problems - ); - - let module = arena.alloc(roc_gen::llvm::build::module_from_builtins(context, "app")); - let builder = context.create_builder(); - let opt_level = if cfg!(debug_assertions) { - roc_gen::llvm::build::OptLevel::Normal - } else { - roc_gen::llvm::build::OptLevel::Optimize - }; - let (mpm, fpm) = roc_gen::llvm::build::construct_optimization_passes(module, opt_level); - - // Compute main_fn_type before moving subs to Env - let return_layout = Layout::new(&arena, content, &subs).unwrap_or_else(|err| { - panic!( - "Code gen error in OPTIMIZED test: could not convert to layout. 
Err was {:?}", - err - ) - }); - - let execution_engine = module - .create_jit_execution_engine(OptimizationLevel::None) - .expect("Error creating JIT execution engine for test"); - - // Compile and add all the Procs before adding main - let mut env = roc_gen::llvm::build::Env { - arena: &arena, - builder: &builder, - context, - interns, - module, - ptr_bytes, - leak, - exposed_to_host: MutSet::default(), - }; - let mut procs = roc_mono::ir::Procs::default(); - let mut ident_ids = env.interns.all_ident_ids.remove(&home).unwrap(); - let mut layout_ids = roc_gen::layout_id::LayoutIds::default(); - - // Populate Procs and get the low-level Expr from the canonical Expr - let mut mono_problems = Vec::new(); - let mut mono_env = roc_mono::ir::Env { - arena: &arena, - subs: &mut subs, - problems: &mut mono_problems, - home, - ident_ids: &mut ident_ids, - }; - - // infer the size of any closures - roc_mono::closures::infer_closure_size(&loc_expr.value, mono_env.subs, &solve_env); - - let main_body = roc_mono::ir::Stmt::new(&mut mono_env, loc_expr.value, &mut procs); - let mut headers = { - let num_headers = match &procs.pending_specializations { - Some(map) => map.len(), - None => 0, - }; - - Vec::with_capacity(num_headers) - }; - let mut layout_cache = roc_mono::layout::LayoutCache::default(); - let procs = roc_mono::ir::specialize_all(&mut mono_env, procs, &mut layout_cache); - - assert_eq!( - procs.runtime_errors, - roc_collections::all::MutMap::default() - ); - - let (mut procs, param_map) = procs.get_specialized_procs_help(mono_env.arena); - let main_body = roc_mono::inc_dec::visit_declaration( - mono_env.arena, - param_map, - mono_env.arena.alloc(main_body), - ); - - // Put this module's ident_ids back in the interns, so we can use them in env. - // This must happen *after* building the headers, because otherwise there's - // a conflicting mutable borrow on ident_ids. - env.interns.all_ident_ids.insert(home, ident_ids); - - // Add all the Proc headers to the module. - // We have to do this in a separate pass first, - // because their bodies may reference each other. - for ((symbol, layout), proc) in procs.drain() { - let fn_val = build_proc_header(&env, &mut layout_ids, symbol, &layout, &proc); - - headers.push((proc, fn_val)); - } - - // Build each proc using its header info. - for (proc, fn_val) in headers { - build_proc(&env, &mut layout_ids, proc, fn_val); - - if fn_val.verify(true) { - fpm.run_on(&fn_val); - } else { - eprintln!( - "\n\nFunction {:?} failed LLVM verification in OPTIMIZED build. Its content was:\n", - fn_val.get_name().to_str().unwrap() - ); - - fn_val.print_to_stderr(); - - panic!( - "The preceding code was from {:?}, which failed LLVM verification in OPTIMIZED build.", fn_val.get_name().to_str().unwrap() - ); - } - } - - let (main_fn_name, main_fn) = - roc_gen::llvm::build::make_main_function(&env, &mut layout_ids, &return_layout, &main_body); - - // you're in the version with uniqueness! - - // Uncomment this to see the module's un-optimized LLVM instruction output: - // env.module.print_to_stderr(); - - if main_fn.verify(true) { - fpm.run_on(&main_fn); - } else { - panic!("main function {} failed LLVM verification in OPTIMIZED build. 
Uncomment nearby statements to see more details.", main_fn_name); - } - - mpm.run_on(module); - - // Verify the module - if let Err(errors) = env.module.verify() { - panic!("Errors defining module: {:?}", errors); - } - - // Uncomment this to see the module's optimized LLVM instruction output: - // env.module.print_to_stderr(); - - (main_fn_name, execution_engine) -} - // TODO this is almost all code duplication with assert_llvm_evals_to // the only difference is that this calls uniq_expr instead of can_expr. // Should extract the common logic into test helpers. @@ -376,11 +233,17 @@ macro_rules! assert_opt_evals_to { let context = Context::create(); - let (main_fn_name, execution_engine) = - $crate::helpers::eval::helper_with_uniqueness(&arena, $src, $leak, &context); + let stdlib = roc_builtins::unique::uniq_stdlib(); - let transform = |success| assert_eq!($transform(success), $expected); - run_jit_function!(execution_engine, main_fn_name, $ty, transform) + let (main_fn_name, errors, execution_engine) = + $crate::helpers::eval::helper(&arena, $src, stdlib, $leak, &context); + + let transform = |success| { + let expected = $expected; + let given = $transform(success); + assert_eq!(&given, &expected); + }; + run_jit_function!(execution_engine, main_fn_name, $ty, transform, errors) }; ($src:expr, $expected:expr, $ty:ty, $transform:expr) => { @@ -398,9 +261,10 @@ macro_rules! assert_llvm_evals_to { let arena = Bump::new(); let context = Context::create(); + let stdlib = roc_builtins::std::standard_stdlib(); let (main_fn_name, errors, execution_engine) = - $crate::helpers::eval::helper_without_uniqueness(&arena, $src, $leak, &context); + $crate::helpers::eval::helper(&arena, $src, stdlib, $leak, &context); let transform = |success| { let expected = $expected; @@ -417,29 +281,20 @@ macro_rules! assert_llvm_evals_to { #[macro_export] macro_rules! assert_evals_to { - ($src:expr, $expected:expr, $ty:ty) => { + ($src:expr, $expected:expr, $ty:ty) => {{ + assert_evals_to!($src, $expected, $ty, (|val| val)); + }}; + ($src:expr, $expected:expr, $ty:ty, $transform:expr) => { + // Same as above, except with an additional transformation argument. + { + assert_evals_to!($src, $expected, $ty, $transform, true); + } + }; + ($src:expr, $expected:expr, $ty:ty, $transform:expr, $leak:expr) => { // Run un-optimized tests, and then optimized tests, in separate scopes. // These each rebuild everything from scratch, starting with // parsing the source, so that there's no chance their passing // or failing depends on leftover state from the previous one. - { - assert_llvm_evals_to!($src, $expected, $ty, (|val| val)); - } - { - assert_opt_evals_to!($src, $expected, $ty, (|val| val)); - } - }; - ($src:expr, $expected:expr, $ty:ty, $transform:expr) => { - // Same as above, except with an additional transformation argument. - { - assert_llvm_evals_to!($src, $expected, $ty, $transform); - } - { - assert_opt_evals_to!($src, $expected, $ty, $transform); - } - }; - ($src:expr, $expected:expr, $ty:ty, $transform:expr, $leak:expr) => { - // Same as above, except with an additional transformation argument. 
{ assert_llvm_evals_to!($src, $expected, $ty, $transform, $leak); } diff --git a/compiler/gen/tests/helpers/mod.rs b/compiler/gen/tests/helpers/mod.rs index c9e51d9c55..d896f4aca9 100644 --- a/compiler/gen/tests/helpers/mod.rs +++ b/compiler/gen/tests/helpers/mod.rs @@ -3,32 +3,6 @@ extern crate bumpalo; #[macro_use] pub mod eval; -use self::bumpalo::Bump; -use roc_builtins::unique::uniq_stdlib; -use roc_can::constraint::Constraint; -use roc_can::env::Env; -use roc_can::expected::Expected; -use roc_can::expr::{canonicalize_expr, Expr, Output}; -use roc_can::operator; -use roc_can::scope::Scope; -use roc_collections::all::{ImMap, MutMap, SendMap}; -use roc_constrain::expr::constrain_expr; -use roc_constrain::module::{constrain_imported_values, load_builtin_aliases, Import}; -use roc_module::ident::Ident; -use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds, Symbol}; -use roc_parse::ast::{self, Attempting}; -use roc_parse::blankspace::space0_before; -use roc_parse::parser::{loc, Fail, Parser, State}; -use roc_problem::can::Problem; -use roc_region::all::{Located, Region}; -use roc_solve::solve; -use roc_types::subs::{Content, Subs, VarStore, Variable}; -use roc_types::types::Type; - -pub fn test_home() -> ModuleId { - ModuleIds::default().get_or_insert(&"Test".into()) -} - /// Used in the with_larger_debug_stack() function, for tests that otherwise /// run out of stack space in debug builds (but don't in --release builds) #[allow(dead_code)] @@ -68,249 +42,3 @@ where { run_test() } - -pub fn infer_expr( - subs: Subs, - problems: &mut Vec, - constraint: &Constraint, - expr_var: Variable, -) -> (Content, Subs, solve::Env) { - let env = solve::Env { - aliases: MutMap::default(), - vars_by_symbol: SendMap::default(), - }; - let (solved, env) = solve::run(&env, problems, subs, constraint); - - let content = solved.inner().get_without_compacting(expr_var).content; - - (content, solved.into_inner(), env) -} - -pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result>, Fail> { - let state = State::new(input.trim().as_bytes(), Attempting::Module); - let parser = space0_before(loc(roc_parse::expr::expr(0)), 0); - let answer = parser.parse(&arena, state); - - answer - .map(|(loc_expr, _)| loc_expr) - .map_err(|(fail, _)| fail) -} - -pub fn can_expr(expr_str: &str) -> CanExprOut { - can_expr_with(&Bump::new(), test_home(), expr_str) -} - -pub fn uniq_expr( - expr_str: &str, -) -> ( - Located, - Output, - Vec, - Subs, - Variable, - Constraint, - ModuleId, - Interns, -) { - let declared_idents: &ImMap = &ImMap::default(); - - uniq_expr_with(&Bump::new(), expr_str, declared_idents) -} - -pub fn uniq_expr_with( - arena: &Bump, - expr_str: &str, - declared_idents: &ImMap, -) -> ( - Located, - Output, - Vec, - Subs, - Variable, - Constraint, - ModuleId, - Interns, -) { - let home = test_home(); - let CanExprOut { - loc_expr, - output, - problems, - var_store: mut old_var_store, - var, - interns, - .. 
- } = can_expr_with(arena, home, expr_str); - - // double check - let mut var_store = VarStore::new(old_var_store.fresh()); - - let expected2 = Expected::NoExpectation(Type::Variable(var)); - let constraint = roc_constrain::uniq::constrain_declaration( - home, - &mut var_store, - Region::zero(), - &loc_expr, - declared_idents, - expected2, - ); - - let stdlib = uniq_stdlib(); - - let types = stdlib.types; - let imports: Vec<_> = types - .into_iter() - .map(|(symbol, (solved_type, region))| Import { - loc_symbol: Located::at(region, symbol), - solved_type, - }) - .collect(); - - // load builtin values - - // TODO what to do with those rigids? - let (_introduced_rigids, constraint) = - constrain_imported_values(imports, constraint, &mut var_store); - - // load builtin types - let mut constraint = load_builtin_aliases(stdlib.aliases, constraint, &mut var_store); - - constraint.instantiate_aliases(&mut var_store); - - let subs2 = Subs::new(var_store.into()); - - ( - loc_expr, output, problems, subs2, var, constraint, home, interns, - ) -} - -pub struct CanExprOut { - pub loc_expr: Located, - pub output: Output, - pub problems: Vec, - pub home: ModuleId, - pub interns: Interns, - pub var_store: VarStore, - pub var: Variable, - pub constraint: Constraint, -} - -pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_str: &str) -> CanExprOut { - let loc_expr = parse_loc_with(&arena, expr_str).unwrap_or_else(|e| { - panic!( - "can_expr_with() got a parse error when attempting to canonicalize:\n\n{:?} {:?}", - expr_str, e - ) - }); - - let mut var_store = VarStore::default(); - let var = var_store.fresh(); - let expected = Expected::NoExpectation(Type::Variable(var)); - let module_ids = ModuleIds::default(); - - // Desugar operators (convert them to Apply calls, taking into account - // operator precedence and associativity rules), before doing other canonicalization. - // - // If we did this *during* canonicalization, then each time we - // visited a BinOp node we'd recursively try to apply this to each of its nested - // operators, and then again on *their* nested operators, ultimately applying the - // rules multiple times unnecessarily. - let loc_expr = operator::desugar_expr(arena, &loc_expr); - - let mut scope = Scope::new(home); - let dep_idents = IdentIds::exposed_builtins(0); - let mut env = Env::new(home, dep_idents, &module_ids, IdentIds::default()); - let (loc_expr, output) = canonicalize_expr( - &mut env, - &mut var_store, - &mut scope, - Region::zero(), - &loc_expr.value, - ); - - // Add builtin defs (e.g. List.get) directly to the canonical Expr, - // since we aren't using modules here. 
- let mut with_builtins = loc_expr.value; - let builtin_defs = roc_can::builtins::builtin_defs(&mut var_store); - - for (symbol, def) in builtin_defs { - if output.references.lookups.contains(&symbol) || output.references.calls.contains(&symbol) - { - with_builtins = Expr::LetNonRec( - Box::new(def), - Box::new(Located { - region: Region::zero(), - value: with_builtins, - }), - var_store.fresh(), - SendMap::default(), - ); - } - } - - let loc_expr = Located { - region: loc_expr.region, - value: with_builtins, - }; - - let constraint = constrain_expr( - &roc_constrain::expr::Env { - rigids: ImMap::default(), - home, - }, - loc_expr.region, - &loc_expr.value, - expected, - ); - - let types = roc_builtins::std::types(); - - let imports: Vec<_> = types - .into_iter() - .map(|(symbol, (solved_type, region))| Import { - loc_symbol: Located::at(region, symbol), - solved_type, - }) - .collect(); - - // load builtin values - let (_introduced_rigids, constraint) = - constrain_imported_values(imports, constraint, &mut var_store); - - // TODO determine what to do with those rigids - // for var in introduced_rigids { - // output.ftv.insert(var, format!("internal_{:?}", var).into()); - // } - - //load builtin types - let mut constraint = - load_builtin_aliases(roc_builtins::std::aliases(), constraint, &mut var_store); - - constraint.instantiate_aliases(&mut var_store); - - let mut all_ident_ids = MutMap::default(); - - // When pretty printing types, we may need the exposed builtins, - // so include them in the Interns we'll ultimately return. - for (module_id, ident_ids) in IdentIds::exposed_builtins(0) { - all_ident_ids.insert(module_id, ident_ids); - } - - all_ident_ids.insert(home, env.ident_ids); - - let interns = Interns { - module_ids: env.module_ids.clone(), - all_ident_ids, - }; - - CanExprOut { - loc_expr, - output, - problems: env.problems, - home: env.home, - var_store, - interns, - var, - constraint, - } -} diff --git a/compiler/load/Cargo.toml b/compiler/load/Cargo.toml index 5214c94411..2ddd58a367 100644 --- a/compiler/load/Cargo.toml +++ b/compiler/load/Cargo.toml @@ -17,8 +17,10 @@ roc_problem = { path = "../problem" } roc_unify = { path = "../unify" } roc_parse = { path = "../parse" } roc_solve = { path = "../solve" } +roc_mono = { path = "../mono" } bumpalo = { version = "3.2", features = ["collections"] } inlinable_string = "0.1" +parking_lot = { version = "0.11", features = ["deadlock_detection"] } crossbeam = "0.7" num_cpus = "1" diff --git a/compiler/load/src/docs.rs b/compiler/load/src/docs.rs index 6310157ab2..ac1dd3036f 100644 --- a/compiler/load/src/docs.rs +++ b/compiler/load/src/docs.rs @@ -5,6 +5,7 @@ use crossbeam::channel::{bounded, Sender}; use crossbeam::deque::{Injector, Stealer, Worker}; use crossbeam::thread; use inlinable_string::InlinableString; +use parking_lot::Mutex; use roc_builtins::std::{Mode, StdLib}; use roc_can::constraint::Constraint; use roc_can::def::Declaration; @@ -32,7 +33,7 @@ use std::io; use std::iter; use std::path::{Path, PathBuf}; use std::str::from_utf8_unchecked; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use std::time::{Duration, SystemTime}; /// Filename extension for normal Roc modules @@ -349,8 +350,14 @@ pub fn load( let arena = Bump::new(); // Reserve one CPU for the main thread, and let all the others be eligible - // to spawn workers. - let num_workers = num_cpus::get() - 1; + // to spawn workers. 
We use .max(2) to enforce that we always + // end up with at least 1 worker - since (.max(2) - 1) will + // always return a number that's at least 1. Using + // .max(2) on the initial number of CPUs instead of + // doing .max(1) on the entire expression guards against + // num_cpus returning 0, while also avoiding wrapping + // unsigned subtraction overflow. + let num_workers = num_cpus::get().max(2) - 1; let mut worker_arenas = bumpalo::collections::Vec::with_capacity_in(num_workers, &arena); @@ -878,8 +885,7 @@ fn finish<'a>( let module_ids = Arc::try_unwrap(state.arc_modules) .unwrap_or_else(|_| panic!("There were still outstanding Arc references to module_ids")) - .into_inner() - .expect("Unwrapping mutex for module_ids"); + .into_inner(); let interns = Interns { module_ids, @@ -1079,10 +1085,8 @@ fn send_header<'a>( let ident_ids = { // Lock just long enough to perform the minimal operations necessary. - let mut module_ids = (*module_ids).lock().expect("Failed to acquire lock for interning module IDs, presumably because a thread panicked."); - let mut ident_ids_by_module = (*ident_ids_by_module).lock().expect( - "Failed to acquire lock for interning ident IDs, presumably because a thread panicked.", - ); + let mut module_ids = (*module_ids).lock(); + let mut ident_ids_by_module = (*ident_ids_by_module).lock(); home = module_ids.get_or_insert(&declared_name.as_inline_str()); @@ -1243,9 +1247,7 @@ impl<'a> BuildTask<'a> { let mut dep_idents: IdentIdsByModule = IdentIds::exposed_builtins(num_deps); { - let ident_ids_by_module = (*ident_ids_by_module).lock().expect( - "Failed to acquire lock for interning ident IDs, presumably because a thread panicked.", - ); + let ident_ids_by_module = (*ident_ids_by_module).lock(); // Populate dep_idents with each of their IdentIds, // which we'll need during canonicalization to translate @@ -1277,9 +1279,11 @@ impl<'a> BuildTask<'a> { waiting_for_solve.insert(module_id, solve_needed); - let module_ids = { - (*module_ids).lock().expect("Failed to acquire lock for obtaining module IDs, presumably because a thread panicked.").clone() - }; + // Clone the module_ids we'll need for canonicalization. + // This should be small, and cloning it should be quick. + // We release the lock as soon as we're done cloning, so we don't have + // to lock the global module_ids while canonicalizing any given module. + let module_ids = { (*module_ids).lock().clone() }; // Now that we have waiting_for_solve populated, continue parsing, // canonicalizing, and constraining the module. 
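Note on the worker-count change in load/docs.rs above: because `num_cpus::get()` returns a `usize`, computing `get() - 1` first could wrap around if the CPU count were ever reported as 0, and clamping only after the subtraction would never see the wrapped value. A minimal standalone sketch, not part of this patch (`worker_count` is a hypothetical helper named only for illustration):

    // Reserve one thread for the coordinator; clamp the reported CPU count
    // to at least 2 *before* subtracting so the unsigned subtraction can
    // never underflow and we always end up with at least one worker.
    fn worker_count(reported_cpus: usize) -> usize {
        reported_cpus.max(2) - 1
    }

    fn main() {
        assert_eq!(worker_count(0), 1); // degenerate report: still one worker
        assert_eq!(worker_count(1), 1); // single-core machine: one worker
        assert_eq!(worker_count(8), 7); // typical case: one CPU reserved for the main thread
    }
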
diff --git a/compiler/load/src/file.rs b/compiler/load/src/file.rs index 1b999b5e2d..32015ea662 100644 --- a/compiler/load/src/file.rs +++ b/compiler/load/src/file.rs @@ -2,6 +2,7 @@ use bumpalo::Bump; use crossbeam::channel::{bounded, Sender}; use crossbeam::deque::{Injector, Stealer, Worker}; use crossbeam::thread; +use parking_lot::Mutex; use roc_builtins::std::{Mode, StdLib}; use roc_can::constraint::Constraint; use roc_can::def::Declaration; @@ -13,6 +14,10 @@ use roc_constrain::module::{ use roc_constrain::module::{constrain_module, ExposedModuleTypes, SubsByModule}; use roc_module::ident::{Ident, ModuleName}; use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds, Symbol}; +use roc_mono::ir::{ + ExternalSpecializations, MonoProblem, PartialProc, PendingSpecialization, Proc, Procs, +}; +use roc_mono::layout::{Layout, LayoutCache}; use roc_parse::ast::{self, Attempting, ExposesEntry, ImportsEntry}; use roc_parse::module::module_defs; use roc_parse::parser::{self, Fail, Parser}; @@ -22,13 +27,14 @@ use roc_solve::solve; use roc_types::solved_types::Solved; use roc_types::subs::{Subs, VarStore, Variable}; use roc_types::types::Alias; +use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::collections::{HashMap, HashSet}; use std::fs; use std::io; use std::iter; use std::path::{Path, PathBuf}; use std::str::from_utf8_unchecked; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use std::time::{Duration, SystemTime}; /// Filename extension for normal Roc modules @@ -37,6 +43,329 @@ const ROC_FILE_EXTENSION: &str = "roc"; /// The . in between module names like Foo.Bar.Baz const MODULE_SEPARATOR: char = '.'; +const SHOW_MESSAGE_LOG: bool = false; + +macro_rules! log { + () => (if SHOW_MESSAGE_LOG { println!()} else {}); + ($($arg:tt)*) => (if SHOW_MESSAGE_LOG { println!($($arg)*); } else {}) +} + +/// NOTE the order of definition of the phases is used by the ord instance +/// make sure they are ordered from first to last! +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Debug)] +pub enum Phase { + LoadHeader, + ParseAndGenerateConstraints, + SolveTypes, + FindSpecializations, + MakeSpecializations, +} + +/// NOTE keep up to date manually, from ParseAndGenerateConstraints to the highest phase we support +const PHASES: [Phase; 5] = [ + Phase::LoadHeader, + Phase::ParseAndGenerateConstraints, + Phase::SolveTypes, + Phase::FindSpecializations, + Phase::MakeSpecializations, +]; + +#[derive(Default, Debug)] +struct Dependencies { + waiting_for: MutMap<(ModuleId, Phase), MutSet<(ModuleId, Phase)>>, + notifies: MutMap<(ModuleId, Phase), MutSet<(ModuleId, Phase)>>, +} + +impl Dependencies { + /// Add all the dependencies for a module, return (module, phase) pairs that can make progress + pub fn add_module( + &mut self, + module_id: ModuleId, + dependencies: &MutSet, + goal_phase: Phase, + ) -> MutSet<(ModuleId, Phase)> { + use Phase::*; + + for dep in dependencies.iter().copied() { + // to parse and generate constraints, the headers of all dependencies must be loaded! 
+ // otherwise, we don't know whether an imported symbol is actually exposed + self.add_dependency_help( + module_id, + dep, + Phase::ParseAndGenerateConstraints, + Phase::LoadHeader, + ); + + self.add_dependency(module_id, dep, Phase::SolveTypes); + + if goal_phase >= FindSpecializations { + self.add_dependency(module_id, dep, Phase::FindSpecializations); + } + + if goal_phase >= MakeSpecializations { + self.add_dependency(dep, module_id, Phase::MakeSpecializations); + } + } + + // add dependencies for self + // phase i + 1 of a file always depends on phase i being completed + { + let mut i = 0; + while PHASES[i] < goal_phase { + self.add_dependency_help(module_id, module_id, PHASES[i + 1], PHASES[i]); + i += 1; + } + } + + let mut output = MutSet::default(); + + // all the dependencies can be loaded + for dep in dependencies { + output.insert((*dep, LoadHeader)); + } + + output + } + + /// Propagate a notification, return (module, phase) pairs that can make progress + pub fn notify(&mut self, module_id: ModuleId, phase: Phase) -> MutSet<(ModuleId, Phase)> { + let mut output = MutSet::default(); + + let key = (module_id, phase); + if let Some(to_notify) = self.notifies.get(&key) { + for notify_key in to_notify { + let mut is_empty = false; + if let Some(waiting_for_pairs) = self.waiting_for.get_mut(¬ify_key) { + waiting_for_pairs.remove(&key); + is_empty = waiting_for_pairs.is_empty(); + } + + if is_empty { + self.waiting_for.remove(notify_key); + output.insert(*notify_key); + } + } + } + + self.notifies.remove(&key); + + output + } + + /// A waits for B, and B will notify A when it completes the phase + fn add_dependency(&mut self, a: ModuleId, b: ModuleId, phase: Phase) { + self.add_dependency_help(a, b, phase, phase); + } + + fn add_dependency_help(&mut self, a: ModuleId, b: ModuleId, phase_a: Phase, phase_b: Phase) { + let key = (a, phase_a); + let value = (b, phase_b); + match self.waiting_for.get_mut(&key) { + Some(existing) => { + existing.insert(value); + } + None => { + let mut set = MutSet::default(); + set.insert(value); + self.waiting_for.insert(key, set); + } + } + + let key = (b, phase_b); + let value = (a, phase_a); + match self.notifies.get_mut(&key) { + Some(existing) => { + existing.insert(value); + } + None => { + let mut set = MutSet::default(); + set.insert(value); + self.notifies.insert(key, set); + } + } + } + + fn solved_all(&self) -> bool { + debug_assert_eq!(self.notifies.is_empty(), self.waiting_for.is_empty()); + + self.notifies.is_empty() + } +} + +/// Struct storing various intermediate stages by their ModuleId +#[derive(Debug, Default)] +struct ModuleCache<'a> { + module_names: MutMap, + headers: MutMap>, + constrained: MutMap>, + typechecked: MutMap>, + found_specializations: MutMap>, + external_specializations_requested: MutMap, +} + +fn start_phase<'a>(module_id: ModuleId, phase: Phase, state: &mut State<'a>) -> BuildTask<'a> { + // we blindly assume all dependencies are met + match phase { + Phase::LoadHeader => { + let dep_name = state + .module_cache + .module_names + .remove(&module_id) + .expect("module id is present"); + + BuildTask::LoadModule { + module_name: dep_name, + // Provide mutexes of ModuleIds and IdentIds by module, + // so other modules can populate them as they load. 
+ module_ids: Arc::clone(&state.arc_modules), + ident_ids_by_module: Arc::clone(&state.ident_ids_by_module), + } + } + + Phase::ParseAndGenerateConstraints => { + let header = state.module_cache.headers.remove(&module_id).unwrap(); + let module_id = header.module_id; + let deps_by_name = &header.deps_by_name; + let num_deps = deps_by_name.len(); + let mut dep_idents: IdentIdsByModule = IdentIds::exposed_builtins(num_deps); + + let State { + ident_ids_by_module, + .. + } = &state; + + { + let ident_ids_by_module = (*ident_ids_by_module).lock(); + + // Populate dep_idents with each of their IdentIds, + // which we'll need during canonicalization to translate + // identifier strings into IdentIds, which we need to build Symbols. + // We only include the modules we care about (the ones we import). + // + // At the end of this loop, dep_idents contains all the information to + // resolve a symbol from another module: if it's in here, that means + // we have both imported the module and the ident was exported by that mdoule. + for dep_id in header.deps_by_name.values() { + // We already verified that these are all present, + // so unwrapping should always succeed here. + let idents = ident_ids_by_module.get(&dep_id).unwrap(); + + dep_idents.insert(*dep_id, idents.clone()); + } + } + + // Clone the module_ids we'll need for canonicalization. + // This should be small, and cloning it should be quick. + // We release the lock as soon as we're done cloning, so we don't have + // to lock the global module_ids while canonicalizing any given module. + let module_ids = Arc::clone(&state.arc_modules); + let module_ids = { (*module_ids).lock().clone() }; + + debug_assert!(header + .imported_modules + .iter() + .all(|id| module_ids.get_name(*id).is_some())); + + let exposed_symbols = state + .exposed_symbols_by_module + .remove(&module_id) + .expect("Could not find listener ID in exposed_symbols_by_module"); + + BuildTask::ParseAndConstrain { + header, + mode: state.stdlib.mode, + module_ids, + dep_idents, + exposed_symbols, + } + } + Phase::SolveTypes => { + let constrained = state.module_cache.constrained.remove(&module_id).unwrap(); + + let ConstrainedModule { + module, + ident_ids, + module_timing, + src, + constraint, + var_store, + imported_modules, + declarations, + .. 
+ } = constrained; + + BuildTask::solve_module( + module, + ident_ids, + module_timing, + src, + constraint, + var_store, + imported_modules, + &mut state.exposed_types, + &state.stdlib, + declarations, + ) + } + Phase::FindSpecializations => { + let typechecked = state.module_cache.typechecked.remove(&module_id).unwrap(); + + let TypeCheckedModule { + layout_cache, + module_id, + module_timing, + solved_subs, + decls, + finished_info, + ident_ids, + } = typechecked; + + BuildTask::BuildPendingSpecializations { + layout_cache, + module_id, + module_timing, + solved_subs, + decls, + finished_info, + ident_ids, + exposed_to_host: state.exposed_to_host.clone(), + } + } + Phase::MakeSpecializations => { + let found_specializations = state + .module_cache + .found_specializations + .remove(&module_id) + .unwrap(); + + let specializations_we_must_make = state + .module_cache + .external_specializations_requested + .remove(&module_id) + .unwrap_or_default(); + + let FoundSpecializationsModule { + module_id, + ident_ids, + subs, + procs, + layout_cache, + finished_info, + } = found_specializations; + + BuildTask::MakeSpecializations { + module_id, + ident_ids, + subs, + procs, + layout_cache, + specializations_we_must_make, + finished_info, + } + } + } +} + #[derive(Debug)] pub struct LoadedModule { pub module_id: ModuleId, @@ -68,6 +397,53 @@ struct ModuleHeader<'a> { module_timing: ModuleTiming, } +#[derive(Debug)] +struct ConstrainedModule<'a> { + module: Module, + declarations: Vec, + imported_modules: MutSet, + src: &'a str, + constraint: Constraint, + ident_ids: IdentIds, + var_store: VarStore, + module_timing: ModuleTiming, +} + +#[derive(Debug)] +pub struct TypeCheckedModule<'a> { + pub module_id: ModuleId, + pub layout_cache: LayoutCache<'a>, + pub module_timing: ModuleTiming, + pub solved_subs: Solved, + pub decls: Vec, + pub ident_ids: IdentIds, + pub finished_info: FinishedInfo<'a>, +} + +#[derive(Debug)] +pub struct FoundSpecializationsModule<'a> { + pub module_id: ModuleId, + pub ident_ids: IdentIds, + pub layout_cache: LayoutCache<'a>, + pub procs: Procs<'a>, + pub subs: Subs, + pub finished_info: FinishedInfo<'a>, +} + +#[derive(Debug)] +pub struct MonomorphizedModule<'a> { + pub module_id: ModuleId, + pub interns: Interns, + pub subs: Subs, + pub can_problems: Vec, + pub type_problems: Vec, + pub mono_problems: Vec, + pub procedures: MutMap<(Symbol, Layout<'a>), Proc<'a>>, + pub exposed_to_host: MutMap, + pub src: Box, + pub timings: MutMap, +} + #[derive(Debug)] enum Msg<'a> { Header(ModuleHeader<'a>), @@ -82,29 +458,71 @@ enum Msg<'a> { var_store: VarStore, module_timing: ModuleTiming, }, - Solved { + SolvedTypes { src: &'a str, module_id: ModuleId, + ident_ids: IdentIds, solved_module: SolvedModule, solved_subs: Solved, + decls: Vec, module_timing: ModuleTiming, }, - Finished { + FinishedAllTypeChecking { solved_subs: Solved, - problems: Vec, exposed_vars_by_symbol: Vec<(Symbol, Variable)>, src: &'a str, }, + FoundSpecializations { + module_id: ModuleId, + ident_ids: IdentIds, + layout_cache: LayoutCache<'a>, + procs: Procs<'a>, + problems: Vec, + solved_subs: Solved, + finished_info: FinishedInfo<'a>, + }, + MadeSpecializations { + module_id: ModuleId, + ident_ids: IdentIds, + layout_cache: LayoutCache<'a>, + external_specializations_requested: MutMap, + procedures: MutMap<(Symbol, Layout<'a>), Proc<'a>>, + problems: Vec, + subs: Subs, + finished_info: FinishedInfo<'a>, + }, + + /// The task is to only typecheck AND monomorphize modules + /// all modules are now monomorphized, 
we are done + FinishedAllSpecialization { + subs: Subs, + exposed_to_host: MutMap, + src: &'a str, + }, +} + +#[derive(Debug)] +pub struct FinishedInfo<'a> { + exposed_vars_by_symbol: Vec<(Symbol, Variable)>, + src: &'a str, } #[derive(Debug)] struct State<'a> { pub root_id: ModuleId, + pub goal_phase: Phase, + pub stdlib: StdLib, pub exposed_types: SubsByModule, - pub can_problems: Vec, + pub can_problems: std::vec::Vec, + pub mono_problems: std::vec::Vec, pub headers_parsed: MutSet, - pub type_problems: Vec, + pub type_problems: std::vec::Vec, + + pub module_cache: ModuleCache<'a>, + pub dependencies: Dependencies, + pub procedures: MutMap<(Symbol, Layout<'a>), Proc<'a>>, + pub exposed_to_host: MutMap, /// This is the "final" list of IdentIds, after canonicalization and constraint gen /// have completed for a given module. @@ -123,27 +541,29 @@ struct State<'a> { pub exposed_symbols_by_module: MutMap>, - /// Modules which are waiting for certain headers to be parsed - pub waiting_for_headers: MutMap>, - - // When the key ModuleId gets solved, iterate through each of the given modules - // a,d remove that ModuleId from the appropriate waiting_for_headers entry. - // If the relevant module's waiting_for_headers entry is now empty, canonicalize the module. - pub header_listeners: MutMap>, - - pub unparsed_modules: MutMap>, - - // Modules which are waiting for certain deps to be solved - pub waiting_for_solve: MutMap>, - - // When the key ModuleId gets solved, iterate through each of the given modules - // and remove that ModuleId from the appropriate waiting_for_solve entry. - // If the relevant module's waiting_for_solve entry is now empty, solve the module. - pub solve_listeners: MutMap>, - pub unsolved_modules: MutMap>, + /// These are the modules which need to add their pending specializations to + /// the queue. Adding specializations to the queue can be done completely in + /// parallel, and order doesn't matter, so as soon as a module has been solved, + /// it gets an entry in here, and then immediately begins working on its + /// pending specializations in the same thread. + pub needs_specialization: MutSet, + + pub all_pending_specializations: MutMap, PendingSpecialization>>, + + pub specializations_in_flight: u32, + pub timings: MutMap, + + // Each thread gets its own layout cache. When one "pending specializations" + // pass completes, it returns its layout cache so another thread can use it. + // We don't bother trying to union them all together to maximize cache hits, + // since the unioning process could potentially take longer than the savings. + // (Granted, this has not been attempted or measured!) 
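+ // A rough sketch of the intended reuse (the push back is assumed, not shown in
+ // this diff): a worker takes a cache via `state.layout_caches.pop().unwrap_or_default()`
+ // when it starts a FindSpecializations pass, and the cache rides back to the main
+ // thread inside the FoundSpecializations message so it can be handed out again.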
+ pub layout_caches: std::vec::Vec>, + + pub procs: Procs<'a>, } #[derive(Debug)] @@ -151,9 +571,11 @@ struct UnsolvedModule<'a> { module: Module, src: &'a str, imported_modules: MutSet, + ident_ids: IdentIds, constraint: Constraint, var_store: VarStore, module_timing: ModuleTiming, + declarations: Vec, } #[derive(Debug)] @@ -209,6 +631,7 @@ impl ModuleTiming { } #[derive(Debug)] +#[allow(dead_code)] enum BuildTask<'a> { LoadModule { module_name: ModuleName, @@ -222,16 +645,36 @@ enum BuildTask<'a> { dep_idents: IdentIdsByModule, exposed_symbols: MutSet, }, - Solve { module: Module, + ident_ids: IdentIds, imported_symbols: Vec, imported_aliases: MutMap, module_timing: ModuleTiming, constraint: Constraint, var_store: VarStore, + declarations: Vec, src: &'a str, }, + BuildPendingSpecializations { + module_timing: ModuleTiming, + layout_cache: LayoutCache<'a>, + solved_subs: Solved, + module_id: ModuleId, + ident_ids: IdentIds, + decls: Vec, + finished_info: FinishedInfo<'a>, + exposed_to_host: MutMap, + }, + MakeSpecializations { + module_id: ModuleId, + ident_ids: IdentIds, + subs: Subs, + procs: Procs<'a>, + layout_cache: LayoutCache<'a>, + finished_info: FinishedInfo<'a>, + specializations_we_must_make: ExternalSpecializations, + }, } enum WorkerMsg { @@ -254,6 +697,13 @@ pub enum LoadingProblem { TriedToImportAppModule, } +pub enum Phases { + /// Parse, canonicalize, check types + TypeCheck, + /// Parse, canonicalize, check types, monomorphize + Monomorphize, +} + type IdentIdsByModule = MutMap; type MsgSender<'a> = Sender>; @@ -274,6 +724,150 @@ fn enqueue_task<'a>( Ok(()) } +pub fn load_and_typecheck( + arena: &Bump, + filename: PathBuf, + stdlib: StdLib, + src_dir: &Path, + exposed_types: SubsByModule, +) -> Result { + use LoadResult::*; + + let load_start = LoadStart::from_path(arena, filename)?; + + match load( + arena, + load_start, + stdlib, + src_dir, + exposed_types, + Phase::SolveTypes, + )? { + Monomorphized(_) => unreachable!(""), + TypeChecked(module) => Ok(module), + } +} + +pub fn load_and_monomorphize<'a>( + arena: &'a Bump, + filename: PathBuf, + stdlib: StdLib, + src_dir: &Path, + exposed_types: SubsByModule, +) -> Result, LoadingProblem> { + use LoadResult::*; + + let load_start = LoadStart::from_path(arena, filename)?; + + match load( + arena, + load_start, + stdlib, + src_dir, + exposed_types, + Phase::MakeSpecializations, + )? { + Monomorphized(module) => Ok(module), + TypeChecked(_) => unreachable!(""), + } +} + +pub fn load_and_monomorphize_from_str<'a>( + arena: &'a Bump, + filename: PathBuf, + src: &'a str, + stdlib: StdLib, + src_dir: &Path, + exposed_types: SubsByModule, +) -> Result, LoadingProblem> { + use LoadResult::*; + + let load_start = LoadStart::from_str(arena, filename, src)?; + + match load( + arena, + load_start, + stdlib, + src_dir, + exposed_types, + Phase::MakeSpecializations, + )? { + Monomorphized(module) => Ok(module), + TypeChecked(_) => unreachable!(""), + } +} + +struct LoadStart<'a> { + pub arc_modules: Arc>, + pub ident_ids_by_module: Arc>, + pub root_id: ModuleId, + pub root_msg: Msg<'a>, +} + +impl<'a> LoadStart<'a> { + pub fn from_path(arena: &'a Bump, filename: PathBuf) -> Result { + let arc_modules = Arc::new(Mutex::new(ModuleIds::default())); + let root_exposed_ident_ids = IdentIds::exposed_builtins(0); + let ident_ids_by_module = Arc::new(Mutex::new(root_exposed_ident_ids)); + + // Load the root module synchronously; we can't proceed until we have its id. 
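+ // (Only this first header read happens on the calling thread; every other module
+ // is loaded by worker threads once `load` has the root ModuleId to seed State.)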
+ let (root_id, root_msg) = { + let root_start_time = SystemTime::now(); + + load_filename( + arena, + filename, + Arc::clone(&arc_modules), + Arc::clone(&ident_ids_by_module), + root_start_time, + )? + }; + + Ok(LoadStart { + arc_modules, + ident_ids_by_module, + root_id, + root_msg, + }) + } + + pub fn from_str( + arena: &'a Bump, + filename: PathBuf, + src: &'a str, + ) -> Result { + let arc_modules = Arc::new(Mutex::new(ModuleIds::default())); + let root_exposed_ident_ids = IdentIds::exposed_builtins(0); + let ident_ids_by_module = Arc::new(Mutex::new(root_exposed_ident_ids)); + + // Load the root module synchronously; we can't proceed until we have its id. + let (root_id, root_msg) = { + let root_start_time = SystemTime::now(); + + load_from_str( + arena, + filename, + src, + Arc::clone(&arc_modules), + Arc::clone(&ident_ids_by_module), + root_start_time, + )? + }; + + Ok(LoadStart { + arc_modules, + ident_ids_by_module, + root_id, + root_msg, + }) + } +} + +enum LoadResult<'a> { + TypeChecked(LoadedModule), + Monomorphized(MonomorphizedModule<'a>), +} + /// The loading process works like this, starting from the given filename (e.g. "main.roc"): /// /// 1. Open the file. @@ -317,47 +911,48 @@ fn enqueue_task<'a>( /// and then linking them together, and possibly caching them by the hash of their /// specializations, so if none of their specializations changed, we don't even need /// to rebuild the module and can link in the cached one directly.) -// #[allow(clippy::cognitive_complexity)] -pub fn load( - filename: PathBuf, - stdlib: &StdLib, +fn load<'a>( + arena: &'a Bump, + //filename: PathBuf, + load_start: LoadStart<'a>, + stdlib: StdLib, src_dir: &Path, exposed_types: SubsByModule, -) -> Result { - let arena = Bump::new(); + goal_phase: Phase, +) -> Result, LoadingProblem> +where +{ + let LoadStart { + arc_modules, + ident_ids_by_module, + root_id, + root_msg, + } = load_start; + + let (msg_tx, msg_rx) = bounded(1024); + msg_tx + .send(root_msg) + .map_err(|_| LoadingProblem::MsgChannelDied)?; // Reserve one CPU for the main thread, and let all the others be eligible - // to spawn workers. - let num_workers = num_cpus::get() - 1; + // to spawn workers. We use .max(2) to enforce that we always + // end up with at least 1 worker - since (.max(2) - 1) will + // always return a number that's at least 1. Using + // .max(2) on the initial number of CPUs instead of + // doing .max(1) on the entire expression guards against + // num_cpus returning 0, while also avoiding wrapping + // unsigned subtraction overflow. + let num_workers = num_cpus::get().max(2) - 1; - let mut worker_arenas = bumpalo::collections::Vec::with_capacity_in(num_workers, &arena); + let worker_arenas = arena.alloc(bumpalo::collections::Vec::with_capacity_in( + num_workers, + arena, + )); for _ in 0..num_workers { worker_arenas.push(Bump::new()); } - let (msg_tx, msg_rx) = bounded(1024); - let arc_modules = Arc::new(Mutex::new(ModuleIds::default())); - let root_exposed_ident_ids = IdentIds::exposed_builtins(0); - let ident_ids_by_module = Arc::new(Mutex::new(root_exposed_ident_ids)); - - // Load the root module synchronously; we can't proceed until we have its id. - let (root_id, root_msg) = { - let root_start_time = SystemTime::now(); - - load_filename( - &arena, - filename, - Arc::clone(&arc_modules), - Arc::clone(&ident_ids_by_module), - root_start_time, - )? 
- }; - - msg_tx - .send(root_msg) - .map_err(|_| LoadingProblem::MsgChannelDied)?; - // We'll add tasks to this, and then worker threads will take tasks from it. let injector = Injector::new(); @@ -365,188 +960,223 @@ pub fn load( // into the worker threads, because those workers' stealers need to be // shared bet,een all threads, and this coordination work is much easier // on the main thread. - let mut worker_queues = bumpalo::collections::Vec::with_capacity_in(num_workers, &arena); - let mut stealers = bumpalo::collections::Vec::with_capacity_in(num_workers, &arena); + let mut worker_queues = bumpalo::collections::Vec::with_capacity_in(num_workers, arena); + let mut stealers = bumpalo::collections::Vec::with_capacity_in(num_workers, arena); - thread::scope(|thread_scope| { - for _ in 0..num_workers { - let worker = Worker::new_lifo(); + let it = worker_arenas.iter_mut(); - stealers.push(worker.stealer()); - worker_queues.push(worker); - } + { + thread::scope(|thread_scope| { + for _ in 0..num_workers { + let worker = Worker::new_lifo(); - // Get a reference to the completed stealers, so we can send that - // reference to each worker. (Slices are Sync, but bumpalo Vecs are not.) - let stealers = stealers.into_bump_slice(); + stealers.push(worker.stealer()); + worker_queues.push(worker); + } - let mut headers_parsed = MutSet::default(); + // Get a reference to the completed stealers, so we can send that + // reference to each worker. (Slices are Sync, but bumpalo Vecs are not.) + let stealers = stealers.into_bump_slice(); - // We've already parsed the root's header. (But only its header, so far.) - headers_parsed.insert(root_id); + let mut headers_parsed = MutSet::default(); - let mut loading_started = MutSet::default(); + // We've already parsed the root's header. (But only its header, so far.) + headers_parsed.insert(root_id); - // If the root module we're still processing happens to be an interface, - // it's possible that something else will import it. That will - // necessarily cause a cyclic import error, but in the meantime - // we still shouldn't load it. - loading_started.insert(root_id); + let mut loading_started = MutSet::default(); - let mut state = State { - root_id, - exposed_types, - headers_parsed, - loading_started, - can_problems: Vec::new(), - type_problems: Vec::new(), - arc_modules, - constrained_ident_ids: IdentIds::exposed_builtins(0), - ident_ids_by_module, - declarations_by_id: MutMap::default(), - exposed_symbols_by_module: MutMap::default(), - waiting_for_headers: MutMap::default(), - header_listeners: MutMap::default(), - unparsed_modules: MutMap::default(), - waiting_for_solve: MutMap::default(), - solve_listeners: MutMap::default(), - unsolved_modules: MutMap::default(), - timings: MutMap::default(), - }; + // If the root module we're still processing happens to be an interface, + // it's possible that something else will import it. That will + // necessarily cause a cyclic import error, but in the meantime + // we still shouldn't load it. 
+ loading_started.insert(root_id); - let mut worker_listeners = bumpalo::collections::Vec::with_capacity_in(num_workers, &arena); + let mut worker_listeners = + bumpalo::collections::Vec::with_capacity_in(num_workers, arena); - for worker_arena in worker_arenas.iter_mut() { - let msg_tx = msg_tx.clone(); - let worker = worker_queues.pop().unwrap(); - let (worker_msg_tx, worker_msg_rx) = bounded(1024); + let stdlib_mode = stdlib.mode; - worker_listeners.push(worker_msg_tx); + for worker_arena in it { + let msg_tx = msg_tx.clone(); + let worker = worker_queues.pop().unwrap(); + let (worker_msg_tx, worker_msg_rx) = bounded(1024); - // We only want to move a *reference* to the main task queue's - // injector in the thread, not the injector itself - // (since other threads need to reference it too). - let injector = &injector; + worker_listeners.push(worker_msg_tx); - // Record this thread's handle so the main thread can join it later. - thread_scope.spawn(move |_| { - // Keep listening until we receive a Shutdown msg - for msg in worker_msg_rx.iter() { - match msg { - WorkerMsg::Shutdown => { - // We've finished all our work. It's time to - // shut down the thread, so when the main thread - // blocks on joining with all the worker threads, - // it can finally exit too! - return; - } - WorkerMsg::TaskAdded => { - // Find a task - either from this thread's queue, - // or from the main queue, or from another worker's - // queue - and run it. - // - // There might be no tasks to work on! That could - // happen if another thread is working on a task - // which will later result in more tasks being - // added. In that case, do nothing, and keep waiting - // until we receive a Shutdown message. - if let Some(task) = find_task(&worker, injector, stealers) { - run_task(task, worker_arena, src_dir, msg_tx.clone(), stdlib) + // We only want to move a *reference* to the main task queue's + // injector in the thread, not the injector itself + // (since other threads need to reference it too). + let injector = &injector; + + // Record this thread's handle so the main thread can join it later. + thread_scope.spawn(move |_| { + // Keep listening until we receive a Shutdown msg + for msg in worker_msg_rx.iter() { + match msg { + WorkerMsg::Shutdown => { + // We've finished all our work. It's time to + // shut down the thread, so when the main thread + // blocks on joining with all the worker threads, + // it can finally exit too! + return; + } + WorkerMsg::TaskAdded => { + // Find a task - either from this thread's queue, + // or from the main queue, or from another worker's + // queue - and run it. + // + // There might be no tasks to work on! That could + // happen if another thread is working on a task + // which will later result in more tasks being + // added. In that case, do nothing, and keep waiting + // until we receive a Shutdown message. + if let Some(task) = find_task(&worker, injector, stealers) { + run_task( + task, + worker_arena, + src_dir, + msg_tx.clone(), + stdlib_mode, + ) .expect("Msg channel closed unexpectedly."); + } } } } - } - // Needed to prevent a borrow checker error about this closure - // outliving its enclosing function. - drop(worker_msg_rx); - }); - } + // Needed to prevent a borrow checker error about this closure + // outliving its enclosing function. + drop(worker_msg_rx); + }); + } - // We've now distributed one worker queue to each thread. - // There should be no queues left to distribute! 
- debug_assert!(worker_queues.is_empty()); - drop(worker_queues); + let mut state = State { + root_id, + goal_phase, + stdlib, + module_cache: ModuleCache::default(), + dependencies: Dependencies::default(), + procedures: MutMap::default(), + exposed_to_host: MutMap::default(), + exposed_types, + headers_parsed, + loading_started, + can_problems: std::vec::Vec::new(), + type_problems: std::vec::Vec::new(), + mono_problems: std::vec::Vec::new(), + arc_modules, + constrained_ident_ids: IdentIds::exposed_builtins(0), + ident_ids_by_module, + declarations_by_id: MutMap::default(), + exposed_symbols_by_module: MutMap::default(), + unsolved_modules: MutMap::default(), + timings: MutMap::default(), + needs_specialization: MutSet::default(), + all_pending_specializations: MutMap::default(), + specializations_in_flight: 0, + layout_caches: std::vec::Vec::with_capacity(num_cpus::get()), + procs: Procs::default(), + }; - // Grab a reference to these Senders outside the loop, so we can share - // it across each iteration of the loop. - let worker_listeners = worker_listeners.into_bump_slice(); - let msg_tx = msg_tx.clone(); + // We've now distributed one worker queue to each thread. + // There should be no queues left to distribute! + debug_assert!(worker_queues.is_empty()); + drop(worker_queues); - // The root module will have already queued up messages to process, - // and processing those messages will in turn queue up more messages. - for msg in msg_rx.iter() { - match msg { - Msg::Finished { - solved_subs, - problems, - exposed_vars_by_symbol, - src, - } => { - // We're done! There should be no more messages pending. - debug_assert!(msg_rx.is_empty()); + // Grab a reference to these Senders outside the loop, so we can share + // it across each iteration of the loop. + let worker_listeners = worker_listeners.into_bump_slice(); + let msg_tx = msg_tx.clone(); - // Shut down all the worker threads. - for listener in worker_listeners { - listener - .send(WorkerMsg::Shutdown) - .map_err(|_| LoadingProblem::MsgChannelDied)?; - } - - return Ok(finish( - state, + // The root module will have already queued up messages to process, + // and processing those messages will in turn queue up more messages. + for msg in msg_rx.iter() { + match msg { + Msg::FinishedAllTypeChecking { solved_subs, - problems, exposed_vars_by_symbol, src, - )); - } - msg => { - // This is where most of the main thread's work gets done. - // Everything up to this point has been setting up the threading - // system which lets this logic work efficiently. - state = update( - state, - msg, - stdlib, - msg_tx.clone(), - &injector, - worker_listeners, - )?; + } => { + // We're done! There should be no more messages pending. + debug_assert!(msg_rx.is_empty()); + + // Shut down all the worker threads. + for listener in worker_listeners { + listener + .send(WorkerMsg::Shutdown) + .map_err(|_| LoadingProblem::MsgChannelDied)?; + } + + return Ok(LoadResult::TypeChecked(finish( + state, + solved_subs, + exposed_vars_by_symbol, + src, + ))); + } + Msg::FinishedAllSpecialization { + subs, + exposed_to_host, + src, + } => { + // We're done! There should be no more messages pending. + debug_assert!(msg_rx.is_empty()); + + // Shut down all the worker threads. 
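+ // (Each worker returns from its message loop when it sees Shutdown, which is what
+ // lets thread::scope join every worker thread before we hand back the result.)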
+ for listener in worker_listeners { + listener + .send(WorkerMsg::Shutdown) + .map_err(|_| LoadingProblem::MsgChannelDied)?; + } + + return Ok(LoadResult::Monomorphized(finish_specialization( + state, + subs, + exposed_to_host, + src, + ))); + } + msg => { + // This is where most of the main thread's work gets done. + // Everything up to this point has been setting up the threading + // system which lets this logic work efficiently. + state = update( + state, + msg, + msg_tx.clone(), + &injector, + worker_listeners, + arena, + )?; + } } } - } - // The msg_rx receiver closed unexpectedly before we finished solving everything - Err(LoadingProblem::MsgChannelDied) - }) + // The msg_rx receiver closed unexpectedly before we finished solving everything + Err(LoadingProblem::MsgChannelDied) + }) + } .unwrap() } fn update<'a>( mut state: State<'a>, msg: Msg<'a>, - stdlib: &StdLib, msg_tx: MsgSender<'a>, injector: &Injector>, worker_listeners: &'a [Sender], + arena: &'a Bump, ) -> Result, LoadingProblem> { use self::Msg::*; match msg { Header(header) => { + log!("loaded header for {:?}", header.module_id); let home = header.module_id; - let deps_by_name = &header.deps_by_name; - let mut headers_needed = - HashSet::with_capacity_and_hasher(deps_by_name.len(), default_hasher()); - state.headers_parsed.insert(home); - - for dep_id in deps_by_name.values() { - if !state.headers_parsed.contains(&dep_id) { - headers_needed.insert(*dep_id); - } + // store an ID to name mapping, so we know the file to read when fetching dependencies' headers + for (name, id) in header.deps_by_name.iter() { + state.module_cache.module_names.insert(*id, name.clone()); } // This was a dependency. Write it down and keep processing messaages. @@ -563,106 +1193,26 @@ fn update<'a>( .exposed_symbols_by_module .insert(home, exposed_symbols); - // Notify all the listeners that headers are now available for this module. - if let Some(listeners) = state.header_listeners.remove(&home) { - for listener_id in listeners { - // This listener is longer waiting for this module, - // because this module's headers are now available! - let waiting_for = state - .waiting_for_headers - .get_mut(&listener_id) - .expect("Unable to find module ID in waiting_for_headers"); + let work = state.dependencies.add_module( + header.module_id, + &header.imported_modules, + state.goal_phase, + ); - waiting_for.remove(&home); + state.module_cache.headers.insert(header.module_id, header); - // If it's no longer waiting for anything else, solve it. - if waiting_for.is_empty() { - let header = state - .unparsed_modules - .remove(&listener_id) - .expect("Could not find listener ID in unparsed_modules"); + for (module_id, phase) in work { + let task = start_phase(module_id, phase, &mut state); - let exposed_symbols = state - .exposed_symbols_by_module - .remove(&listener_id) - .expect("Could not find listener ID in exposed_symbols_by_module"); - - enqueue_task( - injector, - worker_listeners, - BuildTask::parse_and_constrain( - header, - stdlib.mode, - Arc::clone(&state.arc_modules), - Arc::clone(&state.ident_ids_by_module), - &state.exposed_types, - exposed_symbols.clone(), - &mut state.waiting_for_solve, - ), - )?; - } - } + enqueue_task(&injector, worker_listeners, task)? } - // If any of our deps weren't loaded before, start loading them. - for (dep_name, dep_id) in deps_by_name.iter() { - if !state.loading_started.contains(&dep_id) { - // Record that we've started loading the module *before* - // we actually start loading it. 
- state.loading_started.insert(*dep_id); + let work = state.dependencies.notify(home, Phase::LoadHeader); - // Start loading this module in the background. - enqueue_task( - injector, - worker_listeners, - BuildTask::LoadModule { - module_name: dep_name.clone(), - // Provide mutexes of ModuleIds and IdentIds by module, - // so other modules can populate them as they load. - module_ids: Arc::clone(&state.arc_modules), - ident_ids_by_module: Arc::clone(&state.ident_ids_by_module), - }, - )?; - } - } + for (module_id, phase) in work { + let task = start_phase(module_id, phase, &mut state); - if headers_needed.is_empty() { - let exposed_symbols = state - .exposed_symbols_by_module - .remove(&home) - .expect("Could not find listener ID in exposed_symbols_by_module"); - - enqueue_task( - injector, - worker_listeners, - BuildTask::parse_and_constrain( - header, - stdlib.mode, - Arc::clone(&state.arc_modules), - Arc::clone(&state.ident_ids_by_module), - &state.exposed_types, - exposed_symbols, - &mut state.waiting_for_solve, - ), - )?; - } else { - // We will have to wait for our deps' headers to be parsed, - // so we can access their IdentId, which we need for canonicalization. - debug_assert!(!state.unparsed_modules.contains_key(&home)); - state.unparsed_modules.insert(home, header); - - // Register a listener with each of these. - for dep_id in headers_needed.iter() { - let listeners = state - .header_listeners - .entry(*dep_id) - .or_insert_with(|| Vec::with_capacity(1)); - - (*listeners).push(home); - } - - debug_assert!(!state.waiting_for_headers.contains_key(&home)); - state.waiting_for_headers.insert(home, headers_needed); + enqueue_task(&injector, worker_listeners, task)? } Ok(state) @@ -678,178 +1228,296 @@ fn update<'a>( var_store, module_timing, } => { + log!("generated constraints for {:?}", module.module_id); + let module_id = module.module_id; state.can_problems.extend(problems); - let module_id = module.module_id; - let State { - waiting_for_solve, - exposed_types, - constrained_ident_ids, - declarations_by_id, - unsolved_modules, - solve_listeners, - .. - } = &mut state; - let waiting_for = waiting_for_solve.get_mut(&module_id).unwrap_or_else(|| { - panic!( - "Could not find module ID {:?} in waiting_for_solve", - module_id - ) - }); + let constrained_module = ConstrainedModule { + module, + constraint, + declarations, + ident_ids, + src, + module_timing, + var_store, + imported_modules, + }; + state + .module_cache + .constrained + .insert(module_id, constrained_module); - // Record the final IdentIds - debug_assert!(!constrained_ident_ids.contains_key(&module_id)); - constrained_ident_ids.insert(module_id, ident_ids); + let work = state + .dependencies + .notify(module_id, Phase::ParseAndGenerateConstraints); - // It's possible that some modules have been solved since - // we began waiting for them. Remove those from waiting_for, - // because we no longer need to wait for them! - waiting_for.retain(|id| !exposed_types.contains_key(id)); + for (module_id, phase) in work { + let task = start_phase(module_id, phase, &mut state); - declarations_by_id.insert(module_id, declarations); - - if waiting_for.is_empty() { - // All of our dependencies have already been solved. Great! - // That means we can proceed directly to solving. 
- enqueue_task( - injector, - worker_listeners, - BuildTask::solve_module( - module, - module_timing, - src, - constraint, - var_store, - imported_modules, - &mut state.exposed_types, - stdlib, - ), - )?; - } else { - // We will have to wait for our dependencies to be solved. - debug_assert!(!unsolved_modules.contains_key(&module_id)); - unsolved_modules.insert( - module_id, - UnsolvedModule { - module, - src, - imported_modules, - constraint, - var_store, - module_timing, - }, - ); - - // Register a listener with each of these. - for dep_id in waiting_for.iter() { - let listeners = solve_listeners - .entry(*dep_id) - .or_insert_with(|| Vec::with_capacity(1)); - - (*listeners).push(module_id); - } + enqueue_task(&injector, worker_listeners, task)? } Ok(state) } - Solved { + SolvedTypes { src, module_id, + ident_ids, solved_module, solved_subs, + decls, mut module_timing, } => { + log!("solved types for {:?}", module_id); module_timing.end_time = SystemTime::now(); - // We've finished recording all the timings for this module, - // add them to state.timings - state.timings.insert(module_id, module_timing); + state.type_problems.extend(solved_module.problems); + + let work = state.dependencies.notify(module_id, Phase::SolveTypes); if module_id == state.root_id { + state + .exposed_to_host + .extend(solved_module.exposed_vars_by_symbol.iter().copied()); + } + + if module_id == state.root_id && state.goal_phase == Phase::SolveTypes { + debug_assert!(work.is_empty()); + debug_assert!(state.dependencies.solved_all()); + + state.timings.insert(module_id, module_timing); + msg_tx - .send(Msg::Finished { + .send(Msg::FinishedAllTypeChecking { solved_subs, - problems: solved_module.problems, exposed_vars_by_symbol: solved_module.exposed_vars_by_symbol, src, }) .map_err(|_| LoadingProblem::MsgChannelDied)?; + + // bookkeeping + state.declarations_by_id.insert(module_id, decls); + state.constrained_ident_ids.insert(module_id, ident_ids); + + // As far as type-checking goes, once we've solved + // the originally requested module, we're all done! + return Ok(state); } else { - state.type_problems.extend(solved_module.problems); + if module_id != state.root_id { + state.exposed_types.insert( + module_id, + ExposedModuleTypes::Valid( + solved_module.solved_types, + solved_module.aliases, + ), + ); + } - // This was a dependency. Write it down and keep processing messages. - debug_assert!(!state.exposed_types.contains_key(&module_id)); - state.exposed_types.insert( - module_id, - ExposedModuleTypes::Valid(solved_module.solved_types, solved_module.aliases), - ); + if state.goal_phase > Phase::SolveTypes { + let layout_cache = state.layout_caches.pop().unwrap_or_default(); - // Notify all the listeners that this solved. - if let Some(listeners) = state.solve_listeners.remove(&module_id) { - for listener_id in listeners { - // This listener is longer waiting for this module, - // because this module has now been solved! - let waiting_for = state - .waiting_for_solve - .get_mut(&listener_id) - .expect("Unable to find module ID in waiting_for_solve"); + let finished_info = FinishedInfo { + src, + exposed_vars_by_symbol: solved_module.exposed_vars_by_symbol, + }; - waiting_for.remove(&module_id); + let typechecked = TypeCheckedModule { + module_id, + decls, + solved_subs, + ident_ids, + module_timing, + layout_cache, + finished_info, + }; - // If it's no longer waiting for anything else, solve it. 
- if waiting_for.is_empty() { - let UnsolvedModule { - module, - src, - imported_modules, - constraint, - var_store, - module_timing, - } = state - .unsolved_modules - .remove(&listener_id) - .expect("Could not find listener ID in unsolved_modules"); + state + .module_cache + .typechecked + .insert(module_id, typechecked); + } else { + state.constrained_ident_ids.insert(module_id, ident_ids); + } - enqueue_task( - injector, - worker_listeners, - BuildTask::solve_module( - module, - module_timing, - src, - constraint, - var_store, - imported_modules, - &mut state.exposed_types, - stdlib, - ), - )?; - } - } + for (module_id, phase) in work { + let task = start_phase(module_id, phase, &mut state); + + enqueue_task(&injector, worker_listeners, task)? } } Ok(state) } - Msg::Finished { .. } => { + FoundSpecializations { + module_id, + procs, + finished_info, + solved_subs, + ident_ids, + layout_cache, + problems: _, + } => { + log!("found specializations for {:?}", module_id); + let subs = solved_subs.into_inner(); + + if let Some(pending) = &procs.pending_specializations { + for (symbol, specs) in pending { + let existing = match state.all_pending_specializations.entry(*symbol) { + Vacant(entry) => entry.insert(MutMap::default()), + Occupied(entry) => entry.into_mut(), + }; + + for (layout, pend) in specs { + existing.insert(layout.clone(), pend.clone()); + } + } + } + + let found_specializations_module = FoundSpecializationsModule { + layout_cache, + module_id, + procs, + finished_info, + ident_ids, + subs, + }; + + state + .module_cache + .found_specializations + .insert(module_id, found_specializations_module); + + let work = state + .dependencies + .notify(module_id, Phase::FindSpecializations); + + for (module_id, phase) in work { + let task = start_phase(module_id, phase, &mut state); + + enqueue_task(&injector, worker_listeners, task)? + } + Ok(state) + } + MadeSpecializations { + module_id, + ident_ids, + subs, + finished_info, + procedures, + external_specializations_requested, + problems, + .. + } => { + log!("made specializations for {:?}", module_id); + + state.mono_problems.extend(problems); + + for (module_id, requested) in external_specializations_requested { + let existing = match state + .module_cache + .external_specializations_requested + .entry(module_id) + { + Vacant(entry) => entry.insert(ExternalSpecializations::default()), + Occupied(entry) => entry.into_mut(), + }; + + existing.extend(requested); + } + + state.procedures.extend(procedures); + + let work = state + .dependencies + .notify(module_id, Phase::MakeSpecializations); + + state.constrained_ident_ids.insert(module_id, ident_ids); + + if work.is_empty() + && state.dependencies.solved_all() + && state.goal_phase == Phase::MakeSpecializations + { + // state.timings.insert(module_id, module_timing); + + Proc::insert_refcount_operations(arena, &mut state.procedures); + + msg_tx + .send(Msg::FinishedAllSpecialization { + subs, + // TODO thread through mono problems + exposed_to_host: state.exposed_to_host.clone(), + src: finished_info.src, + }) + .map_err(|_| LoadingProblem::MsgChannelDied)?; + + // As far as type-checking goes, once we've solved + // the originally requested module, we're all done! + return Ok(state); + } else { + for (module_id, phase) in work { + let task = start_phase(module_id, phase, &mut state); + + enqueue_task(&injector, worker_listeners, task)? + } + } + + Ok(state) + } + Msg::FinishedAllTypeChecking { .. } => { + unreachable!(); + } + Msg::FinishedAllSpecialization { .. 
} => { unreachable!(); } } } +fn finish_specialization<'a>( + state: State<'a>, + subs: Subs, + exposed_to_host: MutMap, + src: &'a str, +) -> MonomorphizedModule<'a> { + let module_ids = Arc::try_unwrap(state.arc_modules) + .unwrap_or_else(|_| panic!("There were still outstanding Arc references to module_ids")) + .into_inner(); + + let interns = Interns { + module_ids, + all_ident_ids: state.constrained_ident_ids, + }; + + let State { + mono_problems, + type_problems, + can_problems, + procedures, + .. + } = state; + + MonomorphizedModule { + can_problems, + mono_problems, + type_problems, + exposed_to_host, + module_id: state.root_id, + subs, + interns, + procedures, + src: src.into(), + timings: state.timings, + } +} + fn finish<'a>( - mut state: State<'a>, + state: State<'a>, solved: Solved, - problems: Vec, exposed_vars_by_symbol: Vec<(Symbol, Variable)>, src: &'a str, ) -> LoadedModule { - state.type_problems.extend(problems); - let module_ids = Arc::try_unwrap(state.arc_modules) .unwrap_or_else(|_| panic!("There were still outstanding Arc references to module_ids")) - .into_inner() - .expect("Unwrapping mutex for module_ids"); + .into_inner(); let interns = Interns { module_ids, @@ -1006,6 +1674,30 @@ fn load_filename<'a>( } } +/// Load a module from a str +/// the `filename` is never read, but used for the module name +fn load_from_str<'a>( + arena: &'a Bump, + filename: PathBuf, + src: &'a str, + module_ids: Arc>, + ident_ids_by_module: Arc>, + module_start_time: SystemTime, +) -> Result<(ModuleId, Msg<'a>), LoadingProblem> { + let file_io_start = SystemTime::now(); + let file_io_duration = file_io_start.elapsed().unwrap(); + + parse_header( + arena, + file_io_duration, + filename, + module_ids, + ident_ids_by_module, + src.as_bytes(), + module_start_time, + ) +} + #[allow(clippy::too_many_arguments)] fn send_header<'a>( name: Located>, @@ -1046,10 +1738,8 @@ fn send_header<'a>( let ident_ids = { // Lock just long enough to perform the minimal operations necessary. - let mut module_ids = (*module_ids).lock().expect("Failed to acquire lock for interning module IDs, presumably because a thread panicked."); - let mut ident_ids_by_module = (*ident_ids_by_module).lock().expect( - "Failed to acquire lock for interning ident IDs, presumably because a thread panicked.", - ); + let mut module_ids = (*module_ids).lock(); + let mut ident_ids_by_module = (*ident_ids_by_module).lock(); home = module_ids.get_or_insert(&declared_name.as_inline_str()); @@ -1147,6 +1837,7 @@ impl<'a> BuildTask<'a> { #[allow(clippy::too_many_arguments)] pub fn solve_module( module: Module, + ident_ids: IdentIds, module_timing: ModuleTiming, src: &'a str, constraint: Constraint, @@ -1154,6 +1845,7 @@ impl<'a> BuildTask<'a> { imported_modules: MutSet, exposed_types: &mut SubsByModule, stdlib: &StdLib, + declarations: Vec, ) -> Self { let home = module.module_id; @@ -1172,106 +1864,45 @@ impl<'a> BuildTask<'a> { stdlib, ); - for unused_import in unused_imports { + if !unused_imports.is_empty() { todo!( "TODO gracefully handle unused import {:?} from module {:?}", - unused_import, - home + &unused_imports, + home, ); } // Next, solve this module in the background. 
Self::Solve { module, + ident_ids, imported_symbols, imported_aliases, constraint, var_store, src, + declarations, module_timing, } } - - #[allow(clippy::too_many_arguments)] - pub fn parse_and_constrain( - header: ModuleHeader<'a>, - mode: Mode, - module_ids: Arc>, - ident_ids_by_module: Arc>, - exposed_types: &SubsByModule, - exposed_symbols: MutSet, - waiting_for_solve: &mut MutMap>, - ) -> Self { - let module_id = header.module_id; - let deps_by_name = &header.deps_by_name; - let num_deps = deps_by_name.len(); - let mut dep_idents: IdentIdsByModule = IdentIds::exposed_builtins(num_deps); - - { - let ident_ids_by_module = (*ident_ids_by_module).lock().expect( - "Failed to acquire lock for interning ident IDs, presumably because a thread panicked.", - ); - - // Populate dep_idents with each of their IdentIds, - // which we'll need during canonicalization to translate - // identifier strings into IdentIds, which we need to build Symbols. - // We only include the modules we care about (the ones we import). - // - // At the end of this loop, dep_idents contains all the information to - // resolve a symbol from another module: if it's in here, that means - // we have both imported the module and the ident was exported by that mdoule. - for dep_id in header.deps_by_name.values() { - // We already verified that these are all present, - // so unwrapping should always succeed here. - let idents = ident_ids_by_module.get(&dep_id).unwrap(); - - dep_idents.insert(*dep_id, idents.clone()); - } - } - - // Once this step has completed, the next thing we'll need - // is solving. Register the modules we'll need to have been - // solved before we can solve. - let mut solve_needed = HashSet::with_capacity_and_hasher(num_deps, default_hasher()); - - for dep_id in deps_by_name.values() { - if !exposed_types.contains_key(dep_id) { - solve_needed.insert(*dep_id); - } - } - - waiting_for_solve.insert(module_id, solve_needed); - - let module_ids = { - (*module_ids).lock().expect("Failed to acquire lock for obtaining module IDs, presumably because a thread panicked.").clone() - }; - - // Now that we have waiting_for_solve populated, continue parsing, - // canonicalizing, and constraining the module. - Self::ParseAndConstrain { - header, - mode, - module_ids, - dep_idents, - exposed_symbols, - } - } } #[allow(clippy::too_many_arguments)] fn run_solve<'a>( module: Module, + ident_ids: IdentIds, mut module_timing: ModuleTiming, - stdlib: &StdLib, + stdlib_mode: Mode, imported_symbols: Vec, imported_aliases: MutMap, constraint: Constraint, mut var_store: VarStore, + decls: Vec, src: &'a str, ) -> Msg<'a> { // Rebuild the aliases in this thread, so we don't have to clone all of // stdlib.aliases on the main thread. 
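+ // (This function now receives just the stdlib `Mode` rather than `&StdLib`: `load`
+ // copies `stdlib.mode` out of State before spawning workers, so no borrow of the
+ // StdLib has to cross thread boundaries.)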
- let aliases = match stdlib.mode { + let aliases = match stdlib_mode { Mode::Standard => roc_builtins::std::aliases(), Mode::Uniqueness => roc_builtins::unique::aliases(), }; @@ -1306,10 +1937,12 @@ fn run_solve<'a>( module_timing.solve = solve_end.duration_since(constrain_end).unwrap(); // Send the subs to the main thread for processing, - Msg::Solved { + Msg::SolvedTypes { src, module_id, solved_subs, + ident_ids, + decls, solved_module, module_timing, } @@ -1319,7 +1952,7 @@ fn run_solve<'a>( fn parse_and_constrain<'a>( header: ModuleHeader<'a>, mode: Mode, - module_ids: ModuleIds, + module_ids: &ModuleIds, dep_idents: IdentIdsByModule, exposed_symbols: MutSet, ) -> Result, LoadingProblem> { @@ -1340,7 +1973,7 @@ fn parse_and_constrain<'a>( &arena, parsed_defs, module_id, - &module_ids, + module_ids, header.exposed_ident_ids, dep_idents, header.exposed_imports, @@ -1444,12 +2077,229 @@ fn ident_from_exposed(entry: &ExposesEntry<'_>) -> Ident { } } +#[allow(clippy::too_many_arguments)] +fn make_specializations<'a>( + arena: &'a Bump, + home: ModuleId, + mut ident_ids: IdentIds, + mut subs: Subs, + mut procs: Procs<'a>, + mut layout_cache: LayoutCache<'a>, + specializations_we_must_make: ExternalSpecializations, + finished_info: FinishedInfo<'a>, +) -> Msg<'a> { + let mut mono_problems = Vec::new(); + // do the thing + let mut mono_env = roc_mono::ir::Env { + arena, + problems: &mut mono_problems, + subs: &mut subs, + home, + ident_ids: &mut ident_ids, + }; + + procs + .externals_others_need + .extend(specializations_we_must_make); + + // TODO: for now this final specialization pass is sequential, + // with no parallelization at all. We should try to parallelize + // this, but doing so will require a redesign of Procs. + procs = roc_mono::ir::specialize_all( + &mut mono_env, + procs, + &mut layout_cache, + // &finished_info.vars_by_symbol, + ); + + let external_specializations_requested = procs.externals_we_need.clone(); + let procedures = procs.get_specialized_procs_without_rc(mono_env.arena); + + Msg::MadeSpecializations { + module_id: home, + ident_ids, + layout_cache, + procedures, + problems: mono_problems, + subs, + finished_info, + external_specializations_requested, + } +} + +#[allow(clippy::too_many_arguments)] +fn build_pending_specializations<'a>( + arena: &'a Bump, + solved_subs: Solved, + home: ModuleId, + mut ident_ids: IdentIds, + decls: Vec, + // TODO use this? 
+ _module_timing: ModuleTiming, + mut layout_cache: LayoutCache<'a>, + // TODO remove + exposed_to_host: MutMap, + finished_info: FinishedInfo<'a>, +) -> Msg<'a> { + let mut procs = Procs::default(); + + let mut mono_problems = std::vec::Vec::new(); + let mut subs = solved_subs.into_inner(); + let mut mono_env = roc_mono::ir::Env { + arena, + problems: &mut mono_problems, + subs: &mut subs, + home, + ident_ids: &mut ident_ids, + }; + + // Add modules' decls to Procs + for decl in decls { + use roc_can::def::Declaration::*; + + match decl { + Declare(def) | Builtin(def) => add_def_to_module( + &mut layout_cache, + &mut procs, + &mut mono_env, + def, + &exposed_to_host, + false, + ), + DeclareRec(defs) => { + for def in defs { + add_def_to_module( + &mut layout_cache, + &mut procs, + &mut mono_env, + def, + &exposed_to_host, + true, + ) + } + } + InvalidCycle(_loc_idents, _regions) => { + todo!("TODO handle InvalidCycle"); + } + } + } + + let problems = mono_env.problems.to_vec(); + + Msg::FoundSpecializations { + module_id: home, + solved_subs: roc_types::solved_types::Solved(subs), + ident_ids, + layout_cache, + procs, + problems, + finished_info, + } +} + +fn add_def_to_module<'a>( + layout_cache: &mut LayoutCache<'a>, + procs: &mut Procs<'a>, + mono_env: &mut roc_mono::ir::Env<'a, '_>, + def: roc_can::def::Def, + exposed_to_host: &MutMap, + is_recursive: bool, +) { + use roc_can::expr::Expr::*; + use roc_can::pattern::Pattern::*; + + match def.loc_pattern.value { + Identifier(symbol) => { + let is_exposed = exposed_to_host.contains_key(&symbol); + + match def.loc_expr.value { + Closure { + function_type: annotation, + return_type: ret_var, + arguments: loc_args, + loc_body, + .. + } => { + // If this is an exposed symbol, we need to + // register it as such. Otherwise, since it + // never gets called by Roc code, it will never + // get specialized! + if is_exposed { + let mut pattern_vars = bumpalo::collections::Vec::with_capacity_in( + loc_args.len(), + mono_env.arena, + ); + + for (var, _) in loc_args.iter() { + pattern_vars.push(*var); + } + + let layout = match layout_cache.from_var( + mono_env.arena, + annotation, + mono_env.subs, + ) { + Ok(l) => l, + Err(err) => { + // a host-exposed function is not monomorphized + todo!("The host-exposed function {:?} does not have a valid layout (e.g. maybe the function wasn't monomorphic): {:?}", symbol, err) + } + }; + + procs.insert_exposed(symbol, layout, mono_env.subs, annotation); + } + + procs.insert_named( + mono_env, + layout_cache, + symbol, + annotation, + loc_args, + *loc_body, + is_recursive, + ret_var, + ); + } + body => { + // If this is an exposed symbol, we need to + // register it as such. Otherwise, since it + // never gets called by Roc code, it will never + // get specialized! + if is_exposed { + let annotation = def.expr_var; + let layout = layout_cache.from_var(mono_env.arena, annotation, mono_env.subs).unwrap_or_else(|err| + todo!("TODO gracefully handle the situation where we expose a function to the host which doesn't have a valid layout (e.g. maybe the function wasn't monomorphic): {:?}", err) + ); + + procs.insert_exposed(symbol, layout, mono_env.subs, annotation); + } + + let proc = PartialProc { + annotation: def.expr_var, + // This is a 0-arity thunk, so it has no arguments. 
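+ // (A "thunk" here is a top-level constant rather than a function, e.g. a Roc
+ //  definition like `tau = 6.2832`; it gets registered in `module_thunks` below
+ //  and is specialized as a procedure that takes no arguments.)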
+ pattern_symbols: &[], + body, + // This is a 0-arity thunk, so it cannot be recursive + is_self_recursive: false, + }; + + procs.partial_procs.insert(symbol, proc); + procs.module_thunks.insert(symbol); + } + }; + } + other => { + todo!("TODO gracefully handle Declare({:?})", other); + } + } +} + fn run_task<'a>( task: BuildTask<'a>, arena: &'a Bump, src_dir: &Path, msg_tx: MsgSender<'a>, - stdlib: &StdLib, + stdlib_mode: Mode, ) -> Result<(), LoadingProblem> { use BuildTask::*; @@ -1466,7 +2316,7 @@ fn run_task<'a>( module_ids, dep_idents, exposed_symbols, - } => parse_and_constrain(header, mode, module_ids, dep_idents, exposed_symbols), + } => parse_and_constrain(header, mode, &module_ids, dep_idents, exposed_symbols), Solve { module, module_timing, @@ -1474,17 +2324,59 @@ fn run_task<'a>( imported_aliases, constraint, var_store, + ident_ids, + declarations, src, } => Ok(run_solve( module, + ident_ids, module_timing, - stdlib, + stdlib_mode, imported_symbols, imported_aliases, constraint, var_store, + declarations, src, )), + BuildPendingSpecializations { + module_id, + ident_ids, + decls, + module_timing, + layout_cache, + solved_subs, + finished_info, + exposed_to_host, + } => Ok(build_pending_specializations( + arena, + solved_subs, + module_id, + ident_ids, + decls, + module_timing, + layout_cache, + exposed_to_host, + finished_info, + )), + MakeSpecializations { + module_id, + ident_ids, + subs, + procs, + layout_cache, + specializations_we_must_make, + finished_info, + } => Ok(make_specializations( + arena, + module_id, + ident_ids, + subs, + procs, + layout_cache, + specializations_we_must_make, + finished_info, + )), }?; msg_tx diff --git a/compiler/load/tests/test_load.rs b/compiler/load/tests/test_load.rs index 862835f6a8..17c8222226 100644 --- a/compiler/load/tests/test_load.rs +++ b/compiler/load/tests/test_load.rs @@ -14,12 +14,13 @@ mod helpers; #[cfg(test)] mod test_load { use crate::helpers::fixtures_dir; + use bumpalo::Bump; use inlinable_string::InlinableString; use roc_can::def::Declaration::*; use roc_can::def::Def; use roc_collections::all::MutMap; use roc_constrain::module::SubsByModule; - use roc_load::file::{load, LoadedModule}; + use roc_load::file::LoadedModule; use roc_module::symbol::{Interns, ModuleId}; use roc_types::pretty_print::{content_to_string, name_all_type_vars}; use roc_types::subs::Subs; @@ -34,9 +35,11 @@ mod test_load { ) -> LoadedModule { let src_dir = fixtures_dir().join(dir_name); let filename = src_dir.join(format!("{}.roc", module_name)); - let loaded = load( + let arena = Bump::new(); + let loaded = roc_load::file::load_and_typecheck( + &arena, filename, - &roc_builtins::std::standard_stdlib(), + roc_builtins::std::standard_stdlib(), src_dir.as_path(), subs_by_module, ); @@ -128,9 +131,11 @@ mod test_load { let subs_by_module = MutMap::default(); let src_dir = fixtures_dir().join("interface_with_deps"); let filename = src_dir.join("Primary.roc"); - let loaded = load( + let arena = Bump::new(); + let loaded = roc_load::file::load_and_typecheck( + &arena, filename, - &roc_builtins::std::standard_stdlib(), + roc_builtins::std::standard_stdlib(), src_dir.as_path(), subs_by_module, ); diff --git a/compiler/load/tests/test_uniq_load.rs b/compiler/load/tests/test_uniq_load.rs index b0908da0d0..aaede3ddc6 100644 --- a/compiler/load/tests/test_uniq_load.rs +++ b/compiler/load/tests/test_uniq_load.rs @@ -14,13 +14,14 @@ mod helpers; #[cfg(test)] mod test_uniq_load { use crate::helpers::fixtures_dir; + use bumpalo::Bump; use 
inlinable_string::InlinableString; use roc_builtins::unique; use roc_can::def::Declaration::*; use roc_can::def::Def; use roc_collections::all::MutMap; use roc_constrain::module::SubsByModule; - use roc_load::file::{load, LoadedModule}; + use roc_load::file::LoadedModule; use roc_module::symbol::{Interns, ModuleId}; use roc_types::pretty_print::{content_to_string, name_all_type_vars}; use roc_types::subs::Subs; @@ -33,11 +34,13 @@ mod test_uniq_load { module_name: &str, subs_by_module: SubsByModule, ) -> LoadedModule { + let arena = Bump::new(); let src_dir = fixtures_dir().join(dir_name); let filename = src_dir.join(format!("{}.roc", module_name)); - let loaded = load( + let loaded = roc_load::file::load_and_typecheck( + &arena, filename, - &unique::uniq_stdlib(), + unique::uniq_stdlib(), src_dir.as_path(), subs_by_module, ); @@ -126,12 +129,14 @@ mod test_uniq_load { #[test] fn interface_with_deps() { + let arena = Bump::new(); let subs_by_module = MutMap::default(); let src_dir = fixtures_dir().join("interface_with_deps"); let filename = src_dir.join("Primary.roc"); - let loaded = load( + let loaded = roc_load::file::load_and_typecheck( + &arena, filename, - &roc_builtins::std::standard_stdlib(), + roc_builtins::std::standard_stdlib(), src_dir.as_path(), subs_by_module, ); diff --git a/compiler/module/src/symbol.rs b/compiler/module/src/symbol.rs index b8bc4beafc..8d22c88615 100644 --- a/compiler/module/src/symbol.rs +++ b/compiler/module/src/symbol.rs @@ -288,14 +288,18 @@ impl fmt::Debug for ModuleId { .lock() .expect("Failed to acquire lock for Debug reading from DEBUG_MODULE_ID_NAMES, presumably because a thread panicked."); - match names.get(&self.0) { - Some(str_ref) => write!(f, "{}", str_ref.clone()), - None => { - panic!( - "Could not find a Debug name for module ID {} in {:?}", - self.0, names, - ); + if PRETTY_PRINT_DEBUG_SYMBOLS { + match names.get(&self.0) { + Some(str_ref) => write!(f, "{}", str_ref.clone()), + None => { + panic!( + "Could not find a Debug name for module ID {} in {:?}", + self.0, names, + ); + } } + } else { + write!(f, "{}", self.0) } } @@ -376,7 +380,7 @@ pub struct IdentId(u32); /// /// Each module name is stored twice, for faster lookups. /// Since these are interned strings, this shouldn't result in many total allocations in practice. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct IdentIds { by_ident: MutMap, diff --git a/compiler/mono/Cargo.toml b/compiler/mono/Cargo.toml index 079e0f0da0..0a812aa98b 100644 --- a/compiler/mono/Cargo.toml +++ b/compiler/mono/Cargo.toml @@ -17,6 +17,7 @@ roc_solve = { path = "../solve" } roc_problem = { path = "../problem" } ven_pretty = { path = "../../vendor/pretty" } bumpalo = { version = "3.2", features = ["collections"] } +ven_ena = { path = "../../vendor/ena" } [dev-dependencies] roc_constrain = { path = "../constrain" } diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index 4f209d9dd3..9d917abe89 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -337,6 +337,7 @@ impl<'a> BorrowInfState<'a> { self.own_var(*x); } } + FunctionCall { call_type, args, diff --git a/compiler/mono/src/decision_tree.rs b/compiler/mono/src/decision_tree.rs index f25faaedd1..1f03ed13a8 100644 --- a/compiler/mono/src/decision_tree.rs +++ b/compiler/mono/src/decision_tree.rs @@ -1143,7 +1143,7 @@ fn test_to_equality<'a>( // TODO procs and layout are currently unused, but potentially required // for defining optional fields? 
// if not, do remove -#[allow(clippy::too_many_arguments)] +#[allow(clippy::too_many_arguments, clippy::needless_collect)] fn decide_to_branching<'a>( env: &mut Env<'a, '_>, procs: &mut Procs<'a>, @@ -1256,6 +1256,9 @@ fn decide_to_branching<'a>( // TODO There must be some way to remove this iterator/loop let nr = (tests.len() as i64) - 1 + (guard.is_some() as i64); + + let arena = env.arena; + let accum_symbols = std::iter::once(true_symbol) .chain((0..nr).map(|_| env.unique_symbol())) .rev() @@ -1268,15 +1271,14 @@ fn decide_to_branching<'a>( let accum = accum_it.next().unwrap(); let test_symbol = env.unique_symbol(); - let and_expr = - Expr::RunLowLevel(LowLevel::And, env.arena.alloc([test_symbol, accum])); + let and_expr = Expr::RunLowLevel(LowLevel::And, arena.alloc([test_symbol, accum])); // write to the branching symbol cond = Stmt::Let( current_symbol, and_expr, Layout::Builtin(Builtin::Int1), - env.arena.alloc(cond), + arena.alloc(cond), ); // calculate the guard value @@ -1287,9 +1289,9 @@ fn decide_to_branching<'a>( }; cond = Stmt::Join { id, - parameters: env.arena.alloc([param]), - remainder: env.arena.alloc(stmt), - continuation: env.arena.alloc(cond), + parameters: arena.alloc([param]), + remainder: arena.alloc(stmt), + continuation: arena.alloc(cond), }; // load all the variables (the guard might need them); @@ -1301,18 +1303,17 @@ fn decide_to_branching<'a>( let test_symbol = env.unique_symbol(); let test = Expr::RunLowLevel( LowLevel::Eq, - bumpalo::vec![in env.arena; lhs, rhs].into_bump_slice(), + bumpalo::vec![in arena; lhs, rhs].into_bump_slice(), ); - let and_expr = - Expr::RunLowLevel(LowLevel::And, env.arena.alloc([test_symbol, accum])); + let and_expr = Expr::RunLowLevel(LowLevel::And, arena.alloc([test_symbol, accum])); // write to the branching symbol cond = Stmt::Let( current_symbol, and_expr, Layout::Builtin(Builtin::Int1), - env.arena.alloc(cond), + arena.alloc(cond), ); // write to the test symbol @@ -1320,11 +1321,11 @@ fn decide_to_branching<'a>( test_symbol, test, Layout::Builtin(Builtin::Int1), - env.arena.alloc(cond), + arena.alloc(cond), ); for (symbol, layout, expr) in new_stores.into_iter() { - cond = Stmt::Let(symbol, expr, layout, env.arena.alloc(cond)); + cond = Stmt::Let(symbol, expr, layout, arena.alloc(cond)); } current_symbol = accum; @@ -1334,7 +1335,7 @@ fn decide_to_branching<'a>( true_symbol, Expr::Literal(Literal::Bool(true)), Layout::Builtin(Builtin::Int1), - env.arena.alloc(cond), + arena.alloc(cond), ); cond diff --git a/compiler/mono/src/ir.rs b/compiler/mono/src/ir.rs index 42a4c5217f..4346c69c7b 100644 --- a/compiler/mono/src/ir.rs +++ b/compiler/mono/src/ir.rs @@ -9,11 +9,12 @@ use roc_module::low_level::LowLevel; use roc_module::symbol::{IdentIds, ModuleId, Symbol}; use roc_problem::can::RuntimeError; use roc_region::all::{Located, Region}; +use roc_types::solved_types::SolvedType; use roc_types::subs::{Content, FlatType, Subs, Variable}; use std::collections::HashMap; use ven_pretty::{BoxAllocator, DocAllocator, DocBuilder}; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum MonoProblem { PatternProblem(crate::exhaustive::Error), } @@ -21,16 +22,21 @@ pub enum MonoProblem { #[derive(Clone, Debug, PartialEq)] pub struct PartialProc<'a> { pub annotation: Variable, - pub pattern_symbols: Vec<'a, Symbol>, + pub pattern_symbols: &'a [Symbol], pub body: roc_can::expr::Expr, pub is_self_recursive: bool, } #[derive(Clone, Debug, PartialEq)] -pub struct PendingSpecialization<'a> { - pub fn_var: Variable, - pub ret_var: 
Variable, - pub pattern_vars: Vec<'a, Variable>, +pub struct PendingSpecialization { + solved_type: SolvedType, +} + +impl PendingSpecialization { + pub fn from_var(subs: &Subs, var: Variable) -> Self { + let solved_type = SolvedType::from_var(subs, var); + PendingSpecialization { solved_type } + } } #[derive(Clone, Debug, PartialEq)] @@ -79,16 +85,59 @@ impl<'a> Proc<'a> { w.push(b'\n'); String::from_utf8(w).unwrap() } + + pub fn insert_refcount_operations( + arena: &'a Bump, + procs: &mut MutMap<(Symbol, Layout<'a>), Proc<'a>>, + ) { + let borrow_params = arena.alloc(crate::borrow::infer_borrow(arena, procs)); + + for (_, proc) in procs.iter_mut() { + crate::inc_dec::visit_proc(arena, borrow_params, proc); + } + } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, Default)] +pub struct ExternalSpecializations { + pub specs: MutMap>, +} + +impl ExternalSpecializations { + pub fn insert(&mut self, symbol: Symbol, typ: SolvedType) { + use std::collections::hash_map::Entry::{Occupied, Vacant}; + + let existing = match self.specs.entry(symbol) { + Vacant(entry) => entry.insert(MutSet::default()), + Occupied(entry) => entry.into_mut(), + }; + + existing.insert(typ); + } + + pub fn extend(&mut self, other: Self) { + use std::collections::hash_map::Entry::{Occupied, Vacant}; + + for (symbol, solved_types) in other.specs { + let existing = match self.specs.entry(symbol) { + Vacant(entry) => entry.insert(MutSet::default()), + Occupied(entry) => entry.into_mut(), + }; + + existing.extend(solved_types); + } + } +} + +#[derive(Clone, Debug)] pub struct Procs<'a> { pub partial_procs: MutMap>, pub module_thunks: MutSet, - pub pending_specializations: - Option, PendingSpecialization<'a>>>>, + pub pending_specializations: Option, PendingSpecialization>>>, pub specialized: MutMap<(Symbol, Layout<'a>), InProgressProc<'a>>, pub runtime_errors: MutMap, + pub externals_others_need: ExternalSpecializations, + pub externals_we_need: MutMap, } impl<'a> Default for Procs<'a> { @@ -99,6 +148,8 @@ impl<'a> Default for Procs<'a> { pending_specializations: Some(MutMap::default()), specialized: MutMap::default(), runtime_errors: MutMap::default(), + externals_we_need: MutMap::default(), + externals_others_need: ExternalSpecializations::default(), } } } @@ -110,6 +161,68 @@ pub enum InProgressProc<'a> { } impl<'a> Procs<'a> { + /// Absorb the contents of another Procs into this one. 
+ pub fn absorb(&mut self, mut other: Procs<'a>) { + debug_assert!(self.pending_specializations.is_some()); + debug_assert!(other.pending_specializations.is_some()); + + match self.pending_specializations { + Some(ref mut pending_specializations) => { + for (k, v) in other.pending_specializations.unwrap().drain() { + pending_specializations.insert(k, v); + } + } + None => { + unreachable!(); + } + } + + for (k, v) in other.partial_procs.drain() { + self.partial_procs.insert(k, v); + } + + for (k, v) in other.specialized.drain() { + self.specialized.insert(k, v); + } + + for (k, v) in other.runtime_errors.drain() { + self.runtime_errors.insert(k, v); + } + + for symbol in other.module_thunks.drain() { + self.module_thunks.insert(symbol); + } + } + + pub fn get_specialized_procs_without_rc( + self, + arena: &'a Bump, + ) -> MutMap<(Symbol, Layout<'a>), Proc<'a>> { + let mut result = MutMap::with_capacity_and_hasher(self.specialized.len(), default_hasher()); + + for (key, in_prog_proc) in self.specialized.into_iter() { + match in_prog_proc { + InProgress => unreachable!("The procedure {:?} should have be done by now", key), + Done(mut proc) => { + use self::SelfRecursive::*; + if let SelfRecursive(id) = proc.is_self_recursive { + proc.body = crate::tail_recursion::make_tail_recursive( + arena, + id, + proc.name, + proc.body.clone(), + proc.args, + ); + } + + result.insert(key, proc); + } + } + } + + result + } + // TODO investigate make this an iterator? pub fn get_specialized_procs(self, arena: &'a Bump) -> MutMap<(Symbol, Layout<'a>), Proc<'a>> { let mut result = MutMap::with_capacity_and_hasher(self.specialized.len(), default_hasher()); @@ -204,6 +317,7 @@ impl<'a> Procs<'a> { // context, we can't add pending specializations for them yet. // (If we did, all named polymorphic functions would immediately error // on trying to convert a flex var to a Layout.) + let pattern_symbols = pattern_symbols.into_bump_slice(); self.partial_procs.insert( name, PartialProc { @@ -247,7 +361,7 @@ impl<'a> Procs<'a> { let is_self_recursive = false; match patterns_to_when(env, layout_cache, loc_args, ret_var, loc_body) { - Ok((pattern_vars, pattern_symbols, body)) => { + Ok((_, pattern_symbols, body)) => { // an anonymous closure. These will always be specialized already // by the surrounding context, so we can add pending specializations // for them immediately. @@ -265,12 +379,10 @@ impl<'a> Procs<'a> { // Changing it to use .entry() would necessarily make it incorrect. 
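// (Illustrative note, not part of this change: the `contains_key`-then-`insert`
// shape below is deliberate. Rewriting it with `.entry()` would most likely not
// work here, because the `Entry` would keep the map mutably borrowed across code
// in the body that needs `&mut self` again, e.g. the call to `specialize(env, self, ...)`.
// A minimal sketch of the pattern, with hypothetical names:
//
//     if !self.specialized.contains_key(&(symbol, layout.clone())) {
//         self.specialized.insert((symbol, layout.clone()), InProgress);
//         // ... work that takes `&mut self` and may consult this map again ...
//         self.specialized.insert((symbol, layout), Done(proc));
//     }
//
// which is why the `clippy::map_entry` lint is allowed on the block below.)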
#[allow(clippy::map_entry)] if !already_specialized { - let pending = PendingSpecialization { - ret_var, - fn_var: annotation, - pattern_vars, - }; + let solved_type = SolvedType::from_var(env.subs, annotation); + let pending = PendingSpecialization { solved_type }; + let pattern_symbols = pattern_symbols.into_bump_slice(); match &mut self.pending_specializations { Some(pending_specializations) => { // register the pending specialization, so this gets code genned later @@ -303,11 +415,13 @@ impl<'a> Procs<'a> { self.specialized .insert((symbol, layout.clone()), InProgress); + let outside_layout = layout.clone(); + match specialize(env, self, symbol, layout_cache, pending, partial_proc) { - Ok(proc) => { - self.specialized - .insert((symbol, layout.clone()), Done(proc)); + Ok((proc, layout)) => { + debug_assert_eq!(outside_layout, layout); + self.specialized.insert((symbol, layout), Done(proc)); } Err(error) => { let error_msg = format!( @@ -316,6 +430,7 @@ impl<'a> Procs<'a> { ); self.runtime_errors .insert(symbol, env.arena.alloc(error_msg)); + panic!(); } } } @@ -333,9 +448,8 @@ impl<'a> Procs<'a> { &mut self, name: Symbol, layout: Layout<'a>, - pattern_vars: Vec<'a, Variable>, + subs: &Subs, fn_var: Variable, - ret_var: Variable, ) { let tuple = (name, layout); @@ -346,11 +460,7 @@ impl<'a> Procs<'a> { // We're done with that tuple, so move layout back out to avoid cloning it. let (name, layout) = tuple; - let pending = PendingSpecialization { - pattern_vars, - ret_var, - fn_var, - }; + let pending = PendingSpecialization::from_var(subs, fn_var); // This should only be called when pending_specializations is Some. // Otherwise, it's being called in the wrong pass! @@ -382,78 +492,49 @@ impl<'a> Procs<'a> { // We're done with that tuple, so move layout back out to avoid cloning it. let (name, layout) = tuple; - // now we have to pull some tricks to extract the return var and pattern vars from Subs - match get_args_ret_var(env.subs, fn_var) { - Some((pattern_vars, ret_var)) => { - let pattern_vars = Vec::from_iter_in(pattern_vars.into_iter(), env.arena); - let pending = PendingSpecialization { - pattern_vars, - ret_var, - fn_var, - }; + let pending = PendingSpecialization::from_var(env.subs, fn_var); - // This should only be called when pending_specializations is Some. - // Otherwise, it's being called in the wrong pass! - match &mut self.pending_specializations { - Some(pending_specializations) => { - // register the pending specialization, so this gets code genned later - add_pending(pending_specializations, name, layout, pending) + // This should only be called when pending_specializations is Some. + // Otherwise, it's being called in the wrong pass! + match &mut self.pending_specializations { + Some(pending_specializations) => { + // register the pending specialization, so this gets code genned later + add_pending(pending_specializations, name, layout, pending) + } + None => { + let symbol = name; + + // TODO should pending_procs hold a Rc? + let partial_proc = self.partial_procs.get(&symbol).unwrap().clone(); + + // Mark this proc as in-progress, so if we're dealing with + // mutually recursive functions, we don't loop forever. + // (We had a bug around this before this system existed!) 
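// Illustrative sketch (hypothetical Roc code, not from this change): the
// in-progress marker matters for mutually recursive definitions such as
//
//     isEven = \n -> if n == 0 then True else isOdd (n - 1)
//     isOdd = \n -> if n == 0 then False else isEven (n - 1)
//
// Specializing `isEven` asks for a specialization of `isOdd`, which asks for
// `isEven` again; finding the `(symbol, layout)` key already present as
// `InProgress` stops that cycle instead of recursing forever.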
+ self.specialized + .insert((symbol, layout.clone()), InProgress); + + match specialize(env, self, symbol, layout_cache, pending, partial_proc) { + Ok((proc, layout)) => { + self.specialized.insert((symbol, layout), Done(proc)); } - None => { - let symbol = name; - - // TODO should pending_procs hold a Rc? - let partial_proc = self.partial_procs.get(&symbol).unwrap().clone(); - - // Mark this proc as in-progress, so if we're dealing with - // mutually recursive functions, we don't loop forever. - // (We had a bug around this before this system existed!) - self.specialized - .insert((symbol, layout.clone()), InProgress); - - match specialize(env, self, symbol, layout_cache, pending, partial_proc) { - Ok(proc) => { - self.specialized - .insert((symbol, layout.clone()), Done(proc)); - } - Err(error) => { - let error_msg = - format!("TODO generate a RuntimeError message for {:?}", error); - self.runtime_errors - .insert(symbol, env.arena.alloc(error_msg)); - } - } + Err(error) => { + let error_msg = + format!("TODO generate a RuntimeError message for {:?}", error); + self.runtime_errors + .insert(symbol, env.arena.alloc(error_msg)); + panic!(); } } } - other => { - unreachable!( - "trying to insert a symbol that is not a function: {:?} {:?}", - name, other - ); - } } } } -fn get_args_ret_var(subs: &Subs, var: Variable) -> Option<(std::vec::Vec, Variable)> { - match subs.get_without_compacting(var).content { - Content::Structure(FlatType::Func(pattern_vars, _closure_var, ret_var)) => { - Some((pattern_vars, ret_var)) - } - Content::Structure(FlatType::Apply(Symbol::ATTR_ATTR, args)) => { - get_args_ret_var(subs, args[1]) - } - Content::Alias(_, _, actual) => get_args_ret_var(subs, actual), - _ => None, - } -} - fn add_pending<'a>( - pending_specializations: &mut MutMap, PendingSpecialization<'a>>>, + pending_specializations: &mut MutMap, PendingSpecialization>>, symbol: Symbol, layout: Layout<'a>, - pending: PendingSpecialization<'a>, + pending: PendingSpecialization, ) { let all_pending = pending_specializations .entry(symbol) @@ -512,7 +593,7 @@ impl<'a> Specializations<'a> { pub struct Env<'a, 'i> { pub arena: &'a Bump, - pub subs: &'a mut Subs, + pub subs: &'i mut Subs, pub problems: &'i mut std::vec::Vec, pub home: ModuleId, pub ident_ids: &'i mut IdentIds, @@ -680,6 +761,7 @@ pub enum Expr<'a> { arguments: &'a [Symbol], }, Struct(&'a [Symbol]), + AccessAtIndex { index: u64, field_layouts: &'a [Layout<'a>], @@ -856,11 +938,11 @@ impl<'a> Stmt<'a> { env: &mut Env<'a, '_>, can_expr: roc_can::expr::Expr, procs: &mut Procs<'a>, + layout_cache: &mut LayoutCache<'a>, ) -> Self { - let mut layout_cache = LayoutCache::default(); - - from_can(env, can_expr, procs, &mut layout_cache) + from_can(env, can_expr, procs, layout_cache) } + pub fn to_doc<'b, D, A>(&'b self, alloc: &'b D) -> DocBuilder<'b, D, A> where D: DocAllocator<'b, A>, @@ -1166,6 +1248,41 @@ pub fn specialize_all<'a>( mut procs: Procs<'a>, layout_cache: &mut LayoutCache<'a>, ) -> Procs<'a> { + let it = procs.externals_others_need.specs.clone(); + let it = it + .into_iter() + .map(|(symbol, solved_types)| solved_types.into_iter().map(move |s| (symbol, s))) + .flatten(); + for (name, solved_type) in it.into_iter() { + let partial_proc = match procs.partial_procs.get(&name) { + Some(v) => v.clone(), + None => { + unreachable!("now this is an error"); + } + }; + + match specialize_solved_type( + env, + &mut procs, + name, + layout_cache, + solved_type, + partial_proc, + ) { + Ok((proc, layout)) => { + procs.specialized.insert((name, 
layout), Done(proc)); + } + Err(error) => { + let error_msg = env.arena.alloc(format!( + "TODO generate a RuntimeError message for {:?}", + error + )); + + procs.runtime_errors.insert(name, error_msg); + } + } + } + let mut pending_specializations = procs.pending_specializations.unwrap_or_default(); // When calling from_can, pending_specializations should be unavailable. @@ -1182,19 +1299,34 @@ pub fn specialize_all<'a>( #[allow(clippy::map_entry)] if !procs.specialized.contains_key(&(name, layout.clone())) { // TODO should pending_procs hold a Rc? - let partial_proc = procs - .partial_procs - .get(&name) - .unwrap_or_else(|| panic!("Could not find partial_proc for {:?}", name)) - .clone(); + let partial_proc = match procs.partial_procs.get(&name) { + Some(v) => v.clone(), + None => { + // TODO this assumes the specialization is done by another module + // make sure this does not become a problem down the road! + continue; + } + }; // Mark this proc as in-progress, so if we're dealing with // mutually recursive functions, we don't loop forever. // (We had a bug around this before this system existed!) - procs.specialized.insert((name, layout.clone()), InProgress); + let outside_layout = layout.clone(); + procs + .specialized + .insert((name, outside_layout.clone()), InProgress); - match specialize(env, &mut procs, name, layout_cache, pending, partial_proc) { - Ok(proc) => { + match specialize( + env, + &mut procs, + name, + layout_cache, + pending.clone(), + partial_proc, + ) { + Ok((proc, layout)) => { + debug_assert_eq!(outside_layout, layout); + procs.specialized.remove(&(name, outside_layout)); procs.specialized.insert((name, layout), Done(proc)); } Err(error) => { @@ -1204,6 +1336,7 @@ pub fn specialize_all<'a>( )); procs.runtime_errors.insert(name, error_msg); + panic!("failed to specialize {:?}", name); } } } @@ -1213,20 +1346,14 @@ pub fn specialize_all<'a>( procs } -fn specialize<'a>( +fn specialize_external<'a>( env: &mut Env<'a, '_>, procs: &mut Procs<'a>, proc_name: Symbol, layout_cache: &mut LayoutCache<'a>, - pending: PendingSpecialization<'a>, + fn_var: Variable, partial_proc: PartialProc<'a>, ) -> Result, LayoutProblem> { - let PendingSpecialization { - ret_var, - fn_var, - pattern_vars, - } = pending; - let PartialProc { annotation, pattern_symbols, @@ -1236,35 +1363,22 @@ fn specialize<'a>( // unify the called function with the specialized signature, then specialize the function body let snapshot = env.subs.snapshot(); + let cache_snapshot = layout_cache.snapshot(); + let unified = roc_unify::unify::unify(env.subs, annotation, fn_var); - debug_assert!(matches!(unified, roc_unify::unify::Unified::Success(_))); + let is_valid = matches!(unified, roc_unify::unify::Unified::Success(_)); + debug_assert!(is_valid); let specialized_body = from_can(env, body, procs, layout_cache); + let (proc_args, ret_layout) = + build_specialized_proc_from_var(env, layout_cache, pattern_symbols, fn_var)?; + // reset subs, so we don't get type errors when specializing for a different signature + layout_cache.rollback_to(cache_snapshot); env.subs.rollback_to(snapshot); - let mut proc_args = Vec::with_capacity_in(pattern_vars.len(), &env.arena); - - debug_assert_eq!( - &pattern_vars.len(), - &pattern_symbols.len(), - "Tried to zip two vecs with different lengths!" 
- ); - - for (arg_var, arg_name) in pattern_vars.iter().zip(pattern_symbols.iter()) { - let layout = layout_cache.from_var(&env.arena, *arg_var, env.subs)?; - - proc_args.push((layout, *arg_name)); - } - - let proc_args = proc_args.into_bump_slice(); - - let ret_layout = layout_cache - .from_var(&env.arena, ret_var, env.subs) - .unwrap_or_else(|err| panic!("TODO handle invalid function {:?}", err)); - // TODO WRONG let closes_over_layout = Layout::Struct(&[]); @@ -1286,6 +1400,157 @@ fn specialize<'a>( Ok(proc) } +#[allow(clippy::type_complexity)] +fn build_specialized_proc_from_var<'a>( + env: &mut Env<'a, '_>, + layout_cache: &mut LayoutCache<'a>, + pattern_symbols: &[Symbol], + fn_var: Variable, +) -> Result<(&'a [(Layout<'a>, Symbol)], Layout<'a>), LayoutProblem> { + match env.subs.get_without_compacting(fn_var).content { + Content::Structure(FlatType::Func(pattern_vars, _closure_var, ret_var)) => { + build_specialized_proc(env, layout_cache, pattern_symbols, &pattern_vars, ret_var) + } + Content::Structure(FlatType::Apply(Symbol::ATTR_ATTR, args)) + if !pattern_symbols.is_empty() => + { + build_specialized_proc_from_var(env, layout_cache, pattern_symbols, args[1]) + } + Content::Alias(_, _, actual) => { + build_specialized_proc_from_var(env, layout_cache, pattern_symbols, actual) + } + _ => { + // a top-level constant 0-argument thunk + + build_specialized_proc(env, layout_cache, pattern_symbols, &[], fn_var) + } + } +} + +#[allow(clippy::type_complexity)] +fn build_specialized_proc<'a>( + env: &mut Env<'a, '_>, + layout_cache: &mut LayoutCache<'a>, + pattern_symbols: &[Symbol], + pattern_vars: &[Variable], + ret_var: Variable, +) -> Result<(&'a [(Layout<'a>, Symbol)], Layout<'a>), LayoutProblem> { + let mut proc_args = Vec::with_capacity_in(pattern_vars.len(), &env.arena); + + debug_assert_eq!( + &pattern_vars.len(), + &pattern_symbols.len(), + "Tried to zip two vecs with different lengths!" 
+ ); + + for (arg_var, arg_name) in pattern_vars.iter().zip(pattern_symbols.iter()) { + let layout = layout_cache.from_var(&env.arena, *arg_var, env.subs)?; + + proc_args.push((layout, *arg_name)); + } + + let proc_args = proc_args.into_bump_slice(); + + let ret_layout = layout_cache + .from_var(&env.arena, ret_var, env.subs) + .unwrap_or_else(|err| panic!("TODO handle invalid function {:?}", err)); + + Ok((proc_args, ret_layout)) +} + +fn specialize<'a>( + env: &mut Env<'a, '_>, + procs: &mut Procs<'a>, + proc_name: Symbol, + layout_cache: &mut LayoutCache<'a>, + pending: PendingSpecialization, + partial_proc: PartialProc<'a>, +) -> Result<(Proc<'a>, Layout<'a>), LayoutProblem> { + let PendingSpecialization { solved_type } = pending; + specialize_solved_type( + env, + procs, + proc_name, + layout_cache, + solved_type, + partial_proc, + ) +} + +fn specialize_solved_type<'a>( + env: &mut Env<'a, '_>, + procs: &mut Procs<'a>, + proc_name: Symbol, + layout_cache: &mut LayoutCache<'a>, + solved_type: SolvedType, + partial_proc: PartialProc<'a>, +) -> Result<(Proc<'a>, Layout<'a>), LayoutProblem> { + // add the specializations that other modules require of us + use roc_constrain::module::{to_type, FreeVars}; + use roc_solve::solve::{insert_type_into_subs, instantiate_rigids}; + use roc_types::subs::VarStore; + + let snapshot = env.subs.snapshot(); + let cache_snapshot = layout_cache.snapshot(); + + let mut free_vars = FreeVars::default(); + let mut var_store = VarStore::new_from_subs(env.subs); + + let before = var_store.peek(); + + let normal_type = to_type(&solved_type, &mut free_vars, &mut var_store); + + let after = var_store.peek(); + let variables_introduced = after - before; + + env.subs.extend_by(variables_introduced as usize); + + let fn_var = insert_type_into_subs(env.subs, &normal_type); + + // make sure rigid variables in the annotation are converted to flex variables + instantiate_rigids(env.subs, partial_proc.annotation); + + match specialize_external(env, procs, proc_name, layout_cache, fn_var, partial_proc) { + Ok(proc) => { + let layout = layout_cache + .from_var(&env.arena, fn_var, env.subs) + .unwrap_or_else(|err| panic!("TODO handle invalid function {:?}", err)); + env.subs.rollback_to(snapshot); + layout_cache.rollback_to(cache_snapshot); + Ok((proc, layout)) + } + Err(error) => { + env.subs.rollback_to(snapshot); + layout_cache.rollback_to(cache_snapshot); + Err(error) + } + } +} + +#[derive(Debug)] +struct FunctionLayouts<'a> { + full: Layout<'a>, + arguments: &'a [Layout<'a>], + result: Layout<'a>, +} + +impl<'a> FunctionLayouts<'a> { + pub fn from_layout(layout: Layout<'a>) -> Self { + match &layout { + Layout::FunctionPointer(arguments, result) => FunctionLayouts { + arguments, + result: (*result).clone(), + full: layout, + }, + _ => FunctionLayouts { + full: layout.clone(), + arguments: &[], + result: layout, + }, + } + } +} + pub fn with_hole<'a>( env: &mut Env<'a, '_>, can_expr: roc_can::expr::Expr, @@ -1369,21 +1634,40 @@ pub fn with_hole<'a>( } if let roc_can::pattern::Pattern::Identifier(symbol) = def.loc_pattern.value { - let mut stmt = with_hole(env, cont.value, procs, layout_cache, assigned, hole); - - // this is an alias of a variable - if let roc_can::expr::Expr::Var(original) = def.loc_expr.value { - substitute_in_exprs(env.arena, &mut stmt, symbol, original); + // special-case the form `let x = E in x` + // not doing so will drop the `hole` + match &cont.value { + roc_can::expr::Expr::Var(original) if *original == symbol => { + return with_hole( + env, + 
def.loc_expr.value, + procs, + layout_cache, + assigned, + hole, + ); + } + _ => {} } - with_hole( - env, - def.loc_expr.value, - procs, - layout_cache, - symbol, - env.arena.alloc(stmt), - ) + // continue with the default path + let mut stmt = with_hole(env, cont.value, procs, layout_cache, assigned, hole); + + // a variable is aliased + if let roc_can::expr::Expr::Var(original) = def.loc_expr.value { + substitute_in_exprs(env.arena, &mut stmt, symbol, original); + + stmt + } else { + with_hole( + env, + def.loc_expr.value, + procs, + layout_cache, + symbol, + env.arena.alloc(stmt), + ) + } } else { // this may be a destructure pattern let mono_pattern = from_can_pattern(env, layout_cache, &def.loc_pattern.value); @@ -1468,14 +1752,12 @@ pub fn with_hole<'a>( if procs.module_thunks.contains(&symbol) { let partial_proc = procs.partial_procs.get(&symbol).unwrap(); let fn_var = partial_proc.annotation; - let ret_var = fn_var; // These are the same for a thunk. // This is a top-level declaration, which will code gen to a 0-arity thunk. let result = call_by_name( env, procs, fn_var, - ret_var, symbol, std::vec::Vec::new(), layout_cache, @@ -1959,7 +2241,168 @@ pub fn with_hole<'a>( stmt } - Accessor { .. } | Update { .. } => todo!("record access/accessor/update"), + Accessor { + function_var, + record_var, + closure_var: _, + ext_var, + field_var, + field, + } => { + // IDEA: convert accessor fromt + // + // .foo + // + // into + // + // (\r -> r.foo) + let record_symbol = env.unique_symbol(); + let body = roc_can::expr::Expr::Access { + record_var, + ext_var, + field_var, + loc_expr: Box::new(Located::at_zero(roc_can::expr::Expr::Var(record_symbol))), + field, + }; + + let loc_body = Located::at_zero(body); + + let name = env.unique_symbol(); + + let arguments = vec![( + record_var, + Located::at_zero(roc_can::pattern::Pattern::Identifier(record_symbol)), + )]; + + match procs.insert_anonymous( + env, + name, + function_var, + arguments, + loc_body, + field_var, + layout_cache, + ) { + Ok(layout) => { + // TODO should the let have layout Pointer? + Stmt::Let( + assigned, + Expr::FunctionPointer(name, layout.clone()), + layout, + hole, + ) + } + + Err(_error) => Stmt::RuntimeError( + "TODO convert anonymous function error to a RuntimeError string", + ), + } + } + + Update { + record_var, + symbol: structure, + updates, + .. + } => { + use FieldType::*; + + enum FieldType<'a> { + CopyExisting(u64), + UpdateExisting(&'a roc_can::expr::Field), + }; + + // Strategy: turn a record update into the creation of a new record. + // This has the benefit that we don't need to do anything special for reference + // counting + + let sorted_fields = crate::layout::sort_record_fields(env.arena, record_var, env.subs); + + let mut field_layouts = Vec::with_capacity_in(sorted_fields.len(), env.arena); + + let mut symbols = Vec::with_capacity_in(sorted_fields.len(), env.arena); + let mut fields = Vec::with_capacity_in(sorted_fields.len(), env.arena); + + let mut current = 0; + for (label, opt_field_layout) in sorted_fields.into_iter() { + match opt_field_layout { + Err(_) => { + debug_assert!(!updates.contains_key(&label)); + // this was an optional field, and now does not exist! + // do not increment `current`! 
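// Illustrative sketch of the strategy above (hypothetical example, not code
// from this change): for
//
//     r = { x: 1, y: 2 }
//     { r & y: 3 }
//
// the sorted fields are [x, y]; `x` is not in `updates`, so it becomes
// CopyExisting(0) — an AccessAtIndex load of field 0 from the existing
// structure — while `y` is in `updates` and becomes UpdateExisting with the
// new expression. The result is a brand-new Struct, which is why record
// updates need no special reference-counting handling.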
+ } + Ok(field_layout) => { + field_layouts.push(field_layout); + + if let Some(field) = updates.get(&label) { + // TODO + let field_symbol = + possible_reuse_symbol(env, procs, &field.loc_expr.value); + + fields.push(UpdateExisting(field)); + symbols.push(field_symbol); + } else { + fields.push(CopyExisting(current)); + symbols.push(env.unique_symbol()); + } + + current += 1; + } + } + } + let symbols = symbols.into_bump_slice(); + + let record_layout = layout_cache + .from_var(env.arena, record_var, env.subs) + .unwrap_or_else(|err| panic!("TODO turn fn_var into a RuntimeError {:?}", err)); + + let field_layouts = match &record_layout { + Layout::Struct(layouts) => *layouts, + other => arena.alloc([other.clone()]), + }; + + let wrapped = if field_layouts.len() == 1 { + Wrapped::SingleElementRecord + } else { + Wrapped::RecordOrSingleTagUnion + }; + + let expr = Expr::Struct(symbols); + let mut stmt = Stmt::Let(assigned, expr, record_layout, hole); + + let it = field_layouts.iter().zip(symbols.iter()).zip(fields); + for ((field_layout, symbol), what_to_do) in it { + match what_to_do { + UpdateExisting(field) => { + stmt = assign_to_symbol( + env, + procs, + layout_cache, + field.var, + *field.loc_expr.clone(), + *symbol, + stmt, + ); + } + CopyExisting(index) => { + let access_expr = Expr::AccessAtIndex { + structure, + index, + field_layouts, + wrapped, + }; + stmt = Stmt::Let( + *symbol, + access_expr, + field_layout.clone(), + arena.alloc(stmt), + ); + } + } + } + + stmt + } Closure { function_type, @@ -2004,21 +2447,22 @@ pub fn with_hole<'a>( // So we check the function name against the list of partial procedures, // the procedures that we have lifted to the top-level and can call by name // if it's in there, it's a call by name, otherwise it's a call by pointer - let known_functions = &procs.partial_procs; + let is_known = |key| { + // a proc in this module, or an imported symbol + procs.partial_procs.contains_key(key) || key.module_id() != assigned.module_id() + }; + match loc_expr.value { - roc_can::expr::Expr::Var(proc_name) if known_functions.contains_key(&proc_name) => { - call_by_name( - env, - procs, - fn_var, - ret_var, - proc_name, - loc_args, - layout_cache, - assigned, - hole, - ) - } + roc_can::expr::Expr::Var(proc_name) if is_known(&proc_name) => call_by_name( + env, + procs, + fn_var, + proc_name, + loc_args, + layout_cache, + assigned, + hole, + ), _ => { // Call by pointer - the closure was anonymous, e.g. // @@ -2266,7 +2710,7 @@ pub fn from_can<'a>( from_can(env, cont.value, procs, layout_cache) } - LetNonRec(def, cont, _, _) => { + LetNonRec(def, cont, outer_pattern_vars, outer_annotation) => { if let roc_can::pattern::Pattern::Identifier(symbol) = &def.loc_pattern.value { if let Closure { .. 
} = &def.loc_expr.value { // Now that we know for sure it's a closure, get an owned @@ -2304,15 +2748,131 @@ pub fn from_can<'a>( _ => unreachable!(), } } - let rest = from_can(env, cont.value, procs, layout_cache); - return with_hole( - env, - def.loc_expr.value, - procs, - layout_cache, - *symbol, - env.arena.alloc(rest), - ); + + match def.loc_expr.value { + roc_can::expr::Expr::Var(original) => { + let mut rest = from_can(env, cont.value, procs, layout_cache); + // a variable is aliased + substitute_in_exprs(env.arena, &mut rest, *symbol, original); + + return rest; + } + roc_can::expr::Expr::LetNonRec( + nested_def, + nested_cont, + nested_pattern_vars, + nested_annotation, + ) => { + use roc_can::expr::Expr::*; + // We must transform + // + // let answer = 1337 + // in + // let unused = + // let nested = 17 + // in + // nested + // in + // answer + // + // into + // + // let answer = 1337 + // in + // let nested = 17 + // in + // let unused = nested + // in + // answer + + let new_def = roc_can::def::Def { + loc_pattern: def.loc_pattern, + loc_expr: *nested_cont, + pattern_vars: def.pattern_vars, + annotation: def.annotation, + expr_var: def.expr_var, + }; + + let new_inner = LetNonRec( + Box::new(new_def), + cont, + outer_pattern_vars, + outer_annotation, + ); + + let new_outer = LetNonRec( + nested_def, + Box::new(Located::at_zero(new_inner)), + nested_pattern_vars, + nested_annotation, + ); + + return from_can(env, new_outer, procs, layout_cache); + } + roc_can::expr::Expr::LetRec( + nested_defs, + nested_cont, + nested_pattern_vars, + nested_annotation, + ) => { + use roc_can::expr::Expr::*; + // We must transform + // + // let answer = 1337 + // in + // let unused = + // let nested = \{} -> nested {} + // in + // nested + // in + // answer + // + // into + // + // let answer = 1337 + // in + // let nested = \{} -> nested {} + // in + // let unused = nested + // in + // answer + + let new_def = roc_can::def::Def { + loc_pattern: def.loc_pattern, + loc_expr: *nested_cont, + pattern_vars: def.pattern_vars, + annotation: def.annotation, + expr_var: def.expr_var, + }; + + let new_inner = LetNonRec( + Box::new(new_def), + cont, + outer_pattern_vars, + outer_annotation, + ); + + let new_outer = LetRec( + nested_defs, + Box::new(Located::at_zero(new_inner)), + nested_pattern_vars, + nested_annotation, + ); + + return from_can(env, new_outer, procs, layout_cache); + } + _ => { + let rest = from_can(env, cont.value, procs, layout_cache); + return with_hole( + env, + def.loc_expr.value, + procs, + layout_cache, + *symbol, + env.arena.alloc(rest), + ); + } + } } // this may be a destructure pattern @@ -2322,6 +2882,7 @@ pub fn from_can<'a>( let hole = env .arena .alloc(from_can(env, cont.value, procs, layout_cache)); + with_hole(env, def.loc_expr.value, procs, layout_cache, symbol, hole) } else { let context = crate::exhaustive::Context::BadDestruct; @@ -2437,7 +2998,7 @@ fn to_opt_branches<'a>( env.problems.push(MonoProblem::PatternProblem(error)) } - overlapping_branches.sort(); + overlapping_branches.sort_unstable(); for i in overlapping_branches.into_iter().rev() { opt_branches.remove(i); @@ -3186,13 +3747,14 @@ fn call_by_name<'a>( env: &mut Env<'a, '_>, procs: &mut Procs<'a>, fn_var: Variable, - ret_var: Variable, proc_name: Symbol, loc_args: std::vec::Vec<(Variable, Located)>, layout_cache: &mut LayoutCache<'a>, assigned: Symbol, hole: &'a Stmt<'a>, ) -> Stmt<'a> { + let original_fn_var = fn_var; + // Register a pending_specialization for this function match 
layout_cache.from_var(env.arena, fn_var, env.subs) { Ok(layout) => { @@ -3249,11 +3811,7 @@ fn call_by_name<'a>( let iter = loc_args.into_iter().rev().zip(field_symbols.iter().rev()); assign_to_symbols(env, procs, layout_cache, iter, result) } else { - let pending = PendingSpecialization { - pattern_vars, - ret_var, - fn_var, - }; + let pending = PendingSpecialization::from_var(env.subs, fn_var); // When requested (that is, when procs.pending_specializations is `Some`), // store a pending specialization rather than specializing immediately. @@ -3312,16 +3870,22 @@ fn call_by_name<'a>( pending, partial_proc, ) { - Ok(proc) => { - procs - .specialized - .insert((proc_name, full_layout.clone()), Done(proc)); + Ok((proc, layout)) => { + debug_assert_eq!(full_layout, layout); + let function_layout = FunctionLayouts::from_layout(layout); + + procs.specialized.remove(&(proc_name, full_layout)); + + procs.specialized.insert( + (proc_name, function_layout.full.clone()), + Done(proc), + ); let call = Expr::FunctionCall { call_type: CallType::ByName(proc_name), - ret_layout: ret_layout.clone(), - full_layout: full_layout.clone(), - arg_layouts, + ret_layout: function_layout.result.clone(), + full_layout: function_layout.full, + arg_layouts: function_layout.arguments, args: field_symbols, }; @@ -3331,7 +3895,7 @@ fn call_by_name<'a>( .zip(field_symbols.iter().rev()); let result = - Stmt::Let(assigned, call, ret_layout.clone(), hole); + Stmt::Let(assigned, call, function_layout.result, hole); assign_to_symbols(env, procs, layout_cache, iter, result) } @@ -3343,11 +3907,44 @@ fn call_by_name<'a>( procs.runtime_errors.insert(proc_name, error_msg); - Stmt::RuntimeError(error_msg) + panic!(); + // Stmt::RuntimeError(error_msg) } } } + None if assigned.module_id() != proc_name.module_id() => { + let fn_var = original_fn_var; + + // call of a function that is not not in this module + use std::collections::hash_map::Entry::{Occupied, Vacant}; + + let existing = + match procs.externals_we_need.entry(proc_name.module_id()) { + Vacant(entry) => { + entry.insert(ExternalSpecializations::default()) + } + Occupied(entry) => entry.into_mut(), + }; + + let solved_type = SolvedType::from_var(env.subs, fn_var); + existing.insert(proc_name, solved_type); + + let call = Expr::FunctionCall { + call_type: CallType::ByName(proc_name), + ret_layout: ret_layout.clone(), + full_layout: full_layout.clone(), + arg_layouts, + args: field_symbols, + }; + + let iter = + loc_args.into_iter().rev().zip(field_symbols.iter().rev()); + + let result = Stmt::Let(assigned, call, ret_layout.clone(), hole); + assign_to_symbols(env, procs, layout_cache, iter, result) + } + None => { // This must have been a runtime error. match procs.runtime_errors.get(&proc_name) { diff --git a/compiler/mono/src/layout.rs b/compiler/mono/src/layout.rs index 94233d33e7..6c4b438b39 100644 --- a/compiler/mono/src/layout.rs +++ b/compiler/mono/src/layout.rs @@ -226,9 +226,51 @@ impl<'a> Layout<'a> { } /// Avoid recomputing Layout from Variable multiple times. -#[derive(Default)] +/// We use `ena` for easy snapshots and rollbacks of the cache. +/// During specialization, a type variable `a` can be specialized to different layouts, +/// e.g. `identity : a -> a` could be specialized to `Bool -> Bool` or `Str -> Str`. 
+/// Therefore in general it's invalid to store a map from variables to layouts +/// But if we're careful when to invalidate certain keys, we still get some benefit +#[derive(Default, Debug)] pub struct LayoutCache<'a> { - layouts: MutMap, LayoutProblem>>, + layouts: ven_ena::unify::UnificationTable>>, +} + +#[derive(Debug, Clone)] +pub enum CachedLayout<'a> { + Cached(Layout<'a>), + NotCached, + Problem(LayoutProblem), +} + +/// Must wrap so we can define a specific UnifyKey instance +/// PhantomData so we can store the 'a lifetime, which is needed to implement the UnifyKey trait, +/// specifically so we can use `type Value = CachedLayout<'a>` +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct CachedVariable<'a>(Variable, std::marker::PhantomData<&'a ()>); + +impl<'a> CachedVariable<'a> { + fn new(var: Variable) -> Self { + CachedVariable(var, std::marker::PhantomData) + } +} + +// use ven_ena::unify::{InPlace, Snapshot, UnificationTable, UnifyKey}; + +impl<'a> ven_ena::unify::UnifyKey for CachedVariable<'a> { + type Value = CachedLayout<'a>; + + fn index(&self) -> u32 { + self.0.index() + } + + fn from_index(index: u32) -> Self { + CachedVariable(Variable::from_index(index), std::marker::PhantomData) + } + + fn tag() -> &'static str { + "CachedVariable" + } } impl<'a> LayoutCache<'a> { @@ -244,16 +286,60 @@ impl<'a> LayoutCache<'a> { // Store things according to the root Variable, to avoid duplicate work. let var = subs.get_root_key_without_compacting(var); - let mut env = Env { - arena, - subs, - seen: MutSet::default(), - }; + let cached_var = CachedVariable::new(var); - self.layouts - .entry(var) - .or_insert_with(|| Layout::from_var(&mut env, var)) - .clone() + self.expand_to_fit(cached_var); + + use CachedLayout::*; + match self.layouts.probe_value(cached_var) { + Cached(result) => Ok(result), + Problem(problem) => Err(problem), + NotCached => { + let mut env = Env { + arena, + subs, + seen: MutSet::default(), + }; + + let result = Layout::from_var(&mut env, var); + + let cached_layout = match &result { + Ok(layout) => Cached(layout.clone()), + Err(problem) => Problem(problem.clone()), + }; + + self.layouts + .update_value(cached_var, |existing| existing.value = cached_layout); + + result + } + } + } + + fn expand_to_fit(&mut self, var: CachedVariable<'a>) { + use ven_ena::unify::UnifyKey; + + let required = (var.index() as isize) - (self.layouts.len() as isize) + 1; + if required > 0 { + self.layouts.reserve(required as usize); + + for _ in 0..required { + self.layouts.new_key(CachedLayout::NotCached); + } + } + } + + pub fn snapshot( + &mut self, + ) -> ven_ena::unify::Snapshot>> { + self.layouts.snapshot() + } + + pub fn rollback_to( + &mut self, + snapshot: ven_ena::unify::Snapshot>>, + ) { + self.layouts.rollback_to(snapshot) } } @@ -370,7 +456,7 @@ fn layout_from_flat_type<'a>( // correct the memory mode of unique lists match Layout::from_var(env, wrapped_var)? 
{ - Layout::Builtin(Builtin::List(_, elem_layout)) => { + Layout::Builtin(Builtin::List(_ignored, elem_layout)) => { let uniqueness_var = args[0]; let uniqueness_content = subs.get_without_compacting(uniqueness_var).content; @@ -406,15 +492,19 @@ fn layout_from_flat_type<'a>( )) } Record(fields, ext_var) => { - debug_assert!(ext_var_is_empty_record(subs, ext_var)); - // Sort the fields by label let mut sorted_fields = Vec::with_capacity_in(fields.len(), arena); + sorted_fields.extend(fields.into_iter()); - for tuple in fields { - sorted_fields.push(tuple); + // extract any values from the ext_var + let mut fields_map = MutMap::default(); + match roc_types::pretty_print::chase_ext_record(subs, ext_var, &mut fields_map) { + Ok(()) | Err((_, Content::FlexVar(_))) => {} + Err(_) => unreachable!("this would have been a type error"), } + sorted_fields.extend(fields_map.into_iter()); + sorted_fields.sort_by(|(label1, _), (label2, _)| label1.cmp(label2)); // Determine the layouts of the fields, maintaining sort order @@ -781,22 +871,6 @@ fn ext_var_is_empty_tag_union(_: &Subs, _: Variable) -> bool { unreachable!(); } -#[cfg(debug_assertions)] -fn ext_var_is_empty_record(subs: &Subs, ext_var: Variable) -> bool { - // the ext_var is empty - let mut ext_fields = MutMap::default(); - match roc_types::pretty_print::chase_ext_record(subs, ext_var, &mut ext_fields) { - Ok(()) | Err((_, Content::FlexVar(_))) => ext_fields.is_empty(), - Err((_, content)) => panic!("invalid content in ext_var: {:?}", content), - } -} - -#[cfg(not(debug_assertions))] -fn ext_var_is_empty_record(_: &Subs, _: Variable) -> bool { - // This should only ever be used in debug_assert! macros - unreachable!(); -} - fn layout_from_num_content<'a>(content: Content) -> Result, LayoutProblem> { use roc_types::subs::Content::*; use roc_types::subs::FlatType::*; diff --git a/compiler/mono/tests/test_mono.rs b/compiler/mono/tests/test_mono.rs index 8e9b7e41c5..b5d15eb69e 100644 --- a/compiler/mono/tests/test_mono.rs +++ b/compiler/mono/tests/test_mono.rs @@ -875,7 +875,8 @@ mod test_mono { indoc!( r#" let Test.0 = 5i64; - ret Test.0; + let Test.2 = 3i64; + ret Test.2; "# ), ); @@ -1751,14 +1752,14 @@ mod test_mono { nonEmpty : List Int nonEmpty = [ 1, 1, -4, 1, 2 ] - - + + greaterThanOne : Int -> Bool greaterThanOne = \i -> i > 0 - + List.map nonEmpty greaterThanOne - + main {} "# ), @@ -1920,4 +1921,149 @@ mod test_mono { ), ) } + + #[test] + fn rigids() { + compiles_to_ir( + indoc!( + r#" + swap : Int, Int, List a -> List a + swap = \i, j, list -> + when Pair (List.get list i) (List.get list j) is + Pair (Ok atI) (Ok atJ) -> + foo = atJ + + list + |> List.set i foo + |> List.set j atI + + _ -> + [] + + swap 0 0 [0x1] + "# + ), + indoc!( + r#" + procedure List.3 (#Attr.2, #Attr.3): + let Test.43 = lowlevel ListLen #Attr.2; + let Test.39 = lowlevel NumLt #Attr.3 Test.43; + if Test.39 then + let Test.41 = 1i64; + let Test.42 = lowlevel ListGetUnsafe #Attr.2 #Attr.3; + let Test.40 = Ok Test.41 Test.42; + ret Test.40; + else + let Test.37 = 0i64; + let Test.38 = Struct {}; + let Test.36 = Err Test.37 Test.38; + ret Test.36; + + procedure List.4 (#Attr.2, #Attr.3, #Attr.4): + let Test.19 = lowlevel ListLen #Attr.2; + let Test.17 = lowlevel NumLt #Attr.3 Test.19; + if Test.17 then + let Test.18 = lowlevel ListSet #Attr.2 #Attr.3 #Attr.4; + ret Test.18; + else + ret #Attr.2; + + procedure Test.0 (Test.2, Test.3, Test.4): + let Test.34 = CallByName List.3 Test.4 Test.2; + let Test.35 = CallByName List.3 Test.4 Test.3; + let Test.13 = Struct 
{Test.34, Test.35}; + let Test.24 = true; + let Test.26 = 1i64; + let Test.25 = Index 0 Test.13; + let Test.27 = Index 0 Test.25; + let Test.33 = lowlevel Eq Test.26 Test.27; + let Test.31 = lowlevel And Test.33 Test.24; + let Test.29 = 1i64; + let Test.28 = Index 1 Test.13; + let Test.30 = Index 0 Test.28; + let Test.32 = lowlevel Eq Test.29 Test.30; + let Test.23 = lowlevel And Test.32 Test.31; + if Test.23 then + let Test.21 = Index 0 Test.13; + let Test.5 = Index 1 Test.21; + let Test.20 = Index 1 Test.13; + let Test.6 = Index 1 Test.20; + let Test.15 = CallByName List.4 Test.4 Test.2 Test.6; + let Test.14 = CallByName List.4 Test.15 Test.3 Test.5; + ret Test.14; + else + dec Test.4; + let Test.22 = Array []; + ret Test.22; + + let Test.9 = 0i64; + let Test.10 = 0i64; + let Test.12 = 1i64; + let Test.11 = Array [Test.12]; + let Test.8 = CallByName Test.0 Test.9 Test.10 Test.11; + ret Test.8; + "# + ), + ) + } + + #[test] + fn let_x_in_x() { + compiles_to_ir( + indoc!( + r#" + x = 5 + + answer = + 1337 + + unused = + nested = 17 + nested + + answer + "# + ), + indoc!( + r#" + let Test.1 = 1337i64; + let Test.0 = 5i64; + let Test.3 = 17i64; + ret Test.1; + "# + ), + ) + } + + #[test] + fn let_x_in_x_indirect() { + compiles_to_ir( + indoc!( + r#" + x = 5 + + answer = + 1337 + + unused = + nested = 17 + + i = 1 + + nested + + answer + "# + ), + indoc!( + r#" + let Test.1 = 1337i64; + let Test.0 = 5i64; + let Test.3 = 17i64; + let Test.4 = 1i64; + ret Test.1; + "# + ), + ) + } } diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index 900e4f980e..c93d6ced43 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -10,21 +10,21 @@ mod helpers; #[cfg(test)] mod test_reporting { use crate::helpers::test_home; + use crate::helpers::{can_expr, infer_expr, CanExprOut, ParseErrOut}; use bumpalo::Bump; use roc_module::symbol::{Interns, ModuleId}; use roc_mono::ir::{Procs, Stmt}; + use roc_mono::layout::LayoutCache; use roc_reporting::report::{ can_problem, mono_problem, parse_problem, type_problem, Report, BLUE_CODE, BOLD_CODE, CYAN_CODE, DEFAULT_PALETTE, GREEN_CODE, MAGENTA_CODE, RED_CODE, RESET_CODE, UNDERLINE_CODE, WHITE_CODE, YELLOW_CODE, }; + use roc_reporting::report::{RocDocAllocator, RocDocBuilder}; + use roc_solve::solve; use roc_types::pretty_print::name_all_type_vars; use roc_types::subs::Subs; use std::path::PathBuf; - // use roc_region::all; - use crate::helpers::{can_expr, infer_expr, CanExprOut, ParseErrOut}; - use roc_reporting::report::{RocDocAllocator, RocDocBuilder}; - use roc_solve::solve; fn filename_from_string(str: &str) -> PathBuf { let mut filename = PathBuf::new(); @@ -87,6 +87,7 @@ mod test_reporting { let mut ident_ids = interns.all_ident_ids.remove(&home).unwrap(); // Populate Procs and Subs, and get the low-level Expr from the canonical Expr + let mut layout_cache = LayoutCache::default(); let mut mono_env = roc_mono::ir::Env { arena: &arena, subs: &mut subs, @@ -94,7 +95,8 @@ mod test_reporting { home, ident_ids: &mut ident_ids, }; - let _mono_expr = Stmt::new(&mut mono_env, loc_expr.value, &mut procs); + let _mono_expr = + Stmt::new(&mut mono_env, loc_expr.value, &mut procs, &mut layout_cache); } Ok((unify_problems, can_problems, mono_problems, home, interns)) diff --git a/compiler/solve/src/solve.rs b/compiler/solve/src/solve.rs index cbfaadad2c..7d67533562 100644 --- a/compiler/solve/src/solve.rs +++ b/compiler/solve/src/solve.rs @@ -497,22 +497,35 @@ fn solve( let 
visit_mark = young_mark.next(); let final_mark = visit_mark.next(); - debug_assert!({ - let offenders = next_pools - .get(next_rank) - .iter() - .filter(|var| { - subs.get_without_compacting(roc_types::subs::Variable::clone( - var, - )) - .rank - .into_usize() - > next_rank.into_usize() - }) - .collect::>(); + debug_assert_eq!( + { + let offenders = next_pools + .get(next_rank) + .iter() + .filter(|var| { + let current = subs.get_without_compacting( + roc_types::subs::Variable::clone(var), + ); - offenders.is_empty() - }); + current.rank.into_usize() > next_rank.into_usize() + }) + .collect::>(); + + let result = offenders.len(); + + if result > 0 { + dbg!( + &subs, + &offenders, + &let_con.def_types, + &let_con.def_aliases + ); + } + + result + }, + 0 + ); // pop pool generalize(subs, young_mark, visit_mark, next_rank, next_pools); @@ -596,6 +609,16 @@ fn type_to_var( type_to_variable(subs, rank, pools, cached, typ) } +/// Abusing existing functions for our purposes +/// this is to put a solved type back into subs +pub fn insert_type_into_subs(subs: &mut Subs, typ: &Type) -> Variable { + let rank = Rank::NONE; + let mut pools = Pools::default(); + let mut cached = MutMap::default(); + + type_to_variable(subs, rank, &mut pools, &mut cached, typ) +} + fn type_to_variable( subs: &mut Subs, rank: Rank, @@ -792,10 +815,10 @@ fn check_for_infinite_type( ) { let var = loc_var.value; - let is_uniq_infer = match subs.get(var).content { - Content::Alias(Symbol::ATTR_ATTR, _, _) => true, - _ => false, - }; + let is_uniq_infer = matches!( + subs.get(var).content, + Content::Alias(Symbol::ATTR_ATTR, _, _) + ); while let Some((recursive, chain)) = subs.occurs(var) { let description = subs.get(recursive); @@ -1210,6 +1233,184 @@ fn introduce(subs: &mut Subs, rank: Rank, pools: &mut Pools, vars: &[Variable]) pool.extend(vars); } +/// Function that converts rigids variables to flex variables +/// this is used during the monomorphization process +pub fn instantiate_rigids(subs: &mut Subs, var: Variable) { + let rank = Rank::NONE; + let mut pools = Pools::default(); + + instantiate_rigids_help(subs, rank, &mut pools, var); +} + +fn instantiate_rigids_help( + subs: &mut Subs, + max_rank: Rank, + pools: &mut Pools, + var: Variable, +) -> Variable { + use roc_types::subs::Content::*; + use roc_types::subs::FlatType::*; + + let desc = subs.get_without_compacting(var); + + if let Some(copy) = desc.copy.into_variable() { + return copy; + } + + let make_descriptor = |content| Descriptor { + content, + rank: max_rank, + mark: Mark::NONE, + copy: OptVariable::NONE, + }; + + let content = desc.content; + let copy = var; + + pools.get_mut(max_rank).push(copy); + + // Link the original variable to the new variable. This lets us + // avoid making multiple copies of the variable we are instantiating. + // + // Need to do this before recursively copying to avoid looping. + subs.set( + var, + Descriptor { + content: content.clone(), + rank: desc.rank, + mark: Mark::NONE, + copy: copy.into(), + }, + ); + + // Now we recursively copy the content of the variable. + // We have already marked the variable as copied, so we + // will not repeat this work or crawl this variable again. 
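// Worked example (sketch, assuming a recursive type; not part of this change):
// for a recursive tag union such as
//
//     ConsList a : [ Cons a (ConsList a), Nil ]
//
// the recursion variable inside `Cons` eventually points back at the union
// itself. On that second visit, `desc.copy` is already set, so the early
// `if let Some(copy) = desc.copy.into_variable()` return at the top of this
// function hands back the existing copy instead of recursing forever.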
+ match content { + Structure(flat_type) => { + let new_flat_type = match flat_type { + Apply(symbol, args) => { + let args = args + .into_iter() + .map(|var| instantiate_rigids_help(subs, max_rank, pools, var)) + .collect(); + + Apply(symbol, args) + } + + Func(arg_vars, closure_var, ret_var) => { + let new_ret_var = instantiate_rigids_help(subs, max_rank, pools, ret_var); + let new_closure_var = + instantiate_rigids_help(subs, max_rank, pools, closure_var); + let arg_vars = arg_vars + .into_iter() + .map(|var| instantiate_rigids_help(subs, max_rank, pools, var)) + .collect(); + + Func(arg_vars, new_closure_var, new_ret_var) + } + + same @ EmptyRecord | same @ EmptyTagUnion | same @ Erroneous(_) => same, + + Record(fields, ext_var) => { + let mut new_fields = MutMap::default(); + + for (label, field) in fields { + use RecordField::*; + + let new_field = match field { + Demanded(var) => { + Demanded(instantiate_rigids_help(subs, max_rank, pools, var)) + } + Required(var) => { + Required(instantiate_rigids_help(subs, max_rank, pools, var)) + } + Optional(var) => { + Optional(instantiate_rigids_help(subs, max_rank, pools, var)) + } + }; + + new_fields.insert(label, new_field); + } + + Record( + new_fields, + instantiate_rigids_help(subs, max_rank, pools, ext_var), + ) + } + + TagUnion(tags, ext_var) => { + let mut new_tags = MutMap::default(); + + for (tag, vars) in tags { + let new_vars: Vec = vars + .into_iter() + .map(|var| instantiate_rigids_help(subs, max_rank, pools, var)) + .collect(); + new_tags.insert(tag, new_vars); + } + + TagUnion( + new_tags, + instantiate_rigids_help(subs, max_rank, pools, ext_var), + ) + } + + RecursiveTagUnion(rec_var, tags, ext_var) => { + let mut new_tags = MutMap::default(); + + let new_rec_var = instantiate_rigids_help(subs, max_rank, pools, rec_var); + + for (tag, vars) in tags { + let new_vars: Vec = vars + .into_iter() + .map(|var| instantiate_rigids_help(subs, max_rank, pools, var)) + .collect(); + new_tags.insert(tag, new_vars); + } + + RecursiveTagUnion( + new_rec_var, + new_tags, + instantiate_rigids_help(subs, max_rank, pools, ext_var), + ) + } + + Boolean(b) => { + let mut mapper = |var| instantiate_rigids_help(subs, max_rank, pools, var); + + Boolean(b.map_variables(&mut mapper)) + } + }; + + subs.set(copy, make_descriptor(Structure(new_flat_type))); + + copy + } + + FlexVar(_) | Error => copy, + + RigidVar(name) => { + subs.set(copy, make_descriptor(FlexVar(Some(name)))); + + copy + } + + Alias(symbol, args, real_type_var) => { + let new_args = args + .into_iter() + .map(|(name, var)| (name, instantiate_rigids_help(subs, max_rank, pools, var))) + .collect(); + let new_real_type_var = instantiate_rigids_help(subs, max_rank, pools, real_type_var); + let new_content = Alias(symbol, new_args, new_real_type_var); + + subs.set(copy, make_descriptor(new_content)); + + copy + } + } +} + fn deep_copy_var(subs: &mut Subs, rank: Rank, pools: &mut Pools, var: Variable) -> Variable { let copy = deep_copy_var_help(subs, rank, pools, var); diff --git a/compiler/types/src/boolean_algebra.rs b/compiler/types/src/boolean_algebra.rs index 97a0e78a5c..9e79804e18 100644 --- a/compiler/types/src/boolean_algebra.rs +++ b/compiler/types/src/boolean_algebra.rs @@ -52,10 +52,10 @@ pub enum Bool { } pub fn var_is_shared(subs: &Subs, var: Variable) -> bool { - match subs.get_without_compacting(var).content { - Content::Structure(FlatType::Boolean(Bool::Shared)) => true, - _ => false, - } + matches!( + subs.get_without_compacting(var).content, + 
Content::Structure(FlatType::Boolean(Bool::Shared)) + ) } /// Given the Subs @@ -163,10 +163,7 @@ impl Bool { } pub fn is_unique(&self, subs: &Subs) -> bool { - match self.simplify(subs) { - Shared => false, - _ => true, - } + !matches!(self.simplify(subs), Shared) } pub fn variables(&self) -> SendSet { diff --git a/compiler/types/src/solved_types.rs b/compiler/types/src/solved_types.rs index 4957a53596..81f13895a1 100644 --- a/compiler/types/src/solved_types.rs +++ b/compiler/types/src/solved_types.rs @@ -1,6 +1,7 @@ use crate::boolean_algebra; use crate::subs::{FlatType, Subs, VarId, Variable}; use crate::types::{Problem, RecordField, Type}; +use roc_collections::all::MutSet; use roc_module::ident::{Lowercase, TagName}; use roc_module::symbol::Symbol; use roc_region::all::{Located, Region}; @@ -15,13 +16,17 @@ impl Solved { &self.0 } + pub fn inner_mut(&mut self) -> &'_ mut T { + &mut self.0 + } + pub fn into_inner(self) -> T { self.0 } } /// This is a fully solved type, with no Variables remaining in it. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum SolvedType { /// A function. The types of its arguments, then the type of its return value. Func(Vec, Box, Box), @@ -55,7 +60,7 @@ pub enum SolvedType { Error, } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum SolvedBool { SolvedShared, SolvedContainer(VarId, Vec), @@ -68,10 +73,13 @@ impl SolvedBool { match boolean { Bool::Shared => SolvedBool::SolvedShared, Bool::Container(cvar, mvars) => { - debug_assert!(matches!( - subs.get_without_compacting(*cvar).content, - crate::subs::Content::FlexVar(_) - )); + match subs.get_without_compacting(*cvar).content { + crate::subs::Content::FlexVar(_) => {} + crate::subs::Content::Structure(FlatType::Boolean(Bool::Shared)) => { + return SolvedBool::SolvedShared; + } + other => panic!("Container var is not flex but {:?}", other), + } SolvedBool::SolvedContainer( VarId::from_var(*cvar, subs), @@ -193,21 +201,32 @@ impl SolvedType { } } - fn from_var(subs: &Subs, var: Variable) -> Self { + pub fn from_var(subs: &Subs, var: Variable) -> Self { + let mut seen = RecursionVars::default(); + Self::from_var_help(subs, &mut seen, var) + } + + fn from_var_help(subs: &Subs, recursion_vars: &mut RecursionVars, var: Variable) -> Self { use crate::subs::Content::*; + // if this is a recursion var we've seen before, just generate a Flex + // (not doing so would have this function loop forever) + if recursion_vars.contains(subs, var) { + return SolvedType::Flex(VarId::from_var(var, subs)); + } + match subs.get_without_compacting(var).content { FlexVar(_) => SolvedType::Flex(VarId::from_var(var, subs)), RigidVar(name) => SolvedType::Rigid(name), - Structure(flat_type) => Self::from_flat_type(subs, flat_type), + Structure(flat_type) => Self::from_flat_type(subs, recursion_vars, flat_type), Alias(symbol, args, actual_var) => { let mut new_args = Vec::with_capacity(args.len()); for (arg_name, arg_var) in args { - new_args.push((arg_name, Self::from_var(subs, arg_var))); + new_args.push((arg_name, Self::from_var_help(subs, recursion_vars, arg_var))); } - let aliased_to = Self::from_var(subs, actual_var); + let aliased_to = Self::from_var_help(subs, recursion_vars, actual_var); SolvedType::Alias(symbol, new_args, Box::new(aliased_to)) } @@ -215,15 +234,19 @@ impl SolvedType { } } - fn from_flat_type(subs: &Subs, flat_type: FlatType) -> Self { + fn from_flat_type( + subs: &Subs, + recursion_vars: &mut RecursionVars, + flat_type: FlatType, + 
) -> Self { use crate::subs::FlatType::*; match flat_type { Apply(symbol, args) => { let mut new_args = Vec::with_capacity(args.len()); - for var in args { - new_args.push(Self::from_var(subs, var)); + for var in args.iter().copied() { + new_args.push(Self::from_var_help(subs, recursion_vars, var)); } SolvedType::Apply(symbol, new_args) @@ -232,11 +255,11 @@ impl SolvedType { let mut new_args = Vec::with_capacity(args.len()); for var in args { - new_args.push(Self::from_var(subs, var)); + new_args.push(Self::from_var_help(subs, recursion_vars, var)); } - let ret = Self::from_var(subs, ret); - let closure = Self::from_var(subs, closure); + let ret = Self::from_var_help(subs, recursion_vars, ret); + let closure = Self::from_var_help(subs, recursion_vars, closure); SolvedType::Func(new_args, Box::new(closure), Box::new(ret)) } @@ -247,15 +270,15 @@ impl SolvedType { use RecordField::*; let solved_type = match field { - Optional(var) => Optional(Self::from_var(subs, var)), - Required(var) => Required(Self::from_var(subs, var)), - Demanded(var) => Demanded(Self::from_var(subs, var)), + Optional(var) => Optional(Self::from_var_help(subs, recursion_vars, var)), + Required(var) => Required(Self::from_var_help(subs, recursion_vars, var)), + Demanded(var) => Demanded(Self::from_var_help(subs, recursion_vars, var)), }; new_fields.push((label, solved_type)); } - let ext = Self::from_var(subs, ext_var); + let ext = Self::from_var_help(subs, recursion_vars, ext_var); SolvedType::Record { fields: new_fields, @@ -269,30 +292,32 @@ impl SolvedType { let mut new_args = Vec::with_capacity(args.len()); for var in args { - new_args.push(Self::from_var(subs, var)); + new_args.push(Self::from_var_help(subs, recursion_vars, var)); } new_tags.push((tag_name, new_args)); } - let ext = Self::from_var(subs, ext_var); + let ext = Self::from_var_help(subs, recursion_vars, ext_var); SolvedType::TagUnion(new_tags, Box::new(ext)) } RecursiveTagUnion(rec_var, tags, ext_var) => { + recursion_vars.insert(subs, rec_var); + let mut new_tags = Vec::with_capacity(tags.len()); for (tag_name, args) in tags { let mut new_args = Vec::with_capacity(args.len()); for var in args { - new_args.push(Self::from_var(subs, var)); + new_args.push(Self::from_var_help(subs, recursion_vars, var)); } new_tags.push((tag_name, new_args)); } - let ext = Self::from_var(subs, ext_var); + let ext = Self::from_var_help(subs, recursion_vars, ext_var); SolvedType::RecursiveTagUnion( VarId::from_var(rec_var, subs), @@ -314,3 +339,20 @@ pub struct BuiltinAlias { pub vars: Vec>, pub typ: SolvedType, } + +#[derive(Default)] +struct RecursionVars(MutSet); + +impl RecursionVars { + fn contains(&self, subs: &Subs, var: Variable) -> bool { + let var = subs.get_root_key_without_compacting(var); + + self.0.contains(&var) + } + + fn insert(&mut self, subs: &Subs, var: Variable) { + let var = subs.get_root_key_without_compacting(var); + + self.0.insert(var); + } +} diff --git a/compiler/types/src/subs.rs b/compiler/types/src/subs.rs index 4f7f0b4cb7..72e1f748da 100644 --- a/compiler/types/src/subs.rs +++ b/compiler/types/src/subs.rs @@ -73,13 +73,16 @@ impl VarStore { } pub fn new_from_subs(subs: &Subs) -> Self { - // TODO why -2, are we not overwriting something here? 
- let next_var = (subs.utable.len() - 1) as u32; + let next_var = (subs.utable.len()) as u32; debug_assert!(next_var >= Variable::FIRST_USER_SPACE_VAR.0); VarStore { next: next_var } } + pub fn peek(&mut self) -> u32 { + self.next + } + pub fn fresh(&mut self) -> Variable { // Increment the counter and return the value it had before it was incremented. let answer = self.next; @@ -549,10 +552,10 @@ pub enum Content { impl Content { #[inline(always)] pub fn is_number(&self) -> bool { - match &self { - Content::Structure(FlatType::Apply(Symbol::NUM_NUM, _)) => true, - _ => false, - } + matches!( + &self, + Content::Structure(FlatType::Apply(Symbol::NUM_NUM, _)) + ) } pub fn is_unique(&self, subs: &Subs) -> bool { diff --git a/compiler/types/src/types.rs b/compiler/types/src/types.rs index 07ba293f05..f804732027 100644 --- a/compiler/types/src/types.rs +++ b/compiler/types/src/types.rs @@ -22,7 +22,7 @@ pub const TYPE_FLOATINGPOINT: &str = "FloatingPoint"; /// Can unify with Optional and Demanded /// - Optional: introduced by pattern matches and annotations. /// Can unify with Required, but not with Demanded -#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Eq, Clone, Hash)] pub enum RecordField { Optional(T), Required(T), @@ -989,7 +989,7 @@ pub struct Alias { pub typ: Type, } -#[derive(PartialEq, Eq, Debug, Clone)] +#[derive(PartialEq, Eq, Debug, Clone, Hash)] pub enum Problem { CanonicalizationProblem, CircularType(Symbol, ErrorType, Region), @@ -1015,7 +1015,7 @@ pub enum Mismatch { CanonicalizationProblem, } -#[derive(PartialEq, Eq, Clone)] +#[derive(PartialEq, Eq, Clone, Hash)] pub enum ErrorType { Infinite, Type(Symbol, Vec), @@ -1360,7 +1360,7 @@ fn write_debug_error_type_help(error_type: ErrorType, buf: &mut String, parens: } } -#[derive(PartialEq, Eq, Debug, Clone)] +#[derive(PartialEq, Eq, Debug, Clone, Hash)] pub enum TypeExt { Closed, FlexOpen(Lowercase), diff --git a/compiler/unify/src/unify.rs b/compiler/unify/src/unify.rs index e8aff8f5c2..887e394c0d 100644 --- a/compiler/unify/src/unify.rs +++ b/compiler/unify/src/unify.rs @@ -31,21 +31,11 @@ macro_rules! mismatch { println!(""); } + vec![Mismatch::TypeMismatch] }}; ($msg:expr,) => {{ - if cfg!(debug_assertions) { - println!( - "Mismatch in {} Line {} Column {}", - file!(), - line!(), - column!() - ); - println!($msg); - println!(""); - } - - vec![Mismatch::TypeMismatch] + mismatch!($msg) }}; ($msg:expr, $($arg:tt)*) => {{ if cfg!(debug_assertions) { @@ -681,7 +671,7 @@ fn unify_shared_tags( merge(subs, ctx, Structure(flat_type)) } else { - mismatch!() + mismatch!("Problem with Tag Union") } } @@ -911,7 +901,7 @@ fn unify_rigid(subs: &mut Subs, ctx: &Context, name: &Lowercase, other: &Content RigidVar(_) | Structure(_) | Alias(_, _, _) => { // Type mismatch! Rigid can only unify with flex, even if the // rigid names are the same. - mismatch!() + mismatch!("Rigid with {:?}", &other) } Error => { // Error propagates. diff --git a/editor/editor-ideas.md b/editor/editor-ideas.md index 86ef1be20e..7f3b9ef4f9 100644 --- a/editor/editor-ideas.md +++ b/editor/editor-ideas.md @@ -47,6 +47,8 @@ These are potentially inspirational resources for the editor's design. 
* [Unreal Engine 4](https://www.unrealengine.com/en-US/) * [Blueprints](https://docs.unrealengine.com/en-US/Engine/Blueprints/index.html) visual scripting (not suggesting visual scripting for Roc) +* [Live Programming](https://www.microsoft.com/en-us/research/project/live-programming/?from=http%3A%2F%2Fresearch.microsoft.com%2Fen-us%2Fprojects%2Fliveprogramming%2Ftypography.aspx#!publications) by [Microsoft Research]; it contains many interesting research papers. + ### Non-Code Related Inspiration * [Scrivner](https://www.literatureandlatte.com/scrivener/overview) writing app for novelists, screenwriters, and more diff --git a/examples/.gitignore b/examples/.gitignore index 0bd05ae96a..c66fcb3fdd 100644 --- a/examples/.gitignore +++ b/examples/.gitignore @@ -1,3 +1,3 @@ app -*.o -*.a +host.o +c_host.o diff --git a/examples/hello-world/host.rs b/examples/hello-world/host.rs deleted file mode 100644 index f774532676..0000000000 --- a/examples/hello-world/host.rs +++ /dev/null @@ -1,14 +0,0 @@ -use std::ffi::CStr; -use std::os::raw::c_char; - -#[link(name = "roc_app", kind = "static")] -extern "C" { - #[link_name = "main#1"] - fn str_from_roc() -> *const c_char; -} - -pub fn main() { - let c_str = unsafe { CStr::from_ptr(str_from_roc()) }; - - println!("Roc says: {}", c_str.to_str().unwrap()); -} diff --git a/examples/hello-world/platform/Cargo.lock b/examples/hello-world/platform/Cargo.lock new file mode 100644 index 0000000000..c386bb6c4a --- /dev/null +++ b/examples/hello-world/platform/Cargo.lock @@ -0,0 +1,23 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "host" +version = "0.1.0" +dependencies = [ + "roc_std 0.1.0", +] + +[[package]] +name = "libc" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "roc_std" +version = "0.1.0" +dependencies = [ + "libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[metadata] +"checksum libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)" = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" diff --git a/examples/hello-world/platform/Cargo.toml b/examples/hello-world/platform/Cargo.toml new file mode 100644 index 0000000000..70f3c1f86c --- /dev/null +++ b/examples/hello-world/platform/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "host" +version = "0.1.0" +authors = ["Richard Feldman "] +edition = "2018" + +[lib] +crate-type = ["staticlib"] + +[dependencies] +roc_std = { path = "../../../roc_std" } + +[workspace] diff --git a/examples/hello-world/platform/README.md b/examples/hello-world/platform/README.md new file mode 100644 index 0000000000..eefff1996c --- /dev/null +++ b/examples/hello-world/platform/README.md @@ -0,0 +1,8 @@ +# Rebuilding the host from source + +Run `build.sh` to manually rebuild this platform's host. + +Note that the compiler currently has its own logic for rebuilding these hosts +(in `link.rs`). It's hardcoded for now, but the long-term goal is that +hosts will be precompiled by platform authors and distributed in packages, +at which point only package authors will need to think about rebuilding hosts.
diff --git a/examples/hello-world/platform/build.sh b/examples/hello-world/platform/build.sh new file mode 100755 index 0000000000..010c502222 --- /dev/null +++ b/examples/hello-world/platform/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# compile c_host.o and rust_host.o +clang -c host.c -o c_host.o +rustc host.rs -o rust_host.o + +# link them together into host.o +ld -r c_host.o rust_host.o -o host.o + +# clean up +rm -f c_host.o +rm -f rust_host.o diff --git a/examples/hello-world/platform/host.c b/examples/hello-world/platform/host.c new file mode 100644 index 0000000000..0378c69589 --- /dev/null +++ b/examples/hello-world/platform/host.c @@ -0,0 +1,7 @@ +#include + +extern int rust_main(); + +int main() { + return rust_main(); +} diff --git a/examples/hello-world/platform/src/lib.rs b/examples/hello-world/platform/src/lib.rs new file mode 100644 index 0000000000..07375c7b46 --- /dev/null +++ b/examples/hello-world/platform/src/lib.rs @@ -0,0 +1,18 @@ +use roc_std::RocStr; +use std::str; + +extern "C" { + #[link_name = "main_1"] + fn main() -> RocStr; +} + +#[no_mangle] +pub fn rust_main() -> isize { + println!( + "Roc says: {}", + str::from_utf8(unsafe { main().as_slice() }).unwrap() + ); + + // Exit code + 0 +} diff --git a/examples/multi-module/Quicksort.roc b/examples/multi-module/Quicksort.roc new file mode 100644 index 0000000000..c641c362fc --- /dev/null +++ b/examples/multi-module/Quicksort.roc @@ -0,0 +1,49 @@ +app Quicksort + provides [ quicksort ] + imports [ Utils.{swap} ] + + +quicksort : List Int -> List Int +quicksort = \originalList -> + quicksortHelp : List (Num a), Int, Int -> List (Num a) + quicksortHelp = \list, low, high -> + if low < high then + when partition low high list is + Pair partitionIndex partitioned -> + partitioned + |> quicksortHelp low (partitionIndex - 1) + |> quicksortHelp (partitionIndex + 1) high + else + list + + partition : Int, Int, List (Num a) -> [ Pair Int (List (Num a)) ] + partition = \low, high, initialList -> + when List.get initialList high is + Ok pivot -> + when partitionHelp (low - 1) low initialList high pivot is + Pair newI newList -> + Pair (newI + 1) (Utils.swap (newI + 1) high newList) + + Err _ -> + Pair (low - 1) initialList + + + partitionHelp : Int, Int, List (Num a), Int, (Num a) -> [ Pair Int (List (Num a)) ] + partitionHelp = \i, j, list, high, pivot -> + if j < high then + when List.get list j is + Ok value -> + if value <= pivot then + partitionHelp (i + 1) (j + 1) (Utils.swap (i + 1) j list) high pivot + else + partitionHelp i (j + 1) list high pivot + + Err _ -> + Pair i list + else + Pair i list + + + + n = List.len originalList + quicksortHelp originalList 0 (n - 1) diff --git a/examples/multi-module/Utils.roc b/examples/multi-module/Utils.roc new file mode 100644 index 0000000000..bad64dcab9 --- /dev/null +++ b/examples/multi-module/Utils.roc @@ -0,0 +1,12 @@ +interface Utils exposes [ swap ] imports [] + +swap : Int, Int, List a -> List a +swap = \i, j, list -> + when Pair (List.get list i) (List.get list j) is + Pair (Ok atI) (Ok atJ) -> + list + |> List.set i atJ + |> List.set j atI + + _ -> + [] diff --git a/examples/multi-module/platform/Cargo.lock b/examples/multi-module/platform/Cargo.lock new file mode 100644 index 0000000000..c386bb6c4a --- /dev/null +++ b/examples/multi-module/platform/Cargo.lock @@ -0,0 +1,23 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "host" +version = "0.1.0" +dependencies = [ + "roc_std 0.1.0", +] + +[[package]] +name = "libc" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "roc_std" +version = "0.1.0" +dependencies = [ + "libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[metadata] +"checksum libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)" = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" diff --git a/examples/multi-module/platform/Cargo.toml b/examples/multi-module/platform/Cargo.toml new file mode 100644 index 0000000000..70f3c1f86c --- /dev/null +++ b/examples/multi-module/platform/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "host" +version = "0.1.0" +authors = ["Richard Feldman "] +edition = "2018" + +[lib] +crate-type = ["staticlib"] + +[dependencies] +roc_std = { path = "../../../roc_std" } + +[workspace] diff --git a/examples/multi-module/platform/README.md b/examples/multi-module/platform/README.md new file mode 100644 index 0000000000..eefff1996c --- /dev/null +++ b/examples/multi-module/platform/README.md @@ -0,0 +1,8 @@ +# Rebuilding the host from source + +Run `build.sh` to manually rebuild this platform's host. + +Note that the compiler currently has its own logic for rebuilding these hosts +(in `link.rs`). It's hardcoded for now, but the long-term goal is that +hosts will be precompiled by platform authors and distributed in packages, +at which point only package authors will need to think about rebuilding hosts. diff --git a/examples/multi-module/platform/build.sh b/examples/multi-module/platform/build.sh new file mode 100755 index 0000000000..010c502222 --- /dev/null +++ b/examples/multi-module/platform/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# compile c_host.o and rust_host.o +clang -c host.c -o c_host.o +rustc host.rs -o rust_host.o + +# link them together into host.o +ld -r c_host.o rust_host.o -o host.o + +# clean up +rm -f c_host.o +rm -f rust_host.o diff --git a/examples/multi-module/platform/host.c b/examples/multi-module/platform/host.c new file mode 100644 index 0000000000..0378c69589 --- /dev/null +++ b/examples/multi-module/platform/host.c @@ -0,0 +1,7 @@ +#include + +extern int rust_main(); + +int main() { + return rust_main(); +} diff --git a/examples/multi-module/platform/src/lib.rs b/examples/multi-module/platform/src/lib.rs new file mode 100644 index 0000000000..ddbadca30f --- /dev/null +++ b/examples/multi-module/platform/src/lib.rs @@ -0,0 +1,40 @@ +use roc_std::RocList; +use std::time::SystemTime; + +extern "C" { + #[link_name = "quicksort_1"] + fn quicksort(list: RocList) -> RocList; +} + +const NUM_NUMS: usize = 10_000; + +#[no_mangle] +pub fn rust_main() -> isize { + let nums: RocList = { + let mut nums = Vec::with_capacity(NUM_NUMS); + + for index in 0..nums.capacity() { + let num = index as i64 % 123; + + nums.push(num); + } + + RocList::from_slice(&nums) + }; + + println!("Running Roc quicksort on {} numbers...", nums.len()); + let start_time = SystemTime::now(); + let answer = unsafe { quicksort(nums) }; + let end_time = SystemTime::now(); + let duration = end_time.duration_since(start_time).unwrap(); + + println!( + "Roc quicksort took {:.4} ms to compute this answer: {:?}", + duration.as_secs_f64() * 1000.0, + // truncate the answer, so stdout is not swamped + &answer.as_slice()[0..20] + ); + + // Exit code + 0 +} diff --git a/examples/quicksort/Quicksort.roc b/examples/quicksort/Quicksort.roc index ebf5a2ebde..3c6fed01b8 
100644 --- a/examples/quicksort/Quicksort.roc +++ b/examples/quicksort/Quicksort.roc @@ -1,6 +1,7 @@ app Quicksort provides [ quicksort ] imports [] quicksort = \originalList -> + quicksortHelp : List (Num a), Int, Int -> List (Num a) quicksortHelp = \list, low, high -> if low < high then diff --git a/examples/quicksort/README.md b/examples/quicksort/README.md index 6437a5e24e..c900f17b24 100644 --- a/examples/quicksort/README.md +++ b/examples/quicksort/README.md @@ -14,4 +14,4 @@ $ cargo run --release run Quicksort.roc ## Troubleshooting -If you encounter `cannot find -lc++`, run the following for ubuntu `sudo apt install libc++-dev`. \ No newline at end of file +If you encounter `cannot find -lc++`, run the following for ubuntu `sudo apt install libc++-dev`. diff --git a/examples/quicksort/platform/Cargo.lock b/examples/quicksort/platform/Cargo.lock new file mode 100644 index 0000000000..c386bb6c4a --- /dev/null +++ b/examples/quicksort/platform/Cargo.lock @@ -0,0 +1,23 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "host" +version = "0.1.0" +dependencies = [ + "roc_std 0.1.0", +] + +[[package]] +name = "libc" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "roc_std" +version = "0.1.0" +dependencies = [ + "libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[metadata] +"checksum libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)" = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" diff --git a/examples/quicksort/platform/Cargo.toml b/examples/quicksort/platform/Cargo.toml new file mode 100644 index 0000000000..70f3c1f86c --- /dev/null +++ b/examples/quicksort/platform/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "host" +version = "0.1.0" +authors = ["Richard Feldman "] +edition = "2018" + +[lib] +crate-type = ["staticlib"] + +[dependencies] +roc_std = { path = "../../../roc_std" } + +[workspace] diff --git a/examples/quicksort/platform/README.md b/examples/quicksort/platform/README.md new file mode 100644 index 0000000000..eefff1996c --- /dev/null +++ b/examples/quicksort/platform/README.md @@ -0,0 +1,8 @@ +# Rebuilding the host from source + +Run `build.sh` to manually rebuild this platform's host. + +Note that the compiler currently has its own logic for rebuilding these hosts +(in `link.rs`). It's hardcoded for now, but the long-term goal is that +hosts will be precompiled by platform authors and distributed in packages, +at which point only package authors will need to think about rebuilding hosts. 
diff --git a/examples/quicksort/platform/build.sh b/examples/quicksort/platform/build.sh new file mode 100755 index 0000000000..010c502222 --- /dev/null +++ b/examples/quicksort/platform/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# compile c_host.o and rust_host.o +clang -c host.c -o c_host.o +rustc host.rs -o rust_host.o + +# link them together into host.o +ld -r c_host.o rust_host.o -o host.o + +# clean up +rm -f c_host.o +rm -f rust_host.o diff --git a/examples/quicksort/platform/host.c b/examples/quicksort/platform/host.c new file mode 100644 index 0000000000..0378c69589 --- /dev/null +++ b/examples/quicksort/platform/host.c @@ -0,0 +1,7 @@ +#include + +extern int rust_main(); + +int main() { + return rust_main(); +} diff --git a/examples/quicksort/platform/src/lib.rs b/examples/quicksort/platform/src/lib.rs new file mode 100644 index 0000000000..ddbadca30f --- /dev/null +++ b/examples/quicksort/platform/src/lib.rs @@ -0,0 +1,40 @@ +use roc_std::RocList; +use std::time::SystemTime; + +extern "C" { + #[link_name = "quicksort_1"] + fn quicksort(list: RocList) -> RocList; +} + +const NUM_NUMS: usize = 10_000; + +#[no_mangle] +pub fn rust_main() -> isize { + let nums: RocList = { + let mut nums = Vec::with_capacity(NUM_NUMS); + + for index in 0..nums.capacity() { + let num = index as i64 % 123; + + nums.push(num); + } + + RocList::from_slice(&nums) + }; + + println!("Running Roc quicksort on {} numbers...", nums.len()); + let start_time = SystemTime::now(); + let answer = unsafe { quicksort(nums) }; + let end_time = SystemTime::now(); + let duration = end_time.duration_since(start_time).unwrap(); + + println!( + "Roc quicksort took {:.4} ms to compute this answer: {:?}", + duration.as_secs_f64() * 1000.0, + // truncate the answer, so stdout is not swamped + &answer.as_slice()[0..20] + ); + + // Exit code + 0 +} diff --git a/examples/shared-quicksort/host.rs b/examples/shared-quicksort/host.rs deleted file mode 100644 index 08eaf722e4..0000000000 --- a/examples/shared-quicksort/host.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::time::SystemTime; - -#[link(name = "roc_app", kind = "static")] -extern "C" { - #[allow(improper_ctypes)] - #[link_name = "quicksort#1"] - fn quicksort(list: &[i64]) -> Box<[i64]>; -} - -const NUM_NUMS: usize = 1_000_000; - -pub fn main() { - let nums = { - let mut nums = Vec::with_capacity(NUM_NUMS + 1); - - // give this list refcount 1 - nums.push((std::usize::MAX - 1) as i64); - - for index in 1..nums.capacity() { - let num = index as i64 % 12345; - - nums.push(num); - } - - nums - }; - - println!("Running Roc shared quicksort"); - let start_time = SystemTime::now(); - let answer = unsafe { quicksort(&nums[1..]) }; - let end_time = SystemTime::now(); - let duration = end_time.duration_since(start_time).unwrap(); - - println!( - "Roc quicksort took {:.4} ms to compute this answer: {:?}", - duration.as_secs_f64() * 1000.0, - // truncate the answer, so stdout is not swamped - // NOTE index 0 is the refcount! - &answer[1..20] - ); - - // the pointer is to the first _element_ of the list, - // but the refcount precedes it. Thus calling free() on - // this pointer would segfault/cause badness. Therefore, we - // leak it for now - Box::leak(answer); -} diff --git a/examples/shared-quicksort/platform/README.md b/examples/shared-quicksort/platform/README.md new file mode 100644 index 0000000000..f51d79714d --- /dev/null +++ b/examples/shared-quicksort/platform/README.md @@ -0,0 +1,49 @@ +# Rebuilding the host from source + +Here are the current steps to rebuild this host. 
These + steps can likely be moved into a `build.rs` script after + turning `host.rs` into a `cargo` project, but that hasn't + been attempted yet. + + ## Compile the Rust and C sources + + Currently this host has both a `host.rs` and a `host.c`. + This is only because we haven't figured out a way to convince + Rust to emit a `.o` file that doesn't define a `main` entrypoint, + but which is capable of being linked into one later. + + As a workaround, we have `host.rs` expose a function called + `rust_main` instead of `main`, and all `host.c` does is provide + an actual `main` which imports and then calls `rust_main` from + the compiled `host.rs`. It's not the most elegant workaround, + but [asking on `users.rust-lang.org`](https://users.rust-lang.org/t/error-when-compiling-linking-with-o-files/49635/4) + didn't turn up any nicer approaches. Maybe they're out there though! + + To make this workaround happen, we need to compile both `host.rs` + and `host.c`. First, `cd` into `platform/host/src/` and then run: + + ``` +$ clang -c host.c -o c_host.o +$ rustc host.rs -o rust_host.o +``` + + Now we should have `c_host.o` and `rust_host.o` in the current directory. + + ## Link together the `.o` files + + Next, combine `c_host.o` and `rust_host.o` into `host.o` using `ld -r` like so: + + ``` +$ ld -r c_host.o rust_host.o -o host.o +``` + + Move `host.o` into the appropriate `platform/` subdirectory + based on your architecture and operating system. For example, + on macOS, you'd move `host.o` into the `platform/host/x86_64-unknown-darwin10/` directory. + + ## All done! + + Congratulations! You now have an updated host. + + It's now fine to delete `c_host.o` and `rust_host.o`, + since they were only needed to produce `host.o`. diff --git a/examples/shared-quicksort/platform/build.sh b/examples/shared-quicksort/platform/build.sh new file mode 100755 index 0000000000..010c502222 --- /dev/null +++ b/examples/shared-quicksort/platform/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# compile c_host.o and rust_host.o +clang -c host.c -o c_host.o +rustc host.rs -o rust_host.o + +# link them together into host.o +ld -r c_host.o rust_host.o -o host.o + +# clean up +rm -f c_host.o +rm -f rust_host.o diff --git a/examples/shared-quicksort/platform/host.c b/examples/shared-quicksort/platform/host.c new file mode 100644 index 0000000000..0378c69589 --- /dev/null +++ b/examples/shared-quicksort/platform/host.c @@ -0,0 +1,7 @@ +#include + +extern int rust_main(); + +int main() { + return rust_main(); +} diff --git a/examples/quicksort/host.rs b/examples/shared-quicksort/platform/host.rs similarity index 83% rename from examples/quicksort/host.rs rename to examples/shared-quicksort/platform/host.rs index 277c61afeb..53b46f036d 100644 --- a/examples/quicksort/host.rs +++ b/examples/shared-quicksort/platform/host.rs @@ -1,20 +1,22 @@ +#![crate_type = "staticlib"] + use std::time::SystemTime; -#[link(name = "roc_app", kind = "static")] extern "C" { #[allow(improper_ctypes)] - #[link_name = "quicksort#1"] + #[link_name = "quicksort_1"] fn quicksort(list: Box<[i64]>) -> Box<[i64]>; } -const NUM_NUMS: usize = 1_000_000; +const NUM_NUMS: usize = 10_000; -pub fn main() { +#[no_mangle] +pub fn rust_main() -> isize { let nums: Box<[i64]> = { let mut nums = Vec::with_capacity(NUM_NUMS); for index in 0..nums.capacity() { - let num = index as i64 % 12345; + let num = index as i64 % 123; nums.push(num); } @@ -39,4 +41,7 @@ pub fn main() { // the pointer is to the first _element_ of the list, // but the refcount precedes it. Thus calling free() on // this pointer would segfault/cause badness.
Therefore, we // leak it for now Box::leak(answer); + + // Exit code + 0 } diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 7b50cd2a0b..92f1bc44ed 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -71,7 +71,7 @@ impl RocList { let value = *self.get_storage_ptr(); // NOTE doesn't work with elements of 16 or more bytes - match usize::cmp(&0, &value) { + match isize::cmp(&(value as isize), &0) { Equal => Some(Storage::ReadOnly), Less => Some(Storage::Refcounted(value)), Greater => Some(Storage::Capacity(value)), @@ -214,3 +214,209 @@ impl Drop for RocList { } } } + +#[repr(C)] +pub struct RocStr { + elements: *mut u8, + length: usize, +} + +impl RocStr { + pub fn len(&self) -> usize { + if self.is_small_str() { + let bytes = self.length.to_ne_bytes(); + let last_byte = bytes[bytes.len() - 1]; + (last_byte ^ 0b1000_0000) as usize + } else { + self.length + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn is_small_str(&self) -> bool { + (self.length as isize) < 0 + } + + pub fn empty() -> Self { + RocStr { + // The first bit of length is 1 to specify small str. + length: 0, + elements: core::ptr::null_mut(), + } + } + + pub fn get(&self, index: usize) -> Option<&u8> { + if index < self.len() { + Some(unsafe { + let raw = if self.is_small_str() { + self.get_small_str_ptr().add(index) + } else { + self.elements.add(index) + }; + + &*raw + }) + } else { + None + } + } + + pub fn storage(&self) -> Option { + use core::cmp::Ordering::*; + + if self.is_small_str() || self.length == 0 { + return None; + } + + unsafe { + let value = *self.get_storage_ptr(); + + // NOTE doesn't work with elements of 16 or more bytes + match isize::cmp(&(value as isize), &0) { + Equal => Some(Storage::ReadOnly), + Less => Some(Storage::Refcounted(value)), + Greater => Some(Storage::Capacity(value)), + } + } + } + + fn get_storage_ptr(&self) -> *const usize { + let ptr = self.elements as *const usize; + + unsafe { ptr.offset(-1) } + } + + fn get_storage_ptr_mut(&mut self) -> *mut usize { + self.get_storage_ptr() as *mut usize + } + + fn get_element_ptr(elements: *const u8) -> *const usize { + let elem_alignment = core::mem::align_of::(); + let ptr = elements as *const usize; + + unsafe { + if elem_alignment <= core::mem::align_of::() { + ptr.offset(1) + } else { + // If elements have an alignment bigger than usize (e.g. an i128), + // we will have necessarily allocated two usize slots worth of + // space for the storage value (with the first usize slot being + // padding for alignment's sake), and we need to skip past both. + ptr.offset(2) + } + } + } + + fn get_small_str_ptr(&self) -> *const u8 { + (self as *const RocStr).cast() + } + + fn get_small_str_ptr_mut(&mut self) -> *mut u8 { + (self as *mut RocStr).cast() + } + + pub fn from_slice_with_capacity(slice: &[u8], capacity: usize) -> RocStr { + assert!(slice.len() <= capacity); + if capacity < core::mem::size_of::() { + let mut rocstr = RocStr::empty(); + let target_ptr = rocstr.get_small_str_ptr_mut(); + let source_ptr = slice.as_ptr() as *const u8; + for index in 0..(slice.len() as isize) { + unsafe { + *target_ptr.offset(index) = *source_ptr.offset(index); + } + } + // Write length and small string bit to last byte of length. 
+ let mut bytes = rocstr.length.to_ne_bytes(); + bytes[bytes.len() - 1] = capacity as u8 ^ 0b1000_0000; + rocstr.length = usize::from_ne_bytes(bytes); + rocstr + } else { + let ptr = slice.as_ptr(); + let element_bytes = capacity; + + let num_bytes = core::mem::size_of::() + element_bytes; + + let elements = unsafe { + let raw_ptr = libc::malloc(num_bytes); + + // write the capacity + let capacity_ptr = raw_ptr as *mut usize; + *capacity_ptr = capacity; + + let raw_ptr = Self::get_element_ptr(raw_ptr as *mut u8); + + { + // NOTE: using a memcpy here causes weird issues + let target_ptr = raw_ptr as *mut u8; + let source_ptr = ptr as *const u8; + let length = slice.len() as isize; + for index in 0..length { + *target_ptr.offset(index) = *source_ptr.offset(index); + } + } + + raw_ptr as *mut u8 + }; + + RocStr { + length: slice.len(), + elements, + } + } + } + + pub fn from_slice(slice: &[u8]) -> RocStr { + Self::from_slice_with_capacity(slice, slice.len()) + } + + pub fn as_slice(&self) -> &[u8] { + if self.is_small_str() { + unsafe { core::slice::from_raw_parts(self.get_small_str_ptr(), self.len()) } + } else { + unsafe { core::slice::from_raw_parts(self.elements, self.length) } + } + } +} + +impl fmt::Debug for RocStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // RocStr { is_small_str: false, storage: Refcounted(3), elements: [ 1,2,3,4] } + f.debug_struct("RocStr") + .field("is_small_str", &self.is_small_str()) + .field("storage", &self.storage()) + .field("elements", &self.as_slice()) + .finish() + } +} + +impl PartialEq for RocStr { + fn eq(&self, other: &Self) -> bool { + self.as_slice() == other.as_slice() + } +} + +impl Eq for RocStr {} + +impl Drop for RocStr { + fn drop(&mut self) { + if !self.is_small_str() { + use Storage::*; + match self.storage() { + None | Some(ReadOnly) => {} + Some(Capacity(_)) | Some(Refcounted(REFCOUNT_1)) => unsafe { + libc::free(self.get_storage_ptr() as *mut libc::c_void); + }, + Some(Refcounted(rc)) => { + let sptr = self.get_storage_ptr_mut(); + unsafe { + *sptr = rc - 1; + } + } + } + } + } +}
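As an aside on the `RocStr` layout introduced in `roc_std/src/lib.rs` above: small strings are stored inline, with the final byte of the `length` field carrying a set high bit plus the actual length, which is why `is_small_str` can simply test `(length as isize) < 0`. Below is a minimal, standalone sketch of that encoding. It does not use `roc_std` itself; the helper names are invented for illustration, and, like the code above, it relies on native byte order (so the high-bit trick lines up with the sign bit on little-endian targets) and assumes lengths below 128.

```
// Standalone sketch (not part of roc_std): round-trip the small-string
// length encoding used by RocStr above. Assumes little-endian and len < 128.

fn encode_small_len(len: usize) -> usize {
    let mut bytes = 0usize.to_ne_bytes();
    // Store the length in the final byte and set its high bit,
    // mirroring RocStr::from_slice_with_capacity.
    bytes[bytes.len() - 1] = (len as u8) ^ 0b1000_0000;
    usize::from_ne_bytes(bytes)
}

fn is_small(length_field: usize) -> bool {
    // Mirrors RocStr::is_small_str: the set high bit makes the field
    // negative when reinterpreted as a signed integer.
    (length_field as isize) < 0
}

fn decode_small_len(length_field: usize) -> usize {
    // Mirrors RocStr::len for the small-string case.
    let bytes = length_field.to_ne_bytes();
    (bytes[bytes.len() - 1] ^ 0b1000_0000) as usize
}

fn main() {
    let field = encode_small_len(5);

    assert!(is_small(field));
    assert_eq!(decode_small_len(field), 5);

    // A heap-allocated string stores its length directly, so the high
    // bit stays clear and the field reads as "not small".
    assert!(!is_small(5));

    println!("small-string length encoding round-trips");
}
```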