Merge remote-tracking branch 'origin/main' into packages

2024-11-13 09:49:11 +03:00 · 2022-12-13 06:04:43 -05:00 · 2022-12-13 06:04:43 -05:00 · d022c19f5c
commit d022c19f5c
parent ec9cf4efe2 3d1cdf1fd4
24 changed files with 392 additions and 490 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -3172,6 +3172,26 @@ version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2f61dcf0b917cd75d4521d7343d1ffff3d1583054133c9b5cbea3375c703c40d"

+[[package]]
+name = "proptest"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
+dependencies = [
+ "bit-set",
+ "bitflags",
+ "byteorder",
+ "lazy_static",
+ "num-traits",
+ "quick-error 2.0.1",
+ "rand",
+ "rand_chacha",
+ "rand_xorshift",
+ "regex-syntax",
+ "rusty-fork",
+ "tempfile",
+]
+
 [[package]]
 name = "ptr_meta"
 version = "0.1.4"
@ -3203,6 +3223,18 @@ dependencies = [
 "unicase",
 ]

+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
+[[package]]
+name = "quick-error"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
 [[package]]
 name = "quick-xml"
 version = "0.22.0"
@ -3295,6 +3327,15 @@ dependencies = [
 "getrandom",
 ]

+[[package]]
+name = "rand_xorshift"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
+dependencies = [
+ "rand_core",
+]
+
 [[package]]
 name = "rand_xoshiro"
 version = "0.6.0"
@ -4178,6 +4219,7 @@ dependencies = [
 "encode_unicode 1.0.0",
 "indoc",
 "pretty_assertions",
+ "proptest",
 "quickcheck",
 "quickcheck_macros",
 "roc_collections",
@ -4575,6 +4617,18 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"

+[[package]]
+name = "rusty-fork"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
+dependencies = [
+ "fnv",
+ "quick-error 1.2.3",
+ "tempfile",
+ "wait-timeout",
+]
+
 [[package]]
 name = "rustyline"
 version = "9.1.1"
@ -5751,6 +5805,15 @@ dependencies = [
 "quote",
 ]

+[[package]]
+name = "wait-timeout"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "walkdir"
 version = "2.3.2"
--- a/crates/compiler/fmt/src/expr.rs
+++ b/crates/compiler/fmt/src/expr.rs
@ -556,8 +556,13 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u

            for segments in lines.iter() {
                for seg in segments.iter() {
-                    buf.indent(indent);
-                    format_str_segment(seg, buf, indent);
+                    // only add indent if the line isn't empty
+                    if *seg != StrSegment::Plaintext("\n") {
+                        buf.indent(indent);
+                        format_str_segment(seg, buf, indent);
+                    } else {
+                        buf.newline();
+                    }
                }

                buf.newline();
--- a/crates/compiler/parse/Cargo.toml
+++ b/crates/compiler/parse/Cargo.toml
@ -19,6 +19,7 @@ encode_unicode.workspace = true

 [dev-dependencies]
 roc_test_utils = { path = "../../test_utils" }
+proptest = "1.0.0"

 criterion.workspace = true
 pretty_assertions.workspace = true
--- a/crates/compiler/parse/benches/bench_parse.rs
+++ b/crates/compiler/parse/benches/bench_parse.rs
@ -1,8 +1,7 @@
-use std::path::PathBuf;
-
 use bumpalo::Bump;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use roc_parse::{module, module::module_defs, parser::Parser, state::State};
+use std::path::PathBuf;

 pub fn parse_benchmark(c: &mut Criterion) {
    c.bench_function("parse false-interpreter", |b| {
--- a/crates/compiler/parse/fuzz/Cargo.lock
+++ b/crates/compiler/parse/fuzz/Cargo.lock
@ -72,9 +72,9 @@ dependencies = [

 [[package]]
 name = "bumpalo"
-version = "3.10.0"
+version = "3.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"
+checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"

 [[package]]
 name = "cc"
@ -96,9 +96,15 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

 [[package]]
 name = "encode_unicode"
-version = "0.3.6"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

 [[package]]
 name = "funty"
@ -167,12 +173,6 @@ dependencies = [
 "version_check",
 ]

-[[package]]
-name = "lazy_static"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
-
 [[package]]
 name = "libc"
 version = "0.2.131"
@ -264,6 +264,7 @@ version = "0.0.1"
 dependencies = [
 "bitvec",
 "bumpalo",
+ "fnv",
 "hashbrown",
 "im",
 "im-rc",
@ -283,7 +284,6 @@ name = "roc_module"
 version = "0.0.1"
 dependencies = [
 "bumpalo",
- "lazy_static",
 "roc_collections",
 "roc_error_macros",
 "roc_ident",
--- a/crates/compiler/parse/src/blankspace.rs
+++ b/crates/compiler/parse/src/blankspace.rs
@ -37,10 +37,7 @@ where
    E: 'a + SpaceProblem,
 {
    parser::map_with_arena(
-        and(
-            space0_e(indent_before_problem),
-            and(parser, space0_no_after_indent_check()),
-        ),
+        and(space0_e(indent_before_problem), and(parser, spaces())),
        spaces_around_help,
    )
 }
@ -164,474 +161,268 @@ where
    }
 }

+pub fn simple_eat_whitespace(bytes: &[u8]) -> usize {
+    let mut i = 0;
+    while i < bytes.len() {
+        match bytes[i] {
+            b' ' => i += 1,
+            _ => break,
+        }
+    }
+    i
+}
+
+pub fn fast_eat_whitespace(bytes: &[u8]) -> usize {
+    // Load 8 bytes at a time, keeping in mind that the initial offset may not be aligned
+    let mut i = 0;
+    while i + 8 <= bytes.len() {
+        let chunk = unsafe {
+            // SAFETY: the loop condition ensures `i + 8 <= bytes.len()`, so this 8-byte read is in bounds
+            (bytes.as_ptr().add(i) as *const u64)
+                .read_unaligned()
+                .to_le()
+        };
+
+        // The space character is 0x20, so XOR-ing the chunk with 0x20 in every byte
+        // yields a zero byte exactly where the input byte is a space
+        let spaces = 0x2020_2020_2020_2020;
+
+        // First, generate a mask where each byte is 0xff if the byte is a space,
+        // and some other bit sequence otherwise
+        let mask = !(chunk ^ spaces);
+
+        // Now mask off the high bit, so there's some place to carry into without
+        // overflowing into the next byte.
+        let mask = mask & !0x8080_8080_8080_8080;
+
+        // Now add 0x01 to each byte (0x0101_0101_0101_0101 across the word), which carries
+        // into the high bit if and only if the low seven bits of the byte match a space.
+        let mask = mask + 0x0101_0101_0101_0101;
+
+        // Now mask off areas where the original bytes had the high bit set, so that
+        // 0x80|0x20 = 0xa0 will not be considered a space.
+        let mask = mask & !(chunk & 0x8080_8080_8080_8080);
+
+        // Make sure all the _other_ bits aside from the high bit are set,
+        // and count the number of trailing one bits, dividing by 8 to get the number of
+        // bytes that are spaces.
+        let count = ((mask | !0x8080_8080_8080_8080).trailing_ones() as usize) / 8;
+
+        if count == 8 {
+            i += 8;
+        } else {
+            return i + count;
+        }
+    }
+
+    // Check the remaining bytes
+    simple_eat_whitespace(&bytes[i..]) + i
+}
+
+pub fn simple_eat_until_control_character(bytes: &[u8]) -> usize {
+    let mut i = 0;
+    while i < bytes.len() {
+        if bytes[i] < b' ' {
+            break;
+        } else {
+            i += 1;
+        }
+    }
+    i
+}
+
+pub fn fast_eat_until_control_character(bytes: &[u8]) -> usize {
+    // Load 8 bytes at a time, keeping in mind that the initial offset may not be aligned
+    let mut i = 0;
+    while i + 8 <= bytes.len() {
+        let chunk = unsafe {
+            // SAFETY: the loop condition ensures `i + 8 <= bytes.len()`, so this 8-byte read is in bounds
+            (bytes.as_ptr().add(i) as *const u64)
+                .read_unaligned()
+                .to_le()
+        };
+
+        // Control characters are 0x00-0x1F, and don't have any high bits set.
+        // They only have bits set that fall under the 0x1F mask.
+        let control = 0x1F1F_1F1F_1F1F_1F1F;
+
+        // First we set up a value where, if a given byte is a control character,
+        // it'll have all the non-control bits set to 1. All control bits are set to zero.
+        let mask = !(chunk & !control) & !control;
+
+        // Now, down shift by one bit. This will leave room for the following add to
+        // carry, without impacting the next byte.
+        let mask = mask >> 1;
+
+        // Add 0x10 to each byte (a one aligned with the lowest shifted bit), causing the one
+        // bits of a control-character byte to cascade, and put a one in its high bit.
+        let mask = mask.wrapping_add(0x1010_1010_1010_1010);
+
+        // Now, we can count the number of trailing zero bits, dividing by 8 to get the
+        // number of bytes before the first control character.
+        let count = (mask & 0x8080_8080_8080_8080).trailing_zeros() as usize / 8;
+
+        if count == 8 {
+            i += 8;
+        } else {
+            return i + count;
+        }
+    }
+
+    // Check the remaining bytes
+    simple_eat_until_control_character(&bytes[i..]) + i
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use proptest::prelude::*;
+
+    #[test]
+    fn test_eat_whitespace_simple() {
+        let bytes = &[0, 0, 0, 0, 0, 0, 0, 0];
+        assert_eq!(simple_eat_whitespace(bytes), fast_eat_whitespace(bytes));
+    }
+
+    proptest! {
+        #[test]
+        fn test_eat_whitespace(bytes in proptest::collection::vec(any::<u8>(), 0..100)) {
+            prop_assert_eq!(simple_eat_whitespace(&bytes), fast_eat_whitespace(&bytes));
+        }
+    }
+
+    #[test]
+    fn test_eat_until_control_character_simple() {
+        let bytes = &[32, 0, 0, 0, 0, 0, 0, 0];
+        assert_eq!(
+            simple_eat_until_control_character(bytes),
+            fast_eat_until_control_character(bytes)
+        );
+    }
+
+    proptest! {
+        #[test]
+        fn test_eat_until_control_character(bytes in proptest::collection::vec(any::<u8>(), 0..100)) {
+            prop_assert_eq!(
+                simple_eat_until_control_character(&bytes),
+                fast_eat_until_control_character(&bytes));
+        }
+    }
+}
+
 pub fn space0_e<'a, E>(
    indent_problem: fn(Position) -> E,
 ) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
 where
    E: 'a + SpaceProblem,
 {
-    spaces_help_help(indent_problem)
+    move |arena, state: State<'a>, min_indent: u32| {
+        let start = state.pos();
+        match spaces().parse(arena, state, min_indent) {
+            Ok((progress, spaces, state)) => {
+                if progress == NoProgress || state.column() >= min_indent {
+                    Ok((progress, spaces, state))
+                } else {
+                    Err((progress, indent_problem(start)))
+                }
+            }
+            Err((progress, err)) => Err((progress, err)),
+        }
+    }
 }

-#[inline(always)]
-fn spaces_help_help<'a, E>(
-    indent_problem: fn(Position) -> E,
-) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
+fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
 where
    E: 'a + SpaceProblem,
 {
-    move |arena, state: State<'a>, min_indent: u32| match fast_eat_spaces(&state) {
-        FastSpaceState::HasTab(position) => Err((
-            MadeProgress,
-            E::space_problem(BadInputError::HasTab, position),
-        )),
-        FastSpaceState::Good {
-            newlines,
-            consumed,
-            column,
-        } => {
-            if consumed == 0 {
-                Ok((NoProgress, &[] as &[_], state))
-            } else if column < min_indent {
-                Err((MadeProgress, indent_problem(state.pos())))
-            } else {
-                let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
-                let spaces = eat_spaces(state, comments_and_newlines);
-
-                Ok((
-                    MadeProgress,
-                    spaces.comments_and_newlines.into_bump_slice(),
-                    spaces.state,
-                ))
+    move |arena, mut state: State<'a>, _min_indent: u32| {
+        let mut newlines = Vec::new_in(arena);
+        let mut progress = NoProgress;
+        loop {
+            let whitespace = fast_eat_whitespace(state.bytes());
+            if whitespace > 0 {
+                state.advance_mut(whitespace);
+                progress = MadeProgress;
            }
-        }
-    }
-}

-#[inline(always)]
-fn space0_no_after_indent_check<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
-where
-    E: 'a + SpaceProblem,
-{
-    move |arena, state: State<'a>, _min_indent: u32| match fast_eat_spaces(&state) {
-        FastSpaceState::HasTab(position) => Err((
-            MadeProgress,
-            E::space_problem(BadInputError::HasTab, position),
-        )),
-        FastSpaceState::Good {
-            newlines,
-            consumed,
-            column: _,
-        } => {
-            if consumed == 0 {
-                Ok((NoProgress, &[] as &[_], state))
-            } else {
-                let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
-                let spaces = eat_spaces(state, comments_and_newlines);
+            match state.bytes().first() {
+                Some(b'#') => {
+                    state.advance_mut(1);

-                Ok((
-                    MadeProgress,
-                    spaces.comments_and_newlines.into_bump_slice(),
-                    spaces.state,
-                ))
-            }
-        }
-    }
-}
-
-enum FastSpaceState {
-    Good {
-        newlines: usize,
-        consumed: usize,
-        column: u32,
-    },
-    HasTab(Position),
-}
-
-fn fast_eat_spaces(state: &State) -> FastSpaceState {
-    use FastSpaceState::*;
-
-    let mut newlines = 0;
-    let mut line_start = state.line_start.offset as usize;
-    let base_offset = state.pos().offset as usize;
-
-    let mut index = base_offset;
-    let bytes = state.original_bytes();
-    let length = bytes.len();
-
-    'outer: while index < length {
-        match bytes[index] {
-            b' ' => {
-                index += 1;
-            }
-            b'\n' => {
-                newlines += 1;
-                index += 1;
-                line_start = index;
-            }
-            b'\r' => {
-                index += 1;
-                line_start = index;
-            }
-            b'\t' => {
-                return HasTab(Position::new(index as u32));
-            }
-            b'#' => {
-                index += 1;
-
-                // try to use SIMD instructions explicitly
-                // run with RUSTFLAGS="-C target-cpu=native" to enable
-                #[cfg(all(
-                    target_arch = "x86_64",
-                    target_feature = "sse2",
-                    target_feature = "sse4.2"
-                ))]
-                {
-                    use std::arch::x86_64::*;
-
-                    // a bytestring with the three characters we're looking for (the rest is ignored)
-                    let needle = b"\r\n\t=============";
-                    let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
-
-                    while index < length {
-                        let remaining = length - index;
-                        let length = if remaining < 16 { remaining as i32 } else { 16 };
-
-                        // the source bytes we'll be looking at
-                        let haystack =
-                            unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
-
-                        // use first 3 characters of needle, first `length` characters of haystack
-                        // finds the first index where one of the `needle` characters occurs
-                        // or 16 when none of the needle characters occur
-                        let first_special_char = unsafe {
-                            _mm_cmpestri(needle, 3, haystack, length, _SIDD_CMP_EQUAL_ANY)
-                        };
-
-                        // we've made `first_special_char` characters of progress
-                        index += usize::min(first_special_char as usize, remaining);
-
-                        // if we found a special char, let the outer loop handle it
-                        if first_special_char != 16 {
-                            continue 'outer;
-                        }
-                    }
-                }
-
-                #[cfg(not(all(
-                    target_arch = "x86_64",
-                    target_feature = "sse2",
-                    target_feature = "sse4.2"
-                )))]
-                {
-                    while index < length {
-                        match bytes[index] {
-                            b'\n' | b'\t' | b'\r' => {
-                                continue 'outer;
-                            }
-
-                            _ => {
-                                index += 1;
-                            }
-                        }
-                    }
-                }
-            }
-            _ => break,
-        }
-    }
-
-    Good {
-        newlines,
-        consumed: index - base_offset,
-        column: (index - line_start) as u32,
-    }
-}
-
-struct SpaceState<'a> {
-    state: State<'a>,
-    comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
-}
-
-fn eat_spaces<'a>(
-    mut state: State<'a>,
-    mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
-) -> SpaceState<'a> {
-    for c in state.bytes() {
-        match c {
-            b' ' => {
-                state = state.advance(1);
-            }
-            b'\n' => {
-                state = state.advance_newline();
-                comments_and_newlines.push(CommentOrNewline::Newline);
-            }
-            b'\r' => {
-                state = state.advance_newline();
-            }
-            b'\t' => unreachable!(),
-
-            b'#' => {
-                state = state.advance(1);
-                return eat_line_comment(state, comments_and_newlines);
-            }
-            _ => {
-                if !comments_and_newlines.is_empty() {
-                    state = state.mark_current_indent();
-                }
-                break;
-            }
-        }
-    }
-
-    SpaceState {
-        state,
-        comments_and_newlines,
-    }
-}
-
-fn eat_line_comment<'a>(
-    mut state: State<'a>,
-    mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
-) -> SpaceState<'a> {
-    let mut index = state.pos().offset as usize;
-    let bytes = state.original_bytes();
-    let length = bytes.len();
-
-    'outer: loop {
-        let is_doc_comment = if let Some(b'#') = bytes.get(index) {
-            match bytes.get(index + 1) {
-                Some(b' ') => {
-                    state = state.advance(2);
-                    index += 2;
-
-                    true
-                }
-                Some(b'\n') => {
-                    // consume the second # and the \n
-                    state = state.advance(1);
-                    state = state.advance_newline();
-                    index += 2;
-
-                    comments_and_newlines.push(CommentOrNewline::DocComment(""));
-
-                    for c in state.bytes() {
-                        match c {
-                            b' ' => {
-                                state = state.advance(1);
-                            }
-                            b'\n' => {
-                                state = state.advance_newline();
-                                comments_and_newlines.push(CommentOrNewline::Newline);
-                            }
-                            b'\r' => {
-                                state = state.advance_newline();
-                            }
-                            b'\t' => unreachable!(),
-                            b'#' => {
-                                state = state.advance(1);
-                                index += 1;
-                                continue 'outer;
-                            }
-                            _ => {
-                                state = state.mark_current_indent();
-                                break;
-                            }
-                        }
-
-                        index += 1;
-                    }
-
-                    return SpaceState {
-                        state,
-                        comments_and_newlines,
-                    };
-                }
-                None => {
-                    // consume the second #
-                    state = state.advance(1);
-
-                    return SpaceState {
-                        state,
-                        comments_and_newlines,
-                    };
-                }
-
-                Some(_) => false,
-            }
-        } else {
-            false
-        };
-
-        let loop_start = index;
-
-        #[cfg(all(
-            target_arch = "x86_64",
-            target_feature = "sse2",
-            target_feature = "sse4.2"
-        ))]
-        {
-            use std::arch::x86_64::*;
-
-            // a bytestring with the three characters we're looking for (the rest is ignored)
-            let needle = b"\r\n\t=============";
-            let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
-
-            while index < length {
-                let remaining = length - index;
-                let chunk = if remaining < 16 { remaining as i32 } else { 16 };
-
-                // the source bytes we'll be looking at
-                let haystack = unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
-
-                // use first 3 characters of needle, first  chunk` characters of haystack
-                // finds the first index where one of the `needle` characters occurs
-                // or 16 when none of the needle characters occur
-                let first_special_char =
-                    unsafe { _mm_cmpestri(needle, 3, haystack, chunk, _SIDD_CMP_EQUAL_ANY) };
-
-                // we've made `first_special_char` characters of progress
-                let progress = usize::min(first_special_char as usize, remaining);
-                index += progress;
-                state = state.advance(progress);
-
-                if first_special_char != 16 {
-                    match bytes[index] {
-                        b'\t' => unreachable!(),
-                        b'\n' => {
-                            let comment =
-                                unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
-
-                            if is_doc_comment {
-                                comments_and_newlines.push(CommentOrNewline::DocComment(comment));
-                            } else {
-                                comments_and_newlines.push(CommentOrNewline::LineComment(comment));
-                            }
-                            state = state.advance_newline();
-
-                            index += 1;
-                            while index < length {
-                                match bytes[index] {
-                                    b' ' => {
-                                        state = state.advance(1);
-                                    }
-                                    b'\n' => {
-                                        state = state.advance_newline();
-                                        comments_and_newlines.push(CommentOrNewline::Newline);
-                                    }
-                                    b'\r' => {
-                                        state = state.advance_newline();
-                                    }
-                                    b'\t' => unreachable!(),
-                                    b'#' => {
-                                        state = state.advance(1);
-                                        index += 1;
-                                        continue 'outer;
-                                    }
-                                    _ => {
-                                        state = state.mark_current_indent();
-                                        break;
-                                    }
-                                }
-
-                                index += 1;
-                            }
-
-                            return SpaceState {
-                                state,
-                                comments_and_newlines,
-                            };
-                        }
-                        b'\r' => {
-                            state = state.advance_newline();
-                            index += 1;
-                        }
-                        odd_character => {
-                            unreachable!(
-                                "unexpected_character {} {}",
-                                odd_character, odd_character as char
-                            )
-                        }
-                    }
-                }
-            }
-        }
-
-        #[cfg(not(all(
-            target_arch = "x86_64",
-            target_feature = "sse2",
-            target_feature = "sse4.2"
-        )))]
-        while index < length {
-            match bytes[index] {
-                b'\t' => unreachable!(),
-                b'\n' => {
-                    let comment =
-                        unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
+                    let is_doc_comment = state.bytes().first() == Some(&b'#')
+                        && (state.bytes().get(1) == Some(&b' ')
+                            || state.bytes().get(1) == Some(&b'\n')
+                            || state.bytes().get(1) == None);

                    if is_doc_comment {
-                        comments_and_newlines.push(CommentOrNewline::DocComment(comment));
-                    } else {
-                        comments_and_newlines.push(CommentOrNewline::LineComment(comment));
-                    }
-                    state = state.advance_newline();
-
-                    index += 1;
-                    while index < length {
-                        match bytes[index] {
-                            b' ' => {
-                                state = state.advance(1);
-                            }
-                            b'\n' => {
-                                state = state.advance_newline();
-                                comments_and_newlines.push(CommentOrNewline::Newline);
-                            }
-                            b'\r' => {
-                                state = state.advance_newline();
-                            }
-                            b'\t' => unreachable!(),
-                            b'#' => {
-                                state = state.advance(1);
-                                index += 1;
-                                continue 'outer;
-                            }
-                            _ => {
-                                state = state.mark_current_indent();
-                                break;
-                            }
+                        state.advance_mut(1);
+                        if state.bytes().first() == Some(&b' ') {
+                            state.advance_mut(1);
                        }
-
-                        index += 1;
                    }

-                    return SpaceState {
-                        state,
-                        comments_and_newlines,
+                    let len = fast_eat_until_control_character(state.bytes());
+
+                    // We already checked that the string is valid UTF-8
+                    debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok());
+                    let text = unsafe { std::str::from_utf8_unchecked(&state.bytes()[..len]) };
+
+                    let comment = if is_doc_comment {
+                        CommentOrNewline::DocComment(text)
+                    } else {
+                        CommentOrNewline::LineComment(text)
                    };
+                    newlines.push(comment);
+                    state.advance_mut(len);
+
+                    if state.bytes().first() == Some(&b'\n') {
+                        state = state.advance_newline();
+                    }
+
+                    progress = MadeProgress;
                }
-                b'\r' => {
+                Some(b'\r') => {
+                    if state.bytes().get(1) == Some(&b'\n') {
+                        newlines.push(CommentOrNewline::Newline);
+                        state.advance_mut(1);
+                        state = state.advance_newline();
+                        progress = MadeProgress;
+                    } else {
+                        return Err((
+                            progress,
+                            E::space_problem(
+                                BadInputError::HasMisplacedCarriageReturn,
+                                state.pos(),
+                            ),
+                        ));
+                    }
+                }
+                Some(b'\n') => {
+                    newlines.push(CommentOrNewline::Newline);
                    state = state.advance_newline();
+                    progress = MadeProgress;
+                }
+                Some(b'\t') => {
+                    return Err((
+                        progress,
+                        E::space_problem(BadInputError::HasTab, state.pos()),
+                    ));
+                }
+                Some(x) if *x < b' ' => {
+                    return Err((
+                        progress,
+                        E::space_problem(BadInputError::HasAsciiControl, state.pos()),
+                    ));
                }
                _ => {
-                    state = state.advance(1);
+                    if !newlines.is_empty() {
+                        state = state.mark_current_indent();
+                    }
+                    break;
                }
            }
-
-            index += 1;
        }

-        // We made it to the end of the bytes. This means there's a comment without a trailing newline.
-        let comment = unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
-
-        if is_doc_comment {
-            comments_and_newlines.push(CommentOrNewline::DocComment(comment));
-        } else {
-            comments_and_newlines.push(CommentOrNewline::LineComment(comment));
-        }
-
-        return SpaceState {
-            state,
-            comments_and_newlines,
-        };
+        Ok((progress, newlines.into_bump_slice(), state))
    }
 }
--- a/crates/compiler/parse/src/expr.rs
+++ b/crates/compiler/parse/src/expr.rs
@ -1114,7 +1114,15 @@ fn finish_parsing_alias_or_opaque<'a>(
                    Ok(good) => {
                        type_arguments.push(Loc::at(argument.region, good));
                    }
-                    Err(_) => panic!(),
+                    Err(()) => {
+                        return Err((
+                            MadeProgress,
+                            EExpr::Pattern(
+                                arena.alloc(EPattern::NotAPattern(state.pos())),
+                                state.pos(),
+                            ),
+                        ));
+                    }
                }
            }

@ -1577,8 +1585,8 @@ fn parse_expr_operator<'a>(
                    }
                }
            }
-            Err((NoProgress, expr)) => {
-                todo!("{:?} {:?}", expr, state)
+            Err((NoProgress, _e)) => {
+                return Err((MadeProgress, EExpr::TrailingOperator(state.pos())));
            }
        },
    }
@ -1722,10 +1730,17 @@ fn parse_expr_end<'a>(
                        expr_state.consume_spaces(arena);
                        let call = to_call(arena, expr_state.arguments, expr_state.expr);

-                        let loc_pattern = Loc::at(
-                            call.region,
-                            expr_to_pattern_help(arena, &call.value).unwrap(),
-                        );
+                        let pattern = expr_to_pattern_help(arena, &call.value).map_err(|()| {
+                            (
+                                MadeProgress,
+                                EExpr::Pattern(
+                                    arena.alloc(EPattern::NotAPattern(state.pos())),
+                                    state.pos(),
+                                ),
+                            )
+                        })?;
+
+                        let loc_pattern = Loc::at(call.region, pattern);

                        patterns.insert(0, loc_pattern);

--- a/crates/compiler/parse/src/parser.rs
+++ b/crates/compiler/parse/src/parser.rs
@ -64,7 +64,7 @@ pub enum SyntaxError<'a> {
    Space(BadInputError),
    NotEndOfFile(Position),
 }
-pub trait SpaceProblem {
+pub trait SpaceProblem: std::fmt::Debug {
    fn space_problem(e: BadInputError, pos: Position) -> Self;
 }

@ -266,6 +266,8 @@ pub enum EGeneratesWith {
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum BadInputError {
    HasTab,
+    HasMisplacedCarriageReturn,
+    HasAsciiControl,
    ///
    TooManyLines,
    ///
@ -273,15 +275,6 @@ pub enum BadInputError {
    BadUtf8,
 }

-pub fn bad_input_to_syntax_error<'a>(bad_input: BadInputError) -> SyntaxError<'a> {
-    use crate::parser::BadInputError::*;
-    match bad_input {
-        HasTab => SyntaxError::NotYetImplemented("call error on tabs".to_string()),
-        TooManyLines => SyntaxError::TooManyLines,
-        BadUtf8 => SyntaxError::BadUtf8,
-    }
-}
-
 impl<'a, T> SourceError<'a, T> {
    pub fn new(problem: T, state: &State<'a>) -> Self {
        Self {
@ -324,6 +317,8 @@ impl<'a> SyntaxError<'a> {

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum EExpr<'a> {
+    TrailingOperator(Position),
+
    Start(Position),
    End(Position),
    BadExprEnd(Position),
@ -561,6 +556,7 @@ pub enum EPattern<'a> {
    Record(PRecord<'a>, Position),
    List(PList<'a>, Position),
    Underscore(Position),
+    NotAPattern(Position),

    Start(Position),
    End(Position),
@ -774,7 +770,7 @@ pub struct FileError<'a, T> {
 pub trait Parser<'a, Output, Error> {
    fn parse(
        &self,
-        alloc: &'a Bump,
+        arena: &'a Bump,
        state: State<'a>,
        min_indent: u32,
    ) -> ParseResult<'a, Output, Error>;
--- a/crates/compiler/parse/src/state.rs
+++ b/crates/compiler/parse/src/state.rs
@ -98,7 +98,7 @@ impl<'a> State<'a> {
        self.offset += 1;
        self.line_start = self.pos();

-        // WARNING! COULD CAUSE BUGS IF WE FORGET TO CALL mark_current_ident LATER!
+        // WARNING! COULD CAUSE BUGS IF WE FORGET TO CALL mark_current_indent LATER!
        // We really need to be stricter about this.
        self.line_start_after_whitespace = self.line_start;

--- a/crates/compiler/parse/src/test_helpers.rs
+++ b/crates/compiler/parse/src/test_helpers.rs
@ -41,3 +41,15 @@ pub fn parse_defs_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Defs<'a>,
        Err(tuple) => Err(tuple.1),
    }
 }
+
+/// Test helper: parses `input` as a module header.
+///
+/// Leading and trailing whitespace is trimmed from `input` before parsing.
+/// On success the parsed header is returned and the remaining parser state is
+/// discarded; on failure the header parser's problem is wrapped in
+/// `SyntaxError::Header`.
+pub fn parse_header_with<'a>(
+    arena: &'a Bump,
+    input: &'a str,
+) -> Result<ast::Module<'a>, SyntaxError<'a>> {
+    let state = State::new(input.trim().as_bytes());
+
+    // `state` is not used again, so move it into the parser instead of cloning it.
+    match crate::module::parse_header(arena, state) {
+        Ok((header, _)) => Ok(header),
+        Err(fail) => Err(SyntaxError::Header(fail.problem)),
+    }
+}
--- a/crates/compiler/parse/tests/snapshots/fail/alias_or_opaque_fail.expr.result-ast
+++ b/crates/compiler/parse/tests/snapshots/fail/alias_or_opaque_fail.expr.result-ast
@ -0,0 +1 @@
+Expr(InParens(End(@3), @0), @0)
--- a/crates/compiler/parse/tests/snapshots/fail/alias_or_opaque_fail.expr.roc
+++ b/crates/compiler/parse/tests/snapshots/fail/alias_or_opaque_fail.expr.roc
@ -0,0 +1,2 @@
+(@,B
+.e:
--- a/crates/compiler/parse/tests/snapshots/fail/expr_to_pattern_fail.expr.result-ast
+++ b/crates/compiler/parse/tests/snapshots/fail/expr_to_pattern_fail.expr.result-ast
@ -0,0 +1 @@
+Expr(Pattern(NotAPattern(@3), @3), @0)
--- a/crates/compiler/parse/tests/snapshots/fail/expr_to_pattern_fail.expr.roc
+++ b/crates/compiler/parse/tests/snapshots/fail/expr_to_pattern_fail.expr.roc
@ -0,0 +1 @@
+.e,
--- a/crates/compiler/parse/tests/snapshots/fail/tab_crash.header.result-ast
+++ b/crates/compiler/parse/tests/snapshots/fail/tab_crash.header.result-ast
@ -0,0 +1 @@
+SourceError { problem: Space(HasMisplacedCarriageReturn, @1), bytes: [35, 13, 12, 9, 65] }
--- a/crates/compiler/parse/tests/snapshots/fail/tab_crash.header.roc
+++ b/crates/compiler/parse/tests/snapshots/fail/tab_crash.header.roc
@ -0,0 +1 @@
+#
	A
--- a/crates/compiler/parse/tests/snapshots/fail/trailing_operator.expr.result-ast
+++ b/crates/compiler/parse/tests/snapshots/fail/trailing_operator.expr.result-ast
@ -0,0 +1 @@
+Expr(TrailingOperator(@2), @0)
--- a/crates/compiler/parse/tests/snapshots/fail/trailing_operator.expr.roc
+++ b/crates/compiler/parse/tests/snapshots/fail/trailing_operator.expr.roc
@ -0,0 +1 @@
+J-
--- a/crates/compiler/parse/tests/snapshots/pass/multiline_string.expr.formatted.roc
+++ b/crates/compiler/parse/tests/snapshots/pass/multiline_string.expr.formatted.roc
@ -6,7 +6,7 @@ Hello,\n\nWorld!
 c =
    """
    Hello,
-    
+
    World!
    """

--- a/crates/compiler/parse/tests/test_parse.rs
+++ b/crates/compiler/parse/tests/test_parse.rs
@ -166,8 +166,12 @@ mod test_parse {
        fail/record_type_open.expr,
        fail/record_type_tab.expr,
        fail/single_no_end.expr,
+        fail/tab_crash.header,
        fail/tag_union_end.expr,
        fail/tag_union_lowercase_tag_name.expr,
+        fail/trailing_operator.expr,
+        fail/expr_to_pattern_fail.expr,
+        fail/alias_or_opaque_fail.expr,
        fail/tag_union_open.expr,
        fail/tag_union_second_lowercase_tag_name.expr,
        fail/type_annotation_double_colon.expr,
--- a/crates/compiler/test_gen/src/helpers/llvm.rs
+++ b/crates/compiler/test_gen/src/helpers/llvm.rs
@ -613,9 +613,6 @@ macro_rules! assert_llvm_evals_to {
                CrashTag::User => panic!(r#"User crash with message: "{}""#, msg),
            },
        }
-
-        // artificially extend the lifetime of `lib`
-        lib.close().unwrap();
    };

    ($src:expr, $expected:expr, $ty:ty) => {
--- a/examples/cli/cli-platform/Arg.roc
+++ b/examples/cli/cli-platform/Arg.roc
@ -574,7 +574,7 @@ formatHelpHelp = \n, cmdHelp ->
                    "\n\n"

            """
-            
+
            \(indented)COMMANDS:
            \(fmtCmdHelp)
            """
@ -606,7 +606,7 @@ formatHelpHelp = \n, cmdHelp ->
                        |> Str.joinWith "\n"

                    """
-                    
+
                    \(indented)OPTIONS:
                    \(helpStr)
                    """
@ -621,7 +621,7 @@ formatHelpHelp = \n, cmdHelp ->
                        |> Str.joinWith "\n"

                    """
-                    
+
                    \(indented)ARGS:
                    \(helpStr)
                    """
@ -909,7 +909,7 @@ expect
    ==
    """
    test
-    
+
    OPTIONS:
        --foo    the foo option  (string)
        --bar, -B  (string)
@ -936,13 +936,13 @@ expect
    ==
    """
    test
-    
+
    COMMANDS:
        login
            OPTIONS:
                --user  (string)
                --pw  (string)
-    
+
        publish
            OPTIONS:
                --file  (string)
@ -960,7 +960,7 @@ expect
    """
    test
    a test cli app
-    
+
    COMMANDS:
        login
    """
--- a/examples/platform-switching/rust-platform/rust-toolchain.toml
+++ b/examples/platform-switching/rust-platform/rust-toolchain.toml
@ -0,0 +1,9 @@
+[toolchain]
+channel = "1.64.0"
+
+profile = "default"
+
+components = [
+    # for use with rust-analyzer or similar tools inside `nix develop`
+    "rust-src"
+]
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@ -1,6 +1,7 @@
 [toolchain]
 # How to update version:
 #  - update `channel = "RUST_VERSION"`
+#  - update `channel = "RUST_VERSION"` in examples/platform-switching/rust-platform/rust-toolchain.toml
 #  - to update the nightly version:
 #     - Find the latest nightly release that matches RUST_VERSION here: https://github.com/oxalica/rust-overlay/tree/master/manifests/nightly/2022
 #     - update `channel = "nightly-OLD_DATE"` below
				`@ -0,0 +1 @@`
				`SourceError { problem: Space(HasMisplacedCarriageReturn, @1), bytes: [35, 13, 12, 9, 65] }`