mirror of
https://github.com/roc-lang/roc.git
synced 2024-11-13 09:49:11 +03:00
Merge remote-tracking branch 'origin/main' into packages
This commit is contained in:
commit
d022c19f5c
63
Cargo.lock
generated
63
Cargo.lock
generated
@ -3172,6 +3172,26 @@ version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f61dcf0b917cd75d4521d7343d1ffff3d1583054133c9b5cbea3375c703c40d"
|
||||
|
||||
[[package]]
|
||||
name = "proptest"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"bitflags",
|
||||
"byteorder",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
"quick-error 2.0.1",
|
||||
"rand",
|
||||
"rand_chacha",
|
||||
"rand_xorshift",
|
||||
"regex-syntax",
|
||||
"rusty-fork",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ptr_meta"
|
||||
version = "0.1.4"
|
||||
@ -3203,6 +3223,18 @@ dependencies = [
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.22.0"
|
||||
@ -3295,6 +3327,15 @@ dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xorshift"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_xoshiro"
|
||||
version = "0.6.0"
|
||||
@ -4178,6 +4219,7 @@ dependencies = [
|
||||
"encode_unicode 1.0.0",
|
||||
"indoc",
|
||||
"pretty_assertions",
|
||||
"proptest",
|
||||
"quickcheck",
|
||||
"quickcheck_macros",
|
||||
"roc_collections",
|
||||
@ -4575,6 +4617,18 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
|
||||
|
||||
[[package]]
|
||||
name = "rusty-fork"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"quick-error 1.2.3",
|
||||
"tempfile",
|
||||
"wait-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustyline"
|
||||
version = "9.1.1"
|
||||
@ -5751,6 +5805,15 @@ dependencies = [
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wait-timeout"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.2"
|
||||
|
@ -556,8 +556,13 @@ pub fn fmt_str_literal<'buf>(buf: &mut Buf<'buf>, literal: StrLiteral, indent: u
|
||||
|
||||
for segments in lines.iter() {
|
||||
for seg in segments.iter() {
|
||||
buf.indent(indent);
|
||||
format_str_segment(seg, buf, indent);
|
||||
// only add indent if the line isn't empty
|
||||
if *seg != StrSegment::Plaintext("\n") {
|
||||
buf.indent(indent);
|
||||
format_str_segment(seg, buf, indent);
|
||||
} else {
|
||||
buf.newline();
|
||||
}
|
||||
}
|
||||
|
||||
buf.newline();
|
||||
|
@ -19,6 +19,7 @@ encode_unicode.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
roc_test_utils = { path = "../../test_utils" }
|
||||
proptest = "1.0.0"
|
||||
|
||||
criterion.workspace = true
|
||||
pretty_assertions.workspace = true
|
||||
|
@ -1,8 +1,7 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use roc_parse::{module, module::module_defs, parser::Parser, state::State};
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub fn parse_benchmark(c: &mut Criterion) {
|
||||
c.bench_function("parse false-interpreter", |b| {
|
||||
|
22
crates/compiler/parse/fuzz/Cargo.lock
generated
22
crates/compiler/parse/fuzz/Cargo.lock
generated
@ -72,9 +72,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.10.0"
|
||||
version = "3.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3"
|
||||
checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
@ -96,9 +96,15 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "funty"
|
||||
@ -167,12 +173,6 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.131"
|
||||
@ -264,6 +264,7 @@ version = "0.0.1"
|
||||
dependencies = [
|
||||
"bitvec",
|
||||
"bumpalo",
|
||||
"fnv",
|
||||
"hashbrown",
|
||||
"im",
|
||||
"im-rc",
|
||||
@ -283,7 +284,6 @@ name = "roc_module"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"lazy_static",
|
||||
"roc_collections",
|
||||
"roc_error_macros",
|
||||
"roc_ident",
|
||||
|
@ -37,10 +37,7 @@ where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
parser::map_with_arena(
|
||||
and(
|
||||
space0_e(indent_before_problem),
|
||||
and(parser, space0_no_after_indent_check()),
|
||||
),
|
||||
and(space0_e(indent_before_problem), and(parser, spaces())),
|
||||
spaces_around_help,
|
||||
)
|
||||
}
|
||||
@ -164,474 +161,268 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the ASCII space bytes (`b' '`, 0x20) at the start of `bytes`.
///
/// Only the space byte itself is counted: tabs, newlines and every other
/// byte end the run. Returns the length of the leading run of spaces, which
/// is `0` for empty input and `bytes.len()` when every byte is a space.
///
/// This is the byte-at-a-time reference implementation that the tests
/// compare `fast_eat_whitespace` against.
pub fn simple_eat_whitespace(bytes: &[u8]) -> usize {
    bytes.iter().take_while(|&&byte| byte == b' ').count()
}
|
||||
|
||||
/// Counts the ASCII space bytes (`b' '`, 0x20) at the start of `bytes`,
/// examining eight bytes per step.
///
/// Returns the same value as a byte-at-a-time scan for `b' '`: the length of
/// the leading run of spaces (`0` for empty input, `bytes.len()` when every
/// byte is a space). Bytes such as 0xa0, which differ from a space only in
/// the high bit, are not counted.
pub fn fast_eat_whitespace(bytes: &[u8]) -> usize {
    // The space byte repeated in every byte lane of a u64.
    const SPACES: u64 = 0x2020_2020_2020_2020;
    // The high bit of every lane.
    const HIGH_BITS: u64 = 0x8080_8080_8080_8080;
    // The value 1 in every lane.
    const ONES: u64 = 0x0101_0101_0101_0101;

    let mut chunks = bytes.chunks_exact(8);
    let mut consumed = 0;

    for chunk in &mut chunks {
        // Little-endian puts the first byte of the chunk in the lowest lane,
        // so counting bits from the low end walks the input front to back.
        // `chunks_exact(8)` guarantees the slice is exactly eight bytes long.
        let mut lanes = [0u8; 8];
        lanes.copy_from_slice(chunk);
        let word = u64::from_le_bytes(lanes);

        // A lane becomes 0xff exactly when its byte equals the space byte.
        let matches = !(word ^ SPACES);

        // Clear each lane's high bit so the add below has room to carry
        // without spilling into the neighbouring lane.
        let low_bits = matches & !HIGH_BITS;

        // 0x7f + 1 = 0x80: the high bit ends up set iff the low seven bits of
        // the byte matched a space. No lane exceeds 0x7f before the add, so
        // nothing can overflow; `wrapping_add` just states that explicitly.
        let candidates = low_bits.wrapping_add(ONES);

        // 0xa0 (0x80 | 0x20) shares its low seven bits with a space; drop any
        // lane whose original byte had the high bit set.
        let is_space = candidates & !(word & HIGH_BITS);

        // Fill in the low seven bits of every lane so that each leading space
        // lane contributes eight consecutive one bits, then convert the bit
        // count into a byte count.
        let count = ((is_space | !HIGH_BITS).trailing_ones() as usize) / 8;

        if count < 8 {
            return consumed + count;
        }
        consumed += 8;
    }

    // Fewer than eight bytes remain: finish with a plain byte-wise scan.
    let tail = chunks
        .remainder()
        .iter()
        .take_while(|&&byte| byte == b' ')
        .count();

    consumed + tail
}
|
||||
|
||||
/// Returns the offset of the first byte in `bytes` that is below `b' '`
/// (0x20), i.e. the first ASCII control character such as `\t`, `\n` or `\r`.
///
/// Returns `bytes.len()` when there is no such byte, including for empty
/// input. 0x7f and bytes with the high bit set do not stop the scan.
///
/// This is the byte-at-a-time reference implementation that the tests
/// compare `fast_eat_until_control_character` against.
pub fn simple_eat_until_control_character(bytes: &[u8]) -> usize {
    bytes
        .iter()
        .position(|&byte| byte < b' ')
        .unwrap_or(bytes.len())
}
|
||||
|
||||
/// Returns the offset of the first byte in `bytes` that is below `b' '`
/// (0x20), examining eight bytes per step.
///
/// Returns the same value as a byte-at-a-time scan: the index of the first
/// ASCII control character, or `bytes.len()` when there is none. 0x7f and
/// bytes with the high bit set do not stop the scan.
pub fn fast_eat_until_control_character(bytes: &[u8]) -> usize {
    // Control characters are 0x00-0x1f: only bits under this mask may be set.
    const CONTROL_BITS: u64 = 0x1F1F_1F1F_1F1F_1F1F;
    // The high bit of every byte lane.
    const HIGH_BITS: u64 = 0x8080_8080_8080_8080;
    // Bit 4 of every lane; the increment that turns 0x70 into 0x80.
    const CARRY: u64 = 0x1010_1010_1010_1010;

    let mut chunks = bytes.chunks_exact(8);
    let mut consumed = 0;

    for chunk in &mut chunks {
        // Little-endian puts the first byte of the chunk in the lowest lane,
        // so counting bits from the low end walks the input front to back.
        // `chunks_exact(8)` guarantees the slice is exactly eight bytes long.
        let mut lanes = [0u8; 8];
        lanes.copy_from_slice(chunk);
        let word = u64::from_le_bytes(lanes);

        // Keep only the top three bits of each lane, inverted. A lane is 0xe0
        // exactly when its byte has none of those bits set, i.e. when it is a
        // control character; every other lane is missing at least one bit.
        let inverted_top = !word & !CONTROL_BITS;

        // Shift down one bit (0xe0 -> 0x70) to leave room for a carry. The low
        // five bits of every lane are zero, so nothing crosses a lane boundary.
        let shifted = inverted_top >> 1;

        // 0x70 + 0x10 = 0x80: the carry reaches the high bit only for control
        // characters. Other lanes top out at 0x60 + 0x10 = 0x70.
        let flagged = shifted.wrapping_add(CARRY);

        // The lowest set high bit marks the first control character; with no
        // bit set `trailing_zeros` is 64, giving a count of 8.
        let count = (flagged & HIGH_BITS).trailing_zeros() as usize / 8;

        if count < 8 {
            return consumed + count;
        }
        consumed += 8;
    }

    // Fewer than eight bytes remain: finish with a plain byte-wise scan.
    let rest = chunks.remainder();
    let tail = rest
        .iter()
        .position(|&byte| byte < b' ')
        .unwrap_or(rest.len());

    consumed + tail
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Checks that the eight-bytes-at-a-time scanners agree with their
    //! byte-at-a-time reference implementations.

    use super::*;
    use proptest::prelude::*;

    #[test]
    fn test_eat_whitespace_simple() {
        let bytes = &[0, 0, 0, 0, 0, 0, 0, 0];
        assert_eq!(simple_eat_whitespace(bytes), fast_eat_whitespace(bytes));
    }

    // Uniformly random bytes almost never start with eight or more spaces, so
    // the property test below rarely reaches the path where a whole 8-byte
    // chunk matches. Sweep run lengths across several chunk boundaries here.
    #[test]
    fn test_eat_whitespace_long_runs() {
        for run in 0..40usize {
            let only_spaces = vec![b' '; run];
            assert_eq!(simple_eat_whitespace(&only_spaces), run);
            assert_eq!(fast_eat_whitespace(&only_spaces), run);

            // 0xa0 differs from a space only in its high bit.
            for &stop in &[b'a', b'\n', b'\t', 0xa0u8, 0x00] {
                let mut bytes = vec![b' '; run];
                bytes.push(stop);
                bytes.extend_from_slice(b"  tail  ");

                assert_eq!(simple_eat_whitespace(&bytes), run);
                assert_eq!(fast_eat_whitespace(&bytes), run);
            }
        }
    }

    proptest! {
        #[test]
        fn test_eat_whitespace(bytes in proptest::collection::vec(any::<u8>(), 0..100)) {
            prop_assert_eq!(simple_eat_whitespace(&bytes), fast_eat_whitespace(&bytes));
        }
    }

    #[test]
    fn test_eat_until_control_character_simple() {
        let bytes = &[32, 0, 0, 0, 0, 0, 0, 0];
        assert_eq!(
            simple_eat_until_control_character(bytes),
            fast_eat_until_control_character(bytes)
        );
    }

    // Place a control character at every offset of inputs that span several
    // 8-byte chunks, so both the chunked path and the tail scan are covered.
    #[test]
    fn test_eat_until_control_character_every_offset() {
        for len in 0..40usize {
            let clean = vec![b'a'; len];
            assert_eq!(simple_eat_until_control_character(&clean), len);
            assert_eq!(fast_eat_until_control_character(&clean), len);

            for pos in 0..len {
                for &control in &[0x00u8, b'\t', b'\n', b'\r', 0x1f] {
                    let mut bytes = clean.clone();
                    bytes[pos] = control;

                    assert_eq!(simple_eat_until_control_character(&bytes), pos);
                    assert_eq!(fast_eat_until_control_character(&bytes), pos);
                }
            }
        }
    }

    proptest! {
        #[test]
        fn test_eat_until_control_character(bytes in proptest::collection::vec(any::<u8>(), 0..100)) {
            prop_assert_eq!(
                simple_eat_until_control_character(&bytes),
                fast_eat_until_control_character(&bytes));
        }
    }
}
|
||||
|
||||
pub fn space0_e<'a, E>(
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
spaces_help_help(indent_problem)
|
||||
move |arena, state: State<'a>, min_indent: u32| {
|
||||
let start = state.pos();
|
||||
match spaces().parse(arena, state, min_indent) {
|
||||
Ok((progress, spaces, state)) => {
|
||||
if progress == NoProgress || state.column() >= min_indent {
|
||||
Ok((progress, spaces, state))
|
||||
} else {
|
||||
Err((progress, indent_problem(start)))
|
||||
}
|
||||
}
|
||||
Err((progress, err)) => Err((progress, err)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn spaces_help_help<'a, E>(
|
||||
indent_problem: fn(Position) -> E,
|
||||
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
fn spaces<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
move |arena, state: State<'a>, min_indent: u32| match fast_eat_spaces(&state) {
|
||||
FastSpaceState::HasTab(position) => Err((
|
||||
MadeProgress,
|
||||
E::space_problem(BadInputError::HasTab, position),
|
||||
)),
|
||||
FastSpaceState::Good {
|
||||
newlines,
|
||||
consumed,
|
||||
column,
|
||||
} => {
|
||||
if consumed == 0 {
|
||||
Ok((NoProgress, &[] as &[_], state))
|
||||
} else if column < min_indent {
|
||||
Err((MadeProgress, indent_problem(state.pos())))
|
||||
} else {
|
||||
let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
|
||||
let spaces = eat_spaces(state, comments_and_newlines);
|
||||
|
||||
Ok((
|
||||
MadeProgress,
|
||||
spaces.comments_and_newlines.into_bump_slice(),
|
||||
spaces.state,
|
||||
))
|
||||
move |arena, mut state: State<'a>, _min_indent: u32| {
|
||||
let mut newlines = Vec::new_in(arena);
|
||||
let mut progress = NoProgress;
|
||||
loop {
|
||||
let whitespace = fast_eat_whitespace(state.bytes());
|
||||
if whitespace > 0 {
|
||||
state.advance_mut(whitespace);
|
||||
progress = MadeProgress;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn space0_no_after_indent_check<'a, E>() -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
move |arena, state: State<'a>, _min_indent: u32| match fast_eat_spaces(&state) {
|
||||
FastSpaceState::HasTab(position) => Err((
|
||||
MadeProgress,
|
||||
E::space_problem(BadInputError::HasTab, position),
|
||||
)),
|
||||
FastSpaceState::Good {
|
||||
newlines,
|
||||
consumed,
|
||||
column: _,
|
||||
} => {
|
||||
if consumed == 0 {
|
||||
Ok((NoProgress, &[] as &[_], state))
|
||||
} else {
|
||||
let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
|
||||
let spaces = eat_spaces(state, comments_and_newlines);
|
||||
match state.bytes().first() {
|
||||
Some(b'#') => {
|
||||
state.advance_mut(1);
|
||||
|
||||
Ok((
|
||||
MadeProgress,
|
||||
spaces.comments_and_newlines.into_bump_slice(),
|
||||
spaces.state,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum FastSpaceState {
|
||||
Good {
|
||||
newlines: usize,
|
||||
consumed: usize,
|
||||
column: u32,
|
||||
},
|
||||
HasTab(Position),
|
||||
}
|
||||
|
||||
fn fast_eat_spaces(state: &State) -> FastSpaceState {
|
||||
use FastSpaceState::*;
|
||||
|
||||
let mut newlines = 0;
|
||||
let mut line_start = state.line_start.offset as usize;
|
||||
let base_offset = state.pos().offset as usize;
|
||||
|
||||
let mut index = base_offset;
|
||||
let bytes = state.original_bytes();
|
||||
let length = bytes.len();
|
||||
|
||||
'outer: while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
index += 1;
|
||||
}
|
||||
b'\n' => {
|
||||
newlines += 1;
|
||||
index += 1;
|
||||
line_start = index;
|
||||
}
|
||||
b'\r' => {
|
||||
index += 1;
|
||||
line_start = index;
|
||||
}
|
||||
b'\t' => {
|
||||
return HasTab(Position::new(index as u32));
|
||||
}
|
||||
b'#' => {
|
||||
index += 1;
|
||||
|
||||
// try to use SIMD instructions explicitly
|
||||
// run with RUSTFLAGS="-C target-cpu=native" to enable
|
||||
#[cfg(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
))]
|
||||
{
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
// a bytestring with the three characters we're looking for (the rest is ignored)
|
||||
let needle = b"\r\n\t=============";
|
||||
let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
|
||||
|
||||
while index < length {
|
||||
let remaining = length - index;
|
||||
let length = if remaining < 16 { remaining as i32 } else { 16 };
|
||||
|
||||
// the source bytes we'll be looking at
|
||||
let haystack =
|
||||
unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
|
||||
|
||||
// use first 3 characters of needle, first `length` characters of haystack
|
||||
// finds the first index where one of the `needle` characters occurs
|
||||
// or 16 when none of the needle characters occur
|
||||
let first_special_char = unsafe {
|
||||
_mm_cmpestri(needle, 3, haystack, length, _SIDD_CMP_EQUAL_ANY)
|
||||
};
|
||||
|
||||
// we've made `first_special_char` characters of progress
|
||||
index += usize::min(first_special_char as usize, remaining);
|
||||
|
||||
// if we found a special char, let the outer loop handle it
|
||||
if first_special_char != 16 {
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
)))]
|
||||
{
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b'\n' | b'\t' | b'\r' => {
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
_ => {
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
Good {
|
||||
newlines,
|
||||
consumed: index - base_offset,
|
||||
column: (index - line_start) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
struct SpaceState<'a> {
|
||||
state: State<'a>,
|
||||
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
}
|
||||
|
||||
fn eat_spaces<'a>(
|
||||
mut state: State<'a>,
|
||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
) -> SpaceState<'a> {
|
||||
for c in state.bytes() {
|
||||
match c {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
return eat_line_comment(state, comments_and_newlines);
|
||||
}
|
||||
_ => {
|
||||
if !comments_and_newlines.is_empty() {
|
||||
state = state.mark_current_indent();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SpaceState {
|
||||
state,
|
||||
comments_and_newlines,
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_line_comment<'a>(
|
||||
mut state: State<'a>,
|
||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
) -> SpaceState<'a> {
|
||||
let mut index = state.pos().offset as usize;
|
||||
let bytes = state.original_bytes();
|
||||
let length = bytes.len();
|
||||
|
||||
'outer: loop {
|
||||
let is_doc_comment = if let Some(b'#') = bytes.get(index) {
|
||||
match bytes.get(index + 1) {
|
||||
Some(b' ') => {
|
||||
state = state.advance(2);
|
||||
index += 2;
|
||||
|
||||
true
|
||||
}
|
||||
Some(b'\n') => {
|
||||
// consume the second # and the \n
|
||||
state = state.advance(1);
|
||||
state = state.advance_newline();
|
||||
index += 2;
|
||||
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(""));
|
||||
|
||||
for c in state.bytes() {
|
||||
match c {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
_ => {
|
||||
state = state.mark_current_indent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
None => {
|
||||
// consume the second #
|
||||
state = state.advance(1);
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
|
||||
Some(_) => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
let loop_start = index;
|
||||
|
||||
#[cfg(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
))]
|
||||
{
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
// a bytestring with the three characters we're looking for (the rest is ignored)
|
||||
let needle = b"\r\n\t=============";
|
||||
let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
|
||||
|
||||
while index < length {
|
||||
let remaining = length - index;
|
||||
let chunk = if remaining < 16 { remaining as i32 } else { 16 };
|
||||
|
||||
// the source bytes we'll be looking at
|
||||
let haystack = unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
|
||||
|
||||
// use first 3 characters of needle, first chunk` characters of haystack
|
||||
// finds the first index where one of the `needle` characters occurs
|
||||
// or 16 when none of the needle characters occur
|
||||
let first_special_char =
|
||||
unsafe { _mm_cmpestri(needle, 3, haystack, chunk, _SIDD_CMP_EQUAL_ANY) };
|
||||
|
||||
// we've made `first_special_char` characters of progress
|
||||
let progress = usize::min(first_special_char as usize, remaining);
|
||||
index += progress;
|
||||
state = state.advance(progress);
|
||||
|
||||
if first_special_char != 16 {
|
||||
match bytes[index] {
|
||||
b'\t' => unreachable!(),
|
||||
b'\n' => {
|
||||
let comment =
|
||||
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
|
||||
if is_doc_comment {
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||
} else {
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
state = state.advance_newline();
|
||||
|
||||
index += 1;
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
_ => {
|
||||
state = state.mark_current_indent();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
comments_and_newlines,
|
||||
};
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
index += 1;
|
||||
}
|
||||
odd_character => {
|
||||
unreachable!(
|
||||
"unexpected_character {} {}",
|
||||
odd_character, odd_character as char
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "sse2",
|
||||
target_feature = "sse4.2"
|
||||
)))]
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b'\t' => unreachable!(),
|
||||
b'\n' => {
|
||||
let comment =
|
||||
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
let is_doc_comment = state.bytes().first() == Some(&b'#')
|
||||
&& (state.bytes().get(1) == Some(&b' ')
|
||||
|| state.bytes().get(1) == Some(&b'\n')
|
||||
|| state.bytes().get(1) == None);
|
||||
|
||||
if is_doc_comment {
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||
} else {
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
state = state.advance_newline();
|
||||
|
||||
index += 1;
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
state = state.advance(1);
|
||||
}
|
||||
b'\n' => {
|
||||
state = state.advance_newline();
|
||||
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||
}
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
continue 'outer;
|
||||
}
|
||||
_ => {
|
||||
state = state.mark_current_indent();
|
||||
break;
|
||||
}
|
||||
state.advance_mut(1);
|
||||
if state.bytes().first() == Some(&b' ') {
|
||||
state.advance_mut(1);
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
comments_and_newlines,
|
||||
let len = fast_eat_until_control_character(state.bytes());
|
||||
|
||||
// We already checked that the string is valid UTF-8
|
||||
debug_assert!(std::str::from_utf8(&state.bytes()[..len]).is_ok());
|
||||
let text = unsafe { std::str::from_utf8_unchecked(&state.bytes()[..len]) };
|
||||
|
||||
let comment = if is_doc_comment {
|
||||
CommentOrNewline::DocComment(text)
|
||||
} else {
|
||||
CommentOrNewline::LineComment(text)
|
||||
};
|
||||
newlines.push(comment);
|
||||
state.advance_mut(len);
|
||||
|
||||
if state.bytes().first() == Some(&b'\n') {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
|
||||
progress = MadeProgress;
|
||||
}
|
||||
b'\r' => {
|
||||
Some(b'\r') => {
|
||||
if state.bytes().get(1) == Some(&b'\n') {
|
||||
newlines.push(CommentOrNewline::Newline);
|
||||
state.advance_mut(1);
|
||||
state = state.advance_newline();
|
||||
progress = MadeProgress;
|
||||
} else {
|
||||
return Err((
|
||||
progress,
|
||||
E::space_problem(
|
||||
BadInputError::HasMisplacedCarriageReturn,
|
||||
state.pos(),
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
Some(b'\n') => {
|
||||
newlines.push(CommentOrNewline::Newline);
|
||||
state = state.advance_newline();
|
||||
progress = MadeProgress;
|
||||
}
|
||||
Some(b'\t') => {
|
||||
return Err((
|
||||
progress,
|
||||
E::space_problem(BadInputError::HasTab, state.pos()),
|
||||
));
|
||||
}
|
||||
Some(x) if *x < b' ' => {
|
||||
return Err((
|
||||
progress,
|
||||
E::space_problem(BadInputError::HasAsciiControl, state.pos()),
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
state = state.advance(1);
|
||||
if !newlines.is_empty() {
|
||||
state = state.mark_current_indent();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
// We made it to the end of the bytes. This means there's a comment without a trailing newline.
|
||||
let comment = unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
|
||||
if is_doc_comment {
|
||||
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||
} else {
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
|
||||
return SpaceState {
|
||||
state,
|
||||
comments_and_newlines,
|
||||
};
|
||||
Ok((progress, newlines.into_bump_slice(), state))
|
||||
}
|
||||
}
|
||||
|
@ -1114,7 +1114,15 @@ fn finish_parsing_alias_or_opaque<'a>(
|
||||
Ok(good) => {
|
||||
type_arguments.push(Loc::at(argument.region, good));
|
||||
}
|
||||
Err(_) => panic!(),
|
||||
Err(()) => {
|
||||
return Err((
|
||||
MadeProgress,
|
||||
EExpr::Pattern(
|
||||
arena.alloc(EPattern::NotAPattern(state.pos())),
|
||||
state.pos(),
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1577,8 +1585,8 @@ fn parse_expr_operator<'a>(
|
||||
}
|
||||
}
|
||||
}
|
||||
Err((NoProgress, expr)) => {
|
||||
todo!("{:?} {:?}", expr, state)
|
||||
Err((NoProgress, _e)) => {
|
||||
return Err((MadeProgress, EExpr::TrailingOperator(state.pos())));
|
||||
}
|
||||
},
|
||||
}
|
||||
@ -1722,10 +1730,17 @@ fn parse_expr_end<'a>(
|
||||
expr_state.consume_spaces(arena);
|
||||
let call = to_call(arena, expr_state.arguments, expr_state.expr);
|
||||
|
||||
let loc_pattern = Loc::at(
|
||||
call.region,
|
||||
expr_to_pattern_help(arena, &call.value).unwrap(),
|
||||
);
|
||||
let pattern = expr_to_pattern_help(arena, &call.value).map_err(|()| {
|
||||
(
|
||||
MadeProgress,
|
||||
EExpr::Pattern(
|
||||
arena.alloc(EPattern::NotAPattern(state.pos())),
|
||||
state.pos(),
|
||||
),
|
||||
)
|
||||
})?;
|
||||
|
||||
let loc_pattern = Loc::at(call.region, pattern);
|
||||
|
||||
patterns.insert(0, loc_pattern);
|
||||
|
||||
|
@ -64,7 +64,7 @@ pub enum SyntaxError<'a> {
|
||||
Space(BadInputError),
|
||||
NotEndOfFile(Position),
|
||||
}
|
||||
pub trait SpaceProblem {
|
||||
pub trait SpaceProblem: std::fmt::Debug {
|
||||
fn space_problem(e: BadInputError, pos: Position) -> Self;
|
||||
}
|
||||
|
||||
@ -266,6 +266,8 @@ pub enum EGeneratesWith {
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum BadInputError {
|
||||
HasTab,
|
||||
HasMisplacedCarriageReturn,
|
||||
HasAsciiControl,
|
||||
///
|
||||
TooManyLines,
|
||||
///
|
||||
@ -273,15 +275,6 @@ pub enum BadInputError {
|
||||
BadUtf8,
|
||||
}
|
||||
|
||||
pub fn bad_input_to_syntax_error<'a>(bad_input: BadInputError) -> SyntaxError<'a> {
|
||||
use crate::parser::BadInputError::*;
|
||||
match bad_input {
|
||||
HasTab => SyntaxError::NotYetImplemented("call error on tabs".to_string()),
|
||||
TooManyLines => SyntaxError::TooManyLines,
|
||||
BadUtf8 => SyntaxError::BadUtf8,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> SourceError<'a, T> {
|
||||
pub fn new(problem: T, state: &State<'a>) -> Self {
|
||||
Self {
|
||||
@ -324,6 +317,8 @@ impl<'a> SyntaxError<'a> {
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum EExpr<'a> {
|
||||
TrailingOperator(Position),
|
||||
|
||||
Start(Position),
|
||||
End(Position),
|
||||
BadExprEnd(Position),
|
||||
@ -561,6 +556,7 @@ pub enum EPattern<'a> {
|
||||
Record(PRecord<'a>, Position),
|
||||
List(PList<'a>, Position),
|
||||
Underscore(Position),
|
||||
NotAPattern(Position),
|
||||
|
||||
Start(Position),
|
||||
End(Position),
|
||||
@ -774,7 +770,7 @@ pub struct FileError<'a, T> {
|
||||
pub trait Parser<'a, Output, Error> {
|
||||
fn parse(
|
||||
&self,
|
||||
alloc: &'a Bump,
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
min_indent: u32,
|
||||
) -> ParseResult<'a, Output, Error>;
|
||||
|
@ -98,7 +98,7 @@ impl<'a> State<'a> {
|
||||
self.offset += 1;
|
||||
self.line_start = self.pos();
|
||||
|
||||
// WARNING! COULD CAUSE BUGS IF WE FORGET TO CALL mark_current_ident LATER!
|
||||
// WARNING! COULD CAUSE BUGS IF WE FORGET TO CALL mark_current_indent LATER!
|
||||
// We really need to be stricter about this.
|
||||
self.line_start_after_whitespace = self.line_start;
|
||||
|
||||
|
@ -41,3 +41,15 @@ pub fn parse_defs_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Defs<'a>,
|
||||
Err(tuple) => Err(tuple.1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_header_with<'a>(
|
||||
arena: &'a Bump,
|
||||
input: &'a str,
|
||||
) -> Result<ast::Module<'a>, SyntaxError<'a>> {
|
||||
let state = State::new(input.trim().as_bytes());
|
||||
|
||||
match crate::module::parse_header(arena, state.clone()) {
|
||||
Ok((header, _)) => Ok(header),
|
||||
Err(fail) => Err(SyntaxError::Header(fail.problem)),
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1 @@
|
||||
Expr(InParens(End(@3), @0), @0)
|
@ -0,0 +1,2 @@
|
||||
(@,B
|
||||
.e:
|
@ -0,0 +1 @@
|
||||
Expr(Pattern(NotAPattern(@3), @3), @0)
|
@ -0,0 +1 @@
|
||||
.e,
|
@ -0,0 +1 @@
|
||||
SourceError { problem: Space(HasMisplacedCarriageReturn, @1), bytes: [35, 13, 12, 9, 65] }
|
@ -0,0 +1 @@
|
||||
#
A
|
@ -0,0 +1 @@
|
||||
Expr(TrailingOperator(@2), @0)
|
@ -0,0 +1 @@
|
||||
J-
|
@ -6,7 +6,7 @@ Hello,\n\nWorld!
|
||||
c =
|
||||
"""
|
||||
Hello,
|
||||
|
||||
|
||||
World!
|
||||
"""
|
||||
|
||||
|
@ -166,8 +166,12 @@ mod test_parse {
|
||||
fail/record_type_open.expr,
|
||||
fail/record_type_tab.expr,
|
||||
fail/single_no_end.expr,
|
||||
fail/tab_crash.header,
|
||||
fail/tag_union_end.expr,
|
||||
fail/tag_union_lowercase_tag_name.expr,
|
||||
fail/trailing_operator.expr,
|
||||
fail/expr_to_pattern_fail.expr,
|
||||
fail/alias_or_opaque_fail.expr,
|
||||
fail/tag_union_open.expr,
|
||||
fail/tag_union_second_lowercase_tag_name.expr,
|
||||
fail/type_annotation_double_colon.expr,
|
||||
|
@ -613,9 +613,6 @@ macro_rules! assert_llvm_evals_to {
|
||||
CrashTag::User => panic!(r#"User crash with message: "{}""#, msg),
|
||||
},
|
||||
}
|
||||
|
||||
// artificially extend the lifetime of `lib`
|
||||
lib.close().unwrap();
|
||||
};
|
||||
|
||||
($src:expr, $expected:expr, $ty:ty) => {
|
||||
|
@ -574,7 +574,7 @@ formatHelpHelp = \n, cmdHelp ->
|
||||
"\n\n"
|
||||
|
||||
"""
|
||||
|
||||
|
||||
\(indented)COMMANDS:
|
||||
\(fmtCmdHelp)
|
||||
"""
|
||||
@ -606,7 +606,7 @@ formatHelpHelp = \n, cmdHelp ->
|
||||
|> Str.joinWith "\n"
|
||||
|
||||
"""
|
||||
|
||||
|
||||
\(indented)OPTIONS:
|
||||
\(helpStr)
|
||||
"""
|
||||
@ -621,7 +621,7 @@ formatHelpHelp = \n, cmdHelp ->
|
||||
|> Str.joinWith "\n"
|
||||
|
||||
"""
|
||||
|
||||
|
||||
\(indented)ARGS:
|
||||
\(helpStr)
|
||||
"""
|
||||
@ -909,7 +909,7 @@ expect
|
||||
==
|
||||
"""
|
||||
test
|
||||
|
||||
|
||||
OPTIONS:
|
||||
--foo the foo option (string)
|
||||
--bar, -B (string)
|
||||
@ -936,13 +936,13 @@ expect
|
||||
==
|
||||
"""
|
||||
test
|
||||
|
||||
|
||||
COMMANDS:
|
||||
login
|
||||
OPTIONS:
|
||||
--user (string)
|
||||
--pw (string)
|
||||
|
||||
|
||||
publish
|
||||
OPTIONS:
|
||||
--file (string)
|
||||
@ -960,7 +960,7 @@ expect
|
||||
"""
|
||||
test
|
||||
a test cli app
|
||||
|
||||
|
||||
COMMANDS:
|
||||
login
|
||||
"""
|
||||
|
@ -0,0 +1,9 @@
|
||||
[toolchain]
|
||||
channel = "1.64.0"
|
||||
|
||||
profile = "default"
|
||||
|
||||
components = [
|
||||
# for usages of rust-analyzer or similar tools inside `nix develop`
|
||||
"rust-src"
|
||||
]
|
@ -1,6 +1,7 @@
|
||||
[toolchain]
|
||||
# How to update version:
|
||||
# - update `channel = "RUST_VERSION"`
|
||||
# - update `channel = "RUST_VERSION"` in examples/platform-switching/rust-platform
|
||||
# - to update the nightly version:
|
||||
# - Find the latest nightly release that matches RUST_VERSION here: https://github.com/oxalica/rust-overlay/tree/master/manifests/nightly/2022
|
||||
# - update `channel = "nightly-OLD_DATE"` below
|
||||
|
Loading…
Reference in New Issue
Block a user