From 25856f230c604868aea7a957ce5f29fe82d2c8b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Mon, 14 Dec 2020 16:37:11 +0900 Subject: [PATCH] feat(parser): Don't hang on unexpected inputs (#1274) swc_ecma_parser: - Don't hang on unexpected inputs. (#1272, denoland/deno#8719) --- ecmascript/parser/Cargo.toml | 2 +- ecmascript/parser/src/lexer/mod.rs | 7 ++-- ecmascript/parser/src/lexer/state.rs | 12 +++++++ ecmascript/parser/src/lexer/tests.rs | 35 ++++++++++++++++++- .../fail/033c083bb1f44642.js.stderr | 6 ++++ .../fail/6e792760337980f7.js.stderr | 6 ++++ .../fail/abc46381e4e6bcca.js.stderr | 6 ++++ .../fail/c3afed3cb0fb92ab.js.stderr | 6 ++++ .../fail/d4cf8ae9018f6a28.js.stderr | 6 ++++ .../typescript-errors/issue-1272/input.ts | 1 + .../issue-1272/input.ts.stderr | 6 ++++ 11 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 ecmascript/parser/tests/typescript-errors/issue-1272/input.ts create mode 100644 ecmascript/parser/tests/typescript-errors/issue-1272/input.ts.stderr diff --git a/ecmascript/parser/Cargo.toml b/ecmascript/parser/Cargo.toml index ac7d7bfc2cd..1b86323cd3f 100644 --- a/ecmascript/parser/Cargo.toml +++ b/ecmascript/parser/Cargo.toml @@ -7,7 +7,7 @@ include = ["Cargo.toml", "src/**/*.rs", "examples/**/*.rs"] license = "Apache-2.0/MIT" name = "swc_ecma_parser" repository = "https://github.com/swc-project/swc.git" -version = "0.43.3" +version = "0.43.4" [features] default = [] diff --git a/ecmascript/parser/src/lexer/mod.rs b/ecmascript/parser/src/lexer/mod.rs index 08a8f09db1f..ef140652255 100644 --- a/ecmascript/parser/src/lexer/mod.rs +++ b/ecmascript/parser/src/lexer/mod.rs @@ -432,7 +432,10 @@ impl<'a, I: Input> Lexer<'a, I> { } // unexpected character - c => self.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?, + c => { + self.input.bump(); + self.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })? + } }; Ok(Some(token)) @@ -784,7 +787,7 @@ impl<'a, I: Input> Lexer<'a, I> { }; if !valid { - l.error(start, SyntaxError::InvalidIdentChar)? + l.emit_error(start, SyntaxError::InvalidIdentChar); } buf.extend(c); } diff --git a/ecmascript/parser/src/lexer/state.rs b/ecmascript/parser/src/lexer/state.rs index 9020ee81434..bcce10acfe8 100644 --- a/ecmascript/parser/src/lexer/state.rs +++ b/ecmascript/parser/src/lexer/state.rs @@ -702,3 +702,15 @@ pub(crate) fn lex_tokens_with_target( ) -> Vec { with_lexer(syntax, target, s, |l| Ok(l.map(|ts| ts.token).collect())).unwrap() } + +/// Returns `(tokens, recovered_errors)`. `(tokens)` may contain an error token +/// if the lexer fails to recover from it. +#[cfg(test)] +pub(crate) fn lex_errors(syntax: Syntax, s: &'static str) -> (Vec, Vec) { + with_lexer(syntax, JscTarget::Es2020, s, |l| { + let tokens = l.map(|ts| ts.token).collect(); + let errors = l.take_errors(); + Ok((tokens, errors)) + }) + .unwrap() +} diff --git a/ecmascript/parser/src/lexer/tests.rs b/ecmascript/parser/src/lexer/tests.rs index a433335cd07..e6c091b34f0 100644 --- a/ecmascript/parser/src/lexer/tests.rs +++ b/ecmascript/parser/src/lexer/tests.rs @@ -4,7 +4,10 @@ use super::{ state::{lex, lex_module_errors, lex_tokens, lex_tokens_with_target, with_lexer}, *, }; -use crate::error::{Error, SyntaxError}; +use crate::{ + error::{Error, SyntaxError}, + lexer::state::lex_errors, +}; use std::{ops::Range, str}; use test::{black_box, Bencher}; @@ -1276,3 +1279,33 @@ fn lex_semicolons(b: &mut Bencher) { ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;", ); } + +#[test] +fn issue_1272_1_ts() { + let (tokens, errors) = lex_errors(crate::Syntax::Typescript(Default::default()), "\\u{16}"); + assert_eq!(tokens.len(), 1); + assert_ne!(errors, vec![]); +} + +#[test] +fn issue_1272_1_js() { + let (tokens, errors) = lex_errors(crate::Syntax::Es(Default::default()), "\\u{16}"); + assert_eq!(tokens.len(), 1); + assert_ne!(errors, vec![]); +} + +#[test] +fn issue_1272_2_ts() { + // Not recoverable yet + let (tokens, errors) = lex_errors(crate::Syntax::Typescript(Default::default()), "\u{16}"); + assert_eq!(tokens.len(), 1); + assert_eq!(errors, vec![]); +} + +#[test] +fn issue_1272_2_js() { + // Not recoverable yet + let (tokens, errors) = lex_errors(crate::Syntax::Es(Default::default()), "\u{16}"); + assert_eq!(tokens.len(), 1); + assert_eq!(errors, vec![]); +} diff --git a/ecmascript/parser/tests/test262-error-references/fail/033c083bb1f44642.js.stderr b/ecmascript/parser/tests/test262-error-references/fail/033c083bb1f44642.js.stderr index f338f9488d2..9e5b98c504c 100644 --- a/ecmascript/parser/tests/test262-error-references/fail/033c083bb1f44642.js.stderr +++ b/ecmascript/parser/tests/test262-error-references/fail/033c083bb1f44642.js.stderr @@ -1,3 +1,9 @@ +error: Expected unicode escape + --> $DIR/tests/test262-parser/fail/033c083bb1f44642.js:1:7 + | +1 | \uD800\x62 + | ^ + error: Invalid character in identifier --> $DIR/tests/test262-parser/fail/033c083bb1f44642.js:1:1 | diff --git a/ecmascript/parser/tests/test262-error-references/fail/6e792760337980f7.js.stderr b/ecmascript/parser/tests/test262-error-references/fail/6e792760337980f7.js.stderr index 8237511e1ce..79f90b1cb6b 100644 --- a/ecmascript/parser/tests/test262-error-references/fail/6e792760337980f7.js.stderr +++ b/ecmascript/parser/tests/test262-error-references/fail/6e792760337980f7.js.stderr @@ -1,3 +1,9 @@ +error: Expected unicode escape + --> $DIR/tests/test262-parser/fail/6e792760337980f7.js:1:7 + | +1 | \uD800\ + | ^ + error: Invalid character in identifier --> $DIR/tests/test262-parser/fail/6e792760337980f7.js:1:1 | diff --git a/ecmascript/parser/tests/test262-error-references/fail/abc46381e4e6bcca.js.stderr b/ecmascript/parser/tests/test262-error-references/fail/abc46381e4e6bcca.js.stderr index 8f558e2c2e5..50b2726d1b0 100644 --- a/ecmascript/parser/tests/test262-error-references/fail/abc46381e4e6bcca.js.stderr +++ b/ecmascript/parser/tests/test262-error-references/fail/abc46381e4e6bcca.js.stderr @@ -4,3 +4,9 @@ error: Invalid character in identifier 1 | var \uD83B\uDE00 | ^^^^^^ +error: Invalid character in identifier + --> $DIR/tests/test262-parser/fail/abc46381e4e6bcca.js:1:11 + | +1 | var \uD83B\uDE00 + | ^^^^^^ + diff --git a/ecmascript/parser/tests/test262-error-references/fail/c3afed3cb0fb92ab.js.stderr b/ecmascript/parser/tests/test262-error-references/fail/c3afed3cb0fb92ab.js.stderr index fbcd5034ad9..2174add7bb9 100644 --- a/ecmascript/parser/tests/test262-error-references/fail/c3afed3cb0fb92ab.js.stderr +++ b/ecmascript/parser/tests/test262-error-references/fail/c3afed3cb0fb92ab.js.stderr @@ -1,3 +1,9 @@ +error: Expected 4 hex characters + --> $DIR/tests/test262-parser/fail/c3afed3cb0fb92ab.js:1:7 + | +1 | \uD800\u + | ^^ + error: Invalid character in identifier --> $DIR/tests/test262-parser/fail/c3afed3cb0fb92ab.js:1:1 | diff --git a/ecmascript/parser/tests/test262-error-references/fail/d4cf8ae9018f6a28.js.stderr b/ecmascript/parser/tests/test262-error-references/fail/d4cf8ae9018f6a28.js.stderr index 1bdb76d0f95..e162d2794d4 100644 --- a/ecmascript/parser/tests/test262-error-references/fail/d4cf8ae9018f6a28.js.stderr +++ b/ecmascript/parser/tests/test262-error-references/fail/d4cf8ae9018f6a28.js.stderr @@ -4,3 +4,9 @@ error: Invalid character in identifier 1 | \uD800\uDC00 | ^^^^^^ +error: Invalid character in identifier + --> $DIR/tests/test262-parser/fail/d4cf8ae9018f6a28.js:1:7 + | +1 | \uD800\uDC00 + | ^^^^^^ + diff --git a/ecmascript/parser/tests/typescript-errors/issue-1272/input.ts b/ecmascript/parser/tests/typescript-errors/issue-1272/input.ts new file mode 100644 index 00000000000..2f3bc154ffa --- /dev/null +++ b/ecmascript/parser/tests/typescript-errors/issue-1272/input.ts @@ -0,0 +1 @@ +\u{16} \ No newline at end of file diff --git a/ecmascript/parser/tests/typescript-errors/issue-1272/input.ts.stderr b/ecmascript/parser/tests/typescript-errors/issue-1272/input.ts.stderr new file mode 100644 index 00000000000..3b1ac3e8749 --- /dev/null +++ b/ecmascript/parser/tests/typescript-errors/issue-1272/input.ts.stderr @@ -0,0 +1,6 @@ +error: Invalid character in identifier + --> $DIR/tests/typescript-errors/issue-1272/input.ts:1:1 + | +1 | \u{16} + | ^^^^^^ +