feat(parser): Don't hang on unexpected inputs (#1274)

swc_ecma_parser:
 - Don't hang on unexpected inputs. (#1272, denoland/deno#8719)
This commit is contained in:
강동윤 2020-12-14 16:37:11 +09:00 committed by GitHub
parent ccf4c2b12c
commit 25856f230c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 89 additions and 4 deletions

View File

@ -7,7 +7,7 @@ include = ["Cargo.toml", "src/**/*.rs", "examples/**/*.rs"]
license = "Apache-2.0/MIT"
name = "swc_ecma_parser"
repository = "https://github.com/swc-project/swc.git"
version = "0.43.3"
version = "0.43.4"
[features]
default = []

View File

@ -432,7 +432,10 @@ impl<'a, I: Input> Lexer<'a, I> {
}
// unexpected character
c => self.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?,
c => {
self.input.bump();
self.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?
}
};
Ok(Some(token))
@ -784,7 +787,7 @@ impl<'a, I: Input> Lexer<'a, I> {
};
if !valid {
l.error(start, SyntaxError::InvalidIdentChar)?
l.emit_error(start, SyntaxError::InvalidIdentChar);
}
buf.extend(c);
}

View File

@ -702,3 +702,15 @@ pub(crate) fn lex_tokens_with_target(
) -> Vec<Token> {
with_lexer(syntax, target, s, |l| Ok(l.map(|ts| ts.token).collect())).unwrap()
}
/// Lexes `s` with the given `syntax` (targeting ES2020) and returns
/// `(tokens, recovered_errors)`.
///
/// `recovered_errors` holds whatever the lexer hands back from
/// `take_errors()` once the whole input has been consumed. `tokens` may
/// itself contain an error token if the lexer fails to recover from an
/// error.
#[cfg(test)]
pub(crate) fn lex_errors(syntax: Syntax, s: &'static str) -> (Vec<Token>, Vec<Error>) {
    with_lexer(syntax, JscTarget::Es2020, s, |lexer| {
        // Drain the lexer first so every error has been recorded before the
        // recovered-error list is taken.
        let tokens: Vec<Token> = lexer.map(|item| item.token).collect();
        let recovered = lexer.take_errors();
        Ok((tokens, recovered))
    })
    .unwrap()
}

View File

@ -4,7 +4,10 @@ use super::{
state::{lex, lex_module_errors, lex_tokens, lex_tokens_with_target, with_lexer},
*,
};
use crate::error::{Error, SyntaxError};
use crate::{
error::{Error, SyntaxError},
lexer::state::lex_errors,
};
use std::{ops::Range, str};
use test::{black_box, Bencher};
@ -1276,3 +1279,33 @@ fn lex_semicolons(b: &mut Bencher) {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;",
);
}
/// Regression test for issue 1272 (TypeScript syntax): the source text
/// `\u{16}` (backslash escape form) must lex to exactly one token and leave
/// at least one recovered error.
#[test]
fn issue_1272_1_ts() {
    let syntax = crate::Syntax::Typescript(Default::default());
    let (lexed, recovered) = lex_errors(syntax, "\\u{16}");

    assert_eq!(lexed.len(), 1);
    assert_ne!(recovered, vec![]);
}
/// Regression test for issue 1272 (ECMAScript syntax): the source text
/// `\u{16}` (backslash escape form) must lex to exactly one token and leave
/// at least one recovered error.
#[test]
fn issue_1272_1_js() {
    let syntax = crate::Syntax::Es(Default::default());
    let (lexed, recovered) = lex_errors(syntax, "\\u{16}");

    assert_eq!(lexed.len(), 1);
    assert_ne!(recovered, vec![]);
}
/// Regression test for issue 1272 (TypeScript syntax): a raw U+0016
/// character as the whole input must still lex to exactly one token.
#[test]
fn issue_1272_2_ts() {
    // Not recoverable yet: nothing shows up in the recovered-error list, so
    // the single token is presumably the error token itself.
    let syntax = crate::Syntax::Typescript(Default::default());
    let (lexed, recovered) = lex_errors(syntax, "\u{16}");

    assert_eq!(lexed.len(), 1);
    assert_eq!(recovered, vec![]);
}
/// Regression test for issue 1272 (ECMAScript syntax): a raw U+0016
/// character as the whole input must still lex to exactly one token.
#[test]
fn issue_1272_2_js() {
    // Not recoverable yet: nothing shows up in the recovered-error list, so
    // the single token is presumably the error token itself.
    let syntax = crate::Syntax::Es(Default::default());
    let (lexed, recovered) = lex_errors(syntax, "\u{16}");

    assert_eq!(lexed.len(), 1);
    assert_eq!(recovered, vec![]);
}

View File

@ -1,3 +1,9 @@
error: Expected unicode escape
--> $DIR/tests/test262-parser/fail/033c083bb1f44642.js:1:7
|
1 | \uD800\x62
| ^
error: Invalid character in identifier
--> $DIR/tests/test262-parser/fail/033c083bb1f44642.js:1:1
|

View File

@ -1,3 +1,9 @@
error: Expected unicode escape
--> $DIR/tests/test262-parser/fail/6e792760337980f7.js:1:7
|
1 | \uD800\
| ^
error: Invalid character in identifier
--> $DIR/tests/test262-parser/fail/6e792760337980f7.js:1:1
|

View File

@ -4,3 +4,9 @@ error: Invalid character in identifier
1 | var \uD83B\uDE00
| ^^^^^^
error: Invalid character in identifier
--> $DIR/tests/test262-parser/fail/abc46381e4e6bcca.js:1:11
|
1 | var \uD83B\uDE00
| ^^^^^^

View File

@ -1,3 +1,9 @@
error: Expected 4 hex characters
--> $DIR/tests/test262-parser/fail/c3afed3cb0fb92ab.js:1:7
|
1 | \uD800\u
| ^^
error: Invalid character in identifier
--> $DIR/tests/test262-parser/fail/c3afed3cb0fb92ab.js:1:1
|

View File

@ -4,3 +4,9 @@ error: Invalid character in identifier
1 | \uD800\uDC00
| ^^^^^^
error: Invalid character in identifier
--> $DIR/tests/test262-parser/fail/d4cf8ae9018f6a28.js:1:7
|
1 | \uD800\uDC00
| ^^^^^^

View File

@ -0,0 +1 @@
\u{16}

View File

@ -0,0 +1,6 @@
error: Invalid character in identifier
--> $DIR/tests/typescript-errors/issue-1272/input.ts:1:1
|
1 | \u{16}
| ^^^^^^