From 1318afe2b4a7a25233db5760b785605019b9f06b Mon Sep 17 00:00:00 2001 From: Austaras Date: Tue, 7 Mar 2023 10:05:04 +0800 Subject: [PATCH] fix(es/minifier): Bailout regex optimization on invalid flags (#7020) --- .../src/compress/pure/misc.rs | 111 +++++++++--------- .../tests/fixture/issues/4234/output.js | 2 +- .../compress/issue_269/regexp/output.js | 2 +- 3 files changed, 59 insertions(+), 56 deletions(-) diff --git a/crates/swc_ecma_minifier/src/compress/pure/misc.rs b/crates/swc_ecma_minifier/src/compress/pure/misc.rs index fdc70fade6f..f2b3eb9da48 100644 --- a/crates/swc_ecma_minifier/src/compress/pure/misc.rs +++ b/crates/swc_ecma_minifier/src/compress/pure/misc.rs @@ -1,6 +1,7 @@ use std::{fmt::Write, iter::once, num::FpCategory}; -use swc_atoms::js_word; +use rustc_hash::FxHashSet; +use swc_atoms::{js_word, JsWord}; use swc_common::{iter::IdentifyLast, util::take::Take, Span, DUMMY_SP}; use swc_ecma_ast::*; use swc_ecma_transforms_optimization::debug_assert_valid; @@ -247,76 +248,78 @@ impl Pure<'_> { /// `new RegExp("([Sap]+)", "ig")` => `/([Sap]+)/gi` fn optimize_regex(&mut self, args: &mut Vec, span: &mut Span) -> Option { - if args.is_empty() || args.len() > 2 { - return None; - } - - // We aborts the method if arguments are not literals. - if args.iter().any(|v| { - v.spread.is_some() - || match &*v.expr { - Expr::Lit(Lit::Str(s)) => { - if s.value.contains(|c: char| { - // whitelist - !c.is_ascii_alphanumeric() - && !matches!(c, '%' | '[' | ']' | '(' | ')' | '{' | '}' | '-' | '+') - }) { - return true; - } - if s.value.contains("\\\0") || s.value.contains('/') { - return true; - } - - false - } - _ => true, + fn valid_pattern(pattern: &Expr) -> Option { + if let Expr::Lit(Lit::Str(s)) = pattern { + if s.value.contains(|c: char| { + // whitelist + !c.is_ascii_alphanumeric() + && !matches!(c, '$' | '[' | ']' | '(' | ')' | '{' | '}' | '-' | '+' | '_') + }) { + None + } else { + Some(s.value.clone()) } - }) { - return None; + } else { + None + } + } + fn valid_flag(flag: &Expr, es_version: EsVersion) -> Option { + if let Expr::Lit(Lit::Str(s)) = flag { + let mut set = FxHashSet::default(); + for c in s.value.chars() { + if !(matches!(c, 'g' | 'i' | 'm') + || (es_version >= EsVersion::Es2015 && matches!(c, 'u' | 'y')) + || (es_version >= EsVersion::Es2018 && matches!(c, 's'))) + || (es_version >= EsVersion::Es2022 && matches!(c, 'd')) + { + return None; + } + + if !set.insert(c) { + return None; + } + } + + Some(s.value.clone()) + } else { + None + } } - let pattern = args[0].expr.take(); - - let pattern = match *pattern { - Expr::Lit(Lit::Str(s)) => s.value, - _ => { - unreachable!() - } + let (pattern, flag) = match args.as_slice() { + [ExprOrSpread { spread: None, expr }] => (valid_pattern(expr)?, "".into()), + [ExprOrSpread { + spread: None, + expr: pattern, + }, ExprOrSpread { + spread: None, + expr: flag, + }] => ( + valid_pattern(pattern)?, + valid_flag(flag, self.options.ecma)?, + ), + _ => return None, }; if pattern.is_empty() { // For some expressions `RegExp()` and `RegExp("")` // Theoretically we can use `/(?:)/` to achieve shorter code // But some browsers released in 2015 don't support them yet. - args[0].expr = pattern.into(); return None; } - let flags = args - .get_mut(1) - .map(|v| v.expr.take()) - .map(|v| match *v { - Expr::Lit(Lit::Str(s)) => { - assert!(s.value.is_ascii()); - - let s = s.value.to_string(); - let mut bytes = s.into_bytes(); - bytes.sort_unstable(); - - String::from_utf8(bytes).unwrap().into() - } - _ => { - unreachable!() - } - }) - .unwrap_or_default(); - report_change!("Optimized regex"); Some(Expr::Lit(Lit::Regex(Regex { span: *span, exp: pattern.into(), - flags, + flags: { + let flag = flag.to_string(); + let mut bytes = flag.into_bytes(); + bytes.sort_unstable(); + + String::from_utf8(bytes).unwrap().into() + }, }))) } diff --git a/crates/swc_ecma_minifier/tests/fixture/issues/4234/output.js b/crates/swc_ecma_minifier/tests/fixture/issues/4234/output.js index 822e92418e9..28592d6451f 100644 --- a/crates/swc_ecma_minifier/tests/fixture/issues/4234/output.js +++ b/crates/swc_ecma_minifier/tests/fixture/issues/4234/output.js @@ -1,4 +1,4 @@ bar(RegExp("")); bar(RegExp("", "u")); bar(/a/); -bar(/a/u); +bar(RegExp("a", "u")); diff --git a/crates/swc_ecma_minifier/tests/terser/compress/issue_269/regexp/output.js b/crates/swc_ecma_minifier/tests/terser/compress/issue_269/regexp/output.js index 6ba076567dd..54785acae53 100644 --- a/crates/swc_ecma_minifier/tests/terser/compress/issue_269/regexp/output.js +++ b/crates/swc_ecma_minifier/tests/terser/compress/issue_269/regexp/output.js @@ -2,4 +2,4 @@ /bar/gi; RegExp(foo); RegExp("bar", ig); -/should/afil; +RegExp("should", "fail");