From 8f280c535ca3f3b1bed0ae617fab4ef4ba527544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sun, 1 Mar 2020 20:05:03 +0900 Subject: [PATCH] Performance (#696) - Parser performance is improved by ~15% - Codegen performance is improved by ~90% (when comment and source map is enabled) --- .gitignore | 4 +- Cargo.toml | 6 + common/Cargo.toml | 2 +- common/src/input.rs | 94 +++---- common/src/source_map.rs | 85 ++++-- ecmascript/codegen/Cargo.toml | 6 +- ecmascript/codegen/benches/bench.rs | 21 +- ecmascript/codegen/benches/large-partial.js | 21 ++ ecmascript/codegen/benches/with_parse.rs | 145 +++++++++++ ecmascript/codegen/macros/Cargo.toml | 2 +- ecmascript/codegen/scripts/bench.sh | 9 + ecmascript/codegen/src/lib.rs | 69 +++-- .../codegen/src/text_writer/basic_impl.rs | 72 +++--- ecmascript/parser/Cargo.toml | 2 +- ecmascript/parser/benches/parser.rs | 9 + ecmascript/parser/src/lexer/mod.rs | 243 ++++++++++-------- ecmascript/parser/src/lexer/number.rs | 13 +- ecmascript/parser/src/lexer/tests.rs | 42 ++- ecmascript/parser/src/lexer/util.rs | 14 +- ecmascript/parser/src/lib.rs | 6 +- ecmascript/parser/src/parser/class_and_fn.rs | 10 +- ecmascript/parser/src/parser/expr.rs | 23 +- ecmascript/parser/src/parser/ident.rs | 18 +- ecmascript/parser/src/parser/input.rs | 37 ++- ecmascript/parser/src/parser/jsx.rs | 5 +- ecmascript/parser/src/parser/macros.rs | 5 +- ecmascript/parser/src/parser/object.rs | 5 +- ecmascript/parser/src/parser/pat.rs | 9 +- ecmascript/parser/src/parser/stmt.rs | 10 +- .../parser/src/parser/stmt/module_item.rs | 5 +- ecmascript/parser/src/parser/typescript.rs | 4 +- ecmascript/parser/src/token.rs | 6 +- ecmascript/transforms/Cargo.toml | 8 +- ecmascript/utils/Cargo.toml | 4 +- 34 files changed, 698 insertions(+), 316 deletions(-) create mode 100644 ecmascript/codegen/benches/large-partial.js create mode 100644 ecmascript/codegen/benches/with_parse.rs create mode 100644 ecmascript/codegen/scripts/bench.sh diff --git a/.gitignore b/.gitignore index 0d2f73d561c..f02d71f3ed2 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,9 @@ node_modules/ *.html *.svg package-lock.json +*.data +*.old # Coverage datas *.zip -*.info \ No newline at end of file +*.info diff --git a/Cargo.toml b/Cargo.toml index 152d1ff46fb..d155f4753a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,12 @@ rayon = "1" [[example]] name = "usage" +[profile.release] +codegen-units = 1 +#lto = true +debug = true + [profile.bench] codegen-units = 1 #lto = true +debug = true \ No newline at end of file diff --git a/common/Cargo.toml b/common/Cargo.toml index 7d8c3e0ef12..aaf849ec999 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swc_common" -version = "0.5.5" +version = "0.5.6" authors = ["강동윤 "] license = "Apache-2.0/MIT" repository = "https://github.com/swc-project/swc.git" diff --git a/common/src/input.rs b/common/src/input.rs index 99fefd24103..7c8ea978d91 100644 --- a/common/src/input.rs +++ b/common/src/input.rs @@ -25,26 +25,6 @@ impl<'a> From<&'a SourceFile> for SourceFileInput<'a> { } impl<'a> Input for SourceFileInput<'a> { - fn cur_pos(&mut self) -> BytePos { - self.iter - .clone() - .next() - .map(|(p, _)| self.start_pos + BytePos(p as u32)) - .unwrap_or(self.last_pos) - } - - fn last_pos(&self) -> BytePos { - self.last_pos - } - - fn bump(&mut self) { - if let Some((i, c)) = self.iter.next() { - self.last_pos = self.start_pos + BytePos((i + c.len_utf8()) as u32); - } else { - unreachable!("bump should not be called when cur() == None"); - } - } - fn cur(&mut self) -> Option { self.iter.clone().nth(0).map(|i| i.1) } @@ -57,6 +37,30 @@ impl<'a> Input for SourceFileInput<'a> { self.iter.clone().nth(2).map(|i| i.1) } + fn bump(&mut self) { + if let Some((i, c)) = self.iter.next() { + self.last_pos = self.start_pos + BytePos((i + c.len_utf8()) as u32); + } else { + unreachable!("bump should not be called when cur() == None"); + } + } + + fn is_at_start(&self) -> bool { + self.fm.start_pos == self.last_pos + } + + fn cur_pos(&mut self) -> BytePos { + self.iter + .clone() + .next() + .map(|(p, _)| self.start_pos + BytePos(p as u32)) + .unwrap_or(self.last_pos) + } + + fn last_pos(&self) -> BytePos { + self.last_pos + } + fn slice(&mut self, start: BytePos, end: BytePos) -> &str { assert!(start <= end, "Cannot slice {:?}..{:?}", start, end); let s = self.orig; @@ -73,6 +77,29 @@ impl<'a> Input for SourceFileInput<'a> { ret } + fn uncons_while(&mut self, mut pred: F) -> &str + where + F: FnMut(char) -> bool, + { + let s = self.iter.as_str(); + let mut last = 0; + + for (i, c) in s.char_indices() { + if pred(c) { + last = i + c.len_utf8(); + } else { + break; + } + } + let ret = &s[..last]; + + self.last_pos = self.last_pos + BytePos(last as _); + self.start_pos = self.last_pos; + self.iter = s[last..].char_indices(); + + ret + } + fn find(&mut self, mut pred: F) -> Option where F: FnMut(char) -> bool, @@ -97,29 +124,6 @@ impl<'a> Input for SourceFileInput<'a> { Some(self.last_pos) } - fn uncons_while(&mut self, mut pred: F) -> &str - where - F: FnMut(char) -> bool, - { - let s = self.iter.as_str(); - let mut last = 0; - - for (i, c) in s.char_indices() { - if pred(c) { - last = i + c.len_utf8(); - } else { - break; - } - } - let ret = &s[..last]; - - self.last_pos = self.last_pos + BytePos(last as _); - self.start_pos = self.last_pos; - self.iter = s[last..].char_indices(); - - ret - } - fn reset_to(&mut self, to: BytePos) { let orig = self.orig; let idx = (to - self.fm.start_pos).0 as usize; @@ -129,10 +133,6 @@ impl<'a> Input for SourceFileInput<'a> { self.start_pos = to; self.last_pos = to; } - - fn is_at_start(&self) -> bool { - self.fm.start_pos == self.last_pos - } } pub trait Input: Clone { diff --git a/common/src/source_map.rs b/common/src/source_map.rs index df6b86c44d4..a8a89bef8b1 100644 --- a/common/src/source_map.rs +++ b/common/src/source_map.rs @@ -247,9 +247,22 @@ impl SourceMap { /// Lookup source information about a BytePos pub fn lookup_char_pos(&self, pos: BytePos) -> Loc { - let chpos = self.bytepos_to_file_charpos(pos); - match self.lookup_line(pos) { + let fm = self.lookup_source_file(pos); + self.lookup_char_pos_with(fm, pos) + } + + /// Lookup source information about a BytePos + /// + /// + /// This method exists only for optimization and it's not part of public + /// api. + #[doc(hidden)] + pub fn lookup_char_pos_with(&self, fm: Arc, pos: BytePos) -> Loc { + let line_info = self.lookup_line_with(fm, pos); + match line_info { Ok(SourceFileAndLine { sf: f, line: a }) => { + let chpos = self.bytepos_to_file_charpos_with(&f, pos); + let line = a + 1; // Line numbers start at 1 let linebpos = f.lines[a]; assert!( @@ -260,7 +273,7 @@ impl SourceMap { linebpos, ); - let linechpos = self.bytepos_to_file_charpos(linebpos); + let linechpos = self.bytepos_to_file_charpos_with(&f, linebpos); let col = max(chpos, linechpos) - min(chpos, linechpos); @@ -298,6 +311,8 @@ impl SourceMap { } } Err(f) => { + let chpos = self.bytepos_to_file_charpos(pos); + let col_display = { let end_width_idx = f .non_narrow_chars @@ -319,10 +334,23 @@ impl SourceMap { } } - // If the relevant source_file is empty, we don't return a line number. + /// If the relevant source_file is empty, we don't return a line number. pub fn lookup_line(&self, pos: BytePos) -> Result> { let f = self.lookup_source_file(pos); + self.lookup_line_with(f, pos) + } + + /// If the relevant source_file is empty, we don't return a line number. + /// + /// This method exists only for optimization and it's not part of public + /// api. + #[doc(hidden)] + pub fn lookup_line_with( + &self, + f: Arc, + pos: BytePos, + ) -> Result> { match f.lookup_line(pos) { Some(line) => Ok(SourceFileAndLine { sf: f, line }), None => Err(f), @@ -786,9 +814,14 @@ impl SourceMap { } /// Converts an absolute BytePos to a CharPos relative to the source_file. - pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { + fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { let map = self.lookup_source_file(bpos); + self.bytepos_to_file_charpos_with(&map, bpos) + } + + /// Converts an absolute BytePos to a CharPos relative to the source_file. + fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos { // The number of extra bytes due to multibyte chars in the SourceFile let mut total_extra_bytes = 0; @@ -816,10 +849,15 @@ impl SourceMap { CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize) } - // Return the index of the source_file (in self.files) which contains pos. - fn lookup_source_file(&self, pos: BytePos) -> Arc { - let files = self.files.borrow(); - let files = &files.source_files; + /// Return the index of the source_file (in self.files) which contains pos. + /// + /// This method exists only for optimization and it's not part of public + /// api. + #[doc(hidden)] + pub fn lookup_source_file_in( + files: &[Arc], + pos: BytePos, + ) -> Option> { let count = files.len(); // Binary search for the source_file. @@ -834,13 +872,30 @@ impl SourceMap { } } - assert!( - a < count, - "position {} does not resolve to a source location", - pos.to_usize() - ); + if a >= count { + return None; + } - files[a].clone() + Some(files[a].clone()) + } + + /// Return the index of the source_file (in self.files) which contains pos. + /// + /// This is not a public api. + #[doc(hidden)] + pub fn lookup_source_file(&self, pos: BytePos) -> Arc { + let files = self.files.borrow(); + let files = &files.source_files; + let fm = Self::lookup_source_file_in(&files, pos); + match fm { + Some(fm) => fm, + None => { + panic!( + "position {} does not resolve to a source location", + pos.to_usize() + ); + } + } } pub fn count_lines(&self) -> usize { diff --git a/ecmascript/codegen/Cargo.toml b/ecmascript/codegen/Cargo.toml index b635d52e316..879a36cf916 100644 --- a/ecmascript/codegen/Cargo.toml +++ b/ecmascript/codegen/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swc_ecma_codegen" -version = "0.16.0" +version = "0.17.0" authors = ["강동윤 "] license = "Apache-2.0/MIT" repository = "https://github.com/swc-project/swc.git" @@ -14,10 +14,10 @@ hashbrown = "0.6" swc_atoms = { version = "0.2", path ="../../atoms" } swc_common = { version = "0.5", path ="../../common" } swc_ecma_ast = { version = "0.17.0", path ="../ast" } -swc_ecma_codegen_macros = { version = "0.4", path ="./macros" } +swc_ecma_codegen_macros = { version = "0.5", path ="./macros" } sourcemap = "5" num-bigint = { version = "0.2", features = ["serde"] } [dev-dependencies] testing = { version = "0.5", path ="../../testing" } -swc_ecma_parser = { version = "0.19", path ="../parser" } \ No newline at end of file +swc_ecma_parser = { version = "0.20", path ="../parser" } \ No newline at end of file diff --git a/ecmascript/codegen/benches/bench.rs b/ecmascript/codegen/benches/bench.rs index a10c0b7f104..cf3047550b3 100644 --- a/ecmascript/codegen/benches/bench.rs +++ b/ecmascript/codegen/benches/bench.rs @@ -9,7 +9,7 @@ use swc_ecma_codegen::{self, Emitter}; use swc_ecma_parser::{Parser, Session, SourceFileInput, Syntax}; use test::Bencher; -const SOURCE: &str = r#" +const COLORS_JS: &str = r#" 'use strict'; /** * Extract red color out of a color integer: @@ -81,13 +81,14 @@ module.exports = { }; "#; -#[bench] -fn emit_colors(b: &mut Bencher) { - b.bytes = SOURCE.len() as _; +const LARGE_PARTIAL_JS: &str = include_str!("large-partial.js"); + +fn bench_emitter(b: &mut Bencher, s: &str) { + b.bytes = s.len() as _; let _ = ::testing::run_test(true, |cm, handler| { let session = Session { handler: &handler }; - let fm = cm.new_source_file(FileName::Anon, SOURCE.into()); + let fm = cm.new_source_file(FileName::Anon, s.into()); let mut parser = Parser::new( session, Syntax::default(), @@ -128,6 +129,16 @@ fn emit_colors(b: &mut Bencher) { }); } +#[bench] +fn emit_colors(b: &mut Bencher) { + bench_emitter(b, COLORS_JS) +} + +#[bench] +fn emit_large(b: &mut Bencher) { + bench_emitter(b, LARGE_PARTIAL_JS) +} + struct MyHandlers; impl swc_ecma_codegen::Handlers for MyHandlers {} diff --git a/ecmascript/codegen/benches/large-partial.js b/ecmascript/codegen/benches/large-partial.js new file mode 100644 index 00000000000..a05e2beb98b --- /dev/null +++ b/ecmascript/codegen/benches/large-partial.js @@ -0,0 +1,21 @@ + /** + * @license Angular v6.1.10 + * (c) 2010-2018 Google, Inc. https://angular.io/ + * License: MIT + */ + var r=function(){return function(){}}(),i=function(){return function(){}}(),o="*";function a(e,t){return{type:7,name:e,definitions:t,options:{}}}function s(e,t){return void 0===t&&(t=null),{type:4,styles:t,timings:e}}function u(e,t){return void 0===t&&(t=null),{type:2,steps:e,options:t}}function l(e){return{type:6,styles:e,offset:null}}function c(e,t,n){return{type:0,name:e,styles:t,options:n}}function d(e,t,n){return void 0===n&&(n=null),{type:1,expr:e,animation:t,options:n}} + /** + * @license + * Copyright Google Inc. All Rights Reserved. + * + * Use of this source code is governed by an MIT-style license that can be + * found in the LICENSE file at https://angular.io/license + */ + function p(e){Promise.resolve(null).then(e)} + /** + * @license + * Copyright Google Inc. All Rights Reserved. + * + * Use of this source code is governed by an MIT-style license that can be + * found in the LICENSE file at https://angular.io/license + */var f=function(){function e(e,t){void 0===e&&(e=0),void 0===t&&(t=0),this._onDoneFns=[],this._onStartFns=[],this._onDestroyFns=[],this._started=!1,this._destroyed=!1,this._finished=!1,this.parentPlayer=null,this.totalTime=e+t}return e.prototype._onFinish=function(){this._finished||(this._finished=!0,this._onDoneFns.forEach(function(e){return e()}),this._onDoneFns=[])},e.prototype.onStart=function(e){this._onStartFns.push(e)},e.prototype.onDone=function(e){this._onDoneFns.push(e)},e.prototype.onDestroy=function(e){this._onDestroyFns.push(e)},e.prototype.hasStarted=function(){return this._started},e.prototype.init=function(){},e.prototype.play=function(){this.hasStarted()||(this._onStart(),this.triggerMicrotask()),this._started=!0},e.prototype.triggerMicrotask=function(){var e=this;p(function(){return e._onFinish()})},e.prototype._onStart=function(){this._onStartFns.forEach(function(e){return e()}),this._onStartFns=[]},e.prototype.pause=function(){},e.prototype.restart=function(){},e.prototype.finish=function(){this._onFinish()},e.prototype.destroy=function(){this._destroyed||(this._destroyed=!0,this.hasStarted()||this._onStart(),this.finish(),this._onDestroyFns.forEach(function(e){return e()}),this._onDestroyFns=[])},e.prototype.reset=function(){},e.prototype.setPosition=function(e){},e.prototype.getPosition=function(){return 0},e.prototype.triggerCallback=function(e){var t="start"==e?this._onStartFns:this._onDoneFns;t.forEach(function(e){return e()}),t.length=0},e}(),h=function(){function e(e){var t=this;this._onDoneFns=[],this._onStartFns=[],this._finished=!1,this._started=!1,this._destroyed=!1,this._onDestroyFns=[],this.parentPlayer=null,this.totalTime=0,this.players=e;var n=0,r=0,i=0,o=this.players.length;0==o?p(function(){return t._onFinish()}):this.players.forEach(function(e){e.onDone(function(){++n==o&&t._onFinish()}),e.onDestroy(function(){++r==o&&t._onDestroy()}),e.onStart(function(){++i==o&&t._onStart()})}),this.totalTime=this.players.reduce(function(e,t){return Math.max(e,t.totalTime)},0)}return e.prototype._onFinish=function(){this._finished||(this._finished=!0,this._onDoneFns.forEach(function(e){return e()}),this._onDoneFns=[])},e.prototype.init=function(){this.players.forEach(function(e){return e.init()})},e.prototype.onStart=function(e){this._onStartFns.push(e)},e.prototype._onStart=function(){this.hasStarted()||(this._started=!0,this._onStartFns.forEach(function(e){return e()}),this._onStartFns=[])},e.prototype.onDone=function(e){this._onDoneFns.push(e)},e.prototype.onDestroy=function(e){this._onDestroyFns.push(e)},e.prototype.hasStarted=function(){return this._started},e.prototype.play=function(){this.parentPlayer||this.init(),this._onStart(),this.players.forEach(function(e){return e.play()})},e.prototype.pause=function(){this.players.forEach(function(e){return e.pause()})},e.prototype.restart=function(){this.players.forEach(function(e){return e.restart()})},e.prototype.finish=function(){this._onFinish(),this.players.forEach(function(e){return e.finish()})},e.prototype.destroy=function(){this._onDestroy()},e.prototype._onDestroy=function(){this._destroyed||(this._destroyed=!0,this._onFinish(),this.players.forEach(function(e){return e.destroy()}),this._onDestroyFns.forEach(function(e){return e()}),this._onDestroyFns=[])},e.prototype.reset=function(){this.players.forEach(function(e){return e.reset()}),this._destroyed=!1,this._finished=!1,this._started=!1},e.prototype.setPosition=function(e){var t=e*this.totalTime;this.players.forEach(function(e){var n=e.totalTime?Math.min(1,t/e.totalTime):1;e.setPosition(n)})},e.prototype.getPosition=function(){var e=0;return this.players.forEach(function(t){var n=t.getPosition();e=Math.min(n,e)}),e},e.prototype.beforeDestroy=function(){this.players.forEach(function(e){e.beforeDestroy&&e.beforeDestroy()})},e.prototype.triggerCallback=function(e){var t="start"==e?this._onStartFns:this._onDoneFns;t.forEach(function(e){return e()}),t.length=0},e}(),m="!"; \ No newline at end of file diff --git a/ecmascript/codegen/benches/with_parse.rs b/ecmascript/codegen/benches/with_parse.rs new file mode 100644 index 00000000000..fb4627d5482 --- /dev/null +++ b/ecmascript/codegen/benches/with_parse.rs @@ -0,0 +1,145 @@ +#![feature(box_syntax)] +#![feature(test)] + +extern crate test; + +use sourcemap::SourceMapBuilder; +use swc_common::FileName; +use swc_ecma_codegen::{self, Emitter}; +use swc_ecma_parser::{Parser, Session, SourceFileInput, Syntax}; +use test::Bencher; + +const COLORS_JS: &str = r#" +'use strict'; +/** + * Extract red color out of a color integer: + * + * 0x00DEAD -> 0x00 + * + * @param {Number} color + * @return {Number} + */ +function red( color ) +{ + let foo = 3.14; + return color >> 16; +} +/** + * Extract green out of a color integer: + * + * 0x00DEAD -> 0xDE + * + * @param {Number} color + * @return {Number} + */ +function green( color ) +{ + return ( color >> 8 ) & 0xFF; +} +/** + * Extract blue color out of a color integer: + * + * 0x00DEAD -> 0xAD + * + * @param {Number} color + * @return {Number} + */ +function blue( color ) +{ + return color & 0xFF; +} +/** + * Converts an integer containing a color such as 0x00DEAD to a hex + * string, such as '#00DEAD'; + * + * @param {Number} int + * @return {String} + */ +function intToHex( int ) +{ + const mask = '#000000'; + const hex = int.toString( 16 ); + return mask.substring( 0, 7 - hex.length ) + hex; +} +/** + * Converts a hex string containing a color such as '#00DEAD' to + * an integer, such as 0x00DEAD; + * + * @param {Number} num + * @return {String} + */ +function hexToInt( hex ) +{ + return parseInt( hex.substring( 1 ), 16 ); +} +module.exports = { + red, + green, + blue, + intToHex, + hexToInt, +}; +"#; + +const LARGE_PARTIAL_JS: &str = include_str!("large-partial.js"); + +fn bench_emitter(b: &mut Bencher, s: &str) { + b.bytes = s.len() as _; + + let _ = ::testing::run_test(true, |cm, handler| { + let session = Session { handler: &handler }; + + b.iter(|| { + let fm = cm.new_source_file(FileName::Anon, s.into()); + let mut parser = Parser::new( + session, + Syntax::default(), + SourceFileInput::from(&*fm), + None, + ); + let module = parser + .parse_module() + .map_err(|mut e| { + e.emit(); + }) + .unwrap(); + + let buf = vec![]; + let mut src_map_builder = SourceMapBuilder::new(None); + { + let handlers = box MyHandlers; + let mut emitter = Emitter { + cfg: swc_ecma_codegen::Config { + ..Default::default() + }, + comments: None, + cm: cm.clone(), + wr: box swc_ecma_codegen::text_writer::JsWriter::new( + cm.clone(), + "\n", + buf, + Some(&mut src_map_builder), + ), + handlers, + }; + + emitter.emit_module(&module) + } + }); + Ok(()) + }); +} + +#[bench] +fn colors(b: &mut Bencher) { + bench_emitter(b, COLORS_JS) +} + +#[bench] +fn large_partial(b: &mut Bencher) { + bench_emitter(b, LARGE_PARTIAL_JS) +} + +struct MyHandlers; + +impl swc_ecma_codegen::Handlers for MyHandlers {} diff --git a/ecmascript/codegen/macros/Cargo.toml b/ecmascript/codegen/macros/Cargo.toml index 7d6ba6508f2..07cae8600b6 100644 --- a/ecmascript/codegen/macros/Cargo.toml +++ b/ecmascript/codegen/macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swc_ecma_codegen_macros" -version = "0.4.0" +version = "0.5.0" authors = ["강동윤 "] license = "Apache-2.0/MIT" repository = "https://github.com/swc-project/swc.git" diff --git a/ecmascript/codegen/scripts/bench.sh b/ecmascript/codegen/scripts/bench.sh new file mode 100644 index 00000000000..1acb8462d8a --- /dev/null +++ b/ecmascript/codegen/scripts/bench.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -eux + +rm -rf ../../target/release/bench-* + +cargo bench --no-run --bench with_parse +# TODO: +#sudo flamegraph -- "../../target/release/bench-* --bench" \ No newline at end of file diff --git a/ecmascript/codegen/src/lib.rs b/ecmascript/codegen/src/lib.rs index 259fce38d7c..c79f7ea1d93 100644 --- a/ecmascript/codegen/src/lib.rs +++ b/ecmascript/codegen/src/lib.rs @@ -7,7 +7,7 @@ use self::{ text_writer::WriteJs, util::{SourceMapperExt, SpanExt, StartsWithAlphaNum}, }; -use std::{fmt::Write, io, sync::Arc}; +use std::{borrow::Cow, fmt::Write, io, sync::Arc}; use swc_atoms::JsWord; use swc_common::{comments::Comments, BytePos, SourceMap, Span, Spanned, SyntaxContext, DUMMY_SP}; use swc_ecma_ast::*; @@ -39,11 +39,13 @@ pub trait Node: Spanned { fn emit_with(&self, e: &mut Emitter<'_>) -> Result; } impl Node for Box { + #[inline(always)] fn emit_with(&self, e: &mut Emitter<'_>) -> Result { (**self).emit_with(e) } } impl<'a, N: Node> Node for &'a N { + #[inline(always)] fn emit_with(&self, e: &mut Emitter<'_>) -> Result { (**self).emit_with(e) } @@ -2136,23 +2138,50 @@ fn unescape(s: &str) -> String { result } -fn escape(s: &str) -> String { - s.replace("\\", "\\\\") - .replace('\u{0008}', "\\b") - .replace('\u{000C}', "\\f") - .replace("\n", "\\n") - .replace("\r", "\\r") - .replace("\t", "\\t") - .replace('\u{000B}', "\\v") - .replace("\00", "\\x000") - .replace("\01", "\\x001") - .replace("\02", "\\x002") - .replace("\03", "\\x003") - .replace("\04", "\\x004") - .replace("\05", "\\x005") - .replace("\06", "\\x006") - .replace("\07", "\\x007") - .replace("\08", "\\x008") - .replace("\09", "\\x009") - .replace("\0", "\\0") +fn escape(s: &str) -> Cow { + // let patterns = &[ + // "\\", "\u{0008}", "\u{000C}", "\n", "\r", "\t", "\u{000B}", "\00", "\01", + // "\02", "\03", "\04", "\05", "\06", "\07", "\08", "\09", "\0", + // ]; + // let replace_with = &[ + // "\\\\", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v", "\\x000", "\\x001", + // "\\x002", "\\x003", "\\x004", "\\x005", "\\x006", "\\x007", "\\x008", + // "\\x009", "\\0", ]; + // + // { + // let mut found = false; + // for pat in patterns { + // if s.contains(pat) { + // found = true; + // break; + // } + // } + // if !found { + // return Cow::Borrowed(s); + // } + // } + // + // let ac = AhoCorasick::new(patterns); + // + // Cow::Owned(ac.replace_all(s, replace_with)) + Cow::Owned( + s.replace("\\", "\\\\") + .replace('\u{0008}', "\\b") + .replace('\u{000C}', "\\f") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t") + .replace('\u{000B}', "\\v") + .replace("\00", "\\x000") + .replace("\01", "\\x001") + .replace("\02", "\\x002") + .replace("\03", "\\x003") + .replace("\04", "\\x004") + .replace("\05", "\\x005") + .replace("\06", "\\x006") + .replace("\07", "\\x007") + .replace("\08", "\\x008") + .replace("\09", "\\x009") + .replace("\0", "\\0"), + ) } diff --git a/ecmascript/codegen/src/text_writer/basic_impl.rs b/ecmascript/codegen/src/text_writer/basic_impl.rs index fd4fd7cf0a8..d6e1220cefc 100644 --- a/ecmascript/codegen/src/text_writer/basic_impl.rs +++ b/ecmascript/codegen/src/text_writer/basic_impl.rs @@ -5,7 +5,7 @@ use std::{ sync::Arc, u16, }; -use swc_common::{FileName, SourceMap, Span}; +use swc_common::{BytePos, FileName, SourceFile, SourceMap, Span}; /// /// ----- @@ -23,6 +23,8 @@ pub struct JsWriter<'a, W: Write> { srcmap: Option<&'a mut SourceMapBuilder>, wr: W, written_bytes: usize, + + files: Vec>, } impl<'a, W: Write> JsWriter<'a, W> { @@ -42,6 +44,7 @@ impl<'a, W: Write> JsWriter<'a, W> { srcmap, wr, written_bytes: 0, + files: Vec::with_capacity(2), } } @@ -66,33 +69,10 @@ impl<'a, W: Write> JsWriter<'a, W> { fn write(&mut self, span: Option, data: &str) -> io::Result { let mut cnt = 0; - macro_rules! srcmap { - ($byte_pos:expr) => {{ - if let Some(ref mut srcmap) = self.srcmap { - let loc = self.cm.lookup_char_pos($byte_pos); - - let src = match loc.file.name { - FileName::Real(ref p) => Some(p.display().to_string()), - _ => None, - }; - if loc.col.0 < u16::MAX as usize { - srcmap.add( - self.line_count as _, - self.line_pos as _, - (loc.line - 1) as _, - loc.col.0 as _, - src.as_ref().map(|s| &**s), - None, - ); - } - } - }}; - } - if !data.is_empty() { if let Some(span) = span { if !span.is_dummy() { - srcmap!(span.lo()) + self.srcmap(span.lo()) } } @@ -104,13 +84,43 @@ impl<'a, W: Write> JsWriter<'a, W> { if let Some(span) = span { if !span.is_dummy() { - srcmap!(span.hi()) + self.srcmap(span.hi()) } } } Ok(cnt) } + + fn srcmap(&mut self, byte_pos: BytePos) { + if let Some(ref mut srcmap) = self.srcmap { + let fm = match SourceMap::lookup_source_file_in(&self.files, byte_pos) { + Some(fm) => fm, + None => { + let fm = self.cm.lookup_source_file(byte_pos); + self.files.push(fm.clone()); + fm + } + }; + + let loc = self.cm.lookup_char_pos_with(fm, byte_pos); + + let src = match loc.file.name { + FileName::Real(ref p) => Some(p.display().to_string()), + _ => None, + }; + if loc.col.0 < u16::MAX as usize { + srcmap.add( + self.line_count as _, + self.line_pos as _, + (loc.line - 1) as _, + loc.col.0 as _, + src.as_ref().map(|s| &**s), + None, + ); + } + } + } } impl<'a, W: Write> WriteJs for JsWriter<'a, W> { @@ -178,6 +188,11 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { Ok(()) } + fn write_comment(&mut self, span: Span, s: &str) -> Result { + self.write(Some(span), s)?; + Ok(()) + } + fn write_str_lit(&mut self, span: Span, s: &str) -> Result { self.write(Some(span), s)?; Ok(()) @@ -188,11 +203,6 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { Ok(()) } - fn write_comment(&mut self, span: Span, s: &str) -> Result { - self.write(Some(span), s)?; - Ok(()) - } - fn write_symbol(&mut self, span: Span, s: &str) -> Result { self.write(Some(span), s)?; Ok(()) diff --git a/ecmascript/parser/Cargo.toml b/ecmascript/parser/Cargo.toml index 55d115a7095..9e6a2654157 100644 --- a/ecmascript/parser/Cargo.toml +++ b/ecmascript/parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swc_ecma_parser" -version = "0.19.2" +version = "0.20.0" authors = ["강동윤 "] license = "Apache-2.0/MIT" repository = "https://github.com/swc-project/swc.git" diff --git a/ecmascript/parser/benches/parser.rs b/ecmascript/parser/benches/parser.rs index 5790f847dcd..9f7a24711d1 100644 --- a/ecmascript/parser/benches/parser.rs +++ b/ecmascript/parser/benches/parser.rs @@ -144,6 +144,15 @@ fn yui_ts(b: &mut Bencher) { ) } +#[bench] +fn large(b: &mut Bencher) { + bench_module( + b, + Syntax::Typescript(Default::default()), + include_str!("../../codegen/benches/large-partial.js"), + ) +} + fn bench_module(b: &mut Bencher, syntax: Syntax, src: &'static str) { b.bytes = src.len() as _; diff --git a/ecmascript/parser/src/lexer/mod.rs b/ecmascript/parser/src/lexer/mod.rs index 818a802e414..2bfdc4cd3b5 100644 --- a/ecmascript/parser/src/lexer/mod.rs +++ b/ecmascript/parser/src/lexer/mod.rs @@ -16,7 +16,7 @@ use crate::{ }; use either::Either::{Left, Right}; use smallvec::{smallvec, SmallVec}; -use std::{char, iter::FusedIterator}; +use std::{char, iter::FusedIterator, mem::take}; use swc_atoms::{js_word, JsWord}; use swc_common::{ comments::{Comment, Comments}, @@ -103,6 +103,8 @@ pub struct Lexer<'a, I: Input> { state: State, pub(crate) syntax: Syntax, pub(crate) target: JscTarget, + + buf: String, } impl FusedIterator for Lexer<'_, I> {} @@ -128,9 +130,25 @@ impl<'a, I: Input> Lexer<'a, I> { ctx: Default::default(), syntax, target, + buf: String::with_capacity(16), } } + /// Utility method to reuse buffer. + fn with_buf(&mut self, op: F) -> LexResult + where + F: for<'any> FnOnce(&mut Lexer<'any, I>, &mut String) -> LexResult, + { + let mut buf = take(&mut self.buf); + buf.clear(); + + let res = op(self, &mut buf); + + self.buf = buf; + + res + } + /// babel: `getTokenFromCode` fn read_token(&mut self) -> LexResult> { let c = match self.input.cur() { @@ -659,73 +677,75 @@ impl<'a, I: Input> Lexer<'a, I> { debug_assert!(self.cur().is_some()); let mut first = true; - let mut has_escape = false; - let mut word = { - // Optimize for idents without escpae - let s = self.input.uncons_while(|c| { - if c.is_ident_part() { - return true; - } - if c == '\\' { - has_escape = true; - } - false - }); - - if !has_escape { - return Ok((s.into(), false)); - } - if !s.is_empty() { - first = false; - } - String::from(s) - }; - - while let Some(c) = { - // Optimization + self.with_buf(|l, buf| { + let mut has_escape = false; { - let s = self.input.uncons_while(|c| c.is_ident_part()); + // Optimize for idents without escpae + let s = l.input.uncons_while(|c| { + if c.is_ident_part() { + return true; + } + if c == '\\' { + has_escape = true; + } + false + }); + + if !has_escape { + return Ok((s.into(), false)); + } if !s.is_empty() { first = false; } - word.push_str(s) - } + buf.push_str(s); + }; - self.cur() - } { - let start = self.cur_pos(); - - match c { - c if c.is_ident_part() => { - self.bump(); - word.push(c); - } - // unicode escape - '\\' => { - self.bump(); - if !self.is('u') { - self.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)? + while let Some(c) = { + // Optimization + { + let s = l.input.uncons_while(|c| c.is_ident_part()); + if !s.is_empty() { + first = false; } - let c = self.read_unicode_escape(start, &mut Raw(None))?; - let valid = if first { - c.is_ident_start() - } else { - c.is_ident_part() - }; + buf.push_str(s) + } - if !valid { - self.error(start, SyntaxError::InvalidIdentChar)? + l.cur() + } { + let start = l.cur_pos(); + + match c { + c if c.is_ident_part() => { + l.bump(); + buf.push(c); } - word.extend(c); - } + // unicode escape + '\\' => { + l.bump(); + if !l.is('u') { + l.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)? + } + let c = l.read_unicode_escape(start, &mut Raw(None))?; + let valid = if first { + c.is_ident_start() + } else { + c.is_ident_part() + }; - _ => { - break; + if !valid { + l.error(start, SyntaxError::InvalidIdentChar)? + } + buf.extend(c); + } + + _ => { + break; + } } + first = false; } - first = false; - } - Ok((word.into(), has_escape)) + Ok(((&**buf).into(), has_escape)) + }) } fn read_unicode_escape(&mut self, start: BytePos, raw: &mut Raw) -> LexResult { @@ -784,42 +804,43 @@ impl<'a, I: Input> Lexer<'a, I> { let quote = self.cur().unwrap(); self.bump(); // '"' - let mut out = String::new(); - let mut has_escape = false; + self.with_buf(|l, out| { + let mut has_escape = false; - while let Some(c) = { - // Optimization - { - let s = self - .input - .uncons_while(|c| c != quote && c != '\\' && !c.is_line_break()); - out.push_str(s); - } - self.cur() - } { - match c { - c if c == quote => { - self.bump(); - return Ok(Token::Str { - value: out.into(), - has_escape, - }); + while let Some(c) = { + // Optimization + { + let s = l + .input + .uncons_while(|c| c != quote && c != '\\' && !c.is_line_break()); + out.push_str(s); } - '\\' => { - if let Some(s) = self.read_escaped_char(&mut Raw(None))? { - out.extend(s); + l.cur() + } { + match c { + c if c == quote => { + l.bump(); + return Ok(Token::Str { + value: (&**out).into(), + has_escape, + }); + } + '\\' => { + if let Some(s) = l.read_escaped_char(&mut Raw(None))? { + out.extend(s); + } + has_escape = true + } + c if c.is_line_break() => l.error(start, SyntaxError::UnterminatedStrLit)?, + _ => { + out.push(c); + l.bump(); } - has_escape = true - } - c if c.is_line_break() => self.error(start, SyntaxError::UnterminatedStrLit)?, - _ => { - out.push(c); - self.bump(); } } - } - self.error(start, SyntaxError::UnterminatedStrLit)? + l.error(start, SyntaxError::UnterminatedStrLit)? + }) } /// Expects current char to be '/' @@ -829,31 +850,33 @@ impl<'a, I: Input> Lexer<'a, I> { self.bump(); let (mut escaped, mut in_class) = (false, false); - let mut content = String::new(); let content_start = self.cur_pos(); - - while let Some(c) = self.cur() { - // This is ported from babel. - // Seems like regexp literal cannot contain linebreak. - if c.is_line_break() { - self.error(start, SyntaxError::UnterminatedRegxp)?; - } - - if escaped { - escaped = false; - } else { - match c { - '[' => in_class = true, - ']' if in_class => in_class = false, - // Termniates content part of regex literal - '/' if !in_class => break, - _ => {} + let content = self.with_buf(|l, buf| { + while let Some(c) = l.cur() { + // This is ported from babel. + // Seems like regexp literal cannot contain linebreak. + if c.is_line_break() { + l.error(start, SyntaxError::UnterminatedRegxp)?; } - escaped = c == '\\'; + + if escaped { + escaped = false; + } else { + match c { + '[' => in_class = true, + ']' if in_class => in_class = false, + // Termniates content part of regex literal + '/' if !in_class => break, + _ => {} + } + escaped = c == '\\'; + } + l.bump(); + buf.push(c); } - self.bump(); - content.push(c); - } + + Ok((&**buf).into()) + })?; let content_span = Span::new(content_start, self.cur_pos(), Default::default()); // input is terminated without following `/` @@ -874,7 +897,7 @@ impl<'a, I: Input> Lexer<'a, I> { .map(|(value, _)| value) .unwrap_or(js_word!("")); - Ok(Regex(content.into(), flags)) + Ok(Regex(content, flags)) } fn read_shebang(&mut self) -> LexResult> { diff --git a/ecmascript/parser/src/lexer/number.rs b/ecmascript/parser/src/lexer/number.rs index 853e7ff2cc8..404777f59f2 100644 --- a/ecmascript/parser/src/lexer/number.rs +++ b/ecmascript/parser/src/lexer/number.rs @@ -99,14 +99,15 @@ impl<'a, I: Input> Lexer<'a, I> { let mut raw = Raw(Some(String::new())); // Read numbers after dot let dec_val = self.read_int(10, 0, &mut raw)?; - let mut s = String::new(); - write!(s, "{}.", val).unwrap(); + val = self.with_buf(|l, s| { + write!(s, "{}.", val).unwrap(); - if let Some(..) = dec_val { - s.push_str(&raw.0.as_ref().unwrap()); - } + if let Some(..) = dec_val { + s.push_str(&raw.0.as_ref().unwrap()); + } - val = s.parse().expect("failed to parse float using rust's impl"); + Ok(s.parse().expect("failed to parse float using rust's impl")) + })?; } // Handle 'e' and 'E' diff --git a/ecmascript/parser/src/lexer/tests.rs b/ecmascript/parser/src/lexer/tests.rs index e9c1896d12c..a53c9dcdbef 100644 --- a/ecmascript/parser/src/lexer/tests.rs +++ b/ecmascript/parser/src/lexer/tests.rs @@ -4,16 +4,20 @@ use super::{ state::{lex, lex_module, lex_tokens, with_lexer}, *, }; -use crate::error::{Error, SyntaxError}; +use crate::{ + error::{Error, SyntaxError}, + make_span, +}; use std::{ops::Range, str}; +use swc_common::SpanData; use test::{black_box, Bencher}; -fn sp(r: Range) -> Span { - Span::new( - BytePos(r.start as u32), - BytePos(r.end as u32), - Default::default(), - ) +fn sp(r: Range) -> SpanData { + SpanData { + lo: BytePos(r.start as u32), + hi: BytePos(r.end as u32), + ctxt: Default::default(), + } } trait LineBreak: Into { @@ -38,6 +42,11 @@ impl SpanRange for usize { ) } } +impl SpanRange for SpanData { + fn into_span(self) -> Span { + Span::new(self.lo, self.hi, self.ctxt) + } +} impl SpanRange for Range { fn into_span(self) -> Span { Span::new( @@ -56,7 +65,7 @@ trait WithSpan: Sized { TokenAndSpan { token: self.into_token(), had_line_break: false, - span: span.into_span(), + span: span.into_span().data(), } } fn into_token(self) -> Token; @@ -120,7 +129,7 @@ fn module_legacy_decimal() { assert_eq!( lex_module(Syntax::default(), "08"), vec![Token::Error(Error { - span: sp(0..2), + span: make_span(sp(0..2)), error: SyntaxError::LegacyDecimal, }) .span(0..2) @@ -133,7 +142,7 @@ fn module_legacy_comment_1() { assert_eq!( lex_module(Syntax::default(), ""), vec![Token::Error(Error { - span: sp(0..3), + span: make_span(sp(0..3)), error: SyntaxError::LegacyCommentInModule, }) .span(0..3) @@ -278,6 +287,17 @@ fn str_escape_2() { ); } +#[test] +fn str_escape_3() { + assert_eq!( + lex_tokens(Syntax::default(), r#"'\x00'"#), + vec![Token::Str { + value: "\x00".into(), + has_escape: true + }] + ); +} + #[test] fn str_escape_hex() { assert_eq!( diff --git a/ecmascript/parser/src/lexer/util.rs b/ecmascript/parser/src/lexer/util.rs index 9f6ddc3edd7..e0341e23fc1 100644 --- a/ecmascript/parser/src/lexer/util.rs +++ b/ecmascript/parser/src/lexer/util.rs @@ -11,7 +11,7 @@ use std::char; use swc_common::{ comments::{Comment, CommentKind}, errors::DiagnosticBuilder, - BytePos, Span, SyntaxContext, + BytePos, Span, SpanData, SyntaxContext, }; use unicode_xid::UnicodeXID; @@ -40,7 +40,7 @@ impl Raw { // pub const PARAGRAPH_SEPARATOR: char = '\u{2029}'; impl<'a, I: Input> Lexer<'a, I> { - pub(super) fn span(&self, start: BytePos) -> Span { + pub(super) fn span(&self, start: BytePos) -> SpanData { let end = self.last_pos(); if cfg!(debug_assertions) && start > end { unreachable!( @@ -49,7 +49,11 @@ impl<'a, I: Input> Lexer<'a, I> { start.0, end.0 ) } - Span::new(start, end, Default::default()) + SpanData { + lo: start, + hi: end, + ctxt: SyntaxContext::empty(), + } } pub(super) fn bump(&mut self) { @@ -90,7 +94,7 @@ impl<'a, I: Input> Lexer<'a, I> { #[cold] pub(super) fn error(&mut self, start: BytePos, kind: SyntaxError) -> LexResult { let span = self.span(start); - self.error_span(span, kind) + self.error_span(Span::new(span.lo, span.hi, span.ctxt), kind) } #[cold] @@ -106,7 +110,7 @@ impl<'a, I: Input> Lexer<'a, I> { #[cold] pub(super) fn emit_error(&mut self, start: BytePos, kind: SyntaxError) { let span = self.span(start); - self.emit_error_span(span, kind) + self.emit_error_span(Span::new(span.lo, span.hi, span.ctxt), kind) } #[cold] diff --git a/ecmascript/parser/src/lib.rs b/ecmascript/parser/src/lib.rs index 5d71aaf6866..0e536cf1289 100644 --- a/ecmascript/parser/src/lib.rs +++ b/ecmascript/parser/src/lib.rs @@ -102,7 +102,7 @@ pub use self::{ parser::*, }; use serde::{Deserialize, Serialize}; -use swc_common::{errors::Handler, Span}; +use swc_common::{errors::Handler, Span, SpanData}; #[macro_use] mod macros; @@ -453,3 +453,7 @@ where f(Session { handler: &handler }, (&*fm).into()) }) } + +fn make_span(data: SpanData) -> Span { + Span::new(data.lo, data.hi, data.ctxt) +} diff --git a/ecmascript/parser/src/parser/class_and_fn.rs b/ecmascript/parser/src/parser/class_and_fn.rs index 392c5e8465a..7521c7ffa85 100644 --- a/ecmascript/parser/src/parser/class_and_fn.rs +++ b/ecmascript/parser/src/parser/class_and_fn.rs @@ -1,5 +1,5 @@ use super::{ident::MaybeOptionalIdentParser, *}; -use crate::{error::SyntaxError, Tokens}; +use crate::{error::SyntaxError, make_span, Tokens}; use either::Either; use swc_atoms::js_word; use swc_common::Spanned; @@ -118,7 +118,7 @@ impl<'a, I: Tokens> Parser<'a, I> { // Handle TS1172 if eat!("extends") { - p.emit_err(p.input.prev_span(), SyntaxError::TS1172); + p.emit_err(make_span(p.input.prev_span()), SyntaxError::TS1172); p.parse_lhs_expr()?; if p.input.syntax().typescript() && is!('<') { @@ -135,7 +135,7 @@ impl<'a, I: Tokens> Parser<'a, I> { { // Handle TS1175 if p.input.syntax().typescript() && eat!("implements") { - p.emit_err(p.input.prev_span(), SyntaxError::TS1175); + p.emit_err(make_span(p.input.prev_span()), SyntaxError::TS1175); p.parse_ts_heritage_clause()?; } @@ -143,7 +143,7 @@ impl<'a, I: Tokens> Parser<'a, I> { // Handle TS1175 if p.input.syntax().typescript() && eat!("extends") { - p.emit_err(p.input.prev_span(), SyntaxError::TS1175); + p.emit_err(make_span(p.input.prev_span()), SyntaxError::TS1175); let sc = p.parse_lhs_expr()?; let type_params = if p.input.syntax().typescript() && is!('<') { @@ -299,7 +299,7 @@ impl<'a, I: Tokens> Parser<'a, I> { let decorators = self.parse_decorators(false)?; if eat!("declare") { - self.emit_err(self.input.prev_span(), SyntaxError::TS1031); + self.emit_err(make_span(self.input.prev_span()), SyntaxError::TS1031); } let accessibility = if self.input.syntax().typescript() { diff --git a/ecmascript/parser/src/parser/expr.rs b/ecmascript/parser/src/parser/expr.rs index bb62755418d..06e99289879 100644 --- a/ecmascript/parser/src/parser/expr.rs +++ b/ecmascript/parser/src/parser/expr.rs @@ -1,5 +1,5 @@ use super::{pat::PatType, util::ExprExt, *}; -use crate::{lexer::TokenContext, token::AssignOpToken}; +use crate::{lexer::TokenContext, make_span, token::AssignOpToken}; use either::Either; use swc_atoms::js_word; use swc_common::{ast_node, Spanned}; @@ -235,7 +235,7 @@ impl<'a, I: Tokens> Parser<'a, I> { if can_be_arrow && peeked_is!('(') { expect!("async"); - let async_span = self.input.prev_span(); + let async_span = make_span(self.input.prev_span()); return self.parse_paren_expr_or_arrow_fn(can_be_arrow, Some(async_span)); } } @@ -312,7 +312,10 @@ impl<'a, I: Tokens> Parser<'a, I> { | js_word!("private") | js_word!("protected") | js_word!("public") => { - self.emit_err(self.input.prev_span(), SyntaxError::InvalidIdentInStrict); + self.emit_err( + make_span(self.input.prev_span()), + SyntaxError::InvalidIdentInStrict, + ); } _ => {} } @@ -1129,7 +1132,7 @@ impl<'a, I: Tokens> Parser<'a, I> { && (is!(IdentRef) || (is!("...") && peeked_is!(IdentRef))) { let spread = if eat!("...") { - Some(self.input.prev_span()) + Some(make_span(self.input.prev_span())) } else { None }; @@ -1264,11 +1267,8 @@ impl<'a, I: Tokens> Parser<'a, I> { }) => { let new_type_ann = self.try_parse_ts_type_ann()?; if new_type_ann.is_some() { - *span = Span::new( - pat_start, - self.input.prev_span().hi(), - Default::default(), - ); + *span = + Span::new(pat_start, self.input.prev_span().hi, Default::default()); } *type_ann = new_type_ann; } @@ -1367,7 +1367,10 @@ impl<'a, I: Tokens> Parser<'a, I> { // function because any expressions that are part of FormalParameters are // evaluated before the resulting generator object is in a resumable state. if self.ctx().in_parameters { - syntax_error!(self.input.prev_span(), SyntaxError::YieldParamInGen) + syntax_error!( + make_span(self.input.prev_span()), + SyntaxError::YieldParamInGen + ) } if is!(';') || (!is!('*') && !cur!(false).map(Token::starts_expr).unwrap_or(true)) { diff --git a/ecmascript/parser/src/parser/ident.rs b/ecmascript/parser/src/parser/ident.rs index 63c7fb41e0e..28fff9b70e5 100644 --- a/ecmascript/parser/src/parser/ident.rs +++ b/ecmascript/parser/src/parser/ident.rs @@ -1,6 +1,6 @@ //! 12.1 Identifiers use super::*; -use crate::token::Keyword; +use crate::{make_span, token::Keyword}; use either::Either; use swc_atoms::js_word; use swc_ecma_parser_macros::parser; @@ -22,7 +22,7 @@ impl<'a, I: Tokens> Parser<'a, I> { let start = cur_pos!(); assert_and_bump!('#'); - let hash_end = self.input.prev_span().hi(); + let hash_end = self.input.prev_span().hi; if self.input.cur_pos() - hash_end != BytePos(0) { syntax_error!(span!(start), SyntaxError::SpaceBetweenHashAndIdent); } @@ -86,7 +86,10 @@ impl<'a, I: Tokens> Parser<'a, I> { // "package", "private", "protected", "public", "static", or "yield". match w { Word::Ident(js_word!("enum")) => { - p.emit_err(p.input.prev_span(), SyntaxError::InvalidIdentInStrict); + p.emit_err( + make_span(p.input.prev_span()), + SyntaxError::InvalidIdentInStrict, + ); } Word::Keyword(Keyword::Yield) | Word::Ident(js_word!("static")) @@ -99,7 +102,10 @@ impl<'a, I: Tokens> Parser<'a, I> { | Word::Ident(js_word!("public")) if strict => { - p.emit_err(p.input.prev_span(), SyntaxError::InvalidIdentInStrict); + p.emit_err( + make_span(p.input.prev_span()), + SyntaxError::InvalidIdentInStrict, + ); } _ => {} } @@ -113,7 +119,7 @@ impl<'a, I: Tokens> Parser<'a, I> { // It is a Syntax Error if the goal symbol of the syntactic grammar is Module // and the StringValue of IdentifierName is "await". Word::Keyword(Keyword::Await) if p.ctx().module => { - syntax_error!(p.input.prev_span(), SyntaxError::ExpectedIdent) + syntax_error!(make_span(p.input.prev_span()), SyntaxError::ExpectedIdent) } Word::Keyword(Keyword::This) if p.input.syntax().typescript() => { Ok(js_word!("this")) @@ -123,7 +129,7 @@ impl<'a, I: Tokens> Parser<'a, I> { Word::Keyword(Keyword::Yield) if incl_yield => Ok(js_word!("yield")), Word::Keyword(Keyword::Await) if incl_await => Ok(js_word!("await")), Word::Keyword(..) | Word::Null | Word::True | Word::False => { - syntax_error!(p.input.prev_span(), SyntaxError::ExpectedIdent) + syntax_error!(make_span(p.input.prev_span()), SyntaxError::ExpectedIdent) } } })?; diff --git a/ecmascript/parser/src/parser/input.rs b/ecmascript/parser/src/parser/input.rs index 5bc0c89485a..54b2848c39f 100644 --- a/ecmascript/parser/src/parser/input.rs +++ b/ecmascript/parser/src/parser/input.rs @@ -6,7 +6,7 @@ use crate::{ }; use lexer::TokenContexts; use std::{cell::RefCell, mem, rc::Rc}; -use swc_common::{BytePos, Span, DUMMY_SP}; +use swc_common::{BytePos, Span, SpanData, DUMMY_SP}; pub trait Tokens: Clone + Iterator { fn set_ctx(&mut self, ctx: Context); @@ -149,7 +149,7 @@ impl Tokens for Capturing { pub(super) struct Buffer { iter: I, /// Span of the previous token. - prev_span: Span, + prev_span: SpanData, cur: Option, /// Peeked token next: Option, @@ -166,7 +166,7 @@ impl Buffer { Buffer { iter: lexer, cur: None, - prev_span: DUMMY_SP, + prev_span: DUMMY_SP.data(), next: None, } } @@ -183,6 +183,7 @@ impl Buffer { }); } + #[inline(never)] fn bump_inner(&mut self) -> Option { let prev = self.cur.take(); self.prev_span = match prev { @@ -255,6 +256,7 @@ impl Buffer { } /// Get current token. Returns `None` only on eof. + #[inline(always)] pub fn cur(&mut self) -> Option<&Token> { if self.cur.is_none() { self.bump_inner(); @@ -262,6 +264,7 @@ impl Buffer { self.cur.as_ref().map(|item| &item.token) } + #[inline(always)] pub fn is(&mut self, expected: &Token) -> bool { match self.cur() { Some(t) => *expected == *t, @@ -269,6 +272,7 @@ impl Buffer { } } + #[inline(always)] pub fn peeked_is(&mut self, expected: &Token) -> bool { match self.peek() { Some(t) => *expected == *t, @@ -276,6 +280,7 @@ impl Buffer { } } + #[inline(always)] pub fn eat(&mut self, expected: &Token) -> bool { let v = self.is(expected); if v { @@ -284,64 +289,80 @@ impl Buffer { v } + #[inline(always)] pub fn eat_keyword(&mut self, kwd: Keyword) -> bool { self.eat(&Word(Word::Keyword(kwd))) } /// Returns start of current token. + #[inline(always)] pub fn cur_pos(&mut self) -> BytePos { let _ = self.cur(); self.cur .as_ref() - .map(|item| item.span.lo()) + .map(|item| item.span.lo) .unwrap_or_else(|| { // eof self.last_pos() }) } + #[inline(always)] pub fn cur_span(&self) -> Span { - self.cur + let data = self + .cur .as_ref() .map(|item| item.span) - .unwrap_or(self.prev_span) + .unwrap_or(self.prev_span); + + Span::new(data.lo, data.hi, data.ctxt) } /// Returns last byte position of previous token. + #[inline(always)] pub fn last_pos(&self) -> BytePos { - self.prev_span.hi() + self.prev_span.hi } /// Returns span of the previous token. - pub fn prev_span(&self) -> Span { + #[inline(always)] + pub fn prev_span(&self) -> SpanData { self.prev_span } + #[inline(always)] pub(crate) fn get_ctx(&self) -> Context { self.iter.ctx() } + #[inline(always)] pub(crate) fn set_ctx(&mut self, ctx: Context) { self.iter.set_ctx(ctx); } + #[inline(always)] pub fn syntax(&self) -> Syntax { self.iter.syntax() } + #[inline(always)] pub fn target(&self) -> JscTarget { self.iter.target() } + #[inline(always)] pub(crate) fn set_expr_allowed(&mut self, allow: bool) { self.iter.set_expr_allowed(allow) } + #[inline(always)] pub(crate) fn token_context(&self) -> &lexer::TokenContexts { self.iter.token_context() } + #[inline(always)] pub(crate) fn token_context_mut(&mut self) -> &mut lexer::TokenContexts { self.iter.token_context_mut() } + #[inline(always)] pub(crate) fn set_token_context(&mut self, c: lexer::TokenContexts) { self.iter.set_token_context(c) } diff --git a/ecmascript/parser/src/parser/jsx.rs b/ecmascript/parser/src/parser/jsx.rs index cee951a1ee6..34c9f78158d 100644 --- a/ecmascript/parser/src/parser/jsx.rs +++ b/ecmascript/parser/src/parser/jsx.rs @@ -1,4 +1,5 @@ use super::*; +use crate::make_span; use either::Either; use swc_common::{Span, Spanned, SyntaxContext}; @@ -16,7 +17,7 @@ impl<'a, I: Tokens> Parser<'a, I> { Token::JSXName { .. } => match bump!() { Token::JSXName { name } => { let span = self.input.prev_span(); - Ok(Ident::new(name, span)) + Ok(Ident::new(name, make_span(span))) } _ => unreachable!(), }, @@ -399,7 +400,7 @@ impl<'a, I: Tokens> Parser<'a, I> { let span = self.input.prev_span(); match token { Token::JSXText { raw } => Ok(JSXText { - span, + span: make_span(span), // TODO value: raw.clone(), raw, diff --git a/ecmascript/parser/src/parser/macros.rs b/ecmascript/parser/src/parser/macros.rs index ccd1a8c834c..9e4c01d457a 100644 --- a/ecmascript/parser/src/parser/macros.rs +++ b/ecmascript/parser/src/parser/macros.rs @@ -115,8 +115,7 @@ macro_rules! assert_and_bump { /// if token has data like string. macro_rules! eat { ($p:expr, ';') => {{ - use log::trace; - trace!("eat(';'): cur={:?}", cur!($p, true)); + log::trace!("eat(';'): cur={:?}", cur!($p, true)); $p.input.eat(&Token::Semi) || eof!($p) || is!($p, '}') @@ -267,7 +266,7 @@ macro_rules! cur_pos { macro_rules! last_pos { ($p:expr) => { - $p.input.prev_span().hi() + $p.input.prev_span().hi }; } diff --git a/ecmascript/parser/src/parser/object.rs b/ecmascript/parser/src/parser/object.rs index a33b2a4d90a..b0b825109c6 100644 --- a/ecmascript/parser/src/parser/object.rs +++ b/ecmascript/parser/src/parser/object.rs @@ -1,6 +1,7 @@ //! Parser for object literal. use super::*; +use crate::make_span; use swc_atoms::js_word; use swc_common::Spanned; @@ -201,7 +202,7 @@ impl<'a, I: Tokens> ParseObject<'a, Box> for Parser<'a, I> { }; if eat!('?') { - self.emit_err(self.input.prev_span(), SyntaxError::TS1162); + self.emit_err(make_span(self.input.prev_span()), SyntaxError::TS1162); } // `ident` from parse_prop_name is parsed as 'IdentifierName' @@ -230,7 +231,7 @@ impl<'a, I: Tokens> ParseObject<'a, Box> for Parser<'a, I> { match ident.sym { js_word!("get") | js_word!("set") | js_word!("async") => { if has_modifiers { - self.emit_err(modifiers_span, SyntaxError::TS1042); + self.emit_err(make_span(modifiers_span), SyntaxError::TS1042); } let key = self.parse_prop_name()?; diff --git a/ecmascript/parser/src/parser/pat.rs b/ecmascript/parser/src/parser/pat.rs index 54b31171002..157c289babf 100644 --- a/ecmascript/parser/src/parser/pat.rs +++ b/ecmascript/parser/src/parser/pat.rs @@ -1,6 +1,6 @@ //! 13.3.3 Destructuring Binding Patterns use super::{util::ExprExt, *}; -use crate::{parser::expr::PatOrExprOrSpread, token::AssignOpToken}; +use crate::{make_span, parser::expr::PatOrExprOrSpread, token::AssignOpToken}; use std::iter; use swc_atoms::js_word; use swc_common::Spanned; @@ -158,7 +158,7 @@ impl<'a, I: Tokens> Parser<'a, I> { // opt = true; } _ => syntax_error!( - self.input.prev_span(), + make_span(self.input.prev_span()), SyntaxError::TsBindingPatCannotBeOptional ), } @@ -192,8 +192,7 @@ impl<'a, I: Tokens> Parser<'a, I> { }) => { let new_type_ann = self.try_parse_ts_type_ann()?; if new_type_ann.is_some() { - *span = - Span::new(pat_start, self.input.prev_span().hi(), Default::default()); + *span = Span::new(pat_start, self.input.prev_span().hi, Default::default()); } *type_ann = new_type_ann; } @@ -368,7 +367,7 @@ impl<'a, I: Tokens> Parser<'a, I> { params.push(pat); if self.syntax().typescript() && eat!('?') { - self.emit_err(self.input.prev_span(), SyntaxError::TS1047); + self.emit_err(make_span(self.input.prev_span()), SyntaxError::TS1047); // } diff --git a/ecmascript/parser/src/parser/stmt.rs b/ecmascript/parser/src/parser/stmt.rs index 028de47cbfd..0d15fb8bffc 100644 --- a/ecmascript/parser/src/parser/stmt.rs +++ b/ecmascript/parser/src/parser/stmt.rs @@ -1,5 +1,5 @@ use super::{pat::PatType, *}; -use crate::error::SyntaxError; +use crate::{error::SyntaxError, make_span}; use swc_atoms::js_word; use swc_common::Spanned; #[cfg(test)] @@ -462,7 +462,7 @@ impl<'a, I: Tokens> Parser<'a, I> { } cases.push(SwitchCase { - span: Span::new(case_start, p.input.prev_span().hi(), Default::default()), + span: Span::new(case_start, p.input.prev_span().hi, Default::default()), test, cons, }); @@ -638,10 +638,10 @@ impl<'a, I: Tokens> Parser<'a, I> { // NewLine is ok if is_exact!(';') || eof!() { let prev_span = self.input.prev_span(); - let span = if prev_span == var_span { - Span::new(prev_span.hi(), prev_span.hi(), Default::default()) + let span = if prev_span == var_span.data() { + Span::new(prev_span.hi, prev_span.hi, Default::default()) } else { - prev_span + make_span(prev_span) }; self.emit_err(span, SyntaxError::TS1009); break; diff --git a/ecmascript/parser/src/parser/stmt/module_item.rs b/ecmascript/parser/src/parser/stmt/module_item.rs index 4e026653c55..12057cd4cbb 100644 --- a/ecmascript/parser/src/parser/stmt/module_item.rs +++ b/ecmascript/parser/src/parser/stmt/module_item.rs @@ -306,7 +306,10 @@ impl<'a, I: Tokens> Parser<'a, I> { } else if self.input.syntax().export_default_from() && (is!("from") || (is!(',') && peeked_is!('{'))) { - export_default = Some(Ident::new("default".into(), self.input.prev_span())) + export_default = Some(Ident::new( + "default".into(), + make_span(self.input.prev_span()), + )) } else { let expr = self.include_in_expr(true).parse_assignment_expr()?; expect!(';'); diff --git a/ecmascript/parser/src/parser/typescript.rs b/ecmascript/parser/src/parser/typescript.rs index 243437d2c0c..25d7e9dff76 100644 --- a/ecmascript/parser/src/parser/typescript.rs +++ b/ecmascript/parser/src/parser/typescript.rs @@ -1,5 +1,5 @@ use super::*; -use crate::lexer::TokenContexts; +use crate::{lexer::TokenContexts, make_span}; use either::Either; use smallvec::smallvec; use swc_atoms::js_word; @@ -290,7 +290,7 @@ impl<'a, I: Tokens> Parser<'a, I> { expect!("this"); Ok(TsThisType { - span: self.input.prev_span(), + span: make_span(self.input.prev_span()), }) } diff --git a/ecmascript/parser/src/token.rs b/ecmascript/parser/src/token.rs index 7b4ea064f42..40da89a06dc 100644 --- a/ecmascript/parser/src/token.rs +++ b/ecmascript/parser/src/token.rs @@ -12,7 +12,7 @@ use std::{ use swc_atoms::{js_word, JsWord}; #[cfg(feature = "fold")] use swc_common::Fold; -use swc_common::{Span, Spanned}; +use swc_common::SpanData; pub(crate) use swc_ecma_ast::AssignOp as AssignOpToken; use swc_ecma_ast::BinaryOp; @@ -214,12 +214,12 @@ impl BinOpToken { } } -#[derive(Debug, Clone, PartialEq, Spanned)] +#[derive(Debug, Clone, PartialEq)] pub struct TokenAndSpan { pub token: Token, /// Had a line break before this token? pub had_line_break: bool, - pub span: Span, + pub span: SpanData, } #[derive(Kind, Clone, PartialEq, Eq, Hash)] diff --git a/ecmascript/transforms/Cargo.toml b/ecmascript/transforms/Cargo.toml index 6205164e2d9..752b1481eeb 100644 --- a/ecmascript/transforms/Cargo.toml +++ b/ecmascript/transforms/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swc_ecma_transforms" -version = "0.7.2" +version = "0.8.0" authors = ["강동윤 "] license = "Apache-2.0/MIT" repository = "https://github.com/swc-project/swc.git" @@ -12,8 +12,8 @@ edition = "2018" swc_atoms = { version = "0.2.0", path ="../../atoms" } swc_common = { version = "0.5.0", path ="../../common" } swc_ecma_ast = { version = "0.17.0", path ="../ast" } -swc_ecma_utils = { version = "0.3.0", path ="../utils" } -swc_ecma_parser = { version = "0.19", path ="../parser", features = ["verify"] } +swc_ecma_utils = { version = "0.4.0", path ="../utils" } +swc_ecma_parser = { version = "0.20", path ="../parser", features = ["verify"] } dashmap = "=3.5.1" either = "1.5" fxhash = "0.2" @@ -34,7 +34,7 @@ log = "0.4.8" [dev-dependencies] testing = { version = "0.5", path ="../../testing" } -swc_ecma_codegen = { version = "0.16.0", path ="../codegen" } +swc_ecma_codegen = { version = "0.17.0", path ="../codegen" } tempfile = "3" pretty_assertions = "0.6" sourcemap = "5" diff --git a/ecmascript/utils/Cargo.toml b/ecmascript/utils/Cargo.toml index fa619779453..cd3aefb5a5c 100644 --- a/ecmascript/utils/Cargo.toml +++ b/ecmascript/utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "swc_ecma_utils" -version = "0.3.0" +version = "0.4.0" authors = ["강동윤 "] license = "Apache-2.0/MIT" repository = "https://github.com/swc-project/swc.git" @@ -14,7 +14,7 @@ edition = "2018" swc_ecma_ast = { version = "0.17.0", path ="../ast" } swc_atoms = { version = "0.2.0", path ="../../atoms" } swc_common = { version = "0.5.0", path ="../../common" } -swc_ecma_parser = { version = "0.19", path ="../parser", features = ["verify"] } +swc_ecma_parser = { version = "0.20", path ="../parser", features = ["verify"] } anyhow = "1.0.26" once_cell = "1" scoped-tls = "1"