refactor(css/parser): Respect spec (#2487)

This commit is contained in:
Alexander Akait 2021-10-23 15:27:16 +03:00 committed by GitHub
parent c482162206
commit 430a06ce4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 148 additions and 133 deletions

22
Cargo.lock generated
View File

@ -730,9 +730,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "h2"
version = "0.3.6"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c06815895acec637cd6ed6e9662c935b866d20a106f8361892893a7d9234964"
checksum = "7fd819562fcebdac5afc5c113c3ec36f902840b70fd4fc458799c8ce4607ae55"
dependencies = [
"bytes",
"fnv",
@ -784,9 +784,9 @@ dependencies = [
[[package]]
name = "http-body"
version = "0.4.3"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "399c583b2979440c60be0821a6199eca73bc3c8dcd9d070d75ac726e2c6186e5"
checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6"
dependencies = [
"bytes",
"http",
@ -816,9 +816,9 @@ dependencies = [
[[package]]
name = "hyper"
version = "0.14.13"
version = "0.14.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15d1cfb9e4f68655fa04c01f59edb405b6074a0f7118ea881e5026e4a1cd8593"
checksum = "2b91bb1f221b6ea1f1e4371216b70f40748774c2fb5971b450c07773fb92d26b"
dependencies = [
"bytes",
"futures-channel",
@ -1317,9 +1317,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.27.0"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c821014c18301591b89b843809ef953af9e3df0496c232d5c0611b0a52aac363"
checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9"
dependencies = [
"memchr",
]
@ -1531,9 +1531,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.20"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c9b1041b4387893b91ee6746cddfc28516aff326a3519fb2adf820932c5e6cb"
checksum = "10e2fcbb64ecbe64c8e040a386c3104d384583af58b956d870aaaf229df6e66d"
[[package]]
name = "pmutil"
@ -2552,7 +2552,7 @@ dependencies = [
[[package]]
name = "swc_css_parser"
version = "0.23.0"
version = "0.23.1"
dependencies = [
"bitflags",
"lexical",

View File

@ -6,7 +6,7 @@ edition = "2018"
license = "Apache-2.0/MIT"
name = "swc_css_parser"
repository = "https://github.com/swc-project/swc.git"
version = "0.23.0"
version = "0.23.1"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]

View File

@ -117,6 +117,8 @@ where
}};
}
// TODO: Consume the next input code point (see https://www.w3.org/TR/css-syntax-3/#consume-token). We should use `self.input.bump()` and reconsume where needed, according to the spec.
if let Some(c) = self.input.cur() {
if is_whitespace(c) {
self.skip_ws()?;
@ -130,12 +132,16 @@ where
}
if self.input.is_byte(b'#') {
let c = self.input.cur().unwrap();
let c = self.input.cur();
self.input.bump();
if is_name_continue(self.input.cur().unwrap()) || self.is_valid_escape()? {
let is_id = self.would_start_ident()?;
let first = self.input.cur();
let second = self.input.peek();
if is_name_continue(first.unwrap()) || self.is_valid_escape(first, second)? {
let third = self.input.peek_ahead();
let is_id = self.would_start_ident(first, second, third)?;
let name = self.read_name()?;
return Ok(Token::Hash {
@ -145,7 +151,7 @@ where
});
}
return Ok(Token::Delim { value: c });
return Ok(Token::Delim { value: c.unwrap() });
}
if self.input.is_byte(b'\'') {
@ -157,30 +163,30 @@ where
try_delim!(b')', ")");
if self.input.is_byte(b'+') {
let pos = self.input.cur_pos();
let c = self.input.cur().unwrap();
let start = self.input.cur_pos();
let c = self.input.cur();
self.input.bump();
if self.would_start_number()? {
self.input.reset_to(pos);
if self.would_start_number(None, None, None)? {
self.input.reset_to(start);
return self.read_numeric();
}
return Ok(Token::Delim { value: c });
return Ok(Token::Delim { value: c.unwrap() });
}
try_delim!(b',', ",");
if self.input.is_byte(b'-') {
let pos = self.input.cur_pos();
let c = self.input.cur().unwrap();
let start = self.input.cur_pos();
let c = self.input.cur();
self.input.bump();
if self.would_start_number()? {
self.input.reset_to(pos);
if self.would_start_number(None, None, None)? {
self.input.reset_to(start);
return self.read_numeric();
} else if self.input.cur() == Some('-') && self.input.peek() == Some('>') {
@ -188,30 +194,30 @@ where
self.input.bump();
return Ok(Token::CDC);
} else if self.would_start_ident()? {
self.input.reset_to(pos);
} else if self.would_start_ident(None, None, None)? {
self.input.reset_to(start);
return self
.read_name()
.map(|(value, raw)| Token::Ident { value, raw });
}
return Ok(Token::Delim { value: c });
return Ok(Token::Delim { value: c.unwrap() });
}
if self.input.is_byte(b'.') {
let pos = self.input.cur_pos();
let c = self.input.cur().unwrap();
let start = self.input.cur_pos();
let c = self.input.cur();
self.input.bump();
if self.would_start_number()? {
self.input.reset_to(pos);
if self.would_start_number(None, None, None)? {
self.input.reset_to(start);
return self.read_numeric();
}
return Ok(Token::Delim { value: c });
return Ok(Token::Delim { value: c.unwrap() });
}
try_delim!(b':', ":");
@ -219,7 +225,7 @@ where
try_delim!(b';', ";");
if self.input.is_byte(b'<') {
let c = self.input.cur().unwrap();
let c = self.input.cur();
self.input.bump();
@ -235,30 +241,34 @@ where
return Ok(tok!("<!--"));
}
return Ok(Token::Delim { value: c });
return Ok(Token::Delim { value: c.unwrap() });
}
if self.input.is_byte(b'@') {
let c = self.input.cur().unwrap();
let c = self.input.cur();
self.input.bump();
if self.would_start_ident()? {
let first = self.input.cur();
let second = self.input.peek();
let third = self.input.peek_ahead();
if self.would_start_ident(first, second, third)? {
return self.read_at_keyword();
}
return Ok(Token::Delim { value: c });
return Ok(Token::Delim { value: c.unwrap() });
}
try_delim!(b'[', "[");
if self.input.is_byte(b'\\') {
let c = self.input.cur().unwrap();
if self.is_valid_escape()? {
if self.is_valid_escape(None, None)? {
return self.read_ident_like();
}
let c = self.input.cur().unwrap();
self.input.bump();
return Ok(Token::Delim { value: c });
@ -292,8 +302,13 @@ where
return Ok(Token::Delim { value: c });
}
fn would_start_number(&mut self) -> LexResult<bool> {
let first = self.input.cur();
fn would_start_number(
&mut self,
maybe_first: Option<char>,
maybe_second: Option<char>,
maybe_third: Option<char>,
) -> LexResult<bool> {
let first = maybe_first.or(self.input.cur());
if first.is_none() {
return Ok(false);
@ -301,11 +316,11 @@ where
match first {
Some('+') | Some('-') => {
if let Some(second) = self.input.peek() {
if let Some(second) = maybe_second.or(self.input.peek()) {
return match second {
second if second.is_digit(10) => Ok(true),
'.' => {
if let Some(third) = self.input.peek_ahead() {
if let Some(third) = maybe_third.or(self.input.peek_ahead()) {
if third.is_digit(10) {
return Ok(true);
}
@ -410,7 +425,11 @@ where
fn read_numeric(&mut self) -> LexResult<Token> {
let number = self.read_number();
if self.would_start_ident()? {
let next_first = self.input.cur();
let next_second = self.input.peek();
let next_third = self.input.peek_ahead();
if self.would_start_ident(next_first, next_second, next_third)? {
let name = self.read_name()?;
return Ok(Token::Dimension {
@ -419,7 +438,7 @@ where
unit: name.0,
raw_unit: name.1,
});
} else if let Some(c) = self.input.cur() {
} else if let Some(c) = next_first {
if c == '%' {
self.input.bump();
@ -436,15 +455,21 @@ where
})
}
fn is_valid_escape(&mut self) -> LexResult<bool> {
if self.input.cur() != Some('\\') {
fn is_valid_escape(
&mut self,
maybe_first: Option<char>,
maybe_second: Option<char>,
) -> LexResult<bool> {
let first = maybe_first.or(self.input.cur());
if first != Some('\\') {
return Ok(false);
}
let c = self.input.peek();
let second = maybe_second.or(self.input.peek());
match c {
Some(c) => Ok(!is_newline(c)),
match second {
Some(second) => Ok(!is_newline(second)),
None => Ok(false),
}
}
@ -483,11 +508,8 @@ where
})
}
fn read_str(&mut self, mut ending_code_point: Option<char>) -> LexResult<Token> {
if ending_code_point.is_none() {
ending_code_point = self.input.cur();
}
fn read_str(&mut self, maybe_ending_code_point: Option<char>) -> LexResult<Token> {
let ending_code_point = maybe_ending_code_point.or(self.input.cur());
let mut value = String::new();
let mut raw = String::new();
@ -535,7 +557,11 @@ where
// Otherwise, (the stream starts with a valid escape) consume an escaped
// code point and append the returned code point to
// the <string-token>'s value.
else if self.is_valid_escape()? {
else if self.is_valid_escape(None, None)? {
raw.push(c);
self.input.bump();
let escape = self.read_escape()?;
value.push(escape.0);
@ -579,7 +605,11 @@ where
break;
}
Some(c) => {
if self.is_valid_escape().unwrap() {
if self.is_valid_escape(None, None).unwrap() {
raw.push(c);
self.input.bump();
let escaped = self.read_escape()?;
value.push(escaped.0);
@ -678,8 +708,12 @@ where
});
}
Some('\\') => {
if self.is_valid_escape()? {
Some(c) if c == '\\' => {
if self.is_valid_escape(None, None)? {
raw.push(c);
self.input.bump();
let escaped = self.read_escape()?;
value.push(escaped.0);
@ -708,22 +742,14 @@ where
}
fn read_escape(&mut self) -> LexResult<(char, String)> {
// TODO: per the spec, `\` should already be consumed before this function runs: https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
assert!(
self.input.eat_byte(b'\\'),
"read_escape: Expected a backslash"
);
let mut raw = String::new();
raw.push('\\');
let c = self.input.cur();
let c = match c {
Some(v) => v,
None => return Err(ErrorKind::InvalidEscape),
};
let mut raw = String::new();
if c.is_digit(16) {
let mut hex = c.to_digit(16).unwrap();
@ -776,88 +802,77 @@ where
})
}
/// Ported from `wouldStartIdentifier` of `esbuild`.
fn would_start_ident(&mut self) -> LexResult<bool> {
match self.input.cur() {
Some(cur) => {
if is_name_start(cur) {
return Ok(true);
}
fn would_start_ident(
&mut self,
maybe_first: Option<char>,
maybe_second: Option<char>,
maybe_third: Option<char>,
) -> LexResult<bool> {
if let Some(first) = maybe_first.or(self.input.cur()) {
if first == '-' {
if let Some(second) = maybe_second.or(self.input.peek()) {
if is_name_start(second) || second == '-' {
return Ok(true);
}
if cur == '-' {
if let Some(c) = self.input.peek() {
if is_name_start(c) {
return Ok(true);
}
match c {
'-' => return Ok(true),
match second {
'\\' => match maybe_third.or(self.input.peek_ahead()) {
Some(c2) => return Ok(!is_newline(c2)),
None => return Ok(false),
},
'\\' => match self.input.peek_ahead() {
Some(c2) => return Ok(!is_newline(c2)),
None => return Ok(false),
},
_ => {}
}
_ => {}
}
}
} else if is_name_start(first) {
return Ok(true);
} else if first == '\\' {
let second = self.input.peek();
return Ok(self.is_valid_escape(Some(first), second)?);
} else {
return Ok(false);
}
None => {}
}
Ok(self.is_valid_escape()?)
Ok(false)
}
/// Ported from `consumeName` of esbuild.
///
/// https://github.com/evanw/esbuild/blob/a9456dfbf08ab50607952eefb85f2418968c124c/internal/css_lexer/css_lexer.go#L548
fn read_name(&mut self) -> LexResult<(JsWord, JsWord)> {
let start = self.input.cur_pos();
self.input.uncons_while(is_name_continue);
let end = self.input.last_pos();
if !self.is_valid_escape()? {
let first = self.input.slice(start, end);
return Ok((first.into(), first.into()));
}
let mut raw = String::new();
let mut buf = String::new();
let first = self.input.slice(start, end);
buf.push_str(first);
raw.push_str(first);
let escaped = self.read_escape()?;
buf.push(escaped.0);
raw.push_str(&escaped.1);
let mut value = String::new();
loop {
let c = self.input.cur();
let c = match c {
Some(v) => v,
None => break,
};
if is_name_continue(c) {
self.last_pos = None;
self.input.bump();
match c {
Some(c) => {
if is_name_continue(c) {
self.last_pos = None;
self.input.bump();
buf.push(c);
raw.push(c)
} else if self.is_valid_escape()? {
let escaped = self.read_escape()?;
value.push(c);
raw.push(c);
} else if self.is_valid_escape(None, None)? {
raw.push(c);
buf.push(escaped.0);
raw.push_str(&escaped.1);
} else {
break;
self.input.bump();
let escaped = self.read_escape()?;
value.push(escaped.0);
raw.push_str(&escaped.1);
} else {
break;
}
}
None => {
break;
}
}
}
Ok((buf.into(), raw.into()))
Ok((value.into(), raw.into()))
}
fn skip_ws(&mut self) -> LexResult<()> {