LibJS: Add template literals

Adds fully functioning template literals. Because template literals
contain expressions, most of the work has to be done in the Lexer rather
than the Parser. And because of the complexity of template literals
(expressions, nesting, escapes, etc), the Lexer needs to have some
template-related state.

When entering a new template literal, a TemplateLiteralStart token is
emitted. When inside a literal, all text up until a '${' or '`' (or EOF,
but that's a syntax error) is emitted as a TemplateLiteralString token.
A '${' is then emitted as a TemplateLiteralExprStart token. At this
point, the Lexer proceeds as normal; however, it keeps track of the
number of opening and closing curly braces it has seen in order to
determine the close of the expression. Once it finds a matching curly
brace for the '${', a TemplateLiteralExprEnd token is emitted and the
state is updated accordingly.

When the Lexer is inside of a template literal, but not an expression,
and sees a '`', this must be the closing grave: a TemplateLiteralEnd
token is emitted.

The state required to correctly parse template strings consists of a
vector (for nesting) of two pieces of information: whether or not we
are in a template expression (as opposed to a template string); and
the count of the number of unmatched open curly braces we have seen
(only applicable if the Lexer is currently in a template expression).

TODO: Add support for template literal newlines in the JS REPL (this will
cause a syntax error currently):

    > `foo
    > bar`
    'foo
    bar'
This commit is contained in:
mattco98 2020-05-03 15:41:14 -07:00 committed by Andreas Kling
parent 2fdeb464f7
commit adb4accab3
Notes: sideshowbarker 2024-07-19 06:59:38 +09:00
11 changed files with 235 additions and 44 deletions

View File

@ -42,7 +42,9 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type)
case JS::TokenType::NumericLiteral:
return { palette.syntax_number() };
case JS::TokenType::StringLiteral:
case JS::TokenType::TemplateLiteral:
case JS::TokenType::TemplateLiteralStart:
case JS::TokenType::TemplateLiteralEnd:
case JS::TokenType::TemplateLiteralString:
case JS::TokenType::RegexLiteral:
case JS::TokenType::UnterminatedStringLiteral:
return { palette.syntax_string() };
@ -55,6 +57,8 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type)
case JS::TokenType::ParenClose:
case JS::TokenType::ParenOpen:
case JS::TokenType::Semicolon:
case JS::TokenType::TemplateLiteralExprStart:
case JS::TokenType::TemplateLiteralExprEnd:
return { palette.syntax_punctuation() };
case JS::TokenType::Ampersand:
case JS::TokenType::AmpersandEquals:

View File

@ -1229,6 +1229,28 @@ Value ArrayExpression::execute(Interpreter& interpreter) const
return array;
}
void TemplateLiteral::dump(int indent) const
{
ASTNode::dump(indent);
for (auto& expression : expressions())
expression.dump(indent + 1);
}
// Evaluates every part of the template in order and returns a JS string made
// of the concatenated string forms of the results. Returns an empty Value as
// soon as evaluating a part leaves an exception on the interpreter.
Value TemplateLiteral::execute(Interpreter& interpreter) const
{
    StringBuilder result;

    for (auto& part : expressions()) {
        auto part_value = part.execute(interpreter);
        if (interpreter.exception()) {
            // Abort: the remaining parts are not evaluated.
            return {};
        }
        result.append(part_value.to_string());
    }

    return js_string(interpreter, result.build());
}
void TryStatement::dump(int indent) const
{
ASTNode::dump(indent);
@ -1398,15 +1420,15 @@ Value ConditionalExpression::execute(Interpreter& interpreter) const
void ConditionalExpression::dump(int indent) const
{
ASTNode::dump(indent);
print_indent(indent);
print_indent(indent + 1);
printf("(Test)\n");
m_test->dump(indent + 1);
print_indent(indent);
m_test->dump(indent + 2);
print_indent(indent + 1);
printf("(Consequent)\n");
m_consequent->dump(indent + 1);
print_indent(indent);
m_consequent->dump(indent + 2);
print_indent(indent + 1);
printf("(Alternate)\n");
m_alternate->dump(indent + 1);
m_alternate->dump(indent + 2);
}
void SequenceExpression::dump(int indent) const

View File

@ -762,6 +762,24 @@ private:
Vector<RefPtr<Expression>> m_elements;
};
// AST node for a template literal (`...`). It holds the literal's parts as an
// ordered list of expressions; execute() concatenates their string values.
class TemplateLiteral final : public Expression {
public:
    // The vector is received by value, so move it into the member instead of
    // copying it a second time.
    TemplateLiteral(NonnullRefPtrVector<Expression> expressions)
        : m_expressions(move(expressions))
    {
    }

    virtual Value execute(Interpreter&) const override;
    virtual void dump(int indent) const override;

    // The parts of the literal, in source order.
    const NonnullRefPtrVector<Expression>& expressions() const { return m_expressions; }

private:
    virtual const char* class_name() const override { return "TemplateLiteral"; }

    const NonnullRefPtrVector<Expression> m_expressions;
};
class MemberExpression final : public Expression {
public:
MemberExpression(NonnullRefPtr<Expression> object, NonnullRefPtr<Expression> property, bool computed = false)

View File

@ -244,34 +244,74 @@ void Lexer::syntax_error(const char* msg)
Token Lexer::next()
{
size_t trivia_start = m_position;
auto in_template = !m_template_states.is_empty();
// consume whitespace and comments
while (true) {
if (isspace(m_current_char)) {
do {
if (!in_template || m_template_states.last().in_expr) {
// consume whitespace and comments
while (true) {
if (isspace(m_current_char)) {
do {
consume();
} while (isspace(m_current_char));
} else if (is_line_comment_start()) {
consume();
} while (isspace(m_current_char));
} else if (is_line_comment_start()) {
consume();
do {
do {
consume();
} while (!is_eof() && m_current_char != '\n');
} else if (is_block_comment_start()) {
consume();
} while (!is_eof() && m_current_char != '\n');
} else if (is_block_comment_start()) {
consume();
do {
consume();
} while (!is_eof() && !is_block_comment_end());
consume(); // consume *
consume(); // consume /
} else {
break;
do {
consume();
} while (!is_eof() && !is_block_comment_end());
consume(); // consume *
consume(); // consume /
} else {
break;
}
}
}
size_t value_start = m_position;
auto token_type = TokenType::Invalid;
if (is_identifier_start()) {
if (m_current_char == '`') {
consume();
if (!in_template) {
token_type = TokenType::TemplateLiteralStart;
m_template_states.append({ false, 0 });
} else {
if (m_template_states.last().in_expr) {
m_template_states.append({ false, 0 });
token_type = TokenType::TemplateLiteralStart;
} else {
m_template_states.take_last();
token_type = TokenType::TemplateLiteralEnd;
}
}
} else if (in_template && m_template_states.last().in_expr && m_template_states.last().open_bracket_count == 0 && m_current_char == '}') {
consume();
token_type = TokenType::TemplateLiteralExprEnd;
m_template_states.last().in_expr = false;
} else if (in_template && !m_template_states.last().in_expr) {
if (is_eof()) {
token_type = TokenType::UnterminatedTemplateLiteral;
m_template_states.take_last();
} else if (match('$', '{')) {
token_type = TokenType::TemplateLiteralExprStart;
consume();
consume();
m_template_states.last().in_expr = true;
} else {
while (!match('$', '{') && m_current_char != '`' && !is_eof()) {
if (match('\\', '$') || match('\\', '`'))
consume();
consume();
}
token_type = TokenType::TemplateLiteralString;
}
} else if (is_identifier_start()) {
// identifier or keyword
do {
consume();
@ -339,7 +379,7 @@ Token Lexer::next()
}
}
token_type = TokenType::NumericLiteral;
} else if (m_current_char == '"' || m_current_char == '\'' || m_current_char == '`') {
} else if (m_current_char == '"' || m_current_char == '\'') {
char stop_char = m_current_char;
consume();
while (m_current_char != stop_char && m_current_char != '\n' && !is_eof()) {
@ -353,10 +393,7 @@ Token Lexer::next()
token_type = TokenType::UnterminatedStringLiteral;
} else {
consume();
if (stop_char == '`')
token_type = TokenType::TemplateLiteral;
else
token_type = TokenType::StringLiteral;
token_type = TokenType::StringLiteral;
}
} else if (m_current_char == EOF) {
token_type = TokenType::Eof;
@ -416,6 +453,14 @@ Token Lexer::next()
}
}
if (!m_template_states.is_empty() && m_template_states.last().in_expr) {
if (token_type == TokenType::CurlyOpen) {
m_template_states.last().open_bracket_count++;
} else if (token_type == TokenType::CurlyClose) {
m_template_states.last().open_bracket_count--;
}
}
m_current_token = Token(
token_type,
m_source.substring_view(trivia_start - 1, value_start - trivia_start),
@ -425,4 +470,5 @@ Token Lexer::next()
return m_current_token;
}
}

View File

@ -71,6 +71,12 @@ private:
size_t m_line_column = 1;
bool m_log_errors = true;
// Lexer state for one template literal; the Lexer keeps one entry per level
// of template nesting.
struct TemplateState {
    // True while lexing inside a `${ ... }` expression, false while lexing the
    // literal's text.
    bool in_expr;
    // Number of unmatched '{' seen inside the current expression. This is a
    // size_t rather than a u8 so that 256 or more nested braces cannot wrap
    // the counter back to zero and make an inner '}' look like the end of the
    // expression.
    size_t open_bracket_count;
};
Vector<TemplateState> m_template_states;
static HashMap<String, TokenType> s_keywords;
static HashMap<String, TokenType> s_three_char_tokens;
static HashMap<String, TokenType> s_two_char_tokens;

View File

@ -394,6 +394,8 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression()
return parse_function_node<FunctionExpression>();
case TokenType::BracketOpen:
return parse_array_expression();
case TokenType::TemplateLiteralStart:
return parse_template_literal();
case TokenType::New:
return parse_new_expression();
default:
@ -536,6 +538,40 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
return create_ast_node<ArrayExpression>(move(elements));
}
// Parses a template literal, starting at its TemplateLiteralStart token.
// Each TemplateLiteralString becomes a StringLiteral node and each `${ ... }`
// block contributes the expression it contains; the parts are collected in
// source order into a TemplateLiteral node. On a syntax error the node built
// so far is returned.
NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal()
{
    consume(TokenType::TemplateLiteralStart);

    NonnullRefPtrVector<Expression> expressions;

    while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
        if (match(TokenType::TemplateLiteralString)) {
            expressions.append(create_ast_node<StringLiteral>(consume().string_value()));
        } else if (match(TokenType::TemplateLiteralExprStart)) {
            consume(TokenType::TemplateLiteralExprStart);
            if (match(TokenType::TemplateLiteralExprEnd)) {
                syntax_error("Empty template literal expression block");
                return create_ast_node<TemplateLiteral>(expressions);
            }

            expressions.append(parse_expression(0));
            if (match(TokenType::UnterminatedTemplateLiteral)) {
                syntax_error("Unterminated template literal");
                return create_ast_node<TemplateLiteral>(expressions);
            }
            consume(TokenType::TemplateLiteralExprEnd);
        } else {
            // Neither branch above consumes any other kind of token, so
            // without this bail-out the loop would never advance and the
            // parser would spin forever on malformed input.
            syntax_error("Unexpected token in template literal");
            return create_ast_node<TemplateLiteral>(expressions);
        }
    }

    if (match(TokenType::UnterminatedTemplateLiteral)) {
        syntax_error("Unterminated template literal");
    } else {
        consume(TokenType::TemplateLiteralEnd);
    }

    return create_ast_node<TemplateLiteral>(expressions);
}
NonnullRefPtr<Expression> Parser::parse_expression(int min_precedence, Associativity associativity)
{
auto expression = parse_primary_expression();
@ -1087,7 +1123,7 @@ bool Parser::match_expression() const
return type == TokenType::BoolLiteral
|| type == TokenType::NumericLiteral
|| type == TokenType::StringLiteral
|| type == TokenType::TemplateLiteral
|| type == TokenType::TemplateLiteralStart
|| type == TokenType::NullLiteral
|| type == TokenType::Identifier
|| type == TokenType::New

View File

@ -69,6 +69,7 @@ public:
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
NonnullRefPtr<ObjectExpression> parse_object_expression();
NonnullRefPtr<ArrayExpression> parse_array_expression();
NonnullRefPtr<TemplateLiteral> parse_template_literal();
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
NonnullRefPtr<NewExpression> parse_new_expression();

View File

@ -0,0 +1,45 @@
load("test-common.js");
try {
// Plain text, including characters that are only special in combination.
assert(`foo` === "foo");
assert(`foo{` === "foo{");
assert(`foo}` === "foo}");
assert(`foo$` === "foo$");
// Escaped grave and escaped dollar sign.
assert(`foo\`` === "foo`");
assert(`foo\$` === "foo$");
// Interpolation of primitive values.
assert(`foo ${undefined}` === "foo undefined");
assert(`foo ${null}` === "foo null");
assert(`foo ${5}` === "foo 5");
assert(`foo ${true}` === "foo true");
assert(`foo ${"bar"}` === "foo bar");
// An escaped "${" must not start an expression.
assert(`foo \${"bar"}` === 'foo ${"bar"}');
// Object literals inside an expression: their braces must not end the expression early.
assert(`foo ${{}}` === "foo [object Object]");
assert(`foo ${{ bar: { baz: "qux" }}}` === "foo [object Object]");
// Several expressions, and expressions at the start of / spanning the whole literal.
assert(`foo ${"bar"} ${"baz"}` === "foo bar baz");
assert(`${"foo"} bar baz` === "foo bar baz");
assert(`${"foo bar baz"}` === "foo bar baz");
// Non-trivial expressions.
let a = 27;
assert(`${a}` === "27");
assert(`foo ${a}` === "foo 27");
assert(`foo ${a ? "bar" : "baz"}` === "foo bar");
assert(`foo ${(() => a)()}` === "foo 27");
// Nested template literals.
assert(`foo ${`bar`}` === "foo bar");
assert(`${`${`${`${"foo"}`} bar`}`}` === "foo bar");
// Multi-line literal.
// NOTE(review): the whitespace in the literal and in the expected string looks
// collapsed here; confirm the indentation inside the literal matches the expected string.
assert(`foo
bar` === "foo\n bar");
// An exception thrown while evaluating an expression must propagate.
assertThrowsError(() => {
`${b}`;
}, {
error: ReferenceError,
message: "'b' not known"
});
console.log("PASS");
} catch (e) {
console.log("FAIL: " + e);
}

View File

@ -74,10 +74,14 @@ double Token::double_value() const
String Token::string_value() const
{
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteral);
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
auto is_template = type() == TokenType::TemplateLiteralString;
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
StringBuilder builder;
for (size_t i = 1; i < m_value.length() - 1; ++i) {
if (m_value[i] == '\\' && i + 1 < m_value.length() - 1) {
for (size_t i = offset; i < m_value.length() - offset; ++i) {
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
i++;
switch (m_value[i]) {
case 'b':
@ -107,18 +111,18 @@ String Token::string_value() const
case '"':
builder.append('"');
break;
case '`':
builder.append('`');
break;
case '\\':
builder.append('\\');
break;
default:
// FIXME: Also parse octal, hex and unicode sequences
// should anything else generate a syntax error?
builder.append(m_value[i]);
if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
builder.append(m_value[i]);
} else {
// FIXME: Also parse octal, hex and unicode sequences
// should anything else generate a syntax error?
builder.append(m_value[i]);
}
}
} else {
builder.append(m_value[i]);
}

View File

@ -112,7 +112,11 @@ namespace JS {
__ENUMERATE_JS_TOKEN(SlashEquals) \
__ENUMERATE_JS_TOKEN(StringLiteral) \
__ENUMERATE_JS_TOKEN(Switch) \
__ENUMERATE_JS_TOKEN(TemplateLiteral) \
__ENUMERATE_JS_TOKEN(TemplateLiteralEnd) \
__ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd) \
__ENUMERATE_JS_TOKEN(TemplateLiteralExprStart) \
__ENUMERATE_JS_TOKEN(TemplateLiteralStart) \
__ENUMERATE_JS_TOKEN(TemplateLiteralString) \
__ENUMERATE_JS_TOKEN(This) \
__ENUMERATE_JS_TOKEN(Throw) \
__ENUMERATE_JS_TOKEN(Tilde) \
@ -122,6 +126,7 @@ namespace JS {
__ENUMERATE_JS_TOKEN(UnsignedShiftRight) \
__ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals) \
__ENUMERATE_JS_TOKEN(UnterminatedStringLiteral) \
__ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral) \
__ENUMERATE_JS_TOKEN(Var) \
__ENUMERATE_JS_TOKEN(Void) \
__ENUMERATE_JS_TOKEN(While) \

View File

@ -500,7 +500,9 @@ int main(int argc, char** argv)
stylize({ start, end }, { Line::Style::Foreground(Line::Style::Color::Magenta) });
break;
case JS::TokenType::StringLiteral:
case JS::TokenType::TemplateLiteral:
case JS::TokenType::TemplateLiteralStart:
case JS::TokenType::TemplateLiteralEnd:
case JS::TokenType::TemplateLiteralString:
case JS::TokenType::RegexLiteral:
case JS::TokenType::UnterminatedStringLiteral:
stylize({ start, end }, { Line::Style::Foreground(Line::Style::Color::Green), Line::Style::Bold });
@ -571,6 +573,8 @@ int main(int argc, char** argv)
case JS::TokenType::Interface:
case JS::TokenType::Let:
case JS::TokenType::New:
case JS::TokenType::TemplateLiteralExprStart:
case JS::TokenType::TemplateLiteralExprEnd:
case JS::TokenType::Throw:
case JS::TokenType::Typeof:
case JS::TokenType::Var: