From adb4accab3668e60a6998c4b492d1cf7be11f9d1 Mon Sep 17 00:00:00 2001 From: mattco98 Date: Sun, 3 May 2020 15:41:14 -0700 Subject: [PATCH] LibJS: Add template literals Adds fully functioning template literals. Because template literals contain expressions, most of the work has to be done in the Lexer rather than the Parser. And because of the complexity of template literals (expressions, nesting, escapes, etc.), the Lexer needs to have some template-related state. When entering a new template literal, a TemplateLiteralStart token is emitted. When inside a literal, all text up until a '${' or '`' (or EOF, but that's a syntax error) is lexed into a TemplateLiteralString token; if a '${' follows, a TemplateLiteralExprStart token is then emitted. At this point, the Lexer proceeds as normal; however, it keeps track of the number of opening and closing curly braces it has seen in order to determine the close of the expression. Once it finds a matching curly brace for the '${', a TemplateLiteralExprEnd token is emitted and the state is updated accordingly. When the Lexer is inside of a template literal, but not an expression, and sees a '`', this must be the closing grave: a TemplateLiteralEnd token is emitted. The state required to correctly parse template strings consists of a vector (for nesting) of two pieces of information: whether or not we are in a template expression (as opposed to a template string); and the count of the number of unmatched open curly braces we have seen (only applicable if the Lexer is currently in a template expression). 
TODO: Add support for template literal newlines in the JS REPL (this will cause a syntax error currently): > `foo > bar` 'foo bar' --- Libraries/LibGUI/JSSyntaxHighlighter.cpp | 6 +- Libraries/LibJS/AST.cpp | 34 ++++++-- Libraries/LibJS/AST.h | 18 +++++ Libraries/LibJS/Lexer.cpp | 94 ++++++++++++++++------ Libraries/LibJS/Lexer.h | 6 ++ Libraries/LibJS/Parser.cpp | 38 ++++++++- Libraries/LibJS/Parser.h | 1 + Libraries/LibJS/Tests/template-literals.js | 45 +++++++++++ Libraries/LibJS/Token.cpp | 24 +++--- Libraries/LibJS/Token.h | 7 +- Userland/js.cpp | 6 +- 11 files changed, 235 insertions(+), 44 deletions(-) create mode 100644 Libraries/LibJS/Tests/template-literals.js diff --git a/Libraries/LibGUI/JSSyntaxHighlighter.cpp b/Libraries/LibGUI/JSSyntaxHighlighter.cpp index 1659fd42636..08769b2a560 100644 --- a/Libraries/LibGUI/JSSyntaxHighlighter.cpp +++ b/Libraries/LibGUI/JSSyntaxHighlighter.cpp @@ -42,7 +42,9 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type) case JS::TokenType::NumericLiteral: return { palette.syntax_number() }; case JS::TokenType::StringLiteral: - case JS::TokenType::TemplateLiteral: + case JS::TokenType::TemplateLiteralStart: + case JS::TokenType::TemplateLiteralEnd: + case JS::TokenType::TemplateLiteralString: case JS::TokenType::RegexLiteral: case JS::TokenType::UnterminatedStringLiteral: return { palette.syntax_string() }; @@ -55,6 +57,8 @@ static TextStyle style_for_token_type(Gfx::Palette palette, JS::TokenType type) case JS::TokenType::ParenClose: case JS::TokenType::ParenOpen: case JS::TokenType::Semicolon: + case JS::TokenType::TemplateLiteralExprStart: + case JS::TokenType::TemplateLiteralExprEnd: return { palette.syntax_punctuation() }; case JS::TokenType::Ampersand: case JS::TokenType::AmpersandEquals: diff --git a/Libraries/LibJS/AST.cpp b/Libraries/LibJS/AST.cpp index 28d56b56981..a53eac4e001 100644 --- a/Libraries/LibJS/AST.cpp +++ b/Libraries/LibJS/AST.cpp @@ -1229,6 +1229,28 @@ Value 
ArrayExpression::execute(Interpreter& interpreter) const return array; } +void TemplateLiteral::dump(int indent) const +{ + ASTNode::dump(indent); + + for (auto& expression : expressions()) + expression.dump(indent + 1); +} + +Value TemplateLiteral::execute(Interpreter& interpreter) const +{ + StringBuilder string_builder; + + for (auto& expression : expressions()) { + auto expr = expression.execute(interpreter); + if (interpreter.exception()) + return {}; + string_builder.append(expr.to_string()); + } + + return js_string(interpreter, string_builder.build()); +} + void TryStatement::dump(int indent) const { ASTNode::dump(indent); @@ -1398,15 +1420,15 @@ Value ConditionalExpression::execute(Interpreter& interpreter) const void ConditionalExpression::dump(int indent) const { ASTNode::dump(indent); - print_indent(indent); + print_indent(indent + 1); printf("(Test)\n"); - m_test->dump(indent + 1); - print_indent(indent); + m_test->dump(indent + 2); + print_indent(indent + 1); printf("(Consequent)\n"); - m_consequent->dump(indent + 1); - print_indent(indent); + m_consequent->dump(indent + 2); + print_indent(indent + 1); printf("(Alternate)\n"); - m_alternate->dump(indent + 1); + m_alternate->dump(indent + 2); } void SequenceExpression::dump(int indent) const diff --git a/Libraries/LibJS/AST.h b/Libraries/LibJS/AST.h index 338facf098d..571b035c766 100644 --- a/Libraries/LibJS/AST.h +++ b/Libraries/LibJS/AST.h @@ -762,6 +762,24 @@ private: Vector> m_elements; }; +class TemplateLiteral final : public Expression { +public: + TemplateLiteral(NonnullRefPtrVector expressions) + : m_expressions(expressions) + { + } + + virtual Value execute(Interpreter&) const override; + virtual void dump(int indent) const override; + + const NonnullRefPtrVector& expressions() const { return m_expressions; } + +private: + virtual const char* class_name() const override { return "TemplateLiteral"; } + + const NonnullRefPtrVector m_expressions; +}; + class MemberExpression final : public 
Expression { public: MemberExpression(NonnullRefPtr object, NonnullRefPtr property, bool computed = false) diff --git a/Libraries/LibJS/Lexer.cpp b/Libraries/LibJS/Lexer.cpp index cfcead69067..1c5c17046c1 100644 --- a/Libraries/LibJS/Lexer.cpp +++ b/Libraries/LibJS/Lexer.cpp @@ -244,34 +244,74 @@ void Lexer::syntax_error(const char* msg) Token Lexer::next() { size_t trivia_start = m_position; + auto in_template = !m_template_states.is_empty(); - // consume whitespace and comments - while (true) { - if (isspace(m_current_char)) { - do { + if (!in_template || m_template_states.last().in_expr) { + // consume whitespace and comments + while (true) { + if (isspace(m_current_char)) { + do { + consume(); + } while (isspace(m_current_char)); + } else if (is_line_comment_start()) { consume(); - } while (isspace(m_current_char)); - } else if (is_line_comment_start()) { - consume(); - do { + do { + consume(); + } while (!is_eof() && m_current_char != '\n'); + } else if (is_block_comment_start()) { consume(); - } while (!is_eof() && m_current_char != '\n'); - } else if (is_block_comment_start()) { - consume(); - do { - consume(); - } while (!is_eof() && !is_block_comment_end()); - consume(); // consume * - consume(); // consume / - } else { - break; + do { + consume(); + } while (!is_eof() && !is_block_comment_end()); + consume(); // consume * + consume(); // consume / + } else { + break; + } } } size_t value_start = m_position; auto token_type = TokenType::Invalid; - if (is_identifier_start()) { + if (m_current_char == '`') { + consume(); + + if (!in_template) { + token_type = TokenType::TemplateLiteralStart; + m_template_states.append({ false, 0 }); + } else { + if (m_template_states.last().in_expr) { + m_template_states.append({ false, 0 }); + token_type = TokenType::TemplateLiteralStart; + } else { + m_template_states.take_last(); + token_type = TokenType::TemplateLiteralEnd; + } + } + } else if (in_template && m_template_states.last().in_expr && 
m_template_states.last().open_bracket_count == 0 && m_current_char == '}') { + consume(); + token_type = TokenType::TemplateLiteralExprEnd; + m_template_states.last().in_expr = false; + } else if (in_template && !m_template_states.last().in_expr) { + if (is_eof()) { + token_type = TokenType::UnterminatedTemplateLiteral; + m_template_states.take_last(); + } else if (match('$', '{')) { + token_type = TokenType::TemplateLiteralExprStart; + consume(); + consume(); + m_template_states.last().in_expr = true; + } else { + while (!match('$', '{') && m_current_char != '`' && !is_eof()) { + if (match('\\', '$') || match('\\', '`')) + consume(); + consume(); + } + + token_type = TokenType::TemplateLiteralString; + } + } else if (is_identifier_start()) { // identifier or keyword do { consume(); @@ -339,7 +379,7 @@ Token Lexer::next() } } token_type = TokenType::NumericLiteral; - } else if (m_current_char == '"' || m_current_char == '\'' || m_current_char == '`') { + } else if (m_current_char == '"' || m_current_char == '\'') { char stop_char = m_current_char; consume(); while (m_current_char != stop_char && m_current_char != '\n' && !is_eof()) { @@ -353,10 +393,7 @@ Token Lexer::next() token_type = TokenType::UnterminatedStringLiteral; } else { consume(); - if (stop_char == '`') - token_type = TokenType::TemplateLiteral; - else - token_type = TokenType::StringLiteral; + token_type = TokenType::StringLiteral; } } else if (m_current_char == EOF) { token_type = TokenType::Eof; @@ -416,6 +453,14 @@ Token Lexer::next() } } + if (!m_template_states.is_empty() && m_template_states.last().in_expr) { + if (token_type == TokenType::CurlyOpen) { + m_template_states.last().open_bracket_count++; + } else if (token_type == TokenType::CurlyClose) { + m_template_states.last().open_bracket_count--; + } + } + m_current_token = Token( token_type, m_source.substring_view(trivia_start - 1, value_start - trivia_start), @@ -425,4 +470,5 @@ Token Lexer::next() return m_current_token; } + } diff --git 
a/Libraries/LibJS/Lexer.h b/Libraries/LibJS/Lexer.h index d9be445eb6c..f5a4ae51cd7 100644 --- a/Libraries/LibJS/Lexer.h +++ b/Libraries/LibJS/Lexer.h @@ -71,6 +71,12 @@ private: size_t m_line_column = 1; bool m_log_errors = true; + struct TemplateState { + bool in_expr; + u8 open_bracket_count; + }; + Vector m_template_states; + static HashMap s_keywords; static HashMap s_three_char_tokens; static HashMap s_two_char_tokens; diff --git a/Libraries/LibJS/Parser.cpp b/Libraries/LibJS/Parser.cpp index 10383c57fb1..4ff34318edf 100644 --- a/Libraries/LibJS/Parser.cpp +++ b/Libraries/LibJS/Parser.cpp @@ -394,6 +394,8 @@ NonnullRefPtr Parser::parse_primary_expression() return parse_function_node(); case TokenType::BracketOpen: return parse_array_expression(); + case TokenType::TemplateLiteralStart: + return parse_template_literal(); case TokenType::New: return parse_new_expression(); default: @@ -536,6 +538,40 @@ NonnullRefPtr Parser::parse_array_expression() return create_ast_node(move(elements)); } +NonnullRefPtr Parser::parse_template_literal() +{ + consume(TokenType::TemplateLiteralStart); + + NonnullRefPtrVector expressions; + + while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) { + if (match(TokenType::TemplateLiteralString)) { + expressions.append(create_ast_node(consume().string_value())); + } else if (match(TokenType::TemplateLiteralExprStart)) { + consume(TokenType::TemplateLiteralExprStart); + if (match(TokenType::TemplateLiteralExprEnd)) { + syntax_error("Empty template literal expression block"); + return create_ast_node(expressions); + } + + expressions.append(parse_expression(0)); + if (match(TokenType::UnterminatedTemplateLiteral)) { + syntax_error("Unterminated template literal"); + return create_ast_node(expressions); + } + consume(TokenType::TemplateLiteralExprEnd); + } + } + + if (match(TokenType::UnterminatedTemplateLiteral)) { + syntax_error("Unterminated template literal"); + } else { + 
consume(TokenType::TemplateLiteralEnd); + } + + return create_ast_node(expressions); +} + NonnullRefPtr Parser::parse_expression(int min_precedence, Associativity associativity) { auto expression = parse_primary_expression(); @@ -1087,7 +1123,7 @@ bool Parser::match_expression() const return type == TokenType::BoolLiteral || type == TokenType::NumericLiteral || type == TokenType::StringLiteral - || type == TokenType::TemplateLiteral + || type == TokenType::TemplateLiteralStart || type == TokenType::NullLiteral || type == TokenType::Identifier || type == TokenType::New diff --git a/Libraries/LibJS/Parser.h b/Libraries/LibJS/Parser.h index e11429399b7..273dce7c530 100644 --- a/Libraries/LibJS/Parser.h +++ b/Libraries/LibJS/Parser.h @@ -69,6 +69,7 @@ public: NonnullRefPtr parse_unary_prefixed_expression(); NonnullRefPtr parse_object_expression(); NonnullRefPtr parse_array_expression(); + NonnullRefPtr parse_template_literal(); NonnullRefPtr parse_secondary_expression(NonnullRefPtr, int min_precedence, Associativity associate = Associativity::Right); NonnullRefPtr parse_call_expression(NonnullRefPtr); NonnullRefPtr parse_new_expression(); diff --git a/Libraries/LibJS/Tests/template-literals.js b/Libraries/LibJS/Tests/template-literals.js new file mode 100644 index 00000000000..71734cb0d2d --- /dev/null +++ b/Libraries/LibJS/Tests/template-literals.js @@ -0,0 +1,45 @@ +load("test-common.js"); + +try { + assert(`foo` === "foo"); + assert(`foo{` === "foo{"); + assert(`foo}` === "foo}"); + assert(`foo$` === "foo$"); + assert(`foo\`` === "foo`") + assert(`foo\$` === "foo$"); + + assert(`foo ${undefined}` === "foo undefined"); + assert(`foo ${null}` === "foo null"); + assert(`foo ${5}` === "foo 5"); + assert(`foo ${true}` === "foo true"); + assert(`foo ${"bar"}` === "foo bar"); + assert(`foo \${"bar"}` === 'foo ${"bar"}'); + + assert(`foo ${{}}` === "foo [object Object]"); + assert(`foo ${{ bar: { baz: "qux" }}}` === "foo [object Object]"); + assert(`foo ${"bar"} ${"baz"}` 
=== "foo bar baz"); + assert(`${"foo"} bar baz` === "foo bar baz"); + assert(`${"foo bar baz"}` === "foo bar baz"); + + let a = 27; + assert(`${a}` === "27"); + assert(`foo ${a}` === "foo 27"); + assert(`foo ${a ? "bar" : "baz"}` === "foo bar"); + assert(`foo ${(() => a)()}` === "foo 27"); + + assert(`foo ${`bar`}` === "foo bar"); + assert(`${`${`${`${"foo"}`} bar`}`}` === "foo bar"); + assert(`foo + bar` === "foo\n bar"); + + assertThrowsError(() => { + `${b}`; + }, { + error: ReferenceError, + message: "'b' not known" + }) + + console.log("PASS"); +} catch (e) { + console.log("FAIL: " + e); +} diff --git a/Libraries/LibJS/Token.cpp b/Libraries/LibJS/Token.cpp index 3375146ca2f..ffb95908a55 100644 --- a/Libraries/LibJS/Token.cpp +++ b/Libraries/LibJS/Token.cpp @@ -74,10 +74,14 @@ double Token::double_value() const String Token::string_value() const { - ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteral); + ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString); + auto is_template = type() == TokenType::TemplateLiteralString; + + auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1; + StringBuilder builder; - for (size_t i = 1; i < m_value.length() - 1; ++i) { - if (m_value[i] == '\\' && i + 1 < m_value.length() - 1) { + for (size_t i = offset; i < m_value.length() - offset; ++i) { + if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) { i++; switch (m_value[i]) { case 'b': @@ -107,18 +111,18 @@ String Token::string_value() const case '"': builder.append('"'); break; - case '`': - builder.append('`'); - break; case '\\': builder.append('\\'); break; default: - // FIXME: Also parse octal, hex and unicode sequences - // should anything else generate a syntax error? 
- builder.append(m_value[i]); + if (is_template && (m_value[i] == '$' || m_value[i] == '`')) { + builder.append(m_value[i]); + } else { + // FIXME: Also parse octal, hex and unicode sequences + // should anything else generate a syntax error? + builder.append(m_value[i]); + } } - } else { builder.append(m_value[i]); } diff --git a/Libraries/LibJS/Token.h b/Libraries/LibJS/Token.h index e272eff8041..b3242703d8a 100644 --- a/Libraries/LibJS/Token.h +++ b/Libraries/LibJS/Token.h @@ -112,7 +112,11 @@ namespace JS { __ENUMERATE_JS_TOKEN(SlashEquals) \ __ENUMERATE_JS_TOKEN(StringLiteral) \ __ENUMERATE_JS_TOKEN(Switch) \ - __ENUMERATE_JS_TOKEN(TemplateLiteral) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralEnd) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralExprEnd) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralExprStart) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralStart) \ + __ENUMERATE_JS_TOKEN(TemplateLiteralString) \ __ENUMERATE_JS_TOKEN(This) \ __ENUMERATE_JS_TOKEN(Throw) \ __ENUMERATE_JS_TOKEN(Tilde) \ @@ -122,6 +126,7 @@ namespace JS { __ENUMERATE_JS_TOKEN(UnsignedShiftRight) \ __ENUMERATE_JS_TOKEN(UnsignedShiftRightEquals) \ __ENUMERATE_JS_TOKEN(UnterminatedStringLiteral) \ + __ENUMERATE_JS_TOKEN(UnterminatedTemplateLiteral) \ __ENUMERATE_JS_TOKEN(Var) \ __ENUMERATE_JS_TOKEN(Void) \ __ENUMERATE_JS_TOKEN(While) \ diff --git a/Userland/js.cpp b/Userland/js.cpp index f3048bdd555..38e0fb1623c 100644 --- a/Userland/js.cpp +++ b/Userland/js.cpp @@ -500,7 +500,9 @@ int main(int argc, char** argv) stylize({ start, end }, { Line::Style::Foreground(Line::Style::Color::Magenta) }); break; case JS::TokenType::StringLiteral: - case JS::TokenType::TemplateLiteral: + case JS::TokenType::TemplateLiteralStart: + case JS::TokenType::TemplateLiteralEnd: + case JS::TokenType::TemplateLiteralString: case JS::TokenType::RegexLiteral: case JS::TokenType::UnterminatedStringLiteral: stylize({ start, end }, { Line::Style::Foreground(Line::Style::Color::Green), Line::Style::Bold }); @@ -571,6 +573,8 @@ int 
main(int argc, char** argv) case JS::TokenType::Interface: case JS::TokenType::Let: case JS::TokenType::New: + case JS::TokenType::TemplateLiteralExprStart: + case JS::TokenType::TemplateLiteralExprEnd: case JS::TokenType::Throw: case JS::TokenType::Typeof: case JS::TokenType::Var: