unleashed-firmware/lib/mjs/mjs_tok.c

/*
 * Copyright (c) 2017 Cesanta Software Limited
 * All rights reserved
 */

#include <stdlib.h>
#include <string.h>

#include "common/cs_dbg.h"
#include "mjs_tok.h"

// Prepare parser state `p` for tokenizing the NUL-terminated text in `buf`.
// The whole structure is zeroed first; then the source name is recorded, both
// the buffer start and the read cursor are pointed at `buf`, the line counters
// start at 1, and the offset-to-line-number map is initialized empty.
MJS_PRIVATE void pinit(const char* file_name, const char* buf, struct pstate* p) {
    memset(p, 0, sizeof(*p));

    p->file_name = file_name;
    p->buf = buf;
    p->pos = buf;

    p->line_no = 1;
    p->last_emitted_line_no = 1;

    mbuf_init(&p->offset_lineno_map, 0);
}

// The target libc's ctype functions are deliberately avoided: some of them
// mishandle negative arguments, e.g. isspace(-1).
//
// Returns 1 when `c` is a space, CR, LF, tab, form feed or vertical tab,
// and 0 for anything else.
static int mjs_is_space(int c) {
    switch(c) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
        return 1;
    default:
        return 0;
    }
}

// Returns 1 when `c` is an ASCII decimal digit ('0'..'9'), 0 otherwise.
MJS_PRIVATE int mjs_is_digit(int c) {
    if(c < '0') return 0;
    return c <= '9';
}

// Returns 1 when `c` is an ASCII letter (A-Z or a-z), 0 otherwise.
static int mjs_is_alpha(int c) {
    if(c >= 'A' && c <= 'Z') return 1;
    if(c >= 'a' && c <= 'z') return 1;
    return 0;
}

// Returns 1 when `c` may appear in an identifier as a non-digit character:
// an underscore, a dollar sign, or an ASCII letter. Returns 0 otherwise.
MJS_PRIVATE int mjs_is_ident(int c) {
    if(c == '_' || c == '$') return 1;
    return mjs_is_alpha(c) != 0;
}

// Try to parse a token that can take one or two chars.
//
// If the current character is not one of `first_chars` (or is the end of
// input), nothing is consumed and TOK_EOF is returned to signal "no match".
// Otherwise, when the next character is one of `second_chars`, the token is
// extended by one char (p->tok.len and p->pos both advance by one) and the
// two characters are returned packed as (first << 8 | second). When only the
// first character matches, it is returned as is and `p` is left untouched.
//
// In every matching case p->pos ends up on the LAST character of the token.
static int longtok(struct pstate* p, const char* first_chars, const char* second_chars) {
    // strchr() considers the terminating NUL part of the searched string, so
    // a NUL has to be rejected explicitly. Without this guard the end of input
    // would count as a match and p->pos[1] below would be read past the
    // terminator.
    if(p->pos[0] == '\0' || strchr(first_chars, p->pos[0]) == NULL) return TOK_EOF;
    if(p->pos[1] != '\0' && strchr(second_chars, p->pos[1]) != NULL) {
        p->tok.len++;
        p->pos++;
        return p->pos[-1] << 8 | p->pos[0];
    }
    return p->pos[0];
}

// Try to parse a token that takes exactly 3 chars.
//
// Returns TOK_EOF without touching `p` unless the input at the cursor is
// exactly `a`, `b`, `c`. On a match the token length grows by two, the cursor
// moves onto the third character, and the three characters are returned
// packed as (a << 16 | b << 8 | c).
static int longtok3(struct pstate* p, char a, char b, char c) {
    const char* s = p->pos;

    if(s[0] != a || s[1] != b || s[2] != c) return TOK_EOF;

    p->tok.len += 2;
    p->pos = s + 2;
    return s[0] << 16 | s[1] << 8 | s[2];
}

// Try to parse a token that takes exactly 4 chars.
//
// Returns TOK_EOF without touching `p` unless the input at the cursor is
// exactly `a`, `b`, `c`, `d`. On a match the token length grows by three, the
// cursor moves onto the fourth character, and the four characters are
// returned packed as (a << 24 | b << 16 | c << 8 | d).
static int longtok4(struct pstate* p, char a, char b, char c, char d) {
    const char* s = p->pos;

    if(s[0] != a || s[1] != b || s[2] != c || s[3] != d) return TOK_EOF;

    p->tok.len += 3;
    p->pos = s + 3;
    return s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3];
}

// Scan a numeric literal starting at p->pos and record its extent.
//
// Only the token boundaries are determined here; the numeric value itself is
// not computed or stored. On return p->tok.len covers the whole literal and
// p->pos points at its LAST character. Always returns TOK_NUM.
static int getnum(struct pstate* p) {
    if(p->pos[0] == '0' && p->pos[1] == 'x') {
        // Hex literals are scanned by hand. strtod() cannot be relied on for
        // "0x..." (MSVC6 does not parse it), and strtoul() is too permissive
        // for a lexer: it skips leading whitespace and accepts a sign and a
        // second "0x" prefix, so input such as "0x -5" or "0x0x10" would be
        // swallowed as a single number token.
        const char* s = p->pos + 2;
        while((*s >= '0' && *s <= '9') || (*s >= 'a' && *s <= 'f') || (*s >= 'A' && *s <= 'F')) {
            s++;
        }
        p->pos = s;
    } else {
        // Use a properly typed end pointer instead of writing through a
        // (char**) cast of the const char* cursor.
        char* end = NULL;
        (void)strtod(p->pos, &end);
        p->pos = end;
    }
    p->tok.len = p->pos - p->tok.ptr;
    p->pos--;
    return TOK_NUM;
}

// Check whether the `len` characters at `s` spell a reserved word.
//
// Returns the 1-based position of the word in the table below, or 0 when the
// text is not a reserved word. `s` does not need to be NUL-terminated right
// after the word; only the first `len` characters are compared.
//
// NOTE(review): pnext() adds the result to TOK_IDENT, so the order of this
// table presumably has to mirror the keyword token IDs declared after
// TOK_IDENT in mjs_tok.h -- confirm before reordering.
static int is_reserved_word_token(const char* s, int len) {
    // Kept in static read-only storage so the table is not rebuilt on the
    // stack for every identifier that gets scanned.
    static const char* const reserved[] = {
        "break",     "case",   "catch", "continue",   "debugger", "default",
        "delete",    "do",     "else",  "false",      "finally",  "for",
        "function",  "if",     "in",    "instanceof", "new",      "null",
        "return",    "switch", "this",  "throw",      "true",     "try",
        "typeof",    "var",    "void",  "while",      "with",     "let",
        "undefined", NULL};
    int i;
    // Every reserved word starts with a letter, so anything else (e.g. an
    // identifier starting with '_' or '$') can be rejected right away.
    if(!mjs_is_alpha(s[0])) return 0;
    for(i = 0; reserved[i] != NULL; i++) {
        if(len == (int)strlen(reserved[i]) && strncmp(s, reserved[i], len) == 0) return i + 1;
    }
    return 0;
}

// Scan an identifier: a run of identifier characters and decimal digits
// starting at p->pos. On return p->tok.len holds the distance from p->tok.ptr
// to the end of the run and p->pos points at the LAST character of the run.
// Always returns TOK_IDENT.
static int getident(struct pstate* p) {
    const char* s = p->pos;

    for(;;) {
        if(!mjs_is_ident(s[0]) && !mjs_is_digit(s[0])) break;
        s++;
    }

    p->tok.len = s - p->tok.ptr;
    p->pos = s - 1;
    return TOK_IDENT;
}

// Scan a quoted string literal whose opening quote is at p->pos.
//
// Both the cursor and p->tok.ptr are moved past the opening quote, so the
// recorded token excludes the quotes. Scanning stops at the matching closing
// quote or at the end of input, whichever comes first, and p->pos is left on
// that character. A backslash followed by the quote character or by one of
// b, f, n, r, t, v, '\' is skipped as a two-character unit so that an escaped
// quote does not terminate the string. Always returns TOK_STR.
static int getstr(struct pstate* p) {
    const int quote = p->pos[0];

    p->pos++;
    p->tok.ptr++;

    for(;;) {
        int step = 1;

        if(p->pos[0] == '\0' || p->pos[0] == quote) break;

        if(p->pos[0] == '\\') {
            const char next = p->pos[1];
            if(next != '\0' && (next == quote || strchr("bfnrtv\\", next) != NULL)) {
                step = 2;
            }
        }
        p->pos += step;
    }

    p->tok.len = p->pos - p->tok.ptr;
    return TOK_STR;
}

// Advance p->pos past any mix of whitespace, `//` line comments and
// `/* ... */` block comments, bumping p->line_no for every newline skipped
// in whitespace or inside a block comment. The passes repeat until one of
// them makes no progress, so consecutive comments separated by whitespace are
// all consumed. An unterminated block comment runs to the end of input.
static void skip_spaces_and_comments(struct pstate* p) {
    for(;;) {
        const char* start = p->pos;

        // Whitespace run.
        for(; mjs_is_space(p->pos[0]); p->pos++) {
            if(p->pos[0] == '\n') p->line_no++;
        }

        // Line comment: stop ON the newline so the whitespace pass of the
        // next iteration counts it.
        if(p->pos[0] == '/' && p->pos[1] == '/') {
            while(p->pos[0] != '\0' && p->pos[0] != '\n') p->pos++;
        }

        // Block comment.
        if(p->pos[0] == '/' && p->pos[1] == '*') {
            p->pos += 2;
            for(; p->pos[0] != '\0'; p->pos++) {
                if(p->pos[0] == '\n') p->line_no++;
                if(p->pos[0] == '*' && p->pos[1] == '/') {
                    p->pos += 2;
                    break;
                }
            }
        }

        // Nothing was consumed in this pass: the cursor is on a real token.
        if(p->pos <= start) break;
    }
}

// Translate a raw token code produced by the scanner into a TOK_* identifier.
//
// Punctuation and operators arrive either as the plain character value or as
// several characters packed into one int, most significant character first
// (8 bits per character), which is what the DT/TT/QT helper macros below
// rebuild for the case labels. Any value that has no case label is returned
// unchanged.
//
// NOTE(review): DT, TT and QT are not #undef'd, so they remain defined for
// the rest of the translation unit.
static int ptranslate(int tok) {
// Pack two, three or four characters into a single int.
#define DT(a, b) ((a) << 8 | (b))
#define TT(a, b, c) ((a) << 16 | (b) << 8 | (c))
#define QT(a, b, c, d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
    /* Map token ID produced by mjs_tok.c to token ID produced by lemon */
    /* clang-format off */
  switch (tok) {
    /* Single-character tokens. */
    case ':': return TOK_COLON;
    case ';': return TOK_SEMICOLON;
    case ',': return TOK_COMMA;
    case '=': return TOK_ASSIGN;
    case '{': return TOK_OPEN_CURLY;
    case '}': return TOK_CLOSE_CURLY;
    case '(': return TOK_OPEN_PAREN;
    case ')': return TOK_CLOSE_PAREN;
    case '[': return TOK_OPEN_BRACKET;
    case ']': return TOK_CLOSE_BRACKET;
    case '*': return TOK_MUL;
    case '+': return TOK_PLUS;
    case '-': return TOK_MINUS;
    case '/': return TOK_DIV;
    case '%': return TOK_REM;
    case '&': return TOK_AND;
    case '|': return TOK_OR;
    case '^': return TOK_XOR;
    case '.': return TOK_DOT;
    case '?': return TOK_QUESTION;
    case '!': return TOK_NOT;
    case '~': return TOK_TILDA;
    case '<': return TOK_LT;
    case '>': return TOK_GT;
    /* Two-character tokens. */
    case DT('<','<'): return TOK_LSHIFT;
    case DT('>','>'): return TOK_RSHIFT;
    case DT('-','-'): return TOK_MINUS_MINUS;
    case DT('+','+'): return TOK_PLUS_PLUS;
    case DT('+','='): return TOK_PLUS_ASSIGN;
    case DT('-','='): return TOK_MINUS_ASSIGN;
    case DT('*','='): return TOK_MUL_ASSIGN;
    case DT('/','='): return TOK_DIV_ASSIGN;
    case DT('&','='): return TOK_AND_ASSIGN;
    case DT('|','='): return TOK_OR_ASSIGN;
    case DT('%','='): return TOK_REM_ASSIGN;
    case DT('^','='): return TOK_XOR_ASSIGN;
    case DT('=','='): return TOK_EQ;
    case DT('!','='): return TOK_NE;
    case DT('<','='): return TOK_LE;
    case DT('>','='): return TOK_GE;
    case DT('&','&'): return TOK_LOGICAL_AND;
    case DT('|','|'): return TOK_LOGICAL_OR;
    /* Three-character tokens. */
    case TT('=','=','='): return TOK_EQ_EQ;
    case TT('!','=','='): return TOK_NE_NE;
    case TT('<','<','='): return TOK_LSHIFT_ASSIGN;
    case TT('>','>','='): return TOK_RSHIFT_ASSIGN;
    case TT('>','>','>'): return TOK_URSHIFT;
    /* Four-character token. */
    case QT('>','>','>','='): return TOK_URSHIFT_ASSIGN;
  }
    /* clang-format on */
    /* No mapping: hand the value back unchanged. */
    return tok;
}

// Scan the next token from the input and return its translated ID.
//
// Leading whitespace and comments are skipped first. p->tok.ptr/p->tok.len
// are then set to describe the token text, the previous token ID is saved in
// p->prev_tok, and the new ID is stored in p->tok.tok and returned. At the end
// of input TOK_EOF is produced and the cursor stays on the terminating NUL.
// A character that no branch below recognizes leaves `tok` at its initial
// TOK_INVALID value.
MJS_PRIVATE int pnext(struct pstate* p) {
    int tmp, tok = TOK_INVALID;

    skip_spaces_and_comments(p);
    // Default token extent: one character starting at the cursor. The
    // scanners below adjust it for longer tokens.
    p->tok.ptr = p->pos;
    p->tok.len = 1;

    if(p->pos[0] == '\0') {
        tok = TOK_EOF;
    } else if(mjs_is_digit(p->pos[0])) {
        tok = getnum(p);
    } else if(p->pos[0] == '\'' || p->pos[0] == '"') {
        tok = getstr(p);
    } else if(mjs_is_ident(p->pos[0])) {
        tok = getident(p);
        /*
         * NOTE: getident() updates `p` (p->tok.len in particular), and
         * is_reserved_word_token() reads those updated fields. Because the
         * order in which C evaluates the operands of `+` is unspecified, the
         * two calls must stay in separate statements.
         *
         * The result is 0 for an ordinary identifier, so `tok` then remains
         * the value returned by getident(); for a reserved word it is the
         * word's 1-based table index, added on top of that value.
         */
        tok += is_reserved_word_token(p->tok.ptr, p->tok.len);
    } else if(strchr(",.:;{}[]()?", p->pos[0]) != NULL) {
        // Punctuation that is never part of a longer operator.
        tok = p->pos[0];
    } else if(
        // Longest operators are tried first so that, for example, ">>>=" is
        // not split into shorter tokens. Each helper only consumes input when
        // it matches, so a failed attempt leaves `p` untouched for the next.
        (tmp = longtok3(p, '<', '<', '=')) != TOK_EOF ||
        (tmp = longtok3(p, '>', '>', '=')) != TOK_EOF ||
        (tmp = longtok4(p, '>', '>', '>', '=')) != TOK_EOF ||
        (tmp = longtok3(p, '>', '>', '>')) != TOK_EOF ||
        (tmp = longtok3(p, '=', '=', '=')) != TOK_EOF ||
        (tmp = longtok3(p, '!', '=', '=')) != TOK_EOF ||
        (tmp = longtok(p, "&", "&=")) != TOK_EOF || (tmp = longtok(p, "|", "|=")) != TOK_EOF ||
        (tmp = longtok(p, "<", "<=")) != TOK_EOF || (tmp = longtok(p, ">", ">=")) != TOK_EOF ||
        (tmp = longtok(p, "-", "-=")) != TOK_EOF || (tmp = longtok(p, "+", "+=")) != TOK_EOF) {
        tok = tmp;
    } else if((tmp = longtok(p, "^~+-%/*<>=!|&", "=")) != TOK_EOF) {
        // Remaining operators: a single character optionally followed by '='.
        tok = tmp;
    }
    // Every scanner above leaves the cursor ON the last character it consumed
    // (the closing quote, for strings), so step over it -- but never past the
    // terminating NUL.
    if(p->pos[0] != '\0') p->pos++;
    LOG(LL_VERBOSE_DEBUG, ("  --> %d [%.*s]", tok, p->tok.len, p->tok.ptr));
    p->prev_tok = p->tok.tok;
    p->tok.tok = ptranslate(tok);
    return p->tok.tok;
}
[FL-3579, FL-3601, FL-3714] JavaScript runner (#3286) * FBT: cdefines to env, libs order * API: strtod, modf, itoa, calloc * Apps: elk js * Apps: mjs * JS: scripts as assets * mjs: composite resolver * mjs: stack trace * ELK JS example removed * MJS thread, MJS lib modified to support script interruption * JS console UI * Module system, BadUSB bindings rework * JS notifications, simple dialog, BadUSB demo * Custom dialogs, dialog demo * MJS as system library, some dirty hacks to make it compile * Plugin-based js modules * js_uart(BadUART) module * js_uart: support for byte array arguments * Script icon and various fixes * File browser: multiple extensions filter, running js scripts from app loader * Running js scripts from archive browser * JS Runner as system app * Example scripts moved to /ext/apps/Scripts * JS bytecode listing generation * MJS builtin printf cleanup * JS examples cleanup * mbedtls version fix * Unused lib cleanup * Making PVS happy & TODOs cleanup * TODOs cleanup #2 * MJS: initial typed arrays support * JS: fix mem leak in uart destructor Co-authored-by: SG <who.just.the.doctor@gmail.com> Co-authored-by: Aleksandr Kutuzov <alleteam@gmail.com> 2024-02-12 11:54:32 +03:00			`/*`
			`* Copyright (c) 2017 Cesanta Software Limited`
			`* All rights reserved`
			`*/`

			`#include <stdlib.h>`
			`#include <string.h>`

			`#include "common/cs_dbg.h"`
			`#include "mjs_tok.h"`

			`MJS_PRIVATE void pinit(const char* file_name, const char* buf, struct pstate* p) {`
			`memset(p, 0, sizeof(*p));`
			`p->line_no = 1;`
			`p->last_emitted_line_no = 1;`
			`p->file_name = file_name;`
			`p->buf = p->pos = buf;`
			`mbuf_init(&p->offset_lineno_map, 0);`
			`}`

			`// We're not relying on the target libc ctype, as it may incorrectly`
			`// handle negative arguments, e.g. isspace(-1).`
			`static int mjs_is_space(int c) {`
			`return c == ' ' \|\| c == '\r' \|\| c == '\n' \|\| c == '\t' \|\| c == '\f' \|\| c == '\v';`
			`}`

			`MJS_PRIVATE int mjs_is_digit(int c) {`
			`return c >= '0' && c <= '9';`
			`}`

			`static int mjs_is_alpha(int c) {`
			`return (c >= 'a' && c <= 'z') \|\| (c >= 'A' && c <= 'Z');`
			`}`

			`MJS_PRIVATE int mjs_is_ident(int c) {`
			`return c == '_' \|\| c == '$' \|\| mjs_is_alpha(c);`
			`}`

			`// Try to parse a token that can take one or two chars.`
			`static int longtok(struct pstate* p, const char* first_chars, const char* second_chars) {`
			`if(strchr(first_chars, p->pos[0]) == NULL) return TOK_EOF;`
			`if(p->pos[1] != '\0' && strchr(second_chars, p->pos[1]) != NULL) {`
			`p->tok.len++;`
			`p->pos++;`
			`return p->pos[-1] << 8 \| p->pos[0];`
			`}`
			`return p->pos[0];`
			`}`

			`// Try to parse a token that takes exactly 3 chars.`
			`static int longtok3(struct pstate* p, char a, char b, char c) {`
			`if(p->pos[0] == a && p->pos[1] == b && p->pos[2] == c) {`
			`p->tok.len += 2;`
			`p->pos += 2;`
			`return p->pos[-2] << 16 \| p->pos[-1] << 8 \| p->pos[0];`
			`}`
			`return TOK_EOF;`
			`}`

			`// Try to parse a token that takes exactly 4 chars.`
			`static int longtok4(struct pstate* p, char a, char b, char c, char d) {`
			`if(p->pos[0] == a && p->pos[1] == b && p->pos[2] == c && p->pos[3] == d) {`
			`p->tok.len += 3;`
			`p->pos += 3;`
			`return p->pos[-3] << 24 \| p->pos[-2] << 16 \| p->pos[-1] << 8 \| p->pos[0];`
			`}`
			`return TOK_EOF;`
			`}`

			`static int getnum(struct pstate* p) {`
			`if(p->pos[0] == '0' && p->pos[1] == 'x') {`
			`// MSVC6 strtod cannot parse 0x... numbers, thus this ugly workaround.`
			`strtoul(p->pos + 2, (char**)&p->pos, 16);`
			`} else {`
			`strtod(p->pos, (char**)&p->pos);`
			`}`
			`p->tok.len = p->pos - p->tok.ptr;`
			`p->pos--;`
			`return TOK_NUM;`
			`}`

			`static int is_reserved_word_token(const char* s, int len) {`
			`const char* reserved[] = {"break", "case", "catch", "continue", "debugger", "default",`
			`"delete", "do", "else", "false", "finally", "for",`
			`"function", "if", "in", "instanceof", "new", "null",`
			`"return", "switch", "this", "throw", "true", "try",`
			`"typeof", "var", "void", "while", "with", "let",`
			`"undefined", NULL};`
			`int i;`
			`if(!mjs_is_alpha(s[0])) return 0;`
			`for(i = 0; reserved[i] != NULL; i++) {`
			`if(len == (int)strlen(reserved[i]) && strncmp(s, reserved[i], len) == 0) return i + 1;`
			`}`
			`return 0;`
			`}`

			`static int getident(struct pstate* p) {`
			`while(mjs_is_ident(p->pos[0]) \|\| mjs_is_digit(p->pos[0])) p->pos++;`
			`p->tok.len = p->pos - p->tok.ptr;`
			`p->pos--;`
			`return TOK_IDENT;`
			`}`

			`static int getstr(struct pstate* p) {`
			`int quote = *p->pos++;`
			`p->tok.ptr++;`
			`while(p->pos[0] != '\0' && p->pos[0] != quote) {`
			`if(p->pos[0] == '\\' && p->pos[1] != '\0' &&`
			`(p->pos[1] == quote \|\| strchr("bfnrtv\\", p->pos[1]) != NULL)) {`
			`p->pos += 2;`
			`} else {`
			`p->pos++;`
			`}`
			`}`
			`p->tok.len = p->pos - p->tok.ptr;`
			`return TOK_STR;`
			`}`

			`static void skip_spaces_and_comments(struct pstate* p) {`
			`const char* pos;`
			`do {`
			`pos = p->pos;`
			`while(mjs_is_space(p->pos[0])) {`
			`if(p->pos[0] == '\n') p->line_no++;`
			`p->pos++;`
			`}`
			`if(p->pos[0] == '/' && p->pos[1] == '/') {`
			`while(p->pos[0] != '\0' && p->pos[0] != '\n') p->pos++;`
			`}`
			`if(p->pos[0] == '/' && p->pos[1] == '*') {`
			`p->pos += 2;`
			`while(p->pos[0] != '\0') {`
			`if(p->pos[0] == '\n') p->line_no++;`
			`if(p->pos[0] == '*' && p->pos[1] == '/') {`
			`p->pos += 2;`
			`break;`
			`}`
			`p->pos++;`
			`}`
			`}`
			`} while(pos < p->pos);`
			`}`

			`static int ptranslate(int tok) {`
			`#define DT(a, b) ((a) << 8 \| (b))`
			`#define TT(a, b, c) ((a) << 16 \| (b) << 8 \| (c))`
			`#define QT(a, b, c, d) ((a) << 24 \| (b) << 16 \| (c) << 8 \| (d))`
			`/* Map token ID produced by mjs_tok.c to token ID produced by lemon */`
			`/* clang-format off */`
			`switch (tok) {`
			`case ':': return TOK_COLON;`
			`case ';': return TOK_SEMICOLON;`
			`case ',': return TOK_COMMA;`
			`case '=': return TOK_ASSIGN;`
			`case '{': return TOK_OPEN_CURLY;`
			`case '}': return TOK_CLOSE_CURLY;`
			`case '(': return TOK_OPEN_PAREN;`
			`case ')': return TOK_CLOSE_PAREN;`
			`case '[': return TOK_OPEN_BRACKET;`
			`case ']': return TOK_CLOSE_BRACKET;`
			`case '*': return TOK_MUL;`
			`case '+': return TOK_PLUS;`
			`case '-': return TOK_MINUS;`
			`case '/': return TOK_DIV;`
			`case '%': return TOK_REM;`
			`case '&': return TOK_AND;`
			`case '\|': return TOK_OR;`
			`case '^': return TOK_XOR;`
			`case '.': return TOK_DOT;`
			`case '?': return TOK_QUESTION;`
			`case '!': return TOK_NOT;`
			`case '~': return TOK_TILDA;`
			`case '<': return TOK_LT;`
			`case '>': return TOK_GT;`
			`case DT('<','<'): return TOK_LSHIFT;`
			`case DT('>','>'): return TOK_RSHIFT;`
			`case DT('-','-'): return TOK_MINUS_MINUS;`
			`case DT('+','+'): return TOK_PLUS_PLUS;`
			`case DT('+','='): return TOK_PLUS_ASSIGN;`
			`case DT('-','='): return TOK_MINUS_ASSIGN;`
			`case DT('*','='): return TOK_MUL_ASSIGN;`
			`case DT('/','='): return TOK_DIV_ASSIGN;`
			`case DT('&','='): return TOK_AND_ASSIGN;`
			`case DT('\|','='): return TOK_OR_ASSIGN;`
			`case DT('%','='): return TOK_REM_ASSIGN;`
			`case DT('^','='): return TOK_XOR_ASSIGN;`
			`case DT('=','='): return TOK_EQ;`
			`case DT('!','='): return TOK_NE;`
			`case DT('<','='): return TOK_LE;`
			`case DT('>','='): return TOK_GE;`
			`case DT('&','&'): return TOK_LOGICAL_AND;`
			`case DT('\|','\|'): return TOK_LOGICAL_OR;`
			`case TT('=','=','='): return TOK_EQ_EQ;`
			`case TT('!','=','='): return TOK_NE_NE;`
			`case TT('<','<','='): return TOK_LSHIFT_ASSIGN;`
			`case TT('>','>','='): return TOK_RSHIFT_ASSIGN;`
			`case TT('>','>','>'): return TOK_URSHIFT;`
			`case QT('>','>','>','='): return TOK_URSHIFT_ASSIGN;`
			`}`
			`/* clang-format on */`
			`return tok;`
			`}`

			`MJS_PRIVATE int pnext(struct pstate* p) {`
			`int tmp, tok = TOK_INVALID;`

			`skip_spaces_and_comments(p);`
			`p->tok.ptr = p->pos;`
			`p->tok.len = 1;`

			`if(p->pos[0] == '\0') {`
			`tok = TOK_EOF;`
			`} else if(mjs_is_digit(p->pos[0])) {`
			`tok = getnum(p);`
			`} else if(p->pos[0] == '\'' \|\| p->pos[0] == '"') {`
			`tok = getstr(p);`
			`} else if(mjs_is_ident(p->pos[0])) {`
			`tok = getident(p);`
			`/*`
			* NOTE: getident() has side effects on `p`, and `is_reserved_word_token()`
			`* relies on them. Since in C the order of evaluation of the operands is`
			* undefined, `is_reserved_word_token()` should be called in a separate
			`* statement.`
			`*/`
			`tok += is_reserved_word_token(p->tok.ptr, p->tok.len);`
			`} else if(strchr(",.:;{}[]()?", p->pos[0]) != NULL) {`
			`tok = p->pos[0];`
			`} else if(`
			`(tmp = longtok3(p, '<', '<', '=')) != TOK_EOF \|\|`
			`(tmp = longtok3(p, '>', '>', '=')) != TOK_EOF \|\|`
			`(tmp = longtok4(p, '>', '>', '>', '=')) != TOK_EOF \|\|`
			`(tmp = longtok3(p, '>', '>', '>')) != TOK_EOF \|\|`
			`(tmp = longtok3(p, '=', '=', '=')) != TOK_EOF \|\|`
			`(tmp = longtok3(p, '!', '=', '=')) != TOK_EOF \|\|`
			`(tmp = longtok(p, "&", "&=")) != TOK_EOF \|\| (tmp = longtok(p, "\|", "\|=")) != TOK_EOF \|\|`
			`(tmp = longtok(p, "<", "<=")) != TOK_EOF \|\| (tmp = longtok(p, ">", ">=")) != TOK_EOF \|\|`
			`(tmp = longtok(p, "-", "-=")) != TOK_EOF \|\| (tmp = longtok(p, "+", "+=")) != TOK_EOF) {`
			`tok = tmp;`
			`} else if((tmp = longtok(p, "^~+-%/*<>=!\|&", "=")) != TOK_EOF) {`
			`tok = tmp;`
			`}`
			`if(p->pos[0] != '\0') p->pos++;`
			`LOG(LL_VERBOSE_DEBUG, (" --> %d [%.*s]", tok, p->tok.len, p->tok.ptr));`
			`p->prev_tok = p->tok.tok;`
			`p->tok.tok = ptranslate(tok);`
			`return p->tok.tok;`
			`}`