unleashed-firmware/lib/mjs/mjs_tok.c

248 lines
8.3 KiB
C
Raw Normal View History

/*
* Copyright (c) 2017 Cesanta Software Limited
* All rights reserved
*/
#include <stdlib.h>
#include <string.h>
#include "common/cs_dbg.h"
#include "mjs_tok.h"
MJS_PRIVATE void pinit(const char* file_name, const char* buf, struct pstate* p) {
memset(p, 0, sizeof(*p));
p->line_no = 1;
p->last_emitted_line_no = 1;
p->file_name = file_name;
p->buf = p->pos = buf;
mbuf_init(&p->offset_lineno_map, 0);
}
// We're not relying on the target libc ctype, as it may incorrectly
// handle negative arguments, e.g. isspace(-1).
static int mjs_is_space(int c) {
return c == ' ' || c == '\r' || c == '\n' || c == '\t' || c == '\f' || c == '\v';
}
MJS_PRIVATE int mjs_is_digit(int c) {
return c >= '0' && c <= '9';
}
static int mjs_is_alpha(int c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
MJS_PRIVATE int mjs_is_ident(int c) {
return c == '_' || c == '$' || mjs_is_alpha(c);
}
// Try to parse a token that can take one or two chars.
static int longtok(struct pstate* p, const char* first_chars, const char* second_chars) {
if(strchr(first_chars, p->pos[0]) == NULL) return TOK_EOF;
if(p->pos[1] != '\0' && strchr(second_chars, p->pos[1]) != NULL) {
p->tok.len++;
p->pos++;
return p->pos[-1] << 8 | p->pos[0];
}
return p->pos[0];
}
// Try to parse a token that takes exactly 3 chars.
static int longtok3(struct pstate* p, char a, char b, char c) {
if(p->pos[0] == a && p->pos[1] == b && p->pos[2] == c) {
p->tok.len += 2;
p->pos += 2;
return p->pos[-2] << 16 | p->pos[-1] << 8 | p->pos[0];
}
return TOK_EOF;
}
// Try to parse a token that takes exactly 4 chars.
static int longtok4(struct pstate* p, char a, char b, char c, char d) {
if(p->pos[0] == a && p->pos[1] == b && p->pos[2] == c && p->pos[3] == d) {
p->tok.len += 3;
p->pos += 3;
return p->pos[-3] << 24 | p->pos[-2] << 16 | p->pos[-1] << 8 | p->pos[0];
}
return TOK_EOF;
}
static int getnum(struct pstate* p) {
if(p->pos[0] == '0' && p->pos[1] == 'x') {
// MSVC6 strtod cannot parse 0x... numbers, thus this ugly workaround.
strtoul(p->pos + 2, (char**)&p->pos, 16);
} else {
strtod(p->pos, (char**)&p->pos);
}
p->tok.len = p->pos - p->tok.ptr;
p->pos--;
return TOK_NUM;
}
static int is_reserved_word_token(const char* s, int len) {
const char* reserved[] = {"break", "case", "catch", "continue", "debugger", "default",
"delete", "do", "else", "false", "finally", "for",
"function", "if", "in", "instanceof", "new", "null",
"return", "switch", "this", "throw", "true", "try",
"typeof", "var", "void", "while", "with", "let",
"undefined", NULL};
int i;
if(!mjs_is_alpha(s[0])) return 0;
for(i = 0; reserved[i] != NULL; i++) {
if(len == (int)strlen(reserved[i]) && strncmp(s, reserved[i], len) == 0) return i + 1;
}
return 0;
}
static int getident(struct pstate* p) {
while(mjs_is_ident(p->pos[0]) || mjs_is_digit(p->pos[0])) p->pos++;
p->tok.len = p->pos - p->tok.ptr;
p->pos--;
return TOK_IDENT;
}
static int getstr(struct pstate* p) {
int quote = *p->pos++;
p->tok.ptr++;
while(p->pos[0] != '\0' && p->pos[0] != quote) {
if(p->pos[0] == '\\' && p->pos[1] != '\0' &&
(p->pos[1] == quote || strchr("bfnrtv\\", p->pos[1]) != NULL)) {
p->pos += 2;
} else {
p->pos++;
}
}
p->tok.len = p->pos - p->tok.ptr;
return TOK_STR;
}
static void skip_spaces_and_comments(struct pstate* p) {
const char* pos;
do {
pos = p->pos;
while(mjs_is_space(p->pos[0])) {
if(p->pos[0] == '\n') p->line_no++;
p->pos++;
}
if(p->pos[0] == '/' && p->pos[1] == '/') {
while(p->pos[0] != '\0' && p->pos[0] != '\n') p->pos++;
}
if(p->pos[0] == '/' && p->pos[1] == '*') {
p->pos += 2;
while(p->pos[0] != '\0') {
if(p->pos[0] == '\n') p->line_no++;
if(p->pos[0] == '*' && p->pos[1] == '/') {
p->pos += 2;
break;
}
p->pos++;
}
}
} while(pos < p->pos);
}
static int ptranslate(int tok) {
#define DT(a, b) ((a) << 8 | (b))
#define TT(a, b, c) ((a) << 16 | (b) << 8 | (c))
#define QT(a, b, c, d) ((a) << 24 | (b) << 16 | (c) << 8 | (d))
/* Map token ID produced by mjs_tok.c to token ID produced by lemon */
/* clang-format off */
switch (tok) {
case ':': return TOK_COLON;
case ';': return TOK_SEMICOLON;
case ',': return TOK_COMMA;
case '=': return TOK_ASSIGN;
case '{': return TOK_OPEN_CURLY;
case '}': return TOK_CLOSE_CURLY;
case '(': return TOK_OPEN_PAREN;
case ')': return TOK_CLOSE_PAREN;
case '[': return TOK_OPEN_BRACKET;
case ']': return TOK_CLOSE_BRACKET;
case '*': return TOK_MUL;
case '+': return TOK_PLUS;
case '-': return TOK_MINUS;
case '/': return TOK_DIV;
case '%': return TOK_REM;
case '&': return TOK_AND;
case '|': return TOK_OR;
case '^': return TOK_XOR;
case '.': return TOK_DOT;
case '?': return TOK_QUESTION;
case '!': return TOK_NOT;
case '~': return TOK_TILDA;
case '<': return TOK_LT;
case '>': return TOK_GT;
case DT('<','<'): return TOK_LSHIFT;
case DT('>','>'): return TOK_RSHIFT;
case DT('-','-'): return TOK_MINUS_MINUS;
case DT('+','+'): return TOK_PLUS_PLUS;
case DT('+','='): return TOK_PLUS_ASSIGN;
case DT('-','='): return TOK_MINUS_ASSIGN;
case DT('*','='): return TOK_MUL_ASSIGN;
case DT('/','='): return TOK_DIV_ASSIGN;
case DT('&','='): return TOK_AND_ASSIGN;
case DT('|','='): return TOK_OR_ASSIGN;
case DT('%','='): return TOK_REM_ASSIGN;
case DT('^','='): return TOK_XOR_ASSIGN;
case DT('=','='): return TOK_EQ;
case DT('!','='): return TOK_NE;
case DT('<','='): return TOK_LE;
case DT('>','='): return TOK_GE;
case DT('&','&'): return TOK_LOGICAL_AND;
case DT('|','|'): return TOK_LOGICAL_OR;
case TT('=','=','='): return TOK_EQ_EQ;
case TT('!','=','='): return TOK_NE_NE;
case TT('<','<','='): return TOK_LSHIFT_ASSIGN;
case TT('>','>','='): return TOK_RSHIFT_ASSIGN;
case TT('>','>','>'): return TOK_URSHIFT;
case QT('>','>','>','='): return TOK_URSHIFT_ASSIGN;
}
/* clang-format on */
return tok;
}
MJS_PRIVATE int pnext(struct pstate* p) {
int tmp, tok = TOK_INVALID;
skip_spaces_and_comments(p);
p->tok.ptr = p->pos;
p->tok.len = 1;
if(p->pos[0] == '\0') {
tok = TOK_EOF;
} else if(mjs_is_digit(p->pos[0])) {
tok = getnum(p);
} else if(p->pos[0] == '\'' || p->pos[0] == '"') {
tok = getstr(p);
} else if(mjs_is_ident(p->pos[0])) {
tok = getident(p);
/*
* NOTE: getident() has side effects on `p`, and `is_reserved_word_token()`
* relies on them. Since in C the order of evaluation of the operands is
* undefined, `is_reserved_word_token()` should be called in a separate
* statement.
*/
tok += is_reserved_word_token(p->tok.ptr, p->tok.len);
} else if(strchr(",.:;{}[]()?", p->pos[0]) != NULL) {
tok = p->pos[0];
} else if(
(tmp = longtok3(p, '<', '<', '=')) != TOK_EOF ||
(tmp = longtok3(p, '>', '>', '=')) != TOK_EOF ||
(tmp = longtok4(p, '>', '>', '>', '=')) != TOK_EOF ||
(tmp = longtok3(p, '>', '>', '>')) != TOK_EOF ||
(tmp = longtok3(p, '=', '=', '=')) != TOK_EOF ||
(tmp = longtok3(p, '!', '=', '=')) != TOK_EOF ||
(tmp = longtok(p, "&", "&=")) != TOK_EOF || (tmp = longtok(p, "|", "|=")) != TOK_EOF ||
(tmp = longtok(p, "<", "<=")) != TOK_EOF || (tmp = longtok(p, ">", ">=")) != TOK_EOF ||
(tmp = longtok(p, "-", "-=")) != TOK_EOF || (tmp = longtok(p, "+", "+=")) != TOK_EOF) {
tok = tmp;
} else if((tmp = longtok(p, "^~+-%/*<>=!|&", "=")) != TOK_EOF) {
tok = tmp;
}
if(p->pos[0] != '\0') p->pos++;
LOG(LL_VERBOSE_DEBUG, (" --> %d [%.*s]", tok, p->tok.len, p->tok.ptr));
p->prev_tok = p->tok.tok;
p->tok.tok = ptranslate(tok);
return p->tok.tok;
}