1
1
mirror of https://github.com/kanaka/mal.git synced 2024-09-20 10:07:45 +03:00
mal/vhdl/reader.vhdl
Joel Martin 4aa0ebdf47 Error on unterminated strings.
Add a step1 test to make sure that implementations are properly
throwing an error on unclosed strings.

Fix 47 implementations and update the guide to note the correct
behavior.
2019-01-25 16:16:06 -06:00

355 lines
10 KiB
VHDL

library STD;
use STD.textio.all;
library WORK;
use WORK.types.all;
package reader is
procedure read_str(s: in string; result: out mal_val_ptr; err: out mal_val_ptr);
end package reader;
package body reader is
type token_list is array(natural range <>) of line;
type token_list_ptr is access token_list;
function is_eol_char(c: in character) return boolean is
begin
case c is
when LF | CR => return true;
when others => return false;
end case;
end function is_eol_char;
function is_separator_char(c: in character) return boolean is
begin
case c is
when LF | CR | ' ' | '[' | ']' | '{' | '}' | '(' | ')' |
''' | '"' | '`' | ',' | ';' => return true;
when others => return false;
end case;
end function is_separator_char;
procedure next_token(str: in string; pos: in positive; token: inout line; next_start_pos: out positive; ok: out boolean) is
variable ch: character;
variable tmppos: positive;
begin
token := new string'("");
if pos > str'length then
ok := false;
return;
end if;
ch := str(pos);
case ch is
when ' ' | ',' | LF | CR | HT =>
next_start_pos := pos + 1;
token := new string'("");
ok := true;
return;
when '[' | ']' | '{' | '}' | '(' | ')' | ''' | '`' | '^' | '@' =>
next_start_pos := pos + 1;
token := new string'("" & ch);
ok := true;
return;
when '~' =>
if str(pos + 1) = '@' then
next_start_pos := pos + 2;
token := new string'("~@");
else
next_start_pos := pos + 1;
token := new string'("~");
end if;
ok := true;
return;
when ';' =>
tmppos := pos + 1;
while tmppos <= str'length and not is_eol_char(str(tmppos)) loop
tmppos := tmppos + 1;
end loop;
next_start_pos := tmppos;
token := new string'("");
ok := true;
return;
when '"' =>
tmppos := pos + 1;
while tmppos < str'length and str(tmppos) /= '"' loop
if str(tmppos) = '\' then
tmppos := tmppos + 2;
else
tmppos := tmppos + 1;
end if;
end loop;
token := new string(1 to (tmppos - pos + 1));
token(1 to (tmppos - pos + 1)) := str(pos to tmppos);
next_start_pos := tmppos + 1;
ok := true;
return;
when others =>
tmppos := pos;
while tmppos <= str'length and not is_separator_char(str(tmppos)) loop
tmppos := tmppos + 1;
end loop;
token := new string(1 to (tmppos - pos));
token(1 to (tmppos - pos)) := str(pos to tmppos - 1);
next_start_pos := tmppos;
ok := true;
return;
end case;
ok := false;
end procedure next_token;
function tokenize(str: in string) return token_list_ptr is
variable next_pos: positive := 1;
variable ok: boolean := true;
variable tokens: token_list_ptr;
variable t: line;
begin
while ok loop
next_token(str, next_pos, t, next_pos, ok);
if t'length > 0 then
if tokens = null then
tokens := new token_list(0 to 0);
tokens(0) := t;
else
tokens := new token_list'(tokens.all & t);
end if;
end if;
end loop;
return tokens;
end function tokenize;
type reader_class is record
tokens: token_list_ptr;
pos: natural;
end record reader_class;
procedure reader_new(r: inout reader_class; a_tokens: inout token_list_ptr) is
begin
r := (tokens => a_tokens, pos => 0);
end procedure reader_new;
procedure reader_peek(r: inout reader_class; token: out line) is
begin
if r.pos < r.tokens'length then
token := r.tokens(r.pos);
else
token := null;
end if;
end procedure reader_peek;
procedure reader_next(r: inout reader_class; token: out line) is
begin
reader_peek(r, token);
r.pos := r.pos + 1;
end procedure reader_next;
-- Forward declaration
procedure read_form(r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr);
function is_digit(c: in character) return boolean is
begin
case c is
when '0' to '9' => return true;
when others => return false;
end case;
end function is_digit;
function unescape_char(c: in character) return character is
begin
case c is
when 'n' => return LF;
when others => return c;
end case;
end function unescape_char;
procedure unescape_string_token(token: inout line; result: out line) is
variable s: line;
variable src_i, dst_i: integer;
begin
s := new string(1 to token'length);
dst_i := 0;
src_i := 2; -- skip the initial quote
while src_i <= token'length - 1 loop
dst_i := dst_i + 1;
if token(src_i) = '\' then
s(dst_i) := unescape_char(token(src_i + 1));
src_i := src_i + 2;
else
s(dst_i) := token(src_i);
src_i := src_i + 1;
end if;
end loop;
result := new string'(s(1 to dst_i));
deallocate(s);
end procedure unescape_string_token;
procedure read_atom(r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr) is
variable token, s: line;
variable num: integer;
variable ch: character;
begin
reader_next(r, token);
if token.all = "nil" then
new_nil(result);
elsif token.all = "true" then
new_true(result);
elsif token.all = "false" then
new_false(result);
else
ch := token(1);
case ch is
when '-' =>
if token'length > 1 and is_digit(token(2)) then
read(token, num);
new_number(num, result);
else
new_symbol(token, result);
end if;
when '0' to '9' =>
read(token, num);
new_number(num, result);
when ':' =>
s := new string(1 to token'length - 1);
s(1 to s'length) := token(2 to token'length);
new_keyword(s, result);
when '"' =>
if token(token'length) /= '"' then
new_string("expected '""', got EOF", err);
result := null;
return;
end if;
unescape_string_token(token, s);
new_string(s, result);
when others =>
new_symbol(token, result);
end case;
end if;
end procedure read_atom;
procedure read_sequence(list_type: in mal_type_tag; end_ch: in string; r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr) is
variable token: line;
variable element, sub_err: mal_val_ptr;
variable seq: mal_seq_ptr;
begin
reader_next(r, token); -- Consume the open paren
reader_peek(r, token);
seq := new mal_seq(0 to -1);
while token /= null and token.all /= end_ch loop
read_form(r, element, sub_err);
if sub_err /= null then
err := sub_err;
result := null;
return;
end if;
seq := new mal_seq'(seq.all & element);
reader_peek(r, token);
end loop;
if token = null then
new_string("expected '" & end_ch & "', got EOF", err);
result := null;
return;
end if;
reader_next(r, token); -- Consume the close paren
new_seq_obj(list_type, seq, result);
end procedure read_sequence;
procedure reader_macro(r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr; sym_name: in string) is
variable token, sym_line: line;
variable seq: mal_seq_ptr;
variable rest, rest_err: mal_val_ptr;
begin
reader_next(r, token);
seq := new mal_seq(0 to 1);
sym_line := new string'(sym_name);
new_symbol(sym_line, seq(0));
read_form(r, rest, rest_err);
if rest_err /= null then
err := rest_err;
result := null;
return;
end if;
seq(1) := rest;
new_seq_obj(mal_list, seq, result);
end procedure reader_macro;
procedure with_meta_reader_macro(r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr) is
variable token, sym_line: line;
variable seq: mal_seq_ptr;
variable meta, rest, rest_err: mal_val_ptr;
begin
reader_next(r, token);
seq := new mal_seq(0 to 2);
sym_line := new string'("with-meta");
new_symbol(sym_line, seq(0));
read_form(r, meta, rest_err);
if rest_err /= null then
err := rest_err;
result := null;
return;
end if;
read_form(r, rest, rest_err);
if rest_err /= null then
err := rest_err;
result := null;
return;
end if;
seq(1) := rest;
seq(2) := meta;
new_seq_obj(mal_list, seq, result);
end procedure with_meta_reader_macro;
procedure read_form(r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr) is
variable token: line;
variable ch: character;
begin
reader_peek(r, token);
ch := token(1);
case ch is
when ''' => reader_macro(r, result, err, "quote");
when '`' => reader_macro(r, result, err, "quasiquote");
when '~' =>
if token'length = 1 then
reader_macro(r, result, err, "unquote");
else
if token(2) = '@' then
reader_macro(r, result, err, "splice-unquote");
else
new_string("Unknown token", err);
end if;
end if;
when '^' => with_meta_reader_macro(r, result, err);
when '@' => reader_macro(r, result, err, "deref");
when '(' => read_sequence(mal_list, ")", r, result, err);
when ')' => new_string("unexcepted ')'", err);
when '[' => read_sequence(mal_vector, "]", r, result, err);
when ']' => new_string("unexcepted ']'", err);
when '{' => read_sequence(mal_hashmap, "}", r, result, err);
when '}' => new_string("unexcepted '}'", err);
when others => read_atom(r, result, err);
end case;
end procedure read_form;
procedure read_str(s: in string; result: out mal_val_ptr; err: out mal_val_ptr) is
variable tokens: token_list_ptr;
variable r: reader_class;
begin
tokens := tokenize(s);
if tokens = null or tokens'length = 0 then
result := null;
err := null;
return;
end if;
reader_new(r, tokens);
read_form(r, result, err);
end procedure read_str;
end package body reader;