mirror of
https://github.com/kanaka/mal.git
synced 2024-11-11 00:52:44 +03:00
394 lines
11 KiB
Ada
394 lines
11 KiB
Ada
with Ada.IO_Exceptions;
|
|
with Ada.Characters.Latin_1;
|
|
with Ada.Exceptions;
|
|
with Ada.Strings.Maps.Constants;
|
|
with Ada.Strings.Unbounded;
|
|
with Ada.Text_IO;
|
|
with Smart_Pointers;
|
|
with Types.Vector;
|
|
with Types.Hash_Map;
|
|
|
|
package body Reader is
|
|
|
|
use Types;
|
|
|
|
package ACL renames Ada.Characters.Latin_1;
|
|
|
|
type Lexemes is (Ignored_Tok,
|
|
Start_List_Tok, Start_Vector_Tok, Start_Hash_Tok,
|
|
Meta_Tok, Deref_Tok,
|
|
Quote_Tok, Quasi_Quote_Tok, Splice_Unq_Tok, Unquote_Tok,
|
|
Int_Tok, Float_Tok,
|
|
Str_Tok, Sym_Tok);
|
|
|
|
type Token (ID : Lexemes := Ignored_Tok) is record
|
|
case ID is
|
|
when Int_Tok =>
|
|
Int_Val : Mal_Integer;
|
|
when Float_Tok =>
|
|
Float_Val : Mal_Float;
|
|
when Str_Tok | Sym_Tok =>
|
|
Start_Char, Stop_Char : Natural;
|
|
when others => null;
|
|
end case;
|
|
end record;
|
|
|
|
Lisp_Whitespace : constant Ada.Strings.Maps.Character_Set :=
|
|
Ada.Strings.Maps.To_Set
|
|
(ACL.HT & ACL.LF & ACL.CR & ACL.Space & ACL.Comma);
|
|
|
|
-- [^\s\[\]{}('"`,;)]
|
|
Terminator_Syms : Ada.Strings.Maps.Character_Set :=
|
|
Ada.Strings.Maps."or"
|
|
(Lisp_Whitespace,
|
|
Ada.Strings.Maps.To_Set ("[]{}('""`,;)"));
|
|
|
|
-- The unterminated string error
|
|
String_Error : exception;
|
|
|
|
|
|
function Convert_String (S : String) return String is
|
|
use Ada.Strings.Unbounded;
|
|
Res : Unbounded_String;
|
|
I : Positive;
|
|
Str_Last : Natural;
|
|
begin
|
|
Str_Last := S'Last;
|
|
I := S'First;
|
|
while I <= Str_Last loop
|
|
if S (I) = '\' then
|
|
if I+1 > Str_Last then
|
|
Append (Res, S (I));
|
|
I := I + 1;
|
|
elsif S (I+1) = 'n' then
|
|
Append (Res, Ada.Characters.Latin_1.LF);
|
|
I := I + 2;
|
|
elsif S (I+1) = '"' then
|
|
Append (Res, S (I+1));
|
|
I := I + 2;
|
|
elsif S (I+1) = '\' then
|
|
Append (Res, S (I+1));
|
|
I := I + 2;
|
|
else
|
|
Append (Res, S (I));
|
|
I := I + 1;
|
|
end if;
|
|
else
|
|
Append (Res, S (I));
|
|
I := I + 1;
|
|
end if;
|
|
end loop;
|
|
return To_String (Res);
|
|
end Convert_String;
|
|
|
|
Str_Len : Natural := 0;
|
|
Saved_Line : Ada.Strings.Unbounded.Unbounded_String;
|
|
Char_To_Read : Natural := 1;
|
|
|
|
function Get_Token return Token is
|
|
Res : Token;
|
|
I, J : Natural;
|
|
use Ada.Strings.Unbounded;
|
|
begin
|
|
|
|
<<Tail_Call_Opt>>
|
|
|
|
-- Skip over whitespace...
|
|
I := Char_To_Read;
|
|
while I <= Str_Len and then
|
|
Ada.Strings.Maps.Is_In (Element (Saved_Line, I), Lisp_Whitespace) loop
|
|
I := I + 1;
|
|
end loop;
|
|
|
|
-- Filter out lines consisting of only whitespace
|
|
if I > Str_Len then
|
|
return (ID => Ignored_Tok);
|
|
end if;
|
|
|
|
J := I;
|
|
|
|
case Element (Saved_Line, J) is
|
|
|
|
when ''' => Res := (ID => Quote_Tok); Char_To_Read := J+1;
|
|
|
|
when '`' => Res := (ID => Quasi_Quote_Tok); Char_To_Read := J+1;
|
|
|
|
when '~' => -- Tilde
|
|
|
|
if J+1 <= Str_Len and then Element (Saved_Line, J+1) = '@' then
|
|
Res := (ID => Splice_Unq_Tok);
|
|
Char_To_Read := J+2;
|
|
else
|
|
-- Just a Tilde
|
|
Res := (ID => Unquote_Tok);
|
|
Char_To_Read := J+1;
|
|
end if;
|
|
|
|
when '(' => Res := (ID => Start_List_Tok); Char_To_Read := J+1;
|
|
when '[' => Res := (ID => Start_Vector_Tok); Char_To_Read := J+1;
|
|
when '{' => Res := (ID => Start_Hash_Tok); Char_To_Read := J+1;
|
|
|
|
when '^' => Res := (ID => Meta_Tok); Char_To_Read := J+1;
|
|
when '@' => Res := (ID => Deref_Tok); Char_To_Read := J+1;
|
|
|
|
when ']' | '}' | ')' =>
|
|
|
|
Res := (ID => Sym_Tok, Start_Char => J, Stop_Char => J);
|
|
Char_To_Read := J+1;
|
|
|
|
when '"' => -- a string
|
|
|
|
-- Skip over "
|
|
J := J + 1;
|
|
while J <= Str_Len and then
|
|
(Element (Saved_Line, J) /= '"' or else
|
|
Element (Saved_Line, J-1) = '\') loop
|
|
J := J + 1;
|
|
end loop;
|
|
|
|
-- So we either ran out of string..
|
|
if J > Str_Len then
|
|
raise String_Error;
|
|
end if;
|
|
|
|
-- or we reached an unescaped "
|
|
Res := (ID => Str_Tok, Start_Char => I, Stop_Char => J);
|
|
Char_To_Read := J + 1;
|
|
|
|
when ';' => -- a comment
|
|
|
|
-- Read to the end of the line or until
|
|
-- the saved_line string is exhausted.
|
|
-- NB if we reach the end we don't care
|
|
-- what the last char was.
|
|
while J < Str_Len and Element (Saved_Line, J) /= ACL.LF loop
|
|
J := J + 1;
|
|
end loop;
|
|
if J = Str_Len then
|
|
Res := (ID => Ignored_Tok);
|
|
else
|
|
Char_To_Read := J + 1;
|
|
-- was: Res := Get_Token;
|
|
goto Tail_Call_Opt;
|
|
end if;
|
|
|
|
when others => -- an atom
|
|
|
|
while J <= Str_Len and then
|
|
not Ada.Strings.Maps.Is_In (Element (Saved_Line, J), Terminator_Syms) loop
|
|
J := J + 1;
|
|
end loop;
|
|
|
|
-- Either we ran out of string or
|
|
-- the one at J was the start of a new token
|
|
Char_To_Read := J;
|
|
J := J - 1;
|
|
|
|
declare
|
|
Dots : Natural;
|
|
All_Digits : Boolean;
|
|
begin
|
|
-- check if all digits or .
|
|
Dots := 0;
|
|
All_Digits := True;
|
|
for K in I .. J loop
|
|
if (K = I and K /= J) and then Element (Saved_Line, K) = '-' then
|
|
null;
|
|
elsif Element (Saved_Line, K) = '.' then
|
|
Dots := Dots + 1;
|
|
elsif not (Element (Saved_Line, K) in '0' .. '9') then
|
|
All_Digits := False;
|
|
exit;
|
|
end if;
|
|
end loop;
|
|
|
|
if All_Digits then
|
|
if Dots = 0 then
|
|
Res :=
|
|
(ID => Int_Tok,
|
|
Int_Val => Mal_Integer'Value (Slice (Saved_Line, I, J)));
|
|
elsif Dots = 1 then
|
|
Res :=
|
|
(ID => Float_Tok,
|
|
Float_Val => Mal_Float'Value (Slice (Saved_Line, I, J)));
|
|
else
|
|
Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J);
|
|
end if;
|
|
else
|
|
Res := (ID => Sym_Tok, Start_Char => I, Stop_Char => J);
|
|
end if;
|
|
|
|
end;
|
|
|
|
end case;
|
|
|
|
return Res;
|
|
|
|
end Get_Token;
|
|
|
|
|
|
function Read_List (LT : Types.List_Types)
|
|
return Types.Mal_Handle is
|
|
|
|
MTA : Mal_Handle;
|
|
|
|
begin
|
|
|
|
MTA := Read_Form;
|
|
|
|
declare
|
|
List_SP : Mal_Handle;
|
|
List_P : List_Class_Ptr;
|
|
Close : String (1..1) := (1 => Types.Closing (LT));
|
|
begin
|
|
|
|
case LT is
|
|
when List_List => List_SP := New_List_Mal_Type (List_Type => LT);
|
|
when Vector_List => List_SP := Vector.New_Vector_Mal_Type;
|
|
when Hashed_List => List_SP := Hash_Map.New_Hash_Map_Mal_Type;
|
|
end case;
|
|
|
|
-- Need to append to a variable so...
|
|
List_P := Deref_List_Class (List_SP);
|
|
|
|
loop
|
|
|
|
if Is_Null (MTA) then
|
|
return New_Error_Mal_Type (Str => "expected '" & Close & "', got EOF");
|
|
end if;
|
|
|
|
exit when Deref (MTA).Sym_Type = Sym and then
|
|
Symbol_Mal_Type (Deref (MTA).all).Get_Sym = Close;
|
|
|
|
Append (List_P.all, MTA);
|
|
|
|
MTA := Read_Form;
|
|
|
|
end loop;
|
|
|
|
return List_SP;
|
|
|
|
end;
|
|
|
|
end Read_List;
|
|
|
|
|
|
function Read_Form return Types.Mal_Handle is
|
|
Tok : Token;
|
|
MTS : Mal_Handle;
|
|
use Ada.Strings.Unbounded;
|
|
begin
|
|
|
|
Tok := Get_Token;
|
|
|
|
case Tok.ID is
|
|
|
|
when Ignored_Tok => return Smart_Pointers.Null_Smart_Pointer;
|
|
|
|
when Int_Tok => return New_Int_Mal_Type (Tok.Int_Val);
|
|
|
|
when Float_Tok => return New_Float_Mal_Type (Tok.Float_Val);
|
|
|
|
when Start_List_Tok => return Read_List (List_List);
|
|
|
|
when Start_Vector_Tok => return Read_List (Vector_List);
|
|
|
|
when Start_Hash_Tok => return Read_List (Hashed_List);
|
|
|
|
when Meta_Tok =>
|
|
|
|
declare
|
|
Meta, Obj : Mal_Handle;
|
|
begin
|
|
Meta := Read_Form;
|
|
Obj := Read_Form;
|
|
return Make_New_List
|
|
((1 => New_Symbol_Mal_Type ("with-meta"),
|
|
2 => Obj,
|
|
3 => Meta));
|
|
end;
|
|
|
|
when Deref_Tok =>
|
|
|
|
return Make_New_List
|
|
((1 => New_Symbol_Mal_Type ("deref"),
|
|
2 => Read_Form));
|
|
|
|
when Quote_Tok =>
|
|
|
|
return Make_New_List
|
|
((1 => New_Symbol_Mal_Type ("quote"),
|
|
2 => Read_Form));
|
|
|
|
when Quasi_Quote_Tok =>
|
|
|
|
return Make_New_List
|
|
((1 => New_Symbol_Mal_Type ("quasiquote"),
|
|
2 => Read_Form));
|
|
|
|
when Splice_Unq_Tok =>
|
|
|
|
return Make_New_List
|
|
((1 => New_Symbol_Mal_Type ("splice-unquote"),
|
|
2 => Read_Form));
|
|
|
|
when Unquote_Tok =>
|
|
|
|
return Make_New_List
|
|
((1 => New_Symbol_Mal_Type ("unquote"),
|
|
2 => Read_Form));
|
|
|
|
when Str_Tok =>
|
|
|
|
-- +/-1 strips out the double quotes.
|
|
-- Convert_String converts backquoted charaters to raw format.
|
|
return New_String_Mal_Type
|
|
(Convert_String
|
|
(Slice (Saved_Line, Tok.Start_Char + 1, Tok.Stop_Char - 1)));
|
|
|
|
when Sym_Tok =>
|
|
|
|
-- Mal interpreter is required to know about true, false and nil.
|
|
declare
|
|
S : String := Slice (Saved_Line, Tok.Start_Char, Tok.Stop_Char);
|
|
begin
|
|
if S = "true" then
|
|
return New_Bool_Mal_Type (True);
|
|
elsif S = "false" then
|
|
return New_Bool_Mal_Type (False);
|
|
elsif S = "nil" then
|
|
return New_Nil_Mal_Type;
|
|
else
|
|
return New_Symbol_Mal_Type (S);
|
|
end if;
|
|
end;
|
|
|
|
end case;
|
|
|
|
end Read_Form;
|
|
|
|
|
|
procedure Lex_Init (S : String) is
|
|
begin
|
|
Str_Len := S'Length;
|
|
Saved_Line := Ada.Strings.Unbounded.To_Unbounded_String (S);
|
|
Char_To_Read := 1;
|
|
end Lex_Init;
|
|
|
|
|
|
function Read_Str (S : String) return Types.Mal_Handle is
|
|
I, Str_Len : Natural := S'Length;
|
|
begin
|
|
|
|
Lex_Init (S);
|
|
|
|
return Read_Form;
|
|
|
|
exception
|
|
when String_Error =>
|
|
return New_Error_Mal_Type (Str => "expected '""', got EOF");
|
|
end Read_Str;
|
|
|
|
|
|
end Reader;
|