mirror of
https://github.com/kanaka/mal.git
synced 2024-10-27 14:52:16 +03:00
8a19f60386
- Reorder README to have implementation list after "learning tool" bullet. - This also moves tests/ and libs/ into impls. It would be preferable to have these directories at the top level. However, this causes difficulties with the wasm implementations which need pre-open directories and have trouble with paths starting with "../../". So in lieu of that, symlink those directories to the top-level. - Move the run_argv_test.sh script into the tests directory for general hygiene.
342 lines
10 KiB
Zig
342 lines
10 KiB
Zig
const fmt = @import("std").fmt;
|
|
const warn = @import("std").debug.warn;
|
|
|
|
pub const pcre = @cImport({
|
|
@cInclude("pcre.h");
|
|
});
|
|
|
|
const MalType = @import("types.zig").MalType;
|
|
const MalData = @import("types.zig").MalData;
|
|
const MalTypeValue = @import("types.zig").MalTypeValue;
|
|
const MalError = @import("error.zig").MalError;
|
|
const MalLinkedList = @import("linked_list.zig").MalLinkedList;
|
|
const printer = @import("printer.zig");
|
|
|
|
const Allocator = @import("std").heap.c_allocator;
|
|
const string_eql = @import("utils.zig").string_eql;
|
|
const linked_list = @import("linked_list.zig");
|
|
|
|
/// PCRE pattern used to split source text into tokens.
///
/// The leading `[\s,]*` skips whitespace and commas. The single capture
/// group then matches one of: `~@`, one special character, a string literal
/// (the closing quote is optional, so unterminated strings still tokenize),
/// a `;` comment running to the end of the line, or a run of characters that
/// are neither whitespace nor special.
const match: [*]const u8 =
    c\\[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)
;

/// Compiled form of `match`. Starts out null and is filled in by `read_str`
/// the first time it runs.
var re: ?*pcre.pcre = null;

/// Out-parameters handed to `pcre_compile`: the error message pointer and the
/// offset associated with it.
var error_msg: [*c]const u8 = undefined;
var erroroffset: c_int = 0;
|
|
|
|
/// Cursor over a tokenized source string.
///
/// `tokens` is a flat array of (start, end) offsets into `string`, two
/// entries per token, so token `i` is `string[tokens[2*i]..tokens[2*i+1]]`.
/// `position` counts tokens, not array slots.
const Reader = struct {
    position: u32,
    string: []const u8,
    tokens: []usize,

    /// Builds a reader positioned on the first token. Both slices are stored
    /// as-is; no copy is made.
    pub fn init(string: []const u8, tokens: []usize) Reader {
        return Reader{
            .position = 0,
            .string = string,
            .tokens = tokens,
        };
    }

    /// Returns the current non-comment token and steps past it.
    /// Returns "" when the tokens are exhausted (the position still advances).
    pub fn next(self: *Reader) []const u8 {
        const this_token = self.peek();
        self.position += 1;
        return this_token;
    }

    /// Returns the current non-comment token without consuming it.
    /// Comment tokens (starting with ';') are skipped, which advances
    /// `position`. Returns "" when the tokens are exhausted.
    pub fn peek(self: *Reader) []const u8 {
        while (!self.eol()) {
            const start = self.tokens[2 * self.position];
            const end = self.tokens[2 * self.position + 1];
            // An empty token can start at string.len (the tokenizer produces
            // one when the input ends in whitespace or commas), so only look
            // at the first byte when the token actually has one.
            if (start < end and self.string[start] == ';') {
                self.position += 1;
                continue;
            }
            return self.string[start..end];
        }
        return "";
    }

    /// True once `position` is past the last token.
    pub fn eol(self: *Reader) bool {
        return (2 * self.position >= self.tokens.len);
    }
};
|
|
|
|
/// One reader-macro entry: a token that expands into a list headed by a
/// symbol.
const AliasPair = struct {
    /// Token text that triggers the expansion (compared against the whole token).
    name: []const u8,
    /// Symbol name placed at the head of the generated list.
    value: []const u8,
    /// Number of forms read after the token and added to the list.
    count: u8,
};
|
|
|
|
/// Table of reader macros consulted by `read_form`. Every entry consumes one
/// following form except `^`, which consumes two.
const alias_pairs = [_]AliasPair{
    AliasPair{ .name = "@", .value = "deref", .count = 1 },
    AliasPair{ .name = "'", .value = "quote", .count = 1 },
    AliasPair{ .name = "`", .value = "quasiquote", .count = 1 },
    AliasPair{ .name = "~", .value = "unquote", .count = 1 },
    AliasPair{ .name = "~@", .value = "splice-unquote", .count = 1 },
    AliasPair{ .name = "^", .value = "with-meta", .count = 2 },
};
|
|
|
|
/// Reads one form starting at the reader's current token.
///
/// Returns null when the reader is already exhausted and a nil value when
/// the current token is empty. Otherwise dispatches on the first character
/// for lists, vectors, keywords and hash maps, then tries the reader macros
/// in `alias_pairs`, and finally falls back to `read_atom`.
pub fn read_form(reader: *Reader) MalError!?*MalType {
    if (reader.eol()) return null;

    const token = reader.peek();
    if (token.len == 0) return MalType.new_nil(Allocator);

    switch (token[0]) {
        '(' => return try read_list(reader),
        '[' => return try read_vector(reader),
        ':' => {
            // Keyword: consume the token and drop the leading ':'.
            const keyword = reader.next();
            return MalType.new_keyword(Allocator, keyword[1..keyword.len]);
        },
        '{' => return try read_hashmap(reader),
        else => {},
    }

    for (alias_pairs) |pair| {
        if (!string_eql(token, pair.name)) continue;

        var forms = MalLinkedList.init(Allocator);
        const symbol = try MalType.new_generic(Allocator, pair.value);
        // Step over the macro token itself.
        _ = reader.next();

        // Each form is prepended, so the forms end up in reverse reading
        // order; the symbol is prepended last and therefore heads the list.
        var remaining = pair.count;
        while (remaining > 0) : (remaining -= 1) {
            const form = (try read_form(reader)) orelse return MalError.ArgError;
            try linked_list.prepend_mal(Allocator, &forms, form);
        }
        try linked_list.prepend_mal(Allocator, &forms, symbol);

        const result = try MalType.new_nil(Allocator);
        result.data = MalData{ .List = forms };
        return result;
    }

    return try read_atom(reader);
}
|
|
|
|
/// Reads a parenthesised list. The reader must be positioned on the opening
/// token, which is consumed unconditionally.
///
/// Returns `ReaderUnmatchedParen` if the tokens run out (or an empty token
/// is reached) before a token starting with ')' is found, and `ArgError`
/// if an element cannot be read.
pub fn read_list(reader: *Reader) MalError!*MalType {
    _ = reader.next();
    var elements = MalLinkedList.init(Allocator);
    const result: *MalType = try MalType.new_nil(Allocator);

    while (!reader.eol()) {
        const token = reader.peek();
        if (token.len == 0) return MalError.ReaderUnmatchedParen;

        if (token[0] != ')') {
            const element = (try read_form(reader)) orelse return MalError.ArgError;
            try linked_list.append_mal(Allocator, &elements, element);
            continue;
        }

        // Closing token: consume it and hand back the finished list.
        _ = reader.next();
        result.data = MalData{ .List = elements };
        return result;
    }
    return MalError.ReaderUnmatchedParen;
}
|
|
|
|
/// Reads a bracketed vector. The reader must be positioned on the opening
/// token, which is consumed unconditionally.
///
/// Returns `ReaderUnmatchedParen` if the tokens run out (or an empty token
/// is reached) before a token starting with ']' is found, and `ArgError`
/// if an element cannot be read.
pub fn read_vector(reader: *Reader) MalError!*MalType {
    _ = reader.next();
    var elements = MalLinkedList.init(Allocator);
    const result: *MalType = try MalType.new_nil(Allocator);

    while (!reader.eol()) {
        const token = reader.peek();
        if (token.len == 0) return MalError.ReaderUnmatchedParen;

        if (token[0] != ']') {
            const element = (try read_form(reader)) orelse return MalError.ArgError;
            try linked_list.append_mal(Allocator, &elements, element);
            continue;
        }

        // Closing token: consume it and hand back the finished vector.
        _ = reader.next();
        result.data = MalData{ .Vector = elements };
        return result;
    }
    return MalError.ReaderUnmatchedParen;
}
|
|
|
|
|
|
/// Reads a brace-delimited hash map. The reader must be positioned on the
/// opening token, which is consumed unconditionally.
///
/// Entries are read as key/value pairs. A key must read as a String or a
/// Keyword, otherwise `TypeError` is returned. `ReaderBadHashmap` is
/// returned when a key is followed by the closing brace or by the end of the
/// input instead of a value. `ReaderUnmatchedParen` is returned when the
/// tokens run out before a token starting with '}' is found.
pub fn read_hashmap(reader: *Reader) MalError!*MalType {
    _ = reader.next();
    const new_hashmap = try MalType.new_hashmap(Allocator);

    while (!reader.eol()) {
        const next_token = reader.peek();
        if (next_token.len == 0) {
            return MalError.ReaderUnmatchedParen;
        }
        if (next_token[0] == '}') {
            _ = reader.next();
            return new_hashmap;
        }

        const mal = (try read_form(reader)) orelse return MalError.ArgError;
        const key = switch (mal.data) {
            .String => |s| s,
            .Keyword => |kwd| kwd,
            else => return MalError.TypeError,
        };

        // Look at the token that follows the key. The token peeked before
        // the key was read is stale here and can never be empty or '}', so
        // checking it would never detect a missing value.
        const value_token = reader.peek();
        if (value_token.len == 0 or value_token[0] == '}') {
            return MalError.ReaderBadHashmap;
        }

        const val = (try read_form(reader)) orelse return MalError.ArgError;
        try new_hashmap.hashmap_insert(key, val);
    }
    return MalError.ReaderUnmatchedParen;
}
|
|
|
|
/// Reports whether `c` is an ASCII decimal digit.
fn char_is_int(c: u8) bool {
    return (c >= '0' and c <= '9');
}

/// Reports whether `token` looks like an integer literal: it starts with a
/// digit, or with '-' immediately followed by a digit. Only the first one or
/// two bytes are inspected; the rest of the token is not validated here.
/// An empty token is not an integer.
fn token_is_int(token: []const u8) bool {
    // Guard the index below: an empty slice has no first byte.
    if (token.len == 0)
        return false;
    if (char_is_int(token[0]))
        return true;
    return token.len >= 2 and token[0] == '-' and char_is_int(token[1]);
}
|
|
|
|
/// Consumes the next token and turns it into a scalar value.
///
/// Checked in order: integer-looking tokens, the literals `nil`, `true` and
/// `false`, tokens starting with a double quote (strings), and finally
/// everything else, which becomes a generic value built from the token text.
pub fn read_atom(reader: *Reader) MalError!*MalType {
    const token = reader.next();

    if (token_is_int(token)) {
        const int_atom = try MalType.new_nil(Allocator);
        try read_atom_int(int_atom, token);
        return int_atom;
    }
    if (string_eql(token, "nil")) {
        return MalType.new_nil(Allocator);
    }
    if (string_eql(token, "true")) {
        return MalType.new_bool(Allocator, true);
    }
    if (string_eql(token, "false")) {
        return MalType.new_bool(Allocator, false);
    }
    if (token[0] == '"') {
        const string_atom = try MalType.new_nil(Allocator);
        try read_atom_string(string_atom, token);
        return string_atom;
    }
    return try MalType.new_generic(Allocator, token);
}
|
|
|
|
/// Parses `token` as a base-10 `i32` and stores it in `mal_atom` as an Int.
/// Any parse failure is reported as `SystemError`.
fn read_atom_int(mal_atom: *MalType, token: []const u8) MalError!void {
    // TODO: extract int type from union
    const parsed = fmt.parseInt(i32, token, 10) catch return MalError.SystemError;
    mal_atom.data = MalData{ .Int = parsed };
}
|
|
|
|
/// Decodes a double-quoted string token and stores the result in `mal_atom`
/// as a String.
///
/// The surrounding quotes are removed and backslash escapes are resolved:
/// `\n` becomes a newline, and any other escaped character stands for
/// itself. Returns `ReaderUnmatchedString` when the token is not enclosed in
/// double quotes or ends in a dangling backslash, and `SystemError` when an
/// allocation fails. The stored slice is freshly allocated from `Allocator`.
fn read_atom_string(mal_atom: *MalType, token: []const u8) MalError!void {
    const n = token.len;
    // Check the length first so the indexing below can never go out of
    // bounds (an empty token has neither a first nor a last byte).
    if (n <= 1 or token[0] != '"' or token[n - 1] != '"') {
        return MalError.ReaderUnmatchedString;
    }

    if (n == 2) {
        // The token is `""`: store a zero-length string.
        const empty = Allocator.alloc(u8, 0) catch return MalError.SystemError;
        mal_atom.data = MalData{ .String = empty };
        return;
    }

    // The decoded text is never longer than the text between the quotes.
    const tmp_buffer = Allocator.alloc(u8, n - 2) catch return MalError.SystemError;
    defer Allocator.free(tmp_buffer);

    const escape_char: u8 = '\\';
    var i: usize = 1; // read index into token, skipping the opening quote
    var j: usize = 0; // write index into tmp_buffer
    while (i < n - 1) {
        if (token[i] != escape_char) {
            tmp_buffer[j] = token[i];
            j += 1;
            i += 1;
            continue;
        }
        // A backslash directly before the closing quote escapes that quote,
        // leaving the string unterminated.
        if (i == n - 2) {
            return MalError.ReaderUnmatchedString;
        }
        if (token[i + 1] == 'n') {
            tmp_buffer[j] = '\n';
        } else {
            tmp_buffer[j] = token[i + 1];
        }
        j += 1;
        i += 2;
    }

    // Copy the decoded bytes into an exactly-sized allocation.
    const string = Allocator.alloc(u8, j) catch return MalError.SystemError;
    i = 0;
    while (i < j) {
        string[i] = tmp_buffer[i];
        i += 1;
    }

    mal_atom.data = MalData{ .String = string };
}
|
|
|
|
/// Tokenizes `string` and returns a `Reader` positioned on its first token.
///
/// The tokenizer pattern is compiled on the first call and cached in `re`.
/// Returns `SystemError` if the pattern fails to compile, and propagates any
/// error from `tokenize`. The returned reader keeps a reference to `string`
/// (no copy is made) and to the token array allocated by `tokenize`.
pub fn read_str(string: []const u8) MalError!Reader {
    if (re == null) {
        re = pcre.pcre_compile(&match[0], 0, &error_msg, &erroroffset, 0);
        // pcre_compile reports failure by returning null. Without this check
        // the null pattern would reach pcre_exec, which fails, and the input
        // would silently tokenize to nothing.
        if (re == null) {
            return MalError.SystemError;
        }
    }
    const tokens = try tokenize(re, string);
    return Reader.init(string, tokens);
}
|
|
|
|
// Runs `regex` repeatedly over `string` and returns a newly allocated array
// of token offsets, two entries (start, end) per token, taken from the
// pattern's first capture group. Scanning stops at the end of the string or
// as soon as pcre_exec reports no match or an error.
// Caller becomes owner of the returned memory.
pub fn tokenize(regex: ?*pcre.pcre, string: []const u8) MalError![]usize {
    // TODO: pass in allocator
    const buffer_size: usize = 3 * string.len + 10;

    // Output vector for pcre_exec; released on exit.
    var indices: []c_int = Allocator.alloc(c_int, buffer_size) catch return MalError.SystemError;
    defer Allocator.free(indices);

    // Scratch space for the offsets found so far; released on exit.
    var match_buffer: []usize = Allocator.alloc(usize, buffer_size) catch return MalError.SystemError;
    defer Allocator.free(match_buffer);

    var found: usize = 0;
    var start_pos: c_int = 0;
    const subject_size: c_int = @intCast(c_int, string.len);

    while (start_pos < subject_size) {
        const rc: c_int = pcre.pcre_exec(regex, 0, &string[0], subject_size, start_pos, 0, &indices[0], @intCast(c_int, buffer_size));
        if (rc <= 0) {
            break;
        }

        // indices[1] is where the whole match ended: resume from there.
        // indices[2] and indices[3] bound the captured token.
        start_pos = indices[1];
        match_buffer[found] = @intCast(usize, indices[2]);
        match_buffer[found + 1] = @intCast(usize, indices[3]);
        found += 2;
    }

    // Copy the offsets into an exactly-sized array for the caller.
    var matches: []usize = Allocator.alloc(usize, found) catch return MalError.SystemError;
    var k: usize = 0;
    while (k < found) {
        matches[k] = match_buffer[k];
        k += 1;
    }
    return matches;
}
|