diff --git a/grammar.md b/grammar.md new file mode 100644 index 00000000..dc68a2e2 --- /dev/null +++ b/grammar.md @@ -0,0 +1,137 @@ +# Lexical Grammar: + +``` +whitespace: + \t | \r | ' ' | \f | \v + +line_terminator: + \n + +ident_letter_start: + [a-zA-Z] | '_' + +ident_letter: + ident_letter_start | '$' + +ident_atom: + ident_letter+ + +ident_atom_start: + ident_letter_start ident_letter+ + +ident: + ident_atom_start ('.' ident_atom)* + +string: + '"' string_item '"' + +string_item: + string_char | string_slash + +string_char: + [^('"' | '\'] + +string_slash: + '\' ('\' | ''' | '"' | "n" | "t" + | "0" | "x" hex_char hex_char + | "u" hex_char hex_char hex_char hex_char) + +hex_char: + [0-9a-fA-F] + +bin_char: + '0' | '1' + +octal_char: + [0-7] + +num: + [0-9]+ | '0x' hex_char+ | '0b' bin_char+ | '0o' octal_char+ + +chr: + '\'' string_item '\'' + +doc: + '//' [^(\n | eof)]* + '///' [^(\n | eof)]* + '/*' [^(*/)] '*/' + +symbols: + '(' | ')' | '{' | '}' | '[' | ']' | + '=' | ':' | ';' | '=>' | '$' | ',' | + '+' | '-' | '\' | '*' | '>' | '<' | + '<=' | '>=' | '==' | '!=' | '>>' | '<<' + +float: + num+ '.' 
num+ + +keyword: + 'do' | 'if' | 'else' | 'match' | 'open' | 'ask' | 'let' + +token: + doc | symbol | keyword | chr | ident | string_item | num | float +``` + +# How automatic semicolon insertion works: +A semicolon is inserted when a sequence of newlines is detected after +one of the following tokens: +- '=' +- 'let' +- 'ask' + +# Syntax + +``` +Atom ::= ident ; Variable + | num ; Integer literal + | float ; Float literal + | string ; String literal + | hlp ; 'Help' marker + | chr ; Character + | '[' Expr* ']' ; Array without commas + | '[' Expr (',' Expr)* ']' ; Array with commas + | '$' Atom Atom ; Sigma type constructor + | '(' Expr ',' Expr ')' ; Tuple + | '(' Expr '::' Expr ')' ; Type Annotation + +Call ::= Atom ' ' Call ; Call + +Arrow ::= Call -> Expr ; Arrow + | Call ; Call + +Sttm ::= ask Expr ';' ; Monadic bind statement without assignment + | ask Ident '=' Expr ';' ; Monadic bind statement with assignment + | return ';' ; Monadic return + | Expr ';' ; Just executes an expression + +Match ::= 'match' ident ident ('=' Expr)? '{' (ident '=>' Expr) '}' + +Expr ::= + | ident '=>' Expr ; Lambda + | let Ident '=' Expr ';' Expr ; Variable binding + | if Expr { Expr } else { Expr } ; If/else statement + | Match ; Dependent eliminator for sum types + | Open ; Dependent eliminator for record types + | do '{' Sttm* '}' ; Do notation + | '[' ident ':' Expr ']' -> Expr ; Sigma type + | '(' ident ':' Expr ')' -> Expr ; Pi type + | '(' Op Expr Expr ')' ; Binary operation + | '(' Expr ')' ; Duplicated because it's easier to treat it here. 
+ | ## ident '/' ident ; Substitution + +Pat ::= num | ident | string | (ident pat*) + +Rule ::= Pat* '=' Expr + +Impl := '(' Ident ':' Expr ')' + | '<' Ident ':' Expr '>' + | '<' Ident '>' + +Binding := '(' Ident ':' Expr ')' + | '<' Ident ':' Expr '>' + +Entry ::= ident Binding* ':' Expr Semi + ident Rule + | ident Binding* ':' _ '{' Expr '}' + +``` \ No newline at end of file diff --git a/src/kind-parser/Cargo.toml b/src/kind-parser/Cargo.toml index cde4011e..aef34994 100644 --- a/src/kind-parser/Cargo.toml +++ b/src/kind-parser/Cargo.toml @@ -6,3 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +kind-span = { path = "../kind-span" } +kind-tree = { path = "../kind-tree" } \ No newline at end of file diff --git a/src/kind-parser/src/errors.rs b/src/kind-parser/src/errors.rs new file mode 100644 index 00000000..517c2c77 --- /dev/null +++ b/src/kind-parser/src/errors.rs @@ -0,0 +1,21 @@ +use kind_span::Span; + +use crate::lexer::tokens::Token; + +#[derive(Debug, Clone)] +pub enum EncodeSequence { + Hexa, + Octal, + Binary, + Unicode, +} + +#[derive(Debug, Clone)] +pub enum SyntaxError { + UnfinishedString(Span), + UnfinishedComment(Span), + InvalidEscapeSequence(EncodeSequence, Span), + InvalidNumberRepresentation(EncodeSequence, Span), + UnexpectedChar(char, Span), + UnexpectedToken(Token, Span, Option) +} diff --git a/src/kind-parser/src/expr.rs b/src/kind-parser/src/expr.rs new file mode 100644 index 00000000..e7dcd71a --- /dev/null +++ b/src/kind-parser/src/expr.rs @@ -0,0 +1,48 @@ +use kind_tree::expr::{Expr, ExprKind}; +use kind_tree::symbol::{Ident, Symbol}; + +use crate::errors::SyntaxError; +use crate::lexer::tokens::Token; +use crate::state::Parser; + +impl<'a> Parser<'a> { + fn parse_lambda(&mut self, name: String) -> Result, SyntaxError> { + // We are assuming that it came from parse_atom. 
+ // so we just remove the argument and "=>" + let name_span = self.advance().1; + self.advance(); + + let expr = self.parse_expr()?; + let end_range = expr.span; + + let ident = Ident::new(Symbol(name), self.ctx, name_span); + + Ok(Box::new(Expr { + data: ExprKind::Lambda(ident, expr), + span: name_span.mix(end_range), + })) + } + + fn parse_atom(&mut self) -> Result, SyntaxError> { + todo!() + } + + pub fn parse_expr(&mut self) -> Result, SyntaxError> { + // Simply matching both on the current and the next one. + // it's useful for a lot of tokens. + match (self.get().clone(), self.peek()) { + (Token::Id(name), Token::FatArrow) => self.parse_lambda(name), + (Token::LPar, Token::Id(name)) => { + let start = self.advance().1; + let res = match self.peek() { + _ => todo!() + }; + self.eat_variant(&Token::LPar)?; + todo!() + } + (Token::LBracket, Token::Id(name)) => todo!(), + (Token::LPar, Token::Id(name)) => todo!(), + _ => todo!(), + } + } +} diff --git a/src/kind-parser/src/lexer/comments.rs b/src/kind-parser/src/lexer/comments.rs new file mode 100644 index 00000000..8b662f71 --- /dev/null +++ b/src/kind-parser/src/lexer/comments.rs @@ -0,0 +1,65 @@ +use kind_span::Span; + +use crate::errors::SyntaxError; +use crate::lexer::tokens::Token; +use crate::Lexer; + +impl<'a> Lexer<'a> { + /// Single line comments + pub fn lex_comment(&mut self, start: usize) -> (Token, Span) { + self.next_char(); + let mut is_doc = false; + if let Some('/') = self.peekable.peek() { + self.next_char(); + is_doc = true; + } + let cmt = self.accumulate_while(&|x| x != '\n'); + (Token::Comment(is_doc, cmt.to_string()), self.mk_span(start)) + } + + /// Parses multi line comments with nested comments + /// really useful + pub fn lex_multiline_comment(&mut self, start: usize) -> (Token, Span) { + let mut size = 0; + self.next_char(); + + let mut next = | p: &mut Lexer<'a>, x: char | { + size += x.len_utf8(); + p.peekable.next(); + p.adv_col(x); + }; + + self.comment_depth += 1; + + while let 
Some(&x) = self.peekable.peek() { + match x { + '*' => { + next(self, x); + if let Some('/') = self.peekable.peek() { + self.comment_depth -= 1; + if self.comment_depth == 0 { + next(self, '/'); + break; + } + } + } + '/' => { + next(self, x); + if let Some('*') = self.peekable.peek() { + self.comment_depth += 1; + } + } + _ => (), + } + next(self, x); + } + self.pos += size; + if self.comment_depth != 0 { + (Token::Error(Box::new(SyntaxError::UnfinishedComment(self.mk_span(start)))), self.mk_span(start)) + } else { + let str = &self.input[..size - 2]; + self.input = &self.input[size..]; + (Token::Comment(false, str.to_string()), self.mk_span(start)) + } + } +} diff --git a/src/kind-parser/src/lexer/literals.rs b/src/kind-parser/src/lexer/literals.rs new file mode 100644 index 00000000..f5b1cca4 --- /dev/null +++ b/src/kind-parser/src/lexer/literals.rs @@ -0,0 +1,118 @@ +use kind_span::Span; + +use crate::errors::{EncodeSequence, SyntaxError}; +use crate::lexer::tokens::Token; +use crate::Lexer; + +impl<'a> Lexer<'a> { + /// Lex a sequence of digits of the base @base@ with + /// maximum length of @size@ and turns it into a char. + fn lex_char_encoded(&mut self, start: usize, size: usize, base: u32, err: EncodeSequence) -> Result { + let string = self.next_chars(size); + let to_chr = string.and_then(|x| u32::from_str_radix(x, base).ok()); + if let Some(chr) = to_chr.and_then(char::from_u32) { + return Ok(chr); + } + Err(SyntaxError::InvalidEscapeSequence(err, self.mk_span(start))) + } + + /// Turns a escaped char into a normal char. 
+    fn lex_escaped_char(&mut self, start: usize) -> Result<char, SyntaxError> {
+        // The character under the cursor is treated as the escape selector,
+        // i.e. the backslash must already be consumed. `start` is only used
+        // to build the span attached to an error.
+        match self.peekable.peek() {
+            None => Err(SyntaxError::UnfinishedString(self.mk_span(start))),
+            Some(&x) => {
+                // Consume the selector itself before decoding it, so on return
+                // the cursor sits just after the whole escape sequence.
+                self.next_char();
+                match x {
+                    '\'' => Ok('\''),
+                    '\"' => Ok('\"'),
+                    'n' => Ok('\n'),
+                    'r' => Ok('\r'),
+                    't' => Ok('\t'),
+                    '0' => Ok('\0'),
+                    '\\' => Ok('\\'),
+                    // `\xHH`: exactly two hexadecimal digits.
+                    'x' => self.lex_char_encoded(start, 2, 16, EncodeSequence::Hexa),
+                    // `\uHHHH`: exactly four hexadecimal digits.
+                    'u' => self.lex_char_encoded(start, 4, 16, EncodeSequence::Unicode),
+                    // Any other character escapes to itself.
+                    other => Ok(other),
+                }
+            }
+        }
+    }
+
+    /// Lexes a run of ASCII decimal digits into a `Token::Num`.
+    ///
+    /// `start` is the byte offset where the literal began; it is only used
+    /// for the returned span.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the digit run does not parse as a `u64`, for example when
+    /// the literal overflows 64 bits.
+    // NOTE(review): this should surface a `SyntaxError` instead of panicking,
+    // but `EncodeSequence` has no decimal variant to report it with.
+    fn lex_digit(&mut self, start: usize) -> (Token, Span) {
+        let num = self.accumulate_while(&|x| x.is_ascii_digit());
+        (Token::Num(num.parse::<u64>().unwrap()), self.mk_span(start))
+    }
+
+    /// Lexes the digits of a number written in base `base`.
+    ///
+    /// The cursor must be on the character that selects the encoding; that
+    /// character is skipped before the digits are read. When the digits do
+    /// not parse as a `u64` (no digits at all, or overflow) the result is a
+    /// `Token::Error` carrying `InvalidNumberRepresentation` tagged with `err`.
+    fn lex_base(&mut self, start: usize, base: u32, err: EncodeSequence) -> (Token, Span) {
+        // Skip the base marker.
+        self.next_char();
+        let num = self.accumulate_while(&|x| x.is_digit(base));
+        match u64::from_str_radix(num, base) {
+            Ok(res) => (Token::Num(res), self.mk_span(start)),
+            Err(_) => (
+                Token::Error(Box::new(SyntaxError::InvalidNumberRepresentation(err, self.mk_span(start)))),
+                self.mk_span(start),
+            ),
+        }
+    }
+
+    /// Lex numbers with decimal, hexadecimal, binary or octal.
+    pub fn lex_number(&mut self) -> (Token, Span) {
+        let start = self.pos;
+        match self.peekable.peek() {
+            None => (Token::Eof, self.mk_span(start)),
+            Some('0') => {
+                // A leading zero may introduce a base prefix: consume it and
+                // dispatch on the character that follows.
+                self.next_char();
+                match self.peekable.peek() {
+                    Some('x') => self.lex_base(start, 16, EncodeSequence::Hexa),
+                    Some('o') => self.lex_base(start, 8, EncodeSequence::Octal),
+                    Some('b') => self.lex_base(start, 2, EncodeSequence::Binary),
+                    Some('0'..='9') => self.lex_digit(start),
+                    // A lone `0`, followed by a non-digit or by end of input.
+                    _ => (Token::Num(0), self.mk_span(start)),
+                }
+            }
+            Some('0'..='9') => self.lex_digit(start),
+            // Not on a digit: nothing is consumed and `Num(0)` is produced.
+            Some(_) => (Token::Num(0), self.mk_span(start)),
+        }
+    }
+
+    /// Lexes a string literal delimited by `"` on both sides.
+    ///
+    /// Each item is either a plain character or a backslash escape. When an
+    /// escape is malformed, the rest of the literal is skipped up to the
+    /// closing quote and the escape error is returned as a `Token::Error`.
+    /// A literal that reaches end of input without a closing quote yields
+    /// `SyntaxError::UnfinishedString`.
+    ///
+    /// TODO: Accumulate multiple encoding errors?
+    pub fn lex_string(&mut self) -> (Token, Span) {
+        let start = self.pos;
+
+        // Opening quote.
+        self.next_char();
+
+        let mut string = String::new();
+        let mut error: Option<(Token, Span)> = None;
+
+        while let Some(&x) = self.peekable.peek() {
+            let chr_start = self.pos;
+            match x {
+                '\"' => break,
+                '\\' => {
+                    // Consume the backslash. `lex_escaped_char` consumes the
+                    // rest of the escape itself, so this arm must not advance
+                    // again afterwards: doing so dropped the character that
+                    // follows every escape (and could eat the closing quote).
+                    self.next_char();
+                    match self.lex_escaped_char(chr_start) {
+                        Ok(x) => string.push(x),
+                        Err(t) => {
+                            // Resynchronise on the closing quote; the loop
+                            // then stops on it (or on end of input).
+                            self.accumulate_while(&|x| x != '"');
+                            error = Some((Token::Error(Box::new(t)), self.mk_span(start)));
+                        }
+                    }
+                }
+                x => {
+                    string.push(x);
+                    self.next_char();
+                }
+            }
+        }
+
+        // Consume the closing quote, if any, and pick the result: a recorded
+        // escape error wins over everything else.
+        match (self.next_char(), error) {
+            (_, Some(err)) => err,
+            (Some('"'), _) => (Token::Str(string), self.mk_span(start)),
+            _ => (Token::Error(Box::new(SyntaxError::UnfinishedString(self.mk_span(start)))), self.mk_span(start)),
+        }
+    }
+}
diff --git a/src/kind-parser/src/lexer/mod.rs b/src/kind-parser/src/lexer/mod.rs
new file mode 100644
index 00000000..026ce18a
--- /dev/null
+++ b/src/kind-parser/src/lexer/mod.rs
@@ -0,0 +1,144 @@
+use kind_span::Span;
+
+use 
crate::{errors::SyntaxError}; + +use self::{state::Lexer, tokens::Token}; + +pub mod literals; +pub mod state; +pub mod comments; +pub mod tokens; + +fn is_whitespace(chr: char) -> bool { + matches!(chr, ' ' | '\r' | '\t') +} + +fn is_valid_id(chr: char) -> bool { + chr.is_alphanumeric() || matches!(chr, '_' | '$' | '.') +} + +fn is_valid_id_start(chr: char) -> bool { + chr.is_alphabetic() || matches!(chr, '_') +} + +impl<'a> Lexer<'a> { + pub fn single_token(&mut self, token: Token) -> (Token, Span) { + let start = self.pos; + self.next_char(); + (token, self.mk_span(start)) + } + + pub fn is_breakline(&mut self) -> bool { + self.accumulate_while(&is_whitespace); + let count = self.accumulate_while(&|x| x == '\n').len(); + count > 0 + } + + pub fn to_keyword(str: &str) -> Token { + match str { + "ask" => Token::Ask, + "do" => Token::Do, + "if" => Token::If, + "else" => Token::Else, + "match" => Token::Match, + "let" => Token::Let, + "open" => Token::Open, + _ => Token::Id(str.to_string()) + } + } + + pub fn get_next_no_error(&mut self, vec: &mut Vec>) -> (Token, Span) { + loop { + let (token, span) = self.lex_token(); + match token { + Token::Error(x) => { + vec.push(x); + continue + }, + _ => () + } + return (token, span) + } + } + + pub fn lex_token(&mut self) -> (Token, Span) { + let start = self.pos; + match self.peekable.peek() { + None => (Token::Eof, self.mk_span(start)), + Some(chr) => match chr { + c if is_whitespace(*c) => { + self.accumulate_while(&is_whitespace); + self.lex_next() + } + '\n' => { + self.accumulate_while(&|x| x == '\n' || x == '\r'); + if self.semis > 0 { + self.semis -= 1; + (Token::Semi, self.mk_span(start)) + } else { + self.lex_next() + } + } + c if c.is_ascii_digit() => self.lex_number(), + c if is_valid_id_start(*c) => { + let str = self.accumulate_while(&is_valid_id); + (Lexer::to_keyword(str), self.mk_span(start)) + } + '(' => self.single_token(Token::LPar), + ')' => self.single_token(Token::RPar), + '[' => 
self.single_token(Token::LBracket), + ']' => self.single_token(Token::RBracket), + '{' => self.single_token(Token::LBrace), + '}' => self.single_token(Token::RBrace), + '=' => { + self.next_char(); + match self.peekable.peek() { + Some('>') => self.single_token(Token::FatArrow), + Some('=') => self.single_token(Token::EqEq), + _ => (Token::Eq, self.mk_span(start)), + } + } + '>' => { + self.next_char(); + match self.peekable.peek() { + Some('>') => self.single_token(Token::GreaterGreater), + Some('=') => self.single_token(Token::GreaterEq), + _ => (Token::Greater, self.mk_span(start)), + } + } + '<' => { + self.next_char(); + match self.peekable.peek() { + Some('<') => self.single_token(Token::LessLess), + Some('=') => self.single_token(Token::LessEq), + _ => (Token::Less, self.mk_span(start)), + } + } + '/' => { + self.next_char(); + match self.peekable.peek() { + Some('/') => self.lex_comment(start), + Some('*') => self.lex_multiline_comment(start), + _ => (Token::Slash, self.mk_span(start)), + } + } + ':' => self.single_token(Token::Colon), + ';' => self.single_token(Token::Semi), + '$' => self.single_token(Token::Dollar), + ',' => self.single_token(Token::Comma), + '+' => self.single_token(Token::Plus), + '-' => self.single_token(Token::Minus), + '*' => self.single_token(Token::Star), + '%' => self.single_token(Token::Percent), + '&' => self.single_token(Token::Ampersand), + '|' => self.single_token(Token::Bar), + '^' => self.single_token(Token::Hat), + '"' => self.lex_string(), + &c => { + self.next_char(); + (Token::Error(Box::new(SyntaxError::UnexpectedChar(c, self.mk_span(start)))), self.mk_span(start)) + } + }, + } + } +} diff --git a/src/kind-parser/src/lexer/state.rs b/src/kind-parser/src/lexer/state.rs new file mode 100644 index 00000000..1ab78ebc --- /dev/null +++ b/src/kind-parser/src/lexer/state.rs @@ -0,0 +1,92 @@ +use std::{iter::Peekable, str::Chars}; + +use kind_span::{Pos, Range, Span, SyntaxCtxIndex}; + +use crate::lexer::tokens::Token; + +/// 
The lexer state. +pub struct Lexer<'a> { + pub input: &'a str, + pub peekable: &'a mut Peekable>, + pub pos: usize, + pub ctx: SyntaxCtxIndex, + + // Modes + pub semis: u16, + pub comment_depth: u16, + pub column: u16, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str, peekable: &'a mut Peekable>, ctx: SyntaxCtxIndex) -> Lexer<'a> { + Lexer { + input, + pos: 0, + ctx, + peekable, + semis: 0, + comment_depth: 0, + column: 0, + } + } + + pub fn mk_span(&self, start: usize) -> Span { + Span::new(Range::new(Pos(start as u32), Pos(self.pos as u32), self.ctx)) + } + + #[inline] + pub fn adv_col(&mut self, chr: char) { + self.column = if chr == '\n' { 0 } else { self.column + 1 }; + } + + pub fn next_char(&mut self) -> Option { + match self.peekable.next() { + Some(chr) if !self.input.is_empty() => { + self.input = &self.input[chr.len_utf8()..]; + self.pos += chr.len_utf8(); + self.adv_col(chr); + Some(chr) + } + _ => None, + } + } + + pub fn accumulate_while(&mut self, condition: &dyn Fn(char) -> bool) -> &str { + let mut size = 0; + while let Some(&x) = self.peekable.peek() { + if !condition(x) { + break; + } + size += x.len_utf8(); + self.adv_col(x); + self.peekable.next(); + } + self.pos += size; + let str = &self.input[..size]; + self.input = &self.input[size..]; + str + } + + pub fn next_chars(&mut self, size: usize) -> Option<&str> { + let start = self.pos; + for _ in 0..size { + if let Some(&x) = self.peekable.peek() { + self.pos += x.len_utf8(); + self.peekable.next(); + self.adv_col(x); + } else { + return None; + } + } + let len = self.pos - start; + let str = &self.input[..len]; + self.input = &self.input[len..]; + Some(str) + } + + #[inline] + /// Useful as entrypoint + pub fn lex_next(&mut self) -> (Token, Span) { + self.lex_token() + } +} diff --git a/src/kind-parser/src/lexer/tokens.rs b/src/kind-parser/src/lexer/tokens.rs new file mode 100644 index 00000000..3788fafb --- /dev/null +++ b/src/kind-parser/src/lexer/tokens.rs @@ -0,0 +1,69 @@ +use 
crate::errors::SyntaxError; + +#[derive(Debug, Clone)] +pub enum Token { + LPar, // ( + RPar, // ) + LBracket, // [ + RBracket, // ] + LBrace, // { + RBrace, // } + Eq, // = + Colon, // : + Semi, // ; + FatArrow, // => + Dollar, // $ + Comma, // , + + Help(String), + Id(String), + + // Keywords + Do, + If, + Else, + Match, + Ask, + Let, + Open, + + // Literals + Char(char), + Str(String), + Num(u64), + Float(u64, u64), + Hole, + + // TO Interpolation + + // Operators + Plus, + Minus, + Star, + Slash, + Percent, + Ampersand, + Bar, + Hat, + GreaterGreater, + LessLess, + Less, + LessEq, + EqEq, + GreaterEq, + Greater, + NotEq, + + Comment(bool, String), + + Eof, + + // The error token that is useful to error recovery. + Error(Box), +} + +impl Token { + pub fn same_variant(&self, b: &Token) -> bool { + std::mem::discriminant(self) == std::mem::discriminant(b) + } +} diff --git a/src/kind-parser/src/lib.rs b/src/kind-parser/src/lib.rs index 7d12d9af..abb73910 100644 --- a/src/kind-parser/src/lib.rs +++ b/src/kind-parser/src/lib.rs @@ -1,14 +1,9 @@ -pub fn add(left: usize, right: usize) -> usize { - left + right -} +mod errors; -#[cfg(test)] -mod tests { - use super::*; +pub mod state; +pub mod expr; +pub mod top_level; - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} + +pub mod lexer; +pub use lexer::state::*; \ No newline at end of file diff --git a/src/kind-parser/src/macros.rs b/src/kind-parser/src/macros.rs new file mode 100644 index 00000000..dc8c0f57 --- /dev/null +++ b/src/kind-parser/src/macros.rs @@ -0,0 +1,23 @@ +macro_rules! match_single { + ($pattern:pat) => { + |x| match x { + $pattern => Some(()), + _ => None, + } + }; + + ($pattern:pat => $then:expr) => { + |x| match x { + $pattern => Some($then), + _ => None, + } + }; +} + +macro_rules! 
eat_single { + ($x:expr, $pattern:pat) => { $x.eat(match_single!($pattern)) }; + ($x:expr, $pattern:pat => $then:expr) => { $x.eat(match_single!($pattern => $then)) } +} + +pub(crate) use eat_single; +pub(crate) use match_single; \ No newline at end of file diff --git a/src/kind-parser/src/state.rs b/src/kind-parser/src/state.rs new file mode 100644 index 00000000..b3f0610a --- /dev/null +++ b/src/kind-parser/src/state.rs @@ -0,0 +1,79 @@ +use kind_span::{Span, SyntaxCtxIndex}; + +use crate::{lexer::tokens::Token, Lexer, errors::SyntaxError}; + +/// The parser state. it current have some parameters +/// that makes the behaviour change +/// - eaten: It counts how much tokens it has eaten +/// it's useful to all of the rules that use "try_local" +/// and similar functions +pub struct Parser<'a> { + pub lexer: Lexer<'a>, + pub current: (Token, Span), + pub next: (Token, Span), + pub errs: Vec>, + pub eaten: u32, + pub ctx: SyntaxCtxIndex +} + +impl<'a> Parser<'a> { + pub fn new(mut lexer: Lexer<'a>, ctx: SyntaxCtxIndex) -> Parser<'a> { + let mut errs = Vec::new(); + let current = lexer.get_next_no_error(&mut errs); + let next = lexer.get_next_no_error(&mut errs); + Parser { lexer, next, current, errs, eaten: 0, ctx } + } + + pub fn advance(&mut self) -> (Token, Span) { + let cur = self.current.clone(); + self.current = self.next.clone(); + self.next = self.lexer.get_next_no_error(&mut self.errs); + self.eaten += 1; + cur + } + + #[inline] + pub fn fail(&mut self, expect: Option) -> Result { + Err(SyntaxError::UnexpectedToken(self.current.0.clone(), self.current.1, expect)) + } + + pub fn eat_variant(&mut self, expect: &Token) -> Result<(Token, Span), SyntaxError> { + if self.current.0.same_variant(expect) { + Ok(self.advance()) + } else { + self.fail(Some(expect.clone())) + } + } + + pub fn eat(&mut self, expect: fn(&Token) -> Option) -> Result { + match expect(&self.current.0) { + None => self.fail(None), + Some(res) => Ok(res) + } + } + + #[inline] + pub fn get(&mut 
self) -> &Token { + &self.current.0 + } + + #[inline] + pub fn peek(&mut self) -> &Token { + &self.next.0 + } + + #[inline] + pub fn span(&mut self) -> &Span { + &self.current.1 + } + + pub fn try_single(&mut self, fun: fn(&mut Parser<'a>) -> Result) -> Result, SyntaxError> { + let current = self.eaten; + match fun(self) { + Err(_) if current == self.eaten => Ok(None), + Err(err) => Err(err), + Ok(res) => Ok(Some(res)), + } + } + +} diff --git a/src/kind-parser/src/top_level.rs b/src/kind-parser/src/top_level.rs new file mode 100644 index 00000000..e69de29b diff --git a/src/kind-span/src/lib.rs b/src/kind-span/src/lib.rs index 72d8a5dd..87dae87c 100644 --- a/src/kind-span/src/lib.rs +++ b/src/kind-span/src/lib.rs @@ -54,7 +54,7 @@ impl Range { Range { start: self.start, end: self.end, - ctx: ctx, + ctx, } } @@ -90,9 +90,7 @@ impl Span { pub fn set_ctx(&mut self, ctx: SyntaxCtxIndex) { match self { Span::Generated => (), - Span::Locatable(span) => { - *span = span.set_ctx(ctx) - }, + Span::Locatable(span) => *span = span.set_ctx(ctx), } } diff --git a/src/kind-tree/src/expr.rs b/src/kind-tree/src/expr.rs index af770f46..3dbe8fa4 100644 --- a/src/kind-tree/src/expr.rs +++ b/src/kind-tree/src/expr.rs @@ -1,6 +1,10 @@ +/// This module describes a CONCRETE SYNTAX TREE +/// without parenthesis. It helps when it comes to +/// a static analysis of the tree with the syntax sugars +/// and it makes it easier to split phases. + use crate::symbol::Ident; -use kind_span::{Locatable, Span}; -use core::ascii; +use kind_span::Span; use std::fmt::{Display, Error, Formatter}; /// Enum of binary operators. 
@@ -54,7 +58,7 @@ pub struct Substution { pub name: Ident, pub redx: u64, pub indx: u64, - pub expr: Box + pub expr: Box, } #[derive(Clone, Debug)] @@ -69,7 +73,7 @@ pub enum Literal { /// A number literal of 60 bits (e.g 32132) Number(u64), // A String literal - String(String) + String(String), } #[derive(Clone, Debug)] @@ -86,10 +90,6 @@ pub enum ExprKind { Let(Ident, Box, Box), /// Type ascription (x : y) Ann(Box, Box), - /// A constructor application - Ctr(Ident, Spine), - /// A function application - Fun(Ident, Spine), /// Literal Lit(Literal), /// Binary operation (e.g. 2 + 3) @@ -112,16 +112,6 @@ pub struct Expr { pub span: Span, } -impl Locatable for Expr { - fn locate(&self) -> Span { - self.span - } - - fn set_location(&mut self, location: Span) { - self.span = location; - } -} - impl Display for Operator { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { use Operator::*; @@ -151,49 +141,17 @@ impl Expr { pub fn new_var(name: Ident) -> Expr { Expr { span: Span::Generated, - data: ExprKind::Var(name) + data: ExprKind::Var(name), } } - pub fn traverse_pi_types<'a>(&'a self) -> String { + pub fn traverse_pi_types(&self) -> String { match &self.data { - ExprKind::All(binder, typ, body) => { - match binder { - None => format!("{} -> {}", typ, body.traverse_pi_types()), - Some(binder) => format!("({} : {}) -> {}", binder, typ, body.traverse_pi_types()), - } - } - _ => format!("{}", self) - } - } - - pub fn interpret_as_string(&self) -> Option { - let mut text = String::new(); - let mut term = &self.data; - - let string_nil = Ident::new_path("String", "nil"); - let string_cons = Ident::new_path("String", "cons"); - - loop { - if let ExprKind::Ctr (name, args) = term { - if name.data == string_cons.data && args.len() == 2 { - // TODO: Change it to support escaped chars. 
- if let ExprKind::Lit (Literal::Number(numb)) = args[0].data { - if ascii::escape_default(numb as u8).count() > 1 { - return None; - } else { - text.push(char::from_u32(numb as u32).unwrap_or('\0')); - term = &args[1].data; - continue; - } - } else { - return None; - } - } else if name.data == string_nil.data && args.is_empty() { - return Some(text); - } - } - return None; + ExprKind::All(binder, typ, body) => match binder { + None => format!("{} -> {}", typ, body.traverse_pi_types()), + Some(binder) => format!("({} : {}) -> {}", binder, typ, body.traverse_pi_types()), + }, + _ => format!("{}", self), } } } @@ -208,32 +166,24 @@ impl Display for Literal { Literal::String(str) => write!(f, "\"{}\"", str), } } - } impl Display for Expr { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - if let Some(str) = self.interpret_as_string() { - write!(f, "\"{}\"", str) - } else { - use ExprKind::*; - match &self.data { - All(_, __, _) => write!(f, "({}", self.traverse_pi_types()), - Lit(lit) => write!(f, "{}", lit), - Var(name) => write!(f, "{}", name), - Lambda(binder, body) => write!(f, "({} => {})", binder, body), - App(head, spine) => write!(f, "({}{})", head, spine.iter().map(|x| format!(" {}", x)).collect::()), - Let(name, expr, body) => write!(f, "(let {} = {}; {})", name, expr, body), - Ann(expr, typ) => write!(f, "({} : {})", expr, typ), - Ctr(head, spine) => write!(f, "({}{})", head, spine.iter().map(|x| format!(" {}", x)).collect::()), - Fun(head, spine) => write!(f, "({}{})", head, spine.iter().map(|x| format!(" {}", x)).collect::()), - Binary(op, expr, typ) => write!(f, "({} {} {})", op, expr, typ), - Subst(Substution { name, redx, expr, .. 
}) => write!(f, "({} ## {}/{})", expr, name, redx), - Hole(_) => todo!(), - Match(_) => todo!(), - Open(_) => todo!(), - - } + use ExprKind::*; + match &self.data { + All(_, _, _) => write!(f, "({}", self.traverse_pi_types()), + Lit(lit) => write!(f, "{}", lit), + Var(name) => write!(f, "{}", name), + Lambda(binder, body) => write!(f, "({} => {})", binder, body), + App(head, spine) => write!(f, "({}{})", head, spine.iter().map(|x| format!(" {}", x)).collect::()), + Let(name, expr, body) => write!(f, "(let {} = {}; {})", name, expr, body), + Ann(expr, typ) => write!(f, "({} : {})", expr, typ), + Binary(op, expr, typ) => write!(f, "({} {} {})", op, expr, typ), + Subst(Substution { name, redx, expr, .. }) => write!(f, "({} ## {}/{})", expr, name, redx), + Hole(_) => todo!(), + Match(_) => todo!(), + Open(_) => todo!(), } } -} \ No newline at end of file +} diff --git a/src/kind-tree/src/lib.rs b/src/kind-tree/src/lib.rs index 41c7954f..009703f9 100644 --- a/src/kind-tree/src/lib.rs +++ b/src/kind-tree/src/lib.rs @@ -1,4 +1,5 @@ -use std::{collections::HashMap, fmt::{Formatter, Display, Error}}; +use std::collections::HashMap; +use std::fmt::{Display, Error, Formatter}; use expr::Expr; use kind_span::{Span, SyntaxCtxIndex}; @@ -14,7 +15,7 @@ pub mod visitor; pub enum AttributeStyle { Ident(Ident), String(String), - Number(Span, u64) + Number(Span, u64), } /// A attribute is a kind of declaration @@ -25,7 +26,7 @@ pub enum AttributeStyle { pub struct Attribute { pub name: Ident, pub value: Option, - pub span: Span + pub span: Span, } /// An argument is a 'binding' of a name to a type @@ -61,6 +62,7 @@ pub struct Rule { #[derive(Clone, Debug)] pub struct Entry { pub name: Ident, + pub docs: Option, pub args: Vec>, pub tipo: Box, pub rules: Vec>, @@ -92,6 +94,8 @@ impl Book { } } +// Display + impl Display for Book { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { for name in &self.names { @@ -101,8 +105,6 @@ impl Display for Book { } } -// Display - impl 
Display for Argument { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { let (open, close) = match (self.erased, self.hidden) { @@ -141,4 +143,4 @@ impl Display for Rule { } write!(f, " = {}", self.body) } -} \ No newline at end of file +} diff --git a/src/kind-tree/src/symbol.rs b/src/kind-tree/src/symbol.rs index cee27270..81488d4b 100644 --- a/src/kind-tree/src/symbol.rs +++ b/src/kind-tree/src/symbol.rs @@ -1,6 +1,6 @@ use std::fmt::Display; -use kind_span::{SyntaxCtxIndex, Span}; +use kind_span::{Span, SyntaxCtxIndex}; // Stores the name of a variable or constructor #[derive(Clone, PartialEq, Eq, Hash, Debug)] @@ -16,11 +16,7 @@ pub struct Ident { impl Ident { pub fn new(data: Symbol, ctx: SyntaxCtxIndex, span: Span) -> Ident { - Ident { - data, - ctx, - span, - } + Ident { data, ctx, span } } pub fn new_path(data: &str, id: &str) -> Ident { @@ -33,12 +29,12 @@ impl Ident { /// Changes the syntax context of the span and of the ident pub fn set_ctx(&self, ctx: SyntaxCtxIndex) -> Ident { - let mut span = self.span.clone(); + let mut span = self.span; span.set_ctx(ctx); Ident { data: self.data.clone(), ctx, - span + span, } } } @@ -47,4 +43,4 @@ impl Display for Ident { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.data.0) } -} \ No newline at end of file +} diff --git a/src/kind-tree/src/visitor.rs b/src/kind-tree/src/visitor.rs index 376be085..bfceab48 100644 --- a/src/kind-tree/src/visitor.rs +++ b/src/kind-tree/src/visitor.rs @@ -1,6 +1,6 @@ use kind_span::Span; -use crate::expr::{ExprKind, Literal, Open, Substution, Match}; +use crate::expr::{ExprKind, Literal, Match, Open, Substution}; use crate::symbol::*; use crate::*; @@ -13,13 +13,13 @@ use crate::*; /// All of these functions are implemented so we can easily /// change these default implementations. 
pub trait Visitor { - fn visit_span(&mut self, _: &mut Span) { } + fn visit_span(&mut self, _: &mut Span) {} - fn visit_syntax_ctx(&mut self, _: &mut SyntaxCtxIndex) { } + fn visit_syntax_ctx(&mut self, _: &mut SyntaxCtxIndex) {} - fn visit_operator(&mut self, _: &mut expr::Operator) { } + fn visit_operator(&mut self, _: &mut expr::Operator) {} - fn visit_literal(&mut self, _: &mut Literal) { } + fn visit_literal(&mut self, _: &mut Literal) {} fn visit_ident(&mut self, ident: &mut Ident) { self.visit_span(&mut ident.span); @@ -99,62 +99,43 @@ pub trait Visitor { ExprKind::All(None, typ, body) => { self.visit_expr(typ); self.visit_expr(body); - }, + } ExprKind::All(Some(ident), typ, body) => { self.visit_ident(ident); self.visit_expr(typ); self.visit_expr(body); - }, + } ExprKind::Lambda(ident, body) => { self.visit_ident(ident); self.visit_expr(body); - }, + } ExprKind::App(expr, spine) => { self.visit_expr(expr); for arg in spine { self.visit_expr(arg); } - }, + } ExprKind::Let(ident, val, body) => { self.visit_ident(ident); self.visit_expr(val); self.visit_expr(body); - - }, + } ExprKind::Ann(val, ty) => { self.visit_expr(val); self.visit_expr(ty); - }, - ExprKind::Ctr(ident, spine) => { - self.visit_ident(ident); - for arg in spine { - self.visit_expr(arg); - } - }, - ExprKind::Fun(ident, spine) => { - self.visit_ident(ident); - for arg in spine { - self.visit_expr(arg); - } - }, + } ExprKind::Lit(lit) => { self.visit_literal(lit); - }, + } ExprKind::Binary(op, a, b) => { self.visit_operator(op); self.visit_expr(a); self.visit_expr(b); - }, - ExprKind::Hole(_) => { }, - ExprKind::Subst(subst) => { - self.visit_substitution(subst) - }, - ExprKind::Match(matcher) => { - self.visit_match(matcher) - }, - ExprKind::Open(open) => { - self.visit_open(open) - }, + } + ExprKind::Hole(_) => {} + ExprKind::Subst(subst) => self.visit_substitution(subst), + ExprKind::Match(matcher) => self.visit_match(matcher), + ExprKind::Open(open) => self.visit_open(open), } } -} \ No 
newline at end of file +}