[parser] Extend and update some documentation.

This commit is contained in:
Alessandro Coglio 2023-01-13 20:12:47 -08:00
parent 1cbea97f27
commit 5c223460c1
4 changed files with 13 additions and 5 deletions

View File

@ -43,7 +43,7 @@ pub fn parse_ast(handler: &Handler, source: &str, start_pos: BytePos) -> Result<
Ok(Ast::new(parser::parse(handler, source, start_pos)?))
}
/// Parses program inputs from from the input file path and state file path
/// Parses program inputs from the input file path
pub fn parse_program_inputs(handler: &Handler, input_string: &str, start_pos: BytePos) -> Result<InputData> {
let program_input: ProgramInput = parser::parse_input(handler, input_string, start_pos)?.try_into()?;

View File

@ -175,7 +175,7 @@ impl Token {
Ok((int.len(), Token::Integer(int)))
}
/// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns [`None`].
/// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns an error.
/// The next token can be eaten if the bytes at the front of the given `input` string can be scanned into a token.
pub(crate) fn eat(input: &str) -> Result<(usize, Token)> {
if input.is_empty() {
@ -253,6 +253,10 @@ impl Token {
'"' => {
// Find end string quotation mark.
// Instead of checking each `char` and pushing, we can avoid reallocations.
// This works because the code 34 of double quote cannot appear as a byte
// in the middle of a multi-byte UTF-8 encoding of a character,
// because those bytes all have the high bit set to 1;
// in UTF-8, the byte 34 can only appear as the single-byte encoding of double quote.
let rest = &input_str[1..];
let string = match rest.as_bytes().iter().position(|c| *c == b'"') {
None => return Err(ParserError::lexer_string_not_closed(rest).into()),
@ -302,6 +306,10 @@ impl Token {
input.next();
if input.next_if_eq(&'/').is_some() {
// Find the end of the comment line.
// This works because the code 10 of line feed cannot appear as a byte
// in the middle of a multi-byte UTF-8 encoding of a character,
// because those bytes all have the high bit set to 1;
// in UTF-8, the byte 10 can only appear as the single-byte encoding of line feed.
let comment = match input_str.as_bytes().iter().position(|c| *c == b'\n') {
None => input_str,
Some(idx) => &input_str[..idx + 1],

View File

@ -17,7 +17,7 @@
//! The tokenizer to convert Leo code text into tokens.
//!
//! This module contains the [`tokenize()`] method which breaks down string text into tokens,
//! separated by whitespace.
//! optionally separated by whitespace.
pub(crate) mod token;

View File

@ -147,9 +147,9 @@ pub enum Token {
}
/// Represents all valid Leo keyword tokens.
/// This defers from the ABNF for the following reasons:
/// This differs from the ABNF grammar for the following reasons:
/// Adding true and false to the keywords of the ABNF grammar makes the lexical grammar ambiguous,
/// because true and false are also boolean literals, which are different tokens from keywords
/// because true and false are also boolean literals, which are different tokens from keywords.
pub const KEYWORD_TOKENS: &[Token] = &[
Token::Address,
Token::Assert,