mirror of
https://github.com/ProvableHQ/leo.git
synced 2024-12-24 10:41:57 +03:00
[parser] Extend and update some documentation.
This commit is contained in:
parent
1cbea97f27
commit
5c223460c1
@ -43,7 +43,7 @@ pub fn parse_ast(handler: &Handler, source: &str, start_pos: BytePos) -> Result<
|
||||
Ok(Ast::new(parser::parse(handler, source, start_pos)?))
|
||||
}
|
||||
|
||||
/// Parses program inputs from from the input file path and state file path
|
||||
/// Parses program inputs from the input file path
|
||||
pub fn parse_program_inputs(handler: &Handler, input_string: &str, start_pos: BytePos) -> Result<InputData> {
|
||||
let program_input: ProgramInput = parser::parse_input(handler, input_string, start_pos)?.try_into()?;
|
||||
|
||||
|
@ -175,7 +175,7 @@ impl Token {
|
||||
Ok((int.len(), Token::Integer(int)))
|
||||
}
|
||||
|
||||
/// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns [`None`].
|
||||
/// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns an error.
|
||||
/// The next token can be eaten if the bytes at the front of the given `input` string can be scanned into a token.
|
||||
pub(crate) fn eat(input: &str) -> Result<(usize, Token)> {
|
||||
if input.is_empty() {
|
||||
@ -253,6 +253,10 @@ impl Token {
|
||||
'"' => {
|
||||
// Find end string quotation mark.
|
||||
// Instead of checking each `char` and pushing, we can avoid reallocations.
|
||||
// This works because the code 34 of double quote cannot appear as a byte
|
||||
// in the middle of a multi-byte UTF-8 encoding of a character,
|
||||
// because those bytes all have the high bit set to 1;
|
||||
// in UTF-8, the byte 34 can only appear as the single-byte encoding of double quote.
|
||||
let rest = &input_str[1..];
|
||||
let string = match rest.as_bytes().iter().position(|c| *c == b'"') {
|
||||
None => return Err(ParserError::lexer_string_not_closed(rest).into()),
|
||||
@ -302,6 +306,10 @@ impl Token {
|
||||
input.next();
|
||||
if input.next_if_eq(&'/').is_some() {
|
||||
// Find the end of the comment line.
|
||||
// This works because the code 10 of line feed cannot appear as a byte
|
||||
// in the middle of a multi-byte UTF-8 encoding of a character,
|
||||
// because those bytes all have the high bit set to 1;
|
||||
// in UTF-8, the byte 10 can only appear as the single-byte encoding of line feed.
|
||||
let comment = match input_str.as_bytes().iter().position(|c| *c == b'\n') {
|
||||
None => input_str,
|
||||
Some(idx) => &input_str[..idx + 1],
|
||||
|
@ -17,7 +17,7 @@
|
||||
//! The tokenizer to convert Leo code text into tokens.
|
||||
//!
|
||||
//! This module contains the [`tokenize()`] method which breaks down string text into tokens,
|
||||
//! separated by whitespace.
|
||||
//! optionally separated by whitespace.
|
||||
|
||||
pub(crate) mod token;
|
||||
|
||||
|
@ -147,9 +147,9 @@ pub enum Token {
|
||||
}
|
||||
|
||||
/// Represents all valid Leo keyword tokens.
|
||||
/// This defers from the ABNF for the following reasons:
|
||||
/// This differs from the ABNF grammar for the following reasons:
|
||||
/// Adding true and false to the keywords of the ABNF grammar makes the lexical grammar ambiguous,
|
||||
/// because true and false are also boolean literals, which are different tokens from keywords
|
||||
/// because true and false are also boolean literals, which are different tokens from keywords.
|
||||
pub const KEYWORD_TOKENS: &[Token] = &[
|
||||
Token::Address,
|
||||
Token::Assert,
|
||||
|
Loading…
Reference in New Issue
Block a user