configparser: use pest to parse config files

Summary:
[pest](https://github.com/pest-parser/pest) is an elegant Rust library for
parsing text.

A naive benchmark on a 1MB config file shows pest is about 1.5 to 2x slower.
But the better error messages and cleaner code seem worth it.

Practically, in a VirtualBox VM, parsing a set of our config files takes 3-7ms.
The overhead seems to come from opening too many files; reducing the set to a
single file makes parsing complete in 2-4ms.

Unfortunately, the Buck build has issues with the elegant syntax
`#[grammar = "spec.pest"]`, because "spec.pest" cannot be located by pest_derive
in that environment. Therefore a workaround (generate_parser.py) is used to
generate the parser code ahead of time.

The motivation behind this is that I noticed a multi-line value cannot be
taken as a plain Bytes slice. For example:

  [section]
  foo = line1
    line2

"foo" should be "line1\nline2", instead of "line1\n  line2". It does not make a
difference on configlist. But it affects templates. Rather than making the
parser more complex, it seems better to just adopt a reasonbly fast parsing
library.
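
As a minimal illustrative sketch (not code in this diff; the value span from
pest still needs post-processing, see the TODO in config.rs), the intended
joining of continuation lines looks roughly like this:

  // Hypothetical helper: join continuation lines of a multi-line value,
  // dropping the leading indentation of each continuation line.
  fn join_continuation_lines(raw: &str) -> String {
      let mut lines = raw.lines();
      let mut out = String::new();
      if let Some(first) = lines.next() {
          out.push_str(first.trim_end());
      }
      for line in lines {
          out.push('\n');
          out.push_str(line.trim());
      }
      out
  }

  // join_continuation_lines("line1\n  line2") == "line1\nline2"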

Reviewed By: DurhamG

Differential Revision: D8960876

fbshipit-source-id: 2fa04e38b706f7126008512732c9efa168f84cc7
Jun Wu 2018-08-08 17:15:29 -07:00 committed by Facebook Github Bot
parent 0ea7f4aa94
commit a4129f8d53
8 changed files with 1172 additions and 141 deletions


@ -7,6 +7,8 @@ bytes = "0.4.8"
failure = "0.1.1"
failure_derive = "0.1.1"
linked-hash-map = "0.5.1"
pest = "1.0.6"
pest_derive = "1.0.7"
[dev-dependencies]
tempdir = "0.3.7"


@ -1,6 +1,8 @@
use bytes::Bytes;
use error::Error;
use linked_hash_map::LinkedHashMap;
use parser::{ConfigParser, Rule};
use pest::{self, Parser, Span};
use std::cmp::Eq;
use std::collections::{HashMap, HashSet};
use std::convert::AsRef;
@ -10,8 +12,11 @@ use std::hash::Hash;
use std::io::Read;
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::str;
use std::sync::Arc;
type Pair<'a> = pest::iterators::Pair<'a, Rule>;
/// Collection of config sections loaded from various sources.
#[derive(Default)]
pub struct ConfigSet {
@ -258,133 +263,135 @@ impl ConfigSet {
opts: &Options,
visited: &mut HashSet<PathBuf>,
) {
let mut pos = 0;
let mut section = Bytes::new();
let shared_path = Arc::new(path.to_path_buf()); // use Arc to do shallow copy
let skip_include = path.parent().is_none(); // skip handling %include if path is empty
while pos < buf.len() {
match buf[pos] {
b'\n' | b'\r' | b' ' | b'\t' => pos += 1,
b'[' => {
let section_start = pos + 1;
match memchr(b']', &buf.as_ref()[section_start..]) {
Some(len) => {
let section_end = section_start + len;
section = strip(&buf, section_start, section_end);
pos = section_end + 1;
}
None => {
self.error(Error::Parse(
path.to_path_buf(),
pos,
"missing ']' for section name",
));
return;
}
}
}
b';' | b'#' => {
match memchr(b'\n', &buf.as_ref()[pos..]) {
Some(len) => pos += len, // skip this line
None => return, // reach file end
}
}
b'%' => {
static INCLUDE: &[u8] = b"%include ";
static UNSET: &[u8] = b"%unset ";
if buf.get(pos..pos + INCLUDE.len()) == Some(INCLUDE) {
let path_start = pos + INCLUDE.len();
let path_end = memchr(b'\n', &buf.as_ref()[pos..])
.map(|len| len + pos)
.unwrap_or(buf.len());
if !skip_include {
match ::std::str::from_utf8(&buf[path_start..path_end]) {
Ok(literal_include_path) => {
let full_include_path = path.parent()
.unwrap()
.join(expand_path(literal_include_path));
self.load_dir_or_file(&full_include_path, opts, visited);
}
Err(_error) => {
self.error(Error::Parse(
path.to_path_buf(),
path_start,
"invalid utf-8",
));
}
}
}
pos = path_end;
} else if buf.get(pos..pos + UNSET.len()) == Some(UNSET) {
let name_start = pos + UNSET.len();
let name_end = memchr(b'\n', &buf.as_ref()[pos..])
.map(|len| len + pos)
.unwrap_or(buf.len());
let name = strip(&buf, name_start, name_end);
// Utilities to avoid too much indentation.
let handle_config_item = |this: &mut ConfigSet, pair: Pair, section: Bytes| {
let pairs = pair.into_inner();
let mut name = Bytes::new();
for pair in pairs {
match pair.as_rule() {
Rule::name => name = extract(&buf, pair.into_span()),
Rule::value => {
let span = pair.into_span();
let (start, end) = strip_offsets(&buf, span.start(), span.end());
// TODO(quark): value needs post-processing
let value = buf.slice(start, end);
let location = ValueLocation {
path: shared_path.clone(),
location: pos..name_end,
location: start..end,
};
self.set_internal(section.clone(), name, None, location.into(), opts);
pos = name_end;
} else {
self.error(Error::Parse(path.to_path_buf(), pos, "unknown instruction"));
return this.set_internal(
section,
name,
value.into(),
location.into(),
opts,
);
}
_ => (),
}
}
unreachable!();
};
let handle_section = |pair: Pair, section: &mut Bytes| {
let pairs = pair.into_inner();
for pair in pairs {
match pair.as_rule() {
Rule::name => {
*section = extract(&buf, pair.into_span());
return;
}
_ => (),
}
_ => {
let name_start = pos;
match memchr(b'=', &buf.as_ref()[name_start..]) {
Some(len) => {
let equal_pos = name_start + len;
let name = strip(&buf, name_start, equal_pos);
// Find the end of value. It could be multi-line.
let value_start = equal_pos + 1;
let mut value_end = value_start;
loop {
match memchr(b'\n', &buf.as_ref()[value_end..]) {
Some(len) => {
value_end += len + 1;
let next_line_first_char =
*buf.get(value_end).unwrap_or(&b'.');
if !is_space(next_line_first_char) {
break;
}
}
None => {
value_end = buf.len();
break;
}
}
}
let (start, end) = strip_offsets(&buf, value_start, value_end);
let value = buf.slice(start, end);
let location = ValueLocation {
path: shared_path.clone(),
location: start..end,
};
}
unreachable!();
};
self.set_internal(
section.clone(),
name,
value.into(),
location.into(),
opts,
);
pos = value_end;
}
None => {
self.error(Error::Parse(
path.to_path_buf(),
pos,
"missing '=' for config value",
));
return;
}
}
let mut handle_include = |this: &mut ConfigSet, pair: Pair| {
let pairs = pair.into_inner();
for pair in pairs {
match pair.as_rule() {
Rule::line => if !skip_include {
let include_path = pair.as_str();
let full_include_path =
path.parent().unwrap().join(expand_path(include_path));
this.load_dir_or_file(&full_include_path, opts, visited);
},
_ => (),
}
} // match buf[pos]
}
};
let handle_unset = |this: &mut ConfigSet, pair: Pair, section: &Bytes| {
let unset_span = pair.clone().into_span();
let pairs = pair.into_inner();
for pair in pairs {
match pair.as_rule() {
Rule::name => {
let name = extract(&buf, pair.into_span());
let location = ValueLocation {
path: shared_path.clone(),
location: unset_span.start()..unset_span.end(),
};
return this.set_internal(
section.clone(),
name,
None,
location.into(),
opts,
);
}
_ => (),
}
}
unreachable!();
};
let mut handle_directive = |this: &mut ConfigSet, pair: Pair, section: &Bytes| {
let pairs = pair.into_inner();
for pair in pairs {
match pair.as_rule() {
Rule::include => handle_include(this, pair),
Rule::unset => handle_unset(this, pair, section),
_ => (),
}
}
};
let text = match str::from_utf8(&buf) {
Ok(text) => text,
Err(error) => return self.error(Error::Utf8(path.to_path_buf(), error)),
};
let pairs = match ConfigParser::parse(Rule::file, &text) {
Ok(pairs) => pairs,
Err(error) => return self.error(Error::Parse(path.to_path_buf(), format!("{}", error))),
};
for pair in pairs {
match pair.as_rule() {
Rule::config_item => handle_config_item(self, pair, section.clone()),
Rule::section => handle_section(pair, &mut section),
Rule::directive => handle_directive(self, pair, &section),
Rule::blank_line | Rule::comment_line | Rule::new_line => (),
Rule::comment_start
| Rule::compound
| Rule::equal_sign
| Rule::file
| Rule::include
| Rule::left_bracket
| Rule::line
| Rule::name
| Rule::right_bracket
| Rule::space
| Rule::unset
| Rule::value => unreachable!(),
}
}
}
@ -516,12 +523,6 @@ impl<S: Into<Bytes>> From<S> for Options {
}
}
/// C memchr-like API
#[inline]
fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
haystack.iter().position(|&x| x == needle)
}
/// Test if a binary char is a space.
#[inline]
fn is_space(byte: u8) -> bool {
@ -543,11 +544,9 @@ fn strip_offsets(buf: &Bytes, start: usize, end: usize) -> (usize, usize) {
(start, end)
}
/// Strip spaces and return a `Bytes` sub-slice.
#[inline]
fn strip(buf: &Bytes, start: usize, end: usize) -> Bytes {
let (start, end) = strip_offsets(buf, start, end);
buf.slice(start, end)
fn extract<'a>(buf: &Bytes, span: Span<'a>) -> Bytes {
buf.slice(span.start(), span.end())
}
/// Expand `~` to home directory.
@ -619,15 +618,15 @@ mod tests {
"[y]\n\
a = 0\n\
b=1\n\
# override a to 2
a = 2 \n\
# override a to 2\n\
a = 2 \n\
\n\
[x]\n\
m = this\n \
value has\n \
multi lines\n\
; comment again\n\
n =",
n =\n",
&"test_parse_basic".into(),
);
@ -650,7 +649,7 @@ mod tests {
assert_eq!(sources[0].source(), "test_parse_basic");
assert_eq!(sources[1].source(), "test_parse_basic");
assert_eq!(sources[0].location().unwrap(), (PathBuf::new(), 8..9));
assert_eq!(sources[1].location().unwrap(), (PathBuf::new(), 52..53));
assert_eq!(sources[1].location().unwrap(), (PathBuf::new(), 38..39));
}
#[test]
@ -658,10 +657,11 @@ mod tests {
let mut cfg = ConfigSet::new();
cfg.parse(
"[a]\n\
\t#\n\
x=1",
#\n\
x= \t1",
&"".into(),
);
assert_eq!(cfg.get("a", "x"), Some("1".into()));
}
@ -671,21 +671,39 @@ mod tests {
cfg.parse("# foo\n[y", &"test_parse_errors".into());
assert_eq!(
format!("{}", cfg.errors()[0]),
"\"\": parse error around byte 6: missing \']\' for section name"
"\"\":
--> 2:3
|
2 | [y
| ^---
|
= expected right_bracket"
);
let mut cfg = ConfigSet::new();
cfg.parse("\n\n%unknown", &"test_parse_errors".into());
assert_eq!(
format!("{}", cfg.errors()[0]),
"\"\": parse error around byte 2: unknown instruction"
"\"\":
--> 3:2
|
3 | %unknown
| ^---
|
= expected include or unset"
);
let mut cfg = ConfigSet::new();
cfg.parse("[section]\nabc", &"test_parse_errors".into());
assert_eq!(
format!("{}", cfg.errors()[0]),
"\"\": parse error around byte 10: missing \'=\' for config value"
"\"\":
--> 2:4
|
2 | abc
| ^---
|
= expected equal_sign"
);
}
@ -715,7 +733,7 @@ mod tests {
let sources = cfg.get_sources("x", "a");
assert_eq!(sources.len(), 2);
assert_eq!(sources[0].location().unwrap(), (PathBuf::new(), 8..9));
assert_eq!(sources[1].location().unwrap(), (PathBuf::new(), 25..35));
assert_eq!(sources[1].location().unwrap(), (PathBuf::new(), 26..35));
}
#[test]


@ -1,15 +1,19 @@
use std::io;
use std::path::PathBuf;
use std::str;
/// The error type for parsing config files.
#[derive(Fail, Debug)]
pub enum Error {
// TODO: use line number instead of byte offsets.
/// Unable to parse a file due to syntax or encoding error in the file content.
#[fail(display = "{:?}: parse error around byte {}: {}", _0, _1, _2)]
Parse(PathBuf, usize, &'static str),
/// Unable to parse a file due to a syntax error.
#[fail(display = "{:?}:\n{}", _0, _1)]
Parse(PathBuf, String),
/// Unable to read a file due to IO errors.
#[fail(display = "{:?}: {}", _0, _1)]
Io(PathBuf, #[cause] io::Error),
/// Config file contains invalid UTF-8.
#[fail(display = "{:?}: {}", _0, _1)]
Utf8(PathBuf, #[cause] str::Utf8Error),
}


@ -0,0 +1,113 @@
#!/usr/bin/env python3
import hashlib
import os
import re
import subprocess
import tempfile
dirname = os.path.dirname
crate_root = dirname(dirname(os.path.realpath(__file__)))
def expand_parser(pest):
    """Expand the "#[derive(Parser)]" part."""
    with tempfile.TemporaryDirectory() as tmp_root:
        # Copy Cargo.toml
        with open(os.path.join(tmp_root, "Cargo.toml"), "w") as f:
            f.write(open(os.path.join(crate_root, "Cargo.toml")).read())
        # Copy spec.pest
        os.mkdir(os.path.join(tmp_root, "src"))
        with open(os.path.join(tmp_root, "src", "spec.pest"), "wb") as f:
            f.write(pest)
        # Create a minimal project which is used to expand ConfigParser
        with open(os.path.join(tmp_root, "src", "lib.rs"), "w") as f:
            f.write(
                """
extern crate pest;
#[macro_use]
extern crate pest_derive;
#[derive(Parser)]
#[grammar = "spec.pest"]
pub(crate) struct ConfigParser;
"""
            )
        # Run cargo-expand
        env = os.environ.copy()
        env["RUSTFMT"] = "false"
        expanded = subprocess.check_output(
            ["cargo-expand", "--release"], env=env, cwd=tmp_root
        )
        expanded = expanded.decode("utf-8")
        # Keep only interesting parts
        rule_struct = re.search(
            r"^pub enum Rule [^}]*^\}", expanded, re.S | re.M
        ).group(0)
        parser_impl = re.search(
            r"^impl ::pest::Parser<Rule> for ConfigParser .*^\}", expanded, re.S | re.M
        ).group(0)
        code = f"""
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
{rule_struct}
pub(crate) struct ConfigParser;
{parser_impl}
"""
        return code


def write_generated_parser():
    spec_pest_path = os.path.join(crate_root, "src", "spec.pest")
    spec = open(spec_pest_path, "rb").read()
    checksum = hashlib.sha1(spec).hexdigest()
    output_path = os.path.join(crate_root, "src", "parser.rs")
    try:
        old_checksum = re.search(
            r"pest-checksum: (.*)\.", open(output_path).read()
        ).group(1)
        if old_checksum == checksum:
            print(
                "No need to update %s because %s is not changed."
                % (output_path, spec_pest_path)
            )
            return
    except Exception:
        pass
    with open(output_path, "w") as f:
        code = expand_parser(spec)
        f.write(
            f"""
// Generated by generate_parser.py. Do not edit manually. Instead, edit
// spec.pest, then run generate_parser.py (requires cargo-expand).
//
// This file should really be just 3 lines:
//
// #[derive(Parser)]
// #[grammar = "spec.pest"]
// struct ConfigParser;
//
// However, `#[grammar = "spec.pest"]` does not play well with Buck build,
// because pest_derive cannot find "spec.pest" in buck build environment.
// Therefore this file is {'detareneg@'[::-1]}. {"tnil-on@"[::-1]}.
// pest-checksum: {checksum}.
{code}"""
        )


if __name__ == "__main__":
    write_generated_parser()


@ -68,9 +68,12 @@ extern crate failure;
#[macro_use]
extern crate failure_derive;
extern crate pest;
pub mod config;
pub mod error;
pub mod hg;
pub mod parser;
pub use error::Error;


@ -0,0 +1,840 @@
// Generated by generate_parser.py. Do not edit manually. Instead, edit
// spec.pest, then run generate_parser.py (requires cargo-expand).
//
// This file should really be just 3 lines:
//
// #[derive(Parser)]
// #[grammar = "spec.pest"]
// struct ConfigParser;
//
// However, `#[grammar = "spec.pest"]` does not play well with Buck build,
// because pest_derive cannot find "spec.pest" in buck build environment.
// Therefore this file is @generated. @no-lint.
// pest-checksum: 1fa4fbe929a8b3e6c073fcd1e46cae2bc30d23f7.
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rule {
new_line,
space,
comment_start,
line,
name,
value,
equal_sign,
config_item,
left_bracket,
right_bracket,
section,
comment_line,
blank_line,
directive,
include,
unset,
compound,
file,
}
pub(crate) struct ConfigParser;
impl ::pest::Parser<Rule> for ConfigParser {
fn parse<'i>(rule: Rule, input: &'i str)
->
::std::result::Result<::pest::iterators::Pairs<'i, Rule>,
::pest::Error<'i, Rule>> {
mod rules {
use super::Rule;
#[inline]
#[allow(unused_variables)]
pub fn new_line<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::new_line, pos,
|state, pos|
{
pos.match_string("\n").or_else(|pos|
{
pos.match_string("\r")
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn space<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::space, pos,
|state, pos|
{
pos.match_string(" ").or_else(|pos|
{
pos.match_string("\t")
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn comment_start<'i>(pos: ::pest::Position<'i>,
state:
&mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::comment_start, pos,
|state, pos|
{
pos.match_string("#").or_else(|pos|
{
pos.match_string(";")
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn line<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::line, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{
pos.repeat(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
state.lookahead(false,
move
|state|
{
pos.lookahead(false,
|pos|
{
self::new_line(pos,
state)
})
}).and_then(|pos|
{
self::any(pos,
state)
})
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn name<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::name, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
state.lookahead(false,
move
|state|
{
pos.lookahead(false,
|pos|
{
self::space(pos,
state).or_else(|pos|
{
self::new_line(pos,
state).or_else(|pos|
{
self::comment_start(pos,
state)
}).or_else(|pos|
{
pos.match_string("=")
}).or_else(|pos|
{
pos.match_string("[")
}).or_else(|pos|
{
pos.match_string("]")
})
}).or_else(|pos|
{
pos.match_string("%")
})
})
}).and_then(|pos|
{
self::any(pos,
state)
})
})
}).and_then(|pos|
{
pos.repeat(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
state.lookahead(false,
move
|state|
{
pos.lookahead(false,
|pos|
{
self::space(pos,
state).or_else(|pos|
{
self::new_line(pos,
state).or_else(|pos|
{
self::comment_start(pos,
state)
}).or_else(|pos|
{
pos.match_string("=")
}).or_else(|pos|
{
pos.match_string("[")
}).or_else(|pos|
{
pos.match_string("]")
})
}).or_else(|pos|
{
pos.match_string("%")
})
})
}).and_then(|pos|
{
self::any(pos,
state)
})
})
})
})
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn value<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
move |state|
{
state.rule(Rule::value, pos,
|state, pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::line(pos,
state).and_then(|pos|
{
pos.repeat(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::new_line(pos,
state).and_then(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::space(pos,
state).and_then(|pos|
{
pos.repeat(|pos|
{
self::space(pos,
state)
})
})
})
})
}).and_then(|pos|
{
self::line(pos,
state)
})
})
})
})
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn equal_sign<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::equal_sign, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
pos.repeat(|pos|
{
self::space(pos,
state)
}).and_then(|pos|
{
pos.match_string("=")
}).and_then(|pos|
{
pos.repeat(|pos|
{
self::space(pos,
state)
})
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn config_item<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
move |state|
{
state.rule(Rule::config_item, pos,
|state, pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::name(pos,
state).and_then(|pos|
{
self::equal_sign(pos,
state)
}).and_then(|pos|
{
self::value(pos,
state)
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn left_bracket<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::left_bracket, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{ pos.match_string("[") })
})
}
#[inline]
#[allow(unused_variables)]
pub fn right_bracket<'i>(pos: ::pest::Position<'i>,
state:
&mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::right_bracket, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{ pos.match_string("]") })
})
}
#[inline]
#[allow(unused_variables)]
pub fn section<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
move |state|
{
state.rule(Rule::section, pos,
|state, pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::left_bracket(pos,
state).and_then(|pos|
{
self::name(pos,
state)
}).and_then(|pos|
{
self::right_bracket(pos,
state)
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn comment_line<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::comment_line, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::comment_start(pos,
state).and_then(|pos|
{
self::line(pos,
state)
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn blank_line<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.rule(Rule::blank_line, pos,
|state, pos|
{
state.atomic(::pest::Atomicity::Atomic,
move |state|
{
pos.repeat(|pos|
{
self::space(pos,
state)
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn directive<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
move |state|
{
state.rule(Rule::directive, pos,
|state, pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
pos.match_string("%").and_then(|pos|
{
self::include(pos,
state).or_else(|pos|
{
self::unset(pos,
state)
})
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn include<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
move |state|
{
state.rule(Rule::include, pos,
|state, pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
pos.match_string("include").and_then(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::space(pos,
state).and_then(|pos|
{
pos.repeat(|pos|
{
self::space(pos,
state)
})
})
})
})
}).and_then(|pos|
{
self::line(pos,
state)
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn unset<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
move |state|
{
state.rule(Rule::unset, pos,
|state, pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
pos.match_string("unset").and_then(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::space(pos,
state).and_then(|pos|
{
pos.repeat(|pos|
{
self::space(pos,
state)
})
})
})
}).and_then(|pos|
{
self::name(pos,
state)
})
})
})
}).and_then(|pos|
{
pos.repeat(|pos|
{
self::space(pos,
state)
})
})
})
})
})
})
}
#[inline]
#[allow(unused_variables)]
pub fn compound<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
self::config_item(pos,
state).or_else(|pos|
{
self::section(pos,
state).or_else(|pos|
{
self::comment_line(pos,
state)
}).or_else(|pos|
{
self::directive(pos,
state)
})
}).or_else(|pos|
{
self::blank_line(pos,
state)
})
}
#[inline]
#[allow(unused_variables)]
pub fn file<'i>(pos: ::pest::Position<'i>,
state: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
state.sequence(move |state|
{
pos.sequence(|pos|
{
self::soi(pos,
state).and_then(|pos|
{
self::skip(pos,
state)
}).and_then(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::compound(pos,
state).and_then(|pos|
{
self::skip(pos,
state)
}).and_then(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
pos.optional(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::new_line(pos,
state).and_then(|pos|
{
self::skip(pos,
state)
}).and_then(|pos|
{
self::compound(pos,
state)
})
})
}).and_then(|pos|
{
pos.repeat(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::skip(pos,
state).and_then(|pos|
{
state.sequence(move
|state|
{
pos.sequence(|pos|
{
self::new_line(pos,
state).and_then(|pos|
{
self::skip(pos,
state)
}).and_then(|pos|
{
self::compound(pos,
state)
})
})
})
})
})
})
})
})
})
})
})
})
})
})
}).and_then(|pos|
{
self::skip(pos,
state)
}).and_then(|pos|
{
self::eoi(pos,
state)
})
})
})
}
#[inline]
fn soi<'i>(pos: ::pest::Position<'i>,
_: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
pos.at_start()
}
#[inline]
fn eoi<'i>(pos: ::pest::Position<'i>,
_: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
pos.at_end()
}
#[inline]
fn any<'i>(pos: ::pest::Position<'i>,
_: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
pos.skip(1)
}
#[inline]
#[allow(dead_code)]
fn skip<'i>(pos: ::pest::Position<'i>,
_: &mut ::pest::ParserState<'i, Rule>)
->
::std::result::Result<::pest::Position<'i>,
::pest::Position<'i>> {
Ok(pos)
}
}
::pest::state(input,
move |mut state, pos|
{
match rule {
Rule::new_line =>
rules::new_line(pos, &mut state),
Rule::space =>
rules::space(pos, &mut state),
Rule::comment_start =>
rules::comment_start(pos, &mut state),
Rule::line => rules::line(pos, &mut state),
Rule::name => rules::name(pos, &mut state),
Rule::value =>
rules::value(pos, &mut state),
Rule::equal_sign =>
rules::equal_sign(pos, &mut state),
Rule::config_item =>
rules::config_item(pos, &mut state),
Rule::left_bracket =>
rules::left_bracket(pos, &mut state),
Rule::right_bracket =>
rules::right_bracket(pos, &mut state),
Rule::section =>
rules::section(pos, &mut state),
Rule::comment_line =>
rules::comment_line(pos, &mut state),
Rule::blank_line =>
rules::blank_line(pos, &mut state),
Rule::directive =>
rules::directive(pos, &mut state),
Rule::include =>
rules::include(pos, &mut state),
Rule::unset =>
rules::unset(pos, &mut state),
Rule::compound =>
rules::compound(pos, &mut state),
Rule::file => rules::file(pos, &mut state),
}
})
}
}


@ -0,0 +1,47 @@
// "comment" and "whitespace" have special meaning in pest. They cause more
// trouble than benefit here. Therefore, avoid them.
// See https://pest-parser.github.io/book/grammars/syntax.html
//
// Names are used in error messages. Certain rules are used to improve UX.
// For example,
//
// equal_sign = { space* ~ "=" ~ space* }
// config_item = { name ~ equal_sign ~ value }
//
// is more friendly than:
//
// config_item = { name ~ space* ~ "=" ~ space* ~ value }
//
// because the former shows "expected equal_sign", while the latter shows
// "expected space", for the following illegal content:
//
// [section]
// lack-of-equal-sign
// ^ error shows here
//
// Same applies to "directive" and "bracket"s.
new_line = { "\n" | "\r" }
space = { " " | "\t" }
comment_start = { ("#" | ";") }
line = @{ (!new_line ~ any)* }
name = @{ (!(space | new_line | comment_start | "=" | "[" | "]" | "%") ~ any)+ }
value = @{ line ~ (new_line ~ space+ ~ line)* }
equal_sign = @{ space* ~ "=" ~ space* }
config_item = ${ name ~ equal_sign ~ value }
left_bracket = @{ "[" }
right_bracket = @{ "]" }
section = ${ left_bracket ~ name ~ right_bracket }
comment_line = @{ comment_start ~ line }
blank_line = @{ space* }
directive = ${ "%" ~ (include | unset) }
include = ${ "include" ~ space+ ~ line }
unset = ${ "unset" ~ space+ ~ name ~ space* }
compound = _{ (config_item | section | comment_line | directive | blank_line) }
file = _{ soi ~ compound ~ (new_line ~ compound)* ~ eoi }
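
For reference, here is a minimal sketch (illustrative, not part of this change)
of how the grammar above is driven, using the generated ConfigParser and the
same pest 1.x calls that config.rs uses:

  use parser::{ConfigParser, Rule};
  use pest::Parser;

  // Parse a whole config file and print each top-level pair with the text it matched.
  fn dump(text: &str) {
      let pairs = ConfigParser::parse(Rule::file, text).expect("parse error");
      for pair in pairs {
          println!("{:?} -> {:?}", pair.as_rule(), pair.as_str());
      }
  }

  // dump("[section]\nfoo = line1\n  line2\n");
  // prints: section, new_line, config_item, new_line, blank_line
  // (compound and file are silent rules, so they do not show up as pairs).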


@ -11,6 +11,10 @@
> z = 3
> [a]
> %unset y
> %include broken.rc
> EOF
$ cat >> broken.rc << EOF
> %not-implemented
> EOF
@ -27,7 +31,7 @@
section [c] has names ['x']
section [d] has names ['y', 'x']
>>> print("errors: %r" % cfg.errors())
errors: ['"$TESTTMP/b.rc": parse error around byte 37: unknown instruction']
errors: ['"$TESTTMP/broken.rc":\n --> 1:2\n |\n1 | %not-implemented\n | ^---\n |\n = expected include or unset']
>>> for item in ["a.x", "a.y", "b.z", "c.x", "d.x", "d.y", "e.x"]:
... section, name = item.split(".")
... print("%s = %r" % (item, cfg.get(section, name)))
@ -35,7 +39,7 @@
a.x = '1'
sources: [('1', ('$TESTTMP/a.rc', 6, 7), 'readpath')]
a.y = None
sources: [('2', ('$TESTTMP/a.rc', 10, 11), 'readpath'), (None, ('$TESTTMP/b.rc', 28, 36), 'readpath')]
sources: [('2', ('$TESTTMP/a.rc', 10, 11), 'readpath'), (None, ('$TESTTMP/b.rc', 29, 36), 'readpath')]
b.z = '3'
sources: [('3', ('$TESTTMP/b.rc', 22, 23), 'readpath')]
c.x = '1'