Back out "[hg] configparser: inline the pest grammar"

Summary:
The pest codegen has some non-determinism (HashMap) that breaks buck build.

According to jsgf:

  This basically indicates some kind of non-determinism in the build. They're a pain, so I'd been hoping that we'd got them all.

  Yeah, pest is generating non-deterministic output, which will screw things up badly. The problem is:

  https://github.com/pest-parser/pest/blob/master/generator/src/generator.rs#L92-L93

    fn generate_builtin_rules() -> HashMap<&'static str, TokenStream> {
        let mut builtins = HashMap::new();

  is putting builtins into a `HashMap`, then:

  https://github.com/pest-parser/pest/blob/master/generator/src/generator.rs#L46

    rules.extend(defaults.into_iter().map(|name| builtins[name].clone()));

  emitting them in hashmap order. It needs to use a `BTreeMap` to make sure they're in a consistent order.

  (I didn't check whether there are other instances of this.)

Reviewed By: jsgf

Differential Revision: D17063573

fbshipit-source-id: c03adc3c6d50bd09ffbd44ca8dc7bc51d6cad28d
This commit is contained in:
Jun Wu 2019-08-26 18:29:36 -07:00 committed by Facebook Github Bot
parent 3e4443737f
commit 6a304509f0
3 changed files with 818 additions and 61 deletions

View File

@ -0,0 +1,117 @@
#!/usr/bin/env python3
# Copyright 2018 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import hashlib
import os
import re
import subprocess
import tempfile
# Short alias: the parent directory is taken twice below.
dirname = os.path.dirname
# This script's directory sits directly under the crate root, so the crate
# root is two levels above the (symlink-resolved) path of this file.
_script_path = os.path.realpath(__file__)
crate_root = dirname(dirname(_script_path))
def expand_parser(pest):
    """Expand the ``#[derive(Parser)]`` part of a pest grammar into Rust code.

    A throwaway cargo project is created in a temporary directory. It holds a
    copy of this crate's ``Cargo.toml`` truncated at ``[dev-dependencies]``,
    the grammar as ``src/spec.pest`` and a minimal ``src/lib.rs`` that derives
    ``ConfigParser`` from it. ``cargo-expand --release`` is run in that
    project and only the ``Rule`` enum and the ``::pest::Parser<Rule>`` impl
    are kept from its output.

    Args:
        pest: Content of the grammar file, as bytes.

    Returns:
        Rust source code (str) with the ``Rule`` enum, the ``ConfigParser``
        struct and its parser implementation.

    Raises:
        OSError: ``Cargo.toml`` cannot be read or ``cargo-expand`` cannot be
            started.
        subprocess.CalledProcessError: ``cargo-expand`` exits with a non-zero
            status.
        RuntimeError: The expanded output does not contain the expected
            ``Rule`` enum or ``Parser`` impl.
    """
    with tempfile.TemporaryDirectory() as tmp_root:
        # Copy Cargo.toml, keeping only what precedes [dev-dependencies].
        # NOTE(review): this drops [[bench]] only if it is declared after
        # [dev-dependencies] in Cargo.toml - confirm.
        with open(os.path.join(crate_root, "Cargo.toml")) as f:
            content = f.read()
        with open(os.path.join(tmp_root, "Cargo.toml"), "w") as f:
            f.write(content.split("[dev-dependencies]")[0])

        # Copy spec.pest
        os.mkdir(os.path.join(tmp_root, "src"))
        with open(os.path.join(tmp_root, "src", "spec.pest"), "wb") as f:
            f.write(pest)

        # Create a minimal project which is used to expand ConfigParser
        with open(os.path.join(tmp_root, "src", "lib.rs"), "w") as f:
            f.write(
                """
#[derive(Parser)]
#[grammar = "spec.pest"]
pub(crate) struct ConfigParser;
"""
            )

        # Run cargo-expand. RUSTFMT is pointed at "false" in the child
        # environment only; os.environ itself is left untouched.
        env = os.environ.copy()
        env["RUSTFMT"] = "false"
        expanded = subprocess.check_output(
            ["cargo-expand", "--release"], env=env, cwd=tmp_root
        )
        expanded = expanded.decode("utf-8")

    # Keep only interesting parts. Both items start at column 0 and end with
    # a "}" at column 0, hence MULTILINE ("^") combined with DOTALL.
    flags = re.S | re.M
    rule_match = re.search(r"^pub enum Rule [^}]*^\}", expanded, flags)
    impl_match = re.search(
        r"^impl ::pest::Parser<Rule> for ConfigParser .*^\}", expanded, flags
    )
    if rule_match is None or impl_match is None:
        missing = "Rule enum" if rule_match is None else "Parser impl"
        raise RuntimeError(
            "cannot find the %s in cargo-expand output" % missing
        )
    rule_struct = rule_match.group(0)
    parser_impl = impl_match.group(0)

    code = f"""
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
{rule_struct}
pub(crate) struct ConfigParser;
{parser_impl}
"""
    return code
def write_generated_parser():
    """Regenerate ``src/parser.rs`` from ``src/spec.pest`` when needed.

    The SHA-1 of ``spec.pest`` is compared with the ``pest-checksum`` recorded
    in the existing ``parser.rs``. When they are equal a message is printed
    and nothing is written. Otherwise the parser is expanded with
    ``expand_parser`` and ``parser.rs`` is rewritten with a header comment,
    the new checksum and the generated code.

    The parser is expanded *before* ``parser.rs`` is opened for writing, so a
    failing ``cargo-expand`` does not leave a truncated ``parser.rs`` behind.

    Raises:
        OSError: ``spec.pest`` cannot be read or ``parser.rs`` cannot be
            written.
        Any exception raised by ``expand_parser``.
    """
    spec_pest_path = os.path.join(crate_root, "src", "spec.pest")
    with open(spec_pest_path, "rb") as f:
        spec = f.read()
    # The digest is only used for change detection.
    checksum = hashlib.sha1(spec).hexdigest()
    output_path = os.path.join(crate_root, "src", "parser.rs")

    # Best effort: a missing, unreadable or undecodable parser.rs, or one
    # without a checksum line, simply means "regenerate".
    try:
        with open(output_path) as f:
            checksum_match = re.search(r"pest-checksum: (.*)\.", f.read())
    except (OSError, ValueError):
        checksum_match = None
    if checksum_match is not None and checksum_match.group(1) == checksum:
        print(
            "No need to update %s because %s is not changed."
            % (output_path, spec_pest_path)
        )
        return

    code = expand_parser(spec)
    # The two reversed literals below spell the generated-file markers without
    # this script itself containing them verbatim.
    with open(output_path, "w") as f:
        f.write(
            f"""
// Generated by generate_parser.py. Do not edit manually. Instead, edit
// spec.pest, then run generate_parser.py (require cargo-expand).
//
// This file should really be just 3 lines:
//
// #[derive(Parser)]
// #[grammar = "spec.pest"]
// pub(crate) struct ConfigParser;
//
// However, `#[grammar = "spec.pest"]` does not play well with Buck build,
// because pest_derive cannot find "spec.pest" in buck build environment.
// Therefore this file is {'detareneg@'[::-1]}. {"tnil-on@"[::-1]}.
// pest-checksum: {checksum}.
{code}"""
        )
# Script entry point: regenerate src/parser.rs only when run directly, not
# when this module is imported.
if __name__ == "__main__":
    write_generated_parser()

View File

@ -3,72 +3,649 @@
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
use pest_derive::Parser;
#[derive(Parser)]
#[grammar_inline = r##"
// "comment" and "whitespace" have special meaning in pest. They cause more
// trouble than benefit here. Therefore, avoid them.
// See https://pest-parser.github.io/book/grammars/syntax.html
// Generated by generate_parser.py. Do not edit manually. Instead, edit
// spec.pest, then run generate_parser.py (require cargo-expand).
//
// Names are used in error messages. Certain rules are used to improve UX.
// For example,
// This file should really be just 3 lines:
//
// equal_sign = { space* ~ "=" ~ space* }
// config_item = { name ~ equal_sign ~ value }
// #[derive(Parser)]
// #[grammar = "spec.pest"]
// pub(crate) struct ConfigParser;
//
// is more friendly than:
//
// config_item = { name ~ space* ~ "=" ~ space* ~ value }
//
// because the former shows "expect equal_sign", while the latter shows
// "expect space", for the following illegal content:
//
// [section]
// lack-of-equal-sign
// ^ error shows here
//
// Same applies to "directive" and "bracket"s.
// However, `#[grammar = "spec.pest"]` does not play well with Buck build,
// because pest_derive cannot find "spec.pest" in buck build environment.
// Therefore this file is @generated. @no-lint.
// pest-checksum: af79d1287503f19c331643cd2e28d30f6312592f.
new_line = { "\n" | "\r\n" }
space = { " " | "\t" }
comment_start = { ("#" | ";") }
// One variant per rule of spec.pest, in grammar order, plus the pest
// built-in EOI. Rule names appear in parse error messages, so they are kept
// in the grammar's snake_case (hence the non_camel_case_types allowance).
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rule {
// Built-in: end of input.
EOI,
// Basic tokens.
new_line,
space,
comment_start,
// Line-level rules.
line,
value,
equal_sign,
config_name,
config_item,
left_bracket,
right_bracket,
section_name,
section,
comment_line,
blank_line,
// "%" directives.
directive,
include,
unset,
// Silent rules in the grammar (`_{ ... }`): their parser functions below do
// not call `state.rule`.
compound,
file,
}
line = @{ (!new_line ~ ANY)* }
value = ${ line ~ (new_line ~ space+ ~ line)* }
equal_sign = @{ "=" ~ space* }
// Excluding special prefixes explicitly from config_name affects error
// messages. For example:
//
// []
// ^ expect section_name (with "[" excluded)
// ^ expect equal_sign (without "[" excluded)
//
// %unknown
// ^ expect unset or include (with "%" excluded)
// ^ expect equal_sign (without "%" excluded)
//
// The "expect equal_sign" version is less friendly.
config_name = @{ !("[" | "=" | "%" | space | comment_start | new_line) ~ ANY ~ (!("=" | new_line) ~ ANY)* }
config_item = ${ config_name ~ equal_sign ~ value }
left_bracket = @{ "[" }
right_bracket = @{ "]" }
section_name = @{ (!("]" | new_line) ~ ANY)+ }
section = ${ left_bracket ~ section_name ~ right_bracket ~ space* }
comment_line = @{ comment_start ~ line }
blank_line = @{ space* }
directive = ${ "%" ~ (include | unset) }
include = ${ "include" ~ space+ ~ line }
unset = ${ "unset" ~ space+ ~ config_name ~ space* }
compound = _{ (config_item | section | comment_line | directive | blank_line ) }
file = _{ SOI ~ compound ~ (new_line ~ compound)* ~ EOI }
"##]
pub(crate) struct ConfigParser;
// Expansion of `#[derive(Parser)]` for spec.pest, produced by
// generate_parser.py. Do not edit manually; edit spec.pest and regenerate.
// Each function in `rules::visible` corresponds to one grammar rule; the
// grammar rule it was expanded from is quoted above it.
impl ::pest::Parser<Rule> for ConfigParser {
fn parse<'i>(rule: Rule, input: &'i str)
->
::std::result::Result<::pest::iterators::Pairs<'i, Rule>,
::pest::error::Error<Rule>> {
mod rules {
pub mod hidden {
use super::super::Rule;
// Called between the elements of the non-atomic `file` sequence. It returns
// the state unchanged, i.e. nothing is skipped implicitly.
#[inline]
#[allow(dead_code, non_snake_case, unused_variables)]
pub fn skip(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
Ok(state)
}
}
pub mod visible {
use super::super::Rule;
// new_line = { "\n" | "\r\n" }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn new_line(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::new_line,
|state|
{
state.match_string("\n").or_else(|state|
{
state.match_string("\r\n")
})
})
}
// space = { " " | "\t" }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn space(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::space,
|state|
{
state.match_string(" ").or_else(|state|
{
state.match_string("\t")
})
})
}
// comment_start = { ("#" | ";") }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn comment_start(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::comment_start,
|state|
{
state.match_string("#").or_else(|state|
{
state.match_string(";")
})
})
}
// line = @{ (!new_line ~ ANY)* }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn line(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::line,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.repeat(|state|
{
state.sequence(|state|
{
state.lookahead(false,
|state|
{
self::new_line(state)
}).and_then(|state|
{
self::ANY(state)
})
})
})
})
})
}
// value = ${ line ~ (new_line ~ space+ ~ line)* }
// `space+` is expanded as one `space` followed by a repeat of `space`.
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn value(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
|state|
{
state.rule(Rule::value,
|state|
{
state.sequence(|state|
{
self::line(state).and_then(|state|
{
state.repeat(|state|
{
state.sequence(|state|
{
self::new_line(state).and_then(|state|
{
state.sequence(|state|
{
self::space(state).and_then(|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
}).and_then(|state|
{
self::line(state)
})
})
})
})
})
})
})
}
// equal_sign = @{ "=" ~ space* }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn equal_sign(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::equal_sign,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.sequence(|state|
{
state.match_string("=").and_then(|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
})
})
}
// config_name = @{ !("[" | "=" | "%" | space | comment_start | new_line) ~ ANY ~ (!("=" | new_line) ~ ANY)* }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn config_name(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::config_name,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.sequence(|state|
{
state.lookahead(false,
|state|
{
state.match_string("[").or_else(|state|
{
state.match_string("=")
}).or_else(|state|
{
state.match_string("%")
}).or_else(|state|
{
self::space(state)
}).or_else(|state|
{
self::comment_start(state)
}).or_else(|state|
{
self::new_line(state)
})
}).and_then(|state|
{
self::ANY(state)
}).and_then(|state|
{
state.repeat(|state|
{
state.sequence(|state|
{
state.lookahead(false,
|state|
{
state.match_string("=").or_else(|state|
{
self::new_line(state)
})
}).and_then(|state|
{
self::ANY(state)
})
})
})
})
})
})
})
}
// config_item = ${ config_name ~ equal_sign ~ value }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn config_item(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
|state|
{
state.rule(Rule::config_item,
|state|
{
state.sequence(|state|
{
self::config_name(state).and_then(|state|
{
self::equal_sign(state)
}).and_then(|state|
{
self::value(state)
})
})
})
})
}
// left_bracket = @{ "[" }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn left_bracket(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::left_bracket,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.match_string("[")
})
})
}
// right_bracket = @{ "]" }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn right_bracket(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::right_bracket,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.match_string("]")
})
})
}
// section_name = @{ (!("]" | new_line) ~ ANY)+ }
// The `+` is expanded as one occurrence followed by a repeat of the same
// sequence.
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn section_name(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::section_name,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.sequence(|state|
{
state.sequence(|state|
{
state.lookahead(false,
|state|
{
state.match_string("]").or_else(|state|
{
self::new_line(state)
})
}).and_then(|state|
{
self::ANY(state)
})
}).and_then(|state|
{
state.repeat(|state|
{
state.sequence(|state|
{
state.lookahead(false,
|state|
{
state.match_string("]").or_else(|state|
{
self::new_line(state)
})
}).and_then(|state|
{
self::ANY(state)
})
})
})
})
})
})
})
}
// section = ${ left_bracket ~ section_name ~ right_bracket ~ space* }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn section(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
|state|
{
state.rule(Rule::section,
|state|
{
state.sequence(|state|
{
self::left_bracket(state).and_then(|state|
{
self::section_name(state)
}).and_then(|state|
{
self::right_bracket(state)
}).and_then(|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
})
})
}
// comment_line = @{ comment_start ~ line }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn comment_line(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::comment_line,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.sequence(|state|
{
self::comment_start(state).and_then(|state|
{
self::line(state)
})
})
})
})
}
// blank_line = @{ space* }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn blank_line(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::blank_line,
|state|
{
state.atomic(::pest::Atomicity::Atomic,
|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
}
// directive = ${ "%" ~ (include | unset) }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn directive(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
|state|
{
state.rule(Rule::directive,
|state|
{
state.sequence(|state|
{
state.match_string("%").and_then(|state|
{
self::include(state).or_else(|state|
{
self::unset(state)
})
})
})
})
})
}
// include = ${ "include" ~ space+ ~ line }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn include(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
|state|
{
state.rule(Rule::include,
|state|
{
state.sequence(|state|
{
state.match_string("include").and_then(|state|
{
state.sequence(|state|
{
self::space(state).and_then(|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
}).and_then(|state|
{
self::line(state)
})
})
})
})
}
// unset = ${ "unset" ~ space+ ~ config_name ~ space* }
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn unset(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.atomic(::pest::Atomicity::CompoundAtomic,
|state|
{
state.rule(Rule::unset,
|state|
{
state.sequence(|state|
{
state.match_string("unset").and_then(|state|
{
state.sequence(|state|
{
self::space(state).and_then(|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
}).and_then(|state|
{
self::config_name(state)
}).and_then(|state|
{
state.repeat(|state|
{
self::space(state)
})
})
})
})
})
}
// compound = _{ (config_item | section | comment_line | directive | blank_line ) }
// Silent rule: alternatives are tried in this order and no `state.rule` call
// is made for `compound` itself.
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn compound(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
self::config_item(state).or_else(|state|
{
self::section(state)
}).or_else(|state|
{
self::comment_line(state)
}).or_else(|state|
{
self::directive(state)
}).or_else(|state|
{
self::blank_line(state)
})
}
// file = _{ SOI ~ compound ~ (new_line ~ compound)* ~ EOI }
// Silent, non-atomic rule: `hidden::skip` is called between the sequence
// elements, and the `(...)*` is expanded as an optional first occurrence
// followed by a repeat.
#[inline]
#[allow(non_snake_case, unused_variables)]
pub fn file(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.sequence(|state|
{
self::SOI(state).and_then(|state|
{
super::hidden::skip(state)
}).and_then(|state|
{
self::compound(state)
}).and_then(|state|
{
super::hidden::skip(state)
}).and_then(|state|
{
state.sequence(|state|
{
state.optional(|state|
{
state.sequence(|state|
{
self::new_line(state).and_then(|state|
{
super::hidden::skip(state)
}).and_then(|state|
{
self::compound(state)
})
}).and_then(|state|
{
state.repeat(|state|
{
state.sequence(|state|
{
super::hidden::skip(state).and_then(|state|
{
state.sequence(|state|
{
self::new_line(state).and_then(|state|
{
super::hidden::skip(state)
}).and_then(|state|
{
self::compound(state)
})
})
})
})
})
})
})
})
}).and_then(|state|
{
super::hidden::skip(state)
}).and_then(|state|
{
self::EOI(state)
})
})
}
// Built-in rule SOI: matches at the start of the input.
#[inline]
#[allow(dead_code, non_snake_case, unused_variables)]
pub fn SOI(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.start_of_input()
}
// Built-in rule EOI: matches at the end of the input and is recorded as
// Rule::EOI.
#[inline]
#[allow(dead_code, non_snake_case, unused_variables)]
pub fn EOI(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.rule(Rule::EOI, |state| state.end_of_input())
}
// Built-in rule ANY: advances the position by one.
#[inline]
#[allow(dead_code, non_snake_case, unused_variables)]
pub fn ANY(state: Box<::pest::ParserState<Rule>>)
-> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> {
state.skip(1)
}
}
pub use self::visible::*;
}
// Run the parser function that corresponds to the requested start rule.
::pest::state(input,
|state|
{
match rule {
Rule::new_line => rules::new_line(state),
Rule::space => rules::space(state),
Rule::comment_start =>
rules::comment_start(state),
Rule::line => rules::line(state),
Rule::value => rules::value(state),
Rule::equal_sign =>
rules::equal_sign(state),
Rule::config_name =>
rules::config_name(state),
Rule::config_item =>
rules::config_item(state),
Rule::left_bracket =>
rules::left_bracket(state),
Rule::right_bracket =>
rules::right_bracket(state),
Rule::section_name =>
rules::section_name(state),
Rule::section => rules::section(state),
Rule::comment_line =>
rules::comment_line(state),
Rule::blank_line =>
rules::blank_line(state),
Rule::directive => rules::directive(state),
Rule::include => rules::include(state),
Rule::unset => rules::unset(state),
Rule::compound => rules::compound(state),
Rule::file => rules::file(state),
Rule::EOI => rules::EOI(state),
}
})
}
}

View File

@ -0,0 +1,63 @@
// "comment" and "whitespace" have special meaning in pest. They cause more
// trouble than benefit here. Therefore, avoid them.
// See https://pest-parser.github.io/book/grammars/syntax.html
//
// Names are used in error messages. Certain rules are used to improve UX.
// For example,
//
// equal_sign = { space* ~ "=" ~ space* }
// config_item = { name ~ equal_sign ~ value }
//
// is more friendly than:
//
// config_item = { name ~ space* ~ "=" ~ space* ~ value }
//
// because the former shows "expect equal_sign", while the latter shows
// "expect space", for the following illegal content:
//
// [section]
// lack-of-equal-sign
// ^ error shows here
//
// Same applies to "directive" and "bracket"s.
//
// Rule modifiers used below: "@" is atomic, "$" is compound-atomic and "_"
// is silent (the rule itself is not reported).

// Basic tokens.
new_line = { "\n" | "\r\n" }
space = { " " | "\t" }
comment_start = { ("#" | ";") }
// Everything up to, but not including, the next new_line. May be empty.
line = @{ (!new_line ~ ANY)* }
// A line, optionally continued by following lines that start with at least
// one space.
value = ${ line ~ (new_line ~ space+ ~ line)* }
// Spaces after "=" belong to equal_sign, not to the value.
equal_sign = @{ "=" ~ space* }
// Excluding special prefixes explicitly from config_name affects error
// messages. For example:
//
// []
// ^ expect section_name (with "[" excluded)
// ^ expect equal_sign (without "[" excluded)
//
// %unknown
// ^ expect unset or include (with "%" excluded)
// ^ expect equal_sign (without "%" excluded)
//
// The "expect equal_sign" version is less friendly.
config_name = @{ !("[" | "=" | "%" | space | comment_start | new_line) ~ ANY ~ (!("=" | new_line) ~ ANY)* }
config_item = ${ config_name ~ equal_sign ~ value }
// Brackets are separate named rules so that they show up in error messages.
left_bracket = @{ "[" }
right_bracket = @{ "]" }
section_name = @{ (!("]" | new_line) ~ ANY)+ }
section = ${ left_bracket ~ section_name ~ right_bracket ~ space* }
comment_line = @{ comment_start ~ line }
blank_line = @{ space* }
// "%include <line>" and "%unset <config_name>".
directive = ${ "%" ~ (include | unset) }
include = ${ "include" ~ space+ ~ line }
unset = ${ "unset" ~ space+ ~ config_name ~ space* }
// One logical entry of the file. Alternatives are tried in this order.
compound = _{ (config_item | section | comment_line | directive | blank_line ) }
// Entry point: the whole input is a new_line-separated list of compounds.
file = _{ SOI ~ compound ~ (new_line ~ compound)* ~ EOI }