From f7d4ef546aa43a3cb084264bc69a8b9d7643ef95 Mon Sep 17 00:00:00 2001 From: Ara Adkins Date: Tue, 16 Jun 2020 17:18:11 +0100 Subject: [PATCH] Prepare the repo for working on rust code (#841) --- .cargo/config | 4 + .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/settings.yml | 2 +- .github/workflows/rust.yml | 55 ++++ .github/workflows/scala.yml | 6 +- .gitignore | 40 +-- .rustfmt.toml | 33 +++ Cargo.toml | 25 ++ docs/README.md | 2 +- docs/parser/README.md | 25 ++ docs/parser/tech-analysis.md | 68 +++++ parser/flexer/Cargo.toml | 27 ++ parser/flexer/README.md | 4 + parser/flexer/build.rs | 1 + parser/flexer/src/automata.rs | 8 + parser/flexer/src/automata/alphabet.rs | 61 +++++ parser/flexer/src/automata/dfa.rs | 156 +++++++++++ parser/flexer/src/automata/nfa.rs | 365 +++++++++++++++++++++++++ parser/flexer/src/automata/pattern.rs | 164 +++++++++++ parser/flexer/src/automata/state.rs | 121 ++++++++ parser/flexer/src/data.rs | 3 + parser/flexer/src/data/matrix.rs | 55 ++++ parser/flexer/src/group.rs | 222 +++++++++++++++ parser/flexer/src/group/rule.rs | 39 +++ parser/flexer/src/lib.rs | 17 ++ parser/flexer/src/parser.rs | 15 + 26 files changed, 1482 insertions(+), 38 deletions(-) create mode 100644 .cargo/config create mode 100644 .github/workflows/rust.yml create mode 100644 .rustfmt.toml create mode 100644 Cargo.toml create mode 100644 docs/parser/README.md create mode 100644 docs/parser/tech-analysis.md create mode 100644 parser/flexer/Cargo.toml create mode 100644 parser/flexer/README.md create mode 100644 parser/flexer/build.rs create mode 100644 parser/flexer/src/automata.rs create mode 100644 parser/flexer/src/automata/alphabet.rs create mode 100644 parser/flexer/src/automata/dfa.rs create mode 100644 parser/flexer/src/automata/nfa.rs create mode 100644 parser/flexer/src/automata/pattern.rs create mode 100644 parser/flexer/src/automata/state.rs create mode 100644 parser/flexer/src/data.rs create mode 100644 parser/flexer/src/data/matrix.rs create mode 
100644 parser/flexer/src/group.rs create mode 100644 parser/flexer/src/group/rule.rs create mode 100644 parser/flexer/src/lib.rs create mode 100644 parser/flexer/src/parser.rs diff --git a/.cargo/config b/.cargo/config new file mode 100644 index 0000000000..c7960922a8 --- /dev/null +++ b/.cargo/config @@ -0,0 +1,4 @@ + +[build] +target-dir = "target/rust/" +rustflags = [] diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index aba880a7b1..143a8d8062 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -14,5 +14,5 @@ Please include the following checklist in your PR: - [ ] The documentation has been updated if necessary. -- [ ] All code conforms to the [Scala](https://github.com/luna/enso/blob/main/docs/style-guide/scala.md) and [Java](https://github.com/luna/enso/blob/main/docs/style-guide/java.md) style guides. +- [ ] All code conforms to the [Scala](https://github.com/luna/enso/blob/main/docs/style-guide/scala.md), [Java](https://github.com/luna/enso/blob/main/docs/style-guide/java.md), and [Rust](https://github.com/luna/enso/blob/main/docs/style-guide/rust.md) style guides. - [ ] All code has been tested where possible. diff --git a/.github/settings.yml b/.github/settings.yml index d265643818..055a73152e 100644 --- a/.github/settings.yml +++ b/.github/settings.yml @@ -204,7 +204,7 @@ branches: required_status_checks: # Require branches to be up to date before merging. 
strict: true - contexts: ["Test (macOS-latest)", "Test (ubuntu-latest)", "license/cla"] + contexts: ["Test Engine (macOS-latest)", "Test Engine (ubuntu-latest)", "Test Parser (macOS-latest)", "Test Parser (ubuntu-latest)", " Test Parser (windows-latest) ", "license/cla"] enforce_admins: null restrictions: null diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000000..5244f1bc7a --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,55 @@ +name: Parser CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ "*" ] + +env: + wasmpackVersion: 0.8.1 + +jobs: + test: + name: Test Parser + runs-on: ${{ matrix.os }} + timeout-minutes: 10 + strategy: + matrix: + os: [macOS-latest, ubuntu-latest, windows-latest] + fail-fast: false + steps: + - name: Checkout Parser Sources + uses: actions/checkout@v2 + + # Install Tooling + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly-2020-06-09 + override: true + - name: Install wasm-pack + uses: actions-rs/cargo@v1 + with: + command: install + args: wasm-pack --version ${{ env.wasmpackVersion }} + + # Caches + - name: Cache Cargo Registry + uses: actions/cache@v2 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**Cargo.toml') }} + restore-keys: ${{ runner.os }}-cargo-registry + - name: Cache Cargo Test + uses: actions/cache@v2 + with: + path: ./target/rust + key: ${{ runner.os }}-cargo-build-${{ hashFiles('**Cargo.toml') }} + restore-keys: ${{ runner.os }}-cargo-build + + # Tests + - name: Test Parser + uses: actions-rs/cargo@v1 + with: + command: test diff --git a/.github/workflows/scala.yml b/.github/workflows/scala.yml index a79d7d8937..38c5120699 100644 --- a/.github/workflows/scala.yml +++ b/.github/workflows/scala.yml @@ -1,4 +1,4 @@ -name: Enso CI +name: Engine CI on: push: @@ -27,7 +27,7 @@ env: jobs: # This job is responsible for testing the codebase test: - name: Test + name: Test Engine 
runs-on: ${{ matrix.os }} timeout-minutes: 30 strategy: @@ -93,7 +93,7 @@ jobs: # This job is responsible for building the artifacts build: - name: Build + name: Build Engine runs-on: ubuntu-latest timeout-minutes: 30 steps: diff --git a/.gitignore b/.gitignore index 10afaf8320..6fbcb5fa43 100644 --- a/.gitignore +++ b/.gitignore @@ -5,15 +5,9 @@ graal_dumps -########## -## Java ## -########## - -*.class - -########### -## Scala ## -########### +######### +## JVM ## +######### graal_dumps/ target/ @@ -25,6 +19,8 @@ target/ ########## Cargo.lock +**/*.rs.bk +wasm-pack.log ############# ## Haskell ## @@ -32,28 +28,12 @@ Cargo.lock dist cabal-dev -*.o -*.hi -*.chi -*.chs.h -*.dyn_o -*.dyn_hi -.hpc -.hsenv -.cabal-sandbox/ -cabal.sandbox.config -*.cabal -*.prof -*.aux -*.hp -*.DS_Store .stack-work/ ############ ## System ## ############ -# OSX .DS_Store ############ @@ -70,6 +50,7 @@ cabal.sandbox.config ###################### .idea/ +.vscode/ *.swp .projections.json @@ -83,6 +64,7 @@ scaladoc/ ####################### ## Benchmark Reports ## ####################### + bench-report.xml ############## @@ -97,10 +79,4 @@ bench-report.xml ######### .editorconfig -.bloop - - -######### -## NPM ## -######### -node_modules/ +.bloop/ diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000000..7f37c27ec4 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,33 @@ + +# General Configuration +unstable_features = true +max_width = 80 +error_on_line_overflow = true +newline_style = "Unix" + +# Operators +binop_separator = "Front" + +# Whitespace +blank_lines_upper_bound = 1 + +# Code Layout +brace_style = "SameLineWhere" +combine_control_expr = true +empty_item_single_line = true +fn_single_line = true +format_strings = true +inline_attribute_width = 80 +space_before_colon = false +space_after_colon = false +type_punctuation_density = "Wide" + +# Comments +comment_width = 80 +wrap_comments = true +format_code_in_doc_comments = true +normalize_comments = true + +# Macros 
+format_macro_matchers = true +format_macro_bodies = true diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000000..47897e224a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,25 @@ +[workspace] + +members = [ + "parser/flexer" +] + +[profile.dev] +opt-level = 0 +lto = false +debug = true + +[profile.release] +opt-level = 3 +lto = true +debug = false + +[profile.bench] +opt-level = 3 +lto = true +debug = false + +[profile.test] +opt-level = 0 +lto = false +debug = true diff --git a/docs/README.md b/docs/README.md index 4aecb560a1..47a837da0b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -43,4 +43,4 @@ It is broken up into categories as follows: - [**Syntax:**](./syntax) A specification of Enso's syntax. - [**Types:**](./types) A specification of Enso's type system and type theory. - [**Debugger:**](./debugger) A specification of Enso's debugger. - +- [**Parser:**](./parser) Design and specification of the Enso parser. diff --git a/docs/parser/README.md b/docs/parser/README.md new file mode 100644 index 0000000000..ecc046ae32 --- /dev/null +++ b/docs/parser/README.md @@ -0,0 +1,25 @@ +--- +layout: docs-index +title: Enso's Parser +category: summary +tags: [parser, readme] +order: 0 +--- + +# Enso's Parser +The parser is one of the most crucial components of the Enso runtime in that +_all_ code that a user writes must be parsed. This means that a good parser is +fast, responsive, and lightweight; it shouldn't baulk at having thousands of +lines of code thrown at it. + +Enso's parser, however, is very special. In order to support interactive use it +has to narrow down the scope of a syntax error as much as possible, while still +providing useful output for the compiler around the rest of the parse errors. +This feature makes it more complex than many common parsers, so making this work +while still preserving performance is of paramount importance. 
+ +The various components of the parser's design and architecture are described +below: + +- [**Tech Analysis:**](./tech-analysis.md) A brief overview of the reasons for + the implementation technologies for the parser. diff --git a/docs/parser/tech-analysis.md b/docs/parser/tech-analysis.md new file mode 100644 index 0000000000..07ebca6fb7 --- /dev/null +++ b/docs/parser/tech-analysis.md @@ -0,0 +1,68 @@ +--- +layout: developer-doc +title: Technology Analysis +category: syntax +tags: [parser, tech-analysis] +order: 1 +--- + +# Parser Technology Analysis +As the Enso parser has some fairly unique requirements placed upon it, the +choice of implementation technology is of paramount importance. Choosing the +correct technology ensures that we can meet all of the requirements placed upon +the parser. + + + +- [Technology Requirements for the Parser](#technology-requirements-for-the-parser) +- [Issues With the Previous Implementation](#issues-with-the-previous-implementation) +- [Choosing Rust](#choosing-rust) + - [Downsides of Rust](#downsides-of-rust) + + + +## Technology Requirements for the Parser +As the parser has to work both for the Engine and for the IDE, it has a strange +set of requirements: + +- The implementation language must be able to run on native platforms, as well + as in the browser via WASM (not JavaScript due to the marshalling overhead). +- The implementation language should permit _excellent_ native performance on + both native and web platforms, by giving implementers fine-grained control + over memory usage. +- The implementation language must be able to target all primary platforms: + macOS, Linux and Windows. 
+ +## Issues With the Previous Implementation +The previous implementation of the parser was implemented in Scala, and had some +serious issues that have necessitated this rewrite: + +- **Performance:** The structures used to implement the parser proved inherently + difficult for a JIT to optimise, making performance far worse than expected on + the JVM. +- **ScalaJS Sub-Optimal Code Generation:** The JavaScript generated by ScalaJS + was very suboptimal for these structures, making the parser _even_ slower when + run in the browser. +- **JS as a Browser Target:** To transfer textual data between WASM and JS + incurs a significant marshalling overhead. As the IDE primarily works with + textual operations under the hood, this proved to be a significant slowdown. + +## Choosing Rust +Rust, then, is an obvious choice for the following reasons: + +- It can be compiled _natively_ into the IDE binary, providing them with + excellent performance. +- As a native language it can use JNI to directly create JVM objects on the JVM + heap, for use by the compiler. +- As a native language it can be called directly via JNI. +- There is potential in the future for employing Graal's LLVM bitcode + interpreter to execute the parser safely in a non-native context. + +### Downsides of Rust +This is not to say that choosing rust doesn't come with some compromises: + +- It significantly complicates the CI pipeline for the engine, as we will have + to build native artefacts for use by the runtime itself. +- As a non-JVM language, the complexity of working with it from Scala and Java + is increased. We will need to maintain a full definition of the AST in Scala + to permit the compiler to work properly with it. 
diff --git a/parser/flexer/Cargo.toml b/parser/flexer/Cargo.toml new file mode 100644 index 0000000000..9d46d331d2 --- /dev/null +++ b/parser/flexer/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "flexer" +version = "0.0.1" +authors = [ + "Enso Team ", + "Ara Adkins , +} + +impl Default for Alphabet { + fn default() -> Self { + Alphabet { + symbols:[Symbol { val:0 }].iter().cloned().collect(), + } + } +} + +impl Alphabet { + /// Inserts a range of symbols into the alphabet. + pub fn insert(&mut self, range:RangeInclusive) { + // The symbol range is associated with transition in automata. Therefore + // we: Mark the symbol with the new transition. + self.symbols.insert(Symbol { + val:range.start().val, + }); + // Mark the symbol without the new transition. + self.symbols.insert(Symbol { + val:range.end().val + 1, + }); + // This way each symbol in alphabet corresponds to a unique set of + // transitions. + } +} + +impl From> for Alphabet { + fn from(vec:Vec) -> Self { + let mut dict = Self::default(); + for val in vec { + dict.symbols.insert(Symbol { val }); + } + dict + } +} diff --git a/parser/flexer/src/automata/dfa.rs b/parser/flexer/src/automata/dfa.rs new file mode 100644 index 0000000000..a55441a94c --- /dev/null +++ b/parser/flexer/src/automata/dfa.rs @@ -0,0 +1,156 @@ +//! Exports the structure for Deterministic Finite Automata. + +use crate::automata::alphabet::Alphabet; +use crate::automata::state; +use crate::data::matrix::Matrix; + +// ===================================== +// === Deterministic Finite Automata === +// ===================================== + +/// Function callback for an arbitrary state of finite automata. +/// It contains name of Rust procedure that is meant to be executed after +/// encountering a pattern (declared in `group::Rule.pattern`). +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Callback { + /// TODO[jv] Write better explanation after implementing rust code + /// generation. Priority is used during rust code generation. 
+ pub priority:usize, + /// Name of Rust method that will be called when executing this callback. + pub name:String, +} + +/// DFA automata with a set of symbols, states and transitions. +/// Deterministic Finite Automata is a finite-state machine that accepts or +/// rejects a given sequence of symbols, by running through a state sequence +/// uniquely determined by the input symbol sequence. ___ ___ +/// ___ ___ | 0 | -- 'D' --> | 1 | -- 'F' --> | 2 | -- 'A' --> | 3 +/// | ‾‾‾ ‾‾‾ ‾‾‾ ‾‾‾ +/// More information at: https://en.wikipedia.org/wiki/Deterministic_finite_automaton + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct DFA { + /// Finite set of all valid input symbols. + pub alphabet:Alphabet, + /// Transition matrix of deterministic finite state automata. + /// It contains next state for each pair of state and input symbol - + /// (state,symbol) => new state. For example, a transition matrix for + /// automata that accepts string "ABABAB...." would look like this: + /// states + /// | | A | B | <- symbols + /// | 0 | 1 | - | + /// | 1 | - | 0 | + /// Where `-` denotes `state::INVALID`. + pub links:Matrix, + /// Stores callback for each state (if it has one). + pub callbacks:Vec>, +} + +impl From>> for Matrix { + fn from(input:Vec>) -> Self { + let rows = input.len(); + let columns = if rows == 0 { 0 } else { input[0].len() }; + let mut matrix = Self::new(rows, columns); + for row in 0..rows { + for column in 0..columns { + matrix[(row, column)] = state::Id { + id:input[row][column], + }; + } + } + matrix + } +} + +// =========== +// == Tests == +// =========== + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::automata::state; + + const I:usize = state::INVALID.id; + + /// DFA automata that accepts newline '\n'. 
+ pub fn newline() -> DFA { + DFA { + alphabet:Alphabet::from(vec![10, 11]), + links:Matrix::from(vec![vec![I, 1, I], vec![I, I, I]]), + callbacks:vec![ + None, + Some(Callback { + priority:2, + name:"group0_rule0".into(), + }), + ], + } + } + + /// DFA automata that accepts any letter a..=z. + pub fn letter() -> DFA { + DFA { + alphabet:Alphabet::from(vec![97, 123]), + links:Matrix::from(vec![vec![I, 1, I], vec![I, I, I]]), + callbacks:vec![ + None, + Some(Callback { + priority:2, + name:"group0_rule0".into(), + }), + ], + } + } + + /// DFA automata that accepts any number of spaces ' '. + pub fn spaces() -> DFA { + DFA { + alphabet:Alphabet::from(vec![0, 32, 33]), + links:Matrix::from(vec![ + vec![I, 1, I], + vec![I, 2, I], + vec![I, 2, I], + ]), + callbacks:vec![ + None, + Some(Callback { + priority:3, + name:"group0_rule0".into(), + }), + Some(Callback { + priority:3, + name:"group0_rule0".into(), + }), + ], + } + } + + /// DFA automata that accepts one letter a..=z or any many spaces. + pub fn letter_and_spaces() -> DFA { + DFA { + alphabet:Alphabet::from(vec![32, 33, 97, 123]), + links:Matrix::from(vec![ + vec![I, 1, I, 2, I], + vec![I, 3, I, I, I], + vec![I, I, I, I, I], + vec![I, 3, I, I, I], + ]), + callbacks:vec![ + None, + Some(Callback { + priority:4, + name:"group0_rule1".into(), + }), + Some(Callback { + priority:4, + name:"group0_rule0".into(), + }), + Some(Callback { + priority:4, + name:"group0_rule1".into(), + }), + ], + } + } +} diff --git a/parser/flexer/src/automata/nfa.rs b/parser/flexer/src/automata/nfa.rs new file mode 100644 index 0000000000..38d9d5c190 --- /dev/null +++ b/parser/flexer/src/automata/nfa.rs @@ -0,0 +1,365 @@ +//! Implementation of Nondeterministic Finite Automata and it's conversion to +//! DFA. 
+ +use crate::automata::alphabet::Alphabet; +use crate::automata::dfa::Callback; +use crate::automata::dfa::DFA; +use crate::automata::state; +use crate::automata::state::Link; +use crate::automata::state::State; +use crate::automata::state::Symbol; +use crate::data::matrix::Matrix; + +use crate::automata::pattern::Pattern; +use itertools::Itertools; +use std::collections::BTreeSet; +use std::collections::HashMap; +use std::ops::RangeInclusive; + +// ======================================== +// === Nondeterministic Finite Automata === +// ======================================== + +/// Type alias for a state Id based on set of states. +/// It is used during NFA -> DFA transformation where multiple states can merge +/// together, thanks to epsilon links. +type StateSetId = BTreeSet; + +/// NFA automata with a set of symbols, states and transitions. +/// Nondeterministic Finite Automata is a finite-state machine that accepts or +/// rejects a given sequence of symbols. +/// Compared to `DFA`, NFA can transition into multiple new states without +/// reading any symbol (so called epsilon link / transition), +/// ___ ___ ___ ___ ___ +/// | 0 | -- 'N' --> | 1 | ----> | 2 | -- 'F' --> | 3 | -- 'A' --> | 4 | +/// ‾‾‾ ‾‾‾ ‾‾‾ ‾‾‾ ‾‾‾ +/// More information at: https://en.wikipedia.org/wiki/Deterministic_finite_automaton +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct NFA { + /// Finite set of all valid input symbols. + pub alphabet:Alphabet, + /// Set of named NFA states with (epsilon) transitions. + pub states:Vec, +} + +impl NFA { + /// Adds a new state to NFA and returns it's Id. + pub fn new_state(&mut self) -> state::Id { + let id = self.states.len(); + self.states.push(State::default()); + state::Id { id } + } + + /// Creates an epsilon transition between two states. + /// Whenever the automata happens to be in `source` state it can + /// immediatelly move to `target` state (but does not have to). 
+ pub fn connect(&mut self, source:state::Id, target:state::Id) { + self.states[source.id].epsilon_links.push(target); + } + + /// Creates an ordinary transition (for a range of symbols) between two + /// states. If any symbol from such range happens to be on input when + /// the automata is in `source` state, it will immediatelly move to + /// `target` state. + pub fn connect_by( + &mut self, + source:state::Id, + target:state::Id, + symbols:&RangeInclusive, + ) { + self.alphabet.insert(symbols.clone()); + self.states[source.id].links.push(Link { + symbols:symbols.clone(), + target, + }); + } + + /// Transforms pattern to NFA. + /// The algorithm is based on: https://www.youtube.com/watch?v=RYNN-tb9WxI + pub fn new_pattern( + &mut self, + source:state::Id, + pattern:&Pattern, + ) -> state::Id { + let current = self.new_state(); + self.connect(source, current); + match pattern { + Pattern::Range(range) => { + let state = self.new_state(); + self.connect_by(current, state, range); + state + } + Pattern::Many(body) => { + let s1 = self.new_state(); + let s2 = self.new_pattern(s1, body); + let s3 = self.new_state(); + self.connect(current, s1); + self.connect(current, s3); + self.connect(s2, s3); + self.connect(s3, s1); + s3 + } + Pattern::And(patterns) => patterns + .iter() + .fold(current, |s, pat| self.new_pattern(s, pat)), + Pattern::Or(patterns) => { + let states = patterns + .iter() + .map(|pat| self.new_pattern(current, pat)) + .collect_vec(); + let end = self.new_state(); + for state in states { + self.connect(state, end); + } + end + } + } + } + + // === NFA -> DFA === + + /// Merges states that are connected by epsilon links. 
+ /// The algorithm is based on: https://www.youtube.com/watch?v=taClnxU-nao + fn eps_matrix(&self) -> Vec { + fn fill_eps_matrix( + nfa:&NFA, + states:&mut Vec, + computed:&mut Vec, + visited:&mut Vec, + state:state::Id, + ) { + let mut state_set = StateSetId::new(); + let mut circular = false; + visited[state.id] = true; + state_set.insert(state); + for &target in &nfa.states[state.id].epsilon_links { + if !visited[target.id] { + fill_eps_matrix(nfa, states, computed, visited, target); + } + state_set.insert(target); + state_set.extend(states[target.id].iter()); + if !computed[target.id] { + circular = true + } + } + if !circular { + computed[state.id] = true + } + states[state.id] = state_set; + } + + let mut states = vec![StateSetId::new(); self.states.len()]; + let mut computed = vec![false; self.states.len()]; + for id in 0..self.states.len() { + let mut visited = vec![false; states.len()]; + fill_eps_matrix( + self, + &mut states, + &mut computed, + &mut visited, + state::Id { id }, + ); + } + states + } + + /// Computes a transition matrix (state X symbol => state) for NFA. + /// Ignores epsilon links. + fn nfa_matrix(&self) -> Matrix { + let mut matrix = + Matrix::new(self.states.len(), self.alphabet.symbols.len()); + + for (state_ix, source) in self.states.iter().enumerate() { + let targets = source.targets(&self.alphabet); + for (voc_ix, &target) in targets.iter().enumerate() { + matrix[(state_ix, voc_ix)] = target; + } + } + matrix + } +} + +impl From<&NFA> for DFA { + /// Transforms NFA into DFA. 
+ /// The algorithm is based on: https://www.youtube.com/watch?v=taClnxU-nao + fn from(nfa:&NFA) -> Self { + let nfa_mat = nfa.nfa_matrix(); + let eps_mat = nfa.eps_matrix(); + let mut dfa_mat = Matrix::new(0, nfa.alphabet.symbols.len()); + let mut dfa_eps_ixs = Vec::::new(); + let mut dfa_eps_map = HashMap::::new(); + + dfa_eps_ixs.push(eps_mat[0].clone()); + dfa_eps_map.insert(eps_mat[0].clone(), state::Id { id:0 }); + + let mut i = 0; + while i < dfa_eps_ixs.len() { + dfa_mat.new_row(); + for voc_ix in 0..nfa.alphabet.symbols.len() { + let mut eps_set = StateSetId::new(); + for &eps_ix in &dfa_eps_ixs[i] { + let tgt = nfa_mat[(eps_ix.id, voc_ix)]; + if tgt != state::INVALID { + eps_set.extend(eps_mat[tgt.id].iter()); + } + } + if !eps_set.is_empty() { + dfa_mat[(i, voc_ix)] = match dfa_eps_map.get(&eps_set) { + Some(&id) => id, + None => { + let id = state::Id { + id:dfa_eps_ixs.len(), + }; + dfa_eps_ixs.push(eps_set.clone()); + dfa_eps_map.insert(eps_set, id); + id + } + }; + } + } + i += 1; + } + + let mut callbacks = vec![None; dfa_eps_ixs.len()]; + let priority = dfa_eps_ixs.len(); + for (dfa_ix, epss) in dfa_eps_ixs.into_iter().enumerate() { + let has_name = |&key:&state::Id| nfa.states[key.id].name.is_some(); + if let Some(eps) = epss.into_iter().find(has_name) { + let rule = nfa.states[eps.id].name.as_ref().cloned().unwrap(); + callbacks[dfa_ix] = Some(Callback { + name:rule, + priority, + }); + } + } + + DFA { + alphabet:nfa.alphabet.clone(), + links:dfa_mat, + callbacks, + } + } +} + +// =========== +// == Tests == +// =========== + +#[cfg(test)] +pub mod tests { + extern crate test; + + use crate::automata::dfa; + + use super::*; + use test::Bencher; + + /// NFA automata that accepts newline '\n'. 
+ pub fn newline() -> NFA { + NFA { + states:vec![ + State::from(vec![1]), + State::from(vec![(10..=10, 2)]), + State::from(vec![3]).named("group0_rule0"), + State::default(), + ], + alphabet:Alphabet::from(vec![10, 11]), + } + } + + /// NFA automata that accepts any letter a..=z. + pub fn letter() -> NFA { + NFA { + states:vec![ + State::from(vec![1]), + State::from(vec![(97..=122, 2)]), + State::from(vec![3]).named("group0_rule0"), + State::default(), + ], + alphabet:Alphabet::from(vec![97, 123]), + } + } + + /// NFA automata that accepts any number of spaces ' '. + pub fn spaces() -> NFA { + NFA { + states:vec![ + State::from(vec![1]), + State::from(vec![2]), + State::from(vec![(32..=32, 3)]), + State::from(vec![4]), + State::from(vec![5, 8]), + State::from(vec![6]), + State::from(vec![(32..=32, 7)]), + State::from(vec![8]), + State::from(vec![5, 9]).named("group0_rule0"), + State::default(), + ], + alphabet:Alphabet::from(vec![0, 32, 33]), + } + } + + /// NFA automata that accepts one letter a..=z or many spaces ' '. 
+ pub fn letter_and_spaces() -> NFA { + NFA { + states:vec![ + State::from(vec![1, 3]), + State::from(vec![(97..=122, 2)]), + State::from(vec![11]).named("group0_rule0"), + State::from(vec![4]), + State::from(vec![(32..=32, 5)]), + State::from(vec![6]), + State::from(vec![7, 10]), + State::from(vec![8]), + State::from(vec![(32..=32, 9)]), + State::from(vec![10]), + State::from(vec![7, 11]).named("group0_rule1"), + State::default(), + ], + alphabet:Alphabet::from(vec![32, 33, 97, 123]), + } + } + + #[test] + fn test_to_dfa_newline() { + assert_eq!(DFA::from(&newline()), dfa::tests::newline()); + } + + #[test] + fn test_to_dfa_letter() { + assert_eq!(DFA::from(&letter()), dfa::tests::letter()); + } + + #[test] + fn test_to_dfa_spaces() { + assert_eq!(DFA::from(&spaces()), dfa::tests::spaces()); + } + + #[test] + fn test_to_dfa_letter_and_spaces() { + assert_eq!( + DFA::from(&letter_and_spaces()), + dfa::tests::letter_and_spaces() + ); + } + + #[bench] + fn bench_to_dfa_newline(bencher:&mut Bencher) { + bencher.iter(|| DFA::from(&newline())) + } + + #[bench] + fn bench_to_dfa_letter(bencher:&mut Bencher) { + bencher.iter(|| DFA::from(&letter())) + } + + #[bench] + fn bench_to_dfa_spaces(bencher:&mut Bencher) { + bencher.iter(|| DFA::from(&spaces())) + } + + #[bench] + fn bench_to_dfa_letter_and_spaces(bencher:&mut Bencher) { + bencher.iter(|| DFA::from(&letter_and_spaces())) + } +} diff --git a/parser/flexer/src/automata/pattern.rs b/parser/flexer/src/automata/pattern.rs new file mode 100644 index 0000000000..1bf2fea954 --- /dev/null +++ b/parser/flexer/src/automata/pattern.rs @@ -0,0 +1,164 @@ +//! Simple API for constructing regex patterns that are used in parser +//! implementation. + +use crate::automata::state::Symbol; +use crate::parser; + +use core::iter; +use itertools::Itertools; +use std::ops::BitAnd; +use std::ops::BitOr; +use std::ops::RangeInclusive; + +// ============= +// == Pattern == +// ============= + +/// Simple regex pattern. 
+#[derive(Clone, Debug)] +pub enum Pattern { + /// Pattern that triggers on any symbol from given range. + Range(RangeInclusive), + /// Pattern that triggers on any given pattern from sequence. + Or(Vec), + /// Pattern that triggers when a sequence of patterns is encountered. + And(Vec), + /// Pattern that triggers on 0..N repetitions of given pattern. + Many(Box), +} + +use Pattern::*; + +impl BitOr for Pattern { + type Output = Pattern; + fn bitor(self, rhs:Pattern) -> Self::Output { + match (self, rhs) { + (Or(mut lhs), Or(rhs)) => { + lhs.extend(rhs); + Or(lhs) + } + (Or(mut lhs), rhs) => { + lhs.push(rhs); + Or(lhs) + } + (lhs, Or(mut rhs)) => { + rhs.push(lhs); + Or(rhs) + } + (lhs, rhs) => Or(vec![lhs, rhs]), + } + } +} + +impl BitAnd for Pattern { + type Output = Pattern; + fn bitand(self, rhs:Pattern) -> Self::Output { + match (self, rhs) { + (And(mut lhs), And(rhs)) => { + lhs.extend(rhs); + And(lhs) + } + (And(mut lhs), rhs) => { + lhs.push(rhs); + And(lhs) + } + (lhs, And(mut rhs)) => { + rhs.push(lhs); + And(rhs) + } + (lhs, rhs) => And(vec![lhs, rhs]), + } + } +} + +impl Pattern { + /// Pattern that never triggers. + pub fn never() -> Self { Pattern::symbols(1..=0) } + + /// Pattern that always triggers. + pub fn always() -> Self { + Pattern::symbols(u32::min_value()..=u32::max_value()) + } + + /// Pattern that triggers on any char. + pub fn any_char() -> Self { Pattern::symbols(0..=u32::max_value()) } + + /// Pattern that triggers on 0..N repetitions of given pattern. + pub fn many(self) -> Self { Many(Box::new(self)) } + + /// Pattern that triggers on 1..N repetitions of given pattern. + pub fn many1(self) -> Self { self.clone() & self.many() } + + /// Pattern that triggers on 0..=1 repetitions of given pattern. + pub fn opt(self) -> Self { self | Self::always() } + + /// Pattern that triggers on given symbol + pub fn symbol(symbol:u32) -> Self { Pattern::symbols(symbol..=symbol) } + + /// Pattern that triggers on any of the given symbols. 
+ pub fn symbols(symbols:RangeInclusive) -> Self { + Pattern::Range( + Symbol { + val:*symbols.start(), + }..=Symbol { val:*symbols.end() }, + ) + } + + /// Pattern that triggers on end of file. + pub fn eof() -> Self { Self::symbol(parser::EOF_CODE.val) } + + /// Pattern that triggers on given character. + pub fn char(char:char) -> Self { Self::symbol(char as u32) } + + /// Pattern that triggers on any of the given characters. + pub fn range(chars:RangeInclusive) -> Self { + Pattern::symbols((*chars.start() as u32)..=(*chars.end() as u32)) + } + + /// Pattern that triggers when sequence of characters is encountered. + pub fn all(chars:&str) -> Self { + chars + .chars() + .fold(Self::never(), |pat, char| pat & Self::char(char)) + } + + /// Pattern that triggers on any characters from given sequence. + pub fn any(chars:&str) -> Self { + chars + .chars() + .fold(Self::never(), |pat, char| pat | Self::char(char)) + } + + /// Pattern that doesn't trigger on any given character from given sequence. + pub fn none(chars:&str) -> Self { + let max = u32::max_value(); + let char_iter = chars.chars().map(|char| char as u32); + let char_iter2 = iter::once(0).chain(char_iter).chain(iter::once(max)); + let mut codes = char_iter2.collect_vec(); + + codes.sort(); + codes + .iter() + .tuple_windows() + .fold(Self::never(), |pat, (start, end)| { + if end < start { + pat + } else { + pat | Pattern::symbols(*start..=*end) + } + }) + } + + /// Pattern that triggers on any character but the one given. + pub fn not(char:char) -> Self { Self::none(&char.to_string()) } + + /// Pattern that triggers on N repetitions of given pattern. + pub fn repeat(pat:Pattern, num:usize) -> Self { + (0..num).fold(Self::always(), |p, _| p & pat.clone()) + } + + /// Pattern that triggers on MIN..MAX repetitions of given pattern. 
+ pub fn repeat_between(pat:Pattern, min:usize, max:usize) -> Self { + (min..max).fold(Self::never(), |p, n| p | Self::repeat(pat.clone(), n)) + } +} diff --git a/parser/flexer/src/automata/state.rs b/parser/flexer/src/automata/state.rs new file mode 100644 index 0000000000..4bff4ac2fc --- /dev/null +++ b/parser/flexer/src/automata/state.rs @@ -0,0 +1,121 @@ +//! This module exports State implementation for Nondeterministic Finite +//! Automata. + +use crate::automata::alphabet::Alphabet; +use crate::automata::state; + +use std::ops::RangeInclusive; + +// ======================= +// == State Of Automata == +// ======================= + +/// Flag for invalid state. +/// When finite automata gets into invalid state the input sequence of symbols +/// is rejected. +pub const INVALID:Id = Id { + id:usize::max_value(), +}; + +/// Newtype wrapper for finite automata input symbol. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Symbol { + #[allow(missing_docs)] + pub val:u32, +} + +/// Newtype wrapper for finite automata state ID. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Id { + #[allow(missing_docs)] + pub id:usize, +} + +impl Default for Id { + /// Returns state::INVALID. This is because every finite automata has an + /// invalid state and because all transitions in automata transition + /// matrix lead to invalid state by default. + fn default() -> Self { state::INVALID } +} + +/// Named NFA state with a set of transitions (links). +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct State { + /// Set of transitions that don't require any symbol to trigger. + /// I.E. If there is an epsilon link from state A to state B, then whenever + /// we are in state A, we can freely move to state B. + pub epsilon_links:Vec, + /// Set of transitions that trigger with specific symbol on input. + /// When triggered, the automata will transition to the `link.target`. 
+    pub links:Vec<Link>,
+    /// Name of the state.
+    /// We use it to autogenerate a call to Rust method with the same name.
+    pub name:Option<String>,
+}
+
+/// A transition to a new automata state
+/// that requires specific symbol on automata input to trigger.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Link {
+    /// Any symbol from the range will trigger this link.
+    pub symbols:RangeInclusive<Symbol>,
+    /// A state that is visited, after the link is triggered.
+    pub target:Id,
+}
+
+impl State {
+    /// Updater for field `name`. Returns updated state.
+    pub fn named(mut self, name:&str) -> Self {
+        self.name = Some(name.to_owned());
+        self
+    }
+
+    /// Returns transition (next state) for each symbol in alphabet.
+    pub fn targets(&self, alphabet:&Alphabet) -> Vec<Id> {
+        let mut targets = vec![];
+        let mut index = 0;
+        let mut links = self.links.clone();
+        links.sort_by_key(|link| *link.symbols.start());
+        for &symbol in &alphabet.symbols {
+            while links.len() > index && *links[index].symbols.end() < symbol {
+                index += 1;
+            }
+            if links.len() <= index || *links[index].symbols.start() > symbol {
+                targets.push(state::INVALID);
+            } else {
+                targets.push(links[index].target);
+            }
+        }
+        targets
+    }
+}
+
+impl From<Vec<usize>> for State {
+    /// Creates a state with epsilon links.
+    fn from(vec:Vec<usize>) -> Self {
+        let epsilon_links = vec.iter().cloned().map(|id| Id { id }).collect();
+        State {
+            epsilon_links,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<Vec<(RangeInclusive<u32>, usize)>> for State {
+    /// Creates a state with ordinary links.
+    fn from(vec:Vec<(RangeInclusive<u32>, usize)>) -> Self {
+        let link = |(range, id):(RangeInclusive<u32>, usize)| {
+            let start = Symbol { val:*range.start() };
+            let end = Symbol { val:*range.end() };
+            Link {
+                symbols:start..=end,
+                target:Id { id },
+            }
+        };
+        let links = vec.iter().cloned().map(link).collect();
+        State {
+            links,
+            ..Default::default()
+        }
+    }
+}
diff --git a/parser/flexer/src/data.rs b/parser/flexer/src/data.rs
new file mode 100644
index 0000000000..a2bb4e66f3
--- /dev/null
+++ b/parser/flexer/src/data.rs
@@ -0,0 +1,3 @@
+//! Generic data structures with multiple use cases.
+
+pub mod matrix;
diff --git a/parser/flexer/src/data/matrix.rs b/parser/flexer/src/data/matrix.rs
new file mode 100644
index 0000000000..f1964b48e2
--- /dev/null
+++ b/parser/flexer/src/data/matrix.rs
@@ -0,0 +1,55 @@
+//! Efficient representation of 2D matrix.
+
+use std::ops::Index;
+use std::ops::IndexMut;
+
+// ============
+// == Matrix ==
+// ============
+
+/// Efficient 2D matrix implemented on top of vector.
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub struct Matrix<T> {
+    /// The number of rows in matrix.
+    rows:usize,
+    /// The number of columns in matrix.
+    columns:usize,
+    /// Matrix implemented with vector.
+    matrix:Vec<T>,
+}
+
+impl<T> Index<(usize, usize)> for Matrix<T> {
+    type Output = T;
+    fn index(&self, index:(usize, usize)) -> &T {
+        &self.matrix[index.0 * self.columns + index.1]
+    }
+}
+
+impl<T> IndexMut<(usize, usize)> for Matrix<T> {
+    fn index_mut(&mut self, index:(usize, usize)) -> &mut T {
+        &mut self.matrix[index.0 * self.columns + index.1]
+    }
+}
+
+impl<T:Default> Matrix<T> {
+    /// Constructs a new matrix for given number of rows and columns.
+    pub fn new(rows:usize, columns:usize) -> Self {
+        let mut matrix = Vec::with_capacity(rows * columns);
+        for _ in 0..matrix.capacity() {
+            matrix.push(Default::default())
+        }
+        Self {
+            rows,
+            columns,
+            matrix,
+        }
+    }
+
+    /// Adds a new row to matrix, filled with default values.
+    pub fn new_row(&mut self) {
+        for _ in 0..self.columns {
+            self.matrix.push(Default::default());
+        }
+        self.rows += 1;
+    }
+}
diff --git a/parser/flexer/src/group.rs b/parser/flexer/src/group.rs
new file mode 100644
index 0000000000..6e89c080fc
--- /dev/null
+++ b/parser/flexer/src/group.rs
@@ -0,0 +1,222 @@
+//! This module exports API for grouping multiple rules (Rust callbacks with
+//! regex pattern) together.
+
+use crate::automata::nfa::NFA;
+use crate::automata::pattern::Pattern;
+use crate::group::rule::Rule;
+
+use itertools::Itertools;
+
+pub mod rule;
+
+// ===========
+// == Group ==
+// ===========
+
+/// Struct that groups rules together. It also inherits rules from parent group
+/// (if it has one). Groups are the basic building block of flexer:
+/// Flexer internally keeps a stack of groups, only one of them active at a
+/// time. Each group contains set of regex patterns and callbacks (together
+/// called `Rule`). Whenever a rule.pattern from active group is matched with
+/// part of input, the associated rule.callback is executed, which in turn may
+/// exit the current group or enter a new one. This allows us to nicely model a
+/// situation, where certain part of program (like a string literal) should have
+/// very different parsing rules than other (for example body of function). Note
+/// that the input is first matched with first added rule, then with the second
+/// etc. Therefore, if two rules overlap, only the callback of the first added
+/// rule will be executed.
+#[derive(Clone, Debug, Default)]
+pub struct Group {
+    /// Unique ID.
+    pub id:usize,
+    /// Custom name which is used for debugging.
+    pub name:String,
+    /// Parent which we inherit rules from.
+    pub parent:Option<Box<Group>>,
+    /// Set of regex patterns with associated callbacks.
+    pub rules:Vec<Rule>,
+}
+
+impl Group {
+    /// Adds new rule (regex pattern with associated callback) to group.
+ pub fn add_rule(&mut self, rule:Rule) { self.rules.push(rule) } + + /// Returns rule builder for given pattern. + /// TODO[jv] better describe it's purpose once we agree on correct API. + pub fn rule( + &mut self, + pattern:Pattern, + ) -> rule::Builder { + rule::Builder { + pattern, + callback:move |rule| self.add_rule(rule), + } + } + + /// All rules including parent rules. + pub fn rules(&self) -> Vec<&Rule> { + let mut parent = &self.parent; + let mut rules = (&self.rules).iter().collect_vec(); + while let Some(state) = parent { + rules.extend((&state.rules).iter()); + parent = &state.parent; + } + rules + } + + /// Canonical name of given rule. + fn callback_name(&self, rule_ix:usize) -> String { + format!("group{}_rule{}", self.id, rule_ix) + } +} + +impl From<&Group> for NFA { + /// Transforms Group to NFA. + /// Algorithm is based on: https://www.youtube.com/watch?v=RYNN-tb9WxI + fn from(group:&Group) -> Self { + let mut nfa = NFA::default(); + let start = nfa.new_state(); + let build = |rule:&Rule| nfa.new_pattern(start, &rule.pattern); + let states = group.rules().into_iter().map(build).collect_vec(); + let end = nfa.new_state(); + for (ix, state) in states.into_iter().enumerate() { + nfa.states[state.id].name = Some(group.callback_name(ix)); + nfa.connect(state, end); + } + nfa + } +} + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +pub mod tests { + extern crate test; + + use crate::automata::dfa::DFA; + use crate::automata::nfa; + use crate::automata::nfa::NFA; + use crate::automata::pattern::Pattern; + use crate::group::rule::Rule; + use crate::group::Group; + + use std::default::Default; + use test::Bencher; + + fn newline() -> Group { + let pattern = Pattern::char('\n'); + let mut group = Group::default(); + + group.add_rule(Rule { + pattern, + callback:"".into(), + }); + + group + } + + fn letter() -> Group { + let pattern = Pattern::range('a'..='z'); + let mut group = Group::default(); + + group.add_rule(Rule { + pattern, 
+ callback:"".into(), + }); + + group + } + + fn spaces() -> Group { + let pattern = Pattern::char(' ').many1(); + let mut group = Group::default(); + + group.add_rule(Rule { + pattern, + callback:"".into(), + }); + + group + } + + fn letter_and_spaces() -> Group { + let letter = Pattern::range('a'..='z'); + let spaces = Pattern::char(' ').many1(); + let mut group = Group::default(); + + group.add_rule(Rule { + pattern:letter, + callback:"".into(), + }); + group.add_rule(Rule { + pattern:spaces, + callback:"".into(), + }); + + group + } + + fn hundred_rules() -> Group { + let pattern = + Pattern::all("The quick brown fox jumps over the lazy dog!!"); + let mut group = Group::default(); + + for _ in 0..100 { + group.add_rule(Rule { + pattern:pattern.clone(), + callback:"".into(), + }) + } + group + } + + #[test] + fn test_to_nfa_newline() { + assert_eq!(NFA::from(&newline()), nfa::tests::newline()); + } + + #[test] + fn test_to_nfa_letter() { + assert_eq!(NFA::from(&letter()), nfa::tests::letter()); + } + + #[test] + fn test_to_nfa_spaces() { + assert_eq!(NFA::from(&spaces()), nfa::tests::spaces()); + } + + #[test] + fn test_to_nfa_letter_and_spaces() { + assert_eq!( + NFA::from(&letter_and_spaces()), + nfa::tests::letter_and_spaces() + ); + } + + #[bench] + fn bench_to_nfa_newline(bencher:&mut Bencher) { + bencher.iter(|| NFA::from(&newline())) + } + + #[bench] + fn bench_to_nfa_letter(bencher:&mut Bencher) { + bencher.iter(|| NFA::from(&letter())) + } + + #[bench] + fn bench_to_nfa_spaces(bencher:&mut Bencher) { + bencher.iter(|| NFA::from(&spaces())) + } + + #[bench] + fn bench_to_nfa_letter_and_spaces(bencher:&mut Bencher) { + bencher.iter(|| NFA::from(&letter_and_spaces())) + } + + #[bench] + fn bench_hundred_rules(bencher:&mut Bencher) { + bencher.iter(|| DFA::from(&NFA::from(&hundred_rules()))); + } +} diff --git a/parser/flexer/src/group/rule.rs b/parser/flexer/src/group/rule.rs new file mode 100644 index 0000000000..d5f90e1083 --- /dev/null +++ 
b/parser/flexer/src/group/rule.rs
@@ -0,0 +1,39 @@
+//! An API for declaring Rust callbacks for encountered regex patterns.
+//!
+use crate::automata::pattern::Pattern;
+
+// ==========
+// == Rule ==
+// ==========
+
+/// A rule is a pair of regex pattern and callback.
+/// The intention is to run the callback after encountering given pattern.
+#[derive(Clone, Debug)]
+pub struct Rule {
+    /// Pattern that triggers the callback.
+    pub pattern:Pattern,
+    /// Callback containing stringified Rust code.
+    pub callback:String,
+}
+
+/// Builder that allows us to add `Rule` to `Group` in a nice way.
+/// It is possible this structure won't be useful in rust, since borrow checker
+/// will likely influence the final API of rule construction.
+#[derive(Clone, Debug)]
+pub struct Builder<Callback> {
+    /// Pattern that triggers the callback.
+    pub pattern:Pattern,
+    /// Callback containing a closure.
+    pub callback:Callback,
+}
+
+impl<Callback:FnMut(Rule)> Builder<Callback> {
+    /// Feeds the input that triggered regex pattern to callback.
+    pub fn run(&mut self, program:String) {
+        let rule = Rule {
+            pattern:self.pattern.clone(),
+            callback:program,
+        };
+        (self.callback)(rule);
+    }
+}
diff --git a/parser/flexer/src/lib.rs b/parser/flexer/src/lib.rs
new file mode 100644
index 0000000000..b73d02cb7c
--- /dev/null
+++ b/parser/flexer/src/lib.rs
@@ -0,0 +1,17 @@
+#![feature(test)]
+#![deny(unconditional_recursion)]
+#![warn(missing_copy_implementations)]
+#![warn(missing_debug_implementations)]
+#![warn(missing_docs)]
+#![warn(trivial_casts)]
+#![warn(trivial_numeric_casts)]
+#![warn(unsafe_code)]
+#![warn(unused_import_braces)]
+
+//! This module exports simple parser based on Deterministic Finite State
+//! Automata for regular grammars (anything parsable with regex patterns).
+ +pub mod automata; +pub mod data; +pub mod group; +pub mod parser; diff --git a/parser/flexer/src/parser.rs b/parser/flexer/src/parser.rs new file mode 100644 index 0000000000..9c4213ff02 --- /dev/null +++ b/parser/flexer/src/parser.rs @@ -0,0 +1,15 @@ +//! The entry point of flexer. It (is going to) contain API for parsing an input +//! string based on group of regex patterns. + +use crate::automata::state::Symbol; + +// ============ +// == Parser == +// ============ + +/// End Of File - This symbol is inserted at the end of each parser input. +/// We can use the maximum value of u32, because no `char` (unicode scalar) can +/// hold this value. +pub const EOF_CODE:Symbol = Symbol { + val:u32::max_value(), +};