Overall progress

This commit is contained in:
Wojciech Danilo 2019-06-09 03:55:17 +02:00
parent 76344cf79e
commit fae4fe10ef
11 changed files with 569 additions and 492 deletions

20
.scalafmt.conf Normal file
View File

@ -0,0 +1,20 @@
align = most
maxColumn = 80
assumeStandardLibraryStripMargin = true
continuationIndent.defnSite = 2
newlines.alwaysBeforeTopLevelStatements = true
align.tokens = [
{code = "=>", owner = "Case"}
{code = "%", owner = "Term.ApplyInfix"}
{code = "%%", owner = "Term.ApplyInfix"}
{code = "="}
{code = "<-"}
{code = "extends"}
{code = ":", owner = "Defn.Def"}
]
rewrite.rules = [
ExpandImportSelectors
RedundantParens
SortModifiers
PreferCurlyFors
]

View File

@ -18,9 +18,9 @@ lazy val syntax = (project in file("syntax"))
publishArtifact := false,
libraryDependencies ++= Seq(
"com.storm-enroute" %% "scalameter" % "0.17" % "bench",
"org.typelevel" %% "cats-core" % "1.6.0",
"org.scalatest" %% "scalatest" % "3.0.5" % Test,
"com.lihaoyi" %% "pprint" % "0.5.3"
// "org.typelevel" %% "cats-core" % "1.6.0",
// "org.scalatest" %% "scalatest" % "3.0.5" % Test,
// "com.lihaoyi" %% "pprint" % "0.5.3"
),
resolvers ++= Seq(
"Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",

View File

@ -2,14 +2,17 @@ package org.enso.syntax.text.lexer
import org.scalameter.api._
import scala.math.pow
import org.enso.syntax.text.parser.{Parser,BParser}
import java.io.{StringReader}
object RangeBenchmark extends Bench.OfflineReport {
val exp14 = Gen.exponential("size")(pow(2,14).toInt, pow(2,16).toInt, 2)
val exp15 = Gen.exponential("size")(pow(2,15).toInt, pow(2,17).toInt, 2)
val exp16 = Gen.exponential("size")(pow(2,16).toInt, pow(2,18).toInt, 2)
val exp16 = Gen.exponential("size")(pow(2,14).toInt, pow(2,18).toInt, 2)
val longVars = for {i <- exp16} yield "test12" * i
val multipleVars = for {i <- exp16} yield "test1 " * i
val multipleVars = for {i <- exp16} yield "test1 (A B) " * i
val exprs1 = for {i <- exp14} yield "a += 1\nb == '\n'\n" * i
val exprs2 = for {i <- exp14} yield "a += 1\nb == '`a`!'\n" * i
@ -24,14 +27,30 @@ object RangeBenchmark extends Bench.OfflineReport {
// input => new Lexer(input).lexAll()
// }
// }
measure method "longVar" in {
using(longVars) in {
input => new Lexer(input).lexAll()
// measure method "longVar" in {
// using(longVars) in {
// input => new Lexer(input).lexAll()
// }
// }
// measure method "multipleVars" in {
// using(multipleVars) in {
// input => new Lexer(input).lexAll()
// }
// }
measure method "Parser-multipleVars" in {
using(multipleVars) in {
input => {
(new Parser(new StringReader(input))).parse
}
}
}
measure method "multipleVars" in {
measure method "BParser-multipleVars" in {
using(multipleVars) in {
input => new Lexer(input).lexAll()
input => {
(new BParser(new StringReader(input))).parse
}
}
}
}

View File

@ -32,9 +32,9 @@
/* First part of user declarations. */
/* "../../text/../../../../../../../target/Parser.java":36 */ /* lalr1.java:91 */
/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":36 */ /* lalr1.java:91 */
/* "../../text/../../../../../../../target/Parser.java":38 */ /* lalr1.java:92 */
/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":38 */ /* lalr1.java:92 */
/* "%code imports" blocks. */
/* "rules.y":7 */ /* lalr1.java:93 */
@ -42,7 +42,7 @@ package org.enso.syntax.text.xx;
import org.enso.syntax.text.parser.AST;
import org.enso.syntax.text.lexer.Token;
/* "../../text/../../../../../../../target/Parser.java":46 */ /* lalr1.java:93 */
/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":46 */ /* lalr1.java:93 */
/**
* A Bison parser, automatically generated from <tt>rules.y</tt>.
@ -75,13 +75,19 @@ public class Parser
/** Token number,to be returned by the scanner. */
static final int VAR = 258;
/** Token number,to be returned by the scanner. */
static final int EOL = 259;
static final int CONS = 259;
/** Token number,to be returned by the scanner. */
static final int BLOCK_BEGIN = 260;
static final int EOL = 260;
/** Token number,to be returned by the scanner. */
static final int BLOCK_END = 261;
static final int GROUP_BEGIN = 261;
/** Token number,to be returned by the scanner. */
static final int CONS = 262;
static final int GROUP_END = 262;
/** Token number,to be returned by the scanner. */
static final int BLOCK_BEGIN = 263;
/** Token number,to be returned by the scanner. */
static final int BLOCK_END = 264;
/** Token number,to be returned by the scanner. */
static final int BLOCK_INVALID = 265;
@ -319,62 +325,104 @@ public class Parser
{
case 2:
if (yyn == 2)
/* "rules.y":49 */ /* lalr1.java:489 */
/* "rules.y":57 */ /* lalr1.java:489 */
{result=((AST)(yystack.valueAt (1-(1))));};
break;
case 4:
if (yyn == 4)
/* "rules.y":53 */ /* lalr1.java:489 */
/* "rules.y":61 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (1-(1))));};
break;
case 5:
if (yyn == 5)
/* "rules.y":54 */ /* lalr1.java:489 */
/* "rules.y":62 */ /* lalr1.java:489 */
{yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));};
break;
case 6:
if (yyn == 6)
/* "rules.y":55 */ /* lalr1.java:489 */
{yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));};
/* "rules.y":65 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (1-(1))));};
break;
case 7:
if (yyn == 7)
/* "rules.y":58 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (2-(2))));};
/* "rules.y":66 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (1-(1))));};
break;
case 8:
if (yyn == 8)
/* "rules.y":61 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (3-(1))));};
/* "rules.y":67 */ /* lalr1.java:489 */
{yyval=AST.grouped(((Token)(yystack.valueAt (3-(1)))),((AST)(yystack.valueAt (3-(2)))),((Token)(yystack.valueAt (3-(3)))));};
break;
case 9:
if (yyn == 9)
/* "rules.y":62 */ /* lalr1.java:489 */
{yyval=AST.emptyBlock();};
/* "rules.y":68 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (2-(2))));};
break;
case 10:
if (yyn == 10)
/* "rules.y":65 */ /* lalr1.java:489 */
/* "rules.y":71 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (1-(1))));};
break;
case 11:
if (yyn == 11)
/* "rules.y":72 */ /* lalr1.java:489 */
{yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));};
break;
case 12:
if (yyn == 12)
/* "rules.y":77 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (2-(2))));};
break;
case 13:
if (yyn == 13)
/* "rules.y":80 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (3-(1))));};
break;
case 14:
if (yyn == 14)
/* "rules.y":81 */ /* lalr1.java:489 */
{yyval=AST.emptyBlock();};
break;
case 15:
if (yyn == 15)
/* "rules.y":84 */ /* lalr1.java:489 */
{yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));};
break;
case 16:
if (yyn == 16)
/* "rules.y":85 */ /* lalr1.java:489 */
{yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));};
break;
/* "../../text/../../../../../../../target/Parser.java":378 */ /* lalr1.java:489 */
/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":426 */ /* lalr1.java:489 */
default: break;
}
@ -691,8 +739,9 @@ public class Parser
{
return new byte[]
{
5, -4, 9, 2, -4, -4, 5, -4, -4, -2,
-4, 5, -4, -4
9, -4, -4, -2, 9, 11, 9, -4, -4, -4,
15, -4, 0, -4, -4, -4, -4, -4, 9, -4,
-4
};
}
@ -704,8 +753,9 @@ public class Parser
{
return new byte[]
{
3, 10, 0, 2, 4, 1, 0, 6, 5, 0,
7, 0, 9, 8
3, 15, 16, 0, 0, 0, 2, 4, 7, 6,
9, 10, 0, 12, 1, 5, 8, 11, 0, 14,
13
};
}
@ -715,7 +765,7 @@ public class Parser
{
return new byte[]
{
-4, -4, 10, -4, 0, -3
-4, -4, 14, 4, -4, -4, 2, -3
};
}
@ -725,7 +775,7 @@ public class Parser
{
return new byte[]
{
-1, 2, 9, 7, 10, 4
-1, 5, 12, 7, 10, 8, 13, 9
};
}
@ -737,8 +787,9 @@ public class Parser
{
return new byte[]
{
8, 1, 11, 6, 12, 1, 8, 6, 1, 5,
3, 13
11, 1, 2, 1, 2, 18, 3, 17, 4, 19,
15, 14, 1, 2, 6, 3, 15, 4, 1, 2,
20, 0, 16
};
}
@ -747,8 +798,9 @@ private static final byte yycheck_[] = yycheck_init();
{
return new byte[]
{
3, 3, 4, 5, 6, 3, 9, 5, 3, 0,
0, 11
3, 3, 4, 3, 4, 5, 6, 10, 8, 9,
6, 0, 3, 4, 0, 6, 12, 8, 3, 4,
18, -1, 7
};
}
@ -759,8 +811,9 @@ private static final byte yycheck_[] = yycheck_init();
{
return new byte[]
{
0, 3, 9, 10, 13, 0, 5, 11, 13, 10,
12, 4, 6, 12
0, 3, 4, 6, 8, 12, 13, 14, 16, 18,
15, 18, 13, 17, 0, 14, 7, 18, 5, 9,
17
};
}
@ -770,8 +823,8 @@ private static final byte yycheck_[] = yycheck_init();
{
return new byte[]
{
0, 8, 9, 9, 10, 10, 10, 11, 12, 12,
13
0, 11, 12, 12, 13, 13, 14, 14, 14, 14,
15, 15, 16, 17, 17, 18, 18
};
}
@ -781,8 +834,8 @@ private static final byte yycheck_[] = yycheck_init();
{
return new byte[]
{
0, 2, 1, 0, 1, 2, 2, 2, 3, 2,
1
0, 2, 1, 0, 1, 2, 1, 1, 3, 2,
1, 2, 2, 3, 2, 1, 1
};
}
@ -793,7 +846,8 @@ private static final byte yycheck_[] = yycheck_init();
{
return new short[]
{
0, 256, 257, 258, 259, 260, 261, 262
0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
265
};
}
@ -804,8 +858,9 @@ private static final byte yycheck_[] = yycheck_init();
{
return new String[]
{
"$end", "error", "$undefined", "VAR", "EOL", "BLOCK_BEGIN", "BLOCK_END",
"CONS", "$accept", "program", "expr", "block", "blockBody", "tok", null
"$end", "error", "$undefined", "VAR", "CONS", "EOL", "GROUP_BEGIN",
"GROUP_END", "BLOCK_BEGIN", "BLOCK_END", "BLOCK_INVALID", "$accept",
"program", "expr", "exprItem", "expr_group", "block", "blockBody", "tok", null
};
}
@ -815,8 +870,8 @@ private static final byte yycheck_[] = yycheck_init();
{
return new byte[]
{
0, 49, 49, 50, 53, 54, 55, 58, 61, 62,
65
0, 57, 57, 58, 61, 62, 65, 66, 67, 68,
71, 72, 77, 80, 81, 84, 85
};
}
@ -872,7 +927,7 @@ private static final byte yycheck_[] = yycheck_init();
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
5, 6, 7
5, 6, 7, 8, 9, 10
};
}
@ -884,15 +939,15 @@ private static final byte yycheck_[] = yycheck_init();
return yyundef_token_;
}
private static final int yylast_ = 11;
private static final int yynnts_ = 6;
private static final int yylast_ = 22;
private static final int yynnts_ = 8;
private static final int yyempty_ = -2;
private static final int yyfinal_ = 5;
private static final int yyfinal_ = 14;
private static final int yyterror_ = 1;
private static final int yyerrcode_ = 256;
private static final int yyntokens_ = 8;
private static final int yyntokens_ = 11;
private static final int yyuser_token_number_max_ = 262;
private static final int yyuser_token_number_max_ = 265;
private static final int yyundef_token_ = 2;
/* User implementation code. */
@ -901,11 +956,11 @@ private static final byte yycheck_[] = yycheck_init();
public AST result;
/* "../../text/../../../../../../../target/Parser.java":905 */ /* lalr1.java:1066 */
/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":960 */ /* lalr1.java:1066 */
}
/* "rules.y":88 */ /* lalr1.java:1070 */
/* "rules.y":108 */ /* lalr1.java:1070 */
// class CalcLexer implements Calc.Lexer {

View File

@ -9,7 +9,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*;
%{
private int indent = 0;
int currentBlock = 0;
///////////////////////
// Indent Management //
@ -25,7 +25,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*;
return indentStack.pop();
}
public final Integer indentx() {
public final Integer indent() {
return indentStack.peek();
}
@ -108,6 +108,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*;
//////////////////
int var() {value = token(new Var (yytext())); return VAR;}
int cons() {value = token(new Cons (yytext())); return CONS;}
// Utils
void whitespace() {lastOffset += yylength();}
@ -133,10 +134,13 @@ void whitespace() {lastOffset += yylength();}
// Token disabled() {pushState(CHECK_OP_SFX); return disabled_();}
// Layout
int blockBegin(int i) {pushIndent(i); value = token(BlockBegin$.MODULE$); return BLOCK_BEGIN;}
int blockEnd() {popIndent(); value = token(BlockEnd$.MODULE$); return BLOCK_END;}
int blockInvalid() {value = token(BlockInvalid$.MODULE$); return BLOCK_INVALID;}
int newline() {value = token(EOL$.MODULE$); return EOL;}
int blockBegin() {return BLOCK_BEGIN;}
int blockEnd() {return BLOCK_END;}
// Token groupBegin() {return token(GroupBegin$.MODULE$);}
int groupBegin() {value = token(GroupBegin$.MODULE$); return GROUP_BEGIN;}
int groupEnd() {value = token(GroupEnd$.MODULE$); return GROUP_END;}
// Token groupEnd() {return token(GroupEnd$.MODULE$);}
// Token listBegin() {return token(ListBegin$.MODULE$);}
// Token listEnd() {return token(ListEnd$.MODULE$);}
@ -223,7 +227,7 @@ int blockEnd() {return BLOCK_END;}
alpha_upper = [A-Z]
alpha_lower = [a-z]
alpha = {alpha_lower} | {alpha_upper}
alphanum = {alpha} | digit
alphanum = {alpha} | {digit}
whitespace = [\ ]
newline = \r|\n|\r\n
@ -263,6 +267,7 @@ decimal = {digit}+
%xstate COMMENT
%xstate COMMENT_LINE
%xstate NEWLINE
%xstate BLOCK_ENDING
%state TEXT_INTERPOLATE
@ -454,23 +459,47 @@ decimal = {digit}+
///////////////////////
<NEWLINE> {
{whitespace}+ {
{whitespace}+{newline} {
whitespace();
popState();
Integer ind = yylength();
if (ind > indentx()) {
return blockBegin();
} else {
// TODO
}
}
[^] {
indent = 0;
popState();
rewind();
return newline();
}
{whitespace}+ {
whitespace();
popState();
currentBlock = yylength();
if (currentBlock > indent()) {
return blockBegin(currentBlock);
} else if (currentBlock < indent()) {
pushState(BLOCK_ENDING);
}
}
[^] {
rewind();
popState();
currentBlock = 0;
if (indent() > 0) {
pushState(BLOCK_ENDING);
} else {
return newline();
}
}
}
<BLOCK_ENDING> {
[^] {
rewind();
if(currentBlock == indent()) {
popState();
} else if(currentBlock < indent()) {
return blockEnd();
} else {
popState();
return blockInvalid();
}
}
}
@ -482,8 +511,7 @@ decimal = {digit}+
// // Identifiers
{var} {return var();}
// {var} {return var();}
// {cons} {return cons();}
{cons} {return cons();}
// {wildcard} {return wildcard();}
// // Operators
@ -500,9 +528,9 @@ decimal = {digit}+
// {modifier} {return modifier();}
// (\#\=) {return disabled();}
// // Layout
// (\() {return groupBegin();}
// (\)) {return groupEnd();}
// Layout
(\() {return groupBegin();}
(\)) {return groupEnd();}
// (\[) {return listBegin();}
// (\]) {return listEnd();}
// (\{) {return recordBegin();}
@ -543,7 +571,7 @@ decimal = {digit}+
// Layout
{whitespace}+ {whitespace();}
{newline} {pushState(NEWLINE);}
{newline} {pushState(NEWLINE);return newline();}
// // Unknown
// [^] {

View File

@ -1,163 +1,55 @@
package org.enso.syntax
// import org.enso.syntax.text.parser.{Parser}
import java.io.{Reader, StringReader}
import org.enso.syntax.text.lexer.{Scanner, EOF, Token}
import org.enso.syntax.text.xx.Parser
import org.enso.syntax.text.xx.Parser.Lexer._
// import org.enso.syntax.text.{parser => AST}
import org.enso.syntax.text.parser.AST
/** Adapter that exposes a [[Scanner]] through the Bison `Parser.Lexer`
  * interface and adds helpers for draining the whole token stream.
  *
  * A token code of `-1` coming back from the scanner is treated as end of
  * input: it flips the internal `_done` flag and is reported as a synthetic
  * `Token(EOF, 0, 0)`.
  *
  * @param scanner the underlying scanner that produces token codes and values
  */
class SS(scanner: Scanner) extends Parser.Lexer {

  /** Set once the scanner has reported end of input. */
  private var _done = false

  /** Semantic value of the most recently scanned token. */
  def getLVal(): Token = scanner.value

  /** Reports a parser error on standard output. */
  def yyerror(s: String): Unit = {
    println("!!! ERROR !!!")
    println(s)
  }

  /** Scans the next token and returns its numeric code. */
  def yylex(): Int = scanner.lex

  /** Scans one token and returns its value, or an `EOF` token at end of
    * input.
    */
  def lex(): Token = {
    val tok = yylex()
    if (tok == -1) {
      _done = true
      Token(EOF, 0, 0)
    } else {
      getLVal()
    }
  }

  /** Scans until end of input; the result always ends with the `EOF` token. */
  def lexAll(): Vector[Token] = {
    val builder = Vector.newBuilder[Token]
    do {
      builder += lex()
    } while (!_done)
    builder.result()
  }

  /** Like [[lexAll]], but also returns the numeric token codes.
    *
    * @return the token codes and the token values, index-aligned; the last
    *         entry is the `-1` code paired with the `EOF` token
    */
  def lexAll2(): (Vector[Int], Vector[Token]) = {
    val codes  = Vector.newBuilder[Int]
    val values = Vector.newBuilder[Token]
    do {
      val tok = yylex()
      val tval =
        if (tok == -1) {
          _done = true
          Token(EOF, 0, 0)
        } else {
          getLVal()
        }
      codes += tok
      values += tval
    } while (!_done)
    (codes.result(), values.result())
  }
}
/** Minimal hand-written combinator parser working on a pre-lexed token
  * stream.
  *
  * The whole input is lexed eagerly on construction; parsing then moves a
  * cursor (`tokenIx`) over the resulting vectors.
  *
  * @param reader source of the text to parse
  */
class PP(reader: Reader) {
  val lexer = new SS(new Scanner(reader))

  //////////////////////
  // Token Management //
  //////////////////////

  // `itokens` holds the numeric token codes, `tokens` the matching values.
  val (itokens, tokens) = lexer.lexAll2()
  var tokenIx           = 0
  var current: Token    = tokens(tokenIx)
  var icurrent: Int     = itokens(tokenIx)

  /** Advances the cursor by one token; does nothing on the last token. */
  def step(): Unit = {
    if (tokenIx != tokens.size - 1) {
      tokenIx += 1
      current  = tokens(tokenIx)
      icurrent = itokens(tokenIx)
    }
  }

  /** Parses a sequence of tokens into left-nested applications. */
  def parse(): Option[AST] = {
    manyWith(AST.app, () => tok())
  }

  /** Parses a single `VAR` token into an AST node. */
  def tok(): Option[AST] = {
    token(VAR).map(AST.fromToken)
  }

  /** Runs `l` and, only if it yields nothing, runs `r`.
    *
    * The previous procedure-syntax definition had result type `Unit`, so the
    * chosen alternative was silently thrown away.
    */
  def or[T](l: () => Option[T], r: () => Option[T]): Option[T] = {
    l().orElse(r())
  }

  /** Consumes and returns the current token if its code equals `t`. */
  def token(t: Int): Option[Token] = {
    if (icurrent == t) {
      val result = Some(current)
      step()
      result
    } else {
      None
    }
  }

  /** Applies `f` one or more times, combining the results left to right.
    *
    * @return `None` when the very first application of `f` fails
    */
  def manyWith(concat: (AST, AST) => AST, f: () => Option[AST]): Option[AST] = {
    f().map(first => manyWith_(concat, f, first))
  }

  /** Keeps applying `f` until it fails, folding every result into `ast`.
    *
    * Implemented with a tail-recursive local loop so that long inputs do not
    * grow the call stack.
    */
  def manyWith_(
    concat: (AST, AST) => AST,
    f: () => Option[AST],
    ast: AST
  ): AST = {
    @scala.annotation.tailrec
    def loop(acc: AST): AST = f() match {
      case Some(next) => loop(concat(acc, next))
      case None       => acc
    }
    loop(ast)
  }
}
import java.io.StringReader
import org.enso.syntax.text.lexer.Lexer
import org.enso.syntax.text.parser.Parser
import org.enso.syntax.text.parser.BParser
import scala.language.implicitConversions
object Main extends App {
val str = "a b"
val reader = new StringReader(str)
val reader2 = new StringReader(str)
val scanner = new Scanner(reader)
val scanner2 = new Scanner(reader2)
val ss = new SS(scanner)
val ss2 = new SS(scanner2)
val parser = new Parser(ss)
val pp = new PP(new StringReader(str))
pprint.pprintln(ss2.lexAll)
pprint.pprintln(pp.parse)
pprint.pprintln(parser.parse)
// val parser = new Parser(reader)
// val ast = parser.parse
pprint.pprintln(parser.result,width=3,height=1000)
}
var indent = 0
/** Prints `s` with a line break after every parenthesis.
  *
  * Each chunk is prefixed with one space per current `indent` level; an
  * opening parenthesis raises the level and a closing one lowers it before
  * the remainder of the string is printed.
  */
def pprint(s: String): Unit = {
  print(" " * indent)
  // Split at the first parenthesis: `text` has none, `rest` starts with one
  // (or is empty when the string contains no more parentheses).
  val (text, rest) = s.span(c => !(c == '(' || c == ')'))
  print(text)
  rest.headOption match {
    case None =>
      println()
    case Some(paren) =>
      if (paren == '(') indent += 1 else indent -= 1
      println(paren)
      pprint(rest.tail)
  }
}
// val str = "a (b"
val str =
"""|a
|
| a
| c""".stripMargin
println(str)
val reader = new StringReader(str)
val ss = new Lexer(reader)
pprint(ss.lexAll.toString())
val bparser = new BParser(new StringReader(str))
val parser = new Parser(new StringReader(str))
pprint(bparser.parse.toString())
pprint(parser.parse.toString())
pprint("!")
}

View File

@ -1,8 +1,56 @@
package org.enso.syntax.text.lexer
import java.io.{StringReader, Reader}
import java.io.StringReader
import java.io.Reader
import scala.collection.immutable.Vector
import org.enso.syntax.text.xx.Parser
/** Scanner that also implements the Bison `Parser.Lexer` interface, plus
  * helpers for draining the whole token stream.
  *
  * A token code of `-1` from `yylex` is treated as end of input: it flips
  * the internal `_done` flag and is reported as a synthetic
  * `Token(EOF, 0, 0)`.
  *
  * @param reader source of the text to tokenize
  */
class Lexer(reader: Reader) extends Scanner(reader) with Parser.Lexer {

  /** Set once `yylex` has reported end of input. */
  private var _done = false

  /** Semantic value of the most recently scanned token. */
  def getLVal(): Token = value

  /** Reports a parser error on standard output. */
  def yyerror(s: String): Unit = {
    println("!!! ERROR !!!")
    println(s)
  }

  /** Scans one token and returns its value, or an `EOF` token at end of
    * input.
    */
  def lexTok(): Token = {
    val tok = yylex()
    if (tok == -1) {
      _done = true
      Token(EOF, 0, 0)
    } else {
      getLVal()
    }
  }

  /** Scans until end of input; the result always ends with the `EOF` token. */
  def lexAll(): Vector[Token] = {
    val builder = Vector.newBuilder[Token]
    do {
      builder += lexTok()
    } while (!_done)
    builder.result()
  }

  /** Like [[lexAll]], but also returns the numeric token codes.
    *
    * @return the token codes and the token values, index-aligned; the last
    *         entry is the `-1` code paired with the `EOF` token
    */
  def lexAll2(): (Vector[Int], Vector[Token]) = {
    val codes  = Vector.newBuilder[Int]
    val values = Vector.newBuilder[Token]
    do {
      val tok = yylex()
      val tval =
        if (tok == -1) {
          _done = true
          Token(EOF, 0, 0)
        } else {
          getLVal()
        }
      codes += tok
      values += tval
    } while (!_done)
    (codes.result(), values.result())
  }
}
// class Lexer (reader:Reader) {
// val scanner = new Scanner(reader)
// private var _done = false
@ -39,4 +87,4 @@ import scala.collection.immutable.Vector
// def done(): Boolean = {
// return _done;
// }
// }
// }

View File

@ -4,139 +4,140 @@ package org.enso.syntax.text.lexer
// Token //
///////////
case class Token (symbol:Symbol, offset:Int, span:Int)
case class Token(symbol: Symbol, offset: Int, span: Int)
////////////
// Symbol //
////////////
abstract class Symbol
abstract class Symbol
// Identifiers
case class Var (name:String) extends Symbol
case class Cons (name:String) extends Symbol
case object Wildcard extends Symbol
case class Var(name: String) extends Symbol
case class Cons(name: String) extends Symbol
case object Wildcard extends Symbol
// Operators
case class Operator (name:String) extends Symbol
case class Modifier (name:String) extends Symbol
case object DisabledAssignment extends Symbol
case class Operator(name: String) extends Symbol
case class Modifier(name: String) extends Symbol
case object DisabledAssignment extends Symbol
// Layout
case object EOL extends Symbol
case object BOF extends Symbol
case object EOF extends Symbol
case object GroupBegin extends Symbol
case object GroupEnd extends Symbol
case object ListBegin extends Symbol
case object ListEnd extends Symbol
case object RecordBegin extends Symbol
case object RecordEnd extends Symbol
case object EOL extends Symbol
case object BOF extends Symbol
case object EOF extends Symbol
case object BlockBegin extends Symbol
case object BlockEnd extends Symbol
case object BlockInvalid extends Symbol
case object GroupBegin extends Symbol
case object GroupEnd extends Symbol
case object ListBegin extends Symbol
case object ListEnd extends Symbol
case object RecordBegin extends Symbol
case object RecordEnd extends Symbol
// Literals
case object TextBegin extends Symbol
case object TextEnd extends Symbol
case object TextRawBegin extends Symbol
case object TextRawEnd extends Symbol
case class Text (text:String) extends Symbol
case class TextEscape (esc:TextEscapeType) extends Symbol
case object TextInterpolateBegin extends Symbol
case object TextInterpolateEnd extends Symbol
case class Number (base:Int
,intPart:List[Int]
,fracPart:List[Int]) extends Symbol
case object TextBegin extends Symbol
case object TextEnd extends Symbol
case object TextRawBegin extends Symbol
case object TextRawEnd extends Symbol
case class Text(text: String) extends Symbol
case class TextEscape(esc: TextEscapeType) extends Symbol
case object TextInterpolateBegin extends Symbol
case object TextInterpolateEnd extends Symbol
case class Number(base: Int, intPart: List[Int], fracPart: List[Int])
extends Symbol
// Invalid
case class Invalid (reason:InvalidReason) extends Symbol
case class Unmatched (char:String) extends Symbol
case class Invalid(reason: InvalidReason) extends Symbol
case class Unmatched(char: String) extends Symbol
// Comments
case object Comment extends Symbol
case class CommentBody (text:String) extends Symbol
case object Comment extends Symbol
case class CommentBody(text: String) extends Symbol
//////////////////
// Text Escapes //
//////////////////
abstract class TextEscapeType
case class CharEscape (code:Int) extends TextEscapeType
case class CtrlEscape (code:Int) extends TextEscapeType
case class IntEscape (code:Int) extends TextEscapeType
case class Uni16Escape (code:Int) extends TextEscapeType
case class Uni32Escape (code:Int) extends TextEscapeType
case class Uni21Escape (code:Int) extends TextEscapeType
case object QuoteEscape extends TextEscapeType
case object RawQuoteEscape extends TextEscapeType
case object SlashEscape extends TextEscapeType
case class InvalidCharEscape (char:Char) extends TextEscapeType
case class InvalidUni32Escape (str:String) extends TextEscapeType
case class InvalidUni21Escape (str:String) extends TextEscapeType
case class CharEscape(code: Int) extends TextEscapeType
case class CtrlEscape(code: Int) extends TextEscapeType
case class IntEscape(code: Int) extends TextEscapeType
case class Uni16Escape(code: Int) extends TextEscapeType
case class Uni32Escape(code: Int) extends TextEscapeType
case class Uni21Escape(code: Int) extends TextEscapeType
case object QuoteEscape extends TextEscapeType
case object RawQuoteEscape extends TextEscapeType
case object SlashEscape extends TextEscapeType
case class InvalidCharEscape(char: Char) extends TextEscapeType
case class InvalidUni32Escape(str: String) extends TextEscapeType
case class InvalidUni21Escape(str: String) extends TextEscapeType
/////////////
// Invalid //
/////////////
abstract class InvalidReason
case class UnexpectedSuffix (text:String) extends InvalidReason
case class UnexpectedSuffix(text: String) extends InvalidReason
////////////////
// Companions //
////////////////
object Number {
def charToDigit (char:Char): Int = {
def charToDigit(char: Char): Int = {
val i = char.toInt
if (i >= 48 && i <= 57) { return i - 48 } // 0 to 9
if (i >= 65 && i <= 90) { return i - 55 } // A to Z
if (i >= 48 && i <= 57) { return i - 48 } // 0 to 9
if (i >= 65 && i <= 90) { return i - 55 } // A to Z
if (i >= 97 && i <= 122) { return i - 87 } // a to z
return -1
}
def stringToDigits (str:String): List[Int] = {
def stringToDigits(str: String): List[Int] = {
str.toList.map(charToDigit)
}
def fromString(base:String, intPart:String, fracPart:String): Number = {
def fromString(base: String, intPart: String, fracPart: String): Number = {
val base2 = if (base == "") 10 else base.toInt
return Number(base2,stringToDigits(intPart), stringToDigits(fracPart))
return Number(base2, stringToDigits(intPart), stringToDigits(fracPart))
}
}
object IntEscape {
def fromString(code:String): IntEscape = {
def fromString(code: String): IntEscape = {
IntEscape(code.toInt)
}
}
object CharEscape {
def fromChar(c:Char): CharEscape = {
def fromChar(c: Char): CharEscape = {
CharEscape(c.toInt)
}
}
object Uni32Escape {
def fromString(str:String): TextEscapeType = {
def fromString(str: String): TextEscapeType = {
try {
return Uni32Escape(Integer.parseInt(str,16))
return Uni32Escape(Integer.parseInt(str, 16))
} catch {
case e:Exception => return InvalidUni32Escape(str)
case e: Exception => return InvalidUni32Escape(str)
}
}
}
object Uni21Escape {
def fromString(str:String): TextEscapeType = {
def fromString(str: String): TextEscapeType = {
try {
return Uni21Escape(Integer.parseInt(str,16))
return Uni21Escape(Integer.parseInt(str, 16))
} catch {
case e:Exception => return InvalidUni21Escape(str)
case e: Exception => return InvalidUni21Escape(str)
}
}
}

View File

@ -3,44 +3,56 @@ package org.enso.syntax.text.parser
import org.enso.syntax.text.lexer.Token
import org.enso.syntax.text.{lexer => token}
/////////
// AST //
/////////
case class AST (offset:Int, span:Int, symbol:Symbol)
case class AST(offset: Int, span: Int, symbol: Symbol)
// class Sym[T](offset:Int, span:Int, element:T)
////////////
// Symbol //
////////////
abstract class Symbol
case object NONE extends Symbol
trait Symbol
case object NONE extends Symbol
// Identifiers
case class Var (name:String) extends Symbol
case class Operator (name:String) extends Symbol
case class App (func:AST, arg:AST) extends Symbol
case class Block (body:Vector[AST]) extends Symbol
case class Var(name: String) extends Symbol
case class Cons(name: String) extends Symbol
case class Operator(name: String) extends Symbol
case class App(func: AST, arg: AST) extends Symbol
case class Block(body: Vector[AST]) extends Symbol
case class Grouped(body: AST) extends Symbol
//
object AST {
def fromToken(tok:Token):AST = {
def fromToken(tok: Token): AST = {
tok.symbol match {
case token.Var(name) => AST(0,0,Var(name))
case token.Var(name) => AST(tok.offset, tok.span, Var(name))
case token.Cons(name) => AST(tok.offset, tok.span, Cons(name))
}
}
def app(fn:AST, arg:AST):AST = {
AST(fn.offset,fn.span + arg.span,App(fn.copy(offset=0),arg))
def app(fn: AST, arg: AST): AST = {
AST(fn.offset, fn.span + arg.span, App(fn.copy(offset = 0), arg))
}
def emptyBlock():AST = {
AST(0,0,Block(Vector()))
def emptyBlock(): AST = {
AST(0, 0, Block(Vector()))
}
}
def block(lines: Vector[AST]): AST = {
AST(0, 0, Block(lines))
}
def grouped(begin: Token, body: AST, end: Token): AST = {
val offset = begin.offset
val span = begin.span + body.offset + body.span + end.offset + end.span
AST(offset, span, Grouped(body))
}
}

View File

@ -1,188 +1,170 @@
package org.enso.syntax.text.parser
// import java.io.{Reader}
// import org.enso.syntax.text.lexer.{Lexer, Token}
// import org.enso.syntax.text.{lexer => token}
// import scala.collection.immutable.{Vector}
// import scala.collection.mutable.{Builder}
import org.enso.syntax.text.{xx => bison}
import java.io.Reader
// class Parser(reader:Reader) {
// val lexer = new Lexer(reader)
import org.enso.syntax.text.xx.Parser.Lexer._
import org.enso.syntax.text.lexer.Token
import org.enso.syntax.text.lexer.Lexer
import scala.collection.immutable.VectorBuilder
import scala.collection.mutable
// //////////////////////
// // Token Management //
// //////////////////////
//////////////////
// Parser Rules //
//////////////////
// val tokens = lexer.lexAll()
// var tokenIx = 0
// var current : Token = tokens(tokenIx)
/** A lazily evaluated parser rule: a thunk that either produces a value or
  * fails with `None`.
  *
  * Combinators only build new rules; nothing is evaluated until [[run]] is
  * called. The wrapped thunk is expected to be side-effecting (it typically
  * consumes input), so every call to [[run]] may give a different answer.
  *
  * @param unwrap the thunk executed by [[run]]
  * @tparam T type of the value produced on success
  */
case class Rule[T](unwrap: () => Option[T]) {

  /** Evaluates the rule once. */
  final def run(): Option[T] = unwrap()

  /** Alias for [[run]]. */
  final def apply(): Option[T] = run()

  /** Builds a rule that post-processes the raw outcome of this rule. */
  final def mapOption[S](f: Option[T] => Option[S]): Rule[S] =
    Rule(() => f(run()))

  /** Transforms the value of a successful run. */
  final def map[S](f: T => S): Rule[S] =
    mapOption(_.map(f))

  /** Sequences this rule with the rule `f` derives from its result; `f` is
    * only invoked, and its rule only run, when this rule succeeds.
    */
  final def flatMap[S](f: T => Rule[S]): Rule[S] =
    mapOption(_.flatMap(value => f(value).run()))

  /** Symbolic alias for [[or]]. */
  final def |(that: Rule[T]): Rule[T] = this or that

  /** Tries this rule and runs `that` only if this one fails. */
  final def or(that: Rule[T]): Rule[T] = mapOption(_.orElse(that.run()))

  /** Makes the rule always succeed, yielding `v` when it would have failed. */
  final def default(v: T): Rule[T] = mapOption(_.orElse(Some(v)))

  /** Runs the rule until it fails, folding each result into `t` from the
    * left.
    */
  @scala.annotation.tailrec
  final def manyWith(concat: (T, T) => T, t: T): T =
    run() match {
      case None       => t
      case Some(next) => manyWith(concat, concat(t, next))
    }

  /** Runs the rule one or more times, discarding the results; fails when
    * the first run fails.
    */
  final def many1_(): Rule[Unit] = this.flatMap(_ => many_())

  /** Runs the rule zero or more times, discarding the results; always
    * succeeds.
    *
    * Implemented as a loop so that long runs of matches do not grow the call
    * stack. A rule that keeps succeeding without making progress never
    * terminates.
    */
  final def many_(): Rule[Unit] =
    Rule(() => {
      while (run().isDefined) {}
      Some(())
    })

  /** Runs the rule one or more times and combines all results with
    * `concat`; fails when the first run fails.
    */
  final def fold(concat: (T, T) => T): Rule[T] =
    mapOption(_.map(first => manyWith(concat, first)))
}
///////////////
// GenParser //
///////////////
/** Base class for hand-written parsers: lexes the whole input eagerly and
  * exposes a cursor over the resulting token stream.
  *
  * @param reader source of the text to parse
  */
class GenParser(reader: Reader) {
  private val lexer = new Lexer(reader)

  // `itokens` holds the numeric token codes, `tokens` the matching values.
  private val (itokens, tokens) = lexer.lexAll2()
  private var tokenIx: Int      = 0
  var current: Token            = tokens(tokenIx)
  private var icurrent: Int     = itokens(tokenIx)

  /** Advances the cursor by one token; does nothing on the last token. */
  final def step(): Unit = {
    if (tokenIx != tokens.size - 1) {
      tokenIx += 1
      current  = tokens(tokenIx)
      icurrent = itokens(tokenIx)
    }
  }

  /** Returns the token `shift` positions away from the cursor without
    * consuming anything.
    *
    * Positions outside the token stream yield a synthetic `EOF` token
    * instead of throwing `IndexOutOfBoundsException`.
    */
  final def lookahead(shift: Int = 1): Token =
    tokens
      .lift(tokenIx + shift)
      .getOrElse(Token(org.enso.syntax.text.lexer.EOF, 0, 0))

  /** Rule that consumes and returns the current token if its code is `t`.
    *
    * @param t    numeric token code to match
    * @param name label used only in the debug trace below
    */
  final def token(t: Int, name: String = ""): Rule[Token] =
    Rule(() => {
      if (icurrent == t) {
        val result = Some(current)
        step()
        // Debug trace; `current` is already the token *after* the match.
        println("STEP -> ", current, name)
        result
      } else {
        None
      }
    })
}
////////////
// Parser //
////////////
// // def parseExprX(): AST = {
// // current.symbol match {
// // case token.Var(name) => {
// // AST(Var(name),current.offset,current.span)
// // }
// // case x => {
// // println("!!!")
// // println(x)
// // AST(NONE,0,0)
// // }
// // }
// // }
/** Hand-written combinator parser built on [[GenParser]] and [[Rule]].
  *
  * The `println` calls are debug tracing; their order is part of the
  * current observable behaviour.
  *
  * @param reader source of the text to parse
  */
class Parser(reader: Reader) extends GenParser(reader) {

  /** Parses one expression from the start of the input. */
  final def parse(): Option[AST] = expr().run()

  /** One or more items, combined into left-nested applications. */
  final def expr(): Rule[AST] = tok().fold(AST.app)

  /** A single item: variable, constructor, group or block, tried in that
    * order.
    */
  final def tok(): Rule[AST] =
    vvar().or(cons()).or(group()).or(block())

  /** A `VAR` token converted to an AST node. */
  final def vvar(): Rule[AST] = token(VAR).map(AST.fromToken)

  /** A `CONS` token converted to an AST node. */
  final def cons(): Rule[AST] = token(CONS).map(AST.fromToken)

  /** `GROUP_BEGIN expr [GROUP_END]`; when the closing token is missing the
    * inner expression is returned as-is.
    */
  final def group(): Rule[AST] =
    token(GROUP_BEGIN).flatMap { open =>
      expr().flatMap { inner =>
        val closed =
          token(GROUP_END).map(close => AST.grouped(open, inner, close))
        closed.default(inner)
      }
    }

  /** One or more `EOL` tokens followed by `BLOCK_BEGIN` and the block's
    * lines.
    */
  final def block(): Rule[AST] = {
    println(">> 1")
    val newlines = token(EOL, "n1").many1_()
    val rule = newlines
      .flatMap(_ => token(BLOCK_BEGIN))
      .map(_ => AST.block(blockLines()))
    println("<< 1")
    rule
  }

  /** Parses the lines of a block; empty when no first line can be parsed. */
  final def blockLines(): Vector[AST] = {
    println("--- 1")
    println(current)
    expr().run() match {
      case Some(firstLine) =>
        println("--- 2")
        println(current)
        println(firstLine)
        val collected = Vector.newBuilder[AST]
        collected += firstLine
        blockLines_(collected)
      case None =>
        println("--=--")
        Vector[AST]()
    }
  }

  /** Appends every further `EOL+ expr` line to `lines` and returns the
    * built vector.
    */
  final def blockLines_(
    lines: mutable.Builder[AST, Vector[AST]]
  ): Vector[AST] = {
    println("--- 3")
    println(current)

    @scala.annotation.tailrec
    def collect(): Unit = {
      println(">> 2")
      val line = token(EOL, "n2").many1_().flatMap(_ => expr()).run()
      line.foreach(lines += _)
      println("<< 2")
      if (line.isDefined) collect()
    }

    collect()
    lines.result()
  }
}
/** Thin wrapper that runs the Bison-generated parser over a [[Lexer]].
  *
  * @param reader source of the text to parse
  */
class BParser(reader: Reader) {
  val lexer  = new Lexer(reader)
  val parser = new bison.Parser(lexer)

  /** Runs the generated parser.
    *
    * @return the resulting AST, or `None` when the parse fails. `None` is
    *         also returned when the parse is accepted but the parser never
    *         assigned its `result` field (presumably the case for an empty
    *         program), so callers never receive `Some(null)`.
    */
  def parse(): Option[AST] =
    if (parser.parse()) Option(parser.result) else None
}

View File

@ -28,19 +28,27 @@ import org.enso.syntax.text.lexer.Token;
/* Bison Declarations */
%token <Token> VAR
%token <Token> CONS
%token <Token> EOL
%token <Token> GROUP_BEGIN
%token <Token> GROUP_END
%token <Token> BLOCK_BEGIN
%token <Token> BLOCK_END
%token <Token> BLOCK_INVALID
%token <AST> CONS
%type <AST> expr
%type <AST> exprItem
%type <AST> expr_group
%type <AST> block
%type <AST> blockBody
%type <AST> tok
%start program
%right GROUP_BEGIN GROUP_END
/////////////
// Grammar //
/////////////
@ -50,9 +58,20 @@ program:
| /* empty */
expr:
tok {$$=$1;}
| expr tok {$$=AST.app($1,$2);}
| expr block {$$=AST.app($1,$2);}
exprItem {$$=$1;}
| expr exprItem {$$=AST.app($1,$2);}
exprItem:
tok {$$=$1;};
| block {$$=$1;};
| GROUP_BEGIN expr_group GROUP_END {$$=AST.grouped($1,$2,$3);};
| GROUP_BEGIN expr_group {$$=$2;};
expr_group:
tok {$$=$1;}
| expr_group tok {$$=AST.app($1,$2);}
block:
BLOCK_BEGIN blockBody {$$=$2;}
@ -62,7 +81,8 @@ blockBody:
| expr BLOCK_END {$$=AST.emptyBlock();}
tok:
VAR {$$=AST.fromToken($1);}
VAR {$$=AST.fromToken($1);}
| CONS {$$=AST.fromToken($1);}