Overall progress

This commit is contained in:
Wojciech Danilo 2019-06-09 03:55:17 +02:00
parent 76344cf79e
commit fae4fe10ef
11 changed files with 569 additions and 492 deletions

20
.scalafmt.conf Normal file
View File

@ -0,0 +1,20 @@
align = most
maxColumn = 80
assumeStandardLibraryStripMargin = true
continuationIndent.defnSite = 2
newlines.alwaysBeforeTopLevelStatements = true
align.tokens = [
{code = "=>", owner = "Case"}
{code = "%", owner = "Term.ApplyInfix"}
{code = "%%", owner = "Term.ApplyInfix"}
{code = "="}
{code = "<-"}
{code = "extends"}
{code = ":", owner = "Defn.Def"}
]
rewrite.rules = [
ExpandImportSelectors
RedundantParens
SortModifiers
PreferCurlyFors
]

View File

@ -18,9 +18,9 @@ lazy val syntax = (project in file("syntax"))
publishArtifact := false, publishArtifact := false,
libraryDependencies ++= Seq( libraryDependencies ++= Seq(
"com.storm-enroute" %% "scalameter" % "0.17" % "bench", "com.storm-enroute" %% "scalameter" % "0.17" % "bench",
"org.typelevel" %% "cats-core" % "1.6.0", // "org.typelevel" %% "cats-core" % "1.6.0",
"org.scalatest" %% "scalatest" % "3.0.5" % Test, // "org.scalatest" %% "scalatest" % "3.0.5" % Test,
"com.lihaoyi" %% "pprint" % "0.5.3" // "com.lihaoyi" %% "pprint" % "0.5.3"
), ),
resolvers ++= Seq( resolvers ++= Seq(
"Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots", "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",

View File

@ -2,14 +2,17 @@ package org.enso.syntax.text.lexer
import org.scalameter.api._ import org.scalameter.api._
import scala.math.pow import scala.math.pow
import org.enso.syntax.text.parser.{Parser,BParser}
import java.io.{StringReader}
object RangeBenchmark extends Bench.OfflineReport { object RangeBenchmark extends Bench.OfflineReport {
val exp14 = Gen.exponential("size")(pow(2,14).toInt, pow(2,16).toInt, 2) val exp14 = Gen.exponential("size")(pow(2,14).toInt, pow(2,16).toInt, 2)
val exp15 = Gen.exponential("size")(pow(2,15).toInt, pow(2,17).toInt, 2) val exp15 = Gen.exponential("size")(pow(2,15).toInt, pow(2,17).toInt, 2)
val exp16 = Gen.exponential("size")(pow(2,16).toInt, pow(2,18).toInt, 2) val exp16 = Gen.exponential("size")(pow(2,14).toInt, pow(2,18).toInt, 2)
val longVars = for {i <- exp16} yield "test12" * i val longVars = for {i <- exp16} yield "test12" * i
val multipleVars = for {i <- exp16} yield "test1 " * i val multipleVars = for {i <- exp16} yield "test1 (A B) " * i
val exprs1 = for {i <- exp14} yield "a += 1\nb == '\n'\n" * i val exprs1 = for {i <- exp14} yield "a += 1\nb == '\n'\n" * i
val exprs2 = for {i <- exp14} yield "a += 1\nb == '`a`!'\n" * i val exprs2 = for {i <- exp14} yield "a += 1\nb == '`a`!'\n" * i
@ -24,14 +27,30 @@ object RangeBenchmark extends Bench.OfflineReport {
// input => new Lexer(input).lexAll() // input => new Lexer(input).lexAll()
// } // }
// } // }
measure method "longVar" in { // measure method "longVar" in {
using(longVars) in { // using(longVars) in {
input => new Lexer(input).lexAll() // input => new Lexer(input).lexAll()
// }
// }
// measure method "multipleVars" in {
// using(multipleVars) in {
// input => new Lexer(input).lexAll()
// }
// }
measure method "Parser-multipleVars" in {
using(multipleVars) in {
input => {
(new Parser(new StringReader(input))).parse
}
} }
} }
measure method "multipleVars" in {
measure method "BParser-multipleVars" in {
using(multipleVars) in { using(multipleVars) in {
input => new Lexer(input).lexAll() input => {
(new BParser(new StringReader(input))).parse
}
} }
} }
} }

View File

@ -32,9 +32,9 @@
/* First part of user declarations. */ /* First part of user declarations. */
/* "../../text/../../../../../../../target/Parser.java":36 */ /* lalr1.java:91 */ /* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":36 */ /* lalr1.java:91 */
/* "../../text/../../../../../../../target/Parser.java":38 */ /* lalr1.java:92 */ /* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":38 */ /* lalr1.java:92 */
/* "%code imports" blocks. */ /* "%code imports" blocks. */
/* "rules.y":7 */ /* lalr1.java:93 */ /* "rules.y":7 */ /* lalr1.java:93 */
@ -42,7 +42,7 @@ package org.enso.syntax.text.xx;
import org.enso.syntax.text.parser.AST; import org.enso.syntax.text.parser.AST;
import org.enso.syntax.text.lexer.Token; import org.enso.syntax.text.lexer.Token;
/* "../../text/../../../../../../../target/Parser.java":46 */ /* lalr1.java:93 */ /* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":46 */ /* lalr1.java:93 */
/** /**
* A Bison parser, automatically generated from <tt>rules.y</tt>. * A Bison parser, automatically generated from <tt>rules.y</tt>.
@ -75,13 +75,19 @@ public class Parser
/** Token number,to be returned by the scanner. */ /** Token number,to be returned by the scanner. */
static final int VAR = 258; static final int VAR = 258;
/** Token number,to be returned by the scanner. */ /** Token number,to be returned by the scanner. */
static final int EOL = 259; static final int CONS = 259;
/** Token number,to be returned by the scanner. */ /** Token number,to be returned by the scanner. */
static final int BLOCK_BEGIN = 260; static final int EOL = 260;
/** Token number,to be returned by the scanner. */ /** Token number,to be returned by the scanner. */
static final int BLOCK_END = 261; static final int GROUP_BEGIN = 261;
/** Token number,to be returned by the scanner. */ /** Token number,to be returned by the scanner. */
static final int CONS = 262; static final int GROUP_END = 262;
/** Token number,to be returned by the scanner. */
static final int BLOCK_BEGIN = 263;
/** Token number,to be returned by the scanner. */
static final int BLOCK_END = 264;
/** Token number,to be returned by the scanner. */
static final int BLOCK_INVALID = 265;
@ -319,62 +325,104 @@ public class Parser
{ {
case 2: case 2:
if (yyn == 2) if (yyn == 2)
/* "rules.y":49 */ /* lalr1.java:489 */ /* "rules.y":57 */ /* lalr1.java:489 */
{result=((AST)(yystack.valueAt (1-(1))));}; {result=((AST)(yystack.valueAt (1-(1))));};
break; break;
case 4: case 4:
if (yyn == 4) if (yyn == 4)
/* "rules.y":53 */ /* lalr1.java:489 */ /* "rules.y":61 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (1-(1))));}; {yyval=((AST)(yystack.valueAt (1-(1))));};
break; break;
case 5: case 5:
if (yyn == 5) if (yyn == 5)
/* "rules.y":54 */ /* lalr1.java:489 */ /* "rules.y":62 */ /* lalr1.java:489 */
{yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));}; {yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));};
break; break;
case 6: case 6:
if (yyn == 6) if (yyn == 6)
/* "rules.y":55 */ /* lalr1.java:489 */ /* "rules.y":65 */ /* lalr1.java:489 */
{yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));}; {yyval=((AST)(yystack.valueAt (1-(1))));};
break; break;
case 7: case 7:
if (yyn == 7) if (yyn == 7)
/* "rules.y":58 */ /* lalr1.java:489 */ /* "rules.y":66 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (2-(2))));}; {yyval=((AST)(yystack.valueAt (1-(1))));};
break; break;
case 8: case 8:
if (yyn == 8) if (yyn == 8)
/* "rules.y":61 */ /* lalr1.java:489 */ /* "rules.y":67 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (3-(1))));}; {yyval=AST.grouped(((Token)(yystack.valueAt (3-(1)))),((AST)(yystack.valueAt (3-(2)))),((Token)(yystack.valueAt (3-(3)))));};
break; break;
case 9: case 9:
if (yyn == 9) if (yyn == 9)
/* "rules.y":62 */ /* lalr1.java:489 */ /* "rules.y":68 */ /* lalr1.java:489 */
{yyval=AST.emptyBlock();}; {yyval=((AST)(yystack.valueAt (2-(2))));};
break; break;
case 10: case 10:
if (yyn == 10) if (yyn == 10)
/* "rules.y":65 */ /* lalr1.java:489 */ /* "rules.y":71 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (1-(1))));};
break;
case 11:
if (yyn == 11)
/* "rules.y":72 */ /* lalr1.java:489 */
{yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));};
break;
case 12:
if (yyn == 12)
/* "rules.y":77 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (2-(2))));};
break;
case 13:
if (yyn == 13)
/* "rules.y":80 */ /* lalr1.java:489 */
{yyval=((AST)(yystack.valueAt (3-(1))));};
break;
case 14:
if (yyn == 14)
/* "rules.y":81 */ /* lalr1.java:489 */
{yyval=AST.emptyBlock();};
break;
case 15:
if (yyn == 15)
/* "rules.y":84 */ /* lalr1.java:489 */
{yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));};
break;
case 16:
if (yyn == 16)
/* "rules.y":85 */ /* lalr1.java:489 */
{yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));}; {yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));};
break; break;
/* "../../text/../../../../../../../target/Parser.java":378 */ /* lalr1.java:489 */ /* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":426 */ /* lalr1.java:489 */
default: break; default: break;
} }
@ -691,8 +739,9 @@ public class Parser
{ {
return new byte[] return new byte[]
{ {
5, -4, 9, 2, -4, -4, 5, -4, -4, -2, 9, -4, -4, -2, 9, 11, 9, -4, -4, -4,
-4, 5, -4, -4 15, -4, 0, -4, -4, -4, -4, -4, 9, -4,
-4
}; };
} }
@ -704,8 +753,9 @@ public class Parser
{ {
return new byte[] return new byte[]
{ {
3, 10, 0, 2, 4, 1, 0, 6, 5, 0, 3, 15, 16, 0, 0, 0, 2, 4, 7, 6,
7, 0, 9, 8 9, 10, 0, 12, 1, 5, 8, 11, 0, 14,
13
}; };
} }
@ -715,7 +765,7 @@ public class Parser
{ {
return new byte[] return new byte[]
{ {
-4, -4, 10, -4, 0, -3 -4, -4, 14, 4, -4, -4, 2, -3
}; };
} }
@ -725,7 +775,7 @@ public class Parser
{ {
return new byte[] return new byte[]
{ {
-1, 2, 9, 7, 10, 4 -1, 5, 12, 7, 10, 8, 13, 9
}; };
} }
@ -737,8 +787,9 @@ public class Parser
{ {
return new byte[] return new byte[]
{ {
8, 1, 11, 6, 12, 1, 8, 6, 1, 5, 11, 1, 2, 1, 2, 18, 3, 17, 4, 19,
3, 13 15, 14, 1, 2, 6, 3, 15, 4, 1, 2,
20, 0, 16
}; };
} }
@ -747,8 +798,9 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new byte[] return new byte[]
{ {
3, 3, 4, 5, 6, 3, 9, 5, 3, 0, 3, 3, 4, 3, 4, 5, 6, 10, 8, 9,
0, 11 6, 0, 3, 4, 0, 6, 12, 8, 3, 4,
18, -1, 7
}; };
} }
@ -759,8 +811,9 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new byte[] return new byte[]
{ {
0, 3, 9, 10, 13, 0, 5, 11, 13, 10, 0, 3, 4, 6, 8, 12, 13, 14, 16, 18,
12, 4, 6, 12 15, 18, 13, 17, 0, 14, 7, 18, 5, 9,
17
}; };
} }
@ -770,8 +823,8 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new byte[] return new byte[]
{ {
0, 8, 9, 9, 10, 10, 10, 11, 12, 12, 0, 11, 12, 12, 13, 13, 14, 14, 14, 14,
13 15, 15, 16, 17, 17, 18, 18
}; };
} }
@ -781,8 +834,8 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new byte[] return new byte[]
{ {
0, 2, 1, 0, 1, 2, 2, 2, 3, 2, 0, 2, 1, 0, 1, 2, 1, 1, 3, 2,
1 1, 2, 2, 3, 2, 1, 1
}; };
} }
@ -793,7 +846,8 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new short[] return new short[]
{ {
0, 256, 257, 258, 259, 260, 261, 262 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
265
}; };
} }
@ -804,8 +858,9 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new String[] return new String[]
{ {
"$end", "error", "$undefined", "VAR", "EOL", "BLOCK_BEGIN", "BLOCK_END", "$end", "error", "$undefined", "VAR", "CONS", "EOL", "GROUP_BEGIN",
"CONS", "$accept", "program", "expr", "block", "blockBody", "tok", null "GROUP_END", "BLOCK_BEGIN", "BLOCK_END", "BLOCK_INVALID", "$accept",
"program", "expr", "exprItem", "expr_group", "block", "blockBody", "tok", null
}; };
} }
@ -815,8 +870,8 @@ private static final byte yycheck_[] = yycheck_init();
{ {
return new byte[] return new byte[]
{ {
0, 49, 49, 50, 53, 54, 55, 58, 61, 62, 0, 57, 57, 58, 61, 62, 65, 66, 67, 68,
65 71, 72, 77, 80, 81, 84, 85
}; };
} }
@ -872,7 +927,7 @@ private static final byte yycheck_[] = yycheck_init();
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
5, 6, 7 5, 6, 7, 8, 9, 10
}; };
} }
@ -884,15 +939,15 @@ private static final byte yycheck_[] = yycheck_init();
return yyundef_token_; return yyundef_token_;
} }
private static final int yylast_ = 11; private static final int yylast_ = 22;
private static final int yynnts_ = 6; private static final int yynnts_ = 8;
private static final int yyempty_ = -2; private static final int yyempty_ = -2;
private static final int yyfinal_ = 5; private static final int yyfinal_ = 14;
private static final int yyterror_ = 1; private static final int yyterror_ = 1;
private static final int yyerrcode_ = 256; private static final int yyerrcode_ = 256;
private static final int yyntokens_ = 8; private static final int yyntokens_ = 11;
private static final int yyuser_token_number_max_ = 262; private static final int yyuser_token_number_max_ = 265;
private static final int yyundef_token_ = 2; private static final int yyundef_token_ = 2;
/* User implementation code. */ /* User implementation code. */
@ -901,11 +956,11 @@ private static final byte yycheck_[] = yycheck_init();
public AST result; public AST result;
/* "../../text/../../../../../../../target/Parser.java":905 */ /* lalr1.java:1066 */ /* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":960 */ /* lalr1.java:1066 */
} }
/* "rules.y":88 */ /* lalr1.java:1070 */ /* "rules.y":108 */ /* lalr1.java:1070 */
// class CalcLexer implements Calc.Lexer { // class CalcLexer implements Calc.Lexer {

View File

@ -9,7 +9,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*;
%{ %{
private int indent = 0; int currentBlock = 0;
/////////////////////// ///////////////////////
// Indent Management // // Indent Management //
@ -25,7 +25,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*;
return indentStack.pop(); return indentStack.pop();
} }
public final Integer indentx() { public final Integer indent() {
return indentStack.peek(); return indentStack.peek();
} }
@ -108,6 +108,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*;
////////////////// //////////////////
int var() {value = token(new Var (yytext())); return VAR;} int var() {value = token(new Var (yytext())); return VAR;}
int cons() {value = token(new Cons (yytext())); return CONS;}
// Utils // Utils
void whitespace() {lastOffset += yylength();} void whitespace() {lastOffset += yylength();}
@ -133,10 +134,13 @@ void whitespace() {lastOffset += yylength();}
// Token disabled() {pushState(CHECK_OP_SFX); return disabled_();} // Token disabled() {pushState(CHECK_OP_SFX); return disabled_();}
// Layout // Layout
// Opens a block: records indentation level `i` via pushIndent, stores a
// BlockBegin token as the current semantic value and returns the BLOCK_BEGIN
// token id.
int blockBegin(int i) {pushIndent(i); value = token(BlockBegin$.MODULE$); return BLOCK_BEGIN;}
// Closes the innermost block: drops its indentation level via popIndent,
// stores a BlockEnd token as the current semantic value and returns the
// BLOCK_END token id.
int blockEnd() {popIndent(); value = token(BlockEnd$.MODULE$); return BLOCK_END;}
// Reports a badly indented line: stores a BlockInvalid token as the current
// semantic value and returns the BLOCK_INVALID token id. The indent stack is
// left untouched.
int blockInvalid() {value = token(BlockInvalid$.MODULE$); return BLOCK_INVALID;}
int newline() {value = token(EOL$.MODULE$); return EOL;} int newline() {value = token(EOL$.MODULE$); return EOL;}
int blockBegin() {return BLOCK_BEGIN;} int groupBegin() {value = token(GroupBegin$.MODULE$); return GROUP_BEGIN;}
int blockEnd() {return BLOCK_END;} int groupEnd() {value = token(GroupEnd$.MODULE$); return GROUP_END;}
// Token groupBegin() {return token(GroupBegin$.MODULE$);}
// Token groupEnd() {return token(GroupEnd$.MODULE$);} // Token groupEnd() {return token(GroupEnd$.MODULE$);}
// Token listBegin() {return token(ListBegin$.MODULE$);} // Token listBegin() {return token(ListBegin$.MODULE$);}
// Token listEnd() {return token(ListEnd$.MODULE$);} // Token listEnd() {return token(ListEnd$.MODULE$);}
@ -223,7 +227,7 @@ int blockEnd() {return BLOCK_END;}
alpha_upper = [A-Z] alpha_upper = [A-Z]
alpha_lower = [a-z] alpha_lower = [a-z]
alpha = {alpha_lower} | {alpha_upper} alpha = {alpha_lower} | {alpha_upper}
alphanum = {alpha} | digit alphanum = {alpha} | {digit}
whitespace = [\ ] whitespace = [\ ]
newline = \r|\n|\r\n newline = \r|\n|\r\n
@ -263,6 +267,7 @@ decimal = {digit}+
%xstate COMMENT %xstate COMMENT
%xstate COMMENT_LINE %xstate COMMENT_LINE
%xstate NEWLINE %xstate NEWLINE
%xstate BLOCK_ENDING
%state TEXT_INTERPOLATE %state TEXT_INTERPOLATE
@ -454,23 +459,47 @@ decimal = {digit}+
/////////////////////// ///////////////////////
<NEWLINE> { <NEWLINE> {
{whitespace}+ { {whitespace}+{newline} {
whitespace(); whitespace();
popState();
Integer ind = yylength();
if (ind > indentx()) {
return blockBegin();
} else {
// TODO
}
}
[^] {
indent = 0;
popState();
rewind();
return newline(); return newline();
} }
// Leading whitespace on a new line: its length is the indentation of the line.
{whitespace}+ {
whitespace();
popState();
// Remember the indentation of the line that is about to be scanned.
currentBlock = yylength();
if (currentBlock > indent()) {
// Deeper than the level on top of the indent stack: open a new block.
return blockBegin(currentBlock);
} else if (currentBlock < indent()) {
// Shallower than the level on top of the indent stack: let BLOCK_ENDING
// deal with the open blocks.
pushState(BLOCK_ENDING);
}
// Equal indentation falls through; this action returns no token.
}
// Any other character: the new line is treated as having indentation 0.
[^] {
// presumably puts the matched character back so it is scanned again
// (rewind is defined outside this view - TODO confirm)
rewind();
popState();
currentBlock = 0;
if (indent() > 0) {
// Some indentation level is still open: let BLOCK_ENDING deal with it.
pushState(BLOCK_ENDING);
} else {
// NOTE(review): the rule that enters NEWLINE may already emit an EOL token;
// confirm that emitting another one here is intended.
return newline();
}
}
}
// State entered when a line is indented less than the level on top of the
// indent stack. It compares `currentBlock` (indentation of the new line) with
// `indent()` on every match.
<BLOCK_ENDING> {
[^] {
// presumably puts the matched character back so it is scanned again
// (rewind is defined outside this view - TODO confirm)
rewind();
if(currentBlock == indent()) {
// The new line matches the level on top of the stack: leave this state
// without returning a token.
popState();
} else if(currentBlock < indent()) {
// Still shallower than the top level: emit BLOCK_END (blockEnd pops one
// indentation level) and stay in this state.
return blockEnd();
} else {
// The new line is deeper than the level now on top of the stack, so it
// matches no open level: leave this state and report an invalid block.
popState();
return blockInvalid();
}
}
}
@ -482,8 +511,7 @@ decimal = {digit}+
// // Identifiers // // Identifiers
{var} {return var();} {var} {return var();}
// {var} {return var();} {cons} {return cons();}
// {cons} {return cons();}
// {wildcard} {return wildcard();} // {wildcard} {return wildcard();}
// // Operators // // Operators
@ -500,9 +528,9 @@ decimal = {digit}+
// {modifier} {return modifier();} // {modifier} {return modifier();}
// (\#\=) {return disabled();} // (\#\=) {return disabled();}
// // Layout // Layout
// (\() {return groupBegin();} (\() {return groupBegin();}
// (\)) {return groupEnd();} (\)) {return groupEnd();}
// (\[) {return listBegin();} // (\[) {return listBegin();}
// (\]) {return listEnd();} // (\]) {return listEnd();}
// (\{) {return recordBegin();} // (\{) {return recordBegin();}
@ -543,7 +571,7 @@ decimal = {digit}+
// Layout // Layout
{whitespace}+ {whitespace();} {whitespace}+ {whitespace();}
{newline} {pushState(NEWLINE);} {newline} {pushState(NEWLINE);return newline();}
// // Unknown // // Unknown
// [^] { // [^] {

View File

@ -1,163 +1,55 @@
package org.enso.syntax package org.enso.syntax
// import org.enso.syntax.text.parser.{Parser} import java.io.StringReader
import java.io.{Reader, StringReader}
import org.enso.syntax.text.lexer.{Scanner, EOF, Token}
import org.enso.syntax.text.xx.Parser
import org.enso.syntax.text.xx.Parser.Lexer._
// import org.enso.syntax.text.{parser => AST}
import org.enso.syntax.text.parser.AST
class SS(scanner:Scanner) extends Parser.Lexer {
private var _done = false
def getLVal():Token = {
scanner.value
}
def yyerror(s:String) {
println("!!! ERROR !!!")
println(s)
}
def yylex():Int = {
scanner.lex
}
def lex(): Token = {
val tok = yylex
if (tok == -1) {
_done = true;
return Token(EOF,0,0)
}
return getLVal
}
def lexAll(): Vector[Token] = {
var builder = Vector.newBuilder[Token]
do {
builder += lex
} while (!_done)
builder.result
}
def lexAll2(): (Vector[Int],Vector[Token]) = {
var builder_t = Vector.newBuilder[Int]
var builder = Vector.newBuilder[Token]
do {
val tok = yylex
var tval = getLVal
if (tok == -1) {
tval = Token(EOF,0,0)
_done = true
}
builder_t += tok
builder += tval
} while (!_done)
(builder_t.result, builder.result)
}
}
class PP(reader:Reader) {
val lexer = new SS(new Scanner(reader))
//////////////////////
// Token Management //
//////////////////////
val (itokens, tokens) = lexer.lexAll2()
var tokenIx = 0
var current : Token = tokens(tokenIx)
var icurrent : Int = itokens(tokenIx)
def step(): Unit = {
if (tokenIx == tokens.size - 1) {
return
}
tokenIx += 1
current = tokens(tokenIx)
icurrent = itokens(tokenIx)
}
def parse():Option[AST] = {
manyWith(AST.app,()=>tok)
}
def tok():Option[AST] = {
token(VAR).map(AST.fromToken)
}
// def block
def or[T](l:()=>Option[T],r:()=>Option[T]) {
l() match {
case Some(a) => Some(a)
case None => r()
}
}
def token(t:Int):Option[Token] = {
if(icurrent==t) {
val result = Some(current)
step
result
} else {
None
}
}
def manyWith(concat:(AST,AST)=>AST,f:()=>Option[AST]): Option[AST] = {
f() match {
case None => None
case Some(ast) => {
Some(manyWith_(concat,f,ast))
}
}
}
def manyWith_(concat:(AST,AST)=>AST,f:()=>Option[AST],ast:AST): AST = {
f() match {
case None => ast
case Some(ast2) => {
manyWith_(concat,f,concat(ast,ast2))
}
}
}
}
import org.enso.syntax.text.lexer.Lexer
import org.enso.syntax.text.parser.Parser
import org.enso.syntax.text.parser.BParser
import scala.language.implicitConversions
object Main extends App { object Main extends App {
val str = "a b"
val reader = new StringReader(str)
val reader2 = new StringReader(str)
val scanner = new Scanner(reader)
val scanner2 = new Scanner(reader2)
val ss = new SS(scanner)
val ss2 = new SS(scanner2)
val parser = new Parser(ss)
val pp = new PP(new StringReader(str)) var indent = 0
pprint.pprintln(ss2.lexAll)
pprint.pprintln(pp.parse) def pprint(s: String) {
pprint.pprintln(parser.parse) print(" " * indent)
// val parser = new Parser(reader) val (l, r2) = s.span((x) => (x != '(' && x != ')'))
// val ast = parser.parse print(l)
pprint.pprintln(parser.result,width=3,height=1000) if (r2 == "") {
} println
return
}
val (m, r) = r2.splitAt(1)
if (m == "(") {
indent += 1
println(m)
pprint(r)
} else if (m == ")") {
indent -= 1
println(m)
pprint(r)
}
}
// val str = "a (b"
val str =
"""|a
|
| a
| c""".stripMargin
println(str)
val reader = new StringReader(str)
val ss = new Lexer(reader)
pprint(ss.lexAll.toString())
val bparser = new BParser(new StringReader(str))
val parser = new Parser(new StringReader(str))
pprint(bparser.parse.toString())
pprint(parser.parse.toString())
pprint("!")
}

View File

@ -1,8 +1,56 @@
package org.enso.syntax.text.lexer package org.enso.syntax.text.lexer
import java.io.{StringReader, Reader} import java.io.StringReader
import java.io.Reader
import scala.collection.immutable.Vector import scala.collection.immutable.Vector
import org.enso.syntax.text.xx.Parser
/** Lexer that drives the generated `Scanner` and exposes it through the
  * `Parser.Lexer` interface expected by the generated parser.
  *
  * @param reader source of the characters to tokenize
  */
class Lexer(reader: Reader) extends Scanner(reader) with Parser.Lexer {

  // Set once `yylex` has returned -1; it is never reset afterwards.
  private var _done = false

  /** Returns the scanner's current `value`, i.e. the token produced by the
    * most recent scanning step.
    */
  def getLVal(): Token = value

  /** Error callback of `Parser.Lexer`; prints the message to standard output.
    *
    * @param s error message
    */
  def yyerror(s: String): Unit = {
    println("!!! ERROR !!!")
    println(s)
  }

  /** Scans a single token.
    *
    * @return the scanned token, or `Token(EOF, 0, 0)` when `yylex` returns -1
    *         (in which case the lexer is also marked as done)
    */
  def lexTok(): Token =
    if (yylex() == -1) {
      _done = true
      Token(EOF, 0, 0)
    } else {
      getLVal()
    }

  /** Scans tokens until the lexer is done.
    *
    * @return all scanned tokens in order, ending with the EOF token
    */
  def lexAll(): Vector[Token] = {
    val builder = Vector.newBuilder[Token]
    // At least one token is always scanned, so the result is never empty.
    do {
      builder += lexTok()
    } while (!_done)
    builder.result()
  }

  /** Scans tokens until the lexer is done, keeping the raw token ids too.
    *
    * @return a pair of equally long vectors: the ids returned by `yylex` and
    *         the corresponding tokens; the last pair is -1 and the EOF token
    */
  def lexAll2(): (Vector[Int], Vector[Token]) = {
    val idBuilder    = Vector.newBuilder[Int]
    val tokenBuilder = Vector.newBuilder[Token]
    do {
      val id = yylex()
      val tok =
        if (id == -1) {
          _done = true
          Token(EOF, 0, 0)
        } else {
          getLVal()
        }
      idBuilder += id
      tokenBuilder += tok
    } while (!_done)
    (idBuilder.result(), tokenBuilder.result())
  }
}
// class Lexer (reader:Reader) { // class Lexer (reader:Reader) {
// val scanner = new Scanner(reader) // val scanner = new Scanner(reader)
// private var _done = false // private var _done = false
@ -39,4 +87,4 @@ import scala.collection.immutable.Vector
// def done(): Boolean = { // def done(): Boolean = {
// return _done; // return _done;
// } // }
// } // }

View File

@ -4,139 +4,140 @@ package org.enso.syntax.text.lexer
// Token // // Token //
/////////// ///////////
case class Token (symbol:Symbol, offset:Int, span:Int) case class Token(symbol: Symbol, offset: Int, span: Int)
//////////// ////////////
// Symbol // // Symbol //
//////////// ////////////
abstract class Symbol abstract class Symbol
// Identifiers // Identifiers
case class Var (name:String) extends Symbol case class Var(name: String) extends Symbol
case class Cons (name:String) extends Symbol case class Cons(name: String) extends Symbol
case object Wildcard extends Symbol case object Wildcard extends Symbol
// Operators // Operators
case class Operator (name:String) extends Symbol case class Operator(name: String) extends Symbol
case class Modifier (name:String) extends Symbol case class Modifier(name: String) extends Symbol
case object DisabledAssignment extends Symbol case object DisabledAssignment extends Symbol
// Layout // Layout
case object EOL extends Symbol case object EOL extends Symbol
case object BOF extends Symbol case object BOF extends Symbol
case object EOF extends Symbol case object EOF extends Symbol
case object GroupBegin extends Symbol case object BlockBegin extends Symbol
case object GroupEnd extends Symbol case object BlockEnd extends Symbol
case object ListBegin extends Symbol case object BlockInvalid extends Symbol
case object ListEnd extends Symbol
case object RecordBegin extends Symbol case object GroupBegin extends Symbol
case object RecordEnd extends Symbol case object GroupEnd extends Symbol
case object ListBegin extends Symbol
case object ListEnd extends Symbol
case object RecordBegin extends Symbol
case object RecordEnd extends Symbol
// Literals // Literals
case object TextBegin extends Symbol case object TextBegin extends Symbol
case object TextEnd extends Symbol case object TextEnd extends Symbol
case object TextRawBegin extends Symbol case object TextRawBegin extends Symbol
case object TextRawEnd extends Symbol case object TextRawEnd extends Symbol
case class Text (text:String) extends Symbol case class Text(text: String) extends Symbol
case class TextEscape (esc:TextEscapeType) extends Symbol case class TextEscape(esc: TextEscapeType) extends Symbol
case object TextInterpolateBegin extends Symbol case object TextInterpolateBegin extends Symbol
case object TextInterpolateEnd extends Symbol case object TextInterpolateEnd extends Symbol
case class Number (base:Int case class Number(base: Int, intPart: List[Int], fracPart: List[Int])
,intPart:List[Int] extends Symbol
,fracPart:List[Int]) extends Symbol
// Invalid // Invalid
case class Invalid (reason:InvalidReason) extends Symbol case class Invalid(reason: InvalidReason) extends Symbol
case class Unmatched (char:String) extends Symbol case class Unmatched(char: String) extends Symbol
// Comments // Comments
case object Comment extends Symbol case object Comment extends Symbol
case class CommentBody (text:String) extends Symbol case class CommentBody(text: String) extends Symbol
////////////////// //////////////////
// Text Escapes // // Text Escapes //
////////////////// //////////////////
abstract class TextEscapeType abstract class TextEscapeType
case class CharEscape (code:Int) extends TextEscapeType case class CharEscape(code: Int) extends TextEscapeType
case class CtrlEscape (code:Int) extends TextEscapeType case class CtrlEscape(code: Int) extends TextEscapeType
case class IntEscape (code:Int) extends TextEscapeType case class IntEscape(code: Int) extends TextEscapeType
case class Uni16Escape (code:Int) extends TextEscapeType case class Uni16Escape(code: Int) extends TextEscapeType
case class Uni32Escape (code:Int) extends TextEscapeType case class Uni32Escape(code: Int) extends TextEscapeType
case class Uni21Escape (code:Int) extends TextEscapeType case class Uni21Escape(code: Int) extends TextEscapeType
case object QuoteEscape extends TextEscapeType case object QuoteEscape extends TextEscapeType
case object RawQuoteEscape extends TextEscapeType case object RawQuoteEscape extends TextEscapeType
case object SlashEscape extends TextEscapeType case object SlashEscape extends TextEscapeType
case class InvalidCharEscape (char:Char) extends TextEscapeType case class InvalidCharEscape(char: Char) extends TextEscapeType
case class InvalidUni32Escape (str:String) extends TextEscapeType case class InvalidUni32Escape(str: String) extends TextEscapeType
case class InvalidUni21Escape (str:String) extends TextEscapeType case class InvalidUni21Escape(str: String) extends TextEscapeType
///////////// /////////////
// Invalid // // Invalid //
///////////// /////////////
abstract class InvalidReason abstract class InvalidReason
case class UnexpectedSuffix (text:String) extends InvalidReason case class UnexpectedSuffix(text: String) extends InvalidReason
//////////////// ////////////////
// Companions // // Companions //
//////////////// ////////////////
object Number { object Number {
def charToDigit (char:Char): Int = {
def charToDigit(char: Char): Int = {
val i = char.toInt val i = char.toInt
if (i >= 48 && i <= 57) { return i - 48 } // 0 to 9 if (i >= 48 && i <= 57) { return i - 48 } // 0 to 9
if (i >= 65 && i <= 90) { return i - 55 } // A to Z if (i >= 65 && i <= 90) { return i - 55 } // A to Z
if (i >= 97 && i <= 122) { return i - 87 } // a to z if (i >= 97 && i <= 122) { return i - 87 } // a to z
return -1 return -1
} }
def stringToDigits (str:String): List[Int] = { def stringToDigits(str: String): List[Int] = {
str.toList.map(charToDigit) str.toList.map(charToDigit)
} }
def fromString(base:String, intPart:String, fracPart:String): Number = { def fromString(base: String, intPart: String, fracPart: String): Number = {
val base2 = if (base == "") 10 else base.toInt val base2 = if (base == "") 10 else base.toInt
return Number(base2,stringToDigits(intPart), stringToDigits(fracPart)) return Number(base2, stringToDigits(intPart), stringToDigits(fracPart))
} }
} }
object IntEscape { object IntEscape {
def fromString(code:String): IntEscape = {
def fromString(code: String): IntEscape = {
IntEscape(code.toInt) IntEscape(code.toInt)
} }
} }
object CharEscape { object CharEscape {
def fromChar(c:Char): CharEscape = {
def fromChar(c: Char): CharEscape = {
CharEscape(c.toInt) CharEscape(c.toInt)
} }
} }
object Uni32Escape { object Uni32Escape {
def fromString(str:String): TextEscapeType = {
def fromString(str: String): TextEscapeType = {
try { try {
return Uni32Escape(Integer.parseInt(str,16)) return Uni32Escape(Integer.parseInt(str, 16))
} catch { } catch {
case e:Exception => return InvalidUni32Escape(str) case e: Exception => return InvalidUni32Escape(str)
} }
} }
} }
object Uni21Escape { object Uni21Escape {
def fromString(str:String): TextEscapeType = {
def fromString(str: String): TextEscapeType = {
try { try {
return Uni21Escape(Integer.parseInt(str,16)) return Uni21Escape(Integer.parseInt(str, 16))
} catch { } catch {
case e:Exception => return InvalidUni21Escape(str) case e: Exception => return InvalidUni21Escape(str)
} }
} }
} }

View File

@ -3,44 +3,56 @@ package org.enso.syntax.text.parser
import org.enso.syntax.text.lexer.Token import org.enso.syntax.text.lexer.Token
import org.enso.syntax.text.{lexer => token} import org.enso.syntax.text.{lexer => token}
///////// /////////
// AST // // AST //
///////// /////////
case class AST (offset:Int, span:Int, symbol:Symbol) case class AST(offset: Int, span: Int, symbol: Symbol)
// class Sym[T](offset:Int, span:Int, element:T)
//////////// ////////////
// Symbol // // Symbol //
//////////// ////////////
abstract class Symbol trait Symbol
case object NONE extends Symbol
case object NONE extends Symbol
// Identifiers // Identifiers
case class Var (name:String) extends Symbol case class Var(name: String) extends Symbol
case class Operator (name:String) extends Symbol case class Cons(name: String) extends Symbol
case class App (func:AST, arg:AST) extends Symbol case class Operator(name: String) extends Symbol
case class Block (body:Vector[AST]) extends Symbol case class App(func: AST, arg: AST) extends Symbol
case class Block(body: Vector[AST]) extends Symbol
case class Grouped(body: AST) extends Symbol
// //
object AST { object AST {
def fromToken(tok:Token):AST = {
def fromToken(tok: Token): AST = {
tok.symbol match { tok.symbol match {
case token.Var(name) => AST(0,0,Var(name)) case token.Var(name) => AST(tok.offset, tok.span, Var(name))
case token.Cons(name) => AST(tok.offset, tok.span, Cons(name))
} }
} }
def app(fn:AST, arg:AST):AST = {
AST(fn.offset,fn.span + arg.span,App(fn.copy(offset=0),arg)) def app(fn: AST, arg: AST): AST = {
AST(fn.offset, fn.span + arg.span, App(fn.copy(offset = 0), arg))
} }
def emptyBlock():AST = {
AST(0,0,Block(Vector())) def emptyBlock(): AST = {
AST(0, 0, Block(Vector()))
} }
}
/** Wraps the given lines in a `Block` node whose offset and span are both 0.
  *
  * @param lines the ASTs forming the block body
  */
def block(lines: Vector[AST]): AST =
  AST(0, 0, Block(lines))
/** Builds a `Grouped` node from its opening token, body and closing token.
  *
  * The node starts at the offset of `begin`; its span is the span of `begin`
  * plus the offset and span of both `body` and `end`.
  *
  * @param begin token opening the group
  * @param body  AST enclosed by the group
  * @param end   token closing the group
  */
def grouped(begin: Token, body: AST, end: Token): AST = {
  val bodyWidth  = body.offset + body.span
  val closeWidth = end.offset + end.span
  AST(begin.offset, begin.span + bodyWidth + closeWidth, Grouped(body))
}
}

View File

@ -1,188 +1,170 @@
package org.enso.syntax.text.parser package org.enso.syntax.text.parser
// import java.io.{Reader} import org.enso.syntax.text.{xx => bison}
// import org.enso.syntax.text.lexer.{Lexer, Token} import java.io.Reader
// import org.enso.syntax.text.{lexer => token}
// import scala.collection.immutable.{Vector}
// import scala.collection.mutable.{Builder}
// class Parser(reader:Reader) { import org.enso.syntax.text.xx.Parser.Lexer._
// val lexer = new Lexer(reader) import org.enso.syntax.text.lexer.Token
import org.enso.syntax.text.lexer.Lexer
import scala.collection.immutable.VectorBuilder
import scala.collection.mutable
// ////////////////////// //////////////////
// // Token Management // // Parser Rules //
// ////////////////////// //////////////////
/** A lazily evaluated parsing rule.
  *
  * A rule wraps a thunk which, each time it is run, either produces a value
  * (`Some`) or reports that it did not match (`None`). All combinators build
  * new rules and evaluate nothing until `run` is called.
  *
  * @param unwrap the thunk evaluated on every `run`
  */
case class Rule[T](unwrap: () => Option[T]) {

  /** Evaluates the wrapped thunk once. */
  final def run(): Option[T] = unwrap()

  /** Same as [[run]]. */
  final def apply(): Option[T] = run()

  /** Builds a rule that runs this rule and transforms its optional result. */
  final def mapOption[S](f: Option[T] => Option[S]): Rule[S] =
    Rule(() => f(run()))

  /** Transforms the value of a successful match. */
  final def map[S](f: T => S): Rule[S] =
    mapOption(_.map(f))

  /** Sequencing: on success, runs the rule computed from the matched value. */
  final def flatMap[S](f: T => Rule[S]): Rule[S] =
    mapOption(_.flatMap(value => f(value).run()))

  /** Symbolic alias for [[or]]. */
  final def |(that: Rule[T]): Rule[T] = this or that

  /** Alternative: `that` is run only when this rule does not match. */
  final def or(that: Rule[T]): Rule[T] = mapOption(_.orElse(that.run()))

  /** Makes the rule always succeed, yielding `v` when it does not match. */
  final def default(v: T): Rule[T] = mapOption(_.orElse(Some(v)))

  /** Runs this rule until it stops matching, folding every matched value
    * into the accumulator `t` with `concat`.
    */
  @scala.annotation.tailrec
  final def manyWith(concat: (T, T) => T, t: T): T =
    run() match {
      case None       => t
      case Some(more) => manyWith(concat, concat(t, more))
    }

  /** Matches this rule one or more times, discarding the results. */
  final def many1_(): Rule[Unit] = flatMap(_ => many_())

  /** Matches this rule zero or more times, discarding the results.
    *
    * The resulting rule always succeeds. The repetition is a plain loop, so
    * the stack depth does not grow with the number of matches.
    */
  final def many_(): Rule[Unit] =
    Rule(() => {
      while (run().isDefined) {}
      Some(())
    })

  /** Matches this rule one or more times and combines all matched values,
    * left to right, with `concat`.
    */
  final def fold(concat: (T, T) => T): Rule[T] =
    mapOption(_.map(first => manyWith(concat, first)))
}
// val result = f()
// indent = oldIndent
// result
// }
///////////////
// GenParser //
///////////////
/** Token-stream cursor shared by rule-based parsers.
  *
  * All tokens are read from the lexer up front; `current` always refers to
  * the token under the cursor.
  *
  * NOTE(review): `lexAll2` is assumed to return two parallel, non-empty
  * sequences (integer token ids and the tokens themselves) — construction
  * indexes both at position 0, so an empty stream would fail here. Confirm
  * against the lexer.
  *
  * @param reader source of the text to be lexed
  */
class GenParser(reader: Reader) {
  private val lexer             = new Lexer(reader)
  private val (itokens, tokens) = lexer.lexAll2()
  private var tokenIx: Int      = 0
  var current: Token            = tokens(tokenIx)
  private var icurrent: Int     = itokens(tokenIx)

  /** Moves the cursor to the next token.
    *
    * Does nothing when the cursor already points at the last token.
    */
  final def step(): Unit =
    if (tokenIx < tokens.size - 1) {
      tokenIx += 1
      current  = tokens(tokenIx)
      icurrent = itokens(tokenIx)
    }

  /** Returns the token `shift` positions away from the cursor.
    *
    * The index is not range-checked, so looking outside of the token stream
    * throws.
    */
  final def lookahead(shift: Int = 1): Token = tokens(tokenIx + shift)

  /** Rule matching a single token whose integer id equals `t`.
    *
    * On a match the cursor is advanced and the matched token is returned;
    * otherwise the cursor is left untouched and the rule yields `None`.
    *
    * @param t    the expected token id
    * @param name optional label of the rule, kept for call-site
    *             compatibility; it does not influence matching
    */
  final def token(t: Int, name: String = ""): Rule[Token] =
    Rule(() => {
      if (icurrent == t) {
        val matched = current
        step()
        Some(matched)
      } else {
        None
      }
    })
}
// if(next.offset == indent) {
// val out = expr
// println("!!!!!--")
// println(out)
// out
// } else {
// None
// }
// }
////////////
// Parser //
////////////
/** Rule-based parser producing [[AST]] values from the lexed token stream.
  *
  * NOTE(review): matched tokens are consumed as a rule runs and nothing in
  * this class rewinds the cursor when a later step of a sequence fails (for
  * example `block` may consume EOL tokens and then not find BLOCK_BEGIN).
  * Confirm whether alternatives are expected to backtrack.
  *
  * @param reader source of the text to be parsed
  */
class Parser(reader: Reader) extends GenParser(reader) {

  /** Parses one expression starting at the current token. */
  final def parse(): Option[AST] = expr().run()

  /** One or more `tok` items combined left to right with `AST.app`. */
  final def expr(): Rule[AST] = tok().fold(AST.app)

  /** A single expression item: variable, constructor, group or block. */
  final def tok(): Rule[AST] = vvar() | cons() | group() | block()

  /** A VAR token converted to an AST leaf. */
  final def vvar(): Rule[AST] = token(VAR).map(AST.fromToken)

  /** A CONS token converted to an AST leaf. */
  final def cons(): Rule[AST] = token(CONS).map(AST.fromToken)

  /** GROUP_BEGIN followed by an expression and an optional GROUP_END.
    *
    * When the closing token is missing, the bare inner expression is
    * returned instead of a grouped node.
    */
  final def group(): Rule[AST] =
    for {
      beg <- token(GROUP_BEGIN)
      exp <- expr()
      ast <- token(GROUP_END).map(AST.grouped(beg, exp, _)).default(exp)
    } yield ast

  /** One or more EOL tokens, then BLOCK_BEGIN, then the lines of the block.
    *
    * NOTE(review): no BLOCK_END token is consumed here — confirm whether
    * the lexer emits one that should be matched.
    */
  final def block(): Rule[AST] =
    token(EOL, "n1")
      .many1_()
      .flatMap(_ => token(BLOCK_BEGIN))
      .map(_ => AST.block(blockLines()))

  /** Parses the lines of a block starting at the current token.
    *
    * Returns an empty vector when not even the first line can be parsed.
    */
  final def blockLines(): Vector[AST] =
    expr().run() match {
      case None => Vector[AST]()
      case Some(firstLine) =>
        val lines = Vector.newBuilder[AST]
        lines += firstLine
        blockLines_(lines)
    }

  /** Appends every following line (one or more EOL tokens followed by an
    * expression) to `lines` and returns the accumulated result.
    */
  final def blockLines_(
    lines: mutable.Builder[AST, Vector[AST]]): Vector[AST] = {
    val nextLine = token(EOL, "n2").many1_().flatMap(_ => expr())
    Iterator
      .continually(nextLine.run())
      .takeWhile(_.isDefined)
      .foreach(_.foreach(lines += _))
    lines.result()
  }
}
// AST(func.offset,func.span + arg.span,App(func.copy(offset=0),arg))
/** Thin wrapper around the Bison-generated parser.
  *
  * @param reader source of the text to be parsed
  */
class BParser(reader: Reader) {
  val lexer  = new Lexer(reader)
  val parser = new bison.Parser(lexer)

  /** Runs the generated parser.
    *
    * @return the parser's `result` when `parse` reports success, `None`
    *         otherwise
    */
  def parse(): Option[AST] =
    if (parser.parse) Some(parser.result) else None
}

View File

@ -28,19 +28,27 @@ import org.enso.syntax.text.lexer.Token;
/* Bison Declarations */ /* Bison Declarations */
/* Terminals: every terminal carries the lexer Token as its semantic value.
   CONS is declared exactly once, as <Token>, because the `tok` rule passes
   its value to AST.fromToken. */
%token <Token> VAR
%token <Token> CONS
%token <Token> EOL
%token <Token> GROUP_BEGIN
%token <Token> GROUP_END
%token <Token> BLOCK_BEGIN
%token <Token> BLOCK_END
%token <Token> BLOCK_INVALID

/* Non-terminals: every rule produces an AST. */
%type <AST> expr
%type <AST> exprItem
%type <AST> expr_group
%type <AST> block
%type <AST> blockBody
%type <AST> tok

%start program

%right GROUP_BEGIN GROUP_END
///////////// /////////////
// Grammar // // Grammar //
///////////// /////////////
@ -50,9 +58,20 @@ program:
| /* empty */ | /* empty */
/* One or more expression items, applied left to right. Blocks are reached
   through exprItem, so no separate `expr block` alternative is needed. */
expr:
  exprItem      {$$=$1;}
| expr exprItem {$$=AST.app($1,$2);}
/* A single item of an expression: a token, a block, or a group. A group
   without its closing token yields the bare inner expression. */
exprItem:
  tok                              {$$=$1;}
| block                            {$$=$1;}
| GROUP_BEGIN expr_group GROUP_END {$$=AST.grouped($1,$2,$3);}
| GROUP_BEGIN expr_group           {$$=$2;}
/* Contents of a group: one or more tokens, applied left to right. */
expr_group:
  tok            {$$=$1;}
| expr_group tok {$$=AST.app($1,$2);}
block: block:
BLOCK_BEGIN blockBody {$$=$2;} BLOCK_BEGIN blockBody {$$=$2;}
@ -62,7 +81,8 @@ blockBody:
| expr BLOCK_END {$$=AST.emptyBlock();} | expr BLOCK_END {$$=AST.emptyBlock();}
tok: tok:
VAR {$$=AST.fromToken($1);} VAR {$$=AST.fromToken($1);}
| CONS {$$=AST.fromToken($1);}