diff --git a/.scalafmt.conf b/.scalafmt.conf new file mode 100644 index 00000000000..6ca66d185df --- /dev/null +++ b/.scalafmt.conf @@ -0,0 +1,20 @@ +align = most +maxColumn = 80 +assumeStandardLibraryStripMargin = true +continuationIndent.defnSite = 2 +newlines.alwaysBeforeTopLevelStatements = true +align.tokens = [ + {code = "=>", owner = "Case"} + {code = "%", owner = "Term.ApplyInfix"} + {code = "%%", owner = "Term.ApplyInfix"} + {code = "="} + {code = "<-"} + {code = "extends"} + {code = ":", owner = "Defn.Def"} +] +rewrite.rules = [ + ExpandImportSelectors + RedundantParens + SortModifiers + PreferCurlyFors +] \ No newline at end of file diff --git a/build.sbt b/build.sbt index c7dcca525b4..2eada55542f 100644 --- a/build.sbt +++ b/build.sbt @@ -18,9 +18,9 @@ lazy val syntax = (project in file("syntax")) publishArtifact := false, libraryDependencies ++= Seq( "com.storm-enroute" %% "scalameter" % "0.17" % "bench", - "org.typelevel" %% "cats-core" % "1.6.0", - "org.scalatest" %% "scalatest" % "3.0.5" % Test, - "com.lihaoyi" %% "pprint" % "0.5.3" + // "org.typelevel" %% "cats-core" % "1.6.0", + // "org.scalatest" %% "scalatest" % "3.0.5" % Test, + // "com.lihaoyi" %% "pprint" % "0.5.3" ), resolvers ++= Seq( "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots", diff --git a/syntax/src/bench/scala/org/enso/syntax/text/Lexer.scala b/syntax/src/bench/scala/org/enso/syntax/text/Lexer.scala index c5048413498..2289e1077fa 100644 --- a/syntax/src/bench/scala/org/enso/syntax/text/Lexer.scala +++ b/syntax/src/bench/scala/org/enso/syntax/text/Lexer.scala @@ -2,14 +2,17 @@ package org.enso.syntax.text.lexer import org.scalameter.api._ import scala.math.pow +import org.enso.syntax.text.parser.{Parser,BParser} +import java.io.{StringReader} + object RangeBenchmark extends Bench.OfflineReport { val exp14 = Gen.exponential("size")(pow(2,14).toInt, pow(2,16).toInt, 2) val exp15 = Gen.exponential("size")(pow(2,15).toInt, pow(2,17).toInt, 2) - val exp16 = Gen.exponential("size")(pow(2,16).toInt, pow(2,18).toInt, 2) + val exp16 = Gen.exponential("size")(pow(2,14).toInt, pow(2,18).toInt, 2) val longVars = for {i <- exp16} yield "test12" * i - val multipleVars = for {i <- exp16} yield "test1 " * i + val multipleVars = for {i <- exp16} yield "test1 (A B) " * i val exprs1 = for {i <- exp14} yield "a += 1\nb == '\n'\n" * i val exprs2 = for {i <- exp14} yield "a += 1\nb == '`a`!'\n" * i @@ -24,14 +27,30 @@ object RangeBenchmark extends Bench.OfflineReport { // input => new Lexer(input).lexAll() // } // } - measure method "longVar" in { - using(longVars) in { - input => new Lexer(input).lexAll() + // measure method "longVar" in { + // using(longVars) in { + // input => new Lexer(input).lexAll() + // } + // } + // measure method "multipleVars" in { + // using(multipleVars) in { + // input => new Lexer(input).lexAll() + // } + // } + + measure method "Parser-multipleVars" in { + using(multipleVars) in { + input => { + (new Parser(new StringReader(input))).parse + } } } - measure method "multipleVars" in { + + measure method "BParser-multipleVars" in { using(multipleVars) in { - input => new Lexer(input).lexAll() + input => { + (new BParser(new StringReader(input))).parse + } } } } diff --git a/syntax/src/main/java/org/enso/syntax/text/xx/Parser.java b/syntax/src/main/java/org/enso/syntax/text/xx/Parser.java index 43ca8eacb81..b481adf28b9 100644 --- a/syntax/src/main/java/org/enso/syntax/text/xx/Parser.java +++ b/syntax/src/main/java/org/enso/syntax/text/xx/Parser.java @@ -32,9 +32,9 @@ /* First part of user declarations. */ -/* "../../text/../../../../../../../target/Parser.java":36 */ /* lalr1.java:91 */ +/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":36 */ /* lalr1.java:91 */ -/* "../../text/../../../../../../../target/Parser.java":38 */ /* lalr1.java:92 */ +/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":38 */ /* lalr1.java:92 */ /* "%code imports" blocks. */ /* "rules.y":7 */ /* lalr1.java:93 */ @@ -42,7 +42,7 @@ package org.enso.syntax.text.xx; import org.enso.syntax.text.parser.AST; import org.enso.syntax.text.lexer.Token; -/* "../../text/../../../../../../../target/Parser.java":46 */ /* lalr1.java:93 */ +/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":46 */ /* lalr1.java:93 */ /** * A Bison parser, automatically generated from rules.y. @@ -75,13 +75,19 @@ public class Parser /** Token number,to be returned by the scanner. */ static final int VAR = 258; /** Token number,to be returned by the scanner. */ - static final int EOL = 259; + static final int CONS = 259; /** Token number,to be returned by the scanner. */ - static final int BLOCK_BEGIN = 260; + static final int EOL = 260; /** Token number,to be returned by the scanner. */ - static final int BLOCK_END = 261; + static final int GROUP_BEGIN = 261; /** Token number,to be returned by the scanner. */ - static final int CONS = 262; + static final int GROUP_END = 262; + /** Token number,to be returned by the scanner. */ + static final int BLOCK_BEGIN = 263; + /** Token number,to be returned by the scanner. */ + static final int BLOCK_END = 264; + /** Token number,to be returned by the scanner. */ + static final int BLOCK_INVALID = 265; @@ -319,62 +325,104 @@ public class Parser { case 2: if (yyn == 2) - /* "rules.y":49 */ /* lalr1.java:489 */ + /* "rules.y":57 */ /* lalr1.java:489 */ {result=((AST)(yystack.valueAt (1-(1))));}; break; case 4: if (yyn == 4) - /* "rules.y":53 */ /* lalr1.java:489 */ + /* "rules.y":61 */ /* lalr1.java:489 */ {yyval=((AST)(yystack.valueAt (1-(1))));}; break; case 5: if (yyn == 5) - /* "rules.y":54 */ /* lalr1.java:489 */ + /* "rules.y":62 */ /* lalr1.java:489 */ {yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));}; break; case 6: if (yyn == 6) - /* "rules.y":55 */ /* lalr1.java:489 */ - {yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));}; + /* "rules.y":65 */ /* lalr1.java:489 */ + {yyval=((AST)(yystack.valueAt (1-(1))));}; break; case 7: if (yyn == 7) - /* "rules.y":58 */ /* lalr1.java:489 */ - {yyval=((AST)(yystack.valueAt (2-(2))));}; + /* "rules.y":66 */ /* lalr1.java:489 */ + {yyval=((AST)(yystack.valueAt (1-(1))));}; break; case 8: if (yyn == 8) - /* "rules.y":61 */ /* lalr1.java:489 */ - {yyval=((AST)(yystack.valueAt (3-(1))));}; + /* "rules.y":67 */ /* lalr1.java:489 */ + {yyval=AST.grouped(((Token)(yystack.valueAt (3-(1)))),((AST)(yystack.valueAt (3-(2)))),((Token)(yystack.valueAt (3-(3)))));}; break; case 9: if (yyn == 9) - /* "rules.y":62 */ /* lalr1.java:489 */ - {yyval=AST.emptyBlock();}; + /* "rules.y":68 */ /* lalr1.java:489 */ + {yyval=((AST)(yystack.valueAt (2-(2))));}; break; case 10: if (yyn == 10) - /* "rules.y":65 */ /* lalr1.java:489 */ + /* "rules.y":71 */ /* lalr1.java:489 */ + {yyval=((AST)(yystack.valueAt (1-(1))));}; + break; + + + case 11: + if (yyn == 11) + /* "rules.y":72 */ /* lalr1.java:489 */ + {yyval=AST.app(((AST)(yystack.valueAt (2-(1)))),((AST)(yystack.valueAt (2-(2)))));}; + break; + + + case 12: + if (yyn == 12) + /* "rules.y":77 */ /* lalr1.java:489 */ + {yyval=((AST)(yystack.valueAt (2-(2))));}; + break; + + + case 13: + if (yyn == 13) + /* "rules.y":80 */ /* lalr1.java:489 */ + {yyval=((AST)(yystack.valueAt (3-(1))));}; + break; + + + case 14: + if (yyn == 14) + /* "rules.y":81 */ /* lalr1.java:489 */ + {yyval=AST.emptyBlock();}; + break; + + + case 15: + if (yyn == 15) + /* "rules.y":84 */ /* lalr1.java:489 */ + {yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));}; + break; + + + case 16: + if (yyn == 16) + /* "rules.y":85 */ /* lalr1.java:489 */ {yyval=AST.fromToken(((Token)(yystack.valueAt (1-(1)))));}; break; -/* "../../text/../../../../../../../target/Parser.java":378 */ /* lalr1.java:489 */ +/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":426 */ /* lalr1.java:489 */ default: break; } @@ -691,8 +739,9 @@ public class Parser { return new byte[] { - 5, -4, 9, 2, -4, -4, 5, -4, -4, -2, - -4, 5, -4, -4 + 9, -4, -4, -2, 9, 11, 9, -4, -4, -4, + 15, -4, 0, -4, -4, -4, -4, -4, 9, -4, + -4 }; } @@ -704,8 +753,9 @@ public class Parser { return new byte[] { - 3, 10, 0, 2, 4, 1, 0, 6, 5, 0, - 7, 0, 9, 8 + 3, 15, 16, 0, 0, 0, 2, 4, 7, 6, + 9, 10, 0, 12, 1, 5, 8, 11, 0, 14, + 13 }; } @@ -715,7 +765,7 @@ public class Parser { return new byte[] { - -4, -4, 10, -4, 0, -3 + -4, -4, 14, 4, -4, -4, 2, -3 }; } @@ -725,7 +775,7 @@ public class Parser { return new byte[] { - -1, 2, 9, 7, 10, 4 + -1, 5, 12, 7, 10, 8, 13, 9 }; } @@ -737,8 +787,9 @@ public class Parser { return new byte[] { - 8, 1, 11, 6, 12, 1, 8, 6, 1, 5, - 3, 13 + 11, 1, 2, 1, 2, 18, 3, 17, 4, 19, + 15, 14, 1, 2, 6, 3, 15, 4, 1, 2, + 20, 0, 16 }; } @@ -747,8 +798,9 @@ private static final byte yycheck_[] = yycheck_init(); { return new byte[] { - 3, 3, 4, 5, 6, 3, 9, 5, 3, 0, - 0, 11 + 3, 3, 4, 3, 4, 5, 6, 10, 8, 9, + 6, 0, 3, 4, 0, 6, 12, 8, 3, 4, + 18, -1, 7 }; } @@ -759,8 +811,9 @@ private static final byte yycheck_[] = yycheck_init(); { return new byte[] { - 0, 3, 9, 10, 13, 0, 5, 11, 13, 10, - 12, 4, 6, 12 + 0, 3, 4, 6, 8, 12, 13, 14, 16, 18, + 15, 18, 13, 17, 0, 14, 7, 18, 5, 9, + 17 }; } @@ -770,8 +823,8 @@ private static final byte yycheck_[] = yycheck_init(); { return new byte[] { - 0, 8, 9, 9, 10, 10, 10, 11, 12, 12, - 13 + 0, 11, 12, 12, 13, 13, 14, 14, 14, 14, + 15, 15, 16, 17, 17, 18, 18 }; } @@ -781,8 +834,8 @@ private static final byte yycheck_[] = yycheck_init(); { return new byte[] { - 0, 2, 1, 0, 1, 2, 2, 2, 3, 2, - 1 + 0, 2, 1, 0, 1, 2, 1, 1, 3, 2, + 1, 2, 2, 3, 2, 1, 1 }; } @@ -793,7 +846,8 @@ private static final byte yycheck_[] = yycheck_init(); { return new short[] { - 0, 256, 257, 258, 259, 260, 261, 262 + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265 }; } @@ -804,8 +858,9 @@ private static final byte yycheck_[] = yycheck_init(); { return new String[] { - "$end", "error", "$undefined", "VAR", "EOL", "BLOCK_BEGIN", "BLOCK_END", - "CONS", "$accept", "program", "expr", "block", "blockBody", "tok", null + "$end", "error", "$undefined", "VAR", "CONS", "EOL", "GROUP_BEGIN", + "GROUP_END", "BLOCK_BEGIN", "BLOCK_END", "BLOCK_INVALID", "$accept", + "program", "expr", "exprItem", "expr_group", "block", "blockBody", "tok", null }; } @@ -815,8 +870,8 @@ private static final byte yycheck_[] = yycheck_init(); { return new byte[] { - 0, 49, 49, 50, 53, 54, 55, 58, 61, 62, - 65 + 0, 57, 57, 58, 61, 62, 65, 66, 67, 68, + 71, 72, 77, 80, 81, 84, 85 }; } @@ -872,7 +927,7 @@ private static final byte yycheck_[] = yycheck_init(); 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7 + 5, 6, 7, 8, 9, 10 }; } @@ -884,15 +939,15 @@ private static final byte yycheck_[] = yycheck_init(); return yyundef_token_; } - private static final int yylast_ = 11; - private static final int yynnts_ = 6; + private static final int yylast_ = 22; + private static final int yynnts_ = 8; private static final int yyempty_ = -2; - private static final int yyfinal_ = 5; + private static final int yyfinal_ = 14; private static final int yyterror_ = 1; private static final int yyerrcode_ = 256; - private static final int yyntokens_ = 8; + private static final int yyntokens_ = 11; - private static final int yyuser_token_number_max_ = 262; + private static final int yyuser_token_number_max_ = 265; private static final int yyundef_token_ = 2; /* User implementation code. */ @@ -901,11 +956,11 @@ private static final byte yycheck_[] = yycheck_init(); public AST result; -/* "../../text/../../../../../../../target/Parser.java":905 */ /* lalr1.java:1066 */ +/* "../../../../../../java/org/enso/syntax/text/xx/Parser.java":960 */ /* lalr1.java:1066 */ } -/* "rules.y":88 */ /* lalr1.java:1070 */ +/* "rules.y":108 */ /* lalr1.java:1070 */ // class CalcLexer implements Calc.Lexer { diff --git a/syntax/src/main/jflex/test.flex b/syntax/src/main/jflex/test.flex index a7031eeae86..d63529d3106 100644 --- a/syntax/src/main/jflex/test.flex +++ b/syntax/src/main/jflex/test.flex @@ -9,7 +9,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*; %{ - private int indent = 0; + int currentBlock = 0; /////////////////////// // Indent Management // @@ -25,7 +25,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*; return indentStack.pop(); } - public final Integer indentx() { + public final Integer indent() { return indentStack.peek(); } @@ -108,6 +108,7 @@ import static org.enso.syntax.text.xx.Parser.Lexer.*; ////////////////// int var() {value = token(new Var (yytext())); return VAR;} +int cons() {value = token(new Cons (yytext())); return CONS;} // Utils void whitespace() {lastOffset += yylength();} @@ -133,10 +134,13 @@ void whitespace() {lastOffset += yylength();} // Token disabled() {pushState(CHECK_OP_SFX); return disabled_();} // Layout +int blockBegin(int i) {pushIndent(i); value = token(BlockBegin$.MODULE$); return BLOCK_BEGIN;} +int blockEnd() {popIndent(); value = token(BlockEnd$.MODULE$); return BLOCK_END;} +int blockInvalid() {value = token(BlockInvalid$.MODULE$); return BLOCK_INVALID;} + int newline() {value = token(EOL$.MODULE$); return EOL;} -int blockBegin() {return BLOCK_BEGIN;} -int blockEnd() {return BLOCK_END;} -// Token groupBegin() {return token(GroupBegin$.MODULE$);} +int groupBegin() {value = token(GroupBegin$.MODULE$); return GROUP_BEGIN;} +int groupEnd() {value = token(GroupEnd$.MODULE$); return GROUP_END;} // Token groupEnd() {return token(GroupEnd$.MODULE$);} // Token listBegin() {return token(ListBegin$.MODULE$);} // Token listEnd() {return token(ListEnd$.MODULE$);} @@ -223,7 +227,7 @@ int blockEnd() {return BLOCK_END;} alpha_upper = [A-Z] alpha_lower = [a-z] alpha = {alpha_lower} | {alpha_upper} -alphanum = {alpha} | digit +alphanum = {alpha} | {digit} whitespace = [\ ] newline = \r|\n|\r\n @@ -263,6 +267,7 @@ decimal = {digit}+ %xstate COMMENT %xstate COMMENT_LINE %xstate NEWLINE +%xstate BLOCK_ENDING %state TEXT_INTERPOLATE @@ -454,23 +459,47 @@ decimal = {digit}+ /////////////////////// { - {whitespace}+ { + {whitespace}+{newline} { whitespace(); - popState(); - Integer ind = yylength(); - if (ind > indentx()) { - return blockBegin(); - } else { - // TODO - } - - } - [^] { - indent = 0; - popState(); - rewind(); return newline(); } + {whitespace}+ { + whitespace(); + popState(); + currentBlock = yylength(); + if (currentBlock > indent()) { + return blockBegin(currentBlock); + } else if (currentBlock < indent()) { + pushState(BLOCK_ENDING); + } + } + [^] { + rewind(); + popState(); + currentBlock = 0; + if (indent() > 0) { + pushState(BLOCK_ENDING); + } else { + return newline(); + } + } +} + + { + + [^] { + rewind(); + if(currentBlock == indent()) { + popState(); + } else if(currentBlock < indent()) { + return blockEnd(); + } else { + popState(); + return blockInvalid(); + } + + } + } @@ -482,8 +511,7 @@ decimal = {digit}+ // // Identifiers {var} {return var();} -// {var} {return var();} -// {cons} {return cons();} +{cons} {return cons();} // {wildcard} {return wildcard();} // // Operators @@ -500,9 +528,9 @@ decimal = {digit}+ // {modifier} {return modifier();} // (\#\=) {return disabled();} -// // Layout -// (\() {return groupBegin();} -// (\)) {return groupEnd();} +// Layout +(\() {return groupBegin();} +(\)) {return groupEnd();} // (\[) {return listBegin();} // (\]) {return listEnd();} // (\{) {return recordBegin();} @@ -543,7 +571,7 @@ decimal = {digit}+ // Layout {whitespace}+ {whitespace();} -{newline} {pushState(NEWLINE);} +{newline} {pushState(NEWLINE);return newline();} // // Unknown // [^] { diff --git a/syntax/src/main/scala/org/enso/syntax/Main.scala b/syntax/src/main/scala/org/enso/syntax/Main.scala index 9bc8527de37..ac352069408 100644 --- a/syntax/src/main/scala/org/enso/syntax/Main.scala +++ b/syntax/src/main/scala/org/enso/syntax/Main.scala @@ -1,163 +1,55 @@ - package org.enso.syntax -// import org.enso.syntax.text.parser.{Parser} -import java.io.{Reader, StringReader} - -import org.enso.syntax.text.lexer.{Scanner, EOF, Token} -import org.enso.syntax.text.xx.Parser -import org.enso.syntax.text.xx.Parser.Lexer._ -// import org.enso.syntax.text.{parser => AST} -import org.enso.syntax.text.parser.AST - -class SS(scanner:Scanner) extends Parser.Lexer { - private var _done = false - - def getLVal():Token = { - scanner.value - } - - def yyerror(s:String) { - println("!!! ERROR !!!") - println(s) - } - - def yylex():Int = { - scanner.lex - } - - def lex(): Token = { - val tok = yylex - if (tok == -1) { - _done = true; - return Token(EOF,0,0) - } - return getLVal - } - - def lexAll(): Vector[Token] = { - var builder = Vector.newBuilder[Token] - do { - builder += lex - } while (!_done) - builder.result - } - - def lexAll2(): (Vector[Int],Vector[Token]) = { - var builder_t = Vector.newBuilder[Int] - var builder = Vector.newBuilder[Token] - do { - val tok = yylex - var tval = getLVal - if (tok == -1) { - tval = Token(EOF,0,0) - _done = true - } - builder_t += tok - builder += tval - } while (!_done) - (builder_t.result, builder.result) - } - -} - - -class PP(reader:Reader) { - val lexer = new SS(new Scanner(reader)) - - - ////////////////////// - // Token Management // - ////////////////////// - - val (itokens, tokens) = lexer.lexAll2() - var tokenIx = 0 - var current : Token = tokens(tokenIx) - var icurrent : Int = itokens(tokenIx) - - def step(): Unit = { - if (tokenIx == tokens.size - 1) { - return - } - tokenIx += 1 - current = tokens(tokenIx) - icurrent = itokens(tokenIx) - } - - - - - - def parse():Option[AST] = { - manyWith(AST.app,()=>tok) - } - - - def tok():Option[AST] = { - token(VAR).map(AST.fromToken) - } - - // def block - - - - - def or[T](l:()=>Option[T],r:()=>Option[T]) { - l() match { - case Some(a) => Some(a) - case None => r() - } - } - - def token(t:Int):Option[Token] = { - if(icurrent==t) { - val result = Some(current) - step - result - } else { - None - } - } - - - - def manyWith(concat:(AST,AST)=>AST,f:()=>Option[AST]): Option[AST] = { - f() match { - case None => None - case Some(ast) => { - Some(manyWith_(concat,f,ast)) - } - } - } - - def manyWith_(concat:(AST,AST)=>AST,f:()=>Option[AST],ast:AST): AST = { - f() match { - case None => ast - case Some(ast2) => { - manyWith_(concat,f,concat(ast,ast2)) - } - } - } -} - +import java.io.StringReader +import org.enso.syntax.text.lexer.Lexer +import org.enso.syntax.text.parser.Parser +import org.enso.syntax.text.parser.BParser +import scala.language.implicitConversions object Main extends App { - val str = "a b" - val reader = new StringReader(str) - val reader2 = new StringReader(str) - val scanner = new Scanner(reader) - val scanner2 = new Scanner(reader2) - val ss = new SS(scanner) - val ss2 = new SS(scanner2) - val parser = new Parser(ss) - val pp = new PP(new StringReader(str)) - pprint.pprintln(ss2.lexAll) - pprint.pprintln(pp.parse) - pprint.pprintln(parser.parse) - // val parser = new Parser(reader) - // val ast = parser.parse - pprint.pprintln(parser.result,width=3,height=1000) -} \ No newline at end of file + var indent = 0 + + def pprint(s: String) { + print(" " * indent) + val (l, r2) = s.span((x) => (x != '(' && x != ')')) + print(l) + if (r2 == "") { + println + return + } + + val (m, r) = r2.splitAt(1) + + if (m == "(") { + indent += 1 + println(m) + pprint(r) + } else if (m == ")") { + indent -= 1 + println(m) + pprint(r) + } + + } + +// val str = "a (b" + val str = + """|a + | + | a + | c""".stripMargin + println(str) + val reader = new StringReader(str) + val ss = new Lexer(reader) + pprint(ss.lexAll.toString()) + + val bparser = new BParser(new StringReader(str)) + val parser = new Parser(new StringReader(str)) + + pprint(bparser.parse.toString()) + pprint(parser.parse.toString()) + pprint("!") +} diff --git a/syntax/src/main/scala/org/enso/syntax/text/lexer/Lexer.scala b/syntax/src/main/scala/org/enso/syntax/text/lexer/Lexer.scala index 1b52ebc2556..b104bed8355 100644 --- a/syntax/src/main/scala/org/enso/syntax/text/lexer/Lexer.scala +++ b/syntax/src/main/scala/org/enso/syntax/text/lexer/Lexer.scala @@ -1,8 +1,56 @@ package org.enso.syntax.text.lexer -import java.io.{StringReader, Reader} +import java.io.StringReader +import java.io.Reader import scala.collection.immutable.Vector +import org.enso.syntax.text.xx.Parser +class Lexer(reader: Reader) extends Scanner(reader) with Parser.Lexer { + private var _done = false + + def getLVal(): Token = { + value + } + + def yyerror(s: String) { + println("!!! ERROR !!!") + println(s) + } + + def lexTok(): Token = { + val tok = yylex + if (tok == -1) { + _done = true; + return Token(EOF, 0, 0) + } + return getLVal + } + + def lexAll(): Vector[Token] = { + var builder = Vector.newBuilder[Token] + do { + builder += lexTok + } while (!_done) + builder.result + } + + def lexAll2(): (Vector[Int], Vector[Token]) = { + var builder_t = Vector.newBuilder[Int] + var builder = Vector.newBuilder[Token] + do { + val tok = yylex + var tval = getLVal + if (tok == -1) { + tval = Token(EOF, 0, 0) + _done = true + } + builder_t += tok + builder += tval + } while (!_done) + (builder_t.result, builder.result) + } + +} // class Lexer (reader:Reader) { // val scanner = new Scanner(reader) // private var _done = false @@ -39,4 +87,4 @@ import scala.collection.immutable.Vector // def done(): Boolean = { // return _done; // } -// } \ No newline at end of file +// } diff --git a/syntax/src/main/scala/org/enso/syntax/text/lexer/Token.scala b/syntax/src/main/scala/org/enso/syntax/text/lexer/Token.scala index dbb5821ab3f..7b91342c448 100644 --- a/syntax/src/main/scala/org/enso/syntax/text/lexer/Token.scala +++ b/syntax/src/main/scala/org/enso/syntax/text/lexer/Token.scala @@ -4,139 +4,140 @@ package org.enso.syntax.text.lexer // Token // /////////// -case class Token (symbol:Symbol, offset:Int, span:Int) - +case class Token(symbol: Symbol, offset: Int, span: Int) //////////// // Symbol // //////////// -abstract class Symbol +abstract class Symbol // Identifiers -case class Var (name:String) extends Symbol -case class Cons (name:String) extends Symbol -case object Wildcard extends Symbol +case class Var(name: String) extends Symbol +case class Cons(name: String) extends Symbol +case object Wildcard extends Symbol // Operators -case class Operator (name:String) extends Symbol -case class Modifier (name:String) extends Symbol -case object DisabledAssignment extends Symbol +case class Operator(name: String) extends Symbol +case class Modifier(name: String) extends Symbol +case object DisabledAssignment extends Symbol // Layout -case object EOL extends Symbol -case object BOF extends Symbol -case object EOF extends Symbol -case object GroupBegin extends Symbol -case object GroupEnd extends Symbol -case object ListBegin extends Symbol -case object ListEnd extends Symbol -case object RecordBegin extends Symbol -case object RecordEnd extends Symbol +case object EOL extends Symbol +case object BOF extends Symbol +case object EOF extends Symbol +case object BlockBegin extends Symbol +case object BlockEnd extends Symbol +case object BlockInvalid extends Symbol + +case object GroupBegin extends Symbol +case object GroupEnd extends Symbol +case object ListBegin extends Symbol +case object ListEnd extends Symbol +case object RecordBegin extends Symbol +case object RecordEnd extends Symbol // Literals -case object TextBegin extends Symbol -case object TextEnd extends Symbol -case object TextRawBegin extends Symbol -case object TextRawEnd extends Symbol -case class Text (text:String) extends Symbol -case class TextEscape (esc:TextEscapeType) extends Symbol -case object TextInterpolateBegin extends Symbol -case object TextInterpolateEnd extends Symbol -case class Number (base:Int - ,intPart:List[Int] - ,fracPart:List[Int]) extends Symbol +case object TextBegin extends Symbol +case object TextEnd extends Symbol +case object TextRawBegin extends Symbol +case object TextRawEnd extends Symbol +case class Text(text: String) extends Symbol +case class TextEscape(esc: TextEscapeType) extends Symbol +case object TextInterpolateBegin extends Symbol +case object TextInterpolateEnd extends Symbol +case class Number(base: Int, intPart: List[Int], fracPart: List[Int]) + extends Symbol // Invalid -case class Invalid (reason:InvalidReason) extends Symbol -case class Unmatched (char:String) extends Symbol +case class Invalid(reason: InvalidReason) extends Symbol +case class Unmatched(char: String) extends Symbol // Comments -case object Comment extends Symbol -case class CommentBody (text:String) extends Symbol - - +case object Comment extends Symbol +case class CommentBody(text: String) extends Symbol ////////////////// // Text Escapes // ////////////////// abstract class TextEscapeType -case class CharEscape (code:Int) extends TextEscapeType -case class CtrlEscape (code:Int) extends TextEscapeType -case class IntEscape (code:Int) extends TextEscapeType -case class Uni16Escape (code:Int) extends TextEscapeType -case class Uni32Escape (code:Int) extends TextEscapeType -case class Uni21Escape (code:Int) extends TextEscapeType -case object QuoteEscape extends TextEscapeType -case object RawQuoteEscape extends TextEscapeType -case object SlashEscape extends TextEscapeType -case class InvalidCharEscape (char:Char) extends TextEscapeType -case class InvalidUni32Escape (str:String) extends TextEscapeType -case class InvalidUni21Escape (str:String) extends TextEscapeType - - +case class CharEscape(code: Int) extends TextEscapeType +case class CtrlEscape(code: Int) extends TextEscapeType +case class IntEscape(code: Int) extends TextEscapeType +case class Uni16Escape(code: Int) extends TextEscapeType +case class Uni32Escape(code: Int) extends TextEscapeType +case class Uni21Escape(code: Int) extends TextEscapeType +case object QuoteEscape extends TextEscapeType +case object RawQuoteEscape extends TextEscapeType +case object SlashEscape extends TextEscapeType +case class InvalidCharEscape(char: Char) extends TextEscapeType +case class InvalidUni32Escape(str: String) extends TextEscapeType +case class InvalidUni21Escape(str: String) extends TextEscapeType ///////////// // Invalid // ///////////// abstract class InvalidReason -case class UnexpectedSuffix (text:String) extends InvalidReason - - +case class UnexpectedSuffix(text: String) extends InvalidReason //////////////// // Companions // //////////////// object Number { - def charToDigit (char:Char): Int = { + + def charToDigit(char: Char): Int = { val i = char.toInt - if (i >= 48 && i <= 57) { return i - 48 } // 0 to 9 - if (i >= 65 && i <= 90) { return i - 55 } // A to Z + if (i >= 48 && i <= 57) { return i - 48 } // 0 to 9 + if (i >= 65 && i <= 90) { return i - 55 } // A to Z if (i >= 97 && i <= 122) { return i - 87 } // a to z return -1 } - def stringToDigits (str:String): List[Int] = { + def stringToDigits(str: String): List[Int] = { str.toList.map(charToDigit) } - def fromString(base:String, intPart:String, fracPart:String): Number = { + def fromString(base: String, intPart: String, fracPart: String): Number = { val base2 = if (base == "") 10 else base.toInt - return Number(base2,stringToDigits(intPart), stringToDigits(fracPart)) + return Number(base2, stringToDigits(intPart), stringToDigits(fracPart)) } } object IntEscape { - def fromString(code:String): IntEscape = { + + def fromString(code: String): IntEscape = { IntEscape(code.toInt) } } object CharEscape { - def fromChar(c:Char): CharEscape = { + + def fromChar(c: Char): CharEscape = { CharEscape(c.toInt) } } object Uni32Escape { - def fromString(str:String): TextEscapeType = { + + def fromString(str: String): TextEscapeType = { try { - return Uni32Escape(Integer.parseInt(str,16)) + return Uni32Escape(Integer.parseInt(str, 16)) } catch { - case e:Exception => return InvalidUni32Escape(str) + case e: Exception => return InvalidUni32Escape(str) } } } object Uni21Escape { - def fromString(str:String): TextEscapeType = { + + def fromString(str: String): TextEscapeType = { try { - return Uni21Escape(Integer.parseInt(str,16)) + return Uni21Escape(Integer.parseInt(str, 16)) } catch { - case e:Exception => return InvalidUni21Escape(str) + case e: Exception => return InvalidUni21Escape(str) } } } diff --git a/syntax/src/main/scala/org/enso/syntax/text/parser/AST.scala b/syntax/src/main/scala/org/enso/syntax/text/parser/AST.scala index 2b9998aa55e..c013a19ac4b 100644 --- a/syntax/src/main/scala/org/enso/syntax/text/parser/AST.scala +++ b/syntax/src/main/scala/org/enso/syntax/text/parser/AST.scala @@ -3,44 +3,56 @@ package org.enso.syntax.text.parser import org.enso.syntax.text.lexer.Token import org.enso.syntax.text.{lexer => token} - ///////// // AST // ///////// -case class AST (offset:Int, span:Int, symbol:Symbol) - +case class AST(offset: Int, span: Int, symbol: Symbol) +// class Sym[T](offset:Int, span:Int, element:T) //////////// // Symbol // //////////// -abstract class Symbol -case object NONE extends Symbol +trait Symbol + +case object NONE extends Symbol // Identifiers -case class Var (name:String) extends Symbol -case class Operator (name:String) extends Symbol -case class App (func:AST, arg:AST) extends Symbol -case class Block (body:Vector[AST]) extends Symbol - - - +case class Var(name: String) extends Symbol +case class Cons(name: String) extends Symbol +case class Operator(name: String) extends Symbol +case class App(func: AST, arg: AST) extends Symbol +case class Block(body: Vector[AST]) extends Symbol +case class Grouped(body: AST) extends Symbol // - object AST { - def fromToken(tok:Token):AST = { + + def fromToken(tok: Token): AST = { tok.symbol match { - case token.Var(name) => AST(0,0,Var(name)) + case token.Var(name) => AST(tok.offset, tok.span, Var(name)) + case token.Cons(name) => AST(tok.offset, tok.span, Cons(name)) } } - def app(fn:AST, arg:AST):AST = { - AST(fn.offset,fn.span + arg.span,App(fn.copy(offset=0),arg)) + + def app(fn: AST, arg: AST): AST = { + AST(fn.offset, fn.span + arg.span, App(fn.copy(offset = 0), arg)) } - def emptyBlock():AST = { - AST(0,0,Block(Vector())) + + def emptyBlock(): AST = { + AST(0, 0, Block(Vector())) } -} \ No newline at end of file + + def block(lines: Vector[AST]): AST = { + AST(0, 0, Block(lines)) + } + + def grouped(begin: Token, body: AST, end: Token): AST = { + val offset = begin.offset + val span = begin.span + body.offset + body.span + end.offset + end.span + AST(offset, span, Grouped(body)) + } +} diff --git a/syntax/src/main/scala/org/enso/syntax/text/parser/Parser.scala b/syntax/src/main/scala/org/enso/syntax/text/parser/Parser.scala index 5a5b5327de1..7b9c3b95f81 100644 --- a/syntax/src/main/scala/org/enso/syntax/text/parser/Parser.scala +++ b/syntax/src/main/scala/org/enso/syntax/text/parser/Parser.scala @@ -1,188 +1,170 @@ package org.enso.syntax.text.parser -// import java.io.{Reader} -// import org.enso.syntax.text.lexer.{Lexer, Token} -// import org.enso.syntax.text.{lexer => token} -// import scala.collection.immutable.{Vector} -// import scala.collection.mutable.{Builder} +import org.enso.syntax.text.{xx => bison} +import java.io.Reader -// class Parser(reader:Reader) { -// val lexer = new Lexer(reader) +import org.enso.syntax.text.xx.Parser.Lexer._ +import org.enso.syntax.text.lexer.Token +import org.enso.syntax.text.lexer.Lexer +import scala.collection.immutable.VectorBuilder +import scala.collection.mutable -// ////////////////////// -// // Token Management // -// ////////////////////// +////////////////// +// Parser Rules // +////////////////// -// val tokens = lexer.lexAll() -// var tokenIx = 0 -// var current : Token = tokens(tokenIx) +case class Rule[T](unwrap: () => Option[T]) { + final def run(): Option[T] = unwrap() + final def apply(): Option[T] = run -// def step(): Token = { -// if (tokenIx == tokens.size - 1) { -// return Token(token.EOF,0,0) -// } -// tokenIx += 1 -// current = tokens(tokenIx) -// if (current.symbol == token.EOL) { -// line += 1 -// column = 0 -// } else { -// column += current.offset + current.span -// } -// current -// } + final def mapOption[S](f: Option[T] => Option[S]): Rule[S] = + Rule(() => f(run)) -// def lookup(i:Int=1): Token = { -// val ix = tokenIx + i -// if (ix >= 0 && ix < tokens.size) { -// tokens(ix) -// } else { -// Token(token.EOF,0,0) -// } -// } + final def map[S](f: T => S): Rule[S] = + mapOption(_.map(f)) -// def next(): Token = { -// lookup() -// } - -// def previous(): Token = { -// lookup(-1) -// } + final def flatMap[S](f: T => Rule[S]): Rule[S] = + mapOption(_.flatMap(f(_).run)) + final def |(that: Rule[T]): Rule[T] = this or that + final def or(that: Rule[T]): Rule[T] = mapOption(_.orElse(that.run)) + final def default(v: T): Rule[T] = mapOption(_.orElse(Some(v))) -// // var indents : Stack[Int] = new Stack() -// // indents.push(0) +// final def foreach(f: T => Unit): Unit = run.foreach(f) -// // def indent(): Int = { -// // indents.head -// // } + @scala.annotation.tailrec + final def manyWith(concat: (T, T) => T, t: T): T = { + this.run match { + case None => t + case Some(t2) => { + manyWith(concat, concat(t, t2)) + } + } + } -// var column : Int = 0 -// var line : Int = 0 -// var indent : Int = 0 + final def many1_(): Rule[Unit] = this.flatMap(_ => many_) + final def many_(): Rule[Unit] = this.flatMap(_ => many_).default(Unit) -// def withIndent[T](newIndent:Int,f:()=>T):T = { -// val oldIndent = indent -// indent = newIndent -// val result = f() -// indent = oldIndent -// result -// } + final def fold(concat: (T, T) => T): Rule[T] = + mapOption(_.map(manyWith(concat, _))) +} +/////////////// +// GenParser // +/////////////// +class GenParser(reader: Reader) { + private val lexer = new Lexer(reader) + private val (itokens, tokens) = lexer.lexAll2() + private var tokenIx: Int = 0 + var current: Token = tokens(tokenIx) + private var icurrent: Int = itokens(tokenIx) -// def parse(): AST = { -// expr() match { -// case Some(ast) => ast -// case None => AST(0,0,NONE) -// } -// } + final def step(): Unit = { + if (tokenIx == tokens.size - 1) return + tokenIx += 1 + current = tokens(tokenIx) + icurrent = itokens(tokenIx) + } -// def expr(): Option[AST] = { -// manyWith(app, patternToken) -// // .flatMap(pat => { -// // if(current.symbol == token.EOL && next.offset > indent) { -// // step -// // withIndent(next.offset, () => Some(app(pat,block))) -// // } else { -// // Some(pat) -// // } -// // }) -// } + final def lookahead(shift: Int = 1): Token = tokens(tokenIx + shift) -// def block(): AST = { -// AST(0,0,Block(many(() => blockLine))) -// } + final def token(t: Int, name: String = ""): Rule[Token] = + Rule(() => { + if (icurrent == t) { + val result = Some(current) + step + println("STEP -> ", current, name) + result + } else { + None + } + }) -// def blockLine(): Option[AST] = { -// if(next.offset == indent) { -// val out = expr -// println("!!!!!--") -// println(out) -// out -// } else { -// None -// } -// } +} +//////////// +// Parser // +//////////// -// // def parseExprX(): AST = { -// // current.symbol match { -// // case token.Var(name) => { -// // AST(Var(name),current.offset,current.span) -// // } -// // case x => { -// // println("!!!") -// // println(x) -// // AST(NONE,0,0) -// // } -// // } -// // } +class Parser(reader: Reader) extends GenParser(reader) { + final def parse(): Option[AST] = expr.run + final def expr(): Rule[AST] = tok.fold(AST.app) -// def patternToken(tok:Token): Option[AST] = { -// tok.symbol match { -// case token.Var (name) => Some(AST(tok.offset,tok.span, Var (name))) -// case token.Operator (name) => Some(AST(current.offset,current.span, Operator (name))) -// case token.EOL => { -// if (next.offset > indent) { -// step -// withIndent(next.offset, () => Some(block)) -// } else { -// None -// } -// } -// case _ => None -// } -// } + final def tok(): Rule[AST] = vvar | cons | group | block + final def vvar(): Rule[AST] = token(VAR).map(AST.fromToken) + final def cons(): Rule[AST] = token(CONS).map(AST.fromToken) -// def many(f:()=>Option[AST]): Vector[AST] = { -// f() match { -// case None => Vector() -// case Some(ast) => { -// step -// val builder = Vector.newBuilder[AST] -// builder += ast -// many_(f,builder) -// builder.result -// } -// } -// } + final def group(): Rule[AST] = + token(GROUP_BEGIN).flatMap(beg => { + expr.flatMap(exp => { + token(GROUP_END) + .map(AST.grouped(beg, exp, _)) + .default(exp) + }) + }) -// def many_(f:()=>Option[AST], builder:Builder[AST,Vector[AST]]): Unit = { -// f() match { -// case None => return -// case Some(ast) => { -// builder += ast -// many_(f,builder) -// } -// } -// } + final def block(): Rule[AST] = { + println(">> 1") + val out = token(EOL, "n1").many1_ + .flatMap(_ => token(BLOCK_BEGIN)) + .map(beg => { + val lines = blockLines + AST.block(lines) + }) + println("<< 1") + out + } -// def manyWith(concat:(AST,AST)=>AST,f:(Token)=>Option[AST]): Option[AST] = { -// f(current) match { -// case None => None -// case Some(ast) => { -// step -// Some(manyWith_(concat,f,ast)) -// } -// } -// } + final def blockLines(): Vector[AST] = { + println("--- 1") + println(current) + expr.run match { + case None => { + println("--=--") + Vector[AST]() + } + case Some(firstLine) => { + println("--- 2") + println(current) + println(firstLine) + val lines = Vector.newBuilder[AST] + lines += firstLine + blockLines_(lines) + } + } + } -// def manyWith_(concat:(AST,AST)=>AST,f:(Token)=>Option[AST],ast:AST): AST = { -// f(current) match { -// case None => ast -// case Some(ast2) => { -// step -// manyWith_(concat,f,concat(ast,ast2)) -// } -// } -// } - + final def blockLines_( + lines: mutable.Builder[AST, Vector[AST]]): Vector[AST] = { + var body = true + println("--- 3") + println(current) + while (body) { + println(">> 2") + token(EOL, "n2").many1_.flatMap(_ => expr).run match { + case None => body = false + case Some(e) => lines += e + } + println("<< 2") + } + lines.result + } -// def app(func:AST, arg:AST): AST = { -// AST(func.offset,func.span + arg.span,App(func.copy(offset=0),arg)) -// } -// } +} + +class BParser(reader: Reader) { + val lexer = new Lexer(reader) + val parser = new bison.Parser(lexer) + + def parse(): Option[AST] = { + if (parser.parse) { + Some(parser.result) + } else { + None + } + } +} diff --git a/syntax/src/main/scala/org/enso/syntax/text/parser/rules.y b/syntax/src/main/scala/org/enso/syntax/text/parser/rules.y index 93bb8bc6b16..a31d2bf7dbe 100644 --- a/syntax/src/main/scala/org/enso/syntax/text/parser/rules.y +++ b/syntax/src/main/scala/org/enso/syntax/text/parser/rules.y @@ -28,19 +28,27 @@ import org.enso.syntax.text.lexer.Token; /* Bison Declarations */ %token VAR +%token CONS %token EOL +%token GROUP_BEGIN +%token GROUP_END + %token BLOCK_BEGIN %token BLOCK_END +%token BLOCK_INVALID -%token CONS %type expr +%type exprItem +%type expr_group %type block %type blockBody %type tok %start program +%right GROUP_BEGIN GROUP_END + ///////////// // Grammar // ///////////// @@ -50,9 +58,20 @@ program: | /* empty */ expr: - tok {$$=$1;} -| expr tok {$$=AST.app($1,$2);} -| expr block {$$=AST.app($1,$2);} + exprItem {$$=$1;} +| expr exprItem {$$=AST.app($1,$2);} + +exprItem: + tok {$$=$1;}; +| block {$$=$1;}; +| GROUP_BEGIN expr_group GROUP_END {$$=AST.grouped($1,$2,$3);}; +| GROUP_BEGIN expr_group {$$=$2;}; + +expr_group: + tok {$$=$1;} +| expr_group tok {$$=AST.app($1,$2);} + + block: BLOCK_BEGIN blockBody {$$=$2;} @@ -62,7 +81,8 @@ blockBody: | expr BLOCK_END {$$=AST.emptyBlock();} tok: - VAR {$$=AST.fromToken($1);} + VAR {$$=AST.fromToken($1);} +| CONS {$$=AST.fromToken($1);}