Updating lexer and parser rules

This commit is contained in:
Wojciech Danilo 2019-06-07 10:54:20 +02:00
parent a9b1cc3120
commit f111b7bba8
7 changed files with 888 additions and 493 deletions

View File

@ -34,4 +34,4 @@ lazy val basic = Project(
SbtJFlexPlugin.jflexSettings SbtJFlexPlugin.jflexSettings
mainClass in (Compile, run) := Some("org.enso.syntax.text.Main") mainClass in (Compile, run) := Some("org.enso.main.Main")

View File

@ -1,5 +1,6 @@
package org.enso.syntax.text.lexer; package org.enso.syntax.text.lexer;
import java.util.Stack; import java.util.Stack;
import static org.enso.syntax.text.xx.Parser.Lexer.*;
/** /**
* Enso lexical scanner * Enso lexical scanner
@ -8,7 +9,25 @@ import java.util.Stack;
%{ %{
private int lineIndent = 0; private int indent = 0;
///////////////////////
// Indent Management //
///////////////////////
private Stack<Integer> indentStack = new Stack<Integer>();
public final void pushIndent(int i) {
indentStack.push(i);
}
public final Integer popIndent() {
return indentStack.pop();
}
public final Integer indentx() {
return indentStack.peek();
}
///////////////////////////// /////////////////////////////
@ -64,7 +83,7 @@ import java.util.Stack;
return zzAtEOF; return zzAtEOF;
} }
public Token lex() throws java.io.IOException { public int lex() throws java.io.IOException {
return yylex(); return yylex();
} }
@ -80,93 +99,102 @@ import java.util.Stack;
} }
//////////////////
// Constructors //
//////////////////
// Utils public Token value;
void whitespace() {lastOffset += yylength();}
Symbol invalid(InvalidReason reason) {
return new Invalid (reason);
}
// Identifiers
Token var_() {return token(new Var (yytext()));} //////////////////
Token cons_() {return token(new Cons (yytext()));} // Constructors //
Token wildcard_() {return token(Wildcard$.MODULE$);} //////////////////
Token var() {pushState(CHECK_IDENT_SFX); return var_();}
Token cons() {pushState(CHECK_IDENT_SFX); return cons_();} int var() {value = token(new Var (yytext())); return VAR;}
Token wildcard() {pushState(CHECK_IDENT_SFX); return wildcard_();}
Token errorSfx() {return token(invalid(new UnexpectedSuffix(yytext())));} // Utils
void whitespace() {lastOffset += yylength();}
// Symbol invalid(InvalidReason reason) {
// return new Invalid (reason);
// }
// // Identifiers
// Token var_() {return token(new Var (yytext()));}
// Token cons_() {return token(new Cons (yytext()));}
// Token wildcard_() {return token(Wildcard$.MODULE$);}
// Token var() {pushState(CHECK_IDENT_SFX); return var_();}
// Token cons() {pushState(CHECK_IDENT_SFX); return cons_();}
// Token wildcard() {pushState(CHECK_IDENT_SFX); return wildcard_();}
// Token errorSfx() {return token(invalid(new UnexpectedSuffix(yytext())));}
// Operators // // Operators
Token operator_() {return token(new Operator(yytext()));} // Token operator_() {return token(new Operator(yytext()));}
Token modifier_() {return token(new Modifier(yytext()));} // Token modifier_() {return token(new Modifier(yytext()));}
Token disabled_() {return token(DisabledAssignment$.MODULE$);} // Token disabled_() {return token(DisabledAssignment$.MODULE$);}
Token operator() {pushState(CHECK_OP_SFX); return operator_();} // Token operator() {pushState(CHECK_OP_SFX); return operator_();}
Token modifier() {pushState(CHECK_OP_SFX); return modifier_();} // Token modifier() {pushState(CHECK_OP_SFX); return modifier_();}
Token disabled() {pushState(CHECK_OP_SFX); return disabled_();} // Token disabled() {pushState(CHECK_OP_SFX); return disabled_();}
// Layout // Layout
Token newline() {return token(EOL$.MODULE$);} int newline() {value = token(EOL$.MODULE$); return EOL;}
Token groupBegin() {return token(GroupBegin$.MODULE$);} int blockBegin() {return BLOCK_BEGIN;}
Token groupEnd() {return token(GroupEnd$.MODULE$);} int blockEnd() {return BLOCK_END;}
Token listBegin() {return token(ListBegin$.MODULE$);} // Token groupBegin() {return token(GroupBegin$.MODULE$);}
Token listEnd() {return token(ListEnd$.MODULE$);} // Token groupEnd() {return token(GroupEnd$.MODULE$);}
Token recordBegin() {return token(RecordBegin$.MODULE$);} // Token listBegin() {return token(ListBegin$.MODULE$);}
Token recordEnd() {return token(RecordEnd$.MODULE$);} // Token listEnd() {return token(ListEnd$.MODULE$);}
Token unmatched() {return token(new Unmatched(yytext()));} // Token recordBegin() {return token(RecordBegin$.MODULE$);}
// Token recordEnd() {return token(RecordEnd$.MODULE$);}
// Token unmatched() {return token(new Unmatched(yytext()));}
// Numbers // // Numbers
Token number() { // Token number() {
Token num = token(Number.fromString(numberPart1,numberPart2,numberPart3)); // Token num = token(Number.fromString(numberPart1,numberPart2,numberPart3));
numberPart1 = ""; // numberPart1 = "";
numberPart2 = ""; // numberPart2 = "";
numberPart3 = ""; // numberPart3 = "";
return num; // return num;
} // }
// Text // // Text
Token textBegin() {return token(TextBegin$.MODULE$);} // Token textBegin() {return token(TextBegin$.MODULE$);}
Token textEnd() {return token(TextEnd$.MODULE$);} // Token textEnd() {return token(TextEnd$.MODULE$);}
Token textRawBegin() {return token(TextRawBegin$.MODULE$);} // Token textRawBegin() {return token(TextRawBegin$.MODULE$);}
Token textRawEnd() {return token(TextRawEnd$.MODULE$);} // Token textRawEnd() {return token(TextRawEnd$.MODULE$);}
Token text() {return token(new Text(yytext()));} // Token text() {return token(new Text(yytext()));}
Token textIntBegin() {return token(TextInterpolateBegin$.MODULE$);} // Token textIntBegin() {return token(TextInterpolateBegin$.MODULE$);}
Token textIntEnd() {return token(TextInterpolateEnd$.MODULE$);} // Token textIntEnd() {return token(TextInterpolateEnd$.MODULE$);}
// Text Escapes // // Text Escapes
Token slashEsc() {return token(new TextEscape(SlashEscape$.MODULE$));} // Token slashEsc() {return token(new TextEscape(SlashEscape$.MODULE$));}
Token quoteEsc() {return token(new TextEscape(QuoteEscape$.MODULE$));} // Token quoteEsc() {return token(new TextEscape(QuoteEscape$.MODULE$));}
Token rawQuoteEsc() {return token(new TextEscape(RawQuoteEscape$.MODULE$));} // Token rawQuoteEsc() {return token(new TextEscape(RawQuoteEscape$.MODULE$));}
Token charEsc(char c) {return token(new TextEscape(CharEscape.fromChar(c)));} // Token charEsc(char c) {return token(new TextEscape(CharEscape.fromChar(c)));}
Token ctrlEsc(int c) {return token(new TextEscape(new CtrlEscape(c)));} // Token ctrlEsc(int c) {return token(new TextEscape(new CtrlEscape(c)));}
Token intEsc() { // Token intEsc() {
return token(new TextEscape(IntEscape.fromString(yytext().substring(1)))); // return token(new TextEscape(IntEscape.fromString(yytext().substring(1))));
} // }
Token uni16Esc() { // Token uni16Esc() {
String scode = yytext().substring(2); // String scode = yytext().substring(2);
return token(new TextEscape(new Uni16Escape (Integer.parseInt(scode,16)))); // return token(new TextEscape(new Uni16Escape (Integer.parseInt(scode,16))));
} // }
Token uni32Esc() { // Token uni32Esc() {
return token(new TextEscape(Uni32Escape.fromString(yytext().substring(2)))); // return token(new TextEscape(Uni32Escape.fromString(yytext().substring(2))));
} // }
Token uni21Esc() { // Token uni21Esc() {
String scode = yytext(); // String scode = yytext();
scode = scode.substring(3,scode.length()-1); // scode = scode.substring(3,scode.length()-1);
return token(new TextEscape(Uni21Escape.fromString(scode))); // return token(new TextEscape(Uni21Escape.fromString(scode)));
} // }
Token invalidCharEsc(){ // Token invalidCharEsc(){
return token(new TextEscape(new InvalidCharEscape(yytext().charAt(1)))); // return token(new TextEscape(new InvalidCharEscape(yytext().charAt(1))));
} // }
// Comment // // Comment
Token comment() {return token(Comment$.MODULE$);} // Token comment() {return token(Comment$.MODULE$);}
Token commentBody() {return token(new CommentBody(yytext()));} // Token commentBody() {return token(new CommentBody(yytext()));}
%} %}
%init{ %init{
pushState(NEWLINE); // pushState(NEWLINE);
pushIndent(0);
%init} %init}
@ -176,12 +204,14 @@ import java.util.Stack;
///////////// /////////////
%class Scanner %class Scanner
%type Token %int
%public
// %type int
%line %line
%column %column
%char %char
%unicode %unicode
%apiprivate // %apiprivate
// %debug // %debug
@ -238,181 +268,184 @@ decimal = {digit}+
%% %%
///////////////////////
// Unexpected Suffix //
///////////////////////
<CHECK_IDENT_SFX> {
{ident_err_sfx} {return errorSfx();}
[^] {rewind(); popState();}
}
<CHECK_OP_SFX> {
{operator_err_sfx} {return errorSfx();}
[^] {rewind(); popState();}
}
////////// // ///////////////////////
// Text // // // Unexpected Suffix //
////////// // ///////////////////////
<TEXT_INTERPOLATE> { // <CHECK_IDENT_SFX> {
(\`) {popState(); return textIntEnd();} // {ident_err_sfx} {return errorSfx();}
} // [^] {rewind(); popState();}
// }
<TEXT> { // <CHECK_OP_SFX> {
(\')+ { // {operator_err_sfx} {return errorSfx();}
if (yylength() == quoteSize()) { // [^] {rewind(); popState();}
popState(); // }
popQuoteSize();
return textEnd();
} else {
return text();
}
}
// Prim Escapes
(\\\\) {return slashEsc();}
(\\\') {return quoteEsc();}
(\\\") {return rawQuoteEsc();}
(\\[0-9]+) {return intEsc();}
// Escape Characters (https://en.wikipedia.org/wiki/String_literal)
(\\a) {return charEsc('\u0007');} // alert // //////////
(\\b) {return charEsc('\u0008');} // backspace // // Text //
(\\f) {return charEsc('\u000C');} // form feed // //////////
(\\n) {return charEsc('\n') ;} // line feed
(\\r) {return charEsc('\r') ;} // carriage return // <TEXT_INTERPOLATE> {
(\\t) {return charEsc('\u0009');} // horizontal tab // (\`) {popState(); return textIntEnd();}
(\\v) {return charEsc('\u000B');} // vertical tab // }
(\\e) {return charEsc('\u001B');} // escape character
// <TEXT> {
// (\')+ {
// if (yylength() == quoteSize()) {
// popState();
// popQuoteSize();
// return textEnd();
// } else {
// return text();
// }
// }
// // Prim Escapes
// (\\\\) {return slashEsc();}
// (\\\') {return quoteEsc();}
// (\\\") {return rawQuoteEsc();}
// (\\[0-9]+) {return intEsc();}
// // Escape Characters (https://en.wikipedia.org/wiki/String_literal)
// (\\a) {return charEsc('\u0007');} // alert
// (\\b) {return charEsc('\u0008');} // backspace
// (\\f) {return charEsc('\u000C');} // form feed
// (\\n) {return charEsc('\n') ;} // line feed
// (\\r) {return charEsc('\r') ;} // carriage return
// (\\t) {return charEsc('\u0009');} // horizontal tab
// (\\v) {return charEsc('\u000B');} // vertical tab
// (\\e) {return charEsc('\u001B');} // escape character
// Unicode Escapes // // Unicode Escapes
(\\u{hex}{hex}{hex}{hex}) {return uni16Esc();} // (\\u{hex}{hex}{hex}{hex}) {return uni16Esc();}
(\\U{hex}{hex}{hex}{hex}{hex}{hex}{hex}{hex}) {return uni32Esc();} // (\\U{hex}{hex}{hex}{hex}{hex}{hex}{hex}{hex}) {return uni32Esc();}
(\\u\{{hex}*\}) {return uni21Esc();} // (\\u\{{hex}*\}) {return uni21Esc();}
// Control Characters (https://en.wikipedia.org/wiki/Control_character) // // Control Characters (https://en.wikipedia.org/wiki/Control_character)
(\\NUL) {return ctrlEsc(0x00);} // (\\NUL) {return ctrlEsc(0x00);}
(\\SOH) {return ctrlEsc(0x01);} // (\\SOH) {return ctrlEsc(0x01);}
(\\STX) {return ctrlEsc(0x02);} // (\\STX) {return ctrlEsc(0x02);}
(\\ETX) {return ctrlEsc(0x03);} // (\\ETX) {return ctrlEsc(0x03);}
(\\EOT) {return ctrlEsc(0x04);} // (\\EOT) {return ctrlEsc(0x04);}
(\\ENQ) {return ctrlEsc(0x05);} // (\\ENQ) {return ctrlEsc(0x05);}
(\\ACK) {return ctrlEsc(0x06);} // (\\ACK) {return ctrlEsc(0x06);}
(\\BEL) {return ctrlEsc(0x07);} // (\\BEL) {return ctrlEsc(0x07);}
(\\BS) {return ctrlEsc(0x08);} // (\\BS) {return ctrlEsc(0x08);}
(\\TAB) {return ctrlEsc(0x09);} // (\\TAB) {return ctrlEsc(0x09);}
(\\LF) {return ctrlEsc(0x0A);} // (\\LF) {return ctrlEsc(0x0A);}
(\\VT) {return ctrlEsc(0x0B);} // (\\VT) {return ctrlEsc(0x0B);}
(\\FF) {return ctrlEsc(0x0C);} // (\\FF) {return ctrlEsc(0x0C);}
(\\CR) {return ctrlEsc(0x0D);} // (\\CR) {return ctrlEsc(0x0D);}
(\\SO) {return ctrlEsc(0x0E);} // (\\SO) {return ctrlEsc(0x0E);}
(\\SI) {return ctrlEsc(0x0F);} // (\\SI) {return ctrlEsc(0x0F);}
(\\DLE) {return ctrlEsc(0x10);} // (\\DLE) {return ctrlEsc(0x10);}
(\\DC1) {return ctrlEsc(0x11);} // (\\DC1) {return ctrlEsc(0x11);}
(\\DC2) {return ctrlEsc(0x12);} // (\\DC2) {return ctrlEsc(0x12);}
(\\DC3) {return ctrlEsc(0x13);} // (\\DC3) {return ctrlEsc(0x13);}
(\\DC4) {return ctrlEsc(0x14);} // (\\DC4) {return ctrlEsc(0x14);}
(\\NAK) {return ctrlEsc(0x15);} // (\\NAK) {return ctrlEsc(0x15);}
(\\SYN) {return ctrlEsc(0x16);} // (\\SYN) {return ctrlEsc(0x16);}
(\\ETB) {return ctrlEsc(0x17);} // (\\ETB) {return ctrlEsc(0x17);}
(\\CAN) {return ctrlEsc(0x18);} // (\\CAN) {return ctrlEsc(0x18);}
(\\EM) {return ctrlEsc(0x19);} // (\\EM) {return ctrlEsc(0x19);}
(\\SUB) {return ctrlEsc(0x1A);} // (\\SUB) {return ctrlEsc(0x1A);}
(\\ESC) {return ctrlEsc(0x1B);} // (\\ESC) {return ctrlEsc(0x1B);}
(\\FS) {return ctrlEsc(0x1C);} // (\\FS) {return ctrlEsc(0x1C);}
(\\GS) {return ctrlEsc(0x1D);} // (\\GS) {return ctrlEsc(0x1D);}
(\\RS) {return ctrlEsc(0x1E);} // (\\RS) {return ctrlEsc(0x1E);}
(\\US) {return ctrlEsc(0x1F);} // (\\US) {return ctrlEsc(0x1F);}
(\\DEL) {return ctrlEsc(0x7F);} // (\\DEL) {return ctrlEsc(0x7F);}
// Invalid Escapes // // Invalid Escapes
(\\([a-z]|[A-Z])) {return invalidCharEsc();} // (\\([a-z]|[A-Z])) {return invalidCharEsc();}
{newline} {return newline();} // {newline} {return newline();}
[^\'\`\n\r\\]+ {return text();} // [^\'\`\n\r\\]+ {return text();}
(\`) { // (\`) {
pushState(TEXT_INTERPOLATE); // pushState(TEXT_INTERPOLATE);
return textIntBegin(); // return textIntBegin();
} // }
} // }
<TEXT_RAW> { // <TEXT_RAW> {
(\")+ { // (\")+ {
if (yylength() == quoteSize()) { // if (yylength() == quoteSize()) {
popState(); // popState();
popQuoteSize(); // popQuoteSize();
return textRawEnd(); // return textRawEnd();
} else { // } else {
return text(); // return text();
} // }
} // }
// Prim Escapes // // Prim Escapes
(\\\') {return quoteEsc();} // (\\\') {return quoteEsc();}
(\\\") {return rawQuoteEsc();} // (\\\") {return rawQuoteEsc();}
(\\) {return text();} // (\\) {return text();}
{newline} {return newline();} // {newline} {return newline();}
[^\"\n\r\\]+ {return text();} // [^\"\n\r\\]+ {return text();}
} // }
//////////////////////////////// // ////////////////////////////////
// Number (e.g. 16_ff0000.ff) // // // Number (e.g. 16_ff0000.ff) //
//////////////////////////////// // ////////////////////////////////
<NUMBER_PHASE2> { // <NUMBER_PHASE2> {
_[a-zA-Z0-9]+ { // _[a-zA-Z0-9]+ {
numberPart1 = numberPart2; // numberPart1 = numberPart2;
numberPart2 = yytext().substring(1); // numberPart2 = yytext().substring(1);
popState(); // popState();
pushState(NUMBER_PHASE3); // pushState(NUMBER_PHASE3);
} // }
[^] {rewind(); popState(); return number();} // [^] {rewind(); popState(); return number();}
<<EOF>> {return number();} // <<EOF>> {return number();}
} // }
<NUMBER_PHASE3> { // <NUMBER_PHASE3> {
.[a-zA-Z0-9]+ { // .[a-zA-Z0-9]+ {
numberPart3=yytext().substring(1); // numberPart3=yytext().substring(1);
popState(); // popState();
return number(); // return number();
} // }
[^] {rewind(); popState(); return number();} // [^] {rewind(); popState(); return number();}
<<EOF>> {return number();} // <<EOF>> {return number();}
} // }
////////////// // //////////////
// Comments // // // Comments //
////////////// // //////////////
<COMMENT> { // <COMMENT> {
[^\n\r]+ {return commentBody();} // [^\n\r]+ {return commentBody();}
{newline} {popState(); pushState(COMMENT_LINE); return newline();} // {newline} {popState(); pushState(COMMENT_LINE); return newline();}
} // }
<COMMENT_LINE> { // <COMMENT_LINE> {
{whitespace}+ { // {whitespace}+ {
popState(); // popState();
if(yylength() > lineIndent) { // if(yylength() > indent) {
pushState(COMMENT); // pushState(COMMENT);
} else { // } else {
pushState(NEWLINE); // pushState(NEWLINE);
} // }
rewind(); // rewind();
} // }
[^] { // [^] {
popState(); // popState();
pushState(NEWLINE); // pushState(NEWLINE);
rewind(); // rewind();
} // }
} // }
@ -422,89 +455,97 @@ decimal = {digit}+
<NEWLINE> { <NEWLINE> {
{whitespace}+ { {whitespace}+ {
lineIndent = yylength();
whitespace(); whitespace();
popState(); popState();
Integer ind = yylength();
if (ind > indentx()) {
return blockBegin();
} else {
// TODO
}
} }
[^] { [^] {
lineIndent = 0; indent = 0;
popState(); popState();
rewind(); rewind();
return newline();
} }
} }
/////////////////// // ///////////////////
// Default Rules // // // Default Rules //
/////////////////// // ///////////////////
// Identifiers // // Identifiers
{var} {return var();} {var} {return var();}
{cons} {return cons();} // {var} {return var();}
{wildcard} {return wildcard();} // {cons} {return cons();}
// {wildcard} {return wildcard();}
// Operators // // Operators
{operator} {return operator();} // {operator} {return operator();}
(\=) {return operator();} // (\=) {return operator();}
(\=\=) {return operator();} // (\=\=) {return operator();}
(\>\=) {return operator();} // (\>\=) {return operator();}
(\<\=) {return operator();} // (\<\=) {return operator();}
(\/\=) {return operator();} // (\/\=) {return operator();}
(\,) {return operator();} // (\,) {return operator();}
(\.) {return operator_();} // (\.) {return operator_();}
(\.\.) {return operator();} // (\.\.) {return operator();}
(\.\.\.) {return operator();} // (\.\.\.) {return operator();}
{modifier} {return modifier();} // {modifier} {return modifier();}
(\#\=) {return disabled();} // (\#\=) {return disabled();}
// Layout // // Layout
(\() {return groupBegin();} // (\() {return groupBegin();}
(\)) {return groupEnd();} // (\)) {return groupEnd();}
(\[) {return listBegin();} // (\[) {return listBegin();}
(\]) {return listEnd();} // (\]) {return listEnd();}
(\{) {return recordBegin();} // (\{) {return recordBegin();}
(\}) {return recordEnd();} // (\}) {return recordEnd();}
// Numbers // // Numbers
{decimal} {numberPart2=yytext(); pushState(NUMBER_PHASE2);} // {decimal} {numberPart2=yytext(); pushState(NUMBER_PHASE2);}
// Text // // Text
(\')+ { // (\')+ {
int size = yylength(); // int size = yylength();
if(size == 2) { // if(size == 2) {
size = 1; // size = 1;
yypushback(1); // yypushback(1);
} // }
pushQuoteSize(size); // pushQuoteSize(size);
pushState(TEXT); // pushState(TEXT);
return textBegin(); // return textBegin();
} // }
// Raw Text // // Raw Text
(\")+ { // (\")+ {
int size = yylength(); // int size = yylength();
if(size == 2) { // if(size == 2) {
size = 1; // size = 1;
yypushback(1); // yypushback(1);
} // }
pushQuoteSize(size); // pushQuoteSize(size);
pushState(TEXT_RAW); // pushState(TEXT_RAW);
return textRawBegin(); // return textRawBegin();
} // }
// Comments // // Comments
(\#) { // (\#) {
pushState(COMMENT); // pushState(COMMENT);
return comment(); // return comment();
} // }
// Layout // Layout
{whitespace}+ {whitespace();} {whitespace}+ {whitespace();}
{newline} {pushState(NEWLINE); return newline();} {newline} {pushState(NEWLINE);}
// Unknown // // Unknown
[^] { // [^] {
return unmatched(); // return unmatched();
} // }

View File

@ -1,12 +1,163 @@
package org.enso.syntax.text package org.enso.main
// import org.enso.syntax.text.parser.{Parser}
import java.io.{Reader,StringReader, StringWriter}
import org.enso.syntax.text.xx.{Parser}
import org.enso.syntax.text.xx.Parser.{Lexer => Tok}
import org.enso.syntax.text.xx.Parser.Lexer._
import org.enso.syntax.text.lexer.{Scanner,Token,Wildcard,EOF,EOL}
// import org.enso.syntax.text.{parser => AST}
import org.enso.syntax.text.parser.AST
// Adapter that exposes the JFlex `Scanner` through the Bison-generated
// `Parser.Lexer` interface: `yylex` yields the token *id* (Int) while the
// token *value* is fetched separately via `getLVal`.
class SS(scanner:Scanner) extends Parser.Lexer {
// Set once the scanner reports end-of-input (yylex == -1).
private var _done = false
// Semantic value of the most recently scanned token (written by the
// scanner as a side effect of `lex`/`yylex`).
def getLVal():Token = {
scanner.value
}
// Bison error callback: just prints the message.
def yyerror(s:String) {
println("!!! ERROR !!!")
println(s)
}
// Advance the scanner; returns the numeric token id (-1 at EOF).
def yylex():Int = {
scanner.lex
}
// Scan one token and return its semantic value; at end of input,
// marks this lexer done and returns a synthetic EOF token.
def lex(): Token = {
val tok = yylex
if (tok == -1) {
_done = true;
return Token(EOF,0,0)
}
return getLVal
}
// Drain the input, collecting all token values (terminated by the
// synthetic EOF token produced by `lex`).
def lexAll(): Vector[Token] = {
var builder = Vector.newBuilder[Token]
do {
builder += lex
} while (!_done)
builder.result
}
// Like `lexAll`, but also records the raw numeric id of each token.
// NOTE(review): on EOF the id vector gets -1 while the value vector
// gets Token(EOF,0,0) — presumably intentional; confirm against Parser.
def lexAll2(): (Vector[Int],Vector[Token]) = {
var builder_t = Vector.newBuilder[Int]
var builder = Vector.newBuilder[Token]
do {
val tok = yylex
var tval = getLVal
if (tok == -1) {
tval = Token(EOF,0,0)
_done = true
}
builder_t += tok
builder += tval
} while (!_done)
(builder_t.result, builder.result)
}
}
// Minimal hand-rolled recursive-descent parser over the token stream
// produced by `SS.lexAll2`. Kept alongside the Bison parser for
// comparison; `parse` currently recognizes only sequences of VAR
// tokens folded left with `AST.app`.
class PP(reader:Reader) {
  val lexer = new SS(new Scanner(reader))

  //////////////////////
  // Token Management //
  //////////////////////

  // Full token stream, eagerly lexed: numeric ids and semantic values.
  val (itokens, tokens) = lexer.lexAll2()
  var tokenIx = 0
  // Current token value / numeric id (cursor state of the parser).
  var current  : Token = tokens(tokenIx)
  var icurrent : Int   = itokens(tokenIx)

  // Advance the cursor by one token; stays on the last token (EOF)
  // instead of running past the end of the stream.
  def step(): Unit = {
    if (tokenIx == tokens.size - 1) {
      return
    }
    tokenIx += 1
    current  = tokens(tokenIx)
    icurrent = itokens(tokenIx)
  }

  // Entry point: parse a whitespace-application chain of tokens.
  def parse():Option[AST] = {
    manyWith(AST.app,()=>tok)
  }

  // Parse a single VAR token into an AST leaf.
  def tok():Option[AST] = {
    token(VAR).map(AST.fromToken)
  }

  // def block

  // Ordered-choice combinator: try `l`, fall back to `r` on failure.
  // FIX(review): the original used procedure syntax (no `=`), so its
  // result type was Unit and the match result was silently discarded;
  // it now returns the selected alternative.
  def or[T](l:()=>Option[T],r:()=>Option[T]): Option[T] = {
    l() match {
      case Some(a) => Some(a)
      case None    => r()
    }
  }

  // Consume and return the current token iff its numeric id is `t`;
  // on a match the cursor is advanced, otherwise nothing is consumed.
  def token(t:Int):Option[Token] = {
    if(icurrent==t) {
      val result = Some(current)
      step
      result
    } else {
      None
    }
  }

  // one-or-more: parse with `f` repeatedly, folding results left with
  // `concat`; None iff the first attempt fails.
  def manyWith(concat:(AST,AST)=>AST,f:()=>Option[AST]): Option[AST] = {
    f() match {
      case None      => None
      case Some(ast) => {
        Some(manyWith_(concat,f,ast))
      }
    }
  }

  // Tail of `manyWith`: keep folding until `f` fails, then return the
  // accumulated AST.
  def manyWith_(concat:(AST,AST)=>AST,f:()=>Option[AST],ast:AST): AST = {
    f() match {
      case None       => ast
      case Some(ast2) => {
        manyWith_(concat,f,concat(ast,ast2))
      }
    }
  }
}
import org.enso.syntax.text.parser.{Parser}
import java.io.{StringReader, StringWriter}
object Main extends App { object Main extends App {
val reader = new StringReader("a =\n b") val str = "a b"
val parser = new Parser(reader) val reader = new StringReader(str)
val ast = parser.parse val reader2 = new StringReader(str)
pprint.pprintln(ast,width=3,height=1000) val scanner = new Scanner(reader)
val scanner2 = new Scanner(reader2)
val ss = new SS(scanner)
val ss2 = new SS(scanner2)
val parser = new Parser(ss)
val pp = new PP(new StringReader(str))
pprint.pprintln(ss2.lexAll)
pprint.pprintln(pp.parse)
pprint.pprintln(parser.parse)
// val parser = new Parser(reader)
// val ast = parser.parse
pprint.pprintln(parser.result,width=3,height=1000)
} }

View File

@ -3,40 +3,40 @@ package org.enso.syntax.text.lexer
import java.io.{StringReader, Reader} import java.io.{StringReader, Reader}
import scala.collection.immutable.Vector import scala.collection.immutable.Vector
class Lexer (reader:Reader) { // class Lexer (reader:Reader) {
val scanner = new Scanner(reader) // val scanner = new Scanner(reader)
private var _done = false // private var _done = false
def this(str:String) { // def this(str:String) {
this(new StringReader(str)) // this(new StringReader(str))
} // }
def lex(): Token = { // def lex(): Token = {
if (done) { // if (done) {
return Token(EOF,0,0) // return Token(EOF,0,0)
} // }
if (scanner.done) { // if (scanner.done) {
_done = true // _done = true
return lex // return lex
} // }
val token = scanner.lex // val token = scanner.lex
if (token == null) { // if (token == null) {
_done = true // _done = true
return lex // return lex
} else { // } else {
return token // return token
} // }
} // }
def lexAll(): Vector[Token] = { // def lexAll(): Vector[Token] = {
var builder = Vector.newBuilder[Token] // var builder = Vector.newBuilder[Token]
do { // do {
builder += lex // builder += lex
} while (!done) // } while (!done)
builder.result // builder.result
} // }
def done(): Boolean = { // def done(): Boolean = {
return _done; // return _done;
} // }
} // }

View File

@ -1,5 +1,8 @@
package org.enso.syntax.text.parser package org.enso.syntax.text.parser
import org.enso.syntax.text.lexer.Token
import org.enso.syntax.text.{lexer => token}
///////// /////////
// AST // // AST //
@ -20,4 +23,24 @@ case object NONE extends Symbol
case class Var (name:String) extends Symbol case class Var (name:String) extends Symbol
case class Operator (name:String) extends Symbol case class Operator (name:String) extends Symbol
case class App (func:AST, arg:AST) extends Symbol case class App (func:AST, arg:AST) extends Symbol
case class Block (body:Vector[AST]) extends Symbol case class Block (body:Vector[AST]) extends Symbol
//
// Smart constructors for the `AST` node type used by both parsers.
object AST {
// Convert a lexer token into an AST leaf.
// NOTE(review): the match covers only token.Var — any other symbol
// throws MatchError at runtime; confirm callers only pass VAR tokens.
def fromToken(tok:Token):AST = {
tok.symbol match {
case token.Var(name) => AST(0,0,Var(name))
}
}
// Function application: the result keeps `fn`'s offset and spans both
// children; `fn`'s own offset is zeroed inside the App node since it
// is now carried by the parent.
def app(fn:AST, arg:AST):AST = {
AST(fn.offset,fn.span + arg.span,App(fn.copy(offset=0),arg))
}
// A block with no lines, at zero offset/span.
def emptyBlock():AST = {
AST(0,0,Block(Vector()))
}
}

View File

@ -1,186 +1,188 @@
package org.enso.syntax.text.parser package org.enso.syntax.text.parser
import java.io.{Reader} // import java.io.{Reader}
import org.enso.syntax.text.lexer.{Lexer, Token} // import org.enso.syntax.text.lexer.{Lexer, Token}
import org.enso.syntax.text.{lexer => token} // import org.enso.syntax.text.{lexer => token}
import scala.collection.immutable.{Vector} // import scala.collection.immutable.{Vector}
import scala.collection.mutable.{Builder} // import scala.collection.mutable.{Builder}
class Parser(reader:Reader) { // class Parser(reader:Reader) {
val lexer = new Lexer(reader) // val lexer = new Lexer(reader)
////////////////////// // //////////////////////
// Token Management // // // Token Management //
////////////////////// // //////////////////////
val tokens = lexer.lexAll() // val tokens = lexer.lexAll()
var tokenIx = 0 // var tokenIx = 0
var current : Token = tokens(tokenIx) // var current : Token = tokens(tokenIx)
def step(): Token = { // def step(): Token = {
if (tokenIx == tokens.size - 1) { // if (tokenIx == tokens.size - 1) {
return Token(token.EOF,0,0) // return Token(token.EOF,0,0)
} // }
tokenIx += 1 // tokenIx += 1
current = tokens(tokenIx) // current = tokens(tokenIx)
if (current.symbol == token.EOL) { // if (current.symbol == token.EOL) {
line += 1 // line += 1
column = 0 // column = 0
} else { // } else {
column += current.offset + current.span // column += current.offset + current.span
} // }
current // current
} // }
def lookup(i:Int=1): Token = { // def lookup(i:Int=1): Token = {
val ix = tokenIx + i // val ix = tokenIx + i
if (ix >= 0 && ix < tokens.size) { // if (ix >= 0 && ix < tokens.size) {
tokens(ix) // tokens(ix)
} else { // } else {
Token(token.EOF,0,0) // Token(token.EOF,0,0)
} // }
} // }
def next(): Token = { // def next(): Token = {
lookup() // lookup()
} // }
def previous(): Token = { // def previous(): Token = {
lookup(-1) // lookup(-1)
} // }
// var indents : Stack[Int] = new Stack() // // var indents : Stack[Int] = new Stack()
// indents.push(0) // // indents.push(0)
// def indent(): Int = { // // def indent(): Int = {
// indents.head // // indents.head
// } // // }
var column : Int = 0 // var column : Int = 0
var line : Int = 0 // var line : Int = 0
var indent : Int = 0 // var indent : Int = 0
def withIndent[T](newIndent:Int,f:()=>T):T = { // def withIndent[T](newIndent:Int,f:()=>T):T = {
val oldIndent = indent // val oldIndent = indent
indent = newIndent // indent = newIndent
val result = f() // val result = f()
indent = oldIndent // indent = oldIndent
result // result
} // }
def parse(): AST = { // def parse(): AST = {
expr() match { // expr() match {
case Some(ast) => ast // case Some(ast) => ast
case None => AST(0,0,NONE) // case None => AST(0,0,NONE)
} // }
} // }
def expr(): Option[AST] = { // def expr(): Option[AST] = {
manyWith(app, patternToken) // manyWith(app, patternToken)
// .flatMap(pat => { // // .flatMap(pat => {
// if(current.symbol == token.EOL && next.offset > indent) { // // if(current.symbol == token.EOL && next.offset > indent) {
// step // // step
// withIndent(next.offset, () => Some(app(pat,block))) // // withIndent(next.offset, () => Some(app(pat,block)))
// } else { // // } else {
// Some(pat) // // Some(pat)
// } // // }
// }) // // })
} // }
def block(): AST = { // def block(): AST = {
AST(0,0,Block(many(() => blockLine))) // AST(0,0,Block(many(() => blockLine)))
} // }
def blockLine(): Option[AST] = { // def blockLine(): Option[AST] = {
if(next.offset == indent) { // if(next.offset == indent) {
val out = expr // val out = expr
println("!!!!!--") // println("!!!!!--")
println(out) // println(out)
out // out
} else { // } else {
None // None
} // }
} // }
// def parseExprX(): AST = { // // def parseExprX(): AST = {
// current.symbol match { // // current.symbol match {
// case token.Var(name) => { // // case token.Var(name) => {
// AST(Var(name),current.offset,current.span) // // AST(Var(name),current.offset,current.span)
// } // // }
// case x => { // // case x => {
// println("!!!") // // println("!!!")
// println(x) // // println(x)
// AST(NONE,0,0) // // AST(NONE,0,0)
// } // // }
// } // // }
// } // // }
def patternToken(tok:Token): Option[AST] = {
tok.symbol match {
case token.Var (name) => Some(AST(tok.offset,tok.span, Var (name)))
case token.Operator (name) => Some(AST(current.offset,current.span, Operator (name)))
case token.EOL => {
if (next.offset > indent) {
step
withIndent(next.offset, () => Some(block))
} else {
None
}
}
case _ => None
}
}
def many(f:()=>Option[AST]): Vector[AST] = {
f() match {
case None => Vector()
case Some(ast) => {
step
val builder = Vector.newBuilder[AST]
builder += ast
many_(f,builder)
builder.result
}
}
}
def many_(f:()=>Option[AST], builder:Builder[AST,Vector[AST]]): Unit = { // def patternToken(tok:Token): Option[AST] = {
f() match { // tok.symbol match {
case None => return // case token.Var (name) => Some(AST(tok.offset,tok.span, Var (name)))
case Some(ast) => { // case token.Operator (name) => Some(AST(current.offset,current.span, Operator (name)))
builder += ast // case token.EOL => {
many_(f,builder) // if (next.offset > indent) {
} // step
} // withIndent(next.offset, () => Some(block))
} // } else {
// None
// }
// }
// case _ => None
// }
// }
def manyWith(concat:(AST,AST)=>AST,f:(Token)=>Option[AST]): Option[AST] = { // def many(f:()=>Option[AST]): Vector[AST] = {
f(current) match { // f() match {
case None => None // case None => Vector()
case Some(ast) => { // case Some(ast) => {
step // step
Some(manyWith_(concat,f,ast)) // val builder = Vector.newBuilder[AST]
} // builder += ast
} // many_(f,builder)
} // builder.result
// }
// }
// }
def manyWith_(concat:(AST,AST)=>AST,f:(Token)=>Option[AST],ast:AST): AST = { // def many_(f:()=>Option[AST], builder:Builder[AST,Vector[AST]]): Unit = {
f(current) match { // f() match {
case None => ast // case None => return
case Some(ast2) => { // case Some(ast) => {
step // builder += ast
manyWith_(concat,f,concat(ast,ast2)) // many_(f,builder)
} // }
} // }
} // }
// def manyWith(concat:(AST,AST)=>AST,f:(Token)=>Option[AST]): Option[AST] = {
// f(current) match {
// case None => None
// case Some(ast) => {
// step
// Some(manyWith_(concat,f,ast))
// }
// }
// }
// def manyWith_(concat:(AST,AST)=>AST,f:(Token)=>Option[AST],ast:AST): AST = {
// f(current) match {
// case None => ast
// case Some(ast2) => {
// step
// manyWith_(concat,f,concat(ast,ast2))
// }
// }
// }
def app(func:AST, arg:AST): AST = { // def app(func:AST, arg:AST): AST = {
AST(func.offset,func.span + arg.span,App(func.copy(offset=0),arg)) // AST(func.offset,func.span + arg.span,App(func.copy(offset=0),arg))
} // }
} // }

View File

@ -0,0 +1,178 @@
%language "Java"
%name-prefix "Parser"
%define parser_class_name "Parser"
%define public
%code imports {
package org.enso.syntax.text.xx;
import org.enso.syntax.text.parser.AST;
import org.enso.syntax.text.lexer.Token;
}
%code {
public AST result;
}
// public static void main (String args[]) throws IOException
// {
// CalcLexer l = new CalcLexer (System.in);
// Calc p = new Calc (l);
// p.parse ();
// }
// }
// %define api.value.type {Token}
/* Bison Declarations */
%token <Token> VAR
%token <Token> EOL
%token <Token> BLOCK_BEGIN
%token <Token> BLOCK_END
%token <AST> CONS
%type <AST> expr
%type <AST> block
%type <AST> blockBody
%type <AST> tok
%start program
/////////////
// Grammar //
/////////////
%%
// A program is a single (possibly empty) expression; the result is
// stored in the `result` field declared in the %code block.
program:
expr {result=$1;}
| /* empty */

// Expressions: left-folded application of tokens and indented blocks.
expr:
tok {$$=$1;}
| expr tok {$$=AST.app($1,$2);}
| expr block {$$=AST.app($1,$2);}

// A block is its body, introduced by the synthetic BLOCK_BEGIN token
// emitted by the scanner's indentation tracking.
block:
BLOCK_BEGIN blockBody {$$=$2;}

// NOTE(review): these actions look like placeholders — the EOL rule
// discards the recursive blockBody ($3) and the BLOCK_END rule
// discards its expr ($1), returning an empty block instead. Confirm
// intended semantics before relying on block contents.
// NOTE(review): rules are not terminated with `;` — Bison accepts
// this with warnings in some versions; consider adding terminators.
blockBody:
expr EOL blockBody {$$=$1;}
| expr BLOCK_END {$$=AST.emptyBlock();}

// Terminal wrapper: a VAR token becomes an AST leaf.
tok:
VAR {$$=AST.fromToken($1);}
// {
// if ($1.intValue () != $3.intValue ())
// yyerror ( "calc: error: " + $1 + " != " + $3);
// }
// | exp '-' exp { $$ = new Integer ($1.intValue () - $3.intValue ()); }
// | exp '*' exp { $$ = new Integer ($1.intValue () * $3.intValue ()); }
// | exp '/' exp { $$ = new Integer ($1.intValue () / $3.intValue ()); }
// | '-' exp %prec NEG { $$ = new Integer (-$2.intValue ()); }
// | exp '^' exp { $$ = new Integer ((int)
// Math.pow ($1.intValue (),
// $3.intValue ())); }
// | '(' exp ')' { $$ = $2; }
// | '(' error ')' { $$ = new Integer (1111); }
// | '!' { $$ = new Integer (0); return YYERROR; }
// | '-' error { $$ = new Integer (0); return YYERROR; }
// ;
%%
// class CalcLexer implements Calc.Lexer {
// StreamTokenizer st;
// public CalcLexer (InputStream is)
// {
// st = new StreamTokenizer (new InputStreamReader (is));
// st.resetSyntax ();
// st.eolIsSignificant (true);
// st.whitespaceChars (9, 9);
// st.whitespaceChars (32, 32);
// st.wordChars (48, 57);
// }
// public void yyerror (String s)
// {
// System.err.println (s);
// }
// Integer yylval;
// public Object getLVal() {
// return yylval;
// }
// public int yylex () throws IOException {
// int ttype = st.nextToken ();
// if (ttype == st.TT_EOF)
// return Calc.EOF;
// else if (ttype == st.TT_EOL)
// {
// return (int) '\n';
// }
// else if (ttype == st.TT_WORD)
// {
// yylval = new Integer (st.sval);
// return Calc.NUMX;
// }
// else
// return st.ttype;
// }
// }
// class Position {
// public int line;
// public int token;
// public Position ()
// {
// line = 0;
// token = 0;
// }
// public Position (int l, int t)
// {
// line = l;
// token = t;
// }
// public boolean equals (Position l)
// {
// return l.line == line && l.token == token;
// }
// public String toString ()
// {
// return Integer.toString (line) + "." + Integer.toString(token);
// }
// public int lineno ()
// {
// return line;
// }
// public int token ()
// {
// return token;
// }
// }