Updating lexer and parser rules

This commit is contained in:
Wojciech Danilo 2019-06-07 10:54:20 +02:00
parent a9b1cc3120
commit f111b7bba8
7 changed files with 888 additions and 493 deletions

View File

@ -34,4 +34,4 @@ lazy val basic = Project(
SbtJFlexPlugin.jflexSettings
mainClass in (Compile, run) := Some("org.enso.syntax.text.Main")
mainClass in (Compile, run) := Some("org.enso.main.Main")

View File

@ -1,5 +1,6 @@
package org.enso.syntax.text.lexer;
import java.util.Stack;
import static org.enso.syntax.text.xx.Parser.Lexer.*;
/**
* Enso lexical scanner
@ -8,7 +9,25 @@ import java.util.Stack;
%{
private int lineIndent = 0;
private int indent = 0;
///////////////////////
// Indent Management //
///////////////////////
/**
 * Stack of indentation widths, most recent on top. The %init block of this
 * scanner seeds it with 0 via {@code pushIndent(0)}.
 */
private Stack<Integer> indentStack = new Stack<Integer>();
/**
 * Pushes the indentation width {@code i} on top of the indent stack.
 *
 * @param i indentation width to record (autoboxed to {@code Integer})
 */
public final void pushIndent(int i) {
indentStack.push(i);
}
/**
 * Removes and returns the indentation width on top of the stack.
 *
 * @return the removed top element
 * @throws java.util.EmptyStackException if the stack is empty
 */
public final Integer popIndent() {
return indentStack.pop();
}
/**
 * Returns the indentation width on top of the stack without removing it.
 *
 * @return the current top element
 * @throws java.util.EmptyStackException if the stack is empty
 */
public final Integer indentx() {
return indentStack.peek();
}
/////////////////////////////
@ -64,7 +83,7 @@ import java.util.Stack;
return zzAtEOF;
}
public Token lex() throws java.io.IOException {
public int lex() throws java.io.IOException {
return yylex();
}
@ -80,93 +99,102 @@ import java.util.Stack;
}
//////////////////
// Constructors //
//////////////////
// Utils
void whitespace() {lastOffset += yylength();}
Symbol invalid(InvalidReason reason) {
return new Invalid (reason);
}
public Token value;
// Identifiers
Token var_() {return token(new Var (yytext()));}
Token cons_() {return token(new Cons (yytext()));}
Token wildcard_() {return token(Wildcard$.MODULE$);}
Token var() {pushState(CHECK_IDENT_SFX); return var_();}
Token cons() {pushState(CHECK_IDENT_SFX); return cons_();}
Token wildcard() {pushState(CHECK_IDENT_SFX); return wildcard_();}
Token errorSfx() {return token(invalid(new UnexpectedSuffix(yytext())));}
//////////////////
// Constructors //
//////////////////
int var() {value = token(new Var (yytext())); return VAR;}
// Utils
void whitespace() {lastOffset += yylength();}
// Symbol invalid(InvalidReason reason) {
// return new Invalid (reason);
// }
// // Identifiers
// Token var_() {return token(new Var (yytext()));}
// Token cons_() {return token(new Cons (yytext()));}
// Token wildcard_() {return token(Wildcard$.MODULE$);}
// Token var() {pushState(CHECK_IDENT_SFX); return var_();}
// Token cons() {pushState(CHECK_IDENT_SFX); return cons_();}
// Token wildcard() {pushState(CHECK_IDENT_SFX); return wildcard_();}
// Token errorSfx() {return token(invalid(new UnexpectedSuffix(yytext())));}
// Operators
Token operator_() {return token(new Operator(yytext()));}
Token modifier_() {return token(new Modifier(yytext()));}
Token disabled_() {return token(DisabledAssignment$.MODULE$);}
Token operator() {pushState(CHECK_OP_SFX); return operator_();}
Token modifier() {pushState(CHECK_OP_SFX); return modifier_();}
Token disabled() {pushState(CHECK_OP_SFX); return disabled_();}
// // Operators
// Token operator_() {return token(new Operator(yytext()));}
// Token modifier_() {return token(new Modifier(yytext()));}
// Token disabled_() {return token(DisabledAssignment$.MODULE$);}
// Token operator() {pushState(CHECK_OP_SFX); return operator_();}
// Token modifier() {pushState(CHECK_OP_SFX); return modifier_();}
// Token disabled() {pushState(CHECK_OP_SFX); return disabled_();}
// Layout
Token newline() {return token(EOL$.MODULE$);}
Token groupBegin() {return token(GroupBegin$.MODULE$);}
Token groupEnd() {return token(GroupEnd$.MODULE$);}
Token listBegin() {return token(ListBegin$.MODULE$);}
Token listEnd() {return token(ListEnd$.MODULE$);}
Token recordBegin() {return token(RecordBegin$.MODULE$);}
Token recordEnd() {return token(RecordEnd$.MODULE$);}
Token unmatched() {return token(new Unmatched(yytext()));}
// Layout
int newline() {value = token(EOL$.MODULE$); return EOL;}
int blockBegin() {return BLOCK_BEGIN;}
int blockEnd() {return BLOCK_END;}
// Token groupBegin() {return token(GroupBegin$.MODULE$);}
// Token groupEnd() {return token(GroupEnd$.MODULE$);}
// Token listBegin() {return token(ListBegin$.MODULE$);}
// Token listEnd() {return token(ListEnd$.MODULE$);}
// Token recordBegin() {return token(RecordBegin$.MODULE$);}
// Token recordEnd() {return token(RecordEnd$.MODULE$);}
// Token unmatched() {return token(new Unmatched(yytext()));}
// Numbers
Token number() {
Token num = token(Number.fromString(numberPart1,numberPart2,numberPart3));
numberPart1 = "";
numberPart2 = "";
numberPart3 = "";
return num;
}
// // Numbers
// Token number() {
// Token num = token(Number.fromString(numberPart1,numberPart2,numberPart3));
// numberPart1 = "";
// numberPart2 = "";
// numberPart3 = "";
// return num;
// }
// Text
Token textBegin() {return token(TextBegin$.MODULE$);}
Token textEnd() {return token(TextEnd$.MODULE$);}
Token textRawBegin() {return token(TextRawBegin$.MODULE$);}
Token textRawEnd() {return token(TextRawEnd$.MODULE$);}
Token text() {return token(new Text(yytext()));}
Token textIntBegin() {return token(TextInterpolateBegin$.MODULE$);}
Token textIntEnd() {return token(TextInterpolateEnd$.MODULE$);}
// // Text
// Token textBegin() {return token(TextBegin$.MODULE$);}
// Token textEnd() {return token(TextEnd$.MODULE$);}
// Token textRawBegin() {return token(TextRawBegin$.MODULE$);}
// Token textRawEnd() {return token(TextRawEnd$.MODULE$);}
// Token text() {return token(new Text(yytext()));}
// Token textIntBegin() {return token(TextInterpolateBegin$.MODULE$);}
// Token textIntEnd() {return token(TextInterpolateEnd$.MODULE$);}
// Text Escapes
Token slashEsc() {return token(new TextEscape(SlashEscape$.MODULE$));}
Token quoteEsc() {return token(new TextEscape(QuoteEscape$.MODULE$));}
Token rawQuoteEsc() {return token(new TextEscape(RawQuoteEscape$.MODULE$));}
Token charEsc(char c) {return token(new TextEscape(CharEscape.fromChar(c)));}
Token ctrlEsc(int c) {return token(new TextEscape(new CtrlEscape(c)));}
Token intEsc() {
return token(new TextEscape(IntEscape.fromString(yytext().substring(1))));
}
Token uni16Esc() {
String scode = yytext().substring(2);
return token(new TextEscape(new Uni16Escape (Integer.parseInt(scode,16))));
}
Token uni32Esc() {
return token(new TextEscape(Uni32Escape.fromString(yytext().substring(2))));
}
Token uni21Esc() {
String scode = yytext();
scode = scode.substring(3,scode.length()-1);
return token(new TextEscape(Uni21Escape.fromString(scode)));
}
Token invalidCharEsc(){
return token(new TextEscape(new InvalidCharEscape(yytext().charAt(1))));
}
// // Text Escapes
// Token slashEsc() {return token(new TextEscape(SlashEscape$.MODULE$));}
// Token quoteEsc() {return token(new TextEscape(QuoteEscape$.MODULE$));}
// Token rawQuoteEsc() {return token(new TextEscape(RawQuoteEscape$.MODULE$));}
// Token charEsc(char c) {return token(new TextEscape(CharEscape.fromChar(c)));}
// Token ctrlEsc(int c) {return token(new TextEscape(new CtrlEscape(c)));}
// Token intEsc() {
// return token(new TextEscape(IntEscape.fromString(yytext().substring(1))));
// }
// Token uni16Esc() {
// String scode = yytext().substring(2);
// return token(new TextEscape(new Uni16Escape (Integer.parseInt(scode,16))));
// }
// Token uni32Esc() {
// return token(new TextEscape(Uni32Escape.fromString(yytext().substring(2))));
// }
// Token uni21Esc() {
// String scode = yytext();
// scode = scode.substring(3,scode.length()-1);
// return token(new TextEscape(Uni21Escape.fromString(scode)));
// }
// Token invalidCharEsc(){
// return token(new TextEscape(new InvalidCharEscape(yytext().charAt(1))));
// }
// Comment
Token comment() {return token(Comment$.MODULE$);}
Token commentBody() {return token(new CommentBody(yytext()));}
// // Comment
// Token comment() {return token(Comment$.MODULE$);}
// Token commentBody() {return token(new CommentBody(yytext()));}
%}
%init{
pushState(NEWLINE);
// pushState(NEWLINE);
pushIndent(0);
%init}
@ -176,12 +204,14 @@ import java.util.Stack;
/////////////
%class Scanner
%type Token
%int
%public
// %type int
%line
%column
%char
%unicode
%apiprivate
// %apiprivate
// %debug
@ -238,181 +268,184 @@ decimal = {digit}+
%%
///////////////////////
// Unexpected Suffix //
///////////////////////
<CHECK_IDENT_SFX> {
{ident_err_sfx} {return errorSfx();}
[^] {rewind(); popState();}
}
<CHECK_OP_SFX> {
{operator_err_sfx} {return errorSfx();}
[^] {rewind(); popState();}
}
//////////
// Text //
//////////
// ///////////////////////
// // Unexpected Suffix //
// ///////////////////////
<TEXT_INTERPOLATE> {
(\`) {popState(); return textIntEnd();}
}
// <CHECK_IDENT_SFX> {
// {ident_err_sfx} {return errorSfx();}
// [^] {rewind(); popState();}
// }
<TEXT> {
(\')+ {
if (yylength() == quoteSize()) {
popState();
popQuoteSize();
return textEnd();
} else {
return text();
}
}
// <CHECK_OP_SFX> {
// {operator_err_sfx} {return errorSfx();}
// [^] {rewind(); popState();}
// }
// Prim Escapes
(\\\\) {return slashEsc();}
(\\\') {return quoteEsc();}
(\\\") {return rawQuoteEsc();}
(\\[0-9]+) {return intEsc();}
// Escape Characters (https://en.wikipedia.org/wiki/String_literal)
(\\a) {return charEsc('\u0007');} // alert
(\\b) {return charEsc('\u0008');} // backspace
(\\f) {return charEsc('\u000C');} // form feed
(\\n) {return charEsc('\n') ;} // line feed
(\\r) {return charEsc('\r') ;} // carriage return
(\\t) {return charEsc('\u0009');} // horizontal tab
(\\v) {return charEsc('\u000B');} // vertical tab
(\\e) {return charEsc('\u001B');} // escape character
// //////////
// // Text //
// //////////
// <TEXT_INTERPOLATE> {
// (\`) {popState(); return textIntEnd();}
// }
// <TEXT> {
// (\')+ {
// if (yylength() == quoteSize()) {
// popState();
// popQuoteSize();
// return textEnd();
// } else {
// return text();
// }
// }
// // Prim Escapes
// (\\\\) {return slashEsc();}
// (\\\') {return quoteEsc();}
// (\\\") {return rawQuoteEsc();}
// (\\[0-9]+) {return intEsc();}
// // Escape Characters (https://en.wikipedia.org/wiki/String_literal)
// (\\a) {return charEsc('\u0007');} // alert
// (\\b) {return charEsc('\u0008');} // backspace
// (\\f) {return charEsc('\u000C');} // form feed
// (\\n) {return charEsc('\n') ;} // line feed
// (\\r) {return charEsc('\r') ;} // carriage return
// (\\t) {return charEsc('\u0009');} // horizontal tab
// (\\v) {return charEsc('\u000B');} // vertical tab
// (\\e) {return charEsc('\u001B');} // escape character
// Unicode Escapes
(\\u{hex}{hex}{hex}{hex}) {return uni16Esc();}
(\\U{hex}{hex}{hex}{hex}{hex}{hex}{hex}{hex}) {return uni32Esc();}
(\\u\{{hex}*\}) {return uni21Esc();}
// // Unicode Escapes
// (\\u{hex}{hex}{hex}{hex}) {return uni16Esc();}
// (\\U{hex}{hex}{hex}{hex}{hex}{hex}{hex}{hex}) {return uni32Esc();}
// (\\u\{{hex}*\}) {return uni21Esc();}
// Control Characters (https://en.wikipedia.org/wiki/Control_character)
(\\NUL) {return ctrlEsc(0x00);}
(\\SOH) {return ctrlEsc(0x01);}
(\\STX) {return ctrlEsc(0x02);}
(\\ETX) {return ctrlEsc(0x03);}
(\\EOT) {return ctrlEsc(0x04);}
(\\ENQ) {return ctrlEsc(0x05);}
(\\ACK) {return ctrlEsc(0x06);}
(\\BEL) {return ctrlEsc(0x07);}
(\\BS) {return ctrlEsc(0x08);}
(\\TAB) {return ctrlEsc(0x09);}
(\\LF) {return ctrlEsc(0x0A);}
(\\VT) {return ctrlEsc(0x0B);}
(\\FF) {return ctrlEsc(0x0C);}
(\\CR) {return ctrlEsc(0x0D);}
(\\SO) {return ctrlEsc(0x0E);}
(\\SI) {return ctrlEsc(0x0F);}
(\\DLE) {return ctrlEsc(0x10);}
(\\DC1) {return ctrlEsc(0x11);}
(\\DC2) {return ctrlEsc(0x12);}
(\\DC3) {return ctrlEsc(0x13);}
(\\DC4) {return ctrlEsc(0x14);}
(\\NAK) {return ctrlEsc(0x15);}
(\\SYN) {return ctrlEsc(0x16);}
(\\ETB) {return ctrlEsc(0x17);}
(\\CAN) {return ctrlEsc(0x18);}
(\\EM) {return ctrlEsc(0x19);}
(\\SUB) {return ctrlEsc(0x1A);}
(\\ESC) {return ctrlEsc(0x1B);}
(\\FS) {return ctrlEsc(0x1C);}
(\\GS) {return ctrlEsc(0x1D);}
(\\RS) {return ctrlEsc(0x1E);}
(\\US) {return ctrlEsc(0x1F);}
(\\DEL) {return ctrlEsc(0x7F);}
// // Control Characters (https://en.wikipedia.org/wiki/Control_character)
// (\\NUL) {return ctrlEsc(0x00);}
// (\\SOH) {return ctrlEsc(0x01);}
// (\\STX) {return ctrlEsc(0x02);}
// (\\ETX) {return ctrlEsc(0x03);}
// (\\EOT) {return ctrlEsc(0x04);}
// (\\ENQ) {return ctrlEsc(0x05);}
// (\\ACK) {return ctrlEsc(0x06);}
// (\\BEL) {return ctrlEsc(0x07);}
// (\\BS) {return ctrlEsc(0x08);}
// (\\TAB) {return ctrlEsc(0x09);}
// (\\LF) {return ctrlEsc(0x0A);}
// (\\VT) {return ctrlEsc(0x0B);}
// (\\FF) {return ctrlEsc(0x0C);}
// (\\CR) {return ctrlEsc(0x0D);}
// (\\SO) {return ctrlEsc(0x0E);}
// (\\SI) {return ctrlEsc(0x0F);}
// (\\DLE) {return ctrlEsc(0x10);}
// (\\DC1) {return ctrlEsc(0x11);}
// (\\DC2) {return ctrlEsc(0x12);}
// (\\DC3) {return ctrlEsc(0x13);}
// (\\DC4) {return ctrlEsc(0x14);}
// (\\NAK) {return ctrlEsc(0x15);}
// (\\SYN) {return ctrlEsc(0x16);}
// (\\ETB) {return ctrlEsc(0x17);}
// (\\CAN) {return ctrlEsc(0x18);}
// (\\EM) {return ctrlEsc(0x19);}
// (\\SUB) {return ctrlEsc(0x1A);}
// (\\ESC) {return ctrlEsc(0x1B);}
// (\\FS) {return ctrlEsc(0x1C);}
// (\\GS) {return ctrlEsc(0x1D);}
// (\\RS) {return ctrlEsc(0x1E);}
// (\\US) {return ctrlEsc(0x1F);}
// (\\DEL) {return ctrlEsc(0x7F);}
// Invalid Escapes
(\\([a-z]|[A-Z])) {return invalidCharEsc();}
{newline} {return newline();}
[^\'\`\n\r\\]+ {return text();}
(\`) {
pushState(TEXT_INTERPOLATE);
return textIntBegin();
}
}
// // Invalid Escapes
// (\\([a-z]|[A-Z])) {return invalidCharEsc();}
// {newline} {return newline();}
// [^\'\`\n\r\\]+ {return text();}
// (\`) {
// pushState(TEXT_INTERPOLATE);
// return textIntBegin();
// }
// }
<TEXT_RAW> {
(\")+ {
if (yylength() == quoteSize()) {
popState();
popQuoteSize();
return textRawEnd();
} else {
return text();
}
}
// <TEXT_RAW> {
// (\")+ {
// if (yylength() == quoteSize()) {
// popState();
// popQuoteSize();
// return textRawEnd();
// } else {
// return text();
// }
// }
// Prim Escapes
(\\\') {return quoteEsc();}
(\\\") {return rawQuoteEsc();}
(\\) {return text();}
{newline} {return newline();}
[^\"\n\r\\]+ {return text();}
// // Prim Escapes
// (\\\') {return quoteEsc();}
// (\\\") {return rawQuoteEsc();}
// (\\) {return text();}
// {newline} {return newline();}
// [^\"\n\r\\]+ {return text();}
}
// }
////////////////////////////////
// Number (e.g. 16_ff0000.ff) //
////////////////////////////////
// ////////////////////////////////
// // Number (e.g. 16_ff0000.ff) //
// ////////////////////////////////
<NUMBER_PHASE2> {
_[a-zA-Z0-9]+ {
numberPart1 = numberPart2;
numberPart2 = yytext().substring(1);
popState();
pushState(NUMBER_PHASE3);
}
[^] {rewind(); popState(); return number();}
<<EOF>> {return number();}
}
// <NUMBER_PHASE2> {
// _[a-zA-Z0-9]+ {
// numberPart1 = numberPart2;
// numberPart2 = yytext().substring(1);
// popState();
// pushState(NUMBER_PHASE3);
// }
// [^] {rewind(); popState(); return number();}
// <<EOF>> {return number();}
// }
<NUMBER_PHASE3> {
.[a-zA-Z0-9]+ {
numberPart3=yytext().substring(1);
popState();
return number();
}
[^] {rewind(); popState(); return number();}
<<EOF>> {return number();}
}
// <NUMBER_PHASE3> {
// .[a-zA-Z0-9]+ {
// numberPart3=yytext().substring(1);
// popState();
// return number();
// }
// [^] {rewind(); popState(); return number();}
// <<EOF>> {return number();}
// }
//////////////
// Comments //
//////////////
// //////////////
// // Comments //
// //////////////
<COMMENT> {
[^\n\r]+ {return commentBody();}
{newline} {popState(); pushState(COMMENT_LINE); return newline();}
}
// <COMMENT> {
// [^\n\r]+ {return commentBody();}
// {newline} {popState(); pushState(COMMENT_LINE); return newline();}
// }
<COMMENT_LINE> {
{whitespace}+ {
popState();
if(yylength() > lineIndent) {
pushState(COMMENT);
} else {
pushState(NEWLINE);
}
rewind();
}
[^] {
popState();
pushState(NEWLINE);
rewind();
}
}
// <COMMENT_LINE> {
// {whitespace}+ {
// popState();
// if(yylength() > indent) {
// pushState(COMMENT);
// } else {
// pushState(NEWLINE);
// }
// rewind();
// }
// [^] {
// popState();
// pushState(NEWLINE);
// rewind();
// }
// }
@ -422,89 +455,97 @@ decimal = {digit}+
<NEWLINE> {
{whitespace}+ {
lineIndent = yylength();
whitespace();
popState();
Integer ind = yylength();
if (ind > indentx()) {
return blockBegin();
} else {
// TODO
}
}
[^] {
lineIndent = 0;
indent = 0;
popState();
rewind();
return newline();
}
}
///////////////////
// Default Rules //
///////////////////
// ///////////////////
// // Default Rules //
// ///////////////////
// Identifiers
{var} {return var();}
{cons} {return cons();}
{wildcard} {return wildcard();}
// // Identifiers
{var} {return var();}
// {var} {return var();}
// {cons} {return cons();}
// {wildcard} {return wildcard();}
// Operators
{operator} {return operator();}
(\=) {return operator();}
(\=\=) {return operator();}
(\>\=) {return operator();}
(\<\=) {return operator();}
(\/\=) {return operator();}
(\,) {return operator();}
(\.) {return operator_();}
(\.\.) {return operator();}
(\.\.\.) {return operator();}
{modifier} {return modifier();}
(\#\=) {return disabled();}
// // Operators
// {operator} {return operator();}
// (\=) {return operator();}
// (\=\=) {return operator();}
// (\>\=) {return operator();}
// (\<\=) {return operator();}
// (\/\=) {return operator();}
// (\,) {return operator();}
// (\.) {return operator_();}
// (\.\.) {return operator();}
// (\.\.\.) {return operator();}
// {modifier} {return modifier();}
// (\#\=) {return disabled();}
// Layout
(\() {return groupBegin();}
(\)) {return groupEnd();}
(\[) {return listBegin();}
(\]) {return listEnd();}
(\{) {return recordBegin();}
(\}) {return recordEnd();}
// // Layout
// (\() {return groupBegin();}
// (\)) {return groupEnd();}
// (\[) {return listBegin();}
// (\]) {return listEnd();}
// (\{) {return recordBegin();}
// (\}) {return recordEnd();}
// Numbers
{decimal} {numberPart2=yytext(); pushState(NUMBER_PHASE2);}
// // Numbers
// {decimal} {numberPart2=yytext(); pushState(NUMBER_PHASE2);}
// Text
(\')+ {
int size = yylength();
if(size == 2) {
size = 1;
yypushback(1);
}
pushQuoteSize(size);
pushState(TEXT);
return textBegin();
}
// // Text
// (\')+ {
// int size = yylength();
// if(size == 2) {
// size = 1;
// yypushback(1);
// }
// pushQuoteSize(size);
// pushState(TEXT);
// return textBegin();
// }
// Raw Text
(\")+ {
int size = yylength();
if(size == 2) {
size = 1;
yypushback(1);
}
pushQuoteSize(size);
pushState(TEXT_RAW);
return textRawBegin();
}
// // Raw Text
// (\")+ {
// int size = yylength();
// if(size == 2) {
// size = 1;
// yypushback(1);
// }
// pushQuoteSize(size);
// pushState(TEXT_RAW);
// return textRawBegin();
// }
// Comments
(\#) {
pushState(COMMENT);
return comment();
}
// // Comments
// (\#) {
// pushState(COMMENT);
// return comment();
// }
// Layout
{whitespace}+ {whitespace();}
{newline} {pushState(NEWLINE); return newline();}
{newline} {pushState(NEWLINE);}
// Unknown
[^] {
return unmatched();
}
// // Unknown
// [^] {
// return unmatched();
// }

View File

@ -1,12 +1,163 @@
package org.enso.syntax.text
package org.enso.main
// import org.enso.syntax.text.parser.{Parser}
import java.io.{Reader,StringReader, StringWriter}
import org.enso.syntax.text.xx.{Parser}
import org.enso.syntax.text.xx.Parser.{Lexer => Tok}
import org.enso.syntax.text.xx.Parser.Lexer._
import org.enso.syntax.text.lexer.{Scanner,Token,Wildcard,EOF,EOL}
// import org.enso.syntax.text.{parser => AST}
import org.enso.syntax.text.parser.AST
/** Adapter that exposes the JFlex-generated `Scanner` through the
  * Bison-generated `Parser.Lexer` interface, plus helpers that drain the
  * scanner into vectors.
  *
  * The scanner signals end of input with the token code `-1`; once that code
  * has been seen the adapter marks itself as done.
  */
class SS(scanner:Scanner) extends Parser.Lexer {
  /** Set to true as soon as the scanner has returned the end-of-input code. */
  private var _done = false

  /** Semantic value most recently stored by the scanner in its `value` field. */
  def getLVal():Token = scanner.value

  /** Error callback of the lexer interface: prints a banner and the message. */
  def yyerror(s:String): Unit = {
    println("!!! ERROR !!!")
    println(s)
  }

  /** Next token code, taken directly from the scanner. */
  def yylex():Int = scanner.lex()

  /** Reads one token and returns its semantic value; on end of input marks the
    * adapter as done and returns a zero-sized `EOF` token instead.
    */
  def lex(): Token = {
    if (yylex() == -1) {
      _done = true
      Token(EOF,0,0)
    } else {
      getLVal()
    }
  }

  /** Collects semantic values until end of input. At least one token is always
    * read, and the final `EOF` token is part of the result.
    */
  def lexAll(): Vector[Token] = {
    val collected = Vector.newBuilder[Token]
    collected += lex()
    while (!_done) {
      collected += lex()
    }
    collected.result()
  }

  /** Like `lexAll`, but returns the token codes alongside the semantic values.
    * Both vectors have the same length; the last entries are `-1` and a
    * zero-sized `EOF` token.
    */
  def lexAll2(): (Vector[Int],Vector[Token]) = {
    val codes  = Vector.newBuilder[Int]
    val values = Vector.newBuilder[Token]
    do {
      val code  = yylex()
      val atEnd = code == -1
      if (atEnd) {
        _done = true
      }
      codes  += code
      values += (if (atEnd) Token(EOF,0,0) else getLVal())
    } while (!_done)
    (codes.result(), values.result())
  }
}
/** Hand-written combinator-style parser prototype.
  *
  * On construction the whole input is lexed eagerly through `SS.lexAll2`, so
  * `tokens`/`itokens` always contain at least the trailing end-of-input entry.
  * Parsing then walks these vectors with a cursor (`tokenIx`).
  */
class PP(reader:Reader) {
  val lexer = new SS(new Scanner(reader))

  //////////////////////
  // Token Management //
  //////////////////////

  // `itokens` holds the integer token codes, `tokens` the matching semantic
  // values; both vectors are index-aligned.
  val (itokens, tokens) = lexer.lexAll2()
  var tokenIx = 0
  var current  : Token = tokens(tokenIx)
  var icurrent : Int   = itokens(tokenIx)

  /** Advances the cursor by one token. Does nothing when the cursor already
    * sits on the last token, so `current`/`icurrent` stay valid.
    */
  def step(): Unit = {
    if (tokenIx < tokens.size - 1) {
      tokenIx += 1
      current  = tokens(tokenIx)
      icurrent = itokens(tokenIx)
    }
  }

  /** Parses one or more `tok`s and folds them left-to-right with `AST.app`.
    * Returns `None` when the first token is not accepted by `tok`.
    */
  def parse():Option[AST] = {
    manyWith(AST.app, () => tok())
  }

  /** Accepts a single `VAR` token and converts it to an AST node. */
  def tok():Option[AST] = {
    token(VAR).map(AST.fromToken)
  }

  // def block

  /** Ordered choice: returns the result of `l` when it succeeds, otherwise
    * the result of `r`. `r` is only evaluated when `l` yields `None`.
    *
    * The explicit result type matters: with procedure syntax this method
    * returned `Unit` and silently dropped the chosen alternative.
    */
  def or[T](l:()=>Option[T],r:()=>Option[T]): Option[T] = {
    l().orElse(r())
  }

  /** Consumes the current token when its code equals `t`.
    *
    * @param t expected token code
    * @return the consumed token, or `None` (cursor untouched) on mismatch
    */
  def token(t:Int):Option[Token] = {
    if (icurrent == t) {
      val result = Some(current)
      step()
      result
    } else {
      None
    }
  }

  /** Applies `f` one or more times and combines the results with `concat`,
    * left-associatively. Returns `None` when the very first `f()` fails.
    */
  def manyWith(concat:(AST,AST)=>AST,f:()=>Option[AST]): Option[AST] = {
    f().map(first => manyWith_(concat,f,first))
  }

  /** Accumulator loop of `manyWith`: keeps applying `f` and folding each
    * result into `ast` until `f` yields `None`.
    */
  def manyWith_(concat:(AST,AST)=>AST,f:()=>Option[AST],ast:AST): AST = {
    f() match {
      case None       => ast
      case Some(ast2) => manyWith_(concat,f,concat(ast,ast2))
    }
  }
}
import org.enso.syntax.text.parser.{Parser}
import java.io.{StringReader, StringWriter}
object Main extends App {
val reader = new StringReader("a =\n b")
val parser = new Parser(reader)
val ast = parser.parse
pprint.pprintln(ast,width=3,height=1000)
val str = "a b"
val reader = new StringReader(str)
val reader2 = new StringReader(str)
val scanner = new Scanner(reader)
val scanner2 = new Scanner(reader2)
val ss = new SS(scanner)
val ss2 = new SS(scanner2)
val parser = new Parser(ss)
val pp = new PP(new StringReader(str))
pprint.pprintln(ss2.lexAll)
pprint.pprintln(pp.parse)
pprint.pprintln(parser.parse)
// val parser = new Parser(reader)
// val ast = parser.parse
pprint.pprintln(parser.result,width=3,height=1000)
}

View File

@ -3,40 +3,40 @@ package org.enso.syntax.text.lexer
import java.io.{StringReader, Reader}
import scala.collection.immutable.Vector
class Lexer (reader:Reader) {
val scanner = new Scanner(reader)
private var _done = false
// class Lexer (reader:Reader) {
// val scanner = new Scanner(reader)
// private var _done = false
def this(str:String) {
this(new StringReader(str))
}
// def this(str:String) {
// this(new StringReader(str))
// }
def lex(): Token = {
if (done) {
return Token(EOF,0,0)
}
if (scanner.done) {
_done = true
return lex
}
val token = scanner.lex
if (token == null) {
_done = true
return lex
} else {
return token
}
}
// def lex(): Token = {
// if (done) {
// return Token(EOF,0,0)
// }
// if (scanner.done) {
// _done = true
// return lex
// }
// val token = scanner.lex
// if (token == null) {
// _done = true
// return lex
// } else {
// return token
// }
// }
def lexAll(): Vector[Token] = {
var builder = Vector.newBuilder[Token]
do {
builder += lex
} while (!done)
builder.result
}
// def lexAll(): Vector[Token] = {
// var builder = Vector.newBuilder[Token]
// do {
// builder += lex
// } while (!done)
// builder.result
// }
def done(): Boolean = {
return _done;
}
}
// def done(): Boolean = {
// return _done;
// }
// }

View File

@ -1,5 +1,8 @@
package org.enso.syntax.text.parser
import org.enso.syntax.text.lexer.Token
import org.enso.syntax.text.{lexer => token}
/////////
// AST //
@ -20,4 +23,24 @@ case object NONE extends Symbol
case class Var (name:String) extends Symbol
case class Operator (name:String) extends Symbol
case class App (func:AST, arg:AST) extends Symbol
case class Block (body:Vector[AST]) extends Symbol
case class Block (body:Vector[AST]) extends Symbol
//
/** Smart constructors for `AST` nodes, used by both the hand-written parser
  * and the generated grammar actions.
  */
object AST {
  /** Converts a lexer token into the corresponding AST leaf, carrying over the
    * token's `offset` and `span` so that position information is preserved
    * (previously both were hard-coded to 0, which also made every span
    * computed by `app` collapse to 0).
    *
    * Only `Var` tokens are handled; any other symbol raises a
    * `scala.MatchError`.
    */
  def fromToken(tok:Token):AST = {
    tok.symbol match {
      case token.Var(name) => AST(tok.offset,tok.span,Var(name))
    }
  }

  /** Builds an application node `fn arg`. The result takes over `fn`'s offset
    * (the nested `fn` gets offset 0) and spans both children.
    */
  def app(fn:AST, arg:AST):AST = {
    // NOTE(review): arg.offset is not added to the combined span - confirm
    // whether the gap between `fn` and `arg` is meant to be excluded.
    AST(fn.offset,fn.span + arg.span,App(fn.copy(offset=0),arg))
  }

  /** Creates a block node with no body lines, positioned at offset 0, span 0. */
  def emptyBlock():AST = {
    AST(0,0,Block(Vector()))
  }
}

View File

@ -1,186 +1,188 @@
package org.enso.syntax.text.parser
import java.io.{Reader}
import org.enso.syntax.text.lexer.{Lexer, Token}
import org.enso.syntax.text.{lexer => token}
import scala.collection.immutable.{Vector}
import scala.collection.mutable.{Builder}
// import java.io.{Reader}
// import org.enso.syntax.text.lexer.{Lexer, Token}
// import org.enso.syntax.text.{lexer => token}
// import scala.collection.immutable.{Vector}
// import scala.collection.mutable.{Builder}
class Parser(reader:Reader) {
val lexer = new Lexer(reader)
// class Parser(reader:Reader) {
// val lexer = new Lexer(reader)
//////////////////////
// Token Management //
//////////////////////
// //////////////////////
// // Token Management //
// //////////////////////
val tokens = lexer.lexAll()
var tokenIx = 0
var current : Token = tokens(tokenIx)
// val tokens = lexer.lexAll()
// var tokenIx = 0
// var current : Token = tokens(tokenIx)
def step(): Token = {
if (tokenIx == tokens.size - 1) {
return Token(token.EOF,0,0)
}
tokenIx += 1
current = tokens(tokenIx)
if (current.symbol == token.EOL) {
line += 1
column = 0
} else {
column += current.offset + current.span
}
current
}
// def step(): Token = {
// if (tokenIx == tokens.size - 1) {
// return Token(token.EOF,0,0)
// }
// tokenIx += 1
// current = tokens(tokenIx)
// if (current.symbol == token.EOL) {
// line += 1
// column = 0
// } else {
// column += current.offset + current.span
// }
// current
// }
def lookup(i:Int=1): Token = {
val ix = tokenIx + i
if (ix >= 0 && ix < tokens.size) {
tokens(ix)
} else {
Token(token.EOF,0,0)
}
}
// def lookup(i:Int=1): Token = {
// val ix = tokenIx + i
// if (ix >= 0 && ix < tokens.size) {
// tokens(ix)
// } else {
// Token(token.EOF,0,0)
// }
// }
def next(): Token = {
lookup()
}
// def next(): Token = {
// lookup()
// }
def previous(): Token = {
lookup(-1)
}
// def previous(): Token = {
// lookup(-1)
// }
// var indents : Stack[Int] = new Stack()
// indents.push(0)
// // var indents : Stack[Int] = new Stack()
// // indents.push(0)
// def indent(): Int = {
// indents.head
// }
// // def indent(): Int = {
// // indents.head
// // }
var column : Int = 0
var line : Int = 0
var indent : Int = 0
// var column : Int = 0
// var line : Int = 0
// var indent : Int = 0
def withIndent[T](newIndent:Int,f:()=>T):T = {
val oldIndent = indent
indent = newIndent
val result = f()
indent = oldIndent
result
}
// def withIndent[T](newIndent:Int,f:()=>T):T = {
// val oldIndent = indent
// indent = newIndent
// val result = f()
// indent = oldIndent
// result
// }
def parse(): AST = {
expr() match {
case Some(ast) => ast
case None => AST(0,0,NONE)
}
}
// def parse(): AST = {
// expr() match {
// case Some(ast) => ast
// case None => AST(0,0,NONE)
// }
// }
def expr(): Option[AST] = {
manyWith(app, patternToken)
// .flatMap(pat => {
// if(current.symbol == token.EOL && next.offset > indent) {
// step
// withIndent(next.offset, () => Some(app(pat,block)))
// } else {
// Some(pat)
// }
// })
}
// def expr(): Option[AST] = {
// manyWith(app, patternToken)
// // .flatMap(pat => {
// // if(current.symbol == token.EOL && next.offset > indent) {
// // step
// // withIndent(next.offset, () => Some(app(pat,block)))
// // } else {
// // Some(pat)
// // }
// // })
// }
def block(): AST = {
AST(0,0,Block(many(() => blockLine)))
}
// def block(): AST = {
// AST(0,0,Block(many(() => blockLine)))
// }
def blockLine(): Option[AST] = {
if(next.offset == indent) {
val out = expr
println("!!!!!--")
println(out)
out
} else {
None
}
}
// def blockLine(): Option[AST] = {
// if(next.offset == indent) {
// val out = expr
// println("!!!!!--")
// println(out)
// out
// } else {
// None
// }
// }
// def parseExprX(): AST = {
// current.symbol match {
// case token.Var(name) => {
// AST(Var(name),current.offset,current.span)
// }
// case x => {
// println("!!!")
// println(x)
// AST(NONE,0,0)
// }
// }
// }
// // def parseExprX(): AST = {
// // current.symbol match {
// // case token.Var(name) => {
// // AST(Var(name),current.offset,current.span)
// // }
// // case x => {
// // println("!!!")
// // println(x)
// // AST(NONE,0,0)
// // }
// // }
// // }
def patternToken(tok:Token): Option[AST] = {
tok.symbol match {
case token.Var (name) => Some(AST(tok.offset,tok.span, Var (name)))
case token.Operator (name) => Some(AST(current.offset,current.span, Operator (name)))
case token.EOL => {
if (next.offset > indent) {
step
withIndent(next.offset, () => Some(block))
} else {
None
}
}
case _ => None
}
}
def many(f:()=>Option[AST]): Vector[AST] = {
f() match {
case None => Vector()
case Some(ast) => {
step
val builder = Vector.newBuilder[AST]
builder += ast
many_(f,builder)
builder.result
}
}
}
def many_(f:()=>Option[AST], builder:Builder[AST,Vector[AST]]): Unit = {
f() match {
case None => return
case Some(ast) => {
builder += ast
many_(f,builder)
}
}
}
// def patternToken(tok:Token): Option[AST] = {
// tok.symbol match {
// case token.Var (name) => Some(AST(tok.offset,tok.span, Var (name)))
// case token.Operator (name) => Some(AST(current.offset,current.span, Operator (name)))
// case token.EOL => {
// if (next.offset > indent) {
// step
// withIndent(next.offset, () => Some(block))
// } else {
// None
// }
// }
// case _ => None
// }
// }
def manyWith(concat:(AST,AST)=>AST,f:(Token)=>Option[AST]): Option[AST] = {
f(current) match {
case None => None
case Some(ast) => {
step
Some(manyWith_(concat,f,ast))
}
}
}
// def many(f:()=>Option[AST]): Vector[AST] = {
// f() match {
// case None => Vector()
// case Some(ast) => {
// step
// val builder = Vector.newBuilder[AST]
// builder += ast
// many_(f,builder)
// builder.result
// }
// }
// }
def manyWith_(concat:(AST,AST)=>AST,f:(Token)=>Option[AST],ast:AST): AST = {
f(current) match {
case None => ast
case Some(ast2) => {
step
manyWith_(concat,f,concat(ast,ast2))
}
}
}
// def many_(f:()=>Option[AST], builder:Builder[AST,Vector[AST]]): Unit = {
// f() match {
// case None => return
// case Some(ast) => {
// builder += ast
// many_(f,builder)
// }
// }
// }
// def manyWith(concat:(AST,AST)=>AST,f:(Token)=>Option[AST]): Option[AST] = {
// f(current) match {
// case None => None
// case Some(ast) => {
// step
// Some(manyWith_(concat,f,ast))
// }
// }
// }
// def manyWith_(concat:(AST,AST)=>AST,f:(Token)=>Option[AST],ast:AST): AST = {
// f(current) match {
// case None => ast
// case Some(ast2) => {
// step
// manyWith_(concat,f,concat(ast,ast2))
// }
// }
// }
def app(func:AST, arg:AST): AST = {
AST(func.offset,func.span + arg.span,App(func.copy(offset=0),arg))
}
}
// def app(func:AST, arg:AST): AST = {
// AST(func.offset,func.span + arg.span,App(func.copy(offset=0),arg))
// }
// }

View File

@ -0,0 +1,178 @@
%language "Java"
%name-prefix "Parser"
%define parser_class_name "Parser"
%define public
%code imports {
package org.enso.syntax.text.xx;
import org.enso.syntax.text.parser.AST;
import org.enso.syntax.text.lexer.Token;
}
%code {
public AST result;
}
// public static void main (String args[]) throws IOException
// {
// CalcLexer l = new CalcLexer (System.in);
// Calc p = new Calc (l);
// p.parse ();
// }
// }
// %define api.value.type {Token}
/* Bison Declarations */
%token <Token> VAR
%token <Token> EOL
%token <Token> BLOCK_BEGIN
%token <Token> BLOCK_END
%token <AST> CONS
%type <AST> expr
%type <AST> block
%type <AST> blockBody
%type <AST> tok
%start program
/////////////
// Grammar //
/////////////
%%
// Entry point: the input is either a single expression, whose AST is stored in
// the parser's public `result` field, or empty (in which case `result` is not
// assigned).
program:
expr {result=$1;}
| /* empty */
// An expression is a left-recursive sequence of tokens and blocks; each further
// element is attached to what was parsed so far with AST.app.
expr:
tok {$$=$1;}
| expr tok {$$=AST.app($1,$2);}
| expr block {$$=AST.app($1,$2);}
// A block starts with BLOCK_BEGIN and takes the value of its body.
block:
BLOCK_BEGIN blockBody {$$=$2;}
// Block body: expressions separated by EOL, terminated by BLOCK_END.
// NOTE(review): the first alternative yields only the first expression ($3 is
// dropped) and the second yields an empty block ($1 is dropped), so no Block
// containing the parsed lines is built yet - looks like work in progress.
blockBody:
expr EOL blockBody {$$=$1;}
| expr BLOCK_END {$$=AST.emptyBlock();}
// A single VAR token converted to an AST leaf.
tok:
VAR {$$=AST.fromToken($1);}
// {
// if ($1.intValue () != $3.intValue ())
// yyerror ( "calc: error: " + $1 + " != " + $3);
// }
// | exp '-' exp { $$ = new Integer ($1.intValue () - $3.intValue ()); }
// | exp '*' exp { $$ = new Integer ($1.intValue () * $3.intValue ()); }
// | exp '/' exp { $$ = new Integer ($1.intValue () / $3.intValue ()); }
// | '-' exp %prec NEG { $$ = new Integer (-$2.intValue ()); }
// | exp '^' exp { $$ = new Integer ((int)
// Math.pow ($1.intValue (),
// $3.intValue ())); }
// | '(' exp ')' { $$ = $2; }
// | '(' error ')' { $$ = new Integer (1111); }
// | '!' { $$ = new Integer (0); return YYERROR; }
// | '-' error { $$ = new Integer (0); return YYERROR; }
// ;
%%
// class CalcLexer implements Calc.Lexer {
// StreamTokenizer st;
// public CalcLexer (InputStream is)
// {
// st = new StreamTokenizer (new InputStreamReader (is));
// st.resetSyntax ();
// st.eolIsSignificant (true);
// st.whitespaceChars (9, 9);
// st.whitespaceChars (32, 32);
// st.wordChars (48, 57);
// }
// public void yyerror (String s)
// {
// System.err.println (s);
// }
// Integer yylval;
// public Object getLVal() {
// return yylval;
// }
// public int yylex () throws IOException {
// int ttype = st.nextToken ();
// if (ttype == st.TT_EOF)
// return Calc.EOF;
// else if (ttype == st.TT_EOL)
// {
// return (int) '\n';
// }
// else if (ttype == st.TT_WORD)
// {
// yylval = new Integer (st.sval);
// return Calc.NUMX;
// }
// else
// return st.ttype;
// }
// }
// class Position {
// public int line;
// public int token;
// public Position ()
// {
// line = 0;
// token = 0;
// }
// public Position (int l, int t)
// {
// line = l;
// token = t;
// }
// public boolean equals (Position l)
// {
// return l.line == line && l.token == token;
// }
// public String toString ()
// {
// return Integer.toString (line) + "." + Integer.toString(token);
// }
// public int lineno ()
// {
// return line;
// }
// public int token ()
// {
// return token;
// }
// }