From 815deda92bbe923864e0b710adf580ec5f42d985 Mon Sep 17 00:00:00 2001
From: Wojciech Danilo
Date: Sat, 1 Jun 2019 20:32:49 +0200
Subject: [PATCH] Initial commit

---
 .gitignore                                    |   5 +
 build.sbt                                     |  82 +++++++++++++++
 src/main/java/Utility.java                    |  34 ++++++
 src/main/java/Yytoken.java                    |  48 +++++++++
 src/main/jflex/test.flex                      | 101 ++++++++++++++++++
 src/main/scala/Main.scala                     |  55 ++++++++++
 .../org/enso/syntax/text/lexer/Token.scala    | 104 +++++++++++++++++++
 7 files changed, 429 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 build.sbt
 create mode 100644 src/main/java/Utility.java
 create mode 100644 src/main/java/Yytoken.java
 create mode 100644 src/main/jflex/test.flex
 create mode 100644 src/main/scala/Main.scala
 create mode 100644 src/main/scala/org/enso/syntax/text/lexer/Token.scala

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000..6ba19f7f113
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# Build output at any depth; this also covers project/target/.
+target/
+# sbt meta-build output. The rest of project/ stays trackable because sbt reads
+# plugin and sbt-version declarations from it.
+project/project/
diff --git a/build.sbt b/build.sbt
new file mode 100644
index 00000000000..7c47865094c
--- /dev/null
+++ b/build.sbt
@@ -0,0 +1,82 @@
+// The simplest possible sbt build file is just one line:
+
+scalaVersion := "2.12.8"
+// That is, to create a valid sbt build, all you've got to do is define the
+// version of Scala you'd like your project to use.
+
+// ============================================================================
+
+// Lines like the above defining `scalaVersion` are called "settings". Settings
+// are key/value pairs. In the case of `scalaVersion`, the key is "scalaVersion"
+// and the value is "2.12.8".
+
+// It's possible to define many kinds of settings, such as:
+
+name := "hello-world"
+organization := "ch.epfl.scala"
+version := "1.0"
+
+// Note, it's not required for you to define these three settings. These are
+// mostly only necessary if you intend to publish your library's binaries on a
+// place like Sonatype or Bintray.
+
+
+// Want to use a published library in your project?
+// You can define other libraries as dependencies in your build like this:
+libraryDependencies += "org.typelevel" %% "cats-core" % "1.6.0"
+// Here, `libraryDependencies` is a set of dependencies, and by using `+=`,
+// we're adding the cats dependency to the set of dependencies that sbt will go
+// and fetch when it starts up.
+// Now, in any Scala file, you can import classes, objects, etc, from cats with
+// a regular import.
+
+// TIP: To find the "dependency" that you need to add to the
+// `libraryDependencies` set, which in the above example looks like this:
+
+// "org.typelevel" %% "cats-core" % "1.6.0"
+
+// You can use Scaladex, an index of all known published Scala libraries. There,
+// after you find the library you want, you can just copy/paste the dependency
+// information that you need into your build file. For example, on the
+// typelevel/cats Scaladex page,
+// https://index.scala-lang.org/typelevel/cats, you can copy/paste the sbt
+// dependency from the sbt box on the right-hand side of the screen.
+
+// IMPORTANT NOTE: while build files look _kind of_ like regular Scala, it's
+// important to note that syntax in *.sbt files doesn't always behave like
+// regular Scala. For example, notice in this build file that it's not required
+// to put our settings into an enclosing object or class. Always remember that
+// sbt is a bit different, semantically, than vanilla Scala.
+
+// ============================================================================
+
+// Most moderately interesting Scala projects don't make use of the very simple
+// build file style (called "bare style") used in this build.sbt file. Most
+// intermediate Scala projects make use of so-called "multi-project" builds. A
+// multi-project build makes it possible to have different folders which sbt can
+// be configured differently for. That is, you may wish to have different
+// dependencies or different testing frameworks defined for different parts of
+// your codebase. Multi-project builds make this possible.
+
+// Here's a quick glimpse of what a multi-project build looks like for this
+// build, with only one "subproject" defined, called `root`:
+
+// lazy val root = (project in file(".")).
+//   settings(
+//     inThisBuild(List(
+//       organization := "ch.epfl.scala",
+//       scalaVersion := "2.12.8"
+//     )),
+//     name := "hello-world"
+//   )
+
+// To learn more about multi-project builds, head over to the official sbt
+// documentation at http://www.scala-sbt.org/documentation.html
+
+// Apply the sbt JFlex plugin's settings. `SbtJFlexPlugin` has to come from a
+// plugin declared under project/; presumably it generates the `Lexer` class
+// from src/main/jflex/test.flex -- verify against the plugin in use.
+SbtJFlexPlugin.jflexSettings
+
+// Class started by `sbt run`.
+mainClass in (Compile, run) := Some("org.enso.syntax.text.lexer.Main")
\ No newline at end of file
diff --git a/src/main/java/Utility.java b/src/main/java/Utility.java
new file mode 100644
index 00000000000..c1abe70aa71
--- /dev/null
+++ b/src/main/java/Utility.java
@@ -0,0 +1,34 @@
+/** A small utility class that reports lexer errors on standard error. */
+// TODO: use resource bundle
+
+package org.enso.syntax.text.lexer;
+
+class Utility {
+
+  /** Messages, indexed by the {@code E_*} error codes below. */
+  private static final String[] errorMsg = {
+    "Error: Unmatched end-of-comment punctuation.",
+    "Error: Unmatched start-of-comment punctuation.",
+    "Error: Unclosed string.",
+    "Error: Illegal character."
+  };
+
+  public static final int E_ENDCOMMENT = 0;
+  public static final int E_STARTCOMMENT = 1;
+  public static final int E_UNCLOSEDSTR = 2;
+  public static final int E_UNMATCHED = 3;
+
+  /**
+   * Prints the message registered for {@code code} to {@code System.err}.
+   *
+   * @param code one of the {@code E_*} constants; any other value is reported
+   *             as an unknown error code instead of throwing
+   */
+  public static void error(int code) {
+    if (code < 0 || code >= errorMsg.length) {
+      System.err.println("Error: Unknown error code " + code + ".");
+      return;
+    }
+    System.err.println(errorMsg[code]);
+  }
+}
\ No newline at end of file
diff --git a/src/main/java/Yytoken.java b/src/main/java/Yytoken.java
new file mode 100644
index 00000000000..c9273dab0a6
--- /dev/null
+++ b/src/main/java/Yytoken.java
@@ -0,0 +1,48 @@
+/** The tokens returned by the scanner. */
+
+package org.enso.syntax.text.lexer;
+
+
+class Yytoken {
+  /** Token kind: the numeric id hard-coded in the lexer rule that matched. */
+  public int m_index;
+  /** Matched text (string tokens carry their contents without the quotes). */
+  public String m_text;
+  /** Value of the scanner's {@code yyline} when the token was matched. */
+  public int m_line;
+  /** Value of the scanner's {@code yychar} at the start of the match. */
+  public int m_charBegin;
+  /** {@code m_charBegin} plus the length of the match. */
+  public int m_charEnd;
+
+  Yytoken(int index, String text, int line, int charBegin, int charEnd) {
+    m_index = index;
+    m_text = text;
+    m_line = line;
+    m_charBegin = charBegin;
+    m_charEnd = charEnd;
+  }
+
+  /** Multi-line, human-readable dump of every field. */
+  @Override
+  public String toString() {
+    return "Text : "
+      + m_text
+      + "\nindex : "
+      + m_index
+      + "\nline : "
+      + m_line
+      + "\ncBeg. : "
+      + m_charBegin
+      + "\ncEnd. : "
+      + m_charEnd;
+  }
+
+  /**
+   * Scratch method: ignores {@code i} and always returns {@code Var("foo")}.
+   * NOTE(review): looks like a Java/Scala interop check -- confirm before removing.
+   */
+  Token test(int i) {
+    return new Var("foo");
+  }
+}
diff --git a/src/main/jflex/test.flex b/src/main/jflex/test.flex
new file mode 100644
index 00000000000..a7c1406cfbb
--- /dev/null
+++ b/src/main/jflex/test.flex
@@ -0,0 +1,101 @@
+/* this is the scanner example from the JLex website
+   (with small modifications to make it more readable) */
+
+package org.enso.syntax.text.lexer;
+
+%%
+
+%{
+  /* Nesting depth of the block comment currently being skipped. */
+  private int comment_count = 0;
+
+  /* Exposes the generated scanner's end-of-input flag. */
+  public boolean done(){
+    return zzAtEOF;
+  }
+%}
+
+%class Lexer
+%line
+%column
+%char
+%state COMMENT
+%unicode
+
+%debug
+
+ALPHA=[A-Za-z]
+DIGIT=[0-9]
+/* NOTE(review): \012 is octal for line feed, so this class matches LF despite
+   its name; left as in the JLex example. */
+NONNEWLINE_WHITE_SPACE_CHAR=[\ \t\b\012]
+NEWLINE=\r|\n|\r\n
+WHITE_SPACE_CHAR=[\n\r\ \t\b\012]
+STRING_TEXT=(\\\"|[^\n\r\"]|\\{WHITE_SPACE_CHAR}+\\)*
+COMMENT_TEXT=([^*/\n]|[^*\n]"/"[^*\n]|[^/\n]"*"[^/\n]|"*"[^/\n]|"/"[^*\n])+
+Ident = {ALPHA}({ALPHA}|{DIGIT}|_)*
+
+%%
+
+<YYINITIAL> {
+  /* Punctuation and operators: token ids 0-22. */
+  "," { return (new Yytoken(0,yytext(),yyline,yychar,yychar+1)); }
+  ":" { return (new Yytoken(1,yytext(),yyline,yychar,yychar+1)); }
+  ";" { return (new Yytoken(2,yytext(),yyline,yychar,yychar+1)); }
+  "(" { return (new Yytoken(3,yytext(),yyline,yychar,yychar+1)); }
+  ")" { return (new Yytoken(4,yytext(),yyline,yychar,yychar+1)); }
+  "[" { return (new Yytoken(5,yytext(),yyline,yychar,yychar+1)); }
+  "]" { return (new Yytoken(6,yytext(),yyline,yychar,yychar+1)); }
+  "{" { return (new Yytoken(7,yytext(),yyline,yychar,yychar+1)); }
+  "}" { return (new Yytoken(8,yytext(),yyline,yychar,yychar+1)); }
+  "." { return (new Yytoken(9,yytext(),yyline,yychar,yychar+1)); }
+  "+" { return (new Yytoken(10,yytext(),yyline,yychar,yychar+1)); }
+  "-" { return (new Yytoken(11,yytext(),yyline,yychar,yychar+1)); }
+  "*" { return (new Yytoken(12,yytext(),yyline,yychar,yychar+1)); }
+  "/" { return (new Yytoken(13,yytext(),yyline,yychar,yychar+1)); }
+  "=" { return (new Yytoken(14,yytext(),yyline,yychar,yychar+1)); }
+  "<>" { return (new Yytoken(15,yytext(),yyline,yychar,yychar+2)); }
+  "<" { return (new Yytoken(16,yytext(),yyline,yychar,yychar+1)); }
+  "<=" { return (new Yytoken(17,yytext(),yyline,yychar,yychar+2)); }
+  ">" { return (new Yytoken(18,yytext(),yyline,yychar,yychar+1)); }
+  ">=" { return (new Yytoken(19,yytext(),yyline,yychar,yychar+2)); }
+  "&" { return (new Yytoken(20,yytext(),yyline,yychar,yychar+1)); }
+  "|" { return (new Yytoken(21,yytext(),yyline,yychar,yychar+1)); }
+  ":=" { return (new Yytoken(22,yytext(),yyline,yychar,yychar+2)); }
+
+  {NONNEWLINE_WHITE_SPACE_CHAR}+ { }
+
+  "/*" { yybegin(COMMENT); comment_count++; }
+
+  \"{STRING_TEXT}\" {
+    String str = yytext().substring(1,yylength()-1);
+    return (new Yytoken(40,str,yyline,yychar,yychar+yylength()));
+  }
+
+  \"{STRING_TEXT} {
+    String str = yytext().substring(1,yytext().length());
+    Utility.error(Utility.E_UNCLOSEDSTR);
+    /* The match includes the opening quote, so the end offset is derived
+       from yylength() rather than from the quote-stripped text. */
+    return (new Yytoken(41,str,yyline,yychar,yychar+yylength()));
+  }
+
+  {DIGIT}+ { return (new Yytoken(42,yytext(),yyline,yychar,yychar+yylength())); }
+
+  {Ident} { return (new Yytoken(43,yytext(),yyline,yychar,yychar+yylength())); }
+}
+
+<COMMENT> {
+  /* Block comments nest: leave COMMENT only when the depth is back to 0. */
+  "/*" { comment_count++; }
+  "*/" { if (--comment_count == 0) yybegin(YYINITIAL); }
+  {COMMENT_TEXT} { }
+}
+
+
+{NEWLINE} { }
+
+. {
+  System.out.println("Illegal character: <" + yytext() + ">");
+  Utility.error(Utility.E_UNMATCHED);
+}
diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala
new file mode 100644
index 00000000000..09d85e39fa7
--- /dev/null
+++ b/src/main/scala/Main.scala
@@ -0,0 +1,55 @@
+
+// import org.enso.Yylex
+
+package org.enso.syntax.text.lexer
+import java.io.{StringReader, StringWriter}
+// import org.enso.syntax.text.lexer.{Token}
+
+/** Demo entry point: feeds a fixed input to the generated `Lexer` and prints
+  * every token it returns.
+  */
+object Main {
+  def main(args: Array[String]): Unit = {
+    println("Hello, World!!")
+    // Only the commented-out draft below refers to these two values.
+    val firstFilePos = 0
+    val encodingName = "UTF-8"
+    // if (argv[0].equals("--encoding")) {
+    // firstFilePos = 2
+    // encodingName = argv[1]
+    // try {
+    // java.nio.charset.Charset.forName(encodingName) // Side-effect: is encodingName valid?
+    // } catch (Exception e) {
+    // System.out.println("Invalid encoding '" + encodingName + "'")
+    // return
+    // }
+    // }
+    // for (int i = firstFilePos i < argv.length i++) {
+    // Yylex scanner = null
+    // // try {
+    // // java.io.FileInputStream stream = new java.io.FileInputStream(argv[i])
+    // // java.io.Reader reader = new java.io.InputStreamReader(stream, encodingName)
+    val reader = new StringReader("15 17")
+    val scanner = new Lexer(reader)
+    // `yylex()` yields `null` at end of input: stop there instead of printing
+    // the `null` and relying on `done()` after the fact.
+    Iterator
+      .continually(scanner.yylex())
+      .takeWhile(_ != null)
+      .foreach(token => System.out.println(token))
+
+    // }
+    // catch (java.io.FileNotFoundException e) {
+    // System.out.println("File not found : \""+argv[i]+"\"")
+    // }
+    // catch (java.io.IOException e) {
+    // System.out.println("IO error scanning file \""+argv[i]+"\"")
+    // System.out.println(e)
+    // }
+    // catch (Exception e) {
+    // System.out.println("Unexpected exception:")
+    // e.printStackTrace()
+    // }
+    // }
+  }
+}
\ No newline at end of file
diff --git a/src/main/scala/org/enso/syntax/text/lexer/Token.scala b/src/main/scala/org/enso/syntax/text/lexer/Token.scala
new file mode 100644
index 00000000000..58ac2850ecf
--- /dev/null
+++ b/src/main/scala/org/enso/syntax/text/lexer/Token.scala
@@ -0,0 +1,104 @@
+/** The tokens returned by the scanner. */
+
+package org.enso.syntax.text.lexer
+
+// class Token (index:Int, text:String, line:Int, charBegin:Int, charEnd:Int) {
+
+// override def toString() : String = {
+// return "Text : " + text + "\nindex : " + index + "\nline : " + line + "\ncBeg. : " + charBegin + "\ncEnd. : " + charEnd;
+// }
+// }
+
+// sealed trait Tree[+A]
+// case object Leaf extends Tree[Nothing]
+// case class Node[A](data: A, left: Tree[A], right: Tree[A]) extends Tree[A]
+
+/** Begin/end marker. No token uses it yet; the Haskell draft below attaches
+  * it to the `Block`, `Group`, `Quote` and `List` symbols.
+  */
+sealed abstract class Bound
+case object Begin extends Bound
+case object End extends Bound
+
+/** Base type of the tokens. Sealed, so every subtype lives in this file and
+  * pattern matches over tokens are checked for exhaustiveness.
+  */
+sealed abstract class Token
+// Identifier tokens; they mirror the "Ident" group of the draft below.
+final case class Var (name:String) extends Token
+final case class Cons (name:String) extends Token
+case object Wildcard extends Token
+
+// data Symbol
+// // Layout
+// case class STX
+// case class ETX
+// case class EOL
+// case class Terminator
+// case class BlockStart
+// case class Block !Bound
+// case class Group !Bound
+// case class Marker !Word64
+
+// -- Ident
+// | Var !Text32
+// | Cons !Text32
+// | Wildcard
+
+// -- Keyword
+// | KwCase
+// | KwClass
+// | KwDef
+// | KwForeign
+// | KwImport
+// | KwNative
+// | KwOf
+
+// -- Operator
+// | Operator !Text32
+// | Modifier !Text32
+// | Accessor
+
+// -- | Arrow
+// | Assignment
+// | Typed
+// | TypeApp
+// | Merge
+// | Range
+// | Anything
+
+// -- Literal
+// | Number !Number
+// | Quote !StrType !Bound
+// | Str !Text32
+// | StrEsc !StrEscType
+// | List !Bound
+
+// -- Comment
+// | Disable
+// | Doc !Text32
+
+// -- Config
+// | Metadata !Text32
+// -- | Pragma ...
+
+// -- Other
+// | Unknown !Text32 -- DEPRECATED
+// | Incorrect !Text32 -- DEPRECATED
+// | Invalid !Invalid.Symbol
+// deriving (Eq, Generic, Ord, Show)
+
+// data StrEscType
+// = CharStrEsc !Int
+// | NumStrEsc !Int
+// | QuoteEscape !StrType
+// | SlashEsc
+// deriving (Eq, Generic, Ord, Show)
+
+// data Bound = Begin | End deriving (Eq, Generic, Ord, Show)
+// data StrType = RawStr | FmtStr | NatStr deriving (Eq, Generic, Ord, Show)
+// data Number = NumRep
+// { _base :: Word8
+// , _intPart :: [Word8]
+// , _fracPart :: [Word8]
+// } deriving (Eq, Generic, Ord, Show)
\ No newline at end of file