From 815deda92bbe923864e0b710adf580ec5f42d985 Mon Sep 17 00:00:00 2001
From: Wojciech Danilo <wojciech.danilo@gmail.com>
Date: Sat, 1 Jun 2019 20:32:49 +0200
Subject: [PATCH] Initial commit

---
 .gitignore                                    |  2 +
 build.sbt                                     | 78 +++++++++++++++
 src/main/java/Utility.java                    | 23 +++++
 src/main/java/Yytoken.java                    | 36 +++++++
 src/main/jflex/test.flex                      | 93 ++++++++++++++++++
 src/main/scala/Main.scala                     | 46 +++++++++
 .../org/enso/syntax/text/lexer/Token.scala    | 97 +++++++++++++++++++
 7 files changed, 375 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 build.sbt
 create mode 100644 src/main/java/Utility.java
 create mode 100644 src/main/java/Yytoken.java
 create mode 100644 src/main/jflex/test.flex
 create mode 100644 src/main/scala/Main.scala
 create mode 100644 src/main/scala/org/enso/syntax/text/lexer/Token.scala

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000..6ba19f7f113
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+project/
+target/
diff --git a/build.sbt b/build.sbt
new file mode 100644
index 00000000000..7c47865094c
--- /dev/null
+++ b/build.sbt
@@ -0,0 +1,78 @@
+// The simplest possible sbt build file is just one line:
+
+scalaVersion := "2.12.8"
+// That is, to create a valid sbt build, all you've got to do is define the
+// version of Scala you'd like your project to use.
+
+// ============================================================================
+
+// Lines like the above defining `scalaVersion` are called "settings". Settings
+// are key/value pairs. In the case of `scalaVersion`, the key is "scalaVersion"
+// and the value is "2.12.8".
+
+// It's possible to define many kinds of settings, such as:
+
+name := "hello-world"
+organization := "ch.epfl.scala"
+version := "1.0"
+
+// Note, it's not required for you to define these three settings. These are
+// mostly only necessary if you intend to publish your library's binaries on a
+// place like Sonatype or Bintray.
+
+
+// Want to use a published library in your project?
+// You can define other libraries as dependencies in your build like this:
+libraryDependencies += "org.typelevel" %% "cats-core" % "1.6.0"
+// Here, `libraryDependencies` is a set of dependencies, and by using `+=`,
+// we're adding the cats dependency to the set of dependencies that sbt will go
+// and fetch when it starts up.
+// Now, in any Scala file, you can import classes, objects, etc, from cats with
+// a regular import.
+
+// TIP: To find the "dependency" that you need to add to the
+// `libraryDependencies` set, which in the above example looks like this:
+
+// "org.typelevel" %% "cats-core" % "1.6.0"
+
+// You can use Scaladex, an index of all known published Scala libraries. There,
+// after you find the library you want, you can just copy/paste the dependency
+// information that you need into your build file. For example, on the
+// typelevel/cats Scaladex page,
+// https://index.scala-lang.org/typelevel/cats, you can copy/paste the sbt
+// dependency from the sbt box on the right-hand side of the screen.
+
+// IMPORTANT NOTE: while build files look _kind of_ like regular Scala, it's
+// important to note that syntax in *.sbt files doesn't always behave like
+// regular Scala. For example, notice in this build file that it's not required
+// to put our settings into an enclosing object or class. Always remember that
+// sbt is a bit different, semantically, than vanilla Scala.
+
+// ============================================================================
+
+// Most moderately interesting Scala projects don't make use of the very simple
+// build file style (called "bare style") used in this build.sbt file. Most
+// intermediate Scala projects make use of so-called "multi-project" builds. A
+// multi-project build makes it possible to have different folders which sbt can
+// be configured differently for. That is, you may wish to have different
+// dependencies or different testing frameworks defined for different parts of
+// your codebase. Multi-project builds make this possible.
+
+// Here's a quick glimpse of what a multi-project build looks like for this
+// build, with only one "subproject" defined, called `root`:
+
+// lazy val root = (project in file(".")).
+//   settings(
+//     inThisBuild(List(
+//       organization := "ch.epfl.scala",
+//       scalaVersion := "2.12.8"
+//     )),
+//     name := "hello-world"
+//   )
+
+// To learn more about multi-project builds, head over to the official sbt
+// documentation at http://www.scala-sbt.org/documentation.html
+
+SbtJFlexPlugin.jflexSettings
+
+mainClass in (Compile, run) := Some("org.enso.syntax.text.lexer.Main")
\ No newline at end of file
diff --git a/src/main/java/Utility.java b/src/main/java/Utility.java
new file mode 100644
index 00000000000..c1abe70aa71
--- /dev/null
+++ b/src/main/java/Utility.java
@@ -0,0 +1,23 @@
+/** A small utility class. */
+// TODO: use resource bundle
+
+package org.enso.syntax.text.lexer;
+
+class Utility {
+
+  private static final String errorMsg[] = {
+    "Error: Unmatched end-of-comment punctuation.",
+    "Error: Unmatched start-of-comment punctuation.",
+    "Error: Unclosed string.",
+    "Error: Illegal character."
+  };
+
+  public static final int E_ENDCOMMENT = 0;
+  public static final int E_STARTCOMMENT = 1;
+  public static final int E_UNCLOSEDSTR = 2;
+  public static final int E_UNMATCHED = 3;
+
+  public static void error(int code) {
+    System.err.println(errorMsg[code]);
+  }
+}
\ No newline at end of file
diff --git a/src/main/java/Yytoken.java b/src/main/java/Yytoken.java
new file mode 100644
index 00000000000..c9273dab0a6
--- /dev/null
+++ b/src/main/java/Yytoken.java
@@ -0,0 +1,36 @@
+package org.enso.syntax.text.lexer;
+
+/**
+ * A token returned by the scanner: a numeric kind, its text and its position.
+ */
+class Yytoken {
+  public int m_index; // numeric token kind chosen by the lexer rule
+  public String m_text; // text carried by the token
+  public int m_line; // line value supplied by the scanner
+  public int m_charBegin; // start character offset supplied by the scanner
+  public int m_charEnd; // end character offset supplied by the scanner
+
+  /** Creates a token, storing each argument unchanged in the matching field. */
+  Yytoken(int index, String text, int line, int charBegin, int charEnd) {
+    m_index = index;
+    m_text = text;
+    m_line = line;
+    m_charBegin = charBegin;
+    m_charEnd = charEnd;
+  }
+
+  /** Returns a multi-line, human-readable listing of every field. */
+  @Override
+  public String toString() {
+    return "Text   : " + m_text
+        + "\nindex : " + m_index
+        + "\nline  : " + m_line
+        + "\ncBeg. : " + m_charBegin
+        + "\ncEnd. : " + m_charEnd;
+  }
+
+  /** Placeholder: ignores {@code i} and always returns {@code new Var("foo")}. */
+  Token test(int i) {
+    return new Var("foo");
+  }
+}
diff --git a/src/main/jflex/test.flex b/src/main/jflex/test.flex
new file mode 100644
index 00000000000..a7c1406cfbb
--- /dev/null
+++ b/src/main/jflex/test.flex
@@ -0,0 +1,93 @@
+/* this is the scanner example from the JLex website 
+   (with small modifications to make it more readable) */
+
+package org.enso.syntax.text.lexer;
+
+%%
+
+%{
+  private int comment_count = 0; // "/*" openers seen but not yet closed (see the COMMENT rules)
+  /** Returns the generated scanner's zzAtEOF flag. */
+  public boolean done(){
+    return zzAtEOF;
+  }
+%} 
+
+%class Lexer
+%line
+%column
+%char
+%state COMMENT
+%unicode
+
+%debug
+
+ALPHA=[A-Za-z]
+DIGIT=[0-9]
+NONNEWLINE_WHITE_SPACE_CHAR=[\ \t\b\012]
+NEWLINE=\r|\n|\r\n
+WHITE_SPACE_CHAR=[\n\r\ \t\b\012]
+STRING_TEXT=(\\\"|[^\n\r\"]|\\{WHITE_SPACE_CHAR}+\\)*
+COMMENT_TEXT=([^*/\n]|[^*\n]"/"[^*\n]|[^/\n]"*"[^/\n]|"*"[^/\n]|"/"[^*\n])+
+Ident = {ALPHA}({ALPHA}|{DIGIT}|_)*
+
+%% 
+
+<YYINITIAL> {
+  "," { return (new Yytoken(0,yytext(),yyline,yychar,yychar+1)); }
+  ":" { return (new Yytoken(1,yytext(),yyline,yychar,yychar+1)); }
+  ";" { return (new Yytoken(2,yytext(),yyline,yychar,yychar+1)); }
+  "(" { return (new Yytoken(3,yytext(),yyline,yychar,yychar+1)); }
+  ")" { return (new Yytoken(4,yytext(),yyline,yychar,yychar+1)); }
+  "[" { return (new Yytoken(5,yytext(),yyline,yychar,yychar+1)); }
+  "]" { return (new Yytoken(6,yytext(),yyline,yychar,yychar+1)); }
+  "{" { return (new Yytoken(7,yytext(),yyline,yychar,yychar+1)); }
+  "}" { return (new Yytoken(8,yytext(),yyline,yychar,yychar+1)); }
+  "." { return (new Yytoken(9,yytext(),yyline,yychar,yychar+1)); }
+  "+" { return (new Yytoken(10,yytext(),yyline,yychar,yychar+1)); }
+  "-" { return (new Yytoken(11,yytext(),yyline,yychar,yychar+1)); }
+  "*" { return (new Yytoken(12,yytext(),yyline,yychar,yychar+1)); }
+  "/" { return (new Yytoken(13,yytext(),yyline,yychar,yychar+1)); }
+  "=" { return (new Yytoken(14,yytext(),yyline,yychar,yychar+1)); }
+  "<>" { return (new Yytoken(15,yytext(),yyline,yychar,yychar+2)); }
+  "<"  { return (new Yytoken(16,yytext(),yyline,yychar,yychar+1)); }
+  "<=" { return (new Yytoken(17,yytext(),yyline,yychar,yychar+2)); }
+  ">"  { return (new Yytoken(18,yytext(),yyline,yychar,yychar+1)); }
+  ">=" { return (new Yytoken(19,yytext(),yyline,yychar,yychar+2)); }
+  "&"  { return (new Yytoken(20,yytext(),yyline,yychar,yychar+1)); }
+  "|"  { return (new Yytoken(21,yytext(),yyline,yychar,yychar+1)); }
+  ":=" { return (new Yytoken(22,yytext(),yyline,yychar,yychar+2)); }
+
+  {NONNEWLINE_WHITE_SPACE_CHAR}+ { }
+
+  "/*" { yybegin(COMMENT); comment_count++; }
+
+  \"{STRING_TEXT}\" {
+    String str =  yytext().substring(1,yylength()-1);
+    return (new Yytoken(40,str,yyline,yychar,yychar+yylength()));
+  }
+  
+  \"{STRING_TEXT} {
+    // Unterminated string: report it; the span ends at yychar+yylength() like every other rule.
+    Utility.error(Utility.E_UNCLOSEDSTR);
+    return (new Yytoken(41,yytext().substring(1),yyline,yychar,yychar+yylength()));
+  }
+  
+  {DIGIT}+ { return (new Yytoken(42,yytext(),yyline,yychar,yychar+yylength())); }  
+
+  {Ident} { return (new Yytoken(43,yytext(),yyline,yychar,yychar+yylength())); }  
+}
+
+<COMMENT> {
+  "/*" { comment_count++; /* one more nested comment is open */ }
+  "*/" { if (--comment_count == 0) yybegin(YYINITIAL); /* leave once the outermost one closes */ }
+  {COMMENT_TEXT} { /* comment body is discarded */ }
+}
+
+
+{NEWLINE} { }
+
+. {
+  System.out.println("Illegal character: <" + yytext() + ">"); // echo the offending text on stdout
+	Utility.error(Utility.E_UNMATCHED); // then print the generic message on stderr
+}
diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala
new file mode 100644
index 00000000000..09d85e39fa7
--- /dev/null
+++ b/src/main/scala/Main.scala
@@ -0,0 +1,46 @@
+
+// import org.enso.Yylex
+
+package org.enso.syntax.text.lexer
+import java.io.{StringReader, StringWriter}
+// import org.enso.syntax.text.lexer.{Token}
+
+object Main extends App {
+  println("Hello, World!!")
+  val firstFilePos = 0
+  val encodingName = "UTF-8"
+  // if (argv[0].equals("--encoding")) {
+  //   firstFilePos = 2
+  //   encodingName = argv[1]
+  //   try {
+  //     java.nio.charset.Charset.forName(encodingName) // Side-effect: is encodingName valid? 
+  //   } catch (Exception e) {
+  //     System.out.println("Invalid encoding '" + encodingName + "'")
+  //     return
+  //   }
+  // }
+  // for (int i = firstFilePos i < argv.length i++) {
+    // Yylex scanner = null
+    // // try {
+    //   // java.io.FileInputStream stream = new java.io.FileInputStream(argv[i])
+    //   // java.io.Reader reader = new java.io.InputStreamReader(stream, encodingName)
+      val reader = new StringReader("15 17")
+      val scanner = new Lexer(reader)
+      // yylex() yields null once the input is exhausted; stop there instead of printing it.
+      Iterator.continually(scanner.yylex()).takeWhile(_ != null)
+        .foreach(token => System.out.println(token))
+
+    // }
+    // catch (java.io.FileNotFoundException e) {
+    //   System.out.println("File not found : \""+argv[i]+"\"")
+    // }
+    // catch (java.io.IOException e) {
+    //   System.out.println("IO error scanning file \""+argv[i]+"\"")
+    //   System.out.println(e)
+    // }
+    // catch (Exception e) {
+    //   System.out.println("Unexpected exception:")
+    //   e.printStackTrace()
+    // }
+  // }
+}
\ No newline at end of file
diff --git a/src/main/scala/org/enso/syntax/text/lexer/Token.scala b/src/main/scala/org/enso/syntax/text/lexer/Token.scala
new file mode 100644
index 00000000000..58ac2850ecf
--- /dev/null
+++ b/src/main/scala/org/enso/syntax/text/lexer/Token.scala
@@ -0,0 +1,97 @@
+/** The tokens returned by the scanner. */
+
+package org.enso.syntax.text.lexer
+
+// class Token (index:Int, text:String, line:Int, charBegin:Int, charEnd:Int) {
+
+//   override def toString() : String = {
+//     return "Text   : " + text + "\nindex : " + index + "\nline  : " + line + "\ncBeg. : " + charBegin + "\ncEnd. : " + charEnd;
+//   }
+// }
+
+// sealed trait Tree[+A]
+// case object Leaf extends Tree[Nothing]
+// case class  Node[A](data: A, left: Tree[A], right: Tree[A]) extends Tree[A]
+
+abstract class Bound // mirrors `data Bound = Begin | End` from the notes at the end of this file
+case object Begin extends Bound
+case object End   extends Bound
+
+abstract class Token // Var, Cons and Wildcard correspond to the "-- Ident" group in the notes below
+case class  Var  (name:String) extends Token
+case class  Cons (name:String) extends Token
+case object Wildcard           extends Token
+
+// data Symbol
+// // Layout
+// case class STX
+// case class ETX
+// case class EOL
+// case class Terminator
+// case class BlockStart
+// case class Block       !Bound
+// case class Group       !Bound
+// case class Marker      !Word64
+
+//     -- Ident
+//     | Var          !Text32
+//     | Cons         !Text32
+//     | Wildcard
+
+//     -- Keyword
+//     | KwCase
+//     | KwClass
+//     | KwDef
+//     | KwForeign
+//     | KwImport
+//     | KwNative
+//     | KwOf
+
+//     -- Operator
+//     | Operator    !Text32
+//     | Modifier    !Text32
+//     | Accessor
+
+//     -- | Arrow
+//     | Assignment
+//     | Typed
+//     | TypeApp
+//     | Merge
+//     | Range
+//     | Anything
+
+//     -- Literal
+//     | Number      !Number
+//     | Quote       !StrType  !Bound
+//     | Str         !Text32
+//     | StrEsc      !StrEscType
+//     | List        !Bound
+
+//     -- Comment
+//     | Disable
+//     | Doc         !Text32
+
+//     -- Config
+//     | Metadata    !Text32
+//     -- | Pragma ...
+
+//     -- Other
+//     | Unknown     !Text32 -- DEPRECATED
+//     | Incorrect   !Text32 -- DEPRECATED
+//     | Invalid     !Invalid.Symbol
+//     deriving (Eq, Generic, Ord, Show)
+
+// data StrEscType
+//     = CharStrEsc  !Int
+//     | NumStrEsc   !Int
+//     | QuoteEscape !StrType
+//     | SlashEsc
+//     deriving (Eq, Generic, Ord, Show)
+
+// data Bound   = Begin | End              deriving (Eq, Generic, Ord, Show)
+// data StrType = RawStr | FmtStr | NatStr deriving (Eq, Generic, Ord, Show)
+// data Number  = NumRep
+//     { _base     :: Word8
+//     , _intPart  :: [Word8]
+//     , _fracPart :: [Word8]
+//     } deriving (Eq, Generic, Ord, Show)
\ No newline at end of file