move Unison.Syntax.Lexer into new unison-syntax package

This commit is contained in:
Mitchell Rosen 2022-10-18 16:02:46 -04:00
parent 6730e0cb5f
commit 1674b22e69
11 changed files with 406 additions and 218 deletions

View File

@ -111,6 +111,7 @@ dependencies:
- unison-prelude
- unison-pretty-printer
- unison-sqlite
- unison-syntax
- unison-util
- unison-util-base32hex
- unison-util-bytes

View File

@ -19,7 +19,6 @@ import qualified Unison.Test.DataDeclaration as DataDeclaration
import qualified Unison.Test.MCode as MCode
import qualified Unison.Test.Referent as Referent
import qualified Unison.Test.Syntax.FileParser as FileParser
import qualified Unison.Test.Syntax.Lexer as Lexer
import qualified Unison.Test.Syntax.TermParser as TermParser
import qualified Unison.Test.Syntax.TermPrinter as TermPrinter
import qualified Unison.Test.Syntax.TypePrinter as TypePrinter
@ -38,7 +37,6 @@ test :: Test ()
test =
tests
[ Cache.test,
Lexer.test,
Term.test,
TermParser.test,
TermPrinter.test,

View File

@ -1,214 +0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module Unison.Test.Syntax.Lexer where
import EasyTest
import qualified Unison.ShortHash as ShortHash
import Unison.Syntax.Lexer
-- | Lexer test suite. Every case is built with 't', which lexes the input
-- string and compares the resulting lexemes against the expected list.
test :: Test ()
test =
scope "lexer"
. tests
-- Numeric literals: optional sign, decimal part, and exponent
$ [ t "1" [Numeric "1"],
t "+1" [Numeric "+1"],
t "-1" [Numeric "-1"],
t "-1.0" [Numeric "-1.0"],
t "+1.0" [Numeric "+1.0"],
t "1e3" [Numeric "1e3"],
t "1e+3" [Numeric "1e+3"],
t "1e-3" [Numeric "1e-3"],
t "+1e3" [Numeric "+1e3"],
t "+1e+3" [Numeric "+1e+3"],
t "+1e-3" [Numeric "+1e-3"],
t "-1e3" [Numeric "-1e3"],
t "-1e+3" [Numeric "-1e+3"],
t "-1e-3" [Numeric "-1e-3"],
t "1.2e3" [Numeric "1.2e3"],
t "1.2e+3" [Numeric "1.2e+3"],
t "1.2e-3" [Numeric "1.2e-3"],
t "+1.2e3" [Numeric "+1.2e3"],
t "+1.2e+3" [Numeric "+1.2e+3"],
t "+1.2e-3" [Numeric "+1.2e-3"],
t "-1.2e3" [Numeric "-1.2e3"],
t "-1.2e+3" [Numeric "-1.2e+3"],
t "-1.2e-3" [Numeric "-1.2e-3"],
-- An uppercase exponent marker is expected to come back as a lowercase 'e'
t "1E3" [Numeric "1e3"],
t "1E+3" [Numeric "1e+3"],
t "1E-3" [Numeric "1e-3"],
t "+1E3" [Numeric "+1e3"],
t "+1E+3" [Numeric "+1e+3"],
t "+1E-3" [Numeric "+1e-3"],
t "-1E3" [Numeric "-1e3"],
t "-1E+3" [Numeric "-1e+3"],
t "-1E-3" [Numeric "-1e-3"],
t "1.2E3" [Numeric "1.2e3"],
t "1.2E+3" [Numeric "1.2e+3"],
t "1.2E-3" [Numeric "1.2e-3"],
t "+1.2E3" [Numeric "+1.2e3"],
t "+1.2E+3" [Numeric "+1.2e+3"],
t "+1.2E-3" [Numeric "+1.2e-3"],
t "-1.2E3" [Numeric "-1.2e3"],
t "-1.2E+3" [Numeric "-1.2e+3"],
t "-1.2E-3" [Numeric "-1.2e-3"],
-- Operator vs. sign: "1+1" yields an operator between two numbers, while
-- in "1 +1" the sign is expected to stay attached to the second number
t "1-1" [Numeric "1", simpleSymbolyId "-", Numeric "1"],
t "1+1" [Numeric "1", simpleSymbolyId "+", Numeric "1"],
t "1 +1" [Numeric "1", Numeric "+1"],
t "1+ 1" [Numeric "1", simpleSymbolyId "+", Numeric "1"],
t "x+y" [simpleWordyId "x", simpleSymbolyId "+", simpleWordyId "y"],
-- Explicit semicolons
t "++;++" [simpleSymbolyId "++", Semi False, simpleSymbolyId "++"],
t "++; woot" [simpleSymbolyId "++", Semi False, simpleWordyId "woot"],
t "woot;woot" [simpleWordyId "woot", Semi False, simpleWordyId "woot"],
t "woot;(woot)" [simpleWordyId "woot", Semi False, Open "(", simpleWordyId "woot", Close],
-- List brackets, with and without surrounding spaces
t
"[+1,+1]"
[Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close],
t
"[ +1 , +1 ]"
[Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close],
-- Line comments contribute no lexemes
t "-- a comment 1.0" [],
t "\"woot\" -- a comment 1.0" [Textual "woot"],
-- Type annotations
t "0:Int" [Numeric "0", Reserved ":", simpleWordyId "Int"],
t "0 : Int" [Numeric "0", Reserved ":", simpleWordyId "Int"],
-- Dotted (qualified) identifiers
t
".Foo Foo . .foo.bar.baz"
[ simpleWordyId ".Foo",
simpleWordyId "Foo",
simpleSymbolyId ".",
simpleWordyId ".foo.bar.baz"
],
t ".Foo.Bar.+" [simpleSymbolyId ".Foo.Bar.+"],
-- idents with hashes
t "foo#bar" [WordyId "foo" (Just (ShortHash.unsafeFromText "#bar"))],
t "+#bar" [SymbolyId "+" (Just (ShortHash.unsafeFromText "#bar"))],
-- note - these are all the same, just with different spacing
let ex1 = "if x then y else z"
ex2 = unlines ["if", " x", "then", " y", "else z"]
ex3 = unlines ["if", " x", " then", " y", "else z"]
ex4 = unlines ["if", " x", " then", " y", "else z"]
expected =
[ Open "if",
simpleWordyId "x",
Close,
Open "then",
simpleWordyId "y",
Close,
Open "else",
simpleWordyId "z",
Close
]
in -- every spacing variant must produce the same lexemes
tests $ map (`t` expected) [ex1, ex2, ex3, ex4],
-- directly close empty = block
let ex = unlines ["test =", "", "x = 1"]
in -- Open "=" is expected to be followed immediately by Close
t
ex
[ simpleWordyId "test",
Open "=",
Close,
(Semi True),
simpleWordyId "x",
Open "=",
Numeric "1",
Close
],
-- directly close nested empty blocks
let ex = unlines ["test =", " test2 =", "", "x = 1"]
in t
ex
[ simpleWordyId "test",
Open "=",
simpleWordyId "test2",
Open "=",
Close,
Close,
(Semi True),
simpleWordyId "x",
Open "=",
Numeric "1",
Close
],
-- chained `else if`
let ex =
unlines
["if a then b", "else if c then d", "else if e then f", "else g"]
in -- each `else if` opens a new `if` inside the enclosing `else` block
t
ex
[ Open "if",
simpleWordyId "a",
Close,
Open "then",
simpleWordyId "b",
Close,
Open "else",
Open "if",
simpleWordyId "c",
Close,
Open "then",
simpleWordyId "d",
Close,
Open "else",
Open "if",
simpleWordyId "e",
Close,
Open "then",
simpleWordyId "f",
Close,
Open "else",
simpleWordyId "g",
-- close of the three `else` blocks
Close,
Close,
Close
],
-- Empty `then` clause
-- NOTE(review): an earlier comment claimed the `else` here is interpreted as
-- a `Reserved` token, but the expectation below is Open "else" -- confirm
-- which behavior is intended
t
"if x then else"
[ Open "if",
simpleWordyId "x",
Close,
Open "then",
Close,
Open "else",
Close
],
-- Empty `else` clause
t
"if x then 1 else"
[ Open "if",
simpleWordyId "x",
Close,
Open "then",
Numeric "1",
Close,
Open "else",
Close
],
-- shouldn't be too eager to find keywords at the front of identifiers,
-- particularly for block-closing keywords (see #2727)
tests $ do
kw <- ["if", "then", "else"]
suffix <- ["0", "x", "!", "'"] -- examples of wordyIdChar
let i = kw ++ suffix
-- a keyword at the front of an identifier should still be an identifier
pure $ t i [simpleWordyId i],
-- Test string literals
t
"\"simple string without escape characters\""
[Textual "simple string without escape characters"],
t
"\"test escaped quotes \\\"in quotes\\\"\""
[Textual "test escaped quotes \"in quotes\""],
t "\"\\n \\t \\b \\a\"" [Textual "\n \t \b \a"],
-- Delayed string
t "'\"\"" [Reserved "'", Textual ""]
]
-- | Build a single lexer test case, scoped under the input string itself.
--
-- The input @s@ is lexed (the file name argument is a placeholder), each
-- token is reduced to its 'payload', and the first and last lexemes are
-- discarded before comparing against @expected@ (presumably these are the
-- lexer's outermost wrapper tokens -- TODO confirm). On mismatch both lists
-- are reported via 'note' and the test crashes.
t :: String -> [Lexeme] -> Test ()
t s expected =
  scope s $
    if actual == expected
      then ok
      else do
        note $ "expected: " ++ show expected
        note $ "actual : " ++ show actual
        crash "actual != expected"
  where
    -- all lexemes produced for the input, positions stripped
    lexemes = payload <$> lexer "ignored filename" s
    -- drop the first lexeme, then keep all but the last
    actual = take (length lexemes - 2) (drop 1 lexemes)

View File

@ -119,7 +119,6 @@ library
Unison.Share.Types
Unison.Syntax.DeclPrinter
Unison.Syntax.FileParser
Unison.Syntax.Lexer
Unison.Syntax.NamePrinter
Unison.Syntax.Parser
Unison.Syntax.TermParser
@ -279,6 +278,7 @@ library
, unison-prelude
, unison-pretty-printer
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-bytes
@ -317,7 +317,6 @@ test-suite parser-typechecker-tests
Unison.Test.MCode
Unison.Test.Referent
Unison.Test.Syntax.FileParser
Unison.Test.Syntax.Lexer
Unison.Test.Syntax.TermParser
Unison.Test.Syntax.TermPrinter
Unison.Test.Syntax.TypePrinter
@ -466,6 +465,7 @@ test-suite parser-typechecker-tests
, unison-prelude
, unison-pretty-printer
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-bytes

View File

@ -16,6 +16,7 @@ packages:
- unison-cli
- unison-hashing-v2
- unison-share-api
- unison-syntax
- codebase2/codebase
- codebase2/codebase-sqlite
- codebase2/codebase-sqlite-hashing-v2

View File

@ -74,6 +74,7 @@ dependencies:
- unison-pretty-printer
- unison-share-api
- unison-sqlite
- unison-syntax
- unison-util
- unison-util-base32hex
- unison-util-relation

View File

@ -183,6 +183,7 @@ library
, unison-pretty-printer
, unison-share-api
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-relation
@ -306,6 +307,7 @@ executable cli-integration-tests
, unison-pretty-printer
, unison-share-api
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-relation
@ -423,6 +425,7 @@ executable transcripts
, unison-pretty-printer
, unison-share-api
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-relation
@ -545,6 +548,7 @@ executable unison
, unison-pretty-printer
, unison-share-api
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-relation
@ -670,6 +674,7 @@ test-suite cli-tests
, unison-pretty-printer
, unison-share-api
, unison-sqlite
, unison-syntax
, unison-util
, unison-util-base32hex
, unison-util-relation

View File

@ -0,0 +1,65 @@
name: unison-syntax
github: unisonweb/unison
copyright: Copyright (C) 2013-2022 Unison Computing, PBC and contributors
ghc-options: -Wall
dependencies:
- base
- containers
- lens
- megaparsec
- mtl
- text
- unison-core1
- unison-prelude
- unison-pretty-printer
- unison-util-bytes
# Library component; sources live under src.
library:
source-dirs: src
# hpack idiom: attaching other-modules to a never-true condition keeps the
# auto-generated Paths_unison_syntax module out of this component.
when:
- condition: false
other-modules: Paths_unison_syntax
# Test suites for this package.
tests:
syntax-tests:
# Same hpack idiom as the library: keep the auto-generated
# Paths_unison_syntax module out of the test component.
when:
- condition: false
other-modules: Paths_unison_syntax
# Dependencies needed only by the test suite, in addition to the
# package-level dependencies listed above.
dependencies:
- code-page
- easytest
- unison-syntax
main: Main.hs
source-dirs: test
default-extensions:
- ApplicativeDo
- BangPatterns
- BlockArguments
- DeriveAnyClass
- DeriveFoldable
- DeriveFunctor
- DeriveGeneric
- DeriveTraversable
- DerivingStrategies
- DerivingVia
- DoAndIfThenElse
- DuplicateRecordFields
- FlexibleContexts
- FlexibleInstances
- FunctionalDependencies
- GeneralizedNewtypeDeriving
- LambdaCase
- MultiParamTypeClasses
- NamedFieldPuns
- OverloadedStrings
- PatternSynonyms
- RankNTypes
- ScopedTypeVariables
- StandaloneDeriving
- TupleSections
- TypeApplications
- TypeFamilies
- ViewPatterns

216
unison-syntax/test/Main.hs Normal file
View File

@ -0,0 +1,216 @@
module Main (main) where
import EasyTest
import System.IO.CodePage (withCP65001)
import qualified Unison.ShortHash as ShortHash
import Unison.Syntax.Lexer
-- | Test-suite entry point: runs 'test' with EasyTest's 'run', wrapped in
-- 'withCP65001' (code page 65001, presumably so non-ASCII test output
-- renders correctly on Windows consoles -- confirm against the code-page docs).
main :: IO ()
main = withCP65001 $ run test
-- | Lexer test suite. Every case is built with 't', which lexes the input
-- string and compares the resulting lexemes against the expected list.
test :: Test ()
test =
scope "lexer" . tests $
-- Numeric literals: optional sign, decimal part, and exponent
[ t "1" [Numeric "1"],
t "+1" [Numeric "+1"],
t "-1" [Numeric "-1"],
t "-1.0" [Numeric "-1.0"],
t "+1.0" [Numeric "+1.0"],
t "1e3" [Numeric "1e3"],
t "1e+3" [Numeric "1e+3"],
t "1e-3" [Numeric "1e-3"],
t "+1e3" [Numeric "+1e3"],
t "+1e+3" [Numeric "+1e+3"],
t "+1e-3" [Numeric "+1e-3"],
t "-1e3" [Numeric "-1e3"],
t "-1e+3" [Numeric "-1e+3"],
t "-1e-3" [Numeric "-1e-3"],
t "1.2e3" [Numeric "1.2e3"],
t "1.2e+3" [Numeric "1.2e+3"],
t "1.2e-3" [Numeric "1.2e-3"],
t "+1.2e3" [Numeric "+1.2e3"],
t "+1.2e+3" [Numeric "+1.2e+3"],
t "+1.2e-3" [Numeric "+1.2e-3"],
t "-1.2e3" [Numeric "-1.2e3"],
t "-1.2e+3" [Numeric "-1.2e+3"],
t "-1.2e-3" [Numeric "-1.2e-3"],
-- An uppercase exponent marker is expected to come back as a lowercase 'e'
t "1E3" [Numeric "1e3"],
t "1E+3" [Numeric "1e+3"],
t "1E-3" [Numeric "1e-3"],
t "+1E3" [Numeric "+1e3"],
t "+1E+3" [Numeric "+1e+3"],
t "+1E-3" [Numeric "+1e-3"],
t "-1E3" [Numeric "-1e3"],
t "-1E+3" [Numeric "-1e+3"],
t "-1E-3" [Numeric "-1e-3"],
t "1.2E3" [Numeric "1.2e3"],
t "1.2E+3" [Numeric "1.2e+3"],
t "1.2E-3" [Numeric "1.2e-3"],
t "+1.2E3" [Numeric "+1.2e3"],
t "+1.2E+3" [Numeric "+1.2e+3"],
t "+1.2E-3" [Numeric "+1.2e-3"],
t "-1.2E3" [Numeric "-1.2e3"],
t "-1.2E+3" [Numeric "-1.2e+3"],
t "-1.2E-3" [Numeric "-1.2e-3"],
-- Operator vs. sign: "1+1" yields an operator between two numbers, while
-- in "1 +1" the sign is expected to stay attached to the second number
t "1-1" [Numeric "1", simpleSymbolyId "-", Numeric "1"],
t "1+1" [Numeric "1", simpleSymbolyId "+", Numeric "1"],
t "1 +1" [Numeric "1", Numeric "+1"],
t "1+ 1" [Numeric "1", simpleSymbolyId "+", Numeric "1"],
t "x+y" [simpleWordyId "x", simpleSymbolyId "+", simpleWordyId "y"],
-- Explicit semicolons
t "++;++" [simpleSymbolyId "++", Semi False, simpleSymbolyId "++"],
t "++; woot" [simpleSymbolyId "++", Semi False, simpleWordyId "woot"],
t "woot;woot" [simpleWordyId "woot", Semi False, simpleWordyId "woot"],
t "woot;(woot)" [simpleWordyId "woot", Semi False, Open "(", simpleWordyId "woot", Close],
-- List brackets, with and without surrounding spaces
t
"[+1,+1]"
[Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close],
t
"[ +1 , +1 ]"
[Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close],
-- Line comments contribute no lexemes
t "-- a comment 1.0" [],
t "\"woot\" -- a comment 1.0" [Textual "woot"],
-- Type annotations
t "0:Int" [Numeric "0", Reserved ":", simpleWordyId "Int"],
t "0 : Int" [Numeric "0", Reserved ":", simpleWordyId "Int"],
-- Dotted (qualified) identifiers
t
".Foo Foo . .foo.bar.baz"
[ simpleWordyId ".Foo",
simpleWordyId "Foo",
simpleSymbolyId ".",
simpleWordyId ".foo.bar.baz"
],
t ".Foo.Bar.+" [simpleSymbolyId ".Foo.Bar.+"],
-- idents with hashes
t "foo#bar" [WordyId "foo" (Just (ShortHash.unsafeFromText "#bar"))],
t "+#bar" [SymbolyId "+" (Just (ShortHash.unsafeFromText "#bar"))],
-- note - these are all the same, just with different spacing
let ex1 = "if x then y else z"
ex2 = unlines ["if", " x", "then", " y", "else z"]
ex3 = unlines ["if", " x", " then", " y", "else z"]
ex4 = unlines ["if", " x", " then", " y", "else z"]
expected =
[ Open "if",
simpleWordyId "x",
Close,
Open "then",
simpleWordyId "y",
Close,
Open "else",
simpleWordyId "z",
Close
]
in -- every spacing variant must produce the same lexemes
tests $ map (`t` expected) [ex1, ex2, ex3, ex4],
-- directly close empty = block
let ex = unlines ["test =", "", "x = 1"]
in -- Open "=" is expected to be followed immediately by Close
t
ex
[ simpleWordyId "test",
Open "=",
Close,
(Semi True),
simpleWordyId "x",
Open "=",
Numeric "1",
Close
],
-- directly close nested empty blocks
let ex = unlines ["test =", " test2 =", "", "x = 1"]
in t
ex
[ simpleWordyId "test",
Open "=",
simpleWordyId "test2",
Open "=",
Close,
Close,
(Semi True),
simpleWordyId "x",
Open "=",
Numeric "1",
Close
],
-- chained `else if`
let ex =
unlines
["if a then b", "else if c then d", "else if e then f", "else g"]
in -- each `else if` opens a new `if` inside the enclosing `else` block
t
ex
[ Open "if",
simpleWordyId "a",
Close,
Open "then",
simpleWordyId "b",
Close,
Open "else",
Open "if",
simpleWordyId "c",
Close,
Open "then",
simpleWordyId "d",
Close,
Open "else",
Open "if",
simpleWordyId "e",
Close,
Open "then",
simpleWordyId "f",
Close,
Open "else",
simpleWordyId "g",
-- close of the three `else` blocks
Close,
Close,
Close
],
-- Empty `then` clause
-- NOTE(review): an earlier comment claimed the `else` here is interpreted as
-- a `Reserved` token, but the expectation below is Open "else" -- confirm
-- which behavior is intended
t
"if x then else"
[ Open "if",
simpleWordyId "x",
Close,
Open "then",
Close,
Open "else",
Close
],
-- Empty `else` clause
t
"if x then 1 else"
[ Open "if",
simpleWordyId "x",
Close,
Open "then",
Numeric "1",
Close,
Open "else",
Close
],
-- shouldn't be too eager to find keywords at the front of identifiers,
-- particularly for block-closing keywords (see #2727)
tests $ do
kw <- ["if", "then", "else"]
suffix <- ["0", "x", "!", "'"] -- examples of wordyIdChar
let i = kw ++ suffix
-- a keyword at the front of an identifier should still be an identifier
pure $ t i [simpleWordyId i],
-- Test string literals
t
"\"simple string without escape characters\""
[Textual "simple string without escape characters"],
t
"\"test escaped quotes \\\"in quotes\\\"\""
[Textual "test escaped quotes \"in quotes\""],
t "\"\\n \\t \\b \\a\"" [Textual "\n \t \b \a"],
-- Delayed string
t "'\"\"" [Reserved "'", Textual ""]
]
-- | Build a single lexer test case, scoped under the input string itself.
--
-- The input @s@ is lexed (the file name argument is a placeholder), each
-- token is reduced to its 'payload', and the first and last lexemes are
-- discarded before comparing against @expected@ (presumably these are the
-- lexer's outermost wrapper tokens -- TODO confirm). On mismatch both lists
-- are reported via 'note' and the test crashes.
t :: String -> [Lexeme] -> Test ()
t s expected =
  scope s $
    if actual == expected
      then ok
      else do
        note $ "expected: " ++ show expected
        note $ "actual : " ++ show actual
        crash "actual != expected"
  where
    -- all lexemes produced for the input, positions stripped
    lexemes = payload <$> lexer "ignored filename" s
    -- drop the first lexeme, then keep all but the last
    actual = take (length lexemes - 2) (drop 1 lexemes)

View File

@ -0,0 +1,115 @@
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.34.4.
--
-- see: https://github.com/sol/hpack
name: unison-syntax
version: 0.0.0
homepage: https://github.com/unisonweb/unison#readme
bug-reports: https://github.com/unisonweb/unison/issues
copyright: Copyright (C) 2013-2022 Unison Computing, PBC and contributors
build-type: Simple
source-repository head
type: git
location: https://github.com/unisonweb/unison
library
exposed-modules:
Unison.Syntax.Lexer
hs-source-dirs:
src
default-extensions:
ApplicativeDo
BangPatterns
BlockArguments
DeriveAnyClass
DeriveFoldable
DeriveFunctor
DeriveGeneric
DeriveTraversable
DerivingStrategies
DerivingVia
DoAndIfThenElse
DuplicateRecordFields
FlexibleContexts
FlexibleInstances
FunctionalDependencies
GeneralizedNewtypeDeriving
LambdaCase
MultiParamTypeClasses
NamedFieldPuns
OverloadedStrings
PatternSynonyms
RankNTypes
ScopedTypeVariables
StandaloneDeriving
TupleSections
TypeApplications
TypeFamilies
ViewPatterns
ghc-options: -Wall
build-depends:
base
, containers
, lens
, megaparsec
, mtl
, text
, unison-core1
, unison-prelude
, unison-pretty-printer
, unison-util-bytes
default-language: Haskell2010
test-suite syntax-tests
type: exitcode-stdio-1.0
main-is: Main.hs
hs-source-dirs:
test
default-extensions:
ApplicativeDo
BangPatterns
BlockArguments
DeriveAnyClass
DeriveFoldable
DeriveFunctor
DeriveGeneric
DeriveTraversable
DerivingStrategies
DerivingVia
DoAndIfThenElse
DuplicateRecordFields
FlexibleContexts
FlexibleInstances
FunctionalDependencies
GeneralizedNewtypeDeriving
LambdaCase
MultiParamTypeClasses
NamedFieldPuns
OverloadedStrings
PatternSynonyms
RankNTypes
ScopedTypeVariables
StandaloneDeriving
TupleSections
TypeApplications
TypeFamilies
ViewPatterns
ghc-options: -Wall
build-depends:
base
, code-page
, containers
, easytest
, lens
, megaparsec
, mtl
, text
, unison-core1
, unison-prelude
, unison-pretty-printer
, unison-syntax
, unison-util-bytes
default-language: Haskell2010