Idris2/libs/test/Test/Golden.idr
Guillaume Allais b4d7bba550 [ cleanup ] use default arguments
This simplifies most calls to `testsInDir`.
2023-09-07 14:57:22 +01:00

669 lines
23 KiB
Idris

||| Core features required to perform Golden file testing.
|||
||| We provide the core functionality to run a *single* golden file test, or
||| a whole test tree.
||| This allows the developer freedom to use as is or design the rest of the
||| test harness to their liking.
|||
||| This was originally used as part of Idris2's own test suite and
||| the core functionality is useful for the many and not the few.
||| Please see Idris2 test harness for example usage.
|||
||| # Test Structure
|||
||| This harness works from the assumption that each individual golden test
||| comprises of a directory with the following structure:
|||
||| + `run` a *shell* script that runs the test. We expect it to:
||| * Use `$1` as the variable standing for the idris executable to be tested
||| * May use `${IDRIS2_TESTS_CG}` to pick a codegen that ought to work
||| * Clean up after itself (e.g. by running `rm -rf build/`)
|||
||| + `expected` a file containting the expected output of `run`
|||
||| During testing, the test harness will generate an artefact named `output`
||| and display both outputs if there is a failure.
||| During an interactive session the following command is used to compare them
||| as they are:
|||
||| ```sh
||| git diff --no-index --exit-code --word-diff-regex=. --color expected output
||| ```
|||
||| If `git` fails then the runner will simply present the expected and 'given'
||| files side-by-side.
|||
||| Of note, it is helpful to add `output` to a local `.gitignore` instance
||| to ensure that it is not mistakenly versioned.
|||
||| # Options
|||
||| The test harness has several options that may be set:
|||
||| + `idris2` The path of the executable we are testing.
||| + `codegen` The backend to use for code generation.
||| + `onlyNames` The tests to run relative to the generated executable.
||| + `onlyFile` The file listing the tests to run relative to the generated executable.
||| + `interactive` Whether to offer to update the expected file or not.
||| + `timing` Whether to display time taken for each test.
||| + `threads` The maximum numbers to use (default: number of cores).
||| + `failureFile` The file in which to write the list of failing tests.
|||
||| We provide an options parser (`options`) that takes the list of command line
||| arguments and constructs this for you.
|||
||| # Usage
|||
||| When compiled to an executable the expected usage is:
|||
|||```sh
||| runtests <path to executable under test>
||| [--timing]
||| [--interactive]
||| [--only-file PATH]
||| [--failure-file PATH]
||| [--threads N]
||| [--cg CODEGEN]
||| [[--only|--except] [NAMES]]
|||```
|||
||| assuming that the test runner is compiled to an executable named `runtests`.
module Test.Golden
import Control.ANSI
import Data.Either
import Data.Maybe
import Data.List
import Data.List1
import Data.String
import Data.String.Extra
import System
import System.Clock
import System.Directory
import System.File
import System.Info
import System.Path
import System.Concurrency
-- [ Options ]
||| Options for the test driver.
public export
record Options where
constructor MkOptions
||| Name of the idris2 executable
exeUnderTest : String
||| Which codegen should we use?
codegen : Maybe String
||| Should we only run some specific cases?
onlyNames : Maybe (String -> Bool)
||| Should we run the test suite interactively?
interactive : Bool
||| Should we use colors?
color : Bool
||| Should we time and display the tests
timing : Bool
||| How many threads should we use?
threads : Nat
||| Should we write the list of failing cases to a file?
failureFile : Maybe String
export
initOptions : String -> Bool -> Options
initOptions exe color
= MkOptions exe
Nothing
Nothing
False
color
False
1
Nothing
export
usage : String
usage = unwords
["Usage:"
, "runtests <path>"
, "[--timing]"
, "[--interactive]"
, "[--[no-]color, --[no-]colour]"
, "[--cg CODEGEN]"
, "[--threads N]"
, "[--failure-file PATH]"
, "[--only-file PATH]"
, "[[--only|--except] [NAMES]]"
]
optionsTestsFilter : List String -> List String -> Maybe (String -> Bool)
optionsTestsFilter [] [] = Nothing
optionsTestsFilter only except = Just $ \ name =>
let onlyCheck = null only || any (`isInfixOf` name) only in
let exceptCheck = all (not . (`isInfixOf` name)) except in
onlyCheck && exceptCheck
record Acc where
constructor MkAcc
onlyFile : Maybe String
onlyNames : List String
exceptNames : List String
initAcc : Acc
initAcc = MkAcc Nothing [] []
||| Process the command line options.
export
options : List String -> IO (Maybe Options)
options args = case args of
(_ :: exe :: rest) => mkOptions exe rest
_ => pure Nothing
where
isFlag : String -> Bool
isFlag str = "--" `isPrefixOf` str
go : List String -> Acc -> Options -> Maybe (Acc, Options)
go rest acc opts = case rest of
[] => pure (acc, opts)
("--timing" :: xs) => go xs acc ({ timing := True} opts)
("--interactive" :: xs) => go xs acc ({ interactive := True } opts)
("--color" :: xs) => go xs acc ({ color := True } opts)
("--colour" :: xs) => go xs acc ({ color := True } opts)
("--no-color" :: xs) => go xs acc ({ color := False } opts)
("--no-colour" :: xs) => go xs acc ({ color := False } opts)
("--cg" :: cg :: xs) => go xs acc ({ codegen := Just cg } opts)
("--threads" :: n :: xs) => do let pos : Nat = !(parsePositive n)
go xs acc ({ threads := pos } opts)
("--failure-file" :: p :: xs) => go xs acc ({ failureFile := Just p } opts)
("--only" :: p :: xs) =>
ifThenElse (isFlag p)
(go (p :: xs) acc opts)
(go xs ({ onlyNames $= (words p ++) } acc) opts)
("--except" :: p :: xs) =>
ifThenElse (isFlag p)
(go (p :: xs) acc opts)
(go xs ({ exceptNames $= (words p ++) } acc) opts)
("--only-file" :: p :: xs) => go xs ({ onlyFile := Just p } acc) opts
[p] => do guard (p `elem` ["--only", "--except"])
pure (acc, opts)
-- ^ we accept trailing only or except flags as unused (do not filter out any tests)
-- for the convenience of populating these options from shell scripts.
_ => Nothing
mkOptions : String -> List String -> IO (Maybe Options)
mkOptions exe rest
= do color <- [ (isNothing noc) && tty | noc <- getEnv "NO_COLOR", tty <- isTTY stdout ]
let Just (acc, opts) = go rest initAcc (initOptions exe color)
| Nothing => pure Nothing
extraOnlyNames <- the (IO (List String)) $ case acc.onlyFile of
Nothing => pure []
Just fp => do Right only <- readFile fp
| Left err => die (show err)
pure (lines only)
pure $ Just $ { onlyNames := optionsTestsFilter (extraOnlyNames ++ acc.onlyNames) acc.exceptNames } opts
||| Normalise strings between different OS.
|||
||| on Windows, we just ignore backslashes and slashes when comparing,
||| similarity up to that is good enough. Leave errors that depend
||| on the confusion of slashes and backslashes to unix machines.
normalize : String -> String
normalize str =
if isWindows
then pack $ filter (\ch => ch /= '/' && ch /= '\\') (unpack str)
else str
||| The result of a test run
||| `Left` corresponds to a failure, and `Right` to a success
Result : Type
Result = Either String String
||| Run the specified Golden test with the supplied options.
||| See the module documentation for more information.
||| @testPath the directory that contains the test.
export
runTest : Options -> (testPath : String) -> IO Result
runTest opts testPath = do
start <- clockTime UTC
let cg = maybe "" (" --cg " ++) (codegen opts)
let exe = "\"" ++ exeUnderTest opts ++ cg ++ "\""
ignore $ system $ "cd " ++ escapeArg testPath ++ " && " ++
"sh ./run " ++ exe ++ " | tr -d '\\r' > output"
end <- clockTime UTC
Right out <- readFile $ testPath ++ "/output"
| Left err => do putStrLn $ (testPath ++ "/output") ++ ": " ++ show err
pure (Left testPath)
Right exp <- readFile $ testPath ++ "/expected"
| Left FileNotFound => do
if interactive opts
then mayOverwrite Nothing out
else putStrLn $ (testPath ++ "/expected") ++ ": " ++ show FileNotFound
pure (Left testPath)
| Left err => do putStrLn $ (testPath ++ "/expected") ++ ": " ++ show err
pure (Left testPath)
let result = normalize out == normalize exp
let time = timeDifference end start
if result
then printTiming opts.timing time testPath $ maybeColored BrightGreen "success"
else do
printTiming opts.timing time testPath $ maybeColored BrightRed "FAILURE"
if interactive opts
then mayOverwrite (Just exp) out
else putStr . unlines $ expVsOut exp out
pure $ if result then Right testPath else Left testPath
where
getAnswer : IO Bool
getAnswer = do
str <- getLine
case str of
"y" => pure True
"n" => pure False
"N" => pure False
"" => pure False
_ => do putStrLn "Invalid answer."
getAnswer
maybeColored : Color -> String -> String
maybeColored c = if opts.color then show . colored c else id
expVsOut : String -> String -> List String
expVsOut exp out = ["Expected:", maybeColored Green exp, "Given:", maybeColored Red out]
badSystemExitCode : Int -> Bool
badSystemExitCode code = code < 0 || code == 127 {- 127 means shell couldn't start -}
mayOverwrite : Maybe String -> String -> IO ()
mayOverwrite mexp out = do
case mexp of
Nothing => putStr $ unlines
[ "Golden value missing. I computed the following result:"
, maybeColored BrightBlue out
, "Accept new golden value? [y/N]"
]
Just exp => do
code <- system $ "git diff --no-index --exit-code --word-diff-regex=. " ++
(if opts.color then "--color " else "") ++
escapeArg testPath ++ "/expected " ++ escapeArg testPath ++ "/output"
putStr . unlines $
["Golden value differs from actual value."] ++
(if badSystemExitCode code then expVsOut exp out else []) ++
["Accept actual value as new golden value? [y/N]"]
b <- getAnswer
when b $ do Right _ <- writeFile (testPath ++ "/expected") out
| Left err => putStrLn $ (testPath ++ "/expected") ++ ": " ++ show err
pure ()
printTiming : Bool -> Clock type -> String -> String -> IO ()
printTiming False _ path msg = putStrLn $ concat [path, ": ", msg]
printTiming True clock path msg =
let time = showTime 2 3 clock
width = 72
-- We use 9 instead of `String.length msg` because:
-- 1. ": success" and ": FAILURE" have the same length
-- 2. ANSI escape codes make the msg look longer than it is
msgl = 9
path = leftEllipsis (width `minus` (1 + msgl + length time)) "(...)" path
spent = length time + length path + msgl
pad = pack $ replicate (width `minus` spent) ' '
in putStrLn $ concat [path, ": ", msg, pad, time]
||| Find the first occurrence of an executable on `PATH`.
export
pathLookup : List String -> IO (Maybe String)
pathLookup names = do
path <- getEnv "PATH"
let extensions = if isWindows then [".exe", ".cmd", ".bat", ""] else [""]
let pathList = forget $ split (== pathSeparator) $ fromMaybe "/usr/bin:/usr/local/bin" path
let candidates = [p ++ "/" ++ x ++ y | p <- pathList,
x <- names,
y <- extensions]
firstExists candidates
||| Some test may involve Idris' backends and have requirements.
||| We define here the ones supported by Idris
public export
data Requirement = C | Chez | Node | Racket | Gambit
export
Eq Requirement where
C == C = True
Chez == Chez = True
Node == Node = True
Racket == Racket = True
Gambit == Gambit = True
_ == _ = False
export
Show Requirement where
show C = "C"
show Chez = "Chez"
show Node = "node"
show Racket = "racket"
show Gambit = "gambit"
export
[CG] Show Requirement where
show C = "refc"
show Chez = "chez"
show Node = "node"
show Racket = "racket"
show Gambit = "gambit"
export
checkRequirement : Requirement -> IO (Maybe String)
checkRequirement req
= if platformSupport req
then do let (envvar, paths) = requirement req
Just exec <- getEnv envvar | Nothing => pathLookup paths
pure (Just exec)
else pure Nothing
where
requirement : Requirement -> (String, List String)
requirement C = ("CC", ["cc"])
requirement Chez = ("CHEZ", ["chez", "chezscheme9.5", "chezscheme", "chez-scheme", "scheme"])
requirement Node = ("NODE", ["node"])
requirement Racket = ("RACKET", ["racket"])
requirement Gambit = ("GAMBIT", ["gsc"])
platformSupport : Requirement -> Bool
platformSupport C = not isWindows
platformSupport Racket = not isWindows
platformSupport _ = True
export
findCG : IO (Maybe String)
findCG
= do Nothing <- getEnv "IDRIS2_TESTS_CG" | p => pure p
Nothing <- checkRequirement Chez | p => pure (Just "chez")
Nothing <- checkRequirement Node | p => pure (Just "node")
Nothing <- checkRequirement Racket | p => pure (Just "racket")
Nothing <- checkRequirement Gambit | p => pure (Just "gsc")
Nothing <- checkRequirement C | p => pure (Just "refc")
pure Nothing
||| A choice of a codegen
public export
data Codegen
= ||| Do NOT pass a cg argument to the executable being tested
Nothing
| ||| Use whatever the test runner was passed at the toplevel,
||| and if nothing was passed guess a sensible default using findCG
Default
| ||| Use exactly the given requirement
Just Requirement
export
toList : Codegen -> List Requirement
toList (Just r) = [r]
toList _ = []
||| A test pool is characterised by
||| + a name
||| + a list of requirement
||| + a choice of codegen (overriding the default)
||| + and a list of directory paths
public export
record TestPool where
constructor MkTestPool
poolName : String
constraints : List Requirement
codegen : Codegen
testCases : List String
||| Find all the test in the given directory.
export
testsInDir :
(dirName : String) ->
{default (const True) pred : String -> Bool} ->
(poolName : String) ->
{default [] requirements : List Requirement} ->
{default Nothing codegen : Codegen} ->
IO TestPool
testsInDir dirName poolName = do
Right names <- listDir dirName
| Left e => die $ "failed to list " ++ dirName ++ ": " ++ show e
let names = [n | n <- names, pred n]
let testNames = [dirName ++ "/" ++ n | n <- names]
testNames <- filter testNames
when (length testNames == 0) $ die $ "no tests found in " ++ dirName
pure $ MkTestPool poolName requirements codegen testNames
where
-- Directory without `run` file is not a test
isTest : (path : String) -> IO Bool
isTest path = exists (path ++ "/run")
filter : (testPaths : List String) -> IO (List String)
filter [] = pure []
filter (p :: ps) =
do rem <- filter ps
case !(isTest p) of
True => pure $ p :: rem
False => pure rem
||| Only keep the tests that have been asked for
export
filterTests : Options -> List String -> List String
filterTests opts = case onlyNames opts of
Nothing => id
Just f => filter f
||| The summary of a test pool run
public export
record Summary where
constructor MkSummary
success : List String
failure : List String
||| A new, blank summary
export
initSummary : Summary
initSummary = MkSummary [] []
||| Update the summary to contain the given result
export
updateSummary : (newRes : Result) -> Summary -> Summary
updateSummary newRes =
case newRes of
Left l => { failure $= (l ::) }
Right w => { success $= (w ::) }
||| Update the summary to contain the given results
export
bulkUpdateSummary : (newRess : List Result) -> Summary -> Summary
bulkUpdateSummary newRess =
let (ls, ws) = partitionEithers newRess in
{ success $= (ws ++)
, failure $= (ls ++)
}
export
Semigroup Summary where
MkSummary ws1 ls1 <+> MkSummary ws2 ls2
= MkSummary (ws1 ++ ws2) (ls1 ++ ls2)
export
Monoid Summary where
neutral = initSummary
||| An instruction to a thread which runs tests
public export
data ThreadInstruction : Type where
||| A test to run
Run : (test : String) -> ThreadInstruction
||| An indication for the thread to stop
Stop : ThreadInstruction
||| Sends the given tests on the given @Channel@, then sends `nThreads` many
||| 'Stop' @ThreadInstruction@s to stop the threads running the tests.
|||
||| @testChan The channel to send the tests over.
||| @nThreads The number of threads being used to run the tests.
||| @tests The list of tests to send to the runners/threads.
export
testSender : (testChan : Channel ThreadInstruction) -> (nThreads : Nat)
-> (tests : List String) -> IO ()
testSender testChan 0 [] = pure ()
testSender testChan (S k) [] =
-- out of tests, so the next thing for all the threads is to stop
do channelPut testChan Stop
testSender testChan k []
testSender testChan nThreads (test :: tests) =
do channelPut testChan (Run test)
testSender testChan nThreads tests
||| A result from a test-runner/thread
public export
data ThreadResult : Type where
||| The result of running a test
Res : (res : Result) -> ThreadResult
||| An indication that the thread was told to stop
Done : ThreadResult
||| Receives results on the given @Channel@, accumulating them as a @Summary@.
||| When all results have been received (i.e. @nThreads@ many 'Done'
||| @ThreadInstruction@s have been encountered), send the resulting Summary over
||| the @accChan@ Channel (necessary to be able to @fork@ this function and
||| still obtain the Summary at the end).
|||
||| @resChan The channel to receives the results on.
||| @acc The Summary acting as an accumulator.
||| @accChan The Channel to send the final Summary over.
||| @nThreads The number of threads being used to run the tests.
export
testReceiver : (resChan : Channel ThreadResult) -> (acc : Summary)
-> (accChan : Channel Summary) -> (nThreads : Nat) -> IO ()
testReceiver resChan acc accChan 0 = channelPut accChan acc
testReceiver resChan acc accChan nThreads@(S k) =
do (Res res) <- channelGet resChan
| Done => testReceiver resChan acc accChan k
testReceiver resChan (updateSummary res acc) accChan nThreads
||| Function responsible for receiving and running tests.
|||
||| @opts The options to run the threads under.
||| @testChan The Channel to receive tests on.
||| @resChan The Channel to send results over.
testThread : (opts : Options) -> (testChan : Channel ThreadInstruction)
-> (resChan : Channel ThreadResult) -> IO ()
testThread opts testChan resChan =
do (Run test) <- channelGet testChan
| Stop => channelPut resChan Done
res <- runTest opts test
channelPut resChan (Res res)
testThread opts testChan resChan
||| A runner for a test pool. If there are tests in the @TestPool@ that we want
||| to run, spawns `opts.threads` many runners and sends them the tests,
||| collecting all the results in the @Summary@ returned at the end.
|||
||| @opts The options for the TestPool.
||| @pool The TestPool to run.
export
poolRunner : Options -> TestPool -> IO Summary
poolRunner opts pool
= do -- check that we indeed want to run some of these tests
let tests = filterTests opts (testCases pool)
let (_ :: _) = tests
| [] => pure initSummary
-- if so make sure the constraints are satisfied
cs <- for (toList (codegen pool) ++ constraints pool) $ \ req => do
mfp <- checkRequirement req
let msg = case mfp of
Nothing => "" ++ show req ++ " not found"
Just fp => "✓ Found " ++ show req ++ " at " ++ fp
pure (mfp, msg)
let (cs, msgs) = unzip cs
putStrLn (banner msgs)
let Just _ = the (Maybe (List String)) (sequence cs)
| Nothing => pure initSummary
-- if the test pool requires a specific codegen then use that
let opts = case codegen pool of
Nothing => { codegen := Nothing } opts
Just cg => { codegen := Just (show @{CG} cg) } opts
Default => opts
-- set up the channels
accChan <- makeChannel
resChan <- makeChannel
testChan <- makeChannel
-- and then run all the tests
for_ (replicate opts.threads 0) $ \_ =>
fork (testThread opts testChan resChan)
-- start sending tests
senderTID <- fork $ testSender testChan opts.threads tests
-- start receiving results
receiverTID <- fork $ testReceiver resChan initSummary accChan opts.threads
-- wait until things are done, i.e. until we receive the final acc
acc <- channelGet accChan
pure acc
where
separator : String
separator = fastPack $ replicate 72 '-'
banner : List String -> String
banner msgs = fastUnlines
$ [ "", separator, pool.poolName ]
++ msgs
++ [ separator ]
export
runnerWith : Options -> List TestPool -> IO ()
runnerWith opts tests = do
-- if no CG has been set, find a sensible default based on what is available
opts <- case codegen opts of
Nothing => pure $ { codegen := !findCG } opts
Just _ => pure opts
-- run the tests
res <- concat <$> traverse (poolRunner opts) tests
-- report the result
let nsucc = length res.success
let nfail = length res.failure
let ntotal = nsucc + nfail
putStrLn (show nsucc ++ "/" ++ show ntotal ++ " tests successful")
-- deal with failures
let list = fastUnlines res.failure
when (nfail > 0) $
do putStrLn "Failing tests:"
putStr list
-- always overwrite the failure file, if it is given
whenJust opts.failureFile $ \ path =>
do Right _ <- writeFile path list
| Left err => die (show err)
pure ()
-- exit
if nfail == 0
then exitSuccess
else exitFailure
||| A runner for a whole test suite
export
runner : List TestPool -> IO ()
runner tests
= do args <- getArgs
Just opts <- options args
| _ => do printLn args
putStrLn usage
runnerWith opts tests