[ test ] cosmetic changes & retest capability (#1394)

* Banners for test pools
* Summary with the list of failing tests at the end
* Option to write the list of failing tests to a file
* Option to read the list of tests to run from a file
* Using these two latest features to add a new make target to rerun precisely the tests that failed last time
This commit is contained in:
G. Allais 2021-05-11 09:46:21 +01:00 committed by GitHub
parent 1d70a83fcd
commit 004cc45e9d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 169 additions and 71 deletions

1
.gitignore vendored
View File

@ -21,6 +21,7 @@ idris2docs_venv
/tests/**/*.so
/tests/**/*.dylib
/tests/**/*.dll
/tests/failures
/benchmark/**/build
/benchmark/*.csv

View File

@ -112,6 +112,13 @@ test: testenv
@echo
@${MAKE} -C tests only=$(only) IDRIS2=${TARGET} IDRIS2_PREFIX=${TEST_PREFIX}
retest: testenv
@echo
@echo "NOTE: \`${MAKE} retest\` does not rebuild Idris or the libraries packaged with it; to do that run \`${MAKE}\`"
@if [ ! -x "${TARGET}" ]; then echo "ERROR: Missing IDRIS2 executable. Cannot run tests!\n"; exit 1; fi
@echo
@${MAKE} -C tests retest only=$(only) IDRIS2=${TARGET} IDRIS2_PREFIX=${TEST_PREFIX}
support:
@${MAKE} -C support/c

View File

@ -221,12 +221,17 @@ public export
guard : Alternative f => Bool -> f ()
guard x = if x then pure () else empty
||| Conditionally execute an applicative expression.
||| Conditionally execute an applicative expression when the boolean is true.
public export
when : Applicative f => Bool -> Lazy (f ()) -> f ()
when True f = f
when False f = pure ()
||| Execute an applicative expression unless the boolean is true.
%inline public export
unless : Applicative f => Bool -> Lazy (f ()) -> f ()
unless = when . not
---------------------------
-- FOLDABLE, TRAVERSABLE --
---------------------------

View File

@ -157,6 +157,12 @@ maybe : Lazy b -> Lazy (a -> b) -> Maybe a -> b
maybe n j Nothing = n
maybe n j (Just x) = j x
||| Execute an applicative expression when the Maybe is Just
%inline public export
whenJust : Applicative f => Maybe a -> (a -> f ()) -> f ()
whenJust (Just a) k = k a
whenJust Nothing k = pure ()
public export
Eq a => Eq (Maybe a) where
Nothing == Nothing = True

View File

@ -43,9 +43,11 @@
||| + `idris2` The path of the executable we are testing.
||| + `codegen` The backend to use for code generation.
||| + `onlyNames` The tests to run relative to the generated executable.
||| + `onlyFile` The file listing the tests to run relative to the generated executable.
||| + `interactive` Whether to offer to update the expected file or not.
||| + `timing` Whether to display time taken for each test.
||| + `threads` The maximum numbers to use (default: number of cores).
||| + `failureFile` The file in which to write the list of failing tests.
|||
||| We provide an options parser (`options`) that takes the list of command line
||| arguments and constructs this for you.
@ -62,6 +64,7 @@
module Test.Golden
import Data.Either
import Data.Maybe
import Data.List
import Data.List1
@ -93,6 +96,8 @@ record Options where
timing : Bool
||| How many threads should we use?
threads : Nat
||| Should we write the list of failing cases from a file?
failureFile : Maybe String
export
initOptions : String -> Options
@ -103,6 +108,7 @@ initOptions exe
False
False
1
Nothing
export
usage : String -> String
@ -113,37 +119,48 @@ usage exe = unwords
, "[--interactive]"
, "[--cg CODEGEN]"
, "[--threads N]"
, "[--failure-file PATH]"
, "[--only-file PATH]"
, "[--only [NAMES]]"
]
||| Process the command line options.
export
options : List String -> Maybe Options
options args = case args of
(_ :: exe :: rest) => go rest (initOptions exe)
_ => Nothing
where
go : List String -> Options -> Maybe Options
go rest opts = case rest of
[] => pure opts
("--timing" :: xs) => go xs (record { timing = True} opts)
("--interactive" :: xs) => go xs (record { interactive = True } opts)
("--cg" :: cg :: xs) => go xs (record { codegen = Just cg } opts)
("--threads" :: n :: xs) => do let pos : Nat = !(parsePositive n)
go xs (record { threads = pos } opts)
("--only" :: xs) => pure $ record { onlyNames = xs } opts
_ => Nothing
-- [ Core ]
export
fail : String -> IO ()
fail : String -> IO a
fail err
= do putStrLn err
exitWith (ExitFailure 1)
||| Process the command line options.
export
options : List String -> IO (Maybe Options)
options args = case args of
(_ :: exe :: rest) => mkOptions exe rest
_ => pure Nothing
where
go : List String -> Maybe String -> Options -> Maybe (Maybe String, Options)
go rest only opts = case rest of
[] => pure (only, opts)
("--timing" :: xs) => go xs only (record { timing = True} opts)
("--interactive" :: xs) => go xs only (record { interactive = True } opts)
("--cg" :: cg :: xs) => go xs only (record { codegen = Just cg } opts)
("--threads" :: n :: xs) => do let pos : Nat = !(parsePositive n)
go xs only (record { threads = pos } opts)
("--failure-file" :: p :: xs) => go xs only (record { failureFile = Just p } opts)
("--only" :: xs) => pure (only, record { onlyNames = xs } opts)
("--only-file" :: p :: xs) => go xs (Just p) opts
_ => Nothing
mkOptions : String -> List String -> IO (Maybe Options)
mkOptions exe rest
= do let Just (mfp, opts) = go rest Nothing (initOptions exe)
| Nothing => pure Nothing
let Just fp = mfp
| Nothing => pure (Just opts)
Right only <- readFile fp
| Left err => fail (show err)
pure $ Just $ record { onlyNames $= (forget (lines only) ++) } opts
||| Normalise strings between different OS.
|||
@ -156,13 +173,18 @@ normalize str =
then pack $ filter (\ch => ch /= '/' && ch /= '\\') (unpack str)
else str
||| The result of a test run
||| `Left` corresponds to a failure, and `Right` to a success
Result : Type
Result = Either String String
||| Run the specified Golden test with the supplied options.
|||
||| See the module documentation for more information.
|||
||| @testPath the directory that contains the test.
export
runTest : Options -> String -> IO (Future Bool)
runTest : Options -> String -> IO (Future Result)
runTest opts testPath = forkIO $ do
start <- clockTime Thread
let cg = case codegen opts of
@ -174,16 +196,16 @@ runTest opts testPath = forkIO $ do
Right out <- readFile $ testPath ++ "/output"
| Left err => do print err
pure False
pure (Left testPath)
Right exp <- readFile $ testPath ++ "/expected"
| Left FileNotFound => do
if interactive opts
then mayOverwrite Nothing out
else print FileNotFound
pure False
pure (Left testPath)
| Left err => do print err
pure False
pure (Left testPath)
let result = normalize out == normalize exp
let time = timeDifference end start
@ -196,7 +218,7 @@ runTest opts testPath = forkIO $ do
then mayOverwrite (Just exp) out
else putStrLn . unlines $ expVsOut exp out
pure result
pure $ if result then Right testPath else Left testPath
where
getAnswer : IO Bool
@ -298,6 +320,7 @@ findCG
public export
record TestPool where
constructor MkTestPool
poolName : String
constraints : List Requirement
testCases : List String
@ -308,14 +331,43 @@ filterTests opts = case onlyNames opts of
[] => id
xs => filter (\ name => any (`isInfixOf` name) xs)
||| The summary of a test pool run
public export
record Summary where
constructor MkSummary
success : List String
failure : List String
export
initSummary : Summary
initSummary = MkSummary [] []
export
updateSummary : List Result -> Summary -> Summary
updateSummary res =
let (ls, ws) = partitionEithers res in
{ success $= (ws ++)
, failure $= (ls ++)
}
export
Semigroup Summary where
MkSummary ws1 ls1 <+> MkSummary ws2 ls2
= MkSummary (ws1 ++ ws2) (ls1 ++ ls2)
export
Monoid Summary where
neutral = initSummary
||| A runner for a test pool
export
poolRunner : Options -> TestPool -> IO (List Bool)
poolRunner : Options -> TestPool -> IO Summary
poolRunner opts pool
= do -- check that we indeed want to run some of these tests
let tests = filterTests opts (testCases pool)
let (_ :: _) = tests
| [] => pure []
| [] => pure initSummary
putStrLn banner
-- if so make sure the constraints are satisfied
cs <- for (constraints pool) $ \ req => do
mfp <- checkRequirement req
@ -324,17 +376,23 @@ poolRunner opts pool
Just fp => "Found " ++ show req ++ " at " ++ fp
pure mfp
let Just _ = the (Maybe (List String)) (sequence cs)
| Nothing => pure []
| Nothing => pure initSummary
-- if so run them all!
loop [] tests
loop initSummary tests
where
loop : List (List Bool) -> List String -> IO (List Bool)
loop acc [] = pure (concat $ reverse acc)
banner : String
banner =
let separator = fastPack $ replicate 72 '-' in
fastUnlines [ "", separator, pool.poolName, separator, "" ]
loop : Summary -> List String -> IO Summary
loop acc [] = pure acc
loop acc tests
= do let (now, later) = splitAt opts.threads tests
bs <- map await <$> traverse (runTest opts) now
loop (bs :: acc) later
loop (updateSummary bs acc) later
||| A runner for a whole test suite
@ -342,19 +400,36 @@ export
runner : List TestPool -> IO ()
runner tests
= do args <- getArgs
let (Just opts) = options args
| _ => do print args
putStrLn (usage "runtests")
Just opts <- options args
| _ => do print args
putStrLn (usage "runtests")
-- if no CG has been set, find a sensible default based on what is available
opts <- case codegen opts of
Nothing => pure $ record { codegen = !findCG } opts
Just _ => pure opts
-- run the tests
res <- concat <$> traverse (poolRunner opts) tests
putStrLn (show (length (filter id res)) ++ "/" ++ show (length res)
++ " tests successful")
if (any not res)
then exitWith (ExitFailure 1)
else exitWith ExitSuccess
-- report the result
let nsucc = length res.success
let nfail = length res.failure
let ntotal = nsucc + nfail
putStrLn (show nsucc ++ "/" ++ show ntotal ++ " tests successful")
-- deal with failures
let list = fastUnlines res.failure
when (nfail > 0) $
do putStrLn "Failing tests:"
putStrLn list
-- always overwrite the failure file, if it is given
whenJust opts.failureFile $ \ path =>
do Right _ <- writeFile path list
| Left err => fail (show err)
pure ()
-- exit
if nfail == 0
then exitWith ExitSuccess
else exitWith (ExitFailure 1)
-- [ EOF ]

View File

@ -19,7 +19,7 @@ import Test.Golden
-- Test cases
ttimpTests : TestPool
ttimpTests = MkTestPool []
ttimpTests = MkTestPool "TTImp" []
[ "basic001", "basic002", "basic003", "basic004", "basic005"
, "basic006"
, "coverage001", "coverage002"
@ -34,7 +34,7 @@ ttimpTests = MkTestPool []
]
idrisTestsBasic : TestPool
idrisTestsBasic = MkTestPool []
idrisTestsBasic = MkTestPool "Fundamental language features" []
-- Fundamental language features
["basic001", "basic002", "basic003", "basic004", "basic005",
"basic006", "basic007", "basic008", "basic009", "basic010",
@ -50,7 +50,7 @@ idrisTestsBasic = MkTestPool []
"basic056", "basic057", "basic058", "basic059"]
idrisTestsCoverage : TestPool
idrisTestsCoverage = MkTestPool []
idrisTestsCoverage = MkTestPool "Coverage checking" []
-- Coverage checking
["coverage001", "coverage002", "coverage003", "coverage004",
"coverage005", "coverage006", "coverage007", "coverage008",
@ -58,12 +58,12 @@ idrisTestsCoverage = MkTestPool []
"coverage013", "coverage014", "coverage015", "coverage016"]
idrisTestsCasetree : TestPool
idrisTestsCasetree = MkTestPool []
idrisTestsCasetree = MkTestPool "Case tree building" []
-- Case tree building
["casetree001"]
idrisTestsError : TestPool
idrisTestsError = MkTestPool []
idrisTestsError = MkTestPool "Error messages" []
-- Error messages
["error001", "error002", "error003", "error004", "error005",
"error006", "error007", "error008", "error009", "error010",
@ -74,7 +74,7 @@ idrisTestsError = MkTestPool []
"perror006", "perror007", "perror008"]
idrisTestsInteractive : TestPool
idrisTestsInteractive = MkTestPool []
idrisTestsInteractive = MkTestPool "Interactive editing" []
-- Interactive editing support
["interactive001", "interactive002", "interactive003", "interactive004",
"interactive005", "interactive006", "interactive007", "interactive008",
@ -86,7 +86,7 @@ idrisTestsInteractive = MkTestPool []
"interactive029", "interactive030"]
idrisTestsInterface : TestPool
idrisTestsInterface = MkTestPool []
idrisTestsInterface = MkTestPool "Interface" []
-- Interfaces
["interface001", "interface002", "interface003", "interface004",
"interface005", "interface006", "interface007", "interface008",
@ -97,7 +97,7 @@ idrisTestsInterface = MkTestPool []
"interface025"]
idrisTestsLinear : TestPool
idrisTestsLinear = MkTestPool []
idrisTestsLinear = MkTestPool "Quantities" []
-- QTT and linearity related
["linear001", "linear002", "linear003", -- "linear004" -- disabled due to requiring linearity subtyping
"linear005", "linear006", "linear007", "linear008",
@ -105,7 +105,7 @@ idrisTestsLinear = MkTestPool []
"linear013"]
idrisTestsLiterate : TestPool
idrisTestsLiterate = MkTestPool []
idrisTestsLiterate = MkTestPool "Literate programming" []
-- Literate
["literate001", "literate002", "literate003", "literate004",
"literate005", "literate006", "literate007", "literate008",
@ -113,13 +113,13 @@ idrisTestsLiterate = MkTestPool []
"literate013", "literate014", "literate015", "literate016"]
idrisTestsPerformance : TestPool
idrisTestsPerformance = MkTestPool []
idrisTestsPerformance = MkTestPool "Performance" []
-- Performance: things which have been slow in the past, or which
-- pose interesting challenges for the elaborator
["perf001", "perf002", "perf003", "perf004", "perf005", "perf006"]
idrisTestsRegression : TestPool
idrisTestsRegression = MkTestPool []
idrisTestsRegression = MkTestPool "Various regressions" []
-- Miscellaneous regressions
["reg001", "reg002", "reg003", "reg004", "reg005", "reg006", "reg007",
"reg008", "reg009", "reg010", "reg011", "reg012", "reg013", "reg014",
@ -129,7 +129,7 @@ idrisTestsRegression = MkTestPool []
"reg036", "reg037", "reg038", "reg039"]
idrisTestsData : TestPool
idrisTestsData = MkTestPool []
idrisTestsData = MkTestPool "Data and record types" []
[-- Data types
"data001",
-- Records, access and dependent update
@ -137,13 +137,13 @@ idrisTestsData = MkTestPool []
"record006", "record007"]
idrisTestsBuiltin : TestPool
idrisTestsBuiltin = MkTestPool []
idrisTestsBuiltin = MkTestPool "Builtin types and functions" []
-- %builtin related tests for the frontend (type-checking)
["builtin001", "builtin002", "builtin003", "builtin004", "builtin005",
"builtin006", "builtin007", "builtin008", "builtin009"]
idrisTestsEvaluator : TestPool
idrisTestsEvaluator = MkTestPool []
idrisTestsEvaluator = MkTestPool "Evaluation" []
[ -- Evaluator
"evaluator001", "evaluator002", "evaluator003", "evaluator004",
-- Unfortunately the behaviour of Double is platform dependent so the
@ -154,7 +154,7 @@ idrisTestsEvaluator = MkTestPool []
"interpreter005", "interpreter006", "interpreter007"]
idrisTests : TestPool
idrisTests = MkTestPool []
idrisTests = MkTestPool "Misc" []
-- Documentation strings
["docs001", "docs002",
-- Eta equality
@ -190,14 +190,14 @@ idrisTests = MkTestPool []
"pretty001"]
typeddTests : TestPool
typeddTests = MkTestPool []
typeddTests = MkTestPool "Type Driven Development" []
[ "chapter01", "chapter02", "chapter03", "chapter04", "chapter05"
, "chapter06", "chapter07", "chapter08", "chapter09", "chapter10"
, "chapter11", "chapter12", "chapter13", "chapter14"
]
chezTests : TestPool
chezTests = MkTestPool [Chez]
chezTests = MkTestPool "Chez backend" [Chez]
[ "chez001", "chez002", "chez003", "chez004", "chez005", "chez006"
, "chez007", "chez008", "chez009", "chez010", "chez011", "chez012"
, "chez013", "chez014", "chez015", "chez016", "chez017", "chez018"
@ -215,11 +215,11 @@ chezTests = MkTestPool [Chez]
]
refcTests : TestPool
refcTests = MkTestPool [C]
refcTests = MkTestPool "Reference counting C backend" [C]
[ "refc001" , "refc002" ]
racketTests : TestPool
racketTests = MkTestPool [Racket]
racketTests = MkTestPool "Racket backend" [Racket]
[ "forkjoin001"
, "semaphores001", "semaphores002"
, "futures001"
@ -234,7 +234,7 @@ racketTests = MkTestPool [Racket]
]
nodeTests : TestPool
nodeTests = MkTestPool [Node]
nodeTests = MkTestPool "Node backend" [Node]
[ "node001", "node002", "node003", "node004", "node005", "node006"
, "node007", "node008", "node009", "node011", "node012", "node015"
, "node017", "node018", "node019", "node021", "node022", "node023"
@ -250,17 +250,17 @@ nodeTests = MkTestPool [Node]
]
ideModeTests : TestPool
ideModeTests = MkTestPool []
ideModeTests = MkTestPool "IDE mode" []
[ "ideMode001", "ideMode002", "ideMode003", "ideMode004", "ideMode005"
]
preludeTests : TestPool
preludeTests = MkTestPool []
preludeTests = MkTestPool "Prelude library" []
[ "reg001"
]
templateTests : TestPool
templateTests = MkTestPool []
templateTests = MkTestPool "Test templates" []
[ "simple-test", "ttimp", "with-ipkg"
]
@ -270,7 +270,7 @@ templateTests = MkTestPool []
-- that only runs if all backends are
-- available.
baseLibraryTests : TestPool
baseLibraryTests = MkTestPool [Chez, Node]
baseLibraryTests = MkTestPool "Base library" [Chez, Node]
[ "system_file001"
, "data_bits001"
, "system_info001"
@ -278,12 +278,12 @@ baseLibraryTests = MkTestPool [Chez, Node]
-- same behavior as `baseLibraryTests`
contribLibraryTests : TestPool
contribLibraryTests = MkTestPool [Chez, Node]
contribLibraryTests = MkTestPool "Contrib library" [Chez, Node]
[ "json_001"
]
codegenTests : TestPool
codegenTests = MkTestPool []
codegenTests = MkTestPool "Code generation" []
[ "con001"
, "builtin001"
]

View File

@ -4,12 +4,16 @@ threads ?= `nproc`
.PHONY: testbin test
test:
./build/exec/runtests $(IDRIS2) $(INTERACTIVE) --threads $(threads) --only $(only)
./build/exec/runtests $(IDRIS2) $(INTERACTIVE) --failure-file failures --threads $(threads) --only $(only)
retest:
./build/exec/runtests $(IDRIS2) $(INTERACTIVE) --failure-file failures --threads $(threads) --only-file failures --only $(only)
testbin:
${IDRIS2} --build tests.ipkg
clean:
$(RM) failures
$(RM) -r build
$(RM) -r **/**/build
@find . -type f -name 'output' -exec rm -rf {} \;