[ test ] cosmetic changes & retest capability (#1394)

* Banners for test pools * Summary with the list of failing tests at the end * Option to write the list of failing tests to a file * Option to read the list of tests to run from a file * Using these two latest features to add a new make target to rerun precisely the tests that failed last time
2024-12-24 20:23:11 +03:00 · 2021-05-11 09:46:21 +01:00 · 2021-05-11 09:46:21 +01:00 · 004cc45e9d
commit 004cc45e9d
parent 1d70a83fcd
7 changed files with 169 additions and 71 deletions
--- a/.gitignore
+++ b/.gitignore
@ -21,6 +21,7 @@ idris2docs_venv
 /tests/**/*.so
 /tests/**/*.dylib
 /tests/**/*.dll
+/tests/failures

 /benchmark/**/build
 /benchmark/*.csv
--- a/7
+++ b/7
@ -112,6 +112,13 @@ test: testenv
 	@echo
 	@${MAKE} -C tests only=$(only) IDRIS2=${TARGET} IDRIS2_PREFIX=${TEST_PREFIX}

+retest: testenv
+	@echo
+	@echo "NOTE: \`${MAKE} retest\` does not rebuild Idris or the libraries packaged with it; to do that run \`${MAKE}\`"
+	@if [ ! -x "${TARGET}" ]; then echo "ERROR: Missing IDRIS2 executable. Cannot run tests!\n"; exit 1; fi
+	@echo
+	@${MAKE} -C tests retest only=$(only) IDRIS2=${TARGET} IDRIS2_PREFIX=${TEST_PREFIX}
+

 support:
 	@${MAKE} -C support/c
--- a/libs/prelude/Prelude/Interfaces.idr
+++ b/libs/prelude/Prelude/Interfaces.idr
@ -221,12 +221,17 @@ public export
 guard : Alternative f => Bool -> f ()
 guard x = if x then pure () else empty

-||| Conditionally execute an applicative expression.
+||| Conditionally execute an applicative expression when the boolean is true.
 public export
 when : Applicative f => Bool -> Lazy (f ()) -> f ()
 when True f = f
 when False f = pure ()

+||| Execute an applicative expression unless the boolean is true.
+%inline public export
+unless : Applicative f => Bool -> Lazy (f ()) -> f ()
+unless = when . not
+
 ---------------------------
 -- FOLDABLE, TRAVERSABLE --
 ---------------------------
--- a/libs/prelude/Prelude/Types.idr
+++ b/libs/prelude/Prelude/Types.idr
@ -157,6 +157,12 @@ maybe : Lazy b -> Lazy (a -> b) -> Maybe a -> b
 maybe n j Nothing  = n
 maybe n j (Just x) = j x

+||| Execute an applicative expression when the Maybe is Just
+%inline public export
+whenJust : Applicative f => Maybe a -> (a -> f ()) -> f ()
+whenJust (Just a) k = k a
+whenJust Nothing k = pure ()
+
 public export
 Eq a => Eq (Maybe a) where
  Nothing  == Nothing  = True
--- a/libs/test/Test/Golden.idr
+++ b/libs/test/Test/Golden.idr
@ -43,9 +43,11 @@
 ||| + `idris2`       The path of the executable we are testing.
 ||| + `codegen`      The backend to use for code generation.
 ||| + `onlyNames`    The tests to run relative to the generated executable.
+||| + `onlyFile`     The file listing the tests to run relative to the generated executable.
 ||| + `interactive`  Whether to offer to update the expected file or not.
 ||| + `timing`       Whether to display time taken for each test.
 ||| + `threads`      The maximum numbers to use (default: number of cores).
+||| + `failureFile`  The file in which to write the list of failing tests.
 |||
 ||| We provide an options parser (`options`) that takes the list of command line
 ||| arguments and constructs this for you.
@ -62,6 +64,7 @@

 module Test.Golden

+import Data.Either
 import Data.Maybe
 import Data.List
 import Data.List1
@ -93,6 +96,8 @@ record Options where
  timing       : Bool
  ||| How many threads should we use?
  threads      : Nat
+  ||| Should we write the list of failing cases from a file?
+  failureFile     : Maybe String

 export
 initOptions : String -> Options
@ -103,6 +108,7 @@ initOptions exe
              False
              False
              1
+              Nothing

 export
 usage : String -> String
@ -113,37 +119,48 @@ usage exe = unwords
  , "[--interactive]"
  , "[--cg CODEGEN]"
  , "[--threads N]"
+  , "[--failure-file PATH]"
+  , "[--only-file PATH]"
  , "[--only [NAMES]]"
  ]

-||| Process the command line options.
 export
-options : List String -> Maybe Options
-options args = case args of
-    (_ :: exe :: rest) => go rest (initOptions exe)
-    _ => Nothing
-
-  where
-
-    go : List String -> Options -> Maybe Options
-    go rest opts = case rest of
-      []                       => pure opts
-      ("--timing" :: xs)       => go xs (record { timing = True} opts)
-      ("--interactive" :: xs)  => go xs (record { interactive = True } opts)
-      ("--cg" :: cg :: xs)     => go xs (record { codegen = Just cg } opts)
-      ("--threads" :: n :: xs) => do let pos : Nat = !(parsePositive n)
-                                     go xs (record { threads = pos } opts)
-      ("--only" :: xs)         => pure $ record { onlyNames = xs } opts
-      _ => Nothing
-
-- [ Core ]
-
-export
-fail : String -> IO ()
+fail : String -> IO a
 fail err
    = do putStrLn err
         exitWith (ExitFailure 1)

+||| Process the command line options.
+export
+options : List String -> IO (Maybe Options)
+options args = case args of
+    (_ :: exe :: rest) => mkOptions exe rest
+    _ => pure Nothing
+
+  where
+
+    go : List String -> Maybe String -> Options -> Maybe (Maybe String, Options)
+    go rest only opts = case rest of
+      []                            => pure (only, opts)
+      ("--timing" :: xs)            => go xs only (record { timing = True} opts)
+      ("--interactive" :: xs)       => go xs only (record { interactive = True } opts)
+      ("--cg" :: cg :: xs)          => go xs only (record { codegen = Just cg } opts)
+      ("--threads" :: n :: xs)      => do let pos : Nat = !(parsePositive n)
+                                          go xs only (record { threads = pos } opts)
+      ("--failure-file" :: p :: xs) => go  xs only (record { failureFile = Just p } opts)
+      ("--only" :: xs)              => pure (only, record { onlyNames = xs } opts)
+      ("--only-file" :: p :: xs)    => go xs (Just p) opts
+      _ => Nothing
+
+    mkOptions : String -> List String -> IO (Maybe Options)
+    mkOptions exe rest
+      = do let Just (mfp, opts) = go rest Nothing (initOptions exe)
+                 | Nothing => pure Nothing
+           let Just fp = mfp
+                 | Nothing => pure (Just opts)
+           Right only <- readFile fp
+             | Left err => fail (show err)
+           pure $ Just $ record { onlyNames $= (forget (lines only) ++) } opts

 ||| Normalise strings between different OS.
 |||
@ -156,13 +173,18 @@ normalize str =
      then pack $ filter (\ch => ch /= '/' && ch /= '\\') (unpack str)
      else str

+||| The result of a test run
+||| `Left` corresponds to a failure, and `Right` to a success
+Result : Type
+Result = Either String String
+
 ||| Run the specified Golden test with the supplied options.
 |||
 ||| See the module documentation for more information.
 |||
 ||| @testPath the directory that contains the test.
 export
-runTest : Options -> String -> IO (Future Bool)
+runTest : Options -> String -> IO (Future Result)
 runTest opts testPath = forkIO $ do
  start <- clockTime Thread
  let cg = case codegen opts of
@ -174,16 +196,16 @@ runTest opts testPath = forkIO $ do

  Right out <- readFile $ testPath ++ "/output"
    | Left err => do print err
-                     pure False
+                     pure (Left testPath)

  Right exp <- readFile $ testPath ++ "/expected"
    | Left FileNotFound => do
        if interactive opts
          then mayOverwrite Nothing out
          else print FileNotFound
-        pure False
+        pure (Left testPath)
    | Left err => do print err
-                     pure False
+                     pure (Left testPath)

  let result = normalize out == normalize exp
  let time = timeDifference end start
@ -196,7 +218,7 @@ runTest opts testPath = forkIO $ do
        then mayOverwrite (Just exp) out
        else putStrLn . unlines $ expVsOut exp out

-  pure result
+  pure $ if result then Right testPath else Left testPath

  where
    getAnswer : IO Bool
@ -298,6 +320,7 @@ findCG
 public export
 record TestPool where
  constructor MkTestPool
+  poolName : String
  constraints : List Requirement
  testCases : List String

@ -308,14 +331,43 @@ filterTests opts = case onlyNames opts of
  [] => id
  xs => filter (\ name => any (`isInfixOf` name) xs)

+||| The summary of a test pool run
+public export
+record Summary where
+  constructor MkSummary
+  success : List String
+  failure : List String
+
+export
+initSummary : Summary
+initSummary = MkSummary [] []
+
+export
+updateSummary : List Result -> Summary -> Summary
+updateSummary res =
+  let (ls, ws) = partitionEithers res in
+  { success $= (ws ++)
+  , failure $= (ls ++)
+  }
+
+export
+Semigroup Summary where
+  MkSummary ws1 ls1 <+> MkSummary ws2 ls2
+    = MkSummary (ws1 ++ ws2) (ls1 ++ ls2)
+
+export
+Monoid Summary where
+  neutral = initSummary
+
 ||| A runner for a test pool
 export
-poolRunner : Options -> TestPool -> IO (List Bool)
+poolRunner : Options -> TestPool -> IO Summary
 poolRunner opts pool
  = do -- check that we indeed want to run some of these tests
       let tests = filterTests opts (testCases pool)
       let (_ :: _) = tests
-             | [] => pure []
+             | [] => pure initSummary
+       putStrLn banner
       -- if so make sure the constraints are satisfied
       cs <- for (constraints pool) $ \ req => do
          mfp <- checkRequirement req
@ -324,17 +376,23 @@ poolRunner opts pool
            Just fp => "Found " ++ show req ++ " at " ++ fp
          pure mfp
       let Just _ = the (Maybe (List String)) (sequence cs)
-             | Nothing => pure []
+             | Nothing => pure initSummary
       -- if so run them all!
-       loop [] tests
+       loop initSummary tests

  where
-    loop : List (List Bool) -> List String -> IO (List Bool)
-    loop acc [] = pure (concat $ reverse acc)
+
+    banner : String
+    banner =
+      let separator = fastPack $ replicate 72 '-' in
+      fastUnlines [ "", separator, pool.poolName, separator, "" ]
+
+    loop : Summary -> List String -> IO Summary
+    loop acc [] = pure acc
    loop acc tests
      = do let (now, later) = splitAt opts.threads tests
           bs <- map await <$> traverse (runTest opts) now
-           loop (bs :: acc) later
+           loop (updateSummary bs acc) later


 ||| A runner for a whole test suite
@ -342,19 +400,36 @@ export
 runner : List TestPool -> IO ()
 runner tests
    = do args <- getArgs
-         let (Just opts) = options args
-              | _ => do print args
-                        putStrLn (usage "runtests")
+         Just opts <- options args
+            | _ => do print args
+                      putStrLn (usage "runtests")
         -- if no CG has been set, find a sensible default based on what is available
         opts <- case codegen opts of
                   Nothing => pure $ record { codegen = !findCG } opts
                   Just _ => pure opts
         -- run the tests
         res <- concat <$> traverse (poolRunner opts) tests
-         putStrLn (show (length (filter id res)) ++ "/" ++ show (length res)
-                       ++ " tests successful")
-         if (any not res)
-            then exitWith (ExitFailure 1)
-            else exitWith ExitSuccess
+
+         -- report the result
+         let nsucc  = length res.success
+         let nfail  = length res.failure
+         let ntotal = nsucc + nfail
+         putStrLn (show nsucc ++ "/" ++ show ntotal ++ " tests successful")
+
+         -- deal with failures
+         let list = fastUnlines res.failure
+         when (nfail > 0) $
+           do putStrLn "Failing tests:"
+              putStrLn list
+         -- always overwrite the failure file, if it is given
+         whenJust opts.failureFile $ \ path =>
+           do Right _ <- writeFile path list
+                | Left err => fail (show err)
+              pure ()
+
+         -- exit
+         if nfail == 0
+           then exitWith ExitSuccess
+           else exitWith (ExitFailure 1)

 -- [ EOF ]
--- a/tests/Main.idr
+++ b/tests/Main.idr
@ -19,7 +19,7 @@ import Test.Golden
 -- Test cases

 ttimpTests : TestPool
-ttimpTests = MkTestPool []
+ttimpTests = MkTestPool "TTImp" []
     [ "basic001", "basic002", "basic003", "basic004", "basic005"
     , "basic006"
     , "coverage001", "coverage002"
@ -34,7 +34,7 @@ ttimpTests = MkTestPool []
     ]

 idrisTestsBasic : TestPool
-idrisTestsBasic = MkTestPool []
+idrisTestsBasic = MkTestPool "Fundamental language features" []
      -- Fundamental language features
      ["basic001", "basic002", "basic003", "basic004", "basic005",
       "basic006", "basic007", "basic008", "basic009", "basic010",
@ -50,7 +50,7 @@ idrisTestsBasic = MkTestPool []
       "basic056", "basic057", "basic058", "basic059"]

 idrisTestsCoverage : TestPool
-idrisTestsCoverage = MkTestPool []
+idrisTestsCoverage = MkTestPool "Coverage checking" []
       -- Coverage checking
      ["coverage001", "coverage002", "coverage003", "coverage004",
       "coverage005", "coverage006", "coverage007", "coverage008",
@ -58,12 +58,12 @@ idrisTestsCoverage = MkTestPool []
       "coverage013", "coverage014", "coverage015", "coverage016"]

 idrisTestsCasetree : TestPool
-idrisTestsCasetree = MkTestPool []
+idrisTestsCasetree = MkTestPool "Case tree building" []
       -- Case tree building
      ["casetree001"]

 idrisTestsError : TestPool
-idrisTestsError = MkTestPool []
+idrisTestsError = MkTestPool "Error messages" []
       -- Error messages
      ["error001", "error002", "error003", "error004", "error005",
       "error006", "error007", "error008", "error009", "error010",
@ -74,7 +74,7 @@ idrisTestsError = MkTestPool []
       "perror006", "perror007", "perror008"]

 idrisTestsInteractive : TestPool
-idrisTestsInteractive = MkTestPool []
+idrisTestsInteractive = MkTestPool "Interactive editing" []
       -- Interactive editing support
      ["interactive001", "interactive002", "interactive003", "interactive004",
       "interactive005", "interactive006", "interactive007", "interactive008",
@ -86,7 +86,7 @@ idrisTestsInteractive = MkTestPool []
       "interactive029", "interactive030"]

 idrisTestsInterface : TestPool
-idrisTestsInterface = MkTestPool []
+idrisTestsInterface = MkTestPool "Interface" []
       -- Interfaces
      ["interface001", "interface002", "interface003", "interface004",
       "interface005", "interface006", "interface007", "interface008",
@ -97,7 +97,7 @@ idrisTestsInterface = MkTestPool []
       "interface025"]

 idrisTestsLinear : TestPool
-idrisTestsLinear = MkTestPool []
+idrisTestsLinear = MkTestPool "Quantities" []
       -- QTT and linearity related
       ["linear001", "linear002", "linear003", -- "linear004" -- disabled due to requiring linearity subtyping
        "linear005", "linear006", "linear007", "linear008",
@ -105,7 +105,7 @@ idrisTestsLinear = MkTestPool []
        "linear013"]

 idrisTestsLiterate : TestPool
-idrisTestsLiterate = MkTestPool []
+idrisTestsLiterate = MkTestPool "Literate programming" []
       -- Literate
      ["literate001", "literate002", "literate003", "literate004",
       "literate005", "literate006", "literate007", "literate008",
@ -113,13 +113,13 @@ idrisTestsLiterate = MkTestPool []
       "literate013", "literate014", "literate015", "literate016"]

 idrisTestsPerformance : TestPool
-idrisTestsPerformance = MkTestPool []
+idrisTestsPerformance = MkTestPool "Performance" []
       -- Performance: things which have been slow in the past, or which
       -- pose interesting challenges for the elaborator
      ["perf001", "perf002", "perf003", "perf004", "perf005", "perf006"]

 idrisTestsRegression : TestPool
-idrisTestsRegression = MkTestPool []
+idrisTestsRegression = MkTestPool "Various regressions" []
       -- Miscellaneous regressions
      ["reg001", "reg002", "reg003", "reg004", "reg005", "reg006", "reg007",
       "reg008", "reg009", "reg010", "reg011", "reg012", "reg013", "reg014",
@ -129,7 +129,7 @@ idrisTestsRegression = MkTestPool []
       "reg036", "reg037", "reg038", "reg039"]

 idrisTestsData : TestPool
-idrisTestsData = MkTestPool []
+idrisTestsData = MkTestPool "Data and record types" []
      [-- Data types
       "data001",
       -- Records, access and dependent update
@ -137,13 +137,13 @@ idrisTestsData = MkTestPool []
       "record006", "record007"]

 idrisTestsBuiltin : TestPool
-idrisTestsBuiltin = MkTestPool []
+idrisTestsBuiltin = MkTestPool "Builtin types and functions" []
      -- %builtin related tests for the frontend (type-checking)
      ["builtin001", "builtin002", "builtin003", "builtin004", "builtin005",
       "builtin006", "builtin007", "builtin008", "builtin009"]

 idrisTestsEvaluator : TestPool
-idrisTestsEvaluator = MkTestPool []
+idrisTestsEvaluator = MkTestPool "Evaluation" []
      [ -- Evaluator
       "evaluator001", "evaluator002", "evaluator003", "evaluator004",
       -- Unfortunately the behaviour of Double is platform dependent so the
@ -154,7 +154,7 @@ idrisTestsEvaluator = MkTestPool []
       "interpreter005", "interpreter006", "interpreter007"]

 idrisTests : TestPool
-idrisTests = MkTestPool []
+idrisTests = MkTestPool "Misc" []
       -- Documentation strings
      ["docs001", "docs002",
       -- Eta equality
@ -190,14 +190,14 @@ idrisTests = MkTestPool []
       "pretty001"]

 typeddTests : TestPool
-typeddTests = MkTestPool []
+typeddTests = MkTestPool "Type Driven Development" []
     [ "chapter01", "chapter02", "chapter03", "chapter04", "chapter05"
     , "chapter06", "chapter07", "chapter08", "chapter09", "chapter10"
     , "chapter11", "chapter12", "chapter13", "chapter14"
     ]

 chezTests : TestPool
-chezTests = MkTestPool [Chez]
+chezTests = MkTestPool "Chez backend" [Chez]
    [ "chez001", "chez002", "chez003", "chez004", "chez005", "chez006"
    , "chez007", "chez008", "chez009", "chez010", "chez011", "chez012"
    , "chez013", "chez014", "chez015", "chez016", "chez017", "chez018"
@ -215,11 +215,11 @@ chezTests = MkTestPool [Chez]
    ]

 refcTests : TestPool
-refcTests = MkTestPool [C]
+refcTests = MkTestPool "Reference counting C backend" [C]
  [ "refc001" , "refc002" ]

 racketTests : TestPool
-racketTests = MkTestPool [Racket]
+racketTests = MkTestPool "Racket backend" [Racket]
    [ "forkjoin001"
    , "semaphores001", "semaphores002"
    , "futures001"
@ -234,7 +234,7 @@ racketTests = MkTestPool [Racket]
    ]

 nodeTests : TestPool
-nodeTests = MkTestPool [Node]
+nodeTests = MkTestPool "Node backend" [Node]
    [ "node001", "node002", "node003", "node004", "node005", "node006"
    , "node007", "node008", "node009", "node011", "node012", "node015"
    , "node017", "node018", "node019", "node021", "node022", "node023"
@ -250,17 +250,17 @@ nodeTests = MkTestPool [Node]
    ]

 ideModeTests : TestPool
-ideModeTests = MkTestPool []
+ideModeTests = MkTestPool "IDE mode" []
  [ "ideMode001", "ideMode002", "ideMode003", "ideMode004", "ideMode005"
  ]

 preludeTests : TestPool
-preludeTests = MkTestPool []
+preludeTests = MkTestPool "Prelude library" []
  [ "reg001"
  ]

 templateTests : TestPool
-templateTests = MkTestPool []
+templateTests = MkTestPool "Test templates" []
  [ "simple-test", "ttimp", "with-ipkg"
  ]

@ -270,7 +270,7 @@ templateTests = MkTestPool []
 -- that only runs if all backends are
 -- available.
 baseLibraryTests : TestPool
-baseLibraryTests = MkTestPool [Chez, Node]
+baseLibraryTests = MkTestPool "Base library" [Chez, Node]
  [ "system_file001"
  , "data_bits001"
  , "system_info001"
@ -278,12 +278,12 @@ baseLibraryTests = MkTestPool [Chez, Node]

 -- same behavior as `baseLibraryTests`
 contribLibraryTests : TestPool
-contribLibraryTests = MkTestPool [Chez, Node]
+contribLibraryTests = MkTestPool "Contrib library" [Chez, Node]
  [ "json_001"
  ]

 codegenTests : TestPool
-codegenTests = MkTestPool []
+codegenTests = MkTestPool "Code generation" []
  [ "con001"
  , "builtin001"
  ]
--- a/tests/Makefile
+++ b/tests/Makefile
@ -4,12 +4,16 @@ threads ?= `nproc`
 .PHONY: testbin test

 test:
-	./build/exec/runtests $(IDRIS2) $(INTERACTIVE) --threads $(threads) --only $(only)
+	./build/exec/runtests $(IDRIS2) $(INTERACTIVE) --failure-file failures --threads $(threads) --only $(only)
+
+retest:
+	./build/exec/runtests $(IDRIS2) $(INTERACTIVE) --failure-file failures --threads $(threads) --only-file failures --only $(only)

 testbin:
 	${IDRIS2} --build tests.ipkg

 clean:
+	$(RM) failures
 	$(RM) -r build
 	$(RM) -r **/**/build
 	@find . -type f -name 'output' -exec rm -rf {} \;