macaw/refinement/tests/RefinementTests.hs
2019-02-08 14:10:08 -08:00

378 lines
16 KiB
Haskell

-- This test harness uses local files to run Macaw discovery both with
-- and without refinement and checks the results of that validation
-- against expected outputs (aka. golden testing).
--
-- test/samples/
-- X[.A].[F-]expected -- expected (golden) output [the primary file that enables this test]
-- X.A.exe -- input ELF binary file
-- X.c -- C source for generating binary
--
-- This methodology allows a binary for each architecture (A) to be
-- produced from a single C source (X), and then one or more
-- refinement test forms (F) to be created to run on that binary,
-- where F is one of:
--
-- "base" -- standard Macaw discovery
-- "refined" -- additional discovery from this macaw-refinement package
--
-- If there is no "F-" portion, then the same expected file is applied
-- for all possible values of F.
--
-- If there is an expected file but it fails to Parse on a Haskell
-- Read (e.g. "echo garbage > blah.ppc.base-expected") then the test
-- will write the actual output to a separate file with the
-- "last-actual" suffix (e.g. "blah.ppc.base-last-actual"); verify the
-- contents of that file and then simply copy them to the expected
-- file to get the correct file contents.
--
-- There is also a README in the test/samples directory where the C
-- source is described, along with various tests.
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-missing-signatures #-}
module Main ( main ) where
import GHC.TypeLits
import Control.Lens hiding ( (<.>) )
import Control.Monad
import qualified Control.Monad.Catch as X
import Control.Monad.IO.Class
import qualified Data.ByteString as BS
import qualified Data.ElfEdit as E
import Data.Foldable
import Data.Function ( on )
import Data.List ( intercalate, sortBy )
import qualified Data.Macaw.Architecture.Info as AI
import Data.Macaw.BinaryLoader as MBL
import Data.Macaw.BinaryLoader.X86 ()
import Data.Macaw.CFG as MC
import qualified Data.Macaw.Discovery as MD
import qualified Data.Macaw.Memory.ElfLoader as ML
import Data.Macaw.PPC
import Data.Macaw.PPC.Symbolic ()
import Data.Macaw.Refinement
import qualified Data.Macaw.Symbolic as MS
import qualified Data.Macaw.X86 as MX86
import Data.Macaw.X86.Symbolic ()
import qualified Data.Map as M
import Data.Maybe ( catMaybes )
import Data.Parameterized.Some
import Data.Proxy
import Data.Semigroup ( (<>) )
import qualified Data.Set as Set
import Data.Tagged
import Data.Typeable ( Typeable )
import Options.Applicative
import qualified SemMC.Architecture.PPC32 as PPC32
import qualified SemMC.Architecture.PPC64 as PPC64
import System.Directory ( doesFileExist )
import System.FilePath ( (</>), (<.>) )
import qualified System.FilePath as FP
import qualified System.FilePath.Glob as FPG
import qualified Test.Tasty as TT
import qualified Test.Tasty.HUnit as TTH
import Test.Tasty.Ingredients
import Test.Tasty.Options
import Text.PrettyPrint.ANSI.Leijen hiding ( (<$>), (<>), (</>) )
import Text.Read ( readEither )
import Prelude
datadir = "tests/samples"
supportedArch = [ "x86"
, "ppc"
]
data ShowSearch = ShowSearch Bool deriving (Eq, Ord, Typeable)
instance IsOption ShowSearch where
defaultValue = ShowSearch False
parseValue = fmap ShowSearch . safeRead
optionName = pure $ "showsearch"
optionHelp = pure $ "Show details of the search for the set of\n\
\refinement tests that would be performed\n\
\based on the contents of the " <> datadir <> " directory"
optionCLParser = ShowSearch <$> switch
( long (untag (optionName :: Tagged ShowSearch String))
<> help (untag (optionHelp :: Tagged ShowSearch String))
)
data VerboseLogging = VerboseLogging Bool
deriving (Eq, Ord)
instance IsOption VerboseLogging where
defaultValue = VerboseLogging False
parseValue = fmap VerboseLogging . safeRead
optionName = pure "verbose-logging"
optionHelp = pure "Turn on verbose logging output"
optionCLParser = VerboseLogging <$> switch ( long (untag (optionName :: Tagged VerboseLogging String))
<> help (untag (optionHelp :: Tagged VerboseLogging String))
)
-- | This is a Tasty "Ingredient" (aka test runner) that can be used
-- to display the search process and results for generating the tests.
searchResultsReport = TestManager [] $ \opts _tests ->
if lookupOption opts == ShowSearch True
then Just $ do searchlist <- getTestList datadir True
putStrLn ""
putStrLn $ "Final set of tests [" ++ show (length searchlist) ++ "]:"
mapM_ (putStrLn . show) searchlist
putStrLn ""
putStrLn ("These search results were based on the contents of the " <>
datadir <> " directory\n\
\Syntax: BASENAME.c\n\
\ BASENAME.ARCH.EXE\n\
\ BASENAME.ARCH.expected OR BASENAME.ARCH.base-expected\n\
\ AND BASENAME.ARCH.refined-expected\n\
\Note that if any .expected file fails a Haskell Read\n\
\parse, a \".....last-actual\" file is written; it can\n\
\be verified and renamed to be the new expected contents.\n")
return True
else Nothing
ingredients = TT.includingOptions [ Option (Proxy :: Proxy ShowSearch)
, Option (Proxy :: Proxy VerboseLogging)
]
: searchResultsReport
: TT.defaultIngredients
main :: IO ()
main = do
-- Note: dynamic test generation should be done before the
-- TT.defaultMain call, but test arguments/options are not yet
-- available for this; see:
-- https://stackoverflow.com/questions/33040722
-- https://github.com/feuerbach/tasty/issues/228
testInputs <- getTestList datadir False
let testNames = Set.fromList $ map name testInputs
TT.defaultMainWithIngredients ingredients $
TT.testGroup "macaw-refinement" $
mkNameGroup testInputs <$> toList testNames
where mkNameGroup inps nm =
TT.testGroup nm $ map mkTest $ filter ((==) nm . name) inps
data TestInput = TestInput { name :: String
, arch :: String
, expectFileBase :: Maybe FilePath
, expectFileRefined :: Maybe FilePath
, binaryFile :: FilePath
}
deriving (Eq,Show)
-- | Returns a list of the TestInputs that should be run. These are
-- driven by the existence of a .[F-]expected file, for which there
-- should be a corresponding .exe. The exe and expected files are
-- sub-named by the architecture (A) to which they apply.
getTestList :: FilePath -> Bool -> IO [TestInput]
getTestList basedir explain = do
when explain $ putStrLn $ "Checking for test inputs in " <> (basedir </> "*.exe")
let exeGlob = "*.(" <> (intercalate "|" supportedArch) <> ").exe"
exeNames <- FPG.namesMatching $ basedir </> exeGlob
postproc <$> mapM mkTI exeNames
where
postproc = sorted . catMaybes
sorted = sortBy (compare `on` name)
mkTI exe = do when explain $ putStrLn $ "Checking exe " ++ exe
-- exe is path/to/srcfile.A.exe
let exeRmvd = fst $ FP.splitExtension exe
(srcPathAndName, testArch') = FP.splitExtension exeRmvd
testName = FP.takeFileName srcPathAndName
if length testArch' < 2
then do when explain $ putStrLn $ " no arch, skipping"
return Nothing
else mkTIAN exe (drop 1 testArch') testName
mkTIAN exe testArch testName = do
when explain $ putStrLn $ " arch = " <> testArch
when explain $ putStrLn $ " name = " <> testName
let expRoot = basedir </> testName <.> testArch
expBaseName = expRoot <.> "base-expected"
expRefinedName = expRoot <.> "refined-expected"
expName = expRoot <.> "expected"
base <- doesFileExist expBaseName
refn <- doesFileExist expRefinedName
expt <- doesFileExist expName
let expn = if expt then Just expName else Nothing
mkTIANE exe testArch testName
(if base then Just expBaseName else expn)
(if refn then Just expRefinedName else expn)
mkTIANE exe testArch testName expBase expRefn = do
when explain $ case expBase of
Just f -> putStrLn $ " expected base = " <> f
Nothing -> return ()
when explain $ case expRefn of
Just f -> putStrLn $ " expected refined = " <> f
Nothing -> return ()
case (expBase, expRefn) of
(Nothing, Nothing) -> return Nothing
_ -> return $ Just $ TestInput { name = testName
, arch = testArch
, binaryFile = exe
, expectFileBase = expBase
, expectFileRefined = expRefn
}
mkTest :: TestInput -> TT.TestTree
mkTest testinp =
let readbin = BS.readFile $ binaryFile testinp
cleanup = return . const ()
formName ref = if ref then "refined" else "base"
tests = catMaybes [ mkT False <$> expectFileBase testinp
, mkT True <$> expectFileRefined testinp
]
mkT ref fn = \v r -> TTH.testCase (formName ref) $
testExpected ref fn testinp v r
in TT.askOption $ \(VerboseLogging beVerbose) ->
TT.withResource readbin cleanup $
\readBinary ->
TT.testGroup (arch testinp) $ map (\t -> t beVerbose readBinary) tests
testExpected useRefinement expFile testinp beVerbose readBinary = do
bs <- readBinary
case E.parseElf bs of
E.Elf64Res warnings elf -> mapM_ print warnings >> withElf64 elf
E.Elf32Res warnings elf -> mapM_ print warnings >> withElf32 elf
_ -> let badMsg = binaryFile testinp <> " is not a 64-bit ELF file"
in do when beVerbose $ putStrLn badMsg
TTH.assertBool badMsg False
where
withElf64 elf =
case E.elfMachine elf of
E.EM_PPC64 -> do
bin <- MBL.loadBinary @PPC64.PPC ML.defaultLoadOptions elf
let pli = ppc64_linux_info bin
withBinaryDiscoveredInfo testinp useRefinement expFile pli bin
E.EM_X86_64 ->
withBinaryDiscoveredInfo testinp useRefinement expFile MX86.x86_64_linux_info =<<
MBL.loadBinary @MX86.X86_64 ML.defaultLoadOptions elf
m -> error $ "no 64-bit ELF support for " ++ show m
withElf32 elf =
case E.elfMachine elf of
E.EM_PPC -> do
bin <- MBL.loadBinary @PPC32.PPC ML.defaultLoadOptions elf
let pli = ppc32_linux_info bin
withBinaryDiscoveredInfo testinp useRefinement expFile pli bin
m -> error $ "no 32-bit ELF support for " ++ show m
withBinaryDiscoveredInfo :: ( X.MonadThrow m
, MS.SymArchConstraints arch
, 16 <= MC.ArchAddrWidth arch
, MBL.BinaryLoader arch binFmt
, MonadIO m) =>
TestInput
-> Bool
-> FilePath
-> AI.ArchitectureInfo arch
-> MBL.LoadedBinary arch binFmt
-> m ()
withBinaryDiscoveredInfo testinp useRefinement expFile arch_info bin = do
entries <- toList <$> entryPoints bin
let baseCFG = MD.cfgFromAddrs arch_info (memoryImage bin) M.empty entries []
actualBase = cfgToExpected testinp bin (Just baseCFG) Nothing
if useRefinement
then do refinedCFG <- refineDiscovery bin baseCFG
let refinedBase = cfgToExpected testinp bin Nothing (Just refinedCFG)
compareToExpected "refined" refinedBase expFile
else compareToExpected "base" actualBase expFile
compareToExpected formName actual fn =
let cfgName = formName <> " CFG" in
do expectedData <- liftIO $ readFile fn
case readEither expectedData of
Right expInfo ->
-- KWQ: TODO: refine this down to individual components if it fails
liftIO $ TTH.assertEqual ("discovered " <> cfgName) expInfo actual
Left e ->
let badMsg = "error parsing expected " <> cfgName <> " data in " <> fn <> ": " <> e
outFileName = take (length fn - length "expected") fn <> "last-actual"
in liftIO $ do writeFile outFileName $ show actual
putStrLn $ "Generated actual output to: " <> outFileName
TTH.assertBool badMsg False
----------------------------------------------------------------------
-- | The ExpectedInfo is the format of information stored in the
-- .expected files. Ideally this would be a 'Show' output so that a
-- 'Read' could import native data structures for a more refined
-- comparison, but unfortunately the 'read . show == id' intent is not
-- held for Macaw/Flexdis86, so the actual stored and compared format
-- is generally the 'pretty' output of the structures.
data ExpectedInfo arch = Expected
{ expBinaryName :: String
, expEntryPoints :: [EntryPoint arch]
, expFunctions :: [Function arch]
}
deriving (Show, Read, Eq)
data EntryPoint arch = EntryPoint (Address arch)
deriving (Show, Read, Eq)
data Function arch = Function (Address arch) [Block arch]
deriving (Show, Read, Eq)
data Block arch = Block (Address arch) StatementList
deriving (Show, Read, Eq)
data Address arch = Address { addrSegmentBase :: Int
, addrSegmentOffset :: Int
, addrSegoffOffset :: Int
, addrPretty :: String
}
deriving (Show, Read, Eq)
mkAddress :: (MemWidth (RegAddrWidth (ArchReg arch))) =>
MC.MemSegmentOff (MC.ArchAddrWidth arch) -> Address arch
mkAddress addr = Address { addrSegmentBase = fromEnum $ segmentBase $ segoffSegment addr
, addrSegmentOffset = fromEnum $ memWordToUnsigned $ segmentOffset $ segoffSegment addr
, addrSegoffOffset = fromEnum $ memWordToUnsigned $ segoffOffset addr
, addrPretty = show $ pretty addr
}
type StatementList = String -- no Read or Eq for Macaw.Discovery.StatementList, so just use String format
cfgToExpected :: (MBL.BinaryLoader arch binFmt) =>
TestInput
-> MBL.LoadedBinary arch binFmt
-> Maybe (MD.DiscoveryState arch)
-> Maybe (MD.DiscoveryState arch)
-> ExpectedInfo arch
cfgToExpected testinp bin mbCFG mbRefCFG =
let eps = case entryPoints bin of
Left _ -> []
Right epl -> toList epl
fns = case (mbCFG, mbRefCFG) of
(Nothing, Nothing) -> error "must specify a discovered Macaw CFG"
(Just _, Just _) -> error "must specify only one discovered Macaw CFG"
(Just di, Nothing) -> getFunctions di
(Nothing, Just di) -> getFunctions di
in Expected { expBinaryName = binaryFile testinp
, expEntryPoints = (EntryPoint . mkAddress) <$> eps
, expFunctions = fns
}
getFunctions :: MD.DiscoveryState arch -> [Function arch]
getFunctions di =
AI.withArchConstraints (MD.archInfo di) $
fmap (\(funAddr, Some dfi) ->
Function
(mkAddress funAddr)
(fmap (\(blkAddr, pb) ->
Block (mkAddress blkAddr) (show $ MD.blockStatementList pb))
(dfi ^. MD.parsedBlocks . to M.toList)))
(di ^. MD.funInfo . to M.toList)