macaw/macaw-arm/tests/ARMTests.hs

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}

module ARMTests
    ( armAsmTests
    )
    where


import           Control.Lens hiding ( ignored )
import           Control.Monad ( when )
import           Control.Monad.Catch ( throwM, Exception )
import qualified Data.ElfEdit as E
import qualified Data.Foldable as F
import qualified Data.Macaw.ARM as RO
import qualified Data.Macaw.ARM.BinaryFormat.ELF as ARMELF
import qualified Data.Macaw.Discovery as MD
import qualified Data.Macaw.Memory as MM
import qualified Data.Map as M
import           Data.Maybe
import           Data.Monoid
import qualified Data.Parameterized.Some as PU
import qualified Data.Set as S
import           Data.Typeable ( Typeable )
import           Data.Word ( Word64 )
import           Debug.Trace
import           Shared
import           System.FilePath ( dropExtension, replaceExtension )
import qualified Test.Tasty as T
import qualified Test.Tasty.HUnit as T
import           Text.PrettyPrint.ANSI.Leijen ( putDoc )
import           Text.Printf ( PrintfArg, printf )
import           Text.Read ( readMaybe )


-- | Compile-time switch for diagnostic output.  When 'True', 'chatty'
-- prints its messages; when 'False', 'chatty' prints nothing.  Changing
-- the value requires rebuilding the tests.
isChatty :: Bool
isChatty = True

-- | Write one diagnostic line to standard output, but only when
-- 'isChatty' is enabled; otherwise do nothing.
chatty :: String -> IO ()
chatty message
  | isChatty  = putStrLn message
  | otherwise = return ()


-- | Build the @ARM@ test group from a list of paths to expected-result
-- files.  Each path becomes one test case (see 'mkTest'): the binary
-- that corresponds to the file is located from the file's name, the
-- macaw-arm library is run over that binary, and what it discovers is
-- compared to the expectations described in the file.
armAsmTests :: [FilePath] -> T.TestTree
armAsmTests expectedFiles =
  T.testGroup "ARM" [ mkTest expectedFile | expectedFile <- expectedFiles ]


-- | Build the test case for a single expected-result file.  The test is
-- named after the file.  When run, it parses the expectations with
-- 'getExpected' and checks them against the executable whose path is
-- obtained by dropping the file's extension and then replacing the
-- extension of what remains with @exe@ (e.g. @foo.s.expected@ is paired
-- with @foo.exe@).
mkTest :: FilePath -> T.TestTree
mkTest fp =
  T.testCase fp $
    getExpected fp >>= \expected -> withELF exePath (testDiscovery expected)
  where
    exePath = replaceExtension (dropExtension fp) "exe"


-- ----------------------------------------------------------------------
-- Parser/representation for files that contain expected results of
-- semantics discovery from a binary.

-- | Wrapper around a numeric value whose 'Show' instance renders it in
-- hexadecimal with a @0x@ prefix.  The instances listed in the deriving
-- clause are taken from the wrapped type.
newtype Hex a = Hex a
  deriving (Eq, Ord, Num, PrintfArg)

-- | Render the wrapped value in hexadecimal, prefixed with @0x@.
instance (Num a, Show a, PrintfArg a) => Show (Hex a) where
  show (Hex value) = "0x" ++ printf "%x" value

-- | Parse exactly as the wrapped type does, wrapping every resulting
-- value in 'Hex' and leaving the unconsumed input untouched.
instance (Read a) => Read (Hex a) where
  readsPrec prec input = map wrap (readsPrec prec input)
    where
      wrap (value, rest) = (Hex value, rest)

-- | The type of expected results for test cases, in the form stored in
-- an expected-result file.  'getExpected' parses the file contents with
-- the derived 'Read' instance.
data ExpectedResultFileData =
  R { funcs :: [(Hex Word64, [(Hex Word64, Word64)])]
    -- ^ The first element of each pair is the address of the entry
    -- point of a function.  The second element lists the basic blocks
    -- in that function (including the first block); each block is a
    -- pair of its start address and the size that the discovered
    -- block's size is compared against.
    , ignoreBlocks :: [Hex Word64]
    -- ^ This is a list of discovered blocks to ignore.  This is
    -- basically just the address of the instruction after the exit
    -- syscall, as macaw doesn't know that exit never returns and
    -- discovers a false block after exit.
    }
  deriving (Read, Show, Eq)

-- | Expected results in the form used for comparison.  The first
-- component maps each function entry address to the set of
-- @(block address, block size)@ pairs expected in that function; the
-- second component is the set of addresses to ignore.  Built from
-- 'ExpectedResultFileData' by 'getExpected'.
type ExpectedResult = (M.Map (Hex Word64) (S.Set (Hex Word64, Word64)),
                        S.Set (Hex Word64))

-- | Exception thrown by 'getExpected' when the contents of an
-- expected-result file cannot be parsed.  The 'String' carries the
-- error message.
data ExpectedException = BadExpectedFile String
                         deriving (Typeable, Show)

-- | Uses the default 'Exception' methods, which is what allows
-- 'ExpectedException' to be thrown with 'throwM'.
instance Exception ExpectedException


-- | Read the expected results for a test from the given file.
--
-- The file contents are parsed as an 'ExpectedResultFileData' value.  On
-- success the result pairs a map from each function entry point to the
-- set of block starts within that function with the set of blocks to
-- ignore.  If the contents do not parse, a 'BadExpectedFile' exception
-- quoting the contents is thrown.
getExpected :: FilePath -> IO ExpectedResult
getExpected expectedFilename = do
  contents <- readFile expectedFilename
  maybe (throwM (badSpec contents)) (return . toExpected) (readMaybe contents)
  where
    badSpec contents =
      BadExpectedFile ("Invalid expected spec: " ++ show contents)

    -- Convert the list-based file representation into the map/set form.
    toExpected :: ExpectedResultFileData -> ExpectedResult
    toExpected er =
      ( M.fromList (map (\(entry, starts) -> (entry, S.fromList starts)) (funcs er))
      , S.fromList (ignoreBlocks er)
      )


-- | Check discovery results for an ELF file of either class by
-- dispatching on the file's ELF class.
--
-- 32-bit files are handed to 'testDiscovery32'.  64-bit files are not
-- supported yet; they are reported as an ordinary test failure rather
-- than by raising an exception with 'error'.
testDiscovery :: ExpectedResult -> E.Elf w -> IO ()
testDiscovery expRes elf =
    case E.elfClass elf of
      E.ELFCLASS32 -> testDiscovery32 expRes elf
      E.ELFCLASS64 ->
        T.assertFailure "testDiscovery64 TBD: 64-bit ELF files are not supported yet"

-- | Run macaw discovery on a 32-bit ELF binary, starting from its entry
-- point, and compare what was found with the expected results.
--
-- The first argument pairs the map of expected functions (entry address
-- to the set of expected @(block address, block size)@ pairs) with the
-- set of addresses to ignore.  Two checks are made:
--
--   * every discovered function whose entry address is not ignored must
--     be present in the map, and its discovered blocks (less the ignored
--     ones) must equal the expected set;
--
--   * every expected block address must be among the discovered blocks.
--
-- The test fails if the entry point cannot be resolved to a segment
-- offset.  Diagnostic output is produced only when 'isChatty' is set.
testDiscovery32 :: ExpectedResult -> E.Elf 32 -> IO ()
testDiscovery32 (funcblocks, ignored) elf =
  withMemory MM.Addr32 elf $ \mem ->
    case MM.asSegmentOff mem (findEntryPoint elf mem) of
      -- Report an unresolvable entry point as a test failure instead of
      -- crashing later on a lazily-evaluated partial pattern match.
      Nothing ->
        T.assertFailure "Unable to resolve the ELF entry point to a segment offset"
      Just entryPoint -> do
        -- 'putDoc' is not gated by 'chatty', hence the explicit guard.
        when isChatty $
             do chatty $ "entryPoint: " <> show entryPoint
                chatty $ "sections = " <> show (ARMELF.getElfSections elf) <> "\n"
                chatty $ "symbols = "
                putDoc $ ARMELF.getELFSymbols elf
                chatty ""

        let discoveryInfo = MD.cfgFromAddrs RO.arm_linux_info mem mempty [entryPoint] []
        chatty $ "di = " <> (show $ MD.ppDiscoveryStateBlocks discoveryInfo) <> "\n"

        -- NOTE: 'fromJust' is partial; these helpers will crash if a
        -- discovered address has no absolute form.
        let getAbsBlkAddr = fromJust . MM.asAbsoluteAddr . MM.relativeSegmentAddr . MD.pblockAddr
            getAbsFunAddr = fromJust . MM.asAbsoluteAddr . MM.relativeSegmentAddr . MD.discoveredFunAddr

        -- Start addresses of every block of every discovered function.
        let allFoundBlockAddrs :: S.Set Word64
            allFoundBlockAddrs =
                S.fromList [ fromIntegral $ getAbsBlkAddr pbr
                           | PU.Some dfi <- M.elems (discoveryInfo ^. MD.funInfo)
                           , pbr <- M.elems (dfi ^. MD.parsedBlocks)
                           ]

        -- Test that all discovered blocks were expected (and verify their sizes)
        F.forM_ (M.elems (discoveryInfo ^. MD.funInfo)) $ \(PU.Some dfi) ->
            do let actualEntry = fromIntegral $ getAbsFunAddr dfi
                   actualBlockStarts = S.fromList [ (baddr, bsize)
                                                  | pbr <- M.elems (dfi ^. MD.parsedBlocks)
                                                  , let baddr = fromIntegral $ getAbsBlkAddr pbr
                                                  , let bsize = fromIntegral (MD.blockSize pbr)
                                                  ]
               -- Dump each parsed block through 'chatty' so the output
               -- honours 'isChatty' (previously an unconditional trace).
               F.forM_ (M.elems (dfi ^. MD.parsedBlocks)) $ \pbr ->
                   chatty $ "Parsed Block: " ++ show pbr
               chatty $ "actualEntry: " <> show actualEntry
               chatty $ "actualBlockStarts: " <> show actualBlockStarts
               case (S.member actualEntry ignored, M.lookup actualEntry funcblocks) of
                 -- Functions starting at an ignored address are skipped.
                 (True, _) -> return ()
                 (_, Nothing) -> T.assertFailure (printf "Unexpected function entry: 0x%x" actualEntry)
                 (_, Just expectedBlockStarts) ->
                     T.assertEqual (printf "Block starts for 0x%x" actualEntry)
                                         expectedBlockStarts (actualBlockStarts `removeIgnored` ignored)

        -- Test that all expected blocks were discovered
        F.forM_ funcblocks $ \blockAddrs ->
            F.forM_ blockAddrs $ \(blockAddr@(Hex addr), _) ->
                T.assertBool ("Missing block address: " ++ show blockAddr) (S.member addr allFoundBlockAddrs)


-- | Remove from the first set every pair whose first component (the
-- address) is a member of the second set.  Pairs whose address is not
-- in the second set are kept unchanged.
removeIgnored :: (Ord b, Ord a) => S.Set (a, b) -> S.Set a -> S.Set (a, b)
removeIgnored actualBlockStarts ignoredBlocks =
    S.filter keep actualBlockStarts
  where
    keep (addr, _) = addr `S.notMember` ignoredBlocks
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`{-# LANGUAGE DataKinds #-}`
			`{-# LANGUAGE GeneralizedNewtypeDeriving #-}`
			`{-# LANGUAGE TypeApplications #-}`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`{-# LANGUAGE TypeFamilies #-}`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`module ARMTests`
			`( armAsmTests`
			`)`
			`where`


[arm] Update ARMTests to validate binary analysis against expected results. 2018-02-27 21:46:01 +03:00			`import Control.Lens hiding ( ignored )`
			`import Control.Monad ( when )`
			`import Control.Monad.Catch ( throwM, Exception )`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import qualified Data.ElfEdit as E`
			`import qualified Data.Foldable as F`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`import qualified Data.Macaw.ARM as RO`
			`import qualified Data.Macaw.ARM.BinaryFormat.ELF as ARMELF`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import qualified Data.Macaw.Discovery as MD`
			`import qualified Data.Macaw.Memory as MM`
			`import qualified Data.Map as M`
[arm] Update ARMTests to validate binary analysis against expected results. 2018-02-27 21:46:01 +03:00			`import Data.Maybe`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import Data.Monoid`
[arm] Update ARMTests to validate binary analysis against expected results. 2018-02-27 21:46:01 +03:00			`import qualified Data.Parameterized.Some as PU`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import qualified Data.Set as S`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`import Data.Typeable ( Typeable )`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import Data.Word ( Word64 )`
[arm] Update ARMTests to validate binary analysis against expected results. 2018-02-27 21:46:01 +03:00			`import Debug.Trace`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`import Shared`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import System.FilePath ( dropExtension, replaceExtension )`
			`import qualified Test.Tasty as T`
			`import qualified Test.Tasty.HUnit as T`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`import Text.PrettyPrint.ANSI.Leijen ( putDoc )`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`import Text.Printf ( PrintfArg, printf )`
			`import Text.Read ( readMaybe )`

[arm] Update ARMTests to validate binary analysis against expected results. 2018-02-27 21:46:01 +03:00
			`-- \| Set to true to build with chatty output.`
			`isChatty :: Bool`
			`isChatty = True`

			`-- \| Used to generate output when chatty`
			`chatty :: String -> IO ()`
			`chatty = when isChatty . putStrLn`

Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00
[arm] Update naming scheme for generated A32 and T32 test executables. 2018-02-27 20:55:00 +03:00			`-- \| Called with a list of paths to test files. This will remove the`
			`-- file extension from the test file to find a filepath to a binary`
			`-- (executable) corresponding to that test file. The macaw-arm`
			`-- library will then be used to discover semantics on the binary and`
			`-- these will be compared to the semantics described in the test file.`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`armAsmTests :: [FilePath] -> T.TestTree`
			`armAsmTests = T.testGroup "ARM" . map mkTest`

[arm] Update naming scheme for generated A32 and T32 test executables. 2018-02-27 20:55:00 +03:00
			`-- \| Read in a test case from disk and output a test tree.`
			`mkTest :: FilePath -> T.TestTree`
			`mkTest fp = T.testCase fp $ do x <- getExpected fp`
			`withELF exeFilename $ testDiscovery x`
			`where`
			`asmFilename = dropExtension fp`
			`exeFilename = replaceExtension asmFilename "exe"`


			`-- ----------------------------------------------------------------------`
			`-- Parser/representation for files that contain expected results of`
			`-- semantics discovery from a binary.`

Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`newtype Hex a = Hex a`
			`deriving (Eq, Ord, Num, PrintfArg)`

			`instance (Num a, Show a, PrintfArg a) => Show (Hex a) where`
			`show (Hex a) = printf "0x%x" a`

			`instance (Read a) => Read (Hex a) where`
			`readsPrec i s = [ (Hex a, s') \| (a, s') <- readsPrec i s ]`

			`-- \| The type of expected results for test cases`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`data ExpectedResultFileData =`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`R { funcs :: [(Hex Word64, [(Hex Word64, Word64)])]`
			`-- ^ The first element of the pair is the address of entry point`
			`-- of the function. The list is a list of the addresses of the`
			`-- basic blocks in the function (including the first block).`
			`, ignoreBlocks :: [Hex Word64]`
			`-- ^ This is a list of discovered blocks to ignore. This is`
			`-- basically just the address of the instruction after the exit`
			`-- syscall, as macaw doesn't know that exit never returns and`
			`-- discovers a false block after exit.`
			`}`
			`deriving (Read, Show, Eq)`

[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`type ExpectedResult = (M.Map (Hex Word64) (S.Set (Hex Word64, Word64)),`
			`S.Set (Hex Word64))`

			`data ExpectedException = BadExpectedFile String`
			`deriving (Typeable, Show)`

			`instance Exception ExpectedException`


			`getExpected :: FilePath -> IO ExpectedResult`
			`getExpected expectedFilename = do`
			`expectedString <- readFile expectedFilename`
			`case readMaybe expectedString of`
			`-- Above: Read in the ExpectedResultFileData from the contents of the file`
			`-- Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)`
			`Nothing -> throwM $ BadExpectedFile ("Invalid expected spec: " ++ show expectedString)`
			`Just er ->`
			`let expectedEntries = M.fromList [ (entry, S.fromList starts) \| (entry, starts) <- funcs er ]`
			`-- expectedEntries maps function entry points to the set of block starts`
			`-- within the function.`
			`ignoredBlocks = S.fromList (ignoreBlocks er)`
			`in return (expectedEntries, ignoredBlocks)`


			`testDiscovery :: ExpectedResult -> E.Elf w -> IO ()`
[arm] Update naming scheme for generated A32 and T32 test executables. 2018-02-27 20:55:00 +03:00			`testDiscovery expRes elf =`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`case E.elfClass elf of`
[arm] Update naming scheme for generated A32 and T32 test executables. 2018-02-27 20:55:00 +03:00			`E.ELFCLASS32 -> testDiscovery32 expRes elf`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`E.ELFCLASS64 -> error "testDiscovery64 TBD"`

			`-- \| Run a test over a given expected result filename and the ELF file`
			`-- associated with it`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`testDiscovery32 :: ExpectedResult -> E.Elf 32 -> IO ()`
			`testDiscovery32 (funcblocks, ignored) elf =`
Initial tests for analyzing test-just-exit ARM binary. 2017-12-20 21:19:51 +03:00			`withMemory MM.Addr32 elf $ \mem -> do`
[arm] Initial ARM ELF file macaw import and parse. 2017-12-29 03:30:25 +03:00			`let Just entryPoint = MM.asSegmentOff mem epinfo`
			`epinfo = findEntryPoint elf mem`
[arm] Update ARMTests to validate binary analysis against expected results. 2018-02-27 21:46:01 +03:00			`when isChatty $`
			`do chatty $ "entryPoint: " <> show entryPoint`
			`chatty $ "sections = " <> show (ARMELF.getElfSections elf) <> "\n"`
			`chatty $ "symbols = "`
			`putDoc $ ARMELF.getELFSymbols elf`
			`chatty ""`

			`let discoveryInfo = MD.cfgFromAddrs RO.arm_linux_info mem mempty [entryPoint] []`
			`chatty $ "di = " <> (show $ MD.ppDiscoveryStateBlocks discoveryInfo) <> "\n"`

			`let getAbsBlkAddr = fromJust . MM.asAbsoluteAddr . MM.relativeSegmentAddr . MD.pblockAddr`
			`getAbsFunAddr = fromJust . MM.asAbsoluteAddr . MM.relativeSegmentAddr . MD.discoveredFunAddr`


			`let allFoundBlockAddrs :: S.Set Word64`
			`allFoundBlockAddrs =`
			`S.fromList [ fromIntegral $ getAbsBlkAddr pbr`
			`\| PU.Some dfi <- M.elems (discoveryInfo ^. MD.funInfo)`
			`, pbr <- M.elems (dfi ^. MD.parsedBlocks)`
			`]`

			`-- Test that all discovered blocks were expected (and verify their sizes)`
			`F.forM_ (M.elems (discoveryInfo ^. MD.funInfo)) $ \(PU.Some dfi) ->`
			`do let actualEntry = fromIntegral $ getAbsFunAddr dfi`
			`actualBlockStarts = S.fromList [ (baddr, bsize)`
			`\| pbr <- M.elems (dfi ^. MD.parsedBlocks)`
			`, trace ("Parsed Block: " ++ show pbr) True`
			`, let baddr = fromIntegral $ getAbsBlkAddr pbr`
			`, let bsize = fromIntegral (MD.blockSize pbr)`
			`]`
			`chatty $ "actualEntry: " <> show actualEntry`
			`chatty $ "actualBlockStarts: " <> show actualBlockStarts`
			`case (S.member actualEntry ignored, M.lookup actualEntry funcblocks) of`
			`(True, _) -> return ()`
			`(_, Nothing) -> T.assertFailure (printf "Unexpected block start: 0x%x" actualEntry)`
			`(_, Just expectedBlockStarts) ->`
			`T.assertEqual (printf "Block starts for 0x%x" actualEntry)`
			expectedBlockStarts (actualBlockStarts `removeIgnored` ignored)

			`-- Test that all expected blocks were discovered`
			`F.forM_ funcblocks $ \blockAddrs ->`
			`F.forM_ blockAddrs $ \(blockAddr@(Hex addr), _) ->`
			`T.assertBool ("Missing block address: " ++ show blockAddr) (S.member addr allFoundBlockAddrs)`

			`T.assertBool "everything looks good" True`


			`removeIgnored :: (Ord b, Ord a) => S.Set (a, b) -> S.Set a -> S.Set (a, b)`
			`removeIgnored actualBlockStarts ignoredBlocks =`
			`let removeIfPresent v@(addr, _) acc = if S.member addr ignoredBlocks`
			`then S.delete v acc`
			`else acc`
			`in F.foldr removeIfPresent actualBlockStarts actualBlockStarts`