[arm] Initial ARM ELF file macaw import and parse.

This commit is contained in:
Kevin Quick 2017-12-28 16:30:25 -08:00
parent 77d4341ac1
commit cd1c676554
No known key found for this signature in database
GPG Key ID: E6D7733599CC0A21
7 changed files with 290 additions and 115 deletions

View File

@ -13,11 +13,24 @@ extra-source-files: ChangeLog.md
cabal-version: >=1.10
library
-- exposed-modules:
exposed-modules: Data.Macaw.ARM
, Data.Macaw.ARM.ARMReg
, Data.Macaw.ARM.BinaryFormat.ELF
, Data.Macaw.ARM.Semantics.ARMSemantics
-- other-modules:
-- other-extensions:
build-depends: base >=4.10 && <4.11
build-tools: arm-none-eabi-gcc
, ansi-wl-pprint
, bytestring
, cereal
, containers
, elf-edit
, lens
, macaw-base
, macaw-semmc
, semmc-arm
, vector
-- build-tools: arm-none-eabi-gcc
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall
@ -32,6 +45,7 @@ test-suite macaw-arm-tests
, MismatchTests
, Shared
build-depends: base >=4.10 && <4.11
, ansi-wl-pprint
, binary
, bytestring
, containers
@ -43,5 +57,6 @@ test-suite macaw-arm-tests
, macaw-base
, macaw-arm
, parameterized-utils
, semmc-arm
, tasty
, tasty-hunit

View File

@ -0,0 +1,58 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeApplications #-}
-- {-# LANGUAGE FlexibleContexts #-}
-- {-# LANGUAGE ScopedTypeVariables #-}
-- {-# LANGUAGE RankNTypes #-}
-- {-# LANGUAGE GADTs #-}
-- {-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Macaw.ARM
( -- * Macaw configurations
arm_linux_info,
-- * Type-level tags
ARM,
-- * ELF support
-- tocBaseForELF
-- tocEntryAddrsForELF
)
where
import qualified Data.Macaw.ARM.Semantics.ARMSemantics as ARMSem
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.Architecture.Info as MI
import Data.Macaw.CFG ( ArchSegmentOff )
import qualified Data.Macaw.Memory as MM
import Data.Macaw.Types ( BVType )
import qualified SemMC.ARM as ARM
import Data.Proxy ( Proxy(..) )
import Data.Macaw.ARM.ARMReg
-- | The type tag for ARM (32-bit)
type ARM = ARM.ARM
-- arm_linux_info :: (ArchSegmentOff ARM.ARM -> Maybe (MA.AbsValue 32 (BVType 32))) -> MI.ArchitectureInfo ARM.ARM
arm_linux_info :: MI.ArchitectureInfo ARM.ARM
arm_linux_info =
MI.ArchitectureInfo { MI.withArchConstraints = undefined -- id -- \x -> x
, MI.archAddrWidth = MM.Addr32
, MI.archEndianness = MM.LittleEndian
, MI.jumpTableEntrySize = undefined -- jumpTableEntrySize proxy
, MI.disassembleFn = undefined -- disassembleFn proxy ARMSem.execInstruction
, MI.mkInitialAbsState = undefined -- mkInitialAbsState proxy tocMap
, MI.absEvalArchFn = undefined -- absEvalArchFn proxy
, MI.absEvalArchStmt = undefined -- absEvalArchStmt proxy
, MI.postCallAbsState = undefined -- postCallAbsState proxy
, MI.identifyCall = undefined -- identifyCall proxy
, MI.identifyReturn = undefined -- identifyReturn proxy
, MI.rewriteArchFn = undefined -- rewritePrimFn
, MI.rewriteArchStmt = undefined -- rewriteStmt
, MI.rewriteArchTermStmt = undefined -- rewriteTermStmt
, MI.archDemandContext = undefined -- archDemandContext proxy
, MI.postArchTermStmtAbsState = undefined -- postARMTermStmtAbsState (preserveRegAcrossSyscall proxy)
}
where
proxy = Proxy @ARM.ARM

View File

@ -0,0 +1,40 @@
-- | Defines the register types for ARM, along with some helpers
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
module Data.Macaw.ARM.ARMReg
( ARMReg(..)
)
where
import GHC.TypeLits
import qualified Data.Macaw.CFG as MC
import qualified SemMC.ARM as ARM
import Data.Macaw.Types ( BVType )
data ARMReg tp where
ARM_IP :: (w ~ MC.RegAddrWidth ARMReg, 1 <= w) => ARMReg (BVType w)
deriving instance Eq (ARMReg tp)
deriving instance Ord (ARMReg tp)
instance Show (ARMReg tp) where
show r = case r of
ARM_IP -> "ip"
-- instance ShowF ARMReg where
-- showF = show
type instance MC.ArchReg ARM.ARM = ARMReg
type instance MC.RegAddrWidth ARMReg = 32

View File

@ -0,0 +1,88 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
module Data.Macaw.ARM.BinaryFormat.ELF
( parseELFInfo
, getELFSymbols
-- , tocBaseForELF
-- , tocEntryAddrsForElf
)
where
import Data.List (intercalate)
import Data.Vector (toList)
import Data.Bits
import Text.PrettyPrint.ANSI.Leijen
import Control.Lens -- ((^.), (^..), filtered, over, folded, foldMapOf, to)
import Control.Monad ( replicateM, unless )
import qualified Data.ByteString.Char8 as C8
import qualified Data.ElfEdit as E
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Memory as MM
import Data.Macaw.Types ( BVType )
import qualified Data.Map.Strict as M
import Data.Proxy ( Proxy(..) )
import qualified Data.Serialize.Get as G
import qualified Data.Set as S
import GHC.TypeLits ( KnownNat, natVal )
parseFunctionDescriptors :: (MM.MemWidth (MC.RegAddrWidth (MC.ArchReg ppc)))
=> proxy ppc
-> Int
-> G.Get (M.Map (MM.MemAddr (MC.RegAddrWidth (MC.ArchReg ppc))) (MA.AbsValue (MC.RegAddrWidth (MC.ArchReg ppc)) (BVType (MC.RegAddrWidth (MC.ArchReg ppc)))))
parseFunctionDescriptors _ ptrSize = do
let recordBytes = (3 * ptrSize) `div` 8
let recordParser =
case ptrSize of
32 -> getFunctionDescriptor G.getWord32be
64 -> getFunctionDescriptor G.getWord64be
_ -> error ("Invalid pointer size: " ++ show ptrSize)
totalBytes <- G.remaining
unless (totalBytes `mod` recordBytes == 0) $ do
fail "The .opd section is not divisible by the record size"
funcDescs <- replicateM (totalBytes `div` recordBytes) recordParser
return (M.fromList funcDescs)
getFunctionDescriptor :: (Integral a, MM.MemWidth w)
=> G.Get a
-> G.Get (MM.MemAddr w, MA.AbsValue w (BVType w))
getFunctionDescriptor ptrParser = do
entryAddr <- ptrParser
tocAddr <- ptrParser
_ <- ptrParser
let mso = MM.absoluteAddr (fromIntegral entryAddr)
return (mso, MA.FinSet (S.singleton (fromIntegral tocAddr)))
parseELFInfo :: forall arm proxy
. (KnownNat (MC.RegAddrWidth (MC.ArchReg arm)), MM.MemWidth (MC.RegAddrWidth (MC.ArchReg arm)))
=> proxy arm
-> E.Elf (MC.RegAddrWidth (MC.ArchReg arm))
-> Either String (M.Map
(MM.MemAddr (MC.RegAddrWidth (MC.ArchReg arm)))
(MA.AbsValue (MC.RegAddrWidth (MC.ArchReg arm)) (BVType (MC.RegAddrWidth (MC.ArchReg arm)))))
parseELFInfo proxy e =
let secnames = e^..E.elfSections.to (C8.unpack . E.elfSectionName)
in Left $ intercalate ", " secnames
-- Left $ e^..E.elfSections . foldMapOf folded (C8.unpack . E.elfSectionName)
-- case E.findSectionByName (C8.pack ".opd") e of
-- [sec] ->
-- G.runGet (parseFunctionDescriptors proxy (fromIntegral ptrSize)) (E.elfSectionData sec)
-- _ -> error "Could not find .opd section"
-- where
-- ptrSize = natVal (Proxy @(MC.RegAddrWidth (MC.ArchReg arm)))
-- getELFSymbols :: E.Elf (MC.RegAddrWidth (MC.ArchReg arm)) -> String
getELFSymbols :: (Show (E.ElfWordType w), Data.Bits.Bits (E.ElfWordType w), Integral (E.ElfWordType w)) => E.Elf w -> Doc
getELFSymbols elf =
let dummy = 1
-- symtab :: E.ElfSymbolTableEntry (E.ElfWordType (MC.RegAddrWidth (MC.ArchReg arm)))
-- symtab :: E.ElfSymbolTableEntry (E.ElfWordType w)
symtab = elf^.to E.elfSymtab
ps = fmap (E.ppSymbolTableEntries . toList . E.elfSymbolTableEntries) symtab
-- x = elf^.(E.elfSymtab).to (show . E.ppSymbolTableEntries)
in vsep ps -- intercalate ", and " ps

View File

@ -0,0 +1,16 @@
{-# LANGUAGE DataKinds #-}
module Data.Macaw.ARM.Semantics.ARMSemantics
( execInstruction
)
where
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Types as MT
import SemMC.ARM ( ARM, Instruction )
import Data.Macaw.SemMC.Generator ( Generator )
-- import SemMC.Architecture.ARM.Opcodes ( allSemantics, allOpcodeInfo )
execInstruction :: MC.Value ARM ids (MT.BVType 32) -> Instruction -> Maybe (Generator ARM ids s ())
execInstruction = undefined
-- execInstruction = $(genExecInstruction (Proxy @ARM) (locToRegTH (Proxy @ARM)) armNonceAppEval armAppEvaluator 'armInstructionMatcher allSemantics allOpcodeInfo)

View File

@ -1,43 +1,49 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE GADTs #-}
module ARMTests (
armAsmTests
) where
{-# LANGUAGE TypeFamilies #-}
import Control.Lens ( (^.) )
module ARMTests
( armAsmTests
)
where
import Control.Lens ( (^.), to )
import Control.Monad.Catch (throwM, Exception)
import qualified Data.ElfEdit as E
import qualified Data.Foldable as F
import Data.List (intercalate)
import qualified Data.Macaw.ARM as RO
import Data.Macaw.ARM.ARMReg
import qualified Data.Macaw.ARM.BinaryFormat.ELF as ARMELF
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.Discovery as MD
import Data.Parameterized.NatRepr
import qualified Data.Macaw.Discovery.State as MD
import qualified Data.Macaw.Memory as MM
import Data.Macaw.Types ( BVType )
import qualified Data.Map as M
import Data.Maybe ( fromJust, mapMaybe )
import Data.Monoid
import Data.Parameterized.NatRepr
import qualified Data.Parameterized.Some as PU
import Data.Proxy ( Proxy(..) )
import qualified Data.Set as S
import Data.Typeable ( Typeable )
import Data.Word ( Word64 )
import Debug.Trace (trace)
import qualified SemMC.ARM as ARM
import Shared
import System.FilePath ( dropExtension, replaceExtension )
import qualified Test.Tasty as T
import qualified Test.Tasty.HUnit as T
import Text.PrettyPrint.ANSI.Leijen ( putDoc )
import Text.Printf ( PrintfArg, printf )
import Text.Read ( readMaybe )
-- import qualified Data.Macaw.PPC as RO
-- import qualified Data.Macaw.PPC.BinaryFormat.ELF as E
-- import qualified Data.Macaw.PPC.BinaryFormat.ELF as E -- KWQ: replacement should be complete
-- import qualified SemMC.Architecture.PPC64 as PPC64
import Data.List (intercalate)
import Debug.Trace (trace)
import Shared
armAsmTests :: [FilePath] -> T.TestTree
armAsmTests = T.testGroup "ARM" . map mkTest
@ -51,7 +57,7 @@ instance (Read a) => Read (Hex a) where
readsPrec i s = [ (Hex a, s') | (a, s') <- readsPrec i s ]
-- | The type of expected results for test cases
data ExpectedResult =
data ExpectedResultFileData =
R { funcs :: [(Hex Word64, [(Hex Word64, Word64)])]
-- ^ The first element of the pair is the address of entry point
-- of the function. The list is a list of the addresses of the
@ -64,75 +70,61 @@ data ExpectedResult =
}
deriving (Read, Show, Eq)
type ExpectedResult = (M.Map (Hex Word64) (S.Set (Hex Word64, Word64)),
S.Set (Hex Word64))
data ExpectedException = BadExpectedFile String
deriving (Typeable, Show)
instance Exception ExpectedException
getExpected :: FilePath -> IO ExpectedResult
getExpected expectedFilename = do
expectedString <- readFile expectedFilename
case readMaybe expectedString of
-- Above: Read in the ExpectedResultFileData from the contents of the file
-- Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
Nothing -> throwM $ BadExpectedFile ("Invalid expected spec: " ++ show expectedString)
Just er ->
let expectedEntries = M.fromList [ (entry, S.fromList starts) | (entry, starts) <- funcs er ]
-- expectedEntries maps function entry points to the set of block starts
-- within the function.
ignoredBlocks = S.fromList (ignoreBlocks er)
in return (expectedEntries, ignoredBlocks)
-- | Read in a test case from disk and output a test tree.
mkTest :: FilePath -> T.TestTree
mkTest fp = T.testCase fp $ withELF exeFilename (testDiscovery fp)
mkTest fp = T.testCase fp $ do x <- getExpected fp
withELF exeFilename $ testDiscovery x
where
asmFilename = dropExtension fp
exeFilename = replaceExtension asmFilename "exe"
showSegments :: (MM.MemWidth w) => MM.Memory w -> String
showSegments mem = intercalate "\n" $ map show (MM.memSegments mem)
testDiscovery :: FilePath -> E.Elf w -> IO ()
testDiscovery fname elf =
testDiscovery :: ExpectedResult -> E.Elf w -> IO ()
testDiscovery expres elf =
case E.elfClass elf of
E.ELFCLASS32 -> testDiscovery32 fname elf
E.ELFCLASS32 -> testDiscovery32 expres elf
E.ELFCLASS64 -> error "testDiscovery64 TBD"
-- | Run a test over a given expected result filename and the ELF file
-- associated with it
testDiscovery32 :: FilePath -> E.Elf 32 -> IO ()
testDiscovery32 expectedFilename elf =
testDiscovery32 :: ExpectedResult -> E.Elf 32 -> IO ()
testDiscovery32 (funcblocks, ignored) elf =
withMemory MM.Addr32 elf $ \mem -> do
let Just entryPoint = trace (showSegments mem) $ MM.asSegmentOff mem epinfo
epinfo = let e' = findEntryPoint elf mem in trace ("epinfo: " <> show e') e'
putStrLn $ "entryPoint:\n ep:\n e: " <> show entryPoint
T.assertBool ("step 1: " <> show entryPoint) $ "foo" == show entryPoint
-- tocBase = RO.tocBaseForELF (Proxy @PPC64.PPC) elf
-- otherEntryAddrs :: [MM.MemAddr 64]
-- otherEntryAddrs = E.tocEntryAddrsForElf (Proxy @PPC64.PPC) elf
-- otherEntries = mapMaybe (MM.asSegmentOff mem) otherEntryAddrs
-- di = MD.cfgFromAddrs (RO.ppc64_linux_info tocBase) mem MD.emptySymbolAddrMap (entryPoint:otherEntries) []
-- expectedString <- readFile expectedFilename
-- case readMaybe expectedString of
-- -- Above: Read in the ExpectedResult from the contents of the file
-- Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
-- Just er -> do
-- let expectedEntries = M.fromList [ (entry, S.fromList starts) | (entry, starts) <- funcs er ]
-- -- expectedEntries maps function entry points to the set of block starts
-- -- within the function.
-- ignoredBlocks = S.fromList (ignoreBlocks er)
-- allFoundBlockAddrs :: S.Set Word64
-- allFoundBlockAddrs =
-- S.fromList [ fromIntegral (fromJust (MM.asAbsoluteAddr (MM.relativeSegmentAddr (MD.pblockAddr pbr))))
-- | PU.Some dfi <- M.elems (di ^. MD.funInfo)
-- , pbr <- M.elems (dfi ^. MD.parsedBlocks)
-- ]
-- -- Test that all discovered blocks were expected (and verify their sizes)
-- F.forM_ (M.elems (di ^. MD.funInfo)) $ \(PU.Some dfi) -> do
-- let actualEntry = fromIntegral (fromJust (MM.asAbsoluteAddr (MM.relativeSegmentAddr (MD.discoveredFunAddr dfi))))
-- actualBlockStarts = S.fromList [ (baddr, bsize)
-- | pbr <- M.elems (dfi ^. MD.parsedBlocks)
-- , trace ("Parsed Block: " ++ show pbr) True
-- , let baddr = fromIntegral (fromJust (MM.asAbsoluteAddr (MM.relativeSegmentAddr (MD.pblockAddr pbr))))
-- , let bsize = fromIntegral (MD.blockSize pbr)
-- ]
-- case (S.member actualEntry ignoredBlocks, M.lookup actualEntry expectedEntries) of
-- (True, _) -> return ()
-- (_, Nothing) -> T.assertFailure (printf "Unexpected block start: 0x%x" actualEntry)
-- (_, Just expectedBlockStarts) ->
-- T.assertEqual (printf "Block starts for 0x%x" actualEntry) expectedBlockStarts (actualBlockStarts `removeIgnored` ignoredBlocks)
let Just entryPoint = MM.asSegmentOff mem epinfo
epinfo = findEntryPoint elf mem
putStrLn $ "entryPoint: " <> show entryPoint
-- -- Test that all expected blocks were discovered
-- F.forM_ (funcs er) $ \(_funcAddr, blockAddrs) ->
-- F.forM_ blockAddrs $ \(blockAddr@(Hex addr), _) -> do
-- T.assertBool ("Missing block address: " ++ show blockAddr) (S.member addr allFoundBlockAddrs)
let mbAddrs :: Either String (M.Map
(MM.MemAddr 32)
(MA.AbsValue 32 (BVType 32)))
mbAddrs = ARMELF.parseELFInfo (Proxy @ARM.ARM) elf
removeIgnored :: (Ord b, Ord a) => S.Set (a, b) -> S.Set a -> S.Set (a, b)
removeIgnored actualBlockStarts ignoredBlocks =
let removeIfPresent v@(addr, _) acc = if S.member addr ignoredBlocks
then S.delete v acc
else acc
in F.foldr removeIfPresent actualBlockStarts actualBlockStarts
putStrLn $ "sections = " <> show mbAddrs <> "\n"
putStrLn $ "symbols = "
putDoc $ ARMELF.getELFSymbols elf
T.assertBool ("sections = " <> show mbAddrs) False

View File

@ -1,38 +1,4 @@
.cpu arm7tdmi
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 6
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file "test-just-exit.c"
.text
.align 2
.global _start
.syntax unified
.arm
.fpu softvfp
.type _start, %function
_start:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
str fp, [sp, #-4]!
add fp, sp, #0
.syntax divided
@ 4 "test-just-exit.c" 1
svc #0
@ 0 "" 2
.arm
.syntax unified
nop
add sp, fp, #0
@ sp needed
ldr fp, [sp], #4
bx lr
.size _start, .-_start
.ident "GCC: (GNU Tools for ARM Embedded Processors 6-2017-q2-update) 6.3.1 20170620 (release) [ARM/embedded-6-branch revision 249437]"
R { funcs = [(0x801c, [(0x801c, 20)])
]
, ignoreBlocks = [0x803c]
}