Add a draft of a generic binary loading interface, simplify TOC handling on PPC

The generic binary loading interface is instantiated once for each
architecture/binary container pair.  This isn't great, but there is enough
custom work in each setting to justify it.

The binary loading interface isn't finished yet, and needs to learn some
additional operations to support relocation.  It already supports additional
information that is architecture specific and binary container format
specific (that operations will have to use on a per-format basis).

On the PowerPC side, the Table of Contents (TOC) is now architecture-specific
information constructed by the loader (currently from ELF binaries).  The new
TOC data type is in place to support this more easily (the old format was just a
function).
This commit is contained in:
Tristan Ravitch 2018-05-17 16:03:04 -07:00
parent c943d45d21
commit 27810cdbf3
9 changed files with 240 additions and 52 deletions

View File

@ -28,10 +28,13 @@ library
Data.Macaw.PPC.Semantics.PPC64
Data.Macaw.PPC.Semantics.PPC32
Data.Macaw.PPC.Semantics.TH
Data.Macaw.PPC.Loader
other-modules: Data.Macaw.PPC.TOC
build-depends: base >=4.9 && <5,
bytestring,
containers,
constraints,
exceptions,
crucible,
text,
dismantle-tablegen,
@ -46,8 +49,7 @@ library
mtl,
parameterized-utils,
elf-edit,
template-haskell,
text
template-haskell
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall

View File

@ -19,18 +19,21 @@ module Data.Macaw.PPC (
A.PPCStmt(..),
A.PPCPrimFn(..),
-- * ELF support
tocBaseForELF,
tocEntryAddrsForElf
PL.PPCLoadException(..),
TOC.TOC,
TOC.lookupTOC,
TOC.lookupTOCAbs,
TOC.entryPoints,
BE.parseTOC,
BE.TOCException(..)
) where
import Data.Proxy ( Proxy(..) )
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.Architecture.Info as MI
import Data.Macaw.CFG
import qualified Data.Macaw.CFG.DemandSet as MDS
import qualified Data.Macaw.Memory as MM
import Data.Macaw.Types ( BVType )
import qualified SemMC.Architecture.PPC32 as PPC32
import qualified SemMC.Architecture.PPC64 as PPC64
@ -52,11 +55,13 @@ import Data.Macaw.PPC.Arch ( rewriteTermStmt,
ppcPrimFnHasSideEffects,
PPCArchConstraints
)
import Data.Macaw.PPC.BinaryFormat.ELF ( tocBaseForELF, tocEntryAddrsForElf )
import qualified Data.Macaw.PPC.BinaryFormat.ELF as BE
import qualified Data.Macaw.PPC.Semantics.PPC32 as PPC32
import qualified Data.Macaw.PPC.Semantics.PPC64 as PPC64
import qualified Data.Macaw.PPC.PPCReg as R
import qualified Data.Macaw.PPC.Arch as A
import qualified Data.Macaw.PPC.Loader as PL
import qualified Data.Macaw.PPC.TOC as TOC
-- | The type tag for 64 bit PowerPC
type PPC64 = PPC64.PPC
@ -75,7 +80,8 @@ archDemandContext _ =
jumpTableEntrySize :: (PPCArchConstraints ppc) => proxy ppc -> MM.MemWord (ArchAddrWidth ppc)
jumpTableEntrySize _ = 4
ppc64_linux_info :: (ArchSegmentOff PPC64.PPC -> Maybe (MA.AbsValue 64 (BVType 64))) -> MI.ArchitectureInfo PPC64.PPC
ppc64_linux_info :: TOC.TOC PPC64.PPC
-> MI.ArchitectureInfo PPC64.PPC
ppc64_linux_info tocMap =
MI.ArchitectureInfo { MI.withArchConstraints = \x -> x
, MI.archAddrWidth = MM.Addr64
@ -97,7 +103,8 @@ ppc64_linux_info tocMap =
where
proxy = Proxy @PPC64.PPC
ppc32_linux_info :: (ArchSegmentOff PPC32.PPC -> Maybe (MA.AbsValue 32 (BVType 32))) -> MI.ArchitectureInfo PPC32.PPC
ppc32_linux_info :: TOC.TOC PPC32.PPC
-> MI.ArchitectureInfo PPC32.PPC
ppc32_linux_info tocMap =
MI.ArchitectureInfo { MI.withArchConstraints = \x -> x
, MI.archAddrWidth = MM.Addr32

View File

@ -2,24 +2,24 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Data.Macaw.PPC.BinaryFormat.ELF (
tocBaseForELF,
tocEntryAddrsForElf
parseTOC,
TOCException(..)
) where
import GHC.TypeLits ( KnownNat, natVal )
import Control.Monad ( replicateM, unless )
import qualified Control.Monad.Catch as X
import qualified Data.ByteString.Char8 as C8
import qualified Data.Map.Strict as M
import Data.Proxy ( Proxy(..) )
import qualified Data.Serialize.Get as G
import qualified Data.Set as S
import qualified Data.Word.Indexed as W
import qualified Data.ElfEdit as E
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Memory as MM
import Data.Macaw.Types ( BVType )
import qualified Data.Macaw.PPC.TOC as TOC
-- | Given an ELF file, extract a mapping from function entry points to the
-- value of the TOC pointer (which is to be stored in r2) for that function.
@ -32,44 +32,32 @@ import Data.Macaw.Types ( BVType )
-- stored in the @.opd@ section. Each entry is three pointers, where the first
-- entry is the function address and the second is the value of the TOC. The
-- third entry is unused in C programs (it is meant for Pascal).
tocBaseForELF :: (KnownNat (MC.RegAddrWidth (MC.ArchReg ppc)), MM.MemWidth (MC.RegAddrWidth (MC.ArchReg ppc)))
=> proxy ppc
-> E.Elf (MC.RegAddrWidth (MC.ArchReg ppc))
-> MC.ArchSegmentOff ppc
-> Maybe (MA.AbsValue (MC.RegAddrWidth (MC.ArchReg ppc)) (BVType (MC.RegAddrWidth (MC.ArchReg ppc))))
tocBaseForELF proxy e =
case parseTOC proxy e of
Left err -> error ("Error parsing .opd section: " ++ err)
Right res -> \entryAddr -> M.lookup (MM.relativeSegmentAddr entryAddr) res
tocEntryAddrsForElf :: (MM.MemWidth (MC.RegAddrWidth (MC.ArchReg ppc)),
KnownNat (MC.RegAddrWidth (MC.ArchReg ppc)))
=> proxy ppc
-> E.Elf (MC.RegAddrWidth (MC.ArchReg ppc))
-> [MM.MemAddr (MC.RegAddrWidth (MC.ArchReg ppc))]
tocEntryAddrsForElf proxy e =
case parseTOC proxy e of
Left err -> error ("Error parsing .opd section: " ++ err)
Right res -> M.keys res
parseTOC :: forall ppc proxy
. (KnownNat (MC.RegAddrWidth (MC.ArchReg ppc)), MM.MemWidth (MC.RegAddrWidth (MC.ArchReg ppc)))
=> proxy ppc
-> E.Elf (MC.RegAddrWidth (MC.ArchReg ppc))
-> Either String (M.Map (MM.MemAddr (MC.RegAddrWidth (MC.ArchReg ppc))) (MA.AbsValue (MC.RegAddrWidth (MC.ArchReg ppc)) (BVType (MC.RegAddrWidth (MC.ArchReg ppc)))))
parseTOC proxy e =
parseTOC :: forall ppc m
. (KnownNat (MC.ArchAddrWidth ppc),
MM.MemWidth (MC.ArchAddrWidth ppc),
X.MonadThrow m)
=> E.Elf (MC.ArchAddrWidth ppc)
-> m (TOC.TOC ppc)
parseTOC e =
case E.findSectionByName (C8.pack ".opd") e of
[sec] ->
G.runGet (parseFunctionDescriptors proxy (fromIntegral ptrSize)) (E.elfSectionData sec)
_ -> error "Could not find .opd section"
case G.runGet (parseFunctionDescriptors (Proxy @ppc) (fromIntegral ptrSize)) (E.elfSectionData sec) of
Left msg -> X.throwM (TOCParseError msg)
Right t -> return (TOC.toc t)
_ -> X.throwM (MissingTOCSection ".opd")
where
ptrSize = natVal (Proxy @(MC.RegAddrWidth (MC.ArchReg ppc)))
ptrSize = natVal (Proxy @(MC.ArchAddrWidth ppc))
data TOCException = MissingTOCSection String
| TOCParseError String
deriving (Show)
parseFunctionDescriptors :: (MM.MemWidth (MC.RegAddrWidth (MC.ArchReg ppc)))
instance X.Exception TOCException
parseFunctionDescriptors :: (KnownNat (MC.ArchAddrWidth ppc), MM.MemWidth (MC.ArchAddrWidth ppc))
=> proxy ppc
-> Int
-> G.Get (M.Map (MM.MemAddr (MC.RegAddrWidth (MC.ArchReg ppc))) (MA.AbsValue (MC.RegAddrWidth (MC.ArchReg ppc)) (BVType (MC.RegAddrWidth (MC.ArchReg ppc)))))
-> G.Get (M.Map (MM.MemAddr (MC.ArchAddrWidth ppc)) (W.W (MC.ArchAddrWidth ppc)))
parseFunctionDescriptors _ ptrSize = do
let recordBytes = (3 * ptrSize) `div` 8
let recordParser =
@ -83,12 +71,12 @@ parseFunctionDescriptors _ ptrSize = do
funcDescs <- replicateM (totalBytes `div` recordBytes) recordParser
return (M.fromList funcDescs)
getFunctionDescriptor :: (Integral a, MM.MemWidth w)
getFunctionDescriptor :: (KnownNat w, Integral a, MM.MemWidth w)
=> G.Get a
-> G.Get (MM.MemAddr w, MA.AbsValue w (BVType w))
-> G.Get (MM.MemAddr w, W.W w)
getFunctionDescriptor ptrParser = do
entryAddr <- ptrParser
tocAddr <- ptrParser
_ <- ptrParser
let mso = MM.absoluteAddr (fromIntegral entryAddr)
return (mso, MA.FinSet (S.singleton (fromIntegral tocAddr)))
return (mso, fromIntegral tocAddr)

View File

@ -19,7 +19,6 @@ import qualified Data.Set as S
import Data.Macaw.AbsDomain.AbsState as MA
import Data.Macaw.CFG
import Data.Macaw.Types ( BVType )
import qualified Data.Macaw.Memory as MM
import Data.Parameterized.Some ( Some(..) )
@ -28,6 +27,7 @@ import qualified Dismantle.PPC as D
import Data.Macaw.SemMC.Simplify ( simplifyValue )
import Data.Macaw.PPC.Arch
import Data.Macaw.PPC.PPCReg
import qualified Data.Macaw.PPC.TOC as TOC
preserveRegAcrossSyscall :: (ArchReg ppc ~ PPCReg ppc, 1 <= RegAddrWidth (PPCReg ppc))
=> proxy ppc
@ -77,12 +77,12 @@ postPPCTermStmtAbsState preservePred mem s0 regState stmt =
-- abstract return value.
mkInitialAbsState :: (PPCArchConstraints ppc)
=> proxy ppc
-> (ArchSegmentOff ppc -> Maybe (MA.AbsValue (RegAddrWidth (ArchReg ppc)) (BVType (RegAddrWidth (ArchReg ppc)))))
-> TOC.TOC ppc
-> MM.Memory (RegAddrWidth (ArchReg ppc))
-> ArchSegmentOff ppc
-> MA.AbsBlockState (ArchReg ppc)
mkInitialAbsState _ tocMap _mem startAddr =
case tocMap startAddr of
case TOC.lookupTOCAbs tocMap startAddr of
Just tocAddr -> s0 & MA.absRegState . boundValue (PPC_GP (D.GPR 2)) .~ tocAddr
Nothing -> s0
where

View File

@ -0,0 +1,65 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Macaw.PPC.Loader (
PPCLoadException(..)
) where
import GHC.TypeLits
import qualified Control.Exception as X
import qualified Data.ElfEdit as E
import qualified Data.Macaw.BinaryLoader as BL
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Memory.ElfLoader as EL
import qualified Data.Macaw.Memory.LoadCommon as LC
import Data.Macaw.PPC.PPCReg ()
import qualified SemMC.Architecture.PPC32 as PPC32
import qualified SemMC.Architecture.PPC64 as PPC64
import qualified Data.Macaw.PPC.BinaryFormat.ELF as BE
import qualified Data.Macaw.PPC.TOC as TOC
instance BL.BinaryLoader PPC32.PPC (E.Elf 32) where
type ArchBinaryData PPC32.PPC = TOC.TOC PPC32.PPC
type BinaryFormatData (E.Elf 32) = EL.SectionIndexMap 32
type Diagnostic (E.Elf 32) = EL.MemLoadWarning
loadBinary = loadPPCBinary
instance BL.BinaryLoader PPC64.PPC (E.Elf 64) where
type ArchBinaryData PPC64.PPC = TOC.TOC PPC64.PPC
type BinaryFormatData (E.Elf 64) = EL.SectionIndexMap 64
type Diagnostic (E.Elf 64) = EL.MemLoadWarning
loadBinary = loadPPCBinary
loadPPCBinary :: (w ~ MC.ArchAddrWidth ppc,
BL.ArchBinaryData ppc ~ TOC.TOC ppc,
BL.BinaryFormatData (E.Elf w) ~ EL.SectionIndexMap w,
BL.Diagnostic (E.Elf w) ~ EL.MemLoadWarning,
MC.MemWidth w,
KnownNat w)
=> LC.LoadOptions
-> E.Elf (MC.ArchAddrWidth ppc)
-> IO (BL.LoadedBinary ppc (E.Elf (MC.ArchAddrWidth ppc)))
loadPPCBinary lopts e = do
case EL.memoryForElf lopts e of
Left err -> X.throwIO (PPCElfLoadError err)
Right (sim, mem, warnings) ->
case BE.parseTOC e of
Left err -> X.throwIO (PPCTOCLoadError err)
Right toc ->
return BL.LoadedBinary { BL.memoryImage = mem
, BL.archBinaryData = toc
, BL.binaryFormatData = sim
, BL.loadDiagnostics = warnings
}
data PPCLoadException = PPCElfLoadError String
| PPCTOCLoadError X.SomeException
deriving (Show)
instance X.Exception PPCLoadException

View File

@ -0,0 +1,59 @@
{-# LANGUAGE FlexibleContexts #-}
module Data.Macaw.PPC.TOC (
TOC,
toc,
lookupTOC,
lookupTOCAbs,
entryPoints
) where
import qualified Data.Macaw.AbsDomain.AbsState as MA
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Types as MT
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.Word.Indexed as W
-- | The Table of Contents (TOC) of a PowerPC binary
--
-- Note that different ABIs and container formats store the TOC in different
-- places in the binary; this abstraction only concerns itself with the values
-- in the TOC and not their representation on disk.
newtype TOC ppc =
TOC (M.Map (MC.MemAddr (MC.ArchAddrWidth ppc)) (W.W (MC.ArchAddrWidth ppc)))
-- (MA.AbsValue (MC.ArchAddrWidth ppc) (MT.BVType (MC.ArchAddrWidth ppc))))
toc :: M.Map (MC.MemAddr (MC.ArchAddrWidth ppc)) (W.W (MC.ArchAddrWidth ppc)) -- (MA.AbsValue (MC.ArchAddrWidth ppc) (MT.BVType (MC.ArchAddrWidth ppc)))
-> TOC ppc
toc = TOC
-- | Look up the value of the TOC base pointer for the function with the given address
--
-- A call
--
-- > lookupTOC toc addr
--
-- Returns the value of the TOC base pointer (i.e., the value in @r2@ when a
-- function begins executing) for the function whose entry point is at address
-- @addr@.
--
-- This variant returns a Macaw 'MA.AbsValue'
lookupTOCAbs :: (MC.MemWidth (MC.ArchAddrWidth ppc))
=> TOC ppc
-> MC.ArchSegmentOff ppc
-> Maybe (MA.AbsValue (MC.ArchAddrWidth ppc) (MT.BVType (MC.ArchAddrWidth ppc)))
lookupTOCAbs (TOC m) addr = (MA.FinSet . S.singleton . W.unW) <$> M.lookup (MC.relativeSegmentAddr addr) m
-- | Like 'lookupTOCAbs', but this variant returns a plain size-indexed word
lookupTOC :: (MC.MemWidth (MC.ArchAddrWidth ppc))
=> TOC ppc
-> MC.ArchSegmentOff ppc
-> Maybe (W.W (MC.ArchAddrWidth ppc))
lookupTOC (TOC m) addr = M.lookup (MC.relativeSegmentAddr addr) m
-- | Return the addresses of all of the functions present in the TOC
--
-- These addresses can be thought of as a root set of entry points in a binary
-- or library.
entryPoints :: TOC ppc -> [MC.MemAddr (MC.ArchAddrWidth ppc)]
entryPoints (TOC m) = M.keys m

View File

@ -22,6 +22,8 @@ library
Data.Macaw.SemMC.Translations
Data.Macaw.SemMC.TH
Data.Macaw.SemMC.TH.Monad
Data.Macaw.BinaryLoader
Data.Macaw.BinaryLoader.X86
-- other-extensions:
build-depends: base >=4.9 && <5,
async,
@ -35,7 +37,10 @@ library
crucible,
macaw-base,
dismantle-tablegen,
semmc
semmc,
-- Temporary
macaw-x86,
elf-edit
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall

View File

@ -0,0 +1,22 @@
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Data.Macaw.BinaryLoader (
BinaryLoader(..),
LoadedBinary(..)
) where
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Memory.LoadCommon as LC
data LoadedBinary arch binImg =
LoadedBinary { memoryImage :: MC.Memory (MC.ArchAddrWidth arch)
, archBinaryData :: ArchBinaryData arch
, binaryFormatData :: BinaryFormatData binImg
, loadDiagnostics :: [Diagnostic binImg]
}
class BinaryLoader arch binImg where
type ArchBinaryData arch :: *
type BinaryFormatData binImg :: *
type Diagnostic binImg :: *
loadBinary :: LC.LoadOptions -> binImg -> IO (LoadedBinary arch binImg)

View File

@ -0,0 +1,40 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE DataKinds #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Macaw.BinaryLoader.X86 (
X86LoadException(..)
) where
import qualified Control.Exception as X
import qualified Data.ElfEdit as E
import qualified Data.Macaw.BinaryLoader as BL
import qualified Data.Macaw.Memory.ElfLoader as EL
import qualified Data.Macaw.Memory.LoadCommon as LC
import qualified Data.Macaw.X86 as MX
instance BL.BinaryLoader MX.X86_64 (E.Elf 64) where
type ArchBinaryData MX.X86_64 = ()
type BinaryFormatData (E.Elf 64) = EL.SectionIndexMap 64
type Diagnostic (E.Elf 64) = EL.MemLoadWarning
loadBinary = loadX86Binary
loadX86Binary :: LC.LoadOptions
-> E.Elf 64
-> IO (BL.LoadedBinary MX.X86_64 (E.Elf 64))
loadX86Binary lopts e = do
case EL.memoryForElf lopts e of
Left err -> X.throwIO (X86ElfLoadError err)
Right (sim, mem, warnings) ->
return BL.LoadedBinary { BL.memoryImage = mem
, BL.archBinaryData = ()
, BL.binaryFormatData = sim
, BL.loadDiagnostics = warnings
}
data X86LoadException = X86ElfLoadError String
deriving (Show)
instance X.Exception X86LoadException