mirror of
https://github.com/GaloisInc/macaw.git
synced 2024-12-24 22:53:43 +03:00
Added ARM Macaw disassembly functionality.
This commit is contained in:
parent
716ae2a28f
commit
b533f4a92a
@ -16,6 +16,7 @@ library
|
||||
exposed-modules: Data.Macaw.ARM
|
||||
, Data.Macaw.ARM.Arch
|
||||
, Data.Macaw.ARM.ARMReg
|
||||
, Data.Macaw.ARM.Disassemble
|
||||
, Data.Macaw.ARM.Eval
|
||||
, Data.Macaw.ARM.BinaryFormat.ELF
|
||||
, Data.Macaw.ARM.Semantics.ARMSemantics
|
||||
@ -31,8 +32,10 @@ library
|
||||
, lens
|
||||
, macaw-base
|
||||
, macaw-semmc
|
||||
, mtl
|
||||
, parameterized-utils
|
||||
, semmc-arm
|
||||
, text
|
||||
, vector
|
||||
-- build-tools: arm-none-eabi-gcc
|
||||
hs-source-dirs: src
|
||||
|
@ -12,7 +12,10 @@ module Data.Macaw.ARM
|
||||
)
|
||||
where
|
||||
|
||||
|
||||
import Data.Macaw.ARM.Disassemble ( disassembleFn )
|
||||
import Data.Macaw.ARM.Eval
|
||||
import qualified Data.Macaw.ARM.Semantics.ARMSemantics as ARMSem
|
||||
import qualified Data.Macaw.Architecture.Info as MI
|
||||
import qualified Data.Macaw.Memory as MM
|
||||
import Data.Proxy ( Proxy(..) )
|
||||
@ -29,7 +32,7 @@ arm_linux_info =
|
||||
, MI.archAddrWidth = MM.Addr32
|
||||
, MI.archEndianness = MM.LittleEndian
|
||||
, MI.jumpTableEntrySize = 0 -- undefined -- jumpTableEntrySize proxy
|
||||
, MI.disassembleFn = undefined -- disassembleFn proxy ARMSem.execInstruction
|
||||
, MI.disassembleFn = disassembleFn proxy ARMSem.execInstruction
|
||||
, MI.mkInitialAbsState = mkInitialAbsState proxy
|
||||
, MI.absEvalArchFn = undefined -- absEvalArchFn proxy
|
||||
, MI.absEvalArchStmt = undefined -- absEvalArchStmt proxy
|
||||
|
292
macaw-arm/src/Data/Macaw/ARM/Disassemble.hs
Normal file
292
macaw-arm/src/Data/Macaw/ARM/Disassemble.hs
Normal file
@ -0,0 +1,292 @@
|
||||
{-# LANGUAGE DeriveFunctor #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE FlexibleInstances #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE StandaloneDeriving #-}
|
||||
{-# LANGUAGE TypeFamilies #-}
|
||||
{-# LANGUAGE TypeApplications #-}
|
||||
{-# LANGUAGE UndecidableInstances #-}
|
||||
|
||||
module Data.Macaw.ARM.Disassemble
|
||||
( disassembleFn
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Lens ( (&), (^.), (%~), (.~) )
|
||||
import Control.Monad ( unless )
|
||||
import qualified Control.Monad.Except as ET
|
||||
import Control.Monad.ST ( ST )
|
||||
import Control.Monad.Trans ( lift )
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.ByteString.Lazy as LBS
|
||||
import qualified Data.Foldable as F
|
||||
import Data.Macaw.ARM.ARMReg
|
||||
import Data.Macaw.ARM.Arch ( ARMArchConstraints )
|
||||
import Data.Macaw.AbsDomain.AbsState as MA
|
||||
import Data.Macaw.CFG
|
||||
import Data.Macaw.CFG.Block
|
||||
import qualified Data.Macaw.CFG.Core as MC
|
||||
import qualified Data.Macaw.Memory as MM
|
||||
import qualified Data.Macaw.Memory.Permissions as MMP
|
||||
import Data.Macaw.SemMC.Generator
|
||||
import Data.Macaw.SemMC.Simplify ( simplifyValue )
|
||||
import Data.Macaw.Types -- ( BVType, BoolType )
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import qualified Data.Parameterized.Nonce as NC
|
||||
import Data.Proxy ( Proxy(..) )
|
||||
import qualified Data.Sequence as Seq
|
||||
import qualified Data.Text as T
|
||||
import Data.Word ( Word64 )
|
||||
import qualified Dismantle.ARM as D
|
||||
import Text.Printf ( printf )
|
||||
|
||||
|
||||
-- | Disassemble a block from the given start address (which points into the
|
||||
-- 'MM.Memory').
|
||||
--
|
||||
-- Return a list of disassembled blocks as well as the total number of bytes
|
||||
-- occupied by those blocks.
|
||||
disassembleFn :: (ARMArchConstraints arm)
|
||||
=> proxy arm
|
||||
-> (Value arm ids (BVType (ArchAddrWidth arm)) -> D.Instruction -> Maybe (Generator arm ids s ()))
|
||||
-- ^ A function to look up the semantics for an instruction. The
|
||||
-- lookup is provided with the value of the IP in case IP-relative
|
||||
-- addressing is necessary.
|
||||
-> MM.Memory (ArchAddrWidth arm)
|
||||
-- ^ The mapped memory space
|
||||
-> NC.NonceGenerator (ST s) ids
|
||||
-- ^ A generator of unique IDs used for assignments
|
||||
-> ArchSegmentOff arm
|
||||
-- ^ The address to disassemble from
|
||||
-> ArchAddrWord arm
|
||||
-- ^ Maximum size of the block (a safeguard)
|
||||
-> MA.AbsBlockState (ArchReg arm)
|
||||
-- ^ Abstract state of the processor at the start of the block
|
||||
-> ST s ([Block arm ids], MM.MemWord (ArchAddrWidth arm), Maybe String)
|
||||
disassembleFn _ lookupSemantics mem nonceGen startAddr maxSize _ = do
|
||||
mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize))
|
||||
case mr of
|
||||
Left (blocks, off, exn) -> return (blocks, off, Just (show exn))
|
||||
Right (blocks, bytes) -> return (blocks, bytes, Nothing)
|
||||
|
||||
tryDisassembleBlock :: (ARMArchConstraints arm)
|
||||
=> (Value arm ids (BVType (ArchAddrWidth arm)) -> D.Instruction -> Maybe (Generator arm ids s ()))
|
||||
-> MM.Memory (ArchAddrWidth arm)
|
||||
-> NC.NonceGenerator (ST s) ids
|
||||
-> ArchSegmentOff arm
|
||||
-> ArchAddrWord arm
|
||||
-> DisM arm ids s ([Block arm ids], MM.MemWord (ArchAddrWidth arm))
|
||||
tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize = do
|
||||
let gs0 = initGenState nonceGen mem startAddr (initRegState startAddr)
|
||||
let startOffset = MM.msegOffset startAddr
|
||||
(nextIPOffset, blocks) <- disassembleBlock lookupSemantics mem gs0 startAddr (startOffset + maxSize)
|
||||
unless (nextIPOffset > startOffset) $ do
|
||||
let reason = InvalidNextIP (fromIntegral nextIPOffset) (fromIntegral startOffset)
|
||||
failAt gs0 nextIPOffset startAddr reason
|
||||
return (F.toList (blocks ^. frontierBlocks), nextIPOffset - startOffset)
|
||||
|
||||
|
||||
|
||||
-- | Disassemble an instruction and terminate the current block if we run out of
|
||||
-- instructions to disassemble. We can run out if:
|
||||
--
|
||||
-- 1) We exceed the offset that macaw has told us to disassemble to
|
||||
--
|
||||
-- 2) We can't decode the IP (i.e., it isn't a constant)
|
||||
--
|
||||
-- 3) The IP after executing the semantics transformer is not equal to the
|
||||
-- expected next IP value, which indicates a jump to another block or
|
||||
-- function
|
||||
--
|
||||
-- In most of those cases, we end the block with a simple terminator. If the IP
|
||||
-- becomes a mux, we split execution using 'conditionalBranch'.
|
||||
disassembleBlock :: forall arm ids s
|
||||
. ARMArchConstraints arm
|
||||
=> (Value arm ids (BVType (ArchAddrWidth arm)) -> D.Instruction -> Maybe (Generator arm ids s ()))
|
||||
-- ^ A function to look up the semantics for an instruction that we disassemble
|
||||
-> MM.Memory (ArchAddrWidth arm)
|
||||
-> GenState arm ids s
|
||||
-> MM.MemSegmentOff (ArchAddrWidth arm)
|
||||
-- ^ The current instruction pointer
|
||||
-> MM.MemWord (ArchAddrWidth arm)
|
||||
-- ^ The maximum offset into the bytestring that we should
|
||||
-- disassemble to; in principle, macaw can tell us to limit our
|
||||
-- search with this.
|
||||
-> DisM arm ids s (MM.MemWord (ArchAddrWidth arm), BlockSeq arm ids)
|
||||
disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
|
||||
let seg = MM.msegSegment curIPAddr
|
||||
let off = MM.msegOffset curIPAddr
|
||||
case readInstruction mem curIPAddr of
|
||||
Left err -> failAt gs off curIPAddr (DecodeError err)
|
||||
Right (i, bytesRead) -> do
|
||||
-- traceM ("II: " ++ show i)
|
||||
let nextIPOffset = off + bytesRead
|
||||
nextIP = MM.relativeAddr seg nextIPOffset
|
||||
-- nextIPVal = MC.RelocatableValue (pointerNatRepr (Proxy @arm)) nextIP
|
||||
-- nextIPVal = MC.RelocatableValue (knownNat :: (BVType (ArchAddrWidth arm))) nextIP
|
||||
nextIPVal = MC.RelocatableValue (knownNat :: NatRepr (ArchAddrWidth arm)) nextIP
|
||||
-- Note: In ARM, the IP is incremented *after* an instruction
|
||||
-- executes; pass in the -- physical address of the instruction here.
|
||||
ipVal <- case MM.asAbsoluteAddr (MM.relativeSegmentAddr curIPAddr) of
|
||||
Nothing -> failAt gs off curIPAddr (InstructionAtUnmappedAddr i)
|
||||
Just addr -> return (BVValue (knownNat :: NatRepr (ArchAddrWidth arm)) (fromIntegral addr))
|
||||
case lookupSemantics ipVal i of
|
||||
Nothing -> failAt gs off curIPAddr (UnsupportedInstruction i)
|
||||
Just transformer -> do
|
||||
-- Once we have the semantics for the instruction (represented by a
|
||||
-- state transformer), we apply the state transformer and then extract
|
||||
-- a result from the state of the 'Generator'.
|
||||
egs1 <- liftST $ ET.runExceptT (runGenerator genResult gs $ do
|
||||
let lineStr = printf "%s: %s" (show curIPAddr) (show (D.ppInstruction i))
|
||||
addStmt (Comment (T.pack lineStr))
|
||||
transformer
|
||||
|
||||
-- Check to see if the IP has become conditionally-defined (by e.g.,
|
||||
-- a mux). If it has, we need to split execution using a primitive
|
||||
-- provided by the Generator monad.
|
||||
nextIPExpr <- getRegValue ARM_IP
|
||||
case matchConditionalBranch nextIPExpr of
|
||||
Just (cond, t_ip, f_ip) ->
|
||||
conditionalBranch cond (setRegVal ARM_IP t_ip) (setRegVal ARM_IP f_ip)
|
||||
Nothing -> return ())
|
||||
case egs1 of
|
||||
Left genErr -> failAt gs off curIPAddr (GenerationError i genErr)
|
||||
Right gs1 -> do
|
||||
case resState gs1 of
|
||||
Just preBlock
|
||||
| Seq.null (resBlockSeq gs1 ^. frontierBlocks)
|
||||
, v <- preBlock ^. (pBlockState . curIP)
|
||||
, Just simplifiedIP <- simplifyValue v
|
||||
, simplifiedIP == nextIPVal
|
||||
, nextIPOffset < maxOffset
|
||||
, Just nextIPSegAddr <- MM.asSegmentOff mem nextIP -> do
|
||||
let preBlock' = (pBlockState . curIP .~ simplifiedIP) preBlock
|
||||
let gs2 = GenState { assignIdGen = assignIdGen gs
|
||||
, _blockSeq = resBlockSeq gs1
|
||||
, _blockState = preBlock'
|
||||
, genAddr = nextIPSegAddr
|
||||
, genMemory = mem
|
||||
}
|
||||
disassembleBlock lookupSemantics mem gs2 nextIPSegAddr maxOffset
|
||||
|
||||
_ -> return (nextIPOffset, finishBlock FetchAndExecute gs1)
|
||||
|
||||
-- | Read one instruction from the 'MM.Memory' at the given segmented offset.
|
||||
--
|
||||
-- Returns the instruction and number of bytes consumed /or/ an error.
|
||||
--
|
||||
-- This code assumes that the 'MM.ByteRegion' is maximal; that is, that there
|
||||
-- are no byte regions that could be coalesced.
|
||||
readInstruction :: MM.Memory w
|
||||
-> MM.MemSegmentOff w
|
||||
-> Either (MM.MemoryError w) (D.Instruction, MM.MemWord w)
|
||||
readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do
|
||||
let seg = MM.msegSegment addr
|
||||
case MM.segmentFlags seg `MMP.hasPerm` MMP.execute of
|
||||
False -> ET.throwError (MM.PermissionsError (MM.relativeSegmentAddr addr))
|
||||
True -> do
|
||||
contents <- MM.addrContentsAfter mem (MM.relativeSegmentAddr addr)
|
||||
case contents of
|
||||
[] -> ET.throwError (MM.AccessViolation (MM.relativeSegmentAddr addr))
|
||||
MM.SymbolicRef {} : _ ->
|
||||
ET.throwError (MM.UnexpectedRelocation (MM.relativeSegmentAddr addr))
|
||||
MM.ByteRegion bs : _rest
|
||||
| BS.null bs -> ET.throwError (MM.AccessViolation (MM.relativeSegmentAddr addr))
|
||||
| otherwise -> do
|
||||
-- FIXME: Having to wrap the bytestring in a lazy wrapper is
|
||||
-- unpleasant. We could alter the disassembler to consume strict
|
||||
-- bytestrings, at the cost of possibly making it less efficient for
|
||||
-- other clients.
|
||||
let (bytesRead, minsn) = D.disassembleInstruction (LBS.fromStrict bs)
|
||||
case minsn of
|
||||
Just insn -> return (insn, fromIntegral bytesRead)
|
||||
Nothing -> ET.throwError (MM.InvalidInstruction (MM.relativeSegmentAddr addr) contents)
|
||||
|
||||
-- | Examine a value and see if it is a mux; if it is, break the mux up and
|
||||
-- return its component values (the condition and two alternatives)
|
||||
matchConditionalBranch :: (ARMArchConstraints arch)
|
||||
=> Value arch ids tp
|
||||
-> Maybe (Value arch ids BoolType, Value arch ids tp, Value arch ids tp)
|
||||
matchConditionalBranch v =
|
||||
case v of
|
||||
AssignedValue (Assignment { assignRhs = EvalApp a }) ->
|
||||
case a of
|
||||
Mux _rep cond t f -> Just (cond, fromMaybe t (simplifyValue t), fromMaybe f (simplifyValue f))
|
||||
_ -> Nothing
|
||||
_ -> Nothing
|
||||
|
||||
|
||||
type LocatedError ppc ids = ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc), TranslationError (ArchAddrWidth ppc))
|
||||
-- | This is a monad for error handling during disassembly
|
||||
--
|
||||
-- It allows for early failure that reports progress (in the form of blocks
|
||||
-- discovered and the latest address examined) along with a reason for failure
|
||||
-- (a 'TranslationError').
|
||||
newtype DisM ppc ids s a = DisM { unDisM :: ET.ExceptT (LocatedError ppc ids) (ST s) a }
|
||||
deriving (Functor,
|
||||
Applicative,
|
||||
Monad)
|
||||
|
||||
-- | This funny instance is required because GHC doesn't allow type function
|
||||
-- applications in instance heads, so we factor the type functions out into a
|
||||
-- constraint on a fresh variable. See
|
||||
--
|
||||
-- > https://stackoverflow.com/questions/45360959/illegal-type-synonym-family-application-in-instance-with-functional-dependency
|
||||
--
|
||||
-- We also can't derive this instance because of that restriction (but deriving
|
||||
-- silently fails).
|
||||
instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], MM.MemWord w, TranslationError w) (DisM ppc ids s) where
|
||||
throwError e = DisM (ET.throwError e)
|
||||
catchError a hdlr = do
|
||||
r <- liftST $ ET.runExceptT (unDisM a)
|
||||
case r of
|
||||
Left l -> do
|
||||
r' <- liftST $ ET.runExceptT (unDisM (hdlr l))
|
||||
case r' of
|
||||
Left e -> DisM (ET.throwError e)
|
||||
Right res -> return res
|
||||
Right res -> return res
|
||||
|
||||
|
||||
data TranslationError w = TranslationError { transErrorAddr :: MM.MemSegmentOff w
|
||||
, transErrorReason :: TranslationErrorReason w
|
||||
}
|
||||
|
||||
data TranslationErrorReason w = InvalidNextIP Word64 Word64
|
||||
| DecodeError (MM.MemoryError w)
|
||||
| UnsupportedInstruction D.Instruction
|
||||
| InstructionAtUnmappedAddr D.Instruction
|
||||
| GenerationError D.Instruction GeneratorError
|
||||
deriving (Show)
|
||||
|
||||
deriving instance (MM.MemWidth w) => Show (TranslationError w)
|
||||
|
||||
liftST :: ST s a -> DisM arm ids s a
|
||||
liftST = DisM . lift
|
||||
|
||||
|
||||
-- | Early failure for 'DisM'. This is a wrapper around 'ET.throwError' that
|
||||
-- computes the current progress alongside the reason for the failure.
|
||||
--
|
||||
-- Note that the 'TranslateError' below is a block terminator marker that notes
|
||||
-- that translation of the basic block resulted in an error (with the exception
|
||||
-- string stored as its argument). This allows macaw to carry errors in the
|
||||
-- instruction stream, which is useful for debugging and diagnostics.
|
||||
failAt :: forall arm ids s a
|
||||
. (ArchReg arm ~ ARMReg, MM.MemWidth (ArchAddrWidth arm))
|
||||
=> GenState arm ids s
|
||||
-> MM.MemWord (ArchAddrWidth arm)
|
||||
-> MM.MemSegmentOff (ArchAddrWidth arm)
|
||||
-> TranslationErrorReason (ArchAddrWidth arm)
|
||||
-> DisM arm ids s a
|
||||
failAt gs offset curIPAddr reason = do
|
||||
let exn = TranslationError { transErrorAddr = curIPAddr
|
||||
, transErrorReason = reason
|
||||
}
|
||||
let term = (`TranslateError` T.pack (show exn))
|
||||
let b = finishBlock' (gs ^. blockState) term
|
||||
let res = _blockSeq gs & frontierBlocks %~ (Seq.|> b)
|
||||
let res' = F.toList (res ^. frontierBlocks)
|
||||
ET.throwError (res', offset, exn)
|
@ -10,7 +10,9 @@ import qualified Data.Macaw.Types as MT
|
||||
import SemMC.ARM ( ARM, Instruction )
|
||||
import Data.Macaw.SemMC.Generator ( Generator )
|
||||
-- import SemMC.Architecture.ARM.Opcodes ( allSemantics, allOpcodeInfo )
|
||||
import qualified Dismantle.ARM as D
|
||||
|
||||
execInstruction :: MC.Value ARM ids (MT.BVType 32) -> Instruction -> Maybe (Generator ARM ids s ())
|
||||
|
||||
execInstruction :: MC.Value ARM ids (MT.BVType 32) -> D.Instruction -> Maybe (Generator ARM ids s ())
|
||||
execInstruction = undefined
|
||||
-- execInstruction = $(genExecInstruction (Proxy @ARM) (locToRegTH (Proxy @ARM)) armNonceAppEval armAppEvaluator 'armInstructionMatcher allSemantics allOpcodeInfo)
|
||||
|
Loading…
Reference in New Issue
Block a user