Update to the latest macaw

In macaw core, the type of the arch-specific 'disassemble' function changed to
no longer take a Memory, and to pass the maximum offset as an Int instead of a
MemWord.  It also removed the jump table entry size (which is no longer
required).

The removal of the Memory parameter required a bit of a change in how the
instruction parsers are structured, but it isn't a huge change (the "memory
contents after an address" can be computed from a MemSegmentOff, too).
This commit is contained in:
Tristan Ravitch 2018-08-16 10:26:55 -07:00
parent d5cab147e5
commit f7b87224a4
7 changed files with 54 additions and 68 deletions

View File

@ -36,7 +36,6 @@ arm_linux_info =
MI.ArchitectureInfo { MI.withArchConstraints = \x -> x
, MI.archAddrWidth = MM.Addr32
, MI.archEndianness = MM.LittleEndian
, MI.jumpTableEntrySize = 0 -- jumpTableEntrySize proxy
, MI.disassembleFn = disassembleFn proxy ARMSem.execInstruction ThumbSem.execInstruction
, MI.mkInitialAbsState = mkInitialAbsState proxy
, MI.absEvalArchFn = absEvalArchFn proxy

View File

@ -62,43 +62,40 @@ disassembleFn :: (ARMArchConstraints arm)
-- ^ A function to look up the semantics for a T32 instruction. The
-- lookup is provided with the value of the IP in case IP-relative
-- addressing is necessary.
-> MM.Memory (ArchAddrWidth arm)
-- ^ The mapped memory space
-> NC.NonceGenerator (ST s) ids
-- ^ A generator of unique IDs used for assignments
-> ArchSegmentOff arm
-- ^ The address to disassemble from
-> ArchAddrWord arm
-> Int
-- ^ Maximum size of the block (a safeguard)
-> MA.AbsBlockState (ArchReg arm)
-- ^ Abstract state of the processor at the start of the block
-> ST s ([Block arm ids], MM.MemWord (ArchAddrWidth arm), Maybe String)
disassembleFn _ lookupA32Semantics lookupT32Semantics mem nonceGen startAddr maxSize _ = do
-> ST s ([Block arm ids], Int, Maybe String)
disassembleFn _ lookupA32Semantics lookupT32Semantics nonceGen startAddr maxSize _ = do
let lookupSemantics ipval instr = case instr of
A32I inst -> lookupA32Semantics ipval inst
T32I inst -> lookupT32Semantics ipval inst
mr <- ET.runExceptT (unDisM (tryDisassembleBlock
lookupSemantics
mem nonceGen startAddr maxSize))
nonceGen startAddr maxSize))
case mr of
Left (blocks, off, exn) -> return (blocks, off, Just (show exn))
Right (blocks, bytes) -> return (blocks, bytes, Nothing)
tryDisassembleBlock :: (ARMArchConstraints arm)
=> (Value arm ids (BVType (ArchAddrWidth arm)) -> InstructionSet -> Maybe (Generator arm ids s ()))
-> MM.Memory (ArchAddrWidth arm)
-> NC.NonceGenerator (ST s) ids
-> ArchSegmentOff arm
-> ArchAddrWord arm
-> DisM arm ids s ([Block arm ids], MM.MemWord (ArchAddrWidth arm))
tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize = do
let gs0 = initGenState nonceGen mem startAddr (initRegState startAddr)
-> Int
-> DisM arm ids s ([Block arm ids], Int)
tryDisassembleBlock lookupSemantics nonceGen startAddr maxSize = do
let gs0 = initGenState nonceGen startAddr (initRegState startAddr)
let startOffset = MM.msegOffset startAddr
(nextPCOffset, blocks) <- disassembleBlock lookupSemantics mem gs0 startAddr (startOffset + maxSize)
(nextPCOffset, blocks) <- disassembleBlock lookupSemantics gs0 startAddr (startOffset + fromIntegral maxSize)
unless (nextPCOffset > startOffset) $ do
let reason = InvalidNextPC (MM.absoluteAddr nextPCOffset) (MM.absoluteAddr startOffset)
failAt gs0 nextPCOffset startAddr reason
return (F.toList (blocks ^. frontierBlocks), nextPCOffset - startOffset)
return (F.toList (blocks ^. frontierBlocks), fromIntegral (nextPCOffset - startOffset))
@ -119,7 +116,6 @@ disassembleBlock :: forall arm ids s
. ARMArchConstraints arm
=> (Value arm ids (BVType (ArchAddrWidth arm)) -> InstructionSet -> Maybe (Generator arm ids s ()))
-- ^ A function to look up the semantics for an instruction that we disassemble
-> MM.Memory (ArchAddrWidth arm)
-> GenState arm ids s
-> MM.MemSegmentOff (ArchAddrWidth arm)
-- ^ The current instruction pointer
@ -128,10 +124,10 @@ disassembleBlock :: forall arm ids s
-- disassemble to; in principle, macaw can tell us to limit our
-- search with this.
-> DisM arm ids s (MM.MemWord (ArchAddrWidth arm), BlockSeq arm ids)
disassembleBlock lookupSemantics mem gs curPCAddr maxOffset = do
disassembleBlock lookupSemantics gs curPCAddr maxOffset = do
let seg = MM.msegSegment curPCAddr
let off = MM.msegOffset curPCAddr
case readInstruction mem curPCAddr of
case readInstruction curPCAddr of
Left err -> failAt gs off curPCAddr (DecodeError err)
Right (_, 0) -> failAt gs off curPCAddr (InvalidNextPC (MM.relativeSegmentAddr curPCAddr) (MM.relativeSegmentAddr curPCAddr))
Right (i, bytesRead) -> do
@ -175,16 +171,15 @@ disassembleBlock lookupSemantics mem gs curPCAddr maxOffset = do
, Just simplifiedIP <- simplifyValue v
, simplifiedIP == nextPCVal
, nextPCOffset < maxOffset
, Just nextPCSegAddr <- MM.asSegmentOff mem nextPC -> do
, Just nextPCSegAddr <- MM.incSegmentOff curPCAddr (fromIntegral bytesRead) -> do
let preBlock' = (pBlockState . curIP .~ simplifiedIP) preBlock
let gs2 = GenState { assignIdGen = assignIdGen gs
, _blockSeq = resBlockSeq gs1
, _blockState = preBlock'
, genAddr = nextPCSegAddr
, genMemory = mem
, genRegUpdates = MapF.empty
}
disassembleBlock lookupSemantics mem gs2 nextPCSegAddr maxOffset
disassembleBlock lookupSemantics gs2 nextPCSegAddr maxOffset
_ -> return (nextPCOffset, finishBlock FetchAndExecute gs1)
@ -194,10 +189,10 @@ disassembleBlock lookupSemantics mem gs curPCAddr maxOffset = do
--
-- This code assumes that the 'MM.ByteRegion' is maximal; that is, that there
-- are no byte regions that could be coalesced.
readInstruction :: MM.Memory w
-> MM.MemSegmentOff w
readInstruction :: (MM.MemWidth w)
=> MM.MemSegmentOff w
-> Either (ARMMemoryError w) (InstructionSet, MM.MemWord w)
readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do
readInstruction addr = do
let seg = MM.msegSegment addr
segRelAddrRaw = MM.relativeSegmentAddr addr
-- Addresses specified in ARM instructions have the low bit
@ -209,7 +204,9 @@ readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do
segRelAddr = segRelAddrRaw { addrOffset = MM.addrOffset segRelAddrRaw `xor` loBit }
if MM.segmentFlags seg `MMP.hasPerm` MMP.execute
then do
contents <- liftMemError $ MM.addrContentsAfter mem segRelAddr
let ao = addrOffset segRelAddr
alignedMsegOff <- liftMaybe (ARMInvalidInstructionAddress seg ao) (MM.resolveSegmentOff seg ao)
contents <- liftMemError $ MM.contentsAfterSegmentOff alignedMsegOff
case contents of
[] -> ET.throwError $ ARMMemoryError (MM.AccessViolation segRelAddr)
MM.BSSRegion {} : _ ->
@ -232,6 +229,12 @@ readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do
Nothing -> ET.throwError $ ARMInvalidInstruction segRelAddr contents
else ET.throwError $ ARMMemoryError (MM.PermissionsError segRelAddr)
liftMaybe :: ARMMemoryError w -> Maybe a -> Either (ARMMemoryError w) a
liftMaybe err ma =
case ma of
Just a -> Right a
Nothing -> Left err
liftMemError :: Either (MM.MemoryError w) a -> Either (ARMMemoryError w) a
liftMemError e =
case e of
@ -242,6 +245,7 @@ liftMemError e =
-- invalid instructions.
data ARMMemoryError w = ARMInvalidInstruction !(MM.MemAddr w) [MM.SegmentRange w]
| ARMMemoryError !(MM.MemoryError w)
| ARMInvalidInstructionAddress !(MM.MemSegment w) !(MM.MemWord w)
deriving (Show)
-- | Examine a value and see if it is a mux; if it is, break the mux up and
@ -261,7 +265,7 @@ matchConditionalBranch v =
type LocatedError ppc ids = ([Block ppc ids]
, MM.MemWord (ArchAddrWidth ppc)
, Int
, TranslationError (ArchAddrWidth ppc))
-- | This is a monad for error handling during disassembly
@ -280,7 +284,7 @@ newtype DisM ppc ids s a = DisM { unDisM :: ET.ExceptT (LocatedError ppc ids) (S
--
-- We also can't derive this instance because of that restriction (but deriving
-- silently fails).
instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], MM.MemWord w, TranslationError w) (DisM ppc ids s) where
instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], Int, TranslationError w) (DisM ppc ids s) where
throwError e = DisM (ET.throwError e)
catchError a hdlr = do
r <- liftST $ ET.runExceptT (unDisM a)
@ -332,4 +336,4 @@ failAt gs offset curPCAddr reason = do
let b = finishBlock' (gs ^. blockState) term
let res = _blockSeq gs & frontierBlocks %~ (Seq.|> b)
let res' = F.toList (res ^. frontierBlocks)
ET.throwError (res', offset, exn)
ET.throwError (res', fromIntegral offset, exn)

View File

@ -31,7 +31,6 @@ module Data.Macaw.PPC (
import Data.Proxy ( Proxy(..) )
import qualified Data.Macaw.Architecture.Info as MI
import Data.Macaw.CFG
import qualified Data.Macaw.CFG.DemandSet as MDS
import qualified Data.Macaw.Memory as MM
@ -75,18 +74,12 @@ archDemandContext _ =
, MDS.archFnHasSideEffects = ppcPrimFnHasSideEffects
}
-- | NOTE: There isn't necessarily one answer for this. This will need to turn
-- into a function. With PIC jump tables, it can be smaller than the native size.
jumpTableEntrySize :: (PPCArchConstraints ppc) => proxy ppc -> MM.MemWord (ArchAddrWidth ppc)
jumpTableEntrySize _ = 4
ppc64_linux_info :: TOC.TOC PPC64.PPC
-> MI.ArchitectureInfo PPC64.PPC
ppc64_linux_info tocMap =
MI.ArchitectureInfo { MI.withArchConstraints = \x -> x
, MI.archAddrWidth = MM.Addr64
, MI.archEndianness = MM.BigEndian
, MI.jumpTableEntrySize = jumpTableEntrySize proxy
, MI.disassembleFn = disassembleFn proxy PPC64.execInstruction
, MI.mkInitialAbsState = mkInitialAbsState proxy tocMap
, MI.absEvalArchFn = absEvalArchFn proxy
@ -109,7 +102,6 @@ ppc32_linux_info tocMap =
MI.ArchitectureInfo { MI.withArchConstraints = \x -> x
, MI.archAddrWidth = MM.Addr32
, MI.archEndianness = MM.BigEndian
, MI.jumpTableEntrySize = jumpTableEntrySize proxy
, MI.disassembleFn = disassembleFn proxy PPC32.execInstruction
, MI.mkInitialAbsState = mkInitialAbsState proxy tocMap
, MI.absEvalArchFn = absEvalArchFn proxy

View File

@ -48,15 +48,15 @@ import Data.Macaw.PPC.PPCReg
--
-- This code assumes that the 'MM.ByteRegion' is maximal; that is, that there
-- are no byte regions that could be coalesced.
readInstruction :: MM.Memory w
-> MM.MemSegmentOff w
readInstruction :: (MM.MemWidth w)
=> MM.MemSegmentOff w
-> Either (PPCMemoryError w) (D.Instruction, MM.MemWord w)
readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do
readInstruction addr = do
let seg = MM.msegSegment addr
case MM.segmentFlags seg `MMP.hasPerm` MMP.execute of
False -> ET.throwError (PPCMemoryError (MM.PermissionsError (MM.relativeSegmentAddr addr)))
True -> do
contents <- liftMemError $ MM.addrContentsAfter mem (MM.relativeSegmentAddr addr)
contents <- liftMemError $ MM.contentsAfterSegmentOff addr
case contents of
[] -> ET.throwError (PPCMemoryError (MM.AccessViolation (MM.relativeSegmentAddr addr)))
MM.RelocationRegion r : _ ->
@ -104,7 +104,6 @@ disassembleBlock :: forall ppc ids s
. PPCArchConstraints ppc
=> (Value ppc ids (BVType (ArchAddrWidth ppc)) -> D.Instruction -> Maybe (Generator ppc ids s ()))
-- ^ A function to look up the semantics for an instruction that we disassemble
-> MM.Memory (ArchAddrWidth ppc)
-> GenState ppc ids s
-> MM.MemSegmentOff (ArchAddrWidth ppc)
-- ^ The current instruction pointer
@ -113,10 +112,10 @@ disassembleBlock :: forall ppc ids s
-- disassemble to; in principle, macaw can tell us to limit our
-- search with this.
-> DisM ppc ids s (MM.MemWord (ArchAddrWidth ppc), BlockSeq ppc ids)
disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
disassembleBlock lookupSemantics gs curIPAddr maxOffset = do
let seg = MM.msegSegment curIPAddr
let off = MM.msegOffset curIPAddr
case readInstruction mem curIPAddr of
case readInstruction curIPAddr of
Left err -> failAt gs off curIPAddr (DecodeError err)
Right (i, bytesRead) -> do
-- traceM ("II: " ++ show i)
@ -158,16 +157,15 @@ disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do
, Just simplifiedIP <- simplifyValue v
, simplifiedIP == nextIPVal
, nextIPOffset < maxOffset
, Just nextIPSegAddr <- MM.asSegmentOff mem nextIP -> do
, Just nextIPSegAddr <- MM.incSegmentOff curIPAddr (fromIntegral bytesRead) -> do
let preBlock' = (pBlockState . curIP .~ simplifiedIP) preBlock
let gs2 = GenState { assignIdGen = assignIdGen gs
, _blockSeq = resBlockSeq gs1
, _blockState = preBlock'
, genAddr = nextIPSegAddr
, genMemory = mem
, genRegUpdates = MapF.empty
}
disassembleBlock lookupSemantics mem gs2 nextIPSegAddr maxOffset
disassembleBlock lookupSemantics gs2 nextIPSegAddr maxOffset
_ -> return (nextIPOffset, finishBlock FetchAndExecute gs1)
@ -186,19 +184,18 @@ matchConditionalBranch v =
tryDisassembleBlock :: (PPCArchConstraints ppc)
=> (Value ppc ids (BVType (ArchAddrWidth ppc)) -> D.Instruction -> Maybe (Generator ppc ids s ()))
-> MM.Memory (ArchAddrWidth ppc)
-> NC.NonceGenerator (ST s) ids
-> ArchSegmentOff ppc
-> ArchAddrWord ppc
-> DisM ppc ids s ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc))
tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize = do
let gs0 = initGenState nonceGen mem startAddr (initRegState startAddr)
-> Int
-> DisM ppc ids s ([Block ppc ids], Int)
tryDisassembleBlock lookupSemantics nonceGen startAddr maxSize = do
let gs0 = initGenState nonceGen startAddr (initRegState startAddr)
let startOffset = MM.msegOffset startAddr
(nextIPOffset, blocks) <- disassembleBlock lookupSemantics mem gs0 startAddr (startOffset + maxSize)
(nextIPOffset, blocks) <- disassembleBlock lookupSemantics gs0 startAddr (startOffset + fromIntegral maxSize)
unless (nextIPOffset > startOffset) $ do
let reason = InvalidNextIP (fromIntegral nextIPOffset) (fromIntegral startOffset)
failAt gs0 nextIPOffset startAddr reason
return (F.toList (blocks ^. frontierBlocks), nextIPOffset - startOffset)
return (F.toList (blocks ^. frontierBlocks), fromIntegral (nextIPOffset - startOffset))
-- | Disassemble a block from the given start address (which points into the
-- 'MM.Memory').
@ -211,24 +208,22 @@ disassembleFn :: (PPCArchConstraints ppc)
-- ^ A function to look up the semantics for an instruction. The
-- lookup is provided with the value of the IP in case IP-relative
-- addressing is necessary.
-> MM.Memory (ArchAddrWidth ppc)
-- ^ The mapped memory space
-> NC.NonceGenerator (ST s) ids
-- ^ A generator of unique IDs used for assignments
-> ArchSegmentOff ppc
-- ^ The address to disassemble from
-> ArchAddrWord ppc
-> Int
-- ^ Maximum size of the block (a safeguard)
-> MA.AbsBlockState (ArchReg ppc)
-- ^ Abstract state of the processor at the start of the block
-> ST s ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc), Maybe String)
disassembleFn _ lookupSemantics mem nonceGen startAddr maxSize _ = do
mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize))
-> ST s ([Block ppc ids], Int, Maybe String)
disassembleFn _ lookupSemantics nonceGen startAddr maxSize _ = do
mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics nonceGen startAddr maxSize))
case mr of
Left (blocks, off, exn) -> return (blocks, off, Just (show exn))
Right (blocks, bytes) -> return (blocks, bytes, Nothing)
type LocatedError ppc ids = ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc), TranslationError (ArchAddrWidth ppc))
type LocatedError ppc ids = ([Block ppc ids], Int, TranslationError (ArchAddrWidth ppc))
-- | This is a monad for error handling during disassembly
--
-- It allows for early failure that reports progress (in the form of blocks
@ -247,7 +242,7 @@ newtype DisM ppc ids s a = DisM { unDisM :: ET.ExceptT (LocatedError ppc ids) (S
--
-- We also can't derive this instance because of that restriction (but deriving
-- silently fails).
instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], MM.MemWord w, TranslationError w) (DisM ppc ids s) where
instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], Int, TranslationError w) (DisM ppc ids s) where
throwError e = DisM (ET.throwError e)
catchError a hdlr = do
r <- liftST $ ET.runExceptT (unDisM a)
@ -297,4 +292,4 @@ failAt gs offset curIPAddr reason = do
let b = finishBlock' (gs ^. blockState) term
let res = _blockSeq gs & frontierBlocks %~ (Seq.|> b)
let res' = F.toList (res ^. frontierBlocks)
ET.throwError (res', offset, exn)
ET.throwError (res', fromIntegral offset, exn)

View File

@ -118,7 +118,6 @@ data GenState arch ids s =
, _blockSeq :: !(BlockSeq arch ids)
, _blockState :: !(PreBlock arch ids)
, genAddr :: MM.MemSegmentOff (ArchAddrWidth arch)
, genMemory :: MM.Memory (ArchAddrWidth arch)
, genRegUpdates :: MapF.MapF (ArchReg arch) (Value arch ids)
}
@ -134,16 +133,14 @@ emptyPreBlock s0 idx addr =
}
initGenState :: NC.NonceGenerator (ST s) ids
-> MM.Memory (ArchAddrWidth arch)
-> MM.MemSegmentOff (ArchAddrWidth arch)
-> RegState (ArchReg arch) (Value arch ids)
-> GenState arch ids s
initGenState nonceGen mem addr st =
initGenState nonceGen addr st =
GenState { assignIdGen = nonceGen
, _blockSeq = BlockSeq { _nextBlockID = 1, _frontierBlocks = Seq.empty }
, _blockState = emptyPreBlock st 0 addr
, genAddr = addr
, genMemory = mem
, genRegUpdates = MapF.empty
}
@ -408,7 +405,6 @@ conditionalBranch condExpr t f =
}
, _blockState = emptyPreBlock st f_block_label (genAddr s0)
, genAddr = genAddr s0
, genMemory = genMemory s0
, genRegUpdates = genRegUpdates s0
}
f_seq <- finishBlock FetchAndExecute <$> runGenerator c s2 f

@ -1 +1 @@
Subproject commit 9304269acef1c82e9ae3676649574e371ac54c36
Subproject commit 1a162453c818d401d0cedce5764adeafb4a092c7

@ -1 +1 @@
Subproject commit 9876c2aff8fb550be24214a8edd870d81a540127
Subproject commit 64d71737af104f614ccd51d81a164b8036b4b3e7