From f7b87224a4f1a2810599d58d0c5174c2a2777da8 Mon Sep 17 00:00:00 2001 From: Tristan Ravitch Date: Thu, 16 Aug 2018 10:26:55 -0700 Subject: [PATCH] Update to the latest macaw In macaw core, the type of the arch-specific 'disassemble' function changed to no longer take a Memory, and to pass the maximum offset as an Int instead of a MemWord. It also removed the jump table entry size (which is no longer required). The removal of the Memory parameter required a bit of a change in how the instruction parsers are structured, but it isn't a huge change (the "memory contents after an address" can be computed from a MemSegmentOff, too). --- macaw-arm/src/Data/Macaw/ARM.hs | 1 - macaw-arm/src/Data/Macaw/ARM/Disassemble.hs | 56 ++++++++++--------- macaw-ppc/src/Data/Macaw/PPC.hs | 8 --- macaw-ppc/src/Data/Macaw/PPC/Disassemble.hs | 47 +++++++--------- macaw-semmc/src/Data/Macaw/SemMC/Generator.hs | 6 +- submodules/flexdis86 | 2 +- submodules/macaw | 2 +- 7 files changed, 54 insertions(+), 68 deletions(-) diff --git a/macaw-arm/src/Data/Macaw/ARM.hs b/macaw-arm/src/Data/Macaw/ARM.hs index 2e20bc74..7be75a46 100644 --- a/macaw-arm/src/Data/Macaw/ARM.hs +++ b/macaw-arm/src/Data/Macaw/ARM.hs @@ -36,7 +36,6 @@ arm_linux_info = MI.ArchitectureInfo { MI.withArchConstraints = \x -> x , MI.archAddrWidth = MM.Addr32 , MI.archEndianness = MM.LittleEndian - , MI.jumpTableEntrySize = 0 -- jumpTableEntrySize proxy , MI.disassembleFn = disassembleFn proxy ARMSem.execInstruction ThumbSem.execInstruction , MI.mkInitialAbsState = mkInitialAbsState proxy , MI.absEvalArchFn = absEvalArchFn proxy diff --git a/macaw-arm/src/Data/Macaw/ARM/Disassemble.hs b/macaw-arm/src/Data/Macaw/ARM/Disassemble.hs index 0ca92843..6c38e725 100644 --- a/macaw-arm/src/Data/Macaw/ARM/Disassemble.hs +++ b/macaw-arm/src/Data/Macaw/ARM/Disassemble.hs @@ -62,43 +62,40 @@ disassembleFn :: (ARMArchConstraints arm) -- ^ A function to look up the semantics for a T32 instruction. The -- lookup is provided with the value of the IP in case IP-relative -- addressing is necessary. - -> MM.Memory (ArchAddrWidth arm) - -- ^ The mapped memory space -> NC.NonceGenerator (ST s) ids -- ^ A generator of unique IDs used for assignments -> ArchSegmentOff arm -- ^ The address to disassemble from - -> ArchAddrWord arm + -> Int -- ^ Maximum size of the block (a safeguard) -> MA.AbsBlockState (ArchReg arm) -- ^ Abstract state of the processor at the start of the block - -> ST s ([Block arm ids], MM.MemWord (ArchAddrWidth arm), Maybe String) -disassembleFn _ lookupA32Semantics lookupT32Semantics mem nonceGen startAddr maxSize _ = do + -> ST s ([Block arm ids], Int, Maybe String) +disassembleFn _ lookupA32Semantics lookupT32Semantics nonceGen startAddr maxSize _ = do let lookupSemantics ipval instr = case instr of A32I inst -> lookupA32Semantics ipval inst T32I inst -> lookupT32Semantics ipval inst mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics - mem nonceGen startAddr maxSize)) + nonceGen startAddr maxSize)) case mr of Left (blocks, off, exn) -> return (blocks, off, Just (show exn)) Right (blocks, bytes) -> return (blocks, bytes, Nothing) tryDisassembleBlock :: (ARMArchConstraints arm) => (Value arm ids (BVType (ArchAddrWidth arm)) -> InstructionSet -> Maybe (Generator arm ids s ())) - -> MM.Memory (ArchAddrWidth arm) -> NC.NonceGenerator (ST s) ids -> ArchSegmentOff arm - -> ArchAddrWord arm - -> DisM arm ids s ([Block arm ids], MM.MemWord (ArchAddrWidth arm)) -tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize = do - let gs0 = initGenState nonceGen mem startAddr (initRegState startAddr) + -> Int + -> DisM arm ids s ([Block arm ids], Int) +tryDisassembleBlock lookupSemantics nonceGen startAddr maxSize = do + let gs0 = initGenState nonceGen startAddr (initRegState startAddr) let startOffset = MM.msegOffset startAddr - (nextPCOffset, blocks) <- disassembleBlock lookupSemantics mem gs0 startAddr (startOffset + maxSize) + (nextPCOffset, blocks) <- disassembleBlock lookupSemantics gs0 startAddr (startOffset + fromIntegral maxSize) unless (nextPCOffset > startOffset) $ do let reason = InvalidNextPC (MM.absoluteAddr nextPCOffset) (MM.absoluteAddr startOffset) failAt gs0 nextPCOffset startAddr reason - return (F.toList (blocks ^. frontierBlocks), nextPCOffset - startOffset) + return (F.toList (blocks ^. frontierBlocks), fromIntegral (nextPCOffset - startOffset)) @@ -119,7 +116,6 @@ disassembleBlock :: forall arm ids s . ARMArchConstraints arm => (Value arm ids (BVType (ArchAddrWidth arm)) -> InstructionSet -> Maybe (Generator arm ids s ())) -- ^ A function to look up the semantics for an instruction that we disassemble - -> MM.Memory (ArchAddrWidth arm) -> GenState arm ids s -> MM.MemSegmentOff (ArchAddrWidth arm) -- ^ The current instruction pointer @@ -128,10 +124,10 @@ disassembleBlock :: forall arm ids s -- disassemble to; in principle, macaw can tell us to limit our -- search with this. -> DisM arm ids s (MM.MemWord (ArchAddrWidth arm), BlockSeq arm ids) -disassembleBlock lookupSemantics mem gs curPCAddr maxOffset = do +disassembleBlock lookupSemantics gs curPCAddr maxOffset = do let seg = MM.msegSegment curPCAddr let off = MM.msegOffset curPCAddr - case readInstruction mem curPCAddr of + case readInstruction curPCAddr of Left err -> failAt gs off curPCAddr (DecodeError err) Right (_, 0) -> failAt gs off curPCAddr (InvalidNextPC (MM.relativeSegmentAddr curPCAddr) (MM.relativeSegmentAddr curPCAddr)) Right (i, bytesRead) -> do @@ -175,16 +171,15 @@ disassembleBlock lookupSemantics mem gs curPCAddr maxOffset = do , Just simplifiedIP <- simplifyValue v , simplifiedIP == nextPCVal , nextPCOffset < maxOffset - , Just nextPCSegAddr <- MM.asSegmentOff mem nextPC -> do + , Just nextPCSegAddr <- MM.incSegmentOff curPCAddr (fromIntegral bytesRead) -> do let preBlock' = (pBlockState . curIP .~ simplifiedIP) preBlock let gs2 = GenState { assignIdGen = assignIdGen gs , _blockSeq = resBlockSeq gs1 , _blockState = preBlock' , genAddr = nextPCSegAddr - , genMemory = mem , genRegUpdates = MapF.empty } - disassembleBlock lookupSemantics mem gs2 nextPCSegAddr maxOffset + disassembleBlock lookupSemantics gs2 nextPCSegAddr maxOffset _ -> return (nextPCOffset, finishBlock FetchAndExecute gs1) @@ -194,10 +189,10 @@ disassembleBlock lookupSemantics mem gs curPCAddr maxOffset = do -- -- This code assumes that the 'MM.ByteRegion' is maximal; that is, that there -- are no byte regions that could be coalesced. -readInstruction :: MM.Memory w - -> MM.MemSegmentOff w +readInstruction :: (MM.MemWidth w) + => MM.MemSegmentOff w -> Either (ARMMemoryError w) (InstructionSet, MM.MemWord w) -readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do +readInstruction addr = do let seg = MM.msegSegment addr segRelAddrRaw = MM.relativeSegmentAddr addr -- Addresses specified in ARM instructions have the low bit @@ -209,7 +204,9 @@ readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do segRelAddr = segRelAddrRaw { addrOffset = MM.addrOffset segRelAddrRaw `xor` loBit } if MM.segmentFlags seg `MMP.hasPerm` MMP.execute then do - contents <- liftMemError $ MM.addrContentsAfter mem segRelAddr + let ao = addrOffset segRelAddr + alignedMsegOff <- liftMaybe (ARMInvalidInstructionAddress seg ao) (MM.resolveSegmentOff seg ao) + contents <- liftMemError $ MM.contentsAfterSegmentOff alignedMsegOff case contents of [] -> ET.throwError $ ARMMemoryError (MM.AccessViolation segRelAddr) MM.BSSRegion {} : _ -> @@ -232,6 +229,12 @@ readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do Nothing -> ET.throwError $ ARMInvalidInstruction segRelAddr contents else ET.throwError $ ARMMemoryError (MM.PermissionsError segRelAddr) +liftMaybe :: ARMMemoryError w -> Maybe a -> Either (ARMMemoryError w) a +liftMaybe err ma = + case ma of + Just a -> Right a + Nothing -> Left err + liftMemError :: Either (MM.MemoryError w) a -> Either (ARMMemoryError w) a liftMemError e = case e of @@ -242,6 +245,7 @@ liftMemError e = -- invalid instructions. data ARMMemoryError w = ARMInvalidInstruction !(MM.MemAddr w) [MM.SegmentRange w] | ARMMemoryError !(MM.MemoryError w) + | ARMInvalidInstructionAddress !(MM.MemSegment w) !(MM.MemWord w) deriving (Show) -- | Examine a value and see if it is a mux; if it is, break the mux up and @@ -261,7 +265,7 @@ matchConditionalBranch v = type LocatedError ppc ids = ([Block ppc ids] - , MM.MemWord (ArchAddrWidth ppc) + , Int , TranslationError (ArchAddrWidth ppc)) -- | This is a monad for error handling during disassembly @@ -280,7 +284,7 @@ newtype DisM ppc ids s a = DisM { unDisM :: ET.ExceptT (LocatedError ppc ids) (S -- -- We also can't derive this instance because of that restriction (but deriving -- silently fails). -instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], MM.MemWord w, TranslationError w) (DisM ppc ids s) where +instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], Int, TranslationError w) (DisM ppc ids s) where throwError e = DisM (ET.throwError e) catchError a hdlr = do r <- liftST $ ET.runExceptT (unDisM a) @@ -332,4 +336,4 @@ failAt gs offset curPCAddr reason = do let b = finishBlock' (gs ^. blockState) term let res = _blockSeq gs & frontierBlocks %~ (Seq.|> b) let res' = F.toList (res ^. frontierBlocks) - ET.throwError (res', offset, exn) + ET.throwError (res', fromIntegral offset, exn) diff --git a/macaw-ppc/src/Data/Macaw/PPC.hs b/macaw-ppc/src/Data/Macaw/PPC.hs index 98a0bb8b..7d18a345 100644 --- a/macaw-ppc/src/Data/Macaw/PPC.hs +++ b/macaw-ppc/src/Data/Macaw/PPC.hs @@ -31,7 +31,6 @@ module Data.Macaw.PPC ( import Data.Proxy ( Proxy(..) ) import qualified Data.Macaw.Architecture.Info as MI -import Data.Macaw.CFG import qualified Data.Macaw.CFG.DemandSet as MDS import qualified Data.Macaw.Memory as MM @@ -75,18 +74,12 @@ archDemandContext _ = , MDS.archFnHasSideEffects = ppcPrimFnHasSideEffects } --- | NOTE: There isn't necessarily one answer for this. This will need to turn --- into a function. With PIC jump tables, it can be smaller than the native size. -jumpTableEntrySize :: (PPCArchConstraints ppc) => proxy ppc -> MM.MemWord (ArchAddrWidth ppc) -jumpTableEntrySize _ = 4 - ppc64_linux_info :: TOC.TOC PPC64.PPC -> MI.ArchitectureInfo PPC64.PPC ppc64_linux_info tocMap = MI.ArchitectureInfo { MI.withArchConstraints = \x -> x , MI.archAddrWidth = MM.Addr64 , MI.archEndianness = MM.BigEndian - , MI.jumpTableEntrySize = jumpTableEntrySize proxy , MI.disassembleFn = disassembleFn proxy PPC64.execInstruction , MI.mkInitialAbsState = mkInitialAbsState proxy tocMap , MI.absEvalArchFn = absEvalArchFn proxy @@ -109,7 +102,6 @@ ppc32_linux_info tocMap = MI.ArchitectureInfo { MI.withArchConstraints = \x -> x , MI.archAddrWidth = MM.Addr32 , MI.archEndianness = MM.BigEndian - , MI.jumpTableEntrySize = jumpTableEntrySize proxy , MI.disassembleFn = disassembleFn proxy PPC32.execInstruction , MI.mkInitialAbsState = mkInitialAbsState proxy tocMap , MI.absEvalArchFn = absEvalArchFn proxy diff --git a/macaw-ppc/src/Data/Macaw/PPC/Disassemble.hs b/macaw-ppc/src/Data/Macaw/PPC/Disassemble.hs index 10236abd..af7a2076 100644 --- a/macaw-ppc/src/Data/Macaw/PPC/Disassemble.hs +++ b/macaw-ppc/src/Data/Macaw/PPC/Disassemble.hs @@ -48,15 +48,15 @@ import Data.Macaw.PPC.PPCReg -- -- This code assumes that the 'MM.ByteRegion' is maximal; that is, that there -- are no byte regions that could be coalesced. -readInstruction :: MM.Memory w - -> MM.MemSegmentOff w +readInstruction :: (MM.MemWidth w) + => MM.MemSegmentOff w -> Either (PPCMemoryError w) (D.Instruction, MM.MemWord w) -readInstruction mem addr = MM.addrWidthClass (MM.memAddrWidth mem) $ do +readInstruction addr = do let seg = MM.msegSegment addr case MM.segmentFlags seg `MMP.hasPerm` MMP.execute of False -> ET.throwError (PPCMemoryError (MM.PermissionsError (MM.relativeSegmentAddr addr))) True -> do - contents <- liftMemError $ MM.addrContentsAfter mem (MM.relativeSegmentAddr addr) + contents <- liftMemError $ MM.contentsAfterSegmentOff addr case contents of [] -> ET.throwError (PPCMemoryError (MM.AccessViolation (MM.relativeSegmentAddr addr))) MM.RelocationRegion r : _ -> @@ -104,7 +104,6 @@ disassembleBlock :: forall ppc ids s . PPCArchConstraints ppc => (Value ppc ids (BVType (ArchAddrWidth ppc)) -> D.Instruction -> Maybe (Generator ppc ids s ())) -- ^ A function to look up the semantics for an instruction that we disassemble - -> MM.Memory (ArchAddrWidth ppc) -> GenState ppc ids s -> MM.MemSegmentOff (ArchAddrWidth ppc) -- ^ The current instruction pointer @@ -113,10 +112,10 @@ disassembleBlock :: forall ppc ids s -- disassemble to; in principle, macaw can tell us to limit our -- search with this. -> DisM ppc ids s (MM.MemWord (ArchAddrWidth ppc), BlockSeq ppc ids) -disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do +disassembleBlock lookupSemantics gs curIPAddr maxOffset = do let seg = MM.msegSegment curIPAddr let off = MM.msegOffset curIPAddr - case readInstruction mem curIPAddr of + case readInstruction curIPAddr of Left err -> failAt gs off curIPAddr (DecodeError err) Right (i, bytesRead) -> do -- traceM ("II: " ++ show i) @@ -158,16 +157,15 @@ disassembleBlock lookupSemantics mem gs curIPAddr maxOffset = do , Just simplifiedIP <- simplifyValue v , simplifiedIP == nextIPVal , nextIPOffset < maxOffset - , Just nextIPSegAddr <- MM.asSegmentOff mem nextIP -> do + , Just nextIPSegAddr <- MM.incSegmentOff curIPAddr (fromIntegral bytesRead) -> do let preBlock' = (pBlockState . curIP .~ simplifiedIP) preBlock let gs2 = GenState { assignIdGen = assignIdGen gs , _blockSeq = resBlockSeq gs1 , _blockState = preBlock' , genAddr = nextIPSegAddr - , genMemory = mem , genRegUpdates = MapF.empty } - disassembleBlock lookupSemantics mem gs2 nextIPSegAddr maxOffset + disassembleBlock lookupSemantics gs2 nextIPSegAddr maxOffset _ -> return (nextIPOffset, finishBlock FetchAndExecute gs1) @@ -186,19 +184,18 @@ matchConditionalBranch v = tryDisassembleBlock :: (PPCArchConstraints ppc) => (Value ppc ids (BVType (ArchAddrWidth ppc)) -> D.Instruction -> Maybe (Generator ppc ids s ())) - -> MM.Memory (ArchAddrWidth ppc) -> NC.NonceGenerator (ST s) ids -> ArchSegmentOff ppc - -> ArchAddrWord ppc - -> DisM ppc ids s ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc)) -tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize = do - let gs0 = initGenState nonceGen mem startAddr (initRegState startAddr) + -> Int + -> DisM ppc ids s ([Block ppc ids], Int) +tryDisassembleBlock lookupSemantics nonceGen startAddr maxSize = do + let gs0 = initGenState nonceGen startAddr (initRegState startAddr) let startOffset = MM.msegOffset startAddr - (nextIPOffset, blocks) <- disassembleBlock lookupSemantics mem gs0 startAddr (startOffset + maxSize) + (nextIPOffset, blocks) <- disassembleBlock lookupSemantics gs0 startAddr (startOffset + fromIntegral maxSize) unless (nextIPOffset > startOffset) $ do let reason = InvalidNextIP (fromIntegral nextIPOffset) (fromIntegral startOffset) failAt gs0 nextIPOffset startAddr reason - return (F.toList (blocks ^. frontierBlocks), nextIPOffset - startOffset) + return (F.toList (blocks ^. frontierBlocks), fromIntegral (nextIPOffset - startOffset)) -- | Disassemble a block from the given start address (which points into the -- 'MM.Memory'). @@ -211,24 +208,22 @@ disassembleFn :: (PPCArchConstraints ppc) -- ^ A function to look up the semantics for an instruction. The -- lookup is provided with the value of the IP in case IP-relative -- addressing is necessary. - -> MM.Memory (ArchAddrWidth ppc) - -- ^ The mapped memory space -> NC.NonceGenerator (ST s) ids -- ^ A generator of unique IDs used for assignments -> ArchSegmentOff ppc -- ^ The address to disassemble from - -> ArchAddrWord ppc + -> Int -- ^ Maximum size of the block (a safeguard) -> MA.AbsBlockState (ArchReg ppc) -- ^ Abstract state of the processor at the start of the block - -> ST s ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc), Maybe String) -disassembleFn _ lookupSemantics mem nonceGen startAddr maxSize _ = do - mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics mem nonceGen startAddr maxSize)) + -> ST s ([Block ppc ids], Int, Maybe String) +disassembleFn _ lookupSemantics nonceGen startAddr maxSize _ = do + mr <- ET.runExceptT (unDisM (tryDisassembleBlock lookupSemantics nonceGen startAddr maxSize)) case mr of Left (blocks, off, exn) -> return (blocks, off, Just (show exn)) Right (blocks, bytes) -> return (blocks, bytes, Nothing) -type LocatedError ppc ids = ([Block ppc ids], MM.MemWord (ArchAddrWidth ppc), TranslationError (ArchAddrWidth ppc)) +type LocatedError ppc ids = ([Block ppc ids], Int, TranslationError (ArchAddrWidth ppc)) -- | This is a monad for error handling during disassembly -- -- It allows for early failure that reports progress (in the form of blocks @@ -247,7 +242,7 @@ newtype DisM ppc ids s a = DisM { unDisM :: ET.ExceptT (LocatedError ppc ids) (S -- -- We also can't derive this instance because of that restriction (but deriving -- silently fails). -instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], MM.MemWord w, TranslationError w) (DisM ppc ids s) where +instance (w ~ ArchAddrWidth ppc) => ET.MonadError ([Block ppc ids], Int, TranslationError w) (DisM ppc ids s) where throwError e = DisM (ET.throwError e) catchError a hdlr = do r <- liftST $ ET.runExceptT (unDisM a) @@ -297,4 +292,4 @@ failAt gs offset curIPAddr reason = do let b = finishBlock' (gs ^. blockState) term let res = _blockSeq gs & frontierBlocks %~ (Seq.|> b) let res' = F.toList (res ^. frontierBlocks) - ET.throwError (res', offset, exn) + ET.throwError (res', fromIntegral offset, exn) diff --git a/macaw-semmc/src/Data/Macaw/SemMC/Generator.hs b/macaw-semmc/src/Data/Macaw/SemMC/Generator.hs index 214bde90..30abdd6d 100644 --- a/macaw-semmc/src/Data/Macaw/SemMC/Generator.hs +++ b/macaw-semmc/src/Data/Macaw/SemMC/Generator.hs @@ -118,7 +118,6 @@ data GenState arch ids s = , _blockSeq :: !(BlockSeq arch ids) , _blockState :: !(PreBlock arch ids) , genAddr :: MM.MemSegmentOff (ArchAddrWidth arch) - , genMemory :: MM.Memory (ArchAddrWidth arch) , genRegUpdates :: MapF.MapF (ArchReg arch) (Value arch ids) } @@ -134,16 +133,14 @@ emptyPreBlock s0 idx addr = } initGenState :: NC.NonceGenerator (ST s) ids - -> MM.Memory (ArchAddrWidth arch) -> MM.MemSegmentOff (ArchAddrWidth arch) -> RegState (ArchReg arch) (Value arch ids) -> GenState arch ids s -initGenState nonceGen mem addr st = +initGenState nonceGen addr st = GenState { assignIdGen = nonceGen , _blockSeq = BlockSeq { _nextBlockID = 1, _frontierBlocks = Seq.empty } , _blockState = emptyPreBlock st 0 addr , genAddr = addr - , genMemory = mem , genRegUpdates = MapF.empty } @@ -408,7 +405,6 @@ conditionalBranch condExpr t f = } , _blockState = emptyPreBlock st f_block_label (genAddr s0) , genAddr = genAddr s0 - , genMemory = genMemory s0 , genRegUpdates = genRegUpdates s0 } f_seq <- finishBlock FetchAndExecute <$> runGenerator c s2 f diff --git a/submodules/flexdis86 b/submodules/flexdis86 index 9304269a..1a162453 160000 --- a/submodules/flexdis86 +++ b/submodules/flexdis86 @@ -1 +1 @@ -Subproject commit 9304269acef1c82e9ae3676649574e371ac54c36 +Subproject commit 1a162453c818d401d0cedce5764adeafb4a092c7 diff --git a/submodules/macaw b/submodules/macaw index 9876c2af..64d71737 160000 --- a/submodules/macaw +++ b/submodules/macaw @@ -1 +1 @@ -Subproject commit 9876c2aff8fb550be24214a8edd870d81a540127 +Subproject commit 64d71737af104f614ccd51d81a164b8036b4b3e7