From bb63f9f859640fd38399cc0d5395aa0caf725aaa Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Mon, 12 Nov 2018 11:56:44 -0500 Subject: [PATCH 01/10] This fixes tail call detection, and allows architecture-specific checks. --- base/macaw-base.cabal | 2 +- base/src/Data/Macaw/Architecture/Info.hs | 19 +++++++++++-- base/src/Data/Macaw/CFG/Core.hs | 2 +- base/src/Data/Macaw/Discovery.hs | 34 ++++++++++-------------- base/src/Data/Macaw/Discovery/State.hs | 29 +++++++++++++++----- x86/macaw-x86.cabal | 2 +- x86/src/Data/Macaw/X86.hs | 13 +++++++++ 7 files changed, 69 insertions(+), 32 deletions(-) diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index 4c989ef0..92586520 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -1,5 +1,5 @@ name: macaw-base -version: 0.3.2 +version: 0.3.3 author: Galois, Inc. maintainer: jhendrix@galois.com build-type: Simple diff --git a/base/src/Data/Macaw/Architecture/Info.hs b/base/src/Data/Macaw/Architecture/Info.hs index e62391d9..e1d8e827 100644 --- a/base/src/Data/Macaw/Architecture/Info.hs +++ b/base/src/Data/Macaw/Architecture/Info.hs @@ -97,8 +97,23 @@ data ArchitectureInfo arch -- Given a memory state, list of statements, and final register -- state, the should determine if this is a call, and if so, -- return the statements with any action to push the return - -- value to the stack removed, and provide the explicit return - -- address that the function should return to. + -- value to the stack removed, and provide the return address that + -- the function should return to. + + , checkForReturnAddr :: forall ids + . RegState (ArchReg arch) (Value arch ids) + -> AbsProcessorState (ArchReg arch) ids + -> Bool + -- ^ @checkForReturnAddr regs s@ returns true if the location + -- where the return address is normally stored in regs when + -- calling a function does indeed contain the abstract value + -- associated with return addresses. + -- + -- For x86 this checks if the address just above the stack is the + -- return address. For ARM, this should check the link register. + -- + -- This predicate is invoked when considering if a potential tail call + -- is setup to return to the right location. , identifyReturn :: forall ids . [Stmt arch ids] -> RegState (ArchReg arch) (Value arch ids) diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index 8df22ec3..6769c5c3 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -693,7 +693,7 @@ data Stmt arch ids ppStmt :: ArchConstraints arch => (ArchAddrWord arch -> Doc) - -- ^ Function for pretty printing an offset + -- ^ Function for pretty printing an instruction address offset -> Stmt arch ids -> Doc ppStmt ppOff stmt = diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 9c405a47..c61ee29f 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -18,6 +18,7 @@ module Data.Macaw.Discovery ( -- * DiscoveryInfo State.DiscoveryState(..) , State.emptyDiscoveryState + , State.trustedFunctionEntryPoints , State.AddrSymMap , State.funInfo , State.exploredFunctions @@ -844,6 +845,7 @@ parseFetchAndExecute ctx idx stmts regs s = do -- We define calls as statements that end with a write that -- stores the pc to an address. case () of + -- The block ends with a Mux, so we turn this into a `ParsedIte` statement. _ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts @@ -869,7 +871,7 @@ parseFetchAndExecute ctx idx stmts regs s = do } pure (ret, falseIdx) - -- The last statement was a call. + -- Use architecture-specific callback to check if last statement was a call. -- Note that in some cases the call is known not to return, and thus -- this code will never jump to the return value. _ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do @@ -913,7 +915,7 @@ parseFetchAndExecute ctx idx stmts regs s = do -- Jump to a block within this function. | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) - -- Check + -- Check target block address is in executable segment. , segmentFlags (segoffSegment tgt_mseg) `Perm.hasPerm` Perm.execute -- Check the target address is not the entry point of this function. @@ -935,7 +937,8 @@ parseFetchAndExecute ctx idx stmts regs s = do , stmtsAbsState = absProcState' } pure (ret, idx+1) - -- Block ends with what looks like a jump table. + + -- Block ends with what looks like a jump table. | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts @@ -957,12 +960,13 @@ parseFetchAndExecute ctx idx stmts regs s = do } pure (ret,idx+1) - -- Check for tail call when the stack pointer points to the return address. - -- - -- TODO: this makes sense for x86, but is not correct for all architectures - | ptrType <- addrMemRepr ainfo - , sp_val <- s^.boundValue sp_reg - , ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do + -- Check for tail call when the calling convention seems to be satisfied. + | spVal <- s^.boundValue sp_reg + -- Check to see if the stack pointer points to an offset of the initial stack. + , StackOffset _ offsets <- transferValue absProcState' spVal + -- Stack stack is back to height when function was called. + , offsets == Set.singleton 0 + , checkForReturnAddr ainfo s absProcState' -> do finishWithTailCall absProcState' -- Is this a jump to a known function entry? We're already past the @@ -1097,19 +1101,9 @@ addBlocks src finfo sz blockMap = funAddr <- gets curFunAddr s <- use curFunCtx - -- Combine entries of functions we've discovered thus far with - -- undiscovered functions with entries marked InitAddr, which we assume is - -- info we know from the symbol table or some other reliable source, and - -- pass in. Only used in analysis if pctxTrustKnownFns is True. - let knownFns = - if s^.trustKnownFns then - Set.union (Map.keysSet $ s^.funInfo) - (Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions) - else - Set.empty let ctx = ParseContext { pctxMemory = memory s , pctxArchInfo = archInfo s - , pctxKnownFnEntries = knownFns + , pctxKnownFnEntries = s^.trustedFunctionEntryPoints , pctxFunAddr = funAddr , pctxAddr = src , pctxBlockMap = blockMap diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index 887bcf56..157cc340 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -33,7 +33,7 @@ module Data.Macaw.Discovery.State , globalDataMap , funInfo , unexploredFunctions - , trustKnownFns + , trustedFunctionEntryPoints , exploreFnPred -- * DiscoveryFunInfo , DiscoveryFunInfo(..) @@ -51,6 +51,8 @@ import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Parameterized.Classes import Data.Parameterized.Some +import Data.Set (Set) +import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Vector as V @@ -310,9 +312,20 @@ data DiscoveryState arch -- they are analyzed. -- -- The keys in this map and `_funInfo` should be mutually disjoint. - , _trustKnownFns :: !Bool - -- ^ Should we use and depend on known function entries in - -- our analysis? E.g. used to distinguish jumps vs. tail calls + , _trustedFunctionEntryPoints :: !(Set (ArchSegmentOff arch)) + -- ^ This is the set of addresses that we treat + -- as definitely belonging to function entry + -- points. + -- + -- The discovery process will not allow + -- intra-procedural jumps to these addresses. + -- Jumps to these addresses must either be calls + -- or tail calls. + -- + -- To ensure translation is invariant on the + -- order in which functions are visited, this + -- set should be initialized upfront, and not + -- changed. , _exploreFnPred :: Maybe (ArchSegmentOff arch -> Bool) -- ^ if present, this predicate decides whether to explore -- a function at the given address or not @@ -350,7 +363,7 @@ emptyDiscoveryState mem symbols info = , _globalDataMap = Map.empty , _funInfo = Map.empty , _unexploredFunctions = Map.empty - , _trustKnownFns = False + , _trustedFunctionEntryPoints = Set.empty , _exploreFnPred = Nothing } @@ -368,8 +381,10 @@ unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunction funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch))) funInfo = lens _funInfo (\s v -> s { _funInfo = v }) -trustKnownFns :: Simple Lens (DiscoveryState arch) Bool -trustKnownFns = lens _trustKnownFns (\s v -> s { _trustKnownFns = v }) +trustedFunctionEntryPoints :: Simple Lens (DiscoveryState arch) (Set (ArchSegmentOff arch)) +trustedFunctionEntryPoints = + lens _trustedFunctionEntryPoints + (\s v -> s { _trustedFunctionEntryPoints = v }) exploreFnPred :: Simple Lens (DiscoveryState arch) (Maybe (ArchSegmentOff arch -> Bool)) exploreFnPred = lens _exploreFnPred (\s v -> s { _exploreFnPred = v }) diff --git a/x86/macaw-x86.cabal b/x86/macaw-x86.cabal index a4904a7c..93109989 100644 --- a/x86/macaw-x86.cabal +++ b/x86/macaw-x86.cabal @@ -16,7 +16,7 @@ library containers, flexdis86 >= 0.1.2, lens >= 4.7, - macaw-base >= 0.3.2, + macaw-base >= 0.3.3, mtl, parameterized-utils, text, diff --git a/x86/src/Data/Macaw/X86.hs b/x86/src/Data/Macaw/X86.hs index 56fc90b2..6d19b71b 100644 --- a/x86/src/Data/Macaw/X86.hs +++ b/x86/src/Data/Macaw/X86.hs @@ -68,6 +68,7 @@ import Text.PrettyPrint.ANSI.Leijen (Pretty(..), text) import Data.Macaw.AbsDomain.AbsState ( AbsBlockState + , curAbsStack , setAbsIP , absRegState , StackEntry(..) @@ -503,6 +504,17 @@ identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty -- Otherwise skip over this instruction. | otherwise -> go prev (stmt Seq.<| after) +-- | Return true if stack pointer has been reset to original value, and +-- return address is on top of stack. +checkForReturnAddrX86 :: forall ids + . AbsProcessorState X86Reg ids + -> Bool +checkForReturnAddrX86 absState + | Just (StackEntry _ ReturnAddr) <- Map.lookup 8 (absState^.curAbsStack) = + True + | otherwise = + False + -- | Called to determine if the instruction sequence contains a return -- from the current function. -- @@ -575,6 +587,7 @@ x86_64_info preservePred = , absEvalArchStmt = \s _ -> s , postCallAbsState = x86PostCallAbsState , identifyCall = identifyX86Call + , checkForReturnAddr = \_ s -> checkForReturnAddrX86 s , identifyReturn = identifyX86Return , rewriteArchFn = rewriteX86PrimFn , rewriteArchStmt = rewriteX86Stmt From 23fe50bd45f8e13b27324b048880b7d715ef18ab Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Mon, 12 Nov 2018 15:28:32 -0500 Subject: [PATCH 02/10] Fix stack offset. --- x86/src/Data/Macaw/X86.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x86/src/Data/Macaw/X86.hs b/x86/src/Data/Macaw/X86.hs index 6d19b71b..7ab0975a 100644 --- a/x86/src/Data/Macaw/X86.hs +++ b/x86/src/Data/Macaw/X86.hs @@ -510,7 +510,7 @@ checkForReturnAddrX86 :: forall ids . AbsProcessorState X86Reg ids -> Bool checkForReturnAddrX86 absState - | Just (StackEntry _ ReturnAddr) <- Map.lookup 8 (absState^.curAbsStack) = + | Just (StackEntry _ ReturnAddr) <- Map.lookup 0 (absState^.curAbsStack) = True | otherwise = False From c4b7252c77bd2f81b8d9a3bfc1dd58747dafd246 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Fri, 16 Nov 2018 13:40:40 -0500 Subject: [PATCH 03/10] Add specialized terminal statement for PLT stubs. --- base/macaw-base.cabal | 4 +- base/src/Data/Macaw/Analysis/FunctionArgs.hs | 230 +++++++++------ base/src/Data/Macaw/Architecture/Info.hs | 17 +- base/src/Data/Macaw/CFG/Core.hs | 36 +-- base/src/Data/Macaw/Discovery.hs | 292 +++++++++++++------ base/src/Data/Macaw/Discovery/AbsEval.hs | 5 +- base/src/Data/Macaw/Discovery/State.hs | 21 +- base/src/Data/Macaw/Fold.hs | 10 +- base/src/Data/Macaw/Memory.hs | 13 +- base/src/Data/Macaw/Memory/ElfLoader.hs | 11 +- x86/src/Data/Macaw/X86.hs | 102 ++++--- 11 files changed, 474 insertions(+), 267 deletions(-) diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index 92586520..29adf298 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -1,5 +1,5 @@ name: macaw-base -version: 0.3.3 +version: 0.3.4 author: Galois, Inc. maintainer: jhendrix@galois.com build-type: Simple @@ -39,7 +39,7 @@ library IntervalMap >= 0.5, lens >= 4.7, mtl, - parameterized-utils >= 1.0.1, + parameterized-utils >= 1.0.7, template-haskell, text, vector, diff --git a/base/src/Data/Macaw/Analysis/FunctionArgs.hs b/base/src/Data/Macaw/Analysis/FunctionArgs.hs index b0b4aa59..e79e5812 100644 --- a/base/src/Data/Macaw/Analysis/FunctionArgs.hs +++ b/base/src/Data/Macaw/Analysis/FunctionArgs.hs @@ -49,9 +49,15 @@ import Data.Macaw.CFG import Data.Macaw.CFG.BlockLabel import Data.Macaw.CFG.DemandSet import Data.Macaw.Discovery.State -import Data.Macaw.Fold import Data.Macaw.Types + +newtype Ap f a = Ap { getAp :: f a } + +instance (Applicative f, Monoid a) => Monoid (Ap f a) where + mempty = Ap $ pure mempty + mappend (Ap x) (Ap y) = Ap $ mappend <$> x <*> y + ------------------------------------------------------------------------------- -- The algorithm computes the set of direct deps (i.e., from writes) @@ -94,6 +100,10 @@ data DemandSet (r :: Type -> *) = , functionResultDemands :: !(Map (RegSegmentOff r) (RegisterSet r)) } +-- | Create a demand set for specific registers. +registerDemandSet :: RegisterSet r -> DemandSet r +registerDemandSet s = DemandSet { registerDemands = s, functionResultDemands = Map.empty } + deriving instance (ShowF r, MemWidth (RegAddrWidth r)) => Show (DemandSet r) deriving instance (TestEquality r) => Eq (DemandSet r) deriving instance (OrdF r) => Ord (DemandSet r) @@ -108,15 +118,18 @@ instance OrdF r => Semigroup (DemandSet r) where instance OrdF r => Monoid (DemandSet r) where mempty = DemandSet { registerDemands = Set.empty - , functionResultDemands = mempty + , functionResultDemands = Map.empty } mappend = (<>) demandSetDifference :: OrdF r => DemandSet r -> DemandSet r -> DemandSet r demandSetDifference ds1 ds2 = - DemandSet (registerDemands ds1 `Set.difference` registerDemands ds2) - (Map.differenceWith setDiff (functionResultDemands ds1) - (functionResultDemands ds2)) + DemandSet { registerDemands = registerDemands ds1 `Set.difference` registerDemands ds2 + , functionResultDemands = + Map.differenceWith setDiff + (functionResultDemands ds1) + (functionResultDemands ds2) + } where setDiff s1 s2 = let s' = s1 `Set.difference` s2 @@ -130,8 +143,8 @@ data DemandType r -- | This type is for registers that are demanded if the function at the given address wants -- the given register. | forall tp. DemandFunctionArg (RegSegmentOff r) (r tp) - -- | This is a associated with a set of registers that are demanded if the given register is needed - -- as a return value. + -- | This is a associated with the registers that are demanded if + -- the given register is needed as a return value. | forall tp. DemandFunctionResult (r tp) instance (MemWidth (RegAddrWidth r), ShowF r) => Show (DemandType r) where @@ -178,8 +191,8 @@ data ArchTermStmtRegEffects arch = ArchTermStmtRegEffects { termRegDemands :: ![Some (ArchReg arch)] -- ^ Registers demanded by term statement , termRegTransfers :: [Some (ArchReg arch)] - -- ^ Registers that terminal statement are not modified - -- by terminal statement. + -- ^ Registers that are not modified by + -- terminal statement. } -- | Returns information about the registers needed and modified by a terminal statement @@ -192,6 +205,8 @@ type ComputeArchTermStmtEffects arch ids -> RegState (ArchReg arch) (Value arch ids) -> ArchTermStmtRegEffects arch +-- | Information about the architecture/environment what arguments a +-- function needs. data ArchDemandInfo arch = ArchDemandInfo { -- | Registers used as arguments to the function. functionArgRegs :: ![Some (ArchReg arch)] @@ -293,34 +308,38 @@ addIntraproceduralJumpTarget fun_info src_block dest_addr = do -- record the ed text "Could not find target block" <+> text (show dest_addr) <$$> indent 2 (text "Source:" <$$> pretty src_block) --- | Compute the input registers that this value depends on +withAssignmentCache :: State (AssignmentCache (ArchReg arch) ids) a -> FunctionArgsM arch ids a +withAssignmentCache m = do + c <- use assignmentCache + let (r, c') = runState m c + seq c' $ assignmentCache .= c' + pure r + +-- | Return the input registers that a value depends on. valueUses :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch)) => Value arch ids tp - -> FunctionArgsM arch ids (RegisterSet (ArchReg arch)) -valueUses v = zoom assignmentCache $ foldValueCached fns v - where fns = emptyValueFold { foldInput = Set.singleton . Some } + -> State (AssignmentCache (ArchReg arch) ids) (RegisterSet (ArchReg arch)) +valueUses (AssignedValue (Assignment a rhs)) = do + mr <- gets $ Map.lookup (Some a) + case mr of + Just s -> pure s + Nothing -> do + rhs' <- foldrFC (\v mrhs -> Set.union <$> valueUses v <*> mrhs) (pure Set.empty) rhs + seq rhs' $ modify' $ Map.insert (Some a) rhs' + pure $ rhs' +valueUses (Initial r) = do + pure $! Set.singleton (Some r) +valueUses _ = do + pure $! Set.empty + +addBlockDemands :: OrdF (ArchReg arch) => ArchLabel arch -> DemandMap (ArchReg arch) -> FunctionArgsM arch ids () +addBlockDemands lbl m = + blockDemandMap %= Map.insertWith demandMapUnion lbl m --- | Record that a block demands the value of certain registers. -recordBlockDemand :: ( OrdF (ArchReg arch) - , FoldableFC (ArchFn arch) - ) - => ArchLabel arch - -- ^ The current block - -> RegState (ArchReg arch) (Value arch ids) - -- ^ The current register state - -> (forall tp . ArchReg arch tp -> DemandType (ArchReg arch)) - -> [Some (ArchReg arch)] - -- ^ The registers that we need. - -> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps -recordBlockDemand lbl s mk rs = do - let doReg (Some r) = do - rs' <- valueUses (s ^. boundValue r) - return (mk r, DemandSet rs' mempty) - vs <- mapM doReg rs - blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs) -- Figure out the deps of the given registers and update the state for the current label -recordBlockTransfer :: ( OrdF (ArchReg arch) +recordBlockTransfer :: forall arch ids + . ( OrdF (ArchReg arch) , FoldableFC (ArchFn arch) ) => ArchLabel arch @@ -328,10 +347,13 @@ recordBlockTransfer :: ( OrdF (ArchReg arch) -> [Some (ArchReg arch)] -> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps recordBlockTransfer lbl s rs = do - let doReg (Some r) = do + let doReg :: Some (ArchReg arch) + -> State (AssignmentCache (ArchReg arch) ids) + (Some (ArchReg arch), DemandSet (ArchReg arch)) + doReg (Some r) = do rs' <- valueUses (s ^. boundValue r) - return (Some r, DemandSet rs' mempty) - vs <- mapM doReg rs + return (Some r, registerDemandSet rs') + vs <- withAssignmentCache $ traverse doReg rs blockTransfer %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs) -- | A block requires a value, and so we need to remember which @@ -341,9 +363,8 @@ demandValue :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch)) -> Value arch ids tp -> FunctionArgsM arch ids () demandValue lbl v = do - regs <- valueUses v - blockDemandMap %= Map.insertWith demandMapUnion lbl - (Map.singleton DemandAlways (DemandSet regs mempty)) + regs <- withAssignmentCache $ valueUses v + addBlockDemands lbl $ Map.singleton DemandAlways (registerDemandSet regs) -- ----------------------------------------------------------------------------- -- Entry point @@ -425,9 +446,9 @@ summarizeCall :: forall arch ids -> Bool -- ^ A flag that is set to true for tail calls. -> FunctionArgsM arch ids () -summarizeCall mem lbl proc_state isTailCall = do +summarizeCall mem lbl finalRegs isTailCall = do knownAddrs <- gets computedAddrSet - case valueAsMemAddr (proc_state^.boundValue ip_reg) of + case valueAsMemAddr (finalRegs^.boundValue ip_reg) of Just faddr0 | Just faddr <- asSegmentOff mem faddr0 , Set.member faddr knownAddrs -> do @@ -438,10 +459,10 @@ summarizeCall mem lbl proc_state isTailCall = do -- singleton for now, but propagating back will introduce more deps. let demandSet sr = DemandSet mempty (Map.singleton faddr (Set.singleton sr)) - if isTailCall then + if isTailCall then do -- tail call, propagate demands for our return regs to the called function - let propMap = map (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) retRegs - in blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromList propMap) + let propMap = (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) <$> retRegs + addBlockDemands lbl $ Map.fromList propMap else do -- Given a return register sr, this indicates that let propResult :: Some (ArchReg arch) -> FunctionArgsM arch ids () @@ -454,12 +475,22 @@ summarizeCall mem lbl proc_state isTailCall = do -- If a function wants argument register r, then we note that this -- block needs the corresponding state values. Note that we could -- do this for _all_ registers, but this should make the summaries somewhat smaller. + + -- Associate the demand sets for each potential argument register with the registers used + -- by faddr. argRegs <- gets $ functionArgRegs . archDemandInfo - recordBlockDemand lbl proc_state (DemandFunctionArg faddr) argRegs + let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^. boundValue r) + let demandTypes = viewSome (DemandFunctionArg faddr) <$> argRegs + demands <- withAssignmentCache $ traverse regDemandSet argRegs + addBlockDemands lbl $ Map.fromList $ zip demandTypes demands _ -> do -- In the dynamic case, we just assume all arguments (FIXME: results?) argRegs <- gets $ functionArgRegs . archDemandInfo - recordBlockDemand lbl proc_state (\_ -> DemandAlways) ([Some ip_reg] ++ argRegs) + + do let demandedRegs = [Some ip_reg] ++ argRegs + let regUses (Some r) = valueUses (finalRegs^. boundValue r) + demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $ foldMap (Ap . regUses) demandedRegs + addBlockDemands lbl $ Map.singleton DemandAlways demands -- | Return values that must be evaluated to execute side effects. stmtDemandedValues :: DemandContext arch @@ -491,10 +522,10 @@ summarizeBlock :: forall arch ids -> ArchSegmentOff arch -- ^ Address of the code. -> StatementList arch ids -- ^ Current block -> FunctionArgsM arch ids () -summarizeBlock mem interp_state addr stmts = do +summarizeBlock mem interpState addr stmts = do let lbl = GeneratedBlock addr (stmtsIdent stmts) -- Add this label to block demand map with empty set. - blockDemandMap %= Map.insertWith demandMapUnion lbl mempty + addBlockDemands lbl mempty ctx <- gets $ demandInfoCtx . archDemandInfo -- Add all values demanded by non-terminal statements in list. @@ -502,6 +533,67 @@ summarizeBlock mem interp_state addr stmts = do (stmtsNonterm stmts) -- Add values demanded by terminal statements case stmtsTerm stmts of + ParsedCall finalRegs m_ret_addr -> do + -- Record the demands based on the call, and add edges between + -- this note and next nodes. + case m_ret_addr of + Nothing -> do + summarizeCall mem lbl finalRegs True + Just ret_addr -> do + summarizeCall mem lbl finalRegs False + addIntraproceduralJumpTarget interpState lbl ret_addr + callRegs <- gets $ calleeSavedRegs . archDemandInfo + recordBlockTransfer lbl finalRegs ([Some sp_reg] ++ Set.toList callRegs) + + PLTStub regs _ _ -> do + -- PLT Stubs demand all registers that could be function + -- arguments, as well as any registers in regs. + ainfo <- gets archDemandInfo + let demandedRegs = Set.fromList (functionArgRegs ainfo) + demands <- withAssignmentCache $ getAp $ foldMapF (Ap . valueUses) regs + addBlockDemands lbl $ Map.singleton DemandAlways $ + registerDemandSet $ demands <> demandedRegs + + ParsedJump procState tgtAddr -> do + -- record all propagations + recordBlockTransfer lbl procState archRegs + addIntraproceduralJumpTarget interpState lbl tgtAddr + + ParsedLookupTable finalRegs lookup_idx vec -> do + demandValue lbl lookup_idx + -- record all propagations + recordBlockTransfer lbl finalRegs archRegs + traverse_ (addIntraproceduralJumpTarget interpState lbl) vec + + ParsedReturn finalRegs -> do + retRegs <- gets $ functionRetRegs . archDemandInfo + let demandTypes = viewSome DemandFunctionResult <$> retRegs + let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^.boundValue r) + demands <- withAssignmentCache $ traverse regDemandSet retRegs + addBlockDemands lbl $ Map.fromList $ zip demandTypes demands + + + + ParsedIte c tblock fblock -> do + -- Demand condition then summarize recursive blocks. + demandValue lbl c + summarizeBlock mem interpState addr tblock + summarizeBlock mem interpState addr fblock + + ParsedArchTermStmt tstmt finalRegs next_addr -> do + -- Compute effects of terminal statement. + ainfo <- gets $ archDemandInfo + let e = computeArchTermStmtEffects ainfo tstmt finalRegs + + -- Demand all registers the terminal statement demands. + do let regUses (Some r) = valueUses (finalRegs^.boundValue r) + demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $ + foldMap (Ap . regUses) (termRegDemands e) + addBlockDemands lbl $ Map.singleton DemandAlways demands + + recordBlockTransfer lbl finalRegs (termRegTransfers e) + traverse_ (addIntraproceduralJumpTarget interpState lbl) next_addr + ParsedTranslateError _ -> do -- We ignore demands for translate errors. pure () @@ -509,43 +601,6 @@ summarizeBlock mem interp_state addr stmts = do -- We ignore demands for classify failure. pure () - ParsedIte c tblock fblock -> do - -- Demand condition then summarize recursive blocks. - demandValue lbl c - summarizeBlock mem interp_state addr tblock - summarizeBlock mem interp_state addr fblock - - ParsedCall proc_state m_ret_addr -> do - case m_ret_addr of - Nothing -> do - summarizeCall mem lbl proc_state True - Just ret_addr -> do - summarizeCall mem lbl proc_state False - addIntraproceduralJumpTarget interp_state lbl ret_addr - callRegs <- gets $ calleeSavedRegs . archDemandInfo - recordBlockTransfer lbl proc_state ([Some sp_reg] ++ Set.toList callRegs) - - ParsedJump proc_state tgt_addr -> do - -- record all propagations - recordBlockTransfer lbl proc_state archRegs - addIntraproceduralJumpTarget interp_state lbl tgt_addr - - ParsedReturn proc_state -> do - retRegs <- gets $ functionRetRegs . archDemandInfo - recordBlockDemand lbl proc_state DemandFunctionResult retRegs - - ParsedArchTermStmt tstmt proc_state next_addr -> do - effFn <- gets $ computeArchTermStmtEffects . archDemandInfo - let e = effFn tstmt proc_state - recordBlockDemand lbl proc_state (\_ -> DemandAlways) (termRegDemands e) - recordBlockTransfer lbl proc_state (termRegTransfers e) - traverse_ (addIntraproceduralJumpTarget interp_state lbl) next_addr - - ParsedLookupTable proc_state lookup_idx vec -> do - demandValue lbl lookup_idx - -- record all propagations - recordBlockTransfer lbl proc_state archRegs - traverse_ (addIntraproceduralJumpTarget interp_state lbl) vec -- | Explore states until we have reached end of frontier. summarizeIter :: ArchConstraints arch @@ -574,7 +629,7 @@ calculateOnePred newDemands predLbl = do -- update uses, returning value before this iteration seenDemands <- use (blockDemandMap . ix lbl') - blockDemandMap . at lbl' .= Just (Map.unionWith mappend demands' seenDemands) + addBlockDemands lbl' demands' -- seenDemands <- blockDemandMap . ix lbl' <<%= demandMapUnion demands' @@ -682,10 +737,7 @@ doOneFunction archFns addrs ist0 acc ist = do -- recorded as a use, which is erroneous, so we strip out any -- reference to them here. callRegs <- gets $ calleeSavedRegs . archDemandInfo - let calleeDemandSet = DemandSet { registerDemands = - Set.insert (Some sp_reg) callRegs - , functionResultDemands = mempty - } + let calleeDemandSet = registerDemandSet (Set.insert (Some sp_reg) callRegs) return (Map.foldlWithKey' (decomposeMap calleeDemandSet addr) acc funDemands) diff --git a/base/src/Data/Macaw/Architecture/Info.hs b/base/src/Data/Macaw/Architecture/Info.hs index e1d8e827..b44d4568 100644 --- a/base/src/Data/Macaw/Architecture/Info.hs +++ b/base/src/Data/Macaw/Architecture/Info.hs @@ -45,13 +45,10 @@ type DisassembleFn arch . NonceGenerator (ST s) ids -> ArchSegmentOff arch -- ^ The offset to start reading from. + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Initial values to use for registers. -> Int -- ^ Maximum offset for this to read from. - -> AbsBlockState (ArchReg arch) - -- ^ Abstract state associated with address that we are disassembling - -- from. - -- - -- This is used for things like the height of the x87 stack. -> ST s ([Block arch ids], Int, Maybe String) -- | This records architecture specific functions for analysis. @@ -64,6 +61,12 @@ data ArchitectureInfo arch -- ^ Architecture address width. , archEndianness :: !Endianness -- ^ The byte order values are stored in. + , mkInitialRegsForBlock :: !(forall ids + . ArchSegmentOff arch + -> AbsBlockState (ArchReg arch) + -> Either String (RegState (ArchReg arch) (Value arch ids))) + -- ^ Use the abstract block state information to infer register + -- values to use for disassembling from given address. , disassembleFn :: !(DisassembleFn arch) -- ^ Function for disasembling a block. , mkInitialAbsState :: !(Memory (RegAddrWidth (ArchReg arch)) @@ -89,7 +92,7 @@ data ArchitectureInfo arch -- ^ Update the abstract state after a function call returns , identifyCall :: forall ids . Memory (ArchAddrWidth arch) - -> [Stmt arch ids] + -> Seq (Stmt arch ids) -> RegState (ArchReg arch) (Value arch ids) -> Maybe (Seq (Stmt arch ids), ArchSegmentOff arch) -- ^ Function for recognizing call statements. @@ -115,7 +118,7 @@ data ArchitectureInfo arch -- This predicate is invoked when considering if a potential tail call -- is setup to return to the right location. , identifyReturn :: forall ids - . [Stmt arch ids] + . Seq (Stmt arch ids) -> RegState (ArchReg arch) (Value arch ids) -> AbsProcessorState (ArchReg arch) ids -> Maybe (Seq (Stmt arch ids)) diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index 6769c5c3..500b16dc 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -53,6 +53,7 @@ module Data.Macaw.CFG.Core , mkRegStateM , traverseRegsWith , zipWithRegState + , ppRegMap -- * Pretty printing , ppAssignId , ppLit @@ -70,8 +71,6 @@ module Data.Macaw.CFG.Core , asStackAddrOffset -- * References , refsInValue - , refsInApp - , refsInAssignRhs -- ** Synonyms , ArchAddrValue , Data.Parameterized.TraversableFC.FoldableFC(..) @@ -647,12 +646,15 @@ class PrettyRegValue r (f :: Type -> *) where -- should be printed, and Nothing if the contents should be ignored. ppValueEq :: r tp -> f tp -> Maybe Doc -instance ( PrettyRegValue r f - ) +ppRegMap :: forall r v . PrettyRegValue r v => MapF.MapF r v -> Doc +ppRegMap m = bracketsep $ catMaybes (f <$> MapF.toList m) + where f :: MapF.Pair r v -> Maybe Doc + f (MapF.Pair r v) = ppValueEq r v + + +instance ( PrettyRegValue r f) => Pretty (RegState r f) where - pretty (RegState m) = bracketsep $ catMaybes (f <$> MapF.toList m) - where f :: MapF.Pair r f -> Maybe Doc - f (MapF.Pair r v) = ppValueEq r v + pretty (RegState m) = ppRegMap m instance ( PrettyRegValue r f ) @@ -703,7 +705,8 @@ ppStmt ppOff stmt = InstructionStart off mnem -> text "#" <+> ppOff off <+> text (Text.unpack mnem) Comment s -> text $ "# " ++ Text.unpack s ExecArchStmt s -> ppArchStmt (ppValue 10) s - ArchState a m -> hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m)) + ArchState a m -> + hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m)) where ppAddr addr = case asAbsoluteAddr addr of @@ -722,18 +725,11 @@ refsInValue :: Value arch ids tp -> Set (Some (AssignId ids)) refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v) refsInValue _ = Set.empty -refsInApp :: App (Value arch ids) tp -> Set (Some (AssignId ids)) -refsInApp app = foldMapFC refsInValue app - +-- | Return the assign identifiers in the assignment right-hand side. +-- +-- Note. This does not recursively evaluate references in values, it +-- just returns the assignment identifiers. refsInAssignRhs :: FoldableFC (ArchFn arch) => AssignRhs arch (Value arch ids) tp -> Set (Some (AssignId ids)) -refsInAssignRhs rhs = - case rhs of - EvalApp v -> refsInApp v - SetUndefined _ -> Set.empty - ReadMem v _ -> refsInValue v - CondReadMem _ c a d -> - Set.union (refsInValue c) $ - Set.union (refsInValue a) (refsInValue d) - EvalArchFn f _ -> foldMapFC refsInValue f +refsInAssignRhs rhs = foldMapFC refsInValue rhs diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index c61ee29f..fdf76d69 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -9,6 +9,7 @@ This provides information about code discovered in binaries. {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} @@ -65,11 +66,15 @@ import Data.Foldable import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe +import Data.Monoid import Data.Parameterized.Classes +import qualified Data.Parameterized.Map as MapF import Data.Parameterized.NatRepr import Data.Parameterized.Nonce import Data.Parameterized.Some import Data.Parameterized.TraversableF +import Data.Sequence (Seq) +import qualified Data.Sequence as Seq import Data.Set (Set) import qualified Data.Set as Set import qualified Data.Text as Text @@ -199,6 +204,8 @@ addStatementListDemands sl = do case stmtsTerm sl of ParsedCall regs _ -> do traverseF_ addValueDemands regs + PLTStub regs _ _ -> + traverseF_ addValueDemands regs ParsedJump regs _ -> do traverseF_ addValueDemands regs ParsedLookupTable regs _idx _tbl -> do @@ -780,13 +787,15 @@ data ParseContext arch ids = -- ^ Entry addresses for known functions (e.g. from -- symbol information) -- - -- The discovery process will not create intra-procedural - -- jumps to the entry points of new functions. + -- The discovery process will not create + -- intra-procedural jumps to the entry points of new + -- functions. , pctxFunAddr :: !(ArchSegmentOff arch) -- ^ Address of function this block is being parsed as , pctxAddr :: !(ArchSegmentOff arch) -- ^ Address of the current block , pctxBlockMap :: !(Map Word64 (Block arch ids)) + -- ^ Map from block indices to block code at address. } addrMemRepr :: ArchitectureInfo arch -> MemRepr (BVType (RegAddrWidth (ArchReg arch))) @@ -826,46 +835,111 @@ addNewFunctionAddrs :: [ArchSegmentOff arch] addNewFunctionAddrs addrs = newFunctionAddrs %= (++addrs) +-- | @stripPLTRead assignId prev rest@ looks for a read of @assignId@ +-- from the end of @prev@, and if it finds it returns the +-- concatenation of the instructione before the read in @prev@ and +-- @rest@. +-- +-- The read may appear before comment and @instructionStart@ +-- instructions, but otherwise must be at the end of prev +stripPLTRead :: ArchConstraints arch + => AssignId ids tp -- ^ Identifier of write to remove + -> Seq (Stmt arch ids) + -> Seq (Stmt arch ids) + -> Maybe (Seq (Stmt arch ids)) +stripPLTRead readId next rest = + case Seq.viewr next of + Seq.EmptyR -> Nothing + prev Seq.:> lastStmt -> do + let cont = stripPLTRead readId prev (lastStmt Seq.<| rest) + case lastStmt of + AssignStmt (Assignment stmtId rhs) + | Just Refl <- testEquality readId stmtId -> Just (prev Seq.>< rest) + -- Fail if the read to delete is used in later computations + | Set.member (Some readId) (foldMapFC refsInValue rhs) -> + Nothing + | otherwise -> + case rhs of + EvalApp{} -> cont + SetUndefined{} -> cont + _ -> Nothing + InstructionStart{} -> cont + ArchState{} -> cont + Comment{} -> cont + _ -> Nothing + +removeUnassignedRegs :: forall arch ids + . RegisterInfo (ArchReg arch) + => RegState (ArchReg arch) (Value arch ids) + -- ^ Initial register values + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Final register values + -> MapF.MapF (ArchReg arch) (Value arch ids) +removeUnassignedRegs initRegs finalRegs = + let keepReg :: forall tp . ArchReg arch tp -> Value arch ids tp -> Bool + keepReg r finalVal + | Just Refl <- testEquality r ip_reg = False + | Just Refl <- testEquality initVal finalVal = False + | otherwise = True + where initVal = initRegs^.boundValue r + in MapF.filterWithKey keepReg (regStateMap finalRegs) + +-- | Return true if any value in structure contains the given +-- identifier. +containsAssignId :: forall t arch ids itp + . FoldableF t + => AssignId ids itp + -- ^ Forbidden assignment -- may not appear in terms. + -> t (Value arch ids) + -> Bool +containsAssignId droppedAssign = + let hasId :: forall tp . Value arch ids tp -> Any + hasId v = Any (Set.member (Some droppedAssign) (refsInValue v)) + in getAny . foldMapF hasId + -- | This parses a block that ended with a fetch and execute instruction. parseFetchAndExecute :: forall arch ids . ParseContext arch ids -> Word64 -- ^ Index of this block - -> [Stmt arch ids] - -> AbsProcessorState (ArchReg arch) ids - -- ^ Registers prior to blocks being executed. -> RegState (ArchReg arch) (Value arch ids) + -- ^ Initial register values + -> Seq (Stmt arch ids) + -> AbsProcessorState (ArchReg arch) ids + -- ^ Abstract state of registers prior to blocks being executed. + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Final register values -> State (ParseState arch ids) (StatementList arch ids, Word64) -parseFetchAndExecute ctx idx stmts regs s = do - let mem = pctxMemory ctx - let ainfo= pctxArchInfo ctx - let absProcState' = absEvalStmts ainfo regs stmts +parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do + let mem = pctxMemory ctx + let ainfo = pctxArchInfo ctx + let absProcState' = absEvalStmts ainfo absProcState stmts withArchConstraints ainfo $ do -- See if next statement appears to end with a call. -- We define calls as statements that end with a write that -- stores the pc to an address. case () of -- The block ends with a Mux, so we turn this into a `ParsedIte` statement. - _ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do + _ | Just (Mux _ c t f) <- valueAsApp (finalRegs^.boundValue ip_reg) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts let l_regs = refineProcStateBounds c True $ refineProcState c absTrue absProcState' let l_regs' = absEvalStmts ainfo l_regs stmts - let lState = s & boundValue ip_reg .~ t + let lState = finalRegs & boundValue ip_reg .~ t (tStmts,trueIdx) <- - parseFetchAndExecute ctx (idx+1) [] l_regs' lState + parseFetchAndExecute ctx (idx+1) initRegs Seq.empty l_regs' lState let r_regs = refineProcStateBounds c False $ refineProcState c absFalse absProcState' let r_regs' = absEvalStmts ainfo r_regs stmts - let rState = s & boundValue ip_reg .~ f + let rState = finalRegs & boundValue ip_reg .~ f (fStmts,falseIdx) <- - parseFetchAndExecute ctx trueIdx [] r_regs' rState + parseFetchAndExecute ctx trueIdx initRegs Seq.empty r_regs' rState let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts + , stmtsNonterm = toList stmts , stmtsTerm = ParsedIte c tStmts fStmts , stmtsAbsState = absProcState' } @@ -874,20 +948,20 @@ parseFetchAndExecute ctx idx stmts regs s = do -- Use architecture-specific callback to check if last statement was a call. -- Note that in some cases the call is known not to return, and thus -- this code will never jump to the return value. - _ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do + _ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts finalRegs -> do mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts - let abst = finalAbsBlockState absProcState' s + let abst = finalAbsBlockState absProcState' finalRegs seq abst $ do -- Merge caller return information intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) -- Use the abstract domain to look for new code pointers for the current IP. addNewFunctionAddrs $ - identifyCallTargets mem abst s + identifyCallTargets mem abst finalRegs -- Use the call-specific code to look for new IPs. let r = StatementList { stmtsIdent = idx , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedCall s (Just ret) + , stmtsTerm = ParsedCall finalRegs (Just ret) , stmtsAbsState = absProcState' } pure (r, idx+1) @@ -903,18 +977,18 @@ parseFetchAndExecute ctx idx stmts regs s = do -- (e.g. ARM will clear the low bit in T32 mode or the low 2 -- bits in A32 mode), so the actual detection process is -- deferred to architecture-specific functionality. - | Just prev_stmts <- identifyReturn ainfo stmts s absProcState' -> do + | Just prev_stmts <- identifyReturn ainfo stmts finalRegs absProcState' -> do mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts let ret = StatementList { stmtsIdent = idx , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedReturn s + , stmtsTerm = ParsedReturn finalRegs , stmtsAbsState = absProcState' } pure (ret, idx+1) -- Jump to a block within this function. - | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) + | Just tgt_mseg <- valueAsSegmentOff mem (finalRegs^.boundValue ip_reg) -- Check target block address is in executable segment. , segmentFlags (segoffSegment tgt_mseg) `Perm.hasPerm` Perm.execute @@ -928,23 +1002,23 @@ parseFetchAndExecute ctx idx stmts regs s = do mapM_ (recordWriteStmt ainfo mem absProcState') stmts -- Merge block state and add intra jump target. - let abst = finalAbsBlockState absProcState' s + let abst = finalAbsBlockState absProcState' finalRegs let abst' = abst & setAbsIP tgt_mseg intraJumpTargets %= ((tgt_mseg, abst'):) let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedJump s tgt_mseg + , stmtsNonterm = toList stmts + , stmtsTerm = ParsedJump finalRegs tgt_mseg , stmtsAbsState = absProcState' } pure (ret, idx+1) -- Block ends with what looks like a jump table. - | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do + | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (finalRegs^.curIP) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts let abst :: AbsBlockState (ArchReg arch) - abst = finalAbsBlockState absProcState' s + abst = finalAbsBlockState absProcState' finalRegs seq abst $ do @@ -952,36 +1026,51 @@ parseFetchAndExecute ctx idx stmts regs s = do let abst' = abst & setAbsIP tgtAddr intraJumpTargets %= ((tgtAddr, abst'):) - let term = ParsedLookupTable s jumpIndex entries + let term = ParsedLookupTable finalRegs jumpIndex entries let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts + , stmtsNonterm = toList stmts , stmtsTerm = term , stmtsAbsState = absProcState' } pure (ret,idx+1) + -- Code for PLT entry + _ | 0 <- idx + , AssignedValue (Assignment valId v) <- finalRegs^.boundValue ip_reg + , ReadMem gotVal _repr <- v + , Just gotAddr <- valueAsMemAddr gotVal + , Just gotSegOff <- asSegmentOff mem gotAddr + , Right chunks <- segoffContentsAfter gotSegOff + , RelocationRegion r:_ <- chunks + -- Check the relocation is a jump slot. + , relocationJumpSlot r + , Just strippedStmts <- stripPLTRead valId stmts Seq.empty + , strippedRegs <- removeUnassignedRegs initRegs finalRegs + , not (containsAssignId valId strippedRegs) -> do + + mapM_ (recordWriteStmt ainfo mem absProcState') strippedStmts + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = toList strippedStmts + , stmtsTerm = PLTStub strippedRegs gotSegOff r + , stmtsAbsState = absEvalStmts ainfo absProcState strippedStmts + } + pure (ret, idx+1) + -- Check for tail call when the calling convention seems to be satisfied. - | spVal <- s^.boundValue sp_reg + | spVal <- finalRegs^.boundValue sp_reg -- Check to see if the stack pointer points to an offset of the initial stack. , StackOffset _ offsets <- transferValue absProcState' spVal -- Stack stack is back to height when function was called. , offsets == Set.singleton 0 - , checkForReturnAddr ainfo s absProcState' -> do - finishWithTailCall absProcState' - - -- Is this a jump to a known function entry? We're already past the - -- "identifyCall" case, so this must be a tail call, assuming we trust our - -- known function entry info. - | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) - , tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do + , checkForReturnAddr ainfo finalRegs absProcState' -> do finishWithTailCall absProcState' -- Block that ends with some unknown | otherwise -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ClassifyFailure s + , stmtsNonterm = toList stmts + , stmtsTerm = ClassifyFailure finalRegs , stmtsAbsState = absProcState' } pure (ret,idx+1) @@ -994,7 +1083,7 @@ parseFetchAndExecute ctx idx stmts regs s = do mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts -- Compute final state - let abst = finalAbsBlockState absProcState' s + let abst = finalAbsBlockState absProcState' finalRegs seq abst $ do -- Look for new instruction pointers @@ -1002,8 +1091,8 @@ parseFetchAndExecute ctx idx stmts regs s = do identifyConcreteAddresses mem (abst^.absRegState^.curIP) let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedCall s Nothing + , stmtsNonterm = toList stmts + , stmtsTerm = ParsedCall finalRegs Nothing , stmtsAbsState = absProcState' } seq ret $ pure (ret,idx+1) @@ -1013,13 +1102,15 @@ parseFetchAndExecute ctx idx stmts regs s = do parseBlock :: ParseContext arch ids -- ^ Context for parsing blocks. -> Word64 - -- ^ Index for next statements + -- ^ Index for next statements + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Initial register values -> Block arch ids -- ^ Block to parse -> AbsProcessorState (ArchReg arch) ids -- ^ Abstract state at start of block -> State (ParseState arch ids) (StatementList arch ids, Word64) -parseBlock ctx idx b regs = do +parseBlock ctx idx initRegs b absProcState = do let mem = pctxMemory ctx let ainfo = pctxArchInfo ctx withArchConstraints ainfo $ do @@ -1027,7 +1118,7 @@ parseBlock ctx idx b regs = do Branch c lb rb -> do let blockMap = pctxBlockMap ctx -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts ainfo regs (blockStmts b) + let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b) let Just l = Map.lookup lb blockMap @@ -1038,8 +1129,8 @@ parseBlock ctx idx b regs = do let l_regs' = absEvalStmts ainfo l_regs (blockStmts b) let r_regs' = absEvalStmts ainfo r_regs (blockStmts b) - (parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) l l_regs' - (parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx r r_regs' + (parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) initRegs l l_regs' + (parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx initRegs r r_regs' let ret = StatementList { stmtsIdent = idx , stmtsNonterm = blockStmts b @@ -1048,13 +1139,13 @@ parseBlock ctx idx b regs = do } pure (ret, falseIdx) - FetchAndExecute s -> do - parseFetchAndExecute ctx idx (blockStmts b) regs s + FetchAndExecute finalRegs -> do + parseFetchAndExecute ctx idx initRegs (Seq.fromList (blockStmts b)) absProcState finalRegs -- Do nothing when this block ends in a translation error. TranslateError _ msg -> do -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts ainfo regs (blockStmts b) + let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) let ret = StatementList { stmtsIdent = idx , stmtsNonterm = blockStmts b @@ -1064,7 +1155,7 @@ parseBlock ctx idx b regs = do pure (ret, idx+1) ArchTermStmt ts s -> do -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts ainfo regs (blockStmts b) + let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b) let abst = finalAbsBlockState absProcState' s -- Compute possible next IPS. @@ -1083,15 +1174,16 @@ parseBlock ctx idx b regs = do -- | This evalutes the statements in a block to expand the information known -- about control flow targets of this block. addBlocks :: ArchSegmentOff arch - -- ^ Address of theze blocks - -> FoundAddr arch - -- ^ State leading to explore block - -> Int - -- ^ Number of blocks covered - -> Map Word64 (Block arch ids) - -- ^ Map from labelIndex to associated block - -> FunM arch s ids () -addBlocks src finfo sz blockMap = + -- ^ Address of theze blocks + -> FoundAddr arch + -- ^ State leading to explore block + -> RegState (ArchReg arch) (Value arch ids) + -> Int + -- ^ Number of blocks covered + -> Map Word64 (Block arch ids) + -- ^ Map from labelIndex to associated block + -> FunM arch s ids () +addBlocks src finfo initRegs sz blockMap = case Map.lookup 0 blockMap of Nothing -> do error $ "addBlocks given empty blockRegion." @@ -1112,7 +1204,7 @@ addBlocks src finfo sz blockMap = , _intraJumpTargets = [] , _newFunctionAddrs = [] } - let ((pblock,_), ps) = runState (parseBlock ctx 0 b regs) ps0 + let ((pblock,_), ps) = runState (parseBlock ctx 0 initRegs b regs) ps0 let pb = ParsedBlock { pblockAddr = src , blockSize = sz , blockReason = foundReason finfo @@ -1125,6 +1217,26 @@ addBlocks src finfo sz blockMap = . markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs) mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) +-- | Record an error block with no statements for the given address. +recordErrorBlock :: ArchSegmentOff arch -> FoundAddr arch -> Maybe String -> FunM arch s ids () +recordErrorBlock addr finfo maybeError = do + s <- use curFunCtx + let mem = memory s + let errMsg = maybe "Unknown error" Text.pack maybeError + let stmts = StatementList + { stmtsIdent = 0 + , stmtsNonterm = [] + , stmtsTerm = ParsedTranslateError errMsg + , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) + } + let pb = ParsedBlock { pblockAddr = addr + , blockSize = 0 + , blockReason = foundReason finfo + , blockAbstractState = foundAbstractState finfo + , blockStatementList = stmts + } + id %= addFunBlock addr pb + transfer :: ArchSegmentOff arch -> FunM arch s ids () transfer addr = do s <- use curFunCtx @@ -1143,43 +1255,31 @@ transfer addr = do Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o _ -> fromInteger (segoffBytesLeft addr) let ab = foundAbstractState finfo - (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab - + case mkInitialRegsForBlock ainfo addr ab of + Left msg -> do + recordErrorBlock addr finfo (Just msg) + Right initRegs -> do + (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr initRegs maxSize + -- If no blocks are returned, then we just add an empty parsed block. + if null bs0 then do + recordErrorBlock addr finfo maybeError + else do + -- Rewrite returned blocks to simplify expressions #ifdef USE_REWRITER - bs1 <- do - let archStmt = rewriteArchStmt ainfo - let secAddrMap = memSectionIndexMap mem - liftST $ do - ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap - traverse (rewriteBlock ainfo ctx) bs0 + bs1 <- do + let archStmt = rewriteArchStmt ainfo + let secAddrMap = memSectionIndexMap mem + liftST $ do + ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap + traverse (rewriteBlock ainfo ctx) bs0 #else - bs1 <- pure bs0 + bs1 <- pure bs0 #endif - - -- If no blocks are returned, then we just add an empty parsed block. - if null bs1 then do - let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError - let stmts = StatementList - { stmtsIdent = 0 - , stmtsNonterm = [] - , stmtsTerm = ParsedTranslateError errMsg - , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) - } - let pb = ParsedBlock { pblockAddr = addr - , blockSize = sz - , blockReason = foundReason finfo - , blockAbstractState = foundAbstractState finfo - , blockStatementList = stmts - } - id %= addFunBlock addr pb - else do - -- Rewrite returned blocks to simplify expressions - - -- Compute demand set - let bs = bs1 -- eliminateDeadStmts ainfo bs1 - -- Call transfer blocks to calculate parsedblocks - let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] - addBlocks addr finfo sz blockMap + -- Compute demand set + let bs = bs1 -- eliminateDeadStmts ainfo bs1 + -- Call transfer blocks to calculate parsedblocks + let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] + addBlocks addr finfo initRegs sz blockMap ------------------------------------------------------------------------ -- Main loop diff --git a/base/src/Data/Macaw/Discovery/AbsEval.hs b/base/src/Data/Macaw/Discovery/AbsEval.hs index a0f63a7f..e1a29c0d 100644 --- a/base/src/Data/Macaw/Discovery/AbsEval.hs +++ b/base/src/Data/Macaw/Discovery/AbsEval.hs @@ -88,8 +88,9 @@ absEvalStmt info stmt = withArchConstraints info $ pure () -- This takes a processor state and updates it based on executing each statement. -absEvalStmts :: ArchitectureInfo arch +absEvalStmts :: Foldable t + => ArchitectureInfo arch -> AbsProcessorState (ArchReg arch) ids - -> [Stmt arch ids] + -> t (Stmt arch ids) -> AbsProcessorState (ArchReg arch) ids absEvalStmts info r stmts = execState (mapM_ (absEvalStmt info) stmts) r diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index 157cc340..8c9aadd3 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -10,6 +10,7 @@ discovery. {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -50,6 +51,7 @@ import qualified Data.ByteString.Char8 as BSC import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Parameterized.Classes +import qualified Data.Parameterized.Map as MapF import Data.Parameterized.Some import Data.Set (Set) import qualified Data.Set as Set @@ -133,6 +135,18 @@ data ParsedTermStmt arch ids -- | A call with the current register values and location to return to or 'Nothing' if this is a tail call. = ParsedCall !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch)) + -- | @PLTStub regs addr@ denotes a terminal statement that has been identified as a PLT stub + -- for calling the given relocation. + -- + -- This is a special case of a tail call. It has been added + -- separately because it occurs frequently in dynamically linked + -- code, and we can use this to recognize PLT stubs. + -- + -- The register set only contains registers that were changed in + -- the function. Other registers have the initial value. + | PLTStub !(MapF.MapF (ArchReg arch) (Value arch ids)) + !(ArchSegmentOff arch) + !(Relocation (ArchAddrWidth arch)) -- | A jump to an explicit address within a function. | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) -- | A lookup table that branches to one of a vector of addresses. @@ -174,11 +188,14 @@ ppTermStmt :: ArchConstraints arch ppTermStmt ppOff tstmt = case tstmt of ParsedCall s Nothing -> - text "tail call" <$$> + text "tail_call" <$$> indent 2 (pretty s) ParsedCall s (Just next) -> text "call and return to" <+> text (show next) <$$> indent 2 (pretty s) + PLTStub regs addr r -> + text "call_via_got" <+> text (show (relocationSym r)) <+> "(at" <+> text (show addr) PP.<> ")" <$$> + indent 2 (ppRegMap regs) ParsedJump s addr -> text "jump" <+> text (show addr) <$$> indent 2 (pretty s) @@ -281,7 +298,7 @@ parsedBlocks = lens _parsedBlocks (\s v -> s { _parsedBlocks = v }) instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where pretty info = text "function" <+> text (BSC.unpack (discoveredFunName info)) - <+> pretty "@" <+> pretty (show (discoveredFunAddr info)) + <+> "@" <+> pretty (show (discoveredFunAddr info)) <$$> vcat (pretty <$> Map.elems (info^.parsedBlocks)) diff --git a/base/src/Data/Macaw/Fold.hs b/base/src/Data/Macaw/Fold.hs index efaa9eed..f42d6f4b 100644 --- a/base/src/Data/Macaw/Fold.hs +++ b/base/src/Data/Macaw/Fold.hs @@ -53,8 +53,14 @@ emptyValueFold = -- | This folds over elements of a values in a values. -- --- It memoizes values so that it only evaluates assignments with the same id --- once. +-- It memoizes the results so that if an assignment is visited +-- multiple times, we only visit the children the first time it is +-- visited. On subsequent visits, `foldAssign` will still be called, +-- but the children will not be revisited. +-- +-- This makes the total time to visit linear with respect to the +-- number of children, but still allows determining whether a term is +-- shared. foldValueCached :: forall r arch ids tp . (Monoid r, FoldableFC (ArchFn arch)) => ValueFold arch ids r diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index e1f8734a..afa9781f 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -469,6 +469,15 @@ data Relocation w , relocationEndianness :: !Endianness -- ^ The byte order used to encode the relocation in -- memory. + , relocationJumpSlot :: !Bool + -- ^ Returns true if this is a jump slot relocation. + -- + -- This relocation is specifically used for global + -- offset table entries, and are typically resolved + -- when the function is first called rather than at + -- load time. The address will be initially the + -- entry sequence stub, and will be updated once + -- resolved by the stub. } -- | Short encoding of endianness for relocation pretty printing @@ -846,8 +855,8 @@ memBindSegmentIndex idx seg mem -- | A memory with no segments. emptyMemory :: AddrWidthRepr w -> Memory w -emptyMemory w = Memory { memAddrWidth = w - , memSegmentMap = Map.empty +emptyMemory w = Memory { memAddrWidth = w + , memSegmentMap = Map.empty , memSectionIndexMap = Map.empty , memSegmentIndexMap = Map.empty } diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index d51b8bb7..7f65f195 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -383,6 +383,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 8 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = True } Elf.R_X86_64_PC32 -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -390,8 +391,9 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationOffset = off , relocationIsRel = True , relocationSize = 4 - , relocationIsSigned = False + , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } Elf.R_X86_64_32 -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -401,6 +403,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } Elf.R_X86_64_32S -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -410,6 +413,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 4 , relocationIsSigned = True , relocationEndianness = LittleEndian + , relocationJumpSlot = False } Elf.R_X86_64_64 -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -419,6 +423,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 8 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } -- R_X86_64_GLOB_DAT are used to update GOT entries with their -- target address. They are similar to R_x86_64_64 except appear @@ -432,6 +437,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 8 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } -- Jhx Note. These will be needed to support thread local variables. @@ -465,6 +471,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = end + , relocationJumpSlot = False } Elf.R_ARM_RELATIVE -> do -- This relocation has the value B(S) + A where @@ -500,6 +507,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = end + , relocationJumpSlot = False } Elf.R_ARM_JUMP_SLOT -> do -- This is a PLT relocation @@ -517,6 +525,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = end + , relocationJumpSlot = True } tp -> do relocError $ RelocationUnsupportedType (show tp) diff --git a/x86/src/Data/Macaw/X86.hs b/x86/src/Data/Macaw/X86.hs index 7ab0975a..55a61501 100644 --- a/x86/src/Data/Macaw/X86.hs +++ b/x86/src/Data/Macaw/X86.hs @@ -167,12 +167,11 @@ instance MemWidth w => Show (X86TranslateError w) where -- | Signal an error from the initial address. -initError :: ExploreLoc -- ^ Location to explore from. +initError :: MemSegmentOff 64 -- ^ Location to explore from. + -> RegState X86Reg (Value X86_64 ids) -> X86TranslateError 64 -> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64)) -initError loc err = do - let addr = loc_ip loc - let s = initX86State loc +initError addr s err = do let b = Block { blockLabel = 0 , blockStmts = [] , blockTerm = TranslateError s (Text.pack (show err)) @@ -276,7 +275,9 @@ disassembleFixedBlock :: NonceGenerator (ST st_s) ids -- ^ Number of bytes to translate -> ST st_s (Either (X86TranslateError 64) (Block X86_64 ids)) disassembleFixedBlock gen loc sz = do - case segoffContentsAfter (loc_ip loc) of + let addr = loc_ip loc + let initRegs = initX86State loc + case segoffContentsAfter addr of Left err -> do pure $ Left $ FlexdisMemoryError err Right fullContents -> do @@ -284,8 +285,8 @@ disassembleFixedBlock gen loc sz = do Left _err -> do error $ "Could not split memory." Right (contents,_) -> do - let pblock = emptyPreBlock (initX86State loc) - disassembleFixedBlock' gen pblock 0 (loc_ip loc) contents + let pblock = emptyPreBlock initRegs + disassembleFixedBlock' gen pblock 0 addr contents -- | Translate block, returning blocks read, ending -- PC, and an optional error. and ending PC. @@ -328,13 +329,14 @@ disassembleBlock :: forall s -> ST s (Block X86_64 s, MemWord 64, Maybe (X86TranslateError 64)) disassembleBlock nonce_gen loc max_size = do let addr = loc_ip loc + let regs = initX86State loc let sz = segoffOffset addr + max_size (b, next_ip_off, maybeError) <- case segoffContentsAfter addr of Left msg -> do - initError loc (FlexdisMemoryError msg) + initError addr regs (FlexdisMemoryError msg) Right contents -> do - let pblock = emptyPreBlock (initX86State loc) + let pblock = emptyPreBlock regs disassembleBlockImpl nonce_gen pblock 0 addr sz contents assert (next_ip_off > segoffOffset addr) $ do let block_sz = next_ip_off - segoffOffset addr @@ -416,56 +418,67 @@ transferAbsValue r f = VExtractF128 {} -> TopV VInsert {} -> TopV + -- | Disassemble block, returning either an error, or a list of blocks -- and ending PC. -tryDisassembleBlockFromAbsState :: forall s ids - . NonceGenerator (ST s) ids - -> MemSegmentOff 64 - -- ^ Address to disassemble at - -> Int - -- ^ Maximum size of this block - -> AbsBlockState X86Reg - -- ^ Abstract state of processor for defining state. - -> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String) -tryDisassembleBlockFromAbsState nonceGen addr maxSize ab = do +initRegsFromAbsState :: forall ids + . MemSegmentOff 64 + -- ^ Address to disassemble at + -> AbsBlockState X86Reg + -- ^ Abstract state of processor for defining state. + -> Either String (RegState X86Reg (Value X86_64 ids)) +initRegsFromAbsState addr ab = do t <- case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of - Nothing -> throwError "Could not determine height of X87 stack." + Nothing -> Left "Could not determine height of X87 stack." Just t -> pure t d <- case asConcreteSingleton (ab^.absRegState^.boundValue DF) of Nothing -> do - throwError $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF) + Left $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF) Just d -> pure d - let loc = ExploreLoc { loc_ip = addr - , loc_x87_top = fromInteger t - , loc_df_flag = d /= 0 - } + pure $ initX86State $ + ExploreLoc { loc_ip = addr + , loc_x87_top = fromInteger t + , loc_df_flag = d /= 0 + } + +-- | Disassemble block, returning either an error, or a list of blocks +-- and ending PC. +tryDisassembleBlock :: forall s ids + . NonceGenerator (ST s) ids + -> MemSegmentOff 64 + -- ^ Address to disassemble at + -> RegState X86Reg (Value X86_64 ids) + -- ^ Initial registers + -> Int + -- ^ Maximum size of this block + -> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String) +tryDisassembleBlock nonceGen addr initRegs maxSize = do let off = segoffOffset addr - let pblock = emptyPreBlock (initX86State loc) (b, nextIPOff, maybeError) <- lift $ case segoffContentsAfter addr of Left msg -> do - initError loc (FlexdisMemoryError msg) + initError addr initRegs (FlexdisMemoryError msg) Right contents -> do - disassembleBlockImpl nonceGen pblock 0 addr (off + fromIntegral maxSize) contents + disassembleBlockImpl nonceGen (emptyPreBlock initRegs) 0 addr (off + fromIntegral maxSize) contents let sz :: Int sz = fromIntegral $ nextIPOff - off pure $! (b, sz, show <$> maybeError) -- | Disassemble block, returning either an error, or a list of blocks -- and ending PC. -disassembleBlockFromAbsState :: forall s ids - . NonceGenerator (ST s) ids - -> MemSegmentOff 64 - -- ^ Address to disassemble at - -> Int - -- ^ Maximum size of this block - -> AbsBlockState X86Reg - -- ^ Abstract state of processor for defining state. - -> ST s ([Block X86_64 ids], Int, Maybe String) -disassembleBlockFromAbsState nonceGen addr maxSize ab = do - mr <- runExceptT $ tryDisassembleBlockFromAbsState nonceGen addr maxSize ab +disassembleBlockWithRegs :: forall s ids + . NonceGenerator (ST s) ids + -> MemSegmentOff 64 + -- ^ Address to disassemble at + -> RegState X86Reg (Value X86_64 ids) + -> Int + -- ^ Maximum size of this block + -- ^ Abstract state of processor for defining state. + -> ST s ([Block X86_64 ids], Int, Maybe String) +disassembleBlockWithRegs nonceGen addr initRegs maxSize = do + mr <- runExceptT $ tryDisassembleBlock nonceGen addr initRegs maxSize case mr of Left msg -> pure ([], 0, Just msg) Right (b,sz, merr) -> pure ([b],sz,merr) @@ -475,10 +488,10 @@ disassembleBlockFromAbsState nonceGen addr maxSize ab = do -- -- This can also return Nothing if the call is not supported. identifyX86Call :: Memory 64 - -> [Stmt X86_64 ids] + -> Seq (Stmt X86_64 ids) -> RegState X86Reg (Value X86_64 ids) -> Maybe (Seq (Stmt X86_64 ids), MemSegmentOff 64) -identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty +identifyX86Call mem stmts0 s = go stmts0 Seq.empty where -- Get value of stack pointer next_sp = s^.boundValue sp_reg -- Recurse on statements. @@ -521,13 +534,13 @@ checkForReturnAddrX86 absState -- An instruction executing a return from a function will place the -- ReturnAddr value (placed on the top of the stack by -- 'initialX86AbsState' above) into the instruction pointer. -identifyX86Return :: [Stmt X86_64 ids] +identifyX86Return :: Seq (Stmt X86_64 ids) -> RegState X86Reg (Value X86_64 ids) -> AbsProcessorState X86Reg ids -> Maybe (Seq (Stmt X86_64 ids)) identifyX86Return stmts s finalRegSt8 = case transferValue finalRegSt8 (s^.boundValue ip_reg) of - ReturnAddr -> Just $ Seq.fromList stmts + ReturnAddr -> Just stmts _ -> Nothing -- | Return state post call @@ -581,7 +594,8 @@ x86_64_info preservePred = ArchitectureInfo { withArchConstraints = \x -> x , archAddrWidth = Addr64 , archEndianness = LittleEndian - , disassembleFn = disassembleBlockFromAbsState + , mkInitialRegsForBlock = initRegsFromAbsState + , disassembleFn = disassembleBlockWithRegs , mkInitialAbsState = \_ addr -> initialX86AbsState addr , absEvalArchFn = transferAbsValue , absEvalArchStmt = \s _ -> s From 127e7fbe346244fc9c616c4212458d884f31d088 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Sat, 17 Nov 2018 15:52:14 -0800 Subject: [PATCH 04/10] Bump submodules. --- deps/crucible | 2 +- deps/elf-edit | 2 +- deps/flexdis86 | 2 +- deps/llvm-pretty | 2 +- deps/parameterized-utils | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deps/crucible b/deps/crucible index 459ae2a3..8cc059b0 160000 --- a/deps/crucible +++ b/deps/crucible @@ -1 +1 @@ -Subproject commit 459ae2a3dfc033eb7f8129f3068859e73dc40cb3 +Subproject commit 8cc059b0ad0a8bcd8b944d2bb3754d65782b2b2f diff --git a/deps/elf-edit b/deps/elf-edit index afa3dcf4..774c405d 160000 --- a/deps/elf-edit +++ b/deps/elf-edit @@ -1 +1 @@ -Subproject commit afa3dcf45afc5429139cae09f8584a722a737946 +Subproject commit 774c405d0d1107e185e73f4f24e85a1ced8859d8 diff --git a/deps/flexdis86 b/deps/flexdis86 index e53a3359..14f33876 160000 --- a/deps/flexdis86 +++ b/deps/flexdis86 @@ -1 +1 @@ -Subproject commit e53a3359b51ad1a91758144a85d8a8eb635d91d9 +Subproject commit 14f33876b03fa1b832392f37b46c1b7cb0b8d4e8 diff --git a/deps/llvm-pretty b/deps/llvm-pretty index 6cc69900..47aace66 160000 --- a/deps/llvm-pretty +++ b/deps/llvm-pretty @@ -1 +1 @@ -Subproject commit 6cc6990017c4d875311a52a6d3988fb7a962ed7a +Subproject commit 47aace66cdc5abc2294918b9665ef14b4d22762e diff --git a/deps/parameterized-utils b/deps/parameterized-utils index fa608573..1c37bf1e 160000 --- a/deps/parameterized-utils +++ b/deps/parameterized-utils @@ -1 +1 @@ -Subproject commit fa6085733f507d07c7c417d238e1a6c575944680 +Subproject commit 1c37bf1eb8be0f7c8beacd0998a30bc932d9122d From 15477121768e86c38a9e4ca276bc9c72e4799dcb Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Sat, 17 Nov 2018 16:03:34 -0800 Subject: [PATCH 05/10] Bump parameterized-util version. --- deps/parameterized-utils | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/parameterized-utils b/deps/parameterized-utils index 1c37bf1e..de6a0be1 160000 --- a/deps/parameterized-utils +++ b/deps/parameterized-utils @@ -1 +1 @@ -Subproject commit 1c37bf1eb8be0f7c8beacd0998a30bc932d9122d +Subproject commit de6a0be19acb1c9562c017432751673169c43fd2 From a0a89083e88024286360f99130e0693740a055ec Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Mon, 3 Dec 2018 20:52:44 -0800 Subject: [PATCH 06/10] Support X86 Relative; other minor changes. --- base/macaw-base.cabal | 2 +- base/src/Data/Macaw/CFG/Core.hs | 9 -- base/src/Data/Macaw/Discovery.hs | 8 +- base/src/Data/Macaw/Memory.hs | 113 ++++++++++++++---------- base/src/Data/Macaw/Memory/ElfLoader.hs | 50 +++++++---- deps/crucible | 2 +- deps/elf-edit | 2 +- deps/flexdis86 | 2 +- deps/llvm-pretty | 2 +- deps/parameterized-utils | 2 +- 10 files changed, 112 insertions(+), 80 deletions(-) diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index 29adf298..5037340b 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -31,7 +31,7 @@ library base >= 4, ansi-wl-pprint, binary, - binary-symbols >= 0.1.1, + binary-symbols >= 0.1.2, bytestring, containers >= 0.5.8.1, elf-edit >= 0.32, diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index 500b16dc..5eeee45f 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -724,12 +724,3 @@ instance ArchConstraints arch => Show (Stmt arch ids) where refsInValue :: Value arch ids tp -> Set (Some (AssignId ids)) refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v) refsInValue _ = Set.empty - --- | Return the assign identifiers in the assignment right-hand side. --- --- Note. This does not recursively evaluate references in values, it --- just returns the assignment identifiers. -refsInAssignRhs :: FoldableFC (ArchFn arch) - => AssignRhs arch (Value arch ids) tp - -> Set (Some (AssignId ids)) -refsInAssignRhs rhs = foldMapFC refsInValue rhs diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index fdf76d69..c3707847 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -650,8 +650,10 @@ resolveAsAbsoluteAddr :: forall w -> Maybe (MemAddr w) resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $ case l of - [ByteRegion bs] -> do - absoluteAddr <$> addrRead endianness bs + [ByteRegion bs] -> + case addrRead endianness bs of + Just a -> pure $! absoluteAddr a + Nothing -> error $ "internal: resolveAsAbsoluteAddr given short chunk list." [RelocationRegion r] -> do when (relocationIsRel r) $ Nothing case relocationSym r of @@ -662,6 +664,8 @@ resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $ SegmentBaseAddr idx -> do seg <- Map.lookup idx (memSegmentIndexMap mem) pure $! segmentOffAddr seg (relocationOffset r) + LoadBaseAddr -> do + memBaseAddr mem _ -> Nothing -- This function resolves jump table entries. diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index afa9781f..43dd89f9 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -19,15 +19,18 @@ module Data.Macaw.Memory , memAddrWidth , memWidth , memSegments - , memSectionIndexMap - , memSegmentIndexMap , memAsAddrPairs -- * Constructing memory , emptyMemory , insertMemSegment , InsertError(..) , showInsertError + -- * Load values + , memBaseAddr + , memSetBaseAddr , memBindSectionIndex + , memSectionIndexMap + , memSegmentIndexMap , memBindSegmentIndex -- * Memory segments , MemSegment @@ -52,6 +55,7 @@ module Data.Macaw.Memory , memWord , memWordToUnsigned , memWordToSigned + , addrRead -- * Addresses , MemAddr(..) , absoluteAddr @@ -270,7 +274,7 @@ bsWord64 LittleEndian = bsWord64le -- -- Operations on it require the `MemWidth` constraint to be satisfied, so in practice -- this only works for 32 and 64-bit values. -newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 } +newtype MemWord (w :: Nat) = MemWord { memWordValue :: Word64 } -- | Convert word64 @x@ into mem word @x mod 2^w-1@. memWord :: forall w . MemWidth w => Word64 -> MemWord w @@ -312,14 +316,21 @@ class (1 <= w) => MemWidth w where -- | Rotates the value by the given index. addrRotate :: MemWord w -> Int -> MemWord w - -- | Read an address with the given endianess. - -- - -- This returns nothing if the bytestring is too short. - addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w) +-- | Read an address with the given endianess. +-- +-- This returns nothing if the bytestring is too short. +addrRead :: forall w . MemWidth w => Endianness -> BS.ByteString -> Maybe (MemWord w) +addrRead e s = + case addrWidthRepr (Proxy :: Proxy w) of + Addr32 | BS.length s < 4 -> Nothing + | otherwise -> Just $ MemWord $ fromIntegral $ bsWord32 e s + Addr64 | BS.length s < 8 -> Nothing + | otherwise -> Just $ MemWord $ bsWord64 e s + -- | Return the value represented by the MemWord as an unsigned integer. memWordToUnsigned :: MemWord w -> Integer -memWordToUnsigned = fromIntegral . _memWordValue +memWordToUnsigned = fromIntegral . memWordValue -- | Treat the word as a signed integer. memWordToSigned :: MemWidth w => MemWord w -> Integer @@ -386,18 +397,12 @@ instance MemWidth 32 where addrRotate (MemWord w) i = MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) addrSize _ = 4 - addrRead e s - | BS.length s < 4 = Nothing - | otherwise = Just $ MemWord $ fromIntegral $ bsWord32 e s instance MemWidth 64 where addrWidthRepr _ = Addr64 addrWidthMask _ = 0xffffffffffffffff addrRotate (MemWord w) i = MemWord (w `rotate` i) addrSize _ = 8 - addrRead e s - | BS.length s < 8 = Nothing - | otherwise = Just $ MemWord $ bsWord64 e s -- | Number of bytes in an address addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a @@ -824,6 +829,8 @@ data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w) -- ^ Map from registered section indices to the segment offset it is loaded at. , memSegmentIndexMap :: !(Map SegmentIndex (MemSegment w)) -- ^ Map from registered segment indices to associated segment. + , memBaseAddr :: !(Maybe (MemAddr w)) + -- ^ This denotes the base region for loads. } -- | Return the set of memory segments in memory. @@ -853,12 +860,17 @@ memBindSegmentIndex idx seg mem | otherwise = mem { memSegmentIndexMap = Map.insert idx seg (memSegmentIndexMap mem) } +-- | Set the region index used or the load addresses. +memSetBaseAddr :: MemAddr w -> Memory w -> Memory w +memSetBaseAddr r m = m { memBaseAddr = Just r } + -- | A memory with no segments. emptyMemory :: AddrWidthRepr w -> Memory w emptyMemory w = Memory { memAddrWidth = w , memSegmentMap = Map.empty , memSectionIndexMap = Map.empty , memSegmentIndexMap = Map.empty + , memBaseAddr = Nothing } -- | Return segments with executable permissions. @@ -952,7 +964,7 @@ addrLeastBit (MemAddr _ (MemWord off)) = off `testBit` 0 -- | Increment an address by a fixed amount. incAddr :: MemWidth w => Integer -> MemAddr w -> MemAddr w -incAddr o (MemAddr i off) = MemAddr i (off + fromInteger o) +incAddr o a = a { addrOffset = addrOffset a + fromInteger o } -- | Returns the number of bytes between two addresses if they point to -- the same region and `Nothing` if they are different segments. @@ -986,10 +998,9 @@ resolveAddr :: Memory w -> RegionIndex -> MemWord w -> Maybe (MemSegmentOff w) resolveAddr = resolveRegionOff {-# DEPRECATED resolveAddr "Use resolveRegionOff" #-} - -- | Return the address of a segment offset. segoffAddr :: MemWidth w => MemSegmentOff w -> MemAddr w -segoffAddr (MemSegmentOff seg off) = relativeAddr seg off +segoffAddr (MemSegmentOff seg off) = segmentOffAddr seg off -- | Return the segment associated with the given address if well-defined. resolveAbsoluteAddr :: Memory w -> MemWord w -> Maybe (MemSegmentOff w) @@ -1343,40 +1354,46 @@ addrContentsAfter mem addr = do addrWidthClass (memAddrWidth mem) $ segoffContentsAfter =<< resolveMemAddr mem addr --- | Read a bytestring from a sequence of statements. --- --- This is a helper method for @readByteString@ below. -readByteString' :: MemWidth w - => MemSegmentOff w - -- ^ Initial starting address - -> [BS.ByteString] - -- ^ Bytestring read so far (in reverse order) - -> [MemChunk w] - -- ^ Remaining segments to read from. +-- | Attempt to read a bytestring of the given length +readByteString' :: RegionIndex + -- ^ Region we are in. + -> BS.ByteString + -- ^ Bytestring read so far. -> Word64 - -- ^ Number of bytes remaining to read. - -> Either (MemoryError w) [BS.ByteString] -readByteString' _ prev _ 0 = + -- ^ Bytes read so far. + -> [MemChunk w] + -- ^ Remaining memory chunks to read from. + -> Word64 + -- ^ Total remaining number of bytes to read. + -> Either (MemoryError w) BS.ByteString +readByteString' _ prev _ _ 0 = pure $! prev -readByteString' _ _ [] _ = error "internal: readByteString' given too many bytes." -readByteString' initAddr prev (ByteRegion bs:rest) cnt = - if toInteger cnt <= toInteger (BS.length bs) then - pure $! BS.take (fromIntegral cnt) bs : prev +readByteString' _ _ _ [] _ = error "internal: readByteString' given too many bytes." +readByteString' reg prev off (ByteRegion bs:rest) cnt = do + let sz = fromIntegral (BS.length bs) + if cnt <= sz then + pure $! prev <> BS.take (fromIntegral cnt) bs else do - let cnt' = cnt - fromIntegral (BS.length bs) - readByteString' initAddr (bs:prev) rest cnt' -readByteString' initAddr prev (RelocationRegion r:_) _ = do - let cnt = sum (toInteger . BS.length <$> prev) - let addr = incAddr cnt (relativeSegmentAddr initAddr) + let off' = off + sz + let cnt' = cnt - sz + seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt' +readByteString' reg _ off (RelocationRegion r:_) _ = do + let addr = MemAddr { addrBase = reg, addrOffset = MemWord off } Left $! UnexpectedRelocation addr r -readByteString' initAddr prev (BSSRegion sz:rest) cnt = - if toInteger cnt <= toInteger sz then - pure $! BS.replicate (fromIntegral cnt) 0 : prev +readByteString' reg prev off (BSSRegion sz0:rest) cnt = do + let sz :: Word64 + sz = memWordValue sz0 + if cnt <= sz then do + when (cnt > fromIntegral (maxBound :: Int)) $ do + error $ "Illegal size " ++ show cnt + pure $! prev <> BS.replicate (fromIntegral cnt) 0 else do - let cnt' = cnt - fromIntegral sz - let next = BS.replicate (fromIntegral sz) 0 : prev - seq cnt' $ seq next $ - readByteString' initAddr next rest cnt' + when (sz > fromIntegral (maxBound :: Int)) $ do + error $ "Illegal size " ++ show cnt + let bs = BS.replicate (fromIntegral sz) 0 + let off' = off + sz + let cnt' = cnt - sz + seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt' -- | Attempt to read a bytestring of the given length readByteString :: Memory w @@ -1390,7 +1407,7 @@ readByteString mem addr cnt = addrWidthClass (memAddrWidth mem) $ do Left $! InvalidRead segOff cnt -- Get contents after segment l <- segoffContentsAfter segOff - mconcat . reverse <$> readByteString' segOff [] l cnt + readByteString' (addrBase addr) BS.empty (memWordValue (addrOffset addr)) l cnt -- | Read an address from the value in the segment or report a memory -- error. @@ -1461,7 +1478,7 @@ relativeSegmentContents memSegs = concatMap relativeOffset memSegs -- Each MemSegment has a segmentOffset indicating the offset from segmentBase its located. -- This makes the offsets within the MemChunk relative to that segmentOffset. relativeOffset :: (MemWidth w) => MemSegment w -> [(MemAddr w, MemChunk w)] - relativeOffset seg = map (\(contentOffset,r) -> (relativeAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg + relativeOffset seg = map (\(contentOffset,r) -> (segmentOffAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg -- | Naive string matching algorithm identifies matches to given -- pattern within the list of memory segments and their corresponding diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index 7f65f195..94c9936b 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -179,7 +179,11 @@ data MemLoadWarning | ExpectedSectionSymbolLocal | InvalidSectionSymbolIndex !Elf.ElfSectionIndex | UnsupportedProcessorSpecificSymbolIndex !SymbolName !ElfSectionIndex - | IgnoreRelocation !RelocationError + | IgnoreRelocation !Integer !String !RelocationError + -- ^ @IgnoreRelocation idx tp err@ warns we ignored the location at index @idx@ due to @err@. + -- + -- @tp@ is a string representing the type which we print, because usually errors come because + -- we don't support that type or only partially implement it. ppSymbol :: SymbolName -> String ppSymbol "" = "unnamed symbol" @@ -224,8 +228,8 @@ instance Show MemLoadWarning where "Expected section symbol to have a valid index instead of " ++ show idx ++ "." show (UnsupportedProcessorSpecificSymbolIndex nm idx) = "Could not resolve symbol index " ++ show idx ++ " for symbol " ++ BSC.unpack nm ++ "." - show (IgnoreRelocation err) = - "Ignoring relocation: " ++ show err + show (IgnoreRelocation idx typeName err) = + "Ignoring relocation " ++ show idx ++ " with type " ++ typeName ++ ": " ++ show err data MemLoaderState w = MLS { _mlsMemory :: !(Memory w) , mlsEndianness :: !Endianness @@ -440,6 +444,18 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationJumpSlot = False } + Elf.R_X86_64_RELATIVE -> do + when (Elf.relSym rel /= 0) $ do + Left $ RelocationBadSymbolIndex (fromIntegral (Elf.relSym rel)) + pure $ Relocation { relocationSym = LoadBaseAddr + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 8 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + , relocationJumpSlot = False + } + -- Jhx Note. These will be needed to support thread local variables. -- Elf.R_X86_64_TPOFF32 -> undefined -- Elf.R_X86_64_GOTTPOFF -> undefined @@ -562,12 +578,13 @@ resolveRela :: ( MemWidth w ) => SymbolTable -> RelocationResolver tp + -> Integer -- ^ Index of relocation -> Elf.RelaEntry tp -> ResolveFn (MemLoader w) w -resolveRela symtab resolver rela msegIdx _ = +resolveRela symtab resolver relaIdx rela msegIdx _ = case resolver msegIdx symtab (Elf.relaToRel rela) (fromIntegral (Elf.relaAddend rela)) IsRela of Left e -> do - addWarning (IgnoreRelocation e) + addWarning (IgnoreRelocation relaIdx (show (Elf.relaType rela)) e) pure Nothing Right r -> do pure $ Just r @@ -579,9 +596,10 @@ resolveRel :: ( MemWidth w => Endianness -- ^ Endianness of Elf file -> SymbolTable -- ^ Symbol table -> RelocationResolver tp + -> Integer -- ^ Index of relocation -> Elf.RelEntry tp -> ResolveFn (MemLoader w) w -resolveRel end symtab resolver rel msegIdx bytes = do +resolveRel end symtab resolver relIdx rel msegIdx bytes = do -- Get the number of bits in the addend let bits = Elf.relocTargetBits (Elf.relType rel) -- Compute the addended by masking off the low order bits, and @@ -597,7 +615,7 @@ resolveRel end symtab resolver rel msegIdx bytes = do -- Update the resolver. case resolver msegIdx symtab rel (fromInteger saddend) IsRel of Left e -> do - addWarning (IgnoreRelocation e) + addWarning (IgnoreRelocation relIdx (show (Elf.relType rel)) e) pure Nothing Right r -> do pure $ Just r @@ -616,12 +634,13 @@ relocFromRela :: ( Elf.IsRelocationType tp ) => SymbolTable -> RelocationResolver tp + -> Integer -- ^ Index of relocation entry for error reporting -> Elf.RelaEntry tp -> (MemWord w, RelocEntry (MemLoader w) w) -relocFromRela symtab resolver r = +relocFromRela symtab resolver idx r = ( fromIntegral (Elf.relaAddr r) , RelocEntry { relocEntrySize = relocTargetBytes (Elf.relaType r) - , applyReloc = resolveRela symtab resolver r + , applyReloc = resolveRela symtab resolver idx r } ) @@ -633,12 +652,13 @@ relocFromRel :: ( Elf.IsRelocationType tp => Endianness -> SymbolTable -> RelocationResolver tp + -> Integer -- ^ Index of relocation entry for error reporting. -> Elf.RelEntry tp -> (MemWord w, RelocEntry (MemLoader w) w) -relocFromRel end symtab resolver r = +relocFromRel end symtab resolver idx r = ( fromIntegral (Elf.relAddr r) , RelocEntry { relocEntrySize = relocTargetBytes (Elf.relType r) - , applyReloc = resolveRel end symtab resolver r + , applyReloc = resolveRel end symtab resolver idx r } ) @@ -663,7 +683,7 @@ relocMapFromRelAndRela dta resolver symtab _ (Just relaBuffer) = do addWarning (RelocationParseFailure msg) pure Map.empty Right entries -> do - pure $ Map.fromList $ relocFromRela symtab resolver <$> entries + pure $ Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do w <- uses mlsMemory memAddrWidth reprConstraints w $ do @@ -672,7 +692,7 @@ relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do addWarning (RelocationParseFailure msg) pure Map.empty Right entries -> do - pure $ Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries + pure $ Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries resolveUndefinedSymbolReq :: SymbolName @@ -835,9 +855,9 @@ dynamicRelocationMap hdr ph contents = Right Elf.PLTEmpty -> do pure $! Map.empty Right (Elf.PLTRel entries) -> do - pure $! Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries + pure $! Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries Right (Elf.PLTRela entries) -> do - pure $! Map.fromList $ relocFromRela symtab resolver <$> entries + pure $! Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries pure $ Map.union loadtimeRelocs pltRelocs ------------------------------------------------------------------------ diff --git a/deps/crucible b/deps/crucible index 8cc059b0..0a93de3a 160000 --- a/deps/crucible +++ b/deps/crucible @@ -1 +1 @@ -Subproject commit 8cc059b0ad0a8bcd8b944d2bb3754d65782b2b2f +Subproject commit 0a93de3a8f6e3697818fa1e157029e9cc1b46808 diff --git a/deps/elf-edit b/deps/elf-edit index 774c405d..a9428d84 160000 --- a/deps/elf-edit +++ b/deps/elf-edit @@ -1 +1 @@ -Subproject commit 774c405d0d1107e185e73f4f24e85a1ced8859d8 +Subproject commit a9428d847f63cddfbd38eb3b0fea07153dbcd18f diff --git a/deps/flexdis86 b/deps/flexdis86 index 14f33876..ac18aaf5 160000 --- a/deps/flexdis86 +++ b/deps/flexdis86 @@ -1 +1 @@ -Subproject commit 14f33876b03fa1b832392f37b46c1b7cb0b8d4e8 +Subproject commit ac18aaf5fc6f857a9a81ef09956ab1021a3fcb39 diff --git a/deps/llvm-pretty b/deps/llvm-pretty index 47aace66..8bbd47cf 160000 --- a/deps/llvm-pretty +++ b/deps/llvm-pretty @@ -1 +1 @@ -Subproject commit 47aace66cdc5abc2294918b9665ef14b4d22762e +Subproject commit 8bbd47cf21379bf699297ebb2f69148c88d0ec72 diff --git a/deps/parameterized-utils b/deps/parameterized-utils index de6a0be1..2af20d4e 160000 --- a/deps/parameterized-utils +++ b/deps/parameterized-utils @@ -1 +1 @@ -Subproject commit de6a0be19acb1c9562c017432751673169c43fd2 +Subproject commit 2af20d4e1ed93b18503d7e73376fa993ed865357 From f03941d607a3b9d7c936a3131f4cd01286fdcd58 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Tue, 4 Dec 2018 00:04:23 -0800 Subject: [PATCH 07/10] Add test-plt test case, and fix discovery to use trust symbols. --- base/src/Data/Macaw/Discovery.hs | 4 +- base/src/Data/Macaw/Discovery/State.hs | 6 +- base/src/Data/Macaw/Memory/ElfLoader.hs | 1 + base/src/Data/Macaw/Memory/Symbols.hs | 3 + x86/tests/ElfX64Linux.hs | 142 +++++++++++----------- x86/tests/x64/test-plt.c | 10 ++ x86/tests/x64/test-plt.exe | Bin 0 -> 7376 bytes x86/tests/x64/test-plt.exe.expected | 19 +++ x86/tests/x64/test-tail-call.exe.expected | 2 +- 9 files changed, 113 insertions(+), 74 deletions(-) create mode 100644 x86/tests/x64/test-plt.c create mode 100755 x86/tests/x64/test-plt.exe create mode 100644 x86/tests/x64/test-plt.exe.expected diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index c3707847..cf933f07 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -1445,8 +1445,8 @@ cfgFromAddrs :: -- -- Each entry contains an address and the value stored in it. -> DiscoveryState arch -cfgFromAddrs ainfo mem symbols = - cfgFromAddrsAndState (emptyDiscoveryState mem symbols ainfo) +cfgFromAddrs ainfo mem addrSymMap = + cfgFromAddrsAndState (emptyDiscoveryState mem addrSymMap ainfo) ------------------------------------------------------------------------ -- Resolve functions with logging diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index 8c9aadd3..8f7ed910 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -372,15 +372,15 @@ emptyDiscoveryState :: Memory (ArchAddrWidth arch) -> ArchitectureInfo arch -- ^ architecture/OS specific information -> DiscoveryState arch -emptyDiscoveryState mem symbols info = +emptyDiscoveryState mem addrSymMap info = DiscoveryState { memory = mem - , symbolNames = symbols + , symbolNames = addrSymMap , archInfo = info , _globalDataMap = Map.empty , _funInfo = Map.empty , _unexploredFunctions = Map.empty - , _trustedFunctionEntryPoints = Set.empty + , _trustedFunctionEntryPoints = Map.keysSet addrSymMap , _exploreFnPred = Nothing } diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index 94c9936b..70d0c203 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -742,6 +742,7 @@ symbolDefTypeMap = Map.fromList , (,) Elf.STT_FUNC SymbolDefFunc , (,) Elf.STT_TLS SymbolDefThreadLocal , (,) Elf.STT_GNU_IFUNC SymbolDefIFunc + , (,) Elf.STT_NOTYPE SymbolDefNoType ] resolveDefinedSymbolDef :: ElfSymbolTableEntry wtp diff --git a/base/src/Data/Macaw/Memory/Symbols.hs b/base/src/Data/Macaw/Memory/Symbols.hs index e1cd16ee..e648f841 100644 --- a/base/src/Data/Macaw/Memory/Symbols.hs +++ b/base/src/Data/Macaw/Memory/Symbols.hs @@ -42,6 +42,9 @@ data SymbolDefType -- ^ This symbol denotes a thread local identifier | SymbolDefIFunc -- ^ This symbol is a "IFUNC" (e.g., it calls a function to resolve the symbol) + | SymbolDefNoType + -- ^ This symbol does not have a specified type. + -- | Describes whether an undefined symbol is required during linking. data SymbolRequirement diff --git a/x86/tests/ElfX64Linux.hs b/x86/tests/ElfX64Linux.hs index 91b455bd..d33ac86b 100644 --- a/x86/tests/ElfX64Linux.hs +++ b/x86/tests/ElfX64Linux.hs @@ -7,7 +7,7 @@ module ElfX64Linux ( ) where import Control.Lens ( (^.) ) -import Control.Monad ( unless ) +import Control.Monad ( unless, when ) import qualified Control.Monad.Catch as C import qualified Data.ByteString as B import qualified Data.Foldable as F @@ -16,6 +16,7 @@ import Data.Maybe import qualified Data.Set as S import Data.Typeable ( Typeable ) import Data.Word ( Word64 ) +import Numeric (showHex) import System.FilePath import qualified Test.Tasty as T import qualified Test.Tasty.HUnit as T @@ -35,9 +36,12 @@ elfX64LinuxTests :: [FilePath] -> T.TestTree elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest data Addr = Addr Int Word64 - deriving (Read,Show,Eq) + deriving (Read,Eq, Ord) -- ^ An address is a region index and offset +instance Show Addr where + showsPrec _ (Addr idx off) = showString "Addr " . shows idx . showString " 0x" . showHex off + -- | The type of expected results for test cases data ExpectedResult = R { funcs :: [(Addr, [(Addr, Integer)])] @@ -59,57 +63,79 @@ mkTest fp = T.testCase fp $ withELF elfFilename (testDiscovery fp) where elfFilename = dropExtension fp +toSegOff :: MM.Memory 64 -> Addr -> MM.MemSegmentOff 64 +toSegOff mem (Addr idx off) = do + let addr :: MM.MemAddr 64 + addr = MM.MemAddr idx (fromIntegral off) + case MM.asSegmentOff mem addr of + Just a -> a + Nothing -> + let ppSeg seg = " Segment: " ++ show (MM.segmentOffAddr seg 0) + in error $ "Could not resolve address : " ++ show addr ++ "\n" + ++ unlines (fmap ppSeg (MM.memSegments mem)) + +toAddr :: MM.MemSegmentOff 64 -> Addr +toAddr segOff = do + let addr :: MM.MemAddr 64 + addr = MM.segoffAddr segOff + in Addr (fromIntegral (MM.addrBase addr)) (fromIntegral (MM.addrOffset addr)) + -- | Run a test over a given expected result filename and the ELF file -- associated with it testDiscovery :: FilePath -> E.Elf 64 -> IO () -testDiscovery expectedFilename elf = - withMemory MM.Addr64 elf $ \mem entries -> do - let di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty entries [] - expectedString <- readFile expectedFilename - case readMaybe expectedString of - Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) - Just er -> do - let toSegOff :: Addr -> MM.MemSegmentOff 64 - toSegOff (Addr idx off) = do - let addr :: MM.MemAddr 64 - addr = MM.MemAddr idx (fromIntegral off) - case MM.asSegmentOff mem addr of - Just a -> a - Nothing -> do - let ppSeg seg = " Segment: " ++ show (MM.segmentOffAddr seg 0) - error $ "Could not resolve address : " ++ show addr ++ "\n" - ++ unlines (fmap ppSeg (MM.memSegments mem)) - let expectedEntries = M.fromList - [ (toSegOff entry - , S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts) - ) - | (entry, starts) <- funcs er - ] - ignoredBlocks :: S.Set (MM.MemSegmentOff 64) - ignoredBlocks = S.fromList (toSegOff <$> ignoreBlocks er) - T.assertEqual "Collection of discovered function starting points" +testDiscovery expectedFilename elf = do + let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing + , MM.loadRegionBaseOffset = 0 + } + (warn, mem, mentry, syms) <- + case MM.resolveElfContents opt elf of + Left err -> C.throwM (MemoryLoadError err) + Right r -> pure r + when (not (null warn)) $ do + error $ "Warnings while loading Elf " ++ show warn + let entries = maybeToList mentry ++ fmap MM.memSymbolStart syms + let addrSymMap :: M.Map (MM.MemSegmentOff 64) B.ByteString + addrSymMap = M.fromList [ (MM.memSymbolStart sym, MM.memSymbolName sym) + | sym <- syms + ] + let di = MD.cfgFromAddrs RO.x86_64_linux_info mem addrSymMap entries [] + expectedString <- readFile expectedFilename + case readMaybe expectedString of + Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) + Just er -> do + let expectedEntries :: M.Map (MM.MemSegmentOff 64) (S.Set (Addr, Integer)) + expectedEntries = M.fromList + [ (toSegOff mem entry + , S.fromList ((\(s,sz) -> (s, sz)) <$> starts) + ) + | (entry, starts) <- funcs er + ] + ignoredBlocks :: S.Set (MM.MemSegmentOff 64) + ignoredBlocks = S.fromList (toSegOff mem <$> ignoreBlocks er) + T.assertEqual "Collection of discovered function starting points" (M.keysSet expectedEntries `S.difference` ignoredBlocks) (M.keysSet (di ^. MD.funInfo)) - F.forM_ (M.elems (di ^. MD.funInfo)) $ \(PU.Some dfi) -> do - F.forM_ (M.elems (dfi ^. MD.parsedBlocks)) $ \pb -> do - let addr = MD.pblockAddr pb - unless (S.member addr ignoredBlocks) $ do - let term = blockTerminator pb - T.assertBool ("Unclassified block at " ++ show (MD.pblockAddr pb)) (not (isClassifyFailure term)) - T.assertBool ("Translate error at " ++ show (MD.pblockAddr pb) ++ " " ++ show term) (not (isTranslateError term)) - let actualEntry = MD.discoveredFunAddr dfi - -- actualEntry = fromIntegral (MM.addrValue (MD.discoveredFunAddr dfi)) - let actualBlockStarts = S.fromList [ (addr, toInteger (MD.blockSize pbr)) - | pbr <- M.elems (dfi ^. MD.parsedBlocks) - , let addr = MD.pblockAddr pbr - , addr `S.notMember` ignoredBlocks - ] - case (S.member actualEntry ignoredBlocks, M.lookup actualEntry expectedEntries) of - (True, _) -> return () - (_, Nothing) -> - T.assertFailure (printf "Unexpected entry point: %s" (show actualEntry)) - (_, Just expectedBlockStarts) -> - T.assertEqual (printf "Block starts for %s" (show actualEntry)) + F.forM_ (M.elems (di ^. MD.funInfo)) $ \(PU.Some dfi) -> do + F.forM_ (M.elems (dfi ^. MD.parsedBlocks)) $ \pb -> do + let addr = MD.pblockAddr pb + unless (S.member addr ignoredBlocks) $ do + let term = blockTerminator pb + T.assertBool ("Unclassified block at " ++ show (MD.pblockAddr pb)) (not (isClassifyFailure term)) + T.assertBool ("Translate error at " ++ show (MD.pblockAddr pb) ++ " " ++ show term) (not (isTranslateError term)) + let actualEntry = MD.discoveredFunAddr dfi + -- actualEntry = fromIntegral (MM.addrValue (MD.discoveredFunAddr dfi)) + let actualBlockStarts :: S.Set (Addr, Integer) + actualBlockStarts = S.fromList [ (toAddr addr, toInteger (MD.blockSize pbr)) + | pbr <- M.elems (dfi ^. MD.parsedBlocks) + , let addr = MD.pblockAddr pbr + , addr `S.notMember` ignoredBlocks + ] + case (S.member actualEntry ignoredBlocks, M.lookup actualEntry expectedEntries) of + (True, _) -> return () + (_, Nothing) -> + T.assertFailure (printf "Unexpected entry point: %s" (show actualEntry)) + (_, Just expectedBlockStarts) -> + T.assertEqual (printf "Block starts for %s" (show actualEntry)) expectedBlockStarts actualBlockStarts @@ -124,26 +150,6 @@ withELF fp k = do E.Elf32Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs) E.Elf64Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs) -withMemory :: forall w m a - . (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w)) - => MM.AddrWidthRepr w - -> E.Elf w - -> (MM.Memory w -> [MM.MemSegmentOff w] -> m a) - -> m a -withMemory _relaWidth e k = do - let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing - , MM.loadRegionBaseOffset = 0 --- let opt = MM.LoadOptions { MM.loadRegionIndex = Just 0 --- , MM.loadRegionBaseOffset = fromIntegral loadOffset - } - case MM.resolveElfContents opt e of - Left err -> C.throwM (MemoryLoadError err) - Right (warn, mem, mentry, syms) -> - if null warn then - k mem (maybeToList mentry ++ fmap MM.memSymbolStart syms) - else - error $ "Warnings while loading Elf " ++ show warn - data ElfException = MemoryLoadError String deriving (Typeable, Show) diff --git a/x86/tests/x64/test-plt.c b/x86/tests/x64/test-plt.c new file mode 100644 index 00000000..23aefcc2 --- /dev/null +++ b/x86/tests/x64/test-plt.c @@ -0,0 +1,10 @@ +// This is a simple call to puts to test that we resolve the PLT stub +// to puts. +// +// It should be compiled with +// `clang -fpic -FPIC -o test-plt.exe test-plc.c` +#include + +int main(int argc, char** argv) { + puts("Hello World"); +} diff --git a/x86/tests/x64/test-plt.exe b/x86/tests/x64/test-plt.exe new file mode 100755 index 0000000000000000000000000000000000000000..071edee23c6a988534d0285d8844912f669fd3a4 GIT binary patch literal 7376 zcmcgxZ)_Y_5r5~ilZ)e=osdu`DP&a|VMp@j{I^b{5TE}f>*$;q$4LXl*`9r8`_TE$ zy4y>gNI<}86gLf|tQ(7rip@cul@te0Z z_U`&ZD?ad~+j;YQGxO%XoqapA_mzRfp{h_wNeipbDB_lCT%?BwhPQDCq(^nD2(F)0 zJJmYyB-(=?wH|$4rHgD@XoSbZpw-l`Vjw^tWmu?qNR(Y;2|xKaPasq&Lko}|`KmT( z7SwP%Oi%Ir&xjwaM=a?zvR)(03tb|^XhL~~iLzf_onSu<%jZRr2M0lopdfgf ze@g|t6tFoGa>+V96I z;0pI?f^JKfnk_j+W!n>TNqaJrOJ+0Y(qL)aE;`AAW6vZrIc29)NhgVc@nX@%0p(K4 z9vmIAQ|Ur_DpPdQh0&qDY(AGBO^#>D%G69g$5w1*A30->uv}j@SuCb8FR52lKAafr z?X%m=HnWrcuTmlXc!xl#Q7llOIElZ~jtoMlN_~d}sPXe9ak0cf9SPvq7$4^QAo9ZB zt2MMa5|bFz0Lv53CNv!J;QXj-yupLlYQ()XdT?*dcQa10O))R6$Af!A*_wYRVqLB& z)fmcpZqcb(-VY6H{uhxOu4cLOYZxl;zJP1}t{(J=pQgb(OJ%g(=ZL2*y>mvvty4nser^*jFwS?E6?{p%0H z*1}!u`tm_5bklnMfzt?MFS4;Z*O;pxw0Hi%k07Q>pSI=?Oi}Ly>^XJTl?B^iG{Zl3m^2uLV4@&t@#@vYu{f=Z_^EbdMy5h_*ndO+`e&jb<+0{2ihKo z1U*q!I-AWKPvr~QlnU((@27V`4Sq}a%jNT+2BsYb9maYVL1#g4gHpU06e(z1U7|wg zMpS67A+)n@edKCreFNe2&iy59?O_=rmF1Os9AhMZ3i2@$j5ZvKHXf*0Z@aizL9+Vnz= z6^&h7XGOc`*AGQ|3eoO(G!~CG^+pZILB2N{(SE-Q^`F6x?;IeTXmY;DdE%n(JCFLj zUHTnIyu61t5rg(!eot?A*_HEaC-ZVH?GT>NFm)PDmyBEFS4ARnKJ*s%H=!{ zX;Eyy=4V+Sp{RM;-@DA8xUCC#;dxwXrTb(pQOW_Q47i?w#ex?=kji%(>exhdmJx=_sIb4I(_X|}br zv$E0TB&UjdM+OoD@#6z_duQ8Todh%+P2NEo2x zs~#Bdv+zoM`tfQdap}ivR^r!>uTyfL{rGw%@!`iKO5)CsZ%`7Setd1dNaJB-ht~r4 z%f;)|t%~@jl{(08Kf3bV`|*19iaaQufiSY2#ftc5b+00hjFis_j~V#?F(rABAK$7P zD#~v|-C}q}bw8{=t|UM54u)o1 zJA`oh1pTM;v`TGK4RY|YBAu&*4+n6Q@!&b$4?Ki?UGj1{Z;oqx?S7tPJMvxttJ_6f zLsTgQ_VatdE3NBc>wQJr!F`Wtvn!X^G`@BpNT8DaUun%PYvSPzZD;L$f0N~d{{LO$ zYvZ#P3kX$+2Lt#P?Ef{6TRA>rzzuE7f4QjfwegeCcKou$od#a1PTQm9*FJaYhuFEm zc7k#HB5-5vwCWo0%~g#`&TkR<0pV)(x#riCdKEbBLyY$!%?iH*tyCXA&+Q@JL4qBL7jq2r=^$4 z;oHNkGIMz+ZN_^CTaeW#b2?d^R%YsK4vH>y3d)?yl}t{Ce2k5;LOPoy1@7VAD^su0 zq%fGugLTq#xKda_D6cMzi&#;<6hir?!`)&uaQ30XhHpFfstR59!b3_G?i-716uY7?^^5%fvc&7>uvm$Ixi4Q0%rAA$y;=;QHMwNvD(e?|8zWx-r5-Z) z1Ivf$|1qF1`KQJD;-@$u^9uiGbO@96OFnms^@H-k_<0Aq Date: Tue, 4 Dec 2018 09:02:27 -0800 Subject: [PATCH 08/10] Fix previous commit --- base/src/Data/Macaw/Discovery.hs | 111 +++++++------------------------ 1 file changed, 24 insertions(+), 87 deletions(-) diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 48860a9e..d835a037 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -1090,22 +1090,12 @@ parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do -- Look for new instruction pointers addNewFunctionAddrs $ identifyConcreteAddresses mem (abst^.absRegState^.curIP) - -<<<<<<< HEAD - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ParsedCall finalRegs Nothing - , stmtsAbsState = absProcState' - } - seq ret $ pure (ret,idx+1) -======= let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedCall s Nothing + , stmtsNonterm = toList stmts + , stmtsTerm = ParsedCall finalRegs Nothing , stmtsAbsState = absProcState' } seq ret $ pure (ret,idx+1) ->>>>>>> public/master -- | this evalutes the statements in a block to expand the information known -- about control flow targets of this block. @@ -1252,46 +1242,6 @@ transfer addr = do s <- use curFunCtx let ainfo = archInfo s withArchConstraints ainfo $ do -<<<<<<< HEAD - mfinfo <- use $ foundAddrs . at addr - let finfo = fromMaybe (error $ "transfer called on unfound address " ++ show addr ++ ".") $ - mfinfo - let mem = memory s - nonceGen <- gets funNonceGen - prev_block_map <- use $ curFunBlocks - -- Get maximum number of bytes to disassemble - let maxSize :: Int - maxSize = - case Map.lookupGT addr prev_block_map of - Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o - _ -> fromInteger (segoffBytesLeft addr) - let ab = foundAbstractState finfo - case mkInitialRegsForBlock ainfo addr ab of - Left msg -> do - recordErrorBlock addr finfo (Just msg) - Right initRegs -> do - (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr initRegs maxSize - -- If no blocks are returned, then we just add an empty parsed block. - if null bs0 then do - recordErrorBlock addr finfo maybeError - else do - -- Rewrite returned blocks to simplify expressions -#ifdef USE_REWRITER - bs1 <- do - let archStmt = rewriteArchStmt ainfo - let secAddrMap = memSectionIndexMap mem - liftST $ do - ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap - traverse (rewriteBlock ainfo ctx) bs0 -#else - bs1 <- pure bs0 -#endif - -- Compute demand set - let bs = bs1 -- eliminateDeadStmts ainfo bs1 - -- Call transfer blocks to calculate parsedblocks - let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] - addBlocks addr finfo initRegs sz blockMap -======= mfinfo <- use $ foundAddrs . at addr let finfo = fromMaybe (error $ "transfer called on unfound address " ++ show addr ++ ".") $ mfinfo @@ -1305,44 +1255,31 @@ transfer addr = do Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o _ -> fromInteger (segoffBytesLeft addr) let ab = foundAbstractState finfo - (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab - + case mkInitialRegsForBlock ainfo addr ab of + Left msg -> do + recordErrorBlock addr finfo (Just msg) + Right initRegs -> do + (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr initRegs maxSize + -- If no blocks are returned, then we just add an empty parsed block. + if null bs0 then do + recordErrorBlock addr finfo maybeError + else do + -- Rewrite returned blocks to simplify expressions #ifdef USE_REWRITER - bs1 <- do - let archStmt = rewriteArchStmt ainfo - let secAddrMap = memSectionIndexMap mem - liftST $ do - ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap - traverse (rewriteBlock ainfo ctx) bs0 + bs1 <- do + let archStmt = rewriteArchStmt ainfo + let secAddrMap = memSectionIndexMap mem + liftST $ do + ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap + traverse (rewriteBlock ainfo ctx) bs0 #else - bs1 <- pure bs0 + bs1 <- pure bs0 #endif - - -- If no blocks are returned, then we just add an empty parsed block. - if null bs1 then do - let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError - let stmts = StatementList - { stmtsIdent = 0 - , stmtsNonterm = [] - , stmtsTerm = ParsedTranslateError errMsg - , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) - } - let pb = ParsedBlock { pblockAddr = addr - , blockSize = sz - , blockReason = foundReason finfo - , blockAbstractState = foundAbstractState finfo - , blockStatementList = stmts - } - id %= addFunBlock addr pb - else do - -- Rewrite returned blocks to simplify expressions - - -- Compute demand set - let bs = bs1 -- eliminateDeadStmts ainfo bs1 - -- Call transfer blocks to calculate parsedblocks - let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] - addBlocks addr finfo sz blockMap ->>>>>>> public/master + -- Compute demand set + let bs = bs1 -- eliminateDeadStmts ainfo bs1 + -- Call transfer blocks to calculate parsedblocks + let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] + addBlocks addr finfo initRegs sz blockMap ------------------------------------------------------------------------ -- Main loop From 3fc657782df431c7dbddf9aca62c09f155e31e3d Mon Sep 17 00:00:00 2001 From: Brian Huffman Date: Fri, 7 Dec 2018 13:48:38 -0800 Subject: [PATCH 09/10] Add Semigroup instance to make GHC 8.4 happy. --- base/src/Data/Macaw/Analysis/FunctionArgs.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/src/Data/Macaw/Analysis/FunctionArgs.hs b/base/src/Data/Macaw/Analysis/FunctionArgs.hs index 956514c1..0aaea075 100644 --- a/base/src/Data/Macaw/Analysis/FunctionArgs.hs +++ b/base/src/Data/Macaw/Analysis/FunctionArgs.hs @@ -55,6 +55,9 @@ import Data.Macaw.Types newtype Ap f a = Ap { getAp :: f a } +instance (Applicative f, Semigroup a) => Semigroup (Ap f a) where + Ap x <> Ap y = Ap $ (<>) <$> x <*> y + instance (Applicative f, Monoid a) => Monoid (Ap f a) where mempty = Ap $ pure mempty mappend (Ap x) (Ap y) = Ap $ mappend <$> x <*> y From 7e6582fa073f5449a4b9ffd56546200350a2a3bf Mon Sep 17 00:00:00 2001 From: Brian Huffman Date: Tue, 18 Dec 2018 13:47:51 -0800 Subject: [PATCH 10/10] Bump submodules, adapt to changes in crucible-llvm api. --- deps/crucible | 2 +- deps/flexdis86 | 2 +- deps/llvm-pretty | 2 +- deps/parameterized-utils | 2 +- symbolic/src/Data/Macaw/Symbolic/MemOps.hs | 3 ++- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/deps/crucible b/deps/crucible index 0a93de3a..6720f34b 160000 --- a/deps/crucible +++ b/deps/crucible @@ -1 +1 @@ -Subproject commit 0a93de3a8f6e3697818fa1e157029e9cc1b46808 +Subproject commit 6720f34b2db2e86512b5c8b469cdd4ccbad1fc3b diff --git a/deps/flexdis86 b/deps/flexdis86 index ac18aaf5..5c4453b0 160000 --- a/deps/flexdis86 +++ b/deps/flexdis86 @@ -1 +1 @@ -Subproject commit ac18aaf5fc6f857a9a81ef09956ab1021a3fcb39 +Subproject commit 5c4453b0b2c89af2267174bf06b486a9dd6c57a2 diff --git a/deps/llvm-pretty b/deps/llvm-pretty index 8bbd47cf..e05cf319 160000 --- a/deps/llvm-pretty +++ b/deps/llvm-pretty @@ -1 +1 @@ -Subproject commit 8bbd47cf21379bf699297ebb2f69148c88d0ec72 +Subproject commit e05cf3195b0938961cb0d8ecf4b7a4821f0d2673 diff --git a/deps/parameterized-utils b/deps/parameterized-utils index 2af20d4e..045e90c5 160000 --- a/deps/parameterized-utils +++ b/deps/parameterized-utils @@ -1 +1 @@ -Subproject commit 2af20d4e1ed93b18503d7e73376fa993ed865357 +Subproject commit 045e90c564c7839667ff92274d442a343b76d168 diff --git a/symbolic/src/Data/Macaw/Symbolic/MemOps.hs b/symbolic/src/Data/Macaw/Symbolic/MemOps.hs index 975ee650..eb254bc7 100644 --- a/symbolic/src/Data/Macaw/Symbolic/MemOps.hs +++ b/symbolic/src/Data/Macaw/Symbolic/MemOps.hs @@ -513,7 +513,8 @@ doWriteMem st mvar globs w (BVMemRepr bytes endian) ptr0 val = let ?ptrWidth = M.addrWidthNatRepr w let v0 = regValue val v = LLVMValInt (ptrBase v0) (asBits v0) - mem1 <- storeRaw sym mem ptr ty v + let alignment = 0 -- default to byte alignment (FIXME) + mem1 <- storeRaw sym mem ptr ty alignment v return ((), setMem st mvar mem1) --------------------------------------------------------------------------------