diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index 69e84f78..ee52a35d 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -1,5 +1,5 @@ name: macaw-base -version: 0.3.2 +version: 0.3.4 author: Galois, Inc. maintainer: jhendrix@galois.com build-type: Simple @@ -31,7 +31,7 @@ library base >= 4, ansi-wl-pprint, binary, - binary-symbols >= 0.1.1, + binary-symbols >= 0.1.2, bytestring, containers >= 0.5.8.1, elf-edit >= 0.32, @@ -39,7 +39,7 @@ library IntervalMap >= 0.5, lens >= 4.7, mtl, - parameterized-utils >= 1.0.1, + parameterized-utils >= 1.0.7, template-haskell, text, vector, diff --git a/base/src/Data/Macaw/Analysis/FunctionArgs.hs b/base/src/Data/Macaw/Analysis/FunctionArgs.hs index 1ee18afd..0aaea075 100644 --- a/base/src/Data/Macaw/Analysis/FunctionArgs.hs +++ b/base/src/Data/Macaw/Analysis/FunctionArgs.hs @@ -50,9 +50,18 @@ import Data.Macaw.CFG import Data.Macaw.CFG.BlockLabel import Data.Macaw.CFG.DemandSet import Data.Macaw.Discovery.State -import Data.Macaw.Fold import Data.Macaw.Types + +newtype Ap f a = Ap { getAp :: f a } + +instance (Applicative f, Semigroup a) => Semigroup (Ap f a) where + Ap x <> Ap y = Ap $ (<>) <$> x <*> y + +instance (Applicative f, Monoid a) => Monoid (Ap f a) where + mempty = Ap $ pure mempty + mappend (Ap x) (Ap y) = Ap $ mappend <$> x <*> y + ------------------------------------------------------------------------------- -- The algorithm computes the set of direct deps (i.e., from writes) @@ -95,6 +104,10 @@ data DemandSet (r :: Type -> Kind.Type) = , functionResultDemands :: !(Map (RegSegmentOff r) (RegisterSet r)) } +-- | Create a demand set for specific registers. +registerDemandSet :: RegisterSet r -> DemandSet r +registerDemandSet s = DemandSet { registerDemands = s, functionResultDemands = Map.empty } + deriving instance (ShowF r, MemWidth (RegAddrWidth r)) => Show (DemandSet r) deriving instance (TestEquality r) => Eq (DemandSet r) deriving instance (OrdF r) => Ord (DemandSet r) @@ -109,15 +122,18 @@ instance OrdF r => Semigroup (DemandSet r) where instance OrdF r => Monoid (DemandSet r) where mempty = DemandSet { registerDemands = Set.empty - , functionResultDemands = mempty + , functionResultDemands = Map.empty } mappend = (<>) demandSetDifference :: OrdF r => DemandSet r -> DemandSet r -> DemandSet r demandSetDifference ds1 ds2 = - DemandSet (registerDemands ds1 `Set.difference` registerDemands ds2) - (Map.differenceWith setDiff (functionResultDemands ds1) - (functionResultDemands ds2)) + DemandSet { registerDemands = registerDemands ds1 `Set.difference` registerDemands ds2 + , functionResultDemands = + Map.differenceWith setDiff + (functionResultDemands ds1) + (functionResultDemands ds2) + } where setDiff s1 s2 = let s' = s1 `Set.difference` s2 @@ -131,8 +147,8 @@ data DemandType r -- | This type is for registers that are demanded if the function at the given address wants -- the given register. | forall tp. DemandFunctionArg (RegSegmentOff r) (r tp) - -- | This is a associated with a set of registers that are demanded if the given register is needed - -- as a return value. + -- | This is a associated with the registers that are demanded if + -- the given register is needed as a return value. | forall tp. DemandFunctionResult (r tp) instance (MemWidth (RegAddrWidth r), ShowF r) => Show (DemandType r) where @@ -179,8 +195,8 @@ data ArchTermStmtRegEffects arch = ArchTermStmtRegEffects { termRegDemands :: ![Some (ArchReg arch)] -- ^ Registers demanded by term statement , termRegTransfers :: [Some (ArchReg arch)] - -- ^ Registers that terminal statement are not modified - -- by terminal statement. + -- ^ Registers that are not modified by + -- terminal statement. } -- | Returns information about the registers needed and modified by a terminal statement @@ -193,6 +209,8 @@ type ComputeArchTermStmtEffects arch ids -> RegState (ArchReg arch) (Value arch ids) -> ArchTermStmtRegEffects arch +-- | Information about the architecture/environment what arguments a +-- function needs. data ArchDemandInfo arch = ArchDemandInfo { -- | Registers used as arguments to the function. functionArgRegs :: ![Some (ArchReg arch)] @@ -294,34 +312,38 @@ addIntraproceduralJumpTarget fun_info src_block dest_addr = do -- record the ed text "Could not find target block" <+> text (show dest_addr) <$$> indent 2 (text "Source:" <$$> pretty src_block) --- | Compute the input registers that this value depends on +withAssignmentCache :: State (AssignmentCache (ArchReg arch) ids) a -> FunctionArgsM arch ids a +withAssignmentCache m = do + c <- use assignmentCache + let (r, c') = runState m c + seq c' $ assignmentCache .= c' + pure r + +-- | Return the input registers that a value depends on. valueUses :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch)) => Value arch ids tp - -> FunctionArgsM arch ids (RegisterSet (ArchReg arch)) -valueUses v = zoom assignmentCache $ foldValueCached fns v - where fns = emptyValueFold { foldInput = Set.singleton . Some } + -> State (AssignmentCache (ArchReg arch) ids) (RegisterSet (ArchReg arch)) +valueUses (AssignedValue (Assignment a rhs)) = do + mr <- gets $ Map.lookup (Some a) + case mr of + Just s -> pure s + Nothing -> do + rhs' <- foldrFC (\v mrhs -> Set.union <$> valueUses v <*> mrhs) (pure Set.empty) rhs + seq rhs' $ modify' $ Map.insert (Some a) rhs' + pure $ rhs' +valueUses (Initial r) = do + pure $! Set.singleton (Some r) +valueUses _ = do + pure $! Set.empty + +addBlockDemands :: OrdF (ArchReg arch) => ArchLabel arch -> DemandMap (ArchReg arch) -> FunctionArgsM arch ids () +addBlockDemands lbl m = + blockDemandMap %= Map.insertWith demandMapUnion lbl m --- | Record that a block demands the value of certain registers. -recordBlockDemand :: ( OrdF (ArchReg arch) - , FoldableFC (ArchFn arch) - ) - => ArchLabel arch - -- ^ The current block - -> RegState (ArchReg arch) (Value arch ids) - -- ^ The current register state - -> (forall tp . ArchReg arch tp -> DemandType (ArchReg arch)) - -> [Some (ArchReg arch)] - -- ^ The registers that we need. - -> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps -recordBlockDemand lbl s mk rs = do - let doReg (Some r) = do - rs' <- valueUses (s ^. boundValue r) - return (mk r, DemandSet rs' mempty) - vs <- mapM doReg rs - blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs) -- Figure out the deps of the given registers and update the state for the current label -recordBlockTransfer :: ( OrdF (ArchReg arch) +recordBlockTransfer :: forall arch ids + . ( OrdF (ArchReg arch) , FoldableFC (ArchFn arch) ) => ArchLabel arch @@ -329,10 +351,13 @@ recordBlockTransfer :: ( OrdF (ArchReg arch) -> [Some (ArchReg arch)] -> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps recordBlockTransfer lbl s rs = do - let doReg (Some r) = do + let doReg :: Some (ArchReg arch) + -> State (AssignmentCache (ArchReg arch) ids) + (Some (ArchReg arch), DemandSet (ArchReg arch)) + doReg (Some r) = do rs' <- valueUses (s ^. boundValue r) - return (Some r, DemandSet rs' mempty) - vs <- mapM doReg rs + return (Some r, registerDemandSet rs') + vs <- withAssignmentCache $ traverse doReg rs blockTransfer %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs) -- | A block requires a value, and so we need to remember which @@ -342,9 +367,8 @@ demandValue :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch)) -> Value arch ids tp -> FunctionArgsM arch ids () demandValue lbl v = do - regs <- valueUses v - blockDemandMap %= Map.insertWith demandMapUnion lbl - (Map.singleton DemandAlways (DemandSet regs mempty)) + regs <- withAssignmentCache $ valueUses v + addBlockDemands lbl $ Map.singleton DemandAlways (registerDemandSet regs) -- ----------------------------------------------------------------------------- -- Entry point @@ -426,9 +450,9 @@ summarizeCall :: forall arch ids -> Bool -- ^ A flag that is set to true for tail calls. -> FunctionArgsM arch ids () -summarizeCall mem lbl proc_state isTailCall = do +summarizeCall mem lbl finalRegs isTailCall = do knownAddrs <- gets computedAddrSet - case valueAsMemAddr (proc_state^.boundValue ip_reg) of + case valueAsMemAddr (finalRegs^.boundValue ip_reg) of Just faddr0 | Just faddr <- asSegmentOff mem faddr0 , Set.member faddr knownAddrs -> do @@ -439,10 +463,10 @@ summarizeCall mem lbl proc_state isTailCall = do -- singleton for now, but propagating back will introduce more deps. let demandSet sr = DemandSet mempty (Map.singleton faddr (Set.singleton sr)) - if isTailCall then + if isTailCall then do -- tail call, propagate demands for our return regs to the called function - let propMap = map (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) retRegs - in blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromList propMap) + let propMap = (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) <$> retRegs + addBlockDemands lbl $ Map.fromList propMap else do -- Given a return register sr, this indicates that let propResult :: Some (ArchReg arch) -> FunctionArgsM arch ids () @@ -455,12 +479,22 @@ summarizeCall mem lbl proc_state isTailCall = do -- If a function wants argument register r, then we note that this -- block needs the corresponding state values. Note that we could -- do this for _all_ registers, but this should make the summaries somewhat smaller. + + -- Associate the demand sets for each potential argument register with the registers used + -- by faddr. argRegs <- gets $ functionArgRegs . archDemandInfo - recordBlockDemand lbl proc_state (DemandFunctionArg faddr) argRegs + let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^. boundValue r) + let demandTypes = viewSome (DemandFunctionArg faddr) <$> argRegs + demands <- withAssignmentCache $ traverse regDemandSet argRegs + addBlockDemands lbl $ Map.fromList $ zip demandTypes demands _ -> do -- In the dynamic case, we just assume all arguments (FIXME: results?) argRegs <- gets $ functionArgRegs . archDemandInfo - recordBlockDemand lbl proc_state (\_ -> DemandAlways) ([Some ip_reg] ++ argRegs) + + do let demandedRegs = [Some ip_reg] ++ argRegs + let regUses (Some r) = valueUses (finalRegs^. boundValue r) + demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $ foldMap (Ap . regUses) demandedRegs + addBlockDemands lbl $ Map.singleton DemandAlways demands -- | Return values that must be evaluated to execute side effects. stmtDemandedValues :: DemandContext arch @@ -492,10 +526,10 @@ summarizeBlock :: forall arch ids -> ArchSegmentOff arch -- ^ Address of the code. -> StatementList arch ids -- ^ Current block -> FunctionArgsM arch ids () -summarizeBlock mem interp_state addr stmts = do +summarizeBlock mem interpState addr stmts = do let lbl = GeneratedBlock addr (stmtsIdent stmts) -- Add this label to block demand map with empty set. - blockDemandMap %= Map.insertWith demandMapUnion lbl mempty + addBlockDemands lbl mempty ctx <- gets $ demandInfoCtx . archDemandInfo -- Add all values demanded by non-terminal statements in list. @@ -503,6 +537,67 @@ summarizeBlock mem interp_state addr stmts = do (stmtsNonterm stmts) -- Add values demanded by terminal statements case stmtsTerm stmts of + ParsedCall finalRegs m_ret_addr -> do + -- Record the demands based on the call, and add edges between + -- this note and next nodes. + case m_ret_addr of + Nothing -> do + summarizeCall mem lbl finalRegs True + Just ret_addr -> do + summarizeCall mem lbl finalRegs False + addIntraproceduralJumpTarget interpState lbl ret_addr + callRegs <- gets $ calleeSavedRegs . archDemandInfo + recordBlockTransfer lbl finalRegs ([Some sp_reg] ++ Set.toList callRegs) + + PLTStub regs _ _ -> do + -- PLT Stubs demand all registers that could be function + -- arguments, as well as any registers in regs. + ainfo <- gets archDemandInfo + let demandedRegs = Set.fromList (functionArgRegs ainfo) + demands <- withAssignmentCache $ getAp $ foldMapF (Ap . valueUses) regs + addBlockDemands lbl $ Map.singleton DemandAlways $ + registerDemandSet $ demands <> demandedRegs + + ParsedJump procState tgtAddr -> do + -- record all propagations + recordBlockTransfer lbl procState archRegs + addIntraproceduralJumpTarget interpState lbl tgtAddr + + ParsedLookupTable finalRegs lookup_idx vec -> do + demandValue lbl lookup_idx + -- record all propagations + recordBlockTransfer lbl finalRegs archRegs + traverse_ (addIntraproceduralJumpTarget interpState lbl) vec + + ParsedReturn finalRegs -> do + retRegs <- gets $ functionRetRegs . archDemandInfo + let demandTypes = viewSome DemandFunctionResult <$> retRegs + let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^.boundValue r) + demands <- withAssignmentCache $ traverse regDemandSet retRegs + addBlockDemands lbl $ Map.fromList $ zip demandTypes demands + + + + ParsedIte c tblock fblock -> do + -- Demand condition then summarize recursive blocks. + demandValue lbl c + summarizeBlock mem interpState addr tblock + summarizeBlock mem interpState addr fblock + + ParsedArchTermStmt tstmt finalRegs next_addr -> do + -- Compute effects of terminal statement. + ainfo <- gets $ archDemandInfo + let e = computeArchTermStmtEffects ainfo tstmt finalRegs + + -- Demand all registers the terminal statement demands. + do let regUses (Some r) = valueUses (finalRegs^.boundValue r) + demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $ + foldMap (Ap . regUses) (termRegDemands e) + addBlockDemands lbl $ Map.singleton DemandAlways demands + + recordBlockTransfer lbl finalRegs (termRegTransfers e) + traverse_ (addIntraproceduralJumpTarget interpState lbl) next_addr + ParsedTranslateError _ -> do -- We ignore demands for translate errors. pure () @@ -510,43 +605,6 @@ summarizeBlock mem interp_state addr stmts = do -- We ignore demands for classify failure. pure () - ParsedIte c tblock fblock -> do - -- Demand condition then summarize recursive blocks. - demandValue lbl c - summarizeBlock mem interp_state addr tblock - summarizeBlock mem interp_state addr fblock - - ParsedCall proc_state m_ret_addr -> do - case m_ret_addr of - Nothing -> do - summarizeCall mem lbl proc_state True - Just ret_addr -> do - summarizeCall mem lbl proc_state False - addIntraproceduralJumpTarget interp_state lbl ret_addr - callRegs <- gets $ calleeSavedRegs . archDemandInfo - recordBlockTransfer lbl proc_state ([Some sp_reg] ++ Set.toList callRegs) - - ParsedJump proc_state tgt_addr -> do - -- record all propagations - recordBlockTransfer lbl proc_state archRegs - addIntraproceduralJumpTarget interp_state lbl tgt_addr - - ParsedReturn proc_state -> do - retRegs <- gets $ functionRetRegs . archDemandInfo - recordBlockDemand lbl proc_state DemandFunctionResult retRegs - - ParsedArchTermStmt tstmt proc_state next_addr -> do - effFn <- gets $ computeArchTermStmtEffects . archDemandInfo - let e = effFn tstmt proc_state - recordBlockDemand lbl proc_state (\_ -> DemandAlways) (termRegDemands e) - recordBlockTransfer lbl proc_state (termRegTransfers e) - traverse_ (addIntraproceduralJumpTarget interp_state lbl) next_addr - - ParsedLookupTable proc_state lookup_idx vec -> do - demandValue lbl lookup_idx - -- record all propagations - recordBlockTransfer lbl proc_state archRegs - traverse_ (addIntraproceduralJumpTarget interp_state lbl) vec -- | Explore states until we have reached end of frontier. summarizeIter :: ArchConstraints arch @@ -575,7 +633,7 @@ calculateOnePred newDemands predLbl = do -- update uses, returning value before this iteration seenDemands <- use (blockDemandMap . ix lbl') - blockDemandMap . at lbl' .= Just (Map.unionWith mappend demands' seenDemands) + addBlockDemands lbl' demands' -- seenDemands <- blockDemandMap . ix lbl' <<%= demandMapUnion demands' @@ -683,10 +741,7 @@ doOneFunction archFns addrs ist0 acc ist = do -- recorded as a use, which is erroneous, so we strip out any -- reference to them here. callRegs <- gets $ calleeSavedRegs . archDemandInfo - let calleeDemandSet = DemandSet { registerDemands = - Set.insert (Some sp_reg) callRegs - , functionResultDemands = mempty - } + let calleeDemandSet = registerDemandSet (Set.insert (Some sp_reg) callRegs) return (Map.foldlWithKey' (decomposeMap calleeDemandSet addr) acc funDemands) diff --git a/base/src/Data/Macaw/Architecture/Info.hs b/base/src/Data/Macaw/Architecture/Info.hs index e62391d9..b44d4568 100644 --- a/base/src/Data/Macaw/Architecture/Info.hs +++ b/base/src/Data/Macaw/Architecture/Info.hs @@ -45,13 +45,10 @@ type DisassembleFn arch . NonceGenerator (ST s) ids -> ArchSegmentOff arch -- ^ The offset to start reading from. + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Initial values to use for registers. -> Int -- ^ Maximum offset for this to read from. - -> AbsBlockState (ArchReg arch) - -- ^ Abstract state associated with address that we are disassembling - -- from. - -- - -- This is used for things like the height of the x87 stack. -> ST s ([Block arch ids], Int, Maybe String) -- | This records architecture specific functions for analysis. @@ -64,6 +61,12 @@ data ArchitectureInfo arch -- ^ Architecture address width. , archEndianness :: !Endianness -- ^ The byte order values are stored in. + , mkInitialRegsForBlock :: !(forall ids + . ArchSegmentOff arch + -> AbsBlockState (ArchReg arch) + -> Either String (RegState (ArchReg arch) (Value arch ids))) + -- ^ Use the abstract block state information to infer register + -- values to use for disassembling from given address. , disassembleFn :: !(DisassembleFn arch) -- ^ Function for disasembling a block. , mkInitialAbsState :: !(Memory (RegAddrWidth (ArchReg arch)) @@ -89,7 +92,7 @@ data ArchitectureInfo arch -- ^ Update the abstract state after a function call returns , identifyCall :: forall ids . Memory (ArchAddrWidth arch) - -> [Stmt arch ids] + -> Seq (Stmt arch ids) -> RegState (ArchReg arch) (Value arch ids) -> Maybe (Seq (Stmt arch ids), ArchSegmentOff arch) -- ^ Function for recognizing call statements. @@ -97,10 +100,25 @@ data ArchitectureInfo arch -- Given a memory state, list of statements, and final register -- state, the should determine if this is a call, and if so, -- return the statements with any action to push the return - -- value to the stack removed, and provide the explicit return - -- address that the function should return to. + -- value to the stack removed, and provide the return address that + -- the function should return to. + + , checkForReturnAddr :: forall ids + . RegState (ArchReg arch) (Value arch ids) + -> AbsProcessorState (ArchReg arch) ids + -> Bool + -- ^ @checkForReturnAddr regs s@ returns true if the location + -- where the return address is normally stored in regs when + -- calling a function does indeed contain the abstract value + -- associated with return addresses. + -- + -- For x86 this checks if the address just above the stack is the + -- return address. For ARM, this should check the link register. + -- + -- This predicate is invoked when considering if a potential tail call + -- is setup to return to the right location. , identifyReturn :: forall ids - . [Stmt arch ids] + . Seq (Stmt arch ids) -> RegState (ArchReg arch) (Value arch ids) -> AbsProcessorState (ArchReg arch) ids -> Maybe (Seq (Stmt arch ids)) diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index f9f5703e..6748538d 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -55,6 +55,7 @@ module Data.Macaw.CFG.Core , mkRegStateM , traverseRegsWith , zipWithRegState + , ppRegMap -- * Pretty printing , ppAssignId , ppLit @@ -72,8 +73,6 @@ module Data.Macaw.CFG.Core , asStackAddrOffset -- * References , refsInValue - , refsInApp - , refsInAssignRhs -- ** Synonyms , ArchAddrValue , Data.Parameterized.TraversableFC.FoldableFC(..) @@ -650,12 +649,15 @@ class PrettyRegValue r (f :: Type -> Kind.Type) where -- should be printed, and Nothing if the contents should be ignored. ppValueEq :: r tp -> f tp -> Maybe Doc -instance ( PrettyRegValue r f - ) +ppRegMap :: forall r v . PrettyRegValue r v => MapF.MapF r v -> Doc +ppRegMap m = bracketsep $ catMaybes (f <$> MapF.toList m) + where f :: MapF.Pair r v -> Maybe Doc + f (MapF.Pair r v) = ppValueEq r v + + +instance ( PrettyRegValue r f) => Pretty (RegState r f) where - pretty (RegState m) = bracketsep $ catMaybes (f <$> MapF.toList m) - where f :: MapF.Pair r f -> Maybe Doc - f (MapF.Pair r v) = ppValueEq r v + pretty (RegState m) = ppRegMap m instance ( PrettyRegValue r f ) @@ -696,7 +698,7 @@ data Stmt arch ids ppStmt :: ArchConstraints arch => (ArchAddrWord arch -> Doc) - -- ^ Function for pretty printing an offset + -- ^ Function for pretty printing an instruction address offset -> Stmt arch ids -> Doc ppStmt ppOff stmt = @@ -706,7 +708,8 @@ ppStmt ppOff stmt = InstructionStart off mnem -> text "#" <+> ppOff off <+> text (Text.unpack mnem) Comment s -> text $ "# " ++ Text.unpack s ExecArchStmt s -> ppArchStmt (ppValue 10) s - ArchState a m -> hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m)) + ArchState a m -> + hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m)) where ppAddr addr = case asAbsoluteAddr addr of @@ -724,19 +727,3 @@ instance ArchConstraints arch => Show (Stmt arch ids) where refsInValue :: Value arch ids tp -> Set (Some (AssignId ids)) refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v) refsInValue _ = Set.empty - -refsInApp :: App (Value arch ids) tp -> Set (Some (AssignId ids)) -refsInApp app = foldMapFC refsInValue app - -refsInAssignRhs :: FoldableFC (ArchFn arch) - => AssignRhs arch (Value arch ids) tp - -> Set (Some (AssignId ids)) -refsInAssignRhs rhs = - case rhs of - EvalApp v -> refsInApp v - SetUndefined _ -> Set.empty - ReadMem v _ -> refsInValue v - CondReadMem _ c a d -> - Set.union (refsInValue c) $ - Set.union (refsInValue a) (refsInValue d) - EvalArchFn f _ -> foldMapFC refsInValue f diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 5f8fdab8..faeb3e63 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -9,6 +9,7 @@ This provides information about code discovered in binaries. {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} @@ -18,6 +19,7 @@ module Data.Macaw.Discovery ( -- * DiscoveryInfo State.DiscoveryState(..) , State.emptyDiscoveryState + , State.trustedFunctionEntryPoints , State.AddrSymMap , State.funInfo , State.exploredFunctions @@ -64,11 +66,15 @@ import Data.Foldable import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe +import Data.Monoid import Data.Parameterized.Classes +import qualified Data.Parameterized.Map as MapF import Data.Parameterized.NatRepr import Data.Parameterized.Nonce import Data.Parameterized.Some import Data.Parameterized.TraversableF +import Data.Sequence (Seq) +import qualified Data.Sequence as Seq import Data.Set (Set) import qualified Data.Set as Set import qualified Data.Text as Text @@ -198,6 +204,8 @@ addStatementListDemands sl = do case stmtsTerm sl of ParsedCall regs _ -> do traverseF_ addValueDemands regs + PLTStub regs _ _ -> + traverseF_ addValueDemands regs ParsedJump regs _ -> do traverseF_ addValueDemands regs ParsedLookupTable regs _idx _tbl -> do @@ -642,8 +650,10 @@ resolveAsAbsoluteAddr :: forall w -> Maybe (MemAddr w) resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $ case l of - [ByteRegion bs] -> do - absoluteAddr <$> addrRead endianness bs + [ByteRegion bs] -> + case addrRead endianness bs of + Just a -> pure $! absoluteAddr a + Nothing -> error $ "internal: resolveAsAbsoluteAddr given short chunk list." [RelocationRegion r] -> do when (relocationIsRel r) $ Nothing case relocationSym r of @@ -654,6 +664,8 @@ resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $ SegmentBaseAddr idx -> do seg <- Map.lookup idx (memSegmentIndexMap mem) pure $! segmentOffAddr seg (relocationOffset r) + LoadBaseAddr -> do + memBaseAddr mem _ -> Nothing -- This function resolves jump table entries. @@ -779,13 +791,15 @@ data ParseContext arch ids = -- ^ Entry addresses for known functions (e.g. from -- symbol information) -- - -- The discovery process will not create intra-procedural - -- jumps to the entry points of new functions. + -- The discovery process will not create + -- intra-procedural jumps to the entry points of new + -- functions. , pctxFunAddr :: !(ArchSegmentOff arch) -- ^ Address of function this block is being parsed as , pctxAddr :: !(ArchSegmentOff arch) -- ^ Address of the current block , pctxBlockMap :: !(Map Word64 (Block arch ids)) + -- ^ Map from block indices to block code at address. } addrMemRepr :: ArchitectureInfo arch -> MemRepr (BVType (RegAddrWidth (ArchReg arch))) @@ -825,70 +839,133 @@ addNewFunctionAddrs :: [ArchSegmentOff arch] addNewFunctionAddrs addrs = newFunctionAddrs %= (++addrs) +-- | @stripPLTRead assignId prev rest@ looks for a read of @assignId@ +-- from the end of @prev@, and if it finds it returns the +-- concatenation of the instructione before the read in @prev@ and +-- @rest@. +-- +-- The read may appear before comment and @instructionStart@ +-- instructions, but otherwise must be at the end of prev +stripPLTRead :: ArchConstraints arch + => AssignId ids tp -- ^ Identifier of write to remove + -> Seq (Stmt arch ids) + -> Seq (Stmt arch ids) + -> Maybe (Seq (Stmt arch ids)) +stripPLTRead readId next rest = + case Seq.viewr next of + Seq.EmptyR -> Nothing + prev Seq.:> lastStmt -> do + let cont = stripPLTRead readId prev (lastStmt Seq.<| rest) + case lastStmt of + AssignStmt (Assignment stmtId rhs) + | Just Refl <- testEquality readId stmtId -> Just (prev Seq.>< rest) + -- Fail if the read to delete is used in later computations + | Set.member (Some readId) (foldMapFC refsInValue rhs) -> + Nothing + | otherwise -> + case rhs of + EvalApp{} -> cont + SetUndefined{} -> cont + _ -> Nothing + InstructionStart{} -> cont + ArchState{} -> cont + Comment{} -> cont + _ -> Nothing + +removeUnassignedRegs :: forall arch ids + . RegisterInfo (ArchReg arch) + => RegState (ArchReg arch) (Value arch ids) + -- ^ Initial register values + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Final register values + -> MapF.MapF (ArchReg arch) (Value arch ids) +removeUnassignedRegs initRegs finalRegs = + let keepReg :: forall tp . ArchReg arch tp -> Value arch ids tp -> Bool + keepReg r finalVal + | Just Refl <- testEquality r ip_reg = False + | Just Refl <- testEquality initVal finalVal = False + | otherwise = True + where initVal = initRegs^.boundValue r + in MapF.filterWithKey keepReg (regStateMap finalRegs) + +-- | Return true if any value in structure contains the given +-- identifier. +containsAssignId :: forall t arch ids itp + . FoldableF t + => AssignId ids itp + -- ^ Forbidden assignment -- may not appear in terms. + -> t (Value arch ids) + -> Bool +containsAssignId droppedAssign = + let hasId :: forall tp . Value arch ids tp -> Any + hasId v = Any (Set.member (Some droppedAssign) (refsInValue v)) + in getAny . foldMapF hasId + -- | This parses a block that ended with a fetch and execute instruction. parseFetchAndExecute :: forall arch ids . ParseContext arch ids -> Word64 -- ^ Index of this block - -> [Stmt arch ids] - -> AbsProcessorState (ArchReg arch) ids - -- ^ Registers prior to blocks being executed. -> RegState (ArchReg arch) (Value arch ids) + -- ^ Initial register values + -> Seq (Stmt arch ids) + -> AbsProcessorState (ArchReg arch) ids + -- ^ Abstract state of registers prior to blocks being executed. + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Final register values -> State (ParseState arch ids) (StatementList arch ids, Word64) -parseFetchAndExecute ctx idx stmts regs s = do - let mem = pctxMemory ctx - let ainfo= pctxArchInfo ctx - let absProcState' = absEvalStmts ainfo regs stmts +parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do + let mem = pctxMemory ctx + let ainfo = pctxArchInfo ctx + let absProcState' = absEvalStmts ainfo absProcState stmts withArchConstraints ainfo $ do - -- See if next statement appears to end with a call. - -- We define calls as statements that end with a write that - -- stores the pc to an address. + -- Try to figure out what control flow statement we have. case () of - _ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do + -- The block ends with a Mux, so we turn this into a `ParsedIte` statement. + _ | Just (Mux _ c t f) <- valueAsApp (finalRegs^.boundValue ip_reg) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts let l_regs = refineProcStateBounds c True $ refineProcState c absTrue absProcState' let l_regs' = absEvalStmts ainfo l_regs stmts - let lState = s & boundValue ip_reg .~ t + let lState = finalRegs & boundValue ip_reg .~ t (tStmts,trueIdx) <- - parseFetchAndExecute ctx (idx+1) [] l_regs' lState + parseFetchAndExecute ctx (idx+1) initRegs Seq.empty l_regs' lState let r_regs = refineProcStateBounds c False $ refineProcState c absFalse absProcState' let r_regs' = absEvalStmts ainfo r_regs stmts - let rState = s & boundValue ip_reg .~ f + let rState = finalRegs & boundValue ip_reg .~ f (fStmts,falseIdx) <- - parseFetchAndExecute ctx trueIdx [] r_regs' rState + parseFetchAndExecute ctx trueIdx initRegs Seq.empty r_regs' rState let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts + , stmtsNonterm = toList stmts , stmtsTerm = ParsedIte c tStmts fStmts , stmtsAbsState = absProcState' } pure (ret, falseIdx) - -- The last statement was a call. + -- Use architecture-specific callback to check if last statement was a call. -- Note that in some cases the call is known not to return, and thus -- this code will never jump to the return value. - _ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do + _ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts finalRegs -> do mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts - let abst = finalAbsBlockState absProcState' s - seq abst $ do - -- Merge caller return information - intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) - -- Use the abstract domain to look for new code pointers for the current IP. - addNewFunctionAddrs $ - identifyCallTargets mem abst s - -- Use the call-specific code to look for new IPs. + let abst = finalAbsBlockState absProcState' finalRegs + -- Merge caller return information + seq abst $ intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) + -- Use the abstract domain to look for new code pointers for the current IP. + addNewFunctionAddrs $ + identifyCallTargets mem abst finalRegs + -- Use the call-specific code to look for new IPs. - let r = StatementList { stmtsIdent = idx - , stmtsNonterm = toList stmts - , stmtsTerm = ParsedCall s (Just ret) - , stmtsAbsState = absProcState' - } - pure (r, idx+1) + let r = StatementList { stmtsIdent = idx + , stmtsNonterm = toList stmts + , stmtsTerm = ParsedCall finalRegs (Just ret) + , stmtsAbsState = absProcState' + } + pure (r, idx+1) -- This block ends with a return as identified by the -- architecture-specific processing. Basic return @@ -901,19 +978,19 @@ parseFetchAndExecute ctx idx stmts regs s = do -- (e.g. ARM will clear the low bit in T32 mode or the low 2 -- bits in A32 mode), so the actual detection process is -- deferred to architecture-specific functionality. - | Just prev_stmts <- identifyReturn ainfo stmts s absProcState' -> do + | Just prev_stmts <- identifyReturn ainfo stmts finalRegs absProcState' -> do mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts let ret = StatementList { stmtsIdent = idx , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedReturn s + , stmtsTerm = ParsedReturn finalRegs , stmtsAbsState = absProcState' } pure (ret, idx+1) -- Jump to a block within this function. - | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) - -- Check + | Just tgt_mseg <- valueAsSegmentOff mem (finalRegs^.boundValue ip_reg) + -- Check target block address is in executable segment. , segmentFlags (segoffSegment tgt_mseg) `Perm.hasPerm` Perm.execute -- Check the target address is not the entry point of this function. @@ -926,22 +1003,23 @@ parseFetchAndExecute ctx idx stmts regs s = do mapM_ (recordWriteStmt ainfo mem absProcState') stmts -- Merge block state and add intra jump target. - let abst = finalAbsBlockState absProcState' s + let abst = finalAbsBlockState absProcState' finalRegs let abst' = abst & setAbsIP tgt_mseg intraJumpTargets %= ((tgt_mseg, abst'):) let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedJump s tgt_mseg + , stmtsNonterm = toList stmts + , stmtsTerm = ParsedJump finalRegs tgt_mseg , stmtsAbsState = absProcState' } pure (ret, idx+1) - -- Block ends with what looks like a jump table. - | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do + + -- Block ends with what looks like a jump table. + | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (finalRegs^.curIP) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts let abst :: AbsBlockState (ArchReg arch) - abst = finalAbsBlockState absProcState' s + abst = finalAbsBlockState absProcState' finalRegs seq abst $ do @@ -949,35 +1027,51 @@ parseFetchAndExecute ctx idx stmts regs s = do let abst' = abst & setAbsIP tgtAddr intraJumpTargets %= ((tgtAddr, abst'):) - let term = ParsedLookupTable s jumpIndex entries + let term = ParsedLookupTable finalRegs jumpIndex entries let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts + , stmtsNonterm = toList stmts , stmtsTerm = term , stmtsAbsState = absProcState' } pure (ret,idx+1) - -- Check for tail call when the stack pointer points to the return address. - -- - -- TODO: this makes sense for x86, but is not correct for all architectures - | ptrType <- addrMemRepr ainfo - , sp_val <- s^.boundValue sp_reg - , ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do - finishWithTailCall absProcState' + -- Code for PLT entry + _ | 0 <- idx + , AssignedValue (Assignment valId v) <- finalRegs^.boundValue ip_reg + , ReadMem gotVal _repr <- v + , Just gotAddr <- valueAsMemAddr gotVal + , Just gotSegOff <- asSegmentOff mem gotAddr + , Right chunks <- segoffContentsAfter gotSegOff + , RelocationRegion r:_ <- chunks + -- Check the relocation is a jump slot. + , relocationJumpSlot r + , Just strippedStmts <- stripPLTRead valId stmts Seq.empty + , strippedRegs <- removeUnassignedRegs initRegs finalRegs + , not (containsAssignId valId strippedRegs) -> do - -- Is this a jump to a known function entry? We're already past the - -- "identifyCall" case, so this must be a tail call, assuming we trust our - -- known function entry info. - | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) - , tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do + mapM_ (recordWriteStmt ainfo mem absProcState') strippedStmts + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = toList strippedStmts + , stmtsTerm = PLTStub strippedRegs gotSegOff r + , stmtsAbsState = absEvalStmts ainfo absProcState strippedStmts + } + pure (ret, idx+1) + + -- Check for tail call when the calling convention seems to be satisfied. + | spVal <- finalRegs^.boundValue sp_reg + -- Check to see if the stack pointer points to an offset of the initial stack. + , StackOffset _ offsets <- transferValue absProcState' spVal + -- Stack stack is back to height when function was called. + , offsets == Set.singleton 0 + , checkForReturnAddr ainfo finalRegs absProcState' -> do finishWithTailCall absProcState' -- Block that ends with some unknown | otherwise -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ClassifyFailure s + , stmtsNonterm = toList stmts + , stmtsTerm = ClassifyFailure finalRegs , stmtsAbsState = absProcState' } pure (ret,idx+1) @@ -990,16 +1084,15 @@ parseFetchAndExecute ctx idx stmts regs s = do mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts -- Compute final state - let abst = finalAbsBlockState absProcState' s + let abst = finalAbsBlockState absProcState' finalRegs seq abst $ do -- Look for new instruction pointers addNewFunctionAddrs $ identifyConcreteAddresses mem (abst^.absRegState^.curIP) - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedCall s Nothing + , stmtsNonterm = toList stmts + , stmtsTerm = ParsedCall finalRegs Nothing , stmtsAbsState = absProcState' } seq ret $ pure (ret,idx+1) @@ -1009,13 +1102,15 @@ parseFetchAndExecute ctx idx stmts regs s = do parseBlock :: ParseContext arch ids -- ^ Context for parsing blocks. -> Word64 - -- ^ Index for next statements + -- ^ Index for next statements + -> RegState (ArchReg arch) (Value arch ids) + -- ^ Initial register values -> Block arch ids -- ^ Block to parse -> AbsProcessorState (ArchReg arch) ids -- ^ Abstract state at start of block -> State (ParseState arch ids) (StatementList arch ids, Word64) -parseBlock ctx idx b regs = do +parseBlock ctx idx initRegs b absProcState = do let mem = pctxMemory ctx let ainfo = pctxArchInfo ctx withArchConstraints ainfo $ do @@ -1023,7 +1118,7 @@ parseBlock ctx idx b regs = do Branch c lb rb -> do let blockMap = pctxBlockMap ctx -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts ainfo regs (blockStmts b) + let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b) let Just l = Map.lookup lb blockMap @@ -1034,8 +1129,8 @@ parseBlock ctx idx b regs = do let l_regs' = absEvalStmts ainfo l_regs (blockStmts b) let r_regs' = absEvalStmts ainfo r_regs (blockStmts b) - (parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) l l_regs' - (parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx r r_regs' + (parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) initRegs l l_regs' + (parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx initRegs r r_regs' let ret = StatementList { stmtsIdent = idx , stmtsNonterm = blockStmts b @@ -1044,13 +1139,13 @@ parseBlock ctx idx b regs = do } pure (ret, falseIdx) - FetchAndExecute s -> do - parseFetchAndExecute ctx idx (blockStmts b) regs s + FetchAndExecute finalRegs -> do + parseFetchAndExecute ctx idx initRegs (Seq.fromList (blockStmts b)) absProcState finalRegs -- Do nothing when this block ends in a translation error. TranslateError _ msg -> do -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts ainfo regs (blockStmts b) + let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) let ret = StatementList { stmtsIdent = idx , stmtsNonterm = blockStmts b @@ -1060,7 +1155,7 @@ parseBlock ctx idx b regs = do pure (ret, idx+1) ArchTermStmt ts s -> do -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts ainfo regs (blockStmts b) + let absProcState' = absEvalStmts ainfo absProcState (blockStmts b) mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b) let abst = finalAbsBlockState absProcState' s -- Compute possible next IPS. @@ -1079,15 +1174,16 @@ parseBlock ctx idx b regs = do -- | This evalutes the statements in a block to expand the information known -- about control flow targets of this block. addBlocks :: ArchSegmentOff arch - -- ^ Address of theze blocks - -> FoundAddr arch - -- ^ State leading to explore block - -> Int - -- ^ Number of blocks covered - -> Map Word64 (Block arch ids) - -- ^ Map from labelIndex to associated block - -> FunM arch s ids () -addBlocks src finfo sz blockMap = + -- ^ Address of theze blocks + -> FoundAddr arch + -- ^ State leading to explore block + -> RegState (ArchReg arch) (Value arch ids) + -> Int + -- ^ Number of blocks covered + -> Map Word64 (Block arch ids) + -- ^ Map from labelIndex to associated block + -> FunM arch s ids () +addBlocks src finfo initRegs sz blockMap = case Map.lookup 0 blockMap of Nothing -> do error $ "addBlocks given empty blockRegion." @@ -1097,19 +1193,9 @@ addBlocks src finfo sz blockMap = funAddr <- gets curFunAddr s <- use curFunCtx - -- Combine entries of functions we've discovered thus far with - -- undiscovered functions with entries marked InitAddr, which we assume is - -- info we know from the symbol table or some other reliable source, and - -- pass in. Only used in analysis if pctxTrustKnownFns is True. - let knownFns = - if s^.trustKnownFns then - Set.union (Map.keysSet $ s^.funInfo) - (Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions) - else - Set.empty let ctx = ParseContext { pctxMemory = memory s , pctxArchInfo = archInfo s - , pctxKnownFnEntries = knownFns + , pctxKnownFnEntries = s^.trustedFunctionEntryPoints , pctxFunAddr = funAddr , pctxAddr = src , pctxBlockMap = blockMap @@ -1118,7 +1204,7 @@ addBlocks src finfo sz blockMap = , _intraJumpTargets = [] , _newFunctionAddrs = [] } - let ((pblock,_), ps) = runState (parseBlock ctx 0 b regs) ps0 + let ((pblock,_), ps) = runState (parseBlock ctx 0 initRegs b regs) ps0 let pb = ParsedBlock { pblockAddr = src , blockSize = sz , blockReason = foundReason finfo @@ -1131,6 +1217,26 @@ addBlocks src finfo sz blockMap = . markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs) mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) +-- | Record an error block with no statements for the given address. +recordErrorBlock :: ArchSegmentOff arch -> FoundAddr arch -> Maybe String -> FunM arch s ids () +recordErrorBlock addr finfo maybeError = do + s <- use curFunCtx + let mem = memory s + let errMsg = maybe "Unknown error" Text.pack maybeError + let stmts = StatementList + { stmtsIdent = 0 + , stmtsNonterm = [] + , stmtsTerm = ParsedTranslateError errMsg + , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) + } + let pb = ParsedBlock { pblockAddr = addr + , blockSize = 0 + , blockReason = foundReason finfo + , blockAbstractState = foundAbstractState finfo + , blockStatementList = stmts + } + id %= addFunBlock addr pb + transfer :: ArchSegmentOff arch -> FunM arch s ids () transfer addr = do s <- use curFunCtx @@ -1149,43 +1255,31 @@ transfer addr = do Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o _ -> fromInteger (segoffBytesLeft addr) let ab = foundAbstractState finfo - (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab - + case mkInitialRegsForBlock ainfo addr ab of + Left msg -> do + recordErrorBlock addr finfo (Just msg) + Right initRegs -> do + (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr initRegs maxSize + -- If no blocks are returned, then we just add an empty parsed block. + if null bs0 then do + recordErrorBlock addr finfo maybeError + else do + -- Rewrite returned blocks to simplify expressions #ifdef USE_REWRITER - bs1 <- do - let archStmt = rewriteArchStmt ainfo - let secAddrMap = memSectionIndexMap mem - liftST $ do - ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap - traverse (rewriteBlock ainfo ctx) bs0 + bs1 <- do + let archStmt = rewriteArchStmt ainfo + let secAddrMap = memSectionIndexMap mem + liftST $ do + ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap + traverse (rewriteBlock ainfo ctx) bs0 #else - bs1 <- pure bs0 + bs1 <- pure bs0 #endif - - -- If no blocks are returned, then we just add an empty parsed block. - if null bs1 then do - let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError - let stmts = StatementList - { stmtsIdent = 0 - , stmtsNonterm = [] - , stmtsTerm = ParsedTranslateError errMsg - , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) - } - let pb = ParsedBlock { pblockAddr = addr - , blockSize = sz - , blockReason = foundReason finfo - , blockAbstractState = foundAbstractState finfo - , blockStatementList = stmts - } - id %= addFunBlock addr pb - else do - -- Rewrite returned blocks to simplify expressions - - -- Compute demand set - let bs = bs1 -- eliminateDeadStmts ainfo bs1 - -- Call transfer blocks to calculate parsedblocks - let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] - addBlocks addr finfo sz blockMap + -- Compute demand set + let bs = bs1 -- eliminateDeadStmts ainfo bs1 + -- Call transfer blocks to calculate parsedblocks + let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] + addBlocks addr finfo initRegs sz blockMap ------------------------------------------------------------------------ -- Main loop @@ -1347,8 +1441,8 @@ cfgFromAddrs :: -- -- Each entry contains an address and the value stored in it. -> DiscoveryState arch -cfgFromAddrs ainfo mem symbols = - cfgFromAddrsAndState (emptyDiscoveryState mem symbols ainfo) +cfgFromAddrs ainfo mem addrSymMap = + cfgFromAddrsAndState (emptyDiscoveryState mem addrSymMap ainfo) ------------------------------------------------------------------------ -- Resolve functions with logging diff --git a/base/src/Data/Macaw/Discovery/AbsEval.hs b/base/src/Data/Macaw/Discovery/AbsEval.hs index a0f63a7f..e1a29c0d 100644 --- a/base/src/Data/Macaw/Discovery/AbsEval.hs +++ b/base/src/Data/Macaw/Discovery/AbsEval.hs @@ -88,8 +88,9 @@ absEvalStmt info stmt = withArchConstraints info $ pure () -- This takes a processor state and updates it based on executing each statement. -absEvalStmts :: ArchitectureInfo arch +absEvalStmts :: Foldable t + => ArchitectureInfo arch -> AbsProcessorState (ArchReg arch) ids - -> [Stmt arch ids] + -> t (Stmt arch ids) -> AbsProcessorState (ArchReg arch) ids absEvalStmts info r stmts = execState (mapM_ (absEvalStmt info) stmts) r diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index 887bcf56..8f7ed910 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -10,6 +10,7 @@ discovery. {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -33,7 +34,7 @@ module Data.Macaw.Discovery.State , globalDataMap , funInfo , unexploredFunctions - , trustKnownFns + , trustedFunctionEntryPoints , exploreFnPred -- * DiscoveryFunInfo , DiscoveryFunInfo(..) @@ -50,7 +51,10 @@ import qualified Data.ByteString.Char8 as BSC import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Parameterized.Classes +import qualified Data.Parameterized.Map as MapF import Data.Parameterized.Some +import Data.Set (Set) +import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Vector as V @@ -131,6 +135,18 @@ data ParsedTermStmt arch ids -- | A call with the current register values and location to return to or 'Nothing' if this is a tail call. = ParsedCall !(RegState (ArchReg arch) (Value arch ids)) !(Maybe (ArchSegmentOff arch)) + -- | @PLTStub regs addr@ denotes a terminal statement that has been identified as a PLT stub + -- for calling the given relocation. + -- + -- This is a special case of a tail call. It has been added + -- separately because it occurs frequently in dynamically linked + -- code, and we can use this to recognize PLT stubs. + -- + -- The register set only contains registers that were changed in + -- the function. Other registers have the initial value. + | PLTStub !(MapF.MapF (ArchReg arch) (Value arch ids)) + !(ArchSegmentOff arch) + !(Relocation (ArchAddrWidth arch)) -- | A jump to an explicit address within a function. | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) -- | A lookup table that branches to one of a vector of addresses. @@ -172,11 +188,14 @@ ppTermStmt :: ArchConstraints arch ppTermStmt ppOff tstmt = case tstmt of ParsedCall s Nothing -> - text "tail call" <$$> + text "tail_call" <$$> indent 2 (pretty s) ParsedCall s (Just next) -> text "call and return to" <+> text (show next) <$$> indent 2 (pretty s) + PLTStub regs addr r -> + text "call_via_got" <+> text (show (relocationSym r)) <+> "(at" <+> text (show addr) PP.<> ")" <$$> + indent 2 (ppRegMap regs) ParsedJump s addr -> text "jump" <+> text (show addr) <$$> indent 2 (pretty s) @@ -279,7 +298,7 @@ parsedBlocks = lens _parsedBlocks (\s v -> s { _parsedBlocks = v }) instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where pretty info = text "function" <+> text (BSC.unpack (discoveredFunName info)) - <+> pretty "@" <+> pretty (show (discoveredFunAddr info)) + <+> "@" <+> pretty (show (discoveredFunAddr info)) <$$> vcat (pretty <$> Map.elems (info^.parsedBlocks)) @@ -310,9 +329,20 @@ data DiscoveryState arch -- they are analyzed. -- -- The keys in this map and `_funInfo` should be mutually disjoint. - , _trustKnownFns :: !Bool - -- ^ Should we use and depend on known function entries in - -- our analysis? E.g. used to distinguish jumps vs. tail calls + , _trustedFunctionEntryPoints :: !(Set (ArchSegmentOff arch)) + -- ^ This is the set of addresses that we treat + -- as definitely belonging to function entry + -- points. + -- + -- The discovery process will not allow + -- intra-procedural jumps to these addresses. + -- Jumps to these addresses must either be calls + -- or tail calls. + -- + -- To ensure translation is invariant on the + -- order in which functions are visited, this + -- set should be initialized upfront, and not + -- changed. , _exploreFnPred :: Maybe (ArchSegmentOff arch -> Bool) -- ^ if present, this predicate decides whether to explore -- a function at the given address or not @@ -342,15 +372,15 @@ emptyDiscoveryState :: Memory (ArchAddrWidth arch) -> ArchitectureInfo arch -- ^ architecture/OS specific information -> DiscoveryState arch -emptyDiscoveryState mem symbols info = +emptyDiscoveryState mem addrSymMap info = DiscoveryState { memory = mem - , symbolNames = symbols + , symbolNames = addrSymMap , archInfo = info , _globalDataMap = Map.empty , _funInfo = Map.empty , _unexploredFunctions = Map.empty - , _trustKnownFns = False + , _trustedFunctionEntryPoints = Map.keysSet addrSymMap , _exploreFnPred = Nothing } @@ -368,8 +398,10 @@ unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunction funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch))) funInfo = lens _funInfo (\s v -> s { _funInfo = v }) -trustKnownFns :: Simple Lens (DiscoveryState arch) Bool -trustKnownFns = lens _trustKnownFns (\s v -> s { _trustKnownFns = v }) +trustedFunctionEntryPoints :: Simple Lens (DiscoveryState arch) (Set (ArchSegmentOff arch)) +trustedFunctionEntryPoints = + lens _trustedFunctionEntryPoints + (\s v -> s { _trustedFunctionEntryPoints = v }) exploreFnPred :: Simple Lens (DiscoveryState arch) (Maybe (ArchSegmentOff arch -> Bool)) exploreFnPred = lens _exploreFnPred (\s v -> s { _exploreFnPred = v }) diff --git a/base/src/Data/Macaw/Fold.hs b/base/src/Data/Macaw/Fold.hs index efaa9eed..f42d6f4b 100644 --- a/base/src/Data/Macaw/Fold.hs +++ b/base/src/Data/Macaw/Fold.hs @@ -53,8 +53,14 @@ emptyValueFold = -- | This folds over elements of a values in a values. -- --- It memoizes values so that it only evaluates assignments with the same id --- once. +-- It memoizes the results so that if an assignment is visited +-- multiple times, we only visit the children the first time it is +-- visited. On subsequent visits, `foldAssign` will still be called, +-- but the children will not be revisited. +-- +-- This makes the total time to visit linear with respect to the +-- number of children, but still allows determining whether a term is +-- shared. foldValueCached :: forall r arch ids tp . (Monoid r, FoldableFC (ArchFn arch)) => ValueFold arch ids r diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index 89de8240..20f68e69 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -19,15 +19,18 @@ module Data.Macaw.Memory , memAddrWidth , memWidth , memSegments - , memSectionIndexMap - , memSegmentIndexMap , memAsAddrPairs -- * Constructing memory , emptyMemory , insertMemSegment , InsertError(..) , showInsertError + -- * Load values + , memBaseAddr + , memSetBaseAddr , memBindSectionIndex + , memSectionIndexMap + , memSegmentIndexMap , memBindSegmentIndex -- * Memory segments , MemSegment @@ -52,6 +55,7 @@ module Data.Macaw.Memory , memWord , memWordToUnsigned , memWordToSigned + , addrRead -- * Addresses , MemAddr(..) , absoluteAddr @@ -272,7 +276,7 @@ bsWord64 LittleEndian = bsWord64le -- -- Operations on it require the `MemWidth` constraint to be satisfied, so in practice -- this only works for 32 and 64-bit values. -newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 } +newtype MemWord (w :: Nat) = MemWord { memWordValue :: Word64 } -- | Convert word64 @x@ into mem word @x mod 2^w-1@. memWord :: forall w . MemWidth w => Word64 -> MemWord w @@ -314,14 +318,21 @@ class (1 <= w) => MemWidth w where -- | Rotates the value by the given index. addrRotate :: MemWord w -> Int -> MemWord w - -- | Read an address with the given endianess. - -- - -- This returns nothing if the bytestring is too short. - addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w) +-- | Read an address with the given endianess. +-- +-- This returns nothing if the bytestring is too short. +addrRead :: forall w . MemWidth w => Endianness -> BS.ByteString -> Maybe (MemWord w) +addrRead e s = + case addrWidthRepr (Proxy :: Proxy w) of + Addr32 | BS.length s < 4 -> Nothing + | otherwise -> Just $ MemWord $ fromIntegral $ bsWord32 e s + Addr64 | BS.length s < 8 -> Nothing + | otherwise -> Just $ MemWord $ bsWord64 e s + -- | Return the value represented by the MemWord as an unsigned integer. memWordToUnsigned :: MemWord w -> Integer -memWordToUnsigned = fromIntegral . _memWordValue +memWordToUnsigned = fromIntegral . memWordValue -- | Treat the word as a signed integer. memWordToSigned :: MemWidth w => MemWord w -> Integer @@ -388,18 +399,12 @@ instance MemWidth 32 where addrRotate (MemWord w) i = MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) addrSize _ = 4 - addrRead e s - | BS.length s < 4 = Nothing - | otherwise = Just $ MemWord $ fromIntegral $ bsWord32 e s instance MemWidth 64 where addrWidthRepr _ = Addr64 addrWidthMask _ = 0xffffffffffffffff addrRotate (MemWord w) i = MemWord (w `rotate` i) addrSize _ = 8 - addrRead e s - | BS.length s < 8 = Nothing - | otherwise = Just $ MemWord $ bsWord64 e s -- | Number of bytes in an address addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a @@ -471,6 +476,15 @@ data Relocation w , relocationEndianness :: !Endianness -- ^ The byte order used to encode the relocation in -- memory. + , relocationJumpSlot :: !Bool + -- ^ Returns true if this is a jump slot relocation. + -- + -- This relocation is specifically used for global + -- offset table entries, and are typically resolved + -- when the function is first called rather than at + -- load time. The address will be initially the + -- entry sequence stub, and will be updated once + -- resolved by the stub. } -- | Short encoding of endianness for relocation pretty printing @@ -817,6 +831,8 @@ data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w) -- ^ Map from registered section indices to the segment offset it is loaded at. , memSegmentIndexMap :: !(Map SegmentIndex (MemSegment w)) -- ^ Map from registered segment indices to associated segment. + , memBaseAddr :: !(Maybe (MemAddr w)) + -- ^ This denotes the base region for loads. } -- | Return the set of memory segments in memory. @@ -846,12 +862,17 @@ memBindSegmentIndex idx seg mem | otherwise = mem { memSegmentIndexMap = Map.insert idx seg (memSegmentIndexMap mem) } +-- | Set the region index used or the load addresses. +memSetBaseAddr :: MemAddr w -> Memory w -> Memory w +memSetBaseAddr r m = m { memBaseAddr = Just r } + -- | A memory with no segments. emptyMemory :: AddrWidthRepr w -> Memory w -emptyMemory w = Memory { memAddrWidth = w - , memSegmentMap = Map.empty +emptyMemory w = Memory { memAddrWidth = w + , memSegmentMap = Map.empty , memSectionIndexMap = Map.empty , memSegmentIndexMap = Map.empty + , memBaseAddr = Nothing } -- | Return segments with executable permissions. @@ -945,7 +966,7 @@ addrLeastBit (MemAddr _ (MemWord off)) = off `testBit` 0 -- | Increment an address by a fixed amount. incAddr :: MemWidth w => Integer -> MemAddr w -> MemAddr w -incAddr o (MemAddr i off) = MemAddr i (off + fromInteger o) +incAddr o a = a { addrOffset = addrOffset a + fromInteger o } -- | Returns the number of bytes between two addresses if they point to -- the same region and `Nothing` if they are different segments. @@ -979,10 +1000,9 @@ resolveAddr :: Memory w -> RegionIndex -> MemWord w -> Maybe (MemSegmentOff w) resolveAddr = resolveRegionOff {-# DEPRECATED resolveAddr "Use resolveRegionOff" #-} - -- | Return the address of a segment offset. segoffAddr :: MemWidth w => MemSegmentOff w -> MemAddr w -segoffAddr (MemSegmentOff seg off) = relativeAddr seg off +segoffAddr (MemSegmentOff seg off) = segmentOffAddr seg off -- | Return the segment associated with the given address if well-defined. resolveAbsoluteAddr :: Memory w -> MemWord w -> Maybe (MemSegmentOff w) @@ -1336,40 +1356,46 @@ addrContentsAfter mem addr = do addrWidthClass (memAddrWidth mem) $ segoffContentsAfter =<< resolveMemAddr mem addr --- | Read a bytestring from a sequence of statements. --- --- This is a helper method for @readByteString@ below. -readByteString' :: MemWidth w - => MemSegmentOff w - -- ^ Initial starting address - -> [BS.ByteString] - -- ^ Bytestring read so far (in reverse order) - -> [MemChunk w] - -- ^ Remaining segments to read from. +-- | Attempt to read a bytestring of the given length +readByteString' :: RegionIndex + -- ^ Region we are in. + -> BS.ByteString + -- ^ Bytestring read so far. -> Word64 - -- ^ Number of bytes remaining to read. - -> Either (MemoryError w) [BS.ByteString] -readByteString' _ prev _ 0 = + -- ^ Bytes read so far. + -> [MemChunk w] + -- ^ Remaining memory chunks to read from. + -> Word64 + -- ^ Total remaining number of bytes to read. + -> Either (MemoryError w) BS.ByteString +readByteString' _ prev _ _ 0 = pure $! prev -readByteString' _ _ [] _ = error "internal: readByteString' given too many bytes." -readByteString' initAddr prev (ByteRegion bs:rest) cnt = - if toInteger cnt <= toInteger (BS.length bs) then - pure $! BS.take (fromIntegral cnt) bs : prev +readByteString' _ _ _ [] _ = error "internal: readByteString' given too many bytes." +readByteString' reg prev off (ByteRegion bs:rest) cnt = do + let sz = fromIntegral (BS.length bs) + if cnt <= sz then + pure $! prev <> BS.take (fromIntegral cnt) bs else do - let cnt' = cnt - fromIntegral (BS.length bs) - readByteString' initAddr (bs:prev) rest cnt' -readByteString' initAddr prev (RelocationRegion r:_) _ = do - let cnt = sum (toInteger . BS.length <$> prev) - let addr = incAddr cnt (relativeSegmentAddr initAddr) + let off' = off + sz + let cnt' = cnt - sz + seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt' +readByteString' reg _ off (RelocationRegion r:_) _ = do + let addr = MemAddr { addrBase = reg, addrOffset = MemWord off } Left $! UnexpectedRelocation addr r -readByteString' initAddr prev (BSSRegion sz:rest) cnt = - if toInteger cnt <= toInteger sz then - pure $! BS.replicate (fromIntegral cnt) 0 : prev +readByteString' reg prev off (BSSRegion sz0:rest) cnt = do + let sz :: Word64 + sz = memWordValue sz0 + if cnt <= sz then do + when (cnt > fromIntegral (maxBound :: Int)) $ do + error $ "Illegal size " ++ show cnt + pure $! prev <> BS.replicate (fromIntegral cnt) 0 else do - let cnt' = cnt - fromIntegral sz - let next = BS.replicate (fromIntegral sz) 0 : prev - seq cnt' $ seq next $ - readByteString' initAddr next rest cnt' + when (sz > fromIntegral (maxBound :: Int)) $ do + error $ "Illegal size " ++ show cnt + let bs = BS.replicate (fromIntegral sz) 0 + let off' = off + sz + let cnt' = cnt - sz + seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt' -- | Attempt to read a bytestring of the given length readByteString :: Memory w @@ -1383,7 +1409,7 @@ readByteString mem addr cnt = addrWidthClass (memAddrWidth mem) $ do Left $! InvalidRead segOff cnt -- Get contents after segment l <- segoffContentsAfter segOff - mconcat . reverse <$> readByteString' segOff [] l cnt + readByteString' (addrBase addr) BS.empty (memWordValue (addrOffset addr)) l cnt -- | Read an address from the value in the segment or report a memory -- error. @@ -1454,7 +1480,7 @@ relativeSegmentContents memSegs = concatMap relativeOffset memSegs -- Each MemSegment has a segmentOffset indicating the offset from segmentBase its located. -- This makes the offsets within the MemChunk relative to that segmentOffset. relativeOffset :: (MemWidth w) => MemSegment w -> [(MemAddr w, MemChunk w)] - relativeOffset seg = map (\(contentOffset,r) -> (relativeAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg + relativeOffset seg = map (\(contentOffset,r) -> (segmentOffAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg -- | Naive string matching algorithm identifies matches to given -- pattern within the list of memory segments and their corresponding diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index 18fc6546..7861188d 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -182,7 +182,11 @@ data MemLoadWarning | ExpectedSectionSymbolLocal | InvalidSectionSymbolIndex !Elf.ElfSectionIndex | UnsupportedProcessorSpecificSymbolIndex !SymbolName !ElfSectionIndex - | IgnoreRelocation !RelocationError + | IgnoreRelocation !Integer !String !RelocationError + -- ^ @IgnoreRelocation idx tp err@ warns we ignored the location at index @idx@ due to @err@. + -- + -- @tp@ is a string representing the type which we print, because usually errors come because + -- we don't support that type or only partially implement it. ppSymbol :: SymbolName -> String ppSymbol "" = "unnamed symbol" @@ -227,8 +231,8 @@ instance Show MemLoadWarning where "Expected section symbol to have a valid index instead of " ++ show idx ++ "." show (UnsupportedProcessorSpecificSymbolIndex nm idx) = "Could not resolve symbol index " ++ show idx ++ " for symbol " ++ BSC.unpack nm ++ "." - show (IgnoreRelocation err) = - "Ignoring relocation: " ++ show err + show (IgnoreRelocation idx typeName err) = + "Ignoring relocation " ++ show idx ++ " with type " ++ typeName ++ ": " ++ show err data MemLoaderState w = MLS { _mlsMemory :: !(Memory w) , mlsEndianness :: !Endianness @@ -386,6 +390,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 8 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = True } Elf.R_X86_64_PC32 -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -393,8 +398,9 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationOffset = off , relocationIsRel = True , relocationSize = 4 - , relocationIsSigned = False + , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } Elf.R_X86_64_32 -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -404,6 +410,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } Elf.R_X86_64_32S -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -413,6 +420,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 4 , relocationIsSigned = True , relocationEndianness = LittleEndian + , relocationJumpSlot = False } Elf.R_X86_64_64 -> do sym <- resolveRelocationSym symtab (Elf.relSym rel) @@ -422,6 +430,7 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 8 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False } -- R_X86_64_GLOB_DAT are used to update GOT entries with their -- target address. They are similar to R_x86_64_64 except appear @@ -435,6 +444,19 @@ relaTargetX86_64 _ symtab rel off _isRel = , relocationSize = 8 , relocationIsSigned = False , relocationEndianness = LittleEndian + , relocationJumpSlot = False + } + + Elf.R_X86_64_RELATIVE -> do + when (Elf.relSym rel /= 0) $ do + Left $ RelocationBadSymbolIndex (fromIntegral (Elf.relSym rel)) + pure $ Relocation { relocationSym = LoadBaseAddr + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 8 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + , relocationJumpSlot = False } -- Jhx Note. These will be needed to support thread local variables. @@ -468,6 +490,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = end + , relocationJumpSlot = False } Elf.R_ARM_RELATIVE -> do -- This relocation has the value B(S) + A where @@ -503,6 +526,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = end + , relocationJumpSlot = False } Elf.R_ARM_JUMP_SLOT -> do -- This is a PLT relocation @@ -520,6 +544,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag = , relocationSize = 4 , relocationIsSigned = False , relocationEndianness = end + , relocationJumpSlot = True } tp -> do relocError $ RelocationUnsupportedType (show tp) @@ -556,12 +581,13 @@ resolveRela :: ( MemWidth w ) => SymbolTable -> RelocationResolver tp + -> Integer -- ^ Index of relocation -> Elf.RelaEntry tp -> ResolveFn (MemLoader w) w -resolveRela symtab resolver rela msegIdx _ = +resolveRela symtab resolver relaIdx rela msegIdx _ = case resolver msegIdx symtab (Elf.relaToRel rela) (fromIntegral (Elf.relaAddend rela)) IsRela of Left e -> do - addWarning (IgnoreRelocation e) + addWarning (IgnoreRelocation relaIdx (show (Elf.relaType rela)) e) pure Nothing Right r -> do pure $ Just r @@ -573,9 +599,10 @@ resolveRel :: ( MemWidth w => Endianness -- ^ Endianness of Elf file -> SymbolTable -- ^ Symbol table -> RelocationResolver tp + -> Integer -- ^ Index of relocation -> Elf.RelEntry tp -> ResolveFn (MemLoader w) w -resolveRel end symtab resolver rel msegIdx bytes = do +resolveRel end symtab resolver relIdx rel msegIdx bytes = do -- Get the number of bits in the addend let bits = Elf.relocTargetBits (Elf.relType rel) -- Compute the addended by masking off the low order bits, and @@ -591,7 +618,7 @@ resolveRel end symtab resolver rel msegIdx bytes = do -- Update the resolver. case resolver msegIdx symtab rel (fromInteger saddend) IsRel of Left e -> do - addWarning (IgnoreRelocation e) + addWarning (IgnoreRelocation relIdx (show (Elf.relType rel)) e) pure Nothing Right r -> do pure $ Just r @@ -610,12 +637,13 @@ relocFromRela :: ( Elf.IsRelocationType tp ) => SymbolTable -> RelocationResolver tp + -> Integer -- ^ Index of relocation entry for error reporting -> Elf.RelaEntry tp -> (MemWord w, RelocEntry (MemLoader w) w) -relocFromRela symtab resolver r = +relocFromRela symtab resolver idx r = ( fromIntegral (Elf.relaAddr r) , RelocEntry { relocEntrySize = relocTargetBytes (Elf.relaType r) - , applyReloc = resolveRela symtab resolver r + , applyReloc = resolveRela symtab resolver idx r } ) @@ -627,12 +655,13 @@ relocFromRel :: ( Elf.IsRelocationType tp => Endianness -> SymbolTable -> RelocationResolver tp + -> Integer -- ^ Index of relocation entry for error reporting. -> Elf.RelEntry tp -> (MemWord w, RelocEntry (MemLoader w) w) -relocFromRel end symtab resolver r = +relocFromRel end symtab resolver idx r = ( fromIntegral (Elf.relAddr r) , RelocEntry { relocEntrySize = relocTargetBytes (Elf.relType r) - , applyReloc = resolveRel end symtab resolver r + , applyReloc = resolveRel end symtab resolver idx r } ) @@ -657,7 +686,7 @@ relocMapFromRelAndRela dta resolver symtab _ (Just relaBuffer) = do addWarning (RelocationParseFailure msg) pure Map.empty Right entries -> do - pure $ Map.fromList $ relocFromRela symtab resolver <$> entries + pure $ Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do w <- uses mlsMemory memAddrWidth reprConstraints w $ do @@ -666,7 +695,7 @@ relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do addWarning (RelocationParseFailure msg) pure Map.empty Right entries -> do - pure $ Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries + pure $ Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries resolveUndefinedSymbolReq :: SymbolName @@ -716,6 +745,7 @@ symbolDefTypeMap = Map.fromList , (,) Elf.STT_FUNC SymbolDefFunc , (,) Elf.STT_TLS SymbolDefThreadLocal , (,) Elf.STT_GNU_IFUNC SymbolDefIFunc + , (,) Elf.STT_NOTYPE SymbolDefNoType ] resolveDefinedSymbolDef :: ElfSymbolTableEntry wtp @@ -829,9 +859,9 @@ dynamicRelocationMap hdr ph contents = Right Elf.PLTEmpty -> do pure $! Map.empty Right (Elf.PLTRel entries) -> do - pure $! Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries + pure $! Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries Right (Elf.PLTRela entries) -> do - pure $! Map.fromList $ relocFromRela symtab resolver <$> entries + pure $! Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries pure $ Map.union loadtimeRelocs pltRelocs ------------------------------------------------------------------------ diff --git a/base/src/Data/Macaw/Memory/Symbols.hs b/base/src/Data/Macaw/Memory/Symbols.hs index e1cd16ee..e648f841 100644 --- a/base/src/Data/Macaw/Memory/Symbols.hs +++ b/base/src/Data/Macaw/Memory/Symbols.hs @@ -42,6 +42,9 @@ data SymbolDefType -- ^ This symbol denotes a thread local identifier | SymbolDefIFunc -- ^ This symbol is a "IFUNC" (e.g., it calls a function to resolve the symbol) + | SymbolDefNoType + -- ^ This symbol does not have a specified type. + -- | Describes whether an undefined symbol is required during linking. data SymbolRequirement diff --git a/deps/crucible b/deps/crucible index 70587094..6720f34b 160000 --- a/deps/crucible +++ b/deps/crucible @@ -1 +1 @@ -Subproject commit 70587094dc19e1d27f4fc2eca4a704f9dac9110c +Subproject commit 6720f34b2db2e86512b5c8b469cdd4ccbad1fc3b diff --git a/deps/elf-edit b/deps/elf-edit index afa3dcf4..a9428d84 160000 --- a/deps/elf-edit +++ b/deps/elf-edit @@ -1 +1 @@ -Subproject commit afa3dcf45afc5429139cae09f8584a722a737946 +Subproject commit a9428d847f63cddfbd38eb3b0fea07153dbcd18f diff --git a/deps/flexdis86 b/deps/flexdis86 index e53a3359..5c4453b0 160000 --- a/deps/flexdis86 +++ b/deps/flexdis86 @@ -1 +1 @@ -Subproject commit e53a3359b51ad1a91758144a85d8a8eb635d91d9 +Subproject commit 5c4453b0b2c89af2267174bf06b486a9dd6c57a2 diff --git a/deps/llvm-pretty b/deps/llvm-pretty index 6cc69900..e05cf319 160000 --- a/deps/llvm-pretty +++ b/deps/llvm-pretty @@ -1 +1 @@ -Subproject commit 6cc6990017c4d875311a52a6d3988fb7a962ed7a +Subproject commit e05cf3195b0938961cb0d8ecf4b7a4821f0d2673 diff --git a/deps/parameterized-utils b/deps/parameterized-utils index 1c37bf1e..045e90c5 160000 --- a/deps/parameterized-utils +++ b/deps/parameterized-utils @@ -1 +1 @@ -Subproject commit 1c37bf1eb8be0f7c8beacd0998a30bc932d9122d +Subproject commit 045e90c564c7839667ff92274d442a343b76d168 diff --git a/symbolic/src/Data/Macaw/Symbolic/MemOps.hs b/symbolic/src/Data/Macaw/Symbolic/MemOps.hs index 975ee650..eb254bc7 100644 --- a/symbolic/src/Data/Macaw/Symbolic/MemOps.hs +++ b/symbolic/src/Data/Macaw/Symbolic/MemOps.hs @@ -513,7 +513,8 @@ doWriteMem st mvar globs w (BVMemRepr bytes endian) ptr0 val = let ?ptrWidth = M.addrWidthNatRepr w let v0 = regValue val v = LLVMValInt (ptrBase v0) (asBits v0) - mem1 <- storeRaw sym mem ptr ty v + let alignment = 0 -- default to byte alignment (FIXME) + mem1 <- storeRaw sym mem ptr ty alignment v return ((), setMem st mvar mem1) -------------------------------------------------------------------------------- diff --git a/x86/macaw-x86.cabal b/x86/macaw-x86.cabal index 186f0993..8ffd9049 100644 --- a/x86/macaw-x86.cabal +++ b/x86/macaw-x86.cabal @@ -16,7 +16,7 @@ library containers, flexdis86 >= 0.1.2, lens >= 4.7, - macaw-base >= 0.3.2, + macaw-base >= 0.3.3, mtl, parameterized-utils, text, diff --git a/x86/src/Data/Macaw/X86.hs b/x86/src/Data/Macaw/X86.hs index 56fc90b2..55a61501 100644 --- a/x86/src/Data/Macaw/X86.hs +++ b/x86/src/Data/Macaw/X86.hs @@ -68,6 +68,7 @@ import Text.PrettyPrint.ANSI.Leijen (Pretty(..), text) import Data.Macaw.AbsDomain.AbsState ( AbsBlockState + , curAbsStack , setAbsIP , absRegState , StackEntry(..) @@ -166,12 +167,11 @@ instance MemWidth w => Show (X86TranslateError w) where -- | Signal an error from the initial address. -initError :: ExploreLoc -- ^ Location to explore from. +initError :: MemSegmentOff 64 -- ^ Location to explore from. + -> RegState X86Reg (Value X86_64 ids) -> X86TranslateError 64 -> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64)) -initError loc err = do - let addr = loc_ip loc - let s = initX86State loc +initError addr s err = do let b = Block { blockLabel = 0 , blockStmts = [] , blockTerm = TranslateError s (Text.pack (show err)) @@ -275,7 +275,9 @@ disassembleFixedBlock :: NonceGenerator (ST st_s) ids -- ^ Number of bytes to translate -> ST st_s (Either (X86TranslateError 64) (Block X86_64 ids)) disassembleFixedBlock gen loc sz = do - case segoffContentsAfter (loc_ip loc) of + let addr = loc_ip loc + let initRegs = initX86State loc + case segoffContentsAfter addr of Left err -> do pure $ Left $ FlexdisMemoryError err Right fullContents -> do @@ -283,8 +285,8 @@ disassembleFixedBlock gen loc sz = do Left _err -> do error $ "Could not split memory." Right (contents,_) -> do - let pblock = emptyPreBlock (initX86State loc) - disassembleFixedBlock' gen pblock 0 (loc_ip loc) contents + let pblock = emptyPreBlock initRegs + disassembleFixedBlock' gen pblock 0 addr contents -- | Translate block, returning blocks read, ending -- PC, and an optional error. and ending PC. @@ -327,13 +329,14 @@ disassembleBlock :: forall s -> ST s (Block X86_64 s, MemWord 64, Maybe (X86TranslateError 64)) disassembleBlock nonce_gen loc max_size = do let addr = loc_ip loc + let regs = initX86State loc let sz = segoffOffset addr + max_size (b, next_ip_off, maybeError) <- case segoffContentsAfter addr of Left msg -> do - initError loc (FlexdisMemoryError msg) + initError addr regs (FlexdisMemoryError msg) Right contents -> do - let pblock = emptyPreBlock (initX86State loc) + let pblock = emptyPreBlock regs disassembleBlockImpl nonce_gen pblock 0 addr sz contents assert (next_ip_off > segoffOffset addr) $ do let block_sz = next_ip_off - segoffOffset addr @@ -415,56 +418,67 @@ transferAbsValue r f = VExtractF128 {} -> TopV VInsert {} -> TopV + -- | Disassemble block, returning either an error, or a list of blocks -- and ending PC. -tryDisassembleBlockFromAbsState :: forall s ids - . NonceGenerator (ST s) ids - -> MemSegmentOff 64 - -- ^ Address to disassemble at - -> Int - -- ^ Maximum size of this block - -> AbsBlockState X86Reg - -- ^ Abstract state of processor for defining state. - -> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String) -tryDisassembleBlockFromAbsState nonceGen addr maxSize ab = do +initRegsFromAbsState :: forall ids + . MemSegmentOff 64 + -- ^ Address to disassemble at + -> AbsBlockState X86Reg + -- ^ Abstract state of processor for defining state. + -> Either String (RegState X86Reg (Value X86_64 ids)) +initRegsFromAbsState addr ab = do t <- case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of - Nothing -> throwError "Could not determine height of X87 stack." + Nothing -> Left "Could not determine height of X87 stack." Just t -> pure t d <- case asConcreteSingleton (ab^.absRegState^.boundValue DF) of Nothing -> do - throwError $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF) + Left $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF) Just d -> pure d - let loc = ExploreLoc { loc_ip = addr - , loc_x87_top = fromInteger t - , loc_df_flag = d /= 0 - } + pure $ initX86State $ + ExploreLoc { loc_ip = addr + , loc_x87_top = fromInteger t + , loc_df_flag = d /= 0 + } + +-- | Disassemble block, returning either an error, or a list of blocks +-- and ending PC. +tryDisassembleBlock :: forall s ids + . NonceGenerator (ST s) ids + -> MemSegmentOff 64 + -- ^ Address to disassemble at + -> RegState X86Reg (Value X86_64 ids) + -- ^ Initial registers + -> Int + -- ^ Maximum size of this block + -> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String) +tryDisassembleBlock nonceGen addr initRegs maxSize = do let off = segoffOffset addr - let pblock = emptyPreBlock (initX86State loc) (b, nextIPOff, maybeError) <- lift $ case segoffContentsAfter addr of Left msg -> do - initError loc (FlexdisMemoryError msg) + initError addr initRegs (FlexdisMemoryError msg) Right contents -> do - disassembleBlockImpl nonceGen pblock 0 addr (off + fromIntegral maxSize) contents + disassembleBlockImpl nonceGen (emptyPreBlock initRegs) 0 addr (off + fromIntegral maxSize) contents let sz :: Int sz = fromIntegral $ nextIPOff - off pure $! (b, sz, show <$> maybeError) -- | Disassemble block, returning either an error, or a list of blocks -- and ending PC. -disassembleBlockFromAbsState :: forall s ids - . NonceGenerator (ST s) ids - -> MemSegmentOff 64 - -- ^ Address to disassemble at - -> Int - -- ^ Maximum size of this block - -> AbsBlockState X86Reg - -- ^ Abstract state of processor for defining state. - -> ST s ([Block X86_64 ids], Int, Maybe String) -disassembleBlockFromAbsState nonceGen addr maxSize ab = do - mr <- runExceptT $ tryDisassembleBlockFromAbsState nonceGen addr maxSize ab +disassembleBlockWithRegs :: forall s ids + . NonceGenerator (ST s) ids + -> MemSegmentOff 64 + -- ^ Address to disassemble at + -> RegState X86Reg (Value X86_64 ids) + -> Int + -- ^ Maximum size of this block + -- ^ Abstract state of processor for defining state. + -> ST s ([Block X86_64 ids], Int, Maybe String) +disassembleBlockWithRegs nonceGen addr initRegs maxSize = do + mr <- runExceptT $ tryDisassembleBlock nonceGen addr initRegs maxSize case mr of Left msg -> pure ([], 0, Just msg) Right (b,sz, merr) -> pure ([b],sz,merr) @@ -474,10 +488,10 @@ disassembleBlockFromAbsState nonceGen addr maxSize ab = do -- -- This can also return Nothing if the call is not supported. identifyX86Call :: Memory 64 - -> [Stmt X86_64 ids] + -> Seq (Stmt X86_64 ids) -> RegState X86Reg (Value X86_64 ids) -> Maybe (Seq (Stmt X86_64 ids), MemSegmentOff 64) -identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty +identifyX86Call mem stmts0 s = go stmts0 Seq.empty where -- Get value of stack pointer next_sp = s^.boundValue sp_reg -- Recurse on statements. @@ -503,19 +517,30 @@ identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty -- Otherwise skip over this instruction. | otherwise -> go prev (stmt Seq.<| after) +-- | Return true if stack pointer has been reset to original value, and +-- return address is on top of stack. +checkForReturnAddrX86 :: forall ids + . AbsProcessorState X86Reg ids + -> Bool +checkForReturnAddrX86 absState + | Just (StackEntry _ ReturnAddr) <- Map.lookup 0 (absState^.curAbsStack) = + True + | otherwise = + False + -- | Called to determine if the instruction sequence contains a return -- from the current function. -- -- An instruction executing a return from a function will place the -- ReturnAddr value (placed on the top of the stack by -- 'initialX86AbsState' above) into the instruction pointer. -identifyX86Return :: [Stmt X86_64 ids] +identifyX86Return :: Seq (Stmt X86_64 ids) -> RegState X86Reg (Value X86_64 ids) -> AbsProcessorState X86Reg ids -> Maybe (Seq (Stmt X86_64 ids)) identifyX86Return stmts s finalRegSt8 = case transferValue finalRegSt8 (s^.boundValue ip_reg) of - ReturnAddr -> Just $ Seq.fromList stmts + ReturnAddr -> Just stmts _ -> Nothing -- | Return state post call @@ -569,12 +594,14 @@ x86_64_info preservePred = ArchitectureInfo { withArchConstraints = \x -> x , archAddrWidth = Addr64 , archEndianness = LittleEndian - , disassembleFn = disassembleBlockFromAbsState + , mkInitialRegsForBlock = initRegsFromAbsState + , disassembleFn = disassembleBlockWithRegs , mkInitialAbsState = \_ addr -> initialX86AbsState addr , absEvalArchFn = transferAbsValue , absEvalArchStmt = \s _ -> s , postCallAbsState = x86PostCallAbsState , identifyCall = identifyX86Call + , checkForReturnAddr = \_ s -> checkForReturnAddrX86 s , identifyReturn = identifyX86Return , rewriteArchFn = rewriteX86PrimFn , rewriteArchStmt = rewriteX86Stmt diff --git a/x86/tests/ElfX64Linux.hs b/x86/tests/ElfX64Linux.hs index 91b455bd..d33ac86b 100644 --- a/x86/tests/ElfX64Linux.hs +++ b/x86/tests/ElfX64Linux.hs @@ -7,7 +7,7 @@ module ElfX64Linux ( ) where import Control.Lens ( (^.) ) -import Control.Monad ( unless ) +import Control.Monad ( unless, when ) import qualified Control.Monad.Catch as C import qualified Data.ByteString as B import qualified Data.Foldable as F @@ -16,6 +16,7 @@ import Data.Maybe import qualified Data.Set as S import Data.Typeable ( Typeable ) import Data.Word ( Word64 ) +import Numeric (showHex) import System.FilePath import qualified Test.Tasty as T import qualified Test.Tasty.HUnit as T @@ -35,9 +36,12 @@ elfX64LinuxTests :: [FilePath] -> T.TestTree elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest data Addr = Addr Int Word64 - deriving (Read,Show,Eq) + deriving (Read,Eq, Ord) -- ^ An address is a region index and offset +instance Show Addr where + showsPrec _ (Addr idx off) = showString "Addr " . shows idx . showString " 0x" . showHex off + -- | The type of expected results for test cases data ExpectedResult = R { funcs :: [(Addr, [(Addr, Integer)])] @@ -59,57 +63,79 @@ mkTest fp = T.testCase fp $ withELF elfFilename (testDiscovery fp) where elfFilename = dropExtension fp +toSegOff :: MM.Memory 64 -> Addr -> MM.MemSegmentOff 64 +toSegOff mem (Addr idx off) = do + let addr :: MM.MemAddr 64 + addr = MM.MemAddr idx (fromIntegral off) + case MM.asSegmentOff mem addr of + Just a -> a + Nothing -> + let ppSeg seg = " Segment: " ++ show (MM.segmentOffAddr seg 0) + in error $ "Could not resolve address : " ++ show addr ++ "\n" + ++ unlines (fmap ppSeg (MM.memSegments mem)) + +toAddr :: MM.MemSegmentOff 64 -> Addr +toAddr segOff = do + let addr :: MM.MemAddr 64 + addr = MM.segoffAddr segOff + in Addr (fromIntegral (MM.addrBase addr)) (fromIntegral (MM.addrOffset addr)) + -- | Run a test over a given expected result filename and the ELF file -- associated with it testDiscovery :: FilePath -> E.Elf 64 -> IO () -testDiscovery expectedFilename elf = - withMemory MM.Addr64 elf $ \mem entries -> do - let di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty entries [] - expectedString <- readFile expectedFilename - case readMaybe expectedString of - Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) - Just er -> do - let toSegOff :: Addr -> MM.MemSegmentOff 64 - toSegOff (Addr idx off) = do - let addr :: MM.MemAddr 64 - addr = MM.MemAddr idx (fromIntegral off) - case MM.asSegmentOff mem addr of - Just a -> a - Nothing -> do - let ppSeg seg = " Segment: " ++ show (MM.segmentOffAddr seg 0) - error $ "Could not resolve address : " ++ show addr ++ "\n" - ++ unlines (fmap ppSeg (MM.memSegments mem)) - let expectedEntries = M.fromList - [ (toSegOff entry - , S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts) - ) - | (entry, starts) <- funcs er - ] - ignoredBlocks :: S.Set (MM.MemSegmentOff 64) - ignoredBlocks = S.fromList (toSegOff <$> ignoreBlocks er) - T.assertEqual "Collection of discovered function starting points" +testDiscovery expectedFilename elf = do + let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing + , MM.loadRegionBaseOffset = 0 + } + (warn, mem, mentry, syms) <- + case MM.resolveElfContents opt elf of + Left err -> C.throwM (MemoryLoadError err) + Right r -> pure r + when (not (null warn)) $ do + error $ "Warnings while loading Elf " ++ show warn + let entries = maybeToList mentry ++ fmap MM.memSymbolStart syms + let addrSymMap :: M.Map (MM.MemSegmentOff 64) B.ByteString + addrSymMap = M.fromList [ (MM.memSymbolStart sym, MM.memSymbolName sym) + | sym <- syms + ] + let di = MD.cfgFromAddrs RO.x86_64_linux_info mem addrSymMap entries [] + expectedString <- readFile expectedFilename + case readMaybe expectedString of + Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) + Just er -> do + let expectedEntries :: M.Map (MM.MemSegmentOff 64) (S.Set (Addr, Integer)) + expectedEntries = M.fromList + [ (toSegOff mem entry + , S.fromList ((\(s,sz) -> (s, sz)) <$> starts) + ) + | (entry, starts) <- funcs er + ] + ignoredBlocks :: S.Set (MM.MemSegmentOff 64) + ignoredBlocks = S.fromList (toSegOff mem <$> ignoreBlocks er) + T.assertEqual "Collection of discovered function starting points" (M.keysSet expectedEntries `S.difference` ignoredBlocks) (M.keysSet (di ^. MD.funInfo)) - F.forM_ (M.elems (di ^. MD.funInfo)) $ \(PU.Some dfi) -> do - F.forM_ (M.elems (dfi ^. MD.parsedBlocks)) $ \pb -> do - let addr = MD.pblockAddr pb - unless (S.member addr ignoredBlocks) $ do - let term = blockTerminator pb - T.assertBool ("Unclassified block at " ++ show (MD.pblockAddr pb)) (not (isClassifyFailure term)) - T.assertBool ("Translate error at " ++ show (MD.pblockAddr pb) ++ " " ++ show term) (not (isTranslateError term)) - let actualEntry = MD.discoveredFunAddr dfi - -- actualEntry = fromIntegral (MM.addrValue (MD.discoveredFunAddr dfi)) - let actualBlockStarts = S.fromList [ (addr, toInteger (MD.blockSize pbr)) - | pbr <- M.elems (dfi ^. MD.parsedBlocks) - , let addr = MD.pblockAddr pbr - , addr `S.notMember` ignoredBlocks - ] - case (S.member actualEntry ignoredBlocks, M.lookup actualEntry expectedEntries) of - (True, _) -> return () - (_, Nothing) -> - T.assertFailure (printf "Unexpected entry point: %s" (show actualEntry)) - (_, Just expectedBlockStarts) -> - T.assertEqual (printf "Block starts for %s" (show actualEntry)) + F.forM_ (M.elems (di ^. MD.funInfo)) $ \(PU.Some dfi) -> do + F.forM_ (M.elems (dfi ^. MD.parsedBlocks)) $ \pb -> do + let addr = MD.pblockAddr pb + unless (S.member addr ignoredBlocks) $ do + let term = blockTerminator pb + T.assertBool ("Unclassified block at " ++ show (MD.pblockAddr pb)) (not (isClassifyFailure term)) + T.assertBool ("Translate error at " ++ show (MD.pblockAddr pb) ++ " " ++ show term) (not (isTranslateError term)) + let actualEntry = MD.discoveredFunAddr dfi + -- actualEntry = fromIntegral (MM.addrValue (MD.discoveredFunAddr dfi)) + let actualBlockStarts :: S.Set (Addr, Integer) + actualBlockStarts = S.fromList [ (toAddr addr, toInteger (MD.blockSize pbr)) + | pbr <- M.elems (dfi ^. MD.parsedBlocks) + , let addr = MD.pblockAddr pbr + , addr `S.notMember` ignoredBlocks + ] + case (S.member actualEntry ignoredBlocks, M.lookup actualEntry expectedEntries) of + (True, _) -> return () + (_, Nothing) -> + T.assertFailure (printf "Unexpected entry point: %s" (show actualEntry)) + (_, Just expectedBlockStarts) -> + T.assertEqual (printf "Block starts for %s" (show actualEntry)) expectedBlockStarts actualBlockStarts @@ -124,26 +150,6 @@ withELF fp k = do E.Elf32Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs) E.Elf64Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs) -withMemory :: forall w m a - . (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w)) - => MM.AddrWidthRepr w - -> E.Elf w - -> (MM.Memory w -> [MM.MemSegmentOff w] -> m a) - -> m a -withMemory _relaWidth e k = do - let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing - , MM.loadRegionBaseOffset = 0 --- let opt = MM.LoadOptions { MM.loadRegionIndex = Just 0 --- , MM.loadRegionBaseOffset = fromIntegral loadOffset - } - case MM.resolveElfContents opt e of - Left err -> C.throwM (MemoryLoadError err) - Right (warn, mem, mentry, syms) -> - if null warn then - k mem (maybeToList mentry ++ fmap MM.memSymbolStart syms) - else - error $ "Warnings while loading Elf " ++ show warn - data ElfException = MemoryLoadError String deriving (Typeable, Show) diff --git a/x86/tests/x64/test-plt.c b/x86/tests/x64/test-plt.c new file mode 100644 index 00000000..23aefcc2 --- /dev/null +++ b/x86/tests/x64/test-plt.c @@ -0,0 +1,10 @@ +// This is a simple call to puts to test that we resolve the PLT stub +// to puts. +// +// It should be compiled with +// `clang -fpic -FPIC -o test-plt.exe test-plc.c` +#include + +int main(int argc, char** argv) { + puts("Hello World"); +} diff --git a/x86/tests/x64/test-plt.exe b/x86/tests/x64/test-plt.exe new file mode 100755 index 00000000..071edee2 Binary files /dev/null and b/x86/tests/x64/test-plt.exe differ diff --git a/x86/tests/x64/test-plt.exe.expected b/x86/tests/x64/test-plt.exe.expected new file mode 100644 index 00000000..9494c5ee --- /dev/null +++ b/x86/tests/x64/test-plt.exe.expected @@ -0,0 +1,19 @@ +R { funcs = + [ (Addr 1 0x580, [(Addr 1 0x580,16),(Addr 1 0x590,5),(Addr 1 0x595,5)]) + , (Addr 1 0x5b0, [(Addr 1 0x5b0,6)]) + , (Addr 1 0x5c0, [(Addr 1 0x5c0,6)]) + , (Addr 1 0x5d0, [(Addr 1 0x5d0,6)]) + , (Addr 1 0x5d8, [(Addr 1 0x5d8,6)]) + , (Addr 1 0x5e0, [(Addr 1 0x5e0,41),(Addr 1 0x609,1)]) + , (Addr 1 0x610, [(Addr 1 0x610,27),(Addr 1 0x62b,12),(Addr 1 0x637,3),(Addr 1 0x640,2)]) + , (Addr 1 0x650, [(Addr 1 0x650,40),(Addr 1 0x678,12),(Addr 1 0x684,3),(Addr 1 0x690,2)]) + , (Addr 1 0x6a0, [(Addr 1 0x6a0,9),(Addr 1 0x6a9,14),(Addr 1 0x6b7,12),(Addr 1 0x6c3,5),(Addr 1 0x6c8,8),(Addr 1 0x6d0,2)]) + , (Addr 1 0x6e0, [(Addr 1 0x6e0,13), (Addr 1 0x6ed,5), (Addr 1 0x6f8, 12), (Addr 1 0x704, 6), (Addr 1 0x70a, 6)]) + , (Addr 1 0x710, [(Addr 1 0x710,13),(Addr 1 0x71d,4)]) + , (Addr 1 0x730, [(Addr 1 0x730,49),(Addr 1 0x761,5),(Addr 1 0x766,10),(Addr 1 0x770,13),(Addr 1 +0x77d,9),(Addr 1 0x786,15)]) + , (Addr 1 0x7a0, [(Addr 1 0x7a0,2)]) + , (Addr 1 0x7a4, [(Addr 1 0x7a4,9)]) + ] + , ignoreBlocks = [] + } diff --git a/x86/tests/x64/test-tail-call.exe.expected b/x86/tests/x64/test-tail-call.exe.expected index 13b4b85b..fc8a3ac5 100644 --- a/x86/tests/x64/test-tail-call.exe.expected +++ b/x86/tests/x64/test-tail-call.exe.expected @@ -1,5 +1,5 @@ R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)]) - , (Addr 1 0x2d0, [(Addr 1 0x2d0, 11), (Addr 1 0x2c0, 7)]) + , (Addr 1 0x2d0, [(Addr 1 0x2d0, 11)]) , (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)]) ] , ignoreBlocks = [Addr 1 0x2fb]