Merge branch 'master' of github.com:GaloisInc/macaw into fix/keep-return-address-stack-write

This commit is contained in:
Andrei Stefanescu 2018-12-18 14:31:08 -08:00
commit 76ac547995
24 changed files with 774 additions and 459 deletions

View File

@ -1,5 +1,5 @@
name: macaw-base
version: 0.3.2
version: 0.3.4
author: Galois, Inc.
maintainer: jhendrix@galois.com
build-type: Simple
@ -31,7 +31,7 @@ library
base >= 4,
ansi-wl-pprint,
binary,
binary-symbols >= 0.1.1,
binary-symbols >= 0.1.2,
bytestring,
containers >= 0.5.8.1,
elf-edit >= 0.32,
@ -39,7 +39,7 @@ library
IntervalMap >= 0.5,
lens >= 4.7,
mtl,
parameterized-utils >= 1.0.1,
parameterized-utils >= 1.0.7,
template-haskell,
text,
vector,

View File

@ -50,9 +50,18 @@ import Data.Macaw.CFG
import Data.Macaw.CFG.BlockLabel
import Data.Macaw.CFG.DemandSet
import Data.Macaw.Discovery.State
import Data.Macaw.Fold
import Data.Macaw.Types
newtype Ap f a = Ap { getAp :: f a }
instance (Applicative f, Semigroup a) => Semigroup (Ap f a) where
Ap x <> Ap y = Ap $ (<>) <$> x <*> y
instance (Applicative f, Monoid a) => Monoid (Ap f a) where
mempty = Ap $ pure mempty
mappend (Ap x) (Ap y) = Ap $ mappend <$> x <*> y
-------------------------------------------------------------------------------
-- The algorithm computes the set of direct deps (i.e., from writes)
@ -95,6 +104,10 @@ data DemandSet (r :: Type -> Kind.Type) =
, functionResultDemands :: !(Map (RegSegmentOff r) (RegisterSet r))
}
-- | Create a demand set for specific registers.
registerDemandSet :: RegisterSet r -> DemandSet r
registerDemandSet s = DemandSet { registerDemands = s, functionResultDemands = Map.empty }
deriving instance (ShowF r, MemWidth (RegAddrWidth r)) => Show (DemandSet r)
deriving instance (TestEquality r) => Eq (DemandSet r)
deriving instance (OrdF r) => Ord (DemandSet r)
@ -109,15 +122,18 @@ instance OrdF r => Semigroup (DemandSet r) where
instance OrdF r => Monoid (DemandSet r) where
mempty = DemandSet { registerDemands = Set.empty
, functionResultDemands = mempty
, functionResultDemands = Map.empty
}
mappend = (<>)
demandSetDifference :: OrdF r => DemandSet r -> DemandSet r -> DemandSet r
demandSetDifference ds1 ds2 =
DemandSet (registerDemands ds1 `Set.difference` registerDemands ds2)
(Map.differenceWith setDiff (functionResultDemands ds1)
(functionResultDemands ds2))
DemandSet { registerDemands = registerDemands ds1 `Set.difference` registerDemands ds2
, functionResultDemands =
Map.differenceWith setDiff
(functionResultDemands ds1)
(functionResultDemands ds2)
}
where
setDiff s1 s2 =
let s' = s1 `Set.difference` s2
@ -131,8 +147,8 @@ data DemandType r
-- | This type is for registers that are demanded if the function at the given address wants
-- the given register.
| forall tp. DemandFunctionArg (RegSegmentOff r) (r tp)
-- | This is a associated with a set of registers that are demanded if the given register is needed
-- as a return value.
-- | This is a associated with the registers that are demanded if
-- the given register is needed as a return value.
| forall tp. DemandFunctionResult (r tp)
instance (MemWidth (RegAddrWidth r), ShowF r) => Show (DemandType r) where
@ -179,8 +195,8 @@ data ArchTermStmtRegEffects arch
= ArchTermStmtRegEffects { termRegDemands :: ![Some (ArchReg arch)]
-- ^ Registers demanded by term statement
, termRegTransfers :: [Some (ArchReg arch)]
-- ^ Registers that terminal statement are not modified
-- by terminal statement.
-- ^ Registers that are not modified by
-- terminal statement.
}
-- | Returns information about the registers needed and modified by a terminal statement
@ -193,6 +209,8 @@ type ComputeArchTermStmtEffects arch ids
-> RegState (ArchReg arch) (Value arch ids)
-> ArchTermStmtRegEffects arch
-- | Information about the architecture/environment what arguments a
-- function needs.
data ArchDemandInfo arch = ArchDemandInfo
{ -- | Registers used as arguments to the function.
functionArgRegs :: ![Some (ArchReg arch)]
@ -294,34 +312,38 @@ addIntraproceduralJumpTarget fun_info src_block dest_addr = do -- record the ed
text "Could not find target block" <+> text (show dest_addr) <$$>
indent 2 (text "Source:" <$$> pretty src_block)
-- | Compute the input registers that this value depends on
withAssignmentCache :: State (AssignmentCache (ArchReg arch) ids) a -> FunctionArgsM arch ids a
withAssignmentCache m = do
c <- use assignmentCache
let (r, c') = runState m c
seq c' $ assignmentCache .= c'
pure r
-- | Return the input registers that a value depends on.
valueUses :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch))
=> Value arch ids tp
-> FunctionArgsM arch ids (RegisterSet (ArchReg arch))
valueUses v = zoom assignmentCache $ foldValueCached fns v
where fns = emptyValueFold { foldInput = Set.singleton . Some }
-> State (AssignmentCache (ArchReg arch) ids) (RegisterSet (ArchReg arch))
valueUses (AssignedValue (Assignment a rhs)) = do
mr <- gets $ Map.lookup (Some a)
case mr of
Just s -> pure s
Nothing -> do
rhs' <- foldrFC (\v mrhs -> Set.union <$> valueUses v <*> mrhs) (pure Set.empty) rhs
seq rhs' $ modify' $ Map.insert (Some a) rhs'
pure $ rhs'
valueUses (Initial r) = do
pure $! Set.singleton (Some r)
valueUses _ = do
pure $! Set.empty
addBlockDemands :: OrdF (ArchReg arch) => ArchLabel arch -> DemandMap (ArchReg arch) -> FunctionArgsM arch ids ()
addBlockDemands lbl m =
blockDemandMap %= Map.insertWith demandMapUnion lbl m
-- | Record that a block demands the value of certain registers.
recordBlockDemand :: ( OrdF (ArchReg arch)
, FoldableFC (ArchFn arch)
)
=> ArchLabel arch
-- ^ The current block
-> RegState (ArchReg arch) (Value arch ids)
-- ^ The current register state
-> (forall tp . ArchReg arch tp -> DemandType (ArchReg arch))
-> [Some (ArchReg arch)]
-- ^ The registers that we need.
-> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps
recordBlockDemand lbl s mk rs = do
let doReg (Some r) = do
rs' <- valueUses (s ^. boundValue r)
return (mk r, DemandSet rs' mempty)
vs <- mapM doReg rs
blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs)
-- Figure out the deps of the given registers and update the state for the current label
recordBlockTransfer :: ( OrdF (ArchReg arch)
recordBlockTransfer :: forall arch ids
. ( OrdF (ArchReg arch)
, FoldableFC (ArchFn arch)
)
=> ArchLabel arch
@ -329,10 +351,13 @@ recordBlockTransfer :: ( OrdF (ArchReg arch)
-> [Some (ArchReg arch)]
-> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps
recordBlockTransfer lbl s rs = do
let doReg (Some r) = do
let doReg :: Some (ArchReg arch)
-> State (AssignmentCache (ArchReg arch) ids)
(Some (ArchReg arch), DemandSet (ArchReg arch))
doReg (Some r) = do
rs' <- valueUses (s ^. boundValue r)
return (Some r, DemandSet rs' mempty)
vs <- mapM doReg rs
return (Some r, registerDemandSet rs')
vs <- withAssignmentCache $ traverse doReg rs
blockTransfer %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs)
-- | A block requires a value, and so we need to remember which
@ -342,9 +367,8 @@ demandValue :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch))
-> Value arch ids tp
-> FunctionArgsM arch ids ()
demandValue lbl v = do
regs <- valueUses v
blockDemandMap %= Map.insertWith demandMapUnion lbl
(Map.singleton DemandAlways (DemandSet regs mempty))
regs <- withAssignmentCache $ valueUses v
addBlockDemands lbl $ Map.singleton DemandAlways (registerDemandSet regs)
-- -----------------------------------------------------------------------------
-- Entry point
@ -426,9 +450,9 @@ summarizeCall :: forall arch ids
-> Bool
-- ^ A flag that is set to true for tail calls.
-> FunctionArgsM arch ids ()
summarizeCall mem lbl proc_state isTailCall = do
summarizeCall mem lbl finalRegs isTailCall = do
knownAddrs <- gets computedAddrSet
case valueAsMemAddr (proc_state^.boundValue ip_reg) of
case valueAsMemAddr (finalRegs^.boundValue ip_reg) of
Just faddr0
| Just faddr <- asSegmentOff mem faddr0
, Set.member faddr knownAddrs -> do
@ -439,10 +463,10 @@ summarizeCall mem lbl proc_state isTailCall = do
-- singleton for now, but propagating back will introduce more deps.
let demandSet sr = DemandSet mempty (Map.singleton faddr (Set.singleton sr))
if isTailCall then
if isTailCall then do
-- tail call, propagate demands for our return regs to the called function
let propMap = map (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) retRegs
in blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromList propMap)
let propMap = (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) <$> retRegs
addBlockDemands lbl $ Map.fromList propMap
else do
-- Given a return register sr, this indicates that
let propResult :: Some (ArchReg arch) -> FunctionArgsM arch ids ()
@ -455,12 +479,22 @@ summarizeCall mem lbl proc_state isTailCall = do
-- If a function wants argument register r, then we note that this
-- block needs the corresponding state values. Note that we could
-- do this for _all_ registers, but this should make the summaries somewhat smaller.
-- Associate the demand sets for each potential argument register with the registers used
-- by faddr.
argRegs <- gets $ functionArgRegs . archDemandInfo
recordBlockDemand lbl proc_state (DemandFunctionArg faddr) argRegs
let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^. boundValue r)
let demandTypes = viewSome (DemandFunctionArg faddr) <$> argRegs
demands <- withAssignmentCache $ traverse regDemandSet argRegs
addBlockDemands lbl $ Map.fromList $ zip demandTypes demands
_ -> do
-- In the dynamic case, we just assume all arguments (FIXME: results?)
argRegs <- gets $ functionArgRegs . archDemandInfo
recordBlockDemand lbl proc_state (\_ -> DemandAlways) ([Some ip_reg] ++ argRegs)
do let demandedRegs = [Some ip_reg] ++ argRegs
let regUses (Some r) = valueUses (finalRegs^. boundValue r)
demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $ foldMap (Ap . regUses) demandedRegs
addBlockDemands lbl $ Map.singleton DemandAlways demands
-- | Return values that must be evaluated to execute side effects.
stmtDemandedValues :: DemandContext arch
@ -492,10 +526,10 @@ summarizeBlock :: forall arch ids
-> ArchSegmentOff arch -- ^ Address of the code.
-> StatementList arch ids -- ^ Current block
-> FunctionArgsM arch ids ()
summarizeBlock mem interp_state addr stmts = do
summarizeBlock mem interpState addr stmts = do
let lbl = GeneratedBlock addr (stmtsIdent stmts)
-- Add this label to block demand map with empty set.
blockDemandMap %= Map.insertWith demandMapUnion lbl mempty
addBlockDemands lbl mempty
ctx <- gets $ demandInfoCtx . archDemandInfo
-- Add all values demanded by non-terminal statements in list.
@ -503,6 +537,67 @@ summarizeBlock mem interp_state addr stmts = do
(stmtsNonterm stmts)
-- Add values demanded by terminal statements
case stmtsTerm stmts of
ParsedCall finalRegs m_ret_addr -> do
-- Record the demands based on the call, and add edges between
-- this note and next nodes.
case m_ret_addr of
Nothing -> do
summarizeCall mem lbl finalRegs True
Just ret_addr -> do
summarizeCall mem lbl finalRegs False
addIntraproceduralJumpTarget interpState lbl ret_addr
callRegs <- gets $ calleeSavedRegs . archDemandInfo
recordBlockTransfer lbl finalRegs ([Some sp_reg] ++ Set.toList callRegs)
PLTStub regs _ _ -> do
-- PLT Stubs demand all registers that could be function
-- arguments, as well as any registers in regs.
ainfo <- gets archDemandInfo
let demandedRegs = Set.fromList (functionArgRegs ainfo)
demands <- withAssignmentCache $ getAp $ foldMapF (Ap . valueUses) regs
addBlockDemands lbl $ Map.singleton DemandAlways $
registerDemandSet $ demands <> demandedRegs
ParsedJump procState tgtAddr -> do
-- record all propagations
recordBlockTransfer lbl procState archRegs
addIntraproceduralJumpTarget interpState lbl tgtAddr
ParsedLookupTable finalRegs lookup_idx vec -> do
demandValue lbl lookup_idx
-- record all propagations
recordBlockTransfer lbl finalRegs archRegs
traverse_ (addIntraproceduralJumpTarget interpState lbl) vec
ParsedReturn finalRegs -> do
retRegs <- gets $ functionRetRegs . archDemandInfo
let demandTypes = viewSome DemandFunctionResult <$> retRegs
let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^.boundValue r)
demands <- withAssignmentCache $ traverse regDemandSet retRegs
addBlockDemands lbl $ Map.fromList $ zip demandTypes demands
ParsedIte c tblock fblock -> do
-- Demand condition then summarize recursive blocks.
demandValue lbl c
summarizeBlock mem interpState addr tblock
summarizeBlock mem interpState addr fblock
ParsedArchTermStmt tstmt finalRegs next_addr -> do
-- Compute effects of terminal statement.
ainfo <- gets $ archDemandInfo
let e = computeArchTermStmtEffects ainfo tstmt finalRegs
-- Demand all registers the terminal statement demands.
do let regUses (Some r) = valueUses (finalRegs^.boundValue r)
demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $
foldMap (Ap . regUses) (termRegDemands e)
addBlockDemands lbl $ Map.singleton DemandAlways demands
recordBlockTransfer lbl finalRegs (termRegTransfers e)
traverse_ (addIntraproceduralJumpTarget interpState lbl) next_addr
ParsedTranslateError _ -> do
-- We ignore demands for translate errors.
pure ()
@ -510,43 +605,6 @@ summarizeBlock mem interp_state addr stmts = do
-- We ignore demands for classify failure.
pure ()
ParsedIte c tblock fblock -> do
-- Demand condition then summarize recursive blocks.
demandValue lbl c
summarizeBlock mem interp_state addr tblock
summarizeBlock mem interp_state addr fblock
ParsedCall proc_state m_ret_addr -> do
case m_ret_addr of
Nothing -> do
summarizeCall mem lbl proc_state True
Just ret_addr -> do
summarizeCall mem lbl proc_state False
addIntraproceduralJumpTarget interp_state lbl ret_addr
callRegs <- gets $ calleeSavedRegs . archDemandInfo
recordBlockTransfer lbl proc_state ([Some sp_reg] ++ Set.toList callRegs)
ParsedJump proc_state tgt_addr -> do
-- record all propagations
recordBlockTransfer lbl proc_state archRegs
addIntraproceduralJumpTarget interp_state lbl tgt_addr
ParsedReturn proc_state -> do
retRegs <- gets $ functionRetRegs . archDemandInfo
recordBlockDemand lbl proc_state DemandFunctionResult retRegs
ParsedArchTermStmt tstmt proc_state next_addr -> do
effFn <- gets $ computeArchTermStmtEffects . archDemandInfo
let e = effFn tstmt proc_state
recordBlockDemand lbl proc_state (\_ -> DemandAlways) (termRegDemands e)
recordBlockTransfer lbl proc_state (termRegTransfers e)
traverse_ (addIntraproceduralJumpTarget interp_state lbl) next_addr
ParsedLookupTable proc_state lookup_idx vec -> do
demandValue lbl lookup_idx
-- record all propagations
recordBlockTransfer lbl proc_state archRegs
traverse_ (addIntraproceduralJumpTarget interp_state lbl) vec
-- | Explore states until we have reached end of frontier.
summarizeIter :: ArchConstraints arch
@ -575,7 +633,7 @@ calculateOnePred newDemands predLbl = do
-- update uses, returning value before this iteration
seenDemands <- use (blockDemandMap . ix lbl')
blockDemandMap . at lbl' .= Just (Map.unionWith mappend demands' seenDemands)
addBlockDemands lbl' demands'
-- seenDemands <- blockDemandMap . ix lbl' <<%= demandMapUnion demands'
@ -683,10 +741,7 @@ doOneFunction archFns addrs ist0 acc ist = do
-- recorded as a use, which is erroneous, so we strip out any
-- reference to them here.
callRegs <- gets $ calleeSavedRegs . archDemandInfo
let calleeDemandSet = DemandSet { registerDemands =
Set.insert (Some sp_reg) callRegs
, functionResultDemands = mempty
}
let calleeDemandSet = registerDemandSet (Set.insert (Some sp_reg) callRegs)
return (Map.foldlWithKey' (decomposeMap calleeDemandSet addr) acc funDemands)

View File

@ -45,13 +45,10 @@ type DisassembleFn arch
. NonceGenerator (ST s) ids
-> ArchSegmentOff arch
-- ^ The offset to start reading from.
-> RegState (ArchReg arch) (Value arch ids)
-- ^ Initial values to use for registers.
-> Int
-- ^ Maximum offset for this to read from.
-> AbsBlockState (ArchReg arch)
-- ^ Abstract state associated with address that we are disassembling
-- from.
--
-- This is used for things like the height of the x87 stack.
-> ST s ([Block arch ids], Int, Maybe String)
-- | This records architecture specific functions for analysis.
@ -64,6 +61,12 @@ data ArchitectureInfo arch
-- ^ Architecture address width.
, archEndianness :: !Endianness
-- ^ The byte order values are stored in.
, mkInitialRegsForBlock :: !(forall ids
. ArchSegmentOff arch
-> AbsBlockState (ArchReg arch)
-> Either String (RegState (ArchReg arch) (Value arch ids)))
-- ^ Use the abstract block state information to infer register
-- values to use for disassembling from given address.
, disassembleFn :: !(DisassembleFn arch)
-- ^ Function for disasembling a block.
, mkInitialAbsState :: !(Memory (RegAddrWidth (ArchReg arch))
@ -89,7 +92,7 @@ data ArchitectureInfo arch
-- ^ Update the abstract state after a function call returns
, identifyCall :: forall ids
. Memory (ArchAddrWidth arch)
-> [Stmt arch ids]
-> Seq (Stmt arch ids)
-> RegState (ArchReg arch) (Value arch ids)
-> Maybe (Seq (Stmt arch ids), ArchSegmentOff arch)
-- ^ Function for recognizing call statements.
@ -97,10 +100,25 @@ data ArchitectureInfo arch
-- Given a memory state, list of statements, and final register
-- state, the should determine if this is a call, and if so,
-- return the statements with any action to push the return
-- value to the stack removed, and provide the explicit return
-- address that the function should return to.
-- value to the stack removed, and provide the return address that
-- the function should return to.
, checkForReturnAddr :: forall ids
. RegState (ArchReg arch) (Value arch ids)
-> AbsProcessorState (ArchReg arch) ids
-> Bool
-- ^ @checkForReturnAddr regs s@ returns true if the location
-- where the return address is normally stored in regs when
-- calling a function does indeed contain the abstract value
-- associated with return addresses.
--
-- For x86 this checks if the address just above the stack is the
-- return address. For ARM, this should check the link register.
--
-- This predicate is invoked when considering if a potential tail call
-- is setup to return to the right location.
, identifyReturn :: forall ids
. [Stmt arch ids]
. Seq (Stmt arch ids)
-> RegState (ArchReg arch) (Value arch ids)
-> AbsProcessorState (ArchReg arch) ids
-> Maybe (Seq (Stmt arch ids))

View File

@ -55,6 +55,7 @@ module Data.Macaw.CFG.Core
, mkRegStateM
, traverseRegsWith
, zipWithRegState
, ppRegMap
-- * Pretty printing
, ppAssignId
, ppLit
@ -72,8 +73,6 @@ module Data.Macaw.CFG.Core
, asStackAddrOffset
-- * References
, refsInValue
, refsInApp
, refsInAssignRhs
-- ** Synonyms
, ArchAddrValue
, Data.Parameterized.TraversableFC.FoldableFC(..)
@ -650,13 +649,16 @@ class PrettyRegValue r (f :: Type -> Kind.Type) where
-- should be printed, and Nothing if the contents should be ignored.
ppValueEq :: r tp -> f tp -> Maybe Doc
instance ( PrettyRegValue r f
)
=> Pretty (RegState r f) where
pretty (RegState m) = bracketsep $ catMaybes (f <$> MapF.toList m)
where f :: MapF.Pair r f -> Maybe Doc
ppRegMap :: forall r v . PrettyRegValue r v => MapF.MapF r v -> Doc
ppRegMap m = bracketsep $ catMaybes (f <$> MapF.toList m)
where f :: MapF.Pair r v -> Maybe Doc
f (MapF.Pair r v) = ppValueEq r v
instance ( PrettyRegValue r f)
=> Pretty (RegState r f) where
pretty (RegState m) = ppRegMap m
instance ( PrettyRegValue r f
)
=> Show (RegState r f) where
@ -696,7 +698,7 @@ data Stmt arch ids
ppStmt :: ArchConstraints arch
=> (ArchAddrWord arch -> Doc)
-- ^ Function for pretty printing an offset
-- ^ Function for pretty printing an instruction address offset
-> Stmt arch ids
-> Doc
ppStmt ppOff stmt =
@ -706,7 +708,8 @@ ppStmt ppOff stmt =
InstructionStart off mnem -> text "#" <+> ppOff off <+> text (Text.unpack mnem)
Comment s -> text $ "# " ++ Text.unpack s
ExecArchStmt s -> ppArchStmt (ppValue 10) s
ArchState a m -> hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m))
ArchState a m ->
hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m))
where
ppAddr addr =
case asAbsoluteAddr addr of
@ -724,19 +727,3 @@ instance ArchConstraints arch => Show (Stmt arch ids) where
refsInValue :: Value arch ids tp -> Set (Some (AssignId ids))
refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v)
refsInValue _ = Set.empty
refsInApp :: App (Value arch ids) tp -> Set (Some (AssignId ids))
refsInApp app = foldMapFC refsInValue app
refsInAssignRhs :: FoldableFC (ArchFn arch)
=> AssignRhs arch (Value arch ids) tp
-> Set (Some (AssignId ids))
refsInAssignRhs rhs =
case rhs of
EvalApp v -> refsInApp v
SetUndefined _ -> Set.empty
ReadMem v _ -> refsInValue v
CondReadMem _ c a d ->
Set.union (refsInValue c) $
Set.union (refsInValue a) (refsInValue d)
EvalArchFn f _ -> foldMapFC refsInValue f

View File

@ -9,6 +9,7 @@ This provides information about code discovered in binaries.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
@ -18,6 +19,7 @@ module Data.Macaw.Discovery
( -- * DiscoveryInfo
State.DiscoveryState(..)
, State.emptyDiscoveryState
, State.trustedFunctionEntryPoints
, State.AddrSymMap
, State.funInfo
, State.exploredFunctions
@ -64,11 +66,15 @@ import Data.Foldable
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Monoid
import Data.Parameterized.Classes
import qualified Data.Parameterized.Map as MapF
import Data.Parameterized.NatRepr
import Data.Parameterized.Nonce
import Data.Parameterized.Some
import Data.Parameterized.TraversableF
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.Text as Text
@ -198,6 +204,8 @@ addStatementListDemands sl = do
case stmtsTerm sl of
ParsedCall regs _ -> do
traverseF_ addValueDemands regs
PLTStub regs _ _ ->
traverseF_ addValueDemands regs
ParsedJump regs _ -> do
traverseF_ addValueDemands regs
ParsedLookupTable regs _idx _tbl -> do
@ -642,8 +650,10 @@ resolveAsAbsoluteAddr :: forall w
-> Maybe (MemAddr w)
resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $
case l of
[ByteRegion bs] -> do
absoluteAddr <$> addrRead endianness bs
[ByteRegion bs] ->
case addrRead endianness bs of
Just a -> pure $! absoluteAddr a
Nothing -> error $ "internal: resolveAsAbsoluteAddr given short chunk list."
[RelocationRegion r] -> do
when (relocationIsRel r) $ Nothing
case relocationSym r of
@ -654,6 +664,8 @@ resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $
SegmentBaseAddr idx -> do
seg <- Map.lookup idx (memSegmentIndexMap mem)
pure $! segmentOffAddr seg (relocationOffset r)
LoadBaseAddr -> do
memBaseAddr mem
_ -> Nothing
-- This function resolves jump table entries.
@ -779,13 +791,15 @@ data ParseContext arch ids =
-- ^ Entry addresses for known functions (e.g. from
-- symbol information)
--
-- The discovery process will not create intra-procedural
-- jumps to the entry points of new functions.
-- The discovery process will not create
-- intra-procedural jumps to the entry points of new
-- functions.
, pctxFunAddr :: !(ArchSegmentOff arch)
-- ^ Address of function this block is being parsed as
, pctxAddr :: !(ArchSegmentOff arch)
-- ^ Address of the current block
, pctxBlockMap :: !(Map Word64 (Block arch ids))
-- ^ Map from block indices to block code at address.
}
addrMemRepr :: ArchitectureInfo arch -> MemRepr (BVType (RegAddrWidth (ArchReg arch)))
@ -825,67 +839,130 @@ addNewFunctionAddrs :: [ArchSegmentOff arch]
addNewFunctionAddrs addrs =
newFunctionAddrs %= (++addrs)
-- | @stripPLTRead assignId prev rest@ looks for a read of @assignId@
-- from the end of @prev@, and if it finds it returns the
-- concatenation of the instructione before the read in @prev@ and
-- @rest@.
--
-- The read may appear before comment and @instructionStart@
-- instructions, but otherwise must be at the end of prev
stripPLTRead :: ArchConstraints arch
=> AssignId ids tp -- ^ Identifier of write to remove
-> Seq (Stmt arch ids)
-> Seq (Stmt arch ids)
-> Maybe (Seq (Stmt arch ids))
stripPLTRead readId next rest =
case Seq.viewr next of
Seq.EmptyR -> Nothing
prev Seq.:> lastStmt -> do
let cont = stripPLTRead readId prev (lastStmt Seq.<| rest)
case lastStmt of
AssignStmt (Assignment stmtId rhs)
| Just Refl <- testEquality readId stmtId -> Just (prev Seq.>< rest)
-- Fail if the read to delete is used in later computations
| Set.member (Some readId) (foldMapFC refsInValue rhs) ->
Nothing
| otherwise ->
case rhs of
EvalApp{} -> cont
SetUndefined{} -> cont
_ -> Nothing
InstructionStart{} -> cont
ArchState{} -> cont
Comment{} -> cont
_ -> Nothing
removeUnassignedRegs :: forall arch ids
. RegisterInfo (ArchReg arch)
=> RegState (ArchReg arch) (Value arch ids)
-- ^ Initial register values
-> RegState (ArchReg arch) (Value arch ids)
-- ^ Final register values
-> MapF.MapF (ArchReg arch) (Value arch ids)
removeUnassignedRegs initRegs finalRegs =
let keepReg :: forall tp . ArchReg arch tp -> Value arch ids tp -> Bool
keepReg r finalVal
| Just Refl <- testEquality r ip_reg = False
| Just Refl <- testEquality initVal finalVal = False
| otherwise = True
where initVal = initRegs^.boundValue r
in MapF.filterWithKey keepReg (regStateMap finalRegs)
-- | Return true if any value in structure contains the given
-- identifier.
containsAssignId :: forall t arch ids itp
. FoldableF t
=> AssignId ids itp
-- ^ Forbidden assignment -- may not appear in terms.
-> t (Value arch ids)
-> Bool
containsAssignId droppedAssign =
let hasId :: forall tp . Value arch ids tp -> Any
hasId v = Any (Set.member (Some droppedAssign) (refsInValue v))
in getAny . foldMapF hasId
-- | This parses a block that ended with a fetch and execute instruction.
parseFetchAndExecute :: forall arch ids
. ParseContext arch ids
-> Word64
-- ^ Index of this block
-> [Stmt arch ids]
-> AbsProcessorState (ArchReg arch) ids
-- ^ Registers prior to blocks being executed.
-> RegState (ArchReg arch) (Value arch ids)
-- ^ Initial register values
-> Seq (Stmt arch ids)
-> AbsProcessorState (ArchReg arch) ids
-- ^ Abstract state of registers prior to blocks being executed.
-> RegState (ArchReg arch) (Value arch ids)
-- ^ Final register values
-> State (ParseState arch ids) (StatementList arch ids, Word64)
parseFetchAndExecute ctx idx stmts regs s = do
parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do
let mem = pctxMemory ctx
let ainfo= pctxArchInfo ctx
let absProcState' = absEvalStmts ainfo regs stmts
let ainfo = pctxArchInfo ctx
let absProcState' = absEvalStmts ainfo absProcState stmts
withArchConstraints ainfo $ do
-- See if next statement appears to end with a call.
-- We define calls as statements that end with a write that
-- stores the pc to an address.
-- Try to figure out what control flow statement we have.
case () of
_ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do
-- The block ends with a Mux, so we turn this into a `ParsedIte` statement.
_ | Just (Mux _ c t f) <- valueAsApp (finalRegs^.boundValue ip_reg) -> do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
let l_regs = refineProcStateBounds c True $
refineProcState c absTrue absProcState'
let l_regs' = absEvalStmts ainfo l_regs stmts
let lState = s & boundValue ip_reg .~ t
let lState = finalRegs & boundValue ip_reg .~ t
(tStmts,trueIdx) <-
parseFetchAndExecute ctx (idx+1) [] l_regs' lState
parseFetchAndExecute ctx (idx+1) initRegs Seq.empty l_regs' lState
let r_regs = refineProcStateBounds c False $
refineProcState c absFalse absProcState'
let r_regs' = absEvalStmts ainfo r_regs stmts
let rState = s & boundValue ip_reg .~ f
let rState = finalRegs & boundValue ip_reg .~ f
(fStmts,falseIdx) <-
parseFetchAndExecute ctx trueIdx [] r_regs' rState
parseFetchAndExecute ctx trueIdx initRegs Seq.empty r_regs' rState
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts
, stmtsNonterm = toList stmts
, stmtsTerm = ParsedIte c tStmts fStmts
, stmtsAbsState = absProcState'
}
pure (ret, falseIdx)
-- The last statement was a call.
-- Use architecture-specific callback to check if last statement was a call.
-- Note that in some cases the call is known not to return, and thus
-- this code will never jump to the return value.
_ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do
_ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts finalRegs -> do
mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts
let abst = finalAbsBlockState absProcState' s
seq abst $ do
let abst = finalAbsBlockState absProcState' finalRegs
-- Merge caller return information
intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
seq abst $ intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
-- Use the abstract domain to look for new code pointers for the current IP.
addNewFunctionAddrs $
identifyCallTargets mem abst s
identifyCallTargets mem abst finalRegs
-- Use the call-specific code to look for new IPs.
let r = StatementList { stmtsIdent = idx
, stmtsNonterm = toList stmts
, stmtsTerm = ParsedCall s (Just ret)
, stmtsTerm = ParsedCall finalRegs (Just ret)
, stmtsAbsState = absProcState'
}
pure (r, idx+1)
@ -901,19 +978,19 @@ parseFetchAndExecute ctx idx stmts regs s = do
-- (e.g. ARM will clear the low bit in T32 mode or the low 2
-- bits in A32 mode), so the actual detection process is
-- deferred to architecture-specific functionality.
| Just prev_stmts <- identifyReturn ainfo stmts s absProcState' -> do
| Just prev_stmts <- identifyReturn ainfo stmts finalRegs absProcState' -> do
mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = toList prev_stmts
, stmtsTerm = ParsedReturn s
, stmtsTerm = ParsedReturn finalRegs
, stmtsAbsState = absProcState'
}
pure (ret, idx+1)
-- Jump to a block within this function.
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
-- Check
| Just tgt_mseg <- valueAsSegmentOff mem (finalRegs^.boundValue ip_reg)
-- Check target block address is in executable segment.
, segmentFlags (segoffSegment tgt_mseg) `Perm.hasPerm` Perm.execute
-- Check the target address is not the entry point of this function.
@ -926,22 +1003,23 @@ parseFetchAndExecute ctx idx stmts regs s = do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
-- Merge block state and add intra jump target.
let abst = finalAbsBlockState absProcState' s
let abst = finalAbsBlockState absProcState' finalRegs
let abst' = abst & setAbsIP tgt_mseg
intraJumpTargets %= ((tgt_mseg, abst'):)
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts
, stmtsTerm = ParsedJump s tgt_mseg
, stmtsNonterm = toList stmts
, stmtsTerm = ParsedJump finalRegs tgt_mseg
, stmtsAbsState = absProcState'
}
pure (ret, idx+1)
-- Block ends with what looks like a jump table.
| Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do
| Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (finalRegs^.curIP) -> do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
let abst :: AbsBlockState (ArchReg arch)
abst = finalAbsBlockState absProcState' s
abst = finalAbsBlockState absProcState' finalRegs
seq abst $ do
@ -949,35 +1027,51 @@ parseFetchAndExecute ctx idx stmts regs s = do
let abst' = abst & setAbsIP tgtAddr
intraJumpTargets %= ((tgtAddr, abst'):)
let term = ParsedLookupTable s jumpIndex entries
let term = ParsedLookupTable finalRegs jumpIndex entries
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts
, stmtsNonterm = toList stmts
, stmtsTerm = term
, stmtsAbsState = absProcState'
}
pure (ret,idx+1)
-- Check for tail call when the stack pointer points to the return address.
--
-- TODO: this makes sense for x86, but is not correct for all architectures
| ptrType <- addrMemRepr ainfo
, sp_val <- s^.boundValue sp_reg
, ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do
finishWithTailCall absProcState'
-- Code for PLT entry
_ | 0 <- idx
, AssignedValue (Assignment valId v) <- finalRegs^.boundValue ip_reg
, ReadMem gotVal _repr <- v
, Just gotAddr <- valueAsMemAddr gotVal
, Just gotSegOff <- asSegmentOff mem gotAddr
, Right chunks <- segoffContentsAfter gotSegOff
, RelocationRegion r:_ <- chunks
-- Check the relocation is a jump slot.
, relocationJumpSlot r
, Just strippedStmts <- stripPLTRead valId stmts Seq.empty
, strippedRegs <- removeUnassignedRegs initRegs finalRegs
, not (containsAssignId valId strippedRegs) -> do
-- Is this a jump to a known function entry? We're already past the
-- "identifyCall" case, so this must be a tail call, assuming we trust our
-- known function entry info.
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
, tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do
mapM_ (recordWriteStmt ainfo mem absProcState') strippedStmts
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = toList strippedStmts
, stmtsTerm = PLTStub strippedRegs gotSegOff r
, stmtsAbsState = absEvalStmts ainfo absProcState strippedStmts
}
pure (ret, idx+1)
-- Check for tail call when the calling convention seems to be satisfied.
| spVal <- finalRegs^.boundValue sp_reg
-- Check to see if the stack pointer points to an offset of the initial stack.
, StackOffset _ offsets <- transferValue absProcState' spVal
-- Stack stack is back to height when function was called.
, offsets == Set.singleton 0
, checkForReturnAddr ainfo finalRegs absProcState' -> do
finishWithTailCall absProcState'
-- Block that ends with some unknown
| otherwise -> do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts
, stmtsTerm = ClassifyFailure s
, stmtsNonterm = toList stmts
, stmtsTerm = ClassifyFailure finalRegs
, stmtsAbsState = absProcState'
}
pure (ret,idx+1)
@ -990,16 +1084,15 @@ parseFetchAndExecute ctx idx stmts regs s = do
mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts
-- Compute final state
let abst = finalAbsBlockState absProcState' s
let abst = finalAbsBlockState absProcState' finalRegs
seq abst $ do
-- Look for new instruction pointers
addNewFunctionAddrs $
identifyConcreteAddresses mem (abst^.absRegState^.curIP)
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts
, stmtsTerm = ParsedCall s Nothing
, stmtsNonterm = toList stmts
, stmtsTerm = ParsedCall finalRegs Nothing
, stmtsAbsState = absProcState'
}
seq ret $ pure (ret,idx+1)
@ -1010,12 +1103,14 @@ parseBlock :: ParseContext arch ids
-- ^ Context for parsing blocks.
-> Word64
-- ^ Index for next statements
-> RegState (ArchReg arch) (Value arch ids)
-- ^ Initial register values
-> Block arch ids
-- ^ Block to parse
-> AbsProcessorState (ArchReg arch) ids
-- ^ Abstract state at start of block
-> State (ParseState arch ids) (StatementList arch ids, Word64)
parseBlock ctx idx b regs = do
parseBlock ctx idx initRegs b absProcState = do
let mem = pctxMemory ctx
let ainfo = pctxArchInfo ctx
withArchConstraints ainfo $ do
@ -1023,7 +1118,7 @@ parseBlock ctx idx b regs = do
Branch c lb rb -> do
let blockMap = pctxBlockMap ctx
-- FIXME: we should propagate c back to the initial block, not just b
let absProcState' = absEvalStmts ainfo regs (blockStmts b)
let absProcState' = absEvalStmts ainfo absProcState (blockStmts b)
mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b)
let Just l = Map.lookup lb blockMap
@ -1034,8 +1129,8 @@ parseBlock ctx idx b regs = do
let l_regs' = absEvalStmts ainfo l_regs (blockStmts b)
let r_regs' = absEvalStmts ainfo r_regs (blockStmts b)
(parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) l l_regs'
(parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx r r_regs'
(parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) initRegs l l_regs'
(parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx initRegs r r_regs'
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = blockStmts b
@ -1044,13 +1139,13 @@ parseBlock ctx idx b regs = do
}
pure (ret, falseIdx)
FetchAndExecute s -> do
parseFetchAndExecute ctx idx (blockStmts b) regs s
FetchAndExecute finalRegs -> do
parseFetchAndExecute ctx idx initRegs (Seq.fromList (blockStmts b)) absProcState finalRegs
-- Do nothing when this block ends in a translation error.
TranslateError _ msg -> do
-- FIXME: we should propagate c back to the initial block, not just b
let absProcState' = absEvalStmts ainfo regs (blockStmts b)
let absProcState' = absEvalStmts ainfo absProcState (blockStmts b)
let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = blockStmts b
@ -1060,7 +1155,7 @@ parseBlock ctx idx b regs = do
pure (ret, idx+1)
ArchTermStmt ts s -> do
-- FIXME: we should propagate c back to the initial block, not just b
let absProcState' = absEvalStmts ainfo regs (blockStmts b)
let absProcState' = absEvalStmts ainfo absProcState (blockStmts b)
mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b)
let abst = finalAbsBlockState absProcState' s
-- Compute possible next IPS.
@ -1082,12 +1177,13 @@ addBlocks :: ArchSegmentOff arch
-- ^ Address of theze blocks
-> FoundAddr arch
-- ^ State leading to explore block
-> RegState (ArchReg arch) (Value arch ids)
-> Int
-- ^ Number of blocks covered
-> Map Word64 (Block arch ids)
-- ^ Map from labelIndex to associated block
-> FunM arch s ids ()
addBlocks src finfo sz blockMap =
addBlocks src finfo initRegs sz blockMap =
case Map.lookup 0 blockMap of
Nothing -> do
error $ "addBlocks given empty blockRegion."
@ -1097,19 +1193,9 @@ addBlocks src finfo sz blockMap =
funAddr <- gets curFunAddr
s <- use curFunCtx
-- Combine entries of functions we've discovered thus far with
-- undiscovered functions with entries marked InitAddr, which we assume is
-- info we know from the symbol table or some other reliable source, and
-- pass in. Only used in analysis if pctxTrustKnownFns is True.
let knownFns =
if s^.trustKnownFns then
Set.union (Map.keysSet $ s^.funInfo)
(Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions)
else
Set.empty
let ctx = ParseContext { pctxMemory = memory s
, pctxArchInfo = archInfo s
, pctxKnownFnEntries = knownFns
, pctxKnownFnEntries = s^.trustedFunctionEntryPoints
, pctxFunAddr = funAddr
, pctxAddr = src
, pctxBlockMap = blockMap
@ -1118,7 +1204,7 @@ addBlocks src finfo sz blockMap =
, _intraJumpTargets = []
, _newFunctionAddrs = []
}
let ((pblock,_), ps) = runState (parseBlock ctx 0 b regs) ps0
let ((pblock,_), ps) = runState (parseBlock ctx 0 initRegs b regs) ps0
let pb = ParsedBlock { pblockAddr = src
, blockSize = sz
, blockReason = foundReason finfo
@ -1131,6 +1217,26 @@ addBlocks src finfo sz blockMap =
. markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs)
mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
-- | Record an error block with no statements for the given address.
recordErrorBlock :: ArchSegmentOff arch -> FoundAddr arch -> Maybe String -> FunM arch s ids ()
recordErrorBlock addr finfo maybeError = do
s <- use curFunCtx
let mem = memory s
let errMsg = maybe "Unknown error" Text.pack maybeError
let stmts = StatementList
{ stmtsIdent = 0
, stmtsNonterm = []
, stmtsTerm = ParsedTranslateError errMsg
, stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo)
}
let pb = ParsedBlock { pblockAddr = addr
, blockSize = 0
, blockReason = foundReason finfo
, blockAbstractState = foundAbstractState finfo
, blockStatementList = stmts
}
id %= addFunBlock addr pb
transfer :: ArchSegmentOff arch -> FunM arch s ids ()
transfer addr = do
s <- use curFunCtx
@ -1149,8 +1255,16 @@ transfer addr = do
Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o
_ -> fromInteger (segoffBytesLeft addr)
let ab = foundAbstractState finfo
(bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab
case mkInitialRegsForBlock ainfo addr ab of
Left msg -> do
recordErrorBlock addr finfo (Just msg)
Right initRegs -> do
(bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr initRegs maxSize
-- If no blocks are returned, then we just add an empty parsed block.
if null bs0 then do
recordErrorBlock addr finfo maybeError
else do
-- Rewrite returned blocks to simplify expressions
#ifdef USE_REWRITER
bs1 <- do
let archStmt = rewriteArchStmt ainfo
@ -1161,31 +1275,11 @@ transfer addr = do
#else
bs1 <- pure bs0
#endif
-- If no blocks are returned, then we just add an empty parsed block.
if null bs1 then do
let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError
let stmts = StatementList
{ stmtsIdent = 0
, stmtsNonterm = []
, stmtsTerm = ParsedTranslateError errMsg
, stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo)
}
let pb = ParsedBlock { pblockAddr = addr
, blockSize = sz
, blockReason = foundReason finfo
, blockAbstractState = foundAbstractState finfo
, blockStatementList = stmts
}
id %= addFunBlock addr pb
else do
-- Rewrite returned blocks to simplify expressions
-- Compute demand set
let bs = bs1 -- eliminateDeadStmts ainfo bs1
-- Call transfer blocks to calculate parsedblocks
let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ]
addBlocks addr finfo sz blockMap
addBlocks addr finfo initRegs sz blockMap
------------------------------------------------------------------------
-- Main loop
@ -1347,8 +1441,8 @@ cfgFromAddrs ::
--
-- Each entry contains an address and the value stored in it.
-> DiscoveryState arch
cfgFromAddrs ainfo mem symbols =
cfgFromAddrsAndState (emptyDiscoveryState mem symbols ainfo)
cfgFromAddrs ainfo mem addrSymMap =
cfgFromAddrsAndState (emptyDiscoveryState mem addrSymMap ainfo)
------------------------------------------------------------------------
-- Resolve functions with logging

View File

@ -88,8 +88,9 @@ absEvalStmt info stmt = withArchConstraints info $
pure ()
-- This takes a processor state and updates it based on executing each statement.
absEvalStmts :: ArchitectureInfo arch
absEvalStmts :: Foldable t
=> ArchitectureInfo arch
-> AbsProcessorState (ArchReg arch) ids
-> [Stmt arch ids]
-> t (Stmt arch ids)
-> AbsProcessorState (ArchReg arch) ids
absEvalStmts info r stmts = execState (mapM_ (absEvalStmt info) stmts) r

View File

@ -10,6 +10,7 @@ discovery.
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
@ -33,7 +34,7 @@ module Data.Macaw.Discovery.State
, globalDataMap
, funInfo
, unexploredFunctions
, trustKnownFns
, trustedFunctionEntryPoints
, exploreFnPred
-- * DiscoveryFunInfo
, DiscoveryFunInfo(..)
@ -50,7 +51,10 @@ import qualified Data.ByteString.Char8 as BSC
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Parameterized.Classes
import qualified Data.Parameterized.Map as MapF
import Data.Parameterized.Some
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Vector as V
@ -131,6 +135,18 @@ data ParsedTermStmt arch ids
-- | A call with the current register values and location to return to or 'Nothing' if this is a tail call.
= ParsedCall !(RegState (ArchReg arch) (Value arch ids))
!(Maybe (ArchSegmentOff arch))
-- | @PLTStub regs addr@ denotes a terminal statement that has been identified as a PLT stub
-- for calling the given relocation.
--
-- This is a special case of a tail call. It has been added
-- separately because it occurs frequently in dynamically linked
-- code, and we can use this to recognize PLT stubs.
--
-- The register set only contains registers that were changed in
-- the function. Other registers have the initial value.
| PLTStub !(MapF.MapF (ArchReg arch) (Value arch ids))
!(ArchSegmentOff arch)
!(Relocation (ArchAddrWidth arch))
-- | A jump to an explicit address within a function.
| ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch)
-- | A lookup table that branches to one of a vector of addresses.
@ -172,11 +188,14 @@ ppTermStmt :: ArchConstraints arch
ppTermStmt ppOff tstmt =
case tstmt of
ParsedCall s Nothing ->
text "tail call" <$$>
text "tail_call" <$$>
indent 2 (pretty s)
ParsedCall s (Just next) ->
text "call and return to" <+> text (show next) <$$>
indent 2 (pretty s)
PLTStub regs addr r ->
text "call_via_got" <+> text (show (relocationSym r)) <+> "(at" <+> text (show addr) PP.<> ")" <$$>
indent 2 (ppRegMap regs)
ParsedJump s addr ->
text "jump" <+> text (show addr) <$$>
indent 2 (pretty s)
@ -279,7 +298,7 @@ parsedBlocks = lens _parsedBlocks (\s v -> s { _parsedBlocks = v })
instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where
pretty info =
text "function" <+> text (BSC.unpack (discoveredFunName info))
<+> pretty "@" <+> pretty (show (discoveredFunAddr info))
<+> "@" <+> pretty (show (discoveredFunAddr info))
<$$>
vcat (pretty <$> Map.elems (info^.parsedBlocks))
@ -310,9 +329,20 @@ data DiscoveryState arch
-- they are analyzed.
--
-- The keys in this map and `_funInfo` should be mutually disjoint.
, _trustKnownFns :: !Bool
-- ^ Should we use and depend on known function entries in
-- our analysis? E.g. used to distinguish jumps vs. tail calls
, _trustedFunctionEntryPoints :: !(Set (ArchSegmentOff arch))
-- ^ This is the set of addresses that we treat
-- as definitely belonging to function entry
-- points.
--
-- The discovery process will not allow
-- intra-procedural jumps to these addresses.
-- Jumps to these addresses must either be calls
-- or tail calls.
--
-- To ensure translation is invariant on the
-- order in which functions are visited, this
-- set should be initialized upfront, and not
-- changed.
, _exploreFnPred :: Maybe (ArchSegmentOff arch -> Bool)
-- ^ if present, this predicate decides whether to explore
-- a function at the given address or not
@ -342,15 +372,15 @@ emptyDiscoveryState :: Memory (ArchAddrWidth arch)
-> ArchitectureInfo arch
-- ^ architecture/OS specific information
-> DiscoveryState arch
emptyDiscoveryState mem symbols info =
emptyDiscoveryState mem addrSymMap info =
DiscoveryState
{ memory = mem
, symbolNames = symbols
, symbolNames = addrSymMap
, archInfo = info
, _globalDataMap = Map.empty
, _funInfo = Map.empty
, _unexploredFunctions = Map.empty
, _trustKnownFns = False
, _trustedFunctionEntryPoints = Map.keysSet addrSymMap
, _exploreFnPred = Nothing
}
@ -368,8 +398,10 @@ unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunction
funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))
funInfo = lens _funInfo (\s v -> s { _funInfo = v })
trustKnownFns :: Simple Lens (DiscoveryState arch) Bool
trustKnownFns = lens _trustKnownFns (\s v -> s { _trustKnownFns = v })
trustedFunctionEntryPoints :: Simple Lens (DiscoveryState arch) (Set (ArchSegmentOff arch))
trustedFunctionEntryPoints =
lens _trustedFunctionEntryPoints
(\s v -> s { _trustedFunctionEntryPoints = v })
exploreFnPred :: Simple Lens (DiscoveryState arch) (Maybe (ArchSegmentOff arch -> Bool))
exploreFnPred = lens _exploreFnPred (\s v -> s { _exploreFnPred = v })

View File

@ -53,8 +53,14 @@ emptyValueFold =
-- | This folds over elements of a values in a values.
--
-- It memoizes values so that it only evaluates assignments with the same id
-- once.
-- It memoizes the results so that if an assignment is visited
-- multiple times, we only visit the children the first time it is
-- visited. On subsequent visits, `foldAssign` will still be called,
-- but the children will not be revisited.
--
-- This makes the total time to visit linear with respect to the
-- number of children, but still allows determining whether a term is
-- shared.
foldValueCached :: forall r arch ids tp
. (Monoid r, FoldableFC (ArchFn arch))
=> ValueFold arch ids r

View File

@ -19,15 +19,18 @@ module Data.Macaw.Memory
, memAddrWidth
, memWidth
, memSegments
, memSectionIndexMap
, memSegmentIndexMap
, memAsAddrPairs
-- * Constructing memory
, emptyMemory
, insertMemSegment
, InsertError(..)
, showInsertError
-- * Load values
, memBaseAddr
, memSetBaseAddr
, memBindSectionIndex
, memSectionIndexMap
, memSegmentIndexMap
, memBindSegmentIndex
-- * Memory segments
, MemSegment
@ -52,6 +55,7 @@ module Data.Macaw.Memory
, memWord
, memWordToUnsigned
, memWordToSigned
, addrRead
-- * Addresses
, MemAddr(..)
, absoluteAddr
@ -272,7 +276,7 @@ bsWord64 LittleEndian = bsWord64le
--
-- Operations on it require the `MemWidth` constraint to be satisfied, so in practice
-- this only works for 32 and 64-bit values.
newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 }
newtype MemWord (w :: Nat) = MemWord { memWordValue :: Word64 }
-- | Convert word64 @x@ into mem word @x mod 2^w-1@.
memWord :: forall w . MemWidth w => Word64 -> MemWord w
@ -314,14 +318,21 @@ class (1 <= w) => MemWidth w where
-- | Rotates the value by the given index.
addrRotate :: MemWord w -> Int -> MemWord w
-- | Read an address with the given endianess.
--
-- This returns nothing if the bytestring is too short.
addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w)
-- | Read an address with the given endianess.
--
-- This returns nothing if the bytestring is too short.
addrRead :: forall w . MemWidth w => Endianness -> BS.ByteString -> Maybe (MemWord w)
addrRead e s =
case addrWidthRepr (Proxy :: Proxy w) of
Addr32 | BS.length s < 4 -> Nothing
| otherwise -> Just $ MemWord $ fromIntegral $ bsWord32 e s
Addr64 | BS.length s < 8 -> Nothing
| otherwise -> Just $ MemWord $ bsWord64 e s
-- | Return the value represented by the MemWord as an unsigned integer.
memWordToUnsigned :: MemWord w -> Integer
memWordToUnsigned = fromIntegral . _memWordValue
memWordToUnsigned = fromIntegral . memWordValue
-- | Treat the word as a signed integer.
memWordToSigned :: MemWidth w => MemWord w -> Integer
@ -388,18 +399,12 @@ instance MemWidth 32 where
addrRotate (MemWord w) i =
MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i))
addrSize _ = 4
addrRead e s
| BS.length s < 4 = Nothing
| otherwise = Just $ MemWord $ fromIntegral $ bsWord32 e s
instance MemWidth 64 where
addrWidthRepr _ = Addr64
addrWidthMask _ = 0xffffffffffffffff
addrRotate (MemWord w) i = MemWord (w `rotate` i)
addrSize _ = 8
addrRead e s
| BS.length s < 8 = Nothing
| otherwise = Just $ MemWord $ bsWord64 e s
-- | Number of bytes in an address
addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a
@ -471,6 +476,15 @@ data Relocation w
, relocationEndianness :: !Endianness
-- ^ The byte order used to encode the relocation in
-- memory.
, relocationJumpSlot :: !Bool
-- ^ Returns true if this is a jump slot relocation.
--
-- This relocation is specifically used for global
-- offset table entries, and are typically resolved
-- when the function is first called rather than at
-- load time. The address will be initially the
-- entry sequence stub, and will be updated once
-- resolved by the stub.
}
-- | Short encoding of endianness for relocation pretty printing
@ -817,6 +831,8 @@ data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w)
-- ^ Map from registered section indices to the segment offset it is loaded at.
, memSegmentIndexMap :: !(Map SegmentIndex (MemSegment w))
-- ^ Map from registered segment indices to associated segment.
, memBaseAddr :: !(Maybe (MemAddr w))
-- ^ This denotes the base region for loads.
}
-- | Return the set of memory segments in memory.
@ -846,12 +862,17 @@ memBindSegmentIndex idx seg mem
| otherwise =
mem { memSegmentIndexMap = Map.insert idx seg (memSegmentIndexMap mem) }
-- | Set the region index used or the load addresses.
memSetBaseAddr :: MemAddr w -> Memory w -> Memory w
memSetBaseAddr r m = m { memBaseAddr = Just r }
-- | A memory with no segments.
emptyMemory :: AddrWidthRepr w -> Memory w
emptyMemory w = Memory { memAddrWidth = w
, memSegmentMap = Map.empty
, memSectionIndexMap = Map.empty
, memSegmentIndexMap = Map.empty
, memBaseAddr = Nothing
}
-- | Return segments with executable permissions.
@ -945,7 +966,7 @@ addrLeastBit (MemAddr _ (MemWord off)) = off `testBit` 0
-- | Increment an address by a fixed amount.
incAddr :: MemWidth w => Integer -> MemAddr w -> MemAddr w
incAddr o (MemAddr i off) = MemAddr i (off + fromInteger o)
incAddr o a = a { addrOffset = addrOffset a + fromInteger o }
-- | Returns the number of bytes between two addresses if they point to
-- the same region and `Nothing` if they are different segments.
@ -979,10 +1000,9 @@ resolveAddr :: Memory w -> RegionIndex -> MemWord w -> Maybe (MemSegmentOff w)
resolveAddr = resolveRegionOff
{-# DEPRECATED resolveAddr "Use resolveRegionOff" #-}
-- | Return the address of a segment offset.
segoffAddr :: MemWidth w => MemSegmentOff w -> MemAddr w
segoffAddr (MemSegmentOff seg off) = relativeAddr seg off
segoffAddr (MemSegmentOff seg off) = segmentOffAddr seg off
-- | Return the segment associated with the given address if well-defined.
resolveAbsoluteAddr :: Memory w -> MemWord w -> Maybe (MemSegmentOff w)
@ -1336,40 +1356,46 @@ addrContentsAfter mem addr = do
addrWidthClass (memAddrWidth mem) $
segoffContentsAfter =<< resolveMemAddr mem addr
-- | Read a bytestring from a sequence of statements.
--
-- This is a helper method for @readByteString@ below.
readByteString' :: MemWidth w
=> MemSegmentOff w
-- ^ Initial starting address
-> [BS.ByteString]
-- ^ Bytestring read so far (in reverse order)
-> [MemChunk w]
-- ^ Remaining segments to read from.
-- | Attempt to read a bytestring of the given length
readByteString' :: RegionIndex
-- ^ Region we are in.
-> BS.ByteString
-- ^ Bytestring read so far.
-> Word64
-- ^ Number of bytes remaining to read.
-> Either (MemoryError w) [BS.ByteString]
readByteString' _ prev _ 0 =
-- ^ Bytes read so far.
-> [MemChunk w]
-- ^ Remaining memory chunks to read from.
-> Word64
-- ^ Total remaining number of bytes to read.
-> Either (MemoryError w) BS.ByteString
readByteString' _ prev _ _ 0 =
pure $! prev
readByteString' _ _ [] _ = error "internal: readByteString' given too many bytes."
readByteString' initAddr prev (ByteRegion bs:rest) cnt =
if toInteger cnt <= toInteger (BS.length bs) then
pure $! BS.take (fromIntegral cnt) bs : prev
readByteString' _ _ _ [] _ = error "internal: readByteString' given too many bytes."
readByteString' reg prev off (ByteRegion bs:rest) cnt = do
let sz = fromIntegral (BS.length bs)
if cnt <= sz then
pure $! prev <> BS.take (fromIntegral cnt) bs
else do
let cnt' = cnt - fromIntegral (BS.length bs)
readByteString' initAddr (bs:prev) rest cnt'
readByteString' initAddr prev (RelocationRegion r:_) _ = do
let cnt = sum (toInteger . BS.length <$> prev)
let addr = incAddr cnt (relativeSegmentAddr initAddr)
let off' = off + sz
let cnt' = cnt - sz
seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt'
readByteString' reg _ off (RelocationRegion r:_) _ = do
let addr = MemAddr { addrBase = reg, addrOffset = MemWord off }
Left $! UnexpectedRelocation addr r
readByteString' initAddr prev (BSSRegion sz:rest) cnt =
if toInteger cnt <= toInteger sz then
pure $! BS.replicate (fromIntegral cnt) 0 : prev
readByteString' reg prev off (BSSRegion sz0:rest) cnt = do
let sz :: Word64
sz = memWordValue sz0
if cnt <= sz then do
when (cnt > fromIntegral (maxBound :: Int)) $ do
error $ "Illegal size " ++ show cnt
pure $! prev <> BS.replicate (fromIntegral cnt) 0
else do
let cnt' = cnt - fromIntegral sz
let next = BS.replicate (fromIntegral sz) 0 : prev
seq cnt' $ seq next $
readByteString' initAddr next rest cnt'
when (sz > fromIntegral (maxBound :: Int)) $ do
error $ "Illegal size " ++ show cnt
let bs = BS.replicate (fromIntegral sz) 0
let off' = off + sz
let cnt' = cnt - sz
seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt'
-- | Attempt to read a bytestring of the given length
readByteString :: Memory w
@ -1383,7 +1409,7 @@ readByteString mem addr cnt = addrWidthClass (memAddrWidth mem) $ do
Left $! InvalidRead segOff cnt
-- Get contents after segment
l <- segoffContentsAfter segOff
mconcat . reverse <$> readByteString' segOff [] l cnt
readByteString' (addrBase addr) BS.empty (memWordValue (addrOffset addr)) l cnt
-- | Read an address from the value in the segment or report a memory
-- error.
@ -1454,7 +1480,7 @@ relativeSegmentContents memSegs = concatMap relativeOffset memSegs
-- Each MemSegment has a segmentOffset indicating the offset from segmentBase its located.
-- This makes the offsets within the MemChunk relative to that segmentOffset.
relativeOffset :: (MemWidth w) => MemSegment w -> [(MemAddr w, MemChunk w)]
relativeOffset seg = map (\(contentOffset,r) -> (relativeAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg
relativeOffset seg = map (\(contentOffset,r) -> (segmentOffAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg
-- | Naive string matching algorithm identifies matches to given
-- pattern within the list of memory segments and their corresponding

View File

@ -182,7 +182,11 @@ data MemLoadWarning
| ExpectedSectionSymbolLocal
| InvalidSectionSymbolIndex !Elf.ElfSectionIndex
| UnsupportedProcessorSpecificSymbolIndex !SymbolName !ElfSectionIndex
| IgnoreRelocation !RelocationError
| IgnoreRelocation !Integer !String !RelocationError
-- ^ @IgnoreRelocation idx tp err@ warns we ignored the location at index @idx@ due to @err@.
--
-- @tp@ is a string representing the type which we print, because usually errors come because
-- we don't support that type or only partially implement it.
ppSymbol :: SymbolName -> String
ppSymbol "" = "unnamed symbol"
@ -227,8 +231,8 @@ instance Show MemLoadWarning where
"Expected section symbol to have a valid index instead of " ++ show idx ++ "."
show (UnsupportedProcessorSpecificSymbolIndex nm idx) =
"Could not resolve symbol index " ++ show idx ++ " for symbol " ++ BSC.unpack nm ++ "."
show (IgnoreRelocation err) =
"Ignoring relocation: " ++ show err
show (IgnoreRelocation idx typeName err) =
"Ignoring relocation " ++ show idx ++ " with type " ++ typeName ++ ": " ++ show err
data MemLoaderState w = MLS { _mlsMemory :: !(Memory w)
, mlsEndianness :: !Endianness
@ -386,6 +390,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
, relocationJumpSlot = True
}
Elf.R_X86_64_PC32 -> do
sym <- resolveRelocationSym symtab (Elf.relSym rel)
@ -395,6 +400,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = LittleEndian
, relocationJumpSlot = False
}
Elf.R_X86_64_32 -> do
sym <- resolveRelocationSym symtab (Elf.relSym rel)
@ -404,6 +410,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = LittleEndian
, relocationJumpSlot = False
}
Elf.R_X86_64_32S -> do
sym <- resolveRelocationSym symtab (Elf.relSym rel)
@ -413,6 +420,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
, relocationSize = 4
, relocationIsSigned = True
, relocationEndianness = LittleEndian
, relocationJumpSlot = False
}
Elf.R_X86_64_64 -> do
sym <- resolveRelocationSym symtab (Elf.relSym rel)
@ -422,6 +430,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
, relocationJumpSlot = False
}
-- R_X86_64_GLOB_DAT are used to update GOT entries with their
-- target address. They are similar to R_x86_64_64 except appear
@ -435,6 +444,19 @@ relaTargetX86_64 _ symtab rel off _isRel =
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
, relocationJumpSlot = False
}
Elf.R_X86_64_RELATIVE -> do
when (Elf.relSym rel /= 0) $ do
Left $ RelocationBadSymbolIndex (fromIntegral (Elf.relSym rel))
pure $ Relocation { relocationSym = LoadBaseAddr
, relocationOffset = off
, relocationIsRel = False
, relocationSize = 8
, relocationIsSigned = False
, relocationEndianness = LittleEndian
, relocationJumpSlot = False
}
-- Jhx Note. These will be needed to support thread local variables.
@ -468,6 +490,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag =
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = end
, relocationJumpSlot = False
}
Elf.R_ARM_RELATIVE -> do
-- This relocation has the value B(S) + A where
@ -503,6 +526,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag =
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = end
, relocationJumpSlot = False
}
Elf.R_ARM_JUMP_SLOT -> do
-- This is a PLT relocation
@ -520,6 +544,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag =
, relocationSize = 4
, relocationIsSigned = False
, relocationEndianness = end
, relocationJumpSlot = True
}
tp -> do
relocError $ RelocationUnsupportedType (show tp)
@ -556,12 +581,13 @@ resolveRela :: ( MemWidth w
)
=> SymbolTable
-> RelocationResolver tp
-> Integer -- ^ Index of relocation
-> Elf.RelaEntry tp
-> ResolveFn (MemLoader w) w
resolveRela symtab resolver rela msegIdx _ =
resolveRela symtab resolver relaIdx rela msegIdx _ =
case resolver msegIdx symtab (Elf.relaToRel rela) (fromIntegral (Elf.relaAddend rela)) IsRela of
Left e -> do
addWarning (IgnoreRelocation e)
addWarning (IgnoreRelocation relaIdx (show (Elf.relaType rela)) e)
pure Nothing
Right r -> do
pure $ Just r
@ -573,9 +599,10 @@ resolveRel :: ( MemWidth w
=> Endianness -- ^ Endianness of Elf file
-> SymbolTable -- ^ Symbol table
-> RelocationResolver tp
-> Integer -- ^ Index of relocation
-> Elf.RelEntry tp
-> ResolveFn (MemLoader w) w
resolveRel end symtab resolver rel msegIdx bytes = do
resolveRel end symtab resolver relIdx rel msegIdx bytes = do
-- Get the number of bits in the addend
let bits = Elf.relocTargetBits (Elf.relType rel)
-- Compute the addended by masking off the low order bits, and
@ -591,7 +618,7 @@ resolveRel end symtab resolver rel msegIdx bytes = do
-- Update the resolver.
case resolver msegIdx symtab rel (fromInteger saddend) IsRel of
Left e -> do
addWarning (IgnoreRelocation e)
addWarning (IgnoreRelocation relIdx (show (Elf.relType rel)) e)
pure Nothing
Right r -> do
pure $ Just r
@ -610,12 +637,13 @@ relocFromRela :: ( Elf.IsRelocationType tp
)
=> SymbolTable
-> RelocationResolver tp
-> Integer -- ^ Index of relocation entry for error reporting
-> Elf.RelaEntry tp
-> (MemWord w, RelocEntry (MemLoader w) w)
relocFromRela symtab resolver r =
relocFromRela symtab resolver idx r =
( fromIntegral (Elf.relaAddr r)
, RelocEntry { relocEntrySize = relocTargetBytes (Elf.relaType r)
, applyReloc = resolveRela symtab resolver r
, applyReloc = resolveRela symtab resolver idx r
}
)
@ -627,12 +655,13 @@ relocFromRel :: ( Elf.IsRelocationType tp
=> Endianness
-> SymbolTable
-> RelocationResolver tp
-> Integer -- ^ Index of relocation entry for error reporting.
-> Elf.RelEntry tp
-> (MemWord w, RelocEntry (MemLoader w) w)
relocFromRel end symtab resolver r =
relocFromRel end symtab resolver idx r =
( fromIntegral (Elf.relAddr r)
, RelocEntry { relocEntrySize = relocTargetBytes (Elf.relType r)
, applyReloc = resolveRel end symtab resolver r
, applyReloc = resolveRel end symtab resolver idx r
}
)
@ -657,7 +686,7 @@ relocMapFromRelAndRela dta resolver symtab _ (Just relaBuffer) = do
addWarning (RelocationParseFailure msg)
pure Map.empty
Right entries -> do
pure $ Map.fromList $ relocFromRela symtab resolver <$> entries
pure $ Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries
relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do
w <- uses mlsMemory memAddrWidth
reprConstraints w $ do
@ -666,7 +695,7 @@ relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do
addWarning (RelocationParseFailure msg)
pure Map.empty
Right entries -> do
pure $ Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries
pure $ Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries
resolveUndefinedSymbolReq :: SymbolName
@ -716,6 +745,7 @@ symbolDefTypeMap = Map.fromList
, (,) Elf.STT_FUNC SymbolDefFunc
, (,) Elf.STT_TLS SymbolDefThreadLocal
, (,) Elf.STT_GNU_IFUNC SymbolDefIFunc
, (,) Elf.STT_NOTYPE SymbolDefNoType
]
resolveDefinedSymbolDef :: ElfSymbolTableEntry wtp
@ -829,9 +859,9 @@ dynamicRelocationMap hdr ph contents =
Right Elf.PLTEmpty -> do
pure $! Map.empty
Right (Elf.PLTRel entries) -> do
pure $! Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries
pure $! Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries
Right (Elf.PLTRela entries) -> do
pure $! Map.fromList $ relocFromRela symtab resolver <$> entries
pure $! Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries
pure $ Map.union loadtimeRelocs pltRelocs
------------------------------------------------------------------------

View File

@ -42,6 +42,9 @@ data SymbolDefType
-- ^ This symbol denotes a thread local identifier
| SymbolDefIFunc
-- ^ This symbol is a "IFUNC" (e.g., it calls a function to resolve the symbol)
| SymbolDefNoType
-- ^ This symbol does not have a specified type.
-- | Describes whether an undefined symbol is required during linking.
data SymbolRequirement

2
deps/crucible vendored

@ -1 +1 @@
Subproject commit 70587094dc19e1d27f4fc2eca4a704f9dac9110c
Subproject commit 6720f34b2db2e86512b5c8b469cdd4ccbad1fc3b

2
deps/elf-edit vendored

@ -1 +1 @@
Subproject commit afa3dcf45afc5429139cae09f8584a722a737946
Subproject commit a9428d847f63cddfbd38eb3b0fea07153dbcd18f

2
deps/flexdis86 vendored

@ -1 +1 @@
Subproject commit e53a3359b51ad1a91758144a85d8a8eb635d91d9
Subproject commit 5c4453b0b2c89af2267174bf06b486a9dd6c57a2

2
deps/llvm-pretty vendored

@ -1 +1 @@
Subproject commit 6cc6990017c4d875311a52a6d3988fb7a962ed7a
Subproject commit e05cf3195b0938961cb0d8ecf4b7a4821f0d2673

@ -1 +1 @@
Subproject commit 1c37bf1eb8be0f7c8beacd0998a30bc932d9122d
Subproject commit 045e90c564c7839667ff92274d442a343b76d168

View File

@ -513,7 +513,8 @@ doWriteMem st mvar globs w (BVMemRepr bytes endian) ptr0 val =
let ?ptrWidth = M.addrWidthNatRepr w
let v0 = regValue val
v = LLVMValInt (ptrBase v0) (asBits v0)
mem1 <- storeRaw sym mem ptr ty v
let alignment = 0 -- default to byte alignment (FIXME)
mem1 <- storeRaw sym mem ptr ty alignment v
return ((), setMem st mvar mem1)
--------------------------------------------------------------------------------

View File

@ -16,7 +16,7 @@ library
containers,
flexdis86 >= 0.1.2,
lens >= 4.7,
macaw-base >= 0.3.2,
macaw-base >= 0.3.3,
mtl,
parameterized-utils,
text,

View File

@ -68,6 +68,7 @@ import Text.PrettyPrint.ANSI.Leijen (Pretty(..), text)
import Data.Macaw.AbsDomain.AbsState
( AbsBlockState
, curAbsStack
, setAbsIP
, absRegState
, StackEntry(..)
@ -166,12 +167,11 @@ instance MemWidth w => Show (X86TranslateError w) where
-- | Signal an error from the initial address.
initError :: ExploreLoc -- ^ Location to explore from.
initError :: MemSegmentOff 64 -- ^ Location to explore from.
-> RegState X86Reg (Value X86_64 ids)
-> X86TranslateError 64
-> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64))
initError loc err = do
let addr = loc_ip loc
let s = initX86State loc
initError addr s err = do
let b = Block { blockLabel = 0
, blockStmts = []
, blockTerm = TranslateError s (Text.pack (show err))
@ -275,7 +275,9 @@ disassembleFixedBlock :: NonceGenerator (ST st_s) ids
-- ^ Number of bytes to translate
-> ST st_s (Either (X86TranslateError 64) (Block X86_64 ids))
disassembleFixedBlock gen loc sz = do
case segoffContentsAfter (loc_ip loc) of
let addr = loc_ip loc
let initRegs = initX86State loc
case segoffContentsAfter addr of
Left err -> do
pure $ Left $ FlexdisMemoryError err
Right fullContents -> do
@ -283,8 +285,8 @@ disassembleFixedBlock gen loc sz = do
Left _err -> do
error $ "Could not split memory."
Right (contents,_) -> do
let pblock = emptyPreBlock (initX86State loc)
disassembleFixedBlock' gen pblock 0 (loc_ip loc) contents
let pblock = emptyPreBlock initRegs
disassembleFixedBlock' gen pblock 0 addr contents
-- | Translate block, returning blocks read, ending
-- PC, and an optional error. and ending PC.
@ -327,13 +329,14 @@ disassembleBlock :: forall s
-> ST s (Block X86_64 s, MemWord 64, Maybe (X86TranslateError 64))
disassembleBlock nonce_gen loc max_size = do
let addr = loc_ip loc
let regs = initX86State loc
let sz = segoffOffset addr + max_size
(b, next_ip_off, maybeError) <-
case segoffContentsAfter addr of
Left msg -> do
initError loc (FlexdisMemoryError msg)
initError addr regs (FlexdisMemoryError msg)
Right contents -> do
let pblock = emptyPreBlock (initX86State loc)
let pblock = emptyPreBlock regs
disassembleBlockImpl nonce_gen pblock 0 addr sz contents
assert (next_ip_off > segoffOffset addr) $ do
let block_sz = next_ip_off - segoffOffset addr
@ -415,56 +418,67 @@ transferAbsValue r f =
VExtractF128 {} -> TopV
VInsert {} -> TopV
-- | Disassemble block, returning either an error, or a list of blocks
-- and ending PC.
tryDisassembleBlockFromAbsState :: forall s ids
. NonceGenerator (ST s) ids
-> MemSegmentOff 64
initRegsFromAbsState :: forall ids
. MemSegmentOff 64
-- ^ Address to disassemble at
-> Int
-- ^ Maximum size of this block
-> AbsBlockState X86Reg
-- ^ Abstract state of processor for defining state.
-> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String)
tryDisassembleBlockFromAbsState nonceGen addr maxSize ab = do
-> Either String (RegState X86Reg (Value X86_64 ids))
initRegsFromAbsState addr ab = do
t <-
case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of
Nothing -> throwError "Could not determine height of X87 stack."
Nothing -> Left "Could not determine height of X87 stack."
Just t -> pure t
d <-
case asConcreteSingleton (ab^.absRegState^.boundValue DF) of
Nothing -> do
throwError $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF)
Left $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF)
Just d -> pure d
let loc = ExploreLoc { loc_ip = addr
pure $ initX86State $
ExploreLoc { loc_ip = addr
, loc_x87_top = fromInteger t
, loc_df_flag = d /= 0
}
-- | Disassemble block, returning either an error, or a list of blocks
-- and ending PC.
tryDisassembleBlock :: forall s ids
. NonceGenerator (ST s) ids
-> MemSegmentOff 64
-- ^ Address to disassemble at
-> RegState X86Reg (Value X86_64 ids)
-- ^ Initial registers
-> Int
-- ^ Maximum size of this block
-> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String)
tryDisassembleBlock nonceGen addr initRegs maxSize = do
let off = segoffOffset addr
let pblock = emptyPreBlock (initX86State loc)
(b, nextIPOff, maybeError) <- lift $
case segoffContentsAfter addr of
Left msg -> do
initError loc (FlexdisMemoryError msg)
initError addr initRegs (FlexdisMemoryError msg)
Right contents -> do
disassembleBlockImpl nonceGen pblock 0 addr (off + fromIntegral maxSize) contents
disassembleBlockImpl nonceGen (emptyPreBlock initRegs) 0 addr (off + fromIntegral maxSize) contents
let sz :: Int
sz = fromIntegral $ nextIPOff - off
pure $! (b, sz, show <$> maybeError)
-- | Disassemble block, returning either an error, or a list of blocks
-- and ending PC.
disassembleBlockFromAbsState :: forall s ids
disassembleBlockWithRegs :: forall s ids
. NonceGenerator (ST s) ids
-> MemSegmentOff 64
-- ^ Address to disassemble at
-> RegState X86Reg (Value X86_64 ids)
-> Int
-- ^ Maximum size of this block
-> AbsBlockState X86Reg
-- ^ Abstract state of processor for defining state.
-> ST s ([Block X86_64 ids], Int, Maybe String)
disassembleBlockFromAbsState nonceGen addr maxSize ab = do
mr <- runExceptT $ tryDisassembleBlockFromAbsState nonceGen addr maxSize ab
disassembleBlockWithRegs nonceGen addr initRegs maxSize = do
mr <- runExceptT $ tryDisassembleBlock nonceGen addr initRegs maxSize
case mr of
Left msg -> pure ([], 0, Just msg)
Right (b,sz, merr) -> pure ([b],sz,merr)
@ -474,10 +488,10 @@ disassembleBlockFromAbsState nonceGen addr maxSize ab = do
--
-- This can also return Nothing if the call is not supported.
identifyX86Call :: Memory 64
-> [Stmt X86_64 ids]
-> Seq (Stmt X86_64 ids)
-> RegState X86Reg (Value X86_64 ids)
-> Maybe (Seq (Stmt X86_64 ids), MemSegmentOff 64)
identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty
identifyX86Call mem stmts0 s = go stmts0 Seq.empty
where -- Get value of stack pointer
next_sp = s^.boundValue sp_reg
-- Recurse on statements.
@ -503,19 +517,30 @@ identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty
-- Otherwise skip over this instruction.
| otherwise -> go prev (stmt Seq.<| after)
-- | Return true if stack pointer has been reset to original value, and
-- return address is on top of stack.
checkForReturnAddrX86 :: forall ids
. AbsProcessorState X86Reg ids
-> Bool
checkForReturnAddrX86 absState
| Just (StackEntry _ ReturnAddr) <- Map.lookup 0 (absState^.curAbsStack) =
True
| otherwise =
False
-- | Called to determine if the instruction sequence contains a return
-- from the current function.
--
-- An instruction executing a return from a function will place the
-- ReturnAddr value (placed on the top of the stack by
-- 'initialX86AbsState' above) into the instruction pointer.
identifyX86Return :: [Stmt X86_64 ids]
identifyX86Return :: Seq (Stmt X86_64 ids)
-> RegState X86Reg (Value X86_64 ids)
-> AbsProcessorState X86Reg ids
-> Maybe (Seq (Stmt X86_64 ids))
identifyX86Return stmts s finalRegSt8 =
case transferValue finalRegSt8 (s^.boundValue ip_reg) of
ReturnAddr -> Just $ Seq.fromList stmts
ReturnAddr -> Just stmts
_ -> Nothing
-- | Return state post call
@ -569,12 +594,14 @@ x86_64_info preservePred =
ArchitectureInfo { withArchConstraints = \x -> x
, archAddrWidth = Addr64
, archEndianness = LittleEndian
, disassembleFn = disassembleBlockFromAbsState
, mkInitialRegsForBlock = initRegsFromAbsState
, disassembleFn = disassembleBlockWithRegs
, mkInitialAbsState = \_ addr -> initialX86AbsState addr
, absEvalArchFn = transferAbsValue
, absEvalArchStmt = \s _ -> s
, postCallAbsState = x86PostCallAbsState
, identifyCall = identifyX86Call
, checkForReturnAddr = \_ s -> checkForReturnAddrX86 s
, identifyReturn = identifyX86Return
, rewriteArchFn = rewriteX86PrimFn
, rewriteArchStmt = rewriteX86Stmt

View File

@ -7,7 +7,7 @@ module ElfX64Linux (
) where
import Control.Lens ( (^.) )
import Control.Monad ( unless )
import Control.Monad ( unless, when )
import qualified Control.Monad.Catch as C
import qualified Data.ByteString as B
import qualified Data.Foldable as F
@ -16,6 +16,7 @@ import Data.Maybe
import qualified Data.Set as S
import Data.Typeable ( Typeable )
import Data.Word ( Word64 )
import Numeric (showHex)
import System.FilePath
import qualified Test.Tasty as T
import qualified Test.Tasty.HUnit as T
@ -35,9 +36,12 @@ elfX64LinuxTests :: [FilePath] -> T.TestTree
elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest
data Addr = Addr Int Word64
deriving (Read,Show,Eq)
deriving (Read,Eq, Ord)
-- ^ An address is a region index and offset
instance Show Addr where
showsPrec _ (Addr idx off) = showString "Addr " . shows idx . showString " 0x" . showHex off
-- | The type of expected results for test cases
data ExpectedResult =
R { funcs :: [(Addr, [(Addr, Integer)])]
@ -59,34 +63,55 @@ mkTest fp = T.testCase fp $ withELF elfFilename (testDiscovery fp)
where
elfFilename = dropExtension fp
-- | Run a test over a given expected result filename and the ELF file
-- associated with it
testDiscovery :: FilePath -> E.Elf 64 -> IO ()
testDiscovery expectedFilename elf =
withMemory MM.Addr64 elf $ \mem entries -> do
let di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty entries []
expectedString <- readFile expectedFilename
case readMaybe expectedString of
Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
Just er -> do
let toSegOff :: Addr -> MM.MemSegmentOff 64
toSegOff (Addr idx off) = do
toSegOff :: MM.Memory 64 -> Addr -> MM.MemSegmentOff 64
toSegOff mem (Addr idx off) = do
let addr :: MM.MemAddr 64
addr = MM.MemAddr idx (fromIntegral off)
case MM.asSegmentOff mem addr of
Just a -> a
Nothing -> do
Nothing ->
let ppSeg seg = " Segment: " ++ show (MM.segmentOffAddr seg 0)
error $ "Could not resolve address : " ++ show addr ++ "\n"
in error $ "Could not resolve address : " ++ show addr ++ "\n"
++ unlines (fmap ppSeg (MM.memSegments mem))
let expectedEntries = M.fromList
[ (toSegOff entry
, S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts)
toAddr :: MM.MemSegmentOff 64 -> Addr
toAddr segOff = do
let addr :: MM.MemAddr 64
addr = MM.segoffAddr segOff
in Addr (fromIntegral (MM.addrBase addr)) (fromIntegral (MM.addrOffset addr))
-- | Run a test over a given expected result filename and the ELF file
-- associated with it
testDiscovery :: FilePath -> E.Elf 64 -> IO ()
testDiscovery expectedFilename elf = do
let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing
, MM.loadRegionBaseOffset = 0
}
(warn, mem, mentry, syms) <-
case MM.resolveElfContents opt elf of
Left err -> C.throwM (MemoryLoadError err)
Right r -> pure r
when (not (null warn)) $ do
error $ "Warnings while loading Elf " ++ show warn
let entries = maybeToList mentry ++ fmap MM.memSymbolStart syms
let addrSymMap :: M.Map (MM.MemSegmentOff 64) B.ByteString
addrSymMap = M.fromList [ (MM.memSymbolStart sym, MM.memSymbolName sym)
| sym <- syms
]
let di = MD.cfgFromAddrs RO.x86_64_linux_info mem addrSymMap entries []
expectedString <- readFile expectedFilename
case readMaybe expectedString of
Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
Just er -> do
let expectedEntries :: M.Map (MM.MemSegmentOff 64) (S.Set (Addr, Integer))
expectedEntries = M.fromList
[ (toSegOff mem entry
, S.fromList ((\(s,sz) -> (s, sz)) <$> starts)
)
| (entry, starts) <- funcs er
]
ignoredBlocks :: S.Set (MM.MemSegmentOff 64)
ignoredBlocks = S.fromList (toSegOff <$> ignoreBlocks er)
ignoredBlocks = S.fromList (toSegOff mem <$> ignoreBlocks er)
T.assertEqual "Collection of discovered function starting points"
(M.keysSet expectedEntries `S.difference` ignoredBlocks)
(M.keysSet (di ^. MD.funInfo))
@ -99,7 +124,8 @@ testDiscovery expectedFilename elf =
T.assertBool ("Translate error at " ++ show (MD.pblockAddr pb) ++ " " ++ show term) (not (isTranslateError term))
let actualEntry = MD.discoveredFunAddr dfi
-- actualEntry = fromIntegral (MM.addrValue (MD.discoveredFunAddr dfi))
let actualBlockStarts = S.fromList [ (addr, toInteger (MD.blockSize pbr))
let actualBlockStarts :: S.Set (Addr, Integer)
actualBlockStarts = S.fromList [ (toAddr addr, toInteger (MD.blockSize pbr))
| pbr <- M.elems (dfi ^. MD.parsedBlocks)
, let addr = MD.pblockAddr pbr
, addr `S.notMember` ignoredBlocks
@ -124,26 +150,6 @@ withELF fp k = do
E.Elf32Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs)
E.Elf64Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs)
withMemory :: forall w m a
. (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w))
=> MM.AddrWidthRepr w
-> E.Elf w
-> (MM.Memory w -> [MM.MemSegmentOff w] -> m a)
-> m a
withMemory _relaWidth e k = do
let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing
, MM.loadRegionBaseOffset = 0
-- let opt = MM.LoadOptions { MM.loadRegionIndex = Just 0
-- , MM.loadRegionBaseOffset = fromIntegral loadOffset
}
case MM.resolveElfContents opt e of
Left err -> C.throwM (MemoryLoadError err)
Right (warn, mem, mentry, syms) ->
if null warn then
k mem (maybeToList mentry ++ fmap MM.memSymbolStart syms)
else
error $ "Warnings while loading Elf " ++ show warn
data ElfException = MemoryLoadError String
deriving (Typeable, Show)

10
x86/tests/x64/test-plt.c Normal file
View File

@ -0,0 +1,10 @@
// This is a simple call to puts to test that we resolve the PLT stub
// to puts.
//
// It should be compiled with
// `clang -fpic -FPIC -o test-plt.exe test-plc.c`
#include <stdio.h>
int main(int argc, char** argv) {
puts("Hello World");
}

BIN
x86/tests/x64/test-plt.exe Executable file

Binary file not shown.

View File

@ -0,0 +1,19 @@
R { funcs =
[ (Addr 1 0x580, [(Addr 1 0x580,16),(Addr 1 0x590,5),(Addr 1 0x595,5)])
, (Addr 1 0x5b0, [(Addr 1 0x5b0,6)])
, (Addr 1 0x5c0, [(Addr 1 0x5c0,6)])
, (Addr 1 0x5d0, [(Addr 1 0x5d0,6)])
, (Addr 1 0x5d8, [(Addr 1 0x5d8,6)])
, (Addr 1 0x5e0, [(Addr 1 0x5e0,41),(Addr 1 0x609,1)])
, (Addr 1 0x610, [(Addr 1 0x610,27),(Addr 1 0x62b,12),(Addr 1 0x637,3),(Addr 1 0x640,2)])
, (Addr 1 0x650, [(Addr 1 0x650,40),(Addr 1 0x678,12),(Addr 1 0x684,3),(Addr 1 0x690,2)])
, (Addr 1 0x6a0, [(Addr 1 0x6a0,9),(Addr 1 0x6a9,14),(Addr 1 0x6b7,12),(Addr 1 0x6c3,5),(Addr 1 0x6c8,8),(Addr 1 0x6d0,2)])
, (Addr 1 0x6e0, [(Addr 1 0x6e0,13), (Addr 1 0x6ed,5), (Addr 1 0x6f8, 12), (Addr 1 0x704, 6), (Addr 1 0x70a, 6)])
, (Addr 1 0x710, [(Addr 1 0x710,13),(Addr 1 0x71d,4)])
, (Addr 1 0x730, [(Addr 1 0x730,49),(Addr 1 0x761,5),(Addr 1 0x766,10),(Addr 1 0x770,13),(Addr 1
0x77d,9),(Addr 1 0x786,15)])
, (Addr 1 0x7a0, [(Addr 1 0x7a0,2)])
, (Addr 1 0x7a4, [(Addr 1 0x7a4,9)])
]
, ignoreBlocks = []
}

View File

@ -1,5 +1,5 @@
R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)])
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11), (Addr 1 0x2c0, 7)])
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11)])
, (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)])
]
, ignoreBlocks = [Addr 1 0x2fb]