mirror of
https://github.com/GaloisInc/macaw.git
synced 2024-12-29 17:17:05 +03:00
commit
146ec121c3
@ -1,5 +1,5 @@
|
||||
name: macaw-base
|
||||
version: 0.3.2
|
||||
version: 0.3.4
|
||||
author: Galois, Inc.
|
||||
maintainer: jhendrix@galois.com
|
||||
build-type: Simple
|
||||
@ -31,7 +31,7 @@ library
|
||||
base >= 4,
|
||||
ansi-wl-pprint,
|
||||
binary,
|
||||
binary-symbols >= 0.1.1,
|
||||
binary-symbols >= 0.1.2,
|
||||
bytestring,
|
||||
containers >= 0.5.8.1,
|
||||
elf-edit >= 0.32,
|
||||
@ -39,7 +39,7 @@ library
|
||||
IntervalMap >= 0.5,
|
||||
lens >= 4.7,
|
||||
mtl,
|
||||
parameterized-utils >= 1.0.1,
|
||||
parameterized-utils >= 1.0.7,
|
||||
template-haskell,
|
||||
text,
|
||||
vector,
|
||||
|
@ -50,9 +50,15 @@ import Data.Macaw.CFG
|
||||
import Data.Macaw.CFG.BlockLabel
|
||||
import Data.Macaw.CFG.DemandSet
|
||||
import Data.Macaw.Discovery.State
|
||||
import Data.Macaw.Fold
|
||||
import Data.Macaw.Types
|
||||
|
||||
|
||||
newtype Ap f a = Ap { getAp :: f a }
|
||||
|
||||
instance (Applicative f, Monoid a) => Monoid (Ap f a) where
|
||||
mempty = Ap $ pure mempty
|
||||
mappend (Ap x) (Ap y) = Ap $ mappend <$> x <*> y
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
-- The algorithm computes the set of direct deps (i.e., from writes)
|
||||
@ -95,6 +101,10 @@ data DemandSet (r :: Type -> Kind.Type) =
|
||||
, functionResultDemands :: !(Map (RegSegmentOff r) (RegisterSet r))
|
||||
}
|
||||
|
||||
-- | Create a demand set for specific registers.
|
||||
registerDemandSet :: RegisterSet r -> DemandSet r
|
||||
registerDemandSet s = DemandSet { registerDemands = s, functionResultDemands = Map.empty }
|
||||
|
||||
deriving instance (ShowF r, MemWidth (RegAddrWidth r)) => Show (DemandSet r)
|
||||
deriving instance (TestEquality r) => Eq (DemandSet r)
|
||||
deriving instance (OrdF r) => Ord (DemandSet r)
|
||||
@ -109,15 +119,18 @@ instance OrdF r => Semigroup (DemandSet r) where
|
||||
|
||||
instance OrdF r => Monoid (DemandSet r) where
|
||||
mempty = DemandSet { registerDemands = Set.empty
|
||||
, functionResultDemands = mempty
|
||||
, functionResultDemands = Map.empty
|
||||
}
|
||||
mappend = (<>)
|
||||
|
||||
demandSetDifference :: OrdF r => DemandSet r -> DemandSet r -> DemandSet r
|
||||
demandSetDifference ds1 ds2 =
|
||||
DemandSet (registerDemands ds1 `Set.difference` registerDemands ds2)
|
||||
(Map.differenceWith setDiff (functionResultDemands ds1)
|
||||
(functionResultDemands ds2))
|
||||
DemandSet { registerDemands = registerDemands ds1 `Set.difference` registerDemands ds2
|
||||
, functionResultDemands =
|
||||
Map.differenceWith setDiff
|
||||
(functionResultDemands ds1)
|
||||
(functionResultDemands ds2)
|
||||
}
|
||||
where
|
||||
setDiff s1 s2 =
|
||||
let s' = s1 `Set.difference` s2
|
||||
@ -131,8 +144,8 @@ data DemandType r
|
||||
-- | This type is for registers that are demanded if the function at the given address wants
|
||||
-- the given register.
|
||||
| forall tp. DemandFunctionArg (RegSegmentOff r) (r tp)
|
||||
-- | This is a associated with a set of registers that are demanded if the given register is needed
|
||||
-- as a return value.
|
||||
-- | This is a associated with the registers that are demanded if
|
||||
-- the given register is needed as a return value.
|
||||
| forall tp. DemandFunctionResult (r tp)
|
||||
|
||||
instance (MemWidth (RegAddrWidth r), ShowF r) => Show (DemandType r) where
|
||||
@ -179,8 +192,8 @@ data ArchTermStmtRegEffects arch
|
||||
= ArchTermStmtRegEffects { termRegDemands :: ![Some (ArchReg arch)]
|
||||
-- ^ Registers demanded by term statement
|
||||
, termRegTransfers :: [Some (ArchReg arch)]
|
||||
-- ^ Registers that terminal statement are not modified
|
||||
-- by terminal statement.
|
||||
-- ^ Registers that are not modified by
|
||||
-- terminal statement.
|
||||
}
|
||||
|
||||
-- | Returns information about the registers needed and modified by a terminal statement
|
||||
@ -193,6 +206,8 @@ type ComputeArchTermStmtEffects arch ids
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-> ArchTermStmtRegEffects arch
|
||||
|
||||
-- | Information about the architecture/environment what arguments a
|
||||
-- function needs.
|
||||
data ArchDemandInfo arch = ArchDemandInfo
|
||||
{ -- | Registers used as arguments to the function.
|
||||
functionArgRegs :: ![Some (ArchReg arch)]
|
||||
@ -294,34 +309,38 @@ addIntraproceduralJumpTarget fun_info src_block dest_addr = do -- record the ed
|
||||
text "Could not find target block" <+> text (show dest_addr) <$$>
|
||||
indent 2 (text "Source:" <$$> pretty src_block)
|
||||
|
||||
-- | Compute the input registers that this value depends on
|
||||
withAssignmentCache :: State (AssignmentCache (ArchReg arch) ids) a -> FunctionArgsM arch ids a
|
||||
withAssignmentCache m = do
|
||||
c <- use assignmentCache
|
||||
let (r, c') = runState m c
|
||||
seq c' $ assignmentCache .= c'
|
||||
pure r
|
||||
|
||||
-- | Return the input registers that a value depends on.
|
||||
valueUses :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch))
|
||||
=> Value arch ids tp
|
||||
-> FunctionArgsM arch ids (RegisterSet (ArchReg arch))
|
||||
valueUses v = zoom assignmentCache $ foldValueCached fns v
|
||||
where fns = emptyValueFold { foldInput = Set.singleton . Some }
|
||||
-> State (AssignmentCache (ArchReg arch) ids) (RegisterSet (ArchReg arch))
|
||||
valueUses (AssignedValue (Assignment a rhs)) = do
|
||||
mr <- gets $ Map.lookup (Some a)
|
||||
case mr of
|
||||
Just s -> pure s
|
||||
Nothing -> do
|
||||
rhs' <- foldrFC (\v mrhs -> Set.union <$> valueUses v <*> mrhs) (pure Set.empty) rhs
|
||||
seq rhs' $ modify' $ Map.insert (Some a) rhs'
|
||||
pure $ rhs'
|
||||
valueUses (Initial r) = do
|
||||
pure $! Set.singleton (Some r)
|
||||
valueUses _ = do
|
||||
pure $! Set.empty
|
||||
|
||||
addBlockDemands :: OrdF (ArchReg arch) => ArchLabel arch -> DemandMap (ArchReg arch) -> FunctionArgsM arch ids ()
|
||||
addBlockDemands lbl m =
|
||||
blockDemandMap %= Map.insertWith demandMapUnion lbl m
|
||||
|
||||
-- | Record that a block demands the value of certain registers.
|
||||
recordBlockDemand :: ( OrdF (ArchReg arch)
|
||||
, FoldableFC (ArchFn arch)
|
||||
)
|
||||
=> ArchLabel arch
|
||||
-- ^ The current block
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ The current register state
|
||||
-> (forall tp . ArchReg arch tp -> DemandType (ArchReg arch))
|
||||
-> [Some (ArchReg arch)]
|
||||
-- ^ The registers that we need.
|
||||
-> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps
|
||||
recordBlockDemand lbl s mk rs = do
|
||||
let doReg (Some r) = do
|
||||
rs' <- valueUses (s ^. boundValue r)
|
||||
return (mk r, DemandSet rs' mempty)
|
||||
vs <- mapM doReg rs
|
||||
blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs)
|
||||
|
||||
-- Figure out the deps of the given registers and update the state for the current label
|
||||
recordBlockTransfer :: ( OrdF (ArchReg arch)
|
||||
recordBlockTransfer :: forall arch ids
|
||||
. ( OrdF (ArchReg arch)
|
||||
, FoldableFC (ArchFn arch)
|
||||
)
|
||||
=> ArchLabel arch
|
||||
@ -329,10 +348,13 @@ recordBlockTransfer :: ( OrdF (ArchReg arch)
|
||||
-> [Some (ArchReg arch)]
|
||||
-> FunctionArgsM arch ids () -- Map (Some N.RegisterName) RegDeps
|
||||
recordBlockTransfer lbl s rs = do
|
||||
let doReg (Some r) = do
|
||||
let doReg :: Some (ArchReg arch)
|
||||
-> State (AssignmentCache (ArchReg arch) ids)
|
||||
(Some (ArchReg arch), DemandSet (ArchReg arch))
|
||||
doReg (Some r) = do
|
||||
rs' <- valueUses (s ^. boundValue r)
|
||||
return (Some r, DemandSet rs' mempty)
|
||||
vs <- mapM doReg rs
|
||||
return (Some r, registerDemandSet rs')
|
||||
vs <- withAssignmentCache $ traverse doReg rs
|
||||
blockTransfer %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromListWith mappend vs)
|
||||
|
||||
-- | A block requires a value, and so we need to remember which
|
||||
@ -342,9 +364,8 @@ demandValue :: (OrdF (ArchReg arch), FoldableFC (ArchFn arch))
|
||||
-> Value arch ids tp
|
||||
-> FunctionArgsM arch ids ()
|
||||
demandValue lbl v = do
|
||||
regs <- valueUses v
|
||||
blockDemandMap %= Map.insertWith demandMapUnion lbl
|
||||
(Map.singleton DemandAlways (DemandSet regs mempty))
|
||||
regs <- withAssignmentCache $ valueUses v
|
||||
addBlockDemands lbl $ Map.singleton DemandAlways (registerDemandSet regs)
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- Entry point
|
||||
@ -426,9 +447,9 @@ summarizeCall :: forall arch ids
|
||||
-> Bool
|
||||
-- ^ A flag that is set to true for tail calls.
|
||||
-> FunctionArgsM arch ids ()
|
||||
summarizeCall mem lbl proc_state isTailCall = do
|
||||
summarizeCall mem lbl finalRegs isTailCall = do
|
||||
knownAddrs <- gets computedAddrSet
|
||||
case valueAsMemAddr (proc_state^.boundValue ip_reg) of
|
||||
case valueAsMemAddr (finalRegs^.boundValue ip_reg) of
|
||||
Just faddr0
|
||||
| Just faddr <- asSegmentOff mem faddr0
|
||||
, Set.member faddr knownAddrs -> do
|
||||
@ -439,10 +460,10 @@ summarizeCall mem lbl proc_state isTailCall = do
|
||||
-- singleton for now, but propagating back will introduce more deps.
|
||||
let demandSet sr = DemandSet mempty (Map.singleton faddr (Set.singleton sr))
|
||||
|
||||
if isTailCall then
|
||||
if isTailCall then do
|
||||
-- tail call, propagate demands for our return regs to the called function
|
||||
let propMap = map (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) retRegs
|
||||
in blockDemandMap %= Map.insertWith (Map.unionWith mappend) lbl (Map.fromList propMap)
|
||||
let propMap = (\(Some r) -> (DemandFunctionResult r, demandSet (Some r))) <$> retRegs
|
||||
addBlockDemands lbl $ Map.fromList propMap
|
||||
else do
|
||||
-- Given a return register sr, this indicates that
|
||||
let propResult :: Some (ArchReg arch) -> FunctionArgsM arch ids ()
|
||||
@ -455,12 +476,22 @@ summarizeCall mem lbl proc_state isTailCall = do
|
||||
-- If a function wants argument register r, then we note that this
|
||||
-- block needs the corresponding state values. Note that we could
|
||||
-- do this for _all_ registers, but this should make the summaries somewhat smaller.
|
||||
|
||||
-- Associate the demand sets for each potential argument register with the registers used
|
||||
-- by faddr.
|
||||
argRegs <- gets $ functionArgRegs . archDemandInfo
|
||||
recordBlockDemand lbl proc_state (DemandFunctionArg faddr) argRegs
|
||||
let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^. boundValue r)
|
||||
let demandTypes = viewSome (DemandFunctionArg faddr) <$> argRegs
|
||||
demands <- withAssignmentCache $ traverse regDemandSet argRegs
|
||||
addBlockDemands lbl $ Map.fromList $ zip demandTypes demands
|
||||
_ -> do
|
||||
-- In the dynamic case, we just assume all arguments (FIXME: results?)
|
||||
argRegs <- gets $ functionArgRegs . archDemandInfo
|
||||
recordBlockDemand lbl proc_state (\_ -> DemandAlways) ([Some ip_reg] ++ argRegs)
|
||||
|
||||
do let demandedRegs = [Some ip_reg] ++ argRegs
|
||||
let regUses (Some r) = valueUses (finalRegs^. boundValue r)
|
||||
demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $ foldMap (Ap . regUses) demandedRegs
|
||||
addBlockDemands lbl $ Map.singleton DemandAlways demands
|
||||
|
||||
-- | Return values that must be evaluated to execute side effects.
|
||||
stmtDemandedValues :: DemandContext arch
|
||||
@ -492,10 +523,10 @@ summarizeBlock :: forall arch ids
|
||||
-> ArchSegmentOff arch -- ^ Address of the code.
|
||||
-> StatementList arch ids -- ^ Current block
|
||||
-> FunctionArgsM arch ids ()
|
||||
summarizeBlock mem interp_state addr stmts = do
|
||||
summarizeBlock mem interpState addr stmts = do
|
||||
let lbl = GeneratedBlock addr (stmtsIdent stmts)
|
||||
-- Add this label to block demand map with empty set.
|
||||
blockDemandMap %= Map.insertWith demandMapUnion lbl mempty
|
||||
addBlockDemands lbl mempty
|
||||
|
||||
ctx <- gets $ demandInfoCtx . archDemandInfo
|
||||
-- Add all values demanded by non-terminal statements in list.
|
||||
@ -503,6 +534,67 @@ summarizeBlock mem interp_state addr stmts = do
|
||||
(stmtsNonterm stmts)
|
||||
-- Add values demanded by terminal statements
|
||||
case stmtsTerm stmts of
|
||||
ParsedCall finalRegs m_ret_addr -> do
|
||||
-- Record the demands based on the call, and add edges between
|
||||
-- this note and next nodes.
|
||||
case m_ret_addr of
|
||||
Nothing -> do
|
||||
summarizeCall mem lbl finalRegs True
|
||||
Just ret_addr -> do
|
||||
summarizeCall mem lbl finalRegs False
|
||||
addIntraproceduralJumpTarget interpState lbl ret_addr
|
||||
callRegs <- gets $ calleeSavedRegs . archDemandInfo
|
||||
recordBlockTransfer lbl finalRegs ([Some sp_reg] ++ Set.toList callRegs)
|
||||
|
||||
PLTStub regs _ _ -> do
|
||||
-- PLT Stubs demand all registers that could be function
|
||||
-- arguments, as well as any registers in regs.
|
||||
ainfo <- gets archDemandInfo
|
||||
let demandedRegs = Set.fromList (functionArgRegs ainfo)
|
||||
demands <- withAssignmentCache $ getAp $ foldMapF (Ap . valueUses) regs
|
||||
addBlockDemands lbl $ Map.singleton DemandAlways $
|
||||
registerDemandSet $ demands <> demandedRegs
|
||||
|
||||
ParsedJump procState tgtAddr -> do
|
||||
-- record all propagations
|
||||
recordBlockTransfer lbl procState archRegs
|
||||
addIntraproceduralJumpTarget interpState lbl tgtAddr
|
||||
|
||||
ParsedLookupTable finalRegs lookup_idx vec -> do
|
||||
demandValue lbl lookup_idx
|
||||
-- record all propagations
|
||||
recordBlockTransfer lbl finalRegs archRegs
|
||||
traverse_ (addIntraproceduralJumpTarget interpState lbl) vec
|
||||
|
||||
ParsedReturn finalRegs -> do
|
||||
retRegs <- gets $ functionRetRegs . archDemandInfo
|
||||
let demandTypes = viewSome DemandFunctionResult <$> retRegs
|
||||
let regDemandSet (Some r) = registerDemandSet <$> valueUses (finalRegs^.boundValue r)
|
||||
demands <- withAssignmentCache $ traverse regDemandSet retRegs
|
||||
addBlockDemands lbl $ Map.fromList $ zip demandTypes demands
|
||||
|
||||
|
||||
|
||||
ParsedIte c tblock fblock -> do
|
||||
-- Demand condition then summarize recursive blocks.
|
||||
demandValue lbl c
|
||||
summarizeBlock mem interpState addr tblock
|
||||
summarizeBlock mem interpState addr fblock
|
||||
|
||||
ParsedArchTermStmt tstmt finalRegs next_addr -> do
|
||||
-- Compute effects of terminal statement.
|
||||
ainfo <- gets $ archDemandInfo
|
||||
let e = computeArchTermStmtEffects ainfo tstmt finalRegs
|
||||
|
||||
-- Demand all registers the terminal statement demands.
|
||||
do let regUses (Some r) = valueUses (finalRegs^.boundValue r)
|
||||
demands <- withAssignmentCache $ fmap registerDemandSet $ getAp $
|
||||
foldMap (Ap . regUses) (termRegDemands e)
|
||||
addBlockDemands lbl $ Map.singleton DemandAlways demands
|
||||
|
||||
recordBlockTransfer lbl finalRegs (termRegTransfers e)
|
||||
traverse_ (addIntraproceduralJumpTarget interpState lbl) next_addr
|
||||
|
||||
ParsedTranslateError _ -> do
|
||||
-- We ignore demands for translate errors.
|
||||
pure ()
|
||||
@ -510,43 +602,6 @@ summarizeBlock mem interp_state addr stmts = do
|
||||
-- We ignore demands for classify failure.
|
||||
pure ()
|
||||
|
||||
ParsedIte c tblock fblock -> do
|
||||
-- Demand condition then summarize recursive blocks.
|
||||
demandValue lbl c
|
||||
summarizeBlock mem interp_state addr tblock
|
||||
summarizeBlock mem interp_state addr fblock
|
||||
|
||||
ParsedCall proc_state m_ret_addr -> do
|
||||
case m_ret_addr of
|
||||
Nothing -> do
|
||||
summarizeCall mem lbl proc_state True
|
||||
Just ret_addr -> do
|
||||
summarizeCall mem lbl proc_state False
|
||||
addIntraproceduralJumpTarget interp_state lbl ret_addr
|
||||
callRegs <- gets $ calleeSavedRegs . archDemandInfo
|
||||
recordBlockTransfer lbl proc_state ([Some sp_reg] ++ Set.toList callRegs)
|
||||
|
||||
ParsedJump proc_state tgt_addr -> do
|
||||
-- record all propagations
|
||||
recordBlockTransfer lbl proc_state archRegs
|
||||
addIntraproceduralJumpTarget interp_state lbl tgt_addr
|
||||
|
||||
ParsedReturn proc_state -> do
|
||||
retRegs <- gets $ functionRetRegs . archDemandInfo
|
||||
recordBlockDemand lbl proc_state DemandFunctionResult retRegs
|
||||
|
||||
ParsedArchTermStmt tstmt proc_state next_addr -> do
|
||||
effFn <- gets $ computeArchTermStmtEffects . archDemandInfo
|
||||
let e = effFn tstmt proc_state
|
||||
recordBlockDemand lbl proc_state (\_ -> DemandAlways) (termRegDemands e)
|
||||
recordBlockTransfer lbl proc_state (termRegTransfers e)
|
||||
traverse_ (addIntraproceduralJumpTarget interp_state lbl) next_addr
|
||||
|
||||
ParsedLookupTable proc_state lookup_idx vec -> do
|
||||
demandValue lbl lookup_idx
|
||||
-- record all propagations
|
||||
recordBlockTransfer lbl proc_state archRegs
|
||||
traverse_ (addIntraproceduralJumpTarget interp_state lbl) vec
|
||||
|
||||
-- | Explore states until we have reached end of frontier.
|
||||
summarizeIter :: ArchConstraints arch
|
||||
@ -575,7 +630,7 @@ calculateOnePred newDemands predLbl = do
|
||||
|
||||
-- update uses, returning value before this iteration
|
||||
seenDemands <- use (blockDemandMap . ix lbl')
|
||||
blockDemandMap . at lbl' .= Just (Map.unionWith mappend demands' seenDemands)
|
||||
addBlockDemands lbl' demands'
|
||||
-- seenDemands <- blockDemandMap . ix lbl' <<%= demandMapUnion demands'
|
||||
|
||||
|
||||
@ -683,10 +738,7 @@ doOneFunction archFns addrs ist0 acc ist = do
|
||||
-- recorded as a use, which is erroneous, so we strip out any
|
||||
-- reference to them here.
|
||||
callRegs <- gets $ calleeSavedRegs . archDemandInfo
|
||||
let calleeDemandSet = DemandSet { registerDemands =
|
||||
Set.insert (Some sp_reg) callRegs
|
||||
, functionResultDemands = mempty
|
||||
}
|
||||
let calleeDemandSet = registerDemandSet (Set.insert (Some sp_reg) callRegs)
|
||||
|
||||
return (Map.foldlWithKey' (decomposeMap calleeDemandSet addr) acc funDemands)
|
||||
|
||||
|
@ -45,13 +45,10 @@ type DisassembleFn arch
|
||||
. NonceGenerator (ST s) ids
|
||||
-> ArchSegmentOff arch
|
||||
-- ^ The offset to start reading from.
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ Initial values to use for registers.
|
||||
-> Int
|
||||
-- ^ Maximum offset for this to read from.
|
||||
-> AbsBlockState (ArchReg arch)
|
||||
-- ^ Abstract state associated with address that we are disassembling
|
||||
-- from.
|
||||
--
|
||||
-- This is used for things like the height of the x87 stack.
|
||||
-> ST s ([Block arch ids], Int, Maybe String)
|
||||
|
||||
-- | This records architecture specific functions for analysis.
|
||||
@ -64,6 +61,12 @@ data ArchitectureInfo arch
|
||||
-- ^ Architecture address width.
|
||||
, archEndianness :: !Endianness
|
||||
-- ^ The byte order values are stored in.
|
||||
, mkInitialRegsForBlock :: !(forall ids
|
||||
. ArchSegmentOff arch
|
||||
-> AbsBlockState (ArchReg arch)
|
||||
-> Either String (RegState (ArchReg arch) (Value arch ids)))
|
||||
-- ^ Use the abstract block state information to infer register
|
||||
-- values to use for disassembling from given address.
|
||||
, disassembleFn :: !(DisassembleFn arch)
|
||||
-- ^ Function for disasembling a block.
|
||||
, mkInitialAbsState :: !(Memory (RegAddrWidth (ArchReg arch))
|
||||
@ -89,7 +92,7 @@ data ArchitectureInfo arch
|
||||
-- ^ Update the abstract state after a function call returns
|
||||
, identifyCall :: forall ids
|
||||
. Memory (ArchAddrWidth arch)
|
||||
-> [Stmt arch ids]
|
||||
-> Seq (Stmt arch ids)
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-> Maybe (Seq (Stmt arch ids), ArchSegmentOff arch)
|
||||
-- ^ Function for recognizing call statements.
|
||||
@ -97,10 +100,25 @@ data ArchitectureInfo arch
|
||||
-- Given a memory state, list of statements, and final register
|
||||
-- state, the should determine if this is a call, and if so,
|
||||
-- return the statements with any action to push the return
|
||||
-- value to the stack removed, and provide the explicit return
|
||||
-- address that the function should return to.
|
||||
-- value to the stack removed, and provide the return address that
|
||||
-- the function should return to.
|
||||
|
||||
, checkForReturnAddr :: forall ids
|
||||
. RegState (ArchReg arch) (Value arch ids)
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-> Bool
|
||||
-- ^ @checkForReturnAddr regs s@ returns true if the location
|
||||
-- where the return address is normally stored in regs when
|
||||
-- calling a function does indeed contain the abstract value
|
||||
-- associated with return addresses.
|
||||
--
|
||||
-- For x86 this checks if the address just above the stack is the
|
||||
-- return address. For ARM, this should check the link register.
|
||||
--
|
||||
-- This predicate is invoked when considering if a potential tail call
|
||||
-- is setup to return to the right location.
|
||||
, identifyReturn :: forall ids
|
||||
. [Stmt arch ids]
|
||||
. Seq (Stmt arch ids)
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-> Maybe (Seq (Stmt arch ids))
|
||||
|
@ -55,6 +55,7 @@ module Data.Macaw.CFG.Core
|
||||
, mkRegStateM
|
||||
, traverseRegsWith
|
||||
, zipWithRegState
|
||||
, ppRegMap
|
||||
-- * Pretty printing
|
||||
, ppAssignId
|
||||
, ppLit
|
||||
@ -72,8 +73,6 @@ module Data.Macaw.CFG.Core
|
||||
, asStackAddrOffset
|
||||
-- * References
|
||||
, refsInValue
|
||||
, refsInApp
|
||||
, refsInAssignRhs
|
||||
-- ** Synonyms
|
||||
, ArchAddrValue
|
||||
, Data.Parameterized.TraversableFC.FoldableFC(..)
|
||||
@ -650,13 +649,16 @@ class PrettyRegValue r (f :: Type -> Kind.Type) where
|
||||
-- should be printed, and Nothing if the contents should be ignored.
|
||||
ppValueEq :: r tp -> f tp -> Maybe Doc
|
||||
|
||||
instance ( PrettyRegValue r f
|
||||
)
|
||||
=> Pretty (RegState r f) where
|
||||
pretty (RegState m) = bracketsep $ catMaybes (f <$> MapF.toList m)
|
||||
where f :: MapF.Pair r f -> Maybe Doc
|
||||
ppRegMap :: forall r v . PrettyRegValue r v => MapF.MapF r v -> Doc
|
||||
ppRegMap m = bracketsep $ catMaybes (f <$> MapF.toList m)
|
||||
where f :: MapF.Pair r v -> Maybe Doc
|
||||
f (MapF.Pair r v) = ppValueEq r v
|
||||
|
||||
|
||||
instance ( PrettyRegValue r f)
|
||||
=> Pretty (RegState r f) where
|
||||
pretty (RegState m) = ppRegMap m
|
||||
|
||||
instance ( PrettyRegValue r f
|
||||
)
|
||||
=> Show (RegState r f) where
|
||||
@ -696,7 +698,7 @@ data Stmt arch ids
|
||||
|
||||
ppStmt :: ArchConstraints arch
|
||||
=> (ArchAddrWord arch -> Doc)
|
||||
-- ^ Function for pretty printing an offset
|
||||
-- ^ Function for pretty printing an instruction address offset
|
||||
-> Stmt arch ids
|
||||
-> Doc
|
||||
ppStmt ppOff stmt =
|
||||
@ -706,7 +708,8 @@ ppStmt ppOff stmt =
|
||||
InstructionStart off mnem -> text "#" <+> ppOff off <+> text (Text.unpack mnem)
|
||||
Comment s -> text $ "# " ++ Text.unpack s
|
||||
ExecArchStmt s -> ppArchStmt (ppValue 10) s
|
||||
ArchState a m -> hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m))
|
||||
ArchState a m ->
|
||||
hang (length (show prefix)) (prefix PP.<> PP.semiBraces (MapF.foldrWithKey ppUpdate [] m))
|
||||
where
|
||||
ppAddr addr =
|
||||
case asAbsoluteAddr addr of
|
||||
@ -724,19 +727,3 @@ instance ArchConstraints arch => Show (Stmt arch ids) where
|
||||
refsInValue :: Value arch ids tp -> Set (Some (AssignId ids))
|
||||
refsInValue (AssignedValue (Assignment v _)) = Set.singleton (Some v)
|
||||
refsInValue _ = Set.empty
|
||||
|
||||
refsInApp :: App (Value arch ids) tp -> Set (Some (AssignId ids))
|
||||
refsInApp app = foldMapFC refsInValue app
|
||||
|
||||
refsInAssignRhs :: FoldableFC (ArchFn arch)
|
||||
=> AssignRhs arch (Value arch ids) tp
|
||||
-> Set (Some (AssignId ids))
|
||||
refsInAssignRhs rhs =
|
||||
case rhs of
|
||||
EvalApp v -> refsInApp v
|
||||
SetUndefined _ -> Set.empty
|
||||
ReadMem v _ -> refsInValue v
|
||||
CondReadMem _ c a d ->
|
||||
Set.union (refsInValue c) $
|
||||
Set.union (refsInValue a) (refsInValue d)
|
||||
EvalArchFn f _ -> foldMapFC refsInValue f
|
||||
|
@ -9,6 +9,7 @@ This provides information about code discovered in binaries.
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE PatternGuards #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE StandaloneDeriving #-}
|
||||
@ -18,6 +19,7 @@ module Data.Macaw.Discovery
|
||||
( -- * DiscoveryInfo
|
||||
State.DiscoveryState(..)
|
||||
, State.emptyDiscoveryState
|
||||
, State.trustedFunctionEntryPoints
|
||||
, State.AddrSymMap
|
||||
, State.funInfo
|
||||
, State.exploredFunctions
|
||||
@ -64,11 +66,15 @@ import Data.Foldable
|
||||
import Data.Map.Strict (Map)
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Data.Maybe
|
||||
import Data.Monoid
|
||||
import Data.Parameterized.Classes
|
||||
import qualified Data.Parameterized.Map as MapF
|
||||
import Data.Parameterized.NatRepr
|
||||
import Data.Parameterized.Nonce
|
||||
import Data.Parameterized.Some
|
||||
import Data.Parameterized.TraversableF
|
||||
import Data.Sequence (Seq)
|
||||
import qualified Data.Sequence as Seq
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Text as Text
|
||||
@ -198,6 +204,8 @@ addStatementListDemands sl = do
|
||||
case stmtsTerm sl of
|
||||
ParsedCall regs _ -> do
|
||||
traverseF_ addValueDemands regs
|
||||
PLTStub regs _ _ ->
|
||||
traverseF_ addValueDemands regs
|
||||
ParsedJump regs _ -> do
|
||||
traverseF_ addValueDemands regs
|
||||
ParsedLookupTable regs _idx _tbl -> do
|
||||
@ -642,8 +650,10 @@ resolveAsAbsoluteAddr :: forall w
|
||||
-> Maybe (MemAddr w)
|
||||
resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $
|
||||
case l of
|
||||
[ByteRegion bs] -> do
|
||||
absoluteAddr <$> addrRead endianness bs
|
||||
[ByteRegion bs] ->
|
||||
case addrRead endianness bs of
|
||||
Just a -> pure $! absoluteAddr a
|
||||
Nothing -> error $ "internal: resolveAsAbsoluteAddr given short chunk list."
|
||||
[RelocationRegion r] -> do
|
||||
when (relocationIsRel r) $ Nothing
|
||||
case relocationSym r of
|
||||
@ -654,6 +664,8 @@ resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $
|
||||
SegmentBaseAddr idx -> do
|
||||
seg <- Map.lookup idx (memSegmentIndexMap mem)
|
||||
pure $! segmentOffAddr seg (relocationOffset r)
|
||||
LoadBaseAddr -> do
|
||||
memBaseAddr mem
|
||||
_ -> Nothing
|
||||
|
||||
-- This function resolves jump table entries.
|
||||
@ -779,13 +791,15 @@ data ParseContext arch ids =
|
||||
-- ^ Entry addresses for known functions (e.g. from
|
||||
-- symbol information)
|
||||
--
|
||||
-- The discovery process will not create intra-procedural
|
||||
-- jumps to the entry points of new functions.
|
||||
-- The discovery process will not create
|
||||
-- intra-procedural jumps to the entry points of new
|
||||
-- functions.
|
||||
, pctxFunAddr :: !(ArchSegmentOff arch)
|
||||
-- ^ Address of function this block is being parsed as
|
||||
, pctxAddr :: !(ArchSegmentOff arch)
|
||||
-- ^ Address of the current block
|
||||
, pctxBlockMap :: !(Map Word64 (Block arch ids))
|
||||
-- ^ Map from block indices to block code at address.
|
||||
}
|
||||
|
||||
addrMemRepr :: ArchitectureInfo arch -> MemRepr (BVType (RegAddrWidth (ArchReg arch)))
|
||||
@ -825,67 +839,130 @@ addNewFunctionAddrs :: [ArchSegmentOff arch]
|
||||
addNewFunctionAddrs addrs =
|
||||
newFunctionAddrs %= (++addrs)
|
||||
|
||||
-- | @stripPLTRead assignId prev rest@ looks for a read of @assignId@
|
||||
-- from the end of @prev@, and if it finds it returns the
|
||||
-- concatenation of the instructione before the read in @prev@ and
|
||||
-- @rest@.
|
||||
--
|
||||
-- The read may appear before comment and @instructionStart@
|
||||
-- instructions, but otherwise must be at the end of prev
|
||||
stripPLTRead :: ArchConstraints arch
|
||||
=> AssignId ids tp -- ^ Identifier of write to remove
|
||||
-> Seq (Stmt arch ids)
|
||||
-> Seq (Stmt arch ids)
|
||||
-> Maybe (Seq (Stmt arch ids))
|
||||
stripPLTRead readId next rest =
|
||||
case Seq.viewr next of
|
||||
Seq.EmptyR -> Nothing
|
||||
prev Seq.:> lastStmt -> do
|
||||
let cont = stripPLTRead readId prev (lastStmt Seq.<| rest)
|
||||
case lastStmt of
|
||||
AssignStmt (Assignment stmtId rhs)
|
||||
| Just Refl <- testEquality readId stmtId -> Just (prev Seq.>< rest)
|
||||
-- Fail if the read to delete is used in later computations
|
||||
| Set.member (Some readId) (foldMapFC refsInValue rhs) ->
|
||||
Nothing
|
||||
| otherwise ->
|
||||
case rhs of
|
||||
EvalApp{} -> cont
|
||||
SetUndefined{} -> cont
|
||||
_ -> Nothing
|
||||
InstructionStart{} -> cont
|
||||
ArchState{} -> cont
|
||||
Comment{} -> cont
|
||||
_ -> Nothing
|
||||
|
||||
removeUnassignedRegs :: forall arch ids
|
||||
. RegisterInfo (ArchReg arch)
|
||||
=> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ Initial register values
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ Final register values
|
||||
-> MapF.MapF (ArchReg arch) (Value arch ids)
|
||||
removeUnassignedRegs initRegs finalRegs =
|
||||
let keepReg :: forall tp . ArchReg arch tp -> Value arch ids tp -> Bool
|
||||
keepReg r finalVal
|
||||
| Just Refl <- testEquality r ip_reg = False
|
||||
| Just Refl <- testEquality initVal finalVal = False
|
||||
| otherwise = True
|
||||
where initVal = initRegs^.boundValue r
|
||||
in MapF.filterWithKey keepReg (regStateMap finalRegs)
|
||||
|
||||
-- | Return true if any value in structure contains the given
|
||||
-- identifier.
|
||||
containsAssignId :: forall t arch ids itp
|
||||
. FoldableF t
|
||||
=> AssignId ids itp
|
||||
-- ^ Forbidden assignment -- may not appear in terms.
|
||||
-> t (Value arch ids)
|
||||
-> Bool
|
||||
containsAssignId droppedAssign =
|
||||
let hasId :: forall tp . Value arch ids tp -> Any
|
||||
hasId v = Any (Set.member (Some droppedAssign) (refsInValue v))
|
||||
in getAny . foldMapF hasId
|
||||
|
||||
-- | This parses a block that ended with a fetch and execute instruction.
|
||||
parseFetchAndExecute :: forall arch ids
|
||||
. ParseContext arch ids
|
||||
-> Word64
|
||||
-- ^ Index of this block
|
||||
-> [Stmt arch ids]
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-- ^ Registers prior to blocks being executed.
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ Initial register values
|
||||
-> Seq (Stmt arch ids)
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-- ^ Abstract state of registers prior to blocks being executed.
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ Final register values
|
||||
-> State (ParseState arch ids) (StatementList arch ids, Word64)
|
||||
parseFetchAndExecute ctx idx stmts regs s = do
|
||||
parseFetchAndExecute ctx idx initRegs stmts absProcState finalRegs = do
|
||||
let mem = pctxMemory ctx
|
||||
let ainfo = pctxArchInfo ctx
|
||||
let absProcState' = absEvalStmts ainfo regs stmts
|
||||
let absProcState' = absEvalStmts ainfo absProcState stmts
|
||||
withArchConstraints ainfo $ do
|
||||
-- See if next statement appears to end with a call.
|
||||
-- We define calls as statements that end with a write that
|
||||
-- stores the pc to an address.
|
||||
-- Try to figure out what control flow statement we have.
|
||||
case () of
|
||||
_ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do
|
||||
-- The block ends with a Mux, so we turn this into a `ParsedIte` statement.
|
||||
_ | Just (Mux _ c t f) <- valueAsApp (finalRegs^.boundValue ip_reg) -> do
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
|
||||
|
||||
let l_regs = refineProcStateBounds c True $
|
||||
refineProcState c absTrue absProcState'
|
||||
let l_regs' = absEvalStmts ainfo l_regs stmts
|
||||
let lState = s & boundValue ip_reg .~ t
|
||||
let lState = finalRegs & boundValue ip_reg .~ t
|
||||
(tStmts,trueIdx) <-
|
||||
parseFetchAndExecute ctx (idx+1) [] l_regs' lState
|
||||
parseFetchAndExecute ctx (idx+1) initRegs Seq.empty l_regs' lState
|
||||
|
||||
let r_regs = refineProcStateBounds c False $
|
||||
refineProcState c absFalse absProcState'
|
||||
let r_regs' = absEvalStmts ainfo r_regs stmts
|
||||
let rState = s & boundValue ip_reg .~ f
|
||||
let rState = finalRegs & boundValue ip_reg .~ f
|
||||
|
||||
(fStmts,falseIdx) <-
|
||||
parseFetchAndExecute ctx trueIdx [] r_regs' rState
|
||||
parseFetchAndExecute ctx trueIdx initRegs Seq.empty r_regs' rState
|
||||
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = stmts
|
||||
, stmtsNonterm = toList stmts
|
||||
, stmtsTerm = ParsedIte c tStmts fStmts
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
pure (ret, falseIdx)
|
||||
|
||||
-- The last statement was a call.
|
||||
-- Use architecture-specific callback to check if last statement was a call.
|
||||
-- Note that in some cases the call is known not to return, and thus
|
||||
-- this code will never jump to the return value.
|
||||
_ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do
|
||||
_ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts finalRegs -> do
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts
|
||||
let abst = finalAbsBlockState absProcState' s
|
||||
seq abst $ do
|
||||
let abst = finalAbsBlockState absProcState' finalRegs
|
||||
-- Merge caller return information
|
||||
intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
|
||||
seq abst $ intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
|
||||
-- Use the abstract domain to look for new code pointers for the current IP.
|
||||
addNewFunctionAddrs $
|
||||
identifyCallTargets mem abst s
|
||||
identifyCallTargets mem abst finalRegs
|
||||
-- Use the call-specific code to look for new IPs.
|
||||
|
||||
let r = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = toList prev_stmts
|
||||
, stmtsTerm = ParsedCall s (Just ret)
|
||||
, stmtsTerm = ParsedCall finalRegs (Just ret)
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
pure (r, idx+1)
|
||||
@ -901,19 +978,19 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
||||
-- (e.g. ARM will clear the low bit in T32 mode or the low 2
|
||||
-- bits in A32 mode), so the actual detection process is
|
||||
-- deferred to architecture-specific functionality.
|
||||
| Just prev_stmts <- identifyReturn ainfo stmts s absProcState' -> do
|
||||
| Just prev_stmts <- identifyReturn ainfo stmts finalRegs absProcState' -> do
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts
|
||||
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = toList prev_stmts
|
||||
, stmtsTerm = ParsedReturn s
|
||||
, stmtsTerm = ParsedReturn finalRegs
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
pure (ret, idx+1)
|
||||
|
||||
-- Jump to a block within this function.
|
||||
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
|
||||
-- Check
|
||||
| Just tgt_mseg <- valueAsSegmentOff mem (finalRegs^.boundValue ip_reg)
|
||||
-- Check target block address is in executable segment.
|
||||
, segmentFlags (segoffSegment tgt_mseg) `Perm.hasPerm` Perm.execute
|
||||
|
||||
-- Check the target address is not the entry point of this function.
|
||||
@ -926,22 +1003,23 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
||||
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
|
||||
-- Merge block state and add intra jump target.
|
||||
let abst = finalAbsBlockState absProcState' s
|
||||
let abst = finalAbsBlockState absProcState' finalRegs
|
||||
let abst' = abst & setAbsIP tgt_mseg
|
||||
intraJumpTargets %= ((tgt_mseg, abst'):)
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = stmts
|
||||
, stmtsTerm = ParsedJump s tgt_mseg
|
||||
, stmtsNonterm = toList stmts
|
||||
, stmtsTerm = ParsedJump finalRegs tgt_mseg
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
pure (ret, idx+1)
|
||||
|
||||
-- Block ends with what looks like a jump table.
|
||||
| Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do
|
||||
| Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (finalRegs^.curIP) -> do
|
||||
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
|
||||
|
||||
let abst :: AbsBlockState (ArchReg arch)
|
||||
abst = finalAbsBlockState absProcState' s
|
||||
abst = finalAbsBlockState absProcState' finalRegs
|
||||
|
||||
seq abst $ do
|
||||
|
||||
@ -949,35 +1027,51 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
||||
let abst' = abst & setAbsIP tgtAddr
|
||||
intraJumpTargets %= ((tgtAddr, abst'):)
|
||||
|
||||
let term = ParsedLookupTable s jumpIndex entries
|
||||
let term = ParsedLookupTable finalRegs jumpIndex entries
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = stmts
|
||||
, stmtsNonterm = toList stmts
|
||||
, stmtsTerm = term
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
pure (ret,idx+1)
|
||||
|
||||
-- Check for tail call when the stack pointer points to the return address.
|
||||
--
|
||||
-- TODO: this makes sense for x86, but is not correct for all architectures
|
||||
| ptrType <- addrMemRepr ainfo
|
||||
, sp_val <- s^.boundValue sp_reg
|
||||
, ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do
|
||||
finishWithTailCall absProcState'
|
||||
-- Code for PLT entry
|
||||
_ | 0 <- idx
|
||||
, AssignedValue (Assignment valId v) <- finalRegs^.boundValue ip_reg
|
||||
, ReadMem gotVal _repr <- v
|
||||
, Just gotAddr <- valueAsMemAddr gotVal
|
||||
, Just gotSegOff <- asSegmentOff mem gotAddr
|
||||
, Right chunks <- segoffContentsAfter gotSegOff
|
||||
, RelocationRegion r:_ <- chunks
|
||||
-- Check the relocation is a jump slot.
|
||||
, relocationJumpSlot r
|
||||
, Just strippedStmts <- stripPLTRead valId stmts Seq.empty
|
||||
, strippedRegs <- removeUnassignedRegs initRegs finalRegs
|
||||
, not (containsAssignId valId strippedRegs) -> do
|
||||
|
||||
-- Is this a jump to a known function entry? We're already past the
|
||||
-- "identifyCall" case, so this must be a tail call, assuming we trust our
|
||||
-- known function entry info.
|
||||
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
|
||||
, tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') strippedStmts
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = toList strippedStmts
|
||||
, stmtsTerm = PLTStub strippedRegs gotSegOff r
|
||||
, stmtsAbsState = absEvalStmts ainfo absProcState strippedStmts
|
||||
}
|
||||
pure (ret, idx+1)
|
||||
|
||||
-- Check for tail call when the calling convention seems to be satisfied.
|
||||
| spVal <- finalRegs^.boundValue sp_reg
|
||||
-- Check to see if the stack pointer points to an offset of the initial stack.
|
||||
, StackOffset _ offsets <- transferValue absProcState' spVal
|
||||
-- Stack stack is back to height when function was called.
|
||||
, offsets == Set.singleton 0
|
||||
, checkForReturnAddr ainfo finalRegs absProcState' -> do
|
||||
finishWithTailCall absProcState'
|
||||
|
||||
-- Block that ends with some unknown
|
||||
| otherwise -> do
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = stmts
|
||||
, stmtsTerm = ClassifyFailure s
|
||||
, stmtsNonterm = toList stmts
|
||||
, stmtsTerm = ClassifyFailure finalRegs
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
pure (ret,idx+1)
|
||||
@ -990,16 +1084,15 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
||||
mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts
|
||||
|
||||
-- Compute final state
|
||||
let abst = finalAbsBlockState absProcState' s
|
||||
let abst = finalAbsBlockState absProcState' finalRegs
|
||||
seq abst $ do
|
||||
|
||||
-- Look for new instruction pointers
|
||||
addNewFunctionAddrs $
|
||||
identifyConcreteAddresses mem (abst^.absRegState^.curIP)
|
||||
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = stmts
|
||||
, stmtsTerm = ParsedCall s Nothing
|
||||
, stmtsNonterm = toList stmts
|
||||
, stmtsTerm = ParsedCall finalRegs Nothing
|
||||
, stmtsAbsState = absProcState'
|
||||
}
|
||||
seq ret $ pure (ret,idx+1)
|
||||
@ -1010,12 +1103,14 @@ parseBlock :: ParseContext arch ids
|
||||
-- ^ Context for parsing blocks.
|
||||
-> Word64
|
||||
-- ^ Index for next statements
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-- ^ Initial register values
|
||||
-> Block arch ids
|
||||
-- ^ Block to parse
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-- ^ Abstract state at start of block
|
||||
-> State (ParseState arch ids) (StatementList arch ids, Word64)
|
||||
parseBlock ctx idx b regs = do
|
||||
parseBlock ctx idx initRegs b absProcState = do
|
||||
let mem = pctxMemory ctx
|
||||
let ainfo = pctxArchInfo ctx
|
||||
withArchConstraints ainfo $ do
|
||||
@ -1023,7 +1118,7 @@ parseBlock ctx idx b regs = do
|
||||
Branch c lb rb -> do
|
||||
let blockMap = pctxBlockMap ctx
|
||||
-- FIXME: we should propagate c back to the initial block, not just b
|
||||
let absProcState' = absEvalStmts ainfo regs (blockStmts b)
|
||||
let absProcState' = absEvalStmts ainfo absProcState (blockStmts b)
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b)
|
||||
|
||||
let Just l = Map.lookup lb blockMap
|
||||
@ -1034,8 +1129,8 @@ parseBlock ctx idx b regs = do
|
||||
let l_regs' = absEvalStmts ainfo l_regs (blockStmts b)
|
||||
let r_regs' = absEvalStmts ainfo r_regs (blockStmts b)
|
||||
|
||||
(parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) l l_regs'
|
||||
(parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx r r_regs'
|
||||
(parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) initRegs l l_regs'
|
||||
(parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx initRegs r r_regs'
|
||||
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = blockStmts b
|
||||
@ -1044,13 +1139,13 @@ parseBlock ctx idx b regs = do
|
||||
}
|
||||
pure (ret, falseIdx)
|
||||
|
||||
FetchAndExecute s -> do
|
||||
parseFetchAndExecute ctx idx (blockStmts b) regs s
|
||||
FetchAndExecute finalRegs -> do
|
||||
parseFetchAndExecute ctx idx initRegs (Seq.fromList (blockStmts b)) absProcState finalRegs
|
||||
|
||||
-- Do nothing when this block ends in a translation error.
|
||||
TranslateError _ msg -> do
|
||||
-- FIXME: we should propagate c back to the initial block, not just b
|
||||
let absProcState' = absEvalStmts ainfo regs (blockStmts b)
|
||||
let absProcState' = absEvalStmts ainfo absProcState (blockStmts b)
|
||||
|
||||
let ret = StatementList { stmtsIdent = idx
|
||||
, stmtsNonterm = blockStmts b
|
||||
@ -1060,7 +1155,7 @@ parseBlock ctx idx b regs = do
|
||||
pure (ret, idx+1)
|
||||
ArchTermStmt ts s -> do
|
||||
-- FIXME: we should propagate c back to the initial block, not just b
|
||||
let absProcState' = absEvalStmts ainfo regs (blockStmts b)
|
||||
let absProcState' = absEvalStmts ainfo absProcState (blockStmts b)
|
||||
mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b)
|
||||
let abst = finalAbsBlockState absProcState' s
|
||||
-- Compute possible next IPS.
|
||||
@ -1082,12 +1177,13 @@ addBlocks :: ArchSegmentOff arch
|
||||
-- ^ Address of theze blocks
|
||||
-> FoundAddr arch
|
||||
-- ^ State leading to explore block
|
||||
-> RegState (ArchReg arch) (Value arch ids)
|
||||
-> Int
|
||||
-- ^ Number of blocks covered
|
||||
-> Map Word64 (Block arch ids)
|
||||
-- ^ Map from labelIndex to associated block
|
||||
-> FunM arch s ids ()
|
||||
addBlocks src finfo sz blockMap =
|
||||
addBlocks src finfo initRegs sz blockMap =
|
||||
case Map.lookup 0 blockMap of
|
||||
Nothing -> do
|
||||
error $ "addBlocks given empty blockRegion."
|
||||
@ -1097,19 +1193,9 @@ addBlocks src finfo sz blockMap =
|
||||
funAddr <- gets curFunAddr
|
||||
s <- use curFunCtx
|
||||
|
||||
-- Combine entries of functions we've discovered thus far with
|
||||
-- undiscovered functions with entries marked InitAddr, which we assume is
|
||||
-- info we know from the symbol table or some other reliable source, and
|
||||
-- pass in. Only used in analysis if pctxTrustKnownFns is True.
|
||||
let knownFns =
|
||||
if s^.trustKnownFns then
|
||||
Set.union (Map.keysSet $ s^.funInfo)
|
||||
(Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions)
|
||||
else
|
||||
Set.empty
|
||||
let ctx = ParseContext { pctxMemory = memory s
|
||||
, pctxArchInfo = archInfo s
|
||||
, pctxKnownFnEntries = knownFns
|
||||
, pctxKnownFnEntries = s^.trustedFunctionEntryPoints
|
||||
, pctxFunAddr = funAddr
|
||||
, pctxAddr = src
|
||||
, pctxBlockMap = blockMap
|
||||
@ -1118,7 +1204,7 @@ addBlocks src finfo sz blockMap =
|
||||
, _intraJumpTargets = []
|
||||
, _newFunctionAddrs = []
|
||||
}
|
||||
let ((pblock,_), ps) = runState (parseBlock ctx 0 b regs) ps0
|
||||
let ((pblock,_), ps) = runState (parseBlock ctx 0 initRegs b regs) ps0
|
||||
let pb = ParsedBlock { pblockAddr = src
|
||||
, blockSize = sz
|
||||
, blockReason = foundReason finfo
|
||||
@ -1131,6 +1217,26 @@ addBlocks src finfo sz blockMap =
|
||||
. markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs)
|
||||
mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
|
||||
|
||||
-- | Record an error block with no statements for the given address.
|
||||
recordErrorBlock :: ArchSegmentOff arch -> FoundAddr arch -> Maybe String -> FunM arch s ids ()
|
||||
recordErrorBlock addr finfo maybeError = do
|
||||
s <- use curFunCtx
|
||||
let mem = memory s
|
||||
let errMsg = maybe "Unknown error" Text.pack maybeError
|
||||
let stmts = StatementList
|
||||
{ stmtsIdent = 0
|
||||
, stmtsNonterm = []
|
||||
, stmtsTerm = ParsedTranslateError errMsg
|
||||
, stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo)
|
||||
}
|
||||
let pb = ParsedBlock { pblockAddr = addr
|
||||
, blockSize = 0
|
||||
, blockReason = foundReason finfo
|
||||
, blockAbstractState = foundAbstractState finfo
|
||||
, blockStatementList = stmts
|
||||
}
|
||||
id %= addFunBlock addr pb
|
||||
|
||||
transfer :: ArchSegmentOff arch -> FunM arch s ids ()
|
||||
transfer addr = do
|
||||
s <- use curFunCtx
|
||||
@ -1149,8 +1255,16 @@ transfer addr = do
|
||||
Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o
|
||||
_ -> fromInteger (segoffBytesLeft addr)
|
||||
let ab = foundAbstractState finfo
|
||||
(bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab
|
||||
|
||||
case mkInitialRegsForBlock ainfo addr ab of
|
||||
Left msg -> do
|
||||
recordErrorBlock addr finfo (Just msg)
|
||||
Right initRegs -> do
|
||||
(bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr initRegs maxSize
|
||||
-- If no blocks are returned, then we just add an empty parsed block.
|
||||
if null bs0 then do
|
||||
recordErrorBlock addr finfo maybeError
|
||||
else do
|
||||
-- Rewrite returned blocks to simplify expressions
|
||||
#ifdef USE_REWRITER
|
||||
bs1 <- do
|
||||
let archStmt = rewriteArchStmt ainfo
|
||||
@ -1161,31 +1275,11 @@ transfer addr = do
|
||||
#else
|
||||
bs1 <- pure bs0
|
||||
#endif
|
||||
|
||||
-- If no blocks are returned, then we just add an empty parsed block.
|
||||
if null bs1 then do
|
||||
let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError
|
||||
let stmts = StatementList
|
||||
{ stmtsIdent = 0
|
||||
, stmtsNonterm = []
|
||||
, stmtsTerm = ParsedTranslateError errMsg
|
||||
, stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo)
|
||||
}
|
||||
let pb = ParsedBlock { pblockAddr = addr
|
||||
, blockSize = sz
|
||||
, blockReason = foundReason finfo
|
||||
, blockAbstractState = foundAbstractState finfo
|
||||
, blockStatementList = stmts
|
||||
}
|
||||
id %= addFunBlock addr pb
|
||||
else do
|
||||
-- Rewrite returned blocks to simplify expressions
|
||||
|
||||
-- Compute demand set
|
||||
let bs = bs1 -- eliminateDeadStmts ainfo bs1
|
||||
-- Call transfer blocks to calculate parsedblocks
|
||||
let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ]
|
||||
addBlocks addr finfo sz blockMap
|
||||
addBlocks addr finfo initRegs sz blockMap
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Main loop
|
||||
@ -1347,8 +1441,8 @@ cfgFromAddrs ::
|
||||
--
|
||||
-- Each entry contains an address and the value stored in it.
|
||||
-> DiscoveryState arch
|
||||
cfgFromAddrs ainfo mem symbols =
|
||||
cfgFromAddrsAndState (emptyDiscoveryState mem symbols ainfo)
|
||||
cfgFromAddrs ainfo mem addrSymMap =
|
||||
cfgFromAddrsAndState (emptyDiscoveryState mem addrSymMap ainfo)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Resolve functions with logging
|
||||
|
@ -88,8 +88,9 @@ absEvalStmt info stmt = withArchConstraints info $
|
||||
pure ()
|
||||
|
||||
-- This takes a processor state and updates it based on executing each statement.
|
||||
absEvalStmts :: ArchitectureInfo arch
|
||||
absEvalStmts :: Foldable t
|
||||
=> ArchitectureInfo arch
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-> [Stmt arch ids]
|
||||
-> t (Stmt arch ids)
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
absEvalStmts info r stmts = execState (mapM_ (absEvalStmt info) stmts) r
|
||||
|
@ -10,6 +10,7 @@ discovery.
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE PatternGuards #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
@ -33,7 +34,7 @@ module Data.Macaw.Discovery.State
|
||||
, globalDataMap
|
||||
, funInfo
|
||||
, unexploredFunctions
|
||||
, trustKnownFns
|
||||
, trustedFunctionEntryPoints
|
||||
, exploreFnPred
|
||||
-- * DiscoveryFunInfo
|
||||
, DiscoveryFunInfo(..)
|
||||
@ -50,7 +51,10 @@ import qualified Data.ByteString.Char8 as BSC
|
||||
import Data.Map.Strict (Map)
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Data.Parameterized.Classes
|
||||
import qualified Data.Parameterized.Map as MapF
|
||||
import Data.Parameterized.Some
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Vector as V
|
||||
@ -131,6 +135,18 @@ data ParsedTermStmt arch ids
|
||||
-- | A call with the current register values and location to return to or 'Nothing' if this is a tail call.
|
||||
= ParsedCall !(RegState (ArchReg arch) (Value arch ids))
|
||||
!(Maybe (ArchSegmentOff arch))
|
||||
-- | @PLTStub regs addr@ denotes a terminal statement that has been identified as a PLT stub
|
||||
-- for calling the given relocation.
|
||||
--
|
||||
-- This is a special case of a tail call. It has been added
|
||||
-- separately because it occurs frequently in dynamically linked
|
||||
-- code, and we can use this to recognize PLT stubs.
|
||||
--
|
||||
-- The register set only contains registers that were changed in
|
||||
-- the function. Other registers have the initial value.
|
||||
| PLTStub !(MapF.MapF (ArchReg arch) (Value arch ids))
|
||||
!(ArchSegmentOff arch)
|
||||
!(Relocation (ArchAddrWidth arch))
|
||||
-- | A jump to an explicit address within a function.
|
||||
| ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch)
|
||||
-- | A lookup table that branches to one of a vector of addresses.
|
||||
@ -172,11 +188,14 @@ ppTermStmt :: ArchConstraints arch
|
||||
ppTermStmt ppOff tstmt =
|
||||
case tstmt of
|
||||
ParsedCall s Nothing ->
|
||||
text "tail call" <$$>
|
||||
text "tail_call" <$$>
|
||||
indent 2 (pretty s)
|
||||
ParsedCall s (Just next) ->
|
||||
text "call and return to" <+> text (show next) <$$>
|
||||
indent 2 (pretty s)
|
||||
PLTStub regs addr r ->
|
||||
text "call_via_got" <+> text (show (relocationSym r)) <+> "(at" <+> text (show addr) PP.<> ")" <$$>
|
||||
indent 2 (ppRegMap regs)
|
||||
ParsedJump s addr ->
|
||||
text "jump" <+> text (show addr) <$$>
|
||||
indent 2 (pretty s)
|
||||
@ -279,7 +298,7 @@ parsedBlocks = lens _parsedBlocks (\s v -> s { _parsedBlocks = v })
|
||||
instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where
|
||||
pretty info =
|
||||
text "function" <+> text (BSC.unpack (discoveredFunName info))
|
||||
<+> pretty "@" <+> pretty (show (discoveredFunAddr info))
|
||||
<+> "@" <+> pretty (show (discoveredFunAddr info))
|
||||
<$$>
|
||||
vcat (pretty <$> Map.elems (info^.parsedBlocks))
|
||||
|
||||
@ -310,9 +329,20 @@ data DiscoveryState arch
|
||||
-- they are analyzed.
|
||||
--
|
||||
-- The keys in this map and `_funInfo` should be mutually disjoint.
|
||||
, _trustKnownFns :: !Bool
|
||||
-- ^ Should we use and depend on known function entries in
|
||||
-- our analysis? E.g. used to distinguish jumps vs. tail calls
|
||||
, _trustedFunctionEntryPoints :: !(Set (ArchSegmentOff arch))
|
||||
-- ^ This is the set of addresses that we treat
|
||||
-- as definitely belonging to function entry
|
||||
-- points.
|
||||
--
|
||||
-- The discovery process will not allow
|
||||
-- intra-procedural jumps to these addresses.
|
||||
-- Jumps to these addresses must either be calls
|
||||
-- or tail calls.
|
||||
--
|
||||
-- To ensure translation is invariant on the
|
||||
-- order in which functions are visited, this
|
||||
-- set should be initialized upfront, and not
|
||||
-- changed.
|
||||
, _exploreFnPred :: Maybe (ArchSegmentOff arch -> Bool)
|
||||
-- ^ if present, this predicate decides whether to explore
|
||||
-- a function at the given address or not
|
||||
@ -342,15 +372,15 @@ emptyDiscoveryState :: Memory (ArchAddrWidth arch)
|
||||
-> ArchitectureInfo arch
|
||||
-- ^ architecture/OS specific information
|
||||
-> DiscoveryState arch
|
||||
emptyDiscoveryState mem symbols info =
|
||||
emptyDiscoveryState mem addrSymMap info =
|
||||
DiscoveryState
|
||||
{ memory = mem
|
||||
, symbolNames = symbols
|
||||
, symbolNames = addrSymMap
|
||||
, archInfo = info
|
||||
, _globalDataMap = Map.empty
|
||||
, _funInfo = Map.empty
|
||||
, _unexploredFunctions = Map.empty
|
||||
, _trustKnownFns = False
|
||||
, _trustedFunctionEntryPoints = Map.keysSet addrSymMap
|
||||
, _exploreFnPred = Nothing
|
||||
}
|
||||
|
||||
@ -368,8 +398,10 @@ unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunction
|
||||
funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch)))
|
||||
funInfo = lens _funInfo (\s v -> s { _funInfo = v })
|
||||
|
||||
trustKnownFns :: Simple Lens (DiscoveryState arch) Bool
|
||||
trustKnownFns = lens _trustKnownFns (\s v -> s { _trustKnownFns = v })
|
||||
trustedFunctionEntryPoints :: Simple Lens (DiscoveryState arch) (Set (ArchSegmentOff arch))
|
||||
trustedFunctionEntryPoints =
|
||||
lens _trustedFunctionEntryPoints
|
||||
(\s v -> s { _trustedFunctionEntryPoints = v })
|
||||
|
||||
exploreFnPred :: Simple Lens (DiscoveryState arch) (Maybe (ArchSegmentOff arch -> Bool))
|
||||
exploreFnPred = lens _exploreFnPred (\s v -> s { _exploreFnPred = v })
|
||||
|
@ -53,8 +53,14 @@ emptyValueFold =
|
||||
|
||||
-- | This folds over elements of a values in a values.
|
||||
--
|
||||
-- It memoizes values so that it only evaluates assignments with the same id
|
||||
-- once.
|
||||
-- It memoizes the results so that if an assignment is visited
|
||||
-- multiple times, we only visit the children the first time it is
|
||||
-- visited. On subsequent visits, `foldAssign` will still be called,
|
||||
-- but the children will not be revisited.
|
||||
--
|
||||
-- This makes the total time to visit linear with respect to the
|
||||
-- number of children, but still allows determining whether a term is
|
||||
-- shared.
|
||||
foldValueCached :: forall r arch ids tp
|
||||
. (Monoid r, FoldableFC (ArchFn arch))
|
||||
=> ValueFold arch ids r
|
||||
|
@ -19,15 +19,18 @@ module Data.Macaw.Memory
|
||||
, memAddrWidth
|
||||
, memWidth
|
||||
, memSegments
|
||||
, memSectionIndexMap
|
||||
, memSegmentIndexMap
|
||||
, memAsAddrPairs
|
||||
-- * Constructing memory
|
||||
, emptyMemory
|
||||
, insertMemSegment
|
||||
, InsertError(..)
|
||||
, showInsertError
|
||||
-- * Load values
|
||||
, memBaseAddr
|
||||
, memSetBaseAddr
|
||||
, memBindSectionIndex
|
||||
, memSectionIndexMap
|
||||
, memSegmentIndexMap
|
||||
, memBindSegmentIndex
|
||||
-- * Memory segments
|
||||
, MemSegment
|
||||
@ -52,6 +55,7 @@ module Data.Macaw.Memory
|
||||
, memWord
|
||||
, memWordToUnsigned
|
||||
, memWordToSigned
|
||||
, addrRead
|
||||
-- * Addresses
|
||||
, MemAddr(..)
|
||||
, absoluteAddr
|
||||
@ -272,7 +276,7 @@ bsWord64 LittleEndian = bsWord64le
|
||||
--
|
||||
-- Operations on it require the `MemWidth` constraint to be satisfied, so in practice
|
||||
-- this only works for 32 and 64-bit values.
|
||||
newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 }
|
||||
newtype MemWord (w :: Nat) = MemWord { memWordValue :: Word64 }
|
||||
|
||||
-- | Convert word64 @x@ into mem word @x mod 2^w-1@.
|
||||
memWord :: forall w . MemWidth w => Word64 -> MemWord w
|
||||
@ -317,11 +321,18 @@ class (1 <= w) => MemWidth w where
|
||||
-- | Read an address with the given endianess.
|
||||
--
|
||||
-- This returns nothing if the bytestring is too short.
|
||||
addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w)
|
||||
addrRead :: forall w . MemWidth w => Endianness -> BS.ByteString -> Maybe (MemWord w)
|
||||
addrRead e s =
|
||||
case addrWidthRepr (Proxy :: Proxy w) of
|
||||
Addr32 | BS.length s < 4 -> Nothing
|
||||
| otherwise -> Just $ MemWord $ fromIntegral $ bsWord32 e s
|
||||
Addr64 | BS.length s < 8 -> Nothing
|
||||
| otherwise -> Just $ MemWord $ bsWord64 e s
|
||||
|
||||
|
||||
-- | Return the value represented by the MemWord as an unsigned integer.
|
||||
memWordToUnsigned :: MemWord w -> Integer
|
||||
memWordToUnsigned = fromIntegral . _memWordValue
|
||||
memWordToUnsigned = fromIntegral . memWordValue
|
||||
|
||||
-- | Treat the word as a signed integer.
|
||||
memWordToSigned :: MemWidth w => MemWord w -> Integer
|
||||
@ -388,18 +399,12 @@ instance MemWidth 32 where
|
||||
addrRotate (MemWord w) i =
|
||||
MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i))
|
||||
addrSize _ = 4
|
||||
addrRead e s
|
||||
| BS.length s < 4 = Nothing
|
||||
| otherwise = Just $ MemWord $ fromIntegral $ bsWord32 e s
|
||||
|
||||
instance MemWidth 64 where
|
||||
addrWidthRepr _ = Addr64
|
||||
addrWidthMask _ = 0xffffffffffffffff
|
||||
addrRotate (MemWord w) i = MemWord (w `rotate` i)
|
||||
addrSize _ = 8
|
||||
addrRead e s
|
||||
| BS.length s < 8 = Nothing
|
||||
| otherwise = Just $ MemWord $ bsWord64 e s
|
||||
|
||||
-- | Number of bytes in an address
|
||||
addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a
|
||||
@ -471,6 +476,15 @@ data Relocation w
|
||||
, relocationEndianness :: !Endianness
|
||||
-- ^ The byte order used to encode the relocation in
|
||||
-- memory.
|
||||
, relocationJumpSlot :: !Bool
|
||||
-- ^ Returns true if this is a jump slot relocation.
|
||||
--
|
||||
-- This relocation is specifically used for global
|
||||
-- offset table entries, and are typically resolved
|
||||
-- when the function is first called rather than at
|
||||
-- load time. The address will be initially the
|
||||
-- entry sequence stub, and will be updated once
|
||||
-- resolved by the stub.
|
||||
}
|
||||
|
||||
-- | Short encoding of endianness for relocation pretty printing
|
||||
@ -817,6 +831,8 @@ data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w)
|
||||
-- ^ Map from registered section indices to the segment offset it is loaded at.
|
||||
, memSegmentIndexMap :: !(Map SegmentIndex (MemSegment w))
|
||||
-- ^ Map from registered segment indices to associated segment.
|
||||
, memBaseAddr :: !(Maybe (MemAddr w))
|
||||
-- ^ This denotes the base region for loads.
|
||||
}
|
||||
|
||||
-- | Return the set of memory segments in memory.
|
||||
@ -846,12 +862,17 @@ memBindSegmentIndex idx seg mem
|
||||
| otherwise =
|
||||
mem { memSegmentIndexMap = Map.insert idx seg (memSegmentIndexMap mem) }
|
||||
|
||||
-- | Set the region index used or the load addresses.
|
||||
memSetBaseAddr :: MemAddr w -> Memory w -> Memory w
|
||||
memSetBaseAddr r m = m { memBaseAddr = Just r }
|
||||
|
||||
-- | A memory with no segments.
|
||||
emptyMemory :: AddrWidthRepr w -> Memory w
|
||||
emptyMemory w = Memory { memAddrWidth = w
|
||||
, memSegmentMap = Map.empty
|
||||
, memSectionIndexMap = Map.empty
|
||||
, memSegmentIndexMap = Map.empty
|
||||
, memBaseAddr = Nothing
|
||||
}
|
||||
|
||||
-- | Return segments with executable permissions.
|
||||
@ -945,7 +966,7 @@ addrLeastBit (MemAddr _ (MemWord off)) = off `testBit` 0
|
||||
|
||||
-- | Increment an address by a fixed amount.
|
||||
incAddr :: MemWidth w => Integer -> MemAddr w -> MemAddr w
|
||||
incAddr o (MemAddr i off) = MemAddr i (off + fromInteger o)
|
||||
incAddr o a = a { addrOffset = addrOffset a + fromInteger o }
|
||||
|
||||
-- | Returns the number of bytes between two addresses if they point to
|
||||
-- the same region and `Nothing` if they are different segments.
|
||||
@ -979,10 +1000,9 @@ resolveAddr :: Memory w -> RegionIndex -> MemWord w -> Maybe (MemSegmentOff w)
|
||||
resolveAddr = resolveRegionOff
|
||||
{-# DEPRECATED resolveAddr "Use resolveRegionOff" #-}
|
||||
|
||||
|
||||
-- | Return the address of a segment offset.
|
||||
segoffAddr :: MemWidth w => MemSegmentOff w -> MemAddr w
|
||||
segoffAddr (MemSegmentOff seg off) = relativeAddr seg off
|
||||
segoffAddr (MemSegmentOff seg off) = segmentOffAddr seg off
|
||||
|
||||
-- | Return the segment associated with the given address if well-defined.
|
||||
resolveAbsoluteAddr :: Memory w -> MemWord w -> Maybe (MemSegmentOff w)
|
||||
@ -1336,40 +1356,46 @@ addrContentsAfter mem addr = do
|
||||
addrWidthClass (memAddrWidth mem) $
|
||||
segoffContentsAfter =<< resolveMemAddr mem addr
|
||||
|
||||
-- | Read a bytestring from a sequence of statements.
|
||||
--
|
||||
-- This is a helper method for @readByteString@ below.
|
||||
readByteString' :: MemWidth w
|
||||
=> MemSegmentOff w
|
||||
-- ^ Initial starting address
|
||||
-> [BS.ByteString]
|
||||
-- ^ Bytestring read so far (in reverse order)
|
||||
-> [MemChunk w]
|
||||
-- ^ Remaining segments to read from.
|
||||
-- | Attempt to read a bytestring of the given length
|
||||
readByteString' :: RegionIndex
|
||||
-- ^ Region we are in.
|
||||
-> BS.ByteString
|
||||
-- ^ Bytestring read so far.
|
||||
-> Word64
|
||||
-- ^ Number of bytes remaining to read.
|
||||
-> Either (MemoryError w) [BS.ByteString]
|
||||
readByteString' _ prev _ 0 =
|
||||
-- ^ Bytes read so far.
|
||||
-> [MemChunk w]
|
||||
-- ^ Remaining memory chunks to read from.
|
||||
-> Word64
|
||||
-- ^ Total remaining number of bytes to read.
|
||||
-> Either (MemoryError w) BS.ByteString
|
||||
readByteString' _ prev _ _ 0 =
|
||||
pure $! prev
|
||||
readByteString' _ _ [] _ = error "internal: readByteString' given too many bytes."
|
||||
readByteString' initAddr prev (ByteRegion bs:rest) cnt =
|
||||
if toInteger cnt <= toInteger (BS.length bs) then
|
||||
pure $! BS.take (fromIntegral cnt) bs : prev
|
||||
readByteString' _ _ _ [] _ = error "internal: readByteString' given too many bytes."
|
||||
readByteString' reg prev off (ByteRegion bs:rest) cnt = do
|
||||
let sz = fromIntegral (BS.length bs)
|
||||
if cnt <= sz then
|
||||
pure $! prev <> BS.take (fromIntegral cnt) bs
|
||||
else do
|
||||
let cnt' = cnt - fromIntegral (BS.length bs)
|
||||
readByteString' initAddr (bs:prev) rest cnt'
|
||||
readByteString' initAddr prev (RelocationRegion r:_) _ = do
|
||||
let cnt = sum (toInteger . BS.length <$> prev)
|
||||
let addr = incAddr cnt (relativeSegmentAddr initAddr)
|
||||
let off' = off + sz
|
||||
let cnt' = cnt - sz
|
||||
seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt'
|
||||
readByteString' reg _ off (RelocationRegion r:_) _ = do
|
||||
let addr = MemAddr { addrBase = reg, addrOffset = MemWord off }
|
||||
Left $! UnexpectedRelocation addr r
|
||||
readByteString' initAddr prev (BSSRegion sz:rest) cnt =
|
||||
if toInteger cnt <= toInteger sz then
|
||||
pure $! BS.replicate (fromIntegral cnt) 0 : prev
|
||||
readByteString' reg prev off (BSSRegion sz0:rest) cnt = do
|
||||
let sz :: Word64
|
||||
sz = memWordValue sz0
|
||||
if cnt <= sz then do
|
||||
when (cnt > fromIntegral (maxBound :: Int)) $ do
|
||||
error $ "Illegal size " ++ show cnt
|
||||
pure $! prev <> BS.replicate (fromIntegral cnt) 0
|
||||
else do
|
||||
let cnt' = cnt - fromIntegral sz
|
||||
let next = BS.replicate (fromIntegral sz) 0 : prev
|
||||
seq cnt' $ seq next $
|
||||
readByteString' initAddr next rest cnt'
|
||||
when (sz > fromIntegral (maxBound :: Int)) $ do
|
||||
error $ "Illegal size " ++ show cnt
|
||||
let bs = BS.replicate (fromIntegral sz) 0
|
||||
let off' = off + sz
|
||||
let cnt' = cnt - sz
|
||||
seq cnt' $ seq off' $ readByteString' reg (prev <> bs) off' rest cnt'
|
||||
|
||||
-- | Attempt to read a bytestring of the given length
|
||||
readByteString :: Memory w
|
||||
@ -1383,7 +1409,7 @@ readByteString mem addr cnt = addrWidthClass (memAddrWidth mem) $ do
|
||||
Left $! InvalidRead segOff cnt
|
||||
-- Get contents after segment
|
||||
l <- segoffContentsAfter segOff
|
||||
mconcat . reverse <$> readByteString' segOff [] l cnt
|
||||
readByteString' (addrBase addr) BS.empty (memWordValue (addrOffset addr)) l cnt
|
||||
|
||||
-- | Read an address from the value in the segment or report a memory
|
||||
-- error.
|
||||
@ -1454,7 +1480,7 @@ relativeSegmentContents memSegs = concatMap relativeOffset memSegs
|
||||
-- Each MemSegment has a segmentOffset indicating the offset from segmentBase its located.
|
||||
-- This makes the offsets within the MemChunk relative to that segmentOffset.
|
||||
relativeOffset :: (MemWidth w) => MemSegment w -> [(MemAddr w, MemChunk w)]
|
||||
relativeOffset seg = map (\(contentOffset,r) -> (relativeAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg
|
||||
relativeOffset seg = map (\(contentOffset,r) -> (segmentOffAddr seg contentOffset, r)) $ (contentsRanges . segmentContents) seg
|
||||
|
||||
-- | Naive string matching algorithm identifies matches to given
|
||||
-- pattern within the list of memory segments and their corresponding
|
||||
|
@ -182,7 +182,11 @@ data MemLoadWarning
|
||||
| ExpectedSectionSymbolLocal
|
||||
| InvalidSectionSymbolIndex !Elf.ElfSectionIndex
|
||||
| UnsupportedProcessorSpecificSymbolIndex !SymbolName !ElfSectionIndex
|
||||
| IgnoreRelocation !RelocationError
|
||||
| IgnoreRelocation !Integer !String !RelocationError
|
||||
-- ^ @IgnoreRelocation idx tp err@ warns we ignored the location at index @idx@ due to @err@.
|
||||
--
|
||||
-- @tp@ is a string representing the type which we print, because usually errors come because
|
||||
-- we don't support that type or only partially implement it.
|
||||
|
||||
ppSymbol :: SymbolName -> String
|
||||
ppSymbol "" = "unnamed symbol"
|
||||
@ -227,8 +231,8 @@ instance Show MemLoadWarning where
|
||||
"Expected section symbol to have a valid index instead of " ++ show idx ++ "."
|
||||
show (UnsupportedProcessorSpecificSymbolIndex nm idx) =
|
||||
"Could not resolve symbol index " ++ show idx ++ " for symbol " ++ BSC.unpack nm ++ "."
|
||||
show (IgnoreRelocation err) =
|
||||
"Ignoring relocation: " ++ show err
|
||||
show (IgnoreRelocation idx typeName err) =
|
||||
"Ignoring relocation " ++ show idx ++ " with type " ++ typeName ++ ": " ++ show err
|
||||
|
||||
data MemLoaderState w = MLS { _mlsMemory :: !(Memory w)
|
||||
, mlsEndianness :: !Endianness
|
||||
@ -386,6 +390,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
|
||||
, relocationSize = 8
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = True
|
||||
}
|
||||
Elf.R_X86_64_PC32 -> do
|
||||
sym <- resolveRelocationSym symtab (Elf.relSym rel)
|
||||
@ -395,6 +400,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
|
||||
, relocationSize = 4
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
Elf.R_X86_64_32 -> do
|
||||
sym <- resolveRelocationSym symtab (Elf.relSym rel)
|
||||
@ -404,6 +410,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
|
||||
, relocationSize = 4
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
Elf.R_X86_64_32S -> do
|
||||
sym <- resolveRelocationSym symtab (Elf.relSym rel)
|
||||
@ -413,6 +420,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
|
||||
, relocationSize = 4
|
||||
, relocationIsSigned = True
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
Elf.R_X86_64_64 -> do
|
||||
sym <- resolveRelocationSym symtab (Elf.relSym rel)
|
||||
@ -422,6 +430,7 @@ relaTargetX86_64 _ symtab rel off _isRel =
|
||||
, relocationSize = 8
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
-- R_X86_64_GLOB_DAT are used to update GOT entries with their
|
||||
-- target address. They are similar to R_x86_64_64 except appear
|
||||
@ -435,6 +444,19 @@ relaTargetX86_64 _ symtab rel off _isRel =
|
||||
, relocationSize = 8
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
|
||||
Elf.R_X86_64_RELATIVE -> do
|
||||
when (Elf.relSym rel /= 0) $ do
|
||||
Left $ RelocationBadSymbolIndex (fromIntegral (Elf.relSym rel))
|
||||
pure $ Relocation { relocationSym = LoadBaseAddr
|
||||
, relocationOffset = off
|
||||
, relocationIsRel = False
|
||||
, relocationSize = 8
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = LittleEndian
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
|
||||
-- Jhx Note. These will be needed to support thread local variables.
|
||||
@ -468,6 +490,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag =
|
||||
, relocationSize = 4
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = end
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
Elf.R_ARM_RELATIVE -> do
|
||||
-- This relocation has the value B(S) + A where
|
||||
@ -503,6 +526,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag =
|
||||
, relocationSize = 4
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = end
|
||||
, relocationJumpSlot = False
|
||||
}
|
||||
Elf.R_ARM_JUMP_SLOT -> do
|
||||
-- This is a PLT relocation
|
||||
@ -520,6 +544,7 @@ relaTargetARM end msegIndex symtab rel addend relFlag =
|
||||
, relocationSize = 4
|
||||
, relocationIsSigned = False
|
||||
, relocationEndianness = end
|
||||
, relocationJumpSlot = True
|
||||
}
|
||||
tp -> do
|
||||
relocError $ RelocationUnsupportedType (show tp)
|
||||
@ -556,12 +581,13 @@ resolveRela :: ( MemWidth w
|
||||
)
|
||||
=> SymbolTable
|
||||
-> RelocationResolver tp
|
||||
-> Integer -- ^ Index of relocation
|
||||
-> Elf.RelaEntry tp
|
||||
-> ResolveFn (MemLoader w) w
|
||||
resolveRela symtab resolver rela msegIdx _ =
|
||||
resolveRela symtab resolver relaIdx rela msegIdx _ =
|
||||
case resolver msegIdx symtab (Elf.relaToRel rela) (fromIntegral (Elf.relaAddend rela)) IsRela of
|
||||
Left e -> do
|
||||
addWarning (IgnoreRelocation e)
|
||||
addWarning (IgnoreRelocation relaIdx (show (Elf.relaType rela)) e)
|
||||
pure Nothing
|
||||
Right r -> do
|
||||
pure $ Just r
|
||||
@ -573,9 +599,10 @@ resolveRel :: ( MemWidth w
|
||||
=> Endianness -- ^ Endianness of Elf file
|
||||
-> SymbolTable -- ^ Symbol table
|
||||
-> RelocationResolver tp
|
||||
-> Integer -- ^ Index of relocation
|
||||
-> Elf.RelEntry tp
|
||||
-> ResolveFn (MemLoader w) w
|
||||
resolveRel end symtab resolver rel msegIdx bytes = do
|
||||
resolveRel end symtab resolver relIdx rel msegIdx bytes = do
|
||||
-- Get the number of bits in the addend
|
||||
let bits = Elf.relocTargetBits (Elf.relType rel)
|
||||
-- Compute the addended by masking off the low order bits, and
|
||||
@ -591,7 +618,7 @@ resolveRel end symtab resolver rel msegIdx bytes = do
|
||||
-- Update the resolver.
|
||||
case resolver msegIdx symtab rel (fromInteger saddend) IsRel of
|
||||
Left e -> do
|
||||
addWarning (IgnoreRelocation e)
|
||||
addWarning (IgnoreRelocation relIdx (show (Elf.relType rel)) e)
|
||||
pure Nothing
|
||||
Right r -> do
|
||||
pure $ Just r
|
||||
@ -610,12 +637,13 @@ relocFromRela :: ( Elf.IsRelocationType tp
|
||||
)
|
||||
=> SymbolTable
|
||||
-> RelocationResolver tp
|
||||
-> Integer -- ^ Index of relocation entry for error reporting
|
||||
-> Elf.RelaEntry tp
|
||||
-> (MemWord w, RelocEntry (MemLoader w) w)
|
||||
relocFromRela symtab resolver r =
|
||||
relocFromRela symtab resolver idx r =
|
||||
( fromIntegral (Elf.relaAddr r)
|
||||
, RelocEntry { relocEntrySize = relocTargetBytes (Elf.relaType r)
|
||||
, applyReloc = resolveRela symtab resolver r
|
||||
, applyReloc = resolveRela symtab resolver idx r
|
||||
}
|
||||
)
|
||||
|
||||
@ -627,12 +655,13 @@ relocFromRel :: ( Elf.IsRelocationType tp
|
||||
=> Endianness
|
||||
-> SymbolTable
|
||||
-> RelocationResolver tp
|
||||
-> Integer -- ^ Index of relocation entry for error reporting.
|
||||
-> Elf.RelEntry tp
|
||||
-> (MemWord w, RelocEntry (MemLoader w) w)
|
||||
relocFromRel end symtab resolver r =
|
||||
relocFromRel end symtab resolver idx r =
|
||||
( fromIntegral (Elf.relAddr r)
|
||||
, RelocEntry { relocEntrySize = relocTargetBytes (Elf.relType r)
|
||||
, applyReloc = resolveRel end symtab resolver r
|
||||
, applyReloc = resolveRel end symtab resolver idx r
|
||||
}
|
||||
)
|
||||
|
||||
@ -657,7 +686,7 @@ relocMapFromRelAndRela dta resolver symtab _ (Just relaBuffer) = do
|
||||
addWarning (RelocationParseFailure msg)
|
||||
pure Map.empty
|
||||
Right entries -> do
|
||||
pure $ Map.fromList $ relocFromRela symtab resolver <$> entries
|
||||
pure $ Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries
|
||||
relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do
|
||||
w <- uses mlsMemory memAddrWidth
|
||||
reprConstraints w $ do
|
||||
@ -666,7 +695,7 @@ relocMapFromRelAndRela dta resolver symtab (Just relBuffer) Nothing = do
|
||||
addWarning (RelocationParseFailure msg)
|
||||
pure Map.empty
|
||||
Right entries -> do
|
||||
pure $ Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries
|
||||
pure $ Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries
|
||||
|
||||
|
||||
resolveUndefinedSymbolReq :: SymbolName
|
||||
@ -716,6 +745,7 @@ symbolDefTypeMap = Map.fromList
|
||||
, (,) Elf.STT_FUNC SymbolDefFunc
|
||||
, (,) Elf.STT_TLS SymbolDefThreadLocal
|
||||
, (,) Elf.STT_GNU_IFUNC SymbolDefIFunc
|
||||
, (,) Elf.STT_NOTYPE SymbolDefNoType
|
||||
]
|
||||
|
||||
resolveDefinedSymbolDef :: ElfSymbolTableEntry wtp
|
||||
@ -829,9 +859,9 @@ dynamicRelocationMap hdr ph contents =
|
||||
Right Elf.PLTEmpty -> do
|
||||
pure $! Map.empty
|
||||
Right (Elf.PLTRel entries) -> do
|
||||
pure $! Map.fromList $ relocFromRel (toEndianness dta) symtab resolver <$> entries
|
||||
pure $! Map.fromList $ zipWith (relocFromRel (toEndianness dta) symtab resolver) [0..] entries
|
||||
Right (Elf.PLTRela entries) -> do
|
||||
pure $! Map.fromList $ relocFromRela symtab resolver <$> entries
|
||||
pure $! Map.fromList $ zipWith (relocFromRela symtab resolver) [0..] entries
|
||||
pure $ Map.union loadtimeRelocs pltRelocs
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
@ -42,6 +42,9 @@ data SymbolDefType
|
||||
-- ^ This symbol denotes a thread local identifier
|
||||
| SymbolDefIFunc
|
||||
-- ^ This symbol is a "IFUNC" (e.g., it calls a function to resolve the symbol)
|
||||
| SymbolDefNoType
|
||||
-- ^ This symbol does not have a specified type.
|
||||
|
||||
|
||||
-- | Describes whether an undefined symbol is required during linking.
|
||||
data SymbolRequirement
|
||||
|
2
deps/crucible
vendored
2
deps/crucible
vendored
@ -1 +1 @@
|
||||
Subproject commit 70587094dc19e1d27f4fc2eca4a704f9dac9110c
|
||||
Subproject commit 0a93de3a8f6e3697818fa1e157029e9cc1b46808
|
2
deps/elf-edit
vendored
2
deps/elf-edit
vendored
@ -1 +1 @@
|
||||
Subproject commit afa3dcf45afc5429139cae09f8584a722a737946
|
||||
Subproject commit a9428d847f63cddfbd38eb3b0fea07153dbcd18f
|
2
deps/flexdis86
vendored
2
deps/flexdis86
vendored
@ -1 +1 @@
|
||||
Subproject commit e53a3359b51ad1a91758144a85d8a8eb635d91d9
|
||||
Subproject commit ac18aaf5fc6f857a9a81ef09956ab1021a3fcb39
|
2
deps/llvm-pretty
vendored
2
deps/llvm-pretty
vendored
@ -1 +1 @@
|
||||
Subproject commit 6cc6990017c4d875311a52a6d3988fb7a962ed7a
|
||||
Subproject commit 8bbd47cf21379bf699297ebb2f69148c88d0ec72
|
2
deps/parameterized-utils
vendored
2
deps/parameterized-utils
vendored
@ -1 +1 @@
|
||||
Subproject commit 1c37bf1eb8be0f7c8beacd0998a30bc932d9122d
|
||||
Subproject commit 2af20d4e1ed93b18503d7e73376fa993ed865357
|
@ -16,7 +16,7 @@ library
|
||||
containers,
|
||||
flexdis86 >= 0.1.2,
|
||||
lens >= 4.7,
|
||||
macaw-base >= 0.3.2,
|
||||
macaw-base >= 0.3.3,
|
||||
mtl,
|
||||
parameterized-utils,
|
||||
text,
|
||||
|
@ -68,6 +68,7 @@ import Text.PrettyPrint.ANSI.Leijen (Pretty(..), text)
|
||||
|
||||
import Data.Macaw.AbsDomain.AbsState
|
||||
( AbsBlockState
|
||||
, curAbsStack
|
||||
, setAbsIP
|
||||
, absRegState
|
||||
, StackEntry(..)
|
||||
@ -166,12 +167,11 @@ instance MemWidth w => Show (X86TranslateError w) where
|
||||
|
||||
|
||||
-- | Signal an error from the initial address.
|
||||
initError :: ExploreLoc -- ^ Location to explore from.
|
||||
initError :: MemSegmentOff 64 -- ^ Location to explore from.
|
||||
-> RegState X86Reg (Value X86_64 ids)
|
||||
-> X86TranslateError 64
|
||||
-> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64))
|
||||
initError loc err = do
|
||||
let addr = loc_ip loc
|
||||
let s = initX86State loc
|
||||
initError addr s err = do
|
||||
let b = Block { blockLabel = 0
|
||||
, blockStmts = []
|
||||
, blockTerm = TranslateError s (Text.pack (show err))
|
||||
@ -275,7 +275,9 @@ disassembleFixedBlock :: NonceGenerator (ST st_s) ids
|
||||
-- ^ Number of bytes to translate
|
||||
-> ST st_s (Either (X86TranslateError 64) (Block X86_64 ids))
|
||||
disassembleFixedBlock gen loc sz = do
|
||||
case segoffContentsAfter (loc_ip loc) of
|
||||
let addr = loc_ip loc
|
||||
let initRegs = initX86State loc
|
||||
case segoffContentsAfter addr of
|
||||
Left err -> do
|
||||
pure $ Left $ FlexdisMemoryError err
|
||||
Right fullContents -> do
|
||||
@ -283,8 +285,8 @@ disassembleFixedBlock gen loc sz = do
|
||||
Left _err -> do
|
||||
error $ "Could not split memory."
|
||||
Right (contents,_) -> do
|
||||
let pblock = emptyPreBlock (initX86State loc)
|
||||
disassembleFixedBlock' gen pblock 0 (loc_ip loc) contents
|
||||
let pblock = emptyPreBlock initRegs
|
||||
disassembleFixedBlock' gen pblock 0 addr contents
|
||||
|
||||
-- | Translate block, returning blocks read, ending
|
||||
-- PC, and an optional error. and ending PC.
|
||||
@ -327,13 +329,14 @@ disassembleBlock :: forall s
|
||||
-> ST s (Block X86_64 s, MemWord 64, Maybe (X86TranslateError 64))
|
||||
disassembleBlock nonce_gen loc max_size = do
|
||||
let addr = loc_ip loc
|
||||
let regs = initX86State loc
|
||||
let sz = segoffOffset addr + max_size
|
||||
(b, next_ip_off, maybeError) <-
|
||||
case segoffContentsAfter addr of
|
||||
Left msg -> do
|
||||
initError loc (FlexdisMemoryError msg)
|
||||
initError addr regs (FlexdisMemoryError msg)
|
||||
Right contents -> do
|
||||
let pblock = emptyPreBlock (initX86State loc)
|
||||
let pblock = emptyPreBlock regs
|
||||
disassembleBlockImpl nonce_gen pblock 0 addr sz contents
|
||||
assert (next_ip_off > segoffOffset addr) $ do
|
||||
let block_sz = next_ip_off - segoffOffset addr
|
||||
@ -415,56 +418,67 @@ transferAbsValue r f =
|
||||
VExtractF128 {} -> TopV
|
||||
VInsert {} -> TopV
|
||||
|
||||
|
||||
-- | Disassemble block, returning either an error, or a list of blocks
|
||||
-- and ending PC.
|
||||
tryDisassembleBlockFromAbsState :: forall s ids
|
||||
. NonceGenerator (ST s) ids
|
||||
-> MemSegmentOff 64
|
||||
initRegsFromAbsState :: forall ids
|
||||
. MemSegmentOff 64
|
||||
-- ^ Address to disassemble at
|
||||
-> Int
|
||||
-- ^ Maximum size of this block
|
||||
-> AbsBlockState X86Reg
|
||||
-- ^ Abstract state of processor for defining state.
|
||||
-> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String)
|
||||
tryDisassembleBlockFromAbsState nonceGen addr maxSize ab = do
|
||||
-> Either String (RegState X86Reg (Value X86_64 ids))
|
||||
initRegsFromAbsState addr ab = do
|
||||
t <-
|
||||
case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of
|
||||
Nothing -> throwError "Could not determine height of X87 stack."
|
||||
Nothing -> Left "Could not determine height of X87 stack."
|
||||
Just t -> pure t
|
||||
d <-
|
||||
case asConcreteSingleton (ab^.absRegState^.boundValue DF) of
|
||||
Nothing -> do
|
||||
throwError $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF)
|
||||
Left $ "Could not determine df flag " ++ show (ab^.absRegState^.boundValue DF)
|
||||
Just d -> pure d
|
||||
let loc = ExploreLoc { loc_ip = addr
|
||||
pure $ initX86State $
|
||||
ExploreLoc { loc_ip = addr
|
||||
, loc_x87_top = fromInteger t
|
||||
, loc_df_flag = d /= 0
|
||||
}
|
||||
|
||||
-- | Disassemble block, returning either an error, or a list of blocks
|
||||
-- and ending PC.
|
||||
tryDisassembleBlock :: forall s ids
|
||||
. NonceGenerator (ST s) ids
|
||||
-> MemSegmentOff 64
|
||||
-- ^ Address to disassemble at
|
||||
-> RegState X86Reg (Value X86_64 ids)
|
||||
-- ^ Initial registers
|
||||
-> Int
|
||||
-- ^ Maximum size of this block
|
||||
-> ExceptT String (ST s) (Block X86_64 ids, Int, Maybe String)
|
||||
tryDisassembleBlock nonceGen addr initRegs maxSize = do
|
||||
let off = segoffOffset addr
|
||||
let pblock = emptyPreBlock (initX86State loc)
|
||||
(b, nextIPOff, maybeError) <- lift $
|
||||
case segoffContentsAfter addr of
|
||||
Left msg -> do
|
||||
initError loc (FlexdisMemoryError msg)
|
||||
initError addr initRegs (FlexdisMemoryError msg)
|
||||
Right contents -> do
|
||||
disassembleBlockImpl nonceGen pblock 0 addr (off + fromIntegral maxSize) contents
|
||||
disassembleBlockImpl nonceGen (emptyPreBlock initRegs) 0 addr (off + fromIntegral maxSize) contents
|
||||
let sz :: Int
|
||||
sz = fromIntegral $ nextIPOff - off
|
||||
pure $! (b, sz, show <$> maybeError)
|
||||
|
||||
-- | Disassemble block, returning either an error, or a list of blocks
|
||||
-- and ending PC.
|
||||
disassembleBlockFromAbsState :: forall s ids
|
||||
disassembleBlockWithRegs :: forall s ids
|
||||
. NonceGenerator (ST s) ids
|
||||
-> MemSegmentOff 64
|
||||
-- ^ Address to disassemble at
|
||||
-> RegState X86Reg (Value X86_64 ids)
|
||||
-> Int
|
||||
-- ^ Maximum size of this block
|
||||
-> AbsBlockState X86Reg
|
||||
-- ^ Abstract state of processor for defining state.
|
||||
-> ST s ([Block X86_64 ids], Int, Maybe String)
|
||||
disassembleBlockFromAbsState nonceGen addr maxSize ab = do
|
||||
mr <- runExceptT $ tryDisassembleBlockFromAbsState nonceGen addr maxSize ab
|
||||
disassembleBlockWithRegs nonceGen addr initRegs maxSize = do
|
||||
mr <- runExceptT $ tryDisassembleBlock nonceGen addr initRegs maxSize
|
||||
case mr of
|
||||
Left msg -> pure ([], 0, Just msg)
|
||||
Right (b,sz, merr) -> pure ([b],sz,merr)
|
||||
@ -474,10 +488,10 @@ disassembleBlockFromAbsState nonceGen addr maxSize ab = do
|
||||
--
|
||||
-- This can also return Nothing if the call is not supported.
|
||||
identifyX86Call :: Memory 64
|
||||
-> [Stmt X86_64 ids]
|
||||
-> Seq (Stmt X86_64 ids)
|
||||
-> RegState X86Reg (Value X86_64 ids)
|
||||
-> Maybe (Seq (Stmt X86_64 ids), MemSegmentOff 64)
|
||||
identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty
|
||||
identifyX86Call mem stmts0 s = go stmts0 Seq.empty
|
||||
where -- Get value of stack pointer
|
||||
next_sp = s^.boundValue sp_reg
|
||||
-- Recurse on statements.
|
||||
@ -503,19 +517,30 @@ identifyX86Call mem stmts0 s = go (Seq.fromList stmts0) Seq.empty
|
||||
-- Otherwise skip over this instruction.
|
||||
| otherwise -> go prev (stmt Seq.<| after)
|
||||
|
||||
-- | Return true if stack pointer has been reset to original value, and
|
||||
-- return address is on top of stack.
|
||||
checkForReturnAddrX86 :: forall ids
|
||||
. AbsProcessorState X86Reg ids
|
||||
-> Bool
|
||||
checkForReturnAddrX86 absState
|
||||
| Just (StackEntry _ ReturnAddr) <- Map.lookup 0 (absState^.curAbsStack) =
|
||||
True
|
||||
| otherwise =
|
||||
False
|
||||
|
||||
-- | Called to determine if the instruction sequence contains a return
|
||||
-- from the current function.
|
||||
--
|
||||
-- An instruction executing a return from a function will place the
|
||||
-- ReturnAddr value (placed on the top of the stack by
|
||||
-- 'initialX86AbsState' above) into the instruction pointer.
|
||||
identifyX86Return :: [Stmt X86_64 ids]
|
||||
identifyX86Return :: Seq (Stmt X86_64 ids)
|
||||
-> RegState X86Reg (Value X86_64 ids)
|
||||
-> AbsProcessorState X86Reg ids
|
||||
-> Maybe (Seq (Stmt X86_64 ids))
|
||||
identifyX86Return stmts s finalRegSt8 =
|
||||
case transferValue finalRegSt8 (s^.boundValue ip_reg) of
|
||||
ReturnAddr -> Just $ Seq.fromList stmts
|
||||
ReturnAddr -> Just stmts
|
||||
_ -> Nothing
|
||||
|
||||
-- | Return state post call
|
||||
@ -569,12 +594,14 @@ x86_64_info preservePred =
|
||||
ArchitectureInfo { withArchConstraints = \x -> x
|
||||
, archAddrWidth = Addr64
|
||||
, archEndianness = LittleEndian
|
||||
, disassembleFn = disassembleBlockFromAbsState
|
||||
, mkInitialRegsForBlock = initRegsFromAbsState
|
||||
, disassembleFn = disassembleBlockWithRegs
|
||||
, mkInitialAbsState = \_ addr -> initialX86AbsState addr
|
||||
, absEvalArchFn = transferAbsValue
|
||||
, absEvalArchStmt = \s _ -> s
|
||||
, postCallAbsState = x86PostCallAbsState
|
||||
, identifyCall = identifyX86Call
|
||||
, checkForReturnAddr = \_ s -> checkForReturnAddrX86 s
|
||||
, identifyReturn = identifyX86Return
|
||||
, rewriteArchFn = rewriteX86PrimFn
|
||||
, rewriteArchStmt = rewriteX86Stmt
|
||||
|
@ -7,7 +7,7 @@ module ElfX64Linux (
|
||||
) where
|
||||
|
||||
import Control.Lens ( (^.) )
|
||||
import Control.Monad ( unless )
|
||||
import Control.Monad ( unless, when )
|
||||
import qualified Control.Monad.Catch as C
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Foldable as F
|
||||
@ -16,6 +16,7 @@ import Data.Maybe
|
||||
import qualified Data.Set as S
|
||||
import Data.Typeable ( Typeable )
|
||||
import Data.Word ( Word64 )
|
||||
import Numeric (showHex)
|
||||
import System.FilePath
|
||||
import qualified Test.Tasty as T
|
||||
import qualified Test.Tasty.HUnit as T
|
||||
@ -35,9 +36,12 @@ elfX64LinuxTests :: [FilePath] -> T.TestTree
|
||||
elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest
|
||||
|
||||
data Addr = Addr Int Word64
|
||||
deriving (Read,Show,Eq)
|
||||
deriving (Read,Eq, Ord)
|
||||
-- ^ An address is a region index and offset
|
||||
|
||||
instance Show Addr where
|
||||
showsPrec _ (Addr idx off) = showString "Addr " . shows idx . showString " 0x" . showHex off
|
||||
|
||||
-- | The type of expected results for test cases
|
||||
data ExpectedResult =
|
||||
R { funcs :: [(Addr, [(Addr, Integer)])]
|
||||
@ -59,34 +63,55 @@ mkTest fp = T.testCase fp $ withELF elfFilename (testDiscovery fp)
|
||||
where
|
||||
elfFilename = dropExtension fp
|
||||
|
||||
-- | Run a test over a given expected result filename and the ELF file
|
||||
-- associated with it
|
||||
testDiscovery :: FilePath -> E.Elf 64 -> IO ()
|
||||
testDiscovery expectedFilename elf =
|
||||
withMemory MM.Addr64 elf $ \mem entries -> do
|
||||
let di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty entries []
|
||||
expectedString <- readFile expectedFilename
|
||||
case readMaybe expectedString of
|
||||
Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
|
||||
Just er -> do
|
||||
let toSegOff :: Addr -> MM.MemSegmentOff 64
|
||||
toSegOff (Addr idx off) = do
|
||||
toSegOff :: MM.Memory 64 -> Addr -> MM.MemSegmentOff 64
|
||||
toSegOff mem (Addr idx off) = do
|
||||
let addr :: MM.MemAddr 64
|
||||
addr = MM.MemAddr idx (fromIntegral off)
|
||||
case MM.asSegmentOff mem addr of
|
||||
Just a -> a
|
||||
Nothing -> do
|
||||
Nothing ->
|
||||
let ppSeg seg = " Segment: " ++ show (MM.segmentOffAddr seg 0)
|
||||
error $ "Could not resolve address : " ++ show addr ++ "\n"
|
||||
in error $ "Could not resolve address : " ++ show addr ++ "\n"
|
||||
++ unlines (fmap ppSeg (MM.memSegments mem))
|
||||
let expectedEntries = M.fromList
|
||||
[ (toSegOff entry
|
||||
, S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts)
|
||||
|
||||
toAddr :: MM.MemSegmentOff 64 -> Addr
|
||||
toAddr segOff = do
|
||||
let addr :: MM.MemAddr 64
|
||||
addr = MM.segoffAddr segOff
|
||||
in Addr (fromIntegral (MM.addrBase addr)) (fromIntegral (MM.addrOffset addr))
|
||||
|
||||
-- | Run a test over a given expected result filename and the ELF file
|
||||
-- associated with it
|
||||
testDiscovery :: FilePath -> E.Elf 64 -> IO ()
|
||||
testDiscovery expectedFilename elf = do
|
||||
let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing
|
||||
, MM.loadRegionBaseOffset = 0
|
||||
}
|
||||
(warn, mem, mentry, syms) <-
|
||||
case MM.resolveElfContents opt elf of
|
||||
Left err -> C.throwM (MemoryLoadError err)
|
||||
Right r -> pure r
|
||||
when (not (null warn)) $ do
|
||||
error $ "Warnings while loading Elf " ++ show warn
|
||||
let entries = maybeToList mentry ++ fmap MM.memSymbolStart syms
|
||||
let addrSymMap :: M.Map (MM.MemSegmentOff 64) B.ByteString
|
||||
addrSymMap = M.fromList [ (MM.memSymbolStart sym, MM.memSymbolName sym)
|
||||
| sym <- syms
|
||||
]
|
||||
let di = MD.cfgFromAddrs RO.x86_64_linux_info mem addrSymMap entries []
|
||||
expectedString <- readFile expectedFilename
|
||||
case readMaybe expectedString of
|
||||
Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString)
|
||||
Just er -> do
|
||||
let expectedEntries :: M.Map (MM.MemSegmentOff 64) (S.Set (Addr, Integer))
|
||||
expectedEntries = M.fromList
|
||||
[ (toSegOff mem entry
|
||||
, S.fromList ((\(s,sz) -> (s, sz)) <$> starts)
|
||||
)
|
||||
| (entry, starts) <- funcs er
|
||||
]
|
||||
ignoredBlocks :: S.Set (MM.MemSegmentOff 64)
|
||||
ignoredBlocks = S.fromList (toSegOff <$> ignoreBlocks er)
|
||||
ignoredBlocks = S.fromList (toSegOff mem <$> ignoreBlocks er)
|
||||
T.assertEqual "Collection of discovered function starting points"
|
||||
(M.keysSet expectedEntries `S.difference` ignoredBlocks)
|
||||
(M.keysSet (di ^. MD.funInfo))
|
||||
@ -99,7 +124,8 @@ testDiscovery expectedFilename elf =
|
||||
T.assertBool ("Translate error at " ++ show (MD.pblockAddr pb) ++ " " ++ show term) (not (isTranslateError term))
|
||||
let actualEntry = MD.discoveredFunAddr dfi
|
||||
-- actualEntry = fromIntegral (MM.addrValue (MD.discoveredFunAddr dfi))
|
||||
let actualBlockStarts = S.fromList [ (addr, toInteger (MD.blockSize pbr))
|
||||
let actualBlockStarts :: S.Set (Addr, Integer)
|
||||
actualBlockStarts = S.fromList [ (toAddr addr, toInteger (MD.blockSize pbr))
|
||||
| pbr <- M.elems (dfi ^. MD.parsedBlocks)
|
||||
, let addr = MD.pblockAddr pbr
|
||||
, addr `S.notMember` ignoredBlocks
|
||||
@ -124,26 +150,6 @@ withELF fp k = do
|
||||
E.Elf32Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs)
|
||||
E.Elf64Res errs _ -> error ("Errors while parsing ELF file: " ++ show errs)
|
||||
|
||||
withMemory :: forall w m a
|
||||
. (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w))
|
||||
=> MM.AddrWidthRepr w
|
||||
-> E.Elf w
|
||||
-> (MM.Memory w -> [MM.MemSegmentOff w] -> m a)
|
||||
-> m a
|
||||
withMemory _relaWidth e k = do
|
||||
let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing
|
||||
, MM.loadRegionBaseOffset = 0
|
||||
-- let opt = MM.LoadOptions { MM.loadRegionIndex = Just 0
|
||||
-- , MM.loadRegionBaseOffset = fromIntegral loadOffset
|
||||
}
|
||||
case MM.resolveElfContents opt e of
|
||||
Left err -> C.throwM (MemoryLoadError err)
|
||||
Right (warn, mem, mentry, syms) ->
|
||||
if null warn then
|
||||
k mem (maybeToList mentry ++ fmap MM.memSymbolStart syms)
|
||||
else
|
||||
error $ "Warnings while loading Elf " ++ show warn
|
||||
|
||||
data ElfException = MemoryLoadError String
|
||||
deriving (Typeable, Show)
|
||||
|
||||
|
10
x86/tests/x64/test-plt.c
Normal file
10
x86/tests/x64/test-plt.c
Normal file
@ -0,0 +1,10 @@
|
||||
// This is a simple call to puts to test that we resolve the PLT stub
|
||||
// to puts.
|
||||
//
|
||||
// It should be compiled with
|
||||
// `clang -fpic -FPIC -o test-plt.exe test-plc.c`
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
puts("Hello World");
|
||||
}
|
BIN
x86/tests/x64/test-plt.exe
Executable file
BIN
x86/tests/x64/test-plt.exe
Executable file
Binary file not shown.
19
x86/tests/x64/test-plt.exe.expected
Normal file
19
x86/tests/x64/test-plt.exe.expected
Normal file
@ -0,0 +1,19 @@
|
||||
R { funcs =
|
||||
[ (Addr 1 0x580, [(Addr 1 0x580,16),(Addr 1 0x590,5),(Addr 1 0x595,5)])
|
||||
, (Addr 1 0x5b0, [(Addr 1 0x5b0,6)])
|
||||
, (Addr 1 0x5c0, [(Addr 1 0x5c0,6)])
|
||||
, (Addr 1 0x5d0, [(Addr 1 0x5d0,6)])
|
||||
, (Addr 1 0x5d8, [(Addr 1 0x5d8,6)])
|
||||
, (Addr 1 0x5e0, [(Addr 1 0x5e0,41),(Addr 1 0x609,1)])
|
||||
, (Addr 1 0x610, [(Addr 1 0x610,27),(Addr 1 0x62b,12),(Addr 1 0x637,3),(Addr 1 0x640,2)])
|
||||
, (Addr 1 0x650, [(Addr 1 0x650,40),(Addr 1 0x678,12),(Addr 1 0x684,3),(Addr 1 0x690,2)])
|
||||
, (Addr 1 0x6a0, [(Addr 1 0x6a0,9),(Addr 1 0x6a9,14),(Addr 1 0x6b7,12),(Addr 1 0x6c3,5),(Addr 1 0x6c8,8),(Addr 1 0x6d0,2)])
|
||||
, (Addr 1 0x6e0, [(Addr 1 0x6e0,13), (Addr 1 0x6ed,5), (Addr 1 0x6f8, 12), (Addr 1 0x704, 6), (Addr 1 0x70a, 6)])
|
||||
, (Addr 1 0x710, [(Addr 1 0x710,13),(Addr 1 0x71d,4)])
|
||||
, (Addr 1 0x730, [(Addr 1 0x730,49),(Addr 1 0x761,5),(Addr 1 0x766,10),(Addr 1 0x770,13),(Addr 1
|
||||
0x77d,9),(Addr 1 0x786,15)])
|
||||
, (Addr 1 0x7a0, [(Addr 1 0x7a0,2)])
|
||||
, (Addr 1 0x7a4, [(Addr 1 0x7a4,9)])
|
||||
]
|
||||
, ignoreBlocks = []
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)])
|
||||
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11), (Addr 1 0x2c0, 7)])
|
||||
, (Addr 1 0x2d0, [(Addr 1 0x2d0, 11)])
|
||||
, (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)])
|
||||
]
|
||||
, ignoreBlocks = [Addr 1 0x2fb]
|
||||
|
Loading…
Reference in New Issue
Block a user