Start using jump bounds; introduce "ParsedBlocks"

The code is now using a constraint representation for Jump bounds; an
attempt to reduce the need for strided intervals.

It's a bit unclear how much of a win this is; the jump bounds should
recognize the size of a jump table with fewer needs to repeat as
strided intervals may.  However, it doesn't solve the incompleteness
problem that I hoped it would -- if the intra-procedural control flow
graph contains missing edges, the result may still be incomplete.

This code also introduces ParsedBlocks, a basic block representation
that used the ParseTermStmt.  A future patch will start including this
in the discoveryinfo, so that later procedures do not need to call
"classifyBlock".
This commit is contained in:
Joe Hendrix 2017-02-16 02:53:19 -05:00
parent 848cc2d0d0
commit 0d086237e8
No known key found for this signature in database
GPG Key ID: 00F67DE32381DB9F
8 changed files with 604 additions and 237 deletions

View File

@ -27,6 +27,7 @@ library
exposed-modules:
Data.Macaw.AbsDomain.AbsState
Data.Macaw.AbsDomain.JumpBounds
Data.Macaw.AbsDomain.Refine
Data.Macaw.AbsDomain.StridedInterval
Data.Macaw.Architecture.Info
@ -35,7 +36,6 @@ library
Data.Macaw.DebugLogging
Data.Macaw.Discovery
Data.Macaw.Discovery.Info
Data.Macaw.Discovery.JumpBounds
Data.Macaw.Dwarf
Data.Macaw.Memory
Data.Macaw.Memory.ElfLoader

View File

@ -41,6 +41,7 @@ module Data.Macaw.AbsDomain.AbsState
, AbsProcessorState
, curAbsStack
, absInitialRegs
, indexBounds
, startAbsStack
, initAbsProcessorState
, absAssignments
@ -77,10 +78,10 @@ import qualified Data.Set as Set
import Numeric (showHex)
import Text.PrettyPrint.ANSI.Leijen hiding ((<$>))
import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp
import qualified Data.Macaw.AbsDomain.StridedInterval as SI
import Data.Macaw.CFG
import Data.Macaw.DebugLogging
import Data.Macaw.Discovery.JumpBounds
import Data.Macaw.Memory
import qualified Data.Macaw.Memory.Permissions as Perm
import Data.Macaw.Types
@ -708,6 +709,16 @@ bvmul w v v'
bvmul _ _ _ = TopV
-- FIXME: generalise
bvand :: Integral (MemWord w)
=> NatRepr u
-> AbsValue w (BVType u)
-> AbsValue w (BVType u)
-> AbsValue w (BVType u)
bvand _w (asFinSet "bvand" -> IsFin s) (asConcreteSingleton -> Just v) = FinSet (Set.map (flip (.&.) v) s)
bvand _w (asConcreteSingleton -> Just v) (asFinSet "bvand" -> IsFin s) = FinSet (Set.map ((.&.) v) s)
bvand _ _ _ = TopV
-- FIXME: generalise
bitop :: Integral (MemWord w)
=> (Integer -> Integer -> Integer)
@ -905,7 +916,7 @@ ppAbsStack m = vcat (pp <$> Map.toDescList m)
data AbsBlockState r
= AbsBlockState { _absRegState :: !(RegState r (AbsValue (RegAddrWidth r)))
, _startAbsStack :: !(AbsBlockStack (RegAddrWidth r))
, _initIndexBounds :: !(InitialIndexBounds r)
, _initIndexBounds :: !(Jmp.InitialIndexBounds r)
}
deriving instance MapF.OrdF r => Eq (AbsBlockState r)
@ -918,7 +929,7 @@ absRegState = lens _absRegState (\s v -> s { _absRegState = v })
startAbsStack :: Simple Lens (AbsBlockState r) (AbsBlockStack (RegAddrWidth r))
startAbsStack = lens _startAbsStack (\s v -> s { _startAbsStack = v })
initIndexBounds :: Simple Lens (AbsBlockState r) (InitialIndexBounds r)
initIndexBounds :: Simple Lens (AbsBlockState r) (Jmp.InitialIndexBounds r)
initIndexBounds = lens _initIndexBounds (\s v -> s { _initIndexBounds = v })
instance ( RegisterInfo r
@ -927,7 +938,7 @@ instance ( RegisterInfo r
top = AbsBlockState { _absRegState = mkRegState (\_ -> TopV)
, _startAbsStack = Map.empty
, _initIndexBounds = arbitraryInitialBounds
, _initIndexBounds = Jmp.arbitraryInitialBounds
}
joinD x y | regs_changed = Just $! z
@ -951,7 +962,7 @@ instance ( RegisterInfo r
return $! zr
z_stk <- absStackJoinD x_stk y_stk
z_bnds <-
case joinInitialBounds (x^.initIndexBounds) (y^.initIndexBounds) of
case Jmp.joinInitialBounds (x^.initIndexBounds) (y^.initIndexBounds) of
Just z_bnds -> (_1 .= True) $> z_bnds
Nothing -> pure (x^.initIndexBounds)
@ -965,11 +976,13 @@ instance ( ShowF r
pretty s =
text "registers:" <$$>
indent 2 (pretty (s^.absRegState)) <$$>
stack_d
stack_d <$$>
jmp_bnds
where stack = s^.startAbsStack
stack_d | Map.null stack = empty
| otherwise = text "stack:" <$$>
indent 2 (ppAbsStack stack)
jmp_bnds = pretty (s^.initIndexBounds)
instance ShowF r => Show (AbsBlockState r) where
show = show . pretty
@ -1009,7 +1022,7 @@ data AbsProcessorState r ids
-- symbolic values associated with them
, _curAbsStack :: !(AbsBlockStack (RegAddrWidth r))
-- ^ The current symbolic state of the stack
, _indexBounds :: !(IndexBounds r ids)
, _indexBounds :: !(Jmp.IndexBounds r ids)
}
-- | The width of an address
@ -1028,7 +1041,7 @@ curAbsStack :: Simple Lens (AbsProcessorState r ids) (AbsBlockStack (RegAddrWidt
curAbsStack = lens _curAbsStack (\s v -> s { _curAbsStack = v })
-- | Return the index
indexBounds :: Simple Lens (AbsProcessorState r ids) (IndexBounds r ids)
indexBounds :: Simple Lens (AbsProcessorState r ids) (Jmp.IndexBounds r ids)
indexBounds = lens _indexBounds (\s v -> s { _indexBounds = v })
instance ShowF r
@ -1058,7 +1071,7 @@ initAbsProcessorState mem s =
, _absInitialRegs = s^.absRegState
, _absAssignments = MapF.empty
, _curAbsStack = s^.startAbsStack
, _indexBounds = mkIndexBounds (s^.initIndexBounds)
, _indexBounds = Jmp.mkIndexBounds (s^.initIndexBounds)
}
-- | A lens that allows one to lookup and update the value of an assignment in
@ -1208,7 +1221,7 @@ finalAbsBlockState c s =
transferReg r = transferValue c (s^.boundValue r)
in AbsBlockState { _absRegState = mkRegState transferReg
, _startAbsStack = c^.curAbsStack
, _initIndexBounds = nextBlockBounds (c^.indexBounds) s
, _initIndexBounds = Jmp.nextBlockBounds (c^.indexBounds) s
}
------------------------------------------------------------------------
@ -1228,7 +1241,7 @@ transferApp r a =
BVAdd w x y -> bvadd w (transferValue r x) (transferValue r y)
BVSub w x y -> bvsub (absMem r) w (transferValue r x) (transferValue r y)
BVMul w x y -> bvmul w (transferValue r x) (transferValue r y)
BVAnd w x y -> bitop (.&.) w (transferValue r x) (transferValue r y)
BVAnd w x y -> bvand w (transferValue r x) (transferValue r y)
BVOr w x y -> bitop (.|.) w (transferValue r x) (transferValue r y)
_ -> TopV
@ -1253,7 +1266,7 @@ postCallAbsState :: forall r
postCallAbsState params ab0 addr =
AbsBlockState { _absRegState = mkRegState regFn
, _startAbsStack = ab0^.startAbsStack
, _initIndexBounds = arbitraryInitialBounds
, _initIndexBounds = Jmp.arbitraryInitialBounds
}
where regFn :: r tp -> AbsValue (RegAddrWidth r) tp
regFn r

View File

@ -2,14 +2,17 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Macaw.Discovery.JumpBounds
{-# LANGUAGE StandaloneDeriving #-}
module Data.Macaw.AbsDomain.JumpBounds
( InitialIndexBounds
, arbitraryInitialBounds
, joinInitialBounds
, IndexBounds
, mkIndexBounds
, UpperBound(..)
, addUpperBound
, lookupUpperBound
, assertPred
, unsignedUpperBound
, nextBlockBounds
) where
@ -20,6 +23,7 @@ import Data.Parameterized.Classes
import Data.Parameterized.Map (MapF)
import qualified Data.Parameterized.Map as MapF
import Data.Parameterized.NatRepr (maxUnsigned)
import Text.PrettyPrint.ANSI.Leijen hiding ((<$>))
import Data.Macaw.CFG
import Data.Macaw.Types
@ -36,6 +40,8 @@ instance MapF.EqF UpperBound where
instance Ord (UpperBound tp) where
compare (IntegerUpperBound x) (IntegerUpperBound y) = compare x y
deriving instance Show (UpperBound tp)
-- | Bounds for a function as the start of a block.
data InitialIndexBounds r
= InitialIndexBounds { initialRegUpperBound :: !(MapF r UpperBound)
@ -44,6 +50,11 @@ data InitialIndexBounds r
instance MapF.TestEquality r => Eq (InitialIndexBounds r) where
x == y = initialRegUpperBound x == initialRegUpperBound y
instance ShowF r => Pretty (InitialIndexBounds r) where
pretty r = vcat $ ppPair <$> MapF.toList (initialRegUpperBound r)
where ppPair :: MapF.Pair r UpperBound -> Doc
ppPair (MapF.Pair k (IntegerUpperBound b)) = text (showF k) <+> text "<=" <+> text (show b)
-- | Create initial index bounds that can represent any system state.
arbitraryInitialBounds :: InitialIndexBounds reg
arbitraryInitialBounds = InitialIndexBounds { initialRegUpperBound = MapF.empty }
@ -104,9 +115,10 @@ mkIndexBounds i = IndexBounds { _regBounds = initialRegUpperBound i
, _assignUpperBound = MapF.empty
}
-- | Add a inclusive upper bound to a value.
addUpperBound :: ( MapF.OrdF (ArchReg arch)
, HasRepr (ArchReg arch) TypeRepr
)
, HasRepr (ArchReg arch) TypeRepr
)
=> BVValue arch ids w
-> Integer -- ^ Upper bound as an unsigned number
-> IndexBounds (ArchReg arch) ids
@ -121,18 +133,48 @@ addUpperBound v u bnds
| otherwise -> Left "Constant given upper bound that is statically less than given bounds"
RelocatableValue{} -> Left "Relocatable value does not have upper bounds."
AssignedValue a ->
Right $ bnds & assignUpperBound %~ MapF.insertWith min (assignId a) (IntegerUpperBound u)
case assignRhs a of
EvalApp (UExt x _) -> addUpperBound x u bnds
EvalApp (Trunc x w) ->
if u < maxUnsigned w then
addUpperBound x u bnds
else
Right $ bnds
_ ->
Right $ bnds & assignUpperBound %~ MapF.insertWith min (assignId a) (IntegerUpperBound u)
Initial r ->
Right $ bnds & regBounds %~ MapF.insertWith min r (IntegerUpperBound u)
-- | Assert a predice is true and update bounds.
assertPred :: ( OrdF (ArchReg arch)
, HasRepr (ArchReg arch) TypeRepr
)
=> Value arch ids BoolType -- ^ Value represnting predicate
-> Bool -- ^ Controls whether predicate is true or false
-> IndexBounds (ArchReg arch) ids -- ^ Current index bounds
-> Either String (IndexBounds (ArchReg arch) ids)
assertPred (AssignedValue a) isTrue bnds =
case assignRhs a of
-- Given x < c), assert x <= c-1
EvalApp (BVUnsignedLt x (BVValue _ c)) | isTrue -> addUpperBound x (c-1) bnds
-- Given not (c < y), assert y <= c
EvalApp (BVUnsignedLt (BVValue _ c) y) | not isTrue -> addUpperBound y c bnds
-- Given x <= c, assert x <= c
EvalApp (BVUnsignedLe x (BVValue _ c)) | isTrue -> addUpperBound x c bnds
-- Given not (c <= y), assert y <= (c-1)
EvalApp (BVUnsignedLe (BVValue _ c) y) | not isTrue -> addUpperBound y (c-1) bnds
_ -> Right bnds
assertPred _ _ bnds = Right bnds
-- | Lookup an upper bound or return analysis for why it is not defined.
lookupUpperBound :: ( MapF.OrdF (ArchReg arch)
unsignedUpperBound :: ( MapF.OrdF (ArchReg arch)
, MapF.ShowF (ArchReg arch)
)
=> IndexBounds (ArchReg arch) ids
-> Value arch ids tp
-> Either String (UpperBound tp)
lookupUpperBound bnds v =
unsignedUpperBound bnds v =
case v of
BVValue _ i -> Right (IntegerUpperBound i)
RelocatableValue{} ->
@ -140,12 +182,25 @@ lookupUpperBound bnds v =
AssignedValue a ->
case MapF.lookup (assignId a) (bnds^.assignUpperBound) of
Just bnd -> Right bnd
Nothing -> Left $ "Could not find upper bounds for " ++ show (assignId a) ++ "."
Nothing ->
case assignRhs a of
EvalApp (BVAnd _ x y) -> do
case (unsignedUpperBound bnds x, unsignedUpperBound bnds y) of
(Right (IntegerUpperBound xb), Right (IntegerUpperBound yb)) ->
Right (IntegerUpperBound (min xb yb))
(Right xb, Left{}) -> Right xb
(Left{}, yb) -> yb
EvalApp (UExt x _) -> do
IntegerUpperBound r <- unsignedUpperBound bnds x
pure (IntegerUpperBound r)
EvalApp (Trunc x w) -> do
IntegerUpperBound xr <- unsignedUpperBound bnds x
pure $! IntegerUpperBound (min xr (maxUnsigned w))
_ -> Left $ "Could not find upper bounds for " ++ show (assignId a) ++ "."
Initial r ->
case MapF.lookup r (bnds^.regBounds) of
Just bnd -> Right bnd
Nothing -> Left $ "Could not find upper bounds for " ++ showF r ++ "."
Nothing -> Left $ "No upper bounds for " ++ showF r ++ "."
eitherToMaybe :: Either a b -> Maybe b
eitherToMaybe (Right v) = Just v
@ -163,6 +218,6 @@ nextBlockBounds :: forall arch ids
-> InitialIndexBounds (ArchReg arch)
nextBlockBounds bnds regs =
let m = regStateMap regs
nextBounds = MapF.mapMaybe (eitherToMaybe . lookupUpperBound bnds) m
nextBounds = MapF.mapMaybe (eitherToMaybe . unsignedUpperBound bnds) m
in InitialIndexBounds { initialRegUpperBound = nextBounds
}

View File

@ -82,7 +82,6 @@ module Data.Macaw.CFG
, sexpr
, sexprA
, PrettyF(..)
, PrettyFF(..)
, PrettyArch(..)
, PrettyRegValue(..)
-- * Architecture type families
@ -160,10 +159,6 @@ class PrettyPrec v where
class PrettyF (f :: k -> *) where
prettyF :: f tp -> Doc
-- | Pretty print over a pair of instances
class PrettyFF (f :: j -> k -> *) where
prettyFF :: f a b -> Doc
-- | Pretty print a document with parens if condition is true
parenIf :: Bool -> Doc -> Doc
parenIf True d = parens d
@ -695,7 +690,7 @@ type ArchAddrWidth arch = RegAddrWidth (ArchReg arch)
type ArchLabel arch = BlockLabel (ArchAddrWidth arch)
-- | Operations on a Arch
class ShowF (ArchReg arch) => PrettyArch arch where
class (ShowF (ArchReg arch), PrettyF (ArchStmt arch)) => PrettyArch arch where
-- | A function for pretty printing an archFn of a given type.
ppArchFn :: Applicative m
=> (forall u . Value arch ids u -> m Doc)
@ -833,8 +828,7 @@ $(pure [])
ppLit :: NatRepr n -> Integer -> Doc
ppLit w i
| i >= 0 =
text ("0x" ++ showHex i "") <+> text "::" <+> brackets (text (show w))
| i >= 0 = text ("0x" ++ showHex i "") <+> text "::" <+> brackets (text (show w))
| otherwise = error "ppLit given negative value"
-- | Pretty print a value.
@ -855,28 +849,20 @@ instance ShowF (ArchReg arch) => Show (Value arch ids tp) where
-- | Pretty print an assignment right-hand side using operations parameterized
-- over an application to allow side effects.
ppAssignRhs :: Applicative m
ppAssignRhs :: (Applicative m, PrettyArch arch)
=> (forall u . Value arch ids u -> m Doc)
-- ^ Function for pretty printing value.
-> (forall u . ArchFn arch ids u -> m Doc)
-- ^ Function for pretty printing an architecture-specific function
-> AssignRhs arch ids tp
-> m Doc
ppAssignRhs pp _ (EvalApp a) = ppAppA pp a
ppAssignRhs _ _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w))
ppAssignRhs pp _ (ReadMem a _) = (\d -> text "*" PP.<> d) <$> pp a
ppAssignRhs _ pp (EvalArchFn f _) = pp f
ppAssignRhs pp (EvalApp a) = ppAppA pp a
ppAssignRhs _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w))
ppAssignRhs pp (ReadMem a _) = (\d -> text "*" PP.<> d) <$> pp a
ppAssignRhs pp (EvalArchFn f _) = ppArchFn pp f
instance ( ShowF (ArchReg arch)
, PrettyFF (ArchFn arch)
) =>
Pretty (AssignRhs arch ids tp) where
pretty v = runIdentity $ ppAssignRhs (Identity . ppValue 10) (Identity . prettyFF) v
instance PrettyArch arch => Pretty (AssignRhs arch ids tp) where
pretty = runIdentity . ppAssignRhs (Identity . ppValue 10)
instance ( ShowF (ArchReg arch)
, PrettyFF (ArchFn arch)
) =>
Pretty (Assignment arch ids tp) where
instance PrettyArch arch => Pretty (Assignment arch ids tp) where
pretty (Assignment lhs rhs) = ppAssignId lhs <+> text ":=" <+> pretty rhs
$(pure [])
@ -902,7 +888,7 @@ collectValueRep _ (AssignedValue a :: Value arch ids tp) = do
let ppVal :: forall u . Value arch ids u ->
State (MapF (AssignId ids) DocF) Doc
ppVal = collectValueRep 10
rhs <- ppAssignRhs ppVal (ppArchFn ppVal) (assignRhs a)
rhs <- ppAssignRhs ppVal (assignRhs a)
let d = ppAssignId lhs <+> text ":=" <+> rhs
modify $ MapF.insert lhs (DocF d)
return ()
@ -1124,11 +1110,7 @@ data Stmt arch ids
| ExecArchStmt (ArchStmt arch ids)
-- ^ Execute an architecture specific statement
instance ( ShowF (ArchReg arch)
, PrettyF (ArchStmt arch)
, PrettyFF (ArchFn arch)
)
=> Pretty (Stmt arch ids) where
instance PrettyArch arch => Pretty (Stmt arch ids) where
pretty (AssignStmt a) = pretty a
pretty (WriteMem a rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs
pretty (PlaceHolderStmt vals name) = text ("PLACEHOLDER: " ++ name)
@ -1138,11 +1120,7 @@ instance ( ShowF (ArchReg arch)
pretty (ExecArchStmt s) = prettyF s
instance ( ShowF (ArchReg arch)
, PrettyF (ArchStmt arch)
, PrettyFF (ArchFn arch)
)
=> Show (Stmt arch ids) where
instance PrettyArch arch => Show (Stmt arch ids) where
show = show . pretty
------------------------------------------------------------------------
@ -1214,20 +1192,16 @@ refsInAssignRhs rhs =
-- Block
-- | A basic block in a control flow graph.
-- Consists of:
-- 1. A label that should uniquely identify the block, equence of
data Block arch ids =
Block { blockLabel :: !(ArchLabel arch)
, blockStmts :: !([Stmt arch ids])
-- ^ List of statements in the block.
, blockTerm :: !(TermStmt arch ids)
-- ^ The last statement in the block.
}
data Block arch ids
= Block { blockLabel :: !(ArchLabel arch)
, blockStmts :: !([Stmt arch ids])
-- ^ List of statements in the block.
, blockTerm :: !(TermStmt arch ids)
-- ^ The last statement in the block.
}
instance ( OrdF (ArchReg arch)
, ShowF (ArchReg arch)
, PrettyFF (ArchFn arch)
, PrettyF (ArchStmt arch)
, PrettyArch arch
)
=> Pretty (Block arch ids) where
pretty b = do
@ -1296,11 +1270,9 @@ insertBlocksForCode start size bs cfg =
-- | Constraints for pretty printing a 'CFG'.
type PrettyCFGConstraints arch
= ( OrdF (ArchReg arch)
, ShowF (ArchReg arch)
, Integral (ArchAddr arch)
, Show (ArchAddr arch)
, PrettyF (ArchStmt arch)
, PrettyFF (ArchFn arch)
, PrettyArch arch
)
instance PrettyCFGConstraints arch

View File

@ -45,22 +45,25 @@ import Data.Parameterized.NatRepr
import Data.Parameterized.Nonce
import Data.Parameterized.Some
import qualified Data.Set as Set
import qualified Data.Vector as V
import Data.Word
import Numeric
--import Text.PrettyPrint.ANSI.Leijen hiding ((<$>))
import Data.Macaw.AbsDomain.AbsState
import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp
import Data.Macaw.AbsDomain.Refine
import qualified Data.Macaw.AbsDomain.StridedInterval as SI
import Data.Macaw.Architecture.Info
import Data.Macaw.CFG
import Data.Macaw.DebugLogging
import Data.Macaw.Discovery.Info
--import Data.Macaw.Discovery.JumpBounds
import Data.Macaw.Memory
import qualified Data.Macaw.Memory.Permissions as Perm
import Data.Macaw.Types
import Debug.Trace
transferRHS :: forall a ids tp
. ( OrdF (ArchReg a)
, ShowF (ArchReg a)
@ -193,45 +196,40 @@ runCFGM arch_info mem symbols m = do
let init_info = emptyDiscoveryInfo nonce_gen mem symbols arch_info
Some <$> execStateT (unCFGM m) init_info
printAddrBacktrace :: Map (ArchSegmentedAddr arch) (BlockRegion arch ids)
printAddrBacktrace :: Map (ArchSegmentedAddr arch) (FoundAddr arch)
-> ArchSegmentedAddr arch
-> CodeAddrReason (ArchAddrWidth arch)
-> [String]
printAddrBacktrace m rsn = do
let prev addr =
case Map.lookup addr m of
Just br -> printAddrBacktrace m (brReason br)
Nothing -> error $ "Unknown reason for address " ++ show addr
printAddrBacktrace found_map addr rsn = do
let pp msg = show addr ++ ": " ++ msg
let prev prev_addr =
case Map.lookup prev_addr found_map of
Just found_info -> printAddrBacktrace found_map prev_addr (foundReason found_info)
Nothing -> error $ "Unknown reason for address " ++ show prev_addr
case rsn of
InWrite src ->
["Written to memory in block at address " ++ show src ++ "."]
++ prev src
NextIP src ->
["Target IP for " ++ show src ++ "."]
++ prev src
CallTarget src ->
["Target IP of call at " ++ show src ++ "."]
++ prev src
InitAddr -> ["Initial entry point"]
CodePointerInMem src -> ["Memory address " ++ show src ++ " contained code."]
SplitAt src -> ["Split from read of " ++ show src ++ "."] ++ prev src
InterProcedureJump src -> ["Reference from external address " ++ show src ++ "."] ++ prev src
InWrite src -> pp ("Written to memory in block at address " ++ show src ++ ".") : prev src
NextIP src -> pp ("Target IP for " ++ show src ++ ".") : prev src
CallTarget src -> pp ("Target IP of call at " ++ show src ++ ".") : prev src
InitAddr -> [pp "Initial entry point."]
CodePointerInMem src -> [pp ("Memory address " ++ show src ++ " contained code.")]
SplitAt src -> pp ("Split from read of " ++ show src ++ ".") : prev src
InterProcedureJump src -> pp ("Reference from external address " ++ show src ++ ".") : prev src
-- | Return true if this address was added because of the contents of a global address
-- in memory initially.
--
-- This heuristic is not very accurate, so we avoid printing errors when it leads to
-- issues.
cameFromInitialMemoryContents :: Map (ArchSegmentedAddr arch) (BlockRegion arch ids)
-> CodeAddrReason (ArchAddrWidth arch)
-> Bool
cameFromInitialMemoryContents m rsn = do
cameFromUnsoundReason :: Map (ArchSegmentedAddr arch) (FoundAddr arch)
-> CodeAddrReason (ArchAddrWidth arch)
-> Bool
cameFromUnsoundReason found_map rsn = do
let prev addr =
case Map.lookup addr m of
Just br -> cameFromInitialMemoryContents m (brReason br)
case Map.lookup addr found_map of
Just info -> cameFromUnsoundReason found_map (foundReason info)
Nothing -> error $ "Unknown reason for address " ++ show addr
case rsn of
InWrite src -> prev src
InWrite{} -> True
NextIP src -> prev src
CallTarget src -> prev src
SplitAt src -> prev src
@ -264,21 +262,23 @@ tryDisassembleAddr rsn addr ab = do
-- Build state for exploring this.
case maybeError of
Just e -> do
when (not (cameFromInitialMemoryContents block_addrs rsn)) $ do
when (not (cameFromUnsoundReason (s0^.foundAddrs) rsn)) $ do
error $ "Failed to disassemble " ++ show e ++ "\n"
++ unlines (printAddrBacktrace block_addrs rsn)
++ unlines (printAddrBacktrace (s0^.foundAddrs) addr rsn)
Nothing -> do
pure ()
assert (segmentIndex (addrSegment next_ip) == segmentIndex (addrSegment addr)) $ do
assert (next_ip^.addrOffset > addr^.addrOffset) $ do
let block_map = Map.fromList [ (labelIndex (blockLabel b), b) | b <- bs ]
-- Add block region to blocks.
let br = BlockRegion { brReason = rsn
, brSize = next_ip^.addrOffset - addr^.addrOffset
let found_info = FoundAddr { foundReason = rsn
, foundAbstractState = ab
}
let br = BlockRegion { brSize = next_ip^.addrOffset - addr^.addrOffset
, brBlocks = block_map
, brAbsInitState = ab
}
put $ s0 & blocks %~ Map.insert addr br
put $! s0 & foundAddrs %~ Map.insert addr found_info
& blocks %~ Map.insert addr br
-- | Mark address as the start of a code block.
markCodeAddrBlock :: PrettyCFGConstraints arch
@ -300,7 +300,9 @@ markCodeAddrBlock rsn addr ab = do
-- Get block for addr
tryDisassembleAddr rsn addr ab
-- Get block for old block
tryDisassembleAddr (brReason br) l (brAbsInitState br)
case Map.lookup l (s^.foundAddrs) of
Just info -> tryDisassembleAddr (foundReason info) l (foundAbstractState info)
Nothing -> error $ "Internal mark code addr as block saved but no reason information stored."
-- It's possible this will cause the current block to be broken, or cause a function to
-- boundaries. However, we don't think this should cause the need automatically to
-- re-evaluate a block as any information discovered should be strictly less than
@ -326,7 +328,6 @@ transferStmt info stmt =
modify $ \r -> addMemWrite (r^.absInitialRegs^.curIP) addr v r
_ -> return ()
newtype HexWord = HexWord Word64
instance Show HexWord where
@ -353,6 +354,7 @@ markAddrAsFunction rsn addr = do
maybeMapInsert :: Ord a => Maybe a -> b -> Map a b -> Map a b
maybeMapInsert mk v = maybe id (\k -> Map.insert k v) mk
{-
-- | Mark addresses written to memory that point to code as function entry points.
recordWriteStmt :: ( PrettyCFGConstraints arch
, HasRepr (ArchReg arch) TypeRepr
@ -372,6 +374,7 @@ recordWriteStmt src regs stmt = do
let addrs = concretizeAbsCodePointers mem (transferValue regs v)
mapM_ (markAddrAsFunction (InWrite src)) addrs
_ -> return ()
-}
transferStmts :: ( HasRepr (ArchReg arch) TypeRepr
, RegisterInfo (ArchReg arch)
@ -438,7 +441,7 @@ assignmentAbsValues info mem g absm =
mergeIntraJump :: ( PrettyCFGConstraints arch
, RegisterInfo (ArchReg arch)
)
=> BlockLabel (ArchAddrWidth arch)
=> ArchSegmentedAddr arch
-- ^ Source label that we are jumping from.
-> AbsBlockState (ArchReg arch)
-- ^ Block state after executing instructions.
@ -450,23 +453,23 @@ mergeIntraJump src ab _tgt
, debug DCFG ("WARNING: Missing return value in jump from " ++ show src ++ " to\n" ++ show ab) False
= error "Unexpected mergeIntraJump"
mergeIntraJump src ab tgt = do
let rsn = NextIP (labelAddr src)
let rsn = NextIP src
-- Associate a new abstract state with the code region.
s0 <- get
case Map.lookup tgt (s0^.blocks) of
case Map.lookup tgt (s0^.foundAddrs) of
-- We have seen this block before, so need to join and see if
-- the results is changed.
Just old_block -> do
case joinD (brAbsInitState old_block) ab of
Just old_info -> do
case joinD (foundAbstractState old_info) ab of
Nothing -> return ()
Just new -> do
let new_block = old_block { brAbsInitState = new }
modify $ (blocks %~ Map.insert tgt new_block)
. (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton (labelAddr src)))
let new_info = old_info { foundAbstractState = new }
modify $ (foundAddrs %~ Map.insert tgt new_info)
. (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton src))
. (frontier %~ Map.insert tgt rsn)
-- We haven't seen this block before
Nothing -> do
modify $ (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton (labelAddr src)))
modify $ (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton src))
. (frontier %~ Map.insert tgt rsn)
markCodeAddrBlock rsn tgt ab
@ -498,27 +501,38 @@ matchJumpTable _ _ =
getJumpTableBounds :: ( OrdF (ArchReg a)
, ShowF (ArchReg a)
, MemWidth (ArchAddrWidth a)
, PrettyArch a
)
=> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers.
=> ArchitectureInfo a
-> ArchSegmentedAddr a
-> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers.
-> ArchSegmentedAddr a -- ^ Base
-> BVValue a ids (ArchAddrWidth a) -- ^ Index in jump table
-> Maybe (ArchAddr a)
-- ^ One past last index in jump table or nothing
getJumpTableBounds regs base jump_index
-- Get range for the index.
| let abs_value = transferValue regs jump_index
, StridedInterval index_interval <- abs_value
-- Check that relevant interval is completely contained within a read-only
-- read only range in the memory.
, SI.StridedInterval _ index_base index_range index_stride <-
debug DCFG "getJumpTable3" $ index_interval
, index_end <- index_base + (index_range + 1) * index_stride
, rangeInReadonlySegment base (8 * fromInteger index_end) =
-- Get the addresses associated.
debug DCFG ("Fixed table " ++ show base ++ " [" ++ shows jump_index "]") $
Just $! fromInteger index_end
getJumpTableBounds _ _ _ = Nothing
getJumpTableBounds arch addr regs base jump_index = do
let abs_value = transferValue regs jump_index
case abs_value of
StridedInterval (SI.StridedInterval _ index_base index_range index_stride) -> do
let index_end = index_base + (index_range + 1) * index_stride
if rangeInReadonlySegment base (jumpTableEntrySize arch * fromInteger index_end) then
case Jmp.unsignedUpperBound (regs^.indexBounds) jump_index of
Right (Jmp.IntegerUpperBound bnd) | bnd == index_range -> Just $! fromInteger index_end
Right bnd ->
trace ("Upper bound mismatch at " ++ show addr ++ ":\n"
++ " JumpBounds:" ++ show bnd
++ " Domain:" ++ show index_range)
Nothing
Left msg ->
trace ("Could not find jump table at " ++ show addr ++ ": " ++ msg ++ "\n"
++ show (ppValueAssignments jump_index))
Nothing -- error $ "Could not compute upper bound on jump table: " ++ msg
else
error $ "Jump table range is not in readonly memory"
TopV -> Nothing
_ -> error $ "Index interval is not a stride " ++ show abs_value
{-
-- | This explores a block that ends with a fetch and execute.
fetchAndExecute :: forall arch ids
. ( RegisterInfo (ArchReg arch)
@ -548,7 +562,7 @@ fetchAndExecute b regs' s' = do
let abst = finalAbsBlockState regs' s'
seq abst $ do
-- Merge caller return information
mergeIntraJump lbl (archPostCallAbsState arch_info abst ret) ret
mergeIntraJump src (archPostCallAbsState arch_info abst ret) ret
-- Look for new ips.
let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP)
mapM_ (markAddrAsFunction (CallTarget src)) addrs
@ -602,22 +616,16 @@ fetchAndExecute b regs' s' = do
assert (segmentFlags (addrSegment tgt_addr) `Perm.hasPerm` Perm.execute) $ do
-- Merge block state.
let abst' = abst & setAbsIP tgt_addr
mergeIntraJump lbl abst' tgt_addr
mergeIntraJump src abst' tgt_addr
-- Block ends with what looks like a jump table.
| AssignedValue (Assignment _ (ReadMem ptr _))
<- debug DCFG "try jump table" $ s'^.curIP
| AssignedValue (Assignment _ (ReadMem ptr _)) <- debug DCFG "try jump table" $ s'^.curIP
-- Attempt to compute interval of addresses interval is over.
, Just (base, jump_idx) <- matchJumpTable mem ptr -> do
debug DCFG ("Found jump table at " ++ show lbl) $ do
, Just (base, jump_idx) <- matchJumpTable mem ptr
, Just read_end <- getJumpTableBounds arch_info src regs' base jump_idx -> do
mapM_ (recordWriteStmt src regs') (blockStmts b)
-- Try to compute jump table bounds
read_end <-
case getJumpTableBounds regs' base jump_idx of
Just e -> pure e
Nothing -> error $ "Could not compute jump bounds."
-- Try to compute jump table bounds
let abst :: AbsBlockState (ArchReg arch)
abst = finalAbsBlockState regs' s'
@ -645,7 +653,7 @@ fetchAndExecute b regs' s' = do
let flags = segmentFlags (addrSegment tgt_addr)
assert (flags `Perm.hasPerm` Perm.execute) $ do
let abst' = abst & setAbsIP tgt_addr
mergeIntraJump lbl abst' tgt_addr
mergeIntraJump src abst' tgt_addr
resolveJump (tgt_addr:prev) (idx+1)
_ -> do
debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show read_end) $ do
@ -664,6 +672,7 @@ fetchAndExecute b regs' s' = do
let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP)
-- Mark entry points as the start of functions
mapM_ (markAddrAsFunction (error "Uninterpretable jump reason")) addrs
-}
type DiscoveryConstraints arch
= ( PrettyCFGConstraints arch
@ -687,6 +696,301 @@ tryLookupBlock ctx base block_map lbl =
++ " given invalid index " ++ show (labelIndex lbl)
Just b -> b
refineProcStateBounds :: ( OrdF (ArchReg arch)
, HasRepr (ArchReg arch) TypeRepr
)
=> Value arch ids BoolType
-> Bool
-> AbsProcessorState (ArchReg arch) ids
-> AbsProcessorState (ArchReg arch) ids
refineProcStateBounds v isTrue ps =
case indexBounds (Jmp.assertPred v isTrue) ps of
Left{} -> ps
Right ps' -> ps'
data ParseState arch ids = ParseState { _pblockMap :: !(Map Word64 (ParsedBlock arch ids))
-- ^ Block m ap
, _writtenCodeAddrs :: ![ArchSegmentedAddr arch]
-- ^ Addresses marked executable that were written to memory.
, _intraJumpTargets :: ![(ArchSegmentedAddr arch, AbsBlockState (ArchReg arch))]
, _newFunctionAddrs :: ![ArchSegmentedAddr arch]
}
pblockMap :: Simple Lens (ParseState arch ids) (Map Word64 (ParsedBlock arch ids))
pblockMap = lens _pblockMap (\s v -> s { _pblockMap = v })
writtenCodeAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentedAddr arch]
writtenCodeAddrs = lens _writtenCodeAddrs (\s v -> s { _writtenCodeAddrs = v })
intraJumpTargets :: Simple Lens (ParseState arch ids) [(ArchSegmentedAddr arch, AbsBlockState (ArchReg arch))]
intraJumpTargets = lens _intraJumpTargets (\s v -> s { _intraJumpTargets = v })
newFunctionAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentedAddr arch]
newFunctionAddrs = lens _newFunctionAddrs (\s v -> s { _newFunctionAddrs = v })
-- | Mark addresses written to memory that point to code as function entry points.
recordWriteStmt' :: ( HasRepr (ArchReg arch) TypeRepr
, Integral (MemWord (RegAddrWidth (ArchReg arch)))
, IsAddr (ArchAddrWidth arch)
, OrdF (ArchReg arch)
, ShowF (ArchReg arch)
)
=> Memory (ArchAddrWidth arch)
-> AbsProcessorState (ArchReg arch) ids
-> Stmt arch ids
-> State (ParseState arch ids) ()
recordWriteStmt' mem regs stmt = do
let addrWidth = addrWidthNatRepr $ memAddrWidth mem
case stmt of
WriteMem _addr v
| Just Refl <- testEquality (typeRepr v) (BVTypeRepr addrWidth) -> do
let addrs = concretizeAbsCodePointers mem (transferValue regs v)
writtenCodeAddrs %= (addrs ++)
_ -> return ()
data ParseContext arch ids = ParseContext { pctxMemory :: !(Memory (ArchAddrWidth arch))
, pctxArchInfo :: !(ArchitectureInfo arch)
, pctxAddr :: !(ArchSegmentedAddr arch)
, pctxBlockMap :: !(Map Word64 (Block arch ids))
}
-- | This explores a block that ends with a fetch and execute.
fetchAndExecute' :: forall arch ids
. ( Integral (MemWord (ArchAddrWidth arch))
, OrdF (ArchReg arch)
, HasRepr (ArchReg arch) TypeRepr
, IsAddr (ArchAddrWidth arch)
, RegisterInfo (ArchReg arch)
, PrettyArch arch
)
=> ParseContext arch ids
-> Block arch ids
-> AbsProcessorState (ArchReg arch) ids
-- ^ Registers at this block after statements executed
-> RegState (ArchReg arch) (Value arch ids)
-> State (ParseState arch ids) (ParsedBlock arch ids)
fetchAndExecute' ctx b regs s' = do
let lbl = blockLabel b
let src = labelAddr lbl
let lbl_idx = labelIndex lbl
let mem = pctxMemory ctx
let arch_info = pctxArchInfo ctx
-- See if next statement appears to end with a call.
-- We define calls as statements that end with a write that
-- stores the pc to an address.
let regs' = transferStmts arch_info regs (blockStmts b)
case () of
-- The last statement was a call.
-- Note that in some cases the call is known not to return, and thus
-- this code will never jump to the return value.
_ | Just (prev_stmts, ret) <- identifyCall mem (blockStmts b) s' -> do
Fold.mapM_ (recordWriteStmt' mem regs') prev_stmts
let abst = finalAbsBlockState regs' s'
seq abst $ do
-- Merge caller return information
intraJumpTargets %= ((ret, archPostCallAbsState arch_info abst ret):)
-- Look for new ips.
let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP)
newFunctionAddrs %= (++ addrs)
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = Fold.toList prev_stmts
, pblockState = regs'
, pblockTerm = ParsedCall s' (Just ret)
}
-- This block ends with a return.
| Just asgn' <- identifyReturn s' (callStackDelta arch_info) -> do
let isRetLoad s =
case s of
AssignStmt asgn
| Just Refl <- testEquality (assignId asgn) (assignId asgn') -> True
_ -> False
nonret_stmts = filter (not . isRetLoad) (blockStmts b)
mapM_ (recordWriteStmt' mem regs') nonret_stmts
let ip_val = s'^.boundValue ip_reg
case transferValue regs' ip_val of
ReturnAddr -> return ()
-- The return_val is bad.
-- This could indicate an imprecision in analysis or maybe unreachable/bad code.
rv ->
debug DCFG ("return_val is bad at " ++ show lbl ++ ": " ++ show rv) $
return ()
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = nonret_stmts
, pblockState = regs'
, pblockTerm = ParsedReturn s'
}
-- Check for tail call (anything where we are right at stack height
| ptrType <- BVTypeRepr (addrWidthNatRepr (archAddrWidth arch_info))
, sp_val <- s'^.boundValue sp_reg
, ReturnAddr <- transferRHS arch_info regs' (ReadMem sp_val ptrType) -> do
mapM_ (recordWriteStmt' mem regs') (blockStmts b)
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ParsedCall s' Nothing
}
-- Jump to concrete offset.
| Just tgt_addr <- asLiteralAddr mem (s'^.boundValue ip_reg) -> do
assert (segmentFlags (addrSegment tgt_addr) `Perm.hasPerm` Perm.execute) $ do
mapM_ (recordWriteStmt' mem regs') (blockStmts b)
-- Merge block state.
let abst = finalAbsBlockState regs' s'
let abst' = abst & setAbsIP tgt_addr
intraJumpTargets %= ((tgt_addr, abst'):)
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ParsedJump s' tgt_addr
}
-- Block ends with what looks like a jump table.
| AssignedValue (Assignment _ (ReadMem ptr _)) <- debug DCFG "try jump table" $ s'^.curIP
-- Attempt to compute interval of addresses interval is over.
, Just (base, jump_idx) <- matchJumpTable mem ptr
, Just read_end <- getJumpTableBounds arch_info src regs' base jump_idx -> do
mapM_ (recordWriteStmt' mem regs') (blockStmts b)
-- Try to compute jump table bounds
let abst :: AbsBlockState (ArchReg arch)
abst = finalAbsBlockState regs' s'
seq abst $ do
-- This function resolves jump table entries.
-- It is a recursive function that has an index into the jump table.
-- If the current index can be interpreted as a intra-procedural jump,
-- then it will add that to the current procedure.
-- This returns the last address read.
let resolveJump :: [ArchSegmentedAddr arch]
-- /\ Addresses in jump table in reverse order
-> ArchAddr arch
-- /\ Current index
-> State (ParseState arch ids) [ArchSegmentedAddr arch]
resolveJump prev idx | idx == read_end = do
-- Stop jump table when we have reached computed bounds.
return (reverse prev)
resolveJump prev idx = do
let read_addr = base & addrOffset +~ 8 * idx
case readAddr mem LittleEndian read_addr of
Right tgt_addr
| Perm.isReadonly (segmentFlags (addrSegment read_addr)) -> do
let flags = segmentFlags (addrSegment tgt_addr)
assert (flags `Perm.hasPerm` Perm.execute) $ do
let abst' = abst & setAbsIP tgt_addr
intraJumpTargets %= ((tgt_addr, abst'):)
resolveJump (tgt_addr:prev) (idx+1)
_ -> do
debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show read_end) $ do
return (reverse prev)
read_addrs <- resolveJump [] 0
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ParsedLookupTable s' jump_idx (V.fromList read_addrs)
}
-- Block that ends with some unknown
| otherwise -> do
trace ("Could not classify " ++ show lbl) $ do
mapM_ (recordWriteStmt' mem regs') (blockStmts b)
pure ParsedBlock { pblockLabel = lbl_idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ClassifyFailure "Could not classify block type."
}
type ParseConstraints arch = ( RegisterInfo (ArchReg arch)
, PrettyArch arch
, HasRepr (ArchReg arch) TypeRepr
, IsAddr (RegAddrWidth (ArchReg arch))
)
-- | This evalutes the statements in a block to expand the information known
-- about control flow targets of this block.
parseBlocks :: ParseConstraints arch
=> ParseContext arch ids
-- ^ Context for parsing blocks.
-> [(Block arch ids, AbsProcessorState (ArchReg arch) ids)]
-- ^ Queue of blocks to travese
-> State (ParseState arch ids) ()
parseBlocks _ct [] = pure ()
parseBlocks ctx ((b,regs):rest) = do
let mem = pctxMemory ctx
let arch_info = pctxArchInfo ctx
let lbl = blockLabel b
let idx = labelIndex lbl
let src = pctxAddr ctx
let block_map = pctxBlockMap ctx
-- FIXME: we should propagate c back to the initial block, not just b
case blockTerm b of
Branch c lb rb -> do
let regs' = transferStmts arch_info regs (blockStmts b)
mapM_ (recordWriteStmt' mem regs') (blockStmts b)
let l = tryLookupBlock "left branch" src block_map lb
let l_regs = refineProcStateBounds c True $ refineProcState c absTrue regs'
let r = tryLookupBlock "right branch" src block_map rb
let r_regs = refineProcStateBounds c False $ refineProcState c absFalse regs'
-- We re-transfer the stmts to propagate any changes from
-- the above refineProcState. This could be more efficient by
-- tracking what (if anything) changed. We also might
-- need to keep going back and forth until we reach a
-- fixpoint
let l_regs' = transferStmts arch_info l_regs (blockStmts b)
let r_regs' = transferStmts arch_info r_regs (blockStmts b)
let pb = ParsedBlock { pblockLabel = idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ParsedBranch c (labelIndex lb) (labelIndex rb)
}
pblockMap %= Map.insert idx pb
parseBlocks ctx ((l, l_regs'):(r, r_regs'):rest)
Syscall s' -> do
let regs' = transferStmts arch_info regs (blockStmts b)
mapM_ (recordWriteStmt' mem regs') (blockStmts b)
let abst = finalAbsBlockState regs' s'
case concretizeAbsCodePointers mem (abst^.absRegState^.curIP) of
[] -> error "Could not identify concrete system call address"
[addr] -> do
-- Merge system call result with possible next IPs.
let post = archPostSyscallAbsState arch_info abst addr
intraJumpTargets %= ((addr, post):)
let pb = ParsedBlock { pblockLabel = idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ParsedSyscall s' addr
}
pblockMap %= Map.insert idx pb
parseBlocks ctx rest
_ -> error "Multiple system call addresses."
FetchAndExecute s' -> do
pb <- fetchAndExecute' ctx b regs s'
pblockMap %= Map.insert idx pb
-- Do nothing when this block ends in a translation error.
TranslateError _ msg -> do
let regs' = transferStmts arch_info regs (blockStmts b)
let pb = ParsedBlock { pblockLabel = idx
, pblockStmts = blockStmts b
, pblockState = regs'
, pblockTerm = ParsedTranslateError msg
}
pblockMap %= Map.insert idx pb
-- | This evalutes the statements in a block to expand the information known
-- about control flow targets of this block.
transferBlock :: DiscoveryConstraints arch
@ -699,40 +1003,24 @@ transferBlock :: DiscoveryConstraints arch
-- ^ Abstract state describing machine state when block is encountered.
-> CFGM arch ids ()
transferBlock block_map b regs = do
s <- get
let lbl = blockLabel b
let src = labelAddr lbl
mem <- gets memory
arch_info <- gets archInfo
let regs' = transferStmts arch_info regs (blockStmts b)
-- FIXME: we should propagate c back to the initial block, not just b
case blockTerm b of
Branch c lb rb -> do
mapM_ (recordWriteStmt src regs') (blockStmts b)
let l = tryLookupBlock "left branch" (labelAddr (blockLabel b)) block_map lb
let l_regs = refineProcState c absTrue regs'
let r = tryLookupBlock "right branch" (labelAddr (blockLabel b)) block_map rb
let r_regs = refineProcState c absFalse regs'
-- We re-transfer the stmts to propagate any changes from
-- the above refineProcState. This could be more efficient by
-- tracking what (if anything) changed. We also might
-- need to keep going back and forth until we reach a
-- fixpoint
transferBlock block_map l (transferStmts arch_info l_regs (blockStmts b))
transferBlock block_map r (transferStmts arch_info r_regs (blockStmts b))
Syscall s' -> do
mapM_ (recordWriteStmt src regs') (blockStmts b)
let abst = finalAbsBlockState regs' s'
let ips = concretizeAbsCodePointers mem (abst^.absRegState^.curIP)
-- Merge system call result with possible next IPs.
Fold.forM_ ips $ \addr -> do
mergeIntraJump lbl (archPostSyscallAbsState arch_info abst addr) addr
FetchAndExecute s' -> do
fetchAndExecute b regs' s'
-- Do nothing when this block ends in a translation error.
TranslateError _ _ ->
pure ()
let ctx = ParseContext { pctxMemory = memory s
, pctxArchInfo = archInfo s
, pctxAddr = src
, pctxBlockMap = block_map
}
let ps0 = ParseState { _pblockMap = Map.empty
, _writtenCodeAddrs = []
, _intraJumpTargets = []
, _newFunctionAddrs = []
}
let ps = execState (parseBlocks ctx [(b,regs)]) ps0
pblockMapx
mapM_ (markAddrAsFunction (InWrite src)) (ps^.writtenCodeAddrs)
mapM_ (markAddrAsFunction (CallTarget src)) (ps^.newFunctionAddrs)
mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
transfer :: DiscoveryConstraints arch
=> ArchSegmentedAddr arch
@ -740,17 +1028,21 @@ transfer :: DiscoveryConstraints arch
transfer addr = do
mem <- gets memory
mfinfo <- use $ foundAddrs . at addr
mbr <- use $ blocks . at addr
case mbr of
Nothing -> error $ "getBlock called on block " ++ show addr ++ " we have not seen."
case mfinfo of
Nothing -> error $ "getBlock called on unfound address " ++ show addr ++ "."
Just finfo ->
case mbr of
Nothing -> error $ "getBlock called on block " ++ show addr ++ " we have not seen."
Just br -> do
case Map.lookup 0 (brBlocks br) of
Just root -> do
transferBlock (brBlocks br) root $
initAbsProcessorState mem (brAbsInitState br)
Nothing -> do
error $ "getBlock given block with empty blocks list."
Just br -> do
case Map.lookup 0 (brBlocks br) of
Just root -> do
transferBlock (brBlocks br) root $
initAbsProcessorState mem (foundAbstractState finfo)
Nothing -> do
error $ "getBlock given block with empty blocks list."
------------------------------------------------------------------------
-- Main loop

View File

@ -17,9 +17,11 @@ discovery.
{-# LANGUAGE ViewPatterns #-}
module Data.Macaw.Discovery.Info
( BlockRegion(..)
, FoundAddr(..)
, lookupBlock
, GlobalDataInfo(..)
, ParsedTermStmt(..)
, ParsedBlock(..)
-- * The interpreter state
, DiscoveryInfo
, emptyDiscoveryInfo
@ -27,6 +29,7 @@ module Data.Macaw.Discovery.Info
, archInfo
, memory
, symbolNames
, foundAddrs
, blocks
, functionEntries
, reverseEdges
@ -49,6 +52,7 @@ module Data.Macaw.Discovery.Info
import Control.Lens
import Control.Monad.ST
import qualified Data.ByteString as BS
import Data.Foldable
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Parameterized.Classes
@ -69,36 +73,35 @@ import Data.Macaw.Memory
import qualified Data.Macaw.Memory.Permissions as Perm
import Data.Macaw.Types
------------------------------------------------------------------------
-- FoundAddr
-- | An address that has been found to be reachable.
data FoundAddr arch
= FoundAddr { foundReason :: !(CodeAddrReason (ArchAddrWidth arch))
-- ^ The reason the address was found to be containing code.
, foundAbstractState :: !(AbsBlockState (ArchReg arch))
-- ^ The abstract state formed from post-states that reach this address.
}
------------------------------------------------------------------------
-- BlockRegion
-- | A contiguous region of instructions in memory.
data BlockRegion arch ids
= BlockRegion { brReason :: !(CodeAddrReason (ArchAddrWidth arch))
-- ^ The reason the start address was marked as
-- containing code.
, brSize :: !(ArchAddr arch)
= BlockRegion { brSize :: !(ArchAddr arch)
-- ^ The size of the region of memory covered by this.
, brBlocks :: !(Map Word64 (Block arch ids))
-- ^ Map from labelIndex to associated block.
, brAbsInitState :: !(AbsBlockState (ArchReg arch))
-- ^ The abstract state at the start of this block.
}
-- | Does a simple lookup in the cfg at a given DecompiledBlock address.
lookupBlock :: Map (ArchSegmentedAddr arch) (BlockRegion arch ids)
-> ArchLabel arch
-> Maybe (Block arch ids)
lookupBlock m lbl = do
br <- Map.lookup (labelAddr lbl) m
Map.lookup (labelIndex lbl) (brBlocks br)
------------------------------------------------------------------------
-- CodeAddrReason
-- | This describes the source of an address that was marked as containing code.
data CodeAddrReason w
= InWrite !(SegmentedAddr w)
= InWrite !(SegmentedAddr w)
-- ^ Exploring because the given block writes it to memory.
| NextIP !(SegmentedAddr w)
-- ^ Exploring because the given block jumps here.
@ -137,12 +140,10 @@ instance (Integral w, Show w) => Show (GlobalDataInfo w) where
-- interpreted.
data ParsedTermStmt arch ids
= ParsedCall !(RegState (ArchReg arch) (Value arch ids))
!(Seq (Stmt arch ids))
!(Maybe (ArchSegmentedAddr arch))
-- ^ A call with the current register values, statements lessed pushed return value
-- function to call, and location to return to (Nothing) if this is a tail call.
-- ^ A call with the current register values and location to return to or 'Nothing' if this is a tail call.
| ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentedAddr arch)
-- ^ A jump to an explicit address within a block.
-- ^ A jump to an explicit address within a function.
| ParsedLookupTable !(RegState (ArchReg arch) (Value arch ids))
!(BVValue arch ids (ArchAddrWidth arch))
!(V.Vector (ArchSegmentedAddr arch))
@ -150,9 +151,9 @@ data ParsedTermStmt arch ids
--
-- The registers store the registers, the value contains the index to jump
-- to, and the possible addresses.
| ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) !(Seq (Stmt arch ids))
| ParsedReturn !(RegState (ArchReg arch) (Value arch ids))
-- ^ A return with the given registers.
| ParsedBranch !(Value arch ids BoolType) !(ArchLabel arch) !(ArchLabel arch)
| ParsedBranch !(Value arch ids BoolType) !Word64 !Word64
-- ^ A branch (i.e., BlockTerm is Branch)
| ParsedSyscall !(RegState (ArchReg arch) (Value arch ids))
!(ArchSegmentedAddr arch)
@ -168,20 +169,33 @@ deriving instance
)
=> Show (ParsedTermStmt arch ids)
------------------------------------------------------------------------
-- ParsedBlock
data ParsedBlock arch ids
= ParsedBlock { pblockLabel :: !Word64
, pblockStmts :: !([Stmt arch ids])
, pblockState :: !(AbsProcessorState (ArchReg arch) ids)
-- ^ State of processor prior to term statement.
, pblockTerm :: !(ParsedTermStmt arch ids)
}
------------------------------------------------------------------------
-- DiscoveryInfo
-- | The state of the interpreter
data DiscoveryInfo arch ids
= DiscoveryInfo { nonceGen :: !(NonceGenerator (ST ids) ids)
= DiscoveryInfo { nonceGen :: !(NonceGenerator (ST ids) ids)
-- ^ Generator for creating fresh ids.
, memory :: !(Memory (ArchAddrWidth arch))
, memory :: !(Memory (ArchAddrWidth arch))
-- ^ The initial memory when disassembly started.
, symbolNames :: !(Map (ArchSegmentedAddr arch) BS.ByteString)
-- ^ The set of symbol names (not necessarily complete)
, archInfo :: !(ArchitectureInfo arch)
, archInfo :: !(ArchitectureInfo arch)
-- ^ Architecture-specific information needed for discovery.
, _blocks :: !(Map (ArchSegmentedAddr arch) (BlockRegion arch ids))
, _foundAddrs :: !(Map (ArchSegmentedAddr arch) (FoundAddr arch))
-- ^ Maps fopund address to the pre-state for that block.
, _blocks :: !(Map (ArchSegmentedAddr arch) (BlockRegion arch ids))
-- ^ Maps an address to the code associated with that address.
, _functionEntries :: !(Set (ArchSegmentedAddr arch))
-- ^ Maps addresses that are marked as the start of a function
@ -216,6 +230,7 @@ emptyDiscoveryInfo ng mem symbols info = DiscoveryInfo
, memory = mem
, symbolNames = symbols
, archInfo = info
, _foundAddrs = Map.empty
, _blocks = Map.empty
, _functionEntries = Set.empty
, _reverseEdges = Map.empty
@ -224,6 +239,9 @@ emptyDiscoveryInfo ng mem symbols info = DiscoveryInfo
, _function_frontier = Map.empty
}
foundAddrs :: Simple Lens (DiscoveryInfo arch ids) (Map (ArchSegmentedAddr arch) (FoundAddr arch))
foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v })
blocks :: Simple Lens (DiscoveryInfo arch ids)
(Map (ArchSegmentedAddr arch) (BlockRegion arch ids))
blocks = lens _blocks (\s v -> s { _blocks = v })
@ -256,6 +274,15 @@ function_frontier :: Simple Lens (DiscoveryInfo arch ids)
(CodeAddrReason (ArchAddrWidth arch)))
function_frontier = lens _function_frontier (\s v -> s { _function_frontier = v })
-- | Does a simple lookup in the cfg at a given DecompiledBlock address.
lookupBlock :: DiscoveryInfo arch ids
-> ArchLabel arch
-> Maybe (Block arch ids)
lookupBlock info lbl = do
br <- Map.lookup (labelAddr lbl) (info^.blocks)
Map.lookup (labelIndex lbl) (brBlocks br)
------------------------------------------------------------------------
-- DiscoveryInfo utilities
@ -421,8 +448,8 @@ tryGetStaticSyscallNo interp_state block_addr proc_state
| BVValue _ call_no <- proc_state^.boundValue syscall_num_reg =
Just call_no
| Initial r <- proc_state^.boundValue syscall_num_reg
, Just b <- interp_state^.blocks^.at block_addr =
asConcreteSingleton (brAbsInitState b^.absRegState^.boundValue r)
, Just info <- interp_state^.foundAddrs^.at block_addr =
asConcreteSingleton (foundAbstractState info^.absRegState^.boundValue r)
| otherwise =
Nothing
@ -431,20 +458,26 @@ classifyBlock :: forall arch ids
. (ArchConstraint arch ids, MemWidth (ArchAddrWidth arch))
=> Block arch ids
-> DiscoveryInfo arch ids
-> ParsedTermStmt arch ids
classifyBlock b interp_state =
-> ([Stmt arch ids], ParsedTermStmt arch ids)
classifyBlock b interp_state = do
let stmts = blockStmts b
mem = memory interp_state
case blockTerm b of
TranslateError _ msg -> ParsedTranslateError msg
Branch c x y -> ParsedBranch c x y
TranslateError _ msg -> (stmts, ParsedTranslateError msg)
Branch c x y
| labelAddr x /= labelAddr (blockLabel b) -> error "Branch with bad child"
| labelAddr y /= labelAddr (blockLabel b) -> error "Branch with bad child"
| otherwise -> (stmts, ParsedBranch c (labelIndex x) (labelIndex y))
FetchAndExecute proc_state
-- The last statement was a call.
| Just (prev_stmts, ret_addr) <- identifyCall mem stmts proc_state ->
ParsedCall proc_state prev_stmts (Just ret_addr)
(toList prev_stmts, ParsedCall proc_state (Just ret_addr))
-- Jump to concrete offset.
| Just tgt_addr <- asLiteralAddr mem (proc_state^.boundValue ip_reg)
, inSameFunction (labelAddr (blockLabel b)) tgt_addr interp_state ->
ParsedJump proc_state tgt_addr
(stmts, ParsedJump proc_state tgt_addr)
-- Return
| Just asgn <- identifyReturn proc_state (callStackDelta (archInfo interp_state)) ->
@ -453,26 +486,23 @@ classifyBlock b interp_state =
AssignStmt asgn'
| Just Refl <- testEquality (assignId asgn) (assignId asgn') -> True
_ -> False
nonret_stmts = Seq.fromList $ filter (not . isRetLoad) stmts
in ParsedReturn proc_state nonret_stmts
nonret_stmts = filter (not . isRetLoad) stmts
in (nonret_stmts, ParsedReturn proc_state)
-- Jump table
| let entry = getFunctionEntryPoint (labelAddr (blockLabel b)) interp_state
, let cur_ip = proc_state^.boundValue ip_reg
, Just (idx, nexts) <- identifyJumpTable interp_state entry cur_ip ->
ParsedLookupTable proc_state idx nexts
(stmts, ParsedLookupTable proc_state idx nexts)
-- Finally, we just assume that this is a tail call through a pointer
-- FIXME: probably unsound.
| otherwise ->
ParsedCall proc_state (Seq.fromList stmts) Nothing
(stmts, ParsedCall proc_state Nothing)
-- rax is concrete in the first case, so we don't need to propagate it etc.
Syscall proc_state
| Just next_addr <- asLiteralAddr mem (proc_state^.boundValue ip_reg) ->
ParsedSyscall proc_state next_addr
(stmts, ParsedSyscall proc_state next_addr)
| otherwise -> ClassifyFailure "System call with non-literal return address."
where
stmts = blockStmts b
mem = memory interp_state
| otherwise -> (stmts, ClassifyFailure "System call with non-literal return address.")

View File

@ -155,7 +155,7 @@ bsWord64le bs
------------------------------------------------------------------------
-- MemBase
newtype MemWord (n :: Nat) = MemWord Word64
newtype MemWord (n :: Nat) = MemWord { memWordValue :: Word64 }
-- ^ A value in memory.
instance Show (MemWord w) where
@ -445,11 +445,16 @@ addrValue :: Num (MemWord w) => SegmentedAddr w -> MemWord w
addrValue addr = addrBase addr + addr^.addrOffset
instance Show (SegmentedAddr w) where
showsPrec p a = showParen (p > 6) $
showString "segment"
. shows (segmentIndex (addrSegment a))
. showString "+"
. shows (a^.addrOffset)
showsPrec p a =
case segmentBase (addrSegment a) of
Just b ->
showString "0x" . showHex (memWordValue b + memWordValue (a^.addrOffset))
Nothing ->
showParen (p > 6)
$ showString "segment"
. shows (segmentIndex (addrSegment a))
. showString "+"
. shows (a^.addrOffset)
-- | Return contents starting from location or throw a memory error if there
-- is an unaligned relocation.

View File

@ -112,7 +112,7 @@ instance Num (MemWord w) => ByteReader (MemoryByteReader w) where
MBR $ throwError $ UnalignedRelocation (msAddr ms)
ByteRegion bs:rest -> do
if BS.null bs then do
MBR $ throwError $ AccessViolation (msAddr ms)
throwError $ AccessViolation (msAddr ms)
else do
let v = BS.head bs
let ms' = ms { msPrev = consByte v (msPrev ms)