diff --git a/macaw.cabal b/macaw.cabal index 3c05bf1a..c296dfa7 100644 --- a/macaw.cabal +++ b/macaw.cabal @@ -27,6 +27,7 @@ library exposed-modules: Data.Macaw.AbsDomain.AbsState + Data.Macaw.AbsDomain.JumpBounds Data.Macaw.AbsDomain.Refine Data.Macaw.AbsDomain.StridedInterval Data.Macaw.Architecture.Info @@ -35,7 +36,6 @@ library Data.Macaw.DebugLogging Data.Macaw.Discovery Data.Macaw.Discovery.Info - Data.Macaw.Discovery.JumpBounds Data.Macaw.Dwarf Data.Macaw.Memory Data.Macaw.Memory.ElfLoader diff --git a/src/Data/Macaw/AbsDomain/AbsState.hs b/src/Data/Macaw/AbsDomain/AbsState.hs index 470f3f05..77b2b202 100644 --- a/src/Data/Macaw/AbsDomain/AbsState.hs +++ b/src/Data/Macaw/AbsDomain/AbsState.hs @@ -41,6 +41,7 @@ module Data.Macaw.AbsDomain.AbsState , AbsProcessorState , curAbsStack , absInitialRegs + , indexBounds , startAbsStack , initAbsProcessorState , absAssignments @@ -77,10 +78,10 @@ import qualified Data.Set as Set import Numeric (showHex) import Text.PrettyPrint.ANSI.Leijen hiding ((<$>)) +import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp import qualified Data.Macaw.AbsDomain.StridedInterval as SI import Data.Macaw.CFG import Data.Macaw.DebugLogging -import Data.Macaw.Discovery.JumpBounds import Data.Macaw.Memory import qualified Data.Macaw.Memory.Permissions as Perm import Data.Macaw.Types @@ -708,6 +709,16 @@ bvmul w v v' bvmul _ _ _ = TopV +-- FIXME: generalise +bvand :: Integral (MemWord w) + => NatRepr u + -> AbsValue w (BVType u) + -> AbsValue w (BVType u) + -> AbsValue w (BVType u) +bvand _w (asFinSet "bvand" -> IsFin s) (asConcreteSingleton -> Just v) = FinSet (Set.map (flip (.&.) v) s) +bvand _w (asConcreteSingleton -> Just v) (asFinSet "bvand" -> IsFin s) = FinSet (Set.map ((.&.) v) s) +bvand _ _ _ = TopV + -- FIXME: generalise bitop :: Integral (MemWord w) => (Integer -> Integer -> Integer) @@ -905,7 +916,7 @@ ppAbsStack m = vcat (pp <$> Map.toDescList m) data AbsBlockState r = AbsBlockState { _absRegState :: !(RegState r (AbsValue (RegAddrWidth r))) , _startAbsStack :: !(AbsBlockStack (RegAddrWidth r)) - , _initIndexBounds :: !(InitialIndexBounds r) + , _initIndexBounds :: !(Jmp.InitialIndexBounds r) } deriving instance MapF.OrdF r => Eq (AbsBlockState r) @@ -918,7 +929,7 @@ absRegState = lens _absRegState (\s v -> s { _absRegState = v }) startAbsStack :: Simple Lens (AbsBlockState r) (AbsBlockStack (RegAddrWidth r)) startAbsStack = lens _startAbsStack (\s v -> s { _startAbsStack = v }) -initIndexBounds :: Simple Lens (AbsBlockState r) (InitialIndexBounds r) +initIndexBounds :: Simple Lens (AbsBlockState r) (Jmp.InitialIndexBounds r) initIndexBounds = lens _initIndexBounds (\s v -> s { _initIndexBounds = v }) instance ( RegisterInfo r @@ -927,7 +938,7 @@ instance ( RegisterInfo r top = AbsBlockState { _absRegState = mkRegState (\_ -> TopV) , _startAbsStack = Map.empty - , _initIndexBounds = arbitraryInitialBounds + , _initIndexBounds = Jmp.arbitraryInitialBounds } joinD x y | regs_changed = Just $! z @@ -951,7 +962,7 @@ instance ( RegisterInfo r return $! zr z_stk <- absStackJoinD x_stk y_stk z_bnds <- - case joinInitialBounds (x^.initIndexBounds) (y^.initIndexBounds) of + case Jmp.joinInitialBounds (x^.initIndexBounds) (y^.initIndexBounds) of Just z_bnds -> (_1 .= True) $> z_bnds Nothing -> pure (x^.initIndexBounds) @@ -965,11 +976,13 @@ instance ( ShowF r pretty s = text "registers:" <$$> indent 2 (pretty (s^.absRegState)) <$$> - stack_d + stack_d <$$> + jmp_bnds where stack = s^.startAbsStack stack_d | Map.null stack = empty | otherwise = text "stack:" <$$> indent 2 (ppAbsStack stack) + jmp_bnds = pretty (s^.initIndexBounds) instance ShowF r => Show (AbsBlockState r) where show = show . pretty @@ -1009,7 +1022,7 @@ data AbsProcessorState r ids -- symbolic values associated with them , _curAbsStack :: !(AbsBlockStack (RegAddrWidth r)) -- ^ The current symbolic state of the stack - , _indexBounds :: !(IndexBounds r ids) + , _indexBounds :: !(Jmp.IndexBounds r ids) } -- | The width of an address @@ -1028,7 +1041,7 @@ curAbsStack :: Simple Lens (AbsProcessorState r ids) (AbsBlockStack (RegAddrWidt curAbsStack = lens _curAbsStack (\s v -> s { _curAbsStack = v }) -- | Return the index -indexBounds :: Simple Lens (AbsProcessorState r ids) (IndexBounds r ids) +indexBounds :: Simple Lens (AbsProcessorState r ids) (Jmp.IndexBounds r ids) indexBounds = lens _indexBounds (\s v -> s { _indexBounds = v }) instance ShowF r @@ -1058,7 +1071,7 @@ initAbsProcessorState mem s = , _absInitialRegs = s^.absRegState , _absAssignments = MapF.empty , _curAbsStack = s^.startAbsStack - , _indexBounds = mkIndexBounds (s^.initIndexBounds) + , _indexBounds = Jmp.mkIndexBounds (s^.initIndexBounds) } -- | A lens that allows one to lookup and update the value of an assignment in @@ -1208,7 +1221,7 @@ finalAbsBlockState c s = transferReg r = transferValue c (s^.boundValue r) in AbsBlockState { _absRegState = mkRegState transferReg , _startAbsStack = c^.curAbsStack - , _initIndexBounds = nextBlockBounds (c^.indexBounds) s + , _initIndexBounds = Jmp.nextBlockBounds (c^.indexBounds) s } ------------------------------------------------------------------------ @@ -1228,7 +1241,7 @@ transferApp r a = BVAdd w x y -> bvadd w (transferValue r x) (transferValue r y) BVSub w x y -> bvsub (absMem r) w (transferValue r x) (transferValue r y) BVMul w x y -> bvmul w (transferValue r x) (transferValue r y) - BVAnd w x y -> bitop (.&.) w (transferValue r x) (transferValue r y) + BVAnd w x y -> bvand w (transferValue r x) (transferValue r y) BVOr w x y -> bitop (.|.) w (transferValue r x) (transferValue r y) _ -> TopV @@ -1253,7 +1266,7 @@ postCallAbsState :: forall r postCallAbsState params ab0 addr = AbsBlockState { _absRegState = mkRegState regFn , _startAbsStack = ab0^.startAbsStack - , _initIndexBounds = arbitraryInitialBounds + , _initIndexBounds = Jmp.arbitraryInitialBounds } where regFn :: r tp -> AbsValue (RegAddrWidth r) tp regFn r diff --git a/src/Data/Macaw/Discovery/JumpBounds.hs b/src/Data/Macaw/AbsDomain/JumpBounds.hs similarity index 66% rename from src/Data/Macaw/Discovery/JumpBounds.hs rename to src/Data/Macaw/AbsDomain/JumpBounds.hs index 54dd419f..a45d6e01 100644 --- a/src/Data/Macaw/Discovery/JumpBounds.hs +++ b/src/Data/Macaw/AbsDomain/JumpBounds.hs @@ -2,14 +2,17 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE ScopedTypeVariables #-} -module Data.Macaw.Discovery.JumpBounds +{-# LANGUAGE StandaloneDeriving #-} +module Data.Macaw.AbsDomain.JumpBounds ( InitialIndexBounds , arbitraryInitialBounds , joinInitialBounds , IndexBounds , mkIndexBounds + , UpperBound(..) , addUpperBound - , lookupUpperBound + , assertPred + , unsignedUpperBound , nextBlockBounds ) where @@ -20,6 +23,7 @@ import Data.Parameterized.Classes import Data.Parameterized.Map (MapF) import qualified Data.Parameterized.Map as MapF import Data.Parameterized.NatRepr (maxUnsigned) +import Text.PrettyPrint.ANSI.Leijen hiding ((<$>)) import Data.Macaw.CFG import Data.Macaw.Types @@ -36,6 +40,8 @@ instance MapF.EqF UpperBound where instance Ord (UpperBound tp) where compare (IntegerUpperBound x) (IntegerUpperBound y) = compare x y +deriving instance Show (UpperBound tp) + -- | Bounds for a function as the start of a block. data InitialIndexBounds r = InitialIndexBounds { initialRegUpperBound :: !(MapF r UpperBound) @@ -44,6 +50,11 @@ data InitialIndexBounds r instance MapF.TestEquality r => Eq (InitialIndexBounds r) where x == y = initialRegUpperBound x == initialRegUpperBound y +instance ShowF r => Pretty (InitialIndexBounds r) where + pretty r = vcat $ ppPair <$> MapF.toList (initialRegUpperBound r) + where ppPair :: MapF.Pair r UpperBound -> Doc + ppPair (MapF.Pair k (IntegerUpperBound b)) = text (showF k) <+> text "<=" <+> text (show b) + -- | Create initial index bounds that can represent any system state. arbitraryInitialBounds :: InitialIndexBounds reg arbitraryInitialBounds = InitialIndexBounds { initialRegUpperBound = MapF.empty } @@ -104,9 +115,10 @@ mkIndexBounds i = IndexBounds { _regBounds = initialRegUpperBound i , _assignUpperBound = MapF.empty } +-- | Add a inclusive upper bound to a value. addUpperBound :: ( MapF.OrdF (ArchReg arch) - , HasRepr (ArchReg arch) TypeRepr - ) + , HasRepr (ArchReg arch) TypeRepr + ) => BVValue arch ids w -> Integer -- ^ Upper bound as an unsigned number -> IndexBounds (ArchReg arch) ids @@ -121,18 +133,48 @@ addUpperBound v u bnds | otherwise -> Left "Constant given upper bound that is statically less than given bounds" RelocatableValue{} -> Left "Relocatable value does not have upper bounds." AssignedValue a -> - Right $ bnds & assignUpperBound %~ MapF.insertWith min (assignId a) (IntegerUpperBound u) + case assignRhs a of + EvalApp (UExt x _) -> addUpperBound x u bnds + EvalApp (Trunc x w) -> + if u < maxUnsigned w then + addUpperBound x u bnds + else + Right $ bnds + _ -> + Right $ bnds & assignUpperBound %~ MapF.insertWith min (assignId a) (IntegerUpperBound u) Initial r -> Right $ bnds & regBounds %~ MapF.insertWith min r (IntegerUpperBound u) + +-- | Assert a predice is true and update bounds. +assertPred :: ( OrdF (ArchReg arch) + , HasRepr (ArchReg arch) TypeRepr + ) + => Value arch ids BoolType -- ^ Value represnting predicate + -> Bool -- ^ Controls whether predicate is true or false + -> IndexBounds (ArchReg arch) ids -- ^ Current index bounds + -> Either String (IndexBounds (ArchReg arch) ids) +assertPred (AssignedValue a) isTrue bnds = + case assignRhs a of + -- Given x < c), assert x <= c-1 + EvalApp (BVUnsignedLt x (BVValue _ c)) | isTrue -> addUpperBound x (c-1) bnds + -- Given not (c < y), assert y <= c + EvalApp (BVUnsignedLt (BVValue _ c) y) | not isTrue -> addUpperBound y c bnds + -- Given x <= c, assert x <= c + EvalApp (BVUnsignedLe x (BVValue _ c)) | isTrue -> addUpperBound x c bnds + -- Given not (c <= y), assert y <= (c-1) + EvalApp (BVUnsignedLe (BVValue _ c) y) | not isTrue -> addUpperBound y (c-1) bnds + _ -> Right bnds +assertPred _ _ bnds = Right bnds + -- | Lookup an upper bound or return analysis for why it is not defined. -lookupUpperBound :: ( MapF.OrdF (ArchReg arch) +unsignedUpperBound :: ( MapF.OrdF (ArchReg arch) , MapF.ShowF (ArchReg arch) ) => IndexBounds (ArchReg arch) ids -> Value arch ids tp -> Either String (UpperBound tp) -lookupUpperBound bnds v = +unsignedUpperBound bnds v = case v of BVValue _ i -> Right (IntegerUpperBound i) RelocatableValue{} -> @@ -140,12 +182,25 @@ lookupUpperBound bnds v = AssignedValue a -> case MapF.lookup (assignId a) (bnds^.assignUpperBound) of Just bnd -> Right bnd - Nothing -> Left $ "Could not find upper bounds for " ++ show (assignId a) ++ "." + Nothing -> + case assignRhs a of + EvalApp (BVAnd _ x y) -> do + case (unsignedUpperBound bnds x, unsignedUpperBound bnds y) of + (Right (IntegerUpperBound xb), Right (IntegerUpperBound yb)) -> + Right (IntegerUpperBound (min xb yb)) + (Right xb, Left{}) -> Right xb + (Left{}, yb) -> yb + EvalApp (UExt x _) -> do + IntegerUpperBound r <- unsignedUpperBound bnds x + pure (IntegerUpperBound r) + EvalApp (Trunc x w) -> do + IntegerUpperBound xr <- unsignedUpperBound bnds x + pure $! IntegerUpperBound (min xr (maxUnsigned w)) + _ -> Left $ "Could not find upper bounds for " ++ show (assignId a) ++ "." Initial r -> case MapF.lookup r (bnds^.regBounds) of Just bnd -> Right bnd - Nothing -> Left $ "Could not find upper bounds for " ++ showF r ++ "." - + Nothing -> Left $ "No upper bounds for " ++ showF r ++ "." eitherToMaybe :: Either a b -> Maybe b eitherToMaybe (Right v) = Just v @@ -163,6 +218,6 @@ nextBlockBounds :: forall arch ids -> InitialIndexBounds (ArchReg arch) nextBlockBounds bnds regs = let m = regStateMap regs - nextBounds = MapF.mapMaybe (eitherToMaybe . lookupUpperBound bnds) m + nextBounds = MapF.mapMaybe (eitherToMaybe . unsignedUpperBound bnds) m in InitialIndexBounds { initialRegUpperBound = nextBounds } diff --git a/src/Data/Macaw/CFG.hs b/src/Data/Macaw/CFG.hs index acc2237d..54cd3ef3 100644 --- a/src/Data/Macaw/CFG.hs +++ b/src/Data/Macaw/CFG.hs @@ -82,7 +82,6 @@ module Data.Macaw.CFG , sexpr , sexprA , PrettyF(..) - , PrettyFF(..) , PrettyArch(..) , PrettyRegValue(..) -- * Architecture type families @@ -160,10 +159,6 @@ class PrettyPrec v where class PrettyF (f :: k -> *) where prettyF :: f tp -> Doc --- | Pretty print over a pair of instances -class PrettyFF (f :: j -> k -> *) where - prettyFF :: f a b -> Doc - -- | Pretty print a document with parens if condition is true parenIf :: Bool -> Doc -> Doc parenIf True d = parens d @@ -695,7 +690,7 @@ type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) type ArchLabel arch = BlockLabel (ArchAddrWidth arch) -- | Operations on a Arch -class ShowF (ArchReg arch) => PrettyArch arch where +class (ShowF (ArchReg arch), PrettyF (ArchStmt arch)) => PrettyArch arch where -- | A function for pretty printing an archFn of a given type. ppArchFn :: Applicative m => (forall u . Value arch ids u -> m Doc) @@ -833,8 +828,7 @@ $(pure []) ppLit :: NatRepr n -> Integer -> Doc ppLit w i - | i >= 0 = - text ("0x" ++ showHex i "") <+> text "::" <+> brackets (text (show w)) + | i >= 0 = text ("0x" ++ showHex i "") <+> text "::" <+> brackets (text (show w)) | otherwise = error "ppLit given negative value" -- | Pretty print a value. @@ -855,28 +849,20 @@ instance ShowF (ArchReg arch) => Show (Value arch ids tp) where -- | Pretty print an assignment right-hand side using operations parameterized -- over an application to allow side effects. -ppAssignRhs :: Applicative m +ppAssignRhs :: (Applicative m, PrettyArch arch) => (forall u . Value arch ids u -> m Doc) -- ^ Function for pretty printing value. - -> (forall u . ArchFn arch ids u -> m Doc) - -- ^ Function for pretty printing an architecture-specific function -> AssignRhs arch ids tp -> m Doc -ppAssignRhs pp _ (EvalApp a) = ppAppA pp a -ppAssignRhs _ _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w)) -ppAssignRhs pp _ (ReadMem a _) = (\d -> text "*" PP.<> d) <$> pp a -ppAssignRhs _ pp (EvalArchFn f _) = pp f +ppAssignRhs pp (EvalApp a) = ppAppA pp a +ppAssignRhs _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w)) +ppAssignRhs pp (ReadMem a _) = (\d -> text "*" PP.<> d) <$> pp a +ppAssignRhs pp (EvalArchFn f _) = ppArchFn pp f -instance ( ShowF (ArchReg arch) - , PrettyFF (ArchFn arch) - ) => - Pretty (AssignRhs arch ids tp) where - pretty v = runIdentity $ ppAssignRhs (Identity . ppValue 10) (Identity . prettyFF) v +instance PrettyArch arch => Pretty (AssignRhs arch ids tp) where + pretty = runIdentity . ppAssignRhs (Identity . ppValue 10) -instance ( ShowF (ArchReg arch) - , PrettyFF (ArchFn arch) - ) => - Pretty (Assignment arch ids tp) where +instance PrettyArch arch => Pretty (Assignment arch ids tp) where pretty (Assignment lhs rhs) = ppAssignId lhs <+> text ":=" <+> pretty rhs $(pure []) @@ -902,7 +888,7 @@ collectValueRep _ (AssignedValue a :: Value arch ids tp) = do let ppVal :: forall u . Value arch ids u -> State (MapF (AssignId ids) DocF) Doc ppVal = collectValueRep 10 - rhs <- ppAssignRhs ppVal (ppArchFn ppVal) (assignRhs a) + rhs <- ppAssignRhs ppVal (assignRhs a) let d = ppAssignId lhs <+> text ":=" <+> rhs modify $ MapF.insert lhs (DocF d) return () @@ -1124,11 +1110,7 @@ data Stmt arch ids | ExecArchStmt (ArchStmt arch ids) -- ^ Execute an architecture specific statement -instance ( ShowF (ArchReg arch) - , PrettyF (ArchStmt arch) - , PrettyFF (ArchFn arch) - ) - => Pretty (Stmt arch ids) where +instance PrettyArch arch => Pretty (Stmt arch ids) where pretty (AssignStmt a) = pretty a pretty (WriteMem a rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs pretty (PlaceHolderStmt vals name) = text ("PLACEHOLDER: " ++ name) @@ -1138,11 +1120,7 @@ instance ( ShowF (ArchReg arch) pretty (ExecArchStmt s) = prettyF s -instance ( ShowF (ArchReg arch) - , PrettyF (ArchStmt arch) - , PrettyFF (ArchFn arch) - ) - => Show (Stmt arch ids) where +instance PrettyArch arch => Show (Stmt arch ids) where show = show . pretty ------------------------------------------------------------------------ @@ -1214,20 +1192,16 @@ refsInAssignRhs rhs = -- Block -- | A basic block in a control flow graph. --- Consists of: --- 1. A label that should uniquely identify the block, equence of -data Block arch ids = - Block { blockLabel :: !(ArchLabel arch) - , blockStmts :: !([Stmt arch ids]) - -- ^ List of statements in the block. - , blockTerm :: !(TermStmt arch ids) - -- ^ The last statement in the block. - } +data Block arch ids + = Block { blockLabel :: !(ArchLabel arch) + , blockStmts :: !([Stmt arch ids]) + -- ^ List of statements in the block. + , blockTerm :: !(TermStmt arch ids) + -- ^ The last statement in the block. + } instance ( OrdF (ArchReg arch) - , ShowF (ArchReg arch) - , PrettyFF (ArchFn arch) - , PrettyF (ArchStmt arch) + , PrettyArch arch ) => Pretty (Block arch ids) where pretty b = do @@ -1296,11 +1270,9 @@ insertBlocksForCode start size bs cfg = -- | Constraints for pretty printing a 'CFG'. type PrettyCFGConstraints arch = ( OrdF (ArchReg arch) - , ShowF (ArchReg arch) , Integral (ArchAddr arch) , Show (ArchAddr arch) - , PrettyF (ArchStmt arch) - , PrettyFF (ArchFn arch) + , PrettyArch arch ) instance PrettyCFGConstraints arch diff --git a/src/Data/Macaw/Discovery.hs b/src/Data/Macaw/Discovery.hs index a9e649e2..303aa346 100644 --- a/src/Data/Macaw/Discovery.hs +++ b/src/Data/Macaw/Discovery.hs @@ -45,22 +45,25 @@ import Data.Parameterized.NatRepr import Data.Parameterized.Nonce import Data.Parameterized.Some import qualified Data.Set as Set +import qualified Data.Vector as V import Data.Word import Numeric --import Text.PrettyPrint.ANSI.Leijen hiding ((<$>)) import Data.Macaw.AbsDomain.AbsState +import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp import Data.Macaw.AbsDomain.Refine import qualified Data.Macaw.AbsDomain.StridedInterval as SI import Data.Macaw.Architecture.Info import Data.Macaw.CFG import Data.Macaw.DebugLogging import Data.Macaw.Discovery.Info ---import Data.Macaw.Discovery.JumpBounds import Data.Macaw.Memory import qualified Data.Macaw.Memory.Permissions as Perm import Data.Macaw.Types +import Debug.Trace + transferRHS :: forall a ids tp . ( OrdF (ArchReg a) , ShowF (ArchReg a) @@ -193,45 +196,40 @@ runCFGM arch_info mem symbols m = do let init_info = emptyDiscoveryInfo nonce_gen mem symbols arch_info Some <$> execStateT (unCFGM m) init_info -printAddrBacktrace :: Map (ArchSegmentedAddr arch) (BlockRegion arch ids) +printAddrBacktrace :: Map (ArchSegmentedAddr arch) (FoundAddr arch) + -> ArchSegmentedAddr arch -> CodeAddrReason (ArchAddrWidth arch) -> [String] -printAddrBacktrace m rsn = do - let prev addr = - case Map.lookup addr m of - Just br -> printAddrBacktrace m (brReason br) - Nothing -> error $ "Unknown reason for address " ++ show addr +printAddrBacktrace found_map addr rsn = do + let pp msg = show addr ++ ": " ++ msg + let prev prev_addr = + case Map.lookup prev_addr found_map of + Just found_info -> printAddrBacktrace found_map prev_addr (foundReason found_info) + Nothing -> error $ "Unknown reason for address " ++ show prev_addr case rsn of - InWrite src -> - ["Written to memory in block at address " ++ show src ++ "."] - ++ prev src - NextIP src -> - ["Target IP for " ++ show src ++ "."] - ++ prev src - CallTarget src -> - ["Target IP of call at " ++ show src ++ "."] - ++ prev src - - InitAddr -> ["Initial entry point"] - CodePointerInMem src -> ["Memory address " ++ show src ++ " contained code."] - SplitAt src -> ["Split from read of " ++ show src ++ "."] ++ prev src - InterProcedureJump src -> ["Reference from external address " ++ show src ++ "."] ++ prev src + InWrite src -> pp ("Written to memory in block at address " ++ show src ++ ".") : prev src + NextIP src -> pp ("Target IP for " ++ show src ++ ".") : prev src + CallTarget src -> pp ("Target IP of call at " ++ show src ++ ".") : prev src + InitAddr -> [pp "Initial entry point."] + CodePointerInMem src -> [pp ("Memory address " ++ show src ++ " contained code.")] + SplitAt src -> pp ("Split from read of " ++ show src ++ ".") : prev src + InterProcedureJump src -> pp ("Reference from external address " ++ show src ++ ".") : prev src -- | Return true if this address was added because of the contents of a global address -- in memory initially. -- -- This heuristic is not very accurate, so we avoid printing errors when it leads to -- issues. -cameFromInitialMemoryContents :: Map (ArchSegmentedAddr arch) (BlockRegion arch ids) - -> CodeAddrReason (ArchAddrWidth arch) - -> Bool -cameFromInitialMemoryContents m rsn = do +cameFromUnsoundReason :: Map (ArchSegmentedAddr arch) (FoundAddr arch) + -> CodeAddrReason (ArchAddrWidth arch) + -> Bool +cameFromUnsoundReason found_map rsn = do let prev addr = - case Map.lookup addr m of - Just br -> cameFromInitialMemoryContents m (brReason br) + case Map.lookup addr found_map of + Just info -> cameFromUnsoundReason found_map (foundReason info) Nothing -> error $ "Unknown reason for address " ++ show addr case rsn of - InWrite src -> prev src + InWrite{} -> True NextIP src -> prev src CallTarget src -> prev src SplitAt src -> prev src @@ -264,21 +262,23 @@ tryDisassembleAddr rsn addr ab = do -- Build state for exploring this. case maybeError of Just e -> do - when (not (cameFromInitialMemoryContents block_addrs rsn)) $ do + when (not (cameFromUnsoundReason (s0^.foundAddrs) rsn)) $ do error $ "Failed to disassemble " ++ show e ++ "\n" - ++ unlines (printAddrBacktrace block_addrs rsn) + ++ unlines (printAddrBacktrace (s0^.foundAddrs) addr rsn) Nothing -> do pure () assert (segmentIndex (addrSegment next_ip) == segmentIndex (addrSegment addr)) $ do assert (next_ip^.addrOffset > addr^.addrOffset) $ do let block_map = Map.fromList [ (labelIndex (blockLabel b), b) | b <- bs ] -- Add block region to blocks. - let br = BlockRegion { brReason = rsn - , brSize = next_ip^.addrOffset - addr^.addrOffset + let found_info = FoundAddr { foundReason = rsn + , foundAbstractState = ab + } + let br = BlockRegion { brSize = next_ip^.addrOffset - addr^.addrOffset , brBlocks = block_map - , brAbsInitState = ab } - put $ s0 & blocks %~ Map.insert addr br + put $! s0 & foundAddrs %~ Map.insert addr found_info + & blocks %~ Map.insert addr br -- | Mark address as the start of a code block. markCodeAddrBlock :: PrettyCFGConstraints arch @@ -300,7 +300,9 @@ markCodeAddrBlock rsn addr ab = do -- Get block for addr tryDisassembleAddr rsn addr ab -- Get block for old block - tryDisassembleAddr (brReason br) l (brAbsInitState br) + case Map.lookup l (s^.foundAddrs) of + Just info -> tryDisassembleAddr (foundReason info) l (foundAbstractState info) + Nothing -> error $ "Internal mark code addr as block saved but no reason information stored." -- It's possible this will cause the current block to be broken, or cause a function to -- boundaries. However, we don't think this should cause the need automatically to -- re-evaluate a block as any information discovered should be strictly less than @@ -326,7 +328,6 @@ transferStmt info stmt = modify $ \r -> addMemWrite (r^.absInitialRegs^.curIP) addr v r _ -> return () - newtype HexWord = HexWord Word64 instance Show HexWord where @@ -353,6 +354,7 @@ markAddrAsFunction rsn addr = do maybeMapInsert :: Ord a => Maybe a -> b -> Map a b -> Map a b maybeMapInsert mk v = maybe id (\k -> Map.insert k v) mk +{- -- | Mark addresses written to memory that point to code as function entry points. recordWriteStmt :: ( PrettyCFGConstraints arch , HasRepr (ArchReg arch) TypeRepr @@ -372,6 +374,7 @@ recordWriteStmt src regs stmt = do let addrs = concretizeAbsCodePointers mem (transferValue regs v) mapM_ (markAddrAsFunction (InWrite src)) addrs _ -> return () +-} transferStmts :: ( HasRepr (ArchReg arch) TypeRepr , RegisterInfo (ArchReg arch) @@ -438,7 +441,7 @@ assignmentAbsValues info mem g absm = mergeIntraJump :: ( PrettyCFGConstraints arch , RegisterInfo (ArchReg arch) ) - => BlockLabel (ArchAddrWidth arch) + => ArchSegmentedAddr arch -- ^ Source label that we are jumping from. -> AbsBlockState (ArchReg arch) -- ^ Block state after executing instructions. @@ -450,23 +453,23 @@ mergeIntraJump src ab _tgt , debug DCFG ("WARNING: Missing return value in jump from " ++ show src ++ " to\n" ++ show ab) False = error "Unexpected mergeIntraJump" mergeIntraJump src ab tgt = do - let rsn = NextIP (labelAddr src) + let rsn = NextIP src -- Associate a new abstract state with the code region. s0 <- get - case Map.lookup tgt (s0^.blocks) of + case Map.lookup tgt (s0^.foundAddrs) of -- We have seen this block before, so need to join and see if -- the results is changed. - Just old_block -> do - case joinD (brAbsInitState old_block) ab of + Just old_info -> do + case joinD (foundAbstractState old_info) ab of Nothing -> return () Just new -> do - let new_block = old_block { brAbsInitState = new } - modify $ (blocks %~ Map.insert tgt new_block) - . (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton (labelAddr src))) + let new_info = old_info { foundAbstractState = new } + modify $ (foundAddrs %~ Map.insert tgt new_info) + . (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton src)) . (frontier %~ Map.insert tgt rsn) -- We haven't seen this block before Nothing -> do - modify $ (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton (labelAddr src))) + modify $ (reverseEdges %~ Map.insertWith Set.union tgt (Set.singleton src)) . (frontier %~ Map.insert tgt rsn) markCodeAddrBlock rsn tgt ab @@ -498,27 +501,38 @@ matchJumpTable _ _ = getJumpTableBounds :: ( OrdF (ArchReg a) , ShowF (ArchReg a) , MemWidth (ArchAddrWidth a) + , PrettyArch a ) - => AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. + => ArchitectureInfo a + -> ArchSegmentedAddr a + -> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. -> ArchSegmentedAddr a -- ^ Base -> BVValue a ids (ArchAddrWidth a) -- ^ Index in jump table -> Maybe (ArchAddr a) -- ^ One past last index in jump table or nothing -getJumpTableBounds regs base jump_index - -- Get range for the index. - | let abs_value = transferValue regs jump_index - , StridedInterval index_interval <- abs_value - -- Check that relevant interval is completely contained within a read-only - -- read only range in the memory. - , SI.StridedInterval _ index_base index_range index_stride <- - debug DCFG "getJumpTable3" $ index_interval - , index_end <- index_base + (index_range + 1) * index_stride - , rangeInReadonlySegment base (8 * fromInteger index_end) = - -- Get the addresses associated. - debug DCFG ("Fixed table " ++ show base ++ " [" ++ shows jump_index "]") $ - Just $! fromInteger index_end -getJumpTableBounds _ _ _ = Nothing +getJumpTableBounds arch addr regs base jump_index = do + let abs_value = transferValue regs jump_index + case abs_value of + StridedInterval (SI.StridedInterval _ index_base index_range index_stride) -> do + let index_end = index_base + (index_range + 1) * index_stride + if rangeInReadonlySegment base (jumpTableEntrySize arch * fromInteger index_end) then + case Jmp.unsignedUpperBound (regs^.indexBounds) jump_index of + Right (Jmp.IntegerUpperBound bnd) | bnd == index_range -> Just $! fromInteger index_end + Right bnd -> + trace ("Upper bound mismatch at " ++ show addr ++ ":\n" + ++ " JumpBounds:" ++ show bnd + ++ " Domain:" ++ show index_range) + Nothing + Left msg -> + trace ("Could not find jump table at " ++ show addr ++ ": " ++ msg ++ "\n" + ++ show (ppValueAssignments jump_index)) + Nothing -- error $ "Could not compute upper bound on jump table: " ++ msg + else + error $ "Jump table range is not in readonly memory" + TopV -> Nothing + _ -> error $ "Index interval is not a stride " ++ show abs_value +{- -- | This explores a block that ends with a fetch and execute. fetchAndExecute :: forall arch ids . ( RegisterInfo (ArchReg arch) @@ -548,7 +562,7 @@ fetchAndExecute b regs' s' = do let abst = finalAbsBlockState regs' s' seq abst $ do -- Merge caller return information - mergeIntraJump lbl (archPostCallAbsState arch_info abst ret) ret + mergeIntraJump src (archPostCallAbsState arch_info abst ret) ret -- Look for new ips. let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) mapM_ (markAddrAsFunction (CallTarget src)) addrs @@ -602,22 +616,16 @@ fetchAndExecute b regs' s' = do assert (segmentFlags (addrSegment tgt_addr) `Perm.hasPerm` Perm.execute) $ do -- Merge block state. let abst' = abst & setAbsIP tgt_addr - mergeIntraJump lbl abst' tgt_addr + mergeIntraJump src abst' tgt_addr -- Block ends with what looks like a jump table. - | AssignedValue (Assignment _ (ReadMem ptr _)) - <- debug DCFG "try jump table" $ s'^.curIP + | AssignedValue (Assignment _ (ReadMem ptr _)) <- debug DCFG "try jump table" $ s'^.curIP -- Attempt to compute interval of addresses interval is over. - , Just (base, jump_idx) <- matchJumpTable mem ptr -> do - debug DCFG ("Found jump table at " ++ show lbl) $ do - + , Just (base, jump_idx) <- matchJumpTable mem ptr + , Just read_end <- getJumpTableBounds arch_info src regs' base jump_idx -> do mapM_ (recordWriteStmt src regs') (blockStmts b) - -- Try to compute jump table bounds - read_end <- - case getJumpTableBounds regs' base jump_idx of - Just e -> pure e - Nothing -> error $ "Could not compute jump bounds." + -- Try to compute jump table bounds let abst :: AbsBlockState (ArchReg arch) abst = finalAbsBlockState regs' s' @@ -645,7 +653,7 @@ fetchAndExecute b regs' s' = do let flags = segmentFlags (addrSegment tgt_addr) assert (flags `Perm.hasPerm` Perm.execute) $ do let abst' = abst & setAbsIP tgt_addr - mergeIntraJump lbl abst' tgt_addr + mergeIntraJump src abst' tgt_addr resolveJump (tgt_addr:prev) (idx+1) _ -> do debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show read_end) $ do @@ -664,6 +672,7 @@ fetchAndExecute b regs' s' = do let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) -- Mark entry points as the start of functions mapM_ (markAddrAsFunction (error "Uninterpretable jump reason")) addrs +-} type DiscoveryConstraints arch = ( PrettyCFGConstraints arch @@ -687,6 +696,301 @@ tryLookupBlock ctx base block_map lbl = ++ " given invalid index " ++ show (labelIndex lbl) Just b -> b +refineProcStateBounds :: ( OrdF (ArchReg arch) + , HasRepr (ArchReg arch) TypeRepr + ) + => Value arch ids BoolType + -> Bool + -> AbsProcessorState (ArchReg arch) ids + -> AbsProcessorState (ArchReg arch) ids +refineProcStateBounds v isTrue ps = + case indexBounds (Jmp.assertPred v isTrue) ps of + Left{} -> ps + Right ps' -> ps' + +data ParseState arch ids = ParseState { _pblockMap :: !(Map Word64 (ParsedBlock arch ids)) + -- ^ Block m ap + , _writtenCodeAddrs :: ![ArchSegmentedAddr arch] + -- ^ Addresses marked executable that were written to memory. + , _intraJumpTargets :: ![(ArchSegmentedAddr arch, AbsBlockState (ArchReg arch))] + , _newFunctionAddrs :: ![ArchSegmentedAddr arch] + } + +pblockMap :: Simple Lens (ParseState arch ids) (Map Word64 (ParsedBlock arch ids)) +pblockMap = lens _pblockMap (\s v -> s { _pblockMap = v }) + +writtenCodeAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentedAddr arch] +writtenCodeAddrs = lens _writtenCodeAddrs (\s v -> s { _writtenCodeAddrs = v }) + +intraJumpTargets :: Simple Lens (ParseState arch ids) [(ArchSegmentedAddr arch, AbsBlockState (ArchReg arch))] +intraJumpTargets = lens _intraJumpTargets (\s v -> s { _intraJumpTargets = v }) + +newFunctionAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentedAddr arch] +newFunctionAddrs = lens _newFunctionAddrs (\s v -> s { _newFunctionAddrs = v }) + + +-- | Mark addresses written to memory that point to code as function entry points. +recordWriteStmt' :: ( HasRepr (ArchReg arch) TypeRepr + , Integral (MemWord (RegAddrWidth (ArchReg arch))) + , IsAddr (ArchAddrWidth arch) + , OrdF (ArchReg arch) + , ShowF (ArchReg arch) + ) + => Memory (ArchAddrWidth arch) + -> AbsProcessorState (ArchReg arch) ids + -> Stmt arch ids + -> State (ParseState arch ids) () +recordWriteStmt' mem regs stmt = do + let addrWidth = addrWidthNatRepr $ memAddrWidth mem + case stmt of + WriteMem _addr v + | Just Refl <- testEquality (typeRepr v) (BVTypeRepr addrWidth) -> do + let addrs = concretizeAbsCodePointers mem (transferValue regs v) + writtenCodeAddrs %= (addrs ++) + _ -> return () + +data ParseContext arch ids = ParseContext { pctxMemory :: !(Memory (ArchAddrWidth arch)) + , pctxArchInfo :: !(ArchitectureInfo arch) + , pctxAddr :: !(ArchSegmentedAddr arch) + , pctxBlockMap :: !(Map Word64 (Block arch ids)) + } + +-- | This explores a block that ends with a fetch and execute. +fetchAndExecute' :: forall arch ids + . ( Integral (MemWord (ArchAddrWidth arch)) + , OrdF (ArchReg arch) + , HasRepr (ArchReg arch) TypeRepr + , IsAddr (ArchAddrWidth arch) + , RegisterInfo (ArchReg arch) + , PrettyArch arch + ) + => ParseContext arch ids + -> Block arch ids + -> AbsProcessorState (ArchReg arch) ids + -- ^ Registers at this block after statements executed + -> RegState (ArchReg arch) (Value arch ids) + -> State (ParseState arch ids) (ParsedBlock arch ids) +fetchAndExecute' ctx b regs s' = do + let lbl = blockLabel b + let src = labelAddr lbl + let lbl_idx = labelIndex lbl + let mem = pctxMemory ctx + let arch_info = pctxArchInfo ctx + -- See if next statement appears to end with a call. + -- We define calls as statements that end with a write that + -- stores the pc to an address. + let regs' = transferStmts arch_info regs (blockStmts b) + case () of + -- The last statement was a call. + -- Note that in some cases the call is known not to return, and thus + -- this code will never jump to the return value. + _ | Just (prev_stmts, ret) <- identifyCall mem (blockStmts b) s' -> do + Fold.mapM_ (recordWriteStmt' mem regs') prev_stmts + let abst = finalAbsBlockState regs' s' + seq abst $ do + -- Merge caller return information + intraJumpTargets %= ((ret, archPostCallAbsState arch_info abst ret):) + -- Look for new ips. + let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) + newFunctionAddrs %= (++ addrs) + pure ParsedBlock { pblockLabel = lbl_idx + , pblockStmts = Fold.toList prev_stmts + , pblockState = regs' + , pblockTerm = ParsedCall s' (Just ret) + } + + -- This block ends with a return. + | Just asgn' <- identifyReturn s' (callStackDelta arch_info) -> do + + let isRetLoad s = + case s of + AssignStmt asgn + | Just Refl <- testEquality (assignId asgn) (assignId asgn') -> True + _ -> False + nonret_stmts = filter (not . isRetLoad) (blockStmts b) + + mapM_ (recordWriteStmt' mem regs') nonret_stmts + + let ip_val = s'^.boundValue ip_reg + case transferValue regs' ip_val of + ReturnAddr -> return () + -- The return_val is bad. + -- This could indicate an imprecision in analysis or maybe unreachable/bad code. + rv -> + debug DCFG ("return_val is bad at " ++ show lbl ++ ": " ++ show rv) $ + return () + pure ParsedBlock { pblockLabel = lbl_idx + , pblockStmts = nonret_stmts + , pblockState = regs' + , pblockTerm = ParsedReturn s' + } + -- Check for tail call (anything where we are right at stack height + | ptrType <- BVTypeRepr (addrWidthNatRepr (archAddrWidth arch_info)) + , sp_val <- s'^.boundValue sp_reg + , ReturnAddr <- transferRHS arch_info regs' (ReadMem sp_val ptrType) -> do + + mapM_ (recordWriteStmt' mem regs') (blockStmts b) + + pure ParsedBlock { pblockLabel = lbl_idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ParsedCall s' Nothing + } + + -- Jump to concrete offset. + | Just tgt_addr <- asLiteralAddr mem (s'^.boundValue ip_reg) -> do + assert (segmentFlags (addrSegment tgt_addr) `Perm.hasPerm` Perm.execute) $ do + mapM_ (recordWriteStmt' mem regs') (blockStmts b) + -- Merge block state. + let abst = finalAbsBlockState regs' s' + let abst' = abst & setAbsIP tgt_addr + intraJumpTargets %= ((tgt_addr, abst'):) + pure ParsedBlock { pblockLabel = lbl_idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ParsedJump s' tgt_addr + } + -- Block ends with what looks like a jump table. + | AssignedValue (Assignment _ (ReadMem ptr _)) <- debug DCFG "try jump table" $ s'^.curIP + -- Attempt to compute interval of addresses interval is over. + , Just (base, jump_idx) <- matchJumpTable mem ptr + , Just read_end <- getJumpTableBounds arch_info src regs' base jump_idx -> do + + mapM_ (recordWriteStmt' mem regs') (blockStmts b) + + -- Try to compute jump table bounds + + let abst :: AbsBlockState (ArchReg arch) + abst = finalAbsBlockState regs' s' + seq abst $ do + -- This function resolves jump table entries. + -- It is a recursive function that has an index into the jump table. + -- If the current index can be interpreted as a intra-procedural jump, + -- then it will add that to the current procedure. + -- This returns the last address read. + let resolveJump :: [ArchSegmentedAddr arch] + -- /\ Addresses in jump table in reverse order + -> ArchAddr arch + -- /\ Current index + -> State (ParseState arch ids) [ArchSegmentedAddr arch] + resolveJump prev idx | idx == read_end = do + -- Stop jump table when we have reached computed bounds. + return (reverse prev) + resolveJump prev idx = do + let read_addr = base & addrOffset +~ 8 * idx + case readAddr mem LittleEndian read_addr of + Right tgt_addr + | Perm.isReadonly (segmentFlags (addrSegment read_addr)) -> do + let flags = segmentFlags (addrSegment tgt_addr) + assert (flags `Perm.hasPerm` Perm.execute) $ do + let abst' = abst & setAbsIP tgt_addr + intraJumpTargets %= ((tgt_addr, abst'):) + resolveJump (tgt_addr:prev) (idx+1) + _ -> do + debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show read_end) $ do + return (reverse prev) + read_addrs <- resolveJump [] 0 + pure ParsedBlock { pblockLabel = lbl_idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ParsedLookupTable s' jump_idx (V.fromList read_addrs) + } + -- Block that ends with some unknown + | otherwise -> do + trace ("Could not classify " ++ show lbl) $ do + mapM_ (recordWriteStmt' mem regs') (blockStmts b) + pure ParsedBlock { pblockLabel = lbl_idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ClassifyFailure "Could not classify block type." + } + +type ParseConstraints arch = ( RegisterInfo (ArchReg arch) + , PrettyArch arch + , HasRepr (ArchReg arch) TypeRepr + , IsAddr (RegAddrWidth (ArchReg arch)) + ) + +-- | This evalutes the statements in a block to expand the information known +-- about control flow targets of this block. +parseBlocks :: ParseConstraints arch + => ParseContext arch ids + -- ^ Context for parsing blocks. + -> [(Block arch ids, AbsProcessorState (ArchReg arch) ids)] + -- ^ Queue of blocks to travese + -> State (ParseState arch ids) () +parseBlocks _ct [] = pure () +parseBlocks ctx ((b,regs):rest) = do + let mem = pctxMemory ctx + let arch_info = pctxArchInfo ctx + let lbl = blockLabel b + let idx = labelIndex lbl + let src = pctxAddr ctx + let block_map = pctxBlockMap ctx + -- FIXME: we should propagate c back to the initial block, not just b + case blockTerm b of + Branch c lb rb -> do + let regs' = transferStmts arch_info regs (blockStmts b) + mapM_ (recordWriteStmt' mem regs') (blockStmts b) + + let l = tryLookupBlock "left branch" src block_map lb + let l_regs = refineProcStateBounds c True $ refineProcState c absTrue regs' + let r = tryLookupBlock "right branch" src block_map rb + let r_regs = refineProcStateBounds c False $ refineProcState c absFalse regs' + -- We re-transfer the stmts to propagate any changes from + -- the above refineProcState. This could be more efficient by + -- tracking what (if anything) changed. We also might + -- need to keep going back and forth until we reach a + -- fixpoint + let l_regs' = transferStmts arch_info l_regs (blockStmts b) + let r_regs' = transferStmts arch_info r_regs (blockStmts b) + + + let pb = ParsedBlock { pblockLabel = idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ParsedBranch c (labelIndex lb) (labelIndex rb) + } + pblockMap %= Map.insert idx pb + + parseBlocks ctx ((l, l_regs'):(r, r_regs'):rest) + + Syscall s' -> do + let regs' = transferStmts arch_info regs (blockStmts b) + mapM_ (recordWriteStmt' mem regs') (blockStmts b) + let abst = finalAbsBlockState regs' s' + case concretizeAbsCodePointers mem (abst^.absRegState^.curIP) of + [] -> error "Could not identify concrete system call address" + [addr] -> do + -- Merge system call result with possible next IPs. + let post = archPostSyscallAbsState arch_info abst addr + + intraJumpTargets %= ((addr, post):) + let pb = ParsedBlock { pblockLabel = idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ParsedSyscall s' addr + } + pblockMap %= Map.insert idx pb + parseBlocks ctx rest + _ -> error "Multiple system call addresses." + + + FetchAndExecute s' -> do + pb <- fetchAndExecute' ctx b regs s' + pblockMap %= Map.insert idx pb + + -- Do nothing when this block ends in a translation error. + TranslateError _ msg -> do + let regs' = transferStmts arch_info regs (blockStmts b) + let pb = ParsedBlock { pblockLabel = idx + , pblockStmts = blockStmts b + , pblockState = regs' + , pblockTerm = ParsedTranslateError msg + } + pblockMap %= Map.insert idx pb + + -- | This evalutes the statements in a block to expand the information known -- about control flow targets of this block. transferBlock :: DiscoveryConstraints arch @@ -699,40 +1003,24 @@ transferBlock :: DiscoveryConstraints arch -- ^ Abstract state describing machine state when block is encountered. -> CFGM arch ids () transferBlock block_map b regs = do + s <- get let lbl = blockLabel b let src = labelAddr lbl - mem <- gets memory - arch_info <- gets archInfo - let regs' = transferStmts arch_info regs (blockStmts b) - -- FIXME: we should propagate c back to the initial block, not just b - case blockTerm b of - Branch c lb rb -> do - mapM_ (recordWriteStmt src regs') (blockStmts b) - let l = tryLookupBlock "left branch" (labelAddr (blockLabel b)) block_map lb - let l_regs = refineProcState c absTrue regs' - let r = tryLookupBlock "right branch" (labelAddr (blockLabel b)) block_map rb - let r_regs = refineProcState c absFalse regs' - -- We re-transfer the stmts to propagate any changes from - -- the above refineProcState. This could be more efficient by - -- tracking what (if anything) changed. We also might - -- need to keep going back and forth until we reach a - -- fixpoint - transferBlock block_map l (transferStmts arch_info l_regs (blockStmts b)) - transferBlock block_map r (transferStmts arch_info r_regs (blockStmts b)) - - Syscall s' -> do - mapM_ (recordWriteStmt src regs') (blockStmts b) - let abst = finalAbsBlockState regs' s' - let ips = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) - -- Merge system call result with possible next IPs. - Fold.forM_ ips $ \addr -> do - mergeIntraJump lbl (archPostSyscallAbsState arch_info abst addr) addr - - FetchAndExecute s' -> do - fetchAndExecute b regs' s' - -- Do nothing when this block ends in a translation error. - TranslateError _ _ -> - pure () + let ctx = ParseContext { pctxMemory = memory s + , pctxArchInfo = archInfo s + , pctxAddr = src + , pctxBlockMap = block_map + } + let ps0 = ParseState { _pblockMap = Map.empty + , _writtenCodeAddrs = [] + , _intraJumpTargets = [] + , _newFunctionAddrs = [] + } + let ps = execState (parseBlocks ctx [(b,regs)]) ps0 + pblockMapx + mapM_ (markAddrAsFunction (InWrite src)) (ps^.writtenCodeAddrs) + mapM_ (markAddrAsFunction (CallTarget src)) (ps^.newFunctionAddrs) + mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) transfer :: DiscoveryConstraints arch => ArchSegmentedAddr arch @@ -740,17 +1028,21 @@ transfer :: DiscoveryConstraints arch transfer addr = do mem <- gets memory + mfinfo <- use $ foundAddrs . at addr mbr <- use $ blocks . at addr - case mbr of - Nothing -> error $ "getBlock called on block " ++ show addr ++ " we have not seen." + case mfinfo of + Nothing -> error $ "getBlock called on unfound address " ++ show addr ++ "." + Just finfo -> + case mbr of + Nothing -> error $ "getBlock called on block " ++ show addr ++ " we have not seen." - Just br -> do - case Map.lookup 0 (brBlocks br) of - Just root -> do - transferBlock (brBlocks br) root $ - initAbsProcessorState mem (brAbsInitState br) - Nothing -> do - error $ "getBlock given block with empty blocks list." + Just br -> do + case Map.lookup 0 (brBlocks br) of + Just root -> do + transferBlock (brBlocks br) root $ + initAbsProcessorState mem (foundAbstractState finfo) + Nothing -> do + error $ "getBlock given block with empty blocks list." ------------------------------------------------------------------------ -- Main loop diff --git a/src/Data/Macaw/Discovery/Info.hs b/src/Data/Macaw/Discovery/Info.hs index abaa362d..19a91f89 100644 --- a/src/Data/Macaw/Discovery/Info.hs +++ b/src/Data/Macaw/Discovery/Info.hs @@ -17,9 +17,11 @@ discovery. {-# LANGUAGE ViewPatterns #-} module Data.Macaw.Discovery.Info ( BlockRegion(..) + , FoundAddr(..) , lookupBlock , GlobalDataInfo(..) , ParsedTermStmt(..) + , ParsedBlock(..) -- * The interpreter state , DiscoveryInfo , emptyDiscoveryInfo @@ -27,6 +29,7 @@ module Data.Macaw.Discovery.Info , archInfo , memory , symbolNames + , foundAddrs , blocks , functionEntries , reverseEdges @@ -49,6 +52,7 @@ module Data.Macaw.Discovery.Info import Control.Lens import Control.Monad.ST import qualified Data.ByteString as BS +import Data.Foldable import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Parameterized.Classes @@ -69,36 +73,35 @@ import Data.Macaw.Memory import qualified Data.Macaw.Memory.Permissions as Perm import Data.Macaw.Types + +------------------------------------------------------------------------ +-- FoundAddr + +-- | An address that has been found to be reachable. +data FoundAddr arch + = FoundAddr { foundReason :: !(CodeAddrReason (ArchAddrWidth arch)) + -- ^ The reason the address was found to be containing code. + , foundAbstractState :: !(AbsBlockState (ArchReg arch)) + -- ^ The abstract state formed from post-states that reach this address. + } + ------------------------------------------------------------------------ -- BlockRegion -- | A contiguous region of instructions in memory. data BlockRegion arch ids - = BlockRegion { brReason :: !(CodeAddrReason (ArchAddrWidth arch)) - -- ^ The reason the start address was marked as - -- containing code. - , brSize :: !(ArchAddr arch) + = BlockRegion { brSize :: !(ArchAddr arch) -- ^ The size of the region of memory covered by this. , brBlocks :: !(Map Word64 (Block arch ids)) -- ^ Map from labelIndex to associated block. - , brAbsInitState :: !(AbsBlockState (ArchReg arch)) - -- ^ The abstract state at the start of this block. } --- | Does a simple lookup in the cfg at a given DecompiledBlock address. -lookupBlock :: Map (ArchSegmentedAddr arch) (BlockRegion arch ids) - -> ArchLabel arch - -> Maybe (Block arch ids) -lookupBlock m lbl = do - br <- Map.lookup (labelAddr lbl) m - Map.lookup (labelIndex lbl) (brBlocks br) - ------------------------------------------------------------------------ -- CodeAddrReason -- | This describes the source of an address that was marked as containing code. data CodeAddrReason w - = InWrite !(SegmentedAddr w) + = InWrite !(SegmentedAddr w) -- ^ Exploring because the given block writes it to memory. | NextIP !(SegmentedAddr w) -- ^ Exploring because the given block jumps here. @@ -137,12 +140,10 @@ instance (Integral w, Show w) => Show (GlobalDataInfo w) where -- interpreted. data ParsedTermStmt arch ids = ParsedCall !(RegState (ArchReg arch) (Value arch ids)) - !(Seq (Stmt arch ids)) !(Maybe (ArchSegmentedAddr arch)) - -- ^ A call with the current register values, statements lessed pushed return value - -- function to call, and location to return to (Nothing) if this is a tail call. + -- ^ A call with the current register values and location to return to or 'Nothing' if this is a tail call. | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentedAddr arch) - -- ^ A jump to an explicit address within a block. + -- ^ A jump to an explicit address within a function. | ParsedLookupTable !(RegState (ArchReg arch) (Value arch ids)) !(BVValue arch ids (ArchAddrWidth arch)) !(V.Vector (ArchSegmentedAddr arch)) @@ -150,9 +151,9 @@ data ParsedTermStmt arch ids -- -- The registers store the registers, the value contains the index to jump -- to, and the possible addresses. - | ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) !(Seq (Stmt arch ids)) + | ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) -- ^ A return with the given registers. - | ParsedBranch !(Value arch ids BoolType) !(ArchLabel arch) !(ArchLabel arch) + | ParsedBranch !(Value arch ids BoolType) !Word64 !Word64 -- ^ A branch (i.e., BlockTerm is Branch) | ParsedSyscall !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentedAddr arch) @@ -168,20 +169,33 @@ deriving instance ) => Show (ParsedTermStmt arch ids) +------------------------------------------------------------------------ +-- ParsedBlock + +data ParsedBlock arch ids + = ParsedBlock { pblockLabel :: !Word64 + , pblockStmts :: !([Stmt arch ids]) + , pblockState :: !(AbsProcessorState (ArchReg arch) ids) + -- ^ State of processor prior to term statement. + , pblockTerm :: !(ParsedTermStmt arch ids) + } + ------------------------------------------------------------------------ -- DiscoveryInfo -- | The state of the interpreter data DiscoveryInfo arch ids - = DiscoveryInfo { nonceGen :: !(NonceGenerator (ST ids) ids) + = DiscoveryInfo { nonceGen :: !(NonceGenerator (ST ids) ids) -- ^ Generator for creating fresh ids. - , memory :: !(Memory (ArchAddrWidth arch)) + , memory :: !(Memory (ArchAddrWidth arch)) -- ^ The initial memory when disassembly started. , symbolNames :: !(Map (ArchSegmentedAddr arch) BS.ByteString) -- ^ The set of symbol names (not necessarily complete) - , archInfo :: !(ArchitectureInfo arch) + , archInfo :: !(ArchitectureInfo arch) -- ^ Architecture-specific information needed for discovery. - , _blocks :: !(Map (ArchSegmentedAddr arch) (BlockRegion arch ids)) + , _foundAddrs :: !(Map (ArchSegmentedAddr arch) (FoundAddr arch)) + -- ^ Maps fopund address to the pre-state for that block. + , _blocks :: !(Map (ArchSegmentedAddr arch) (BlockRegion arch ids)) -- ^ Maps an address to the code associated with that address. , _functionEntries :: !(Set (ArchSegmentedAddr arch)) -- ^ Maps addresses that are marked as the start of a function @@ -216,6 +230,7 @@ emptyDiscoveryInfo ng mem symbols info = DiscoveryInfo , memory = mem , symbolNames = symbols , archInfo = info + , _foundAddrs = Map.empty , _blocks = Map.empty , _functionEntries = Set.empty , _reverseEdges = Map.empty @@ -224,6 +239,9 @@ emptyDiscoveryInfo ng mem symbols info = DiscoveryInfo , _function_frontier = Map.empty } +foundAddrs :: Simple Lens (DiscoveryInfo arch ids) (Map (ArchSegmentedAddr arch) (FoundAddr arch)) +foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v }) + blocks :: Simple Lens (DiscoveryInfo arch ids) (Map (ArchSegmentedAddr arch) (BlockRegion arch ids)) blocks = lens _blocks (\s v -> s { _blocks = v }) @@ -256,6 +274,15 @@ function_frontier :: Simple Lens (DiscoveryInfo arch ids) (CodeAddrReason (ArchAddrWidth arch))) function_frontier = lens _function_frontier (\s v -> s { _function_frontier = v }) + +-- | Does a simple lookup in the cfg at a given DecompiledBlock address. +lookupBlock :: DiscoveryInfo arch ids + -> ArchLabel arch + -> Maybe (Block arch ids) +lookupBlock info lbl = do + br <- Map.lookup (labelAddr lbl) (info^.blocks) + Map.lookup (labelIndex lbl) (brBlocks br) + ------------------------------------------------------------------------ -- DiscoveryInfo utilities @@ -421,8 +448,8 @@ tryGetStaticSyscallNo interp_state block_addr proc_state | BVValue _ call_no <- proc_state^.boundValue syscall_num_reg = Just call_no | Initial r <- proc_state^.boundValue syscall_num_reg - , Just b <- interp_state^.blocks^.at block_addr = - asConcreteSingleton (brAbsInitState b^.absRegState^.boundValue r) + , Just info <- interp_state^.foundAddrs^.at block_addr = + asConcreteSingleton (foundAbstractState info^.absRegState^.boundValue r) | otherwise = Nothing @@ -431,20 +458,26 @@ classifyBlock :: forall arch ids . (ArchConstraint arch ids, MemWidth (ArchAddrWidth arch)) => Block arch ids -> DiscoveryInfo arch ids - -> ParsedTermStmt arch ids -classifyBlock b interp_state = + -> ([Stmt arch ids], ParsedTermStmt arch ids) +classifyBlock b interp_state = do + let stmts = blockStmts b + mem = memory interp_state + case blockTerm b of - TranslateError _ msg -> ParsedTranslateError msg - Branch c x y -> ParsedBranch c x y + TranslateError _ msg -> (stmts, ParsedTranslateError msg) + Branch c x y + | labelAddr x /= labelAddr (blockLabel b) -> error "Branch with bad child" + | labelAddr y /= labelAddr (blockLabel b) -> error "Branch with bad child" + | otherwise -> (stmts, ParsedBranch c (labelIndex x) (labelIndex y)) FetchAndExecute proc_state -- The last statement was a call. | Just (prev_stmts, ret_addr) <- identifyCall mem stmts proc_state -> - ParsedCall proc_state prev_stmts (Just ret_addr) + (toList prev_stmts, ParsedCall proc_state (Just ret_addr)) -- Jump to concrete offset. | Just tgt_addr <- asLiteralAddr mem (proc_state^.boundValue ip_reg) , inSameFunction (labelAddr (blockLabel b)) tgt_addr interp_state -> - ParsedJump proc_state tgt_addr + (stmts, ParsedJump proc_state tgt_addr) -- Return | Just asgn <- identifyReturn proc_state (callStackDelta (archInfo interp_state)) -> @@ -453,26 +486,23 @@ classifyBlock b interp_state = AssignStmt asgn' | Just Refl <- testEquality (assignId asgn) (assignId asgn') -> True _ -> False - nonret_stmts = Seq.fromList $ filter (not . isRetLoad) stmts - in ParsedReturn proc_state nonret_stmts + nonret_stmts = filter (not . isRetLoad) stmts + in (nonret_stmts, ParsedReturn proc_state) -- Jump table | let entry = getFunctionEntryPoint (labelAddr (blockLabel b)) interp_state , let cur_ip = proc_state^.boundValue ip_reg , Just (idx, nexts) <- identifyJumpTable interp_state entry cur_ip -> - ParsedLookupTable proc_state idx nexts + (stmts, ParsedLookupTable proc_state idx nexts) -- Finally, we just assume that this is a tail call through a pointer -- FIXME: probably unsound. | otherwise -> - ParsedCall proc_state (Seq.fromList stmts) Nothing + (stmts, ParsedCall proc_state Nothing) -- rax is concrete in the first case, so we don't need to propagate it etc. Syscall proc_state | Just next_addr <- asLiteralAddr mem (proc_state^.boundValue ip_reg) -> - ParsedSyscall proc_state next_addr + (stmts, ParsedSyscall proc_state next_addr) - | otherwise -> ClassifyFailure "System call with non-literal return address." - where - stmts = blockStmts b - mem = memory interp_state + | otherwise -> (stmts, ClassifyFailure "System call with non-literal return address.") diff --git a/src/Data/Macaw/Memory.hs b/src/Data/Macaw/Memory.hs index ace9db12..2a1a3df1 100644 --- a/src/Data/Macaw/Memory.hs +++ b/src/Data/Macaw/Memory.hs @@ -155,7 +155,7 @@ bsWord64le bs ------------------------------------------------------------------------ -- MemBase -newtype MemWord (n :: Nat) = MemWord Word64 +newtype MemWord (n :: Nat) = MemWord { memWordValue :: Word64 } -- ^ A value in memory. instance Show (MemWord w) where @@ -445,11 +445,16 @@ addrValue :: Num (MemWord w) => SegmentedAddr w -> MemWord w addrValue addr = addrBase addr + addr^.addrOffset instance Show (SegmentedAddr w) where - showsPrec p a = showParen (p > 6) $ - showString "segment" - . shows (segmentIndex (addrSegment a)) - . showString "+" - . shows (a^.addrOffset) + showsPrec p a = + case segmentBase (addrSegment a) of + Just b -> + showString "0x" . showHex (memWordValue b + memWordValue (a^.addrOffset)) + Nothing -> + showParen (p > 6) + $ showString "segment" + . shows (segmentIndex (addrSegment a)) + . showString "+" + . shows (a^.addrOffset) -- | Return contents starting from location or throw a memory error if there -- is an unaligned relocation. diff --git a/src/Data/Macaw/Memory/Flexdis86.hs b/src/Data/Macaw/Memory/Flexdis86.hs index ddbbeccf..0de2bee2 100644 --- a/src/Data/Macaw/Memory/Flexdis86.hs +++ b/src/Data/Macaw/Memory/Flexdis86.hs @@ -112,7 +112,7 @@ instance Num (MemWord w) => ByteReader (MemoryByteReader w) where MBR $ throwError $ UnalignedRelocation (msAddr ms) ByteRegion bs:rest -> do if BS.null bs then do - MBR $ throwError $ AccessViolation (msAddr ms) + throwError $ AccessViolation (msAddr ms) else do let v = BS.head bs let ms' = ms { msPrev = consByte v (msPrev ms)