mirror of
https://github.com/GaloisInc/macaw.git
synced 2024-11-28 01:35:33 +03:00
Simplify DiscoveryFunInfo
This commit is contained in:
parent
172cf7e863
commit
49d5aefb4e
@ -52,6 +52,7 @@ import Control.Exception
|
||||
import Control.Lens
|
||||
import Control.Monad.ST
|
||||
import Control.Monad.State.Strict
|
||||
import qualified Data.ByteString.Char8 as BSC
|
||||
import Data.Foldable
|
||||
import Data.Map.Strict (Map)
|
||||
import qualified Data.Map.Strict as Map
|
||||
@ -185,6 +186,18 @@ markAddrsAsFunction :: CodeAddrReason (ArchAddrWidth arch)
|
||||
-> DiscoveryState arch
|
||||
markAddrsAsFunction rsn addrs s0 = foldl' (\s a -> markAddrAsFunction rsn a s) s0 addrs
|
||||
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- FoundAddr
|
||||
|
||||
-- | An address that has been found to be reachable.
|
||||
data FoundAddr arch
|
||||
= FoundAddr { foundReason :: !(CodeAddrReason (ArchAddrWidth arch))
|
||||
-- ^ The reason the address was found to be containing code.
|
||||
, foundAbstractState :: !(AbsBlockState (ArchReg arch))
|
||||
-- ^ The abstract state formed from post-states that reach this address.
|
||||
}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- FunState
|
||||
|
||||
@ -193,9 +206,14 @@ data FunState arch ids
|
||||
= FunState { funNonceGen :: !(NonceGenerator (ST ids) ids)
|
||||
, curFunAddr :: !(ArchSegmentedAddr arch)
|
||||
, _curFunCtx :: !(DiscoveryState arch)
|
||||
-- ^ Discovery info
|
||||
, _curFunInfo :: !(DiscoveryFunInfo arch ids)
|
||||
-- ^ Information about current function we are working on
|
||||
-- ^ Discovery state without this function
|
||||
, _curFunBlocks :: !(Map (ArchSegmentedAddr arch) (ParsedBlockRegion arch ids))
|
||||
-- ^ Maps an address to the blocks associated with that address.
|
||||
, _foundAddrs :: !(Map (ArchSegmentedAddr arch) (FoundAddr arch))
|
||||
-- ^ Maps found address to the pre-state for that block.
|
||||
, _reverseEdges :: !(ReverseEdgeMap arch)
|
||||
-- ^ Maps each code address to the list of predecessors that
|
||||
-- affected its abstract state.
|
||||
, _frontier :: !(Set (ArchSegmentedAddr arch))
|
||||
-- ^ Addresses to explore next.
|
||||
}
|
||||
@ -205,8 +223,20 @@ curFunCtx :: Simple Lens (FunState arch ids) (DiscoveryState arch)
|
||||
curFunCtx = lens _curFunCtx (\s v -> s { _curFunCtx = v })
|
||||
|
||||
-- | Information about current function we are working on
|
||||
curFunInfo :: Simple Lens (FunState arch ids) (DiscoveryFunInfo arch ids)
|
||||
curFunInfo = lens _curFunInfo (\s v -> s { _curFunInfo = v })
|
||||
curFunBlocks :: Simple Lens (FunState arch ids) (Map (ArchSegmentedAddr arch) (ParsedBlockRegion arch ids))
|
||||
curFunBlocks = lens _curFunBlocks (\s v -> s { _curFunBlocks = v })
|
||||
|
||||
foundAddrs :: Simple Lens (FunState arch ids) (Map (ArchSegmentedAddr arch) (FoundAddr arch))
|
||||
foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v })
|
||||
|
||||
type ReverseEdgeMap arch = Map (ArchSegmentedAddr arch) (Set (ArchSegmentedAddr arch))
|
||||
|
||||
-- | Maps each code address to the list of predecessors that
|
||||
-- affected its abstract state.
|
||||
reverseEdges :: Simple Lens (FunState arch ids) (ReverseEdgeMap arch)
|
||||
reverseEdges = lens _reverseEdges (\s v -> s { _reverseEdges = v })
|
||||
|
||||
|
||||
|
||||
-- | Set of addresses to explore next.
|
||||
--
|
||||
@ -249,8 +279,8 @@ mergeIntraJump src ab tgt = do
|
||||
pure ()
|
||||
let rsn = NextIP src
|
||||
-- Associate a new abstract state with the code region.
|
||||
s0 <- use curFunInfo
|
||||
case Map.lookup tgt (s0^.foundAddrs) of
|
||||
foundMap <- use foundAddrs
|
||||
case Map.lookup tgt foundMap of
|
||||
-- We have seen this block before, so need to join and see if
|
||||
-- the results is changed.
|
||||
Just old_info -> do
|
||||
@ -258,17 +288,17 @@ mergeIntraJump src ab tgt = do
|
||||
Nothing -> return ()
|
||||
Just new -> do
|
||||
let new_info = old_info { foundAbstractState = new }
|
||||
curFunInfo . foundAddrs %= Map.insert tgt new_info
|
||||
curFunInfo . reverseEdges %= Map.insertWith Set.union tgt (Set.singleton src)
|
||||
foundAddrs %= Map.insert tgt new_info
|
||||
reverseEdges %= Map.insertWith Set.union tgt (Set.singleton src)
|
||||
frontier %= Set.insert tgt
|
||||
-- We haven't seen this block before
|
||||
Nothing -> do
|
||||
curFunInfo . reverseEdges %= Map.insertWith Set.union tgt (Set.singleton src)
|
||||
reverseEdges %= Map.insertWith Set.union tgt (Set.singleton src)
|
||||
frontier %= Set.insert tgt
|
||||
let found_info = FoundAddr { foundReason = rsn
|
||||
, foundAbstractState = ab
|
||||
}
|
||||
curFunInfo . foundAddrs %= Map.insert tgt found_info
|
||||
foundAddrs %= Map.insert tgt found_info
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- Jump table bounds
|
||||
@ -691,18 +721,20 @@ parseBlocks ctx ((b,regs):rest) = do
|
||||
|
||||
-- | This evalutes the statements in a block to expand the information known
|
||||
-- about control flow targets of this block.
|
||||
transferBlocks :: ArchAddr arch
|
||||
transferBlocks :: FoundAddr arch
|
||||
-- ^ State leading to explore block
|
||||
-> ArchAddr arch
|
||||
-- ^ Size of the region these blocks cover.
|
||||
-> Map Word64 (Block arch ids)
|
||||
-- ^ Map from labelIndex to associated block
|
||||
-> AbsProcessorState (ArchReg arch) ids
|
||||
-- ^ Abstract state describing machine state when block is encountered.
|
||||
-> FunM arch ids ()
|
||||
transferBlocks sz block_map regs =
|
||||
transferBlocks finfo sz block_map =
|
||||
case Map.lookup 0 block_map of
|
||||
Nothing -> do
|
||||
error $ "transferBlocks given empty blockRegion."
|
||||
Just b -> do
|
||||
mem <- uses curFunCtx memory
|
||||
let regs = initAbsProcessorState mem (foundAbstractState finfo)
|
||||
funAddr <- gets curFunAddr
|
||||
s <- use curFunCtx
|
||||
let src = labelAddr (blockLabel b)
|
||||
@ -719,10 +751,12 @@ transferBlocks sz block_map regs =
|
||||
}
|
||||
let ps = execState (parseBlocks ctx [(b,regs)]) ps0
|
||||
let pb = ParsedBlockRegion { regionAddr = src
|
||||
, regionReason = foundReason finfo
|
||||
, regionAbstractState = foundAbstractState finfo
|
||||
, regionSize = sz
|
||||
, regionBlockMap = ps^.pblockMap
|
||||
}
|
||||
curFunInfo . parsedBlocks %= Map.insert src pb
|
||||
curFunBlocks %= Map.insert src pb
|
||||
curFunCtx %= markAddrsAsFunction (InWrite src) (ps^.writtenCodeAddrs)
|
||||
. markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs)
|
||||
mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
|
||||
@ -730,25 +764,22 @@ transferBlocks sz block_map regs =
|
||||
transfer :: ArchSegmentedAddr arch
|
||||
-> FunM arch ids ()
|
||||
transfer addr = do
|
||||
mfinfo <- use $ curFunInfo . foundAddrs . at addr
|
||||
mfinfo <- use $ foundAddrs . at addr
|
||||
let finfo = fromMaybe (error $ "getBlock called on unfound address " ++ show addr ++ ".") $
|
||||
mfinfo
|
||||
info <- uses curFunCtx archInfo
|
||||
withArchConstraints info $ do
|
||||
nonce_gen <- gets funNonceGen
|
||||
mem <- uses curFunCtx memory
|
||||
prev_block_map <- use $ curFunInfo . parsedBlocks
|
||||
prev_block_map <- use $ curFunBlocks
|
||||
-- Get maximum number of bytes to disassemble
|
||||
let max_size =
|
||||
case Map.lookupGT addr prev_block_map of
|
||||
Just (next,_) | addrSegment next == addrSegment addr -> next^.addrOffset - addr^.addrOffset
|
||||
_ -> segmentSize (addrSegment addr) - addr^.addrOffset
|
||||
let ab = foundAbstractState finfo
|
||||
trace ("disassembleFn " ++ show (addr, max_size)) $ do
|
||||
(bs, sz, maybeError) <-
|
||||
liftST $ disassembleFn info nonce_gen addr max_size ab
|
||||
seq bs $ do
|
||||
trace ("disassembleFn done" ++ show (addr, max_size)) $ do
|
||||
-- Build state for exploring this.
|
||||
case maybeError of
|
||||
Just e -> do
|
||||
@ -757,7 +788,7 @@ transfer addr = do
|
||||
pure ()
|
||||
|
||||
let block_map = Map.fromList [ (labelIndex (blockLabel b), b) | b <- bs ]
|
||||
transferBlocks sz block_map $ initAbsProcessorState mem (foundAbstractState finfo)
|
||||
transferBlocks finfo sz block_map
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Main loop
|
||||
@ -798,19 +829,28 @@ analyzeFunction addr rsn s =
|
||||
let info = archInfo s
|
||||
let mem = memory s
|
||||
|
||||
let initFunInfo = initDiscoveryFunInfo info mem (symbolNames s) addr rsn
|
||||
let faddr = FoundAddr { foundReason = rsn
|
||||
, foundAbstractState = mkInitialAbsState info mem addr
|
||||
}
|
||||
|
||||
let fs0 = FunState { funNonceGen = gen
|
||||
, curFunAddr = addr
|
||||
, _curFunCtx = s
|
||||
, _curFunInfo = initFunInfo
|
||||
, _curFunBlocks = Map.empty
|
||||
, _foundAddrs = Map.singleton addr faddr
|
||||
, _reverseEdges = Map.empty
|
||||
, _frontier = Set.singleton addr
|
||||
}
|
||||
fs <- execStateT (unFunM analyzeBlocks) fs0
|
||||
let finfo = Some (fs^.curFunInfo)
|
||||
let s' = (fs^.curFunCtx) & funInfo %~ Map.insert addr finfo
|
||||
let nm = fromMaybe (BSC.pack (show addr)) (symbolAtAddr addr (symbolNames s))
|
||||
let finfo = DiscoveryFunInfo { discoveredFunAddr = addr
|
||||
, discoveredFunName = nm
|
||||
, _parsedBlocks = fs^.curFunBlocks
|
||||
}
|
||||
let s' = (fs^.curFunCtx)
|
||||
& funInfo %~ Map.insert addr (Some finfo)
|
||||
& unexploredFunctions %~ Map.delete addr
|
||||
pure (s', finfo)
|
||||
pure (s', Some finfo)
|
||||
|
||||
-- | Analyze addresses that we have marked as functions, but not yet analyzed to
|
||||
-- identify basic blocks, and discover new function candidates until we have
|
||||
|
@ -41,14 +41,8 @@ module Data.Macaw.Discovery.State
|
||||
, funInfo
|
||||
, unexploredFunctions
|
||||
-- * DiscoveryFunInfo
|
||||
, DiscoveryFunInfo
|
||||
, initDiscoveryFunInfo
|
||||
, discoveredFunAddr
|
||||
, discoveredFunName
|
||||
, FoundAddr(..)
|
||||
, foundAddrs
|
||||
, DiscoveryFunInfo(..)
|
||||
, parsedBlocks
|
||||
, reverseEdges
|
||||
-- * CodeAddrRegion
|
||||
, CodeAddrReason(..)
|
||||
-- ** DiscoveryState utilities
|
||||
@ -61,10 +55,8 @@ import qualified Data.ByteString.Char8 as BSC
|
||||
import Data.Char (isDigit)
|
||||
import Data.Map.Strict (Map)
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Parameterized.Classes
|
||||
import Data.Parameterized.Some
|
||||
import Data.Set (Set)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.Vector as V
|
||||
@ -102,17 +94,6 @@ data CodeAddrReason w
|
||||
-- ^ The user requested that we analyze this address as a function.
|
||||
deriving (Show)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- FoundAddr
|
||||
|
||||
-- | An address that has been found to be reachable.
|
||||
data FoundAddr arch
|
||||
= FoundAddr { foundReason :: !(CodeAddrReason (ArchAddrWidth arch))
|
||||
-- ^ The reason the address was found to be containing code.
|
||||
, foundAbstractState :: !(AbsBlockState (ArchReg arch))
|
||||
-- ^ The abstract state formed from post-states that reach this address.
|
||||
}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- SymbolAddrMap
|
||||
|
||||
@ -276,8 +257,15 @@ ppParsedBlock a b =
|
||||
-- | A contiguous region of instructions in memory.
|
||||
data ParsedBlockRegion arch ids
|
||||
= ParsedBlockRegion { regionAddr :: !(ArchSegmentedAddr arch)
|
||||
-- ^ Address of region
|
||||
, regionSize :: !(ArchAddr arch)
|
||||
-- ^ The size of the region of memory covered by this.
|
||||
, regionReason :: !(CodeAddrReason (ArchAddrWidth arch))
|
||||
-- ^ Reason that we marked this address as
|
||||
-- the start of a basic block.
|
||||
, regionAbstractState :: !(AbsBlockState (ArchReg arch))
|
||||
-- ^ Abstract state prior to the execution of
|
||||
-- this region.
|
||||
, regionBlockMap :: !(Map Word64 (ParsedBlock arch ids))
|
||||
-- ^ Map from labelIndex to associated block.
|
||||
}
|
||||
@ -291,32 +279,19 @@ instance ArchConstraints arch
|
||||
------------------------------------------------------------------------
|
||||
-- DiscoveryFunInfo
|
||||
|
||||
type ReverseEdgeMap arch = Map (ArchSegmentedAddr arch) (Set (ArchSegmentedAddr arch))
|
||||
|
||||
-- | Information discovered about a particular function
|
||||
data DiscoveryFunInfo arch ids
|
||||
= DiscoveryFunInfo { discoveredFunAddr :: !(ArchSegmentedAddr arch)
|
||||
-- ^ Address of function entry block.
|
||||
, discoveredFunName :: !BSC.ByteString
|
||||
-- ^ Name of function should be unique for program
|
||||
, _foundAddrs :: !(Map (ArchSegmentedAddr arch) (FoundAddr arch))
|
||||
-- ^ Maps fopund address to the pre-state for that block.
|
||||
, _parsedBlocks :: !(Map (ArchSegmentedAddr arch) (ParsedBlockRegion arch ids))
|
||||
-- ^ Maps an address to the blocks associated with that address.
|
||||
, _reverseEdges :: !(ReverseEdgeMap arch)
|
||||
-- ^ Maps each code address to the list of predecessors that
|
||||
-- affected its abstract state.
|
||||
}
|
||||
|
||||
foundAddrs :: Simple Lens (DiscoveryFunInfo arch ids) (Map (ArchSegmentedAddr arch) (FoundAddr arch))
|
||||
foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v })
|
||||
|
||||
parsedBlocks :: Simple Lens (DiscoveryFunInfo arch ids) (Map (ArchSegmentedAddr arch) (ParsedBlockRegion arch ids))
|
||||
parsedBlocks = lens _parsedBlocks (\s v -> s { _parsedBlocks = v })
|
||||
|
||||
reverseEdges :: Simple Lens (DiscoveryFunInfo arch ids) (ReverseEdgeMap arch)
|
||||
reverseEdges = lens _reverseEdges (\s v -> s { _reverseEdges = v })
|
||||
|
||||
-- | Does a simple lookup in the cfg at a given DecompiledBlock address.
|
||||
lookupParsedBlock :: DiscoveryFunInfo arch ids
|
||||
-> ArchLabel arch
|
||||
@ -325,29 +300,6 @@ lookupParsedBlock info lbl = do
|
||||
br <- Map.lookup (labelAddr lbl) (info^.parsedBlocks)
|
||||
Map.lookup (labelIndex lbl) (regionBlockMap br)
|
||||
|
||||
initDiscoveryFunInfo :: ArchitectureInfo arch
|
||||
-- ^ Architecture information
|
||||
-> Memory (ArchAddrWidth arch)
|
||||
-- ^ Contents of memory for initializing abstract state.
|
||||
-> SymbolAddrMap (ArchAddrWidth arch)
|
||||
-- ^ The symbol map for computing the name
|
||||
-> ArchSegmentedAddr arch
|
||||
-- ^ Address of this function
|
||||
-> CodeAddrReason (ArchAddrWidth arch)
|
||||
-- ^ Reason this function was discovered
|
||||
-> DiscoveryFunInfo arch ids
|
||||
initDiscoveryFunInfo info mem symMap addr rsn =
|
||||
let nm = fromMaybe (BSC.pack (show addr)) (symbolAtAddr addr symMap)
|
||||
faddr = FoundAddr { foundReason = rsn
|
||||
, foundAbstractState = mkInitialAbsState info mem addr
|
||||
}
|
||||
in DiscoveryFunInfo { discoveredFunAddr = addr
|
||||
, discoveredFunName = nm
|
||||
, _foundAddrs = Map.singleton addr faddr
|
||||
, _parsedBlocks = Map.empty
|
||||
, _reverseEdges = Map.empty
|
||||
}
|
||||
|
||||
instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where
|
||||
pretty info =
|
||||
text "function" <+> text (BSC.unpack (discoveredFunName info)) <$$>
|
||||
|
Loading…
Reference in New Issue
Block a user