lay some groundwork for jump table detection on PPC

This commit is contained in:
Daniel Wagner 2018-05-24 12:05:39 -04:00
parent 3814d9c649
commit d0566fe03b
3 changed files with 110 additions and 46 deletions

View File

@ -39,11 +39,11 @@ module Data.Macaw.CFG.Core
, valueAsRhs , valueAsRhs
, valueAsMemAddr , valueAsMemAddr
, valueAsSegmentOff , valueAsSegmentOff
, valueAsArrayOffset
, valueAsStaticMultiplication , valueAsStaticMultiplication
, asLiteralAddr , asLiteralAddr
, asBaseOffset , asBaseOffset
, asInt64Constant , asInt64Constant
, IPAlignment(..)
, mkLit , mkLit
, bvValue , bvValue
, ppValueAssignments , ppValueAssignments
@ -494,24 +494,6 @@ valueAsMemAddr (BVValue _ val) = Just $ absoluteAddr (fromInteger val)
valueAsMemAddr (RelocatableValue _ i) = Just i valueAsMemAddr (RelocatableValue _ i) = Just i
valueAsMemAddr _ = Nothing valueAsMemAddr _ = Nothing
valueAsArrayOffset ::
Memory (ArchAddrWidth arch) ->
ArchAddrValue arch ids ->
Maybe (ArchSegmentOff arch, ArchAddrValue arch ids)
valueAsArrayOffset mem v
| Just (BVAdd w base offset) <- valueAsApp v
, Just Refl <- testEquality w (memWidth mem)
, Just ptr <- valueAsSegmentOff mem base
= Just (ptr, offset)
-- and with the other argument order
| Just (BVAdd w offset base) <- valueAsApp v
, Just Refl <- testEquality w (memWidth mem)
, Just ptr <- valueAsSegmentOff mem base
= Just (ptr, offset)
| otherwise = Nothing
valueAsStaticMultiplication :: valueAsStaticMultiplication ::
BVValue arch ids w -> BVValue arch ids w ->
Maybe (Integer, BVValue arch ids w) Maybe (Integer, BVValue arch ids w)
@ -519,6 +501,14 @@ valueAsStaticMultiplication v
| Just (BVMul _ (BVValue _ mul) v') <- valueAsApp v = Just (mul, v') | Just (BVMul _ (BVValue _ mul) v') <- valueAsApp v = Just (mul, v')
| Just (BVMul _ v' (BVValue _ mul)) <- valueAsApp v = Just (mul, v') | Just (BVMul _ v' (BVValue _ mul)) <- valueAsApp v = Just (mul, v')
| Just (BVShl _ v' (BVValue _ sh)) <- valueAsApp v = Just (2^sh, v') | Just (BVShl _ v' (BVValue _ sh)) <- valueAsApp v = Just (2^sh, v')
-- the PowerPC way to shift left is a bit obtuse...
| Just (BVAnd w v' (BVValue _ c)) <- valueAsApp v
, Just (BVOr _ l r) <- valueAsApp v'
, Just (BVShl _ l' (BVValue _ shl)) <- valueAsApp l
, Just (BVShr _ _ (BVValue _ shr)) <- valueAsApp r
, c == complement (2^shl-1) `mod` bit (fromInteger (natValue w))
, shr >= natValue w - shl
= Just (2^shl, l')
| otherwise = Nothing | otherwise = Nothing
asLiteralAddr :: MemWidth (ArchAddrWidth arch) asLiteralAddr :: MemWidth (ArchAddrWidth arch)
@ -545,6 +535,11 @@ asBaseOffset x
| Just (BVAdd _ x_base (BVValue _ x_off)) <- valueAsApp x = (x_base, x_off) | Just (BVAdd _ x_base (BVValue _ x_off)) <- valueAsApp x = (x_base, x_off)
| otherwise = (x,0) | otherwise = (x,0)
class IPAlignment arch where
-- | Take an aligned value and strip away the bits of the semantics that
-- align it, leaving behind a (potentially unaligned) value.
fromIPAligned :: ArchAddrValue arch ids -> Maybe (ArchAddrValue arch ids)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- RegState -- RegState
@ -720,6 +715,7 @@ type ArchConstraints arch
, IsArchStmt (ArchStmt arch) , IsArchStmt (ArchStmt arch)
, FoldableF (ArchStmt arch) , FoldableF (ArchStmt arch)
, PrettyF (ArchTermStmt arch) , PrettyF (ArchTermStmt arch)
, IPAlignment arch
) )
-- | Pretty print an assignment right-hand side using operations parameterized -- | Pretty print an assignment right-hand side using operations parameterized

View File

@ -22,6 +22,8 @@ This provides information about code discovered in binaries.
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE TypeApplications #-}
module Data.Macaw.Discovery module Data.Macaw.Discovery
( -- * DiscoveryInfo ( -- * DiscoveryInfo
State.DiscoveryState State.DiscoveryState
@ -91,6 +93,7 @@ import Debug.Trace
import Data.Macaw.AbsDomain.AbsState import Data.Macaw.AbsDomain.AbsState
import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp
import Data.Macaw.AbsDomain.Refine import Data.Macaw.AbsDomain.Refine
import qualified Data.Macaw.AbsDomain.StridedInterval as SI
import Data.Macaw.Architecture.Info import Data.Macaw.Architecture.Info
import Data.Macaw.CFG import Data.Macaw.CFG
import Data.Macaw.CFG.DemandSet import Data.Macaw.CFG.DemandSet
@ -408,6 +411,8 @@ extendDyn (BVMemRepr size _) ext w = case ext of
| Just Refl <- testEquality size (knownNat :: NatRepr 8) -> Just (memWordSigned w) | Just Refl <- testEquality size (knownNat :: NatRepr 8) -> Just (memWordSigned w)
_ -> Nothing _ -> Nothing
-- Beware: on some architectures, after reading from the jump table, the
-- resulting addresses must be aligned. See the IPAlignment class.
data JumpTable arch ids data JumpTable arch ids
-- the result of the array read gives the address to jump to -- the result of the array read gives the address to jump to
= Absolute (ArrayRead arch ids) (Maybe Extension) = Absolute (ArrayRead arch ids) (Maybe Extension)
@ -432,15 +437,65 @@ jumpTableExtension (Relative _ _ e) = e
ensure :: Alternative f => (a -> Bool) -> a -> f a ensure :: Alternative f => (a -> Bool) -> a -> f a
ensure p x = x <$ guard (p x) ensure p x = x <$ guard (p x)
matchArrayRead, matchReadOnlyArrayRead :: absValueAsSegmentOff ::
MemWidth (ArchAddrWidth arch) => forall arch.
Memory (ArchAddrWidth arch) -> Memory (ArchAddrWidth arch) ->
ArchAbsValue arch (BVType (ArchAddrWidth arch)) ->
Maybe (ArchSegmentOff arch)
absValueAsSegmentOff mem av = case av of
FinSet s | Set.size s == 1 -> resolveAbsoluteIntegerAddr (shead s)
CodePointers s False | Set.size s == 1 -> Just (shead s)
CodePointers s True | Set.size s == 0 -> resolveAbsoluteIntegerAddr 0
StridedInterval si -> SI.isSingleton si >>= resolveAbsoluteIntegerAddr
_ -> Nothing
where
shead :: Set a -> a
shead = Set.findMin
resolveAbsoluteIntegerAddr :: Integer -> Maybe (ArchSegmentOff arch)
resolveAbsoluteIntegerAddr = resolveAbsoluteAddr mem . addrWidthClass (memAddrWidth mem) fromInteger
valueAsSegmentOffWithTransfer ::
forall arch ids.
RegisterInfo (ArchReg arch) =>
Memory (ArchAddrWidth arch) ->
AbsProcessorState (ArchReg arch) ids ->
BVValue arch ids (ArchAddrWidth arch) ->
Maybe (ArchSegmentOff arch)
valueAsSegmentOffWithTransfer mem aps v
= valueAsSegmentOff mem v
<|> absValueAsSegmentOff @arch mem (transferValue aps v)
valueAsArrayOffset ::
RegisterInfo (ArchReg arch) =>
Memory (ArchAddrWidth arch) ->
AbsProcessorState (ArchReg arch) ids ->
ArchAddrValue arch ids ->
Maybe (ArchSegmentOff arch, ArchAddrValue arch ids)
valueAsArrayOffset mem aps v
| Just (BVAdd w base offset) <- valueAsApp v
, Just Refl <- testEquality w (memWidth mem)
, Just ptr <- valueAsSegmentOffWithTransfer mem aps base
= Just (ptr, offset)
-- and with the other argument order
| Just (BVAdd w offset base) <- valueAsApp v
, Just Refl <- testEquality w (memWidth mem)
, Just ptr <- valueAsSegmentOffWithTransfer mem aps base
= Just (ptr, offset)
| otherwise = Nothing
matchArrayRead, matchReadOnlyArrayRead ::
(MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch)) =>
Memory (ArchAddrWidth arch) ->
AbsProcessorState (ArchReg arch) ids ->
BVValue arch ids w -> BVValue arch ids w ->
Maybe (ArrayRead arch ids) Maybe (ArrayRead arch ids)
matchArrayRead mem val matchArrayRead mem aps val
| Just (ReadMem addr size) <- valueAsRhs val | Just (ReadMem addr size) <- valueAsRhs val
, Just (base, offset) <- valueAsArrayOffset mem addr , Just (base, offset) <- valueAsArrayOffset mem aps addr
, Just (stride, ixVal) <- valueAsStaticMultiplication offset , Just (stride, ixVal) <- valueAsStaticMultiplication offset
= Just ArrayRead = Just ArrayRead
{ arBase = base { arBase = base
@ -451,8 +506,8 @@ matchArrayRead mem val
| otherwise = Nothing | otherwise = Nothing
matchReadOnlyArrayRead mem val = matchReadOnlyArrayRead mem aps val =
matchArrayRead mem val >>= matchArrayRead mem aps val >>=
ensure (Perm.isReadonly . segmentFlags . msegSegment . arBase) ensure (Perm.isReadonly . segmentFlags . msegSegment . arBase)
-- | Just like Some (BVValue arch ids), but doesn't run into trouble with -- | Just like Some (BVValue arch ids), but doesn't run into trouble with
@ -466,26 +521,28 @@ matchExtension val
| otherwise = (Nothing, SomeBVValue val) | otherwise = (Nothing, SomeBVValue val)
-- | Figure out if this is a jump table. -- | Figure out if this is a jump table.
matchJumpTable :: MemWidth (ArchAddrWidth arch) matchJumpTable :: (IPAlignment arch, MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch))
=> Memory (ArchAddrWidth arch) => Memory (ArchAddrWidth arch)
-> AbsProcessorState (ArchReg arch) ids
-> ArchAddrValue arch ids -- ^ Value that's assigned to the IP. -> ArchAddrValue arch ids -- ^ Value that's assigned to the IP.
-> Maybe (JumpTable arch ids) -> Maybe (JumpTable arch ids)
matchJumpTable mem ip matchJumpTable mem aps ip
-- Turn a plain read address into base + offset. -- Turn a plain read address into base + offset.
| (ext, SomeBVValue ipShort) <- matchExtension ip | (ext, SomeBVValue ipShort) <- matchExtension ip
, Just arrayRead <- matchReadOnlyArrayRead mem ipShort , Just arrayRead <- matchReadOnlyArrayRead mem aps ipShort
= Just (Absolute arrayRead ext) = Just (Absolute arrayRead ext)
-- gcc-style PIC jump tables on x86 use, roughly, -- gcc-style PIC jump tables on x86 use, roughly,
-- ip = jmptbl + jmptbl[index] -- ip = jmptbl + jmptbl[index]
-- where jmptbl is a pointer to the lookup table. -- where jmptbl is a pointer to the lookup table.
| Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem ip | Just unalignedIP <- fromIPAligned ip
, Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem aps unalignedIP
, (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset , (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset
, Just arrayRead <- matchReadOnlyArrayRead mem shortOffset , Just arrayRead <- matchReadOnlyArrayRead mem aps shortOffset
= Just (Relative tgtBase arrayRead ext) = Just (Relative tgtBase arrayRead ext)
matchJumpTable _ _ = Nothing matchJumpTable _ _ _ = Nothing
-- | This describes why we could not infer the bounds of code that looked like it -- | This describes why we could not infer the bounds of code that looked like it
-- was accessing a jump table. -- was accessing a jump table.
@ -514,7 +571,7 @@ showJumpTableBoundsError err =
getJumpTableBounds :: ArchitectureInfo a getJumpTableBounds :: ArchitectureInfo a
-> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. -> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers.
-> ArrayRead a ids -> ArrayRead a ids
-> Either (JumpTableBoundsError a ids) (ArchAddrWord a) -> Either String (ArchAddrWord a)
-- ^ One past last index in jump table or nothing -- ^ One past last index in jump table or nothing
getJumpTableBounds info regs arrayRead = withArchConstraints info $ getJumpTableBounds info regs arrayRead = withArchConstraints info $
case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of
@ -522,8 +579,10 @@ getJumpTableBounds info regs arrayRead = withArchConstraints info $
let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in
if rangeInReadonlySegment (arBase arrayRead) (fromInteger arrayByteSize) if rangeInReadonlySegment (arBase arrayRead) (fromInteger arrayByteSize)
then Right $! fromInteger maxIx then Right $! fromInteger maxIx
else error $ "Jump table range is not in readonly memory" else Left $ "Jump table range is not in readonly memory: "
Left msg -> Left (CouldNotFindBound msg (arIx arrayRead)) ++ show maxIx ++ " entries/" ++ show arrayByteSize ++ " bytes"
++ " starting at " ++ show (arBase arrayRead)
Left msg -> Left (showJumpTableBoundsError (CouldNotFindBound msg (arIx arrayRead)))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- ParseState -- ParseState
@ -700,11 +759,11 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do
, stmtsAbsState = absProcState' , stmtsAbsState = absProcState'
} }
-- Block ends with what looks like a jump table. -- Block ends with what looks like a jump table.
| Just jt <- debug DCFG "try jump table" $ matchJumpTable mem (s'^.curIP) -> | Just jt <- debug DCFG "try jump table" $ matchJumpTable mem absProcState' (s'^.curIP) ->
let arrayRead = jumpTableRead jt in let arrayRead = jumpTableRead jt in
case getJumpTableBounds arch_info absProcState' arrayRead of case getJumpTableBounds arch_info absProcState' arrayRead of
Left err -> Left err ->
trace (show src ++ ": Could not compute bounds: " ++ showJumpTableBoundsError err) $ do trace (show src ++ ": Could not compute bounds: " ++ err) $ do
mapM_ (recordWriteStmt arch_info mem absProcState') stmts mapM_ (recordWriteStmt arch_info mem absProcState') stmts
pure StatementList { stmtsIdent = lbl_idx pure StatementList { stmtsIdent = lbl_idx
, stmtsNonterm = stmts , stmtsNonterm = stmts
@ -827,7 +886,8 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do
-- | this evalutes the statements in a block to expand the information known -- | this evalutes the statements in a block to expand the information known
-- about control flow targets of this block. -- about control flow targets of this block.
parseBlock :: ParseContext arch ids parseBlock :: IPAlignment arch
=> ParseContext arch ids
-- ^ Context for parsing blocks. -- ^ Context for parsing blocks.
-> Block arch ids -> Block arch ids
-- ^ Block to parse -- ^ Block to parse
@ -890,7 +950,7 @@ parseBlock ctx b regs = do
-- | This evalutes the statements in a block to expand the information known -- | This evalutes the statements in a block to expand the information known
-- about control flow targets of this block. -- about control flow targets of this block.
transferBlocks :: MemWidth (RegAddrWidth (ArchReg arch)) transferBlocks :: (MemWidth (RegAddrWidth (ArchReg arch)), IPAlignment arch)
=> ArchSegmentOff arch => ArchSegmentOff arch
-- ^ Address of theze blocks -- ^ Address of theze blocks
-> FoundAddr arch -> FoundAddr arch
@ -941,7 +1001,7 @@ transferBlocks src finfo sz block_map =
mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
transfer :: ArchSegmentOff arch -> FunM arch s ids () transfer :: IPAlignment arch => ArchSegmentOff arch -> FunM arch s ids ()
transfer addr = do transfer addr = do
s <- use curFunCtx s <- use curFunCtx
let ainfo = archInfo s let ainfo = archInfo s
@ -997,7 +1057,8 @@ transfer addr = do
-- | Loop that repeatedly explore blocks until we have explored blocks -- | Loop that repeatedly explore blocks until we have explored blocks
-- on the frontier. -- on the frontier.
analyzeBlocks :: (ArchSegmentOff arch -> ST s ()) analyzeBlocks :: IPAlignment arch
=> (ArchSegmentOff arch -> ST s ())
-- ^ Logging function to call when analyzing a new block. -- ^ Logging function to call when analyzing a new block.
-> FunState arch s ids -> FunState arch s ids
-> ST s (FunState arch s ids) -> ST s (FunState arch s ids)
@ -1049,7 +1110,8 @@ mkFunInfo fs =
-- --
-- This returns the updated state and the discovered control flow -- This returns the updated state and the discovered control flow
-- graph for this function. -- graph for this function.
analyzeFunction :: (ArchSegmentOff arch -> ST s ()) analyzeFunction :: IPAlignment arch
=> (ArchSegmentOff arch -> ST s ())
-- ^ Logging function to call when analyzing a new block. -- ^ Logging function to call when analyzing a new block.
-> ArchSegmentOff arch -> ArchSegmentOff arch
-- ^ The address to explore -- ^ The address to explore
@ -1080,7 +1142,7 @@ analyzeFunction logFn addr rsn s =
-- --
-- If an exploreFnPred function exists in the DiscoveryState, then do not -- If an exploreFnPred function exists in the DiscoveryState, then do not
-- analyze unexploredFunctions at addresses that do not satisfy this predicate. -- analyze unexploredFunctions at addresses that do not satisfy this predicate.
analyzeDiscoveredFunctions :: DiscoveryState arch -> DiscoveryState arch analyzeDiscoveredFunctions :: IPAlignment arch => DiscoveryState arch -> DiscoveryState arch
analyzeDiscoveredFunctions info = analyzeDiscoveredFunctions info =
case Map.lookupMin (exploreOK $ info^.unexploredFunctions) of case Map.lookupMin (exploreOK $ info^.unexploredFunctions) of
Nothing -> info Nothing -> info
@ -1119,7 +1181,8 @@ exploreMemPointers mem_words info =
-- given set of function entry points -- given set of function entry points
cfgFromAddrs :: cfgFromAddrs ::
forall arch forall arch
. ArchitectureInfo arch . IPAlignment arch
=> ArchitectureInfo arch
-- ^ Architecture-specific information needed for doing control-flow exploration. -- ^ Architecture-specific information needed for doing control-flow exploration.
-> Memory (ArchAddrWidth arch) -> Memory (ArchAddrWidth arch)
-- ^ Memory to use when decoding instructions. -- ^ Memory to use when decoding instructions.
@ -1134,7 +1197,8 @@ cfgFromAddrs arch_info mem symbols =
-- | Expand an initial discovery state by exploring from a given set of function -- | Expand an initial discovery state by exploring from a given set of function
-- entry points. -- entry points.
cfgFromAddrsAndState :: forall arch cfgFromAddrsAndState :: forall arch
. DiscoveryState arch . IPAlignment arch
=> DiscoveryState arch
-> [ArchSegmentOff arch] -> [ArchSegmentOff arch]
-- ^ Initial function entry points. -- ^ Initial function entry points.
-> [(ArchSegmentOff arch, ArchSegmentOff arch)] -> [(ArchSegmentOff arch, ArchSegmentOff arch)]
@ -1153,7 +1217,7 @@ cfgFromAddrsAndState initial_state init_addrs mem_words =
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Resolve functions with logging -- Resolve functions with logging
resolveFuns :: MemWidth (RegAddrWidth (ArchReg arch)) resolveFuns :: (MemWidth (RegAddrWidth (ArchReg arch)), IPAlignment arch)
=> (ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> ST s Bool) => (ArchSegmentOff arch -> FunctionExploreReason (ArchAddrWidth arch) -> ST s Bool)
-- ^ Callback for discovered functions -- ^ Callback for discovered functions
-- --
@ -1248,7 +1312,8 @@ ppFunReason rsn =
-- This function is intended to make it easy to explore functions, and -- This function is intended to make it easy to explore functions, and
-- can be controlled via 'DiscoveryOptions'. -- can be controlled via 'DiscoveryOptions'.
completeDiscoveryState :: forall arch completeDiscoveryState :: forall arch
. ArchitectureInfo arch . IPAlignment arch
=> ArchitectureInfo arch
-> DiscoveryOptions -> DiscoveryOptions
-- ^ Options controlling discovery -- ^ Options controlling discovery
-> Memory (ArchAddrWidth arch) -> Memory (ArchAddrWidth arch)

View File

@ -854,6 +854,9 @@ type instance ArchFn X86_64 = X86PrimFn
type instance ArchStmt X86_64 = X86Stmt type instance ArchStmt X86_64 = X86Stmt
type instance ArchTermStmt X86_64 = X86TermStmt type instance ArchTermStmt X86_64 = X86TermStmt
-- x86 instructions can start at any byte
instance IPAlignment X86_64 where fromIPAligned = Just
rewriteX86PrimFn :: X86PrimFn (Value X86_64 src) tp rewriteX86PrimFn :: X86PrimFn (Value X86_64 src) tp
-> Rewriter X86_64 s src tgt (Value X86_64 tgt tp) -> Rewriter X86_64 s src tgt (Value X86_64 tgt tp)
rewriteX86PrimFn f = rewriteX86PrimFn f =