Bug fixes to code discovery; introduce JumpTableLayout.

This fixes bugs in scanning addresses in memory, and failing to check
the executable status of function entry points.
This commit is contained in:
Joe Hendrix 2018-07-30 13:28:46 -07:00
parent e4a27d7bbc
commit 036f39cbb4
No known key found for this signature in database
GPG Key ID: 8DFA5FF784098C4F
2 changed files with 333 additions and 326 deletions

View File

@ -4,25 +4,16 @@ Maintainer : Joe Hendrix <jhendrix@galois.com>, Simon Winwood <sjw@galois.
This provides information about code discovered in binaries. This provides information about code discovered in binaries.
-} -}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DoAndIfThenElse #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-} {-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE PatternGuards #-} {-# LANGUAGE PatternGuards #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-}
module Data.Macaw.Discovery module Data.Macaw.Discovery
( -- * DiscoveryInfo ( -- * DiscoveryInfo
State.DiscoveryState(..) State.DiscoveryState(..)
@ -104,27 +95,20 @@ import Data.Macaw.Types
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Utilities -- Utilities
isExecutableSegOff :: MemSegmentOff w -> Bool
isExecutableSegOff sa =
segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute
-- | Get code pointers out of a abstract value. -- | Get code pointers out of a abstract value.
concretizeAbsCodePointers :: MemWidth w identifyConcreteAddresses :: MemWidth w
=> Memory w => Memory w
-> AbsValue w (BVType w) -> AbsValue w (BVType w)
-> [MemSegmentOff w] -> [MemSegmentOff w]
concretizeAbsCodePointers mem (FinSet s) = identifyConcreteAddresses mem (FinSet s) =
[ sa mapMaybe (resolveAbsoluteAddr mem . fromInteger) (Set.toList s)
| a <- Set.toList s identifyConcreteAddresses _ (CodePointers s _) = Set.toList s
, sa <- maybeToList (resolveAbsoluteAddr mem (fromInteger a)) identifyConcreteAddresses _mem StridedInterval{} = []
, segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute identifyConcreteAddresses _mem _ = []
]
concretizeAbsCodePointers _ (CodePointers s _) =
[ sa
| sa <- Set.toList s
, segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute
]
-- FIXME: this is dangerous !!
concretizeAbsCodePointers _mem StridedInterval{} = [] -- FIXME: this case doesn't make sense
-- debug DCFG ("I think these are code pointers!: " ++ show s) $ []
-- filter (isCodeAddr mem) $ fromInteger <$> SI.toList s
concretizeAbsCodePointers _mem _ = []
{- {-
-- | Return true if this address was added because of the contents of a global address -- | Return true if this address was added because of the contents of a global address
@ -264,14 +248,29 @@ dropUnusedCodeInParsedBlock ainfo b =
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Memory utilities -- Memory utilities
-- | Return true if range is entirely contained within a single read only segment.Q sliceMemContents'
rangeInReadonlySegment :: MemWidth w :: MemWidth w
=> MemSegmentOff w -- ^ Start of range => Int -- ^ Number of bytes in each slice.
-> MemWord w -- ^ The size of the range -> [[SegmentRange w]] -- ^ Previous slices
-> Bool -> Integer -- ^ Number of slices to return
rangeInReadonlySegment mseg size = -> [SegmentRange w] -- ^ Ranges to process next
size <= segmentSize (msegSegment mseg) - msegOffset mseg -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
&& Perm.isReadonly (segmentFlags (msegSegment mseg)) sliceMemContents' stride prev c next
| c <= 0 = pure (reverse prev, next)
| otherwise =
case splitSegmentRangeList next stride of
Left e -> Left e
Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest
-- | `sliceMemContents stride cnt contents` splits contents up into `cnt`
-- memory regions each with size `stride`.
sliceMemContents
:: MemWidth w
=> Int -- ^ Number of bytes in each slice.
-> Integer -- ^ Number of slices to return
-> [SegmentRange w] -- ^ Ranges to process next
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
sliceMemContents stride c next = sliceMemContents' stride [] c next
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- DiscoveryState utilities -- DiscoveryState utilities
@ -287,6 +286,8 @@ markAddrAsFunction :: FunctionExploreReason (ArchAddrWidth arch)
markAddrAsFunction rsn addr s markAddrAsFunction rsn addr s
-- Do nothing if function is already explored. -- Do nothing if function is already explored.
| Map.member addr (s^.funInfo) || Map.member addr (s^.unexploredFunctions) = s | Map.member addr (s^.funInfo) || Map.member addr (s^.unexploredFunctions) = s
-- Ignore if address is not in an executable segment.
| isExecutableSegOff addr = s
| otherwise = addrWidthClass (memAddrWidth (memory s)) $ | otherwise = addrWidthClass (memAddrWidth (memory s)) $
-- We check that the function address ignores bytes so that we do -- We check that the function address ignores bytes so that we do
-- not start disassembling at a relocation or BSS region. -- not start disassembling at a relocation or BSS region.
@ -440,63 +441,39 @@ mergeIntraJump src ab tgt = do
foundAddrs %= Map.insert tgt found_info foundAddrs %= Map.insert tgt found_info
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- Jump table bounds -- BoundedMemArray
-- | A memory read that looks like array indexing. It read 'arSize' bytes from -- | This describes a region of memory dereferenced in some array read.
--
-- These regions may be be sparse, given an index `i`, the
-- the address given by 'arBase' + 'arIx'*'arStride'. -- the address given by 'arBase' + 'arIx'*'arStride'.
data ArrayRead arch ids w = ArrayRead data BoundedMemArray arch tp = BoundedMemArray
{ arBase :: ArchSegmentOff arch { arBase :: !(MemSegmentOff (ArchAddrWidth arch))
, arIx :: ArchAddrValue arch ids -- ^ The base address for array accesses.
, arStride :: Integer , arStride :: !Integer
, arSize :: MemRepr (BVType w) -- ^ Space between elements of the array.
-- ^ Type of element in this array. --
-- This will typically be the number of bytes denoted by `arEltType`,
-- but may be larger for sparse arrays. `matchBoundedMemArray` will fail
-- if stride is less than the number of bytes read.
, arEltType :: !(MemRepr tp)
-- ^ Resolved type of elements in this array.
, arSlices :: !(V.Vector [SegmentRange (ArchAddrWidth arch)])
-- ^ The slices of memory in the array.
--
-- The `i`th element in the vector corresponds to the first `size`
-- bytes at address `base + stride * i`.
--
-- This could be computed from the previous fields, but we check we
-- can create it when creating the array read, so we store it to
-- avoid recomputing it.
} }
deriving instance RegisterInfo (ArchReg arch) => Show (ArrayRead arch ids w) deriving instance RegisterInfo (ArchReg arch) => Show (BoundedMemArray arch tp)
-- | Return true if the address stored is readable and not writable. -- | Return true if the address stored is readable and not writable.
isReadOnlyArrayRead :: ArrayRead arch ids w -> Bool isReadOnlyBoundedMemArray :: BoundedMemArray arch tp -> Bool
isReadOnlyArrayRead = Perm.isReadonly . segmentFlags . msegSegment . arBase isReadOnlyBoundedMemArray = Perm.isReadonly . segmentFlags . msegSegment . arBase
-- | Number of bytes of size.
arSizeBytes :: ArrayRead arch ids w -> Integer
arSizeBytes = memReprBytes . arSize
------------------------------------------------------------------------
-- Extension
-- | Used to denote how a value should be extended to a full address.
data Extension = Signed | Unsigned
deriving (Bounded, Enum, Eq, Ord, Read, Show)
-- | `extendDyn w ext v` treats `v` as a `w`-bit number and returns the underlying
extendDyn :: (1 <= w, Integral x) => NatRepr w -> Extension -> x ->Integer
extendDyn _ Unsigned = toInteger
extendDyn w Signed = toSigned w . toInteger
------------------------------------------------------------------------
-- JumpTable
-- Beware: on some architectures, after reading from the jump table, the
-- resulting addresses must be aligned. See the IPAlignment class.
data JumpTable arch ids
= AbsoluteJumpTable (ArrayRead arch ids (ArchAddrWidth arch))
-- | `RelativeJumpTable base read ext` describes information about a jump table read.
--
-- The value is computed as `baseVal + readVal` where
--
-- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at the memory
-- read described by `read` with the sign of `ext`.
| forall w . RelativeJumpTable (ArchSegmentOff arch) (ArrayRead arch ids w) Extension
deriving instance RegisterInfo (ArchReg arch) => Show (JumpTable arch ids)
-- | The array read done when computing the jump table. N.B. other processing
-- may be needed on the value read in this way to know the address to jump to.
jumpTableRead :: JumpTable arch ids -> Some (ArrayRead arch ids)
jumpTableRead (AbsoluteJumpTable r) = Some r
jumpTableRead (RelativeJumpTable _ r _) = Some r
absValueAsSegmentOff absValueAsSegmentOff
:: forall w :: forall w
@ -529,14 +506,14 @@ valueAsSegmentOffWithTransfer mem aps base
= valueAsSegmentOff mem base = valueAsSegmentOff mem base
<|> absValueAsSegmentOff mem (transferValue aps base) <|> absValueAsSegmentOff mem (transferValue aps base)
-- | This interprets a value as a memory segment offset plus value. -- | This attempts to pattern match a value as a memory address plus a value.
valueAsArrayOffset valueAsMemOffset
:: RegisterInfo (ArchReg arch) :: RegisterInfo (ArchReg arch)
=> Memory (ArchAddrWidth arch) => Memory (ArchAddrWidth arch)
-> AbsProcessorState (ArchReg arch) ids -> AbsProcessorState (ArchReg arch) ids
-> ArchAddrValue arch ids -> ArchAddrValue arch ids
-> Maybe (ArchSegmentOff arch, ArchAddrValue arch ids) -> Maybe (ArchSegmentOff arch, ArchAddrValue arch ids)
valueAsArrayOffset mem aps v valueAsMemOffset mem aps v
| Just (BVAdd _ base offset) <- valueAsApp v | Just (BVAdd _ base offset) <- valueAsApp v
, Just ptr <- valueAsSegmentOffWithTransfer mem aps base , Just ptr <- valueAsSegmentOffWithTransfer mem aps base
= Just (ptr, offset) = Just (ptr, offset)
@ -548,110 +525,203 @@ valueAsArrayOffset mem aps v
| otherwise = Nothing | otherwise = Nothing
-- | See if the value can be interpreted as a read of memory -- | See if the value can be interpreted as a read of memory
matchArrayRead matchBoundedMemArray
:: (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch)) :: (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch))
=> Memory (ArchAddrWidth arch) => Memory (ArchAddrWidth arch)
-> AbsProcessorState (ArchReg arch) ids -> AbsProcessorState (ArchReg arch) ids
-> BVValue arch ids w -> BVValue arch ids w
-> Maybe (ArrayRead arch ids w) -> Maybe (BoundedMemArray arch (BVType w), ArchAddrValue arch ids)
matchArrayRead mem aps val matchBoundedMemArray mem aps val
| Just (ReadMem addr tp) <- valueAsRhs val
| Just (ReadMem addr size) <- valueAsRhs val , Just (base, offset) <- valueAsMemOffset mem aps addr
, Just (base, offset) <- valueAsArrayOffset mem aps addr
, Just (stride, ixVal) <- valueAsStaticMultiplication offset , Just (stride, ixVal) <- valueAsStaticMultiplication offset
, memReprBytes size <= stride -- Check stride covers at least number of bytes read.
= Just $ ArrayRead , memReprBytes tp <= stride
{ arBase = base -- Resolve a static upper bound to array.
, arIx = ixVal , Right (Jmp.IntegerUpperBound bnd)
, arStride = stride <- Jmp.unsignedUpperBound (aps^.indexBounds) ixVal
, arSize = size , cnt <- bnd+1
} -- Check array actually fits in memory.
, msegByteCountAfter base < cnt * toInteger stride
-- Get memory contents after base
, Right contents <- contentsAfterSegmentOff base
-- Break up contents into a list of slices each with size stide
, Right (strideSlices,_) <- sliceMemContents (fromInteger stride) cnt contents
-- Take the given number of bytes out of each slices
, Right slices <- traverse (\s -> fst <$> splitSegmentRangeList s (fromInteger (memReprBytes tp)))
(V.fromList strideSlices)
= let r = BoundedMemArray
{ arBase = base
, arStride = stride
, arEltType = tp
, arSlices = slices
}
in Just (r, ixVal)
| otherwise = Nothing | otherwise = Nothing
------------------------------------------------------------------------
-- Extension
-- | Information about a value that is the signed or unsigned extension of another
-- value.
--
-- This is used for jump tables, and only supports widths that are in memory
data Extension w = Extension { _extIsSigned :: !Bool
, _extWidth :: !(AddrWidthRepr w)
-- ^ Width of argument. is to.
}
deriving (Show)
-- | Just like Some (BVValue arch ids), but doesn't run into trouble with -- | Just like Some (BVValue arch ids), but doesn't run into trouble with
-- partially applying the BVValue type synonym. -- partially applying the BVValue type synonym.
data SomeBVValue arch ids = forall tp. SomeBVValue (BVValue arch ids tp) data SomeExt arch ids = forall m . SomeExt !(BVValue arch ids m) !(Extension m)
-- | Identify how value is extended. matchAddr :: NatRepr w -> Maybe (AddrWidthRepr w)
matchExtension :: ArchAddrValue arch ids matchAddr w
-> (Extension, SomeBVValue arch ids) | Just Refl <- testEquality w n32 = Just Addr32
matchExtension offset = | Just Refl <- testEquality w n64 = Just Addr64
case valueAsApp offset of | otherwise = Nothing
Just (SExt val' _) -> (Signed, SomeBVValue val')
Just (UExt val' _) -> (Unsigned, SomeBVValue val')
_ -> (Unsigned, SomeBVValue offset)
-- | Figure out if this is a jump table. -- | `matchExtension x` matches in `x` has the form `(uext y w)` or `(sext y w)` and returns
matchJumpTable :: ( IPAlignment arch -- a description about the extension as well as the pattern `y`.
, MemWidth (ArchAddrWidth arch) matchExtension :: forall arch ids
, RegisterInfo (ArchReg arch) . ( MemWidth (ArchAddrWidth arch)
) , HasRepr (ArchReg arch) TypeRepr)
=> Memory (ArchAddrWidth arch) => ArchAddrValue arch ids
-> AbsProcessorState (ArchReg arch) ids -> SomeExt arch ids
-> ArchAddrValue arch ids -- ^ Value that's assigned to the IP. matchExtension val =
-> Maybe (JumpTable arch ids) case valueAsApp val of
matchJumpTable mem aps ip Just (SExt val' _w) | Just repr <- matchAddr (typeWidth val') -> SomeExt val' (Extension True repr)
Just (UExt val' _w) | Just repr <- matchAddr (typeWidth val') -> SomeExt val' (Extension False repr)
_ -> SomeExt val (Extension False (addrWidthRepr @(ArchAddrWidth arch) undefined))
-- | `extendDyn ext end bs` parses the bytestring using the extension
-- and endianness information, and returns the extended value.
extendDyn :: Extension w -> Endianness -> BS.ByteString -> Integer
extendDyn (Extension True Addr32) end bs = toInteger (bsWord32 end bs)
extendDyn (Extension True Addr64) end bs = toInteger (bsWord64 end bs)
extendDyn (Extension False Addr32) end bs = toSigned n32 (toInteger (bsWord32 end bs))
extendDyn (Extension False Addr64) end bs = toSigned n64 (toInteger (bsWord64 end bs))
------------------------------------------------------------------------
-- JumpTableLayout
-- | This describes the layout of a jump table.
-- Beware: on some architectures, after reading from the jump table, the
-- resulting addresses must be aligned. See the IPAlignment class.
data JumpTableLayout arch
= AbsoluteJumpTable !(BoundedMemArray arch (BVType (ArchAddrWidth arch)))
-- ^ `AbsoluteJumpTable r` describes a jump table where the jump
-- target is directly stored in the array read `r`.
| forall w . RelativeJumpTable !(ArchSegmentOff arch)
!(BoundedMemArray arch (BVType w))
!(Extension w)
-- ^ `RelativeJumpTable base read ext` describes information about a
-- jump table where all jump targets are relative to a fixed base
-- address.
--
-- The value is computed as `baseVal + readVal` where
--
-- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at
-- the memory read described by `read` with the sign of `ext`.
deriving instance RegisterInfo (ArchReg arch) => Show (JumpTableLayout arch)
-- This function resolves jump table entries.
-- It is a recursive function that has an index into the jump table.
-- If the current index can be interpreted as a intra-procedural jump,
-- then it will add that to the current procedure.
-- This returns the last address read.
resolveAsAbsoluteAddr :: forall w
. Memory w
-> Endianness
-> [SegmentRange w]
-> Maybe (MemAddr w)
resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $
case l of
[ByteRegion bs] -> do
absoluteAddr <$> addrRead endianness bs
[RelocationRegion r] -> do
let off = relocationOffset r
when (relocationIsRel r) $ Nothing
case relocationSym r of
SymbolRelocation{} -> Nothing
SectionIdentifier idx -> do
addr <- Map.lookup idx (memSectionAddrMap mem)
pure $ relativeSegmentAddr addr & incAddr (toInteger off)
_ -> Nothing
-- This function resolves jump table entries.
-- It is a recursive function that has an index into the jump table.
-- If the current index can be interpreted as a intra-procedural jump,
-- then it will add that to the current procedure.
-- This returns the last address read.
resolveRelativeJumps :: forall arch w
. ( MemWidth (ArchAddrWidth arch)
, IPAlignment arch
, RegisterInfo (ArchReg arch)
)
=> Memory (ArchAddrWidth arch)
-> ArchSegmentOff arch
-- -> MemRepr (BVType w)
-> BoundedMemArray arch (BVType w)
-> Extension w
-> Maybe (V.Vector (ArchSegmentOff arch))
resolveRelativeJumps mem base arrayRead ext = do
let slices = arSlices arrayRead
BVMemRepr _sz endianness <- pure $ arEltType arrayRead
forM slices $ \l -> do
case l of
[ByteRegion bs]
| tgtAddr <- relativeSegmentAddr base
& incAddr (extendDyn ext endianness bs)
, Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr)
, Perm.isExecutable (segmentFlags (msegSegment tgt))
-> Just tgt
_ -> Nothing
-- | Resolve an ip to a jump table.
matchJumpTableRef :: forall arch ids
. ( IPAlignment arch
, MemWidth (ArchAddrWidth arch)
, RegisterInfo (ArchReg arch)
)
=> Memory (ArchAddrWidth arch)
-> AbsProcessorState (ArchReg arch) ids
-> ArchAddrValue arch ids -- ^ Value that's assigned to the IP.
-> Maybe (JumpTableLayout arch, V.Vector (ArchSegmentOff arch), ArchAddrValue arch ids)
matchJumpTableRef mem aps ip
-- Turn a plain read address into base + offset. -- Turn a plain read address into base + offset.
| Just arrayRead <- matchArrayRead mem aps ip | Just (arrayRead,idx) <- matchBoundedMemArray mem aps ip
, isReadOnlyArrayRead arrayRead , isReadOnlyBoundedMemArray arrayRead
= Just (AbsoluteJumpTable arrayRead) , BVMemRepr _arByteCount endianness <- arEltType arrayRead = do
let go :: [SegmentRange (ArchAddrWidth arch)] -> Maybe (MemSegmentOff (ArchAddrWidth arch))
go contents = do
addr <- resolveAsAbsoluteAddr mem endianness contents
tgt <- asSegmentOff mem (toIPAligned @arch addr)
unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing
pure tgt
tbl <- traverse go (arSlices arrayRead)
pure (AbsoluteJumpTable arrayRead, tbl, idx)
-- gcc-style PIC jump tables on x86 use, roughly, -- gcc-style PIC jump tables on x86 use, roughly,
-- ip = jmptbl + jmptbl[index] -- ip = jmptbl + jmptbl[index]
-- where jmptbl is a pointer to the lookup table. -- where jmptbl is a pointer to the lookup table.
| Just unalignedIP <- fromIPAligned ip | Just unalignedIP <- fromIPAligned ip
, Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem aps unalignedIP , Just (tgtBase, tgtOffset) <- valueAsMemOffset mem aps unalignedIP
, (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset , SomeExt shortOffset ext <- matchExtension tgtOffset
, Just arrayRead <- matchArrayRead mem aps shortOffset , Just (arrayRead, idx) <- matchBoundedMemArray mem aps shortOffset
, isReadOnlyArrayRead arrayRead , isReadOnlyBoundedMemArray arrayRead
= Just (RelativeJumpTable tgtBase arrayRead ext) , Just tbl <- resolveRelativeJumps mem tgtBase arrayRead ext
= Just (RelativeJumpTable tgtBase arrayRead ext, tbl, idx)
| otherwise | otherwise
= Nothing = Nothing
-- | This describes why we could not infer the bounds of code that looked like it
-- was accessing a jump table.
data JumpTableBoundsError arch ids
= CouldNotInterpretAbsValue !(AbsValue (ArchAddrWidth arch) (BVType (ArchAddrWidth arch)))
| UpperBoundMismatch !(Jmp.UpperBound (BVType (ArchAddrWidth arch))) !Integer
| CouldNotFindBound String !(ArchAddrValue arch ids)
-- | Show the jump table bounds
showJumpTableBoundsError :: ArchConstraints arch => JumpTableBoundsError arch ids -> String
showJumpTableBoundsError err =
case err of
CouldNotInterpretAbsValue val ->
"Index <" ++ show val ++ "> is not a stride."
UpperBoundMismatch bnd index_range ->
"Upper bound mismatch at jumpbounds "
++ show bnd
++ " domain "
++ show index_range
CouldNotFindBound msg jump_index ->
show "Could not find jump table: " ++ msg ++ "\n"
++ show (ppValueAssignments jump_index)
-- | Returns the index bounds for a jump table of 'Nothing' if this is
-- not a block table.
getJumpTableBounds :: ArchConstraints a
=> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers.
-> ArrayRead a ids w
-> Either String (ArchAddrWord a)
getJumpTableBounds regs arrayRead =
case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of
Right (Jmp.IntegerUpperBound maxIx) ->
let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in
if rangeInReadonlySegment (arBase arrayRead) (fromInteger arrayByteSize)
then Right $! fromInteger maxIx
else Left $ "Jump table range is not in readonly memory: "
++ show maxIx ++ " entries/" ++ show arrayByteSize ++ " bytes"
++ " starting at " ++ show (arBase arrayRead)
Left msg -> Left (showJumpTableBoundsError (CouldNotFindBound msg (arIx arrayRead)))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- ParseState -- ParseState
@ -663,6 +733,10 @@ data ParseState arch ids =
, _intraJumpTargets :: , _intraJumpTargets ::
![(ArchSegmentOff arch, AbsBlockState (ArchReg arch))] ![(ArchSegmentOff arch, AbsBlockState (ArchReg arch))]
, _newFunctionAddrs :: ![ArchSegmentOff arch] , _newFunctionAddrs :: ![ArchSegmentOff arch]
-- ^ List of candidate functions found when parsing block.
--
-- Note. In a binary, these could denote the non-executable
-- segments, so they are filtered before traversing.
} }
-- | Code addresses written to memory. -- | Code addresses written to memory.
@ -688,11 +762,10 @@ recordWriteStmt arch_info mem regs stmt = do
WriteMem _addr repr v WriteMem _addr repr v
| Just Refl <- testEquality repr (addrMemRepr arch_info) -> do | Just Refl <- testEquality repr (addrMemRepr arch_info) -> do
withArchConstraints arch_info $ do withArchConstraints arch_info $ do
let addrs = concretizeAbsCodePointers mem (transferValue regs v) let addrs = identifyConcreteAddresses mem (transferValue regs v)
writtenCodeAddrs %= (addrs ++) writtenCodeAddrs %= (filter isExecutableSegOff addrs ++)
_ -> return () _ -> return ()
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- ParseContext -- ParseContext
@ -721,126 +794,34 @@ addrMemRepr arch_info =
identifyCallTargets :: forall arch ids identifyCallTargets :: forall arch ids
. (RegisterInfo (ArchReg arch)) . (RegisterInfo (ArchReg arch))
=> AbsProcessorState (ArchReg arch) ids => Memory (ArchAddrWidth arch)
-> AbsBlockState (ArchReg arch)
-- ^ Abstract processor state just before call. -- ^ Abstract processor state just before call.
-> BVValue arch ids (ArchAddrWidth arch) -> RegState (ArchReg arch) (Value arch ids)
-> [ArchSegmentOff arch] -> [ArchSegmentOff arch]
identifyCallTargets absState ip = do identifyCallTargets mem absState s = do
-- Code pointers from abstract domains. -- Code pointers from abstract domains.
let mem = absMem absState let def = identifyConcreteAddresses mem (absState^.absRegState^.curIP)
let def = concretizeAbsCodePointers mem (transferValue absState ip) case s^.boundValue ip_reg of
let segOffAddrs :: Maybe (ArchSegmentOff arch) -> [ArchSegmentOff arch] BVValue _ x ->
segOffAddrs (Just addr) maybeToList $ resolveAbsoluteAddr mem (fromInteger x)
| segmentFlags (msegSegment addr) `Perm.hasPerm` Perm.execute = RelocatableValue _ a ->
[addr] maybeToList $ asSegmentOff mem a
segOffAddrs _ = []
case ip of
BVValue _ x -> segOffAddrs $ resolveAbsoluteAddr mem (fromInteger x)
RelocatableValue _ a -> segOffAddrs $ asSegmentOff mem a
SymbolValue{} -> def SymbolValue{} -> def
AssignedValue a -> AssignedValue a ->
case assignRhs a of case assignRhs a of
-- See if we can get a value out of a concrete memory read. -- See if we can get a value out of a concrete memory read.
ReadMem addr (BVMemRepr _ end) ReadMem addr (BVMemRepr _ end)
| Just laddr <- valueAsMemAddr addr | Just laddr <- valueAsMemAddr addr
, Right val <- readAddr mem end laddr -> , Right val <- readSegmentOff mem end laddr ->
segOffAddrs (asSegmentOff mem val) ++ def val : def
_ -> def _ -> def
Initial _ -> def Initial _ -> def
sliceMemContents' addNewFunctionAddrs :: [ArchSegmentOff arch]
:: MemWidth w -> State (ParseState arch ids) ()
=> Int -- ^ Number of bytes in each slice. addNewFunctionAddrs addrs =
-> [[SegmentRange w]] -- ^ Previous slices newFunctionAddrs %= (++addrs)
-> Integer -- ^ Number of slices to return
-> [SegmentRange w] -- ^ Ranges to process next
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
sliceMemContents' stride prev c next
| c <= 0 = pure (reverse prev, next)
| otherwise =
case splitSegmentRangeList next stride of
Left e -> Left e
Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest
-- | `sliceMemContents stride cnt contents` splits contents up into `cnt`
-- memory regions each with size `stride`.
sliceMemContents
:: MemWidth w
=> Int -- ^ Number of bytes in each slice.
-> Integer -- ^ Number of slices to return
-> [SegmentRange w] -- ^ Ranges to process next
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
sliceMemContents stride c next = sliceMemContents' stride [] c next
-- `getJumpTableContents base cnt stride` returns a list with
getJumpTableContents :: MemWidth w
=> MemSegmentOff w
-> Integer
-> Integer
-> Maybe [[SegmentRange w]]
getJumpTableContents base cnt stride = do
let totalSize = cnt * stride
when (msegByteCountAfter base < totalSize) $
Nothing
contents <-
case contentsAfterSegmentOff base of
Left _ -> Nothing
Right l -> pure l
case sliceMemContents (fromInteger stride) cnt contents of
Left _ -> Nothing
Right (s,_) -> Just s
-- This function resolves jump table entries.
-- It is a recursive function that has an index into the jump table.
-- If the current index can be interpreted as a intra-procedural jump,
-- then it will add that to the current procedure.
-- This returns the last address read.
resolveJumps :: forall arch ids
. ( MemWidth (ArchAddrWidth arch)
, IPAlignment arch
, RegisterInfo (ArchReg arch)
)
=> Memory (ArchAddrWidth arch)
-> JumpTable arch ids
-> [[SegmentRange (ArchAddrWidth arch)]]
-> Maybe [ArchSegmentOff arch]
resolveJumps mem (AbsoluteJumpTable arrayRead) slices = do
BVMemRepr _arByteCount endianness <- pure $ arSize arrayRead
forM slices $ \l -> do
case l of
[ByteRegion bs] -> do
val <- addrRead endianness bs
tgt <- asSegmentOff mem (toIPAligned @arch (absoluteAddr val))
unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing
pure tgt
[RelocationRegion r] -> do
let off = relocationOffset r
when (relocationIsRel r) $ Nothing
case relocationSym r of
SymbolRelocation{} -> Nothing
SectionIdentifier idx -> do
addr <- Map.lookup idx (memSectionAddrMap mem)
incSegmentOff addr (toInteger off)
_ -> Nothing
resolveJumps mem (RelativeJumpTable base arrayRead ext) slices = do
BVMemRepr sz endianness <- pure $ arSize arrayRead
let readFn
| Just Refl <- testEquality sz (knownNat :: NatRepr 4) =
extendDyn (knownNat :: NatRepr 32) ext . bsWord32 endianness
| Just Refl <- testEquality sz (knownNat :: NatRepr 8) =
extendDyn (knownNat :: NatRepr 64) ext . bsWord64 endianness
| otherwise =
error "Do not support this width."
forM slices $ \l -> do
case l of
[ByteRegion bs]
| tgtAddr <- relativeSegmentAddr base
& incAddr (readFn (BS.take (fromInteger (natValue sz)) bs))
, Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr)
, Perm.isExecutable (segmentFlags (msegSegment tgt))
-> Just tgt
_ -> Nothing
-- | This parses a block that ended with a fetch and execute instruction. -- | This parses a block that ended with a fetch and execute instruction.
parseFetchAndExecute :: forall arch ids parseFetchAndExecute :: forall arch ids
@ -896,8 +877,8 @@ parseFetchAndExecute ctx idx stmts regs s = do
-- Merge caller return information -- Merge caller return information
intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
-- Use the abstract domain to look for new code pointers for the current IP. -- Use the abstract domain to look for new code pointers for the current IP.
let addrs = identifyCallTargets absProcState' (s^.boundValue ip_reg) addNewFunctionAddrs $
newFunctionAddrs %= (++ addrs) identifyCallTargets mem abst s
-- Use the call-specific code to look for new IPs. -- Use the call-specific code to look for new IPs.
let r = StatementList { stmtsIdent = idx let r = StatementList { stmtsIdent = idx
@ -953,16 +934,8 @@ parseFetchAndExecute ctx idx stmts regs s = do
} }
pure (ret, idx+1) pure (ret, idx+1)
-- Block ends with what looks like a jump table. -- Block ends with what looks like a jump table.
| Just jt <- matchJumpTable mem absProcState' (s^.curIP) | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do
, Some arrayRead <- jumpTableRead jt
, Right maxIdx <- getJumpTableBounds absProcState' arrayRead
, Just slices <-
getJumpTableContents (arBase arrayRead)
(toInteger maxIdx+1)
(arStride arrayRead)
-- Read addresses
, Just readAddrs <-
resolveJumps (pctxMemory ctx) jt slices -> do
mapM_ (recordWriteStmt ainfo mem absProcState') stmts mapM_ (recordWriteStmt ainfo mem absProcState') stmts
let abst :: AbsBlockState (ArchReg arch) let abst :: AbsBlockState (ArchReg arch)
@ -970,11 +943,11 @@ parseFetchAndExecute ctx idx stmts regs s = do
seq abst $ do seq abst $ do
forM_ readAddrs $ \tgtAddr -> do forM_ entries $ \tgtAddr -> do
let abst' = abst & setAbsIP tgtAddr let abst' = abst & setAbsIP tgtAddr
intraJumpTargets %= ((tgtAddr, abst'):) intraJumpTargets %= ((tgtAddr, abst'):)
let term = ParsedLookupTable s (arIx arrayRead) (V.fromList readAddrs) let term = ParsedLookupTable s jumpIndex entries
let ret = StatementList { stmtsIdent = idx let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts , stmtsNonterm = stmts
, stmtsTerm = term , stmtsTerm = term
@ -982,20 +955,20 @@ parseFetchAndExecute ctx idx stmts regs s = do
} }
pure (ret,idx+1) pure (ret,idx+1)
-- Check for tail call (anything where we are right at stack height) -- Check for tail call when the stack pointer points to the return address.
-- --
-- TODO: this makes sense for x86, but is not correct for all architectures -- TODO: this makes sense for x86, but is not correct for all architectures
| ptrType <- addrMemRepr ainfo | ptrType <- addrMemRepr ainfo
, sp_val <- s^.boundValue sp_reg , sp_val <- s^.boundValue sp_reg
, ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do , ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do
(,idx+1) <$> finishWithTailCall absProcState' finishWithTailCall absProcState'
-- Is this a jump to a known function entry? We're already past the -- Is this a jump to a known function entry? We're already past the
-- "identifyCall" case, so this must be a tail call, assuming we trust our -- "identifyCall" case, so this must be a tail call, assuming we trust our
-- known function entry info. -- known function entry info.
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
, tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do , tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do
(,idx+1) <$> finishWithTailCall absProcState' finishWithTailCall absProcState'
-- Block that ends with some unknown -- Block that ends with some unknown
| otherwise -> do | otherwise -> do
@ -1009,7 +982,7 @@ parseFetchAndExecute ctx idx stmts regs s = do
where finishWithTailCall :: RegisterInfo (ArchReg arch) where finishWithTailCall :: RegisterInfo (ArchReg arch)
=> AbsProcessorState (ArchReg arch) ids => AbsProcessorState (ArchReg arch) ids
-> State (ParseState arch ids) (StatementList arch ids) -> State (ParseState arch ids) (StatementList arch ids, Word64)
finishWithTailCall absProcState' = do finishWithTailCall absProcState' = do
let mem = pctxMemory ctx let mem = pctxMemory ctx
mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts
@ -1019,14 +992,15 @@ parseFetchAndExecute ctx idx stmts regs s = do
seq abst $ do seq abst $ do
-- Look for new instruction pointers -- Look for new instruction pointers
let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) addNewFunctionAddrs $
newFunctionAddrs %= (++ addrs) identifyConcreteAddresses mem (abst^.absRegState^.curIP)
pure StatementList { stmtsIdent = idx let ret = StatementList { stmtsIdent = idx
, stmtsNonterm = stmts , stmtsNonterm = stmts
, stmtsTerm = ParsedCall s Nothing , stmtsTerm = ParsedCall s Nothing
, stmtsAbsState = absProcState' , stmtsAbsState = absProcState'
} }
seq ret $ pure (ret,idx+1)
-- | this evalutes the statements in a block to expand the information known -- | this evalutes the statements in a block to expand the information known
-- about control flow targets of this block. -- about control flow targets of this block.
@ -1410,16 +1384,22 @@ data DiscoveryOptions
, exploreCodeAddrInMem :: !Bool , exploreCodeAddrInMem :: !Bool
-- ^ If @True@, 'completeDiscoveryState' will -- ^ If @True@, 'completeDiscoveryState' will
-- explore all potential code addresses in -- explore all potential code addresses in
-- memory after exploring other potnetial -- memory after exploring other potential
-- functions. -- functions.
--
-- This is effectively a hack that sometimes
-- allows discovering functions. If you need
-- it, let the author's of Macaw know so that
-- we can find a more principled way.
, logAtAnalyzeFunction :: !Bool , logAtAnalyzeFunction :: !Bool
-- ^ Print a message each time we apply -- ^ Print a message each time we apply
-- discovery analysis to a new function. -- discovery analysis to a new function.
, logAtAnalyzeBlock :: !Bool , logAtAnalyzeBlock :: !Bool
-- ^ Print a message each time we analyze a -- ^ Print a message each time we analyze a
-- block within a function. -- block within a function.
} }
-- | Some default options
defaultDiscoveryOptions :: DiscoveryOptions defaultDiscoveryOptions :: DiscoveryOptions
defaultDiscoveryOptions = defaultDiscoveryOptions =
DiscoveryOptions { exploreFunctionSymbols = True DiscoveryOptions { exploreFunctionSymbols = True
@ -1504,6 +1484,7 @@ completeDiscoveryState initState disOpt funPred = do
postPhase1Discovery <- resolveFuns analyzeFn analyzeBlock postSymState postPhase1Discovery <- resolveFuns analyzeFn analyzeBlock postSymState
-- Discovery functions from memory -- Discovery functions from memory
if exploreCodeAddrInMem disOpt then do if exploreCodeAddrInMem disOpt then do
-- Execute hack of just searching for pointers in memory.
let mem_contents = withArchConstraints ainfo $ memAsAddrPairs mem LittleEndian let mem_contents = withArchConstraints ainfo $ memAsAddrPairs mem LittleEndian
resolveFuns analyzeFn analyzeBlock $ postPhase1Discovery & exploreMemPointers mem_contents resolveFuns analyzeFn analyzeBlock $ postPhase1Discovery & exploreMemPointers mem_contents
else else

View File

@ -18,6 +18,7 @@ some value while regions define a unknown offset in memory.
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Data.Macaw.Memory module Data.Macaw.Memory
( Memory ( Memory
@ -113,6 +114,7 @@ module Data.Macaw.Memory
, addrContentsAfter , addrContentsAfter
, readByteString , readByteString
, readAddr , readAddr
, readSegmentOff
, readWord8 , readWord8
, readWord16be , readWord16be
, readWord16le , readWord16le
@ -133,7 +135,6 @@ module Data.Macaw.Memory
, AddrSymMap , AddrSymMap
) where ) where
import Control.Exception (assert)
import Control.Monad import Control.Monad
import Data.BinarySymbols import Data.BinarySymbols
import Data.Bits import Data.Bits
@ -165,6 +166,8 @@ data AddrWidthRepr w
| (w ~ 64) => Addr64 | (w ~ 64) => Addr64
-- ^ A 64-bit address -- ^ A 64-bit address
deriving instance Show (AddrWidthRepr w)
instance TestEquality AddrWidthRepr where instance TestEquality AddrWidthRepr where
testEquality Addr32 Addr32 = Just Refl testEquality Addr32 Addr32 = Just Refl
testEquality Addr64 Addr64 = Just Refl testEquality Addr64 Addr64 = Just Refl
@ -294,8 +297,8 @@ class (1 <= w) => MemWidth w where
-- The argument is ignored. -- The argument is ignored.
addrWidthRepr :: p w -> AddrWidthRepr w addrWidthRepr :: p w -> AddrWidthRepr w
-- | @addrWidthMod w@ returns @2^(8 * addrSize w - 1)@. -- | @addrWidthMask w@ returns @2^(8 * addrSize w) - 1@.
addrWidthMod :: p w -> Word64 addrWidthMask :: p w -> Word64
-- | Returns number of bytes in addr. -- | Returns number of bytes in addr.
-- --
@ -322,7 +325,7 @@ addrBitSize w = 8 * addrSize w
-- | Convert word64 @x@ into mem word @x mod 2^w-1@. -- | Convert word64 @x@ into mem word @x mod 2^w-1@.
memWord :: forall w . MemWidth w => Word64 -> MemWord w memWord :: forall w . MemWidth w => Word64 -> MemWord w
memWord x = MemWord (x .&. addrWidthMod p) memWord x = MemWord (x .&. addrWidthMask p)
where p :: Proxy w where p :: Proxy w
p = Proxy p = Proxy
@ -364,11 +367,11 @@ instance MemWidth w => Integral (MemWord w) where
instance MemWidth w => Bounded (MemWord w) where instance MemWidth w => Bounded (MemWord w) where
minBound = 0 minBound = 0
maxBound = MemWord (addrWidthMod (Proxy :: Proxy w)) maxBound = MemWord (addrWidthMask (Proxy :: Proxy w))
instance MemWidth 32 where instance MemWidth 32 where
addrWidthRepr _ = Addr32 addrWidthRepr _ = Addr32
addrWidthMod _ = 0xffffffff addrWidthMask _ = 0xffffffff
addrRotate (MemWord w) i = addrRotate (MemWord w) i =
MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i))
addrSize _ = 4 addrSize _ = 4
@ -378,7 +381,7 @@ instance MemWidth 32 where
instance MemWidth 64 where instance MemWidth 64 where
addrWidthRepr _ = Addr64 addrWidthRepr _ = Addr64
addrWidthMod _ = 0xffffffffffffffff addrWidthMask _ = 0xffffffffffffffff
addrRotate (MemWord w) i = MemWord (w `rotate` i) addrRotate (MemWord w) i = MemWord (w `rotate` i)
addrSize _ = 8 addrSize _ = 8
addrRead e s addrRead e s
@ -976,17 +979,23 @@ memAsAddrPairs :: Memory w
-> [(MemSegmentOff w, MemSegmentOff w)] -> [(MemSegmentOff w, MemSegmentOff w)]
memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do
seg <- memSegments mem seg <- memSegments mem
(contents_offset,r) <- contentsRanges (segmentContents seg) (contentsOffset,r) <- contentsRanges (segmentContents seg)
let sz = addrSize mem let sz :: Int
sz = addrSize mem
case r of case r of
ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do ByteRegion bs -> do
(off,w) <- -- contentsOffset
zip [contents_offset..] -- Check offset if a multiple
(regularChunks (fromIntegral sz) bs) let mask = sz - 1
when (BS.length bs .&. mask /= 0) $
error "Unexpected offset."
(byteOff,w) <-
zip [contentsOffset,contentsOffset+fromIntegral sz..]
(regularChunks sz bs)
let Just val = addrRead end w let Just val = addrRead end w
case resolveAbsoluteAddr mem val of case resolveAbsoluteAddr mem val of
Just val_ref -> do Just val_ref -> do
pure (MemSegmentOff seg off, val_ref) pure (MemSegmentOff seg byteOff, val_ref)
_ -> [] _ -> []
RelocationRegion{} -> [] RelocationRegion{} -> []
BSSRegion{} -> [] BSSRegion{} -> []
@ -1344,6 +1353,23 @@ readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do
Just val -> Right $ MemAddr 0 val Just val -> Right $ MemAddr 0 val
Nothing -> error $ "readAddr internal error: readByteString result too short." Nothing -> error $ "readAddr internal error: readByteString result too short."
-- | Read the given address as a reference to a memory segment offset, or report a
-- memory read error.
readSegmentOff :: Memory w
-> Endianness
-> MemAddr w
-> Either (MemoryError w) (MemSegmentOff w)
readSegmentOff mem end addr = addrWidthClass (memAddrWidth mem) $ do
let sz = fromIntegral (addrSize addr)
bs <- readByteString mem addr sz
case addrRead end bs of
Just val -> do
let addrInMem = MemAddr 0 val
case asSegmentOff mem addrInMem of
Just res -> pure res
Nothing -> Left (InvalidAddr addrInMem)
Nothing -> error $ "readSegmentOff internal error: readByteString result too short."
-- | Read a single byte. -- | Read a single byte.
readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8 readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8
readWord8 mem addr = bsWord8 <$> readByteString mem addr 1 readWord8 mem addr = bsWord8 <$> readByteString mem addr 1