mirror of
https://github.com/GaloisInc/macaw.git
synced 2024-12-29 00:59:09 +03:00
Bug fixes to code discovery; introduce JumpTableLayout.
This fixes bugs in scanning addresses in memory, and failing to check the executable status of function entry points.
This commit is contained in:
parent
e4a27d7bbc
commit
036f39cbb4
@ -4,25 +4,16 @@ Maintainer : Joe Hendrix <jhendrix@galois.com>, Simon Winwood <sjw@galois.
|
|||||||
|
|
||||||
This provides information about code discovered in binaries.
|
This provides information about code discovered in binaries.
|
||||||
-}
|
-}
|
||||||
{-# LANGUAGE ConstraintKinds #-}
|
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
{-# LANGUAGE DataKinds #-}
|
|
||||||
{-# LANGUAGE DoAndIfThenElse #-}
|
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
{-# LANGUAGE FlexibleInstances #-}
|
|
||||||
{-# LANGUAGE GADTs #-}
|
{-# LANGUAGE GADTs #-}
|
||||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
{-# LANGUAGE KindSignatures #-}
|
|
||||||
{-# LANGUAGE MultiParamTypeClasses #-}
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
||||||
{-# LANGUAGE PatternGuards #-}
|
{-# LANGUAGE PatternGuards #-}
|
||||||
{-# LANGUAGE RankNTypes #-}
|
|
||||||
{-# LANGUAGE TypeOperators #-}
|
|
||||||
{-# LANGUAGE ViewPatterns #-}
|
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
{-# LANGUAGE StandaloneDeriving #-}
|
{-# LANGUAGE StandaloneDeriving #-}
|
||||||
{-# LANGUAGE TupleSections #-}
|
|
||||||
{-# LANGUAGE TypeApplications #-}
|
{-# LANGUAGE TypeApplications #-}
|
||||||
{-# LANGUAGE TypeFamilies #-}
|
{-# LANGUAGE UndecidableInstances #-}
|
||||||
module Data.Macaw.Discovery
|
module Data.Macaw.Discovery
|
||||||
( -- * DiscoveryInfo
|
( -- * DiscoveryInfo
|
||||||
State.DiscoveryState(..)
|
State.DiscoveryState(..)
|
||||||
@ -104,27 +95,20 @@ import Data.Macaw.Types
|
|||||||
------------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
-- Utilities
|
-- Utilities
|
||||||
|
|
||||||
|
isExecutableSegOff :: MemSegmentOff w -> Bool
|
||||||
|
isExecutableSegOff sa =
|
||||||
|
segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute
|
||||||
|
|
||||||
-- | Get code pointers out of a abstract value.
|
-- | Get code pointers out of a abstract value.
|
||||||
concretizeAbsCodePointers :: MemWidth w
|
identifyConcreteAddresses :: MemWidth w
|
||||||
=> Memory w
|
=> Memory w
|
||||||
-> AbsValue w (BVType w)
|
-> AbsValue w (BVType w)
|
||||||
-> [MemSegmentOff w]
|
-> [MemSegmentOff w]
|
||||||
concretizeAbsCodePointers mem (FinSet s) =
|
identifyConcreteAddresses mem (FinSet s) =
|
||||||
[ sa
|
mapMaybe (resolveAbsoluteAddr mem . fromInteger) (Set.toList s)
|
||||||
| a <- Set.toList s
|
identifyConcreteAddresses _ (CodePointers s _) = Set.toList s
|
||||||
, sa <- maybeToList (resolveAbsoluteAddr mem (fromInteger a))
|
identifyConcreteAddresses _mem StridedInterval{} = []
|
||||||
, segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute
|
identifyConcreteAddresses _mem _ = []
|
||||||
]
|
|
||||||
concretizeAbsCodePointers _ (CodePointers s _) =
|
|
||||||
[ sa
|
|
||||||
| sa <- Set.toList s
|
|
||||||
, segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute
|
|
||||||
]
|
|
||||||
-- FIXME: this is dangerous !!
|
|
||||||
concretizeAbsCodePointers _mem StridedInterval{} = [] -- FIXME: this case doesn't make sense
|
|
||||||
-- debug DCFG ("I think these are code pointers!: " ++ show s) $ []
|
|
||||||
-- filter (isCodeAddr mem) $ fromInteger <$> SI.toList s
|
|
||||||
concretizeAbsCodePointers _mem _ = []
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
-- | Return true if this address was added because of the contents of a global address
|
-- | Return true if this address was added because of the contents of a global address
|
||||||
@ -264,14 +248,29 @@ dropUnusedCodeInParsedBlock ainfo b =
|
|||||||
------------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
-- Memory utilities
|
-- Memory utilities
|
||||||
|
|
||||||
-- | Return true if range is entirely contained within a single read only segment.Q
|
sliceMemContents'
|
||||||
rangeInReadonlySegment :: MemWidth w
|
:: MemWidth w
|
||||||
=> MemSegmentOff w -- ^ Start of range
|
=> Int -- ^ Number of bytes in each slice.
|
||||||
-> MemWord w -- ^ The size of the range
|
-> [[SegmentRange w]] -- ^ Previous slices
|
||||||
-> Bool
|
-> Integer -- ^ Number of slices to return
|
||||||
rangeInReadonlySegment mseg size =
|
-> [SegmentRange w] -- ^ Ranges to process next
|
||||||
size <= segmentSize (msegSegment mseg) - msegOffset mseg
|
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
|
||||||
&& Perm.isReadonly (segmentFlags (msegSegment mseg))
|
sliceMemContents' stride prev c next
|
||||||
|
| c <= 0 = pure (reverse prev, next)
|
||||||
|
| otherwise =
|
||||||
|
case splitSegmentRangeList next stride of
|
||||||
|
Left e -> Left e
|
||||||
|
Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest
|
||||||
|
|
||||||
|
-- | `sliceMemContents stride cnt contents` splits contents up into `cnt`
|
||||||
|
-- memory regions each with size `stride`.
|
||||||
|
sliceMemContents
|
||||||
|
:: MemWidth w
|
||||||
|
=> Int -- ^ Number of bytes in each slice.
|
||||||
|
-> Integer -- ^ Number of slices to return
|
||||||
|
-> [SegmentRange w] -- ^ Ranges to process next
|
||||||
|
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
|
||||||
|
sliceMemContents stride c next = sliceMemContents' stride [] c next
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
-- DiscoveryState utilities
|
-- DiscoveryState utilities
|
||||||
@ -287,6 +286,8 @@ markAddrAsFunction :: FunctionExploreReason (ArchAddrWidth arch)
|
|||||||
markAddrAsFunction rsn addr s
|
markAddrAsFunction rsn addr s
|
||||||
-- Do nothing if function is already explored.
|
-- Do nothing if function is already explored.
|
||||||
| Map.member addr (s^.funInfo) || Map.member addr (s^.unexploredFunctions) = s
|
| Map.member addr (s^.funInfo) || Map.member addr (s^.unexploredFunctions) = s
|
||||||
|
-- Ignore if address is not in an executable segment.
|
||||||
|
| isExecutableSegOff addr = s
|
||||||
| otherwise = addrWidthClass (memAddrWidth (memory s)) $
|
| otherwise = addrWidthClass (memAddrWidth (memory s)) $
|
||||||
-- We check that the function address ignores bytes so that we do
|
-- We check that the function address ignores bytes so that we do
|
||||||
-- not start disassembling at a relocation or BSS region.
|
-- not start disassembling at a relocation or BSS region.
|
||||||
@ -440,63 +441,39 @@ mergeIntraJump src ab tgt = do
|
|||||||
foundAddrs %= Map.insert tgt found_info
|
foundAddrs %= Map.insert tgt found_info
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
-- Jump table bounds
|
-- BoundedMemArray
|
||||||
|
|
||||||
-- | A memory read that looks like array indexing. It read 'arSize' bytes from
|
-- | This describes a region of memory dereferenced in some array read.
|
||||||
|
--
|
||||||
|
-- These regions may be be sparse, given an index `i`, the
|
||||||
-- the address given by 'arBase' + 'arIx'*'arStride'.
|
-- the address given by 'arBase' + 'arIx'*'arStride'.
|
||||||
data ArrayRead arch ids w = ArrayRead
|
data BoundedMemArray arch tp = BoundedMemArray
|
||||||
{ arBase :: ArchSegmentOff arch
|
{ arBase :: !(MemSegmentOff (ArchAddrWidth arch))
|
||||||
, arIx :: ArchAddrValue arch ids
|
-- ^ The base address for array accesses.
|
||||||
, arStride :: Integer
|
, arStride :: !Integer
|
||||||
, arSize :: MemRepr (BVType w)
|
-- ^ Space between elements of the array.
|
||||||
-- ^ Type of element in this array.
|
--
|
||||||
|
-- This will typically be the number of bytes denoted by `arEltType`,
|
||||||
|
-- but may be larger for sparse arrays. `matchBoundedMemArray` will fail
|
||||||
|
-- if stride is less than the number of bytes read.
|
||||||
|
, arEltType :: !(MemRepr tp)
|
||||||
|
-- ^ Resolved type of elements in this array.
|
||||||
|
, arSlices :: !(V.Vector [SegmentRange (ArchAddrWidth arch)])
|
||||||
|
-- ^ The slices of memory in the array.
|
||||||
|
--
|
||||||
|
-- The `i`th element in the vector corresponds to the first `size`
|
||||||
|
-- bytes at address `base + stride * i`.
|
||||||
|
--
|
||||||
|
-- This could be computed from the previous fields, but we check we
|
||||||
|
-- can create it when creating the array read, so we store it to
|
||||||
|
-- avoid recomputing it.
|
||||||
}
|
}
|
||||||
|
|
||||||
deriving instance RegisterInfo (ArchReg arch) => Show (ArrayRead arch ids w)
|
deriving instance RegisterInfo (ArchReg arch) => Show (BoundedMemArray arch tp)
|
||||||
|
|
||||||
-- | Return true if the address stored is readable and not writable.
|
-- | Return true if the address stored is readable and not writable.
|
||||||
isReadOnlyArrayRead :: ArrayRead arch ids w -> Bool
|
isReadOnlyBoundedMemArray :: BoundedMemArray arch tp -> Bool
|
||||||
isReadOnlyArrayRead = Perm.isReadonly . segmentFlags . msegSegment . arBase
|
isReadOnlyBoundedMemArray = Perm.isReadonly . segmentFlags . msegSegment . arBase
|
||||||
|
|
||||||
-- | Number of bytes of size.
|
|
||||||
arSizeBytes :: ArrayRead arch ids w -> Integer
|
|
||||||
arSizeBytes = memReprBytes . arSize
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
-- Extension
|
|
||||||
|
|
||||||
-- | Used to denote how a value should be extended to a full address.
|
|
||||||
data Extension = Signed | Unsigned
|
|
||||||
deriving (Bounded, Enum, Eq, Ord, Read, Show)
|
|
||||||
|
|
||||||
-- | `extendDyn w ext v` treats `v` as a `w`-bit number and returns the underlying
|
|
||||||
extendDyn :: (1 <= w, Integral x) => NatRepr w -> Extension -> x ->Integer
|
|
||||||
extendDyn _ Unsigned = toInteger
|
|
||||||
extendDyn w Signed = toSigned w . toInteger
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
|
||||||
-- JumpTable
|
|
||||||
|
|
||||||
|
|
||||||
-- Beware: on some architectures, after reading from the jump table, the
|
|
||||||
-- resulting addresses must be aligned. See the IPAlignment class.
|
|
||||||
data JumpTable arch ids
|
|
||||||
= AbsoluteJumpTable (ArrayRead arch ids (ArchAddrWidth arch))
|
|
||||||
-- | `RelativeJumpTable base read ext` describes information about a jump table read.
|
|
||||||
--
|
|
||||||
-- The value is computed as `baseVal + readVal` where
|
|
||||||
--
|
|
||||||
-- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at the memory
|
|
||||||
-- read described by `read` with the sign of `ext`.
|
|
||||||
| forall w . RelativeJumpTable (ArchSegmentOff arch) (ArrayRead arch ids w) Extension
|
|
||||||
|
|
||||||
deriving instance RegisterInfo (ArchReg arch) => Show (JumpTable arch ids)
|
|
||||||
|
|
||||||
-- | The array read done when computing the jump table. N.B. other processing
|
|
||||||
-- may be needed on the value read in this way to know the address to jump to.
|
|
||||||
jumpTableRead :: JumpTable arch ids -> Some (ArrayRead arch ids)
|
|
||||||
jumpTableRead (AbsoluteJumpTable r) = Some r
|
|
||||||
jumpTableRead (RelativeJumpTable _ r _) = Some r
|
|
||||||
|
|
||||||
absValueAsSegmentOff
|
absValueAsSegmentOff
|
||||||
:: forall w
|
:: forall w
|
||||||
@ -529,14 +506,14 @@ valueAsSegmentOffWithTransfer mem aps base
|
|||||||
= valueAsSegmentOff mem base
|
= valueAsSegmentOff mem base
|
||||||
<|> absValueAsSegmentOff mem (transferValue aps base)
|
<|> absValueAsSegmentOff mem (transferValue aps base)
|
||||||
|
|
||||||
-- | This interprets a value as a memory segment offset plus value.
|
-- | This attempts to pattern match a value as a memory address plus a value.
|
||||||
valueAsArrayOffset
|
valueAsMemOffset
|
||||||
:: RegisterInfo (ArchReg arch)
|
:: RegisterInfo (ArchReg arch)
|
||||||
=> Memory (ArchAddrWidth arch)
|
=> Memory (ArchAddrWidth arch)
|
||||||
-> AbsProcessorState (ArchReg arch) ids
|
-> AbsProcessorState (ArchReg arch) ids
|
||||||
-> ArchAddrValue arch ids
|
-> ArchAddrValue arch ids
|
||||||
-> Maybe (ArchSegmentOff arch, ArchAddrValue arch ids)
|
-> Maybe (ArchSegmentOff arch, ArchAddrValue arch ids)
|
||||||
valueAsArrayOffset mem aps v
|
valueAsMemOffset mem aps v
|
||||||
| Just (BVAdd _ base offset) <- valueAsApp v
|
| Just (BVAdd _ base offset) <- valueAsApp v
|
||||||
, Just ptr <- valueAsSegmentOffWithTransfer mem aps base
|
, Just ptr <- valueAsSegmentOffWithTransfer mem aps base
|
||||||
= Just (ptr, offset)
|
= Just (ptr, offset)
|
||||||
@ -548,110 +525,203 @@ valueAsArrayOffset mem aps v
|
|||||||
|
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- | See if the value can be interpreted as a read of memory
|
-- | See if the value can be interpreted as a read of memory
|
||||||
matchArrayRead
|
matchBoundedMemArray
|
||||||
:: (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch))
|
:: (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch))
|
||||||
=> Memory (ArchAddrWidth arch)
|
=> Memory (ArchAddrWidth arch)
|
||||||
-> AbsProcessorState (ArchReg arch) ids
|
-> AbsProcessorState (ArchReg arch) ids
|
||||||
-> BVValue arch ids w
|
-> BVValue arch ids w
|
||||||
-> Maybe (ArrayRead arch ids w)
|
-> Maybe (BoundedMemArray arch (BVType w), ArchAddrValue arch ids)
|
||||||
matchArrayRead mem aps val
|
matchBoundedMemArray mem aps val
|
||||||
|
| Just (ReadMem addr tp) <- valueAsRhs val
|
||||||
| Just (ReadMem addr size) <- valueAsRhs val
|
, Just (base, offset) <- valueAsMemOffset mem aps addr
|
||||||
, Just (base, offset) <- valueAsArrayOffset mem aps addr
|
|
||||||
, Just (stride, ixVal) <- valueAsStaticMultiplication offset
|
, Just (stride, ixVal) <- valueAsStaticMultiplication offset
|
||||||
, memReprBytes size <= stride
|
-- Check stride covers at least number of bytes read.
|
||||||
= Just $ ArrayRead
|
, memReprBytes tp <= stride
|
||||||
{ arBase = base
|
-- Resolve a static upper bound to array.
|
||||||
, arIx = ixVal
|
, Right (Jmp.IntegerUpperBound bnd)
|
||||||
, arStride = stride
|
<- Jmp.unsignedUpperBound (aps^.indexBounds) ixVal
|
||||||
, arSize = size
|
, cnt <- bnd+1
|
||||||
}
|
-- Check array actually fits in memory.
|
||||||
|
, msegByteCountAfter base < cnt * toInteger stride
|
||||||
|
-- Get memory contents after base
|
||||||
|
, Right contents <- contentsAfterSegmentOff base
|
||||||
|
-- Break up contents into a list of slices each with size stide
|
||||||
|
, Right (strideSlices,_) <- sliceMemContents (fromInteger stride) cnt contents
|
||||||
|
-- Take the given number of bytes out of each slices
|
||||||
|
, Right slices <- traverse (\s -> fst <$> splitSegmentRangeList s (fromInteger (memReprBytes tp)))
|
||||||
|
(V.fromList strideSlices)
|
||||||
|
= let r = BoundedMemArray
|
||||||
|
{ arBase = base
|
||||||
|
, arStride = stride
|
||||||
|
, arEltType = tp
|
||||||
|
, arSlices = slices
|
||||||
|
}
|
||||||
|
in Just (r, ixVal)
|
||||||
|
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Extension
|
||||||
|
|
||||||
|
-- | Information about a value that is the signed or unsigned extension of another
|
||||||
|
-- value.
|
||||||
|
--
|
||||||
|
-- This is used for jump tables, and only supports widths that are in memory
|
||||||
|
data Extension w = Extension { _extIsSigned :: !Bool
|
||||||
|
, _extWidth :: !(AddrWidthRepr w)
|
||||||
|
-- ^ Width of argument. is to.
|
||||||
|
}
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
-- | Just like Some (BVValue arch ids), but doesn't run into trouble with
|
-- | Just like Some (BVValue arch ids), but doesn't run into trouble with
|
||||||
-- partially applying the BVValue type synonym.
|
-- partially applying the BVValue type synonym.
|
||||||
data SomeBVValue arch ids = forall tp. SomeBVValue (BVValue arch ids tp)
|
data SomeExt arch ids = forall m . SomeExt !(BVValue arch ids m) !(Extension m)
|
||||||
|
|
||||||
-- | Identify how value is extended.
|
matchAddr :: NatRepr w -> Maybe (AddrWidthRepr w)
|
||||||
matchExtension :: ArchAddrValue arch ids
|
matchAddr w
|
||||||
-> (Extension, SomeBVValue arch ids)
|
| Just Refl <- testEquality w n32 = Just Addr32
|
||||||
matchExtension offset =
|
| Just Refl <- testEquality w n64 = Just Addr64
|
||||||
case valueAsApp offset of
|
| otherwise = Nothing
|
||||||
Just (SExt val' _) -> (Signed, SomeBVValue val')
|
|
||||||
Just (UExt val' _) -> (Unsigned, SomeBVValue val')
|
|
||||||
_ -> (Unsigned, SomeBVValue offset)
|
|
||||||
|
|
||||||
-- | Figure out if this is a jump table.
|
-- | `matchExtension x` matches in `x` has the form `(uext y w)` or `(sext y w)` and returns
|
||||||
matchJumpTable :: ( IPAlignment arch
|
-- a description about the extension as well as the pattern `y`.
|
||||||
, MemWidth (ArchAddrWidth arch)
|
matchExtension :: forall arch ids
|
||||||
, RegisterInfo (ArchReg arch)
|
. ( MemWidth (ArchAddrWidth arch)
|
||||||
)
|
, HasRepr (ArchReg arch) TypeRepr)
|
||||||
=> Memory (ArchAddrWidth arch)
|
=> ArchAddrValue arch ids
|
||||||
-> AbsProcessorState (ArchReg arch) ids
|
-> SomeExt arch ids
|
||||||
-> ArchAddrValue arch ids -- ^ Value that's assigned to the IP.
|
matchExtension val =
|
||||||
-> Maybe (JumpTable arch ids)
|
case valueAsApp val of
|
||||||
matchJumpTable mem aps ip
|
Just (SExt val' _w) | Just repr <- matchAddr (typeWidth val') -> SomeExt val' (Extension True repr)
|
||||||
|
Just (UExt val' _w) | Just repr <- matchAddr (typeWidth val') -> SomeExt val' (Extension False repr)
|
||||||
|
_ -> SomeExt val (Extension False (addrWidthRepr @(ArchAddrWidth arch) undefined))
|
||||||
|
|
||||||
|
-- | `extendDyn ext end bs` parses the bytestring using the extension
|
||||||
|
-- and endianness information, and returns the extended value.
|
||||||
|
extendDyn :: Extension w -> Endianness -> BS.ByteString -> Integer
|
||||||
|
extendDyn (Extension True Addr32) end bs = toInteger (bsWord32 end bs)
|
||||||
|
extendDyn (Extension True Addr64) end bs = toInteger (bsWord64 end bs)
|
||||||
|
extendDyn (Extension False Addr32) end bs = toSigned n32 (toInteger (bsWord32 end bs))
|
||||||
|
extendDyn (Extension False Addr64) end bs = toSigned n64 (toInteger (bsWord64 end bs))
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- JumpTableLayout
|
||||||
|
|
||||||
|
-- | This describes the layout of a jump table.
|
||||||
|
-- Beware: on some architectures, after reading from the jump table, the
|
||||||
|
-- resulting addresses must be aligned. See the IPAlignment class.
|
||||||
|
data JumpTableLayout arch
|
||||||
|
= AbsoluteJumpTable !(BoundedMemArray arch (BVType (ArchAddrWidth arch)))
|
||||||
|
-- ^ `AbsoluteJumpTable r` describes a jump table where the jump
|
||||||
|
-- target is directly stored in the array read `r`.
|
||||||
|
| forall w . RelativeJumpTable !(ArchSegmentOff arch)
|
||||||
|
!(BoundedMemArray arch (BVType w))
|
||||||
|
!(Extension w)
|
||||||
|
-- ^ `RelativeJumpTable base read ext` describes information about a
|
||||||
|
-- jump table where all jump targets are relative to a fixed base
|
||||||
|
-- address.
|
||||||
|
--
|
||||||
|
-- The value is computed as `baseVal + readVal` where
|
||||||
|
--
|
||||||
|
-- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at
|
||||||
|
-- the memory read described by `read` with the sign of `ext`.
|
||||||
|
|
||||||
|
deriving instance RegisterInfo (ArchReg arch) => Show (JumpTableLayout arch)
|
||||||
|
|
||||||
|
-- This function resolves jump table entries.
|
||||||
|
-- It is a recursive function that has an index into the jump table.
|
||||||
|
-- If the current index can be interpreted as a intra-procedural jump,
|
||||||
|
-- then it will add that to the current procedure.
|
||||||
|
-- This returns the last address read.
|
||||||
|
resolveAsAbsoluteAddr :: forall w
|
||||||
|
. Memory w
|
||||||
|
-> Endianness
|
||||||
|
-> [SegmentRange w]
|
||||||
|
-> Maybe (MemAddr w)
|
||||||
|
resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $
|
||||||
|
case l of
|
||||||
|
[ByteRegion bs] -> do
|
||||||
|
absoluteAddr <$> addrRead endianness bs
|
||||||
|
[RelocationRegion r] -> do
|
||||||
|
let off = relocationOffset r
|
||||||
|
when (relocationIsRel r) $ Nothing
|
||||||
|
case relocationSym r of
|
||||||
|
SymbolRelocation{} -> Nothing
|
||||||
|
SectionIdentifier idx -> do
|
||||||
|
addr <- Map.lookup idx (memSectionAddrMap mem)
|
||||||
|
pure $ relativeSegmentAddr addr & incAddr (toInteger off)
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
-- This function resolves jump table entries.
|
||||||
|
-- It is a recursive function that has an index into the jump table.
|
||||||
|
-- If the current index can be interpreted as a intra-procedural jump,
|
||||||
|
-- then it will add that to the current procedure.
|
||||||
|
-- This returns the last address read.
|
||||||
|
resolveRelativeJumps :: forall arch w
|
||||||
|
. ( MemWidth (ArchAddrWidth arch)
|
||||||
|
, IPAlignment arch
|
||||||
|
, RegisterInfo (ArchReg arch)
|
||||||
|
)
|
||||||
|
=> Memory (ArchAddrWidth arch)
|
||||||
|
-> ArchSegmentOff arch
|
||||||
|
-- -> MemRepr (BVType w)
|
||||||
|
-> BoundedMemArray arch (BVType w)
|
||||||
|
-> Extension w
|
||||||
|
-> Maybe (V.Vector (ArchSegmentOff arch))
|
||||||
|
resolveRelativeJumps mem base arrayRead ext = do
|
||||||
|
let slices = arSlices arrayRead
|
||||||
|
BVMemRepr _sz endianness <- pure $ arEltType arrayRead
|
||||||
|
forM slices $ \l -> do
|
||||||
|
case l of
|
||||||
|
[ByteRegion bs]
|
||||||
|
| tgtAddr <- relativeSegmentAddr base
|
||||||
|
& incAddr (extendDyn ext endianness bs)
|
||||||
|
, Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr)
|
||||||
|
, Perm.isExecutable (segmentFlags (msegSegment tgt))
|
||||||
|
-> Just tgt
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
-- | Resolve an ip to a jump table.
|
||||||
|
matchJumpTableRef :: forall arch ids
|
||||||
|
. ( IPAlignment arch
|
||||||
|
, MemWidth (ArchAddrWidth arch)
|
||||||
|
, RegisterInfo (ArchReg arch)
|
||||||
|
)
|
||||||
|
=> Memory (ArchAddrWidth arch)
|
||||||
|
-> AbsProcessorState (ArchReg arch) ids
|
||||||
|
-> ArchAddrValue arch ids -- ^ Value that's assigned to the IP.
|
||||||
|
-> Maybe (JumpTableLayout arch, V.Vector (ArchSegmentOff arch), ArchAddrValue arch ids)
|
||||||
|
matchJumpTableRef mem aps ip
|
||||||
|
|
||||||
-- Turn a plain read address into base + offset.
|
-- Turn a plain read address into base + offset.
|
||||||
| Just arrayRead <- matchArrayRead mem aps ip
|
| Just (arrayRead,idx) <- matchBoundedMemArray mem aps ip
|
||||||
, isReadOnlyArrayRead arrayRead
|
, isReadOnlyBoundedMemArray arrayRead
|
||||||
= Just (AbsoluteJumpTable arrayRead)
|
, BVMemRepr _arByteCount endianness <- arEltType arrayRead = do
|
||||||
|
|
||||||
|
let go :: [SegmentRange (ArchAddrWidth arch)] -> Maybe (MemSegmentOff (ArchAddrWidth arch))
|
||||||
|
go contents = do
|
||||||
|
addr <- resolveAsAbsoluteAddr mem endianness contents
|
||||||
|
tgt <- asSegmentOff mem (toIPAligned @arch addr)
|
||||||
|
unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing
|
||||||
|
pure tgt
|
||||||
|
tbl <- traverse go (arSlices arrayRead)
|
||||||
|
pure (AbsoluteJumpTable arrayRead, tbl, idx)
|
||||||
|
|
||||||
-- gcc-style PIC jump tables on x86 use, roughly,
|
-- gcc-style PIC jump tables on x86 use, roughly,
|
||||||
-- ip = jmptbl + jmptbl[index]
|
-- ip = jmptbl + jmptbl[index]
|
||||||
-- where jmptbl is a pointer to the lookup table.
|
-- where jmptbl is a pointer to the lookup table.
|
||||||
| Just unalignedIP <- fromIPAligned ip
|
| Just unalignedIP <- fromIPAligned ip
|
||||||
, Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem aps unalignedIP
|
, Just (tgtBase, tgtOffset) <- valueAsMemOffset mem aps unalignedIP
|
||||||
, (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset
|
, SomeExt shortOffset ext <- matchExtension tgtOffset
|
||||||
, Just arrayRead <- matchArrayRead mem aps shortOffset
|
, Just (arrayRead, idx) <- matchBoundedMemArray mem aps shortOffset
|
||||||
, isReadOnlyArrayRead arrayRead
|
, isReadOnlyBoundedMemArray arrayRead
|
||||||
= Just (RelativeJumpTable tgtBase arrayRead ext)
|
, Just tbl <- resolveRelativeJumps mem tgtBase arrayRead ext
|
||||||
|
= Just (RelativeJumpTable tgtBase arrayRead ext, tbl, idx)
|
||||||
|
|
||||||
| otherwise
|
| otherwise
|
||||||
= Nothing
|
= Nothing
|
||||||
|
|
||||||
-- | This describes why we could not infer the bounds of code that looked like it
|
|
||||||
-- was accessing a jump table.
|
|
||||||
data JumpTableBoundsError arch ids
|
|
||||||
= CouldNotInterpretAbsValue !(AbsValue (ArchAddrWidth arch) (BVType (ArchAddrWidth arch)))
|
|
||||||
| UpperBoundMismatch !(Jmp.UpperBound (BVType (ArchAddrWidth arch))) !Integer
|
|
||||||
| CouldNotFindBound String !(ArchAddrValue arch ids)
|
|
||||||
|
|
||||||
-- | Show the jump table bounds
|
|
||||||
showJumpTableBoundsError :: ArchConstraints arch => JumpTableBoundsError arch ids -> String
|
|
||||||
showJumpTableBoundsError err =
|
|
||||||
case err of
|
|
||||||
CouldNotInterpretAbsValue val ->
|
|
||||||
"Index <" ++ show val ++ "> is not a stride."
|
|
||||||
UpperBoundMismatch bnd index_range ->
|
|
||||||
"Upper bound mismatch at jumpbounds "
|
|
||||||
++ show bnd
|
|
||||||
++ " domain "
|
|
||||||
++ show index_range
|
|
||||||
CouldNotFindBound msg jump_index ->
|
|
||||||
show "Could not find jump table: " ++ msg ++ "\n"
|
|
||||||
++ show (ppValueAssignments jump_index)
|
|
||||||
|
|
||||||
-- | Returns the index bounds for a jump table of 'Nothing' if this is
|
|
||||||
-- not a block table.
|
|
||||||
getJumpTableBounds :: ArchConstraints a
|
|
||||||
=> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers.
|
|
||||||
-> ArrayRead a ids w
|
|
||||||
-> Either String (ArchAddrWord a)
|
|
||||||
getJumpTableBounds regs arrayRead =
|
|
||||||
case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of
|
|
||||||
Right (Jmp.IntegerUpperBound maxIx) ->
|
|
||||||
let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in
|
|
||||||
if rangeInReadonlySegment (arBase arrayRead) (fromInteger arrayByteSize)
|
|
||||||
then Right $! fromInteger maxIx
|
|
||||||
else Left $ "Jump table range is not in readonly memory: "
|
|
||||||
++ show maxIx ++ " entries/" ++ show arrayByteSize ++ " bytes"
|
|
||||||
++ " starting at " ++ show (arBase arrayRead)
|
|
||||||
Left msg -> Left (showJumpTableBoundsError (CouldNotFindBound msg (arIx arrayRead)))
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
-- ParseState
|
-- ParseState
|
||||||
|
|
||||||
@ -663,6 +733,10 @@ data ParseState arch ids =
|
|||||||
, _intraJumpTargets ::
|
, _intraJumpTargets ::
|
||||||
![(ArchSegmentOff arch, AbsBlockState (ArchReg arch))]
|
![(ArchSegmentOff arch, AbsBlockState (ArchReg arch))]
|
||||||
, _newFunctionAddrs :: ![ArchSegmentOff arch]
|
, _newFunctionAddrs :: ![ArchSegmentOff arch]
|
||||||
|
-- ^ List of candidate functions found when parsing block.
|
||||||
|
--
|
||||||
|
-- Note. In a binary, these could denote the non-executable
|
||||||
|
-- segments, so they are filtered before traversing.
|
||||||
}
|
}
|
||||||
|
|
||||||
-- | Code addresses written to memory.
|
-- | Code addresses written to memory.
|
||||||
@ -688,11 +762,10 @@ recordWriteStmt arch_info mem regs stmt = do
|
|||||||
WriteMem _addr repr v
|
WriteMem _addr repr v
|
||||||
| Just Refl <- testEquality repr (addrMemRepr arch_info) -> do
|
| Just Refl <- testEquality repr (addrMemRepr arch_info) -> do
|
||||||
withArchConstraints arch_info $ do
|
withArchConstraints arch_info $ do
|
||||||
let addrs = concretizeAbsCodePointers mem (transferValue regs v)
|
let addrs = identifyConcreteAddresses mem (transferValue regs v)
|
||||||
writtenCodeAddrs %= (addrs ++)
|
writtenCodeAddrs %= (filter isExecutableSegOff addrs ++)
|
||||||
_ -> return ()
|
_ -> return ()
|
||||||
|
|
||||||
|
|
||||||
------------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
-- ParseContext
|
-- ParseContext
|
||||||
|
|
||||||
@ -721,126 +794,34 @@ addrMemRepr arch_info =
|
|||||||
|
|
||||||
identifyCallTargets :: forall arch ids
|
identifyCallTargets :: forall arch ids
|
||||||
. (RegisterInfo (ArchReg arch))
|
. (RegisterInfo (ArchReg arch))
|
||||||
=> AbsProcessorState (ArchReg arch) ids
|
=> Memory (ArchAddrWidth arch)
|
||||||
|
-> AbsBlockState (ArchReg arch)
|
||||||
-- ^ Abstract processor state just before call.
|
-- ^ Abstract processor state just before call.
|
||||||
-> BVValue arch ids (ArchAddrWidth arch)
|
-> RegState (ArchReg arch) (Value arch ids)
|
||||||
-> [ArchSegmentOff arch]
|
-> [ArchSegmentOff arch]
|
||||||
identifyCallTargets absState ip = do
|
identifyCallTargets mem absState s = do
|
||||||
-- Code pointers from abstract domains.
|
-- Code pointers from abstract domains.
|
||||||
let mem = absMem absState
|
let def = identifyConcreteAddresses mem (absState^.absRegState^.curIP)
|
||||||
let def = concretizeAbsCodePointers mem (transferValue absState ip)
|
case s^.boundValue ip_reg of
|
||||||
let segOffAddrs :: Maybe (ArchSegmentOff arch) -> [ArchSegmentOff arch]
|
BVValue _ x ->
|
||||||
segOffAddrs (Just addr)
|
maybeToList $ resolveAbsoluteAddr mem (fromInteger x)
|
||||||
| segmentFlags (msegSegment addr) `Perm.hasPerm` Perm.execute =
|
RelocatableValue _ a ->
|
||||||
[addr]
|
maybeToList $ asSegmentOff mem a
|
||||||
segOffAddrs _ = []
|
|
||||||
case ip of
|
|
||||||
BVValue _ x -> segOffAddrs $ resolveAbsoluteAddr mem (fromInteger x)
|
|
||||||
RelocatableValue _ a -> segOffAddrs $ asSegmentOff mem a
|
|
||||||
SymbolValue{} -> def
|
SymbolValue{} -> def
|
||||||
AssignedValue a ->
|
AssignedValue a ->
|
||||||
case assignRhs a of
|
case assignRhs a of
|
||||||
-- See if we can get a value out of a concrete memory read.
|
-- See if we can get a value out of a concrete memory read.
|
||||||
ReadMem addr (BVMemRepr _ end)
|
ReadMem addr (BVMemRepr _ end)
|
||||||
| Just laddr <- valueAsMemAddr addr
|
| Just laddr <- valueAsMemAddr addr
|
||||||
, Right val <- readAddr mem end laddr ->
|
, Right val <- readSegmentOff mem end laddr ->
|
||||||
segOffAddrs (asSegmentOff mem val) ++ def
|
val : def
|
||||||
_ -> def
|
_ -> def
|
||||||
Initial _ -> def
|
Initial _ -> def
|
||||||
|
|
||||||
sliceMemContents'
|
addNewFunctionAddrs :: [ArchSegmentOff arch]
|
||||||
:: MemWidth w
|
-> State (ParseState arch ids) ()
|
||||||
=> Int -- ^ Number of bytes in each slice.
|
addNewFunctionAddrs addrs =
|
||||||
-> [[SegmentRange w]] -- ^ Previous slices
|
newFunctionAddrs %= (++addrs)
|
||||||
-> Integer -- ^ Number of slices to return
|
|
||||||
-> [SegmentRange w] -- ^ Ranges to process next
|
|
||||||
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
|
|
||||||
sliceMemContents' stride prev c next
|
|
||||||
| c <= 0 = pure (reverse prev, next)
|
|
||||||
| otherwise =
|
|
||||||
case splitSegmentRangeList next stride of
|
|
||||||
Left e -> Left e
|
|
||||||
Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest
|
|
||||||
|
|
||||||
-- | `sliceMemContents stride cnt contents` splits contents up into `cnt`
|
|
||||||
-- memory regions each with size `stride`.
|
|
||||||
sliceMemContents
|
|
||||||
:: MemWidth w
|
|
||||||
=> Int -- ^ Number of bytes in each slice.
|
|
||||||
-> Integer -- ^ Number of slices to return
|
|
||||||
-> [SegmentRange w] -- ^ Ranges to process next
|
|
||||||
-> Either (DropError w) ([[SegmentRange w]],[SegmentRange w])
|
|
||||||
sliceMemContents stride c next = sliceMemContents' stride [] c next
|
|
||||||
|
|
||||||
-- `getJumpTableContents base cnt stride` returns a list with
|
|
||||||
getJumpTableContents :: MemWidth w
|
|
||||||
=> MemSegmentOff w
|
|
||||||
-> Integer
|
|
||||||
-> Integer
|
|
||||||
-> Maybe [[SegmentRange w]]
|
|
||||||
getJumpTableContents base cnt stride = do
|
|
||||||
let totalSize = cnt * stride
|
|
||||||
when (msegByteCountAfter base < totalSize) $
|
|
||||||
Nothing
|
|
||||||
contents <-
|
|
||||||
case contentsAfterSegmentOff base of
|
|
||||||
Left _ -> Nothing
|
|
||||||
Right l -> pure l
|
|
||||||
case sliceMemContents (fromInteger stride) cnt contents of
|
|
||||||
Left _ -> Nothing
|
|
||||||
Right (s,_) -> Just s
|
|
||||||
|
|
||||||
-- This function resolves jump table entries.
|
|
||||||
-- It is a recursive function that has an index into the jump table.
|
|
||||||
-- If the current index can be interpreted as a intra-procedural jump,
|
|
||||||
-- then it will add that to the current procedure.
|
|
||||||
-- This returns the last address read.
|
|
||||||
resolveJumps :: forall arch ids
|
|
||||||
. ( MemWidth (ArchAddrWidth arch)
|
|
||||||
, IPAlignment arch
|
|
||||||
, RegisterInfo (ArchReg arch)
|
|
||||||
)
|
|
||||||
=> Memory (ArchAddrWidth arch)
|
|
||||||
-> JumpTable arch ids
|
|
||||||
-> [[SegmentRange (ArchAddrWidth arch)]]
|
|
||||||
-> Maybe [ArchSegmentOff arch]
|
|
||||||
resolveJumps mem (AbsoluteJumpTable arrayRead) slices = do
|
|
||||||
BVMemRepr _arByteCount endianness <- pure $ arSize arrayRead
|
|
||||||
|
|
||||||
forM slices $ \l -> do
|
|
||||||
case l of
|
|
||||||
[ByteRegion bs] -> do
|
|
||||||
val <- addrRead endianness bs
|
|
||||||
tgt <- asSegmentOff mem (toIPAligned @arch (absoluteAddr val))
|
|
||||||
unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing
|
|
||||||
pure tgt
|
|
||||||
[RelocationRegion r] -> do
|
|
||||||
let off = relocationOffset r
|
|
||||||
when (relocationIsRel r) $ Nothing
|
|
||||||
case relocationSym r of
|
|
||||||
SymbolRelocation{} -> Nothing
|
|
||||||
SectionIdentifier idx -> do
|
|
||||||
addr <- Map.lookup idx (memSectionAddrMap mem)
|
|
||||||
incSegmentOff addr (toInteger off)
|
|
||||||
_ -> Nothing
|
|
||||||
resolveJumps mem (RelativeJumpTable base arrayRead ext) slices = do
|
|
||||||
BVMemRepr sz endianness <- pure $ arSize arrayRead
|
|
||||||
let readFn
|
|
||||||
| Just Refl <- testEquality sz (knownNat :: NatRepr 4) =
|
|
||||||
extendDyn (knownNat :: NatRepr 32) ext . bsWord32 endianness
|
|
||||||
| Just Refl <- testEquality sz (knownNat :: NatRepr 8) =
|
|
||||||
extendDyn (knownNat :: NatRepr 64) ext . bsWord64 endianness
|
|
||||||
| otherwise =
|
|
||||||
error "Do not support this width."
|
|
||||||
forM slices $ \l -> do
|
|
||||||
case l of
|
|
||||||
[ByteRegion bs]
|
|
||||||
| tgtAddr <- relativeSegmentAddr base
|
|
||||||
& incAddr (readFn (BS.take (fromInteger (natValue sz)) bs))
|
|
||||||
, Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr)
|
|
||||||
, Perm.isExecutable (segmentFlags (msegSegment tgt))
|
|
||||||
-> Just tgt
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
-- | This parses a block that ended with a fetch and execute instruction.
|
-- | This parses a block that ended with a fetch and execute instruction.
|
||||||
parseFetchAndExecute :: forall arch ids
|
parseFetchAndExecute :: forall arch ids
|
||||||
@ -896,8 +877,8 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
|||||||
-- Merge caller return information
|
-- Merge caller return information
|
||||||
intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
|
intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):)
|
||||||
-- Use the abstract domain to look for new code pointers for the current IP.
|
-- Use the abstract domain to look for new code pointers for the current IP.
|
||||||
let addrs = identifyCallTargets absProcState' (s^.boundValue ip_reg)
|
addNewFunctionAddrs $
|
||||||
newFunctionAddrs %= (++ addrs)
|
identifyCallTargets mem abst s
|
||||||
-- Use the call-specific code to look for new IPs.
|
-- Use the call-specific code to look for new IPs.
|
||||||
|
|
||||||
let r = StatementList { stmtsIdent = idx
|
let r = StatementList { stmtsIdent = idx
|
||||||
@ -953,16 +934,8 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
|||||||
}
|
}
|
||||||
pure (ret, idx+1)
|
pure (ret, idx+1)
|
||||||
-- Block ends with what looks like a jump table.
|
-- Block ends with what looks like a jump table.
|
||||||
| Just jt <- matchJumpTable mem absProcState' (s^.curIP)
|
| Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do
|
||||||
, Some arrayRead <- jumpTableRead jt
|
|
||||||
, Right maxIdx <- getJumpTableBounds absProcState' arrayRead
|
|
||||||
, Just slices <-
|
|
||||||
getJumpTableContents (arBase arrayRead)
|
|
||||||
(toInteger maxIdx+1)
|
|
||||||
(arStride arrayRead)
|
|
||||||
-- Read addresses
|
|
||||||
, Just readAddrs <-
|
|
||||||
resolveJumps (pctxMemory ctx) jt slices -> do
|
|
||||||
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
|
mapM_ (recordWriteStmt ainfo mem absProcState') stmts
|
||||||
|
|
||||||
let abst :: AbsBlockState (ArchReg arch)
|
let abst :: AbsBlockState (ArchReg arch)
|
||||||
@ -970,11 +943,11 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
|||||||
|
|
||||||
seq abst $ do
|
seq abst $ do
|
||||||
|
|
||||||
forM_ readAddrs $ \tgtAddr -> do
|
forM_ entries $ \tgtAddr -> do
|
||||||
let abst' = abst & setAbsIP tgtAddr
|
let abst' = abst & setAbsIP tgtAddr
|
||||||
intraJumpTargets %= ((tgtAddr, abst'):)
|
intraJumpTargets %= ((tgtAddr, abst'):)
|
||||||
|
|
||||||
let term = ParsedLookupTable s (arIx arrayRead) (V.fromList readAddrs)
|
let term = ParsedLookupTable s jumpIndex entries
|
||||||
let ret = StatementList { stmtsIdent = idx
|
let ret = StatementList { stmtsIdent = idx
|
||||||
, stmtsNonterm = stmts
|
, stmtsNonterm = stmts
|
||||||
, stmtsTerm = term
|
, stmtsTerm = term
|
||||||
@ -982,20 +955,20 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
|||||||
}
|
}
|
||||||
pure (ret,idx+1)
|
pure (ret,idx+1)
|
||||||
|
|
||||||
-- Check for tail call (anything where we are right at stack height)
|
-- Check for tail call when the stack pointer points to the return address.
|
||||||
--
|
--
|
||||||
-- TODO: this makes sense for x86, but is not correct for all architectures
|
-- TODO: this makes sense for x86, but is not correct for all architectures
|
||||||
| ptrType <- addrMemRepr ainfo
|
| ptrType <- addrMemRepr ainfo
|
||||||
, sp_val <- s^.boundValue sp_reg
|
, sp_val <- s^.boundValue sp_reg
|
||||||
, ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do
|
, ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do
|
||||||
(,idx+1) <$> finishWithTailCall absProcState'
|
finishWithTailCall absProcState'
|
||||||
|
|
||||||
-- Is this a jump to a known function entry? We're already past the
|
-- Is this a jump to a known function entry? We're already past the
|
||||||
-- "identifyCall" case, so this must be a tail call, assuming we trust our
|
-- "identifyCall" case, so this must be a tail call, assuming we trust our
|
||||||
-- known function entry info.
|
-- known function entry info.
|
||||||
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
|
| Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg)
|
||||||
, tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do
|
, tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do
|
||||||
(,idx+1) <$> finishWithTailCall absProcState'
|
finishWithTailCall absProcState'
|
||||||
|
|
||||||
-- Block that ends with some unknown
|
-- Block that ends with some unknown
|
||||||
| otherwise -> do
|
| otherwise -> do
|
||||||
@ -1009,7 +982,7 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
|||||||
|
|
||||||
where finishWithTailCall :: RegisterInfo (ArchReg arch)
|
where finishWithTailCall :: RegisterInfo (ArchReg arch)
|
||||||
=> AbsProcessorState (ArchReg arch) ids
|
=> AbsProcessorState (ArchReg arch) ids
|
||||||
-> State (ParseState arch ids) (StatementList arch ids)
|
-> State (ParseState arch ids) (StatementList arch ids, Word64)
|
||||||
finishWithTailCall absProcState' = do
|
finishWithTailCall absProcState' = do
|
||||||
let mem = pctxMemory ctx
|
let mem = pctxMemory ctx
|
||||||
mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts
|
mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts
|
||||||
@ -1019,14 +992,15 @@ parseFetchAndExecute ctx idx stmts regs s = do
|
|||||||
seq abst $ do
|
seq abst $ do
|
||||||
|
|
||||||
-- Look for new instruction pointers
|
-- Look for new instruction pointers
|
||||||
let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP)
|
addNewFunctionAddrs $
|
||||||
newFunctionAddrs %= (++ addrs)
|
identifyConcreteAddresses mem (abst^.absRegState^.curIP)
|
||||||
|
|
||||||
pure StatementList { stmtsIdent = idx
|
let ret = StatementList { stmtsIdent = idx
|
||||||
, stmtsNonterm = stmts
|
, stmtsNonterm = stmts
|
||||||
, stmtsTerm = ParsedCall s Nothing
|
, stmtsTerm = ParsedCall s Nothing
|
||||||
, stmtsAbsState = absProcState'
|
, stmtsAbsState = absProcState'
|
||||||
}
|
}
|
||||||
|
seq ret $ pure (ret,idx+1)
|
||||||
|
|
||||||
-- | this evalutes the statements in a block to expand the information known
|
-- | this evalutes the statements in a block to expand the information known
|
||||||
-- about control flow targets of this block.
|
-- about control flow targets of this block.
|
||||||
@ -1410,16 +1384,22 @@ data DiscoveryOptions
|
|||||||
, exploreCodeAddrInMem :: !Bool
|
, exploreCodeAddrInMem :: !Bool
|
||||||
-- ^ If @True@, 'completeDiscoveryState' will
|
-- ^ If @True@, 'completeDiscoveryState' will
|
||||||
-- explore all potential code addresses in
|
-- explore all potential code addresses in
|
||||||
-- memory after exploring other potnetial
|
-- memory after exploring other potential
|
||||||
-- functions.
|
-- functions.
|
||||||
|
--
|
||||||
|
-- This is effectively a hack that sometimes
|
||||||
|
-- allows discovering functions. If you need
|
||||||
|
-- it, let the author's of Macaw know so that
|
||||||
|
-- we can find a more principled way.
|
||||||
, logAtAnalyzeFunction :: !Bool
|
, logAtAnalyzeFunction :: !Bool
|
||||||
-- ^ Print a message each time we apply
|
-- ^ Print a message each time we apply
|
||||||
-- discovery analysis to a new function.
|
-- discovery analysis to a new function.
|
||||||
, logAtAnalyzeBlock :: !Bool
|
, logAtAnalyzeBlock :: !Bool
|
||||||
-- ^ Print a message each time we analyze a
|
-- ^ Print a message each time we analyze a
|
||||||
-- block within a function.
|
-- block within a function.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- | Some default options
|
||||||
defaultDiscoveryOptions :: DiscoveryOptions
|
defaultDiscoveryOptions :: DiscoveryOptions
|
||||||
defaultDiscoveryOptions =
|
defaultDiscoveryOptions =
|
||||||
DiscoveryOptions { exploreFunctionSymbols = True
|
DiscoveryOptions { exploreFunctionSymbols = True
|
||||||
@ -1504,6 +1484,7 @@ completeDiscoveryState initState disOpt funPred = do
|
|||||||
postPhase1Discovery <- resolveFuns analyzeFn analyzeBlock postSymState
|
postPhase1Discovery <- resolveFuns analyzeFn analyzeBlock postSymState
|
||||||
-- Discovery functions from memory
|
-- Discovery functions from memory
|
||||||
if exploreCodeAddrInMem disOpt then do
|
if exploreCodeAddrInMem disOpt then do
|
||||||
|
-- Execute hack of just searching for pointers in memory.
|
||||||
let mem_contents = withArchConstraints ainfo $ memAsAddrPairs mem LittleEndian
|
let mem_contents = withArchConstraints ainfo $ memAsAddrPairs mem LittleEndian
|
||||||
resolveFuns analyzeFn analyzeBlock $ postPhase1Discovery & exploreMemPointers mem_contents
|
resolveFuns analyzeFn analyzeBlock $ postPhase1Discovery & exploreMemPointers mem_contents
|
||||||
else
|
else
|
||||||
|
@ -18,6 +18,7 @@ some value while regions define a unknown offset in memory.
|
|||||||
{-# LANGUAGE FlexibleInstances #-}
|
{-# LANGUAGE FlexibleInstances #-}
|
||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE StandaloneDeriving #-}
|
||||||
{-# LANGUAGE TypeOperators #-}
|
{-# LANGUAGE TypeOperators #-}
|
||||||
module Data.Macaw.Memory
|
module Data.Macaw.Memory
|
||||||
( Memory
|
( Memory
|
||||||
@ -113,6 +114,7 @@ module Data.Macaw.Memory
|
|||||||
, addrContentsAfter
|
, addrContentsAfter
|
||||||
, readByteString
|
, readByteString
|
||||||
, readAddr
|
, readAddr
|
||||||
|
, readSegmentOff
|
||||||
, readWord8
|
, readWord8
|
||||||
, readWord16be
|
, readWord16be
|
||||||
, readWord16le
|
, readWord16le
|
||||||
@ -133,7 +135,6 @@ module Data.Macaw.Memory
|
|||||||
, AddrSymMap
|
, AddrSymMap
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Exception (assert)
|
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import Data.BinarySymbols
|
import Data.BinarySymbols
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
@ -165,6 +166,8 @@ data AddrWidthRepr w
|
|||||||
| (w ~ 64) => Addr64
|
| (w ~ 64) => Addr64
|
||||||
-- ^ A 64-bit address
|
-- ^ A 64-bit address
|
||||||
|
|
||||||
|
deriving instance Show (AddrWidthRepr w)
|
||||||
|
|
||||||
instance TestEquality AddrWidthRepr where
|
instance TestEquality AddrWidthRepr where
|
||||||
testEquality Addr32 Addr32 = Just Refl
|
testEquality Addr32 Addr32 = Just Refl
|
||||||
testEquality Addr64 Addr64 = Just Refl
|
testEquality Addr64 Addr64 = Just Refl
|
||||||
@ -294,8 +297,8 @@ class (1 <= w) => MemWidth w where
|
|||||||
-- The argument is ignored.
|
-- The argument is ignored.
|
||||||
addrWidthRepr :: p w -> AddrWidthRepr w
|
addrWidthRepr :: p w -> AddrWidthRepr w
|
||||||
|
|
||||||
-- | @addrWidthMod w@ returns @2^(8 * addrSize w - 1)@.
|
-- | @addrWidthMask w@ returns @2^(8 * addrSize w) - 1@.
|
||||||
addrWidthMod :: p w -> Word64
|
addrWidthMask :: p w -> Word64
|
||||||
|
|
||||||
-- | Returns number of bytes in addr.
|
-- | Returns number of bytes in addr.
|
||||||
--
|
--
|
||||||
@ -322,7 +325,7 @@ addrBitSize w = 8 * addrSize w
|
|||||||
|
|
||||||
-- | Convert word64 @x@ into mem word @x mod 2^w-1@.
|
-- | Convert word64 @x@ into mem word @x mod 2^w-1@.
|
||||||
memWord :: forall w . MemWidth w => Word64 -> MemWord w
|
memWord :: forall w . MemWidth w => Word64 -> MemWord w
|
||||||
memWord x = MemWord (x .&. addrWidthMod p)
|
memWord x = MemWord (x .&. addrWidthMask p)
|
||||||
where p :: Proxy w
|
where p :: Proxy w
|
||||||
p = Proxy
|
p = Proxy
|
||||||
|
|
||||||
@ -364,11 +367,11 @@ instance MemWidth w => Integral (MemWord w) where
|
|||||||
|
|
||||||
instance MemWidth w => Bounded (MemWord w) where
|
instance MemWidth w => Bounded (MemWord w) where
|
||||||
minBound = 0
|
minBound = 0
|
||||||
maxBound = MemWord (addrWidthMod (Proxy :: Proxy w))
|
maxBound = MemWord (addrWidthMask (Proxy :: Proxy w))
|
||||||
|
|
||||||
instance MemWidth 32 where
|
instance MemWidth 32 where
|
||||||
addrWidthRepr _ = Addr32
|
addrWidthRepr _ = Addr32
|
||||||
addrWidthMod _ = 0xffffffff
|
addrWidthMask _ = 0xffffffff
|
||||||
addrRotate (MemWord w) i =
|
addrRotate (MemWord w) i =
|
||||||
MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i))
|
MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i))
|
||||||
addrSize _ = 4
|
addrSize _ = 4
|
||||||
@ -378,7 +381,7 @@ instance MemWidth 32 where
|
|||||||
|
|
||||||
instance MemWidth 64 where
|
instance MemWidth 64 where
|
||||||
addrWidthRepr _ = Addr64
|
addrWidthRepr _ = Addr64
|
||||||
addrWidthMod _ = 0xffffffffffffffff
|
addrWidthMask _ = 0xffffffffffffffff
|
||||||
addrRotate (MemWord w) i = MemWord (w `rotate` i)
|
addrRotate (MemWord w) i = MemWord (w `rotate` i)
|
||||||
addrSize _ = 8
|
addrSize _ = 8
|
||||||
addrRead e s
|
addrRead e s
|
||||||
@ -976,17 +979,23 @@ memAsAddrPairs :: Memory w
|
|||||||
-> [(MemSegmentOff w, MemSegmentOff w)]
|
-> [(MemSegmentOff w, MemSegmentOff w)]
|
||||||
memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do
|
memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do
|
||||||
seg <- memSegments mem
|
seg <- memSegments mem
|
||||||
(contents_offset,r) <- contentsRanges (segmentContents seg)
|
(contentsOffset,r) <- contentsRanges (segmentContents seg)
|
||||||
let sz = addrSize mem
|
let sz :: Int
|
||||||
|
sz = addrSize mem
|
||||||
case r of
|
case r of
|
||||||
ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do
|
ByteRegion bs -> do
|
||||||
(off,w) <-
|
-- contentsOffset
|
||||||
zip [contents_offset..]
|
-- Check offset if a multiple
|
||||||
(regularChunks (fromIntegral sz) bs)
|
let mask = sz - 1
|
||||||
|
when (BS.length bs .&. mask /= 0) $
|
||||||
|
error "Unexpected offset."
|
||||||
|
(byteOff,w) <-
|
||||||
|
zip [contentsOffset,contentsOffset+fromIntegral sz..]
|
||||||
|
(regularChunks sz bs)
|
||||||
let Just val = addrRead end w
|
let Just val = addrRead end w
|
||||||
case resolveAbsoluteAddr mem val of
|
case resolveAbsoluteAddr mem val of
|
||||||
Just val_ref -> do
|
Just val_ref -> do
|
||||||
pure (MemSegmentOff seg off, val_ref)
|
pure (MemSegmentOff seg byteOff, val_ref)
|
||||||
_ -> []
|
_ -> []
|
||||||
RelocationRegion{} -> []
|
RelocationRegion{} -> []
|
||||||
BSSRegion{} -> []
|
BSSRegion{} -> []
|
||||||
@ -1344,6 +1353,23 @@ readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do
|
|||||||
Just val -> Right $ MemAddr 0 val
|
Just val -> Right $ MemAddr 0 val
|
||||||
Nothing -> error $ "readAddr internal error: readByteString result too short."
|
Nothing -> error $ "readAddr internal error: readByteString result too short."
|
||||||
|
|
||||||
|
-- | Read the given address as a reference to a memory segment offset, or report a
|
||||||
|
-- memory read error.
|
||||||
|
readSegmentOff :: Memory w
|
||||||
|
-> Endianness
|
||||||
|
-> MemAddr w
|
||||||
|
-> Either (MemoryError w) (MemSegmentOff w)
|
||||||
|
readSegmentOff mem end addr = addrWidthClass (memAddrWidth mem) $ do
|
||||||
|
let sz = fromIntegral (addrSize addr)
|
||||||
|
bs <- readByteString mem addr sz
|
||||||
|
case addrRead end bs of
|
||||||
|
Just val -> do
|
||||||
|
let addrInMem = MemAddr 0 val
|
||||||
|
case asSegmentOff mem addrInMem of
|
||||||
|
Just res -> pure res
|
||||||
|
Nothing -> Left (InvalidAddr addrInMem)
|
||||||
|
Nothing -> error $ "readSegmentOff internal error: readByteString result too short."
|
||||||
|
|
||||||
-- | Read a single byte.
|
-- | Read a single byte.
|
||||||
readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8
|
readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8
|
||||||
readWord8 mem addr = bsWord8 <$> readByteString mem addr 1
|
readWord8 mem addr = bsWord8 <$> readByteString mem addr 1
|
||||||
|
Loading…
Reference in New Issue
Block a user