diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index eaf210e6..ad7a2f5f 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -4,25 +4,16 @@ Maintainer : Joe Hendrix , Simon Winwood Bool +isExecutableSegOff sa = + segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute + -- | Get code pointers out of a abstract value. -concretizeAbsCodePointers :: MemWidth w +identifyConcreteAddresses :: MemWidth w => Memory w -> AbsValue w (BVType w) -> [MemSegmentOff w] -concretizeAbsCodePointers mem (FinSet s) = - [ sa - | a <- Set.toList s - , sa <- maybeToList (resolveAbsoluteAddr mem (fromInteger a)) - , segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute - ] -concretizeAbsCodePointers _ (CodePointers s _) = - [ sa - | sa <- Set.toList s - , segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute - ] - -- FIXME: this is dangerous !! -concretizeAbsCodePointers _mem StridedInterval{} = [] -- FIXME: this case doesn't make sense - -- debug DCFG ("I think these are code pointers!: " ++ show s) $ [] - -- filter (isCodeAddr mem) $ fromInteger <$> SI.toList s -concretizeAbsCodePointers _mem _ = [] +identifyConcreteAddresses mem (FinSet s) = + mapMaybe (resolveAbsoluteAddr mem . fromInteger) (Set.toList s) +identifyConcreteAddresses _ (CodePointers s _) = Set.toList s +identifyConcreteAddresses _mem StridedInterval{} = [] +identifyConcreteAddresses _mem _ = [] {- -- | Return true if this address was added because of the contents of a global address @@ -264,14 +248,29 @@ dropUnusedCodeInParsedBlock ainfo b = ------------------------------------------------------------------------ -- Memory utilities --- | Return true if range is entirely contained within a single read only segment.Q -rangeInReadonlySegment :: MemWidth w - => MemSegmentOff w -- ^ Start of range - -> MemWord w -- ^ The size of the range - -> Bool -rangeInReadonlySegment mseg size = - size <= segmentSize (msegSegment mseg) - msegOffset mseg - && Perm.isReadonly (segmentFlags (msegSegment mseg)) +sliceMemContents' + :: MemWidth w + => Int -- ^ Number of bytes in each slice. + -> [[SegmentRange w]] -- ^ Previous slices + -> Integer -- ^ Number of slices to return + -> [SegmentRange w] -- ^ Ranges to process next + -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w]) +sliceMemContents' stride prev c next + | c <= 0 = pure (reverse prev, next) + | otherwise = + case splitSegmentRangeList next stride of + Left e -> Left e + Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest + +-- | `sliceMemContents stride cnt contents` splits contents up into `cnt` +-- memory regions each with size `stride`. +sliceMemContents + :: MemWidth w + => Int -- ^ Number of bytes in each slice. + -> Integer -- ^ Number of slices to return + -> [SegmentRange w] -- ^ Ranges to process next + -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w]) +sliceMemContents stride c next = sliceMemContents' stride [] c next ------------------------------------------------------------------------ -- DiscoveryState utilities @@ -287,6 +286,8 @@ markAddrAsFunction :: FunctionExploreReason (ArchAddrWidth arch) markAddrAsFunction rsn addr s -- Do nothing if function is already explored. | Map.member addr (s^.funInfo) || Map.member addr (s^.unexploredFunctions) = s + -- Ignore if address is not in an executable segment. + | isExecutableSegOff addr = s | otherwise = addrWidthClass (memAddrWidth (memory s)) $ -- We check that the function address ignores bytes so that we do -- not start disassembling at a relocation or BSS region. @@ -440,63 +441,39 @@ mergeIntraJump src ab tgt = do foundAddrs %= Map.insert tgt found_info ------------------------------------------------------------------------------- --- Jump table bounds +-- BoundedMemArray --- | A memory read that looks like array indexing. It read 'arSize' bytes from +-- | This describes a region of memory dereferenced in some array read. +-- +-- These regions may be be sparse, given an index `i`, the -- the address given by 'arBase' + 'arIx'*'arStride'. -data ArrayRead arch ids w = ArrayRead - { arBase :: ArchSegmentOff arch - , arIx :: ArchAddrValue arch ids - , arStride :: Integer - , arSize :: MemRepr (BVType w) - -- ^ Type of element in this array. +data BoundedMemArray arch tp = BoundedMemArray + { arBase :: !(MemSegmentOff (ArchAddrWidth arch)) + -- ^ The base address for array accesses. + , arStride :: !Integer + -- ^ Space between elements of the array. + -- + -- This will typically be the number of bytes denoted by `arEltType`, + -- but may be larger for sparse arrays. `matchBoundedMemArray` will fail + -- if stride is less than the number of bytes read. + , arEltType :: !(MemRepr tp) + -- ^ Resolved type of elements in this array. + , arSlices :: !(V.Vector [SegmentRange (ArchAddrWidth arch)]) + -- ^ The slices of memory in the array. + -- + -- The `i`th element in the vector corresponds to the first `size` + -- bytes at address `base + stride * i`. + -- + -- This could be computed from the previous fields, but we check we + -- can create it when creating the array read, so we store it to + -- avoid recomputing it. } -deriving instance RegisterInfo (ArchReg arch) => Show (ArrayRead arch ids w) +deriving instance RegisterInfo (ArchReg arch) => Show (BoundedMemArray arch tp) -- | Return true if the address stored is readable and not writable. -isReadOnlyArrayRead :: ArrayRead arch ids w -> Bool -isReadOnlyArrayRead = Perm.isReadonly . segmentFlags . msegSegment . arBase - --- | Number of bytes of size. -arSizeBytes :: ArrayRead arch ids w -> Integer -arSizeBytes = memReprBytes . arSize - ------------------------------------------------------------------------- --- Extension - --- | Used to denote how a value should be extended to a full address. -data Extension = Signed | Unsigned - deriving (Bounded, Enum, Eq, Ord, Read, Show) - --- | `extendDyn w ext v` treats `v` as a `w`-bit number and returns the underlying -extendDyn :: (1 <= w, Integral x) => NatRepr w -> Extension -> x ->Integer -extendDyn _ Unsigned = toInteger -extendDyn w Signed = toSigned w . toInteger - ------------------------------------------------------------------------- --- JumpTable - - --- Beware: on some architectures, after reading from the jump table, the --- resulting addresses must be aligned. See the IPAlignment class. -data JumpTable arch ids - = AbsoluteJumpTable (ArrayRead arch ids (ArchAddrWidth arch)) - -- | `RelativeJumpTable base read ext` describes information about a jump table read. - -- - -- The value is computed as `baseVal + readVal` where - -- - -- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at the memory - -- read described by `read` with the sign of `ext`. - | forall w . RelativeJumpTable (ArchSegmentOff arch) (ArrayRead arch ids w) Extension - -deriving instance RegisterInfo (ArchReg arch) => Show (JumpTable arch ids) - --- | The array read done when computing the jump table. N.B. other processing --- may be needed on the value read in this way to know the address to jump to. -jumpTableRead :: JumpTable arch ids -> Some (ArrayRead arch ids) -jumpTableRead (AbsoluteJumpTable r) = Some r -jumpTableRead (RelativeJumpTable _ r _) = Some r +isReadOnlyBoundedMemArray :: BoundedMemArray arch tp -> Bool +isReadOnlyBoundedMemArray = Perm.isReadonly . segmentFlags . msegSegment . arBase absValueAsSegmentOff :: forall w @@ -529,14 +506,14 @@ valueAsSegmentOffWithTransfer mem aps base = valueAsSegmentOff mem base <|> absValueAsSegmentOff mem (transferValue aps base) --- | This interprets a value as a memory segment offset plus value. -valueAsArrayOffset +-- | This attempts to pattern match a value as a memory address plus a value. +valueAsMemOffset :: RegisterInfo (ArchReg arch) => Memory (ArchAddrWidth arch) -> AbsProcessorState (ArchReg arch) ids -> ArchAddrValue arch ids -> Maybe (ArchSegmentOff arch, ArchAddrValue arch ids) -valueAsArrayOffset mem aps v +valueAsMemOffset mem aps v | Just (BVAdd _ base offset) <- valueAsApp v , Just ptr <- valueAsSegmentOffWithTransfer mem aps base = Just (ptr, offset) @@ -548,110 +525,203 @@ valueAsArrayOffset mem aps v | otherwise = Nothing - - -- | See if the value can be interpreted as a read of memory -matchArrayRead +matchBoundedMemArray :: (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch)) => Memory (ArchAddrWidth arch) -> AbsProcessorState (ArchReg arch) ids -> BVValue arch ids w - -> Maybe (ArrayRead arch ids w) -matchArrayRead mem aps val - - | Just (ReadMem addr size) <- valueAsRhs val - , Just (base, offset) <- valueAsArrayOffset mem aps addr + -> Maybe (BoundedMemArray arch (BVType w), ArchAddrValue arch ids) +matchBoundedMemArray mem aps val + | Just (ReadMem addr tp) <- valueAsRhs val + , Just (base, offset) <- valueAsMemOffset mem aps addr , Just (stride, ixVal) <- valueAsStaticMultiplication offset - , memReprBytes size <= stride - = Just $ ArrayRead - { arBase = base - , arIx = ixVal - , arStride = stride - , arSize = size - } + -- Check stride covers at least number of bytes read. + , memReprBytes tp <= stride + -- Resolve a static upper bound to array. + , Right (Jmp.IntegerUpperBound bnd) + <- Jmp.unsignedUpperBound (aps^.indexBounds) ixVal + , cnt <- bnd+1 + -- Check array actually fits in memory. + , msegByteCountAfter base < cnt * toInteger stride + -- Get memory contents after base + , Right contents <- contentsAfterSegmentOff base + -- Break up contents into a list of slices each with size stide + , Right (strideSlices,_) <- sliceMemContents (fromInteger stride) cnt contents + -- Take the given number of bytes out of each slices + , Right slices <- traverse (\s -> fst <$> splitSegmentRangeList s (fromInteger (memReprBytes tp))) + (V.fromList strideSlices) + = let r = BoundedMemArray + { arBase = base + , arStride = stride + , arEltType = tp + , arSlices = slices + } + in Just (r, ixVal) | otherwise = Nothing +------------------------------------------------------------------------ +-- Extension + +-- | Information about a value that is the signed or unsigned extension of another +-- value. +-- +-- This is used for jump tables, and only supports widths that are in memory +data Extension w = Extension { _extIsSigned :: !Bool + , _extWidth :: !(AddrWidthRepr w) + -- ^ Width of argument. is to. + } + deriving (Show) + -- | Just like Some (BVValue arch ids), but doesn't run into trouble with -- partially applying the BVValue type synonym. -data SomeBVValue arch ids = forall tp. SomeBVValue (BVValue arch ids tp) +data SomeExt arch ids = forall m . SomeExt !(BVValue arch ids m) !(Extension m) --- | Identify how value is extended. -matchExtension :: ArchAddrValue arch ids - -> (Extension, SomeBVValue arch ids) -matchExtension offset = - case valueAsApp offset of - Just (SExt val' _) -> (Signed, SomeBVValue val') - Just (UExt val' _) -> (Unsigned, SomeBVValue val') - _ -> (Unsigned, SomeBVValue offset) +matchAddr :: NatRepr w -> Maybe (AddrWidthRepr w) +matchAddr w + | Just Refl <- testEquality w n32 = Just Addr32 + | Just Refl <- testEquality w n64 = Just Addr64 + | otherwise = Nothing --- | Figure out if this is a jump table. -matchJumpTable :: ( IPAlignment arch - , MemWidth (ArchAddrWidth arch) - , RegisterInfo (ArchReg arch) - ) - => Memory (ArchAddrWidth arch) - -> AbsProcessorState (ArchReg arch) ids - -> ArchAddrValue arch ids -- ^ Value that's assigned to the IP. - -> Maybe (JumpTable arch ids) -matchJumpTable mem aps ip +-- | `matchExtension x` matches in `x` has the form `(uext y w)` or `(sext y w)` and returns +-- a description about the extension as well as the pattern `y`. +matchExtension :: forall arch ids + . ( MemWidth (ArchAddrWidth arch) + , HasRepr (ArchReg arch) TypeRepr) + => ArchAddrValue arch ids + -> SomeExt arch ids +matchExtension val = + case valueAsApp val of + Just (SExt val' _w) | Just repr <- matchAddr (typeWidth val') -> SomeExt val' (Extension True repr) + Just (UExt val' _w) | Just repr <- matchAddr (typeWidth val') -> SomeExt val' (Extension False repr) + _ -> SomeExt val (Extension False (addrWidthRepr @(ArchAddrWidth arch) undefined)) + +-- | `extendDyn ext end bs` parses the bytestring using the extension +-- and endianness information, and returns the extended value. +extendDyn :: Extension w -> Endianness -> BS.ByteString -> Integer +extendDyn (Extension True Addr32) end bs = toInteger (bsWord32 end bs) +extendDyn (Extension True Addr64) end bs = toInteger (bsWord64 end bs) +extendDyn (Extension False Addr32) end bs = toSigned n32 (toInteger (bsWord32 end bs)) +extendDyn (Extension False Addr64) end bs = toSigned n64 (toInteger (bsWord64 end bs)) + +------------------------------------------------------------------------ +-- JumpTableLayout + +-- | This describes the layout of a jump table. +-- Beware: on some architectures, after reading from the jump table, the +-- resulting addresses must be aligned. See the IPAlignment class. +data JumpTableLayout arch + = AbsoluteJumpTable !(BoundedMemArray arch (BVType (ArchAddrWidth arch))) + -- ^ `AbsoluteJumpTable r` describes a jump table where the jump + -- target is directly stored in the array read `r`. + | forall w . RelativeJumpTable !(ArchSegmentOff arch) + !(BoundedMemArray arch (BVType w)) + !(Extension w) + -- ^ `RelativeJumpTable base read ext` describes information about a + -- jump table where all jump targets are relative to a fixed base + -- address. + -- + -- The value is computed as `baseVal + readVal` where + -- + -- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at + -- the memory read described by `read` with the sign of `ext`. + +deriving instance RegisterInfo (ArchReg arch) => Show (JumpTableLayout arch) + +-- This function resolves jump table entries. +-- It is a recursive function that has an index into the jump table. +-- If the current index can be interpreted as a intra-procedural jump, +-- then it will add that to the current procedure. +-- This returns the last address read. +resolveAsAbsoluteAddr :: forall w + . Memory w + -> Endianness + -> [SegmentRange w] + -> Maybe (MemAddr w) +resolveAsAbsoluteAddr mem endianness l = addrWidthClass (memAddrWidth mem) $ + case l of + [ByteRegion bs] -> do + absoluteAddr <$> addrRead endianness bs + [RelocationRegion r] -> do + let off = relocationOffset r + when (relocationIsRel r) $ Nothing + case relocationSym r of + SymbolRelocation{} -> Nothing + SectionIdentifier idx -> do + addr <- Map.lookup idx (memSectionAddrMap mem) + pure $ relativeSegmentAddr addr & incAddr (toInteger off) + _ -> Nothing + +-- This function resolves jump table entries. +-- It is a recursive function that has an index into the jump table. +-- If the current index can be interpreted as a intra-procedural jump, +-- then it will add that to the current procedure. +-- This returns the last address read. +resolveRelativeJumps :: forall arch w + . ( MemWidth (ArchAddrWidth arch) + , IPAlignment arch + , RegisterInfo (ArchReg arch) + ) + => Memory (ArchAddrWidth arch) + -> ArchSegmentOff arch + -- -> MemRepr (BVType w) + -> BoundedMemArray arch (BVType w) + -> Extension w + -> Maybe (V.Vector (ArchSegmentOff arch)) +resolveRelativeJumps mem base arrayRead ext = do + let slices = arSlices arrayRead + BVMemRepr _sz endianness <- pure $ arEltType arrayRead + forM slices $ \l -> do + case l of + [ByteRegion bs] + | tgtAddr <- relativeSegmentAddr base + & incAddr (extendDyn ext endianness bs) + , Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr) + , Perm.isExecutable (segmentFlags (msegSegment tgt)) + -> Just tgt + _ -> Nothing + +-- | Resolve an ip to a jump table. +matchJumpTableRef :: forall arch ids + . ( IPAlignment arch + , MemWidth (ArchAddrWidth arch) + , RegisterInfo (ArchReg arch) + ) + => Memory (ArchAddrWidth arch) + -> AbsProcessorState (ArchReg arch) ids + -> ArchAddrValue arch ids -- ^ Value that's assigned to the IP. + -> Maybe (JumpTableLayout arch, V.Vector (ArchSegmentOff arch), ArchAddrValue arch ids) +matchJumpTableRef mem aps ip -- Turn a plain read address into base + offset. - | Just arrayRead <- matchArrayRead mem aps ip - , isReadOnlyArrayRead arrayRead - = Just (AbsoluteJumpTable arrayRead) + | Just (arrayRead,idx) <- matchBoundedMemArray mem aps ip + , isReadOnlyBoundedMemArray arrayRead + , BVMemRepr _arByteCount endianness <- arEltType arrayRead = do + + let go :: [SegmentRange (ArchAddrWidth arch)] -> Maybe (MemSegmentOff (ArchAddrWidth arch)) + go contents = do + addr <- resolveAsAbsoluteAddr mem endianness contents + tgt <- asSegmentOff mem (toIPAligned @arch addr) + unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing + pure tgt + tbl <- traverse go (arSlices arrayRead) + pure (AbsoluteJumpTable arrayRead, tbl, idx) -- gcc-style PIC jump tables on x86 use, roughly, -- ip = jmptbl + jmptbl[index] -- where jmptbl is a pointer to the lookup table. | Just unalignedIP <- fromIPAligned ip - , Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem aps unalignedIP - , (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset - , Just arrayRead <- matchArrayRead mem aps shortOffset - , isReadOnlyArrayRead arrayRead - = Just (RelativeJumpTable tgtBase arrayRead ext) + , Just (tgtBase, tgtOffset) <- valueAsMemOffset mem aps unalignedIP + , SomeExt shortOffset ext <- matchExtension tgtOffset + , Just (arrayRead, idx) <- matchBoundedMemArray mem aps shortOffset + , isReadOnlyBoundedMemArray arrayRead + , Just tbl <- resolveRelativeJumps mem tgtBase arrayRead ext + = Just (RelativeJumpTable tgtBase arrayRead ext, tbl, idx) + | otherwise = Nothing --- | This describes why we could not infer the bounds of code that looked like it --- was accessing a jump table. -data JumpTableBoundsError arch ids - = CouldNotInterpretAbsValue !(AbsValue (ArchAddrWidth arch) (BVType (ArchAddrWidth arch))) - | UpperBoundMismatch !(Jmp.UpperBound (BVType (ArchAddrWidth arch))) !Integer - | CouldNotFindBound String !(ArchAddrValue arch ids) - --- | Show the jump table bounds -showJumpTableBoundsError :: ArchConstraints arch => JumpTableBoundsError arch ids -> String -showJumpTableBoundsError err = - case err of - CouldNotInterpretAbsValue val -> - "Index <" ++ show val ++ "> is not a stride." - UpperBoundMismatch bnd index_range -> - "Upper bound mismatch at jumpbounds " - ++ show bnd - ++ " domain " - ++ show index_range - CouldNotFindBound msg jump_index -> - show "Could not find jump table: " ++ msg ++ "\n" - ++ show (ppValueAssignments jump_index) - --- | Returns the index bounds for a jump table of 'Nothing' if this is --- not a block table. -getJumpTableBounds :: ArchConstraints a - => AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. - -> ArrayRead a ids w - -> Either String (ArchAddrWord a) -getJumpTableBounds regs arrayRead = - case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of - Right (Jmp.IntegerUpperBound maxIx) -> - let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in - if rangeInReadonlySegment (arBase arrayRead) (fromInteger arrayByteSize) - then Right $! fromInteger maxIx - else Left $ "Jump table range is not in readonly memory: " - ++ show maxIx ++ " entries/" ++ show arrayByteSize ++ " bytes" - ++ " starting at " ++ show (arBase arrayRead) - Left msg -> Left (showJumpTableBoundsError (CouldNotFindBound msg (arIx arrayRead))) - ------------------------------------------------------------------------ -- ParseState @@ -663,6 +733,10 @@ data ParseState arch ids = , _intraJumpTargets :: ![(ArchSegmentOff arch, AbsBlockState (ArchReg arch))] , _newFunctionAddrs :: ![ArchSegmentOff arch] + -- ^ List of candidate functions found when parsing block. + -- + -- Note. In a binary, these could denote the non-executable + -- segments, so they are filtered before traversing. } -- | Code addresses written to memory. @@ -688,11 +762,10 @@ recordWriteStmt arch_info mem regs stmt = do WriteMem _addr repr v | Just Refl <- testEquality repr (addrMemRepr arch_info) -> do withArchConstraints arch_info $ do - let addrs = concretizeAbsCodePointers mem (transferValue regs v) - writtenCodeAddrs %= (addrs ++) + let addrs = identifyConcreteAddresses mem (transferValue regs v) + writtenCodeAddrs %= (filter isExecutableSegOff addrs ++) _ -> return () - ------------------------------------------------------------------------ -- ParseContext @@ -721,126 +794,34 @@ addrMemRepr arch_info = identifyCallTargets :: forall arch ids . (RegisterInfo (ArchReg arch)) - => AbsProcessorState (ArchReg arch) ids + => Memory (ArchAddrWidth arch) + -> AbsBlockState (ArchReg arch) -- ^ Abstract processor state just before call. - -> BVValue arch ids (ArchAddrWidth arch) + -> RegState (ArchReg arch) (Value arch ids) -> [ArchSegmentOff arch] -identifyCallTargets absState ip = do +identifyCallTargets mem absState s = do -- Code pointers from abstract domains. - let mem = absMem absState - let def = concretizeAbsCodePointers mem (transferValue absState ip) - let segOffAddrs :: Maybe (ArchSegmentOff arch) -> [ArchSegmentOff arch] - segOffAddrs (Just addr) - | segmentFlags (msegSegment addr) `Perm.hasPerm` Perm.execute = - [addr] - segOffAddrs _ = [] - case ip of - BVValue _ x -> segOffAddrs $ resolveAbsoluteAddr mem (fromInteger x) - RelocatableValue _ a -> segOffAddrs $ asSegmentOff mem a + let def = identifyConcreteAddresses mem (absState^.absRegState^.curIP) + case s^.boundValue ip_reg of + BVValue _ x -> + maybeToList $ resolveAbsoluteAddr mem (fromInteger x) + RelocatableValue _ a -> + maybeToList $ asSegmentOff mem a SymbolValue{} -> def AssignedValue a -> case assignRhs a of -- See if we can get a value out of a concrete memory read. ReadMem addr (BVMemRepr _ end) | Just laddr <- valueAsMemAddr addr - , Right val <- readAddr mem end laddr -> - segOffAddrs (asSegmentOff mem val) ++ def + , Right val <- readSegmentOff mem end laddr -> + val : def _ -> def Initial _ -> def -sliceMemContents' - :: MemWidth w - => Int -- ^ Number of bytes in each slice. - -> [[SegmentRange w]] -- ^ Previous slices - -> Integer -- ^ Number of slices to return - -> [SegmentRange w] -- ^ Ranges to process next - -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w]) -sliceMemContents' stride prev c next - | c <= 0 = pure (reverse prev, next) - | otherwise = - case splitSegmentRangeList next stride of - Left e -> Left e - Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest - --- | `sliceMemContents stride cnt contents` splits contents up into `cnt` --- memory regions each with size `stride`. -sliceMemContents - :: MemWidth w - => Int -- ^ Number of bytes in each slice. - -> Integer -- ^ Number of slices to return - -> [SegmentRange w] -- ^ Ranges to process next - -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w]) -sliceMemContents stride c next = sliceMemContents' stride [] c next - --- `getJumpTableContents base cnt stride` returns a list with -getJumpTableContents :: MemWidth w - => MemSegmentOff w - -> Integer - -> Integer - -> Maybe [[SegmentRange w]] -getJumpTableContents base cnt stride = do - let totalSize = cnt * stride - when (msegByteCountAfter base < totalSize) $ - Nothing - contents <- - case contentsAfterSegmentOff base of - Left _ -> Nothing - Right l -> pure l - case sliceMemContents (fromInteger stride) cnt contents of - Left _ -> Nothing - Right (s,_) -> Just s - --- This function resolves jump table entries. --- It is a recursive function that has an index into the jump table. --- If the current index can be interpreted as a intra-procedural jump, --- then it will add that to the current procedure. --- This returns the last address read. -resolveJumps :: forall arch ids - . ( MemWidth (ArchAddrWidth arch) - , IPAlignment arch - , RegisterInfo (ArchReg arch) - ) - => Memory (ArchAddrWidth arch) - -> JumpTable arch ids - -> [[SegmentRange (ArchAddrWidth arch)]] - -> Maybe [ArchSegmentOff arch] -resolveJumps mem (AbsoluteJumpTable arrayRead) slices = do - BVMemRepr _arByteCount endianness <- pure $ arSize arrayRead - - forM slices $ \l -> do - case l of - [ByteRegion bs] -> do - val <- addrRead endianness bs - tgt <- asSegmentOff mem (toIPAligned @arch (absoluteAddr val)) - unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing - pure tgt - [RelocationRegion r] -> do - let off = relocationOffset r - when (relocationIsRel r) $ Nothing - case relocationSym r of - SymbolRelocation{} -> Nothing - SectionIdentifier idx -> do - addr <- Map.lookup idx (memSectionAddrMap mem) - incSegmentOff addr (toInteger off) - _ -> Nothing -resolveJumps mem (RelativeJumpTable base arrayRead ext) slices = do - BVMemRepr sz endianness <- pure $ arSize arrayRead - let readFn - | Just Refl <- testEquality sz (knownNat :: NatRepr 4) = - extendDyn (knownNat :: NatRepr 32) ext . bsWord32 endianness - | Just Refl <- testEquality sz (knownNat :: NatRepr 8) = - extendDyn (knownNat :: NatRepr 64) ext . bsWord64 endianness - | otherwise = - error "Do not support this width." - forM slices $ \l -> do - case l of - [ByteRegion bs] - | tgtAddr <- relativeSegmentAddr base - & incAddr (readFn (BS.take (fromInteger (natValue sz)) bs)) - , Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr) - , Perm.isExecutable (segmentFlags (msegSegment tgt)) - -> Just tgt - _ -> Nothing +addNewFunctionAddrs :: [ArchSegmentOff arch] + -> State (ParseState arch ids) () +addNewFunctionAddrs addrs = + newFunctionAddrs %= (++addrs) -- | This parses a block that ended with a fetch and execute instruction. parseFetchAndExecute :: forall arch ids @@ -896,8 +877,8 @@ parseFetchAndExecute ctx idx stmts regs s = do -- Merge caller return information intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) -- Use the abstract domain to look for new code pointers for the current IP. - let addrs = identifyCallTargets absProcState' (s^.boundValue ip_reg) - newFunctionAddrs %= (++ addrs) + addNewFunctionAddrs $ + identifyCallTargets mem abst s -- Use the call-specific code to look for new IPs. let r = StatementList { stmtsIdent = idx @@ -953,16 +934,8 @@ parseFetchAndExecute ctx idx stmts regs s = do } pure (ret, idx+1) -- Block ends with what looks like a jump table. - | Just jt <- matchJumpTable mem absProcState' (s^.curIP) - , Some arrayRead <- jumpTableRead jt - , Right maxIdx <- getJumpTableBounds absProcState' arrayRead - , Just slices <- - getJumpTableContents (arBase arrayRead) - (toInteger maxIdx+1) - (arStride arrayRead) - -- Read addresses - , Just readAddrs <- - resolveJumps (pctxMemory ctx) jt slices -> do + | Just (_jt, entries, jumpIndex) <- matchJumpTableRef mem absProcState' (s^.curIP) -> do + mapM_ (recordWriteStmt ainfo mem absProcState') stmts let abst :: AbsBlockState (ArchReg arch) @@ -970,11 +943,11 @@ parseFetchAndExecute ctx idx stmts regs s = do seq abst $ do - forM_ readAddrs $ \tgtAddr -> do + forM_ entries $ \tgtAddr -> do let abst' = abst & setAbsIP tgtAddr intraJumpTargets %= ((tgtAddr, abst'):) - let term = ParsedLookupTable s (arIx arrayRead) (V.fromList readAddrs) + let term = ParsedLookupTable s jumpIndex entries let ret = StatementList { stmtsIdent = idx , stmtsNonterm = stmts , stmtsTerm = term @@ -982,20 +955,20 @@ parseFetchAndExecute ctx idx stmts regs s = do } pure (ret,idx+1) - -- Check for tail call (anything where we are right at stack height) + -- Check for tail call when the stack pointer points to the return address. -- -- TODO: this makes sense for x86, but is not correct for all architectures | ptrType <- addrMemRepr ainfo , sp_val <- s^.boundValue sp_reg , ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do - (,idx+1) <$> finishWithTailCall absProcState' + finishWithTailCall absProcState' -- Is this a jump to a known function entry? We're already past the -- "identifyCall" case, so this must be a tail call, assuming we trust our -- known function entry info. | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) , tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do - (,idx+1) <$> finishWithTailCall absProcState' + finishWithTailCall absProcState' -- Block that ends with some unknown | otherwise -> do @@ -1009,7 +982,7 @@ parseFetchAndExecute ctx idx stmts regs s = do where finishWithTailCall :: RegisterInfo (ArchReg arch) => AbsProcessorState (ArchReg arch) ids - -> State (ParseState arch ids) (StatementList arch ids) + -> State (ParseState arch ids) (StatementList arch ids, Word64) finishWithTailCall absProcState' = do let mem = pctxMemory ctx mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts @@ -1019,14 +992,15 @@ parseFetchAndExecute ctx idx stmts regs s = do seq abst $ do -- Look for new instruction pointers - let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) - newFunctionAddrs %= (++ addrs) + addNewFunctionAddrs $ + identifyConcreteAddresses mem (abst^.absRegState^.curIP) - pure StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedCall s Nothing - , stmtsAbsState = absProcState' - } + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = stmts + , stmtsTerm = ParsedCall s Nothing + , stmtsAbsState = absProcState' + } + seq ret $ pure (ret,idx+1) -- | this evalutes the statements in a block to expand the information known -- about control flow targets of this block. @@ -1410,16 +1384,22 @@ data DiscoveryOptions , exploreCodeAddrInMem :: !Bool -- ^ If @True@, 'completeDiscoveryState' will -- explore all potential code addresses in - -- memory after exploring other potnetial + -- memory after exploring other potential -- functions. + -- + -- This is effectively a hack that sometimes + -- allows discovering functions. If you need + -- it, let the author's of Macaw know so that + -- we can find a more principled way. , logAtAnalyzeFunction :: !Bool - -- ^ Print a message each time we apply - -- discovery analysis to a new function. + -- ^ Print a message each time we apply + -- discovery analysis to a new function. , logAtAnalyzeBlock :: !Bool - -- ^ Print a message each time we analyze a - -- block within a function. - } + -- ^ Print a message each time we analyze a + -- block within a function. + } +-- | Some default options defaultDiscoveryOptions :: DiscoveryOptions defaultDiscoveryOptions = DiscoveryOptions { exploreFunctionSymbols = True @@ -1504,6 +1484,7 @@ completeDiscoveryState initState disOpt funPred = do postPhase1Discovery <- resolveFuns analyzeFn analyzeBlock postSymState -- Discovery functions from memory if exploreCodeAddrInMem disOpt then do + -- Execute hack of just searching for pointers in memory. let mem_contents = withArchConstraints ainfo $ memAsAddrPairs mem LittleEndian resolveFuns analyzeFn analyzeBlock $ postPhase1Discovery & exploreMemPointers mem_contents else diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index 7930a047..cc7649ac 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -18,6 +18,7 @@ some value while regions define a unknown offset in memory. {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeOperators #-} module Data.Macaw.Memory ( Memory @@ -113,6 +114,7 @@ module Data.Macaw.Memory , addrContentsAfter , readByteString , readAddr + , readSegmentOff , readWord8 , readWord16be , readWord16le @@ -133,7 +135,6 @@ module Data.Macaw.Memory , AddrSymMap ) where -import Control.Exception (assert) import Control.Monad import Data.BinarySymbols import Data.Bits @@ -165,6 +166,8 @@ data AddrWidthRepr w | (w ~ 64) => Addr64 -- ^ A 64-bit address +deriving instance Show (AddrWidthRepr w) + instance TestEquality AddrWidthRepr where testEquality Addr32 Addr32 = Just Refl testEquality Addr64 Addr64 = Just Refl @@ -294,8 +297,8 @@ class (1 <= w) => MemWidth w where -- The argument is ignored. addrWidthRepr :: p w -> AddrWidthRepr w - -- | @addrWidthMod w@ returns @2^(8 * addrSize w - 1)@. - addrWidthMod :: p w -> Word64 + -- | @addrWidthMask w@ returns @2^(8 * addrSize w) - 1@. + addrWidthMask :: p w -> Word64 -- | Returns number of bytes in addr. -- @@ -322,7 +325,7 @@ addrBitSize w = 8 * addrSize w -- | Convert word64 @x@ into mem word @x mod 2^w-1@. memWord :: forall w . MemWidth w => Word64 -> MemWord w -memWord x = MemWord (x .&. addrWidthMod p) +memWord x = MemWord (x .&. addrWidthMask p) where p :: Proxy w p = Proxy @@ -364,11 +367,11 @@ instance MemWidth w => Integral (MemWord w) where instance MemWidth w => Bounded (MemWord w) where minBound = 0 - maxBound = MemWord (addrWidthMod (Proxy :: Proxy w)) + maxBound = MemWord (addrWidthMask (Proxy :: Proxy w)) instance MemWidth 32 where addrWidthRepr _ = Addr32 - addrWidthMod _ = 0xffffffff + addrWidthMask _ = 0xffffffff addrRotate (MemWord w) i = MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) addrSize _ = 4 @@ -378,7 +381,7 @@ instance MemWidth 32 where instance MemWidth 64 where addrWidthRepr _ = Addr64 - addrWidthMod _ = 0xffffffffffffffff + addrWidthMask _ = 0xffffffffffffffff addrRotate (MemWord w) i = MemWord (w `rotate` i) addrSize _ = 8 addrRead e s @@ -976,17 +979,23 @@ memAsAddrPairs :: Memory w -> [(MemSegmentOff w, MemSegmentOff w)] memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do seg <- memSegments mem - (contents_offset,r) <- contentsRanges (segmentContents seg) - let sz = addrSize mem + (contentsOffset,r) <- contentsRanges (segmentContents seg) + let sz :: Int + sz = addrSize mem case r of - ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do - (off,w) <- - zip [contents_offset..] - (regularChunks (fromIntegral sz) bs) + ByteRegion bs -> do + -- contentsOffset + -- Check offset if a multiple + let mask = sz - 1 + when (BS.length bs .&. mask /= 0) $ + error "Unexpected offset." + (byteOff,w) <- + zip [contentsOffset,contentsOffset+fromIntegral sz..] + (regularChunks sz bs) let Just val = addrRead end w case resolveAbsoluteAddr mem val of Just val_ref -> do - pure (MemSegmentOff seg off, val_ref) + pure (MemSegmentOff seg byteOff, val_ref) _ -> [] RelocationRegion{} -> [] BSSRegion{} -> [] @@ -1344,6 +1353,23 @@ readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do Just val -> Right $ MemAddr 0 val Nothing -> error $ "readAddr internal error: readByteString result too short." +-- | Read the given address as a reference to a memory segment offset, or report a +-- memory read error. +readSegmentOff :: Memory w + -> Endianness + -> MemAddr w + -> Either (MemoryError w) (MemSegmentOff w) +readSegmentOff mem end addr = addrWidthClass (memAddrWidth mem) $ do + let sz = fromIntegral (addrSize addr) + bs <- readByteString mem addr sz + case addrRead end bs of + Just val -> do + let addrInMem = MemAddr 0 val + case asSegmentOff mem addrInMem of + Just res -> pure res + Nothing -> Left (InvalidAddr addrInMem) + Nothing -> error $ "readSegmentOff internal error: readByteString result too short." + -- | Read a single byte. readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8 readWord8 mem addr = bsWord8 <$> readByteString mem addr 1