split blocks when we discover a jump into their middle

2024-12-28 08:34:23 +03:00 · 2018-01-16 15:05:52 -08:00 · 2018-01-16 15:05:52 -08:00 · fc76fd5dc0
commit fc76fd5dc0
parent d61e6d4a7f
3 changed files with 30 additions and 6 deletions
--- a/base/src/Data/Macaw/Discovery.hs
+++ b/base/src/Data/Macaw/Discovery.hs
@ -260,6 +260,9 @@ data FoundAddr arch
                 -- ^ The abstract state formed from post-states that reach this address.
               }

+-- | Lens onto the 'foundReason' field of a 'FoundAddr'.
+foundReasonL :: Lens' (FoundAddr arch) (CodeAddrReason (ArchAddrWidth arch))
+foundReasonL = lens foundReason setReason
+  where setReason found reason = found { foundReason = reason }
+
 ------------------------------------------------------------------------
 -- FunState

@ -291,6 +294,26 @@ curFunBlocks = lens _curFunBlocks (\s v -> s { _curFunBlocks = v })
 foundAddrs :: Simple Lens (FunState arch s ids) (Map (ArchSegmentOff arch) (FoundAddr arch))
 foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v })

+-- | Record @block@ as starting at @segment@. If the closest earlier block runs
+-- past @segment@, remove it, tag it 'SplitAt', and put it back in the frontier.
+addFunBlock
+    :: MemWidth (RegAddrWidth (ArchReg arch))
+    => ArchSegmentOff arch   -- ^ Address at which the new block starts
+    -> ParsedBlock arch ids  -- ^ The block to record
+    -> FunState arch s ids   -- ^ Function state to update
+    -> FunState arch s ids
+addFunBlock segment block s = case Map.lookupLT segment (s ^. curFunBlocks) of
+    Just (bSegment, bBlock)
+           -- Offsets are only compared within the same memory segment.
+        |  msegSegment bSegment == msegSegment segment
+           -- Strict '>': a block ending exactly at @segment@ is merely adjacent.
+        && msegOffset bSegment + blockSize bBlock > msegOffset segment
+           -- Drop the overlapped block and put its address back in the frontier.
+        -> s & curFunBlocks %~ (Map.insert segment block . Map.delete bSegment)
+             & foundAddrs.at bSegment._Just.foundReasonL %~ SplitAt segment
+             & frontier %~ Set.insert bSegment
+    _ -> s & curFunBlocks %~ Map.insert segment block
+
 type ReverseEdgeMap arch = Map (ArchSegmentOff arch) (Set (ArchSegmentOff arch))

 -- | Maps each code address to the list of predecessors that
@ -758,7 +781,8 @@ parseBlock ctx b regs = do

 -- | This evaluates the statements in a block to expand the information known
 -- about control flow targets of this block.
-transferBlocks :: ArchSegmentOff arch
+transferBlocks :: MemWidth (RegAddrWidth (ArchReg arch))
+               => ArchSegmentOff arch
                   -- ^ Address of these blocks
               -> FoundAddr arch
                  -- ^ State leading to explore block
@ -801,7 +825,7 @@ transferBlocks src finfo sz block_map =
                           , blockAbstractState = foundAbstractState finfo
                           , blockStatementList = pblock
                           }
-      curFunBlocks %= Map.insert src pb
+      id %= addFunBlock src pb
      curFunCtx %= markAddrsAsFunction (InWrite src)    (ps^.writtenCodeAddrs)
                .  markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs)
      mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets)
@ -848,7 +872,7 @@ transfer addr = do
                         , blockAbstractState = foundAbstractState finfo
                         , blockStatementList = stmts
                         }
-    curFunBlocks %= Map.insert addr pb
+    id %= addFunBlock addr pb
   else do
    -- Rewrite returned blocks to simplify expressions

--- a/base/src/Data/Macaw/Discovery/State.hs
+++ b/base/src/Data/Macaw/Discovery/State.hs
@ -76,8 +76,8 @@ data CodeAddrReason w
     -- ^ Identified as an entry point from initial information
   | CodePointerInMem !(MemSegmentOff w)
     -- ^ A code pointer that was stored at the given address.
-   | SplitAt !(MemAddr w)
-     -- ^ Added because the address split this block after it had been disassembled.
+   | SplitAt !(MemSegmentOff w) !(CodeAddrReason w)
+     -- ^ Added because the address split this block after it had been disassembled. Also includes the reason we thought the block should be there before we split it.
   | UserRequest
     -- ^ The user requested that we analyze this address as a function.
  deriving (Eq, Show)
--- a/x86/tests/x64/test-conditional.s.expected
+++ b/x86/tests/x64/test-conditional.s.expected
@ -1,3 +1,3 @@
-R { funcs = [(0x2b1, [(0x2b1, 14), (0x2ce, 16), (0x2bf, 31)])]
+R { funcs = [(0x2b1, [(0x2b1, 14), (0x2ce, 16), (0x2bf, 15)])]
  , ignoreBlocks = [0x2de]
  }