diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index 3c43a545..9557cc4c 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -39,7 +39,7 @@ library IntervalMap >= 0.5, lens >= 4.7, mtl, - parameterized-utils >= 0.1.6, + parameterized-utils >= 1.0.1, text, vector, QuickCheck >= 2.7 diff --git a/base/src/Data/Macaw/Architecture/Info.hs b/base/src/Data/Macaw/Architecture/Info.hs index 2c6b977d..85d03025 100644 --- a/base/src/Data/Macaw/Architecture/Info.hs +++ b/base/src/Data/Macaw/Architecture/Info.hs @@ -14,7 +14,6 @@ module Data.Macaw.Architecture.Info -- * Unclassified blocks , module Data.Macaw.CFG.Block , rewriteBlock - , disassembleAndRewrite ) where import Control.Monad.ST @@ -187,12 +186,3 @@ rewriteBlock info rwctx b = do , blockStmts = tgtStmts , blockTerm = tgtTermStmt } - --- | Translate code into blocks and simplify the resulting blocks. -disassembleAndRewrite :: ArchitectureInfo arch -> DisassembleFn arch -disassembleAndRewrite ainfo mem nonceGen addr max_size ab = - withArchConstraints ainfo $ do - (bs0,sz, maybeError) <- disassembleFn ainfo mem nonceGen addr max_size ab - ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) (rewriteArchStmt ainfo) - bs1 <- traverse (rewriteBlock ainfo ctx) bs0 - pure (bs1, sz, maybeError) diff --git a/base/src/Data/Macaw/CFG/App.hs b/base/src/Data/Macaw/CFG/App.hs index 80350fe0..2e74aab4 100644 --- a/base/src/Data/Macaw/CFG/App.hs +++ b/base/src/Data/Macaw/CFG/App.hs @@ -1,5 +1,5 @@ {-| -Copyright : (c) Galois, Inc 2015-2017 +Copyright : (c) Galois, Inc 2015-2018 Maintainer : Joe Hendrix This defines a data type `App` for representing operations that can be diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index b77f9302..ea24f1fa 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -36,7 +36,6 @@ module Data.Macaw.CFG.Core , valueAsMemAddr , valueAsSegmentOff , valueAsStaticMultiplication - , asLiteralAddr , asBaseOffset , asInt64Constant , IPAlignment(..) @@ -189,7 +188,7 @@ data Value arch ids tp where RelocatableValue :: !(AddrWidthRepr (ArchAddrWidth arch)) -> !(ArchMemAddr arch) -> Value arch ids (BVType (ArchAddrWidth arch)) - -- | Reference to a symbol identifier. + -- | This denotes the address of a symbol identifier in the binary. -- -- This appears when dealing with relocations. SymbolValue :: !(AddrWidthRepr (ArchAddrWidth arch)) @@ -294,6 +293,12 @@ mkLit n v = BVValue n (v .&. mask) bvValue :: (KnownNat n, 1 <= n) => Integer -> Value arch ids (BVType n) bvValue i = mkLit knownNat i +-- | Return the right-hand side if this is an assignment. +valueAsRhs :: Value arch ids tp -> Maybe (AssignRhs arch (Value arch ids) tp) +valueAsRhs (AssignedValue (Assignment _ v)) = Just v +valueAsRhs _ = Nothing + +-- | Return the value evaluated if this is from an `App`. valueAsApp :: Value arch ids tp -> Maybe (App (Value arch ids) tp) valueAsApp (AssignedValue (Assignment _ (EvalApp a))) = Just a valueAsApp _ = Nothing @@ -303,10 +308,6 @@ valueAsArchFn :: Value arch ids tp -> Maybe (ArchFn arch (Value arch ids) tp) valueAsArchFn (AssignedValue (Assignment _ (EvalArchFn a _))) = Just a valueAsArchFn _ = Nothing -valueAsRhs :: Value arch ids tp -> Maybe (AssignRhs arch (Value arch ids) tp) -valueAsRhs (AssignedValue (Assignment _ v)) = Just v -valueAsRhs _ = Nothing - -- | This returns a segmented address if the value can be interpreted as a literal memory -- address, and returns nothing otherwise. valueAsMemAddr :: MemWidth (ArchAddrWidth arch) @@ -333,13 +334,6 @@ valueAsStaticMultiplication v = Just (2^shl, l') | otherwise = Nothing -asLiteralAddr :: MemWidth (ArchAddrWidth arch) - => BVValue arch ids (ArchAddrWidth arch) - -> Maybe (ArchMemAddr arch) -asLiteralAddr = valueAsMemAddr - -{-# DEPRECATED asLiteralAddr "Use valueAsMemAddr" #-} - -- | Returns a segment offset associated with the value if one can be defined. valueAsSegmentOff :: Memory (ArchAddrWidth arch) -> BVValue arch ids (ArchAddrWidth arch) diff --git a/base/src/Data/Macaw/CFG/Rewriter.hs b/base/src/Data/Macaw/CFG/Rewriter.hs index b7922c09..a6086ccb 100644 --- a/base/src/Data/Macaw/CFG/Rewriter.hs +++ b/base/src/Data/Macaw/CFG/Rewriter.hs @@ -23,9 +23,11 @@ module Data.Macaw.CFG.Rewriter ) where import Control.Lens -import Control.Monad.State.Strict import Control.Monad.ST +import Control.Monad.State.Strict import Data.Bits +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map import Data.Parameterized.Map (MapF) import qualified Data.Parameterized.Map as MapF import Data.Parameterized.NatRepr @@ -40,14 +42,22 @@ import Data.Macaw.Types data RewriteContext arch s src tgt = RewriteContext { rwctxNonceGen :: !(NonceGenerator (ST s) tgt) -- ^ Generator for making new nonces in the target ST monad - , rwctxArchFn :: !(forall tp - . ArchFn arch (Value arch src) tp - -> Rewriter arch s src tgt (Value arch tgt tp)) + , rwctxArchFn + :: !(forall tp + . ArchFn arch (Value arch src) tp + -> Rewriter arch s src tgt (Value arch tgt tp)) + -- ^ Rewriter for architecture-specific functions + , rwctxArchStmt + :: !(ArchStmt arch (Value arch src) -> Rewriter arch s src tgt ()) -- ^ Rewriter for architecture-specific statements - , rwctxArchStmt :: !(ArchStmt arch (Value arch src) -> Rewriter arch s src tgt ()) - -- ^ Rewriter for architecture-specific statements - , rwctxConstraints :: (forall a . (RegisterInfo (ArchReg arch) => a) -> a) + , rwctxConstraints + :: !(forall a . (RegisterInfo (ArchReg arch) => a) -> a) -- ^ Constraints needed during rewriting. + , rwctxSectionAddrMap + :: !(Map SectionIndex (ArchSegmentOff arch)) + -- ^ Map from section indices to the address they are loaded at + -- if any. + -- This is used to replace section references with their address. , rwctxCache :: !(STRef s (MapF (AssignId src) (Value arch tgt))) -- ^ A reference to a map from source assignment -- identifiers to the updated value. @@ -67,13 +77,16 @@ mkRewriteContext :: RegisterInfo (ArchReg arch) -> Rewriter arch s src tgt (Value arch tgt tp)) -> (ArchStmt arch (Value arch src) -> Rewriter arch s src tgt ()) + -> Map SectionIndex (ArchSegmentOff arch) + -- ^ Map from loaded section indices to their address. -> ST s (RewriteContext arch s src tgt) -mkRewriteContext nonceGen archFn archStmt = do +mkRewriteContext nonceGen archFn archStmt secAddrMap = do ref <- newSTRef MapF.empty pure $! RewriteContext { rwctxNonceGen = nonceGen , rwctxArchFn = archFn , rwctxArchStmt = archStmt , rwctxConstraints = \a -> a + , rwctxSectionAddrMap = secAddrMap , rwctxCache = ref } @@ -266,18 +279,22 @@ rewriteApp app = do pure x BVAdd w (BVValue _ x) (BVValue _ y) -> do pure (BVValue w (toUnsigned w (x + y))) - -- Move constant to right + -- If first argument is constant and second is not, then commute. BVAdd w (BVValue _ x) y -> do - rewriteApp (BVAdd w y (BVValue w x)) + rewriteApp $ BVAdd w y (BVValue w x) -- (x + yc) + zc -> x + (yc + zc) BVAdd w (valueAsApp -> Just (BVAdd _ x (BVValue _ yc))) (BVValue _ zc) -> do - rewriteApp (BVAdd w x (BVValue w (toUnsigned w (yc + zc)))) + rewriteApp $ BVAdd w x (BVValue w (toUnsigned w (yc + zc))) -- (x - yc) + zc -> x + (zc - yc) BVAdd w (valueAsApp -> Just (BVSub _ x (BVValue _ yc))) (BVValue _ zc) -> do - rewriteApp (BVAdd w x (BVValue w (toUnsigned w (zc - yc)))) + rewriteApp $ BVAdd w x (BVValue w (toUnsigned w (zc - yc))) -- (xc - y) + zc => (xc + zc) - y BVAdd w (valueAsApp -> Just (BVSub _ (BVValue _ xc) y)) (BVValue _ zc) -> do - rewriteApp (BVSub w (BVValue w (toUnsigned w (xc + zc))) y) + rewriteApp $ BVSub w (BVValue w (toUnsigned w (xc + zc))) y + + -- Increment address by a constant. + BVAdd _ (RelocatableValue r a) (BVValue _ c) -> + pure $ RelocatableValue r (incAddr c a) -- addr a + (c - addr b) => c + (addr a - addr b) BVAdd w (RelocatableValue _ a) (valueAsApp -> Just (BVSub _ c (RelocatableValue _ b))) @@ -553,12 +570,18 @@ rewriteAssignRhs rhs = ReadMem addr repr -> do tgtAddr <- rewriteValue addr evalRewrittenRhs (ReadMem tgtAddr repr) - CondReadMem repr cond addr def -> do - rhs' <- CondReadMem repr - <$> rewriteValue cond - <*> rewriteValue addr - <*> rewriteValue def - evalRewrittenRhs rhs' + CondReadMem repr cond0 addr0 def0 -> do + cond <- rewriteValue cond0 + addr <- rewriteValue addr0 + case () of + _ | BoolValue b <- cond -> + if b then + evalRewrittenRhs (ReadMem addr repr) + else + rewriteValue def0 + _ -> do + def <- rewriteValue def0 + evalRewrittenRhs (CondReadMem repr cond addr def) EvalArchFn archFn _repr -> do f <- Rewriter $ gets $ rwctxArchFn . rwContext f archFn @@ -569,7 +592,16 @@ rewriteValue v = BoolValue b -> pure (BoolValue b) BVValue w i -> pure (BVValue w i) RelocatableValue w a -> pure (RelocatableValue w a) - SymbolValue w a -> pure (SymbolValue w a) + SymbolValue repr sym -> do + ctx <- Rewriter $ gets rwContext + rwctxConstraints ctx $ do + let secIdxAddrMap = rwctxSectionAddrMap ctx + case sym of + SectionIdentifier secIdx + | Just val <- Map.lookup secIdx secIdxAddrMap -> do + pure $! RelocatableValue repr (relativeSegmentAddr val) + _ -> do + pure $! SymbolValue repr sym AssignedValue (Assignment aid _) -> Rewriter $ do ref <- gets $ rwctxCache . rwContext srcMap <- lift $ readSTRef ref diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 37525988..85526a94 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -19,20 +19,16 @@ This provides information about code discovered in binaries. {-# LANGUAGE TypeOperators #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} module Data.Macaw.Discovery ( -- * DiscoveryInfo - State.DiscoveryState + State.DiscoveryState(..) , State.emptyDiscoveryState - , State.archInfo - , State.memory , State.funInfo , State.exploredFunctions - , State.symbolNames , State.ppDiscoveryStateBlocks , State.unexploredFunctions , Data.Macaw.Discovery.cfgFromAddrs @@ -70,6 +66,7 @@ import Control.Applicative import Control.Lens import Control.Monad.ST import Control.Monad.State.Strict +import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import Data.Foldable import Data.Map.Strict (Map) @@ -88,8 +85,6 @@ import Data.Word import GHC.IO (ioToST, stToIO) import System.IO -import Debug.Trace - import Data.Macaw.AbsDomain.AbsState import qualified Data.Macaw.AbsDomain.JumpBounds as Jmp import Data.Macaw.AbsDomain.Refine @@ -97,6 +92,7 @@ import qualified Data.Macaw.AbsDomain.StridedInterval as SI import Data.Macaw.Architecture.Info import Data.Macaw.CFG import Data.Macaw.CFG.DemandSet +import Data.Macaw.CFG.Rewriter import Data.Macaw.DebugLogging import Data.Macaw.Discovery.AbsEval import Data.Macaw.Discovery.State as State @@ -353,7 +349,6 @@ mergeIntraJump :: ArchSegmentOff arch -- ^ Address we are trying to reach. -> FunM arch s ids () mergeIntraJump src ab tgt = do --- trace ("mergeIntraJump " ++ show src ++ " " ++ show tgt) $ do info <- uses curFunCtx archInfo withArchConstraints info $ do when (not (absStackHasReturnAddr ab)) $ do @@ -387,62 +382,65 @@ mergeIntraJump src ab tgt = do -- | A memory read that looks like array indexing. It read 'arSize' bytes from -- the address given by 'arBase' + 'arIx'*'arStride'. -data ArrayRead arch ids = forall w. ArrayRead +data ArrayRead arch ids w = ArrayRead { arBase :: ArchSegmentOff arch , arIx :: ArchAddrValue arch ids , arStride :: Integer , arSize :: MemRepr (BVType w) + -- ^ Type of element in this array. } -deriving instance RegisterInfo (ArchReg arch) => Show (ArrayRead arch ids) +deriving instance RegisterInfo (ArchReg arch) => Show (ArrayRead arch ids w) --- | Same as 'arSize', but less typed. -arSizeBytes :: ArrayRead arch ids -> Integer -arSizeBytes (ArrayRead { arSize = s }) = memReprBytes s +-- | Return true if the address stored is readable and not writable. +isReadOnlyArrayRead :: ArrayRead arch ids w -> Bool +isReadOnlyArrayRead = Perm.isReadonly . segmentFlags . msegSegment . arBase -data Extension = Signed | Unsigned deriving (Bounded, Enum, Eq, Ord, Read, Show) +-- | Number of bytes of size. +arSizeBytes :: ArrayRead arch ids w -> Integer +arSizeBytes = memReprBytes . arSize + +------------------------------------------------------------------------ +-- Extension + +-- | Used to denote how a value should be extended to a full address. +data Extension = Signed | Unsigned + deriving (Bounded, Enum, Eq, Ord, Read, Show) + +-- | `extendDyn w ext v` treats `v` as a `w`-bit number and returns the underlying +extendDyn :: (1 <= w, Integral x) => NatRepr w -> Extension -> x ->Integer +extendDyn _ Unsigned = toInteger +extendDyn w Signed = toSigned w . toInteger + +------------------------------------------------------------------------ +-- JumpTable -extendDyn :: MemRepr (BVType w) -> Maybe Extension -> MemWord w -> Maybe Integer -extendDyn (BVMemRepr size _) ext w = case ext of - Nothing -> Just (memWordInteger w) - Just Unsigned -> Just (memWordInteger w) - Just Signed | Just Refl <- testEquality size (knownNat :: NatRepr 4) -> Just (memWordSigned w) - | Just Refl <- testEquality size (knownNat :: NatRepr 8) -> Just (memWordSigned w) - _ -> Nothing -- Beware: on some architectures, after reading from the jump table, the -- resulting addresses must be aligned. See the IPAlignment class. data JumpTable arch ids - -- the result of the array read gives the address to jump to - = Absolute (ArrayRead arch ids) (Maybe Extension) - -- the result of the array read gives an offset from the given base address - -- (typically the base address and the array read's arBase will be identical) - | Relative (ArchSegmentOff arch) (ArrayRead arch ids) (Maybe Extension) + = AbsoluteJumpTable (ArrayRead arch ids (ArchAddrWidth arch)) + -- | `RelativeJumpTable base read ext` describes information about a jump table read. + -- + -- The value is computed as `baseVal + readVal` where + -- + -- `baseVal = fromMaybe 0 base`, `readVal` is the value stored at the memory + -- read described by `read` with the sign of `ext`. + | forall w . RelativeJumpTable (ArchSegmentOff arch) (ArrayRead arch ids w) Extension deriving instance RegisterInfo (ArchReg arch) => Show (JumpTable arch ids) -- | The array read done when computing the jump table. N.B. other processing -- may be needed on the value read in this way to know the address to jump to. -jumpTableRead :: JumpTable arch ids -> ArrayRead arch ids -jumpTableRead (Absolute r _) = r -jumpTableRead (Relative _ r _) = r +jumpTableRead :: JumpTable arch ids -> Some (ArrayRead arch ids) +jumpTableRead (AbsoluteJumpTable r) = Some r +jumpTableRead (RelativeJumpTable _ r _) = Some r -{- --- | After reading from the array, the result may be extended to address width; --- if so, this says how. -jumpTableExtension :: JumpTable arch ids -> Maybe Extension -jumpTableExtension (Absolute _ e) = e -jumpTableExtension (Relative _ _ e) = e --} - -ensure :: Alternative f => (a -> Bool) -> a -> f a -ensure p x = x <$ guard (p x) - -absValueAsSegmentOff :: - forall arch. - Memory (ArchAddrWidth arch) -> - ArchAbsValue arch (BVType (ArchAddrWidth arch)) -> - Maybe (ArchSegmentOff arch) +absValueAsSegmentOff + :: forall w + . Memory w + -> AbsValue w (BVType w) + -> Maybe (MemSegmentOff w) absValueAsSegmentOff mem av = case av of FinSet s | Set.size s == 1 -> resolveAbsoluteIntegerAddr (shead s) CodePointers s False | Set.size s == 1 -> Just (shead s) @@ -453,52 +451,57 @@ absValueAsSegmentOff mem av = case av of shead :: Set a -> a shead = Set.findMin - resolveAbsoluteIntegerAddr :: Integer -> Maybe (ArchSegmentOff arch) + resolveAbsoluteIntegerAddr :: Integer -> Maybe (MemSegmentOff w) resolveAbsoluteIntegerAddr = resolveAbsoluteAddr mem . addrWidthClass (memAddrWidth mem) fromInteger -valueAsSegmentOffWithTransfer :: - forall arch ids. - RegisterInfo (ArchReg arch) => - Memory (ArchAddrWidth arch) -> - AbsProcessorState (ArchReg arch) ids -> - BVValue arch ids (ArchAddrWidth arch) -> - Maybe (ArchSegmentOff arch) -valueAsSegmentOffWithTransfer mem aps v - = valueAsSegmentOff mem v - <|> absValueAsSegmentOff @arch mem (transferValue aps v) +-- | This attempts to interpret a value as a memory segment offset +-- using the memory and abstract interpretation of value. +valueAsSegmentOffWithTransfer + :: forall arch ids + . RegisterInfo (ArchReg arch) + => Memory (ArchAddrWidth arch) + -> AbsProcessorState (ArchReg arch) ids + -> BVValue arch ids (ArchAddrWidth arch) + -> Maybe (ArchSegmentOff arch) +valueAsSegmentOffWithTransfer mem aps base + = valueAsSegmentOff mem base + <|> absValueAsSegmentOff mem (transferValue aps base) -valueAsArrayOffset :: - RegisterInfo (ArchReg arch) => - Memory (ArchAddrWidth arch) -> - AbsProcessorState (ArchReg arch) ids -> - ArchAddrValue arch ids -> - Maybe (ArchSegmentOff arch, ArchAddrValue arch ids) +-- | This interprets a value as a memory segment offset plus value. +valueAsArrayOffset + :: RegisterInfo (ArchReg arch) + => Memory (ArchAddrWidth arch) + -> AbsProcessorState (ArchReg arch) ids + -> ArchAddrValue arch ids + -> Maybe (ArchSegmentOff arch, ArchAddrValue arch ids) valueAsArrayOffset mem aps v - | Just (BVAdd w base offset) <- valueAsApp v - , Just Refl <- testEquality w (memWidth mem) + | Just (BVAdd _ base offset) <- valueAsApp v , Just ptr <- valueAsSegmentOffWithTransfer mem aps base = Just (ptr, offset) -- and with the other argument order - | Just (BVAdd w offset base) <- valueAsApp v - , Just Refl <- testEquality w (memWidth mem) + | Just (BVAdd _ offset base) <- valueAsApp v , Just ptr <- valueAsSegmentOffWithTransfer mem aps base = Just (ptr, offset) | otherwise = Nothing -matchArrayRead, matchReadOnlyArrayRead :: - (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch)) => - Memory (ArchAddrWidth arch) -> - AbsProcessorState (ArchReg arch) ids -> - BVValue arch ids w -> - Maybe (ArrayRead arch ids) + + +-- | See if the value can be interpreted as a read of memory +matchArrayRead + :: (MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch)) + => Memory (ArchAddrWidth arch) + -> AbsProcessorState (ArchReg arch) ids + -> BVValue arch ids w + -> Maybe (ArrayRead arch ids w) matchArrayRead mem aps val | Just (ReadMem addr size) <- valueAsRhs val , Just (base, offset) <- valueAsArrayOffset mem aps addr , Just (stride, ixVal) <- valueAsStaticMultiplication offset - = Just ArrayRead + , memReprBytes size <= stride + = Just $ ArrayRead { arBase = base , arIx = ixVal , arStride = stride @@ -507,22 +510,24 @@ matchArrayRead mem aps val | otherwise = Nothing -matchReadOnlyArrayRead mem aps val = - matchArrayRead mem aps val >>= - ensure (Perm.isReadonly . segmentFlags . msegSegment . arBase) - -- | Just like Some (BVValue arch ids), but doesn't run into trouble with -- partially applying the BVValue type synonym. data SomeBVValue arch ids = forall tp. SomeBVValue (BVValue arch ids tp) -matchExtension :: BVValue arch ids w -> (Maybe Extension, SomeBVValue arch ids) -matchExtension val - | Just (SExt val' _) <- valueAsApp val = (Just Signed , SomeBVValue val') - | Just (UExt val' _) <- valueAsApp val = (Just Unsigned, SomeBVValue val') - | otherwise = (Nothing, SomeBVValue val) +-- | Identify how value is extended. +matchExtension :: ArchAddrValue arch ids + -> (Extension, SomeBVValue arch ids) +matchExtension offset = + case valueAsApp offset of + Just (SExt val' _) -> (Signed, SomeBVValue val') + Just (UExt val' _) -> (Unsigned, SomeBVValue val') + _ -> (Unsigned, SomeBVValue offset) -- | Figure out if this is a jump table. -matchJumpTable :: (IPAlignment arch, MemWidth (ArchAddrWidth arch), RegisterInfo (ArchReg arch)) +matchJumpTable :: ( IPAlignment arch + , MemWidth (ArchAddrWidth arch) + , RegisterInfo (ArchReg arch) + ) => Memory (ArchAddrWidth arch) -> AbsProcessorState (ArchReg arch) ids -> ArchAddrValue arch ids -- ^ Value that's assigned to the IP. @@ -530,9 +535,9 @@ matchJumpTable :: (IPAlignment arch, MemWidth (ArchAddrWidth arch), RegisterInfo matchJumpTable mem aps ip -- Turn a plain read address into base + offset. - | (ext, SomeBVValue ipShort) <- matchExtension ip - , Just arrayRead <- matchReadOnlyArrayRead mem aps ipShort - = Just (Absolute arrayRead ext) + | Just arrayRead <- matchArrayRead mem aps ip + , isReadOnlyArrayRead arrayRead + = Just (AbsoluteJumpTable arrayRead) -- gcc-style PIC jump tables on x86 use, roughly, -- ip = jmptbl + jmptbl[index] @@ -540,10 +545,11 @@ matchJumpTable mem aps ip | Just unalignedIP <- fromIPAligned ip , Just (tgtBase, tgtOffset) <- valueAsArrayOffset mem aps unalignedIP , (ext, SomeBVValue shortOffset) <- matchExtension tgtOffset - , Just arrayRead <- matchReadOnlyArrayRead mem aps shortOffset - = Just (Relative tgtBase arrayRead ext) - -matchJumpTable _ _ _ = Nothing + , Just arrayRead <- matchArrayRead mem aps shortOffset + , isReadOnlyArrayRead arrayRead + = Just (RelativeJumpTable tgtBase arrayRead ext) + | otherwise + = Nothing -- | This describes why we could not infer the bounds of code that looked like it -- was accessing a jump table. @@ -569,12 +575,11 @@ showJumpTableBoundsError err = -- | Returns the index bounds for a jump table of 'Nothing' if this is -- not a block table. -getJumpTableBounds :: ArchitectureInfo a - -> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. - -> ArrayRead a ids +getJumpTableBounds :: ArchConstraints a + => AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. + -> ArrayRead a ids w -> Either String (ArchAddrWord a) - -- ^ One past last index in jump table or nothing -getJumpTableBounds info regs arrayRead = withArchConstraints info $ +getJumpTableBounds regs arrayRead = case Jmp.unsignedUpperBound (regs ^. indexBounds) (arIx arrayRead) of Right (Jmp.IntegerUpperBound maxIx) -> let arrayByteSize = maxIx * arStride arrayRead + arSizeBytes arrayRead in @@ -634,9 +639,11 @@ data ParseContext arch ids = ParseContext { pctxMemory :: !(Memory (ArchAddrWidth arch)) , pctxArchInfo :: !(ArchitectureInfo arch) , pctxKnownFnEntries :: !(Set (ArchSegmentOff arch)) - -- ^ Entry addresses for known functions (e.g. from symbol information) - , pctxTrustKnownFns :: !Bool - -- ^ should we use pctxKnownFns in analysis to identify e.g. jump vs. tail calls + -- ^ Entry addresses for known functions (e.g. from + -- symbol information) + -- + -- The discovery process will not create intra-procedural + -- jumps to the entry points of new functions. , pctxFunAddr :: !(ArchSegmentOff arch) -- ^ Address of function this block is being parsed as , pctxAddr :: !(ArchSegmentOff arch) @@ -679,19 +686,99 @@ identifyCallTargets absState ip = do _ -> def Initial _ -> def --- | Read an address using the @MemRepr@ for format information, which should be 4 or 8 bytes. --- Returns 'Left' for sizes other than 4 or 8 bytes. -readMemReprDyn :: Memory w -> MemAddr w -> MemRepr (BVType w') -> Either (MemoryError w) (MemWord w') -readMemReprDyn mem addr (BVMemRepr size endianness) = do - bs <- readByteString mem addr (fromInteger (natValue size)) - case () of - _ | Just Refl <- testEquality size (knownNat :: NatRepr 4) -> do - let Just val = addrRead endianness bs - Right val - | Just Refl <- testEquality size (knownNat :: NatRepr 8) -> do - let Just val = addrRead endianness bs - Right val - | otherwise -> Left $ InvalidSize addr size +sliceMemContents' + :: MemWidth w + => Int -- ^ Number of bytes in each slice. + -> [[SegmentRange w]] -- ^ Previous slices + -> Integer -- ^ Number of slices to return + -> [SegmentRange w] -- ^ Ranges to process next + -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w]) +sliceMemContents' stride prev c next + | c <= 0 = pure (reverse prev, next) + | otherwise = + case splitSegmentRangeList next stride of + Left e -> Left e + Right (this, rest) -> sliceMemContents' stride (this:prev) (c-1) rest + +-- | `sliceMemContents stride cnt contents` splits contents up into `cnt` +-- memory regions each with size `stride`. +sliceMemContents + :: MemWidth w + => Int -- ^ Number of bytes in each slice. + -> Integer -- ^ Number of slices to return + -> [SegmentRange w] -- ^ Ranges to process next + -> Either (DropError w) ([[SegmentRange w]],[SegmentRange w]) +sliceMemContents stride c next = sliceMemContents' stride [] c next + +-- `getJumpTableContents base cnt stride` returns a list with +getJumpTableContents :: MemWidth w + => MemSegmentOff w + -> Integer + -> Integer + -> Maybe [[SegmentRange w]] +getJumpTableContents base cnt stride = do + let totalSize = cnt * stride + when (msegByteCountAfter base < totalSize) $ + Nothing + contents <- + case contentsAfterSegmentOff base of + Left _ -> Nothing + Right l -> pure l + case sliceMemContents (fromInteger stride) cnt contents of + Left _ -> Nothing + Right (s,_) -> Just s + +-- This function resolves jump table entries. +-- It is a recursive function that has an index into the jump table. +-- If the current index can be interpreted as a intra-procedural jump, +-- then it will add that to the current procedure. +-- This returns the last address read. +resolveJumps :: forall arch ids + . ( MemWidth (ArchAddrWidth arch) + , IPAlignment arch + , RegisterInfo (ArchReg arch) + ) + => Memory (ArchAddrWidth arch) + -> JumpTable arch ids + -> [[SegmentRange (ArchAddrWidth arch)]] + -> Maybe [ArchSegmentOff arch] +resolveJumps mem (AbsoluteJumpTable arrayRead) slices = do + BVMemRepr _arByteCount endianness <- pure $ arSize arrayRead + + forM slices $ \l -> do + case l of + [ByteRegion bs] -> do + val <- addrRead endianness bs + tgt <- asSegmentOff mem (toIPAligned @arch (absoluteAddr val)) + unless (Perm.isExecutable (segmentFlags (msegSegment tgt))) $ Nothing + pure tgt + [RelocationRegion r] -> do + let off = relocationOffset r + when (relocationIsRel r) $ Nothing + case relocationSym r of + SymbolRelocation{} -> Nothing + SectionIdentifier idx -> do + addr <- Map.lookup idx (memSectionAddrMap mem) + incSegmentOff addr (toInteger off) + _ -> Nothing +resolveJumps mem (RelativeJumpTable base arrayRead ext) slices = do + BVMemRepr sz endianness <- pure $ arSize arrayRead + let readFn + | Just Refl <- testEquality sz (knownNat :: NatRepr 4) = + extendDyn (knownNat :: NatRepr 32) ext . bsWord32 endianness + | Just Refl <- testEquality sz (knownNat :: NatRepr 8) = + extendDyn (knownNat :: NatRepr 64) ext . bsWord64 endianness + | otherwise = + error "Do not support this width." + forM slices $ \l -> do + case l of + [ByteRegion bs] + | tgtAddr <- relativeSegmentAddr base + & incAddr (readFn (BS.take (fromInteger (natValue sz)) bs)) + , Just tgt <- asSegmentOff mem (toIPAligned @arch tgtAddr) + , Perm.isExecutable (segmentFlags (msegSegment tgt)) + -> Just tgt + _ -> Nothing -- | This parses a block that ended with a fetch and execute instruction. parseFetchAndExecute :: forall arch ids @@ -702,34 +789,61 @@ parseFetchAndExecute :: forall arch ids -> AbsProcessorState (ArchReg arch) ids -- ^ Registers prior to blocks being executed. -> RegState (ArchReg arch) (Value arch ids) - -> State (ParseState arch ids) (StatementList arch ids) -parseFetchAndExecute ctx lbl_idx stmts regs s' = do - let src = pctxAddr ctx - withArchConstraints arch_info $ do + -> State (ParseState arch ids) (StatementList arch ids, Word64) +parseFetchAndExecute ctx idx stmts regs s = do + let mem = pctxMemory ctx + let ainfo= pctxArchInfo ctx + let absProcState' = absEvalStmts ainfo regs stmts + withArchConstraints ainfo $ do -- See if next statement appears to end with a call. -- We define calls as statements that end with a write that -- stores the pc to an address. - let absProcState' = absEvalStmts arch_info regs stmts case () of + _ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do + mapM_ (recordWriteStmt ainfo mem absProcState') stmts + + let l_regs = refineProcStateBounds c True $ + refineProcState c absTrue absProcState' + let l_regs' = absEvalStmts ainfo l_regs stmts + let lState = s & boundValue ip_reg .~ t + (tStmts,trueIdx) <- + parseFetchAndExecute ctx (idx+1) [] l_regs' lState + + let r_regs = refineProcStateBounds c False $ + refineProcState c absFalse absProcState' + let r_regs' = absEvalStmts ainfo r_regs stmts + let rState = s & boundValue ip_reg .~ f + + (fStmts,falseIdx) <- + parseFetchAndExecute ctx trueIdx [] r_regs' rState + + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = stmts + , stmtsTerm = ParsedIte c tStmts fStmts + , stmtsAbsState = absProcState' + } + pure (ret, falseIdx) + -- The last statement was a call. -- Note that in some cases the call is known not to return, and thus -- this code will never jump to the return value. - _ | Just (prev_stmts, ret) <- identifyCall arch_info mem stmts s' -> do - mapM_ (recordWriteStmt arch_info mem absProcState') prev_stmts - let abst = finalAbsBlockState absProcState' s' + _ | Just (prev_stmts, ret) <- identifyCall ainfo mem stmts s -> do + mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts + let abst = finalAbsBlockState absProcState' s seq abst $ do -- Merge caller return information - intraJumpTargets %= ((ret, postCallAbsState arch_info abst ret):) + intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) -- Use the abstract domain to look for new code pointers for the current IP. - let addrs = identifyCallTargets absProcState' (s'^.boundValue ip_reg) + let addrs = identifyCallTargets absProcState' (s^.boundValue ip_reg) newFunctionAddrs %= (++ addrs) -- Use the call-specific code to look for new IPs. - pure StatementList { stmtsIdent = lbl_idx - , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedCall s' (Just ret) - , stmtsAbsState = absProcState' - } + let r = StatementList { stmtsIdent = idx + , stmtsNonterm = toList prev_stmts + , stmtsTerm = ParsedCall s (Just ret) + , stmtsAbsState = absProcState' + } + pure (r, idx+1) -- This block ends with a return as identified by the -- architecture-specific processing. Basic return @@ -742,225 +856,187 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do -- (e.g. ARM will clear the low bit in T32 mode or the low 2 -- bits in A32 mode), so the actual detection process is -- deferred to architecture-specific functionality. - | Just (prev_stmts) <- identifyReturn arch_info stmts s' absProcState' -> do - mapM_ (recordWriteStmt arch_info mem absProcState') prev_stmts + | Just prev_stmts <- identifyReturn ainfo stmts s absProcState' -> do + mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts - pure StatementList { stmtsIdent = lbl_idx - , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedReturn s' - , stmtsAbsState = absProcState' - } + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = toList prev_stmts + , stmtsTerm = ParsedReturn s + , stmtsAbsState = absProcState' + } + pure (ret, idx+1) -- Jump to a block within this function. - | Just tgt_mseg <- asSegmentOff mem =<< valueAsMemAddr (s'^.boundValue ip_reg) + | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) + -- Check , segmentFlags (msegSegment tgt_mseg) `Perm.hasPerm` Perm.execute - -- The target address cannot be this function entry point. - -- - -- This will result in the target being treated as a call or tail call. + + -- Check the target address is not the entry point of this function. + -- N.B. These should instead decompile into calls or tail calls. , tgt_mseg /= pctxFunAddr ctx -- If we are trusting known function entries, then only mark as an -- intra-procedural jump if the target is not a known function entry. - , not (pctxTrustKnownFns ctx) || (tgt_mseg `notElem` pctxKnownFnEntries ctx) -> do + , not (tgt_mseg `Set.member` pctxKnownFnEntries ctx) -> do - mapM_ (recordWriteStmt arch_info mem absProcState') stmts + mapM_ (recordWriteStmt ainfo mem absProcState') stmts -- Merge block state and add intra jump target. - let abst = finalAbsBlockState absProcState' s' + let abst = finalAbsBlockState absProcState' s let abst' = abst & setAbsIP tgt_mseg intraJumpTargets %= ((tgt_mseg, abst'):) - pure StatementList { stmtsIdent = lbl_idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedJump s' tgt_mseg - , stmtsAbsState = absProcState' - } + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = stmts + , stmtsTerm = ParsedJump s tgt_mseg + , stmtsAbsState = absProcState' + } + pure (ret, idx+1) -- Block ends with what looks like a jump table. - | Just jt <- debug DCFG "try jump table" $ matchJumpTable mem absProcState' (s'^.curIP) -> - let arrayRead = jumpTableRead jt in - case getJumpTableBounds arch_info absProcState' arrayRead of - Left err -> - trace (show src ++ ": Could not compute bounds: " ++ err) $ do - mapM_ (recordWriteStmt arch_info mem absProcState') stmts - pure StatementList { stmtsIdent = lbl_idx - , stmtsNonterm = stmts - , stmtsTerm = ClassifyFailure s' - , stmtsAbsState = absProcState' - } - Right maxIdx -> do - mapM_ (recordWriteStmt arch_info mem absProcState') stmts - -- Try to compute jump table bounds + | Just jt <- matchJumpTable mem absProcState' (s^.curIP) + , Some arrayRead <- jumpTableRead jt + , Right maxIdx <- getJumpTableBounds absProcState' arrayRead + , Just slices <- + getJumpTableContents (arBase arrayRead) + (toInteger maxIdx+1) + (arStride arrayRead) + -- Read addresses + , Just readAddrs <- + resolveJumps (pctxMemory ctx) jt slices -> do + mapM_ (recordWriteStmt ainfo mem absProcState') stmts - let abst :: AbsBlockState (ArchReg arch) - abst = finalAbsBlockState absProcState' s' + let abst :: AbsBlockState (ArchReg arch) + abst = finalAbsBlockState absProcState' s - resolveJump :: ArchAddrWord arch - -> Maybe (ArchSegmentOff arch) - resolveJump = case jt of - Absolute (ArrayRead { arSize = BVMemRepr arByteCount endianness }) Nothing - | natValue arByteCount == toInteger (addrSize (archAddrWidth arch_info)) -> \idx -> - let read_addr = relativeSegmentAddr (arBase arrayRead) & incAddr (arStride arrayRead * toInteger idx) - in case readAddr mem endianness read_addr of - Right tgt_addr - | Just tgt_mseg <- asSegmentOff mem (toIPAligned @arch tgt_addr) - , Perm.isExecutable (segmentFlags (msegSegment tgt_mseg)) - -> Just tgt_mseg - _ -> Nothing - Relative base (ArrayRead { arSize = repr }) ext -> \idx -> - let read_addr = relativeSegmentAddr (arBase arrayRead) & incAddr (arStride arrayRead * toInteger idx) - in case readMemReprDyn mem read_addr repr of - Right shortOffset - | Just offset <- extendDyn repr ext shortOffset - , let tgt_addr = relativeSegmentAddr base & incAddr offset - , Just tgt_mseg <- asSegmentOff mem (toIPAligned @arch tgt_addr) - , Perm.isExecutable (segmentFlags (msegSegment tgt_mseg)) - -> Just tgt_mseg - _ -> Nothing - Absolute _ _ -> debug DCFG - ( "Found a jump table of absolute addresses, but the array elements weren't of\n" - ++ "the same size as addresses. We're gonna bail and report this as a jump table\n" - ++ "with no targets. Jump table info follows.\n" - ++ show jt - ) - (\_ -> Nothing) + seq abst $ do - seq abst $ do - -- This function resolves jump table entries. - -- It is a recursive function that has an index into the jump table. - -- If the current index can be interpreted as a intra-procedural jump, - -- then it will add that to the current procedure. - -- This returns the last address read. - let resolveJumps :: [ArchSegmentOff arch] - -- /\ Addresses in jump table in reverse order - -> ArchAddrWord arch - -- /\ Current index - -> State (ParseState arch ids) [ArchSegmentOff arch] - resolveJumps prev idx | idx > maxIdx = do - -- Stop jump table when we have reached computed bounds. - return (reverse prev) - resolveJumps prev idx = case resolveJump idx of - Just tgt_mseg -> do - let abst' = abst & setAbsIP tgt_mseg - intraJumpTargets %= ((tgt_mseg, abst'):) - resolveJumps (tgt_mseg:prev) (idx+1) - _ -> debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show maxIdx) $ do - return (reverse prev) - read_addrs <- resolveJumps [] 0 - pure StatementList { stmtsIdent = lbl_idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedLookupTable s' (arIx arrayRead) (V.fromList read_addrs) - , stmtsAbsState = absProcState' - } + forM_ readAddrs $ \tgtAddr -> do + let abst' = abst & setAbsIP tgtAddr + intraJumpTargets %= ((tgtAddr, abst'):) + + let term = ParsedLookupTable s (arIx arrayRead) (V.fromList readAddrs) + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = stmts + , stmtsTerm = term + , stmtsAbsState = absProcState' + } + pure (ret,idx+1) -- Check for tail call (anything where we are right at stack height) -- -- TODO: this makes sense for x86, but is not correct for all architectures - | ptrType <- addrMemRepr arch_info - , sp_val <- s'^.boundValue sp_reg - , ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> - finishWithTailCall absProcState' + | ptrType <- addrMemRepr ainfo + , sp_val <- s^.boundValue sp_reg + , ReturnAddr <- absEvalReadMem absProcState' sp_val ptrType -> do + (,idx+1) <$> finishWithTailCall absProcState' -- Is this a jump to a known function entry? We're already past the -- "identifyCall" case, so this must be a tail call, assuming we trust our -- known function entry info. - | pctxTrustKnownFns ctx - , Just tgt_mseg <- valueAsSegmentOff mem (s'^.boundValue ip_reg) - , tgt_mseg `elem` pctxKnownFnEntries ctx -> - finishWithTailCall absProcState' + | Just tgt_mseg <- valueAsSegmentOff mem (s^.boundValue ip_reg) + , tgt_mseg `Set.member` pctxKnownFnEntries ctx -> do + (,idx+1) <$> finishWithTailCall absProcState' -- Block that ends with some unknown | otherwise -> do - mapM_ (recordWriteStmt arch_info mem absProcState') stmts - pure StatementList { stmtsIdent = lbl_idx - , stmtsNonterm = stmts - , stmtsTerm = ClassifyFailure s' - , stmtsAbsState = absProcState' - } + mapM_ (recordWriteStmt ainfo mem absProcState') stmts + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = stmts + , stmtsTerm = ClassifyFailure s + , stmtsAbsState = absProcState' + } + pure (ret,idx+1) - where mem = pctxMemory ctx - arch_info = pctxArchInfo ctx - - finishWithTailCall :: RegisterInfo (ArchReg arch) + where finishWithTailCall :: RegisterInfo (ArchReg arch) => AbsProcessorState (ArchReg arch) ids -> State (ParseState arch ids) (StatementList arch ids) finishWithTailCall absProcState' = do - mapM_ (recordWriteStmt arch_info mem absProcState') stmts + let mem = pctxMemory ctx + mapM_ (recordWriteStmt (pctxArchInfo ctx) mem absProcState') stmts -- Compute final state - let abst = finalAbsBlockState absProcState' s' + let abst = finalAbsBlockState absProcState' s seq abst $ do -- Look for new instruction pointers let addrs = concretizeAbsCodePointers mem (abst^.absRegState^.curIP) newFunctionAddrs %= (++ addrs) - pure StatementList { stmtsIdent = lbl_idx + pure StatementList { stmtsIdent = idx , stmtsNonterm = stmts - , stmtsTerm = ParsedCall s' Nothing + , stmtsTerm = ParsedCall s Nothing , stmtsAbsState = absProcState' } - -- | this evalutes the statements in a block to expand the information known -- about control flow targets of this block. parseBlock :: ParseContext arch ids -- ^ Context for parsing blocks. + -> Word64 + -- ^ Index for next statements -> Block arch ids -- ^ Block to parse -> AbsProcessorState (ArchReg arch) ids -- ^ Abstract state at start of block - -> State (ParseState arch ids) (StatementList arch ids) -parseBlock ctx b regs = do + -> State (ParseState arch ids) (StatementList arch ids, Word64) +parseBlock ctx idx b regs = do let mem = pctxMemory ctx - let arch_info = pctxArchInfo ctx - withArchConstraints arch_info $ do - let idx = blockLabel b - let block_map = pctxBlockMap ctx - -- FIXME: we should propagate c back to the initial block, not just b - let absProcState' = absEvalStmts arch_info regs (blockStmts b) + let ainfo = pctxArchInfo ctx + withArchConstraints ainfo $ do case blockTerm b of Branch c lb rb -> do - mapM_ (recordWriteStmt arch_info mem absProcState') (blockStmts b) + let blockMap = pctxBlockMap ctx + -- FIXME: we should propagate c back to the initial block, not just b + let absProcState' = absEvalStmts ainfo regs (blockStmts b) + mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b) - let Just l = Map.lookup lb block_map + let Just l = Map.lookup lb blockMap let l_regs = refineProcStateBounds c True $ refineProcState c absTrue absProcState' - let Just r = Map.lookup rb block_map + let Just r = Map.lookup rb blockMap let r_regs = refineProcStateBounds c False $ refineProcState c absFalse absProcState' - let l_regs' = absEvalStmts arch_info l_regs (blockStmts b) - let r_regs' = absEvalStmts arch_info r_regs (blockStmts b) + let l_regs' = absEvalStmts ainfo l_regs (blockStmts b) + let r_regs' = absEvalStmts ainfo r_regs (blockStmts b) - parsedTrueBlock <- parseBlock ctx l l_regs' - parsedFalseBlock <- parseBlock ctx r r_regs' + (parsedTrueBlock,trueIdx) <- parseBlock ctx (idx+1) l l_regs' + (parsedFalseBlock,falseIdx) <- parseBlock ctx trueIdx r r_regs' - pure $! StatementList { stmtsIdent = idx - , stmtsNonterm = blockStmts b - , stmtsTerm = ParsedIte c parsedTrueBlock parsedFalseBlock - , stmtsAbsState = absProcState' - } + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = blockStmts b + , stmtsTerm = ParsedIte c parsedTrueBlock parsedFalseBlock + , stmtsAbsState = absProcState' + } + pure (ret, falseIdx) - FetchAndExecute s' -> do - parseFetchAndExecute ctx idx (blockStmts b) regs s' + FetchAndExecute s -> do + parseFetchAndExecute ctx idx (blockStmts b) regs s -- Do nothing when this block ends in a translation error. TranslateError _ msg -> do - pure $! StatementList { stmtsIdent = idx - , stmtsNonterm = blockStmts b - , stmtsTerm = ParsedTranslateError msg - , stmtsAbsState = absProcState' - } - ArchTermStmt ts s' -> do - mapM_ (recordWriteStmt arch_info mem absProcState') (blockStmts b) - let abst = finalAbsBlockState absProcState' s' + -- FIXME: we should propagate c back to the initial block, not just b + let absProcState' = absEvalStmts ainfo regs (blockStmts b) + + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = blockStmts b + , stmtsTerm = ParsedTranslateError msg + , stmtsAbsState = absProcState' + } + pure (ret, idx+1) + ArchTermStmt ts s -> do + -- FIXME: we should propagate c back to the initial block, not just b + let absProcState' = absEvalStmts ainfo regs (blockStmts b) + mapM_ (recordWriteStmt ainfo mem absProcState') (blockStmts b) + let abst = finalAbsBlockState absProcState' s -- Compute possible next IPS. - let r = postArchTermStmtAbsState arch_info mem abst s' ts + let r = postArchTermStmtAbsState ainfo mem abst s ts case r of Just (addr,post) -> intraJumpTargets %= ((addr, post):) Nothing -> pure () - pure $! StatementList { stmtsIdent = idx - , stmtsNonterm = blockStmts b - , stmtsTerm = ParsedArchTermStmt ts s' (fst <$> r) - , stmtsAbsState = absProcState' - } + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = blockStmts b + , stmtsTerm = ParsedArchTermStmt ts s (fst <$> r) + , stmtsAbsState = absProcState' + } + pure (ret, idx+1) -- | This evalutes the statements in a block to expand the information known -- about control flow targets of this block. @@ -987,12 +1063,15 @@ transferBlocks src finfo sz block_map = -- undiscovered functions with entries marked InitAddr, which we assume is -- info we know from the symbol table or some other reliable source, and -- pass in. Only used in analysis if pctxTrustKnownFns is True. - let knownFns = Set.union (Map.keysSet $ s^.funInfo) - (Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions) + let knownFns = + if s^.trustKnownFns then + Set.union (Map.keysSet $ s^.funInfo) + (Map.keysSet $ Map.filter (== InitAddr) $ s^.unexploredFunctions) + else + Set.empty let ctx = ParseContext { pctxMemory = memory s , pctxArchInfo = archInfo s , pctxKnownFnEntries = knownFns - , pctxTrustKnownFns = s^.trustKnownFns , pctxFunAddr = funAddr , pctxAddr = src , pctxBlockMap = block_map @@ -1001,7 +1080,7 @@ transferBlocks src finfo sz block_map = , _intraJumpTargets = [] , _newFunctionAddrs = [] } - let (pblock, ps) = runState (parseBlock ctx b regs) ps0 + let ((pblock,_), ps) = runState (parseBlock ctx 0 b regs) ps0 let pb = ParsedBlock { pblockAddr = src , blockSize = sz , blockReason = foundReason finfo @@ -1013,7 +1092,6 @@ transferBlocks src finfo sz block_map = . markAddrsAsFunction (CallTarget src) (ps^.newFunctionAddrs) mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) - transfer :: ArchSegmentOff arch -> FunM arch s ids () transfer addr = do s <- use curFunCtx @@ -1028,20 +1106,26 @@ transfer addr = do -- Get maximum number of bytes to disassemble let seg = msegSegment addr off = msegOffset addr - let max_size = + let maxSize = case Map.lookupGT addr prev_block_map of Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o _ -> segmentSize seg - off let ab = foundAbstractState finfo - (bs0, sz, maybeError) <- liftST $ do + (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo mem nonceGen addr maxSize ab + #ifdef USE_REWRITER - disassembleAndRewrite ainfo mem nonceGen addr max_size ab + bs1 <- do + let archStmt = rewriteArchStmt ainfo + let secAddrMap = memSectionAddrMap mem + liftST $ do + ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap + traverse (rewriteBlock ainfo ctx) bs0 #else - disassembleFn ainfo mem nonceGen addr max_size ab + bs1 <- pure bs0 #endif -- If no blocks are returned, then we just add an empty parsed block. - if null bs0 then do + if null bs1 then do let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError let stmts = StatementList { stmtsIdent = 0 @@ -1060,7 +1144,7 @@ transfer addr = do -- Rewrite returned blocks to simplify expressions -- Compute demand set - let bs = eliminateDeadStmts ainfo bs0 + let bs = eliminateDeadStmts ainfo bs1 -- Call transfer blocks to calculate parsedblocks let block_map = Map.fromList [ (blockLabel b, b) | b <- bs ] transferBlocks addr finfo sz block_map @@ -1145,7 +1229,7 @@ analyzeFunction logFn addr rsn s = let s' = (fs^.curFunCtx) & funInfo %~ Map.insert addr (Some finfo) & unexploredFunctions %~ Map.delete addr - seq finfo $ seq s' $ pure (s', Some finfo) + seq finfo $ seq s $ pure (s', Some finfo) -- | Analyze addresses that we have marked as functions, but not yet analyzed to -- identify basic blocks, and discover new function candidates until we have @@ -1188,22 +1272,6 @@ exploreMemPointers mem_words info = $ mem_words mapM_ (modify . addMemCodePointer) mem_addrs --- | Construct an empty discovery state and populate it by exploring from a --- given set of function entry points -cfgFromAddrs :: - forall arch - . ArchitectureInfo arch - -- ^ Architecture-specific information needed for doing control-flow exploration. - -> Memory (ArchAddrWidth arch) - -- ^ Memory to use when decoding instructions. - -> AddrSymMap (ArchAddrWidth arch) - -- ^ Map from addresses to the associated symbol name. - -> [ArchSegmentOff arch] - -> [(ArchSegmentOff arch, ArchSegmentOff arch)] - -> DiscoveryState arch -cfgFromAddrs arch_info mem symbols = - cfgFromAddrsAndState (emptyDiscoveryState mem symbols arch_info) - -- | Expand an initial discovery state by exploring from a given set of function -- entry points. cfgFromAddrsAndState :: forall arch @@ -1223,6 +1291,27 @@ cfgFromAddrsAndState initial_state init_addrs mem_words = & exploreMemPointers mem_words & analyzeDiscoveredFunctions +-- | Construct an empty discovery state and populate it by exploring from a +-- given set of function entry points +cfgFromAddrs :: + forall arch + . ArchitectureInfo arch + -- ^ Architecture-specific information needed for doing control-flow exploration. + -> Memory (ArchAddrWidth arch) + -- ^ Memory to use when decoding instructions. + -> AddrSymMap (ArchAddrWidth arch) + -- ^ Map from addresses to the associated symbol name. + -> [ArchSegmentOff arch] + -- ^ Initial function entry points. + -> [(ArchSegmentOff arch, ArchSegmentOff arch)] + -- ^ Function entry points in memory to be explored + -- after exploring function entry points. + -- + -- Each entry contains an address and the value stored in it. + -> DiscoveryState arch +cfgFromAddrs ainfo mem symbols = + cfgFromAddrsAndState (emptyDiscoveryState mem symbols ainfo) + ------------------------------------------------------------------------ -- Resolve functions with logging @@ -1320,24 +1409,19 @@ ppFunReason rsn = -- This function is intended to make it easy to explore functions, and -- can be controlled via 'DiscoveryOptions'. completeDiscoveryState :: forall arch - . ArchitectureInfo arch + . DiscoveryState arch -> DiscoveryOptions -- ^ Options controlling discovery - -> Memory (ArchAddrWidth arch) - -- ^ Memory state used for static code discovery. - -> [MemSegmentOff (ArchAddrWidth arch)] - -- ^ Initial entry points to explore - -> AddrSymMap (ArchAddrWidth arch) - -- ^ The map from addresses to symbols -> (ArchSegmentOff arch -> Bool) -- ^ Predicate to check if we should explore a function -- -- Return true to explore all functions. -> IO (DiscoveryState arch) -completeDiscoveryState ainfo disOpt mem initEntries symMap funPred = stToIO $ withArchConstraints ainfo $ do - let initState - = emptyDiscoveryState mem symMap ainfo - & markAddrsAsFunction InitAddr initEntries +completeDiscoveryState initState disOpt funPred = do + let ainfo = archInfo initState + let mem = memory initState + let symMap = symbolNames initState + stToIO $ withArchConstraints ainfo $ do -- Add symbol table entries to discovery state if requested let postSymState | exploreFunctionSymbols disOpt = diff --git a/base/src/Data/Macaw/Discovery/State.hs b/base/src/Data/Macaw/Discovery/State.hs index d112f72a..c6d339b8 100644 --- a/base/src/Data/Macaw/Discovery/State.hs +++ b/base/src/Data/Macaw/Discovery/State.hs @@ -133,7 +133,7 @@ data ParsedTermStmt arch ids -- an unsigned number) is larger than the number of entries in the vector, then the -- result is undefined. | ParsedLookupTable !(RegState (ArchReg arch) (Value arch ids)) - !(BVValue arch ids (ArchAddrWidth arch)) + !(ArchAddrValue arch ids) !(V.Vector (ArchSegmentOff arch)) -- | A return with the given registers. | ParsedReturn !(RegState (ArchReg arch) (Value arch ids)) @@ -279,6 +279,9 @@ instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where ------------------------------------------------------------------------ -- DiscoveryState +type UnexploredFunctionMap arch = + Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch)) + -- | Information discovered about the program data DiscoveryState arch = DiscoveryState { memory :: !(Memory (ArchAddrWidth arch)) @@ -294,7 +297,7 @@ data DiscoveryState arch , _funInfo :: !(Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch))) -- ^ Map from function addresses to discovered information about function , _unexploredFunctions - :: !(Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch))) + :: !(UnexploredFunctionMap arch) -- ^ This maps addresses that have been marked as -- functions, but not yet analyzed to the reason -- they are analyzed. @@ -328,7 +331,7 @@ ppDiscoveryStateBlocks info = withDiscoveryArchConstraints info $ emptyDiscoveryState :: Memory (ArchAddrWidth arch) -- ^ State of memory -> AddrSymMap (ArchAddrWidth arch) - -- ^ Map from addresses + -- ^ Map from addresses to their symbol name (if any) -> ArchitectureInfo arch -- ^ architecture/OS specific information -> DiscoveryState arch @@ -351,7 +354,7 @@ globalDataMap = lens _globalDataMap (\s v -> s { _globalDataMap = v }) -- | List of functions to explore next. unexploredFunctions - :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (FunctionExploreReason (ArchAddrWidth arch))) + :: Simple Lens (DiscoveryState arch) (UnexploredFunctionMap arch) unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunctions = v }) -- | Get information for specific functions diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index 8e7fe626..7930a047 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -58,6 +58,7 @@ module Data.Macaw.Memory , module Data.BinarySymbols , DropError(..) , dropErrorAsMemError + , splitSegmentRangeList , dropSegmentRangeListBytes , takeSegmentPrefix -- * MemWord @@ -75,6 +76,7 @@ module Data.Macaw.Memory , msegSegment , msegOffset , msegAddr + , msegByteCountAfter , incSegmentOff , diffSegmentOff , contentsAfterSegmentOff @@ -90,10 +92,11 @@ module Data.Macaw.Memory -- ** Undefined symbol infomration , SymbolRequirement(..) , SymbolUndefType(..) + -- * Section addresses + , memAddSectionAddr + , memSectionAddrMap -- * General purposes addrs - , MemAddr - , addrBase - , addrOffset + , MemAddr(..) , absoluteAddr , relativeAddr , relativeSegmentAddr @@ -121,8 +124,10 @@ module Data.Macaw.Memory , bsWord8 , bsWord16be , bsWord16le + , bsWord32 , bsWord32be , bsWord32le + , bsWord64 , bsWord64be , bsWord64le , AddrSymMap @@ -234,6 +239,11 @@ bsWord32le bs | otherwise = w 0 .|. w 1 .|. w 2 .|. w 3 where w i = fromIntegral (BS.index bs i) `shiftL` (i `shiftL` 3) +-- | Convert a bytestring to an unsigned with the given endianness. +bsWord32 :: Endianness -> BS.ByteString -> Word32 +bsWord32 BigEndian = bsWord32be +bsWord32 LittleEndian = bsWord32le + bsWord64be :: BS.ByteString -> Word64 bsWord64be bs | BS.length bs /= 8 = error "bsWord64be given bytestring with bad length." @@ -246,6 +256,10 @@ bsWord64le bs | otherwise = w 0 .|. w 1 .|. w 2 .|. w 3 .|. w 4 .|. w 5 .|. w 6 .|. w 7 where w i = fromIntegral (BS.index bs i) `shiftL` (i `shiftL` 3) +bsWord64 :: Endianness -> BS.ByteString -> Word64 +bsWord64 BigEndian = bsWord64be +bsWord64 LittleEndian = bsWord64le + ------------------------------------------------------------------------ -- MemBase @@ -260,12 +274,6 @@ newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 } memWordInteger :: MemWord w -> Integer memWordInteger = fromIntegral . _memWordValue --- | Treat the word as a signed integer. -memWordSigned :: MemWidth w => MemWord w -> Integer -memWordSigned w = if i >= bound then i-2*bound else i where - i = memWordInteger w - bound = 2^(8*addrSize w-1) - instance Show (MemWord w) where showsPrec _ (MemWord w) = showString "0x" . showHex w @@ -294,7 +302,7 @@ class (1 <= w) => MemWidth w where -- The argument is not evaluated. addrSize :: p w -> Int - -- Rotates the value by the given index. + -- | Rotates the value by the given index. addrRotate :: MemWord w -> Int -> MemWord w -- | Read an address with the given endianess. @@ -302,6 +310,12 @@ class (1 <= w) => MemWidth w where -- This returns nothing if the bytestring is too short. addrRead :: Endianness -> BS.ByteString -> Maybe (MemWord w) +-- | Treat the word as a signed integer. +memWordSigned :: MemWidth w => MemWord w -> Integer +memWordSigned w = if i >= bound then i-2*bound else i + where i = memWordInteger w + bound = 2^(addrBitSize w-1) + -- | Returns number of bits in address. addrBitSize :: MemWidth w => p w -> Int addrBitSize w = 8 * addrSize w @@ -355,14 +369,12 @@ instance MemWidth w => Bounded (MemWord w) where instance MemWidth 32 where addrWidthRepr _ = Addr32 addrWidthMod _ = 0xffffffff - addrRotate (MemWord w) i = MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) + addrRotate (MemWord w) i = + MemWord (fromIntegral ((fromIntegral w :: Word32) `rotate` i)) addrSize _ = 4 addrRead e s | BS.length s < 4 = Nothing - | otherwise = - case e of - BigEndian -> Just $ MemWord $ fromIntegral $ bsWord32be s - LittleEndian -> Just $ MemWord $ fromIntegral $ bsWord32le s + | otherwise = Just $ MemWord $ fromIntegral $ bsWord32 e s instance MemWidth 64 where addrWidthRepr _ = Addr64 @@ -371,10 +383,7 @@ instance MemWidth 64 where addrSize _ = 8 addrRead e s | BS.length s < 8 = Nothing - | otherwise = - case e of - BigEndian -> Just $ MemWord $ fromIntegral $ bsWord64be s - LittleEndian -> Just $ MemWord $ fromIntegral $ bsWord64le s + | otherwise = Just $ MemWord $ bsWord64 e s -- | Number of bytes in an address addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a @@ -482,53 +491,63 @@ data SymbolInfo = ------------------------------------------------------------------------ -- Relocation +-- | Information about a relocation. +-- +-- A "relocation" is essentially a region of memory in a binary whose +-- ultimate value is unknown when the binary was generated. For +-- executables and shared libraries, relocation values are assigned +-- during loading or even deferred until needed at runtime (lazy +-- runtime linking). For object files, relocations values are +-- assigned during linking either by assigning values directly or +-- generating dynamic relocation entries for the loader to resolve. +-- +-- This structure contains the information needed to compute the value +-- stored in memory, and whether there are constraints on that value. +data Relocation w + = Relocation { relocationSym :: !SymbolIdentifier + -- ^ The base address of the relocation + , relocationOffset :: !(MemWord w) + -- ^ A constant value to add to the base + -- to compute the relocation + , relocationIsRel :: !Bool + -- ^ If this is true, then one should subtract + -- address of where the relocation is stored from + -- the value computed by the symbol + offset. If + -- false, then do not subtract. + , relocationSize :: !Int + -- ^ Number of bytes available to write the + -- relocation address to. This and `relocationSign` + -- affect the ultimate values the relocation is + -- allowed to take. + , relocationIsSigned :: !Bool + -- ^ This indicates if the value stored will be + -- interpreted as an signed or unsigned number. + -- + -- It is expected that the value stored is in the + -- range determined by the number of bytes and + -- whether those bytes will be interpreted as + -- signed. + , relocationEndianness :: !Endianness + -- ^ The byte order used to encode the relocation in + -- memory. + } + +-- | Short encoding of endianness for relocation pretty printing showEnd :: Endianness -> ShowS showEnd LittleEndian = showString "LE" showEnd BigEndian = showString "BE" --- | Information about a relocation -data Relocation w - = AbsoluteRelocation !SymbolIdentifier !(MemWord w) !Endianness !Int - -- ^ @AbsoluteRelocation addr off end size@ denotes an - -- address of the relocation plus the offset stored - -- with the given endianess. - -- - -- The @size@ field is the number of bytes the relocation is stored - -- at, and when inserting the relocation value it should only use - -- that many bytes. If the address + offset is greater than or equal to - -- @2^(8*n)@, then updating the relocation target should fail. This is - -- used to support relocation types such as @R_X86_64_32@. We do not - -- currently support signed versions like @R_X86_64_32S@. - | RelativeRelocation !SymbolIdentifier !(MemWord w) !Endianness !Int - -- ^ @RelativeRelocation addr off end cnt@ denotes a relocation - -- that stores the value of @addr + off - this_addr@ (where - -- @this_addr@ is the address the relocation is stored at as a - -- signed value in @cnt@ bytes with endianess @end@. - --- | Return size of relocation in bytes -relocSize :: forall w . MemWidth w => Relocation w -> MemWord w -relocSize (AbsoluteRelocation _ _ _ cnt) = fromIntegral cnt -relocSize (RelativeRelocation _ _ _ cnt) = fromIntegral cnt - instance Show (Relocation w) where - showsPrec _ (AbsoluteRelocation base off end cnt) = + showsPrec _ r = showString "[areloc," - . shows base + . shows (relocationSym r) . showChar ',' - . showHex (memWordInteger off) + . showHex (memWordInteger (relocationOffset r)) . showChar ',' - . showEnd end - . showChar ',' - . shows (8*cnt) - . showChar ']' - showsPrec _ (RelativeRelocation base off end cnt) = - showString "[rreloc," - . shows base - . showHex (memWordInteger off) - . showChar ',' - . showEnd end - . showChar ',' - . shows (8*cnt) + . shows (8*relocationSize r) + . (if relocationIsRel r then showString ",PC" else id) + . (if relocationIsSigned r then showString ",S" else id) + . showChar ',' . showEnd (relocationEndianness r) . showChar ']' ------------------------------------------------------------------------ @@ -541,12 +560,15 @@ data SegmentRange (w :: Nat) = ByteRegion !BS.ByteString -- ^ A region with specificed bytes | RelocationRegion !(Relocation w) + -- ^ A region whose contents are computed using the expression + -- denoted by the relocation. | BSSRegion !(MemWord w) - -- ^ A region containing the given number of zero-initialized bytes. + -- ^ A region containing the given number of zero-initialized + -- bytes. rangeSize :: forall w . MemWidth w => SegmentRange w -> MemWord w rangeSize (ByteRegion bs) = fromIntegral (BS.length bs) -rangeSize (RelocationRegion r) = relocSize r +rangeSize (RelocationRegion r) = fromIntegral (relocationSize r) rangeSize (BSSRegion sz) = sz ppByte :: Word8 -> String -> String @@ -561,57 +583,6 @@ instance Show (SegmentRange w) where showList [] = id showList (h : r) = showsPrec 10 h . showList r -takeSegmentPrefix :: MemWidth w => [SegmentRange w] -> MemWord w -> [SegmentRange w] -takeSegmentPrefix _ 0 = [] -takeSegmentPrefix rngs c = do - let rest l d | c > d = takeSegmentPrefix l (c - d) - | otherwise = [] - case rngs of - [] -> [] - ByteRegion b : l -> - ByteRegion (BS.take (fromIntegral c) b) - : rest l (fromIntegral (BS.length b)) - RelocationRegion r : l -> - RelocationRegion r - : rest l (relocSize r) - BSSRegion d : l -> - BSSRegion (min d c) - : rest l d - ------------------------------------------------------------------------- --- MemoryError - --- | Type of errors that may occur when reading memory. -data MemoryError w - = AccessViolation !(MemAddr w) - -- ^ Memory could not be read, because it was not defined. - | PermissionsError !(MemAddr w) - -- ^ Memory could not be read due to insufficient permissions. - | UnexpectedRelocation !(MemAddr w) !(Relocation w) !String - -- ^ Read from location that partially overlaps a relocated entry - | UnexpectedBSS !(MemAddr w) - -- ^ We unexpectedly encountered a BSS segment/section. - | InvalidAddr !(MemAddr w) - -- ^ The data at the given address did not refer to a valid memory location. - | forall n. InvalidSize !(MemAddr w) !(NatRepr n) - -instance MemWidth w => Show (MemoryError w) where - show err = - case err of - AccessViolation a -> - "Access violation at " ++ show a ++ "." - PermissionsError a -> - "Insufficient permissions at " ++ show a ++ "." - UnexpectedRelocation a r msg -> - "Attempt to read an unexpected relocation entry at " ++ show a ++ ":\n" - ++ " " ++ show r ++ "\n" ++ msg - UnexpectedBSS a -> - "Attempt to read zero initialized BSS memory at " ++ show a ++ "." - InvalidAddr a -> - "Attempt to interpret an invalid address: " ++ show a ++ "." - InvalidSize a n -> - "Attempt to read an invalid number of bytes (" ++ show n ++ ") from address " ++ show a ++ "." - ------------------------------------------------------------------------ -- SegmentContents @@ -629,50 +600,10 @@ contentsSize (SegmentContents m) = Nothing -> 0 Just ((start, c),_) -> start + rangeSize c --- | De-construct a 'SegmentContents' into its constituent ranges +-- | Deconstruct a 'SegmentContents' into its constituent ranges contentsRanges :: SegmentContents w -> [(MemWord w, SegmentRange w)] contentsRanges = Map.toList . segContentsMap --- | Return list of contents from given word or an error if this we can't cleanly --- partition a relocation --- due to a relocation. -contentsAfterSegmentOff :: MemWidth w - => MemSegmentOff w - -> Either (MemoryError w) [SegmentRange w] -contentsAfterSegmentOff mseg = do - -- Get offset within segment to get - let off = msegOffset mseg - -- Get complete contents of segment - let contents = segmentContents (msegSegment mseg) - -- Split the map into all segments starting strictly before offset, - -- memory starting at offset (if any), and contents strictly after offset. - let (premap,mv,post) = Map.splitLookup off (segContentsMap contents) - case mv of - -- If something starts at offset, then return it and everything after. - Just v -> Right $ v : Map.elems post - -- If no memory starts exactly at offset, then - -- look at the last segment starting before offset. - Nothing -> - case Map.maxViewWithKey premap of - -- This implies nothing starts before the segment offset, which should not be - -- allowed - Nothing -> error $ "Memory.contentsAfterSegmentOff invalid contents" - -- If last segment is a byte region then we drop elements before offset. - Just ((pre_off, ByteRegion bs),_) -> do - let v = ByteRegion (BS.drop (fromIntegral (off - pre_off)) bs) - Right $ v : Map.elems post - -- If last segment is a BSS region, then we drop elements before offset. - Just ((pre_off, BSSRegion sz),_) -> do - let v = BSSRegion (sz - fromIntegral (off - pre_off)) - Right $ v : Map.elems post - -- If last segment is a symbolic reference, then the code is asking - -- us to partition a symbolic reference in two, which we cannot do. - Just ((_, RelocationRegion r),_) -> - Left (UnexpectedRelocation (relativeSegmentAddr mseg) r "caso") - -contentsList :: SegmentContents w -> [(MemWord w, SegmentRange w)] -contentsList (SegmentContents m) = Map.toList m - ------------------------------------------------------------------------ -- Code for injecting relocations into segments. @@ -875,12 +806,16 @@ instance MemWidth w => Show (MemSegment w) where ------------------------------------------------------------------------ -- Memory +-- | Map from region index to map of offset to segment. type SegmentOffsetMap w = Map.Map RegionIndex (Map.Map (MemWord w) (MemSegment w)) -- | The state of the memory. data Memory w = Memory { memAddrWidth :: !(AddrWidthRepr w) -- ^ Return the address width of the memory , memSegmentMap :: !(SegmentOffsetMap w) + -- ^ Segment map + , memSectionAddrMap :: !(Map SectionIndex (MemSegmentOff w)) + -- ^ Map from section indices to addresses. } -- | Get memory segments. @@ -891,6 +826,14 @@ memSegments m = concatMap Map.elems (Map.elems (memSegmentMap m)) memWidth :: Memory w -> NatRepr w memWidth = addrWidthNatRepr . memAddrWidth +-- | Add a new section index to address entry. +memAddSectionAddr :: SectionIndex -> MemSegmentOff w -> Memory w -> Memory w +memAddSectionAddr idx addr mem + | Map.member idx (memSectionAddrMap mem) = + error $ "memAddSectionAddr: duplicate index " ++ show idx + | otherwise = + mem { memSectionAddrMap = Map.insert idx addr (memSectionAddrMap mem) } + instance MemWidth w => Show (Memory w) where show = show . memSegments @@ -898,6 +841,7 @@ instance MemWidth w => Show (Memory w) where emptyMemory :: AddrWidthRepr w -> Memory w emptyMemory w = Memory { memAddrWidth = w , memSegmentMap = Map.empty + , memSectionAddrMap = Map.empty } -- | Get executable segments. @@ -935,12 +879,11 @@ insertMemSegment :: MemSegment w -> Either (InsertError w) (Memory w) insertMemSegment seg mem = addrWidthClass (memAddrWidth mem) $ do absMap <- insertSegmentOffsetMap seg (memSegmentMap mem) - pure $ Memory { memAddrWidth = memAddrWidth mem - , memSegmentMap = absMap - } + pure $ mem { memSegmentMap = absMap } ------------------------------------------------------------------------ -- MemSegmentOff + -- | A pair containing a segment and offset. -- -- Functions that return a segment-offset pair enforce that the offset @@ -950,6 +893,12 @@ data MemSegmentOff w = MemSegmentOff { msegSegment :: !(MemSegment w) } deriving (Eq, Ord) +-- | Return the number of bytes in the segment after this address. +msegByteCountAfter :: MemWidth w => MemSegmentOff w -> Integer +msegByteCountAfter segOff = sz - off + where sz = toInteger (segmentSize (msegSegment segOff)) + off = toInteger (msegOffset segOff) + {-# DEPRECATED viewSegmentOff "Use msegSegment and msegOffset." #-} viewSegmentOff :: MemSegmentOff w -> (MemSegment w, MemWord w) viewSegmentOff mseg = (msegSegment mseg, msegOffset mseg) @@ -993,7 +942,8 @@ clearSegmentOffLeastBit (MemSegmentOff seg off) = MemSegmentOff seg (off .&. com -- Returns 'Nothing' if the result would be out of range. incSegmentOff :: MemWidth w => MemSegmentOff w -> Integer -> Maybe (MemSegmentOff w) incSegmentOff (MemSegmentOff seg off) inc - | 0 <= next && next <= toInteger (segmentSize seg) = Just $ MemSegmentOff seg (fromInteger next) + | 0 <= next && next <= toInteger (segmentSize seg) = + Just $ MemSegmentOff seg (fromInteger next) | otherwise = Nothing where next = toInteger off + inc @@ -1026,7 +976,7 @@ memAsAddrPairs :: Memory w -> [(MemSegmentOff w, MemSegmentOff w)] memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do seg <- memSegments mem - (contents_offset,r) <- contentsList (segmentContents seg) + (contents_offset,r) <- contentsRanges (segmentContents seg) let sz = addrSize mem case r of ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do @@ -1109,6 +1059,99 @@ instance MemWidth w => Show (MemAddr w) where instance MemWidth w => Pretty (MemAddr w) where pretty = text . show +------------------------------------------------------------------------ +-- MemoryError + +-- | Type of errors that may occur when reading memory. +data MemoryError w + = AccessViolation !(MemAddr w) + -- ^ Memory could not be read, because it was not defined. + | PermissionsError !(MemAddr w) + -- ^ Memory could not be read due to insufficient permissions. + | UnexpectedRelocation !(MemAddr w) !(Relocation w) + -- ^ Read from location that partially overlaps a relocated entry + | UnexpectedByteRelocation !(MemAddr w) !(Relocation w) + -- ^ An relocation appeared when reading a byte. + | Unsupported32ImmRelocation !(MemAddr w) !(Relocation w) + -- ^ An unsupported relocation appeared when reading a 32-bit immediate. + | UnsupportedJumpOffsetRelocation !(MemAddr w) !(Relocation w) + -- ^ An unsupported relocation appeared when reading a jump offset. + | UnexpectedBSS !(MemAddr w) + -- ^ We unexpectedly encountered a BSS segment/section. + | InvalidAddr !(MemAddr w) + -- ^ The data at the given address did not refer to a valid memory location. + | InvalidRead !(MemSegmentOff w) !Word64 + -- ^ Can't read the given number of bytes from the offset as that is outside + -- allocated memory. + | forall n. InvalidSize !(MemAddr w) !(NatRepr n) + +instance MemWidth w => Show (MemoryError w) where + show err = + case err of + AccessViolation a -> + "Access violation at " ++ show a ++ "." + PermissionsError a -> + "Insufficient permissions at " ++ show a ++ "." + UnexpectedRelocation a r -> + "Attempt to read an unexpected relocation entry at " ++ show a ++ ":\n" + ++ " " ++ show r + UnexpectedByteRelocation a r -> + "Attempt to read a relocation as a byte at " ++ show a ++ ":\n" + ++ " " ++ show r + Unsupported32ImmRelocation a r -> + "Attempt to read an unsupported relocation as a 32-bit immediate at " ++ show a ++ ":\n" + ++ " " ++ show r + UnsupportedJumpOffsetRelocation a r -> + "Attempt to read an unsupported relocation as a jump offset at " ++ show a ++ ":\n" + ++ " " ++ show r + UnexpectedBSS a -> + "Attempt to read zero initialized BSS memory at " ++ show a ++ "." + InvalidAddr a -> + "Attempt to interpret an invalid address: " ++ show a ++ "." + InvalidRead a c -> + "Read " ++ show c ++ " bytes if after defined memory " ++ show a ++ "." + InvalidSize a n -> + "Attempt to read an invalid number of bytes (" ++ show n ++ ") from address " ++ show a ++ "." + +------------------------------------------------------------------------ +-- Reading contents + +-- | Return list of contents from given word or an error if this we can't cleanly +-- partition a relocation +-- due to a relocation. +contentsAfterSegmentOff :: MemWidth w + => MemSegmentOff w + -> Either (MemoryError w) [SegmentRange w] +contentsAfterSegmentOff mseg = do + -- Get offset within segment to get + let off = msegOffset mseg + -- Get complete contents of segment + let contents = segmentContents (msegSegment mseg) + -- Split the map into all segments starting strictly before offset, + -- memory starting at offset (if any), and contents strictly after offset. + let (premap,mv,post) = Map.splitLookup off (segContentsMap contents) + case mv of + -- If something starts at offset, then return it and everything after. + Just v -> Right $ v : Map.elems post + -- If no memory starts exactly at offset, then + -- look at the last segment starting before offset. + Nothing -> + case Map.maxViewWithKey premap of + -- This implies nothing starts before the segment offset, which should not be + -- allowed + Nothing -> error $ "Memory.contentsAfterSegmentOff invalid contents" + -- If last segment is a byte region then we drop elements before offset. + Just ((preOff, ByteRegion bs),_) -> do + let v = ByteRegion (BS.drop (fromIntegral (off - preOff)) bs) + Right $ v : Map.elems post + -- If last segment is a BSS region, then we drop elements before offset. + Just ((preOff, BSSRegion sz),_) -> do + let v = BSSRegion (sz - fromIntegral (off - preOff)) + Right $ v : Map.elems post + -- If last segment is a symbolic reference, then the code is asking + -- us to partition a symbolic reference in two, which we cannot do. + Just ((_, RelocationRegion r),_) -> + Left $ UnexpectedRelocation (relativeSegmentAddr mseg) r ------------------------------------------------------------------------ -- AddrSymMap @@ -1117,7 +1160,34 @@ instance MemWidth w => Pretty (MemAddr w) where type AddrSymMap w = Map.Map (MemSegmentOff w) BSC.ByteString ------------------------------------------------------------------------ --- DropError +-- Split segment range list. + +-- | @takeSegmentPrefix ranges cnt@ attempts to read @cnt@ bytes from +-- @ranges@. +-- +-- It is a total function, and will return @ranges@ if it contains +-- less than @cnt@ bytes. It may also return more than @cnt@ bytes as +-- if a relocation region spans across the break, it will return the +-- region. +takeSegmentPrefix :: forall w + . MemWidth w => [SegmentRange w] -> MemWord w -> [SegmentRange w] +takeSegmentPrefix _ 0 = [] +takeSegmentPrefix rngs c = do + let rest :: [SegmentRange w] -> MemWord w -> [SegmentRange w] + rest l d | c > d = takeSegmentPrefix l (c - d) + | otherwise = [] + case rngs of + [] -> [] + ByteRegion b : l -> + ByteRegion (BS.take (fromIntegral c) b) + : rest l (fromIntegral (BS.length b)) + RelocationRegion r : l -> + RelocationRegion r + : rest l (fromIntegral (relocationSize r)) + BSSRegion d : l -> + BSSRegion (min d c) + : rest l d + -- | An error that occured when droping bytes. data DropError w @@ -1125,8 +1195,50 @@ data DropError w | DropInvalidAddr dropErrorAsMemError :: MemAddr w -> DropError w -> MemoryError w -dropErrorAsMemError a (DropUnexpectedRelocation r) = UnexpectedRelocation a r "dropErr" -dropErrorAsMemError a DropInvalidAddr = InvalidAddr a +dropErrorAsMemError a (DropUnexpectedRelocation r) = UnexpectedRelocation a r +dropErrorAsMemError a DropInvalidAddr = InvalidAddr a + +splitSegmentRangeList' :: MemWidth w + => [SegmentRange w] + -> Int + -> [SegmentRange w] + -> Either (DropError w) ([SegmentRange w], [SegmentRange w]) +splitSegmentRangeList' prev c next + | c <= 0 = Right (reverse prev, next) +splitSegmentRangeList' _ _ [] = Left DropInvalidAddr +splitSegmentRangeList' prev cnt (reg@(ByteRegion bs) : rest) = do + let sz = BS.length bs + if cnt < sz then do + let taken = ByteRegion (BS.take cnt bs):prev + let dropped = ByteRegion (BS.drop cnt bs) : rest + pure $ (reverse taken, dropped) + else do + splitSegmentRangeList' (reg:prev) (cnt - sz) rest +splitSegmentRangeList' prev cnt (reg@(RelocationRegion r):rest) = do + let sz = relocationSize r + if toInteger cnt < toInteger sz then + Left (DropUnexpectedRelocation r) + else do + splitSegmentRangeList' (reg:prev) (cnt - fromIntegral sz) rest +splitSegmentRangeList' prev cnt (reg@(BSSRegion sz): rest) = + if toInteger cnt < toInteger sz then do + let taken = BSSRegion (fromIntegral cnt):prev + let dropped = BSSRegion (sz - fromIntegral cnt) : rest + pure $ (reverse taken, dropped) + else + splitSegmentRangeList' (reg:prev) (cnt - fromIntegral sz) rest + +-- | Given a segment data and a number of bytes `c`, this partitions the data in +-- two data regions. The first contains the first `c` bytes in the data; the second +-- contains the rest of the data. +-- +-- This will return an exception if the size of the data is too small or the partition +-- would split a relocation entry. +splitSegmentRangeList :: MemWidth w + => [SegmentRange w] + -> Int + -> Either (DropError w) ([SegmentRange w], [SegmentRange w]) +splitSegmentRangeList l c = splitSegmentRangeList' [] c l -- | Given a contiguous list of segment ranges and a number of bytes to drop, this -- returns the remaining segment ranges or throws an error. @@ -1135,26 +1247,7 @@ dropSegmentRangeListBytes :: forall w => [SegmentRange w] -> Int -> Either (DropError w) [SegmentRange w] -dropSegmentRangeListBytes ranges 0 = Right ranges -dropSegmentRangeListBytes (ByteRegion bs : rest) cnt = do - let sz = BS.length bs - if sz > cnt then - Right $ ByteRegion (BS.drop cnt bs) : rest - else - dropSegmentRangeListBytes rest (cnt - sz) -dropSegmentRangeListBytes (RelocationRegion r:rest) cnt = do - let sz = fromIntegral (relocSize r) - if sz > cnt then - Left (DropUnexpectedRelocation r) - else - dropSegmentRangeListBytes rest (cnt - sz) -dropSegmentRangeListBytes (BSSRegion sz : rest) cnt = - if toInteger sz > toInteger cnt then - Right $ BSSRegion (sz - fromIntegral cnt) : rest - else - dropSegmentRangeListBytes rest (cnt - fromIntegral sz) -dropSegmentRangeListBytes [] _ = - Left DropInvalidAddr +dropSegmentRangeListBytes l c = snd <$> splitSegmentRangeList l c ------------------------------------------------------------------------ -- Memory symbol @@ -1193,41 +1286,50 @@ addrContentsAfter mem addr = do -- -- This is a helper method for @readByteString@ below. readByteString' :: MemWidth w - => BS.ByteString - -- ^ Bytestring read so far (prepended to output) + => MemSegmentOff w + -- ^ Initial starting address + -> [BS.ByteString] + -- ^ Bytestring read so far (in reverse order) -> [SegmentRange w] -- ^ Remaining segments to read from. - -> MemAddr w - -- ^ Address we are reading from (used for error reporting) -> Word64 - -- ^ Number of bytes to read. - -> Either (MemoryError w) BS.ByteString -readByteString' _ _ _ 0 = pure BS.empty -readByteString' _ [] addr _ = Left $! InvalidAddr addr -readByteString' prev (ByteRegion bs:rest) addr sz = - if toInteger sz <= toInteger (BS.length bs) then - pure $ prev <> BS.take (fromIntegral sz) bs + -- ^ Number of bytes remaining to read. + -> Either (MemoryError w) [BS.ByteString] +readByteString' _ prev _ 0 = + pure $! prev +readByteString' _ _ [] _ = error "internal: readByteString' given too many bytes." +readByteString' initAddr prev (ByteRegion bs:rest) cnt = + if toInteger cnt <= toInteger (BS.length bs) then + pure $! BS.take (fromIntegral cnt) bs : prev else do - let addr' = incAddr (fromIntegral (BS.length bs)) addr - let sz' = sz - fromIntegral (BS.length bs) - readByteString' (prev <> bs) rest addr' sz' -readByteString' _ (RelocationRegion r:_) addr _ = do - Left $! UnexpectedRelocation addr r "readBS" -readByteString' prev (BSSRegion cnt:rest) addr sz = - if toInteger sz <= toInteger cnt then - pure $ prev <> BS.replicate (fromIntegral sz) 0 + let cnt' = cnt - fromIntegral (BS.length bs) + readByteString' initAddr (bs:prev) rest cnt' +readByteString' initAddr prev (RelocationRegion r:_) _ = do + let cnt = sum (toInteger . BS.length <$> prev) + let addr = incAddr cnt (relativeSegmentAddr initAddr) + Left $! UnexpectedRelocation addr r +readByteString' initAddr prev (BSSRegion sz:rest) cnt = + if toInteger cnt <= toInteger sz then + pure $! BS.replicate (fromIntegral cnt) 0 : prev else do - let addr' = incAddr (toInteger sz) addr - let sz' = sz - fromIntegral cnt - seq addr' $ - readByteString' (prev <> BS.replicate (fromIntegral cnt) 0) rest addr' sz' + let cnt' = cnt - fromIntegral sz + let next = BS.replicate (fromIntegral sz) 0 : prev + seq cnt' $ seq next $ + readByteString' initAddr next rest cnt' -- | Attemtp to read a bytestring of the given length -readByteString :: Memory w -> MemAddr w -> Word64 -> Either (MemoryError w) BS.ByteString -readByteString mem addr sz = addrWidthClass (memAddrWidth mem) $ do +readByteString :: Memory w + -> MemAddr w + -> Word64 -- ^ Number of bytes to read + -> Either (MemoryError w) BS.ByteString +readByteString mem addr cnt = addrWidthClass (memAddrWidth mem) $ do segOff <- resolveMemAddr mem addr + -- Check read is in range. + when (toInteger cnt > msegByteCountAfter segOff) $ do + Left $! InvalidRead segOff cnt + -- Get contents after segment l <- contentsAfterSegmentOff segOff - readByteString' BS.empty l addr sz + mconcat . reverse <$> readByteString' segOff [] l cnt -- | Read an address from the value in the segment or report a memory -- error. @@ -1239,7 +1341,7 @@ readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do let sz = fromIntegral (addrSize addr) bs <- readByteString mem addr sz case addrRead end bs of - Just val -> Right $ MemAddr 0 val + Just val -> Right $ MemAddr 0 val Nothing -> error $ "readAddr internal error: readByteString result too short." -- | Read a single byte. diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index 251685a5..673be1f7 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -119,19 +119,6 @@ flagsForSectionFlags f = where flagIf :: ElfSectionFlags w -> Perm.Flags -> Perm.Flags flagIf ef pf = if f `Elf.hasPermissions` ef then pf else Perm.none ------------------------------------------------------------------------- --- RegionAdjust - --- | This captures how to translate addresses in the Elf file to --- regions in the memory object. -data RegionAdjust - = RegionAdjust { regionIndex :: !RegionIndex - -- ^ Region index for new segments - , regionOffset :: !Integer - -- ^ Offset from region to automatically add to - -- segment/sections during loading. - } - ------------------------------------------------------------------------ -- MemLoader @@ -312,6 +299,7 @@ type RelocationResolver tp = SymbolVector -> Elf.RelEntry tp -> MemWord (Elf.RelocationWidth tp) + -- ^ Offset -> RelocResolver (Relocation (Elf.RelocationWidth tp)) data SomeRelocationResolver w @@ -334,47 +322,92 @@ resolveSymbol (SymbolVector symtab) symIdx = do relocError $ RelocationBadSymbolIndex $ fromIntegral symIdx Just sym -> pure $ sym -resolveRelocationAddr :: SymbolVector +-- | This attempts to resolve an index in the symbol table to the +-- identifier information needed to resolve its loaded address. +resolveRelocationSym :: SymbolVector -- ^ A vector mapping symbol indices to the -- associated symbol information. -> Word32 -- ^ Index in the symbol table this refers to. -> RelocResolver SymbolIdentifier -resolveRelocationAddr symtab symIdx = do +resolveRelocationSym symtab symIdx = do sym <- resolveSymbol symtab symIdx case symbolDef sym of DefinedSymbol{} -> do pure $ SymbolRelocation (symbolName sym) (symbolVersion sym) SymbolSection idx -> do - pure $ SectionBaseRelocation idx + pure $ SectionIdentifier idx SymbolFile _ -> do relocError $ RelocationFileUnsupported UndefinedSymbol{} -> do pure $ SymbolRelocation (symbolName sym) (symbolVersion sym) -- | Attempt to resolve an X86_64 specific symbol. -relaTargetX86_64 :: SomeRelocationResolver 64 -relaTargetX86_64 = SomeRelocationResolver $ \symtab rel off -> +relaTargetX86_64 :: SymbolVector + -> Elf.RelEntry Elf.X86_64_RelocationType + -> MemWord 64 + -- ^ Offset of symbol + -> RelocResolver (Relocation 64) +relaTargetX86_64 symtab rel off = case Elf.relType rel of Elf.R_X86_64_JUMP_SLOT -> do - addr <- resolveRelocationAddr symtab (Elf.relSym rel) - pure $ AbsoluteRelocation addr off LittleEndian 8 + sym <- resolveRelocationSym symtab (Elf.relSym rel) + pure $ Relocation { relocationSym = sym + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 8 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + } Elf.R_X86_64_PC32 -> do - addr <- resolveRelocationAddr symtab (Elf.relSym rel) - pure $ RelativeRelocation addr off LittleEndian 4 + sym <- resolveRelocationSym symtab (Elf.relSym rel) + pure $ Relocation { relocationSym = sym + , relocationOffset = off + , relocationIsRel = True + , relocationSize = 4 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + } Elf.R_X86_64_32 -> do - addr <- resolveRelocationAddr symtab (Elf.relSym rel) - pure $ AbsoluteRelocation addr off LittleEndian 4 + sym <- resolveRelocationSym symtab (Elf.relSym rel) + pure $ Relocation { relocationSym = sym + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 4 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + } + Elf.R_X86_64_32S -> do + sym <- resolveRelocationSym symtab (Elf.relSym rel) + pure $ Relocation { relocationSym = sym + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 4 + , relocationIsSigned = True + , relocationEndianness = LittleEndian + } Elf.R_X86_64_64 -> do - addr <- resolveRelocationAddr symtab (Elf.relSym rel) - pure $ AbsoluteRelocation addr off LittleEndian 8 + sym <- resolveRelocationSym symtab (Elf.relSym rel) + pure $ Relocation { relocationSym = sym + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 8 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + } -- R_X86_64_GLOB_DAT are used to update GOT entries with their -- target address. They are similar to R_x86_64_64 except appear -- inside dynamically linked executables/libraries, and are often -- loaded lazily. We just use the eager AbsoluteRelocation here. Elf.R_X86_64_GLOB_DAT -> do - addr <- resolveRelocationAddr symtab (Elf.relSym rel) - pure $ AbsoluteRelocation addr off LittleEndian 8 + sym <- resolveRelocationSym symtab (Elf.relSym rel) + pure $ Relocation { relocationSym = sym + , relocationOffset = off + , relocationIsRel = False + , relocationSize = 8 + , relocationIsSigned = False + , relocationEndianness = LittleEndian + } -- Jhx Note. These will be needed to support thread local variables. -- Elf.R_X86_64_TPOFF32 -> undefined @@ -398,10 +431,6 @@ relaTargetARM = SomeRelocationResolver $ \_symtab rel _maddend -> -- TargetSymbol <$> resolveSymbol symtab rel tp -> relocError $ RelocationUnsupportedType (show tp) -} -{- -000000613ff8 003c00000006 R_X86_64_GLOB_DAT 0000000000000000 __gmon_start__ + 0 --} - -- | Creates a relocation map from the contents of a dynamic section. @@ -413,7 +442,7 @@ withRelocationResolver -> MemLoader w a withRelocationResolver hdr f = case (Elf.headerClass hdr, Elf.headerMachine hdr) of - (Elf.ELFCLASS64, Elf.EM_X86_64) -> f relaTargetX86_64 + (Elf.ELFCLASS64, Elf.EM_X86_64) -> f (SomeRelocationResolver relaTargetX86_64) -- (Elf.ELFCLASS32, Elf.EM_ARM) -> f relaTargetARM (_,mach) -> throwError $ UnsupportedArchitecture (show mach) @@ -684,7 +713,8 @@ reprConstraints Addr64 x = x -- | Return a memory segment for elf segment if it loadable. memSegmentForElfSegment :: (MemWidth w, Monad m, Integral (ElfWordType w)) => ResolveFn v m w - -> RegionAdjust -- ^ Index for segment + -> MemAddr w + -- ^ Base address to use for segments. -> L.ByteString -- ^ Complete contents of Elf file. -> AddrOffsetMap w v @@ -692,28 +722,27 @@ memSegmentForElfSegment :: (MemWidth w, Monad m, Integral (ElfWordType w)) -> Elf.Phdr w -- ^ Program header entry -> m (MemSegment w) -memSegmentForElfSegment resolver regAdj contents relocMap phdr = - memSegment resolver (regionIndex regAdj) relocMap (fromInteger base) flags dta sz - where {- seg = Elf.phdrSegment phdr -} +memSegmentForElfSegment resolver base contents relocMap phdr = + memSegment resolver (addrBase base) relocMap off flags dta sz + where off = addrOffset base + fromIntegral (Elf.phdrSegmentVirtAddr phdr) dta = sliceL (Elf.phdrFileRange phdr) contents sz = fromIntegral $ Elf.phdrMemSize phdr - base = regionOffset regAdj + toInteger (Elf.phdrSegmentVirtAddr phdr) flags = flagsForSegmentFlags (Elf.phdrSegmentFlags phdr) -- | Load an elf file into memory. -insertElfSegment :: RegionAdjust - -- ^ Where to load region +insertElfSegment :: MemAddr w + -- ^ Base address to use for loading program header. -> ElfFileSectionMap (ElfWordType w) -> L.ByteString -> RelocMap w v -- ^ Relocations to apply in loading section. -> Elf.Phdr w -> MemLoader w () -insertElfSegment regAdj shdrMap contents (RelocMap relocMap resolver) phdr = do +insertElfSegment base shdrMap contents (RelocMap relocMap resolver) phdr = do w <- uses mlsMemory memAddrWidth reprConstraints w $ do when (Elf.phdrMemSize phdr > 0) $ do - seg <- memSegmentForElfSegment resolver regAdj contents relocMap phdr + seg <- memSegmentForElfSegment resolver base contents relocMap phdr let seg_idx = Elf.phdrSegmentIndex phdr loadMemSegment ("Segment " ++ show seg_idx) seg let phdr_offset = Elf.fromFileOffset (Elf.phdrFileStart phdr) @@ -727,16 +756,19 @@ insertElfSegment regAdj shdrMap contents (RelocMap relocMap resolver) phdr = do fail $ "Found section header that overlaps with program header." let sec_offset = fromIntegral $ shdr_start - phdr_offset let Just addr = resolveSegmentOff seg sec_offset + mlsMemory %= memAddSectionAddr (fromElfSectionIndex elfIdx) addr mlsIndexMap %= Map.insert elfIdx (addr, sec) _ -> fail "Unexpected shdr interval" --- | Load an elf file into memory with the given options. +-- | Load an elf file into memory by parsing segments. memoryForElfSegments :: forall w - . RegionAdjust + . MemAddr w + -- ^ This is used as the base address to load Elf segments at (the virtual + -- address is treated as relative to this. -> Elf w -> MemLoader w () -memoryForElfSegments regAdj e = do +memoryForElfSegments base e = do let l = elfLayout e let hdr = Elf.elfLayoutHeader l let w = elfAddrWidth (elfClass e) @@ -755,28 +787,43 @@ memoryForElfSegments regAdj e = do , let sec = shdr^._1 , let end = start + elfSectionFileSize sec ] - mapM_ (insertElfSegment regAdj intervals contents relocMap) + mapM_ (insertElfSegment base intervals contents relocMap) (filter (\p -> Elf.phdrSegmentType p == Elf.PT_LOAD) ph) ------------------------------------------------------------------------ -- Elf section loading +-- | Contains the name of a section we allocate and whether +-- relocations are used. +type AllocatedSectionInfo = (B.ByteString, Bool) + +allocatedNames :: AllocatedSectionInfo -> [B.ByteString] +allocatedNames (nm,False) = [nm] +allocatedNames (nm,True) = [nm, ".rela" <> nm] + +allocatedSectionInfo :: [AllocatedSectionInfo] +allocatedSectionInfo = + [ (,) ".text" True + , (,) ".eh_frame" True + , (,) ".data" True + , (,) ".bss" False + , (,) ".rodata" True + ] + allowedSectionNames :: Set B.ByteString allowedSectionNames = Set.fromList - [ "" - , ".text", ".rela.text" - , ".data", ".rela.data" - , ".bss" - , ".tbss" - , ".tdata", ".rela.tdata" - , ".rodata", ".rela.rodata" - , ".comment" - , ".note.GNU-stack" - , ".eh_frame", ".rela.eh_frame" - , ".shstrtab" - , ".symtab" - , ".strtab" - ] + $ concatMap allocatedNames allocatedSectionInfo + ++ [ "" + , ".text.hot" + , ".text.unlikely" + , ".tbss" + , ".tdata", ".rela.tdata" + , ".comment" + , ".note.GNU-stack" + , ".shstrtab" + , ".symtab" + , ".strtab" + ] -- | Map from section names to information about them. type SectionNameMap w = Map SectionName [ElfSection (ElfWordType w)] @@ -835,6 +882,7 @@ insertAllocatedSection hdr symtab sectionMap regIdx nm = do -- Add entry to map elf section index to start in segment. let elfIdx = ElfSectionIndex (elfSectionIndex sec) let Just addr = resolveSegmentOff seg 0 + mlsMemory %= memAddSectionAddr (fromElfSectionIndex elfIdx) addr mlsIndexMap %= Map.insert elfIdx (addr, sec) -- | Create the symbol vector from @@ -864,10 +912,8 @@ memoryForElfSections e = do sectionMap = foldlOf elfSections insSec Map.empty e where insSec m sec = Map.insertWith (\new old -> old ++ new) (elfSectionName sec) [sec] m symtab <- symtabSymbolVector e - insertAllocatedSection hdr symtab sectionMap 1 ".text" - insertAllocatedSection hdr symtab sectionMap 2 ".data" - insertAllocatedSection hdr symtab sectionMap 3 ".bss" - insertAllocatedSection hdr symtab sectionMap 4 ".rodata" + forM_ (zip [1..] allocatedSectionInfo) $ \(idx, (nm,_)) -> do + insertAllocatedSection hdr symtab sectionMap idx nm -- TODO: Figure out what to do about .tdata and .tbss -- Check for other section names that we do not support." let unsupportedKeys = Map.keysSet sectionMap `Set.difference ` allowedSectionNames @@ -899,20 +945,27 @@ adjustedLoadRegionIndex e loadOpt = -- information tends to be more precise. memoryForElf :: LoadOptions -> Elf w - -> Either String (SectionIndexMap w, Memory w, [MemLoadWarning]) -memoryForElf opt e = do + -> Either String ( Memory w + , [MemSymbol w] -- Function symbols + , [MemLoadWarning] + , [SymbolResolutionError] + ) +memoryForElf opt e = reprConstraints (elfAddrWidth (elfClass e)) $ do let end = case Elf.elfData e of Elf.ELFDATA2LSB -> LittleEndian Elf.ELFDATA2MSB -> BigEndian - runMemLoader end (emptyMemory (elfAddrWidth (elfClass e))) $ do - case Elf.elfType e of - Elf.ET_REL -> - memoryForElfSections e - _ -> do - let regAdj = RegionAdjust { regionIndex = adjustedLoadRegionIndex e opt - , regionOffset = loadRegionBaseOffset opt - } - memoryForElfSegments regAdj e + (secMap, mem, warnings) <- + runMemLoader end (emptyMemory (elfAddrWidth (elfClass e))) $ do + case Elf.elfType e of + Elf.ET_REL -> + memoryForElfSections e + _ -> do + let base = MemAddr { addrBase = adjustedLoadRegionIndex e opt + , addrOffset = fromInteger (loadRegionBaseOffset opt) + } + memoryForElfSegments base e + let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e + pure (mem, funcSymbols, warnings, symErrs) ------------------------------------------------------------------------ -- Elf symbol utilities @@ -935,28 +988,27 @@ instance Show SymbolResolutionError where show (CouldNotResolveAddr sym) = "Could not resolve address of " ++ BSC.unpack sym ++ "." show MultipleSymbolTables = "Elf contains multiple symbol tables." --- | Find an absolute symbol, of any time, not just function. -resolveElfFuncSymbolAny' :: - Memory w -- ^ Memory object from Elf file. - -> SectionIndexMap w -- ^ Section index mp from memory - -> Int -- ^ Index of symbol - -> ElfSymbolTableEntry (ElfWordType w) - -> Either SymbolResolutionError (MemSymbol w) -resolveElfFuncSymbolAny' mem secMap idx ste +-- | Find an symbol of any type -- not just functions. +resolveElfSymbol :: Memory w -- ^ Memory object from Elf file. + -> SectionIndexMap w -- ^ Section index mp from memory + -> Int -- ^ Index of symbol + -> ElfSymbolTableEntry (ElfWordType w) + -> Maybe (Either SymbolResolutionError (MemSymbol w)) +resolveElfSymbol mem secMap idx ste -- Check symbol is defined - | Elf.steIndex ste == Elf.SHN_UNDEF = Left $ UndefSymbol (Elf.steName ste) + | Elf.steIndex ste == Elf.SHN_UNDEF = Nothing -- Check symbol name is non-empty - | Elf.steName ste == "" = Left $ EmptySymbolName idx (Elf.steType ste) + | Elf.steName ste == "" = Just $ Left $ EmptySymbolName idx (Elf.steType ste) -- Lookup absolute symbol | Elf.steIndex ste == Elf.SHN_ABS = reprConstraints (memAddrWidth mem) $ do let val = Elf.steValue ste case resolveAddr mem 0 (fromIntegral val) of - Just addr -> Right $ + Just addr -> Just $ Right $ MemSymbol { memSymbolName = Elf.steName ste , memSymbolStart = addr , memSymbolSize = fromIntegral (Elf.steSize ste) } - Nothing -> Left $ CouldNotResolveAddr (Elf.steName ste) + Nothing -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste) -- Lookup symbol stored in specific section | otherwise = reprConstraints (memAddrWidth mem) $ do let val = Elf.steValue ste @@ -964,41 +1016,12 @@ resolveElfFuncSymbolAny' mem secMap idx ste Just (base,sec) | elfSectionAddr sec <= val && val < elfSectionAddr sec + Elf.elfSectionSize sec , off <- toInteger val - toInteger (elfSectionAddr sec) - , Just addr <- incSegmentOff base off -> do + , Just addr <- incSegmentOff base off -> Just $ do Right $ MemSymbol { memSymbolName = Elf.steName ste - , memSymbolStart = addr - , memSymbolSize = fromIntegral (Elf.steSize ste) - } - _ -> Left $ CouldNotResolveAddr (Elf.steName ste) - --- | Find an absolute symbol, of any time, not just function. -resolveElfFuncSymbolAny :: - Memory w -- ^ Memory object from Elf file. - -> SectionIndexMap w -- ^ Section index mp from memory - -> Int -- ^ Index of symbol - -> ElfSymbolTableEntry (ElfWordType w) - -> Maybe (Either SymbolResolutionError (MemSymbol w)) -resolveElfFuncSymbolAny mem secMap idx ste - | Elf.steIndex ste == Elf.SHN_UNDEF = Nothing - | otherwise = Just (resolveElfFuncSymbolAny' mem secMap idx ste) - --- | This resolves an Elf symbol into a MemSymbol if it is likely a --- pointer to a resolved function. -resolveElfFuncSymbol :: Memory w -- ^ Memory object from Elf file. - -> SectionIndexMap w -- ^ Section index mp from memory - -> Int -- ^ Index of symbol - -> ElfSymbolTableEntry (ElfWordType w) - -> Maybe (Either SymbolResolutionError (MemSymbol w)) -resolveElfFuncSymbol mem secMap idx ste - -- Check this is a defined function symbol - -- Some NO_TYPE entries appear to correspond to functions, so we include those. - | (Elf.steType ste `elem` [ Elf.STT_FUNC, Elf.STT_NOTYPE ]) == False = - Nothing - -- Check symbol is defined - | Elf.steIndex ste == Elf.SHN_UNDEF = Nothing - -- Check symbol name is non-empty - | Elf.steName ste == "" = Just $ (resolveElfFuncSymbolAny' mem secMap idx ste) - | otherwise = Just (resolveElfFuncSymbolAny' mem secMap idx ste) + , memSymbolStart = addr + , memSymbolSize = fromIntegral (Elf.steSize ste) + } + _ -> Just $ Left $ CouldNotResolveAddr (Elf.steName ste) -- | Resolve symbol table entries defined in this Elf file to -- a mem symbol @@ -1016,11 +1039,12 @@ resolveElfFuncSymbols mem secMap e = case Elf.elfSymtab e of [] -> ([], []) [tbl] -> - let entries = V.toList (Elf.elfSymbolTableEntries tbl) - in partitionEithers (mapMaybe (uncurry (resolveElfFuncSymbol mem secMap)) (zip [0..] entries)) + let entries = zip [0..] (V.toList (Elf.elfSymbolTableEntries tbl)) + isRelevant (_,ste) = Elf.steType ste == Elf.STT_FUNC + funcEntries = filter isRelevant entries + in partitionEithers (mapMaybe (uncurry (resolveElfSymbol mem secMap)) funcEntries) _ -> ([MultipleSymbolTables], []) - -- | Resolve symbol table entries to the addresses in a memory. -- -- It takes the memory constructed from the Elf file, the section @@ -1037,11 +1061,9 @@ resolveElfFuncSymbolsAny mem secMap e = [] -> ([], []) [tbl] -> let entries = V.toList (Elf.elfSymbolTableEntries tbl) - in partitionEithers (mapMaybe (uncurry (resolveElfFuncSymbolAny mem secMap)) (zip [0..] entries)) + in partitionEithers (mapMaybe (uncurry (resolveElfSymbol mem secMap)) (zip [0..] entries)) _ -> ([MultipleSymbolTables], []) - - ------------------------------------------------------------------------ -- resolveElfContents @@ -1075,19 +1097,16 @@ resolveElfContents :: LoadOptions resolveElfContents loadOpts e = case Elf.elfType e of Elf.ET_REL -> do - (secMap, mem, warnings) <- memoryForElf loadOpts e - let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e + (mem, funcSymbols, warnings, symErrs) <- memoryForElf loadOpts e pure (fmap show warnings ++ fmap show symErrs, mem, Nothing, funcSymbols) Elf.ET_EXEC -> do - (secMap, mem, warnings) <- memoryForElf loadOpts e + (mem, funcSymbols, warnings, symErrs) <- memoryForElf loadOpts e let (entryWarn, mentry) = getElfEntry loadOpts mem e - let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e - Right (fmap show warnings ++ entryWarn ++ fmap show symErrs, mem, mentry, funcSymbols) + Right (fmap show warnings ++ fmap show symErrs ++ entryWarn, mem, mentry, funcSymbols) Elf.ET_DYN -> do - (secMap, mem, warnings) <- memoryForElf loadOpts e + (mem, funcSymbols, warnings, symErrs) <- memoryForElf loadOpts e let (entryWarn, mentry) = getElfEntry loadOpts mem e - let (symErrs, funcSymbols) = resolveElfFuncSymbols mem secMap e - pure (fmap show warnings ++ entryWarn ++ fmap show symErrs, mem, mentry, funcSymbols) + pure (fmap show warnings ++ fmap show symErrs ++ entryWarn, mem, mentry, funcSymbols) Elf.ET_CORE -> do Left $ "Reopt does not support loading core files." tp -> do diff --git a/base/src/Data/Macaw/SCFG.hs b/base/src/Data/Macaw/SCFG.hs index 35444494..712060ad 100644 --- a/base/src/Data/Macaw/SCFG.hs +++ b/base/src/Data/Macaw/SCFG.hs @@ -13,8 +13,12 @@ optimization. module Data.Macaw.SCFG ( SCFG(..) , SCFGBlock(..) - , CallingConvention(..) + , CallingConvention + , Stmt(..) , TermStmt(..) + , Value(..) + , AssignId(..) + , BlockIndex(..) , module Data.Macaw.CFG.App ) where diff --git a/deps/crucible b/deps/crucible index 31f432e2..60a10a79 160000 --- a/deps/crucible +++ b/deps/crucible @@ -1 +1 @@ -Subproject commit 31f432e23c94855273477f6db08cd9c73072e930 +Subproject commit 60a10a7933bcfab2c444dd436aee82eed449cec9 diff --git a/deps/elf-edit b/deps/elf-edit index ed210d94..78ca989c 160000 --- a/deps/elf-edit +++ b/deps/elf-edit @@ -1 +1 @@ -Subproject commit ed210d94e17b48be8905b6389f10d906dfc32b2e +Subproject commit 78ca989cb31058968c5dc4e01009ee705eaeea09 diff --git a/deps/flexdis86 b/deps/flexdis86 index f50f3a78..9304269a 160000 --- a/deps/flexdis86 +++ b/deps/flexdis86 @@ -1 +1 @@ -Subproject commit f50f3a78776287e66a4b25e7f172a55b2818f2b2 +Subproject commit 9304269acef1c82e9ae3676649574e371ac54c36 diff --git a/deps/parameterized-utils b/deps/parameterized-utils index 898020da..a30e1229 160000 --- a/deps/parameterized-utils +++ b/deps/parameterized-utils @@ -1 +1 @@ -Subproject commit 898020da43fbf9989a8877c1404c05767bb5df98 +Subproject commit a30e12292371bc9abe6ccc631a0409df059449d9 diff --git a/stack.ghc-8.2.2.yaml b/stack.ghc-8.2.2.yaml index 7d1fc25d..d8194fdc 100644 --- a/stack.ghc-8.2.2.yaml +++ b/stack.ghc-8.2.2.yaml @@ -19,4 +19,5 @@ packages: extra-deps: - monadLib-3.7.3 +- panic-0.4.0.1 resolver: lts-11.5 diff --git a/symbolic/src/Data/Macaw/Symbolic.hs b/symbolic/src/Data/Macaw/Symbolic.hs index d32724a6..3559b08d 100644 --- a/symbolic/src/Data/Macaw/Symbolic.hs +++ b/symbolic/src/Data/Macaw/Symbolic.hs @@ -60,14 +60,14 @@ import What4.Symbol (userSymbol) import qualified Lang.Crucible.Analysis.Postdom as C import Lang.Crucible.Backend import qualified Lang.Crucible.CFG.Core as C +import qualified Lang.Crucible.CFG.Extension as C import qualified Lang.Crucible.CFG.Reg as CR import qualified Lang.Crucible.CFG.SSAConversion as C import qualified Lang.Crucible.FunctionHandle as C + +import qualified Lang.Crucible.Simulator as C import qualified Lang.Crucible.Simulator.ExecutionTree as C -import qualified Lang.Crucible.Simulator.Intrinsics as C import qualified Lang.Crucible.Simulator.GlobalState as C -import qualified Lang.Crucible.Simulator.OverrideSim as C -import qualified Lang.Crucible.Simulator.RegMap as C import System.IO (stdout) @@ -498,7 +498,7 @@ execMacawStmtExtension archStmtFn mvar globs (LFH lookupH) s0 st = M.BoolTypeRepr -> freshConstant sym nm C.BaseBoolRepr _ -> error ("MacawFreshSymbolic: XXX type " ++ show t) return (v,st) - where sym = C.stateSymInterface st + where sym = st^.C.stateSymInterface MacawLookupFunctionHandle _ args -> do (hv, st') <- doLookupFunctionHandle lookupH st mvar (C.regValue args) @@ -576,7 +576,7 @@ macawExtensions f mvar globs lookupH = -- | Run the simulator over a contiguous set of code. runCodeBlock :: forall sym arch blocks - . IsSymInterface sym + . (C.IsSyntaxExtension (MacawExt arch), IsSymInterface sym) => sym -> MacawSymbolicArchFunctions arch -- ^ Translation functions @@ -612,8 +612,8 @@ runCodeBlock sym archFns archEval halloc (initMem,globs) lookupH g regStruct = d } -- Create the symbolic simulator state let initGlobals = C.insertGlobal mvar initMem C.emptyGlobals - let s = C.initSimState ctx initGlobals C.defaultErrorHandler - a <- C.runOverrideSim s macawStructRepr $ do + let s = C.initSimState ctx initGlobals C.defaultAbortHandler + a <- C.executeCrucible s $ C.runOverrideSim macawStructRepr $ do let args :: C.RegMap sym (MacawFunctionArgs arch) args = C.RegMap (Ctx.singleton (C.RegEntry macawStructRepr regStruct)) crucGenArchConstraints archFns $ diff --git a/symbolic/src/Data/Macaw/Symbolic/MemOps.hs b/symbolic/src/Data/Macaw/Symbolic/MemOps.hs index 81393ae3..3fd40bd5 100644 --- a/symbolic/src/Data/Macaw/Symbolic/MemOps.hs +++ b/symbolic/src/Data/Macaw/Symbolic/MemOps.hs @@ -194,7 +194,7 @@ doGetGlobal :: , CrucibleState s sym ext rtp blocks r ctx ) doGetGlobal st mvar globs addr = do - let sym = stateSymInterface st + let sym = st^.stateSymInterface mem <- getMem st mvar regionNum <- natLit sym (fromIntegral (M.addrBase addr)) offset <- bvLit sym (M.addrWidthNatRepr (M.addrWidthRepr addr)) (M.memWordInteger (M.addrOffset addr)) @@ -205,7 +205,19 @@ doGetGlobal st mvar globs addr = do , "*** Region: " ++ show (M.addrBase addr) , "*** Address: " ++ show addr ] +-- <<<<<<< HEAD Just ptr -> return (ptr, st) +-- ======= +-- Just region -> +-- do mem <- getMem st mvar +-- let sym = st^.stateSymInterface +-- let w = M.addrWidthRepr addr +-- LeqProof <- pure $ addrWidthAtLeast16 w +-- let ?ptrWidth = M.addrWidthNatRepr w +-- off <- bvLit sym ?ptrWidth (M.memWordInteger (M.addrOffset addr)) +-- res <- doPtrAddOffset sym mem region off +-- return (res, st) +-- >>>>>>> master -------------------------------------------------------------------------------- @@ -401,7 +413,7 @@ doReadMem st mvar globs w (BVMemRepr bytes endian) ptr0 = do mem <- getMem st mvar checkEndian mem endian - let sym = stateSymInterface st + let sym = st^.stateSymInterface ty = bitvectorType (toBytes (widthVal bytes)) bitw = natMultiply (knownNat @8) bytes @@ -441,7 +453,7 @@ doCondReadMem st mvar globs w (BVMemRepr bytes endian) cond0 ptr0 def0 = def = regValue def0 mem <- getMem st mvar checkEndian mem endian - let sym = stateSymInterface st + let sym = st^.stateSymInterface ty = bitvectorType (toBytes (widthVal bytes)) bitw = natMultiply (knownNat @8) bytes @@ -490,7 +502,7 @@ doWriteMem st mvar globs w (BVMemRepr bytes endian) ptr0 val = do mem <- getMem st mvar checkEndian mem endian - let sym = stateSymInterface st + let sym = st^.stateSymInterface ty = bitvectorType (toBytes (widthVal bytes)) LeqProof <- pure $ addrWidthIsPos w @@ -547,7 +559,7 @@ ptrOp :: ptrOp k st mvar w x0 y0 = do mem <- getMem st mvar LeqProof <- return (addrWidthIsPos w) - let sym = stateSymInterface st + let sym = st^.stateSymInterface x = regValue x0 y = regValue y0 diff --git a/x86/src/Data/Macaw/X86.hs b/x86/src/Data/Macaw/X86.hs index 2f92d234..9fd2e9a0 100644 --- a/x86/src/Data/Macaw/X86.hs +++ b/x86/src/Data/Macaw/X86.hs @@ -50,7 +50,6 @@ import Control.Lens import Control.Monad.Cont import Control.Monad.Except import Control.Monad.ST -import qualified Data.Foldable as Fold import qualified Data.Map as Map import Data.Parameterized.Classes import qualified Data.Parameterized.Map as MapF @@ -144,9 +143,6 @@ initGenState :: NonceGenerator (ST st_s) ids -> GenState st_s ids initGenState nonce_gen mem addr s = GenState { assignIdGen = nonce_gen - , _blockSeq = BlockSeq { _nextBlockID = 1 - , _frontierBlocks = Seq.empty - } , _blockState = emptyPreBlock s 0 addr , genAddr = addr , genMemory = mem @@ -156,13 +152,12 @@ initGenState nonce_gen mem addr s = returnWithError :: GenState st_s ids -> X86TranslateError 64 - -> ST st_s (BlockSeq ids, MemWord 64, Maybe (X86TranslateError 64)) + -> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64)) returnWithError gs err = let curIPAddr = genAddr gs - term = (`TranslateError` Text.pack (show err)) - b = finishBlock' (gs^.blockState) term - res = seq b $ gs^.blockSeq & frontierBlocks %~ (Seq.|> b) - in return (res, msegOffset curIPAddr, Just err) + term s = TranslateError s (Text.pack (show err)) + b = finishBlock (gs^.blockState) term + in return (b, msegOffset curIPAddr, Just err) -- | Translate block, returning blocks read, ending -- PC, and an optional error. and ending PC. @@ -173,7 +168,7 @@ disassembleBlockImpl :: forall st_s ids -- ^ Maximum offset for this addr. -> [SegmentRange 64] -- ^ List of contents to read next. - -> ST st_s (BlockSeq ids, MemWord 64, Maybe (X86TranslateError 64)) + -> ST st_s (Block X86_64 ids, MemWord 64, Maybe (X86TranslateError 64)) disassembleBlockImpl gs max_offset contents = do let curIPAddr = genAddr gs case readInstruction' curIPAddr contents of @@ -191,7 +186,7 @@ disassembleBlockImpl gs max_offset contents = do returnWithError gs (UnsupportedInstruction (genAddr gs) i) Just exec -> do gsr <- - runExceptT $ runX86Generator (\() s -> pure (mkGenResult s)) gs $ do + runExceptT $ runX86Generator gs $ do let next_ip_word = fromIntegral $ segmentOffset seg + off let line = show curIPAddr ++ ": " ++ show (F.ppInstruction next_ip_word i) addStmt (Comment (Text.pack line)) @@ -200,18 +195,16 @@ disassembleBlockImpl gs max_offset contents = do Left msg -> do returnWithError gs (ExecInstructionError (genAddr gs) i msg) Right res -> do - case resState res of + case res of -- If IP after interpretation is the next_ip, there are no blocks, and we -- haven't crossed max_offset, then keep running. - Just p_b - | Seq.null (resBlockSeq res ^. frontierBlocks) - , RelocatableValue _ v <- p_b^.(pBlockState . curIP) + UnfinishedGenResult p_b + | RelocatableValue _ v <- p_b^.(pBlockState . curIP) , v == next_ip -- Check to see if we should continue , next_ip_off < max_offset , Just next_ip_segaddr <- resolveSegmentOff seg next_ip_off -> do let gs2 = GenState { assignIdGen = assignIdGen gs - , _blockSeq = resBlockSeq res , _blockState = p_b , genAddr = next_ip_segaddr , genMemory = genMemory gs @@ -226,8 +219,7 @@ disassembleBlockImpl gs max_offset contents = do Right contents' -> disassembleBlockImpl gs2 max_offset contents' _ -> do - let gs3 = finishBlock FetchAndExecute res - return (gs3, next_ip_off, Nothing) + return (finishGenResult res, next_ip_off, Nothing) -- | Disassemble block, returning either an error, or a list of blocks -- and ending PC. @@ -237,12 +229,12 @@ disassembleBlock :: forall s -> ExploreLoc -> MemWord 64 -- ^ Maximum number of bytes in ths block. - -> ST s ([Block X86_64 s], MemWord 64, Maybe (X86TranslateError 64)) + -> ST s (Block X86_64 s, MemWord 64, Maybe (X86TranslateError 64)) disassembleBlock mem nonce_gen loc max_size = do let addr = loc_ip loc let gs = initGenState nonce_gen mem addr (initX86State loc) let sz = msegOffset addr + max_size - (gs', next_ip_off, maybeError) <- + (b, next_ip_off, maybeError) <- case addrContentsAfter mem (relativeSegmentAddr addr) of Left msg -> returnWithError gs (FlexdisMemoryError msg) @@ -250,7 +242,7 @@ disassembleBlock mem nonce_gen loc max_size = do disassembleBlockImpl gs sz contents assert (next_ip_off > msegOffset addr) $ do let block_sz = next_ip_off - msegOffset addr - pure (Fold.toList (gs'^.frontierBlocks), block_sz, maybeError) + pure (b, block_sz, maybeError) -- | The abstract state for a function begining at a given address. initialX86AbsState :: MemSegmentOff 64 -> AbsBlockState X86Reg @@ -289,6 +281,7 @@ transferAbsValue r f = ReadFSBase -> TopV ReadGSBase -> TopV CPUID _ -> TopV + CMPXCHG8B{} -> TopV RDTSC -> TopV XGetBV _ -> TopV PShufb{} -> TopV @@ -338,7 +331,7 @@ tryDisassembleBlockFromAbsState :: forall s ids -- ^ Maximum size of this block -> AbsBlockState X86Reg -- ^ Abstract state of processor for defining state. - -> ExceptT String (ST s) ([Block X86_64 ids], MemWord 64, Maybe String) + -> ExceptT String (ST s) (Block X86_64 ids, MemWord 64, Maybe String) tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do t <- case asConcreteSingleton (ab^.absRegState^.boundValue X87_TopReg) of @@ -355,7 +348,7 @@ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do } let gs = initGenState nonce_gen mem addr (initX86State loc) let off = msegOffset addr - (gs', next_ip_off, maybeError) <- lift $ + (b, next_ip_off, maybeError) <- lift $ case addrContentsAfter mem (relativeSegmentAddr addr) of Left msg -> returnWithError gs (FlexdisMemoryError msg) @@ -363,8 +356,7 @@ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab = do disassembleBlockImpl gs (off + max_size) contents assert (next_ip_off > off) $ do let sz = next_ip_off - off - let blocks = Fold.toList (gs'^.frontierBlocks) - pure $! (blocks, sz, show <$> maybeError) + pure $! (b, sz, show <$> maybeError) -- | Disassemble block, returning either an error, or a list of blocks -- and ending PC. @@ -382,7 +374,7 @@ disassembleBlockFromAbsState mem nonce_gen addr max_size ab = do mr <- runExceptT $ tryDisassembleBlockFromAbsState mem nonce_gen addr max_size ab case mr of Left msg -> pure ([], 0, Just msg) - Right r -> pure r + Right (b,sz, merr) -> pure ([b],sz,merr) -- | Attempt to identify the write to a stack return address, returning -- instructions prior to that write and return values. diff --git a/x86/src/Data/Macaw/X86/ArchTypes.hs b/x86/src/Data/Macaw/X86/ArchTypes.hs index 3d874fc2..03eeb8a8 100644 --- a/x86/src/Data/Macaw/X86/ArchTypes.hs +++ b/x86/src/Data/Macaw/X86/ArchTypes.hs @@ -73,7 +73,7 @@ instance HasRepr SIMDWidth NatRepr where ------------------------------------------------------------------------ -- RepValSize --- | Rep value +-- | A value for distinguishing between 1,2,4 and 8 byte values. data RepValSize w = (w ~ 8) => ByteRepVal | (w ~ 16) => WordRepVal @@ -309,9 +309,31 @@ data X86PrimFn f tp where -- | Read the 'GS' base address. ReadGSBase :: X86PrimFn f (BVType 64) -- | The CPUID instruction. - CPUID :: !(f (BVType 32)) -> X86PrimFn f (BVType 128) -- -- Given value in eax register, this returns the concatenation of eax:ebx:ecx:edx. + CPUID :: !(f (BVType 32)) -> X86PrimFn f (BVType 128) + + -- | This implements the logic for the cmpxchg8b instruction + -- + -- Given a statement, `CMPXCHG8B addr eax ebx ecx edx` this executes the following logic: + -- temp64 <- read addr + -- if edx:eax == tmp then do + -- *addr := ecx:ebx + -- return (true, eax, edx) + -- else + -- return (false, trunc 32 temp64, trunc 32 (temp64 >> 32)) + CMPXCHG8B :: !(f (BVType 64)) + -- ^ Address to read + -> !(f (BVType 32)) + -- ^ Value in EAX + -> !(f (BVType 32)) + -- ^ Value in EBX + -> !(f (BVType 32)) + -- ^ Value in ECX + -> !(f (BVType 32)) + -- ^ Value in EDX + -> X86PrimFn f (TupleType [BoolType, BVType 32, BVType 32]) + -- | The RDTSC instruction. -- -- This returns the current time stamp counter a 64-bit value that will @@ -331,7 +353,7 @@ data X86PrimFn f tp where -- @res[i] = x[j] where j = s[i](0..l) -- where @msb(y)@ returns the most-significant bit in byte @y@. PShufb :: (1 <= w) => !(SIMDWidth w) -> !(f (BVType w)) -> !(f (BVType w)) -> X86PrimFn f (BVType w) - -- | Compares to memory regions + -- | Compares to memory regions and return the number of bytes that were the same. MemCmp :: !Integer -- /\ Number of bytes per value. -> !(f (BVType 64)) @@ -537,7 +559,7 @@ data X86PrimFn f tp where -> !(f (BVType 80)) -> X86PrimFn f tp - {- | Unary operation on a vector. Should have no side effects. -} + -- | Unary operation on a vector. Should have no side effects. VOp1 :: (1 <= n) => !(NatRepr n) -> {- /\ width of input/result -} !AVXOp1 -> {- /\ do this operation -} @@ -597,6 +619,7 @@ instance HasRepr (X86PrimFn f) TypeRepr where ReadFSBase -> knownRepr ReadGSBase -> knownRepr CPUID{} -> knownRepr + CMPXCHG8B{} -> knownRepr RDTSC{} -> knownRepr XGetBV{} -> knownRepr PShufb w _ _ -> BVTypeRepr (typeRepr w) @@ -651,6 +674,7 @@ instance TraversableFC X86PrimFn where ReadFSBase -> pure ReadFSBase ReadGSBase -> pure ReadGSBase CPUID v -> CPUID <$> go v + CMPXCHG8B a ax bx cx dx -> CMPXCHG8B <$> go a <*> go ax <*> go bx <*> go cx <*> go dx RDTSC -> pure RDTSC XGetBV v -> XGetBV <$> go v PShufb w x y -> PShufb w <$> go x <*> go y @@ -694,6 +718,7 @@ instance IsArchFn X86PrimFn where ReadFSBase -> pure $ text "fs.base" ReadGSBase -> pure $ text "gs.base" CPUID code -> sexprA "cpuid" [ pp code ] + CMPXCHG8B a ax bx cx dx -> sexprA "cmpxchg8b" [ pp a, pp ax, pp bx, pp cx, pp dx ] RDTSC -> pure $ text "rdtsc" XGetBV code -> sexprA "xgetbv" [ pp code ] PShufb _ x s -> sexprA "pshufb" [ pp x, pp s ] @@ -744,6 +769,7 @@ x86PrimFnHasSideEffects f = ReadFSBase -> False ReadGSBase -> False CPUID{} -> False + CMPXCHG8B{} -> True RDTSC -> False XGetBV{} -> False PShufb{} -> False @@ -781,40 +807,40 @@ x86PrimFnHasSideEffects f = -- X86Stmt -- | An X86 specific statement. -data X86Stmt (v :: Type -> *) - = forall tp . - WriteLoc !(X86PrimLoc tp) !(v tp) - | StoreX87Control !(v (BVType 64)) - -- ^ Store the X87 control register in the given location. - | MemCopy !Integer - !(v (BVType 64)) - !(v (BVType 64)) - !(v (BVType 64)) - !(v BoolType) - -- ^ Copy a region of memory from a source buffer to a destination buffer. - -- - -- In an expression @MemCopy bc v src dest dir@ - -- * @bc@ is the number of bytes to copy at a time (1,2,4,8) - -- * @v@ is the number of values to move. - -- * @src@ is the start of source buffer. - -- * @dest@ is the start of destination buffer. - -- * @dir@ is a flag that indicates whether direction of move: - -- * 'True' means we should decrement buffer pointers after each copy. - -- * 'False' means we should increment the buffer pointers after each copy. - | forall n . - MemSet !(v (BVType 64)) - -- /\ Number of values to assign - !(v (BVType n)) - -- /\ Value to assign - !(v (BVType 64)) - -- /\ Address to start assigning from. - !(v BoolType) - -- /\ Direction flag +data X86Stmt (v :: Type -> *) where + WriteLoc :: !(X86PrimLoc tp) -> !(v tp) -> X86Stmt v + StoreX87Control :: !(v (BVType 64)) -> X86Stmt v + -- ^ Store the X87 control register in the given address. - | EMMS - -- ^ Empty MMX technology State. Sets the x87 FPU tag word to empty. - -- Probably OK to use this for both EMMS FEMMS, the second being a - -- a faster version from AMD 3D now. + RepMovs :: !(RepValSize w) + -> !(v (BVType 64)) + -> !(v (BVType 64)) + -> !(v (BVType 64)) + -> !(v BoolType) + -> X86Stmt v + -- ^ Copy a region of memory from a source buffer to a destination buffer. + -- + -- In an expression @RepMovs bc cnt src dest dir@ + -- * @bc@ denotes the bytes to copy at a time. + -- * @cnt@ is the number of values to move. + -- * @src@ is the start of source buffer. + -- * @dest@ is the start of destination buffer. + -- * @dir@ is a flag that indicates whether direction of move: + -- * 'True' means we should decrement buffer pointers after each copy. + -- * 'False' means we should increment the buffer pointers after each copy. + MemSet :: !(v (BVType 64)) + -- /\ Number of values to assign + -> !(v (BVType n)) + -- /\ Value to assign + -> !(v (BVType 64)) + -- /\ Address to start assigning from. + -> !(v BoolType) + -- /\ Direction flag + -> X86Stmt v + EMMS :: X86Stmt v + -- ^ Empty MMX technology State. Sets the x87 FPU tag word to empty. + -- Probably OK to use this for both EMMS FEMMS, the second being a a + -- faster version from AMD 3D now. instance FunctorF X86Stmt where fmapF = fmapFDefault @@ -827,7 +853,7 @@ instance TraversableF X86Stmt where case stmt of WriteLoc loc v -> WriteLoc loc <$> go v StoreX87Control v -> StoreX87Control <$> go v - MemCopy bc v src dest dir -> MemCopy bc <$> go v <*> go src <*> go dest <*> go dir + RepMovs bc v src dest dir -> RepMovs bc <$> go v <*> go src <*> go dest <*> go dir MemSet v src dest dir -> MemSet <$> go v <*> go src <*> go dest <*> go dir EMMS -> pure EMMS @@ -836,9 +862,9 @@ instance IsArchStmt X86Stmt where case stmt of WriteLoc loc rhs -> pretty loc <+> text ":=" <+> pp rhs StoreX87Control addr -> pp addr <+> text ":= x87_control" - MemCopy sz cnt src dest rev -> - text "memcopy" <+> parens (hcat $ punctuate comma args) - where args = [pretty sz, pp cnt, pp src, pp dest, pp rev] + RepMovs sz cnt src dest rev -> + text "repMovs" <+> parens (hcat $ punctuate comma args) + where args = [pretty (repValSizeByteCount sz), pp cnt, pp src, pp dest, pp rev] MemSet cnt val dest d -> text "memset" <+> parens (hcat $ punctuate comma args) where args = [pp cnt, pp val, pp dest, pp d] diff --git a/x86/src/Data/Macaw/X86/Flexdis.hs b/x86/src/Data/Macaw/X86/Flexdis.hs index eebe950f..98745fab 100644 --- a/x86/src/Data/Macaw/X86/Flexdis.hs +++ b/x86/src/Data/Macaw/X86/Flexdis.hs @@ -199,8 +199,8 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where -- Throw error if we try to read a relocation as a symbolic reference BSSRegion _:_ -> do throwMemoryError $ UnexpectedBSS (msAddr ms) - RelocationRegion r:_ -> do - throwMemoryError $ UnexpectedRelocation (msAddr ms) r "byte0" + RelocationRegion r:_ -> + throwMemoryError $ UnexpectedByteRelocation (msAddr ms) r ByteRegion bs:rest -> do if BS.null bs then do throwMemoryError $ AccessViolation (msAddr ms) @@ -219,23 +219,27 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where BSSRegion _:_ -> do throwMemoryError $ UnexpectedBSS (msAddr ms) RelocationRegion r:rest -> do - case r of - AbsoluteRelocation sym off end szCnt -> do - unless (szCnt == 4 && end == LittleEndian) $ do - throwMemoryError $ UnexpectedRelocation (msAddr ms) r "dimm0" - let ms' = ms { msOffset = msOffset ms + 4 - , msNext = rest - } - seq ms' $ MBR $ put ms' - pure $ Flexdis.Imm32SymbolOffset sym (fromIntegral off) - -- RelativeOffset addr ioff (fromIntegral off) - RelativeRelocation _addr _off _end _szCnt -> do - throwMemoryError $ UnexpectedRelocation (msAddr ms) r "dimm1" + let sym = relocationSym r + let off = relocationOffset r + let isGood + = relocationIsRel r == False + && relocationSize r == 4 + && relocationEndianness r == LittleEndian + when (not isGood) $ do + throwMemoryError $ Unsupported32ImmRelocation (msAddr ms) r + -- Returns whether the bytes in this relocation are thought of as signed or unsigned. + let signed = relocationIsSigned r + + let ms' = ms { msOffset = msOffset ms + 4 + , msNext = rest + } + seq ms' $ MBR $ put ms' + pure $ Flexdis.Imm32SymbolOffset sym (fromIntegral off) signed ByteRegion bs:rest -> do v <- getUnsigned32 bs updateMSByteString ms bs rest 4 - pure $! Flexdis.Imm32Concrete v + pure $! Flexdis.Imm32Concrete (fromIntegral v) readJump sz = do ms <- MBR get @@ -247,20 +251,22 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where BSSRegion _:_ -> do throwMemoryError $ UnexpectedBSS (msAddr ms) RelocationRegion r:rest -> do - case r of - AbsoluteRelocation{} -> do - throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump0" - RelativeRelocation addr off end szCnt -> do - when (szCnt /= jsizeCount sz) $ do - throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump1" - when (end /= LittleEndian) $ do - throwMemoryError $ UnexpectedRelocation (msAddr ms) r "jump2" - let ms' = ms { msOffset = msOffset ms + fromIntegral (jsizeCount sz) - , msNext = rest - } - seq ms' $ MBR $ put ms' - let ioff = fromIntegral $ msOffset ms - msStart ms - pure $ Flexdis.RelativeOffset addr ioff (fromIntegral off) + let sym = relocationSym r + let off = relocationOffset r + -- Sanity checks + let isGood + = relocationIsRel r + && relocationSize r == jsizeCount sz + && relocationIsSigned r == False + && relocationEndianness r == LittleEndian + when (not isGood) $ do + throwMemoryError $ UnsupportedJumpOffsetRelocation (msAddr ms) r + let ms' = ms { msOffset = msOffset ms + fromIntegral (jsizeCount sz) + , msNext = rest + } + seq ms' $ MBR $ put ms' + let ioff = fromIntegral $ msOffset ms - msStart ms + pure $ Flexdis.RelativeOffset sym ioff (fromIntegral off) ByteRegion bs:rest -> do (v,c) <- getJumpBytes bs sz updateMSByteString ms bs rest (fromIntegral c) @@ -275,7 +281,6 @@ instance MemWidth w => ByteReader (MemoryByteReader w) where ------------------------------------------------------------------------ -- readInstruction - -- | Read instruction at a given memory address. readInstruction' :: MemSegmentOff 64 -- ^ Address to read from. diff --git a/x86/src/Data/Macaw/X86/Generator.hs b/x86/src/Data/Macaw/X86/Generator.hs index b78fdfc9..ef510d2f 100644 --- a/x86/src/Data/Macaw/X86/Generator.hs +++ b/x86/src/Data/Macaw/X86/Generator.hs @@ -24,12 +24,11 @@ module Data.Macaw.X86.Generator , addArchTermStmt , evalArchFn , evalAssignRhs - , shiftX86GCont , asAtomicStateUpdate , getState -- * GenResult , GenResult(..) - , finishBlock + , finishGenResult -- * PreBlock , PreBlock , emptyPreBlock @@ -37,15 +36,9 @@ module Data.Macaw.X86.Generator , pBlockState , pBlockStmts , pBlockApps - , finishBlock' - -- * Misc - , BlockSeq(..) - , nextBlockID - , frontierBlocks + , finishBlock -- * GenState , GenState(..) - , mkGenResult - , blockSeq , blockState , curX86State -- * Expr @@ -186,53 +179,28 @@ pBlockApps :: Simple Lens (PreBlock ids) (MapF (App (Value X86_64 ids)) (Assign pBlockApps = lens _pBlockApps (\s v -> s { _pBlockApps = v }) -- | Finishes the current block, if it is started. -finishBlock' :: PreBlock ids +finishBlock :: PreBlock ids -> (RegState X86Reg (Value X86_64 ids) -> TermStmt X86_64 ids) -> Block X86_64 ids -finishBlock' pre_b term = - Block { blockLabel = pBlockIndex pre_b - , blockStmts = toList (pre_b^.pBlockStmts) - , blockTerm = term (pre_b^.pBlockState) +finishBlock preBlock term = + Block { blockLabel = pBlockIndex preBlock + , blockStmts = toList (preBlock^.pBlockStmts) + , blockTerm = term (preBlock^.pBlockState) } ------------------------------------------------------------------------- --- BlockSeq - --- | List of blocks generated so far, and an index for generating new block labels. -data BlockSeq ids = BlockSeq - { _nextBlockID :: !Word64 - -- ^ Index of next block - , _frontierBlocks :: !(Seq (Block X86_64 ids)) - -- ^ Blocks added to CFG - } - --- | Control flow blocs generated so far. -nextBlockID :: Simple Lens (BlockSeq ids) Word64 -nextBlockID = lens _nextBlockID (\s v -> s { _nextBlockID = v }) - --- | Blocks that are not in CFG that end with a FetchAndExecute, --- which we need to analyze to compute new potential branch targets. -frontierBlocks :: Simple Lens (BlockSeq ids) (Seq (Block X86_64 ids)) -frontierBlocks = lens _frontierBlocks (\s v -> s { _frontierBlocks = v }) - ------------------------------------------------------------------------ -- GenResult -- | The final result from the block generator. -data GenResult ids = GenResult { resBlockSeq :: !(BlockSeq ids) - , resState :: !(Maybe (PreBlock ids)) - } +data GenResult ids + = UnfinishedGenResult !(PreBlock ids) + | FinishedGenResult !(Block X86_64 ids) -- | Finishes the current block, if it is started. -finishBlock :: (RegState X86Reg (Value X86_64 ids) -> TermStmt X86_64 ids) - -> GenResult ids - -> BlockSeq ids -finishBlock term st = - case resState st of - Nothing -> resBlockSeq st - Just pre_b -> - let b = finishBlock' pre_b term - in seq b $ resBlockSeq st & frontierBlocks %~ (Seq.|> b) +finishGenResult :: GenResult ids + -> Block X86_64 ids +finishGenResult (UnfinishedGenResult pre_b) = finishBlock pre_b FetchAndExecute +finishGenResult (FinishedGenResult blk) = blk ------------------------------------------------------------------------ -- GenState @@ -241,10 +209,8 @@ finishBlock term st = data GenState st_s ids = GenState { assignIdGen :: !(NonceGenerator (ST st_s) ids) -- ^ 'NonceGenerator' for generating 'AssignId's - , _blockSeq :: !(BlockSeq ids) - -- ^ Blocks generated so far. , _blockState :: !(PreBlock ids) - -- ^ Current block + -- ^ Block that we are processing. , genAddr :: !(MemSegmentOff 64) -- ^ Address of instruction we are translating , genMemory :: !(Memory 64) @@ -261,16 +227,6 @@ data GenState st_s ids = GenState are working with an SSE instruction. -} } --- | Create a gen result from a state result. -mkGenResult :: GenState st_s ids -> GenResult ids -mkGenResult s = GenResult { resBlockSeq = s^.blockSeq - , resState = Just (s^.blockState) - } - --- | Control flow blocs generated so far. -blockSeq :: Simple Lens (GenState st_s ids) (BlockSeq ids) -blockSeq = lens _blockSeq (\s v -> s { _blockSeq = v }) - genRegUpdates :: Simple Lens (GenState st_s ids) (MapF.MapF X86Reg (Value X86_64 ids)) genRegUpdates = lens _genRegUpdates (\s v -> s { _genRegUpdates = v }) @@ -316,20 +272,11 @@ type X86GCont st_s ids a -> ExceptT Text (ST st_s) (GenResult ids) -- | Run an 'X86Generator' starting from a given state -runX86Generator :: X86GCont st_s ids a - -> GenState st_s ids - -> X86Generator st_s ids a +runX86Generator :: GenState st_s ids + -> X86Generator st_s ids () -> ExceptT Text (ST st_s) (GenResult ids) -runX86Generator k st (X86G m) = runReaderT (runContT m (ReaderT . k)) st - - --- | Capture the current continuation and 'GenState' in an 'X86Generator' -shiftX86GCont :: (X86GCont st_s ids a - -> GenState st_s ids - -> ExceptT Text (ST st_s) (GenResult ids)) - -> X86Generator st_s ids a -shiftX86GCont f = - X86G $ ContT $ \k -> ReaderT $ \s -> f (runReaderT . k) s +runX86Generator st (X86G m) = runReaderT (runContT m (ReaderT . k)) st + where k () s = pure $! UnfinishedGenResult (s^.blockState) getState :: X86Generator st_s ids (GenState st_s ids) getState = X86G ask @@ -381,17 +328,13 @@ addArchStmt = addStmt . ExecArchStmt -- | execute a primitive instruction. addArchTermStmt :: X86TermStmt ids -> X86Generator st ids () addArchTermStmt ts = do - shiftX86GCont $ \_ s0 -> do + X86G $ ContT $ \_ -> ReaderT $ \s0 -> do -- Get last block. let p_b = s0 ^. blockState -- Create finished block. - let fin_b = finishBlock' p_b $ ArchTermStmt ts - seq fin_b $ + let fin_b = finishBlock p_b $ ArchTermStmt ts -- Return early - return GenResult - { resBlockSeq = s0 ^.blockSeq & frontierBlocks %~ (Seq.|> fin_b) - , resState = Nothing - } + return $! FinishedGenResult fin_b -- | Are we in AVX mode? isAVX :: X86Generator st ids Bool diff --git a/x86/src/Data/Macaw/X86/Getters.hs b/x86/src/Data/Macaw/X86/Getters.hs index a8937943..09eafd61 100644 --- a/x86/src/Data/Macaw/X86/Getters.hs +++ b/x86/src/Data/Macaw/X86/Getters.hs @@ -21,7 +21,7 @@ module Data.Macaw.X86.Getters , getSignExtendedValue , truncateBVValue , getCallTarget - , getJumpTarget + , doJump , HasRepSize(..) , getAddrRegOrSegment , getAddrRegSegmentOrImm @@ -54,7 +54,7 @@ import qualified Flexdis86 as F import GHC.TypeLits (KnownNat) import Data.Macaw.CFG -import Data.Macaw.Types (BVType, n8, n16, n32, n64, typeWidth) +import Data.Macaw.Types import Data.Macaw.X86.Generator import Data.Macaw.X86.Monad import Data.Macaw.X86.X86Reg (X86Reg(..)) @@ -107,6 +107,27 @@ reg64Loc = fullRegister . X86_GP ------------------------------------------------------------------------ -- Getters +getImm32 :: F.Imm32 -> BVExpr ids 32 +getImm32 (F.Imm32Concrete w) = bvLit n32 (toInteger w) +getImm32 (F.Imm32SymbolOffset sym off _) = + bvTrunc' n32 (ValueExpr (SymbolValue Addr64 sym)) + .+ bvLit n32 (toInteger off) + +-- | Return the value of a 32-bit displacement. +getDisplacement32 :: F.Displacement -> BVExpr ids 32 +getDisplacement32 F.NoDisplacement = bvLit n32 0 +getDisplacement32 (F.Disp8 x) = bvLit n32 (toInteger x) +getDisplacement32 (F.Disp32 x) = getImm32 x + +-- | Return the value of a 32-bit displacement. +getDisplacement64 :: F.Displacement -> BVExpr ids 64 +getDisplacement64 F.NoDisplacement = bvLit n64 0 +getDisplacement64 (F.Disp8 x) = bvLit n64 (toInteger x) +getDisplacement64 (F.Disp32 (F.Imm32Concrete x)) = bvLit n64 (toInteger x) +getDisplacement64 (F.Disp32 (F.Imm32SymbolOffset sym off _)) = + ValueExpr (SymbolValue Addr64 sym) + .+ bvLit n64 (toInteger off) + -- | Calculates the address corresponding to an AddrRef getBVAddress :: F.AddrRef -> X86Generator st ids (BVExpr ids 64) getBVAddress ar = @@ -122,26 +143,28 @@ getBVAddress ar = Just (i, r) -> bvTrunc n32 . (bvLit n32 (toInteger i) .*) <$> get (reg32Loc r) - let offset = uext n64 (base .+ scale .+ bvLit n32 (toInteger (F.displacementInt i32))) + + let offset = uext n64 (base .+ scale .+ getDisplacement32 i32) mk_absolute seg offset - F.IP_Offset_32 _seg _i32 -> fail "IP_Offset_32" + F.IP_Offset_32 _seg _i32 -> fail "IP_Offset_32" F.Offset_32 _seg _w32 -> fail "Offset_32" F.Offset_64 seg w64 -> do mk_absolute seg (bvLit n64 (toInteger w64)) - F.Addr_64 seg m_r64 m_int_r64 i32 -> do + F.Addr_64 seg m_r64 m_int_r64 disp -> do base <- case m_r64 of Nothing -> return v0_64 Just r -> get (reg64Loc r) - scale <- case m_int_r64 of - Nothing -> return v0_64 - Just (i, r) -> bvTrunc n64 . (bvLit n64 (toInteger i) .*) - <$> get (reg64Loc r) - let offset = base .+ scale .+ bvLit n64 (toInteger i32) + scale <- + case m_int_r64 of + Nothing -> return v0_64 + Just (i, r) -> + bvTrunc n64 . (bvLit n64 (toInteger i) .*) <$> get (reg64Loc r) + let offset = base .+ scale .+ getDisplacement64 disp mk_absolute seg offset - F.IP_Offset_64 seg i32 -> do - ip_val <- get rip - mk_absolute seg (bvLit n64 (toInteger i32) .+ ip_val) + F.IP_Offset_64 seg disp -> do + ipVal <- get rip + mk_absolute seg (ipVal .+ getDisplacement64 disp) where v0_64 = bvLit n64 0 -- | Add the segment base to compute an absolute address. @@ -188,8 +211,6 @@ getBV128Addr ar = (`MemoryAddr` xmmMemRepr) <$> getBVAddress ar getBV256Addr :: F.AddrRef -> X86Generator st ids (Location (Addr ids) (BVType 256)) getBV256Addr ar = (`MemoryAddr` ymmMemRepr) <$> getBVAddress ar - - readBVAddress :: F.AddrRef -> MemRepr tp -> X86Generator st ids (Expr ids tp) readBVAddress ar repr = get . (`MemoryAddr` repr) =<< getBVAddress ar @@ -252,21 +273,13 @@ getBVLocation l expected = do Nothing -> fail $ "Widths aren't equal: " ++ show (typeWidth v) ++ " and " ++ show expected -getImm32 :: F.Imm32 -> X86Generator st ids (BVExpr ids 32) -getImm32 (F.Imm32Concrete w) = - pure $ bvLit n32 (toInteger w) -getImm32 (F.Imm32SymbolOffset sym off) = do - let symExpr = ValueExpr $ SymbolValue Addr64 sym - let offExpr = bvLit n64 (toInteger off) - pure $ bvTrunc' n32 (symExpr .+ offExpr) - -- | Return a bitvector value. getSomeBVValue :: F.Value -> X86Generator st ids (SomeBV (Expr ids)) getSomeBVValue v = case v of F.ByteImm w -> pure $! SomeBV $ bvLit n8 $ toInteger w F.WordImm w -> pure $! SomeBV $ bvLit n16 $ toInteger w - F.DWordImm i -> SomeBV <$> getImm32 i + F.DWordImm i -> pure $! SomeBV $ getImm32 i F.QWordImm w -> pure $! SomeBV $ bvLit n64 $ toInteger w F.JumpOffset _ _ -> fail "Jump Offset should not be treated as a BVValue." _ -> do @@ -348,15 +361,15 @@ truncateBVValue n (SomeBV v) | otherwise = fail $ "Widths isn't >=: " ++ show (typeWidth v) ++ " and " ++ show n -resolveJumpOffset :: F.JumpOffset -> X86Generator s ids (BVExpr ids 64) -resolveJumpOffset (F.FixedOffset off) = - pure $ bvLit n64 (toInteger off) -resolveJumpOffset (F.RelativeOffset symId insOff off) = do - arepr <- memAddrWidth . genMemory <$> getState - let symVal = ValueExpr (SymbolValue arepr symId) - addrOff <- genAddr <$> getState - let relocAddr = relativeAddr (msegSegment addrOff) (msegOffset addrOff + fromIntegral insOff) - pure $ symVal .+ bvLit n64 (toInteger off) .- ValueExpr (RelocatableValue arepr relocAddr) +resolveJumpOffset :: GenState st_s ids -> F.JumpOffset -> BVExpr ids 64 +resolveJumpOffset _ (F.FixedOffset off) = bvLit n64 (toInteger off) +resolveJumpOffset s (F.RelativeOffset symId insOff off) = + symVal .+ bvLit n64 (toInteger off) .- ValueExpr (RelocatableValue arepr relocAddr) + where arepr = memAddrWidth (genMemory s) + symVal = ValueExpr (SymbolValue arepr symId) + addrOff = genAddr s + relocAddr = relativeAddr (msegSegment addrOff) (msegOffset addrOff + fromIntegral insOff) + -- | Return the target of a call or jump instruction. getCallTarget :: F.Value @@ -366,19 +379,33 @@ getCallTarget v = F.Mem64 ar -> get =<< getBV64Addr ar F.QWordReg r -> get (reg64Loc r) F.JumpOffset _ joff -> do - (.+) <$> get rip <*> resolveJumpOffset joff + s <- getState + (.+ resolveJumpOffset s joff) <$> get rip _ -> fail "Unexpected argument" -- | Return the target of a call or jump instruction. -getJumpTarget :: F.Value - -> X86Generator st ids (BVExpr ids 64) -getJumpTarget v = +doJump :: Expr ids BoolType -> F.Value -> X86Generator st ids () +doJump cond v = case v of F.JumpOffset _ joff -> do - (.+) <$> get rip <*> resolveJumpOffset joff - F.QWordReg r -> get (reg64Loc r) - F.Mem64 ar -> get =<< getBV64Addr ar - F.QWordImm w -> return (ValueExpr (BVValue knownNat (toInteger w))) + s <- getState + modify rip $ \ipVal -> mux cond (ipVal .+ resolveJumpOffset s joff) ipVal + + F.QWordReg r -> do + ipVal <- get (reg64Loc r) + modify rip $ mux cond ipVal + F.Mem64 ar -> do + condVal <- eval cond + -- Address to read jump target from + ipAddr <- eval =<< getBVAddress ar + -- Get exiting ip value if we need it. + oldIpVal <- eval =<< get rip + -- Read the new memory if cond is true, and otherwise return old value. + ipVal <- evalAssignRhs $ CondReadMem qwordMemRepr condVal ipAddr oldIpVal + -- Set the ip value. + rip .= ipVal + F.QWordImm w -> do + modify rip $ mux cond $ bvKLit (toInteger w) _ -> fail "Unexpected argument" ------------------------------------------------------------------------ @@ -412,7 +439,7 @@ getAddrRegSegmentOrImm v = case v of F.ByteImm w -> pure $ Some $ HasRepSize ByteRepVal $ bvLit n8 (toInteger w) F.WordImm w -> pure $ Some $ HasRepSize WordRepVal $ bvLit n16 (toInteger w) - F.DWordImm i -> Some . HasRepSize DWordRepVal <$> getImm32 i + F.DWordImm i -> pure $ Some $ HasRepSize DWordRepVal $ getImm32 i F.QWordImm w -> pure $ Some $ HasRepSize QWordRepVal $ bvLit n64 (toInteger w) _ -> do Some (HasRepSize rep l) <- getAddrRegOrSegment v diff --git a/x86/src/Data/Macaw/X86/Monad.hs b/x86/src/Data/Macaw/X86/Monad.hs index 1fa107b5..a979a11b 100644 --- a/x86/src/Data/Macaw/X86/Monad.hs +++ b/x86/src/Data/Macaw/X86/Monad.hs @@ -154,11 +154,6 @@ module Data.Macaw.X86.Monad , Data.Macaw.X86.Generator.eval , Data.Macaw.X86.Generator.evalArchFn , Data.Macaw.X86.Generator.addArchTermStmt - , ifte_ - , when_ - , unless_ - , memcopy - , memcmp , memset , even_parity , fnstcw @@ -181,15 +176,12 @@ import Control.Lens hiding ((.=)) import Control.Monad import qualified Data.Bits as Bits import Data.Macaw.CFG -import Data.Macaw.CFG.Block import Data.Macaw.Memory (Endianness(..)) import Data.Macaw.Types import Data.Maybe import Data.Parameterized.Classes import Data.Parameterized.NatRepr -import qualified Data.Sequence as Seq import qualified Flexdis86 as F -import Flexdis86.Segment ( Segment ) import Flexdis86.Sizes (SizeConstraint(..)) import GHC.TypeLits as TypeLits import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) @@ -518,7 +510,7 @@ data Location addr (tp :: Type) where DebugReg :: !F.DebugReg -> Location addr (BVType 64) - SegmentReg :: !Segment + SegmentReg :: !F.Segment -> Location addr (BVType 16) X87ControlReg :: !(X87_ControlReg w) @@ -1613,162 +1605,6 @@ modify r f = do x <- get r r .= f x --- | Perform an if-then-else -ifte_ :: Expr ids BoolType - -> X86Generator st ids () - -> X86Generator st ids () - -> X86Generator st ids () --- Implement ifte_ --- Note that this implementation will run any code appearing after the ifte_ --- twice, once for the true branch and once for the false branch. --- --- This could be changed to run the code afterwards once, but the cost would be --- defining a way to merge processor states from the different branches, and making --- sure that expression assignments generated in one branch were not referred to in --- another branch. --- --- One potential design change, not implemented here, would be to run both branches, --- up to the point where they merge, and if the resulting PC is in the same location, --- to merge in that case, otherwise to run them separately. --- --- This would support the cmov instruction, but result in divergence for branches, which --- I think is what we want. -ifte_ c_expr t f = eval c_expr >>= go - where - go (BoolValue True) = t - go (BoolValue False) = f - go cond = - shiftX86GCont $ \c s0 -> do - let p_b = s0 ^.blockState - let st = p_b^.pBlockState - let t_block_label = s0^.blockSeq^.nextBlockID - let s2 = s0 & blockSeq . nextBlockID +~ 1 - & blockSeq . frontierBlocks .~ Seq.empty - & blockState .~ emptyPreBlock st t_block_label (genAddr s0) - -- Run true block. - t_seq <- finishBlock FetchAndExecute <$> runX86Generator c s2 t - -- Run false block - let f_block_label = t_seq^.nextBlockID - let s5 = GenState { assignIdGen = assignIdGen s0 - , _blockSeq = - BlockSeq { _nextBlockID = t_seq^.nextBlockID + 1 - , _frontierBlocks = Seq.empty - } - , _blockState = emptyPreBlock st f_block_label (genAddr s0) - , genAddr = genAddr s0 - , genMemory = genMemory s0 - , _genRegUpdates = _genRegUpdates s0 - , avxMode = avxMode s0 - } - f_seq <- finishBlock FetchAndExecute <$> runX86Generator c s5 f - - -- Join results together. - let fin_b = finishBlock' p_b (\_ -> Branch cond t_block_label f_block_label) - seq fin_b $ - return - GenResult { resBlockSeq = - BlockSeq { _nextBlockID = _nextBlockID f_seq - , _frontierBlocks = (s0^.blockSeq^.frontierBlocks Seq.|> fin_b) - Seq.>< t_seq^.frontierBlocks - Seq.>< f_seq^.frontierBlocks - } - , resState = Nothing - } - --- | Run a step if condition holds. -when_ :: Expr ids BoolType -> X86Generator st ids () -> X86Generator st ids () -when_ p x = ifte_ p x (return ()) - --- | Run a step if condition is false. -unless_ :: Expr ids BoolType -> X86Generator st ids () -> X86Generator st ids () -unless_ p = ifte_ p (return ()) - --- | Move n bits at a time, with count moves --- --- Semantic sketch. The effect on memory should be like @memcopy@ --- below, not like @memcopy2@. These sketches ignore the issue of --- copying in chunks of size `bytes`, which should only be an --- efficiency concern. --- --- @ --- void memcopy(int bytes, int copies, char *src, char *dst, int reversed) { --- int maybeFlip = reversed ? -1 : 1; --- for (int c = 0; c < copies; ++c) { --- for (int b = 0; b < bytes; ++b) { --- int offset = maybeFlip * (b + c * bytes); --- *(dst + offset) = *(src + offset); --- } --- } --- } --- @ --- --- Compare with: --- --- @ --- void memcopy2(int bytes, int copies, char *src, char *dst, int reversed) { --- int maybeFlip = reversed ? -1 : 1; --- /* The only difference from `memcopy` above: here the same memory is --- copied whether `reversed` is true or false -- only the order of --- copies changes -- whereas above different memory is copied for --- each direction. */ --- if (reversed) { --- /* Start at the end and work backwards. */ --- src += copies * bytes - 1; --- dst += copies * bytes - 1; --- } --- for (int c = 0; c < copies; ++c) { --- for (int b = 0; b < bytes; ++b) { --- int offset = maybeFlip * (b + c * bytes); --- *(dst + offset) = *(src + offset); --- } --- } --- } --- @ -memcopy :: Integer - -- ^ Number of bytes to copy at a time (1,2,4,8) - -> BVExpr ids 64 - -- ^ Number of values to move. - -> Addr ids - -- ^ Start of source buffer - -> Addr ids - -- ^ Start of destination buffer. - -> Expr ids BoolType - -- ^ Flag indicates direction of move: - -- True means we should decrement buffer pointers after each copy. - -- False means we should increment the buffer pointers after each copy. - -> X86Generator st ids () -memcopy val_sz count src dest is_reverse = do - count_v <- eval count - src_v <- eval src - dest_v <- eval dest - is_reverse_v <- eval is_reverse - addArchStmt $ MemCopy val_sz count_v src_v dest_v is_reverse_v - --- | Compare the memory regions. Returns the number of elements which are --- identical. If the direction is 0 then it is increasing, otherwise decreasing. --- --- See `memcopy` above for explanation of which memory regions are --- compared: the regions copied there are compared here. -memcmp :: Integer - -- ^ Number of bytes to compare at a time {1, 2, 4, 8} - -> BVExpr ids 64 - -- ^ Number of elementes to compare - -> Addr ids - -- ^ Pointer to first buffer - -> Addr ids - -- ^ Pointer to second buffer - -> Expr ids BoolType - -- ^ Flag indicates direction of copy: - -- True means we should decrement buffer pointers after each copy. - -- False means we should increment the buffer pointers after each copy. - -> X86Generator st ids (BVExpr ids 64) -memcmp sz count src dest is_reverse = do - count_v <- eval count - is_reverse_v <- eval is_reverse - src_v <- eval src - dest_v <- eval dest - evalArchFn (MemCmp sz count_v src_v dest_v is_reverse_v) - -- | Set memory to the given value, for the number of words (nbytes -- = count * typeWidth v) memset :: (1 <= n) @@ -1800,7 +1636,7 @@ fnstcw addr = do addArchStmt =<< StoreX87Control <$> eval addr -- | Return the base address of the given segment. -getSegmentBase :: Segment -> X86Generator st ids (Addr ids) +getSegmentBase :: F.Segment -> X86Generator st ids (Addr ids) getSegmentBase seg = case seg of F.FS -> evalArchFn ReadFSBase diff --git a/x86/src/Data/Macaw/X86/Semantics.hs b/x86/src/Data/Macaw/X86/Semantics.hs index d709684c..39ba2b25 100644 --- a/x86/src/Data/Macaw/X86/Semantics.hs +++ b/x86/src/Data/Macaw/X86/Semantics.hs @@ -18,6 +18,7 @@ module Data.Macaw.X86.Semantics ( execInstruction ) where +import Control.Lens ((^.)) import Control.Monad (when) import qualified Data.Bits as Bits import Data.Foldable @@ -27,6 +28,7 @@ import Data.Parameterized.Classes import qualified Data.Parameterized.List as P import Data.Parameterized.NatRepr import Data.Parameterized.Some +import Data.Parameterized.TraversableF import Data.Proxy import Data.Word import qualified Flexdis86 as F @@ -335,7 +337,7 @@ def_mov = (F.DWordReg r, F.DWordSignedImm i) -> do reg32Loc r .= bvLit n32 (toInteger i) (F.DWordReg r, F.DWordImm i) -> do - (reg32Loc r .=) =<< getImm32 i + reg32Loc r .= getImm32 i (F.DWordReg r, F.Mem32 src) -> do v <- get =<< getBV32Addr src reg32Loc r .= v @@ -421,24 +423,24 @@ def_cmpxchg = defBinaryLV "cmpxchg" $ \d s -> do let p = a .=. temp zf_loc .= p d .= mux p s temp - modify acc $ \old -> mux p temp old + modify acc $ mux p temp def_cmpxchg8b :: InstructionDef def_cmpxchg8b = - defUnaryKnown "cmpxchg8b" $ \loc -> do - temp64 <- get loc - edx_eax <- bvCat <$> get edx <*> get eax - let p = edx_eax .=. temp64 - zf_loc .= p - ifte_ p - (do ecx_ebx <- bvCat <$> get ecx <*> get ebx - loc .= ecx_ebx - ) - (do let (upper,lower) = bvSplit temp64 - edx .= upper - eax .= lower - loc .= edx_eax -- FIXME: this store is redundant, but it is in the ISA, so we do it. - ) + defUnary "cmpxchg8b" $ \_ bloc -> do + loc <- + case bloc of + F.Mem64 ar -> eval =<< getBVAddress ar + F.VoidMem ar -> eval =<< getBVAddress ar + _ -> fail "Unexpected argument to cmpxchg8b" + eaxVal <- eval =<< get eax + ebxVal <- eval =<< get ebx + ecxVal <- eval =<< get ecx + edxVal <- eval =<< get edx + res <- evalArchFn (CMPXCHG8B loc eaxVal ebxVal ecxVal edxVal) + zf_loc .= app (TupleField knownRepr res P.index0) + eax .= app (TupleField knownRepr res P.index1) + edx .= app (TupleField knownRepr res P.index2) def_movsx :: InstructionDef def_movsx = defBinaryLVge "movsx" $ \l v -> l .= sext (typeWidth l) v @@ -968,8 +970,8 @@ exec_ror l count = do -- ** Bit and Byte Instructions getBT16RegOffset :: F.Value -> X86Generator st ids (BVExpr ids 16) -getBT16RegOffset val = - case val of +getBT16RegOffset idx = + case idx of F.ByteImm i -> do pure $ bvLit n16 $ toInteger $ i Bits..&. 0xF F.WordReg ir -> do @@ -1029,18 +1031,24 @@ def_bt mnem act = defBinary mnem $ \_ base_loc idx -> do iv <- getBT64RegOffset idx act knownNat (reg64Loc r) iv set_bt_flags - (F.Mem16 base, F.ByteImm i) -> do - when (i >= 16) $ fail $ mnem ++ " given invalid index." - loc <- getBV16Addr base - act knownNat loc (bvLit knownNat (toInteger i)) - set_bt_flags - (F.Mem16 base, F.WordReg ir) -> do - off <- get $! reg16Loc ir - base_addr <- getBVAddress base - let word_off = sext' n64 $ bvSar off (bvLit knownNat 4) - let loc = MemoryAddr (base_addr .+ word_off) wordMemRepr - let iv = off .&. bvLit knownNat 15 - act knownNat loc iv + (F.Mem16 base, _) -> do + case idx of + F.ByteImm i -> do + when (i >= 16) $ fail $ mnem ++ " given invalid index." + baseAddr <- getBVAddress base + let loc = MemoryAddr baseAddr wordMemRepr + act knownNat loc (bvLit knownNat (toInteger i)) + F.WordReg ir -> do + off <- get $! reg16Loc ir + + loc <- do + baseAddr <- getBVAddress base + let wordOff = sext' n64 $ bvSar off (bvLit knownNat 4) + pure $! MemoryAddr (baseAddr .+ wordOff) wordMemRepr + + let iv = off .&. bvLit knownNat 15 + act knownNat loc iv + _ -> error $ "bt given unexpected index." set_bt_flags (F.Mem32 base, F.ByteImm i) -> do when (i >= 32) $ fail $ mnem ++ " given invalid index." @@ -1050,9 +1058,9 @@ def_bt mnem act = defBinary mnem $ \_ base_loc idx -> do set_bt_flags (F.Mem32 base, F.DWordReg ir) -> do off <- get $! reg32Loc ir - base_addr <- getBVAddress base - let dword_off = sext' n64 $ bvSar off (bvLit knownNat 5) - let loc = MemoryAddr (base_addr .+ dword_off) dwordMemRepr + baseAddr <- getBVAddress base + let dwordOff = sext' n64 $ bvSar off (bvLit knownNat 5) + let loc = MemoryAddr (baseAddr .+ dwordOff) dwordMemRepr let iv = off .&. bvLit knownNat 31 act knownNat loc iv set_bt_flags @@ -1064,9 +1072,9 @@ def_bt mnem act = defBinary mnem $ \_ base_loc idx -> do set_bt_flags (F.Mem64 base, F.QWordReg ir) -> do off <- get $! reg64Loc ir - let qword_off = bvSar off (bvLit knownNat 6) - base_addr <- getBVAddress base - let loc = MemoryAddr (base_addr .+ qword_off) qwordMemRepr + let qwordOff = bvSar off (bvLit knownNat 6) + baseAddr <- getBVAddress base + let loc = MemoryAddr (baseAddr .+ qwordOff) qwordMemRepr let iv = off .&. bvLit knownNat 63 act knownNat loc iv set_bt_flags @@ -1123,14 +1131,11 @@ def_jcc_list = defConditionals "j" $ \mnem cc -> defUnary mnem $ \_ v -> do a <- cc - when_ a $ do - tgt <- getJumpTarget v - rip .= tgt + doJump a v def_jmp :: InstructionDef def_jmp = defUnary "jmp" $ \_ v -> do - tgt <- getJumpTarget v - rip .= tgt + doJump true v def_ret :: InstructionDef def_ret = defVariadic "ret" $ \_ vs -> @@ -1156,54 +1161,51 @@ def_ret = defVariadic "ret" $ \_ vs -> -- FIXME: probably doesn't work for 32 bit address sizes -- arguments are only for the size, they are fixed at rsi/rdi -exec_movs :: 1 <= w - => Bool -- Flag indicating if RepPrefix appeared before instruction - -> NatRepr w -- Number of bytes to move at a time. - -> X86Generator st ids () -exec_movs False w = do - let bytesPerOp = bvLit n64 (natValue w) - let repr = BVMemRepr w LittleEndian - -- The direction flag indicates post decrement or post increment. - df <- get df_loc - src <- get rsi - dest <- get rdi - v' <- get $ MemoryAddr src repr - MemoryAddr dest repr .= v' - - rsi .= mux df (src .- bytesPerOp) (src .+ bytesPerOp) - rdi .= mux df (dest .- bytesPerOp) (dest .+ bytesPerOp) -exec_movs True w = do - -- FIXME: aso modifies this - let count_reg = rcx - bytesPerOp = natValue w - bytesPerOpv = bvLit n64 bytesPerOp - -- The direction flag indicates post decrement or post increment. - df <- get df_loc - src <- get rsi - dest <- get rdi - count <- get count_reg - let total_bytes = count .* bytesPerOpv - -- FIXME: we might need direction for overlapping regions - count_reg .= bvLit n64 (0::Integer) - memcopy bytesPerOp count src dest df - rsi .= mux df (src .- total_bytes) (src .+ total_bytes) - rdi .= mux df (dest .- total_bytes) (dest .+ total_bytes) def_movs :: InstructionDef def_movs = defBinary "movs" $ \ii loc _ -> do - case loc of - F.Mem8 F.Addr_64{} -> - exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 1) - F.Mem16 F.Addr_64{} -> - exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 2) - F.Mem32 F.Addr_64{} -> - exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 4) - F.Mem64 F.Addr_64{} -> - exec_movs (F.iiLockPrefix ii == F.RepPrefix) (knownNat :: NatRepr 8) - _ -> fail "Bad argument to movs" + let pfx = F.iiPrefixes ii + Some w <- + case loc of + F.Mem8{} -> pure (Some ByteRepVal) + F.Mem16{} -> pure (Some WordRepVal) + F.Mem32{} -> pure (Some DWordRepVal) + F.Mem64{} -> pure (Some QWordRepVal) + _ -> error "Bad argument to movs" + let bytesPerOp = bvLit n64 (repValSizeByteCount w) + df <- get df_loc + src <- get rsi + dest <- get rdi + case pfx^.F.prLockPrefix of + F.RepPrefix -> do + when (pfx^.F.prASO) $ do + fail "Rep prefix semantics not defined when address size override is true." + -- The direction flag indicates post decrement or post increment. + count <- get rcx + addArchStmt =<< traverseF eval (RepMovs w count src dest df) + + -- We adjust rsi and rdi by a negative value if df is true. + -- The formula is organized so that the bytesPerOp literal is + -- passed to the multiply and we can avoid non-linear arithmetic. + let adj = bytesPerOp .* mux df (bvNeg count) count + rcx .= bvLit n64 (0::Integer) + rsi .= src .+ adj + rdi .= dest .+ adj + F.NoLockPrefix -> do + let repr = repValSizeMemRepr w + -- The direction flag indicates post decrement or post increment. + v' <- get $ MemoryAddr src repr + MemoryAddr dest repr .= v' + -- We adjust rsi and rdi by a negative value if df is true. + -- The formula is organized so that the bytesPerOp literal is + -- passed to the multiply and we can avoid non-linear arithmetic. + let adj = mux df (bvNeg bytesPerOp) bytesPerOp + rsi .= src .+ adj + rdi .= dest .+ adj + _ -> do + fail "movs given unsupported lock prefix" + --- FIXME: can also take rep prefix --- FIXME: we ignore the aso here. -- | CMPS/CMPSB Compare string/Compare byte string -- CMPS/CMPSW Compare string/Compare word string -- CMPS/CMPSD Compare string/Compare doubleword string @@ -1221,26 +1223,39 @@ exec_cmps repz_pfx rval = repValHasSupportedWidth rval $ do let bytesPerOp' = bvLit n64 bytesPerOp if repz_pfx then do count <- get rcx - unless_ (count .=. bvKLit 0) $ do - nsame <- memcmp bytesPerOp count v_rsi v_rdi df - let equal = (nsame .=. count) - nwordsSeen = mux equal count (count .- (nsame .+ bvKLit 1)) + count_v <- eval count + src_v <- eval v_rsi + dest_v <- eval v_rdi + is_reverse_v <- eval df + nsame <- evalArchFn $ MemCmp bytesPerOp count_v src_v dest_v is_reverse_v + let equal = (nsame .=. count) + nwordsSeen = mux equal count (count .- (nsame .+ bvKLit 1)) - -- we need to set the flags as if the last comparison was done, hence this. - let lastWordBytes = (nwordsSeen .- bvKLit 1) .* bytesPerOp' - lastSrc = mux df (v_rsi .- lastWordBytes) (v_rsi .+ lastWordBytes) - lastDest = mux df (v_rdi .- lastWordBytes) (v_rdi .+ lastWordBytes) + -- we need to set the flags as if the last comparison was done, hence this. + let lastWordBytes = (nwordsSeen .- bvKLit 1) .* bytesPerOp' + lastSrc1 <- eval $ mux df (v_rsi .- lastWordBytes) (v_rsi .+ lastWordBytes) + lastSrc2 <- eval $ mux df (v_rdi .- lastWordBytes) (v_rdi .+ lastWordBytes) + -- we do this to make it obvious so repz cmpsb ; jz ... is clear + let nbytesSeen = nwordsSeen .* bytesPerOp' - v' <- get $ MemoryAddr lastDest repr - exec_cmp (MemoryAddr lastSrc repr) v' -- FIXME: right way around? + -- Determine if count ever ran. + nzCount <- eval $ count .=/=. bvKLit 0 - -- we do this to make it obvious so repz cmpsb ; jz ... is clear - zf_loc .= equal - let nbytesSeen = nwordsSeen .* bytesPerOp' + src1Val <- evalAssignRhs $ CondReadMem repr nzCount lastSrc1 (mkLit knownNat 0) + src2Val <- evalAssignRhs $ CondReadMem repr nzCount lastSrc2 (mkLit knownNat 0) - rsi .= mux df (v_rsi .- nbytesSeen) (v_rsi .+ nbytesSeen) - rdi .= mux df (v_rdi .- nbytesSeen) (v_rdi .+ nbytesSeen) - rcx .= (count .- nwordsSeen) + -- Set result value. + let res = src1Val .- src2Val + -- Set flags + pf_val <- even_parity (least_byte res) + modify of_loc $ mux (ValueExpr nzCount) $ ssub_overflows src1Val src2Val + modify af_loc $ mux (ValueExpr nzCount) $ usub4_overflows src1Val src2Val + modify cf_loc $ mux (ValueExpr nzCount) $ usub_overflows src1Val src2Val + modify sf_loc $ mux (ValueExpr nzCount) $ msb res + modify pf_loc $ mux (ValueExpr nzCount) $ pf_val + modify rsi $ mux (ValueExpr nzCount) $ mux df (v_rsi .- nbytesSeen) (v_rsi .+ nbytesSeen) + modify rdi $ mux (ValueExpr nzCount) $ mux df (v_rdi .- nbytesSeen) (v_rdi .+ nbytesSeen) + modify rcx $ mux (ValueExpr nzCount) $ count .- nwordsSeen else do v' <- get $ MemoryAddr v_rdi repr exec_cmp (MemoryAddr v_rsi repr) v' -- FIXME: right way around? diff --git a/x86/tests/ElfX64Linux.hs b/x86/tests/ElfX64Linux.hs index fced3db1..8b01f79a 100644 --- a/x86/tests/ElfX64Linux.hs +++ b/x86/tests/ElfX64Linux.hs @@ -6,22 +6,21 @@ module ElfX64Linux ( elfX64LinuxTests ) where -import Control.Arrow ( first ) -import Control.Lens ( (^.) ) +import Control.Lens ( (^.) ) import Control.Monad ( unless ) import qualified Control.Monad.Catch as C import qualified Data.ByteString as B import qualified Data.Foldable as F import qualified Data.Map as M +import Data.Maybe import qualified Data.Set as S -import Data.Typeable ( Typeable ) -import Data.Word ( Word64 ) -import System.FilePath ( dropExtension, replaceExtension ) +import Data.Typeable ( Typeable ) +import Data.Word ( Word64 ) +import System.FilePath import qualified Test.Tasty as T import qualified Test.Tasty.HUnit as T -import Text.Printf ( printf ) -import Text.Read ( readMaybe ) -import Numeric (showHex) +import Text.Printf ( printf ) +import Text.Read ( readMaybe ) import qualified Data.ElfEdit as E @@ -31,34 +30,22 @@ import qualified Data.Macaw.Memory.ElfLoader as MM import qualified Data.Macaw.Discovery as MD import qualified Data.Macaw.X86 as RO --- | This is an offset we use to change the load address of the text section of --- the binary. --- --- The current binaries are position independent. This means that the load --- address is around 0x100. The problem is that there are constant offsets from --- the stack that are in this range; the abstract interpretation in AbsState.hs --- then interprets stack offsets as code pointers (since small integer values --- look like code pointers when the code is mapped at these low addresses). --- This throws off the abstract interpretation by hitting a "stack offset + code --- pointer" case, which resolves to Top. --- --- This offset forces macaw to load the binary at a higher address where this --- accidental overlap is less likely. We still need a more principled solution --- to this problem. --- Note (JHx): The code appears to be working without this, so I've disableed it. ---loadOffset :: Word64 ---loadOffset = 0x400000 - +-- | elfX64LinuxTests :: [FilePath] -> T.TestTree elfX64LinuxTests = T.testGroup "ELF x64 Linux" . map mkTest +data Addr = Addr Int Word64 + deriving (Read,Show,Eq) +-- ^ An address is a region index and offset + -- | The type of expected results for test cases data ExpectedResult = - R { funcs :: [(Word64, [(Word64, Integer)])] + R { funcs :: [(Addr, [(Addr, Integer)])] -- ^ The first element of the pair is the address of entry point -- of the function. The list is a list of the addresses of the - -- basic blocks in the function (including the first block). - , ignoreBlocks :: [Word64] + -- basic blocks in the function (including the first block) and the + -- size of the block + , ignoreBlocks :: [Addr] -- ^ This is a list of discovered blocks to ignore. This is -- basically just the address of the instruction after the exit -- syscall, as macaw doesn't know that exit never returns and @@ -66,32 +53,32 @@ data ExpectedResult = } deriving (Read, Show, Eq) +-- | Given a path to an expected file, this mkTest :: FilePath -> T.TestTree -mkTest fp = T.testCase fp $ withELF exeFilename (testDiscovery fp) +mkTest fp = T.testCase fp $ withELF elfFilename (testDiscovery fp) where - asmFilename = dropExtension fp - exeFilename = replaceExtension asmFilename "exe" + elfFilename = dropExtension fp -- | Run a test over a given expected result filename and the ELF file -- associated with it testDiscovery :: FilePath -> E.Elf 64 -> IO () testDiscovery expectedFilename elf = - withMemory MM.Addr64 elf $ \mem mentry -> do - let Just entryPoint = mentry - di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty [entryPoint] [] - let memIdx = case E.elfType elf of - E.ET_DYN -> 1 - E.ET_EXEC -> 0 - eidx -> error $ "Unexpected elf type " ++ show eidx + withMemory MM.Addr64 elf $ \mem entries -> do + let di = MD.cfgFromAddrs RO.x86_64_linux_info mem M.empty entries [] expectedString <- readFile expectedFilename case readMaybe expectedString of Nothing -> T.assertFailure ("Invalid expected result: " ++ show expectedString) Just er -> do - let toSegOff :: Word64 -> MM.MemSegmentOff 64 - toSegOff off = - case MM.resolveAddr mem memIdx (fromIntegral off) of + let toSegOff :: Addr -> MM.MemSegmentOff 64 + toSegOff (Addr idx off) = do + let addr :: MM.MemAddr 64 + addr = MM.MemAddr idx (fromIntegral off) + case MM.asSegmentOff mem addr of Just a -> a - Nothing -> error $ "Could not resolve offset " ++ showHex off "" + Nothing -> do + let ppSeg seg = " Segment: " ++ show (MM.relativeAddr seg 0) + error $ "Could not resolve address : " ++ show addr ++ "\n" + ++ unlines (fmap ppSeg (MM.memSegments mem)) let expectedEntries = M.fromList [ (toSegOff entry , S.fromList ((\(s,sz) -> (toSegOff s, sz)) <$> starts) @@ -141,7 +128,7 @@ withMemory :: forall w m a . (C.MonadThrow m, MM.MemWidth w, Integral (E.ElfWordType w)) => MM.AddrWidthRepr w -> E.Elf w - -> (MM.Memory w -> Maybe (MM.MemSegmentOff w) -> m a) + -> (MM.Memory w -> [MM.MemSegmentOff w] -> m a) -> m a withMemory _relaWidth e k = do let opt = MM.LoadOptions { MM.loadRegionIndex = Nothing @@ -151,9 +138,9 @@ withMemory _relaWidth e k = do } case MM.resolveElfContents opt e of Left err -> C.throwM (MemoryLoadError err) - Right (warn, mem, mentry, _sym) -> - if length warn >= 3 then - k mem mentry + Right (warn, mem, mentry, syms) -> + if null warn then + k mem (maybeToList mentry ++ fmap MM.memSymbolStart syms) else error $ "Warnings while loading Elf " ++ show warn diff --git a/x86/tests/Main.hs b/x86/tests/Main.hs index c9638fe1..ea8c40bb 100644 --- a/x86/tests/Main.hs +++ b/x86/tests/Main.hs @@ -7,8 +7,7 @@ import qualified ElfX64Linux as T main :: IO () main = do - x64AsmTests <- namesMatching "tests/x64/*.s.expected" + x64AsmTests <- namesMatching "tests/x64/*.expected" T.defaultMain $ T.testGroup "ReoptTests" [ T.elfX64LinuxTests x64AsmTests ] - diff --git a/x86/tests/x64/Makefile b/x86/tests/x64/Makefile index 391239a1..0737d72e 100644 --- a/x86/tests/x64/Makefile +++ b/x86/tests/x64/Makefile @@ -14,7 +14,10 @@ test-tail-call.exe: test-tail-call.c %.s: %.c gcc -fno-stack-protector -foptimize-sibling-calls -S -c $< -o $@ +%.o: %.c + gcc -fno-stack-protector -c $< -o $@ + .PRECIOUS: %.s clean: - rm *.s *.exe + rm -f *.o *.s *.exe diff --git a/x86/tests/x64/test-conditional.exe.expected b/x86/tests/x64/test-conditional.exe.expected new file mode 100644 index 00000000..40815ebc --- /dev/null +++ b/x86/tests/x64/test-conditional.exe.expected @@ -0,0 +1,3 @@ +R { funcs = [(Addr 1 0x2b1, [(Addr 1 0x2b1, 14), (Addr 1 0x2ce , 16), (Addr 1 0x2bf, 15)])] + , ignoreBlocks = [Addr 1 0x2de] + } diff --git a/x86/tests/x64/test-conditional.s.expected b/x86/tests/x64/test-conditional.s.expected deleted file mode 100644 index 40b225a9..00000000 --- a/x86/tests/x64/test-conditional.s.expected +++ /dev/null @@ -1,3 +0,0 @@ -R { funcs = [(0x2b1, [(0x2b1, 14), (0x2ce, 16), (0x2bf, 15)])] - , ignoreBlocks = [0x2de] - } diff --git a/x86/tests/x64/test-direct-calls.exe.expected b/x86/tests/x64/test-direct-calls.exe.expected new file mode 100644 index 00000000..05c2b4ba --- /dev/null +++ b/x86/tests/x64/test-direct-calls.exe.expected @@ -0,0 +1,6 @@ +R { funcs = [ (Addr 1 0x336, [(Addr 1 0x336, 64), (Addr 1 0x376, 22)]) + , (Addr 1 0x2be, [(Addr 1 0x2be, 90), (Addr 1 0x318, 30)]) + , (Addr 1 0x2b1, [(Addr 1 0x2b1, 13)]) + ] + , ignoreBlocks = [Addr 1 0x38c] + } diff --git a/x86/tests/x64/test-direct-calls.s.expected b/x86/tests/x64/test-direct-calls.s.expected deleted file mode 100644 index c13904e2..00000000 --- a/x86/tests/x64/test-direct-calls.s.expected +++ /dev/null @@ -1,6 +0,0 @@ -R { funcs = [ (0x336, [(0x336, 64), (0x376, 22)]) - , (0x2be, [(0x2be, 90), (0x318, 30)]) - , (0x2b1, [(0x2b1, 13)]) - ] - , ignoreBlocks = [0x38c] - } diff --git a/x86/tests/x64/test-indirect-call.exe.expected b/x86/tests/x64/test-indirect-call.exe.expected new file mode 100644 index 00000000..9eb08158 --- /dev/null +++ b/x86/tests/x64/test-indirect-call.exe.expected @@ -0,0 +1,5 @@ +R { funcs = [ (Addr 1 0x2b1, [(Addr 1 0x2b1, 14)]) + , (Addr 1 0x2bf, [(Addr 1 0x2bf, 35), (Addr 1 0x2e2, 22)]) + ] + , ignoreBlocks = [Addr 1 0x2f8] + } diff --git a/x86/tests/x64/test-indirect-call.s.expected b/x86/tests/x64/test-indirect-call.s.expected deleted file mode 100644 index 43e15e50..00000000 --- a/x86/tests/x64/test-indirect-call.s.expected +++ /dev/null @@ -1,5 +0,0 @@ -R { funcs = [ (0x2b1, [(0x2b1, 14)]) - , (0x2bf, [(0x2bf, 35), (0x2e2, 22)]) - ] - , ignoreBlocks = [0x2f8] - } diff --git a/x86/tests/x64/test-jump-into-instruction.exe.expected b/x86/tests/x64/test-jump-into-instruction.exe.expected new file mode 100644 index 00000000..6da776e1 --- /dev/null +++ b/x86/tests/x64/test-jump-into-instruction.exe.expected @@ -0,0 +1,4 @@ +R { funcs = [(Addr 0 0x400144, + [(Addr 0 0x400144, 14), (Addr 0 0x40014c, 6), (Addr 0 0x400152, 7)])] + , ignoreBlocks = [] + } diff --git a/x86/tests/x64/test-jump-into-instruction.s.expected b/x86/tests/x64/test-jump-into-instruction.s.expected deleted file mode 100644 index 1b6bca2d..00000000 --- a/x86/tests/x64/test-jump-into-instruction.s.expected +++ /dev/null @@ -1,3 +0,0 @@ -R { funcs = [(0x400144, [(0x400144, 14), (0x40014c, 6), (0x400152, 7)])] - , ignoreBlocks = [] - } diff --git a/x86/tests/x64/test-jumptable.c b/x86/tests/x64/test-jumptable.c new file mode 100644 index 00000000..09234b2c --- /dev/null +++ b/x86/tests/x64/test-jumptable.c @@ -0,0 +1,32 @@ + +int a(int x); +int b(); +int c(); +int d(); +int e(); + + +int lookup(int i) { + switch (i) { + case 0: + return a(1); + case 1: + return b(); + case 2: + return c(); + case 3: + return d(); + case 4: + return e(); + case 5: + return 5; + case 6: + return 1123213; + case 7: + return 191286; + case 8: + return 921312; + default: + return 0; + } +} diff --git a/x86/tests/x64/test-jumptable.o b/x86/tests/x64/test-jumptable.o new file mode 100644 index 00000000..556c5383 Binary files /dev/null and b/x86/tests/x64/test-jumptable.o differ diff --git a/x86/tests/x64/test-jumptable.o.expected b/x86/tests/x64/test-jumptable.o.expected new file mode 100644 index 00000000..ad60f763 --- /dev/null +++ b/x86/tests/x64/test-jumptable.o.expected @@ -0,0 +1,13 @@ +R { funcs = [(Addr 1 0 + , [ (Addr 1 0, 9), (Addr 1 9,9), (Addr 1 0x18, 6) + , (Addr 1 0x20, 6) + , (Addr 1 0x30, 6) + , (Addr 1 0x40, 6) + , (Addr 1 0x50, 10) + , (Addr 1 0x60, 7) + , (Addr 1 0x70, 7) + , (Addr 1 0x80, 7) + , (Addr 1 0x90, 7) + , (Addr 1 0xa0, 3)])] + , ignoreBlocks = [] + } diff --git a/x86/tests/x64/test-just-exit.exe.expected b/x86/tests/x64/test-just-exit.exe.expected new file mode 100644 index 00000000..cfb1bd15 --- /dev/null +++ b/x86/tests/x64/test-just-exit.exe.expected @@ -0,0 +1,3 @@ +R { funcs = [(Addr 1 0x2b1, [(Addr 1 0x2b1, 20)])] + , ignoreBlocks = [Addr 1 0x2c5] + } diff --git a/x86/tests/x64/test-just-exit.s.expected b/x86/tests/x64/test-just-exit.s.expected deleted file mode 100644 index f2db5dfa..00000000 --- a/x86/tests/x64/test-just-exit.s.expected +++ /dev/null @@ -1,3 +0,0 @@ -R { funcs = [(0x2b1, [(0x2b1, 20)])] - , ignoreBlocks = [0x2c5] - } diff --git a/x86/tests/x64/test-tail-call.exe.expected b/x86/tests/x64/test-tail-call.exe.expected new file mode 100644 index 00000000..13b4b85b --- /dev/null +++ b/x86/tests/x64/test-tail-call.exe.expected @@ -0,0 +1,6 @@ +R { funcs = [ (Addr 1 0x2c0, [(Addr 1 0x2c0, 7)]) + , (Addr 1 0x2d0, [(Addr 1 0x2d0, 11), (Addr 1 0x2c0, 7)]) + , (Addr 1 0x2e0, [(Addr 1 0x2e0, 11), (Addr 1 0x2eb, 16)]) + ] + , ignoreBlocks = [Addr 1 0x2fb] + } diff --git a/x86/tests/x64/test-tail-call.s.expected b/x86/tests/x64/test-tail-call.s.expected deleted file mode 100644 index 28a125fa..00000000 --- a/x86/tests/x64/test-tail-call.s.expected +++ /dev/null @@ -1,5 +0,0 @@ -R { funcs = [ (0x2d0, [(0x2d0, 11), (0x2c0, 7)]) - , (0x2e0, [(0x2e0, 11), (0x2eb, 16)]) - ] - , ignoreBlocks = [0x2fb] - } diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index d98fa803..7fe46ac4 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -31,12 +31,11 @@ module Data.Macaw.X86.Crucible ) where -import Data.Parameterized.NatRepr +import Control.Lens ((^.)) +import Data.Bits hiding (xor) import Data.Parameterized.Context.Unsafe (empty,extend) - -import Data.Bits (shiftR, (.&.)) +import Data.Parameterized.NatRepr import Data.Word (Word8) -import Data.Bits (shiftL,testBit) import GHC.TypeLits (KnownNat) import What4.Interface hiding (IsExpr) @@ -72,8 +71,8 @@ funcSemantics :: SymFuns sym -> M.X86PrimFn (AtomWrapper (RegEntry sym)) mt -> S sym rtp bs r ctx -> IO (RegValue sym t, S sym rtp bs r ctx) -funcSemantics fs x s = do let sym = Sym { symIface = stateSymInterface s - , symTys = stateIntrinsicTypes s +funcSemantics fs x s = do let sym = Sym { symIface = s^.stateSymInterface + , symTys = s^.stateIntrinsicTypes , symFuns = fs } v <- pureSem sym x @@ -140,7 +139,7 @@ pureSem :: (IsSymInterface sym) => IO (RegValue sym (ToCrucibleType mt)) -- ^ Resulting value pureSem sym fn = case fn of - + M.CMPXCHG8B{} -> error "CMPXCHG8B" M.XGetBV {} -> error "XGetBV" M.ReadLoc {} -> error "ReadLoc" M.PShufb {} -> error "PShufb" diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs index 8e82920e..68f3b373 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs @@ -101,15 +101,18 @@ getReg :: forall n t f. (Idx n (ArchRegContext M.X86_64) t) => RegAssign f -> f t getReg x = x ^. (field @n) +x86RegName' :: M.X86Reg tp -> String +x86RegName' M.X86_IP = "ip" +x86RegName' (M.X86_GP r) = show r +x86RegName' (M.X86_FlagReg r) = show r +x86RegName' (M.X87_StatusReg r) = show r +x86RegName' M.X87_TopReg = "x87Top" +x86RegName' (M.X87_TagReg r) = "x87Tag" ++ show r +x86RegName' (M.X87_FPUReg r) = show r +x86RegName' (M.X86_YMMReg r) = show r + x86RegName :: M.X86Reg tp -> C.SolverSymbol -x86RegName M.X86_IP = C.systemSymbol "!ip" -x86RegName (M.X86_GP r) = C.systemSymbol $ "!" ++ show r -x86RegName (M.X86_FlagReg r) = C.systemSymbol $ "!" ++ show r -x86RegName (M.X87_StatusReg r) = C.systemSymbol $ "!x87Status" ++ show r -x86RegName M.X87_TopReg = C.systemSymbol $ "!x87Top" -x86RegName (M.X87_TagReg r) = C.systemSymbol $ "!x87Tag" ++ show r -x86RegName (M.X87_FPUReg r) = C.systemSymbol $ "!" ++ show r -x86RegName (M.X86_YMMReg r) = C.systemSymbol $ "!" ++ show r +x86RegName r = C.systemSymbol $ "r!" ++ x86RegName' r gpReg :: Int -> M.X86Reg (M.BVType 64) gpReg = M.X86_GP . F.Reg64 . fromIntegral diff --git a/x86_symbolic/tests/Main.hs b/x86_symbolic/tests/Main.hs index cdb93a57..ae47fd5f 100644 --- a/x86_symbolic/tests/Main.hs +++ b/x86_symbolic/tests/Main.hs @@ -32,10 +32,11 @@ import qualified Lang.Crucible.Backend as C import qualified Lang.Crucible.Backend.Simple as C import qualified Lang.Crucible.CFG.Core as C import qualified Lang.Crucible.FunctionHandle as C +import Lang.Crucible.LLVM.DataLayout (EndianForm(LittleEndian)) +import qualified Lang.Crucible.LLVM.MemModel as C +import qualified Lang.Crucible.LLVM.MemModel.Pointer as C import qualified Lang.Crucible.Simulator.ExecutionTree as C import qualified Lang.Crucible.Simulator.RegValue as C -import qualified Lang.Crucible.LLVM.MemModel.Pointer as C - mkReg :: (C.IsSymInterface sym, M.HasRepr (M.ArchReg arch) M.TypeRepr) => MS.MacawSymbolicArchFunctions arch @@ -65,8 +66,7 @@ main = do let loadOpt :: Elf.LoadOptions loadOpt = Elf.LoadOptions { Elf.loadRegionIndex = Just 1 - , Elf.loadRegionBaseOffset = ??? --- , Elf.includeBSS = False + , Elf.loadRegionBaseOffset = 0 } putStrLn "Read elf" elfContents <- BS.readFile "tests/add_ubuntu64.o" @@ -121,9 +121,17 @@ main = do symFuns <- MX.newSymFuns sym putStrLn "Run code block" - execResult <- MS.runCodeBlock sym x86ArchFns (MX.x86_64MacawEvalFn symFuns) halloc ??? ??? g regs + initMem <- C.emptyMem LittleEndian + let globalMap :: MS.GlobalMap sym MX.X86_64 + globalMap = Map.empty + let lookupFn :: MS.LookupFunctionHandle sym MX.X86_64 + lookupFn _mem _regs = do + fail "Could not find function handle." + execResult <- + MS.runCodeBlock sym x86ArchFns (MX.x86_64MacawEvalFn symFuns) + halloc (initMem, globalMap) lookupFn g regs case execResult of - (_,C.FinishedExecution _ (C.TotalRes _pair))-> do + (_,C.FinishedResult _ (C.TotalRes _pair))-> do putStrLn "Done" _ -> do fail "Partial execution returned."