From 501a24c47af0ad7117bff659f8e74889371b3ca3 Mon Sep 17 00:00:00 2001 From: Jason Dagit Date: Fri, 7 Jul 2017 13:23:40 -0700 Subject: [PATCH 1/7] Discovery: Rebuild segmented addr meta data in concretizeAbsCodePointers --- src/Data/Macaw/Discovery.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Data/Macaw/Discovery.hs b/src/Data/Macaw/Discovery.hs index 9b5040a3..9154b9e5 100644 --- a/src/Data/Macaw/Discovery.hs +++ b/src/Data/Macaw/Discovery.hs @@ -101,9 +101,10 @@ concretizeAbsCodePointers mem (FinSet s) = , Just sa <- [absoluteAddrSegment mem (fromInteger a)] , Perm.isExecutable (segmentFlags (addrSegment sa)) ] -concretizeAbsCodePointers _ (CodePointers s _) = +concretizeAbsCodePointers mem (CodePointers s _) = [ sa - | sa <- Set.toList s + | a <- Set.toList s + , Just sa <- [absoluteAddrSegment mem (_addrOffset a)] , Perm.isExecutable (segmentFlags (addrSegment sa)) ] -- FIXME: this is dangerous !! From 3fe64acd078fbfcbc54e2ef0bbacd1c8c719616b Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Fri, 7 Jul 2017 14:18:35 -0700 Subject: [PATCH 2/7] Refactor to support Boolean type; start mapping to Crucible. --- macaw-symbolic/macaw-symbolic.cabal | 24 +++++ macaw-symbolic/src/Data/Macaw/Symbolic.hs | 92 +++++++++++++++++++ macaw-symbolic/src/Data/Macaw/Symbolic/App.hs | 37 ++++++++ src/Data/Macaw/AbsDomain/AbsState.hs | 62 +++++-------- src/Data/Macaw/AbsDomain/JumpBounds.hs | 5 +- src/Data/Macaw/AbsDomain/Refine.hs | 13 +-- src/Data/Macaw/CFG/App.hs | 55 ++++++----- src/Data/Macaw/CFG/Core.hs | 36 ++++++-- src/Data/Macaw/CFG/DemandSet.hs | 1 + src/Data/Macaw/CFG/Rewriter.hs | 79 ++++++++-------- src/Data/Macaw/Fold.hs | 2 + src/Data/Macaw/Memory.hs | 3 +- src/Data/Macaw/Types.hs | 51 +++++++++- 13 files changed, 337 insertions(+), 123 deletions(-) create mode 100644 macaw-symbolic/macaw-symbolic.cabal create mode 100644 macaw-symbolic/src/Data/Macaw/Symbolic.hs create mode 100644 macaw-symbolic/src/Data/Macaw/Symbolic/App.hs diff --git a/macaw-symbolic/macaw-symbolic.cabal b/macaw-symbolic/macaw-symbolic.cabal new file mode 100644 index 00000000..798386e9 --- /dev/null +++ b/macaw-symbolic/macaw-symbolic.cabal @@ -0,0 +1,24 @@ +name: macaw-symbolic +version: 0.0.1 +author: Galois, Inc. +maintainer: jhendrix@galois.com +build-type: Simple +cabal-version: >= 1.9.2 + +library + build-depends: + base >= 4, + containers, + crucible, + macaw, + mtl, + parameterized-utils + + hs-source-dirs: src + + exposed-modules: + Data.Macaw.Symbolic.App + Data.Macaw.Symbolic + + ghc-options: -Wall -Werror + ghc-prof-options: -O2 -fprof-auto-top \ No newline at end of file diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic.hs b/macaw-symbolic/src/Data/Macaw/Symbolic.hs new file mode 100644 index 00000000..0d6b3417 --- /dev/null +++ b/macaw-symbolic/src/Data/Macaw/Symbolic.hs @@ -0,0 +1,92 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +module Data.Macaw.Symbolic + ( stepBlocks + ) where + +import Control.Monad.ST +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import qualified Data.Parameterized.Context as Ctx +import Data.Parameterized.Ctx +import qualified Data.Parameterized.Map as MapF +import Data.String +import Data.Word +import qualified Lang.Crucible.CFG.Core as C +import qualified Lang.Crucible.CFG.Reg as CR +import qualified Lang.Crucible.CFG.SSAConversion as C +import qualified Lang.Crucible.Config as C +import qualified Lang.Crucible.FunctionHandle as C +import qualified Lang.Crucible.Simulator.ExecutionTree as C +import qualified Lang.Crucible.Simulator.GlobalState as C +import qualified Lang.Crucible.Simulator.OverrideSim as C +import qualified Lang.Crucible.Simulator.RegMap as C +import Lang.Crucible.Solver.Interface +import Numeric (showHex) +import System.IO (stdout) + +import qualified Data.Macaw.CFG.Block as M + +data ReoptSimulatorState sym = ReoptSimulatorState + +type family ArchRegContext (arch :: *) :: Ctx C.CrucibleType + +type ArchRegStruct (arch :: *) = C.StructType (ArchRegContext arch) + +type MacawFunctionArgs arch = EmptyCtx ::> ArchRegStruct arch +type MacawFunctionResult arch = ArchRegStruct arch + +translateBlock :: Map Word64 (CR.Label s) + -> M.Block arch ids + -> CR.Block s (MacawFunctionResult arch) +translateBlock = undefined + +--data MacawRegClass (tp :: M.TypeRepr) = + +stepBlocks :: forall sym arch ids + . IsSymInterface sym + => sym + -> Ctx.Assignment C.TypeRepr (ArchRegContext arch) + -> Word64 + -> Map Word64 (M.Block arch ids) + -- ^ Map from block indices to block + -> IO (C.ExecResult ReoptSimulatorState sym (C.RegEntry sym (C.StructType (ArchRegContext arch)))) +stepBlocks sym regTypes addr macawBlockMap = do + let macawStructRepr = C.StructRepr regTypes + halloc <- C.newHandleAllocator + let argTypes = Ctx.empty Ctx.%> macawStructRepr + let nm = fromString $ "macaw_0x" ++ showHex addr "" + h <- stToIO $ C.mkHandle' halloc nm argTypes macawStructRepr + -- Map block map to Crucible CFG + let blockLabelMap :: Map Word64 (CR.Label ()) + blockLabelMap = Map.fromList [ (w, CR.Label (fromIntegral w)) | w <- Map.keys macawBlockMap ] + -- Create control flow graph + let rg :: CR.CFG () (MacawFunctionArgs arch) (MacawFunctionResult arch) + rg = CR.CFG { CR.cfgHandle = h + , CR.cfgBlocks = translateBlock blockLabelMap <$> Map.elems macawBlockMap + } + cfg <- C.initialConfig 0 [] + let ctx :: C.SimContext ReoptSimulatorState sym + ctx = C.SimContext { C._ctxSymInterface = sym + , C.ctxSolverProof = \a -> a + , C.ctxIntrinsicTypes = MapF.empty + , C.simConfig = cfg + , C.simHandleAllocator = halloc + , C.printHandle = stdout + , C._functionBindings = C.emptyHandleMap + , C._cruciblePersonality = ReoptSimulatorState + } + -- Create the symbolic simulator state + let s = C.initSimState ctx C.emptyGlobals C.defaultErrorHandler + -- Define the arguments to call the Reopt CFG with. + -- This should be a symbolic variable for each register in the architecture. + let args :: C.RegMap sym (MacawFunctionArgs arch) + args = undefined + -- Run the symbolic simulator. + case C.toSSA rg of + C.SomeCFG g -> + C.runOverrideSim s macawStructRepr $ do + C.regValue <$> C.callCFG g args diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs new file mode 100644 index 00000000..e11a1568 --- /dev/null +++ b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs @@ -0,0 +1,37 @@ +{- +-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeFamilies #-} +{-# OPTIONS_GHC -Wwarn #-} +module Data.Macaw.Symbolic.App where + +import Control.Monad.Identity +import qualified Data.Macaw.CFG as M +import qualified Data.Macaw.Types as M +import qualified Lang.Crucible.CFG.Expr as C +import qualified Lang.Crucible.CFG.Reg as C +import qualified Lang.Crucible.Types as C + +type family ToCrucibleType (mtp :: M.Type) :: C.CrucibleType where + ToCrucibleType (M.BVType w) = C.BVType w + +type CrucGen = Identity + +valueToCrucible :: M.Value arch ids tp -> CrucGen (C.Value s (ToCrucibleType tp)) +valueToCrucible = undefined + +valueToBool :: M.Value arch ids M.BoolType -> CrucGen (C.Value s C.BoolType) +valueToBool = undefined + +crucibleValue :: C.App (C.Value s) ctp -> CrucGen (C.Value s ctp) +crucibleValue = undefined + +appToCrucible :: M.App (M.Value arch ids) tp -> CrucGen (C.Value s (ToCrucibleType tp)) +appToCrucible app = + case app of + M.Mux w c t f -> do + crucibleValue =<< + C.BVIte <$> valueToBool c + <*> pure w + <*> valueToCrucible t + <*> valueToCrucible f diff --git a/src/Data/Macaw/AbsDomain/AbsState.hs b/src/Data/Macaw/AbsDomain/AbsState.hs index 9f7782b0..1bf1294c 100644 --- a/src/Data/Macaw/AbsDomain/AbsState.hs +++ b/src/Data/Macaw/AbsDomain/AbsState.hs @@ -133,7 +133,9 @@ class Eq d => AbsDomain d where -- The first parameter is the width of pointers on the value. It is expected -- to be at most 64 bits. data AbsValue w (tp :: Type) - = forall n . (tp ~ BVType n) => FinSet !(Set Integer) + = (tp ~ BoolType) => BoolConst !Bool + -- ^ A Boolean constant + | forall n . (tp ~ BVType n) => FinSet !(Set Integer) -- ^ Denotes that this value can take any one of the fixed set. | (tp ~ BVType w) => CodePointers !(Set (SegmentedAddr w)) !Bool -- ^ A possibly empty set of values that either point to a code segment. @@ -242,6 +244,7 @@ instance Show (AbsValue w tp) where show = show . pretty instance Pretty (AbsValue w tp) where + pretty (BoolConst b) = text (show b) pretty (FinSet s) = text "finset" <+> ppIntegerSet s pretty (CodePointers s b) = text "code" <+> ppSet (s0 ++ sd) where s0 = if b then [text "0"] else [] @@ -459,6 +462,7 @@ mayBeMember _n _v = True -- | Returns true if this value represents the empty set. isBottom :: AbsValue w tp -> Bool +isBottom BoolConst{} = False isBottom (FinSet v) = Set.null v isBottom (CodePointers v b) = Set.null v && not b isBottom (StackOffset _ v) = Set.null v @@ -745,10 +749,10 @@ instance ShowF r => PrettyRegValue r (AbsValue w) where absTrue :: AbsValue w BoolType -absTrue = FinSet (Set.singleton 1) +absTrue = BoolConst True absFalse :: AbsValue w BoolType -absFalse = FinSet (Set.singleton 0) +absFalse = BoolConst False -- | This returns the smallest abstract value that contains the -- given unsigned integer. @@ -775,19 +779,19 @@ concreteStackOffset a o = StackOffset a (Set.singleton (fromInteger o)) ------------------------------------------------------------------------ -- Restrictions -hasMaximum :: TypeRepr tp -> AbsValue w tp -> Maybe Integer +hasMaximum :: TypeRepr (BVType w) -> AbsValue p (BVType w) -> Maybe Integer hasMaximum tp v = case v of - FinSet s | Set.null s -> Nothing - | otherwise -> Just $! Set.findMax s - CodePointers s b | Set.null s -> if b then Just 0 else Nothing - | otherwise -> Just $ case tp of BVTypeRepr n -> maxUnsigned n - StridedInterval si -> Just (SI.intervalEnd si) - TopV -> Just $ case tp of BVTypeRepr n -> maxUnsigned n - _ -> Nothing + FinSet s | Set.null s -> Nothing + | otherwise -> Just $! Set.findMax s + CodePointers s b | Set.null s -> if b then Just 0 else Nothing + | otherwise -> Just $ case tp of BVTypeRepr n -> maxUnsigned n + StridedInterval si -> Just (SI.intervalEnd si) + TopV -> Just $ case tp of BVTypeRepr n -> maxUnsigned n + _ -> Nothing -hasMinimum :: TypeRepr tp -> AbsValue w tp -> Maybe Integer +hasMinimum :: TypeRepr (BVType w) -> AbsValue p (BVType w) -> Maybe Integer hasMinimum _tp v = case v of FinSet s | Set.null s -> Nothing @@ -807,10 +811,9 @@ abstractULt :: MemWidth w -> AbsValue w tp -> (AbsValue w tp, AbsValue w tp) abstractULt _tp TopV TopV = (TopV, TopV) -abstractULt tp x y +abstractULt tp@(BVTypeRepr n) x y | Just u_y <- hasMaximum tp y - , Just l_x <- hasMinimum tp x - , BVTypeRepr n <- tp = + , Just l_x <- hasMinimum tp x = -- debug DAbsInt' ("abstractLt " ++ show (pretty x) ++ " " ++ show (pretty y) ++ " -> ") ( meet x (stridedInterval $ SI.mkStridedInterval n False 0 (u_y - 1) 1) , meet y (stridedInterval $ SI.mkStridedInterval n False (l_x + 1) @@ -821,14 +824,13 @@ abstractULt _tp x y = (x, y) -- | @abstractULeq x y@ refines x and y with the knowledge that @x <= y@ abstractULeq :: MemWidth w => TypeRepr tp - -> AbsValue w tp - -> AbsValue w tp - -> (AbsValue w tp, AbsValue w tp) + -> AbsValue w tp + -> AbsValue w tp + -> (AbsValue w tp, AbsValue w tp) abstractULeq _tp TopV TopV = (TopV, TopV) -abstractULeq tp x y +abstractULeq tp@(BVTypeRepr n) x y | Just u_y <- hasMaximum tp y - , Just l_x <- hasMinimum tp x - , BVTypeRepr n <- tp = + , Just l_x <- hasMinimum tp x = ( meet x (stridedInterval $ SI.mkStridedInterval n False 0 u_y 1) , meet y (stridedInterval $ SI.mkStridedInterval n False l_x (maxUnsigned n) 1)) @@ -1099,21 +1101,6 @@ deleteRange l h m deleteRange (k+1) h (Map.delete k m) _ -> m --- Return the width of a value. -someValueWidth :: ( HasRepr (ArchReg arch) TypeRepr - ) - => Value arch ids tp - -> Integer -someValueWidth v = - case typeRepr v of - BVTypeRepr w -> natValue w - -valueByteSize :: ( HasRepr (ArchReg arch) TypeRepr - ) - => Value arch ids tp - -> Int64 -valueByteSize v = fromInteger $ (someValueWidth v + 7) `div` 8 - -- | Prune stack based on write that may modify stack. pruneStack :: AbsBlockStack w -> AbsBlockStack w pruneStack = Map.filter f @@ -1143,6 +1130,7 @@ transferValue c v = do amap = c^.absAssignments aregs = c^.absInitialRegs case v of + BoolValue b -> BoolConst b BVValue w i | 0 <= i && i <= maxUnsigned w -> abstractSingleton code_width is_code w i | otherwise -> error $ "transferValue given illegal value " ++ show (pretty v) @@ -1183,7 +1171,7 @@ addMemWrite a memRepr v r = ++" in SomeStackOffset case") $ r & curAbsStack %~ pruneStack (StackOffset _ s, v_abs) -> - let w = valueByteSize v + let w = fromInteger (memReprBytes memRepr) e = StackEntry memRepr v_abs stk0 = r^.curAbsStack -- Delete information about old assignment diff --git a/src/Data/Macaw/AbsDomain/JumpBounds.hs b/src/Data/Macaw/AbsDomain/JumpBounds.hs index a45d6e01..dcc5368d 100644 --- a/src/Data/Macaw/AbsDomain/JumpBounds.hs +++ b/src/Data/Macaw/AbsDomain/JumpBounds.hs @@ -169,13 +169,14 @@ assertPred _ _ bnds = Right bnds -- | Lookup an upper bound or return analysis for why it is not defined. unsignedUpperBound :: ( MapF.OrdF (ArchReg arch) - , MapF.ShowF (ArchReg arch) - ) + , MapF.ShowF (ArchReg arch) + ) => IndexBounds (ArchReg arch) ids -> Value arch ids tp -> Either String (UpperBound tp) unsignedUpperBound bnds v = case v of + BoolValue _ -> Left "Boolean values do not have bounds." BVValue _ i -> Right (IntegerUpperBound i) RelocatableValue{} -> Left "Relocatable values do not have bounds." diff --git a/src/Data/Macaw/AbsDomain/Refine.hs b/src/Data/Macaw/AbsDomain/Refine.hs index 5c7a8d2f..a159cbb7 100644 --- a/src/Data/Macaw/AbsDomain/Refine.hs +++ b/src/Data/Macaw/AbsDomain/Refine.hs @@ -43,6 +43,7 @@ refineProcState :: RefineConstraints arch -> AbsValue (ArchAddrWidth arch) tp -- ^ Abstract value to assign value. -> AbsProcessorState (ArchReg arch) ids -> AbsProcessorState (ArchReg arch) ids +refineProcState (BoolValue _) _av regs = regs -- Skip refinment for literal values refineProcState (BVValue _n _val) _av regs = regs -- Skip refinment for literal values refineProcState (RelocatableValue _ _) _av regs = regs -- Skip refinment for relocatable values refineProcState (Initial r) av regs = @@ -84,8 +85,8 @@ refineApp app av regs = -- FIXME: HACK -- This detects r - x < 0 || r - x == 0, i.e. r <= x - BVOr _ (getAssignApp -> Just (UsbbOverflows _ r xv@(BVValue sz x) (BVValue _ 0))) - (getAssignApp -> Just (BVEq (getAssignApp -> Just (BVAdd _ r' y)) (BVValue _ 0))) + OrApp (getAssignApp -> Just (UsbbOverflows _ r xv@(BVValue sz x) (BoolValue False))) + (getAssignApp -> Just (Eq (getAssignApp -> Just (BVAdd _ r' y)) (BVValue _ 0))) | Just Refl <- testEquality r r' , Just Refl <- testEquality y (mkLit sz (negate x)) , Just b <- asConcreteSingleton av -> @@ -93,10 +94,10 @@ refineApp app av regs = -- FIXME: HACK -- This detects not (r - x < 0) && not (r - x == 0), i.e. x < r - BVAnd _ (getAssignApp -> Just (BVComplement _ - (getAssignApp -> Just (UsbbOverflows _ r xv@(BVValue sz x) (BVValue _ 0))))) - (getAssignApp -> Just (BVComplement _ - (getAssignApp -> Just (BVEq (getAssignApp -> Just (BVAdd _ r' y)) (BVValue _ 0))))) + AndApp (getAssignApp -> Just + (NotApp (getAssignApp -> Just (UsbbOverflows _ r xv@(BVValue sz x) (BoolValue False))))) + (getAssignApp -> Just + (NotApp (getAssignApp -> Just (Eq (getAssignApp -> Just (BVAdd _ r' y)) (BVValue _ 0))))) | Just Refl <- testEquality r r' , Just Refl <- testEquality y (mkLit sz (negate x)) , Just b <- asConcreteSingleton av -> diff --git a/src/Data/Macaw/CFG/App.hs b/src/Data/Macaw/CFG/App.hs index 4f628634..7bde4875 100644 --- a/src/Data/Macaw/CFG/App.hs +++ b/src/Data/Macaw/CFG/App.hs @@ -66,42 +66,44 @@ data App (f :: Type -> *) (tp :: Type) where -- Truncate a bitvector value. Trunc :: (1 <= n, n+1 <= m) => !(f (BVType m)) -> !(NatRepr n) -> App f (BVType n) -- Signed extension. - SExt :: (1 <= m, m+1 <= n) => f (BVType m) -> NatRepr n -> App f (BVType n) + SExt :: (1 <= m, m+1 <= n, 1 <= n) => f (BVType m) -> NatRepr n -> App f (BVType n) -- Unsigned extension. - UExt :: (1 <= m, m+1 <= n) => f (BVType m) -> NatRepr n -> App f (BVType n) + UExt :: (1 <= m, m+1 <= n, 1 <= n) => f (BVType m) -> NatRepr n -> App f (BVType n) ---------------------------------------------------------------------- -- Boolean operations + BoolMux :: !(f BoolType) -> !(f BoolType) -> !(f BoolType) -> App f BoolType AndApp :: !(f BoolType) -> !(f BoolType) -> App f BoolType OrApp :: !(f BoolType) -> !(f BoolType) -> App f BoolType NotApp :: !(f BoolType) -> App f BoolType + XorApp :: !(f BoolType) -> !(f BoolType) -> App f BoolType ---------------------------------------------------------------------- -- Bitvector operations - BVAdd :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) - BVSub :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVAdd :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVSub :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Multiply two numbers - BVMul :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVMul :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Unsigned division (rounds fractions to zero). -- -- This operation is not defined when the denominator is zero. The -- caller should raise a #de exception in this case (see -- 'Reopt.Semantics.Implementation.exec_div'). - BVQuot :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVQuot :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Unsigned modulo (rounds fractional results to zero) -- -- See 'BVQuot' for usage. - BVRem :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVRem :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Signed division (rounds fractional results to zero). -- -- See 'BVQuot' for usage. - BVSignedQuot :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVSignedQuot :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Signed modulo (rounds fractional results to zero). -- @@ -110,13 +112,13 @@ data App (f :: Type -> *) (tp :: Type) where -- x = (y * BVSignedQuot x y) + BVSignedRem x y -- -- See 'BVQuot' for usage. - BVSignedRem :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVSignedRem :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Unsigned less than. - BVUnsignedLt :: !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType + BVUnsignedLt :: (1 <= n) => !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType -- Unsigned less than or equal. - BVUnsignedLe :: !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType + BVUnsignedLe :: (1 <= n) => !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType -- Signed less than BVSignedLt :: (1 <= n) => !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType @@ -129,29 +131,29 @@ data App (f :: Type -> *) (tp :: Type) where BVTestBit :: !(f (BVType n)) -> !(f (BVType log_n)) -> App f BoolType -- Bitwise complement - BVComplement :: !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) + BVComplement :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) -- Bitwise and - BVAnd :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVAnd :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Bitwise or - BVOr :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVOr :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Exclusive or - BVXor :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVXor :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Logical left shift (x * 2 ^ n) - BVShl :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVShl :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Logical right shift (x / 2 ^ n) - BVShr :: !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) + BVShr :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Arithmetic right shift (x / 2 ^ n) BVSar :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> !(f (BVType n)) -> App f (BVType n) -- Compare for equality. - BVEq :: !(f (BVType n)) -> !(f (BVType n)) -> App f BoolType + Eq :: !(f tp) -> !(f tp) -> App f BoolType -- Return true if value contains even number of true bits. EvenParity :: !(f (BVType 8)) -> App f BoolType -- Reverse the bytes in a bitvector expression. - ReverseBytes :: !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) + ReverseBytes :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) -- Add two values and a carry bit to determine if they have an unsigned -- overflow. @@ -186,12 +188,12 @@ data App (f :: Type -> *) (tp :: Type) where -- bsf "bit scan forward" returns the index of the least-significant -- bit that is 1. Undefined if value is zero. -- All bits at indices less than return value must be unset. - Bsf :: !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) + Bsf :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) -- bsr "bit scan reverse" returns the index of the most-significant -- bit that is 1. Undefined if value is zero. -- All bits at indices greater than return value must be unset. - Bsr :: !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) + Bsr :: (1 <= n) => !(NatRepr n) -> !(f (BVType n)) -> App f (BVType n) ---------------------------------------------------------------------- -- Floating point operations @@ -273,7 +275,8 @@ data App (f :: Type -> *) (tp :: Type) where -- If the conversion is out of the range of the bitvector, then a floating -- point exception should be raised. -- If that exception is masked, then this returns -1 (as a signed bitvector). - TruncFPToSignedBV :: FloatInfoRepr flt + TruncFPToSignedBV :: (1 <= n) + => FloatInfoRepr flt -> f (FloatType flt) -> NatRepr n -> App f (BVType n) @@ -381,9 +384,11 @@ ppAppA pp a0 = Trunc x w -> sexprA "trunc" [ pp x, ppNat w ] SExt x w -> sexprA "sext" [ pp x, ppNat w ] UExt x w -> sexprA "uext" [ pp x, ppNat w ] + BoolMux c t f -> sexprA "bool_mux" [ pp c, pp t, pp f ] AndApp x y -> sexprA "and" [ pp x, pp y ] OrApp x y -> sexprA "or" [ pp x, pp y ] NotApp x -> sexprA "not" [ pp x ] + XorApp x y -> sexprA "xor" [ pp x, pp y ] BVAdd _ x y -> sexprA "bv_add" [ pp x, pp y ] BVSub _ x y -> sexprA "bv_sub" [ pp x, pp y ] BVMul _ x y -> sexprA "bv_mul" [ pp x, pp y ] @@ -403,7 +408,7 @@ ppAppA pp a0 = BVShl _ x y -> sexprA "bv_shl" [ pp x, pp y ] BVShr _ x y -> sexprA "bv_shr" [ pp x, pp y ] BVSar _ x y -> sexprA "bv_sar" [ pp x, pp y ] - BVEq x y -> sexprA "bv_eq" [ pp x, pp y ] + Eq x y -> sexprA "eq" [ pp x, pp y ] EvenParity x -> sexprA "even_parity" [ pp x ] ReverseBytes _ x -> sexprA "reverse_bytes" [ pp x ] UadcOverflows _ x y c -> sexprA "uadc_overflows" [ pp x, pp y, pp c ] @@ -442,9 +447,11 @@ instance HasRepr (App f) TypeRepr where SExt _ w -> BVTypeRepr w UExt _ w -> BVTypeRepr w + BoolMux{} -> knownType AndApp{} -> knownType OrApp{} -> knownType NotApp{} -> knownType + XorApp{} -> knownType BVAdd w _ _ -> BVTypeRepr w BVSub w _ _ -> BVTypeRepr w @@ -467,7 +474,7 @@ instance HasRepr (App f) TypeRepr where BVShl w _ _ -> BVTypeRepr w BVShr w _ _ -> BVTypeRepr w BVSar w _ _ -> BVTypeRepr w - BVEq _ _ -> knownType + Eq _ _ -> knownType EvenParity _ -> knownType ReverseBytes w _ -> BVTypeRepr w diff --git a/src/Data/Macaw/CFG/Core.hs b/src/Data/Macaw/CFG/Core.hs index f1c07306..7bb34d7a 100644 --- a/src/Data/Macaw/CFG/Core.hs +++ b/src/Data/Macaw/CFG/Core.hs @@ -93,6 +93,7 @@ import Data.Parameterized.NatRepr import Data.Parameterized.Nonce import Data.Parameterized.Some import Data.Parameterized.TraversableF +import Data.Proxy import Data.Set (Set) import qualified Data.Set as Set import Data.Text (Text) @@ -212,10 +213,15 @@ type ArchSegmentedAddr arch = SegmentedAddr (ArchAddrWidth arch) -- | A value at runtime. data Value arch ids tp = forall n - . (tp ~ BVType n) + . (tp ~ BVType n, 1 <= n) => BVValue !(NatRepr n) !Integer -- ^ A constant bitvector - | ( tp ~ BVType (ArchAddrWidth arch)) + | (tp ~ BoolType) + => BoolValue !Bool + -- ^ A constant Boolean + | ( tp ~ BVType (ArchAddrWidth arch) + , 1 <= ArchAddrWidth arch + ) => RelocatableValue !(NatRepr (ArchAddrWidth arch)) !(ArchSegmentedAddr arch) -- ^ A given memory address. | AssignedValue !(Assignment arch ids tp) @@ -241,7 +247,7 @@ data Assignment arch ids tp = -- or least significant byte. The following indices either store -- the next lower or higher bytes. data MemRepr (tp :: Type) where - BVMemRepr :: !(NatRepr w) -> !Endianness -> MemRepr (BVType (8*w)) + BVMemRepr :: (1 <= w) => !(NatRepr w) -> !Endianness -> MemRepr (BVType (8*w)) instance Pretty (MemRepr tp) where pretty (BVMemRepr w BigEndian) = text "bvbe" <+> text (show w) @@ -260,7 +266,10 @@ instance TestEquality MemRepr where Nothing instance HasRepr MemRepr TypeRepr where - typeRepr (BVMemRepr w _) = BVTypeRepr (natMultiply n8 w) + typeRepr (BVMemRepr w _) = + let r = (natMultiply n8 w) + in case leqMulPos (Proxy :: Proxy 8) w of + LeqProof -> BVTypeRepr r -- | The right hand side of an assignment is an expression that -- returns a value. @@ -270,8 +279,7 @@ data AssignRhs (arch :: *) ids tp where -> AssignRhs arch ids tp -- An expression with an undefined value. - SetUndefined :: (tp ~ BVType n) - => !(NatRepr n) -- Width of undefined value. + SetUndefined :: !(TypeRepr tp) -> AssignRhs arch ids tp -- Read memory at given location. @@ -291,7 +299,7 @@ instance HasRepr (AssignRhs arch ids) TypeRepr where typeRepr rhs = case rhs of EvalApp a -> typeRepr a - SetUndefined w -> BVTypeRepr w + SetUndefined tp -> tp ReadMem _ tp -> typeRepr tp EvalArchFn _ rtp -> rtp @@ -299,6 +307,7 @@ instance ( HasRepr (ArchReg arch) TypeRepr ) => HasRepr (Value arch ids) TypeRepr where + typeRepr (BoolValue _) = BoolTypeRepr typeRepr (BVValue w _) = BVTypeRepr w typeRepr (RelocatableValue w _) = BVTypeRepr w typeRepr (AssignedValue a) = typeRepr (assignRhs a) @@ -316,6 +325,11 @@ valueWidth v = instance OrdF (ArchReg arch) => OrdF (Value arch ids) where + + compareF (BoolValue x) (BoolValue y) = fromOrdering (compare x y) + compareF BoolValue{} _ = LTF + compareF _ BoolValue{} = GTF + compareF (BVValue wx vx) (BVValue wy vy) = case compareF wx wy of LTF -> LTF @@ -324,6 +338,7 @@ instance OrdF (ArchReg arch) compareF BVValue{} _ = LTF compareF _ BVValue{} = GTF + compareF (RelocatableValue _ x) (RelocatableValue _ y) = fromOrdering (compare x y) compareF RelocatableValue{} _ = LTF @@ -359,11 +374,11 @@ instance OrdF (ArchReg arch) ------------------------------------------------------------------------ -- Value operations -mkLit :: NatRepr n -> Integer -> Value arch ids (BVType n) +mkLit :: (1 <= n) => NatRepr n -> Integer -> Value arch ids (BVType n) mkLit n v = BVValue n (v .&. mask) where mask = maxUnsigned n -bvValue :: KnownNat n => Integer -> Value arch ids (BVType n) +bvValue :: (KnownNat n, 1 <= n) => Integer -> Value arch ids (BVType n) bvValue i = mkLit knownNat i valueAsApp :: Value arch ids tp -> Maybe (App (Value arch ids) tp) @@ -509,6 +524,7 @@ ppLit w i -- | Pretty print a value. ppValue :: ShowF (ArchReg arch) => Prec -> Value arch ids tp -> Doc +ppValue _ (BoolValue b) = text $ if b then "true" else "false" ppValue p (BVValue w i) = assert (i >= 0) $ parenIf (p > colonPrec) $ ppLit w i ppValue p (RelocatableValue _ a) = parenIf (p > plusPrec) $ text (show a) ppValue _ (AssignedValue a) = ppAssignId (assignId a) @@ -542,7 +558,7 @@ ppAssignRhs :: (Applicative m, ArchConstraints arch) -> AssignRhs arch ids tp -> m Doc ppAssignRhs pp (EvalApp a) = ppAppA pp a -ppAssignRhs _ (SetUndefined w) = pure $ text "undef ::" <+> brackets (text (show w)) +ppAssignRhs _ (SetUndefined tp) = pure $ text "undef ::" <+> brackets (text (show tp)) ppAssignRhs pp (ReadMem a repr) = (\d -> text "read_mem" <+> d <+> PP.parens (pretty repr)) <$> pp a ppAssignRhs pp (EvalArchFn f _) = ppArchFn pp f diff --git a/src/Data/Macaw/CFG/DemandSet.hs b/src/Data/Macaw/CFG/DemandSet.hs index 4090457e..30a55c6f 100644 --- a/src/Data/Macaw/CFG/DemandSet.hs +++ b/src/Data/Macaw/CFG/DemandSet.hs @@ -90,6 +90,7 @@ addAssignmentDemands a = do addValueDemands :: Value arch ids tp -> DemandComp arch ids () addValueDemands v = do case v of + BoolValue{} -> pure () BVValue{} -> pure () RelocatableValue{} -> pure () AssignedValue a -> addAssignmentDemands a diff --git a/src/Data/Macaw/CFG/Rewriter.hs b/src/Data/Macaw/CFG/Rewriter.hs index 39adfed1..0c880bbc 100644 --- a/src/Data/Macaw/CFG/Rewriter.hs +++ b/src/Data/Macaw/CFG/Rewriter.hs @@ -118,13 +118,6 @@ addBinding srcId val = Rewriter $ do fail $ "Assignment " ++ show srcId ++ " is already bound." srcAssignedValues .= MapF.insert srcId val m -asApp :: Value arch tgt tp -> Maybe (App (Value arch tgt) tp) -asApp (AssignedValue a) = - case assignRhs a of - EvalApp app -> Just app - _ -> Nothing -asApp _ = Nothing - -- | Return true if values are identical identValue :: TestEquality (ArchReg arch) => Value arch tgt tp -> Value arch tgt tp -> Bool identValue (BVValue _ x) (BVValue _ y) = x == y @@ -134,16 +127,15 @@ identValue (Initial x) (Initial y) | Just Refl <- testEquality x y = True identValue _ _ = False boolLitValue :: Bool -> Value arch ids BoolType -boolLitValue True = BVValue n1 1 -boolLitValue False = BVValue n1 0 +boolLitValue = BoolValue rewriteApp :: App (Value arch tgt) tp -> Rewriter arch src tgt (Value arch tgt tp) rewriteApp app = do ctx <- Rewriter $ gets rwContext rwctxConstraints ctx $ do case app of - Mux _ (BVValue _ c) t f -> do - if c `testBit` 0 then + Mux _ (BoolValue c) t f -> do + if c then pure t else pure f @@ -155,21 +147,21 @@ rewriteApp app = do UExt (BVValue _ x) w -> do pure $ BVValue w x - AndApp (BVValue _ xc) y -> do - if xc `testBit` 0 then + AndApp (BoolValue xc) y -> do + if xc then pure y else - pure (BVValue n1 0) - AndApp x y@BVValue{} -> rewriteApp (AndApp y x) + pure (BoolValue False) + AndApp x y@BoolValue{} -> rewriteApp (AndApp y x) - OrApp (BVValue _ xc) y -> do - if xc `testBit` 0 then - pure (BVValue n1 1) + OrApp (BoolValue xc) y -> do + if xc then + pure (BoolValue True) else pure y - OrApp x y@BVValue{} -> rewriteApp (OrApp y x) - NotApp (BVValue _ xc) -> - pure $ boolLitValue (not (xc `testBit` 0)) + OrApp x y@BoolValue{} -> rewriteApp (OrApp y x) + NotApp (BoolValue b) -> + pure $ boolLitValue (not b) BVAdd _ x (BVValue _ 0) -> do pure x @@ -179,13 +171,13 @@ rewriteApp app = do BVAdd w (BVValue _ x) y -> do rewriteApp (BVAdd w y (BVValue w x)) -- (x + yc) + zc -> x + (yc + zc) - BVAdd w (asApp -> Just (BVAdd _ x (BVValue _ yc))) (BVValue _ zc) -> do + BVAdd w (valueAsApp -> Just (BVAdd _ x (BVValue _ yc))) (BVValue _ zc) -> do rewriteApp (BVAdd w x (BVValue w (toUnsigned w (yc + zc)))) -- (x - yc) + zc -> x + (zc - yc) - BVAdd w (asApp -> Just (BVSub _ x (BVValue _ yc))) (BVValue _ zc) -> do + BVAdd w (valueAsApp -> Just (BVSub _ x (BVValue _ yc))) (BVValue _ zc) -> do rewriteApp (BVAdd w x (BVValue w (toUnsigned w (zc - yc)))) -- (xc - y) + zc => (xc + zc) - y - BVAdd w (asApp -> Just (BVSub _ (BVValue _ xc) y)) (BVValue _ zc) -> do + BVAdd w (valueAsApp -> Just (BVSub _ (BVValue _ xc) y)) (BVValue _ zc) -> do rewriteApp (BVSub w (BVValue w (toUnsigned w (xc + zc))) y) -- x - yc = x + (negate yc) @@ -209,18 +201,16 @@ rewriteApp app = do BVTestBit (BVValue xw xc) (BVValue _ ic) | ic < min (natValue xw) (toInteger (maxBound :: Int)) -> do let v = xc `testBit` fromInteger ic pure $! boolLitValue v - BVTestBit x (BVValue _ 0) | Just Refl <- testEquality (typeWidth x) n1 -> do - pure x - BVTestBit (asApp -> Just (UExt x _)) i@(BVValue _ ic) -> do + BVTestBit (valueAsApp -> Just (UExt x _)) i@(BVValue _ ic) -> do let xw = typeWidth x if ic < natValue xw then rewriteApp (BVTestBit x i) else - pure (BVValue n1 0) - BVTestBit (asApp -> Just (BVXor _ x y@BVValue{})) i -> do + pure (BoolValue False) + BVTestBit (valueAsApp -> Just (BVXor _ x y@BVValue{})) i -> do xb <- rewriteApp (BVTestBit x i) yb <- rewriteApp (BVTestBit y i) - rewriteApp (BVXor n1 xb yb) + rewriteApp (XorApp xb yb) BVComplement w (BVValue _ x) -> do pure (BVValue w (toUnsigned w (complement x))) @@ -262,23 +252,35 @@ rewriteApp app = do let s = min y (natValue w) pure (BVValue w (toUnsigned w (toSigned w x `shiftR` fromInteger s))) + Eq (BoolValue x) (BoolValue y) -> do + pure $! boolLitValue (x == y) - BVEq (BVValue _ x) (BVValue _ y) -> do + Eq (BoolValue True) y -> do + pure $! y + Eq (BoolValue False) y -> do + rewriteApp $ NotApp y + + Eq x (BoolValue True) -> do + pure $! x + Eq x (BoolValue False) -> do + rewriteApp $ NotApp x + + Eq (BVValue _ x) (BVValue _ y) -> do pure $! boolLitValue (x == y) -- Move constant to right hand side. - BVEq x@BVValue{} y -> do - rewriteApp (BVEq y x) + Eq x@BVValue{} y -> do + rewriteApp (Eq y x) - BVEq (asApp -> Just (BVAdd w x (BVValue _ o))) (BVValue _ yc) -> do - rewriteApp (BVEq x (BVValue w (toUnsigned w (yc - o)))) + Eq (valueAsApp -> Just (BVAdd w x (BVValue _ o))) (BVValue _ yc) -> do + rewriteApp (Eq x (BVValue w (toUnsigned w (yc - o)))) - BVEq (asApp -> Just (UExt x _)) (BVValue _ yc) -> do + Eq (valueAsApp -> Just (UExt x _)) (BVValue _ yc) -> do let u = typeWidth x if yc > maxUnsigned u then - pure (BVValue n1 0) + pure (BoolValue False) else - rewriteApp (BVEq x (BVValue u (toUnsigned u yc))) + rewriteApp (Eq x (BVValue u (toUnsigned u yc))) _ -> evalRewrittenRhs (EvalApp app) @@ -299,6 +301,7 @@ rewriteAssignRhs rhs = rewriteValue :: Value arch src tp -> Rewriter arch src tgt (Value arch tgt tp) rewriteValue v = case v of + BoolValue b -> pure (BoolValue b) BVValue w i -> pure (BVValue w i) RelocatableValue w a -> pure (RelocatableValue w a) AssignedValue (Assignment aid _) -> do diff --git a/src/Data/Macaw/Fold.hs b/src/Data/Macaw/Fold.hs index d6ea5b91..dbf04d76 100644 --- a/src/Data/Macaw/Fold.hs +++ b/src/Data/Macaw/Fold.hs @@ -5,6 +5,7 @@ Maintainer : Joe Hendrix This module provides a function for folding over the subexpressions in a value without revisiting shared subterms. -} +{-# LANGUAGE DataKinds #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -73,6 +74,7 @@ foldValueCached litf rwf initf assignf = getStateMonadMonoid . go -> StateMonadMonoid (Map (Some (AssignId ids)) m) m go v = case v of + BoolValue b -> return (litf (knownNat :: NatRepr 1) (if b then 1 else 0)) BVValue sz i -> return $ litf sz i RelocatableValue w a -> pure $ rwf w a Initial r -> return $ initf r diff --git a/src/Data/Macaw/Memory.hs b/src/Data/Macaw/Memory.hs index 5efe17df..4af40091 100644 --- a/src/Data/Macaw/Memory.hs +++ b/src/Data/Macaw/Memory.hs @@ -14,6 +14,7 @@ n-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} module Data.Macaw.Memory ( Memory @@ -190,7 +191,7 @@ instance Ord (MemWord w) where compare (MemWord x) (MemWord y) = compare x y -- | Typeclass for legal memory widths -class MemWidth w where +class (1 <= w) => MemWidth w where -- | @addrWidthMod w@ returns @2^(8 * addrSize w - 1)@. addrWidthMod :: p w -> Word64 diff --git a/src/Data/Macaw/Types.hs b/src/Data/Macaw/Types.hs index 0ac06032..d1d96513 100644 --- a/src/Data/Macaw/Types.hs +++ b/src/Data/Macaw/Types.hs @@ -73,8 +73,10 @@ data Type | X86_80Float -- | 128 bit binary IEE754 | QuadFloat - -- | 16 bit binary IEE754 + -- | 16 bit binary IEE754 | HalfFloat + -- | A Boolean value + | BoolType -- Return number of bytes in the type. type family TypeBytes (tp :: Type) :: Nat where @@ -101,31 +103,45 @@ type FloatType tp = BVType (8 * TypeBytes tp) type BVType = 'BVType -type BoolType = BVType 1 +type BoolType = 'BoolType -- | A runtime representation of @Type@ for case matching purposes. data TypeRepr (tp :: Type) where - BVTypeRepr :: !(NatRepr n) -> TypeRepr (BVType n) + BoolTypeRepr :: TypeRepr BoolType + BVTypeRepr :: (1 <= n) => !(NatRepr n) -> TypeRepr (BVType n) type_width :: TypeRepr (BVType n) -> NatRepr n type_width (BVTypeRepr n) = n instance TestEquality TypeRepr where + testEquality BoolTypeRepr BoolTypeRepr = do + return Refl testEquality (BVTypeRepr m) (BVTypeRepr n) = do Refl <- testEquality m n return Refl + testEquality _ _ = Nothing instance OrdF TypeRepr where + compareF BoolTypeRepr BoolTypeRepr = EQF + compareF BoolTypeRepr _ = LTF + compareF _ BoolTypeRepr = GTF compareF (BVTypeRepr m) (BVTypeRepr n) = do case compareF m n of LTF -> LTF EQF -> EQF GTF -> GTF +instance Show (TypeRepr tp) where + show BoolTypeRepr = "bool" + show (BVTypeRepr w) = "[" ++ show w ++ "]" + class KnownType tp where knownType :: TypeRepr tp -instance KnownNat n => KnownType (BVType n) where +instance KnownType BoolType where + knownType = BoolTypeRepr + +instance (KnownNat n, 1 <= n) => KnownType (BVType n) where knownType = BVTypeRepr knownNat ------------------------------------------------------------------------ @@ -185,11 +201,36 @@ floatInfoBytes fir = QuadFloatRepr -> knownNat X86_80FloatRepr -> knownNat +floatInfoBytesIsPos :: FloatInfoRepr flt -> LeqProof 1 (TypeBytes flt) +floatInfoBytesIsPos fir = + case fir of + HalfFloatRepr -> LeqProof + SingleFloatRepr -> LeqProof + DoubleFloatRepr -> LeqProof + QuadFloatRepr -> LeqProof + X86_80FloatRepr -> LeqProof + + floatInfoBits :: FloatInfoRepr flt -> NatRepr (8 * TypeBytes flt) floatInfoBits fir = natMultiply (knownNat :: NatRepr 8) (floatInfoBytes fir) floatTypeRepr :: FloatInfoRepr flt -> TypeRepr (BVType (8 * TypeBytes flt)) -floatTypeRepr fir = BVTypeRepr (floatInfoBits fir) +floatTypeRepr fir = + case fir of + HalfFloatRepr -> knownType + SingleFloatRepr -> knownType + DoubleFloatRepr -> knownType + QuadFloatRepr -> knownType + X86_80FloatRepr -> knownType + +floatInfoBitsIsPos :: FloatInfoRepr flt -> LeqProof 1 (8 * TypeBytes flt) +floatInfoBitsIsPos fir = + case fir of + HalfFloatRepr -> LeqProof + SingleFloatRepr -> LeqProof + DoubleFloatRepr -> LeqProof + QuadFloatRepr -> LeqProof + X86_80FloatRepr -> LeqProof ------------------------------------------------------------------------ -- From 876382fc0e8417c35892e76934a42515a7127b0b Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Mon, 10 Jul 2017 12:30:40 -0700 Subject: [PATCH 3/7] Additional support for symbolic translator. --- macaw-symbolic/macaw-symbolic.cabal | 14 +- macaw-symbolic/src/Data/Macaw/Symbolic.hs | 19 +- macaw-symbolic/src/Data/Macaw/Symbolic/App.hs | 237 +++++++++++++++++- src/Data/Macaw/CFG/Core.hs | 7 + src/Data/Macaw/Memory.hs | 2 +- 5 files changed, 257 insertions(+), 22 deletions(-) diff --git a/macaw-symbolic/macaw-symbolic.cabal b/macaw-symbolic/macaw-symbolic.cabal index 798386e9..cd914ee5 100644 --- a/macaw-symbolic/macaw-symbolic.cabal +++ b/macaw-symbolic/macaw-symbolic.cabal @@ -7,12 +7,14 @@ cabal-version: >= 1.9.2 library build-depends: - base >= 4, - containers, - crucible, - macaw, - mtl, - parameterized-utils + base >= 4, + containers, + crucible, + lens, + macaw, + mtl, + parameterized-utils, + text hs-source-dirs: src diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic.hs b/macaw-symbolic/src/Data/Macaw/Symbolic.hs index 0d6b3417..e63b4cbe 100644 --- a/macaw-symbolic/src/Data/Macaw/Symbolic.hs +++ b/macaw-symbolic/src/Data/Macaw/Symbolic.hs @@ -13,6 +13,7 @@ import qualified Data.Map.Strict as Map import qualified Data.Parameterized.Context as Ctx import Data.Parameterized.Ctx import qualified Data.Parameterized.Map as MapF +import qualified Data.Set as Set import Data.String import Data.Word import qualified Lang.Crucible.CFG.Core as C @@ -40,11 +41,18 @@ type MacawFunctionArgs arch = EmptyCtx ::> ArchRegStruct arch type MacawFunctionResult arch = ArchRegStruct arch translateBlock :: Map Word64 (CR.Label s) + -- ^ Map from block indices to the associated label. -> M.Block arch ids - -> CR.Block s (MacawFunctionResult arch) -translateBlock = undefined - ---data MacawRegClass (tp :: M.TypeRepr) = + -> Either String (CR.Block s (MacawFunctionResult arch)) +translateBlock idMap b = do + let idx = M.blockLabel b + lbl <- + case Map.lookup idx idMap of + Just lbl -> Right (CR.LabelID lbl) + Nothing -> Left $ "Internal: Could not find block with index " ++ show idx + let stmts = undefined + term = undefined + pure $ CR.mkBlock lbl Set.empty stmts term stepBlocks :: forall sym arch ids . IsSymInterface sym @@ -63,10 +71,11 @@ stepBlocks sym regTypes addr macawBlockMap = do -- Map block map to Crucible CFG let blockLabelMap :: Map Word64 (CR.Label ()) blockLabelMap = Map.fromList [ (w, CR.Label (fromIntegral w)) | w <- Map.keys macawBlockMap ] + let Right blks = traverse (translateBlock blockLabelMap) $ Map.elems macawBlockMap -- Create control flow graph let rg :: CR.CFG () (MacawFunctionArgs arch) (MacawFunctionResult arch) rg = CR.CFG { CR.cfgHandle = h - , CR.cfgBlocks = translateBlock blockLabelMap <$> Map.elems macawBlockMap + , CR.cfgBlocks = blks } cfg <- C.initialConfig 0 [] let ctx :: C.SimContext ReoptSimulatorState sym diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs index e11a1568..4f728ac5 100644 --- a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs +++ b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs @@ -1,37 +1,254 @@ {- -} +{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} {-# OPTIONS_GHC -Wwarn #-} module Data.Macaw.Symbolic.App where -import Control.Monad.Identity +import Control.Lens +import Control.Monad.ST +import Control.Monad.State.Strict import qualified Data.Macaw.CFG as M +import qualified Data.Macaw.Memory as M import qualified Data.Macaw.Types as M +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Parameterized.Classes +import qualified Data.Parameterized.Context as Ctx +import Data.Parameterized.Ctx +import Data.Parameterized.Map (MapF) +import qualified Data.Parameterized.Map as MapF +import Data.Parameterized.NatRepr +import Data.String +import Data.Text +import Data.Word import qualified Lang.Crucible.CFG.Expr as C import qualified Lang.Crucible.CFG.Reg as C +import qualified Lang.Crucible.FunctionHandle as C +import Lang.Crucible.ProgramLoc as C import qualified Lang.Crucible.Types as C type family ToCrucibleType (mtp :: M.Type) :: C.CrucibleType where ToCrucibleType (M.BVType w) = C.BVType w + ToCrucibleType M.BoolType = C.BoolType -type CrucGen = Identity +typeToCrucible :: M.TypeRepr tp -> C.TypeRepr (ToCrucibleType tp) +typeToCrucible tp = + case tp of + M.BoolTypeRepr -> C.BoolRepr + M.BVTypeRepr w -> C.BVRepr w -valueToCrucible :: M.Value arch ids tp -> CrucGen (C.Value s (ToCrucibleType tp)) -valueToCrucible = undefined +memReprToCrucible :: M.MemRepr tp -> C.TypeRepr (ToCrucibleType tp) +memReprToCrucible = typeToCrucible . M.typeRepr -valueToBool :: M.Value arch ids M.BoolType -> CrucGen (C.Value s C.BoolType) -valueToBool = undefined +-- | A Crucible value with a Macaw type. +data WrappedAtom s tp = WrappedAtom (C.Atom s (ToCrucibleType tp)) -crucibleValue :: C.App (C.Value s) ctp -> CrucGen (C.Value s ctp) -crucibleValue = undefined +type ArchConstraints arch + = ( M.MemWidth (M.ArchAddrWidth arch) + , OrdF (M.ArchReg arch) + ) -appToCrucible :: M.App (M.Value arch ids) tp -> CrucGen (C.Value s (ToCrucibleType tp)) +newtype SymbolicHandle f tp = SymbolicHandle (f (ToCrucibleType tp)) + +type ReadMemHandle arch = C.FnHandle (EmptyCtx ::> C.BVType (M.ArchAddrWidth arch)) + +-- | State used for generating blocks +data CrucGenState arch ids s + = CrucGenState + { archConstraints :: !(forall a . (ArchConstraints arch => a) -> a) + -- ^ Typeclass constraints for architecture + , archWidthRepr :: !(NatRepr (M.ArchAddrWidth arch)) + -- ^ Width of the architecture + , handleAlloc :: !(C.HandleAllocator s) + -- ^ Handle allocator + , binaryPath :: !Text + -- ^ Name of binary these blocks come from. + , codeAddr :: !Word64 + -- ^ Address of this code + , translateArchFn :: !(forall tp + . M.ArchFn arch ids tp + -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp))) + , freshSymHandleMap :: !(MapF M.TypeRepr (SymbolicHandle (C.FnHandle EmptyCtx))) + -- ^ Map type reprs to associated handle for creating symbolic values. + , readMemHandleMap :: !(MapF M.MemRepr (SymbolicHandle (ReadMemHandle arch))) + , prevStmts :: ![C.Stmt s] + -- ^ List of states in reverse order + , valueCount :: !Int + -- ^ Counter used to get fresh indices for Crucible atoms. + , memSegmentMap :: !(Map M.SegmentIndex (C.Atom s (C.BVType (M.ArchAddrWidth arch)))) + -- ^ Map from segment indices to the Crucible value denoting the base. + , regValueMap :: !(MapF (M.ArchReg arch) (WrappedAtom s)) + -- ^ Maps Macaw register initial values in block to associated Crucible value. + , assignValueMap :: !(MapF (M.AssignId ids) (WrappedAtom s)) + -- ^ Map Macaw assign id to associated Crucible value. + } + +type CrucGen arch ids s = StateT (CrucGenState arch ids s) (ST s) + + +addStmt :: C.Stmt s -> CrucGen arch ids s () +addStmt stmt = seq stmt $ do + s <- get + put $! s { prevStmts = stmt : prevStmts s } + +freshValueIndex :: CrucGen arch ids s Int +freshValueIndex = do + s <- get + let cnt = valueCount s + put $! s { valueCount = cnt + 1 } + pure $! cnt + +-- | Evaluate the crucible app and return a reference to the result. +evalAtom :: C.AtomValue s ctp -> CrucGen arch ids s (C.Atom s ctp) +evalAtom av = do + fname <- gets binaryPath + addr <- gets codeAddr + let p = C.BinaryPos fname addr + i <- freshValueIndex + -- Make atom + let atom = C.Atom { C.atomPosition = p + , C.atomId = i + , C.atomSource = C.Assigned + , C.typeOfAtom = C.typeOfAtomValue av + } + addStmt $ C.DefineAtom atom av + pure $! atom + +-- | Evaluate the crucible app and return a reference to the result. +crucibleValue :: C.App (C.Atom s) ctp -> CrucGen arch ids s (C.Atom s ctp) +crucibleValue app = evalAtom (C.EvalApp app) + +appToCrucible :: M.App (M.Value arch ids) tp + -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp)) appToCrucible app = case app of M.Mux w c t f -> do crucibleValue =<< - C.BVIte <$> valueToBool c + C.BVIte <$> valueToCrucible c <*> pure w <*> valueToCrucible t <*> valueToCrucible f + _ -> undefined + +valueToCrucible :: M.Value arch ids tp + -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp)) +valueToCrucible v = do + cns <- gets archConstraints + cns $ do + case v of + M.BVValue w c -> do + crucibleValue (C.BVLit w c) + M.BoolValue b -> do + crucibleValue (C.BoolLit b) + M.RelocatableValue w addr -> do + let seg = M.addrSegment addr + case M.segmentBase seg of + Just base -> do + crucibleValue (C.BVLit w (toInteger base + toInteger (addr^.M.addrOffset))) + Nothing -> do + let idx = M.segmentIndex seg + segMap <- gets memSegmentMap + case Map.lookup idx segMap of + Just a -> pure a + Nothing -> fail $ "internal: No Crucible address associated with segment." + M.Initial r -> do + regmap <- gets regValueMap + case MapF.lookup r regmap of + Just (WrappedAtom a) -> pure a + Nothing -> fail $ "internal: Register is not bound." + M.AssignedValue asgn -> do + let idx = M.assignId asgn + amap <- gets assignValueMap + case MapF.lookup idx amap of + Just (WrappedAtom r) -> pure r + Nothing -> fail "internal: Assignment id is not bound." + +freshSymbolicHandle :: M.TypeRepr tp + -> CrucGen arch ids s (C.FnHandle EmptyCtx (ToCrucibleType tp)) +freshSymbolicHandle repr = do + hmap <- gets freshSymHandleMap + case MapF.lookup repr hmap of + Just (SymbolicHandle h) -> pure h + Nothing -> do + let fnm = case repr of + M.BoolTypeRepr -> "symbolicBool" + M.BVTypeRepr w -> fromString $ "symbolicBV" ++ show w + halloc <- gets handleAlloc + hndl <- lift $ C.mkHandle' halloc fnm Ctx.empty (typeToCrucible repr) + modify $ \s -> s { freshSymHandleMap = + MapF.insert repr (SymbolicHandle hndl) (freshSymHandleMap s) + } + pure $! hndl + +runCall :: C.FnHandle args ret + -> Ctx.Assignment (C.Atom s) args + -> C.TypeRepr ret + -> CrucGen arch ids s (C.Atom s ret) +runCall hndl args ret = do + hatom <- crucibleValue (C.HandleLit hndl) + evalAtom $ C.Call hatom args ret + +freshSymbolic :: M.TypeRepr tp -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp)) +freshSymbolic repr = do + hndl <- freshSymbolicHandle repr + runCall hndl Ctx.empty (typeToCrucible repr) + +readMemHandle :: M.MemRepr tp + -> CrucGen arch ids s (ReadMemHandle arch (ToCrucibleType tp)) +readMemHandle repr = do + hmap <- gets readMemHandleMap + case MapF.lookup repr hmap of + Just (SymbolicHandle r) -> pure r + Nothing -> do + cns <- gets archConstraints + cns $ do + halloc <- gets handleAlloc + let fnm = case repr of + M.BVMemRepr w _ -> fromString $ "readWord" ++ show w + wrepr <- gets archWidthRepr + let argTypes = Ctx.empty Ctx.%> C.BVRepr wrepr + hndl <- lift $ C.mkHandle' halloc fnm argTypes (memReprToCrucible repr) + modify' $ \s -> + s { readMemHandleMap = MapF.insert repr (SymbolicHandle hndl) (readMemHandleMap s) } + pure hndl + +readMem :: M.ArchAddrValue arch ids + -> M.MemRepr tp + -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp)) +readMem addr repr = do + hndl <- readMemHandle repr + caddr <- valueToCrucible addr + runCall hndl (Ctx.empty Ctx.%> caddr) (memReprToCrucible repr) + +assignRhsToCrucible :: M.AssignRhs arch ids tp + -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp)) +assignRhsToCrucible rhs = + case rhs of + M.EvalApp app -> appToCrucible app + M.SetUndefined mrepr -> freshSymbolic mrepr + M.ReadMem addr repr -> readMem addr repr + M.EvalArchFn f _ -> do + fn <- gets translateArchFn + fn f + +addMacawStmt :: M.Stmt arch ids -> CrucGen arch ids s () +addMacawStmt stmt = do + case stmt of + M.AssignStmt asgn -> do + let idx = M.assignId asgn + a <- assignRhsToCrucible (M.assignRhs asgn) + modify' $ \s -> s { assignValueMap = MapF.insert idx (WrappedAtom a) (assignValueMap s) } + M.WriteMem addr repr val -> do + undefined addr repr val + M.PlaceHolderStmt vals msg -> do + undefined vals msg + M.Comment txt -> do + undefined txt + M.ExecArchStmt astmt -> do + undefined astmt diff --git a/src/Data/Macaw/CFG/Core.hs b/src/Data/Macaw/CFG/Core.hs index 7bb34d7a..c9e911c1 100644 --- a/src/Data/Macaw/CFG/Core.hs +++ b/src/Data/Macaw/CFG/Core.hs @@ -265,6 +265,13 @@ instance TestEquality MemRepr where else Nothing +instance OrdF MemRepr where + compareF (BVMemRepr xw xe) (BVMemRepr yw ye) = + case compareF xw yw of + LTF -> LTF + GTF -> GTF + EQF -> fromOrdering (compare xe ye) + instance HasRepr MemRepr TypeRepr where typeRepr (BVMemRepr w _) = let r = (natMultiply n8 w) diff --git a/src/Data/Macaw/Memory.hs b/src/Data/Macaw/Memory.hs index 4af40091..32ca264e 100644 --- a/src/Data/Macaw/Memory.hs +++ b/src/Data/Macaw/Memory.hs @@ -121,7 +121,7 @@ addrWidthNatRepr Addr64 = knownNat -- | Indicates whether bytes are stored in big or little endian representation. data Endianness = BigEndian | LittleEndian - deriving (Eq) + deriving (Eq, Ord) ------------------------------------------------------------------------ -- Utilities From 0e66a3dfeac00717641b7d33f8105b56b13ea378 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Mon, 10 Jul 2017 14:42:26 -0700 Subject: [PATCH 4/7] Convert macaw-symbolic to use CPS for CrucGen monad. --- macaw-symbolic/src/Data/Macaw/Symbolic.hs | 15 +- macaw-symbolic/src/Data/Macaw/Symbolic/App.hs | 206 ++++++++++++++---- 2 files changed, 174 insertions(+), 47 deletions(-) diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic.hs b/macaw-symbolic/src/Data/Macaw/Symbolic.hs index e63b4cbe..989be04f 100644 --- a/macaw-symbolic/src/Data/Macaw/Symbolic.hs +++ b/macaw-symbolic/src/Data/Macaw/Symbolic.hs @@ -11,7 +11,6 @@ import Control.Monad.ST import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import qualified Data.Parameterized.Context as Ctx -import Data.Parameterized.Ctx import qualified Data.Parameterized.Map as MapF import qualified Data.Set as Set import Data.String @@ -31,15 +30,10 @@ import System.IO (stdout) import qualified Data.Macaw.CFG.Block as M +import Data.Macaw.Symbolic.App + data ReoptSimulatorState sym = ReoptSimulatorState -type family ArchRegContext (arch :: *) :: Ctx C.CrucibleType - -type ArchRegStruct (arch :: *) = C.StructType (ArchRegContext arch) - -type MacawFunctionArgs arch = EmptyCtx ::> ArchRegStruct arch -type MacawFunctionResult arch = ArchRegStruct arch - translateBlock :: Map Word64 (CR.Label s) -- ^ Map from block indices to the associated label. -> M.Block arch ids @@ -61,7 +55,10 @@ stepBlocks :: forall sym arch ids -> Word64 -> Map Word64 (M.Block arch ids) -- ^ Map from block indices to block - -> IO (C.ExecResult ReoptSimulatorState sym (C.RegEntry sym (C.StructType (ArchRegContext arch)))) + -> IO (C.ExecResult + ReoptSimulatorState + sym + (C.RegEntry sym (C.StructType (ArchRegContext arch)))) stepBlocks sym regTypes addr macawBlockMap = do let macawStructRepr = C.StructRepr regTypes halloc <- C.newHandleAllocator diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs index 4f728ac5..842c093e 100644 --- a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs +++ b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs @@ -3,6 +3,7 @@ {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeFamilies #-} @@ -24,8 +25,11 @@ import Data.Parameterized.Ctx import Data.Parameterized.Map (MapF) import qualified Data.Parameterized.Map as MapF import Data.Parameterized.NatRepr -import Data.String -import Data.Text +import qualified Data.Sequence as Seq +import qualified Data.Set as Set +import Data.String +import Data.Text (Text) +import qualified Data.Text as Text import Data.Word import qualified Lang.Crucible.CFG.Expr as C import qualified Lang.Crucible.CFG.Reg as C @@ -33,6 +37,13 @@ import qualified Lang.Crucible.FunctionHandle as C import Lang.Crucible.ProgramLoc as C import qualified Lang.Crucible.Types as C +type family ArchRegContext (arch :: *) :: Ctx C.CrucibleType + +type ArchRegStruct (arch :: *) = C.StructType (ArchRegContext arch) + +type MacawFunctionArgs arch = EmptyCtx ::> ArchRegStruct arch +type MacawFunctionResult arch = ArchRegStruct arch + type family ToCrucibleType (mtp :: M.Type) :: C.CrucibleType where ToCrucibleType (M.BVType w) = C.BVType w ToCrucibleType M.BoolType = C.BoolType @@ -56,7 +67,40 @@ type ArchConstraints arch newtype SymbolicHandle f tp = SymbolicHandle (f (ToCrucibleType tp)) -type ReadMemHandle arch = C.FnHandle (EmptyCtx ::> C.BVType (M.ArchAddrWidth arch)) +type ArchAddrCrucibleType arch = C.BVType (M.ArchAddrWidth arch) + +-- | Type +type ReadMemHandle arch = C.FnHandle (EmptyCtx ::> ArchAddrCrucibleType arch) + +type WriteMemHandle arch tp + = C.FnHandle (EmptyCtx ::> ArchAddrCrucibleType arch ::> tp) C.UnitType + +newtype WriteMemWrapper arch tp = WriteMemWrapper (WriteMemHandle arch (ToCrucibleType tp)) + +type FreshSymHandleMap = MapF M.TypeRepr (SymbolicHandle (C.FnHandle EmptyCtx)) + +type ReadMemHandleMap arch = MapF M.MemRepr (SymbolicHandle (ReadMemHandle arch)) +type WriteMemHandleMap arch = MapF M.MemRepr (WriteMemWrapper arch) + +-- | Structure for getitng information about what handles are used +data CrucGenHandles arch + = CrucGenHandles + { freshSymHandleMap :: !FreshSymHandleMap + -- ^ Map type reprs to associated handle for creating symbolic values. + , readMemHandleMap :: !(ReadMemHandleMap arch) + -- ^ Maps memory repr to symbolic handle for reading memory. + , writeMemHandleMap :: !(WriteMemHandleMap arch) + -- ^ Maps mem repr to function for writing to memory. + } + +freshSymHandleMapLens :: Simple Lens (CrucGenHandles arch) FreshSymHandleMap +freshSymHandleMapLens = lens freshSymHandleMap (\s v -> s { freshSymHandleMap = v}) + +readMemHandleMapLens :: Simple Lens (CrucGenHandles arch) (ReadMemHandleMap arch) +readMemHandleMapLens = lens readMemHandleMap (\s v -> s { readMemHandleMap = v}) + +writeMemHandleMapLens :: Simple Lens (CrucGenHandles arch) (WriteMemHandleMap arch) +writeMemHandleMapLens = lens writeMemHandleMap (\s v -> s { writeMemHandleMap = v}) -- | State used for generating blocks data CrucGenState arch ids s @@ -74,10 +118,14 @@ data CrucGenState arch ids s , translateArchFn :: !(forall tp . M.ArchFn arch ids tp -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp))) - , freshSymHandleMap :: !(MapF M.TypeRepr (SymbolicHandle (C.FnHandle EmptyCtx))) - -- ^ Map type reprs to associated handle for creating symbolic values. - , readMemHandleMap :: !(MapF M.MemRepr (SymbolicHandle (ReadMemHandle arch))) - , prevStmts :: ![C.Stmt s] + -- ^ Function for translating an architecture specific function + , translateArchStmt :: !(M.ArchStmt arch ids -> CrucGen arch ids s ()) + -- ^ Function for translating an architecture specific statement. + , blockLabel :: (C.Label s) + -- ^ Label for this block we are tranlating + , handleMap :: !(CrucGenHandles arch) + -- ^ Handles found so far + , prevStmts :: ![C.Posd (C.Stmt s)] -- ^ List of states in reverse order , valueCount :: !Int -- ^ Counter used to get fresh indices for Crucible atoms. @@ -89,13 +137,66 @@ data CrucGenState arch ids s -- ^ Map Macaw assign id to associated Crucible value. } -type CrucGen arch ids s = StateT (CrucGenState arch ids s) (ST s) +handleMapLens :: Simple Lens (CrucGenState arch ids s) (CrucGenHandles arch) +handleMapLens = lens handleMap (\s v -> s { handleMap = v }) +data CrucGenResult arch ids s + = CrucGenResult + { resHandleMap :: !(CrucGenHandles arch) + , resBlock :: !(C.Block s (MacawFunctionResult arch)) + } + +newtype CrucGen arch ids s r + = CrucGen { unContGen + :: CrucGenState arch ids s + -> (CrucGenState arch ids s -> r -> ST s (CrucGenResult arch ids s)) + -> ST s (CrucGenResult arch ids s) + } + +instance Functor (CrucGen arch ids s) where + fmap f m = CrucGen $ \s0 cont -> unContGen m s0 $ \s1 v -> cont s1 (f v) + +instance Applicative (CrucGen arch ids s) where + pure r = CrucGen $ \s cont -> cont s r + mf <*> ma = CrucGen $ \s0 cont -> unContGen mf s0 + $ \s1 f -> unContGen ma s1 + $ \s2 a -> cont s2 (f a) + +instance Monad (CrucGen arch ids s) where + m >>= h = CrucGen $ \s0 cont -> unContGen m s0 $ \s1 r -> unContGen (h r) s1 cont + +instance MonadState (CrucGenState arch ids s) (CrucGen arch ids s) where + get = CrucGen $ \s cont -> cont s s + put s = CrucGen $ \_ cont -> cont s () + +liftST :: ST s r -> CrucGen arch ids s r +liftST m = CrucGen $ \s cont -> m >>= cont s + +getPos :: CrucGen arch ids s C.Position +getPos = undefined addStmt :: C.Stmt s -> CrucGen arch ids s () addStmt stmt = seq stmt $ do + p <- getPos s <- get - put $! s { prevStmts = stmt : prevStmts s } + let pstmt = C.Posd p stmt + seq pstmt $ do + put $! s { prevStmts = pstmt : prevStmts s } + +addTermStmt :: C.TermStmt s (MacawFunctionResult arch) + -> CrucGen arch ids s a +addTermStmt tstmt = do + termPos <- getPos + CrucGen $ \s _ -> do + let lbl = C.LabelID (blockLabel s) + let stmts = Seq.fromList (reverse (prevStmts s)) + let term = C.Posd termPos tstmt + let blk = C.mkBlock lbl Set.empty stmts term + let res = CrucGenResult + { resHandleMap = handleMap s + , resBlock = blk + } + pure $! res freshValueIndex :: CrucGen arch ids s Int freshValueIndex = do @@ -172,7 +273,7 @@ valueToCrucible v = do freshSymbolicHandle :: M.TypeRepr tp -> CrucGen arch ids s (C.FnHandle EmptyCtx (ToCrucibleType tp)) freshSymbolicHandle repr = do - hmap <- gets freshSymHandleMap + hmap <- use $ handleMapLens . freshSymHandleMapLens case MapF.lookup repr hmap of Just (SymbolicHandle h) -> pure h Nothing -> do @@ -180,12 +281,46 @@ freshSymbolicHandle repr = do M.BoolTypeRepr -> "symbolicBool" M.BVTypeRepr w -> fromString $ "symbolicBV" ++ show w halloc <- gets handleAlloc - hndl <- lift $ C.mkHandle' halloc fnm Ctx.empty (typeToCrucible repr) - modify $ \s -> s { freshSymHandleMap = - MapF.insert repr (SymbolicHandle hndl) (freshSymHandleMap s) - } + hndl <- liftST $ C.mkHandle' halloc fnm Ctx.empty (typeToCrucible repr) + handleMapLens . freshSymHandleMapLens %= MapF.insert repr (SymbolicHandle hndl) pure $! hndl +readMemHandle :: M.MemRepr tp + -> CrucGen arch ids s (ReadMemHandle arch (ToCrucibleType tp)) +readMemHandle repr = do + hmap <- use $ handleMapLens . readMemHandleMapLens + case MapF.lookup repr hmap of + Just (SymbolicHandle r) -> pure r + Nothing -> do + cns <- gets archConstraints + cns $ do + halloc <- gets handleAlloc + let fnm = case repr of + M.BVMemRepr w _ -> fromString $ "readWord" ++ show (8 * natValue w) + wrepr <- gets archWidthRepr + let argTypes = Ctx.empty Ctx.%> C.BVRepr wrepr + hndl <- liftST $ C.mkHandle' halloc fnm argTypes (memReprToCrucible repr) + handleMapLens . readMemHandleMapLens %= MapF.insert repr (SymbolicHandle hndl) + pure hndl + +writeMemHandle :: M.MemRepr tp + -> CrucGen arch ids s (WriteMemHandle arch (ToCrucibleType tp)) +writeMemHandle repr = do + hmap <- use $ handleMapLens . writeMemHandleMapLens + case MapF.lookup repr hmap of + Just (WriteMemWrapper r) -> pure r + Nothing -> do + cns <- gets archConstraints + cns $ do + halloc <- gets handleAlloc + let fnm = case repr of + M.BVMemRepr w _ -> fromString $ "readWord" ++ show (8 * natValue w) + wrepr <- gets archWidthRepr + let argTypes = Ctx.empty Ctx.%> C.BVRepr wrepr Ctx.%> memReprToCrucible repr + hndl <- liftST $ C.mkHandle' halloc fnm argTypes C.UnitRepr + handleMapLens . writeMemHandleMapLens %= MapF.insert repr (WriteMemWrapper hndl) + pure hndl + runCall :: C.FnHandle args ret -> Ctx.Assignment (C.Atom s) args -> C.TypeRepr ret @@ -199,24 +334,6 @@ freshSymbolic repr = do hndl <- freshSymbolicHandle repr runCall hndl Ctx.empty (typeToCrucible repr) -readMemHandle :: M.MemRepr tp - -> CrucGen arch ids s (ReadMemHandle arch (ToCrucibleType tp)) -readMemHandle repr = do - hmap <- gets readMemHandleMap - case MapF.lookup repr hmap of - Just (SymbolicHandle r) -> pure r - Nothing -> do - cns <- gets archConstraints - cns $ do - halloc <- gets handleAlloc - let fnm = case repr of - M.BVMemRepr w _ -> fromString $ "readWord" ++ show w - wrepr <- gets archWidthRepr - let argTypes = Ctx.empty Ctx.%> C.BVRepr wrepr - hndl <- lift $ C.mkHandle' halloc fnm argTypes (memReprToCrucible repr) - modify' $ \s -> - s { readMemHandleMap = MapF.insert repr (SymbolicHandle hndl) (readMemHandleMap s) } - pure hndl readMem :: M.ArchAddrValue arch ids -> M.MemRepr tp @@ -226,6 +343,17 @@ readMem addr repr = do caddr <- valueToCrucible addr runCall hndl (Ctx.empty Ctx.%> caddr) (memReprToCrucible repr) +writeMem :: M.ArchAddrValue arch ids + -> M.MemRepr tp + -> M.Value arch ids tp + -> CrucGen arch ids s () +writeMem addr repr val = do + hndl <- writeMemHandle repr + caddr <- valueToCrucible addr + cval <- valueToCrucible val + let args = Ctx.empty Ctx.%> caddr Ctx.%> cval + void $ runCall hndl args C.UnitRepr + assignRhsToCrucible :: M.AssignRhs arch ids tp -> CrucGen arch ids s (C.Atom s (ToCrucibleType tp)) assignRhsToCrucible rhs = @@ -245,10 +373,12 @@ addMacawStmt stmt = do a <- assignRhsToCrucible (M.assignRhs asgn) modify' $ \s -> s { assignValueMap = MapF.insert idx (WrappedAtom a) (assignValueMap s) } M.WriteMem addr repr val -> do - undefined addr repr val - M.PlaceHolderStmt vals msg -> do - undefined vals msg - M.Comment txt -> do - undefined txt + writeMem addr repr val + M.PlaceHolderStmt _vals msg -> do + cmsg <- crucibleValue (C.TextLit (Text.pack msg)) + addTermStmt (C.ErrorStmt cmsg) + M.Comment _txt -> do + pure () M.ExecArchStmt astmt -> do - undefined astmt + f <- gets translateArchStmt + f astmt From 66b41f1d567c781b9549c692f759117cabc41dd7 Mon Sep 17 00:00:00 2001 From: Jason Dagit Date: Wed, 12 Jul 2017 16:34:11 -0700 Subject: [PATCH 5/7] AbsState: Add missing casses to concretize and absValueSize --- src/Data/Macaw/AbsDomain/AbsState.hs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Data/Macaw/AbsDomain/AbsState.hs b/src/Data/Macaw/AbsDomain/AbsState.hs index 1bf1294c..4ba4d39b 100644 --- a/src/Data/Macaw/AbsDomain/AbsState.hs +++ b/src/Data/Macaw/AbsDomain/AbsState.hs @@ -286,7 +286,11 @@ concretize (SubValue _ _) = Nothing -- we know nothing about _all_ values concretize (StridedInterval s) = debug DAbsInt ("Concretizing " ++ show (pretty s)) $ Just (Set.fromList (SI.toList s)) -concretize _ = Nothing +concretize (BoolConst b) = Just (Set.singleton (if b then 1 else 0)) +concretize SomeStackOffset{} = Nothing +concretize TopV = Nothing +concretize ReturnAddr = Nothing +concretize StackOffset{} = Nothing -- FIXME: make total, we would need to carry around tp absValueSize :: AbsValue w tp -> Maybe Integer @@ -294,9 +298,13 @@ absValueSize (FinSet s) = Just $ fromIntegral (Set.size s) absValueSize (CodePointers s b) = Just $ fromIntegral (Set.size s) + (if b then 1 else 0) absValueSize (StridedInterval s) = Just $ SI.size s absValueSize (StackOffset _ s) = Just $ fromIntegral (Set.size s) -absValueSize _ = Nothing +absValueSize (BoolConst _) = Just 1 +absValueSize SomeStackOffset{} = Nothing +absValueSize SubValue{} = Nothing +absValueSize TopV = Nothing +absValueSize ReturnAddr = Nothing --- | Return single value is the abstract value can only take on one value. +-- | Returns single value, if the abstract value can only take on one value. asConcreteSingleton :: MemWidth w => AbsValue w tp -> Maybe Integer asConcreteSingleton v = do sz <- absValueSize v From c8a5f7abf5a94fecb3f5348d5ee9b30619128dee Mon Sep 17 00:00:00 2001 From: Jason Dagit Date: Thu, 13 Jul 2017 17:17:28 -0700 Subject: [PATCH 6/7] add BoolConst case to joinAbsValue' --- src/Data/Macaw/AbsDomain/AbsState.hs | 4 +++- src/Data/Macaw/Discovery.hs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Data/Macaw/AbsDomain/AbsState.hs b/src/Data/Macaw/AbsDomain/AbsState.hs index 4ba4d39b..7c51a2c2 100644 --- a/src/Data/Macaw/AbsDomain/AbsState.hs +++ b/src/Data/Macaw/AbsDomain/AbsState.hs @@ -449,7 +449,9 @@ joinAbsValue' (SomeStackOffset ax) (SomeStackOffset ay) | ax == ay = return $ No joinAbsValue' ReturnAddr ReturnAddr = return Nothing - +joinAbsValue' (BoolConst b1) (BoolConst b2) + | b1 == b2 = return Nothing + | otherwise = return $! Just TopV joinAbsValue' x y = do addWords (codePointerSet x) diff --git a/src/Data/Macaw/Discovery.hs b/src/Data/Macaw/Discovery.hs index 9154b9e5..f0547983 100644 --- a/src/Data/Macaw/Discovery.hs +++ b/src/Data/Macaw/Discovery.hs @@ -104,7 +104,7 @@ concretizeAbsCodePointers mem (FinSet s) = concretizeAbsCodePointers mem (CodePointers s _) = [ sa | a <- Set.toList s - , Just sa <- [absoluteAddrSegment mem (_addrOffset a)] + , Just sa <- [absoluteAddrSegment mem (addrValue a)] , Perm.isExecutable (segmentFlags (addrSegment sa)) ] -- FIXME: this is dangerous !! From 2eaa8233720003ea9e1d6715d122605db46347f7 Mon Sep 17 00:00:00 2001 From: Joe Hendrix Date: Mon, 17 Jul 2017 15:37:05 -0700 Subject: [PATCH 7/7] Change memory addresses. This changes the way memory addresses are represented to fix bugs that could arrise if one can jump between two segments. It replaces the SegmentAddr type with two types: * `MemAddr w` represents an address that is not necessarily valid. It could either be an absolute address or an address relative to some non-fixed segment. * `MemSegmentOff w` represents a legal offset into a memory segment. It must point to at least one valid byte of memory. --- macaw-symbolic/src/Data/Macaw/Symbolic/App.hs | 2 +- src/Data/Macaw/AbsDomain/AbsState.hs | 157 +++--- src/Data/Macaw/AbsDomain/Refine.hs | 9 +- src/Data/Macaw/Architecture/Info.hs | 17 +- src/Data/Macaw/CFG.hs | 2 +- src/Data/Macaw/CFG/Block.hs | 13 +- src/Data/Macaw/CFG/Core.hs | 81 +-- src/Data/Macaw/CFG/DemandSet.hs | 3 + src/Data/Macaw/CFG/Rewriter.hs | 9 +- src/Data/Macaw/Discovery.hs | 204 ++++---- src/Data/Macaw/Discovery/AbsEval.hs | 16 +- src/Data/Macaw/Discovery/State.hs | 164 +++--- src/Data/Macaw/Fold.hs | 4 +- src/Data/Macaw/Memory.hs | 489 ++++++++++-------- src/Data/Macaw/Memory/ElfLoader.hs | 41 +- 15 files changed, 637 insertions(+), 574 deletions(-) diff --git a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs index 842c093e..a29afb3f 100644 --- a/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs +++ b/macaw-symbolic/src/Data/Macaw/Symbolic/App.hs @@ -173,7 +173,7 @@ liftST :: ST s r -> CrucGen arch ids s r liftST m = CrucGen $ \s cont -> m >>= cont s getPos :: CrucGen arch ids s C.Position -getPos = undefined +getPos = C.BinaryPos <$> gets binaryPath <*> gets codeAddr addStmt :: C.Stmt s -> CrucGen arch ids s () addStmt stmt = seq stmt $ do diff --git a/src/Data/Macaw/AbsDomain/AbsState.hs b/src/Data/Macaw/AbsDomain/AbsState.hs index 1bf1294c..dc75484c 100644 --- a/src/Data/Macaw/AbsDomain/AbsState.hs +++ b/src/Data/Macaw/AbsDomain/AbsState.hs @@ -39,6 +39,7 @@ module Data.Macaw.AbsDomain.AbsState , codePointerSet , AbsDomain(..) , AbsProcessorState + , absMem , curAbsStack , absInitialRegs , indexBounds @@ -68,7 +69,7 @@ import Data.Int import Data.Map (Map) import qualified Data.Map.Strict as Map import Data.Maybe -import Data.Parameterized.Classes (EqF(..), OrdF(..), ShowF(..)) +import Data.Parameterized.Classes (EqF(..), ShowF(..)) import Data.Parameterized.Map (MapF) import qualified Data.Parameterized.Map as MapF import Data.Parameterized.NatRepr @@ -137,13 +138,15 @@ data AbsValue w (tp :: Type) -- ^ A Boolean constant | forall n . (tp ~ BVType n) => FinSet !(Set Integer) -- ^ Denotes that this value can take any one of the fixed set. - | (tp ~ BVType w) => CodePointers !(Set (SegmentedAddr w)) !Bool - -- ^ A possibly empty set of values that either point to a code segment. - -- This Boolean indicates whether this set contains the address 0. - | (tp ~ BVType w) => StackOffset !(SegmentedAddr w) !(Set Int64) + | (tp ~ BVType w) => CodePointers !(Set (MemSegmentOff w)) !Bool + -- ^ Represents that all values point to a bounded set of + -- addresses in an executable segment or the constant zero. The + -- set contains the possible addresses, and the Boolean indicates + -- whether this set contains the address 0. + | (tp ~ BVType w) => StackOffset !(MemAddr w) !(Set Int64) -- ^ Offset of stack from the beginning of the block at the given address. -- First argument is address of block. - | (tp ~ BVType w) => SomeStackOffset !(SegmentedAddr w) + | (tp ~ BVType w) => SomeStackOffset !(MemAddr w) -- ^ An offset to the stack at some offset. | forall n . (tp ~ BVType n) => StridedInterval !(SI.StridedInterval n) -- ^ A strided interval @@ -172,16 +175,15 @@ data SomeFinSet tp where -- | Given a segmented addr and flag indicating if zero is contained return the underlying -- integer set and the set of addresses that had no base. partitionAbsoluteAddrs :: MemWidth w - => Set (SegmentedAddr w) + => Set (MemSegmentOff w) -> Bool - -> (Set Integer, Set (SegmentedAddr w)) + -> (Set Integer, Set (MemSegmentOff w)) partitionAbsoluteAddrs addrSet b = foldl' f (s0, Set.empty) addrSet where s0 = if b then Set.singleton 0 else Set.empty f (intSet,badSet) addr = - case segmentBase (addrSegment addr) of - Just base -> seq intSet' $ (intSet', badSet) - where intSet' = Set.insert w intSet - w = toInteger base + toInteger (addr^.addrOffset) + case msegAddr addr of + Just addrVal -> seq intSet' $ (intSet', badSet) + where intSet' = Set.insert (toInteger addrVal) intSet Nothing -> seq badSet' $ (intSet, badSet') where badSet' = Set.insert addr badSet @@ -197,13 +199,11 @@ asFinSet _ (CodePointers s True) | Set.null s = IsFin (Set.singleton 0) asFinSet nm (CodePointers addrSet b) = go (Set.toList addrSet) $! s0 where s0 = if b then Set.singleton 0 else Set.empty - go :: [SegmentedAddr w] -> Set Integer -> SomeFinSet (BVType w) + go :: [MemSegmentOff w] -> Set Integer -> SomeFinSet (BVType w) go [] s = debug DAbsInt ("dropping Codeptr " ++ nm) $ IsFin s - go (a:r) s = - case segmentBase (addrSegment a) of - Just base -> go r $! s' - where v = toInteger base + toInteger (a^.addrOffset) - s' = Set.insert v s + go (seg_off: r) s = + case msegAddr seg_off of + Just addr -> go r $! Set.insert (toInteger addr) s Nothing -> NotFin asFinSet _ _ = NotFin @@ -214,7 +214,7 @@ asFinSet _ _ = NotFin -- | otherwise = debug DAbsInt ("dropping Codeptr " ++ nm) $ Just s -- asFinSet64 _ _ = Nothing -codePointerSet :: AbsValue w tp -> Set (SegmentedAddr w) +codePointerSet :: AbsValue w tp -> Set (MemSegmentOff w) codePointerSet (CodePointers s _) = s codePointerSet _ = Set.empty @@ -240,10 +240,10 @@ instance Eq (AbsValue w tp) where instance EqF (AbsValue w) where eqF = (==) -instance Show (AbsValue w tp) where +instance MemWidth w => Show (AbsValue w tp) where show = show . pretty -instance Pretty (AbsValue w tp) where +instance MemWidth w => Pretty (AbsValue w tp) where pretty (BoolConst b) = text (show b) pretty (FinSet s) = text "finset" <+> ppIntegerSet s pretty (CodePointers s b) = text "code" <+> ppSet (s0 ++ sd) @@ -277,9 +277,9 @@ ppIntegerSet s = ppSet (ppv <$> Set.toList s) concretize :: MemWidth w => AbsValue w tp -> Maybe (Set Integer) concretize (FinSet s) = Just s concretize (CodePointers s b) = Just $ Set.fromList $ - [ toInteger base + toInteger (addr^.addrOffset) - | addr <- Set.toList s - , base <- maybeToList (segmentBase (addrSegment addr)) + [ toInteger addr + | mseg <- Set.toList s + , addr <- maybeToList (msegAddr mseg) ] ++ (if b then [0] else []) concretize (SubValue _ _) = Nothing -- we know nothing about _all_ values @@ -331,9 +331,6 @@ isEmpty _ = False ------------------------------------------------------------------------------- -- Joining abstract values -ppSegAddrSet :: Set (SegmentedAddr w) -> Doc -ppSegAddrSet s = ppSet (text . show <$> Set.toList s) - -- | Join the old and new states and return the updated state iff -- the result is larger than the old state. -- This also returns any addresses that are discarded during joining. @@ -345,9 +342,9 @@ joinAbsValue x y | Set.null s = r | otherwise = debug DAbsInt ("dropping " ++ show dropped ++ "\n" ++ show x ++ "\n" ++ show y ++ "\n") r where (r,s) = runState (joinAbsValue' x y) Set.empty - dropped = ppSegAddrSet s + dropped = ppSet (text . show <$> Set.toList s) -addWords :: Set (SegmentedAddr w) -> State (Set (SegmentedAddr w)) () +addWords :: Set (MemSegmentOff w) -> State (Set (MemSegmentOff w)) () addWords s = modify $ Set.union s -- | Join the old and new states and return the updated state iff @@ -356,7 +353,7 @@ addWords s = modify $ Set.union s joinAbsValue' :: MemWidth w => AbsValue w tp -> AbsValue w tp - -> State (Set (SegmentedAddr w)) (Maybe (AbsValue w tp)) + -> State (Set (MemSegmentOff w)) (Maybe (AbsValue w tp)) joinAbsValue' TopV x = do addWords (codePointerSet x) return $! Nothing @@ -630,7 +627,7 @@ bvsub :: forall w u -> AbsValue w (BVType u) -> AbsValue w (BVType u) -> AbsValue w (BVType u) -bvsub _mem w (CodePointers s b) (FinSet t) +bvsub mem w (CodePointers s b) (FinSet t) -- If we just have zero. | Set.null s && b = FinSet (Set.map (toUnsigned w . negate) t) | all isJust vals && (not b || Set.singleton 0 == t) = @@ -639,15 +636,16 @@ bvsub _mem w (CodePointers s b) (FinSet t) -- TODO: Fix this. -- debug DAbsInt ("drooping " ++ show (ppIntegerSet s) ++ " " ++ show (ppIntegerSet t)) $ -- setL (stridedInterval . SI.fromFoldable w) FinSet (toInteger <$> vals) - where vals :: [Maybe (SegmentedAddr w)] + where vals :: [Maybe (MemSegmentOff w)] vals = do x <- Set.toList s y <- Set.toList t - if toInteger (x^.addrOffset) >= y then - return $ Just $ x & addrOffset -~ fromInteger y - else - return $ Nothing - + let z = relativeSegmentAddr x & incAddr (negate y) + case asSegmentOff mem z of + Just z_mseg | segmentFlags (msegSegment z_mseg) `Perm.hasPerm` Perm.execute -> + pure (Just z_mseg) + _ -> + pure Nothing bvsub _ _ xv@(CodePointers xs xb) (CodePointers ys yb) -- If we just have zero. | Set.null ys && yb = xv @@ -665,10 +663,7 @@ bvsub _ _ xv@(CodePointers xs xb) (CodePointers ys yb) vals = do x <- Set.toList xs y <- Set.toList ys - if segmentIndex (addrSegment x) == segmentIndex (addrSegment y) then - pure $ Just $ toInteger (x^.addrOffset) - toInteger (y^.addrOffset) - else do - pure $ Nothing + pure (relativeSegmentAddr x `diffAddr` relativeSegmentAddr y) bvsub _ w (FinSet s) (asFinSet "bvsub3" -> IsFin t) = setL (stridedInterval . SI.fromFoldable w) FinSet $ do x <- Set.toList s @@ -719,10 +714,12 @@ bvmul _ _ _ = TopV bvand :: MemWidth w => NatRepr u -> AbsValue w (BVType u) - -> AbsValue w (BVType u) -> AbsValue w (BVType u) -bvand _w (asFinSet "bvand" -> IsFin s) (asConcreteSingleton -> Just v) = FinSet (Set.map (flip (.&.) v) s) -bvand _w (asConcreteSingleton -> Just v) (asFinSet "bvand" -> IsFin s) = FinSet (Set.map ((.&.) v) s) + -> AbsValue w (BVType u) +bvand _w (asFinSet "bvand" -> IsFin s) (asConcreteSingleton -> Just v) = + FinSet (Set.map (flip (.&.) v) s) +bvand _w (asConcreteSingleton -> Just v) (asFinSet "bvand" -> IsFin s) = + FinSet (Set.map ((.&.) v) s) bvand _ _ _ = TopV -- FIXME: generalise @@ -738,12 +735,12 @@ bitop doOp _w (asConcreteSingleton -> Just v) (asFinSet "bvand" -> IsFin s) = FinSet (Set.map (doOp v) s) bitop _ _ _ _ = TopV -ppAbsValue :: AbsValue w tp -> Maybe Doc +ppAbsValue :: MemWidth w => AbsValue w tp -> Maybe Doc ppAbsValue TopV = Nothing ppAbsValue v = Just (pretty v) -- | Print a list of Docs vertically separated. -instance ShowF r => PrettyRegValue r (AbsValue w) where +instance (MemWidth w, ShowF r) => PrettyRegValue r (AbsValue w) where ppValueEq _ TopV = Nothing ppValueEq r v = Just (text (showF r) <+> text "=" <+> pretty v) @@ -757,23 +754,21 @@ absFalse = BoolConst False -- | This returns the smallest abstract value that contains the -- given unsigned integer. abstractSingleton :: MemWidth w - => NatRepr w + => Memory w -- ^ Width of code pointer - -> (MemWord w -> Maybe (SegmentedAddr w)) - -- ^ Predicate that recognizes if the given value is a code - -- pointer. -> NatRepr n -> Integer -> AbsValue w (BVType n) -abstractSingleton code_w is_code w i - | Just Refl <- testEquality w code_w +abstractSingleton mem w i + | Just Refl <- testEquality w (memWidth mem) , 0 <= i && i <= maxUnsigned w - , Just sa <- is_code (fromInteger i) = + , Just sa <- resolveAbsoluteAddr mem (fromInteger i) + , segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute = CodePointers (Set.singleton sa) False | 0 <= i && i <= maxUnsigned w = FinSet (Set.singleton i) | otherwise = error $ "abstractSingleton given bad value: " ++ show i ++ " " ++ show w -concreteStackOffset :: SegmentedAddr w -> Integer -> AbsValue w (BVType w) +concreteStackOffset :: MemAddr w -> Integer -> AbsValue w (BVType w) concreteStackOffset a o = StackOffset a (Set.singleton (fromInteger o)) ------------------------------------------------------------------------ @@ -869,7 +864,7 @@ type AbsBlockStack w = Map Int64 (StackEntry w) absStackJoinD :: MemWidth w => AbsBlockStack w -> AbsBlockStack w - -> State (Bool,Set (SegmentedAddr w)) (AbsBlockStack w) + -> State (Bool,Set (MemSegmentOff w)) (AbsBlockStack w) absStackJoinD y x = do -- This attempts to merge information from the new state into the old state. let entryLeq (o, StackEntry y_tp y_v) = @@ -912,7 +907,7 @@ showSignedHex :: Integer -> ShowS showSignedHex x | x < 0 = showString "-0x" . showHex (negate x) | otherwise = showString "0x" . showHex x -ppAbsStack :: AbsBlockStack w -> Doc +ppAbsStack :: MemWidth w => AbsBlockStack w -> Doc ppAbsStack m = vcat (pp <$> Map.toDescList m) where pp (o,StackEntry _ v) = text (showSignedHex (toInteger o) " :=") <+> pretty v @@ -978,6 +973,7 @@ instance ( RegisterInfo r } instance ( ShowF r + , MemWidth (RegAddrWidth r) ) => Pretty (AbsBlockState r) where pretty s = text "registers:" <$$> @@ -990,12 +986,12 @@ instance ( ShowF r indent 2 (ppAbsStack stack) jmp_bnds = pretty (s^.initIndexBounds) -instance ShowF r => Show (AbsBlockState r) where +instance (ShowF r, MemWidth (RegAddrWidth r)) => Show (AbsBlockState r) where show = show . pretty -- | Update the block state to point to a specific IP address. setAbsIP :: RegisterInfo r - => SegmentedAddr (RegAddrWidth r) + => MemSegmentOff (RegAddrWidth r) -- ^ The address to set. -> AbsBlockState r -> AbsBlockState r @@ -1031,10 +1027,6 @@ data AbsProcessorState r ids , _indexBounds :: !(Jmp.IndexBounds r ids) } --- | The width of an address -absCodeWidth :: AbsProcessorState r ids -> NatRepr (RegAddrWidth r) -absCodeWidth = memWidth . absMem - absInitialRegs :: Simple Lens (AbsProcessorState r ids) (RegState r (AbsValue (RegAddrWidth r))) absInitialRegs = lens _absInitialRegs (\s v -> s { _absInitialRegs = v }) @@ -1050,12 +1042,8 @@ curAbsStack = lens _curAbsStack (\s v -> s { _curAbsStack = v }) indexBounds :: Simple Lens (AbsProcessorState r ids) (Jmp.IndexBounds r ids) indexBounds = lens _indexBounds (\s v -> s { _indexBounds = v }) -instance ShowF r - => Show (AbsProcessorState r ids) where - show = show . pretty --- FIXME -instance (ShowF r) +instance (ShowF r, MemWidth (RegAddrWidth r)) => Pretty (AbsProcessorState r ids) where pretty s = text "registers:" <$$> @@ -1066,6 +1054,10 @@ instance (ShowF r) | otherwise = text "stack:" <$$> indent 2 (ppAbsStack stack) +instance (ShowF r, MemWidth (RegAddrWidth r)) + => Show (AbsProcessorState r ids) where + show = show . pretty + initAbsProcessorState :: Memory (RegAddrWidth r) -- ^ Current state of memory in the processor. -- @@ -1111,31 +1103,30 @@ pruneStack = Map.filter f -- Transfer Value -- | Compute abstract value from value and current registers. -transferValue :: ( OrdF (ArchReg a) - , ShowF (ArchReg a) - , MemWidth (ArchAddrWidth a) +transferValue :: forall a ids tp + . ( RegisterInfo (ArchReg a) , HasCallStack ) => AbsProcessorState (ArchReg a) ids -> Value a ids tp -> ArchAbsValue a tp transferValue c v = do - let code_width = absCodeWidth c - is_code addr = do - sa <- absoluteAddrSegment (absMem c) addr - if segmentFlags (addrSegment sa) `Perm.hasPerm` Perm.execute then - Just $! sa - else - Nothing - amap = c^.absAssignments + let amap = c^.absAssignments aregs = c^.absInitialRegs case v of BoolValue b -> BoolConst b BVValue w i - | 0 <= i && i <= maxUnsigned w -> abstractSingleton code_width is_code w i + | 0 <= i && i <= maxUnsigned w -> abstractSingleton (absMem c) w i | otherwise -> error $ "transferValue given illegal value " ++ show (pretty v) -- TODO: Ensure a relocatable value is in code. - RelocatableValue _w i -> CodePointers (Set.singleton i) False + RelocatableValue _w i + | Just addr <- asSegmentOff (absMem c) i + , segmentFlags (msegSegment addr) `Perm.hasPerm` Perm.execute -> + CodePointers (Set.singleton addr) False + | Just addr <- asAbsoluteAddr i -> + FinSet $ Set.singleton $ toInteger addr + | otherwise -> + TopV -- Invariant: v is in m AssignedValue a -> fromMaybe (error $ "Missing assignment for " ++ show (assignId a)) @@ -1212,9 +1203,7 @@ finalAbsBlockState c s = ------------------------------------------------------------------------ -- Transfer functions -transferApp :: ( OrdF (ArchReg a) - , ShowF (ArchReg a) - , MemWidth (ArchAddrWidth a) +transferApp :: ( RegisterInfo (ArchReg a) , HasCallStack ) => AbsProcessorState (ArchReg a) ids @@ -1248,7 +1237,7 @@ absEvalCall :: forall r -- ^ Configuration -> AbsBlockState r -- ^ State before call - -> SegmentedAddr (RegAddrWidth r) + -> MemSegmentOff (RegAddrWidth r) -- ^ Address we are jumping to -> AbsBlockState r absEvalCall params ab0 addr = diff --git a/src/Data/Macaw/AbsDomain/Refine.hs b/src/Data/Macaw/AbsDomain/Refine.hs index a159cbb7..52fc6769 100644 --- a/src/Data/Macaw/AbsDomain/Refine.hs +++ b/src/Data/Macaw/AbsDomain/Refine.hs @@ -24,15 +24,14 @@ import Data.Parameterized.NatRepr import Data.Macaw.AbsDomain.AbsState import Data.Macaw.CFG -import Data.Macaw.Memory (MemWidth) import Data.Macaw.Types -- | Constraints needed for refinement on abstract states. type RefineConstraints arch - = ( OrdF (ArchReg arch) - , ShowF (ArchReg arch) - , HasRepr (ArchReg arch) TypeRepr - , MemWidth (ArchAddrWidth arch) + = ( RegisterInfo (ArchReg arch) +-- , ShowF (ArchReg arch) +-- , HasRepr (ArchReg arch) TypeRepr +-- , MemWidth (ArchAddrWidth arch) ) -- FIXME: if val \notin av then we should return bottom diff --git a/src/Data/Macaw/Architecture/Info.hs b/src/Data/Macaw/Architecture/Info.hs index 0d96681c..61a90698 100644 --- a/src/Data/Macaw/Architecture/Info.hs +++ b/src/Data/Macaw/Architecture/Info.hs @@ -36,10 +36,11 @@ import Data.Macaw.Memory -- block. type DisassembleFn arch = forall ids - . NonceGenerator (ST ids) ids - -> SegmentedAddr (ArchAddrWidth arch) - -- ^ Segment that we are disassembling from - -> MemWord (ArchAddrWidth arch) + . Memory (ArchAddrWidth arch) + -> NonceGenerator (ST ids) ids + -> ArchSegmentOff arch + -- ^ The offset to start reading from. + -> ArchAddrWord arch -- ^ Maximum offset for this to read from. -> AbsBlockState (ArchReg arch) -- ^ Abstract state associated with address that we are disassembling @@ -66,10 +67,12 @@ data ArchitectureInfo arch -- ^ Return true if architecture register should be preserved across a system call. , mkInitialAbsState :: !(Memory (RegAddrWidth (ArchReg arch)) - -> SegmentedAddr (RegAddrWidth (ArchReg arch)) + -> ArchSegmentOff arch -> AbsBlockState (ArchReg arch)) -- ^ Creates an abstract block state for representing the beginning of a -- function. + -- + -- The address is the entry point of the function. , absEvalArchFn :: !(forall ids tp . AbsProcessorState (ArchReg arch) ids -> ArchFn arch ids tp @@ -81,7 +84,7 @@ data ArchitectureInfo arch -> AbsProcessorState (ArchReg arch) ids) -- ^ Evaluates an architecture-specific statement , postCallAbsState :: AbsBlockState (ArchReg arch) - -> SegmentedAddr (RegAddrWidth (ArchReg arch)) + -> ArchSegmentOff arch -> AbsBlockState (ArchReg arch) -- ^ Update the abstract state after a function call returns , identifyReturn :: forall ids @@ -106,7 +109,7 @@ data ArchitectureInfo arch archPostSyscallAbsState :: ArchitectureInfo arch -- ^ Architecture information -> AbsBlockState (ArchReg arch) - -> SegmentedAddr (RegAddrWidth (ArchReg arch)) + -> ArchSegmentOff arch -> AbsBlockState (ArchReg arch) archPostSyscallAbsState info = withArchConstraints info $ AbsState.absEvalCall params where params = CallParams { postCallStackDelta = 0 diff --git a/src/Data/Macaw/CFG.hs b/src/Data/Macaw/CFG.hs index 00b76dc7..6a4b24a3 100644 --- a/src/Data/Macaw/CFG.hs +++ b/src/Data/Macaw/CFG.hs @@ -7,7 +7,7 @@ This exports the main CFG modules module Data.Macaw.CFG ( module Data.Macaw.CFG.Core , module Data.Macaw.CFG.App - , Data.Macaw.Memory.SegmentedAddr + , Data.Macaw.Memory.MemAddr ) where import Data.Macaw.CFG.App diff --git a/src/Data/Macaw/CFG/Block.hs b/src/Data/Macaw/CFG/Block.hs index 30248b9e..8276683d 100644 --- a/src/Data/Macaw/CFG/Block.hs +++ b/src/Data/Macaw/CFG/Block.hs @@ -8,10 +8,10 @@ types. {-# LANGUAGE FlexibleContexts #-} module Data.Macaw.CFG.Block ( Block(..) + , ppBlock , TermStmt(..) ) where -import Data.Parameterized.Classes import Data.Text (Text) import qualified Data.Text as Text import Data.Word @@ -44,8 +44,7 @@ data TermStmt arch ids -- occured and the error message recorded. | TranslateError !(RegState (ArchReg arch) (Value arch ids)) !Text -instance ( OrdF (ArchReg arch) - , ShowF (ArchReg arch) +instance ( RegisterInfo(ArchReg arch) ) => Pretty (TermStmt arch ids) where pretty (FetchAndExecute s) = @@ -75,7 +74,7 @@ data Block arch ids -- ^ The last statement in the block. } -instance ArchConstraints arch => Pretty (Block arch ids) where - pretty b = do - text (show (blockLabel b)) PP.<> text ":" <$$> - indent 2 (vcat (pretty <$> blockStmts b) <$$> pretty (blockTerm b)) +ppBlock :: ArchConstraints arch => Block arch ids -> Doc +ppBlock b = + text (show (blockLabel b)) PP.<> text ":" <$$> + indent 2 (vcat (ppStmt (text . show) <$> blockStmts b) <$$> pretty (blockTerm b)) diff --git a/src/Data/Macaw/CFG/Core.hs b/src/Data/Macaw/CFG/Core.hs index c9e911c1..3b36b4be 100644 --- a/src/Data/Macaw/CFG/Core.hs +++ b/src/Data/Macaw/CFG/Core.hs @@ -54,16 +54,15 @@ module Data.Macaw.CFG.Core , ppAssignId , ppLit , ppValue + , ppStmt , PrettyF(..) , ArchConstraints(..) , PrettyRegValue(..) -- * Architecture type families - , ArchAddr - , ArchSegmentedAddr , ArchFn , ArchReg , ArchStmt - , RegAddr + , RegAddrWord , RegAddrWidth -- * RegisterInfo , RegisterInfo(..) @@ -77,6 +76,9 @@ module Data.Macaw.CFG.Core -- ** Synonyms , ArchAddrWidth , ArchAddrValue + , ArchAddrWord + , ArchMemAddr + , ArchSegmentOff ) where import Control.Exception (assert) @@ -103,7 +105,7 @@ import Numeric (showHex) import Text.PrettyPrint.ANSI.Leijen as PP hiding ((<$>)) import Data.Macaw.CFG.App -import Data.Macaw.Memory (MemWord, MemWidth, SegmentedAddr(..), Endianness(..)) +import Data.Macaw.Memory (MemWord, MemWidth, MemAddr, MemSegmentOff, Endianness(..)) import Data.Macaw.Types -- Note: @@ -172,8 +174,8 @@ instance Show (AssignId ids tp) where -- | Width of register used to store addresses. type family RegAddrWidth (r :: Type -> *) :: Nat --- | The value used to store -type RegAddr r = MemWord (RegAddrWidth r) +-- | A word for the given architecture register type. +type RegAddrWord r = MemWord (RegAddrWidth r) -- | Type family for defining what a "register" is for this architecture. -- @@ -194,18 +196,19 @@ type family ArchFn (arch :: *) :: * -> Type -> * -- -- The second type parameter is the ids phantom type used to provide -- uniqueness of Nonce values that identify assignments. --- type family ArchStmt (arch :: *) :: * -> * --- | The type to use for addresses on the architecutre. -type ArchAddr arch = RegAddr (ArchReg arch) - --- | Number of bits in addreses for architecture. + -- | Number of bits in addreses for architecture. type ArchAddrWidth arch = RegAddrWidth (ArchReg arch) --- | A segmented addr for a given architecture. -type ArchSegmentedAddr arch = SegmentedAddr (ArchAddrWidth arch) +-- | A word for the given architecture bitwidth. +type ArchAddrWord arch = RegAddrWord (ArchReg arch) +-- | A segmented addr for a given architecture. +type ArchMemAddr arch = MemAddr (ArchAddrWidth arch) + +-- | A pair containing a segment and valid offset within the segment. +type ArchSegmentOff arch = MemSegmentOff (ArchAddrWidth arch) ------------------------------------------------------------------------ -- Value, Assignment, AssignRhs declarations. @@ -222,8 +225,8 @@ data Value arch ids tp | ( tp ~ BVType (ArchAddrWidth arch) , 1 <= ArchAddrWidth arch ) - => RelocatableValue !(NatRepr (ArchAddrWidth arch)) !(ArchSegmentedAddr arch) - -- ^ A given memory address. + => RelocatableValue !(NatRepr (ArchAddrWidth arch)) !(ArchMemAddr arch) + -- ^ A legal memory address | AssignedValue !(Assignment arch ids tp) -- ^ Value from an assignment statement. | Initial !(ArchReg arch tp) @@ -493,7 +496,7 @@ mkRegStateM f = RegState . MapF.fromList <$> traverse g archRegs where g (Some r) = MapF.Pair r <$> f r -- Create a pure register state -mkRegState :: RegisterInfo r -- AbsRegState r +mkRegState :: RegisterInfo r => (forall tp . r tp -> f tp) -> RegState r f mkRegState f = runIdentity (mkRegStateM (return . f)) @@ -530,20 +533,20 @@ ppLit w i | otherwise = error "ppLit given negative value" -- | Pretty print a value. -ppValue :: ShowF (ArchReg arch) => Prec -> Value arch ids tp -> Doc +ppValue :: RegisterInfo (ArchReg arch) => Prec -> Value arch ids tp -> Doc ppValue _ (BoolValue b) = text $ if b then "true" else "false" ppValue p (BVValue w i) = assert (i >= 0) $ parenIf (p > colonPrec) $ ppLit w i ppValue p (RelocatableValue _ a) = parenIf (p > plusPrec) $ text (show a) ppValue _ (AssignedValue a) = ppAssignId (assignId a) ppValue _ (Initial r) = text (showF r) PP.<> text "_0" -instance ShowF (ArchReg arch) => PrettyPrec (Value arch ids tp) where +instance RegisterInfo (ArchReg arch) => PrettyPrec (Value arch ids tp) where prettyPrec = ppValue -instance ShowF (ArchReg arch) => Pretty (Value arch ids tp) where +instance RegisterInfo (ArchReg arch) => Pretty (Value arch ids tp) where pretty = ppValue 0 -instance ShowF (ArchReg arch) => Show (Value arch ids tp) where +instance RegisterInfo (ArchReg arch) => Show (Value arch ids tp) where show = show . pretty class ( RegisterInfo (ArchReg arch) @@ -648,8 +651,7 @@ instance ( PrettyRegValue r f => Show (RegState r f) where show s = show (pretty s) -instance ( OrdF r - , ShowF r +instance ( RegisterInfo r , r ~ ArchReg arch ) => PrettyRegValue r (Value arch ids) where @@ -668,22 +670,39 @@ data Stmt arch ids -- ^ This denotes a write to memory, and consists of an address to write to, a `MemRepr` defining -- how the value should be stored in memory, and the value to be written. | PlaceHolderStmt !([Some (Value arch ids)]) !String + -- ^ A placeholder to indicate something the + -- architecture-specific backend does not support. + -- + -- Note that we plan to remove this eventually + | InstructionStart !(ArchAddrWord arch) !Text + -- ^ The start of an instruction + -- + -- The information includes the offset relative to the start of the block and the + -- disassembler output if available (or empty string if unavailable) | Comment !Text + -- ^ A user-level comment | ExecArchStmt !(ArchStmt arch ids) -- ^ Execute an architecture specific statement -instance ArchConstraints arch => Pretty (Stmt arch ids) where - pretty (AssignStmt a) = pretty a - pretty (WriteMem a _ rhs) = text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs - pretty (PlaceHolderStmt vals name) = text ("PLACEHOLDER: " ++ name) - <+> parens (hcat $ punctuate comma - $ map (viewSome (ppValue 0)) vals) - pretty (Comment s) = text $ "# " ++ Text.unpack s - pretty (ExecArchStmt s) = prettyF s +ppStmt :: ArchConstraints arch + => (ArchAddrWord arch -> Doc) + -- ^ Function for pretty printing an offset + -> Stmt arch ids + -> Doc +ppStmt ppOff stmt = + case stmt of + AssignStmt a -> pretty a + WriteMem a _ rhs -> text "*" PP.<> prettyPrec 11 a <+> text ":=" <+> ppValue 0 rhs + PlaceHolderStmt vals name -> + text ("PLACEHOLDER: " ++ name) + <+> parens (hcat $ punctuate comma $ viewSome (ppValue 0) <$> vals) + InstructionStart off mnem -> text "#" <+> ppOff off <+> text (Text.unpack mnem) + Comment s -> text $ "# " ++ Text.unpack s + ExecArchStmt s -> prettyF s instance ArchConstraints arch => Show (Stmt arch ids) where - show = show . pretty + show = show . ppStmt (\w -> text (show w)) ------------------------------------------------------------------------ -- References diff --git a/src/Data/Macaw/CFG/DemandSet.hs b/src/Data/Macaw/CFG/DemandSet.hs index 30a55c6f..d26b1081 100644 --- a/src/Data/Macaw/CFG/DemandSet.hs +++ b/src/Data/Macaw/CFG/DemandSet.hs @@ -110,6 +110,8 @@ addStmtDemands s = addValueDemands val PlaceHolderStmt l _ -> mapM_ (\(Some v) -> addValueDemands v) l + InstructionStart{} -> + pure () Comment _ -> pure () ExecArchStmt astmt -> do @@ -126,5 +128,6 @@ stmtNeeded demandSet stmt = AssignStmt a -> Set.member (Some (assignId a)) demandSet WriteMem{} -> True PlaceHolderStmt{} -> True + InstructionStart{} -> True Comment{} -> True ExecArchStmt{} -> True diff --git a/src/Data/Macaw/CFG/Rewriter.hs b/src/Data/Macaw/CFG/Rewriter.hs index 0c880bbc..8efe9618 100644 --- a/src/Data/Macaw/CFG/Rewriter.hs +++ b/src/Data/Macaw/CFG/Rewriter.hs @@ -325,11 +325,14 @@ rewriteStmt s = WriteMem addr repr val -> do tgtAddr <- rewriteValue addr tgtVal <- rewriteValue val - appendRewrittenStmt (WriteMem tgtAddr repr tgtVal) + appendRewrittenStmt $ WriteMem tgtAddr repr tgtVal PlaceHolderStmt args nm -> do args' <- traverse (traverseSome rewriteValue) args - appendRewrittenStmt (PlaceHolderStmt args' nm) - Comment cmt -> appendRewrittenStmt (Comment cmt) + appendRewrittenStmt $ PlaceHolderStmt args' nm + Comment cmt -> + appendRewrittenStmt $ Comment cmt + InstructionStart off mnem -> + appendRewrittenStmt $ InstructionStart off mnem ExecArchStmt astmt -> do f <- Rewriter $ gets $ rwctxArchStmt . rwContext f astmt diff --git a/src/Data/Macaw/Discovery.hs b/src/Data/Macaw/Discovery.hs index 9154b9e5..90d7b215 100644 --- a/src/Data/Macaw/Discovery.hs +++ b/src/Data/Macaw/Discovery.hs @@ -48,7 +48,6 @@ module Data.Macaw.Discovery , State.symbolAddrs ) where -import Control.Exception import Control.Lens import Control.Monad.ST import Control.Monad.State.Strict @@ -94,18 +93,17 @@ import Data.Macaw.Types concretizeAbsCodePointers :: MemWidth w => Memory w -> AbsValue w (BVType w) - -> [SegmentedAddr w] + -> [MemSegmentOff w] concretizeAbsCodePointers mem (FinSet s) = [ sa | a <- Set.toList s - , Just sa <- [absoluteAddrSegment mem (fromInteger a)] - , Perm.isExecutable (segmentFlags (addrSegment sa)) + , sa <- maybeToList (resolveAbsoluteAddr mem (fromInteger a)) + , segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute ] -concretizeAbsCodePointers mem (CodePointers s _) = +concretizeAbsCodePointers _ (CodePointers s _) = [ sa - | a <- Set.toList s - , Just sa <- [absoluteAddrSegment mem (_addrOffset a)] - , Perm.isExecutable (segmentFlags (addrSegment sa)) + | sa <- Set.toList s + , segmentFlags (msegSegment sa) `Perm.hasPerm` Perm.execute ] -- FIXME: this is dangerous !! concretizeAbsCodePointers _mem StridedInterval{} = [] -- FIXME: this case doesn't make sense @@ -114,8 +112,8 @@ concretizeAbsCodePointers _mem StridedInterval{} = [] -- FIXME: this case doesn' concretizeAbsCodePointers _mem _ = [] {- -printAddrBacktrace :: Map (ArchSegmentedAddr arch) (FoundAddr arch) - -> ArchSegmentedAddr arch +printAddrBacktrace :: Map (ArchMemAddr arch) (FoundAddr arch) + -> ArchMemAddr arch -> CodeAddrReason (ArchAddrWidth arch) -> [String] printAddrBacktrace found_map addr rsn = do @@ -131,14 +129,13 @@ printAddrBacktrace found_map addr rsn = do InitAddr -> [pp "Initial entry point."] CodePointerInMem src -> [pp ("Memory address " ++ show src ++ " contained code.")] SplitAt src -> pp ("Split from read of " ++ show src ++ ".") : prev src - InterProcedureJump src -> pp ("Reference from external address " ++ show src ++ ".") : prev src -- | Return true if this address was added because of the contents of a global address -- in memory initially. -- -- This heuristic is not very accurate, so we avoid printing errors when it leads to -- issues. -cameFromUnsoundReason :: Map (ArchSegmentedAddr arch) (FoundAddr arch) +cameFromUnsoundReason :: Map (ArchMemAddr arch) (FoundAddr arch) -> CodeAddrReason (ArchAddrWidth arch) -> Bool cameFromUnsoundReason found_map rsn = do @@ -153,7 +150,6 @@ cameFromUnsoundReason found_map rsn = do SplitAt src -> prev src InitAddr -> False CodePointerInMem{} -> True - InterProcedureJump src -> prev src -} ------------------------------------------------------------------------ @@ -221,14 +217,15 @@ elimDeadBlockStmts demandSet b = -- Memory utilities -- | Return true if range is entirely contained within a single read only segment.Q -rangeInReadonlySegment :: MemWidth w - => SegmentedAddr w -- ^ Start of range +rangeInReadonlySegment :: Memory w + -> MemAddr w -- ^ Start of range -> MemWord w -- ^ The size of the range -> Bool -rangeInReadonlySegment base size - = base^.addrOffset + size <= segmentSize seg - && Perm.isReadonly (segmentFlags seg) - where seg = addrSegment base +rangeInReadonlySegment mem base size = addrWidthClass (memAddrWidth mem) $ + case asSegmentOff mem base of + Just mseg -> size <= segmentSize (msegSegment mseg) - msegOffset mseg + && Perm.isReadonly (segmentFlags (msegSegment mseg)) + Nothing -> False ------------------------------------------------------------------------ -- DiscoveryState utilities @@ -238,7 +235,7 @@ markAddrAsFunction :: CodeAddrReason (ArchAddrWidth arch) -- ^ Information about why the code address was discovered -- -- Used for debugging - -> ArchSegmentedAddr arch + -> ArchSegmentOff arch -> DiscoveryState arch -> DiscoveryState arch markAddrAsFunction rsn addr s @@ -247,7 +244,7 @@ markAddrAsFunction rsn addr s -- | Mark a list of addresses as function entries with the same reason. markAddrsAsFunction :: CodeAddrReason (ArchAddrWidth arch) - -> [ArchSegmentedAddr arch] + -> [ArchSegmentOff arch] -> DiscoveryState arch -> DiscoveryState arch markAddrsAsFunction rsn addrs s0 = foldl' (\s a -> markAddrAsFunction rsn a s) s0 addrs @@ -270,17 +267,17 @@ data FoundAddr arch -- | The state for the function explroation monad data FunState arch ids = FunState { funNonceGen :: !(NonceGenerator (ST ids) ids) - , curFunAddr :: !(ArchSegmentedAddr arch) + , curFunAddr :: !(ArchSegmentOff arch) , _curFunCtx :: !(DiscoveryState arch) -- ^ Discovery state without this function - , _curFunBlocks :: !(Map (ArchSegmentedAddr arch) (ParsedBlock arch ids)) + , _curFunBlocks :: !(Map (ArchSegmentOff arch) (ParsedBlock arch ids)) -- ^ Maps an address to the blocks associated with that address. - , _foundAddrs :: !(Map (ArchSegmentedAddr arch) (FoundAddr arch)) + , _foundAddrs :: !(Map (ArchSegmentOff arch) (FoundAddr arch)) -- ^ Maps found address to the pre-state for that block. , _reverseEdges :: !(ReverseEdgeMap arch) -- ^ Maps each code address to the list of predecessors that -- affected its abstract state. - , _frontier :: !(Set (ArchSegmentedAddr arch)) + , _frontier :: !(Set (ArchSegmentOff arch)) -- ^ Addresses to explore next. } @@ -289,26 +286,24 @@ curFunCtx :: Simple Lens (FunState arch ids) (DiscoveryState arch) curFunCtx = lens _curFunCtx (\s v -> s { _curFunCtx = v }) -- | Information about current function we are working on -curFunBlocks :: Simple Lens (FunState arch ids) (Map (ArchSegmentedAddr arch) (ParsedBlock arch ids)) +curFunBlocks :: Simple Lens (FunState arch ids) (Map (ArchSegmentOff arch) (ParsedBlock arch ids)) curFunBlocks = lens _curFunBlocks (\s v -> s { _curFunBlocks = v }) -foundAddrs :: Simple Lens (FunState arch ids) (Map (ArchSegmentedAddr arch) (FoundAddr arch)) +foundAddrs :: Simple Lens (FunState arch ids) (Map (ArchSegmentOff arch) (FoundAddr arch)) foundAddrs = lens _foundAddrs (\s v -> s { _foundAddrs = v }) -type ReverseEdgeMap arch = Map (ArchSegmentedAddr arch) (Set (ArchSegmentedAddr arch)) +type ReverseEdgeMap arch = Map (ArchSegmentOff arch) (Set (ArchSegmentOff arch)) -- | Maps each code address to the list of predecessors that -- affected its abstract state. reverseEdges :: Simple Lens (FunState arch ids) (ReverseEdgeMap arch) reverseEdges = lens _reverseEdges (\s v -> s { _reverseEdges = v }) - - -- | Set of addresses to explore next. -- -- This is a map so that we can associate a reason why a code address -- was added to the frontier. -frontier :: Simple Lens (FunState arch ids) (Set (ArchSegmentedAddr arch)) +frontier :: Simple Lens (FunState arch ids) (Set (ArchSegmentOff arch)) frontier = lens _frontier (\s v -> s { _frontier = v }) ------------------------------------------------------------------------ @@ -330,11 +325,11 @@ liftST = FunM . lift -- | Joins in the new abstract state and returns the locations for -- which the new state is changed. -mergeIntraJump :: ArchSegmentedAddr arch +mergeIntraJump :: ArchSegmentOff arch -- ^ Source label that we are jumping from. -> AbsBlockState (ArchReg arch) -- ^ Block state after executing instructions. - -> ArchSegmentedAddr arch + -> ArchSegmentOff arch -- ^ Address we are trying to reach. -> FunM arch ids () mergeIntraJump src ab tgt = do @@ -373,15 +368,16 @@ mergeIntraJump src ab tgt = do matchJumpTable :: MemWidth (ArchAddrWidth arch) => Memory (ArchAddrWidth arch) -> BVValue arch ids (ArchAddrWidth arch) -- ^ Memory address that IP is read from. - -> Maybe (ArchSegmentedAddr arch, BVValue arch ids (ArchAddrWidth arch)) + -> Maybe (ArchMemAddr arch, BVValue arch ids (ArchAddrWidth arch)) matchJumpTable mem read_addr -- Turn the read address into base + offset. | Just (BVAdd _ offset base_val) <- valueAsApp read_addr - , Just base <- asLiteralAddr mem base_val + , Just base <- asLiteralAddr base_val -- Turn the offset into a multiple by an index. , Just (BVMul _ (BVValue _ mul) jump_index) <- valueAsApp offset , mul == toInteger (addrSize (memAddrWidth mem)) - , Perm.isReadonly (segmentFlags (addrSegment base)) = do + , Just mseg <- asSegmentOff mem base + , Perm.isReadonly (segmentFlags (msegSegment mseg)) = do Just (base, jump_index) matchJumpTable _ _ = Nothing @@ -409,15 +405,16 @@ showJumpTableBoundsError err = -- table. getJumpTableBounds :: ArchitectureInfo a -> AbsProcessorState (ArchReg a) ids -- ^ Current processor registers. - -> ArchSegmentedAddr a -- ^ Base + -> ArchMemAddr a -- ^ Base -> BVValue a ids (ArchAddrWidth a) -- ^ Index in jump table - -> Either (JumpTableBoundsError a ids) (ArchAddr a) + -> Either (JumpTableBoundsError a ids) (ArchAddrWord a) -- ^ One past last index in jump table or nothing getJumpTableBounds info regs base jump_index = withArchConstraints info $ case transferValue regs jump_index of StridedInterval (SI.StridedInterval _ index_base index_range index_stride) -> do + let mem = absMem regs let index_end = index_base + (index_range + 1) * index_stride - if rangeInReadonlySegment base (jumpTableEntrySize info * fromInteger index_end) then + if rangeInReadonlySegment mem base (jumpTableEntrySize info * fromInteger index_end) then case Jmp.unsignedUpperBound (regs^.indexBounds) jump_index of Right (Jmp.IntegerUpperBound bnd) | bnd == index_range -> Right $! fromInteger index_end Right bnd -> Left (UpperBoundMismatch bnd index_range) @@ -430,18 +427,6 @@ getJumpTableBounds info regs base jump_index = withArchConstraints info $ ------------------------------------------------------------------------ -- -tryLookupBlock :: String - -> ArchSegmentedAddr arch - -> Map Word64 (Block arch ids) - -> Word64 - -> Block arch ids -tryLookupBlock ctx base block_map idx = - case Map.lookup idx block_map of - Nothing -> - error $ "internal error: tryLookupBlock " ++ ctx ++ " " ++ show base - ++ " given invalid index " ++ show idx - Just b -> b - refineProcStateBounds :: ( OrdF (ArchReg arch) , HasRepr (ArchReg arch) TypeRepr ) @@ -458,20 +443,20 @@ refineProcStateBounds v isTrue ps = -- ParseState data ParseState arch ids = - ParseState { _writtenCodeAddrs :: ![ArchSegmentedAddr arch] + ParseState { _writtenCodeAddrs :: ![ArchSegmentOff arch] -- ^ Addresses marked executable that were written to memory. , _intraJumpTargets :: - ![(ArchSegmentedAddr arch, AbsBlockState (ArchReg arch))] - , _newFunctionAddrs :: ![ArchSegmentedAddr arch] + ![(ArchSegmentOff arch, AbsBlockState (ArchReg arch))] + , _newFunctionAddrs :: ![ArchSegmentOff arch] } -writtenCodeAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentedAddr arch] +writtenCodeAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentOff arch] writtenCodeAddrs = lens _writtenCodeAddrs (\s v -> s { _writtenCodeAddrs = v }) -intraJumpTargets :: Simple Lens (ParseState arch ids) [(ArchSegmentedAddr arch, AbsBlockState (ArchReg arch))] +intraJumpTargets :: Simple Lens (ParseState arch ids) [(ArchSegmentOff arch, AbsBlockState (ArchReg arch))] intraJumpTargets = lens _intraJumpTargets (\s v -> s { _intraJumpTargets = v }) -newFunctionAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentedAddr arch] +newFunctionAddrs :: Simple Lens (ParseState arch ids) [ArchSegmentOff arch] newFunctionAddrs = lens _newFunctionAddrs (\s v -> s { _newFunctionAddrs = v }) @@ -500,7 +485,7 @@ identifyCall :: ( RegConstraint (ArchReg a) => Memory (ArchAddrWidth a) -> [Stmt a ids] -> RegState (ArchReg a) (Value a ids) - -> Maybe (Seq (Stmt a ids), ArchSegmentedAddr a) + -> Maybe (Seq (Stmt a ids), ArchSegmentOff a) identifyCall mem stmts0 s = go (Seq.fromList stmts0) Seq.empty where -- Get value of stack pointer next_sp = s^.boundValue sp_reg @@ -516,10 +501,11 @@ identifyCall mem stmts0 s = go (Seq.fromList stmts0) Seq.empty -- Check this is the right length. , Just Refl <- testEquality (typeRepr next_sp) (typeRepr val) -- Check if value is a valid literal address - , Just val_a <- asLiteralAddr mem val + , Just val_a <- asLiteralAddr val -- Check if segment of address is marked as executable. - , Perm.isExecutable (segmentFlags (addrSegment val_a)) -> - Just (prev Seq.>< after, val_a) + , Just ret_addr <- asSegmentOff mem val_a + , segmentFlags (msegSegment ret_addr) `Perm.hasPerm` Perm.execute -> + Just (prev Seq.>< after, ret_addr) -- Stop if we hit any architecture specific instructions prior to -- identifying return address since they may have side effects. | ExecArchStmt _ <- stmt -> Nothing @@ -531,9 +517,9 @@ identifyCall mem stmts0 s = go (Seq.fromList stmts0) Seq.empty data ParseContext arch ids = ParseContext { pctxMemory :: !(Memory (ArchAddrWidth arch)) , pctxArchInfo :: !(ArchitectureInfo arch) - , pctxFunAddr :: !(ArchSegmentedAddr arch) + , pctxFunAddr :: !(ArchSegmentOff arch) -- ^ Address of function this block is being parsed as - , pctxAddr :: !(ArchSegmentedAddr arch) + , pctxAddr :: !(ArchSegmentOff arch) -- ^ Address of the current block , pctxBlockMap :: !(Map Word64 (Block arch ids)) } @@ -567,7 +553,7 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do -- The last statement was a call. -- Note that in some cases the call is known not to return, and thus -- this code will never jump to the return value. - _ | Just (prev_stmts, ret) <- identifyCall mem stmts s' -> do + _ | Just (prev_stmts, ret) <- identifyCall mem stmts s' -> do mapM_ (recordWriteStmt arch_info mem absProcState') prev_stmts let abst = finalAbsBlockState absProcState' s' seq abst $ do @@ -596,17 +582,18 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do -- -- Note, we disallow jumps back to function entry point thus forcing them to be treated -- as tail calls or unclassified if the stack has changed size. - | Just tgt_addr <- asLiteralAddr mem (s'^.boundValue ip_reg) - , tgt_addr /= pctxFunAddr ctx -> do - assert (segmentFlags (addrSegment tgt_addr) `Perm.hasPerm` Perm.execute) $ do + | Just tgt_addr <- asLiteralAddr (s'^.boundValue ip_reg) + , Just tgt_mseg <- asSegmentOff mem tgt_addr + , segmentFlags (msegSegment tgt_mseg) `Perm.hasPerm` Perm.execute + , tgt_mseg /= pctxFunAddr ctx -> do mapM_ (recordWriteStmt arch_info mem absProcState') stmts -- Merge block state and add intra jump target. let abst = finalAbsBlockState absProcState' s' - let abst' = abst & setAbsIP tgt_addr - intraJumpTargets %= ((tgt_addr, abst'):) + let abst' = abst & setAbsIP tgt_mseg + intraJumpTargets %= ((tgt_mseg, abst'):) pure StatementList { stmtsIdent = lbl_idx , stmtsNonterm = stmts - , stmtsTerm = ParsedJump s' tgt_addr + , stmtsTerm = ParsedJump s' tgt_mseg , stmtsAbsState = absProcState' } -- Block ends with what looks like a jump table. @@ -635,24 +622,25 @@ parseFetchAndExecute ctx lbl_idx stmts regs s' = do -- If the current index can be interpreted as a intra-procedural jump, -- then it will add that to the current procedure. -- This returns the last address read. - let resolveJump :: [ArchSegmentedAddr arch] + let resolveJump :: [ArchSegmentOff arch] -- /\ Addresses in jump table in reverse order - -> ArchAddr arch + -> ArchAddrWord arch -- /\ Current index - -> State (ParseState arch ids) [ArchSegmentedAddr arch] + -> State (ParseState arch ids) [ArchSegmentOff arch] resolveJump prev idx | idx == read_end = do -- Stop jump table when we have reached computed bounds. return (reverse prev) resolveJump prev idx = do - let read_addr = base & addrOffset +~ 8 * idx + let read_addr = base & incAddr (toInteger (8 * idx)) case readAddr mem (archEndianness arch_info) read_addr of Right tgt_addr - | Perm.isReadonly (segmentFlags (addrSegment read_addr)) -> do - let flags = segmentFlags (addrSegment tgt_addr) - assert (flags `Perm.hasPerm` Perm.execute) $ do - let abst' = abst & setAbsIP tgt_addr - intraJumpTargets %= ((tgt_addr, abst'):) - resolveJump (tgt_addr:prev) (idx+1) + | Just read_mseg <- asSegmentOff mem read_addr + , Perm.isReadonly (segmentFlags (msegSegment read_mseg)) + , Just tgt_mseg <- asSegmentOff mem tgt_addr + , Perm.isExecutable (segmentFlags (msegSegment tgt_mseg)) -> do + let abst' = abst & setAbsIP tgt_mseg + intraJumpTargets %= ((tgt_mseg, abst'):) + resolveJump (tgt_mseg:prev) (idx+1) _ -> do debug DCFG ("Stop jump table: " ++ show idx ++ " " ++ show read_end) $ do return (reverse prev) @@ -707,7 +695,6 @@ parseBlock ctx b regs = do let mem = pctxMemory ctx let arch_info = pctxArchInfo ctx withArchConstraints arch_info $ do - let src = pctxAddr ctx let idx = blockLabel b let block_map = pctxBlockMap ctx -- FIXME: we should propagate c back to the initial block, not just b @@ -716,9 +703,9 @@ parseBlock ctx b regs = do Branch c lb rb -> do mapM_ (recordWriteStmt arch_info mem absProcState') (blockStmts b) - let l = tryLookupBlock "left branch" src block_map lb + let Just l = Map.lookup lb block_map let l_regs = refineProcStateBounds c True $ refineProcState c absTrue absProcState' - let r = tryLookupBlock "right branch" src block_map rb + let Just r = Map.lookup rb block_map let r_regs = refineProcStateBounds c False $ refineProcState c absFalse absProcState' let l_regs' = absEvalStmts arch_info l_regs (blockStmts b) @@ -764,11 +751,11 @@ parseBlock ctx b regs = do -- | This evalutes the statements in a block to expand the information known -- about control flow targets of this block. -transferBlocks :: ArchSegmentedAddr arch +transferBlocks :: ArchSegmentOff arch -- ^ Address of theze blocks -> FoundAddr arch -- ^ State leading to explore block - -> ArchAddr arch + -> ArchAddrWord arch -- ^ Size of the region these blocks cover. -> Map Word64 (Block arch ids) -- ^ Map from labelIndex to associated block @@ -805,24 +792,27 @@ transferBlocks src finfo sz block_map = mapM_ (\(addr, abs_state) -> mergeIntraJump src abs_state addr) (ps^.intraJumpTargets) -transfer :: ArchSegmentedAddr arch +transfer :: ArchSegmentOff arch -> FunM arch ids () transfer addr = do - mfinfo <- use $ foundAddrs . at addr - let finfo = fromMaybe (error $ "getBlock called on unfound address " ++ show addr ++ ".") $ - mfinfo ainfo <- uses curFunCtx archInfo withArchConstraints ainfo $ do + mfinfo <- use $ foundAddrs . at addr + let finfo = fromMaybe (error $ "transfer called on unfound address " ++ show addr ++ ".") $ + mfinfo + mem <- uses curFunCtx memory nonceGen <- gets funNonceGen prev_block_map <- use $ curFunBlocks -- Get maximum number of bytes to disassemble + let seg = msegSegment addr + off = msegOffset addr let max_size = case Map.lookupGT addr prev_block_map of - Just (next,_) | addrSegment next == addrSegment addr -> next^.addrOffset - addr^.addrOffset - _ -> segmentSize (addrSegment addr) - addr^.addrOffset + Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o + _ -> segmentSize seg - off let ab = foundAbstractState finfo (bs0, sz, maybeError) <- - liftST $ disassembleFn ainfo nonceGen addr max_size ab + liftST $ disassembleFn ainfo mem nonceGen addr max_size ab let ctx = RewriteContext { rwctxNonceGen = nonceGen , rwctxArchFn = rewriteArchFn ainfo @@ -842,7 +832,7 @@ transfer addr = do let -- TODO: Fix this to work with segmented memory w = addrWidthNatRepr (archAddrWidth ainfo) errState = mkRegState Initial - & boundValue ip_reg .~ RelocatableValue w addr + & boundValue ip_reg .~ RelocatableValue w (relativeSegmentAddr addr) errMsg = Text.pack $ fromMaybe "Unknown error" maybeError errBlock = Block { blockLabel = 0 , blockStmts = [] @@ -873,7 +863,7 @@ analyzeBlocks = do -- -- This returns the updated state and the discovered control flow -- graph for this function. -analyzeFunction :: ArchSegmentedAddr arch +analyzeFunction :: ArchSegmentOff arch -- ^ The address to explore -> CodeAddrReason (ArchAddrWidth arch) -- ^ Reason to provide for why we are analyzing this function @@ -883,12 +873,13 @@ analyzeFunction :: ArchSegmentedAddr arch -> DiscoveryState arch -- ^ The current binary information. -> (DiscoveryState arch, Some (DiscoveryFunInfo arch)) -analyzeFunction addr rsn s = +analyzeFunction addr rsn s = do case Map.lookup addr (s^.funInfo) of Just finfo -> (s, finfo) Nothing -> do - withGlobalSTNonceGenerator $ \gen -> do let info = archInfo s + withArchConstraints info $ do + withGlobalSTNonceGenerator $ \gen -> do let mem = memory s let faddr = FoundAddr { foundReason = rsn @@ -926,27 +917,26 @@ analyzeDiscoveredFunctions info = analyzeDiscoveredFunctions $! fst (analyzeFunction addr rsn info) -- | This returns true if the address is writable and value is executable. -isDataCodePointer :: SegmentedAddr w -> SegmentedAddr w -> Bool +isDataCodePointer :: MemSegmentOff w -> MemSegmentOff w -> Bool isDataCodePointer a v - = segmentFlags (addrSegment a) `Perm.hasPerm` Perm.write - && segmentFlags (addrSegment v) `Perm.hasPerm` Perm.execute + = segmentFlags (msegSegment a) `Perm.hasPerm` Perm.write + && segmentFlags (msegSegment v) `Perm.hasPerm` Perm.execute - -addMemCodePointer :: (ArchSegmentedAddr arch, ArchSegmentedAddr arch) +addMemCodePointer :: (ArchSegmentOff arch, ArchSegmentOff arch) -> DiscoveryState arch -> DiscoveryState arch addMemCodePointer (src,val) = markAddrAsFunction (CodePointerInMem src) val -exploreMemPointers :: [(ArchSegmentedAddr arch, ArchSegmentedAddr arch)] +exploreMemPointers :: [(ArchSegmentOff arch, ArchSegmentOff arch)] -- ^ List of addresses and value pairs to use for -- considering possible addresses. -> DiscoveryState arch -> DiscoveryState arch exploreMemPointers mem_words info = flip execState info $ do - let mem_addrs = - filter (uncurry isDataCodePointer) $ - mem_words + let mem_addrs + = filter (\(a,v) -> isDataCodePointer a v) + $ mem_words mapM_ (modify . addMemCodePointer) mem_addrs -- | Construct a discovery info by starting with exploring from a given set of @@ -958,9 +948,9 @@ cfgFromAddrs :: forall arch -- ^ Memory to use when decoding instructions. -> SymbolAddrMap (ArchAddrWidth arch) -- ^ Map from addresses to the associated symbol name. - -> [ArchSegmentedAddr arch] + -> [ArchSegmentOff arch] -- ^ Initial function entry points. - -> [(ArchSegmentedAddr arch, ArchSegmentedAddr arch)] + -> [(ArchSegmentOff arch, ArchSegmentOff arch)] -- ^ Function entry points in memory to be explored -- after exploring function entry points. -- diff --git a/src/Data/Macaw/Discovery/AbsEval.hs b/src/Data/Macaw/Discovery/AbsEval.hs index 4261cf0e..4922caba 100644 --- a/src/Data/Macaw/Discovery/AbsEval.hs +++ b/src/Data/Macaw/Discovery/AbsEval.hs @@ -23,15 +23,13 @@ import Data.Macaw.AbsDomain.AbsState import Data.Macaw.Architecture.Info import Data.Macaw.CFG -import Data.Macaw.Memory - -- | Get the absolute value associated with an address. -absEvalReadMem :: (OrdF (ArchReg a), ShowF (ArchReg a), MemWidth (RegAddrWidth (ArchReg a))) - => AbsProcessorState (ArchReg a) ids - -> ArchAddrValue a ids - -> MemRepr tp - -- ^ Information about the memory layout for the value. - -> ArchAbsValue a tp +absEvalReadMem :: RegisterInfo (ArchReg a) + => AbsProcessorState (ArchReg a) ids + -> ArchAddrValue a ids + -> MemRepr tp + -- ^ Information about the memory layout for the value. + -> ArchAbsValue a tp absEvalReadMem r a tp | StackOffset _ s <- transferValue r a , [o] <- Set.toList s @@ -74,6 +72,8 @@ absEvalStmt info stmt = withArchConstraints info $ modify $ addMemWrite addr memRepr v PlaceHolderStmt{} -> pure () + InstructionStart _ _ -> + pure () Comment{} -> pure () ExecArchStmt astmt -> diff --git a/src/Data/Macaw/Discovery/State.hs b/src/Data/Macaw/Discovery/State.hs index d650f1b3..1ebdf20e 100644 --- a/src/Data/Macaw/Discovery/State.hs +++ b/src/Data/Macaw/Discovery/State.hs @@ -75,20 +75,18 @@ import Data.Macaw.Types -- | This describes the source of an address that was marked as containing code. data CodeAddrReason w - = InWrite !(SegmentedAddr w) + = InWrite !(MemSegmentOff w) -- ^ Exploring because the given block writes it to memory. - | NextIP !(SegmentedAddr w) + | NextIP !(MemSegmentOff w) -- ^ Exploring because the given block jumps here. - | CallTarget !(SegmentedAddr w) + | CallTarget !(MemSegmentOff w) -- ^ Exploring because address terminates with a call that jumps here. | InitAddr -- ^ Identified as an entry point from initial information - | CodePointerInMem !(SegmentedAddr w) + | CodePointerInMem !(MemSegmentOff w) -- ^ A code pointer that was stored at the given address. - | SplitAt !(SegmentedAddr w) + | SplitAt !(MemAddr w) -- ^ Added because the address split this block after it had been disassembled. - | InterProcedureJump !(SegmentedAddr w) - -- ^ A jump from an address in another function. | UserRequest -- ^ The user requested that we analyze this address as a function. deriving (Show) @@ -97,18 +95,18 @@ data CodeAddrReason w -- SymbolAddrMap -- | Map from addresses to the associated symbol name. -newtype SymbolAddrMap w = SymbolAddrMap { symbolAddrsAsMap :: Map (SegmentedAddr w) BSC.ByteString } +newtype SymbolAddrMap w = SymbolAddrMap { symbolAddrsAsMap :: Map (MemSegmentOff w) BSC.ByteString } -- | Return an empty symbol addr map emptySymbolAddrMap :: SymbolAddrMap w emptySymbolAddrMap = SymbolAddrMap Map.empty -- | Return addresses in symbol name map -symbolAddrs :: SymbolAddrMap w -> [SegmentedAddr w] +symbolAddrs :: SymbolAddrMap w -> [MemSegmentOff w] symbolAddrs = Map.keys . symbolAddrsAsMap -- | Return the symbol at the given map. -symbolAtAddr :: SegmentedAddr w -> SymbolAddrMap w -> Maybe BSC.ByteString +symbolAtAddr :: MemSegmentOff w -> SymbolAddrMap w -> Maybe BSC.ByteString symbolAtAddr a m = Map.lookup a (symbolAddrsAsMap m) -- | Check that a symbol name is well formed, returning an error message if not. @@ -125,26 +123,8 @@ checkSymbolName sym_nm = -- -- It returns either an error message or the map. symbolAddrMap :: forall w - . Map (SegmentedAddr w) BSC.ByteString + . Map (MemSegmentOff w) BSC.ByteString -> Either String (SymbolAddrMap w) -{- -symbolAddrMap symbols - | Map.size symbol_names /= Map.size symbols = do - let l = filter isMulti (Map.toList symbol_names) - in Left $ "Duplicate symbol names in symbol name map:\n" ++ show l - where symbol_names :: Map BSC.ByteString [SegmentedAddr w] - symbol_names = foldl insPair Map.empty (Map.toList symbols) - - isMulti :: (BSC.ByteString, [SegmentedAddr w]) - -> Bool - isMulti (_,[_]) = False - isMulti (_,_) = True - - insPair :: Map BSC.ByteString [SegmentedAddr w] - -> (SegmentedAddr w, BSC.ByteString) - -> Map BSC.ByteString [SegmentedAddr w] - insPair m (a,nm) = Map.insertWith (++) nm [a] m --} symbolAddrMap symbols = do mapM_ checkSymbolName (Map.elems symbols) pure $! SymbolAddrMap symbols @@ -173,13 +153,13 @@ instance (Integral w, Show w) => Show (GlobalDataInfo w) where -- interpreted. data ParsedTermStmt arch ids = ParsedCall !(RegState (ArchReg arch) (Value arch ids)) - !(Maybe (ArchSegmentedAddr arch)) + !(Maybe (ArchSegmentOff arch)) -- ^ A call with the current register values and location to return to or 'Nothing' if this is a tail call. - | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentedAddr arch) + | ParsedJump !(RegState (ArchReg arch) (Value arch ids)) !(ArchSegmentOff arch) -- ^ A jump to an explicit address within a function. | ParsedLookupTable !(RegState (ArchReg arch) (Value arch ids)) !(BVValue arch ids (ArchAddrWidth arch)) - !(V.Vector (ArchSegmentedAddr arch)) + !(V.Vector (ArchSegmentOff arch)) -- ^ A lookup table that branches to one of a vector of addresses. -- -- The registers store the registers, the value contains the index to jump @@ -189,7 +169,7 @@ data ParsedTermStmt arch ids | ParsedIte !(Value arch ids BoolType) !(StatementList arch ids) !(StatementList arch ids) -- ^ An if-then-else | ParsedSyscall !(RegState (ArchReg arch) (Value arch ids)) - !(ArchSegmentedAddr arch) + !(ArchSegmentOff arch) -- ^ A system call with the registers prior to call and given return address. | ParsedTranslateError !Text -- ^ An error occured in translating the block @@ -199,41 +179,49 @@ data ParsedTermStmt arch ids deriving instance ArchConstraints arch => Show (ParsedTermStmt arch ids) -- | Pretty print the block contents indented inside brackets. -ppStatementList :: ArchConstraints arch => StatementList arch ids -> Doc -ppStatementList b = +ppStatementList :: ArchConstraints arch => (ArchAddrWord arch -> Doc) -> StatementList arch ids -> Doc +ppStatementList ppOff b = text "{" <$$> - indent 2 (vcat (pretty <$> stmtsNonterm b) <$$> pretty (stmtsTerm b)) <$$> + indent 2 (vcat (ppStmt ppOff <$> stmtsNonterm b) <$$> + ppTermStmt ppOff (stmtsTerm b)) <$$> text "}" -instance ArchConstraints arch => Pretty (ParsedTermStmt arch ids) where - pretty (ParsedCall s Nothing) = - text "tail call" <$$> - indent 2 (pretty s) - pretty (ParsedCall s (Just next)) = - text "call and return to" <+> text (show next) <$$> - indent 2 (pretty s) - pretty (ParsedJump s addr) = - text "jump" <+> text (show addr) <$$> - indent 2 (pretty s) - pretty (ParsedLookupTable s idx entries) = - text "ijump" <+> pretty idx <$$> - indent 2 (vcat (imap (\i v -> int i <+> text ":->" <+> text (show v)) (V.toList entries))) <$$> - indent 2 (pretty s) - pretty (ParsedReturn s) = - text "return" <$$> - indent 2 (pretty s) - pretty (ParsedIte c t f) = - text "ite" <+> pretty c <$$> - ppStatementList t <$$> - ppStatementList f - pretty (ParsedSyscall s addr) = - text "syscall, return to" <+> text (show addr) <$$> - indent 2 (pretty s) - pretty (ParsedTranslateError msg) = - text "translation error" <+> text (Text.unpack msg) - pretty (ClassifyFailure s) = - text "unknown transfer" <$$> - indent 2 (pretty s) +ppTermStmt :: ArchConstraints arch + => (ArchAddrWord arch -> Doc) + -- ^ Given an address offset, this prints the value + -> ParsedTermStmt arch ids + -> Doc +ppTermStmt ppOff tstmt = + case tstmt of + ParsedCall s Nothing -> + text "tail call" <$$> + indent 2 (pretty s) + ParsedCall s (Just next) -> + text "call and return to" <+> text (show next) <$$> + indent 2 (pretty s) + ParsedJump s addr -> + text "jump" <+> text (show addr) <$$> + indent 2 (pretty s) + ParsedLookupTable s idx entries -> + text "ijump" <+> pretty idx <$$> + indent 2 (vcat (imap (\i v -> int i <+> text ":->" <+> text (show v)) + (V.toList entries))) <$$> + indent 2 (pretty s) + ParsedReturn s -> + text "return" <$$> + indent 2 (pretty s) + ParsedIte c t f -> + text "ite" <+> pretty c <$$> + ppStatementList ppOff t <$$> + ppStatementList ppOff f + ParsedSyscall s addr -> + text "syscall, return to" <+> text (show addr) <$$> + indent 2 (pretty s) + ParsedTranslateError msg -> + text "translation error" <+> text (Text.unpack msg) + ClassifyFailure s -> + text "unknown transfer" <$$> + indent 2 (pretty s) ------------------------------------------------------------------------ -- StatementList @@ -307,9 +295,9 @@ rewriteStatementList b = do -- | A contiguous region of instructions in memory. data ParsedBlock arch ids - = ParsedBlock { blockAddr :: !(ArchSegmentedAddr arch) + = ParsedBlock { blockAddr :: !(ArchSegmentOff arch) -- ^ Address of region - , blockSize :: !(ArchAddr arch) + , blockSize :: !(ArchAddrWord arch) -- ^ The size of the region of memory covered by this. , blockReason :: !(CodeAddrReason (ArchAddrWidth arch)) -- ^ Reason that we marked this address as @@ -326,25 +314,26 @@ deriving instance ArchConstraints arch instance ArchConstraints arch => Pretty (ParsedBlock arch ids) where - pretty r = - let b = blockStatementList r - in text (show (blockAddr r)) PP.<> text ":" <$$> - indent 2 (vcat (pretty <$> stmtsNonterm b) <$$> pretty (stmtsTerm b)) + pretty b = + let sl = blockStatementList b + ppOff o = text (show (incAddr (toInteger o) (relativeSegmentAddr (blockAddr b)))) + in text (show (blockAddr b)) PP.<> text ":" <$$> + indent 2 (vcat (ppStmt ppOff <$> stmtsNonterm sl) <$$> ppTermStmt ppOff (stmtsTerm sl)) ------------------------------------------------------------------------ -- DiscoveryFunInfo -- | Information discovered about a particular function data DiscoveryFunInfo arch ids - = DiscoveryFunInfo { discoveredFunAddr :: !(ArchSegmentedAddr arch) + = DiscoveryFunInfo { discoveredFunAddr :: !(ArchSegmentOff arch) -- ^ Address of function entry block. , discoveredFunName :: !BSC.ByteString -- ^ Name of function should be unique for program - , _parsedBlocks :: !(Map (ArchSegmentedAddr arch) (ParsedBlock arch ids)) + , _parsedBlocks :: !(Map (ArchSegmentOff arch) (ParsedBlock arch ids)) -- ^ Maps an address to the blocks associated with that address. } -parsedBlocks :: Simple Lens (DiscoveryFunInfo arch ids) (Map (ArchSegmentedAddr arch) (ParsedBlock arch ids)) +parsedBlocks :: Simple Lens (DiscoveryFunInfo arch ids) (Map (ArchSegmentOff arch) (ParsedBlock arch ids)) parsedBlocks = lens _parsedBlocks (\s v -> s { _parsedBlocks = v }) instance ArchConstraints arch => Pretty (DiscoveryFunInfo arch ids) where @@ -363,13 +352,13 @@ data DiscoveryState arch -- ^ Map addresses to known symbol names , archInfo :: !(ArchitectureInfo arch) -- ^ Architecture-specific information needed for discovery. - , _globalDataMap :: !(Map (ArchSegmentedAddr arch) - (GlobalDataInfo (ArchSegmentedAddr arch))) + , _globalDataMap :: !(Map (ArchMemAddr arch) + (GlobalDataInfo (ArchMemAddr arch))) -- ^ Maps each address that appears to be global data to information -- inferred about it. - , _funInfo :: !(Map (ArchSegmentedAddr arch) (Some (DiscoveryFunInfo arch))) + , _funInfo :: !(Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch))) -- ^ Map from function addresses to discovered information about function - , _unexploredFunctions :: !(Map (ArchSegmentedAddr arch) (CodeAddrReason (ArchAddrWidth arch))) + , _unexploredFunctions :: !(Map (ArchSegmentOff arch) (CodeAddrReason (ArchAddrWidth arch))) -- ^ This maps addresses that have been marked as -- functions, but not yet analyzed to the reason -- they are analyzed. @@ -413,17 +402,16 @@ emptyDiscoveryState mem symbols info = -- | Map each jump table start to the address just after the end. globalDataMap :: Simple Lens (DiscoveryState arch) - (Map (ArchSegmentedAddr arch) - (GlobalDataInfo (ArchSegmentedAddr arch))) + (Map (ArchMemAddr arch) (GlobalDataInfo (ArchMemAddr arch))) globalDataMap = lens _globalDataMap (\s v -> s { _globalDataMap = v }) -- | List of functions to explore next. unexploredFunctions :: Simple Lens (DiscoveryState arch) - (Map (ArchSegmentedAddr arch) (CodeAddrReason (ArchAddrWidth arch))) + (Map (ArchSegmentOff arch) (CodeAddrReason (ArchAddrWidth arch))) unexploredFunctions = lens _unexploredFunctions (\s v -> s { _unexploredFunctions = v }) -- | Get information for specific functions -funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentedAddr arch) (Some (DiscoveryFunInfo arch))) +funInfo :: Simple Lens (DiscoveryState arch) (Map (ArchSegmentOff arch) (Some (DiscoveryFunInfo arch))) funInfo = lens _funInfo (\s v -> s { _funInfo = v }) ------------------------------------------------------------------------ @@ -435,10 +423,8 @@ type RegConstraint r = (OrdF r, HasRepr r TypeRepr, RegisterInfo r, ShowF r) -- | This returns a segmented address if the value can be interpreted as a literal memory -- address, and returns nothing otherwise. asLiteralAddr :: MemWidth (ArchAddrWidth arch) - => Memory (ArchAddrWidth arch) - -> BVValue arch ids (ArchAddrWidth arch) - -> Maybe (ArchSegmentedAddr arch) -asLiteralAddr mem (BVValue _ val) = - absoluteAddrSegment mem (fromInteger val) -asLiteralAddr _ (RelocatableValue _ i) = Just i -asLiteralAddr _ _ = Nothing + => BVValue arch ids (ArchAddrWidth arch) + -> Maybe (ArchMemAddr arch) +asLiteralAddr (BVValue _ val) = Just $ absoluteAddr (fromInteger val) +asLiteralAddr (RelocatableValue _ i) = Just i +asLiteralAddr _ = Nothing diff --git a/src/Data/Macaw/Fold.hs b/src/Data/Macaw/Fold.hs index dbf04d76..5c3da8f8 100644 --- a/src/Data/Macaw/Fold.hs +++ b/src/Data/Macaw/Fold.hs @@ -59,7 +59,7 @@ foldValueCached :: forall m arch ids tp . (Monoid m, CanFoldValues arch) => (forall n. NatRepr n -> Integer -> m) -- ^ Function for literals - -> (forall n. NatRepr n -> ArchSegmentedAddr arch -> m) + -> (ArchMemAddr arch -> m) -- ^ Function for memwords -> (forall utp . ArchReg arch utp -> m) -- ^ Function for input registers @@ -76,7 +76,7 @@ foldValueCached litf rwf initf assignf = getStateMonadMonoid . go case v of BoolValue b -> return (litf (knownNat :: NatRepr 1) (if b then 1 else 0)) BVValue sz i -> return $ litf sz i - RelocatableValue w a -> pure $ rwf w a + RelocatableValue _ a -> pure $ rwf a Initial r -> return $ initf r AssignedValue (Assignment a_id rhs) -> do m <- get diff --git a/src/Data/Macaw/Memory.hs b/src/Data/Macaw/Memory.hs index 32ca264e..4966d354 100644 --- a/src/Data/Macaw/Memory.hs +++ b/src/Data/Macaw/Memory.hs @@ -13,7 +13,6 @@ n-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} module Data.Macaw.Memory @@ -28,13 +27,6 @@ module Data.Macaw.Memory , memSegments , executableSegments , readonlySegments - , readAddr - , segmentOfRange - , addrPermissions - , isCodeAddr - , isCodeAddrOrNull - , absoluteAddrSegment - , memAsAddrPairs -- * AddrWidthRepr , AddrWidthRepr(..) , addrWidthNatRepr @@ -52,26 +44,38 @@ module Data.Macaw.Memory , ppMemSegment , segmentSize , SegmentRange(..) + -- * MemWord + , MemWord + , MemWidth(..) + , memWord + -- * Segment offsets + , MemSegmentOff + , resolveAbsoluteAddr + , resolveSegmentOff + , msegSegment + , msegOffset + , msegAddr + , incSegmentOff + , diffSegmentOff + , memAsAddrPairs + -- * Symbols , SymbolRef(..) , SymbolVersion(..) - -- * Address and offset. - , MemWidth(..) - , MemWord - , memWord - -- * Segmented Addresses - , SegmentedAddr(..) - , addrOffset + -- * General purposes addrs + , MemAddr + , absoluteAddr + , relativeAddr + , relativeSegmentAddr + , asAbsoluteAddr + , asSegmentOff + , diffAddr + , incAddr + , clearAddrLeastBit + -- * Reading + , MemoryError(..) , addrContentsAfter - , addrBase - , addrValue - , bsWord8 - , bsWord16be - , bsWord16le - , bsWord32be - , bsWord32le - , bsWord64be - , bsWord64le , readByteString + , readAddr , readWord8 , readWord16be , readWord16le @@ -79,18 +83,22 @@ module Data.Macaw.Memory , readWord32le , readWord64be , readWord64le - -- * Memory addrs - , MemoryError(..) + -- * Utilities + , bsWord8 + , bsWord16be + , bsWord16le + , bsWord32be + , bsWord32le + , bsWord64be + , bsWord64le ) where import Control.Exception (assert) -import Control.Lens import Data.Bits import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.Foldable as Fold import qualified Data.Map.Strict as Map -import Data.Maybe import Data.Proxy import Data.Word import GHC.TypeLits @@ -178,12 +186,16 @@ bsWord64le bs ------------------------------------------------------------------------ -- MemBase -newtype MemWord (n :: Nat) = MemWord { memWordValue :: Word64 } --- ^ A value in memory. +-- | This represents a particular numeric address in memory. +-- +newtype MemWord (w :: Nat) = MemWord { _memWordValue :: Word64 } instance Show (MemWord w) where showsPrec _ (MemWord w) = showString "0x" . showHex w +instance Pretty (MemWord w) where + pretty = text . show + instance Eq (MemWord w) where MemWord x == MemWord y = x == y @@ -280,8 +292,6 @@ addrWidthClass :: AddrWidthRepr w -> (MemWidth w => a) -> a addrWidthClass Addr32 x = x addrWidthClass Addr64 x = x -$(pure []) - -- | A unique identifier for a segment. type SegmentIndex = Int @@ -369,7 +379,10 @@ data MemSegment w = MemSegment { segmentIndex :: !SegmentIndex -- ^ Unique index for this segment , segmentBase :: !(Maybe (MemWord w)) - -- ^ Base for this segment + -- ^ Base for this segment + -- + -- Note that the current code assumes that segments are + -- always on even addresses even if the base is omitted. , segmentFlags :: !Perm.Flags -- ^ Permisison flags , segmentContents :: !(SegmentContents w) @@ -377,6 +390,11 @@ data MemSegment w -- the segment. } +-- | Check base plus size of segment does not overflow +checkBaseAndSize :: MemWidth w => Maybe (MemWord w) -> MemWord w -> Bool +checkBaseAndSize Nothing _ = True +checkBaseAndSize (Just b) sz = b + sz >= b + -- | Create a memory segment with the given values. memSegment :: MemWidth w => SegmentIndex @@ -388,12 +406,18 @@ memSegment :: MemWidth w -> [SegmentRange w] -- ^ Range of vlaues. -> MemSegment w -memSegment idx base flags contents = - MemSegment { segmentIndex = idx - , segmentBase = base - , segmentFlags = flags - , segmentContents = contentsFromList contents - } +memSegment idx mbase flags contentsl + | checkBaseAndSize mbase (contentsSize contents) = + MemSegment { segmentIndex = idx + , segmentBase = mbase + , segmentFlags = flags + , segmentContents = contents + } + | otherwise = + error "Contents two large for base." + where contents = contentsFromList contentsl + + instance Eq (MemSegment w) where x == y = segmentIndex x == segmentIndex y @@ -417,60 +441,6 @@ ppMemSegment s = instance MemWidth w => Show (MemSegment w) where show = show . ppMemSegment ------------------------------------------------------------------------- --- SegmentedAddr - --- | A memory address is a reference to memory that uses an explicit segment plus --- offset representation. -data SegmentedAddr w = SegmentedAddr { addrSegment :: !(MemSegment w) - , _addrOffset :: !(MemWord w) - } - deriving (Eq, Ord) - -addrOffset :: Simple Lens (SegmentedAddr w) (MemWord w) -addrOffset = lens _addrOffset (\s v -> s { _addrOffset = v }) - --- | Return the base value of an address or 0 if undefined. -addrBase :: MemWidth w => SegmentedAddr w -> MemWord w -addrBase addr = fromMaybe 0 (segmentBase (addrSegment addr)) - --- | Return the offset of the address after adding the base segment value if defined. -addrValue :: MemWidth w => SegmentedAddr w -> MemWord w -addrValue addr = addrBase addr + addr^.addrOffset - -instance Show (SegmentedAddr w) where - showsPrec p a = - case segmentBase (addrSegment a) of - Just b -> - showString "0x" . showHex (memWordValue b + memWordValue (a^.addrOffset)) - Nothing -> - showParen (p > 6) - $ showString "segment" - . shows (segmentIndex (addrSegment a)) - . showString "+" - . shows (a^.addrOffset) - --- | Given a segemnted addr this returns the offset and range at that offset. -nextRegion :: MemWidth w - => SegmentedAddr w - -> Maybe (MemWord w, SegmentRange w) -nextRegion addr = do - let i = addr^.addrOffset - let SegmentContents m = segmentContents (addrSegment addr) - (k,r) <- Map.lookupLE (addr^.addrOffset) m - Just (i-k,r) - - --- | Return contents starting from location or throw a memory error if there --- is an unaligned relocation. -addrContentsAfter :: MemWidth w - => SegmentedAddr w - -> Either (MemoryError w) [SegmentRange w] -addrContentsAfter addr = - case contentsAfter (addr^.addrOffset) (segmentContents (addrSegment addr)) of - Nothing -> Left (UnexpectedRelocation addr) - Just l -> Right l - ------------------------------------------------------------------------ -- Memory @@ -503,70 +473,17 @@ emptyMemory w = Memory { memAddrWidth = w memSegments :: Memory w -> [MemSegment w] memSegments m = Map.elems (memAllSegments m) --- | Return segment with given index in memory. -lookupSegment :: Memory w -> SegmentIndex -> Maybe (MemSegment w) -lookupSegment m i = Map.lookup i (memAllSegments m) - --- | Return list of segmented address values in memory. --- --- Each address includes the value and the base. -memAsAddrPairs :: Memory w - -> Endianness - -> [(SegmentedAddr w, SegmentedAddr w)] -memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do - seg <- memSegments mem - (contents_offset,r) <- contentsList (segmentContents seg) - let addr = SegmentedAddr seg contents_offset - let sz = addrSize mem - case r of - ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do - w <- regularChunks (fromIntegral sz) bs - let Just val = addrRead end w - case Map.lookupLE val (memAbsoluteSegments mem) of - Just (base, value_seg) | val <= base + segmentSize value_seg -> do - let seg_val = SegmentedAddr value_seg (val - base) - in [(addr,seg_val)] - _ -> [] - SymbolicRef{} -> [] - -- | Get executable segments. executableSegments :: Memory w -> [MemSegment w] executableSegments = filter (Perm.isExecutable . segmentFlags) . memSegments +-- | Get readonly segments readonlySegments :: Memory w -> [MemSegment w] readonlySegments = filter (Perm.isReadonly . segmentFlags) . memSegments --- | Given an absolute address, this returns a segment and offset into the segment. -absoluteAddrSegment :: Memory w -> MemWord w -> Maybe (SegmentedAddr w) -absoluteAddrSegment mem addr = addrWidthClass (memAddrWidth mem) $ - case Map.lookupLE addr (memAbsoluteSegments mem) of - Just (base, seg) | addr < base + segmentSize seg -> - Just $! SegmentedAddr { addrSegment = seg - , _addrOffset = addr - base - } - _ -> Nothing - --- | Read an address from the value in the segment or report a memory error. -readAddr :: Memory w - -> Endianness - -> SegmentedAddr w - -> Either (MemoryError w) (SegmentedAddr w) -readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do - let sz = fromIntegral (addrSize addr) - case nextRegion addr of - Just (MemWord offset, ByteRegion bs) - | offset + sz >= offset -- Check for no overfow - , offset + sz <= fromIntegral (BS.length bs) -> do -- Check length - let Just val = addrRead end (BS.take (fromIntegral sz) (BS.drop (fromIntegral offset) bs)) - case Map.lookupLE val (memAbsoluteSegments mem) of - Just (base, seg) | val <= base + segmentSize seg -> Right $ - SegmentedAddr { addrSegment = seg - , _addrOffset = val - base - } - _ -> Left (InvalidAddr addr) - - _ | otherwise -> - Left (AccessViolation addr) +-- | Return segment with given index in memory. +lookupSegment :: Memory w -> SegmentIndex -> Maybe (MemSegment w) +lookupSegment m i = Map.lookup i (memAllSegments m) data InsertError w = OverlapSegment (MemWord w) (MemSegment w) @@ -616,53 +533,188 @@ insertMemSegment seg mem = addrWidthClass (memAddrWidth mem) $ do , memAllSegments = allMap } --- | Return segment if range is entirely contained within a single segment --- and 'Nothing' otherwise. -segmentOfRange :: MemWord w -- ^ Start of range - -> MemWord w -- ^ One past last index in range. - -> Memory w - -> Maybe (MemSegment w) -segmentOfRange base end mem = addrWidthClass (memAddrWidth mem) $ do - case Map.lookupLE base (memAbsoluteSegments mem) of - Just (seg_base, seg) | end <= seg_base + segmentSize seg -> Just seg +------------------------------------------------------------------------ +-- MemSegmentOff +-- | A pair containing a segment and offset. +-- +-- Constructrs enforce that the offset is valid +data MemSegmentOff w = MemSegmentOff { msegSegment :: !(MemSegment w) + , msegOffset :: !(MemWord w) + } + deriving (Eq, Ord) + +-- | Return the segment associated with the given address if well-defined. +resolveAbsoluteAddr :: Memory w -> MemWord w -> Maybe (MemSegmentOff w) +resolveAbsoluteAddr mem addr = addrWidthClass (memAddrWidth mem) $ + case Map.lookupLE addr (memAbsoluteSegments mem) of + Just (base, seg) | addr - base < segmentSize seg -> + Just $! MemSegmentOff seg (addr - base) _ -> Nothing --- | Return true if address satisfies permissions check. -addrPermissions :: MemWord w -> Memory w -> Perm.Flags -addrPermissions addr mem = addrWidthClass (memAddrWidth mem) $ - case Map.lookupLE addr (memAbsoluteSegments mem) of - Just (base, seg) | addr < base + segmentSize seg -> segmentFlags seg - _ -> Perm.none --- | Indicates if address is a code pointer. -isCodeAddr :: Memory w -> MemWord w -> Bool -isCodeAddr mem val = - addrPermissions val mem `Perm.hasPerm` Perm.execute +-- | Make a segment offset pair after ensuring the offset is valid +resolveSegmentOff :: MemWidth w => MemSegment w -> MemWord w -> Maybe (MemSegmentOff w) +resolveSegmentOff seg off + | off < segmentSize seg = Just (MemSegmentOff seg off) + | otherwise = Nothing --- | Indicates if address is an address in code segment or null. -isCodeAddrOrNull :: Memory w -> MemWord w -> Bool -isCodeAddrOrNull _ (MemWord 0) = True -isCodeAddrOrNull mem a = isCodeAddr mem a +-- | Return the absolute address associated with the segment offset pair (if any) +msegAddr :: MemWidth w => MemSegmentOff w -> Maybe (MemWord w) +msegAddr (MemSegmentOff seg off) = (+ off) <$> segmentBase seg + +-- | Increment a segment offset by a given amount. +-- +-- Returns 'Nothing' if the result would be out of range. +incSegmentOff :: MemWidth w => MemSegmentOff w -> Integer -> Maybe (MemSegmentOff w) +incSegmentOff (MemSegmentOff seg off) inc + | 0 <= next && next <= toInteger (segmentSize seg) = Just $ MemSegmentOff seg (fromInteger next) + | otherwise = Nothing + where next = toInteger off + inc + +-- | Return the difference between two segment offsets pairs or `Nothing` if undefined. +diffSegmentOff :: MemWidth w => MemSegmentOff w -> MemSegmentOff w -> Maybe Integer +diffSegmentOff (MemSegmentOff xseg xoff) (MemSegmentOff yseg yoff) + | xseg == yseg = Just $ toInteger xoff - toInteger yoff + | Just xb <- segmentBase xseg + , Just yb <- segmentBase yseg = + Just ((toInteger xb + toInteger xoff) - (toInteger yb + toInteger yoff)) + | otherwise = Nothing + +instance MemWidth w => Show (MemSegmentOff w) where + showsPrec p (MemSegmentOff seg off) = + case segmentBase seg of + Just base -> showString "0x" . showHex (base+off) + Nothing -> + showParen (p > 6) + $ showString "segment" + . shows (segmentIndex seg) + . showString "+" + . shows off + +instance MemWidth w => Pretty (MemSegmentOff w) where + pretty = text . show + +-- | Return list of segmented address values in memory. +-- +-- Each address includes the value and the base. +memAsAddrPairs :: Memory w + -> Endianness + -> [(MemSegmentOff w, MemSegmentOff w)] +memAsAddrPairs mem end = addrWidthClass (memAddrWidth mem) $ do + seg <- memSegments mem + (contents_offset,r) <- contentsList (segmentContents seg) + let sz = addrSize mem + case r of + ByteRegion bs -> assert (BS.length bs `rem` fromIntegral sz == 0) $ do + (off,w) <- + zip [contents_offset..] + (regularChunks (fromIntegral sz) bs) + let Just val = addrRead end w + case resolveAbsoluteAddr mem val of + Just val_ref -> do + pure (MemSegmentOff seg off, val_ref) + _ -> [] + SymbolicRef{} -> [] + +------------------------------------------------------------------------ +-- MemAddr + +-- | A memory address is either an absolute value in memory or an offset of segment that +-- could be relocated. +-- +-- This representation does not require that the address is mapped to +-- actual memory (see `MemSegmentOff` for an address representation +-- that ensures the reference points to allocated memory). +data MemAddr w + = AbsoluteAddr (MemWord w) + -- ^ An address formed from a specific value. + | RelativeAddr !(MemSegment w) !(MemWord w) + -- ^ An address that is relative to some specific segment. + -- + -- Note that the segment base value of this segment should be nothing. + deriving (Eq, Ord) + +-- | Given an absolute address, this returns a segment and offset into the segment. +absoluteAddr :: MemWord w -> MemAddr w +absoluteAddr = AbsoluteAddr + +-- | Return an address relative to a known memory segment +-- if the memory is unmapped. +relativeAddr :: MemWidth w => MemSegment w -> MemWord w -> MemAddr w +relativeAddr seg off = + case segmentBase seg of + Just base -> AbsoluteAddr (base + off) + Nothing -> RelativeAddr seg off + +-- | Return a segmented addr using the offset of an existing segment, or 'Nothing' +-- if the memory is unmapped. +relativeSegmentAddr :: MemWidth w => MemSegmentOff w -> MemAddr w +relativeSegmentAddr (MemSegmentOff seg off) = relativeAddr seg off + +-- | Return the offset of the address after adding the base segment value if defined. +asAbsoluteAddr :: MemWidth w => MemAddr w -> Maybe (MemWord w) +asAbsoluteAddr (AbsoluteAddr w) = Just w +asAbsoluteAddr RelativeAddr{} = Nothing + +-- | Return the resolved segment offset reference from an address. +asSegmentOff :: Memory w -> MemAddr w -> Maybe (MemSegmentOff w) +asSegmentOff mem (AbsoluteAddr addr) = resolveAbsoluteAddr mem addr +asSegmentOff mem (RelativeAddr seg off) = + addrWidthClass (memAddrWidth mem) $ + resolveSegmentOff seg off + +-- | Clear the least significant bit of an address. +clearAddrLeastBit :: MemWidth w => MemAddr w -> MemAddr w +clearAddrLeastBit sa = + case sa of + AbsoluteAddr a -> AbsoluteAddr (a .&. complement 1) + RelativeAddr seg off -> RelativeAddr seg (off .&. complement 1) + +-- | Increment an address by a fixed amount. +incAddr :: MemWidth w => Integer -> MemAddr w -> MemAddr w +incAddr o (AbsoluteAddr a) = AbsoluteAddr (a + fromInteger o) +incAddr o (RelativeAddr seg off) = RelativeAddr seg (off + fromInteger o) + +-- | Returns the number of bytes between two addresses if they are comparable +-- or 'Nothing' if they are not. +diffAddr :: MemWidth w => MemAddr w -> MemAddr w -> Maybe Integer +diffAddr (AbsoluteAddr x) (AbsoluteAddr y) = + Just $ toInteger x - toInteger y +diffAddr (RelativeAddr xseg xoff) (RelativeAddr yseg yoff) | xseg == yseg = + Just $ toInteger xoff - toInteger yoff +diffAddr _ _ = Nothing + +instance MemWidth w => Show (MemAddr w) where + showsPrec _ (AbsoluteAddr a) = showString "0x" . showHex a + showsPrec p (RelativeAddr seg off) = + showParen (p > 6) + $ showString "segment" + . shows (segmentIndex seg) + . showString "+" + . shows off + +instance MemWidth w => Pretty (MemAddr w) where + pretty = text . show ------------------------------------------------------------------------ -- MemoryError -- | Type of errors that may occur when reading memory. data MemoryError w - = UserMemoryError (SegmentedAddr w) !String + = UserMemoryError (MemAddr w) !String -- ^ the memory reader threw an unspecified error at the given location. - | InvalidInstruction (SegmentedAddr w) ![SegmentRange w] + | InvalidInstruction (MemAddr w) ![SegmentRange w] -- ^ The memory reader could not parse the value starting at the given address. - | AccessViolation (SegmentedAddr w) + | AccessViolation (MemAddr w) -- ^ Memory could not be read, because it was not defined. - | PermissionsError (SegmentedAddr w) + | PermissionsError (MemAddr w) -- ^ Memory could not be read due to insufficient permissions. - | UnexpectedRelocation (SegmentedAddr w) + | UnexpectedRelocation (MemAddr w) -- ^ Read from location that partially overlaps a relocated entry - | InvalidAddr (SegmentedAddr w) + | InvalidAddr (MemAddr w) -- ^ The data at the given address did not refer to a valid memory location. -instance Show (MemoryError w) where +instance MemWidth w => Show (MemoryError w) where show (UserMemoryError _ msg) = msg show (InvalidInstruction start contents) = "Invalid instruction at " ++ show start ++ ": " ++ showList contents "" @@ -676,45 +728,72 @@ instance Show (MemoryError w) where "Attempt to interpret an invalid address: " ++ show a ++ "." ------------------------------------------------------------------------ --- Meory reading utilities +-- Memory reading utilities + +-- | Return contents starting from location or throw a memory error if there +-- is an unaligned relocation. +addrContentsAfter :: Memory w + -> MemAddr w + -> Either (MemoryError w) [SegmentRange w] +addrContentsAfter mem addr = addrWidthClass (memAddrWidth mem) $ do + MemSegmentOff seg off <- + case asSegmentOff mem addr of + Just p -> pure p + Nothing -> Left (InvalidAddr addr) + case contentsAfter off (segmentContents seg) of + Just l -> Right l + Nothing -> Left (UnexpectedRelocation addr) -- | Attemtp to read a bytestring of the given length -readByteString :: MemWidth w => SegmentedAddr w -> Word64 -> Either (MemoryError w) BS.ByteString -readByteString addr sz = - case nextRegion addr of - Nothing -> Left (InvalidAddr addr) - Just (MemWord offset, ByteRegion bs) - | offset + sz >= offset -- Check for no overfow - , offset + sz <= fromIntegral (BS.length bs) -> do -- Check length - Right $ (BS.take (fromIntegral sz) (BS.drop (fromIntegral offset) bs)) - | otherwise -> Left (InvalidAddr addr) - Just (_, SymbolicRef{}) -> +readByteString :: Memory w -> MemAddr w -> Word64 -> Either (MemoryError w) BS.ByteString +readByteString mem addr sz = do + l <- addrContentsAfter mem addr + case l of + ByteRegion bs:_ + | sz <= fromIntegral (BS.length bs) -> do -- Check length + Right (BS.take (fromIntegral sz) bs) + | otherwise -> + Left (InvalidAddr addr) + SymbolicRef{}:_ -> Left (UnexpectedRelocation addr) + [] -> + Left (InvalidAddr addr) + +-- | Read an address from the value in the segment or report a memory error. +readAddr :: Memory w + -> Endianness + -> MemAddr w + -> Either (MemoryError w) (MemAddr w) +readAddr mem end addr = addrWidthClass (memAddrWidth mem) $ do + let sz = fromIntegral (addrSize addr) + bs <- readByteString mem addr sz + let Just val = addrRead end bs + Right $ AbsoluteAddr val -- | Read a big endian word16 -readWord8 :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word8 -readWord8 addr = bsWord8 <$> readByteString addr 8 +readWord8 :: Memory w -> MemAddr w -> Either (MemoryError w) Word8 +readWord8 mem addr = bsWord8 <$> readByteString mem addr 1 -- | Read a big endian word16 -readWord16be :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word16 -readWord16be addr = bsWord16be <$> readByteString addr 8 +readWord16be :: Memory w -> MemAddr w -> Either (MemoryError w) Word16 +readWord16be mem addr = bsWord16be <$> readByteString mem addr 2 -- | Read a little endian word16 -readWord16le :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word16 -readWord16le addr = bsWord16le <$> readByteString addr 8 +readWord16le :: Memory w -> MemAddr w -> Either (MemoryError w) Word16 +readWord16le mem addr = bsWord16le <$> readByteString mem addr 2 -- | Read a big endian word32 -readWord32be :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word32 -readWord32be addr = bsWord32be <$> readByteString addr 8 +readWord32be :: Memory w -> MemAddr w -> Either (MemoryError w) Word32 +readWord32be mem addr = bsWord32be <$> readByteString mem addr 4 -- | Read a little endian word32 -readWord32le :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word32 -readWord32le addr = bsWord32le <$> readByteString addr 8 +readWord32le :: Memory w -> MemAddr w -> Either (MemoryError w) Word32 +readWord32le mem addr = bsWord32le <$> readByteString mem addr 4 -- | Read a big endian word64 -readWord64be :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word64 -readWord64be addr = bsWord64be <$> readByteString addr 8 +readWord64be :: Memory w -> MemAddr w -> Either (MemoryError w) Word64 +readWord64be mem addr = bsWord64be <$> readByteString mem addr 8 -- | Read a little endian word64 -readWord64le :: MemWidth w => SegmentedAddr w -> Either (MemoryError w) Word64 -readWord64le addr = bsWord64le <$> readByteString addr 8 +readWord64le :: Memory w -> MemAddr w -> Either (MemoryError w) Word64 +readWord64le mem addr = bsWord64le <$> readByteString mem addr 8 diff --git a/src/Data/Macaw/Memory/ElfLoader.hs b/src/Data/Macaw/Memory/ElfLoader.hs index 41661109..042ceb7c 100644 --- a/src/Data/Macaw/Memory/ElfLoader.hs +++ b/src/Data/Macaw/Memory/ElfLoader.hs @@ -11,6 +11,7 @@ Operations for creating a view of memory from an elf file. {-# LANGUAGE KindSignatures #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeFamilies #-} module Data.Macaw.Memory.ElfLoader ( SectionIndexMap @@ -32,7 +33,6 @@ import Data.Bits import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Lazy as L -import Data.Either (partitionEithers) import Data.ElfEdit import Data.Foldable import Data.IntervalMap.Strict (Interval(..), IntervalMap) @@ -60,7 +60,7 @@ sliceL (i,c) = L.take (fromIntegral c) . L.drop (fromIntegral i) -- address and section contents. -- -- The base address is expressed in terms of the underlying memory segment. -type SectionIndexMap w = Map ElfSectionIndex (SegmentedAddr w, ElfSection (ElfWordType w)) +type SectionIndexMap w = Map ElfSectionIndex (MemSegmentOff w, ElfSection (ElfWordType w)) ------------------------------------------------------------------------ -- Flag conversion @@ -333,8 +333,8 @@ insertElfSegment opt shdrMap contents relocMap phdr = do when (phdr_offset > shdr_start) $ do fail $ "Found section header that overlaps with program header." let sec_offset = fromIntegral $ shdr_start - phdr_offset - let pair = (SegmentedAddr seg sec_offset, sec) - mlsIndexMap %= Map.insert elfIdx pair + let Just addr = resolveSegmentOff seg sec_offset + mlsIndexMap %= Map.insert elfIdx (addr, sec) _ -> fail "Unexpected shdr interval" @@ -398,8 +398,8 @@ insertElfSection sec = do let seg = memSegmentForElfSection idx sec loadMemSegment ("Section " ++ BSC.unpack (elfSectionName sec)) seg let elfIdx = ElfSectionIndex (elfSectionIndex sec) - let pair = (SegmentedAddr seg 0, sec) - mlsIndexMap %= Map.insert elfIdx pair + let Just addr = resolveSegmentOff seg 0 + mlsIndexMap %= Map.insert elfIdx (addr, sec) -- | Load allocated Elf sections into memory. -- @@ -462,29 +462,22 @@ loadExecutable opt path = do ------------------------------------------------------------------------ -- Elf symbol utilities --- | The takes the elf symbol table map and attempts to identify segmented addresses for each one. --- --- It returns a two maps, the first contains entries that could not be resolved; the second --- contains those that could. +-- | The takes the elf symbol table map, creates a map from function +-- symbol addresses to the associated symbol name. resolvedSegmentedElfFuncSymbols :: forall w . Memory w -> [ElfSymbolTableEntry (ElfWordType w)] - -> (Map (MemWord w) [BS.ByteString], Map (SegmentedAddr w) [BS.ByteString]) + -> Map (MemSegmentOff w) [BS.ByteString] resolvedSegmentedElfFuncSymbols mem entries = reprConstraints (memAddrWidth mem) $ let -- Filter out just function entries - isCodeFuncSymbol ste = steType ste == STT_FUNC - && isCodeAddr mem (fromIntegral (steValue ste)) - func_entries = filter isCodeFuncSymbol entries - -- Build absolute address map - absAddrMap :: Map (MemWord w) [BS.ByteString] - absAddrMap = Map.fromListWith (++) $ [ (fromIntegral (steValue ste), [steName ste]) | ste <- func_entries ] - -- Resolve addresses - resolve (v,nms) = - case absoluteAddrSegment mem v of - Nothing -> Left (v, nms) - Just sv -> Right (sv, nms) - (u,r) = partitionEithers $ resolve <$> Map.toList absAddrMap - in (Map.fromList u, Map.fromList r) + func_entries = + [ (addr, [steName ste]) + | ste <- entries + , steType ste == STT_FUNC + , addr <- maybeToList $ resolveAbsoluteAddr mem (fromIntegral (steValue ste)) + , segmentFlags (msegSegment addr) `Perm.hasPerm` Perm.execute + ] + in Map.fromListWith (++) func_entries ppElfUnresolvedSymbols :: forall w . MemWidth w