From 3f39c614e9abe2b08a8bc0cee15918cc9b9d7ef6 Mon Sep 17 00:00:00 2001 From: Andrei Stefanescu Date: Mon, 26 Nov 2018 22:11:43 -0800 Subject: [PATCH] Add support for RepMovs and RepStos. --- symbolic/src/Data/Macaw/Symbolic.hs | 123 ++++++++++++++------ x86/src/Data/Macaw/X86/ArchTypes.hs | 42 ++++--- x86/src/Data/Macaw/X86/Semantics.hs | 23 ++-- x86_symbolic/src/Data/Macaw/X86/Crucible.hs | 97 +++++++++++++-- x86_symbolic/src/Data/Macaw/X86/Symbolic.hs | 28 ++++- 5 files changed, 234 insertions(+), 79 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic.hs b/symbolic/src/Data/Macaw/Symbolic.hs index d08d6f84..66b095f0 100644 --- a/symbolic/src/Data/Macaw/Symbolic.hs +++ b/symbolic/src/Data/Macaw/Symbolic.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE EmptyCase #-} {-# LANGUAGE FlexibleContexts #-} @@ -43,10 +44,15 @@ module Data.Macaw.Symbolic , Regs , freshValue , GlobalMap + -- * Symbolic architecture-specific types + , ArchBits + , ArchInfo(..) + , ArchVals(..) ) where import Control.Lens ((^.)) import Control.Monad (forM, join) +import Control.Monad.IO.Class import Control.Monad.ST (ST, RealWorld, stToIO) import Data.Foldable import Data.Map.Strict (Map) @@ -477,13 +483,17 @@ type MacawArchEvalFn sym arch = -- | This evaluates a Macaw statement extension in the simulator. -execMacawStmtExtension :: - IsSymInterface sym => - MacawArchEvalFn sym arch {- ^ Function for executing -} -> - C.GlobalVar MM.Mem -> - GlobalMap sym (M.ArchAddrWidth arch) -> - LookupFunctionHandle sym arch -> - EvalStmtFunc (MacawStmtExtension arch) (MacawSimulatorState sym) sym (MacawExt arch) +execMacawStmtExtension + :: IsSymInterface sym + => ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + {- ^ Function for executing -} + -> C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> LookupFunctionHandle sym arch + -> EvalStmtFunc (MacawStmtExtension arch) (MacawSimulatorState sym) sym (MacawExt arch) execMacawStmtExtension archStmtFn mvar globs (LFH lookupH) s0 st = case s0 of MacawReadMem w mr x -> doReadMem st mvar globs w mr x @@ -506,7 +516,7 @@ execMacawStmtExtension archStmtFn mvar globs (LFH lookupH) s0 st = (hv, st') <- doLookupFunctionHandle lookupH st mvar (C.regValue args) return (C.HandleFnVal hv, st') - MacawArchStmtExtension s -> archStmtFn s st + MacawArchStmtExtension s -> archStmtFn mvar globs s st MacawArchStateUpdate {} -> return ((), st) PtrEq w x y -> doPtrEq st mvar w x y @@ -568,37 +578,84 @@ freshValue sym str w ty = -- | Return macaw extension evaluation functions. -macawExtensions :: - IsSymInterface sym => - MacawArchEvalFn sym arch -> - C.GlobalVar MM.Mem -> - GlobalMap sym (M.ArchAddrWidth arch) -> - LookupFunctionHandle sym arch -> - C.ExtensionImpl (MacawSimulatorState sym) sym (MacawExt arch) +macawExtensions + :: IsSymInterface sym + => ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + -> C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> LookupFunctionHandle sym arch + -> C.ExtensionImpl (MacawSimulatorState sym) sym (MacawExt arch) macawExtensions f mvar globs lookupH = C.ExtensionImpl { C.extensionEval = evalMacawExprExtension , C.extensionExec = execMacawStmtExtension f mvar globs lookupH } +type ArchBits arch = + ( C.IsSyntaxExtension (MacawExt arch) + , M.ArchConstraints arch + , M.RegisterInfo (M.ArchReg arch) + , M.HasRepr (M.ArchReg arch) M.TypeRepr + , M.MemWidth (M.ArchAddrWidth arch) + , Show (M.ArchReg arch (M.BVType (M.ArchAddrWidth arch))) + , ArchInfo arch + ) + +type SymArchConstraints arch = + ( C.IsSyntaxExtension (MacawExt arch) + , M.MemWidth (M.ArchAddrWidth arch) + , M.PrettyF (M.ArchReg arch) + ) + +data ArchVals arch = ArchVals + { archFunctions :: MacawSymbolicArchFunctions arch + , withArchEval + :: forall a m sym + . (IsSymInterface sym, MonadIO m) + => sym + -> ( ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + -> m a + ) + -> m a + , withArchConstraints :: forall a . (SymArchConstraints arch => a) -> a + } + +-- | A class to capture the architecture-specific information required to +-- perform block recovery and translation into a Crucible CFG. +-- +-- For architectures that do not have a symbolic backend yet, have this function +-- return 'Nothing'. +class ArchInfo arch where + archVals :: proxy arch -> Maybe (ArchVals arch) + -- | Run the simulator over a contiguous set of code. -runCodeBlock :: forall sym arch blocks - . (C.IsSyntaxExtension (MacawExt arch), IsSymInterface sym) - => sym - -> MacawSymbolicArchFunctions arch - -- ^ Translation functions - -> MacawArchEvalFn sym arch - -> C.HandleAllocator RealWorld - -> (MM.MemImpl sym, GlobalMap sym (M.ArchAddrWidth arch)) - -> LookupFunctionHandle sym arch - -> C.CFG (MacawExt arch) blocks (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch) - -> Ctx.Assignment (C.RegValue' sym) (MacawCrucibleRegTypes arch) - -- ^ Register assignment - -> IO ( C.GlobalVar MM.Mem - , C.ExecResult - (MacawSimulatorState sym) - sym - (MacawExt arch) - (C.RegEntry sym (ArchRegStruct arch))) +runCodeBlock + :: forall sym arch blocks + . (C.IsSyntaxExtension (MacawExt arch), IsSymInterface sym) + => sym + -> MacawSymbolicArchFunctions arch + -- ^ Translation functions + -> ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + -> C.HandleAllocator RealWorld + -> (MM.MemImpl sym, GlobalMap sym (M.ArchAddrWidth arch)) + -> LookupFunctionHandle sym arch + -> C.CFG (MacawExt arch) blocks (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch) + -> Ctx.Assignment (C.RegValue' sym) (MacawCrucibleRegTypes arch) + -- ^ Register assignment + -> IO ( C.GlobalVar MM.Mem + , C.ExecResult + (MacawSimulatorState sym) + sym + (MacawExt arch) + (C.RegEntry sym (ArchRegStruct arch))) runCodeBlock sym archFns archEval halloc (initMem,globs) lookupH g regStruct = do mvar <- stToIO (MM.mkMemVar halloc) let crucRegTypes = crucArchRegTypes archFns diff --git a/x86/src/Data/Macaw/X86/ArchTypes.hs b/x86/src/Data/Macaw/X86/ArchTypes.hs index 81b00367..383c2a2d 100644 --- a/x86/src/Data/Macaw/X86/ArchTypes.hs +++ b/x86/src/Data/Macaw/X86/ArchTypes.hs @@ -33,6 +33,7 @@ module Data.Macaw.X86.ArchTypes , X86PrimLoc(..) , SIMDWidth(..) , RepValSize(..) + , SomeRepValSize(..) , repValSizeByteCount , repValSizeMemRepr ) where @@ -81,6 +82,9 @@ data RepValSize w | (w ~ 32) => DWordRepVal | (w ~ 64) => QWordRepVal +data SomeRepValSize where + SomeRepValSize :: (1 <= w) => RepValSize w -> SomeRepValSize + repValSizeMemRepr :: RepValSize w -> MemRepr (BVType w) repValSizeMemRepr v = case v of @@ -122,7 +126,7 @@ data X86PrimLoc tp | (tp ~ BVType 16) => FS -- ^ This refers to the selector of the 'FS' register. | (tp ~ BVType 16) => GS - -- ^ This refers to the se lector of the 'GS' register. + -- ^ This refers to the selector of the 'GS' register. | forall w . (tp ~ BVType w) => X87_ControlLoc !(X87_ControlReg w) -- ^ One of the x87 control registers @@ -895,12 +899,14 @@ data X86Stmt (v :: Type -> Kind.Type) where -- * @dir@ is a flag that indicates the direction of move ('True' == -- decrement, 'False' == increment) for updating the buffer -- pointers. - RepMovs :: !(RepValSize w) - -> !(v (BVType 64)) - -> !(v (BVType 64)) - -> !(v (BVType 64)) - -> !(v BoolType) - -> X86Stmt v + RepMovs + :: (1 <= w) + => !(RepValSize w) + -> !(v (BVType 64)) + -> !(v (BVType 64)) + -> !(v (BVType 64)) + -> !(v BoolType) + -> X86Stmt v -- | Assign all elements in an array in memory a specific value. -- @@ -912,16 +918,18 @@ data X86Stmt (v :: Type -> Kind.Type) where -- * @dir@ is a flag that indicates the direction of move ('True' == -- decrement, 'False' == increment) for updating the buffer -- pointers. - RepStos :: !(RepValSize w) - -> !(v (BVType 64)) - -- /\ Address to start assigning to. - -> !(v (BVType w)) - -- /\ Value to assign - -> !(v (BVType 64)) - -- /\ Number of values to assign - -> !(v BoolType) - -- /\ Direction flag - -> X86Stmt v + RepStos + :: (1 <= w) + => !(RepValSize w) + -> !(v (BVType 64)) + -- /\ Address to start assigning to. + -> !(v (BVType w)) + -- /\ Value to assign + -> !(v (BVType 64)) + -- /\ Number of values to assign + -> !(v BoolType) + -- /\ Direction flag + -> X86Stmt v -- | Empty MMX technology State. Sets the x87 FPU tag word to empty. -- diff --git a/x86/src/Data/Macaw/X86/Semantics.hs b/x86/src/Data/Macaw/X86/Semantics.hs index 4f8f6223..639595fe 100644 --- a/x86/src/Data/Macaw/X86/Semantics.hs +++ b/x86/src/Data/Macaw/X86/Semantics.hs @@ -1223,13 +1223,12 @@ def_ret = defVariadic "ret" $ \_ vs -> def_movs :: InstructionDef def_movs = defBinary "movs" $ \ii loc _ -> do let pfx = F.iiPrefixes ii - Some w <- - case loc of - F.Mem8{} -> pure (Some ByteRepVal) - F.Mem16{} -> pure (Some WordRepVal) - F.Mem32{} -> pure (Some DWordRepVal) - F.Mem64{} -> pure (Some QWordRepVal) - _ -> error "Bad argument to movs" + SomeRepValSize w <- case loc of + F.Mem8{} -> pure (SomeRepValSize ByteRepVal) + F.Mem16{} -> pure (SomeRepValSize WordRepVal) + F.Mem32{} -> pure (SomeRepValSize DWordRepVal) + F.Mem64{} -> pure (SomeRepValSize QWordRepVal) + _ -> error "Bad argument to movs" let bytesPerOp = bvLit n64 (repValSizeByteCount w) dest <- get rdi src <- get rsi @@ -1484,16 +1483,16 @@ def_lodsx suf elsz = defNullaryPrefix ("lods" ++ suf) $ \pfx -> do def_stos :: InstructionDef def_stos = defBinary "stos" $ \ii loc loc' -> do let pfx = F.iiPrefixes ii - Some rep <- + SomeRepValSize rep <- case (loc, loc') of (F.Mem8 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.ByteReg F.AL) -> do - pure (Some ByteRepVal) + pure (SomeRepValSize ByteRepVal) (F.Mem16 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.WordReg F.AX) -> do - pure (Some WordRepVal) + pure (SomeRepValSize WordRepVal) (F.Mem32 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.DWordReg F.EAX) -> do - pure (Some DWordRepVal) + pure (SomeRepValSize DWordRepVal) (F.Mem64 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.QWordReg F.RAX) -> do - pure (Some QWordRepVal) + pure (SomeRepValSize QWordRepVal) _ -> error $ "stos given bad arguments " ++ show (loc, loc') -- The direction flag indicates post decrement or post increment. dest <- get rdi diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index ca9fc0fc..be7a77e0 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -33,6 +33,7 @@ module Data.Macaw.X86.Crucible ) where import Control.Lens ((^.)) +import Control.Monad import Data.Bits hiding (xor) import Data.Kind ( Type ) import Data.Parameterized.Context.Unsafe (empty,extend) @@ -44,12 +45,14 @@ import Data.Word (Word8) import GHC.TypeLits (KnownNat) import Text.PrettyPrint.ANSI.Leijen hiding ( (<$>), (<>), empty ) +import What4.Concrete import What4.Interface hiding (IsExpr) import What4.InterpretedFloatingPoint import What4.Symbol (userSymbol) import Lang.Crucible.Backend (IsSymInterface) import Lang.Crucible.CFG.Expr +import qualified Lang.Crucible.Simulator as C import qualified Lang.Crucible.Simulator.Evaluation as C import Lang.Crucible.Simulator.ExecutionTree import Lang.Crucible.Simulator.Intrinsics (IntrinsicTypes) @@ -59,12 +62,20 @@ import Lang.Crucible.Types import qualified Lang.Crucible.Vector as V import Lang.Crucible.LLVM.MemModel - (LLVMPointerType, projectLLVM_bv, - pattern LLVMPointerRepr, llvmPointer_bv) + ( LLVMPointerType + , Mem + , ptrAdd + , projectLLVM_bv + , pattern LLVMPointerRepr + , llvmPointer_bv + ) +import qualified Data.Macaw.CFG.Core as M +import qualified Data.Macaw.Memory as M import qualified Data.Macaw.Types as M import Data.Macaw.Symbolic.CrucGen (MacawExt) -import Data.Macaw.Symbolic +import Data.Macaw.Symbolic.MemOps +import Data.Macaw.Symbolic.PersistentState import qualified Data.Macaw.X86 as M import qualified Data.Macaw.X86.ArchTypes as M import qualified Data.Macaw.CFG.Core as MC @@ -87,21 +98,83 @@ funcSemantics fs x s = do let sym = Sym { symIface = s^.stateSymInterface v <- pureSem sym x return (v,s) -stmtSemantics :: (IsSymInterface sym) - => SymFuns sym - -> M.X86Stmt (AtomWrapper (RegEntry sym)) - -> S sym rtp bs r ctx - -> IO (RegValue sym UnitType, S sym rtp bs r ctx) -stmtSemantics fs x s = error ("Symbolic execution semantics for x86 statements are not implemented yet: " <> - (show $ MC.ppArchStmt (liftAtomIn (pretty . regType)) x)) +withConcreteCountAndDir + :: (IsSymInterface sym, 1 <= w) + => S sym rtp bs r ctx + -> M.RepValSize w + -> (AtomWrapper (RegEntry sym) (M.BVType 64)) + -> (AtomWrapper (RegEntry sym) M.BoolType) + -> (S sym rtp bs r ctx -> (SymBV sym 64) -> IO (S sym rtp bs r ctx)) + -> IO (RegValue sym UnitType, S sym rtp bs r ctx) +withConcreteCountAndDir state val_size wrapped_count wrapped_dir func = do + let sym = state^.stateSymInterface + let val_byte_size = M.repValSizeByteCount val_size + bv_count <- toValBV sym wrapped_count + case asConcrete bv_count of + Just (ConcreteBV _ count) -> do + res_crux_state <- foldM func state + =<< mapM (\index -> bvLit sym knownNat $ index * val_byte_size) + -- [0..((if dir then 1 else -1) * (count - 1))] + [0..(count - 1)] + return ((), res_crux_state) + Nothing -> error $ "Unsupported symbolic count in rep stmt: " + +stmtSemantics + :: IsSymInterface sym + => SymFuns sym + -> C.GlobalVar Mem + -> GlobalMap sym (M.ArchAddrWidth M.X86_64) + -> M.X86Stmt (AtomWrapper (RegEntry sym)) + -> S sym rtp bs r ctx + -> IO (RegValue sym UnitType, S sym rtp bs r ctx) +stmtSemantics _sym_funs global_var_mem globals stmt state = do + let sym = state^.stateSymInterface + case stmt of + M.RepMovs val_size (AtomWrapper dest) (AtomWrapper src) count dir -> + withConcreteCountAndDir state val_size count dir $ \acc_state offset -> do + let mem_repr = M.repValSizeMemRepr val_size + curr_dest_ptr <- ptrAdd sym knownNat (regValue dest) offset + curr_src_ptr <- ptrAdd sym knownNat (regValue src) offset + (val, after_read_state) <- doReadMem + acc_state + global_var_mem + globals + M.Addr64 + mem_repr + (RegEntry knownRepr curr_src_ptr) + (_, after_write_state) <- doWriteMem + after_read_state + global_var_mem + globals + M.Addr64 + mem_repr + (RegEntry knownRepr curr_dest_ptr) + (RegEntry (typeToCrucible $ M.typeRepr mem_repr) val) + return after_write_state + M.RepStos val_size (AtomWrapper dest) (AtomWrapper val) count dir -> + withConcreteCountAndDir state val_size count dir $ \acc_state offset -> do + let mem_repr = M.repValSizeMemRepr val_size + curr_dest_ptr <- ptrAdd sym knownNat (regValue dest) offset + (_, after_write_state) <- doWriteMem + acc_state + global_var_mem + globals + M.Addr64 + mem_repr + (RegEntry knownRepr curr_dest_ptr) + val + return after_write_state + _ -> error $ + "Symbolic execution semantics for x86 statement are not implemented yet: " + <> (show $ MC.ppArchStmt (liftAtomIn (pretty . regType)) stmt) termSemantics :: (IsSymInterface sym) => SymFuns sym -> M.X86TermStmt ids -> S sym rtp bs r ctx -> IO (RegValue sym UnitType, S sym rtp bs r ctx) -termSemantics fs x s = error ("Symbolic execution semantics for x86 terminators are not implemented yet: " <> - (show $ MC.prettyF x)) +termSemantics _fs x _s = error ("Symbolic execution semantics for x86 terminators are not implemented yet: " <> + (show $ MC.prettyF x)) data Sym s = Sym { symIface :: s , symTys :: IntrinsicTypes s diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs index 6c9b5110..85234796 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs @@ -12,6 +12,7 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE AllowAmbiguousTypes #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} module Data.Macaw.X86.Symbolic ( x86_64MacawSymbolicFns , x86_64MacawEvalFn @@ -28,6 +29,7 @@ module Data.Macaw.X86.Symbolic import Control.Lens ((^.),(%~),(&)) import Control.Monad ( void ) +import Control.Monad.IO.Class ( liftIO ) import Data.Functor.Identity (Identity(..)) import Data.Kind import Data.Parameterized.Context as Ctx @@ -55,6 +57,7 @@ import qualified Lang.Crucible.CFG.Extension as C import qualified Lang.Crucible.CFG.Reg as C import Lang.Crucible.Simulator.RegValue (RegValue'(..)) import qualified Lang.Crucible.Types as C +import qualified Lang.Crucible.LLVM.MemModel as MM ------------------------------------------------------------------------ -- Utilities for generating a type-level context with repeated elements. @@ -264,8 +267,23 @@ x86_64MacawSymbolicFns = -- | X86_64 specific function for evaluating a Macaw X86_64 program in Crucible. -x86_64MacawEvalFn :: - C.IsSymInterface sym => SymFuns sym -> MacawArchEvalFn sym M.X86_64 -x86_64MacawEvalFn fs (X86PrimFn x) s = funcSemantics fs x s -x86_64MacawEvalFn fs (X86PrimStmt stmt) s = stmtSemantics fs stmt s -x86_64MacawEvalFn fs (X86PrimTerm term) s = termSemantics fs term s +x86_64MacawEvalFn + :: C.IsSymInterface sym + => SymFuns sym + -> C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth M.X86_64) + -> MacawArchEvalFn sym M.X86_64 +x86_64MacawEvalFn fs global_var_mem globals ext_stmt crux_state = + case ext_stmt of + X86PrimFn x -> funcSemantics fs x crux_state + X86PrimStmt stmt -> stmtSemantics fs global_var_mem globals stmt crux_state + X86PrimTerm term -> termSemantics fs term crux_state + +instance ArchInfo M.X86_64 where + archVals _ = Just $ ArchVals + { archFunctions = x86_64MacawSymbolicFns + , withArchEval = \sym -> \k -> do + sfns <- liftIO $ newSymFuns sym + k $ x86_64MacawEvalFn sfns + , withArchConstraints = \x -> x + }