From 7a64cb614f8319773a2a68ff5e141dc91a70cc67 Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Tue, 20 Nov 2018 09:43:48 +0000 Subject: [PATCH 1/8] Explicit NoStarIsType with Data.Kind.Type and increasing do indentation (for GHC 8.6) --- base/macaw-base.cabal | 6 +- base/src/Data/Macaw/AbsDomain/AbsState.hs | 3 +- base/src/Data/Macaw/Analysis/FunctionArgs.hs | 5 +- base/src/Data/Macaw/CFG/App.hs | 3 +- base/src/Data/Macaw/CFG/AssignRhs.hs | 18 ++- base/src/Data/Macaw/CFG/Block.hs | 2 + base/src/Data/Macaw/CFG/Core.hs | 17 +- base/src/Data/Macaw/CFG/Rewriter.hs | 12 +- base/src/Data/Macaw/Discovery.hs | 154 +++++++++---------- base/src/Data/Macaw/Memory.hs | 2 + base/src/Data/Macaw/Memory/ElfLoader.hs | 29 ++-- base/src/Data/Macaw/Types.hs | 3 +- 12 files changed, 139 insertions(+), 115 deletions(-) diff --git a/base/macaw-base.cabal b/base/macaw-base.cabal index 4c989ef0..69e84f78 100644 --- a/base/macaw-base.cabal +++ b/base/macaw-base.cabal @@ -3,7 +3,7 @@ version: 0.3.2 author: Galois, Inc. maintainer: jhendrix@galois.com build-type: Simple -cabal-version: >= 1.9.2 +cabal-version: >= 1.10 license: BSD3 license-file: LICENSE description: @@ -80,3 +80,7 @@ library ghc-options: -Wall ghc-options: -fno-warn-unticked-promoted-constructors ghc-prof-options: -O2 -fprof-auto-top + default-language: Haskell2010 + + if impl(ghc >= 8.6) + default-extensions: NoStarIsType diff --git a/base/src/Data/Macaw/AbsDomain/AbsState.hs b/base/src/Data/Macaw/AbsDomain/AbsState.hs index 2387d5bc..dd1cda9a 100644 --- a/base/src/Data/Macaw/AbsDomain/AbsState.hs +++ b/base/src/Data/Macaw/AbsDomain/AbsState.hs @@ -66,6 +66,7 @@ import Data.Bits import Data.Foldable import Data.Functor import Data.Int +import qualified Data.Kind as Kind import Data.Map (Map) import qualified Data.Map.Strict as Map import Data.Maybe @@ -1316,7 +1317,7 @@ transferApp r a = do _ -> TopV -- | Minimal information needed to parse a function call/system call -data CallParams (r :: Type -> *) +data CallParams (r :: Type -> Kind.Type) = CallParams { postCallStackDelta :: Integer -- ^ Amount stack should shift by when going before/after call. , preserveReg :: forall tp . r tp -> Bool diff --git a/base/src/Data/Macaw/Analysis/FunctionArgs.hs b/base/src/Data/Macaw/Analysis/FunctionArgs.hs index b0b4aa59..1ee18afd 100644 --- a/base/src/Data/Macaw/Analysis/FunctionArgs.hs +++ b/base/src/Data/Macaw/Analysis/FunctionArgs.hs @@ -34,6 +34,7 @@ module Data.Macaw.Analysis.FunctionArgs import Control.Lens import Control.Monad.State.Strict import Data.Foldable as Fold (traverse_) +import qualified Data.Kind as Kind import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe @@ -81,13 +82,13 @@ import Data.Macaw.Types -- demanded? -- | A set of registrs -type RegisterSet (r :: Type -> *) = Set (Some r) +type RegisterSet (r :: Type -> Kind.Type) = Set (Some r) -- | A memory segment offset compatible with the architecture registers. type RegSegmentOff r = MemSegmentOff (RegAddrWidth r) -- | This stores the registers needed by a specific address -data DemandSet (r :: Type -> *) = +data DemandSet (r :: Type -> Kind.Type) = DemandSet { registerDemands :: !(RegisterSet r) -- | This maps a function address to the registers -- that it needs. diff --git a/base/src/Data/Macaw/CFG/App.hs b/base/src/Data/Macaw/CFG/App.hs index 2e74aab4..af0a9e16 100644 --- a/base/src/Data/Macaw/CFG/App.hs +++ b/base/src/Data/Macaw/CFG/App.hs @@ -21,6 +21,7 @@ module Data.Macaw.CFG.App , ppAppA ) where +import qualified Data.Kind as Kind import Control.Monad.Identity import Data.Parameterized.Classes import qualified Data.Parameterized.List as P @@ -40,7 +41,7 @@ import Data.Macaw.Utils.Pretty -- These operations are all total functions. Different architecture tend to have -- different ways of raising signals or exceptions, and so partial functions are -- all architecture specific. -data App (f :: Type -> *) (tp :: Type) where +data App (f :: Type -> Kind.Type) (tp :: Type) where -- Compare for equality. Eq :: !(f tp) -> !(f tp) -> App f BoolType diff --git a/base/src/Data/Macaw/CFG/AssignRhs.hs b/base/src/Data/Macaw/CFG/AssignRhs.hs index 358fa29b..51598363 100644 --- a/base/src/Data/Macaw/CFG/AssignRhs.hs +++ b/base/src/Data/Macaw/CFG/AssignRhs.hs @@ -6,6 +6,8 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilyDependencies #-} {-# LANGUAGE TypeOperators #-} +{-# LANGUAGE UndecidableInstances #-} + module Data.Macaw.CFG.AssignRhs ( AssignRhs(..) -- * MemRepr @@ -25,6 +27,7 @@ module Data.Macaw.CFG.AssignRhs , ArchMemAddr ) where +import qualified Data.Kind as Kind import Data.Macaw.CFG.App import Data.Macaw.Memory (Endianness(..), MemSegmentOff, MemWord, MemAddr) import Data.Macaw.Types @@ -36,8 +39,11 @@ import Data.Parameterized.TraversableFC (FoldableFC(..)) import Data.Proxy import Text.PrettyPrint.ANSI.Leijen hiding ((<$>), (<>)) +import Prelude + + -- | Width of register used to store addresses. -type family RegAddrWidth (r :: Type -> *) :: Nat +type family RegAddrWidth (r :: Type -> Kind.Type) :: Nat -- | A word for the given architecture register type. type RegAddrWord r = MemWord (RegAddrWidth r) @@ -46,7 +52,7 @@ type RegAddrWord r = MemWord (RegAddrWidth r) -- -- Registers include things like the general purpose registers, any flag -- registers that can be read and written without side effects, -type family ArchReg (arch :: *) = (reg :: Type -> *) | reg -> arch +type family ArchReg (arch :: Kind.Type) = (reg :: Type -> Kind.Type) | reg -> arch -- Note the injectivity constraint. This makes GHC quit bothering us -- about ambigous types for functions taking ArchRegs as arguments. @@ -57,13 +63,13 @@ type family ArchReg (arch :: *) = (reg :: Type -> *) | reg -> arch -- -- The function may depend on the set of registers defined so far, and the type -- of the result. -type family ArchFn (arch :: *) = (fn :: (Type -> *) -> Type -> *) | fn -> arch +type family ArchFn (arch :: Kind.Type) = (fn :: (Type -> Kind.Type) -> Type -> Kind.Type) | fn -> arch -- | A type family for defining architecture-specific statements. -- -- The second parameter is used to denote the underlying values in the -- statements so that we can use ArchStmts with multiple CFGs. -type family ArchStmt (arch :: *) = (stmt :: (Type -> *) -> *) | stmt -> arch +type family ArchStmt (arch :: Kind.Type) = (stmt :: (Type -> Kind.Type) -> Kind.Type) | stmt -> arch -- | A type family for defining architecture-specific statements that -- may have instruction-specific effects on control-flow and register state. @@ -75,7 +81,7 @@ type family ArchStmt (arch :: *) = (stmt :: (Type -> *) -> *) | stmt -> arch -- values, it may or may not return to the current function. If it does return to the -- current function, it is assumed to be at most one location, and the block-translator -- must provide that value at translation time. -type family ArchTermStmt (arch :: *) :: * -> * +type family ArchTermStmt (arch :: Kind.Type) :: Kind.Type -> Kind.Type -- NOTE: Not injective because PPC32 and PPC64 use the same type. -- | Number of bits in addreses for architecture. @@ -134,7 +140,7 @@ instance HasRepr MemRepr TypeRepr where -- | The right hand side of an assignment is an expression that -- returns a value. -data AssignRhs (arch :: *) (f :: Type -> *) tp where +data AssignRhs (arch :: Kind.Type) (f :: Type -> Kind.Type) tp where -- | An expression that is computed from evaluating subexpressions. EvalApp :: !(App f tp) -> AssignRhs arch f tp diff --git a/base/src/Data/Macaw/CFG/Block.hs b/base/src/Data/Macaw/CFG/Block.hs index 1f4957f3..978023af 100644 --- a/base/src/Data/Macaw/CFG/Block.hs +++ b/base/src/Data/Macaw/CFG/Block.hs @@ -6,6 +6,8 @@ This exports the pre-classification term statement and block data types. -} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE UndecidableInstances #-} + module Data.Macaw.CFG.Block ( Block(..) , ppBlock diff --git a/base/src/Data/Macaw/CFG/Core.hs b/base/src/Data/Macaw/CFG/Core.hs index 8df22ec3..f9f5703e 100644 --- a/base/src/Data/Macaw/CFG/Core.hs +++ b/base/src/Data/Macaw/CFG/Core.hs @@ -7,6 +7,7 @@ Defines data types needed to represent values, assignments, and statements from This is a low-level CFG representation where the entire program is a single CFG. -} +{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} @@ -21,7 +22,8 @@ single CFG. {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} -{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE UndecidableInstances #-} + module Data.Macaw.CFG.Core ( -- * Stmt level declarations Stmt(..) @@ -84,6 +86,7 @@ import Control.Monad.Identity import Control.Monad.State.Strict import Data.Bits import Data.Int (Int64) +import qualified Data.Kind as Kind import Data.Maybe (isNothing, catMaybes) import Data.Parameterized.Classes import Data.Parameterized.Map (MapF) @@ -125,7 +128,7 @@ class PrettyPrec v where prettyPrec :: Int -> v -> Doc -- | Pretty print over all instances of a type. -class PrettyF (f :: k -> *) where +class PrettyF (f :: k -> Kind.Type) where prettyF :: f tp -> Doc -- | Pretty print a document with parens if condition is true @@ -153,7 +156,7 @@ addrWidthTypeRepr Addr64 = BVTypeRepr knownNat -- form. 'AssignId's are typed, and also include a type variable @ids@ -- that intuitively denotes the set of identifiers from which they are -- drawn. -newtype AssignId (ids :: *) (tp :: Type) = AssignId (Nonce ids tp) +newtype AssignId (ids :: Kind.Type) (tp :: Type) = AssignId (Nonce ids tp) ppAssignId :: AssignId ids tp -> Doc ppAssignId (AssignId w) = text ("r" ++ show (indexValue w)) @@ -392,7 +395,7 @@ class IPAlignment arch where -- RegState -- | This represents the state of the processor registers. -newtype RegState (r :: k -> *) (f :: k -> *) = RegState (MapF.MapF r f) +newtype RegState (r :: k -> Kind.Type) (f :: k -> Kind.Type) = RegState (MapF.MapF r f) deriving instance (OrdF r, EqF f) => Eq (RegState r f) @@ -539,7 +542,7 @@ instance RegisterInfo (ArchReg arch) => Show (Value arch ids tp) where show = show . pretty -- | Typeclass for architecture-specific functions -class IsArchFn (f :: (Type -> *) -> Type -> *) where +class IsArchFn (f :: (Type -> Kind.Type) -> Type -> Kind.Type) where -- | A function for pretty printing an archFn of a given type. ppArchFn :: Applicative m => (forall u . v u -> m Doc) @@ -548,7 +551,7 @@ class IsArchFn (f :: (Type -> *) -> Type -> *) where -> m Doc -- | Typeclass for architecture-specific statements -class IsArchStmt (f :: (Type -> *) -> *) where +class IsArchStmt (f :: (Type -> Kind.Type) -> Kind.Type) where -- | A function for pretty printing an architecture statement of a given type. ppArchStmt :: (forall u . v u -> Doc) -- ^ Function for pretty printing value. @@ -642,7 +645,7 @@ ppValueAssignmentList vals = -- | This class provides a way of optionally pretty printing the contents -- of a register or omitting them. -class PrettyRegValue r (f :: Type -> *) where +class PrettyRegValue r (f :: Type -> Kind.Type) where -- | ppValueEq should return a doc if the contents of the given register -- should be printed, and Nothing if the contents should be ignored. ppValueEq :: r tp -> f tp -> Maybe Doc diff --git a/base/src/Data/Macaw/CFG/Rewriter.hs b/base/src/Data/Macaw/CFG/Rewriter.hs index a5e436eb..ae1a7f56 100644 --- a/base/src/Data/Macaw/CFG/Rewriter.hs +++ b/base/src/Data/Macaw/CFG/Rewriter.hs @@ -125,7 +125,7 @@ appendRewrittenStmt stmt = Rewriter $ do stmts <- use rwRevStmts let stmts' = stmt : stmts seq stmt $ seq stmts' $ do - rwRevStmts .= stmts' + rwRevStmts .= stmts' -- | Add a statment to the list appendRewrittenArchStmt :: ArchStmt arch (Value arch tgt) -> Rewriter arch s src tgt () @@ -151,10 +151,10 @@ addBinding :: AssignId src tp -> Value arch tgt tp -> Rewriter arch s src tgt () addBinding srcId val = Rewriter $ do ref <- gets $ rwctxCache . rwContext lift $ do - m <- readSTRef ref - when (MapF.member srcId m) $ do - fail $ "Assignment " ++ show srcId ++ " is already bound." - writeSTRef ref $! MapF.insert srcId val m + m <- readSTRef ref + when (MapF.member srcId m) $ do + fail $ "Assignment " ++ show srcId ++ " is already bound." + writeSTRef ref $! MapF.insert srcId val m -- | Return true if values are identical identValue :: TestEquality (ArchReg arch) => Value arch tgt tp -> Value arch tgt tp -> Bool @@ -171,7 +171,7 @@ rewriteApp :: App (Value arch tgt) tp -> Rewriter arch s src tgt (Value arch tgt rewriteApp app = do ctx <- Rewriter $ gets rwContext rwctxConstraints ctx $ do - case app of + case app of Trunc (BVValue _ x) w -> do pure $ BVValue w $ toUnsigned w x diff --git a/base/src/Data/Macaw/Discovery.hs b/base/src/Data/Macaw/Discovery.hs index 9c405a47..f55f7ce2 100644 --- a/base/src/Data/Macaw/Discovery.hs +++ b/base/src/Data/Macaw/Discovery.hs @@ -415,13 +415,13 @@ mergeIntraJump :: ArchSegmentOff arch mergeIntraJump src ab tgt = do info <- uses curFunCtx archInfo withArchConstraints info $ do - when (not (absStackHasReturnAddr ab)) $ do + when (not (absStackHasReturnAddr ab)) $ do debug DCFG ("WARNING: Missing return value in jump from " ++ show src ++ " to\n" ++ show ab) $ pure () - let rsn = NextIP src - -- Associate a new abstract state with the code region. - foundMap <- use foundAddrs - case Map.lookup tgt foundMap of + let rsn = NextIP src + -- Associate a new abstract state with the code region. + foundMap <- use foundAddrs + case Map.lookup tgt foundMap of -- We have seen this block before, so need to join and see if -- the results is changed. Just old_info -> do @@ -764,8 +764,8 @@ recordWriteStmt arch_info mem regs stmt = do WriteMem _addr repr v | Just Refl <- testEquality repr (addrMemRepr arch_info) -> do withArchConstraints arch_info $ do - let addrs = identifyConcreteAddresses mem (transferValue regs v) - writtenCodeAddrs %= (filter isExecutableSegOff addrs ++) + let addrs = identifyConcreteAddresses mem (transferValue regs v) + writtenCodeAddrs %= (filter isExecutableSegOff addrs ++) _ -> return () ------------------------------------------------------------------------ @@ -840,10 +840,10 @@ parseFetchAndExecute ctx idx stmts regs s = do let ainfo= pctxArchInfo ctx let absProcState' = absEvalStmts ainfo regs stmts withArchConstraints ainfo $ do - -- See if next statement appears to end with a call. - -- We define calls as statements that end with a write that - -- stores the pc to an address. - case () of + -- See if next statement appears to end with a call. + -- We define calls as statements that end with a write that + -- stores the pc to an address. + case () of _ | Just (Mux _ c t f) <- valueAsApp (s^.boundValue ip_reg) -> do mapM_ (recordWriteStmt ainfo mem absProcState') stmts @@ -876,19 +876,19 @@ parseFetchAndExecute ctx idx stmts regs s = do mapM_ (recordWriteStmt ainfo mem absProcState') prev_stmts let abst = finalAbsBlockState absProcState' s seq abst $ do - -- Merge caller return information - intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) - -- Use the abstract domain to look for new code pointers for the current IP. - addNewFunctionAddrs $ - identifyCallTargets mem abst s - -- Use the call-specific code to look for new IPs. + -- Merge caller return information + intraJumpTargets %= ((ret, postCallAbsState ainfo abst ret):) + -- Use the abstract domain to look for new code pointers for the current IP. + addNewFunctionAddrs $ + identifyCallTargets mem abst s + -- Use the call-specific code to look for new IPs. - let r = StatementList { stmtsIdent = idx - , stmtsNonterm = toList prev_stmts - , stmtsTerm = ParsedCall s (Just ret) - , stmtsAbsState = absProcState' - } - pure (r, idx+1) + let r = StatementList { stmtsIdent = idx + , stmtsNonterm = toList prev_stmts + , stmtsTerm = ParsedCall s (Just ret) + , stmtsAbsState = absProcState' + } + pure (r, idx+1) -- This block ends with a return as identified by the -- architecture-specific processing. Basic return @@ -993,16 +993,16 @@ parseFetchAndExecute ctx idx stmts regs s = do let abst = finalAbsBlockState absProcState' s seq abst $ do - -- Look for new instruction pointers - addNewFunctionAddrs $ - identifyConcreteAddresses mem (abst^.absRegState^.curIP) + -- Look for new instruction pointers + addNewFunctionAddrs $ + identifyConcreteAddresses mem (abst^.absRegState^.curIP) - let ret = StatementList { stmtsIdent = idx - , stmtsNonterm = stmts - , stmtsTerm = ParsedCall s Nothing - , stmtsAbsState = absProcState' - } - seq ret $ pure (ret,idx+1) + let ret = StatementList { stmtsIdent = idx + , stmtsNonterm = stmts + , stmtsTerm = ParsedCall s Nothing + , stmtsAbsState = absProcState' + } + seq ret $ pure (ret,idx+1) -- | this evalutes the statements in a block to expand the information known -- about control flow targets of this block. @@ -1019,7 +1019,7 @@ parseBlock ctx idx b regs = do let mem = pctxMemory ctx let ainfo = pctxArchInfo ctx withArchConstraints ainfo $ do - case blockTerm b of + case blockTerm b of Branch c lb rb -> do let blockMap = pctxBlockMap ctx -- FIXME: we should propagate c back to the initial block, not just b @@ -1136,56 +1136,56 @@ transfer addr = do s <- use curFunCtx let ainfo = archInfo s withArchConstraints ainfo $ do - mfinfo <- use $ foundAddrs . at addr - let finfo = fromMaybe (error $ "transfer called on unfound address " ++ show addr ++ ".") $ - mfinfo - let mem = memory s - nonceGen <- gets funNonceGen - prev_block_map <- use $ curFunBlocks - -- Get maximum number of bytes to disassemble - let maxSize :: Int - maxSize = - case Map.lookupGT addr prev_block_map of - Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o - _ -> fromInteger (segoffBytesLeft addr) - let ab = foundAbstractState finfo - (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab + mfinfo <- use $ foundAddrs . at addr + let finfo = fromMaybe (error $ "transfer called on unfound address " ++ show addr ++ ".") $ + mfinfo + let mem = memory s + nonceGen <- gets funNonceGen + prev_block_map <- use $ curFunBlocks + -- Get maximum number of bytes to disassemble + let maxSize :: Int + maxSize = + case Map.lookupGT addr prev_block_map of + Just (next,_) | Just o <- diffSegmentOff next addr -> fromInteger o + _ -> fromInteger (segoffBytesLeft addr) + let ab = foundAbstractState finfo + (bs0, sz, maybeError) <- liftST $ disassembleFn ainfo nonceGen addr maxSize ab #ifdef USE_REWRITER - bs1 <- do - let archStmt = rewriteArchStmt ainfo - let secAddrMap = memSectionIndexMap mem - liftST $ do - ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap - traverse (rewriteBlock ainfo ctx) bs0 + bs1 <- do + let archStmt = rewriteArchStmt ainfo + let secAddrMap = memSectionIndexMap mem + liftST $ do + ctx <- mkRewriteContext nonceGen (rewriteArchFn ainfo) archStmt secAddrMap + traverse (rewriteBlock ainfo ctx) bs0 #else - bs1 <- pure bs0 + bs1 <- pure bs0 #endif - -- If no blocks are returned, then we just add an empty parsed block. - if null bs1 then do - let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError - let stmts = StatementList - { stmtsIdent = 0 - , stmtsNonterm = [] - , stmtsTerm = ParsedTranslateError errMsg - , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) - } - let pb = ParsedBlock { pblockAddr = addr - , blockSize = sz - , blockReason = foundReason finfo - , blockAbstractState = foundAbstractState finfo - , blockStatementList = stmts - } - id %= addFunBlock addr pb - else do - -- Rewrite returned blocks to simplify expressions + -- If no blocks are returned, then we just add an empty parsed block. + if null bs1 then do + let errMsg = Text.pack $ fromMaybe "Unknown error" maybeError + let stmts = StatementList + { stmtsIdent = 0 + , stmtsNonterm = [] + , stmtsTerm = ParsedTranslateError errMsg + , stmtsAbsState = initAbsProcessorState mem (foundAbstractState finfo) + } + let pb = ParsedBlock { pblockAddr = addr + , blockSize = sz + , blockReason = foundReason finfo + , blockAbstractState = foundAbstractState finfo + , blockStatementList = stmts + } + id %= addFunBlock addr pb + else do + -- Rewrite returned blocks to simplify expressions - -- Compute demand set - let bs = bs1 -- eliminateDeadStmts ainfo bs1 - -- Call transfer blocks to calculate parsedblocks - let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] - addBlocks addr finfo sz blockMap + -- Compute demand set + let bs = bs1 -- eliminateDeadStmts ainfo bs1 + -- Call transfer blocks to calculate parsedblocks + let blockMap = Map.fromList [ (blockLabel b, b) | b <- bs ] + addBlocks addr finfo sz blockMap ------------------------------------------------------------------------ -- Main loop diff --git a/base/src/Data/Macaw/Memory.hs b/base/src/Data/Macaw/Memory.hs index e1f8734a..89de8240 100644 --- a/base/src/Data/Macaw/Memory.hs +++ b/base/src/Data/Macaw/Memory.hs @@ -156,6 +156,8 @@ import Data.Parameterized.NatRepr import qualified Data.Macaw.Memory.Permissions as Perm +import Prelude + ------------------------------------------------------------------------ -- AddrWidthRepr diff --git a/base/src/Data/Macaw/Memory/ElfLoader.hs b/base/src/Data/Macaw/Memory/ElfLoader.hs index d51b8bb7..18fc6546 100644 --- a/base/src/Data/Macaw/Memory/ElfLoader.hs +++ b/base/src/Data/Macaw/Memory/ElfLoader.hs @@ -72,7 +72,7 @@ import qualified Data.IntervalMap.Strict as IMap import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe -import Data.Monoid +import Data.Semigroup import Data.Set (Set) import qualified Data.Set as Set import qualified Data.Vector as V @@ -85,6 +85,9 @@ import Data.Macaw.Memory.LoadCommon import qualified Data.Macaw.Memory.Permissions as Perm import Data.Macaw.Memory.Symbols +import Prelude + + -- | Return a subrange of a bytestring. sliceL :: Integral w => Elf.Range w -> L.ByteString -> L.ByteString sliceL (i,c) = L.take (fromIntegral c) . L.drop (fromIntegral i) @@ -888,8 +891,8 @@ insertElfSegment :: RegionIndex -> MemLoader w () insertElfSegment regIdx addrOff shdrMap contents relocMap phdr = do w <- uses mlsMemory memAddrWidth - reprConstraints w $ do - when (Elf.phdrMemSize phdr > 0) $ do + reprConstraints w $ + when (Elf.phdrMemSize phdr > 0) $ do let segIdx = Elf.phdrSegmentIndex phdr seg <- do let linkBaseOff = fromIntegral (Elf.phdrSegmentVirtAddr phdr) @@ -932,21 +935,21 @@ memoryForElfSegments regIndex addrOff e = do let hdr = Elf.elfLayoutHeader l let w = elfAddrWidth (elfClass e) reprConstraints w $ do - let ph = Elf.allPhdrs l - let contents = elfLayoutBytes l - -- Create relocation map - relocMap <- dynamicRelocationMap hdr ph contents + let ph = Elf.allPhdrs l + let contents = elfLayoutBytes l + -- Create relocation map + relocMap <- dynamicRelocationMap hdr ph contents - let intervals :: ElfFileSectionMap (ElfWordType w) - intervals = IMap.fromList + let intervals :: ElfFileSectionMap (ElfWordType w) + intervals = IMap.fromList [ (IntervalCO start end, sec) | shdr <- Map.elems (l ^. Elf.shdrs) , let start = shdr^._3 , let sec = shdr^._1 , let end = start + elfSectionFileSize sec ] - mapM_ (insertElfSegment regIndex addrOff intervals contents relocMap) - (filter (\p -> Elf.phdrSegmentType p == Elf.PT_LOAD) ph) + mapM_ (insertElfSegment regIndex addrOff intervals contents relocMap) + (filter (\p -> Elf.phdrSegmentType p == Elf.PT_LOAD) ph) ------------------------------------------------------------------------ -- Elf section loading @@ -1008,8 +1011,8 @@ insertAllocatedSection :: Elf.ElfHeader w insertAllocatedSection hdr symtab sectionMap regIdx nm = do w <- uses mlsMemory memAddrWidth reprConstraints w $ do - msec <- findSection sectionMap nm - case msec of + msec <- findSection sectionMap nm + case msec of Nothing -> pure () Just sec -> do mRelBuffer <- fmap (fmap (L.fromStrict . elfSectionData)) $ diff --git a/base/src/Data/Macaw/Types.hs b/base/src/Data/Macaw/Types.hs index 0fc07a44..cb8d45cc 100644 --- a/base/src/Data/Macaw/Types.hs +++ b/base/src/Data/Macaw/Types.hs @@ -29,6 +29,7 @@ module Data.Macaw.Types , Data.Parameterized.NatRepr.knownNat ) where +import qualified Data.Kind as Kind import Data.Parameterized.Classes import qualified Data.Parameterized.List as P import Data.Parameterized.NatRepr @@ -251,7 +252,7 @@ instance OrdF FloatInfoRepr where -- | A multi-parameter type class that allows one to represent that a -- parameterized type value has some representative type such as a TypeRepr. -class HasRepr (f :: k -> *) (v :: k -> *) | f -> v where +class HasRepr (f :: k -> Kind.Type) (v :: k -> Kind.Type) | f -> v where typeRepr :: f tp -> v tp typeWidth :: HasRepr f TypeRepr => f (BVType w) -> NatRepr w From 1d7cdc87ebdb8b8b1effcb3ba60e572f7148051f Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Wed, 21 Nov 2018 00:08:33 +0000 Subject: [PATCH 2/8] Implement NoStarIsType and MonadFail for GHC 8.6. --- x86/macaw-x86.cabal | 2 ++ x86/src/Data/Macaw/X86/ArchTypes.hs | 3 ++- x86/src/Data/Macaw/X86/Generator.hs | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/x86/macaw-x86.cabal b/x86/macaw-x86.cabal index a4904a7c..186f0993 100644 --- a/x86/macaw-x86.cabal +++ b/x86/macaw-x86.cabal @@ -46,6 +46,8 @@ library ghc-options: -Wall ghc-options: -fno-warn-unticked-promoted-constructors ghc-prof-options: -O2 -fprof-auto-top + if impl(ghc >= 8.6) + default-extensions: NoStarIsType test-suite macaw-x86-tests type: exitcode-stdio-1.0 diff --git a/x86/src/Data/Macaw/X86/ArchTypes.hs b/x86/src/Data/Macaw/X86/ArchTypes.hs index 8889687e..81b00367 100644 --- a/x86/src/Data/Macaw/X86/ArchTypes.hs +++ b/x86/src/Data/Macaw/X86/ArchTypes.hs @@ -38,6 +38,7 @@ module Data.Macaw.X86.ArchTypes ) where import Data.Bits +import qualified Data.Kind as Kind import Data.Word(Word8) import Data.Macaw.CFG import Data.Macaw.CFG.Rewriter @@ -877,7 +878,7 @@ x86PrimFnHasSideEffects f = -- X86Stmt -- | An X86 specific statement. -data X86Stmt (v :: Type -> *) where +data X86Stmt (v :: Type -> Kind.Type) where WriteLoc :: !(X86PrimLoc tp) -> !(v tp) -> X86Stmt v -- | Store the X87 control register in the given address. diff --git a/x86/src/Data/Macaw/X86/Generator.hs b/x86/src/Data/Macaw/X86/Generator.hs index 80337ac5..109f3a65 100644 --- a/x86/src/Data/Macaw/X86/Generator.hs +++ b/x86/src/Data/Macaw/X86/Generator.hs @@ -61,6 +61,7 @@ module Data.Macaw.X86.Generator import Control.Lens import Control.Monad.Cont import Control.Monad.Except +import Control.Monad.Fail import Control.Monad.Reader import Control.Monad.ST import Control.Monad.State.Strict @@ -267,6 +268,8 @@ instance Monad (X86Generator st_s ids) where return v = seq v $ X86G $ return v (X86G m) >>= h = X86G $ m >>= \v -> seq v (unX86G (h v)) X86G m >> X86G n = X86G $ m >> n + +instance MonadFail (X86Generator st_s ids) where fail msg = seq t $ X86G $ ContT $ \_ -> throwError t where t = Text.pack msg From 3f77e763e916be6ce1540425a0ff6eb2ed74872b Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Wed, 21 Nov 2018 18:27:42 +0000 Subject: [PATCH 3/8] Implement NoStarIsType for GHC 8.6. --- x86_symbolic/macaw-x86-symbolic.cabal | 5 ++++- x86_symbolic/src/Data/Macaw/X86/Crucible.hs | 9 +++++---- x86_symbolic/src/Data/Macaw/X86/Symbolic.hs | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/x86_symbolic/macaw-x86-symbolic.cabal b/x86_symbolic/macaw-x86-symbolic.cabal index d2a4a991..467273f5 100644 --- a/x86_symbolic/macaw-x86-symbolic.cabal +++ b/x86_symbolic/macaw-x86-symbolic.cabal @@ -3,7 +3,7 @@ version: 0.0.1 author: Galois, Inc. maintainer: jhendrix@galois.com build-type: Simple -cabal-version: >= 1.9.2 +cabal-version: >= 1.10 license: BSD3 license-file: LICENSE @@ -20,6 +20,7 @@ library parameterized-utils, what4 >= 0.4 hs-source-dirs: src + default-language: Haskell2010 exposed-modules: Data.Macaw.X86.Symbolic @@ -27,6 +28,8 @@ library ghc-options: -Wall ghc-prof-options: -O2 -fprof-auto-top + if impl(ghc >= 8.6) + default-extensions: NoStarIsType test-suite macaw-x86-symbolic-tests type: exitcode-stdio-1.0 diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index dfdb3bca..a0bc13ce 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -34,9 +34,10 @@ module Data.Macaw.X86.Crucible import Control.Lens ((^.)) import Data.Bits hiding (xor) +import Data.Kind ( Type ) import Data.Parameterized.Context.Unsafe (empty,extend) -import Data.Parameterized.Utils.Endian (Endian(..)) import Data.Parameterized.NatRepr +import Data.Parameterized.Utils.Endian (Endian(..)) import qualified Data.Parameterized.Vector as PV import Data.Word (Word8) import GHC.TypeLits (KnownNat) @@ -156,7 +157,7 @@ pureSem sym fn = do M.RepnzScas{} -> error "RepnzScas" M.X86IDiv {} -> error "X86IDiv" M.X86IRem {} -> error "X86IRem" - M.X86Div {} -> error "X86Div" + M.X86Div w n d -> error "X86Div" M.X86Rem {} -> error "X86Rem" M.X87_Extend{} -> error "X87_Extend" M.X87_FAdd{} -> error "X87_FAdd" @@ -602,7 +603,7 @@ evalApp x = C.evalApp (symIface x) (symTys x) logger evalExt (evalE x) evalExt :: fun -> EmptyExprExtension f a -> IO (RegValue sym a) evalExt _ y = case y of {} -data E :: * -> CrucibleType -> * where +data E :: Type -> CrucibleType -> Type where ValBool :: RegValue sym BoolType -> E sym BoolType ValBV :: (1 <= w) => NatRepr w -> RegValue sym (BVType w) -> E sym (BVType w) Expr :: App () (E sym) t -> E sym t @@ -682,7 +683,7 @@ n128 = knownNat -------------------------------------------------------------------------------- -newtype AtomWrapper (f :: CrucibleType -> *) (tp :: M.Type) +newtype AtomWrapper (f :: CrucibleType -> Type) (tp :: M.Type) = AtomWrapper (f (ToCrucibleType tp)) liftAtomMap :: (forall s. f s -> g s) -> AtomWrapper f t -> AtomWrapper g t diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs index 68f3b373..6c9b5110 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs @@ -29,6 +29,7 @@ module Data.Macaw.X86.Symbolic import Control.Lens ((^.),(%~),(&)) import Control.Monad ( void ) import Data.Functor.Identity (Identity(..)) +import Data.Kind import Data.Parameterized.Context as Ctx import Data.Parameterized.Map as MapF import Data.Parameterized.TraversableF @@ -185,7 +186,7 @@ freshX86Reg sym r = -- | We currently make a type like this, we could instead a generic -- X86PrimFn function -data X86StmtExtension (f :: C.CrucibleType -> *) (ctp :: C.CrucibleType) where +data X86StmtExtension (f :: C.CrucibleType -> Type) (ctp :: C.CrucibleType) where -- | To reduce clutter, but potentially increase clutter, we just make every -- Macaw X86PrimFn a Macaw-Crucible statement extension. X86PrimFn :: !(M.X86PrimFn (AtomWrapper f) t) -> From 0ee3f7df2d1a73201fea5441a1836ee9bfbb840b Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Sun, 25 Nov 2018 22:00:19 -0800 Subject: [PATCH 4/8] [x86_symbolic] more info for unimplemented statement and termstmt semantics. --- x86_symbolic/macaw-x86-symbolic.cabal | 1 + x86_symbolic/src/Data/Macaw/X86/Crucible.hs | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/x86_symbolic/macaw-x86-symbolic.cabal b/x86_symbolic/macaw-x86-symbolic.cabal index d2a4a991..e1775cf6 100644 --- a/x86_symbolic/macaw-x86-symbolic.cabal +++ b/x86_symbolic/macaw-x86-symbolic.cabal @@ -9,6 +9,7 @@ license-file: LICENSE library build-depends: base >= 4, + ansi-wl-pprint, crucible >= 0.4, crucible-llvm, flexdis86 >= 0.1.2, diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index dfdb3bca..fe1ccc1e 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -35,11 +35,13 @@ module Data.Macaw.X86.Crucible import Control.Lens ((^.)) import Data.Bits hiding (xor) import Data.Parameterized.Context.Unsafe (empty,extend) -import Data.Parameterized.Utils.Endian (Endian(..)) import Data.Parameterized.NatRepr +import Data.Parameterized.Utils.Endian (Endian(..)) import qualified Data.Parameterized.Vector as PV +import Data.Semigroup import Data.Word (Word8) import GHC.TypeLits (KnownNat) +import Text.PrettyPrint.ANSI.Leijen hiding ( (<$>), (<>), empty ) import What4.Interface hiding (IsExpr) import What4.InterpretedFloatingPoint @@ -65,6 +67,8 @@ import Data.Macaw.Symbolic import qualified Data.Macaw.X86 as M import qualified Data.Macaw.X86.ArchTypes as M +import Prelude + type S sym rtp bs r ctx = CrucibleState (MacawSimulatorState sym) sym (MacawExt M.X86_64) rtp bs r ctx @@ -86,14 +90,16 @@ stmtSemantics :: (IsSymInterface sym) -> M.X86Stmt (AtomWrapper (RegEntry sym)) -> S sym rtp bs r ctx -> IO (RegValue sym UnitType, S sym rtp bs r ctx) -stmtSemantics = error "Symbolic-execution time semantics for x86 statements are not implemented yet" +stmtSemantics fs x s = error ("Symbolic execution semantics for x86 statements are not implemented yet: " <> + (show $ MC.ppArchStmt (liftAtomIn (pretty . regType)) x)) termSemantics :: (IsSymInterface sym) => SymFuns sym -> M.X86TermStmt ids -> S sym rtp bs r ctx -> IO (RegValue sym UnitType, S sym rtp bs r ctx) -termSemantics = error "Symbolic-execution time semantics for x86 terminators are not implemented yet" +termSemantics fs x s = error ("Symbolic execution semantics for x86 terminators are not implemented yet: " <> + (show $ MC.prettyF x)) data Sym s = Sym { symIface :: s , symTys :: IntrinsicTypes s From 01b8175e7f38c53e9d3e09cbec767add053e3f07 Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Sun, 25 Nov 2018 22:01:40 -0800 Subject: [PATCH 5/8] [x86_symbolic] Update cabal specification for compliance. --- x86_symbolic/macaw-x86-symbolic.cabal | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x86_symbolic/macaw-x86-symbolic.cabal b/x86_symbolic/macaw-x86-symbolic.cabal index e1775cf6..6de2a389 100644 --- a/x86_symbolic/macaw-x86-symbolic.cabal +++ b/x86_symbolic/macaw-x86-symbolic.cabal @@ -3,7 +3,7 @@ version: 0.0.1 author: Galois, Inc. maintainer: jhendrix@galois.com build-type: Simple -cabal-version: >= 1.9.2 +cabal-version: >= 1.10 license: BSD3 license-file: LICENSE @@ -21,6 +21,7 @@ library parameterized-utils, what4 >= 0.4 hs-source-dirs: src + default-language: Haskell2010 exposed-modules: Data.Macaw.X86.Symbolic From 3f8769a424703853e2af076cf2accb9aec114baf Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Sun, 25 Nov 2018 22:02:18 -0800 Subject: [PATCH 6/8] [x86_symbolic] add semantics for X86Div, X86Rem, X86IDiv, and X86IRem. --- x86_symbolic/src/Data/Macaw/X86/Crucible.hs | 129 +++++++++++++++++++- 1 file changed, 125 insertions(+), 4 deletions(-) diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index fe1ccc1e..68afc80b 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -160,10 +160,10 @@ pureSem sym fn = do M.RDTSC{} -> error "RDTSC" M.MemCmp{} -> error "MemCmp" M.RepnzScas{} -> error "RepnzScas" - M.X86IDiv {} -> error "X86IDiv" - M.X86IRem {} -> error "X86IRem" - M.X86Div {} -> error "X86Div" - M.X86Rem {} -> error "X86Rem" + M.X86IDiv w n d -> sDiv sym w n d + M.X86IRem w n d -> sRem sym w n d + M.X86Div w n d -> uDiv sym w n d + M.X86Rem w n d -> uRem sym w n d M.X87_Extend{} -> error "X87_Extend" M.X87_FAdd{} -> error "X87_FAdd" M.X87_FSub{} -> error "X87_FSub" @@ -387,6 +387,127 @@ shuffleB xs is = fmap lkp is (bv 0) (bvLookup xs (app $ BVTrunc n4 knownNat i))) +-------------------------------------------------------------------------------- + +-- | Performs a simple unsigned division operation. +-- +-- The x86 numerator is twice the size as the denominator. +-- +-- This function is only reponsible for the dividend (not any +-- remainder--see uRem for that), and any divide-by-zero exception was +-- already handled via an Assert. +uDiv :: ( IsSymInterface sym ) => + Sym sym + -> M.RepValSize w + -> AtomWrapper (RegEntry sym) (M.BVType (w + w)) + -> AtomWrapper (RegEntry sym) (M.BVType w) + -> IO (RegValue sym (LLVMPointerType w)) +uDiv sym repsz n d = do + let dw = M.typeWidth $ M.repValSizeMemRepr repsz + withAddLeq dw dw $ \nw -> + case testLeq n1 nw of + Just LeqProof -> + divOp sym nw n dw d BVUdiv + Nothing -> error "uDiv unable to verify numerator is >= 1 bit" + +-- | Performs a simple unsigned division operation. +-- +-- The x86 numerator is twice the size as the denominator. +-- +-- This function is only reponsible for the remainder (the dividend is +-- computed separately by uDiv), and any divide-by-zero exception was +-- already handled via an Assert. +uRem :: ( IsSymInterface sym ) => + Sym sym + -> M.RepValSize w + -> AtomWrapper (RegEntry sym) (M.BVType (w + w)) + -> AtomWrapper (RegEntry sym) (M.BVType w) + -> IO (RegValue sym (LLVMPointerType w)) +uRem sym repsz n d = do + let dw = M.typeWidth $ M.repValSizeMemRepr repsz + withAddLeq dw dw $ \nw -> + case testLeq n1 nw of + Just LeqProof -> + divOp sym nw n dw d BVUrem + Nothing -> error "uRem unable to verify numerator is >= 1 bit" + +-- | Performs a simple signed division operation. +-- +-- The x86 numerator is twice the size as the denominator. +-- +-- This function is only reponsible for the dividend (not any +-- remainder--see sRem for that), and any divide-by-zero exception was +-- already handled via an Assert. +sDiv :: ( IsSymInterface sym ) => + Sym sym + -> M.RepValSize w + -> AtomWrapper (RegEntry sym) (M.BVType (w + w)) + -> AtomWrapper (RegEntry sym) (M.BVType w) + -> IO (RegValue sym (LLVMPointerType w)) +sDiv sym repsz n d = do + let dw = M.typeWidth $ M.repValSizeMemRepr repsz + withAddLeq dw dw $ \nw -> + case testLeq n1 nw of + Just LeqProof -> + divOp sym nw n dw d BVSdiv + Nothing -> error "sDiv unable to verify numerator is >= 1 bit" + +-- | Performs a simple signed division operation. +-- +-- The x86 numerator is twice the size as the denominator. +-- +-- This function is only reponsible for the remainder (the dividend is +-- computed separately by sDiv), and any divide-by-zero exception was +-- already handled via an Assert. +sRem :: ( IsSymInterface sym ) => + Sym sym + -> M.RepValSize w + -> AtomWrapper (RegEntry sym) (M.BVType (w + w)) + -> AtomWrapper (RegEntry sym) (M.BVType w) + -> IO (RegValue sym (LLVMPointerType w)) +sRem sym repsz n d = do + let dw = M.typeWidth $ M.repValSizeMemRepr repsz + withAddLeq dw dw $ \nw -> + case testLeq n1 nw of + Just LeqProof -> + divOp sym nw n dw d BVSrem + Nothing -> error "sRem unable to verify numerator is >= 1 bit" + +-- | Common function for division and remainder computation for both +-- signed and unsigned BV expressions. +-- +-- The x86 numerator is twice the size as the denominator, so +-- zero-extend the denominator, perform the division, then truncate +-- the result. +divOp :: ( IsSymInterface sym + , 1 <= (w + w) + , w <= (w + w) + ) => + Sym sym + -> NatRepr (w + w) + -> AtomWrapper (RegEntry sym) (M.BVType (w + w)) + -> NatRepr w + -> AtomWrapper (RegEntry sym) (M.BVType w) + -> (NatRepr (w + w) -> E sym (BVType (w + w)) -> E sym (BVType (w + w)) -> App () (E sym) (BVType (w + w))) + -> IO (RegValue sym (LLVMPointerType w)) +divOp sym nw n' dw d' op = do + let symi = symIface sym + n <- getBitVal symi n' + d <- getBitVal symi d' + case testLeq n1 dw of + Just LeqProof -> + case testLeq (incNat dw) nw of + Just LeqProof -> + llvmPointer_bv symi =<< (evalE sym + -- (assertExpr (app $ BVEq dw (app $ BVLit dw 0) + (app $ BVTrunc dw nw $ app $ op nw (app $ BVZext nw dw d) n)) + -- "must not be zero" + -- ) + -- ) + Nothing -> error "divOp unable to prove numerator size > denominator size + 1" + Nothing -> error "divOp unable to prove denominator size > 1 bit" + + -------------------------------------------------------------------------------- divExact :: NatRepr n -> From 3c7e2226763fea76d135eae8be5ddc67cfe6ff1b Mon Sep 17 00:00:00 2001 From: Kevin Quick Date: Mon, 26 Nov 2018 11:26:01 -0800 Subject: [PATCH 7/8] Add missing import for previous change. --- x86_symbolic/src/Data/Macaw/X86/Crucible.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index 35b84a6d..ca9fc0fc 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -67,6 +67,7 @@ import Data.Macaw.Symbolic.CrucGen (MacawExt) import Data.Macaw.Symbolic import qualified Data.Macaw.X86 as M import qualified Data.Macaw.X86.ArchTypes as M +import qualified Data.Macaw.CFG.Core as MC import Prelude From 3f39c614e9abe2b08a8bc0cee15918cc9b9d7ef6 Mon Sep 17 00:00:00 2001 From: Andrei Stefanescu Date: Mon, 26 Nov 2018 22:11:43 -0800 Subject: [PATCH 8/8] Add support for RepMovs and RepStos. --- symbolic/src/Data/Macaw/Symbolic.hs | 123 ++++++++++++++------ x86/src/Data/Macaw/X86/ArchTypes.hs | 42 ++++--- x86/src/Data/Macaw/X86/Semantics.hs | 23 ++-- x86_symbolic/src/Data/Macaw/X86/Crucible.hs | 97 +++++++++++++-- x86_symbolic/src/Data/Macaw/X86/Symbolic.hs | 28 ++++- 5 files changed, 234 insertions(+), 79 deletions(-) diff --git a/symbolic/src/Data/Macaw/Symbolic.hs b/symbolic/src/Data/Macaw/Symbolic.hs index d08d6f84..66b095f0 100644 --- a/symbolic/src/Data/Macaw/Symbolic.hs +++ b/symbolic/src/Data/Macaw/Symbolic.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE EmptyCase #-} {-# LANGUAGE FlexibleContexts #-} @@ -43,10 +44,15 @@ module Data.Macaw.Symbolic , Regs , freshValue , GlobalMap + -- * Symbolic architecture-specific types + , ArchBits + , ArchInfo(..) + , ArchVals(..) ) where import Control.Lens ((^.)) import Control.Monad (forM, join) +import Control.Monad.IO.Class import Control.Monad.ST (ST, RealWorld, stToIO) import Data.Foldable import Data.Map.Strict (Map) @@ -477,13 +483,17 @@ type MacawArchEvalFn sym arch = -- | This evaluates a Macaw statement extension in the simulator. -execMacawStmtExtension :: - IsSymInterface sym => - MacawArchEvalFn sym arch {- ^ Function for executing -} -> - C.GlobalVar MM.Mem -> - GlobalMap sym (M.ArchAddrWidth arch) -> - LookupFunctionHandle sym arch -> - EvalStmtFunc (MacawStmtExtension arch) (MacawSimulatorState sym) sym (MacawExt arch) +execMacawStmtExtension + :: IsSymInterface sym + => ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + {- ^ Function for executing -} + -> C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> LookupFunctionHandle sym arch + -> EvalStmtFunc (MacawStmtExtension arch) (MacawSimulatorState sym) sym (MacawExt arch) execMacawStmtExtension archStmtFn mvar globs (LFH lookupH) s0 st = case s0 of MacawReadMem w mr x -> doReadMem st mvar globs w mr x @@ -506,7 +516,7 @@ execMacawStmtExtension archStmtFn mvar globs (LFH lookupH) s0 st = (hv, st') <- doLookupFunctionHandle lookupH st mvar (C.regValue args) return (C.HandleFnVal hv, st') - MacawArchStmtExtension s -> archStmtFn s st + MacawArchStmtExtension s -> archStmtFn mvar globs s st MacawArchStateUpdate {} -> return ((), st) PtrEq w x y -> doPtrEq st mvar w x y @@ -568,37 +578,84 @@ freshValue sym str w ty = -- | Return macaw extension evaluation functions. -macawExtensions :: - IsSymInterface sym => - MacawArchEvalFn sym arch -> - C.GlobalVar MM.Mem -> - GlobalMap sym (M.ArchAddrWidth arch) -> - LookupFunctionHandle sym arch -> - C.ExtensionImpl (MacawSimulatorState sym) sym (MacawExt arch) +macawExtensions + :: IsSymInterface sym + => ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + -> C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> LookupFunctionHandle sym arch + -> C.ExtensionImpl (MacawSimulatorState sym) sym (MacawExt arch) macawExtensions f mvar globs lookupH = C.ExtensionImpl { C.extensionEval = evalMacawExprExtension , C.extensionExec = execMacawStmtExtension f mvar globs lookupH } +type ArchBits arch = + ( C.IsSyntaxExtension (MacawExt arch) + , M.ArchConstraints arch + , M.RegisterInfo (M.ArchReg arch) + , M.HasRepr (M.ArchReg arch) M.TypeRepr + , M.MemWidth (M.ArchAddrWidth arch) + , Show (M.ArchReg arch (M.BVType (M.ArchAddrWidth arch))) + , ArchInfo arch + ) + +type SymArchConstraints arch = + ( C.IsSyntaxExtension (MacawExt arch) + , M.MemWidth (M.ArchAddrWidth arch) + , M.PrettyF (M.ArchReg arch) + ) + +data ArchVals arch = ArchVals + { archFunctions :: MacawSymbolicArchFunctions arch + , withArchEval + :: forall a m sym + . (IsSymInterface sym, MonadIO m) + => sym + -> ( ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + -> m a + ) + -> m a + , withArchConstraints :: forall a . (SymArchConstraints arch => a) -> a + } + +-- | A class to capture the architecture-specific information required to +-- perform block recovery and translation into a Crucible CFG. +-- +-- For architectures that do not have a symbolic backend yet, have this function +-- return 'Nothing'. +class ArchInfo arch where + archVals :: proxy arch -> Maybe (ArchVals arch) + -- | Run the simulator over a contiguous set of code. -runCodeBlock :: forall sym arch blocks - . (C.IsSyntaxExtension (MacawExt arch), IsSymInterface sym) - => sym - -> MacawSymbolicArchFunctions arch - -- ^ Translation functions - -> MacawArchEvalFn sym arch - -> C.HandleAllocator RealWorld - -> (MM.MemImpl sym, GlobalMap sym (M.ArchAddrWidth arch)) - -> LookupFunctionHandle sym arch - -> C.CFG (MacawExt arch) blocks (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch) - -> Ctx.Assignment (C.RegValue' sym) (MacawCrucibleRegTypes arch) - -- ^ Register assignment - -> IO ( C.GlobalVar MM.Mem - , C.ExecResult - (MacawSimulatorState sym) - sym - (MacawExt arch) - (C.RegEntry sym (ArchRegStruct arch))) +runCodeBlock + :: forall sym arch blocks + . (C.IsSyntaxExtension (MacawExt arch), IsSymInterface sym) + => sym + -> MacawSymbolicArchFunctions arch + -- ^ Translation functions + -> ( C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth arch) + -> MacawArchEvalFn sym arch + ) + -> C.HandleAllocator RealWorld + -> (MM.MemImpl sym, GlobalMap sym (M.ArchAddrWidth arch)) + -> LookupFunctionHandle sym arch + -> C.CFG (MacawExt arch) blocks (EmptyCtx ::> ArchRegStruct arch) (ArchRegStruct arch) + -> Ctx.Assignment (C.RegValue' sym) (MacawCrucibleRegTypes arch) + -- ^ Register assignment + -> IO ( C.GlobalVar MM.Mem + , C.ExecResult + (MacawSimulatorState sym) + sym + (MacawExt arch) + (C.RegEntry sym (ArchRegStruct arch))) runCodeBlock sym archFns archEval halloc (initMem,globs) lookupH g regStruct = do mvar <- stToIO (MM.mkMemVar halloc) let crucRegTypes = crucArchRegTypes archFns diff --git a/x86/src/Data/Macaw/X86/ArchTypes.hs b/x86/src/Data/Macaw/X86/ArchTypes.hs index 81b00367..383c2a2d 100644 --- a/x86/src/Data/Macaw/X86/ArchTypes.hs +++ b/x86/src/Data/Macaw/X86/ArchTypes.hs @@ -33,6 +33,7 @@ module Data.Macaw.X86.ArchTypes , X86PrimLoc(..) , SIMDWidth(..) , RepValSize(..) + , SomeRepValSize(..) , repValSizeByteCount , repValSizeMemRepr ) where @@ -81,6 +82,9 @@ data RepValSize w | (w ~ 32) => DWordRepVal | (w ~ 64) => QWordRepVal +data SomeRepValSize where + SomeRepValSize :: (1 <= w) => RepValSize w -> SomeRepValSize + repValSizeMemRepr :: RepValSize w -> MemRepr (BVType w) repValSizeMemRepr v = case v of @@ -122,7 +126,7 @@ data X86PrimLoc tp | (tp ~ BVType 16) => FS -- ^ This refers to the selector of the 'FS' register. | (tp ~ BVType 16) => GS - -- ^ This refers to the se lector of the 'GS' register. + -- ^ This refers to the selector of the 'GS' register. | forall w . (tp ~ BVType w) => X87_ControlLoc !(X87_ControlReg w) -- ^ One of the x87 control registers @@ -895,12 +899,14 @@ data X86Stmt (v :: Type -> Kind.Type) where -- * @dir@ is a flag that indicates the direction of move ('True' == -- decrement, 'False' == increment) for updating the buffer -- pointers. - RepMovs :: !(RepValSize w) - -> !(v (BVType 64)) - -> !(v (BVType 64)) - -> !(v (BVType 64)) - -> !(v BoolType) - -> X86Stmt v + RepMovs + :: (1 <= w) + => !(RepValSize w) + -> !(v (BVType 64)) + -> !(v (BVType 64)) + -> !(v (BVType 64)) + -> !(v BoolType) + -> X86Stmt v -- | Assign all elements in an array in memory a specific value. -- @@ -912,16 +918,18 @@ data X86Stmt (v :: Type -> Kind.Type) where -- * @dir@ is a flag that indicates the direction of move ('True' == -- decrement, 'False' == increment) for updating the buffer -- pointers. - RepStos :: !(RepValSize w) - -> !(v (BVType 64)) - -- /\ Address to start assigning to. - -> !(v (BVType w)) - -- /\ Value to assign - -> !(v (BVType 64)) - -- /\ Number of values to assign - -> !(v BoolType) - -- /\ Direction flag - -> X86Stmt v + RepStos + :: (1 <= w) + => !(RepValSize w) + -> !(v (BVType 64)) + -- /\ Address to start assigning to. + -> !(v (BVType w)) + -- /\ Value to assign + -> !(v (BVType 64)) + -- /\ Number of values to assign + -> !(v BoolType) + -- /\ Direction flag + -> X86Stmt v -- | Empty MMX technology State. Sets the x87 FPU tag word to empty. -- diff --git a/x86/src/Data/Macaw/X86/Semantics.hs b/x86/src/Data/Macaw/X86/Semantics.hs index 4f8f6223..639595fe 100644 --- a/x86/src/Data/Macaw/X86/Semantics.hs +++ b/x86/src/Data/Macaw/X86/Semantics.hs @@ -1223,13 +1223,12 @@ def_ret = defVariadic "ret" $ \_ vs -> def_movs :: InstructionDef def_movs = defBinary "movs" $ \ii loc _ -> do let pfx = F.iiPrefixes ii - Some w <- - case loc of - F.Mem8{} -> pure (Some ByteRepVal) - F.Mem16{} -> pure (Some WordRepVal) - F.Mem32{} -> pure (Some DWordRepVal) - F.Mem64{} -> pure (Some QWordRepVal) - _ -> error "Bad argument to movs" + SomeRepValSize w <- case loc of + F.Mem8{} -> pure (SomeRepValSize ByteRepVal) + F.Mem16{} -> pure (SomeRepValSize WordRepVal) + F.Mem32{} -> pure (SomeRepValSize DWordRepVal) + F.Mem64{} -> pure (SomeRepValSize QWordRepVal) + _ -> error "Bad argument to movs" let bytesPerOp = bvLit n64 (repValSizeByteCount w) dest <- get rdi src <- get rsi @@ -1484,16 +1483,16 @@ def_lodsx suf elsz = defNullaryPrefix ("lods" ++ suf) $ \pfx -> do def_stos :: InstructionDef def_stos = defBinary "stos" $ \ii loc loc' -> do let pfx = F.iiPrefixes ii - Some rep <- + SomeRepValSize rep <- case (loc, loc') of (F.Mem8 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.ByteReg F.AL) -> do - pure (Some ByteRepVal) + pure (SomeRepValSize ByteRepVal) (F.Mem16 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.WordReg F.AX) -> do - pure (Some WordRepVal) + pure (SomeRepValSize WordRepVal) (F.Mem32 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.DWordReg F.EAX) -> do - pure (Some DWordRepVal) + pure (SomeRepValSize DWordRepVal) (F.Mem64 (F.Addr_64 F.ES (Just F.RDI) Nothing F.NoDisplacement), F.QWordReg F.RAX) -> do - pure (Some QWordRepVal) + pure (SomeRepValSize QWordRepVal) _ -> error $ "stos given bad arguments " ++ show (loc, loc') -- The direction flag indicates post decrement or post increment. dest <- get rdi diff --git a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs index ca9fc0fc..be7a77e0 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Crucible.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Crucible.hs @@ -33,6 +33,7 @@ module Data.Macaw.X86.Crucible ) where import Control.Lens ((^.)) +import Control.Monad import Data.Bits hiding (xor) import Data.Kind ( Type ) import Data.Parameterized.Context.Unsafe (empty,extend) @@ -44,12 +45,14 @@ import Data.Word (Word8) import GHC.TypeLits (KnownNat) import Text.PrettyPrint.ANSI.Leijen hiding ( (<$>), (<>), empty ) +import What4.Concrete import What4.Interface hiding (IsExpr) import What4.InterpretedFloatingPoint import What4.Symbol (userSymbol) import Lang.Crucible.Backend (IsSymInterface) import Lang.Crucible.CFG.Expr +import qualified Lang.Crucible.Simulator as C import qualified Lang.Crucible.Simulator.Evaluation as C import Lang.Crucible.Simulator.ExecutionTree import Lang.Crucible.Simulator.Intrinsics (IntrinsicTypes) @@ -59,12 +62,20 @@ import Lang.Crucible.Types import qualified Lang.Crucible.Vector as V import Lang.Crucible.LLVM.MemModel - (LLVMPointerType, projectLLVM_bv, - pattern LLVMPointerRepr, llvmPointer_bv) + ( LLVMPointerType + , Mem + , ptrAdd + , projectLLVM_bv + , pattern LLVMPointerRepr + , llvmPointer_bv + ) +import qualified Data.Macaw.CFG.Core as M +import qualified Data.Macaw.Memory as M import qualified Data.Macaw.Types as M import Data.Macaw.Symbolic.CrucGen (MacawExt) -import Data.Macaw.Symbolic +import Data.Macaw.Symbolic.MemOps +import Data.Macaw.Symbolic.PersistentState import qualified Data.Macaw.X86 as M import qualified Data.Macaw.X86.ArchTypes as M import qualified Data.Macaw.CFG.Core as MC @@ -87,21 +98,83 @@ funcSemantics fs x s = do let sym = Sym { symIface = s^.stateSymInterface v <- pureSem sym x return (v,s) -stmtSemantics :: (IsSymInterface sym) - => SymFuns sym - -> M.X86Stmt (AtomWrapper (RegEntry sym)) - -> S sym rtp bs r ctx - -> IO (RegValue sym UnitType, S sym rtp bs r ctx) -stmtSemantics fs x s = error ("Symbolic execution semantics for x86 statements are not implemented yet: " <> - (show $ MC.ppArchStmt (liftAtomIn (pretty . regType)) x)) +withConcreteCountAndDir + :: (IsSymInterface sym, 1 <= w) + => S sym rtp bs r ctx + -> M.RepValSize w + -> (AtomWrapper (RegEntry sym) (M.BVType 64)) + -> (AtomWrapper (RegEntry sym) M.BoolType) + -> (S sym rtp bs r ctx -> (SymBV sym 64) -> IO (S sym rtp bs r ctx)) + -> IO (RegValue sym UnitType, S sym rtp bs r ctx) +withConcreteCountAndDir state val_size wrapped_count wrapped_dir func = do + let sym = state^.stateSymInterface + let val_byte_size = M.repValSizeByteCount val_size + bv_count <- toValBV sym wrapped_count + case asConcrete bv_count of + Just (ConcreteBV _ count) -> do + res_crux_state <- foldM func state + =<< mapM (\index -> bvLit sym knownNat $ index * val_byte_size) + -- [0..((if dir then 1 else -1) * (count - 1))] + [0..(count - 1)] + return ((), res_crux_state) + Nothing -> error $ "Unsupported symbolic count in rep stmt: " + +stmtSemantics + :: IsSymInterface sym + => SymFuns sym + -> C.GlobalVar Mem + -> GlobalMap sym (M.ArchAddrWidth M.X86_64) + -> M.X86Stmt (AtomWrapper (RegEntry sym)) + -> S sym rtp bs r ctx + -> IO (RegValue sym UnitType, S sym rtp bs r ctx) +stmtSemantics _sym_funs global_var_mem globals stmt state = do + let sym = state^.stateSymInterface + case stmt of + M.RepMovs val_size (AtomWrapper dest) (AtomWrapper src) count dir -> + withConcreteCountAndDir state val_size count dir $ \acc_state offset -> do + let mem_repr = M.repValSizeMemRepr val_size + curr_dest_ptr <- ptrAdd sym knownNat (regValue dest) offset + curr_src_ptr <- ptrAdd sym knownNat (regValue src) offset + (val, after_read_state) <- doReadMem + acc_state + global_var_mem + globals + M.Addr64 + mem_repr + (RegEntry knownRepr curr_src_ptr) + (_, after_write_state) <- doWriteMem + after_read_state + global_var_mem + globals + M.Addr64 + mem_repr + (RegEntry knownRepr curr_dest_ptr) + (RegEntry (typeToCrucible $ M.typeRepr mem_repr) val) + return after_write_state + M.RepStos val_size (AtomWrapper dest) (AtomWrapper val) count dir -> + withConcreteCountAndDir state val_size count dir $ \acc_state offset -> do + let mem_repr = M.repValSizeMemRepr val_size + curr_dest_ptr <- ptrAdd sym knownNat (regValue dest) offset + (_, after_write_state) <- doWriteMem + acc_state + global_var_mem + globals + M.Addr64 + mem_repr + (RegEntry knownRepr curr_dest_ptr) + val + return after_write_state + _ -> error $ + "Symbolic execution semantics for x86 statement are not implemented yet: " + <> (show $ MC.ppArchStmt (liftAtomIn (pretty . regType)) stmt) termSemantics :: (IsSymInterface sym) => SymFuns sym -> M.X86TermStmt ids -> S sym rtp bs r ctx -> IO (RegValue sym UnitType, S sym rtp bs r ctx) -termSemantics fs x s = error ("Symbolic execution semantics for x86 terminators are not implemented yet: " <> - (show $ MC.prettyF x)) +termSemantics _fs x _s = error ("Symbolic execution semantics for x86 terminators are not implemented yet: " <> + (show $ MC.prettyF x)) data Sym s = Sym { symIface :: s , symTys :: IntrinsicTypes s diff --git a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs index 6c9b5110..85234796 100644 --- a/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs +++ b/x86_symbolic/src/Data/Macaw/X86/Symbolic.hs @@ -12,6 +12,7 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE AllowAmbiguousTypes #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} module Data.Macaw.X86.Symbolic ( x86_64MacawSymbolicFns , x86_64MacawEvalFn @@ -28,6 +29,7 @@ module Data.Macaw.X86.Symbolic import Control.Lens ((^.),(%~),(&)) import Control.Monad ( void ) +import Control.Monad.IO.Class ( liftIO ) import Data.Functor.Identity (Identity(..)) import Data.Kind import Data.Parameterized.Context as Ctx @@ -55,6 +57,7 @@ import qualified Lang.Crucible.CFG.Extension as C import qualified Lang.Crucible.CFG.Reg as C import Lang.Crucible.Simulator.RegValue (RegValue'(..)) import qualified Lang.Crucible.Types as C +import qualified Lang.Crucible.LLVM.MemModel as MM ------------------------------------------------------------------------ -- Utilities for generating a type-level context with repeated elements. @@ -264,8 +267,23 @@ x86_64MacawSymbolicFns = -- | X86_64 specific function for evaluating a Macaw X86_64 program in Crucible. -x86_64MacawEvalFn :: - C.IsSymInterface sym => SymFuns sym -> MacawArchEvalFn sym M.X86_64 -x86_64MacawEvalFn fs (X86PrimFn x) s = funcSemantics fs x s -x86_64MacawEvalFn fs (X86PrimStmt stmt) s = stmtSemantics fs stmt s -x86_64MacawEvalFn fs (X86PrimTerm term) s = termSemantics fs term s +x86_64MacawEvalFn + :: C.IsSymInterface sym + => SymFuns sym + -> C.GlobalVar MM.Mem + -> GlobalMap sym (M.ArchAddrWidth M.X86_64) + -> MacawArchEvalFn sym M.X86_64 +x86_64MacawEvalFn fs global_var_mem globals ext_stmt crux_state = + case ext_stmt of + X86PrimFn x -> funcSemantics fs x crux_state + X86PrimStmt stmt -> stmtSemantics fs global_var_mem globals stmt crux_state + X86PrimTerm term -> termSemantics fs term crux_state + +instance ArchInfo M.X86_64 where + archVals _ = Just $ ArchVals + { archFunctions = x86_64MacawSymbolicFns + , withArchEval = \sym -> \k -> do + sfns <- liftIO $ newSymFuns sym + k $ x86_64MacawEvalFn sfns + , withArchConstraints = \x -> x + }