diff --git a/macaw-ppc/src/Data/Macaw/PPC.hs b/macaw-ppc/src/Data/Macaw/PPC.hs index e3097753..ea65b288 100644 --- a/macaw-ppc/src/Data/Macaw/PPC.hs +++ b/macaw-ppc/src/Data/Macaw/PPC.hs @@ -14,7 +14,7 @@ module Data.Macaw.PPC ( import Data.Proxy ( Proxy(..) ) import qualified Data.Macaw.Architecture.Info as MI -import Data.Macaw.CFG +import Data.Macaw.CFG import qualified Data.Macaw.CFG.DemandSet as MDS import qualified Data.Macaw.Memory as MM import qualified Data.Parameterized.TraversableFC as FC @@ -28,6 +28,7 @@ import Data.Macaw.PPC.Eval ( mkInitialAbsState, absEvalArchFn, absEvalArchStmt, postCallAbsState, + postPPCTermStmtAbsState, preserveRegAcrossSyscall ) import Data.Macaw.PPC.Identify ( identifyCall, @@ -66,7 +67,6 @@ ppc64_linux_info = , MI.archEndianness = MM.BigEndian , MI.jumpTableEntrySize = jumpTableEntrySize proxy , MI.disassembleFn = disassembleFn proxy PPC64.execInstruction - , MI.preserveRegAcrossSyscall = preserveRegAcrossSyscall proxy , MI.mkInitialAbsState = mkInitialAbsState proxy , MI.absEvalArchFn = absEvalArchFn proxy , MI.absEvalArchStmt = absEvalArchStmt proxy @@ -77,6 +77,7 @@ ppc64_linux_info = , MI.rewriteArchStmt = rewriteStmt , MI.rewriteArchTermStmt = rewriteTermStmt , MI.archDemandContext = archDemandContext proxy + , MI.postArchTermStmtAbsState = postPPCTermStmtAbsState (preserveRegAcrossSyscall proxy) } where proxy = Proxy @PPC64.PPC @@ -88,7 +89,6 @@ ppc32_linux_info = , MI.archEndianness = MM.BigEndian , MI.jumpTableEntrySize = jumpTableEntrySize proxy , MI.disassembleFn = disassembleFn proxy PPC32.execInstruction - , MI.preserveRegAcrossSyscall = preserveRegAcrossSyscall proxy , MI.mkInitialAbsState = mkInitialAbsState proxy , MI.absEvalArchFn = absEvalArchFn proxy , MI.absEvalArchStmt = absEvalArchStmt proxy @@ -99,6 +99,7 @@ ppc32_linux_info = , MI.rewriteArchStmt = rewriteStmt , MI.rewriteArchTermStmt = rewriteTermStmt , MI.archDemandContext = archDemandContext proxy + , MI.postArchTermStmtAbsState = postPPCTermStmtAbsState (preserveRegAcrossSyscall proxy) } where proxy = Proxy @PPC32.PPC diff --git a/macaw-ppc/src/Data/Macaw/PPC/Eval.hs b/macaw-ppc/src/Data/Macaw/PPC/Eval.hs index 113ee8cb..8e3e30af 100644 --- a/macaw-ppc/src/Data/Macaw/PPC/Eval.hs +++ b/macaw-ppc/src/Data/Macaw/PPC/Eval.hs @@ -1,12 +1,14 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeOperators #-} module Data.Macaw.PPC.Eval ( mkInitialAbsState, absEvalArchFn, absEvalArchStmt, postCallAbsState, + postPPCTermStmtAbsState, preserveRegAcrossSyscall ) where @@ -29,6 +31,21 @@ preserveRegAcrossSyscall :: (ArchReg ppc ~ PPCReg ppc, 1 <= RegAddrWidth (PPCReg -> Bool preserveRegAcrossSyscall proxy r = S.member (Some r) (linuxSystemCallPreservedRegisters proxy) +postPPCTermStmtAbsState :: (PPCArch ppc) + => (forall tp . PPCReg ppc tp -> Bool) + -> AbsBlockState (PPCReg ppc) + -> PPCTermStmt ids + -> MM.MemSegmentOff (RegAddrWidth (ArchReg ppc)) + -> AbsBlockState (PPCReg ppc) +postPPCTermStmtAbsState preservePred s0 stmt nextIP = + case stmt of + PPCSyscall -> + let params = MA.CallParams { MA.postCallStackDelta = 0 + , MA.preserveReg = preservePred + } + in MA.absEvalCall params s0 nextIP + PPCTrap -> error "Trap isn't handled in the postPPCTermStmtAbsState function yet" + -- | Set up an initial abstract state that holds at the beginning of a basic -- block. -- diff --git a/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC32.hs b/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC32.hs index 3e2a4719..ba714aaf 100644 --- a/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC32.hs +++ b/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC32.hs @@ -1,4 +1,5 @@ {-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} module Data.Macaw.PPC.Semantics.PPC32 @@ -7,14 +8,14 @@ module Data.Macaw.PPC.Semantics.PPC32 import qualified Data.Constraint as C import Data.Proxy ( Proxy(..) ) -import qualified Dismantle.PPC as D +import Dismantle.PPC import qualified Data.Macaw.CFG as MC import qualified Data.Macaw.Types as MT import SemMC.Architecture.PPC32 ( PPC ) -import SemMC.Architecture.PPC32.Opcodes ( allSemantics ) +import SemMC.Architecture.PPC32.Opcodes ( allSemantics, allOpcodeInfo ) import Data.Macaw.PPC.Generator import Data.Macaw.PPC.Semantics.TH ( genExecInstruction ) -execInstruction :: MC.Value PPC s (MT.BVType 32) -> D.Instruction -> Maybe (PPCGenerator PPC s ()) -execInstruction = $(genExecInstruction (Proxy @PPC) (C.Sub C.Dict) allSemantics) +execInstruction :: MC.Value PPC s (MT.BVType 32) -> Instruction -> Maybe (PPCGenerator PPC s ()) +execInstruction = $(genExecInstruction (Proxy @PPC) (C.Sub C.Dict) allSemantics allOpcodeInfo) diff --git a/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC64.hs b/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC64.hs index 691d4f53..c282861b 100644 --- a/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC64.hs +++ b/macaw-ppc/src/Data/Macaw/PPC/Semantics/PPC64.hs @@ -1,4 +1,5 @@ {-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} module Data.Macaw.PPC.Semantics.PPC64 @@ -7,14 +8,14 @@ module Data.Macaw.PPC.Semantics.PPC64 import qualified Data.Constraint as C import Data.Proxy ( Proxy(..) ) -import qualified Dismantle.PPC as D +import Dismantle.PPC import qualified Data.Macaw.CFG as MC import qualified Data.Macaw.Types as MT import SemMC.Architecture.PPC64 ( PPC ) -import SemMC.Architecture.PPC64.Opcodes ( allSemantics ) +import SemMC.Architecture.PPC64.Opcodes ( allSemantics, allOpcodeInfo ) import Data.Macaw.PPC.Generator import Data.Macaw.PPC.Semantics.TH -execInstruction :: MC.Value PPC s (MT.BVType 64) -> D.Instruction -> Maybe (PPCGenerator PPC s ()) -execInstruction = $(genExecInstruction (Proxy @PPC) (C.Sub C.Dict) allSemantics) +execInstruction :: MC.Value PPC s (MT.BVType 64) -> Instruction -> Maybe (PPCGenerator PPC s ()) +execInstruction = $(genExecInstruction (Proxy @PPC) (C.Sub C.Dict) allSemantics allOpcodeInfo) diff --git a/macaw-ppc/src/Data/Macaw/PPC/Semantics/TH.hs b/macaw-ppc/src/Data/Macaw/PPC/Semantics/TH.hs index 74b6c596..9826c0a0 100644 --- a/macaw-ppc/src/Data/Macaw/PPC/Semantics/TH.hs +++ b/macaw-ppc/src/Data/Macaw/PPC/Semantics/TH.hs @@ -1,6 +1,7 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE KindSignatures #-} +{-# LANGUAGE PolyKinds #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} @@ -22,17 +23,26 @@ import Language.Haskell.TH.Syntax import GHC.TypeLits import Data.Parameterized.Classes +import Data.Parameterized.FreeParamF ( FreeParamF(..) ) +import qualified Data.Parameterized.Lift as LF import qualified Data.Parameterized.Map as Map import qualified Data.Parameterized.Nonce as PN +import qualified Data.Parameterized.ShapedList as SL import Data.Parameterized.Some ( Some(..) ) +import qualified Data.Parameterized.TraversableFC as FC import Data.Parameterized.Witness ( Witness(..) ) +import qualified Lang.Crucible.Solver.Interface as SI import qualified Lang.Crucible.Solver.SimpleBuilder as S import qualified Lang.Crucible.Solver.SimpleBackend as S import qualified Lang.Crucible.BaseTypes as S import qualified Dismantle.PPC as D +import qualified Dismantle.Tablegen.TH.Capture as DT + +import qualified SemMC.BoundVar as BV import SemMC.Formula import qualified SemMC.Architecture as A +import qualified SemMC.Architecture.Location as L import qualified SemMC.Architecture.PPC.Location as APPC import qualified Data.Macaw.CFG as M import qualified Data.Macaw.Types as M @@ -47,17 +57,161 @@ import Data.Parameterized.NatRepr ( knownNat import Data.Macaw.PPC.Generator import Data.Macaw.PPC.PPCReg --- generate a different case for each key/value pair in the map -genExecInstruction :: (A.Architecture arch, OrdF a, ShowF a) +-- | A different parameterized pair wrapper; the one in Data.Parameterized.Map +-- hides the @tp@ parameter under an existential, while we need the variant that +-- exposes it. +data PairF a b tp = PairF (a tp) (b tp) + +-- | Generate the top-level lambda with a case expression over an instruction +-- (casing on opcode) +-- +-- > \ipVar (Instruction opcode operandList) -> +-- > case opcode of +-- > ${CASES} +-- +-- where each case in ${CASES} is defined by 'mkSemanticsCase'; each case +-- matches one opcode. +instructionMatcher :: (OrdF a, LF.LiftF a) + => Map.MapF (Witness c a) (PairF (ParameterizedFormula (Sym t) arch) (DT.CaptureInfo c a)) + -> Q Exp +instructionMatcher formulas = do + ipVarName <- newName "ipVal" + opcodeVar <- newName "opcode" + operandListVar <- newName "operands" + let cases = map (mkSemanticsCase ipVarName operandListVar) (Map.toList formulas) + lamE [varP ipVarName, conP 'D.Instruction [varP opcodeVar, varP operandListVar]] (caseE (varE opcodeVar) cases) + +-- | Generate a single case for one opcode of the case expression. +-- +-- > ADD4 -> ${BODY} +-- +-- where the ${BODY} is generated by 'mkOperandListCase' +mkSemanticsCase :: (LF.LiftF a) + => Name + -> Name + -> Map.Pair (Witness c a) (PairF (ParameterizedFormula (Sym t) arch) (DT.CaptureInfo c a)) + -> MatchQ +mkSemanticsCase ipVarName operandListVar (Map.Pair (Witness opc) (PairF semantics capInfo)) = + match (conP (DT.capturedOpcodeName capInfo) []) (normalB (mkOperandListCase ipVarName operandListVar opc semantics capInfo)) [] + +-- | For each opcode case, we have a sub-case expression to destructure the +-- operand list into names that we can reference. This generates an expression +-- of the form: +-- +-- > case operandList of +-- > op1 :> op2 :> op3 :> Nil -> ${BODY} +-- +-- where ${BODY} is generated by 'genCaseBody', which references the operand +-- names introduced by this case (e.g., op1, op2, op3). Those names are pulled +-- from the DT.CaptureInfo, and have been pre-allocated. See +-- Dismantle.Tablegen.TH.Capture.captureInfo for information on how those names +-- are generated. +-- +-- Note that the structure of the operand list is actually a little more +-- complicated than the above. Each operand actually has an additional level of +-- wrapper around it, and really looks like: +-- +-- > Dismantle.PPC.ADD4 +-- > -> case operands_ayaa of { +-- > (Gprc gprc0 :> (Gprc gprc1 :> (Gprc gprc2 :> Nil))) +-- > -> ${BODY} +-- +-- in an example with three general purpose register operands. +mkOperandListCase :: Name -> Name -> a tp -> ParameterizedFormula (Sym t) arch tp -> DT.CaptureInfo c a tp -> Q Exp +mkOperandListCase ipVarName operandListVar opc semantics capInfo = do + body <- genCaseBody ipVarName opc semantics (DT.capturedOperandNames capInfo) + DT.genCase capInfo operandListVar body + +-- | This is the function that translates formulas (semantics) into expressions +-- that construct macaw terms. +-- +-- The stub implementation is essentially +-- +-- > undefined ipVar arg1 arg2 +-- +-- to avoid unused variable warnings. +-- +-- The two maps (locVars and opVars) are crucial for translating parameterized +-- formulas into expressions. +genCaseBody :: forall a sh t arch + . Name + -> a sh + -> ParameterizedFormula (Sym t) arch sh + -> SL.ShapedList (FreeParamF Name) sh + -> Q Exp +genCaseBody ipVarName opc semantics varNames = + [| undefined $(tupE (map varE binders)) |] + where + binders = ipVarName : FC.toListFC unFreeParamF varNames + + locVars :: Map.MapF (SI.BoundVar (Sym t)) (L.Location arch) + locVars = Map.foldrWithKey (collectVarForLocation (Proxy @arch)) Map.empty (pfLiteralVars semantics) + + opVars :: Map.MapF (SI.BoundVar (Sym t)) (FreeParamF Name) + opVars = SL.foldrFCIndexed (collectOperandVars varNames) Map.empty (pfOperandVars semantics) + +collectVarForLocation :: forall tp arch proxy t + . proxy arch + -> L.Location arch tp + -> SI.BoundVar (Sym t) tp + -> Map.MapF (SI.BoundVar (Sym t)) (L.Location arch) + -> Map.MapF (SI.BoundVar (Sym t)) (L.Location arch) +collectVarForLocation _ loc bv = Map.insert bv loc + +-- | Index variables that map to operands +-- +-- We record the TH 'Name' for the 'SI.BoundVar' that stands in for each +-- operand. The idea will be that we will look up bound variables in this map +-- to be able to compute a TH expression to refer to it. +-- +-- We have to unwrap and rewrap the 'FreeParamF' because the type parameter +-- changes when we switch from 'BV.BoundVar' to 'SI.BoundVar'. See the +-- SemMC.BoundVar module for information about the nature of that change +-- (basically, from 'Symbol' to BaseType). +collectOperandVars :: forall sh tp arch t + . SL.ShapedList (FreeParamF Name) sh + -> SL.Index sh tp + -> BV.BoundVar (Sym t) arch tp + -> Map.MapF (SI.BoundVar (Sym t)) (FreeParamF Name) + -> Map.MapF (SI.BoundVar (Sym t)) (FreeParamF Name) +collectOperandVars varNames ix (BV.BoundVar bv) m = + case SL.indexShapedList varNames ix of + FreeParamF name -> Map.insert bv (FreeParamF name) m + +-- | Generate an implementation of 'execInstruction' that runs in the +-- 'PPCGenerator' monad. We pass in both the original list of semantics files +-- along with the list of opcode info objects. We can match them up using +-- equality on opcodes (via a MapF). Generating a combined list up-front would +-- be ideal, but is difficult for various TH reasons (we can't call 'lift' on +-- all of the things we would need to for that). +-- +-- The structure of the term produced is documented in 'instructionMatcher' +genExecInstruction :: (A.Architecture arch, OrdF a, ShowF a, LF.LiftF a) => proxy arch -> (forall sh . c sh C.:- BuildOperandList arch sh) + -- ^ A constraint implication to let us extract/weaken the + -- constraint in our 'Witness' to the required 'BuildOperandList' -> [(Some (Witness c a), BS.ByteString)] + -- ^ A list of opcodes (with constraint information + -- witnessed) paired with the bytestrings containing their + -- semantics. This comes from semmc. + -> [Some (DT.CaptureInfo c a)] + -- ^ Extra information for each opcode to let us generate + -- some TH to match them. This comes from the semantics + -- definitions in semmc. -> Q Exp -genExecInstruction _ impl semantics = do +genExecInstruction _ impl semantics captureInfo = do Some ng <- runIO PN.newIONonceGenerator sym <- runIO (S.newSimpleBackend ng) formulas <- runIO (loadFormulas sym impl semantics) - [| undefined |] + let formulasWithInfo = foldr (attachInfo formulas) Map.empty captureInfo + instructionMatcher formulasWithInfo + where + attachInfo m0 (Some ci) m = + let co = DT.capturedOpcode ci + in case Map.lookup co m0 of + Nothing -> m + Just pf -> Map.insert co (PairF pf ci) m -- SemMC.Formula: instantiateFormula diff --git a/submodules/dismantle b/submodules/dismantle index d3e21f8c..16beb81b 160000 --- a/submodules/dismantle +++ b/submodules/dismantle @@ -1 +1 @@ -Subproject commit d3e21f8cc1a8d6970b019aae6492ec279907d6c5 +Subproject commit 16beb81bcdb6fe5c5d5a260dbc117fae35a2a85d diff --git a/submodules/macaw b/submodules/macaw index b17122e4..c95d3e7d 160000 --- a/submodules/macaw +++ b/submodules/macaw @@ -1 +1 @@ -Subproject commit b17122e4c53e550c3259d460ea0d85d2379b18da +Subproject commit c95d3e7d0f9859bbe59fdc55bf6330dd56481933 diff --git a/submodules/semmc b/submodules/semmc index eaf0f513..fcaefe36 160000 --- a/submodules/semmc +++ b/submodules/semmc @@ -1 +1 @@ -Subproject commit eaf0f5137f6cd3628cd576ffebfdcc2781b2e77a +Subproject commit fcaefe36c12065b515ec5e97c0287d1c8450fcfa