Implement macaw-riscv-symbolic

This adds the necessary plumbing to simulate Macaw-lifted RISC-V binaries using
`macaw-symbolic`. This proves relatively straightforward, given that RISC-V
does not have a lot of special primitive functions or statements to deal with.
I have also added a basic test suite to ensure that `macaw-riscv-symbolic`
works on end-to-end examples.

Fixes #409.
This commit is contained in:
Ryan Scott 2024-07-26 10:46:07 -04:00
parent eb0a3c7ccc
commit 1a758c70eb
33 changed files with 965 additions and 4 deletions

View File

@ -130,11 +130,15 @@ jobs:
run: cabal test pkg:macaw-ppc-symbolic
- name: Build macaw-riscv
run: cabal build pkg:macaw-riscv
run: cabal build pkg:macaw-riscv pkg:macaw-riscv-symbolic
- name: Test macaw-riscv
run: cabal test pkg:macaw-riscv
- name: Test macaw-riscv-symbolic
if: runner.os == 'Linux'
run: cabal test pkg:macaw-riscv-symbolic
- name: Build macaw-refinement
run: cabal build pkg:macaw-refinement

View File

@ -5,6 +5,7 @@ packages: base/
macaw-ppc/
macaw-ppc-symbolic/
macaw-riscv/
macaw-riscv-symbolic/
x86/
symbolic/
symbolic-syntax/

View File

@ -0,0 +1,5 @@
# Revision history for macaw-riscv-symbolic
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

View File

@ -0,0 +1,30 @@
Copyright (c) 2024 Galois Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Galois, Inc. nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,144 @@
Cabal-version: 2.2
Name: macaw-riscv-symbolic
Version: 0.1
Author: Galois Inc.
Maintainer: rscott@galois.com
Build-type: Simple
License: BSD-3-Clause
License-file: LICENSE
Category: Language
Synopsis: A symbolic reasoning backend for RISC-V
-- Description:
extra-doc-files: README.md
common shared
-- Specifying -Wall and -Werror can cause the project to fail to build on
-- newer versions of GHC simply due to new warnings being added to -Wall. To
-- prevent this from happening we manually list which warnings should be
-- considered errors. We also list some warnings that are not in -Wall, though
-- try to avoid "opinionated" warnings (though this judgement is clearly
-- subjective).
--
-- Warnings are grouped by the GHC version that introduced them, and then
-- alphabetically.
--
-- A list of warnings and the GHC version in which they were introduced is
-- available here:
-- https://ghc.gitlab.haskell.org/ghc/doc/users_guide/using-warnings.html
-- Since GHC 8.10 or earlier:
ghc-options:
-Wall
-Werror=compat-unqualified-imports
-Werror=deferred-type-errors
-Werror=deprecated-flags
-Werror=deprecations
-Werror=deriving-defaults
-Werror=dodgy-foreign-imports
-Werror=duplicate-exports
-Werror=empty-enumerations
-Werror=identities
-Werror=inaccessible-code
-Werror=incomplete-patterns
-Werror=incomplete-record-updates
-Werror=incomplete-uni-patterns
-Werror=inline-rule-shadowing
-Werror=missed-extra-shared-lib
-Werror=missing-exported-signatures
-Werror=missing-fields
-Werror=missing-home-modules
-Werror=missing-methods
-Werror=overflowed-literals
-Werror=overlapping-patterns
-Werror=partial-fields
-Werror=partial-type-signatures
-Werror=simplifiable-class-constraints
-Werror=star-binder
-Werror=star-is-type
-Werror=tabs
-Werror=typed-holes
-Werror=unrecognised-pragmas
-Werror=unrecognised-warning-flags
-Werror=unsupported-calling-conventions
-Werror=unsupported-llvm-version
-Werror=unticked-promoted-constructors
-Werror=unused-imports
-Werror=warnings-deprecations
-Werror=wrong-do-bind
if impl(ghc >= 9.2)
ghc-options:
-Werror=ambiguous-fields
-Werror=operator-whitespace
-Werror=operator-whitespace-ext-conflict
-Werror=redundant-bang-patterns
if impl(ghc >= 9.4)
ghc-options:
-Werror=forall-identifier
-Werror=misplaced-pragmas
-Werror=redundant-strictness-flags
-Werror=type-equality-out-of-scope
-Werror=type-equality-requires-operators
ghc-prof-options: -O2 -fprof-auto-top
default-language: Haskell2010
library
import: shared
build-depends:
base >= 4.13,
containers,
crucible,
crucible-llvm,
exceptions,
grift,
lens,
macaw-base,
macaw-riscv,
macaw-symbolic,
panic,
parameterized-utils,
what4
hs-source-dirs: src
exposed-modules:
Data.Macaw.RISCV.Symbolic
other-modules:
Data.Macaw.RISCV.Symbolic.AtomWrapper
Data.Macaw.RISCV.Symbolic.Functions
Data.Macaw.RISCV.Symbolic.Panic
Data.Macaw.RISCV.Symbolic.Repeat
test-suite macaw-riscv-symbolic-tests
import: shared
type: exitcode-stdio-1.0
main-is: Main.hs
build-depends:
base >= 4,
bytestring,
containers,
crucible,
crucible-llvm,
elf-edit,
filepath,
Glob >= 0.9 && < 0.11,
grift,
lens,
macaw-base,
macaw-riscv,
macaw-riscv-symbolic,
macaw-symbolic,
parameterized-utils,
prettyprinter,
tasty,
tasty-hunit,
text,
what4,
vector
hs-source-dirs: tests

View File

@ -0,0 +1,299 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-orphans #-}
module Data.Macaw.RISCV.Symbolic
( riscvMacawSymbolicFns
, riscvMacawEvalFn
, lookupReg
, updateReg
) where
import Control.Lens ((%~), (&))
import qualified Control.Monad.Catch as X
import Control.Monad.IO.Class (liftIO)
import Data.Functor (void)
import qualified Data.Kind as DK
import qualified Data.Functor.Identity as I
import qualified Data.Parameterized.Map as MapF
import qualified Data.Parameterized.Classes as PC
import qualified Data.Parameterized.Context as Ctx
import Data.Parameterized.Some (Some(..))
import qualified Data.Parameterized.TraversableF as TF
import qualified Data.Parameterized.TraversableFC as FC
import Data.Typeable (Typeable)
-- crucible
import qualified Lang.Crucible.CFG.Expr as C
import qualified Lang.Crucible.CFG.Reg as C
import qualified Lang.Crucible.Simulator as C
import qualified Lang.Crucible.Types as C
-- crucible-llvm
import qualified Lang.Crucible.LLVM.MemModel as LCLM
-- grift
import qualified GRIFT.Types as G
-- what4
import qualified What4.Symbol as WS
-- macaw-base
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Types as MT
-- macaw-riscv
import qualified Data.Macaw.RISCV as MR
-- macaw-symbolic
import qualified Data.Macaw.Symbolic as MS
import qualified Data.Macaw.Symbolic.Backend as MSB
-- macaw-riscv-symbolic
import qualified Data.Macaw.RISCV.Symbolic.AtomWrapper as RA
import qualified Data.Macaw.RISCV.Symbolic.Functions as RF
import qualified Data.Macaw.RISCV.Symbolic.Repeat as RR
riscvMacawSymbolicFns ::
forall rv
. (G.KnownRV rv, MR.RISCVConstraints rv)
=> MS.MacawSymbolicArchFunctions (MR.RISCV rv)
riscvMacawSymbolicFns =
MSB.MacawSymbolicArchFunctions
{ MSB.crucGenArchConstraints = \x -> x
, MSB.crucGenArchRegName = riscvRegName
, MSB.crucGenRegAssignment = riscvRegAssignment
, MSB.crucGenRegStructType = riscvRegStructType (PC.knownRepr :: G.RVRepr rv)
, MSB.crucGenArchFn = riscvGenFn
, MSB.crucGenArchStmt = riscvGenStmt
, MSB.crucGenArchTermStmt = riscvGenTermStmt
}
type instance MS.ArchRegContext (MR.RISCV rv) =
(Ctx.EmptyCtx Ctx.::> MT.BVType (G.RVWidth rv) -- PC
Ctx.<+> RR.CtxRepeat 31 (MT.BVType (G.RVWidth rv)) -- GPR regs. We use 31 instead of 32
-- because we exclude x0, which is
-- hardwired to the constant 0.
Ctx.<+> RR.CtxRepeat 32 (MT.BVType (G.RVFloatWidth rv)) -- FPR regs
Ctx.<+> RR.CtxRepeat 23 (MT.BVType (G.RVWidth rv)) -- CSR regs. Although there is a
-- theoretical maximum of 4096 of
-- these registers, `grift` only
-- supports 23 of them in practice.
Ctx.<+> (Ctx.EmptyCtx Ctx.::> MT.BVType 2 -- PrivLevel
Ctx.::> MT.BoolType)) -- EXC
riscvMacawEvalFn :: RF.SymFuns sym
-> MS.MacawArchStmtExtensionOverride (MR.RISCV rv)
-> MS.MacawArchEvalFn p sym mem (MR.RISCV rv)
riscvMacawEvalFn fs (MS.MacawArchStmtExtensionOverride override) =
MSB.MacawArchEvalFn $ \_ _ xt s -> do
mRes <- override xt s
case mRes of
Nothing ->
case xt of
RISCVPrimFn fn -> RF.funcSemantics fs fn s
RISCVPrimStmt stmt -> RF.stmtSemantics fs stmt s
RISCVPrimTerm term -> RF.termSemantics fs term s
Just res -> return res
instance (MS.IsMemoryModel mem, G.KnownRV rv, MR.RISCVConstraints rv, Typeable rv)
=> MS.GenArchInfo mem (MR.RISCV rv) where
genArchVals _ _ mOverride = Just $ MS.GenArchVals
{ MS.archFunctions = riscvMacawSymbolicFns
, MS.withArchEval = \sym k -> do
sfns <- liftIO $ RF.newSymFuns sym
let override = case mOverride of
Nothing -> MS.defaultMacawArchStmtExtensionOverride
Just ov -> ov
k (riscvMacawEvalFn sfns override)
, MS.withArchConstraints = \x -> x
, MS.lookupReg = archLookupReg
, MS.updateReg = archUpdateReg
}
riscvRegName :: MR.RISCVReg rv tp -> WS.SolverSymbol
riscvRegName r = WS.systemSymbol ("r!" ++ show (MC.prettyF r))
-- Note that `repeatAssign` starts indexing from 0, but we want to exclude
-- `GPR 0` (i.e., x0), as this is always hardwired to the constant 0. As such,
-- we offset all of the indexes by one using (+ 1).
gprRegs :: Ctx.Assignment (MR.RISCVReg rv) (RR.CtxRepeat 31 (MT.BVType (G.RVWidth rv)))
gprRegs = RR.repeatAssign (MR.GPR . fromIntegral . (+ 1))
fprRegs :: Ctx.Assignment (MR.RISCVReg rv) (RR.CtxRepeat 32 (MT.BVType (G.RVFloatWidth rv)))
fprRegs = RR.repeatAssign (MR.FPR . fromIntegral)
-- | The RISC-V control/status registers that are directly supported by Macaw.
csrRegs :: Ctx.Assignment (MR.RISCVReg rv) (RR.CtxRepeat 23 (MT.BVType (G.RVWidth rv)))
csrRegs = RR.repeatAssign (MR.CSR . toEnum)
-- | This contains an assignment that stores the register associated with each index in the
-- RISC-V register structure.
riscvRegAssignment :: Ctx.Assignment (MR.RISCVReg rv) (MS.ArchRegContext (MR.RISCV rv))
riscvRegAssignment =
Ctx.Empty Ctx.:> MR.PC
Ctx.<++> gprRegs
Ctx.<++> fprRegs
Ctx.<++> csrRegs
Ctx.<++> (Ctx.Empty Ctx.:> MR.PrivLevel Ctx.:> MR.EXC)
riscvRegStructType ::
forall rv
. G.KnownRV rv
=> G.RVRepr rv
-> C.TypeRepr (MSB.ArchRegStruct (MR.RISCV rv))
riscvRegStructType _rv =
C.StructRepr $ MS.typeCtxToCrucible $ FC.fmapFC MT.typeRepr $ riscvRegAssignment @rv
regIndexMap :: forall rv
. G.KnownRV rv
=> MSB.RegIndexMap (MR.RISCV rv)
regIndexMap = MSB.mkRegIndexMap assgn sz
where
assgn = riscvRegAssignment @rv
sz = Ctx.size $ MS.typeCtxToCrucible $ FC.fmapFC MT.typeRepr assgn
lookupReg :: forall rv m f tp
. (G.KnownRV rv, Typeable rv, X.MonadThrow m)
=> MR.RISCVReg rv tp
-> Ctx.Assignment f (MS.MacawCrucibleRegTypes (MR.RISCV rv))
-> m (f (MS.ToCrucibleType tp))
lookupReg r asgn =
case MapF.lookup r regIndexMap of
Nothing -> X.throwM (MissingRegisterInState (Some r))
Just pair -> return (asgn Ctx.! MSB.crucibleIndex pair)
archLookupReg :: (G.KnownRV rv, Typeable rv)
=> C.RegEntry sym (MS.ArchRegStruct (MR.RISCV rv))
-> MR.RISCVReg rv tp
-> C.RegEntry sym (MS.ToCrucibleType tp)
archLookupReg regEntry reg =
case lookupReg reg (C.regValue regEntry) of
Just (C.RV val) -> C.RegEntry (MS.typeToCrucible $ MT.typeRepr reg) val
Nothing -> error $ "unexpected register: " ++ show (MC.prettyF reg)
updateReg :: forall rv m f tp
. (G.KnownRV rv, Typeable rv, X.MonadThrow m)
=> MR.RISCVReg rv tp
-> (f (MS.ToCrucibleType tp) -> f (MS.ToCrucibleType tp))
-> Ctx.Assignment f (MS.MacawCrucibleRegTypes (MR.RISCV rv))
-> m (Ctx.Assignment f (MS.MacawCrucibleRegTypes (MR.RISCV rv)))
updateReg r upd asgn = do
case MapF.lookup r regIndexMap of
Nothing -> X.throwM (MissingRegisterInState (Some r))
Just pair -> return (asgn & MapF.ixF (MSB.crucibleIndex pair) %~ upd)
archUpdateReg :: (G.KnownRV rv, Typeable rv)
=> C.RegEntry sym (MS.ArchRegStruct (MR.RISCV rv))
-> MR.RISCVReg rv tp
-> C.RegValue sym (MS.ToCrucibleType tp)
-> C.RegEntry sym (MS.ArchRegStruct (MR.RISCV rv))
archUpdateReg regEntry reg val =
case updateReg reg (const $ C.RV val) (C.regValue regEntry) of
Just r -> regEntry { C.regValue = r }
Nothing -> error $ "unexpected register: " ++ show (MC.prettyF reg)
newtype RISCVSymbolicException rv = MissingRegisterInState (Some (MR.RISCVReg rv))
deriving Show
instance Typeable rv => X.Exception (RISCVSymbolicException rv)
data RISCVStmtExtension rv (f :: C.CrucibleType -> DK.Type) (ctp :: C.CrucibleType) where
-- | Wrappers around the arch-specific functions in RISC-V; these are
-- interpreted in 'funcSemantics'.
RISCVPrimFn :: MR.RISCVPrimFn rv (RA.AtomWrapper f) t
-> RISCVStmtExtension rv f (MS.ToCrucibleType t)
-- | Wrappers around the arch-specific statements in RISC-V; these are
-- interpreted in 'stmtSemantics'
RISCVPrimStmt :: MR.RISCVStmt rv (RA.AtomWrapper f)
-> RISCVStmtExtension rv f C.UnitType
-- | Wrappers around the arch-specific terminators in RISC-V; these are
-- interpreted in 'termSemantics'
RISCVPrimTerm :: MR.RISCVTermStmt rv (RA.AtomWrapper f)
-> RISCVStmtExtension rv f C.UnitType
instance FC.FunctorFC (RISCVStmtExtension ppc) where
fmapFC f (RISCVPrimFn x) = RISCVPrimFn (FC.fmapFC (RA.liftAtomMap f) x)
fmapFC f (RISCVPrimStmt s) = RISCVPrimStmt (TF.fmapF (RA.liftAtomMap f) s)
fmapFC f (RISCVPrimTerm t) = RISCVPrimTerm (TF.fmapF (RA.liftAtomMap f) t)
instance FC.FoldableFC (RISCVStmtExtension ppc) where
foldMapFC f (RISCVPrimFn x) = FC.foldMapFC (RA.liftAtomIn f) x
foldMapFC f (RISCVPrimStmt s) = TF.foldMapF (RA.liftAtomIn f) s
foldMapFC f (RISCVPrimTerm t) = TF.foldMapF (RA.liftAtomIn f) t
instance FC.TraversableFC (RISCVStmtExtension ppc) where
traverseFC f (RISCVPrimFn x) = RISCVPrimFn <$> FC.traverseFC (RA.liftAtomTrav f) x
traverseFC f (RISCVPrimStmt s) = RISCVPrimStmt <$> TF.traverseF (RA.liftAtomTrav f) s
traverseFC f (RISCVPrimTerm t) = RISCVPrimTerm <$> TF.traverseF (RA.liftAtomTrav f) t
instance C.TypeApp (RISCVStmtExtension v) where
appType (RISCVPrimFn x) = MS.typeToCrucible (MT.typeRepr x)
appType (RISCVPrimStmt _s) = C.UnitRepr
appType (RISCVPrimTerm _t) = C.UnitRepr
instance C.PrettyApp (RISCVStmtExtension v) where
ppApp ppSub (RISCVPrimFn x) =
I.runIdentity (MC.ppArchFn (I.Identity . RA.liftAtomIn ppSub) x)
ppApp ppSub (RISCVPrimStmt s) =
MC.ppArchStmt (RA.liftAtomIn ppSub) s
ppApp ppSub (RISCVPrimTerm t) = MC.ppArchTermStmt (RA.liftAtomIn ppSub) t
type instance MSB.MacawArchStmtExtension (MR.RISCV rv) = RISCVStmtExtension rv
riscvGenFn :: forall rv ids s tp
. MR.RISCVPrimFn rv (MC.Value (MR.RISCV rv) ids) tp
-> MSB.CrucGen (MR.RISCV rv) ids s (C.Atom s (MS.ToCrucibleType tp))
riscvGenFn fn =
case fn of
MR.RISCVEcall w v0 v1 v2 v3 v4 v5 v6 v7 -> do
a0 <- MSB.valueToCrucible v0
a1 <- MSB.valueToCrucible v1
a2 <- MSB.valueToCrucible v2
a3 <- MSB.valueToCrucible v3
a4 <- MSB.valueToCrucible v4
a5 <- MSB.valueToCrucible v5
a6 <- MSB.valueToCrucible v6
a7 <- MSB.valueToCrucible v7
let syscallArgs = Ctx.Empty Ctx.:> a0 Ctx.:> a1 Ctx.:> a2 Ctx.:> a3 Ctx.:> a4 Ctx.:> a5 Ctx.:> a6 Ctx.:> a7
let argTypes = PC.knownRepr
let retTypes = Ctx.Empty Ctx.:> LCLM.LLVMPointerRepr w Ctx.:> LCLM.LLVMPointerRepr w
let retRepr = C.StructRepr retTypes
syscallArgStructAtom <- MSB.evalAtom (C.EvalApp (C.MkStruct argTypes syscallArgs))
let lookupHdlStmt = MS.MacawLookupSyscallHandle argTypes retTypes syscallArgStructAtom
hdlAtom <- MSB.evalMacawStmt lookupHdlStmt
MSB.evalAtom $! C.Call hdlAtom syscallArgs retRepr
riscvGenStmt :: forall rv ids s
. MR.RISCVStmt rv (MC.Value (MR.RISCV rv) ids)
-> MSB.CrucGen (MR.RISCV rv) ids s ()
riscvGenStmt s = do
s' <- TF.traverseF f s
void (MSB.evalArchStmt (RISCVPrimStmt s'))
where
f :: forall a
. MC.Value (MR.RISCV rv) ids a
-> MSB.CrucGen (MR.RISCV rv) ids s (RA.AtomWrapper (C.Atom s) a)
f x = RA.AtomWrapper <$> MSB.valueToCrucible x
riscvGenTermStmt :: forall rv ids s
. MR.RISCVTermStmt rv (MC.Value (MR.RISCV rv) ids)
-> MC.RegState (MR.RISCVReg rv) (MC.Value (MR.RISCV rv) ids)
-> Maybe (C.Label s)
-> MSB.CrucGen (MR.RISCV rv) ids s ()
riscvGenTermStmt ts _rs _fallthrough = do
ts' <- TF.traverseF f ts
void (MSB.evalArchStmt (RISCVPrimTerm ts'))
where
f :: forall a
. MC.Value (MR.RISCV rv) ids a
-> MSB.CrucGen (MR.RISCV rv) ids s (RA.AtomWrapper (C.Atom s) a)
f x = RA.AtomWrapper <$> MSB.valueToCrucible x

View File

@ -0,0 +1,29 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE RankNTypes #-}
module Data.Macaw.RISCV.Symbolic.AtomWrapper (
AtomWrapper(..),
liftAtomMap,
liftAtomTrav,
liftAtomIn
) where
import Data.Kind ( Type )
import qualified Lang.Crucible.Types as C
import qualified Data.Macaw.Types as MT
import qualified Data.Macaw.Symbolic as MS
newtype AtomWrapper (f :: C.CrucibleType -> Type) (tp :: MT.Type)
= AtomWrapper (f (MS.ToCrucibleType tp))
liftAtomMap :: (forall s. f s -> g s) -> AtomWrapper f t -> AtomWrapper g t
liftAtomMap f (AtomWrapper x) = AtomWrapper (f x)
liftAtomTrav ::
Functor m =>
(forall s. f s -> m (g s)) -> (AtomWrapper f t -> m (AtomWrapper g t))
liftAtomTrav f (AtomWrapper x) = AtomWrapper <$> f x
liftAtomIn :: (forall s. f s -> a) -> AtomWrapper f t -> a
liftAtomIn f (AtomWrapper x) = f x

View File

@ -0,0 +1,54 @@
{-# LANGUAGE EmptyCase #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Macaw.RISCV.Symbolic.Functions
( SymFuns
, newSymFuns
, funcSemantics
, stmtSemantics
, termSemantics
) where
-- crucible
import qualified Lang.Crucible.Backend as C
import qualified Lang.Crucible.Simulator.ExecutionTree as C
import qualified Lang.Crucible.Simulator.RegMap as C
import qualified Lang.Crucible.Types as C
-- macaw-riscv
import qualified Data.Macaw.RISCV as MR
-- macaw-symbolic
import qualified Data.Macaw.Symbolic as MS
-- macaw-riscv-symbolic
import qualified Data.Macaw.RISCV.Symbolic.AtomWrapper as RA
import qualified Data.Macaw.RISCV.Symbolic.Panic as RP
data SymFuns sym = SymFuns
funcSemantics :: SymFuns sym
-> MR.RISCVPrimFn rv (RA.AtomWrapper f) mt
-> S p rv sym rtp bs r ctx
-> IO (C.RegValue sym (MS.ToCrucibleType mt), S p rv sym rtp bs r ctx)
funcSemantics _sf pf _s =
case pf of
MR.RISCVEcall {} ->
RP.panic RP.RISCV "funcSemantics" ["The RISC-V syscall primitive should be eliminated and replaced by a handle lookup"]
stmtSemantics :: SymFuns sym
-> MR.RISCVStmt rv (RA.AtomWrapper (C.RegEntry sym))
-> S p rv sym rtp bs r ctx
-> IO (C.RegValue sym C.UnitType, S p rv sym rtp bs r ctx)
stmtSemantics _sf stmt _s = case stmt of {}
newSymFuns :: forall sym . (C.IsSymInterface sym) => sym -> IO (SymFuns sym)
newSymFuns _sym = pure SymFuns
termSemantics :: SymFuns sym
-> MR.RISCVTermStmt rv (RA.AtomWrapper (C.RegEntry sym))
-> S p rv sym rtp bs r ctx
-> IO (C.RegValue sym C.UnitType, S p rv sym rtp bs r ctx)
termSemantics _fs term _s = case term of {}
type S p rv sym rtp bs r ctx = C.CrucibleState p sym (MS.MacawExt (MR.RISCV rv)) rtp bs r ctx

View File

@ -0,0 +1,15 @@
{-# LANGUAGE TemplateHaskell #-}
module Data.Macaw.RISCV.Symbolic.Panic (
P.panic,
Component(..)
) where
import qualified Panic as P
data Component = RISCV
deriving (Show)
instance P.PanicComponent Component where
panicComponentName = show
panicComponentIssues _ = "https://github.com/GaloisInc/macaw/issues"
panicComponentRevision = $(P.useGitRevision)

View File

@ -0,0 +1,30 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE UndecidableInstances #-}
module Data.Macaw.RISCV.Symbolic.Repeat (
CtxRepeat,
RepeatAssign(..)
) where
import GHC.TypeLits
import qualified Data.Parameterized.Context as Ctx
type family CtxRepeat (n :: Nat) (c :: k) :: Ctx.Ctx k where
CtxRepeat 0 c = Ctx.EmptyCtx
CtxRepeat n c = CtxRepeat (n - 1) c Ctx.::> c
class RepeatAssign (tp :: k) (ctx :: Ctx.Ctx k) where
repeatAssign :: (Int -> f tp) -> Ctx.Assignment f ctx
instance RepeatAssign tp Ctx.EmptyCtx where
repeatAssign _ = Ctx.Empty
instance RepeatAssign tp ctx => RepeatAssign tp (ctx Ctx.::> tp) where
repeatAssign f =
let r = repeatAssign f
in r Ctx.:> f (Ctx.sizeInt (Ctx.size r))

View File

@ -0,0 +1,218 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ImplicitParams #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Main (main) where
import Control.Lens ( (^.) )
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ElfEdit as Elf
import qualified Data.Foldable as F
import qualified Data.Map as Map
import Data.Maybe ( mapMaybe )
import qualified Data.Parameterized.Classes as PC
import qualified Data.Parameterized.Nonce as PN
import Data.Parameterized.Some ( Some(..) )
import Data.Proxy ( Proxy(..) )
import GHC.TypeNats ( KnownNat, type (<=) )
import qualified Prettyprinter as PP
import System.FilePath ( (</>), (<.>) )
import qualified System.FilePath.Glob as SFG
import qualified System.IO as IO
import qualified Test.Tasty as TT
import qualified Test.Tasty.HUnit as TTH
import qualified Test.Tasty.Options as TTO
import qualified Test.Tasty.Runners as TTR
import qualified Data.Macaw.Architecture.Info as MAI
import qualified Data.Macaw.CFG as MC
import qualified Data.Macaw.Discovery as M
import qualified Data.Macaw.Memory.ElfLoader.PLTStubs as MMELP
import qualified Data.Macaw.Symbolic as MS
import qualified Data.Macaw.Symbolic.Testing as MST
import qualified Data.Macaw.RISCV as MR
import Data.Macaw.RISCV.Symbolic ()
import qualified GRIFT.Types as G
import qualified What4.Config as WC
import qualified What4.Expr.Builder as WEB
import qualified What4.Interface as WI
import qualified What4.ProblemFeatures as WPF
import qualified What4.Solver as WS
import qualified Lang.Crucible.Backend as CB
import qualified Lang.Crucible.Backend.Online as CBO
import qualified Lang.Crucible.Simulator as CS
import qualified Lang.Crucible.LLVM.MemModel as LLVM
-- | A Tasty option to tell us to save SMT queries and responses to /tmp for debugging purposes
data SaveSMT = SaveSMT Bool
deriving (Eq, Show)
instance TTO.IsOption SaveSMT where
defaultValue = SaveSMT False
parseValue v = SaveSMT <$> TTO.safeReadBool v
optionName = pure "save-smt"
optionHelp = pure "Save SMT sessions to files in /tmp for debugging"
-- | A tasty option to have the test suite save the macaw IR for each test case to /tmp for debugging purposes
data SaveMacaw = SaveMacaw Bool
instance TTO.IsOption SaveMacaw where
defaultValue = SaveMacaw False
parseValue v = SaveMacaw <$> TTO.safeReadBool v
optionName = pure "save-macaw"
optionHelp = pure "Save Macaw IR for each test case to /tmp for debugging"
ingredients :: [TTR.Ingredient]
ingredients = TT.includingOptions [ TTO.Option (Proxy @SaveSMT)
, TTO.Option (Proxy @SaveMacaw)
] : TT.defaultIngredients
main :: IO ()
main = do
-- These are pass/fail in that the assertions in the "pass" set are true (and
-- the solver returns Unsat), while the assertions in the "fail" set are false
-- (and the solver returns Sat).
passTestFilePaths <- SFG.glob "tests/pass/**/*.exe"
failTestFilePaths <- SFG.glob "tests/fail/**/*.exe"
let passRes = MST.SimulationResult MST.Unsat
let failRes = MST.SimulationResult MST.Sat
let passTests mmPreset = TT.testGroup "True assertions" (map (mkSymExTest passRes mmPreset) passTestFilePaths)
let failTests mmPreset = TT.testGroup "False assertions" (map (mkSymExTest failRes mmPreset) failTestFilePaths)
TT.defaultMainWithIngredients ingredients $
TT.testGroup "Binary Tests" $
map (\mmPreset ->
TT.testGroup (MST.describeMemModelPreset mmPreset)
[passTests mmPreset, failTests mmPreset])
[MST.DefaultMemModel, MST.LazyMemModel]
hasTestPrefix :: Some (M.DiscoveryFunInfo arch) -> Maybe (BS8.ByteString, Some (M.DiscoveryFunInfo arch))
hasTestPrefix (Some dfi) = do
bsname <- M.discoveredFunSymbol dfi
if BS8.pack "test_" `BS8.isPrefixOf` bsname
then return (bsname, Some dfi)
else Nothing
-- | RISC-V functions with a single scalar return value return it in @a0@.
--
-- Since all test functions must return a value to assert as true, this is
-- straightforward to extract.
riscvResultExtractor :: ( arch ~ MR.RISCV rv
, CB.IsSymInterface sym
, MC.ArchConstraints arch
, MS.ArchInfo arch
, KnownNat (G.RVWidth rv)
)
=> MS.ArchVals arch
-> MST.ResultExtractor sym arch
riscvResultExtractor archVals = MST.ResultExtractor $ \regs _sp _mem k -> do
let re = MS.lookupReg archVals regs MR.GPR_A0
k PC.knownRepr (CS.regValue re)
mkSymExTest :: MST.SimulationResult -> MST.MemModelPreset -> FilePath -> TT.TestTree
mkSymExTest expected mmPreset exePath = TT.askOption $ \saveSMT@(SaveSMT _) -> TT.askOption $ \saveMacaw@(SaveMacaw _) -> TTH.testCaseSteps exePath $ \step -> do
bytes <- BS.readFile exePath
case Elf.decodeElfHeaderInfo bytes of
Left (_, msg) -> TTH.assertFailure ("Error parsing ELF header from file '" ++ show exePath ++ "': " ++ msg)
Right (Elf.SomeElf ehi) -> do
case Elf.headerClass (Elf.header ehi) of
Elf.ELFCLASS32 ->
symExTestSized expected mmPreset exePath saveSMT saveMacaw step ehi (MR.riscv_info G.rv32GCRepr)
Elf.ELFCLASS64 ->
symExTestSized expected mmPreset exePath saveSMT saveMacaw step ehi (MR.riscv_info G.rv64GCRepr)
data MacawRISCVSymbolicData t = MacawRISCVSymbolicData
symExTestSized :: forall rv w arch
. ( w ~ G.RVWidth rv
, 16 <= w
, MC.ArchConstraints arch
, arch ~ MR.RISCV rv
, KnownNat w
, Show (Elf.ElfWordType w)
, MS.ArchInfo arch
)
=> MST.SimulationResult
-> MST.MemModelPreset
-> FilePath
-> SaveSMT
-> SaveMacaw
-> (String -> IO ())
-> Elf.ElfHeaderInfo w
-> MAI.ArchitectureInfo arch
-> TTH.Assertion
symExTestSized expected mmPreset exePath saveSMT saveMacaw step ehi archInfo = do
binfo <- MST.runDiscovery ehi exePath MST.toAddrSymMap archInfo
-- Test cases involving shared libraries are not
-- yet supported on the RISC-V backend. At a
-- minimum, this is blocked on
-- https://github.com/GaloisInc/elf-edit/issues/36.
(MMELP.noPLTStubInfo "RISC-V")
let funInfos = Map.elems (MST.binaryDiscState (MST.mainBinaryInfo binfo) ^. M.funInfo)
let testEntryPoints = mapMaybe hasTestPrefix funInfos
F.forM_ testEntryPoints $ \(name, Some dfi) -> do
step ("Testing " ++ BS8.unpack name ++ " at " ++ show (M.discoveredFunAddr dfi))
writeMacawIR saveMacaw (BS8.unpack name) dfi
Some (gen :: PN.NonceGenerator IO t) <- PN.newIONonceGenerator
sym <- WEB.newExprBuilder WEB.FloatRealRepr MacawRISCVSymbolicData gen
CBO.withYicesOnlineBackend sym CBO.NoUnsatFeatures WPF.noFeatures $ \bak -> do
-- We are using the z3 backend to discharge proof obligations, so
-- we need to add its options to the backend configuration
let solver = WS.z3Adapter
let backendConf = WI.getConfiguration sym
WC.extendConfig (WS.solver_adapter_config_options solver) backendConf
execFeatures <- MST.defaultExecFeatures (MST.SomeOnlineBackend bak)
archVals <- case MS.archVals (Proxy @(MR.RISCV rv)) Nothing of
Just archVals -> pure archVals
Nothing -> error "symExTestSized: impossible"
let extract = riscvResultExtractor archVals
logger <- makeGoalLogger saveSMT solver name exePath
let ?memOpts = LLVM.defaultMemOptions
simRes <- MST.simulateAndVerify solver logger bak execFeatures archInfo archVals binfo mmPreset extract dfi
TTH.assertEqual "AssertionResult" expected simRes
writeMacawIR :: (MC.ArchConstraints arch) => SaveMacaw -> String -> M.DiscoveryFunInfo arch ids -> IO ()
writeMacawIR (SaveMacaw sm) name dfi
| not sm = return ()
| otherwise = writeFile (toSavedMacawPath name) (show (PP.pretty dfi))
toSavedMacawPath :: String -> FilePath
toSavedMacawPath testName = "/tmp" </> name <.> "macaw"
where
name = fmap escapeSlash testName
-- | Construct a solver logger that saves the SMT session for the goal solving
-- in /tmp (if requested by the save-smt option)
--
-- The adapter name is included so that, if the same test is solved with
-- multiple solvers, we can differentiate them.
makeGoalLogger :: SaveSMT -> WS.SolverAdapter st -> BS8.ByteString -> FilePath -> IO WS.LogData
makeGoalLogger (SaveSMT saveSMT) adapter funName p
| not saveSMT = return WS.defaultLogData
| otherwise = do
hdl <- IO.openFile (toSavedSMTSessionPath adapter funName p) IO.WriteMode
return (WS.defaultLogData { WS.logHandle = Just hdl })
-- | Construct a path in /tmp to save the SMT session to
--
-- Just take the original path name and turn all of the slashes into underscores to escape them
toSavedSMTSessionPath :: WS.SolverAdapter st -> BS8.ByteString -> FilePath -> FilePath
toSavedSMTSessionPath adapter funName p = "/tmp" </> filename <.> "smtlib2"
where
filename = concat [ fmap escapeSlash p
, "_"
, BS8.unpack funName
, "_"
, WS.solver_adapter_name adapter
]
escapeSlash :: Char -> Char
escapeSlash '/' = '_'
escapeSlash c = c

View File

@ -0,0 +1,18 @@
* Overview
This test suite tests that symbolic execution of RISC-V programs works. It is also a convenient place to test the semantics. The test harness looks for binaries in the ~pass~ and ~fail~ subdirectories. In both cases, it enumerates the functions whose names are prefixed by ~test_~, each of which are symbolically executed with fully symbolic initial states. It treats the return value of each test function as an assertion that it will attempt to prove (i.e., it checks the satisfiability of the assertion). Test cases in ~pass~ are expected to return a value that is always true, while the test cases in ~fail~ are expected to not always be true (but note that the precise models are not inspected). The test harness uses yices for path satisfiability checking and z3 to prove goals. Since path satisfiability checking is enabled, test cases can include loops as long as they symbolically terminate.
In addition to proving that the result of a test case is always true (or not), the test harness will attempt to prove all generated side conditions for tests with names prefixed by ~test_and_verify_~. In both the ~pass~ and ~fail~ directories, these side conditions are expected to pass (or they will cause test failures).
* Usage
The test runner has two command line options (beyond the defaults for tasty):
- ~--save-macaw~: Saves the Macaw IR for each test case to /tmp for debugging purposes
- ~--save-smt~: Saves the generated SMTLib for each test to /tmp for debugging purposes
* Limitations
- It currently tests both optimized an unoptimized binaries. It is intended that this set will expand and that a wide variety of compilers will be tested.
- Only two solvers are involved in the test, rather than a whole matrix
- Function calls are not supported in test cases
- There are no facilities for generating symbolic data beyond the initial symbolic arguments to each function

View File

@ -0,0 +1,23 @@
# These binaries were obtained from https://musl.cc/
CC64=riscv64-linux-musl-gcc
CC32=riscv32-linux-musl-gcc
CFLAGS=-nostdlib -no-pie -static -fno-stack-protector
unopt32 = $(patsubst %.c,%.unopt32.exe,$(wildcard *.c))
unopt64 = $(patsubst %.c,%.unopt64.exe,$(wildcard *.c))
opt32 = $(patsubst %.c,%.opt32.exe,$(wildcard *.c))
opt64 = $(patsubst %.c,%.opt64.exe,$(wildcard *.c))
all: $(unopt32) $(opt32) $(unopt64) $(opt64)
%.unopt32.exe : %.c
$(CC32) $(CFLAGS) -O0 $< -o $@
%.opt32.exe : %.c
$(CC32) $(CFLAGS) -O2 $< -o $@
%.unopt64.exe : %.c
$(CC64) $(CFLAGS) -O0 $< -o $@
%.opt64.exe : %.c
$(CC64) $(CFLAGS) -O2 $< -o $@

View File

@ -0,0 +1,16 @@
// This test fails because x is not always 1
int __attribute__((noinline)) test_constant(int x) {
return x == 1;
}
// Notes:
//
// - The entry point is required by the compiler to make an executable
//
// - We can't put the test directly in _start because macaw-symbolic (and macaw
// in general) don't deal well with system calls
//
// - The test harness only looks for function symbols starting with test_
void _start() {
test_constant(0);
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,23 @@
# These binaries were obtained from https://musl.cc/
CC64=riscv64-linux-musl-gcc
CC32=riscv32-linux-musl-gcc
CFLAGS=-nostdlib -no-pie -static -fno-stack-protector
unopt32 = $(patsubst %.c,%.unopt32.exe,$(wildcard *.c))
unopt64 = $(patsubst %.c,%.unopt64.exe,$(wildcard *.c))
opt32 = $(patsubst %.c,%.opt32.exe,$(wildcard *.c))
opt64 = $(patsubst %.c,%.opt64.exe,$(wildcard *.c))
all: $(unopt32) $(opt32) $(unopt64) $(opt64)
%.unopt32.exe : %.c
$(CC32) $(CFLAGS) -O0 $< -o $@
%.opt32.exe : %.c
$(CC32) $(CFLAGS) -O2 $< -o $@
%.unopt64.exe : %.c
$(CC64) $(CFLAGS) -O0 $< -o $@
%.opt64.exe : %.c
$(CC64) $(CFLAGS) -O2 $< -o $@

View File

@ -0,0 +1,15 @@
int __attribute__((noinline)) test_identity(int x) {
return x == x;
}
// Notes:
//
// - The entry point is required by the compiler to make an executable
//
// - We can't put the test directly in _start because macaw-symbolic (and macaw
// in general) don't deal well with system calls
//
// - The test harness only looks for function symbols starting with test_
void _start() {
test_identity(0);
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,13 @@
#include <limits.h>
unsigned int __attribute__((noinline)) test_saturate_add(unsigned int x, unsigned int y) {
unsigned int c = x + y;
if(c < x || c < y)
c = UINT_MAX;
return c >= x && c >= y;
}
void _start() {
test_saturate_add(1, 2);
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -15,11 +15,12 @@ module Data.Macaw.RISCV (
-- * Type-level tags
G.RV(..),
G.RVRepr(..),
RISCV,
RISCVConstraints,
-- * RISC-V Types
RISCVReg(..),
RISCVTermStmt,
RISCVStmt,
RISCVPrimFn
RISCVPrimFn(..)
) where
import GHC.Stack (HasCallStack)

View File

@ -104,9 +104,15 @@ type instance MC.ArchFn (RISCV rv) = RISCVPrimFn rv
-- | RISC-V architecture-specific statements (none)
data RISCVStmt (rv :: G.RV) (expr :: MT.Type -> K.Type)
instance F.FunctorF (RISCVStmt rv) where
fmapF _ s = case s of {}
instance F.FoldableF (RISCVStmt rv) where
foldMapF _ s = case s of {}
instance F.TraversableF (RISCVStmt rv) where
traverseF _ s = pure (case s of {})
instance MC.IsArchStmt (RISCVStmt rv) where
ppArchStmt _ s = case s of {}
@ -115,6 +121,15 @@ type instance MC.ArchStmt (RISCV rv) = RISCVStmt rv
-- | RISC-V block termination statements (none)
data RISCVTermStmt (rv :: G.RV) (f :: MT.Type -> K.Type)
instance F.FunctorF (RISCVTermStmt rv) where
fmapF _ s = case s of {}
instance F.FoldableF (RISCVTermStmt rv) where
foldMapF _ s = case s of {}
instance F.TraversableF (RISCVTermStmt rv) where
traverseF _ s = pure (case s of {})
instance MC.PrettyF (RISCVTermStmt rv) where
prettyF s = case s of {}

View File

@ -15,6 +15,7 @@
module Data.Macaw.RISCV.RISCVReg
( -- * RISC-V macaw register state
RISCVReg(..)
, GPR(..)
-- ** Patterns for GPRs
, pattern GPR
, pattern GPR_RA
@ -62,6 +63,8 @@ import Data.Parameterized.TH.GADT ( structuralTypeEquality
, structuralTypeOrd
)
import qualified Prettyprinter as PP
import qualified GRIFT.Types as G
import qualified GRIFT.Semantics.Utils as G
@ -411,7 +414,7 @@ pattern GPR_T6 = GPR 31
instance Show (RISCVReg rv tp) where
show PC = "pc"
show (RISCV_GPR gpr) = show gpr
show (FPR rid) = "fpr[" <> show rid <> "]"
show (FPR rid) = "f" <> show (G.asUnsignedSized rid)
show (CSR csr) = show csr
show PrivLevel = "priv"
show EXC = "exc"
@ -426,6 +429,9 @@ instance OrdF (RISCVReg rv) where
instance ShowF (RISCVReg rv)
instance MC.PrettyF (RISCVReg rv) where
prettyF = PP.pretty . showF
instance G.KnownRV rv => MT.HasRepr (RISCVReg rv) MT.TypeRepr where
typeRepr PC = MT.BVTypeRepr knownNat
typeRepr (RISCV_GPR _) = MT.BVTypeRepr knownNat