macaw-symbolic-syntax: Concrete syntax for macaw-symbolic CFGs

This code was ported from ambient-verifier.
2024-11-22 05:45:51 +03:00 · 2023-11-01 15:14:15 -04:00 · 2023-11-01 15:14:15 -04:00 · a2ac7f4300
commit a2ac7f4300
parent e05a9db243
21 changed files with 1143 additions and 1 deletions
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@ -95,6 +95,12 @@ jobs:
        YICES_VERSION: "2.6.2"
        CVC4_VERSION: "4.1.8"

+    - name: Build macaw-symbolic-syntax
+      run: cabal build pkg:macaw-symbolic-syntax
+
+    - name: Test macaw-symbolic-syntax
+      run: cabal test pkg:macaw-symbolic-syntax
+
    - name: Build macaw-x86
      run: cabal build pkg:macaw-x86 pkg:macaw-x86-symbolic

--- a/cabal.project.dist
+++ b/cabal.project.dist
@ -7,6 +7,7 @@ packages: base/
          macaw-riscv/
          x86/
          symbolic/
+          symbolic-syntax/
          x86_symbolic/
          refinement/
          utils/compare-dwarfdump
@ -28,6 +29,7 @@ packages: base/
          deps/crucible/crucible/
          deps/crucible/crucible-llvm/
          deps/crucible/crucible-symio/
+          deps/crucible/crucible-syntax/
          deps/what4/what4/
          deps/dwarf/
          deps/elf-edit/
--- a/cabal.project.werror
+++ b/cabal.project.werror
@ -7,6 +7,8 @@ package macaw-semmc
  ghc-options: -Wall
 package macaw-symbolic
  ghc-options: -Wall -Werror
+package macaw-symbolic-syntax
+  ghc-options: -Wall -Werror
 -- Macaw-ppc has warnings.
 package macaw-ppc
  ghc-options: -Wall -Werror
--- a/deps/crucible
+++ b/deps/crucible
@ -1 +1 @@
-Subproject commit bc64fda29cf2e1f4641b381a739c53d7c4d2aa38
+Subproject commit 320e071fa413aabcb2528361adf3743cbbbe623a
--- a/symbolic-syntax/LICENSE
+++ b/symbolic-syntax/LICENSE
@ -0,0 +1,30 @@
+Copyright (c) 2023 Galois Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+
+  * Neither the name of Galois, Inc. nor the names of its contributors
+    may be used to endorse or promote products derived from this
+    software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/symbolic-syntax/README.md
+++ b/symbolic-syntax/README.md
@ -0,0 +1,40 @@
+# macaw-symbolic-syntax
+
+This package provides concrete syntax for macaw-symbolic types and operations.
+
+Concretely, it implements a `ParserHooks` for use with [`crucible-syntax`][syn].
+This `ParserHooks` supports the following types and operations:
+
+**Types**:
+
+The main type addition is for representing pointers:
+
+- `Pointer`
+
+Unlike C/C++, these pointers are untyped and essentially correspond to `uint8_t*``.
+
+There are a few wrappers around `Bitvector` types for portability and convenience:
+
+- `Byte` is an alias for `Bitvector 8`.
+- `Int` is an alias for `Bitvector 32`.
+- `Long` is an alias for `Bitvector 32` on Arm32 and `Bitvector 64` on X86_64.
+- `PidT` is an alias for `Bitvector 32`.
+- `Short` is an alias for `Bitvector 16`.
+- `SizeT` is an alias for `Bitvector 32` on Arm32 and `Bitvector 64` on X86_64.
+- `UidT` is an alias for `Bitvector 32`.
+
+**Operations**:
+
+The extra operations are:
+
+- `bv-typed-literal :: Type -> Integer -> Bitvector w` where the first argument is a `Bitvector` type alias (see the Types section), the second argument is the value the `Bitvector` should contain, and `w` is the number of bits in the returned `Bitvector` (will match the width of the `Type` argument).
+- `fresh-vec :: String Unicode -> forall (t :: Type) -> Nat -> Vector t`, where ``(fresh-vec s t n)`` generates a length-`n` vector where each element is a fresh constant of type `t` with the name ``<s>_<i>`` (for each `i` between `0` and ``<n> - 1`). Note that `t` must be a scalar type (e.g., no nested `Vector`\ s), and `s` and `n` must both be concrete values.
+- `make-null :: Pointer` returns a null pointer.
+- `pointer-add :: Pointer -> Bitvector w -> Pointer` where `w` is the number of bits in a pointer (usually 32 or 64).
+- `pointer-diff :: Pointer -> Pointer -> Bitvector w` where `w` is the number of bits in a pointer (usually 32 or 64).
+- `pointer-sub :: Pointer -> Bitvector w -> Pointer` where `w` is the number of bits in a pointer (usually 32 or 64).
+- `pointer-eq :: Pointer -> Pointer -> Bool`.
+- `pointer-read :: forall (t :: Type) -> Endianness -> Pointer -> t` where the first argument is the type of the value to read and the second argument is `le` or `be`. `Type` must either be `Bitvector (8 * w)`` (for some positive number `w`) or one of the type aliases listed above.
+- `pointer-write :: forall (t :: Type) -> Endianness -> Pointer -> t -> Unit` where the first argument is the type of the value to read and the second argument is `le` or `be`. `Type` must either be `Bitvector (8 * w)`` (for some positive number `w`) or one of the type aliases listed above.
+
+[syn]: https://github.com/GaloisInc/crucible/tree/master/crucible-syntax
--- a/symbolic-syntax/macaw-symbolic-syntax.cabal
+++ b/symbolic-syntax/macaw-symbolic-syntax.cabal
@ -0,0 +1,133 @@
+Cabal-version: 2.2
+Name:          macaw-symbolic-syntax
+Version:       0.1
+Author:        Galois Inc.
+Maintainer:    langston@galois.com
+Build-type:    Simple
+License:       BSD-3-Clause
+License-file:  LICENSE
+Category:      Language
+Synopsis:      A syntax for reading and writing macaw-symbolic control-flow graphs
+-- Description:
+
+extra-doc-files: README.md
+extra-source-files:
+  test-data/*.cbl
+  test-data/*.out.good
+
+common shared
+  -- Specifying -Wall and -Werror can cause the project to fail to build on
+  -- newer versions of GHC simply due to new warnings being added to -Wall. To
+  -- prevent this from happening we manually list which warnings should be
+  -- considered errors. We also list some warnings that are not in -Wall, though
+  -- try to avoid "opinionated" warnings (though this judgement is clearly
+  -- subjective).
+  --
+  -- Warnings are grouped by the GHC version that introduced them, and then
+  -- alphabetically.
+  --
+  -- A list of warnings and the GHC version in which they were introduced is
+  -- available here:
+  -- https://ghc.gitlab.haskell.org/ghc/doc/users_guide/using-warnings.html
+
+  -- Since GHC 8.10 or earlier:
+  ghc-options:
+    -Wall
+    -Werror=compat-unqualified-imports
+    -Werror=deferred-type-errors
+    -Werror=deprecated-flags
+    -Werror=deprecations
+    -Werror=deriving-defaults
+    -Werror=dodgy-foreign-imports
+    -Werror=duplicate-exports
+    -Werror=empty-enumerations
+    -Werror=identities
+    -Werror=inaccessible-code
+    -Werror=incomplete-patterns
+    -Werror=incomplete-record-updates
+    -Werror=incomplete-uni-patterns
+    -Werror=inline-rule-shadowing
+    -Werror=missed-extra-shared-lib
+    -Werror=missing-exported-signatures
+    -Werror=missing-fields
+    -Werror=missing-home-modules
+    -Werror=missing-methods
+    -Werror=overflowed-literals
+    -Werror=overlapping-patterns
+    -Werror=partial-fields
+    -Werror=partial-type-signatures
+    -Werror=simplifiable-class-constraints
+    -Werror=star-binder
+    -Werror=star-is-type
+    -Werror=tabs
+    -Werror=typed-holes
+    -Werror=unrecognised-pragmas
+    -Werror=unrecognised-warning-flags
+    -Werror=unsupported-calling-conventions
+    -Werror=unsupported-llvm-version
+    -Werror=unticked-promoted-constructors
+    -Werror=unused-imports
+    -Werror=warnings-deprecations
+    -Werror=wrong-do-bind
+
+  if impl(ghc >= 9.2)
+    ghc-options:
+      -Werror=ambiguous-fields
+      -Werror=operator-whitespace
+      -Werror=operator-whitespace-ext-conflict
+      -Werror=redundant-bang-patterns
+
+  if impl(ghc >= 9.4)
+    ghc-options:
+      -Werror=forall-identifier
+      -Werror=misplaced-pragmas
+      -Werror=redundant-strictness-flags
+      -Werror=type-equality-out-of-scope
+      -Werror=type-equality-requires-operators
+
+  ghc-prof-options: -O2 -fprof-auto-top
+  default-language: Haskell2010
+
+library
+  import: shared
+
+  build-depends:
+    base >= 4.13,
+    containers,
+    crucible >= 0.1,
+    crucible-llvm,
+    crucible-syntax,
+    macaw-base,
+    macaw-symbolic,
+    mtl,
+    parameterized-utils >= 0.1.7,
+    prettyprinter,
+    text,
+    what4,
+    vector,
+
+  hs-source-dirs: src
+
+  exposed-modules:
+    Data.Macaw.Symbolic.Syntax
+
+test-suite macaw-symbolic-syntax-tests
+  import: shared
+  type: exitcode-stdio-1.0
+  main-is: Test.hs
+  hs-source-dirs: test
+  build-depends:
+    base,
+    containers,
+    crucible >= 0.1,
+    crucible-syntax,
+    filepath,
+    macaw-symbolic,
+    macaw-symbolic-syntax,
+    macaw-x86,
+    macaw-x86-symbolic,
+    parameterized-utils >= 0.1.7,
+    tasty,
+    tasty-golden,
+    text,
+ 
--- a/symbolic-syntax/src/Data/Macaw/Symbolic/Syntax.hs
+++ b/symbolic-syntax/src/Data/Macaw/Symbolic/Syntax.hs
@ -0,0 +1,638 @@
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE ImplicitParams #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeOperators #-}
+
+-- | 'LCSC.ParserHooks' for parsing macaw-symbolic CFGs. Originally ported from:
+--
+-- https://github.com/GaloisInc/ambient-verifier/blob/eab04abb9750825a25ec0cbe0379add63f05f6c6/src/Ambient/FunctionOverride/Extension.hs#L863
+module Data.Macaw.Symbolic.Syntax (
+    TypeAlias(..)
+  , TypeLookup(..)
+  , x86_64LinuxTypes
+  , ExtensionParser
+  , SomeExtensionWrapper(..)
+  , extensionParser
+  , extensionTypeParser
+  , machineCodeParserHooks
+  ) where
+
+import Prelude
+
+import           Control.Applicative ( Alternative(empty) )
+import           Control.Monad.IO.Class ( MonadIO(..) )
+import           Control.Monad.State.Class ( MonadState )
+import           Control.Monad.Writer.Class ( MonadWriter )
+import qualified Data.Map.Strict as Map
+import qualified Data.Parameterized.Context as Ctx
+import qualified Data.Parameterized.NatRepr as PN
+import           Data.Parameterized.Some ( Some(..) )
+import qualified Data.Text as DT
+import qualified Data.Vector as DV
+import           GHC.TypeNats ( KnownNat, type (<=) )
+import           Numeric.Natural ( Natural )
+
+import qualified Data.Macaw.CFG as DMC
+import qualified Data.Macaw.Memory as DMM
+import qualified Data.Macaw.Symbolic as DMS
+import qualified Lang.Crucible.CFG.Expr as LCE
+import qualified Lang.Crucible.CFG.Extension as LCCE
+import qualified Lang.Crucible.CFG.Reg as LCCR
+import qualified Lang.Crucible.LLVM.MemModel as LCLM
+import qualified Lang.Crucible.Syntax.Atoms as LCSA
+import qualified Lang.Crucible.Syntax.Concrete as LCSC
+import qualified Lang.Crucible.Syntax.ExprParse as LCSE
+import qualified Lang.Crucible.Types as LCT
+
+import qualified What4.Interface as WI
+import qualified What4.ProgramLoc as WP
+
+-- | The additional types beyond those built into crucible-syntax.
+data TypeAlias = Byte | Int | Long | PidT | Pointer | Short | SizeT | UidT
+  deriving (Show, Eq, Enum, Bounded)
+
+-- | Lookup function from a 'TypeAlias' to the underlying crucible type it
+-- represents.
+newtype TypeLookup = TypeLookup (TypeAlias -> (Some LCT.TypeRepr))
+
+-- | A lookup function from 'TypeAlias' to types with the appropriate width
+-- on X86_64 Linux.
+x86_64LinuxTypes :: TypeLookup
+x86_64LinuxTypes = TypeLookup $ \tp ->
+  case tp of
+    Byte -> Some (LCT.BVRepr (PN.knownNat @8))
+    Int -> Some (LCT.BVRepr (PN.knownNat @32))
+    Long -> Some (LCT.BVRepr (PN.knownNat @64))
+    PidT -> Some (LCT.BVRepr (PN.knownNat @32))
+    Pointer -> Some (LCLM.LLVMPointerRepr (PN.knownNat @64))
+    Short -> Some (LCT.BVRepr (PN.knownNat @16))
+    SizeT -> Some (LCT.BVRepr (PN.knownNat @64))
+    UidT -> Some (LCT.BVRepr (PN.knownNat @32))
+-- | The constraints on the abstract parser monad
+type ExtensionParser m ext s = ( LCSE.MonadSyntax LCSA.Atomic m
+                               , MonadWriter [WP.Posd (LCCR.Stmt ext s)] m
+                               , MonadState (LCSC.SyntaxState s) m
+                               , MonadIO m
+                               , LCCE.IsSyntaxExtension ext
+                               )
+
+-- | A wrapper for a syntax extension statement
+--
+-- Note that these are pure and are intended to guide the substitution of syntax
+-- extensions for operations in the 'MDS.MacawExt' syntax.
+data ExtensionWrapper arch args ret =
+  ExtensionWrapper { extName :: LCSA.AtomName
+                  -- ^ Name of the syntax extension
+                   , extArgTypes :: LCT.CtxRepr args
+                  -- ^ Types of the arguments to the syntax extension
+                   , extWrapper :: forall s m
+                                 . ( ExtensionParser m (DMS.MacawExt arch) s)
+                                => Ctx.Assignment (LCCR.Atom s) args
+                                -> m (LCCR.AtomValue (DMS.MacawExt arch) s ret)
+                  -- ^ Syntax extension wrapper that takes the arguments passed
+                  -- to the extension operator, returning a suitable crucible
+                  -- 'LCCR.AtomValue' to represent it in the syntax extension.
+                   }
+
+data SomeExtensionWrapper arch =
+  forall args ret. SomeExtensionWrapper (ExtensionWrapper arch args ret)
+
+-- | A list of every type alias.
+allTypeAliases :: [TypeAlias]
+allTypeAliases = [minBound .. maxBound]
+
+-- | Parser for type extensions to crucible syntax
+extensionTypeParser
+  :: (LCSE.MonadSyntax LCSA.Atomic m)
+  => Map.Map LCSA.AtomName (Some LCT.TypeRepr)
+  -- ^ A mapping from additional type names to the crucible types they
+  -- represent
+  -> m (Some LCT.TypeRepr)
+extensionTypeParser types = do
+  name <- LCSC.atomName
+  case Map.lookup name types of
+    Just someTypeRepr -> return someTypeRepr
+    Nothing -> empty
+
+-- | Check that a 'WI.NatRepr' containing a value in bits is a multiple of 8.
+-- If so, pass the result of the value divided by 8 (i.e., as bytes) to the
+-- continuation. Otherwise, return a default result.
+bitsToBytes ::
+  WI.NatRepr bits ->
+  a ->
+  -- ^ Default value to use if @bits@ is not a multiple of 8
+  (forall bytes. ((8 WI.* bytes) ~ bits) => WI.NatRepr bytes -> a) ->
+  -- ^ Continuation to invoke is @bits@ is a multiple of 8
+  a
+bitsToBytes bitsRep nonMultipleOf8 multipleOf8 =
+  PN.withDivModNat bitsRep w8 $ \bytesRep modRep ->
+    case PN.isZeroNat modRep of
+      PN.NonZeroNat -> nonMultipleOf8
+      PN.ZeroNat
+        |  PN.Refl <- PN.mulComm bytesRep w8
+        -> multipleOf8 bytesRep
+  where
+    w8 = PN.knownNat @8
+
+-- | Perform a case analysis on the type argument to @pointer-read@ or
+-- @pointer-write@. If the supplied type is not supported, return 'empty'.
+-- This function packages factors out all of the grungy pattern-matching and
+-- constraint wrangling that @pointer-read@ and @pointer-write@ share in
+-- common.
+withSupportedPointerReadWriteTypes ::
+  Alternative f =>
+  LCT.TypeRepr tp ->
+  -- ^ The type argument
+  (forall bits bytes.
+     ( tp ~ LCT.BVType bits
+     , (8 WI.* bytes) ~ bits
+     , 1 <= bits
+     , 1 <= bytes
+     ) =>
+     WI.NatRepr bits ->
+     WI.NatRepr bytes ->
+     f a) ->
+  -- ^ Continuation to use if the argument is @'LCT.BVRepr (8 'WI.*' bytes)@
+  -- (for some positive number @bytes@).
+  (forall bits bytes.
+     ( tp ~ LCLM.LLVMPointerType bits
+     , (8 WI.* bytes) ~ bits
+     , 1 <= bits
+     , 1 <= bytes
+     ) =>
+     WI.NatRepr bits ->
+     WI.NatRepr bytes ->
+     f a) ->
+  -- ^ Continuation to use if the argument is
+  -- @'LCLM.LLVMPointerRepr (8 'WI.*' bytes)@ (for some positive number
+  -- @bytes@).
+  f a
+withSupportedPointerReadWriteTypes tp bvK ptrK =
+  case tp of
+    LCT.BVRepr bits ->
+      bitsToBytes bits empty $ \bytes ->
+        case PN.isPosNat bytes of
+          Nothing -> empty
+          Just PN.LeqProof -> bvK bits bytes
+    LCLM.LLVMPointerRepr bits ->
+      bitsToBytes bits empty $ \bytes ->
+        case PN.isPosNat bytes of
+          Nothing -> empty
+          Just PN.LeqProof -> ptrK bits bytes
+    _ -> empty
+
+-- | Parser for syntax extensions to crucible syntax
+--
+-- Note that the return value is a 'Pair.Pair', which seems a bit strange
+-- because the 'LCT.TypeRepr' in the first of the pair is redundant (it can be
+-- recovered by inspecting the 'LCCR.Atom'). The return value is set up this way
+-- because the use site of the parser in crucible-syntax expects the 'Pair.Pair'
+-- for all of the parsers that it attempts; returning the 'Pair.Pair' here
+-- simplifies the use site.
+extensionParser :: forall s m ext arch w
+                 . ( ExtensionParser m ext s
+                   , ext ~ (DMS.MacawExt arch)
+                   , w ~ DMC.ArchAddrWidth arch
+                   , 1 <= w
+                   , KnownNat w
+                   , DMM.MemWidth w
+                   )
+                => Map.Map LCSA.AtomName (SomeExtensionWrapper arch)
+                -- ^ Mapping from names to syntax extensions
+                -> LCSC.ParserHooks ext
+                -- ^ ParserHooks for the desired syntax extension
+                -> m (Some (LCCR.Atom s))
+                -- ^ A pair containing a result type and an atom of that type
+extensionParser wrappers hooks =
+  let ?parserHooks = hooks in
+  LCSE.depCons LCSC.atomName $ \name ->
+    case name of
+      LCSA.AtomName "pointer-read" -> do
+        -- Pointer reads are a special case because we must parse the type of
+        -- the value to read as well as the endianness of the read before
+        -- parsing the additional arguments as Atoms.
+        LCSE.depCons LCSC.isType $ \(Some tp) ->
+          LCSE.depCons LCSC.atomName $ \endiannessName ->
+            case endiannessFromAtomName endiannessName of
+              Just endianness ->
+                let readWrapper =
+                      buildPointerReadWrapper tp endianness in
+                go (SomeExtensionWrapper readWrapper)
+              Nothing -> empty
+      LCSA.AtomName "pointer-write" -> do
+        -- Pointer writes are a special case because we must parse the type of
+        -- the value to write out as well as the endianness of the write before
+        -- parsing the additional arguments as Atoms.
+        LCSE.depCons LCSC.isType $ \(Some tp) ->
+          LCSE.depCons LCSC.atomName $ \endiannessName ->
+            case endiannessFromAtomName endiannessName of
+              Just endianness ->
+                let writeWrapper =
+                      buildPointerWriteWrapper tp endianness in
+                go (SomeExtensionWrapper writeWrapper)
+              Nothing -> empty
+      LCSA.AtomName "bv-typed-literal" -> do
+        -- Bitvector literals with a type argument are a special case.  We must
+        -- parse the type argument separately before parsing the remaining
+        -- argument as an Atom.
+        LCSE.depCons LCSC.isType $ \(Some tp) ->
+          case tp of
+            LCT.BVRepr{} ->
+              go (SomeExtensionWrapper (buildBvTypedLitWrapper tp))
+            _ -> empty
+      LCSA.AtomName "fresh-vec" -> do
+        -- Generating fresh vectors are a special case. We must parse the
+        -- name and length arguments separately due to their need to be
+        -- concrete, and we must parse the type argument separately before we
+        -- can know the return type.
+        LCSE.depCons LCSC.string $ \nmPrefix ->
+          LCSE.depCons LCSC.isType $ \(Some tp) ->
+            LCSE.depCons LCSC.nat $ \len ->
+            go (SomeExtensionWrapper (buildFreshVecWrapper nmPrefix tp len))
+      _ ->
+        case Map.lookup name wrappers of
+          Nothing -> empty
+          Just w -> go w
+  where
+    go :: (?parserHooks :: LCSC.ParserHooks ext)
+       => SomeExtensionWrapper arch
+       -> m (Some (LCCR.Atom s))
+    go (SomeExtensionWrapper wrapper) = do
+      loc <- LCSE.position
+      -- Generate atoms for the arguments to this extension
+      operandAtoms <- LCSC.operands (extArgTypes wrapper)
+      -- Pass these atoms to the extension wrapper and return an atom for the
+      -- resulting value
+      atomVal <- extWrapper wrapper operandAtoms
+      endAtom <- LCSC.freshAtom loc atomVal
+      return (Some endAtom)
+
+    -- Parse an 'LCSA.AtomName' representing an endianness into a
+    -- 'Maybe DMM.Endianness'
+    endiannessFromAtomName endianness =
+      case endianness of
+        LCSA.AtomName "le" -> Just DMM.LittleEndian
+        LCSA.AtomName "be" -> Just DMM.BigEndian
+        _ -> Nothing
+
+-- | Wrap a statement extension binary operator
+binop :: (KnownNat w, Monad m)
+      => (  WI.NatRepr w
+         -> lhsType
+         -> rhsType
+         -> LCCE.StmtExtension ext (LCCR.Atom s) tp )
+      -- ^ Binary operator
+      -> lhsType
+      -- ^ Left-hand side operand
+      -> rhsType
+      -- ^ Right-hand side operand
+      -> m (LCCR.AtomValue ext s tp)
+binop op lhs rhs =
+  return (LCCR.EvalExt (op WI.knownNat lhs rhs))
+
+
+-- | Wrap a syntax extension binary operator, converting a bitvector in the
+-- right-hand side position to an 'LLVMPointerType' before wrapping.
+binopRhsBvToPtr :: ( ext ~ DMS.MacawExt arch
+                   , ExtensionParser m ext s
+                   , 1 <= w
+                   , KnownNat w )
+                => (  WI.NatRepr w
+                   -> lhsType
+                   -> LCCR.Atom s (LCLM.LLVMPointerType w)
+                   -> LCCE.StmtExtension ext (LCCR.Atom s) tp)
+                -- ^ Binary operator taking an 'LLVMPointerType' in the
+                -- right-hand side position
+                -> lhsType
+                -- ^ Left-hand side operand
+                -> LCCR.Atom s (LCT.BVType w)
+                -- ^ Right-hand side operand as a bitvector to convert to an
+                -- 'LLVMPointerType'
+                -> m (LCCR.AtomValue ext s tp)
+binopRhsBvToPtr op p1 p2 = do
+  toPtrAtom <- LCSC.freshAtom WP.InternalPos (LCCR.EvalApp (LCE.ExtensionApp (DMS.BitsToPtr WI.knownNat p2)))
+  binop op p1 toPtrAtom
+
+-- | Wrapper for the 'DMS.PtrAdd' syntax extension that enables users to add
+-- integer offsets to pointers:
+--
+-- > pointer-add ptr offset
+--
+-- Note that the underlying macaw primitive allows the offset to be in either
+-- position. This user-facing interface is more restrictive.
+wrapPointerAdd
+  :: ( 1 <= w
+     , KnownNat w
+     , DMC.MemWidth w
+     , w ~ DMC.ArchAddrWidth arch )
+  => ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w
+                                    Ctx.::> LCT.BVType w)
+                      (LCLM.LLVMPointerType w)
+wrapPointerAdd = ExtensionWrapper
+  { extName = LCSA.AtomName "pointer-add"
+  , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+                            Ctx.:> LCT.BVRepr LCT.knownNat
+  , extWrapper = \args -> Ctx.uncurryAssignment (binopRhsBvToPtr DMS.PtrAdd) args }
+
+-- | Wrapper for the 'DMS.PtrSub' syntax extension that enables users to
+-- subtract integer offsets from pointers:
+--
+-- > pointer-sub ptr offset
+--
+-- Note that the underlying macaw primitive allows the offset to be in either
+-- position. This user-facing interface is more restrictive.
+wrapPointerSub
+  :: ( 1 <= w
+     , KnownNat w
+     , DMC.MemWidth w
+     , w ~ DMC.ArchAddrWidth arch )
+  => ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w
+                                    Ctx.::> LCT.BVType w)
+                      (LCLM.LLVMPointerType w)
+wrapPointerSub = ExtensionWrapper
+  { extName = LCSA.AtomName "pointer-sub"
+  , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+                            Ctx.:> LCT.BVRepr LCT.knownNat
+  , extWrapper = \args -> Ctx.uncurryAssignment (binopRhsBvToPtr (DMS.PtrSub . DMM.addrWidthRepr)) args }
+
+-- | Compute the difference between two pointers in bytes using 'DMS.PtrSub'
+pointerDiff :: ( w ~ DMC.ArchAddrWidth arch
+               , ext ~ DMS.MacawExt arch
+               , ExtensionParser m ext s
+               , 1 <= w
+               , KnownNat w
+               , DMM.MemWidth w )
+            => LCCR.Atom s (LCLM.LLVMPointerType w)
+            -> LCCR.Atom s (LCLM.LLVMPointerType w)
+            -> m (LCCR.AtomValue ext s (LCT.BVType w))
+pointerDiff lhs rhs = do
+  ptrRes <- binop (DMS.PtrSub . DMM.addrWidthRepr) lhs rhs
+  ptrAtom <- LCSC.freshAtom WP.InternalPos ptrRes
+  -- 'DMS.PtrSub' of two pointers produces a bitvector (LLVMPointer with region
+  -- 0) so 'DMS.PtrToBits' is safe here.
+  return (LCCR.EvalApp (LCE.ExtensionApp (DMS.PtrToBits LCT.knownNat ptrAtom)))
+
+-- | Wrapper for the 'DMS.PtrSub' syntax extension that enables users to
+-- subtract pointers from pointers:
+--
+-- > pointer-diff ptr1 ptr2
+wrapPointerDiff
+  :: ( w ~ DMC.ArchAddrWidth arch
+     , 1 <= w
+     , KnownNat w
+     , DMC.MemWidth w )
+  => ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w
+                                    Ctx.::> LCLM.LLVMPointerType w )
+                      (LCT.BVType w)
+wrapPointerDiff = ExtensionWrapper
+  { extName = LCSA.AtomName "pointer-diff"
+  , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+                            Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+  , extWrapper = \args -> Ctx.uncurryAssignment pointerDiff args }
+
+-- | Wrapper for 'DMS.MacawNullPtr' to construct a null pointer.
+--
+-- > make-null
+wrapMakeNull
+  :: ( w ~ DMC.ArchAddrWidth arch
+     , 1 <= w
+     , KnownNat w
+     , DMC.MemWidth w )
+  => ExtensionWrapper arch
+                      Ctx.EmptyCtx
+                      (LCLM.LLVMPointerType w)
+wrapMakeNull = ExtensionWrapper
+  { extName = LCSA.AtomName "make-null"
+  , extArgTypes = Ctx.empty
+  , extWrapper = \_ ->
+      let nullptr = DMS.MacawNullPtr (DMC.addrWidthRepr WI.knownNat) in
+      return (LCCR.EvalApp (LCE.ExtensionApp nullptr)) }
+
+-- | Wrapper for the 'DMS.PtrEq' syntax extension that enables users to test
+-- the equality of two pointers.
+--
+-- > pointer-eq ptr1 ptr2
+wrapPointerEq
+  :: ( 1 <= w
+     , KnownNat w
+     , DMC.MemWidth w
+     , w ~ DMC.ArchAddrWidth arch )
+  => ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w
+                                    Ctx.::> LCLM.LLVMPointerType w)
+                      LCT.BoolType
+wrapPointerEq = ExtensionWrapper
+ { extName = LCSA.AtomName "pointer-eq"
+ , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+                           Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+ , extWrapper = \args -> Ctx.uncurryAssignment (binop (DMS.PtrEq . DMM.addrWidthRepr)) args }
+
+-- | Wrapper for the 'DMS.MacawReadMem' syntax extension that enables users to
+-- read through a pointer to retrieve data at the underlying memory location.
+--
+-- > pointer-read type endianness ptr
+buildPointerReadWrapper
+  :: ( 1 <= w
+     , KnownNat w
+     , DMC.MemWidth w
+     , w ~ DMC.ArchAddrWidth arch
+     )
+  => LCT.TypeRepr tp
+  -> DMM.Endianness
+  -> ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w)
+                      tp
+buildPointerReadWrapper tp endianness = ExtensionWrapper
+  { extName = LCSA.AtomName "pointer-read"
+  , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+  , extWrapper =
+      \args -> Ctx.uncurryAssignment (pointerRead tp endianness) args }
+
+-- | Read through a pointer using 'DMS.MacawReadMem'.
+pointerRead :: ( w ~ DMC.ArchAddrWidth arch
+               , DMM.MemWidth w
+               , KnownNat w
+               , ExtensionParser m ext s
+               , ext ~ DMS.MacawExt arch
+               )
+            => LCT.TypeRepr tp
+            -> DMM.Endianness
+            -> LCCR.Atom s (LCLM.LLVMPointerType w)
+            -> m (LCCR.AtomValue ext s tp)
+pointerRead tp endianness ptr =
+  withSupportedPointerReadWriteTypes tp
+    (\bits bytes -> do -- `Bitvector w` case
+      let readInfo = DMC.BVMemRepr bytes endianness
+      let readExt = DMS.MacawReadMem (DMC.addrWidthRepr WI.knownNat) readInfo ptr
+      readAtom <- LCSC.freshAtom WP.InternalPos (LCCR.EvalExt readExt)
+      return (LCCR.EvalApp (LCE.ExtensionApp (DMS.PtrToBits bits readAtom))))
+    (\_bits bytes -> do -- `Pointer` case
+      let readInfo = DMC.BVMemRepr bytes endianness
+      let readExt = DMS.MacawReadMem (DMC.addrWidthRepr WI.knownNat) readInfo ptr
+      return (LCCR.EvalExt readExt))
+
+-- | Wrapper for the 'DMS.MacawWriteMem' syntax extension that enables users to
+-- write data through a pointer to the underlying memory location.
+--
+-- > pointer-write type endianness ptr (val :: type)
+buildPointerWriteWrapper
+  :: ( w ~ DMC.ArchAddrWidth arch
+     , DMM.MemWidth w
+     , KnownNat w
+     , ext ~ DMS.MacawExt arch
+     )
+  => LCT.TypeRepr tp
+  -> DMM.Endianness
+  -> ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCLM.LLVMPointerType w
+                                    Ctx.::> tp)
+                      LCT.UnitType
+buildPointerWriteWrapper tp endianness = ExtensionWrapper
+  { extName = LCSA.AtomName "pointer-write"
+  , extArgTypes = Ctx.empty Ctx.:> LCLM.LLVMPointerRepr LCT.knownNat
+                            Ctx.:> tp
+  , extWrapper =
+      \args -> Ctx.uncurryAssignment (pointerWrite tp endianness) args }
+
+-- | Read through a pointer using 'DMS.MacawWriteMem'.
+pointerWrite :: ( w ~ DMC.ArchAddrWidth arch
+                , DMM.MemWidth w
+                , KnownNat w
+                , ExtensionParser m ext s
+                , ext ~ DMS.MacawExt arch
+                )
+              => LCT.TypeRepr tp
+              -> DMM.Endianness
+              -> LCCR.Atom s (LCLM.LLVMPointerType w)
+              -> LCCR.Atom s tp
+              -> m (LCCR.AtomValue ext s LCT.UnitType)
+pointerWrite tp endianness ptr val =
+  withSupportedPointerReadWriteTypes tp
+    (\bits bytes -> do -- `Bitvector w` case
+      toPtrAtom <- LCSC.freshAtom WP.InternalPos
+        (LCCR.EvalApp (LCE.ExtensionApp (DMS.BitsToPtr bits val)))
+      let writeInfo = DMC.BVMemRepr bytes endianness
+      let writeExt = DMS.MacawWriteMem (DMC.addrWidthRepr WI.knownNat)
+                                       writeInfo
+                                       ptr
+                                       toPtrAtom
+      return (LCCR.EvalExt writeExt))
+    (\_bits bytes -> do -- `Pointer` case
+      let writeInfo = DMC.BVMemRepr bytes endianness
+      let writeExt = DMS.MacawWriteMem (DMC.addrWidthRepr WI.knownNat)
+                                       writeInfo
+                                       ptr
+                                       val
+      return (LCCR.EvalExt writeExt))
+
+-- | Wrapper for constructing bitvector literals matching the size of an
+-- 'LCT.BVRepr'.  This enables users to instantiate literals with portable
+-- types (such as SizeT).
+--
+-- > bv-typed-literal type integer
+buildBvTypedLitWrapper
+  :: LCT.TypeRepr (LCT.BVType w)
+  -> ExtensionWrapper arch
+                      (Ctx.EmptyCtx Ctx.::> LCT.IntegerType)
+                      (LCT.BVType w)
+buildBvTypedLitWrapper tp = ExtensionWrapper
+  { extName = LCSA.AtomName "bv-typed-literal"
+  , extArgTypes = Ctx.empty Ctx.:> LCT.IntegerRepr
+  , extWrapper = \args -> Ctx.uncurryAssignment (bvTypedLit tp) args }
+
+-- | Create a bitvector literal matching the size of an 'LCT.BVRepr'
+bvTypedLit :: forall s ext w m
+           . ( ExtensionParser m ext s )
+          => LCT.TypeRepr (LCT.BVType w)
+          -> LCCR.Atom s LCT.IntegerType
+          -> m (LCCR.AtomValue ext s (LCT.BVType w))
+bvTypedLit (LCT.BVRepr w) val = return (LCCR.EvalApp (LCE.IntegerToBV w val))
+
+-- | Wrapper for generating a vector of the given length, where each element is
+-- a fresh constant of the supplied type whose name is derived from the given
+-- string. This is a convenience for users who wish to quickly generate
+-- symbolic data (e.g., for use with @write-bytes@).
+--
+-- > fresh-vec string type integer
+buildFreshVecWrapper ::
+     DT.Text
+  -> LCT.TypeRepr tp
+  -> Natural
+  -> ExtensionWrapper arch
+                      Ctx.EmptyCtx
+                      (LCT.VectorType tp)
+buildFreshVecWrapper nmPrefix tp len = ExtensionWrapper
+  { extName = LCSA.AtomName "fresh-vec"
+  , extArgTypes = Ctx.empty
+  , extWrapper = \_ -> freshVec nmPrefix tp len }
+
+-- | Generate a vector of the given length, where each element is a fresh
+-- constant of the supplied type whose name is derived from the given string.
+freshVec :: forall s ext tp m.
+            ExtensionParser m ext s
+         => DT.Text
+         -> LCT.TypeRepr tp
+         -> Natural
+         -> m (LCCR.AtomValue ext s (LCT.VectorType tp))
+freshVec nmPrefix tp len = do
+  case tp of
+    LCT.FloatRepr fi ->
+      mkVec (LCCR.FreshFloat fi)
+    LCT.NatRepr ->
+      mkVec LCCR.FreshNat
+    _ | LCT.AsBaseType bt <- LCT.asBaseType tp ->
+          mkVec (LCCR.FreshConstant bt)
+      | otherwise ->
+          empty
+  where
+    -- Construct an expression that looks roughly like:
+    --
+    --   (vector <tp> (fresh <s>_0) ... (fresh <s>_<n-1>))
+    --
+    -- Where the implementation of `fresh` is determined by the first argument.
+    mkVec :: (Maybe WI.SolverSymbol -> LCCR.AtomValue ext s tp)
+          -> m (LCCR.AtomValue ext s (LCT.VectorType tp))
+    mkVec mkFresh = do
+      vec <- DV.generateM (fromIntegral len) $ \i ->
+        LCSC.freshAtom WP.InternalPos $ mkFresh $ Just $ WI.safeSymbol $
+        DT.unpack nmPrefix ++ "_" ++ show i
+      pure $ LCCR.EvalApp $ LCE.VectorLit tp vec
+
+-- | Syntax extension wrappers
+extensionWrappers
+  :: (w ~ DMC.ArchAddrWidth arch, 1 <= w, KnownNat w, DMC.MemWidth w)
+  => Map.Map LCSA.AtomName (SomeExtensionWrapper arch)
+extensionWrappers = Map.fromList
+  [ (LCSA.AtomName "pointer-add", SomeExtensionWrapper wrapPointerAdd)
+  , (LCSA.AtomName "pointer-diff", SomeExtensionWrapper wrapPointerDiff)
+  , (LCSA.AtomName "pointer-sub", SomeExtensionWrapper wrapPointerSub)
+  , (LCSA.AtomName "pointer-eq", SomeExtensionWrapper wrapPointerEq)
+  , (LCSA.AtomName "make-null", SomeExtensionWrapper wrapMakeNull)
+  ]
+
+-- | All of the crucible syntax extensions to support machine code
+machineCodeParserHooks
+  :: forall w arch proxy
+   . (w ~ DMC.ArchAddrWidth arch, 1 <= w, KnownNat w, DMC.MemWidth w)
+  => proxy arch
+  -> TypeLookup
+  -- ^ A lookup function from a 'TypeAlias' to the underlying Crucible type
+  -- it represents.
+  -> LCSC.ParserHooks (DMS.MacawExt arch)
+machineCodeParserHooks proxy typeLookup =
+  let TypeLookup lookupFn = typeLookup
+      types = Map.fromList [ (LCSA.AtomName (DT.pack (show t)), lookupFn t)
+                           | t <- allTypeAliases ] in
+  LCSC.ParserHooks (extensionTypeParser types)
+                   (extensionParser extensionWrappers (machineCodeParserHooks proxy typeLookup))
--- a/symbolic-syntax/test-data/byte_id.cbl
+++ b/symbolic-syntax/test-data/byte_id.cbl
@ -0,0 +1,11 @@
+; The identity function for a Byte
+(defun @byte_id ((x Byte)) Byte
+  (start start:
+    (return x)))
+
+(defun @test_byte_id () Unit
+  (start start:
+    (let input (fresh Byte))
+    (let input_back (funcall @byte_id input))
+    (assert! (equal? input input_back) "byte_id test")
+    (return ())))
--- a/symbolic-syntax/test-data/byte_id.out
+++ b/symbolic-syntax/test-data/byte_id.out
@ -0,0 +1,33 @@
+(defun @byte_id ((x Byte)) Byte (start start: (return x)))
+
+(defun @test_byte_id () Unit
+   (start start:
+      (let input (fresh Byte))
+      (let input_back (funcall @byte_id input))
+      (assert! (equal? input input_back) "byte_id test")
+      (return ())))
+
+byte_id
+%0
+  % 4:5
+  return $0
+  % no postdom
+test_byte_id
+%0
+  % 8:16
+  $0 = fresh BaseBVRepr 8 input
+  % 9:21
+  $1 = handleLit(byte_id)
+  % 9:21
+  $2 = call $1($0);
+  % 10:14
+  $3 = baseIsEq(BaseBVRepr 8, $0, $2)
+  % 10:40
+  $4 = stringLit("byte_id test")
+  % 10:5
+  assert($3, $4)
+  % 11:13
+  $5 = emptyApp()
+  % 11:5
+  return $5
+  % no postdom
--- a/symbolic-syntax/test-data/byte_id.out.good
+++ b/symbolic-syntax/test-data/byte_id.out.good
@ -0,0 +1,33 @@
+(defun @byte_id ((x Byte)) Byte (start start: (return x)))
+
+(defun @test_byte_id () Unit
+   (start start:
+      (let input (fresh Byte))
+      (let input_back (funcall @byte_id input))
+      (assert! (equal? input input_back) "byte_id test")
+      (return ())))
+
+byte_id
+%0
+  % 4:5
+  return $0
+  % no postdom
+test_byte_id
+%0
+  % 8:16
+  $0 = fresh BaseBVRepr 8 input
+  % 9:21
+  $1 = handleLit(byte_id)
+  % 9:21
+  $2 = call $1($0);
+  % 10:14
+  $3 = baseIsEq(BaseBVRepr 8, $0, $2)
+  % 10:40
+  $4 = stringLit("byte_id test")
+  % 10:5
+  assert($3, $4)
+  % 11:13
+  $5 = emptyApp()
+  % 11:5
+  return $5
+  % no postdom
--- a/symbolic-syntax/test-data/copy.cbl
+++ b/symbolic-syntax/test-data/copy.cbl
@ -0,0 +1,6 @@
+; A basic function that copies the data pointed to by one pointer to another
+(defun @copy ((src Pointer) (dest Pointer)) Unit
+  (start start:
+    (let src-data (pointer-read Int le src))
+    (pointer-write Int le dest src-data)
+    (return ())))
--- a/symbolic-syntax/test-data/copy.out
+++ b/symbolic-syntax/test-data/copy.out
@ -0,0 +1,21 @@
+(defun @copy ((src Pointer) (dest Pointer)) Unit
+   (start start:
+      (let src-data (pointer-read Int le src))
+      (pointer-write Int le dest src-data)
+      (return ())))
+
+copy
+%0
+  % internal
+  $2 = (macawReadMem bvle4 $0)
+  % 4:19
+  $3 = extensionApp((ptr_to_bits_32 $2))
+  % internal
+  $4 = extensionApp((bits_to_ptr_32 $3))
+  % 5:5
+  $5 = (macawWriteMem bvle4 $1 $4)
+  % 6:13
+  $6 = emptyApp()
+  % 6:5
+  return $6
+  % no postdom
--- a/symbolic-syntax/test-data/copy.out.good
+++ b/symbolic-syntax/test-data/copy.out.good
@ -0,0 +1,21 @@
+(defun @copy ((src Pointer) (dest Pointer)) Unit
+   (start start:
+      (let src-data (pointer-read Int le src))
+      (pointer-write Int le dest src-data)
+      (return ())))
+
+copy
+%0
+  % internal
+  $2 = (macawReadMem bvle4 $0)
+  % 4:19
+  $3 = extensionApp((ptr_to_bits_32 $2))
+  % internal
+  $4 = extensionApp((bits_to_ptr_32 $3))
+  % 5:5
+  $5 = (macawWriteMem bvle4 $1 $4)
+  % 6:13
+  $6 = emptyApp()
+  % 6:5
+  return $6
+  % no postdom
--- a/symbolic-syntax/test-data/short_id.cbl
+++ b/symbolic-syntax/test-data/short_id.cbl
@ -0,0 +1,11 @@
+; The identity function for a 16-bit int
+(defun @short_id ((x Short)) Short
+  (start start:
+    (return x)))
+
+(defun @test_short_id () Unit
+  (start start:
+    (let input (fresh Short))
+    (let input_back (funcall @short_id input))
+    (assert! (equal? input input_back) "short_id test")
+    (return ())))
--- a/symbolic-syntax/test-data/short_id.out
+++ b/symbolic-syntax/test-data/short_id.out
@ -0,0 +1,33 @@
+(defun @short_id ((x Short)) Short (start start: (return x)))
+
+(defun @test_short_id () Unit
+   (start start:
+      (let input (fresh Short))
+      (let input_back (funcall @short_id input))
+      (assert! (equal? input input_back) "short_id test")
+      (return ())))
+
+short_id
+%0
+  % 4:5
+  return $0
+  % no postdom
+test_short_id
+%0
+  % 8:16
+  $0 = fresh BaseBVRepr 16 input
+  % 9:21
+  $1 = handleLit(short_id)
+  % 9:21
+  $2 = call $1($0);
+  % 10:14
+  $3 = baseIsEq(BaseBVRepr 16, $0, $2)
+  % 10:40
+  $4 = stringLit("short_id test")
+  % 10:5
+  assert($3, $4)
+  % 11:13
+  $5 = emptyApp()
+  % 11:5
+  return $5
+  % no postdom
--- a/symbolic-syntax/test-data/short_id.out.good
+++ b/symbolic-syntax/test-data/short_id.out.good
@ -0,0 +1,33 @@
+(defun @short_id ((x Short)) Short (start start: (return x)))
+
+(defun @test_short_id () Unit
+   (start start:
+      (let input (fresh Short))
+      (let input_back (funcall @short_id input))
+      (assert! (equal? input input_back) "short_id test")
+      (return ())))
+
+short_id
+%0
+  % 4:5
+  return $0
+  % no postdom
+test_short_id
+%0
+  % 8:16
+  $0 = fresh BaseBVRepr 16 input
+  % 9:21
+  $1 = handleLit(short_id)
+  % 9:21
+  $2 = call $1($0);
+  % 10:14
+  $3 = baseIsEq(BaseBVRepr 16, $0, $2)
+  % 10:40
+  $4 = stringLit("short_id test")
+  % 10:5
+  assert($3, $4)
+  % 11:13
+  $5 = emptyApp()
+  % 11:5
+  return $5
+  % no postdom
--- a/symbolic-syntax/test-data/zero_size_t.cbl
+++ b/symbolic-syntax/test-data/zero_size_t.cbl
@ -0,0 +1,5 @@
+; A basic function that returns a size_t containing zero
+(defun @zero_size_t () SizeT
+  (start start:
+    (let zero (bv-typed-literal SizeT 0))
+    (return zero)))
--- a/symbolic-syntax/test-data/zero_size_t.out
+++ b/symbolic-syntax/test-data/zero_size_t.out
@ -0,0 +1,14 @@
+(defun @zero_size_t () SizeT
+   (start start:
+      (let zero (bv-typed-literal SizeT 0))
+      (return zero)))
+
+zero_size_t
+%0
+  % 4:15
+  $0 = intLit(0)
+  % 4:15
+  $1 = integerToBV(64, $0)
+  % 5:5
+  return $1
+  % no postdom
--- a/symbolic-syntax/test-data/zero_size_t.out.good
+++ b/symbolic-syntax/test-data/zero_size_t.out.good
@ -0,0 +1,14 @@
+(defun @zero_size_t () SizeT
+   (start start:
+      (let zero (bv-typed-literal SizeT 0))
+      (return zero)))
+
+zero_size_t
+%0
+  % 4:15
+  $0 = intLit(0)
+  % 4:15
+  $1 = integerToBV(64, $0)
+  % 5:5
+  return $1
+  % no postdom
--- a/symbolic-syntax/test/Test.hs
+++ b/symbolic-syntax/test/Test.hs
@ -0,0 +1,56 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE ImplicitParams #-}
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeApplications #-}
+
+module Main (main) where
+
+import Data.List (sort)
+import Data.Proxy (Proxy(Proxy))
+import Data.Text.IO qualified as T
+import System.FilePath
+import System.IO
+
+import Test.Tasty (defaultMain, TestTree, testGroup)
+import Test.Tasty.Golden
+
+import Lang.Crucible.Syntax.Prog (doParseCheck)
+
+-- The syntax extension is not important, the choice of x86_64 is arbitrary
+import Data.Macaw.X86 (X86_64)
+import Data.Macaw.X86.Symbolic ()  -- TraversableFC instance
+
+import Data.Macaw.Symbolic.Syntax (machineCodeParserHooks, x86_64LinuxTypes)
+
+main :: IO ()
+main = do
+  parseTests <- findTests "Parse tests" "test-data" testParser
+  defaultMain $ testGroup "Tests" [parseTests]
+
+findTests :: String -> FilePath -> (FilePath -> FilePath -> IO ()) -> IO TestTree
+findTests groupName testDir testAction =
+  do inputs <- findByExtension [".cbl"] testDir
+     return $ testGroup groupName
+       [ goldenFileTestCase input testAction
+       | input <- sort inputs
+       ]
+
+goldenFileTestCase :: FilePath -> (FilePath -> FilePath -> IO ()) -> TestTree
+goldenFileTestCase input testAction =
+  goldenVsFileDiff
+   (takeBaseName input) -- test name
+   (\x y -> ["diff", "-u", x, y])
+   goodFile -- golden file path
+   outFile
+   (testAction input outFile) -- action whose result is tested
+  where
+    outFile = replaceExtension input ".out"
+    goodFile = replaceExtension input ".out.good"
+
+testParser :: FilePath -> FilePath -> IO ()
+testParser inFile outFile =
+  do contents <- T.readFile inFile
+     let ?parserHooks = machineCodeParserHooks (Proxy @X86_64) x86_64LinuxTypes
+     withFile outFile WriteMode $ doParseCheck inFile contents True
+