split into several modules, use hsc2hs for error codes

This commit is contained in:
Jost Berthold 2014-08-28 14:23:04 +02:00
parent cdc1f6f41a
commit d6d6b18a99
7 changed files with 586 additions and 382 deletions

View File

@ -1,429 +1,97 @@
{-# LANGUAGE RecordWildCards, BangPatterns, DeriveDataTypeable, CPP,
ScopedTypeVariables #-}
{-# LANGUAGE MagicHash, UnboxedTuples #-}
{-# LANGUAGE GHCForeignImportPrim, ForeignFunctionInterface,
UnliftedFFITypes #-}
{-# OPTIONS_HADDOCK prune #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : GHC.Packing
Copyright : (c) Jost Berthold, 2010-2014,
License : probably BSD3 (soon)
Maintainer : berthold@diku.dk
License : BSD3
Maintainer : jb.diku@gmail.com
Stability : experimental
Portability : no (depends on GHC internals)
Serialisation of Haskell data structures TODO REWRITE TECH PARTS FOR
FOREIGNPRIMOP VERSION.
= Serialisation of Haskell data structures (independent of evaluation)
Haskell heap structures can be serialised, capturing their current
state of evaluation, and deserialised later during the same program
run (effectively duplicating the data) or materialised on storage and
deserialised in a different run of the /same/ executable binary.
run (effectively duplicating the data). Serialised data can also be
written to storage or sent over a network, and deserialised in a
different run or different instance of the /same/ executable binary.
The feature can be used to implement message passing over a network
(which is where the runtime support originated), or for various
applications based on data persistence, for instance checkpointing and
memoisation.
There are two basic operations to serialise Haskell heap data:
The library described here supports an operation to serialise Haskell
heap data:
> serialize, trySerialize :: a -> IO (Serialized a)
> trySerialize :: a -> IO (Serialized a)
Both routines will throw @'PackException'@s when error conditions
occur inside the runtime system. In presence of concurrent threads,
the variant @'serialize'@ may block in case another thread is
evaluating data /referred to/ by the data to be
serialised. @'trySerialize'@ variant will never block, but instead
signal the condition as @'PackException'@ @'P_BLACKHOLE'@. Other
exceptions thrown by these two operations indicate error conditions
within the runtime system support (see @'PackException'@).
The routine will throw a 'PackException' if an error occurs inside the
C code which accesses the Haskell heap (see @'PackException'@).
In presence of concurrent threads, another thread might be evaluating
data /referred to/ by the data to be serialised. It would be nice to
/block/ the calling thread in this case, but this is not possible in
the library version (see <#background Background Information> below).
'trySerialize' variant will instead signal the condition as
'PackException' 'P_BLACKHOLE'.
The inverse operation to serialisation is
> deserialize :: Serialized a -> IO a
The data type @'Serialized' a@ includes a phantom type @a@ to ensure
The data type 'Serialized' a includes a phantom type @a@ to ensure
type safety within one and the same program run. Type @a@ can be
polymorphic (at compile time, that is) when @Serialized a@ is not used
apart from being argument to @deserialize@.
polymorphic (at compile time, that is) when 'Serialized' @a@ is not used
apart from being argument to 'deserialize'.
The @Show@, @Read@, and @Binary@ instances of @Serialized a@ require an
additional @Typeable@ context (which requires @a@ to be monomorphic)
additional 'Typeable' context (which requires @a@ to be monomorphic)
in order to implement dynamic type checks when parsing and deserialising
data from external sources.
Consequently, the @'PackException'@ type contains exceptions which indicate
Consequently, the 'PackException' type contains exceptions which indicate
parse errors and type/binary mismatch.
-}
module GHC.Packing
( -- * Serialisation Operations
trySerialize
trySerialize, trySerializeWith
, deserialize
-- * Data Types
-- * Data Types and instances
, Serialized
-- $ShowReadBinary
, PackException(..)
-- * Serialisation and File I/O
-- $packexceptions
-- * Serialisation and binary file I/O
, encodeToFile
, decodeFromFile
-- * Background Information
-- $primitives
)
where
-- could make a compatibility layer for Eden-GHC-7.x (supports
-- serialize#) but we rather bail out here.
#if __GLASGOW_HASKELL__ < 708
#error This module assumes GHC-7.8 or above
#endif
-- all essentials are defined in other modules, and reexported here
import GHC.Packing.PackException
import GHC.Packing.Type
import GHC.Packing.Core
import GHC.IO ( IO(..) )
import GHC.Prim
import GHC.Exts ( Int(..))
import Data.Word( Word, Word64, Word32 )
import Data.Array.Base ( UArray(..), elems, listArray )
import Foreign.Storable ( sizeOf )
-- using mutable byte arrays as buffer (passed to the primitive)
import Control.Monad.Primitive
import Data.Primitive.ByteArray
-- Read and Show instances
import Text.Printf ( printf )
import Text.ParserCombinators.ReadP (sepBy1, many1, ReadP, munch,
munch1, pfail, readP_to_S, satisfy, skipSpaces, string )
import Data.Char ( isDigit )
import Data.Binary ( Get, Binary(..), encode, decode, encodeFile, decodeFile )
-- for dynamic type checks when parsing
import Data.Typeable -- ( Typeable(..), typeOf )
import Data.Typeable.Internal (TypeRep(..))
import qualified GHC.Fingerprint
-- for a hash of the executable. Using GHC.Fingerprint.getFileHash
import GHC.Fingerprint(getFileHash)
import System.Environment
import System.IO.Unsafe
import qualified Data.ByteString as B
import Control.Monad( when )
-- for exceptions thrown by trySerialize
import Data.Binary
import Control.Exception
-- Typeable is also required for this
----------------------------------------------
-- replacement for the old GHC.Constants.TargetWord. This is a cheap
-- and incomplete hack. I could just use a configure script. Too bad
-- the comfortable GHC.Constants was removed.
-- And, actually, GHC uses machine word size (as Haskell 2010
-- spec. does not fix it) so this should not be necessary at all...
-- http://www.haskell.org/ghc/docs/7.6.3/html/users_guide/bugs-and-infelicities.html#haskell-98-2010-undefined
import Data.Word
#if x86_64_BUILD_ARCH
type TargetWord = Word64
hexWordFmt = "0x%016x"
#elif i386_BUILD_ARCH
type TargetWord = Word32
hexWordFmt = "0x%08x"
#elif powerpc_BUILD_ARCH
#error Don't know word size of your Power-PC model
#else
#error Don't know the word size on your machine.
#endif
-----------------------------------------------
-- Helper functions to compare types at runtime:
-- We use type "fingerprints" defined in GHC.Fingerprint.Type
-- This should ensure (as of GHC.7.8) that types with the same name
-- but different definition get different hashes. (however, we also
-- require the executable to be exactly the same, so this is not
-- "strictly necessary" anyway).
-----------------------------------------------
-- Typeable context for dynamic type checks.
-- | The module uses a custom GHC fingerprint type with its two Word64
-- fields, to be able to /read/ fingerprints
data FP = FP Word64 Word64 deriving (Read, Show, Eq)
-- | comparing 'FP's
matches :: Typeable a => a -> FP -> Bool
matches x (FP c1 c2) = f1 == c1 && f2 == c2
where (TypeRep (GHC.Fingerprint.Fingerprint f1 f2) _ _) = typeOf x
-- | creating an 'FP' from a GHC 'Fingerprint'
toFP :: GHC.Fingerprint.Fingerprint -> FP
toFP (GHC.Fingerprint.Fingerprint f1 f2) = FP f1 f2
-- | creating a type fingerprint
typeFP :: Typeable a => a -> FP
typeFP x = toFP fp
where (TypeRep fp _ _) = typeOf x
-----------------------------------------------
-- | check that the program (executable) is
-- identical when packing and unpacking
-- It uses the fingerprint type from above (Read/Show instances required).
-- This 'FP' is computed once, by virtue of being a CAF (safe to
-- inline but inefficient).
{-# NOINLINE prgHash #-}
prgHash :: FP
prgHash = unsafePerformIO $
getExecutablePath >>= getFileHash >>= return . toFP
-----------------------------------
-- | The type of Serialized data. Phantom type 'a' ensures that we do
-- not unpack rubbish. The hash of the executable is not needed here,
-- but only when /externalising/ data (writing to disk, for instance).
data Serialized a = Serialized { packetData :: ByteArray# }
-- | Non-blocking serialisation routine using @'PackException'@s to
-- signal errors. This version does not block the calling thread when
-- a black hole is found, but instead signals the condition by the
-- @'P_BLACKHOLE'@ exception.
trySerialize :: a -> IO (Serialized a) -- throws PackException (RTS)
trySerialize x = trySerializeWith x defaultBufSize
-- | A default buffer size, used when using the old API
defaultBufSize :: Int
defaultBufSize = 10 * 2^20 -- 10 MB
-- | Extended interface function. Allocates buffer of given size, serializes
-- data into it, then truncates buffer to required size and returns
-- serialized data
trySerializeWith :: a -> Int -> IO (Serialized a) -- using instance PrimMonad IO
trySerializeWith dat bufsize
= do buf <- newByteArray bufsize
size <- trySerializeInto buf dat
buf' <- truncate' buf size
ByteArray b# <- unsafeFreezeByteArray buf'
return (Serialized { packetData = b# })
-- | core routine. Packs x into mutable byte array buf, returns size
-- of packed x in buf
trySerializeInto :: MutableByteArray RealWorld -> a -> IO Int
trySerializeInto (MutableByteArray buf# ) x
= primitive (tryPack (unsafeCoerce# x :: Any) buf# )
-- | calls primitive, decodes/throws errors + wraps Int# size into Int
tryPack :: Any -> MutableByteArray# s
-> State# s -> (# State# s , Int #)
tryPack x# buf# s = case tryPack# x# buf# s of
(# s', 0#, size# #) -> (# s', I# size# #)
(# s', e#, 0# #) -> (# s', throw (decodeEx e#) #)
-- | serialisation primitive, implemented in C. Returns: a
-- status/error code and size used inside the array
foreign import prim "stg_tryPack" tryPack#
:: Any -> MutableByteArray# s -> State# s -> (# State# s, Int#, Int# #)
-- GHC-7.8 does not have an in-place shrink operation for MutableByteArrays
-- (added in GHC-7.9 on August 16, 2014)
-- GHC-7.9, August 2014 :: MutableByteArray# s -> Int# -> State# s -> State# s
-- with this one available, tryPack could do the work
-- for GHC-7.8, we copy
truncate' :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> m (MutableByteArray (PrimState m))
truncate' b size
= if sizeofMutableByteArray b < size
then throw P_NOBUFFER -- XXX other error?
else do b' <- newByteArray size
copyMutableByteArray b' 0 b 0 size
return b'
--------------------------------------------------------
-- | Deserialisation function. May throw @'PackException'@ @'P_GARBLED'@
deserialize :: Serialized a -> IO a
deserialize Serialized{..} = primitive (deser packetData)
deser :: ByteArray# -> State# s -> (# State# s, a #)
deser buf s = case unpack# buf s of
(# s', 0#, x #) -> (# s', x #)
(# s', n#, _ #) -> (# s', throw (decodeEx n#) #)
foreign import prim "stg_unpack" unpack# :: ByteArray# -> State# s -> (# State# s, Int#, a #)
import Data.Typeable
--------------------------------------------------------
-- | Packing exception codes, matching error codes implemented in the
-- runtime system or describing errors which can occur within Haskell.
data PackException = P_SUCCESS -- | no error, ==0. We do not expect this one to occur.
-- Error codes from the runtime system:
| P_BLACKHOLE -- ^ RTS: packing hit a blackhole (not blocking thread)
| P_NOBUFFER -- ^ RTS: buffer too small (increase RTS buffer with -qQ<size>)
| P_CANNOT_PACK -- ^ RTS: found a closure that cannot be packed (MVar, TVar)
| P_UNSUPPORTED -- ^ RTS: hit unsupported closure type (implementation missing)
| P_IMPOSSIBLE -- ^ RTS: hit impossible case (stack frame, message,...RTS bug!)
| P_GARBLED -- ^ RTS: invalid data for deserialisation
-- Error codes from inside Haskell
| P_ParseError -- ^ Haskell: Packet data could not be parsed
| P_BinaryMismatch -- ^ Haskell: Executable binaries do not match
| P_TypeMismatch -- ^ Haskell: Packet data encodes unexpected type
deriving (Eq, Ord, Typeable)
-- | decode an 'Int#' to a @'PackException'@
decodeEx :: Int# -> PackException
-- with hsc2hs: #include "Errors.h"; #define DECODE(ex) decode #{const ex} = ex
decodeEx 0# = P_SUCCESS -- unexpected
decodeEx 1# = P_BLACKHOLE
decodeEx 2# = P_NOBUFFER
decodeEx 3# = P_CANNOT_PACK
decodeEx 4# = P_UNSUPPORTED
decodeEx 5# = P_IMPOSSIBLE
decodeEx 6# = P_GARBLED
decodeEx i# = error $ "Error value " ++ show (I# i#) ++ " not defined!"
instance Show PackException where
show P_SUCCESS = "No error." -- we do not expect to see this
show P_BLACKHOLE = "Packing hit a blackhole"
show P_NOBUFFER = "Pack buffer too small"
show P_CANNOT_PACK = "Data contain a closure that cannot be packed (MVar, TVar)"
show P_UNSUPPORTED = "Contains an unsupported closure type (whose implementation is missing)"
show P_IMPOSSIBLE = "An impossible case happened (stack frame, message). This is probably a bug."
show P_GARBLED = "Garbled data for deserialisation"
show P_ParseError = "Packet parse error"
show P_BinaryMismatch = "Executable binaries do not match"
show P_TypeMismatch = "Packet data has unexpected type"
-- show other = "Packing error. TODO: define strings for more specific cases."
instance Exception PackException
-----------------------------------------------
-- Show Instance for packets:
-- | prints packet as Word array in 4 columns (/Word/ meaning the
-- machine word size), and additionally includes Fingerprint hash
-- values for executable binary and type.
instance Typeable a => Show (Serialized a) where
show (Serialized {..} )
= "Serialization Packet, size " ++ show size
++ ", program " ++ show prgHash ++ "\n"
++ ", type " ++ show t ++ "\n"
++ showWArray (UArray 0 (size-1) size packetData )
where size = case sizeofByteArray# packetData of
sz# -> (I# sz# ) `div` sizeOf(undefined::Word)
t = typeFP ( undefined :: a )
-- Helper to show a serialized structure as a packet (Word Array)
showWArray :: UArray Int TargetWord -> String
showWArray arr = unlines [ show i ++ ":" ++ unwords (map showH row)
| (i,row) <- zip [0,4..] elRows ]
where showH w = -- "\t0x" ++ showHex w " "
printf ('\t':hexWordFmt) w
elRows = takeEach4 (elems arr)
takeEach4 :: [a] -> [[a]]
takeEach4 [] = []
takeEach4 xs = first:takeEach4 rest
where (first,rest) = splitAt 4 xs
-----------------------------------------------
-- | Reads the format generated by the (@'Show'@) instance, checks
-- hash values for executable and type and parses exactly as much as
-- the included data size announces.
instance Typeable a => Read (Serialized a)
-- using ReadP parser (base-4.x), eats
where readsPrec _ input
= case parseP input of
[] -> throw P_ParseError -- no parse
[((sz,tp,dat),r)]
-> let !(UArray _ _ _ arr# ) = listArray (0,sz-1) dat
t = typeFP (undefined::a)
in if t == tp
then [(Serialized arr# , r)]
else throw P_TypeMismatch
other-> throw P_ParseError
-- ambiguous parse for packet
-- Packet Parser: read header with size and type, then iterate over
-- array values, reading several hex words in one row, separated by
-- tab and space. Packet size needed to avoid returning a prefix.
-- Could also consume other formats of the array (not implemented).
-- returns: (data size in words, type fingerprint, array values)
parseP :: ReadS (Int, FP, [TargetWord])
parseP = readP_to_S $
do string "Serialization Packet, size "
sz_str <- munch1 isDigit
let sz = read sz_str::Int
string ", program "
h <- munch1 (not . (== '\n'))
when (read h /= prgHash) (throw P_BinaryMismatch)
-- executables do not match. No ambiguous parses here,
-- so just throw; otherwise we would only pfail.
newline
string ", type "
tp <- munch1 (not . (== '\n'))
newline
let startRow = do { many1 digit; colon; tabSpace }
row = do { startRow; sepBy1 hexNum tabSpace }
valss <- sepBy1 row newline
skipSpaces -- eat remaining spaces
let vals = concat valss
l = length vals
-- filter out wrong lengths:
if (sz /= length vals) then pfail
else return (sz, read tp, vals)
digit = satisfy isDigit
colon = satisfy (==':')
tabSpace = munch1 ( \x -> x `elem` " \t" )
newline = munch1 (\x -> x `elem` " \n")
hexNum :: ReadP TargetWord -- we are fixing the type to what we need
hexNum = do string "0x"
ds <- munch hexDigit
return (read ("0x" ++ ds))
where hexDigit = (\x -> x `elem` "0123456789abcdefABCDEF")
------------------------------------------------------------------
-- | Binary instance for fingerprint data (encoding TypeRep and
-- executable in binary-encoded @Serialized a@)
instance Binary FP where
put (FP f1 f2) = do put f1
put f2
get = do f1 <- get :: Get Word64
f2 <- get :: Get Word64
return (FP f1 f2)
-- | The binary format of @'Serialized' a@ data includes FingerPrint
-- hash values for type and executable binary, which are checked
-- when reading Serialized data back in using @get@.
instance Typeable a => Binary (Serialized a) where
-- We make our life simple and construct/deconstruct Word
-- (U)Arrays, quite as we did in the Show/Read instances.
put (Serialized bArr#)
= do put prgHash
put (typeFP (undefined :: a))
let arr = UArray 0 (sz-1) sz bArr# :: UArray Int TargetWord
sz = case sizeofByteArray# bArr# of
sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord)
put arr
get = do hash <- get :: Get FP
when (hash /= prgHash)
(throw P_BinaryMismatch)
-- executables do not match
tp <- get :: Get FP
when (tp /= typeFP (undefined :: a))
(throw P_TypeMismatch)
-- Type error during packet parse
uarr <- get :: Get (UArray Int TargetWord)
let !(UArray _ _ sz bArr#) = uarr
return ( Serialized bArr# )
-- | Write serialised binary data directly to a file.
-- | Write serialised binary data directly to a file. May throw 'PackException's.
encodeToFile :: Typeable a => FilePath -> a -> IO ()
encodeToFile path x = trySerialize x >>= encodeFile path
-- | Directly read binary serialised data from a file. Catches
-- exceptions from decoding the file and re-throws @'ParseError'@s
-- | Directly read binary serialised data from a file. May throw
-- 'PackException's (catches I/O and Binary exceptions from decoding
-- the file and re-throws 'P_ParseError')
decodeFromFile :: Typeable a => FilePath -> IO a
decodeFromFile path = do ser <- (decodeFile path)
`catch`
@ -433,8 +101,53 @@ decodeFromFile path = do ser <- (decodeFile path)
----------------------------------------
-- digressive documentation
{- $ShowReadBinary
The power of evaluation-orthogonal serialisation is that one can
/externalise/ partially evaluated data (containing thunks), for
instance write it to disk or send it over a network.
Therefore, the module defines a 'Data.Binary' instance for
'Serialized' a, as well as instances for 'Read' and 'Show'@ which
satisfy @ 'read' . 'show' == 'id' :: 'Serialized' a -> 'Serialized' a@.
The phantom type is enough to ensure type-correctness when serialised
data remain in one single program run. However, when data from
previous runs are read in from an external source, their type needs to
be checked at runtime. Type information must be stored together with
the (binary) serialisation data.
The serialised data contain pointers to static data in the generating
program (top-level functions and constants) and very likely to
additional library code. Therefore, the /exact same binary/ must be
used when reading in serialised data from an external source. A hash
of the executable is therefore included in the representation as well.
-}
{- $packexceptions
'PackException's can occur at Haskell level or in the foreign primop.
The Haskell-level exceptions all occur when reading in
'GHC.Packing.Serialised' data, and are:
* 'P_BinaryMismatch': the serialised data have been produced by a
different executable (must be the same binary).
* 'P_TypeMismatch': the serialised data have the wrong type
* 'P_ParseError': serialised data could not be parsed (from binary or
text format)
The other exceptions are return codes of the foreign primitive
operation, and indicate errors at the C level. Most of them occur when
serialising data; the exception is 'P_GARBLED' which indicates corrupt
serialised data.
-}
{- $primitives
#background#
The functionality exposed by this module builds on serialisation of
Haskell heap graph structures, first implemented in the context of
implementing the GpH implementation GUM (Graph reduction on a

102
GHC/Packing/Core.hs Normal file
View File

@ -0,0 +1,102 @@
{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
{-# LANGUAGE GHCForeignImportPrim, ForeignFunctionInterface,
UnliftedFFITypes #-}
{-|
Module : GHC.Packing
Copyright : (c) Jost Berthold, 2010-2014,
License : BSD3
Maintainer : jb.diku@gmail.com
Stability : experimental
Portability : no (depends on GHC internals)
= Wrapper module for the foreign primitive operations
-}
module GHC.Packing.Core
( trySerialize, trySerializeWith, deserialize
) where
import GHC.Packing.Type
import GHC.Packing.PackException
import GHC.Exts
import GHC.Prim
import Control.Monad.Primitive
import Data.Primitive.ByteArray
import Control.Exception(throw)
-- the entire package won't support GHC < 7.8
#if __GLASGOW_HASKELL__ < 708
#error This module assumes GHC-7.8 or above
#endif
-- | Non-blocking serialisation routine using 'PackException's to
-- signal errors. This version does not block the calling thread when
-- a black hole is found, but instead signals the condition by the
-- 'P_BLACKHOLE' exception.
trySerialize :: a -> IO (Serialized a) -- throws PackException (RTS)
trySerialize x = trySerializeWith x defaultBufSize
-- | A default buffer size, used when using the old API
defaultBufSize :: Int
defaultBufSize = 10 * 2^20 -- 10 MB
-- | Extended interface function: Allocates a buffer of given size (in
-- bytes), serialises data into it, then truncates the buffer to the
-- actually required size before returning it (as @'Serialized' a@)
trySerializeWith :: a -> Int -> IO (Serialized a) -- using instance PrimMonad IO
trySerializeWith dat bufsize
= do buf <- newByteArray bufsize
size <- trySerializeInto buf dat
buf' <- truncate' buf size
ByteArray b# <- unsafeFreezeByteArray buf'
return (Serialized { packetData = b# })
-- | core routine. Packs x into mutable byte array buf, returns size
-- of packed x in buf
trySerializeInto :: MutableByteArray RealWorld -> a -> IO Int
trySerializeInto (MutableByteArray buf# ) x
= primitive (tryPack (unsafeCoerce# x :: Any) buf# )
-- | calls primitive, decodes/throws errors + wraps Int# size into Int
tryPack :: Any -> MutableByteArray# s
-> State# s -> (# State# s , Int #)
tryPack x# buf# s = case tryPack# x# buf# s of
(# s', 0#, size# #) -> (# s', I# size# #)
(# s', e#, 0# #) -> (# s', throw (decodeEx e#) #)
-- | serialisation primitive, implemented in C. Returns: a
-- status/error code and size used inside the array
foreign import prim "stg_tryPack" tryPack#
:: Any -> MutableByteArray# s -> State# s -> (# State# s, Int#, Int# #)
-- GHC-7.8 does not have an in-place shrink operation for MutableByteArrays
-- (added in GHC-7.9 on August 16, 2014)
-- GHC-7.9, August 2014 :: MutableByteArray# s -> Int# -> State# s -> State# s
-- with this one available, tryPack could do the work
-- for GHC-7.8, we copy
truncate' :: PrimMonad m => MutableByteArray (PrimState m) -> Int -> m (MutableByteArray (PrimState m))
truncate' b size
= if sizeofMutableByteArray b < size
then throw P_NOBUFFER -- XXX other error?
else do b' <- newByteArray size
copyMutableByteArray b' 0 b 0 size
return b'
--------------------------------------------------------
-- | Deserialisation function. May throw @'PackException'@ @'P_GARBLED'@
deserialize :: Serialized a -> IO a
deserialize p = primitive (deser (packetData p))
deser :: ByteArray# -> State# s -> (# State# s, a #)
deser buf s = case unpack# buf s of
(# s', 0#, x #) -> (# s', x #)
(# s', n#, _ #) -> (# s', throw (decodeEx n#) #)
foreign import prim "stg_unpack" unpack# :: ByteArray# -> State# s -> (# State# s, Int#, a #)

View File

@ -0,0 +1,91 @@
{-# LANGUAGE MagicHash, DeriveDataTypeable #-}
{-|
Module : GHC.Packing.PackException
Copyright : (c) Jost Berthold, 2010-2014,
License : BSD3
Maintainer : jb.diku@gmail.com
Stability : experimental
Portability : no (depends on GHC internals)
Exception type for packman library, using magic constants #include'd
from a C header file shared with the foreign primitive operation code.
'PackException's can occur at Haskell level or in the foreign primop.
The Haskell-level exceptions all occur when reading in
'GHC.Packing.Serialised' data, and are:
* 'P_BinaryMismatch': the serialised data have been produced by a
different executable (must be the same binary).
* 'P_TypeMismatch': the serialised data have the wrong type
* 'P_ParseError': serialised data could not be parsed (from binary or
text format)
The other exceptions are return codes of the foreign primitive
operation, and indicate errors at the C level. Most of them occur when
serialising data; the exception is 'P_GARBLED' which indicates corrupt
serialised data.
-}
module GHC.Packing.PackException
( PackException(..)
, decodeEx
) where
-- bring in error codes from cbits/Errors.h
#include "Errors.h"
import GHC.Exts
import GHC.Prim
import Control.Exception
import Data.Typeable
-- | Packing exception codes, matching error codes implemented in the
-- runtime system or describing errors which can occur within Haskell.
data PackException =
-- keep in sync with Errors.h
P_SUCCESS -- ^ no error, ==0. We do not expect this one to occur.
-- Error codes from the runtime system:
| P_BLACKHOLE -- ^ RTS: packing hit a blackhole (not blocking thread)
| P_NOBUFFER -- ^ RTS: buffer too small
| P_CANNOTPACK -- ^ RTS: contains closure which cannot be packed (MVar, TVar)
| P_UNSUPPORTED -- ^ RTS: contains unsupported closure type (implementation missing)
| P_IMPOSSIBLE -- ^ RTS: impossible case (stack frame, message,...RTS bug!)
| P_GARBLED -- ^ RTS: corrupted data for deserialisation
-- Error codes from inside Haskell
| P_ParseError -- ^ Haskell: Packet data could not be parsed
| P_BinaryMismatch -- ^ Haskell: Executable binaries do not match
| P_TypeMismatch -- ^ Haskell: Packet data encodes unexpected type
deriving (Eq, Ord, Typeable)
-- | decode an 'Int#' to a @'PackException'@. Magic constants are read
-- from file /cbits/Errors.h/.
decodeEx :: Int## -> PackException
decodeEx #{const P_SUCCESS}## = P_SUCCESS -- unexpected
decodeEx #{const P_BLACKHOLE}## = P_BLACKHOLE
decodeEx #{const P_NOBUFFER}## = P_NOBUFFER
decodeEx #{const P_CANNOTPACK}## = P_CANNOTPACK
decodeEx #{const P_UNSUPPORTED}## = P_UNSUPPORTED
decodeEx #{const P_IMPOSSIBLE}## = P_IMPOSSIBLE
decodeEx #{const P_GARBLED}## = P_GARBLED
decodeEx #{const P_ParseError}## = P_ParseError
decodeEx #{const P_BinaryMismatch}## = P_BinaryMismatch
decodeEx #{const P_TypeMismatch}## = P_TypeMismatch
decodeEx i## = error $ "Error value " ++ show (I## i##) ++ " not defined!"
instance Show PackException where
-- keep in sync with Errors.h
show P_SUCCESS = "No error." -- we do not expect to see this
show P_BLACKHOLE = "Packing hit a blackhole"
show P_NOBUFFER = "Pack buffer too small"
show P_CANNOTPACK = "Data contain a closure that cannot be packed (MVar, TVar)"
show P_UNSUPPORTED = "Contains an unsupported closure type (whose implementation is missing)"
show P_IMPOSSIBLE = "An impossible case happened (stack frame, message). This is probably a bug."
show P_GARBLED = "Garbled data for deserialisation"
show P_ParseError = "Packet parse error"
show P_BinaryMismatch = "Executable binaries do not match"
show P_TypeMismatch = "Packet data has unexpected type"
instance Exception PackException

284
GHC/Packing/Type.hs Normal file
View File

@ -0,0 +1,284 @@
{-# LANGUAGE CPP, MagicHash, BangPatterns, ScopedTypeVariables #-}
{-|
Module : GHC.Packing.Type
Copyright : (c) Jost Berthold, 2010-2014,
License : BSD3
Maintainer : Jost Berthold <jb.diku@gmail.com>
Stability : experimental
Portability : no (depends on GHC internals)
= Serialized type for the packman library, instances and helpers
The data type @'Serialized' a@ includes a phantom type @a@ to ensure
type safety within one and the same program run. Type @a@ can be
polymorphic (at compile time, that is) when @Serialized a@ is not used
apart from being argument to @deserialize@.
The @Show@, @Read@, and @Binary@ instances of @Serialized a@ require an
additional @Typeable@ context (which requires @a@ to be monomorphic)
in order to implement dynamic type checks when parsing and deserialising
data from external sources.
-}
module GHC.Packing.Type
-- ( Serialized(..)
-- TOOD assemble export list with structure and headings/text blocks
-- , ... )
where
import GHC.Prim -- ByteArray#
import GHC.Exts ( Int(..)) -- I#
-- Read and Show instances
import Text.Printf ( printf )
import Text.ParserCombinators.ReadP (sepBy1, many1, ReadP, munch,
munch1, pfail, readP_to_S, satisfy, skipSpaces, string )
import Data.Char ( isDigit )
-- Binary instance
import Data.Binary ( Get, Binary(..), encode, decode, encodeFile, decodeFile )
-- we use UArrays of machine word size (TargetWord)
import Data.Word( Word, Word64, Word32 )
import Data.Array.Base ( UArray(..), elems, listArray )
import Foreign.Storable ( sizeOf )
-- for dynamic type checks when parsing
import Data.Typeable (Typeable(..), typeOf)
import Data.Typeable.Internal (TypeRep(..))
import qualified GHC.Fingerprint
-- for a hash of the executable. Using GHC.Fingerprint.getFileHash
import GHC.Fingerprint(getFileHash)
import System.Environment
import System.IO.Unsafe
-- for control flow and exceptions
import Control.Monad(when)
import Control.Exception(throw)
import GHC.Packing.PackException
-- | The type of Serialized data. Phantom type 'a' ensures that we
-- unpack the expected type do not unpack rubbish.
data Serialized a = Serialized { packetData :: ByteArray# }
{- $ShowReadBinary
The power of evaluation-orthogonal serialisation is that one can
/externalise/ partially evaluated data (containing thunks), for
instance write it to disk or send it over a network.
Therefore, the module defines a 'Binary' instance for 'Serialized a',
as well as instances for 'Read' and 'Show'@ which satisfy
@ read . show == id :: 'Serialized' a -> 'Serialized' a@.
The phantom type is enough to ensure type-correctness when serialised
data remain in one single program run. However, when data from
previous runs are read in from an external source, their type needs to
be checked at runtime. Type information must be stored together with
the (binary) serialisation data.
The serialised data contain pointers to static data in the generating
program (top-level functions and constants) and very likely to
additional library code. Therefore, the /exact same binary/ must be
used when reading in serialised data from an external source. A hash
of the executable is therefore included in the representation as well.
-}
-- | prints packet as Word array in 4 columns (/Word/ meaning the
-- machine word size), and additionally includes Fingerprint hash
-- values for executable binary and type.
instance Typeable a => Show (Serialized a) where
show p = unlines [ "Serialization Packet, size " ++ show size,
", program " ++ show prgHash,
", type fingerprint" ++ show t,
showWArray (UArray 0 (size-1) size dat) ]
where size = case sizeofByteArray# dat of
sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord)
t = typeFP ( undefined :: a )
dat = packetData p
-- | Helper to show a serialized structure as a packet (Word Array)
showWArray :: UArray Int TargetWord -> String
showWArray arr = unlines [ show i ++ ":" ++ unwords (map showH row)
| (i,row) <- zip [0,4..] elRows ]
where showH w = -- "\t0x" ++ showHex w " "
printf ('\t':hexWordFmt) w
elRows = takeEach4 (elems arr)
takeEach4 :: [a] -> [[a]]
takeEach4 [] = []
takeEach4 xs = first:takeEach4 rest
where (first,rest) = splitAt 4 xs
-----------------------------------------------
-- | Reads the format generated by the (@'Show'@) instance, checks
-- hash values for executable and type and parses exactly as much as
-- the included data size announces.
instance Typeable a => Read (Serialized a)
-- using ReadP parser (base-4.x), eats
where readsPrec _ input
= case parseP input of
[] -> throw P_ParseError -- no parse
[((sz,tp,dat),r)]
-> let !(UArray _ _ _ arr# ) = listArray (0,sz-1) dat
t = typeFP (undefined::a)
in if t == tp
then [(Serialized arr# , r)]
else throw P_TypeMismatch
other-> throw P_ParseError
-- ambiguous parse for packet
-- | Packet Parser: read header with size and type, then iterate over
-- array values, reading several hex words in one row, separated by
-- tab and space. Packet size needed to avoid returning a prefix.
-- Could also consume other formats of the array (not implemented).
-- Returns: (data size in words, type fingerprint, array values)
parseP :: ReadS (Int, FP, [TargetWord])
parseP = readP_to_S $
do string "Serialization Packet, size "
sz_str <- munch1 isDigit
let sz = read sz_str::Int
string ", program "
h <- munch1 (not . (== '\n'))
when (read h /= prgHash) (throw P_BinaryMismatch)
-- executables do not match. No ambiguous parses here,
-- so just throw; otherwise we would only pfail.
newline
string ", type "
tp <- munch1 (not . (== '\n'))
newline
let startRow = do { many1 digit; colon; tabSpace }
row = do { startRow; sepBy1 hexNum tabSpace }
valss <- sepBy1 row newline
skipSpaces -- eat remaining spaces
let vals = concat valss
l = length vals
-- filter out wrong lengths:
if (sz /= length vals) then pfail
else return (sz, read tp, vals)
digit = satisfy isDigit
colon = satisfy (==':')
tabSpace = munch1 ( \x -> x `elem` " \t" )
newline = munch1 (\x -> x `elem` " \n")
hexNum :: ReadP TargetWord
hexNum = do string "0x"
ds <- munch hexDigit
return (read ("0x" ++ ds))
where hexDigit = (\x -> x `elem` "0123456789abcdefABCDEF")
------------------------------------------------------------------
-- | The binary format of @'Serialized' a@ data includes FingerPrint
-- hash values for type and executable binary, which are checked
-- when reading Serialized data back in using @get@.
instance Typeable a => Binary (Serialized a) where
-- We make our life simple and construct/deconstruct Word
-- (U)Arrays, quite as we did in the Show/Read instances.
put (Serialized bArr#)
= do put prgHash
put (typeFP (undefined :: a))
let arr = UArray 0 (sz-1) sz bArr# :: UArray Int TargetWord
sz = case sizeofByteArray# bArr# of
sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord)
put arr
get = do hash <- get :: Get FP
when (hash /= prgHash)
(throw P_BinaryMismatch)
-- executables do not match
tp <- get :: Get FP
when (tp /= typeFP (undefined :: a))
(throw P_TypeMismatch)
-- Type error during packet parse
uarr <- get :: Get (UArray Int TargetWord)
let !(UArray _ _ sz bArr#) = uarr
return ( Serialized bArr# )
------------------------------------------------------------------
-- $ComparingTypes
-----------------------------------------------
-- Helper functions to compare types at runtime:
-- We use type "fingerprints" defined in 'GHC.Fingerprint.Type'
-- This should ensure (as of GHC.7.8) that types with the same name
-- but different definition get different hashes. (however, we also
-- require the executable to be exactly the same, so this is not
-- strictly necessary anyway).
-- Typeable context for dynamic type checks.
-- | The module uses a custom GHC fingerprint type with its two Word64
-- fields, to be able to /read/ fingerprints
data FP = FP Word64 Word64 deriving (Read, Show, Eq)
-- | comparing 'FP's
matches :: Typeable a => a -> FP -> Bool
matches x (FP c1 c2) = f1 == c1 && f2 == c2
where (TypeRep (GHC.Fingerprint.Fingerprint f1 f2) _ _) = typeOf x
-- | creating an 'FP' from a GHC 'Fingerprint'
toFP :: GHC.Fingerprint.Fingerprint -> FP
toFP (GHC.Fingerprint.Fingerprint f1 f2) = FP f1 f2
-- | creating a type fingerprint
typeFP :: Typeable a => a -> FP
typeFP x = toFP fp
where (TypeRep fp _ _) = typeOf x
-- | Binary instance for fingerprint data (encoding TypeRep and
-- executable in binary-encoded @Serialized a@)
instance Binary FP where
put (FP f1 f2) = do put f1
put f2
get = do f1 <- get :: Get Word64
f2 <- get :: Get Word64
return (FP f1 f2)
-----------------------------------------------
-- | To check that the program (executable) is identical when packing
-- and unpacking, the fingerprint type from above is used (Read/Show
-- instances required). An 'FP' fingerprint of the executable is
-- computed once, by unsafePerformIO inside this CAF (safe to inline,
-- just inefficient).
{-# NOINLINE prgHash #-}
prgHash :: FP
prgHash = unsafePerformIO $
getExecutablePath >>= getFileHash >>= return . toFP
-----------------------------------------------
-- | The target word size is the size of a machine word on the
-- platform we run on.
--
-- This type is only used in Binary, Read and Show instances, where
-- packets are stored as 'UArrays' of 'TargetWord'.
--
-- Actually, GHC uses machine word size (as Haskell 2010 spec. does
-- not fix it) so we could just use Word. See
-- <http://www.haskell.org/ghc/docs/7.8.3/html/users_guide/bugs-and-infelicities.html#haskell-98-2010-undefined>
-- We'd rather just import 'GHC.Constants.TargetWord' but it was
-- removed. This code here is a cheap and incomplete hack, as the
-- package would otherwise need a configure script.
#if x86_64_BUILD_ARCH
type TargetWord = Word64
hexWordFmt = "0x%016x"
#elif i386_BUILD_ARCH
type TargetWord = Word32
hexWordFmt = "0x%08x"
#elif powerpc_BUILD_ARCH
#error Don't know word size of your Power-PC model
#else
#warning Don't know the word size on your machine.
type TargetWord = Word
#endif

View File

@ -94,7 +94,7 @@ packThreadId _ = ("packing a thread ID (unsupported)",
packMVar :: MyTest
packMVar _ = ("packing an MVar (should be cannotpack)",
do m <- newEmptyMVar :: IO (MVar Integer)
expectException P_CANNOT_PACK $ trySerialize m
expectException P_CANNOTPACK $ trySerialize m
)
packBH :: MyTest

View File

@ -1,18 +1,29 @@
#ifndef ERRORS_H
#define ERRORS_H
/*
* Return codes for the packing routine (rts/parallel/Pack.c)
* Must be in sync with library code.
* We need them here for use in Cmm code in PrimOps.cmm
*/
#define P_SUCCESS 0x00 /* used for return value of PackToMemory only */
#define P_BLACKHOLE 0x01 /* possibly also blocking the packing thread */
#define P_NOBUFFER 0x02 /* buffer too small */
#define P_CANNOTPACK 0x03 /* type cannot be packed (MVar, TVar) */
#define P_UNSUPPORTED 0x04 /* type not supported (but could/should be) */
#define P_IMPOSSIBLE 0x05 /* impossible type found (stack frame,msg, etc) */
#define P_GARBLED 0x06 /* invalid data for deserialisation */
#define P_ERRCODEMAX 0x06
#define P_SUCCESS 0x00 /* used for return value of PackToMemory only */
#define P_BLACKHOLE 0x01 /* possibly also blocking the packing thread */
#define P_NOBUFFER 0x02 /* buffer too small */
#define P_CANNOTPACK 0x03 /* type cannot be packed (MVar, TVar) */
#define P_UNSUPPORTED 0x04 /* type not supported (but could/should be) */
#define P_IMPOSSIBLE 0x05 /* impossible type found (stack frame,msg, etc) */
#define P_GARBLED 0x06 /* invalid data for deserialisation */
// for completeness, we also include the Haskell error codes here:
#define P_ParseError 0x07 /* Packet parse error */
#define P_BinaryMismatch 0x08 /* Executable binaries do not match */
#define P_TypeMismatch 0x09 /* Packet data has unexpected type */
#undef P_ERRCODEMAX
#define P_ERRCODEMAX 0x09
// predicate for checks:
#define isPackError(bufptr) (((StgWord) (bufptr)) <= P_ERRCODEMAX)
#endif

View File

@ -22,6 +22,9 @@ flag Debug
library
exposed-modules: GHC.Packing
other-modules: GHC.Packing.PackException
GHC.Packing.Type
GHC.Packing.Core
build-depends: base >= 4.7,
ghc >= 7.8,
ghc-prim >= 0.3,