Update documentation

Restore "Alias"-free functions
This commit is contained in:
lyxia 2016-04-28 22:30:59 +02:00
parent 7ce4d60fc5
commit 93244e1132
5 changed files with 205 additions and 63 deletions

View File

@ -1,34 +1,26 @@
Generic random generators
=========================
Create a sized random generator for almost any type.
Define sized random generators for almost any type.
This will only work on types `a` such that:
{-# LANGUAGE DeriveDataTypeable #-}
import Data.Data
import Test.QuickCheck
import Data.Random.Generics
- they are instances of `Data`;
- the set of types of subterms of values of type `a` is finite;
- and all of these types have at least one finite value (i.e., values with
finitely many constructors) (otherwise our concept of "size" would not be
meaningful).
data Term = Lambda Int Term | App Term Term | Var Int
deriving (Show, Data)
The size of a value is its number of constructors.
instance Arbitrary Term where
arbitrary = sized (generator asGen)
The desired average `size` must be reachable, i.e., be larger than the smallest
value of type `a`, and smaller than the largest value of type `a` (when there
is one).
main = sample (arbitrary :: Gen Term)
Examples of problematic types
-----------------------------
data E a = L a | R (E [a])
Indeed, in `E`, every type of the form `[[...[[a]]...]]` occurs after
sufficiently many unwrappings of `R`'s.
data I = C I
If we ignore bottoms, the only value of type `I` is an infinite stack of `C`
constructors.
- Objects of the same size (number of constructors) occur with the same
probability (see Duchon et al., references below).
- Implements rejection sampling and pointing.
- Works with QuickCheck and MonadRandom.
- Can be extended or modified with user defined generators.
References
----------
@ -37,7 +29,7 @@ References
[Boltzmann Samplers for the Random Generation of Combinatorial Structures](http://algo.inria.fr/flajolet/Publications/DuFlLoSc04.pdf),
P. Duchon, P. Flajolet, G. Louchard, G. Schaeffer.
- The evaluation of generating functions defined by systems of equations is
taken from
- The numerical evaluation of recursively defined generating functions
is taken from
[Boltzmann Oracle for Combinatorial Systems](http://www.dmtcs.org/pdfpapers/dmAI0132.pdf),
C. Pivoteau, B. Salvy, M. Soria.

View File

@ -20,21 +20,21 @@ s (N l r) = 1 + s l + s r
s L = 1
rejectT :: Int -> Gen T
rejectT = generator asGen []
rejectT = generator asGen
rejectSimpleT :: Int -> Gen T
rejectSimpleT = simpleGenerator' asGen []
rejectSimpleT = simpleGenerator' asGen
-- Pointing makes the generator more precise.
pointT :: Int -> Gen T
pointT = pointedGenerator asGen []
pointT = pointedGenerator asGen
pointRejectT :: Int -> Gen T
pointRejectT size =
generator_ asGen [] 1 (Just size) (tolerance epsilon size)
main = defaultMain $
0 : [2 ^ e | e <- [5 .. 10]] >>= \n ->
[2 ^ e | e <- [5 .. 10]] >>= \n ->
[ bench ("reject " ++ show n) $
nfGen (rejectT n)
, bench ("reject-simple " ++ show n) $

View File

@ -22,7 +22,7 @@ instance Show F where
gen :: Gen [F]
gen = pointedGenerator asGen aliases 10
where
aliases = [alias' $ \() -> fmap F arbitrary :: Gen F]
aliases = [alias $ \() -> fmap F arbitrary :: Gen F]
main :: IO ()
main = sample gen

View File

@ -10,7 +10,7 @@ import Control.Monad.Random
gen :: IO [Int]
gen = pointedGenerator asMonadRandom aliases 20
where
aliases = [alias' $ \() -> getRandomR (0, 100) :: IO Int]
aliases = [alias $ \() -> getRandomR (0, 100) :: IO Int]
main :: IO ()
main = gen >>= print

View File

@ -1,4 +1,32 @@
-- | Generic Boltzmann samplers.
--
-- = Size
--
-- The size of a value is its number of constructors.
--
-- Below, however, the 'Size' parameter
--
-- @
-- type 'Size' = Int
-- @
--
-- is interpreted as the difference
-- between the size of the smallest value and the desired approximate size.
--
-- For example, values of type @Either () [Bool]@ have at least two constructors,
-- so
--
-- @
-- 'generator' 'asGen' delta :: Gen (Either () [Bool])
-- @
--
-- will target sizes close to @2 + delta@;
-- the offset becomes less noticeable as @delta@ grows to infinity.
--
-- This default behavior makes better use of the domain of sizes when used in
-- combination with the 'sized' combinator, so that QuickCheck generates
-- non-trivial data even at very small size values.
module Data.Random.Generics where
import Data.Bifunctor
@ -14,31 +42,140 @@ import Data.Random.Generics.Boltzmann.Types
-- * Main functions
-- | Singular ceiled rejection sampler.
--
-- It works with recursive tree-like structures, as opposed to (lists of)
-- structures with bounded size. It has the advantage of using the same oracle
-- for all sizes, which will be computed before passing the size argument.
-- Hence this is the most convenient function to get generators with parametric
-- size:
--
-- @
-- instance Arbitrary MyT where
-- arbitrary = sized (generator asGen)
-- @
generator :: (Data a, Monad m) => PrimRandom m -> [AliasR m] -> Size -> m a
generator primRandom aliases =
generator_ primRandom aliases 0 Nothing . tolerance epsilon
-- $sized
-- These functions and their @_With@ counterparts below can be partially
-- applied to a 'PrimRandom' dictionary: the numerical /oracles/ are computed
-- once and for all, so they can be reused for different sizes.
-- | Generator of pointed values.
-- | @
-- 'generator' 'asGen' :: Int -> 'Gen' a
-- 'generator' 'asMonadRandom' :: 'MonadRandom' m => Int -> m a
-- @
--
-- Singular ceiled rejection sampler.
--
-- This works with recursive tree-like structures, as opposed to (lists of)
-- structures with bounded size. More precisely, the generating function of the
-- given type should have a finite radius of convergence, with a singularity of
-- a certain kind (see Duchon et al., reference in the README).
--
-- This has the advantage of using the same oracle for all sizes. Hence this is
-- the most convenient function to get generators with parametric size:
--
-- @
-- instance 'Arbitrary' MyT where
-- 'arbitrary' = 'sized' ('generator' 'asGen')
-- @
generator :: (Data a, Monad m) => PrimRandom m -> Size -> m a
generator = generatorWith []
-- | @
-- 'pointedGenerator' 'asGen' :: Int -> 'Gen' a
-- 'pointedGenerator' 'asMonadRandom' :: 'MonadRandom' m => Int -> m a
-- @
--
-- Generator of pointed values.
--
-- It usually has a flatter distribution of sizes than a simple Boltzmann
-- sampler, making it an efficient alternative to rejection sampling.
--
-- It also works on more types, but relies on multiple oracles.
--
-- = Pointing
--
-- The /pointing/ of a type @t@ is a derived type whose values are essentially
-- values of type @t@, with one of their constructors being "pointed".
-- Alternatively, we may turn every constructor into variants that indicate
-- the position of points.
--
-- @
-- -- Original type
-- data Tree = Node Tree Tree | Leaf
-- -- Pointing of Tree
-- data Tree'
-- = Tree' Tree -- Point at the root
-- | Node'0 Tree' Tree -- Point to the left
-- | Node'1 Tree Tree' -- Point to the right
-- @
--
-- Pointed values are easily mapped back to the original type by erasing the
-- point. Pointing makes larger values occur much more frequently, while
-- preserving the uniformness of the distribution conditionally to a fixed
-- size.
-- Oracles are computed only for sizes that are a power of two away from
-- the minimum size of the datatype @minSize + 2 ^ e@.
pointedGenerator :: (Data a, Monad m) => PrimRandom m -> [Alias m] -> Size -> m a
pointedGenerator primRandom aliases = \size ->
pointedGenerator :: (Data a, Monad m) => PrimRandom m -> Size -> m a
pointedGenerator = pointedGeneratorWith []
-- ** Fixed size
-- $fixed
-- These functions do not benefit from the same precomputation pattern as the
-- above. 'simpleGenerator'' works with slightly more types than 'generator',
-- since it doesn't require the existence of a singularity.
--
-- The overhead of computing the "oracles" has not been measured yet.
-- | Generator of pointed values.
pointedGenerator' :: (Data a, Monad m) => PrimRandom m -> Size -> m a
pointedGenerator' = pointedGeneratorWith' []
-- | Ceiled rejection sampler with given average size.
simpleGenerator' :: (Data a, Monad m) => PrimRandom m -> Size -> m a
simpleGenerator' = simpleGeneratorWith' []
-- * Generators with aliases
-- $aliases
-- Boltzmann samplers can normally be defined only for types @a@ such that:
--
-- - they are instances of 'Data';
-- - the set of types of subterms of values of type @a@ is finite;
-- - and all of these types have at least one finite value (i.e., values with
-- finitely many constructors).
--
-- Examples of misbehaving types are:
--
-- - @a -> b -- Not Data@
-- - @data E a = L a | R (E [a]) -- Contains a, [a], [[a]], [[[a]]], etc.@
-- - @data I = C I -- No finite value@
--
-- = Alias
--
-- The 'Alias' type works around these limitations ('AliasR' for rejection
-- samplers).
-- This existential wrapper around a user-defined function @f :: a -> m b@
-- makes @generic-random@ view occurences of the type @b@ as @a@ when
-- processing a recursive system of types, possibly stopping some infinite
-- unrolling of type definitions. When a value of type @b@ needs to be
-- generated, it generates an @a@ which is passed to @f@.
--
-- @
-- let
-- as = ['aliasR' $ \\() -> return (L []) :: 'Gen' (E [[Int]])]
-- in
-- 'generatorWith' as 'asGen' :: 'Size' -> 'Gen' (E Int)
-- @
--
-- Another use is to plug in user-defined generators where the default is not
-- satisfactory, for example, to get positive @Int@s:
--
-- @
-- let
-- as = ['alias' $ \\() -> 'choose' (0, 100) :: 'Gen' Int)]
-- in
-- 'pointedGeneratorWith' as 'asGen' :: 'Size' -> 'Gen' [Int]
-- @
generatorWith
:: (Data a, Monad m) => [AliasR m] -> PrimRandom m -> Size -> m a
generatorWith aliases primRandom =
generator_ primRandom aliases 0 Nothing . tolerance epsilon
pointedGeneratorWith
:: (Data a, Monad m) => [Alias m] -> PrimRandom m -> Size -> m a
pointedGeneratorWith aliases primRandom = \size ->
snd . fromMaybe (last generators) . find ((>= size) . fst) $ generators
where
((minSize, maxSize'), makeGenerator') =
@ -52,16 +189,16 @@ pointedGenerator primRandom aliases = \size ->
maxSize_ = max 0 (maxSize - minSize - 1)
pow2s = 0 : [ 2 ^ e | e <- [0 :: Int ..] ]
-- * Fixed size
-- ** Fixed size
-- | Generator of pointed values.
pointedGenerator' :: (Data a, Monad m) => PrimRandom m -> [Alias m] -> Size -> m a
pointedGenerator' primRandom aliases size =
pointedGeneratorWith'
:: (Data a, Monad m) => [Alias m] -> PrimRandom m -> Size -> m a
pointedGeneratorWith' aliases primRandom size =
snd (makeGenerator primRandom aliases []) 1 (Just size)
-- | Ceiled rejection sampler with given average size.
simpleGenerator' :: (Data a, Monad m) => PrimRandom m -> [AliasR m] -> Size -> m a
simpleGenerator' primRandom aliases size =
simpleGeneratorWith'
:: (Data a, Monad m) => [AliasR m] -> PrimRandom m -> Size -> m a
simpleGeneratorWith' aliases primRandom size =
generator_ primRandom aliases 0 (Just size) (tolerance epsilon size)
-- * Auxiliary definitions
@ -70,7 +207,9 @@ simpleGenerator' primRandom aliases size =
-- outside the tolerance interval.
--
-- The target size and the tolerance interval are shifted and clamped to the actual
-- size range of the datatype.
-- size range of the datatype. (See Size section above.)
--
-- Used to implement 'generator' and 'simpleGenerator''.
generator_ :: (Data a, Monad m)
=> PrimRandom m -> [AliasR m] -> Int -> Maybe Size -> (Size, Size) -> m a
generator_ primRandom aliases = \k size ->
@ -80,6 +219,14 @@ generator_ primRandom aliases = \k size ->
clamp' = maybe id (min . max minSize . subtract 1) maxSize' . (minSize +)
clamp = maybe id min maxSize' . (minSize +)
-- ** Dictionaries
-- $primrandom
-- @'PrimRandom' m@ is a record of basic components to build our generators
-- with, allowing the implementation to remain abstract over both the
-- 'Test.QuickCheck.Gen' type and 'MonadRandom' instances. The concrete records
-- 'asGen' and 'asMonadRandom' provide their respective specializations.
-- | Dictionary for QuickCheck's 'Gen'.
asGen :: PrimRandom Gen
asGen = PrimRandom
@ -100,11 +247,14 @@ asMonadRandom = PrimRandom
getRandom
getRandom
alias :: (Data a, Data b) => (m a -> m b) -> Alias m
alias = Alias
-- ** Aliases
alias' :: (Monad m, Data a, Data b) => (a -> m b) -> Alias m
alias' = Alias . (=<<)
alias :: (Monad m, Data a, Data b) => (a -> m b) -> Alias m
alias = Alias . (=<<)
aliasR' :: (Monad m, Data a, Data b) => (a -> m b) -> AliasR m
aliasR' = Alias . (=<<) . fmap (lift . lift . lift)
aliasR :: (Monad m, Data a, Data b) => (a -> m b) -> AliasR m
aliasR = Alias . (=<<) . fmap (lift . lift . lift)
-- | The true and more general form of 'Alias'.
alias' :: (Data a, Data b) => (m a -> m b) -> Alias m
alias' = Alias