From ac0e4b22c8c000e95cfb9b451835b8562cfc0223 Mon Sep 17 00:00:00 2001 From: Huw Campbell Date: Tue, 20 Dec 2016 16:31:09 +1100 Subject: [PATCH] Remove primes on shape instantiations Add singletons for Shape and remove hacks on recurrent nets Add Recurrent Nets --- README.md | 2 +- cbits/gradient_decent.c | 13 + cbits/gradient_decent.h | 9 + cbits/im2col.c | 15 +- cbits/im2col.h | 9 +- grenade.cabal | 71 +++++ mafia | 6 +- main/feedforward.hs | 20 +- main/mnist.hs | 25 +- main/recurrent.hs | 87 +++++++ main/shakespeare.hs | 156 +++++++++++ src/Grenade/Core/Network.hs | 45 ++-- src/Grenade/Core/Runner.hs | 50 ++-- src/Grenade/Core/Shape.hs | 190 ++++++++++---- src/Grenade/Graph/GraphNetwork.hs | 37 +++ src/Grenade/Layers/Convolution.hs | 58 ++--- src/Grenade/Layers/Crop.hs | 12 +- src/Grenade/Layers/Dropout.hs | 13 +- src/Grenade/Layers/Flatten.hs | 29 ++- src/Grenade/Layers/FullyConnected.hs | 22 +- src/Grenade/Layers/Fuse.hs | 9 +- src/Grenade/Layers/Internal/Convolution.hs | 24 +- src/Grenade/Layers/Internal/Pooling.hs | 26 +- src/Grenade/Layers/Internal/Update.hs | 70 +++++ src/Grenade/Layers/Logit.hs | 16 +- src/Grenade/Layers/Pad.hs | 12 +- src/Grenade/Layers/Pooling.hs | 20 +- src/Grenade/Layers/Relu.hs | 15 +- src/Grenade/Layers/Tanh.hs | 15 +- src/Grenade/Recurrent.hs | 9 + src/Grenade/Recurrent/Core/Network.hs | 98 +++++++ src/Grenade/Recurrent/Core/Runner.hs | 144 +++++++++++ .../Recurrent/Layers/BasicRecurrent.hs | 92 +++++++ src/Grenade/Recurrent/Layers/LSTM.hs | 244 ++++++++++++++++++ src/Grenade/Recurrent/Layers/Trivial.hs | 23 ++ src/Grenade/Utils/OneHot.hs | 84 ++++++ test/Test/Grenade/Layers/Convolution.hs | 30 ++- test/Test/Grenade/Layers/FullyConnected.hs | 8 +- test/Test/Grenade/Layers/Pooling.hs | 10 +- test/Test/Grenade/Recurrent/Layers/LSTM.hs | 101 ++++++++ .../Recurrent/Layers/LSTM/Reference.hs | 149 +++++++++++ test/Test/Jack/Hmatrix.hs | 7 +- test/test.hs | 13 +- 43 files changed, 1785 insertions(+), 303 deletions(-) create mode 100644 cbits/gradient_decent.c create mode 100644 cbits/gradient_decent.h create mode 100644 main/recurrent.hs create mode 100644 main/shakespeare.hs create mode 100644 src/Grenade/Graph/GraphNetwork.hs create mode 100644 src/Grenade/Layers/Internal/Update.hs create mode 100644 src/Grenade/Recurrent.hs create mode 100644 src/Grenade/Recurrent/Core/Network.hs create mode 100644 src/Grenade/Recurrent/Core/Runner.hs create mode 100644 src/Grenade/Recurrent/Layers/BasicRecurrent.hs create mode 100644 src/Grenade/Recurrent/Layers/LSTM.hs create mode 100644 src/Grenade/Recurrent/Layers/Trivial.hs create mode 100644 src/Grenade/Utils/OneHot.hs create mode 100644 test/Test/Grenade/Recurrent/Layers/LSTM.hs create mode 100644 test/Test/Grenade/Recurrent/Layers/LSTM/Reference.hs diff --git a/README.md b/README.md index eb76892..c063a96 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Usage To perform back propagation, one can call the eponymous function ```haskell backPropagate :: forall input target shapes layers. (Head shapes ~ input, Last shapes ~ target) - => Network layers shapes -> S' input -> S' target -> Gradients layers + => Network layers shapes -> S input -> S target -> Gradients layers ``` which takes a network, appropriate input and target data, and returns the back propagated gradients for the network. 
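As a hedged usage sketch (assuming a `net :: Network layers shapes`, an `input` and `target` of the network's first and last shapes, `Head`/`Last` from `Data.Singletons.Prelude`, and `applyUpdate` from `Grenade.Core.Runner` in scope), a single learning step can be written by feeding these gradients to `applyUpdate`, which is exactly what `train` does in one call:

```haskell
sgdStep :: LearningParameters
        -> Network layers shapes
        -> S (Head shapes) -> S (Last shapes)
        -> Network layers shapes
sgdStep rate net input target =
  let grads = backPropagate net input target  -- reverse-mode gradients for every layer
  in  applyUpdate rate net grads              -- fold the gradients back in with one SGD step
```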
The shapes of the gradients are diff --git a/cbits/gradient_decent.c b/cbits/gradient_decent.c new file mode 100644 index 0000000..3eb4cf0 --- /dev/null +++ b/cbits/gradient_decent.c @@ -0,0 +1,13 @@ +#include "gradient_decent.h" + +void decend_cpu(int len, double rate, double momentum, double regulariser, + const double* weights, + const double* gradient, + const double* last, + double* outputWeights, double* outputMomentum) { + + for (int i = 0; i <= len; i++) { + outputMomentum[i] = momentum * last[i] - rate * gradient[i]; + outputWeights[i] = weights[i] + outputMomentum[i] - (rate * regulariser) * weights[i]; + } +} diff --git a/cbits/gradient_decent.h b/cbits/gradient_decent.h new file mode 100644 index 0000000..d2178e5 --- /dev/null +++ b/cbits/gradient_decent.h @@ -0,0 +1,9 @@ +#include +#include + +void decend_cpu(int len, double rate, double momentum, double regulariser, + const double* weights, + const double* gradient, + const double* last, + double* outputWeights, double* outputMomentum); + diff --git a/cbits/im2col.c b/cbits/im2col.c index 0302c10..d7f4bf4 100644 --- a/cbits/im2col.c +++ b/cbits/im2col.c @@ -1,11 +1,10 @@ #include "im2col.h" -void im2col_cpu(const double* data_im, int dataOffset, const int channels, +void im2col_cpu(const double* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_col) { - data_im += dataOffset; const int channel_size = height * width; for (int fitting_height = 0; fitting_height <= (height - kernel_h); fitting_height += stride_h) { @@ -23,13 +22,12 @@ void im2col_cpu(const double* data_im, int dataOffset, const int channels, } } -void col2im_cpu(const double* data_col, int dataOffset, const int channels, +void col2im_cpu(const double* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_im) { memset(data_im, 0, height * width * channels * sizeof(double)); - data_col += dataOffset; const int channel_size = height * width; @@ -50,13 +48,11 @@ void col2im_cpu(const double* data_col, int dataOffset, const int channels, inline double max ( double a, double b ) { return a > b ? 
a : b; } -void pool_forwards_cpu(const double* data_im, int dataOffset, const int channels, +void pool_forwards_cpu(const double* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_pooled) { - data_im += dataOffset; - const int channel_size = height * width; for (int channel = 0; channel < channels; channel++) { @@ -89,14 +85,11 @@ void pool_forwards_cpu(const double* data_im, int dataOffset, const int channels } } -void pool_backwards_cpu(const double* data_im, int data_im_offset, - const double* data_pooled, int data_pooled_offset, +void pool_backwards_cpu(const double* data_im, const double* data_pooled, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_backgrad ) { - data_im += data_im_offset; - data_pooled += data_pooled_offset; memset(data_backgrad, 0, height * width * channels * sizeof(double)); const int channel_size = height * width; diff --git a/cbits/im2col.h b/cbits/im2col.h index 4f7afac..c290177 100644 --- a/cbits/im2col.h +++ b/cbits/im2col.h @@ -2,23 +2,22 @@ #include #include -void im2col_cpu(const double* data_im, int dataOffset, const int channels, +void im2col_cpu(const double* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_col); -void col2im_cpu(const double* data_col, int dataOffset, const int channels, +void col2im_cpu(const double* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_im); -void pool_forwards_cpu(const double* data_im, int dataOffset, const int channels, +void pool_forwards_cpu(const double* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_pooled); -void pool_backwards_cpu(const double* data_im, int data_im_offset, - const double* data_pooled, int data_pooled_offset, +void pool_backwards_cpu(const double* data_im, const double* data_pooled, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, double* data_backgrad ); diff --git a/grenade.cabal b/grenade.cabal index 3a5cdb7..627688c 100644 --- a/grenade.cabal +++ b/grenade.cabal @@ -19,6 +19,8 @@ library base >= 4.8 && < 5 , bytestring == 0.10.* , async + , containers + , deepseq , either == 4.4.* , exceptions == 0.8.* , hmatrix @@ -26,9 +28,11 @@ library , mtl >= 2.2.1 && < 2.3 , parallel == 3.2.* , primitive + , reflection , text == 1.2.* , transformers , singletons + , vector ghc-options: -Wall @@ -55,9 +59,23 @@ library Grenade.Layers.Internal.Convolution Grenade.Layers.Internal.Pooling + Grenade.Layers.Internal.Update + + Grenade.Recurrent + + Grenade.Recurrent.Core.Network + Grenade.Recurrent.Core.Runner + + Grenade.Recurrent.Layers.BasicRecurrent + Grenade.Recurrent.Layers.LSTM + Grenade.Recurrent.Layers.Trivial + + Grenade.Utils.OneHot includes: cbits/im2col.h + cbits/gradient_decent.h c-sources: cbits/im2col.c + cbits/gradient_decent.c cc-options: -std=c99 -O3 -msse4.2 -Wall -Werror -DCABAL=1 @@ -90,6 +108,40 @@ executable mnist , transformers , singletons , MonadRandom + , vector + +executable recurrent + ghc-options: -Wall -threaded -O2 + main-is: main/recurrent.hs + build-depends: base + , 
grenade + , attoparsec + , either + , optparse-applicative == 0.12.* + , text == 1.2.* + , mtl >= 2.2.1 && < 2.3 + , hmatrix >= 0.18 && < 0.19 + , transformers + , singletons + , MonadRandom + + +executable shakespeare + ghc-options: -Wall -threaded -O2 + main-is: main/shakespeare.hs + build-depends: base + , grenade + , attoparsec + , either + , optparse-applicative == 0.12.* + , text == 1.2.* + , mtl >= 2.2.1 && < 2.3 + , hmatrix >= 0.18 && < 0.19 + , transformers + , singletons + , vector + , MonadRandom + , containers test-suite test @@ -117,6 +169,8 @@ test-suite test , quickcheck-instances == 0.3.* , MonadRandom , random + , ad + , reflection benchmark bench @@ -135,3 +189,20 @@ benchmark bench , criterion == 1.1.* , grenade , hmatrix + +benchmark bench-lstm + type: exitcode-stdio-1.0 + + main-is: bench-lstm.hs + + ghc-options: -Wall -threaded -O2 + + hs-source-dirs: + bench + + build-depends: + base >= 3 && < 5 + , bytestring == 0.10.* + , criterion == 1.1.* + , grenade + , hmatrix diff --git a/mafia b/mafia index 0e2103f..4f1fa87 100755 --- a/mafia +++ b/mafia @@ -1,5 +1,7 @@ #!/bin/sh -eu +: ${MAFIA_HOME:=$HOME/.mafia} + fetch_latest () { if [ -z ${MAFIA_TEST_MODE+x} ]; then TZ=$(date +"%T") @@ -55,7 +57,7 @@ exec_mafia () { # If we can't find the mafia version, then we need to upgrade the script. run_upgrade else - MAFIA_BIN=$HOME/.ambiata/mafia/bin + MAFIA_BIN=$MAFIA_HOME/bin MAFIA_FILE=mafia-$MAFIA_VERSION MAFIA_PATH=$MAFIA_BIN/$MAFIA_FILE @@ -118,4 +120,4 @@ case "$MODE" in upgrade) shift; run_upgrade "$@" ;; *) exec_mafia "$@" esac -# Version: a1b39ee8ac1969ed2e891b9062d079be75863e99 +# Version: 3044e63eb472fb9e16926d4ab2ca9dd9e255829c diff --git a/main/feedforward.hs b/main/feedforward.hs index 7b572d2..e0787b1 100644 --- a/main/feedforward.hs +++ b/main/feedforward.hs @@ -4,10 +4,10 @@ {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE FlexibleContexts #-} - import Control.Monad import Control.Monad.Random +import Data.List ( foldl' ) + import GHC.TypeLits import qualified Numeric.LinearAlgebra.Static as SA @@ -34,18 +34,18 @@ netTest :: MonadRandom m => LearningParameters -> Int -> m String netTest rate n = do inps <- replicateM n $ do s <- getRandom - return $ S1D' $ SA.randomVector s SA.Uniform * 2 - 1 - let outs = flip map inps $ \(S1D' v) -> + return $ S1D $ SA.randomVector s SA.Uniform * 2 - 1 + let outs = flip map inps $ \(S1D v) -> if v `inCircle` (fromRational 0.33, 0.33) || v `inCircle` (fromRational (-0.33), 0.33) - then S1D' $ fromRational 1 - else S1D' $ fromRational 0 + then S1D $ fromRational 1 + else S1D $ fromRational 0 net0 <- randomNet - let trained = foldl trainEach net0 (zip inps outs) + let trained = foldl' trainEach net0 (zip inps outs) let testIns = [ [ (x,y) | x <- [0..50] ] | y <- [0..20] ] - let outMat = fmap (fmap (\(x,y) -> (render . normx) $ runNet trained (S1D' $ SA.vector [x / 25 - 1,y / 10 - 1]))) testIns + let outMat = fmap (fmap (\(x,y) -> (render . 
normx) $ runNet trained (S1D $ SA.vector [x / 25 - 1,y / 10 - 1]))) testIns return $ unlines outMat where @@ -59,8 +59,8 @@ netTest rate n = do | n' <= 0.8 = '=' | otherwise = '#' - normx :: S' ('D1 1) -> Double - normx (S1D' r) = SA.mean r + normx :: S ('D1 1) -> Double + normx (S1D r) = SA.mean r data FeedForwardOpts = FeedForwardOpts Int LearningParameters diff --git a/main/mnist.hs b/main/mnist.hs index ccb2013..5066b7c 100644 --- a/main/mnist.hs +++ b/main/mnist.hs @@ -5,23 +5,24 @@ {-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE FlexibleContexts #-} - import Control.Applicative import Control.Monad import Control.Monad.Random -import Control.Monad.Trans.Class import Control.Monad.Trans.Except import qualified Data.Attoparsec.Text as A +import Data.List ( foldl' ) import qualified Data.Text as T import qualified Data.Text.IO as T +import qualified Data.Vector.Storable as V -import Numeric.LinearAlgebra (maxIndex) +import Numeric.LinearAlgebra ( maxIndex ) import qualified Numeric.LinearAlgebra.Static as SA import Options.Applicative import Grenade +import Grenade.Utils.OneHot -- The definition of our convolutional neural network. -- In the type signature, we have a type level list of shapes which are passed between the layers. @@ -49,9 +50,9 @@ convTest iterations trainFile validateFile rate = do trainEach rate' !network (i, o) = train rate' network i o runIteration trainRows validateRows net i = do - let trained' = foldl (trainEach ( rate { learningRate = learningRate rate * 0.9 ^ i} )) net trainRows + let trained' = foldl' (trainEach ( rate { learningRate = learningRate rate * 0.9 ^ i} )) net trainRows let res = fmap (\(rowP,rowL) -> (rowL,) $ runNet trained' rowP) validateRows - let res' = fmap (\(S1D' label, S1D' prediction) -> (maxIndex (SA.extract label), maxIndex (SA.extract prediction))) res + let res' = fmap (\(S1D label, S1D prediction) -> (maxIndex (SA.extract label), maxIndex (SA.extract prediction))) res print trained' putStrLn $ "Iteration " ++ show i ++ ": " ++ show (length (filter ((==) <$> fst <*> snd) res')) ++ " of " ++ show (length res') return trained' @@ -61,7 +62,7 @@ data MnistOpts = MnistOpts FilePath FilePath Int LearningParameters mnist' :: Parser MnistOpts mnist' = MnistOpts <$> argument str (metavar "TRAIN") <*> argument str (metavar "VALIDATE") - <*> option auto (long "iterations" <> short 'i' <> value 10) + <*> option auto (long "iterations" <> short 'i' <> value 15) <*> (LearningParameters <$> option auto (long "train_rate" <> short 'r' <> value 0.01) <*> option auto (long "momentum" <> value 0.9) @@ -78,14 +79,14 @@ main = do Right () -> pure () Left err -> putStrLn err -readMNIST :: FilePath -> ExceptT String IO [(S' ('D2 28 28), S' ('D1 10))] +readMNIST :: FilePath -> ExceptT String IO [(S ('D2 28 28), S ('D1 10))] readMNIST mnist = ExceptT $ do mnistdata <- T.readFile mnist return $ traverse (A.parseOnly parseMNIST) (T.lines mnistdata) -parseMNIST :: A.Parser (S' ('D2 28 28), S' ('D1 10)) +parseMNIST :: A.Parser (S ('D2 28 28), S ('D1 10)) parseMNIST = do - lab <- A.decimal - pixels <- many (A.char ',' >> A.double) - let lab' = replicate lab 0 ++ [1] ++ replicate (9 - lab) 0 - return (S2D' $ SA.fromList pixels, S1D' $ SA.fromList lab') + Just lab <- oneHot <$> A.decimal + pixels <- many (A.char ',' >> A.double) + image <- maybe (fail "Parsed row was of an incorrect size") pure (fromStorable . 
V.fromList $ pixels) + return (image, lab) diff --git a/main/recurrent.hs b/main/recurrent.hs new file mode 100644 index 0000000..3465201 --- /dev/null +++ b/main/recurrent.hs @@ -0,0 +1,87 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeFamilies #-} + +import Control.Monad ( foldM ) +import Control.Monad.Random ( MonadRandom, getRandomR ) + +import Data.List ( cycle, unfoldr ) +import qualified Numeric.LinearAlgebra.Static as SA + +import Options.Applicative + +import Grenade +import Grenade.Recurrent + +-- The defininition for our simple recurrent network. +-- This file just trains a network to generate a repeating sequence +-- of 0 0 1. +-- +-- The F and R types are Tagging types to ensure that the runner and +-- creation function know how to treat the layers. +type F = FeedForward +type R = Recurrent + +type RecNet = RecurrentNetwork '[ R (LSTM 1 4), R (LSTM 4 1), F Trivial] + '[ 'D1 1, 'D1 4, 'D1 1, 'D1 1 ] + +type RecInput = RecurrentInputs '[ R (LSTM 1 4), R (LSTM 4 1), F Trivial] + +randomNet :: MonadRandom m => m (RecNet, RecInput) +randomNet = randomRecurrent + +netTest :: MonadRandom m => RecNet -> RecInput -> LearningParameters -> Int -> m (RecNet, RecInput) +netTest net0 i0 rate iterations = + foldM trainIteration (net0,i0) [1..iterations] + where + trainingCycle = cycle [c 0, c 0, c 1] + + trainIteration (net, io) _ = do + dropping <- getRandomR (0, 2) + count <- getRandomR (5, 30) + let t = drop dropping trainingCycle + let example = ((,Nothing) <$> take count t) ++ [(t !! count, Just $ t !! (count + 1))] + return $ trainEach net io example + + trainEach !nt !io !ex = trainRecurrent rate nt io ex + +data FeedForwardOpts = FeedForwardOpts Int LearningParameters + +feedForward' :: Parser FeedForwardOpts +feedForward' = FeedForwardOpts <$> option auto (long "examples" <> short 'e' <> value 20000) + <*> (LearningParameters + <$> option auto (long "train_rate" <> short 'r' <> value 0.01) + <*> option auto (long "momentum" <> value 0.9) + <*> option auto (long "l2" <> value 0.0005) + ) + +generateRecurrent :: RecNet -> RecInput -> S ('D1 1) -> [Int] +generateRecurrent n s i = + unfoldr go (s, i) + where + go (x, y) = + do let (ns, o) = runRecurrent n x y + o' = heat o + Just (o', (ns, fromIntegral o')) + + heat :: S ('D1 1) -> Int + heat x = case x of + (S1D v) -> round (SA.mean v) + +main :: IO () +main = do + FeedForwardOpts examples rate <- execParser (info (feedForward' <**> helper) idm) + putStrLn "Training network..." + + (net0, i0) <- randomNet + (trained, bestInput) <- netTest net0 i0 rate examples + + let results = generateRecurrent trained bestInput (c 1) + + print . take 50 . drop 100 $ results + +c :: Double -> S ('D1 1) +c = S1D . 
SA.konst diff --git a/main/shakespeare.hs b/main/shakespeare.hs new file mode 100644 index 0000000..14cb072 --- /dev/null +++ b/main/shakespeare.hs @@ -0,0 +1,156 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE LambdaCase #-} + +import Control.Monad.Random +import Control.Monad.Trans.Except + +import Data.Char ( isUpper, toUpper, toLower ) +import Data.List ( unfoldr, foldl' ) +import Data.Maybe ( fromMaybe ) + +import qualified Data.Vector as V +import Data.Vector ( Vector ) + +import qualified Data.Map as M +import Data.Proxy ( Proxy (..) ) + + +import Data.Singletons.Prelude +import GHC.TypeLits + +import Numeric.LinearAlgebra.Static ( konst ) + +import Options.Applicative + +import Grenade +import Grenade.Recurrent +import Grenade.Utils.OneHot + +-- The defininition for our natural language recurrent network. +-- This network is able to learn and generate simple words in +-- about an hour. +-- +-- This is a first class recurrent net, although it's similar to +-- an unrolled graph. +-- +-- The F and R types are tagging types to ensure that the runner and +-- creation function know how to treat the layers. +-- +-- As an example, here's a short sequence generated. +-- +-- > the see and and the sir, and and the make and the make and go the make and go the make and the +-- +type F = FeedForward +type R = Recurrent + +-- The definition of our network +type Shakespeare = RecurrentNetwork '[ R (LSTM 40 40), F (FullyConnected 40 40), F Logit] + '[ 'D1 40, 'D1 40, 'D1 40, 'D1 40 ] + +-- The definition of the "sideways" input, which the network if fed recurrently. +type Shakespearian = RecurrentInputs '[ R (LSTM 40 40), F (FullyConnected 40 40), F Logit] + +randomNet :: MonadRandom m => m (Shakespeare, Shakespearian) +randomNet = randomRecurrent + +-- | Load the data files and prepare a map of characters to a compressed int representation. +loadShakespeare :: FilePath -> ExceptT String IO (Vector Int, M.Map Char Int, Vector Char) +loadShakespeare path = do + contents <- lift $ readFile path + let annotated = annotateCapitals contents + (m,cs) <- ExceptT . return . note "Couldn't fit data in hotMap" $ hotMap (Proxy :: Proxy 40) annotated + hot <- ExceptT . return . note "Couldn't generate hot values" $ traverse (`M.lookup` m) annotated + return (V.fromList hot, m, cs) + +trainSlice :: LearningParameters -> Shakespeare -> Shakespearian -> Vector Int -> Int -> Int -> (Shakespeare, Shakespearian) +trainSlice !rate !net !recIns input offset size = + let e = fmap (x . oneHot) . 
V.toList $ V.slice offset size input + in case reverse e of + (o : l : xs) -> + let examples = reverse $ (l, Just o) : ((,Nothing) <$> xs) + in trainRecurrent rate net recIns examples + _ -> error "Not enough input" + where + x = fromMaybe (error "Hot variable didn't fit.") + +runShakespeare :: ShakespeareOpts -> ExceptT String IO () +runShakespeare ShakespeareOpts {..} = do + (shakespeare, oneHotMap, oneHotDictionary) <- loadShakespeare trainingFile + (net0, i0) <- lift randomNet + lift $ foldM_ (\(!net, !io) size -> do + xs <- take (iterations `div` 15) <$> getRandomRs (0, length shakespeare - size - 1) + let (!trained, !bestInput) = foldl' (\(!n, !i) offset -> trainSlice rate n i shakespeare offset size) (net, io) xs + let results = take 100 $ generateParagraph trained bestInput oneHotMap oneHotDictionary ( S1D $ konst 0) + putStrLn ("TRAINING STEP WITH SIZE: " ++ show size) + putStrLn (unAnnotateCapitals results) + return (trained, bestInput) + ) (net0, i0) [10,10,15,15,20,20,25,25,30,30,35,35,40,40,50 :: Int] + +generateParagraph :: forall layers shapes n a. (Last shapes ~ 'D1 n, Head shapes ~ 'D1 n, KnownNat n, Ord a) + => RecurrentNetwork layers shapes + -> RecurrentInputs layers + -> M.Map a Int + -> Vector a + -> S ('D1 n) + -> [a] +generateParagraph n s hotmap hotdict i = + unfoldr go (s, i) + where + go (x, y) = + do let (ns, o) = runRecurrent n x y + un <- unHot hotdict o + re <- makeHot hotmap un + Just (un, (ns, re)) + +data ShakespeareOpts = ShakespeareOpts { + trainingFile :: FilePath + , iterations :: Int + , rate :: LearningParameters + } + +shakespeare' :: Parser ShakespeareOpts +shakespeare' = ShakespeareOpts <$> argument str (metavar "TRAIN") + <*> option auto (long "examples" <> short 'e' <> value 1000000) + <*> (LearningParameters + <$> option auto (long "train_rate" <> short 'r' <> value 0.01) + <*> option auto (long "momentum" <> value 0.95) + <*> option auto (long "l2" <> value 0.000001) + ) + +main :: IO () +main = do + shopts <- execParser (info (shakespeare' <**> helper) idm) + res <- runExceptT $ runShakespeare shopts + case res of + Right () -> pure () + Left err -> putStrLn err + + +-- Replace capitals with an annotation and the lower case letter +-- http://fastml.com/one-weird-trick-for-training-char-rnns/ +annotateCapitals :: String -> String +annotateCapitals (x : rest) + | isUpper x + = '^' : toLower x : annotateCapitals rest + | otherwise + = x : annotateCapitals rest +annotateCapitals [] + = [] + +unAnnotateCapitals :: String -> String +unAnnotateCapitals ('^' : x : rest) + = toUpper x : unAnnotateCapitals rest +unAnnotateCapitals (x : rest) + = x : unAnnotateCapitals rest +unAnnotateCapitals [] + = [] + +-- | Tag the 'Nothing' value of a 'Maybe' +note :: a -> Maybe b -> Either a b +note a = maybe (Left a) Right diff --git a/src/Grenade/Core/Network.hs b/src/Grenade/Core/Network.hs index fd6d68a..c8ffb0a 100644 --- a/src/Grenade/Core/Network.hs +++ b/src/Grenade/Core/Network.hs @@ -1,15 +1,21 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE PolyKinds #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE LambdaCase #-} +{-| +Module : Grenade.Core.Network +Description : Core definition a simple neural etwork +Copyright : (c) Huw Campbell, 2016-2017 +License : BSD2 +Stability : experimental +This module defines the core data type for the 
simplest +Neural network we support. + +-} module Grenade.Core.Network ( Layer (..) , Network (..) @@ -20,10 +26,12 @@ module Grenade.Core.Network ( ) where import Control.Monad.Random (MonadRandom) - +import Data.List ( foldl' ) +import Data.Singletons import Grenade.Core.Shape +-- | Learning parameters for stochastic gradient descent. data LearningParameters = LearningParameters { learningRate :: Double , learningMomentum :: Double @@ -33,35 +41,43 @@ data LearningParameters = LearningParameters { -- | Class for updating a layer. All layers implement this, and it is -- shape independent. class Show x => UpdateLayer x where + {-# MINIMAL runUpdate, createRandom #-} -- | The type for the gradient for this layer. -- Unit if there isn't a gradient to pass back. type Gradient x :: * -- | Update a layer with its gradient and learning parameters runUpdate :: LearningParameters -> x -> Gradient x -> x + -- | Create a random layer, many layers will use pure createRandom :: MonadRandom m => m x + -- | Update a layer with many Gradients + runUpdates :: LearningParameters -> x -> [Gradient x] -> x + runUpdates rate = foldl' (runUpdate rate) + -- | Class for a layer. All layers implement this, however, they don't -- need to implement it for all shapes, only ones which are appropriate. class UpdateLayer x => Layer x (i :: Shape) (o :: Shape) where -- | Used in training and scoring. Take the input from the previous -- layer, and give the output from this layer. - runForwards :: x -> S' i -> S' o + runForwards :: x -> S i -> S o -- | Back propagate a step. Takes the current layer, the input that the -- layer gave from the input and the back propagated derivatives from -- the layer above. -- Returns the gradient layer and the derivatives to push back further. - runBackwards :: x -> S' i -> S' o -> (Gradient x, S' i) + runBackwards :: x -> S i -> S o -> (Gradient x, S i) -- | Type of a network. --- The [*] type specifies the types of the layers. This is needed for parallel --- running and being all the gradients beck together. +-- +-- The [*] type specifies the types of the layers. +-- -- The [Shape] type specifies the shapes of data passed between the layers. --- Could be considered to be a heterogeneous list of layers which are able to +-- +-- Can be considered to be a heterogeneous list of layers which are able to -- transform the data shapes of the network. data Network :: [*] -> [Shape] -> * where - O :: Layer x i o => !x -> Network '[x] '[i, o] - (:~>) :: Layer x i h => !x -> !(Network xs (h ': hs)) -> Network (x ': xs) (i ': h ': hs) + O :: (SingI i, SingI o, Layer x i o) => !x -> Network '[x] '[i, o] + (:~>) :: (SingI i, SingI h, Layer x i h) => !x -> !(Network xs (h ': hs)) -> Network (x ': xs) (i ': h ': hs) infixr 5 :~> instance Show (Network l h) where @@ -74,15 +90,14 @@ data Gradients :: [*] -> * where OG :: UpdateLayer x => Gradient x -> Gradients '[x] (:/>) :: UpdateLayer x => Gradient x -> Gradients xs -> Gradients (x ': xs) - -- | A network can easily be created by hand with (:~>), but an easy way to initialise a random -- network is with the randomNetwork. 
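-- As a purely hypothetical illustration (the layer sizes below are not taken
-- from this module), a concrete network type can then be initialised in one
-- line:
--
-- > randomMyNet :: MonadRandom m
-- >             => m (Network '[ FullyConnected 2 1, Logit ] '[ 'D1 2, 'D1 1, 'D1 1 ])
-- > randomMyNet = randomNetwork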
class CreatableNetwork (xs :: [*]) (ss :: [Shape]) where -- | Create a network of the types requested randomNetwork :: MonadRandom m => m (Network xs ss) -instance Layer x i o => CreatableNetwork (x ': '[]) (i ': o ': '[]) where +instance (SingI i, SingI o, Layer x i o) => CreatableNetwork (x ': '[]) (i ': o ': '[]) where randomNetwork = O <$> createRandom -instance (Layer x i o, CreatableNetwork xs (o ': r ': rs)) => CreatableNetwork (x ': xs) (i ': o ': r ': rs) where +instance (SingI i, SingI o, Layer x i o, CreatableNetwork xs (o ': r ': rs)) => CreatableNetwork (x ': xs) (i ': o ': r ': rs) where randomNetwork = (:~>) <$> createRandom <*> randomNetwork diff --git a/src/Grenade/Core/Runner.hs b/src/Grenade/Core/Runner.hs index 1cfd5cb..8cb9021 100644 --- a/src/Grenade/Core/Runner.hs +++ b/src/Grenade/Core/Runner.hs @@ -4,7 +4,16 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} +{-| +Module : Grenade.Core.Shape +Description : Core definition of the Shapes of data we understand +Copyright : (c) Huw Campbell, 2016-2017 +License : BSD2 +Stability : experimental +This module defines simple back propagation and training functions +for a network. +-} module Grenade.Core.Runner ( train , backPropagate @@ -16,16 +25,22 @@ import Data.Singletons.Prelude import Grenade.Core.Network import Grenade.Core.Shape --- | Drive and network and collect its back propogated gradients. -backPropagate :: forall input output shapes layers. (Head shapes ~ input, Last shapes ~ output) - => Network layers shapes -> S' input -> S' output -> Gradients layers +-- | Perform reverse automatic differentiation on the network +-- for the current input and expected output. +-- +-- /Note:/ The loss function pushed backwards is appropriate +-- for both regression and classification as a squared loss +-- or log-loss respectively. Other loss functions are not yet +-- implemented. +backPropagate :: forall shapes layers. + Network layers shapes -> S (Head shapes) -> S (Last shapes) -> Gradients layers backPropagate network input target = fst $ go input network where - go :: forall j js sublayers. (Head js ~ j, Last js ~ output) - => S' j -- ^ input vector + go :: forall js sublayers. (Last js ~ Last shapes) + => S (Head js) -- ^ input vector -> Network sublayers js -- ^ network to train - -> (Gradients sublayers, S' j) + -> (Gradients sublayers, S (Head js)) -- handle input from the beginning, feeding upwards. go !x (layer :~> n) = let y = runForwards layer x @@ -44,16 +59,7 @@ backPropagate network input target = in (OG layer', dWs) --- | Update a network with new weights after training with an instance. -train :: forall input output shapes layers. (Head shapes ~ input, Last shapes ~ output) - => LearningParameters -- ^ learning rate - -> Network layers shapes -- ^ network to train - -> S' input -> S' output -- ^ target vector - -> Network layers shapes -train rate network input output = - let grads = backPropagate network input output - in applyUpdate rate network grads - +-- | Apply one step of stochastic gradient decent across the network. applyUpdate :: LearningParameters -> Network ls ss -> Gradients ls -> Network ls ss applyUpdate rate (O layer) (OG gradient) = O (runUpdate rate layer gradient) @@ -62,9 +68,13 @@ applyUpdate rate (layer :~> rest) (gradient :/> grest) applyUpdate _ _ _ = error "Impossible for the gradients of a network to have a different length to the network" --- | Just forwards propagation with no training. 
-runNet :: Network layers hs - -> S' (Head hs) -- ^ input vector - -> S' (Last hs) -- ^ target vector +-- | Update a network with new weights after training with an instance. +train :: LearningParameters -> Network layers shapes -> S (Head shapes) -> S (Last shapes) -> Network layers shapes +train rate network input output = + let grads = backPropagate network input output + in applyUpdate rate network grads + +-- | Run the network with input and return the given output. +runNet :: Network layers shapes -> S (Head shapes) -> S (Last shapes) runNet (layer :~> n) !x = let y = runForwards layer x in runNet n y runNet (O layer) !x = runForwards layer x diff --git a/src/Grenade/Core/Shape.hs b/src/Grenade/Core/Shape.hs index 57571d9..286a134 100644 --- a/src/Grenade/Core/Shape.hs +++ b/src/Grenade/Core/Shape.hs @@ -1,75 +1,171 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE KindSignatures #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE PolyKinds #-} -{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RankNTypes #-} --- Ghc 8.0 gives a warning on `(+) _ _ = error ...` but ghc 7.10 fails to +-- Ghc 8.0 gives a warning on `n2 _ _ = error ...` but ghc 7.10 fails to -- compile without this default pattern. {-# OPTIONS_GHC -fno-warn-overlapping-patterns #-} +{-| +Module : Grenade.Core.Shape +Description : Core definition of the Shapes of data we understand +Copyright : (c) Huw Campbell, 2016-2017 +License : BSD2 +Stability : experimental + +This module defines the core data types for the shapes of data that +are understood by Grenade. +-} module Grenade.Core.Shape ( Shape (..) - , S' (..) + , S (..) + , randomOfShape + , fromStorable ) where +import Control.DeepSeq (NFData (..)) +import Control.Monad.Random ( MonadRandom, getRandom ) + +import Data.Singletons import Data.Singletons.TypeLits +import Data.Vector.Storable ( Vector ) +import qualified Data.Vector.Storable as V + import GHC.TypeLits +import qualified Numeric.LinearAlgebra.Static as H import Numeric.LinearAlgebra.Static - +import qualified Numeric.LinearAlgebra as NLA -- | The current shapes we accept. -- at the moment this is just one, two, and three dimensional -- Vectors/Matricies. -data Shape = - D1 Nat +data Shape + = D1 Nat | D2 Nat Nat | D3 Nat Nat Nat -instance Num (S' x) where - (+) (S1D' x) (S1D' y) = S1D' (x + y) - (+) (S2D' x) (S2D' y) = S2D' (x + y) - (+) (S3D' x) (S3D' y) = S3D' (x + y) - (+) _ _ = error "Impossible to have different constructors for the same shaped network" - - (-) (S1D' x) (S1D' y) = S1D' (x - y) - (-) (S2D' x) (S2D' y) = S2D' (x - y) - (-) (S3D' x) (S3D' y) = S3D' (x - y) - (-) _ _ = error "Impossible to have different constructors for the same shaped network" - - (*) (S1D' x) (S1D' y) = S1D' (x * y) - (*) (S2D' x) (S2D' y) = S2D' (x * y) - (*) (S3D' x) (S3D' y) = S3D' (x * y) - (*) _ _ = error "Impossible to have different constructors for the same shaped network" - - abs (S1D' x) = S1D' (abs x) - abs (S2D' x) = S2D' (abs x) - abs (S3D' x) = S3D' (abs x) - - signum (S1D' x) = S1D' (signum x) - signum (S2D' x) = S2D' (signum x) - signum (S3D' x) = S3D' (signum x) - - fromInteger _ = error "Unimplemented: fromInteger on Shape" - -- | Given a Shape n, these are the possible data structures with that shape. 
-- All shapes are held in contiguous memory. -- 3D is held in a matrix (usually row oriented) which has height depth * rows. -data S' (n :: Shape) where - S1D' :: ( KnownNat o ) => R o -> S' ('D1 o) - S2D' :: ( KnownNat rows, KnownNat columns ) => L rows columns -> S' ('D2 rows columns) - S3D' :: ( KnownNat rows - , KnownNat columns - , KnownNat depth - , KnownNat (rows * depth)) => L (rows * depth) columns -> S' ('D3 rows columns depth) +data S (n :: Shape) where + S1D :: ( KnownNat o ) => R o -> S ('D1 o) + S2D :: ( KnownNat rows, KnownNat columns ) => L rows columns -> S ('D2 rows columns) + S3D :: ( KnownNat rows + , KnownNat columns + , KnownNat depth + , KnownNat (rows * depth)) => L (rows * depth) columns -> S ('D3 rows columns depth) -instance Show (S' n) where - show (S1D' a) = "S1D' " ++ show a - show (S2D' a) = "S2D' " ++ show a - show (S3D' a) = "S3D' " ++ show a +deriving instance Show (S n) + +instance SingI x => Num (S x) where + (+) = n2 (+) + (-) = n2 (-) + (*) = n2 (*) + abs = n1 abs + signum = n1 signum + fromInteger x = case (sing :: Sing x) of + D1Sing -> S1D (konst $ fromInteger x) + D2Sing -> S2D (konst $ fromInteger x) + D3Sing -> S3D (konst $ fromInteger x) + +instance SingI x => Fractional (S x) where + (/) = n2 (/) + recip = n1 recip + fromRational x = case (sing :: Sing x) of + D1Sing -> S1D (konst $ fromRational x) + D2Sing -> S2D (konst $ fromRational x) + D3Sing -> S3D (konst $ fromRational x) + +instance SingI x => Floating (S x) where + pi = case (sing :: Sing x) of + D1Sing -> S1D (konst pi) + D2Sing -> S2D (konst pi) + D3Sing -> S3D (konst pi) + exp = n1 exp + log = n1 log + sqrt = n1 sqrt + (**) = n2 (**) + logBase = n2 logBase + sin = n1 sin + cos = n1 cos + tan = n1 tan + asin = n1 asin + acos = n1 acos + atan = n1 atan + sinh = n1 sinh + cosh = n1 cosh + tanh = n1 tanh + asinh = n1 asinh + acosh = n1 acosh + atanh = n1 atanh + +-- Singletons +-- These could probably be derived with template haskell, but this seems +-- clear and makes adding the KnownNat constraints simple. +data instance Sing (n :: Shape) where + D1Sing :: KnownNat a => Sing ('D1 a) + D2Sing :: (KnownNat a, KnownNat b) => Sing ('D2 a b) + D3Sing :: (KnownNat a, KnownNat b, KnownNat c, KnownNat (a * c)) => Sing ('D3 a b c) + +instance KnownNat a => SingI ('D1 a) where + sing = D1Sing +instance (KnownNat a, KnownNat b) => SingI ('D2 a b) where + sing = D2Sing +instance (KnownNat a, KnownNat b, KnownNat c, KnownNat (a * c)) => SingI ('D3 a b c) where + sing = D3Sing + +-- +-- I haven't made shapes strict, as sometimes they're not needed +-- (the last input gradient back for instance) +-- +instance NFData (S x) where + rnf (S1D x) = rnf x + rnf (S2D x) = rnf x + rnf (S3D x) = rnf x + +-- | Generate random data of the desired shape +randomOfShape :: forall x m. ( MonadRandom m, SingI x ) => m (S x) +randomOfShape = do + seed :: Int <- getRandom + return $ case (sing :: Sing x) of + D1Sing -> S1D (randomVector seed Uniform * 2 - 1) + D2Sing -> S2D (uniformSample seed (-1) 1) + D3Sing -> S3D (uniformSample seed (-1) 1) + +-- | Generate a shape from a Storable Vector. +-- +-- Returns Nothing if the vector is of the wrong size. +fromStorable :: forall x. SingI x => Vector Double -> Maybe (S x) +fromStorable xs = case sing :: Sing x of + D1Sing -> S1D <$> H.create xs + D2Sing -> S2D <$> mkL xs + D3Sing -> S3D <$> mkL xs + where + mkL :: forall rows columns. 
(KnownNat rows, KnownNat columns) + => Vector Double -> Maybe (L rows columns) + mkL v = + let rows = fromIntegral $ natVal (Proxy :: Proxy rows) + columns = fromIntegral $ natVal (Proxy :: Proxy columns) + in if rows * columns == V.length v + then H.create $ NLA.reshape columns v + else Nothing + +-- Helper function for creating the number instances +n1 :: ( forall a. Floating a => a -> a ) -> S x -> S x +n1 f (S1D x) = S1D (f x) +n1 f (S2D x) = S2D (f x) +n1 f (S3D x) = S3D (f x) + +-- Helper function for creating the number instances +n2 :: ( forall a. Floating a => a -> a -> a ) -> S x -> S x -> S x +n2 f (S1D x) (S1D y) = S1D (f x y) +n2 f (S2D x) (S2D y) = S2D (f x y) +n2 f (S3D x) (S3D y) = S3D (f x y) +n2 _ _ _ = error "Impossible to have different constructors for the same shaped network" diff --git a/src/Grenade/Graph/GraphNetwork.hs b/src/Grenade/Graph/GraphNetwork.hs new file mode 100644 index 0000000..695593d --- /dev/null +++ b/src/Grenade/Graph/GraphNetwork.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} + +module Grenade.Graph.Network ( + Layer (..) + , UpdateLayer (..) + ) where + +import Control.Monad.Random (MonadRandom) +import Data.Singletons +import Data.Singletons.Prelude + +import GHC.TypeLits + +import Grenade.Core.Shape +import Grenade.Core.Network ( UpdateLayer (..), Layer (..) ) + +-- | Type of a DAG network + +data Fin :: Nat -> * where + Fin0 :: Fin (n + 1) + FinS :: Fin n -> Fin (n + 1) + +data Edge :: Nat -> * where + Edge :: Shape -> Fin n -> Edge n + +data Node a n where + Node :: a -> [Edge n] -> Node a n diff --git a/src/Grenade/Layers/Convolution.hs b/src/Grenade/Layers/Convolution.hs index 2ea5fb5..87f75d2 100644 --- a/src/Grenade/Layers/Convolution.hs +++ b/src/Grenade/Layers/Convolution.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE TypeOperators #-} @@ -9,9 +7,6 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PolyKinds #-} -{-# LANGUAGE PatternGuards #-} - module Grenade.Layers.Convolution ( Convolution (..) , Convolution' (..) @@ -31,6 +26,7 @@ import Numeric.LinearAlgebra.Static hiding ((|||), build, toRows) import Grenade.Core.Network import Grenade.Core.Shape import Grenade.Layers.Internal.Convolution +import Grenade.Layers.Internal.Update -- | A convolution layer for a neural network. -- This uses the im2col convolution trick popularised by Caffe, which essentially turns the @@ -43,12 +39,12 @@ import Grenade.Layers.Internal.Convolution -- `out = (in - kernel) / stride + 1` for both dimensions. -- -- One probably shouldn't build their own layer, but rather use the randomConvolution function. -data Convolution :: Nat -- ^ Number of channels, for the first layer this could be RGB for instance. - -> Nat -- ^ Number of filters, this is the number of channels output by the layer. 
- -> Nat -- ^ The number of rows in the kernel filter - -> Nat -- ^ The number of column in the kernel filter - -> Nat -- ^ The row stride of the convolution filter - -> Nat -- ^ The columns stride of the convolution filter +data Convolution :: Nat -- Number of channels, for the first layer this could be RGB for instance. + -> Nat -- Number of filters, this is the number of channels output by the layer. + -> Nat -- The number of rows in the kernel filter + -> Nat -- The number of column in the kernel filter + -> Nat -- The row stride of the convolution filter + -> Nat -- The columns stride of the convolution filter -> * where Convolution :: ( KnownNat channels , KnownNat filters @@ -58,16 +54,16 @@ data Convolution :: Nat -- ^ Number of channels, for the first layer this could , KnownNat strideColumns , KnownNat kernelFlattened , kernelFlattened ~ (kernelRows * kernelColumns * channels)) - => !(L kernelFlattened filters) -- ^ The kernel filter weights - -> !(L kernelFlattened filters) -- ^ The last kernel update (or momentum) + => !(L kernelFlattened filters) -- The kernel filter weights + -> !(L kernelFlattened filters) -- The last kernel update (or momentum) -> Convolution channels filters kernelRows kernelColumns strideRows strideColumns -data Convolution' :: Nat -- ^ Number of channels, for the first layer this could be RGB for instance. - -> Nat -- ^ Number of filters, this is the number of channels output by the layer. - -> Nat -- ^ The number of rows in the kernel filter - -> Nat -- ^ The number of column in the kernel filter - -> Nat -- ^ The row stride of the convolution filter - -> Nat -- ^ The columns stride of the convolution filter +data Convolution' :: Nat -- Number of channels, for the first layer this could be RGB for instance. + -> Nat -- Number of filters, this is the number of channels output by the layer. 
+ -> Nat -- The number of rows in the kernel filter + -> Nat -- The number of column in the kernel filter + -> Nat -- The row stride of the convolution filter + -> Nat -- The columns stride of the convolution filter -> * where Convolution' :: ( KnownNat channels , KnownNat filters @@ -77,7 +73,7 @@ data Convolution' :: Nat -- ^ Number of channels, for the first layer this could , KnownNat strideColumns , KnownNat kernelFlattened , kernelFlattened ~ (kernelRows * kernelColumns * channels)) - => !(L kernelFlattened filters) -- ^ The kernel filter gradient + => !(L kernelFlattened filters) -- The kernel filter gradient -> Convolution' channels filters kernelRows kernelColumns strideRows strideColumns instance Show (Convolution c f k k' s s') where @@ -109,7 +105,7 @@ randomConvolution :: ( MonadRandom m , kernelFlattened ~ (kernelRows * kernelColumns * channels)) => m (Convolution channels filters kernelRows kernelColumns strideRows strideColumns) randomConvolution = do - s :: Int <- getRandom + s <- getRandom let wN = uniformSample s (-1) 1 mm = konst 0 return $ Convolution wN mm @@ -124,9 +120,7 @@ instance ( KnownNat channels ) => UpdateLayer (Convolution channels filters kernelRows kernelColumns strideRows strideColumns) where type Gradient (Convolution channels filters kernelRows kernelCols strideRows strideCols) = (Convolution' channels filters kernelRows kernelCols strideRows strideCols) runUpdate LearningParameters {..} (Convolution oldKernel oldMomentum) (Convolution' kernelGradient) = - let newMomentum = konst learningMomentum * oldMomentum - konst learningRate * kernelGradient - regulariser = konst (learningRegulariser * learningRate) * oldKernel - newKernel = oldKernel + newMomentum - regulariser + let (newKernel, newMomentum) = decendMatrix learningRate learningMomentum learningRegulariser oldKernel kernelGradient oldMomentum in Convolution newKernel newMomentum createRandom = randomConvolution @@ -146,7 +140,7 @@ instance ( KnownNat kernelRows , KnownNat (kernelRows * kernelCols * 1) , KnownNat (outputRows * filters) ) => Layer (Convolution 1 filters kernelRows kernelCols strideRows strideCols) ('D2 inputRows inputCols) ('D3 outputRows outputCols filters) where - runForwards (Convolution kernel _) (S2D' input) = + runForwards (Convolution kernel _) (S2D input) = let ex = extract input ek = extract kernel kx = fromIntegral $ natVal (Proxy :: Proxy kernelRows) @@ -159,9 +153,9 @@ instance ( KnownNat kernelRows mt = c LA.<> ek r = col2vid 1 1 1 1 ox oy mt rs = fromJust . create $ r - in S3D' rs + in S3D rs - runBackwards (Convolution kernel _) (S2D' input) (S3D' dEdy) = + runBackwards (Convolution kernel _) (S2D input) (S3D dEdy) = let ex = extract input ix = fromIntegral $ natVal (Proxy :: Proxy inputRows) iy = fromIntegral $ natVal (Proxy :: Proxy inputCols) @@ -183,7 +177,7 @@ instance ( KnownNat kernelRows dW = vs LA.<> tr ek xW = col2im kx ky sx sy ix iy dW - in (Convolution' kN, S2D' . fromJust . create $ xW) + in (Convolution' kN, S2D . fromJust . 
create $ xW) -- | A three dimensional image (or 2d with many channels) can have @@ -203,7 +197,7 @@ instance ( KnownNat kernelRows , KnownNat (kernelRows * kernelCols * channels) , KnownNat (outputRows * filters) ) => Layer (Convolution channels filters kernelRows kernelCols strideRows strideCols) ('D3 inputRows inputCols channels) ('D3 outputRows outputCols filters) where - runForwards (Convolution kernel _) (S3D' input) = + runForwards (Convolution kernel _) (S3D input) = let ex = extract input ek = extract kernel ix = fromIntegral $ natVal (Proxy :: Proxy inputRows) @@ -219,8 +213,8 @@ instance ( KnownNat kernelRows mt = c LA.<> ek r = col2vid 1 1 1 1 ox oy mt rs = fromJust . create $ r - in S3D' rs - runBackwards (Convolution kernel _) (S3D' input) (S3D' dEdy) = + in S3D rs + runBackwards (Convolution kernel _) (S3D input) (S3D dEdy) = let ex = extract input ix = fromIntegral $ natVal (Proxy :: Proxy inputRows) iy = fromIntegral $ natVal (Proxy :: Proxy inputCols) @@ -243,4 +237,4 @@ instance ( KnownNat kernelRows dW = vs LA.<> tr ek xW = col2vid kx ky sx sy ix iy dW - in (Convolution' kN, S3D' . fromJust . create $ xW) + in (Convolution' kN, S3D . fromJust . create $ xW) diff --git a/src/Grenade/Layers/Crop.hs b/src/Grenade/Layers/Crop.hs index 2e5ae9c..6bac04a 100644 --- a/src/Grenade/Layers/Crop.hs +++ b/src/Grenade/Layers/Crop.hs @@ -4,10 +4,6 @@ {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PolyKinds #-} - module Grenade.Layers.Crop ( Crop (..) ) where @@ -50,19 +46,19 @@ instance ( KnownNat cropLeft , (inputRows - cropTop - cropBottom) ~ outputRows , (inputColumns - cropLeft - cropRight) ~ outputColumns ) => Layer (Crop cropLeft cropTop cropRight cropBottom) ('D2 inputRows inputColumns) ('D2 outputRows outputColumns) where - runForwards Crop (S2D' input) = + runForwards Crop (S2D input) = let cropl = fromIntegral $ natVal (Proxy :: Proxy cropLeft) cropt = fromIntegral $ natVal (Proxy :: Proxy cropTop) nrows = fromIntegral $ natVal (Proxy :: Proxy outputRows) ncols = fromIntegral $ natVal (Proxy :: Proxy outputColumns) m = extract input r = subMatrix (cropt, cropl) (nrows, ncols) m - in S2D' . fromJust . create $ r - runBackwards _ _ (S2D' dEdy) = + in S2D . fromJust . create $ r + runBackwards _ _ (S2D dEdy) = let cropl = fromIntegral $ natVal (Proxy :: Proxy cropLeft) cropt = fromIntegral $ natVal (Proxy :: Proxy cropTop) cropr = fromIntegral $ natVal (Proxy :: Proxy cropRight) cropb = fromIntegral $ natVal (Proxy :: Proxy cropBottom) eo = extract dEdy vs = diagBlock [konst 0 (cropt,cropl), eo, konst 0 (cropb,cropr)] - in ((), S2D' . fromJust . create $ vs) + in ((), S2D . fromJust . create $ vs) diff --git a/src/Grenade/Layers/Dropout.hs b/src/Grenade/Layers/Dropout.hs index 211f254..6903898 100644 --- a/src/Grenade/Layers/Dropout.hs +++ b/src/Grenade/Layers/Dropout.hs @@ -1,12 +1,7 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE LambdaCase #-} - module Grenade.Layers.Dropout ( Dropout (..) 
, randomDropout @@ -45,7 +40,7 @@ randomDropout rate = do return $ Dropout xs instance (KnownNat i) => Layer (Dropout i) ('D1 i) ('D1 i) where - runForwards (Dropout drops) (S1D' x) = S1D' $ x * drops - runForwards (Pass rate) (S1D' x)= S1D' $ dvmap (* (1 - rate)) x - runBackwards (Dropout drops) _ (S1D' x) = ((), S1D' $ x * drops) - runBackwards (Pass rate) _ (S1D' x) = ((), S1D' $ dvmap (* (1 - rate)) x) + runForwards (Dropout drops) (S1D x) = S1D $ x * drops + runForwards (Pass rate) (S1D x)= S1D $ dvmap (* (1 - rate)) x + runBackwards (Dropout drops) _ (S1D x) = ((), S1D $ x * drops) + runBackwards (Pass rate) _ (S1D x) = ((), S1D $ dvmap (* (1 - rate)) x) diff --git a/src/Grenade/Layers/Flatten.hs b/src/Grenade/Layers/Flatten.hs index 9d54fba..7ee517a 100644 --- a/src/Grenade/Layers/Flatten.hs +++ b/src/Grenade/Layers/Flatten.hs @@ -1,13 +1,8 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} - module Grenade.Layers.Flatten ( FlattenLayer (..) ) where @@ -16,11 +11,16 @@ import Data.Singletons.TypeLits import GHC.TypeLits import Numeric.LinearAlgebra.Static -import Numeric.LinearAlgebra.Data as LA (flatten, toList) +import Numeric.LinearAlgebra.Data as LA ( flatten ) import Grenade.Core.Shape import Grenade.Core.Network +-- | Flatten Layer +-- +-- Flattens input down to D1 from either 2D or 3D data. +-- +-- Can also be used to turn a 3D image with only one channel into a 2D image. data FlattenLayer = FlattenLayer deriving Show @@ -29,11 +29,18 @@ instance UpdateLayer FlattenLayer where runUpdate _ _ _ = FlattenLayer createRandom = return FlattenLayer - instance (KnownNat a, KnownNat x, KnownNat y, a ~ (x * z)) => Layer FlattenLayer ('D2 x y) ('D1 a) where - runForwards _ (S2D' y) = S1D' . fromList . toList . flatten . extract $ y - runBackwards _ _ (S1D' y) = ((), S2D' . fromList . toList . unwrap $ y) + runForwards _ (S2D y) = fromJust' . fromStorable . flatten . extract $ y + runBackwards _ _ (S1D y) = ((), fromJust' . fromStorable . extract $ y) instance (KnownNat a, KnownNat x, KnownNat y, KnownNat (x * z), KnownNat z, a ~ (x * y * z)) => Layer FlattenLayer ('D3 x y z) ('D1 a) where - runForwards _ (S3D' y) = S1D' . fromList . toList . flatten . extract $ y - runBackwards _ _ (S1D' y) = ((), S3D' . fromList . toList . unwrap $ y) + runForwards _ (S3D y) = fromJust' . fromStorable . flatten . extract $ y + runBackwards _ _ (S1D y) = ((), fromJust' . fromStorable . extract $ y) + +instance (KnownNat y, KnownNat x, KnownNat z, z ~ 1) => Layer FlattenLayer ('D3 x y z) ('D2 x y) where + runForwards _ (S3D y) = S2D y + runBackwards _ _ (S2D y) = ((), S3D y) + +fromJust' :: Maybe x -> x +fromJust' (Just x) = x +fromJust' Nothing = error $ "FlattenLayer error: data shape couldn't be converted." diff --git a/src/Grenade/Layers/FullyConnected.hs b/src/Grenade/Layers/FullyConnected.hs index 13a8f05..3d999c6 100644 --- a/src/Grenade/Layers/FullyConnected.hs +++ b/src/Grenade/Layers/FullyConnected.hs @@ -1,11 +1,8 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} - module Grenade.Layers.FullyConnected ( FullyConnected (..) 
, randomFullyConnected @@ -20,6 +17,8 @@ import Numeric.LinearAlgebra.Static import Grenade.Core.Network import Grenade.Core.Shape +import Grenade.Layers.Internal.Update + -- | A basic fully connected (or inner product) neural network layer. data FullyConnected i o = FullyConnected !(R o) -- Bias neuron weights @@ -38,32 +37,29 @@ instance (KnownNat i, KnownNat o) => UpdateLayer (FullyConnected i o) where type Gradient (FullyConnected i o) = (FullyConnected' i o) runUpdate LearningParameters {..} (FullyConnected oldBias oldBiasMomentum oldActivations oldMomentum) (FullyConnected' biasGradient activationGradient) = - let newBiasMomentum = konst learningMomentum * oldBiasMomentum - konst learningRate * biasGradient - newBias = oldBias + newBiasMomentum - newMomentum = konst learningMomentum * oldMomentum - konst learningRate * activationGradient - regulariser = konst (learningRegulariser * learningRate) * oldActivations - newActivations = oldActivations + newMomentum - regulariser + let (newBias, newBiasMomentum) = decendVector learningRate learningMomentum learningRegulariser oldBias biasGradient oldBiasMomentum + (newActivations, newMomentum) = decendMatrix learningRate learningMomentum learningRegulariser oldActivations activationGradient oldMomentum in FullyConnected newBias newBiasMomentum newActivations newMomentum createRandom = randomFullyConnected instance (KnownNat i, KnownNat o) => Layer (FullyConnected i o) ('D1 i) ('D1 o) where -- Do a matrix vector multiplication and return the result. - runForwards (FullyConnected wB _ wN _) (S1D' v) = S1D' (wB + wN #> v) + runForwards (FullyConnected wB _ wN _) (S1D v) = S1D (wB + wN #> v) -- Run a backpropogation step for a full connected layer. - runBackwards (FullyConnected _ _ wN _) (S1D' x) (S1D' dEdy) = + runBackwards (FullyConnected _ _ wN _) (S1D x) (S1D dEdy) = let wB' = dEdy mm' = dEdy `outer` x -- calcluate derivatives for next step dWs = tr wN #> dEdy - in (FullyConnected' wB' mm', S1D' dWs) + in (FullyConnected' wB' mm', S1D dWs) randomFullyConnected :: (MonadRandom m, KnownNat i, KnownNat o) => m (FullyConnected i o) randomFullyConnected = do - s1 :: Int <- getRandom - s2 :: Int <- getRandom + s1 <- getRandom + s2 <- getRandom let wB = randomVector s1 Uniform * 2 - 1 wN = uniformSample s2 (-1) 1 bm = konst 0 diff --git a/src/Grenade/Layers/Fuse.hs b/src/Grenade/Layers/Fuse.hs index 9ff7559..4bc28eb 100644 --- a/src/Grenade/Layers/Fuse.hs +++ b/src/Grenade/Layers/Fuse.hs @@ -1,16 +1,11 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE GADTs #-} -{-# LANGUAGE KindSignatures #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE PolyKinds #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} - - module Grenade.Layers.Fuse ( Fuse (..) 
) where @@ -42,11 +37,11 @@ instance (Layer x i h, Layer y h o) => UpdateLayer (Fuse x y i h o) where instance (Layer x i h, Layer y h o) => Layer (Fuse x y i h o) i o where runForwards (x :$$ y) input = - let yInput :: S' h = runForwards x input + let yInput :: S h = runForwards x input in runForwards y yInput runBackwards (x :$$ y) input backGradient = - let yInput :: S' h = runForwards x input + let yInput :: S h = runForwards x input (y', yGrad) = runBackwards y yInput backGradient (x', xGrad) = runBackwards x input yGrad in ((x', y'), xGrad) diff --git a/src/Grenade/Layers/Internal/Convolution.hs b/src/Grenade/Layers/Internal/Convolution.hs index 89841d3..4b43adb 100644 --- a/src/Grenade/Layers/Internal/Convolution.hs +++ b/src/Grenade/Layers/Internal/Convolution.hs @@ -6,7 +6,9 @@ module Grenade.Layers.Internal.Convolution ( , vid2col ) where -import Foreign ( mallocForeignPtrArray0, withForeignPtr ) +import qualified Data.Vector.Storable as U ( unsafeToForeignPtr0, unsafeFromForeignPtr0 ) + +import Foreign ( mallocForeignPtrArray, withForeignPtr ) import Foreign.Ptr ( Ptr ) import Numeric.LinearAlgebra ( Matrix, flatten, rows, cols ) @@ -28,19 +30,19 @@ col2im_c :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> Matrix Double -> Ma col2im_c channels height width kernelRows kernelColumns strideRows strideColumns dataCol = let vec = flatten dataCol in unsafePerformIO $ do - outPtr <- mallocForeignPtrArray0 (height * width * channels) - let (inPtr, inOffset, _) = U.unsafeToForeignPtr vec + outPtr <- mallocForeignPtrArray (height * width * channels) + let (inPtr, _) = U.unsafeToForeignPtr0 vec withForeignPtr inPtr $ \inPtr' -> withForeignPtr outPtr $ \outPtr' -> - col2im_cpu inPtr' inOffset channels height width kernelRows kernelColumns strideRows strideColumns outPtr' + col2im_cpu inPtr' channels height width kernelRows kernelColumns strideRows strideColumns outPtr' - let matVec = U.unsafeFromForeignPtr outPtr 0 (height * width * channels) + let matVec = U.unsafeFromForeignPtr0 outPtr (height * width * channels) return $ U.matrixFromVector U.RowMajor (height * channels) width matVec foreign import ccall unsafe col2im_cpu - :: Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () + :: Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () vid2col :: Int -> Int -> Int -> Int -> Int -> Int -> Matrix Double -> Matrix Double vid2col kernelRows kernelColumns strideRows strideColumns height width dataVid = @@ -63,16 +65,16 @@ im2col_c channels height width kernelRows kernelColumns strideRows strideColumns kernelSize = kernelRows * kernelColumns numberOfPatches = rowOut * colOut in unsafePerformIO $ do - outPtr <- mallocForeignPtrArray0 (numberOfPatches * kernelSize * channels) - let (inPtr, inOffset, _) = U.unsafeToForeignPtr vec + outPtr <- mallocForeignPtrArray (numberOfPatches * kernelSize * channels) + let (inPtr, _) = U.unsafeToForeignPtr0 vec withForeignPtr inPtr $ \inPtr' -> withForeignPtr outPtr $ \outPtr' -> - im2col_cpu inPtr' inOffset channels height width kernelRows kernelColumns strideRows strideColumns outPtr' + im2col_cpu inPtr' channels height width kernelRows kernelColumns strideRows strideColumns outPtr' - let matVec = U.unsafeFromForeignPtr outPtr 0 (numberOfPatches * kernelSize * channels) + let matVec = U.unsafeFromForeignPtr0 outPtr (numberOfPatches * kernelSize * channels) return $ U.matrixFromVector U.RowMajor numberOfPatches (kernelSize * channels) matVec foreign import ccall unsafe im2col_cpu - :: Ptr 
Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () + :: Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () diff --git a/src/Grenade/Layers/Internal/Pooling.hs b/src/Grenade/Layers/Internal/Pooling.hs index 44c0322..3d21e78 100644 --- a/src/Grenade/Layers/Internal/Pooling.hs +++ b/src/Grenade/Layers/Internal/Pooling.hs @@ -4,7 +4,9 @@ module Grenade.Layers.Internal.Pooling ( , poolBackward ) where -import Foreign ( mallocForeignPtrArray0, withForeignPtr ) +import qualified Data.Vector.Storable as U ( unsafeToForeignPtr0, unsafeFromForeignPtr0 ) + +import Foreign ( mallocForeignPtrArray, withForeignPtr ) import Foreign.Ptr ( Ptr ) import Numeric.LinearAlgebra ( Matrix , flatten ) @@ -19,37 +21,37 @@ poolForward channels height width kernelRows kernelColumns strideRows strideColu colOut = (width - kernelColumns) `div` strideColumns + 1 numberOfPatches = rowOut * colOut in unsafePerformIO $ do - outPtr <- mallocForeignPtrArray0 (numberOfPatches * channels) - let (inPtr, inOffset, _) = U.unsafeToForeignPtr vec + outPtr <- mallocForeignPtrArray (numberOfPatches * channels) + let (inPtr, _) = U.unsafeToForeignPtr0 vec withForeignPtr inPtr $ \inPtr' -> withForeignPtr outPtr $ \outPtr' -> - pool_forwards_cpu inPtr' inOffset channels height width kernelRows kernelColumns strideRows strideColumns outPtr' + pool_forwards_cpu inPtr' channels height width kernelRows kernelColumns strideRows strideColumns outPtr' - let matVec = U.unsafeFromForeignPtr outPtr 0 (numberOfPatches * channels) + let matVec = U.unsafeFromForeignPtr0 outPtr (numberOfPatches * channels) return $ U.matrixFromVector U.RowMajor (rowOut * channels) colOut matVec foreign import ccall unsafe pool_forwards_cpu - :: Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () + :: Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () poolBackward :: Int -> Int -> Int -> Int -> Int -> Int -> Int -> Matrix Double -> Matrix Double -> Matrix Double poolBackward channels height width kernelRows kernelColumns strideRows strideColumns dataIm dataGrad = let vecIm = flatten dataIm vecGrad = flatten dataGrad in unsafePerformIO $ do - outPtr <- mallocForeignPtrArray0 (height * width * channels) - let (imPtr, imOffset, _) = U.unsafeToForeignPtr vecIm - let (gradPtr, gradOffset, _) = U.unsafeToForeignPtr vecGrad + outPtr <- mallocForeignPtrArray (height * width * channels) + let (imPtr, _) = U.unsafeToForeignPtr0 vecIm + let (gradPtr, _) = U.unsafeToForeignPtr0 vecGrad withForeignPtr imPtr $ \imPtr' -> withForeignPtr gradPtr $ \gradPtr' -> withForeignPtr outPtr $ \outPtr' -> - pool_backwards_cpu imPtr' imOffset gradPtr' gradOffset channels height width kernelRows kernelColumns strideRows strideColumns outPtr' + pool_backwards_cpu imPtr' gradPtr' channels height width kernelRows kernelColumns strideRows strideColumns outPtr' - let matVec = U.unsafeFromForeignPtr outPtr 0 (height * width * channels) + let matVec = U.unsafeFromForeignPtr0 outPtr (height * width * channels) return $ U.matrixFromVector U.RowMajor (height * channels) width matVec foreign import ccall unsafe pool_backwards_cpu - :: Ptr Double -> Int -> Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () + :: Ptr Double -> Ptr Double -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Ptr Double -> IO () diff --git a/src/Grenade/Layers/Internal/Update.hs b/src/Grenade/Layers/Internal/Update.hs new file mode 100644 index 
0000000..719db39 --- /dev/null +++ b/src/Grenade/Layers/Internal/Update.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +module Grenade.Layers.Internal.Update ( + decendMatrix + , decendVector + ) where + +import Data.Maybe ( fromJust ) +import qualified Data.Vector.Storable as U ( unsafeToForeignPtr0, unsafeFromForeignPtr0 ) + +import Foreign ( mallocForeignPtrArray, withForeignPtr ) +import Foreign.Ptr ( Ptr ) +import GHC.TypeLits + +import Numeric.LinearAlgebra ( Vector, flatten ) +import Numeric.LinearAlgebra.Static +import qualified Numeric.LinearAlgebra.Devel as U + +import System.IO.Unsafe ( unsafePerformIO ) + +decendMatrix :: (KnownNat rows, KnownNat columns) => Double -> Double -> Double -> L rows columns -> L rows columns -> L rows columns -> (L rows columns, L rows columns) +decendMatrix rate momentum regulariser weights gradient lastUpdate = + let (rows, cols) = size weights + len = rows * cols + -- Most gradients come in in ColumnMajor, + -- so we'll transpose here before flattening them + -- into a vector to prevent a copy. + -- + -- This gives ~15% speed improvement for LSTMs. + weights' = flatten . tr . extract $ weights + gradient' = flatten . tr . extract $ gradient + lastUpdate' = flatten . tr . extract $ lastUpdate + (vw, vm) = decendUnsafe len rate momentum regulariser weights' gradient' lastUpdate' + + -- Note that it's ColumnMajor, as we did a transpose before + -- using the internal vectors. + mw = U.matrixFromVector U.ColumnMajor rows cols vw + mm = U.matrixFromVector U.ColumnMajor rows cols vm + in (fromJust . create $ mw, fromJust . create $ mm) + +decendVector :: (KnownNat r) => Double -> Double -> Double -> R r -> R r -> R r -> (R r, R r) +decendVector rate momentum regulariser weights gradient lastUpdate = + let len = size weights + weights' = extract weights + gradient' = extract gradient + lastUpdate' = extract lastUpdate + (vw, vm) = decendUnsafe len rate momentum regulariser weights' gradient' lastUpdate' + in (fromJust $ create vw, fromJust $ create vm) + +decendUnsafe :: Int -> Double -> Double -> Double -> Vector Double -> Vector Double -> Vector Double -> (Vector Double, Vector Double) +decendUnsafe len rate momentum regulariser weights gradient lastUpdate = + unsafePerformIO $ do + outWPtr <- mallocForeignPtrArray len + outMPtr <- mallocForeignPtrArray len + let (wPtr, _) = U.unsafeToForeignPtr0 weights + let (gPtr, _) = U.unsafeToForeignPtr0 gradient + let (lPtr, _) = U.unsafeToForeignPtr0 lastUpdate + + withForeignPtr wPtr $ \wPtr' -> + withForeignPtr gPtr $ \gPtr' -> + withForeignPtr lPtr $ \lPtr' -> + withForeignPtr outWPtr $ \outWPtr' -> + withForeignPtr outMPtr $ \outMPtr' -> + decend_cpu len rate momentum regulariser wPtr' gPtr' lPtr' outWPtr' outMPtr' + + return (U.unsafeFromForeignPtr0 outWPtr len, U.unsafeFromForeignPtr0 outMPtr len) + +foreign import ccall unsafe + decend_cpu + :: Int -> Double -> Double -> Double -> Ptr Double -> Ptr Double -> Ptr Double -> Ptr Double -> Ptr Double -> IO () + diff --git a/src/Grenade/Layers/Logit.hs b/src/Grenade/Layers/Logit.hs index e5bfea5..2be3899 100644 --- a/src/Grenade/Layers/Logit.hs +++ b/src/Grenade/Layers/Logit.hs @@ -1,10 +1,7 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} - module Grenade.Layers.Logit ( Logit (..) 
) where @@ -27,17 +24,16 @@ instance UpdateLayer Logit where createRandom = return Logit instance (KnownNat i) => Layer Logit ('D1 i) ('D1 i) where - runForwards _ (S1D' y) = S1D' (logistic y) - runBackwards _ (S1D' y) (S1D' dEdy) = ((), S1D' (logistic' y * dEdy)) + runForwards _ (S1D y) = S1D (logistic y) + runBackwards _ (S1D y) (S1D dEdy) = ((), S1D (logistic' y * dEdy)) instance (KnownNat i, KnownNat j) => Layer Logit ('D2 i j) ('D2 i j) where - runForwards _ (S2D' y) = S2D' (logistic y) - runBackwards _ (S2D' y) (S2D' dEdy) = ((), S2D' (logistic' y * dEdy)) + runForwards _ (S2D y) = S2D (logistic y) + runBackwards _ (S2D y) (S2D dEdy) = ((), S2D (logistic' y * dEdy)) instance (KnownNat i, KnownNat j, KnownNat k) => Layer Logit ('D3 i j k) ('D3 i j k) where - runForwards _ (S3D' y) = S3D' (logistic y) - runBackwards _ (S3D' y) (S3D' dEdy) = ((), S3D' (logistic' y * dEdy)) - + runForwards _ (S3D y) = S3D (logistic y) + runBackwards _ (S3D y) (S3D dEdy) = ((), S3D (logistic' y * dEdy)) logistic :: Floating a => a -> a logistic x = 1 / (1 + exp (-x)) diff --git a/src/Grenade/Layers/Pad.hs b/src/Grenade/Layers/Pad.hs index 0969e39..838a9dc 100644 --- a/src/Grenade/Layers/Pad.hs +++ b/src/Grenade/Layers/Pad.hs @@ -4,10 +4,6 @@ {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PolyKinds #-} - module Grenade.Layers.Pad ( Pad (..) ) where @@ -50,19 +46,19 @@ instance ( KnownNat padLeft , (inputRows + padTop + padBottom) ~ outputRows , (inputColumns + padLeft + padRight) ~ outputColumns ) => Layer (Pad padLeft padTop padRight padBottom) ('D2 inputRows inputColumns) ('D2 outputRows outputColumns) where - runForwards Pad (S2D' input) = + runForwards Pad (S2D input) = let padl = fromIntegral $ natVal (Proxy :: Proxy padLeft) padt = fromIntegral $ natVal (Proxy :: Proxy padTop) padr = fromIntegral $ natVal (Proxy :: Proxy padRight) padb = fromIntegral $ natVal (Proxy :: Proxy padBottom) m = extract input r = diagBlock [konst 0 (padt,padl), m, konst 0 (padb,padr)] - in S2D' . fromJust . create $ r - runBackwards Pad _ (S2D' dEdy) = + in S2D . fromJust . create $ r + runBackwards Pad _ (S2D dEdy) = let padl = fromIntegral $ natVal (Proxy :: Proxy padLeft) padt = fromIntegral $ natVal (Proxy :: Proxy padTop) nrows = fromIntegral $ natVal (Proxy :: Proxy inputRows) ncols = fromIntegral $ natVal (Proxy :: Proxy inputColumns) m = extract dEdy vs = subMatrix (padt, padl) (nrows, ncols) m - in ((), S2D' . fromJust . create $ vs) + in ((), S2D . fromJust . create $ vs) diff --git a/src/Grenade/Layers/Pooling.hs b/src/Grenade/Layers/Pooling.hs index ac794c5..cbfef95 100644 --- a/src/Grenade/Layers/Pooling.hs +++ b/src/Grenade/Layers/Pooling.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} @@ -6,10 +5,7 @@ {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE PolyKinds #-} - module Grenade.Layers.Pooling ( Pooling (..) 
) where @@ -55,7 +51,7 @@ instance ( KnownNat kernelRows , ((outputRows - 1) * strideRows) ~ (inputRows - kernelRows) , ((outputColumns - 1) * strideColumns) ~ (inputColumns - kernelColumns) ) => Layer (Pooling kernelRows kernelColumns strideRows strideColumns) ('D2 inputRows inputColumns) ('D2 outputRows outputColumns) where - runForwards Pooling (S2D' input) = + runForwards Pooling (S2D input) = let height = fromIntegral $ natVal (Proxy :: Proxy inputRows) width = fromIntegral $ natVal (Proxy :: Proxy inputColumns) kx = fromIntegral $ natVal (Proxy :: Proxy kernelRows) @@ -65,8 +61,8 @@ instance ( KnownNat kernelRows ex = extract input r = poolForward 1 height width kx ky sx sy ex rs = fromJust . create $ r - in S2D' $ rs - runBackwards Pooling (S2D' input) (S2D' dEdy) = + in S2D $ rs + runBackwards Pooling (S2D input) (S2D dEdy) = let height = fromIntegral $ natVal (Proxy :: Proxy inputRows) width = fromIntegral $ natVal (Proxy :: Proxy inputColumns) kx = fromIntegral $ natVal (Proxy :: Proxy kernelRows) @@ -76,7 +72,7 @@ instance ( KnownNat kernelRows ex = extract input eo = extract dEdy vs = poolBackward 1 height width kx ky sx sy ex eo - in ((), S2D' . fromJust . create $ vs) + in ((), S2D . fromJust . create $ vs) -- | A three dimensional image can be pooled on each layer. @@ -93,7 +89,7 @@ instance ( KnownNat kernelRows , ((outputRows - 1) * strideRows) ~ (inputRows - kernelRows) , ((outputColumns - 1) * strideColumns) ~ (inputColumns - kernelColumns) ) => Layer (Pooling kernelRows kernelColumns strideRows strideColumns) ('D3 inputRows inputColumns channels) ('D3 outputRows outputColumns channels) where - runForwards Pooling (S3D' input) = + runForwards Pooling (S3D input) = let ix = fromIntegral $ natVal (Proxy :: Proxy inputRows) iy = fromIntegral $ natVal (Proxy :: Proxy inputColumns) kx = fromIntegral $ natVal (Proxy :: Proxy kernelRows) @@ -104,8 +100,8 @@ instance ( KnownNat kernelRows ex = extract input r = poolForward ch ix iy kx ky sx sy ex rs = fromJust . create $ r - in S3D' rs - runBackwards Pooling (S3D' input) (S3D' dEdy) = + in S3D rs + runBackwards Pooling (S3D input) (S3D dEdy) = let ix = fromIntegral $ natVal (Proxy :: Proxy inputRows) iy = fromIntegral $ natVal (Proxy :: Proxy inputColumns) kx = fromIntegral $ natVal (Proxy :: Proxy kernelRows) @@ -116,4 +112,4 @@ instance ( KnownNat kernelRows ex = extract input eo = extract dEdy vs = poolBackward ch ix iy kx ky sx sy ex eo - in ((), S3D' . fromJust . create $ vs) + in ((), S3D . fromJust . create $ vs) diff --git a/src/Grenade/Layers/Relu.hs b/src/Grenade/Layers/Relu.hs index c561238..57ad5f8 100644 --- a/src/Grenade/Layers/Relu.hs +++ b/src/Grenade/Layers/Relu.hs @@ -1,10 +1,7 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} - module Grenade.Layers.Relu ( Relu (..) 
) where @@ -27,25 +24,25 @@ instance UpdateLayer Relu where createRandom = return Relu instance ( KnownNat i) => Layer Relu ('D1 i) ('D1 i) where - runForwards _ (S1D' y) = S1D' (relu y) + runForwards _ (S1D y) = S1D (relu y) where relu = LAS.dvmap (\a -> if a <= 0 then 0 else a) - runBackwards _ (S1D' y) (S1D' dEdy) = ((), S1D' (relu' y * dEdy)) + runBackwards _ (S1D y) (S1D dEdy) = ((), S1D (relu' y * dEdy)) where relu' = LAS.dvmap (\a -> if a <= 0 then 0 else 1) instance (KnownNat i, KnownNat j) => Layer Relu ('D2 i j) ('D2 i j) where - runForwards _ (S2D' y) = S2D' (relu y) + runForwards _ (S2D y) = S2D (relu y) where relu = LAS.dmmap (\a -> if a <= 0 then 0 else a) - runBackwards _ (S2D' y) (S2D' dEdy) = ((), S2D' (relu' y * dEdy)) + runBackwards _ (S2D y) (S2D dEdy) = ((), S2D (relu' y * dEdy)) where relu' = LAS.dmmap (\a -> if a <= 0 then 0 else 1) instance (KnownNat i, KnownNat j, KnownNat k) => Layer Relu ('D3 i j k) ('D3 i j k) where - runForwards _ (S3D' y) = S3D' (relu y) + runForwards _ (S3D y) = S3D (relu y) where relu = LAS.dmmap (\a -> if a <= 0 then 0 else a) - runBackwards _ (S3D' y) (S3D' dEdy) = ((), S3D' (relu' y * dEdy)) + runBackwards _ (S3D y) (S3D dEdy) = ((), S3D (relu' y * dEdy)) where relu' = LAS.dmmap (\a -> if a <= 0 then 0 else 1) diff --git a/src/Grenade/Layers/Tanh.hs b/src/Grenade/Layers/Tanh.hs index 7fd20ed..5736bb4 100644 --- a/src/Grenade/Layers/Tanh.hs +++ b/src/Grenade/Layers/Tanh.hs @@ -1,10 +1,7 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE FlexibleInstances #-} - module Grenade.Layers.Tanh ( Tanh (..) ) where @@ -24,16 +21,16 @@ instance UpdateLayer Tanh where createRandom = return Tanh instance KnownNat i => Layer Tanh ('D1 i) ('D1 i) where - runForwards _ (S1D' y) = S1D' (tanh y) - runBackwards _ (S1D' y) (S1D' dEdy) = ((), S1D' (tanh' y * dEdy)) + runForwards _ (S1D y) = S1D (tanh y) + runBackwards _ (S1D y) (S1D dEdy) = ((), S1D (tanh' y * dEdy)) instance (KnownNat i, KnownNat j) => Layer Tanh ('D2 i j) ('D2 i j) where - runForwards _ (S2D' y) = S2D' (tanh y) - runBackwards _ (S2D' y) (S2D' dEdy) = ((), S2D' (tanh' y * dEdy)) + runForwards _ (S2D y) = S2D (tanh y) + runBackwards _ (S2D y) (S2D dEdy) = ((), S2D (tanh' y * dEdy)) instance (KnownNat i, KnownNat j, KnownNat k) => Layer Tanh ('D3 i j k) ('D3 i j k) where - runForwards _ (S3D' y) = S3D' (tanh y) - runBackwards _ (S3D' y) (S3D' dEdy) = ((), S3D' (tanh' y * dEdy)) + runForwards _ (S3D y) = S3D (tanh y) + runBackwards _ (S3D y) (S3D dEdy) = ((), S3D (tanh' y * dEdy)) tanh' :: (Floating a) => a -> a tanh' t = 1 - s ^ (2 :: Int) where s = tanh t diff --git a/src/Grenade/Recurrent.hs b/src/Grenade/Recurrent.hs new file mode 100644 index 0000000..f79e171 --- /dev/null +++ b/src/Grenade/Recurrent.hs @@ -0,0 +1,9 @@ +module Grenade.Recurrent ( + module X + ) where + +import Grenade.Recurrent.Core.Network as X +import Grenade.Recurrent.Core.Runner as X +import Grenade.Recurrent.Layers.BasicRecurrent as X +import Grenade.Recurrent.Layers.LSTM as X +import Grenade.Recurrent.Layers.Trivial as X diff --git a/src/Grenade/Recurrent/Core/Network.hs b/src/Grenade/Recurrent/Core/Network.hs new file mode 100644 index 0000000..23a7e3d --- /dev/null +++ b/src/Grenade/Recurrent/Core/Network.hs @@ -0,0 +1,98 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# 
LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE EmptyDataDecls #-} +module Grenade.Recurrent.Core.Network ( + Recurrent + , FeedForward + , RecurrentLayer (..) + , RecurrentUpdateLayer (..) + , RecurrentNetwork (..) + , RecurrentInputs (..) + , CreatableRecurrent (..) + ) where + + +import Control.Monad.Random ( MonadRandom ) +import Data.Singletons ( SingI ) + +import Grenade.Core.Shape +import Grenade.Core.Network + + +-- | Witness type to indicate we're building up with a normal feed +-- forward layer. +data FeedForward :: * -> * +-- | Witness type to indicate we're building up with a recurrent layer. +data Recurrent :: * -> * + +-- | Class for a recurrent layer. +-- It's quite similar to a normal layer, except for the input and output +-- of an extra recurrent data shape. +class UpdateLayer x => RecurrentUpdateLayer x where + -- | Shape of data that is passed between each subsequent run of the layer + type RecurrentShape x :: Shape + +class (RecurrentUpdateLayer x, SingI (RecurrentShape x)) => RecurrentLayer x (i :: Shape) (o :: Shape) where + -- | Used in training and scoring. Take the input from the previous + -- layer, and give the output from this layer. + runRecurrentForwards :: x -> S (RecurrentShape x) -> S i -> (S (RecurrentShape x), S o) + -- | Back propagate a step. Takes the current layer, the input that the + -- layer gave from the input and the back propagated derivatives from + -- the layer above. + -- Returns the gradient layer and the derivatives to push back further. + runRecurrentBackwards :: x -> S (RecurrentShape x) -> S i -> S (RecurrentShape x) -> S o -> (Gradient x, S (RecurrentShape x), S i) + +data RecurrentNetwork :: [*] -> [Shape] -> * where + OR :: (SingI i, SingI o, Layer x i o) => !x -> RecurrentNetwork '[FeedForward x] '[i, o] + (:~~>) :: (SingI i, Layer x i h) => !x -> !(RecurrentNetwork xs (h ': hs)) -> RecurrentNetwork (FeedForward x ': xs) (i ': h ': hs) + (:~@>) :: (SingI i, RecurrentLayer x i h) => !x -> !(RecurrentNetwork xs (h ': hs)) -> RecurrentNetwork (Recurrent x ': xs) (i ': h ': hs) +infixr 5 :~~> +infixr 5 :~@> + +instance Show (RecurrentNetwork l h) where + show (OR a) = "OR " ++ show a + show (i :~~> o) = show i ++ "\n:~~>\n" ++ show o + show (i :~@> o) = show i ++ "\n:~@>\n" ++ show o + + +-- | Recurrent inputs (sideways shapes on an imaginary unrolled graph). +-- Parameterised on the layers of a Network. +data RecurrentInputs :: [*] -> * where + ORS :: UpdateLayer x + => () -> RecurrentInputs '[FeedForward x] + (:~~+>) :: UpdateLayer x + => () -> !(RecurrentInputs xs) -> RecurrentInputs (FeedForward x ': xs) + (:~@+>) :: (SingI (RecurrentShape x), RecurrentUpdateLayer x) + => !(S (RecurrentShape x)) -> !(RecurrentInputs xs) -> RecurrentInputs (Recurrent x ': xs) +infixr 5 :~~+> +infixr 5 :~@+> + +-- | A network can easily be created by hand with (:~~>) and (:~@>), but an easy way to initialise a random +-- recurrent network and a set of random inputs for it is with randomRecurrent.
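+-- +-- For example (an illustrative sketch only; the layer stack and sizes here are hypothetical, +-- using BasicRecurrent and FullyConnected from their respective modules): +-- +-- > randomSmallNet :: MonadRandom m +-- > => m ( RecurrentNetwork '[ Recurrent (BasicRecurrent 10 20), FeedForward (FullyConnected 20 5) ] '[ 'D1 10, 'D1 20, 'D1 5 ] +-- > , RecurrentInputs '[ Recurrent (BasicRecurrent 10 20), FeedForward (FullyConnected 20 5) ] ) +-- > randomSmallNet = randomRecurrent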
+class CreatableRecurrent (xs :: [*]) (ss :: [Shape]) where + -- | Create a network of the types requested + randomRecurrent :: MonadRandom m => m (RecurrentNetwork xs ss, RecurrentInputs xs) + +instance (SingI i, SingI o, Layer x i o) => CreatableRecurrent (FeedForward x ': '[]) (i ': o ': '[]) where + randomRecurrent = do + thisLayer <- createRandom + return (OR thisLayer, ORS ()) + +instance (SingI i, Layer x i o, CreatableRecurrent xs (o ': r ': rs)) => CreatableRecurrent (FeedForward x ': xs) (i ': o ': r ': rs) where + randomRecurrent = do + thisLayer <- createRandom + (rest, resti) <- randomRecurrent + return (thisLayer :~~> rest, () :~~+> resti) + +instance (SingI i, RecurrentLayer x i o, CreatableRecurrent xs (o ': r ': rs)) => CreatableRecurrent (Recurrent x ': xs) (i ': o ': r ': rs) where + randomRecurrent = do + thisLayer <- createRandom + thisShape <- randomOfShape + (rest, resti) <- randomRecurrent + return (thisLayer :~@> rest, thisShape :~@+> resti) + diff --git a/src/Grenade/Recurrent/Core/Runner.hs b/src/Grenade/Recurrent/Core/Runner.hs new file mode 100644 index 0000000..f5f4fd5 --- /dev/null +++ b/src/Grenade/Recurrent/Core/Runner.hs @@ -0,0 +1,144 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RecordWildCards #-} +module Grenade.Recurrent.Core.Runner ( + trainRecurrent + , runRecurrent + ) where + +import Data.Singletons.Prelude +import Grenade.Core.Network +import Grenade.Core.Shape + +import Grenade.Recurrent.Core.Network + +-- | Drive a network and collect its back propagated gradients. +trainRecurrent :: forall shapes layers. SingI (Last shapes) + => LearningParameters + -> RecurrentNetwork layers shapes + -> RecurrentInputs layers + -> [(S (Head shapes), Maybe (S (Last shapes)))] + -> (RecurrentNetwork layers shapes, RecurrentInputs layers) +trainRecurrent rate network recinputs examples = + updateBack $ go inputs network recinputs + where + inputs = fst <$> examples + targets = snd <$> examples + updateBack (a,recgrad,_) = (a,updateRecInputs rate recinputs recgrad) + + go :: forall js sublayers. (Last js ~ Last shapes) + => [S (Head js)] -- ^ input vectors + -> RecurrentNetwork sublayers js -- ^ network to train + -> RecurrentInputs sublayers + -> (RecurrentNetwork sublayers js, RecurrentInputs sublayers, [S (Head js)]) + + -- This is a simple non-recurrent layer, just map it forwards + -- Note we're doing training here; we could just return a list of gradients + -- (and probably will in future). + go !xs (layer :~~> n) (() :~~+> nIn) + = let ys = runForwards layer <$> xs + -- recursively run the rest of the network, and get the gradients from above. + (newFN, ig, grads) = go ys n nIn + -- calculate the gradient for this layer to pass down, + back = uncurry (runBackwards layer) <$> zip (reverse xs) grads + -- the new trained layer. + newlayer = runUpdates rate layer (fst <$> back) + + in (newlayer :~~> newFN, () :~~+> ig, snd <$> back) + + -- This is a recurrent layer, so we need to do a scan, first input to last, providing + -- the recurrent shape output to the next layer. + go !xs (layer :~@> n) (g :~@+> nIn) + = let ys = scanlFrom layer g xs + + (newFN, ig, grads) = go (snd <$> ys) n nIn + + backExamples = zip3 (fst <$> reverse ys) (reverse xs) grads + + (rg, back) = myscanbackward layer backExamples + -- the new trained layer.
+ newlayer = runUpdates rate layer (fst <$> back) + in (newlayer :~@> newFN, rg :~@+> ig, snd <$> back) + + -- Handle the output layer, bouncing the derivatives back down. + -- We may not have a target for each example, so when we don't use 0 gradient. + go !xs (OR layer) (ORS ()) + = let ys = runForwards layer <$> xs + -- recursively run the rest of the network, and get the gradients from above. + back = uncurry (runBackwards layer) <$> zip xs (zipWith makeError ys targets) + -- the new trained layer. + newlayer = runUpdates rate layer (reverse $ fst <$> back) + in (OR newlayer, ORS (), reverse (snd <$> back)) + + go _ _ _ = + error "Impossible for network and recurrent inputs to have different shapes" + + + makeError :: S (Last shapes) -> Maybe (S (Last shapes)) -> S (Last shapes) + makeError _ Nothing = 0 + makeError y (Just t) = y - t + + updateRecInputs :: forall sublayers. + LearningParameters + -> RecurrentInputs sublayers + -> RecurrentInputs sublayers + -> RecurrentInputs sublayers + + updateRecInputs l@LearningParameters {..} (() :~~+> xs) (() :~~+> ys) + = () :~~+> updateRecInputs l xs ys + + updateRecInputs l@LearningParameters {..} (x :~@+> xs) (y :~@+> ys) + = (x - realToFrac learningRate * y) :~@+> updateRecInputs l xs ys + + updateRecInputs _ (ORS ()) (ORS ()) + = ORS () + updateRecInputs _ _ _ + = error "Impossible for updateRecInputs to have different shapes" + +scanlFrom :: forall x i o. RecurrentLayer x i o + => x -- ^ the layer + -> S (RecurrentShape x) -- ^ place to start + -> [S i] -- ^ list of inputs to scan through + -> [(S (RecurrentShape x), S o)] -- ^ list of scan inputs and outputs +scanlFrom !layer !recShape (x:xs) = + let (lerec, lepush) = runRecurrentForwards layer recShape x + in (recShape, lepush) : scanlFrom layer lerec xs +scanlFrom _ _ [] = [] + +myscanbackward :: forall x i o. RecurrentLayer x i o + => x -- ^ the layer + -> [(S (RecurrentShape x), S i, S o)] -- ^ the list of inputs and output to scan over + -> (S (RecurrentShape x), [(Gradient x, S i)]) -- ^ list of gradients to fold and inputs to backprop +myscanbackward layer = + goX 0 + where + goX :: S (RecurrentShape x) -> [(S (RecurrentShape x), S i, S o)] -> (S (RecurrentShape x), [(Gradient x, S i)]) + goX !lastback ((recShape, lastin, backgrad):xs) = + let (layergrad, recgrad, ingrad) = runRecurrentBackwards layer recShape lastin lastback backgrad + (pushedback, ll) = goX recgrad xs + in (pushedback, (layergrad, ingrad) : ll) + goX !lastback [] = (lastback, []) + +-- | Just forwards propagation with no training. 
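+-- +-- For example (an illustrative sketch only; 'net', 'rec0' and 'inputs' are hypothetical +-- bindings, and mapAccumL comes from Data.Traversable), a whole sequence can be run by +-- threading the recurrent inputs through each step: +-- +-- > (finalRec, outputs) = mapAccumL (runRecurrent net) rec0 inputs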
+runRecurrent :: RecurrentNetwork layers shapes + -> RecurrentInputs layers -> S (Head shapes) + -> (RecurrentInputs layers, S (Last shapes)) +runRecurrent (layer :~~> n) (() :~~+> nr) !x + = let ys = runForwards layer x + (nr', o) = runRecurrent n nr ys + in (() :~~+> nr', o) +runRecurrent (layer :~@> n) (recin :~@+> nr) !x + = let (recin', y) = runRecurrentForwards layer recin x + (nr', o) = runRecurrent n nr y + in (recin' :~@+> nr', o) +runRecurrent (OR layer) (ORS ()) !x + = (ORS (), runForwards layer x) + +runRecurrent _ _ _ + = error "Impossible for the gradients of a network to have a different length or shape to the network" diff --git a/src/Grenade/Recurrent/Layers/BasicRecurrent.hs b/src/Grenade/Recurrent/Layers/BasicRecurrent.hs new file mode 100644 index 0000000..6b1ec28 --- /dev/null +++ b/src/Grenade/Recurrent/Layers/BasicRecurrent.hs @@ -0,0 +1,92 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE UndecidableInstances #-} +module Grenade.Recurrent.Layers.BasicRecurrent ( + BasicRecurrent (..) + , randomBasicRecurrent + ) where + +import Control.Monad.Random ( MonadRandom, getRandom ) + +import Data.Singletons.TypeLits + +import Numeric.LinearAlgebra.Static + +import GHC.TypeLits + +import Grenade.Core.Network +import Grenade.Core.Shape +import Grenade.Recurrent.Core.Network + +data BasicRecurrent :: Nat -- Input layer size + -> Nat -- Output layer size + -> * where + BasicRecurrent :: ( KnownNat input + , KnownNat output + , KnownNat matrixCols + , matrixCols ~ (input + output)) + => !(R output) -- Bias neuron weights + -> !(R output) -- Bias neuron momentum + -> !(L output matrixCols) -- Activation + -> !(L output matrixCols) -- Momentum + -> BasicRecurrent input output + +data BasicRecurrent' :: Nat -- Input layer size + -> Nat -- Output layer size + -> * where + BasicRecurrent' :: ( KnownNat input + , KnownNat output + , KnownNat matrixCols + , matrixCols ~ (input + output)) + => !(R output) -- Bias neuron gradients + -> !(L output matrixCols) + -> BasicRecurrent' input output + +instance Show (BasicRecurrent i o) where + show BasicRecurrent {} = "BasicRecurrent" + +instance (KnownNat i, KnownNat o, KnownNat (i + o)) => UpdateLayer (BasicRecurrent i o) where + type Gradient (BasicRecurrent i o) = (BasicRecurrent' i o) + + runUpdate LearningParameters {..} (BasicRecurrent oldBias oldBiasMomentum oldActivations oldMomentum) (BasicRecurrent' biasGradient activationGradient) = + let newBiasMomentum = konst learningMomentum * oldBiasMomentum - konst learningRate * biasGradient + newBias = oldBias + newBiasMomentum + newMomentum = konst learningMomentum * oldMomentum - konst learningRate * activationGradient + regulariser = konst (learningRegulariser * learningRate) * oldActivations + newActivations = oldActivations + newMomentum - regulariser + in BasicRecurrent newBias newBiasMomentum newActivations newMomentum + + createRandom = randomBasicRecurrent + +instance (KnownNat i, KnownNat o, KnownNat (i + o), i <= (i + o), o ~ ((i + o) - i)) => RecurrentUpdateLayer (BasicRecurrent i o) where + type RecurrentShape (BasicRecurrent i o) = 'D1 o + +instance (KnownNat i, KnownNat o, KnownNat (i + o), i <= (i + o), o ~ ((i + o) - i)) => RecurrentLayer (BasicRecurrent i o) ('D1 i) ('D1 o) where + -- Do a matrix vector multiplication and return the result. 
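+ -- The previous output is appended to the current input with (#), so the single affine + -- map wB + wN #> (thisInput # lastOutput) applies the input and recurrent weights in + -- one step (an Elman-style recurrence; any nonlinearity comes from a following layer).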
+ runRecurrentForwards (BasicRecurrent wB _ wN _) (S1D lastOutput) (S1D thisInput) = + let thisOutput = S1D $ wB + wN #> (thisInput # lastOutput) + in (thisOutput, thisOutput) + + -- Run a backpropagation step for a basic recurrent layer. + runRecurrentBackwards (BasicRecurrent _ _ wN _) (S1D lastOutput) (S1D thisInput) (S1D dRec) (S1D dEdy) = + let biasGradient = (dRec + dEdy) + layerGrad = (dRec + dEdy) `outer` (thisInput # lastOutput) + -- calculate derivatives for next step + (backGrad, recGrad) = split $ tr wN #> (dRec + dEdy) + in (BasicRecurrent' biasGradient layerGrad, S1D recGrad, S1D backGrad) + +randomBasicRecurrent :: (MonadRandom m, KnownNat i, KnownNat o, KnownNat x, x ~ (i + o)) + => m (BasicRecurrent i o) +randomBasicRecurrent = do + seed1 <- getRandom + seed2 <- getRandom + let wB = randomVector seed1 Uniform * 2 - 1 + wN = uniformSample seed2 (-1) 1 + bm = konst 0 + mm = konst 0 + return $ BasicRecurrent wB bm wN mm diff --git a/src/Grenade/Recurrent/Layers/LSTM.hs b/src/Grenade/Recurrent/Layers/LSTM.hs new file mode 100644 index 0000000..4e74383 --- /dev/null +++ b/src/Grenade/Recurrent/Layers/LSTM.hs @@ -0,0 +1,244 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} +module Grenade.Recurrent.Layers.LSTM ( + LSTM (..) + , LSTMWeights (..) + , randomLSTM + ) where + +import Control.Monad.Random ( MonadRandom, getRandom ) + +-- import Data.List ( foldl1' ) +import Data.Singletons.TypeLits + +import Numeric.LinearAlgebra.Static + +import Grenade.Core.Network +import Grenade.Core.Shape + +import Grenade.Layers.Internal.Update + +import Grenade.Recurrent.Core.Network + +-- | Long Short Term Memory Recurrent unit +-- +-- This is a Peephole formulation, so the recurrent shape is +-- just the cell state; the previous output is not held or used +-- at all. +data LSTM :: Nat -> Nat -> * where + LSTM :: ( KnownNat input + , KnownNat output + ) => !(LSTMWeights input output) -- Weights + -> !(LSTMWeights input output) -- Momentums + -> LSTM input output + +data LSTMWeights :: Nat -> Nat -> * where + LSTMWeights :: ( KnownNat input + , KnownNat output + ) => { + lstmWf :: !(L output input) -- Weight Forget (W_f) + , lstmUf :: !(L output output) -- Cell State Forget (U_f) + , lstmBf :: !(R output) -- Bias Forget (b_f) + , lstmWi :: !(L output input) -- Weight Input (W_i) + , lstmUi :: !(L output output) -- Cell State Input (U_i) + , lstmBi :: !(R output) -- Bias Input (b_i) + , lstmWo :: !(L output input) -- Weight Output (W_o) + , lstmUo :: !(L output output) -- Cell State Output (U_o) + , lstmBo :: !(R output) -- Bias Output (b_o) + , lstmWc :: !(L output input) -- Weight Cell (W_c) + , lstmBc :: !(R output) -- Bias Cell (b_c) + } -> LSTMWeights input output + +instance Show (LSTM i o) where + show LSTM {} = "LSTM" + +instance (KnownNat i, KnownNat o) => UpdateLayer (LSTM i o) where + -- The gradients are the same shape as the weights and momentum. + -- This seems to be a general pattern; maybe it should be enforced. + type Gradient (LSTM i o) = (LSTMWeights i o) + + -- Run the update function for each matrix/vector group of weights, momentums and gradients. + -- Hmm, maybe the function should be used instead of passing in the learning parameters.
+ runUpdate LearningParameters {..} (LSTM w m) g = + let (wf, wf') = u lstmWf w m g + (uf, uf') = u lstmUf w m g + (bf, bf') = v lstmBf w m g + (wi, wi') = u lstmWi w m g + (ui, ui') = u lstmUi w m g + (bi, bi') = v lstmBi w m g + (wo, wo') = u lstmWo w m g + (uo, uo') = u lstmUo w m g + (bo, bo') = v lstmBo w m g + (wc, wc') = u lstmWc w m g + (bc, bc') = v lstmBc w m g + in LSTM (LSTMWeights wf uf bf wi ui bi wo uo bo wc bc) (LSTMWeights wf' uf' bf' wi' ui' bi' wo' uo' bo' wc' bc') + where + -- Utility function for updating with the momentum, gradients, and weights. + u :: forall x ix out. (KnownNat ix, KnownNat out) => (x -> (L out ix)) -> x -> x -> x -> ((L out ix), (L out ix)) + u e (e -> weights) (e -> momentum) (e -> gradient) = + decendMatrix learningRate learningMomentum learningRegulariser weights gradient momentum + + v :: forall x ix. (KnownNat ix) => (x -> (R ix)) -> x -> x -> x -> ((R ix), (R ix)) + v e (e -> weights) (e -> momentum) (e -> gradient) = + decendVector learningRate learningMomentum learningRegulariser weights gradient momentum + + -- There's a lot of updates here, so to try and minimise the number of data copies + -- we'll create a mutable bucket for each. + -- runUpdates rate lstm gs = + -- let combinedGradient = foldl1' uu gs + -- in runUpdate rate lstm combinedGradient + -- where + -- uu :: (KnownNat i, KnownNat o) => LSTMWeights i o -> LSTMWeights i o -> LSTMWeights i o + -- uu a b = + -- let wf = u lstmWf a b + -- uf = u lstmUf a b + -- bf = v lstmBf a b + -- wi = u lstmWi a b + -- ui = u lstmUi a b + -- bi = v lstmBi a b + -- wo = u lstmWo a b + -- uo = u lstmUo a b + -- bo = v lstmBo a b + -- wc = u lstmWc a b + -- bc = v lstmBc a b + -- in LSTMWeights wf uf bf wi ui bi wo uo bo wc bc + -- u :: forall x ix out. (KnownNat ix, KnownNat out) => (x -> (L out ix)) -> x -> x -> L out ix + -- u e (e -> a) (e -> b) = tr $ tr a + tr b + + -- v :: forall x ix. (x -> (R ix)) -> x -> x -> R ix + -- v e (e -> a) (e -> b) = a + b + + createRandom = randomLSTM + +instance (KnownNat i, KnownNat o) => RecurrentUpdateLayer (LSTM i o) where + -- The recurrent shape is the same size as the output. + -- It's actually the cell state however, as this is a peephole variety LSTM. + type RecurrentShape (LSTM i o) = 'D1 o + +instance (KnownNat i, KnownNat o) => RecurrentLayer (LSTM i o) ('D1 i) ('D1 o) where + -- Forward propagation for the LSTM layer. + -- The size of the cell state is also the size of the output. + runRecurrentForwards (LSTM (LSTMWeights {..}) _) (S1D cell) (S1D input) = + let -- Forget state vector + f_t = sigmoid $ lstmBf + lstmWf #> input + lstmUf #> cell + -- Input state vector + i_t = sigmoid $ lstmBi + lstmWi #> input + lstmUi #> cell + -- Output state vector + o_t = sigmoid $ lstmBo + lstmWo #> input + lstmUo #> cell + -- Cell input state vector + c_x = tanh $ lstmBc + lstmWc #> input + -- Cell state + c_t = f_t * cell + i_t * c_x + -- Output (it's sometimes recommended to use tanh c_t) + h_t = o_t * c_t + in (S1D c_t, S1D h_t) + + -- Run a backpropogation step for an LSTM layer. + -- We're doing all the derivatives by hand here, so one should + -- be extra careful when changing this. + -- + -- There's a test version using the AD library without hmatrix in the test + -- suite. These should match always. + runRecurrentBackwards (LSTM (LSTMWeights {..}) _) (S1D cell) (S1D input) (S1D cellGrad) (S1D h_t') = + -- We're not keeping the Wengert tape during the forward pass, + -- so we're duplicating some work here. 
+ -- + -- If I were being generous, I'd call it checkpointing. + -- + -- Maybe think about better ways to store some intermediate states. + let -- Forget state vector + f_s = lstmBf + lstmWf #> input + lstmUf #> cell + f_t = sigmoid f_s + -- Input state vector + i_s = lstmBi + lstmWi #> input + lstmUi #> cell + i_t = sigmoid i_s + -- Output state vector + o_s = lstmBo + lstmWo #> input + lstmUo #> cell + o_t = sigmoid o_s + -- Cell input state vector + c_s = lstmBc + lstmWc #> input + c_x = tanh c_s + -- Cell state + c_t = f_t * cell + i_t * c_x + + -- Reverse mode AD derivatives + c_t' = h_t' * o_t + cellGrad + + f_t' = c_t' * cell + f_s' = sigmoid' f_s * f_t' + + o_t' = h_t' * c_t + o_s' = sigmoid' o_s * o_t' + + i_t' = c_t' * c_x + i_s' = sigmoid' i_s * i_t' + + c_x' = c_t' * i_t + c_s' = tanh' c_s * c_x' + + -- The derivatives to pass sideways (recurrent) and downwards + cell' = tr lstmUf #> f_s' + tr lstmUo #> o_s' + tr lstmUi #> i_s' + c_t' * f_t + input' = tr lstmWf #> f_s' + tr lstmWo #> o_s' + tr lstmWi #> i_s' + tr lstmWc #> c_s' + + -- Calculate the gradient matrices for the input + lstmWf' = f_s' `outer` input + lstmWi' = i_s' `outer` input + lstmWo' = o_s' `outer` input + lstmWc' = c_s' `outer` input + + -- Calculate the gradient matrices for the cell + lstmUf' = f_s' `outer` cell + lstmUi' = i_s' `outer` cell + lstmUo' = o_s' `outer` cell + + -- The biases just get the values, but we'll write it so it's obvious + lstmBf' = f_s' + lstmBi' = i_s' + lstmBo' = o_s' + lstmBc' = c_s' + + gradients = LSTMWeights lstmWf' lstmUf' lstmBf' lstmWi' lstmUi' lstmBi' lstmWo' lstmUo' lstmBo' lstmWc' lstmBc' + in (gradients, S1D cell', S1D input') + +-- | Generate an LSTM layer with random weights. +-- One can also just call createRandom from UpdateLayer. +-- +-- Has forget gate biases set to 1 to encourage early learning. +-- +-- https://github.com/karpathy/char-rnn/commit/0dfeaa454e687dd0278f036552ea1e48a0a408c9 +-- +randomLSTM :: forall m i o. (MonadRandom m, KnownNat i, KnownNat o) + => m (LSTM i o) +randomLSTM = do + let w = (\s -> uniformSample s (-1) 1 ) <$> getRandom + u = (\s -> uniformSample s (-1) 1 ) <$> getRandom + v = (\s -> randomVector s Uniform * 2 - 1) <$> getRandom + + w0 = konst 0 + u0 = konst 0 + v0 = konst 0 + + LSTM <$> (LSTMWeights <$> w <*> u <*> pure (konst 1) <*> w <*> u <*> v <*> w <*> u <*> v <*> w <*> v) + <*> pure (LSTMWeights w0 u0 v0 w0 u0 v0 w0 u0 v0 w0 v0) + +-- | Maths +-- +-- TODO: move these elsewhere +sigmoid :: Floating a => a -> a +sigmoid x = 1 / (1 + exp (-x)) + +sigmoid' :: Floating a => a -> a +sigmoid' x = logix * (1 - logix) + where + logix = sigmoid x + +tanh' :: (Floating a) => a -> a +tanh' t = 1 - s ^ (2 :: Int) where s = tanh t diff --git a/src/Grenade/Recurrent/Layers/Trivial.hs b/src/Grenade/Recurrent/Layers/Trivial.hs new file mode 100644 index 0000000..7c18baa --- /dev/null +++ b/src/Grenade/Recurrent/Layers/Trivial.hs @@ -0,0 +1,23 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FlexibleInstances #-} +module Grenade.Recurrent.Layers.Trivial ( + Trivial (..) + ) where + +import Grenade.Core.Network + +-- | A trivial layer.
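+-- +-- Both the forward and backward passes are the identity, so it can be useful as a +-- placeholder when experimenting with network types: +-- +-- > runForwards Trivial x -- evaluates to x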
+data Trivial = Trivial + deriving Show + +instance UpdateLayer Trivial where + type Gradient Trivial = () + runUpdate _ _ _ = Trivial + createRandom = return Trivial + +instance (a ~ b) => Layer Trivial a b where + runForwards _ = id + runBackwards _ _ y = ((), y) diff --git a/src/Grenade/Utils/OneHot.hs b/src/Grenade/Utils/OneHot.hs new file mode 100644 index 0000000..991c420 --- /dev/null +++ b/src/Grenade/Utils/OneHot.hs @@ -0,0 +1,84 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE RankNTypes #-} + +module Grenade.Utils.OneHot ( + oneHot + , hotMap + , makeHot + , unHot + ) where + +import Data.List ( group, sort ) + +import Data.Map ( Map ) +import qualified Data.Map as M + +import Data.Proxy +import Data.Singletons.TypeLits + +import Data.Vector ( Vector ) +import qualified Data.Vector as V + +import Numeric.LinearAlgebra ( maxIndex ) +import Numeric.LinearAlgebra.Devel +import Numeric.LinearAlgebra.Static + +import Grenade.Core.Shape + +-- | From an int which is hot, create a 1D Shape +-- with one index hot (1) and the rest 0. +-- Returns Nothing if the hot number is larger +-- than the length of the vector. +oneHot :: forall n. (KnownNat n) + => Int -> Maybe (S ('D1 n)) +oneHot hot = + let len = fromIntegral $ natVal (Proxy :: Proxy n) + in if hot < len + then + fmap S1D . create $ runSTVector $ do + vec <- newVector 0 len + writeVector vec hot 1 + return vec + else Nothing + +-- | Create a one hot map from any enumerable. +-- Returns a map, and the ordered list for the reverse transformation. +hotMap :: (Ord a, KnownNat n) => Proxy n -> [a] -> Maybe (Map a Int, Vector a) +hotMap n as = + let len = fromIntegral $ natVal n + uniq = [ c | (c:_) <- group $ sort as] + hotl = length uniq + in if hotl <= len + then + Just (M.fromList $ zip uniq [0..], V.fromList uniq) + else Nothing + +-- | From a map and value, create a 1D Shape +-- with one index hot (1) and the rest 0. +-- Returns Nothing if the hot number is larger +-- than the length of the vector or the map +-- doesn't contain the value. +makeHot :: forall a n. (Ord a, KnownNat n) + => Map a Int -> a -> Maybe (S ('D1 n)) +makeHot m x = do + hot <- M.lookup x m + let len = fromIntegral $ natVal (Proxy :: Proxy n) + if hot < len + then + fmap S1D . create $ runSTVector $ do + vec <- newVector 0 len + writeVector vec hot 1 + return vec + else Nothing + +unHot :: forall a n. (KnownNat n) + => Vector a -> (S ('D1 n)) -> Maybe a +unHot v (S1D xs) + = (V.!?)
v + $ maxIndex (extract xs) + diff --git a/test/Test/Grenade/Layers/Convolution.hs b/test/Test/Grenade/Layers/Convolution.hs index 8ec9002..73ceffa 100644 --- a/test/Test/Grenade/Layers/Convolution.hs +++ b/test/Test/Grenade/Layers/Convolution.hs @@ -1,11 +1,10 @@ -{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE DataKinds #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE TypeOperators #-} - {-# OPTIONS_GHC -fno-warn-missing-signatures #-} module Test.Grenade.Layers.Convolution where @@ -30,6 +29,17 @@ data OpaqueConvolution :: * where instance Show OpaqueConvolution where show (OpaqueConvolution n) = show n +genConvolution :: ( KnownNat channels + , KnownNat filters + , KnownNat kernelRows + , KnownNat kernelColumns + , KnownNat strideRows + , KnownNat strideColumns + , KnownNat kernelFlattened + , kernelFlattened ~ (kernelRows * kernelColumns * channels) + ) => Jack (Convolution channels filters kernelRows kernelColumns strideRows strideColumns) +genConvolution = Convolution <$> uniformSample <*> uniformSample + genOpaqueOpaqueConvolution :: Jack OpaqueConvolution genOpaqueOpaqueConvolution = do Just channels <- someNatVal <$> choose (1, 10) @@ -46,7 +56,7 @@ genOpaqueOpaqueConvolution = do p2 = natDict pkc p3 = natDict pch in case p1 %* p2 %* p3 of - Dict -> OpaqueConvolution <$> (Convolution <$> uniformSample <*> uniformSample :: Jack (Convolution ch fl kr kc sr sc)) + Dict -> OpaqueConvolution <$> (genConvolution :: Jack (Convolution ch fl kr kc sr sc)) prop_conv_net_witness = gamble genOpaqueOpaqueConvolution $ \onet -> @@ -80,9 +90,9 @@ prop_conv_net = , (unsafeCoerce (Dict :: Dict ()) :: Dict (((outRows - 1) * strideRows) ~ (inRows - kernelRows))) , (unsafeCoerce (Dict :: Dict ()) :: Dict (((outCols - 1) * strideCols) ~ (inCols - kernelCols)))) of (Dict, Dict, Dict, Dict) -> - gamble (S3D' <$> uniformSample) $ \(input :: S' ('D3 inRows inCols channels)) -> - let output :: S' ('D3 outRows outCols filters) = runForwards convLayer input - backed :: (Gradient (Convolution channels filters kernelRows kernelCols strideRows strideCols), S' ('D3 inRows inCols channels)) + gamble (S3D <$> uniformSample) $ \(input :: S ('D3 inRows inCols channels)) -> + let output :: S ('D3 outRows outCols filters) = runForwards convLayer input + backed :: (Gradient (Convolution channels filters kernelRows kernelCols strideRows strideCols), S ('D3 inRows inCols channels)) = runBackwards convLayer input output in backed `seq` True ) :: Property diff --git a/test/Test/Grenade/Layers/FullyConnected.hs b/test/Test/Grenade/Layers/FullyConnected.hs index 605230b..ff674f0 100644 --- a/test/Test/Grenade/Layers/FullyConnected.hs +++ b/test/Test/Grenade/Layers/FullyConnected.hs @@ -44,10 +44,10 @@ genOpaqueFullyConnected = do prop_fully_connected_forwards :: Property prop_fully_connected_forwards = gamble genOpaqueFullyConnected $ \(OpaqueFullyConnected (fclayer :: FullyConnected i o)) -> - gamble (S1D' <$> randomVector) $ \(input :: S' ('D1 i)) -> - let output :: S' ('D1 o) = runForwards fclayer input - backed :: (Gradient (FullyConnected i o), S' ('D1 i)) - = runBackwards fclayer input output + gamble (S1D <$> randomVector) $ \(input :: S ('D1 i)) -> + let output :: S ('D1 o) = runForwards fclayer input + backed :: (Gradient (FullyConnected i o), S ('D1 i)) + = 
runBackwards fclayer input output in backed `seq` True return [] diff --git a/test/Test/Grenade/Layers/Pooling.hs b/test/Test/Grenade/Layers/Pooling.hs index bc250c8..1c3bfbd 100644 --- a/test/Test/Grenade/Layers/Pooling.hs +++ b/test/Test/Grenade/Layers/Pooling.hs @@ -1,8 +1,8 @@ -{-# LANGUAGE TemplateHaskell #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE KindSignatures #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-missing-signatures #-} module Test.Grenade.Layers.Pooling where diff --git a/test/Test/Grenade/Recurrent/Layers/LSTM.hs b/test/Test/Grenade/Recurrent/Layers/LSTM.hs new file mode 100644 index 0000000..3c5d33b --- /dev/null +++ b/test/Test/Grenade/Recurrent/Layers/LSTM.hs @@ -0,0 +1,101 @@ +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RankNTypes #-} + +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +module Test.Grenade.Recurrent.Layers.LSTM where + +import Disorder.Jack + +import Data.Foldable ( toList ) +import Data.Singletons.TypeLits + +import Grenade +import Grenade.Recurrent + +import qualified Numeric.LinearAlgebra as H +import qualified Numeric.LinearAlgebra.Static as S + + +import qualified Test.Grenade.Recurrent.Layers.LSTM.Reference as Reference +import Test.Jack.Hmatrix + +genLSTM :: forall i o. (KnownNat i, KnownNat o) => Jack (LSTM i o) +genLSTM = do + let w = uniformSample + u = uniformSample + v = randomVector + + w0 = S.konst 0 + u0 = S.konst 0 + v0 = S.konst 0 + + LSTM <$> (LSTMWeights <$> w <*> u <*> v <*> w <*> u <*> v <*> w <*> u <*> v <*> w <*> v) + <*> pure (LSTMWeights w0 u0 v0 w0 u0 v0 w0 u0 v0 w0 v0) + +prop_lstm_reference_forwards = + gamble randomVector $ \(input :: S.R 3) -> + gamble randomVector $ \(cell :: S.R 2) -> + gamble genLSTM $ \(net@(LSTM lstmWeights _) :: LSTM 3 2) -> + let actual = runRecurrentForwards net (S1D cell) (S1D input) + in case actual of + ((S1D cellOut) :: S ('D1 2), (S1D output) :: S ('D1 2)) -> + let cellOut' = Reference.Vector . H.toList . S.extract $ cellOut + output' = Reference.Vector . H.toList . S.extract $ output + refNet = Reference.lstmToReference lstmWeights + refCell = Reference.Vector . H.toList . S.extract $ cell + refInput = Reference.Vector . H.toList . S.extract $ input + (refCO, refO) = Reference.runLSTM refNet refCell refInput + in toList refCO ~~~ toList cellOut' .&&. toList refO ~~~ toList output' + +prop_lstm_reference_backwards = + gamble randomVector $ \(input :: S.R 3) -> + gamble randomVector $ \(cell :: S.R 2) -> + gamble genLSTM $ \(net@(LSTM lstmWeights _) :: LSTM 3 2) -> + let actualBacks = runRecurrentBackwards net (S1D cell) (S1D input) (S1D (S.konst 1) :: S ('D1 2)) (S1D (S.konst 1) :: S ('D1 2)) + in case actualBacks of + (actualGradients, _, _) -> + let refNet = Reference.lstmToReference lstmWeights + refCell = Reference.Vector . H.toList . S.extract $ cell + refInput = Reference.Vector . H.toList . 
S.extract $ input + refGradients = Reference.runLSTMback refCell refInput refNet + in toList refGradients ~~~ toList (Reference.lstmToReference actualGradients) + +prop_lstm_reference_backwards_input = + gamble randomVector $ \(input :: S.R 3) -> + gamble randomVector $ \(cell :: S.R 2) -> + gamble genLSTM $ \(net@(LSTM lstmWeights _) :: LSTM 3 2) -> + let actualBacks = runRecurrentBackwards net (S1D cell) (S1D input) (S1D (S.konst 1) :: S ('D1 2)) (S1D (S.konst 1) :: S ('D1 2)) + in case actualBacks of + (_, _, S1D actualGradients) -> + let refNet = Reference.lstmToReference lstmWeights + refCell = Reference.Vector . H.toList . S.extract $ cell + refInput = Reference.Vector . H.toList . S.extract $ input + refGradients = Reference.runLSTMbackOnInput refCell refNet refInput + in toList refGradients ~~~ H.toList (S.extract actualGradients) + +prop_lstm_reference_backwards_cell = + gamble randomVector $ \(input :: S.R 3) -> + gamble randomVector $ \(cell :: S.R 2) -> + gamble genLSTM $ \(net@(LSTM lstmWeights _) :: LSTM 3 2) -> + let actualBacks = runRecurrentBackwards net (S1D cell) (S1D input) (S1D (S.konst 1) :: S ('D1 2)) (S1D (S.konst 1) :: S ('D1 2)) + in case actualBacks of + (_, S1D actualGradients, _) -> + let refNet = Reference.lstmToReference lstmWeights + refCell = Reference.Vector . H.toList . S.extract $ cell + refInput = Reference.Vector . H.toList . S.extract $ input + refGradients = Reference.runLSTMbackOnCell refInput refNet refCell + in toList refGradients ~~~ (H.toList . S.extract $ actualGradients) + + +(~~~) as bs = all (< 1e-8) (zipWith (-) as bs) +infix 4 ~~~ + +return [] +tests :: IO Bool +tests = $quickCheckAll diff --git a/test/Test/Grenade/Recurrent/Layers/LSTM/Reference.hs b/test/Test/Grenade/Recurrent/Layers/LSTM/Reference.hs new file mode 100644 index 0000000..f9373da --- /dev/null +++ b/test/Test/Grenade/Recurrent/Layers/LSTM/Reference.hs @@ -0,0 +1,149 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RankNTypes #-} + +{-# OPTIONS_GHC -fno-warn-missing-signatures #-} +module Test.Grenade.Recurrent.Layers.LSTM.Reference where + +import Data.Reflection +import Numeric.AD.Mode.Reverse +import Numeric.AD.Internal.Reverse ( Tape ) + +import qualified Grenade.Recurrent.Layers.LSTM as LSTM +import qualified Numeric.LinearAlgebra.Static as S +import qualified Numeric.LinearAlgebra as H + +-- +-- This module contains a set of list only versions of +-- an LSTM layer which can be used with the AD library. +-- +-- Using this, we can check to make sure that our fast +-- back propagation implementation is correct. 
+-- + +-- | List only matrix deriving functor +data Matrix a = Matrix { + matrixWeights :: [[a]] + } deriving (Functor, Foldable, Traversable, Eq, Show) + +-- | List only vector deriving functor +data Vector a = Vector { + vectorWeights :: [a] + } deriving (Functor, Foldable, Traversable, Eq, Show) + +-- | List only LSTM weights +data RefLSTM a = RefLSTM + { refLstmWf :: Matrix a -- Weight Forget (W_f) + , refLstmUf :: Matrix a -- Cell State Forget (U_f) + , refLstmBf :: Vector a -- Bias Forget (b_f) + , refLstmWi :: Matrix a -- Weight Input (W_i) + , refLstmUi :: Matrix a -- Cell State Input (U_i) + , refLstmBi :: Vector a -- Bias Input (b_i) + , refLstmWo :: Matrix a -- Weight Output (W_o) + , refLstmUo :: Matrix a -- Cell State Output (U_o) + , refLstmBo :: Vector a -- Bias Output (b_o) + , refLstmWc :: Matrix a -- Weight Cell (W_c) + , refLstmBc :: Vector a -- Bias Cell (b_c) + } deriving (Functor, Foldable, Traversable, Eq, Show) + +lstmToReference :: LSTM.LSTMWeights a b -> RefLSTM Double +lstmToReference LSTM.LSTMWeights {..} = + let refLstmWf = Matrix . H.toLists . S.extract $ lstmWf -- Weight Forget (W_f) + refLstmUf = Matrix . H.toLists . S.extract $ lstmUf -- Cell State Forget (U_f) + refLstmBf = Vector . H.toList . S.extract $ lstmBf -- Bias Forget (b_f) + refLstmWi = Matrix . H.toLists . S.extract $ lstmWi -- Weight Input (W_i) + refLstmUi = Matrix . H.toLists . S.extract $ lstmUi -- Cell State Input (U_i) + refLstmBi = Vector . H.toList . S.extract $ lstmBi -- Bias Input (b_i) + refLstmWo = Matrix . H.toLists . S.extract $ lstmWo -- Weight Output (W_o) + refLstmUo = Matrix . H.toLists . S.extract $ lstmUo -- Cell State Output (U_o) + refLstmBo = Vector . H.toList . S.extract $ lstmBo -- Bias Output (b_o) + refLstmWc = Matrix . H.toLists . S.extract $ lstmWc -- Weight Cell (W_c) + refLstmBc = Vector . H.toList . S.extract $ lstmBc -- Bias Cell (b_c) + in RefLSTM {..} + +runLSTM :: Floating a => RefLSTM a -> Vector a -> Vector a -> (Vector a, Vector a) +runLSTM RefLSTM {..} cell input = + let -- Forget state vector + f_t = sigmoid $ refLstmBf #+ refLstmWf #> input #+ refLstmUf #> cell + -- Input state vector + i_t = sigmoid $ refLstmBi #+ refLstmWi #> input #+ refLstmUi #> cell + -- Output state vector + o_t = sigmoid $ refLstmBo #+ refLstmWo #> input #+ refLstmUo #> cell + -- Cell input state vector + c_x = fmap tanh $ refLstmBc #+ refLstmWc #> input + -- Cell state + c_t = f_t #* cell #+ i_t #* c_x + -- Output (it's sometimes recommended to use tanh c_t) + h_t = o_t #* c_t + in (c_t, h_t) + +runLSTMback :: forall a. Floating a => Vector a -> Vector a -> RefLSTM a -> RefLSTM a +runLSTMback cell input = + grad f + where + f :: forall s. Reifies s Tape => RefLSTM (Reverse s a) -> Reverse s a + f net = + let cell' = fmap auto cell + input' = fmap auto input + (cells, forwarded) = runLSTM net cell' input' + in sum forwarded + sum cells + +runLSTMbackOnInput :: forall a. Floating a => Vector a -> RefLSTM a -> Vector a -> Vector a +runLSTMbackOnInput cell net = + grad f + where + f :: forall s. Reifies s Tape => Vector (Reverse s a) -> Reverse s a + f input = + let cell' = fmap auto cell + net' = fmap auto net + (cells, forwarded) = runLSTM net' cell' input + in sum forwarded + sum cells + +runLSTMbackOnCell :: forall a. Floating a => Vector a -> RefLSTM a -> Vector a -> Vector a +runLSTMbackOnCell input net = + grad f + where + f :: forall s. 
Reifies s Tape => Vector (Reverse s a) -> Reverse s a + f cell = + let input' = fmap auto input + net' = fmap auto net + (cells, forwarded) = runLSTM net' cell input' + in sum forwarded + sum cells + +-- | Helper to multiply a matrix by a vector +matMult :: Num a => Matrix a -> Vector a -> Vector a +matMult (Matrix m) (Vector v) = Vector result + where + lrs = map length m + l = length v + result = if all (== l) lrs + then map (\r -> sum $ zipWith (*) r v) m + else error $ "Matrix has rows of length " ++ show lrs ++ + " but vector is of length " ++ show l + +(#>) :: Num a => Matrix a -> Vector a -> Vector a +(#>) = matMult +infixr 8 #> + +(#+) :: Num a => Vector a -> Vector a -> Vector a +(#+) (Vector as) (Vector bs) = Vector $ zipWith (+) as bs +infixl 6 #+ + +(#-) :: Num a => Vector a -> Vector a -> Vector a +(#-) (Vector as) (Vector bs) = Vector $ zipWith (-) as bs +infixl 6 #- + +(#*) :: Num a => Vector a -> Vector a -> Vector a +(#*) (Vector as) (Vector bs) = Vector $ zipWith (*) as bs +infixl 7 #* + +sigmoid :: (Functor f, Floating a) => f a -> f a +sigmoid xs = (\x -> 1 / (1 + exp (-x))) <$> xs diff --git a/test/Test/Jack/Hmatrix.hs b/test/Test/Jack/Hmatrix.hs index 0346658..d2218d3 100644 --- a/test/Test/Jack/Hmatrix.hs +++ b/test/Test/Jack/Hmatrix.hs @@ -4,7 +4,6 @@ module Test.Jack.Hmatrix where -import Data.Proxy import Disorder.Jack import GHC.TypeLits @@ -12,9 +11,7 @@ import GHC.TypeLits import qualified Numeric.LinearAlgebra.Static as HStatic randomVector :: forall n. KnownNat n => Jack (HStatic.R n) -randomVector = HStatic.fromList <$> vectorOf (fromInteger (natVal (Proxy :: Proxy n))) sizedRealFrac +randomVector = (\s -> HStatic.randomVector s HStatic.Uniform * 2 - 1) <$> sizedNat uniformSample :: forall m n. (KnownNat m, KnownNat n) => Jack (HStatic.L m n) -uniformSample = HStatic.fromList - <$> vectorOf (fromInteger (natVal (Proxy :: Proxy m)) * fromInteger (natVal (Proxy :: Proxy n))) - sizedRealFrac +uniformSample = (\s -> HStatic.uniformSample s (-1) 1 ) <$> sizedNat diff --git a/test/test.hs b/test/test.hs index cd85300..ad7dbdd 100644 --- a/test/test.hs +++ b/test/test.hs @@ -1,12 +1,13 @@ import Disorder.Core.Main -import qualified Test.Grenade.Layers.Pooling as Test.Grenade.Layers.Pooling -import qualified Test.Grenade.Layers.Convolution as Test.Grenade.Layers.Convolution -import qualified Test.Grenade.Layers.FullyConnected as Test.Grenade.Layers.FullyConnected +import qualified Test.Grenade.Layers.Pooling +import qualified Test.Grenade.Layers.Convolution +import qualified Test.Grenade.Layers.FullyConnected -import qualified Test.Grenade.Layers.Internal.Convolution as Test.Grenade.Layers.Internal.Convolution -import qualified Test.Grenade.Layers.Internal.Pooling as Test.Grenade.Layers.Internal.Pooling +import qualified Test.Grenade.Layers.Internal.Convolution +import qualified Test.Grenade.Layers.Internal.Pooling +import qualified Test.Grenade.Recurrent.Layers.LSTM main :: IO () main = @@ -17,4 +18,6 @@ main = , Test.Grenade.Layers.Internal.Convolution.tests , Test.Grenade.Layers.Internal.Pooling.tests + + , Test.Grenade.Recurrent.Layers.LSTM.tests ]