diff --git a/Crypto/KDF/PBKDF2.hs b/Crypto/KDF/PBKDF2.hs
index 2f5c183..16d17fd 100644
--- a/Crypto/KDF/PBKDF2.hs
+++ b/Crypto/KDF/PBKDF2.hs
@@ -8,17 +8,22 @@
 -- Password Based Key Derivation Function 2
 --
 {-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ForeignFunctionInterface #-}
+
 module Crypto.KDF.PBKDF2
     ( PRF
     , prfHMAC
     , Parameters(..)
     , generate
+    , fastPBKDF2_SHA1
+    , fastPBKDF2_SHA256
     ) where
 
 import           Data.Word
 import           Data.Bits
 import           Foreign.Marshal.Alloc
-import           Foreign.Ptr (plusPtr)
+import           Foreign.Ptr (plusPtr, Ptr)
+import           Foreign.C.Types (CUInt(..), CInt(..), CSize(..))
 
 import           Crypto.Hash (HashAlgorithm)
 import qualified Crypto.MAC.HMAC as HMAC
@@ -100,3 +105,62 @@ generate prf params password salt =
             c = fromIntegral ((w `shiftR` 8) .&. 0xff)
             d = fromIntegral (w .&. 0xff)
 {-# NOINLINE generate #-}
+
+-- | PBKDF2 with HMAC-SHA1 as the PRF, computed by the C implementation in
+-- @cbits/cryptonite_pbkdf2.c@ instead of the generic 'generate'.
+--
+-- 'iterCounts' and 'outputLength' must both be strictly positive: the C
+-- code asserts on a zero iteration count and on a zero output length.
+-- The iteration count is narrowed to 'CUInt' with 'fromIntegral'.
+fastPBKDF2_SHA1 :: (ByteArrayAccess password, ByteArrayAccess salt, ByteArray out)
+                => Parameters
+                -> password
+                -> salt
+                -> out
+fastPBKDF2_SHA1 params password salt =
+    B.allocAndFreeze (outputLength params) $ \outPtr ->
+    B.withByteArray password $ \passPtr ->
+    B.withByteArray salt $ \saltPtr ->
+        c_cryptonite_fastpbkdf2_hmac_sha1
+            passPtr (fromIntegral $ B.length password)
+            saltPtr (fromIntegral $ B.length salt)
+            (fromIntegral $ iterCounts params)
+            outPtr (fromIntegral $ outputLength params)
+
+-- | PBKDF2 with HMAC-SHA256 as the PRF, computed by the C implementation in
+-- @cbits/cryptonite_pbkdf2.c@ instead of the generic 'generate'.
+--
+-- 'iterCounts' and 'outputLength' must both be strictly positive: the C
+-- code asserts on a zero iteration count and on a zero output length.
+-- The iteration count is narrowed to 'CUInt' with 'fromIntegral'.
+fastPBKDF2_SHA256 :: (ByteArrayAccess password, ByteArrayAccess salt, ByteArray out)
+                  => Parameters
+                  -> password
+                  -> salt
+                  -> out
+fastPBKDF2_SHA256 params password salt =
+    B.allocAndFreeze (outputLength params) $ \outPtr ->
+    B.withByteArray password $ \passPtr ->
+    B.withByteArray salt $ \saltPtr ->
+        c_cryptonite_fastpbkdf2_hmac_sha256
+            passPtr (fromIntegral $ B.length password)
+            saltPtr (fromIntegral $ B.length salt)
+            (fromIntegral $ iterCounts params)
+            outPtr (fromIntegral $ outputLength params)
+
+
+-- Raw bindings. Argument order is password, salt, iteration count, output;
+-- every buffer pointer is followed by its length in bytes.
+foreign import ccall unsafe "cryptonite_pbkdf2.h cryptonite_fastpbkdf2_hmac_sha1"
+    c_cryptonite_fastpbkdf2_hmac_sha1 :: Ptr Word8 -> CSize
+                                      -> Ptr Word8 -> CSize
+                                      -> CUInt
+                                      -> Ptr Word8 -> CSize
+                                      -> IO ()
+
+foreign import ccall unsafe "cryptonite_pbkdf2.h cryptonite_fastpbkdf2_hmac_sha256"
+    c_cryptonite_fastpbkdf2_hmac_sha256 :: Ptr Word8 -> CSize
+                                        -> Ptr Word8 -> CSize
+                                        -> CUInt
+                                        -> Ptr Word8 -> CSize
+                                        -> IO ()
diff --git a/cbits/cryptonite_pbkdf2.c b/cbits/cryptonite_pbkdf2.c
new file mode 100644
index 0000000..895c47a
--- /dev/null
+++ b/cbits/cryptonite_pbkdf2.c
@@ -0,0 +1,362 @@
+/*
+ * fast-pbkdf2 - Optimal PBKDF2-HMAC calculation
+ * Written in 2015 by Joseph Birr-Pixton <jpixton@gmail.com>
+ * Ported to cryptonite in 2017 by Nicolas Di Prima <nicolas@primetype.co.uk>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "cryptonite_pbkdf2.h"
+#include "cryptonite_bitfn.h"
+#include "cryptonite_sha1.h"
+#include "cryptonite_sha256.h"
+
+/* --- MSVC doesn't support C99 --- */
+#ifdef _MSC_VER
+#define restrict
+#define _Pragma __pragma
+#endif
+
+/* --- Common useful things --- */
+#define MIN(a, b) (((a) > (b)) ? (b) : (a))
+
+static inline void write32_be(uint32_t n, uint8_t out[4])
+{
+#if defined(__GNUC__) && __GNUC__ >= 4 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  n = __builtin_bswap32(n);
+  memcpy(out, &n, sizeof n); /* out is a byte buffer and may be unaligned */
+#else
+  out[0] = (n >> 24) & 0xff;
+  out[1] = (n >> 16) & 0xff;
+  out[2] = (n >> 8) & 0xff;
+  out[3] = n & 0xff;
+#endif
+}
+
+static inline void write64_be(uint64_t n, uint8_t out[8])
+{
+#if defined(__GNUC__) && __GNUC__ >= 4 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  n = __builtin_bswap64(n);
+  memcpy(out, &n, sizeof n); /* out is a byte buffer and may be unaligned */
+#else
+  write32_be((n >> 32) & 0xffffffff, out);
+  write32_be(n & 0xffffffff, out + 4);
+#endif
+}
+
+/* --- Optional OpenMP parallelisation of consecutive blocks --- */
+#ifdef WITH_OPENMP
+# define OPENMP_PARALLEL_FOR _Pragma("omp parallel for")
+#else
+# define OPENMP_PARALLEL_FOR
+#endif
+
+/* Prepare block (of blocksz bytes) to contain md padding denoting a msg-size
+ * message (in bytes).  block has a prefix of used bytes.
+ *
+ * Message length is expressed in 32 bits (so suitable for sha1, sha256, sha512). */
+static inline void md_pad(uint8_t *block, size_t blocksz, size_t used, size_t msg)
+{
+  memset(block + used, 0, blocksz - used - 4);
+  block[used] = 0x80;
+  block += blocksz - 4;
+  write32_be((uint32_t) (msg * 8), block);
+}
+
+/* Internal function/type names for hash-specific things. */
+#define HMAC_CTX(_name) HMAC_ ## _name ## _ctx
+#define HMAC_INIT(_name) HMAC_ ## _name ## _init
+#define HMAC_UPDATE(_name) HMAC_ ## _name ## _update
+#define HMAC_FINAL(_name) HMAC_ ## _name ## _final
+
+#define PBKDF2_F(_name) pbkdf2_f_ ## _name
+#define PBKDF2(_name) pbkdf2_ ## _name
+
+/* This macro expands to decls for the whole implementation for a given
+ * hash function.  Arguments are, in order:
+ *
+ *    _name       like 'sha1', added to symbol names
+ *    _blocksz    block size, in bytes
+ *    _hashsz     digest output, in bytes
+ *    _ctx        hash context type
+ *    _init       hash context initialisation function
+ *                    args: (_ctx *c)
+ *    _update     hash context update function
+ *                    args: (_ctx *c, const void *data, size_t ndata)
+ *    _xform      hash context raw block update function
+ *                    args: (_ctx *c, const void *data)
+ *    _final      hash context finish function
+ *                    args: (_ctx *c, void *out)
+ *    _xcpy       hash context raw copy function (only need copy hash state)
+ *                    args: (_ctx * restrict out, const _ctx *restrict in)
+ *    _xtract     hash context state extraction
+ *                    args: (_ctx *restrict c, uint8_t *restrict out)
+ *    _xxor       hash context xor function (only need xor hash state)
+ *                    args: (_ctx *restrict out, const _ctx *restrict in)
+ *
+ * The resulting function is named PBKDF2(_name).
+ */
+#define DECL_PBKDF2(_name, _blocksz, _hashsz, _ctx, \
+                    _init, _update, _xform, _final, _xcpy, _xtract, _xxor) \
+  typedef struct { \
+    _ctx inner; \
+    _ctx outer; \
+  } HMAC_CTX(_name); \
+  \
+  static inline void HMAC_INIT(_name)(HMAC_CTX(_name) *ctx, \
+                                      const uint8_t *key, size_t nkey) \
+  { \
+    /* Prepare key: */ \
+    uint8_t k[_blocksz]; \
+    \
+    /* Shorten long keys. */ \
+    if (nkey > _blocksz) \
+    { \
+      _init(&ctx->inner); \
+      _update(&ctx->inner, key, nkey); \
+      _final(&ctx->inner, k); \
+      \
+      key = k; \
+      nkey = _hashsz; \
+    } \
+    \
+    /* Standard doesn't cover case where blocksz < hashsz. */ \
+    assert(nkey <= _blocksz); \
+    \
+    /* Right zero-pad short keys. */ \
+    if (k != key) \
+      memcpy(k, key, nkey); \
+    if (_blocksz > nkey) \
+      memset(k + nkey, 0, _blocksz - nkey); \
+    \
+    /* Start inner hash computation */ \
+    uint8_t blk_inner[_blocksz]; \
+    uint8_t blk_outer[_blocksz]; \
+    \
+    for (size_t i = 0; i < _blocksz; i++) \
+    { \
+      blk_inner[i] = 0x36 ^ k[i]; \
+      blk_outer[i] = 0x5c ^ k[i]; \
+    } \
+    \
+    _init(&ctx->inner); \
+    _update(&ctx->inner, blk_inner, sizeof blk_inner); \
+    \
+    /* And outer. */ \
+    _init(&ctx->outer); \
+    _update(&ctx->outer, blk_outer, sizeof blk_outer); \
+  } \
+  \
+  static inline void HMAC_UPDATE(_name)(HMAC_CTX(_name) *ctx, \
+                                        const void *data, size_t ndata) \
+  { \
+    _update(&ctx->inner, data, ndata); \
+  } \
+  \
+  static inline void HMAC_FINAL(_name)(HMAC_CTX(_name) *ctx, \
+                                       uint8_t out[_hashsz]) \
+  { \
+    _final(&ctx->inner, out); \
+    _update(&ctx->outer, out, _hashsz); \
+    _final(&ctx->outer, out); \
+  } \
+  \
+  \
+  /* --- PBKDF2 --- */ \
+  static inline void PBKDF2_F(_name)(const HMAC_CTX(_name) *startctx, \
+                                     uint32_t counter, \
+                                     const uint8_t *salt, size_t nsalt, \
+                                     uint32_t iterations, \
+                                     uint8_t *out) \
+  { \
+    uint8_t countbuf[4]; \
+    write32_be(counter, countbuf); \
+    \
+    /* Prepare loop-invariant padding block. */ \
+    uint8_t Ublock[_blocksz]; \
+    md_pad(Ublock, _blocksz, _hashsz, _blocksz + _hashsz); \
+    \
+    /* First iteration: \
+     *   U_1 = PRF(P, S || INT_32_BE(i)) \
+     */ \
+    HMAC_CTX(_name) ctx = *startctx; \
+    HMAC_UPDATE(_name)(&ctx, salt, nsalt); \
+    HMAC_UPDATE(_name)(&ctx, countbuf, sizeof countbuf); \
+    HMAC_FINAL(_name)(&ctx, Ublock); \
+    _ctx result = ctx.outer; \
+    \
+    /* Subsequent iterations: \
+     *   U_c = PRF(P, U_{c-1}) \
+     */ \
+    for (uint32_t i = 1; i < iterations; i++) \
+    { \
+      /* Complete inner hash with previous U */ \
+      _xcpy(&ctx.inner, &startctx->inner); \
+      _xform(&ctx.inner, Ublock); \
+      _xtract(&ctx.inner, Ublock); \
+      /* Complete outer hash with inner output */ \
+      _xcpy(&ctx.outer, &startctx->outer); \
+      _xform(&ctx.outer, Ublock); \
+      _xtract(&ctx.outer, Ublock); \
+      _xxor(&result, &ctx.outer); \
+    } \
+    \
+    /* Reform result into output buffer. */ \
+    _xtract(&result, out); \
+  } \
+  \
+  static inline void PBKDF2(_name)(const uint8_t *pw, size_t npw, \
+                                   const uint8_t *salt, size_t nsalt, \
+                                   uint32_t iterations, \
+                                   uint8_t *out, size_t nout) \
+  { \
+    assert(iterations); \
+    assert(out && nout); \
+    \
+    /* Starting point for inner loop. */ \
+    HMAC_CTX(_name) ctx; \
+    HMAC_INIT(_name)(&ctx, pw, npw); \
+    \
+    /* How many blocks do we need? (divide first, then narrow) */ \
+    uint32_t blocks_needed = (uint32_t)((nout + _hashsz - 1) / _hashsz); \
+    \
+    OPENMP_PARALLEL_FOR \
+    for (uint32_t counter = 1; counter <= blocks_needed; counter++) \
+    { \
+      uint8_t block[_hashsz]; \
+      PBKDF2_F(_name)(&ctx, counter, salt, nsalt, iterations, block); \
+      \
+      size_t offset = (size_t)(counter - 1) * _hashsz; \
+      size_t taken = MIN(nout - offset, _hashsz); \
+      memcpy(out + offset, block, taken); \
+    } \
+  }
+
+static inline void sha1_extract(struct sha1_ctx *restrict ctx, uint8_t *restrict out)
+{
+  write32_be(ctx->h[0], out);
+  write32_be(ctx->h[1], out + 4);
+  write32_be(ctx->h[2], out + 8);
+  write32_be(ctx->h[3], out + 12);
+  write32_be(ctx->h[4], out + 16);
+}
+
+static inline void sha1_cpy(struct sha1_ctx *restrict out, const struct sha1_ctx *restrict in)
+{
+  out->h[0] = in->h[0];
+  out->h[1] = in->h[1];
+  out->h[2] = in->h[2];
+  out->h[3] = in->h[3];
+  out->h[4] = in->h[4];
+}
+
+static inline void sha1_xor(struct sha1_ctx *restrict out, const struct sha1_ctx *restrict in)
+{
+  out->h[0] ^= in->h[0];
+  out->h[1] ^= in->h[1];
+  out->h[2] ^= in->h[2];
+  out->h[3] ^= in->h[3];
+  out->h[4] ^= in->h[4];
+}
+
+/* One-block update used as the raw transform (_xform) for DECL_PBKDF2.
+ * NOTE(review): assumes ctx holds no partially buffered block - confirm. */
+void cryptonite_sha1_transform(struct sha1_ctx* ctx, uint8_t block[SHA1_BLOCK_SIZE])
+{
+  cryptonite_sha1_update(ctx, block, SHA1_BLOCK_SIZE);
+}
+
+DECL_PBKDF2(sha1,
+            SHA1_BLOCK_SIZE,
+            SHA1_DIGEST_SIZE,
+            struct sha1_ctx,
+            cryptonite_sha1_init,
+            cryptonite_sha1_update,
+            cryptonite_sha1_transform,
+            cryptonite_sha1_finalize,
+            sha1_cpy,
+            sha1_extract,
+            sha1_xor);
+
+static inline void sha256_extract(struct sha256_ctx *restrict ctx, uint8_t *restrict out)
+{
+  write32_be(ctx->h[0], out);
+  write32_be(ctx->h[1], out + 4);
+  write32_be(ctx->h[2], out + 8);
+  write32_be(ctx->h[3], out + 12);
+  write32_be(ctx->h[4], out + 16);
+  write32_be(ctx->h[5], out + 20);
+  write32_be(ctx->h[6], out + 24);
+  write32_be(ctx->h[7], out + 28);
+}
+
+static inline void sha256_cpy(struct sha256_ctx *restrict out, const struct sha256_ctx *restrict in)
+{
+  out->h[0] = in->h[0];
+  out->h[1] = in->h[1];
+  out->h[2] = in->h[2];
+  out->h[3] = in->h[3];
+  out->h[4] = in->h[4];
+  out->h[5] = in->h[5];
+  out->h[6] = in->h[6];
+  out->h[7] = in->h[7];
+}
+
+static inline void sha256_xor(struct sha256_ctx *restrict out, const struct sha256_ctx *restrict in)
+{
+  out->h[0] ^= in->h[0];
+  out->h[1] ^= in->h[1];
+  out->h[2] ^= in->h[2];
+  out->h[3] ^= in->h[3];
+  out->h[4] ^= in->h[4];
+  out->h[5] ^= in->h[5];
+  out->h[6] ^= in->h[6];
+  out->h[7] ^= in->h[7];
+}
+
+/* One-block update used as the raw transform (_xform) for DECL_PBKDF2.
+ * NOTE(review): assumes ctx holds no partially buffered block - confirm. */
+void cryptonite_sha256_transform(struct sha256_ctx* ctx, uint8_t block[SHA256_BLOCK_SIZE])
+{
+  cryptonite_sha256_update(ctx, block, SHA256_BLOCK_SIZE);
+}
+
+DECL_PBKDF2(sha256,
+            SHA256_BLOCK_SIZE,
+            SHA256_DIGEST_SIZE,
+            struct sha256_ctx,
+            cryptonite_sha256_init,
+            cryptonite_sha256_update,
+            cryptonite_sha256_transform,
+            cryptonite_sha256_finalize,
+            sha256_cpy,
+            sha256_extract,
+            sha256_xor);
+
+void cryptonite_fastpbkdf2_hmac_sha1( const uint8_t *pw, size_t npw
+                                    , const uint8_t *salt, size_t nsalt
+                                    , uint32_t iterations
+                                    , uint8_t *out, size_t nout
+                                    )
+{
+  PBKDF2(sha1)(pw, npw, salt, nsalt, iterations, out, nout);
+}
+
+void cryptonite_fastpbkdf2_hmac_sha256( const uint8_t *pw, size_t npw
+                                      , const uint8_t *salt, size_t nsalt
+                                      , uint32_t iterations
+                                      , uint8_t *out, size_t nout
+                                      )
+{
+  PBKDF2(sha256)(pw, npw, salt, nsalt, iterations, out, nout);
+}
diff --git a/cbits/cryptonite_pbkdf2.h b/cbits/cryptonite_pbkdf2.h
new file mode 100644
index 0000000..16f7d5a
--- /dev/null
+++ b/cbits/cryptonite_pbkdf2.h
@@ -0,0 +1,26 @@
+#ifndef CRYPTONITE_PBKDF2_H_
+#define CRYPTONITE_PBKDF2_H_
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void cryptonite_fastpbkdf2_hmac_sha1( const uint8_t *pw, size_t npw
+                                    , const uint8_t *salt, size_t nsalt
+                                    , uint32_t iterations
+                                    , uint8_t *out, size_t nout
+                                    );
+void cryptonite_fastpbkdf2_hmac_sha256( const uint8_t *pw, size_t npw
+                                      , const uint8_t *salt, size_t nsalt
+                                      , uint32_t iterations
+                                      , uint8_t *out, size_t nout
+                                      );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/cbits/cryptonite_sha1.h b/cbits/cryptonite_sha1.h
index 5457a7d..ee3f202 100644
--- a/cbits/cryptonite_sha1.h
+++ b/cbits/cryptonite_sha1.h
@@ -26,10 +26,12 @@
 
 #include <stdint.h>
 
+# define SHA1_BLOCK_SIZE 64
+
 struct sha1_ctx
 {
 	uint64_t sz;
-	uint8_t  buf[64];
+	uint8_t  buf[SHA1_BLOCK_SIZE];
 	uint32_t h[5];
 };
 
diff --git a/cbits/cryptonite_sha256.h b/cbits/cryptonite_sha256.h
index 8a20ff6..705ff9a 100644
--- a/cbits/cryptonite_sha256.h
+++ b/cbits/cryptonite_sha256.h
@@ -27,6 +27,8 @@
 
 #include <stdint.h>
 
+#define SHA256_BLOCK_SIZE 64
+
 struct sha256_ctx
 {
 	uint64_t sz;
diff --git a/cryptonite.cabal b/cryptonite.cabal
index 5c0b2f4..cf1cb78 100644
--- a/cryptonite.cabal
+++ b/cryptonite.cabal
@@ -240,6 +240,7 @@ Library
                    , cbits/cryptonite_tiger.c
                    , cbits/cryptonite_whirlpool.c
                    , cbits/cryptonite_scrypt.c
+                   , cbits/cryptonite_pbkdf2.c
   include-dirs:      cbits cbits/ed25519
 
   if arch(x86_64)
diff --git a/tests/KAT_PBKDF2.hs b/tests/KAT_PBKDF2.hs
index 63e5085..c72e3e5 100644
--- a/tests/KAT_PBKDF2.hs
+++ b/tests/KAT_PBKDF2.hs
@@ -44,12 +44,22 @@ vectors_hmac_sha256 =
 
 tests = testGroup "PBKDF2"
     [ testGroup "KATs-HMAC-SHA1" (katTests (PBKDF2.prfHMAC SHA1) vectors_hmac_sha1)
+    , testGroup "KATs-HMAC-SHA1 (fast)" (katTestFastPBKDF2_SHA1 vectors_hmac_sha1)
     , testGroup "KATs-HMAC-SHA256" (katTests (PBKDF2.prfHMAC SHA256) vectors_hmac_sha256)
+    , testGroup "KATs-HMAC-SHA256 (fast)" (katTestFastPBKDF2_SHA256 vectors_hmac_sha256)
     ]
   where katTests prf vects = map (toKatTest prf) $ zip is vects
 
         toKatTest prf (i, ((pass, salt, iter, dkLen), output)) =
             testCase (show i) (output @=? PBKDF2.generate prf (PBKDF2.Parameters iter dkLen) pass salt)
 
+        katTestFastPBKDF2_SHA1 = map toKatTestFastPBKDF2_SHA1 . zip is
+        toKatTestFastPBKDF2_SHA1 (i, ((pass, salt, iter, dkLen), output)) =
+            testCase (show i) (output @=? PBKDF2.fastPBKDF2_SHA1 (PBKDF2.Parameters iter dkLen) pass salt)
+
+        katTestFastPBKDF2_SHA256 = map toKatTestFastPBKDF2_SHA256 . zip is
+        toKatTestFastPBKDF2_SHA256 (i, ((pass, salt, iter, dkLen), output)) =
+            testCase (show i) (output @=? PBKDF2.fastPBKDF2_SHA256 (PBKDF2.Parameters iter dkLen) pass salt)
+
         is :: [Int]
         is = [1..]