Port Fast PBKDF2 for sha1 and sha256

This commit is contained in:
Nicolas DI PRIMA 2017-02-03 21:48:54 +00:00
parent e76bbaa8a7
commit 4189aa9389
7 changed files with 449 additions and 2 deletions

View File

@ -8,17 +8,22 @@
-- Password Based Key Derivation Function 2
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module Crypto.KDF.PBKDF2
( PRF
, prfHMAC
, Parameters(..)
, generate
, fastPBKDF2_SHA1
, fastPBKDF2_SHA256
) where
import Data.Word
import Data.Bits
import Foreign.Marshal.Alloc
import Foreign.Ptr (plusPtr)
import Foreign.Ptr (plusPtr, Ptr)
import Foreign.C.Types (CUInt(..), CInt(..), CSize(..))
import Crypto.Hash (HashAlgorithm)
import qualified Crypto.MAC.HMAC as HMAC
@ -100,3 +105,48 @@ generate prf params password salt =
c = fromIntegral ((w `shiftR` 8) .&. 0xff)
d = fromIntegral (w .&. 0xff)
{-# NOINLINE generate #-}
-- | PBKDF2 with HMAC-SHA1, computed by the C routine
-- @cryptonite_fastpbkdf2_hmac_sha1@ instead of the generic 'generate'.
--
-- The result is 'outputLength' bytes long and is derived from the given
-- password and salt using 'iterCounts' iterations. The password, salt and
-- output buffers are handed to C as raw pointers together with their lengths.
--
-- Note that the C side asserts that both the iteration count and the output
-- length are non-zero.
fastPBKDF2_SHA1 :: (ByteArrayAccess password, ByteArrayAccess salt, ByteArray out)
                => Parameters
                -> password
                -> salt
                -> out
fastPBKDF2_SHA1 params password salt =
    B.allocAndFreeze outLen $ \outPtr ->
    B.withByteArray password $ \passPtr ->
    B.withByteArray salt $ \saltPtr ->
        c_cryptonite_fastpbkdf2_hmac_sha1
            passPtr passLen
            saltPtr saltLen
            rounds
            outPtr (fromIntegral outLen)
  where
    -- Sizes and iteration count, converted once to the C argument types.
    outLen  = outputLength params
    passLen = fromIntegral (B.length password)
    saltLen = fromIntegral (B.length salt)
    rounds  = fromIntegral (iterCounts params)
-- | PBKDF2 with HMAC-SHA256, computed by the C routine
-- @cryptonite_fastpbkdf2_hmac_sha256@ instead of the generic 'generate'.
--
-- The result is 'outputLength' bytes long and is derived from the given
-- password and salt using 'iterCounts' iterations. The password, salt and
-- output buffers are handed to C as raw pointers together with their lengths.
--
-- Note that the C side asserts that both the iteration count and the output
-- length are non-zero.
fastPBKDF2_SHA256 :: (ByteArrayAccess password, ByteArrayAccess salt, ByteArray out)
                  => Parameters
                  -> password
                  -> salt
                  -> out
fastPBKDF2_SHA256 params password salt =
    B.allocAndFreeze outLen $ \outPtr ->
    B.withByteArray password $ \passPtr ->
    B.withByteArray salt $ \saltPtr ->
        c_cryptonite_fastpbkdf2_hmac_sha256
            passPtr passLen
            saltPtr saltLen
            rounds
            outPtr (fromIntegral outLen)
  where
    -- Sizes and iteration count, converted once to the C argument types.
    outLen  = outputLength params
    passLen = fromIntegral (B.length password)
    saltLen = fromIntegral (B.length salt)
    rounds  = fromIntegral (iterCounts params)
-- Binding to the C function @cryptonite_fastpbkdf2_hmac_sha1@.
--
-- Arguments, in order: password pointer and length, salt pointer and length,
-- iteration count, output pointer and output length. The derived key is
-- written to the output buffer; nothing is returned.
--
-- NOTE(review): imported as an @unsafe@ call; with large iteration counts the
-- call may run for a long time - confirm this is acceptable for callers.
foreign import ccall unsafe "cryptonite_pbkdf2.h cryptonite_fastpbkdf2_hmac_sha1"
c_cryptonite_fastpbkdf2_hmac_sha1 :: Ptr Word8 -> CSize
-> Ptr Word8 -> CSize
-> CUInt
-> Ptr Word8 -> CSize
-> IO ()
-- Binding to the C function @cryptonite_fastpbkdf2_hmac_sha256@.
--
-- Arguments, in order: password pointer and length, salt pointer and length,
-- iteration count, output pointer and output length. The derived key is
-- written to the output buffer; nothing is returned.
--
-- NOTE(review): imported as an @unsafe@ call; with large iteration counts the
-- call may run for a long time - confirm this is acceptable for callers.
foreign import ccall unsafe "cryptonite_pbkdf2.h cryptonite_fastpbkdf2_hmac_sha256"
c_cryptonite_fastpbkdf2_hmac_sha256 :: Ptr Word8 -> CSize
-> Ptr Word8 -> CSize
-> CUInt
-> Ptr Word8 -> CSize
-> IO ()

356
cbits/cryptonite_pbkdf2.c Normal file
View File

@ -0,0 +1,356 @@
/*
* fast-pbkdf2 - Optimal PBKDF2-HMAC calculation
* Written in 2015 by Joseph Birr-Pixton <jpixton@gmail.com>
* Ported to cryptonite in 2017 by Nicolas Di Prima <nicolas@primetype.co.uk>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to the
* public domain worldwide. This software is distributed without any
* warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <assert.h>
#include <string.h>
#include "cryptonite_pbkdf2.h"
#include "cryptonite_bitfn.h"
#include "cryptonite_sha1.h"
#include "cryptonite_sha256.h"
/* --- MSVC doesn't support C99 --- */
/* When _MSC_VER is defined, `restrict` is defined away to nothing and
 * `_Pragma` is mapped onto the `__pragma` spelling, so the rest of this file
 * can use both unconditionally. */
#ifdef _MSC_VER
#define restrict
#define _Pragma __pragma
#endif
/* --- Common useful things --- */
/* Smaller of a and b.
 *
 * The whole expansion is parenthesised so that the macro can be used as an
 * operand of a larger expression (e.g. `MIN(x, y) + 1`) without the
 * conditional operator swallowing the neighbouring operators.
 * Each argument may be evaluated more than once. */
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
/* Store the 32-bit value n into out[0..3] in big-endian byte order.
 *
 * out carries no alignment guarantee (callers pass arbitrary offsets into
 * byte buffers), so the fast path copies the byte-swapped word with memcpy
 * instead of storing through a uint32_t pointer.
 *
 * The fast path is only selected when the compiler actually defines
 * __BYTE_ORDER__; otherwise both sides of the comparison would be undefined
 * (and therefore equal) in the preprocessor and the swap would be applied
 * regardless of the real byte order. */
static inline void write32_be(uint32_t n, uint8_t out[4])
{
#if defined(__GNUC__) && __GNUC__ >= 4 && \
    defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  uint32_t be = __builtin_bswap32(n);
  memcpy(out, &be, sizeof be);
#else
  out[0] = (uint8_t) ((n >> 24) & 0xff);
  out[1] = (uint8_t) ((n >> 16) & 0xff);
  out[2] = (uint8_t) ((n >> 8) & 0xff);
  out[3] = (uint8_t) (n & 0xff);
#endif
}
/* Store the 64-bit value n into out[0..7] in big-endian byte order.
 *
 * As out has no alignment guarantee, the fast path copies the byte-swapped
 * word with memcpy instead of storing through a uint64_t pointer. The fast
 * path is only selected when the compiler defines __BYTE_ORDER__, so that an
 * undefined macro can never be mistaken for "little endian". */
static inline void write64_be(uint64_t n, uint8_t out[8])
{
#if defined(__GNUC__) && __GNUC__ >= 4 && \
    defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  uint64_t be = __builtin_bswap64(n);
  memcpy(out, &be, sizeof be);
#else
  /* Most significant byte first. */
  for (int i = 0; i < 8; i++)
    out[i] = (uint8_t) ((n >> (56 - 8 * i)) & 0xff);
#endif
}
/* --- Optional OpenMP parallelisation of consecutive blocks --- */
/* OPENMP_PARALLEL_FOR is placed directly in front of the per-block loop in
 * PBKDF2(_name). It expands to an "omp parallel for" pragma when WITH_OPENMP
 * is defined, and to nothing otherwise. */
#ifdef WITH_OPENMP
# define OPENMP_PARALLEL_FOR _Pragma("omp parallel for")
#else
# define OPENMP_PARALLEL_FOR
#endif
/* Fill the tail of a hash input block with message-digest padding.
 *
 *   block    buffer of blocksz bytes; its first `used` bytes are payload and
 *            are left untouched
 *   blocksz  size of the block, in bytes
 *   used     number of payload bytes already present at the start of block
 *   msg      total message length, in bytes, to encode in the padding
 *
 * After the payload comes a single 0x80 byte, then zero bytes, and finally
 * the message length in bits written big-endian into the last four bytes of
 * the block. Only the low 32 bits of the bit length are stored. */
static inline void md_pad(uint8_t *block, size_t blocksz, size_t used, size_t msg)
{
  size_t lenpos = blocksz - 4;
  uint8_t *pad = block + used;

  /* Clear everything between the payload and the length field ... */
  memset(pad, 0, lenpos - used);
  /* ... then set the mandatory leading padding bit. */
  pad[0] = 0x80;

  write32_be((uint32_t) (msg * 8), block + lenpos);
}
/* Internal function/type names for hash-specific things.
 * Each macro pastes the hash name into an identifier, e.g. HMAC_CTX(sha1)
 * becomes HMAC_sha1_ctx and PBKDF2(sha1) becomes pbkdf2_sha1. */
#define HMAC_CTX(_name) HMAC_ ## _name ## _ctx
#define HMAC_INIT(_name) HMAC_ ## _name ## _init
#define HMAC_UPDATE(_name) HMAC_ ## _name ## _update
#define HMAC_FINAL(_name) HMAC_ ## _name ## _final
#define PBKDF2_F(_name) pbkdf2_f_ ## _name
#define PBKDF2(_name) pbkdf2_ ## _name
/* This macro expands to decls for the whole implementation for a given
* hash function. Arguments, in the order the macro takes them:
*
* _name like 'sha1', added to symbol names
* _blocksz block size, in bytes
* _hashsz digest output, in bytes
* _ctx hash context type
* _init hash context initialisation function
* args: (_ctx *c)
* _update hash context update function
* args: (_ctx *c, const void *data, size_t ndata)
* _xform hash context raw block update function
* args: (_ctx *c, const void *data)
* _final hash context finish function
* args: (_ctx *c, uint8_t *out)
* _xcpy hash context raw copy function (only need copy hash state)
* args: (_ctx * restrict out, const _ctx *restrict in)
* _xtract hash context state extraction
* args: (_ctx *restrict c, uint8_t *restrict out)
* _xxor hash context xor function (only need xor hash state)
* args: (_ctx *restrict out, const _ctx *restrict in)
*
* The expansion defines the HMAC context type HMAC_CTX(_name), the helpers
* HMAC_INIT/HMAC_UPDATE/HMAC_FINAL(_name), the per-block function
* PBKDF2_F(_name) and the entry point. The resulting function is named
* PBKDF2(_name).
*/
#define DECL_PBKDF2(_name, _blocksz, _hashsz, _ctx, \
_init, _update, _xform, _final, _xcpy, _xtract, _xxor) \
typedef struct { \
_ctx inner; \
_ctx outer; \
} HMAC_CTX(_name); \
\
static inline void HMAC_INIT(_name)(HMAC_CTX(_name) *ctx, \
const uint8_t *key, size_t nkey) \
{ \
/* Prepare key: */ \
uint8_t k[_blocksz]; \
\
/* Shorten long keys: a key longer than one block is replaced by its hash. */ \
if (nkey > _blocksz) \
{ \
_init(&ctx->inner); \
_update(&ctx->inner, key, nkey); \
_final(&ctx->inner, k); \
\
key = k; \
nkey = _hashsz; \
} \
\
/* Standard doesn't cover case where blocksz < hashsz. */ \
assert(nkey <= _blocksz); \
\
/* Right zero-pad short keys. */ \
if (k != key) \
memcpy(k, key, nkey); \
if (_blocksz > nkey) \
memset(k + nkey, 0, _blocksz - nkey); \
\
/* Start inner hash computation: absorb the key block xored with 0x36. */ \
uint8_t blk_inner[_blocksz]; \
uint8_t blk_outer[_blocksz]; \
\
for (size_t i = 0; i < _blocksz; i++) \
{ \
blk_inner[i] = 0x36 ^ k[i]; \
blk_outer[i] = 0x5c ^ k[i]; \
} \
\
_init(&ctx->inner); \
_update(&ctx->inner, blk_inner, sizeof blk_inner); \
\
/* And outer: absorb the key block xored with 0x5c. */ \
_init(&ctx->outer); \
_update(&ctx->outer, blk_outer, sizeof blk_outer); \
} \
\
static inline void HMAC_UPDATE(_name)(HMAC_CTX(_name) *ctx, \
const void *data, size_t ndata) \
{ \
_update(&ctx->inner, data, ndata); \
} \
\
static inline void HMAC_FINAL(_name)(HMAC_CTX(_name) *ctx, \
uint8_t out[_hashsz]) \
{ \
_final(&ctx->inner, out); \
_update(&ctx->outer, out, _hashsz); \
_final(&ctx->outer, out); \
} \
\
\
/* --- PBKDF2 --- */ \
static inline void PBKDF2_F(_name)(const HMAC_CTX(_name) *startctx, \
uint32_t counter, \
const uint8_t *salt, size_t nsalt, \
uint32_t iterations, \
uint8_t *out) \
{ \
uint8_t countbuf[4]; \
write32_be(counter, countbuf); \
\
/* Prepare loop-invariant padding block: _hashsz payload bytes, then padding for a (_blocksz + _hashsz)-byte message. */ \
uint8_t Ublock[_blocksz]; \
md_pad(Ublock, _blocksz, _hashsz, _blocksz + _hashsz); \
\
/* First iteration: \
* U_1 = PRF(P, S || INT_32_BE(i)) \
*/ \
HMAC_CTX(_name) ctx = *startctx; \
HMAC_UPDATE(_name)(&ctx, salt, nsalt); \
HMAC_UPDATE(_name)(&ctx, countbuf, sizeof countbuf); \
HMAC_FINAL(_name)(&ctx, Ublock); \
_ctx result = ctx.outer; \
\
/* Subsequent iterations: \
* U_c = PRF(P, U_{c-1}) \
*/ \
for (uint32_t i = 1; i < iterations; i++) \
{ \
/* Complete inner hash with previous U */ \
_xcpy(&ctx.inner, &startctx->inner); \
_xform(&ctx.inner, Ublock); \
_xtract(&ctx.inner, Ublock); \
/* Complete outer hash with inner output */ \
_xcpy(&ctx.outer, &startctx->outer); \
_xform(&ctx.outer, Ublock); \
_xtract(&ctx.outer, Ublock); \
_xxor(&result, &ctx.outer); \
} \
\
/* Reform result (the xor of all U values, kept as hash state) into output buffer. */ \
_xtract(&result, out); \
} \
\
static inline void PBKDF2(_name)(const uint8_t *pw, size_t npw, \
const uint8_t *salt, size_t nsalt, \
uint32_t iterations, \
uint8_t *out, size_t nout) \
{ \
assert(iterations); \
assert(out && nout); \
\
/* Starting point for inner loop: HMAC state keyed with the password. */ \
HMAC_CTX(_name) ctx; \
HMAC_INIT(_name)(&ctx, pw, npw); \
\
/* How many blocks do we need? (nout / _hashsz, rounded up) */ \
uint32_t blocks_needed = (uint32_t)(nout + _hashsz - 1) / _hashsz; \
\
OPENMP_PARALLEL_FOR \
for (uint32_t counter = 1; counter <= blocks_needed; counter++) \
{ \
uint8_t block[_hashsz]; \
PBKDF2_F(_name)(&ctx, counter, salt, nsalt, iterations, block); \
\
size_t offset = (counter - 1) * _hashsz; \
size_t taken = MIN(nout - offset, _hashsz); \
memcpy(out + offset, block, taken); \
} \
}
/* Serialise the five 32-bit state words ctx->h[0..4] into out as 20 bytes,
 * each word written big-endian, in index order. */
static inline void sha1_extract(struct sha1_ctx *restrict ctx, uint8_t *restrict out)
{
  for (size_t word = 0; word < 5; word++)
    write32_be(ctx->h[word], out + 4 * word);
}
/* Copy only the five state words h[0..4] from in to out; every other field
 * of out is left as it was. */
static inline void sha1_cpy(struct sha1_ctx *restrict out, const struct sha1_ctx *restrict in)
{
  for (size_t word = 0; word < 5; word++)
    out->h[word] = in->h[word];
}
/* Xor the five state words h[0..4] of in into the corresponding words of
 * out; every other field of out is left as it was. */
static inline void sha1_xor(struct sha1_ctx *restrict out, const struct sha1_ctx *restrict in)
{
  for (size_t word = 0; word < 5; word++)
    out->h[word] ^= in->h[word];
}
/* Adapter passed as the _xform argument of DECL_PBKDF2(sha1, ...): feeds one
 * block of SHA1_BLOCK_SIZE bytes to cryptonite_sha1_update.
 *
 * NOTE(review): PBKDF2_F uses this as a "raw block update" after copying only
 * the h[] words into ctx. That presumably relies on ctx->sz being a multiple
 * of the block size so that cryptonite_sha1_update processes the block
 * directly without buffering - confirm against cryptonite_sha1_update.
 * NOTE(review): this function has external linkage but no prototype is
 * visible in the headers shown here. */
void cryptonite_sha1_transform(struct sha1_ctx* ctx, uint8_t block[SHA1_BLOCK_SIZE])
{
cryptonite_sha1_update(ctx, block, SHA1_BLOCK_SIZE);
}
/* Instantiate the HMAC helpers and PBKDF2 implementation for SHA-1.
 * This defines PBKDF2(sha1), i.e. pbkdf2_sha1, which is called by
 * cryptonite_fastpbkdf2_hmac_sha1. */
DECL_PBKDF2(sha1,
SHA1_BLOCK_SIZE,
SHA1_DIGEST_SIZE,
struct sha1_ctx,
cryptonite_sha1_init,
cryptonite_sha1_update,
cryptonite_sha1_transform,
cryptonite_sha1_finalize,
sha1_cpy,
sha1_extract,
sha1_xor);
/* Serialise the eight 32-bit state words ctx->h[0..7] into out as 32 bytes,
 * each word written big-endian, in index order. */
static inline void sha256_extract(struct sha256_ctx *restrict ctx, uint8_t *restrict out)
{
  for (size_t word = 0; word < 8; word++)
    write32_be(ctx->h[word], out + 4 * word);
}
/* Copy only the eight state words h[0..7] from in to out; every other field
 * of out is left as it was. */
static inline void sha256_cpy(struct sha256_ctx *restrict out, const struct sha256_ctx *restrict in)
{
  for (size_t word = 0; word < 8; word++)
    out->h[word] = in->h[word];
}
/* Xor the eight state words h[0..7] of in into the corresponding words of
 * out; every other field of out is left as it was. */
static inline void sha256_xor(struct sha256_ctx *restrict out, const struct sha256_ctx *restrict in)
{
  for (size_t word = 0; word < 8; word++)
    out->h[word] ^= in->h[word];
}
/* Adapter passed as the _xform argument of DECL_PBKDF2(sha256, ...): feeds
 * one block of SHA256_BLOCK_SIZE bytes to cryptonite_sha256_update.
 *
 * NOTE(review): PBKDF2_F uses this as a "raw block update" after copying only
 * the h[] words into ctx. That presumably relies on ctx->sz being a multiple
 * of the block size so that cryptonite_sha256_update processes the block
 * directly without buffering - confirm against cryptonite_sha256_update.
 * NOTE(review): this function has external linkage but no prototype is
 * visible in the headers shown here. */
void cryptonite_sha256_transform(struct sha256_ctx* ctx, uint8_t block[SHA256_BLOCK_SIZE])
{
cryptonite_sha256_update(ctx, block, SHA256_BLOCK_SIZE);
}
/* Instantiate the HMAC helpers and PBKDF2 implementation for SHA-256.
 * This defines PBKDF2(sha256), i.e. pbkdf2_sha256, which is called by
 * cryptonite_fastpbkdf2_hmac_sha256. */
DECL_PBKDF2(sha256,
SHA256_BLOCK_SIZE,
SHA256_DIGEST_SIZE,
struct sha256_ctx,
cryptonite_sha256_init,
cryptonite_sha256_update,
cryptonite_sha256_transform,
cryptonite_sha256_finalize,
sha256_cpy,
sha256_extract,
sha256_xor);
/* Exported entry point: PBKDF2 with HMAC-SHA1.
 *
 *   pw, npw       password bytes and their count
 *   salt, nsalt   salt bytes and their count
 *   iterations    iteration count; the implementation asserts it is non-zero
 *   out, nout     output buffer and number of bytes to derive; the
 *                 implementation asserts out is non-NULL and nout is non-zero
 *
 * Forwards all arguments unchanged to PBKDF2(sha1). */
void cryptonite_fastpbkdf2_hmac_sha1( const uint8_t *pw, size_t npw
, const uint8_t *salt, size_t nsalt
, uint32_t iterations
, uint8_t *out, size_t nout
)
{
PBKDF2(sha1)(pw, npw, salt, nsalt, iterations, out, nout);
}
/* Exported entry point: PBKDF2 with HMAC-SHA256.
 *
 *   pw, npw       password bytes and their count
 *   salt, nsalt   salt bytes and their count
 *   iterations    iteration count; the implementation asserts it is non-zero
 *   out, nout     output buffer and number of bytes to derive; the
 *                 implementation asserts out is non-NULL and nout is non-zero
 *
 * Forwards all arguments unchanged to PBKDF2(sha256). */
void cryptonite_fastpbkdf2_hmac_sha256( const uint8_t *pw, size_t npw
, const uint8_t *salt, size_t nsalt
, uint32_t iterations
, uint8_t *out, size_t nout
)
{
PBKDF2(sha256)(pw, npw, salt, nsalt, iterations, out, nout);
}

26
cbits/cryptonite_pbkdf2.h Normal file
View File

@ -0,0 +1,26 @@
#ifndef CRYPTONITE_PBKDF2_H_
#define CRYPTONITE_PBKDF2_H_
#include <stdlib.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* PBKDF2 with HMAC-SHA1: derive nout bytes into out from the password
 * (pw, npw) and salt (salt, nsalt) using the given number of iterations. */
void cryptonite_fastpbkdf2_hmac_sha1( const uint8_t *pw, size_t npw
, const uint8_t *salt, size_t nsalt
, uint32_t iterations
, uint8_t *out, size_t nout
);
/* PBKDF2 with HMAC-SHA256: derive nout bytes into out from the password
 * (pw, npw) and salt (salt, nsalt) using the given number of iterations. */
void cryptonite_fastpbkdf2_hmac_sha256( const uint8_t *pw, size_t npw
, const uint8_t *salt, size_t nsalt
, uint32_t iterations
, uint8_t *out, size_t nout
);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -26,10 +26,12 @@
#include <stdint.h>
# define SHA1_BLOCK_SIZE 64
struct sha1_ctx
{
uint64_t sz;
uint8_t buf[64];
uint8_t buf[SHA1_BLOCK_SIZE];
uint32_t h[5];
};

View File

@ -27,6 +27,8 @@
#include <stdint.h>
#define SHA256_BLOCK_SIZE 64
struct sha256_ctx
{
uint64_t sz;

View File

@ -240,6 +240,7 @@ Library
, cbits/cryptonite_tiger.c
, cbits/cryptonite_whirlpool.c
, cbits/cryptonite_scrypt.c
, cbits/cryptonite_pbkdf2.c
include-dirs: cbits cbits/ed25519
if arch(x86_64)

View File

@ -44,12 +44,22 @@ vectors_hmac_sha256 =
-- Known-answer tests for PBKDF2. Every vector list is checked twice: once
-- through the generic 'PBKDF2.generate' with an HMAC PRF, and once through
-- the matching fast C-backed function. Test cases are named by their
-- 1-based position in the vector list.
tests = testGroup "PBKDF2"
    [ testGroup "KATs-HMAC-SHA1" (katTests (PBKDF2.prfHMAC SHA1) vectors_hmac_sha1)
    , testGroup "KATs-HMAC-SHA1 (fast)" (katTestFastPBKDF2_SHA1 vectors_hmac_sha1)
    , testGroup "KATs-HMAC-SHA256" (katTests (PBKDF2.prfHMAC SHA256) vectors_hmac_sha256)
    , testGroup "KATs-HMAC-SHA256 (fast)" (katTestFastPBKDF2_SHA256 vectors_hmac_sha256)
    ]
  where
    katTests prf vects = zipWith (katCase (PBKDF2.generate prf)) caseNumbers vects

    katTestFastPBKDF2_SHA1 = zipWith (katCase PBKDF2.fastPBKDF2_SHA1) caseNumbers

    katTestFastPBKDF2_SHA256 = zipWith (katCase PBKDF2.fastPBKDF2_SHA256) caseNumbers

    -- One test case: run the given derivation function on the vector's
    -- parameters and compare against the expected output.
    katCase derive i ((pass, salt, iter, dkLen), output) =
        testCase (show i) (output @=? derive (PBKDF2.Parameters iter dkLen) pass salt)

    caseNumbers :: [Int]
    caseNumbers = [1..]