Base16/Base32 decoding, props/tests

This commit is contained in:
Richard Marko 2020-03-04 10:41:04 +01:00
parent 04cea994f1
commit d82ba9f3a7
7 changed files with 200 additions and 51 deletions

View File

@ -19,6 +19,7 @@ cabal-version: >=1.10
library
exposed-modules: System.Nix.Base32
, System.Nix.Hash
, System.Nix.Internal.Base32
, System.Nix.Internal.Hash
, System.Nix.Internal.Signature
, System.Nix.Internal.StorePath
@ -63,6 +64,7 @@ test-suite format-tests
type: exitcode-stdio-1.0
main-is: Driver.hs
other-modules:
Arbitrary
NarFormat
Hash
hs-source-dirs:
@ -70,6 +72,7 @@ test-suite format-tests
build-depends:
hnix-store-core
, base
, base16-bytestring
, base64-bytestring
, binary
, bytestring

View File

@ -1,39 +1,9 @@
{-|
Description: Implementation of Nix's base32 encoding.
-}
module System.Nix.Base32 where
module System.Nix.Base32 (
encode
, decode
) where
import qualified Data.ByteString as BS
import qualified Data.Text as T
import qualified Data.Vector as V
-- | Encode a 'BS.ByteString' in Nix's base32 encoding
encode :: BS.ByteString -> T.Text
encode c = T.pack $ map char32 [nChar - 1, nChar - 2 .. 0]
where
digits32 = V.fromList "0123456789abcdfghijklmnpqrsvwxyz"
-- Each base32 character gives us 5 bits of information, while
-- each byte gives is 8. Because 'div' rounds down, we need to add
-- one extra character to the result, and because of that extra 1
-- we need to subtract one from the number of bits in the
-- bytestring to cover for the case where the number of bits is
-- already a factor of 5. Thus, the + 1 outside of the 'div' and
-- the - 1 inside of it.
nChar = fromIntegral $ ((BS.length c * 8 - 1) `div` 5) + 1
byte = BS.index c . fromIntegral
-- May need to switch to a more efficient calculation at some
-- point.
bAsInteger :: Integer
bAsInteger = sum [fromIntegral (byte j) * (256 ^ j)
| j <- [0 .. BS.length c - 1]
]
char32 :: Integer -> Char
char32 i = digits32 V.! digitInd
where
digitInd = fromIntegral $
bAsInteger
`div` (32^i)
`mod` 32
import System.Nix.Internal.Base32

View File

@ -12,7 +12,9 @@ module System.Nix.Hash (
, HNix.hashLazy
, HNix.encodeBase32
, HNix.decodeBase32
, HNix.encodeBase16
, HNix.decodeBase16
) where
import qualified System.Nix.Internal.Hash as HNix

View File

@ -0,0 +1,82 @@
module System.Nix.Internal.Base32 where
import Data.Bits (shiftR)
import Data.Char (chr, ord)
import Data.Word (Word8)
import Data.List (unfoldr)
import Data.Maybe (isJust, catMaybes)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Text as T
import qualified Data.Vector as V
import Numeric (readInt)
-- omitted: E O U T
digits32 = V.fromList "0123456789abcdfghijklmnpqrsvwxyz"
-- | Encode a 'BS.ByteString' in Nix's base32 encoding
encode :: BS.ByteString -> T.Text
encode c = T.pack $ map char32 [nChar - 1, nChar - 2 .. 0]
where
-- Each base32 character gives us 5 bits of information, while
-- each byte gives is 8. Because 'div' rounds down, we need to add
-- one extra character to the result, and because of that extra 1
-- we need to subtract one from the number of bits in the
-- bytestring to cover for the case where the number of bits is
-- already a factor of 5. Thus, the + 1 outside of the 'div' and
-- the - 1 inside of it.
nChar = fromIntegral $ ((BS.length c * 8 - 1) `div` 5) + 1
byte = BS.index c . fromIntegral
-- May need to switch to a more efficient calculation at some
-- point.
bAsInteger :: Integer
bAsInteger = sum [fromIntegral (byte j) * (256 ^ j)
| j <- [0 .. BS.length c - 1]
]
char32 :: Integer -> Char
char32 i = digits32 V.! digitInd
where
digitInd = fromIntegral $
bAsInteger
`div` (32^i)
`mod` 32
-- | Decode Nix's base32 encoded text
decode :: T.Text -> Either String BS.ByteString
decode what = case T.all (flip elem digits32) what of
True -> unsafeDecode what
False -> Left "Invalid base32 string"
-- | Decode Nix's base32 encoded text
-- Doesn't check if all elements match `digits32`
unsafeDecode :: T.Text -> Either String BS.ByteString
unsafeDecode what =
case readInt 32
(flip elem digits32)
(\c -> maybe (error "character not in digits32") id $
V.findIndex (==c) digits32)
(T.unpack what)
of
[(i, _)] -> Right $ padded $ integerToBS i
x -> Left $ "Can't decode: readInt returned " ++ show x
where
padded x | BS.length x < decLen = x `BS.append`
(BSC.pack $ take (decLen - BS.length x) (cycle "\NUL"))
padded x | otherwise = x
decLen = T.length what * 5 `div` 8
-- | Encode an Integer to a bytestring
-- Similar to Data.Base32String (integerToBS) without `reverse`
integerToBS :: Integer -> BS.ByteString
integerToBS 0 = BS.pack [0]
integerToBS i
| i > 0 = BS.pack $ unfoldr f i
| otherwise = error "integerToBS not defined for negative values"
where
f 0 = Nothing
f x = Just (fromInteger x :: Word8, x `shiftR` 8)

View File

@ -99,10 +99,20 @@ hashLazy bsl =
encodeBase32 :: Digest a -> T.Text
encodeBase32 (Digest bs) = Base32.encode bs
-- | Decode a 'Digest' in the special Nix base-32 encoding.
decodeBase32 :: T.Text -> Either String (Digest a)
decodeBase32 t = Digest <$> Base32.decode t
-- | Encode a 'Digest' in hex.
encodeBase16 :: Digest a -> T.Text
encodeBase16 (Digest bs) = T.decodeUtf8 (Base16.encode bs)
-- | Decode a 'Digest' in hex
decodeBase16 :: T.Text -> Either String (Digest a)
decodeBase16 t = case Base16.decode (T.encodeUtf8 t) of
(x, "") -> Right $ Digest x
_ -> Left $ "Unable to decode base16 string " ++ T.unpack t
-- | Uses "Crypto.Hash.MD5" from cryptohash-md5.
instance ValidAlgo 'MD5 where
type AlgoCtx 'MD5 = MD5.Ctx

View File

@ -0,0 +1,37 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DataKinds #-}
module Arbitrary where
import Control.Monad (replicateM)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Text as T
import Test.Tasty.QuickCheck
import System.Nix.Hash
import System.Nix.Internal.Hash
import System.Nix.StorePath
import System.Nix.Internal.StorePath
genSafeChar :: Gen Char
genSafeChar = choose ('\1', '\127') -- ASCII without \NUL
nonEmptyString :: Gen String
nonEmptyString = listOf1 genSafeChar
dir = ('/':) <$> (listOf1 $ elements $ ('/':['a'..'z']))
instance Arbitrary StorePathName where
arbitrary = StorePathName . T.pack
<$> ((:) <$> s1 <*> listOf sn)
where
alphanum = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9']
s1 = elements $ alphanum ++ "+-_?="
sn = elements $ alphanum ++ "+-._?="
instance Arbitrary (Digest StorePathHashAlgo) where
arbitrary = hash . BSC.pack <$> arbitrary
instance Arbitrary (Digest SHA256) where
arbitrary = hash . BSC.pack <$> arbitrary

View File

@ -5,26 +5,20 @@
module Hash where
import Control.Monad.IO.Class (liftIO)
import Control.Exception (bracket)
import qualified Data.ByteString as BS
import Control.Monad (forM_)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Base16 as B16
import qualified Data.ByteString.Base64.Lazy as B64
import qualified Data.ByteString.Lazy as BSL
import Data.Monoid ((<>))
import qualified Data.Text as T
import System.Directory (removeFile)
import System.IO.Temp (withSystemTempFile, writeSystemTempFile)
import qualified System.IO as IO -- (hGetContents, hPutStr, openFile)
import qualified System.Process as P
import Test.Tasty as T
import Test.Tasty.Hspec
import qualified Test.Tasty.HUnit as HU
import Test.Tasty.QuickCheck
import Text.Read (readMaybe)
import Test.Tasty.Hspec
import Test.Tasty.QuickCheck
import System.Nix.Base32
import System.Nix.Hash
import System.Nix.Internal.Hash
import System.Nix.StorePath
import NarFormat -- TODO: Move the fixtures into a common module
import Arbitrary
spec_hash :: Spec
spec_hash = do
@ -34,7 +28,9 @@ spec_hash = do
it "produces (base32 . sha256) of \"nix-output:foo\" the same as Nix does at the moment for placeholder \"foo\"" $
shouldBe (encodeBase32 (hash @SHA256 "nix-output:foo"))
"1x0ymrsy7yr7i9wdsqy9khmzc1yy7nvxw6rdp72yzn50285s67j5"
it "produces (base16 . md5) of \"Hello World\" the same as the thesis" $
shouldBe (encodeBase16 (hash @MD5 "Hello World"))
"b10a8db164e0754105b7a99be72e3fe5"
it "produces (base32 . sha1) of \"Hello World\" the same as the thesis" $
shouldBe (encodeBase32 (hash @SHA1 "Hello World"))
"s23c9fs0v32pf6bhmcph5rbqsyl5ak8a"
@ -47,3 +43,52 @@ spec_hash = do
<> "c0d7b98883f9ee3:/nix/store:myfile"
shouldBe (encodeBase32 @StorePathHashAlgo (hash exampleStr))
"xv2iccirbrvklck36f1g7vldn5v58vck"
-- | Test that Nix-like base32 encoding roundtrips
prop_nixBase32Roundtrip = forAllShrink nonEmptyString genericShrink $
\x -> Right (BSC.pack x) === (decode . encode . BSC.pack $ x)
-- | API variants
prop_nixBase16Roundtrip =
\(x :: Digest StorePathHashAlgo) -> Right x === (decodeBase16 . encodeBase16 $ x)
-- | Hash encoding conversion ground-truth.
-- Similiar to nix/tests/hash.sh
spec_nixhash :: Spec
spec_nixhash = do
describe "hashing parity with nix-nash" $ do
let
samples = [
( "800d59cfcd3c05e900cb4e214be48f6b886a08df"
, "vw46m23bizj4n8afrc0fj19wrp7mj3c0"
, "gA1Zz808BekAy04hS+SPa4hqCN8="
)
, ( "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
, "1b8m03r63zqhnjf7l5wnldhh7c134ap5vpj0850ymkq1iyzicy5s"
, "ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0="
)
, ( "204a8fc6dda82f0a0ced7beb8e08a41657c16ef468b228a8279be331a703c33596fd15c13b1b07f9aa1d3bea57789ca031ad85c7a71dd70354ec631238ca3445"
, "12k9jiq29iyqm03swfsgiw5mlqs173qazm3n7daz43infy12pyrcdf30fkk3qwv4yl2ick8yipc2mqnlh48xsvvxl60lbx8vp38yji0"
, "IEqPxt2oLwoM7XvrjgikFlfBbvRosiioJ5vjMacDwzWW/RXBOxsH+aodO+pXeJygMa2Fx6cd1wNU7GMSOMo0RQ=="
)
]
it "b16 encoded . b32 decoded should equal original b16" $
forM_ samples $ \(b16, b32, b64) -> shouldBe (B16.encode <$> decode b32) (Right b16)
it "b64 encoded . b32 decoded should equal original b64" $
forM_ samples $ \(b16, b32, b64) -> shouldBe (B64.encode . BSL.fromStrict <$> decode b32) (Right b64)
it "b32 encoded . b64 decoded should equal original b32" $
forM_ samples $ \(b16, b32, b64) -> shouldBe (encode . BSL.toStrict <$> B64.decode b64 ) (Right b32)
it "b16 encoded . b64 decoded should equal original b16" $
forM_ samples $ \(b16, b32, b64) -> shouldBe (B16.encode . BSL.toStrict <$> B64.decode b64 ) (Right b16)
it "b32 encoded . b16 decoded should equal original b32" $
forM_ samples $ \(b16, b32, b64) -> shouldBe (encode $ fst $ B16.decode b16 ) b32
it "b64 encoded . b16 decoded should equal original b64" $
forM_ samples $ \(b16, b32, b64) -> shouldBe (B64.encode $ BSL.fromStrict $ fst $ B16.decode b16 ) b64