A  hie.yaml
M  hnix-store-core/hnix-store-core.cabal
M  hnix-store-core/src/System/Nix/Hash.hs
M  hnix-store-core/src/System/Nix/Internal/Base.hs
M  hnix-store-core/src/System/Nix/Internal/Base32.hs
M  hnix-store-core/src/System/Nix/Internal/Old.hs
M  hnix-store-core/src/System/Nix/Internal/SriHash.hs
M  hnix-store-core/src/System/Nix/Internal/StorePath.hs
D  hnix-store-core/src/System/Nix/Internal/TruncatedHash.1hs
A  hnix-store-core/src/System/Nix/Internal/TruncatedHash.hs
A  hnix-store-core/src/System/Nix/Internal/Uncycle.hs
This commit is contained in:
Anton-Latukha 2021-01-31 16:10:33 +02:00
parent 091b1178ed
commit b0725fa107
No known key found for this signature in database
GPG Key ID: 3D84C07E91802E41
11 changed files with 516 additions and 288 deletions

13
hie.yaml Normal file
View File

@ -0,0 +1,13 @@
cradle:
cabal:
- path: "./hnix-store-core/src"
component: "lib:hnix-store-core"
- path: "./hnix-store-core/tests"
component: "hnix-store-core:test:format-tests"
- path: "./hnix-store-remote/src"
component: "lib:hnix-store-remote"
- path: "./hnix-store-remote/tests"
component: "hnix-store-remote:test:hnix-store-remote-tests"

View File

@ -50,6 +50,7 @@ library
, base16-bytestring
, base64-bytestring
, bytestring
, bytestring-show
, cereal
, containers
, cryptonite
@ -57,6 +58,7 @@ library
, cryptohash-sha1
, cryptohash-sha256
, cryptohash-sha512
, digits
, directory
, filepath
, hashable
@ -67,6 +69,7 @@ library
, saltine
, time
, text
, text-builder
, unix
, unordered-containers
, vector

View File

@ -7,7 +7,6 @@ module System.Nix.Hash (
, HNix.HashAlgorithm(..)
, HNix.ValidAlgo(..)
, HNix.NamedAlgo(..)
, HNix.SomeNamedDigest(..)
, HNix.hash
, HNix.hashLazy
, SRI.mkNamedDigest
@ -18,5 +17,6 @@ module System.Nix.Hash (
) where
import qualified System.Nix.Internal.Old as HNix
import qualified System.Nix.Internal.Hash as H
import qualified System.Nix.Internal.Base as B
import qualified System.Nix.Internal.SriHash as SRI

View File

@ -5,10 +5,13 @@
module System.Nix.Internal.Base where
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Base16 as Bytes.Base16
import qualified System.Nix.Base32 as Bytes.Base32 -- Nix has own Base32 encoding
import qualified Data.ByteString.Base64 as Bytes.Base64
import Data.Text (Text)
import qualified Data.Text.Encoding as Text
import Data.Coerce (coerce)
import qualified System.Nix.Internal.Old as O
import qualified System.Nix.Internal.Old as Old
-- | Constructors to indicate the base encodings
@ -19,23 +22,23 @@ data BaseEncoding
| Base64
-- | Take BaseEncoding type of the output -> take the Digeest as input -> encode O.Digest
encodeInBase :: BaseEncoding -> O.Digest a -> T.Text
encodeInBase Base16 = T.decodeUtf8 . Base16.encode . coerce
encodeInBase Base32 = Base32.encode . coerce
encodeInBase Base64 = T.decodeUtf8 . Base64.encode . coerce
-- | Take BaseEncoding type of the output -> take the Digeest as input -> encode Digest
encodeInBytes :: BaseEncoding -> Old.Digest a -> Text
encodeInBytes Base16 = Text.decodeUtf8 . Bytes.Base16.encode . coerce
encodeInBytes Base32 = Text.decodeUtf8 . Bytes.Base32.encode . coerce
encodeInBytes Base64 = Text.decodeUtf8 . Bytes.Base64.encode . coerce
-- | Take BaseEncoding type of the input -> take the input itself -> decodeBase into O.Digest
decodeBase :: BaseEncoding -> T.Text -> Either String (O.Digest a)
-- | Take BaseEncoding type of the input -> take the input itself -> decodeBase into Digest
decodeBase :: BaseEncoding -> Text -> Either String (Old.Digest a)
#if MIN_VERSION_base16_bytestring(1,0,0)
decodeBase Base16 = fmap O.Digest . Base16.decode . T.encodeUtf8
decodeBase Base16 = fmap Old.Digest . Bytes.Base16.decode . Text.encodeUtf8
#else
decodeBase Base16 = lDecode -- this tacit sugar simply makes GHC pleased with number of args
where
lDecode t = case Base16.decode (T.encodeUtf8 t) of
(x, "") -> Right $ O.Digest x
_ -> Left $ "Unable to decode base16 string" <> T.unpack t
lDecode t = case Base16.decode (Text.encodeUtf8 t) of
(x, "") -> Right $ Old.Digest x
_ -> Left $ "Unable to decode base16 string" <> Text.unpack t
#endif
decodeBase Base32 = fmap O.Digest . Base32.decode
decodeBase Base64 = fmap O.Digest . Base64.decode . T.encodeUtf8
decodeBase Base32 = fmap Old.Digest . Bytes.Base32.decode . Text.encodeUtf8
decodeBase Base64 = fmap Old.Digest . Bytes.Base64.decode . Text.encodeUtf8

View File

@ -1,27 +1,40 @@
{-# LANGUAGE OverloadedStrings #-}
-- | Nix has Nix-specific Base32 encoding.
-- Stick to using the Text. But lib provides ByteString (Bytes) just for unsafe optimization case.
module System.Nix.Internal.Base32 where
import Data.ByteString (ByteString)
import Data.Bits (shiftR)
import Data.List (unfoldr)
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Text.Builder
import Data.ByteString (ByteString)
import qualified Data.ByteString as Bytes
import qualified Data.ByteString.Char8 as Bytes.Char8
import qualified Data.ByteString.Internal as Bytes (c2w, unpackChars)
import Data.Bits (shiftR)
import Data.Vector (Vector)
import qualified Data.Vector as Vector
import Data.Word (Word8)
import Numeric (readInt)
import qualified Data.Maybe
import qualified Data.ByteString
import qualified Data.ByteString.Char8
import qualified Data.Text
import qualified Data.Vector
-- Special Nix Base 32 dictinary with omitted: {E,O,U,T}
dictNixBase32 :: Vector Char
dictNixBase32 = Data.Vector.fromList "0123456789abcdfghijklmnpqrsvwxyz"
dictNixBase32 = Vector.fromList "0123456789abcdfghijklmnpqrsvwxyz"
dictNixBase32Bytes :: Vector Word8
dictNixBase32Bytes = Vector.fromList (fmap Bytes.c2w "0123456789abcdfghijklmnpqrsvwxyz")
-- | Encode a 'BS.ByteString' in Nix's base32 encoding
encode :: ByteString -> Text
encode c = Data.Text.pack $ map char32 [nChar - 1, nChar - 2 .. 0]
type NixBase32 = Text
type NixBase32Bytes = ByteString
-- | Encode a Text in Nix-specific Base32 encoding
encode :: Text -> NixBase32
encode text = Text.pack $ fmap char32 [nChar - 1, nChar - 2 .. 0]
where
-- Each base32 character gives us 5 bits of information, while
-- each byte gives is 8. Because 'div' rounds down, we need to add
@ -30,60 +43,135 @@ encode c = Data.Text.pack $ map char32 [nChar - 1, nChar - 2 .. 0]
-- bytestring to cover for the case where the number of bits is
-- already a factor of 5. Thus, the + 1 outside of the 'div' and
-- the - 1 inside of it.
nChar = fromIntegral $ succ $ (Data.ByteString.length c * 8 - 1) `div` 5
nChar = fromIntegral $ succ $ (Text.length text * 8 - 1) `div` 5
byte = Data.ByteString.index c . fromIntegral
char = Text.index text . fromIntegral
-- May need to switch to a more efficient calculation at some
-- point.
bAsInteger :: Integer
bAsInteger = sum [fromIntegral (byte j) * (256 ^ j)
| j <- [0 .. Data.ByteString.length c - 1]
bAsInteger = sum [fromIntegral (char j) * (256 ^ j)
| j <- [0 .. Text.length text - 1]
]
char32 :: Integer -> Char
char32 i = dictNixBase32 Data.Vector.! digitInd
char32 i = dictNixBase32 Vector.! digitInd
where
digitInd = fromIntegral $
bAsInteger
`div` (32^i)
`mod` 32
-- | Decode Nix's base32 encoded text
decode :: Text -> Either String ByteString
-- | Encode a ByteString in Nix-specific Base32 encoding
encodeBytes :: ByteString -> NixBase32Bytes
encodeBytes bytes = Bytes.pack $ fmap char32 [nChar - 1, nChar - 2 .. 0]
where
-- Each base32 character gives us 5 bits of information, while
-- each byte gives is 8. Because 'div' rounds down, we need to add
-- one extra character to the result, and because of that extra 1
-- we need to subtract one from the number of bits in the
-- bytestring to cover for the case where the number of bits is
-- already a factor of 5. Thus, the + 1 outside of the 'div' and
-- the - 1 inside of it.
nChar = fromIntegral $ succ $ (Bytes.length bytes * 8 - 1) `div` 5
char32 :: Integer -> Word8
char32 i = dictNixBase32Bytes Vector.! digitInd
where
digitInd = fromIntegral $
bAsInteger
`div` (32^i)
`mod` 32
-- May need to switch to a more efficient calculation at some
-- point.
bAsInteger :: Integer
bAsInteger = sum [fromIntegral (byte j) * (256 ^ j)
| j <- [0 .. Bytes.length bytes - 1]
]
byte = Bytes.index bytes . fromIntegral
-- | Decode from Nix-specific Base32 encoding.
decode :: NixBase32 -> Either String Text
decode what =
if Data.Text.all (`elem` dictNixBase32) what
if Text.all (`elem` dictNixBase32) what
then unsafeDecode what
else Left "Invalid base32 string"
else Left "Invalid Nix-Base32 string"
-- | Decode Nix's base32 encoded text
-- Doesn't check if all elements match `dictNixBase32`
unsafeDecode :: Text -> Either String ByteString
-- | Decode from Nix-specific Base32 encoding.
decodeToBytes :: NixBase32Bytes -> Either String ByteString
decodeToBytes what =
if Bytes.all (`elem` dictNixBase32Bytes) what
then unsafeDecodeToBytes what
else Left "Invalid Nix-Base32 string"
-- | Fast decoding from Nix-specific Base32 encoding.
-- Doesn't check if all elements match `dictNixBase32`.
unsafeDecode :: NixBase32 -> Either String Text
unsafeDecode what =
case readInt 32
(`elem` dictNixBase32)
(\c -> Data.Maybe.fromMaybe (error "character not in dictNixBase32")
$ Data.Vector.findIndex (==c) dictNixBase32)
(Data.Text.unpack what)
of
[(i, _)] -> Right $ padded $ integerToBS i
x -> Left $ "Can't decode: readInt returned " ++ show x
where
padded x
| Data.ByteString.length x < decLen = x `Data.ByteString.append` bstr
| otherwise = x
where
bstr = Data.ByteString.Char8.pack $ take (decLen - Data.ByteString.length x) (cycle "\NUL")
-- 2021-01-31: NOTE: `text-show` has a faster version of this implementation,
-- because here Text -> String happens for readInt.
case readIntegerIntoString $ Text.unpack what of
[(i, _)] -> Right $ padded $ integerToText i
x -> Left $ "Can't decode `readInt` returned : " <> show x
where
readIntegerIntoString :: (ReadS Integer)
readIntegerIntoString =
readInt
32
(`elem` dictNixBase32)
(\ c -> Data.Maybe.fromMaybe
(error "character not in dictNixBase32")
$ Vector.findIndex (== c) dictNixBase32)
decLen = Data.Text.length what * 5 `div` 8
padded :: (Text -> Text)
padded x
| Text.length x < decLen = x <> bstr
| otherwise = x
where
bstr = Text.pack $ take (decLen - Text.length x) (cycle "\NUL")
decLen = Text.length what * 5 `div` 8
-- | Encode an Integer to a bytestring
-- | Fast decoding from Nix-specific Base32 encoding.
-- Doesn't check if all elements match `dictNixBase32`.
unsafeDecodeToBytes :: NixBase32Bytes -> Either String ByteString
unsafeDecodeToBytes what =
case readIntegerIntoString $ Bytes.unpackChars what of
[(i, _)] -> Right $ padded $ integerToBytes i
x -> Left $ "Can't decode `readInt` returned : " <> show x
where
readIntegerIntoString :: (ReadS Integer)
readIntegerIntoString =
readInt
32
-- 2021-01-30: NOTE: Use of `readInt` that requires `Char -> Bytestring` convertion a bit defeats a ByteString pipeline.
-- Using dictNixBase32 with Char because of it
(`elem` dictNixBase32)
(\ c -> Data.Maybe.fromMaybe
(error "character not in dictNixBase32")
$ Vector.findIndex (== c) dictNixBase32)
padded x
| Bytes.length x < decLen = x <> bstr
| otherwise = x
where
bstr = Bytes.Char8.pack $ take (decLen - Bytes.length x) (cycle "\NUL")
decLen = Bytes.length what * 5 `div` 8
-- | Encode an Integer to a Text
integerToText :: Integer -> Text
integerToText = Text.Builder.run . Text.Builder.decimal
-- | Encode an Integer to a ByteString
-- Similar to Data.Base32String (integerToBS) without `reverse`
integerToBS :: Integer -> ByteString
integerToBS 0 = Data.ByteString.pack [0]
integerToBS i
| i > 0 = Data.ByteString.pack $ unfoldr f i
| otherwise = error "integerToBS not defined for negative values"
where
f 0 = Nothing
f x = Just (fromInteger x :: Word8, x `shiftR` 8)
integerToBytes :: Integer -> ByteString
integerToBytes i
| i > 0 = Bytes.unfoldr f i
| i == 0 = Bytes.empty
| otherwise = error "integerToBytes not defined for negative values."
where
f 0 = Nothing
f x = Just (fromInteger x :: Word8, x `shiftR` 8)

View File

@ -15,32 +15,18 @@ Description : Cryptographic hashing interface for hnix-store, on top
module System.Nix.Internal.Old where
import qualified "cryptohash-md5" Crypto.Hash.MD5 as MD5
import qualified "cryptohash-sha1" Crypto.Hash.SHA1 as SHA1
import qualified "cryptohash-sha256" Crypto.Hash.SHA256 as SHA256
import qualified "cryptohash-sha512" Crypto.Hash.SHA512 as SHA512
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base16 as Base16
import qualified System.Nix.Base32 as Base32 -- Nix has own Base32 encoding
import qualified Data.ByteString.Base64 as Base64
import Data.Bits (xor)
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString as Bytes
import qualified Data.ByteString.Lazy as LazyBytes
import qualified Data.Hashable as DataHashable
import Data.List (find, foldl')
import Data.Proxy (Proxy(Proxy))
import Data.List (foldl')
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Word (Word8)
import GHC.TypeLits (Nat, KnownNat, natVal)
import Data.Coerce (coerce)
import GHC.TypeLits (Nat)
-- | Constructors to indicate the base encodings
data BaseEncoding
= Base16
| Base32
-- ^ Nix has a special map of Base32 encoding
| Base64
-- | The universe of supported hash algorithms.
--
@ -73,10 +59,8 @@ instance HashProperties HashAlgorithm
-- | The result of running a 'HashAlgorithm'.
newtype Digest (a :: HashAlgorithm) =
Digest BS.ByteString deriving (Eq, Ord, DataHashable.Hashable)
Digest Bytes.ByteString deriving (Eq, Ord, DataHashable.Hashable)
instance Show (Digest a) where
show = ("Digest " <>) . show . encodeInBase Base32
-- | The primitive interface for incremental hashing for a given
-- 'HashAlgorithm'. Every 'HashAlgorithm' should have an instance.
@ -87,7 +71,7 @@ class ValidAlgo (a :: HashAlgorithm) where
-- | Start building a new hash.
initialize :: AlgoCtx a
-- | Append a 'BS.ByteString' to the overall contents to be hashed.
update :: AlgoCtx a -> BS.ByteString -> AlgoCtx a
update :: AlgoCtx a -> Bytes.ByteString -> AlgoCtx a
-- | Finish hashing and generate the output.
finalize :: AlgoCtx a -> Digest a
@ -113,89 +97,25 @@ instance NamedAlgo 'SHA512 where
algoName = "sha512"
hashSize = 64
-- | A digest whose 'NamedAlgo' is not known at compile time.
data SomeNamedDigest = forall a . NamedAlgo a => SomeDigest (Digest a)
instance Show SomeNamedDigest where
show (SomeDigest (digest :: Digest hashType)) = T.unpack $ "SomeDigest " <> algoName @hashType <> ":" <> encodeInBase Base32 digest
mkNamedDigest :: Text -> Text -> Either String SomeNamedDigest
mkNamedDigest name sriHash =
let (sriName, h) = T.breakOnEnd "-" sriHash in
if sriName == "" || sriName == (name <> "-")
then mkDigest h
else Left $ T.unpack $ "Sri hash method " <> sriName <> " does not match the required hash type " <> name
where
mkDigest :: Text -> Either String SomeNamedDigest
mkDigest h =
maybe (Left $ "Unknown hash name: " <> T.unpack name) (`decodeToSomeDigest` h) maybeFindHashTypeByName
maybeFindHashTypeByName :: Maybe HashAlgorithm
maybeFindHashTypeByName = find (\ hashType -> canonicalHashName hashType == name ) [SHA256, MD5, SHA1, SHA512] -- SHA256 is the most used in Nix - so it matches first
decodeToSomeDigest :: HashAlgorithm -> Text -> Either String SomeNamedDigest
decodeToSomeDigest MD5 = fmap SomeDigest . goDecode @'MD5
decodeToSomeDigest SHA1 = fmap SomeDigest . goDecode @'SHA1
decodeToSomeDigest SHA256 = fmap SomeDigest . goDecode @'SHA256
decodeToSomeDigest SHA512 = fmap SomeDigest . goDecode @'SHA512
goDecode :: forall a . (NamedAlgo a, ValidAlgo a) => Text -> Either String (Digest a)
goDecode h =
-- Base encoding detected by comparing the lengths of the hash in Base to the canonical length of the demanded hash type
maybe left (`decodeBase` h) maybeFindBaseEncByLenMatch
where
left = Left $ T.unpack sriHash <> " is not a valid " <> T.unpack name <> " hash. Its length (" <> show (T.length h) <> ") does not match any of " <> show (canonicalLenIf <$> bases)
maybeFindBaseEncByLenMatch = find (\ enc -> T.length h == canonicalLenIf enc) bases
expectedHashLen = hashSize @a
canonicalLenIf Base16 = 2 * expectedHashLen
canonicalLenIf Base32 = ((8 * expectedHashLen - 1) `div` 5) + 1
canonicalLenIf Base64 = ((4 * expectedHashLen `div` 3) + 3) `div` 4 * 4
bases = [Base32, Base16, Base64] -- 32 is the most used in Nix - so the first match
-- | Hash an entire (strict) 'BS.ByteString' as a single call.
-- | Hash an entire (strict) 'ByteString' as a single call.
--
-- For example:
-- > let d = hash "Hello, sha-256!" :: Digest SHA256
-- or
-- > :set -XTypeApplications
-- > let d = hash @SHA256 "Hello, sha-256!"
hash :: forall a.ValidAlgo a => BS.ByteString -> Digest a
hash :: forall a.ValidAlgo a => Bytes.ByteString -> Digest a
hash bs =
finalize $ update @a (initialize @a) bs
-- | Hash an entire (lazy) 'BSL.ByteString' as a single call.
-- | Hash an entire (lazy) 'ByteString' as a single call.
--
-- Use is the same as for 'hash'. This runs in constant space, but
-- forces the entire bytestring.
hashLazy :: forall a.ValidAlgo a => BSL.ByteString -> Digest a
hashLazy :: forall a.ValidAlgo a => LazyBytes.ByteString -> Digest a
hashLazy bsl =
finalize $ foldl' (update @a) (initialize @a) (BSL.toChunks bsl)
-- | Take BaseEncoding type of the output -> take the Digeest as input -> encode Digest
encodeInBase :: BaseEncoding -> Digest a -> T.Text
encodeInBase Base16 = T.decodeUtf8 . Base16.encode . coerce
encodeInBase Base32 = Base32.encode . coerce
encodeInBase Base64 = T.decodeUtf8 . Base64.encode . coerce
-- | Take BaseEncoding type of the input -> take the input itself -> decodeBase into Digest
decodeBase :: BaseEncoding -> T.Text -> Either String (Digest a)
#if MIN_VERSION_base16_bytestring(1,0,0)
decodeBase Base16 = fmap Digest . Base16.decode . T.encodeUtf8
#else
decodeBase Base16 = lDecode -- this tacit sugar simply makes GHC pleased with number of args
where
lDecode t = case Base16.decode (T.encodeUtf8 t) of
(x, "") -> Right $ Digest x
_ -> Left $ "Unable to decode base16 string" <> T.unpack t
#endif
decodeBase Base32 = fmap Digest . Base32.decode
decodeBase Base64 = fmap Digest . Base64.decode . T.encodeUtf8
finalize $ foldl' (update @a) (initialize @a) (LazyBytes.toChunks bsl)
-- | Uses "Crypto.Hash.MD5" from cryptohash-md5.
@ -225,36 +145,3 @@ instance ValidAlgo 'SHA512 where
initialize = SHA512.init
update = SHA512.update
finalize = Digest . SHA512.finalize
-- | Reuses the underlying 'ValidAlgo' instance, but does a
-- 'truncateDigest' at the end.
instance (ValidAlgo a, KnownNat n) => ValidAlgo ('Truncated n a) where
type AlgoCtx ('Truncated n a) = AlgoCtx a
initialize = initialize @a
update = update @a
finalize = truncateDigest @n . finalize @a
-- | Bytewise truncation of a 'Digest'.
--
-- When truncation length is greater than the length of the bytestring
-- but less than twice the bytestring length, truncation splits the
-- bytestring into a head part (truncation length) and tail part
-- (leftover part), right-pads the leftovers with 0 to the truncation
-- length, and combines the two strings bytewise with 'xor'.
truncateDigest
:: forall n a.(KnownNat n) => Digest a -> Digest ('Truncated n a)
truncateDigest (Digest c) =
Digest $ BS.pack $ map truncOutputByte [0.. n-1]
where
n = fromIntegral $ natVal (Proxy @n)
truncOutputByte :: Int -> Word8
truncOutputByte i = foldl' (aux i) 0 [0 .. BS.length c - 1]
inputByte :: Int -> Word8
inputByte j = BS.index c (fromIntegral j)
aux :: Int -> Word8 -> Int -> Word8
aux i x j = if j `mod` fromIntegral n == fromIntegral i
then xor x (inputByte $ fromIntegral j)
else x

View File

@ -1,42 +1,45 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE DataKinds #-}
module System.Nix.Internal.SriHash where
import qualified System.Nix.Internal.Old as O
import qualified System.Nix.Internal.Base as B
import Data.Text (Text)
import qualified Data.Text as Text
import qualified System.Nix.Internal.Old as Old
import qualified System.Nix.Internal.Base as Base
import Data.Foldable (find)
mkNamedDigest :: Text -> Text -> Either String O.SomeNamedDigest
mkNamedDigest :: Text -> Text -> Either String (Old.Digest a)
mkNamedDigest name sriHash =
let (sriName, h) = T.breakOnEnd "-" sriHash in
let (sriName, h) = Text.breakOnEnd "-" sriHash in
if sriName == "" || sriName == (name <> "-")
then mkDigest h
else Left $ T.unpack $ "Sri hash method " <> sriName <> " does not match the required hash type " <> name
else Left $ Text.unpack $ "Sri hash method " <> sriName <> " does not match the required hash type " <> name
where
mkDigest :: Text -> Either String O.SomeNamedDigest
mkDigest :: Text -> Either String (Old.Digest a)
mkDigest h =
maybe (Left $ "Unknown hash name: " <> T.unpack name) (`decodeToSomeDigest` h) maybeFindHashTypeByName
maybe (Left $ "Unknown hash name: " <> Text.unpack name) (`decodeToSomeDigest` h) maybeFindHashTypeByName
maybeFindHashTypeByName :: Maybe O.HashAlgorithm
maybeFindHashTypeByName = find (\ hashType -> O.canonicalHashName hashType == name ) [O.SHA256, O.MD5, O.SHA1, O.SHA512] -- SHA256 is the most used in Nix - so it matches first
maybeFindHashTypeByName :: Maybe Old.HashAlgorithm
maybeFindHashTypeByName = find (\ hashType -> Old.canonicalHashName hashType == name ) [Old.SHA256, Old.MD5, Old.SHA1, Old.SHA512] -- SHA256 is the most used in Nix - so it matches first
decodeToSomeDigest :: O.HashAlgorithm -> Text -> Either String O.SomeNamedDigest
decodeToSomeDigest O.MD5 = fmap O.SomeDigest . goDecode @'O.MD5
decodeToSomeDigest O.SHA1 = fmap O.SomeDigest . goDecode @'O.SHA1
decodeToSomeDigest O.SHA256 = fmap O.SomeDigest . goDecode @'O.SHA256
decodeToSomeDigest O.SHA512 = fmap O.SomeDigest . goDecode @'O.SHA512
decodeToSomeDigest :: Old.HashAlgorithm -> Text -> Either String (Old.Digest a)
decodeToSomeDigest a s = goDecode a s
goDecode :: forall a . (O.NamedAlgo a, O.ValidAlgo a) => Text -> Either String (O.Digest a)
goDecode h =
goDecode :: Old.HashAlgorithm -> Text -> Either String (Old.Digest a)
goDecode a s =
-- Base encoding detected by comparing the lengths of the hash in Base to the canonical length of the demanded hash type
maybe left (`B.decodeBase` h) maybeFindBaseEncByLenMatch
maybe left (`Base.decodeBase` s) maybeFindBaseEncByLenMatch
where
left = Left $ T.unpack sriHash <> " is not a valid " <> T.unpack name <> " hash. Its length (" <> show (T.length h) <> ") does not match any of " <> show (canonicalLenIf <$> bases)
left = Left $ Text.unpack sriHash <> " is not a valid " <> Text.unpack name <> " hash. Its length (" <> show (Text.length s) <> ") does not match any of " <> show (canonicalLenIf <$> bases)
maybeFindBaseEncByLenMatch = find (\ enc -> T.length h == canonicalLenIf enc) bases
maybeFindBaseEncByLenMatch = find (\ enc -> Text.length s == canonicalLenIf enc) bases
expectedHashLen = O.hashSize @a
expectedHashLen = Old.canonicalHashLen a
canonicalLenIf B.Base16 = 2 * expectedHashLen
canonicalLenIf B.Base32 = ((8 * expectedHashLen - 1) `div` 5) + 1
canonicalLenIf B.Base64 = ((4 * expectedHashLen `div` 3) + 3) `div` 4 * 4
bases = [B.Base32, B.Base16, B.Base64] -- 32 is the most used in Nix - so the first match
canonicalLenIf Base.Base16 = 2 * expectedHashLen
canonicalLenIf Base.Base32 = ((8 * expectedHashLen - 1) `div` 5) + 1
canonicalLenIf Base.Base64 = ((4 * expectedHashLen `div` 3) + 3) `div` 4 * 4
bases = [Base.Base32, Base.Base16, Base.Base64] -- 32 is the most used in Nix - so the first match

View File

@ -8,32 +8,40 @@ Description : Representation of Nix store paths.
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE TypeInType #-} -- Needed for GHC 8.4.4 for some reason
module System.Nix.Internal.StorePath where
import System.Nix.Hash
( HashAlgorithm(Truncated, SHA256)
( HashAlgorithm (SHA256)
, Digest
, BaseEncoding(..)
, encodeInBase
, decodeBase
, SomeNamedDigest
)
import System.Nix.Internal.Old as Old
import System.Nix.Internal.TruncatedHash as TruncatedHash
import System.Nix.Internal.Base32 (dictNixBase32)
import Data.Bool (bool)
import Data.Text (Text)
import Data.Text.Encoding (encodeUtf8)
import qualified Data.Text as T
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text (encodeUtf8)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BC
import qualified Data.Char
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Char8 as ByteString.Char8
import qualified Data.Char as Char
import Data.Hashable (Hashable(..))
import Data.HashSet (HashSet)
import Data.Attoparsec.Text.Lazy (Parser, (<?>))
-- import Data.Attoparsec.Text.Lazy (Parser, (<?>))
import Data.Attoparsec.ByteString.Lazy (Parser, (<?>))
import qualified Data.Attoparsec.Text.Lazy
import qualified System.FilePath
import qualified Data.Attoparsec.Text.Lazy as Parse.Text
import qualified Data.Attoparsec.ByteString.Lazy as Parse.ByteString
import qualified Data.Attoparsec.ByteString.Char8 as Parse.ByteString (char)
import qualified System.FilePath as FilePath
-- | A path in a Nix store.
--
@ -61,7 +69,7 @@ instance Hashable StorePath where
s `hashWithSalt` storePathHash `hashWithSalt` storePathName
instance Show StorePath where
show p = BC.unpack $ storePathToRawFilePath p
show p = ByteString.Char8.unpack $ storePathToRawFilePath p
-- | The name portion of a Nix path.
--
@ -73,7 +81,7 @@ newtype StorePathName = StorePathName
} deriving (Eq, Hashable, Ord)
-- | The hash algorithm used for store path hashes.
type StorePathHashAlgo = 'Truncated 20 'SHA256
type StorePathHashAlgo = 'Old.Truncated 20 'SHA256
-- | A set of 'StorePath's.
type StorePathSet = HashSet StorePath
@ -92,10 +100,10 @@ data ContentAddressableAddress
-- addTextToStore. It is addressed according to a sha256sum of the
-- file contents.
Text !(Digest 'SHA256)
| -- | The path was added to the store via makeFixedOutputPath or
-- addToStore. It is addressed according to some hash algorithm
-- applied to the nar serialization via some 'NarHashMode'.
Fixed !NarHashMode !SomeNamedDigest
-- | -- | The path was added to the store via makeFixedOutputPath or
-- -- addToStore. It is addressed according to some hash algorithm
-- -- applied to the nar serialization via some 'NarHashMode'.
-- Fixed !NarHashMode !(Digest 'HashAlgorithm)
-- | Schemes for hashing a Nix archive.
--
@ -115,26 +123,26 @@ makeStorePathName n = case validStorePathName n of
reasonInvalid :: Text -> String
reasonInvalid n | n == "" = "Empty name"
reasonInvalid n | (T.length n > 211) = "Path too long"
reasonInvalid n | (T.head n == '.') = "Leading dot"
reasonInvalid n | (Text.length n > 211) = "Path too long"
reasonInvalid n | (Text.head n == '.') = "Leading dot"
reasonInvalid _ | otherwise = "Invalid character"
validStorePathName :: Text -> Bool
validStorePathName "" = False
validStorePathName n = (T.length n <= 211)
&& T.head n /= '.'
&& T.all validateStorePathNameChar n
validStorePathName n = (Text.length n <= 211)
&& Text.head n /= '.'
&& Text.all validateStorePathNameChar n
validateStorePathNameChar :: Char -> Bool
validateStorePathNameChar c =
bool
False
(any ($ c)
[ Data.Char.isLower
, Data.Char.isDigit
, Data.Char.isUpper
[ Char.isLower
, Char.isDigit
, Char.isUpper
, (`elem` ("+-._?=" :: String))])
(Data.Char.isAscii c)
(Char.isAscii c)
-- | Copied from @RawFilePath@ in the @unix@ package, duplicated here
-- to avoid the dependency.
@ -146,7 +154,7 @@ storePathToRawFilePath
-> RawFilePath
storePathToRawFilePath StorePath{..} = root <> "/" <> hashPart <> "-" <> name
where
root = BC.pack storePathRoot
root = ByteString.Char8.pack storePathRoot
hashPart = encodeUtf8 $ encodeInBase Base32 storePathHash
name = encodeUtf8 $ unStorePathName storePathName
@ -154,35 +162,35 @@ storePathToRawFilePath StorePath{..} = root <> "/" <> hashPart <> "-" <> name
storePathToFilePath
:: StorePath
-> FilePath
storePathToFilePath = BC.unpack . storePathToRawFilePath
storePathToFilePath = ByteString.Char8.unpack . storePathToRawFilePath
-- | Render a 'StorePath' as a 'Text'.
storePathToText
:: StorePath
-> Text
storePathToText = T.pack . BC.unpack . storePathToRawFilePath
storePathToText = Text.pack . ByteString.Char8.unpack . storePathToRawFilePath
-- | Build `narinfo` suffix from `StorePath` which
-- can be used to query binary caches.
storePathToNarInfo
:: StorePath
-> BC.ByteString
-> ByteString.Char8.ByteString
storePathToNarInfo StorePath{..} = storePathHashInNixBase <> ".narinfo"
where
storePathHashInNixBase = encodeUtf8 $ encodeInBase Base32 storePathHash
-- | Parse `StorePath` from `BC.ByteString`, checking
-- | Parse `StorePath` from `ByteString.Char8.ByteString`, checking
-- that store directory matches `expectedRoot`.
parsePath
:: FilePath
-> BC.ByteString
-> ByteString.Char8.ByteString
-> Either String StorePath
parsePath expectedRoot x =
let
(rootDir, fname) = System.FilePath.splitFileName . BC.unpack $ x
(digestPart, namePart) = T.breakOn "-" $ T.pack fname
(rootDir, fname) = FilePath.splitFileName . ByteString.Char8.unpack $ x
(digestPart, namePart) = Text.breakOn "-" $ Text.pack fname
digest = decodeBase Base32 digestPart
name = makeStorePathName . T.drop 1 $ namePart
name = makeStorePathName . Text.drop 1 $ namePart
--rootDir' = dropTrailingPathSeparator rootDir
-- cannot use ^^ as it drops multiple slashes /a/b/// -> /a/b
rootDir' = init rootDir
@ -192,28 +200,29 @@ parsePath expectedRoot x =
in
StorePath <$> digest <*> name <*> storeDir
-- 2021-01-30: NOTE: Converted Text parser to ByteString. This currently uses Char8
pathParser :: FilePath -> Parser StorePath
pathParser expectedRoot = do
_ <- Data.Attoparsec.Text.Lazy.string (T.pack expectedRoot)
_ <- Parse.ByteString.string (Text.pack expectedRoot)
<?> "Store root mismatch" -- e.g. /nix/store
_ <- Data.Attoparsec.Text.Lazy.char '/'
_ <- Parse.ByteString.char '/'
<?> "Expecting path separator"
digest <- decodeBase Base32
<$> Data.Attoparsec.Text.Lazy.takeWhile1 (`elem` dictNixBase32)
<$> Parse.ByteString.takeWhile1 (`elem` dictNixBase32)
<?> "Invalid Base32 part"
_ <- Data.Attoparsec.Text.Lazy.char '-'
_ <- Parse.ByteString.char '-'
<?> "Expecting dash (path name separator)"
c0 <- Data.Attoparsec.Text.Lazy.satisfy (\c -> c /= '.' && validateStorePathNameChar c)
c0 <- Parse.ByteString.satisfy (\c -> c /= '.' && validateStorePathNameChar c)
<?> "Leading path name character is a dot or invalid character"
rest <- Data.Attoparsec.Text.Lazy.takeWhile validateStorePathNameChar
rest <- Parse.ByteString.takeWhile validateStorePathNameChar
<?> "Path name contains invalid character"
let name = makeStorePathName $ T.cons c0 rest
let name = makeStorePathName $ Text.cons c0 rest
either fail return
$ StorePath <$> digest <*> name <*> pure expectedRoot

View File

@ -1,33 +0,0 @@
module System.Nix.Internal.TruncatedHash where
-- data TruncatedHash = Truncated HashAlgorithm Integer
-- ^ The hash algorithm obtained by truncating the result of the
-- input 'HashAlgorithm' to the given number of bytes. See
-- 'truncateDigest' for a description of the truncation algorithm.
-- TODO: HNix really uses 1 type/length of truncation.
-- | Bytewise truncation of a 'Digest'.
--
-- When truncation length is greater than the length of the bytestring
-- but less than twice the bytestring length, truncation splits the
-- bytestring into a head part (truncation length) and tail part
-- (leftover part), right-pads the leftovers with 0 to the truncation
-- length, and combines the two strings bytewise with 'xor'.
-- truncateDigest
-- :: forall n a.(KnownNat n) => Digest a -> Digest ('Truncated n a)
-- truncateDigest (Digest c) =
-- Digest $ BS.pack $ map truncOutputByte [0.. n-1]
-- where
-- n = fromIntegral $ natVal (Proxy @n)
-- truncOutputByte :: Int -> Word8
-- truncOutputByte i = foldl' (aux i) 0 [0 .. BS.length c - 1]
-- inputByte :: Int -> Word8
-- inputByte j = BS.index c (fromIntegral j)
-- aux :: Int -> Word8 -> Int -> Word8
-- aux i x j = if j `mod` fromIntegral n == fromIntegral i
-- then xor x (inputByte $ fromIntegral j)
-- else x

View File

@ -0,0 +1,61 @@
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ExistentialQuantification #-}
module System.Nix.Internal.TruncatedHash where
import qualified Data.ByteString as ByteString
import Data.Word (Word8)
import Data.Foldable (foldl')
import Data.Bits (xor)
import System.Nix.Internal.Old as Old
import Data.Bool (bool)
import GHC.TypeLits (Nat, KnownNat, natVal)
import Data.Proxy (Proxy(Proxy))
-- TODO: HNix really uses 1 type/length of truncation.
-- data TruncatedHash = Truncated Nat Old.HashAlgorithm
-- | Reuses the underlying 'ValidAlgo' instance, but does a
-- 'truncateDigest' at the end.
instance (Old.ValidAlgo a, KnownNat n) => Old.ValidAlgo ('Old.Truncated n a) where
type AlgoCtx ('Old.Truncated n a) = AlgoCtx a
initialize = initialize @a
update = update @a
finalize = truncateDigest @n . finalize @a
-- | Bytewise truncation of a 'Digest'.
--
-- When truncation length is greater than the length of the bytestring
-- but less than twice the bytestring length, truncation splits the
-- bytestring into a head part (truncation length) and tail part
-- (leftover part), right-pads the leftovers with 0 to the truncation
-- length, and combines the two strings bytewise with 'xor'.
truncateDigest
:: forall n a.(KnownNat n) => Old.Digest a -> Old.Digest ('Old.Truncated n a)
truncateDigest (Old.Digest c) =
Old.Digest $ ByteString.pack $ map truncOutputByte [0.. n-1]
where
n = fromIntegral $ natVal (Proxy @n)
truncOutputByte :: Int -> Word8
truncOutputByte i = foldl' (aux i) 0 [0 .. ByteString.length c - 1]
inputByte :: Int -> Word8
inputByte j = ByteString.index c (fromIntegral j)
aux :: Int -> Word8 -> Int -> Word8
aux i x j = if j `mod` fromIntegral n == fromIntegral i
then xor x (inputByte $ fromIntegral j)
else x

View File

@ -0,0 +1,194 @@
{-|
Description : Cryptographic hashing interface for hnix-store, on top
of the cryptohash family of libraries.
-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE PackageImports #-}
module System.Nix.Internal.Uncycle where
import qualified "cryptohash-md5" Crypto.Hash.MD5 as MD5
import qualified "cryptohash-sha1" Crypto.Hash.SHA1 as SHA1
import qualified "cryptohash-sha256" Crypto.Hash.SHA256 as SHA256
import qualified "cryptohash-sha512" Crypto.Hash.SHA512 as SHA512
import qualified Data.ByteString as BS
import Data.Bits (xor)
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Hashable as DataHashable
import Data.List (foldl')
import Data.Proxy (Proxy(Proxy))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Word (Word8)
import GHC.TypeLits (Nat, KnownNat, natVal)
import qualified System.Nix.Internal.Base as B
import qualified System.Nix.Internal.Old as O
-- | The universe of supported hash algorithms.
--
-- Currently only intended for use at the type level.
data HashAlgorithm
= MD5
| SHA1
| SHA256
| SHA512
| Truncated Nat HashAlgorithm
-- ^ The hash algorithm obtained by truncating the result of the
-- input 'HashAlgorithm' to the given number of bytes. See
-- 'truncateDigest' for a description of the truncation algorithm.
class HashProperties a
where
canonicalHashName :: a -> Text
canonicalHashLen :: a -> Int
instance HashProperties HashAlgorithm
where
canonicalHashName SHA256 = "sha256" -- SHA256 is the most used in Nix - so it matches first
canonicalHashName MD5 = "md5"
canonicalHashName SHA1 = "sha1"
canonicalHashName SHA512 = "sha512"
canonicalHashLen SHA256 = 32
canonicalHashLen MD5 = 16
canonicalHashLen SHA1 = 20
canonicalHashLen SHA512 = 64
-- | The result of running a 'HashAlgorithm'.
newtype Digest (a :: HashAlgorithm) =
Digest BS.ByteString deriving (Eq, Ord, DataHashable.Hashable)
instance Show (Digest a) where
show = ("Digest " <>) . show . encodeInBase Base32
-- | The primitive interface for incremental hashing for a given
-- 'HashAlgorithm'. Every 'HashAlgorithm' should have an instance.
class ValidAlgo (a :: HashAlgorithm) where
-- | The incremental state for constructing a hash.
type AlgoCtx a
-- | Start building a new hash.
initialize :: AlgoCtx a
-- | Append a 'BS.ByteString' to the overall contents to be hashed.
update :: AlgoCtx a -> BS.ByteString -> AlgoCtx a
-- | Finish hashing and generate the output.
finalize :: AlgoCtx a -> Digest a
-- | A 'HashAlgorithm' with a canonical name, for serialization
-- purposes (e.g. SRI hashes)
class ValidAlgo a => NamedAlgo (a :: HashAlgorithm) where
algoName :: Text
hashSize :: Int
instance NamedAlgo 'MD5 where
algoName = "md5"
hashSize = 16
instance NamedAlgo 'SHA1 where
algoName = "sha1"
hashSize = 20
instance NamedAlgo 'SHA256 where
algoName = "sha256"
hashSize = 32
instance NamedAlgo 'SHA512 where
algoName = "sha512"
hashSize = 64
-- | A digest whose 'NamedAlgo' is not known at compile time.
data SomeNamedDigest = forall a . NamedAlgo a => SomeDigest (Digest a)
instance Show SomeNamedDigest where
show (SomeDigest (digest :: Digest hashType)) = T.unpack $ "SomeDigest " <> algoName @hashType <> ":" <> encodeInBase Base32 digest
-- | Hash an entire (strict) 'BS.ByteString' as a single call.
--
-- For example:
-- > let d = hash "Hello, sha-256!" :: Digest SHA256
-- or
-- > :set -XTypeApplications
-- > let d = hash @SHA256 "Hello, sha-256!"
hash :: forall a.ValidAlgo a => BS.ByteString -> Digest a
hash bs =
finalize $ update @a (initialize @a) bs
-- | Hash an entire (lazy) 'BSL.ByteString' as a single call.
--
-- Use is the same as for 'hash'. This runs in constant space, but
-- forces the entire bytestring.
hashLazy :: forall a.ValidAlgo a => BSL.ByteString -> Digest a
hashLazy bsl =
finalize $ foldl' (update @a) (initialize @a) (BSL.toChunks bsl)
-- | Uses "Crypto.Hash.MD5" from cryptohash-md5.
instance ValidAlgo 'MD5 where
type AlgoCtx 'MD5 = MD5.Ctx
initialize = MD5.init
update = MD5.update
finalize = Digest . MD5.finalize
-- | Uses "Crypto.Hash.SHA1" from cryptohash-sha1.
instance ValidAlgo 'SHA1 where
type AlgoCtx 'SHA1 = SHA1.Ctx
initialize = SHA1.init
update = SHA1.update
finalize = Digest . SHA1.finalize
-- | Uses "Crypto.Hash.SHA256" from cryptohash-sha256.
instance ValidAlgo 'SHA256 where
type AlgoCtx 'SHA256 = SHA256.Ctx
initialize = SHA256.init
update = SHA256.update
finalize = Digest . SHA256.finalize
-- | Uses "Crypto.Hash.SHA512" from cryptohash-sha512.
instance ValidAlgo 'SHA512 where
type AlgoCtx 'SHA512 = SHA512.Ctx
initialize = SHA512.init
update = SHA512.update
finalize = Digest . SHA512.finalize
-- | Reuses the underlying 'ValidAlgo' instance, but does a
-- 'truncateDigest' at the end.
instance (ValidAlgo a, KnownNat n) => ValidAlgo ('Truncated n a) where
type AlgoCtx ('Truncated n a) = AlgoCtx a
initialize = initialize @a
update = update @a
finalize = truncateDigest @n . finalize @a
-- | Bytewise truncation of a 'Digest'.
--
-- When truncation length is greater than the length of the bytestring
-- but less than twice the bytestring length, truncation splits the
-- bytestring into a head part (truncation length) and tail part
-- (leftover part), right-pads the leftovers with 0 to the truncation
-- length, and combines the two strings bytewise with 'xor'.
truncateDigest
:: forall n a.(KnownNat n) => Digest a -> Digest ('Truncated n a)
truncateDigest (Digest c) =
Digest $ BS.pack $ map truncOutputByte [0.. n-1]
where
n = fromIntegral $ natVal (Proxy @n)
truncOutputByte :: Int -> Word8
truncOutputByte i = foldl' (aux i) 0 [0 .. BS.length c - 1]
inputByte :: Int -> Word8
inputByte j = BS.index c (fromIntegral j)
aux :: Int -> Word8 -> Int -> Word8
aux i x j = if j `mod` fromIntegral n == fromIntegral i
then xor x (inputByte $ fromIntegral j)
else x