From 46538b5ca919c39445e22db11c4acb4e28a751a4 Mon Sep 17 00:00:00 2001 From: "Boris M. Yartsev" Date: Sat, 3 Jun 2017 01:14:04 +0300 Subject: [PATCH] REPL for hackage package --- REPL/LICENSE | 30 ++++ REPL/README.md | 1 + REPL/Setup.hs | 2 + REPL/app/Main.hs | 93 +++++++++++++ REPL/index-project.cabal | 65 +++++++++ REPL/src/ArchiveUpdate.hs | 279 ++++++++++++++++++++++++++++++++++++++ REPL/src/REPL.hs | 214 +++++++++++++++++++++++++++++ REPL/src/TarUtil.hs | 86 ++++++++++++ REPL/stack.yaml | 66 +++++++++ REPL/test/Spec.hs | 2 + 10 files changed, 838 insertions(+) create mode 100644 REPL/LICENSE create mode 100644 REPL/README.md create mode 100644 REPL/Setup.hs create mode 100644 REPL/app/Main.hs create mode 100644 REPL/index-project.cabal create mode 100644 REPL/src/ArchiveUpdate.hs create mode 100644 REPL/src/REPL.hs create mode 100644 REPL/src/TarUtil.hs create mode 100644 REPL/stack.yaml create mode 100644 REPL/test/Spec.hs diff --git a/REPL/LICENSE b/REPL/LICENSE new file mode 100644 index 0000000..6a042c2 --- /dev/null +++ b/REPL/LICENSE @@ -0,0 +1,30 @@ +Copyright Author name here (c) 2017 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Author name here nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/REPL/README.md b/REPL/README.md new file mode 100644 index 0000000..eebf215 --- /dev/null +++ b/REPL/README.md @@ -0,0 +1 @@ +# index-project diff --git a/REPL/Setup.hs b/REPL/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/REPL/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/REPL/app/Main.hs b/REPL/app/Main.hs new file mode 100644 index 0000000..abd6227 --- /dev/null +++ b/REPL/app/Main.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Main where + +import qualified Data.ByteString.Lazy as BL +import Data.Int(Int64) +import qualified Control.Exception as X +import qualified Data.Char as DC +import qualified Data.List as DL +import Control.Monad(forever) +import System.Directory(copyFile) +import System.IO (stdout, hFlush) +import qualified Data.Map.Strict as Map + +import ArchiveUpdate +import TarUtil +import REPL +{- +updateHackage :: IO() +updateHackage = do + val <- performSmartUpdate archiveFile snapshotURL archiveURL + putStrLn $ "Updated " ++ show val + +-- Compares the hackage archive file with the original file +compareArchive :: FilePath -> FilePath -> IO() +compareArchive archive1 archive2= do + val <- compareFiles archive1 archive2 + putStrLn $ "Compare result " ++ archive1 ++ " " ++ archive2 ++ " " ++ (show val) + +-- Parses the integer value at the end of the string +-- Used to parse commands like "cut 42" +parseIntEnd :: (Num a, Read a) => String -> a +parseIntEnd val | DL.length l > 0 = read (DL.last l) + | otherwise = 0 + where l = words val + +processCommand :: String -> IO() +processCommand command + | chk "check" = showUpdateData archiveFile snapshotURL -- checks the current gzip archive and understands what to download + | chk "checkclone" = showUpdateData archiveCloneFile snapshotURL -- checks the current gzip archive and understands what to download + + | chk "file" = showFileSnapshot archiveFile -- shows the snapshot of hackage file + | chk "fileclone" = showFileSnapshot archiveCloneFile + | chk "copyorig" = copyArchive archiveFile archiveCloneFile -- copies the current archive to the orig place + + | chk "cut" = cutFile archiveFile (parseIntEnd command) -- cuts the end of the gzip file for checking purposes + | chk "cutclone" = cutFile archiveCloneFile (parseIntEnd command) + + | chk "unzip" = unzipArchive archiveFile tarArchive -- unzips the downloaded gzip archive + | chk "unzipclone" = unzipArchive archiveCloneFile tarArchiveClone -- unzips the downloaded gzip archive + + | chk "tarparse" = showMap tarArchive 50 -- loads the tar information in the memory + | chk "tarparseclone" = showMap tarArchiveClone 50 -- loads the tar clone information in the memory + + | chk "tarshow" = showTarContents tarArchive + | chk "tarshowclone" = showTarContents tarArchiveClone + + | chk "compare" = showArchiveCompare archiveFile archiveCloneFile + | chk "update" = updateHackage -- updates the current gzip archive + + | chk "tarcmp" = showDiffMap tarArchive tarArchiveClone + | chk "exit" = exitREPL + + | chk "help" = showHelp + | otherwise = showHelp + where pc = map DC.toLower command + chk val = DL.isPrefixOf val pc + +processCycle :: IO () +processCycle = forever $ do + putStr "Input command: " + hFlush stdout + command <- getLine + hFlush stdout + (processCommand command) `X.catch` eh `X.catch` eh2 `X.catch` eh3 + where + eh (e :: X.IOException) = putStrLn $ "IO Error: " ++ (show e) + eh2 (e :: UpdateArchiveException) = putStrLn $ "Parsing error: " ++ (show e) + eh3 (e :: X.ErrorCall) = putStrLn $ "Error call: " ++ (show e) +-} + +defaultPBI :: ProcessBuilderInfo +defaultPBI = PBI { + archiveURL = "https://hackage.haskell.org/01-index.tar.gz", + snapshotURL = "https://hackage.haskell.org/snapshot.json", + archive = "01-index.tar.gz", + archiveClone = "01-index.tar.gz.orig", + tar = "01-index.tar", + tarClone = "01-index.orig.tar" } + +main :: IO () +main = processCycle defaultPBI diff --git a/REPL/index-project.cabal b/REPL/index-project.cabal new file mode 100644 index 0000000..bdb7174 --- /dev/null +++ b/REPL/index-project.cabal @@ -0,0 +1,65 @@ +name: index-project +version: 0.1.0.0 +-- synopsis: +-- description: +homepage: https://github.com/githubuser/index-project#readme +license: BSD3 +license-file: LICENSE +author: Boris Yartsev +maintainer: borboss@gmail.com +copyright: Boris Yartsev +category: Web +build-type: Simple +extra-source-files: README.md +cabal-version: >=1.10 + +library + hs-source-dirs: src + exposed-modules: ArchiveUpdate, TarUtil, REPL + build-depends: base >= 4.7 && < 5 + , directory + , containers + , tar + , split + , http-types + , bytestring + , http-client + , filepath + , http-client-tls + , pureMD5 + , aeson + , text + , cereal + , unix + , exceptions + , transformers + , zlib + + default-language: Haskell2010 + +executable index-project-exe + hs-source-dirs: app + main-is: Main.hs + ghc-options: -threaded -rtsopts -with-rtsopts=-N + build-depends: base + , index-project + , containers + , tar + , bytestring + , http-client + , directory + + default-language: Haskell2010 + +test-suite index-project-test + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Spec.hs + build-depends: base + , index-project + ghc-options: -threaded -rtsopts -with-rtsopts=-N + default-language: Haskell2010 + +source-repository head + type: git + location: https://github.com/githubuser/index-project diff --git a/REPL/src/ArchiveUpdate.hs b/REPL/src/ArchiveUpdate.hs new file mode 100644 index 0000000..c624923 --- /dev/null +++ b/REPL/src/ArchiveUpdate.hs @@ -0,0 +1,279 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +module ArchiveUpdate ( + URL, + SnapshotData(..), + HackageSnapshotData, + FileSnapshotData, + UpdateArchiveException, + performArchiveFileUpdate, + performArchiveCutUpdate, + getFileSubstring, + calcFileData, + calcUpdateResult2, + truncateIfExists, + unzipFile, + compareFiles) where + +import Network.HTTP.Client(Request(..), parseUrlThrow, newManager, responseBody, httpLbs) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import Network.HTTP.Types.Header + +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as BS + +import qualified Data.Aeson as A +import qualified Data.Aeson.Parser as AP +import qualified Data.Text as T +import qualified Data.Char as DC +import qualified Data.List as DL +import Data.Aeson.Types + +import Data.Digest.Pure.MD5 +import qualified Data.Serialize as DS + +import Data.Int(Int64) +import qualified Control.Exception as X +import System.IO.Error (isDoesNotExistError) +import System.Posix(fileSize) +import System.Posix.Types(FileOffset, COff(..)) +import System.Posix.Files (getFileStatus, setFileSize) +import System.Directory(removeFile, doesFileExist, copyFile) +import Control.Monad(when, forever) +import qualified Codec.Compression.GZip as GZip + + +data SnapshotData = SnapshotData { + md5Hash :: String, + lengthFile :: Int64 +} deriving (Eq, Show) + + +-- Two type aliases for the snapshot, that is created from reading the disk file +-- and the snapshot that is retrieved from the hackage. +type HackageSnapshotData = SnapshotData +type FileSnapshotData = SnapshotData + +-- Snapshot aeson construction instance +instance FromJSON SnapshotData where + parseJSON = withObject "snapshot" $ \o -> do + signedO <- o .: "signed" + metaO <- signedO .: "meta" + tarO <- metaO .: "/01-index.tar.gz" + hashesO <- tarO .: "hashes" + md5str <- hashesO .: "md5" + len <- tarO .: "length" + return (SnapshotData md5str len) + +-- The exception, that is raised, when there is problems with creating the +-- snapshot +newtype UpdateArchiveException = UAE String deriving (Show, Eq) +instance X.Exception UpdateArchiveException + +-- The method, that raises an exception, if it was not able to parse the +-- snapshot from JSON +parseSnapshotJSONThrow :: BL.ByteString -> IO SnapshotData +parseSnapshotJSONThrow body = case A.decode body of + (Just snapshot) -> return snapshot + Nothing -> X.throwIO $ UAE "Could not decode JSON" + +-- Alias for URL address. Just to make the code more pleasant +type URL = String + +-- The range, from which to download +type Range = (Int64, Int64) + +-- Chops the range into the list of ranges, for adequate downloading +cropRanges :: Int64 -> Range -> [Range] +cropRanges maxRange (from, to) + | to - from + 1 <= maxRange = [(from, to)] + | otherwise = (from, from + maxRange - 1) : cropRanges maxRange (from + maxRange, to) + +-- Creates the request by parsing url and then modifies it to make range request +createRangeRequest :: URL -> Range -> IO Request +createRangeRequest url range = makeRangeRequest range <$> parseUrlThrow url + +-- Writes the range to the simple http request +makeRangeRequest :: Range -> Request -> Request +makeRangeRequest (from, to) = makeRange + where + br = ByteRangeFromTo (fromIntegral from) (fromIntegral to) + makeRange r = r { + requestHeaders = (hRange, renderByteRanges [br]) : requestHeaders r + } + +-- Returns the data from response, returned to the request +fetchResponseData :: Request -> IO BL.ByteString +fetchResponseData req = newManager tlsManagerSettings >>= httpLbs req >>= return.responseBody + +-- Returns the snapshot of archive from the hackage +fetchSnapshot :: URL -> IO SnapshotData +fetchSnapshot url = parseUrlThrow url >>= fetchResponseData >>= parseSnapshotJSONThrow + +-- Returns the bytes from the range request +fetchRangeData :: URL -> Range -> IO BL.ByteString +fetchRangeData url range = createRangeRequest url range >>= fetchResponseData + +-- Calculates the MD5 hash of the file +calcMD5 :: FilePath -> IO MD5Digest +calcMD5 file = BL.readFile file >>= return.md5 + +-- Calculates the file size +getFileSize :: String -> IO Int64 +getFileSize path = getFileStatus path >>= return.fileSize >>= \(COff v) -> return v + +-- Calculates the snapshot of the file of the archive +calcFileData :: FilePath -> IO SnapshotData +calcFileData file = do + exists <- doesFileExist file -- does not throw anything + if exists then do + digest <- calcMD5 file; + offset <- getFileSize file; + return $ SnapshotData (show digest) offset + else return $ SnapshotData (show $ md5 "") 0 + +-- The action, that is needed to perform to correctly update the downloaded +-- archive. ArchiveIsOk - everything is fine. +-- Update - need to add some information to the end of the file +-- Reload - need to redownload the whole archive completely +data UpdateRange = ArchiveIsOk | Reload Range | Update Range deriving (Eq, Show) + + +-- The maximum range to download in one request from the hackage +maxRange :: Int64 +maxRange = 512000 + +-- Calculates the update result of the current archive using two snapshots +calcUpdateResult :: HackageSnapshotData -> FileSnapshotData -> UpdateRange +calcUpdateResult hackage file + | hackage == file = ArchiveIsOk -- both are equal + | lenH > lenF = Update (lenF, lenH - 1) -- need to append a bit + | otherwise = Reload (0, lenH - 1) -- delete old file and redownload it + where lenH = lengthFile hackage + lenF = lengthFile file + +-- Calculates the update range in the IO monad +-- I didn't know how to name this method, so just added 2 to the end +calcUpdateResult2 :: FilePath -> URL -> IO (UpdateRange, HackageSnapshotData, FileSnapshotData) +calcUpdateResult2 file json = do + snapshot <- fetchSnapshot json + fileData <- calcFileData file + return (calcUpdateResult snapshot fileData, snapshot, fileData) + + +-- Deletes the file it it exists. +removeIfExists :: FilePath -> IO () +removeIfExists file = removeFile file `X.catch` exhandler + where exhandler e | isDoesNotExistError e = return () + | otherwise = X.throwIO e + +-- Cuts the end of the file, in case it exists and the amount of bytes to cut is +-- less than file's length +truncateIfExists :: FilePath -> Int64 -> IO () +truncateIfExists file amount = do + fileData <- calcFileData file + when (lengthFile fileData - amount > 0) $ setFileSize file $ COff (lengthFile fileData - amount) + + +-- compares two files and returns the byte number, when they start to differ +-- It it used to check, where the archive and the updated archive differ +compareFiles :: FilePath -> FilePath -> IO Int64 +compareFiles file1 file2 = do + c1 <- BL.readFile file1 + c2 <- BL.readFile file2 + return $ compareFunc 0 c1 c2 + where + compareFunc :: Int64 -> BL.ByteString -> BL.ByteString -> Int64 + compareFunc ind bstr1 bstr2 + | BL.null bstr1 && BL.null bstr2 = -1 -- the strings are equal + | BL.null bstr1 || BL.null bstr2 = ind -- one string is empty so the diff is on ind byte + | BL.head bstr1 /= BL.head bstr2 = ind -- the byte is not equal + | otherwise = compareFunc (ind + 1) (BL.tail bstr1) (BL.tail bstr2) + +-- Returns the byte substring from file +getFileSubstring :: FilePath -> Int64 -> Int64 -> IO BL.ByteString +getFileSubstring file from len = do + c <- BL.readFile file + return $ BL.take len $ BL.drop from c +-- unzips the file to the other file +unzipFile :: FilePath -> FilePath -> IO() +unzipFile from to = do + removeIfExists to + fileBody <- (BL.readFile from) + BL.appendFile to (GZip.decompress fileBody) + +{- +-- The description of the file, that is used to compare archive on the harddisk +-- with the archive in the hackage. It uses length and md5 hash from the pureMD5 +-- library +-- Updates the archive with zip stuff +performSmartUpdate :: FilePath -> URL -> URL -> IO Bool +performSmartUpdate file json archive = do + (range, snapshot, _) <- calcUpdateResult2 file json + case range of + ArchiveIsOk -> do + putStrLn $ "Archive is up to date" + return False + (Update range) -> do + putStrLn $ "Updating the archive" + update range snapshot + return True + (Reload range) -> do + putStrLn $ "Reloading the archive" + removeIfExists file + update range snapshot + return True + where + ranges = cropRanges maxRange + write2File range = do + body <- fetchRangeData archive range + print "Start of range: " + print $ BL.take 100 body + BL.appendFile file body + putStrLn $ "\tAppended chunk " ++ (show range) + update range snapshot = do + mapM_ write2File (ranges range) + newFileData <- calcFileData file + when (newFileData /= snapshot) $ X.throwIO $ UAE $ "Updated archive corrupted" +-} + +-- performs the update, returns True if the the archive was modified +performArchiveFileUpdate :: URL -> URL -> FilePath -> IO Bool +performArchiveFileUpdate snapshotURL archiveURL archive = do + (range, snapshot, _) <- calcUpdateResult2 archive snapshotURL + putStrLn "Updating" + putStrLn $ "Snapshot from " ++ snapshotURL ++ " " ++ (show snapshot) + putStrLn $ "Update range " ++ (show range) + case range of + ArchiveIsOk -> (putStrLn $ "Archive is up to date") >> return False + Update range -> do + putStrLn $ "Updating " ++ archive ++ " from " ++ archiveURL + result <- updateArchive archive archiveURL snapshot range + putStrLn $ if result then "Update successfull" else "MD5 does not match" + return True + Reload range -> undefined + +updateArchive :: FilePath -> URL -> HackageSnapshotData -> Range -> IO Bool +updateArchive archive archiveURL snapshot range = do + mapM_ (write2File archive archiveURL) (cropRanges maxRange range) + newFileData <- calcFileData archive + return (newFileData == snapshot) + +write2File :: FilePath -> URL -> Range -> IO() +write2File archive url range = do + putStrLn $ "\tGetting range " ++ (show range) ++ " from " ++ url + body <- fetchRangeData url range + putStrLn $ "\tGot range " ++ (show (BL.take 50 body)) + BL.appendFile archive body + putStrLn "Append ok" + + +performArchiveCutUpdateF :: (FilePath -> IO Bool) -> FilePath -> Int64 -> IO Bool +performArchiveCutUpdateF updateFunc archive cutSize = do + putStrLn $ "Cutting " ++ (show cutSize) ++ " from " ++ archive ++ " before update" + truncateIfExists archive cutSize + updateFunc archive + +performArchiveCutUpdate :: URL -> URL -> FilePath -> Int64 -> IO Bool +performArchiveCutUpdate snapshotURL archiveURL = performArchiveCutUpdateF (performArchiveFileUpdate snapshotURL archiveURL) + diff --git a/REPL/src/REPL.hs b/REPL/src/REPL.hs new file mode 100644 index 0000000..6fa0cc8 --- /dev/null +++ b/REPL/src/REPL.hs @@ -0,0 +1,214 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module REPL ( {- + showFirstDirEntries, + showFileSnapshot, + showUpdateData, + showFileSubstring, + showHelp, + showMap, + showDiffMap, + showTarContents, + showArchiveCompare, + exitREPL, + copyArchive, + cutFile, + unzipArchive, + -} + processCycle, + ProcessBuilderInfo (..) + ) where +import qualified Codec.Archive.Tar.Index as TI +import qualified Data.Map.Strict as M +import qualified Data.Char as DC +import qualified Data.List as DL +import qualified Control.Exception as X +import Control.Monad(forever) +import System.IO (stdout, hFlush) + +import Data.Int(Int64) +import System.Exit(exitSuccess) +import System.Directory(copyFile) + +import TarUtil +import ArchiveUpdate + +data ProcessBuilderInfo = PBI { + archive :: FilePath, + archiveClone :: FilePath, + tar :: FilePath, + tarClone :: FilePath, + snapshotURL :: URL, + archiveURL :: URL +} deriving (Eq, Show) + +parseIntEnd :: (Num a, Read a) => String -> a +parseIntEnd val | DL.length l > 0 = read (DL.last l) + | otherwise = 0 + where l = words val + +processCycle :: ProcessBuilderInfo -> IO () +processCycle pbi = forever $ do + putStr "Input command: " + hFlush stdout + command <- getLine + hFlush stdout + (processCommand command) `X.catch` eh `X.catch` eh2 `X.catch` eh3 + where + processCommand = buildCommand pbi + eh (e :: X.IOException) = putStrLn $ "IO Error: " ++ (show e) + eh2 (e :: UpdateArchiveException) = putStrLn $ "Parsing error: " ++ (show e) + eh3 (e :: X.ErrorCall) = putStrLn $ "Error call: " ++ (show e) + +buildCommand :: ProcessBuilderInfo -> (String -> IO()) +buildCommand pbi = processCommand + where + processCommand command + -- checks the current gzip archive and understands what to download + | chk "checkclone" = showUpdateData (archiveClone pbi) (snapshotURL pbi) + -- checks the current gzip archive and understands what to download + | chk "check" = showUpdateData (archive pbi) (snapshotURL pbi) + + | chk "fileclone" = showFileSnapshot (archiveClone pbi) + | chk "file" = showFileSnapshot (archive pbi) -- shows the snapshot of hackage file + + | chk "copyorig" = copyArchive (archive pbi) (archiveClone pbi) -- copies the current archive to the orig place + + | chk "cutclone" = cutFile (archiveClone pbi) (parseIntEnd command) + | chk "cut" = cutFile (archive pbi) (parseIntEnd command) -- cuts the end of the gzip file for checking purposes + + | chk "unzipclone" = unzipArchive (archiveClone pbi) (tarClone pbi) -- unzips the downloaded gzip archive + | chk "unzip" = unzipArchive (archive pbi) (tar pbi) -- unzips the downloaded gzip archive + + | chk "tarparseclone" = showMap (tarClone pbi) 50 -- loads the tar clone information in the memory + | chk "tarparse" = showMap (tar pbi) 50 -- loads the tar information in the memory + + | chk "tarshowclone" = showTarContents (tarClone pbi) + | chk "tarshow" = showTarContents (tar pbi) + + | chk "compare" = showArchiveCompare (archive pbi) (archiveClone pbi) + + | chk "updatecut" = performArchiveCutUpdate (snapshotURL pbi) (archiveURL pbi) + (archive pbi) (parseIntEnd command) >> return () + | chk "update" = performArchiveFileUpdate (snapshotURL pbi) (archiveURL pbi) (archive pbi) >> return () + -- | chk "updatesmart" = undefined + + | chk "tarcmp" = showDiffMap (tar pbi) (tarClone pbi) + | chk "exit" = exitREPL + + | chk "help" = showHelp pbi + | otherwise = showHelp pbi + + where pc = map DC.toLower command + chk val = DL.isPrefixOf val pc + +showFirstDirEntries :: TI.TarIndex -> Int -> IO () +showFirstDirEntries index count = mapM_ print $ take count (getEntries index) + +-- Displays the snapshot of the file +showFileSnapshot :: FilePath -> IO() +showFileSnapshot file = do + filesnapshot <- calcFileData file + putStrLn $ "File result for " ++ file + putStrLn $ "\tFile snapshot: " ++ (show filesnapshot) + +-- Shows the update data for the archive on disk +showUpdateData :: FilePath -> URL -> IO() +showUpdateData file json = do + (range, snapshot, filesnapshot) <- calcUpdateResult2 file json + putStrLn $ "Update result for file " ++ file + putStrLn $ "\tHackage snapshot: " ++ (show snapshot) + putStrLn $ "\tFile snapshot: " ++ (show filesnapshot) + putStrLn $ "\tRange to update: " ++ (show range) + +-- shows the substring of specified length from file from offset +showFileSubstring :: FilePath -> Int64 -> Int64 -> IO () +showFileSubstring file from length = do + putStrLn $ "Showing " ++ file ++ " substr" + putStr "\t" + substr <- getFileSubstring file from length + print substr + +-- Copies the archive from first filename to the second +copyArchive :: FilePath -> FilePath -> IO () +copyArchive archive1 archive2 = do + copyFile archive1 archive2 + putStrLn $ "Copied the " ++ archive1 ++ " to " ++ archive2 + +showMap :: FilePath -> Int -> IO() +showMap path count = do + putStrLn $ "Displaying " ++ (show count) ++ " entries for " ++ path + tarIndexE <- loadTarIndex path + case tarIndexE of + Left error -> putStrLn "Whoa. Error loading tar" + Right index -> mapM_ (print.snd) $ take count $ M.toList $ buildHackageMap index + +showDiffMap :: FilePath -> FilePath -> IO () +showDiffMap newTarFile oldTarFile = do + putStrLn $ "Displaying difference between " ++ newTarFile ++ " and " ++ oldTarFile + newTarIndexE <- loadTarIndex newTarFile + oldTarIndexE <- loadTarIndex oldTarFile + let newMapE = buildHackageMap <$> newTarIndexE + let oldMapE = buildHackageMap <$> oldTarIndexE + let diffMapE = buildDifferenceMap <$> oldMapE <*> newMapE + case diffMapE of + Right m -> mapM_ (print.snd) $ M.toList m + Left _ -> print "Error creating the indexes" + +showHelp :: ProcessBuilderInfo -> IO() +showHelp pbi = do + putStrLn "Available commands: " + + putStrLn $ "check - downloads the json length and md5 hash from " ++ (snapshotURL pbi) ++ + ", and compares it with local " ++ (archive pbi) + putStrLn $ "checkclone - same for " ++ (archiveClone pbi) + putStrLn $ "file - displays the current " ++ (archive pbi) ++ " length and md5 hash" + putStrLn $ "fileclone - same for " ++ (archiveClone pbi) ++ " file" + putStrLn $ "copyorig - copy the " ++ (archive pbi) ++ " to " ++ (archiveClone pbi) + putStrLn $ "cut size - cuts the size bytes from the end of the " ++ (archive pbi) ++ " , for update command" + putStrLn $ "cutclone size - cuts the size bytes from the end of the 01-index.tar.gz, for update command" + putStrLn $ "unzip - unzips the " ++ (archive pbi) ++ " in the " ++ (tar pbi) ++ " file" + putStrLn $ "unzipclone - unzips the " ++ (archiveClone pbi) ++ " in the " ++ (tarClone pbi) ++ " file" + putStrLn $ "compare - compares the " ++ (archive pbi) ++ " with " ++ (archiveClone pbi) + putStrLn $ "tarparse - loads the map of entries from " ++ (tar pbi) ++ " and displays it" + putStrLn $ "tarparseclone - same for " ++ (tarClone pbi) + putStrLn $ "tarshow - show sample contents from " ++ (tar pbi) + putStrLn $ "tarshowclone - show sample contents from " ++ (tarClone pbi) + putStrLn $ "tarcmp - compares the entries of " ++ (tar pbi) ++ " and " ++ (tarClone pbi) + putStrLn $ "update - updates the current " ++ (archive pbi) ++ " from " ++ (archiveURL pbi) + putStrLn $ "updatecut size - cuts the size from " ++ (archive pbi) ++ " and then updates" + putStrLn "exit - exits this repl" + +showArchiveCompare :: FilePath -> FilePath -> IO() +showArchiveCompare archive1 archive2= do + val <- compareFiles archive1 archive2 + putStrLn $ "Compare result " ++ archive1 ++ " " ++ archive2 ++ " " ++ (show val) + + +showTarContents :: FilePath -> IO() +showTarContents archive = do + putStrLn $ "Displaying the tar indices" ++ " for " ++ archive + tarIndexE <- loadTarIndex archive + case tarIndexE of + Left error -> putStrLn "Whoa. Error loading tar" + Right index -> showFirstDirEntries index 100 + + + +exitREPL :: IO() +exitREPL = putStrLn "Finished working with hackage REPL" >> exitSuccess + +-- this method cuts the data from the end of the archive +-- needed mostly for testing purposes +cutFile :: FilePath -> Int64 -> IO() +cutFile path size = do + truncateIfExists path size + putStrLn $ "Cut " ++ (show size) ++ " bytes from " ++ path + +unzipArchive :: FilePath -> FilePath -> IO() +unzipArchive archive tar = do + putStrLn $ "Unzipping " ++ archive ++ " to " ++ tar + unzipFile archive tar + + diff --git a/REPL/src/TarUtil.hs b/REPL/src/TarUtil.hs new file mode 100644 index 0000000..5319ff0 --- /dev/null +++ b/REPL/src/TarUtil.hs @@ -0,0 +1,86 @@ +module TarUtil (getEntries, + loadTarIndex, + buildHackageMap, + buildDifferenceMap + ) where + +import qualified Codec.Archive.Tar.Index as TI +import qualified Codec.Archive.Tar as Tar +import qualified Data.List.Split as SPLT +import qualified Data.Char as DC +import qualified Data.List as DL +import qualified Data.ByteString.Lazy as BL +import qualified Data.Version as DV + +import qualified Data.Map.Strict as Map +import System.FilePath.Posix(hasTrailingPathSeparator) +import Control.Monad(guard) + +import qualified Text.ParserCombinators.ReadP as RP + +-- The record for each of the package from hackage +-- TODO - add another information about the packages +data HackagePackage = HP { + name :: String, + version :: DV.Version +} deriving (Eq, Show) + +-- The status of the package between two updates +data HackageUpdate = Added | Removed | Updated deriving (Eq, Show) + +-- The map of all the hackage packages with name as the key and HackagePackage +-- as the value +type HackageMap = Map.Map String HackagePackage + +-- The map, that shows, which packages have change since the last update +type HackageUpdateMap = Map.Map String (HackageUpdate, HackagePackage) + +-- Parses the file path of the cabal file to get version and package name +parseCabalFilePath :: RP.ReadP HackagePackage +parseCabalFilePath = do + package <- RP.munch1 DC.isLetter + RP.char '/' + version <- DV.parseVersion + RP.char '/' + name <- RP.munch1 DC.isLetter + guard (name == package) + suff <- RP.string ".cabal" + RP.eof + pure $ HP { name = package, version = version} + +-- Update map of the packages with the hackage package +-- Update when, the version of package is newer than version of package in the +-- map +updateMap :: HackagePackage -> HackageMap -> HackageMap +updateMap hp map = case Map.lookup (name hp) map of + Just oldHp -> if (version hp) > (version oldHp) then updatedMap + else map + Nothing -> updatedMap + where updatedMap = Map.insert (name hp) hp map + +getEntries :: TI.TarIndex -> [HackagePackage] +getEntries index = map fst $ map head $ filter (not.null) $ map (goodParse.parse.getPath) entries + where entries = TI.toList index + getPath = fst + parse = RP.readP_to_S parseCabalFilePath + goodParse = filter (null.snd) + +loadTarIndex :: FilePath -> IO (Either Tar.FormatError TI.TarIndex) +loadTarIndex file = do + content <- BL.readFile file + return $ TI.build $ Tar.read content + + +-- convert tarindex to list, then apply parser combinator, throw out all +-- empty parsingresults and then take the first successfull parsing result +buildHackageMap :: TI.TarIndex -> HackageMap +buildHackageMap index = foldr updateMap Map.empty (getEntries index) + +buildDifferenceMap :: HackageMap -> HackageMap -> HackageUpdateMap +buildDifferenceMap oldMap newMap = foldr Map.union Map.empty [deletedMap, addedMap, updatedMap] + where + deletedMap = Map.map ((,) Removed) $ Map.difference oldMap newMap + addedMap = Map.map ((,) Added) $ Map.difference newMap oldMap + updatedMap' = Map.intersection newMap oldMap + updatedMap = Map.map ((,) Updated) $ Map.differenceWith diff updatedMap' oldMap + diff newpack oldpack = if (newpack /= oldpack) then Just newpack else Nothing diff --git a/REPL/stack.yaml b/REPL/stack.yaml new file mode 100644 index 0000000..fdd3c4c --- /dev/null +++ b/REPL/stack.yaml @@ -0,0 +1,66 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# http://docs.haskellstack.org/en/stable/yaml_configuration/ + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. +# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# resolver: ghcjs-0.1.0_ghc-7.10.2 +# resolver: +# name: custom-snapshot +# location: "./custom-snapshot.yaml" +resolver: lts-8.15 + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# - location: +# git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# extra-dep: true +# subdirs: +# - auto-update +# - wai +# +# A package marked 'extra-dep: true' will only be built if demanded by a +# non-dependency (i.e. a user package), and its test suites and benchmarks +# will not be run. This is useful for tweaking upstream packages. +packages: +- '.' +# Dependency packages to be pulled from upstream that are not in the resolver +# (e.g., acme-missiles-0.3) +extra-deps: [] + +# Override default flag values for local packages and extra-deps +flags: {} + +# Extra package databases containing global packages +extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=1.4" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor \ No newline at end of file diff --git a/REPL/test/Spec.hs b/REPL/test/Spec.hs new file mode 100644 index 0000000..cd4753f --- /dev/null +++ b/REPL/test/Spec.hs @@ -0,0 +1,2 @@ +main :: IO () +main = putStrLn "Test suite not yet implemented"