From 2ba61a21a0262002eaf0c4ff31e4a5fa7bdf4c4f Mon Sep 17 00:00:00 2001 From: lalaition Date: Wed, 5 Jun 2019 20:07:48 -0600 Subject: [PATCH 1/5] Use Attoparsec to parse git output instead of manually splitting Text --- src/Semantic/Git.hs | 53 +++++++++++++++++++++++++++++++++++-------- test/Semantic/Spec.hs | 18 +++++++++++++++ 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/Semantic/Git.hs b/src/Semantic/Git.hs index ffa0ae563..2ae520c61 100644 --- a/src/Semantic/Git.hs +++ b/src/Semantic/Git.hs @@ -9,12 +9,18 @@ module Semantic.Git , ObjectType(..) , ObjectMode(..) , OID(..) + + -- Testing Purposes + , parseEntries + , parseEntry ) where import Control.Monad.IO.Class -import Data.Text as Text -import Shelly hiding (FilePath) -import System.IO (hSetBinaryMode) +import Data.Attoparsec.Text (Parser) +import Data.Attoparsec.Text as AP +import Data.Text as Text +import Shelly hiding (FilePath) +import System.IO (hSetBinaryMode) -- | git clone --bare clone :: Text -> FilePath -> IO () @@ -30,16 +36,45 @@ catFile gitDir (OID oid) = sh $ do lsTree :: FilePath -> OID -> IO [TreeEntry] lsTree gitDir (OID sha) = sh $ do out <- run "git" [pack ("--git-dir=" <> gitDir), "ls-tree", "-rz", sha] - pure $ mkEntry <$> splitOn "\NUL" out - where - mkEntry row | [mode, ty, rest] <- splitOn " " row - , [oid, path] <- splitOn "\t" rest - = TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path) - | otherwise = nullTreeEntry + pure $ parseEntries out sh :: MonadIO m => Sh a -> m a sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True)) +-- | Parses an list of entries separated by \NUL, and on failure return [] +parseEntries :: Text -> [TreeEntry] +parseEntries = either (const []) id . AP.parseOnly (AP.sepBy entryParser (AP.char '\NUL')) + +-- | Parse the entire input with entryParser, and on failure return a default +-- For testing purposes only +parseEntry :: Text -> TreeEntry +parseEntry = either (const nullTreeEntry) id . AP.parseOnly entryParser + +-- | Parses an entry successfully, falling back to the failure case if necessary. +entryParser :: Parser TreeEntry +entryParser = AP.choice [entrySuccessParser, entryDefaultParser] + +-- | Attoparsec parser for a block af text ending with \NUL +-- in order to consume invalid input +entryDefaultParser :: Parser TreeEntry +entryDefaultParser = do + _ <- AP.takeWhile (/= '\NUL') + pure $ nullTreeEntry + +-- | Attoparsec parser for a single line of git ls-tree -rz output +entrySuccessParser :: Parser TreeEntry +entrySuccessParser = do + mode <- takeWhileToNul (/= ' ') + _ <- AP.char ' ' + ty <- takeWhileToNul (/= ' ') + _ <- AP.char ' ' + oid <- takeWhileToNul (/= '\t') + _ <- AP.char '\t' + path <- takeWhileToNul (const True) + pure $ TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path) + where + takeWhileToNul f = AP.takeWhile (\x -> f x && x /= '\NUL') + newtype OID = OID Text deriving (Eq, Show, Ord) diff --git a/test/Semantic/Spec.hs b/test/Semantic/Spec.hs index 84ea9ca7b..ef5ff911d 100644 --- a/test/Semantic/Spec.hs +++ b/test/Semantic/Spec.hs @@ -3,6 +3,7 @@ module Semantic.Spec (spec) where import Data.Diff import Data.Patch import Semantic.Api hiding (Blob) +import Semantic.Git import System.Exit import SpecHelpers @@ -24,5 +25,22 @@ spec = parallel $ do it "renders with the specified renderer" $ do output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob] output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n" + + describe "gitParsing" $ do + it "parses a git output string" $ do + let input = "100644 tree ThisIsTheOid\t/this/is/the/path" + let expected = TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path" + parseEntry input `shouldBe` expected + + it "parses nonsense into a default value" $ do + let input = "iel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz" + let expected = TreeEntry OtherMode OtherObjectType (OID mempty) mempty + parseEntry input `shouldBe` expected + + it "parses many outputs separated by \\NUL" $ do + let input = "100644 tree ThisIsTheOid\t/this/is/the/path\NULiel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz\NUL120000 blob 17776\t/dev/urandom" + let expected = [ TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path", TreeEntry OtherMode OtherObjectType (OID mempty) mempty, TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"] + parseEntries input `shouldBe` expected + where methodsBlob = makeBlob "def foo\nend\n" "methods.rb" Ruby mempty From 0abcac73b8212ea9b75bde9a9134dd4a2c646fad Mon Sep 17 00:00:00 2001 From: lalaition Date: Wed, 5 Jun 2019 20:11:40 -0600 Subject: [PATCH 2/5] Run stylish-haskell on changes --- src/Semantic/Git.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Semantic/Git.hs b/src/Semantic/Git.hs index 2ae520c61..e25a7af84 100644 --- a/src/Semantic/Git.hs +++ b/src/Semantic/Git.hs @@ -58,7 +58,7 @@ entryParser = AP.choice [entrySuccessParser, entryDefaultParser] -- in order to consume invalid input entryDefaultParser :: Parser TreeEntry entryDefaultParser = do - _ <- AP.takeWhile (/= '\NUL') + _ <- AP.takeWhile (/= '\NUL') pure $ nullTreeEntry -- | Attoparsec parser for a single line of git ls-tree -rz output From be96fcf52fcceee66018d4b733668baff4f65f14 Mon Sep 17 00:00:00 2001 From: lalaition Date: Thu, 6 Jun 2019 21:24:22 -0600 Subject: [PATCH 3/5] Refactored code to be in line with suggestions --- src/Semantic/Git.hs | 57 +++++++++++++------------------------------ test/Semantic/Spec.hs | 16 ++++++------ 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/src/Semantic/Git.hs b/src/Semantic/Git.hs index e25a7af84..3d53f4f8f 100644 --- a/src/Semantic/Git.hs +++ b/src/Semantic/Git.hs @@ -30,50 +30,39 @@ clone url path = sh $ do -- | git cat-file -p catFile :: FilePath -> OID -> IO Text catFile gitDir (OID oid) = sh $ do - run "git" [pack ("--git-dir=" <> gitDir), "cat-file", "-p", oid] + run "git" ["-C", pack gitDir, "cat-file", "-p", oid] -- | git ls-tree -rz lsTree :: FilePath -> OID -> IO [TreeEntry] -lsTree gitDir (OID sha) = sh $ do - out <- run "git" [pack ("--git-dir=" <> gitDir), "ls-tree", "-rz", sha] - pure $ parseEntries out +lsTree gitDir (OID sha) = sh $ parseEntries <$> run "git" ["-C", pack gitDir, "ls-tree", "-rz", sha] sh :: MonadIO m => Sh a -> m a sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True)) -- | Parses an list of entries separated by \NUL, and on failure return [] parseEntries :: Text -> [TreeEntry] -parseEntries = either (const []) id . AP.parseOnly (AP.sepBy entryParser (AP.char '\NUL')) +parseEntries text = case parseOnly everything text of + Done "" ls -> ls + other -> error ("There was an error parsing the Git output: " <> show other) + where + everything = AP.sepBy entryParser "\NUL" <* ("\NUL\n" "End sequence") <* AP.endOfInput "Everything" + parseOnly p t = AP.feed (AP.parse p t) "" -- | Parse the entire input with entryParser, and on failure return a default -- For testing purposes only parseEntry :: Text -> TreeEntry -parseEntry = either (const nullTreeEntry) id . AP.parseOnly entryParser +parseEntry = either (const nullTreeEntry) id . AP.parseOnly (entryParser <* AP.endOfInput) --- | Parses an entry successfully, falling back to the failure case if necessary. +-- | Parses a TreeEntry entryParser :: Parser TreeEntry -entryParser = AP.choice [entrySuccessParser, entryDefaultParser] - --- | Attoparsec parser for a block af text ending with \NUL --- in order to consume invalid input -entryDefaultParser :: Parser TreeEntry -entryDefaultParser = do - _ <- AP.takeWhile (/= '\NUL') - pure $ nullTreeEntry - --- | Attoparsec parser for a single line of git ls-tree -rz output -entrySuccessParser :: Parser TreeEntry -entrySuccessParser = do - mode <- takeWhileToNul (/= ' ') - _ <- AP.char ' ' - ty <- takeWhileToNul (/= ' ') - _ <- AP.char ' ' - oid <- takeWhileToNul (/= '\t') - _ <- AP.char '\t' - path <- takeWhileToNul (const True) - pure $ TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path) +entryParser = TreeEntry + <$> modeParser <* (" " "First Space") + <*> typeParser <* (" " "Second Space") + <*> (OID <$> AP.takeWhile (AP.inClass "0123456789abcdef") "OID Parser") <* ("\t" "Tab") + <*> (unpack <$> AP.takeWhile (/= '\NUL') "Filepath") "Entry Parser" where - takeWhileToNul f = AP.takeWhile (\x -> f x && x /= '\NUL') + typeParser = AP.choice [BlobObject <$ "blob", TreeObject <$ "tree"] "Type Parser" + modeParser = AP.choice [NormalMode <$ "100644", ExecutableMode <$ "100755", SymlinkMode <$ "120000", TreeMode <$ "040000"] "Mode Parser" newtype OID = OID Text deriving (Eq, Show, Ord) @@ -86,24 +75,12 @@ data ObjectMode | OtherMode deriving (Eq, Show) -objectMode :: Text -> ObjectMode -objectMode "100644" = NormalMode -objectMode "100755" = ExecutableMode -objectMode "120000" = SymlinkMode -objectMode "040000" = TreeMode -objectMode _ = OtherMode - data ObjectType = BlobObject | TreeObject | OtherObjectType deriving (Eq, Show) -objectType :: Text -> ObjectType -objectType "blob" = BlobObject -objectType "tree" = TreeObject -objectType _ = OtherObjectType - data TreeEntry = TreeEntry { treeEntryMode :: ObjectMode diff --git a/test/Semantic/Spec.hs b/test/Semantic/Spec.hs index ef5ff911d..55d9c08f2 100644 --- a/test/Semantic/Spec.hs +++ b/test/Semantic/Spec.hs @@ -26,20 +26,20 @@ spec = parallel $ do output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob] output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n" - describe "gitParsing" $ do + describe "git ls-tree parsing" $ do it "parses a git output string" $ do - let input = "100644 tree ThisIsTheOid\t/this/is/the/path" - let expected = TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path" + let input = "100644 tree abcdef\t/this/is/the/path" + let expected = TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path" parseEntry input `shouldBe` expected - it "parses nonsense into a default value" $ do - let input = "iel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz" - let expected = TreeEntry OtherMode OtherObjectType (OID mempty) mempty + it "allows whitespace in the path" $ do + let input = "100644 tree 12345\t/this\n/is\t/the /path\r" + let expected = TreeEntry NormalMode TreeObject (OID "12345") "/this\n/is\t/the /path\r" parseEntry input `shouldBe` expected it "parses many outputs separated by \\NUL" $ do - let input = "100644 tree ThisIsTheOid\t/this/is/the/path\NULiel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz\NUL120000 blob 17776\t/dev/urandom" - let expected = [ TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path", TreeEntry OtherMode OtherObjectType (OID mempty) mempty, TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"] + let input = "100644 tree abcdef\t/this/is/the/path\NUL120000 blob 17776\t/dev/urandom\NUL\n" + let expected = [ TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path", TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"] parseEntries input `shouldBe` expected where From 28c26d015e838e61c66c5f4f7d86b9ff65b07806 Mon Sep 17 00:00:00 2001 From: lalaition Date: Thu, 6 Jun 2019 21:25:47 -0600 Subject: [PATCH 4/5] Run stylish-haskell on changes, again --- src/Semantic/Git.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Semantic/Git.hs b/src/Semantic/Git.hs index 3d53f4f8f..7a17f509b 100644 --- a/src/Semantic/Git.hs +++ b/src/Semantic/Git.hs @@ -43,8 +43,8 @@ sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` T parseEntries :: Text -> [TreeEntry] parseEntries text = case parseOnly everything text of Done "" ls -> ls - other -> error ("There was an error parsing the Git output: " <> show other) - where + other -> error ("There was an error parsing the Git output: " <> show other) + where everything = AP.sepBy entryParser "\NUL" <* ("\NUL\n" "End sequence") <* AP.endOfInput "Everything" parseOnly p t = AP.feed (AP.parse p t) "" From e4319224e374935cdd1c9cffb79b93892fda1960 Mon Sep 17 00:00:00 2001 From: lalaition Date: Sun, 9 Jun 2019 14:11:58 -0600 Subject: [PATCH 5/5] Cleaned up parser --- src/Semantic/Git.hs | 27 ++++++++++++--------------- test/Semantic/Spec.hs | 4 ++-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/Semantic/Git.hs b/src/Semantic/Git.hs index 7a17f509b..c6dad9214 100644 --- a/src/Semantic/Git.hs +++ b/src/Semantic/Git.hs @@ -18,6 +18,7 @@ module Semantic.Git import Control.Monad.IO.Class import Data.Attoparsec.Text (Parser) import Data.Attoparsec.Text as AP +import Data.Char import Data.Text as Text import Shelly hiding (FilePath) import System.IO (hSetBinaryMode) @@ -41,28 +42,26 @@ sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` T -- | Parses an list of entries separated by \NUL, and on failure return [] parseEntries :: Text -> [TreeEntry] -parseEntries text = case parseOnly everything text of - Done "" ls -> ls - other -> error ("There was an error parsing the Git output: " <> show other) +parseEntries = either (const []) id . AP.parseOnly everything where - everything = AP.sepBy entryParser "\NUL" <* ("\NUL\n" "End sequence") <* AP.endOfInput "Everything" - parseOnly p t = AP.feed (AP.parse p t) "" + everything = AP.sepBy entryParser "\NUL" <* "\NUL\n" <* AP.endOfInput -- | Parse the entire input with entryParser, and on failure return a default -- For testing purposes only -parseEntry :: Text -> TreeEntry -parseEntry = either (const nullTreeEntry) id . AP.parseOnly (entryParser <* AP.endOfInput) +parseEntry :: Text -> Either String TreeEntry +parseEntry = AP.parseOnly (entryParser <* AP.endOfInput) -- | Parses a TreeEntry entryParser :: Parser TreeEntry entryParser = TreeEntry - <$> modeParser <* (" " "First Space") - <*> typeParser <* (" " "Second Space") - <*> (OID <$> AP.takeWhile (AP.inClass "0123456789abcdef") "OID Parser") <* ("\t" "Tab") - <*> (unpack <$> AP.takeWhile (/= '\NUL') "Filepath") "Entry Parser" + <$> modeParser <* AP.char ' ' + <*> typeParser <* AP.char ' ' + <*> oidParser <* AP.char '\t' + <*> (unpack <$> AP.takeWhile (/= '\NUL')) where - typeParser = AP.choice [BlobObject <$ "blob", TreeObject <$ "tree"] "Type Parser" - modeParser = AP.choice [NormalMode <$ "100644", ExecutableMode <$ "100755", SymlinkMode <$ "120000", TreeMode <$ "040000"] "Mode Parser" + typeParser = AP.choice [BlobObject <$ "blob", TreeObject <$ "tree"] + modeParser = AP.choice [NormalMode <$ "100644", ExecutableMode <$ "100755", SymlinkMode <$ "120000", TreeMode <$ "040000"] + oidParser = OID <$> AP.takeWhile isHexDigit newtype OID = OID Text deriving (Eq, Show, Ord) @@ -89,5 +88,3 @@ data TreeEntry , treeEntryPath :: FilePath } deriving (Eq, Show) -nullTreeEntry :: TreeEntry -nullTreeEntry = TreeEntry OtherMode OtherObjectType (OID mempty) mempty diff --git a/test/Semantic/Spec.hs b/test/Semantic/Spec.hs index 55d9c08f2..76b53738a 100644 --- a/test/Semantic/Spec.hs +++ b/test/Semantic/Spec.hs @@ -29,12 +29,12 @@ spec = parallel $ do describe "git ls-tree parsing" $ do it "parses a git output string" $ do let input = "100644 tree abcdef\t/this/is/the/path" - let expected = TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path" + let expected = Right $ TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path" parseEntry input `shouldBe` expected it "allows whitespace in the path" $ do let input = "100644 tree 12345\t/this\n/is\t/the /path\r" - let expected = TreeEntry NormalMode TreeObject (OID "12345") "/this\n/is\t/the /path\r" + let expected = Right $ TreeEntry NormalMode TreeObject (OID "12345") "/this\n/is\t/the /path\r" parseEntry input `shouldBe` expected it "parses many outputs separated by \\NUL" $ do