1
1
mirror of https://github.com/github/semantic.git synced 2024-12-27 00:44:57 +03:00

Merge pull request #98 from lalaithion/gitparsing

Use Attoparsec for parsing git output instead of manually splitting Text
This commit is contained in:
Patrick Thomson 2019-06-10 15:35:43 -04:00 committed by GitHub
commit eaf1378383
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 53 additions and 26 deletions

View File

@ -9,12 +9,19 @@ module Semantic.Git
, ObjectType(..) , ObjectType(..)
, ObjectMode(..) , ObjectMode(..)
, OID(..) , OID(..)
-- Testing Purposes
, parseEntries
, parseEntry
) where ) where
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Data.Text as Text import Data.Attoparsec.Text (Parser)
import Shelly hiding (FilePath) import Data.Attoparsec.Text as AP
import System.IO (hSetBinaryMode) import Data.Char
import Data.Text as Text
import Shelly hiding (FilePath)
import System.IO (hSetBinaryMode)
-- | git clone --bare -- | git clone --bare
clone :: Text -> FilePath -> IO () clone :: Text -> FilePath -> IO ()
@ -24,22 +31,38 @@ clone url path = sh $ do
-- | git cat-file -p -- | git cat-file -p
catFile :: FilePath -> OID -> IO Text catFile :: FilePath -> OID -> IO Text
catFile gitDir (OID oid) = sh $ do catFile gitDir (OID oid) = sh $ do
run "git" [pack ("--git-dir=" <> gitDir), "cat-file", "-p", oid] run "git" ["-C", pack gitDir, "cat-file", "-p", oid]
-- | git ls-tree -rz -- | git ls-tree -rz
lsTree :: FilePath -> OID -> IO [TreeEntry] lsTree :: FilePath -> OID -> IO [TreeEntry]
lsTree gitDir (OID sha) = sh $ do lsTree gitDir (OID sha) = sh $ parseEntries <$> run "git" ["-C", pack gitDir, "ls-tree", "-rz", sha]
out <- run "git" [pack ("--git-dir=" <> gitDir), "ls-tree", "-rz", sha]
pure $ mkEntry <$> splitOn "\NUL" out
where
mkEntry row | [mode, ty, rest] <- splitOn " " row
, [oid, path] <- splitOn "\t" rest
= TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path)
| otherwise = nullTreeEntry
sh :: MonadIO m => Sh a -> m a sh :: MonadIO m => Sh a -> m a
sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True)) sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True))
-- | Parses an list of entries separated by \NUL, and on failure return []
parseEntries :: Text -> [TreeEntry]
parseEntries = either (const []) id . AP.parseOnly everything
where
everything = AP.sepBy entryParser "\NUL" <* "\NUL\n" <* AP.endOfInput
-- | Parse the entire input with entryParser, and on failure return a default
-- For testing purposes only
parseEntry :: Text -> Either String TreeEntry
parseEntry = AP.parseOnly (entryParser <* AP.endOfInput)
-- | Parses a TreeEntry
entryParser :: Parser TreeEntry
entryParser = TreeEntry
<$> modeParser <* AP.char ' '
<*> typeParser <* AP.char ' '
<*> oidParser <* AP.char '\t'
<*> (unpack <$> AP.takeWhile (/= '\NUL'))
where
typeParser = AP.choice [BlobObject <$ "blob", TreeObject <$ "tree"]
modeParser = AP.choice [NormalMode <$ "100644", ExecutableMode <$ "100755", SymlinkMode <$ "120000", TreeMode <$ "040000"]
oidParser = OID <$> AP.takeWhile isHexDigit
newtype OID = OID Text newtype OID = OID Text
deriving (Eq, Show, Ord) deriving (Eq, Show, Ord)
@ -51,24 +74,12 @@ data ObjectMode
| OtherMode | OtherMode
deriving (Eq, Show) deriving (Eq, Show)
objectMode :: Text -> ObjectMode
objectMode "100644" = NormalMode
objectMode "100755" = ExecutableMode
objectMode "120000" = SymlinkMode
objectMode "040000" = TreeMode
objectMode _ = OtherMode
data ObjectType data ObjectType
= BlobObject = BlobObject
| TreeObject | TreeObject
| OtherObjectType | OtherObjectType
deriving (Eq, Show) deriving (Eq, Show)
objectType :: Text -> ObjectType
objectType "blob" = BlobObject
objectType "tree" = TreeObject
objectType _ = OtherObjectType
data TreeEntry data TreeEntry
= TreeEntry = TreeEntry
{ treeEntryMode :: ObjectMode { treeEntryMode :: ObjectMode
@ -77,5 +88,3 @@ data TreeEntry
, treeEntryPath :: FilePath , treeEntryPath :: FilePath
} deriving (Eq, Show) } deriving (Eq, Show)
nullTreeEntry :: TreeEntry
nullTreeEntry = TreeEntry OtherMode OtherObjectType (OID mempty) mempty

View File

@ -3,6 +3,7 @@ module Semantic.Spec (spec) where
import Data.Diff import Data.Diff
import Data.Patch import Data.Patch
import Semantic.Api hiding (Blob) import Semantic.Api hiding (Blob)
import Semantic.Git
import System.Exit import System.Exit
import SpecHelpers import SpecHelpers
@ -24,5 +25,22 @@ spec = parallel $ do
it "renders with the specified renderer" $ do it "renders with the specified renderer" $ do
output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob] output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob]
output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n" output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n"
describe "git ls-tree parsing" $ do
it "parses a git output string" $ do
let input = "100644 tree abcdef\t/this/is/the/path"
let expected = Right $ TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path"
parseEntry input `shouldBe` expected
it "allows whitespace in the path" $ do
let input = "100644 tree 12345\t/this\n/is\t/the /path\r"
let expected = Right $ TreeEntry NormalMode TreeObject (OID "12345") "/this\n/is\t/the /path\r"
parseEntry input `shouldBe` expected
it "parses many outputs separated by \\NUL" $ do
let input = "100644 tree abcdef\t/this/is/the/path\NUL120000 blob 17776\t/dev/urandom\NUL\n"
let expected = [ TreeEntry NormalMode TreeObject (OID "abcdef") "/this/is/the/path", TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"]
parseEntries input `shouldBe` expected
where where
methodsBlob = makeBlob "def foo\nend\n" "methods.rb" Ruby mempty methodsBlob = makeBlob "def foo\nend\n" "methods.rb" Ruby mempty