1
1
mirror of https://github.com/github/semantic.git synced 2024-12-26 08:25:19 +03:00

Use Attoparsec to parse git output instead of manually splitting Text

This commit is contained in:
lalaition 2019-06-05 20:07:48 -06:00
parent c562987921
commit 2ba61a21a0
2 changed files with 62 additions and 9 deletions

View File

@ -9,12 +9,18 @@ module Semantic.Git
, ObjectType(..)
, ObjectMode(..)
, OID(..)
-- Testing Purposes
, parseEntries
, parseEntry
) where
import Control.Monad.IO.Class
import Data.Text as Text
import Shelly hiding (FilePath)
import System.IO (hSetBinaryMode)
import Data.Attoparsec.Text (Parser)
import Data.Attoparsec.Text as AP
import Data.Text as Text
import Shelly hiding (FilePath)
import System.IO (hSetBinaryMode)
-- | git clone --bare
clone :: Text -> FilePath -> IO ()
@ -30,16 +36,45 @@ catFile gitDir (OID oid) = sh $ do
lsTree :: FilePath -> OID -> IO [TreeEntry]
lsTree gitDir (OID sha) = sh $ do
out <- run "git" [pack ("--git-dir=" <> gitDir), "ls-tree", "-rz", sha]
pure $ mkEntry <$> splitOn "\NUL" out
where
mkEntry row | [mode, ty, rest] <- splitOn " " row
, [oid, path] <- splitOn "\t" rest
= TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path)
| otherwise = nullTreeEntry
pure $ parseEntries out
sh :: MonadIO m => Sh a -> m a
sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True))
-- | Parses an list of entries separated by \NUL, and on failure return []
parseEntries :: Text -> [TreeEntry]
parseEntries = either (const []) id . AP.parseOnly (AP.sepBy entryParser (AP.char '\NUL'))
-- | Parse the entire input with entryParser, and on failure return a default
-- For testing purposes only
parseEntry :: Text -> TreeEntry
parseEntry = either (const nullTreeEntry) id . AP.parseOnly entryParser
-- | Parses an entry successfully, falling back to the failure case if necessary.
entryParser :: Parser TreeEntry
entryParser = AP.choice [entrySuccessParser, entryDefaultParser]
-- | Attoparsec parser for a block af text ending with \NUL
-- in order to consume invalid input
entryDefaultParser :: Parser TreeEntry
entryDefaultParser = do
_ <- AP.takeWhile (/= '\NUL')
pure $ nullTreeEntry
-- | Attoparsec parser for a single line of git ls-tree -rz output
entrySuccessParser :: Parser TreeEntry
entrySuccessParser = do
mode <- takeWhileToNul (/= ' ')
_ <- AP.char ' '
ty <- takeWhileToNul (/= ' ')
_ <- AP.char ' '
oid <- takeWhileToNul (/= '\t')
_ <- AP.char '\t'
path <- takeWhileToNul (const True)
pure $ TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path)
where
takeWhileToNul f = AP.takeWhile (\x -> f x && x /= '\NUL')
newtype OID = OID Text
deriving (Eq, Show, Ord)

View File

@ -3,6 +3,7 @@ module Semantic.Spec (spec) where
import Data.Diff
import Data.Patch
import Semantic.Api hiding (Blob)
import Semantic.Git
import System.Exit
import SpecHelpers
@ -24,5 +25,22 @@ spec = parallel $ do
it "renders with the specified renderer" $ do
output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob]
output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n"
describe "gitParsing" $ do
it "parses a git output string" $ do
let input = "100644 tree ThisIsTheOid\t/this/is/the/path"
let expected = TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path"
parseEntry input `shouldBe` expected
it "parses nonsense into a default value" $ do
let input = "iel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz"
let expected = TreeEntry OtherMode OtherObjectType (OID mempty) mempty
parseEntry input `shouldBe` expected
it "parses many outputs separated by \\NUL" $ do
let input = "100644 tree ThisIsTheOid\t/this/is/the/path\NULiel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz\NUL120000 blob 17776\t/dev/urandom"
let expected = [ TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path", TreeEntry OtherMode OtherObjectType (OID mempty) mempty, TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"]
parseEntries input `shouldBe` expected
where
methodsBlob = makeBlob "def foo\nend\n" "methods.rb" Ruby mempty