1
1
mirror of https://github.com/github/semantic.git synced 2025-01-03 04:51:57 +03:00

Merge branch 'master' into javascript-improvements

This commit is contained in:
Rick Winfrey 2016-12-05 18:07:51 -06:00 committed by GitHub
commit 171544de29
14 changed files with 175 additions and 87 deletions

View File

@ -36,6 +36,7 @@ library
, Language.Markdown
, Language.Go
, Language.Ruby
, Parse
, Parser
, Patch
, Patch.Arbitrary
@ -55,13 +56,13 @@ library
, Syntax
, Term
, Term.Arbitrary
, Term.Instances
, TreeSitter
, FDoc.Term
, FDoc.RecursionSchemes
, FDoc.NatExample
build-depends: base >= 4.8 && < 5
, aeson
, aeson-pretty
, async-pool
, bifunctors
, blaze-html

View File

@ -1,5 +1,5 @@
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Arguments (Arguments(..), CmdLineOptions(..), DiffMode(..), ExtraArg(..), programArguments, args) where
module Arguments (Arguments(..), CmdLineOptions(..), DiffMode(..), ExtraArg(..), RunMode(..), programArguments, args) where
import Data.Functor.Both
import Data.Maybe
@ -20,6 +20,10 @@ data DiffMode = PathDiff (Both FilePath)
| CommitDiff
deriving (Show)
data RunMode = Diff
| Parse
deriving (Show)
-- | The command line options to the application (arguments for optparse-applicative).
data CmdLineOptions = CmdLineOptions
{ outputFormat :: R.Format
@ -28,6 +32,7 @@ data CmdLineOptions = CmdLineOptions
, noIndex :: Bool
, extraArgs :: [ExtraArg]
, developmentMode' :: Bool
, runMode' :: RunMode
}
-- | Arguments for the program (includes command line, environment, and defaults).
@ -38,6 +43,7 @@ data Arguments = Arguments
, timeoutInMicroseconds :: Int
, output :: Maybe FilePath
, diffMode :: DiffMode
, runMode :: RunMode
, shaRange :: Both (Maybe String)
, filePaths :: [FilePath]
, developmentMode :: Bool
@ -63,6 +69,7 @@ programArguments CmdLineOptions{..} = do
, diffMode = case (noIndex, filePaths) of
(True, [fileA, fileB]) -> PathDiff (both fileA fileB)
(_, _) -> CommitDiff
, runMode = runMode'
, shaRange = fetchShas extraArgs
, filePaths = filePaths
, developmentMode = developmentMode'
@ -87,6 +94,7 @@ args gitDir sha1 sha2 filePaths format = Arguments
, timeoutInMicroseconds = defaultTimeout
, output = Nothing
, diffMode = CommitDiff
, runMode = Diff
, shaRange = Just <$> both sha1 sha2
, filePaths = filePaths
, developmentMode = False

View File

@ -1,8 +1,10 @@
{-# LANGUAGE MultiParamTypeClasses #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module Category where
import Prologue
import Test.QuickCheck hiding (Args)
import Data.Text (pack)
import Data.Text.Arbitrary()
-- | A standardized category of AST node. Used to determine the semantics for
@ -167,6 +169,9 @@ data Category
instance Hashable Category
instance (StringConv Category Text) where
strConv _ = pack . show
instance Arbitrary Category where
arbitrary = oneof [
pure Program

View File

@ -3,20 +3,15 @@ module Diffing where
import Prologue hiding (fst, snd)
import Category
import qualified Data.ByteString.Char8 as B1
import Data.Functor.Both
import Data.Record
import qualified Data.Text.IO as TextIO
import qualified Data.Text.ICU.Detect as Detect
import qualified Data.Text.ICU.Convert as Convert
import Data.These
import Diff
import Info
import Interpreter
import Language
import Language.Markdown
import Parser
import Patch
import Parser
import Renderer
import Renderer.JSON
import Renderer.Patch
@ -29,9 +24,6 @@ import System.FilePath
import qualified System.IO as IO
import System.Environment (lookupEnv)
import Term
import TreeSitter
import Text.Parser.TreeSitter.Language
import qualified Data.Text as T
import Data.Aeson (ToJSON, toJSON, toEncoding)
import Data.Aeson.Encoding (encodingToLazyByteString)
@ -67,56 +59,6 @@ getLabel (h :< t) = (category h, case t of
Leaf s -> Just s
_ -> Nothing)
-- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
Just Markdown -> cmarkParser
Just Ruby -> treeSitterParser Ruby ts_language_ruby
Just Language.Go -> treeSitterParser Language.Go ts_language_go
_ -> lineByLineParser
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
lines = actualLines source
root children = (Range 0 (length source) .: Program .: rangeToSourceSpan source (Range 0 (length source)) .: RNil) :< Indexed children
leaf charIndex line = (Range charIndex (charIndex + T.length line) .: Program .: rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) .: RNil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (toText line) ] , charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
parserForFilepath path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
decorateTerm :: Functor f => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c)
-- | Term decorator computing the cost of an unpacked term.
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c)
-- | Determine whether two terms are comparable based on the equality of their categories.
compareCategoryEq :: Functor f => HasField fields Category => Term f (Record fields) -> Term f (Record fields) -> Bool
compareCategoryEq = (==) `on` category . extract

View File

@ -8,7 +8,6 @@ import Term
import Syntax
import Prologue
import Prelude
import Data.Functor.Foldable hiding (ListF)
import FDoc.Term
data NewField = NewField deriving (Show)

View File

@ -1,5 +1,5 @@
{-# LANGUAGE DataKinds, GeneralizedNewtypeDeriving #-}
module Info (Range(..), characterRange, setCharacterRange, Category(..), category, setCategory, Cost(..), cost, setCost, SourceSpan(..), SourcePos(..), SourceSpans(..)) where
module Info (Range(..), characterRange, setCharacterRange, Category(..), category, setCategory, Cost(..), cost, setCost, SourceSpan(..), SourcePos(..), SourceSpans(..), SourceText(..), sourceText) where
import Data.Record
import Prologue
@ -12,6 +12,9 @@ import Data.Aeson
newtype Cost = Cost { unCost :: Int }
deriving (Eq, Num, Ord, Show, ToJSON)
newtype SourceText = SourceText { unText :: Text }
deriving (Show, ToJSON)
characterRange :: HasField fields Range => Record fields -> Range
characterRange = getField
@ -27,6 +30,9 @@ setCategory = setField
cost :: HasField fields Cost => Record fields -> Cost
cost = getField
sourceText :: HasField fields SourceText => Record fields -> SourceText
sourceText = getField
setCost :: HasField fields Cost => Record fields -> Cost -> Record fields
setCost = setField

121
src/Parse.hs Normal file
View File

@ -0,0 +1,121 @@
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators, DeriveAnyClass #-}
module Parse where
import Arguments
import Category
import Data.Aeson (ToJSON)
import Data.Aeson.Encode.Pretty
import qualified Data.ByteString.Char8 as B1
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.ICU.Convert as Convert
import qualified Data.Text.ICU.Detect as Detect
import Data.Record
import qualified Data.Text as T
import Info
import Language
import Language.Markdown
import Parser
import Prologue
import Source
import Syntax
import System.FilePath
import Term
import TreeSitter
import Text.Parser.TreeSitter.Language
import Renderer.JSON()
data ParseJSON = ParseJSON
{ category :: Text
, range :: Range
, text :: SourceText
, children :: [ParseJSON]
} deriving (Show, Generic, ToJSON)
run :: Arguments -> IO ()
run Arguments{..} = do
sources <- sequence $ readAndTranscodeFile <$> filePaths
terms <- zipWithM (\parser sourceBlob -> parser sourceBlob) parsers (sourceBlobs sources)
writeToOutput output (cata algebra <$> terms)
where
sourceBlobs sources = Source.SourceBlob <$> sources <*> pure mempty <*> filePaths <*> pure (Just Source.defaultPlainBlob)
parsers = parserWithSource <$> filePaths
algebra :: TermF (Syntax leaf) (Record '[SourceText, Range, Category, SourceSpan]) ParseJSON -> ParseJSON
algebra term = case term of
(annotation :< Leaf _) -> ParseJSON (category' annotation) (range' annotation) (text' annotation) []
(annotation :< syntax) -> ParseJSON (category' annotation) (range' annotation) (text' annotation) (toList syntax)
where
category' = toS . Info.category
range' = characterRange
text' = Info.sourceText
writeToOutput :: Maybe FilePath -> [ParseJSON] -> IO ()
writeToOutput output parseJSON =
case output of
Nothing -> for_ parseJSON (putStrLn . encodePretty)
Just path -> for_ parseJSON (BL.writeFile path . encodePretty)
-- | Return a parser that decorates with the cost of a term and its children.
parserWithCost :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
parserWithCost path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser that decorates with the source text.
parserWithSource :: FilePath -> Parser (Syntax Text) (Record '[SourceText, Range, Category, SourceSpan])
parserWithSource path blob = decorateTerm (termSourceDecorator (source blob)) <$> parserForType (toS (takeExtension path)) blob
-- | Return a parser based on the file extension (including the ".").
parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
Just Markdown -> cmarkParser
Just Ruby -> treeSitterParser Ruby ts_language_ruby
Just Language.Go -> treeSitterParser Language.Go ts_language_go
_ -> lineByLineParser
-- | Decorate a 'Term' using a function to compute the annotation values at every node.
decorateTerm :: (Functor f) => TermDecorator f fields field -> Term f (Record fields) -> Term f (Record (field ': fields))
decorateTerm decorator = cata $ \ term -> cofree ((decorator (extract <$> term) .: headF term) :< tailF term)
-- | A function computing a value to decorate terms with. This can be used to cache synthesized attributes on terms.
type TermDecorator f fields field = TermF f (Record fields) (Record (field ': fields)) -> field
-- | Term decorator computing the cost of an unpacked term.
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c)
-- | Term decorator extracting the source text for a term.
termSourceDecorator :: (HasField fields Range) => Source Char -> TermDecorator f fields SourceText
termSourceDecorator source c = SourceText . toText $ Source.slice range' source
where range' = characterRange $ headF c
-- | A fallback parser that treats a file simply as rows of strings.
lineByLineParser :: Parser (Syntax Text) (Record '[Range, Category, SourceSpan])
lineByLineParser SourceBlob{..} = pure . cofree . root $ case foldl' annotateLeaves ([], 0) lines of
(leaves, _) -> cofree <$> leaves
where
lines = actualLines source
root children = (Range 0 (length source) .: Program .: rangeToSourceSpan source (Range 0 (length source)) .: RNil) :< Indexed children
leaf charIndex line = (Range charIndex (charIndex + T.length line) .: Program .: rangeToSourceSpan source (Range charIndex (charIndex + T.length line)) .: RNil) :< Leaf line
annotateLeaves (accum, charIndex) line =
(accum <> [ leaf charIndex (toText line) ] , charIndex + length line)
toText = T.pack . Source.toString
-- | Return the parser that should be used for a given path.
parserForFilepath :: FilePath -> Parser (Syntax Text) (Record '[Cost, Range, Category, SourceSpan])
parserForFilepath path blob = decorateTerm termCostDecorator <$> parserForType (toS (takeExtension path)) blob
-- | Read the file and convert it to Unicode.
readAndTranscodeFile :: FilePath -> IO (Source Char)
readAndTranscodeFile path = do
text <- B1.readFile path
transcode text
-- | Transcode a file to a unicode source.
transcode :: B1.ByteString -> IO (Source Char)
transcode text = fromText <$> do
match <- Detect.detectCharset text
converter <- Convert.open match Nothing
pure $ Convert.toUnicode converter text

View File

@ -3,7 +3,7 @@ module Prologue
, lookup
, (&&&)
, (***)
, hylo, cata, para
, hylo, cata, para, ana
, module Data.Hashable
, last
) where
@ -17,6 +17,6 @@ import Control.Comonad as X
import Control.Arrow ((&&&), (***))
import Data.Functor.Foldable (hylo, cata, para)
import Data.Functor.Foldable (hylo, cata, para, ana)
import Data.Hashable

View File

@ -10,7 +10,7 @@ import Test.QuickCheck
-- | A half-open interval of integers, defined by start & end indices.
data Range = Range { start :: Int, end :: Int }
deriving (Eq, Show)
deriving (Eq, Show, Generic)
-- | Make a range at a given index.
rangeAt :: Int -> Range

View File

@ -27,7 +27,8 @@ json blobs diff = JSONOutput $ Map.fromList [
("rows", toJSON (annotateRows (alignDiff (source <$> blobs) diff))),
("oids", toJSON (oid <$> blobs)),
("paths", toJSON (path <$> blobs)) ]
where annotateRows = fmap (fmap NumberedLine) . numberedRows
where annotateRows :: [Join These a] -> [Join These (NumberedLine a)]
annotateRows = fmap (fmap NumberedLine) . numberedRows
-- | A numbered 'a'.
newtype NumberedLine a = NumberedLine (Int, a)
@ -65,7 +66,11 @@ instance (ToJSON (Record fields), ToJSON leaf, HasField fields Category, HasFiel
toEncoding term |
(info :< syntax) <- runCofree term = pairs $ mconcat (termFields info syntax)
lineFields :: (ToJSON leaf, ToJSON (Record fields), HasField fields Category, HasField fields Range) => KeyValue kv => Int -> SplitSyntaxDiff leaf fields -> Range -> [kv]
lineFields :: (ToJSON leaf, ToJSON (Record fields), HasField fields Category, HasField fields Range, KeyValue kv) =>
Int ->
SplitSyntaxDiff leaf fields ->
Range ->
[kv]
lineFields n term range = [ "number" .= n
, "terms" .= [ term ]
, "range" .= range
@ -78,7 +83,9 @@ termFields :: (ToJSON recur, KeyValue kv, HasField fields Category, HasField fie
[kv]
termFields info syntax = "range" .= characterRange info : "category" .= category info : syntaxToTermField syntax
patchFields :: (ToJSON (Record fields), ToJSON leaf, KeyValue kv, HasField fields Category, HasField fields Range) => SplitPatch (SyntaxTerm leaf fields) -> [kv]
patchFields :: (ToJSON (Record fields), ToJSON leaf, KeyValue kv, HasField fields Category, HasField fields Range) =>
SplitPatch (SyntaxTerm leaf fields) ->
[kv]
patchFields patch = case patch of
SplitInsert term -> fields "insert" term
SplitDelete term -> fields "delete" term
@ -87,7 +94,9 @@ patchFields patch = case patch of
fields kind term |
(info :< syntax) <- runCofree term = "patch" .= T.pack kind : termFields info syntax
syntaxToTermField :: (ToJSON recur, KeyValue kv) => Syntax leaf recur -> [kv]
syntaxToTermField :: (ToJSON recur, KeyValue kv) =>
Syntax leaf recur ->
[kv]
syntaxToTermField syntax = case syntax of
Leaf _ -> []
Indexed c -> childrenFields c

View File

@ -25,11 +25,17 @@ import qualified Source
import qualified Control.Concurrent.Async.Pool as Async
import GHC.Conc (numCapabilities)
import Development.GitRev
import Parse
main :: IO ()
main = do
args@Arguments{..} <- programArguments =<< execParser argumentsParser
case diffMode of
case runMode of
Diff -> runDiff args
Parse -> Parse.run args
runDiff :: Arguments -> IO ()
runDiff args@Arguments{..} = case diffMode of
PathDiff paths -> diffPaths args paths
CommitDiff -> diffCommits args
@ -50,6 +56,7 @@ argumentsParser = info (version <*> helper <*> argumentsP)
<*> switch (long "no-index" <> help "compare two paths on the filesystem")
<*> some (argument (eitherReader parseShasAndFiles) (metavar "SHA_A..SHAB FILES..."))
<*> switch (long "development" <> short 'd' <> help "set development mode which prevents timeout behavior by default")
<*> flag Diff Parse (long "parse" <> short 'p' <> help "parses a source file without diffing")
where
parseShasAndFiles :: String -> Either String ExtraArg
parseShasAndFiles s = case matchRegex regex s of
@ -79,7 +86,7 @@ diffPaths :: Arguments -> Both FilePath -> IO ()
diffPaths args@Arguments{..} paths = do
sources <- sequence $ readAndTranscodeFile <$> paths
let sourceBlobs = Source.SourceBlob <$> sources <*> pure mempty <*> paths <*> pure (Just Source.defaultPlainBlob)
D.printDiff (parserForFilepath (fst paths)) (diffArgs args) sourceBlobs
D.printDiff (parserWithCost (fst paths)) (diffArgs args) sourceBlobs
where
diffArgs Arguments{..} = R.DiffArguments { format = format, output = output }
@ -104,7 +111,7 @@ fetchDiff' Arguments{..} filepath = do
let sources = fromMaybe (Source.emptySourceBlob filepath) <$> sourcesAndOids
let sourceBlobs = Source.idOrEmptySourceBlob <$> sources
let textDiff = D.textDiff (parserForFilepath filepath) diffArguments sourceBlobs
let textDiff = D.textDiff (parserWithCost filepath) diffArguments sourceBlobs
text <- fetchText textDiff
truncatedPatch <- liftIO $ D.truncatedDiff diffArguments sourceBlobs

View File

@ -29,8 +29,8 @@ data SourcePos = SourcePos
} deriving (Show, Read, Eq, Ord, Generic, Hashable)
displaySourcePos :: SourcePos -> Text
displaySourcePos sp =
"line " <> show (line sp) <> ", column " <> show (column sp)
displaySourcePos SourcePos{..} =
"line " <> show line <> ", column " <> show column
instance A.ToJSON SourcePos where
toJSON SourcePos{..} =

View File

@ -1,11 +0,0 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Term.Instances where
import Prologue
import Data.Record
import Term
import Data.Aeson
instance (ToJSON leaf, ToJSON (Record fields)) => ToJSON (SyntaxTerm leaf fields) where
toJSON syntaxTerm = case runCofree syntaxTerm of
(record :< syntax) -> object [ ("record", toJSON record), ("syntax", toJSON syntax) ]

View File

@ -11,6 +11,7 @@ import Diffing
import GHC.Show (Show(..))
import Info
import Prologue hiding (fst, snd, lookup)
import Parse
import Renderer
import qualified Renderer.JSON as J
import qualified Renderer.Patch as P
@ -84,7 +85,7 @@ testDiff renderer paths diff matcher = do
expected <- Verbatim <$> readFile file
matcher actual (Just expected)
where diffFiles' sources parser = diffFiles parser renderer (sourceBlobs sources paths)
parser = parserForFilepath <$> runBothWith (<|>) paths
parser = parserWithCost <$> runBothWith (<|>) paths
sourceBlobs :: Both (Maybe (S.Source Char)) -> Both (Maybe FilePath) -> Both S.SourceBlob
sourceBlobs sources paths = case runJoin paths of
(Nothing, Nothing) -> Join (S.emptySourceBlob "", S.emptySourceBlob "")