1
1
mirror of https://github.com/github/semantic.git synced 2025-01-07 07:58:12 +03:00

Merge remote-tracking branch 'origin/master' into remove-breakdown-leaves-by-word

This commit is contained in:
Timothy Clem 2016-09-19 08:35:19 -07:00
commit 9dbb004085
29 changed files with 217 additions and 116 deletions

View File

@ -24,7 +24,6 @@ library
, Data.Record
, Data.Mergeable
, Data.Mergeable.Generic
, Data.These.Arbitrary
, Diff
, Diff.Arbitrary
, Diffing
@ -33,6 +32,7 @@ library
, Language
, Language.C
, Language.JavaScript
, Language.Markdown
, Parser
, Patch
, Patch.Arbitrary
@ -59,6 +59,7 @@ library
, blaze-html
, blaze-markup
, bytestring
, cmark
, containers
, directory
, dlist
@ -84,7 +85,7 @@ library
, quickcheck-instances
default-language: Haskell2010
default-extensions: DeriveFunctor, DeriveFoldable, DeriveTraversable, DeriveGeneric, FlexibleContexts, FlexibleInstances, OverloadedStrings, NoImplicitPrelude, RecordWildCards, LambdaCase
ghc-options: -Wall -fno-warn-name-shadowing -O2 -threaded -fprof-auto "-with-rtsopts=-N -p -s -h -i0.1" -j
ghc-options: -Wall -fno-warn-name-shadowing -O2 -fprof-auto -j
benchmark semantic-diff-bench
type: exitcode-stdio-1.0
@ -114,6 +115,8 @@ test-suite semantic-diff-test
, DiffSummarySpec
, InterpreterSpec
, PatchOutputSpec
, RangeSpec
, Source.Spec
, TermSpec
build-depends: base
, bifunctors

View File

@ -24,7 +24,7 @@ import Info
import Patch
import Prologue hiding (fst, snd)
import Range
import Source hiding (break, fromList, uncons, (++))
import Source hiding (break, fromList, uncons)
import SplitDiff
import Syntax
import Term
@ -38,7 +38,7 @@ numberedRows = countUp (both 1 1)
nextLineNumbers from row = modifyJoin (fromThese identity identity) (succ <$ row) <*> from
-- | Determine whether a line contains any patches.
hasChanges :: (Prologue.Foldable f, Functor f) => SplitDiff f annotation -> Bool
hasChanges :: (Foldable f, Functor f) => SplitDiff f annotation -> Bool
hasChanges = or . (True <$)
-- | Align a Diff into a list of Join These SplitDiffs representing the (possibly blank) lines on either side.

View File

@ -16,8 +16,8 @@ data Category
| Boolean
-- | A bitwise operator.
| BitwiseOperator
-- | An operator with 2 operands.
| BinaryOperator
-- | A boolean operator (e.g. ||, &&).
| BooleanOperator
-- | A literal key-value data structure.
| DictionaryLiteral
-- | A pair, e.g. of a key & value
@ -104,6 +104,8 @@ data Category
| RelationalOperator
-- | An empty statement. (e.g. ; in JavaScript)
| Empty
-- | A mathematical operator (e.g. +, -, *, /).
| MathOperator
deriving (Eq, Generic, Ord, Show)
-- Instances
@ -115,7 +117,8 @@ instance Arbitrary Category where
pure Program
, pure Error
, pure Boolean
, pure BinaryOperator
, pure BooleanOperator
, pure MathOperator
, pure DictionaryLiteral
, pure Pair
, pure FunctionCall

View File

@ -4,7 +4,6 @@ module Data.Bifunctor.Join.Arbitrary where
import Data.Bifunctor.Join
import Data.Functor.Both as Both
import Data.These
import Data.These.Arbitrary ()
import Prologue
import Test.QuickCheck

View File

@ -24,6 +24,10 @@ fst = Prologue.fst . runJoin
snd :: Both a -> a
snd = Prologue.snd . runJoin
instance Monoid a => Monoid (Join (,) a) where
instance (Semigroup a, Monoid a) => Monoid (Join (,) a) where
mempty = pure mempty
mappend a b = mappend <$> a <*> b
mappend = (<>)
instance (Semigroup a) => Semigroup (Join (,) a) where
a <> b = Join $ runJoin a <> runJoin b

View File

@ -32,7 +32,7 @@ import Test.QuickCheck hiding (Fixed)
import Test.QuickCheck.Random
-- | Given a function comparing two terms recursively, and a function to compute a Hashable label from an unpacked term, compute the diff of a pair of lists of terms using a random walk similarity metric, which completes in log-linear time. This implementation is based on the paper [_RWS-Diff—Flexible and Efficient Change Detection in Hierarchical Data_](https://github.com/github/semantic-diff/files/325837/RWS-Diff.Flexible.and.Efficient.Change.Detection.in.Hierarchical.Data.pdf).
rws :: forall f fields. (Eq (Record fields), Prologue.Foldable f, Functor f, Eq (f (Cofree f (Record fields))), HasField fields (Vector.Vector Double))
rws :: forall f fields. (Foldable f, Functor f, HasField fields (Vector.Vector Double))
=> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function which compares a pair of terms recursively, returning 'Just' their diffed value if appropriate, or 'Nothing' if they should not be compared.
-> [Cofree f (Record fields)] -- ^ The list of old terms.
-> [Cofree f (Record fields)] -- ^ The list of new terms.
@ -83,7 +83,7 @@ rws compare as bs
deleteRemaining diffs (_, unmappedA, _) = foldl' (flip (List.insertBy (comparing fst))) diffs ((termIndex &&& deleting . term) <$> unmappedA)
-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost.
editDistanceUpTo :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
editDistanceUpTo :: (Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
editDistanceUpTo m = diffSum (patchSum termSize) . cutoff m
where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff

View File

@ -1,12 +0,0 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.These.Arbitrary where
import Data.These
import Prologue
import Test.QuickCheck
instance (Arbitrary a, Arbitrary b) => Arbitrary (These a b) where
arbitrary = oneof [ This <$> arbitrary
, That <$> arbitrary
, These <$> arbitrary <*> arbitrary ]
shrink = these (fmap This . shrink) (fmap That . shrink) (\ a b -> (This <$> shrink a) ++ (That <$> shrink b) ++ (These <$> shrink a <*> shrink b))

View File

@ -19,26 +19,26 @@ type SyntaxDiff leaf fields = Diff (Syntax leaf) (Record fields)
type instance Base (Free f a) = FreeF f a
instance Functor f => Foldable.Foldable (Free f a) where project = runFree
instance Functor f => Foldable.Unfoldable (Free f a) where embed = free
instance Functor f => Recursive (Free f a) where project = runFree
instance Functor f => Corecursive (Free f a) where embed = free
diffSum :: (Prologue.Foldable f, Functor f) => (Patch (Term f annotation) -> Int) -> Diff f annotation -> Int
diffSum :: (Foldable f, Functor f) => (Patch (Term f annotation) -> Int) -> Diff f annotation -> Int
diffSum patchCost diff = sum $ fmap patchCost diff
-- | The sum of the node count of the diffs patches.
diffCost :: (Prologue.Foldable f, Functor f) => Diff f annotation -> Int
diffCost :: (Foldable f, Functor f) => Diff f annotation -> Int
diffCost = diffSum $ patchSum termSize
-- | Merge a diff using a function to provide the Term (in Maybe, to simplify recovery of the before/after state) for every Patch.
mergeMaybe :: (Functor f, Mergeable f) => (Patch (Term f annotation) -> Maybe (Term f annotation)) -> Diff f annotation -> Maybe (Term f annotation)
mergeMaybe :: Mergeable f => (Patch (Term f annotation) -> Maybe (Term f annotation)) -> Diff f annotation -> Maybe (Term f annotation)
mergeMaybe transform = iter algebra . fmap transform
where algebra :: Mergeable f => TermF f (Both annotation) (Maybe (Term f annotation)) -> Maybe (Term f annotation)
algebra (annotations :< syntax) = cofree . (Both.fst annotations :<) <$> sequenceAlt syntax
-- | Recover the before state of a diff.
beforeTerm :: (Functor f, Mergeable f) => Diff f annotation -> Maybe (Term f annotation)
beforeTerm :: Mergeable f => Diff f annotation -> Maybe (Term f annotation)
beforeTerm = mergeMaybe before
-- | Recover the after state of a diff.
afterTerm :: (Functor f, Mergeable f) => Diff f annotation -> Maybe (Term f annotation)
afterTerm :: Mergeable f => Diff f annotation -> Maybe (Term f annotation)
afterTerm = mergeMaybe after

View File

@ -24,22 +24,21 @@ import Source
data Identifiable a = Identifiable a | Unidentifiable a
isIdentifiable :: (HasCategory leaf, HasField fields Category, HasField fields Range) => SyntaxTerm leaf fields -> Bool
isIdentifiable term =
case unwrap term of
S.FunctionCall _ _ -> True
S.Function{} -> True
S.Assignment{} -> True
S.MathAssignment{} -> True
S.VarAssignment{} -> True
S.SubscriptAccess{} -> True
S.Class _ _ _ -> True
S.Method _ _ _ -> True
S.Leaf _ -> True
_ -> False
identifiable :: (HasCategory leaf, HasField fields Category, HasField fields Range) => SyntaxTerm leaf fields -> Identifiable (SyntaxTerm leaf fields)
identifiable term = if isIdentifiable term then Identifiable term else Unidentifiable term
identifiable :: SyntaxTerm leaf fields -> Identifiable (SyntaxTerm leaf fields)
identifiable term = isIdentifiable (unwrap term) $ term
where isIdentifiable = \case
S.FunctionCall{} -> Identifiable
S.MethodCall{} -> Identifiable
S.Function{} -> Identifiable
S.Assignment{} -> Identifiable
S.MathAssignment{} -> Identifiable
S.VarAssignment{} -> Identifiable
S.SubscriptAccess{} -> Identifiable
S.Class{} -> Identifiable
S.Method{} -> Identifiable
S.Leaf{} -> Identifiable
S.DoWhile{} -> Identifiable
_ -> Unidentifiable
data DiffInfo = LeafInfo { categoryName :: Text, termName :: Text }
| BranchInfo { branches :: [ DiffInfo ], categoryName :: Text, branchType :: Branch }
@ -141,8 +140,8 @@ toTermName source term = case unwrap term of
S.Ternary expr _ -> toTermName' expr
S.MathAssignment id _ -> toTermName' id
S.Operator _ -> termNameFromSource term
S.Object kvs -> "{" <> intercalate ", " (toTermName' <$> kvs) <> "}"
S.Pair a b -> toTermName' a <> ": " <> toTermName' b
S.Object kvs -> "{ " <> intercalate ", " (toTermName' <$> kvs) <> " }"
S.Pair a _ -> toTermName' a <> ": "
S.Return expr -> maybe "empty" toTermName' expr
S.Error _ _ -> termNameFromSource term
S.If expr _ _ -> termNameFromSource expr
@ -162,14 +161,14 @@ toTermName source term = case unwrap term of
termNameFromSource term = termNameFromRange (range term)
termNameFromRange range = toText $ Source.slice range source
range = characterRange . extract
toArgName :: (HasCategory leaf, HasField fields Category, HasField fields Range) => SyntaxTerm leaf fields -> Text
toArgName :: SyntaxTerm leaf fields -> Text
toArgName arg = case identifiable arg of
Identifiable arg -> toTermName' arg
Unidentifiable _ -> "..."
Unidentifiable _ -> ""
maybeParentContext :: Maybe (Category, Text) -> Doc
maybeParentContext = maybe "" (\annotation ->
space <> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation))
space P.<> "in the" <+> (toDoc $ snd annotation) <+> toDoc (toCategoryName $ fst annotation))
toDoc :: Text -> Doc
toDoc = string . toS
@ -222,7 +221,8 @@ instance HasCategory Text where
instance HasCategory Category where
toCategoryName = \case
ArrayLiteral -> "array"
BinaryOperator -> "binary operator"
BooleanOperator -> "boolean operator"
MathOperator -> "math operator"
BitwiseOperator -> "bitwise operator"
RelationalOperator -> "relational operator"
Boolean -> "boolean"
@ -242,7 +242,7 @@ instance HasCategory Category where
C.Case -> "case statement"
C.SubscriptAccess -> "subscript access"
C.MathAssignment -> "math assignment"
C.Ternary -> "ternary"
C.Ternary -> "ternary expression"
C.Operator -> "operator"
Identifier -> "identifier"
IntegerLiteral -> "integer"
@ -270,18 +270,18 @@ instance HasCategory Category where
C.CommaOperator -> "comma operator"
C.Empty -> "empty statement"
instance (HasCategory leaf, HasField fields Category) => HasCategory (SyntaxTerm leaf fields) where
instance HasField fields Category => HasCategory (SyntaxTerm leaf fields) where
toCategoryName = toCategoryName . category . extract
instance Arbitrary Branch where
arbitrary = oneof [ pure BIndexed, pure BFixed ]
shrink = genericShrink
instance (Eq a, Arbitrary a) => Arbitrary (DiffSummary a) where
instance Arbitrary a => Arbitrary (DiffSummary a) where
arbitrary = DiffSummary <$> arbitrary <*> arbitrary
shrink = genericShrink
instance P.Pretty DiffInfo where
pretty LeafInfo{..} = squotes (string $ toSL termName) <+> (string $ toSL categoryName)
pretty BranchInfo{..} = mconcat $ punctuate (string "," <> space) (pretty <$> branches)
pretty BranchInfo{..} = mconcat $ punctuate (string "," P.<> space) (pretty <$> branches)
pretty ErrorInfo{..} = squotes (string $ toSL termName) <+> "at" <+> (string . toSL $ displayStartEndPos errorSpan) <+> "in" <+> (string . toSL $ spanName errorSpan)

View File

@ -1,8 +1,8 @@
{-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-}
module Diffing where
import qualified Prologue
import Prologue hiding (fst, snd)
import Category
import qualified Data.ByteString.Char8 as B1
import Data.Functor.Both
import Data.Functor.Foldable
@ -18,6 +18,7 @@ import Diff
import Info
import Interpreter
import Language
import Language.Markdown
import Parser
import Patch
import Range
@ -26,7 +27,7 @@ import Renderer.JSON
import Renderer.Patch
import Renderer.Split
import Renderer.Summary
import Source hiding ((++))
import Source
import Syntax
import System.Directory
import System.FilePath
@ -35,7 +36,6 @@ import Term
import TreeSitter
import Text.Parser.TreeSitter.Language
import qualified Data.Text as T
import Category
import Data.Aeson (toJSON, toEncoding)
import Data.Aeson.Encoding (encodingToLazyByteString)
@ -43,7 +43,7 @@ import Data.Aeson.Encoding (encodingToLazyByteString)
-- | result.
-- | Returns the rendered result strictly, so it's always fully evaluated
-- | with respect to other IO actions.
diffFiles :: (HasField fields Category, HasField fields Cost, HasField fields Range, Eq (Record fields)) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Output
diffFiles :: (HasField fields Category, HasField fields Cost) => Parser (Syntax Text) (Record fields) -> Renderer (Record (Vector.Vector Double ': fields)) -> Both SourceBlob -> IO Output
diffFiles parser renderer sourceBlobs = do
terms <- traverse (fmap (defaultFeatureVectorDecorator getLabel) . parser) sourceBlobs
@ -70,6 +70,7 @@ parserForType :: Text -> Parser (Syntax Text) (Record '[Range, Category])
parserForType mediaType = case languageForType mediaType of
Just C -> treeSitterParser C ts_language_c
Just JavaScript -> treeSitterParser JavaScript ts_language_javascript
Just Markdown -> cmarkParser
Just Ruby -> treeSitterParser Ruby ts_language_ruby
_ -> lineByLineParser
@ -111,7 +112,7 @@ decorateTerm :: Functor f => TermDecorator f fields field -> Cofree f (Record fi
decorateTerm decorator = cata $ \ c -> cofree ((decorator (extract <$> c) .: headF c) :< tailF c)
-- | Term decorator computing the cost of an unpacked term.
termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator :: (Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c)
-- | Determine whether two terms are comparable based on the equality of their categories.
@ -125,7 +126,7 @@ diffCostWithCachedTermCosts diff = unCost $ case runFree diff of
Pure patch -> sum (cost . extract <$> patch)
-- | Returns a rendered diff given a parser, diff arguments and two source blobs.
textDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Output
textDiff :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO Output
textDiff parser arguments = diffFiles parser $ case format arguments of
Split -> split
Patch -> patch
@ -141,7 +142,7 @@ truncatedDiff arguments sources = pure $ case format arguments of
Summary -> SummaryOutput mempty
-- | Prints a rendered diff to stdio or a filepath given a parser, diff arguments and two source blobs.
printDiff :: (Eq (Record fields), HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO ()
printDiff :: (HasField fields Category, HasField fields Cost, HasField fields Range) => Parser (Syntax Text) (Record fields) -> DiffArguments -> Both SourceBlob -> IO ()
printDiff parser arguments sources = do
rendered <- textDiff parser arguments sources
let renderedText = case rendered of

View File

@ -25,7 +25,7 @@ type Comparable f annotation = Term f annotation -> Term f annotation -> Bool
type DiffConstructor f annotation = TermF f (Both annotation) (Diff f annotation) -> Diff f annotation
-- | Diff two terms recursively, given functions characterizing the diffing.
diffTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double))
diffTerms :: (Eq leaf, HasField fields Category, HasField fields (Vector.Vector Double))
=> DiffConstructor (Syntax leaf) (Record fields) -- ^ A function to wrap up & possibly annotate every produced diff.
-> Comparable (Syntax leaf) (Record fields) -- ^ A function to determine whether or not two terms should even be compared.
-> SES.Cost (SyntaxDiff leaf fields) -- ^ A function to compute the cost of a given diff node.
@ -35,7 +35,7 @@ diffTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fi
diffTerms construct comparable cost a b = fromMaybe (replacing a b) $ diffComparableTerms construct comparable cost a b
-- | Diff two terms recursively, given functions characterizing the diffing. If the terms are incomparable, returns 'Nothing'.
diffComparableTerms :: (Eq leaf, Eq (Record fields), HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor (Syntax leaf) (Record fields) -> Comparable (Syntax leaf) (Record fields) -> SES.Cost (SyntaxDiff leaf fields) -> SyntaxTerm leaf fields -> SyntaxTerm leaf fields -> Maybe (SyntaxDiff leaf fields)
diffComparableTerms :: (Eq leaf, HasField fields Category, HasField fields (Vector.Vector Double)) => DiffConstructor (Syntax leaf) (Record fields) -> Comparable (Syntax leaf) (Record fields) -> SES.Cost (SyntaxDiff leaf fields) -> SyntaxTerm leaf fields -> SyntaxTerm leaf fields -> Maybe (SyntaxDiff leaf fields)
diffComparableTerms construct comparable cost = recur
where recur a b
| (category <$> a) == (category <$> b) = hylo construct runCofree <$> zipTerms a b
@ -71,7 +71,7 @@ algorithmWithTerms construct t1 t2 = case (unwrap t1, unwrap t2) of
branch constructor a b = bySimilarity a b >>= annotate . constructor
-- | Run an algorithm, given functions characterizing the evaluation.
runAlgorithm :: (Functor f, GAlign f, Eq a, Eq (Record fields), Eq (f (Cofree f (Record fields))), Prologue.Foldable f, Traversable f, HasField fields (Vector.Vector Double))
runAlgorithm :: (GAlign f, Traversable f, HasField fields (Vector.Vector Double))
=> (CofreeF f (Both (Record fields)) (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to wrap up & possibly annotate every produced diff.
-> (Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))) -- ^ A function to diff two subterms recursively, if they are comparable, or else return 'Nothing'.
-> SES.Cost (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))) -- ^ A function to compute the cost of a given diff node.

View File

@ -20,6 +20,7 @@ data Language =
| HTML
| Java
| JavaScript
| Markdown
| ObjectiveC
| Perl
| PHP
@ -35,6 +36,7 @@ languageForType mediaType = case mediaType of
".h" -> Just C
".c" -> Just C
".js" -> Just JavaScript
".md" -> Just Markdown
".rb" -> Just Ruby
_ -> Nothing

View File

@ -88,8 +88,8 @@ categoryForJavaScriptProductionName name = case name of
"undefined" -> Identifier
"arrow_function" -> Function
"generator_function" -> Function
"math_op" -> BinaryOperator -- bitwise operator, e.g. +, -, *, /.
"bool_op" -> BinaryOperator -- boolean operator, e.g. ||, &&.
"math_op" -> MathOperator -- math operator, e.g. +, -, *, /.
"bool_op" -> BooleanOperator -- boolean operator, e.g. ||, &&.
"comma_op" -> CommaOperator -- comma operator, e.g. expr1, expr2.
"delete_op" -> Operator -- delete operator, e.g. delete x[2].
"type_op" -> Operator -- type operator, e.g. typeof Object.

38
src/Language/Markdown.hs Normal file
View File

@ -0,0 +1,38 @@
{-# LANGUAGE DataKinds #-}
module Language.Markdown where
import CMark
import Data.Record
import Data.Text
import Info
import Parser
import Prologue
import Range
import Source
import SourceSpan
import Syntax
cmarkParser :: Parser (Syntax Text) (Record '[Range, Category])
cmarkParser SourceBlob{..} = pure . toTerm (totalRange source) $ commonmarkToNode [ optSourcePos, optSafe ] (toText source)
where toTerm :: Range -> Node -> Cofree (Syntax Text) (Record '[Range, Category])
toTerm within (Node position t children) = let range = maybe within (sourceSpanToRange source . toSpan) position in cofree $ (range .: toCategory t .: RNil) :< case t of
-- Leaves
CODE text -> Leaf text
TEXT text -> Leaf text
CODE_BLOCK _ text -> Leaf text
-- Branches
_ -> Indexed (toTerm range <$> children)
toCategory :: NodeType -> Category
toCategory (TEXT _) = Other "text"
toCategory (CODE _) = Other "code"
toCategory (HTML_BLOCK _) = Other "html"
toCategory (HTML_INLINE _) = Other "html"
toCategory (HEADING _) = Other "heading"
toCategory (LIST (ListAttributes{..})) = Other $ case listType of
BULLET_LIST -> "unordered list"
ORDERED_LIST -> "ordered list"
toCategory (LINK{}) = Other "link"
toCategory (IMAGE{}) = Other "image"
toCategory t = Other (show t)
toSpan PosInfo{..} = SourceSpan "" (SourcePos (pred startLine) (pred startColumn)) (SourcePos (pred endLine) endColumn)

View File

@ -1,7 +1,6 @@
module Prologue
( module X
, lookup
, traceShowId
) where
import Protolude as X
@ -10,10 +9,3 @@ import Data.List (lookup)
import Control.Comonad.Trans.Cofree as X
import Control.Monad.Trans.Free as X
import Control.Comonad as X
import qualified GHC.Show as P
import qualified Debug.Trace as T
{-# WARNING traceShowId "'traceShowId' remains in code" #-}
traceShowId :: P.Show a => a -> a
traceShowId a = T.trace (P.show a) a

View File

@ -13,7 +13,7 @@ import Syntax
-- | A function that will render a diff, given the two source blobs.
type Renderer annotation = Both SourceBlob -> Diff (Syntax Text) annotation -> Output
data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath, outputPath :: FilePath }
data DiffArguments = DiffArguments { format :: Format, output :: Maybe FilePath }
deriving (Show)
-- | The available types of diff rendering.

View File

@ -126,14 +126,14 @@ hunks diff blobs = hunksInRows (pure 1) $ alignDiff (source <$> blobs) diff
-- | Given beginning line numbers, turn rows in a split diff into hunks in a
-- | patch.
hunksInRows :: (Prologue.Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> [Hunk (SplitDiff f annotation)]
hunksInRows :: (Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> [Hunk (SplitDiff f annotation)]
hunksInRows start rows = case nextHunk start rows of
Nothing -> []
Just (hunk, rest) -> hunk : hunksInRows (offset hunk <> hunkLength hunk) rest
-- | Given beginning line numbers, return the next hunk and the remaining rows
-- | of the split diff.
nextHunk :: (Prologue.Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> Maybe (Hunk (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
nextHunk :: (Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> Maybe (Hunk (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
nextHunk start rows = case nextChange start rows of
Nothing -> Nothing
Just (offset, change, rest) -> let (changes, rest') = contiguousChanges rest in Just (Hunk offset (change : changes) $ take 3 rest', drop 3 rest')
@ -145,7 +145,7 @@ nextHunk start rows = case nextChange start rows of
-- | Given beginning line numbers, return the number of lines to the next
-- | the next change, and the remaining rows of the split diff.
nextChange :: (Prologue.Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> Maybe (Both (Sum Int), Change (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
nextChange :: (Foldable f, Functor f) => Both (Sum Int) -> [Join These (SplitDiff f annotation)] -> Maybe (Both (Sum Int), Change (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
nextChange start rows = case changeIncludingContext leadingContext afterLeadingContext of
Nothing -> Nothing
Just (change, afterChanges) -> Just (start <> mconcat (rowIncrement <$> skippedContext), change, afterChanges)
@ -155,12 +155,12 @@ nextChange start rows = case changeIncludingContext leadingContext afterLeadingC
-- | Return a Change with the given context and the rows from the begginning of
-- | the given rows that have changes, or Nothing if the first row has no
-- | changes.
changeIncludingContext :: (Prologue.Foldable f, Functor f) => [Join These (SplitDiff f annotation)] -> [Join These (SplitDiff f annotation)] -> Maybe (Change (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
changeIncludingContext :: (Foldable f, Functor f) => [Join These (SplitDiff f annotation)] -> [Join These (SplitDiff f annotation)] -> Maybe (Change (SplitDiff f annotation), [Join These (SplitDiff f annotation)])
changeIncludingContext leadingContext rows = case changes of
[] -> Nothing
_ -> Just (Change leadingContext changes, afterChanges)
where (changes, afterChanges) = span rowHasChanges rows
-- | Whether a row has changes on either side.
rowHasChanges :: (Prologue.Foldable f, Functor f) => Join These (SplitDiff f annotation) -> Bool
rowHasChanges :: (Foldable f, Functor f) => Join These (SplitDiff f annotation) -> Bool
rowHasChanges row = or (hasChanges <$> row)

View File

@ -10,7 +10,7 @@ import Data.Record
import qualified Data.Text.Lazy as TL
import Data.These
import Info
import Prologue hiding (div, head, fst, snd, link)
import Prologue hiding (div, head, fst, snd, link, (<>))
import qualified Prologue
import Renderer
import Source
@ -33,7 +33,8 @@ styleName :: Category -> Text
styleName category = "category-" <> case category of
Program -> "program"
C.Error -> "error"
BinaryOperator -> "binary_operator"
BooleanOperator -> "boolean_operator"
MathOperator -> "math_operator"
BitwiseOperator -> "bitwise_operator"
RelationalOperator -> "relational_operator"
Boolean -> "boolean"
@ -114,9 +115,9 @@ split blobs diff = SplitOutput . TL.toStrict . renderHtml
numberedLinesToMarkup numberedLines = tr $ runBothWith (<>) (renderLine <$> Join (fromThese Nothing Nothing (runJoin (Just <$> numberedLines))) <*> sources) <> string "\n"
renderLine (Just (number, line)) source = toMarkup $ Cell (hasChanges line) number (Renderable source line)
renderLine _ _ =
td mempty ! A.class_ (stringValue "blob-num blob-num-empty empty-cell")
<> td mempty ! A.class_ (stringValue "blob-code blob-code-empty empty-cell")
renderLine _ _
= (td mempty ! A.class_ (stringValue "blob-num blob-num-empty empty-cell"))
<> (td mempty ! A.class_ (stringValue "blob-code blob-code-empty empty-cell"))
<> string "\n"
-- | A cell in a table, characterized by whether it contains changes & its line number.
@ -142,12 +143,12 @@ wrapIn f p = f p
-- Instances
instance (ToMarkup f, HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SyntaxTermF leaf fields (f, Range))) where
instance (ToMarkup f, HasField fields Category, HasField fields Range) => ToMarkup (Renderable (SyntaxTermF leaf fields (f, Range))) where
toMarkup (Renderable source (info :< syntax)) = classifyMarkup (category info) $ case syntax of
Leaf _ -> span . string . toString $ slice (characterRange info) source
_ -> ul . mconcat $ wrapIn li <$> contentElements source (characterRange info) (toList syntax)
instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SyntaxTerm leaf fields)) where
instance (HasField fields Category, HasField fields Range) => ToMarkup (Renderable (SyntaxTerm leaf fields)) where
toMarkup (Renderable source term) = Prologue.fst $ cata (\ t -> (toMarkup $ Renderable source t, characterRange (headF t))) term
instance (HasField fields Category, HasField fields Cost, HasField fields Range) => ToMarkup (Renderable (SplitSyntaxDiff leaf fields)) where
@ -159,7 +160,10 @@ instance (HasField fields Category, HasField fields Cost, HasField fields Range)
| otherwise = identity
instance ToMarkup a => ToMarkup (Cell a) where
toMarkup (Cell hasChanges num line) =
td (string $ show num) ! A.class_ (stringValue $ if hasChanges then "blob-num blob-num-replacement" else "blob-num")
<> td (toMarkup line) ! A.class_ (stringValue $ if hasChanges then "blob-code blob-code-replacement" else "blob-code")
toMarkup (Cell hasChanges num line)
= (td (string (show num)) ! A.class_ (stringValue $ if hasChanges then "blob-num blob-num-replacement" else "blob-num"))
<> (td (toMarkup line) ! A.class_ (stringValue $ if hasChanges then "blob-code blob-code-replacement" else "blob-code"))
<> string "\n"
(<>) :: Monoid m => m -> m -> m
(<>) = mappend

View File

@ -7,6 +7,7 @@ import Data.String
import qualified Data.Vector as Vector
import Numeric
import Range
import SourceSpan
-- | The source, oid, path, and Maybe SourceKind of a blob in a Git repo.
data SourceBlob = SourceBlob { source :: Source Char, oid :: String, path :: FilePath, blobKind :: Maybe SourceKind }
@ -77,24 +78,31 @@ uncons (Source vector) = if null vector then Nothing else Just (Vector.head vect
break :: (a -> Bool) -> Source a -> (Source a, Source a)
break predicate (Source vector) = let (start, remainder) = Vector.break predicate vector in (Source start, Source remainder)
-- | Concatenate two sources.
(++) :: Source a -> Source a -> Source a
(++) (Source a) = Source . (a Vector.++) . getVector
-- | Split the contents of the source after newlines.
actualLines :: Source Char -> [Source Char]
actualLines source | null source = [ source ]
actualLines source = case Source.break (== '\n') source of
(l, lines') -> case uncons lines' of
Nothing -> [ l ]
Just (_, lines') -> (l Source.++ fromList "\n") : actualLines lines'
Just (_, lines') -> (l <> fromList "\n") : actualLines lines'
-- | Compute the line ranges within a given range of a string.
actualLineRanges :: Range -> Source Char -> [Range]
actualLineRanges range = drop 1 . scanl toRange (Range (start range) (start range)) . actualLines . slice range
where toRange previous string = Range (end previous) $ end previous + length string
-- | Compute the character range corresponding to a given SourceSpan within a Source.
sourceSpanToRange :: Source Char -> SourceSpan -> Range
sourceSpanToRange source SourceSpan{..} = Range start end
where start = sumLengths leadingRanges + column spanStart
end = start + sumLengths (take (line spanEnd - line spanStart) remainingRanges) + (column spanEnd - column spanStart)
(leadingRanges, remainingRanges) = splitAt (line spanStart) (actualLineRanges (totalRange source) source)
sumLengths = sum . fmap (\ Range{..} -> end - start)
instance Semigroup (Source a) where
Source a <> Source b = Source (a Vector.++ b)
instance Monoid (Source a) where
mempty = fromList []
mappend = (Source.++)
mappend = (<>)

View File

@ -79,4 +79,4 @@ instance Arbitrary SourcePos where
instance Arbitrary SourceSpan where
arbitrary = SourceSpan <$> arbitrary <*> arbitrary <*> arbitrary
shrink = genericShrink
shrink = genericShrink

View File

@ -18,18 +18,18 @@ type SyntaxTermF leaf fields = TermF (Syntax leaf) (Record fields)
type SyntaxTerm leaf fields = Term (Syntax leaf) (Record fields)
type instance Base (Term f a) = TermF f a
instance Functor f => Foldable.Foldable (Term f a) where project = runCofree
instance Functor f => Foldable.Unfoldable (Term f a) where embed = cofree
instance Functor f => Recursive (Term f a) where project = runCofree
instance Functor f => Corecursive (Term f a) where embed = cofree
-- | Zip two terms by combining their annotations into a pair of annotations.
-- | If the structure of the two terms don't match, then Nothing will be returned.
zipTerms :: (Eq annotation, Traversable f, GAlign f) => Term f annotation -> Term f annotation -> Maybe (Term f (Both annotation))
zipTerms :: (Traversable f, GAlign f) => Term f annotation -> Term f annotation -> Maybe (Term f (Both annotation))
zipTerms t1 t2 = iter go (alignCofreeWith galign (const Nothing) both (These t1 t2))
where go (a :< s) = cofree . (a :<) <$> sequenceA s
-- | Return the node count of a term.
termSize :: (Prologue.Foldable f, Functor f) => Term f annotation -> Int
termSize :: (Foldable f, Functor f) => Term f annotation -> Int
termSize = cata size where
size (_ :< syntax) = 1 + sum syntax

View File

@ -1,7 +1,7 @@
{-# LANGUAGE TypeFamilies #-}
module Term.Arbitrary where
import Data.Functor.Foldable (Base, cata, unfold, Unfoldable(embed))
import Data.Functor.Foldable (Base, cata, unfold, Corecursive(embed))
import Data.Text.Arbitrary ()
import Prologue
import Syntax
@ -27,7 +27,7 @@ arbitraryTermSize = cata (succ . sum) . toTerm
-- Instances
type instance Base (ArbitraryTerm leaf annotation) = TermF (Syntax leaf) annotation
instance Unfoldable (ArbitraryTerm leaf annotation) where embed (a :< s) = ArbitraryTerm a s
instance Corecursive (ArbitraryTerm leaf annotation) where embed (a :< s) = ArbitraryTerm a s
instance (Eq leaf, Eq annotation, Arbitrary leaf, Arbitrary annotation) => Arbitrary (ArbitraryTerm leaf annotation) where
arbitrary = sized $ \ n -> do

View File

@ -2,7 +2,6 @@
module TreeSitter (treeSitterParser) where
import Prologue hiding (Constructor)
import Control.Monad
import Category
import Data.Record
import Language

View File

@ -10,7 +10,7 @@ import Data.Bifunctor.Join
import Data.Bifunctor.Join.Arbitrary ()
import Data.Functor.Both as Both
import Data.List (nub)
import Data.Monoid
import Data.Monoid hiding ((<>))
import Data.Record
import Data.String
import Data.Text.Arbitrary ()
@ -269,7 +269,7 @@ prettyDiff sources = PrettyDiff sources . fmap (fmap ((getRange &&& identity) .
data PrettyDiff a = PrettyDiff { unPrettySources :: Both (Source.Source Char), unPrettyLines :: [Join These (Range, a)] }
deriving Eq
instance Show a => Show (PrettyDiff a) where
instance Show (PrettyDiff a) where
showsPrec _ (PrettyDiff sources lines) = (prettyPrinted ++) -- . (("\n" ++ show lines) ++)
where prettyPrinted = showLine (maximum (0 : (maximum . fmap length <$> shownLines))) <$> shownLines >>= ('\n':)
shownLines = catMaybes $ toBoth <$> lines

View File

@ -5,6 +5,7 @@ import Data.Functor.Both
import Data.Functor.Foldable (cata)
import Data.RandomWalkSimilarity
import Data.Record
import qualified Data.Vector as Vector
import Diff
import Info
import Patch
@ -21,17 +22,16 @@ spec = parallel $ do
let positively = succ . abs
describe "pqGramDecorator" $ do
prop "produces grams with stems of the specified length" $
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term (Syntax Text) (Record '[Text])) `shouldSatisfy` all ((== (positively p)) . length . stem . rhead)
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term (Syntax Text) (Record '[Text])) `shouldSatisfy` all ((== positively p) . length . stem . rhead)
prop "produces grams with bases of the specified width" $
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term (Syntax Text) (Record '[Text])) `shouldSatisfy` all ((== (positively q)) . length . base . rhead)
\ (term, p, q) -> pqGramDecorator (rhead . headF) (positively p) (positively q) (toTerm term :: Term (Syntax Text) (Record '[Text])) `shouldSatisfy` all ((== positively q) . length . base . rhead)
describe "featureVectorDecorator" $ do
prop "produces a vector of the specified dimension" $
\ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (toTerm term :: Term (Syntax Text) (Record '[Text])) `shouldSatisfy` all ((== (positively d)) . length . rhead)
\ (term, p, q, d) -> featureVectorDecorator (rhead . headF) (positively p) (positively q) (positively d) (toTerm term :: Term (Syntax Text) (Record '[Text])) `shouldSatisfy` all ((== positively d) . length . rhead)
describe "rws" $ do
let decorate = defaultFeatureVectorDecorator (category . headF)
let toTerm' = decorate . toTerm
prop "produces correct diffs" . forAll (scale (`div` 4) arbitrary) $
\ (as, bs) -> let tas = toTerm' <$> (as :: [ArbitraryTerm Text (Record '[Category])])
@ -41,7 +41,7 @@ spec = parallel $ do
(beforeTerm diff, afterTerm diff) `shouldBe` (Just (root (stripTerm <$> tas)), Just (root (stripTerm <$> tbs)))
it "produces unbiased insertions within branches" $
let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "a") ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in
let (a, b) = (decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf ("a" :: Text)) ])), decorate (cofree ((StringLiteral .: RNil) :< Indexed [ cofree ((StringLiteral .: RNil) :< Leaf "b") ]))) in
fmap stripDiff (rws compare [ b ] [ a, b ]) `shouldBe` fmap stripDiff [ inserting a, copying b ]
where compare :: (HasField fields Category, Functor f, Eq (Cofree f Category)) => Cofree f (Record fields) -> Cofree f (Record fields) -> Maybe (Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields))))
@ -49,3 +49,5 @@ spec = parallel $ do
| otherwise = if ((==) `on` category . extract) a b then Just (replacing a b) else Nothing
copying :: Functor f => Cofree f (Record fields) -> Free (CofreeF f (Both (Record fields))) (Patch (Cofree f (Record fields)))
copying = cata wrap . fmap pure
decorate :: SyntaxTerm leaf '[Category] -> SyntaxTerm leaf '[Vector.Vector Double, Category]
decorate = defaultFeatureVectorDecorator (category . headF)

View File

@ -1,7 +1,8 @@
module RangeSpec where
import Test.Hspec
import Prologue
import Range
import Test.Hspec
spec :: Spec
spec = parallel $ do

53
test/Source/Spec.hs Normal file
View File

@ -0,0 +1,53 @@
module Source.Spec where
import qualified Prelude
import Prologue
import Range
import Source
import SourceSpan
import Test.Hspec
import Test.Hspec.QuickCheck
spec :: Spec
spec = parallel $ do
describe "actualLineRanges" $ do
prop "produces 1 more range than there are newlines" $
\ s -> length (actualLineRanges (totalRange s) (fromList s)) `shouldBe` succ (length (filter (== '\n') s))
prop "produces exhaustive ranges" $
\ s -> let source = fromList s in
foldMap (`slice` source) (actualLineRanges (totalRange s) source) `shouldBe` source
describe "sourceSpanToRange" $ do
prop "computes single-line ranges" $
\ s -> let source = fromList s
spans = zipWith (\ i Range {..} -> SourceSpan "" (SourcePos i 0) (SourcePos i (end - start))) [0..] ranges
ranges = actualLineRanges (totalRange source) source in
sourceSpanToRange source <$> spans `shouldBe` ranges
prop "computes multi-line ranges" $
\ s -> let source = fromList s in
sourceSpanToRange source (totalSpan source) `shouldBe` totalRange source
prop "computes sub-line ranges" $
\ s -> let source = fromList ('*' : s <> "*") in
sourceSpanToRange source (insetSpan (totalSpan source)) `shouldBe` insetRange (totalRange source)
describe "totalSpan" $ do
prop "covers single lines" $
\ n -> totalSpan (fromList (replicate n '*')) `shouldBe` SourceSpan "" (SourcePos 0 0) (SourcePos 0 (max 0 n))
prop "covers multiple lines" $
\ n -> totalSpan (fromList (intersperse '\n' (replicate n '*'))) `shouldBe` SourceSpan "" (SourcePos 0 0) (SourcePos (max 0 (pred n)) (if n > 0 then 1 else 0))
totalSpan :: Source Char -> SourceSpan
totalSpan source = SourceSpan "" (SourcePos 0 0) (SourcePos (pred (length ranges)) (end lastRange - start lastRange))
where ranges = actualLineRanges (totalRange source) source
lastRange = Prelude.last ranges
insetSpan :: SourceSpan -> SourceSpan
insetSpan sourceSpan = sourceSpan { spanStart = (spanStart sourceSpan) { column = succ (column (spanStart sourceSpan)) }
, spanEnd = (spanEnd sourceSpan) { column = pred (column (spanEnd sourceSpan)) } }
insetRange :: Range -> Range
insetRange Range {..} = Range (succ start) (pred end)

View File

@ -9,6 +9,8 @@ import qualified Diff.Spec
import qualified DiffSummarySpec
import qualified InterpreterSpec
import qualified PatchOutputSpec
import qualified RangeSpec
import qualified Source.Spec
import qualified TermSpec
import Test.Hspec
@ -22,4 +24,6 @@ main = hspec . parallel $ do
describe "DiffSummary" DiffSummarySpec.spec
describe "Interpreter" InterpreterSpec.spec
describe "PatchOutput" PatchOutputSpec.spec
describe "Range" RangeSpec.spec
describe "Source" Source.Spec.spec
describe "Term" TermSpec.spec

2
vendor/gitlib vendored

@ -1 +1 @@
Subproject commit 4828dbf14adfc4ce0ac3536f8b192e65828e97bc
Subproject commit 77df9cce6bbc37f36f4554a31b36336ba887fcd2