1
1
mirror of https://github.com/github/semantic.git synced 2024-12-28 09:21:35 +03:00
semantic/src/Alignment.hs

222 lines
15 KiB
Haskell
Raw Normal View History

2016-03-14 21:13:06 +03:00
{-# LANGUAGE RankNTypes #-}
2016-03-11 20:29:17 +03:00
module Alignment
2016-03-14 21:13:06 +03:00
( hasChanges
2016-03-11 20:29:17 +03:00
, linesInRangeOfSource
, numberedRows
, splitAbstractedTerm
, splitDiffByLines
, Row
2016-03-22 15:20:56 +03:00
, AlignedDiff
, alignDiff
, groupChildrenByLine
2016-03-11 20:29:17 +03:00
) where
import Control.Arrow ((&&&))
import Control.Comonad.Cofree
2016-03-05 04:18:49 +03:00
import Control.Monad
import Control.Monad.Free
2016-03-12 01:59:58 +03:00
import Data.Adjoined
import Data.Align
import Data.Bifunctor
2016-03-18 21:30:22 +03:00
import Data.Bifunctor.Join
import Data.Bifunctor.These
2016-03-14 17:35:31 +03:00
import Data.Coalescent
2016-03-03 07:11:24 +03:00
import Data.Copointed
2016-03-12 01:59:58 +03:00
import Data.Foldable
2016-04-04 23:25:16 +03:00
import Data.Functor.Both as Both
import Data.Functor.Identity
import Data.Maybe
import Data.Monoid
import qualified Data.OrderedMap as Map
2016-03-14 17:35:31 +03:00
import qualified Data.Text as T
import Diff
2016-03-31 00:26:52 +03:00
import Info
import Line
import Patch
2016-03-01 03:39:04 +03:00
import Prelude hiding (fst, snd)
import qualified Prelude
import Range
import Source hiding (fromList, uncons, (++))
import SplitDiff
import Syntax
import Term
-- | Assign line numbers to the lines on each side of a list of rows.
numberedRows :: [Row a] -> [Both (Int, Line a)]
2016-03-08 01:13:51 +03:00
numberedRows = countUp (pure 1)
2016-03-10 08:56:31 +03:00
where countUp from (row : rows) = ((,) <$> from <*> row) : countUp ((+) <$> from <*> (lineIncrement <$> row)) rows
2016-03-08 01:13:51 +03:00
countUp _ [] = []
-- | Determine whether a line contains any patches.
hasChanges :: Line (SplitDiff leaf Info) -> Bool
hasChanges = or . fmap (or . (True <$))
-- | Split a diff, which may span multiple lines, into rows of split diffs paired with the Range of characters spanned by that Row on each side of the diff.
splitDiffByLines :: Both (Source Char) -> Diff leaf Info -> [Row (SplitDiff leaf Info, Range)]
splitDiffByLines sources = toList . iter (\ (Annotated infos syntax) -> splitAbstractedTerm ((Free .) . Annotated) sources infos syntax) . fmap (splitPatchByLines sources)
2016-03-03 00:05:59 +03:00
-- | Split a patch, which may span multiple lines, into rows of split diffs.
splitPatchByLines :: Both (Source Char) -> Patch (Term leaf Info) -> Adjoined (Both (Line (SplitDiff leaf Info, Range)))
splitPatchByLines sources patch = wrapTermInPatch <$> splitAndFoldTerm (unPatch patch)
2016-03-29 16:22:54 +03:00
where splitAndFoldTerm (This deleted) = tsequenceL mempty $ both (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ fst sources)) (Identity <$> deleted)) nil
splitAndFoldTerm (That inserted) = tsequenceL mempty $ both nil (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ snd sources)) (Identity <$> inserted))
splitAndFoldTerm (These deleted inserted) = tsequenceL mempty $ both (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ fst sources)) (Identity <$> deleted)) (runIdentity <$> cata (splitAbstractedTerm (:<) (Identity $ snd sources)) (Identity <$> inserted))
2016-03-14 21:13:06 +03:00
wrapTermInPatch = fmap (fmap (first (Pure . constructor patch)))
constructor (Replace _ _) = SplitReplace
constructor (Insert _) = SplitInsert
constructor (Delete _) = SplitDelete
-- | Split a term comprised of an Info & Syntax up into one `outTerm` (abstracted by an alignment function & constructor) per line in `Source`.
splitAbstractedTerm :: (Applicative f, Coalescent (f (Line (Maybe (Identity outTerm), Range))), Coalescent (f (Line (Maybe (T.Text, outTerm), Range))), Foldable f, TotalCrosswalk f) => (Info -> Syntax leaf outTerm -> outTerm) -> f (Source Char) -> f Info -> Syntax leaf (Adjoined (f (Line (outTerm, Range)))) -> Adjoined (f (Line (outTerm, Range)))
splitAbstractedTerm makeTerm sources infos syntax = case syntax of
2016-03-31 00:26:52 +03:00
Leaf a -> tsequenceL (pure mempty) $ fmap <$> ((\ categories -> fmap (\ range -> (makeTerm (Info range categories) (Leaf a), range))) <$> (Info.categories <$> infos)) <*> (linesInRangeOfSource <$> (characterRange <$> infos) <*> sources)
Indexed children -> adjoinChildren sources infos (constructor (Indexed . fmap runIdentity)) (Identity <$> children)
Fixed children -> adjoinChildren sources infos (constructor (Fixed . fmap runIdentity)) (Identity <$> children)
Keyed children -> adjoinChildren sources infos (constructor (Keyed . Map.fromList)) (Map.toList children)
where constructor with info = makeTerm info . with
2016-03-11 19:54:39 +03:00
-- | Adjoin a branch terms lines, wrapping children & context in branch nodes using a constructor.
adjoinChildren :: (Copointed c, Functor c, Applicative f, Coalescent (f (Line (Maybe (c a), Range))), Foldable f, TotalCrosswalk f) => f (Source Char) -> f Info -> (Info -> [c a] -> outTerm) -> [c (Adjoined (f (Line (a, Range))))] -> Adjoined (f (Line (outTerm, Range)))
2016-03-14 21:57:39 +03:00
adjoinChildren sources infos constructor children = wrap <$> leadingContext <> lines
where (lines, next) = foldr (childLines sources) (mempty, end <$> ranges) children
2016-03-09 12:02:00 +03:00
ranges = characterRange <$> infos
2016-03-31 00:26:52 +03:00
categories = Info.categories <$> infos
2016-03-15 17:58:54 +03:00
leadingContext = tsequenceL (pure mempty) $ makeContextLines <$> (linesInRangeOfSource <$> (Range <$> (start <$> ranges) <*> next) <*> sources)
2016-03-09 12:17:39 +03:00
wrap = (wrapLineContents <$> (makeBranchTerm constructor <$> categories <*> next) <*>)
makeBranchTerm constructor categories next children = let range = unionRangesFrom (rangeAt next) $ Prelude.snd <$> children in
(constructor (Info range categories) . catMaybes . toList $ Prelude.fst <$> children, range)
2016-03-09 12:02:00 +03:00
2016-03-11 20:00:06 +03:00
-- | Accumulate the lines of and between a branch terms children.
childLines :: (Copointed c, Functor c, Applicative f, Coalescent (f (Line (Maybe (c a), Range))), Foldable f, TotalCrosswalk f) => f (Source Char) -> c (Adjoined (f (Line (a, Range)))) -> (Adjoined (f (Line (Maybe (c a), Range))), f Int) -> (Adjoined (f (Line (Maybe (c a), Range))), f Int)
-- We depend on source ranges increasing monotonically. If a child invalidates that, e.g. if its a move in a Keyed node, we dont output rows for it in this iteration. (It will still show up in the diff as context rows.) This works around https://github.com/github/semantic-diff/issues/488.
2016-03-15 15:56:18 +03:00
childLines sources child (nextLines, next) | or ((>) . end <$> childRanges <*> next) = (nextLines, next)
| otherwise = ((makeChildLines <$> copoint child)
2016-03-15 17:58:54 +03:00
<> tsequenceL (pure mempty) (makeContextLines <$> trailingContextLines)
2016-03-15 15:56:18 +03:00
<> nextLines, start <$> childRanges)
where makeChildLines = fmap (fmap (first (Just . (<$ child))))
2016-03-15 17:36:18 +03:00
trailingContextLines = linesInRangeOfSource <$> (Range <$> (end <$> childRanges) <*> next) <*> sources
2016-03-28 11:39:32 +03:00
childRanges = unionRangesFrom <$> (rangeAt <$> next) <*> (concatMap (fmap Prelude.snd . unLine) <$> sequenceA (copoint child))
2016-03-15 17:58:54 +03:00
makeContextLines :: Adjoined (Line Range) -> Adjoined (Line (Maybe a, Range))
2016-03-15 15:54:35 +03:00
makeContextLines = fmap (fmap ((,) Nothing))
2016-03-10 17:08:49 +03:00
-- | Produce open/closed lines for the portion of the source spanned by a range.
2016-03-15 17:58:54 +03:00
linesInRangeOfSource :: Range -> Source Char -> Adjoined (Line Range)
linesInRangeOfSource range source = fromList $ pureBy (openRange source) <$> actualLineRanges range source
2016-03-10 16:53:24 +03:00
-- | Does this Range in this Source end with a newline?
openRange :: Source Char -> Range -> Bool
openRange source range = (at source <$> maybeLastIndex range) /= Just '\n'
2016-03-09 09:36:40 +03:00
-- | A row in a split diff, composed of a before line and an after line.
type Row a = Both (Line a)
2016-03-22 15:20:56 +03:00
type AlignedDiff leaf = [Join These (SplitDiff leaf Info)]
2016-03-18 21:15:49 +03:00
alignPatch :: Both (Source Char) -> Patch (Term leaf Info) -> AlignedDiff leaf
2016-04-05 00:26:40 +03:00
alignPatch sources (Delete term) = hylo (alignSyntax (Join . This . runIdentity) (Identity (fst sources))) unCofree (Identity <$> term)
alignPatch sources (Insert term) = hylo (alignSyntax (Join . That . runIdentity) (Identity (snd sources))) unCofree (Identity <$> term)
alignPatch sources (Replace term1 term2) = alignWith (fmap (these id id const . runJoin) . Join)
(hylo (alignSyntax (Join . This . runIdentity) (Identity (fst sources))) unCofree (Identity <$> term1))
(hylo (alignSyntax (Join . That . runIdentity) (Identity (snd sources))) unCofree (Identity <$> term2))
2016-03-21 20:26:38 +03:00
alignDiff :: Both (Source Char) -> Diff leaf Info -> AlignedDiff leaf
alignDiff sources diff = iter (uncurry (alignSyntax (runBothWith ((Join .) . These)) sources) . (annotation &&& syntax)) (alignPatch sources <$> diff)
2016-04-04 22:49:22 +03:00
alignSyntax :: Applicative f => (forall a. f a -> Join These a) -> f (Source Char) -> f Info -> Syntax leaf (AlignedDiff leaf) -> AlignedDiff leaf
alignSyntax toJoinThese sources infos syntax = case syntax of
Leaf s -> catMaybes $ wrapInBranch (const (Leaf s)) . fmap (flip (,) []) <$> sequenceL lineRanges
Indexed children -> catMaybes $ wrapInBranch Indexed <$> groupChildrenByLine lineRanges children
Fixed children -> catMaybes $ wrapInBranch Fixed <$> groupChildrenByLine lineRanges children
2016-04-04 22:49:22 +03:00
_ -> []
where lineRanges = toJoinThese $ actualLineRanges <$> (characterRange <$> infos) <*> sources
wrapInBranch constructor = applyThese $ toJoinThese ((\ info (range, children) -> Free (Annotated (setCharacterRange info range) (constructor children))) <$> infos)
2016-03-22 15:20:56 +03:00
groupChildrenByLine :: Join These [Range] -> [AlignedDiff leaf] -> [Join These (Range, [SplitDiff leaf Info])]
2016-04-05 00:56:53 +03:00
groupChildrenByLine ranges children | not (and $ null <$> ranges)
, (nextRanges, nextChildren, lines) <- group2 ranges children
= lines ++ groupChildrenByLine nextRanges nextChildren
2016-04-05 00:56:53 +03:00
| otherwise = []
group2 :: Join These [Range] -> [AlignedDiff leaf] -> (Join These [Range], [AlignedDiff leaf], [Join These (Range, [SplitDiff leaf Info])])
group2 ranges children | Just (headRanges, tailRanges) <- unconsThese ranges
, ((firstLine:rest):restOfChildren) <- children
, ~(l, r) <- split firstLine
= case fromThese False False . runJoin $ intersects headRanges firstLine of
(True, True) -> let (moreRanges, moreChildren, remainingLines) = group2 tailRanges (rest:restOfChildren) in
(moreRanges, moreChildren, pairRangesWithLine headRanges (pure <$> firstLine) : remainingLines)
(True, False) -> let (moreRanges, moreChildren, remainingLines) = group2 (modifyJoin (bimap (drop 1) (if null r then id else drop 1)) ranges) ((r ++ rest):restOfChildren) in
(moreRanges, moreChildren, pairRangesWithLine headRanges (mask firstLine $ modifyJoin (uncurry These . fromThese [] []) $ pure <$> head l) : remainingLines)
(False, True) -> let (moreRanges, moreChildren, remainingLines) = group2 (modifyJoin (bimap (if null l then id else drop 1) (drop 1)) ranges) ((l ++ rest):restOfChildren) in
(moreRanges, moreChildren, pairRangesWithLine headRanges (mask firstLine $ modifyJoin (uncurry These . fromThese [] []) $ pure <$> head r) : remainingLines)
_ -> (tailRanges, children, [ flip (,) [] <$> headRanges ])
| ([]:rest) <- children = group2 ranges rest
2016-04-05 00:56:53 +03:00
| otherwise = ([] <$ ranges, children, fmap (flip (,) []) <$> sequenceL ranges)
-- | Partitions and splits a list of children into a tuple consisting of:
2016-04-08 19:06:19 +03:00
-- | - elements which matched; if an element matches only partially this field will contain only the matching side
-- | - the left sides of elements which matched only on the right side
-- | - the right sides of elements which matched only on the left side
2016-04-08 19:06:19 +03:00
-- | - elements which do not intersect.
spanThese :: (Join These a -> Join These Bool) -> [[Join These a]] -> ([[Join These a]], [[Join These a]], [[Join These a]], [[Join These a]])
2016-04-08 19:04:01 +03:00
spanThese pred children | (child:rest) <- children
, not (null child)
, ~(moreChildren, moreL, moreR, moreLines) <- spanThese pred rest
2016-04-08 19:04:01 +03:00
, ~(l, r) <- split (head child)
= case fromThese False False (runJoin (pred (head child))) of
(True, True) -> (child : moreChildren, moreL, moreR, moreLines)
(True, False) -> (l : moreChildren, moreL, r : moreR, moreLines)
(False, True) -> (r : moreChildren, l : moreL, moreR, moreLines)
_ -> ([], [], [], children)
2016-04-08 19:04:01 +03:00
| ([]:rest) <- children = spanThese pred rest
| otherwise = ([], [], [], children)
2016-04-08 19:04:01 +03:00
pairRangesWithLine :: Monoid b => Join These a -> Join These b -> Join These (a, b)
pairRangesWithLine headRanges childLine = fromMaybe (flip (,) mempty <$> headRanges) $ (,) <$> headRanges `applyThese` childLine
mask :: Join These a -> Join These b -> Join These b
mask (Join (This _)) (Join (This b1)) = Join $ This b1
mask (Join (This _)) (Join (These b1 _)) = Join $ This b1
mask (Join (That _)) (Join (That b2)) = Join $ That b2
mask (Join (That _)) (Join (These _ b2)) = Join $ That b2
mask (Join (These _ _)) (Join (This b1)) = Join $ This b1
mask (Join (These _ _)) (Join (That b2)) = Join $ That b2
mask (Join (These _ _)) (Join (These b1 b2)) = Join $ These b1 b2
mask _ b = b
unconsThese :: Join These [a] -> Maybe (Join These a, Join These [a])
unconsThese (Join (This (a:as))) = Just (Join (This a), Join (This as))
unconsThese (Join (That (b:bs))) = Just (Join (That b), Join (That bs))
unconsThese (Join (These (a:as) (b:bs))) = Just (Join (These a b), Join (These as bs))
unconsThese (Join (These (a:as) _)) = Just (Join (This a), Join (This as))
unconsThese (Join (These _ (b:bs))) = Just (Join (That b), Join (That bs))
unconsThese _ = Nothing
getRange :: SplitDiff leaf Info -> Range
getRange (Free (Annotated (Info range _) _)) = range
getRange (Pure patch) | Info range _ :< _ <- getSplitTerm patch = range
2016-04-05 01:05:46 +03:00
intersects :: Join These Range -> Join These (SplitDiff leaf Info) -> Join These Bool
intersects ranges line = fromMaybe (False <$ line) $ intersectsChild <$> ranges `applyThese` line
intersectsChild :: Range -> SplitDiff leaf Info -> Bool
intersectsChild range child = end (getRange child) <= end range
split :: Join These a -> ([Join These a], [Join These a])
split these = fromThese [] [] $ bimap (pure . Join . This) (pure . Join . That) (runJoin these)
infixl 4 `applyThese`
applyThese :: Join These (a -> b) -> Join These a -> Maybe (Join These b)
applyThese (Join (This f)) (Join (This a)) = Just (Join (This (f a)))
applyThese (Join (That g)) (Join (That b)) = Just (Join (That (g b)))
applyThese (Join (These f g)) (Join (These a b)) = Just (Join (These (f a) (g b)))
applyThese (Join (These f _)) (Join (This a)) = Just (Join (This (f a)))
applyThese (Join (These _ g)) (Join (That b)) = Just (Join (That (g b)))
applyThese (Join (This f)) (Join (These a _)) = Just (Join (This (f a)))
applyThese (Join (That g)) (Join (These _ b)) = Just (Join (That (g b)))
applyThese _ _ = Nothing
2016-03-29 16:58:31 +03:00
modifyJoin :: (p a a -> q b b) -> Join p a -> Join q b
modifyJoin f = Join . f . runJoin