1
1
mirror of https://github.com/github/semantic.git synced 2024-12-28 09:21:35 +03:00
semantic/src/Alignment.hs

239 lines
13 KiB
Haskell
Raw Normal View History

2016-05-17 19:22:21 +03:00
{-# LANGUAGE RankNTypes #-}
2016-03-11 20:29:17 +03:00
module Alignment
2016-03-14 21:13:06 +03:00
( hasChanges
2016-03-11 20:29:17 +03:00
, numberedRows
2016-03-22 15:20:56 +03:00
, AlignedDiff
, alignDiff
, alignBranch
, applyThese
, modifyJoin
, unionThese
2016-03-11 20:29:17 +03:00
) where
import Control.Applicative
import Control.Arrow ((***))
2016-03-05 04:18:49 +03:00
import Control.Monad
import Data.Align
import Data.Biapplicative
2016-03-18 21:30:22 +03:00
import Data.Bifunctor.Join
2016-03-03 07:11:24 +03:00
import Data.Copointed
2016-03-12 01:59:58 +03:00
import Data.Foldable
import Data.Function
2016-04-04 23:25:16 +03:00
import Data.Functor.Both as Both
import Data.Functor.Foldable hiding (Foldable, fold)
import Data.Functor.Identity
import Data.List (partition)
import Data.Maybe
import Data.Monoid
import qualified Data.OrderedMap as Map
import Data.These
import Diff
2016-03-31 00:26:52 +03:00
import Info
import Patch
import Prologue hiding (fst, snd)
import Range
import Source hiding (break, fromList, uncons, (++))
import SplitDiff
import Syntax
import Term
-- | Assign line numbers to the lines on each side of a list of rows.
--
-- Numbering starts at 1 on both sides. A side's counter only advances past
-- rows which are populated on that side, so a row present only on the left
-- leaves the right-hand counter untouched (and vice versa).
numberedRows :: [Join These a] -> [Join These (Int, a)]
numberedRows = countUp (both 1 1)
  where countUp _ [] = []
        countUp from (row : rows) = numbered : countUp next rows
          where -- Pair each populated side of the row with that side's current number.
                -- `applyToBoth` is total because `from` always carries both counters,
                -- so no `fromJust` is needed.
                numbered = flip (,) <$> row `applyToBoth` from
                -- Increment only the counters for the sides this row occupies.
                next = modifyJoin (fromThese identity identity) (succ <$ row) <*> from
-- | Determine whether a line contains any patches.
--
-- Every patch in the diff is replaced with 'True' and the results are
-- or-ed together, so a diff containing no patches yields 'False'.
hasChanges :: SplitDiff leaf Info -> Bool
hasChanges diff = or (const True <$> diff)
2016-03-09 09:36:40 +03:00
2016-03-22 15:20:56 +03:00
-- | A diff aligned into a list of rows, each row holding the split diff for one side or for both sides.
type AlignedDiff leaf = [Join These (SplitDiff leaf Info)]
-- | Align a diff into rows, given the pair of sources it was computed from.
--
-- Patches are aligned individually by 'alignPatch'; the surrounding syntax
-- is then folded over with 'alignSyntax', using 'Both' as the applicative.
alignDiff :: Show leaf => Both (Source Char) -> Diff leaf Info -> AlignedDiff leaf
alignDiff sources diff = iter alignNode (alignPatch sources <$> diff)
  where -- NOTE(review): `getRange` is not defined at the top level of this module;
        -- presumably it is imported (e.g. from SplitDiff) — confirm.
        alignNode = alignSyntax (runBothWith (\ a b -> Join (These a b))) (free . Free) getRange sources
2016-04-15 00:03:53 +03:00
-- | Align the term(s) of a single patch into rows.
--
-- Deleted terms populate only the left side of their rows and inserted terms
-- only the right side. The two halves of a replacement are aligned
-- independently and then zipped together row by row.
alignPatch :: Show leaf => Both (Source Char) -> Patch (Term leaf Info) -> AlignedDiff leaf
alignPatch sources patch = case patch of
  Delete term -> fmap (pure . SplitDelete) <$> alignLeft term
  Insert term -> fmap (pure . SplitInsert) <$> alignRight term
  Replace term1 term2 -> fmap (pure . SplitReplace) <$> alignWith collapse (alignLeft term1) (alignRight term2)
  where
    -- Align a term against the left-hand source, producing left-only rows.
    alignLeft term = hylo (alignSyntax leftOnly cofree getRange (Identity (fst sources))) runCofree (Identity <$> term)
    -- Align a term against the right-hand source, producing right-only rows.
    alignRight term = hylo (alignSyntax rightOnly cofree getRange (Identity (snd sources))) runCofree (Identity <$> term)
    -- Each side of a zipped row is itself a one-sided row; unwrap it to the term it holds.
    collapse row = these identity identity const . runJoin <$> Join row
    getRange = characterRange . extract
    leftOnly = Join . This . runIdentity
    rightOnly = Join . That . runIdentity
-- | Align one node of syntax into rows, given the already-aligned rows of its children.
--
-- The Applicative instance f is either Identity or Both. Identity is for Terms in Patches, Both is for Diffs in unchanged portions of the diff.
alignSyntax :: (Applicative f, Show term) => (forall a. f a -> Join These a) -> (CofreeF (Syntax leaf) Info term -> term) -> (term -> Range) -> f (Source Char) -> CofreeF (Syntax leaf) (f Info) [Join These term] -> [Join These term]
alignSyntax toJoinThese toNode getRange sources (infos :< syntax) = case syntax of
  Leaf s -> mapMaybe (wrapInBranch (const (Leaf s)) . fmap (flip (,) [])) (sequenceL lineRanges)
  Indexed children -> mapMaybe (wrapInBranch (Indexed . fmap runIdentity)) (alignBranch getRange (Identity <$> children) branchRanges)
  Fixed children -> mapMaybe (wrapInBranch (Fixed . fmap runIdentity)) (alignBranch getRange (Identity <$> children) branchRanges)
  Keyed children -> mapMaybe (wrapInBranch (Keyed . Map.fromList)) (alignBranch getRange (Map.toList children) branchRanges)
  where
    -- The per-line ranges covered by this node on each side it occupies.
    lineRanges = toJoinThese (actualLineRanges . characterRange <$> infos <*> sources)
    -- The same ranges as a pair of lists, with an empty list for an absent side.
    branchRanges = modifyJoin (fromThese [] []) lineRanges
    -- Rebuild a node for one row: narrow the node's range to the row's range and
    -- attach the children which fell on that row.
    wrapInBranch constructor = applyThese (toJoinThese (rebuild <$> infos))
      where rebuild info (range, children) = toNode (info { characterRange = range } :< constructor children)
2016-03-22 15:20:56 +03:00
{-
We align asymmetrically since the first child is asymmetrical, and then continue aligning symmetrically afterwards:
[ | [
a |
, b | b
] | ]
The first child is asymmetrical but there is also a symmetrical child on the same line, so we align symmetrically, producing:
[ a, b ] | [ b ]
and not:
[ a, b ] |
| [ b ]
We align the child symmetrically, and thus have to take the first line range on the right asymmetrically so as not to break the child's alignment.
| [
[ b ] | b
| ]
(Eventually, we'll align the left hand side of this up a line, but that constraint is undecidable for now.)
If a is replaced with b in a Replace patch, we would like to align them side by side (that's what makes it a replacement—they correlate), but a catamorphism which loses the Replace relationship (by splitting it into two SplitReplaces) can't know that they're related:
[ a ] | [ b ]
If a is deleted and b is coincidentally inserted, we want to separate them, because they're semantically unrelated:
[ a ] |
| [ b ]
The presence of a symmetrical child forces it to be symmetrical again:
[ a, c ] | [ c, b ]
We might split up children so `This` and `That` aren't 1:1 with `Delete` and `Insert`. This is because earlier symmetrical children take precedence over later ones:
[ a, b ] | [ a
| , b
| ]
Lines without children on them are aligned irrespective of their textual content:
[\n | [\n
a\n | a, b\n
,\n | \n
b\n | \n
] | ]
We should avoid taking asymmetrical children greedily so as not to misalign asymmetrical children before symmetrical children on the same line:
| [ a
[ b, c ] | , c
| ]
-}
2016-05-13 23:40:46 +03:00
-- | Given a function to get the range, a list of already-aligned children, and the lists of ranges spanned by a branch, return the aligned lines.
--
-- Each step consumes the head range from the left side, the right side, or
-- both, and emits one row holding the children which intersect it.
alignBranch :: (Copointed c, Functor c, Show term) => (term -> Range) -> [c [Join These term]] -> Both [Range] -> [Join These (Range, [c term])]
-- The first child is empty, and so can safely be dropped.
alignBranch getRange (child:rest) ranges | null (copoint child) = alignBranch getRange rest ranges
-- There are no more ranges, so we're done.
alignBranch _ [] (Join ([], [])) = []
alignBranch _ children (Join ([], [])) = trace ("exhausted ranges with " ++ show (length children) ++ " children remaining") []
-- There are no more children, so we can just zip the remaining ranges together.
alignBranch _ [] ranges = runBothWith (alignWith Join) (fmap (flip (,) []) <$> ranges)
-- There are both children and ranges, so we need to proceed line by line.
alignBranch getRange children ranges = case intersectingChildren of
  -- No child intersects the current ranges on either side, so advance.
  [] -> (flip (,) [] <$> headRanges) : alignBranch getRange children (drop 1 <$> ranges)
  -- At least one child intersects on at least one side.
  _ -> case firstSymmetricalIntersection of
    -- At least one child intersects on both sides, so align symmetrically.
    Just (True, True) ->
      let (line, remaining) = lineAndRemaining intersectingChildren headRanges
      in line : alignBranch getRange (remaining ++ nonIntersectingChildren) (drop 1 <$> ranges)
    -- A symmetrical child intersects on the right, so align asymmetrically on the left.
    Just (False, True) -> advanceLeft remainingIntersectingChildren
    -- A symmetrical child intersects on the left, so align asymmetrically on the right.
    Just (True, False) -> advanceRight remainingIntersectingChildren
    -- No symmetrical child intersects, so align asymmetrically, picking the left side first to match the deletion/insertion order convention in diffs.
    _ | any (maybe False (isThis . runJoin) . head . copoint) asymmetricalChildren -> advanceLeft []
      | otherwise -> advanceRight []
  where
    -- Children whose first line touches the current head ranges, versus the rest.
    (intersectingChildren, nonIntersectingChildren) = partition (or . intersectsFirstLine getRange headRanges . copoint) children
    -- Of the intersecting children: those whose first line has both sides, versus one-sided ones.
    (remainingIntersectingChildren, asymmetricalChildren) = partition (maybe False (isThese . runJoin) . head . copoint) intersectingChildren
    -- The earlier equations rule out both range lists being empty, so at least one side has a head.
    Just headRanges = sequenceL (listToMaybe <$> Join (runBothWith These ranges))
    (leftRange, rightRange) = splitThese headRanges
    -- How the first two-sided child (if any) intersects the head ranges; a side with no head range counts as True.
    firstSymmetricalIntersection = fromThese True True . runJoin . intersectsFirstLine getRange headRanges . copoint <$> listToMaybe remainingIntersectingChildren
    -- Build the row for the given ranges out of the given children, returning the row and what is left of the children.
    lineAndRemaining lineChildren lineRanges =
      let (intersections, remaining) = alignChildren getRange lineChildren lineRanges
      in ((,) <$> lineRanges `applyToBoth` intersections, remaining)
    -- Build a one-sided row from the asymmetrical children, as a function prepending it to the
    -- following rows (or prepending nothing when that side has no head range).
    asymmetricalLine side = maybe (identity, []) (first (:)) (lineAndRemaining asymmetricalChildren <$> side)
    -- Emit a left-only row and continue with the left ranges advanced by one;
    -- `symmetrical` is re-queued between the leftover and the non-intersecting children.
    advanceLeft symmetrical =
      let (leftLine, remainingAtLeft) = asymmetricalLine leftRange
      in leftLine (alignBranch getRange (remainingAtLeft ++ symmetrical ++ nonIntersectingChildren) (modifyJoin (first (drop 1)) ranges))
    -- Emit a right-only row and continue with the right ranges advanced by one.
    advanceRight symmetrical =
      let (rightLine, remainingAtRight) = asymmetricalLine rightRange
      in rightLine (alignBranch getRange (remainingAtRight ++ symmetrical ++ nonIntersectingChildren) (modifyJoin (second (drop 1)) ranges))
2016-05-26 21:49:57 +03:00
2016-05-27 17:07:32 +03:00
-- | Test whether the first of a list of aligned lines intersects the given ranges on each side.
--
-- With no lines at all, every side present in the ranges is reported as not intersecting.
intersectsFirstLine :: (term -> Range) -> Join These Range -> [Join These term] -> Join These Bool
intersectsFirstLine getRange ranges alignedLines = case alignedLines of
  [] -> False <$ ranges
  firstLine : _ -> intersects getRange ranges firstLine
2016-05-17 19:22:21 +03:00
-- | Given a list of aligned children, produce lists of their intersecting first lines, and a list of the remaining lines/nonintersecting first lines.
--
-- Children with no lines left are dropped. For every other child, the sides
-- of its first line which intersect the head ranges are moved into the
-- result, and whatever is not taken stays with the child for later rows.
alignChildren :: (Copointed c, Functor c) => (term -> Range) -> [c [Join These term]] -> Join These Range -> (Both [c term], [c [Join These term]])
alignChildren _ [] _ = (both [] [], [])
alignChildren getRange (first:rest) headRanges
  | (firstLine:restOfLines) <- copoint first
  , ~(l, r) <- splitThese firstLine
  = case fromThese False False . runJoin $ intersectsFirstLine getRange headRanges (copoint first) of
      -- It intersects on both sides, so we can just take the first line whole.
      (True, True) -> (takeTerms (Just firstLine), (restOfLines <$ first) : restRemaining)
      -- It only intersects on the left, so split it up.
      (True, False) -> (takeTerms l, (maybe identity (:) r restOfLines <$ first) : restRemaining)
      -- It only intersects on the right, so split it up.
      (False, True) -> (takeTerms r, (maybe identity (:) l restOfLines <$ first) : restRemaining)
      -- It doesn't intersect at all, so skip it and move along.
      (False, False) -> (firstRemaining, first:restRemaining)
  | otherwise = alignChildren getRange rest headRanges
  where (firstRemaining, restRemaining) = alignChildren getRange rest headRanges
        -- Prepend the terms of the taken side(s) to the terms gathered from the rest.
        -- A missing side contributes no terms rather than crashing via `fromJust`;
        -- the other side of the line is still kept in the remaining lines above.
        takeTerms side = (++) <$> maybe (both [] []) toTerms side <*> firstRemaining
        -- Put each term of the line back in the child's container, as singleton lists per side.
        toTerms line = modifyJoin (fromThese [] []) (pure . (<$ first) <$> line)
2016-05-17 19:22:21 +03:00
2016-04-15 19:45:33 +03:00
-- | Merge a collection of lines into a single line whose sides collect the values found on each side.
--
-- An empty collection produces 'These' with 'empty' on both sides.
unionThese :: (Alternative f, Foldable f, Monoid (f a)) => f (Join These a) -> Join These (f a)
unionThese as = fromMaybe (Join (These empty empty)) (getUnion (foldMap singleton as))
  where -- Lift every value of a line into @f@ so that lines can be combined via 'Union'.
        singleton line = Union (Just (pure <$> line))
2016-04-15 16:35:41 +03:00
-- | Test ranges and terms for intersection on either or both sides.
--
-- The result has the same shape as the given ranges. A side which is missing
-- from the line is compared as @Range (-1) (-1)@.
intersects :: (term -> Range) -> Join These Range -> Join These term -> Join These Bool
intersects getRange ranges line = intersectsRange <$> ranges `applyToBoth` termRanges
  where termRanges = modifyJoin (fromThese placeholder placeholder) (getRange <$> line)
        -- Stand-in range for an absent side; presumably chosen so that it intersects nothing — confirm against intersectsRange.
        placeholder = Range (-1) (-1)
2016-04-15 16:31:28 +03:00
-- | Split a These value up into independent These values representing the left and right sides, if any.
--
-- The first component is a 'This' holding the left value (when present) and
-- the second a 'That' holding the right value (when present).
splitThese :: Join These a -> (Maybe (Join These a), Maybe (Join These a))
splitThese (Join sides) = case sides of
  This a -> (Just (Join (This a)), Nothing)
  That b -> (Nothing, Just (Join (That b)))
  These a b -> (Just (Join (This a)), Just (Join (That b)))
infixl 4 `applyThese`

-- | Like `<*>`, but it returns its result in `Maybe` since the result is the intersection of the shapes of the inputs.
--
-- A side appears in the result only when both the function and the argument
-- are present on that side; if no side qualifies the result is 'Nothing'.
applyThese :: Join These (a -> b) -> Join These a -> Maybe (Join These b)
applyThese (Join fg) (Join ab) = Join <$> maybeThese (leftFunction <*> leftValue) (rightFunction <*> rightValue)
  where (leftFunction, rightFunction) = unpack fg
        (leftValue, rightValue) = unpack ab
        -- View a These as a pair of optional sides.
        unpack sides = fromThese Nothing Nothing (bimap Just Just sides)
2016-03-29 16:58:31 +03:00
infixl 4 `applyToBoth`

-- | Like `<*>`, but it takes a `Both` on the right to ensure that it can always return a value.
--
-- The result has exactly the shape of the functions on the left: each
-- function present is applied to the value on its own side of the pair.
applyToBoth :: Join These (a -> b) -> Both a -> Join These b
applyToBoth (Join fg) (Join (a, b)) = Join $ case fg of
  This f -> This (f a)
  That g -> That (g b)
  These f g -> These (f a) (g b)
2016-04-15 16:30:20 +03:00
-- | Map over the bifunctor inside a Join, producing another Join.
--
-- The function may change both the bifunctor and the element type.
modifyJoin :: (p a a -> q b b) -> Join p a -> Join q b
modifyJoin f (Join inner) = Join (f inner)
-- | Given a pair of Maybes, produce a These containing Just their values, or Nothing if they haven't any.
maybeThese :: Maybe a -> Maybe b -> Maybe (These a b)
maybeThese left right = case (left, right) of
  (Just a, Just b) -> Just (These a b)
  (Just a, Nothing) -> Just (This a)
  (Nothing, Just b) -> Just (That b)
  (Nothing, Nothing) -> Nothing
2016-04-15 19:15:47 +03:00
-- | A Monoid wrapping Join These, for which mappend is the smallest shape covering both arguments.
newtype Union a = Union
  { getUnion :: Maybe (Join These a) -- ^ The wrapped value; 'Nothing' is the empty union.
  }
  deriving (Eq, Functor, Show)
2016-04-15 16:23:41 +03:00
2016-04-15 16:26:16 +03:00
-- | Instances
2016-04-15 19:15:47 +03:00
-- | Combining two non-empty unions merges them side by side: a side present
-- in either argument is present in the result, and a side present in both is
-- combined with the underlying 'mappend'. An empty union is the identity.
instance Monoid a => Monoid (Union a) where
  mempty = Union Nothing
  mappend (Union (Just a)) (Union (Just b)) = Union (Join <$> maybeThese (leftA `mappend` leftB) (rightA `mappend` rightB))
    where (leftA, rightA) = unpack a
          (leftB, rightB) = unpack b
          -- View a line as a pair of optional sides.
          unpack = fromThese Nothing Nothing . runJoin . fmap Just
  mappend (Union (Just a)) _ = Union (Just a)
  mappend (Union _) (Union (Just b)) = Union (Just b)
  mappend _ _ = Union Nothing
2016-04-15 16:26:16 +03:00
-- | Crosswalk a 'Join' by bicrosswalking the wrapped bifunctor with the same function on both sides.
--
-- NOTE(review): neither 'Join' nor 'Crosswalk' appears to be defined in this module, which would make this an orphan instance — confirm.
instance Bicrosswalk t => Crosswalk (Join t) where
  crosswalk f (Join sides) = Join <$> bicrosswalk f f sides