Merge origin/master into git-diff

2024-12-24 15:35:14 +03:00 · 2016-01-14 13:44:29 -05:00 · 2016-01-14 13:44:29 -05:00 · 2c3ff936b7
commit 2c3ff936b7
parent 6c69cf4189 50422673b6
4 changed files with 40 additions and 10 deletions
--- a/app/Main.hs
+++ b/app/Main.hs
@ -30,10 +30,12 @@ import Data.Tagged
 import Control.Monad.Reader
 import System.Environment

+-- | The available types of diff rendering.
 data Renderer = Unified | Split | Patch

 data Arguments = Arguments { renderer :: Renderer, output :: Maybe FilePath, shaA :: String, shaB :: String, filepaths :: [FilePath] }

+-- | A parser for the application's command-line arguments.
 arguments :: Parser Arguments
 arguments = Arguments
  <$> (flag Split Unified (long "unified" <> help "output a unified diff")
@ -94,6 +96,7 @@ printDiff arguments filepath (aSource, bSource) (aTerm, bTerm) = case renderer a
  where diff = interpret comparable aTerm bTerm
        write rendered h = TextIO.hPutStr h rendered

+-- | Replace every string leaf with leaves of the words in the string.
 breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info
 breakDownLeavesByWord source = cata replaceIn
  where
@ -102,6 +105,7 @@ breakDownLeavesByWord source = cata replaceIn
    rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toList $ slice range source)
    makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring)

+-- | Read the file and convert it to Unicode.
 readAndTranscodeFile :: FilePath -> IO (Source Char)
 readAndTranscodeFile path = do
  text <- B1.readFile path
--- a/src/Range.hs
+++ b/src/Range.hs
@ -11,48 +11,56 @@ import Data.Semigroup
 data Range = Range { start :: !Int, end :: !Int }
  deriving (Eq, Show)

+-- | Return the length of the range.
 rangeLength :: Range -> Int
 rangeLength range = end range - start range

+-- | Return the portion of the text identified by the given range.
 substring :: Range -> T.Text -> T.Text
 substring range = T.take (rangeLength range) . T.drop (start range)

+-- | Return the portion of the list identified by the given range.
 sublist :: Range -> [a] -> [a]
 sublist range = take (rangeLength range) . drop (start range)

+-- | Return a range that covers the entire text.
 totalRange :: T.Text -> Range
 totalRange t = Range 0 $ T.length t

+-- | Return a range that has its start and end offset by the given amount.
 offsetRange :: Int -> Range -> Range
 offsetRange i (Range start end) = Range (i + start) (i + end)

+-- | Break a string down into words and sequences of punctuation. Return a list
+-- | strings with ranges, assuming that the first character in the string is
+-- | at the given index.
 rangesAndWordsFrom :: Int -> String -> [(Range, String)]
 rangesAndWordsFrom _ "" = []
-rangesAndWordsFrom startIndex string = fromMaybe [] $ takeAndContinue <$> (word <|> punctuation) <|> skipAndContinue <$> space
+rangesAndWordsFrom startIndex string = fromMaybe [] $ take isWord <|> take isPunctuation <|> skip Char.isSpace
  where
-    word = parse isWord string
-    punctuation = parse (not . isWordOrSpace) string
-    space = parse Char.isSpace string
-    takeAndContinue (parsed, rest) = (Range startIndex $ endFor parsed, parsed) : rangesAndWordsFrom (endFor parsed) rest
-    skipAndContinue (parsed, rest) = rangesAndWordsFrom (endFor parsed) rest
+    save parsed = (Range startIndex $ endFor parsed, parsed)
+    take = parse (Just . save)
+    skip = parse (const Nothing)
    endFor parsed = startIndex + length parsed
-    parse predicate string = case span predicate string of
+    parse transform predicate = case span predicate string of
      ([], _) -> Nothing
-      (parsed, rest) -> Just (parsed, rest)
-    isWordOrSpace c = Char.isSpace c || isWord c
+      (parsed, rest) -> Just $ maybe id (:) (transform parsed) $ rangesAndWordsFrom (endFor parsed) rest
    -- | Is this a word character?
    -- | Word characters are defined as in [Ruby’s `\p{Word}` syntax](http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e.:
    -- | > A member of one of the following Unicode general category _Letter_, _Mark_, _Number_, _Connector_Punctuation_
    isWord c = Char.isLetter c || Char.isNumber c || Char.isMark c || Char.generalCategory c == Char.ConnectorPunctuation
+    isPunctuation c = not (Char.isSpace c || isWord c)

 -- | Return Just the last index from a non-empty range, or if the range is empty, Nothing.
 maybeLastIndex :: Range -> Maybe Int
 maybeLastIndex (Range start end) | start == end = Nothing
 maybeLastIndex (Range _ end) = Just $ end - 1

+-- | Return a range that contains both the given ranges.
 unionRange :: Range -> Range -> Range
 unionRange (Range start1 end1) (Range start2 end2) = Range (min start1 start2) (max end1 end2)

+-- | Return a range that contains all the ranges in f.
 unionRanges :: (Functor f, Foldable f) => f Range -> Range
 unionRanges ranges = option (Range 0 0) id . foldl mappend mempty $ Option . Just <$> ranges

--- a/src/Source.hs
+++ b/src/Source.hs
@ -5,42 +5,55 @@ import Range
 import qualified Data.Vector as Vector
 import qualified Data.Text as T

+-- | The contents of a source file, backed by a vector for efficient slicing.
 newtype Source a = Source { getVector :: Vector.Vector a  }
  deriving (Eq, Show, Functor, Foldable, Traversable)

+-- | Return a Source from a list of items.
 fromList :: [a] -> Source a
 fromList = Source . Vector.fromList

+-- | Return a Source of Chars from a Text.
 fromText :: T.Text -> Source Char
 fromText = Source . Vector.fromList . T.unpack

+-- | Return a list of items with the contents of the Source.
 toList :: Source a -> [a]
 toList = Vector.toList . getVector

+-- | Return a Source that contains a slice of the given Source.
 slice :: Range -> Source a -> Source a
 slice range = Source . Vector.slice (start range) (rangeLength range) . getVector

+-- | Return a String with the contents of the Source.
 toString :: Source Char -> String
 toString = toList

+-- | Return the item at the given  index.
 at :: Source a -> Int -> a
 at = (Vector.!) . getVector

+-- | Test whether the source is empty.
 null :: Source a -> Bool
 null = Vector.null . getVector

+-- | Prepend an item.
 cons :: a -> Source a -> Source a
 cons a = Source . Vector.cons a . getVector

+-- | Remove the first item and return it with the rest of the source.
 uncons :: Source a -> Maybe (a, Source a)
 uncons (Source vector) = if Vector.null vector then Nothing else Just (Vector.head vector, Source $ Vector.tail vector)

+-- | Split the source into the longest prefix of elements that do not satisfy the predicate and the rest without copying.
 break :: (a -> Bool) -> Source a -> (Source a, Source a)
 break predicate (Source vector) = let (start, remainder) = Vector.break predicate vector in (Source start, Source remainder)

+-- | Concatenate two sources.
 (++) :: Source a -> Source a -> Source a
 (++) (Source a) = Source . (a Vector.++) . getVector

+-- | Split the contents of the source after newlines.
 actualLines :: Source Char -> [Source Char]
 actualLines source | Source.null source = [ source ]
 actualLines source = case Source.break (== '\n') source of
--- a/src/Term.hs
+++ b/src/Term.hs
@ -5,8 +5,11 @@ import Data.Maybe
 import Control.Comonad.Cofree
 import Syntax

+-- | An annotated node (Syntax) in an abstract syntax tree.
 type Term a annotation = Cofree (Syntax a) annotation

+-- | Zip two terms by combining their annotations into a pair of annotations.
+-- | If the structure of the two terms don't match, then Nothing will be returned.
 zipTerms :: Term a annotation -> Term a annotation -> Maybe (Term a (annotation, annotation))
 zipTerms (annotation1 :< a) (annotation2 :< b) = annotate $ zipUnwrap a b
  where
@ -18,11 +21,13 @@ zipTerms (annotation1 :< a) (annotation2 :< b) = annotate $ zipUnwrap a b
    zipUnwrap _ _ = Nothing
    zipUnwrapMaps a' b' key = (,) key <$> zipTerms (a' ! key) (b' ! key)

+-- | Fold a term into some other value, starting with the leaves.
 cata :: (annotation -> Syntax a b -> b) -> Term a annotation -> b
 cata f (annotation :< syntax) = f annotation $ cata f <$> syntax

+-- | Return the number of leaves in the node.
 termSize :: Term a annotation -> Integer
-termSize term = cata size term where
+termSize = cata size where
  size _ (Leaf _) = 1
  size _ (Indexed i) = sum i
  size _ (Fixed f) = sum f