diff --git a/app/Main.hs b/app/Main.hs index d9bebc4a0..751c10985 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -30,10 +30,12 @@ import Data.Tagged import Control.Monad.Reader import System.Environment +-- | The available types of diff rendering. data Renderer = Unified | Split | Patch data Arguments = Arguments { renderer :: Renderer, output :: Maybe FilePath, shaA :: String, shaB :: String, filepaths :: [FilePath] } +-- | A parser for the application's command-line arguments. arguments :: Parser Arguments arguments = Arguments <$> (flag Split Unified (long "unified" <> help "output a unified diff") @@ -94,6 +96,7 @@ printDiff arguments filepath (aSource, bSource) (aTerm, bTerm) = case renderer a where diff = interpret comparable aTerm bTerm write rendered h = TextIO.hPutStr h rendered +-- | Replace every string leaf with leaves of the words in the string. breakDownLeavesByWord :: Source Char -> Term T.Text Info -> Term T.Text Info breakDownLeavesByWord source = cata replaceIn where @@ -102,6 +105,7 @@ breakDownLeavesByWord source = cata replaceIn rangesAndWordsInSource range = rangesAndWordsFrom (start range) (toList $ slice range source) makeLeaf categories (range, substring) = Info range categories :< Leaf (T.pack substring) +-- | Read the file and convert it to Unicode. readAndTranscodeFile :: FilePath -> IO (Source Char) readAndTranscodeFile path = do text <- B1.readFile path diff --git a/src/Range.hs b/src/Range.hs index bb2073219..482757b50 100644 --- a/src/Range.hs +++ b/src/Range.hs @@ -11,48 +11,56 @@ import Data.Semigroup data Range = Range { start :: !Int, end :: !Int } deriving (Eq, Show) +-- | Return the length of the range. rangeLength :: Range -> Int rangeLength range = end range - start range +-- | Return the portion of the text identified by the given range. substring :: Range -> T.Text -> T.Text substring range = T.take (rangeLength range) . T.drop (start range) +-- | Return the portion of the list identified by the given range. sublist :: Range -> [a] -> [a] sublist range = take (rangeLength range) . drop (start range) +-- | Return a range that covers the entire text. totalRange :: T.Text -> Range totalRange t = Range 0 $ T.length t +-- | Return a range that has its start and end offset by the given amount. offsetRange :: Int -> Range -> Range offsetRange i (Range start end) = Range (i + start) (i + end) +-- | Break a string down into words and sequences of punctuation. Return a list +-- | strings with ranges, assuming that the first character in the string is +-- | at the given index. rangesAndWordsFrom :: Int -> String -> [(Range, String)] rangesAndWordsFrom _ "" = [] -rangesAndWordsFrom startIndex string = fromMaybe [] $ takeAndContinue <$> (word <|> punctuation) <|> skipAndContinue <$> space +rangesAndWordsFrom startIndex string = fromMaybe [] $ take isWord <|> take isPunctuation <|> skip Char.isSpace where - word = parse isWord string - punctuation = parse (not . isWordOrSpace) string - space = parse Char.isSpace string - takeAndContinue (parsed, rest) = (Range startIndex $ endFor parsed, parsed) : rangesAndWordsFrom (endFor parsed) rest - skipAndContinue (parsed, rest) = rangesAndWordsFrom (endFor parsed) rest + save parsed = (Range startIndex $ endFor parsed, parsed) + take = parse (Just . save) + skip = parse (const Nothing) endFor parsed = startIndex + length parsed - parse predicate string = case span predicate string of + parse transform predicate = case span predicate string of ([], _) -> Nothing - (parsed, rest) -> Just (parsed, rest) - isWordOrSpace c = Char.isSpace c || isWord c + (parsed, rest) -> Just $ maybe id (:) (transform parsed) $ rangesAndWordsFrom (endFor parsed) rest -- | Is this a word character? -- | Word characters are defined as in [Ruby’s `\p{Word}` syntax](http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e.: -- | > A member of one of the following Unicode general category _Letter_, _Mark_, _Number_, _Connector_Punctuation_ isWord c = Char.isLetter c || Char.isNumber c || Char.isMark c || Char.generalCategory c == Char.ConnectorPunctuation + isPunctuation c = not (Char.isSpace c || isWord c) -- | Return Just the last index from a non-empty range, or if the range is empty, Nothing. maybeLastIndex :: Range -> Maybe Int maybeLastIndex (Range start end) | start == end = Nothing maybeLastIndex (Range _ end) = Just $ end - 1 +-- | Return a range that contains both the given ranges. unionRange :: Range -> Range -> Range unionRange (Range start1 end1) (Range start2 end2) = Range (min start1 start2) (max end1 end2) +-- | Return a range that contains all the ranges in f. unionRanges :: (Functor f, Foldable f) => f Range -> Range unionRanges ranges = option (Range 0 0) id . foldl mappend mempty $ Option . Just <$> ranges diff --git a/src/Source.hs b/src/Source.hs index 9c5630179..6ad1dbbdf 100644 --- a/src/Source.hs +++ b/src/Source.hs @@ -5,42 +5,55 @@ import Range import qualified Data.Vector as Vector import qualified Data.Text as T +-- | The contents of a source file, backed by a vector for efficient slicing. newtype Source a = Source { getVector :: Vector.Vector a } deriving (Eq, Show, Functor, Foldable, Traversable) +-- | Return a Source from a list of items. fromList :: [a] -> Source a fromList = Source . Vector.fromList +-- | Return a Source of Chars from a Text. fromText :: T.Text -> Source Char fromText = Source . Vector.fromList . T.unpack +-- | Return a list of items with the contents of the Source. toList :: Source a -> [a] toList = Vector.toList . getVector +-- | Return a Source that contains a slice of the given Source. slice :: Range -> Source a -> Source a slice range = Source . Vector.slice (start range) (rangeLength range) . getVector +-- | Return a String with the contents of the Source. toString :: Source Char -> String toString = toList +-- | Return the item at the given index. at :: Source a -> Int -> a at = (Vector.!) . getVector +-- | Test whether the source is empty. null :: Source a -> Bool null = Vector.null . getVector +-- | Prepend an item. cons :: a -> Source a -> Source a cons a = Source . Vector.cons a . getVector +-- | Remove the first item and return it with the rest of the source. uncons :: Source a -> Maybe (a, Source a) uncons (Source vector) = if Vector.null vector then Nothing else Just (Vector.head vector, Source $ Vector.tail vector) +-- | Split the source into the longest prefix of elements that do not satisfy the predicate and the rest without copying. break :: (a -> Bool) -> Source a -> (Source a, Source a) break predicate (Source vector) = let (start, remainder) = Vector.break predicate vector in (Source start, Source remainder) +-- | Concatenate two sources. (++) :: Source a -> Source a -> Source a (++) (Source a) = Source . (a Vector.++) . getVector +-- | Split the contents of the source after newlines. actualLines :: Source Char -> [Source Char] actualLines source | Source.null source = [ source ] actualLines source = case Source.break (== '\n') source of diff --git a/src/Term.hs b/src/Term.hs index 0738e1218..64b63a2b2 100644 --- a/src/Term.hs +++ b/src/Term.hs @@ -5,8 +5,11 @@ import Data.Maybe import Control.Comonad.Cofree import Syntax +-- | An annotated node (Syntax) in an abstract syntax tree. type Term a annotation = Cofree (Syntax a) annotation +-- | Zip two terms by combining their annotations into a pair of annotations. +-- | If the structure of the two terms don't match, then Nothing will be returned. zipTerms :: Term a annotation -> Term a annotation -> Maybe (Term a (annotation, annotation)) zipTerms (annotation1 :< a) (annotation2 :< b) = annotate $ zipUnwrap a b where @@ -18,11 +21,13 @@ zipTerms (annotation1 :< a) (annotation2 :< b) = annotate $ zipUnwrap a b zipUnwrap _ _ = Nothing zipUnwrapMaps a' b' key = (,) key <$> zipTerms (a' ! key) (b' ! key) +-- | Fold a term into some other value, starting with the leaves. cata :: (annotation -> Syntax a b -> b) -> Term a annotation -> b cata f (annotation :< syntax) = f annotation $ cata f <$> syntax +-- | Return the number of leaves in the node. termSize :: Term a annotation -> Integer -termSize term = cata size term where +termSize = cata size where size _ (Leaf _) = 1 size _ (Indexed i) = sum i size _ (Fixed f) = sum f