1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 06:41:45 +03:00
semantic/src/Range.hs

64 lines
2.5 KiB
Haskell
Raw Normal View History

{-# LANGUAGE FlexibleInstances #-}
2015-12-03 05:40:34 +03:00
module Range where
2015-12-15 19:48:26 +03:00
import Control.Applicative ((<|>))
import qualified Data.Char as Char
import Data.Foldable (foldl')
import Data.Maybe (fromMaybe)
import Data.Semigroup
import qualified Data.Text as T
2015-12-14 20:44:48 +03:00
2015-12-21 20:23:21 +03:00
-- | A half-open interval of integers: it covers every index from 'start' up to, but not including, 'end'.
data Range = Range
  { start :: !Int -- ^ The first index covered by the range.
  , end :: !Int -- ^ The index just past the last one covered by the range.
  }
  deriving (Eq, Show)
-- | The distance from the start of a range to its end, i.e. the number of indices it covers.
rangeLength :: Range -> Int
rangeLength (Range from to) = to - from
2015-12-15 19:48:26 +03:00
-- | Extract the portion of a text covered by a range, counting characters from the beginning of the text.
substring :: Range -> T.Text -> T.Text
substring range text = T.take (rangeLength range) (T.drop (start range) text)
2015-12-03 05:40:34 +03:00
2015-12-24 05:39:35 +03:00
-- | Extract the elements of a list covered by a range, counting elements from the head of the list.
sublist :: Range -> [a] -> [a]
sublist range list = take (rangeLength range) (drop (start range) list)
2015-12-24 05:39:35 +03:00
2015-12-15 23:08:24 +03:00
-- | The range spanning an entire text, from index 0 up to the text's length.
totalRange :: T.Text -> Range
totalRange = Range 0 . T.length
2015-12-14 20:20:51 +03:00
-- | Shift a range by adding the given offset to both its start and its end.
offsetRange :: Int -> Range -> Range
offsetRange i range = Range (i + start range) (i + end range)
-- | Split a string into its words and runs of punctuation, pairing each with the range it occupies.
--
--   Indices count characters starting from the given index. Runs of whitespace produce no entry, but still advance the index.
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
rangesAndWordsFrom _ "" = []
rangesAndWordsFrom startIndex string = case leading isWord <|> leading isPunctuation of
  Just (token, remainder) -> (Range startIndex (endOf token), token) : rangesAndWordsFrom (endOf token) remainder
  Nothing -> case leading Char.isSpace of
    Just (whitespace, remainder) -> rangesAndWordsFrom (endOf whitespace) remainder
    Nothing -> []
  where
    -- The index immediately after a run of characters which begins at startIndex.
    endOf run = startIndex + length run
    -- The longest non-empty prefix of the string whose characters all satisfy the predicate, paired with the rest of the string.
    leading predicate
      | null matched = Nothing
      | otherwise = Just (matched, remainder)
      where (matched, remainder) = span predicate string
    -- Anything which is neither whitespace nor a word character counts as punctuation.
    isPunctuation c = not (Char.isSpace c) && not (isWord c)
    -- Is this a word character?
    -- Word characters are defined as in [Ruby's `\p{Word}` syntax](http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e.:
    -- > A member of one of the following Unicode general category _Letter_, _Mark_, _Number_, _Connector_Punctuation_
    isWord c = or
      [ Char.isLetter c
      , Char.isNumber c
      , Char.isMark c
      , Char.generalCategory c == Char.ConnectorPunctuation
      ]
2015-12-21 20:22:52 +03:00
-- | The last index covered by a range, or Nothing when the range's start and end coincide (i.e. it is empty).
maybeLastIndex :: Range -> Maybe Int
maybeLastIndex range
  | start range == end range = Nothing
  | otherwise = Just (end range - 1)
2015-12-14 20:44:48 +03:00
2016-01-13 17:49:50 +03:00
-- | The smallest range covering both of the given ranges: it runs from the lesser start to the greater end.
unionRange :: Range -> Range -> Range
unionRange a b = Range (start a `min` start b) (end a `max` end b)
-- | The smallest range covering every range in a collection, or @Range 0 0@ when the collection is empty.
--
--   This folds strictly with 'unionRange' instead of wrapping each element in "Data.Semigroup"'s @Option@,
--   which is no longer available as of base 4.16, and instead of a lazy @foldl@, which accumulates thunks.
unionRanges :: Foldable f => f Range -> Range
unionRanges = fromMaybe (Range 0 0) . foldl' combine Nothing
  where
    -- Nothing means no range has been seen yet; afterwards the accumulator holds the union so far,
    -- forced at every step so that no chain of pending unions builds up.
    combine Nothing range = Just range
    combine (Just soFar) range = Just $! unionRange soFar range
-- | Ranges combine by union: the result is the smallest range covering both operands.
instance Semigroup Range where
  a <> b = unionRange a b
2015-12-03 05:40:34 +03:00
-- | Ranges are ordered by their start index, with ties broken by their end index.
--
--   Comparing starts alone made both @Range 0 1 <= Range 0 2@ and @Range 0 2 <= Range 0 1@ hold for unequal ranges.
--   That disagrees with the derived 'Eq' instance and makes the default 'compare' answer LT in both directions,
--   so sorting, 'min' and 'max' were unreliable. Ranges with distinct starts are ordered exactly as before.
instance Ord Range where
  compare a b = compare (start a, end a) (start b, end b)