1
1
mirror of https://github.com/github/semantic.git synced 2024-12-23 06:41:45 +03:00
semantic/src/Range.hs

42 lines
1.7 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

module Range where
import Control.Applicative ((<|>))
import qualified Data.Char as Char
-- | A span of positions within a sequence, described by where it starts and
-- where it ends.
data Range = Range
  { start :: Int -- ^ Index of the first position covered by the range.
  , end :: Int -- ^ Index just past the last position covered ('substring' takes @end - start@ elements).
  }
  deriving (Eq, Show)
-- | Extract the part of a string covered by a range: skip the first
-- @start range@ characters, then keep the next @end range - start range@.
substring :: Range -> String -> String
substring range text = take width (drop offset text)
  where
    offset = start range
    width = end range - offset
-- | The range covering an entire list: from index 0 up to the list's length.
totalRange :: [a] -> Range
totalRange = Range 0 . length
-- | Shift a range by adding the given amount to both of its endpoints.
offsetRange :: Int -> Range -> Range
offsetRange i (Range from to) = Range (shift from) (shift to)
  where
    shift = (i +)
-- | Split a string into words and runs of punctuation, pairing each piece with
-- the 'Range' it occupies. Positions are counted from the given starting index.
-- Whitespace yields no entry of its own, but still advances the position.
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
rangesAndWordsFrom _ "" = []
rangesAndWordsFrom startIndex string =
  maybe skipWhitespace takeAndContinue (parse isWord <|> parse (not . isWordOrSpace))
  where
    -- A non-empty string starts with a word character, whitespace, or neither,
    -- so one of the three scans always succeeds; the [] default is only a
    -- fallback that keeps the function total.
    skipWhitespace = maybe [] skipAndContinue (parse Char.isSpace)

    -- Emit the parsed piece together with its range, then carry on after it.
    takeAndContinue (parsed, rest) = (Range startIndex next, parsed) : rangesAndWordsFrom next rest
      where
        next = startIndex + length parsed

    -- Produce nothing for the parsed piece, but still advance past it.
    skipAndContinue (parsed, rest) = rangesAndWordsFrom (startIndex + length parsed) rest

    -- The longest non-empty prefix of the input whose characters all satisfy
    -- the predicate, paired with the remainder; Nothing when the first
    -- character does not satisfy it.
    parse predicate = case span predicate string of
      ([], _) -> Nothing
      found -> Just found

    isWordOrSpace c = Char.isSpace c || isWord c

    -- Is this a word character?
    -- Word characters are defined as in Ruby's `\p{Word}` syntax
    -- (http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e.:
    -- > A member of one of the following Unicode general category _Letter_, _Mark_, _Number_, _Connector_Punctuation_
    isWord c =
      Char.isLetter c
        || Char.isNumber c
        || Char.isMark c
        || Char.generalCategory c == Char.ConnectorPunctuation
-- | Ranges are ordered by where they start, with ties broken by where they end.
--
-- Comparing on 'start' alone would make two distinct ranges that share a start
-- each '<=' the other without being '==', violating the antisymmetry that
-- 'Ord' requires and that sorting and ordered containers rely on. Ranges with
-- different starts compare exactly as they would on 'start' alone.
instance Ord Range where
  compare a b = compare (start a, end a) (start b, end b)