mirror of
https://github.com/github/semantic.git
synced 2024-12-23 06:41:45 +03:00
42 lines
1.7 KiB
Haskell
42 lines
1.7 KiB
Haskell
module Range where
|
||
|
||
import Control.Applicative ((<|>))
|
||
import qualified Data.Char as Char
|
||
|
||
data Range = Range { start :: Int, end :: Int }
|
||
deriving (Eq, Show)
|
||
|
||
substring :: Range -> String -> String
|
||
substring range = take (end range - start range) . drop (start range)
|
||
|
||
totalRange :: [a] -> Range
|
||
totalRange list = Range 0 $ length list
|
||
|
||
offsetRange :: Int -> Range -> Range
|
||
offsetRange i (Range start end) = Range (i + start) (i + end)
|
||
|
||
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
|
||
rangesAndWordsFrom _ "" = []
|
||
rangesAndWordsFrom startIndex string =
|
||
case takeAndContinue <$> (parse isWord string <|> parse (not . isWordOrSpace) string) of
|
||
Just a -> a
|
||
Nothing ->
|
||
case parse Char.isSpace string of
|
||
Just parsed -> skipAndContinue parsed
|
||
Nothing -> []
|
||
where
|
||
takeAndContinue (parsed, rest) = (Range startIndex $ startIndex + length parsed, parsed) : rangesAndWordsFrom (startIndex + length parsed) rest
|
||
skipAndContinue (parsed, rest) = rangesAndWordsFrom (startIndex + length parsed) rest
|
||
parse predicate string = case span predicate string of
|
||
([], _) -> Nothing
|
||
(parsed, rest) -> Just (parsed, rest)
|
||
isWordOrSpace c = Char.isSpace c || isWord c
|
||
-- | Is this a word character?
|
||
-- | Word characters are defined as in [Ruby’s `\p{Word}` syntax](http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties), i.e.:
|
||
-- | > A member of one of the following Unicode general category _Letter_, _Mark_, _Number_, _Connector_Punctuation_
|
||
isWord c = Char.isLetter c || Char.isNumber c || Char.isMark c || Char.generalCategory c == Char.ConnectorPunctuation
|
||
|
||
|
||
instance Ord Range where
|
||
a <= b = start a <= start b
|