2015-12-03 05:40:34 +03:00
module Range where
2015-12-15 00:23:09 +03:00
import Control.Applicative ( ( <|> ) )
2015-12-14 20:44:48 +03:00
import qualified Data.Char as Char
2015-12-03 05:40:34 +03:00
-- | A span of indices into a list or string, running from 'start' up to, but
-- not including, 'end'.
data Range = Range
  { start :: Int -- ^ Index of the first element covered by the range.
  , end :: Int -- ^ Index just past the last element covered by the range.
  }
  deriving (Eq, Show)
-- | Extract the part of a string covered by a range: the characters from
-- index 'start' up to, but not including, index 'end'.
substring :: Range -> String -> String
substring range string = take size (drop from string)
  where
    from = start range
    size = end range - from
2015-12-04 17:20:23 +03:00
-- | The range spanning an entire list: from index 0 to the list's length.
totalRange :: [a] -> Range
totalRange = Range 0 . length
2015-12-14 20:20:51 +03:00
-- | Shift a range by a number of indices, moving both of its bounds by the
-- same amount.
offsetRange :: Int -> Range -> Range
offsetRange delta (Range from to) = Range (delta + from) (delta + to)
2015-12-14 23:37:30 +03:00
-- | Split a string into its words and runs of punctuation, pairing each one
-- with the 'Range' of character indices it occupies.
--
-- Ranges are offset by @startIndex@, which is taken to be the index of the
-- string's first character. Runs of whitespace produce no element of their
-- own, but their characters still count towards the indices of whatever
-- follows them.
--
-- Every character is exactly one of a word character, whitespace, or
-- punctuation, so the whole input is always consumed.
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
rangesAndWordsFrom _ "" = []
rangesAndWordsFrom startIndex string =
  case parse isWord <|> parse isPunctuation of
    Just (parsed, rest) ->
      (Range startIndex (endOf parsed), parsed) : rangesAndWordsFrom (endOf parsed) rest
    Nothing ->
      -- Neither a word nor punctuation, so the string starts with whitespace:
      -- skip over it, but keep counting its characters.
      case parse Char.isSpace of
        Just (space, rest) -> rangesAndWordsFrom (endOf space) rest
        -- Unreachable: a non-empty string always starts with a word
        -- character, punctuation, or whitespace.
        Nothing -> []
  where
    -- The index just past a run of characters taken from the front of the string.
    endOf parsed = startIndex + length parsed

    -- Take the longest non-empty prefix of the string whose characters all
    -- satisfy the predicate, along with the remainder of the string.
    parse predicate = case span predicate string of
      ([], _) -> Nothing
      result -> Just result

    -- Is this a word character?
    -- Word characters are defined as in Ruby's `\p{Word}` syntax
    -- (http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties),
    -- i.e. a member of one of the Unicode general categories Letter, Mark,
    -- Number, or Connector_Punctuation.
    isWord c =
      Char.isLetter c
        || Char.isNumber c
        || Char.isMark c
        || Char.generalCategory c == Char.ConnectorPunctuation

    -- Punctuation is whatever is neither whitespace nor a word character.
    -- It is deliberately the complement of 'Char.isSpace' (the predicate used
    -- for skipping) rather than of 'Char.isSeparator': the two disagree on
    -- control whitespace such as tabs and newlines, and on U+2028/U+2029, and
    -- a character claimed by neither step would cut the result short.
    isPunctuation c = not (Char.isSpace c || isWord c)
2015-12-14 20:44:48 +03:00
2015-12-03 05:40:34 +03:00
-- | Ranges are ordered by their 'start' index, with ties broken by their
-- 'end' index.
--
-- Comparing both fields keeps the ordering consistent with the derived 'Eq'
-- instance: two ranges compare as 'EQ' exactly when they are equal. Ordering
-- on 'start' alone would make two distinct ranges sharing a start index each
-- compare as strictly less than the other.
instance Ord Range where
  compare a b = compare (start a, end a) (start b, end b)