2015-12-03 05:40:34 +03:00
module Range where
2015-12-15 00:23:09 +03:00
import Control.Applicative ( ( <|> ) )
2015-12-14 20:44:48 +03:00
import qualified Data.Char as Char
2015-12-03 05:40:34 +03:00
-- | A span of positions described by a pair of 'Int' bounds.
data Range = Range
  { start :: Int -- ^ Position at which the span begins.
  , end :: Int -- ^ Position at which the span stops.
  }
  deriving (Eq, Show)
-- | Extract the part of a string covered by a range: the characters from
-- index 'start' up to, but not including, index 'end'.
substring :: Range -> String -> String
substring range text = take width (drop from text)
  where
    from = start range
    width = end range - from
2015-12-04 17:20:23 +03:00
-- | The range spanning a whole list: it starts at 0 and ends at the list's
-- length.
totalRange :: [a] -> Range
totalRange = Range 0 . length
2015-12-14 20:20:51 +03:00
-- | Shift both bounds of a range by the given amount.
offsetRange :: Int -> Range -> Range
offsetRange delta (Range from to) = Range (delta + from) (delta + to)
2015-12-14 23:37:30 +03:00
-- | Break a string into its words and punctuation runs, pairing each one with
-- the range it occupies. Ranges are counted from the supplied starting index;
-- runs of whitespace advance the index but produce no entry of their own.
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
rangesAndWordsFrom _ [] = []
rangesAndWordsFrom startIndex string@(firstChar : _)
  | isWord firstChar = keep (span isWord string)
  | not (isWordOrSpace firstChar) = keep (span (not . isWordOrSpace) string)
  -- Neither a word character nor punctuation, so this is whitespace.
  | otherwise = continueAfter (span Char.isSpace string)
  where
    -- Record the token with its range, then carry on scanning after it.
    keep run@(token, _) =
      (Range startIndex (startIndex + length token), token) : continueAfter run

    -- Resume scanning immediately past the characters just consumed.
    continueAfter (consumed, remainder) =
      rangesAndWordsFrom (startIndex + length consumed) remainder

    isWordOrSpace c = isWord c || Char.isSpace c

    -- Is this a word character?
    -- Word characters are defined as in Ruby’s `\p{Word}` syntax
    -- (http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties),
    -- i.e. a member of one of the following Unicode general categories:
    -- Letter, Mark, Number, Connector_Punctuation.
    isWord c =
      Char.isLetter c
        || Char.isNumber c
        || Char.isMark c
        || Char.generalCategory c == Char.ConnectorPunctuation
2015-12-14 20:44:48 +03:00
2015-12-03 05:40:34 +03:00
-- | Ranges are ordered by their 'start', with ties broken by their 'end'.
--
-- The ordering is defined through 'compare' on both bounds so that it agrees
-- with the derived 'Eq' instance: two ranges compare as 'EQ' exactly when
-- they are '=='. Comparing on 'start' alone made two distinct ranges that
-- share a start each report as less than the other.
instance Ord Range where
  compare a b = compare (start a, end a) (start b, end b)