2015-12-03 05:40:34 +03:00
module Range where
2015-12-14 20:44:48 +03:00
import qualified Data.Char as Char
2015-12-03 05:40:34 +03:00
-- | A span of indices delimited by a 'start' and an 'end' index.
--
-- 'substring' treats the span as half-open: 'start' is included and 'end'
-- is excluded.
data Range = Range
  { start :: Int -- ^ Index at which the span begins.
  , end :: Int -- ^ Index at which the span stops.
  }
  deriving (Eq, Show)
-- | Extract the part of a string covered by a range.
--
-- Skips the first @start range@ characters and then keeps the following
-- @end range - start range@ characters.
substring :: Range -> String -> String
substring range string = take width (drop offset string)
  where
    offset = start range
    width = end range - offset
2015-12-04 17:20:23 +03:00
-- | The range spanning an entire list: it starts at 0 and ends at the
-- list's length.
totalRange :: [a] -> Range
totalRange = Range 0 . length
2015-12-14 20:20:51 +03:00
-- | Shift a range by the given amount, adding it to both endpoints.
offsetRange :: Int -> Range -> Range
offsetRange i (Range from to) = Range (shift from) (shift to)
  where
    -- Local names avoid shadowing the 'start' and 'end' record selectors.
    shift = (i +)
2015-12-14 23:37:30 +03:00
-- | Split a string into its words, pairing each word with the range it
-- occupies.
--
-- A word is a maximal run of word characters (see @isWord@ below). Every
-- other character is skipped: it produces no entry in the result and only
-- advances the running index.
--
-- The first argument is the index assigned to the first character of the
-- string; each resulting range starts at the index of the word's first
-- character and ends one past the index of its last character.
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
rangesAndWordsFrom startIndex string = case span isWord string of
  ([], []) -> []
  -- No word at the front: skip the leading run of non-word characters.
  ([], _) ->
    let (skipped, remainder) = break isWord string
    in rangesAndWordsFrom (startIndex + length skipped) remainder
  -- A word at the front: emit it, then carry on from just past its end.
  (word, remainder) ->
    let wordEnd = startIndex + length word
    in (Range startIndex wordEnd, word) : rangesAndWordsFrom wordEnd remainder
  where
    -- Is this a word character?
    -- Word characters are defined as in Ruby's `\p{Word}` syntax
    -- (http://ruby-doc.org/core-2.1.1/Regexp.html#class-Regexp-label-Character+Properties),
    -- i.e. a member of one of the following Unicode general categories:
    -- Letter, Mark, Number, Connector_Punctuation.
    isWord c =
      Char.isLetter c
        || Char.isNumber c
        || Char.isMark c
        || Char.generalCategory c == Char.ConnectorPunctuation
2015-12-14 20:44:48 +03:00
2015-12-03 05:40:34 +03:00
-- | Ranges are ordered by their 'start' index, with ties broken by 'end'.
--
-- Comparing 'start' alone would make two ranges with the same start but
-- different ends each @<=@ the other while the derived 'Eq' instance says
-- they differ, violating the antisymmetry law of 'Ord'. Including 'end' as
-- a tie-breaker keeps 'Ord' consistent with 'Eq'.
instance Ord Range where
  compare a b = compare (start a, end a) (start b, end b)