From 835a3a02458d10e200a34175fa5844575d089c67 Mon Sep 17 00:00:00 2001 From: Rob Rix Date: Mon, 14 Dec 2015 16:20:42 -0500 Subject: [PATCH] Parse spans of words & non-word punctuation. --- src/Range.hs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Range.hs b/src/Range.hs index 7c68de975..d939d5590 100644 --- a/src/Range.hs +++ b/src/Range.hs @@ -15,10 +15,17 @@ offsetRange :: Int -> Range -> Range offsetRange i (Range start end) = Range (i + start) (i + end) rangesAndWordsFrom :: Int -> String -> [(Range, String)] -rangesAndWordsFrom startIndex string = case break (not . isWord) string of - ([], []) -> [] - ([], rest) | (whitespace, rest) <- break isWord rest -> rangesAndWordsFrom (startIndex + length whitespace) rest - (word, rest) -> (Range startIndex $ startIndex + length word, word) : case break isWord rest of (whitespace, rest) -> rangesAndWordsFrom (startIndex + length word + length whitespace) rest +rangesAndWordsFrom _ "" = [] +rangesAndWordsFrom startIndex string = + case parse isWord string of + Just parsed -> takeAndContinue parsed + Nothing -> + case parse (not . isWordOrSeparator) string of + Just parsed -> takeAndContinue parsed + Nothing -> + case parse Char.isSeparator string of + Just (space, rest) -> rangesAndWordsFrom (startIndex + length space) rest + Nothing -> [] where takeAndContinue (parsed, rest) = (Range startIndex $ startIndex + length parsed, parsed) : rangesAndWordsFrom (startIndex + length parsed) rest parse predicate string = case span predicate string of