1
1
mirror of https://github.com/github/semantic.git synced 2025-01-02 12:23:08 +03:00

Parse spans of words & non-word punctuation.

This commit is contained in:
Rob Rix 2015-12-14 16:20:42 -05:00
parent be9e4c7a01
commit 835a3a0245

View File

@ -15,10 +15,17 @@ offsetRange :: Int -> Range -> Range
-- Shift a range by a fixed amount: the offset is added to both its start and its end.
offsetRange delta (Range lower upper) = Range (delta + lower) (delta + upper)
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
rangesAndWordsFrom startIndex string = case break (not . isWord) string of
([], []) -> []
([], rest) | (whitespace, rest) <- break isWord rest -> rangesAndWordsFrom (startIndex + length whitespace) rest
(word, rest) -> (Range startIndex $ startIndex + length word, word) : case break isWord rest of (whitespace, rest) -> rangesAndWordsFrom (startIndex + length word + length whitespace) rest
rangesAndWordsFrom _ "" = []
rangesAndWordsFrom startIndex string =
case parse isWord string of
Just parsed -> takeAndContinue parsed
Nothing ->
case parse (not . isWordOrSeparator) string of
Just parsed -> takeAndContinue parsed
Nothing ->
case parse Char.isSeparator string of
Just (space, rest) -> rangesAndWordsFrom (startIndex + length space) rest
Nothing -> []
where
takeAndContinue (parsed, rest) = (Range startIndex $ startIndex + length parsed, parsed) : rangesAndWordsFrom (startIndex + length parsed) rest
parse predicate string = case span predicate string of