mirror of
https://github.com/github/semantic.git
synced 2024-12-22 14:21:31 +03:00
Parse spans of words & non-word punctuation.
This commit is contained in:
parent
be9e4c7a01
commit
835a3a0245
15
src/Range.hs
15
src/Range.hs
@@ -15,10 +15,17 @@ offsetRange :: Int -> Range -> Range
|
|||||||
offsetRange i (Range start end) = Range (i + start) (i + end)
|
offsetRange i (Range start end) = Range (i + start) (i + end)
|
||||||
|
|
||||||
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
|
rangesAndWordsFrom :: Int -> String -> [(Range, String)]
|
||||||
rangesAndWordsFrom startIndex string = case break (not . isWord) string of
|
rangesAndWordsFrom _ "" = []
|
||||||
([], []) -> []
|
rangesAndWordsFrom startIndex string =
|
||||||
([], rest) | (whitespace, rest) <- break isWord rest -> rangesAndWordsFrom (startIndex + length whitespace) rest
|
case parse isWord string of
|
||||||
(word, rest) -> (Range startIndex $ startIndex + length word, word) : case break isWord rest of (whitespace, rest) -> rangesAndWordsFrom (startIndex + length word + length whitespace) rest
|
Just parsed -> takeAndContinue parsed
|
||||||
|
Nothing ->
|
||||||
|
case parse (not . isWordOrSeparator) string of
|
||||||
|
Just parsed -> takeAndContinue parsed
|
||||||
|
Nothing ->
|
||||||
|
case parse Char.isSeparator string of
|
||||||
|
Just (space, rest) -> rangesAndWordsFrom (startIndex + length space) rest
|
||||||
|
Nothing -> []
|
||||||
where
|
where
|
||||||
takeAndContinue (parsed, rest) = (Range startIndex $ startIndex + length parsed, parsed) : rangesAndWordsFrom (startIndex + length parsed) rest
|
takeAndContinue (parsed, rest) = (Range startIndex $ startIndex + length parsed, parsed) : rangesAndWordsFrom (startIndex + length parsed) rest
|
||||||
parse predicate string = case span predicate string of
|
parse predicate string = case span predicate string of
|
||||||
|
Loading…
Reference in New Issue
Block a user