mirror of
https://github.com/simonmichael/hledger.git
synced 2024-12-28 12:54:07 +03:00
lib: add alternate regex utilities that don't call error (#1312)
This commit is contained in:
parent
ede4bfd5b4
commit
a112085092
@ -35,7 +35,7 @@ module Hledger.Utils.Regex (
|
|||||||
-- * type aliases
|
-- * type aliases
|
||||||
Regexp
|
Regexp
|
||||||
,Replacement
|
,Replacement
|
||||||
-- * standard regex operations
|
-- * partial regex operations (may call error)
|
||||||
,regexMatches
|
,regexMatches
|
||||||
,regexMatchesCI
|
,regexMatchesCI
|
||||||
,regexReplace
|
,regexReplace
|
||||||
@ -44,9 +44,19 @@ module Hledger.Utils.Regex (
|
|||||||
,regexReplaceCIMemo
|
,regexReplaceCIMemo
|
||||||
,regexReplaceBy
|
,regexReplaceBy
|
||||||
,regexReplaceByCI
|
,regexReplaceByCI
|
||||||
|
-- * total regex operations
|
||||||
|
,regexMatches_
|
||||||
|
,regexMatchesCI_
|
||||||
|
,regexReplace_
|
||||||
|
,regexReplaceCI_
|
||||||
|
,regexReplaceMemo_
|
||||||
|
,regexReplaceCIMemo_
|
||||||
|
,regexReplaceBy_
|
||||||
|
,regexReplaceByCI_
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
|
import Control.Monad (foldM)
|
||||||
import Data.Array
|
import Data.Array
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.List (foldl')
|
import Data.List (foldl')
|
||||||
@ -66,23 +76,14 @@ type Regexp = String
|
|||||||
-- | A replacement pattern. May include numeric backreferences (\N).
|
-- | A replacement pattern. May include numeric backreferences (\N).
|
||||||
type Replacement = String
|
type Replacement = String
|
||||||
|
|
||||||
-- | Convert our string-based Regexp to a real Regex.
|
-- | An regular expression compilation/processing error message.
|
||||||
-- Or if it's not well formed, call error with a "malformed regexp" message.
|
type Error = String
|
||||||
toRegex :: Regexp -> Regex
|
|
||||||
toRegex = memo (compileRegexOrError defaultCompOpt defaultExecOpt) -- PARTIAL:
|
|
||||||
|
|
||||||
-- | Like toRegex but make a case-insensitive Regex.
|
--------------------------------------------------------------------------------
|
||||||
toRegexCI :: Regexp -> Regex
|
-- old partial functions -- PARTIAL:
|
||||||
toRegexCI = memo (compileRegexOrError defaultCompOpt{caseSensitive=False} defaultExecOpt) -- PARTIAL:
|
|
||||||
|
|
||||||
compileRegexOrError :: CompOption -> ExecOption -> Regexp -> Regex
|
|
||||||
compileRegexOrError compopt execopt r =
|
|
||||||
fromMaybe
|
|
||||||
(error' $ "this regular expression could not be compiled: " ++ show r) $ -- PARTIAL:
|
|
||||||
makeRegexOptsM compopt execopt r
|
|
||||||
|
|
||||||
-- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a
|
-- regexMatch' :: RegexContext Regexp String a => Regexp -> String -> a
|
||||||
-- regexMatch' r s = s =~ (toRegex r)
|
-- regexMatch' r s = s =~ (toRegex' r)
|
||||||
|
|
||||||
regexMatches :: Regexp -> String -> Bool
|
regexMatches :: Regexp -> String -> Bool
|
||||||
regexMatches = flip (=~)
|
regexMatches = flip (=~)
|
||||||
@ -90,13 +91,6 @@ regexMatches = flip (=~)
|
|||||||
regexMatchesCI :: Regexp -> String -> Bool
|
regexMatchesCI :: Regexp -> String -> Bool
|
||||||
regexMatchesCI r = match (toRegexCI r)
|
regexMatchesCI r = match (toRegexCI r)
|
||||||
|
|
||||||
-- | Replace all occurrences of the regexp, transforming each match with the given function.
|
|
||||||
regexReplaceBy :: Regexp -> (String -> String) -> String -> String
|
|
||||||
regexReplaceBy r = replaceAllBy (toRegex r)
|
|
||||||
|
|
||||||
regexReplaceByCI :: Regexp -> (String -> String) -> String -> String
|
|
||||||
regexReplaceByCI r = replaceAllBy (toRegexCI r)
|
|
||||||
|
|
||||||
-- | Replace all occurrences of the regexp with the replacement
|
-- | Replace all occurrences of the regexp with the replacement
|
||||||
-- pattern. The replacement pattern supports numeric backreferences
|
-- pattern. The replacement pattern supports numeric backreferences
|
||||||
-- (\N) but no other RE syntax.
|
-- (\N) but no other RE syntax.
|
||||||
@ -114,36 +108,154 @@ regexReplaceMemo re repl = memo (regexReplace re repl)
|
|||||||
regexReplaceCIMemo :: Regexp -> Replacement -> String -> String
|
regexReplaceCIMemo :: Regexp -> Replacement -> String -> String
|
||||||
regexReplaceCIMemo re repl = memo (regexReplaceCI re repl)
|
regexReplaceCIMemo re repl = memo (regexReplaceCI re repl)
|
||||||
|
|
||||||
--
|
-- | Replace all occurrences of the regexp, transforming each match with the given function.
|
||||||
|
regexReplaceBy :: Regexp -> (String -> String) -> String -> String
|
||||||
|
regexReplaceBy r = replaceAllBy (toRegex r)
|
||||||
|
|
||||||
|
regexReplaceByCI :: Regexp -> (String -> String) -> String -> String
|
||||||
|
regexReplaceByCI r = replaceAllBy (toRegexCI r)
|
||||||
|
|
||||||
|
-- helpers
|
||||||
|
|
||||||
|
-- | Convert our string-based Regexp to a real Regex.
|
||||||
|
-- Or if it's not well formed, call error with a "malformed regexp" message.
|
||||||
|
toRegex :: Regexp -> Regex
|
||||||
|
toRegex = memo (compileRegex defaultCompOpt defaultExecOpt) -- PARTIAL:
|
||||||
|
|
||||||
|
-- | Like toRegex but make a case-insensitive Regex.
|
||||||
|
toRegexCI :: Regexp -> Regex
|
||||||
|
toRegexCI = memo (compileRegex defaultCompOpt{caseSensitive=False} defaultExecOpt) -- PARTIAL:
|
||||||
|
|
||||||
|
compileRegex :: CompOption -> ExecOption -> Regexp -> Regex
|
||||||
|
compileRegex compopt execopt r =
|
||||||
|
fromMaybe
|
||||||
|
(error $ "this regular expression could not be compiled: " ++ show r) $ -- PARTIAL:
|
||||||
|
makeRegexOptsM compopt execopt r
|
||||||
|
|
||||||
replaceRegex :: Regex -> Replacement -> String -> String
|
replaceRegex :: Regex -> Replacement -> String -> String
|
||||||
replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String])
|
replaceRegex re repl s = foldl (replaceMatch repl) s (reverse $ match re s :: [MatchText String])
|
||||||
|
|
||||||
replaceMatch :: Replacement -> String -> MatchText String -> String
|
|
||||||
replaceMatch replpat s matchgroups = pre ++ repl ++ post
|
|
||||||
where
|
where
|
||||||
((_,(off,len)):_) = elems matchgroups -- groups should have 0-based indexes, and there should always be at least one, since this is a match
|
replaceMatch :: Replacement -> String -> MatchText String -> String
|
||||||
(pre, post') = splitAt off s
|
replaceMatch replpat s matchgroups = pre ++ repl ++ post
|
||||||
post = drop len post'
|
where
|
||||||
repl = replaceAllBy (toRegex "\\\\[0-9]+") (replaceBackReference matchgroups) replpat
|
((_,(off,len)):_) = elems matchgroups -- groups should have 0-based indexes, and there should always be at least one, since this is a match
|
||||||
|
(pre, post') = splitAt off s
|
||||||
|
post = drop len post'
|
||||||
|
repl = replaceAllBy (toRegex "\\\\[0-9]+") (lookupMatchGroup matchgroups) replpat
|
||||||
|
where
|
||||||
|
lookupMatchGroup :: MatchText String -> String -> String
|
||||||
|
lookupMatchGroup grps ('\\':s@(_:_)) | all isDigit s =
|
||||||
|
case read s of n | n `elem` indices grps -> fst (grps ! n)
|
||||||
|
-- PARTIAL:
|
||||||
|
_ -> error' $ "no match group exists for backreference \"\\"++s++"\""
|
||||||
|
lookupMatchGroup _ s = error' $ "lookupMatchGroup called on non-numeric-backreference \""++s++"\", shouldn't happen"
|
||||||
|
|
||||||
replaceBackReference :: MatchText String -> String -> String
|
--------------------------------------------------------------------------------
|
||||||
replaceBackReference grps ('\\':s@(_:_)) | all isDigit s =
|
-- new total functions
|
||||||
case read s of n | n `elem` indices grps -> fst (grps ! n)
|
|
||||||
-- PARTIAL:D
|
|
||||||
_ -> error' $ "no match group exists for backreference \"\\"++s++"\""
|
|
||||||
replaceBackReference _ s = error' $ "replaceBackReference called on non-numeric-backreference \""++s++"\", shouldn't happen"
|
|
||||||
|
|
||||||
--
|
regexMatches_ :: Regexp -> String -> Either Error Bool
|
||||||
|
regexMatches_ r s = (`match` s) <$> toRegex_ r
|
||||||
|
|
||||||
-- http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries :
|
regexMatchesCI_ :: Regexp -> String -> Either Error Bool
|
||||||
-- | Replace all occurrences of a regexp in a string, transforming each match with the given function.
|
regexMatchesCI_ r s = (`match` s) <$> toRegexCI_ r
|
||||||
|
|
||||||
|
-- | Replace all occurrences of the regexp with the replacement
|
||||||
|
-- pattern, or return an error message. The replacement pattern
|
||||||
|
-- supports numeric backreferences (\N) but no other RE syntax.
|
||||||
|
regexReplace_ :: Regexp -> Replacement -> String -> Either Error String
|
||||||
|
regexReplace_ re repl s = toRegex_ re >>= \rx -> replaceRegex_ rx repl s
|
||||||
|
|
||||||
|
regexReplaceCI_ :: Regexp -> Replacement -> String -> Either Error String
|
||||||
|
regexReplaceCI_ re repl s = toRegexCI_ re >>= \rx -> replaceRegex_ rx repl s
|
||||||
|
|
||||||
|
-- | A memoising version of regexReplace_. Caches the result for each
|
||||||
|
-- search pattern, replacement pattern, target string tuple.
|
||||||
|
regexReplaceMemo_ :: Regexp -> Replacement -> String -> Either Error String
|
||||||
|
regexReplaceMemo_ re repl = memo (regexReplace_ re repl)
|
||||||
|
|
||||||
|
regexReplaceCIMemo_ :: Regexp -> Replacement -> String -> Either Error String
|
||||||
|
regexReplaceCIMemo_ re repl = memo (regexReplaceCI_ re repl)
|
||||||
|
|
||||||
|
-- | Replace all occurrences of the regexp, transforming each match
|
||||||
|
-- with the given function, or return an error message.
|
||||||
|
regexReplaceBy_ :: Regexp -> (String -> String) -> String -> Either Error String
|
||||||
|
regexReplaceBy_ r f s = toRegex_ r >>= \rx -> Right $ replaceAllBy rx f s
|
||||||
|
|
||||||
|
regexReplaceByCI_ :: Regexp -> (String -> String) -> String -> Either Error String
|
||||||
|
regexReplaceByCI_ r f s = toRegexCI_ r >>= \rx -> Right $ replaceAllBy rx f s
|
||||||
|
|
||||||
|
-- helpers:
|
||||||
|
|
||||||
|
-- | Convert our string-based Regexp to a real Regex, or return a parse error.
|
||||||
|
toRegex_ :: Regexp -> Either Error Regex
|
||||||
|
toRegex_ = memo (compileRegex_ defaultCompOpt defaultExecOpt)
|
||||||
|
|
||||||
|
-- | Convert our string-based Regexp to a case-insensitive real Regex,
|
||||||
|
-- or return a parse error.
|
||||||
|
toRegexCI_ :: Regexp -> Either Error Regex
|
||||||
|
toRegexCI_ = memo (compileRegex_ defaultCompOpt{caseSensitive=False} defaultExecOpt)
|
||||||
|
|
||||||
|
compileRegex_ :: CompOption -> ExecOption -> Regexp -> Either Error Regex
|
||||||
|
compileRegex_ compopt execopt r =
|
||||||
|
maybe (Left $ "this regular expression could not be compiled: " ++ show r) Right $
|
||||||
|
makeRegexOptsM compopt execopt r
|
||||||
|
|
||||||
|
-- Replace this regular expression with this replacement pattern in this string, or return an error message.
|
||||||
|
replaceRegex_ :: Regex -> Replacement -> String -> Either Error String
|
||||||
|
replaceRegex_ re repl s = foldM (replaceMatch_ repl) s (reverse $ match re s :: [MatchText String])
|
||||||
|
where
|
||||||
|
-- Replace one match within the string with the replacement text
|
||||||
|
-- appropriate for this match. Or return an error message.
|
||||||
|
replaceMatch_ :: Replacement -> String -> MatchText String -> Either Error String
|
||||||
|
replaceMatch_ replpat s matchgroups =
|
||||||
|
erepl >>= \repl -> Right $ pre ++ repl ++ post
|
||||||
|
where
|
||||||
|
((_,(off,len)):_) = elems matchgroups -- groups should have 0-based indexes, and there should always be at least one, since this is a match
|
||||||
|
(pre, post') = splitAt off s
|
||||||
|
post = drop len post'
|
||||||
|
-- The replacement text: the replacement pattern with all
|
||||||
|
-- numeric backreferences replaced by the appropriate groups
|
||||||
|
-- from this match. Or an error message.
|
||||||
|
erepl = toRegex_ "\\\\[0-9]+" >>= \rx -> replaceAllByM rx (lookupMatchGroup_ matchgroups) replpat
|
||||||
|
where
|
||||||
|
-- Given some match groups and a numeric backreference,
|
||||||
|
-- return the referenced group text, or an error message.
|
||||||
|
lookupMatchGroup_ :: MatchText String -> String -> Either Error String
|
||||||
|
lookupMatchGroup_ grps ('\\':s@(_:_)) | all isDigit s =
|
||||||
|
case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
|
||||||
|
_ -> Left $ "no match group exists for backreference \"\\"++s++"\""
|
||||||
|
lookupMatchGroup_ _ s = Left $ "lookupMatchGroup called on non-numeric-backreference \""++s++"\", shouldn't happen"
|
||||||
|
|
||||||
|
-- helpers
|
||||||
|
|
||||||
|
-- Adapted from http://stackoverflow.com/questions/9071682/replacement-substition-with-haskell-regex-libraries:
|
||||||
|
|
||||||
|
-- | Replace all occurrences of a regexp in a string, transforming each match
|
||||||
|
-- with the given function.
|
||||||
replaceAllBy :: Regex -> (String -> String) -> String -> String
|
replaceAllBy :: Regex -> (String -> String) -> String -> String
|
||||||
replaceAllBy re f s = start end
|
replaceAllBy re transform s = prependdone rest
|
||||||
where
|
where
|
||||||
(_, end, start) = foldl' go (0, s, id) $ (getAllMatches $ match re s :: [(Int, Int)])
|
(_, rest, prependdone) = foldl' go (0, s, id) matches
|
||||||
go (ind,read,write) (off,len) =
|
where
|
||||||
let (skip, start) = splitAt (off - ind) read
|
matches = getAllMatches $ match re s :: [(Int, Int)] -- offset and length
|
||||||
(matched, remaining) = splitAt len start
|
go :: (Int,String,String->String) -> (Int,Int) -> (Int,String,String->String)
|
||||||
in (off + len, remaining, write . (skip++) . (f matched ++))
|
go (pos,todo,prepend) (off,len) =
|
||||||
|
let (prematch, matchandrest) = splitAt (off - pos) todo
|
||||||
|
(matched, rest) = splitAt len matchandrest
|
||||||
|
in (off + len, rest, prepend . (prematch++) . (transform matched ++))
|
||||||
|
|
||||||
|
-- | Replace all occurrences of a regexp in a string, transforming each match
|
||||||
|
-- with the given monadic transform function. Eg if the monad is Either, a
|
||||||
|
-- Left result from the transform function short-circuits and is returned as
|
||||||
|
-- the overall result.
|
||||||
|
replaceAllByM :: forall m. Monad m => Regex -> (String -> m String) -> String -> m String
|
||||||
|
replaceAllByM re transform s =
|
||||||
|
foldM go (0, s, id) matches >>= \(_, rest, prependdone) -> pure $ prependdone rest
|
||||||
|
where
|
||||||
|
matches = getAllMatches $ match re s :: [(Int, Int)] -- offset and length
|
||||||
|
go :: (Int,String,String->String) -> (Int,Int) -> m (Int,String,String->String)
|
||||||
|
go (pos,todo,prepend) (off,len) =
|
||||||
|
let (prematch, matchandrest) = splitAt (off - pos) todo
|
||||||
|
(matched, rest) = splitAt len matchandrest
|
||||||
|
in transform matched >>= \matched' -> pure (off + len, rest, prepend . (prematch++) . (matched' ++))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user