hledger/hledger-lib/Hledger/Utils/String.hs
2023-03-16 14:48:59 -10:00

242 lines
7.7 KiB
Haskell

-- | String formatting helpers, starting to get a bit out of control.
module Hledger.Utils.String (
takeEnd,
-- * misc
lowercase,
uppercase,
underline,
stripbrackets,
-- quoting
quoteIfNeeded,
singleQuoteIfNeeded,
quoteForCommandLine,
-- quotechars,
-- whitespacechars,
words',
unwords',
stripAnsi,
-- * single-line layout
strip,
lstrip,
rstrip,
strip1Char,
stripBy,
strip1By,
chomp,
chomp1,
singleline,
elideLeft,
elideRight,
formatString,
-- * wide-character-aware layout
charWidth,
strWidth,
strWidthAnsi,
takeWidth,
) where
import Data.Char (isSpace, toLower, toUpper)
import Data.List (intercalate, dropWhileEnd)
import qualified Data.Text as T
import Text.Megaparsec ((<|>), between, many, noneOf, sepBy)
import Text.Megaparsec.Char (char)
import Text.Printf (printf)
import Hledger.Utils.Parse
import Hledger.Utils.Regex (toRegex', regexReplace)
import Text.DocLayout (charWidth, realLength)
-- | Take elements from the end of a list.
takeEnd n l = go (drop n l) l
where
go (_:xs) (_:ys) = go xs ys
go [] r = r
go _ [] = []
lowercase, uppercase :: String -> String
lowercase = map toLower
uppercase = map toUpper
-- | Remove leading and trailing whitespace.
strip :: String -> String
strip = lstrip . rstrip
-- | Remove leading whitespace.
lstrip :: String -> String
lstrip = dropWhile isSpace
-- | Remove trailing whitespace.
rstrip :: String -> String
rstrip = reverse . lstrip . reverse
-- | Strip the given starting and ending character
-- from the start and end of a string if both are present.
strip1Char :: Char -> Char -> String -> String
strip1Char b e s = case s of
(c:cs) | c==b, not $ null cs, last cs==e -> init cs
_ -> s
-- | Strip a run of zero or more characters matching the predicate
-- from the start and end of a string.
stripBy :: (Char -> Bool) -> String -> String
stripBy f = dropWhileEnd f . dropWhile f
-- | Strip a single balanced enclosing pair of a character matching the predicate
-- from the start and end of a string.
strip1By :: (Char -> Bool) -> String -> String
strip1By f s = case s of
(c:cs) | f c, not $ null cs, last cs==c -> init cs
_ -> s
-- | Remove all trailing newlines/carriage returns.
chomp :: String -> String
chomp = reverse . dropWhile (`elem` "\r\n") . reverse
-- | Remove all trailing newline/carriage returns, leaving just one trailing newline.
chomp1 :: String -> String
chomp1 = (++"\n") . chomp
-- | Remove consecutive line breaks, replacing them with single space
singleline :: String -> String
singleline = unwords . filter (/="") . (map strip) . lines
stripbrackets :: String -> String
stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String
elideLeft :: Int -> String -> String
elideLeft width s =
if length s > width then ".." ++ takeEnd (width - 2) s else s
elideRight :: Int -> String -> String
elideRight width s =
if length s > width then take (width - 2) s ++ ".." else s
-- | Clip and pad a string to a minimum & maximum width, and/or left/right justify it.
-- Works on multi-line strings too (but will rewrite non-unix line endings).
formatString :: Bool -> Maybe Int -> Maybe Int -> String -> String
formatString leftJustified minwidth maxwidth s = intercalate "\n" $ map (printf fmt) $ lines s
where
justify = if leftJustified then "-" else ""
minwidth' = maybe "" show minwidth
maxwidth' = maybe "" (("."++).show) maxwidth
fmt = "%" ++ justify ++ minwidth' ++ maxwidth' ++ "s"
underline :: String -> String
underline s = s' ++ replicate (length s) '-' ++ "\n"
where s'
| last s == '\n' = s
| otherwise = s ++ "\n"
-- | Double-quote this string if it contains whitespace, single quotes
-- or double-quotes, escaping the quotes as needed.
quoteIfNeeded :: String -> String
quoteIfNeeded s | any (`elem` s) (quotechars++whitespacechars++redirectchars) = showChar '"' $ escapeQuotes s "\""
| otherwise = s
where
escapeQuotes [] x = x
escapeQuotes ('"':cs) x = showString "\\\"" $ escapeQuotes cs x
escapeQuotes (c:cs) x = showChar c $ escapeQuotes cs x
-- | Single-quote this string if it contains whitespace or double-quotes.
-- Does not work for strings containing single quotes.
singleQuoteIfNeeded :: String -> String
singleQuoteIfNeeded s | any (`elem` s) (quotechars++whitespacechars) = singleQuote s
| otherwise = s
-- | Prepend and append single quotes to a string.
singleQuote :: String -> String
singleQuote s = "'"++s++"'"
-- | Try to single- and backslash-quote a string as needed to make it usable
-- as an argument on a (sh/bash) shell command line. At least, well enough
-- to handle common currency symbols, like $. Probably broken in many ways.
--
-- >>> quoteForCommandLine "a"
-- "a"
-- >>> quoteForCommandLine "\""
-- "'\"'"
-- >>> quoteForCommandLine "$"
-- "'\\$'"
--
quoteForCommandLine :: String -> String
quoteForCommandLine s
| any (`elem` s) (quotechars++whitespacechars++shellchars) = singleQuote $ quoteShellChars s
| otherwise = s
-- | Try to backslash-quote common shell-significant characters in this string.
-- Doesn't handle single quotes, & probably others.
quoteShellChars :: String -> String
quoteShellChars = concatMap escapeShellChar
where
escapeShellChar c | c `elem` shellchars = ['\\',c]
escapeShellChar c = [c]
quotechars, whitespacechars, redirectchars, shellchars :: [Char]
quotechars = "'\""
whitespacechars = " \t\n\r"
redirectchars = "<>"
shellchars = "<>(){}[]$7?#!~`"
-- | Quote-aware version of words - don't split on spaces which are inside quotes.
-- NB correctly handles "a'b" but not "''a''". Can raise an error if parsing fails.
words' :: String -> [String]
words' "" = []
words' s = map stripquotes $ fromparse $ parsewithString p s
where
p = (singleQuotedPattern <|> doubleQuotedPattern <|> patterns) `sepBy` skipNonNewlineSpaces1
-- eof
patterns = many (noneOf whitespacechars)
singleQuotedPattern = between (char '\'') (char '\'') (many $ noneOf "'")
doubleQuotedPattern = between (char '"') (char '"') (many $ noneOf "\"")
-- | Quote-aware version of unwords - single-quote strings which contain whitespace
unwords' :: [String] -> String
unwords' = unwords . map quoteIfNeeded
-- | Strip one matching pair of single or double quotes on the ends of a string.
stripquotes :: String -> String
stripquotes s = if isSingleQuoted s || isDoubleQuoted s then init $ tail s else s
isSingleQuoted s@(_:_:_) = head s == '\'' && last s == '\''
isSingleQuoted _ = False
isDoubleQuoted s@(_:_:_) = head s == '"' && last s == '"'
isDoubleQuoted _ = False
-- Functions below treat wide (eg CJK) characters as double-width.
-- | Double-width-character-aware string truncation. Take as many
-- characters as possible from a string without exceeding the
-- specified width. Eg takeWidth 3 "りんご" = "り".
takeWidth :: Int -> String -> String
takeWidth _ "" = ""
takeWidth 0 _ = ""
takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
| otherwise = ""
where cw = charWidth c
-- | Like strWidth, but also strips ANSI escape sequences before
-- calculating the width.
--
-- This is no longer used in code, as widths are calculated before
-- adding ANSI escape sequences, but is being kept around for now.
strWidthAnsi :: String -> Int
strWidthAnsi = strWidth . stripAnsi
-- | Alias for 'realLength'.
strWidth :: String -> Int
strWidth = realLength
-- | Strip ANSI escape sequences from a string.
--
-- >>> stripAnsi "\ESC[31m-1\ESC[m"
-- "-1"
stripAnsi :: String -> String
stripAnsi s = either err id $ regexReplace ansire "" s
where
err = error "stripAnsi: invalid replacement pattern" -- PARTIAL, shouldn't happen
ansire = toRegex' $ T.pack "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]" -- PARTIAL, should succeed