hledger/hledger-lib/Hledger/Utils/String.hs

366 lines
13 KiB
Haskell
Raw Normal View History

-- | String formatting helpers, starting to get a bit out of control.
2015-08-19 23:47:26 +03:00
module Hledger.Utils.String (
takeEnd,
2015-08-19 23:47:26 +03:00
-- * misc
lowercase,
uppercase,
underline,
stripbrackets,
unbracket,
-- quoting
quoteIfNeeded,
singleQuoteIfNeeded,
-- quotechars,
-- whitespacechars,
words',
unwords',
stripAnsi,
2015-08-19 23:47:26 +03:00
-- * single-line layout
strip,
lstrip,
rstrip,
chomp,
chomp1,
singleline,
2015-08-19 23:47:26 +03:00
elideLeft,
elideRight,
formatString,
2015-08-19 23:47:26 +03:00
-- * multi-line layout
concatTopPadded,
concatBottomPadded,
concatOneLine,
vConcatLeftAligned,
2015-08-19 23:47:26 +03:00
vConcatRightAligned,
padtop,
padbottom,
padleft,
padright,
cliptopleft,
fitto,
linesPrepend,
linesPrepend2,
-- * wide-character-aware layout
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
charWidth,
strWidth,
strWidthAnsi,
takeWidth,
fitString,
fitStringMulti,
padLeftWide,
padRightWide
2015-08-19 23:47:26 +03:00
) where
2020-09-11 03:46:16 +03:00
import Data.Char (isSpace, toLower, toUpper)
import Data.Default (def)
import Data.List (intercalate)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
2020-09-11 03:46:16 +03:00
import Text.Megaparsec ((<|>), between, many, noneOf, sepBy)
import Text.Megaparsec.Char (char)
import Text.Printf (printf)
2015-08-19 23:47:26 +03:00
import Hledger.Utils.Parse
2020-09-11 03:46:16 +03:00
import Hledger.Utils.Regex (toRegex', regexReplace)
import Text.Tabular (Header(..), Properties(..))
import Text.Tabular.AsciiWide (Align(..), Cell(..), TableOpts(..), renderRow)
import Text.WideString (charWidth, strWidth, textWidth)
-- | Take elements from the end of a list.
takeEnd n l = go (drop n l) l
where
go (_:xs) (_:ys) = go xs ys
go [] r = r
go _ [] = []
2015-08-19 23:47:26 +03:00
lowercase, uppercase :: String -> String
lowercase = map toLower
uppercase = map toUpper
-- | Remove leading and trailing whitespace.
strip :: String -> String
strip = lstrip . rstrip
-- | Remove leading whitespace.
lstrip :: String -> String
2016-12-20 20:29:12 +03:00
lstrip = dropWhile isSpace
2015-08-19 23:47:26 +03:00
-- | Remove trailing whitespace.
rstrip :: String -> String
rstrip = reverse . lstrip . reverse
-- | Remove all trailing newlines/carriage returns.
2015-08-19 23:47:26 +03:00
chomp :: String -> String
chomp = reverse . dropWhile (`elem` "\r\n") . reverse
-- | Remove all trailing newline/carriage returns, leaving just one trailing newline.
chomp1 :: String -> String
chomp1 = (++"\n") . chomp
2020-01-05 04:37:55 +03:00
-- | Remove consecutive line breaks, replacing them with single space
singleline :: String -> String
singleline = unwords . filter (/="") . (map strip) . lines
2015-08-19 23:47:26 +03:00
stripbrackets :: String -> String
stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String
elideLeft :: Int -> String -> String
elideLeft width s =
if length s > width then ".." ++ takeEnd (width - 2) s else s
2015-08-19 23:47:26 +03:00
elideRight :: Int -> String -> String
elideRight width s =
if length s > width then take (width - 2) s ++ ".." else s
-- | Clip and pad a string to a minimum & maximum width, and/or left/right justify it.
-- Works on multi-line strings too (but will rewrite non-unix line endings).
formatString :: Bool -> Maybe Int -> Maybe Int -> String -> String
formatString leftJustified minwidth maxwidth s = intercalate "\n" $ map (printf fmt) $ lines s
where
justify = if leftJustified then "-" else ""
minwidth' = maybe "" show minwidth
maxwidth' = maybe "" (("."++).show) maxwidth
fmt = "%" ++ justify ++ minwidth' ++ maxwidth' ++ "s"
2015-08-19 23:47:26 +03:00
underline :: String -> String
underline s = s' ++ replicate (length s) '-' ++ "\n"
where s'
| last s == '\n' = s
| otherwise = s ++ "\n"
-- | Double-quote this string if it contains whitespace, single quotes
-- or double-quotes, escaping the quotes as needed.
quoteIfNeeded :: String -> String
quoteIfNeeded s | any (`elem` s) (quotechars++whitespacechars++redirectchars) = showChar '"' $ escapeQuotes s "\""
2015-08-19 23:47:26 +03:00
| otherwise = s
where
escapeQuotes [] x = x
escapeQuotes ('"':cs) x = showString "\\\"" $ escapeQuotes cs x
escapeQuotes (c:cs) x = showChar c $ escapeQuotes cs x
2015-08-19 23:47:26 +03:00
-- | Single-quote this string if it contains whitespace or double-quotes.
-- No good for strings containing single quotes.
singleQuoteIfNeeded :: String -> String
singleQuoteIfNeeded s | any (`elem` s) (quotechars++whitespacechars) = "'"++s++"'"
2015-08-19 23:47:26 +03:00
| otherwise = s
quotechars, whitespacechars, redirectchars :: [Char]
2015-08-19 23:47:26 +03:00
quotechars = "'\""
whitespacechars = " \t\n\r"
redirectchars = "<>"
2015-08-19 23:47:26 +03:00
-- | Quote-aware version of words - don't split on spaces which are inside quotes.
-- NB correctly handles "a'b" but not "''a''". Can raise an error if parsing fails.
words' :: String -> [String]
words' "" = []
words' s = map stripquotes $ fromparse $ parsewithString p s
2015-08-19 23:47:26 +03:00
where
p = do ss <- (singleQuotedPattern <|> doubleQuotedPattern <|> pattern) `sepBy` skipNonNewlineSpaces1
2015-08-19 23:47:26 +03:00
-- eof
return ss
pattern = many (noneOf whitespacechars)
singleQuotedPattern = between (char '\'') (char '\'') (many $ noneOf "'")
doubleQuotedPattern = between (char '"') (char '"') (many $ noneOf "\"")
-- | Quote-aware version of unwords - single-quote strings which contain whitespace
unwords' :: [String] -> String
unwords' = unwords . map quoteIfNeeded
-- | Strip one matching pair of single or double quotes on the ends of a string.
stripquotes :: String -> String
stripquotes s = if isSingleQuoted s || isDoubleQuoted s then init $ tail s else s
isSingleQuoted s@(_:_:_) = head s == '\'' && last s == '\''
isSingleQuoted _ = False
isDoubleQuoted s@(_:_:_) = head s == '"' && last s == '"'
isDoubleQuoted _ = False
unbracket :: String -> String
unbracket s
| (head s == '[' && last s == ']') || (head s == '(' && last s == ')') = init $ tail s
| otherwise = s
-- | Join several multi-line strings as side-by-side rectangular strings of the same height, top-padded.
-- Treats wide characters as double width.
2015-08-19 23:47:26 +03:00
concatTopPadded :: [String] -> String
concatTopPadded = TL.unpack . renderRow def{tableBorders=False, borderSpaces=False}
. Group NoLine . map (Header . cell)
where cell = Cell BottomLeft . map (\x -> (x, textWidth x)) . T.lines . T.pack
2015-08-19 23:47:26 +03:00
-- | Join several multi-line strings as side-by-side rectangular strings of the same height, bottom-padded.
-- Treats wide characters as double width.
2015-08-19 23:47:26 +03:00
concatBottomPadded :: [String] -> String
concatBottomPadded = TL.unpack . renderRow def{tableBorders=False, borderSpaces=False}
. Group NoLine . map (Header . cell)
where cell = Cell TopLeft . map (\x -> (x, textWidth x)) . T.lines . T.pack
2015-08-19 23:47:26 +03:00
-- | Join multi-line strings horizontally, after compressing each of
-- them to a single line with a comma and space between each original line.
concatOneLine :: [String] -> String
concatOneLine strs = concat $ map ((intercalate ", ").lines) strs
-- | Join strings vertically, left-aligned and right-padded.
vConcatLeftAligned :: [String] -> String
vConcatLeftAligned ss = intercalate "\n" $ map showfixedwidth ss
where
showfixedwidth = printf (printf "%%-%ds" width)
width = maximum $ map length ss
-- | Join strings vertically, right-aligned and left-padded.
2015-08-19 23:47:26 +03:00
vConcatRightAligned :: [String] -> String
vConcatRightAligned ss = intercalate "\n" $ map showfixedwidth ss
where
showfixedwidth = printf (printf "%%%ds" width)
width = maximum $ map length ss
-- | Convert a multi-line string to a rectangular string top-padded to the specified height.
padtop :: Int -> String -> String
padtop h s = intercalate "\n" xpadded
where
ls = lines s
sh = length ls
sw | null ls = 0
| otherwise = maximum $ map length ls
ypadded = replicate (difforzero h sh) "" ++ ls
xpadded = map (padleft sw) ypadded
-- | Convert a multi-line string to a rectangular string bottom-padded to the specified height.
padbottom :: Int -> String -> String
padbottom h s = intercalate "\n" xpadded
where
ls = lines s
sh = length ls
sw | null ls = 0
| otherwise = maximum $ map length ls
ypadded = ls ++ replicate (difforzero h sh) ""
xpadded = map (padleft sw) ypadded
difforzero :: (Num a, Ord a) => a -> a -> a
difforzero a b = maximum [(a - b), 0]
-- | Convert a multi-line string to a rectangular string left-padded to the specified width.
-- Treats wide characters as double width.
2015-08-19 23:47:26 +03:00
padleft :: Int -> String -> String
padleft w "" = concat $ replicate w " "
padleft w s = intercalate "\n" $ map (printf (printf "%%%ds" w)) $ lines s
-- | Convert a multi-line string to a rectangular string right-padded to the specified width.
-- Treats wide characters as double width.
2015-08-19 23:47:26 +03:00
padright :: Int -> String -> String
padright w "" = concat $ replicate w " "
padright w s = intercalate "\n" $ map (printf (printf "%%-%ds" w)) $ lines s
-- | Clip a multi-line string to the specified width and height from the top left.
cliptopleft :: Int -> Int -> String -> String
cliptopleft w h = intercalate "\n" . take h . map (take w) . lines
-- | Clip and pad a multi-line string to fill the specified width and height.
fitto :: Int -> Int -> String -> String
fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline
where
rows = map (fit w) $ lines s
fit w = take w . (++ repeat ' ')
blankline = replicate w ' '
-- Functions below treat wide (eg CJK) characters as double-width.
-- | General-purpose wide-char-aware single-line string layout function.
-- It can left- or right-pad a short string to a minimum width.
2015-09-29 07:47:05 +03:00
-- It can left- or right-clip a long string to a maximum width, optionally inserting an ellipsis (the third argument).
-- It clips and pads on the right when the fourth argument is true, otherwise on the left.
-- It treats wide characters as double width.
fitString :: Maybe Int -> Maybe Int -> Bool -> Bool -> String -> String
fitString mminwidth mmaxwidth ellipsify rightside s = (clip . pad) s
where
clip :: String -> String
clip s =
case mmaxwidth of
Just w
| strWidth s > w ->
case rightside of
True -> takeWidth (w - length ellipsis) s ++ ellipsis
False -> ellipsis ++ reverse (takeWidth (w - length ellipsis) $ reverse s)
| otherwise -> s
where
ellipsis = if ellipsify then ".." else ""
Nothing -> s
pad :: String -> String
pad s =
case mminwidth of
Just w
| sw < w ->
case rightside of
True -> s ++ replicate (w - sw) ' '
False -> replicate (w - sw) ' ' ++ s
| otherwise -> s
Nothing -> s
where sw = strWidth s
2015-09-29 07:47:05 +03:00
-- | A version of fitString that works on multi-line strings,
-- separate for now to avoid breakage.
-- This will rewrite any line endings to unix newlines.
fitStringMulti :: Maybe Int -> Maybe Int -> Bool -> Bool -> String -> String
fitStringMulti mminwidth mmaxwidth ellipsify rightside s =
(intercalate "\n" . map (fitString mminwidth mmaxwidth ellipsify rightside) . lines) s
-- | Left-pad a string to the specified width.
-- Treats wide characters as double width.
-- Works on multi-line strings too (but will rewrite non-unix line endings).
padLeftWide :: Int -> String -> String
padLeftWide w "" = replicate w ' '
2015-09-29 07:47:05 +03:00
padLeftWide w s = intercalate "\n" $ map (fitString (Just w) Nothing False False) $ lines s
-- XXX not yet replaceable by
-- padLeftWide w = fitStringMulti (Just w) Nothing False False
2015-09-29 07:47:05 +03:00
-- | Right-pad a string to the specified width.
-- Treats wide characters as double width.
-- Works on multi-line strings too (but will rewrite non-unix line endings).
padRightWide :: Int -> String -> String
padRightWide w "" = replicate w ' '
2015-09-29 07:47:05 +03:00
padRightWide w s = intercalate "\n" $ map (fitString (Just w) Nothing False True) $ lines s
-- XXX not yet replaceable by
-- padRightWide w = fitStringMulti (Just w) Nothing False True
-- | Double-width-character-aware string truncation. Take as many
-- characters as possible from a string without exceeding the
-- specified width. Eg takeWidth 3 "りんご" = "り".
takeWidth :: Int -> String -> String
takeWidth _ "" = ""
takeWidth 0 _ = ""
takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
| otherwise = ""
where cw = charWidth c
-- | Like strWidth, but also strips ANSI escape sequences before
-- calculating the width.
--
-- This is no longer used in code, as widths are calculated before
-- adding ANSI escape sequences, but is being kept around for now.
strWidthAnsi :: String -> Int
strWidthAnsi = strWidth . stripAnsi
-- | Strip ANSI escape sequences from a string.
--
-- >>> stripAnsi "\ESC[31m-1\ESC[m"
-- "-1"
stripAnsi :: String -> String
2020-09-11 03:46:16 +03:00
stripAnsi s = either err id $ regexReplace ansire "" s
where
err = error "stripAnsi: invalid replacement pattern" -- PARTIAL, shouldn't happen
ansire = toRegex' "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]" -- PARTIAL, should succeed
-- | Add a prefix to each line of a string.
linesPrepend :: String -> String -> String
linesPrepend prefix = unlines . map (prefix++) . lines
-- | Add a prefix to the first line of a string,
-- and a different prefix to the remaining lines.
linesPrepend2 :: String -> String -> String -> String
linesPrepend2 prefix1 prefix2 s =
unlines $ (prefix1++l) : map (prefix2++) ls
where l:ls = lines s