mirror of
https://github.com/simonmichael/hledger.git
synced 2025-01-07 11:19:32 +03:00
d7d5f8a064
Older megaparsec is still supported. Also cleans up our custom parser types, and some text (un)packing is done in different places (possible performance impact).
382 lines
14 KiB
Haskell
382 lines
14 KiB
Haskell
-- | String formatting helpers, starting to get a bit out of control.
|
|
|
|
module Hledger.Utils.String (
|
|
-- * misc
|
|
lowercase,
|
|
uppercase,
|
|
underline,
|
|
stripbrackets,
|
|
unbracket,
|
|
-- quoting
|
|
quoteIfNeeded,
|
|
singleQuoteIfNeeded,
|
|
-- quotechars,
|
|
-- whitespacechars,
|
|
escapeQuotes,
|
|
words',
|
|
unwords',
|
|
stripAnsi,
|
|
-- * single-line layout
|
|
strip,
|
|
lstrip,
|
|
rstrip,
|
|
chomp,
|
|
elideLeft,
|
|
elideRight,
|
|
formatString,
|
|
-- * multi-line layout
|
|
concatTopPadded,
|
|
concatBottomPadded,
|
|
concatOneLine,
|
|
vConcatLeftAligned,
|
|
vConcatRightAligned,
|
|
padtop,
|
|
padbottom,
|
|
padleft,
|
|
padright,
|
|
cliptopleft,
|
|
fitto,
|
|
-- * wide-character-aware layout
|
|
charWidth,
|
|
strWidth,
|
|
takeWidth,
|
|
fitString,
|
|
fitStringMulti,
|
|
padLeftWide,
|
|
padRightWide
|
|
) where
|
|
|
|
|
|
import Data.Char
|
|
import Data.List
|
|
import Text.Megaparsec.Compat
|
|
import Text.Printf (printf)
|
|
|
|
import Hledger.Utils.Parse
|
|
import Hledger.Utils.Regex
|
|
|
|
lowercase, uppercase :: String -> String
|
|
lowercase = map toLower
|
|
uppercase = map toUpper
|
|
|
|
-- | Remove leading and trailing whitespace.
|
|
strip :: String -> String
|
|
strip = lstrip . rstrip
|
|
|
|
-- | Remove leading whitespace.
|
|
lstrip :: String -> String
|
|
lstrip = dropWhile isSpace
|
|
|
|
-- | Remove trailing whitespace.
|
|
rstrip :: String -> String
|
|
rstrip = reverse . lstrip . reverse
|
|
|
|
-- | Remove trailing newlines/carriage returns.
|
|
chomp :: String -> String
|
|
chomp = reverse . dropWhile (`elem` "\r\n") . reverse
|
|
|
|
stripbrackets :: String -> String
|
|
stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String
|
|
|
|
elideLeft :: Int -> String -> String
|
|
elideLeft width s =
|
|
if length s > width then ".." ++ reverse (take (width - 2) $ reverse s) else s
|
|
|
|
elideRight :: Int -> String -> String
|
|
elideRight width s =
|
|
if length s > width then take (width - 2) s ++ ".." else s
|
|
|
|
-- | Clip and pad a string to a minimum & maximum width, and/or left/right justify it.
|
|
-- Works on multi-line strings too (but will rewrite non-unix line endings).
|
|
formatString :: Bool -> Maybe Int -> Maybe Int -> String -> String
|
|
formatString leftJustified minwidth maxwidth s = intercalate "\n" $ map (printf fmt) $ lines s
|
|
where
|
|
justify = if leftJustified then "-" else ""
|
|
minwidth' = maybe "" show minwidth
|
|
maxwidth' = maybe "" (("."++).show) maxwidth
|
|
fmt = "%" ++ justify ++ minwidth' ++ maxwidth' ++ "s"
|
|
|
|
underline :: String -> String
|
|
underline s = s' ++ replicate (length s) '-' ++ "\n"
|
|
where s'
|
|
| last s == '\n' = s
|
|
| otherwise = s ++ "\n"
|
|
|
|
-- | Double-quote this string if it contains whitespace, single quotes
|
|
-- or double-quotes, escaping the quotes as needed.
|
|
quoteIfNeeded :: String -> String
|
|
quoteIfNeeded s | any (`elem` s) (quotechars++whitespacechars) = "\"" ++ escapeDoubleQuotes s ++ "\""
|
|
| otherwise = s
|
|
-- | Single-quote this string if it contains whitespace or double-quotes.
|
|
-- No good for strings containing single quotes.
|
|
singleQuoteIfNeeded :: String -> String
|
|
singleQuoteIfNeeded s | any (`elem` s) whitespacechars = "'"++s++"'"
|
|
| otherwise = s
|
|
|
|
quotechars, whitespacechars :: [Char]
|
|
quotechars = "'\""
|
|
whitespacechars = " \t\n\r"
|
|
|
|
escapeDoubleQuotes :: String -> String
|
|
escapeDoubleQuotes = regexReplace "\"" "\""
|
|
|
|
escapeQuotes :: String -> String
|
|
escapeQuotes = regexReplace "([\"'])" "\\1"
|
|
|
|
-- | Quote-aware version of words - don't split on spaces which are inside quotes.
|
|
-- NB correctly handles "a'b" but not "''a''". Can raise an error if parsing fails.
|
|
words' :: String -> [String]
|
|
words' "" = []
|
|
words' s = map stripquotes $ fromparse $ parsewithString p s
|
|
where
|
|
p = do ss <- (singleQuotedPattern <|> doubleQuotedPattern <|> pattern) `sepBy` some spacenonewline
|
|
-- eof
|
|
return ss
|
|
pattern = many (noneOf whitespacechars)
|
|
singleQuotedPattern = between (char '\'') (char '\'') (many $ noneOf "'")
|
|
doubleQuotedPattern = between (char '"') (char '"') (many $ noneOf "\"")
|
|
|
|
-- | Quote-aware version of unwords - single-quote strings which contain whitespace
|
|
unwords' :: [String] -> String
|
|
unwords' = unwords . map quoteIfNeeded
|
|
|
|
-- | Strip one matching pair of single or double quotes on the ends of a string.
|
|
stripquotes :: String -> String
|
|
stripquotes s = if isSingleQuoted s || isDoubleQuoted s then init $ tail s else s
|
|
|
|
isSingleQuoted s@(_:_:_) = head s == '\'' && last s == '\''
|
|
isSingleQuoted _ = False
|
|
|
|
isDoubleQuoted s@(_:_:_) = head s == '"' && last s == '"'
|
|
isDoubleQuoted _ = False
|
|
|
|
unbracket :: String -> String
|
|
unbracket s
|
|
| (head s == '[' && last s == ']') || (head s == '(' && last s == ')') = init $ tail s
|
|
| otherwise = s
|
|
|
|
-- | Join several multi-line strings as side-by-side rectangular strings of the same height, top-padded.
|
|
-- Treats wide characters as double width.
|
|
concatTopPadded :: [String] -> String
|
|
concatTopPadded strs = intercalate "\n" $ map concat $ transpose padded
|
|
where
|
|
lss = map lines strs
|
|
h = maximum $ map length lss
|
|
ypad ls = replicate (difforzero h (length ls)) "" ++ ls
|
|
xpad ls = map (padLeftWide w) ls where w | null ls = 0
|
|
| otherwise = maximum $ map strWidth ls
|
|
padded = map (xpad . ypad) lss
|
|
|
|
-- | Join several multi-line strings as side-by-side rectangular strings of the same height, bottom-padded.
|
|
-- Treats wide characters as double width.
|
|
concatBottomPadded :: [String] -> String
|
|
concatBottomPadded strs = intercalate "\n" $ map concat $ transpose padded
|
|
where
|
|
lss = map lines strs
|
|
h = maximum $ map length lss
|
|
ypad ls = ls ++ replicate (difforzero h (length ls)) ""
|
|
xpad ls = map (padRightWide w) ls where w | null ls = 0
|
|
| otherwise = maximum $ map strWidth ls
|
|
padded = map (xpad . ypad) lss
|
|
|
|
|
|
-- | Join multi-line strings horizontally, after compressing each of
|
|
-- them to a single line with a comma and space between each original line.
|
|
concatOneLine :: [String] -> String
|
|
concatOneLine strs = concat $ map ((intercalate ", ").lines) strs
|
|
|
|
-- | Join strings vertically, left-aligned and right-padded.
|
|
vConcatLeftAligned :: [String] -> String
|
|
vConcatLeftAligned ss = intercalate "\n" $ map showfixedwidth ss
|
|
where
|
|
showfixedwidth = printf (printf "%%-%ds" width)
|
|
width = maximum $ map length ss
|
|
|
|
-- | Join strings vertically, right-aligned and left-padded.
|
|
vConcatRightAligned :: [String] -> String
|
|
vConcatRightAligned ss = intercalate "\n" $ map showfixedwidth ss
|
|
where
|
|
showfixedwidth = printf (printf "%%%ds" width)
|
|
width = maximum $ map length ss
|
|
|
|
-- | Convert a multi-line string to a rectangular string top-padded to the specified height.
|
|
padtop :: Int -> String -> String
|
|
padtop h s = intercalate "\n" xpadded
|
|
where
|
|
ls = lines s
|
|
sh = length ls
|
|
sw | null ls = 0
|
|
| otherwise = maximum $ map length ls
|
|
ypadded = replicate (difforzero h sh) "" ++ ls
|
|
xpadded = map (padleft sw) ypadded
|
|
|
|
-- | Convert a multi-line string to a rectangular string bottom-padded to the specified height.
|
|
padbottom :: Int -> String -> String
|
|
padbottom h s = intercalate "\n" xpadded
|
|
where
|
|
ls = lines s
|
|
sh = length ls
|
|
sw | null ls = 0
|
|
| otherwise = maximum $ map length ls
|
|
ypadded = ls ++ replicate (difforzero h sh) ""
|
|
xpadded = map (padleft sw) ypadded
|
|
|
|
difforzero :: (Num a, Ord a) => a -> a -> a
|
|
difforzero a b = maximum [(a - b), 0]
|
|
|
|
-- | Convert a multi-line string to a rectangular string left-padded to the specified width.
|
|
-- Treats wide characters as double width.
|
|
padleft :: Int -> String -> String
|
|
padleft w "" = concat $ replicate w " "
|
|
padleft w s = intercalate "\n" $ map (printf (printf "%%%ds" w)) $ lines s
|
|
|
|
-- | Convert a multi-line string to a rectangular string right-padded to the specified width.
|
|
-- Treats wide characters as double width.
|
|
padright :: Int -> String -> String
|
|
padright w "" = concat $ replicate w " "
|
|
padright w s = intercalate "\n" $ map (printf (printf "%%-%ds" w)) $ lines s
|
|
|
|
-- | Clip a multi-line string to the specified width and height from the top left.
|
|
cliptopleft :: Int -> Int -> String -> String
|
|
cliptopleft w h = intercalate "\n" . take h . map (take w) . lines
|
|
|
|
-- | Clip and pad a multi-line string to fill the specified width and height.
|
|
fitto :: Int -> Int -> String -> String
|
|
fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline
|
|
where
|
|
rows = map (fit w) $ lines s
|
|
fit w = take w . (++ repeat ' ')
|
|
blankline = replicate w ' '
|
|
|
|
-- Functions below treat wide (eg CJK) characters as double-width.
|
|
|
|
-- | General-purpose wide-char-aware single-line string layout function.
|
|
-- It can left- or right-pad a short string to a minimum width.
|
|
-- It can left- or right-clip a long string to a maximum width, optionally inserting an ellipsis (the third argument).
|
|
-- It clips and pads on the right when the fourth argument is true, otherwise on the left.
|
|
-- It treats wide characters as double width.
|
|
fitString :: Maybe Int -> Maybe Int -> Bool -> Bool -> String -> String
|
|
fitString mminwidth mmaxwidth ellipsify rightside s = (clip . pad) s
|
|
where
|
|
clip :: String -> String
|
|
clip s =
|
|
case mmaxwidth of
|
|
Just w
|
|
| strWidth s > w ->
|
|
case rightside of
|
|
True -> takeWidth (w - length ellipsis) s ++ ellipsis
|
|
False -> ellipsis ++ reverse (takeWidth (w - length ellipsis) $ reverse s)
|
|
| otherwise -> s
|
|
where
|
|
ellipsis = if ellipsify then ".." else ""
|
|
Nothing -> s
|
|
pad :: String -> String
|
|
pad s =
|
|
case mminwidth of
|
|
Just w
|
|
| sw < w ->
|
|
case rightside of
|
|
True -> s ++ replicate (w - sw) ' '
|
|
False -> replicate (w - sw) ' ' ++ s
|
|
| otherwise -> s
|
|
Nothing -> s
|
|
where sw = strWidth s
|
|
|
|
-- | A version of fitString that works on multi-line strings,
|
|
-- separate for now to avoid breakage.
|
|
-- This will rewrite any line endings to unix newlines.
|
|
fitStringMulti :: Maybe Int -> Maybe Int -> Bool -> Bool -> String -> String
|
|
fitStringMulti mminwidth mmaxwidth ellipsify rightside s =
|
|
(intercalate "\n" . map (fitString mminwidth mmaxwidth ellipsify rightside) . lines) s
|
|
|
|
-- | Left-pad a string to the specified width.
|
|
-- Treats wide characters as double width.
|
|
-- Works on multi-line strings too (but will rewrite non-unix line endings).
|
|
padLeftWide :: Int -> String -> String
|
|
padLeftWide w "" = replicate w ' '
|
|
padLeftWide w s = intercalate "\n" $ map (fitString (Just w) Nothing False False) $ lines s
|
|
-- XXX not yet replaceable by
|
|
-- padLeftWide w = fitStringMulti (Just w) Nothing False False
|
|
|
|
-- | Right-pad a string to the specified width.
|
|
-- Treats wide characters as double width.
|
|
-- Works on multi-line strings too (but will rewrite non-unix line endings).
|
|
padRightWide :: Int -> String -> String
|
|
padRightWide w "" = replicate w ' '
|
|
padRightWide w s = intercalate "\n" $ map (fitString (Just w) Nothing False True) $ lines s
|
|
-- XXX not yet replaceable by
|
|
-- padRightWide w = fitStringMulti (Just w) Nothing False True
|
|
|
|
-- | Double-width-character-aware string truncation. Take as many
|
|
-- characters as possible from a string without exceeding the
|
|
-- specified width. Eg takeWidth 3 "りんご" = "り".
|
|
takeWidth :: Int -> String -> String
|
|
takeWidth _ "" = ""
|
|
takeWidth 0 _ = ""
|
|
takeWidth w (c:cs) | cw <= w = c:takeWidth (w-cw) cs
|
|
| otherwise = ""
|
|
where cw = charWidth c
|
|
|
|
-- from Pandoc (copyright John MacFarlane, GPL)
|
|
-- see also http://unicode.org/reports/tr11/#Description
|
|
|
|
-- | Calculate the render width of a string, considering
|
|
-- wide characters (counted as double width), ANSI escape codes
|
|
-- (not counted), and line breaks (in a multi-line string, the longest
|
|
-- line determines the width).
|
|
strWidth :: String -> Int
|
|
strWidth "" = 0
|
|
strWidth s = maximum $ map (foldr (\a b -> charWidth a + b) 0) $ lines s'
|
|
where s' = stripAnsi s
|
|
|
|
stripAnsi :: String -> String
|
|
stripAnsi = regexReplace "\ESC\\[([0-9]+;)*([0-9]+)?[ABCDHJKfmsu]" ""
|
|
|
|
-- | Get the designated render width of a character: 0 for a combining
|
|
-- character, 1 for a regular character, 2 for a wide character.
|
|
-- (Wide characters are rendered as exactly double width in apps and
|
|
-- fonts that support it.) (From Pandoc.)
|
|
charWidth :: Char -> Int
|
|
charWidth c =
|
|
case c of
|
|
_ | c < '\x0300' -> 1
|
|
| c >= '\x0300' && c <= '\x036F' -> 0 -- combining
|
|
| c >= '\x0370' && c <= '\x10FC' -> 1
|
|
| c >= '\x1100' && c <= '\x115F' -> 2
|
|
| c >= '\x1160' && c <= '\x11A2' -> 1
|
|
| c >= '\x11A3' && c <= '\x11A7' -> 2
|
|
| c >= '\x11A8' && c <= '\x11F9' -> 1
|
|
| c >= '\x11FA' && c <= '\x11FF' -> 2
|
|
| c >= '\x1200' && c <= '\x2328' -> 1
|
|
| c >= '\x2329' && c <= '\x232A' -> 2
|
|
| c >= '\x232B' && c <= '\x2E31' -> 1
|
|
| c >= '\x2E80' && c <= '\x303E' -> 2
|
|
| c == '\x303F' -> 1
|
|
| c >= '\x3041' && c <= '\x3247' -> 2
|
|
| c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
|
|
| c >= '\x3250' && c <= '\x4DBF' -> 2
|
|
| c >= '\x4DC0' && c <= '\x4DFF' -> 1
|
|
| c >= '\x4E00' && c <= '\xA4C6' -> 2
|
|
| c >= '\xA4D0' && c <= '\xA95F' -> 1
|
|
| c >= '\xA960' && c <= '\xA97C' -> 2
|
|
| c >= '\xA980' && c <= '\xABF9' -> 1
|
|
| c >= '\xAC00' && c <= '\xD7FB' -> 2
|
|
| c >= '\xD800' && c <= '\xDFFF' -> 1
|
|
| c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
|
|
| c >= '\xF900' && c <= '\xFAFF' -> 2
|
|
| c >= '\xFB00' && c <= '\xFDFD' -> 1
|
|
| c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
|
|
| c >= '\xFE10' && c <= '\xFE19' -> 2
|
|
| c >= '\xFE20' && c <= '\xFE26' -> 1
|
|
| c >= '\xFE30' && c <= '\xFE6B' -> 2
|
|
| c >= '\xFE70' && c <= '\xFEFF' -> 1
|
|
| c >= '\xFF01' && c <= '\xFF60' -> 2
|
|
| c >= '\xFF61' && c <= '\x16A38' -> 1
|
|
| c >= '\x1B000' && c <= '\x1B001' -> 2
|
|
| c >= '\x1D000' && c <= '\x1F1FF' -> 1
|
|
| c >= '\x1F200' && c <= '\x1F251' -> 2
|
|
| c >= '\x1F300' && c <= '\x1F773' -> 1
|
|
| c >= '\x20000' && c <= '\x3FFFD' -> 2
|
|
| otherwise -> 1
|
|
|