mirror of
https://github.com/simonmichael/hledger.git
synced 2024-11-10 14:16:41 +03:00
4141067428
* Replace Parsec with Megaparsec (see #289) This builds upon PR #289 by @rasendubi * Revert renaming of parseWithState to parseWithCtx * Fix doctests * Update for Megaparsec 5 * Specialize parser to improve performance * Pretty print errors * Swap StateT and ParsecT This is necessary to get the correct backtracking behavior, i.e. discard state changes if the parsing fails.
415 lines
16 KiB
Haskell
415 lines
16 KiB
Haskell
-- | Text formatting helpers, ported from String as needed.
|
|
-- There may be better alternatives out there.
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Hledger.Utils.Text
|
|
-- (
|
|
-- -- * misc
|
|
-- lowercase,
|
|
-- uppercase,
|
|
-- underline,
|
|
-- stripbrackets,
|
|
-- unbracket,
|
|
-- -- quoting
|
|
-- quoteIfSpaced,
|
|
-- quoteIfNeeded,
|
|
-- singleQuoteIfNeeded,
|
|
-- -- quotechars,
|
|
-- -- whitespacechars,
|
|
-- escapeDoubleQuotes,
|
|
-- escapeSingleQuotes,
|
|
-- escapeQuotes,
|
|
-- words',
|
|
-- unwords',
|
|
-- stripquotes,
|
|
-- isSingleQuoted,
|
|
-- isDoubleQuoted,
|
|
-- -- * single-line layout
|
|
-- strip,
|
|
-- lstrip,
|
|
-- rstrip,
|
|
-- chomp,
|
|
-- elideLeft,
|
|
-- elideRight,
|
|
-- formatString,
|
|
-- -- * multi-line layout
|
|
-- concatTopPadded,
|
|
-- concatBottomPadded,
|
|
-- concatOneLine,
|
|
-- vConcatLeftAligned,
|
|
-- vConcatRightAligned,
|
|
-- padtop,
|
|
-- padbottom,
|
|
-- padleft,
|
|
-- padright,
|
|
-- cliptopleft,
|
|
-- fitto,
|
|
-- -- * wide-character-aware layout
|
|
-- strWidth,
|
|
-- textTakeWidth,
|
|
-- fitString,
|
|
-- fitStringMulti,
|
|
-- padLeftWide,
|
|
-- padRightWide
|
|
-- )
|
|
where
|
|
|
|
-- import Data.Char
|
|
import Data.List
|
|
import Data.Monoid
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
-- import Text.Parsec
|
|
-- import Text.Printf (printf)
|
|
|
|
-- import Hledger.Utils.Parse
|
|
-- import Hledger.Utils.Regex
|
|
import Hledger.Utils.String (charWidth)
|
|
|
|
-- lowercase, uppercase :: String -> String
|
|
-- lowercase = map toLower
|
|
-- uppercase = map toUpper
|
|
|
|
-- | Remove leading and trailing whitespace.
|
|
textstrip :: Text -> Text
|
|
textstrip = textlstrip . textrstrip
|
|
|
|
-- | Remove leading whitespace.
|
|
textlstrip :: Text -> Text
|
|
textlstrip = T.dropWhile (`elem` (" \t" :: String)) :: Text -> Text -- XXX isSpace ?
|
|
|
|
-- | Remove trailing whitespace.
|
|
textrstrip = T.reverse . textlstrip . T.reverse
|
|
textrstrip :: Text -> Text
|
|
|
|
-- -- | Remove trailing newlines/carriage returns.
|
|
-- chomp :: String -> String
|
|
-- chomp = reverse . dropWhile (`elem` "\r\n") . reverse
|
|
|
|
-- stripbrackets :: String -> String
|
|
-- stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String
|
|
|
|
-- elideLeft :: Int -> String -> String
|
|
-- elideLeft width s =
|
|
-- if length s > width then ".." ++ reverse (take (width - 2) $ reverse s) else s
|
|
|
|
textElideRight :: Int -> Text -> Text
|
|
textElideRight width t =
|
|
if T.length t > width then T.take (width - 2) t <> ".." else t
|
|
|
|
-- -- | Clip and pad a string to a minimum & maximum width, and/or left/right justify it.
|
|
-- -- Works on multi-line strings too (but will rewrite non-unix line endings).
|
|
-- formatString :: Bool -> Maybe Int -> Maybe Int -> String -> String
|
|
-- formatString leftJustified minwidth maxwidth s = intercalate "\n" $ map (printf fmt) $ lines s
|
|
-- where
|
|
-- justify = if leftJustified then "-" else ""
|
|
-- minwidth' = maybe "" show minwidth
|
|
-- maxwidth' = maybe "" (("."++).show) maxwidth
|
|
-- fmt = "%" ++ justify ++ minwidth' ++ maxwidth' ++ "s"
|
|
|
|
-- underline :: String -> String
|
|
-- underline s = s' ++ replicate (length s) '-' ++ "\n"
|
|
-- where s'
|
|
-- | last s == '\n' = s
|
|
-- | otherwise = s ++ "\n"
|
|
|
|
-- | Wrap a string in double quotes, and \-prefix any embedded single
|
|
-- quotes, if it contains whitespace and is not already single- or
|
|
-- double-quoted.
|
|
quoteIfSpaced :: T.Text -> T.Text
|
|
quoteIfSpaced s | isSingleQuoted s || isDoubleQuoted s = s
|
|
| not $ any (`elem` (T.unpack s)) whitespacechars = s
|
|
| otherwise = "'"<>escapeSingleQuotes s<>"'"
|
|
|
|
-- -- | Wrap a string in double quotes, and \-prefix any embedded single
|
|
-- -- quotes, if it contains whitespace and is not already single- or
|
|
-- -- double-quoted.
|
|
-- quoteIfSpaced :: String -> String
|
|
-- quoteIfSpaced s | isSingleQuoted s || isDoubleQuoted s = s
|
|
-- | not $ any (`elem` s) whitespacechars = s
|
|
-- | otherwise = "'"++escapeSingleQuotes s++"'"
|
|
|
|
-- -- | Double-quote this string if it contains whitespace, single quotes
|
|
-- -- or double-quotes, escaping the quotes as needed.
|
|
-- quoteIfNeeded :: T.Text -> T.Text
|
|
-- quoteIfNeeded s | any (`elem` T.unpack s) (quotechars++whitespacechars) = "\"" <> escapeDoubleQuotes s <> "\""
|
|
-- | otherwise = s
|
|
|
|
-- -- | Single-quote this string if it contains whitespace or double-quotes.
|
|
-- -- No good for strings containing single quotes.
|
|
-- singleQuoteIfNeeded :: String -> String
|
|
-- singleQuoteIfNeeded s | any (`elem` s) whitespacechars = "'"++s++"'"
|
|
-- | otherwise = s
|
|
|
|
quotechars, whitespacechars :: [Char]
|
|
quotechars = "'\""
|
|
whitespacechars = " \t\n\r"
|
|
|
|
escapeDoubleQuotes :: T.Text -> T.Text
|
|
escapeDoubleQuotes = T.replace "\"" "\""
|
|
|
|
escapeSingleQuotes :: T.Text -> T.Text
|
|
escapeSingleQuotes = T.replace "'" "\'"
|
|
|
|
-- escapeQuotes :: String -> String
|
|
-- escapeQuotes = regexReplace "([\"'])" "\\1"
|
|
|
|
-- -- | Quote-aware version of words - don't split on spaces which are inside quotes.
|
|
-- -- NB correctly handles "a'b" but not "''a''". Can raise an error if parsing fails.
|
|
-- words' :: String -> [String]
|
|
-- words' "" = []
|
|
-- words' s = map stripquotes $ fromparse $ parsewith p s
|
|
-- where
|
|
-- p = do ss <- (singleQuotedPattern <|> doubleQuotedPattern <|> pattern) `sepBy` many1 spacenonewline
|
|
-- -- eof
|
|
-- return ss
|
|
-- pattern = many (noneOf whitespacechars)
|
|
-- singleQuotedPattern = between (char '\'') (char '\'') (many $ noneOf "'")
|
|
-- doubleQuotedPattern = between (char '"') (char '"') (many $ noneOf "\"")
|
|
|
|
-- -- | Quote-aware version of unwords - single-quote strings which contain whitespace
|
|
-- unwords' :: [Text] -> Text
|
|
-- unwords' = T.unwords . map quoteIfNeeded
|
|
|
|
-- | Strip one matching pair of single or double quotes on the ends of a string.
|
|
stripquotes :: Text -> Text
|
|
stripquotes s = if isSingleQuoted s || isDoubleQuoted s then T.init $ T.tail s else s
|
|
|
|
isSingleQuoted :: Text -> Bool
|
|
isSingleQuoted s =
|
|
T.length (T.take 2 s) == 2 && T.head s == '\'' && T.last s == '\''
|
|
|
|
isDoubleQuoted :: Text -> Bool
|
|
isDoubleQuoted s =
|
|
T.length (T.take 2 s) == 2 && T.head s == '"' && T.last s == '"'
|
|
|
|
textUnbracket :: Text -> Text
|
|
textUnbracket s
|
|
| (T.head s == '[' && T.last s == ']') || (T.head s == '(' && T.last s == ')') = T.init $ T.tail s
|
|
| otherwise = s
|
|
|
|
-- | Join several multi-line strings as side-by-side rectangular strings of the same height, top-padded.
|
|
-- Treats wide characters as double width.
|
|
textConcatTopPadded :: [Text] -> Text
|
|
textConcatTopPadded ts = T.intercalate "\n" $ map T.concat $ transpose padded
|
|
where
|
|
lss = map T.lines ts :: [[Text]]
|
|
h = maximum $ map length lss
|
|
ypad ls = replicate (difforzero h (length ls)) "" ++ ls
|
|
xpad ls = map (textPadLeftWide w) ls
|
|
where w | null ls = 0
|
|
| otherwise = maximum $ map textWidth ls
|
|
padded = map (xpad . ypad) lss :: [[Text]]
|
|
|
|
-- -- | Join several multi-line strings as side-by-side rectangular strings of the same height, bottom-padded.
|
|
-- -- Treats wide characters as double width.
|
|
-- concatBottomPadded :: [String] -> String
|
|
-- concatBottomPadded strs = intercalate "\n" $ map concat $ transpose padded
|
|
-- where
|
|
-- lss = map lines strs
|
|
-- h = maximum $ map length lss
|
|
-- ypad ls = ls ++ replicate (difforzero h (length ls)) ""
|
|
-- xpad ls = map (padRightWide w) ls where w | null ls = 0
|
|
-- | otherwise = maximum $ map strWidth ls
|
|
-- padded = map (xpad . ypad) lss
|
|
|
|
|
|
-- -- | Join multi-line strings horizontally, after compressing each of
|
|
-- -- them to a single line with a comma and space between each original line.
|
|
-- concatOneLine :: [String] -> String
|
|
-- concatOneLine strs = concat $ map ((intercalate ", ").lines) strs
|
|
|
|
-- -- | Join strings vertically, left-aligned and right-padded.
|
|
-- vConcatLeftAligned :: [String] -> String
|
|
-- vConcatLeftAligned ss = intercalate "\n" $ map showfixedwidth ss
|
|
-- where
|
|
-- showfixedwidth = printf (printf "%%-%ds" width)
|
|
-- width = maximum $ map length ss
|
|
|
|
-- -- | Join strings vertically, right-aligned and left-padded.
|
|
-- vConcatRightAligned :: [String] -> String
|
|
-- vConcatRightAligned ss = intercalate "\n" $ map showfixedwidth ss
|
|
-- where
|
|
-- showfixedwidth = printf (printf "%%%ds" width)
|
|
-- width = maximum $ map length ss
|
|
|
|
-- -- | Convert a multi-line string to a rectangular string top-padded to the specified height.
|
|
-- padtop :: Int -> String -> String
|
|
-- padtop h s = intercalate "\n" xpadded
|
|
-- where
|
|
-- ls = lines s
|
|
-- sh = length ls
|
|
-- sw | null ls = 0
|
|
-- | otherwise = maximum $ map length ls
|
|
-- ypadded = replicate (difforzero h sh) "" ++ ls
|
|
-- xpadded = map (padleft sw) ypadded
|
|
|
|
-- -- | Convert a multi-line string to a rectangular string bottom-padded to the specified height.
|
|
-- padbottom :: Int -> String -> String
|
|
-- padbottom h s = intercalate "\n" xpadded
|
|
-- where
|
|
-- ls = lines s
|
|
-- sh = length ls
|
|
-- sw | null ls = 0
|
|
-- | otherwise = maximum $ map length ls
|
|
-- ypadded = ls ++ replicate (difforzero h sh) ""
|
|
-- xpadded = map (padleft sw) ypadded
|
|
|
|
difforzero :: (Num a, Ord a) => a -> a -> a
|
|
difforzero a b = maximum [(a - b), 0]
|
|
|
|
-- -- | Convert a multi-line string to a rectangular string left-padded to the specified width.
|
|
-- -- Treats wide characters as double width.
|
|
-- padleft :: Int -> String -> String
|
|
-- padleft w "" = concat $ replicate w " "
|
|
-- padleft w s = intercalate "\n" $ map (printf (printf "%%%ds" w)) $ lines s
|
|
|
|
-- -- | Convert a multi-line string to a rectangular string right-padded to the specified width.
|
|
-- -- Treats wide characters as double width.
|
|
-- padright :: Int -> String -> String
|
|
-- padright w "" = concat $ replicate w " "
|
|
-- padright w s = intercalate "\n" $ map (printf (printf "%%-%ds" w)) $ lines s
|
|
|
|
-- -- | Clip a multi-line string to the specified width and height from the top left.
|
|
-- cliptopleft :: Int -> Int -> String -> String
|
|
-- cliptopleft w h = intercalate "\n" . take h . map (take w) . lines
|
|
|
|
-- -- | Clip and pad a multi-line string to fill the specified width and height.
|
|
-- fitto :: Int -> Int -> String -> String
|
|
-- fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline
|
|
-- where
|
|
-- rows = map (fit w) $ lines s
|
|
-- fit w = take w . (++ repeat ' ')
|
|
-- blankline = replicate w ' '
|
|
|
|
-- -- Functions below treat wide (eg CJK) characters as double-width.
|
|
|
|
-- | General-purpose wide-char-aware single-line text layout function.
|
|
-- It can left- or right-pad a short string to a minimum width.
|
|
-- It can left- or right-clip a long string to a maximum width, optionally inserting an ellipsis (the third argument).
|
|
-- It clips and pads on the right when the fourth argument is true, otherwise on the left.
|
|
-- It treats wide characters as double width.
|
|
fitText :: Maybe Int -> Maybe Int -> Bool -> Bool -> Text -> Text
|
|
fitText mminwidth mmaxwidth ellipsify rightside s = (clip . pad) s
|
|
where
|
|
clip :: Text -> Text
|
|
clip s =
|
|
case mmaxwidth of
|
|
Just w
|
|
| textWidth s > w ->
|
|
case rightside of
|
|
True -> textTakeWidth (w - T.length ellipsis) s <> ellipsis
|
|
False -> ellipsis <> T.reverse (textTakeWidth (w - T.length ellipsis) $ T.reverse s)
|
|
| otherwise -> s
|
|
where
|
|
ellipsis = if ellipsify then ".." else ""
|
|
Nothing -> s
|
|
pad :: Text -> Text
|
|
pad s =
|
|
case mminwidth of
|
|
Just w
|
|
| sw < w ->
|
|
case rightside of
|
|
True -> s <> T.replicate (w - sw) " "
|
|
False -> T.replicate (w - sw) " " <> s
|
|
| otherwise -> s
|
|
Nothing -> s
|
|
where sw = textWidth s
|
|
|
|
-- -- | A version of fitString that works on multi-line strings,
|
|
-- -- separate for now to avoid breakage.
|
|
-- -- This will rewrite any line endings to unix newlines.
|
|
-- fitStringMulti :: Maybe Int -> Maybe Int -> Bool -> Bool -> String -> String
|
|
-- fitStringMulti mminwidth mmaxwidth ellipsify rightside s =
|
|
-- (intercalate "\n" . map (fitString mminwidth mmaxwidth ellipsify rightside) . lines) s
|
|
|
|
-- | Left-pad a text to the specified width.
|
|
-- Treats wide characters as double width.
|
|
-- Works on multi-line texts too (but will rewrite non-unix line endings).
|
|
textPadLeftWide :: Int -> Text -> Text
|
|
textPadLeftWide w "" = T.replicate w " "
|
|
textPadLeftWide w s = T.intercalate "\n" $ map (fitText (Just w) Nothing False False) $ T.lines s
|
|
-- XXX not yet replaceable by
|
|
-- padLeftWide w = fitStringMulti (Just w) Nothing False False
|
|
|
|
-- | Right-pad a string to the specified width.
|
|
-- Treats wide characters as double width.
|
|
-- Works on multi-line strings too (but will rewrite non-unix line endings).
|
|
textPadRightWide :: Int -> Text -> Text
|
|
textPadRightWide w "" = T.replicate w " "
|
|
textPadRightWide w s = T.intercalate "\n" $ map (fitText (Just w) Nothing False True) $ T.lines s
|
|
-- XXX not yet replaceable by
|
|
-- padRightWide w = fitStringMulti (Just w) Nothing False True
|
|
|
|
-- | Double-width-character-aware string truncation. Take as many
|
|
-- characters as possible from a string without exceeding the
|
|
-- specified width. Eg textTakeWidth 3 "りんご" = "り".
|
|
textTakeWidth :: Int -> Text -> Text
|
|
textTakeWidth _ "" = ""
|
|
textTakeWidth 0 _ = ""
|
|
textTakeWidth w t | not (T.null t),
|
|
let c = T.head t,
|
|
let cw = charWidth c,
|
|
cw <= w
|
|
= T.cons c $ textTakeWidth (w-cw) (T.tail t)
|
|
| otherwise = ""
|
|
|
|
-- -- from Pandoc (copyright John MacFarlane, GPL)
|
|
-- -- see also http://unicode.org/reports/tr11/#Description
|
|
|
|
-- | Calculate the designated render width of a string, taking into
|
|
-- account wide characters and line breaks (the longest line within a
|
|
-- multi-line string determines the width ).
|
|
textWidth :: Text -> Int
|
|
textWidth "" = 0
|
|
textWidth s = maximum $ map (T.foldr (\a b -> charWidth a + b) 0) $ T.lines s
|
|
|
|
-- -- | Get the designated render width of a character: 0 for a combining
|
|
-- -- character, 1 for a regular character, 2 for a wide character.
|
|
-- -- (Wide characters are rendered as exactly double width in apps and
|
|
-- -- fonts that support it.) (From Pandoc.)
|
|
-- charWidth :: Char -> Int
|
|
-- charWidth c =
|
|
-- case c of
|
|
-- _ | c < '\x0300' -> 1
|
|
-- | c >= '\x0300' && c <= '\x036F' -> 0 -- combining
|
|
-- | c >= '\x0370' && c <= '\x10FC' -> 1
|
|
-- | c >= '\x1100' && c <= '\x115F' -> 2
|
|
-- | c >= '\x1160' && c <= '\x11A2' -> 1
|
|
-- | c >= '\x11A3' && c <= '\x11A7' -> 2
|
|
-- | c >= '\x11A8' && c <= '\x11F9' -> 1
|
|
-- | c >= '\x11FA' && c <= '\x11FF' -> 2
|
|
-- | c >= '\x1200' && c <= '\x2328' -> 1
|
|
-- | c >= '\x2329' && c <= '\x232A' -> 2
|
|
-- | c >= '\x232B' && c <= '\x2E31' -> 1
|
|
-- | c >= '\x2E80' && c <= '\x303E' -> 2
|
|
-- | c == '\x303F' -> 1
|
|
-- | c >= '\x3041' && c <= '\x3247' -> 2
|
|
-- | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
|
|
-- | c >= '\x3250' && c <= '\x4DBF' -> 2
|
|
-- | c >= '\x4DC0' && c <= '\x4DFF' -> 1
|
|
-- | c >= '\x4E00' && c <= '\xA4C6' -> 2
|
|
-- | c >= '\xA4D0' && c <= '\xA95F' -> 1
|
|
-- | c >= '\xA960' && c <= '\xA97C' -> 2
|
|
-- | c >= '\xA980' && c <= '\xABF9' -> 1
|
|
-- | c >= '\xAC00' && c <= '\xD7FB' -> 2
|
|
-- | c >= '\xD800' && c <= '\xDFFF' -> 1
|
|
-- | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
|
|
-- | c >= '\xF900' && c <= '\xFAFF' -> 2
|
|
-- | c >= '\xFB00' && c <= '\xFDFD' -> 1
|
|
-- | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
|
|
-- | c >= '\xFE10' && c <= '\xFE19' -> 2
|
|
-- | c >= '\xFE20' && c <= '\xFE26' -> 1
|
|
-- | c >= '\xFE30' && c <= '\xFE6B' -> 2
|
|
-- | c >= '\xFE70' && c <= '\xFEFF' -> 1
|
|
-- | c >= '\xFF01' && c <= '\xFF60' -> 2
|
|
-- | c >= '\xFF61' && c <= '\x16A38' -> 1
|
|
-- | c >= '\x1B000' && c <= '\x1B001' -> 2
|
|
-- | c >= '\x1D000' && c <= '\x1F1FF' -> 1
|
|
-- | c >= '\x1F200' && c <= '\x1F251' -> 2
|
|
-- | c >= '\x1F300' && c <= '\x1F773' -> 1
|
|
-- | c >= '\x20000' && c <= '\x3FFFD' -> 2
|
|
-- | otherwise -> 1
|
|
|