hledger/hledger-lib/Hledger/Utils/Text.hs
2018-09-03 15:54:53 -07:00

434 lines
16 KiB
Haskell

-- | Text formatting helpers, ported from String as needed.
-- There may be better alternatives out there.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
module Hledger.Utils.Text
(
-- -- * misc
-- lowercase,
-- uppercase,
-- underline,
-- stripbrackets,
textUnbracket,
-- -- quoting
quoteIfSpaced,
-- quoteIfNeeded,
-- singleQuoteIfNeeded,
-- -- quotechars,
-- -- whitespacechars,
escapeDoubleQuotes,
-- escapeSingleQuotes,
-- escapeQuotes,
-- words',
-- unwords',
stripquotes,
-- isSingleQuoted,
-- isDoubleQuoted,
-- -- * single-line layout
textstrip,
textlstrip,
textrstrip,
-- chomp,
-- elideLeft,
textElideRight,
-- formatString,
-- -- * multi-line layout
textConcatTopPadded,
-- concatBottomPadded,
-- concatOneLine,
-- vConcatLeftAligned,
-- vConcatRightAligned,
-- padtop,
-- padbottom,
-- padleft,
-- padright,
-- cliptopleft,
-- fitto,
fitText,
-- -- * wide-character-aware layout
textWidth,
textTakeWidth,
-- fitString,
-- fitStringMulti,
textPadLeftWide,
textPadRightWide,
-- -- * tests
easytests_Text
)
where
-- import Data.Char
import Data.List
#if !(MIN_VERSION_base(4,11,0))
import Data.Monoid
#endif
import Data.Text (Text)
import qualified Data.Text as T
-- import Text.Parsec
-- import Text.Printf (printf)
-- import Hledger.Utils.Parse
-- import Hledger.Utils.Regex
import Hledger.Utils.String (charWidth)
import Hledger.Utils.Test
-- lowercase, uppercase :: String -> String
-- lowercase = map toLower
-- uppercase = map toUpper
-- | Remove leading and trailing whitespace.
textstrip :: Text -> Text
textstrip = textlstrip . textrstrip
-- | Remove leading whitespace.
textlstrip :: Text -> Text
textlstrip = T.dropWhile (`elem` (" \t" :: String)) :: Text -> Text -- XXX isSpace ?
-- | Remove trailing whitespace.
textrstrip = T.reverse . textlstrip . T.reverse
textrstrip :: Text -> Text
-- -- | Remove trailing newlines/carriage returns.
-- chomp :: String -> String
-- chomp = reverse . dropWhile (`elem` "\r\n") . reverse
-- stripbrackets :: String -> String
-- stripbrackets = dropWhile (`elem` "([") . reverse . dropWhile (`elem` "])") . reverse :: String -> String
-- elideLeft :: Int -> String -> String
-- elideLeft width s =
-- if length s > width then ".." ++ reverse (take (width - 2) $ reverse s) else s
textElideRight :: Int -> Text -> Text
textElideRight width t =
if T.length t > width then T.take (width - 2) t <> ".." else t
-- -- | Clip and pad a string to a minimum & maximum width, and/or left/right justify it.
-- -- Works on multi-line strings too (but will rewrite non-unix line endings).
-- formatString :: Bool -> Maybe Int -> Maybe Int -> String -> String
-- formatString leftJustified minwidth maxwidth s = intercalate "\n" $ map (printf fmt) $ lines s
-- where
-- justify = if leftJustified then "-" else ""
-- minwidth' = maybe "" show minwidth
-- maxwidth' = maybe "" (("."++).show) maxwidth
-- fmt = "%" ++ justify ++ minwidth' ++ maxwidth' ++ "s"
-- underline :: String -> String
-- underline s = s' ++ replicate (length s) '-' ++ "\n"
-- where s'
-- | last s == '\n' = s
-- | otherwise = s ++ "\n"
-- | Wrap a string in double quotes, and \-prefix any embedded single
-- quotes, if it contains whitespace and is not already single- or
-- double-quoted.
quoteIfSpaced :: T.Text -> T.Text
quoteIfSpaced s | isSingleQuoted s || isDoubleQuoted s = s
| not $ any (`elem` (T.unpack s)) whitespacechars = s
| otherwise = quoteIfNeeded s
-- -- | Wrap a string in double quotes, and \-prefix any embedded single
-- -- quotes, if it contains whitespace and is not already single- or
-- -- double-quoted.
-- quoteIfSpaced :: String -> String
-- quoteIfSpaced s | isSingleQuoted s || isDoubleQuoted s = s
-- | not $ any (`elem` s) whitespacechars = s
-- | otherwise = "'"++escapeSingleQuotes s++"'"
-- -- | Double-quote this string if it contains whitespace, single quotes
-- -- or double-quotes, escaping the quotes as needed.
quoteIfNeeded :: T.Text -> T.Text
quoteIfNeeded s | any (`elem` T.unpack s) (quotechars++whitespacechars) = "\"" <> escapeDoubleQuotes s <> "\""
| otherwise = s
-- -- | Single-quote this string if it contains whitespace or double-quotes.
-- -- No good for strings containing single quotes.
-- singleQuoteIfNeeded :: String -> String
-- singleQuoteIfNeeded s | any (`elem` s) whitespacechars = "'"++s++"'"
-- | otherwise = s
quotechars, whitespacechars :: [Char]
quotechars = "'\""
whitespacechars = " \t\n\r"
escapeDoubleQuotes :: T.Text -> T.Text
escapeDoubleQuotes = T.replace "\"" "\\\""
-- escapeSingleQuotes :: T.Text -> T.Text
-- escapeSingleQuotes = T.replace "'" "\'"
-- escapeQuotes :: String -> String
-- escapeQuotes = regexReplace "([\"'])" "\\1"
-- -- | Quote-aware version of words - don't split on spaces which are inside quotes.
-- -- NB correctly handles "a'b" but not "''a''". Can raise an error if parsing fails.
-- words' :: String -> [String]
-- words' "" = []
-- words' s = map stripquotes $ fromparse $ parsewith p s
-- where
-- p = do ss <- (singleQuotedPattern <|> doubleQuotedPattern <|> pattern) `sepBy` many1 spacenonewline
-- -- eof
-- return ss
-- pattern = many (noneOf whitespacechars)
-- singleQuotedPattern = between (char '\'') (char '\'') (many $ noneOf "'")
-- doubleQuotedPattern = between (char '"') (char '"') (many $ noneOf "\"")
-- -- | Quote-aware version of unwords - single-quote strings which contain whitespace
-- unwords' :: [Text] -> Text
-- unwords' = T.unwords . map quoteIfNeeded
-- | Strip one matching pair of single or double quotes on the ends of a string.
stripquotes :: Text -> Text
stripquotes s = if isSingleQuoted s || isDoubleQuoted s then T.init $ T.tail s else s
isSingleQuoted :: Text -> Bool
isSingleQuoted s =
T.length (T.take 2 s) == 2 && T.head s == '\'' && T.last s == '\''
isDoubleQuoted :: Text -> Bool
isDoubleQuoted s =
T.length (T.take 2 s) == 2 && T.head s == '"' && T.last s == '"'
textUnbracket :: Text -> Text
textUnbracket s
| (T.head s == '[' && T.last s == ']') || (T.head s == '(' && T.last s == ')') = T.init $ T.tail s
| otherwise = s
-- | Join several multi-line strings as side-by-side rectangular strings of the same height, top-padded.
-- Treats wide characters as double width.
textConcatTopPadded :: [Text] -> Text
textConcatTopPadded ts = T.intercalate "\n" $ map T.concat $ transpose padded
where
lss = map T.lines ts :: [[Text]]
h = maximum $ map length lss
ypad ls = replicate (difforzero h (length ls)) "" ++ ls
xpad ls = map (textPadLeftWide w) ls
where w | null ls = 0
| otherwise = maximum $ map textWidth ls
padded = map (xpad . ypad) lss :: [[Text]]
-- -- | Join several multi-line strings as side-by-side rectangular strings of the same height, bottom-padded.
-- -- Treats wide characters as double width.
-- concatBottomPadded :: [String] -> String
-- concatBottomPadded strs = intercalate "\n" $ map concat $ transpose padded
-- where
-- lss = map lines strs
-- h = maximum $ map length lss
-- ypad ls = ls ++ replicate (difforzero h (length ls)) ""
-- xpad ls = map (padRightWide w) ls where w | null ls = 0
-- | otherwise = maximum $ map strWidth ls
-- padded = map (xpad . ypad) lss
-- -- | Join multi-line strings horizontally, after compressing each of
-- -- them to a single line with a comma and space between each original line.
-- concatOneLine :: [String] -> String
-- concatOneLine strs = concat $ map ((intercalate ", ").lines) strs
-- -- | Join strings vertically, left-aligned and right-padded.
-- vConcatLeftAligned :: [String] -> String
-- vConcatLeftAligned ss = intercalate "\n" $ map showfixedwidth ss
-- where
-- showfixedwidth = printf (printf "%%-%ds" width)
-- width = maximum $ map length ss
-- -- | Join strings vertically, right-aligned and left-padded.
-- vConcatRightAligned :: [String] -> String
-- vConcatRightAligned ss = intercalate "\n" $ map showfixedwidth ss
-- where
-- showfixedwidth = printf (printf "%%%ds" width)
-- width = maximum $ map length ss
-- -- | Convert a multi-line string to a rectangular string top-padded to the specified height.
-- padtop :: Int -> String -> String
-- padtop h s = intercalate "\n" xpadded
-- where
-- ls = lines s
-- sh = length ls
-- sw | null ls = 0
-- | otherwise = maximum $ map length ls
-- ypadded = replicate (difforzero h sh) "" ++ ls
-- xpadded = map (padleft sw) ypadded
-- -- | Convert a multi-line string to a rectangular string bottom-padded to the specified height.
-- padbottom :: Int -> String -> String
-- padbottom h s = intercalate "\n" xpadded
-- where
-- ls = lines s
-- sh = length ls
-- sw | null ls = 0
-- | otherwise = maximum $ map length ls
-- ypadded = ls ++ replicate (difforzero h sh) ""
-- xpadded = map (padleft sw) ypadded
difforzero :: (Num a, Ord a) => a -> a -> a
difforzero a b = maximum [(a - b), 0]
-- -- | Convert a multi-line string to a rectangular string left-padded to the specified width.
-- -- Treats wide characters as double width.
-- padleft :: Int -> String -> String
-- padleft w "" = concat $ replicate w " "
-- padleft w s = intercalate "\n" $ map (printf (printf "%%%ds" w)) $ lines s
-- -- | Convert a multi-line string to a rectangular string right-padded to the specified width.
-- -- Treats wide characters as double width.
-- padright :: Int -> String -> String
-- padright w "" = concat $ replicate w " "
-- padright w s = intercalate "\n" $ map (printf (printf "%%-%ds" w)) $ lines s
-- -- | Clip a multi-line string to the specified width and height from the top left.
-- cliptopleft :: Int -> Int -> String -> String
-- cliptopleft w h = intercalate "\n" . take h . map (take w) . lines
-- -- | Clip and pad a multi-line string to fill the specified width and height.
-- fitto :: Int -> Int -> String -> String
-- fitto w h s = intercalate "\n" $ take h $ rows ++ repeat blankline
-- where
-- rows = map (fit w) $ lines s
-- fit w = take w . (++ repeat ' ')
-- blankline = replicate w ' '
-- -- Functions below treat wide (eg CJK) characters as double-width.
-- | General-purpose wide-char-aware single-line text layout function.
-- It can left- or right-pad a short string to a minimum width.
-- It can left- or right-clip a long string to a maximum width, optionally inserting an ellipsis (the third argument).
-- It clips and pads on the right when the fourth argument is true, otherwise on the left.
-- It treats wide characters as double width.
fitText :: Maybe Int -> Maybe Int -> Bool -> Bool -> Text -> Text
fitText mminwidth mmaxwidth ellipsify rightside = clip . pad
where
clip :: Text -> Text
clip s =
case mmaxwidth of
Just w
| textWidth s > w ->
case rightside of
True -> textTakeWidth (w - T.length ellipsis) s <> ellipsis
False -> ellipsis <> T.reverse (textTakeWidth (w - T.length ellipsis) $ T.reverse s)
| otherwise -> s
where
ellipsis = if ellipsify then ".." else ""
Nothing -> s
pad :: Text -> Text
pad s =
case mminwidth of
Just w
| sw < w ->
case rightside of
True -> s <> T.replicate (w - sw) " "
False -> T.replicate (w - sw) " " <> s
| otherwise -> s
Nothing -> s
where sw = textWidth s
-- -- | A version of fitString that works on multi-line strings,
-- -- separate for now to avoid breakage.
-- -- This will rewrite any line endings to unix newlines.
-- fitStringMulti :: Maybe Int -> Maybe Int -> Bool -> Bool -> String -> String
-- fitStringMulti mminwidth mmaxwidth ellipsify rightside s =
-- (intercalate "\n" . map (fitString mminwidth mmaxwidth ellipsify rightside) . lines) s
-- | Left-pad a text to the specified width.
-- Treats wide characters as double width.
-- Works on multi-line texts too (but will rewrite non-unix line endings).
textPadLeftWide :: Int -> Text -> Text
textPadLeftWide w "" = T.replicate w " "
textPadLeftWide w s = T.intercalate "\n" $ map (fitText (Just w) Nothing False False) $ T.lines s
-- XXX not yet replaceable by
-- padLeftWide w = fitStringMulti (Just w) Nothing False False
-- | Right-pad a string to the specified width.
-- Treats wide characters as double width.
-- Works on multi-line strings too (but will rewrite non-unix line endings).
textPadRightWide :: Int -> Text -> Text
textPadRightWide w "" = T.replicate w " "
textPadRightWide w s = T.intercalate "\n" $ map (fitText (Just w) Nothing False True) $ T.lines s
-- XXX not yet replaceable by
-- padRightWide w = fitStringMulti (Just w) Nothing False True
-- | Double-width-character-aware string truncation. Take as many
-- characters as possible from a string without exceeding the
-- specified width. Eg textTakeWidth 3 "りんご" = "り".
textTakeWidth :: Int -> Text -> Text
textTakeWidth _ "" = ""
textTakeWidth 0 _ = ""
textTakeWidth w t | not (T.null t),
let c = T.head t,
let cw = charWidth c,
cw <= w
= T.cons c $ textTakeWidth (w-cw) (T.tail t)
| otherwise = ""
-- -- from Pandoc (copyright John MacFarlane, GPL)
-- -- see also http://unicode.org/reports/tr11/#Description
-- | Calculate the designated render width of a string, taking into
-- account wide characters and line breaks (the longest line within a
-- multi-line string determines the width ).
textWidth :: Text -> Int
textWidth "" = 0
textWidth s = maximum $ map (T.foldr (\a b -> charWidth a + b) 0) $ T.lines s
-- -- | Get the designated render width of a character: 0 for a combining
-- -- character, 1 for a regular character, 2 for a wide character.
-- -- (Wide characters are rendered as exactly double width in apps and
-- -- fonts that support it.) (From Pandoc.)
-- charWidth :: Char -> Int
-- charWidth c =
-- case c of
-- _ | c < '\x0300' -> 1
-- | c >= '\x0300' && c <= '\x036F' -> 0 -- combining
-- | c >= '\x0370' && c <= '\x10FC' -> 1
-- | c >= '\x1100' && c <= '\x115F' -> 2
-- | c >= '\x1160' && c <= '\x11A2' -> 1
-- | c >= '\x11A3' && c <= '\x11A7' -> 2
-- | c >= '\x11A8' && c <= '\x11F9' -> 1
-- | c >= '\x11FA' && c <= '\x11FF' -> 2
-- | c >= '\x1200' && c <= '\x2328' -> 1
-- | c >= '\x2329' && c <= '\x232A' -> 2
-- | c >= '\x232B' && c <= '\x2E31' -> 1
-- | c >= '\x2E80' && c <= '\x303E' -> 2
-- | c == '\x303F' -> 1
-- | c >= '\x3041' && c <= '\x3247' -> 2
-- | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
-- | c >= '\x3250' && c <= '\x4DBF' -> 2
-- | c >= '\x4DC0' && c <= '\x4DFF' -> 1
-- | c >= '\x4E00' && c <= '\xA4C6' -> 2
-- | c >= '\xA4D0' && c <= '\xA95F' -> 1
-- | c >= '\xA960' && c <= '\xA97C' -> 2
-- | c >= '\xA980' && c <= '\xABF9' -> 1
-- | c >= '\xAC00' && c <= '\xD7FB' -> 2
-- | c >= '\xD800' && c <= '\xDFFF' -> 1
-- | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
-- | c >= '\xF900' && c <= '\xFAFF' -> 2
-- | c >= '\xFB00' && c <= '\xFDFD' -> 1
-- | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
-- | c >= '\xFE10' && c <= '\xFE19' -> 2
-- | c >= '\xFE20' && c <= '\xFE26' -> 1
-- | c >= '\xFE30' && c <= '\xFE6B' -> 2
-- | c >= '\xFE70' && c <= '\xFEFF' -> 1
-- | c >= '\xFF01' && c <= '\xFF60' -> 2
-- | c >= '\xFF61' && c <= '\x16A38' -> 1
-- | c >= '\x1B000' && c <= '\x1B001' -> 2
-- | c >= '\x1D000' && c <= '\x1F1FF' -> 1
-- | c >= '\x1F200' && c <= '\x1F251' -> 2
-- | c >= '\x1F300' && c <= '\x1F773' -> 1
-- | c >= '\x20000' && c <= '\x3FFFD' -> 2
-- | otherwise -> 1
easytests_Text = tests "Text" [
tests "quoteIfSpaced" [
expectEq' "a'a" (quoteIfSpaced "a'a")
,expectEq' "a\"a" (quoteIfSpaced "a\"a")
,expectEq' "\"a a\"" (quoteIfSpaced "a a")
,expectEq' "\"mimi's cafe\"" (quoteIfSpaced "mimi's cafe")
,expectEq' "\"\\\"alex\\\" cafe\"" (quoteIfSpaced "\"alex\" cafe")
,expectEq' "\"le'shan's cafe\"" (quoteIfSpaced "le'shan's cafe")
,expectEq' "\"\\\"be'any's\\\" cafe\"" (quoteIfSpaced "\"be'any's\" cafe")
]
]