hledger/hledger-lib/Hledger/Data/AccountName.hs
Chris Lemaire 4ada2a3fbc journal: Fully unbracket AccountNames in account directives
Currently an account name like "a:(aa)" will not have (aa) unbracketed.
However, this seems reasonable since the full name is unbracketed and
thus will not be confused with virtual or virtual-balanced posting.
2023-01-23 12:43:13 -10:00

397 lines
17 KiB
Haskell

{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE OverloadedStrings #-}
{-|
'AccountName's are strings like @assets:cash:petty@, with multiple
components separated by ':'. From a set of these we derive the account
hierarchy.
-}
module Hledger.Data.AccountName (
accountLeafName
,accountNameComponents
,accountNameDrop
,accountNameFromComponents
,accountNameLevel
,accountNameToAccountOnlyRegex
,accountNameToAccountOnlyRegexCI
,accountNameToAccountRegex
,accountNameToAccountRegexCI
,accountNameTreeFrom
,accountSummarisedName
,accountNameInferType
,accountNameType
,assetAccountRegex
,cashAccountRegex
,liabilityAccountRegex
,equityAccountRegex
,conversionAccountRegex
,revenueAccountRegex
,expenseAccountRegex
,acctsep
,acctsepchar
,clipAccountName
,clipOrEllipsifyAccountName
,elideAccountName
,escapeName
,expandAccountName
,expandAccountNames
,isAccountNamePrefixOf
-- ,isAccountRegex
,isSubAccountNameOf
,parentAccountName
,parentAccountNames
,subAccountNamesFrom
,topAccountNames
,unbudgetedAccountName
,accountNamePostingType
,accountNameWithoutPostingType
,accountNameWithPostingType
,joinAccountNames
,concatAccountNames
,accountNameApplyAliases
,accountNameApplyAliasesMemo
,tests_AccountName
)
where
import Control.Applicative ((<|>))
import Control.Monad (foldM)
import Data.Foldable (asum, toList)
import qualified Data.List.NonEmpty as NE
import qualified Data.Map as M
import Data.MemoUgly (memo)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tree (Tree(..))
import Safe
import Text.DocLayout (realLength)
import Hledger.Data.Types
import Hledger.Utils
-- $setup
-- >>> :set -XOverloadedStrings
acctsepchar :: Char
acctsepchar = ':'
acctsep :: Text
acctsep = T.pack [acctsepchar]
-- accountNameComponents :: AccountName -> [String]
-- accountNameComponents = splitAtElement acctsepchar
accountNameComponents :: AccountName -> [Text]
accountNameComponents = T.splitOn acctsep
accountNameFromComponents :: [Text] -> AccountName
accountNameFromComponents = T.intercalate acctsep
accountLeafName :: AccountName -> Text
accountLeafName = last . accountNameComponents
-- | Truncate all account name components but the last to two characters.
accountSummarisedName :: AccountName -> Text
accountSummarisedName a
-- length cs > 1 = take 2 (head cs) ++ ":" ++ a'
| length cs > 1 = T.intercalate ":" (map (T.take 2) $ init cs) <> ":" <> a'
| otherwise = a'
where
cs = accountNameComponents a
a' = accountLeafName a
-- | Regular expressions matching common English top-level account names,
-- used as a fallback when account types are not declared.
assetAccountRegex = toRegexCI' "^assets?(:|$)"
cashAccountRegex = toRegexCI' "^assets?(:.+)?:(cash|bank|che(ck|que?)(ing)?|savings?|current)(:|$)"
liabilityAccountRegex = toRegexCI' "^(debts?|liabilit(y|ies))(:|$)"
equityAccountRegex = toRegexCI' "^equity(:|$)"
conversionAccountRegex = toRegexCI' "^equity:(trad(e|ing)|conversion)s?(:|$)"
revenueAccountRegex = toRegexCI' "^(income|revenue)s?(:|$)"
expenseAccountRegex = toRegexCI' "^expenses?(:|$)"
-- | Try to guess an account's type from its name,
-- matching common English top-level account names.
accountNameInferType :: AccountName -> Maybe AccountType
accountNameInferType a
| regexMatchText cashAccountRegex a = Just Cash
| regexMatchText assetAccountRegex a = Just Asset
| regexMatchText liabilityAccountRegex a = Just Liability
| regexMatchText conversionAccountRegex a = Just Conversion
| regexMatchText equityAccountRegex a = Just Equity
| regexMatchText revenueAccountRegex a = Just Revenue
| regexMatchText expenseAccountRegex a = Just Expense
| otherwise = Nothing
-- Extract the 'AccountType' of an 'AccountName' by looking it up in the
-- provided Map, traversing the parent accounts if necessary. If none of those
-- work, try 'accountNameInferType'.
accountNameType :: M.Map AccountName AccountType -> AccountName -> Maybe AccountType
accountNameType atypes a = asum (map (`M.lookup` atypes) $ a : parentAccountNames a)
<|> accountNameInferType a
accountNameLevel :: AccountName -> Int
accountNameLevel "" = 0
accountNameLevel a = T.length (T.filter (==acctsepchar) a) + 1
-- | A top-level account prefixed to some accounts in budget reports.
-- Defined here so it can be ignored by accountNameDrop.
unbudgetedAccountName :: T.Text
unbudgetedAccountName = "<unbudgeted>"
accountNamePostingType :: AccountName -> PostingType
accountNamePostingType a
| T.null a = RegularPosting
| T.head a == '[' && T.last a == ']' = BalancedVirtualPosting
| T.head a == '(' && T.last a == ')' = VirtualPosting
| otherwise = RegularPosting
accountNameWithoutPostingType :: AccountName -> AccountName
accountNameWithoutPostingType a = case accountNamePostingType a of
BalancedVirtualPosting -> textUnbracket a
VirtualPosting -> textUnbracket a
RegularPosting -> a
accountNameWithPostingType :: PostingType -> AccountName -> AccountName
accountNameWithPostingType BalancedVirtualPosting = wrap "[" "]" . accountNameWithoutPostingType
accountNameWithPostingType VirtualPosting = wrap "(" ")" . accountNameWithoutPostingType
accountNameWithPostingType RegularPosting = accountNameWithoutPostingType
-- | Prefix one account name to another, preserving posting type
-- indicators like concatAccountNames.
joinAccountNames :: AccountName -> AccountName -> AccountName
joinAccountNames a b = concatAccountNames $ filter (not . T.null) [a,b]
-- | Join account names into one. If any of them has () or [] posting type
-- indicators, these (the first type encountered) will also be applied to
-- the resulting account name.
concatAccountNames :: [AccountName] -> AccountName
concatAccountNames as = accountNameWithPostingType t $ T.intercalate ":" $ map accountNameWithoutPostingType as
where t = headDef RegularPosting $ filter (/= RegularPosting) $ map accountNamePostingType as
-- | Rewrite an account name using all matching aliases from the given list, in sequence.
-- Each alias sees the result of applying the previous aliases.
-- Or, return any error arising from a bad regular expression in the aliases.
accountNameApplyAliases :: [AccountAlias] -> AccountName -> Either RegexError AccountName
accountNameApplyAliases aliases a =
let (name,typ) = (accountNameWithoutPostingType a, accountNamePostingType a)
in foldM
(\acct alias -> dbg6 "result" $ aliasReplace (dbg6 "alias" alias) (dbg6 "account" acct))
name
aliases
>>= Right . accountNameWithPostingType typ
-- | Memoising version of accountNameApplyAliases, maybe overkill.
accountNameApplyAliasesMemo :: [AccountAlias] -> AccountName -> Either RegexError AccountName
accountNameApplyAliasesMemo aliases = memo (accountNameApplyAliases aliases)
-- XXX re-test this memoisation
-- aliasMatches :: AccountAlias -> AccountName -> Bool
-- aliasMatches (BasicAlias old _) a = old `isAccountNamePrefixOf` a
-- aliasMatches (RegexAlias re _) a = regexMatchesCI re a
aliasReplace :: AccountAlias -> AccountName -> Either RegexError AccountName
aliasReplace (BasicAlias old new) a
| old `isAccountNamePrefixOf` a || old == a =
Right $ new <> T.drop (T.length old) a
| otherwise = Right a
aliasReplace (RegexAlias re repl) a =
fmap T.pack . regexReplace re repl $ T.unpack a -- XXX
-- | Remove some number of account name components from the front of the account name.
-- If the special "<unbudgeted>" top-level account is present, it is preserved and
-- dropping affects the rest of the account name.
accountNameDrop :: Int -> AccountName -> AccountName
accountNameDrop n a
| a == unbudgetedAccountName = a
| unbudgetedAccountAndSep `T.isPrefixOf` a =
case accountNameDrop n $ T.drop (T.length unbudgetedAccountAndSep) a of
"" -> unbudgetedAccountName
a' -> unbudgetedAccountAndSep <> a'
| otherwise = accountNameFromComponentsOrElide . drop n $ accountNameComponents a
where
unbudgetedAccountAndSep = unbudgetedAccountName <> acctsep
accountNameFromComponentsOrElide [] = "..."
accountNameFromComponentsOrElide xs = accountNameFromComponents xs
-- | Sorted unique account names implied by these account names,
-- ie these plus all their parent accounts up to the root.
-- Eg: ["a:b:c","d:e"] -> ["a","a:b","a:b:c","d","d:e"]
expandAccountNames :: [AccountName] -> [AccountName]
expandAccountNames = toList . foldMap (S.fromList . expandAccountName)
-- | "a:b:c" -> ["a","a:b","a:b:c"]
expandAccountName :: AccountName -> [AccountName]
expandAccountName = map accountNameFromComponents . NE.tail . NE.inits . accountNameComponents
-- | ["a:b:c","d:e"] -> ["a","d"]
topAccountNames :: [AccountName] -> [AccountName]
topAccountNames = filter ((1==) . accountNameLevel) . expandAccountNames
parentAccountName :: AccountName -> AccountName
parentAccountName = accountNameFromComponents . init . accountNameComponents
parentAccountNames :: AccountName -> [AccountName]
parentAccountNames a = parentAccountNames' $ parentAccountName a
where
parentAccountNames' "" = []
parentAccountNames' a2 = a2 : parentAccountNames' (parentAccountName a2)
-- | Is the first account a parent or other ancestor of (and not the same as) the second ?
isAccountNamePrefixOf :: AccountName -> AccountName -> Bool
isAccountNamePrefixOf = T.isPrefixOf . (<> acctsep)
isSubAccountNameOf :: AccountName -> AccountName -> Bool
s `isSubAccountNameOf` p =
(p `isAccountNamePrefixOf` s) && (accountNameLevel s == (accountNameLevel p + 1))
-- | From a list of account names, select those which are direct
-- subaccounts of the given account name.
subAccountNamesFrom :: [AccountName] -> AccountName -> [AccountName]
subAccountNamesFrom accts a = filter (`isSubAccountNameOf` a) accts
-- | Convert a list of account names to a tree.
accountNameTreeFrom :: [AccountName] -> Tree AccountName
accountNameTreeFrom accts =
Node "root" (accounttreesfrom (topAccountNames accts))
where
accounttreesfrom :: [AccountName] -> [Tree AccountName]
accounttreesfrom [] = []
accounttreesfrom as = [Node a (accounttreesfrom $ subs a) | a <- as]
subs = subAccountNamesFrom (expandAccountNames accts)
--nullaccountnametree = Node "root" []
-- | Elide an account name to fit in the specified width.
-- From the ledger 2.6 news:
--
-- @
-- What Ledger now does is that if an account name is too long, it will
-- start abbreviating the first parts of the account name down to two
-- letters in length. If this results in a string that is still too
-- long, the front will be elided -- not the end. For example:
--
-- Expenses:Cash ; OK, not too long
-- Ex:Wednesday:Cash ; "Expenses" was abbreviated to fit
-- Ex:We:Afternoon:Cash ; "Expenses" and "Wednesday" abbreviated
-- ; Expenses:Wednesday:Afternoon:Lunch:Snack:Candy:Chocolate:Cash
-- ..:Af:Lu:Sn:Ca:Ch:Cash ; Abbreviated and elided!
-- @
elideAccountName :: Int -> AccountName -> AccountName
elideAccountName width s
-- XXX special case for transactions register's multi-account pseudo-names
| " (split)" `T.isSuffixOf` s =
let
names = T.splitOn ", " $ T.take (T.length s - 8) s
widthpername = max 0 (width - 8 - 2 * (max 1 (length names) - 1)) `div` length names
in
fitText Nothing (Just width) True False $
(<>" (split)") $
T.intercalate ", "
[accountNameFromComponents $ elideparts widthpername [] $ accountNameComponents s' | s' <- names]
| otherwise =
fitText Nothing (Just width) True False $ accountNameFromComponents $ elideparts width [] $ accountNameComponents s
where
elideparts :: Int -> [Text] -> [Text] -> [Text]
elideparts w done ss
| realLength (accountNameFromComponents $ done++ss) <= w = done++ss
| length ss > 1 = elideparts w (done++[textTakeWidth 2 $ head ss]) (tail ss)
| otherwise = done++ss
-- | Keep only the first n components of an account name, where n
-- is a positive integer. If n is Just 0, returns the empty string, if n is
-- Nothing, return the full name.
clipAccountName :: Maybe Int -> AccountName -> AccountName
clipAccountName Nothing = id
clipAccountName (Just n) = accountNameFromComponents . take n . accountNameComponents
-- | Keep only the first n components of an account name, where n
-- is a positive integer. If n is Just 0, returns "...", if n is Nothing, return
-- the full name.
clipOrEllipsifyAccountName :: Maybe Int -> AccountName -> AccountName
clipOrEllipsifyAccountName (Just 0) = const "..."
clipOrEllipsifyAccountName n = clipAccountName n
-- | Escape an AccountName for use within a regular expression.
-- >>> putStr . T.unpack $ escapeName "First?!#$*?$(*) !@^#*? %)*!@#"
-- First\?!#\$\*\?\$\(\*\) !@\^#\*\? %\)\*!@#
escapeName :: AccountName -> Text
escapeName = T.concatMap escapeChar
where
escapeChar c = if c `elem` escapedChars then T.snoc "\\" c else T.singleton c
escapedChars = ['[', '?', '+', '|', '(', ')', '*', '$', '^', '\\']
-- | Convert an account name to a regular expression matching it and its subaccounts.
accountNameToAccountRegex :: AccountName -> Regexp
accountNameToAccountRegex a = toRegex' $ "^" <> escapeName a <> "(:|$)" -- PARTIAL: Is this safe after escapeName?
-- | Convert an account name to a regular expression matching it and its subaccounts,
-- case insensitively.
accountNameToAccountRegexCI :: AccountName -> Regexp
accountNameToAccountRegexCI a = toRegexCI' $ "^" <> escapeName a <> "(:|$)" -- PARTIAL: Is this safe after escapeName?
-- | Convert an account name to a regular expression matching it but not its subaccounts.
accountNameToAccountOnlyRegex :: AccountName -> Regexp
accountNameToAccountOnlyRegex a = toRegex' $ "^" <> escapeName a <> "$" -- PARTIAL: Is this safe after escapeName?
-- | Convert an account name to a regular expression matching it but not its subaccounts,
-- case insensitively.
accountNameToAccountOnlyRegexCI :: AccountName -> Regexp
accountNameToAccountOnlyRegexCI a = toRegexCI' $ "^" <> escapeName a <> "$" -- PARTIAL: Is this safe after escapeName?
-- -- | Does this string look like an exact account-matching regular expression ?
--isAccountRegex :: String -> Bool
--isAccountRegex s = take 1 s == "^" && take 5 (reverse s) == ")$|:("
tests_AccountName = testGroup "AccountName" [
testCase "accountNameTreeFrom" $ do
accountNameTreeFrom ["a"] @?= Node "root" [Node "a" []]
accountNameTreeFrom ["a","b"] @?= Node "root" [Node "a" [], Node "b" []]
accountNameTreeFrom ["a","a:b"] @?= Node "root" [Node "a" [Node "a:b" []]]
accountNameTreeFrom ["a:b:c"] @?= Node "root" [Node "a" [Node "a:b" [Node "a:b:c" []]]]
,testCase "expandAccountNames" $ do
expandAccountNames ["assets:cash","assets:checking","expenses:vacation"] @?=
["assets","assets:cash","assets:checking","expenses","expenses:vacation"]
,testCase "isAccountNamePrefixOf" $ do
"assets" `isAccountNamePrefixOf` "assets" @?= False
"assets" `isAccountNamePrefixOf` "assets:bank" @?= True
"assets" `isAccountNamePrefixOf` "assets:bank:checking" @?= True
"my assets" `isAccountNamePrefixOf` "assets:bank" @?= False
,testCase "isSubAccountNameOf" $ do
"assets" `isSubAccountNameOf` "assets" @?= False
"assets:bank" `isSubAccountNameOf` "assets" @?= True
"assets:bank:checking" `isSubAccountNameOf` "assets" @?= False
"assets:bank" `isSubAccountNameOf` "my assets" @?= False
,testCase "accountNameInferType" $ do
accountNameInferType "assets" @?= Just Asset
accountNameInferType "assets:cash" @?= Just Cash
accountNameInferType "assets:A/R" @?= Just Asset
accountNameInferType "liabilities" @?= Just Liability
accountNameInferType "equity" @?= Just Equity
accountNameInferType "equity:conversion" @?= Just Conversion
accountNameInferType "expenses" @?= Just Expense
accountNameInferType "revenues" @?= Just Revenue
accountNameInferType "revenue" @?= Just Revenue
accountNameInferType "income" @?= Just Revenue
,testCase "joinAccountNames" $ do
joinAccountNames "assets" "cash" @?= "assets:cash"
joinAccountNames "assets:cash" "a" @?= "assets:cash:a"
joinAccountNames "assets" "(cash)" @?= "(assets:cash)"
joinAccountNames "assets" "[cash]" @?= "[assets:cash]"
joinAccountNames "(assets)" "cash" @?= "(assets:cash)"
joinAccountNames "" "assets" @?= "assets"
joinAccountNames "assets" "" @?= "assets"
,testCase "concatAccountNames" $ do
concatAccountNames ["assets", "cash"] @?= "assets:cash"
concatAccountNames ["assets:cash", "a"] @?= "assets:cash:a"
concatAccountNames ["assets", "(cash)"] @?= "(assets:cash)"
concatAccountNames ["assets", "[cash]"] @?= "[assets:cash]"
concatAccountNames ["(assets)", "cash"] @?= "(assets:cash)"
concatAccountNames ["", "assets"] @?= ":assets"
concatAccountNames ["assets", ""] @?= "assets:"
]