hledger/bin/hledger-smooth.hs

138 lines
6.0 KiB
Haskell
Raw Normal View History

#!/usr/bin/env stack
-- stack runghc --verbosity info --package hledger --package string-qq
2021-01-12 22:07:29 +03:00
-- Run from inside the hledger source tree, or compile with compile.sh.
-- See hledger-check-fancyassertions.hs.
2020-01-23 23:53:43 +03:00
-- This is an unfinished prototype, see https://github.com/simonmichael/hledger/issues/1171
-- Requires a contemporaneous version of the hledger package.
-- Requires journal entries to be sorted by date.
-- Run it inside an up to date hledger source tree, eg: bin/hledger-smooth.hs ACCT
-- Or add bin/ to $PATH and [stack ghc bin/hledger-smooth;] hledger smooth ACCT
2021-01-31 04:27:30 +03:00
-- see also: https://github.com/Akuukis/beancount_interpolate
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# OPTIONS_GHC -Wall -Wno-missing-signatures -Wno-name-shadowing #-}
import Data.List
import Data.Maybe
import Data.String.QQ (s)
import qualified Data.Text as T
import Data.Time.Calendar
import Safe
-- import Hledger
import Hledger.Cli
------------------------------------------------------------------------------
-- Command-line mode for the "smooth" add-on command, built with
-- hledgerCommandMode. The quasi-quoted text is the command's help text
-- (first line: command name; last line: the _FLAGS placeholder).
-- The remaining arguments supply generalflagsgroup1 and a single ACCT
-- argument; the empty lists presumably mean "no command-specific flags"
-- (confirm against hledgerCommandMode's signature).
cmdmode = hledgerCommandMode
[s| smooth
Like the print command, but splits any posting to ACCT (a full account name)
into multiple daily postings having a similar overall effect.
Each posting is smoothed across the period until the next ACCT posting, and
the last one is smoothed until the report end date, or today.
Eg: $30 on 1/1 and $50 on 1/4, if smoothed on 1/6 with no end date specified,
becomes $10 on 1/1, $10 on 1/2, $10 on 1/3, $25 on 1/4, $25 on 1/5.
The last new posting's amount is left blank to ensure a balanced transaction.
It can differ from the others.
Useful for preprocessing a journal to smooth out irregular revenues or
expenses in daily/weekly/monthly reports, eg:
hledger smooth revenues:consulting | hledger -f- incomestatement -W
_FLAGS
|]
[]
[generalflagsgroup1]
[]
([], Just $ argsFlag "ACCT")
------------------------------------------------------------------------------
-- we could smooth postings across the journal period, or within standard intervals: --smooth-interval=posting|journal|weekly|monthly|...
-- we could perhaps split transactions instead: --smooth-split=postings|transactions
-- | Entry point: read the journal, smooth every posting to ACCT into daily
-- postings, and print the resulting journal like the print command.
--
-- The first command-line argument is taken as the account name; the program
-- aborts with a usage error if it is missing.
main :: IO ()
main = do
  copts@CliOpts{reportspec_=rspec, rawopts_} <- getHledgerCliOpts cmdmode
  let ropts = rsOpts rspec
      copts' = copts{
        -- One of our postings will probably have a missing amount; this ensures it's
        -- explicit on all the others.
         rawopts_ = setboolopt "explicit" rawopts_
        -- Don't let our ACCT argument be interpreted as a query by print
        ,reportspec_ = rspec{rsOpts=ropts{querystring_=[]}}
        }
  withJournalDo copts' $ \j -> do
    today <- getCurrentDay
    let
      menddate = reportPeriodLastDay rspec
      q        = rsQuery rspec
      acct     = headDef (error' "Please provide an account name argument") $ querystring_ ropts
      pr       = postingsReport rspec{rsQuery = And [Acct $ accountNameToAccountRegexCI acct, q]} j
      -- Dates of the postings to acct (in report).
      -- The account query above is a case-insensitive regex, so it can
      -- return postings whose account is not exactly acct, whereas
      -- splitPosting only splits exact matches. Keep just the exact matches
      -- so that the end dates stay in step with the postings being split.
      pdates   = [postingDate p | p <- map fourth5 pr, paccount p == acct]
      -- the specified report end date or today's date
      enddate  = fromMaybe today menddate
      -- Each posting is smoothed up to the date of the next one; the last
      -- one is smoothed up to enddate.
      dates    = pdates ++ [enddate]
      (_,ts')  = mapAccumL (splitTransactionPostings q acct) dates $ jtxns j
      j'       = j{jtxns=ts'}
    print' copts' j'
-- | Smooth the postings of a single transaction.
-- Each posting to @acct@ is replaced by equivalent daily postings (see
-- 'splitPosting'), consuming end dates from the front of the given list.
-- Returns the end dates that are still unused, together with the rebuilt
-- transaction.
--
-- The query argument is accepted but currently ignored: every transaction is
-- processed, whether or not it matches the query.
splitTransactionPostings :: Query -> AccountName -> [Day] -> Transaction -> ([Day], Transaction)
splitTransactionPostings _q acct dates t = (remainingDates, smoothed)
  where
    -- Thread the list of end dates through the postings, left to right.
    (remainingDates, postingGroups) = mapAccumL (splitPosting acct) dates (tpostings t)
    -- txnTieKnot presumably re-links the new postings to their parent
    -- transaction (confirm against hledger-lib).
    smoothed = txnTieKnot t{tpostings = concat postingGroups}
-- | Split a posting to acct into equivalent daily postings
-- up to the next given end date, keeping track of remaining end dates.
-- We assume we will see postings in number and order corresponding to the
-- given end dates.
--
-- Postings to other accounts are returned unchanged and consume no dates.
-- A posting to acct whose smoothing period is empty (its date is on or after
-- the end date, eg two acct postings on the same day) is also returned
-- unchanged, rather than being dropped; it still consumes its date.
--
-- Calls error' if the date list is empty, or if its first date is not this
-- posting's date (which happens when transactions are not date-ordered).
splitPosting :: AccountName -> [Day] -> Posting -> ([Day], [Posting])
splitPosting acct dates p@Posting{paccount,pamount}
  | paccount /= acct = (dates, [p])
  | null days        = (dates', [p])   -- nothing to spread over: keep the posting as is
  | otherwise        = (dates', ps')
  where
    start = dbg4 "start" $ postingDate p
    (end, dates') =
      case dbg4 "dates" dates of
        -- XXX fragile, breaks if transactions are not date-ordered
        (d1:d2:ds)
          | d1 == start -> (d2, d2:ds)
          | otherwise   -> error' "splitPosting got wrong date, should not happen (maybe sort your transactions by date)"
        [d] -> (d, [])
        []  -> error' "splitPosting ran out of dates, should not happen (maybe sort your transactions by date)"
    -- The days from start up to, but not including, end.
    days  = initSafe [start..end]
    ndays = length days
    -- The per-day share of the original amount.
    amt   = fromIntegral ndays `divideMixedAmount` pamount
    -- The last posting's amount is left missing, so that it is inferred and
    -- the transaction stays balanced despite any rounding in amt.
    lastamt  = missingmixedamt
    daysamts = zip days (replicate (ndays - 1) amt ++ [lastamt])
    ps' = [postingSetDate (Just d) p{pamount=a} | (d,a) <- daysamts]
-- | Set a posting's (primary) date, as if it had been parsed from the journal entry:
-- Updates the date field,
-- adds a "date" tag to the parsed tag list (replacing any existing "date" tags there),
-- and adds the "date" tag to the unparsed comment field as well, for display purposes.
-- If the date is Nothing, unsets the date and removes it from the tags list.
-- Does not remove existing date tags from the comment field.
postingSetDate :: Maybe Day -> Posting -> Posting
postingSetDate md p@Posting{ptags,pcomment} =
  case md of
    Nothing -> p{pdate=Nothing, ptags=otherTags}
    Just d  ->
      let
        -- The tag name is given without a trailing colon: commentAddTag is
        -- expected to insert the colon itself, so "date:" would be rendered
        -- as "date::" (confirm against the hledger-lib version in use).
        dateTag = ("date", T.pack $ show d)
      in p{pdate=md, ptags=otherTags ++ [dateTag], pcomment=commentAddTag pcomment dateTag}
  where
    -- The parsed tags, minus any existing "date" tags.
    otherTags = filter ((/="date").fst) ptags