hledger/bin/hledger-smooth.hs

#!/usr/bin/env stack
-- stack runghc --verbosity info --package hledger --package string-qq
-- Run from inside the hledger source tree, or compile with compile.sh.
-- See hledger-check-fancyassertions.hs.


-- This is an unfinished prototype, see https://github.com/simonmichael/hledger/issues/1171
-- Requires a contemporaneous version of the hledger package.
-- Requires journal entries to be sorted by date.
-- Run it inside an up to date hledger source tree, eg: bin/hledger-smooth.hs ACCT
-- Or add bin/ to $PATH and [stack ghc bin/hledger-smooth;] hledger smooth ACCT

-- see also: https://github.com/Akuukis/beancount_interpolate

{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# OPTIONS_GHC -Wall -Wno-missing-signatures #-}

import Data.List
import Data.Maybe
import Data.String.QQ (s)
import qualified Data.Text as T
import Data.Time.Calendar
import Safe
-- import Hledger
import Hledger.Cli

------------------------------------------------------------------------------
-- | Command line mode for the "smooth" add-on command, built with
-- hledgerCommandMode from: the help text below (whose first word is the
-- command name), the general flags group (generalflagsgroup1), and a single
-- ACCT argument flag. The other two flag lists are left empty.
-- NOTE(review): the _FLAGS line is presumably a placeholder which hledger
-- replaces with the generated flags help - confirm against hledgerCommandMode.
cmdmode = hledgerCommandMode
  [s| smooth
Like the print command, but splits any posting to ACCT (a full account name)
into multiple daily postings having a similar overall effect.

Each posting is smoothed across the period until the next ACCT posting, and
the last one is smoothed until the report end date, or today.
Eg: $30 on 1/1 and $50 on 1/4, if smoothed on 1/6 with no end date specified,
becomes $10 on 1/1, $10 on 1/2, $10 on 1/3, $25 on 1/4, $25 on 1/5.

The last new posting's amount is left blank to ensure a balanced transaction.
It can differ from the others.

Useful for preprocessing a journal to smooth out irregular revenues or
expenses in daily/weekly/monthly reports, eg:
hledger smooth revenues:consulting | hledger -f- incomestatement -W

_FLAGS
  |]
  []
  [generalflagsgroup1]
  []
  ([], Just $ argsFlag "ACCT")
------------------------------------------------------------------------------
-- we could smooth postings across the journal period, or within standard intervals: --smooth-interval=posting|journal|weekly|monthly|...
-- we could perhaps split transactions instead: --smooth-split=postings|transactions

-- | Read the journal, smooth all postings to the ACCT argument into daily
-- postings, and print the resulting journal like the print command.
-- Fails with a usage error if no ACCT argument was given.
main :: IO ()
main = do
  copts@CliOpts{reportspec_=rspec, rawopts_} <- getHledgerCliOpts cmdmode
  let ropts = _rsReportOpts rspec
      copts' = copts{
        -- One of our postings will probably have a missing amount; this ensures it's
        -- explicit on all the others.
        rawopts_ = setboolopt "explicit" rawopts_
        -- Don't let our ACCT argument be interpreted as a query by print
        ,reportspec_ = rspec{_rsReportOpts=ropts{querystring_=[]}}
        }
  withJournalDo copts' $ \j -> do
    today <- getCurrentDay
    let
      menddate = reportPeriodLastDay rspec
      q = _rsQuery rspec
      -- the first command line argument is the account to smooth
      acct = headDef (error' "Please provide an account name argument") $ querystring_ ropts
      -- Collect postings to exactly this account (no subaccounts, case sensitive).
      -- splitPosting consumes one date per posting whose account name equals acct,
      -- so the dates gathered here must come from the same set of postings;
      -- a looser match would leave unconsumed dates and derail the splitting.
      pr = postingsReport rspec{_rsQuery = And [Acct $ accountNameToAccountOnlyRegex acct, q]} j

      -- dates of postings to acct (in report)
      pdates = map (postingDate . fourth5) pr
      -- the specified report end date or today's date
      enddate = fromMaybe today menddate
      -- each posting is smoothed up to the following date in this list
      dates = pdates ++ [enddate]
      (_,ts') = mapAccumL (splitTransactionPostings q acct) dates $ jtxns j
      j' = j{jtxns=ts'}
    print' copts' j'

-- | Replace each of a transaction's postings to acct with equivalent daily
-- postings running up to the next given end date, threading the list of
-- remaining end dates through the postings in order.
-- Returns the end dates left over and the rebuilt transaction.
-- The query argument is accepted but currently ignored: every transaction
-- is processed, whether or not it matches.
splitTransactionPostings :: Query -> AccountName -> [Day] -> Transaction -> ([Day], Transaction)
splitTransactionPostings _q acct dates t = (remainingdates, newtxn)
  where
    -- each original posting yields a list of one or more replacement postings
    (remainingdates, postinggroups) = mapAccumL (splitPosting acct) dates (tpostings t)
    newtxn = txnTieKnot t{tpostings = concat postinggroups}

-- | Split a posting to acct into equivalent daily postings
-- up to the next given end date, keeping track of remaining end dates.
-- We assume we will see postings in number and order corresponding to the given end dates.
--
-- Returns the end dates still to be consumed, and the replacement posting(s):
--
-- * a posting to any other account is returned unchanged, consuming no date;
--
-- * a posting to acct whose smoothing period is empty (its date is on or after
--   the end date, eg two acct postings on the same day) is also returned
--   unchanged, rather than being dropped;
--
-- * otherwise there is one posting per day from the posting's date up to, but
--   not including, the end date. All but the last carry an equal share of the
--   original amount; the last has a missing amount.
splitPosting :: AccountName -> [Day] -> Posting -> ([Day], [Posting])
splitPosting acct dates p@Posting{paccount,pamount}
  | paccount /= acct = (dates, [p])
  -- no days to spread the amount over: keep the posting as it is
  | null days        = (dates', [p])
  | otherwise        = (dates', ps')
  where
    start = dbg4 "start" $ postingDate p
    -- The head of the date list should be this posting's own date;
    -- the date after it is where this posting's smoothing period ends.
    (end, dates') =
      case dbg4 "dates" dates of
        -- XXX fragile, breaks if transactions are not date-ordered
        (d1:d2:ds)
          | d1 == start -> (d2, d2:ds)
          | otherwise   -> error' "splitPosting got wrong date, should not happen (maybe sort your transactions by date)"
        [d]        -> (d, [])
        []         -> error' "splitPosting ran out of dates, should not happen (maybe sort your transactions by date)"
    -- the end date itself is excluded from the period
    days = initSafe [start..end]
    numdays = length days
    amt  = fromIntegral numdays `divideMixedAmount` pamount
    -- give one of the postings an exact balancing amount to ensure the transaction is balanced
    -- lastamt = pamount - ptrace (amt `multiplyMixedAmount` (fromIntegral $ length days))
    lastamt = missingmixedamt
    daysamts = zip days (replicate (numdays - 1) amt ++ [lastamt])
    ps'  = [postingSetDate (Just d) p{pamount=a} | (d,a) <- daysamts ]

-- | Set a posting's (primary) date, as if it had been parsed from the journal entry:
-- Updates the date field,
-- adds a "date" tag to the parsed tag list (replacing any existing "date" tags there),
-- and adds the same "date" tag to the unparsed comment field as well, for display purposes.
-- If the date is Nothing, unsets the date and removes it from the tags list,
-- leaving the comment untouched.
-- Does not remove existing date tags from the comment field.
postingSetDate :: Maybe Day -> Posting -> Posting
postingSetDate md p@Posting{ptags,pcomment} = p{pdate=md, ptags=newtags, pcomment=newcomment}
  where
    -- the existing tags, minus any previous "date" tags
    othertags = filter ((/= "date") . fst) ptags

    (newtags, newcomment) = case md of
      Nothing -> (othertags, pcomment)
      Just d  ->
        -- The tag name is given without a trailing colon, in both places.
        -- NOTE(review): commentAddTag is expected to insert the ": " between
        -- name and value itself, so a name of "date:" would render as
        -- "date:: ..." - confirm against hledger-lib.
        let datetag = ("date", T.pack $ show d)
        in  (othertags ++ [datetag], commentAddTag pcomment datetag)
bin: switch to "runghc", drop "env -S" (#1453) env -S isn't a thing on linux of course. Go back to using standard env, which means using a stack options line, which means not using "ghc". This new setup is probably simpler anyway. I've just had to give up on the goal of having each script's required packages being defined in one place; now (to they extent they are required) they must be defined both in the script header and in compile.sh. 2021-01-12 21:55:00 +03:00			`#!/usr/bin/env stack`
bin: more robust --package syntax, comma form sometimes fails 2021-02-13 00:55:31 +03:00			`-- stack runghc --verbosity info --package hledger --package string-qq`
;bin: doc 2021-01-12 22:07:29 +03:00			`-- Run from inside the hledger source tree, or compile with compile.sh.`
			`-- See hledger-check-fancyassertions.hs.`
bin: drop explicit --package options from stack scripts, not needed If there are no --package options, stack's script command infers them from the imports. 2020-08-15 19:59:59 +03:00
bin: switch to "runghc", drop "env -S" (#1453) env -S isn't a thing on linux of course. Go back to using standard env, which means using a stack options line, which means not using "ghc". This new setup is probably simpler anyway. I've just had to give up on the goal of having each script's required packages being defined in one place; now (to they extent they are required) they must be defined both in the script header and in compile.sh. 2021-01-12 21:55:00 +03:00
;bin: smooth: link to #1171 [ci skip] 2020-01-23 23:53:43 +03:00			`-- This is an unfinished prototype, see https://github.com/simonmichael/hledger/issues/1171`
;bin: smooth: note/warn about date-order requirement (#1171) [ci skip] 2020-01-23 23:40:25 +03:00			`-- Requires a contemporaneous version of the hledger package.`
			`-- Requires journal entries to be sorted by date.`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`-- Run it inside an up to date hledger source tree, eg: bin/hledger-smooth.hs ACCT`
			`-- Or add bin/ to $PATH and [stack ghc bin/hledger-smooth;] hledger smooth ACCT`

;bin: smooth: note beancount tool 2021-01-31 04:27:30 +03:00			`-- see also: https://github.com/Akuukis/beancount_interpolate`

smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`{-# LANGUAGE QuasiQuotes #-}`
			`{-# LANGUAGE OverloadedStrings #-}`
			`{-# LANGUAGE NamedFieldPuns #-}`
dev: lib, cli, bin: enable/fix name shadowing warnings And a few other cleanups. 2022-08-23 13:58:31 +03:00			`{-# OPTIONS_GHC -Wall -Wno-missing-signatures #-}`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00
			`import Data.List`
			`import Data.Maybe`
;bin: smooth: here -> string-qq, save several minutes of build time [ci skip] 2020-01-23 23:02:55 +03:00			`import Data.String.QQ (s)`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`import qualified Data.Text as T`
			`import Data.Time.Calendar`
			`import Safe`
;bin: smooth: doesn't use hledger-lib [ci skip] 2020-01-23 23:03:52 +03:00			`-- import Hledger`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`import Hledger.Cli`

			`------------------------------------------------------------------------------`
			`cmdmode = hledgerCommandMode`
;bin: smooth: here -> string-qq, save several minutes of build time [ci skip] 2020-01-23 23:02:55 +03:00			`[s\| smooth`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`Like the print command, but splits any posting to ACCT (a full account name)`
			`into multiple daily postings having a similar overall effect.`

			`Each posting is smoothed across the period until the next ACCT posting, and`
			`the last one is smoothed until the report end date, or today.`
			`Eg: $30 on 1/1 and $50 on 1/4, if smoothed on 1/6 with no end date specified,`
			`becomes $10 on 1/1, $10 on 1/2, $10 on 1/3, $25 on 1/4, $25 on 1/5.`

			`The last new posting's amount is left blank to ensure a balanced transaction.`
			`It can differ from the others.`

			`Useful for preprocessing a journal to smooth out irregular revenues or`
			`expenses in daily/weekly/monthly reports, eg:`
			`hledger smooth revenues:consulting \| hledger -f- incomestatement -W`

;bin: print-location, smooth, swap-dates: update _FLAG marker 2020-08-15 21:29:35 +03:00			`_FLAGS`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`\|]`
code: Strip extraneous trailing whitespace from Haskell sources 2019-07-15 13:28:52 +03:00			`[]`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`[generalflagsgroup1]`
			`[]`
			`([], Just $ argsFlag "ACCT")`
			`------------------------------------------------------------------------------`
			`-- we could smooth postings across the journal period, or within standard intervals: --smooth-interval=posting\|journal\|weekly\|monthly\|...`
			`-- we could perhaps split transactions instead: --smooth-split=postings\|transactions`

			`main :: IO ()`
			`main = do`
bin: Update bin scripts for current hledger-lib. (cherry picked from commit bc4aef17b7fa13ec0754b93325e1c5e5ee04f1e7) 2020-12-30 09:59:12 +03:00			`copts@CliOpts{reportspec_=rspec, rawopts_} <- getHledgerCliOpts cmdmode`
fix: bin: Get scripts compiling with current hledger-lib. 2021-09-08 10:27:49 +03:00			`let ropts = _rsReportOpts rspec`
bin: Update bin scripts for current hledger-lib. (cherry picked from commit bc4aef17b7fa13ec0754b93325e1c5e5ee04f1e7) 2020-12-30 09:59:12 +03:00			`copts' = copts{`
bin: hledger-smooth: update for latest hledger-lib (#1072) [ci skip] 2019-07-17 01:36:37 +03:00			`-- One of our postings will probably have a missing amount; this ensures it's`
			`-- explicit on all the others.`
bin: Update bin scripts for current hledger-lib. (cherry picked from commit bc4aef17b7fa13ec0754b93325e1c5e5ee04f1e7) 2020-12-30 09:59:12 +03:00			`rawopts_ = setboolopt "explicit" rawopts_`
bin: hledger-smooth: update for latest hledger-lib (#1072) [ci skip] 2019-07-17 01:36:37 +03:00			`-- Don't let our ACCT argument be interpreted as a query by print`
fix: bin: Get scripts compiling with current hledger-lib. 2021-09-08 10:27:49 +03:00			`,reportspec_ = rspec{_rsReportOpts=ropts{querystring_=[]}}`
bin: hledger-smooth: update for latest hledger-lib (#1072) [ci skip] 2019-07-17 01:36:37 +03:00			`}`
			`withJournalDo copts' $ \j -> do`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`today <- getCurrentDay`
			`let`
bin: Update bin scripts for current hledger-lib. (cherry picked from commit bc4aef17b7fa13ec0754b93325e1c5e5ee04f1e7) 2020-12-30 09:59:12 +03:00			`menddate = reportPeriodLastDay rspec`
fix: bin: Get scripts compiling with current hledger-lib. 2021-09-08 10:27:49 +03:00			`q = _rsQuery rspec`
bin: Update bin scripts for current hledger-lib. (cherry picked from commit bc4aef17b7fa13ec0754b93325e1c5e5ee04f1e7) 2020-12-30 09:59:12 +03:00			`acct = headDef (error' "Please provide an account name argument") $ querystring_ ropts`
fix: bin: Get scripts compiling with current hledger-lib. 2021-09-08 10:27:49 +03:00			`pr = postingsReport rspec{_rsQuery = And [Acct $ accountNameToAccountRegexCI acct, q]} j`
code: Strip extraneous trailing whitespace from Haskell sources 2019-07-15 13:28:52 +03:00
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`-- dates of postings to acct (in report)`
bin: Update bin scripts for new API. 2021-01-01 01:43:00 +03:00			`pdates = map (postingDate . fourth5) pr`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`-- the specified report end date or today's date`
			`enddate = fromMaybe today menddate`
			`dates = pdates ++ [enddate]`
			`(_,ts') = mapAccumL (splitTransactionPostings q acct) dates $ jtxns j`
			`j' = j{jtxns=ts'}`
			`print' copts' j'`

			`-- \| Split a transaction's postings to acct, if the transaction is matched by q,`
			`-- into equivalent daily postings up to the next given end date,`
			`-- keeping track of remaining end dates.`
			`splitTransactionPostings :: Query -> AccountName -> [Day] -> Transaction -> ([Day], Transaction)`
bin: smooth, swap-dates: fix compile.sh warnings (#1072) [ci skip] 2019-07-17 01:58:50 +03:00			`splitTransactionPostings _q acct dates t`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			-- \| q `matchesTransaction` t = (dates', t')
			`-- \| otherwise = (dates, t)`
cln: hlint: Remove rendundant guard warnings. 2021-08-16 08:21:49 +03:00			`= (dates', t')`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`where`
			`(dates', pss') = mapAccumL (splitPosting acct) dates $ tpostings t`
			`t' = txnTieKnot t{tpostings=concat pss'}`

			`-- \| Split a posting to acct into equivalent daily postings`
			`-- up to the next given end date, keeping track of remaining end dates.`
			`-- We assume we will see postings in number and order corresponding the given end dates.`
			`splitPosting :: AccountName -> [Day] -> Posting -> ([Day], [Posting])`
			`splitPosting acct dates p@Posting{paccount,pamount}`
			`\| paccount == acct = (dates', ps')`
			`\| otherwise = (dates, [p])`
			`where`
			`start = dbg4 "start" $ postingDate p`
			`(end, dates') =`
			`case dbg4 "dates" dates of`
;bin: smooth: note/warn about date-order requirement (#1171) [ci skip] 2020-01-23 23:40:25 +03:00			`-- XXX fragile, breaks if transactions are not date-ordered`
			`(d1:d2:ds) -> if d1==start then (d2, d2:ds) else error' "splitPosting got wrong date, should not happen (maybe sort your transactions by date)"`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`[d] -> (d, [])`
;bin: smooth: note/warn about date-order requirement (#1171) [ci skip] 2020-01-23 23:40:25 +03:00			`[] -> error' "splitPosting ran out of dates, should not happen (maybe sort your transactions by date)"`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`days = initSafe [start..end]`
lib: Change internal representation of MixedAmount to use a strict Map instead of a list of Amounts. No longer export Mixed constructor, to keep API clean (if you really need it, you can import it directly from Hledger.Data.Types). We also ensure the JSON representation of MixedAmount doesn't change: it is stored as a normalised list of Amounts. This commit improves performance. Here are some indicative results. hledger reg -f examples/10000x1000x10.journal - Maximum residency decreases from 65MB to 60MB (8% decrease) - Total memory in use decreases from 178MiB to 157MiB (12% decrease) hledger reg -f examples/10000x10000x10.journal - Maximum residency decreases from 69MB to 60MB (13% decrease) - Total memory in use decreases from 198MiB to 153MiB (23% decrease) hledger bal -f examples/10000x1000x10.journal - Total heap usage decreases from 6.4GB to 6.0GB (6% decrease) - Total memory in use decreases from 178MiB to 153MiB (14% decrease) hledger bal -f examples/10000x10000x10.journal - Total heap usage decreases from 7.3GB to 6.9GB (5% decrease) - Total memory in use decreases from 196MiB to 185MiB (5% decrease) hledger bal -M -f examples/10000x1000x10.journal - Total heap usage decreases from 16.8GB to 10.6GB (47% decrease) - Total time decreases from 14.3s to 12.0s (16% decrease) hledger bal -M -f examples/10000x10000x10.journal - Total heap usage decreases from 108GB to 48GB (56% decrease) - Total time decreases from 62s to 41s (33% decrease) If you never directly use the constructor Mixed or pattern match against it then you don't need to make any changes. If you do, then do the following: - If you really care about the individual Amounts and never normalise your MixedAmount (for example, just storing `Mixed amts` and then extracting `amts` as a pattern match, then use should switch to using [Amount]. This should just involve removing the `Mixed` constructor. 
- If you ever call `mixed`, `normaliseMixedAmount`, or do any sort of amount arithmetic (+), (-), then you should replace the constructor `Mixed` with the function `mixed`. To extract the list of Amounts, use the function `amounts`. - If you ever call `normaliseMixedAmountSquashPricesForDisplay`, you can replace that with `mixedAmountStripPrices`. (N.B. this does something slightly different from `normaliseMixedAmountSquashPricesForDisplay`, but I don't think there's any use case for squashing prices and then keeping the first of the squashed prices around. If you disagree let me know.) - Any remaining calls to `normaliseMixedAmount` can be removed, as that is now the identity function. 2021-01-29 08:07:11 +03:00			amt = (fromIntegral $ length days) `divideMixedAmount` pamount
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`-- give one of the postings an exact balancing amount to ensure the transaction is balanced`
			-- lastamt = pamount - ptrace (amt `multiplyMixedAmount` (fromIntegral $ length days))
			`lastamt = missingmixedamt`
lib: Change internal representation of MixedAmount to use a strict Map instead of a list of Amounts. No longer export Mixed constructor, to keep API clean (if you really need it, you can import it directly from Hledger.Data.Types). We also ensure the JSON representation of MixedAmount doesn't change: it is stored as a normalised list of Amounts. This commit improves performance. Here are some indicative results. hledger reg -f examples/10000x1000x10.journal - Maximum residency decreases from 65MB to 60MB (8% decrease) - Total memory in use decreases from 178MiB to 157MiB (12% decrease) hledger reg -f examples/10000x10000x10.journal - Maximum residency decreases from 69MB to 60MB (13% decrease) - Total memory in use decreases from 198MiB to 153MiB (23% decrease) hledger bal -f examples/10000x1000x10.journal - Total heap usage decreases from 6.4GB to 6.0GB (6% decrease) - Total memory in use decreases from 178MiB to 153MiB (14% decrease) hledger bal -f examples/10000x10000x10.journal - Total heap usage decreases from 7.3GB to 6.9GB (5% decrease) - Total memory in use decreases from 196MiB to 185MiB (5% decrease) hledger bal -M -f examples/10000x1000x10.journal - Total heap usage decreases from 16.8GB to 10.6GB (47% decrease) - Total time decreases from 14.3s to 12.0s (16% decrease) hledger bal -M -f examples/10000x10000x10.journal - Total heap usage decreases from 108GB to 48GB (56% decrease) - Total time decreases from 62s to 41s (33% decrease) If you never directly use the constructor Mixed or pattern match against it then you don't need to make any changes. If you do, then do the following: - If you really care about the individual Amounts and never normalise your MixedAmount (for example, just storing `Mixed amts` and then extracting `amts` as a pattern match, then use should switch to using [Amount]. This should just involve removing the `Mixed` constructor. 
- If you ever call `mixed`, `normaliseMixedAmount`, or do any sort of amount arithmetic (+), (-), then you should replace the constructor `Mixed` with the function `mixed`. To extract the list of Amounts, use the function `amounts`. - If you ever call `normaliseMixedAmountSquashPricesForDisplay`, you can replace that with `mixedAmountStripPrices`. (N.B. this does something slightly different from `normaliseMixedAmountSquashPricesForDisplay`, but I don't think there's any use case for squashing prices and then keeping the first of the squashed prices around. If you disagree let me know.) - Any remaining calls to `normaliseMixedAmount` can be removed, as that is now the identity function. 2021-01-29 08:07:11 +03:00			`daysamts = zip days (replicate (length days - 1) amt ++ [lastamt])`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00			`ps' = [postingSetDate (Just d) p{pamount=a} \| (d,a) <- daysamts ]`

			`-- \| Set a posting's (primary) date, as if it had been parsed from the journal entry:`
			`-- Updates the date field,`
			`-- adds a "date" tag to the parsed tag list (replacing any existing "date" tags there),`
			`-- and adds the "date" tag to the unparsed comment field as well, for display purposes.`
			`-- If the date is Nothing, unsets the date and removes it from the tags list.`
			`-- Does not remove existing date tags from the comment field.`
			`postingSetDate :: Maybe Day -> Posting -> Posting`
			`postingSetDate md p@Posting{ptags,pcomment} = p{pdate=md, ptags=ptags'', pcomment=pcomment'}`
			`where`
			`ptags'' = case md of`
			`Nothing -> ptags'`
			`Just d -> ptags'++[("date", T.pack $ show d)]`
			`where`
cln: hlint: Clean up == and elem related hlint warnings. 2021-08-16 07:09:17 +03:00			`ptags' = filter ((/="date").fst) ptags`
smooth: smoothes out irregular transactions (experimental addon) 2018-07-17 01:36:06 +03:00
			`pcomment' = case md of`
			`Nothing -> pcomment`
;bin: smooth: fix Text "Empty stream" error (#1171) [ci skip] 2020-01-23 23:22:24 +03:00			`Just d -> commentAddTag pcomment ("date:", T.pack $ show d)`