Calculate MultiReportBalance columns more efficiently.

Only calculate posting date once for each posting, and calculate their
columns instead of checking each DateSpan separately.
This commit is contained in:
Stephen Morgan 2019-11-12 12:14:21 +11:00 committed by Simon Michael
parent f55f814155
commit 38904372b2
4 changed files with 67 additions and 40 deletions

View File

@ -63,6 +63,8 @@ module Hledger.Data.Dates (
spanDefaultsFrom,
spanUnion,
spansUnion,
daysSpan,
latestSpanContaining,
smartdate,
splitSpan,
fixSmartDate,
@ -79,10 +81,11 @@ import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (fail)
import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (MonadFail, fail)
import Control.Applicative.Permutations
import Control.Monad (unless)
import Control.Monad (guard, unless)
import "base-compat-batteries" Data.List.Compat
import Data.Default
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
#if MIN_VERSION_time(1,5,0)
@ -95,7 +98,7 @@ import Data.Time.Calendar
import Data.Time.Calendar.OrdinalDate
import Data.Time.Clock
import Data.Time.LocalTime
import Safe (headMay, lastMay, readMay)
import Safe (headMay, lastMay, readMay, maximumMay, minimumMay)
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Custom
@ -231,9 +234,8 @@ daysInSpan _ = Nothing
-- | Is this an empty span, ie closed with the end date on or before the start date ?
isEmptySpan :: DateSpan -> Bool
isEmptySpan s = case daysInSpan s of
Just n -> n < 1
Nothing -> False
isEmptySpan (DateSpan (Just s) (Just e)) = e <= s
isEmptySpan _ = False
-- | Does the span include the given date ?
spanContainsDate :: DateSpan -> Day -> Bool
@ -287,6 +289,36 @@ earliest d Nothing = d
earliest Nothing d = d
earliest (Just d1) (Just d2) = Just $ min d1 d2
-- | Calculate the minimal DateSpan containing all of the given Days (in the
-- usual exclusive-end-date sense: beginning on the earliest, and ending on
-- the day after the latest).
daysSpan :: [Day] -> DateSpan
daysSpan ds = DateSpan (minimumMay ds) (addDays 1 <$> maximumMay ds)
-- | Select the DateSpan containing a given Day, if any, from a given list of
-- DateSpans.
--
-- If the DateSpans are non-overlapping, this returns the unique containing
-- DateSpan, if it exists. If the DateSpans are overlapping, it will return the
-- containing DateSpan with the latest start date, and then latest end date.
-- Note: This will currently return `DateSpan (Just s) (Just e)` before it will
-- return `DateSpan (Just s) Nothing`. It's unclear which behaviour is desired.
-- This is irrelevant at the moment as it's never applied to any list with
-- overlapping DateSpans.
latestSpanContaining :: [DateSpan] -> Day -> Maybe DateSpan
latestSpanContaining datespans = go
where
go day = do
span <- Set.lookupLT supSpan spanSet
guard $ spanContainsDate span day
return span
where
-- The smallest DateSpan larger than any DateSpan containing day.
supSpan = DateSpan (Just $ addDays 1 day) Nothing
spanSet = Set.fromList $ filter (not . isEmptySpan) datespans
-- | Parse a period expression to an Interval and overall DateSpan using
-- the provided reference date, or return a parse error.
parsePeriodExpr

View File

@ -34,8 +34,9 @@ import Text.Printf
import Hledger.Utils.Test
import Hledger.Data.Types
import Hledger.Data.Account
import Hledger.Data.Dates (daysSpan)
import Hledger.Data.Journal
import Hledger.Data.Posting
import Hledger.Data.Posting (postingDate)
import Hledger.Query
@ -100,7 +101,7 @@ ledgerPostings = journalPostings . ljournal
-- | The (fully specified) date span containing all the ledger's (filtered) transactions,
-- or DateSpan Nothing Nothing if there are none.
ledgerDateSpan :: Ledger -> DateSpan
ledgerDateSpan = postingsDateSpan . ledgerPostings
ledgerDateSpan = daysSpan . map postingDate . ledgerPostings
-- | All commodities used in this ledger.
ledgerCommodities :: Ledger -> [CommoditySymbol]

View File

@ -42,8 +42,6 @@ module Hledger.Data.Posting (
postingDate2,
isPostingInDateSpan,
isPostingInDateSpan',
postingsDateSpan,
postingsDateSpan',
-- * account name operations
accountNamesFromPostings,
accountNamePostingType,
@ -69,6 +67,8 @@ module Hledger.Data.Posting (
tests_Posting
)
where
import Data.Foldable (asum)
import Data.List
import qualified Data.Map as M
import Data.Maybe
@ -204,20 +204,19 @@ removePrices p = p{ pamount = Mixed $ remove <$> amounts (pamount p) }
-- otherwise the parent transaction's primary date, or the null date if
-- there is no parent transaction.
postingDate :: Posting -> Day
postingDate p = fromMaybe txndate $ pdate p
where
txndate = maybe nulldate tdate $ ptransaction p
postingDate p = fromMaybe nulldate $ asum dates
where dates = [ pdate p, tdate <$> ptransaction p ]
-- | Get a posting's secondary (secondary) date, which is the first of:
-- posting's secondary date, transaction's secondary date, posting's
-- primary date, transaction's primary date, or the null date if there is
-- no parent transaction.
postingDate2 :: Posting -> Day
postingDate2 p = headDef nulldate $ catMaybes dates
where dates = [pdate2 p
,maybe Nothing tdate2 $ ptransaction p
,pdate p
,fmap tdate (ptransaction p)
postingDate2 p = fromMaybe nulldate $ asum dates
where dates = [ pdate2 p
, tdate2 =<< ptransaction p
, pdate p
, tdate <$> ptransaction p
]
-- | Get a posting's status. This is cleared or pending if those are
@ -246,7 +245,7 @@ relatedPostings _ = []
-- | Does this posting fall within the given date span ?
isPostingInDateSpan :: DateSpan -> Posting -> Bool
isPostingInDateSpan s = spanContainsDate s . postingDate
isPostingInDateSpan = isPostingInDateSpan' PrimaryDate
-- --date2-sensitive version, separate for now to avoid disturbing multiBalanceReport.
isPostingInDateSpan' :: WhichDate -> DateSpan -> Posting -> Bool
@ -256,21 +255,6 @@ isPostingInDateSpan' SecondaryDate s = spanContainsDate s . postingDate2
isEmptyPosting :: Posting -> Bool
isEmptyPosting = isZeroMixedAmount . pamount
-- | Get the minimal date span which contains all the postings, or the
-- null date span if there are none.
postingsDateSpan :: [Posting] -> DateSpan
postingsDateSpan [] = DateSpan Nothing Nothing
postingsDateSpan ps = DateSpan (Just $ postingDate $ head ps') (Just $ addDays 1 $ postingDate $ last ps')
where ps' = sortOn postingDate ps
-- --date2-sensitive version, as above.
postingsDateSpan' :: WhichDate -> [Posting] -> DateSpan
postingsDateSpan' _ [] = DateSpan Nothing Nothing
postingsDateSpan' wd ps = DateSpan (Just $ postingdate $ head ps') (Just $ addDays 1 $ postingdate $ last ps')
where
ps' = sortOn postingdate ps
postingdate = if wd == PrimaryDate then postingDate else postingDate2
-- AccountName stuff that depends on PostingType
accountNamePostingType :: AccountName -> PostingType

View File

@ -24,6 +24,7 @@ where
import GHC.Generics (Generic)
import Control.DeepSeq (NFData)
import Data.List
import qualified Data.Map as M
import Data.Maybe
import Data.Ord
import Data.Time.Calendar
@ -150,7 +151,7 @@ multiBalanceReportWith ropts@ReportOpts{..} q j priceoracle =
displayspan
| empty_ = dbg1 "displayspan (-E)" reportspan -- all the requested intervals
| otherwise = dbg1 "displayspan" $ requestedspan `spanIntersect` matchedspan -- exclude leading/trailing empty intervals
matchedspan = dbg1 "matchedspan" $ postingsDateSpan' (whichDateFromOpts ropts) ps
matchedspan = dbg1 "matchedspan" . daysSpan $ map snd ps
-- If doing cost valuation, convert amounts to cost.
j' = journalSelectingAmountFromOpts ropts j
@ -187,17 +188,26 @@ multiBalanceReportWith ropts@ReportOpts{..} q j priceoracle =
-- 3. Gather postings for each column.
-- Postings matching the query within the report period.
ps :: [Posting] =
ps :: [(Posting, Day)] =
dbg1 "ps" $
map postingWithDate $
journalPostings $
filterJournalAmounts symq $ -- remove amount parts excluded by cur:
filterJournalPostings reportq $ -- remove postings not matched by (adjusted) query
j'
where
postingWithDate p = case whichDateFromOpts ropts of
PrimaryDate -> (p, postingDate p)
SecondaryDate -> (p, postingDate2 p)
-- Group postings into their columns, with the column end dates.
colps :: [([Posting], Maybe Day)] =
dbg1 "colps"
[(filter (isPostingInDateSpan' (whichDateFromOpts ropts) s) ps, spanEnd s) | s <- colspans]
[ (posts, end) | (DateSpan _ end, posts) <- M.toList colMap ]
where
colMap = foldr addPosting emptyMap ps
addPosting (p, d) = maybe id (M.adjust (p:)) $ latestSpanContaining colspans d
emptyMap = M.fromList . zip colspans $ repeat []
----------------------------------------------------------------------
-- 4. Calculate account balance changes in each column.
@ -228,13 +238,13 @@ multiBalanceReportWith ropts@ReportOpts{..} q j priceoracle =
then nub $ sort $ startaccts ++ allpostedaccts
else allpostedaccts
where
allpostedaccts :: [AccountName] = dbg1 "allpostedaccts" $ sort $ accountNamesFromPostings ps
allpostedaccts :: [AccountName] =
dbg1 "allpostedaccts" . sort . accountNamesFromPostings $ map fst ps
-- Each column's balance changes for each account, adding zeroes where needed.
colallacctchanges :: [[(ClippedAccountName, MixedAmount)]] =
dbg1 "colallacctchanges"
[sortBy (comparing fst) $
unionBy (\(a,_) (a',_) -> a == a') postedacctchanges zeroes
| postedacctchanges <- colacctchanges]
[ sortOn fst $ unionBy (\(a,_) (a',_) -> a == a') postedacctchanges zeroes
| postedacctchanges <- colacctchanges ]
where zeroes = [(a, nullmixedamt) | a <- displayaccts]
-- Transpose to get each account's balance changes across all columns.
acctchanges :: [(ClippedAccountName, [MixedAmount])] =