hledger/hledger-lib/Hledger/Read/JournalReader.hs

852 lines
29 KiB
Haskell
Raw Normal View History

2016-04-23 21:27:39 +03:00
--- * doc
-- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users,
2016-04-23 03:43:16 +03:00
-- (add-hook 'haskell-mode-hook
2016-04-23 21:27:39 +03:00
-- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t))
2016-04-23 03:43:16 +03:00
-- 'orgstruct-mode)
-- and press TAB on nodes to expand/collapse.
2016-04-23 03:43:16 +03:00
2008-10-01 04:29:58 +04:00
{-|
2008-10-03 06:28:58 +04:00
A reader for hledger's journal file format
(<http://hledger.org/MANUAL.html#the-journal-file>). hledger's journal
format is a compatible subset of c++ ledger's
(<http://ledger-cli.org/3.0/doc/ledger3.html#Journal-Format>), so this
reader should handle many ledger files as well. Example:
2008-10-01 05:40:32 +04:00
@
2012\/3\/24 gift
expenses:gifts $10
assets:cash
2008-10-01 05:40:32 +04:00
@
2008-10-01 04:29:58 +04:00
Journal format supports the include directive which can read files in
other formats, so the other file format readers need to be importable
here. Some low-level journal syntax parsers which those readers also
use are therefore defined separately in Hledger.Read.Common, avoiding
import cycles.
2007-02-09 04:23:12 +03:00
-}
2016-04-23 21:27:39 +03:00
--- * module
{-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings, PackageImports #-}
2016-04-23 03:43:16 +03:00
2010-11-15 10:01:46 +03:00
module Hledger.Read.JournalReader (
--- * exports
-- * Reader
reader,
2016-04-23 03:43:16 +03:00
-- * Parsing utils
genericSourcePos,
2016-04-23 03:43:16 +03:00
parseAndFinaliseJournal,
runJournalParser,
rjp,
2016-04-23 03:43:16 +03:00
-- * Parsers used elsewhere
getParentAccount,
journalp,
directivep,
defaultyeardirectivep,
marketpricedirectivep,
datetimep,
datep,
-- codep,
-- accountnamep,
2015-09-25 03:23:52 +03:00
modifiedaccountnamep,
2014-02-06 01:02:24 +04:00
postingp,
-- amountp,
-- amountp',
-- mamountp',
-- numberp,
statusp,
emptyorcommentlinep,
followingcommentp
-- * Tests
,tests_Hledger_Read_JournalReader
,easytests
)
2010-03-13 02:46:20 +03:00
where
2016-04-23 21:27:39 +03:00
--- * imports
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (readFile)
2012-03-30 01:19:35 +04:00
import qualified Control.Exception as C
import Control.Monad
import Control.Monad.Except (ExceptT(..))
import Control.Monad.State.Strict
import Data.Bifunctor (first)
2018-06-18 01:23:41 +03:00
import Data.Functor.Identity (Identity(..))
import qualified Data.Map.Strict as M
lib: textification: parse stream 10% more allocation, but 35% lower maximum residency, and slightly quicker. hledger -f data/100x100x10.journal stats <<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>> <<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>> hledger -f data/1000x1000x10.journal stats <<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>> <<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>> hledger -f data/10000x1000x10.journal stats <<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>> <<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>> hledger -f data/100000x1000x10.journal stats <<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>> <<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
import Data.Text (Text)
import Data.String
import Data.List
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
import qualified Data.Text as T
2011-05-28 08:11:44 +04:00
import Data.Time.Calendar
import Data.Time.LocalTime
import Safe
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char
import Text.Megaparsec.Custom
2011-05-28 08:11:44 +04:00
import Text.Printf
import System.FilePath
import "Glob" System.FilePath.Glob hiding (match)
2010-11-15 10:18:35 +03:00
import Hledger.Data hiding (easytests)
import Hledger.Read.Common hiding (easytests)
import Hledger.Read.TimeclockReader (timeclockfilep)
import Hledger.Read.TimedotReader (timedotfilep)
2011-05-28 08:11:44 +04:00
import Hledger.Utils
2010-03-13 02:46:20 +03:00
-- $setup
-- >>> :set -XOverloadedStrings
2010-03-13 02:46:20 +03:00
2016-04-23 21:27:39 +03:00
--- * reader
2010-03-13 02:46:20 +03:00
reader :: Reader
reader = Reader
{rFormat = "journal"
,rExtensions = ["journal", "j", "hledger", "ledger"]
,rParser = parse
,rExperimental = False
}
-- | Parse and post-process a "Journal" from hledger's journal file
-- format, or give an error.
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse iopts = parseAndFinaliseJournal journalp' iopts
where
journalp' = do
-- reverse parsed aliases to ensure that they are applied in order given on commandline
mapM_ addAccountAlias (reverse $ aliasesFromOpts iopts)
journalp
-- | Get the account name aliases from options, if any.
aliasesFromOpts :: InputOpts -> [AccountAlias]
aliasesFromOpts = map (\a -> fromparse $ runParser accountaliasp ("--alias "++quoteIfNeeded a) $ T.pack a)
. aliases_
2010-03-13 02:46:20 +03:00
2016-04-23 21:27:39 +03:00
--- * parsers
--- ** journal
-- | A journal parser. Accumulates and returns a "ParsedJournal",
-- which should be finalised/validated before use.
--
-- >>> rjp (journalp <* eof) "2015/1/1\n a 0\n"
-- Right Journal with 1 transactions, 1 accounts
--
2018-06-06 09:29:52 +03:00
journalp :: MonadIO m => JournalParser m ParsedJournal
journalp = do
many addJournalItemP
eof
get
-- | A side-effecting parser; parses any kind of journal item
-- and updates the parse state accordingly.
2018-06-06 09:29:52 +03:00
addJournalItemP :: MonadIO m => JournalParser m ()
lib: textification: comments and tags No change. hledger -f data/100x100x10.journal stats <<ghc: 42859576 bytes, 84 GCs, 193781/269984 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.016 MUT (0.020 elapsed), 0.009 GC (0.011 elapsed) :ghc>> <<ghc: 42859576 bytes, 84 GCs, 193781/269984 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.018 elapsed), 0.009 GC (0.013 elapsed) :ghc>> hledger -f data/1000x1000x10.journal stats <<ghc: 349576344 bytes, 681 GCs, 1407388/4091680 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.000 elapsed), 0.124 MUT (0.130 elapsed), 0.047 GC (0.055 elapsed) :ghc>> <<ghc: 349576280 bytes, 681 GCs, 1407388/4091680 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.000 elapsed), 0.126 MUT (0.132 elapsed), 0.049 GC (0.058 elapsed) :ghc>> hledger -f data/10000x1000x10.journal stats <<ghc: 3424030664 bytes, 6658 GCs, 11403359/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.000 elapsed), 1.207 MUT (1.228 elapsed), 0.473 GC (0.528 elapsed) :ghc>> <<ghc: 3424030760 bytes, 6658 GCs, 11403874/41077288 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.002 elapsed), 1.234 MUT (1.256 elapsed), 0.470 GC (0.520 elapsed) :ghc>> hledger -f data/100000x1000x10.journal stats <<ghc: 34306547448 bytes, 66727 GCs, 76805504/414629288 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.003 elapsed), 12.615 MUT (12.813 elapsed), 4.656 GC (5.291 elapsed) :ghc>> <<ghc: 34306547320 bytes, 66727 GCs, 76805504/414629288 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.009 elapsed), 12.802 MUT (13.065 elapsed), 4.774 GC (5.441 elapsed) :ghc>>
2016-05-25 03:09:20 +03:00
addJournalItemP =
-- all journal line types can be distinguished by the first
-- character, can use choice without backtracking
choice [
directivep
, transactionp >>= modify' . addTransaction
, transactionmodifierp >>= modify' . addTransactionModifier
, periodictransactionp >>= modify' . addPeriodicTransaction
, marketpricedirectivep >>= modify' . addMarketPrice
, void (lift emptyorcommentlinep)
, void (lift multilinecommentp)
] <?> "transaction or directive"
2016-04-23 21:27:39 +03:00
--- ** directives
2016-04-23 03:43:16 +03:00
-- | Parse any journal directive and update the parse state accordingly.
-- Cf http://hledger.org/manual.html#directives,
-- http://ledger-cli.org/3.0/doc/ledger3.html#Command-Directives
2018-06-06 09:29:52 +03:00
directivep :: MonadIO m => JournalParser m ()
directivep = (do
optional $ char '!'
choice [
includedirectivep
,aliasdirectivep
,endaliasesdirectivep
,accountdirectivep
,applyaccountdirectivep
,commoditydirectivep
,endapplyaccountdirectivep
,tagdirectivep
,endtagdirectivep
,defaultyeardirectivep
,defaultcommoditydirectivep
,commodityconversiondirectivep
,ignoredpricecommoditydirectivep
]
) <?> "directive"
2010-03-13 02:46:20 +03:00
2018-06-06 09:29:52 +03:00
includedirectivep :: MonadIO m => JournalParser m ()
includedirectivep = do
2011-08-04 11:49:10 +04:00
string "include"
lift (skipSome spacenonewline)
filename <- T.unpack <$> takeWhileP Nothing (/= '\n') -- don't consume newline yet
parentpos <- getPosition
filepaths <- getFilePaths parentpos filename
forM_ filepaths $ parseChild parentpos
void newline
where
getFilePaths parserpos filename = do
curdir <- lift $ expandPath (takeDirectory $ sourceName parserpos) ""
`orRethrowIOError` (show parserpos ++ " locating " ++ filename)
-- Compiling filename as a glob pattern works even if it is a literal
fileglob <- case tryCompileWith compDefault{errorRecovery=False} filename of
Right x -> pure x
Left e -> parseErrorAt parserpos $ "Invalid glob pattern: " ++ e
-- Get all matching files in the current working directory, sorting in
-- lexicographic order to simulate the output of 'ls'.
filepaths <- liftIO $ sort <$> globDir1 fileglob curdir
if (not . null) filepaths
then pure filepaths
else parseErrorAt parserpos $ "No existing files match pattern: " ++ filename
parseChild parentpos filepath = do
parentfilestack <- fmap sourceName . statePos <$> getParserState
when (filepath `elem` parentfilestack)
$ parseErrorAt parentpos ("Cyclic include: " ++ filepath)
childInput <- lift $ readFilePortably filepath
`orRethrowIOError` (show parentpos ++ " reading " ++ filepath)
-- save parent state
parentParserState <- getParserState
parentj <- get
let childj = newJournalWithParseStateFrom parentj
-- set child state
setInput childInput
pushPosition $ initialPos filepath
put childj
-- parse include file
let parsers = [ journalp
, timeclockfilep
, timedotfilep
] -- can't include a csv file yet, that reader is special
updatedChildj <- journalAddFile (filepath, childInput) <$>
region (withSource childInput) (choiceInState parsers)
-- restore parent state, prepending the child's parse info
setParserState parentParserState
put $ updatedChildj <> parentj
-- discard child's parse info, prepend its (reversed) list data, combine other fields
2016-05-23 11:02:19 +03:00
newJournalWithParseStateFrom :: Journal -> Journal
newJournalWithParseStateFrom j = mempty{
jparsedefaultyear = jparsedefaultyear j
,jparsedefaultcommodity = jparsedefaultcommodity j
,jparseparentaccounts = jparseparentaccounts j
,jparsealiases = jparsealiases j
,jcommodities = jcommodities j
-- ,jparsetransactioncount = jparsetransactioncount j
2016-05-23 11:02:19 +03:00
,jparsetimeclockentries = jparsetimeclockentries j
}
-- | Lift an IO action into the exception monad, rethrowing any IO
-- error with the given message prepended.
orRethrowIOError :: MonadIO m => IO a -> String -> TextParser m a
orRethrowIOError io msg = do
eResult <- liftIO $ (Right <$> io) `C.catch` \(e::C.IOException) -> pure $ Left $ printf "%s:\n%s" msg (show e)
case eResult of
Right res -> pure res
Left errMsg -> fail errMsg
accountdirectivep :: JournalParser m ()
accountdirectivep = do
string "account"
lift (skipSome spacenonewline)
acct <- modifiedaccountnamep -- account directives can be modified by alias/apply account
macode' :: Maybe String <- (optional $ lift $ skipSome spacenonewline >> some digitChar)
let macode :: Maybe AccountCode = read <$> macode'
newline
skipMany indentedlinep
modify' (\j -> j{jaccounts = (acct, macode) : jaccounts j})
indentedlinep :: JournalParser m String
indentedlinep = lift (skipSome spacenonewline) >> (rstrip <$> lift restofline)
-- | Parse a one-line or multi-line commodity directive.
--
-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00"
-- >>> Right _ <- rjp commoditydirectivep "commodity $\n format $1.00"
-- >>> Right _ <- rjp commoditydirectivep "commodity $\n\n" -- a commodity with no format
-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00\n format $1.00" -- both, what happens ?
2018-06-06 08:52:28 +03:00
commoditydirectivep :: JournalParser m ()
commoditydirectivep = commoditydirectiveonelinep <|> commoditydirectivemultilinep
-- | Parse a one-line commodity directive.
--
-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00"
-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00 ; blah\n"
2018-06-06 08:52:28 +03:00
commoditydirectiveonelinep :: JournalParser m ()
commoditydirectiveonelinep = do
(pos, Amount{acommodity,astyle}) <- try $ do
string "commodity"
lift (skipSome spacenonewline)
pos <- getPosition
amount <- amountp
pure $ (pos, amount)
lift (skipMany spacenonewline)
_ <- lift followingcommentp
let comm = Commodity{csymbol=acommodity, cformat=Just $ dbg2 "style from commodity directive" astyle}
if asdecimalpoint astyle == Nothing
then parseErrorAt pos pleaseincludedecimalpoint
else modify' (\j -> j{jcommodities=M.insert acommodity comm $ jcommodities j})
pleaseincludedecimalpoint :: String
pleaseincludedecimalpoint = "to avoid ambiguity, please include a decimal separator in commodity directives"
-- | Parse a multi-line commodity directive, containing 0 or more format subdirectives.
--
-- >>> Right _ <- rjp commoditydirectivemultilinep "commodity $ ; blah \n format $1.00 ; blah"
2018-06-06 08:52:28 +03:00
commoditydirectivemultilinep :: JournalParser m ()
commoditydirectivemultilinep = do
string "commodity"
lift (skipSome spacenonewline)
sym <- lift commoditysymbolp
_ <- lift followingcommentp
mformat <- lastMay <$> many (indented $ formatdirectivep sym)
let comm = Commodity{csymbol=sym, cformat=mformat}
modify' (\j -> j{jcommodities=M.insert sym comm $ jcommodities j})
where
indented = (lift (skipSome spacenonewline) >>)
-- | Parse a format (sub)directive, throwing a parse error if its
-- symbol does not match the one given.
2018-06-06 08:52:28 +03:00
formatdirectivep :: CommoditySymbol -> JournalParser m AmountStyle
formatdirectivep expectedsym = do
string "format"
lift (skipSome spacenonewline)
pos <- getPosition
Amount{acommodity,astyle} <- amountp
_ <- lift followingcommentp
if acommodity==expectedsym
then
if asdecimalpoint astyle == Nothing
then parseErrorAt pos pleaseincludedecimalpoint
else return $ dbg2 "style from format subdirective" astyle
else parseErrorAt pos $
printf "commodity directive symbol \"%s\" and format directive symbol \"%s\" should be the same" expectedsym acommodity
keywordp :: String -> JournalParser m ()
keywordp = (() <$) . string . fromString
spacesp :: JournalParser m ()
spacesp = () <$ lift (skipSome spacenonewline)
-- | Backtracking parser similar to string, but allows varying amount of space between words
keywordsp :: String -> JournalParser m ()
keywordsp = try . sequence_ . intersperse spacesp . map keywordp . words
applyaccountdirectivep :: JournalParser m ()
applyaccountdirectivep = do
keywordsp "apply account" <?> "apply account directive"
lift (skipSome spacenonewline)
parent <- lift accountnamep
2011-08-04 11:49:10 +04:00
newline
pushParentAccount parent
2010-03-13 02:46:20 +03:00
endapplyaccountdirectivep :: JournalParser m ()
endapplyaccountdirectivep = do
keywordsp "end apply account" <?> "end apply account directive"
2011-08-04 11:49:10 +04:00
popParentAccount
2010-03-13 02:46:20 +03:00
aliasdirectivep :: JournalParser m ()
aliasdirectivep = do
string "alias"
lift (skipSome spacenonewline)
alias <- lift accountaliasp
addAccountAlias alias
accountaliasp :: TextParser m AccountAlias
accountaliasp = regexaliasp <|> basicaliasp
basicaliasp :: TextParser m AccountAlias
basicaliasp = do
2018-07-16 17:28:58 +03:00
-- dbgparse 0 "basicaliasp"
old <- rstrip <$> (some $ noneOf ("=" :: [Char]))
char '='
skipMany spacenonewline
new <- rstrip <$> anyChar `manyTill` eolof -- eol in journal, eof in command lines, normally
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
return $ BasicAlias (T.pack old) (T.pack new)
regexaliasp :: TextParser m AccountAlias
regexaliasp = do
2018-07-16 17:28:58 +03:00
-- dbgparse 0 "regexaliasp"
char '/'
re <- some $ noneOf ("/\n\r" :: [Char]) -- paranoid: don't try to read past line end
char '/'
skipMany spacenonewline
char '='
skipMany spacenonewline
repl <- anyChar `manyTill` eolof
return $ RegexAlias re repl
endaliasesdirectivep :: JournalParser m ()
endaliasesdirectivep = do
keywordsp "end aliases" <?> "end aliases directive"
clearAccountAliases
tagdirectivep :: JournalParser m ()
tagdirectivep = do
2011-08-04 11:49:10 +04:00
string "tag" <?> "tag directive"
lift (skipSome spacenonewline)
_ <- lift $ some nonspace
lift restofline
return ()
2007-02-09 04:23:12 +03:00
endtagdirectivep :: JournalParser m ()
endtagdirectivep = do
(keywordsp "end tag" <|> keywordp "pop") <?> "end tag or pop directive"
lift restofline
return ()
2011-08-04 11:49:10 +04:00
defaultyeardirectivep :: JournalParser m ()
defaultyeardirectivep = do
2011-08-04 11:49:10 +04:00
char 'Y' <?> "default year"
lift (skipMany spacenonewline)
y <- some digitChar
2011-08-04 11:49:10 +04:00
let y' = read y
failIfInvalidYear y
setYear y'
2018-06-06 08:52:28 +03:00
defaultcommoditydirectivep :: JournalParser m ()
defaultcommoditydirectivep = do
2011-08-04 11:49:10 +04:00
char 'D' <?> "default commodity"
lift (skipSome spacenonewline)
pos <- getPosition
Amount{acommodity,astyle} <- amountp
lift restofline
if asdecimalpoint astyle == Nothing
then parseErrorAt pos pleaseincludedecimalpoint
else setDefaultCommodityAndStyle (acommodity, astyle)
2007-02-09 04:23:12 +03:00
2018-06-06 08:52:28 +03:00
marketpricedirectivep :: JournalParser m MarketPrice
marketpricedirectivep = do
char 'P' <?> "market price"
lift (skipMany spacenonewline)
date <- try (do {LocalTime d _ <- datetimep; return d}) <|> datep -- a time is ignored
lift (skipSome spacenonewline)
symbol <- lift commoditysymbolp
lift (skipMany spacenonewline)
price <- amountp
lift restofline
return $ MarketPrice date symbol price
ignoredpricecommoditydirectivep :: JournalParser m ()
ignoredpricecommoditydirectivep = do
char 'N' <?> "ignored-price commodity"
lift (skipSome spacenonewline)
lift commoditysymbolp
lift restofline
return ()
2018-06-06 08:52:28 +03:00
commodityconversiondirectivep :: JournalParser m ()
commodityconversiondirectivep = do
char 'C' <?> "commodity conversion"
lift (skipSome spacenonewline)
amountp
lift (skipMany spacenonewline)
char '='
lift (skipMany spacenonewline)
amountp
lift restofline
return ()
2016-04-23 21:27:39 +03:00
--- ** transactions
2016-04-23 03:43:16 +03:00
transactionmodifierp :: JournalParser m TransactionModifier
transactionmodifierp = do
2011-08-04 11:49:10 +04:00
char '=' <?> "modifier transaction"
lift (skipMany spacenonewline)
querytxt <- lift $ T.strip <$> descriptionp
(_comment, _tags) <- lift transactioncommentp -- TODO apply these to modified txns ?
postings <- postingsp Nothing
return $ TransactionModifier querytxt postings
-- | Parse a periodic transaction
periodictransactionp :: MonadIO m => JournalParser m PeriodicTransaction
periodictransactionp = do
-- first line
2011-08-04 11:49:10 +04:00
char '~' <?> "periodic transaction"
lift $ skipMany spacenonewline
-- a period expression
pos <- getPosition
d <- liftIO getCurrentDay
(periodtxt, (interval, span)) <- lift $ first T.strip <$> match (periodexprp d)
-- In periodic transactions, the period expression has an additional constraint:
case checkPeriodicTransactionStartDate interval span periodtxt of
Just e -> parseErrorAt pos e
Nothing -> pure ()
-- The line can end here, or it can continue with one or more spaces
-- and then zero or more of the following fields. A bit awkward.
(status, code, description, (comment, tags)) <-
(lift eolof >> return (Unmarked, "", "", ("", [])))
<|>
(do
lift $ skipSome spacenonewline
s <- lift statusp
c <- lift codep
desc <- lift $ T.strip <$> descriptionp
(cmt, ts) <- lift transactioncommentp
return (s,c,desc,(cmt,ts))
)
-- next lines
postings <- postingsp (Just $ first3 $ toGregorian d)
return $ nullperiodictransaction{
ptperiodexpr=periodtxt
,ptinterval=interval
,ptspan=span
,ptstatus=status
,ptcode=code
,ptdescription=description
,ptcomment=comment
,pttags=tags
,ptpostings=postings
}
-- | Parse a (possibly unbalanced) transaction.
2018-06-06 08:52:28 +03:00
transactionp :: JournalParser m Transaction
transactionp = do
2018-07-16 17:28:58 +03:00
-- dbgparse 0 "transactionp"
startpos <- getPosition
date <- datep <?> "transaction"
2018-05-24 07:36:19 +03:00
edate <- optional (lift $ secondarydatep date) <?> "secondary date"
lookAhead (lift spacenonewline <|> newline) <?> "whitespace or newline"
status <- lift statusp <?> "cleared status"
code <- lift codep <?> "transaction code"
2018-06-06 08:52:28 +03:00
description <- lift $ T.strip <$> descriptionp
(comment, tags) <- lift transactioncommentp
let year = first3 $ toGregorian date
postings <- postingsp (Just year)
endpos <- getPosition
let sourcepos = journalSourcePos startpos endpos
return $ txnTieKnot $ Transaction 0 sourcepos date edate status code description comment tags postings ""
2007-02-09 04:23:12 +03:00
transactionp_tests = tests "transactionp" [
test "just-a-date" $ expectParseEq transactionp "2015/1/1\n" nulltransaction{tdate=parsedate "2015/01/01"}
,test "more-complex" $ expectParseEq transactionp
(T.unlines [
"2012/05/14=2012/05/15 (code) desc ; tcomment1",
" ; tcomment2",
" ; ttag1: val1",
" * a $1.00 ; pcomment1",
" ; pcomment2",
" ; ptag1: val1",
" ; ptag2: val2"
])
nulltransaction{
tsourcepos=JournalSourcePos "" (1,7), -- XXX why 7 here ?
tpreceding_comment_lines="",
tdate=parsedate "2012/05/14",
tdate2=Just $ parsedate "2012/05/15",
tstatus=Unmarked,
tcode="code",
tdescription="desc",
tcomment="tcomment1\ntcomment2\nttag1: val1\n",
ttags=[("ttag1","val1")],
tpostings=[
nullposting{
pdate=Nothing,
pstatus=Cleared,
paccount="a",
pamount=Mixed [usd 1],
pcomment="pcomment1\npcomment2\nptag1: val1\nptag2: val2\n",
ptype=RegularPosting,
ptags=[("ptag1","val1"),("ptag2","val2")],
ptransaction=Nothing
}
]
}
,test "parses a well-formed transaction" $
expect $ isRight $ rjp transactionp $ T.unlines
["2007/01/28 coopportunity"
," expenses:food:groceries $47.18"
," assets:checking $-47.18"
,""
]
,test "does not parse a following comment as part of the description" $
expectParseEqOn transactionp "2009/1/1 a ;comment\n b 1\n" tdescription "a"
,test "transactionp parses a following whitespace line" $
expect $ isRight $ rjp transactionp $ T.unlines
["2012/1/1"
," a 1"
," b"
," "
]
,test "comments everywhere, two postings parsed" $
expectParseEqOn transactionp
(T.unlines
["2009/1/1 x ; transaction comment"
," a 1 ; posting 1 comment"
," ; posting 1 comment 2"
," b"
," ; posting 2 comment"
])
(length . tpostings)
2
]
2016-04-23 21:27:39 +03:00
--- ** postings
2007-02-09 04:23:12 +03:00
-- Parse the following whitespace-beginning lines as postings, posting
-- tags, and/or comments (inferring year, if needed, from the given date).
2018-06-06 08:52:28 +03:00
postingsp :: Maybe Year -> JournalParser m [Posting]
postingsp mTransactionYear = many (postingp mTransactionYear) <?> "postings"
2018-06-06 08:52:28 +03:00
-- linebeginningwithspaces :: JournalParser m String
-- linebeginningwithspaces = do
-- sp <- lift (skipSome spacenonewline)
-- c <- nonspace
-- cs <- lift restofline
-- return $ sp ++ (c:cs) ++ "\n"
2007-02-09 04:23:12 +03:00
2018-06-06 08:52:28 +03:00
postingp :: Maybe Year -> JournalParser m Posting
postingp mTransactionYear = do
-- lift $ dbgparse 0 "postingp"
(status, account) <- try $ do
lift (skipSome spacenonewline)
status <- lift statusp
lift (skipMany spacenonewline)
account <- modifiedaccountnamep
return (status, account)
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
let (ptype, account') = (accountNamePostingType account, textUnbracket account)
lift (skipMany spacenonewline)
amount <- option missingmixedamt $ Mixed . (:[]) <$> amountp
massertion <- partialbalanceassertionp
_ <- fixedlotpricep
lift (skipMany spacenonewline)
(comment,tags,mdate,mdate2) <- lift $ postingcommentp mTransactionYear
return posting
{ pdate=mdate
, pdate2=mdate2
, pstatus=status
, paccount=account'
, pamount=amount
, pcomment=comment
, ptype=ptype
, ptags=tags
, pbalanceassertion=massertion
}
2018-06-18 01:23:41 +03:00
test_postingp = TestCase $ do
let s `gives` ep = do
2018-06-18 01:23:41 +03:00
let parse = runIdentity $ parseWithState mempty (postingp Nothing) s
assertBool "Example is parsed well" $ isRight parse
let Right ap = parse
same msg f = assertEqual ("Posting "++msg++" differs") (f ep) (f ap)
same "date" pdate
same "status" pstatus
same "account" paccount
-- same "amount" pamount
-- more revealing:
assertEqual "amount differs!" (showMixedAmountDebug $ pamount ep) (showMixedAmountDebug $ pamount ap)
same "comment" pcomment
same "type" ptype
same "tags" ptags
same "transaction" ptransaction
" expenses:food:dining $10.00 ; a: a a \n ; b: b b \n" `gives`
posting{paccount="expenses:food:dining", pamount=Mixed [usd 10], pcomment="a: a a\nb: b b\n", ptags=[("a","a a"), ("b","b b")]}
" a 1. ; [2012/11/28]\n" `gives` -- trailing decimal point required to match num's asdecimalpoint
("a" `post` num 1){pcomment="[2012/11/28]\n"
,pdate=parsedateM "2012/11/28"}
" a 2. ; a:a, [=2012/11/28]\n" `gives`
("a" `post` num 2){pcomment="a:a, [=2012/11/28]\n"
,ptags=[("a","a")]
,pdate=Nothing}
" a 3. ; a:a\n ; [2012/11/28=2012/11/29],b:b\n" `gives`
("a" `post` num 3){pcomment="a:a\n[2012/11/28=2012/11/29],b:b\n"
,ptags=[("a","a"), ("[2012/11/28=2012/11/29],b","b")] -- XXX ugly tag name parsed
,pdate=parsedateM "2012/11/28"}
2018-06-18 01:23:41 +03:00
assertBool "postingp parses a quoted commodity with numbers"
(isRight . runIdentity $ parseWithState mempty (postingp Nothing) " a 1 \"DE123\"\n")
2018-06-18 01:23:41 +03:00
assertBool "postingp parses balance assertions and fixed lot prices"
(isRight . runIdentity $ parseWithState mempty (postingp Nothing) " a 1 \"DE123\" =$1 { =2.2 EUR} \n")
-- let parse = parseWithState mempty postingp " a\n ;next-line comment\n"
-- assertRight parse
-- let Right p = parse
-- assertEqual "next-line comment\n" (pcomment p)
-- assertEqual (Just nullmixedamt) (pbalanceassertion p)
--- * more tests
2016-04-23 03:43:16 +03:00
2018-06-18 01:23:41 +03:00
tests_Hledger_Read_JournalReader = TestList [
2018-08-20 10:22:31 +03:00
test_postingp
]
easytests = tests "JournalReader" [
tests "transactionmodifierp" [
test "basic" $ expectParseEq transactionmodifierp
"= (some value expr)\n some:postings 1.\n"
nulltransactionmodifier {
tmquerytxt = "(some value expr)"
,tmpostings = [nullposting{paccount="some:postings", pamount=Mixed[num 1]}]
}
]
,tests "periodictransactionp" [
test "more-period-text-in-comment-after-one-space" $ expectParseEq periodictransactionp
"~ monthly from 2018/6 ;In 2019 we will change this\n"
nullperiodictransaction {
ptperiodexpr = "monthly from 2018/6"
,ptinterval = Months 1
,ptspan = DateSpan (Just $ parsedate "2018/06/01") Nothing
,ptstatus = Unmarked
,ptcode = ""
,ptdescription = ""
,ptcomment = "In 2019 we will change this\n"
,pttags = []
,ptpostings = []
}
-- TODO #807
,_test "more-period-text-in-description-after-two-spaces" $ expectParseEq periodictransactionp
"~ monthly from 2018/6 In 2019 we will change this\n"
nullperiodictransaction {
ptperiodexpr = "monthly from 2018/6"
,ptinterval = Months 1
,ptspan = DateSpan (Just $ parsedate "2018/06/01") Nothing
,ptdescription = "In 2019 we will change this\n"
}
,_test "more-period-text-in-description-after-one-space" $ expectParseEq periodictransactionp
"~ monthly from 2018/6 In 2019 we will change this\n"
nullperiodictransaction {
ptperiodexpr = "monthly from 2018/6"
,ptinterval = Months 1
,ptspan = DateSpan (Just $ parsedate "2018/06/01") Nothing
,ptdescription = "In 2019 we will change this\n"
}
2018-08-20 10:22:31 +03:00
,_test "Next-year-in-description" $ expectParseEq periodictransactionp
"~ monthly Next year blah blah\n"
nullperiodictransaction {
ptperiodexpr = "monthly"
,ptinterval = Months 1
,ptspan = DateSpan Nothing Nothing
,ptdescription = "Next year blah blah\n"
}
,transactionp_tests
,test "showParsedMarketPrice" $ expectParseEq marketpricedirectivep
"P 2017/01/30 BTC $922.83\n"
MarketPrice{
mpdate = parsedate "2017/01/30",
mpcommodity = "BTC",
mpamount = usd 922.83
}
{- old hunit tests TODO
,"periodictransactionp" ~: do
assertParse (parseWithState mempty periodictransactionp "~ (some period expr)\n some:postings 1\n")
,"directivep" ~: do
assertParse (parseWithState mempty directivep "!include /some/file.x\n")
assertParse (parseWithState mempty directivep "account some:account\n")
assertParse (parseWithState mempty (directivep >> directivep) "!account a\nend\n")
,"comment" ~: do
assertParse (parseWithState mempty comment "; some comment \n")
assertParse (parseWithState mempty comment " \t; x\n")
assertParse (parseWithState mempty comment "#x")
,"datep" ~: do
assertParse (parseWithState mempty datep "2011/1/1")
assertParseFailure (parseWithState mempty datep "1/1")
assertParse (parseWithState mempty{jpsYear=Just 2011} datep "1/1")
,"datetimep" ~: do
let p = do {t <- datetimep; eof; return t}
bad = assertParseFailure . parseWithState mempty p
good = assertParse . parseWithState mempty p
bad "2011/1/1"
bad "2011/1/1 24:00:00"
bad "2011/1/1 00:60:00"
bad "2011/1/1 00:00:60"
good "2011/1/1 00:00"
good "2011/1/1 23:59:59"
good "2011/1/1 3:5:7"
-- timezone is parsed but ignored
let startofday = LocalTime (fromGregorian 2011 1 1) (TimeOfDay 0 0 (fromIntegral 0))
assertParseEqual (parseWithState mempty p "2011/1/1 00:00-0800") startofday
assertParseEqual (parseWithState mempty p "2011/1/1 00:00+1234") startofday
,"defaultyeardirectivep" ~: do
assertParse (parseWithState mempty defaultyeardirectivep "Y 2010\n")
assertParse (parseWithState mempty defaultyeardirectivep "Y 10001\n")
,"marketpricedirectivep" ~:
assertParseEqual (parseWithState mempty marketpricedirectivep "P 2004/05/01 XYZ $55.00\n") (MarketPrice (parsedate "2004/05/01") "XYZ" $ usd 55)
,"ignoredpricecommoditydirectivep" ~: do
assertParse (parseWithState mempty ignoredpricecommoditydirectivep "N $\n")
,"defaultcommoditydirectivep" ~: do
assertParse (parseWithState mempty defaultcommoditydirectivep "D $1,000.0\n")
,"commodityconversiondirectivep" ~: do
assertParse (parseWithState mempty commodityconversiondirectivep "C 1h = $50.00\n")
,"tagdirectivep" ~: do
assertParse (parseWithState mempty tagdirectivep "tag foo \n")
,"endtagdirectivep" ~: do
assertParse (parseWithState mempty endtagdirectivep "end tag \n")
assertParse (parseWithState mempty endtagdirectivep "pop \n")
,"accountnamep" ~: do
assertBool "accountnamep parses a normal account name" (isRight $ parsewith accountnamep "a:b:c")
assertBool "accountnamep rejects an empty inner component" (isLeft $ parsewith accountnamep "a::c")
assertBool "accountnamep rejects an empty leading component" (isLeft $ parsewith accountnamep ":b:c")
assertBool "accountnamep rejects an empty trailing component" (isLeft $ parsewith accountnamep "a:b:")
2010-03-11 20:16:03 +03:00
,"leftsymbolamountp" ~: do
assertParseEqual (parseWithState mempty leftsymbolamountp "$1") (usd 1 `withPrecision` 0)
assertParseEqual (parseWithState mempty leftsymbolamountp "$-1") (usd (-1) `withPrecision` 0)
assertParseEqual (parseWithState mempty leftsymbolamountp "-$1") (usd (-1) `withPrecision` 0)
,"amount" ~: do
let -- | compare a parse result with an expected amount, showing the debug representation for clarity
assertAmountParse parseresult amount =
(either (const "parse error") showAmountDebug parseresult) ~?= (showAmountDebug amount)
assertAmountParse (parseWithState mempty amountp "1 @ $2")
(num 1 `withPrecision` 0 `at` (usd 2 `withPrecision` 0))
-}
]
]