hledger/hledger-lib/Hledger/Read/JournalReader.hs

852 lines
30 KiB
Haskell
Raw Normal View History

2016-04-23 21:27:39 +03:00
--- * doc
-- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users,
2016-04-23 03:43:16 +03:00
-- (add-hook 'haskell-mode-hook
2016-04-23 21:27:39 +03:00
-- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t))
2016-04-23 03:43:16 +03:00
-- 'orgstruct-mode)
-- and press TAB on nodes to expand/collapse.
2016-04-23 03:43:16 +03:00
2008-10-01 04:29:58 +04:00
{-|
2008-10-03 06:28:58 +04:00
A reader for hledger's journal file format
(<http://hledger.org/MANUAL.html#the-journal-file>). hledger's journal
format is a compatible subset of c++ ledger's
(<http://ledger-cli.org/3.0/doc/ledger3.html#Journal-Format>), so this
reader should handle many ledger files as well. Example:
2008-10-01 05:40:32 +04:00
@
2012\/3\/24 gift
    expenses:gifts  $10
    assets:cash
@
2008-10-01 04:29:58 +04:00
Journal format supports the include directive which can read files in
other formats, so the other file format readers need to be importable
here. Some low-level journal syntax parsers which those readers also
use are therefore defined separately in Hledger.Read.Common, avoiding
import cycles.
2007-02-09 04:23:12 +03:00
-}
2016-04-23 21:27:39 +03:00
--- * module
{-# LANGUAGE CPP, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings, PackageImports #-}
2016-04-23 03:43:16 +03:00
2010-11-15 10:01:46 +03:00
module Hledger.Read.JournalReader (
--- * exports
-- * Reader
reader,
2016-04-23 03:43:16 +03:00
-- * Parsing utils
genericSourcePos,
2016-04-23 03:43:16 +03:00
parseAndFinaliseJournal,
runJournalParser,
rjp,
2016-04-23 03:43:16 +03:00
-- * Parsers used elsewhere
getParentAccount,
journalp,
directivep,
defaultyeardirectivep,
marketpricedirectivep,
datetimep,
datep,
-- codep,
-- accountnamep,
2015-09-25 03:23:52 +03:00
modifiedaccountnamep,
2014-02-06 01:02:24 +04:00
postingp,
-- amountp,
-- amountp',
-- mamountp',
-- numberp,
statusp,
emptyorcommentlinep,
followingcommentp
-- * Tests
,tests_Hledger_Read_JournalReader
,easytests
)
2010-03-13 02:46:20 +03:00
where
2016-04-23 21:27:39 +03:00
--- * imports
import Prelude ()
import "base-compat-batteries" Prelude.Compat hiding (readFile)
2012-03-30 01:19:35 +04:00
import qualified Control.Exception as C
import Control.Monad
import Control.Monad.Except (ExceptT(..))
import Control.Monad.State.Strict
import Data.Bifunctor (first)
2018-06-18 01:23:41 +03:00
import Data.Functor.Identity (Identity(..))
import qualified Data.Map.Strict as M
lib: textification: parse stream 10% more allocation, but 35% lower maximum residency, and slightly quicker. hledger -f data/100x100x10.journal stats <<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>> <<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>> hledger -f data/1000x1000x10.journal stats <<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>> <<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>> hledger -f data/10000x1000x10.journal stats <<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>> <<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>> hledger -f data/100000x1000x10.journal stats <<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>> <<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
import Data.Text (Text)
import Data.String
import Data.List
lib: textification begins! account names The first of several conversions from String to (strict) Text, hopefully reducing space and time usage. This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1: hledger -f data/100x100x10.journal stats string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>> text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>> hledger -f data/1000x100x10.journal stats string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>> text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>> hledger -f data/10000x100x10.journal stats string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>> text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>> hledger -f data/100000x100x10.journal stats string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>> text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
import qualified Data.Text as T
2011-05-28 08:11:44 +04:00
import Data.Time.Calendar
import Data.Time.LocalTime
import EasyTest hiding (char, char')
import Safe
import Test.HUnit
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char
import Text.Megaparsec.Custom
2011-05-28 08:11:44 +04:00
import Text.Printf
import System.FilePath
import "Glob" System.FilePath.Glob hiding (match)
2010-11-15 10:18:35 +03:00
import Hledger.Data hiding (easytests)
import Hledger.Read.Common hiding (easytests)
import Hledger.Read.TimeclockReader (timeclockfilep)
import Hledger.Read.TimedotReader (timedotfilep)
2011-05-28 08:11:44 +04:00
import Hledger.Utils
2010-03-13 02:46:20 +03:00
-- $setup
-- >>> :set -XOverloadedStrings
2010-03-13 02:46:20 +03:00
2016-04-23 21:27:39 +03:00
--- * reader
2010-03-13 02:46:20 +03:00
-- | The reader for the journal format: the format name, the file
-- extensions associated with it, and the parser ('parse') used to read it.
-- It is not flagged as experimental.
reader :: Reader
reader = Reader
  { rFormat       = "journal"
  , rExtensions   = ["journal", "j", "hledger", "ledger"]
  , rParser       = parse
  , rExperimental = False
  }
-- | Parse and post-process a "Journal" from hledger's journal file
-- format, or give an error.
--
-- Any account aliases given in the input options are registered in the
-- parse state before the journal text itself is parsed.
parse :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
parse iopts = parseAndFinaliseJournal aliasedjournalp iopts
  where
    -- The option aliases are added in reverse, to ensure that they are
    -- applied in the order given on the command line.
    aliasedjournalp =
      forM_ (reverse $ aliasesFromOpts iopts) addAccountAlias >> journalp
-- | Get the account name aliases from options, if any.
-- Each alias string is parsed with 'accountaliasp'; the result is unwrapped
-- with 'fromparse' (presumably raising an error on a parse failure - confirm
-- against its definition).
aliasesFromOpts :: InputOpts -> [AccountAlias]
aliasesFromOpts iopts = [ parsealias a | a <- aliases_ iopts ]
  where
    -- The first argument to runParser is the source name shown in parse errors.
    parsealias a =
      fromparse $ runParser accountaliasp ("--alias " ++ quoteIfNeeded a) (T.pack a)
2010-03-13 02:46:20 +03:00
2016-04-23 21:27:39 +03:00
--- * parsers
--- ** journal
-- | A journal parser. Accumulates and returns a "ParsedJournal",
-- which should be finalised/validated before use.
--
-- Runs 'addJournalItemP' repeatedly, requires end of input, and then
-- returns the journal accumulated in the parser state.
--
-- >>> rjp (journalp <* eof) "2015/1/1\n a  0\n"
-- Right Journal  with 1 transactions, 1 accounts
--
journalp :: MonadIO m => JournalParser m ParsedJournal
journalp = skipMany addJournalItemP *> eof *> get
-- | A side-effecting parser; parses any kind of journal item
-- and updates the parse state accordingly.
addJournalItemP :: MonadIO m => JournalParser m ()
addJournalItemP = choice items <?> "transaction or directive"
  where
    -- All journal line types can be distinguished by the first
    -- character, so choice can be used without backtracking.
    items =
      [ directivep
      , transactionp          >>= modify' . addTransaction
      , transactionmodifierp  >>= modify' . addTransactionModifier
      , periodictransactionp  >>= modify' . addPeriodicTransaction
      , marketpricedirectivep >>= modify' . addMarketPrice
      , void (lift emptyorcommentlinep)
      , void (lift multilinecommentp)
      ]
2016-04-23 21:27:39 +03:00
--- ** directives
2016-04-23 03:43:16 +03:00
-- | Parse any journal directive and update the parse state accordingly.
-- An optional leading '!' is accepted before the directive keyword.
-- Cf http://hledger.org/manual.html#directives,
-- http://ledger-cli.org/3.0/doc/ledger3.html#Command-Directives
directivep :: MonadIO m => JournalParser m ()
directivep = (optional (char '!') >> choice directives) <?> "directive"
  where
    -- Tried in this order.
    directives =
      [ includedirectivep
      , aliasdirectivep
      , endaliasesdirectivep
      , accountdirectivep
      , applyaccountdirectivep
      , commoditydirectivep
      , endapplyaccountdirectivep
      , tagdirectivep
      , endtagdirectivep
      , defaultyeardirectivep
      , defaultcommoditydirectivep
      , commodityconversiondirectivep
      , ignoredpricecommoditydirectivep
      ]
2010-03-13 02:46:20 +03:00
2018-06-06 09:29:52 +03:00
-- | Parse an include directive: the word "include", some whitespace, and
-- a file name or glob pattern running to the end of the line. Every
-- matching file is parsed in turn and its journal data is merged into the
-- current parse state. Fails with a parse error on an invalid glob
-- pattern, when no file matches, or on a cyclic include.
includedirectivep :: MonadIO m => JournalParser m ()
includedirectivep = do
  string "include"
  lift (skipSome spacenonewline)
  -- take the rest of the line as the pattern, but don't consume the newline yet
  pat     <- T.unpack <$> takeWhileP Nothing (/= '\n')
  here    <- getPosition
  matches <- getFilePaths here pat
  mapM_ (parseChild here) matches
  void newline

  where
    -- Resolve the pattern to a non-empty, sorted list of file paths.
    getFilePaths parserpos filename = do
      -- The base directory is the directory of the including file
      -- (passed through expandPath), not the process's working directory.
      let parentdir = takeDirectory (sourceName parserpos)
      basedir <- lift $
        expandPath parentdir "" `orRethrowIOError` (show parserpos ++ " locating " ++ filename)
      -- Compiling filename as a glob pattern works even if it is a literal
      fileglob <-
        either (\e -> parseErrorAt parserpos $ "Invalid glob pattern: " ++ e) pure $
          tryCompileWith compDefault{errorRecovery=False} filename
      -- Get all matching files under the base directory, sorting in
      -- lexicographic order to simulate the output of 'ls'.
      found <- liftIO $ sort <$> globDir1 fileglob basedir
      if null found
        then parseErrorAt parserpos $ "No existing files match pattern: " ++ filename
        else pure found

    -- Parse one included file as a child of the current parse.
    parseChild parentpos filepath = do
      -- refuse to include a file which is already on the stack of files being parsed
      filestack <- fmap sourceName . statePos <$> getParserState
      when (filepath `elem` filestack) $
        parseErrorAt parentpos ("Cyclic include: " ++ filepath)

      childInput <- lift $
        readFilePortably filepath `orRethrowIOError` (show parentpos ++ " reading " ++ filepath)

      -- save parent state
      savedParserState <- getParserState
      parentj          <- get

      -- set child state
      setInput childInput
      pushPosition $ initialPos filepath
      put $ newJournalWithParseStateFrom parentj

      -- parse the include file, trying each supported format in turn
      -- (can't include a csv file yet, that reader is special)
      childj <- journalAddFile (filepath, childInput) <$>
        region (withSource childInput)
               (choiceInState [journalp, timeclockfilep, timedotfilep])

      -- restore parent state, then combine the journals: discard child's
      -- parse info, prepend its (reversed) list data, combine other fields
      setParserState savedParserState
      put $ childj <> parentj
2016-05-23 11:02:19 +03:00
-- | Make an otherwise empty journal which carries over the given
-- journal's parse-time settings (default year, default commodity, parent
-- accounts, aliases, timeclock entries) and its declared commodities.
-- Used as the starting state when parsing an included file.
newJournalWithParseStateFrom :: Journal -> Journal
newJournalWithParseStateFrom parent = mempty
  { jparsedefaultyear      = jparsedefaultyear parent
  , jparsedefaultcommodity = jparsedefaultcommodity parent
  , jparseparentaccounts   = jparseparentaccounts parent
  , jparsealiases          = jparsealiases parent
  , jcommodities           = jcommodities parent
  -- , jparsetransactioncount = jparsetransactioncount parent
  , jparsetimeclockentries = jparsetimeclockentries parent
  }
-- | Run an IO action inside the parser. If it throws an 'C.IOException',
-- fail the parse with the given message, a colon and newline, and then the
-- shown exception. Other exception types are not caught.
orRethrowIOError :: MonadIO m => IO a -> String -> TextParser m a
orRethrowIOError io msg = liftIO (C.try io) >>= either rethrow pure
  where
    rethrow (e :: C.IOException) = fail $ printf "%s:\n%s" msg (show e)
-- | Parse an account directive: the word "account", an account name,
-- an optional numeric account code, a newline, and any following
-- indented lines (which are skipped). The account and its code are
-- prepended to the journal's declared accounts.
accountdirectivep :: JournalParser m ()
accountdirectivep = do
  string "account"
  lift (skipSome spacenonewline)
  acct <- modifiedaccountnamep  -- account directives can be modified by alias/apply account
  codedigits <- optional $ lift $ skipSome spacenonewline >> some digitChar
  newline
  skipMany indentedlinep
  let mcode = read <$> codedigits :: Maybe AccountCode
  modify' $ \j -> j{jaccounts = (acct, mcode) : jaccounts j}
-- | Parse a line which begins with one or more non-newline whitespace
-- characters, returning the remainder of the line with trailing
-- whitespace stripped.
indentedlinep :: JournalParser m String
indentedlinep = lift $ skipSome spacenonewline *> (rstrip <$> restofline)
-- | Parse a one-line or multi-line commodity directive.
-- The one-line form is tried first, then the multi-line form.
--
-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00"
-- >>> Right _ <- rjp commoditydirectivep "commodity $\n format $1.00"
-- >>> Right _ <- rjp commoditydirectivep "commodity $\n\n" -- a commodity with no format
-- >>> Right _ <- rjp commoditydirectivep "commodity $1.00\n format $1.00" -- both, what happens ?
commoditydirectivep :: JournalParser m ()
commoditydirectivep =
      commoditydirectiveonelinep
  <|> commoditydirectivemultilinep
-- | Parse a one-line commodity directive: the word "commodity" followed
-- by an example amount, which supplies both the commodity symbol and its
-- display style. Fails with a parse error if the amount has no decimal
-- point; otherwise records the commodity in the journal.
--
-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00"
-- >>> Right _ <- rjp commoditydirectiveonelinep "commodity $1.00 ; blah\n"
commoditydirectiveonelinep :: JournalParser m ()
commoditydirectiveonelinep = do
  -- backtrack if this prefix fails, so that the multi-line form can be tried
  (pos, Amount{acommodity,astyle}) <- try $ do
    string "commodity"
    lift (skipSome spacenonewline)
    (,) <$> getPosition <*> amountp
  lift (skipMany spacenonewline)
  _ <- lift followingcommentp
  let comm = Commodity{csymbol=acommodity, cformat=Just $ dbg2 "style from commodity directive" astyle}
  case asdecimalpoint astyle of
    Nothing -> parseErrorAt pos pleaseincludedecimalpoint
    Just _  -> modify' (\j -> j{jcommodities=M.insert acommodity comm $ jcommodities j})
-- | The error message used when an amount in a commodity, format or
-- default commodity directive has no decimal point.
pleaseincludedecimalpoint :: String
pleaseincludedecimalpoint =
  "to avoid ambiguity, please include a decimal separator in commodity directives"
-- | Parse a multi-line commodity directive, containing 0 or more format subdirectives.
-- If there are several format subdirectives, the last one is used; if
-- there are none, the commodity is recorded without a format.
--
-- >>> Right _ <- rjp commoditydirectivemultilinep "commodity $ ; blah \n format $1.00 ; blah"
commoditydirectivemultilinep :: JournalParser m ()
commoditydirectivemultilinep = do
  string "commodity"
  lift (skipSome spacenonewline)
  sym <- lift commoditysymbolp
  _ <- lift followingcommentp
  -- each subdirective line must begin with some whitespace
  formats <- many $ lift (skipSome spacenonewline) >> formatdirectivep sym
  let comm = Commodity{csymbol=sym, cformat=lastMay formats}
  modify' (\j -> j{jcommodities=M.insert sym comm $ jcommodities j})
-- | Parse a format (sub)directive, throwing a parse error if its
-- symbol does not match the one given, or if its amount has no decimal
-- point. On success, returns the amount's style.
formatdirectivep :: CommoditySymbol -> JournalParser m AmountStyle
formatdirectivep expectedsym = do
  string "format"
  lift (skipSome spacenonewline)
  pos <- getPosition
  Amount{acommodity,astyle} <- amountp
  _ <- lift followingcommentp
  case () of
    _ | acommodity /= expectedsym ->
          parseErrorAt pos $
            printf "commodity directive symbol \"%s\" and format directive symbol \"%s\" should be the same" expectedsym acommodity
      | asdecimalpoint astyle == Nothing ->
          parseErrorAt pos pleaseincludedecimalpoint
      | otherwise ->
          return $ dbg2 "style from format subdirective" astyle
-- | Parse exactly the given keyword, discarding the result.
keywordp :: String -> JournalParser m ()
keywordp kw = void $ string (fromString kw)
-- | Parse one or more non-newline whitespace characters, discarding them.
spacesp :: JournalParser m ()
spacesp = void $ lift (skipSome spacenonewline)
-- | Backtracking parser similar to string, but allows varying amount of space between words.
-- The argument is split into words, which must appear in order,
-- separated by one or more non-newline whitespace characters.
keywordsp :: String -> JournalParser m ()
keywordsp phrase =
  try $ sequence_ $ intersperse spacesp [ keywordp w | w <- words phrase ]
-- | Parse an "apply account" directive: the keywords, an account name
-- and a newline. The account is pushed as a parent account in the parse state.
applyaccountdirectivep :: JournalParser m ()
applyaccountdirectivep = do
  keywordsp "apply account" <?> "apply account directive"
  lift (skipSome spacenonewline)
  parent <- lift accountnamep <* newline
  pushParentAccount parent
2010-03-13 02:46:20 +03:00
-- | Parse an "end apply account" directive, and pop the most recently
-- pushed parent account from the parse state.
endapplyaccountdirectivep :: JournalParser m ()
endapplyaccountdirectivep =
  (keywordsp "end apply account" <?> "end apply account directive")
    *> popParentAccount
2010-03-13 02:46:20 +03:00
-- | Parse an alias directive: the word "alias", some whitespace and an
-- account alias, which is added to the parse state.
aliasdirectivep :: JournalParser m ()
aliasdirectivep = do
  string "alias"
  lift (skipSome spacenonewline)
  lift accountaliasp >>= addAccountAlias
-- | Parse an account alias, trying the regular expression form
-- (/REGEX/ = REPLACEMENT) first, then the basic form (OLD = NEW).
accountaliasp :: TextParser m AccountAlias
accountaliasp =
      regexaliasp
  <|> basicaliasp
-- | Parse a basic account alias, OLD = NEW. OLD is everything up to the
-- first '=', and NEW is the remainder up to end of line or end of input;
-- both have trailing whitespace stripped.
basicaliasp :: TextParser m AccountAlias
basicaliasp = do
  -- dbgparse 0 "basicaliasp"
  old <- T.pack . rstrip <$> some (noneOf ['='])
  char '='
  skipMany spacenonewline
  -- eol in journal, eof in command lines, normally
  new <- T.pack . rstrip <$> manyTill anyChar eolof
  return $ BasicAlias old new
-- | Parse a regular expression account alias, /REGEX/ = REPLACEMENT.
-- The regular expression may not contain a slash or a line ending.
-- The replacement is everything after the '=' and any whitespace
-- following it, up to end of line or end of input (not stripped).
regexaliasp :: TextParser m AccountAlias
regexaliasp = do
  -- dbgparse 0 "regexaliasp"
  -- paranoid: don't try to read past line end
  re <- char '/' *> some (noneOf ['/', '\n', '\r']) <* char '/'
  skipMany spacenonewline *> char '=' *> skipMany spacenonewline
  RegexAlias re <$> manyTill anyChar eolof
-- | Parse an "end aliases" directive, and clear the account aliases
-- held in the parse state.
endaliasesdirectivep :: JournalParser m ()
endaliasesdirectivep =
  (keywordsp "end aliases" <?> "end aliases directive")
    *> clearAccountAliases
-- | Parse a tag directive: the word "tag", some whitespace, a tag name
-- and the rest of the line. The parsed content is discarded and the
-- parse state is not changed.
tagdirectivep :: JournalParser m ()
tagdirectivep = do
  string "tag" <?> "tag directive"
  lift (skipSome spacenonewline)
  _ <- lift $ some nonspace
  void $ lift restofline
2007-02-09 04:23:12 +03:00
-- | Parse an "end tag" or "pop" directive and the rest of its line.
-- The parsed content is discarded and the parse state is not changed.
endtagdirectivep :: JournalParser m ()
endtagdirectivep = do
  (keywordsp "end tag" <|> keywordp "pop") <?> "end tag or pop directive"
  void $ lift restofline
2011-08-04 11:49:10 +04:00
-- | Parse a default year directive: 'Y', optional whitespace and a run
-- of digits. The digits are checked with failIfInvalidYear, then set as
-- the default year in the parse state.
defaultyeardirectivep :: JournalParser m ()
defaultyeardirectivep = do
  char 'Y' <?> "default year"
  lift (skipMany spacenonewline)
  digits <- some digitChar
  failIfInvalidYear digits
  setYear (read digits)
2018-06-06 08:52:28 +03:00
-- | Parse a default commodity directive: 'D', some whitespace, an
-- example amount and the rest of the line. Fails with a parse error if
-- the amount has no decimal point; otherwise the amount's commodity and
-- style become the default in the parse state.
defaultcommoditydirectivep :: JournalParser m ()
defaultcommoditydirectivep = do
  char 'D' <?> "default commodity"
  lift (skipSome spacenonewline)
  pos <- getPosition
  Amount{acommodity,astyle} <- amountp
  lift restofline
  case asdecimalpoint astyle of
    Nothing -> parseErrorAt pos pleaseincludedecimalpoint
    Just _  -> setDefaultCommodityAndStyle (acommodity, astyle)
2007-02-09 04:23:12 +03:00
2018-06-06 08:52:28 +03:00
-- | Parse a market price directive: 'P', a date (or a date and time,
-- of which the time is ignored), a commodity symbol, a price amount and
-- the rest of the line.
marketpricedirectivep :: JournalParser m MarketPrice
marketpricedirectivep = do
  char 'P' <?> "market price"
  lift (skipMany spacenonewline)
  -- try a date with a time first, keeping only the day; otherwise a plain date
  date <- try (localDay <$> datetimep) <|> datep
  lift (skipSome spacenonewline)
  symbol <- lift commoditysymbolp
  lift (skipMany spacenonewline)
  price <- amountp <* lift restofline
  return $ MarketPrice date symbol price
-- | Parse an ignored-price commodity directive: 'N', some whitespace, a
-- commodity symbol and the rest of the line. The parsed content is
-- discarded and the parse state is not changed.
ignoredpricecommoditydirectivep :: JournalParser m ()
ignoredpricecommoditydirectivep = do
  char 'N' <?> "ignored-price commodity"
  lift (skipSome spacenonewline)
  _ <- lift commoditysymbolp
  void $ lift restofline
2018-06-06 08:52:28 +03:00
-- | Parse a commodity conversion directive: 'C', an amount, '=', another
-- amount and the rest of the line. The parsed content is discarded and
-- the parse state is not changed.
commodityconversiondirectivep :: JournalParser m ()
commodityconversiondirectivep = do
  char 'C' <?> "commodity conversion"
  lift (skipSome spacenonewline)
  _ <- amountp
  lift (skipMany spacenonewline)
  _ <- char '='
  lift (skipMany spacenonewline)
  _ <- amountp
  void $ lift restofline
2016-04-23 21:27:39 +03:00
--- ** transactions
2016-04-23 03:43:16 +03:00
-- | Parse a transaction modifier: '=', a query expression (parsed with
-- the description parser and stripped), an optional comment, and the
-- following postings (parsed with no transaction year).
transactionmodifierp :: JournalParser m TransactionModifier
transactionmodifierp = do
  char '=' <?> "modifier transaction"
  lift (skipMany spacenonewline)
  querytxt <- T.strip <$> lift descriptionp
  (_comment, _tags) <- lift transactioncommentp -- TODO apply these to modified txns ?
  TransactionModifier querytxt <$> postingsp Nothing
-- | Parse a periodic transaction: '~', a period expression, optionally
-- a status, code, description and comment, and then the postings.
-- The period expression is interpreted relative to the current day,
-- whose year is also the year passed to the postings parser.
periodictransactionp :: MonadIO m => JournalParser m PeriodicTransaction
periodictransactionp = do

  -- first line
  char '~' <?> "periodic transaction"
  lift $ skipMany spacenonewline

  -- a period expression; match also captures the text it was parsed from
  pos   <- getPosition
  today <- liftIO getCurrentDay
  (periodtxt, (interval, span)) <- lift $ first T.strip <$> match (periodexprp today)

  -- In periodic transactions, the period expression has an additional constraint:
  case checkPeriodicTransactionStartDate interval span periodtxt of
    Just e  -> parseErrorAt pos e
    Nothing -> pure ()

  -- The line can end here, or it can continue with one or more spaces
  -- and then zero or more of the following fields. A bit awkward.
  (status, code, description, (comment, tags)) <-
        (lift eolof >> return (Unmarked, "", "", ("", [])))
    <|> (do
          lift $ skipSome spacenonewline
          lift $ (,,,) <$> statusp
                       <*> codep
                       <*> (T.strip <$> descriptionp)
                       <*> transactioncommentp)

  -- next lines
  postings <- postingsp (Just $ first3 $ toGregorian today)

  return $ nullperiodictransaction
    { ptperiodexpr  = periodtxt
    , ptinterval    = interval
    , ptspan        = span
    , ptstatus      = status
    , ptcode        = code
    , ptdescription = description
    , ptcomment     = comment
    , pttags        = tags
    , ptpostings    = postings
    }
-- | Parse a (possibly unbalanced) transaction: a date, an optional
-- secondary date, then status, code, description and comment, followed
-- by the postings. The result records the source positions where the
-- transaction started and ended.
transactionp :: JournalParser m Transaction
transactionp = do
  -- dbgparse 0 "transactionp"
  startpos <- getPosition
  date     <- datep <?> "transaction"
  edate    <- optional (lift $ secondarydatep date) <?> "secondary date"
  -- the date(s) must be followed by whitespace or a newline (not consumed here)
  _        <- lookAhead (lift spacenonewline <|> newline) <?> "whitespace or newline"
  status   <- lift statusp <?> "cleared status"
  code     <- lift codep <?> "transaction code"
  description     <- T.strip <$> lift descriptionp
  (comment, tags) <- lift transactioncommentp
  -- the transaction's year is passed to the postings parser
  postings <- postingsp (Just $ first3 $ toGregorian date)
  endpos   <- getPosition
  let sourcepos = journalSourcePos startpos endpos
  return $ txnTieKnot $
    Transaction 0 sourcepos date edate status code description comment tags postings ""
2007-02-09 04:23:12 +03:00
2018-06-18 01:23:41 +03:00
-- | Unit tests for 'transactionp'.
--
-- NOTE(review): in the posting fixtures below, the account name and the
-- amount are separated by two spaces. With a single space the amount
-- would be read as part of the account name (see the hledger manual on
-- account names), so the expected postings could not match.
test_transactionp = TestCase $ do

    -- Parse the text as a transaction and compare it field by field with
    -- the expected transaction, so that a failure names the differing field.
    let s `gives` t = do
          let p = runIdentity $ parseWithState mempty transactionp s
          assertBool "Parse success" (isRight p)
          let Right t2 = p
              -- same f = assertEqual (f t) (f t2)
          assertEqual "Equal date" (tdate t) (tdate t2)
          assertEqual "Equal date2" (tdate2 t) (tdate2 t2)
          assertEqual "Equal status" (tstatus t) (tstatus t2)
          assertEqual "Equal code" (tcode t) (tcode t2)
          assertEqual "Equal description" (tdescription t) (tdescription t2)
          assertEqual "Equal comment" (tcomment t) (tcomment t2)
          assertEqual "Equal tags" (ttags t) (ttags t2)
          assertEqual "Equal preceding comments" (tpreceding_comment_lines t) (tpreceding_comment_lines t2)
          -- postings are compared by their shown form
          assertEqual "Equal postings" (show $ tpostings t) (show $ tpostings t2)

    -- "0000/01/01\n\n" `gives` nulltransaction

    -- a transaction using every header field, with comments and tags on
    -- both the transaction and its posting
    T.unlines [
       "2012/05/14=2012/05/15 (code) desc ; tcomment1",
       " ; tcomment2",
       " ; ttag1: val1",
       " * a  $1.00 ; pcomment1",
       " ; pcomment2",
       " ; ptag1: val1",
       " ; ptag2: val2"
      ]
      `gives`
      nulltransaction{
        tdate=parsedate "2012/05/14",
        tdate2=Just $ parsedate "2012/05/15",
        tstatus=Unmarked,
        tcode="code",
        tdescription="desc",
        tcomment="tcomment1\ntcomment2\nttag1: val1\n",
        ttags=[("ttag1","val1")],
        tpostings=[
          nullposting{
            pdate=Just $ parsedate "2012/05/14",
            pstatus=Cleared,
            paccount="a",
            pamount=Mixed [usd 1],
            pcomment="pcomment1\npcomment2\nptag1: val1\nptag2: val2\n",
            ptype=RegularPosting,
            ptags=[("ptag1","val1"),("ptag2","val2")],
            ptransaction=Nothing
            }
          ],
        tpreceding_comment_lines=""
        }

    -- a date alone is a valid transaction
    T.unlines ["2015/1/1"]
      `gives`
      nulltransaction{ tdate=parsedate "2015/01/01" }

    assertBool "transactionp parses a well-formed transaction" $
      isRight . runIdentity . parseWithState mempty transactionp $ T.unlines
        ["2007/01/28 coopportunity"
        ," expenses:food:groceries  $47.18"
        ," assets:checking  $-47.18"
        ,""
        ]

    let p = runIdentity $ parseWithState mempty transactionp "2009/1/1 a ;comment\n b  1\n"
    assertEqual "transactionp should not parse a following comment as part of the description"
      (Right "a") (tdescription <$> p)

    assertBool "transactionp parses a following whitespace line" $
      isRight . runIdentity . parseWithState mempty transactionp $ T.unlines
        ["2012/1/1"
        ," a  1"
        ," b"
        ," "
        ]

    let p = runIdentity . parseWithState mempty transactionp $ T.unlines
              ["2009/1/1 x ; transaction comment"
              ," a  1 ; posting 1 comment"
              ," ; posting 1 comment 2"
              ," b"
              ," ; posting 2 comment"
              ]
    assertBool "transactionp parses comments anywhere" (isRight p)
    assertEqual "Has 2 postings" 2 (let Right t = p in length $ tpostings t)
2016-04-23 21:27:39 +03:00
--- ** postings
2007-02-09 04:23:12 +03:00
-- | Parse the following whitespace-beginning lines as postings, posting
-- tags, and/or comments (inferring year, if needed, from the given date).
-- Zero or more postings are accepted.
postingsp :: Maybe Year -> JournalParser m [Posting]
postingsp myear = many (postingp myear) <?> "postings"
2018-06-06 08:52:28 +03:00
-- linebeginningwithspaces :: JournalParser m String
-- linebeginningwithspaces = do
-- sp <- lift (skipSome spacenonewline)
-- c <- nonspace
-- cs <- lift restofline
-- return $ sp ++ (c:cs) ++ "\n"
2007-02-09 04:23:12 +03:00
2018-06-06 08:52:28 +03:00
-- | Parse a single posting line: leading whitespace, a status, an
-- account name, an optional amount, an optional balance assertion, an
-- optional fixed lot price (parsed but discarded), and a comment which
-- may supply tags and posting dates. The given year, if any, is passed
-- to the comment parser.
postingp :: Maybe Year -> JournalParser m Posting
postingp mTransactionYear = do
  -- lift $ dbgparse 0 "postingp"
  -- Backtrack if the line does not begin like a posting.
  (status, rawaccount) <- try $ do
    st  <- lift $ skipSome spacenonewline *> statusp <* skipMany spacenonewline
    acc <- modifiedaccountnamep
    return (st, acc)
  -- the posting type is derived from the account name as written,
  -- then textUnbracket is applied to the name
  let postingtype = accountNamePostingType rawaccount
      account     = textUnbracket rawaccount
  lift (skipMany spacenonewline)
  -- a posting without an amount gets missingmixedamt
  amount     <- option missingmixedamt $ (\a -> Mixed [a]) <$> amountp
  massertion <- partialbalanceassertionp
  _          <- fixedlotpricep
  lift (skipMany spacenonewline)
  (comment, tags, mdate, mdate2) <- lift $ postingcommentp mTransactionYear
  return posting
    { pdate             = mdate
    , pdate2            = mdate2
    , pstatus           = status
    , paccount          = account
    , pamount           = amount
    , pcomment          = comment
    , ptype             = postingtype
    , ptags             = tags
    , pbalanceassertion = massertion
    }
2018-06-18 01:23:41 +03:00
-- | HUnit test case for 'postingp'.
--
-- Each @input `gives` expected@ example runs @postingp Nothing@ over the input
-- text with an empty parser state, asserts that the parse succeeds, and then
-- compares the parsed posting with the expected one field by field, so that a
-- failure names the field that differs.
--
-- In the example inputs the account name is separated from the amount by two
-- spaces, as journal syntax requires; with a single space the amount would be
-- read as part of the account name.
test_postingp = TestCase $ do
  let s `gives` ep = do
        let parse = runIdentity $ parseWithState mempty (postingp Nothing) s
        assertBool "Example is parsed well" $ isRight parse
        -- This pattern is lazy: it is only forced by the comparisons below,
        -- which run after the assertion above has checked for a Right.
        let Right ap = parse
            -- Compare one field of the expected (ep) and actual (ap) postings.
            same msg f = assertEqual ("Posting "++msg++" differs") (f ep) (f ap)
        same "date" pdate
        same "status" pstatus
        same "account" paccount
        -- same "amount" pamount
        -- more revealing:
        assertEqual "amount differs!" (showMixedAmountDebug $ pamount ep) (showMixedAmountDebug $ pamount ap)
        same "comment" pcomment
        same "type" ptype
        same "tags" ptags
        same "transaction" ptransaction

  -- a same-line comment continued on the following line, with tags in both
  " expenses:food:dining  $10.00 ; a: a a \n ; b: b b \n" `gives`
    posting{paccount="expenses:food:dining", pamount=Mixed [usd 10], pcomment="a: a a\nb: b b\n", ptags=[("a","a a"), ("b","b b")]}

  -- a bracketed date in the comment sets the posting date
  " a  1. ; [2012/11/28]\n" `gives`   -- trailing decimal point required to match num's asdecimalpoint
    ("a" `post` num 1){pcomment="[2012/11/28]\n"
                      ,pdate=parsedateM "2012/11/28"}

  -- a bracketed secondary date only: the primary posting date stays unset
  " a  2. ; a:a, [=2012/11/28]\n" `gives`
    ("a" `post` num 2){pcomment="a:a, [=2012/11/28]\n"
                      ,ptags=[("a","a")]
                      ,pdate=Nothing}

  -- bracketed primary=secondary dates on a continuation comment line
  " a  3. ; a:a\n ; [2012/11/28=2012/11/29],b:b\n" `gives`
    ("a" `post` num 3){pcomment="a:a\n[2012/11/28=2012/11/29],b:b\n"
                      ,ptags=[("a","a"), ("[2012/11/28=2012/11/29],b","b")] -- XXX ugly tag name parsed
                      ,pdate=parsedateM "2012/11/28"}

  assertBool "postingp parses a quoted commodity with numbers"
    (isRight . runIdentity $ parseWithState mempty (postingp Nothing) " a  1 \"DE123\"\n")

  assertBool "postingp parses balance assertions and fixed lot prices"
    (isRight . runIdentity $ parseWithState mempty (postingp Nothing) " a  1 \"DE123\" =$1 { =2.2 EUR} \n")

  -- let parse = parseWithState mempty postingp " a\n ;next-line comment\n"
  -- assertRight parse
  -- let Right p = parse
  -- assertEqual "next-line comment\n" (pcomment p)
  -- assertEqual (Just nullmixedamt) (pbalanceassertion p)
--- * more tests
2016-04-23 03:43:16 +03:00
2018-06-18 01:23:41 +03:00
-- | The HUnit tests for this module: the transaction and posting parser
-- tests, plus a check that a market price directive, once parsed, is rendered
-- by 'showMarketPrice' as the original directive text (minus the newline).
tests_Hledger_Read_JournalReader = TestList
  [ test_transactionp
  , test_postingp
  , "showParsedMarketPrice" ~:
      let parsed   = parseWithState mempty marketpricedirectivep "P 2017/01/30 BTC $922.83\n"
          -- map over both layers of the result: the outer Maybe and the inner Either
          rendered = fmap (fmap showMarketPrice) parsed
      in  rendered `is` Just (Right "P 2017/01/30 BTC $922.83")
  ]
{- old hunit tests
tests_Hledger_Read_JournalReader = TestList $ concat [
test_numberp,
test_amountp,
test_spaceandamountormissingp,
test_tagcomment,
test_inlinecomment,
test_comments,
test_ledgerDateSyntaxToTags,
test_postingp,
test_transactionp,
[
"transactionmodifierp" ~: do
assertParse (parseWithState mempty transactionmodifierp "= (some value expr)\n some:postings 1\n")
,"periodictransactionp" ~: do
assertParse (parseWithState mempty periodictransactionp "~ (some period expr)\n some:postings 1\n")
,"directivep" ~: do
assertParse (parseWithState mempty directivep "!include /some/file.x\n")
assertParse (parseWithState mempty directivep "account some:account\n")
assertParse (parseWithState mempty (directivep >> directivep) "!account a\nend\n")
,"comment" ~: do
assertParse (parseWithState mempty comment "; some comment \n")
assertParse (parseWithState mempty comment " \t; x\n")
assertParse (parseWithState mempty comment "#x")
,"datep" ~: do
assertParse (parseWithState mempty datep "2011/1/1")
assertParseFailure (parseWithState mempty datep "1/1")
assertParse (parseWithState mempty{jpsYear=Just 2011} datep "1/1")
,"datetimep" ~: do
let p = do {t <- datetimep; eof; return t}
bad = assertParseFailure . parseWithState mempty p
good = assertParse . parseWithState mempty p
bad "2011/1/1"
bad "2011/1/1 24:00:00"
bad "2011/1/1 00:60:00"
bad "2011/1/1 00:00:60"
good "2011/1/1 00:00"
good "2011/1/1 23:59:59"
good "2011/1/1 3:5:7"
-- timezone is parsed but ignored
let startofday = LocalTime (fromGregorian 2011 1 1) (TimeOfDay 0 0 (fromIntegral 0))
assertParseEqual (parseWithState mempty p "2011/1/1 00:00-0800") startofday
assertParseEqual (parseWithState mempty p "2011/1/1 00:00+1234") startofday
,"defaultyeardirectivep" ~: do
assertParse (parseWithState mempty defaultyeardirectivep "Y 2010\n")
assertParse (parseWithState mempty defaultyeardirectivep "Y 10001\n")
,"marketpricedirectivep" ~:
assertParseEqual (parseWithState mempty marketpricedirectivep "P 2004/05/01 XYZ $55.00\n") (MarketPrice (parsedate "2004/05/01") "XYZ" $ usd 55)
,"ignoredpricecommoditydirectivep" ~: do
assertParse (parseWithState mempty ignoredpricecommoditydirectivep "N $\n")
,"defaultcommoditydirectivep" ~: do
assertParse (parseWithState mempty defaultcommoditydirectivep "D $1,000.0\n")
,"commodityconversiondirectivep" ~: do
assertParse (parseWithState mempty commodityconversiondirectivep "C 1h = $50.00\n")
,"tagdirectivep" ~: do
assertParse (parseWithState mempty tagdirectivep "tag foo \n")
,"endtagdirectivep" ~: do
assertParse (parseWithState mempty endtagdirectivep "end tag \n")
assertParse (parseWithState mempty endtagdirectivep "pop \n")
,"accountnamep" ~: do
assertBool "accountnamep parses a normal account name" (isRight $ parsewith accountnamep "a:b:c")
assertBool "accountnamep rejects an empty inner component" (isLeft $ parsewith accountnamep "a::c")
assertBool "accountnamep rejects an empty leading component" (isLeft $ parsewith accountnamep ":b:c")
assertBool "accountnamep rejects an empty trailing component" (isLeft $ parsewith accountnamep "a:b:")
2010-03-11 20:16:03 +03:00
,"leftsymbolamountp" ~: do
assertParseEqual (parseWithState mempty leftsymbolamountp "$1") (usd 1 `withPrecision` 0)
assertParseEqual (parseWithState mempty leftsymbolamountp "$-1") (usd (-1) `withPrecision` 0)
assertParseEqual (parseWithState mempty leftsymbolamountp "-$1") (usd (-1) `withPrecision` 0)
,"amount" ~: do
let -- | compare a parse result with an expected amount, showing the debug representation for clarity
assertAmountParse parseresult amount =
(either (const "parse error") showAmountDebug parseresult) ~?= (showAmountDebug amount)
assertAmountParse (parseWithState mempty amountp "1 @ $2")
(num 1 `withPrecision` 0 `at` (usd 2 `withPrecision` 0))
2012-05-09 19:34:05 +04:00
]]
-}
-- | easytest-style tests for this module, grouped under the "JournalReader"
-- scope. They currently exercise 'periodictransactionp' with free text
-- following the period expression:
--
-- * after a semicolon, where it is expected to become the comment;
--
-- * after two spaces, and after one space. Both of these cases begin with
--   'skip', so their parse expectations are not currently checked.
easytests = scope "JournalReader" $ tests
  [ scope "periodictransactionp" $ tests
    [ scope "more-period-text-in-comment" $ expectParseEqIO periodictransactionp
        "~ monthly from 2018/6 ;In 2019 we will change this\n"
        nullperiodictransaction
          { ptperiodexpr  = "monthly from 2018/6"
          , ptinterval    = Months 1
          , ptspan        = DateSpan (Just $ parsedate "2018/06/01") Nothing
          , ptstatus      = Unmarked
          , ptcode        = ""
          , ptdescription = ""
          , ptcomment     = "In 2019 we will change this\n"
          , pttags        = []
          , ptpostings    = []
          }

      -- NOTE(review): the two skipped cases below are named "...-in-description-..."
      -- but expect the text in ptcomment - confirm the intended field before
      -- un-skipping them.

      -- Two spaces separate the period expression from the following text here;
      -- this is what distinguishes this case from the one-space case after it.
    , scope "more-period-text-in-description-after-two-spaces" $ skip >> expectParseEqIO periodictransactionp
        "~ monthly from 2018/6  In 2019 we will change this\n"
        nullperiodictransaction
          { ptperiodexpr = "monthly from 2018/6"
          , ptinterval   = Months 1
          , ptspan       = DateSpan (Just $ parsedate "2018/06/01") Nothing
          , ptcomment    = "In 2019 we will change this\n"
          }

    , scope "more-period-text-in-description-after-one-space" $ skip >> expectParseEqIO periodictransactionp
        "~ monthly from 2018/6 In 2019 we will change this\n"
        nullperiodictransaction
          { ptperiodexpr = "monthly from 2018/6"
          , ptinterval   = Months 1
          , ptspan       = DateSpan (Just $ parsedate "2018/06/01") Nothing
          , ptcomment    = "In 2019 we will change this\n"
          }
    ]
  ]