2016-05-18 05:46:54 +03:00
--- * doc
-- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users,
-- (add-hook 'haskell-mode-hook
-- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t))
-- 'orgstruct-mode)
-- and press TAB on nodes to expand/collapse.
{- |
2016-05-19 02:37:31 +03:00
Some common parsers and helpers used by several readers .
Some of these might belong in Hledger . Read . JournalReader or Hledger . Read .
2016-05-18 05:46:54 +03:00
- }
--- * module
2018-03-25 16:53:44 +03:00
{- # LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings # -}
2018-06-06 08:44:02 +03:00
{- # LANGUAGE TypeFamilies # -}
2017-11-05 02:40:54 +03:00
{- # LANGUAGE LambdaCase # -}
2018-06-04 22:30:43 +03:00
{- # LANGUAGE PackageImports # -}
2016-05-18 05:46:54 +03:00
2018-05-10 22:58:55 +03:00
module Hledger.Read.Common (
Reader ( .. ) ,
InputOpts ( .. ) ,
2018-05-14 06:37:00 +03:00
definputopts ,
2018-05-10 22:58:55 +03:00
rawOptsToInputOpts ,
-- * parsing utilities
runTextParser ,
2018-05-11 17:17:38 +03:00
rtp ,
2018-05-10 22:58:55 +03:00
runJournalParser ,
rjp ,
genericSourcePos ,
journalSourcePos ,
2018-07-31 11:03:45 +03:00
applyTransactionModifiers ,
2018-05-10 22:58:55 +03:00
parseAndFinaliseJournal ,
setYear ,
2018-05-14 06:37:00 +03:00
getYear ,
2018-05-10 22:58:55 +03:00
setDefaultCommodityAndStyle ,
getDefaultCommodityAndStyle ,
2018-05-14 06:37:00 +03:00
getDefaultAmountStyle ,
getAmountStyle ,
2018-09-23 10:06:29 +03:00
pushDeclaredAccount ,
2018-05-10 22:58:55 +03:00
pushParentAccount ,
popParentAccount ,
getParentAccount ,
addAccountAlias ,
2018-05-14 06:37:00 +03:00
getAccountAliases ,
2018-05-10 22:58:55 +03:00
clearAccountAliases ,
journalAddFile ,
-- * parsers
-- ** transaction bits
statusp ,
codep ,
descriptionp ,
-- ** dates
datep ,
datetimep ,
secondarydatep ,
-- ** account names
modifiedaccountnamep ,
accountnamep ,
-- ** amounts
spaceandamountormissingp ,
amountp ,
2018-05-14 06:37:00 +03:00
amountp' ,
2018-05-10 22:58:55 +03:00
mamountp' ,
commoditysymbolp ,
2018-05-14 06:37:00 +03:00
priceamountp ,
2018-05-10 22:58:55 +03:00
partialbalanceassertionp ,
fixedlotpricep ,
numberp ,
2018-05-14 06:37:00 +03:00
fromRawNumber ,
rawnumberp ,
2018-05-10 22:58:55 +03:00
-- ** comments
multilinecommentp ,
emptyorcommentlinep ,
2018-06-06 08:44:02 +03:00
followingcommentp ,
transactioncommentp ,
postingcommentp ,
2018-05-14 06:37:00 +03:00
-- ** bracketed dates
2018-06-09 05:35:27 +03:00
bracketeddatetagsp ,
-- ** misc
singlespacedtextp ,
2018-06-18 01:23:41 +03:00
singlespacep ,
-- * tests
2018-09-06 23:08:26 +03:00
tests_Common ,
2018-05-10 22:58:55 +03:00
)
2016-05-18 05:46:54 +03:00
where
--- * imports
import Prelude ( )
2018-06-05 02:28:28 +03:00
import " base-compat-batteries " Prelude.Compat hiding ( readFile )
import " base-compat-batteries " Control.Monad.Compat
2018-06-06 21:21:17 +03:00
import Control.Monad.Except ( ExceptT ( .. ) , throwError )
2016-07-29 18:57:10 +03:00
import Control.Monad.State.Strict
2018-06-06 08:44:02 +03:00
import Data.Bifunctor ( bimap , second )
2017-11-02 10:36:49 +03:00
import Data.Char
2017-09-15 03:41:42 +03:00
import Data.Data
2018-05-25 00:52:09 +03:00
import Data.Decimal ( DecimalRaw ( Decimal ) , Decimal )
2017-09-15 03:41:42 +03:00
import Data.Default
2016-05-18 05:46:54 +03:00
import Data.Functor.Identity
2018-06-05 02:28:28 +03:00
import " base-compat-batteries " Data.List.Compat
2016-07-29 18:57:10 +03:00
import Data.List.NonEmpty ( NonEmpty ( .. ) )
2016-05-18 05:46:54 +03:00
import Data.Maybe
2017-10-28 18:14:54 +03:00
import qualified Data.Map as M
2018-05-23 22:45:57 +03:00
import qualified Data.Semigroup as Sem
lib: textification: saved journal source
Slightly worse on small files, better on large ones.
hledger -f data/100x100x10.journal stats
<<ghc: 39305392 bytes, 77 GCs, 196354/268584 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.007 elapsed), 0.014 MUT (0.027 elapsed), 0.011 GC (0.111 elapsed) :ghc>>
<<ghc: 39307728 bytes, 77 GCs, 196909/270248 avg/max bytes residency (3 samples), 2M in use, 0.001 INIT (0.010 elapsed), 0.015 MUT (0.028 elapsed), 0.012 GC (0.116 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314285912 bytes, 612 GCs, 2064811/6597608 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.002 elapsed), 0.126 MUT (0.134 elapsed), 0.059 GC (0.069 elapsed) :ghc>>
<<ghc: 314271368 bytes, 612 GCs, 2070227/6628024 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.137 MUT (0.143 elapsed), 0.059 GC (0.068 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070033264 bytes, 5965 GCs, 12699294/62962464 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.000 elapsed), 1.245 MUT (1.300 elapsed), 0.498 GC (0.558 elapsed) :ghc>>
<<ghc: 3070006752 bytes, 5973 GCs, 12687314/62848920 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.257 MUT (1.281 elapsed), 0.496 GC (0.554 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753465088 bytes, 59763 GCs, 117723618/666643528 avg/max bytes residency (14 samples), 1589M in use, 0.000 INIT (0.002 elapsed), 12.536 MUT (12.793 elapsed), 5.978 GC (7.155 elapsed) :ghc>>
<<ghc: 30753367256 bytes, 59811 GCs, 117723236/666627528 avg/max bytes residency (14 samples), 1590M in use, 0.001 INIT (0.012 elapsed), 12.923 MUT (13.169 elapsed), 5.981 GC (6.860 elapsed) :ghc>>
2016-05-24 05:24:39 +03:00
import Data.Text ( Text )
lib: textification begins! account names
The first of several conversions from String to (strict) Text, hopefully
reducing space and time usage.
This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1:
hledger -f data/100x100x10.journal stats
string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>>
text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>>
hledger -f data/1000x100x10.journal stats
string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>>
text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>>
hledger -f data/10000x100x10.journal stats
string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>>
text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>>
hledger -f data/100000x100x10.journal stats
string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>>
text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
import qualified Data.Text as T
2016-05-18 05:46:54 +03:00
import Data.Time.Calendar
import Data.Time.LocalTime
import System.Time ( getClockTime )
2018-05-22 01:47:56 +03:00
import Text.Megaparsec
import Text.Megaparsec.Char
2018-05-23 05:17:51 +03:00
import Text.Megaparsec.Char.Lexer ( decimal )
2018-06-11 22:49:14 +03:00
import Text.Megaparsec.Custom
2016-05-18 05:46:54 +03:00
2018-08-20 16:28:40 +03:00
import Hledger.Data
2016-05-18 05:46:54 +03:00
import Hledger.Utils
2018-04-17 00:47:04 +03:00
2018-08-03 21:38:55 +03:00
-- $setup
-- >>> :set -XOverloadedStrings
2018-04-17 00:47:04 +03:00
-- | A hledger journal reader is a triple of storage format name, a
-- detector of that format, and a parser from that format to Journal.
data Reader = Reader {
-- The canonical name of the format handled by this reader
rFormat :: StorageFormat
-- The file extensions recognised as containing this format
, rExtensions :: [ String ]
-- A text parser for this format, accepting input options, file
-- path for error messages and file contents, producing an exception-raising IO
-- action that returns a journal or error message.
, rParser :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
-- Experimental readers are never tried automatically.
, rExperimental :: Bool
}
instance Show Reader where show r = rFormat r ++ " reader "
2016-05-18 05:46:54 +03:00
2016-05-25 04:28:26 +03:00
-- $setup
2016-05-18 05:46:54 +03:00
2017-09-15 03:41:42 +03:00
-- | Various options to use when reading journal files.
-- Similar to CliOptions.inputflags, simplifies the journal-reading functions.
data InputOpts = InputOpts {
-- files_ :: [FilePath]
mformat_ :: Maybe StorageFormat -- ^ a file/storage format to try, unless overridden
-- by a filename prefix. Nothing means try all.
, mrules_file_ :: Maybe FilePath -- ^ a conversion rules file to use (when reading CSV)
2018-09-07 20:12:13 +03:00
, separator_ :: Char -- ^ the separator to use (when reading CSV)
2017-09-15 03:41:42 +03:00
, aliases_ :: [ String ] -- ^ account name aliases to apply
, anon_ :: Bool -- ^ do light anonymisation/obfuscation of the data
, ignore_assertions_ :: Bool -- ^ don't check balance assertions
2017-09-15 19:55:17 +03:00
, new_ :: Bool -- ^ read only new transactions since this file was last read
2017-09-18 04:57:42 +03:00
, new_save_ :: Bool -- ^ save latest new transactions state for next time
2017-09-15 03:41:42 +03:00
, pivot_ :: String -- ^ use the given field's value as the account name
2018-04-17 00:47:04 +03:00
, auto_ :: Bool -- ^ generate automatic postings when journal is parsed
2017-09-15 03:41:42 +03:00
} deriving ( Show , Data ) --, Typeable)
instance Default InputOpts where def = definputopts
definputopts :: InputOpts
2018-09-07 20:12:13 +03:00
definputopts = InputOpts def def ',' def def def def True def def
2017-09-15 03:41:42 +03:00
rawOptsToInputOpts :: RawOpts -> InputOpts
rawOptsToInputOpts rawopts = InputOpts {
-- files_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "file" rawopts
mformat_ = Nothing
, mrules_file_ = maybestringopt " rules-file " rawopts
2018-09-07 20:12:13 +03:00
, separator_ = fromMaybe ',' ( maybecharopt " separator " rawopts )
2017-09-15 03:41:42 +03:00
, aliases_ = map ( T . unpack . stripquotes . T . pack ) $ listofstringopt " alias " rawopts
, anon_ = boolopt " anon " rawopts
, ignore_assertions_ = boolopt " ignore-assertions " rawopts
2017-09-15 19:55:17 +03:00
, new_ = boolopt " new " rawopts
2017-09-18 04:57:42 +03:00
, new_save_ = True
2017-09-15 03:41:42 +03:00
, pivot_ = stringopt " pivot " rawopts
2018-04-17 00:47:04 +03:00
, auto_ = boolopt " auto " rawopts
2017-09-15 03:41:42 +03:00
}
2018-05-10 22:58:55 +03:00
--- * parsing utilities
2016-05-18 05:46:54 +03:00
2018-08-04 18:11:32 +03:00
-- | Run a text parser in the identity monad. See also: parseWithState.
2018-06-05 23:23:47 +03:00
runTextParser , rtp :: TextParser Identity a -> Text -> Either ( ParseError Char CustomErr ) a
2016-07-29 18:57:10 +03:00
runTextParser p t = runParser p " " t
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
rtp = runTextParser
2018-08-04 18:11:32 +03:00
-- | Run a journal parser in some monad. See also: parseWithState.
2018-06-05 23:23:47 +03:00
runJournalParser , rjp :: Monad m => JournalParser m a -> Text -> m ( Either ( ParseError Char CustomErr ) a )
2018-05-25 06:18:55 +03:00
runJournalParser p t = runParserT ( evalStateT p mempty ) " " t
2016-05-18 05:46:54 +03:00
rjp = runJournalParser
genericSourcePos :: SourcePos -> GenericSourcePos
2016-07-29 18:57:10 +03:00
genericSourcePos p = GenericSourcePos ( sourceName p ) ( fromIntegral . unPos $ sourceLine p ) ( fromIntegral . unPos $ sourceColumn p )
2016-05-18 05:46:54 +03:00
2018-06-09 05:35:27 +03:00
-- | Construct a generic start & end line parse position from start and end megaparsec SourcePos's.
2017-01-20 18:33:24 +03:00
journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos
journalSourcePos p p' = JournalSourcePos ( sourceName p ) ( fromIntegral . unPos $ sourceLine p , fromIntegral $ line' )
where line'
| ( unPos $ sourceColumn p' ) == 1 = unPos ( sourceLine p' ) - 1
| otherwise = unPos $ sourceLine p' -- might be at end of file withat last new-line
2018-07-31 11:03:45 +03:00
-- | Apply any transaction modifier rules in the journal
-- (adding automated postings to transactions, eg).
applyTransactionModifiers :: Journal -> Journal
applyTransactionModifiers j = j { jtxns = map applyallmodifiers $ jtxns j }
2018-04-17 00:47:04 +03:00
where
2018-07-31 10:57:46 +03:00
applyallmodifiers =
2018-07-31 12:39:11 +03:00
foldr ( flip ( . ) . transactionModifierToFunction ) id ( jtxnmodifiers j )
2018-04-17 00:47:04 +03:00
-- | Given a megaparsec ParsedJournal parser, input options, file
2016-11-13 00:54:48 +03:00
-- path and file content: parse and post-process a Journal, or give an error.
2018-06-06 09:29:52 +03:00
parseAndFinaliseJournal :: JournalParser IO ParsedJournal -> InputOpts
2018-04-17 00:47:04 +03:00
-> FilePath -> Text -> ExceptT String IO Journal
parseAndFinaliseJournal parser iopts f txt = do
2016-05-23 10:32:55 +03:00
t <- liftIO getClockTime
2016-05-18 05:46:54 +03:00
y <- liftIO getCurrentYear
2018-06-06 09:29:52 +03:00
ep <- liftIO $ runParserT ( evalStateT parser nulljournal { jparsedefaultyear = Just y } ) f txt
2016-05-23 10:32:55 +03:00
case ep of
2018-04-17 00:47:04 +03:00
Right pj ->
2018-07-31 11:03:45 +03:00
let pj' = if auto_ iopts then applyTransactionModifiers pj else pj in
2018-04-17 00:47:04 +03:00
case journalFinalise t f txt ( not $ ignore_assertions_ iopts ) pj' of
2016-05-23 10:32:55 +03:00
Right j -> return j
Left e -> throwError e
2018-06-06 09:31:37 +03:00
Left e -> throwError $ customParseErrorPretty txt e
2016-05-18 05:46:54 +03:00
2017-07-27 14:59:55 +03:00
setYear :: Year -> JournalParser m ()
2016-07-29 18:57:10 +03:00
setYear y = modify' ( \ j -> j { jparsedefaultyear = Just y } )
2016-05-18 05:46:54 +03:00
2017-07-27 14:59:55 +03:00
getYear :: JournalParser m ( Maybe Year )
2016-07-29 18:57:10 +03:00
getYear = fmap jparsedefaultyear get
2016-05-18 05:46:54 +03:00
2017-07-27 14:59:55 +03:00
setDefaultCommodityAndStyle :: ( CommoditySymbol , AmountStyle ) -> JournalParser m ()
2016-07-29 18:57:10 +03:00
setDefaultCommodityAndStyle cs = modify' ( \ j -> j { jparsedefaultcommodity = Just cs } )
2016-05-18 05:46:54 +03:00
2017-07-27 14:59:55 +03:00
getDefaultCommodityAndStyle :: JournalParser m ( Maybe ( CommoditySymbol , AmountStyle ) )
2016-07-29 18:57:10 +03:00
getDefaultCommodityAndStyle = jparsedefaultcommodity ` fmap ` get
2016-05-18 05:46:54 +03:00
2017-11-05 02:40:54 +03:00
-- | Get amount style associated with default currency.
--
-- Returns 'AmountStyle' used to defined by a latest default commodity directive
-- prior to current position within this file or its parents.
getDefaultAmountStyle :: JournalParser m ( Maybe AmountStyle )
getDefaultAmountStyle = fmap snd <$> getDefaultCommodityAndStyle
2017-10-28 18:14:54 +03:00
2017-11-05 02:40:54 +03:00
-- | Lookup currency-specific amount style.
--
-- Returns 'AmountStyle' used in commodity directive within current journal
-- prior to current position or in its parents files.
getAmountStyle :: CommoditySymbol -> JournalParser m ( Maybe AmountStyle )
getAmountStyle commodity = do
2017-11-05 01:59:15 +03:00
specificStyle <- maybe Nothing cformat . M . lookup commodity . jcommodities <$> get
defaultStyle <- fmap snd <$> getDefaultCommodityAndStyle
let effectiveStyle = listToMaybe $ catMaybes [ specificStyle , defaultStyle ]
2017-11-05 02:40:54 +03:00
return effectiveStyle
2017-10-28 18:14:54 +03:00
2018-09-23 10:06:29 +03:00
pushDeclaredAccount :: AccountName -> JournalParser m ()
pushDeclaredAccount acct = modify' ( \ j -> j { jdeclaredaccounts = acct : jdeclaredaccounts j } )
2016-05-18 05:46:54 +03:00
2017-07-27 14:59:55 +03:00
pushParentAccount :: AccountName -> JournalParser m ()
2016-07-29 18:57:10 +03:00
pushParentAccount acct = modify' ( \ j -> j { jparseparentaccounts = acct : jparseparentaccounts j } )
2017-07-27 14:59:55 +03:00
popParentAccount :: JournalParser m ()
2016-05-23 10:32:55 +03:00
popParentAccount = do
2016-07-29 18:57:10 +03:00
j <- get
2016-05-23 10:32:55 +03:00
case jparseparentaccounts j of
2016-07-29 18:57:10 +03:00
[] -> unexpected ( Tokens ( 'E' :| " nd of apply account block with no beginning " ) )
( _ : rest ) -> put j { jparseparentaccounts = rest }
2016-05-18 05:46:54 +03:00
2017-07-27 14:59:55 +03:00
getParentAccount :: JournalParser m AccountName
2016-07-29 18:57:10 +03:00
getParentAccount = fmap ( concatAccountNames . reverse . jparseparentaccounts ) get
2016-05-18 05:46:54 +03:00
2016-07-29 18:57:10 +03:00
addAccountAlias :: MonadState Journal m => AccountAlias -> m ()
addAccountAlias a = modify' ( \ ( j @ Journal { .. } ) -> j { jparsealiases = a : jparsealiases } )
2016-05-18 05:46:54 +03:00
2016-07-29 18:57:10 +03:00
getAccountAliases :: MonadState Journal m => m [ AccountAlias ]
getAccountAliases = fmap jparsealiases get
2016-05-18 05:46:54 +03:00
2016-07-29 18:57:10 +03:00
clearAccountAliases :: MonadState Journal m => m ()
clearAccountAliases = modify' ( \ ( j @ Journal { .. } ) -> j { jparsealiases = [] } )
2016-05-18 05:46:54 +03:00
2016-08-14 22:44:19 +03:00
-- getTransactionCount :: MonadState Journal m => m Integer
-- getTransactionCount = fmap jparsetransactioncount get
--
-- setTransactionCount :: MonadState Journal m => Integer -> m ()
-- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i})
--
-- -- | Increment the transaction index by one and return the new value.
-- incrementTransactionCount :: MonadState Journal m => m Integer
-- incrementTransactionCount = do
-- modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1})
-- getTransactionCount
2016-05-18 05:46:54 +03:00
lib: textification: saved journal source
Slightly worse on small files, better on large ones.
hledger -f data/100x100x10.journal stats
<<ghc: 39305392 bytes, 77 GCs, 196354/268584 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.007 elapsed), 0.014 MUT (0.027 elapsed), 0.011 GC (0.111 elapsed) :ghc>>
<<ghc: 39307728 bytes, 77 GCs, 196909/270248 avg/max bytes residency (3 samples), 2M in use, 0.001 INIT (0.010 elapsed), 0.015 MUT (0.028 elapsed), 0.012 GC (0.116 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314285912 bytes, 612 GCs, 2064811/6597608 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.002 elapsed), 0.126 MUT (0.134 elapsed), 0.059 GC (0.069 elapsed) :ghc>>
<<ghc: 314271368 bytes, 612 GCs, 2070227/6628024 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.137 MUT (0.143 elapsed), 0.059 GC (0.068 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070033264 bytes, 5965 GCs, 12699294/62962464 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.000 elapsed), 1.245 MUT (1.300 elapsed), 0.498 GC (0.558 elapsed) :ghc>>
<<ghc: 3070006752 bytes, 5973 GCs, 12687314/62848920 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.257 MUT (1.281 elapsed), 0.496 GC (0.554 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753465088 bytes, 59763 GCs, 117723618/666643528 avg/max bytes residency (14 samples), 1589M in use, 0.000 INIT (0.002 elapsed), 12.536 MUT (12.793 elapsed), 5.978 GC (7.155 elapsed) :ghc>>
<<ghc: 30753367256 bytes, 59811 GCs, 117723236/666627528 avg/max bytes residency (14 samples), 1590M in use, 0.001 INIT (0.012 elapsed), 12.923 MUT (13.169 elapsed), 5.981 GC (6.860 elapsed) :ghc>>
2016-05-24 05:24:39 +03:00
journalAddFile :: ( FilePath , Text ) -> Journal -> Journal
2016-05-23 10:32:55 +03:00
journalAddFile f j @ Journal { jfiles = fs } = j { jfiles = fs ++ [ f ] }
-- append, unlike the other fields, even though we do a final reverse,
-- to compensate for additional reversal due to including/monoid-concatting
2016-05-18 05:46:54 +03:00
--- * parsers
2018-05-24 07:36:19 +03:00
2016-05-18 05:46:54 +03:00
--- ** transaction bits
2017-06-16 02:54:34 +03:00
statusp :: TextParser m Status
2016-05-18 05:46:54 +03:00
statusp =
choice'
2018-03-25 16:53:44 +03:00
[ skipMany spacenonewline >> char '*' >> return Cleared
, skipMany spacenonewline >> char '!' >> return Pending
2017-06-16 02:25:37 +03:00
, return Unmarked
2016-05-18 05:46:54 +03:00
]
2018-05-22 04:52:34 +03:00
codep :: TextParser m Text
2018-06-21 04:45:11 +03:00
codep = option " " $ do
try $ do
skipSome spacenonewline
char '('
code <- takeWhileP Nothing $ \ c -> c /= ')' && c /= '\ n'
char ')' <?> " closing bracket ')' for transaction code "
pure code
2016-05-18 05:46:54 +03:00
2018-06-06 08:52:28 +03:00
descriptionp :: TextParser m Text
2018-05-24 07:36:19 +03:00
descriptionp = takeWhileP Nothing ( not . semicolonOrNewline )
where semicolonOrNewline c = c == ';' || c == '\ n'
2016-05-18 05:46:54 +03:00
--- ** dates
-- | Parse a date in YYYY/MM/DD format.
-- Hyphen (-) and period (.) are also allowed as separators.
-- The year may be omitted if a default year has been set.
-- Leading zeroes may be omitted.
2017-07-27 14:59:55 +03:00
datep :: JournalParser m Day
2016-05-18 05:46:54 +03:00
datep = do
2018-05-24 07:36:19 +03:00
mYear <- getYear
lift $ datep' mYear
2018-05-16 05:03:59 +03:00
datep' :: Maybe Year -> TextParser m Day
2018-05-23 05:17:51 +03:00
datep' mYear = do
2018-06-06 20:15:38 +03:00
startPos <- getPosition
2018-05-23 05:17:51 +03:00
d1 <- decimal <?> " year or month "
sep <- satisfy isDateSepChar <?> " date separator "
d2 <- decimal <?> " month or day "
2018-06-06 20:15:38 +03:00
fullDate startPos d1 sep d2 <|> partialDate startPos mYear d1 sep d2
2016-05-18 05:46:54 +03:00
<?> " full or partial date "
2018-05-23 05:17:51 +03:00
where
2018-06-06 20:15:38 +03:00
fullDate :: SourcePos -> Integer -> Char -> Int -> TextParser m Day
fullDate startPos year sep1 month = do
2018-05-23 05:17:51 +03:00
sep2 <- satisfy isDateSepChar <?> " date separator "
day <- decimal <?> " day "
2018-06-06 20:15:38 +03:00
endPos <- getPosition
2018-05-23 05:17:51 +03:00
let dateStr = show year ++ [ sep1 ] ++ show month ++ [ sep2 ] ++ show day
2018-06-06 20:15:38 +03:00
when ( sep1 /= sep2 ) $ parseErrorAtRegion startPos endPos $
2018-05-23 05:17:51 +03:00
" invalid date (mixing date separators is not allowed): " ++ dateStr
2018-05-24 07:36:19 +03:00
case fromGregorianValid year month day of
2018-06-06 20:15:38 +03:00
Nothing -> parseErrorAtRegion startPos endPos $
" well-formed but invalid date: " ++ dateStr
2018-05-27 07:42:02 +03:00
Just date -> pure $! date
2018-05-23 05:17:51 +03:00
2018-06-06 20:15:38 +03:00
partialDate
:: SourcePos -> Maybe Year -> Integer -> Char -> Int -> TextParser m Day
partialDate startPos mYear month sep day = do
endPos <- getPosition
case mYear of
Just year ->
case fromGregorianValid year ( fromIntegral month ) day of
Nothing -> parseErrorAtRegion startPos endPos $
" well-formed but invalid date: " ++ dateStr
Just date -> pure $! date
where dateStr = show year ++ [ sep ] ++ show month ++ [ sep ] ++ show day
Nothing -> parseErrorAtRegion startPos endPos $
" partial date " ++ dateStr ++ " found, but the current year is unknown "
where dateStr = show month ++ [ sep ] ++ show day
2018-05-23 05:17:51 +03:00
2018-06-06 08:44:02 +03:00
{- # INLINABLE datep' # -}
2016-05-18 05:46:54 +03:00
-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
-- Hyphen (-) and period (.) are also allowed as date separators.
-- The year may be omitted if a default year has been set.
-- Seconds are optional.
-- The timezone is optional and ignored (the time is always interpreted as a local time).
-- Leading zeroes may be omitted (except in a timezone).
2017-07-27 14:59:55 +03:00
datetimep :: JournalParser m LocalTime
2016-05-18 05:46:54 +03:00
datetimep = do
2018-06-15 21:56:38 +03:00
mYear <- getYear
lift $ datetimep' mYear
datetimep' :: Maybe Year -> TextParser m LocalTime
datetimep' mYear = do
day <- datep' mYear
skipSome spacenonewline
time <- timeOfDay
optional timeZone -- ignoring time zones
pure $ LocalTime day time
where
timeOfDay :: TextParser m TimeOfDay
timeOfDay = do
pos1 <- getPosition
h' <- twoDigitDecimal <?> " hour "
pos2 <- getPosition
unless ( h' >= 0 && h' <= 23 ) $ parseErrorAtRegion pos1 pos2
" invalid time (bad hour) "
char ':' <?> " ':' (hour-minute separator) "
pos3 <- getPosition
m' <- twoDigitDecimal <?> " minute "
pos4 <- getPosition
unless ( m' >= 0 && m' <= 59 ) $ parseErrorAtRegion pos3 pos4
" invalid time (bad minute) "
s' <- option 0 $ do
char ':' <?> " ':' (minute-second separator) "
pos5 <- getPosition
s' <- twoDigitDecimal <?> " second "
pos6 <- getPosition
unless ( s' >= 0 && s' <= 59 ) $ parseErrorAtRegion pos5 pos6
" invalid time (bad second) " -- we do not support leap seconds
pure s'
pure $ TimeOfDay h' m' ( fromIntegral s' )
twoDigitDecimal :: TextParser m Int
twoDigitDecimal = do
d1 <- digitToInt <$> digitChar
d2 <- digitToInt <$> ( digitChar <?> " a second digit " )
pure $ d1 * 10 + d2
timeZone :: TextParser m String
timeZone = do
plusminus <- satisfy $ \ c -> c == '-' || c == '+'
fourDigits <- count 4 ( digitChar <?> " a digit (for a time zone) " )
pure $ plusminus : fourDigits
2016-05-18 05:46:54 +03:00
2018-05-24 07:36:19 +03:00
secondarydatep :: Day -> TextParser m Day
secondarydatep primaryDate = char '=' *> datep' ( Just primaryYear )
where primaryYear = first3 $ toGregorian primaryDate
2016-05-18 05:46:54 +03:00
--- ** account names
2018-06-27 13:26:08 +03:00
-- | Parse an account name (plus one following space if present),
-- then apply any parent account prefix and/or account aliases currently in effect,
-- in that order. (Ie first add the parent account prefix, then rewrite with aliases).
2017-07-27 14:59:55 +03:00
modifiedaccountnamep :: JournalParser m AccountName
2016-05-18 05:46:54 +03:00
modifiedaccountnamep = do
parent <- getParentAccount
aliases <- getAccountAliases
2016-07-29 18:57:10 +03:00
a <- lift accountnamep
2018-05-27 07:42:02 +03:00
return $!
2016-05-18 05:46:54 +03:00
accountNameApplyAliases aliases $
-- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference
joinAccountNames parent
a
2018-06-09 05:35:27 +03:00
-- | Parse an account name, plus one following space if present.
2018-07-26 10:30:32 +03:00
-- Account names have one or more parts separated by the account separator character,
2018-06-09 05:35:27 +03:00
-- and are terminated by two or more spaces (or end of input).
2018-07-26 10:30:32 +03:00
-- Each part is at least one character long, may have single spaces inside it,
-- and starts with a non-whitespace.
-- Note, this means "{account}", "%^!" and ";comment" are all accepted
-- (parent parsers usually prevent/consume the last).
-- It should have required parts to start with an alphanumeric;
-- for now it remains as-is for backwards compatibility.
2016-07-29 18:57:10 +03:00
accountnamep :: TextParser m AccountName
2018-06-09 05:35:27 +03:00
accountnamep = singlespacedtextp
-- | Parse any text beginning with a non-whitespace character, until a double space or the end of input.
-- Consumes one of the following spaces, if present.
singlespacedtextp :: TextParser m T . Text
singlespacedtextp = do
2018-05-22 04:09:47 +03:00
firstPart <- part
2018-06-09 05:35:27 +03:00
otherParts <- many $ try $ singlespacep *> part
2018-05-27 07:42:02 +03:00
pure $! T . unwords $ firstPart : otherParts
2018-05-22 04:09:47 +03:00
where
part = takeWhile1P Nothing ( not . isSpace )
2018-06-09 05:35:27 +03:00
-- | Parse one non-newline whitespace character that is not followed by another one.
singlespacep = void spacenonewline *> notFollowedBy spacenonewline
2016-05-18 05:46:54 +03:00
--- ** amounts
-- | Parse whitespace then an amount, with an optional left or right
-- currency symbol and optional price, or return the special
-- "missing" marker amount.
2018-06-06 08:52:28 +03:00
spaceandamountormissingp :: JournalParser m MixedAmount
2016-05-18 05:46:54 +03:00
spaceandamountormissingp =
2018-05-24 07:36:19 +03:00
option missingmixedamt $ try $ do
lift $ skipSome spacenonewline
Mixed . ( : [] ) <$> amountp
2016-05-18 05:46:54 +03:00
-- | Parse a single-commodity amount, with optional symbol on the left or
-- right, optional unit or total price, and optional (ignored)
-- ledger-style balance assertion or fixed lot price declaration.
2018-06-06 08:52:28 +03:00
amountp :: JournalParser m Amount
2018-06-21 03:33:29 +03:00
amountp = label " amount " $ do
2018-05-25 04:49:16 +03:00
amount <- amountwithoutpricep
2018-06-21 04:52:49 +03:00
lift $ skipMany spacenonewline
2018-05-25 04:49:16 +03:00
price <- priceamountp
pure $ amount { aprice = price }
2018-06-06 08:52:28 +03:00
amountwithoutpricep :: JournalParser m Amount
2018-06-21 03:33:29 +03:00
amountwithoutpricep = do
( mult , sign ) <- lift $ ( , ) <$> multiplierp <*> signp
leftsymbolamountp mult sign <|> rightornosymbolamountp mult sign
where
leftsymbolamountp :: Bool -> ( Decimal -> Decimal ) -> JournalParser m Amount
leftsymbolamountp mult sign = label " amount " $ do
c <- lift commoditysymbolp
suggestedStyle <- getAmountStyle c
commodityspaced <- lift $ skipMany' spacenonewline
sign2 <- lift $ signp
posBeforeNum <- getPosition
ambiguousRawNum <- lift rawnumberp
mExponent <- lift $ optional $ try exponentp
posAfterNum <- getPosition
let numRegion = ( posBeforeNum , posAfterNum )
2018-08-17 08:47:55 +03:00
( q , prec , mdec , mgrps ) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent
2018-06-21 03:33:29 +03:00
let s = amountstyle { ascommodityside = L , ascommodityspaced = commodityspaced , asprecision = prec , asdecimalpoint = mdec , asdigitgroups = mgrps }
return $ Amount c ( sign ( sign2 q ) ) NoPrice s mult
2018-08-17 08:47:55 +03:00
rightornosymbolamountp :: Bool -> ( Decimal -> Decimal ) -> JournalParser m Amount
2018-06-21 03:33:29 +03:00
rightornosymbolamountp mult sign = label " amount " $ do
posBeforeNum <- getPosition
ambiguousRawNum <- lift rawnumberp
mExponent <- lift $ optional $ try exponentp
posAfterNum <- getPosition
let numRegion = ( posBeforeNum , posAfterNum )
2018-08-17 08:47:55 +03:00
mSpaceAndCommodity <- lift $ optional $ try $ ( , ) <$> skipMany' spacenonewline <*> commoditysymbolp
2018-06-21 03:33:29 +03:00
case mSpaceAndCommodity of
2018-08-17 08:47:55 +03:00
-- right symbol amount
2018-06-21 03:33:29 +03:00
Just ( commodityspaced , c ) -> do
suggestedStyle <- getAmountStyle c
2018-08-17 08:47:55 +03:00
( q , prec , mdec , mgrps ) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent
2018-06-21 03:33:29 +03:00
let s = amountstyle { ascommodityside = R , ascommodityspaced = commodityspaced , asprecision = prec , asdecimalpoint = mdec , asdigitgroups = mgrps }
return $ Amount c ( sign q ) NoPrice s mult
2018-08-17 08:47:55 +03:00
-- no symbol amount
2018-06-21 03:33:29 +03:00
Nothing -> do
suggestedStyle <- getDefaultAmountStyle
2018-08-17 08:47:55 +03:00
( q , prec , mdec , mgrps ) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent
-- if a default commodity has been set, apply it and its style to this amount
2018-08-17 09:39:17 +03:00
-- (unless it's a multiplier in an automated posting)
2018-06-21 03:33:29 +03:00
defcs <- getDefaultCommodityAndStyle
2018-08-17 09:39:17 +03:00
let ( c , s ) = case ( mult , defcs ) of
( False , Just ( defc , defs ) ) -> ( defc , defs { asprecision = max ( asprecision defs ) prec } )
_ -> ( " " , amountstyle { asprecision = prec , asdecimalpoint = mdec , asdigitgroups = mgrps } )
2018-06-21 03:33:29 +03:00
return $ Amount c ( sign q ) NoPrice s mult
-- For reducing code duplication. Doesn't parse anything. Has the type
-- of a parser only in order to throw parse errors (for convenience).
interpretNumber
:: ( SourcePos , SourcePos )
-> Maybe AmountStyle
-> Either AmbiguousNumber RawNumber
-> Maybe Int
-> TextParser m ( Quantity , Int , Maybe Char , Maybe DigitGroupStyle )
interpretNumber posRegion suggestedStyle ambiguousNum mExp =
let rawNum = either ( disambiguateNumber suggestedStyle ) id ambiguousNum
in case fromRawNumber rawNum mExp of
Left errMsg -> uncurry parseErrorAtRegion posRegion errMsg
Right res -> pure res
2016-05-18 05:46:54 +03:00
-- | Parse an amount from a string, or get an error.
amountp' :: String -> Amount
amountp' s =
2016-07-29 18:57:10 +03:00
case runParser ( evalStateT ( amountp <* eof ) mempty ) " " ( T . pack s ) of
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
Right amt -> amt
Left err -> error ' $ s h o w e r r - - X X X s h o u l d t h r o w E r r o r
2016-05-18 05:46:54 +03:00
-- | Parse a mixed amount from a string, or get an error.
mamountp' :: String -> MixedAmount
mamountp' = Mixed . ( : [] ) . amountp'
2018-05-24 19:08:52 +03:00
signp :: Num a => TextParser m ( a -> a )
signp = char '-' *> pure negate <|> char '+' *> pure id <|> pure id
2016-05-18 05:46:54 +03:00
2017-05-30 17:30:15 +03:00
multiplierp :: TextParser m Bool
2018-05-24 07:36:19 +03:00
multiplierp = option False $ char '*' *> pure True
2017-05-30 17:30:15 +03:00
2018-03-25 16:53:44 +03:00
-- | This is like skipMany but it returns True if at least one element
-- was skipped. This is helpful if you’ re just using many to check if
-- the resulting list is empty or not.
skipMany' :: MonadPlus m => m a -> m Bool
skipMany' p = go False
where
go ! isNull = do
more <- option False ( True <$ p )
if more
then go True
else pure isNull
2016-07-29 18:57:10 +03:00
commoditysymbolp :: TextParser m CommoditySymbol
2018-05-25 05:34:00 +03:00
commoditysymbolp =
quotedcommoditysymbolp <|> simplecommoditysymbolp <?> " commodity symbol "
2016-05-18 05:46:54 +03:00
2016-07-29 18:57:10 +03:00
quotedcommoditysymbolp :: TextParser m CommoditySymbol
2018-05-22 04:52:34 +03:00
quotedcommoditysymbolp =
2018-05-24 07:36:19 +03:00
between ( char '"' ) ( char '"' ) $ takeWhile1P Nothing f
where f c = c /= ';' && c /= '\ n' && c /= '\ " '
2016-05-18 05:46:54 +03:00
2016-07-29 18:57:10 +03:00
simplecommoditysymbolp :: TextParser m CommoditySymbol
2018-05-22 04:52:34 +03:00
simplecommoditysymbolp = takeWhile1P Nothing ( not . isNonsimpleCommodityChar )
2016-05-18 05:46:54 +03:00
2018-06-06 08:52:28 +03:00
priceamountp :: JournalParser m Price
2018-06-21 04:45:11 +03:00
priceamountp = option NoPrice $ do
2018-06-21 04:52:49 +03:00
char '@'
2018-05-25 05:34:00 +03:00
priceConstructor <- char '@' *> pure TotalPrice <|> pure UnitPrice
2018-05-24 07:36:19 +03:00
lift ( skipMany spacenonewline )
2018-06-21 05:05:25 +03:00
priceAmount <- amountwithoutpricep <?> " amount (as a price) "
2018-05-24 07:36:19 +03:00
pure $ priceConstructor priceAmount
2016-05-18 05:46:54 +03:00
2018-06-06 08:52:28 +03:00
partialbalanceassertionp :: JournalParser m BalanceAssertion
2018-06-21 04:45:11 +03:00
partialbalanceassertionp = optional $ do
sourcepos <- try $ do
lift ( skipMany spacenonewline )
sourcepos <- genericSourcePos <$> lift getPosition
char '='
pure sourcepos
2018-05-24 07:36:19 +03:00
lift ( skipMany spacenonewline )
2018-06-21 05:05:25 +03:00
a <- amountp <?> " amount (for a balance assertion or assignment) " -- XXX should restrict to a simple amount
2018-05-24 07:36:19 +03:00
return ( a , sourcepos )
2016-05-18 05:46:54 +03:00
2016-07-29 18:57:10 +03:00
-- balanceassertion :: Monad m => TextParser m (Maybe MixedAmount)
2016-05-18 05:46:54 +03:00
-- balanceassertion =
-- try (do
2018-03-25 16:53:44 +03:00
-- lift (skipMany spacenonewline)
2016-05-18 05:46:54 +03:00
-- string "=="
2018-03-25 16:53:44 +03:00
-- lift (skipMany spacenonewline)
2016-05-18 05:46:54 +03:00
-- a <- amountp -- XXX should restrict to a simple amount
-- return $ Just $ Mixed [a])
-- <|> return Nothing
-- http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices
2018-06-06 08:52:28 +03:00
fixedlotpricep :: JournalParser m ( Maybe Amount )
2018-06-21 04:45:11 +03:00
fixedlotpricep = optional $ do
try $ do
lift ( skipMany spacenonewline )
char '{'
2018-05-24 07:36:19 +03:00
lift ( skipMany spacenonewline )
char '='
lift ( skipMany spacenonewline )
a <- amountp -- XXX should restrict to a simple amount
lift ( skipMany spacenonewline )
char '}'
return a
2016-05-18 05:46:54 +03:00
-- | Parse a string representation of a number for its value and display
-- attributes.
--
-- Some international number formats are accepted, eg either period or comma
-- may be used for the decimal point, and the other of these may be used for
-- separating digit groups in the integer part. See
-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
--
-- This returns: the parsed numeric value, the precision (number of digits
-- seen following the decimal point), the decimal point character used if any,
-- and the digit group style if any.
--
2017-11-05 02:40:54 +03:00
numberp :: Maybe AmountStyle -> TextParser m ( Quantity , Int , Maybe Char , Maybe DigitGroupStyle )
2018-06-21 05:05:25 +03:00
numberp suggestedStyle = label " number " $ do
2017-10-28 21:07:24 +03:00
-- a number is an optional sign followed by a sequence of digits possibly
-- interspersed with periods, commas, or both
2018-07-16 17:28:58 +03:00
-- dbgparse 0 "numberp"
2017-10-28 21:07:24 +03:00
sign <- signp
2018-05-25 02:46:17 +03:00
rawNum <- either ( disambiguateNumber suggestedStyle ) id <$> rawnumberp
2018-05-24 19:08:52 +03:00
mExp <- optional $ try $ exponentp
2018-05-25 02:46:17 +03:00
dbg8 " numberp suggestedStyle " suggestedStyle ` seq ` return ()
case dbg8 " numberp quantity,precision,mdecimalpoint,mgrps "
$ fromRawNumber rawNum mExp of
Left errMsg -> fail errMsg
Right ( q , p , d , g ) -> pure ( sign q , p , d , g )
2017-10-28 21:07:24 +03:00
2018-05-25 02:46:17 +03:00
exponentp :: TextParser m Int
2018-06-21 05:05:25 +03:00
exponentp = char' 'e' *> signp <*> decimal <?> " exponent "
2018-02-10 01:18:16 +03:00
2018-05-25 00:52:09 +03:00
-- | Interpret a raw number as a decimal number.
2018-05-23 22:45:57 +03:00
--
2018-04-21 07:39:06 +03:00
-- Returns:
-- - the decimal number
-- - the precision (number of digits after the decimal point)
-- - the decimal point character, if any
-- - the digit group style, if any (digit group character and sizes of digit groups)
2018-05-25 02:46:17 +03:00
fromRawNumber
:: RawNumber
-> Maybe Int
-> Either String
( Quantity , Int , Maybe Char , Maybe DigitGroupStyle )
fromRawNumber raw mExp = case raw of
2018-05-25 00:52:09 +03:00
NoSeparators digitGrp mDecimals ->
2018-05-27 07:54:31 +03:00
let mDecPt = fmap fst mDecimals
decimalGrp = maybe mempty snd mDecimals
2018-05-25 02:46:17 +03:00
( quantity , precision ) =
maybe id applyExp mExp $ toQuantity digitGrp decimalGrp
2018-05-27 07:54:31 +03:00
in Right ( quantity , precision , mDecPt , Nothing )
2018-05-25 00:52:09 +03:00
2018-05-27 07:54:31 +03:00
WithSeparators digitSep digitGrps mDecimals -> case mExp of
Nothing ->
let mDecPt = fmap fst mDecimals
decimalGrp = maybe mempty snd mDecimals
digitGroupStyle = DigitGroups digitSep ( groupSizes digitGrps )
2018-05-25 02:46:17 +03:00
2018-05-27 07:54:31 +03:00
( quantity , precision ) = toQuantity ( mconcat digitGrps ) decimalGrp
2018-05-25 02:46:17 +03:00
2018-05-27 07:54:31 +03:00
in Right ( quantity , precision , mDecPt , Just digitGroupStyle )
2018-06-21 05:05:25 +03:00
Just _ -> Left
" invalid number: mixing digit separators with exponents is not allowed "
2018-05-23 22:45:57 +03:00
where
-- Outputs digit group sizes from least significant to most significant
groupSizes :: [ DigitGrp ] -> [ Int ]
groupSizes digitGrps = reverse $ case map digitGroupLength digitGrps of
( a : b : cs ) | a < b -> b : cs
gs -> gs
2018-05-25 02:46:17 +03:00
toQuantity :: DigitGrp -> DigitGrp -> ( Quantity , Int )
toQuantity preDecimalGrp postDecimalGrp = ( quantity , precision )
2018-05-25 00:52:09 +03:00
where
quantity = Decimal ( fromIntegral precision )
( digitGroupNumber $ preDecimalGrp <> postDecimalGrp )
precision = digitGroupLength postDecimalGrp
2018-05-25 02:46:17 +03:00
applyExp :: Int -> ( Decimal , Int ) -> ( Decimal , Int )
applyExp exponent ( quantity , precision ) =
( quantity * 10 ^^ exponent , max 0 ( precision - exponent ) )
2018-05-25 00:52:09 +03:00
disambiguateNumber :: Maybe AmountStyle -> AmbiguousNumber -> RawNumber
disambiguateNumber suggestedStyle ( AmbiguousNumber grp1 sep grp2 ) =
-- If present, use the suggested style to disambiguate;
-- otherwise, assume that the separator is a decimal point where possible.
if isDecimalPointChar sep &&
maybe True ( sep ` isValidDecimalBy ` ) suggestedStyle
then NoSeparators grp1 ( Just ( sep , grp2 ) )
else WithSeparators sep [ grp1 , grp2 ] Nothing
where
2018-05-23 22:45:57 +03:00
isValidDecimalBy :: Char -> AmountStyle -> Bool
isValidDecimalBy c = \ case
AmountStyle { asdecimalpoint = Just d } -> d == c
AmountStyle { asdigitgroups = Just ( DigitGroups g _ ) } -> g /= c
AmountStyle { asprecision = 0 } -> False
_ -> True
2018-05-25 00:52:09 +03:00
-- | Parse and interpret the structure of a number without external hints.
-- Numbers are digit strings, possibly separated into digit groups by one
-- of two types of separators. (1) Numbers may optionally have a decimal
-- point, which may be either a period or comma. (2) Numbers may
-- optionally contain digit group separators, which must all be either a
-- period, a comma, or a space.
2018-05-11 10:10:04 +03:00
--
2018-05-23 22:45:57 +03:00
-- It is our task to deduce the identities of the decimal point and digit
-- separator characters, based on the allowed syntax. For instance, we
-- make use of the fact that a decimal point can occur at most once and
-- must succeed all digit group separators.
2018-05-11 10:10:04 +03:00
--
-- >>> parseTest rawnumberp "1,234,567.89"
2018-05-25 00:52:09 +03:00
-- Right (WithSeparators ',' ["1","234","567"] (Just ('.',"89")))
-- >>> parseTest rawnumberp "1,000"
-- Left (AmbiguousNumber "1" ',' "000")
2018-05-11 10:10:04 +03:00
-- >>> parseTest rawnumberp "1 000"
2018-05-25 00:52:09 +03:00
-- Right (WithSeparators ' ' ["1","000"] Nothing)
2018-05-23 22:45:57 +03:00
--
2018-05-25 00:52:09 +03:00
rawnumberp :: TextParser m ( Either AmbiguousNumber RawNumber )
2018-06-21 05:05:25 +03:00
rawnumberp = label " number " $ do
2018-05-25 00:52:09 +03:00
rawNumber <- fmap Right leadingDecimalPt <|> leadingDigits
2018-06-21 05:05:25 +03:00
2018-05-23 22:45:57 +03:00
-- Guard against mistyped numbers
2018-06-21 05:05:25 +03:00
mExtraDecimalSep <- optional $ lookAhead $ satisfy isDecimalPointChar
when ( isJust mExtraDecimalSep ) $
fail " invalid number (invalid use of separator) "
mExtraFragment <- optional $ lookAhead $ try $
char ' ' *> getPosition <* digitChar
case mExtraFragment of
Just pos -> parseErrorAt pos " invalid number (excessive trailing digits) "
Nothing -> pure ()
2018-05-23 22:45:57 +03:00
return $ dbg8 " rawnumberp " rawNumber
where
leadingDecimalPt :: TextParser m RawNumber
2018-05-25 00:52:09 +03:00
leadingDecimalPt = do
decPt <- satisfy isDecimalPointChar
decGrp <- digitgroupp
pure $ NoSeparators mempty ( Just ( decPt , decGrp ) )
2018-05-23 22:45:57 +03:00
2018-05-25 00:52:09 +03:00
leadingDigits :: TextParser m ( Either AmbiguousNumber RawNumber )
2018-05-23 22:45:57 +03:00
leadingDigits = do
2018-05-25 00:29:20 +03:00
grp1 <- digitgroupp
2018-05-25 00:52:09 +03:00
withSeparators grp1 <|> fmap Right ( trailingDecimalPt grp1 )
<|> pure ( Right $ NoSeparators grp1 Nothing )
2018-05-23 22:45:57 +03:00
2018-05-25 00:52:09 +03:00
withSeparators :: DigitGrp -> TextParser m ( Either AmbiguousNumber RawNumber )
2018-05-23 22:45:57 +03:00
withSeparators grp1 = do
2018-05-25 00:29:20 +03:00
( sep , grp2 ) <- try $ ( , ) <$> satisfy isDigitSeparatorChar <*> digitgroupp
grps <- many $ try $ char sep *> digitgroupp
2018-05-23 22:45:57 +03:00
let digitGroups = grp1 : grp2 : grps
2018-05-25 00:52:09 +03:00
fmap Right ( withDecimalPt sep digitGroups )
<|> pure ( withoutDecimalPt grp1 sep grp2 grps )
2018-05-23 22:45:57 +03:00
withDecimalPt :: Char -> [ DigitGrp ] -> TextParser m RawNumber
withDecimalPt digitSep digitGroups = do
2018-05-25 00:52:09 +03:00
decPt <- satisfy $ \ c -> isDecimalPointChar c && c /= digitSep
decDigitGrp <- option mempty digitgroupp
2018-05-23 22:45:57 +03:00
2018-05-25 00:52:09 +03:00
pure $ WithSeparators digitSep digitGroups ( Just ( decPt , decDigitGrp ) )
2018-05-23 22:45:57 +03:00
2018-05-25 00:52:09 +03:00
withoutDecimalPt
:: DigitGrp
-> Char
-> DigitGrp
-> [ DigitGrp ]
-> Either AmbiguousNumber RawNumber
2018-05-23 22:45:57 +03:00
withoutDecimalPt grp1 sep grp2 grps
2018-05-25 00:52:09 +03:00
| null grps && isDecimalPointChar sep =
Left $ AmbiguousNumber grp1 sep grp2
| otherwise = Right $ WithSeparators sep ( grp1 : grp2 : grps ) Nothing
2018-05-23 22:45:57 +03:00
trailingDecimalPt :: DigitGrp -> TextParser m RawNumber
trailingDecimalPt grp1 = do
2018-05-25 00:52:09 +03:00
decPt <- satisfy isDecimalPointChar
pure $ NoSeparators grp1 ( Just ( decPt , mempty ) )
2018-05-23 22:45:57 +03:00
isDecimalPointChar :: Char -> Bool
isDecimalPointChar c = c == '.' || c == ','
isDigitSeparatorChar :: Char -> Bool
isDigitSeparatorChar c = isDecimalPointChar c || c == ' '
data DigitGrp = DigitGrp {
2018-05-27 07:42:02 +03:00
digitGroupLength :: ! Int ,
digitGroupNumber :: ! Integer
2018-05-23 22:45:57 +03:00
} deriving ( Eq )
instance Show DigitGrp where
show ( DigitGrp len num )
| len > 0 = " \ " " ++ padding ++ numStr ++ " \ " "
| otherwise = " \ " \ " "
where numStr = show num
padding = replicate ( len - length numStr ) '0'
instance Sem . Semigroup DigitGrp where
DigitGrp l1 n1 <> DigitGrp l2 n2 = DigitGrp ( l1 + l2 ) ( n1 * 10 ^ l2 + n2 )
instance Monoid DigitGrp where
mempty = DigitGrp 0 0
mappend = ( Sem .<> )
2018-05-25 00:29:20 +03:00
digitgroupp :: TextParser m DigitGrp
2018-06-21 05:05:25 +03:00
digitgroupp = label " digits "
2018-05-23 22:45:57 +03:00
$ makeGroup <$> takeWhile1P ( Just " digit " ) isDigit
where
makeGroup = uncurry DigitGrp . foldl' step ( 0 , 0 ) . T . unpack
step ( ! l , ! a ) c = ( l + 1 , a * 10 + fromIntegral ( digitToInt c ) )
data RawNumber
2018-05-25 00:52:09 +03:00
= NoSeparators DigitGrp ( Maybe ( Char , DigitGrp ) ) -- 100 or 100. or .100 or 100.50
| WithSeparators Char [ DigitGrp ] ( Maybe ( Char , DigitGrp ) ) -- 1,000,000 or 1,000.50
deriving ( Show , Eq )
data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp -- 1,000
2018-05-23 22:45:57 +03:00
deriving ( Show , Eq )
2016-05-18 05:46:54 +03:00
--- ** comments
2018-05-16 03:59:49 +03:00
multilinecommentp :: TextParser m ()
2018-05-11 00:23:19 +03:00
multilinecommentp = startComment *> anyLine ` skipManyTill ` endComment
2016-05-18 05:46:54 +03:00
where
2018-06-06 08:44:02 +03:00
startComment = string " comment " *> trailingSpaces
endComment = eof <|> string " end comment " *> trailingSpaces
trailingSpaces = skipMany spacenonewline <* newline
anyLine = void $ takeWhileP Nothing ( \ c -> c /= '\ n' ) *> newline
2018-05-17 06:31:56 +03:00
2018-06-06 08:44:02 +03:00
{- # INLINABLE multilinecommentp # -}
2016-05-18 05:46:54 +03:00
2018-05-16 03:59:49 +03:00
emptyorcommentlinep :: TextParser m ()
2016-05-18 05:46:54 +03:00
emptyorcommentlinep = do
2018-05-16 03:59:49 +03:00
skipMany spacenonewline
2018-06-06 08:44:02 +03:00
skiplinecommentp <|> void newline
where
-- A line (file-level) comment can start with a semicolon, hash, or star
-- (allowing org nodes).
skiplinecommentp :: TextParser m ()
skiplinecommentp = do
satisfy $ \ c -> c == ';' || c == '#' || c == '*'
void $ takeWhileP Nothing ( \ c -> c /= '\ n' )
optional newline
pure ()
{- # INLINABLE emptyorcommentlinep # -}
-- A parser combinator for parsing (possibly multiline) comments
-- following journal items.
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- Several journal items may be followed by comments, which begin with
-- semicolons and extend to the end of the line. Such comments may span
-- multiple lines, but comment lines below the journal item must be
-- preceeded by leading whitespace.
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- This parser combinator accepts a parser that consumes all input up
-- until the next newline. This parser should extract the "content" from
-- comments. The resulting parser returns this content plus the raw text
-- of the comment itself.
2018-08-03 21:38:55 +03:00
--
-- See followingcommentp for tests.
--
followingcommentp' :: ( Monoid a , Show a ) => TextParser m a -> TextParser m ( Text , a )
2018-06-06 08:44:02 +03:00
followingcommentp' contentp = do
skipMany spacenonewline
2018-08-03 21:38:55 +03:00
-- there can be 0 or 1 sameLine
sameLine <- try headerp *> ( ( : [] ) <$> match' contentp ) <|> pure []
2018-06-06 08:44:02 +03:00
_ <- eolof
2018-08-03 21:38:55 +03:00
-- there can be 0 or more nextLines
nextLines <- many $
2018-06-06 08:44:02 +03:00
try ( skipSome spacenonewline *> headerp ) *> match' contentp <* eolof
2018-08-03 21:38:55 +03:00
let
-- if there's just a next-line comment, insert an empty same-line comment
-- so the next-line comment doesn't get rendered as a same-line comment.
sameLine' | null sameLine && not ( null nextLines ) = [ ( " " , mempty ) ]
| otherwise = sameLine
( texts , contents ) = unzip $ sameLine' ++ nextLines
strippedCommentText = T . unlines $ map T . strip texts
commentContent = mconcat contents
pure ( strippedCommentText , commentContent )
2018-05-11 04:02:28 +03:00
2018-05-11 02:30:00 +03:00
where
2018-06-06 08:44:02 +03:00
headerp = char ';' *> skipMany spacenonewline
{- # INLINABLE followingcommentp' # -}
2018-08-03 21:38:55 +03:00
-- | Parse the text of a (possibly multiline) comment following a journal item.
--
-- >>> rtp followingcommentp "" -- no comment
-- Right ""
-- >>> rtp followingcommentp ";" -- just a (empty) same-line comment. newline is added
-- Right "\n"
-- >>> rtp followingcommentp "; \n"
-- Right "\n"
-- >>> rtp followingcommentp ";\n ;\n" -- a same-line and a next-line comment
-- Right "\n\n"
-- >>> rtp followingcommentp "\n ;\n" -- just a next-line comment. Insert an empty same-line comment so the next-line comment doesn't become a same-line comment.
-- Right "\n\n"
--
2018-06-06 08:44:02 +03:00
followingcommentp :: TextParser m Text
followingcommentp =
fst <$> followingcommentp' ( void $ takeWhileP Nothing ( /= '\ n' ) )
{- # INLINABLE followingcommentp # -}
2016-05-18 05:46:54 +03:00
2018-06-06 08:44:02 +03:00
-- | Parse a transaction comment and extract its tags.
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- The first line of a transaction may be followed by comments, which
-- begin with semicolons and extend to the end of the line. Transaction
-- comments may span multiple lines, but comment lines below the
-- transaction must be preceeded by leading whitespace.
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- 2000/1/1 ; a transaction comment starting on the same line ...
-- ; extending to the next line
-- account1 $1
-- account2
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- Tags are name-value pairs.
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- >>> let getTags (_,tags) = tags
-- >>> let parseTags = fmap getTags . rtp transactioncommentp
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
-- >>> parseTags "; name1: val1, name2:all this is value2"
-- Right [("name1","val1"),("name2","all this is value2")]
2018-05-12 21:25:02 +03:00
--
2018-06-06 08:44:02 +03:00
-- A tag's name must be immediately followed by a colon, without
-- separating whitespace. The corresponding value consists of all the text
-- following the colon up until the next colon or newline, stripped of
-- leading and trailing whitespace.
--
transactioncommentp :: TextParser m ( Text , [ Tag ] )
transactioncommentp = followingcommentp' commenttagsp
{- # INLINABLE transactioncommentp # -}
2016-05-18 05:46:54 +03:00
2018-06-06 08:44:02 +03:00
commenttagsp :: TextParser m [ Tag ]
commenttagsp = do
tagName <- fmap ( last . T . split isSpace )
$ takeWhileP Nothing ( \ c -> c /= ':' && c /= '\ n' )
atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF
2018-05-12 21:25:02 +03:00
where
2018-06-06 08:44:02 +03:00
atColon :: Text -> TextParser m [ Tag ]
atColon name = char ':' *> do
if T . null name
then commenttagsp
else do
skipMany spacenonewline
val <- tagValue
let tag = ( name , val )
( tag : ) <$> commenttagsp
2018-05-12 21:25:02 +03:00
2018-06-06 08:44:02 +03:00
tagValue :: TextParser m Text
tagValue = do
val <- T . strip <$> takeWhileP Nothing ( \ c -> c /= ',' && c /= '\ n' )
_ <- optional $ char ','
pure val
2018-05-12 21:25:02 +03:00
2018-06-06 08:44:02 +03:00
{- # INLINABLE commenttagsp # -}
2018-05-12 21:25:02 +03:00
2018-06-06 08:44:02 +03:00
-- | Parse a posting comment and extract its tags and dates.
--
-- Postings may be followed by comments, which begin with semicolons and
-- extend to the end of the line. Posting comments may span multiple
-- lines, but comment lines below the posting must be preceeded by
-- leading whitespace.
--
-- 2000/1/1
-- account1 $1 ; a posting comment starting on the same line ...
-- ; extending to the next line
--
-- account2
-- ; a posting comment beginning on the next line
--
-- Tags are name-value pairs.
--
-- >>> let getTags (_,tags,_,_) = tags
-- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing)
--
-- >>> parseTags "; name1: val1, name2:all this is value2"
-- Right [("name1","val1"),("name2","all this is value2")]
--
-- A tag's name must be immediately followed by a colon, without
-- separating whitespace. The corresponding value consists of all the text
-- following the colon up until the next colon or newline, stripped of
-- leading and trailing whitespace.
--
-- Posting dates may be expressed with "date"/"date2" tags or with
-- bracketed date syntax. Posting dates will inherit their year from the
-- transaction date if the year is not specified. We throw parse errors on
-- invalid dates.
--
-- >>> let getDates (_,_,d1,d2) = (d1, d2)
-- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000))
--
-- >>> parseDates "; date: 1/2, date2: 1999/12/31"
-- Right (Just 2000-01-02,Just 1999-12-31)
-- >>> parseDates "; [1/2=1999/12/31]"
-- Right (Just 2000-01-02,Just 1999-12-31)
--
-- Example: tags, date tags, and bracketed dates
-- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]"
-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
--
-- Example: extraction of dates from date tags ignores trailing text
-- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6"
-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
--
postingcommentp
:: Maybe Year -> TextParser m ( Text , [ Tag ] , Maybe Day , Maybe Day )
postingcommentp mYear = do
( commentText , ( tags , dateTags ) ) <-
followingcommentp' ( commenttagsanddatesp mYear )
let mdate = fmap snd $ find ( ( == " date " ) . fst ) dateTags
mdate2 = fmap snd $ find ( ( == " date2 " ) . fst ) dateTags
pure ( commentText , tags , mdate , mdate2 )
{- # INLINABLE postingcommentp # -}
commenttagsanddatesp
:: Maybe Year -> TextParser m ( [ Tag ] , [ DateTag ] )
commenttagsanddatesp mYear = do
( txt , dateTags ) <- match $ readUpTo ':'
-- next char is either ':' or '\n' (or EOF)
let tagName = last ( T . split isSpace txt )
( fmap . second ) ( dateTags ++ ) ( atColon tagName ) <|> pure ( [] , dateTags ) -- if not ':', then either '\n' or EOF
2016-05-18 05:46:54 +03:00
2018-06-06 08:44:02 +03:00
where
readUpTo :: Char -> TextParser m [ DateTag ]
readUpTo end = do
void $ takeWhileP Nothing ( \ c -> c /= end && c /= '\ n' && c /= '[' )
-- if not '[' then ':' or '\n' or EOF
atBracket ( readUpTo end ) <|> pure []
atBracket :: TextParser m [ DateTag ] -> TextParser m [ DateTag ]
atBracket cont = do
-- Uses the fact that bracketed date-tags cannot contain newlines
dateTags <- option [] $ lookAhead ( bracketeddatetagsp mYear )
_ <- char '['
dateTags' <- cont
pure $ dateTags ++ dateTags'
atColon :: Text -> TextParser m ( [ Tag ] , [ DateTag ] )
atColon name = char ':' *> do
skipMany spacenonewline
( tags , dateTags ) <- case name of
" " -> pure ( [] , [] )
" date " -> dateValue name
" date2 " -> dateValue name
_ -> tagValue name
_ <- optional $ char ','
bimap ( tags ++ ) ( dateTags ++ ) <$> commenttagsanddatesp mYear
dateValue :: Text -> TextParser m ( [ Tag ] , [ DateTag ] )
dateValue name = do
( txt , ( date , dateTags ) ) <- match' $ do
date <- datep' mYear
dateTags <- readUpTo ','
pure ( date , dateTags )
let val = T . strip txt
pure $ ( [ ( name , val ) ]
, ( name , date ) : dateTags )
tagValue :: Text -> TextParser m ( [ Tag ] , [ DateTag ] )
tagValue name = do
( txt , dateTags ) <- match' $ readUpTo ','
let val = T . strip txt
pure $ ( [ ( name , val ) ]
, dateTags )
{- # INLINABLE commenttagsanddatesp # -}
2016-05-18 05:46:54 +03:00
--- ** bracketed dates
-- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as
-- "date" and/or "date2" tags. Anything that looks like an attempt at
-- this (a square-bracketed sequence of 0123456789/-.= containing at
-- least one digit and one date separator) is also parsed, and will
-- throw an appropriate error.
--
-- The dates are parsed in full here so that errors are reported in
-- the right position. A missing year in DATE can be inferred if a
-- default date is provided. A missing year in DATE2 will be inferred
-- from DATE.
--
2018-05-25 02:46:17 +03:00
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]"
2016-05-18 05:46:54 +03:00
-- Right [("date",2016-01-02),("date2",2016-03-04)]
--
2018-05-25 02:46:17 +03:00
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[1]"
2016-05-18 05:46:54 +03:00
-- Left ...not a bracketed date...
--
2018-05-25 02:46:17 +03:00
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/32]"
2018-05-23 05:17:51 +03:00
-- Left ...1:11:...well-formed but invalid date: 2016/1/32...
2016-05-18 05:46:54 +03:00
--
2018-05-25 02:46:17 +03:00
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[1/31]"
2016-07-29 18:57:10 +03:00
-- Left ...1:6:...partial date 1/31 found, but the current year is unknown...
2016-05-18 05:46:54 +03:00
--
2018-05-25 02:46:17 +03:00
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
2018-05-23 05:17:51 +03:00
-- Left ...1:13:...expecting month or day...
2016-05-18 05:46:54 +03:00
--
2018-06-06 08:44:02 +03:00
bracketeddatetagsp
:: Maybe Year -> TextParser m [ ( TagName , Day ) ]
bracketeddatetagsp mYear1 = do
2018-07-16 17:28:58 +03:00
-- dbgparse 0 "bracketeddatetagsp"
2018-05-16 18:28:06 +03:00
try $ do
2018-05-17 06:31:56 +03:00
s <- lookAhead
$ between ( char '[' ) ( char ']' )
2018-05-22 04:52:34 +03:00
$ takeWhile1P Nothing isBracketedDateChar
unless ( T . any isDigit s && T . any isDateSepChar s ) $
2018-05-16 18:28:06 +03:00
fail " not a bracketed date "
-- Looks sufficiently like a bracketed date to commit to parsing a date
2018-05-17 06:31:56 +03:00
2018-05-16 18:28:06 +03:00
between ( char '[' ) ( char ']' ) $ do
2018-06-06 08:44:02 +03:00
md1 <- optional $ datep' mYear1
2018-05-17 06:31:56 +03:00
2018-06-06 08:44:02 +03:00
let mYear2 = fmap readYear md1 <|> mYear1
md2 <- optional $ char '=' *> datep' mYear2
2018-05-17 06:31:56 +03:00
2018-05-16 18:28:06 +03:00
pure $ catMaybes [ ( " date " , ) <$> md1 , ( " date2 " , ) <$> md2 ]
2018-05-17 06:31:56 +03:00
2018-05-22 04:52:34 +03:00
where
readYear = first3 . toGregorian
isBracketedDateChar c = isDigit c || isDateSepChar c || c == '='
2018-06-06 08:44:02 +03:00
{- # INLINABLE bracketeddatetagsp # -}
--- ** helper parsers
-- A version of `match` that is strict in the returned text
match' :: TextParser m a -> TextParser m ( Text , a )
match' p = do
( ! txt , p ) <- match p
pure ( txt , p )
2018-06-18 01:23:41 +03:00
2018-08-20 17:07:22 +03:00
--- * tests
2018-08-15 21:43:29 +03:00
2018-09-06 23:08:26 +03:00
tests_Common = tests " Common " [
2018-08-20 17:07:22 +03:00
2018-08-18 09:33:41 +03:00
tests " amountp " [
2018-08-17 15:38:58 +03:00
test " basic " $ expectParseEq amountp " $47.18 " ( usd 47.18 )
2018-08-20 16:43:35 +03:00
, test " ends with decimal mark " $ expectParseEq amountp " $1. " ( usd 1 ` withPrecision ` 0 )
, test " unit price " $ expectParseEq amountp " $10 @ €0.5 "
2018-08-16 08:34:56 +03:00
-- not precise enough:
-- (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1)) -- `withStyle` asdecimalpoint=Just '.'
amount {
acommodity = " $ "
, aquantity = 10 -- need to test internal precision with roundTo ? I think not
, astyle = amountstyle { asprecision = 0 , asdecimalpoint = Nothing }
, aprice = UnitPrice $
amount {
acommodity = " € "
, aquantity = 0.5
, astyle = amountstyle { asprecision = 1 , asdecimalpoint = Just '.' }
}
}
2018-08-20 16:43:35 +03:00
, test " total price " $ expectParseEq amountp " $10 @@ €5 "
2018-08-16 08:34:56 +03:00
amount {
acommodity = " $ "
, aquantity = 10
, astyle = amountstyle { asprecision = 0 , asdecimalpoint = Nothing }
, aprice = TotalPrice $
amount {
acommodity = " € "
, aquantity = 5
, astyle = amountstyle { asprecision = 0 , asdecimalpoint = Nothing }
}
}
2018-08-15 21:43:29 +03:00
]
2018-08-20 17:07:22 +03:00
, let p = lift ( numberp Nothing ) :: JournalParser IO ( Quantity , Int , Maybe Char , Maybe DigitGroupStyle ) in
tests " numberp " [
test " . " $ expectParseEq p " 0 " ( 0 , 0 , Nothing , Nothing )
, test " . " $ expectParseEq p " 1 " ( 1 , 0 , Nothing , Nothing )
, test " . " $ expectParseEq p " 1.1 " ( 1.1 , 1 , Just '.' , Nothing )
, test " . " $ expectParseEq p " 1,000.1 " ( 1000.1 , 1 , Just '.' , Just $ DigitGroups ',' [ 3 ] )
, test " . " $ expectParseEq p " 1.00.000,1 " ( 100000.1 , 1 , Just ',' , Just $ DigitGroups '.' [ 3 , 2 ] )
, test " . " $ expectParseEq p " 1,000,000 " ( 1000000 , 0 , Nothing , Just $ DigitGroups ',' [ 3 , 3 ] ) -- could be simplified to [3]
, test " . " $ expectParseEq p " 1. " ( 1 , 0 , Just '.' , Nothing )
, test " . " $ expectParseEq p " 1, " ( 1 , 0 , Just ',' , Nothing )
, test " . " $ expectParseEq p " .1 " ( 0.1 , 1 , Just '.' , Nothing )
, test " . " $ expectParseEq p " ,1 " ( 0.1 , 1 , Just ',' , Nothing )
, test " . " $ expectParseError p " " " "
, test " . " $ expectParseError p " 1,000.000,1 " " "
, test " . " $ expectParseError p " 1.000,000.1 " " "
, test " . " $ expectParseError p " 1,000.000.1 " " "
, test " . " $ expectParseError p " 1,,1 " " "
, test " . " $ expectParseError p " 1..1 " " "
, test " . " $ expectParseError p " .1, " " "
, test " . " $ expectParseError p " ,1. " " "
]
, tests " spaceandamountormissingp " [
test " space and amount " $ expectParseEq spaceandamountormissingp " $47.18 " ( Mixed [ usd 47.18 ] )
, test " empty string " $ expectParseEq spaceandamountormissingp " " missingmixedamt
, _test " just space " $ expectParseEq spaceandamountormissingp " " missingmixedamt -- XXX should it ?
-- ,test "just amount" $ expectParseError spaceandamountormissingp "$47.18" "" -- succeeds, consuming nothing
]
2018-08-15 21:43:29 +03:00
]