2016-05-18 05:46:54 +03:00
|
|
|
|
--- * doc
|
|
|
|
|
-- Lines beginning "--- *" are collapsible orgstruct nodes. Emacs users,
|
|
|
|
|
-- (add-hook 'haskell-mode-hook
|
|
|
|
|
-- (lambda () (set-variable 'orgstruct-heading-prefix-regexp "--- " t))
|
|
|
|
|
-- 'orgstruct-mode)
|
|
|
|
|
-- and press TAB on nodes to expand/collapse.
|
|
|
|
|
|
|
|
|
|
{-|
|
|
|
|
|
|
2016-05-19 02:37:31 +03:00
|
|
|
|
Some common parsers and helpers used by several readers.
|
|
|
|
|
Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
--- * module
|
2018-03-25 16:53:44 +03:00
|
|
|
|
{-# LANGUAGE CPP, BangPatterns, DeriveDataTypeable, RecordWildCards, NamedFieldPuns, NoMonoLocalBinds, ScopedTypeVariables, FlexibleContexts, TupleSections, OverloadedStrings #-}
|
2017-11-05 02:40:54 +03:00
|
|
|
|
{-# LANGUAGE LambdaCase #-}
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2018-05-10 22:58:55 +03:00
|
|
|
|
module Hledger.Read.Common (
|
|
|
|
|
Reader (..),
|
|
|
|
|
InputOpts (..),
|
2018-05-14 06:37:00 +03:00
|
|
|
|
definputopts,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
rawOptsToInputOpts,
|
|
|
|
|
|
|
|
|
|
-- * parsing utilities
|
|
|
|
|
runTextParser,
|
2018-05-11 17:17:38 +03:00
|
|
|
|
rtp,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
runJournalParser,
|
|
|
|
|
rjp,
|
|
|
|
|
runErroringJournalParser,
|
|
|
|
|
rejp,
|
|
|
|
|
genericSourcePos,
|
|
|
|
|
journalSourcePos,
|
|
|
|
|
generateAutomaticPostings,
|
|
|
|
|
parseAndFinaliseJournal,
|
2018-05-11 17:17:38 +03:00
|
|
|
|
parseAndFinaliseJournal', -- TODO unused ? check addons
|
2018-05-10 22:58:55 +03:00
|
|
|
|
setYear,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
getYear,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
setDefaultCommodityAndStyle,
|
|
|
|
|
getDefaultCommodityAndStyle,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
getDefaultAmountStyle,
|
|
|
|
|
getAmountStyle,
|
2018-05-11 17:17:38 +03:00
|
|
|
|
pushAccount,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
pushParentAccount,
|
|
|
|
|
popParentAccount,
|
|
|
|
|
getParentAccount,
|
|
|
|
|
addAccountAlias,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
getAccountAliases,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
clearAccountAliases,
|
|
|
|
|
journalAddFile,
|
|
|
|
|
parserErrorAt,
|
|
|
|
|
|
|
|
|
|
-- * parsers
|
|
|
|
|
-- ** transaction bits
|
|
|
|
|
statusp,
|
|
|
|
|
codep,
|
|
|
|
|
descriptionp,
|
|
|
|
|
|
|
|
|
|
-- ** dates
|
|
|
|
|
datep,
|
|
|
|
|
datetimep,
|
|
|
|
|
secondarydatep,
|
|
|
|
|
|
|
|
|
|
-- ** account names
|
|
|
|
|
modifiedaccountnamep,
|
|
|
|
|
accountnamep,
|
|
|
|
|
|
|
|
|
|
-- ** amounts
|
|
|
|
|
spaceandamountormissingp,
|
|
|
|
|
amountp,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
amountp',
|
2018-05-10 22:58:55 +03:00
|
|
|
|
mamountp',
|
|
|
|
|
commoditysymbolp,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
priceamountp,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
partialbalanceassertionp,
|
|
|
|
|
fixedlotpricep,
|
|
|
|
|
numberp,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
fromRawNumber,
|
|
|
|
|
rawnumberp,
|
2018-05-10 22:58:55 +03:00
|
|
|
|
|
|
|
|
|
-- ** comments
|
|
|
|
|
multilinecommentp,
|
|
|
|
|
emptyorcommentlinep,
|
|
|
|
|
followingcommentp,
|
|
|
|
|
followingcommentandtagsp,
|
|
|
|
|
|
|
|
|
|
-- ** tags
|
|
|
|
|
commentTags,
|
2018-05-14 06:37:00 +03:00
|
|
|
|
tagsp,
|
|
|
|
|
|
|
|
|
|
-- ** bracketed dates
|
|
|
|
|
bracketeddatetagsp
|
2018-05-10 22:58:55 +03:00
|
|
|
|
)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
where
|
|
|
|
|
--- * imports
|
|
|
|
|
import Prelude ()
|
|
|
|
|
import Prelude.Compat hiding (readFile)
|
|
|
|
|
import Control.Monad.Compat
|
2016-07-29 18:57:10 +03:00
|
|
|
|
import Control.Monad.Except (ExceptT(..), runExceptT, throwError) --, catchError)
|
|
|
|
|
import Control.Monad.State.Strict
|
2017-11-02 10:36:49 +03:00
|
|
|
|
import Data.Char
|
2017-09-15 03:41:42 +03:00
|
|
|
|
import Data.Data
|
2018-05-25 00:52:09 +03:00
|
|
|
|
import Data.Decimal (DecimalRaw (Decimal), Decimal)
|
2017-09-15 03:41:42 +03:00
|
|
|
|
import Data.Default
|
2016-05-18 05:46:54 +03:00
|
|
|
|
import Data.Functor.Identity
|
|
|
|
|
import Data.List.Compat
|
2016-07-29 18:57:10 +03:00
|
|
|
|
import Data.List.NonEmpty (NonEmpty(..))
|
2016-05-18 05:46:54 +03:00
|
|
|
|
import Data.Maybe
|
2018-05-23 22:45:57 +03:00
|
|
|
|
#if !(MIN_VERSION_base(4,11,0))
|
|
|
|
|
import Data.Monoid
|
|
|
|
|
#endif
|
2017-10-28 18:14:54 +03:00
|
|
|
|
import qualified Data.Map as M
|
2018-05-23 22:45:57 +03:00
|
|
|
|
import qualified Data.Semigroup as Sem
|
lib: textification: saved journal source
Slightly worse on small files, better on large ones.
hledger -f data/100x100x10.journal stats
<<ghc: 39305392 bytes, 77 GCs, 196354/268584 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.007 elapsed), 0.014 MUT (0.027 elapsed), 0.011 GC (0.111 elapsed) :ghc>>
<<ghc: 39307728 bytes, 77 GCs, 196909/270248 avg/max bytes residency (3 samples), 2M in use, 0.001 INIT (0.010 elapsed), 0.015 MUT (0.028 elapsed), 0.012 GC (0.116 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314285912 bytes, 612 GCs, 2064811/6597608 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.002 elapsed), 0.126 MUT (0.134 elapsed), 0.059 GC (0.069 elapsed) :ghc>>
<<ghc: 314271368 bytes, 612 GCs, 2070227/6628024 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.137 MUT (0.143 elapsed), 0.059 GC (0.068 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070033264 bytes, 5965 GCs, 12699294/62962464 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.000 elapsed), 1.245 MUT (1.300 elapsed), 0.498 GC (0.558 elapsed) :ghc>>
<<ghc: 3070006752 bytes, 5973 GCs, 12687314/62848920 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.257 MUT (1.281 elapsed), 0.496 GC (0.554 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753465088 bytes, 59763 GCs, 117723618/666643528 avg/max bytes residency (14 samples), 1589M in use, 0.000 INIT (0.002 elapsed), 12.536 MUT (12.793 elapsed), 5.978 GC (7.155 elapsed) :ghc>>
<<ghc: 30753367256 bytes, 59811 GCs, 117723236/666627528 avg/max bytes residency (14 samples), 1590M in use, 0.001 INIT (0.012 elapsed), 12.923 MUT (13.169 elapsed), 5.981 GC (6.860 elapsed) :ghc>>
2016-05-24 05:24:39 +03:00
|
|
|
|
import Data.Text (Text)
|
lib: textification begins! account names
The first of several conversions from String to (strict) Text, hopefully
reducing space and time usage.
This one shows a small improvement, with GHC 7.10.3 and text-1.2.2.1:
hledger -f data/100x100x10.journal stats
string: <<ghc: 39471064 bytes, 77 GCs, 198421/275048 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.001 elapsed), 0.015 MUT (0.020 elapsed), 0.010 GC (0.014 elapsed) :ghc>>
text: <<ghc: 39268024 bytes, 77 GCs, 197018/270840 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.002 elapsed), 0.016 MUT (0.022 elapsed), 0.009 GC (0.011 elapsed) :ghc>>
hledger -f data/1000x100x10.journal stats
string: <<ghc: 318555920 bytes, 617 GCs, 2178997/7134472 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.001 elapsed), 0.129 MUT (0.136 elapsed), 0.067 GC (0.077 elapsed) :ghc>>
text: <<ghc: 314248496 bytes, 612 GCs, 2074045/6617960 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.003 elapsed), 0.137 MUT (0.145 elapsed), 0.067 GC (0.079 elapsed) :ghc>>
hledger -f data/10000x100x10.journal stats
string: <<ghc: 3114763608 bytes, 6026 GCs, 18858950/75552024 avg/max bytes residency (11 samples), 201M in use, 0.000 INIT (0.000 elapsed), 1.331 MUT (1.372 elapsed), 0.699 GC (0.812 elapsed) :ghc>>
text: <<ghc: 3071468920 bytes, 5968 GCs, 14120344/62951360 avg/max bytes residency (9 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.272 MUT (1.349 elapsed), 0.513 GC (0.578 elapsed) :ghc>>
hledger -f data/100000x100x10.journal stats
string: <<ghc: 31186579432 bytes, 60278 GCs, 135332581/740228992 avg/max bytes residency (13 samples), 1697M in use, 0.000 INIT (0.008 elapsed), 14.677 MUT (15.508 elapsed), 7.081 GC (8.074 elapsed) :ghc>>
text: <<ghc: 30753427672 bytes, 59763 GCs, 117595958/666457240 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.008 elapsed), 13.713 MUT (13.966 elapsed), 6.220 GC (7.108 elapsed) :ghc>>
2016-05-24 04:16:21 +03:00
|
|
|
|
import qualified Data.Text as T
|
2016-05-18 05:46:54 +03:00
|
|
|
|
import Data.Time.Calendar
|
|
|
|
|
import Data.Time.LocalTime
|
2018-05-22 01:47:56 +03:00
|
|
|
|
import Data.Void (Void)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
import System.Time (getClockTime)
|
2018-05-22 01:47:56 +03:00
|
|
|
|
import Text.Megaparsec
|
|
|
|
|
import Text.Megaparsec.Char
|
2018-05-23 05:17:51 +03:00
|
|
|
|
import Text.Megaparsec.Char.Lexer (decimal)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
import Hledger.Data
|
|
|
|
|
import Hledger.Utils
|
2018-04-17 00:47:04 +03:00
|
|
|
|
import qualified Hledger.Query as Q (Query(Any))
|
|
|
|
|
|
|
|
|
|
-- | A hledger journal reader: the storage format it handles, the file
-- extensions associated with that format, a parser from that format to
-- 'Journal', and a flag marking the reader as experimental.
data Reader = Reader
  { rFormat :: StorageFormat
    -- ^ The canonical name of the format handled by this reader.
  , rExtensions :: [String]
    -- ^ The file extensions recognised as containing this format.
  , rParser :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
    -- ^ A text parser for this format. It accepts input options, a file
    -- path (for error messages) and the file contents, and produces an
    -- exception-raising IO action that returns a journal or an error
    -- message.
  , rExperimental :: Bool
    -- ^ Experimental readers are never tried automatically.
  }
|
|
|
|
|
|
|
|
|
|
-- | A reader is shown as its format name followed by " reader".
instance Show Reader where
  show reader = rFormat reader ++ " reader"
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-05-25 04:28:26 +03:00
|
|
|
|
-- $setup
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-09-15 03:41:42 +03:00
|
|
|
|
-- | Various options to use when reading journal files.
-- Similar to CliOptions.inputflags, simplifies the journal-reading functions.
--
-- NOTE: the field order matters, because 'InputOpts' is also constructed
-- positionally.
data InputOpts = InputOpts
  { -- files_ :: [FilePath]
    mformat_ :: Maybe StorageFormat
    -- ^ a file/storage format to try, unless overridden by a filename
    -- prefix. Nothing means try all.
  , mrules_file_ :: Maybe FilePath
    -- ^ a conversion rules file to use (when reading CSV)
  , aliases_ :: [String]
    -- ^ account name aliases to apply
  , anon_ :: Bool
    -- ^ do light anonymisation/obfuscation of the data
  , ignore_assertions_ :: Bool
    -- ^ don't check balance assertions
  , new_ :: Bool
    -- ^ read only new transactions since this file was last read
  , new_save_ :: Bool
    -- ^ save latest new transactions state for next time
  , pivot_ :: String
    -- ^ use the given field's value as the account name
  , auto_ :: Bool
    -- ^ generate automatic postings when journal is parsed
  } deriving (Show, Data) --, Typeable)
|
|
|
|
|
|
|
|
|
|
-- | The default input options are 'definputopts'.
instance Default InputOpts where
  def = definputopts
|
|
|
|
|
|
|
|
|
|
-- | Default input options: every field takes its 'def' value, except
-- 'new_save_', which is enabled.
definputopts :: InputOpts
definputopts = InputOpts
  { mformat_           = def
  , mrules_file_       = def
  , aliases_           = def
  , anon_              = def
  , ignore_assertions_ = def
  , new_               = def
  , new_save_          = True
  , pivot_             = def
  , auto_              = def
  }
|
2017-09-15 03:41:42 +03:00
|
|
|
|
|
|
|
|
|
-- | Build 'InputOpts' from raw command-line options. The format is left
-- unset and saving of the "new" state is always enabled; the remaining
-- fields are read from the correspondingly named options.
rawOptsToInputOpts :: RawOpts -> InputOpts
rawOptsToInputOpts rawopts = InputOpts
  { -- files_ = map (T.unpack . stripquotes . T.pack) $ listofstringopt "file" rawopts
    mformat_           = Nothing
  , mrules_file_       = maybestringopt "rules-file" rawopts
  , aliases_           = unquote <$> listofstringopt "alias" rawopts
  , anon_              = boolopt "anon" rawopts
  , ignore_assertions_ = boolopt "ignore-assertions" rawopts
  , new_               = boolopt "new" rawopts
  , new_save_          = True
  , pivot_             = stringopt "pivot" rawopts
  , auto_              = boolopt "auto" rawopts
  }
  where
    -- Strip surrounding quotes from an alias argument.
    unquote str = T.unpack (stripquotes (T.pack str))
|
|
|
|
|
|
2018-05-10 22:58:55 +03:00
|
|
|
|
--- * parsing utilities
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- | Run a text parser with no state in the identity monad, using an
-- empty source name. 'rtp' is a short alias.
runTextParser, rtp :: TextParser Identity a -> Text -> Either (ParseError Char Void) a
runTextParser parser = runParser parser ""
rtp = runTextParser
|
|
|
|
|
|
2018-03-31 04:44:04 +03:00
|
|
|
|
-- XXX odd, why doesn't this take a JournalParser ?
-- | Run a text parser in the given monad, using an empty source name.
-- 'rjp' is a short alias.
runJournalParser, rjp :: Monad m => TextParser m a -> Text -> m (Either (ParseError Char Void) a)
runJournalParser parser = runParserT parser ""
rjp = runJournalParser
|
|
|
|
|
|
|
|
|
|
-- | Run an error-raising journal parser with a null ('mempty')
-- journal-parsing state. A parse failure is rendered with
-- 'parseErrorPretty' and returned as a 'Left', like errors thrown by the
-- parser itself. 'rejp' is a short alias.
runErroringJournalParser, rejp :: Monad m => ErroringJournalParser m a -> Text -> m (Either String a)
runErroringJournalParser p t = runExceptT $ do
  parsed <- runJournalParser (evalStateT p mempty) t
  case parsed of
    Left parseErr -> throwError (parseErrorPretty parseErr)
    Right value   -> return value
rejp = runErroringJournalParser
|
|
|
|
|
|
|
|
|
|
-- | Convert a megaparsec 'SourcePos' to a 'GenericSourcePos' carrying the
-- same source name, line and column.
genericSourcePos :: SourcePos -> GenericSourcePos
genericSourcePos p = GenericSourcePos (sourceName p) lineNo columnNo
  where
    lineNo   = fromIntegral (unPos (sourceLine p))
    columnNo = fromIntegral (unPos (sourceColumn p))
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-01-20 18:33:24 +03:00
|
|
|
|
-- | Build a 'JournalSourcePos' spanning from the line of the first
-- position to the line of the second. When the second position is in
-- column 1, the span ends on the preceding line.
journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos
journalSourcePos p p' =
  JournalSourcePos (sourceName p) (fromIntegral startLine, fromIntegral endLine)
  where
    startLine = unPos (sourceLine p)
    endLine =
      if unPos (sourceColumn p') == 1
        then unPos (sourceLine p') - 1
        else unPos (sourceLine p') -- might be at end of file without last new-line
|
|
|
|
|
|
|
|
|
|
|
2018-04-17 00:47:04 +03:00
|
|
|
|
-- | Generate automatic postings and add them to the journal's
-- transactions. Each of the journal's modifier transactions is applied
-- to every transaction, in the order they appear in 'jmodifiertxns'.
generateAutomaticPostings :: Journal -> Journal
generateAutomaticPostings j = j { jtxns = map applyAll (jtxns j) }
  where
    -- Compose the modifiers so that the first one in the list runs first.
    applyAll = foldr (\mt later -> later . applyOne mt) id (jmodifiertxns j)
    -- Run one modifier against any transaction, then re-tie the knot.
    applyOne mt = txnTieKnot . runModifierTransaction Q.Any mt
|
|
|
|
|
|
|
|
|
|
-- | Given a megaparsec ParsedJournal parser, input options, file
-- path and file content: parse and post-process a Journal, or give an error.
--
-- Parsing starts with the current year as the default year. Automatic
-- postings are generated when 'auto_' is set, and balance assertions are
-- checked unless 'ignore_assertions_' is set.
parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts
                           -> FilePath -> Text -> ExceptT String IO Journal
parseAndFinaliseJournal parser iopts f txt = do
  now  <- liftIO getClockTime
  year <- liftIO getCurrentYear
  let startState = nulljournal{jparsedefaultyear = Just year}
  outcome <- runParserT (evalStateT parser startState) f txt
  case outcome of
    Left parseErr -> throwError (parseErrorPretty parseErr)
    Right parsed  ->
      either throwError return $
        journalFinalise now f txt checkAssertions (withAutoPostings parsed)
  where
    checkAssertions = not (ignore_assertions_ iopts)
    withAutoPostings
      | auto_ iopts = generateAutomaticPostings
      | otherwise   = id
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2018-04-17 00:47:04 +03:00
|
|
|
|
-- | Like 'parseAndFinaliseJournal', but for a pure ('Identity')
-- journal parser: parse with the current year as the default year, then
-- optionally generate automatic postings and finalise the journal.
parseAndFinaliseJournal' :: JournalParser Identity ParsedJournal -> InputOpts
                            -> FilePath -> Text -> ExceptT String IO Journal
parseAndFinaliseJournal' parser iopts f txt = do
  now  <- liftIO getClockTime
  year <- liftIO getCurrentYear
  let startState = nulljournal{jparsedefaultyear = Just year}
      outcome    = runParser (evalStateT parser startState) f txt
  case outcome of
    Left parseErr -> throwError (parseErrorPretty parseErr)
    Right parsed  ->
      either throwError return $
        journalFinalise now f txt checkAssertions (withAutoPostings parsed)
  where
    checkAssertions = not (ignore_assertions_ iopts)
    withAutoPostings
      | auto_ iopts = generateAutomaticPostings
      | otherwise   = id
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Set the default year stored in the parse state.
setYear :: Year -> JournalParser m ()
setYear year = modify' $ \journal -> journal{jparsedefaultyear = Just year}
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Get the default year stored in the parse state, if any.
getYear :: JournalParser m (Maybe Year)
getYear = gets jparsedefaultyear
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Set the default commodity symbol and amount style stored in the
-- parse state.
setDefaultCommodityAndStyle :: (CommoditySymbol,AmountStyle) -> JournalParser m ()
setDefaultCommodityAndStyle cs =
  modify' $ \journal -> journal{jparsedefaultcommodity = Just cs}
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Get the default commodity symbol and amount style stored in the
-- parse state, if any.
getDefaultCommodityAndStyle :: JournalParser m (Maybe (CommoditySymbol,AmountStyle))
getDefaultCommodityAndStyle = gets jparsedefaultcommodity
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-11-05 02:40:54 +03:00
|
|
|
|
-- | Get the amount style associated with the default commodity.
--
-- Returns the 'AmountStyle' defined by the latest default commodity
-- directive prior to the current position within this file or its
-- parents, i.e. the style half of the default commodity/style pair held
-- in the parse state.
getDefaultAmountStyle :: JournalParser m (Maybe AmountStyle)
getDefaultAmountStyle = do
  defaultPair <- getDefaultCommodityAndStyle
  return (snd <$> defaultPair)
|
2017-10-28 18:14:54 +03:00
|
|
|
|
|
2017-11-05 02:40:54 +03:00
|
|
|
|
-- | Look up the amount style for a specific commodity.
--
-- Returns the 'AmountStyle' recorded for the commodity in the journal's
-- commodity declarations (those seen prior to the current position or in
-- parent files). If there is none, falls back to the default commodity's
-- style.
getAmountStyle :: CommoditySymbol -> JournalParser m (Maybe AmountStyle)
getAmountStyle commodity = do
  journal <- get
  let specificStyle = M.lookup commodity (jcommodities journal) >>= cformat
  defaultStyle <- getDefaultAmountStyle
  -- The commodity-specific style wins when both are present.
  return (specificStyle <|> defaultStyle)
|
2017-10-28 18:14:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Record a declared account name at the front of the journal's
-- account list, paired with 'Nothing'.
pushAccount :: AccountName -> JournalParser m ()
pushAccount acct =
  modify' $ \journal -> journal{jaccounts = (acct, Nothing) : jaccounts journal}
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Push an account name onto the stack of parent accounts kept in the
-- parse state.
pushParentAccount :: AccountName -> JournalParser m ()
pushParentAccount acct =
  modify' $ \journal ->
    journal{jparseparentaccounts = acct : jparseparentaccounts journal}
|
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Pop the most recently pushed parent account. Fails with an
-- "unexpected" parse error if the parent account stack is empty.
popParentAccount :: JournalParser m ()
popParentAccount = get >>= \journal ->
  case jparseparentaccounts journal of
    _ : remaining -> put journal{jparseparentaccounts = remaining}
    []            -> unexpected (Tokens ('E' :| "nd of apply account block with no beginning"))
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Get the current parent account prefix: the stacked parent account
-- names, oldest first, concatenated into a single account name.
getParentAccount :: JournalParser m AccountName
getParentAccount = gets (concatAccountNames . reverse . jparseparentaccounts)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- | Add an account alias at the front of the aliases kept in the parse
-- state.
addAccountAlias :: MonadState Journal m => AccountAlias -> m ()
addAccountAlias a =
  modify' $ \journal -> journal{jparsealiases = a : jparsealiases journal}
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- | Get the account aliases kept in the parse state.
getAccountAliases :: MonadState Journal m => m [AccountAlias]
getAccountAliases = gets jparsealiases
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- | Remove all account aliases from the parse state.
clearAccountAliases :: MonadState Journal m => m ()
clearAccountAliases = modify' $ \journal -> journal{jparsealiases = []}
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-08-14 22:44:19 +03:00
|
|
|
|
-- getTransactionCount :: MonadState Journal m => m Integer
|
|
|
|
|
-- getTransactionCount = fmap jparsetransactioncount get
|
|
|
|
|
--
|
|
|
|
|
-- setTransactionCount :: MonadState Journal m => Integer -> m ()
|
|
|
|
|
-- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i})
|
|
|
|
|
--
|
|
|
|
|
-- -- | Increment the transaction index by one and return the new value.
|
|
|
|
|
-- incrementTransactionCount :: MonadState Journal m => m Integer
|
|
|
|
|
-- incrementTransactionCount = do
|
|
|
|
|
-- modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1})
|
|
|
|
|
-- getTransactionCount
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
lib: textification: saved journal source
Slightly worse on small files, better on large ones.
hledger -f data/100x100x10.journal stats
<<ghc: 39305392 bytes, 77 GCs, 196354/268584 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.007 elapsed), 0.014 MUT (0.027 elapsed), 0.011 GC (0.111 elapsed) :ghc>>
<<ghc: 39307728 bytes, 77 GCs, 196909/270248 avg/max bytes residency (3 samples), 2M in use, 0.001 INIT (0.010 elapsed), 0.015 MUT (0.028 elapsed), 0.012 GC (0.116 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314285912 bytes, 612 GCs, 2064811/6597608 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.002 elapsed), 0.126 MUT (0.134 elapsed), 0.059 GC (0.069 elapsed) :ghc>>
<<ghc: 314271368 bytes, 612 GCs, 2070227/6628024 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.137 MUT (0.143 elapsed), 0.059 GC (0.068 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070033264 bytes, 5965 GCs, 12699294/62962464 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.000 elapsed), 1.245 MUT (1.300 elapsed), 0.498 GC (0.558 elapsed) :ghc>>
<<ghc: 3070006752 bytes, 5973 GCs, 12687314/62848920 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.003 elapsed), 1.257 MUT (1.281 elapsed), 0.496 GC (0.554 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753465088 bytes, 59763 GCs, 117723618/666643528 avg/max bytes residency (14 samples), 1589M in use, 0.000 INIT (0.002 elapsed), 12.536 MUT (12.793 elapsed), 5.978 GC (7.155 elapsed) :ghc>>
<<ghc: 30753367256 bytes, 59811 GCs, 117723236/666627528 avg/max bytes residency (14 samples), 1590M in use, 0.001 INIT (0.012 elapsed), 12.923 MUT (13.169 elapsed), 5.981 GC (6.860 elapsed) :ghc>>
2016-05-24 05:24:39 +03:00
|
|
|
|
-- | Record a (file path, file content) pair in the journal's file list.
journalAddFile :: (FilePath,Text) -> Journal -> Journal
journalAddFile f j = j{jfiles = jfiles j ++ [f]}
  -- append, unlike the other fields, even though we do a final reverse,
  -- to compensate for additional reversal due to including/monoid-concatting
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- -- | Terminate parsing entirely, returning the given error message
|
|
|
|
|
-- -- with the current parse position prepended.
|
|
|
|
|
-- parserError :: String -> ErroringJournalParser a
|
|
|
|
|
-- parserError s = do
|
|
|
|
|
-- pos <- getPosition
|
|
|
|
|
-- parserErrorAt pos s
|
|
|
|
|
|
|
|
|
|
-- | Terminate parsing entirely, returning the given error message
-- with the given parse position prepended. The position is rendered with
-- 'sourcePosPretty' and followed by a colon and a newline.
parserErrorAt :: Monad m => SourcePos -> String -> ErroringJournalParser m a
parserErrorAt pos s = throwError message
  where
    message = sourcePosPretty pos ++ ":\n" ++ s
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- * parsers
|
2018-05-24 07:36:19 +03:00
|
|
|
|
|
2016-05-18 05:46:54 +03:00
|
|
|
|
--- ** transaction bits
|
|
|
|
|
|
2017-06-16 02:54:34 +03:00
|
|
|
|
-- | Parse an optional transaction/posting status mark: optional
-- whitespace followed by @*@ (cleared) or @!@ (pending). If neither is
-- present, succeed with 'Unmarked'.
statusp :: TextParser m Status
statusp = choice'
  [ Cleared <$ (skipMany spacenonewline *> char '*')
  , Pending <$ (skipMany spacenonewline *> char '!')
  , pure Unmarked
  ]
|
|
|
|
|
|
2018-05-22 04:52:34 +03:00
|
|
|
|
-- | Parse an optional transaction code: one or more spaces, then any
-- text enclosed in parentheses. Returns the text between the
-- parentheses, or the empty text if no code is present.
codep :: TextParser m Text
codep = option "" . try $ skipSome spacenonewline *> parenthesised
  where
    parenthesised = char '(' *> takeWhileP Nothing (/= ')') <* char ')'
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2018-05-22 04:52:34 +03:00
|
|
|
|
-- | Parse a transaction description: everything up to (but not
-- including) the next semicolon or newline. May be empty.
descriptionp :: JournalParser m Text
descriptionp = takeWhileP Nothing isDescriptionChar
  where
    isDescriptionChar c = c /= ';' && c /= '\n'
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- ** dates
|
|
|
|
|
|
|
|
|
|
-- | Parse a date in YYYY/MM/DD format.
-- Hyphen (-) and period (.) are also allowed as separators.
-- The year may be omitted if a default year has been set.
-- Leading zeroes may be omitted.
--
-- The default year is taken from the parse state; the parsing itself is
-- done by @datep'@.
datep :: JournalParser m Day
datep = getYear >>= lift . datep'
|
2018-05-16 05:03:59 +03:00
|
|
|
|
|
|
|
|
|
-- | Parse a full (year, month, day) or partial (month, day) date, using
-- the given default year, if any, for partial dates.
--
-- Two numbers and a date separator are read first. If another separator
-- and number follow, the input is treated as a full date and both
-- separators must be the same character. Otherwise it is treated as a
-- partial date, which fails if no default year was supplied.
--
-- All numeric fields are parsed as unbounded 'Integer's and range-checked
-- before being narrowed to 'Int', so an overlong digit run cannot wrap
-- around into a valid-looking month or day.
datep' :: Maybe Year -> TextParser m Day
datep' mYear = do
  d1 <- decimal <?> "year or month"
  sep <- satisfy isDateSepChar <?> "date separator"
  d2 <- decimal <?> "month or day"
  fullDate d1 sep d2 <|> partialDate mYear d1 sep d2
    <?> "full or partial date"

  where

    fullDate :: Integer -> Char -> Integer -> TextParser m Day
    fullDate year sep1 month = do
      sep2 <- satisfy isDateSepChar <?> "date separator"
      day <- decimal <?> "day"
      let dateStr = show year ++ [sep1] ++ show month ++ [sep2] ++ show day

      when (sep1 /= sep2) $ fail $
        "invalid date (mixing date separators is not allowed): " ++ dateStr

      validDate dateStr year month day

    partialDate :: Maybe Year -> Integer -> Char -> Integer -> TextParser m Day
    partialDate (Just year) month sep day = validDate dateStr year month day
      where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day
    partialDate Nothing month sep day = fail $
      "partial date "++dateStr++" found, but the current year is unknown"
      where dateStr = show month ++ [sep] ++ show day

    -- Build the day, or fail with the rendered date in the message.
    -- Month and day are only narrowed to Int once known to be small
    -- (no valid month or day exceeds 31; parsed numbers are non-negative).
    validDate :: String -> Integer -> Integer -> Integer -> TextParser m Day
    validDate dateStr year month day
      | small month && small day
      , Just date <- fromGregorianValid year (fromIntegral month) (fromIntegral day)
      = pure date
      | otherwise = fail $ "well-formed but invalid date: " ++ dateStr
      where small n = n <= 31
|
|
|
|
|
|
2016-05-18 05:46:54 +03:00
|
|
|
|
-- | Parse a date and time in YYYY/MM/DD HH:MM[:SS][+-ZZZZ] format.
-- Hyphen (-) and period (.) are also allowed as date separators.
-- The year may be omitted if a default year has been set.
-- Seconds are optional.
-- The timezone is optional and ignored (the time is always interpreted as a local time).
-- Leading zeroes may be omitted (except in a timezone).
--
-- Hour, minute and second are parsed as unbounded 'Integer's and
-- range-checked (0-23, 0-59, 0-59) before being narrowed, so an overlong
-- digit run cannot wrap around into an in-range value. An out-of-range
-- field makes the parser fail.
datetimep :: JournalParser m LocalTime
datetimep = do
  day <- datep
  lift $ skipSome spacenonewline
  hour <- numberUpTo 23
  _ <- char ':'
  minute <- numberUpTo 59
  second <- option 0 $ char ':' *> numberUpTo 59
  {- tz <- -}
  _ <- optional $
         oneOf ("-+" :: [Char]) *> digitChar *> digitChar *> digitChar *> digitChar
  -- ltz <- liftIO $ getCurrentTimeZone
  -- let tz' = maybe ltz (fromMaybe ltz . parseTime defaultTimeLocale "%z") tz
  -- return $ localTimeToUTC tz' $ LocalTime day $ TimeOfDay h' m' (fromIntegral s')
  return $ LocalTime day $ TimeOfDay hour minute (fromIntegral second)
  where
    -- Parse a non-negative decimal number no greater than the given limit.
    numberUpTo :: Integer -> JournalParser m Int
    numberUpTo limit = do
      n <- decimal
      guard $ n <= limit
      return $ fromIntegral n
|
|
|
|
|
|
2018-05-24 07:36:19 +03:00
|
|
|
|
-- | Parse a secondary date: an equals sign followed by a date. If the
-- year is omitted, the year of the given primary date is used.
secondarydatep :: Day -> TextParser m Day
secondarydatep primaryDate = char '=' *> datep' (Just yearOfPrimary)
  where
    yearOfPrimary = first3 (toGregorian primaryDate)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- ** account names
|
|
|
|
|
|
|
|
|
|
-- | Parse an account name, then apply any parent account prefix and/or
-- account aliases currently in effect. The parent prefix is applied
-- first, then the aliases.
modifiedaccountnamep :: JournalParser m AccountName
modifiedaccountnamep = do
  parentAccount <- getParentAccount
  activeAliases <- getAccountAliases
  parsedName <- lift accountnamep
  let prefixedName = joinAccountNames parentAccount parsedName
  -- XXX accountNameApplyAliasesMemo ? doesn't seem to make a difference
  return (accountNameApplyAliases activeAliases prefixedName)
|
|
|
|
|
|
|
|
|
|
-- | Parse an account name. Account names start with a non-space, may
-- have single spaces inside them, and are terminated by two or more
-- spaces (or end of input). Also they have one or more components of
-- at least one character, separated by the account separator char.
--
-- The name is checked by splitting it into components and rejoining
-- them; if that does not reproduce it exactly, parsing fails. A single
-- space is consumed only when another word follows, because that attempt
-- is wrapped in 'try'.
accountnamep :: TextParser m AccountName
accountnamep = do
  leading <- word
  following <- many (try (loneSpace *> word))
  let name = T.unwords (leading : following)
      rebuilt = accountNameFromComponents (accountNameComponents name)
  if name == rebuilt
    then pure name
    else fail $ "account name seems ill-formed: " ++ T.unpack name
  where
    -- A maximal run of non-whitespace characters.
    word = takeWhile1P Nothing (not . isSpace)
    -- Exactly one space: a space that is not followed by another.
    loneSpace = void spacenonewline *> notFollowedBy spacenonewline
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- ** amounts
|
|
|
|
|
|
|
|
|
|
-- | Parse one or more non-newline spaces followed by an amount (see
-- 'amountp'), returned as a single-amount mixed amount. If that does
-- not succeed - for instance when there is no leading space, or nothing
-- parseable as an amount follows - consume nothing and return the
-- special "missing" marker amount.
spaceandamountormissingp :: Monad m => JournalParser m MixedAmount
spaceandamountormissingp = option missingmixedamt (try spacedAmount)
  where
    spacedAmount = do
      lift (skipSome spacenonewline)
      amt <- amountp
      pure (Mixed [amt])
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
#ifdef TESTS
-- | Assert that a parse result is a success equal to the expected value.
-- A parse failure fails the assertion, showing the parse error.
--
-- The error type is spelled with its two type arguments, matching the
-- way 'ParseError' is used elsewhere in this module; a bare 'ParseError'
-- is not a complete type.
assertParseEqual' :: (Show a, Eq a) => Either (ParseError Char Void) a -> a -> Assertion
assertParseEqual' parse expected = either (assertFailure . show) (`is'` expected) parse

-- | Infix assertion: the actual value (left) must equal the expected
-- value (right).
-- NOTE(review): this passes only two arguments to assertEqual. If that is
-- HUnit's assertEqual (message, expected, actual) an empty message
-- argument is missing here - confirm which assertEqual is in scope.
is' :: (Eq a, Show a) => a -> a -> Assertion
a `is'` e = assertEqual e a

-- Leading whitespace is required before the amount; without it, or with
-- nothing after it, the "missing" marker amount is returned.
test_spaceandamountormissingp = do
  assertParseEqual' (parseWithState mempty spaceandamountormissingp " $47.18") (Mixed [usd 47.18])
  assertParseEqual' (parseWithState mempty spaceandamountormissingp "$47.18") missingmixedamt
  assertParseEqual' (parseWithState mempty spaceandamountormissingp " ") missingmixedamt
  assertParseEqual' (parseWithState mempty spaceandamountormissingp "") missingmixedamt
#endif
|
|
|
|
|
|
|
|
|
|
-- | Parse a single-commodity amount, with optional symbol on the left or
-- right (see 'amountwithoutpricep'), followed by an optional unit or
-- total price (see 'priceamountp') which is stored in the amount's
-- 'aprice' field.
amountp :: Monad m => JournalParser m Amount
amountp = attachPrice <$> amountwithoutpricep <*> priceamountp
  where
    attachPrice amt price = amt { aprice = price }
|
|
|
|
|
|
|
|
|
|
-- | Parse an amount without looking for a price. The three amount
-- syntaxes are tried in order: commodity symbol on the left, commodity
-- symbol on the right, no commodity symbol. The first two are wrapped
-- in 'try' so that a failed attempt consumes no input.
amountwithoutpricep :: Monad m => JournalParser m Amount
amountwithoutpricep = symbolOnLeft <|> symbolOnRight <|> nosymbolamountp
  where
    symbolOnLeft = try leftsymbolamountp
    symbolOnRight = try rightsymbolamountp
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
#ifdef TESTS
-- Checks amountp against plain amounts and amounts carrying a unit or
-- total price.
test_amountp = do
  "$47.18" `parsesTo` usd 47.18
  "$1." `parsesTo` (usd 1 `withPrecision` 0)
  -- amount with unit price
  "$10 @ €0.5" `parsesTo` (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1))
  -- amount with total price
  "$10 @@ €5" `parsesTo` (usd 10 `withPrecision` 0 @@ (eur 5 `withPrecision` 0))
  where
    parsesTo input expected =
      assertParseEqual' (parseWithState mempty amountp input) expected
#endif
|
|
|
|
|
|
|
|
|
|
-- | Parse an amount from a string, which must contain nothing but the
-- amount (end of input is required after it). Runs 'amountp' with an
-- empty initial parse state. On a parse error this calls error' with
-- the shown parse error instead of returning.
amountp' :: String -> Amount
amountp' s = either (error' . show) id parsed -- XXX should throwError
  where
    parsed = runParser (evalStateT (amountp <* eof) mempty) "" (T.pack s)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- | Parse a string with amountp' and wrap the result as a mixed amount
-- containing that single amount. Errors exactly as amountp' does.
mamountp' :: String -> MixedAmount
mamountp' s = Mixed [amountp' s]
|
|
|
|
|
|
2018-05-24 19:08:52 +03:00
|
|
|
|
-- | Parse an optional sign and return the function it denotes:
-- 'negate' for a minus sign, 'id' for a plus sign or for no sign at all
-- (in which case nothing is consumed).
signp :: Num a => TextParser m (a -> a)
signp = option id (negate <$ char '-' <|> id <$ char '+')
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-05-30 17:30:15 +03:00
|
|
|
|
-- | Parse an optional asterisk. Returns True if one was present and
-- consumed, False (consuming nothing) otherwise.
multiplierp :: TextParser m Bool
multiplierp = (True <$ char '*') <|> pure False
|
2017-05-30 17:30:15 +03:00
|
|
|
|
|
2018-03-25 16:53:44 +03:00
|
|
|
|
-- | Run the given action repeatedly until it fails, discarding its
-- results, like skipMany - but report whether it succeeded at least
-- once. Useful when the only thing you would do with the list from
-- 'many' is check whether it is empty.
skipMany' :: MonadPlus m => m a -> m Bool
skipMany' p = loop False
  where
    -- skippedAny records whether any earlier iteration succeeded
    loop !skippedAny = do
      matched <- (True <$ p) <|> pure False
      if matched
        then loop True
        else pure skippedAny
|
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Parse an amount whose commodity symbol is written before the number:
-- optional sign, optional multiplier asterisk, commodity symbol,
-- optional non-newline spaces, then the number. The number is parsed
-- using the amount style looked up for that commodity as a hint. The
-- resulting amount has no price; its style records the left-side
-- symbol, whether spaces separated symbol and number, and the
-- precision, decimal point and digit groups observed in the number.
leftsymbolamountp :: Monad m => JournalParser m Amount
leftsymbolamountp = label "left-symbol amount" $ do
  applySign <- lift signp
  isMultiplier <- lift multiplierp
  symbol <- lift commoditysymbolp
  styleHint <- getAmountStyle symbol
  spaced <- lift (skipMany' spacenonewline)
  (qty, prec, mdec, mgrps) <- lift (numberp styleHint)
  let style = amountstyle
        { ascommodityside = L
        , ascommodityspaced = spaced
        , asprecision = prec
        , asdecimalpoint = mdec
        , asdigitgroups = mgrps
        }
  pure (Amount symbol (applySign qty) NoPrice style isMultiplier)
|
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Parse an amount whose commodity symbol is written after the number:
-- optional multiplier asterisk, optional sign, the number with an
-- optional exponent, optional non-newline spaces, then the commodity
-- symbol. Because the symbol comes last, a number that is ambiguous on
-- its own is only disambiguated once the symbol is known, using the
-- amount style looked up for that commodity. The resulting amount has
-- no price; its style records the right-side symbol, whether spaces
-- separated number and symbol, and the observed number format.
rightsymbolamountp :: Monad m => JournalParser m Amount
rightsymbolamountp = label "right-symbol amount" $ do
  isMultiplier <- lift multiplierp
  applySign <- lift signp
  ambiguousOrRaw <- lift rawnumberp
  mExponent <- lift (optional (try exponentp))
  spaced <- lift (skipMany' spacenonewline)
  symbol <- lift commoditysymbolp
  styleHint <- getAmountStyle symbol

  let rawNum = either (disambiguateNumber styleHint) id ambiguousOrRaw
  (qty, prec, mdec, mgrps) <- either fail pure (fromRawNumber rawNum mExponent)

  let style = amountstyle
        { ascommodityside = R
        , ascommodityspaced = spaced
        , asprecision = prec
        , asdecimalpoint = mdec
        , asdigitgroups = mgrps
        }
  pure (Amount symbol (applySign qty) NoPrice style isMultiplier)
|
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Parse an amount written without a commodity symbol: an optional
-- multiplier asterisk, then a number (parsed with the default amount
-- style as a hint; any sign is handled by 'numberp').
--
-- The most recently seen default commodity and style, if any, are
-- applied to the amount, with the precision raised to that of the
-- parsed number when the latter is greater. Without a default, the
-- commodity is empty and the style is built from the number as written.
nosymbolamountp :: Monad m => JournalParser m Amount
nosymbolamountp = label "no-symbol amount" $ do
  isMultiplier <- lift multiplierp
  styleHint <- getDefaultAmountStyle
  (qty, prec, mdec, mgrps) <- lift (numberp styleHint)
  defaults <- getDefaultCommodityAndStyle
  let asWritten = amountstyle
        { asprecision = prec
        , asdecimalpoint = mdec
        , asdigitgroups = mgrps
        }
      adopt (defSymbol, defStyle) =
        (defSymbol, defStyle { asprecision = max (asprecision defStyle) prec })
      (symbol, style) = maybe ("", asWritten) adopt defaults
  pure (Amount symbol qty NoPrice style isMultiplier)
|
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- | Parse a commodity symbol, either double-quoted or simple (unquoted).
commoditysymbolp :: TextParser m CommoditySymbol
commoditysymbolp =
  label "commodity symbol" (quotedcommoditysymbolp <|> simplecommoditysymbolp)
|
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- | Parse a commodity symbol enclosed in double quotes, returning it
-- without the quotes. The symbol must be non-empty and may contain any
-- character except a semicolon, a newline or a double quote.
quotedcommoditysymbolp :: TextParser m CommoditySymbol
quotedcommoditysymbolp = quote *> takeWhile1P Nothing allowed <* quote
  where
    quote = char '"'
    allowed c = c `notElem` [';', '\n', '"']
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- | Parse an unquoted commodity symbol: one or more characters, none of
-- which is classified by 'isNonsimpleCommodityChar'.
simplecommoditysymbolp :: TextParser m CommoditySymbol
simplecommoditysymbolp = takeWhile1P Nothing isSimpleChar
  where
    isSimpleChar c = not (isNonsimpleCommodityChar c)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-07-27 14:59:55 +03:00
|
|
|
|
-- | Parse an optional price: optional spaces, "@" (unit price) or "@@"
-- (total price), optional spaces, then an amount without a price of its
-- own. If no price can be parsed, nothing is consumed and 'NoPrice' is
-- returned.
priceamountp :: Monad m => JournalParser m Price
priceamountp = option NoPrice (try pricep)
  where
    pricep = do
      lift (skipMany spacenonewline)
      _ <- char '@'
      -- a second @ turns a unit price into a total price
      mkPrice <- (TotalPrice <$ char '@') <|> pure UnitPrice
      lift (skipMany spacenonewline)
      mkPrice <$> amountwithoutpricep
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2017-10-15 20:52:41 +03:00
|
|
|
|
-- | Parse an optional balance assertion: optional spaces, an equals
-- sign, optional spaces, then an amount. The result pairs the amount
-- with the source position of the equals sign. If no assertion can be
-- parsed, nothing is consumed and Nothing is returned.
partialbalanceassertionp :: Monad m => JournalParser m BalanceAssertion
partialbalanceassertionp = optional (try assertion)
  where
    assertion = do
      lift (skipMany spacenonewline)
      pos <- lift getPosition
      _ <- char '='
      lift (skipMany spacenonewline)
      amt <- amountp -- XXX should restrict to a simple amount
      pure (amt, genericSourcePos pos)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2016-07-29 18:57:10 +03:00
|
|
|
|
-- balanceassertion :: Monad m => TextParser m (Maybe MixedAmount)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
-- balanceassertion =
|
|
|
|
|
-- try (do
|
2018-03-25 16:53:44 +03:00
|
|
|
|
-- lift (skipMany spacenonewline)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
-- string "=="
|
2018-03-25 16:53:44 +03:00
|
|
|
|
-- lift (skipMany spacenonewline)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
-- a <- amountp -- XXX should restrict to a simple amount
|
|
|
|
|
-- return $ Just $ Mixed [a])
|
|
|
|
|
-- <|> return Nothing
|
|
|
|
|
|
|
|
|
|
-- | Parse an optional ledger-style fixed lot price: an amount written
-- between "{=" and "}", with optional spaces around each token. If none
-- can be parsed, nothing is consumed and Nothing is returned.
-- See http://ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices
fixedlotpricep :: Monad m => JournalParser m (Maybe Amount)
fixedlotpricep = optional (try bracedPrice)
  where
    skipSpaces = lift (skipMany spacenonewline)
    bracedPrice = do
      skipSpaces
      _ <- char '{'
      skipSpaces
      _ <- char '='
      skipSpaces
      lotPrice <- amountp -- XXX should restrict to a simple amount
      skipSpaces
      _ <- char '}'
      pure lotPrice
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- | Parse a string representation of a number for its value and display
-- attributes.
--
-- A number is an optional sign followed by a sequence of digits possibly
-- interspersed with periods, commas, or both, and optionally followed by
-- an exponent. Some international number formats are accepted, eg either
-- period or comma may be used for the decimal point, and the other of
-- these may be used for separating digit groups in the integer part. See
-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
-- When the number alone is ambiguous, the suggested amount style (if
-- any) is used to decide.
--
-- This returns: the parsed numeric value (with the sign applied), the
-- precision (number of digits seen following the decimal point), the
-- decimal point character used if any, and the digit group style if any.
--
numberp :: Maybe AmountStyle -> TextParser m (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
numberp suggestedStyle = label "numberp" $ do
  -- ptrace "numberp"
  applySign <- signp
  ambiguousOrRaw <- rawnumberp
  let rawNum = either (disambiguateNumber suggestedStyle) id ambiguousOrRaw
  mExp <- optional (try exponentp)
  -- evaluated only for its debug-output side effect
  dbg8 "numberp suggestedStyle" suggestedStyle `seq` pure ()
  let interpreted =
        dbg8 "numberp quantity,precision,mdecimalpoint,mgrps"
             (fromRawNumber rawNum mExp)
  case interpreted of
    Left errMsg -> fail errMsg
    Right (q, p, d, g) -> pure (applySign q, p, d, g)
|
|
|
|
|
|
2018-05-25 02:46:17 +03:00
|
|
|
|
-- | Parse an exponent suffix: the letter e (matched with char', so
-- either case), an optional sign, then a decimal integer. Returns the
-- signed exponent.
exponentp :: TextParser m Int
exponentp = label "exponentp" $ char' 'e' *> (signp <*> decimal)
|
2018-02-10 01:18:16 +03:00
|
|
|
|
|
2018-05-25 00:52:09 +03:00
|
|
|
|
-- | Interpret a raw number as a decimal number, optionally scaled by a
-- power-of-ten exponent.
--
-- Returns:
-- - the decimal number
-- - the precision (number of digits after the decimal point)
-- - the decimal point character, if any
-- - the digit group style, if any (digit group character and sizes of digit groups)
--
-- Returns an error message instead when
-- - an exponent is given for a number that uses digit group separators, or
-- - the number is written with more decimal places than a 'Decimal' can
--   store. (Previously the count silently wrapped around, producing a
--   wrong quantity.)
fromRawNumber
  :: RawNumber
  -> Maybe Int
  -> Either String
            (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
fromRawNumber raw mExp = case raw of

  NoSeparators digitGrp mDecimals -> do
    let decimalGrp = maybe mempty snd mDecimals
    (quantity, precision) <-
      maybe id applyExp mExp <$> toQuantity digitGrp decimalGrp
    Right (quantity, precision, fmap fst mDecimals, Nothing)

  WithSeparators digitSep digitGrps mDecimals -> do
    let decimalGrp = maybe mempty snd mDecimals
        digitGroupStyle = DigitGroups digitSep (groupSizes digitGrps)
    (quantity, precision) <- case mExp of
      Just _  -> Left "mixing digit separators with exponents is not allowed"
      Nothing -> toQuantity (mconcat digitGrps) decimalGrp
    Right (quantity, precision, fmap fst mDecimals, Just digitGroupStyle)

  where
    -- Outputs digit group sizes from least significant to most significant.
    -- A leading (most significant) group shorter than the one after it is
    -- left out of the list.
    groupSizes :: [DigitGrp] -> [Int]
    groupSizes digitGrps = reverse $ case map digitGroupLength digitGrps of
      (a:b:cs) | a < b -> b:cs
      gs               -> gs

    -- Combine the digits before and after the decimal point into a
    -- quantity, also returning the number of decimal places. The Decimal
    -- constructor stores its number of decimal places in a Word8, so a
    -- larger count cannot be represented and is rejected rather than
    -- being allowed to wrap around.
    toQuantity :: DigitGrp -> DigitGrp -> Either String (Quantity, Int)
    toQuantity preDecimalGrp postDecimalGrp
      | precision > maxDecimalPlaces = Left $
          "numbers with more than " ++ show maxDecimalPlaces
          ++ " decimal places are not supported"
      | otherwise = Right (quantity, precision)
      where
        maxDecimalPlaces = 255 :: Int  -- largest value a Word8 can hold
        quantity = Decimal (fromIntegral precision)
                           (digitGroupNumber $ preDecimalGrp <> postDecimalGrp)
        precision = digitGroupLength postDecimalGrp

    -- Scale the quantity by ten to the given exponent; the precision
    -- shrinks by the exponent but never drops below zero.
    applyExp :: Int -> (Decimal, Int) -> (Decimal, Int)
    applyExp exponent (quantity, precision) =
      (quantity * 10^^exponent, max 0 (precision - exponent))
|
|
|
|
|
|
2018-05-25 00:52:09 +03:00
|
|
|
|
|
|
|
|
|
-- | Decide how to read a number of the form digits-separator-digits,
-- whose single separator could be either a decimal point or a digit
-- group separator.
--
-- If present, the suggested style is used to disambiguate; otherwise the
-- separator is assumed to be a decimal point where possible (that is,
-- when it is a period or comma).
disambiguateNumber :: Maybe AmountStyle -> AmbiguousNumber -> RawNumber
disambiguateNumber suggestedStyle (AmbiguousNumber grp1 sep grp2)
  | sepIsDecimalPoint = NoSeparators grp1 (Just (sep, grp2))
  | otherwise         = WithSeparators sep [grp1, grp2] Nothing
  where
    sepIsDecimalPoint =
      isDecimalPointChar sep
        && maybe True (sep `isValidDecimalBy`) suggestedStyle

    -- Does the style allow this character as a decimal point? Checked in
    -- order: the style's decimal point must be this character; else the
    -- style's digit group separator must not be this character; else a
    -- style with zero precision allows no decimal point; else yes.
    isValidDecimalBy :: Char -> AmountStyle -> Bool
    isValidDecimalBy c AmountStyle{asdecimalpoint = Just d} = d == c
    isValidDecimalBy c AmountStyle{asdigitgroups = Just (DigitGroups g _)} = g /= c
    isValidDecimalBy _ AmountStyle{asprecision = 0} = False
    isValidDecimalBy _ _ = True
|
|
|
|
|
|
2018-05-25 00:52:09 +03:00
|
|
|
|
-- | Parse and interpret the structure of a number without external hints.
-- Numbers are digit strings, possibly separated into digit groups by one
-- of two types of separators. (1) Numbers may optionally have a decimal
-- point, which may be either a period or comma. (2) Numbers may
-- optionally contain digit group separators, which must all be either a
-- period, a comma, or a space.
--
-- It is our task to deduce the identities of the decimal point and digit
-- separator characters, based on the allowed syntax. For instance, we
-- make use of the fact that a decimal point can occur at most once and
-- must come after all digit group separators. When a single period or
-- comma could be read either way, the number is returned as ambiguous.
--
-- >>> parseTest rawnumberp "1,234,567.89"
-- Right (WithSeparators ',' ["1","234","567"] (Just ('.',"89")))
-- >>> parseTest rawnumberp "1,000"
-- Left (AmbiguousNumber "1" ',' "000")
-- >>> parseTest rawnumberp "1 000"
-- Right (WithSeparators ' ' ["1","000"] Nothing)
--
rawnumberp :: TextParser m (Either AmbiguousNumber RawNumber)
rawnumberp = label "rawnumberp" $ do
  parsed <- (Right <$> fractionOnly) <|> startingWithDigits
  -- Guard against mistyped numbers: reject input that continues with
  -- another period or comma, or with a space and then a digit.
  notFollowedBy (satisfy isDecimalPointChar <|> (char ' ' *> digitChar))
  pure (dbg8 "rawnumberp" parsed)
  where

    -- A decimal point followed by digits, with nothing before it.
    fractionOnly :: TextParser m RawNumber
    fractionOnly = build <$> satisfy isDecimalPointChar <*> digitgroupp
      where
        build pt frac = NoSeparators mempty (Just (pt, frac))

    -- A number beginning with a digit group: try separated groups, then
    -- a bare trailing decimal point, then settle for the digits alone.
    startingWithDigits :: TextParser m (Either AmbiguousNumber RawNumber)
    startingWithDigits = do
      firstGrp <- digitgroupp
      grouped firstGrp
        <|> (Right <$> integerWithPoint firstGrp)
        <|> pure (Right (NoSeparators firstGrp Nothing))

    -- At least one separator followed by a digit group, then any number
    -- of further groups introduced by the same separator, then possibly
    -- a decimal part.
    grouped :: DigitGrp -> TextParser m (Either AmbiguousNumber RawNumber)
    grouped firstGrp = do
      (sep, secondGrp) <-
        try ((,) <$> satisfy isDigitSeparatorChar <*> digitgroupp)
      moreGrps <- many (try (char sep *> digitgroupp))
      let allGrps = firstGrp : secondGrp : moreGrps
      (Right <$> groupedWithPoint sep allGrps)
        <|> pure (groupedWithoutPoint firstGrp sep secondGrp moreGrps)

    -- A decimal point character different from the group separator,
    -- optionally followed by decimal digits.
    groupedWithPoint :: Char -> [DigitGrp] -> TextParser m RawNumber
    groupedWithPoint digitSep grps =
      build <$> pointChar <*> option mempty digitgroupp
      where
        pointChar = satisfy (\c -> isDecimalPointChar c && c /= digitSep)
        build pt frac = WithSeparators digitSep grps (Just (pt, frac))

    -- No decimal part followed the groups. With exactly two groups
    -- joined by a period or comma, the separator's role is undecidable
    -- here, so report the number as ambiguous.
    groupedWithoutPoint
      :: DigitGrp
      -> Char
      -> DigitGrp
      -> [DigitGrp]
      -> Either AmbiguousNumber RawNumber
    groupedWithoutPoint firstGrp sep secondGrp moreGrps =
      if null moreGrps && isDecimalPointChar sep
        then Left (AmbiguousNumber firstGrp sep secondGrp)
        else Right (WithSeparators sep (firstGrp : secondGrp : moreGrps) Nothing)

    -- Digits followed by a decimal point with no digits after it.
    integerWithPoint :: DigitGrp -> TextParser m RawNumber
    integerWithPoint firstGrp = build <$> satisfy isDecimalPointChar
      where
        build pt = NoSeparators firstGrp (Just (pt, mempty))
|
2018-05-23 22:45:57 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- | Is this a character that may serve as a decimal point
-- (a period or a comma)?
isDecimalPointChar :: Char -> Bool
isDecimalPointChar c = c `elem` ['.', ',']
|
|
|
|
|
|
|
|
|
|
-- | Is this a character that may separate digit groups
-- (a space, or any of the decimal point characters)?
isDigitSeparatorChar :: Char -> Bool
isDigitSeparatorChar c = c == ' ' || isDecimalPointChar c
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- | A run of decimal digits, kept as its length together with its
-- numeric value so that leading zeros are not lost.
data DigitGrp = DigitGrp
  { digitGroupLength :: Int      -- ^ how many digits, leading zeros included
  , digitGroupNumber :: Integer  -- ^ the digits read as a number
  } deriving (Eq)

-- | Shown as the quoted digit string, zero-padded on the left to the
-- recorded length; a group of length zero (or less) shows as an empty
-- quoted string.
instance Show DigitGrp where
  show (DigitGrp len num) = "\"" ++ digits ++ "\""
    where
      digits
        | len > 0   = replicate (len - length unpadded) '0' ++ unpadded
        | otherwise = ""
      unpadded = show num

-- | Appending two groups concatenates their digits: lengths add up, and
-- the left value is shifted by the right group's length.
instance Sem.Semigroup DigitGrp where
  DigitGrp lenL numL <> DigitGrp lenR numR =
    DigitGrp (lenL + lenR) (numL * 10 ^ lenR + numR)

-- | The empty group has no digits and value zero.
instance Monoid DigitGrp where
  mempty = DigitGrp 0 0
  mappend = (Sem.<>)
|
|
|
|
|
|
2018-05-25 00:29:20 +03:00
|
|
|
|
-- | Parse one or more consecutive digits as a digit group, recording
-- both how many digits there were and their numeric value.
digitgroupp :: TextParser m DigitGrp
digitgroupp = label "digit group" (makeGroup <$> takeWhile1P (Just "digit") isDigit)
  where
    makeGroup digits = DigitGrp (T.length digits) (T.foldl' shiftIn 0 digits)
    -- append one more digit to the value accumulated so far
    shiftIn !acc c = acc * 10 + fromIntegral (digitToInt c)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- | The structure of a number as written, once the roles of its
-- separator characters are known.
data RawNumber
  = NoSeparators DigitGrp (Maybe (Char, DigitGrp))
    -- ^ Integer digits, then optionally a decimal point character and
    -- the decimal digits: 100 or 100. or .100 or 100.50
  | WithSeparators Char [DigitGrp] (Maybe (Char, DigitGrp))
    -- ^ The digit group separator, the integer digit groups, then
    -- optionally a decimal point character and the decimal digits:
    -- 1,000,000 or 1,000.50
  deriving (Show, Eq)
|
|
|
|
|
|
|
|
|
|
-- | A number made of two digit groups joined by a single separator
-- whose role (decimal point or digit group separator) cannot be told
-- from the number alone, eg 1,000
data AmbiguousNumber
  = AmbiguousNumber DigitGrp Char DigitGrp
  deriving (Show, Eq)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- test_numberp = do
|
2016-05-23 10:32:55 +03:00
|
|
|
|
-- let s `is` n = assertParseEqual (parseWithState mempty numberp s) n
|
|
|
|
|
-- assertFails = assertBool . isLeft . parseWithState mempty numberp
|
2016-05-18 05:46:54 +03:00
|
|
|
|
-- assertFails ""
|
|
|
|
|
-- "0" `is` (0, 0, '.', ',', [])
|
|
|
|
|
-- "1" `is` (1, 0, '.', ',', [])
|
|
|
|
|
-- "1.1" `is` (1.1, 1, '.', ',', [])
|
|
|
|
|
-- "1,000.1" `is` (1000.1, 1, '.', ',', [3])
|
|
|
|
|
-- "1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2])
|
|
|
|
|
-- "1,000,000" `is` (1000000, 0, '.', ',', [3,3])
|
|
|
|
|
-- "1." `is` (1, 0, '.', ',', [])
|
|
|
|
|
-- "1," `is` (1, 0, ',', '.', [])
|
|
|
|
|
-- ".1" `is` (0.1, 1, '.', ',', [])
|
|
|
|
|
-- ",1" `is` (0.1, 1, ',', '.', [])
|
|
|
|
|
-- assertFails "1,000.000,1"
|
|
|
|
|
-- assertFails "1.000,000.1"
|
|
|
|
|
-- assertFails "1,000.000.1"
|
|
|
|
|
-- assertFails "1,,1"
|
|
|
|
|
-- assertFails "1..1"
|
|
|
|
|
-- assertFails ".1,"
|
|
|
|
|
-- assertFails ",1."
|
|
|
|
|
|
|
|
|
|
--- ** comments
|
|
|
|
|
|
2018-05-16 03:59:49 +03:00
|
|
|
|
-- | Parse a multi-line comment block: a line reading "comment", then
-- any number of lines, up to and including a line reading "end comment"
-- or up to end of input. The opening and closing lines may be followed
-- only by non-newline spaces before their newline.
multilinecommentp :: TextParser m ()
multilinecommentp = do
  opening
  skipManyTill bodyLine closing
  where
    opening = string "comment" *> restOfBlankLine
    closing = eof <|> (string "end comment" *> restOfBlankLine)

    -- optional spaces, then the line's newline
    restOfBlankLine = void (skipMany spacenonewline *> newline)
    -- everything up to and including the next newline
    bodyLine = takeWhileP Nothing (/= '\n') *> newline
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2018-05-16 03:59:49 +03:00
|
|
|
|
-- | Parse a line that is either a line comment or blank: optional
-- non-newline spaces, followed by a line comment (see 'linecommentp')
-- or by a newline.
emptyorcommentlinep :: TextParser m ()
emptyorcommentlinep =
  skipMany spacenonewline *> (void linecommentp <|> void newline)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- | Parse a possibly multi-line comment following a semicolon, and
-- return its lines (see 'followingcommentlinesp') joined into a single
-- text, each line terminated by a newline.
followingcommentp :: TextParser m Text
followingcommentp = joinLines <$> followingcommentlinesp
  where
    joinLines positioned = T.unlines [txt | (_, txt) <- positioned]
|
2018-05-11 02:30:00 +03:00
|
|
|
|
|
2018-05-16 03:59:49 +03:00
|
|
|
|
-- | Parse the comment lines following some item, each paired with its
-- source position: first an optional comment on the rest of the current
-- line (or else just the end of the line), then any number of further
-- comment lines, each of which must begin with whitespace.
-- If the same-line comment is empty or absent and there are no further
-- comment lines, the result is the empty list.
followingcommentlinesp :: TextParser m [(SourcePos, Text)]
followingcommentlinesp = do
  skipMany spacenonewline
  firstLine <- try commentp <|> endOfLineHere
  laterLines <- many (try indentedComment)
  pure $ case (firstLine, laterLines) of
    ((_, firstText), []) | T.null firstText -> []
    _                                       -> firstLine : laterLines
  where
    -- no same-line comment: note the position, require end of line (or
    -- of input) and stand in an empty comment text
    endOfLineHere = (,) <$> (getPosition <* eolof) <*> pure ""
    -- leading whitespace is required
    indentedComment = skipSome spacenonewline *> commentp
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- | Parse a possibly multi-line comment following a semicolon, and
-- any tags and/or posting dates within it. Posting dates can be
-- expressed with "date"/"date2" tags and/or bracketed dates. The
-- dates are parsed in full here so that errors are reported in the
-- right position. Missing years can be inferred if a default date is
-- provided.
--
-- Returns the comment text (each line stripped of surrounding
-- whitespace), the tags, and the first "date" and first "date2" posting
-- dates found, if any (tag dates taking precedence over bracketed ones).
--
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; a:b, date:3/4, [=5/6]"
-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
--
-- Year unspecified and no default provided -> unknown year error, at correct position:
-- >>> rejp (followingcommentandtagsp Nothing) "  ; xxx   date:3/4\n  ; second line"
-- Left ...1:22...partial date 3/4 found, but the current year is unknown...
--
-- Date tag value contains trailing text (perhaps the comma was
-- forgotten, or the two syntaxes were confused) -> the leading date is
-- accepted anyway:
-- >>> rejp (followingcommentandtagsp (Just $ fromGregorian 2000 1 2)) "; date:3/4=5/6"
-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
--
followingcommentandtagsp
  :: Monad m
  => Maybe Day
  -> ErroringJournalParser m (Text, [Tag], Maybe Day, Maybe Day)
followingcommentandtagsp mdefdate = do
  -- pdbg 0 "followingcommentandtagsp"

  commentLines <- lift followingcommentlinesp
  -- pdbg 0 $ "commentws:" ++ show commentLines

  -- Reparse the comment for any tags.
  positionedTags <- concat <$> reparseEach tagswithvaluepositions commentLines

  -- Extract date-tag style posting dates from the tags.
  -- Use the transaction date for defaults, if provided.
  tagDates <- orThrow (traverse tagDate (filter hasDateName positionedTags))

  -- Reparse the comment for any bracketed style posting dates.
  -- Use the transaction date for defaults, if provided.
  bracketedDates <-
    concat <$> reparseEach (bracketedpostingdatesp mdefdate) commentLines

  let postingDates = tagDates ++ bracketedDates
      mdate  = lookup "date" postingDates
      mdate2 = lookup "date2" postingDates
      strippedComment = T.unlines [T.strip txt | (_, txt) <- commentLines]
      tags = map snd positionedTags
  -- pdbg 0 $ "allDates: "++show postingDates
  -- pdbg 0 $ "comment:"++show strippedComment

  pure (strippedComment, tags, mdate, mdate2)

  where
    -- Run a text parser over one comment line, starting from that
    -- line's original source position so that errors point into the
    -- journal rather than into the extracted line.
    runTextParserAt parser (pos, txt) =
      runTextParser (setPosition pos *> parser) txt

    -- Run a text parser over every comment line, failing on the first
    -- parse error.
    reparseEach parser positionedLines =
      orThrow (traverse (runTextParserAt parser) positionedLines)

    -- Turn a parse error into a thrown, pretty-printed error message.
    orThrow result = either (throwError . parseErrorPretty) pure result

    hasDateName (_, (name, _)) = name == "date" || name == "date2"

    -- Parse the value of a date tag as a date, at the tag's position.
    tagDate :: (SourcePos, Tag)
            -> Either (ParseError Char Void) (TagName, Day)
    tagDate (pos, (name, value)) =
      (,) name <$> runTextParserAt (datep' defaultYear) (pos, value)
      where
        defaultYear = fmap (first3 . toGregorian) mdefdate
|
2018-05-12 23:16:22 +03:00
|
|
|
|
|
2018-05-17 06:31:56 +03:00
|
|
|
|
-- A transaction/posting comment must begin with a semicolon. The
-- whitespace right after the semicolon is discarded, and the comment text
-- is returned together with the source position at which that text starts.
commentp :: TextParser m (SourcePos, Text)
commentp = commentStartingWithp isSemicolon
  where
    isSemicolon c = c == ';'
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2018-05-17 06:31:56 +03:00
|
|
|
|
-- A line (file-level) comment may begin with a semicolon, a hash, or a
-- star (the latter allows org nodes). The whitespace right after that
-- character is discarded, and the comment text is returned together with
-- the source position at which that text starts.
linecommentp :: TextParser m (SourcePos, Text)
linecommentp = commentStartingWithp isLineCommentStart
  where
    isLineCommentStart c = c `elem` [';', '#', '*']
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
2018-05-17 06:31:56 +03:00
|
|
|
|
-- Parse a comment whose first character satisfies the given predicate.
-- After that character, any 'spacenonewline's are skipped; the rest of the
-- line (up to, but not including, a newline) is the comment's content. A
-- newline ending the line is consumed if present. The result is the
-- content, paired with the source position at which the content starts.
commentStartingWithp :: (Char -> Bool) -> TextParser m (SourcePos, Text)
commentStartingWithp f =
  -- ptrace "commentStartingWith"
  satisfy f
    *> skipMany spacenonewline
    *> positionedRestOfLine
    <* optional newline
  where
    positionedRestOfLine = (,) <$> getPosition <*> takeWhileP Nothing (/= '\n')
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- ** tags
|
|
|
|
|
|
|
|
|
|
-- | Collect the tags (name:value, with the value ended by a comma or
-- newline) embedded in some text. If the text fails to parse, no tags are
-- returned.
--
-- >>> commentTags "a b:, c:c d:d, e"
-- [("b",""),("c","c d:d")]
--
-- >>> commentTags "a [1/1/1] [1/1] [1], [=1/1/1] [=1/1] [=1] [1/1=1/1/1] [1=1/1/1] b:c"
-- [("b","c")]
--
-- --[("date","1/1/1"),("date","1/1"),("date2","1/1/1"),("date2","1/1"),("date","1/1"),("date2","1/1/1"),("date","1"),("date2","1/1/1")]
--
-- >>> commentTags "\na b:, \nd:e, f"
-- [("b",""),("d","e")]
--
-- >>> commentTags ":value"
-- []
--
commentTags :: Text -> [Tag]
commentTags s =
  case runTextParser tagsp s of
    Left _     -> []
    Right tags -> tags
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
-- | Parse every tag found in a string, discarding the source positions
-- that 'tagswithvaluepositions' reports alongside them.
tagsp :: SimpleTextParser [Tag]
tagsp = fmap dropPositions tagswithvaluepositions
  where
    dropPositions positioned = [tag | (_, tag) <- positioned]
|
|
|
|
|
|
|
|
|
|
-- Parse every tag (name:value) in the input, pairing each tag with the
-- source position of the first character of its (whitespace-stripped)
-- value. That position is what callers should hand to 'setPosition' when
-- re-parsing the value, so that errors are reported in the right column.
tagswithvaluepositions :: SimpleTextParser [(SourcePos, Tag)]
tagswithvaluepositions = do
  -- pdbg 0 $ "tagsp"

  -- If we parse in a single pass, we cannot know whether some text
  -- belongs to a tag label until we have reached a colon (in which case
  -- it does) or whitespace (in which case it does not). Therefore, we
  -- hold on to the text until we reach such a break point, and then
  -- decide what to do.

  potentialTagName <- tillNextBreak
  atSpaceChar <|> atColon potentialTagName <|> atEof

  where

    -- A character which ends a potential tag name.
    isBreak :: Char -> Bool
    isBreak c = isSpace c || c == ':'

    -- Whitespace which can precede a tag value. A newline is excluded
    -- because it ends the value.
    isValueLeadingSpace :: Char -> Bool
    isValueLeadingSpace c = isSpace c && c /= '\n'

    -- The (possibly empty) text up to the next break character.
    tillNextBreak :: SimpleTextParser Text
    tillNextBreak = takeWhileP Nothing (not . isBreak)

    -- A tag value: everything up to the next comma or newline, stripped of
    -- surrounding whitespace.
    tagValue :: SimpleTextParser Text
    tagValue = T.strip <$> takeWhileP Nothing (not . commaOrNewline)
      where commaOrNewline c = c == ',' || c == '\n'

    -- The held text was followed by whitespace, so it was not a tag name:
    -- drop it and carry on after the whitespace.
    atSpaceChar :: SimpleTextParser [(SourcePos, Tag)]
    atSpaceChar = skipSome spaceChar *> tagswithvaluepositions

    -- The held text was followed by a colon: if non-empty it is a tag name
    -- and a value follows; an empty name means a stray colon, which is
    -- skipped.
    atColon :: Text -> SimpleTextParser [(SourcePos, Tag)]
    atColon tagName = do
      _ <- char ':'
      if T.null tagName
        then tagswithvaluepositions
        else do
          -- tagValue strips leading whitespace from the value, so consume
          -- that whitespace before recording the position. Otherwise the
          -- position would point at the whitespace rather than at the
          -- value, and errors found when re-parsing the stripped value
          -- from this position would be reported too far to the left.
          _ <- takeWhileP Nothing isValueLeadingSpace
          pos <- getPosition
          tagVal <- tagValue
          let tag = (pos, (tagName, tagVal))
          tags <- tagswithvaluepositions
          pure $ tag : tags

    -- End of input: no more tags.
    atEof :: SimpleTextParser [(SourcePos, Tag)]
    atEof = eof *> pure []
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- ** posting dates
|
|
|
|
|
|
2018-05-12 23:59:53 +03:00
|
|
|
|
-- | Find and parse every bracketed posting date in a string. Dates are
-- parsed in full so that errors are useful. A missing year can be inferred
-- only when a default date is supplied.
--
bracketedpostingdatesp :: Maybe Day -> SimpleTextParser [(TagName,Day)]
bracketedpostingdatesp mdefdate =
  -- pdbg 0 $ "bracketedpostingdatesp"
  skipToBracket *> (concat <$> (datesOrLoneBracket `sepEndBy` skipToBracket))
  where
    -- Skip everything up to the next opening bracket (or end of input).
    skipToBracket :: SimpleTextParser ()
    skipToBracket = skipMany (notChar '[')

    -- Either the date tags of a bracketed date, or, when what follows is
    -- not one, just the opening bracket, which yields no dates.
    datesOrLoneBracket :: SimpleTextParser [(TagName, Day)]
    datesOrLoneBracket = bracketeddatetagsp mdefdate <|> ([] <$ char '[')
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
|
--- ** bracketed dates
|
|
|
|
|
|
|
|
|
|
-- | Parse a Ledger-style bracketed posting date ([DATE=DATE2]) into
-- "date" and/or "date2" tags. Anything resembling an attempt at one (a
-- square-bracketed sequence of 0123456789/-.= with at least one digit and
-- one date separator) is parsed as well, and throws an appropriate error.
--
-- The dates are parsed completely here, so that errors are reported at
-- the right position. A year missing from DATE can be inferred when a
-- default date is supplied. A year missing from DATE2 is inferred from
-- DATE.
--
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]"
-- Right [("date",2016-01-02),("date2",2016-03-04)]
--
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[1]"
-- Left ...not a bracketed date...
--
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/32]"
-- Left ...1:11:...well-formed but invalid date: 2016/1/32...
--
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[1/31]"
-- Left ...1:6:...partial date 1/31 found, but the current year is unknown...
--
-- >>> either (Left . parseErrorPretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
-- Left ...1:13:...expecting month or day...
--
bracketeddatetagsp :: Maybe Day -> SimpleTextParser [(TagName, Day)]
bracketeddatetagsp mdefdate =
  -- pdbg 0 "bracketeddatetagsp"
  looksLikeBracketedDate *> bracketed datetags
  where
    -- Run a parser between square brackets.
    bracketed :: SimpleTextParser a -> SimpleTextParser a
    bracketed p = between (char '[') (char ']') p

    -- Without consuming any input, check that what follows resembles a
    -- bracketed date closely enough to commit to parsing it as one.
    looksLikeBracketedDate :: SimpleTextParser ()
    looksLikeBracketedDate = try $ do
      inner <- lookAhead $ bracketed $ takeWhile1P Nothing isBracketedDateChar
      unless (T.any isDigit inner && T.any isDateSepChar inner) $
        fail "not a bracketed date"

    -- The optional DATE and the optional =DATE2. DATE takes its default
    -- year from the default date; DATE2 takes it from DATE when that was
    -- given, and from the default date otherwise.
    datetags :: SimpleTextParser [(TagName, Day)]
    datetags = do
      let defaultYear = yearOf <$> mdefdate
      primary <- optional (datep' defaultYear)
      let secondaryYear = (yearOf <$> primary) <|> defaultYear
      secondary <- optional (char '=' *> datep' secondaryYear)
      pure $ [("date", d) | Just d <- [primary]]
          ++ [("date2", d) | Just d <- [secondary]]

    yearOf = first3 . toGregorian

    isBracketedDateChar c = c == '=' || isDateSepChar c || isDigit c
|