2014-09-11 00:07:53 +04:00
|
|
|
{-|
|
2010-05-30 23:11:58 +04:00
|
|
|
|
2012-03-23 20:21:41 +04:00
|
|
|
This is the entry point to hledger's reading system, which can read
|
2012-03-24 22:08:11 +04:00
|
|
|
Journals from various data formats. Use this module if you want to parse
|
|
|
|
journal data or read journal files. Generally it should not be necessary
|
2012-03-23 20:21:41 +04:00
|
|
|
to import modules below this one.
|
2009-04-04 12:50:36 +04:00
|
|
|
-}
|
|
|
|
|
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
|
|
|
{-# LANGUAGE ScopedTypeVariables, OverloadedStrings #-}
|
|
|
|
|
2016-11-18 07:20:07 +03:00
|
|
|
module Hledger.Read (
|
|
|
|
|
|
|
|
-- * Journal files
|
2016-11-20 21:30:38 +03:00
|
|
|
PrefixedFilePath,
|
2016-11-18 07:20:07 +03:00
|
|
|
defaultJournal,
|
|
|
|
defaultJournalPath,
|
2017-09-15 03:41:42 +03:00
|
|
|
readJournalFilesWithOpts,
|
2016-11-18 07:20:07 +03:00
|
|
|
readJournalFiles,
|
|
|
|
readJournalFile,
|
|
|
|
requireJournalFileExists,
|
|
|
|
ensureJournalFileExists,
|
2016-11-20 21:30:38 +03:00
|
|
|
splitReaderPrefix,
|
2016-11-18 07:20:07 +03:00
|
|
|
|
|
|
|
-- * Journal parsing
|
|
|
|
readJournal,
|
|
|
|
readJournal',
|
|
|
|
|
|
|
|
-- * Re-exported
|
2016-11-20 21:27:16 +03:00
|
|
|
JournalReader.accountaliasp,
|
|
|
|
JournalReader.postingp,
|
2016-11-18 07:20:07 +03:00
|
|
|
module Hledger.Read.Common,
|
|
|
|
|
|
|
|
-- * Tests
|
|
|
|
samplejournal,
|
|
|
|
tests_Hledger_Read,
|
|
|
|
|
|
|
|
) where
|
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
import Control.Applicative ((<|>))
|
2016-12-01 03:39:33 +03:00
|
|
|
import Control.Arrow (right)
|
2016-05-18 05:46:54 +03:00
|
|
|
import qualified Control.Exception as C
|
2015-03-29 17:53:23 +03:00
|
|
|
import Control.Monad.Except
|
2018-04-17 00:47:04 +03:00
|
|
|
import Data.Default
|
2011-05-28 08:11:44 +04:00
|
|
|
import Data.List
|
2012-03-23 20:21:41 +04:00
|
|
|
import Data.Maybe
|
2017-09-15 19:55:17 +03:00
|
|
|
import Data.Ord
|
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
|
|
|
import Data.Text (Text)
|
|
|
|
import qualified Data.Text as T
|
2017-09-15 19:55:17 +03:00
|
|
|
import Data.Time (Day)
|
2016-11-19 00:24:57 +03:00
|
|
|
import Safe
|
2016-05-18 05:46:54 +03:00
|
|
|
import System.Directory (doesFileExist, getHomeDirectory)
|
|
|
|
import System.Environment (getEnv)
|
|
|
|
import System.Exit (exitFailure)
|
2017-09-15 19:55:17 +03:00
|
|
|
import System.FilePath
|
|
|
|
import System.IO
|
2011-05-28 08:11:44 +04:00
|
|
|
import Test.HUnit
|
2016-05-18 05:46:54 +03:00
|
|
|
import Text.Printf
|
2011-01-21 04:24:51 +03:00
|
|
|
|
2017-09-15 19:55:17 +03:00
|
|
|
import Hledger.Data.Dates (getCurrentDay, parsedate, showDate)
|
2016-05-18 05:46:54 +03:00
|
|
|
import Hledger.Data.Types
|
2016-05-23 10:32:55 +03:00
|
|
|
import Hledger.Read.Common
|
2016-11-20 21:27:16 +03:00
|
|
|
import qualified Hledger.Read.JournalReader as JournalReader
|
2017-03-30 07:00:16 +03:00
|
|
|
-- import qualified Hledger.Read.LedgerReader as LedgerReader
|
2016-11-20 21:27:16 +03:00
|
|
|
import qualified Hledger.Read.TimedotReader as TimedotReader
|
|
|
|
import qualified Hledger.Read.TimeclockReader as TimeclockReader
|
|
|
|
import qualified Hledger.Read.CsvReader as CsvReader
|
2011-05-28 08:11:44 +04:00
|
|
|
import Hledger.Utils
|
2011-09-23 07:53:14 +04:00
|
|
|
import Prelude hiding (getContents, writeFile)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | Name of the environment variable consulted first for the journal file path.
journalEnvVar :: String
journalEnvVar = "LEDGER_FILE"

-- | Name of the legacy environment variable consulted when the first one cannot be read.
journalEnvVar2 :: String
journalEnvVar2 = "LEDGER"

-- | File name of the hard-coded default journal.
journalDefaultFilename :: FilePath
journalDefaultFilename = ".hledger.journal"
|
|
|
|
|
2016-11-19 00:24:57 +03:00
|
|
|
-- The available journal readers, each one handling a particular data format.
-- Functions in this module that pick "the first matching reader" walk this
-- list from top to bottom.
readers :: [Reader]
readers =
  [ JournalReader.reader
  , TimeclockReader.reader
  , TimedotReader.reader
  , CsvReader.reader
  -- , LedgerReader.reader
  ]
|
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | The format name of every available reader, in the same order as 'readers'.
readerNames :: [String]
readerNames = [rFormat r | r <- readers]
|
|
|
|
|
|
|
|
-- | A file path which may begin with a reader name followed by a colon
-- (@journal:@, @csv:@, @timedot:@, etc.). See 'splitReaderPrefix' for how
-- the prefix is separated from the path proper.
type PrefixedFilePath = FilePath
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2016-11-18 07:20:07 +03:00
|
|
|
-- | Read the journal found at 'defaultJournalPath', using default input
-- options and no explicitly requested format. A read or parse failure is
-- raised as an error (via error') rather than returned.
defaultJournal :: IO Journal
defaultJournal = do
  path <- defaultJournalPath
  ej <- readJournalFile Nothing def path
  either error' return ej
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2016-11-18 07:20:07 +03:00
|
|
|
-- | Get the default journal file path specified by the environment.
-- Like ledger, we look first for the LEDGER_FILE environment
-- variable, and if that does not exist, for the legacy LEDGER
-- environment variable. If neither is set, or the value is blank,
-- return the hard-coded default, which is @.hledger.journal@ in the
-- user's home directory (or in the current directory, if we cannot
-- determine a home directory).
defaultJournalPath :: IO String
defaultJournalPath = do
  s <- envJournalPath
  if null s then homeJournalPath else return s
  where
    -- The value of LEDGER_FILE; if reading it throws an IOException, the
    -- value of LEDGER; if that throws too, the empty string.
    envJournalPath =
      getEnv journalEnvVar
        `C.catch` (\(_::C.IOException) -> getEnv journalEnvVar2
          `C.catch` (\(_::C.IOException) -> return ""))
    -- The hard-coded fallback path. Deliberately not called
    -- defaultJournalPath: a local binding of that name would shadow the
    -- enclosing function and make the branch above look recursive.
    homeJournalPath = do
      -- An undeterminable home directory becomes "", so the result is a
      -- path relative to the current directory.
      home <- getHomeDirectory `C.catch` (\(_::C.IOException) -> return "")
      return $ home </> journalDefaultFilename
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | @readJournalFiles mformat iopts prefixedfiles@
--
-- Read a Journal from each specified file path and combine them into one.
-- Or, return the first error message.
--
-- Combining Journals means concatenating them, basically.
-- The parse state resets at the start of each file, which means that
-- directives & aliases do not cross file boundaries.
-- (The final parse state saved in the Journal does span all files, however.)
--
-- As with readJournalFile,
-- input options (@iopts@) specify CSV conversion rules file to help convert CSV data,
-- enable or disable balance assertion checking and automated posting generation.
--
readJournalFiles :: Maybe StorageFormat -> InputOpts -> [PrefixedFilePath] -> IO (Either String Journal)
readJournalFiles mformat iopts prefixedfiles = do
  results <- mapM (readJournalFile mformat iopts) prefixedfiles
  -- sequence stops at the first Left; otherwise the journals are combined.
  return $ fmap combine (sequence results)
  where
    -- Like mconcat, but a non-empty list is folded without appending a
    -- trailing mempty.
    combine :: Monoid t => [t] -> t
    combine [] = mempty
    combine js = foldr1 mappend js
|
2016-11-20 21:30:38 +03:00
|
|
|
|
|
|
|
-- | @readJournalFile mformat iopts prefixedfile@
--
-- Read a Journal from this file, or from stdin if the file path is -,
-- or return an error message. The file path can have a READER: prefix.
--
-- The reader (data format) is chosen based on (in priority order):
-- the @mformat@ argument;
-- the file path's READER: prefix, if any;
-- a recognised file name extension (in readJournal);
-- if none of these identify a known reader, all built-in readers are tried in turn.
--
-- Input options (@iopts@) specify CSV conversion rules file to help convert CSV data,
-- enable or disable balance assertion checking and automated posting generation.
--
readJournalFile :: Maybe StorageFormat -> InputOpts -> PrefixedFilePath -> IO (Either String Journal)
readJournalFile mformat iopts prefixedfile = do
  -- Exits the program with a message if the file is missing (and is not "-").
  requireJournalFileExists path
  txt <- readFileOrStdinPortably path
  readJournal format iopts (Just path) txt
  where
    (prefixformat, path) = splitReaderPrefix prefixedfile
    -- An explicitly requested format wins over the path's prefix.
    format = mformat <|> prefixformat
|
2016-05-25 04:28:26 +03:00
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | Separate a leading @READER:@ prefix from a file path, when READER is
-- one of the known reader names. Eg "csv:-" -> (Just "csv", "-").
-- A path without such a prefix is returned unchanged, paired with Nothing.
splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath)
splitReaderPrefix f =
  headDef (Nothing, f)
    [ (Just name, rest)
    | name <- readerNames
    , Just rest <- [stripPrefix (name ++ ":") f]
    ]
|
|
|
|
|
|
|
|
-- | Check that the specified journal file exists. The path "-" is always
-- accepted. For any other path that is not an existing file, print a
-- helpful explanation on stderr and exit with a failure status.
requireJournalFileExists :: FilePath -> IO ()
requireJournalFileExists f
  | f == "-" = return ()
  | otherwise = do
      exists <- doesFileExist f
      unless exists $ do  -- XXX might not be a journal file
        hPrintf stderr "The hledger journal file \"%s\" was not found.\n" f
        hPrintf stderr "Please create it first, eg with \"hledger add\" or a text editor.\n"
        hPrintf stderr "Or, specify an existing journal file with -f or LEDGER_FILE.\n"
        exitFailure
|
|
|
|
|
|
|
|
-- | Make sure a journal file is present at the given path. When no file
-- exists there, announce the creation on stderr and write a new file
-- holding the content produced by 'newJournalContent'.
ensureJournalFileExists :: FilePath -> IO ()
ensureJournalFileExists f = do
  alreadythere <- doesFileExist f
  unless alreadythere $ do
    hPrintf stderr "Creating hledger journal file %s.\n" f
    -- note Hledger.Utils.UTF8.* do no line ending conversion on windows,
    -- we currently require unix line endings on all platforms.
    content <- newJournalContent
    writeFile f content
|
|
|
|
|
|
|
|
-- | Produce the initial text of an auto-created journal file: a single
-- comment line recording the current day (as rendered by 'show').
newJournalContent :: IO String
newJournalContent = fmap stamp getCurrentDay
  where
    stamp d = printf "; journal created %s by hledger\n" (show d)
|
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | Parse a Journal from the given text with default input options, no
-- requested format and no file path, so that the readers are tried in
-- turn. A failure is raised as an error (via error') rather than returned.
readJournal' :: Text -> IO Journal
readJournal' t = do
  ej <- readJournal Nothing def Nothing t
  either error' return ej
|
2016-05-18 05:46:54 +03:00
|
|
|
|
|
|
|
-- Smoke test: running 'samplejournal' (which parses with readJournal')
-- must complete without raising an error.
tests_readJournal' =
  [ "readJournal' parses sample journal" ~: (samplejournal >> assertBool "" True)
  ]
|
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | @readJournal mformat iopts mfile txt@
--
-- Read a Journal from some text, or return an error message.
--
-- The reader (data format) is chosen based on (in priority order):
-- the @mformat@ argument;
-- a recognised file name extension in @mfile@ (if provided).
-- If none of these identify a known reader, all built-in readers
-- not marked experimental are tried in turn
-- (returning the first one's error message if none of them succeed).
--
-- Input options (@iopts@) specify CSV conversion rules file to help convert CSV data,
-- enable or disable balance assertion checking and automated posting generation.
--
readJournal :: Maybe StorageFormat -> InputOpts -> Maybe FilePath -> Text -> IO (Either String Journal)
readJournal mformat iopts mfile txt = tryReaders candidates iopts mfile txt
  where
    candidates =
      case findReader mformat mfile of
        Just r  -> [r]
        Nothing -> filter (not . rExperimental) readers
|
2016-11-18 07:20:07 +03:00
|
|
|
|
2016-11-19 00:24:57 +03:00
|
|
|
-- | @findReader mformat mpath@
--
-- Pick a single reader, if one can be identified:
--
-- * when @mformat@ is given, the first reader with that format name;
--
-- * otherwise, when a file path is given: the first reader named by the
--   path's READER: prefix if it has one, else the first reader which
--   lists the path's file extension (without the dot);
--
-- * Nothing when neither argument is given, or nothing matches.
findReader :: Maybe StorageFormat -> Maybe FilePath -> Maybe Reader
findReader mformat mpath =
  case (mformat, mpath) of
    (Just fmt, _)        -> byFormat fmt
    (Nothing, Nothing)   -> Nothing
    (Nothing, Just path) ->
      let (mprefix, path') = splitReaderPrefix path
          ext              = drop 1 (takeExtension path')
          byExtension      = firstReader ((ext `elem`) . rExtensions)
      in maybe byExtension byFormat mprefix
  where
    firstReader p = listToMaybe (filter p readers)
    byFormat fmt  = firstReader ((== fmt) . rFormat)
|
2016-11-18 07:20:07 +03:00
|
|
|
|
|
|
|
-- | @tryReaders readers iopts path t@
--
-- Try to parse the given text to a Journal using each reader in turn,
-- returning the first success, or if all of them fail, the first error message.
-- With an empty reader list the error is "no readers found".
-- When no path is given, the readers are told the source is "(string)".
tryReaders :: [Reader] -> InputOpts -> Maybe FilePath -> Text -> IO (Either String Journal)
tryReaders candidates iopts path t = attempt Nothing candidates
  where
    sourcename = fromMaybe "(string)" path

    -- The first argument remembers the earliest failure seen so far.
    attempt :: Maybe String -> [Reader] -> IO (Either String Journal)
    attempt firsterr [] = return $ Left $ fromMaybe "no readers found" firsterr
    attempt firsterr (r:rest) = do
      dbg1IO "trying reader" (rFormat r)
      result <- runExceptT (rParser r iopts sourcename t)
      dbg1IO "reader result" $ either id show result
      case result of
        Right j -> return $ Right j                      -- success!
        Left e  -> attempt (firsterr <|> Just e) rest    -- keep trying
|
|
|
|
|
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
--- New versions of readJournal* with easier arguments, and support for --new.
|
2017-09-15 03:41:42 +03:00
|
|
|
|
|
|
|
-- | Read a Journal from each given file path with 'readJournalFileWithOpts'
-- and combine the results into one, or return the first error message.
readJournalFilesWithOpts :: InputOpts -> [FilePath] -> IO (Either String Journal)
readJournalFilesWithOpts iopts files = do
  results <- mapM (readJournalFileWithOpts iopts) files
  return $ fmap combine (sequence results)
  where
    -- Like mconcat, but a non-empty list is folded without appending a
    -- trailing mempty.
    combine :: Monoid t => [t] -> t
    combine [] = mempty
    combine js = foldr1 mappend js
|
|
|
|
|
|
|
|
-- | Read a Journal from one file (or stdin, for "-"), or return an error
-- message. The path may carry a READER: prefix; the prefix's format is
-- listed ahead of the format already present in @iopts@ when choosing.
--
-- When the @new_@ option is set, the journal is reduced to the
-- transactions not covered by the latest dates remembered for this file,
-- and, if @new_save_@ is also set and there are new latest dates, those
-- are saved for next time.
readJournalFileWithOpts :: InputOpts -> PrefixedFilePath -> IO (Either String Journal)
readJournalFileWithOpts iopts prefixedfile = do
  requireJournalFileExists f
  t <- readFileOrStdinPortably f
  ej <- readJournalWithOpts iopts' (Just f) t
  case ej of
    Right j | new_ iopts -> do
      ds <- previousLatestDates f
      let (newj, newds) = journalFilterSinceLatestDates ds j
      when (new_save_ iopts && not (null newds)) $ saveLatestDates newds f
      return $ Right newj
    -- A parse error, or a journal that needs no filtering, passes through.
    _ -> return ej
  where
    (mfmt, f) = splitReaderPrefix prefixedfile
    iopts'    = iopts{mformat_=firstJust [mfmt, mformat_ iopts]}
|
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | Zero or more copies of one date: the latest transaction date read
-- from a file, repeated once for each transaction seen on that date.
type LatestDates = [Day]
|
2017-09-15 19:55:17 +03:00
|
|
|
|
|
|
|
-- | From an unsorted list of dates, return every occurrence of the latest
-- one. So a latest date appearing once gives a one-element list, one
-- appearing three times (anywhere) gives three of it, and an empty input
-- gives an empty list.
latestDates :: [Day] -> LatestDates
latestDates ds =
  case group (sortBy (flip compare) ds) of
    newest : _ -> newest   -- descending order puts the latest date's run first
    []         -> []
|
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | Record these dates as the latest seen for the given journal file, by
-- writing them, one per line (formatted with showDate), to the file named
-- by 'latestDatesFileFor'.
saveLatestDates :: LatestDates -> FilePath -> IO ()
saveLatestDates dates f = writeFile (latestDatesFileFor f) contents
  where
    contents = unlines (map showDate dates)
|
2017-09-15 19:55:17 +03:00
|
|
|
|
|
|
|
-- | Fetch the latest transaction dates remembered for this journal file,
-- one date per line of its latest-dates file. If there were multiple
-- transactions on the latest date, that many dates are returned,
-- otherwise just one. Returns none when no latest-dates file exists.
previousLatestDates :: FilePath -> IO LatestDates
previousLatestDates f = do
  let latestfile = latestDatesFileFor f
  exists <- doesFileExist latestfile
  if not exists
    then return []
    else do
      t <- readFileStrictly latestfile
      -- Surrounding whitespace is stripped from the whole text and from each line.
      return [parsedate (strip l) | l <- lines (strip (T.unpack t))]
|
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | The path of the file holding latest transaction dates for the given
-- journal file: a sibling in the same directory, named @.latest.FILE@.
latestDatesFileFor :: FilePath -> FilePath
latestDatesFileFor f =
  let (dir, fname) = splitFileName f
      latestname   = ".latest" <.> fname
  in dir </> latestname
|
|
|
|
|
2017-09-15 19:55:17 +03:00
|
|
|
-- | Read a file's text with readFilePortably, forcing evaluation of the
-- text's length before returning it.
readFileStrictly :: FilePath -> IO Text
readFileStrictly f = do
  t <- readFilePortably f
  _ <- C.evaluate (T.length t)
  return t
|
2017-09-15 19:55:17 +03:00
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | Given zero or more latest dates (all the same, representing the
-- latest previously seen transaction date, and how many transactions
-- were seen on that date), keep only the transactions we can assume are
-- newer: those dated earlier are dropped, as are that many of the
-- transactions dated exactly on the latest date. The surviving
-- transactions are ordered by date.
--
-- Also returns the new latest dates, computed from the transactions on or
-- after the previous latest date. With no previous latest dates the
-- journal is returned untouched, with the latest dates of all of its
-- transactions.
journalFilterSinceLatestDates :: LatestDates -> Journal -> (Journal, LatestDates)
journalFilterSinceLatestDates [] j = (j, latestDates $ map tdate $ jtxns j)
journalFilterSinceLatestDates ds@(d:_) j =
  let candidates        = sortBy (comparing tdate) [t | t <- jtxns j, tdate t >= d]
      (onlatest, after) = span ((== d) . tdate) candidates
      -- One already-seen transaction is skipped per remembered date.
      unseen            = drop (length ds) onlatest ++ after
  in (j{jtxns=unseen}, latestDates $ map tdate $ onlatest ++ after)
|
2017-09-15 03:41:42 +03:00
|
|
|
|
|
|
|
-- | Read a Journal from some text, or return an error message. The reader
-- is the one identified by the format in @iopts@ or by the file path (see
-- 'findReader'); when none is identified, every reader not marked
-- experimental is tried in turn.
readJournalWithOpts :: InputOpts -> Maybe FilePath -> Text -> IO (Either String Journal)
readJournalWithOpts iopts mfile txt = tryReadersWithOpts iopts mfile candidates txt
  where
    candidates =
      case findReader (mformat_ iopts) mfile of
        Just r  -> [r]
        Nothing -> filter (not . rExperimental) readers
|
|
|
|
|
|
|
|
-- | Try to parse the given text to a Journal using each reader in turn,
-- returning the first success, or if all of them fail, the first error
-- message. With an empty reader list the error is "no readers found".
-- When no path is given, the readers are told the source is "(string)".
tryReadersWithOpts :: InputOpts -> Maybe FilePath -> [Reader] -> Text -> IO (Either String Journal)
tryReadersWithOpts iopts mpath candidates txt = attempt Nothing candidates
  where
    sourcename = fromMaybe "(string)" mpath

    -- The first argument remembers the earliest failure seen so far.
    attempt :: Maybe String -> [Reader] -> IO (Either String Journal)
    attempt firsterr [] = return $ Left $ fromMaybe "no readers found" firsterr
    attempt firsterr (r:rest) = do
      dbg1IO "trying reader" (rFormat r)
      result <- runExceptT (rParser r iopts sourcename txt)
      dbg1IO "reader result" $ either id show result
      case result of
        Right j -> return $ Right j                      -- success!
        Left e  -> attempt (firsterr <|> Just e) rest    -- keep trying
|
|
|
|
|
|
|
|
---
|
|
|
|
|
2016-11-18 07:20:07 +03:00
|
|
|
|
2016-05-18 05:46:54 +03:00
|
|
|
-- tests
|
2009-04-04 12:50:36 +04:00
|
|
|
|
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
|
|
|
-- | A small journal used by the tests: five transactions and one
-- multi-line comment block, parsed with readJournal'.
--
-- Each amount is separated from its account name by two spaces. Account
-- names may themselves contain single spaces, so with a single space the
-- amount would be read as part of the account name.
samplejournal :: IO Journal
samplejournal = readJournal' $ T.unlines
 ["2008/01/01 income"
 ," assets:bank:checking  $1"
 ," income:salary"
 ,""
 ,"comment"
 ,"multi line comment here"
 ,"for testing purposes"
 ,"end comment"
 ,""
 ,"2008/06/01 gift"
 ," assets:bank:checking  $1"
 ," income:gifts"
 ,""
 ,"2008/06/02 save"
 ," assets:bank:saving  $1"
 ," assets:bank:checking"
 ,""
 ,"2008/06/03 * eat & shop"
 ," expenses:food  $1"
 ," expenses:supplies  $1"
 ," assets:cash"
 ,""
 ,"2008/12/31 * pay off"
 ," liabilities:debts  $1"
 ," assets:bank:checking"
 ]
|
2009-04-04 12:50:36 +04:00
|
|
|
|
2012-05-27 22:14:20 +04:00
|
|
|
-- | All tests for this module: the readJournal' tests, then each reader
-- module's own tests, then a check that empty input parses to a journal
-- with no transactions.
tests_Hledger_Read = TestList (tests_readJournal' ++ readertests ++ [emptyjournaltest])
  where
    readertests =
      [ JournalReader.tests_Hledger_Read_JournalReader
      -- , LedgerReader.tests_Hledger_Read_LedgerReader
      , TimeclockReader.tests_Hledger_Read_TimeclockReader
      , TimedotReader.tests_Hledger_Read_TimedotReader
      , CsvReader.tests_Hledger_Read_CsvReader
      ]

    emptyjournaltest = "journal" ~: do
      parsed <- runExceptT $ parseWithState mempty JournalReader.journalp ""
      assertBool "journalp should parse an empty file" (isRight parsed)
      jE <- readJournal Nothing def Nothing "" -- don't know how to get it from journal
      either error' (assertBool "journalp parsing an empty file should give an empty journal" . null . jtxns) jE
|