2020-03-29 03:09:47 +03:00
|
|
|
--- * -*- outline-regexp:"--- \\*"; -*-
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** doc
|
2020-02-28 10:51:54 +03:00
|
|
|
-- In Emacs, use TAB on lines beginning with "-- *" to collapse/expand sections.
|
2014-09-11 00:07:53 +04:00
|
|
|
{-|
|
2010-05-30 23:11:58 +04:00
|
|
|
|
2012-03-23 20:21:41 +04:00
|
|
|
This is the entry point to hledger's reading system, which can read
|
2012-03-24 22:08:11 +04:00
|
|
|
Journals from various data formats. Use this module if you want to parse
|
|
|
|
journal data or read journal files. Generally it should not be necessary
|
2012-03-23 20:21:41 +04:00
|
|
|
to import modules below this one.
|
2020-02-28 10:51:54 +03:00
|
|
|
|
2009-04-04 12:50:36 +04:00
|
|
|
-}
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** language
|
2020-12-27 10:59:30 +03:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
{-# LANGUAGE PackageImports #-}
|
2020-02-28 10:51:54 +03:00
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** exports
|
2016-11-18 07:20:07 +03:00
|
|
|
module Hledger.Read (
|
|
|
|
|
|
|
|
-- * Journal files
|
2016-11-20 21:30:38 +03:00
|
|
|
PrefixedFilePath,
|
2016-11-18 07:20:07 +03:00
|
|
|
defaultJournal,
|
|
|
|
defaultJournalPath,
|
2018-04-18 01:13:13 +03:00
|
|
|
readJournalFiles,
|
|
|
|
readJournalFile,
|
2016-11-18 07:20:07 +03:00
|
|
|
requireJournalFileExists,
|
|
|
|
ensureJournalFileExists,
|
|
|
|
|
|
|
|
-- * Journal parsing
|
|
|
|
readJournal,
|
|
|
|
readJournal',
|
|
|
|
|
|
|
|
-- * Re-exported
|
2021-07-02 15:54:49 +03:00
|
|
|
JournalReader.tmpostingrulep,
|
2020-03-01 21:16:52 +03:00
|
|
|
findReader,
|
|
|
|
splitReaderPrefix,
|
2016-11-18 07:20:07 +03:00
|
|
|
module Hledger.Read.Common,
|
|
|
|
|
|
|
|
-- * Tests
|
2018-09-06 23:08:26 +03:00
|
|
|
tests_Read,
|
2016-11-18 07:20:07 +03:00
|
|
|
|
|
|
|
) where
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** imports
|
2016-12-01 03:39:33 +03:00
|
|
|
import Control.Arrow (right)
|
2016-05-18 05:46:54 +03:00
|
|
|
import qualified Control.Exception as C
|
2019-12-02 19:21:06 +03:00
|
|
|
import Control.Monad (when)
|
|
|
|
import "mtl" Control.Monad.Except (runExceptT)
|
2020-02-29 12:54:24 +03:00
|
|
|
import Data.Default (def)
|
2020-02-28 10:31:53 +03:00
|
|
|
import Data.Foldable (asum)
|
2020-02-29 12:54:24 +03:00
|
|
|
import Data.List (group, sort, sortBy)
|
|
|
|
import Data.List.NonEmpty (nonEmpty)
|
|
|
|
import Data.Maybe (fromMaybe)
|
|
|
|
import Data.Ord (comparing)
|
|
|
|
import Data.Semigroup (sconcat)
|
lib: textification: parse stream
10% more allocation, but 35% lower maximum residency, and slightly quicker.
hledger -f data/100x100x10.journal stats
<<ghc: 39327768 bytes, 77 GCs, 196834/269496 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.010 elapsed), 0.020 MUT (0.092 elapsed), 0.014 GC (0.119 elapsed) :ghc>>
<<ghc: 42842136 bytes, 84 GCs, 194010/270912 avg/max bytes residency (3 samples), 2M in use, 0.000 INIT (0.009 elapsed), 0.016 MUT (0.029 elapsed), 0.012 GC (0.120 elapsed) :ghc>>
hledger -f data/1000x1000x10.journal stats
<<ghc: 314291440 bytes, 612 GCs, 2070776/6628048 avg/max bytes residency (7 samples), 16M in use, 0.000 INIT (0.000 elapsed), 0.128 MUT (0.144 elapsed), 0.059 GC (0.070 elapsed) :ghc>>
<<ghc: 349558872 bytes, 681 GCs, 1397597/4106384 avg/max bytes residency (7 samples), 11M in use, 0.000 INIT (0.004 elapsed), 0.124 MUT (0.133 elapsed), 0.047 GC (0.053 elapsed) :ghc>>
hledger -f data/10000x1000x10.journal stats
<<ghc: 3070026824 bytes, 5973 GCs, 12698030/62951784 avg/max bytes residency (10 samples), 124M in use, 0.000 INIT (0.002 elapsed), 1.268 MUT (1.354 elapsed), 0.514 GC (0.587 elapsed) :ghc>>
<<ghc: 3424013128 bytes, 6658 GCs, 11405501/41071624 avg/max bytes residency (11 samples), 111M in use, 0.000 INIT (0.001 elapsed), 1.343 MUT (1.406 elapsed), 0.511 GC (0.573 elapsed) :ghc>>
hledger -f data/100000x1000x10.journal stats
<<ghc: 30753387392 bytes, 59811 GCs, 117615462/666703600 avg/max bytes residency (14 samples), 1588M in use, 0.000 INIT (0.000 elapsed), 12.068 MUT (12.238 elapsed), 6.015 GC (7.190 elapsed) :ghc>>
<<ghc: 34306530696 bytes, 66727 GCs, 76806196/414629312 avg/max bytes residency (14 samples), 1009M in use, 0.000 INIT (0.010 elapsed), 14.357 MUT (16.370 elapsed), 5.298 GC (6.534 elapsed) :ghc>>
2016-05-25 01:58:23 +03:00
|
|
|
import Data.Text (Text)
|
|
|
|
import qualified Data.Text as T
|
2020-11-05 04:58:04 +03:00
|
|
|
import qualified Data.Text.IO as T
|
2017-09-15 19:55:17 +03:00
|
|
|
import Data.Time (Day)
|
2020-02-29 12:54:24 +03:00
|
|
|
import Safe (headDef)
|
2016-05-18 05:46:54 +03:00
|
|
|
import System.Directory (doesFileExist, getHomeDirectory)
|
|
|
|
import System.Environment (getEnv)
|
|
|
|
import System.Exit (exitFailure)
|
2020-02-29 12:54:24 +03:00
|
|
|
import System.FilePath ((<.>), (</>), splitDirectories, splitFileName)
|
2019-06-26 20:19:53 +03:00
|
|
|
import System.Info (os)
|
2020-11-05 04:58:04 +03:00
|
|
|
import System.IO (hPutStr, stderr)
|
2011-01-21 04:24:51 +03:00
|
|
|
|
2020-08-26 11:11:20 +03:00
|
|
|
import Hledger.Data.Dates (getCurrentDay, parsedateM, showDate)
|
2016-05-18 05:46:54 +03:00
|
|
|
import Hledger.Data.Types
|
2018-08-20 16:28:40 +03:00
|
|
|
import Hledger.Read.Common
|
2020-03-01 21:16:52 +03:00
|
|
|
import Hledger.Read.JournalReader as JournalReader
|
|
|
|
import Hledger.Read.CsvReader (tests_CsvReader)
|
|
|
|
-- import Hledger.Read.TimedotReader (tests_TimedotReader)
|
|
|
|
-- import Hledger.Read.TimeclockReader (tests_TimeclockReader)
|
2011-05-28 08:11:44 +04:00
|
|
|
import Hledger.Utils
|
2011-09-23 07:53:14 +04:00
|
|
|
import Prelude hiding (getContents, writeFile)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** doctest setup
|
|
|
|
-- $setup
|
|
|
|
-- >>> :set -XOverloadedStrings
|
|
|
|
|
|
|
|
--- ** journal reading
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | Name of the environment variable consulted first for the journal file path.
journalEnvVar :: String
journalEnvVar = "LEDGER_FILE"

-- | Name of the legacy environment variable, tried when 'journalEnvVar' is not set.
journalEnvVar2 :: String
journalEnvVar2 = "LEDGER"

-- | File name of the fallback journal, joined onto the home directory
-- by 'defaultJournalPath'.
journalDefaultFilename :: FilePath
journalDefaultFilename = ".hledger.journal"
|
|
|
|
|
lib: drop the file format auto-detection feature
For a long time hledger has auto-detected the file format when it's
not known, eg when reading from a file with unusual extension (like
.dat or .txt), or from standard input (-f-), or when using the include
directive (which currently ignores file extensions).
Auto-detecting has been done by trying all readers until one succeeds.
This could guess wrong in some cases, but it was so rare that it has
been working fine.
Recently, more conveniences have been added to timedot format,
increasing its overlap with journal format, which makes this kind of
auto-detection unreliable.
Auto-detection and auto-detection failures are (probably) still pretty
rare in practice. But when it does happen it's confusing, giving
misleading errors or false successes (eg printing timedot entries
instead of a journal error).
For predictability and to minimise confusion, hledger no longer tries
to guess; when there's no file extension or reader prefix, it assumes
journal format. To specify one of the other formats, you must use a
standard file extension (.timeclock, .timedot, .csv, .ssv, .tsv), or a
reader prefix (-f csv:foo.txt, -f timedot:-).
For now, the include directive still tries to autodetect
(journal/timeclock/timedot), and this can't be overridden; it will be
fixed later.
Experimental; testing and feedback welcome.
2020-02-29 20:17:39 +03:00
|
|
|
-- | Parse the given text as a Journal using the default input options
-- and no file path (so the journal reader is selected by 'readJournal').
-- A parse failure is not returned; it is raised with @error'@.
readJournal' :: Text -> IO Journal
readJournal' txt = do
  result <- readJournal definputopts Nothing txt
  case result of
    Left err -> error' err  -- PARTIAL:
    Right j  -> return j
|
2020-02-28 10:51:54 +03:00
|
|
|
|
|
|
|
-- | @readJournal iopts mfile txt@
--
-- Read a Journal from some text, or return an error message.
--
-- The reader (data format) is chosen based on, in this order:
--
-- - a reader name provided in @iopts@
--
-- - a reader prefix in the @mfile@ path
--
-- - a file extension in @mfile@
--
-- If none of these is available, or if the reader name is unrecognised,
-- we use the journal reader. (We used to try all readers in this case;
-- since hledger 1.17, we prefer predictability.)
--
-- When no path is given, the reader is handed the placeholder name
-- @"(string)"@ in its place.
readJournal :: InputOpts -> Maybe FilePath -> Text -> IO (Either String Journal)
readJournal iopts mpath txt = do
  dbg6IO "trying reader" (rFormat chosen)
  runExceptT $ rReadFn chosen iopts sourcename txt
  where
    -- fall back to the journal reader when findReader finds nothing
    chosen :: Reader IO
    chosen = fromMaybe JournalReader.reader (findReader (mformat_ iopts) mpath)
    -- name passed to the reader in place of a real file path
    sourcename = fromMaybe "(string)" mpath
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2016-11-18 07:20:07 +03:00
|
|
|
-- | Read the journal file located by 'defaultJournalPath', using the
-- default input options. A read or parse error is raised with @error'@
-- rather than returned.
defaultJournal :: IO Journal
defaultJournal = do
  path   <- defaultJournalPath
  result <- readJournalFile definputopts path
  either error' return result  -- PARTIAL:
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2016-11-18 07:20:07 +03:00
|
|
|
-- | Get the default journal file path specified by the environment.
-- Like ledger, we look first for the LEDGER_FILE environment
-- variable, and if that does not exist, for the legacy LEDGER
-- environment variable. If neither is set, or the value is blank,
-- return the hard-coded default, which is @.hledger.journal@ in the
-- user's home directory (or in the current directory, if we cannot
-- determine a home directory).
defaultJournalPath :: IO String
defaultJournalPath = do
  envpath <- pathFromEnv
  if null envpath then fallbackPath else return envpath
  where
    -- An IOException from getEnv is treated as "variable not set":
    -- try the next variable, and finally yield "".
    pathFromEnv =
      getEnv journalEnvVar `C.catch` (\(_ :: C.IOException) ->
        getEnv journalEnvVar2 `C.catch` (\(_ :: C.IOException) ->
          return ""))
    -- With no home directory, home is "" and the result is just the bare file name.
    fallbackPath = do
      home <- getHomeDirectory `C.catch` (\(_ :: C.IOException) -> return "")
      return (home </> journalDefaultFilename)
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2020-02-28 10:51:54 +03:00
|
|
|
-- | A file path optionally prefixed by a reader name and colon
|
|
|
|
-- (journal:, csv:, timedot:, etc.).
|
|
|
|
type PrefixedFilePath = FilePath  -- plain alias of FilePath; readJournalFile separates the prefix with splitReaderPrefix
|
|
|
|
|
|
|
|
-- | Read a Journal from each specified file path and combine them into one.
-- Or, return the first error message.
--
-- Combining Journals means concatenating them, basically.
-- The parse state resets at the start of each file, which means that
-- directives & aliases do not affect subsequent sibling or parent files.
-- They do affect included child files though.
-- Also the final parse state saved in the Journal does span all files.
--
-- Every file is read before any error is reported. An empty list of
-- paths yields the default ('def') Journal.
readJournalFiles :: InputOpts -> [PrefixedFilePath] -> IO (Either String Journal)
readJournalFiles iopts paths = do
  results <- mapM (readJournalFile iopts) paths
  -- sequence keeps the first Left, otherwise collects all the journals
  return $ right combine (sequence results)
  where
    combine js = maybe def sconcat (nonEmpty js)
|
2020-02-28 10:51:54 +03:00
|
|
|
|
|
|
|
-- | Read a Journal from this file, or from stdin if the file path is -,
-- or return an error message. The file path can have a READER: prefix.
--
-- The reader (data format) to use is determined from (in priority order):
-- the file path's READER: prefix, if any;
-- the @mformat_@ specified in the input options, if any;
-- a recognised file name extension.
-- If none of these identify a known reader, the journal reader is used.
--
-- The input options can also configure balance assertion checking, automated posting
-- generation, a rules file for converting CSV data, etc.
--
-- If the file does not exist (and is not -), 'requireJournalFileExists'
-- prints a message and exits the program. When @new_@ is set, only the
-- transactions that 'journalFilterSinceLatestDates' keeps are returned,
-- and when @new_save_@ is also set the new latest dates (if any) are saved
-- for next time.
readJournalFile :: InputOpts -> PrefixedFilePath -> IO (Either String Journal)
readJournalFile iopts prefixedfile = do
  let
    (mfmt, f) = splitReaderPrefix prefixedfile
    -- asum takes the first Just, so a READER: prefix overrides any
    -- format already present in the input options.
    iopts' = iopts{mformat_=asum [mfmt, mformat_ iopts]}
  requireJournalFileExists f
  t <- readFileOrStdinPortably f
    -- <- T.readFile f  -- or without line ending translation, for testing
  ej <- readJournal iopts' (Just f) t
  case ej of
    Right j | new_ iopts -> do
      ds <- previousLatestDates f
      let (newj, newds) = journalFilterSinceLatestDates ds j
      when (new_save_ iopts && not (null newds)) $ saveLatestDates newds f
      return $ Right newj
    -- errors, and successful reads without new_, pass through unchanged
    _ -> return ej
|
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** utilities
|
2020-02-28 10:51:54 +03:00
|
|
|
|
2016-11-20 21:30:38 +03:00
|
|
|
-- | Check that the given journal file exists. The path "-" is always
-- accepted without checking. For any other path that is not an existing
-- file, print a three-line explanation to stderr and exit with failure.
requireJournalFileExists :: FilePath -> IO ()
requireJournalFileExists f
  | f == "-"  = return ()
  | otherwise = do
      found <- doesFileExist f
      if found
        then return ()
        else do  -- XXX might not be a journal file
          mapM_ (hPutStr stderr)
            [ "The hledger journal file \"" <> f <> "\" was not found.\n"
            , "Please create it first, eg with \"hledger add\" or a text editor.\n"
            , "Or, specify an existing journal file with -f or LEDGER_FILE.\n"
            ]
          exitFailure
|
|
|
|
|
|
|
|
-- | Ensure there is a journal file at the given path, creating an empty one if needed.
-- On Windows, also ensure that the path contains no trailing dots
-- which could cause data loss (see 'isWindowsUnsafeDotPath'); if it does,
-- print a message to stderr and exit with failure instead of creating anything.
--
-- A newly created file contains only the comment line produced by
-- 'newJournalContent'; a notice is printed to stderr when this happens.
ensureJournalFileExists :: FilePath -> IO ()
ensureJournalFileExists f = do
  -- System.Info.os is "mingw32" on Windows; the dot check applies only there.
  when (os == "mingw32" && isWindowsUnsafeDotPath f) $ do
    hPutStr stderr $ "Part of file path \"" <> show f <> "\"\n ends with a dot, which is unsafe on Windows; please use a different path.\n"
    exitFailure
  exists <- doesFileExist f
  when (not exists) $ do
    hPutStr stderr $ "Creating hledger journal file " <> show f <> ".\n"
    -- note Hledger.Utils.UTF8.* do no line ending conversion on windows,
    -- we currently require unix line endings on all platforms.
    newJournalContent >>= T.writeFile f
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2019-06-26 20:19:53 +03:00
|
|
|
-- | Is there a component of this path that ends with a dot and is not
-- made up entirely of dots? ("." and ".." are therefore fine, "foo." is not.)
-- Such paths are not safe to use on Windows (cf #1056).
isWindowsUnsafeDotPath :: FilePath -> Bool
isWindowsUnsafeDotPath path = any unsafe (splitDirectories path)
  where
    -- NOTE(review): 'last' assumes splitDirectories never yields an empty
    -- component - confirm against the filepath documentation.
    unsafe part = last part == '.' && any (/= '.') part
|
|
|
|
|
2016-05-18 05:46:54 +03:00
|
|
|
-- | Build the text written into a newly auto-created journal file:
-- a single comment line recording the current day (as rendered by 'show').
newJournalContent :: IO Text
newJournalContent = header <$> getCurrentDay
  where
    header today = "; journal created " <> T.pack (show today) <> " by hledger\n"
|
2016-05-18 05:46:54 +03:00
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- A "LatestDates" is zero or more copies of the same date,
|
|
|
|
-- representing the latest transaction date read from a file,
|
|
|
|
-- and how many transactions there were on that date.
|
|
|
|
type LatestDates = [Day]  -- computed by latestDates; written by saveLatestDates and read back by previousLatestDates
|
2017-09-15 19:55:17 +03:00
|
|
|
|
|
|
|
-- | Pick out every occurrence of the most recent date in a list of dates,
-- which need not be sorted. If that date occurs once the result has one
-- element, if it occurs three times (anywhere) the result has three.
-- An empty input gives an empty result.
latestDates :: [Day] -> LatestDates
latestDates ds = headDef [] (group newestfirst)
  where
    -- descending order, so the first group holds the latest date
    newestfirst = reverse (sort ds)
|
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | Record these dates as the latest seen for the given journal file,
-- by writing them, one per line (formatted with 'showDate'), to the
-- file named by 'latestDatesFileFor'.
saveLatestDates :: LatestDates -> FilePath -> IO ()
saveLatestDates dates f = T.writeFile target contents
  where
    target   = latestDatesFileFor f
    contents = T.unlines (map showDate dates)
|
2017-09-15 19:55:17 +03:00
|
|
|
|
2019-07-15 13:28:52 +03:00
|
|
|
-- | Load the latest transaction dates that were saved the last time this
-- journal file was read. One date is returned per saved line, so several
-- transactions on the latest date give several dates. If no latest-dates
-- file exists for this journal, the result is empty.
--
-- Each line is stripped of surrounding whitespace and parsed with
-- 'parsedateM'; a line that does not parse makes the action 'fail'.
previousLatestDates :: FilePath -> IO LatestDates
previousLatestDates f = do
  found <- doesFileExist datesfile
  if not found
    then return []
    else do
      contents <- readFileStrictly datesfile
      traverse parseLine (T.lines contents)
  where
    datesfile = latestDatesFileFor f
    parseLine l =
      case parsedateM s of
        Just d  -> return d
        Nothing -> fail $ "could not parse date \"" ++ s ++ "\""
      where
        s = T.unpack (T.strip l)
|
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | The path of the file holding the latest transaction dates for the
-- given journal file: a sibling in the same directory, named
-- @.latest.FILE@.
latestDatesFileFor :: FilePath -> FilePath
latestDatesFileFor f =
  let (dir, fname) = splitFileName f
      latestname   = ".latest" <.> fname
  in dir </> latestname
|
|
|
|
|
2017-09-15 19:55:17 +03:00
|
|
|
-- | Read a file with 'readFilePortably', forcing the text's length to be
-- computed (via 'C.evaluate') before the text is returned.
readFileStrictly :: FilePath -> IO Text
readFileStrictly f = do
  t <- readFilePortably f
  _ <- C.evaluate (T.length t)
  return t
|
2017-09-15 19:55:17 +03:00
|
|
|
|
2017-09-17 23:26:42 +03:00
|
|
|
-- | Given zero or more latest dates (all the same, representing the
-- latest previously seen transaction date, and how many transactions
-- were seen on that date), drop from the journal every transaction
-- dated earlier, plus that many transactions on the latest date itself,
-- leaving only transactions that we can assume are newer.
-- Also returns the new latest dates, computed before the already-seen
-- same-date transactions are dropped.
--
-- With no previous dates the journal is returned unchanged. Otherwise
-- the kept transactions come out sorted by date.
journalFilterSinceLatestDates :: LatestDates -> Journal -> (Journal, LatestDates)
journalFilterSinceLatestDates ds j =
  case ds of
    [] -> (j, latestDates (map tdate (jtxns j)))
    (cutoff:_) ->
      let
        -- transactions on or after the cutoff date, in date order
        candidates = sortBy (comparing tdate) [t | t <- jtxns j, tdate t >= cutoff]
        (oncutoff, after) = span (\t -> tdate t == cutoff) candidates
        -- skip as many cutoff-date transactions as were seen last time
        unseen = drop (length ds) oncutoff
      in
        ( j{jtxns = unseen ++ after}
        , latestDates (map tdate (oncutoff ++ after))
        )
|
2017-09-15 03:41:42 +03:00
|
|
|
|
2020-03-02 09:00:39 +03:00
|
|
|
--- ** tests
|
2009-04-04 12:50:36 +04:00
|
|
|
|
2018-09-06 23:08:26 +03:00
|
|
|
-- | The "Read" test group: the test groups of the common, CSV and
-- journal reader modules.
tests_Read = tests "Read"
  [ tests_Common
  , tests_CsvReader
  , tests_JournalReader
  ]
|