lib, cli: allow a READER: prefix on data file paths

This provides a way to override the file format detection logic,
useful eg for files with wrong extensions or standard input.
This commit is contained in:
Simon Michael 2016-11-20 10:30:38 -08:00
parent 7d56af5a03
commit 98ef8f9a60
6 changed files with 611 additions and 493 deletions

View File

@ -11,12 +11,14 @@ to import modules below this one.
module Hledger.Read (
-- * Journal files
PrefixedFilePath,
defaultJournal,
defaultJournalPath,
readJournalFiles,
readJournalFile,
requireJournalFileExists,
ensureJournalFileExists,
splitReaderPrefix,
-- * Journal parsing
readJournal,
@ -33,6 +35,7 @@ module Hledger.Read (
) where
import Control.Applicative ((<|>))
import qualified Control.Exception as C
import Control.Monad.Except
import Data.List
@ -61,6 +64,10 @@ import Prelude hiding (getContents, writeFile)
import Hledger.Utils.UTF8IOCompat (writeFile)
journalEnvVar = "LEDGER_FILE"
journalEnvVar2 = "LEDGER"
journalDefaultFilename = ".hledger.journal"
-- The available journal readers, each one handling a particular data format.
readers :: [Reader]
readers = [
@ -71,9 +78,12 @@ readers = [
,LedgerReader.reader
]
journalEnvVar = "LEDGER_FILE"
journalEnvVar2 = "LEDGER"
journalDefaultFilename = ".hledger.journal"
-- | The format name (rFormat) of each entry in 'readers', in the same order.
readerNames :: [String]
readerNames = [rFormat r | r <- readers]
-- | A file path optionally prefixed by a reader name and colon
-- (journal:, csv:, timedot:, etc.), eg "csv:-".
-- This is a plain alias for 'FilePath', so the type checker does not
-- distinguish prefixed paths from unprefixed ones.
type PrefixedFilePath = FilePath
-- | Read the default journal file specified by the environment, or raise an error.
defaultJournal :: IO Journal
@ -99,34 +109,58 @@ defaultJournalPath = do
home <- getHomeDirectory `C.catch` (\(_::C.IOException) -> return "")
return $ home </> journalDefaultFilename
-- | @readJournalFiles mformat mrulesfile assrt fs@
-- | @readJournalFiles mformat mrulesfile assrt prefixedfiles@
--
-- Call readJournalFile on each specified file path, and combine the
-- resulting journals into one. If there are any errors, the first is
-- returned, otherwise they are combined per Journal's monoid instance
-- (concatenated, basically). Parse context (eg directives & aliases)
-- is not maintained across file boundaries, it resets at the start of
-- each file (though the final parse state saved in the resulting
-- journal is the combination of parse states from all files).
readJournalFiles :: Maybe StorageFormat -> Maybe FilePath -> Bool -> [FilePath] -> IO (Either String Journal)
readJournalFiles mformat mrulesfile assrt fs = do
-- Read a Journal from each specified file path and combine them into one.
-- Or, return the first error message.
--
-- Combining Journals means concatenating them, basically.
-- The parse state resets at the start of each file, which means that
-- directives & aliases do not cross file boundaries.
-- (The final parse state saved in the Journal does span all files, however.)
--
-- As with readJournalFile,
-- file paths can optionally have a READER: prefix,
-- and the @mformat@, @mrulesfile@, and @assrt@ arguments are supported
-- (and these are applied to all files).
--
readJournalFiles :: Maybe StorageFormat -> Maybe FilePath -> Bool -> [PrefixedFilePath] -> IO (Either String Journal)
readJournalFiles mformat mrulesfile assrt prefixedfiles = do
(either Left (Right . mconcat) . sequence)
<$> mapM (readJournalFile mformat mrulesfile assrt) fs
<$> mapM (readJournalFile mformat mrulesfile assrt) prefixedfiles
-- | @readJournalFile mformat mrulesfile assrt f@
-- | @readJournalFile mformat mrulesfile assrt prefixedfile@
--
-- Read a Journal from this file (or stdin if the file path is -).
-- Assume the specified data format, or a format identified from the file path,
-- or try all readers.
-- A CSV conversion rules file may be specified for better conversion of CSV.
-- Also optionally check any balance assertions in the journal.
-- If parsing or balance assertions fail, return an error message instead.
readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> Bool -> FilePath -> IO (Either String Journal)
readJournalFile mformat mrulesfile assrt f = do
-- Read a Journal from this file, or from stdin if the file path is -,
-- or return an error message. The file path can have a READER: prefix.
--
-- The reader (data format) is chosen based on (in priority order):
-- the @mformat@ argument;
-- the file path's READER: prefix, if any;
-- a recognised file name extension (in readJournal);
-- if none of these identify a known reader, all built-in readers are tried in turn.
--
-- A CSV conversion rules file (@mrulesfile@) can be specified to help convert CSV data.
--
-- Optionally, any balance assertions in the journal can be checked (@assrt@).
--
readJournalFile :: Maybe StorageFormat -> Maybe FilePath -> Bool -> PrefixedFilePath -> IO (Either String Journal)
readJournalFile mformat mrulesfile assrt prefixedfile = do
let
(mprefixformat, f) = splitReaderPrefix prefixedfile
mfmt = mformat <|> mprefixformat
requireJournalFileExists f
readFileOrStdinAnyLineEnding f >>= readJournal mformat mrulesfile assrt (Just f)
readFileOrStdinAnyLineEnding f >>= readJournal mfmt mrulesfile assrt (Just f)
-- | If the specified journal file does not exist, give a helpful error and quit.
-- | Separate an optional READER: prefix from a file path.
-- When the path begins with one of 'readerNames' followed by a colon,
-- return that reader name and the remainder of the path; otherwise
-- return Nothing and the path unchanged. If several reader names
-- match, the first one in 'readerNames' wins.
-- Eg "csv:-" -> (Just "csv", "-").
splitReaderPrefix :: PrefixedFilePath -> (Maybe String, FilePath)
splitReaderPrefix prefixed =
  case filter hasPrefix readerNames of
    name : _ -> (Just name, drop (length name + 1) prefixed)  -- +1 skips the colon
    []       -> (Nothing, prefixed)
  where
    hasPrefix name = (name ++ ":") `isPrefixOf` prefixed
-- | If the specified journal file does not exist (and is not "-"),
-- give a helpful error and quit.
requireJournalFileExists :: FilePath -> IO ()
requireJournalFileExists "-" = return ()
requireJournalFileExists f = do
@ -153,7 +187,7 @@ newJournalContent = do
d <- getCurrentDay
return $ printf "; journal created %s by hledger\n" (show d)
-- | Read a journal from the given text, trying all known formats, or simply throw an error.
-- | Read a Journal from the given text, or throw an error (via error').
-- No format and no file path are passed to readJournal, so no single
-- reader is selected and all readers are tried in turn; the assrt flag
-- is passed as True.
readJournal' :: Text -> IO Journal
readJournal' txt = do
  result <- readJournal Nothing Nothing True Nothing txt
  case result of
    Left err -> error' err
    Right j  -> return j
@ -163,30 +197,42 @@ tests_readJournal' = [
assertBool "" True
]
-- | @readJournal mformat mrulesfile assrt mpath t@
-- | @readJournal mformat mrulesfile assrt mfile txt@
--
-- Read a Journal from some text, or return an error message.
--
-- The reader (data format) is chosen based on (in priority order):
-- the @mformat@ argument;
-- a recognised file name extension in @mfile@ (if provided).
-- If none of these identify a known reader, all built-in readers are tried in turn
-- (returning the first one's error message if none of them succeed).
--
-- A CSV conversion rules file (@mrulesfile@) can be specified to help convert CSV data.
--
-- Optionally, any balance assertions in the journal can be checked (@assrt@).
--
-- Try to read a Journal from some text.
-- If a format is specified (mformat), try only that reader.
-- Otherwise if the file path is provided (mpath), and it specifies a format, try only that reader.
-- Otherwise try all readers in turn until one succeeds, or return the first error if none of them succeed.
-- A CSV conversion rules file may be specified (mrulesfile) for use by the CSV reader.
-- If the assrt flag is true, also check and enforce balance assertions in the journal.
readJournal :: Maybe StorageFormat -> Maybe FilePath -> Bool -> Maybe FilePath -> Text -> IO (Either String Journal)
readJournal mformat mrulesfile assrt mpath t =
let rs = maybe readers (:[]) $ findReader mformat mpath
in tryReaders rs mrulesfile assrt mpath t
readJournal mformat mrulesfile assrt mfile txt =
let
rs = maybe readers (:[]) $ findReader mformat mfile
in
tryReaders rs mrulesfile assrt mfile txt
-- | @findReader mformat mpath@
--
-- Find the reader for the given format (mformat), if any.
-- Or if no format is provided, find the first reader that handles the
-- file name's extension, if any.
-- Find the reader named by @mformat@, if provided.
-- Or, if a file path is provided, find the first reader that handles
-- its file extension, if any.
findReader :: Maybe StorageFormat -> Maybe FilePath -> Maybe Reader
findReader Nothing Nothing = Nothing
findReader (Just fmt) _ = headMay [r | r <- readers, fmt == rFormat r]
findReader Nothing (Just path) = headMay [r | r <- readers, ext `elem` rExtensions r]
findReader (Just fmt) _ = headMay [r | r <- readers, rFormat r == fmt]
findReader Nothing (Just path) =
case prefix of
Just fmt -> headMay [r | r <- readers, rFormat r == fmt]
Nothing -> headMay [r | r <- readers, ext `elem` rExtensions r]
where
ext = drop 1 $ takeExtension path
(prefix,path') = splitReaderPrefix path
ext = drop 1 $ takeExtension path'
-- | @tryReaders readers mrulesfile assrt path t@
--

View File

@ -397,14 +397,22 @@ aliasesFromOpts = map (\a -> fromparse $ runParser accountaliasp ("--alias "++qu
-- 1. options, 2. an environment variable, or 3. the default.
-- Actually, returns one or more file paths. There will be more
-- than one if multiple -f options were provided.
-- File paths can have a READER: prefix naming a reader/data format.
journalFilePathFromOpts :: CliOpts -> IO [String]
journalFilePathFromOpts opts = do
f <- defaultJournalPath
d <- getCurrentDirectory
mapM (expandPath d) $ ifEmpty (file_ opts) [f]
where
ifEmpty [] d = d
ifEmpty l _ = l
case file_ opts of
[] -> return [f]
fs -> mapM (expandPathPreservingPrefix d) fs
-- | Expand a file path which may carry a READER: prefix.
-- The prefix (if any) is split off with splitReaderPrefix, the bare
-- path is passed to expandPath together with the first argument
-- (the current directory, at the call site in journalFilePathFromOpts),
-- and the prefix is then put back on the front of the expanded path.
expandPathPreservingPrefix :: FilePath -> PrefixedFilePath -> IO PrefixedFilePath
expandPathPreservingPrefix dir prefixedPath =
  restorePrefix <$> expandPath dir barePath
  where
    (mreader, barePath) = splitReaderPrefix prefixedPath
    -- Re-attach "READER:" only when a prefix was actually present.
    restorePrefix expanded =
      maybe expanded (\reader -> reader ++ ":" ++ expanded) mreader
-- | Get the expanded, absolute output file path from options,
-- or the default (-, meaning stdout).

View File

@ -403,10 +403,6 @@ Eg \-p jan \-p feb is equivalent to \-p feb.
.PP
hledger reads transactions from a data file (and the add command writes
to it).
Usually this is in hledger\[aq]s journal format, but it can also be one
of the other supported file types, such as timeclock, timedot, CSV, or a
C++ Ledger journal (partial support).
.PP
By default this file is \f[C]$HOME/.hledger.journal\f[] (or on Windows,
something like \f[C]C:/Users/USER/.hledger.journal\f[]).
You can override this with the \f[C]$LEDGER_FILE\f[] environment
@ -423,51 +419,10 @@ or with the \f[C]\-f/\-\-file\f[] option:
.IP
.nf
\f[C]
$\ hledger\ \-f\ some/file.ext\ stats
$\ hledger\ \-f\ /some/file\ stats
\f[]
.fi
.PP
hledger tries to identify the file format based on the file extension,
as follows:
.PP
.TS
tab(@);
l l.
T{
File extension:
T}@T{
Use format:
T}
_
T{
\f[C]\&.journal\f[], \f[C]\&.j\f[], \f[C]\&.hledger\f[],
\f[C]\&.ledger\f[]
T}@T{
journal
T}
T{
\f[C]\&.timeclock\f[]
T}@T{
timeclock
T}
T{
\f[C]\&.timedot\f[]
T}@T{
timedot
T}
T{
\f[C]\&.csv\f[]
T}@T{
CSV
T}
.TE
.PP
If the file name has some other extension, or none, hledger tries each
of these formats in turn.
(Plus one more: the experimental "ledger" format, an alternate parser
for C++ Ledger journals, which we try only as a last resort as it\[aq]s
new and hledger\[aq]s journal parser works better for now.)
.PP
The file name \f[C]\-\f[] (hyphen) means standard input, as usual:
.IP
.nf
@ -476,6 +431,89 @@ $\ cat\ some.journal\ |\ hledger\ \-f\-
\f[]
.fi
.PP
Usually this file is in hledger\[aq]s journal format, but it can also be
one of several other formats, shown below.
hledger tries to identify the format based on the file extension, as
follows:
.PP
.TS
tab(@);
l l l.
T{
Format:
T}@T{
Description:
T}@T{
File extensions:
T}
_
T{
journal
T}@T{
hledger\[aq]s journal format
T}@T{
\f[C]\&.journal\f[], \f[C]\&.j\f[], \f[C]\&.hledger\f[],
\f[C]\&.ledger\f[]
T}
T{
timeclock
T}@T{
timeclock files (precise time logging)
T}@T{
\f[C]\&.timeclock\f[]
T}
T{
timedot
T}@T{
timedot files (approximate time logging)
T}@T{
\f[C]\&.timedot\f[]
T}
T{
CSV
T}@T{
comma\-separated values (data interchange)
T}@T{
\f[C]\&.csv\f[]
T}
.TE
.PP
hledger identifies the format based on the file extension if possible.
If that does not identify a known format, it tries each format in turn.
.PP
If needed, eg to ensure correct error messages, you can force a specific
format by prepending it to the file path with a colon.
Examples:
.IP
.nf
\f[C]
$\ hledger\ \-f\ csv:/some/csv\-file.dat\ stats
$\ echo\ \[aq]i\ 2009/13/1\ 08:00:00\[aq]\ |\ hledger\ print\ \-ftimeclock:\-
\f[]
.fi
.PP
Some other experimental formats are available but not yet used by
default:
.PP
.TS
tab(@);
l l l.
T{
Format:
T}@T{
Description:
T}@T{
File extensions:
T}
_
T{
ledger
T}@T{
Ledger\[aq]s journal format (incomplete)
T}@T{
T}
.TE
.PP
You can specify multiple \f[C]\-f\f[] options, to read multiple files as
one big journal.
Directives in one file will not affect subsequent files in this case (if

View File

@ -324,13 +324,9 @@ File: hledger.1.info, Node: Input files, Next: Depth limiting, Prev: Reportin
===============
hledger reads transactions from a data file (and the add command writes
to it). Usually this is in hledger's journal format, but it can also be
one of the other supported file types, such as timeclock, timedot, CSV,
or a C++ Ledger journal (partial support).
By default this file is `$HOME/.hledger.journal' (or on Windows,
something like `C:/Users/USER/.hledger.journal'). You can override this
with the `$LEDGER_FILE' environment variable:
to it). By default this file is `$HOME/.hledger.journal' (or on
Windows, something like `C:/Users/USER/.hledger.journal'). You can
override this with the `$LEDGER_FILE' environment variable:
$ setenv LEDGER_FILE ~/finance/2016.journal
@ -339,29 +335,43 @@ $ hledger stats
or with the `-f/--file' option:
$ hledger -f some/file.ext stats
hledger tries to identify the file format based on the file
extension, as follows:
File extension: Use format:
--------------------------------------------------------
`.journal', `.j', `.hledger', `.ledger' journal
`.timeclock' timeclock
`.timedot' timedot
`.csv' CSV
If the file name has some other extension, or none, hledger tries
each of these formats in turn. (Plus one more: the experimental "ledger"
format, an alternate parser for C++ Ledger journals, which we try only
as a last resort as it's new and hledger's journal parser works better
for now.)
$ hledger -f /some/file stats
The file name `-' (hyphen) means standard input, as usual:
$ cat some.journal | hledger -f-
Usually this file is in hledger's journal format, but it can also be
one of several other formats, shown below. hledger tries to identify the
format based on the file extension, as follows:
Format: Description: File extensions:
--------------------------------------------------------------------------------------------------
journal hledger's journal format `.journal', `.j', `.hledger', `.ledger'
timeclock timeclock files (precise time logging) `.timeclock'
timedot timedot files (approximate time logging) `.timedot'
CSV comma-separated values (data interchange) `.csv'
hledger identifies the format based on the file extension if
possible. If that does not identify a known format, it tries each
format in turn.
If needed, eg to ensure correct error messages, you can force a
specific format by prepending it to the file path with a colon.
Examples:
$ hledger -f csv:/some/csv-file.dat stats
$ echo 'i 2009/13/1 08:00:00' | hledger print -ftimeclock:-
Some other experimental formats are available but not yet used by
default:
Format: Description: File extensions:
--------------------------------------------------------------------
ledger Ledger's journal format (incomplete)
You can specify multiple `-f' options, to read multiple files as one
big journal. Directives in one file will not affect subsequent files in
this case (if you need that, use the include directive instead).
@ -2165,91 +2175,91 @@ Node: Reporting options7583
Ref: #reporting-options7736
Node: Input files9512
Ref: #input-files9652
Node: Depth limiting11233
Ref: #depth-limiting11373
Node: Smart dates11574
Ref: #smart-dates11713
Node: Report intervals12710
Ref: #report-intervals12863
Node: Period expressions13199
Ref: #period-expressions13364
Node: Regular expressions15699
Ref: #regular-expressions15841
Node: QUERIES17324
Ref: #queries17428
Node: COMMANDS21067
Ref: #commands21181
Node: accounts21854
Ref: #accounts21954
Node: activity22936
Ref: #activity23048
Node: add23407
Ref: #add23508
Node: balance26167
Ref: #balance26280
Node: Flat mode29253
Ref: #flat-mode29380
Node: Depth limited balance reports29799
Ref: #depth-limited-balance-reports30002
Node: Multicolumn balance reports30423
Ref: #multicolumn-balance-reports30625
Node: Market value35274
Ref: #market-value35438
Node: Custom balance output35931
Ref: #custom-balance-output36104
Node: Output destination38208
Ref: #output-destination38373
Node: CSV output38643
Ref: #csv-output38762
Node: balancesheet39159
Ref: #balancesheet39287
Node: cashflow39939
Ref: #cashflow40056
Node: help40746
Ref: #help40858
Node: incomestatement41695
Ref: #incomestatement41825
Node: info42552
Ref: #info42659
Node: man43021
Ref: #man43118
Node: print43521
Ref: #print43626
Node: register44972
Ref: #register45085
Node: Custom register output49577
Ref: #custom-register-output49708
Node: stats51005
Ref: #stats51111
Node: test51987
Ref: #test52074
Node: ADD-ON COMMANDS52441
Ref: #add-on-commands52577
Node: api53865
Ref: #api53957
Node: autosync53991
Ref: #autosync54106
Node: diff56421
Ref: #diff56531
Node: equity57195
Ref: #equity57309
Node: interest58637
Ref: #interest58754
Node: irr61838
Ref: #irr61951
Node: print-unique64326
Ref: #print-unique64456
Node: rewrite64714
Ref: #rewrite64833
Node: ui65362
Ref: #ui65462
Node: web65503
Ref: #web65591
Node: TROUBLESHOOTING65624
Ref: #troubleshooting65743
Node: Run-time problems65797
Ref: #run-time-problems65940
Node: Known limitations67884
Ref: #known-limitations68027
Node: Depth limiting11677
Ref: #depth-limiting11817
Node: Smart dates12018
Ref: #smart-dates12157
Node: Report intervals13154
Ref: #report-intervals13307
Node: Period expressions13643
Ref: #period-expressions13808
Node: Regular expressions16143
Ref: #regular-expressions16285
Node: QUERIES17768
Ref: #queries17872
Node: COMMANDS21511
Ref: #commands21625
Node: accounts22298
Ref: #accounts22398
Node: activity23380
Ref: #activity23492
Node: add23851
Ref: #add23952
Node: balance26611
Ref: #balance26724
Node: Flat mode29697
Ref: #flat-mode29824
Node: Depth limited balance reports30243
Ref: #depth-limited-balance-reports30446
Node: Multicolumn balance reports30867
Ref: #multicolumn-balance-reports31069
Node: Market value35718
Ref: #market-value35882
Node: Custom balance output36375
Ref: #custom-balance-output36548
Node: Output destination38652
Ref: #output-destination38817
Node: CSV output39087
Ref: #csv-output39206
Node: balancesheet39603
Ref: #balancesheet39731
Node: cashflow40383
Ref: #cashflow40500
Node: help41190
Ref: #help41302
Node: incomestatement42139
Ref: #incomestatement42269
Node: info42996
Ref: #info43103
Node: man43465
Ref: #man43562
Node: print43965
Ref: #print44070
Node: register45416
Ref: #register45529
Node: Custom register output50021
Ref: #custom-register-output50152
Node: stats51449
Ref: #stats51555
Node: test52431
Ref: #test52518
Node: ADD-ON COMMANDS52885
Ref: #add-on-commands53021
Node: api54309
Ref: #api54401
Node: autosync54435
Ref: #autosync54550
Node: diff56865
Ref: #diff56975
Node: equity57639
Ref: #equity57753
Node: interest59081
Ref: #interest59198
Node: irr62282
Ref: #irr62395
Node: print-unique64770
Ref: #print-unique64900
Node: rewrite65158
Ref: #rewrite65277
Node: ui65806
Ref: #ui65906
Node: web65947
Ref: #web66035
Node: TROUBLESHOOTING66068
Ref: #troubleshooting66187
Node: Run-time problems66241
Ref: #run-time-problems66384
Node: Known limitations68328
Ref: #known-limitations68471

End Tag Table

File diff suppressed because it is too large Load Diff

View File

@ -75,13 +75,6 @@ Eg -p jan -p feb is equivalent to -p feb.
## Input files
hledger reads transactions from a data file (and the add command writes to it).
Usually this is in hledger's journal format,
but it can also be one of the other supported file types, such as
timeclock,
timedot,
CSV,
or a C++ Ledger journal (partial support).
By default this file is `$HOME/.hledger.journal`
(or on Windows, something like `C:/Users/USER/.hledger.journal`).
You can override this with the `$LEDGER_FILE` environment variable:
@ -91,30 +84,41 @@ $ hledger stats
```
or with the `-f/--file` option:
```bash
$ hledger -f some/file.ext stats
$ hledger -f /some/file stats
```
hledger tries to identify the file format based on the file extension,
as follows:
| File extension: | Use format:
|-------------------------------------------|----------------
| `.journal`, `.j`, `.hledger`, `.ledger` | journal
| `.timeclock` | timeclock
| `.timedot` | timedot
| `.csv` | CSV
If the file name has some other extension, or none,
hledger tries each of these formats in turn.
(Plus one more: the experimental "ledger" format, an alternate
parser for C++ Ledger journals, which we try only as a last resort
as it's new and hledger's journal parser works better for now.)
The file name `-` (hyphen) means standard input, as usual:
```bash
$ cat some.journal | hledger -f-
```
Usually this file is in hledger's journal format,
but it can also be one of several other formats, shown below.
hledger tries to identify the format based on the file extension, as follows:
| Format: | Description: | File extensions:
|------------|---------------------------------------------|-------------------------------------------
| journal | hledger's journal format | `.journal`, `.j`, `.hledger`, `.ledger`
| timeclock | timeclock files (precise time logging) | `.timeclock`
| timedot | timedot files (approximate time logging) | `.timedot`
| CSV | comma-separated values (data interchange) | `.csv`
hledger identifies the format based on the file extension if possible.
If that does not identify a known format, it tries each format in turn.
If needed, eg to ensure correct error messages, you can force a specific format
by prepending it to the file path with a colon. Examples:
```bash
$ hledger -f csv:/some/csv-file.dat stats
$ echo 'i 2009/13/1 08:00:00' | hledger print -ftimeclock:-
```
Some other experimental formats are available but not yet used by default:
| Format: | Description: | File extensions:
|------------|---------------------------------------------|-------------------------------------------
| ledger | Ledger's journal format (incomplete) |
You can specify multiple `-f` options, to read multiple files as one big journal.
Directives in one file will not affect subsequent files in this case (if you need that,
use the [include directive](#including-other-files) instead).