mirror of
https://github.com/simonmichael/hledger.git
synced 2024-09-20 10:47:29 +03:00
514f015849
This patch replaces the strings used in the Entry, TimeLogEntry, and Transaction records with real types. Rather than use the inbuild system date and time types directly, two custom types have been implemented that wrap UTCTime: Date and DateTime. A minimal API for these has been added.
480 lines
16 KiB
Haskell
480 lines
16 KiB
Haskell
{-|
|
||
|
||
Parsers for standard ledger and timelog files.
|
||
|
||
-}
|
||
|
||
module Ledger.Parse
|
||
where
|
||
import Text.ParserCombinators.Parsec
|
||
import Text.ParserCombinators.Parsec.Char
|
||
import Text.ParserCombinators.Parsec.Language
|
||
import Text.ParserCombinators.Parsec.Combinator
|
||
import qualified Text.ParserCombinators.Parsec.Token as P
|
||
import System.IO
|
||
import qualified Data.Map as Map
|
||
import Ledger.Utils
|
||
import Ledger.Types
|
||
import Ledger.Amount
|
||
import Ledger.Entry
|
||
import Ledger.Commodity
|
||
import Ledger.TimeLog
|
||
import Data.Time.LocalTime
|
||
import Data.Time.Calendar
|
||
|
||
|
||
-- utils
|
||
|
||
parseLedgerFile :: String -> IO (Either ParseError RawLedger)
|
||
parseLedgerFile "-" = fmap (parse ledgerfile "-") $ hGetContents stdin
|
||
parseLedgerFile f = parseFromFile ledgerfile f
|
||
|
||
printParseError :: (Show a) => a -> IO ()
|
||
printParseError e = do putStr "ledger parse error at "; print e
|
||
|
||
-- set up token parsing, though we're not yet using these much
|
||
ledgerLanguageDef = LanguageDef {
|
||
commentStart = ""
|
||
, commentEnd = ""
|
||
, commentLine = ";"
|
||
, nestedComments = False
|
||
, identStart = letter <|> char '_'
|
||
, identLetter = alphaNum <|> oneOf "_':"
|
||
, opStart = opLetter emptyDef
|
||
, opLetter = oneOf "!#$%&*+./<=>?@\\^|-~"
|
||
, reservedOpNames= []
|
||
, reservedNames = []
|
||
, caseSensitive = False
|
||
}
|
||
lexer = P.makeTokenParser ledgerLanguageDef
|
||
whiteSpace = P.whiteSpace lexer
|
||
lexeme = P.lexeme lexer
|
||
--symbol = P.symbol lexer
|
||
natural = P.natural lexer
|
||
parens = P.parens lexer
|
||
semi = P.semi lexer
|
||
identifier = P.identifier lexer
|
||
reserved = P.reserved lexer
|
||
reservedOp = P.reservedOp lexer
|
||
|
||
-- parsers
|
||
|
||
-- | Parse a RawLedger from either a ledger file or a timelog file.
|
||
-- It tries first the timelog parser then the ledger parser; this means
|
||
-- parse errors for ledgers are useful while those for timelogs are not.
|
||
ledgerfile :: Parser RawLedger
|
||
ledgerfile = try ledgerfromtimelog <|> ledger
|
||
|
||
{-| Parse a ledger file. Here is the ledger grammar from the ledger 2.5 manual:
|
||
|
||
@
|
||
The ledger file format is quite simple, but also very flexible. It supports
|
||
many options, though typically the user can ignore most of them. They are
|
||
summarized below. The initial character of each line determines what the
|
||
line means, and how it should be interpreted. Allowable initial characters
|
||
are:
|
||
|
||
NUMBER A line beginning with a number denotes an entry. It may be followed by any
|
||
number of lines, each beginning with whitespace, to denote the entry’s account
|
||
transactions. The format of the first line is:
|
||
|
||
DATE[=EDATE] [*|!] [(CODE)] DESC
|
||
|
||
If ‘*’ appears after the date (with optional effective date), it indicates the entry
|
||
is “cleared”, which can mean whatever the user wants it t omean. If ‘!’ appears
|
||
after the date, it indicates d the entry is “pending”; i.e., tentatively cleared from
|
||
the user’s point of view, but not yet actually cleared. If a ‘CODE’ appears in
|
||
parentheses, it may be used to indicate a check number, or the type of the
|
||
transaction. Following these is the payee, or a description of the transaction.
|
||
The format of each following transaction is:
|
||
|
||
ACCOUNT AMOUNT [; NOTE]
|
||
|
||
The ‘ACCOUNT’ may be surrounded by parentheses if it is a virtual
|
||
transactions, or square brackets if it is a virtual transactions that must
|
||
balance. The ‘AMOUNT’ can be followed by a per-unit transaction cost,
|
||
by specifying ‘ AMOUNT’, or a complete transaction cost with ‘\@ AMOUNT’.
|
||
Lastly, the ‘NOTE’ may specify an actual and/or effective date for the
|
||
transaction by using the syntax ‘[ACTUAL_DATE]’ or ‘[=EFFECTIVE_DATE]’ or
|
||
‘[ACTUAL_DATE=EFFECtIVE_DATE]’.
|
||
|
||
= An automated entry. A value expression must appear after the equal sign.
|
||
After this initial line there should be a set of one or more transactions, just as
|
||
if it were normal entry. If the amounts of the transactions have no commodity,
|
||
they will be applied as modifiers to whichever real transaction is matched by
|
||
the value expression.
|
||
|
||
~ A period entry. A period expression must appear after the tilde.
|
||
After this initial line there should be a set of one or more transactions, just as
|
||
if it were normal entry.
|
||
|
||
! A line beginning with an exclamation mark denotes a command directive. It
|
||
must be immediately followed by the command word. The supported commands
|
||
are:
|
||
|
||
‘!include’
|
||
Include the stated ledger file.
|
||
‘!account’
|
||
The account name is given is taken to be the parent of all transac-
|
||
tions that follow, until ‘!end’ is seen.
|
||
‘!end’ Ends an account block.
|
||
|
||
; A line beginning with a colon indicates a comment, and is ignored.
|
||
|
||
Y If a line begins with a capital Y, it denotes the year used for all subsequent
|
||
entries that give a date without a year. The year should appear immediately
|
||
after the Y, for example: ‘Y2004’. This is useful at the beginning of a file, to
|
||
specify the year for that file. If all entries specify a year, however, this command
|
||
has no effect.
|
||
|
||
|
||
P Specifies a historical price for a commodity. These are usually found in a pricing
|
||
history file (see the ‘-Q’ option). The syntax is:
|
||
|
||
P DATE SYMBOL PRICE
|
||
|
||
N SYMBOL Indicates that pricing information is to be ignored for a given symbol, nor will
|
||
quotes ever be downloaded for that symbol. Useful with a home currency, such
|
||
as the dollar ($). It is recommended that these pricing options be set in the price
|
||
database file, which defaults to ‘~/.pricedb’. The syntax for this command is:
|
||
|
||
N SYMBOL
|
||
|
||
|
||
D AMOUNT Specifies the default commodity to use, by specifying an amount in the expected
|
||
format. The entry command will use this commodity as the default when none
|
||
other can be determined. This command may be used multiple times, to set
|
||
the default flags for different commodities; whichever is seen last is used as the
|
||
default commodity. For example, to set US dollars as the default commodity,
|
||
while also setting the thousands flag and decimal flag for that commodity, use:
|
||
|
||
D $1,000.00
|
||
|
||
C AMOUNT1 = AMOUNT2
|
||
Specifies a commodity conversion, where the first amount is given to be equiv-
|
||
alent to the second amount. The first amount should use the decimal precision
|
||
desired during reporting:
|
||
|
||
C 1.00 Kb = 1024 bytes
|
||
|
||
i, o, b, h
|
||
These four relate to timeclock support, which permits ledger to read timelog
|
||
files. See the timeclock’s documentation for more info on the syntax of its
|
||
timelog files.
|
||
@
|
||
|
||
See "Tests" for sample data.
|
||
-}
|
||
ledger :: Parser RawLedger
|
||
ledger = do
|
||
-- we expect these to come first, unlike ledger
|
||
modifier_entries <- many ledgermodifierentry
|
||
periodic_entries <- many ledgerperiodicentry
|
||
|
||
entries <- (many ledgerentry) <?> "entry"
|
||
final_comment_lines <- ledgernondatalines
|
||
eof
|
||
return $ RawLedger modifier_entries periodic_entries entries (unlines final_comment_lines)
|
||
|
||
ledgernondatalines :: Parser [String]
|
||
ledgernondatalines = many (ledgerdirective <|> -- treat as comments
|
||
commentline <|>
|
||
blankline)
|
||
|
||
ledgerdirective :: Parser String
|
||
ledgerdirective = char '!' >> restofline <?> "directive"
|
||
|
||
blankline :: Parser String
|
||
blankline =
|
||
do {s <- many1 spacenonewline; newline; return s} <|>
|
||
do {newline; return ""} <?> "blank line"
|
||
|
||
commentline :: Parser String
|
||
commentline = do
|
||
char ';' <?> "comment line"
|
||
l <- restofline
|
||
return $ ";" ++ l
|
||
|
||
ledgercomment :: Parser String
|
||
ledgercomment =
|
||
try (do
|
||
char ';'
|
||
many spacenonewline
|
||
many (noneOf "\n")
|
||
)
|
||
<|> return "" <?> "comment"
|
||
|
||
ledgermodifierentry :: Parser ModifierEntry
|
||
ledgermodifierentry = do
|
||
char '=' <?> "entry"
|
||
many spacenonewline
|
||
valueexpr <- restofline
|
||
transactions <- ledgertransactions
|
||
return (ModifierEntry valueexpr transactions)
|
||
|
||
ledgerperiodicentry :: Parser PeriodicEntry
|
||
ledgerperiodicentry = do
|
||
char '~' <?> "entry"
|
||
many spacenonewline
|
||
periodexpr <- restofline
|
||
transactions <- ledgertransactions
|
||
return (PeriodicEntry periodexpr transactions)
|
||
|
||
ledgerentry :: Parser Entry
|
||
ledgerentry = do
|
||
preceding <- ledgernondatalines
|
||
date <- ledgerdate <?> "entry"
|
||
status <- ledgerstatus
|
||
code <- ledgercode
|
||
-- ledger treats entry comments as part of the description, we will too
|
||
-- desc <- many (noneOf ";\n") <?> "description"
|
||
-- let description = reverse $ dropWhile (==' ') $ reverse desc
|
||
description <- many (noneOf "\n") <?> "description"
|
||
comment <- ledgercomment
|
||
restofline
|
||
transactions <- ledgertransactions
|
||
return $ balanceEntry $ Entry date status code description comment transactions (unlines preceding)
|
||
|
||
ledgerday :: Parser Day
|
||
ledgerday = do
|
||
y <- many1 digit
|
||
char '/'
|
||
m <- many1 digit
|
||
char '/'
|
||
d <- many1 digit
|
||
many1 spacenonewline
|
||
return (fromGregorian (read y) (read m) (read d))
|
||
|
||
ledgerdate :: Parser Date
|
||
ledgerdate = fmap mkDate ledgerday
|
||
|
||
ledgerdatetime :: Parser DateTime
|
||
ledgerdatetime = do
|
||
day <- ledgerday
|
||
h <- many1 digit
|
||
char ':'
|
||
m <- many1 digit
|
||
many1 spacenonewline
|
||
return (mkDateTime day (TimeOfDay (read h) (read m) 0))
|
||
|
||
|
||
ledgerstatus :: Parser Bool
|
||
ledgerstatus = try (do { char '*'; many1 spacenonewline; return True } ) <|> return False
|
||
|
||
ledgercode :: Parser String
|
||
ledgercode = try (do { char '('; code <- anyChar `manyTill` char ')'; many1 spacenonewline; return code } ) <|> return ""
|
||
|
||
ledgertransactions :: Parser [RawTransaction]
|
||
ledgertransactions =
|
||
((try virtualtransaction <|> try balancedvirtualtransaction <|> ledgertransaction) <?> "transaction")
|
||
`manyTill` (do {newline <?> "blank line"; return ()} <|> eof)
|
||
|
||
ledgertransaction :: Parser RawTransaction
|
||
ledgertransaction = do
|
||
many1 spacenonewline
|
||
account <- ledgeraccountname
|
||
amount <- transactionamount
|
||
many spacenonewline
|
||
comment <- ledgercomment
|
||
restofline
|
||
return (RawTransaction account amount comment RegularTransaction)
|
||
|
||
virtualtransaction :: Parser RawTransaction
|
||
virtualtransaction = do
|
||
many1 spacenonewline
|
||
char '('
|
||
account <- ledgeraccountname
|
||
char ')'
|
||
amount <- transactionamount
|
||
many spacenonewline
|
||
comment <- ledgercomment
|
||
restofline
|
||
return (RawTransaction account amount comment VirtualTransaction)
|
||
|
||
balancedvirtualtransaction :: Parser RawTransaction
|
||
balancedvirtualtransaction = do
|
||
many1 spacenonewline
|
||
char '['
|
||
account <- ledgeraccountname
|
||
char ']'
|
||
amount <- transactionamount
|
||
many spacenonewline
|
||
comment <- ledgercomment
|
||
restofline
|
||
return (RawTransaction account amount comment BalancedVirtualTransaction)
|
||
|
||
-- | account names may have single spaces inside them, and are terminated by two or more spaces
|
||
ledgeraccountname :: Parser String
|
||
ledgeraccountname = do
|
||
accountname <- many1 (accountnamechar <|> singlespace)
|
||
return $ striptrailingspace accountname
|
||
where
|
||
singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}})
|
||
-- couldn't avoid consuming a final space sometimes, harmless
|
||
striptrailingspace s = if last s == ' ' then init s else s
|
||
|
||
accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace
|
||
<?> "account name character (non-bracket, non-parenthesis, non-whitespace)"
|
||
|
||
transactionamount :: Parser MixedAmount
|
||
transactionamount =
|
||
try (do
|
||
many1 spacenonewline
|
||
a <- someamount <|> return missingamt
|
||
return a
|
||
) <|> return missingamt
|
||
|
||
someamount = try leftsymbolamount <|> try rightsymbolamount <|> nosymbolamount
|
||
|
||
leftsymbolamount :: Parser MixedAmount
|
||
leftsymbolamount = do
|
||
sym <- commoditysymbol
|
||
sp <- many spacenonewline
|
||
(q,p,comma) <- amountquantity
|
||
pri <- priceamount
|
||
let c = Commodity {symbol=sym,side=L,spaced=not $ null sp,comma=comma,precision=p}
|
||
return $ Mixed [Amount c q pri]
|
||
<?> "left-symbol amount"
|
||
|
||
rightsymbolamount :: Parser MixedAmount
|
||
rightsymbolamount = do
|
||
(q,p,comma) <- amountquantity
|
||
sp <- many spacenonewline
|
||
sym <- commoditysymbol
|
||
pri <- priceamount
|
||
let c = Commodity {symbol=sym,side=R,spaced=not $ null sp,comma=comma,precision=p}
|
||
return $ Mixed [Amount c q pri]
|
||
<?> "right-symbol amount"
|
||
|
||
nosymbolamount :: Parser MixedAmount
|
||
nosymbolamount = do
|
||
(q,p,comma) <- amountquantity
|
||
pri <- priceamount
|
||
let c = Commodity {symbol="",side=L,spaced=False,comma=comma,precision=p}
|
||
return $ Mixed [Amount c q pri]
|
||
<?> "no-symbol amount"
|
||
|
||
commoditysymbol :: Parser String
|
||
commoditysymbol = many1 (noneOf "-.0123456789;\n ") <?> "commodity symbol"
|
||
|
||
priceamount :: Parser (Maybe MixedAmount)
|
||
priceamount =
|
||
try (do
|
||
many spacenonewline
|
||
char '@'
|
||
many spacenonewline
|
||
a <- someamount
|
||
return $ Just a
|
||
) <|> return Nothing
|
||
|
||
-- gawd.. trying to parse a ledger number without error:
|
||
|
||
-- | parse a ledger-style numeric quantity and also return the number of
|
||
-- digits to the right of the decimal point and whether thousands are
|
||
-- separated by comma.
|
||
amountquantity :: Parser (Double, Int, Bool)
|
||
amountquantity = do
|
||
sign <- optionMaybe $ string "-"
|
||
(intwithcommas,frac) <- numberparts
|
||
let comma = ',' `elem` intwithcommas
|
||
let precision = length frac
|
||
-- read the actual value. We expect this read to never fail.
|
||
let int = filter (/= ',') intwithcommas
|
||
let int' = if null int then "0" else int
|
||
let frac' = if null frac then "0" else frac
|
||
let sign' = fromMaybe "" sign
|
||
let quantity = read $ sign'++int'++"."++frac'
|
||
return (quantity, precision, comma)
|
||
<?> "commodity quantity"
|
||
|
||
-- | parse the two strings of digits before and after a possible decimal
|
||
-- point. The integer part may contain commas, or either part may be
|
||
-- empty, or there may be no point.
|
||
numberparts :: Parser (String,String)
|
||
numberparts = numberpartsstartingwithdigit <|> numberpartsstartingwithpoint
|
||
|
||
numberpartsstartingwithdigit :: Parser (String,String)
|
||
numberpartsstartingwithdigit = do
|
||
let digitorcomma = digit <|> char ','
|
||
first <- digit
|
||
rest <- many digitorcomma
|
||
frac <- try (do {char '.'; many digit >>= return}) <|> return ""
|
||
return (first:rest,frac)
|
||
|
||
numberpartsstartingwithpoint :: Parser (String,String)
|
||
numberpartsstartingwithpoint = do
|
||
char '.'
|
||
frac <- many1 digit
|
||
return ("",frac)
|
||
|
||
|
||
spacenonewline :: Parser Char
|
||
spacenonewline = satisfy (\c -> c `elem` " \v\f\t")
|
||
|
||
restofline :: Parser String
|
||
restofline = anyChar `manyTill` newline
|
||
|
||
whiteSpace1 :: Parser ()
|
||
whiteSpace1 = do space; whiteSpace
|
||
|
||
nonspace = satisfy (not . isSpace)
|
||
|
||
|
||
{-| Parse a timelog file. Here is the timelog grammar, from timeclock.el 2.6:
|
||
|
||
@
|
||
A timelog contains data in the form of a single entry per line.
|
||
Each entry has the form:
|
||
|
||
CODE YYYY/MM/DD HH:MM:SS [COMMENT]
|
||
|
||
CODE is one of: b, h, i, o or O. COMMENT is optional when the code is
|
||
i, o or O. The meanings of the codes are:
|
||
|
||
b Set the current time balance, or \"time debt\". Useful when
|
||
archiving old log data, when a debt must be carried forward.
|
||
The COMMENT here is the number of seconds of debt.
|
||
|
||
h Set the required working time for the given day. This must
|
||
be the first entry for that day. The COMMENT in this case is
|
||
the number of hours in this workday. Floating point amounts
|
||
are allowed.
|
||
|
||
i Clock in. The COMMENT in this case should be the name of the
|
||
project worked on.
|
||
|
||
o Clock out. COMMENT is unnecessary, but can be used to provide
|
||
a description of how the period went, for example.
|
||
|
||
O Final clock out. Whatever project was being worked on, it is
|
||
now finished. Useful for creating summary reports.
|
||
@
|
||
|
||
Example:
|
||
|
||
i 2007/03/10 12:26:00 hledger
|
||
o 2007/03/10 17:26:02
|
||
|
||
-}
|
||
timelog :: Parser TimeLog
|
||
timelog = do
|
||
entries <- many timelogentry <?> "timelog entry"
|
||
eof
|
||
return $ TimeLog entries
|
||
|
||
timelogentry :: Parser TimeLogEntry
|
||
timelogentry = do
|
||
many (commentline <|> blankline)
|
||
code <- oneOf "bhioO"
|
||
many1 spacenonewline
|
||
datetime <- ledgerdatetime
|
||
many spacenonewline
|
||
comment <- restofline
|
||
return $ TimeLogEntry code datetime comment
|
||
|
||
ledgerfromtimelog :: Parser RawLedger
|
||
ledgerfromtimelog = do
|
||
tl <- timelog
|
||
return $ ledgerFromTimeLog tl
|
||
|