module Parse where import qualified Data.Map as Map import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Language import qualified Text.ParserCombinators.Parsec.Token as P import System.IO import Utils import Models -- set up token parsing, though we're not yet using these much ledgerLanguageDef = LanguageDef { commentStart = "" , commentEnd = "" , commentLine = ";" , nestedComments = False , identStart = letter <|> char '_' , identLetter = alphaNum <|> oneOf "_':" , opStart = opLetter emptyDef , opLetter = oneOf "!#$%&*+./<=>?@\\^|-~" , reservedOpNames= [] , reservedNames = [] , caseSensitive = False } lexer = P.makeTokenParser ledgerLanguageDef whiteSpace = P.whiteSpace lexer lexeme = P.lexeme lexer symbol = P.symbol lexer natural = P.natural lexer parens = P.parens lexer semi = P.semi lexer identifier = P.identifier lexer reserved = P.reserved lexer reservedOp = P.reservedOp lexer -- standard ledger file parser {- Here's the ledger 2.5 grammar: "The ledger file format is quite simple, but also very flexible. It supports many options, though typically the user can ignore most of them. They are summarized below. The initial character of each line determines what the line means, and how it should be interpreted. Allowable initial characters are: NUMBER A line beginning with a number denotes an entry. It may be followed by any number of lines, each beginning with whitespace, to denote the entry’s account transactions. The format of the first line is: DATE[=EDATE] [*|!] [(CODE)] DESC If ‘*’ appears after the date (with optional effective date), it indicates the entry is “cleared”, which can mean whatever the user wants it t omean. If ‘!’ appears after the date, it indicates d the entry is “pending”; i.e., tentatively cleared from the user’s point of view, but not yet actually cleared. If a ‘CODE’ appears in parentheses, it may be used to indicate a check number, or the type of the transaction. Following these is the payee, or a description of the transaction. The format of each following transaction is: ACCOUNT AMOUNT [; NOTE] The ‘ACCOUNT’ may be surrounded by parentheses if it is a virtual transactions, or square brackets if it is a virtual transactions that must balance. The ‘AMOUNT’ can be followed by a per-unit transaction cost, by specifying ‘ AMOUNT’, or a complete transaction cost with ‘@ AMOUNT’. Lastly, the ‘NOTE’ may specify an actual and/or effective date for the transaction by using the syntax ‘[ACTUAL_DATE]’ or ‘[=EFFECTIVE_DATE]’ or ‘[ACTUAL_DATE=EFFECtIVE_DATE]’. = An automated entry. A value expression must appear after the equal sign. After this initial line there should be a set of one or more transactions, just as if it were normal entry. If the amounts of the transactions have no commodity, they will be applied as modifiers to whichever real transaction is matched by the value expression. ~ A period entry. A period expression must appear after the tilde. After this initial line there should be a set of one or more transactions, just as if it were normal entry. ! A line beginning with an exclamation mark denotes a command directive. It must be immediately followed by the command word. The supported commands are: ‘!include’ Include the stated ledger file. ‘!account’ The account name is given is taken to be the parent of all transac- tions that follow, until ‘!end’ is seen. ‘!end’ Ends an account block. ; A line beginning with a colon indicates a comment, and is ignored. Y If a line begins with a capital Y, it denotes the year used for all subsequent entries that give a date without a year. The year should appear immediately after the Y, for example: ‘Y2004’. This is useful at the beginning of a file, to specify the year for that file. If all entries specify a year, however, this command has no effect. P Specifies a historical price for a commodity. These are usually found in a pricing history file (see the ‘-Q’ option). The syntax is: P DATE SYMBOL PRICE N SYMBOL Indicates that pricing information is to be ignored for a given symbol, nor will quotes ever be downloaded for that symbol. Useful with a home currency, such as the dollar ($). It is recommended that these pricing options be set in the price database file, which defaults to ‘~/.pricedb’. The syntax for this command is: N SYMBOL D AMOUNT Specifies the default commodity to use, by specifying an amount in the expected format. The entry command will use this commodity as the default when none other can be determined. This command may be used multiple times, to set the default flags for different commodities; whichever is seen last is used as the default commodity. For example, to set US dollars as the default commodity, while also setting the thousands flag and decimal flag for that commodity, use: D $1,000.00 C AMOUNT1 = AMOUNT2 Specifies a commodity conversion, where the first amount is given to be equiv- alent to the second amount. The first amount should use the decimal precision desired during reporting: C 1.00 Kb = 1024 bytes i, o, b, h These four relate to timeclock support, which permits ledger to read timelog files. See the timeclock’s documentation for more info on the syntax of its timelog files." -} -- parsec example: http://pandoc.googlecode.com/svn/trunk/src/Text/Pandoc/Readers/RST.hs -- sample data in Tests.hs ledgerfile :: Parser LedgerFile ledgerfile = ledger <|> ledgerfromtimelog ledger :: Parser LedgerFile ledger = do ledgernondatalines -- for now these must come first, unlike ledger modifier_entries <- many ledgermodifierentry periodic_entries <- many ledgerperiodicentry -- entries <- (many ledgerentry) "entry" eof return $ LedgerFile modifier_entries periodic_entries entries ledgernondatalines :: Parser [String] ledgernondatalines = many (ledgerdirective <|> ledgercommentline <|> do {whiteSpace1; return []}) ledgercommentline :: Parser String ledgercommentline = do char ';' many spacenonewline restofline "comment line" ledgercomment :: Parser String ledgercomment = try (do char ';' many spacenonewline many (noneOf "\n") ) <|> return "" "comment" ledgerdirective :: Parser String ledgerdirective = char '!' >> restofline "directive" ledgermodifierentry :: Parser ModifierEntry ledgermodifierentry = do char '=' "entry" many spacenonewline valueexpr <- restofline transactions <- ledgertransactions ledgernondatalines return (ModifierEntry valueexpr transactions) ledgerperiodicentry :: Parser PeriodicEntry ledgerperiodicentry = do char '~' "entry" many spacenonewline periodexpr <- restofline transactions <- ledgertransactions ledgernondatalines return (PeriodicEntry periodexpr transactions) ledgerentry :: Parser LedgerEntry ledgerentry = do date <- ledgerdate status <- ledgerstatus code <- ledgercode -- ledger treats entry comments as part of the description, we too for now -- desc <- many (noneOf ";\n") "description" -- let description = reverse $ dropWhile (==' ') $ reverse desc description <- many (noneOf "\n") "description" comment <- ledgercomment restofline transactions <- ledgertransactions ledgernondatalines return $ autofillEntry $ LedgerEntry date status code description comment transactions ledgerdate :: Parser String ledgerdate = do y <- many1 digit char '/' m <- many1 digit char '/' d <- many1 digit many1 spacenonewline return $ printf "%04s/%02s/%02s" y m d ledgerstatus :: Parser Bool ledgerstatus = try (do { char '*'; many1 spacenonewline; return True } ) <|> return False ledgercode :: Parser String ledgercode = try (do { char '('; code <- anyChar `manyTill` char ')'; many1 spacenonewline; return code } ) <|> return "" ledgertransactions :: Parser [LedgerTransaction] ledgertransactions = (ledgertransaction "transaction") `manyTill` (do {newline "blank line"; return ()} <|> eof) ledgertransaction :: Parser LedgerTransaction ledgertransaction = do many1 spacenonewline account <- ledgeraccount amount <- ledgeramount many spacenonewline comment <- ledgercomment restofline many ledgercommentline return (LedgerTransaction account amount comment) -- account names may have single spaces in them, and are terminated by two or more spaces ledgeraccount :: Parser String ledgeraccount = many1 ((alphaNum <|> char ':' <|> char '/' <|> char '_' "account name") <|> try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}} "double space")) ledgeramount :: Parser Amount ledgeramount = try (do many1 spacenonewline c <- many (noneOf "-.0123456789;\n") "currency" q <- many1 (oneOf "-.,0123456789") "quantity" let q' = stripcommas $ striptrailingpoint q let (int,frac) = break (=='.') q' let precision = length $ dropWhile (=='.') frac return (Amount (getcurrency c) (read q') precision) ) <|> return (Amount (Currency "AUTO" 0) 0 0) where stripcommas = filter (',' /=) striptrailingpoint = reverse . dropWhile (=='.') . reverse ledgereol :: Parser String ledgereol = do {newline; return []} spacenonewline :: Parser Char spacenonewline = satisfy (\c -> c `elem` " \v\f\t") restofline :: Parser String restofline = anyChar `manyTill` newline whiteSpace1 :: Parser () whiteSpace1 = do space; whiteSpace -- timelog file parser {- timelog grammar, from timeclock.el 2.6 A timelog contains data in the form of a single entry per line. Each entry has the form: CODE YYYY/MM/DD HH:MM:SS [COMMENT] CODE is one of: b, h, i, o or O. COMMENT is optional when the code is i, o or O. The meanings of the codes are: b Set the current time balance, or \"time debt\". Useful when archiving old log data, when a debt must be carried forward. The COMMENT here is the number of seconds of debt. h Set the required working time for the given day. This must be the first entry for that day. The COMMENT in this case is the number of hours in this workday. Floating point amounts are allowed. i Clock in. The COMMENT in this case should be the name of the project worked on. o Clock out. COMMENT is unnecessary, but can be used to provide a description of how the period went, for example. O Final clock out. Whatever project was being worked on, it is now finished. Useful for creating summary reports. example: i 2007/03/10 12:26:00 hledger o 2007/03/10 17:26:02 -} ledgerfromtimelog :: Parser LedgerFile ledgerfromtimelog = do tl <- timelog return $ ledgerFromTimeLog tl timelog :: Parser TimeLog timelog = do entries <- many timelogentry eof return $ TimeLog entries timelogentry :: Parser TimeLogEntry timelogentry = do code <- oneOf "bhioO" many1 spacenonewline date <- ledgerdate time <- many $ oneOf "0123456789:" let datetime = date ++ " " ++ time many spacenonewline comment <- restofline return $ TimeLogEntry code datetime comment -- utils parseError :: (Show a) => a -> IO () parseError e = do putStr "ledger parse error at "; print e printParseResult :: Show v => Either ParseError v -> IO () printParseResult r = case r of Left e -> parseError e Right v -> print v parseLedgerFile :: String -> IO (Either ParseError LedgerFile) parseLedgerFile "-" = fmap (parse ledgerfile "-") $ hGetContents stdin parseLedgerFile f = parseFromFile ledgerfile f