convert: new rules file format, more docs

This commit is contained in:
Simon Michael 2009-12-03 20:54:20 +00:00
parent 43e327d05b
commit 2607082e9e
3 changed files with 285 additions and 95 deletions

View File

@ -1,98 +1,249 @@
{-|
Convert account data in CSV format (eg downloaded from a bank) to ledger
format, and print it on stdout. See the manual for more details.
-}
module Commands.Convert where
import Data.List.Split (splitOn)
import Options -- (Opt,Debug)
import Ledger.Types (Ledger,AccountName)
import Ledger.Utils (strip)
import System.IO (stderr, hPutStr)
import Options (Opt(Debug))
import Ledger.Types (Ledger,AccountName,LedgerTransaction(..),Posting(..),PostingType(..))
import Ledger.Utils (strip, spacenonewline, restofline)
import Ledger.Parse (someamount, emptyCtx, ledgeraccountname)
import Ledger.Amount (nullmixedamt)
import System.IO (stderr)
import Text.CSV (parseCSVFromFile, printCSV)
import Text.Printf (printf)
import Text.Printf (hPrintf)
import Text.RegexPR (matchRegexPR)
import Data.Maybe
import Ledger.Dates (firstJust, showDate)
import Ledger.Dates (firstJust, showDate, parsedate)
import Locale (defaultTimeLocale)
import Data.Time.Format (parseTime)
import Control.Monad (when)
import Safe (readMay, readDef)
import Control.Monad (when, guard)
import Safe (readDef, readMay)
import System.FilePath.Posix (takeBaseName)
import Text.ParserCombinators.Parsec
convert :: [Opt] -> [String] -> Ledger -> IO ()
convert opts args _ = do
when (length args /= 3) (error "please specify a csv file, base account, and import rules file.")
let [csvfile,baseacct,rulesfile] = args
rulesstr <- readFile rulesfile
(fieldpositions,rules) <- parseRules rulesstr
parse <- parseCSVFromFile csvfile
let records = case parse of
when (length args /= 2) (error "please specify a csv data file and conversion rules file.")
let debug = Debug `elem` opts
[csvfile,rulesfile] = args
csvparse <- parseCSVFromFile csvfile
let records = case csvparse of
Left e -> error $ show e
Right rs -> reverse rs
mapM_ (print_ledger_txn (Debug `elem` opts) (baseacct,fieldpositions,rules)) records
Right rs -> reverse $ filter (/= [""]) rs
rulesstr <- readFile rulesfile
let rules = case parseCsvRules (takeBaseName csvfile) rulesstr of
Left e -> error $ show e
Right r -> r
when debug $ hPrintf stderr "using csv conversion rules file %s\n" rulesfile
when debug $ hPrintf stderr "%s\n" (show rules)
mapM_ (printTxn debug rules) records
{- |
A set of data definitions and account-matching patterns sufficient to
convert a particular CSV data file into meaningful ledger transactions. See above.
-}
data CsvRules = CsvRules {
dateField :: Maybe FieldPosition,
statusField :: Maybe FieldPosition,
codeField :: Maybe FieldPosition,
descriptionField :: Maybe FieldPosition,
amountField :: Maybe FieldPosition,
currencyField :: Maybe FieldPosition,
baseCurrency :: Maybe String,
baseAccount :: AccountName,
accountRules :: [AccountRule]
} deriving (Show)
type Rule = (
[(String, Maybe String)] -- list of patterns and optional replacements
,AccountName -- account name to use for a matched transaction
nullrules = CsvRules {
dateField=Nothing,
statusField=Nothing,
codeField=Nothing,
descriptionField=Nothing,
amountField=Nothing,
currencyField=Nothing,
baseCurrency=Nothing,
baseAccount="unknown",
accountRules=[]
}
type FieldPosition = Int
type AccountRule = (
[(String, Maybe String)] -- list of regex match patterns with optional replacements
,AccountName -- account name to use for a transaction matching this rule
)
parseRules :: String -> IO ([Maybe Int],[Rule])
parseRules s = do
let ls = map strip $ lines s
let paras = splitOn [""] ls
let fieldpositions = map readMay $ splitOn "," $ head $ head paras
let rules = [(map parsePatRepl $ init ls, last ls) | ls <- tail paras]
return (fieldpositions,rules)
type CsvRecord = [String]
parsePatRepl :: String -> (String, Maybe String)
parsePatRepl l = case splitOn "=" l of
(p:r:_) -> (p, Just r)
_ -> (l, Nothing)
-- rules file parser
print_ledger_txn :: Bool -> (String,[Maybe Int],[Rule]) -> [String] -> IO ()
print_ledger_txn _ (_,[],_) _ = return ()
print_ledger_txn _ (('#':_),_,_) _ = return ()
print_ledger_txn debug (baseacct,fieldpositions,rules) csvrecord
| length csvrecord < maximum (map (fromMaybe 0) fieldpositions) + 1 = return ()
| otherwise =
do
when debug $ hPutStr stderr $ printCSV [csvrecord]
let date = maybe "" (csvrecord !!) (fieldpositions !! 0)
number = maybe "" (csvrecord !!) (fieldpositions !! 1)
description = maybe "" (csvrecord !!) (fieldpositions !! 2)
amount = maybe "" (csvrecord !!) (fieldpositions !! 3)
amount' = strnegate amount where strnegate ('-':s) = s
strnegate s = '-':s
unknownacct | (readDef 0 amount' :: Double) < 0 = "income:unknown"
parseCsvRules :: String -> String -> Either ParseError CsvRules
parseCsvRules basefilename s = runParser csvrulesP nullrules{baseAccount=basefilename} "" s
csvrulesP :: GenParser Char CsvRules CsvRules
csvrulesP = do
optional blanklines
many definitions
r <- getState
ars <- many accountrule
optional blanklines
eof
return r{accountRules=ars}
-- | Real independent parser choice, even when alternative matches share a prefix.
choice' parsers = choice $ map try (init parsers) ++ [last parsers]
definitions :: GenParser Char CsvRules ()
definitions = do
choice' [
datefield
,statusfield
,codefield
,descriptionfield
,amountfield
,currencyfield
,basecurrency
,baseaccount
] <?> "definition"
return ()
datefield = do
string "date-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{dateField=readMay v}
codefield = do
string "code-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{codeField=readMay v}
statusfield = do
string "status-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{statusField=readMay v}
descriptionfield = do
string "description-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{descriptionField=readMay v}
amountfield = do
string "amount-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{amountField=readMay v}
currencyfield = do
string "currency-field"
many1 spacenonewline
v <- restofline
r <- getState
setState r{currencyField=readMay v}
basecurrency = do
string "currency"
many1 spacenonewline
v <- restofline
r <- getState
setState r{baseCurrency=Just v}
baseaccount = do
string "base-account"
many1 spacenonewline
v <- ledgeraccountname
optional newline
r <- getState
setState r{baseAccount=v}
accountrule :: GenParser Char CsvRules AccountRule
accountrule = do
blanklines
pats <- many1 matchreplacepattern
guard $ length pats >= 2
let pats' = init pats
acct = either (fail.show) id $ runParser ledgeraccountname () "" $ fst $ last pats
return (pats',acct)
blanklines = many1 blankline >> return ()
blankline = many spacenonewline >> newline >> return () <?> "blank line"
matchreplacepattern = do
matchpat <- many1 (noneOf "=\n")
replpat <- optionMaybe $ do {char '='; many $ noneOf "\n"}
newline
return (matchpat,replpat)
printTxn :: Bool -> CsvRules -> CsvRecord -> IO ()
printTxn debug rules rec = do
when debug $ hPrintf stderr "csv: %s" (printCSV [rec])
putStr $ show $ transactionFromCsvRecord rules rec
-- csv record conversion
transactionFromCsvRecord :: CsvRules -> CsvRecord -> LedgerTransaction
transactionFromCsvRecord rules fields =
let
date = parsedate $ normaliseDate $ maybe "1900/1/1" (fields !!) (dateField rules)
status = maybe False (null . strip . (fields !!)) (statusField rules)
code = maybe "" (fields !!) (codeField rules)
desc = maybe "" (fields !!) (descriptionField rules)
comment = ""
precomment = ""
amountstr = maybe "" (fields !!) (amountField rules)
amountstr' = strnegate amountstr where strnegate ('-':s) = s
strnegate s = '-':s
currency = maybe (fromMaybe "" $ baseCurrency rules) (fields !!) (currencyField rules)
amountstr'' = currency ++ amountstr'
amountparse = runParser someamount emptyCtx "" amountstr''
amount = either (const nullmixedamt) id amountparse
unknownacct | (readDef 0 amountstr' :: Double) < 0 = "income:unknown"
| otherwise = "expenses:unknown"
(acct,desc) = choose_acct_desc rules (unknownacct,description)
printf "%s%s %s\n" (fixdate date) (if not (null number) then printf " (%s)" number else "") desc
printf " %-30s %15s\n" acct (printf "$%s" amount' :: String)
printf " %s\n\n" baseacct
(acct,newdesc) = identify (accountRules rules) unknownacct desc
in
LedgerTransaction {
ltdate=date,
lteffectivedate=Nothing,
ltstatus=status,
ltcode=code,
ltdescription=newdesc,
ltcomment=comment,
ltpreceding_comment_lines=precomment,
ltpostings=[
Posting {
pstatus=False,
paccount=acct,
pamount=amount,
pcomment="",
ptype=RegularPosting
},
Posting {
pstatus=False,
paccount=baseAccount rules,
pamount=(-amount),
pcomment="",
ptype=RegularPosting
}
]
}
choose_acct_desc :: [Rule] -> (String,String) -> (String,String)
choose_acct_desc rules (acct,desc) | null matchingrules = (acct,desc)
| otherwise = (a,d)
where
matchingrules = filter ismatch rules :: [Rule]
where ismatch = any (isJust . flip matchregex desc . fst) . fst
(prs,a) = head matchingrules
mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs
(m,repl) = head mrs
matched = fst $ fst $ fromJust m
d = fromMaybe matched repl
matchregex = matchRegexPR . ("(?i)" ++)
fixdate :: String -> String
fixdate s = maybe "0000/00/00" showDate $
-- | Convert some date string with unknown format to YYYY/MM/DD.
normaliseDate :: String -> String
normaliseDate s = maybe "0000/00/00" showDate $
firstJust
[parseTime defaultTimeLocale "%Y/%m/%e" s
-- can't parse a month without leading 0, try adding onee
-- can't parse a month without leading 0, try adding one
,parseTime defaultTimeLocale "%Y/%m/%e" (take 5 s ++ "0" ++ drop 5 s)
,parseTime defaultTimeLocale "%Y-%m-%e" s
,parseTime defaultTimeLocale "%Y-%m-%e" (take 5 s ++ "0" ++ drop 5 s)
@ -102,3 +253,19 @@ fixdate s = maybe "0000/00/00" showDate $
,parseTime defaultTimeLocale "%m-%e-%Y" ('0':s)
]
-- | Apply account matching rules to a transaction description to obtain
-- the most appropriate account and a new description.
identify :: [AccountRule] -> String -> String -> (String,String)
identify rules defacct desc | null matchingrules = (defacct,desc)
| otherwise = (acct,newdesc)
where
matchingrules = filter ismatch rules :: [AccountRule]
where ismatch = any (isJust . flip matchregex desc . fst) . fst
(prs,acct) = head matchingrules
mrs = filter (isJust . fst) $ map (\(p,r) -> (matchregex p desc, r)) prs
(m,repl) = head mrs
matched = fst $ fst $ fromJust m
newdesc = fromMaybe matched repl
matchregex = matchRegexPR . ("(?i)" ++)

View File

@ -20,7 +20,7 @@ timeprogname = "hours"
usagehdr =
"Usage: hledger [OPTIONS] [COMMAND [PATTERNS]]\n" ++
" hours [OPTIONS] [COMMAND [PATTERNS]]\n" ++
" hledger convert CSVFILE ACCOUNTNAME RULESFILE\n" ++
" hledger convert CSVFILE RULESFILE\n" ++
"\n" ++
"hledger uses your ~/.ledger or $LEDGER file (or another specified with -f),\n" ++
"while hours uses your ~/.timelog or $TIMELOG file.\n" ++

73
README
View File

@ -157,26 +157,35 @@ Commands
Convert
,,,,,,,
The convert command converts CSV (comma-separated value) files downloaded
from a bank into ledger format. Doing a bulk import in this way can be a
easier than entering each transaction by hand. The downside is you no
longer have your own data with which to detect errors by the bank.
The convert command reads a CSV_ file you have downloaded from your bank,
and prints out the transactions in ledger format, suitable for adding to
your ledger. This can be a lot quicker than entering every transaction by
hand. (The downside is that you are less likely to notice if your bank makes an
error, so keep an eye on them!)
Usage is different from the other commands, typically it looks like this::
Use it like this::
hledger convert FILE.csv BASEACCOUNT FILE.rules >FILE.ledger
(review FILE.ledger, then copy and paste into main ledger)
hledger convert FILE.csv FILE.rules >FILE.ledger
Ie convert the csv data and save the output into a similarly named ledger file.
(It's not required, but convenient to use the same base filename for the
csv, rules and output files as above.)
This will convert the csv data in FILE.csv using the conversion hints in
FILE.rules, and save the output into a temporary ledger file. Then you
should review FILE.ledger for problems; update the rules and convert again
if needed; and finally copy/paste transactions which are new into your
main ledger.
BASEACCOUNT is the source account for these transactions, eg
``assets:bank:checking``.
rules file
''''''''''
FILE.rules contains some rules to help convert the data. Here's an example::
A .rules file contains some data definitions and some rules for assigning
destination accounts to transactions. Typically you will have one csv file
and corresponding rules file per bank account. Here's an example rules
file for converting the csv download from a Wells Fargo checking account::
0,-,4,1
base-account assets:bank:checking
date-field 0
description-field 4
amount-field 1
currency $
SPECTRUM
expenses:health:gym
@ -188,21 +197,33 @@ FILE.rules contains some rules to help convert the data. Here's an example::
(TO|FROM) SAVINGS
assets:bank:savings
It must contain:
This says:
- paragraphs separated by one blank line.
- the ledger account corresponding to this csv file is assets:bank:checking
- the first csv field is the date, the second is the amount, the fifth is the description
- prepend a dollar sign to the amount field
- if description contains SPECTRUM (case-insensitive), the transaction is a gym expense
- if description contains ITUNES or BLKBSTR, the transaction is an entertainment expense; also rewrite BLKBSTR as BLOCKBUSTER
- if description contains TO SAVINGS or FROM SAVINGS, the transaction is a savings transfer
- The first paragraph is a single line of four comma-separated fields,
which are numbers indicating the (0-based) csv field positions
corresponding to the transaction date, code/number, description, and amount.
If a field does not exist in the csv, use - to specify a default value.
Notes:
- Other paragraphs consist of one or more regular expression patterns, one
per line, followed by a line specifying the account to use when a
transaction's description matches any of these patterns. Patterns may
optionally have a replacement pattern specified after =, otherwise the
matching part is used.
- Definitions must come first, one per line, all in one paragraph. Each
is a name and a value separated by whitespace. Supported names are:
base-account, date-field, status-field, code-field, description-field,
amount-field, currency-field, currency. All are optional and will
use defaults if not specified.
- The remainder of the file is account-assigning rules. Each is a
paragraph consisting of one or more description-matching patterns
(case-insensitive regular expressions), one per line, followed by the
account name to use when the transaction's description matches any of
these patterns.
- A match pattern may optionally be followed by = and a replacement
pattern, which will become the ledger transaction's description.
Otherwise the matched part of the csv description is used. (To preserve
the full csv description, use .* before and after the match pattern.)
Smart dates
...........
@ -377,3 +398,5 @@ Other differences
.. _c++ ledger's manual: http://joyful.com/repos/ledger/doc/ledger.html
.. _binaries: http://hledger.org/binaries/
.. _Haskell Platform: http://hackage.haskell.org/platform/
.. _CSV: http://en.wikipedia.org/wiki/Comma-separated_values