parsing: better international number format support (#32)

This allows period and comma to be used for decimal point and digit group
separator or vice versa, and also flexible digit groups.  See
http://en.wikipedia.org/wiki/Decimal_separator .  Digit group separators
are possibly not worth the trouble and might not stay.
This commit is contained in:
Simon Michael 2011-01-19 12:32:18 +00:00
parent 5913f688ad
commit d3663b818e
9 changed files with 190 additions and 97 deletions

View File

@ -251,8 +251,8 @@ commodity name on either the left or right. Commodity names which contain
more than just letters should be enclosed in double quotes. Negative
amounts usually have the minus sign next to the number (`$-1`), but it may
also go before a currency symbol/commodity name (`-$1`). The number may
optionally have thousands separators. Currently, thousands separators must
be `,` (comma) and the decimal point must be `.` (period).
optionally have a decimal point and/or digit group separators (`.` and `,`
or vice-versa).
hledger's file format aims to be compatible with c++ ledger, so you
can use both tools on your journal. For more details, see [File format
@ -1016,10 +1016,6 @@ need to make small edits to restore compatibility for one or the other.
hledger does not allow separate dates for individual postings, unlike c++
ledger.
Likewise, hledger does not support per-posting cleared status. It does
ignore a cleared flag (`*`) at the start of a posting, so that the account
name is parsed correctly.
(See also [usage issues](#usage-issues))
### Features not supported
@ -1106,13 +1102,16 @@ entries, and the following c++ ledger options and commands:
- hledger doesn't track the value of commodities with varying
price; prices are fixed as of the transaction date
- hledger's output follows the decimal point character, digit grouping,
and digit group separator character used in the journal.
- hledger print shows amounts for all postings, and shows unit
prices for amounts which have them. (This currently means that
it does not print multi-commodity transactions in valid journal format.)
- hledger's default commodity directive (D) applies the commodity to
- hledger's default commodity directive (D) sets the commodity for
subsequent commodityless amounts. ledger uses it only to set commodity
display settings (and for the entry command).
display settings and for the entry command.
## Troubleshooting

View File

@ -179,18 +179,36 @@ showAmountWithoutPriceOrCommodity :: Amount -> String
showAmountWithoutPriceOrCommodity a@Amount{commodity=c} = showAmount a{commodity=c{symbol=""}, price=Nothing}
-- | Get the string representation of the number part of of an amount,
-- using the display precision from its commodity.
-- using the display settings from its commodity.
showAmount' :: Amount -> String
showAmount' (Amount (Commodity {comma=comma,precision=p}) q _) = addthousandsseparators $ qstr
showAmount' (Amount (Commodity {decimalpoint=d,precision=p,separator=s,separatorpositions=spos}) q _) =
punctuatenumber d s spos $ qstr
where
addthousandsseparators = if comma then punctuatethousands else id
qstr | p == maxprecision && isint q = printf "%d" (round q::Integer)
qstr -- | p == maxprecision && isint q = printf "%d" (round q::Integer)
| p == maxprecision = printf "%f" q
| otherwise = printf ("%."++show p++"f") q
isint n = fromIntegral (round n) == n
-- isint n = fromIntegral (round n) == n
maxprecision = 999999
-- | Replace a number string's decimal point with the specified character,
-- and add the specified digit group separators.
punctuatenumber :: Char -> Char -> [Int] -> String -> String
punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac''
where
(sign,num) = break isDigit str
(int,frac) = break (=='.') num
frac' = dropWhile (=='.') frac
frac'' | null frac' = ""
| otherwise = dec:frac'
extend [] = []
extend gs = init gs ++ repeat (last gs)
addseps _ [] str = str
addseps sep (g:gs) str
| length str <= g = str
| otherwise = let (s,rest) = splitAt g str
in s ++ [sep] ++ addseps sep gs rest
-- | Add thousands-separating commas to a decimal number string
punctuatethousands :: String -> String
punctuatethousands s =
@ -404,7 +422,7 @@ nullmixedamt = Mixed []
-- | A temporary value for parsed transactions which had no amount specified.
missingamt :: MixedAmount
missingamt = Mixed [Amount Commodity {symbol="AUTO",side=L,spaced=False,comma=False,precision=0} 0 Nothing]
missingamt = Mixed [Amount unknown{symbol="AUTO"} 0 Nothing]
tests_Hledger_Data_Amount = TestList [

View File

@ -21,11 +21,11 @@ quoteCommoditySymbolIfNeeded s | any (`elem` nonsimplecommoditychars) s = "\"" +
-- convenient amount and commodity constructors, for tests etc.
unknown = Commodity {symbol="", side=L,spaced=False,comma=False,precision=0}
dollar = Commodity {symbol="$",side=L,spaced=False,comma=False,precision=2}
euro = Commodity {symbol="",side=L,spaced=False,comma=False,precision=2}
pound = Commodity {symbol="£",side=L,spaced=False,comma=False,precision=2}
hour = Commodity {symbol="h",side=R,spaced=False,comma=False,precision=1}
unknown = Commodity {symbol="", side=L,spaced=False,decimalpoint='.',precision=0,separator=',',separatorpositions=[]}
dollar = Commodity {symbol="$",side=L,spaced=False,decimalpoint='.',precision=2,separator=',',separatorpositions=[]}
euro = Commodity {symbol="",side=L,spaced=False,decimalpoint='.',precision=2,separator=',',separatorpositions=[]}
pound = Commodity {symbol="£",side=L,spaced=False,decimalpoint='.',precision=2,separator=',',separatorpositions=[]}
hour = Commodity {symbol="h",side=R,spaced=False,decimalpoint='.',precision=1,separator=',',separatorpositions=[]}
dollars n = Amount dollar n Nothing
euros n = Amount euro n Nothing

View File

@ -58,8 +58,11 @@ data Commodity = Commodity {
-- display preferences for amounts of this commodity
side :: Side, -- ^ should the symbol appear on the left or the right
spaced :: Bool, -- ^ should there be a space between symbol and quantity
comma :: Bool, -- ^ should thousands be comma-separated
precision :: Int -- ^ number of decimal places to display
precision :: Int, -- ^ number of decimal places to display
-- XXX these three might be better belonging to Journal
decimalpoint :: Char, -- ^ character to use as decimal point
separator :: Char, -- ^ character to use for separating digit groups (eg thousands)
separatorpositions :: [Int] -- ^ positions of separators, counting leftward from decimal point
} deriving (Eq,Ord,Show,Read)
-- | An amount's price may be written as @ unit price or @@ total price.

View File

@ -119,6 +119,7 @@ module Hledger.Read.JournalReader (
where
import Control.Monad.Error (ErrorT(..), throwError, catchError)
import Data.List.Split (wordsBy)
import Safe (headDef)
import Text.ParserCombinators.Parsec hiding (parse)
#if __GLASGOW_HASKELL__ <= 610
import Prelude hiding (readFile, putStr, putStrLn, print, getContents)
@ -495,28 +496,28 @@ leftsymbolamount = do
let applysign = if isJust sign then negate else id
sym <- commoditysymbol
sp <- many spacenonewline
(q,p,comma) <- amountquantity
(q,p,d,s,spos) <- number
pri <- priceamount
let c = Commodity {symbol=sym,side=L,spaced=not $ null sp,comma=comma,precision=p}
let c = Commodity {symbol=sym,side=L,spaced=not $ null sp,decimalpoint=d,precision=p,separator=s,separatorpositions=spos}
return $ applysign $ Mixed [Amount c q pri]
<?> "left-symbol amount"
rightsymbolamount :: GenParser Char JournalContext MixedAmount
rightsymbolamount = do
(q,p,comma) <- amountquantity
(q,p,d,s,spos) <- number
sp <- many spacenonewline
sym <- commoditysymbol
pri <- priceamount
let c = Commodity {symbol=sym,side=R,spaced=not $ null sp,comma=comma,precision=p}
let c = Commodity {symbol=sym,side=R,spaced=not $ null sp,decimalpoint=d,precision=p,separator=s,separatorpositions=spos}
return $ Mixed [Amount c q pri]
<?> "right-symbol amount"
nosymbolamount :: GenParser Char JournalContext MixedAmount
nosymbolamount = do
(q,p,comma) <- amountquantity
(q,p,d,s,spos) <- number
pri <- priceamount
defc <- getCommodity
let c = fromMaybe Commodity{symbol="",side=L,spaced=False,comma=comma,precision=p} defc
let c = fromMaybe Commodity{symbol="",side=L,spaced=False,decimalpoint=d,precision=p,separator=s,separatorpositions=spos} defc
return $ Mixed [Amount c q pri]
<?> "no-symbol amount"
@ -541,58 +542,130 @@ priceamount =
try (do
char '@'
many spacenonewline
a <- someamount -- XXX this could parse more prices ad infinitum, but shouldn't
a <- someamount -- XXX can parse more prices ad infinitum, shouldn't
return $ Just $ TotalPrice a)
<|> (do
many spacenonewline
a <- someamount -- XXX this could parse more prices ad infinitum, but shouldn't
a <- someamount -- XXX can parse more prices ad infinitum, shouldn't
return $ Just $ UnitPrice a))
<|> return Nothing
-- gawd.. trying to parse a ledger number without error:
-- | Parse a ledger-style numeric quantity and also return the number of
-- digits to the right of the decimal point and whether thousands are
-- separated by comma.
amountquantity :: GenParser Char JournalContext (Double, Int, Bool)
amountquantity = do
type Quantity = Double
-- -- | Parse a ledger-style numeric quantity and also return the number of
-- -- digits to the right of the decimal point and whether thousands are
-- -- separated by comma.
-- amountquantity :: GenParser Char JournalContext (Quantity, Int, Bool)
-- amountquantity = do
-- sign <- optionMaybe $ string "-"
-- (intwithcommas,frac) <- numberparts
-- let comma = ',' `elem` intwithcommas
-- let precision = length frac
-- -- read the actual value. We expect this read to never fail.
-- let int = filter (/= ',') intwithcommas
-- let int' = if null int then "0" else int
-- let frac' = if null frac then "0" else frac
-- let sign' = fromMaybe "" sign
-- let quantity = read $ sign'++int'++"."++frac'
-- return (quantity, precision, comma)
-- <?> "commodity quantity"
-- -- | parse the two strings of digits before and after a possible decimal
-- -- point. The integer part may contain commas, or either part may be
-- -- empty, or there may be no point.
-- numberparts :: GenParser Char JournalContext (String,String)
-- numberparts = numberpartsstartingwithdigit <|> numberpartsstartingwithpoint
-- numberpartsstartingwithdigit :: GenParser Char JournalContext (String,String)
-- numberpartsstartingwithdigit = do
-- let digitorcomma = digit <|> char ','
-- first <- digit
-- rest <- many digitorcomma
-- frac <- try (do {char '.'; many digit}) <|> return ""
-- return (first:rest,frac)
-- numberpartsstartingwithpoint :: GenParser Char JournalContext (String,String)
-- numberpartsstartingwithpoint = do
-- char '.'
-- frac <- many1 digit
-- return ("",frac)
-- | Parse a numeric quantity for its value and display attributes. Some
-- international number formats (cf
-- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either
-- period or comma may be used for the decimal point, and the other of
-- these may be used for separating digit groups in the integer part (eg a
-- thousands separator). This returns the numeric value, the precision
-- (number of digits to the right of the decimal point), the decimal point
-- and separator characters (defaulting to . and ,), and the positions of
-- separators (counting leftward from the decimal point, the last is
-- assumed to repeat).
number :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int])
number = do
sign <- optionMaybe $ string "-"
(intwithcommas,frac) <- numberparts
let comma = ',' `elem` intwithcommas
let precision = length frac
-- read the actual value. We expect this read to never fail.
let int = filter (/= ',') intwithcommas
let int' = if null int then "0" else int
let frac' = if null frac then "0" else frac
let sign' = fromMaybe "" sign
let quantity = read $ sign'++int'++"."++frac'
return (quantity, precision, comma)
<?> "commodity quantity"
-- | parse the two strings of digits before and after a possible decimal
-- point. The integer part may contain commas, or either part may be
-- empty, or there may be no point.
numberparts :: GenParser Char JournalContext (String,String)
numberparts = numberpartsstartingwithdigit <|> numberpartsstartingwithpoint
numberpartsstartingwithdigit :: GenParser Char JournalContext (String,String)
numberpartsstartingwithdigit = do
let digitorcomma = digit <|> char ','
first <- digit
rest <- many digitorcomma
frac <- try (do {char '.'; many digit}) <|> return ""
return (first:rest,frac)
numberpartsstartingwithpoint :: GenParser Char JournalContext (String,String)
numberpartsstartingwithpoint = do
char '.'
frac <- many1 digit
return ("",frac)
parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.']
let numeric = isNumber . headDef '_'
(_, puncparts) = partition numeric parts
(ok,decimalpoint',separator') =
case puncparts of
[] -> (True, Nothing, Nothing) -- no punctuation chars
[d:""] -> (True, Just d, Nothing) -- just one punctuation char, assume it's a decimal point
[_] -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok
_:_:_ -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars
in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
|| any (s/=) ss -- separator chars differ, not ok
|| head parts == s) -- number begins with a separator char, not ok
then (False, Nothing, Nothing)
else if s == d
then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars
else (True, Just $ head d, Just $ head s) -- separators and a decimal point
when (not ok) (fail $ "number seems ill-formed: "++concat parts)
let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts
(intparts, fracpart) = (filter numeric intparts', filter numeric fracparts')
separatorpositions = reverse $ map length $ drop 1 intparts
int = concat $ "":intparts
frac = concat $ "":fracpart
precision = length frac
int' = if null int then "0" else int
frac' = if null frac then "0" else frac
sign' = fromMaybe "" sign
quantity = read $ sign'++int'++"."++frac' -- this read should never fail
(decimalpoint, separator) = case (decimalpoint', separator') of (Just d, Just s) -> (d,s)
(Just '.',Nothing) -> ('.',',')
(Just ',',Nothing) -> (',','.')
(Nothing, Just '.') -> (',','.')
(Nothing, Just ',') -> ('.',',')
_ -> ('.',',')
return (quantity,precision,decimalpoint,separator,separatorpositions)
<?> "number"
tests_Hledger_Read_JournalReader = TestList [
"ledgerTransaction" ~: do
"number" ~: do
let s `is` n = assertParseEqual (parseWithCtx nullctx number s) n
assertFails = assertBool "" . isLeft . parseWithCtx nullctx number
assertFails ""
"0" `is` (0, 0, '.', ',', [])
"1" `is` (1, 0, '.', ',', [])
"1.1" `is` (1.1, 1, '.', ',', [])
"1,000.1" `is` (1000.1, 1, '.', ',', [3])
"1.00.000,1" `is` (100000.1, 1, ',', '.', [3,2])
"1,000,000" `is` (1000000, 0, '.', ',', [3,3])
"1." `is` (1, 0, '.', ',', [])
"1," `is` (1, 0, ',', '.', [])
".1" `is` (0.1, 1, '.', ',', [])
",1" `is` (0.1, 1, ',', '.', [])
assertFails "1,000.000,1"
assertFails "1.000,000.1"
assertFails "1,000.000.1"
assertFails "1,,1"
assertFails "1..1"
assertFails ".1,"
assertFails ",1."
,"ledgerTransaction" ~: do
assertParseEqual (parseWithCtx nullctx ledgerTransaction entry1_str) entry1
assertBool "ledgerTransaction should not parse just a date"
$ isLeft $ parseWithCtx nullctx ledgerTransaction "2009/1/1\n"
@ -662,7 +735,7 @@ tests_Hledger_Read_JournalReader = TestList [
,"postingamount" ~: do
assertParseEqual (parseWithCtx nullctx postingamount " $47.18") (Mixed [dollars 47.18])
assertParseEqual (parseWithCtx nullctx postingamount " $1.")
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,comma=False,precision=0} 1 Nothing])
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,decimalpoint='.',precision=0,separator=',',separatorpositions=[]} 1 Nothing])
,"postingamount with unit price" ~: do
assertParseEqual
(parseWithCtx nullctx postingamount " $10 @ €0.5")
@ -682,11 +755,11 @@ tests_Hledger_Read_JournalReader = TestList [
,"leftsymbolamount" ~: do
assertParseEqual (parseWithCtx nullctx leftsymbolamount "$1")
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,comma=False,precision=0} 1 Nothing])
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,decimalpoint='.',precision=0,separator=',',separatorpositions=[]} 1 Nothing])
assertParseEqual (parseWithCtx nullctx leftsymbolamount "$-1")
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,comma=False,precision=0} (-1) Nothing])
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,decimalpoint='.',precision=0,separator=',',separatorpositions=[]} (-1) Nothing])
assertParseEqual (parseWithCtx nullctx leftsymbolamount "-$1")
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,comma=False,precision=0} (-1) Nothing])
(Mixed [Amount Commodity {symbol="$",side=L,spaced=False,decimalpoint='.',precision=0,separator=',',separatorpositions=[]} (-1) Nothing])
]

View File

@ -261,7 +261,7 @@ tests_Hledger_Cli = TestList
(map precision $ journalAmountAndPriceCommodities $ journalCanonicaliseAmounts $ journalWithAmounts ["1","2.00"]) `is` [2,2]
,"commodities" ~:
Map.elems (commodities ledger7) `is` [Commodity {symbol="$", side=L, spaced=False, comma=False, precision=2}]
Map.elems (commodities ledger7) `is` [Commodity {symbol="$", side=L, spaced=False, decimalpoint='.', precision=2, separator=',', separatorpositions=[]}]
-- don't know what this should do
-- ,"elideAccountName" ~: do

View File

@ -1,13 +1,13 @@
##############################################################################
# data validation
#
# should prompt again for a bad date
# 1. should prompt again for a bad date
rm -f add-default-commodity-$$.j; bin/hledger -f add-default-commodity-$$.j add; rm -f add-default-commodity-$$.j
<<<
2009/1/32
>>> /date .*: date .*/
#
# should accept a blank date
# 2. should accept a blank date
rm -f add-default-commodity-$$.j; bin/hledger -f add-default-commodity-$$.j add; rm -f add-default-commodity-$$.j
<<<
@ -16,7 +16,7 @@
##############################################################################
# precision and commodity handling
#
# simple add with no existing journal, no commodity entered
# 3. simple add with no existing journal, no commodity entered
rm -f add-default-commodity-$$.j; bin/hledger -f add-default-commodity-$$.j add; rm -f add-default-commodity-$$.j
<<<
@ -26,9 +26,9 @@ a
b
.
>>> /^date \[.*\]: description \[\]: account 1: amount 1: account 2: amount 2 \[-1000\]: account 3: date \[.*\]: $/
>>> /^date \[.*\]: description \[\]: account 1: amount 1: account 2: amount 2 \[-1000.0\]: account 3: date \[.*\]: $/
#
# default commodity with greater precision
# 4. default commodity with greater precision
printf 'D $1000.00\n' >add-default-commodity-$$.j; hledger -fadd-default-commodity-$$.j add >/dev/null; cat add-default-commodity-$$.j; rm -f add-default-commodity-$$.j
<<<
@ -40,7 +40,7 @@ b
.
>>> /a +\$1000\.0/
#
# default commodity with less precision
# 5. default commodity with less precision
printf 'D $1000.0\n' >add-default-commodity-$$.j; hledger -fadd-default-commodity-$$.j add >/dev/null; cat add-default-commodity-$$.j; rm -f add-default-commodity-$$.j
<<<
@ -52,7 +52,7 @@ b
.
>>> /a +\$1000\.00/
#
# existing commodity with greater precision
# 6. existing commodity with greater precision
printf '2010/1/1\n a $1000.00\n b\n' >add-default-commodity-$$.j; hledger -fadd-default-commodity-$$.j add >/dev/null; cat add-default-commodity-$$.j; rm -f add-default-commodity-$$.j
<<<
@ -64,7 +64,7 @@ b
.
>>> /a +\$1000\.0/
#
# existing commodity with less precision
# 7. existing commodity with less precision
printf '2010/1/1\n a $1000.0\n b\n' >add-default-commodity-$$.j; hledger -fadd-default-commodity-$$.j add >/dev/null; cat add-default-commodity-$$.j; rm -f add-default-commodity-$$.j
<<<
@ -76,7 +76,7 @@ b
.
>>> /a +\$1000\.00/
#
# no commodity entered, the (most recent) default commodity should be applied
# 8. no commodity entered, the (most recent) default commodity should be applied
printf 'D $1000.0\nD £1,000.00\n' >add-default-commodity-$$.j; hledger -fadd-default-commodity-$$.j add; cat add-default-commodity-$$.j; rm -f add-default-commodity-$$.j
<<<
2010/1/1
@ -86,8 +86,8 @@ a
b
.
>>> /a +£1,000.00/
# default amounts should not fail to balance due to precision
>>> /a +£1,000.0/
# 9. default amounts should not fail to balance due to precision
bin/hledger -f nosuch.journal add
<<<
2010/1/1

View File

@ -9,14 +9,14 @@ bin/hledger -f- print
a 1000
b
; pound, two decimal places, no thousands separator
; pound, two decimal places, no digit group separator
D £1000.00
2010/1/1 y
a 1000
b
; dollar, no decimal places, comma thousands separator
; dollar, comma decimal point, three decimal places, no digit group separator
D $1,000
2010/1/1 z
@ -33,6 +33,6 @@ D $1,000
b £-1000.00
2010/01/01 z
a $1,000
b $-1,000
a $1000,000
b $-1000,000