look harder for decimal point & digit groups (fixes #196)

Amount display styles have been reworked a bit; they are now calculated
after journal parsing, not during it. This allows the fix for #196:
we now search through the amounts until a decimal point is detected,
instead of just looking at the first one; likewise for digit groups.
Digit groups are now implemented with a better type.

Digit group size detection has been improved a little:
1000,000 now gives group sizes [3,4,4,...], not [3,3,...], and
10,000 gives groups sizes [3,3,...] not [3,2,2,..].
(To get [3,2,2,...] you'd use eg 00,00,000.)

There are still some old (or new ?) issues; I don't think we handle
inconsistent decimal points & digit groups too well. But for now all
tests pass.
This commit is contained in:
Simon Michael 2014-07-02 23:26:16 -07:00
parent 647d5833ff
commit c31710d942
5 changed files with 122 additions and 103 deletions

View File

@ -68,7 +68,6 @@ module Hledger.Data.Amount (
setAmountPrecision,
withPrecision,
canonicaliseAmount,
canonicalStyles,
-- * MixedAmount
nullmixedamt,
missingmixedamt,
@ -99,7 +98,7 @@ module Hledger.Data.Amount (
import Data.Char (isDigit)
import Data.List
import Data.Map (findWithDefault)
import Data.Ord (comparing)
import Data.Maybe
import Test.HUnit
import Text.Printf
import qualified Data.Map as M
@ -111,7 +110,7 @@ import Hledger.Utils
deriving instance Show HistoricalPrice
amountstyle = AmountStyle L False 0 '.' ',' []
amountstyle = AmountStyle L False 0 (Just '.') Nothing
-------------------------------------------------------------------------------
-- Amount
@ -281,33 +280,38 @@ showAmount a@(Amount{acommodity=c, aprice=p, astyle=AmountStyle{..}}) =
-- | Get the string representation of the number part of of an amount,
-- using the display settings from its commodity.
showamountquantity :: Amount -> String
showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=d, asseparator=s, asseparatorpositions=spos}} =
punctuatenumber d s spos $ qstr
showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=mdec, asdigitgroups=mgrps}} =
punctuatenumber (fromMaybe '.' mdec) mgrps $ qstr
where
-- isint n = fromIntegral (round n) == n
qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
| p == maxprecisionwithpoint = printf "%f" q
| p == maxprecision = chopdotzero $ printf "%f" q
| otherwise = printf ("%."++show p++"f") q
-- isint n = fromIntegral (round n) == n
qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
| p == maxprecisionwithpoint = printf "%f" q
| p == maxprecision = chopdotzero $ printf "%f" q
| otherwise = printf ("%."++show p++"f") q
-- | Replace a number string's decimal point with the specified character,
-- and add the specified digit group separators. The last digit group will
-- be repeated as needed.
punctuatenumber :: Char -> Char -> [Int] -> String -> String
punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac''
punctuatenumber :: Char -> Maybe DigitGroupStyle -> String -> String
punctuatenumber dec mgrps s = sign ++ reverse (applyDigitGroupStyle mgrps (reverse int)) ++ frac''
where
(sign,num) = break isDigit str
(sign,num) = break isDigit s
(int,frac) = break (=='.') num
frac' = dropWhile (=='.') frac
frac'' | null frac' = ""
| otherwise = dec:frac'
extend [] = []
extend gs = init gs ++ repeat (last gs)
addseps _ [] str = str
addseps sep (g:gs) str
| length str <= g = str
| otherwise = let (s,rest) = splitAt g str
in s ++ [sep] ++ addseps sep gs rest
applyDigitGroupStyle :: Maybe DigitGroupStyle -> String -> String
applyDigitGroupStyle Nothing s = s
applyDigitGroupStyle (Just (DigitGroups c gs)) s = addseps (repeatLast gs) s
where
addseps [] s = s
addseps (g:gs) s
| length s <= g = s
| otherwise = let (part,rest) = splitAt g s
in part ++ [c] ++ addseps gs rest
repeatLast [] = []
repeatLast gs = init gs ++ repeat (last gs)
chopdotzero str = reverse $ case reverse str of
'0':'.':s -> s
@ -501,23 +505,6 @@ showMixedAmountWithoutPrice m = concat $ intersperse "\n" $ map showfixedwidth a
canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount
canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as
-- | Given a list of amounts in parse order, build a map from commodities
-- to canonical display styles for amounts in that commodity.
canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
canonicalStyles amts = M.fromList commstyles
where
samecomm = \a1 a2 -> acommodity a1 == acommodity a2
commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
commstyles = [(c, s)
| (c,as) <- commamts
, let styles = map astyle as
, let maxprec = maximum $ map asprecision styles
, let s = (head styles){asprecision=maxprec}
]
-- lookupStyle :: M.Map Commodity AmountStyle -> Commodity -> AmountStyle
-- lookupStyle
-------------------------------------------------------------------------------
-- misc

View File

@ -46,6 +46,7 @@ module Hledger.Data.Journal (
journalEquityAccountQuery,
journalCashAccountQuery,
-- * Misc
canonicalStyles,
matchpats,
nullctx,
nulljournal,
@ -481,12 +482,34 @@ journalCanonicaliseAmounts :: Journal -> Journal
journalCanonicaliseAmounts j@Journal{jtxns=ts} = j''
where
j'' = j'{jtxns=map fixtransaction ts}
j' = j{jcommoditystyles = canonicalStyles $ journalAmounts j}
j' = j{jcommoditystyles = canonicalStyles $ dbgAt 8 "journalAmounts" $ journalAmounts j}
fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps}
fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a}
fixmixedamount (Mixed as) = Mixed $ map fixamount as
fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c}
-- | Given a list of amounts in parse order, build a map from commodities
-- to canonical display styles for amounts in that commodity.
canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
canonicalStyles amts = M.fromList commstyles
where
samecomm = \a1 a2 -> acommodity a1 == acommodity a2
commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
commstyles = [(c, canonicalStyleFrom $ map astyle as) | (c,as) <- commamts]
-- Given an ordered list of amount styles for a commodity, build a canonical style.
canonicalStyleFrom :: [AmountStyle] -> AmountStyle
canonicalStyleFrom [] = amountstyle
canonicalStyleFrom ss@(first:_) =
first{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
where
-- precision is the maximum of all precisions seen
prec = maximum $ map asprecision ss
-- find the first decimal point and the first digit group style seen,
-- or use defaults.
mdec = Just $ headDef '.' $ catMaybes $ map asdecimalpoint ss
mgrps = maybe Nothing Just $ headMay $ catMaybes $ map asdigitgroups ss
-- | Get this journal's canonical amount style for the given commodity, or the null style.
journalCommodityStyle :: Journal -> Commodity -> AmountStyle
journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j

View File

@ -56,11 +56,19 @@ data AmountStyle = AmountStyle {
ascommodityside :: Side, -- ^ does the symbol appear on the left or the right ?
ascommodityspaced :: Bool, -- ^ space between symbol and quantity ?
asprecision :: Int, -- ^ number of digits displayed after the decimal point
asdecimalpoint :: Char, -- ^ character used as decimal point
asseparator :: Char, -- ^ character used for separating digit groups (eg thousands)
asseparatorpositions :: [Int] -- ^ positions of digit group separators, counting leftward from decimal point
asdecimalpoint :: Maybe Char, -- ^ character used as decimal point: period or comma. Nothing means "unspecified, use default"
asdigitgroups :: Maybe DigitGroupStyle -- ^ style for displaying digit groups, if any
} deriving (Eq,Ord,Read,Show,Typeable,Data)
-- | A style for displaying digit groups in the integer part of a
-- floating point number. It consists of the character used to
-- separate groups (comma or period, whichever is not used as decimal
-- point), and the size of each group, starting with the one nearest
-- the decimal point. The last group size is assumed to repeat. Eg,
-- comma between thousands is DigitGroups ',' [3].
data DigitGroupStyle = DigitGroups Char [Int]
deriving (Eq,Ord,Read,Show,Typeable,Data)
data Amount = Amount {
acommodity :: Commodity,
aquantity :: Quantity,

View File

@ -662,8 +662,8 @@ leftsymbolamount = do
sign <- signp
c <- commoditysymbol
sp <- many spacenonewline
(q,prec,dec,sep,seppos) <- numberp
let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}
(q,prec,mdec,mgrps) <- numberp
let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
p <- priceamount
let applysign = if sign=="-" then negate else id
return $ applysign $ Amount c q p s
@ -671,23 +671,23 @@ leftsymbolamount = do
rightsymbolamount :: GenParser Char JournalContext Amount
rightsymbolamount = do
(q,prec,dec,sep,seppos) <- numberp
(q,prec,mdec,mgrps) <- numberp
sp <- many spacenonewline
c <- commoditysymbol
p <- priceamount
let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}
let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
return $ Amount c q p s
<?> "right-symbol amount"
nosymbolamount :: GenParser Char JournalContext Amount
nosymbolamount = do
(q,prec,dec,sep,seppos) <- numberp
(q,prec,mdec,mgrps) <- numberp
p <- priceamount
-- apply the most recently seen default commodity and style to this commodityless amount
defcs <- getDefaultCommodityAndStyle
let (c,s) = case defcs of
Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec})
Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos})
Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
return $ Amount c q p s
<?> "no-symbol amount"
@ -745,55 +745,68 @@ fixedlotprice =
return $ Just a)
<|> return Nothing
-- | Parse a numeric quantity for its value and display attributes. Some
-- international number formats (cf
-- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either
-- period or comma may be used for the decimal point, and the other of
-- these may be used for separating digit groups in the integer part (eg a
-- thousands separator). This returns the numeric value, the precision
-- (number of digits to the right of the decimal point), the decimal point
-- and separator characters (defaulting to . and ,), and the positions of
-- separators (counting leftward from the decimal point, the last is
-- assumed to repeat).
numberp :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int])
-- | Parse a string representation of a number for its value and display
-- attributes.
--
-- Some international number formats are accepted, eg either period or comma
-- may be used for the decimal point, and the other of these may be used for
-- separating digit groups in the integer part. See
-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
--
-- This returns: the parsed numeric value, the precision (number of digits
-- seen following the decimal point), the decimal point character used if any,
-- and the digit group style if any.
--
numberp :: GenParser Char JournalContext (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
numberp = do
-- a number is an optional sign followed by a sequence of digits possibly
-- interspersed with periods, commas, or both
-- ptrace "numberp"
sign <- signp
parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.']
let numeric = isNumber . headDef '_'
(numparts, puncparts) = partition numeric parts
(ok,decimalpoint',separator') =
case (numparts,puncparts) of
([],_) -> (False, Nothing, Nothing) -- no digits
(_,[]) -> (True, Nothing, Nothing) -- no punctuation chars
(_,[d:""]) -> (True, Just d, Nothing) -- just one punctuation char, assume it's a decimal point
(_,[_]) -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok
(_,_:_:_) -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars
in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
|| any (s/=) ss -- separator chars differ, not ok
|| head parts == s) -- number begins with a separator char, not ok
then (False, Nothing, Nothing)
else if s == d
then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars
else (True, Just $ head d, Just $ head s) -- separators and a decimal point
dbgAt 8 "numberp parsed" (sign,parts) `seq` return ()
-- check the number is well-formed and identify the decimal point and digit
-- group separator characters used, if any
let (numparts, puncparts) = partition numeric parts
(ok, mdecimalpoint, mseparator) =
case (numparts, puncparts) of
([],_) -> (False, Nothing, Nothing) -- no digits, not ok
(_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok
(_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point
(_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok
(_,_:_:_) -> -- two or more punctuations
let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point
in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
|| any (s/=) ss -- separator chars vary, not ok
|| head parts == s) -- number begins with a separator char, not ok
then (False, Nothing, Nothing)
else if s == d
then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators
else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point
when (not ok) (fail $ "number seems ill-formed: "++concat parts)
let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts
-- get the digit group sizes and digit group style if any
let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts
(intparts, fracpart) = (filter numeric intparts', filter numeric fracparts')
separatorpositions = reverse $ map length $ drop 1 intparts
int = concat $ "":intparts
groupsizes = reverse $ case map length intparts of
(a:b:cs) | a < b -> b:cs
gs -> gs
mgrps = maybe Nothing (Just . (`DigitGroups` groupsizes)) $ mseparator
-- put the parts back together without digit group separators, get the precision and parse the value
let int = concat $ "":intparts
frac = concat $ "":fracpart
precision = length frac
int' = if null int then "0" else int
frac' = if null frac then "0" else frac
quantity = read $ sign++int'++"."++frac' -- this read should never fail
(decimalpoint, separator) = case (decimalpoint', separator') of (Just d, Just s) -> (d,s)
(Just '.',Nothing) -> ('.',',')
(Just ',',Nothing) -> (',','.')
(Nothing, Just '.') -> (',','.')
(Nothing, Just ',') -> ('.',',')
_ -> ('.',',')
return (quantity,precision,decimalpoint,separator,separatorpositions)
<?> "numberp"
return $ dbgAt 8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps)
<?> "numberp"
where
numeric = isNumber . headDef '_'
#ifdef TESTS
test_numberp = do
let s `is` n = assertParseEqual' (parseWithCtx nullctx numberp s) n

View File

@ -1,5 +1,7 @@
# a default commodity defined with the D directive will be used for any
# commodity-less amounts in subsequent transactions.
# subsequent commodity-less posting amounts. The sample amount's display style
# is also applied, and the resulting amount may end up setting the canonical
# display style for the commodity.
# 1. no default commodity
hledgerdev -f- print
@ -54,8 +56,9 @@ D $1,000.0
>>>=0
# 5. as above, sets the commodity of the commodityless amount, but an
# earlier explicit dollar amount sets the display settings for dollar
# 5. commodity and display style applied to the second posting amount..
# which ends up setting the digit group style, since it's the first amount
# with digit groups. The great precision is used.
hledgerdev -f- print
<<<
D $1,000.0
@ -63,21 +66,6 @@ D $1,000.0
(a) $1000000.00
(b) 1000000
>>>
2010/01/01
(a) $1000000.00
(b) $1000000.00
>>>=0
# 6. as above, but the commodityless amount is earliest, so it sets the
# display settings for dollar. The greatest precision is preserved though.
hledgerdev -f- print
<<<
D $1,000.0
2010/1/1
(a) 1000000
(b) $1000000.00
>>>
2010/01/01
(a) $1,000,000.00
(b) $1,000,000.00