look harder for decimal point & digit groups (fixes #196)

Amount display styles have been reworked a bit; they are now calculated
after journal parsing, not during it. This allows the fix for #196:
we now search through the amounts until a decimal point is detected,
instead of just looking at the first one; likewise for digit groups.
Digit groups are now implemented with a better type.

Digit group size detection has been improved a little:
1000,000 now gives group sizes [3,4,4,...], not [3,3,...], and
10,000 gives groups sizes [3,3,...] not [3,2,2,..].
(To get [3,2,2,...] you'd use eg 00,00,000.)

There are still some old (or new ?) issues; I don't think we handle
inconsistent decimal points & digit groups too well. But for now all
tests pass.
This commit is contained in:
Simon Michael 2014-07-02 23:26:16 -07:00
parent 647d5833ff
commit c31710d942
5 changed files with 122 additions and 103 deletions

View File

@ -68,7 +68,6 @@ module Hledger.Data.Amount (
setAmountPrecision, setAmountPrecision,
withPrecision, withPrecision,
canonicaliseAmount, canonicaliseAmount,
canonicalStyles,
-- * MixedAmount -- * MixedAmount
nullmixedamt, nullmixedamt,
missingmixedamt, missingmixedamt,
@ -99,7 +98,7 @@ module Hledger.Data.Amount (
import Data.Char (isDigit) import Data.Char (isDigit)
import Data.List import Data.List
import Data.Map (findWithDefault) import Data.Map (findWithDefault)
import Data.Ord (comparing) import Data.Maybe
import Test.HUnit import Test.HUnit
import Text.Printf import Text.Printf
import qualified Data.Map as M import qualified Data.Map as M
@ -111,7 +110,7 @@ import Hledger.Utils
deriving instance Show HistoricalPrice deriving instance Show HistoricalPrice
amountstyle = AmountStyle L False 0 '.' ',' [] amountstyle = AmountStyle L False 0 (Just '.') Nothing
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- Amount -- Amount
@ -281,33 +280,38 @@ showAmount a@(Amount{acommodity=c, aprice=p, astyle=AmountStyle{..}}) =
-- | Get the string representation of the number part of of an amount, -- | Get the string representation of the number part of of an amount,
-- using the display settings from its commodity. -- using the display settings from its commodity.
showamountquantity :: Amount -> String showamountquantity :: Amount -> String
showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=d, asseparator=s, asseparatorpositions=spos}} = showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=mdec, asdigitgroups=mgrps}} =
punctuatenumber d s spos $ qstr punctuatenumber (fromMaybe '.' mdec) mgrps $ qstr
where where
-- isint n = fromIntegral (round n) == n -- isint n = fromIntegral (round n) == n
qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer) qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
| p == maxprecisionwithpoint = printf "%f" q | p == maxprecisionwithpoint = printf "%f" q
| p == maxprecision = chopdotzero $ printf "%f" q | p == maxprecision = chopdotzero $ printf "%f" q
| otherwise = printf ("%."++show p++"f") q | otherwise = printf ("%."++show p++"f") q
-- | Replace a number string's decimal point with the specified character, -- | Replace a number string's decimal point with the specified character,
-- and add the specified digit group separators. The last digit group will -- and add the specified digit group separators. The last digit group will
-- be repeated as needed. -- be repeated as needed.
punctuatenumber :: Char -> Char -> [Int] -> String -> String punctuatenumber :: Char -> Maybe DigitGroupStyle -> String -> String
punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac'' punctuatenumber dec mgrps s = sign ++ reverse (applyDigitGroupStyle mgrps (reverse int)) ++ frac''
where where
(sign,num) = break isDigit str (sign,num) = break isDigit s
(int,frac) = break (=='.') num (int,frac) = break (=='.') num
frac' = dropWhile (=='.') frac frac' = dropWhile (=='.') frac
frac'' | null frac' = "" frac'' | null frac' = ""
| otherwise = dec:frac' | otherwise = dec:frac'
extend [] = []
extend gs = init gs ++ repeat (last gs) applyDigitGroupStyle :: Maybe DigitGroupStyle -> String -> String
addseps _ [] str = str applyDigitGroupStyle Nothing s = s
addseps sep (g:gs) str applyDigitGroupStyle (Just (DigitGroups c gs)) s = addseps (repeatLast gs) s
| length str <= g = str where
| otherwise = let (s,rest) = splitAt g str addseps [] s = s
in s ++ [sep] ++ addseps sep gs rest addseps (g:gs) s
| length s <= g = s
| otherwise = let (part,rest) = splitAt g s
in part ++ [c] ++ addseps gs rest
repeatLast [] = []
repeatLast gs = init gs ++ repeat (last gs)
chopdotzero str = reverse $ case reverse str of chopdotzero str = reverse $ case reverse str of
'0':'.':s -> s '0':'.':s -> s
@ -501,23 +505,6 @@ showMixedAmountWithoutPrice m = concat $ intersperse "\n" $ map showfixedwidth a
canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount
canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as
-- | Given a list of amounts in parse order, build a map from commodities
-- to canonical display styles for amounts in that commodity.
canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
canonicalStyles amts = M.fromList commstyles
where
samecomm = \a1 a2 -> acommodity a1 == acommodity a2
commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
commstyles = [(c, s)
| (c,as) <- commamts
, let styles = map astyle as
, let maxprec = maximum $ map asprecision styles
, let s = (head styles){asprecision=maxprec}
]
-- lookupStyle :: M.Map Commodity AmountStyle -> Commodity -> AmountStyle
-- lookupStyle
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- misc -- misc

View File

@ -46,6 +46,7 @@ module Hledger.Data.Journal (
journalEquityAccountQuery, journalEquityAccountQuery,
journalCashAccountQuery, journalCashAccountQuery,
-- * Misc -- * Misc
canonicalStyles,
matchpats, matchpats,
nullctx, nullctx,
nulljournal, nulljournal,
@ -481,12 +482,34 @@ journalCanonicaliseAmounts :: Journal -> Journal
journalCanonicaliseAmounts j@Journal{jtxns=ts} = j'' journalCanonicaliseAmounts j@Journal{jtxns=ts} = j''
where where
j'' = j'{jtxns=map fixtransaction ts} j'' = j'{jtxns=map fixtransaction ts}
j' = j{jcommoditystyles = canonicalStyles $ journalAmounts j} j' = j{jcommoditystyles = canonicalStyles $ dbgAt 8 "journalAmounts" $ journalAmounts j}
fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps} fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps}
fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a} fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a}
fixmixedamount (Mixed as) = Mixed $ map fixamount as fixmixedamount (Mixed as) = Mixed $ map fixamount as
fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c} fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c}
-- | Given a list of amounts in parse order, build a map from commodities
-- to canonical display styles for amounts in that commodity.
canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
canonicalStyles amts = M.fromList commstyles
where
samecomm = \a1 a2 -> acommodity a1 == acommodity a2
commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
commstyles = [(c, canonicalStyleFrom $ map astyle as) | (c,as) <- commamts]
-- Given an ordered list of amount styles for a commodity, build a canonical style.
canonicalStyleFrom :: [AmountStyle] -> AmountStyle
canonicalStyleFrom [] = amountstyle
canonicalStyleFrom ss@(first:_) =
first{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
where
-- precision is the maximum of all precisions seen
prec = maximum $ map asprecision ss
-- find the first decimal point and the first digit group style seen,
-- or use defaults.
mdec = Just $ headDef '.' $ catMaybes $ map asdecimalpoint ss
mgrps = maybe Nothing Just $ headMay $ catMaybes $ map asdigitgroups ss
-- | Get this journal's canonical amount style for the given commodity, or the null style. -- | Get this journal's canonical amount style for the given commodity, or the null style.
journalCommodityStyle :: Journal -> Commodity -> AmountStyle journalCommodityStyle :: Journal -> Commodity -> AmountStyle
journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j

View File

@ -56,11 +56,19 @@ data AmountStyle = AmountStyle {
ascommodityside :: Side, -- ^ does the symbol appear on the left or the right ? ascommodityside :: Side, -- ^ does the symbol appear on the left or the right ?
ascommodityspaced :: Bool, -- ^ space between symbol and quantity ? ascommodityspaced :: Bool, -- ^ space between symbol and quantity ?
asprecision :: Int, -- ^ number of digits displayed after the decimal point asprecision :: Int, -- ^ number of digits displayed after the decimal point
asdecimalpoint :: Char, -- ^ character used as decimal point asdecimalpoint :: Maybe Char, -- ^ character used as decimal point: period or comma. Nothing means "unspecified, use default"
asseparator :: Char, -- ^ character used for separating digit groups (eg thousands) asdigitgroups :: Maybe DigitGroupStyle -- ^ style for displaying digit groups, if any
asseparatorpositions :: [Int] -- ^ positions of digit group separators, counting leftward from decimal point
} deriving (Eq,Ord,Read,Show,Typeable,Data) } deriving (Eq,Ord,Read,Show,Typeable,Data)
-- | A style for displaying digit groups in the integer part of a
-- floating point number. It consists of the character used to
-- separate groups (comma or period, whichever is not used as decimal
-- point), and the size of each group, starting with the one nearest
-- the decimal point. The last group size is assumed to repeat. Eg,
-- comma between thousands is DigitGroups ',' [3].
data DigitGroupStyle = DigitGroups Char [Int]
deriving (Eq,Ord,Read,Show,Typeable,Data)
data Amount = Amount { data Amount = Amount {
acommodity :: Commodity, acommodity :: Commodity,
aquantity :: Quantity, aquantity :: Quantity,

View File

@ -662,8 +662,8 @@ leftsymbolamount = do
sign <- signp sign <- signp
c <- commoditysymbol c <- commoditysymbol
sp <- many spacenonewline sp <- many spacenonewline
(q,prec,dec,sep,seppos) <- numberp (q,prec,mdec,mgrps) <- numberp
let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos} let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
p <- priceamount p <- priceamount
let applysign = if sign=="-" then negate else id let applysign = if sign=="-" then negate else id
return $ applysign $ Amount c q p s return $ applysign $ Amount c q p s
@ -671,23 +671,23 @@ leftsymbolamount = do
rightsymbolamount :: GenParser Char JournalContext Amount rightsymbolamount :: GenParser Char JournalContext Amount
rightsymbolamount = do rightsymbolamount = do
(q,prec,dec,sep,seppos) <- numberp (q,prec,mdec,mgrps) <- numberp
sp <- many spacenonewline sp <- many spacenonewline
c <- commoditysymbol c <- commoditysymbol
p <- priceamount p <- priceamount
let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos} let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
return $ Amount c q p s return $ Amount c q p s
<?> "right-symbol amount" <?> "right-symbol amount"
nosymbolamount :: GenParser Char JournalContext Amount nosymbolamount :: GenParser Char JournalContext Amount
nosymbolamount = do nosymbolamount = do
(q,prec,dec,sep,seppos) <- numberp (q,prec,mdec,mgrps) <- numberp
p <- priceamount p <- priceamount
-- apply the most recently seen default commodity and style to this commodityless amount -- apply the most recently seen default commodity and style to this commodityless amount
defcs <- getDefaultCommodityAndStyle defcs <- getDefaultCommodityAndStyle
let (c,s) = case defcs of let (c,s) = case defcs of
Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec}) Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec})
Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}) Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
return $ Amount c q p s return $ Amount c q p s
<?> "no-symbol amount" <?> "no-symbol amount"
@ -745,55 +745,68 @@ fixedlotprice =
return $ Just a) return $ Just a)
<|> return Nothing <|> return Nothing
-- | Parse a numeric quantity for its value and display attributes. Some -- | Parse a string representation of a number for its value and display
-- international number formats (cf -- attributes.
-- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either --
-- period or comma may be used for the decimal point, and the other of -- Some international number formats are accepted, eg either period or comma
-- these may be used for separating digit groups in the integer part (eg a -- may be used for the decimal point, and the other of these may be used for
-- thousands separator). This returns the numeric value, the precision -- separating digit groups in the integer part. See
-- (number of digits to the right of the decimal point), the decimal point -- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
-- and separator characters (defaulting to . and ,), and the positions of --
-- separators (counting leftward from the decimal point, the last is -- This returns: the parsed numeric value, the precision (number of digits
-- assumed to repeat). -- seen following the decimal point), the decimal point character used if any,
numberp :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int]) -- and the digit group style if any.
--
numberp :: GenParser Char JournalContext (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
numberp = do numberp = do
-- a number is an optional sign followed by a sequence of digits possibly
-- interspersed with periods, commas, or both
-- ptrace "numberp"
sign <- signp sign <- signp
parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.'] parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.']
let numeric = isNumber . headDef '_' dbgAt 8 "numberp parsed" (sign,parts) `seq` return ()
(numparts, puncparts) = partition numeric parts
(ok,decimalpoint',separator') = -- check the number is well-formed and identify the decimal point and digit
case (numparts,puncparts) of -- group separator characters used, if any
([],_) -> (False, Nothing, Nothing) -- no digits let (numparts, puncparts) = partition numeric parts
(_,[]) -> (True, Nothing, Nothing) -- no punctuation chars (ok, mdecimalpoint, mseparator) =
(_,[d:""]) -> (True, Just d, Nothing) -- just one punctuation char, assume it's a decimal point case (numparts, puncparts) of
(_,[_]) -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok ([],_) -> (False, Nothing, Nothing) -- no digits, not ok
(_,_:_:_) -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars (_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok
in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok (_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point
|| any (s/=) ss -- separator chars differ, not ok (_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok
|| head parts == s) -- number begins with a separator char, not ok (_,_:_:_) -> -- two or more punctuations
then (False, Nothing, Nothing) let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point
else if s == d in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars || any (s/=) ss -- separator chars vary, not ok
else (True, Just $ head d, Just $ head s) -- separators and a decimal point || head parts == s) -- number begins with a separator char, not ok
then (False, Nothing, Nothing)
else if s == d
then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators
else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point
when (not ok) (fail $ "number seems ill-formed: "++concat parts) when (not ok) (fail $ "number seems ill-formed: "++concat parts)
let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts
-- get the digit group sizes and digit group style if any
let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts
(intparts, fracpart) = (filter numeric intparts', filter numeric fracparts') (intparts, fracpart) = (filter numeric intparts', filter numeric fracparts')
separatorpositions = reverse $ map length $ drop 1 intparts groupsizes = reverse $ case map length intparts of
int = concat $ "":intparts (a:b:cs) | a < b -> b:cs
gs -> gs
mgrps = maybe Nothing (Just . (`DigitGroups` groupsizes)) $ mseparator
-- put the parts back together without digit group separators, get the precision and parse the value
let int = concat $ "":intparts
frac = concat $ "":fracpart frac = concat $ "":fracpart
precision = length frac precision = length frac
int' = if null int then "0" else int int' = if null int then "0" else int
frac' = if null frac then "0" else frac frac' = if null frac then "0" else frac
quantity = read $ sign++int'++"."++frac' -- this read should never fail quantity = read $ sign++int'++"."++frac' -- this read should never fail
(decimalpoint, separator) = case (decimalpoint', separator') of (Just d, Just s) -> (d,s)
(Just '.',Nothing) -> ('.',',')
(Just ',',Nothing) -> (',','.')
(Nothing, Just '.') -> (',','.')
(Nothing, Just ',') -> ('.',',')
_ -> ('.',',')
return (quantity,precision,decimalpoint,separator,separatorpositions)
<?> "numberp"
return $ dbgAt 8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps)
<?> "numberp"
where
numeric = isNumber . headDef '_'
#ifdef TESTS #ifdef TESTS
test_numberp = do test_numberp = do
let s `is` n = assertParseEqual' (parseWithCtx nullctx numberp s) n let s `is` n = assertParseEqual' (parseWithCtx nullctx numberp s) n

View File

@ -1,5 +1,7 @@
# a default commodity defined with the D directive will be used for any # a default commodity defined with the D directive will be used for any
# commodity-less amounts in subsequent transactions. # subsequent commodity-less posting amounts. The sample amount's display style
# is also applied, and the resulting amount may end up setting the canonical
# display style for the commodity.
# 1. no default commodity # 1. no default commodity
hledgerdev -f- print hledgerdev -f- print
@ -54,8 +56,9 @@ D $1,000.0
>>>=0 >>>=0
# 5. as above, sets the commodity of the commodityless amount, but an # 5. commodity and display style applied to the second posting amount..
# earlier explicit dollar amount sets the display settings for dollar # which ends up setting the digit group style, since it's the first amount
# with digit groups. The great precision is used.
hledgerdev -f- print hledgerdev -f- print
<<< <<<
D $1,000.0 D $1,000.0
@ -63,21 +66,6 @@ D $1,000.0
(a) $1000000.00 (a) $1000000.00
(b) 1000000 (b) 1000000
>>> >>>
2010/01/01
(a) $1000000.00
(b) $1000000.00
>>>=0
# 6. as above, but the commodityless amount is earliest, so it sets the
# display settings for dollar. The greatest precision is preserved though.
hledgerdev -f- print
<<<
D $1,000.0
2010/1/1
(a) 1000000
(b) $1000000.00
>>>
2010/01/01 2010/01/01
(a) $1,000,000.00 (a) $1,000,000.00
(b) $1,000,000.00 (b) $1,000,000.00