mirror of
https://github.com/simonmichael/hledger.git
synced 2024-11-08 07:09:28 +03:00
look harder for decimal point & digit groups (fixes #196)
Amount display styles have been reworked a bit; they are now calculated after journal parsing, not during it. This allows the fix for #196: we now search through the amounts until a decimal point is detected, instead of just looking at the first one; likewise for digit groups. Digit groups are now implemented with a better type. Digit group size detection has been improved a little: 1000,000 now gives group sizes [3,4,4,...], not [3,3,...], and 10,000 gives groups sizes [3,3,...] not [3,2,2,..]. (To get [3,2,2,...] you'd use eg 00,00,000.) There are still some old (or new ?) issues; I don't think we handle inconsistent decimal points & digit groups too well. But for now all tests pass.
This commit is contained in:
parent
647d5833ff
commit
c31710d942
@ -68,7 +68,6 @@ module Hledger.Data.Amount (
|
||||
setAmountPrecision,
|
||||
withPrecision,
|
||||
canonicaliseAmount,
|
||||
canonicalStyles,
|
||||
-- * MixedAmount
|
||||
nullmixedamt,
|
||||
missingmixedamt,
|
||||
@ -99,7 +98,7 @@ module Hledger.Data.Amount (
|
||||
import Data.Char (isDigit)
|
||||
import Data.List
|
||||
import Data.Map (findWithDefault)
|
||||
import Data.Ord (comparing)
|
||||
import Data.Maybe
|
||||
import Test.HUnit
|
||||
import Text.Printf
|
||||
import qualified Data.Map as M
|
||||
@ -111,7 +110,7 @@ import Hledger.Utils
|
||||
|
||||
deriving instance Show HistoricalPrice
|
||||
|
||||
amountstyle = AmountStyle L False 0 '.' ',' []
|
||||
amountstyle = AmountStyle L False 0 (Just '.') Nothing
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- Amount
|
||||
@ -281,33 +280,38 @@ showAmount a@(Amount{acommodity=c, aprice=p, astyle=AmountStyle{..}}) =
|
||||
-- | Get the string representation of the number part of of an amount,
|
||||
-- using the display settings from its commodity.
|
||||
showamountquantity :: Amount -> String
|
||||
showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=d, asseparator=s, asseparatorpositions=spos}} =
|
||||
punctuatenumber d s spos $ qstr
|
||||
showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=mdec, asdigitgroups=mgrps}} =
|
||||
punctuatenumber (fromMaybe '.' mdec) mgrps $ qstr
|
||||
where
|
||||
-- isint n = fromIntegral (round n) == n
|
||||
qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
|
||||
| p == maxprecisionwithpoint = printf "%f" q
|
||||
| p == maxprecision = chopdotzero $ printf "%f" q
|
||||
| otherwise = printf ("%."++show p++"f") q
|
||||
-- isint n = fromIntegral (round n) == n
|
||||
qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
|
||||
| p == maxprecisionwithpoint = printf "%f" q
|
||||
| p == maxprecision = chopdotzero $ printf "%f" q
|
||||
| otherwise = printf ("%."++show p++"f") q
|
||||
|
||||
-- | Replace a number string's decimal point with the specified character,
|
||||
-- and add the specified digit group separators. The last digit group will
|
||||
-- be repeated as needed.
|
||||
punctuatenumber :: Char -> Char -> [Int] -> String -> String
|
||||
punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac''
|
||||
punctuatenumber :: Char -> Maybe DigitGroupStyle -> String -> String
|
||||
punctuatenumber dec mgrps s = sign ++ reverse (applyDigitGroupStyle mgrps (reverse int)) ++ frac''
|
||||
where
|
||||
(sign,num) = break isDigit str
|
||||
(sign,num) = break isDigit s
|
||||
(int,frac) = break (=='.') num
|
||||
frac' = dropWhile (=='.') frac
|
||||
frac'' | null frac' = ""
|
||||
| otherwise = dec:frac'
|
||||
extend [] = []
|
||||
extend gs = init gs ++ repeat (last gs)
|
||||
addseps _ [] str = str
|
||||
addseps sep (g:gs) str
|
||||
| length str <= g = str
|
||||
| otherwise = let (s,rest) = splitAt g str
|
||||
in s ++ [sep] ++ addseps sep gs rest
|
||||
|
||||
applyDigitGroupStyle :: Maybe DigitGroupStyle -> String -> String
|
||||
applyDigitGroupStyle Nothing s = s
|
||||
applyDigitGroupStyle (Just (DigitGroups c gs)) s = addseps (repeatLast gs) s
|
||||
where
|
||||
addseps [] s = s
|
||||
addseps (g:gs) s
|
||||
| length s <= g = s
|
||||
| otherwise = let (part,rest) = splitAt g s
|
||||
in part ++ [c] ++ addseps gs rest
|
||||
repeatLast [] = []
|
||||
repeatLast gs = init gs ++ repeat (last gs)
|
||||
|
||||
chopdotzero str = reverse $ case reverse str of
|
||||
'0':'.':s -> s
|
||||
@ -501,23 +505,6 @@ showMixedAmountWithoutPrice m = concat $ intersperse "\n" $ map showfixedwidth a
|
||||
canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount
|
||||
canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as
|
||||
|
||||
-- | Given a list of amounts in parse order, build a map from commodities
|
||||
-- to canonical display styles for amounts in that commodity.
|
||||
canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
|
||||
canonicalStyles amts = M.fromList commstyles
|
||||
where
|
||||
samecomm = \a1 a2 -> acommodity a1 == acommodity a2
|
||||
commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
|
||||
commstyles = [(c, s)
|
||||
| (c,as) <- commamts
|
||||
, let styles = map astyle as
|
||||
, let maxprec = maximum $ map asprecision styles
|
||||
, let s = (head styles){asprecision=maxprec}
|
||||
]
|
||||
|
||||
-- lookupStyle :: M.Map Commodity AmountStyle -> Commodity -> AmountStyle
|
||||
-- lookupStyle
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- misc
|
||||
|
||||
|
@ -46,6 +46,7 @@ module Hledger.Data.Journal (
|
||||
journalEquityAccountQuery,
|
||||
journalCashAccountQuery,
|
||||
-- * Misc
|
||||
canonicalStyles,
|
||||
matchpats,
|
||||
nullctx,
|
||||
nulljournal,
|
||||
@ -481,12 +482,34 @@ journalCanonicaliseAmounts :: Journal -> Journal
|
||||
journalCanonicaliseAmounts j@Journal{jtxns=ts} = j''
|
||||
where
|
||||
j'' = j'{jtxns=map fixtransaction ts}
|
||||
j' = j{jcommoditystyles = canonicalStyles $ journalAmounts j}
|
||||
j' = j{jcommoditystyles = canonicalStyles $ dbgAt 8 "journalAmounts" $ journalAmounts j}
|
||||
fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps}
|
||||
fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a}
|
||||
fixmixedamount (Mixed as) = Mixed $ map fixamount as
|
||||
fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c}
|
||||
|
||||
-- | Given a list of amounts in parse order, build a map from commodities
|
||||
-- to canonical display styles for amounts in that commodity.
|
||||
canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
|
||||
canonicalStyles amts = M.fromList commstyles
|
||||
where
|
||||
samecomm = \a1 a2 -> acommodity a1 == acommodity a2
|
||||
commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
|
||||
commstyles = [(c, canonicalStyleFrom $ map astyle as) | (c,as) <- commamts]
|
||||
|
||||
-- Given an ordered list of amount styles for a commodity, build a canonical style.
|
||||
canonicalStyleFrom :: [AmountStyle] -> AmountStyle
|
||||
canonicalStyleFrom [] = amountstyle
|
||||
canonicalStyleFrom ss@(first:_) =
|
||||
first{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
||||
where
|
||||
-- precision is the maximum of all precisions seen
|
||||
prec = maximum $ map asprecision ss
|
||||
-- find the first decimal point and the first digit group style seen,
|
||||
-- or use defaults.
|
||||
mdec = Just $ headDef '.' $ catMaybes $ map asdecimalpoint ss
|
||||
mgrps = maybe Nothing Just $ headMay $ catMaybes $ map asdigitgroups ss
|
||||
|
||||
-- | Get this journal's canonical amount style for the given commodity, or the null style.
|
||||
journalCommodityStyle :: Journal -> Commodity -> AmountStyle
|
||||
journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j
|
||||
|
@ -56,11 +56,19 @@ data AmountStyle = AmountStyle {
|
||||
ascommodityside :: Side, -- ^ does the symbol appear on the left or the right ?
|
||||
ascommodityspaced :: Bool, -- ^ space between symbol and quantity ?
|
||||
asprecision :: Int, -- ^ number of digits displayed after the decimal point
|
||||
asdecimalpoint :: Char, -- ^ character used as decimal point
|
||||
asseparator :: Char, -- ^ character used for separating digit groups (eg thousands)
|
||||
asseparatorpositions :: [Int] -- ^ positions of digit group separators, counting leftward from decimal point
|
||||
asdecimalpoint :: Maybe Char, -- ^ character used as decimal point: period or comma. Nothing means "unspecified, use default"
|
||||
asdigitgroups :: Maybe DigitGroupStyle -- ^ style for displaying digit groups, if any
|
||||
} deriving (Eq,Ord,Read,Show,Typeable,Data)
|
||||
|
||||
-- | A style for displaying digit groups in the integer part of a
|
||||
-- floating point number. It consists of the character used to
|
||||
-- separate groups (comma or period, whichever is not used as decimal
|
||||
-- point), and the size of each group, starting with the one nearest
|
||||
-- the decimal point. The last group size is assumed to repeat. Eg,
|
||||
-- comma between thousands is DigitGroups ',' [3].
|
||||
data DigitGroupStyle = DigitGroups Char [Int]
|
||||
deriving (Eq,Ord,Read,Show,Typeable,Data)
|
||||
|
||||
data Amount = Amount {
|
||||
acommodity :: Commodity,
|
||||
aquantity :: Quantity,
|
||||
|
@ -662,8 +662,8 @@ leftsymbolamount = do
|
||||
sign <- signp
|
||||
c <- commoditysymbol
|
||||
sp <- many spacenonewline
|
||||
(q,prec,dec,sep,seppos) <- numberp
|
||||
let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}
|
||||
(q,prec,mdec,mgrps) <- numberp
|
||||
let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
||||
p <- priceamount
|
||||
let applysign = if sign=="-" then negate else id
|
||||
return $ applysign $ Amount c q p s
|
||||
@ -671,23 +671,23 @@ leftsymbolamount = do
|
||||
|
||||
rightsymbolamount :: GenParser Char JournalContext Amount
|
||||
rightsymbolamount = do
|
||||
(q,prec,dec,sep,seppos) <- numberp
|
||||
(q,prec,mdec,mgrps) <- numberp
|
||||
sp <- many spacenonewline
|
||||
c <- commoditysymbol
|
||||
p <- priceamount
|
||||
let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}
|
||||
let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
|
||||
return $ Amount c q p s
|
||||
<?> "right-symbol amount"
|
||||
|
||||
nosymbolamount :: GenParser Char JournalContext Amount
|
||||
nosymbolamount = do
|
||||
(q,prec,dec,sep,seppos) <- numberp
|
||||
(q,prec,mdec,mgrps) <- numberp
|
||||
p <- priceamount
|
||||
-- apply the most recently seen default commodity and style to this commodityless amount
|
||||
defcs <- getDefaultCommodityAndStyle
|
||||
let (c,s) = case defcs of
|
||||
Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec})
|
||||
Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos})
|
||||
Nothing -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
|
||||
return $ Amount c q p s
|
||||
<?> "no-symbol amount"
|
||||
|
||||
@ -745,55 +745,68 @@ fixedlotprice =
|
||||
return $ Just a)
|
||||
<|> return Nothing
|
||||
|
||||
-- | Parse a numeric quantity for its value and display attributes. Some
|
||||
-- international number formats (cf
|
||||
-- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either
|
||||
-- period or comma may be used for the decimal point, and the other of
|
||||
-- these may be used for separating digit groups in the integer part (eg a
|
||||
-- thousands separator). This returns the numeric value, the precision
|
||||
-- (number of digits to the right of the decimal point), the decimal point
|
||||
-- and separator characters (defaulting to . and ,), and the positions of
|
||||
-- separators (counting leftward from the decimal point, the last is
|
||||
-- assumed to repeat).
|
||||
numberp :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int])
|
||||
-- | Parse a string representation of a number for its value and display
|
||||
-- attributes.
|
||||
--
|
||||
-- Some international number formats are accepted, eg either period or comma
|
||||
-- may be used for the decimal point, and the other of these may be used for
|
||||
-- separating digit groups in the integer part. See
|
||||
-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
|
||||
--
|
||||
-- This returns: the parsed numeric value, the precision (number of digits
|
||||
-- seen following the decimal point), the decimal point character used if any,
|
||||
-- and the digit group style if any.
|
||||
--
|
||||
numberp :: GenParser Char JournalContext (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
|
||||
numberp = do
|
||||
-- a number is an optional sign followed by a sequence of digits possibly
|
||||
-- interspersed with periods, commas, or both
|
||||
-- ptrace "numberp"
|
||||
sign <- signp
|
||||
parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.']
|
||||
let numeric = isNumber . headDef '_'
|
||||
(numparts, puncparts) = partition numeric parts
|
||||
(ok,decimalpoint',separator') =
|
||||
case (numparts,puncparts) of
|
||||
([],_) -> (False, Nothing, Nothing) -- no digits
|
||||
(_,[]) -> (True, Nothing, Nothing) -- no punctuation chars
|
||||
(_,[d:""]) -> (True, Just d, Nothing) -- just one punctuation char, assume it's a decimal point
|
||||
(_,[_]) -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok
|
||||
(_,_:_:_) -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars
|
||||
in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
|
||||
|| any (s/=) ss -- separator chars differ, not ok
|
||||
|| head parts == s) -- number begins with a separator char, not ok
|
||||
then (False, Nothing, Nothing)
|
||||
else if s == d
|
||||
then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars
|
||||
else (True, Just $ head d, Just $ head s) -- separators and a decimal point
|
||||
dbgAt 8 "numberp parsed" (sign,parts) `seq` return ()
|
||||
|
||||
-- check the number is well-formed and identify the decimal point and digit
|
||||
-- group separator characters used, if any
|
||||
let (numparts, puncparts) = partition numeric parts
|
||||
(ok, mdecimalpoint, mseparator) =
|
||||
case (numparts, puncparts) of
|
||||
([],_) -> (False, Nothing, Nothing) -- no digits, not ok
|
||||
(_,[]) -> (True, Nothing, Nothing) -- digits with no punctuation, ok
|
||||
(_,[[d]]) -> (True, Just d, Nothing) -- just a single punctuation of length 1, assume it's a decimal point
|
||||
(_,[_]) -> (False, Nothing, Nothing) -- a single punctuation of some other length, not ok
|
||||
(_,_:_:_) -> -- two or more punctuations
|
||||
let (s:ss, d) = (init puncparts, last puncparts) -- the leftmost is a separator and the rightmost may be a decimal point
|
||||
in if (any ((/=1).length) puncparts -- adjacent punctuation chars, not ok
|
||||
|| any (s/=) ss -- separator chars vary, not ok
|
||||
|| head parts == s) -- number begins with a separator char, not ok
|
||||
then (False, Nothing, Nothing)
|
||||
else if s == d
|
||||
then (True, Nothing, Just $ head s) -- just one kind of punctuation - must be separators
|
||||
else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point
|
||||
when (not ok) (fail $ "number seems ill-formed: "++concat parts)
|
||||
let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts
|
||||
|
||||
-- get the digit group sizes and digit group style if any
|
||||
let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts
|
||||
(intparts, fracpart) = (filter numeric intparts', filter numeric fracparts')
|
||||
separatorpositions = reverse $ map length $ drop 1 intparts
|
||||
int = concat $ "":intparts
|
||||
groupsizes = reverse $ case map length intparts of
|
||||
(a:b:cs) | a < b -> b:cs
|
||||
gs -> gs
|
||||
mgrps = maybe Nothing (Just . (`DigitGroups` groupsizes)) $ mseparator
|
||||
|
||||
-- put the parts back together without digit group separators, get the precision and parse the value
|
||||
let int = concat $ "":intparts
|
||||
frac = concat $ "":fracpart
|
||||
precision = length frac
|
||||
int' = if null int then "0" else int
|
||||
frac' = if null frac then "0" else frac
|
||||
quantity = read $ sign++int'++"."++frac' -- this read should never fail
|
||||
(decimalpoint, separator) = case (decimalpoint', separator') of (Just d, Just s) -> (d,s)
|
||||
(Just '.',Nothing) -> ('.',',')
|
||||
(Just ',',Nothing) -> (',','.')
|
||||
(Nothing, Just '.') -> (',','.')
|
||||
(Nothing, Just ',') -> ('.',',')
|
||||
_ -> ('.',',')
|
||||
return (quantity,precision,decimalpoint,separator,separatorpositions)
|
||||
<?> "numberp"
|
||||
|
||||
return $ dbgAt 8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps)
|
||||
<?> "numberp"
|
||||
where
|
||||
numeric = isNumber . headDef '_'
|
||||
|
||||
#ifdef TESTS
|
||||
test_numberp = do
|
||||
let s `is` n = assertParseEqual' (parseWithCtx nullctx numberp s) n
|
||||
|
@ -1,5 +1,7 @@
|
||||
# a default commodity defined with the D directive will be used for any
|
||||
# commodity-less amounts in subsequent transactions.
|
||||
# subsequent commodity-less posting amounts. The sample amount's display style
|
||||
# is also applied, and the resulting amount may end up setting the canonical
|
||||
# display style for the commodity.
|
||||
|
||||
# 1. no default commodity
|
||||
hledgerdev -f- print
|
||||
@ -54,8 +56,9 @@ D $1,000.0
|
||||
|
||||
>>>=0
|
||||
|
||||
# 5. as above, sets the commodity of the commodityless amount, but an
|
||||
# earlier explicit dollar amount sets the display settings for dollar
|
||||
# 5. commodity and display style applied to the second posting amount..
|
||||
# which ends up setting the digit group style, since it's the first amount
|
||||
# with digit groups. The great precision is used.
|
||||
hledgerdev -f- print
|
||||
<<<
|
||||
D $1,000.0
|
||||
@ -63,21 +66,6 @@ D $1,000.0
|
||||
(a) $1000000.00
|
||||
(b) 1000000
|
||||
>>>
|
||||
2010/01/01
|
||||
(a) $1000000.00
|
||||
(b) $1000000.00
|
||||
|
||||
>>>=0
|
||||
|
||||
# 6. as above, but the commodityless amount is earliest, so it sets the
|
||||
# display settings for dollar. The greatest precision is preserved though.
|
||||
hledgerdev -f- print
|
||||
<<<
|
||||
D $1,000.0
|
||||
2010/1/1
|
||||
(a) 1000000
|
||||
(b) $1000000.00
|
||||
>>>
|
||||
2010/01/01
|
||||
(a) $1,000,000.00
|
||||
(b) $1,000,000.00
|
||||
|
Loading…
Reference in New Issue
Block a user