look harder for decimal point & digit groups (fixes #196)

Amount display styles have been reworked a bit; they are now calculated after journal parsing, not during it. This allows the fix for #196: we now search through the amounts until a decimal point is detected, instead of just looking at the first one; likewise for digit groups. Digit groups are now implemented with a better type. Digit group size detection has been improved a little: 1000,000 now gives group sizes [3,4,4,...], not [3,3,...], and 10,000 gives groups sizes [3,3,...] not [3,2,2,..]. (To get [3,2,2,...] you'd use eg 00,00,000.) There are still some old (or new ?) issues; I don't think we handle inconsistent decimal points & digit groups too well. But for now all tests pass.
2024-11-08 07:09:28 +03:00 · 2014-07-02 23:26:16 -07:00 · 2014-07-02 23:26:16 -07:00 · c31710d942
commit c31710d942
parent 647d5833ff
5 changed files with 122 additions and 103 deletions
--- a/hledger-lib/Hledger/Data/Amount.hs
+++ b/hledger-lib/Hledger/Data/Amount.hs
@ -68,7 +68,6 @@ module Hledger.Data.Amount (
  setAmountPrecision,
  withPrecision,
  canonicaliseAmount,
-  canonicalStyles,
  -- * MixedAmount
  nullmixedamt,
  missingmixedamt,
@ -99,7 +98,7 @@ module Hledger.Data.Amount (
 import Data.Char (isDigit)
 import Data.List
 import Data.Map (findWithDefault)
-import Data.Ord (comparing)
+import Data.Maybe
 import Test.HUnit
 import Text.Printf
 import qualified Data.Map as M
@ -111,7 +110,7 @@ import Hledger.Utils

 deriving instance Show HistoricalPrice

-amountstyle = AmountStyle L False 0 '.' ',' []
+amountstyle = AmountStyle L False 0 (Just '.') Nothing

 -------------------------------------------------------------------------------
 -- Amount
@ -281,33 +280,38 @@ showAmount a@(Amount{acommodity=c, aprice=p, astyle=AmountStyle{..}}) =
 -- | Get the string representation of the number part of of an amount,
 -- using the display settings from its commodity.
 showamountquantity :: Amount -> String
-showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=d, asseparator=s, asseparatorpositions=spos}} =
-    punctuatenumber d s spos $ qstr
+showamountquantity Amount{aquantity=q, astyle=AmountStyle{asprecision=p, asdecimalpoint=mdec, asdigitgroups=mgrps}} =
+    punctuatenumber (fromMaybe '.' mdec) mgrps $ qstr
    where
-    -- isint n = fromIntegral (round n) == n
-    qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
-         | p == maxprecisionwithpoint    = printf "%f" q
-         | p == maxprecision             = chopdotzero $ printf "%f" q
-         | otherwise                    = printf ("%."++show p++"f") q
+      -- isint n = fromIntegral (round n) == n
+      qstr -- p == maxprecision && isint q = printf "%d" (round q::Integer)
+        | p == maxprecisionwithpoint    = printf "%f" q
+        | p == maxprecision             = chopdotzero $ printf "%f" q
+        | otherwise                    = printf ("%."++show p++"f") q

 -- | Replace a number string's decimal point with the specified character,
 -- and add the specified digit group separators. The last digit group will
 -- be repeated as needed.
-punctuatenumber :: Char -> Char -> [Int] -> String -> String
-punctuatenumber dec sep grps str = sign ++ reverse (addseps sep (extend grps) (reverse int)) ++ frac''
+punctuatenumber :: Char -> Maybe DigitGroupStyle -> String -> String
+punctuatenumber dec mgrps s = sign ++ reverse (applyDigitGroupStyle mgrps (reverse int)) ++ frac''
    where
-      (sign,num) = break isDigit str
+      (sign,num) = break isDigit s
      (int,frac) = break (=='.') num
      frac' = dropWhile (=='.') frac
      frac'' | null frac' = ""
             | otherwise  = dec:frac'
-      extend [] = []
-      extend gs = init gs ++ repeat (last gs)
-      addseps _ [] str = str
-      addseps sep (g:gs) str
-          | length str <= g = str
-          | otherwise = let (s,rest) = splitAt g str
-                        in s ++ [sep] ++ addseps sep gs rest
+
+applyDigitGroupStyle :: Maybe DigitGroupStyle -> String -> String
+applyDigitGroupStyle Nothing s = s
+applyDigitGroupStyle (Just (DigitGroups c gs)) s = addseps (repeatLast gs) s
+  where
+    addseps [] s = s
+    addseps (g:gs) s
+      | length s <= g = s
+      | otherwise     = let (part,rest) = splitAt g s
+                        in part ++ [c] ++ addseps gs rest
+    repeatLast [] = []
+    repeatLast gs = init gs ++ repeat (last gs)

 chopdotzero str = reverse $ case reverse str of
                              '0':'.':s -> s
@ -501,23 +505,6 @@ showMixedAmountWithoutPrice m = concat $ intersperse "\n" $ map showfixedwidth a
 canonicaliseMixedAmount :: M.Map Commodity AmountStyle -> MixedAmount -> MixedAmount
 canonicaliseMixedAmount styles (Mixed as) = Mixed $ map (canonicaliseAmount styles) as

-- | Given a list of amounts in parse order, build a map from commodities
-- to canonical display styles for amounts in that commodity.
-canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
-canonicalStyles amts = M.fromList commstyles
-  where
-    samecomm = \a1 a2 -> acommodity a1 == acommodity a2
-    commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
-    commstyles = [(c, s)
-                 | (c,as) <- commamts
-                 , let styles = map astyle as
-                 , let maxprec = maximum $ map asprecision styles
-                 , let s = (head styles){asprecision=maxprec}
-                 ]
-
-- lookupStyle :: M.Map Commodity AmountStyle -> Commodity -> AmountStyle
-- lookupStyle 
-
 -------------------------------------------------------------------------------
 -- misc

--- a/hledger-lib/Hledger/Data/Journal.hs
+++ b/hledger-lib/Hledger/Data/Journal.hs
@ -46,6 +46,7 @@ module Hledger.Data.Journal (
  journalEquityAccountQuery,
  journalCashAccountQuery,
  -- * Misc
+  canonicalStyles,
  matchpats,
  nullctx,
  nulljournal,
@ -481,12 +482,34 @@ journalCanonicaliseAmounts :: Journal -> Journal
 journalCanonicaliseAmounts j@Journal{jtxns=ts} = j''
    where
      j'' = j'{jtxns=map fixtransaction ts}
-      j' = j{jcommoditystyles = canonicalStyles $ journalAmounts j}
+      j' = j{jcommoditystyles = canonicalStyles $ dbgAt 8 "journalAmounts" $ journalAmounts j}
      fixtransaction t@Transaction{tpostings=ps} = t{tpostings=map fixposting ps}
      fixposting p@Posting{pamount=a} = p{pamount=fixmixedamount a}
      fixmixedamount (Mixed as) = Mixed $ map fixamount as
      fixamount a@Amount{acommodity=c} = a{astyle=journalCommodityStyle j' c}

+-- | Given a list of amounts in parse order, build a map from commodities
+-- to canonical display styles for amounts in that commodity.
+canonicalStyles :: [Amount] -> M.Map Commodity AmountStyle
+canonicalStyles amts = M.fromList commstyles
+  where
+    samecomm = \a1 a2 -> acommodity a1 == acommodity a2
+    commamts = [(acommodity $ head as, as) | as <- groupBy samecomm $ sortBy (comparing acommodity) amts]
+    commstyles = [(c, canonicalStyleFrom $ map astyle as) | (c,as) <- commamts]
+
+-- Given an ordered list of amount styles for a commodity, build a canonical style.
+canonicalStyleFrom :: [AmountStyle] -> AmountStyle
+canonicalStyleFrom [] = amountstyle
+canonicalStyleFrom ss@(first:_) =
+  first{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
+  where
+    -- precision is the maximum of all precisions seen
+    prec = maximum $ map asprecision ss
+    -- find the first decimal point and the first digit group style seen,
+    -- or use defaults.
+    mdec  = Just $ headDef '.' $ catMaybes $ map asdecimalpoint ss
+    mgrps = maybe Nothing Just $ headMay $ catMaybes $ map asdigitgroups ss
+
 -- | Get this journal's canonical amount style for the given commodity, or the null style.
 journalCommodityStyle :: Journal -> Commodity -> AmountStyle
 journalCommodityStyle j c = M.findWithDefault amountstyle c $ jcommoditystyles j
--- a/hledger-lib/Hledger/Data/Types.hs
+++ b/hledger-lib/Hledger/Data/Types.hs
@ -56,11 +56,19 @@ data AmountStyle = AmountStyle {
      ascommodityside :: Side,       -- ^ does the symbol appear on the left or the right ?
      ascommodityspaced :: Bool,     -- ^ space between symbol and quantity ?
      asprecision :: Int,            -- ^ number of digits displayed after the decimal point
-      asdecimalpoint :: Char,        -- ^ character used as decimal point
-      asseparator :: Char,           -- ^ character used for separating digit groups (eg thousands)
-      asseparatorpositions :: [Int]  -- ^ positions of digit group separators, counting leftward from decimal point
+      asdecimalpoint :: Maybe Char,  -- ^ character used as decimal point: period or comma. Nothing means "unspecified, use default"
+      asdigitgroups :: Maybe DigitGroupStyle -- ^ style for displaying digit groups, if any
 } deriving (Eq,Ord,Read,Show,Typeable,Data)

+-- | A style for displaying digit groups in the integer part of a
+-- floating point number. It consists of the character used to
+-- separate groups (comma or period, whichever is not used as decimal
+-- point), and the size of each group, starting with the one nearest
+-- the decimal point. The last group size is assumed to repeat. Eg,
+-- comma between thousands is DigitGroups ',' [3].
+data DigitGroupStyle = DigitGroups Char [Int]
+  deriving (Eq,Ord,Read,Show,Typeable,Data)
+
 data Amount = Amount {
      acommodity :: Commodity,
      aquantity :: Quantity,
--- a/hledger-lib/Hledger/Read/JournalReader.hs
+++ b/hledger-lib/Hledger/Read/JournalReader.hs
@ -662,8 +662,8 @@ leftsymbolamount = do
  sign <- signp
  c <- commoditysymbol 
  sp <- many spacenonewline
-  (q,prec,dec,sep,seppos) <- numberp
-  let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}
+  (q,prec,mdec,mgrps) <- numberp
+  let s = amountstyle{ascommodityside=L, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
  p <- priceamount
  let applysign = if sign=="-" then negate else id
  return $ applysign $ Amount c q p s
@ -671,23 +671,23 @@ leftsymbolamount = do

 rightsymbolamount :: GenParser Char JournalContext Amount
 rightsymbolamount = do
-  (q,prec,dec,sep,seppos) <- numberp
+  (q,prec,mdec,mgrps) <- numberp
  sp <- many spacenonewline
  c <- commoditysymbol
  p <- priceamount
-  let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos}
+  let s = amountstyle{ascommodityside=R, ascommodityspaced=not $ null sp, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
  return $ Amount c q p s
  <?> "right-symbol amount"

 nosymbolamount :: GenParser Char JournalContext Amount
 nosymbolamount = do
-  (q,prec,dec,sep,seppos) <- numberp
+  (q,prec,mdec,mgrps) <- numberp
  p <- priceamount
  -- apply the most recently seen default commodity and style to this commodityless amount
  defcs <- getDefaultCommodityAndStyle
  let (c,s) = case defcs of
        Just (defc,defs) -> (defc, defs{asprecision=max (asprecision defs) prec})
-        Nothing          -> ("", amountstyle{asprecision=prec, asdecimalpoint=dec, asseparator=sep, asseparatorpositions=seppos})
+        Nothing          -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
  return $ Amount c q p s
  <?> "no-symbol amount"

@ -745,55 +745,68 @@ fixedlotprice =
          return $ Just a)
         <|> return Nothing

-- | Parse a numeric quantity for its value and display attributes.  Some
-- international number formats (cf
-- http://en.wikipedia.org/wiki/Decimal_separator) are accepted: either
-- period or comma may be used for the decimal point, and the other of
-- these may be used for separating digit groups in the integer part (eg a
-- thousands separator).  This returns the numeric value, the precision
-- (number of digits to the right of the decimal point), the decimal point
-- and separator characters (defaulting to . and ,), and the positions of
-- separators (counting leftward from the decimal point, the last is
-- assumed to repeat).
-numberp :: GenParser Char JournalContext (Quantity, Int, Char, Char, [Int])
+-- | Parse a string representation of a number for its value and display
+-- attributes.
+-- 
+-- Some international number formats are accepted, eg either period or comma
+-- may be used for the decimal point, and the other of these may be used for
+-- separating digit groups in the integer part. See
+-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
+-- 
+-- This returns: the parsed numeric value, the precision (number of digits
+-- seen following the decimal point), the decimal point character used if any,
+-- and the digit group style if any.
+-- 
+numberp :: GenParser Char JournalContext (Quantity, Int, Maybe Char, Maybe DigitGroupStyle)
 numberp = do
+  -- a number is an optional sign followed by a sequence of digits possibly
+  -- interspersed with periods, commas, or both
+  -- ptrace "numberp"
  sign <- signp
  parts <- many1 $ choice' [many1 digit, many1 $ char ',', many1 $ char '.']
-  let numeric = isNumber . headDef '_'
-      (numparts, puncparts) = partition numeric parts
-      (ok,decimalpoint',separator') =
-          case (numparts,puncparts) of
-            ([],_)     -> (False, Nothing, Nothing)  -- no digits
-            (_,[])     -> (True, Nothing, Nothing)  -- no punctuation chars
-            (_,[d:""]) -> (True, Just d, Nothing)   -- just one punctuation char, assume it's a decimal point
-            (_,[_])    -> (False, Nothing, Nothing) -- adjacent punctuation chars, not ok
-            (_,_:_:_)  -> let (s:ss, d) = (init puncparts, last puncparts) -- two or more punctuation chars
-                          in if (any ((/=1).length) puncparts  -- adjacent punctuation chars, not ok
-                                 || any (s/=) ss                -- separator chars differ, not ok
-                                 || head parts == s)            -- number begins with a separator char, not ok
-                              then (False, Nothing, Nothing)
-                              else if s == d
-                                    then (True, Nothing, Just $ head s) -- just one kind of punctuation, assume separator chars
-                                    else (True, Just $ head d, Just $ head s) -- separators and a decimal point
+  dbgAt 8 "numberp parsed" (sign,parts) `seq` return ()
+
+  -- check the number is well-formed and identify the decimal point and digit
+  -- group separator characters used, if any
+  let (numparts, puncparts) = partition numeric parts
+      (ok, mdecimalpoint, mseparator) =
+          case (numparts, puncparts) of
+            ([],_)     -> (False, Nothing, Nothing)  -- no digits, not ok
+            (_,[])     -> (True, Nothing, Nothing)   -- digits with no punctuation, ok
+            (_,[[d]])  -> (True, Just d, Nothing)    -- just a single punctuation of length 1, assume it's a decimal point
+            (_,[_])    -> (False, Nothing, Nothing)  -- a single punctuation of some other length, not ok
+            (_,_:_:_)  ->                                       -- two or more punctuations
+              let (s:ss, d) = (init puncparts, last puncparts)  -- the leftmost is a separator and the rightmost may be a decimal point
+              in if (any ((/=1).length) puncparts               -- adjacent punctuation chars, not ok
+                     || any (s/=) ss                            -- separator chars vary, not ok
+                     || head parts == s)                        -- number begins with a separator char, not ok
+                 then (False, Nothing, Nothing)
+                 else if s == d
+                      then (True, Nothing, Just $ head s)       -- just one kind of punctuation - must be separators
+                      else (True, Just $ head d, Just $ head s) -- separator(s) and a decimal point
  when (not ok) (fail $ "number seems ill-formed: "++concat parts)
-  let (intparts',fracparts') = span ((/= decimalpoint') . Just . head) parts
+
+  -- get the digit group sizes and digit group style if any
+  let (intparts',fracparts') = span ((/= mdecimalpoint) . Just . head) parts
      (intparts, fracpart) = (filter numeric intparts', filter numeric fracparts')
-      separatorpositions = reverse $ map length $ drop 1 intparts
-      int = concat $ "":intparts
+      groupsizes = reverse $ case map length intparts of
+                               (a:b:cs) | a < b -> b:cs
+                               gs               -> gs
+      mgrps = maybe Nothing (Just . (`DigitGroups` groupsizes)) $ mseparator
+
+  -- put the parts back together without digit group separators, get the precision and parse the value
+  let int = concat $ "":intparts
      frac = concat $ "":fracpart
      precision = length frac
      int' = if null int then "0" else int
      frac' = if null frac then "0" else frac
      quantity = read $ sign++int'++"."++frac' -- this read should never fail
-      (decimalpoint, separator) = case (decimalpoint', separator') of (Just d,  Just s)   -> (d,s)
-                                                                      (Just '.',Nothing)  -> ('.',',')
-                                                                      (Just ',',Nothing)  -> (',','.')
-                                                                      (Nothing, Just '.') -> (',','.')
-                                                                      (Nothing, Just ',') -> ('.',',')
-                                                                      _                   -> ('.',',')
-  return (quantity,precision,decimalpoint,separator,separatorpositions)
-  <?> "numberp"

+  return $ dbgAt 8 "numberp quantity,precision,mdecimalpoint,mgrps" (quantity,precision,mdecimalpoint,mgrps)
+  <?> "numberp"
+  where
+    numeric = isNumber . headDef '_'
+      
 #ifdef TESTS
 test_numberp = do
      let s `is` n = assertParseEqual' (parseWithCtx nullctx numberp s) n
--- a/tests/journal/default-commodity.test
+++ b/tests/journal/default-commodity.test
@ -1,5 +1,7 @@
 # a default commodity defined with the D directive will be used for any
-# commodity-less amounts in subsequent transactions.
+# subsequent commodity-less posting amounts. The sample amount's display style
+# is also applied, and the resulting amount may end up setting the canonical
+# display style for the commodity.

 # 1. no default commodity
 hledgerdev -f- print
@ -54,8 +56,9 @@ D $1,000.0

 >>>=0

-# 5. as above, sets the commodity of the commodityless amount, but an
-# earlier explicit dollar amount sets the display settings for dollar
+# 5. commodity and display style applied to the second posting amount..
+# which ends up setting the digit group style, since it's the first amount
+# with digit groups. The great precision is used.
 hledgerdev -f- print
 <<<
 D $1,000.0
@ -63,21 +66,6 @@ D $1,000.0
  (a)  $1000000.00
  (b)   1000000
 >>>
-2010/01/01
-    (a)   $1000000.00
-    (b)   $1000000.00
-
->>>=0
-
-# 6. as above, but the commodityless amount is earliest, so it sets the
-# display settings for dollar. The greatest precision is preserved though.
-hledgerdev -f- print
-<<<
-D $1,000.0
-2010/1/1
-  (a)   1000000
-  (b)  $1000000.00
->>>
 2010/01/01
    (a)  $1,000,000.00
    (b)  $1,000,000.00