diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs
index b23a9c3e5..a3634aa73 100644
--- a/hledger-lib/Hledger/Read/Common.hs
+++ b/hledger-lib/Hledger/Read/Common.hs
@@ -1169,7 +1169,20 @@ rawnumberp = label "number" $ do
     pure $ NoSeparators grp1 (Just (decPt, mempty))
 
 isDigitSeparatorChar :: Char -> Bool
-isDigitSeparatorChar c = isDecimalMark c || c == ' '
+isDigitSeparatorChar c = isDecimalMark c || isDigitSeparatorSpaceChar c
+
+-- | Kinds of unicode space character we accept as digit group marks, written as
+-- escapes since they look alike. See also https://en.wikipedia.org/wiki/Decimal_separator#Digit_grouping .
+isDigitSeparatorSpaceChar :: Char -> Bool
+isDigitSeparatorSpaceChar c =
+     c == ' '        -- U+0020 space
+  || c == '\x00A0'   -- U+00A0 no-break space
+  || c == '\x2002'   -- U+2002 en space
+  || c == '\x2003'   -- U+2003 em space
+  || c == '\x2008'   -- U+2008 punctuation space
+  || c == '\x2009'   -- U+2009 thin space
+  || c == '\x202F'   -- U+202F narrow no-break space
+  || c == '\x205F'   -- U+205F medium mathematical space
 
 -- | Some kinds of number literal we might parse.
 data RawNumber
diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md
index 50a6bd568..5637603c4 100644
--- a/hledger/hledger.m4.md
+++ b/hledger/hledger.m4.md
@@ -1200,13 +1200,23 @@ A *decimal mark* can be written as a period or a comma:
     1,23
 
 In the integer part of the quantity (left of the decimal mark), groups
-of digits can optionally be separated by a *digit group mark* - a
-space, comma, or period (different from the decimal mark):
+of digits can optionally be separated by a *digit group mark* -
+a comma or period (whichever is not used as decimal mark),
+or a space (any of these Unicode space characters:
+space,
+no-break space,
+en space,
+em space,
+punctuation space,
+thin space,
+narrow no-break space,
+medium mathematical space).
 
$1,000,000.00 EUR 2.000.000,00 INR 9,99,99,999.00 - 1 000 000.9455 + 1 000 000.00 <- ordinary space + 1 000 000.00 <- no-break space hledger is not biased towards [period or comma decimal marks][international number formats], so a number containing just one period or comma, like `1,000` or `1.000`, is ambiguous. diff --git a/hledger/test/journal/numbers.test b/hledger/test/journal/numbers.test index d9ab010dc..931f3e13d 100644 --- a/hledger/test/journal/numbers.test +++ b/hledger/test/journal/numbers.test @@ -36,14 +36,27 @@ $ hledger bal -f - >2 // >= 1 -# ** 5. Space between digits groups +# ** 5. Spaces between digit groups, any of the 8 unicode space characters we support. < -2017/1/1 - a 1 000.00 EUR - b -1 000.00 EUR -$ hledger bal -f - --no-total - 1 000.00 EUR a - -1 000.00 EUR b +2000-01-01 + (a) 1 000. A ; space + (b) 1 000. B ; no-break space + (c) 1 000. C ; en space + (d) 1 000. D ; em space + (e) 1 000. E ; punctuation space + (f) 1 000. F ; thin space + (g) 1 000. G ; narrow no-break space + (h) 1 000. H ; medium mathematical space + +$ hledger bal -f - -N + 1 000 A a + 1 000 B b + 1 000 C c + 1 000 D d + 1 000 E e + 1 000 F f + 1 000 G g + 1 000 H h # ** 6. Space between digits groups in commodity directive < @@ -203,3 +216,24 @@ Balance changes in 2021: ===++=================================== a || -0.12345678901234567890123456 EUR +# ** 21. When spaces are used inconsistently, what happens ? The usual, first one seen sets commodity style. +< +2000-01-01 + (a) 1 000. A ; no-break space + (a) 1 000. A ; space + (a) 1 000. A ; en space + +$ hledger bal -f - -N + 3 000 A a + +# ** 22. And a commodity directive can enforce consistency as usual. +< +commodity 1 000. A ; narrow no-break space + +2000-01-01 + (a) 1 000. A ; space + (a) 1 000. A ; no-break space + (a) 1 000. A ; en space + +$ hledger bal -f - -N + 3 000 A a