From fac3ee89afa5faaf06c83c522f4b98a47fa2e741 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Wed, 28 Feb 2024 09:12:17 -1000 Subject: [PATCH] feat:allow other kinds of unicode space as digit group separators Based on feedback in chat, I added support for several more kinds of Unicode space character for separating digit groups, both when reading and when displaying numbers. These are the spaces currently supported, which are just my best guess at the ones that might show up in CSV files now and then: space, no-break space, en space, em space, punctuation space, thin space, narrow no-break space, medium mathematical space --- hledger-lib/Hledger/Read/Common.hs | 15 +++++++++- hledger/hledger.m4.md | 16 ++++++++-- hledger/test/journal/numbers.test | 48 +++++++++++++++++++++++++----- 3 files changed, 68 insertions(+), 11 deletions(-) diff --git a/hledger-lib/Hledger/Read/Common.hs b/hledger-lib/Hledger/Read/Common.hs index b23a9c3e5..a3634aa73 100644 --- a/hledger-lib/Hledger/Read/Common.hs +++ b/hledger-lib/Hledger/Read/Common.hs @@ -1169,7 +1169,20 @@ rawnumberp = label "number" $ do pure $ NoSeparators grp1 (Just (decPt, mempty)) isDigitSeparatorChar :: Char -> Bool -isDigitSeparatorChar c = isDecimalMark c || c == ' ' +isDigitSeparatorChar c = isDecimalMark c || isDigitSeparatorSpaceChar c + +-- | Kinds of unicode space character we accept as digit group marks. +-- See also https://en.wikipedia.org/wiki/Decimal_separator#Digit_grouping . +isDigitSeparatorSpaceChar :: Char -> Bool +isDigitSeparatorSpaceChar c = + c == ' ' -- space + || c == ' ' -- no-break space + || c == ' ' -- en space + || c == ' ' -- em space + || c == ' ' -- punctuation space + || c == ' ' -- thin space + || c == ' ' -- narrow no-break space + || c == ' ' -- medium mathematical space -- | Some kinds of number literal we might parse. data RawNumber diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index 50a6bd568..5637603c4 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -1200,13 +1200,23 @@ A *decimal mark* can be written as a period or a comma: 1,23 In the integer part of the quantity (left of the decimal mark), groups -of digits can optionally be separated by a *digit group mark* - a -space, comma, or period (different from the decimal mark): +of digits can optionally be separated by a *digit group mark* - +a comma or period (whichever is not used as decimal mark), +or a space (any of these Unicode space characters: +space, +no-break space, +en space, +em space, +punctuation space, +thin space, +narrow no-break space, +medium mathematical space). $1,000,000.00 EUR 2.000.000,00 INR 9,99,99,999.00 - 1 000 000.9455 + 1 000 000.00 <- ordinary space + 1 000 000.00 <- no-break space hledger is not biased towards [period or comma decimal marks][international number formats], so a number containing just one period or comma, like `1,000` or `1.000`, is ambiguous. diff --git a/hledger/test/journal/numbers.test b/hledger/test/journal/numbers.test index d9ab010dc..931f3e13d 100644 --- a/hledger/test/journal/numbers.test +++ b/hledger/test/journal/numbers.test @@ -36,14 +36,27 @@ $ hledger bal -f - >2 // >= 1 -# ** 5. Space between digits groups +# ** 5. Spaces between digit groups, any of the 8 unicode space characters we support. < -2017/1/1 - a 1 000.00 EUR - b -1 000.00 EUR -$ hledger bal -f - --no-total - 1 000.00 EUR a - -1 000.00 EUR b +2000-01-01 + (a) 1 000. A ; space + (b) 1 000. B ; no-break space + (c) 1 000. C ; en space + (d) 1 000. D ; em space + (e) 1 000. E ; punctuation space + (f) 1 000. F ; thin space + (g) 1 000. G ; narrow no-break space + (h) 1 000. H ; medium mathematical space + +$ hledger bal -f - -N + 1 000 A a + 1 000 B b + 1 000 C c + 1 000 D d + 1 000 E e + 1 000 F f + 1 000 G g + 1 000 H h # ** 6. Space between digits groups in commodity directive < @@ -203,3 +216,24 @@ Balance changes in 2021: ===++=================================== a || -0.12345678901234567890123456 EUR +# ** 21. When spaces are used inconsistently, what happens ? The usual, first one seen sets commodity style. +< +2000-01-01 + (a) 1 000. A ; no-break space + (a) 1 000. A ; space + (a) 1 000. A ; en space + +$ hledger bal -f - -N + 3 000 A a + +# ** 22. And a commodity directive can enforce consistency as usual. +< +commodity 1 000. A ; narrow no-break space + +2000-01-01 + (a) 1 000. A ; space + (a) 1 000. A ; no-break space + (a) 1 000. A ; en space + +$ hledger bal -f - -N + 3 000 A a