Use ‘Scientific’ as target type for floats

Close #95. Here we introduce ‘scientific’ parser that can parse arbitrary big numbers without error or memory overflow. ‘float’ still returns ‘Double’, but it's defined in terms of ‘scientific’ now. Since ‘Scientific’ type can reliably represent integer values as well as floating point values, ‘number’ now returns ‘Scientific’ instead of ‘Either Integer Double’ (‘Integer’ or ‘Double’ can be extracted from ‘Scientific’ value anyway). This in turn makes ‘signed’ parser more natural and general, because we do not need ad-hoc ‘Signed’ type class anymore.
2024-12-28 18:54:34 +03:00 · 2016-03-29 16:33:59 +06:00 · 2016-03-29 16:33:59 +06:00 · 18d192ba70
commit 18d192ba70
parent cd7f12963c
4 changed files with 48 additions and 37 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -19,6 +19,15 @@
 * Changed order of arguments for a number of functions in
  `Text.Megaparsec.Pos`, allowing for easier point-free composition.

+* Introduced `scientific` parser that can parse arbitrary big numbers
+  without error or memory overflow. `float` still returns `Double`, but it's
+  defined in terms of `scientific` now. Since `Scientific` type can reliably
+  represent integer values as well as floating point values, `number` now
+  returns `Scientific` instead of `Either Integer Double` (`Integer` or
+  `Double` can be extracted from `Scientific` value anyway). This in turn
+  makes `signed` parser more natural and general, because we do not need
+  ad-hoc `Signed` type class anymore.
+
 ## Megaparsec 4.4.0

 * Now state returned on failure is the exact state of parser at the moment
--- a/Text/Megaparsec/Lexer.hs
+++ b/Text/Megaparsec/Lexer.hs
@ -35,11 +35,11 @@ module Text.Megaparsec.Lexer
    -- * Character and string literals
  , charLiteral
    -- * Numbers
-  , Signed (..)
  , integer
  , decimal
  , hexadecimal
  , octal
+  , scientific
  , float
  , number
  , signed )
@ -49,8 +49,7 @@ import Control.Applicative ((<|>), some, optional)
 import Control.Monad (void)
 import Data.Char (readLitChar)
 import Data.Maybe (listToMaybe, fromMaybe, isJust)
-import Prelude hiding (negate)
-import qualified Prelude
+import Data.Scientific (Scientific, toRealFloat)

 import Text.Megaparsec.Combinator
 import Text.Megaparsec.Pos
@ -310,26 +309,6 @@ charLiteral = label "literal character" $ do
 ----------------------------------------------------------------------------
 -- Numbers

-- | This type class abstracts the concept of signed number in context of
-- this module. This is especially useful when you want to compose 'signed'
-- and 'number'.
-
-class Signed a where
-
-  -- | Unary negation.
-
-  negate :: a -> a
-
-instance Signed Integer where
-  negate = Prelude.negate
-
-instance Signed Double where
-  negate = Prelude.negate
-
-instance (Signed l, Signed r) => Signed (Either l r) where
-  negate (Left  x) = Left  $ negate x
-  negate (Right x) = Right $ negate x
-
 -- | Parse an integer without sign in decimal representation (according to
 -- format of integer literals described in Haskell report).
 --
@ -373,14 +352,28 @@ octal = nump "0o" C.octDigitChar <?> "octal integer"
 nump :: MonadParsec s m Char => String -> m Char -> m Integer
 nump prefix baseDigit = read . (prefix ++) <$> some baseDigit

-- | Parse a floating point value without sign. Representation of floating
-- point value is expected to be according to Haskell report.
+-- | Parse floating point value as 'Scientific' number. 'Scientific' is
+-- great for parsing of arbitrary precision numbers coming from an untrusted
+-- source. See documentation in "Data.Scientific" for more information.
+-- Representation of floating point value is expected to be according to
+-- Haskell report.
 --
-- If you need to parse signed floats, see 'signed'.
+-- This function does not parse sign, if you need to parse signed numbers,
+-- see 'signed'.
+--
+-- @since 5.0.0
+
+scientific :: MonadParsec s m Char => m Scientific
+scientific = label "floating point number" (read <$> f)
+  where f = (++) <$> some C.digitChar <*> (fraction <|> fExp)
+
+-- | Parse floating point number without sign. This is a simple shortcut
+-- defined as:
+--
+-- > float = toRealFloat <$> scientific

 float :: MonadParsec s m Char => m Double
-float = label "float" (read <$> f)
-  where f = (++) <$> some C.digitChar <*> (fraction <|> fExp)
+float = toRealFloat <$> scientific

 -- | This is a helper for 'float' parser. It parses fractional part of
 -- floating point number, that is, dot and everything after it.
@ -402,10 +395,13 @@ fExp = do
  return (expChar : signStr ++ d)

 -- | Parse a number: either integer or floating point. The parser can handle
-- overlapping grammars graciously.
+-- overlapping grammars graciously. Use functions like
+-- 'Data.Scientific.floatingOrInteger' from "Data.Scientific" to test and
+-- extract integer or real values.

-number :: MonadParsec s m Char => m (Either Integer Double)
-number = (Right <$> try float) <|> (Left <$> integer) <?> "number"
+number :: MonadParsec s m Char => m Scientific
+number = label "number" (read <$> f)
+  where f = (++) <$> some C.digitChar <*> option "" (fraction <|> fExp)

 -- | @signed space p@ parser parses optional sign, then if there is a sign
 -- it will consume optional white space (using @space@ parser), then it runs
@ -418,11 +414,11 @@ number = (Right <$> try float) <|> (Left <$> integer) <?> "number"
 -- > integer       = lexeme L.integer
 -- > signedInteger = L.signed spaceConsumer integer

-signed :: (MonadParsec s m Char, Signed a) => m () -> m a -> m a
+signed :: (MonadParsec s m Char, Num a) => m () -> m a -> m a
 signed spc p = ($) <$> option id (lexeme spc sign) <*> p

 -- | Parse a sign and return either 'id' or 'negate' according to parsed
 -- sign.

-sign :: (MonadParsec s m Char, Signed a) => m (a -> a)
+sign :: (MonadParsec s m Char, Num a) => m (a -> a)
 sign = (C.char '+' *> return id) <|> (C.char '-' *> return negate)
--- a/megaparsec.cabal
+++ b/megaparsec.cabal
@ -59,6 +59,7 @@ library
  build-depends:      base         >= 4.6 && < 5
                    , bytestring
                    , mtl          == 2.*
+                    , scientific   >= 0.3.1 && < 0.4
                    , text         >= 0.2
                    , transformers >= 0.4 && < 0.6

@ -152,6 +153,7 @@ test-suite tests
                    , bytestring
                    , megaparsec     >= 4.4.0
                    , mtl            == 2.*
+                    , scientific     >= 0.3.1 && < 0.4
                    , test-framework >= 0.6 && < 1
                    , test-framework-hunit       >= 0.3 && < 0.4
                    , test-framework-quickcheck2 >= 0.3 && < 0.4
--- a/tests/Lexer.hs
+++ b/tests/Lexer.hs
@ -41,6 +41,7 @@ import Data.Char
  , toLower )
 import Data.List (findIndices, isInfixOf, find)
 import Data.Maybe
+import Data.Scientific (fromFloatDigits)
 import Numeric (showInt, showHex, showOct, showSigned)

 import Test.Framework
@ -283,7 +284,7 @@ prop_float_0 n' = checkParser float r s

 prop_float_1 :: Maybe (NonNegative Integer) -> Property
 prop_float_1 n' = checkParser float r s
-  where r | isNothing n' = posErr 0 s [uneEof, exSpec "float"]
+  where r | isNothing n' = posErr 0 s [uneEof, exSpec "floating point number"]
          | otherwise    = posErr (length s) s [ uneEof, exCh '.', exCh 'E'
                                  , exCh 'e', exSpec "digit" ]
        s = maybe "" (show . getNonNegative) n'
@ -291,8 +292,8 @@ prop_float_1 n' = checkParser float r s
 prop_number_0 :: Either (NonNegative Integer) (NonNegative Double) -> Property
 prop_number_0 n' = checkParser number r s
  where r = Right $ case n' of
-                      Left  x -> Left  $ getNonNegative x
-                      Right x -> Right $ getNonNegative x
+              Left  x -> fromIntegral . getNonNegative $ x
+              Right x -> fromFloatDigits . getNonNegative $ x
        s = either (show . getNonNegative) (show . getNonNegative) n'

 prop_number_1 :: Property
@ -301,8 +302,11 @@ prop_number_1 = checkParser number r s
        s = ""

 prop_number_2 :: Either Integer Double -> Property
-prop_number_2 n = checkParser p (Right n) s
+prop_number_2 n = checkParser p r s
  where p = signed (hidden C.space) number
+        r = Right $ case n of
+              Left  x -> fromIntegral x
+              Right x -> fromFloatDigits x
        s = either show show n

 prop_signed :: Integer -> Int -> Bool -> Property