Merge branch 'new-lexer'

2024-11-23 19:38:05 +03:00 · 2015-09-13 21:16:39 +06:00 · 2015-09-13 21:16:39 +06:00 · 0d39e44f40
commit 0d39e44f40
parent 7133fb54b7 704f84f018
10 changed files with 514 additions and 713 deletions
--- a/Text/Megaparsec/Expr.hs
+++ b/Text/Megaparsec/Expr.hs
@ -41,11 +41,10 @@ data Operator s u m a
 -- descending precedence. All operators in one list have the same precedence
 -- (but may have a different associativity).
 --
-- Prefix and postfix
-- operators of the same precedence can only occur once (i.e. @--2@ is not
-- allowed if @-@ is prefix negate). Prefix and postfix operators of the
-- same precedence associate to the left (i.e. if @++@ is postfix increment,
-- than @-2++@ equals @-1@, not @-3@).
+-- Prefix and postfix operators of the same precedence can only occur once
+-- (i.e. @--2@ is not allowed if @-@ is prefix negate). Prefix and postfix
+-- operators of the same precedence associate to the left (i.e. if @++@ is
+-- postfix increment, than @-2++@ equals @-1@, not @-3@).
 --
 -- The @makeExprParser@ takes care of all the complexity involved in
 -- building expression parser. Here is an example of an expression parser
@ -66,6 +65,9 @@ data Operator s u m a
 -- > binary  name f = InfixL  (reservedOp name >> return f)
 -- > prefix  name f = Prefix  (reservedOp name >> return f)
 -- > postfix name f = Postfix (reservedOp name >> return f)
+--
+-- Please note that multi-character operators should use 'try' in order to
+-- be reported correctly in error messages.

 makeExprParser :: Stream s m t => ParsecT s u m a ->
                  [[Operator s u m a]] -> ParsecT s u m a
--- a/Text/Megaparsec/Language.hs
+++ b/Text/Megaparsec/Language.hs
@ -1,115 +0,0 @@
-- |
-- Module      :  Text.Megaparsec.Language
-- Copyright   :  © 2015 Megaparsec contributors
--                © 2007 Paolo Martini
--                © 1999–2001 Daan Leijen
-- License     :  BSD3
--
-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-- Stability   :  experimental
-- Portability :  non-portable (uses non-portable module Text.Megaparsec.Lexer)
--
-- A helper module that defines some language definitions that can be used
-- to instantiate a token parser (see "Text.Megaparsec.Lexer").
-
-module Text.Megaparsec.Language
-  ( LanguageDef
-  , emptyDef
-  , haskellStyle
-  , javaStyle
-  , haskellDef
-  , mondrianDef )
-where
-
-import Control.Monad.Identity
-
-import Control.Applicative ((<|>))
-import Text.Megaparsec.Char
-import Text.Megaparsec.Lexer
-
-- | This is the most minimal token definition. It is recommended to use
-- this definition as the basis for other definitions. @emptyDef@ has no
-- reserved names or operators, is case sensitive and doesn't accept
-- comments, identifiers or operators.
-
-emptyDef :: LanguageDef String st Identity
-emptyDef =
-  LanguageDef
-  { commentStart    = ""
-  , commentEnd      = ""
-  , commentLine     = ""
-  , nestedComments  = True
-  , identStart      = letterChar <|> char '_'
-  , identLetter     = alphaNumChar <|> oneOf "_'"
-  , opStart         = opLetter emptyDef
-  , opLetter        = oneOf ":!#$%&*+./<=>?@\\^|-~"
-  , reservedOpNames = []
-  , reservedNames   = []
-  , caseSensitive   = True }
-
-- | This is a minimal token definition for Haskell-style languages. It
-- defines the style of comments, valid identifiers and case sensitivity. It
-- does not define any reserved words or operators.
-
-haskellStyle :: LanguageDef String u Identity
-haskellStyle =
-  emptyDef
-  { commentStart    = "{-"
-  , commentEnd      = "-}"
-  , commentLine     = "--"
-  , nestedComments  = True
-  , identStart      = letterChar
-  , identLetter     = alphaNumChar <|> oneOf "_'"
-  , opStart         = opLetter haskellStyle
-  , opLetter        = oneOf ":!#$%&*+./<=>?@\\^|-~"
-  , reservedOpNames = []
-  , reservedNames   = []
-  , caseSensitive   = True }
-
-- | This is a minimal token definition for Java-style languages. It
-- defines the style of comments, valid identifiers and case sensitivity. It
-- does not define any reserved words or operators.
-
-javaStyle  :: LanguageDef String u Identity
-javaStyle =
-  emptyDef
-  { commentStart    = "/*"
-  , commentEnd      = "*/"
-  , commentLine     = "//"
-  , nestedComments  = True
-  , identStart      = letterChar
-  , identLetter     = alphaNumChar <|> oneOf "_'"
-  , reservedNames   = []
-  , reservedOpNames = []
-  , caseSensitive   = False }
-
-- | The language definition for the Haskell language.
-
-haskellDef  :: LanguageDef String u Identity
-haskellDef =
-  haskell98Def
-  { identLetter   = identLetter haskell98Def <|> char '#'
-  , reservedNames = reservedNames haskell98Def ++
-                     [ "foreign", "import", "export", "primitive"
-                     , "_ccall_", "_casm_", "forall"] }
-
-- | The language definition for the language Haskell98.
-
-haskell98Def :: LanguageDef String u Identity
-haskell98Def =
-  haskellStyle
-  { reservedOpNames = ["::","..","=","\\","|","<-","->","@","~","=>"]
-  , reservedNames   = [ "let", "in", "case", "of", "if", "then", "else"
-                      , "data", "type", "class", "default", "deriving"
-                      , "do", "import", "infix", "infixl", "infixr"
-                      , "instance", "module", "newtype", "where"
-                      , "primitive" ] }
-
-- | The language definition for the language Mondrian.
-
-mondrianDef :: LanguageDef String u Identity
-mondrianDef =
-  javaStyle
-  { reservedNames = [ "case", "class", "default", "extends"
-                    , "import", "in", "let", "new", "of", "package" ]
-  , caseSensitive = True }
--- a/Text/Megaparsec/Lexer.hs
+++ b/Text/Megaparsec/Lexer.hs
@ -9,573 +9,271 @@
 -- Stability   :  experimental
 -- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
 --
-- A helper module to parse lexical elements. See 'makeLexer' for a
-- description of how to use it.
-
-{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
+-- High-level parsers to help you write your lexer. The module doesn't
+-- impose how you should write your parser, but certain approaches may be
+-- more elegant than others. Especially important theme is parsing of write
+-- space, comments and indentation.
+--
+-- This module is supposed to be imported qualified:
+--
+-- > import qualified Text.Megaparsec.Lexer as L

 module Text.Megaparsec.Lexer
-  ( LanguageDef (..)
-  , Lexer (..)
-  , makeLexer )
+  ( -- * White space and indentation
+    space
+  , lexeme
+  , symbol
+  , symbol'
+  , indentGuard
+  , skipLineComment
+  , skipBlockComment
+    -- * Character and string literals
+  , charLiteral
+    -- * Numbers
+  , integer
+  , decimal
+  , hexadecimal
+  , octal
+  , float
+  , number
+  , signed )
 where

-import Control.Applicative ((<|>), many, some)
+import Control.Applicative ((<|>), some)
 import Control.Monad (void)
-import Data.Char (isAlpha, toLower, toUpper)
-import Data.List (sort)
+import Data.Char (readLitChar)
+import Data.Maybe (listToMaybe)

-import Text.Megaparsec.Prim
-import Text.Megaparsec.Char
 import Text.Megaparsec.Combinator
+import Text.Megaparsec.Pos
+import Text.Megaparsec.Prim
+import Text.Megaparsec.ShowToken
+import qualified Text.Megaparsec.Char as C

-- Language definition
+-- White space and indentation

-- | The @LanguageDef@ type is a record that contains all parameters used to
-- control features of the "Text.Megaparsec.Lexer" module. The module
-- "Text.Megaparsec.Language" contains some default definitions.
-
-data LanguageDef s u m =
-  LanguageDef {
-
-  -- | Describes the start of a block comment. Use the empty string if the
-  -- language doesn't support block comments.
-
-    commentStart :: String
-
-  -- | Describes the end of a block comment. Use the empty string if the
-  -- language doesn't support block comments.
-
-  , commentEnd :: String
-
-  -- | Describes the start of a line comment. Use the empty string if the
-  -- language doesn't support line comments.
-
-  , commentLine :: String
-
-  -- | Set to 'True' if the language supports nested block comments.
-
-  , nestedComments :: Bool
-
-  -- | This parser should accept any start characters of identifiers, for
-  -- example @letter \<|> char \'_\'@.
-
-  , identStart :: ParsecT s u m Char
-
-  -- | This parser should accept any legal tail characters of identifiers,
-  -- for example @alphaNum \<|> char \'_\'@.
-
-  , identLetter :: ParsecT s u m Char
-
-  -- | This parser should accept any start characters of operators, for
-  -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
-
-  , opStart :: ParsecT s u m Char
-
-  -- | This parser should accept any legal tail characters of operators.
-  -- Note that this parser should even be defined if the language doesn't
-  -- support user-defined operators, or otherwise the 'reservedOp' parser
-  -- won't work correctly.
-
-  , opLetter :: ParsecT s u m Char
-
-  -- | The list of reserved identifiers.
-
-  , reservedNames :: [String]
-
-  -- | The list of reserved operators.
-
-  , reservedOpNames :: [String]
-
-  -- | Set to 'True' if the language is case sensitive.
-
-  , caseSensitive :: Bool }
-
-- Lexer
-
-- | The type of the record that holds lexical parsers that work on
-- @s@ streams with state @u@ over a monad @m@.
-
-data Lexer s u m =
-  Lexer {
-
-  -- | The lexeme parser parses a legal identifier. Returns the identifier
-  -- string. This parser will fail on identifiers that are reserved
-  -- words. Legal identifier (start) characters and reserved words are
-  -- defined in the 'LanguageDef' that is passed to 'makeLexer'.
-
-    identifier :: ParsecT s u m String
-
-  -- | The lexeme parser @reserved name@ parses @symbol name@, but it also
-  -- checks that the @name@ is not a prefix of a valid identifier.
-
-  , reserved :: String -> ParsecT s u m ()
-
-  -- | The lexeme parser parses a legal operator. Returns the name of the
-  -- operator. This parser will fail on any operators that are reserved
-  -- operators. Legal operator (start) characters and reserved operators are
-  -- defined in the 'LanguageDef' that is passed to 'makeLexer'.
-
-  , operator :: ParsecT s u m String
-
-  -- | The lexeme parser @reservedOp name@ parses @symbol name@, but it
-  -- also checks that the @name@ is not a prefix of a valid operator.
-
-  , reservedOp :: String -> ParsecT s u m ()
-
-  -- | The lexeme parser parses a single literal character. Returns the
-  -- literal character value. This parsers deals correctly with escape
-  -- sequences. The literal character is parsed according to the grammar
-  -- rules defined in the Haskell report (which matches most programming
-  -- languages quite closely).
-
-  , charLiteral :: ParsecT s u m Char
-
-  -- | The lexeme parser parses a literal string. Returns the literal
-  -- string value. This parsers deals correctly with escape sequences and
-  -- gaps. The literal string is parsed according to the grammar rules
-  -- defined in the Haskell report (which matches most programming languages
-  -- quite closely).
-
-  , stringLiteral :: ParsecT s u m String
-
-  -- | The lexeme parser parses an integer (a whole number). This parser
-  -- /does not/ parse sign. Returns the value of the number. The number can
-  -- be specified in 'decimal', 'hexadecimal' or 'octal'. The number is
-  -- parsed according to the grammar rules in the Haskell report.
-
-  , integer :: ParsecT s u m Integer
-
-  -- | This is just like 'integer', except it can parse sign.
-
-  , integer' :: ParsecT s u m Integer
-
-  -- | The lexeme parses a positive whole number in the decimal system.
-  -- Returns the value of the number.
-
-  , decimal :: ParsecT s u m Integer
-
-  -- | The lexeme parses a positive whole number in the hexadecimal
-  -- system. The number should be prefixed with “0x” or “0X”. Returns the
-  -- value of the number.
-
-  , hexadecimal :: ParsecT s u m Integer
-
-  -- | The lexeme parses a positive whole number in the octal system.
-  -- The number should be prefixed with “0o” or “0O”. Returns the value of
-  -- the number.
-
-  , octal :: ParsecT s u m Integer
-
-  -- | @signed p@ tries to parse sign (i.e. “+”, “-”, or nothing) and
-  -- then runs parser @p@, changing sign of its result accordingly. Note
-  -- that there may be white space after the sign but not before it.
-
-  , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a
-
-  -- | The lexeme parser parses a floating point value. Returns the value
-  -- of the number. The number is parsed according to the grammar rules
-  -- defined in the Haskell report, sign is /not/ parsed, use 'float'' to
-  -- achieve parsing of signed floating point values.
-
-  , float :: ParsecT s u m Double
-
-  -- | This is just like 'float', except it can parse sign.
-
-  , float' :: ParsecT s u m Double
-
-  -- | The lexeme parser parses either 'integer' or a 'float'.
-  -- Returns the value of the number. This parser deals with any overlap in
-  -- the grammar rules for integers and floats. The number is parsed
-  -- according to the grammar rules defined in the Haskell report.
-
-  , number :: ParsecT s u m (Either Integer Double)
-
-  -- | This is just like 'number', except it can parse sign.
-
-  , number' :: ParsecT s u m (Either Integer Double)
-
-  -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
-  -- trailing white space.
-
-  , symbol :: String -> ParsecT s u m String
-
-  -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
-  -- parser, returning the value of @p@. Every lexical token (lexeme) is
-  -- defined using @lexeme@, this way every parse starts at a point without
-  -- white space. Parsers that use @lexeme@ are called /lexeme/ parsers in
-  -- this document.
-  --
-  -- The only point where the 'whiteSpace' parser should be called
-  -- explicitly is the start of the main parser in order to skip any leading
-  -- white space.
-
-  , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
-
-  -- | Parses any white space. White space consists of /zero/ or more
-  -- occurrences of a 'space', a line comment or a block (multi line)
-  -- comment. Block comments may be nested. How comments are started and
-  -- ended is defined in the 'LanguageDef' that is passed to
-  -- 'makeLexer'.
-
-  , whiteSpace :: ParsecT s u m ()
-
-  -- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis,
-  -- returning the value of @p@.
-
-  , parens :: forall a. ParsecT s u m a -> ParsecT s u m a
-
-  -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (“{” and
-  -- “}”), returning the value of @p@.
-
-  , braces :: forall a. ParsecT s u m a -> ParsecT s u m a
-
-  -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (“\<”
-  -- and “>”), returning the value of @p@.
-
-  , angles :: forall a. ParsecT s u m a -> ParsecT s u m a
-
-  -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (“[”
-  -- and “]”), returning the value of @p@.
-
-  , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
-
-  -- | Lexeme parser @semicolon@ parses the character “;” and skips any
-  -- trailing white space. Returns the string “;”.
-
-  , semicolon :: ParsecT s u m String
-
-  -- | Lexeme parser @comma@ parses the character “,” and skips any
-  -- trailing white space. Returns the string “,”.
-
-  , comma :: ParsecT s u m String
-
-  -- | Lexeme parser @colon@ parses the character “:” and skips any
-  -- trailing white space. Returns the string “:”.
-
-  , colon :: ParsecT s u m String
-
-  -- | Lexeme parser @dot@ parses the character “.” and skips any
-  -- trailing white space. Returns the string “.”.
-
-  , dot :: ParsecT s u m String
-
-  -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
-  -- separated by 'semicolon'. Returns a list of values returned by @p@.
-
-  , semicolonSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-
-  -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
-  -- separated by 'semi'. Returns a list of values returned by @p@.
-
-  , semicolonSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-
-  -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
-  -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-
-  , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-
-  -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
-  -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-
-  , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
-
-- | The expression @makeLexer language@ creates a 'Lexer' record that
-- contains lexical parsers that are defined using the definitions in the
-- @language@ record.
+-- | @space spaceChar lineComment blockComment@ produces parser that can
+-- parse white space in general. It's expected that you create such a parser
+-- once and pass it to many other function in this module as needed (it's
+-- usually called @spaceConsumer@ in doc-strings here).
 --
-- The use of this function is quite stylized — one imports the appropriate
-- language definition and selects the lexical parsers that are needed from
-- the resulting 'Lexer'.
+-- @spaceChar@ is used to parse trivial space characters. You can use
+-- 'C.spaceChar' from "Text.Megaparsec.Char" for this purpose as well as
+-- your own parser (if you don't want automatically consume newlines, for
+-- example).
 --
-- > module Main (main) where
+-- @lineComment@ is used to parse line comments. You can use
+-- 'skipLineComment' if you don't need anything special.
+--
+-- @blockComment@ is used to parse block (multi-line) comments. You can use
+-- 'skipBlockComment' if you don't need anything special.
+--
+-- Parsing of white space is important part of any parser. We propose scheme
+-- where every lexeme should consume all trailing white space, but not
+-- leading one. You should wrap every lexeme parser with 'lexeme' to achieve
+-- this. You only need to call 'space' “manually” to consume any white space
+-- before the first lexeme (at the beginning of file).
+
+space :: Stream s m t => ParsecT s u m () ->
+         ParsecT s u m () -> ParsecT s u m () -> ParsecT s u m ()
+space ch line block = hidden . skipMany $ choice [ch, line, block]
+
+-- | This is wrapper for lexemes. Typical usage is to supply first argument
+-- (parser that consumes white space, probably defined via 'space') and use
+-- resulting function to wrap parsers for every lexeme.
+--
+-- > lexeme  = L.lexeme spaceConsumer
+-- > integer = lexeme L.integer
+
+lexeme :: ParsecT s u m () -> ParsecT s u m a -> ParsecT s u m a
+lexeme spc p = p <* spc
+
+-- | This is a helper to parse symbols, i.e. verbatim strings. You pass the
+-- first argument (parser that consumes white space, probably defined via
+-- 'space') and then you can use the resulting function to parse strings:
+--
+-- > symbol    = L.symbol spaceConsumer
 -- >
-- > import Text.Megaparsec
-- > import Text.Megaparsec.Language (haskellDef)
-- > import qualified Text.Megaparsec.Lexer as L
-- >
-- > -- The parser
-- > …
-- >
-- > expr =  parens expr
-- >     <|> identifier
-- >     <|> …
-- >
-- > -- The lexer
-- > lexer      = L.makeLexer haskellDef
-- >
-- > parens     = L.parens     lexer
-- > braces     = L.braces     lexer
-- > identifier = L.identifier lexer
-- > reserved   = L.reserved   lexer
-- > …
+-- > parens    = between (symbol "(") (symbol ")")
+-- > braces    = between (symbol "{") (symbol "}")
+-- > angles    = between (symbol "<") (symbol ">")
+-- > brackets  = between (symbol "[") (symbol "]")
+-- > semicolon = symbol ";"
+-- > comma     = symbol ","
+-- > colon     = symbol ":"
+-- > dot       = symbol "."

-makeLexer :: Stream s m Char => LanguageDef s u m -> Lexer s u m
-makeLexer languageDef =
-  Lexer
-  { identifier    = identifier
-  , reserved      = reserved
-  , operator      = operator
-  , reservedOp    = reservedOp
+symbol :: Stream s m Char =>
+          ParsecT s u m () -> String -> ParsecT s u m String
+symbol spc = lexeme spc . C.string

-  , charLiteral   = charLiteral
-  , stringLiteral = stringLiteral
+-- | Case-insensitive version of 'symbol'. This may be helpful if you're
+-- working with case-insensitive languages.

-  , integer       = integer
-  , integer'      = integer'
-  , decimal       = decimal
-  , hexadecimal   = hexadecimal
-  , octal         = octal
-  , signed        = signed
-  , float         = float
-  , float'        = float'
-  , number        = number
-  , number'       = number'
+symbol' :: Stream s m Char =>
+           ParsecT s u m () -> String -> ParsecT s u m String
+symbol' spc = lexeme spc . C.string'

-  , symbol        = symbol
-  , lexeme        = lexeme
-  , whiteSpace    = whiteSpace
+-- | @indentGuard spaceConsumer test@ first consumes all white space
+-- (indentation) with @spaceConsumer@ parser, then it checks column
+-- position. It should satisfy supplied predicate @test@, otherwise the
+-- parser fails with error message “incorrect indentation”. On success
+-- current column position is returned.
+--
+-- When you want to parse block of indentation first run this parser with
+-- predicate like @(> 1)@ — this will make sure you have some
+-- indentation. Use returned value to check indentation on every subsequent
+-- line according to syntax of your language.

-  , parens        = parens
-  , braces        = braces
-  , angles        = angles
-  , brackets      = brackets
-  , semicolon     = semicolon
-  , comma         = comma
-  , colon         = colon
-  , dot           = dot
-  , semicolonSep  = semicolonSep
-  , semicolonSep1 = semicolonSep1
-  , commaSep      = commaSep
-  , commaSep1     = commaSep1 }
-  where
+indentGuard :: Stream s m t =>
+               ParsecT s u m () -> (Int -> Bool) -> ParsecT s u m Int
+indentGuard spc p = do
+  spc
+  pos <- sourceColumn <$> getPosition
+  if p pos
+  then return pos
+  else fail "incorrect indentation"

-  -- bracketing
+-- | Given comment prefix this function returns parser that skips line
+-- comments. Note that it stops just before newline character but doesn't
+-- consume the newline. Newline is either supposed to be consumed by 'space'
+-- parser or picked up manually.

-  parens    = between (symbol "(") (symbol ")")
-  braces    = between (symbol "{") (symbol "}")
-  angles    = between (symbol "<") (symbol ">")
-  brackets  = between (symbol "[") (symbol "]")
+skipLineComment :: Stream s m Char => String -> ParsecT s u m ()
+skipLineComment prefix = p >> void (manyTill C.anyChar n)
+  where p = try $ C.string prefix
+        n = lookAhead C.newline

-  semicolon = symbol ";"
-  comma     = symbol ","
-  dot       = symbol "."
-  colon     = symbol ":"
+-- | @skipBlockComment start end@ skips non-nested block comment starting
+-- with @start@ and ending with @end@.

-  commaSep  = (`sepBy` comma)
-  semicolonSep = (`sepBy` semicolon)
+skipBlockComment :: Stream s m Char => String -> String -> ParsecT s u m ()
+skipBlockComment start end = p >> void (manyTill C.anyChar n)
+  where p = try $ C.string start
+        n = try $ C.string end

-  commaSep1 = (`sepBy1` comma)
-  semicolonSep1 = (`sepBy1` semicolon)
+-- Character and string literals

-  -- chars & strings
+-- | The lexeme parser parses a single literal character without
+-- quotes. Purpose of this parser is to help with parsing of commonly used
+-- escape sequences. It's your responsibility to take care of character
+-- literal syntax in your language (surround it with single quotes or
+-- similar).
+--
+-- The literal character is parsed according to the grammar rules defined in
+-- the Haskell report.
+--
+-- Note that you can use this parser as a building block to parse various
+-- string literals:
+--
+-- > stringLiteral = char '"' >> manyTill L.charLiteral (char '"')

-  charLiteral = lexeme ( between (char '\'')
-                                 (char '\'' <?> "end of character")
-                                 characterChar )
-                <?> "character"
+charLiteral :: Stream s m Char => ParsecT s u m Char
+charLiteral = label "literal character" $ do
+  r@(x:_) <- lookAhead $ count' 1 8 C.anyChar
+  case listToMaybe (readLitChar r) of
+    Just (c, r') -> count (length r - length r') C.anyChar >> return c
+    Nothing      -> unexpected (showToken x)

-  characterChar = charLetter <|> charEscape <?> "literal character"
+-- Numbers

-  charEscape = char '\\' >> escapeCode
-  charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
+-- | Parse an integer without sign in decimal representation (according to
+-- format of integer literals described in Haskell report).
+--
+-- If you need to parse signed integers, see 'signed' combinator.

-  stringLiteral =
-      lexeme ((foldr (maybe id (:)) "" <$>
-               between (char '"') (char '"' <?> "end of string")
-                           (many stringChar)) <?> "literal string")
+integer :: Stream s m Char => ParsecT s u m Integer
+integer = decimal <?> "integer"

-  stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
+-- | The same as 'integer', but 'integer' is 'label'ed with “integer” label,
+-- while this parser is not labeled.

-  stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
+decimal :: Stream s m Char => ParsecT s u m Integer
+decimal = nump "" C.digitChar

-  stringEscape = char '\\' >>
-                 ( (escapeGap >> return Nothing)   <|>
-                   (escapeEmpty >> return Nothing) <|>
-                   (Just <$> escapeCode) )
+-- | Parse an integer in hexadecimal representation. Representation of
+-- hexadecimal number is expected to be according to Haskell report except
+-- for the fact that this parser doesn't parse “0x” or “0X” prefix. It is
+-- reponsibility of the programmer to parse correct prefix before parsing
+-- the number itself.
+--
+-- For example you can make it conform to Haskell report like this:
+--
+-- > hexadecimal = char '0' >> char' 'x' >> L.hexadecimal

-  escapeEmpty = char '&'
-  escapeGap   = some spaceChar >> char '\\' <?> "end of string gap"
+hexadecimal :: Stream s m Char => ParsecT s u m Integer
+hexadecimal = nump "0x" C.hexDigitChar

-  -- escape codes
+-- | Parse an integer in octal representation. Representation of octal
+-- number is expected to be according to Haskell report except for the fact
+-- that this parser doesn't parse “0o” or “0O” prefix. It is responsibility
+-- of the programmer to parse correct prefix before parsing the number
+-- itself.

-  escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
-               <?> "escape code"
+octal :: Stream s m Char => ParsecT s u m Integer
+octal = nump "0o" C.octDigitChar

-  charEsc = choice (parseEsc <$> escMap)
-      where parseEsc (c, code) = char c >> return code
+-- | @nump prefix p@ parses /one/ or more characters with @p@ parser, then
+-- prepends @prefix@ to returned value and tries to interpret the result as
+-- an integer according to Haskell syntax.

-  charNum = toEnum . fromInteger <$>
-            ( decimal <|>
-             (char 'o' >> nump "0o" octDigitChar) <|>
-             (char 'x' >> nump "0x" hexDigitChar) )
+nump :: String -> ParsecT s u m Char -> ParsecT s u m Integer
+nump prefix baseDigit = read . (prefix ++) <$> some baseDigit

-  charAscii = choice (parseAscii <$> asciiMap)
-      where parseAscii (asc, code) = try (string asc >> return code)
+-- | Parse a floating point value without sign. Representation of floating
+-- point value is expected to be according to Haskell report.
+--
+-- If you need to parse signed floats, see 'signed' combinator.

-  charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upperChar)
-
-  -- escape code tables
-
-  escMap      = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
-  asciiMap    = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
-
-  ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
-                 "FS","GS","RS","US","SP"]
-  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
-                 "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
-                 "CAN","SUB","ESC","DEL"]
-
-  ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
-  ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
-
-  -- numbers — integers
-
-  integer  = decimal
-  integer' = signed integer
-
-  decimal     = lexeme (nump "" digitChar <?> "integer")
-  hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigitChar
-  octal       = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigitChar
-
-  nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
-
-  signed p = ($) <$> option id (lexeme sign) <*> p
-
-  sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
-  sign = (char '+' *> return id) <|> (char '-' *> return negate)
-
-  -- numbers — floats
-
-  float  = lexeme ffloat <?> "float"
-  float' = signed float
-
-  ffloat = read <$> ffloat'
-    where
-      ffloat' = do
-        decimal <- fDec
+float :: Stream s m Char => ParsecT s u m Double
+float = label "float" $ read <$> f
+  where f = do
+          d    <- some C.digitChar
          rest <- fraction <|> fExp
-        return $ decimal ++ rest
+          return $ d ++ rest

-  fraction = do
-    void $ char '.'
-    decimal <- fDec
-    exp <- option "" fExp
-    return $ '.' : decimal ++  exp
+-- | This is a helper for 'float' parser. It parses fractional part of
+-- floating point number, that is, dot and everything after it.

-  fDec = some digitChar
+fraction :: Stream s m Char => ParsecT s u m String
+fraction = do
+  void $ C.char '.'
+  d <- some C.digitChar
+  e <- option "" fExp
+  return $ '.' : d ++ e

-  fExp = do
-    expChar <- oneOf "eE"
-    signStr <- option "" (pure <$> oneOf "+-")
-    decimal <- fDec
-    return $ expChar : signStr ++ decimal
+-- | This helper parses exponent of floating point numbers.

-  -- numbers — a more general case
+fExp :: Stream s m Char => ParsecT s u m String
+fExp = do
+  expChar <- C.char' 'e'
+  signStr <- option "" (pure <$> choice (C.char <$> "+-"))
+  d       <- some C.digitChar
+  return $ expChar : signStr ++ d

-  number  = (Right <$> try float)  <|> (Left <$> integer)  <?> "number"
-  number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"
+-- | Parse a number: either integer or floating point. The parser can handle
+-- overlapping grammars graciously.

-  -- operators & reserved ops
+number :: Stream s m Char => ParsecT s u m (Either Integer Double)
+number = (Right <$> try float) <|> (Left <$> integer) <?> "number"

-  reservedOp name =
-      lexeme $ try $ do
-        void $ string name
-        notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
+-- | @signed space p@ parser parses optional sign, then if there is a sign
+-- it will consume optional white space (using @space@ parser), then it runs
+-- parser @p@ which should return a number. Sign of the number is changed
+-- according to previously parsed sign.
+--
+-- For example, to parse signed integer you can write:
+--
+-- > lexeme        = L.lexeme spaceConsumer
+-- > integer       = lexeme L.integer
+-- > signedInteger = signed spaceConsumer integer

-  operator =
-      lexeme $ try $ do
-        name <- oper
-        if isReservedOp name
-        then unexpected ("reserved operator " ++ show name)
-        else return name
+signed :: (Stream s m Char, Num a) =>
+          ParsecT s u m () -> ParsecT s u m a -> ParsecT s u m a
+signed spc p = ($) <$> option id (lexeme spc sign) <*> p

-  oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
-         <?> "operator"
+-- | Parse a sign and return either 'id' or 'negate' according to parsed
+-- sign.

-  isReservedOp = isReserved . sort $ reservedOpNames languageDef
-
-  -- identifiers & reserved words
-
-  reserved name =
-      lexeme $ try $ do
-        void $ caseString name
-        notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
-
-  caseString name
-      | caseSensitive languageDef = string name
-      | otherwise                 = walk name >> return name
-      where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
-            caseChar c
-                | isAlpha c = char (toLower c) <|> char (toUpper c)
-                | otherwise = char c
-
-  identifier =
-      lexeme $ try $ do
-        name <- ident
-        if isReservedName name
-        then unexpected ("reserved word " ++ show name)
-        else return name
-
-  ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
-          <?> "identifier"
-
-  isReservedName name = isReserved theReservedNames caseName
-      where caseName
-                | caseSensitive languageDef = name
-                | otherwise                 = toLower <$> name
-
-  isReserved names name = scan names
-      where scan []     = False
-            scan (r:rs) = case compare r name of
-                            LT  -> scan rs
-                            EQ  -> True
-                            GT  -> False
-
-  theReservedNames
-      | caseSensitive languageDef = sort reserved
-      | otherwise                 = sort . fmap (fmap toLower) $ reserved
-      where reserved = reservedNames languageDef
-
-  -- white space & symbols
-
-  symbol = lexeme . string
-
-  lexeme p = p <* whiteSpace
-
-  whiteSpace = hidden space -- FIXME: write it in a decent manner
-      -- \| noLine && noMulti = skipMany (space            <?> "")
-      -- \| noLine            = skipMany (space            <|>
-      --                                 multiLineComment <?> "")
-      -- \| noMulti           = skipMany (space            <|>
-      --                                 oneLineComment   <?> "")
-      -- \| otherwise         = skipMany (space            <|>
-      --                                 oneLineComment   <|>
-      --                                 multiLineComment <?> "")
-      -- where
-      --   noLine  = null (commentLine languageDef)
-      --   noMulti = null (commentStart languageDef)
-
-  -- oneLineComment = void (try (string (commentLine languageDef))
-  --                       >> skipMany (satisfy (/= '\n')))
-
-  -- multiLineComment = try (string (commentStart languageDef)) >> inComment
-
-  -- inComment = if nestedComments languageDef
-  --             then inCommentMulti
-  --             else inCommentSingle
-
-  -- inCommentMulti
-  --     =  void (try . string $ commentEnd languageDef)
-  --    <|> (multiLineComment            >> inCommentMulti)
-  --    <|> (skipSome (noneOf startEnd) >> inCommentMulti)
-  --    <|> (oneOf startEnd              >> inCommentMulti)
-  --    <?> "end of comment"
-
-  -- inCommentSingle
-  --     =  void (try . string $ commentEnd languageDef)
-  --    <|> (skipSome (noneOf startEnd) >> inCommentSingle)
-  --    <|> (oneOf startEnd              >> inCommentSingle)
-  --    <?> "end of comment"
-
-  -- startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef
+sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
+sign = (C.char '+' *> return id) <|> (C.char '-' *> return negate)
--- a/megaparsec.cabal
+++ b/megaparsec.cabal
@ -100,7 +100,6 @@ library
                   , Text.Megaparsec.Combinator
                   , Text.Megaparsec.Error
                   , Text.Megaparsec.Expr
-                   , Text.Megaparsec.Language
                   , Text.Megaparsec.Lexer
                   , Text.Megaparsec.Perm
                   , Text.Megaparsec.Pos
@ -116,6 +115,7 @@ test-suite old-tests
  main-is:           Main.hs
  hs-source-dirs:    old-tests
  type:              exitcode-stdio-1.0
+  ghc-options:       -O2 -Wall
  other-modules:     Bugs
                   , Bugs.Bug2
                   , Bugs.Bug6
@ -126,7 +126,8 @@ test-suite old-tests
                   , HUnit                  == 1.2.*
                   , test-framework         >= 0.6 && < 1
                   , test-framework-hunit   >= 0.2 && < 0.4
-  ghc-options:       -O2 -Wall
+  default-extensions:
+                     FlexibleContexts
  default-language:  Haskell2010

 test-suite tests
--- a/old-tests/Bugs/Bug2.hs
+++ b/old-tests/Bugs/Bug2.hs
@ -1,21 +1,33 @@

 module Bugs.Bug2 (main) where

-import Test.HUnit hiding (Test)
-import Test.Framework
-import Test.Framework.Providers.HUnit
+import Control.Applicative (empty)
+import Control.Monad (void)

 import Text.Megaparsec
-import Text.Megaparsec.Language (haskellDef)
+import Text.Megaparsec.String
 import qualified Text.Megaparsec.Lexer as L

+import Test.Framework
+import Test.Framework.Providers.HUnit
+import Test.HUnit hiding (Test)
+
+sc :: Parser ()
+sc = L.space (void spaceChar) empty empty
+
+lexeme :: Parser a -> Parser a
+lexeme = L.lexeme sc
+
+stringLiteral :: Parser String
+stringLiteral = lexeme $ char '"' >> manyTill L.charLiteral (char '"')
+
 main :: Test
 main =
  testCase "Control Char Parsing (#2)" $
  parseString "\"test\\^Bstring\"" @?= "test\^Bstring"
 where
+   parseString :: String -> String
   parseString input =
-      case parse parser "Example" input of
+      case parse stringLiteral "Example" input of
        Left{} -> error "Parse failure"
        Right str -> str
-   parser = L.stringLiteral $ L.makeLexer haskellDef
--- a/old-tests/Bugs/Bug35.hs
+++ b/old-tests/Bugs/Bug35.hs
@ -2,13 +2,11 @@
 module Bugs.Bug35 (main) where

 import Text.Megaparsec
-import Text.Megaparsec.Language
-import Text.Megaparsec.String
 import qualified Text.Megaparsec.Lexer as L

-import Test.HUnit hiding (Test)
 import Test.Framework
 import Test.Framework.Providers.HUnit
+import Test.HUnit hiding (Test)

 trickyFloats :: [String]
 trickyFloats =
@ -28,12 +26,9 @@ trickyFloats =
    , "32.96176575630599"
    , "38.47735512322269" ]

-float :: Parser Double
-float = L.float (L.makeLexer emptyDef)
-
 testBatch :: Assertion
 testBatch = mapM_ testFloat trickyFloats
-    where testFloat x = parse float "" x @?= Right (read x :: Double)
+    where testFloat x = parse L.float "" x @?= Right (read x :: Double)

 main :: Test
 main = testCase "Output of Text.Megaparsec.Lexer.float (#35)" testBatch
--- a/old-tests/Bugs/Bug39.hs
+++ b/old-tests/Bugs/Bug39.hs
@ -1,16 +1,17 @@

 module Bugs.Bug39 (main) where

+import Control.Applicative (empty)
+import Control.Monad (void)
 import Data.Either (isLeft, isRight)

 import Text.Megaparsec
-import Text.Megaparsec.Language
 import Text.Megaparsec.String
 import qualified Text.Megaparsec.Lexer as L

-import Test.HUnit hiding (Test)
 import Test.Framework
 import Test.Framework.Providers.HUnit
+import Test.HUnit hiding (Test)

 shouldFail :: [String]
 shouldFail = [" 1", " +1", " -1"]
@ -18,8 +19,14 @@ shouldFail = [" 1", " +1", " -1"]
 shouldSucceed :: [String]
 shouldSucceed = ["1", "+1", "-1", "+ 1 ", "- 1 ", "1 "]

+sc :: Parser ()
+sc = L.space (void spaceChar) empty empty
+
+lexeme :: Parser a -> Parser a
+lexeme = L.lexeme sc
+
 integer :: Parser Integer
-integer = L.integer' (L.makeLexer emptyDef)
+integer = lexeme $ L.signed sc L.integer

 testBatch :: Assertion
 testBatch = mapM_ (f testFail)    shouldFail >>
--- a/old-tests/Bugs/Bug6.hs
+++ b/old-tests/Bugs/Bug6.hs
@ -1,13 +1,13 @@

 module Bugs.Bug6 (main) where

-import Test.HUnit hiding (Test)
-import Test.Framework
-import Test.Framework.Providers.HUnit
-
 import Text.Megaparsec
 import Text.Megaparsec.String

+import Test.Framework
+import Test.Framework.Providers.HUnit
+import Test.HUnit hiding (Test)
+
 import Util

 main :: Test
--- a/old-tests/Bugs/Bug9.hs
+++ b/old-tests/Bugs/Bug9.hs
@ -1,15 +1,17 @@

 module Bugs.Bug9 (main) where

+import Control.Applicative (empty)
+import Control.Monad (void)
+
 import Text.Megaparsec
-import Text.Megaparsec.Language (haskellStyle)
-import Text.Megaparsec.String (Parser)
 import Text.Megaparsec.Expr
+import Text.Megaparsec.String (Parser)
 import qualified Text.Megaparsec.Lexer as L

-import Test.HUnit hiding (Test)
 import Test.Framework
 import Test.Framework.Providers.HUnit
+import Test.HUnit hiding (Test)

 import Util

@ -25,12 +27,21 @@ main =

 -- Syntax analysis

+sc :: Stream s m Char => ParsecT s u m ()
+sc = L.space (void spaceChar) empty empty
+
+lexeme :: Stream s m Char => ParsecT s u m a -> ParsecT s u m a
+lexeme = L.lexeme sc
+
+integer :: Stream s m Char => ParsecT s u m Integer
+integer = lexeme L.integer
+
+operator :: Stream s m Char => String -> ParsecT s u m String
+operator = try . L.symbol sc
+
 parseTopLevel :: Parser Expr
 parseTopLevel = parseExpr <* eof

 parseExpr :: Parser Expr
 parseExpr = makeExprParser (Const <$> integer) table
-  where table    = [[ InfixL (Op <$ reserved ">>>") ]]
-        lexer    = L.makeLexer haskellStyle { L.reservedOpNames = [">>>"] }
-        integer  = L.integer  lexer
-        reserved = L.reserved lexer
+  where table = [[ InfixL (Op <$ operator ">>>") ]]
--- a/tests/Lexer.hs
+++ b/tests/Lexer.hs
@ -29,16 +29,206 @@

 module Lexer (tests) where

-import Control.Applicative (some, (<|>))
+import Control.Applicative (empty)
+import Control.Monad (void)
+import Data.Char
+  ( readLitChar
+  , showLitChar
+  , isDigit
+  , isAlphaNum
+  , isSpace
+  , toLower )
+import Data.List (findIndices, isInfixOf)
+import Data.Maybe (listToMaybe, maybeToList, isNothing, fromJust)
+import Numeric (showInt, showHex, showOct, showSigned)

 import Test.Framework
 import Test.Framework.Providers.QuickCheck2 (testProperty)
 import Test.QuickCheck

-import Text.Megaparsec.Char
-import Text.Megaparsec.Combinator
+import Text.Megaparsec.Error
 import Text.Megaparsec.Lexer
+import Text.Megaparsec.Pos
+import Text.Megaparsec.Prim
+import Text.Megaparsec.String
+import qualified Text.Megaparsec.Char as C
+
+import Util

 tests :: Test
 tests = testGroup "Lexer"
-        []
+        [ testProperty "space combinator"            prop_space
+        , testProperty "symbol combinator"           prop_symbol
+        , testProperty "symbol' combinator"          prop_symbol'
+        , testProperty "indentGuard combinator"      prop_indentGuard
+        , testProperty "charLiteral"                 prop_charLiteral
+        , testProperty "integer"                     prop_integer
+        , testProperty "decimal"                     prop_decimal
+        , testProperty "hexadecimal"                 prop_hexadecimal
+        , testProperty "octal"                       prop_octal
+        , testProperty "float 0"                     prop_float_0
+        , testProperty "float 1"                     prop_float_1
+        , testProperty "number"                      prop_number
+        , testProperty "signed"                      prop_signed ]
+
+newtype WhiteSpace = WhiteSpace
+  { getWhiteSpace :: String }
+  deriving (Show, Eq)
+
+instance Arbitrary WhiteSpace where
+  arbitrary = WhiteSpace . concat <$> listOf whiteUnit
+
+newtype Symbol = Symbol
+  { getSymbol :: String }
+  deriving (Show, Eq)
+
+instance Arbitrary Symbol where
+  arbitrary = Symbol <$> ((++) <$> symbolName <*> whiteChars)
+
+whiteUnit :: Gen String
+whiteUnit = oneof [whiteChars, whiteLine, whiteBlock]
+
+whiteChars :: Gen String
+whiteChars = listOf $ elements "\t\n "
+
+whiteLine :: Gen String
+whiteLine = commentOut <$> arbitrary `suchThat` goodEnough
+  where commentOut x = "//" ++ x ++ "\n"
+        goodEnough x = '\n' `notElem` x
+
+whiteBlock :: Gen String
+whiteBlock = commentOut <$> arbitrary `suchThat` goodEnough
+  where commentOut x = "/*" ++ x ++ "*/"
+        goodEnough x = not $ "*/" `isInfixOf` x
+
+symbolName :: Gen String
+symbolName = listOf $ arbitrary `suchThat` isAlphaNum
+
+sc :: Parser ()
+sc = space (void C.spaceChar) l b
+  where l = skipLineComment "//"
+        b = skipBlockComment "/*" "*/"
+
+sc' :: Parser ()
+sc' = space (void $ C.oneOf " \t") empty empty
+
+prop_space :: WhiteSpace -> Property
+prop_space w = checkParser p r s
+  where p = sc
+        r = Right ()
+        s = getWhiteSpace w
+
+prop_symbol :: Symbol -> Maybe Char -> Property
+prop_symbol = parseSymbol (symbol sc) id
+
+prop_symbol' :: Symbol -> Maybe Char -> Property
+prop_symbol' = parseSymbol (symbol' sc) (fmap toLower)
+
+parseSymbol :: (String -> Parser String) -> (String -> String)
+            -> Symbol -> Maybe Char -> Property
+parseSymbol p' f s' t = checkParser p r s
+  where p = p' (f g)
+        r | g == s || isSpace (last s) = Right (f g)
+          | otherwise = posErr (length s - 1) s [uneCh (last s), exEof]
+        g = takeWhile (not . isSpace) s
+        s = getSymbol s' ++ maybeToList t
+
+newtype IndLine = IndLine
+  { getIndLine :: String }
+  deriving (Show, Eq)
+
+instance Arbitrary IndLine where
+  arbitrary = IndLine . concat <$> sequence [spc, sym, spc, eol]
+    where spc = listOf (elements " \t")
+          sym = return "xxx"
+          eol = return "\n"
+
+prop_indentGuard :: IndLine -> IndLine -> IndLine -> Property
+prop_indentGuard l0 l1 l2 = checkParser p r s
+  where p  = ip (> 1) >>= \x -> sp >> ip (== x) >> sp >> ip (> x) >> sp
+        ip = indentGuard sc'
+        sp = void $ symbol sc' "xxx" <* C.eol
+        r | f' l0 <= 1     = posErr 0 s msg'
+          | f' l1 /= f' l0 = posErr (f l1 + g [l0]) s msg'
+          | f' l2 <= f' l0 = posErr (f l2 + g [l0, l1]) s msg'
+          | otherwise = Right ()
+        msg' = [msg "incorrect indentation"]
+        f    = length . takeWhile isSpace . getIndLine
+        f' x = sourceColumn $
+               updatePosString (initialPos "") $ take (f x) (getIndLine x)
+        g xs = sum $ length . getIndLine <$> xs
+        s    = concat $ getIndLine <$> [l0, l1, l2]
+
+prop_charLiteral :: String -> Bool -> Property
+prop_charLiteral t i = checkParser charLiteral r s
+  where b = listToMaybe $ readLitChar s
+        (h, g) = fromJust b
+        r | isNothing b = posErr 0 s $ exSpec "literal character" :
+                          [ if null s then uneEof else uneCh (head s) ]
+          | null g      = Right h
+          | otherwise   = posErr l s [uneCh (head g), exEof]
+        l = length s - length g
+        s = if null t || i then t else showLitChar (head t) (tail t)
+
+prop_integer :: NonNegative Integer -> Int -> Property
+prop_integer n' i = checkParser integer r s
+  where (r, s) = quasiCorrupted n' i showInt "integer"
+
+prop_decimal :: NonNegative Integer -> Int -> Property
+prop_decimal n' i = checkParser decimal r s
+  where (r, s) = quasiCorrupted n' i showInt "digit"
+
+prop_hexadecimal :: NonNegative Integer -> Int -> Property
+prop_hexadecimal n' i = checkParser hexadecimal r s
+  where (r, s) = quasiCorrupted n' i showHex "hexadecimal digit"
+
+prop_octal :: NonNegative Integer -> Int -> Property
+prop_octal n' i = checkParser octal r s
+  where (r, s) = quasiCorrupted n' i showOct "octal digit"
+
+prop_float_0 :: NonNegative Double -> Property
+prop_float_0 n' = checkParser float r s
+  where n = getNonNegative n'
+        r = Right n
+        s = show n
+
+prop_float_1 :: Maybe (NonNegative Integer) -> Property
+prop_float_1 n' = checkParser float r s
+  where r | isNothing n' = posErr 0 s [uneEof, exSpec "float"]
+          | otherwise    = posErr (length s) s [ uneEof, exCh '.', exCh 'E'
+                                  , exCh 'e', exSpec "digit" ]
+        s = maybe "" (show . getNonNegative) n'
+
+prop_number :: Either (NonNegative Integer) (NonNegative Double)
+            -> Integer -> Property
+prop_number n' i = checkParser number r s
+  where r | null s    = posErr 0 s [uneEof, exSpec "number"]
+          | otherwise =
+            Right $ case n' of
+                      Left  x -> Left  $ getNonNegative x
+                      Right x -> Right $ getNonNegative x
+        s = if i < 5
+            then ""
+            else either (show . getNonNegative) (show . getNonNegative) n'
+
+prop_signed :: Integer -> Int -> Bool -> Property
+prop_signed n i plus = checkParser p r s
+  where p = signed (hidden C.space) integer
+        r | i > length z = Right n
+          | otherwise    = posErr i s $ [uneCh '?', exSpec "integer"] ++
+                           (if i <= 0 then [exCh '+', exCh '-'] else []) ++
+                           [exEof | i > head (findIndices isDigit s)]
+        z = let bar = showSigned showInt 0 n ""
+            in if n < 0 || plus then bar else '+' : bar
+        s = if i <= length z then take i z ++ "?" ++ drop i z else z
+
+quasiCorrupted :: NonNegative Integer -> Int
+               -> (Integer -> String -> String) -> String
+               -> (Either ParseError Integer, String)
+quasiCorrupted n' i shower l = (r, s)
+  where n = getNonNegative n'
+        r | i > length z = Right n
+          | otherwise    = posErr i s $ [uneCh '?', exSpec l] ++
+                           [ exEof | i > 0 ]
+        z = shower n ""
+        s = if i <= length z then take i z ++ "?" ++ drop i z else z