unescape printable characters (#3140)

* unescape printable characters

* add comments

* add tests

* improve the parser

* simplify code & add more docs
This commit is contained in:
Kobayashi 2022-09-15 23:12:51 +08:00 committed by GitHub
parent 1568ce7b0a
commit 2bd1863eea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 78 additions and 9 deletions

View File

@ -79,9 +79,9 @@ import qualified Outputable as Out
import SrcLoc
#endif
#if MIN_VERSION_ghc(9,3,0)
import GHC.Utils.Logger
import GHC.Driver.Config.Diagnostic
import Data.Maybe
import Data.Maybe
import GHC.Driver.Config.Diagnostic
import GHC.Utils.Logger
#endif
-- | A compatible function to print `Outputable` instances

View File

@ -81,6 +81,7 @@ import GHC.IO.Exception
import GHC.IO.Handle.Internals
import GHC.IO.Handle.Types
import GHC.Stack
import Ide.PluginUtils (unescape)
import System.Environment.Blank (getEnvDefault)
import System.FilePath
import System.IO.Unsafe
@ -287,10 +288,17 @@ instance Outputable SDoc where
#endif
-- | Print a GHC value in `defaultUserStyle` without unique symbols.
-- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally.
--
-- This is the most common print utility, will print with a user-friendly style like: `a_a4ME` as `a`.
-- This is the most common print utility.
-- It will do something additionally compared to what the 'Outputable' instance does.
--
-- It internal using `showSDocUnsafe` with `unsafeGlobalDynFlags`.
-- 1. print with a user-friendly style: `a_a4ME` as `a`.
-- 2. unescape escape sequences of printable unicode characters within a pair of double quotes
printOutputable :: Outputable a => a -> T.Text
printOutputable = T.pack . printWithoutUniques
printOutputable =
-- IfaceTyLit from GHC.Iface.Type implements Outputable with 'show'.
-- Showing a String escapes non-ascii printable characters. We unescape it here.
-- More discussion at https://github.com/haskell/haskell-language-server/issues/3115.
unescape . T.pack . printWithoutUniques
{-# INLINE printOutputable #-}

View File

@ -56,6 +56,7 @@ library
, text
, transformers
, unordered-containers
, megaparsec > 9
if os(windows)
build-depends: Win32
@ -91,4 +92,5 @@ test-suite tests
, tasty
, tasty-hunit
, tasty-rerun
, text
, lsp-types

View File

@ -32,6 +32,7 @@ module Ide.PluginUtils
handleMaybe,
handleMaybeM,
throwPluginError,
unescape,
)
where
@ -43,10 +44,12 @@ import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Data.Algorithm.Diff
import Data.Algorithm.DiffOutput
import Data.Bifunctor (Bifunctor (first))
import Data.Char (isPrint, showLitChar)
import Data.Functor (void)
import qualified Data.HashMap.Strict as H
import Data.List (find)
import Data.String (IsString (fromString))
import qualified Data.Text as T
import Data.Void (Void)
import Ide.Plugin.Config
import Ide.Plugin.Properties
import Ide.Types
@ -57,6 +60,9 @@ import Language.LSP.Types hiding
SemanticTokensEdit (_start))
import qualified Language.LSP.Types as J
import Language.LSP.Types.Capabilities
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P
import qualified Text.Megaparsec.Char.Lexer as P
-- ---------------------------------------------------------------------
@ -255,3 +261,34 @@ pluginResponse :: Monad m => ExceptT String m a -> m (Either ResponseError a)
pluginResponse =
fmap (first (\msg -> ResponseError InternalError (fromString msg) Nothing))
. runExceptT
-- ---------------------------------------------------------------------
type TextParser = P.Parsec Void T.Text
-- | Unescape printable escape sequences within double quotes.
-- This is useful if you have to call 'show' indirectly, and it escapes some characters which you would prefer to
-- display as is.
unescape :: T.Text -> T.Text
unescape input =
case P.runParser escapedTextParser "inline" input of
Left _ -> input
Right strs -> T.pack strs
-- | Parser for a string that contains double quotes. Returns unescaped string.
escapedTextParser :: TextParser String
escapedTextParser = concat <$> P.many (outsideStringLiteral P.<|> stringLiteral)
where
outsideStringLiteral :: TextParser String
outsideStringLiteral = P.someTill (P.anySingleBut '"') (P.lookAhead (void (P.char '"') P.<|> P.eof))
stringLiteral :: TextParser String
stringLiteral = do
inside <- P.char '"' >> P.manyTill P.charLiteral (P.char '"')
let f '"' = "\\\"" -- double quote should still be escaped
-- Despite the docs, 'showLitChar' and 'showLitString' from 'Data.Char' DOES ESCAPE unicode printable
-- characters. So we need to call 'isPrint' from 'Data.Char' manually.
f ch = if isPrint ch then [ch] else showLitChar ch ""
inside' = concatMap f inside
pure $ "\"" <> inside' <> "\""

View File

@ -1,13 +1,35 @@
{-# LANGUAGE OverloadedStrings #-}
module Ide.PluginUtilsTest
( tests
) where
import Ide.PluginUtils (positionInRange)
import Data.Char (isPrint)
import qualified Data.Text as T
import Ide.PluginUtils (positionInRange, unescape)
import Language.LSP.Types (Position (Position), Range (Range))
import Test.Tasty
import Test.Tasty.HUnit
tests :: TestTree
tests = testGroup "PluginUtils"
[
[ unescapeTest
]
unescapeTest :: TestTree
unescapeTest = testGroup "unescape"
[ testCase "no double quote" $
unescape "hello世界" @?= "hello世界"
, testCase "whole string quoted" $
unescape "\"hello\\19990\\30028\"" @?= "\"hello世界\""
, testCase "text before quotes should not be unescaped" $
unescape "\\19990a\"hello\\30028\"" @?= "\\19990a\"hello界\""
, testCase "some text after quotes" $
unescape "\"hello\\19990\\30028\"abc" @?= "\"hello世界\"abc"
, testCase "many pairs of quote" $
unescape "oo\"hello\\19990\\30028\"abc\"\1087\1088\1080\1074\1077\1090\"hh" @?= "oo\"hello世界\"abc\"привет\"hh"
, testCase "double quote itself should not be unescaped" $
unescape "\"\\\"\\19990o\"" @?= "\"\\\"世o\""
, testCase "control characters should not be escaped" $
unescape "\"\\n\\t\"" @?= "\"\\n\\t\""
]