perf: Use ByteString instead of [Char] for input parsing.

NOTE: This breaks lots of internal APIs, many of which are actually
exposed as public API. `brick` does not break, but client code that
depends on constants containing escape sequences will need to adapt.

There's really no point in using `[Char]` here. The first thing the code
did was convert all the bytes individually to `Char` and putting them
into a linked list. Then, when actually parsing utf-8, it turns those
`Char`s back into `Word8`s. All the other code is generally concerned
with bytes and goes out of its way to deal with `[Char]` instead. The
code is now simpler and much faster.

Overall this gives another order of magnitude speedup over the previous
speedup totalling roughly a 200x speedup over 2 commits ago for the
300KB case (it's less for smaller cases and much more for larger cases,
because we already made one `n^2` algorithm `n`).

This does not fix the polynomial time complexity, but at this point we
can comfortably paste a low number of megabytes into the terminal and
process it reasonably quickly. This is sufficient to support small file
uploads via bracketed paste.

Some timing:
* 100KB: unmeasurable by hand (basically instant)
* 1000KB: 2 seconds
* 1500KB: 3 seconds
* 2000KB: 5 seconds
* 3000KB: 10 seconds
* 4000KB: 17 seconds
This commit is contained in:
iphydf 2022-03-09 21:33:09 +00:00
parent ecee2cd2fc
commit 98b2a83da7
No known key found for this signature in database
GPG Key ID: 3855DBA2D74403C9
8 changed files with 124 additions and 98 deletions

View File

@ -1,5 +1,5 @@
{-# OPTIONS_HADDOCK hide #-}
-- This makes a kind of tri. Has space efficiency issues with large
-- This makes a kind of trie. Has space efficiency issues with large
-- input blocks. Likely building a parser and just applying that would
-- be better.
module Graphics.Vty.Input.Classify
@ -16,25 +16,28 @@ import Graphics.Vty.Input.Classify.Types
import Codec.Binary.UTF8.Generic (decode)
import Data.List (inits)
import Control.Arrow (first)
import qualified Data.Map as M( fromList, lookup )
import Data.Maybe ( mapMaybe )
import qualified Data.Set as S( fromList, member )
import Data.Char
import Data.Word
compile :: ClassifyMap -> String -> KClass
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
compile :: ClassifyMap -> ByteString -> KClass
compile table = cl' where
-- take all prefixes and create a set of these
prefixSet = S.fromList $ concatMap (init . inits . fst) table
prefixSet = S.fromList $ concatMap (init . BS.inits . BS8.pack . fst) table
maxValidInputLength = maximum (map (length . fst) table)
eventForInput = M.fromList table
cl' [] = Prefix
eventForInput = M.fromList $ map (first BS8.pack) table
cl' inputBlock | BS8.null inputBlock = Prefix
cl' inputBlock = case M.lookup inputBlock eventForInput of
-- if the inputBlock is exactly what is expected for an
-- event then consume the whole block and return the event
Just e -> Valid e []
Just e -> Valid e BS8.empty
Nothing -> case S.member inputBlock prefixSet of
True -> Prefix
-- look up progressively smaller tails of the input
@ -45,40 +48,40 @@ compile table = cl' where
-- H: There will always be one match. The prefixSet
-- contains, by definition, all prefixes of an event.
False ->
let inputPrefixes = reverse $ take maxValidInputLength $ tail $ inits inputBlock
let inputPrefixes = reverse . take maxValidInputLength . tail . BS8.inits $ inputBlock
in case mapMaybe (\s -> (,) s `fmap` M.lookup s eventForInput) inputPrefixes of
(s,e) : _ -> Valid e (drop (length s) inputBlock)
(s,e) : _ -> Valid e (BS8.drop (BS8.length s) inputBlock)
-- neither a prefix or a full event.
[] -> Invalid
classify :: ClassifyMap -> String -> KClass
classify :: ClassifyMap -> ByteString -> KClass
classify table =
let standardClassifier = compile table
in \s -> case s of
in \s -> case BS.uncons s of
_ | bracketedPasteStarted s ->
if bracketedPasteFinished s
then parseBracketedPaste s
else Prefix
_ | isMouseEvent s -> classifyMouseEvent s
_ | isFocusEvent s -> classifyFocusEvent s
c:cs | ord c >= 0xC2 -> classifyUtf8 c cs
_ -> standardClassifier s
_ | isMouseEvent s -> classifyMouseEvent s
_ | isFocusEvent s -> classifyFocusEvent s
Just (c,cs) | c >= 0xC2 -> classifyUtf8 c cs
_ -> standardClassifier s
classifyUtf8 :: Char -> String -> KClass
classifyUtf8 :: Word8 -> ByteString -> KClass
classifyUtf8 c cs =
let n = utf8Length (ord c)
(codepoint,rest) = splitAt n (c:cs)
let n = utf8Length c
(codepoint,rest) = BS8.splitAt (n - 1) cs
codepoint8 :: [Word8]
codepoint8 = map (fromIntegral . ord) codepoint
codepoint8 = c:BS.unpack codepoint
in case decode codepoint8 of
_ | n < length codepoint -> Prefix
Just (unicodeChar, _) -> Valid (EvKey (KChar unicodeChar) []) rest
_ | n < BS.length codepoint + 1 -> Prefix
Just (unicodeChar, _) -> Valid (EvKey (KChar unicodeChar) []) rest
-- something bad happened; just ignore and continue.
Nothing -> Invalid
Nothing -> Invalid
utf8Length :: (Num t, Ord a, Num a) => a -> t
utf8Length :: Word8 -> Int
utf8Length c
| c < 0x80 = 1
| c < 0xE0 = 2

View File

@ -16,12 +16,15 @@ import Graphics.Vty.Input.Classify.Types
import Control.Monad.Trans.Maybe
import Control.Monad.State
type Parser a = MaybeT (State String) a
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
type Parser a = MaybeT (State ByteString) a
-- | Run a parser on a given input string. If the parser fails, return
-- 'Invalid'. Otherwise return the valid event ('Valid') and the
-- remaining unparsed characters.
runParser :: String -> Parser Event -> KClass
runParser :: ByteString -> Parser Event -> KClass
runParser s parser =
case runState (runMaybeT parser) s of
(Nothing, _) -> Invalid
@ -36,9 +39,9 @@ failParse = fail "invalid parse"
-- return '123' and consume those characters.
readInt :: Parser Int
readInt = do
s <- get
s <- BS8.unpack <$> get
case (reads :: ReadS Int) s of
[(i, rest)] -> put rest >> return i
[(i, rest)] -> put (BS8.pack rest) >> return i
_ -> failParse
-- | Read a character from the input stream. If one cannot be read (e.g.
@ -46,9 +49,9 @@ readInt = do
readChar :: Parser Char
readChar = do
s <- get
case s of
c:rest -> put rest >> return c
_ -> failParse
case BS8.uncons s of
Just (c,rest) -> put rest >> return c
Nothing -> failParse
-- | Read a character from the input stream and fail parsing if it is
-- not the specified character.

View File

@ -8,8 +8,10 @@ where
import Graphics.Vty.Input.Events
import Data.ByteString.Char8 (ByteString)
data KClass
= Valid Event String
= Valid Event ByteString
-- ^ A valid event was parsed. Any unused characters from the input
-- stream are also provided.
| Invalid

View File

@ -11,30 +11,32 @@ import Graphics.Vty.Input.Classify.Types
import Graphics.Vty.Input.Classify.Parse
import Control.Monad.State
import Data.List (isPrefixOf)
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
-- | These sequences set xterm-based terminals to send focus event
-- sequences.
requestFocusEvents :: String
requestFocusEvents = "\ESC[?1004h"
requestFocusEvents :: ByteString
requestFocusEvents = BS8.pack "\ESC[?1004h"
-- | These sequences disable focus events.
disableFocusEvents :: String
disableFocusEvents = "\ESC[?1004l"
disableFocusEvents :: ByteString
disableFocusEvents = BS8.pack "\ESC[?1004l"
-- | Does the specified string begin with a focus event?
isFocusEvent :: String -> Bool
isFocusEvent s = isPrefixOf focusIn s ||
isPrefixOf focusOut s
isFocusEvent :: ByteString -> Bool
isFocusEvent s = BS8.isPrefixOf focusIn s ||
BS8.isPrefixOf focusOut s
focusIn :: String
focusIn = "\ESC[I"
focusIn :: ByteString
focusIn = BS8.pack "\ESC[I"
focusOut :: String
focusOut = "\ESC[O"
focusOut :: ByteString
focusOut = BS8.pack "\ESC[O"
-- | Attempt to classify an input string as a focus event.
classifyFocusEvent :: String -> KClass
classifyFocusEvent :: ByteString -> KClass
classifyFocusEvent s = runParser s $ do
when (not $ isFocusEvent s) failParse

View File

@ -33,12 +33,15 @@ import Control.Monad.Trans.State (StateT(..), evalStateT)
import Control.Monad.State.Class (MonadState, modify)
import Control.Monad.Trans.Reader (ReaderT(..))
import Data.Char
import qualified Data.ByteString.Char8 as BS8
import qualified Data.ByteString as BS
import Data.ByteString.Char8 (ByteString)
import Data.IORef
import Data.Word (Word8)
import Foreign ( allocaArray, peekArray, Ptr )
import Foreign (allocaArray)
import Foreign.C.Types (CInt(..))
import Foreign.Ptr (Ptr, castPtr)
import System.IO
import System.Posix.IO (fdReadBuf, setFdOption, FdOption(..))
@ -76,10 +79,10 @@ data InputBuffer = InputBuffer
makeLenses ''InputBuffer
data InputState = InputState
{ _unprocessedBytes :: String
{ _unprocessedBytes :: ByteString
, _appliedConfig :: Config
, _inputBuffer :: InputBuffer
, _classifier :: String -> KClass
, _classifier :: ByteString -> KClass
}
makeLenses ''InputState
@ -104,7 +107,7 @@ loopInputProcessor = do
dropInvalid
loopInputProcessor
addBytesToProcess :: String -> InputM ()
addBytesToProcess :: ByteString -> InputM ()
addBytesToProcess block = unprocessedBytes <>= block
emit :: Event -> InputM ()
@ -117,7 +120,7 @@ emit event = do
--
-- Precondition: Under the threaded runtime. Only current use is from a
-- forkOS thread. That case satisfies precondition.
readFromDevice :: InputM String
readFromDevice :: InputM ByteString
readFromDevice = do
newConfig <- view configRef >>= liftIO . readIORef
oldConfig <- use appliedConfig
@ -137,9 +140,10 @@ readFromDevice = do
threadWaitRead fd
bytesRead <- fdReadBuf fd bufferPtr (fromIntegral maxBytes)
if bytesRead > 0
then map (chr . fromIntegral) <$> peekArray (fromIntegral bytesRead) bufferPtr
else return []
when (not $ null stringRep) $ logMsg $ "input bytes: " ++ show stringRep
then BS.packCStringLen (castPtr bufferPtr, fromIntegral bytesRead)
else return BS.empty
when (not $ BS.null stringRep) $
logMsg $ "input bytes: " ++ show (BS8.unpack stringRep)
return stringRep
applyConfig :: Fd -> Config -> IO ()
@ -165,15 +169,16 @@ dropInvalid = do
b <- use unprocessedBytes
when (c b == Invalid) $ do
logMsg "dropping input bytes"
unprocessedBytes .= []
unprocessedBytes .= BS8.empty
runInputProcessorLoop :: ClassifyMap -> Input -> IO ()
runInputProcessorLoop classifyTable input = do
let bufferSize = 1024
allocaArray bufferSize $ \(bufferPtr :: Ptr Word8) -> do
s0 <- InputState [] <$> readIORef (_configRef input)
<*> pure (InputBuffer bufferPtr bufferSize)
<*> pure (classify classifyTable)
s0 <- InputState BS8.empty
<$> readIORef (_configRef input)
<*> pure (InputBuffer bufferPtr bufferSize)
<*> pure (classify classifyTable)
runReaderT (evalStateT loopInputProcessor s0) input
-- | Construct two IO actions: one to configure the terminal for Vty and

View File

@ -15,10 +15,12 @@ import Graphics.Vty.Input.Classify.Types
import Graphics.Vty.Input.Classify.Parse
import Control.Monad.State
import Data.List (isPrefixOf)
import Data.Maybe (catMaybes)
import Data.Bits ((.&.))
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
-- A mouse event in SGR extended mode is
--
-- '\ESC' '[' '<' B ';' X ';' Y ';' ('M'|'m')
@ -32,28 +34,28 @@ import Data.Bits ((.&.))
-- | These sequences set xterm-based terminals to send mouse event
-- sequences.
requestMouseEvents :: String
requestMouseEvents = "\ESC[?1000h\ESC[?1002h\ESC[?1006h"
requestMouseEvents :: ByteString
requestMouseEvents = BS8.pack "\ESC[?1000h\ESC[?1002h\ESC[?1006h"
-- | These sequences disable mouse events.
disableMouseEvents :: String
disableMouseEvents = "\ESC[?1000l\ESC[?1002l\ESC[?1006l"
disableMouseEvents :: ByteString
disableMouseEvents = BS8.pack "\ESC[?1000l\ESC[?1002l\ESC[?1006l"
-- | Does the specified string begin with a mouse event?
isMouseEvent :: String -> Bool
isMouseEvent :: ByteString -> Bool
isMouseEvent s = isSGREvent s || isNormalEvent s
isSGREvent :: String -> Bool
isSGREvent = isPrefixOf sgrPrefix
isSGREvent :: ByteString -> Bool
isSGREvent = BS8.isPrefixOf sgrPrefix
sgrPrefix :: String
sgrPrefix = "\ESC[M"
sgrPrefix :: ByteString
sgrPrefix = BS8.pack "\ESC[M"
isNormalEvent :: String -> Bool
isNormalEvent = isPrefixOf normalPrefix
isNormalEvent :: ByteString -> Bool
isNormalEvent = BS8.isPrefixOf normalPrefix
normalPrefix :: String
normalPrefix = "\ESC[<"
normalPrefix :: ByteString
normalPrefix = BS8.pack "\ESC[<"
-- Modifier bits:
shiftBit :: Int
@ -88,7 +90,7 @@ hasBitSet :: Int -> Int -> Bool
hasBitSet val bit = val .&. bit > 0
-- | Attempt to lassify an input string as a mouse event.
classifyMouseEvent :: String -> KClass
classifyMouseEvent :: ByteString -> KClass
classifyMouseEvent s = runParser s $ do
when (not $ isMouseEvent s) failParse

View File

@ -9,34 +9,33 @@ module Graphics.Vty.Input.Paste
where
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
import Graphics.Vty.Input.Events
import Graphics.Vty.Input.Classify.Types
import Data.List (isPrefixOf, isInfixOf)
bracketedPasteStart :: ByteString
bracketedPasteStart = BS8.pack "\ESC[200~"
bracketedPasteStart :: String
bracketedPasteStart = "\ESC[200~"
bracketedPasteEnd :: String
bracketedPasteEnd = "\ESC[201~"
bracketedPasteEnd :: ByteString
bracketedPasteEnd = BS8.pack "\ESC[201~"
-- | Does the input start a bracketed paste?
bracketedPasteStarted :: String -> Bool
bracketedPasteStarted = isPrefixOf bracketedPasteStart
bracketedPasteStarted :: ByteString -> Bool
bracketedPasteStarted = BS8.isPrefixOf bracketedPasteStart
-- | Does the input contain a complete bracketed paste?
bracketedPasteFinished :: String -> Bool
bracketedPasteFinished = isInfixOf bracketedPasteEnd
bracketedPasteFinished :: ByteString -> Bool
bracketedPasteFinished = BS8.isInfixOf bracketedPasteEnd
-- | Parse a bracketed paste. This should only be called on a string if
-- both 'bracketedPasteStarted' and 'bracketedPasteFinished' return
-- 'True'.
parseBracketedPaste :: String -> KClass
parseBracketedPaste :: ByteString -> KClass
parseBracketedPaste s =
Valid (EvPaste p) (BS8.unpack $ BS8.drop (BS8.length end) rest')
Valid (EvPaste p) (BS8.drop endLen rest')
where
start = BS8.pack bracketedPasteStart
end = BS8.pack bracketedPasteEnd
(_, rest ) = BS8.breakSubstring start . BS8.pack $ s
(p, rest') = BS8.breakSubstring end . BS8.drop (BS8.length start) $ rest
startLen = BS8.length bracketedPasteStart
endLen = BS8.length bracketedPasteEnd
(_, rest ) = BS8.breakSubstring bracketedPasteStart s
(p, rest') = BS8.breakSubstring bracketedPasteEnd . BS8.drop startLen $ rest

View File

@ -15,13 +15,17 @@ import qualified Graphics.Vty.Output.TerminfoBased as TerminfoBased
import Blaze.ByteString.Builder (writeToByteString)
import Blaze.ByteString.Builder.Word (writeWord8)
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
import Foreign.Ptr (castPtr)
import Control.Monad (void, when)
import Control.Monad.Trans
import Data.Char (toLower)
import Data.IORef
import System.Posix.IO (fdWrite)
import System.Posix.Types (Fd)
import System.Posix.IO (fdWriteBuf)
import System.Posix.Types (ByteCount, Fd)
import System.Posix.Env (getEnv)
import Data.List (isInfixOf)
@ -31,6 +35,12 @@ import Data.Maybe (catMaybes)
import Data.Monoid ((<>))
#endif
-- | Write a 'ByteString' to an 'Fd'.
fdWrite :: Fd -> ByteString -> IO ByteCount
fdWrite fd s =
BS8.useAsCStringLen s $ \(buf,len) -> do
fdWriteBuf fd (castPtr buf) (fromIntegral len)
-- | Construct an Xterm output driver. Initialize the display to UTF-8.
reserveTerminal :: ( Applicative m, MonadIO m ) => String -> Fd -> m Output
reserveTerminal variant outFd = liftIO $ do
@ -100,19 +110,19 @@ utf8Active = do
-- | Enable bracketed paste mode:
-- http://cirw.in/blog/bracketed-paste
enableBracketedPastes :: String
enableBracketedPastes = "\ESC[?2004h"
enableBracketedPastes :: ByteString
enableBracketedPastes = BS8.pack "\ESC[?2004h"
-- | Disable bracketed paste mode:
disableBracketedPastes :: String
disableBracketedPastes = "\ESC[?2004l"
disableBracketedPastes :: ByteString
disableBracketedPastes = BS8.pack "\ESC[?2004l"
-- | These sequences set xterm based terminals to UTF-8 output.
--
-- There is no known terminfo capability equivalent to this.
setUtf8CharSet, setDefaultCharSet :: String
setUtf8CharSet = "\ESC%G"
setDefaultCharSet = "\ESC%@"
setUtf8CharSet, setDefaultCharSet :: ByteString
setUtf8CharSet = BS8.pack "\ESC%G"
setDefaultCharSet = BS8.pack "\ESC%@"
xtermInlineHack :: Output -> IO ()
xtermInlineHack t = do