From 98b2a83da787438658e8f19903df37d42a8192d0 Mon Sep 17 00:00:00 2001 From: iphydf Date: Wed, 9 Mar 2022 21:33:09 +0000 Subject: [PATCH] perf: Use `ByteString` instead of `[Char]` for input parsing. NOTE: This breaks lots of internal APIs, many of which are actually exposed as public API. `brick` does not break, but client code that depends on constants containing escape sequences will need to adapt. There's really no point in using `[Char]` here. The first thing the code did was convert all the bytes individually to `Char` and putting them into a linked list. Then, when actually parsing utf-8, it turns those `Char`s back into `Word8`s. All the other code is generally concerned with bytes and goes out of its way to deal with `[Char]` instead. The code is now simpler and much faster. Overall this gives another order of magnitude speedup over the previous speedup totalling roughly a 200x speedup over 2 commits ago for the 300KB case (it's less for smaller cases and much more for larger cases, because we already made one `n^2` algorithm `n`). This does not fix the polynomial time complexity, but at this point we can comfortably paste a low number of megabytes into the terminal and process it reasonably quickly. This is sufficient to support small file uploads via bracketed paste. Some timing: * 100KB: unmeasurable by hand (basically instant) * 1000KB: 2 seconds * 1500KB: 3 seconds * 2000KB: 5 seconds * 3000KB: 10 seconds * 4000KB: 17 seconds --- src/Graphics/Vty/Input/Classify.hs | 51 +++++++++++++----------- src/Graphics/Vty/Input/Classify/Parse.hs | 17 ++++---- src/Graphics/Vty/Input/Classify/Types.hs | 4 +- src/Graphics/Vty/Input/Focus.hs | 28 +++++++------ src/Graphics/Vty/Input/Loop.hs | 31 ++++++++------ src/Graphics/Vty/Input/Mouse.hs | 32 ++++++++------- src/Graphics/Vty/Input/Paste.hs | 31 +++++++------- src/Graphics/Vty/Output/XTermColor.hs | 28 ++++++++----- 8 files changed, 124 insertions(+), 98 deletions(-) diff --git a/src/Graphics/Vty/Input/Classify.hs b/src/Graphics/Vty/Input/Classify.hs index f1fcfb9..1ea9479 100644 --- a/src/Graphics/Vty/Input/Classify.hs +++ b/src/Graphics/Vty/Input/Classify.hs @@ -1,5 +1,5 @@ {-# OPTIONS_HADDOCK hide #-} --- This makes a kind of tri. Has space efficiency issues with large +-- This makes a kind of trie. Has space efficiency issues with large -- input blocks. Likely building a parser and just applying that would -- be better. module Graphics.Vty.Input.Classify @@ -16,25 +16,28 @@ import Graphics.Vty.Input.Classify.Types import Codec.Binary.UTF8.Generic (decode) -import Data.List (inits) +import Control.Arrow (first) import qualified Data.Map as M( fromList, lookup ) import Data.Maybe ( mapMaybe ) import qualified Data.Set as S( fromList, member ) -import Data.Char import Data.Word -compile :: ClassifyMap -> String -> KClass +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BS8 +import Data.ByteString.Char8 (ByteString) + +compile :: ClassifyMap -> ByteString -> KClass compile table = cl' where -- take all prefixes and create a set of these - prefixSet = S.fromList $ concatMap (init . inits . fst) table + prefixSet = S.fromList $ concatMap (init . BS.inits . BS8.pack . fst) table maxValidInputLength = maximum (map (length . fst) table) - eventForInput = M.fromList table - cl' [] = Prefix + eventForInput = M.fromList $ map (first BS8.pack) table + cl' inputBlock | BS8.null inputBlock = Prefix cl' inputBlock = case M.lookup inputBlock eventForInput of -- if the inputBlock is exactly what is expected for an -- event then consume the whole block and return the event - Just e -> Valid e [] + Just e -> Valid e BS8.empty Nothing -> case S.member inputBlock prefixSet of True -> Prefix -- look up progressively smaller tails of the input @@ -45,40 +48,40 @@ compile table = cl' where -- H: There will always be one match. The prefixSet -- contains, by definition, all prefixes of an event. False -> - let inputPrefixes = reverse $ take maxValidInputLength $ tail $ inits inputBlock + let inputPrefixes = reverse . take maxValidInputLength . tail . BS8.inits $ inputBlock in case mapMaybe (\s -> (,) s `fmap` M.lookup s eventForInput) inputPrefixes of - (s,e) : _ -> Valid e (drop (length s) inputBlock) + (s,e) : _ -> Valid e (BS8.drop (BS8.length s) inputBlock) -- neither a prefix or a full event. [] -> Invalid -classify :: ClassifyMap -> String -> KClass +classify :: ClassifyMap -> ByteString -> KClass classify table = let standardClassifier = compile table - in \s -> case s of + in \s -> case BS.uncons s of _ | bracketedPasteStarted s -> if bracketedPasteFinished s then parseBracketedPaste s else Prefix - _ | isMouseEvent s -> classifyMouseEvent s - _ | isFocusEvent s -> classifyFocusEvent s - c:cs | ord c >= 0xC2 -> classifyUtf8 c cs - _ -> standardClassifier s + _ | isMouseEvent s -> classifyMouseEvent s + _ | isFocusEvent s -> classifyFocusEvent s + Just (c,cs) | c >= 0xC2 -> classifyUtf8 c cs + _ -> standardClassifier s -classifyUtf8 :: Char -> String -> KClass +classifyUtf8 :: Word8 -> ByteString -> KClass classifyUtf8 c cs = - let n = utf8Length (ord c) - (codepoint,rest) = splitAt n (c:cs) + let n = utf8Length c + (codepoint,rest) = BS8.splitAt (n - 1) cs codepoint8 :: [Word8] - codepoint8 = map (fromIntegral . ord) codepoint + codepoint8 = c:BS.unpack codepoint in case decode codepoint8 of - _ | n < length codepoint -> Prefix - Just (unicodeChar, _) -> Valid (EvKey (KChar unicodeChar) []) rest + _ | n < BS.length codepoint + 1 -> Prefix + Just (unicodeChar, _) -> Valid (EvKey (KChar unicodeChar) []) rest -- something bad happened; just ignore and continue. - Nothing -> Invalid + Nothing -> Invalid -utf8Length :: (Num t, Ord a, Num a) => a -> t +utf8Length :: Word8 -> Int utf8Length c | c < 0x80 = 1 | c < 0xE0 = 2 diff --git a/src/Graphics/Vty/Input/Classify/Parse.hs b/src/Graphics/Vty/Input/Classify/Parse.hs index 6c6e9b5..414ee43 100644 --- a/src/Graphics/Vty/Input/Classify/Parse.hs +++ b/src/Graphics/Vty/Input/Classify/Parse.hs @@ -16,12 +16,15 @@ import Graphics.Vty.Input.Classify.Types import Control.Monad.Trans.Maybe import Control.Monad.State -type Parser a = MaybeT (State String) a +import qualified Data.ByteString.Char8 as BS8 +import Data.ByteString.Char8 (ByteString) + +type Parser a = MaybeT (State ByteString) a -- | Run a parser on a given input string. If the parser fails, return -- 'Invalid'. Otherwise return the valid event ('Valid') and the -- remaining unparsed characters. -runParser :: String -> Parser Event -> KClass +runParser :: ByteString -> Parser Event -> KClass runParser s parser = case runState (runMaybeT parser) s of (Nothing, _) -> Invalid @@ -36,9 +39,9 @@ failParse = fail "invalid parse" -- return '123' and consume those characters. readInt :: Parser Int readInt = do - s <- get + s <- BS8.unpack <$> get case (reads :: ReadS Int) s of - [(i, rest)] -> put rest >> return i + [(i, rest)] -> put (BS8.pack rest) >> return i _ -> failParse -- | Read a character from the input stream. If one cannot be read (e.g. @@ -46,9 +49,9 @@ readInt = do readChar :: Parser Char readChar = do s <- get - case s of - c:rest -> put rest >> return c - _ -> failParse + case BS8.uncons s of + Just (c,rest) -> put rest >> return c + Nothing -> failParse -- | Read a character from the input stream and fail parsing if it is -- not the specified character. diff --git a/src/Graphics/Vty/Input/Classify/Types.hs b/src/Graphics/Vty/Input/Classify/Types.hs index e16414b..f4f4d6a 100644 --- a/src/Graphics/Vty/Input/Classify/Types.hs +++ b/src/Graphics/Vty/Input/Classify/Types.hs @@ -8,8 +8,10 @@ where import Graphics.Vty.Input.Events +import Data.ByteString.Char8 (ByteString) + data KClass - = Valid Event String + = Valid Event ByteString -- ^ A valid event was parsed. Any unused characters from the input -- stream are also provided. | Invalid diff --git a/src/Graphics/Vty/Input/Focus.hs b/src/Graphics/Vty/Input/Focus.hs index a026f5a..bddb902 100644 --- a/src/Graphics/Vty/Input/Focus.hs +++ b/src/Graphics/Vty/Input/Focus.hs @@ -11,30 +11,32 @@ import Graphics.Vty.Input.Classify.Types import Graphics.Vty.Input.Classify.Parse import Control.Monad.State -import Data.List (isPrefixOf) + +import qualified Data.ByteString.Char8 as BS8 +import Data.ByteString.Char8 (ByteString) -- | These sequences set xterm-based terminals to send focus event -- sequences. -requestFocusEvents :: String -requestFocusEvents = "\ESC[?1004h" +requestFocusEvents :: ByteString +requestFocusEvents = BS8.pack "\ESC[?1004h" -- | These sequences disable focus events. -disableFocusEvents :: String -disableFocusEvents = "\ESC[?1004l" +disableFocusEvents :: ByteString +disableFocusEvents = BS8.pack "\ESC[?1004l" -- | Does the specified string begin with a focus event? -isFocusEvent :: String -> Bool -isFocusEvent s = isPrefixOf focusIn s || - isPrefixOf focusOut s +isFocusEvent :: ByteString -> Bool +isFocusEvent s = BS8.isPrefixOf focusIn s || + BS8.isPrefixOf focusOut s -focusIn :: String -focusIn = "\ESC[I" +focusIn :: ByteString +focusIn = BS8.pack "\ESC[I" -focusOut :: String -focusOut = "\ESC[O" +focusOut :: ByteString +focusOut = BS8.pack "\ESC[O" -- | Attempt to classify an input string as a focus event. -classifyFocusEvent :: String -> KClass +classifyFocusEvent :: ByteString -> KClass classifyFocusEvent s = runParser s $ do when (not $ isFocusEvent s) failParse diff --git a/src/Graphics/Vty/Input/Loop.hs b/src/Graphics/Vty/Input/Loop.hs index 503c53e..78cb21f 100644 --- a/src/Graphics/Vty/Input/Loop.hs +++ b/src/Graphics/Vty/Input/Loop.hs @@ -33,12 +33,15 @@ import Control.Monad.Trans.State (StateT(..), evalStateT) import Control.Monad.State.Class (MonadState, modify) import Control.Monad.Trans.Reader (ReaderT(..)) -import Data.Char +import qualified Data.ByteString.Char8 as BS8 +import qualified Data.ByteString as BS +import Data.ByteString.Char8 (ByteString) import Data.IORef import Data.Word (Word8) -import Foreign ( allocaArray, peekArray, Ptr ) +import Foreign (allocaArray) import Foreign.C.Types (CInt(..)) +import Foreign.Ptr (Ptr, castPtr) import System.IO import System.Posix.IO (fdReadBuf, setFdOption, FdOption(..)) @@ -76,10 +79,10 @@ data InputBuffer = InputBuffer makeLenses ''InputBuffer data InputState = InputState - { _unprocessedBytes :: String + { _unprocessedBytes :: ByteString , _appliedConfig :: Config , _inputBuffer :: InputBuffer - , _classifier :: String -> KClass + , _classifier :: ByteString -> KClass } makeLenses ''InputState @@ -104,7 +107,7 @@ loopInputProcessor = do dropInvalid loopInputProcessor -addBytesToProcess :: String -> InputM () +addBytesToProcess :: ByteString -> InputM () addBytesToProcess block = unprocessedBytes <>= block emit :: Event -> InputM () @@ -117,7 +120,7 @@ emit event = do -- -- Precondition: Under the threaded runtime. Only current use is from a -- forkOS thread. That case satisfies precondition. -readFromDevice :: InputM String +readFromDevice :: InputM ByteString readFromDevice = do newConfig <- view configRef >>= liftIO . readIORef oldConfig <- use appliedConfig @@ -137,9 +140,10 @@ readFromDevice = do threadWaitRead fd bytesRead <- fdReadBuf fd bufferPtr (fromIntegral maxBytes) if bytesRead > 0 - then map (chr . fromIntegral) <$> peekArray (fromIntegral bytesRead) bufferPtr - else return [] - when (not $ null stringRep) $ logMsg $ "input bytes: " ++ show stringRep + then BS.packCStringLen (castPtr bufferPtr, fromIntegral bytesRead) + else return BS.empty + when (not $ BS.null stringRep) $ + logMsg $ "input bytes: " ++ show (BS8.unpack stringRep) return stringRep applyConfig :: Fd -> Config -> IO () @@ -165,15 +169,16 @@ dropInvalid = do b <- use unprocessedBytes when (c b == Invalid) $ do logMsg "dropping input bytes" - unprocessedBytes .= [] + unprocessedBytes .= BS8.empty runInputProcessorLoop :: ClassifyMap -> Input -> IO () runInputProcessorLoop classifyTable input = do let bufferSize = 1024 allocaArray bufferSize $ \(bufferPtr :: Ptr Word8) -> do - s0 <- InputState [] <$> readIORef (_configRef input) - <*> pure (InputBuffer bufferPtr bufferSize) - <*> pure (classify classifyTable) + s0 <- InputState BS8.empty + <$> readIORef (_configRef input) + <*> pure (InputBuffer bufferPtr bufferSize) + <*> pure (classify classifyTable) runReaderT (evalStateT loopInputProcessor s0) input -- | Construct two IO actions: one to configure the terminal for Vty and diff --git a/src/Graphics/Vty/Input/Mouse.hs b/src/Graphics/Vty/Input/Mouse.hs index 8326d71..0080cba 100644 --- a/src/Graphics/Vty/Input/Mouse.hs +++ b/src/Graphics/Vty/Input/Mouse.hs @@ -15,10 +15,12 @@ import Graphics.Vty.Input.Classify.Types import Graphics.Vty.Input.Classify.Parse import Control.Monad.State -import Data.List (isPrefixOf) import Data.Maybe (catMaybes) import Data.Bits ((.&.)) +import qualified Data.ByteString.Char8 as BS8 +import Data.ByteString.Char8 (ByteString) + -- A mouse event in SGR extended mode is -- -- '\ESC' '[' '<' B ';' X ';' Y ';' ('M'|'m') @@ -32,28 +34,28 @@ import Data.Bits ((.&.)) -- | These sequences set xterm-based terminals to send mouse event -- sequences. -requestMouseEvents :: String -requestMouseEvents = "\ESC[?1000h\ESC[?1002h\ESC[?1006h" +requestMouseEvents :: ByteString +requestMouseEvents = BS8.pack "\ESC[?1000h\ESC[?1002h\ESC[?1006h" -- | These sequences disable mouse events. -disableMouseEvents :: String -disableMouseEvents = "\ESC[?1000l\ESC[?1002l\ESC[?1006l" +disableMouseEvents :: ByteString +disableMouseEvents = BS8.pack "\ESC[?1000l\ESC[?1002l\ESC[?1006l" -- | Does the specified string begin with a mouse event? -isMouseEvent :: String -> Bool +isMouseEvent :: ByteString -> Bool isMouseEvent s = isSGREvent s || isNormalEvent s -isSGREvent :: String -> Bool -isSGREvent = isPrefixOf sgrPrefix +isSGREvent :: ByteString -> Bool +isSGREvent = BS8.isPrefixOf sgrPrefix -sgrPrefix :: String -sgrPrefix = "\ESC[M" +sgrPrefix :: ByteString +sgrPrefix = BS8.pack "\ESC[M" -isNormalEvent :: String -> Bool -isNormalEvent = isPrefixOf normalPrefix +isNormalEvent :: ByteString -> Bool +isNormalEvent = BS8.isPrefixOf normalPrefix -normalPrefix :: String -normalPrefix = "\ESC[<" +normalPrefix :: ByteString +normalPrefix = BS8.pack "\ESC[<" -- Modifier bits: shiftBit :: Int @@ -88,7 +90,7 @@ hasBitSet :: Int -> Int -> Bool hasBitSet val bit = val .&. bit > 0 -- | Attempt to lassify an input string as a mouse event. -classifyMouseEvent :: String -> KClass +classifyMouseEvent :: ByteString -> KClass classifyMouseEvent s = runParser s $ do when (not $ isMouseEvent s) failParse diff --git a/src/Graphics/Vty/Input/Paste.hs b/src/Graphics/Vty/Input/Paste.hs index 0b97961..3f08f26 100644 --- a/src/Graphics/Vty/Input/Paste.hs +++ b/src/Graphics/Vty/Input/Paste.hs @@ -9,34 +9,33 @@ module Graphics.Vty.Input.Paste where import qualified Data.ByteString.Char8 as BS8 +import Data.ByteString.Char8 (ByteString) import Graphics.Vty.Input.Events import Graphics.Vty.Input.Classify.Types -import Data.List (isPrefixOf, isInfixOf) +bracketedPasteStart :: ByteString +bracketedPasteStart = BS8.pack "\ESC[200~" -bracketedPasteStart :: String -bracketedPasteStart = "\ESC[200~" - -bracketedPasteEnd :: String -bracketedPasteEnd = "\ESC[201~" +bracketedPasteEnd :: ByteString +bracketedPasteEnd = BS8.pack "\ESC[201~" -- | Does the input start a bracketed paste? -bracketedPasteStarted :: String -> Bool -bracketedPasteStarted = isPrefixOf bracketedPasteStart +bracketedPasteStarted :: ByteString -> Bool +bracketedPasteStarted = BS8.isPrefixOf bracketedPasteStart -- | Does the input contain a complete bracketed paste? -bracketedPasteFinished :: String -> Bool -bracketedPasteFinished = isInfixOf bracketedPasteEnd +bracketedPasteFinished :: ByteString -> Bool +bracketedPasteFinished = BS8.isInfixOf bracketedPasteEnd -- | Parse a bracketed paste. This should only be called on a string if -- both 'bracketedPasteStarted' and 'bracketedPasteFinished' return -- 'True'. -parseBracketedPaste :: String -> KClass +parseBracketedPaste :: ByteString -> KClass parseBracketedPaste s = - Valid (EvPaste p) (BS8.unpack $ BS8.drop (BS8.length end) rest') + Valid (EvPaste p) (BS8.drop endLen rest') where - start = BS8.pack bracketedPasteStart - end = BS8.pack bracketedPasteEnd - (_, rest ) = BS8.breakSubstring start . BS8.pack $ s - (p, rest') = BS8.breakSubstring end . BS8.drop (BS8.length start) $ rest + startLen = BS8.length bracketedPasteStart + endLen = BS8.length bracketedPasteEnd + (_, rest ) = BS8.breakSubstring bracketedPasteStart s + (p, rest') = BS8.breakSubstring bracketedPasteEnd . BS8.drop startLen $ rest diff --git a/src/Graphics/Vty/Output/XTermColor.hs b/src/Graphics/Vty/Output/XTermColor.hs index 9a17a11..a6272e1 100644 --- a/src/Graphics/Vty/Output/XTermColor.hs +++ b/src/Graphics/Vty/Output/XTermColor.hs @@ -15,13 +15,17 @@ import qualified Graphics.Vty.Output.TerminfoBased as TerminfoBased import Blaze.ByteString.Builder (writeToByteString) import Blaze.ByteString.Builder.Word (writeWord8) +import qualified Data.ByteString.Char8 as BS8 +import Data.ByteString.Char8 (ByteString) +import Foreign.Ptr (castPtr) + import Control.Monad (void, when) import Control.Monad.Trans import Data.Char (toLower) import Data.IORef -import System.Posix.IO (fdWrite) -import System.Posix.Types (Fd) +import System.Posix.IO (fdWriteBuf) +import System.Posix.Types (ByteCount, Fd) import System.Posix.Env (getEnv) import Data.List (isInfixOf) @@ -31,6 +35,12 @@ import Data.Maybe (catMaybes) import Data.Monoid ((<>)) #endif +-- | Write a 'ByteString' to an 'Fd'. +fdWrite :: Fd -> ByteString -> IO ByteCount +fdWrite fd s = + BS8.useAsCStringLen s $ \(buf,len) -> do + fdWriteBuf fd (castPtr buf) (fromIntegral len) + -- | Construct an Xterm output driver. Initialize the display to UTF-8. reserveTerminal :: ( Applicative m, MonadIO m ) => String -> Fd -> m Output reserveTerminal variant outFd = liftIO $ do @@ -100,19 +110,19 @@ utf8Active = do -- | Enable bracketed paste mode: -- http://cirw.in/blog/bracketed-paste -enableBracketedPastes :: String -enableBracketedPastes = "\ESC[?2004h" +enableBracketedPastes :: ByteString +enableBracketedPastes = BS8.pack "\ESC[?2004h" -- | Disable bracketed paste mode: -disableBracketedPastes :: String -disableBracketedPastes = "\ESC[?2004l" +disableBracketedPastes :: ByteString +disableBracketedPastes = BS8.pack "\ESC[?2004l" -- | These sequences set xterm based terminals to UTF-8 output. -- -- There is no known terminfo capability equivalent to this. -setUtf8CharSet, setDefaultCharSet :: String -setUtf8CharSet = "\ESC%G" -setDefaultCharSet = "\ESC%@" +setUtf8CharSet, setDefaultCharSet :: ByteString +setUtf8CharSet = BS8.pack "\ESC%G" +setDefaultCharSet = BS8.pack "\ESC%@" xtermInlineHack :: Output -> IO () xtermInlineHack t = do