perf: Process chunks of bracketed pastes only once.

The current code is `O(n^2)` because it processes the entire input each
time a new chunk arrives. This gets very expensive for large inputs.

This commit changes the code to process each chunk exactly twice: once
to see if it contains the end of the bracketed paste, and another time
to parse the whole paste and return it as an event.

The code can now process more than 1MB/sec. At this point, my tests are
probably more network bound, so these timings might be off, and instead
be testing my network throughput. Good enough for me anyway :).

Timing:
* 1MB: 1s
* 4MB: 3s
* 7MB: 6s
* 10MB: 7s
* 13MB: 10s
* 15MB: 12s
* 20MB: 16s
This commit is contained in:
iphydf 2022-03-09 23:38:23 +00:00
parent 98b2a83da7
commit 547ab196fc
No known key found for this signature in database
GPG Key ID: 3855DBA2D74403C9
3 changed files with 55 additions and 18 deletions

View File

@ -5,6 +5,7 @@
module Graphics.Vty.Input.Classify
( classify
, KClass(..)
, ClassifierState(..)
)
where
@ -27,6 +28,17 @@ import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS8
import Data.ByteString.Char8 (ByteString)
-- | Whether the classifier is currently processing a chunked format.
-- Currently, only bracketed pastes use this.
data ClassifierState
= ClassifierStart
-- ^ Not processing a chunked format.
| ClassifierInChunk ByteString [ByteString]
-- ^ Currently processing a chunked format. The initial chunk is in the
-- first argument and a reversed remainder of the chunks is collected in
-- the second argument. At the end of the processing, the chunks are
-- reversed and concatenated with the final chunk.
compile :: ClassifyMap -> ByteString -> KClass
compile table = cl' where
-- take all prefixes and create a set of these
@ -54,18 +66,27 @@ compile table = cl' where
-- neither a prefix or a full event.
[] -> Invalid
classify :: ClassifyMap -> ByteString -> KClass
classify table =
let standardClassifier = compile table
in \s -> case BS.uncons s of
_ | bracketedPasteStarted s ->
classify :: ClassifyMap -> ClassifierState -> ByteString -> KClass
classify table = process
where
standardClassifier = compile table
process ClassifierStart s =
case BS.uncons s of
_ | bracketedPasteStarted s ->
if bracketedPasteFinished s
then parseBracketedPaste s
else Chunk
_ | isMouseEvent s -> classifyMouseEvent s
_ | isFocusEvent s -> classifyFocusEvent s
Just (c,cs) | c >= 0xC2 -> classifyUtf8 c cs
_ -> standardClassifier s
process (ClassifierInChunk p ps) s | bracketedPasteStarted p =
if bracketedPasteFinished s
then parseBracketedPaste s
else Prefix
_ | isMouseEvent s -> classifyMouseEvent s
_ | isFocusEvent s -> classifyFocusEvent s
Just (c,cs) | c >= 0xC2 -> classifyUtf8 c cs
_ -> standardClassifier s
then parseBracketedPaste $ BS.concat $ p:reverse (s:ps)
else Chunk
process ClassifierInChunk{} _ = Invalid
classifyUtf8 :: Word8 -> ByteString -> KClass
classifyUtf8 c cs =

View File

@ -19,4 +19,7 @@ data KClass
| Prefix
-- ^ The input characters form the prefix of a valid event character
-- sequence.
| Chunk
-- ^ The input characters are either start of a bracketed paste chunk
-- or in the middle of a bracketed paste chunk.
deriving(Show, Eq)

View File

@ -80,9 +80,10 @@ makeLenses ''InputBuffer
data InputState = InputState
{ _unprocessedBytes :: ByteString
, _classifierState :: ClassifierState
, _appliedConfig :: Config
, _inputBuffer :: InputBuffer
, _classifier :: ByteString -> KClass
, _classifier :: ClassifierState -> ByteString -> KClass
}
makeLenses ''InputState
@ -154,28 +155,40 @@ applyConfig _ _ = fail "(vty) applyConfig was not provided a complete configurat
parseEvent :: InputM Event
parseEvent = do
c <- use classifier
s <- use classifierState
b <- use unprocessedBytes
case c b of
case c s b of
Valid e remaining -> do
logMsg $ "valid parse: " ++ show e
logMsg $ "remaining: " ++ show remaining
classifierState .= ClassifierStart
unprocessedBytes .= remaining
return e
_ -> mzero
_ -> mzero
dropInvalid :: InputM ()
dropInvalid = do
c <- use classifier
s <- use classifierState
b <- use unprocessedBytes
when (c b == Invalid) $ do
logMsg "dropping input bytes"
unprocessedBytes .= BS8.empty
case c s b of
Chunk -> do
classifierState .=
case s of
ClassifierStart -> ClassifierInChunk b []
ClassifierInChunk p bs -> ClassifierInChunk p (b:bs)
unprocessedBytes .= BS8.empty
Invalid -> do
logMsg "dropping input bytes"
classifierState .= ClassifierStart
unprocessedBytes .= BS8.empty
_ -> return ()
runInputProcessorLoop :: ClassifyMap -> Input -> IO ()
runInputProcessorLoop classifyTable input = do
let bufferSize = 1024
allocaArray bufferSize $ \(bufferPtr :: Ptr Word8) -> do
s0 <- InputState BS8.empty
s0 <- InputState BS8.empty ClassifierStart
<$> readIORef (_configRef input)
<*> pure (InputBuffer bufferPtr bufferSize)
<*> pure (classify classifyTable)