ladybird/Userland/Libraries/LibVT/StateMachine.txt
Daniel Bertalan 282d0ebbec LibVT: Fix UTF-8 handling in OSC sequences
Previously, we would ignore bytes in the `0x80..0xff` range when parsing
OSC strings. This caused terminal titles and hyperlinks containing
non-ASCII characters to fail. Also added is extending the UTF-8 fail
functionality for C1 control codes, since we do not handle those.

Fixes #7377
2021-05-22 23:23:08 +02:00

217 lines
5.1 KiB
Plaintext

// This file is used for automatically generating the ANSI escape sequence state machine
//
// The description of the state machine is taken from https://vt100.net/emu/dec_ansi_parser
// with added support for UTF-8 parsing
// Generator directives.
// NOTE(review): presumably @name and @namespace give the generated class its name and
// enclosing namespace, and @begin selects the initial state — confirm against the generator.
@name EscapeSequenceStateMachine
@namespace VT
@begin Ground
// Rules that are not tied to a single state.
// CAN (0x18) and SUB (0x1a) are executed and drop back to Ground;
// ESC (0x1b) moves to Escape without running an action.
@anywhere {
    0x18 => (Ground, Execute)
    0x1a => (Ground, Execute)

    0x1b => (Escape, _)
}
// Initial state (see @begin). C0 controls are executed, 0x20..0x7f is printed,
// and bytes >= 0x80 are handled as UTF-8: a valid lead byte starts a multi-byte
// sequence, everything else is reported through FailUTF8.
Ground {
    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    [0x20..0x7f] => (_, Print)

    // 0x80..0xbf are UTF-8 continuation bytes (and the C1 control range);
    // 0xc0 and 0xc1 could only start overlong encodings.
    [0x80..0xc1] => (Ground, FailUTF8)

    // Lead bytes of 2-, 3- and 4-byte sequences.
    [0xc2..0xdf] => (UTF81ByteNeeded, BeginUTF8)
    [0xe0..0xef] => (UTF82BytesNeeded, BeginUTF8)
    [0xf0..0xf4] => (UTF83BytesNeeded, BeginUTF8)

    // 0xf5..0xff never occur in well-formed UTF-8.
    [0xf5..0xff] => (Ground, FailUTF8)
}
// One continuation byte is still missing. A continuation byte (0x80..0xbf)
// completes the code point; any other byte fails. Both cases return to Ground.
UTF81ByteNeeded {
    [0x80..0xbf] => (Ground, PrintUTF8)

    [0x00..0x7f] => (Ground, FailUTF8)
    [0xc0..0xff] => (Ground, FailUTF8)
}
// Two continuation bytes are still missing. A continuation byte (0x80..0xbf)
// advances to UTF81ByteNeeded; any other byte fails and returns to Ground.
UTF82BytesNeeded {
    [0x80..0xbf] => (UTF81ByteNeeded, AdvanceUTF8)

    [0x00..0x7f] => (Ground, FailUTF8)
    [0xc0..0xff] => (Ground, FailUTF8)
}
// Three continuation bytes are still missing. A continuation byte (0x80..0xbf)
// advances to UTF82BytesNeeded; any other byte fails and returns to Ground.
UTF83BytesNeeded {
    [0x80..0xbf] => (UTF82BytesNeeded, AdvanceUTF8)

    [0x00..0x7f] => (Ground, FailUTF8)
    [0xc0..0xff] => (Ground, FailUTF8)
}
// Reached after ESC (0x1b). The next byte either selects one of the longer
// sequence types or is dispatched as a plain escape sequence.
Escape {
    @entry Clear

    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    [0x20..0x2f] => (EscapeIntermediate, Collect)

    // Final bytes of plain escape sequences.
    [0x30..0x4f] => (Ground, EscDispatch)
    [0x51..0x57] => (Ground, EscDispatch)
    [0x59..0x5a] => (Ground, EscDispatch)
    0x5c         => (Ground, EscDispatch)
    [0x60..0x7e] => (Ground, EscDispatch)

    // Introducers: 'P' (0x50), '[' (0x5b), ']' (0x5d), and 'X' / '^' / '_' (0x58, 0x5e, 0x5f).
    0x50         => (DcsEntry, _)
    0x5b         => (CsiEntry, _)
    0x5d         => (OscString, _)
    0x58         => (SosPmApcString, _)
    [0x5e..0x5f] => (SosPmApcString, _)

    0x7f         => (_, _)
}
// Escape sequence that already contains at least one intermediate byte:
// further intermediates are collected, a byte in 0x30..0x7e dispatches the sequence.
EscapeIntermediate {
    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    [0x20..0x2f] => (_, Collect)
    [0x30..0x7e] => (Ground, EscDispatch)

    0x7f         => (_, _)
}
// Reached from Escape on '[' (0x5b). Classifies the first byte of a CSI sequence
// as an intermediate, a parameter, a private marker or the final byte.
CsiEntry {
    @entry Clear

    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    // Intermediate bytes are collected (not executed), exactly as CsiParam and
    // CsiIntermediate do for the same range.
    [0x20..0x2f] => (CsiIntermediate, Collect)

    // Digits and ';' start the parameter list.
    [0x30..0x39] => (CsiParam, Param)
    0x3b         => (CsiParam, Param)

    // ':' (0x3a) sends the rest of the sequence to CsiIgnore.
    0x3a         => (CsiIgnore, _)

    // '<' '=' '>' '?' are collected before the parameters.
    [0x3c..0x3f] => (CsiParam, Collect)

    [0x40..0x7e] => (Ground, CsiDispatch)

    0x7f         => (_, _)
}
// Malformed CSI sequence: C0 controls are still executed, everything else is
// dropped, and a byte in 0x40..0x7e returns to Ground without dispatching.
CsiIgnore {
    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    [0x20..0x3f] => (_, _)
    0x7f         => (_, _)

    [0x40..0x7e] => (Ground, _)
}
// Inside the parameter list of a CSI sequence.
CsiParam {
    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    // Digits and ';' extend the parameter list.
    [0x30..0x39] => (_, Param)
    0x3b         => (_, Param)

    [0x20..0x2f] => (CsiIntermediate, Collect)

    // ':' and a late '<' '=' '>' '?' send the sequence to CsiIgnore.
    0x3a         => (CsiIgnore, _)
    [0x3c..0x3f] => (CsiIgnore, _)

    [0x40..0x7e] => (Ground, CsiDispatch)

    0x7f         => (_, _)
}
// CSI sequence that has reached its intermediate bytes: more intermediates are
// collected, a byte in 0x30..0x3f sends the sequence to CsiIgnore, and a byte
// in 0x40..0x7e dispatches it.
CsiIntermediate {
    [0x00..0x17] => (_, Execute)
    0x19         => (_, Execute)
    [0x1c..0x1f] => (_, Execute)

    [0x20..0x2f] => (_, Collect)
    [0x30..0x3f] => (CsiIgnore, _)
    [0x40..0x7e] => (Ground, CsiDispatch)

    0x7f         => (_, _)
}
// Reached from Escape on 'P' (0x50). Same classification as CsiEntry, except
// that C0 controls are ignored and a byte in 0x40..0x7e enters DcsPassthrough.
DcsEntry {
    @entry Clear

    [0x00..0x17] => (_, _)
    0x19         => (_, _)
    [0x1c..0x1f] => (_, _)

    [0x20..0x2f] => (DcsIntermediate, Collect)

    [0x30..0x39] => (DcsParam, Param)
    0x3b         => (DcsParam, Param)

    0x3a         => (DcsIgnore, _)

    [0x3c..0x3f] => (DcsParam, Collect)

    [0x40..0x7e] => (DcsPassthrough, _)

    0x7f         => (_, _)
}
// DCS sequence that has reached its intermediate bytes: more intermediates are
// collected, a byte in 0x30..0x3f sends the sequence to DcsIgnore, and a byte
// in 0x40..0x7e enters DcsPassthrough.
DcsIntermediate {
    [0x00..0x17] => (_, _)
    0x19         => (_, _)
    [0x1c..0x1f] => (_, _)

    [0x20..0x2f] => (_, Collect)
    [0x30..0x3f] => (DcsIgnore, _)
    [0x40..0x7e] => (DcsPassthrough, _)

    0x7f         => (_, _)
}
// Malformed DCS sequence: every listed byte is dropped until the 8-bit ST
// (0x9c) returns to Ground.
// NOTE(review): bytes 0x80..0xff other than 0x9c have no rule in this state;
// what happens to them depends on how the generator treats unmatched input.
DcsIgnore {
    [0x00..0x17] => (_, _)
    0x19         => (_, _)
    [0x1c..0x1f] => (_, _)

    [0x20..0x7f] => (_, _)

    0x9c         => (Ground, _)
}
// Inside the parameter list of a DCS sequence.
DcsParam {
    [0x00..0x17] => (_, _)
    0x19         => (_, _)
    [0x1c..0x1f] => (_, _)

    // Digits and ';' extend the parameter list.
    [0x30..0x39] => (_, Param)
    0x3b         => (_, Param)

    [0x20..0x2f] => (DcsIntermediate, Collect)

    // ':' and a late '<' '=' '>' '?' send the sequence to DcsIgnore.
    0x3a         => (DcsIgnore, _)
    [0x3c..0x3f] => (DcsIgnore, _)

    [0x40..0x7e] => (DcsPassthrough, _)

    0x7f         => (_, _)
}
// Data part of a DCS sequence. Hook and Unhook are declared as the entry and
// exit actions; bytes up to 0x7e are handed to Put, DEL is dropped, and the
// 8-bit ST (0x9c) returns to Ground.
// NOTE(review): bytes 0x80..0xff other than 0x9c have no rule in this state;
// what happens to them depends on how the generator treats unmatched input.
DcsPassthrough {
    @entry Hook

    [0x00..0x17] => (_, Put)
    0x19         => (_, Put)
    [0x1c..0x1f] => (_, Put)
    [0x20..0x7e] => (_, Put)

    0x7f         => (_, _)

    0x9c         => (Ground, _)

    @exit Unhook
}
// Reached from Escape on 'X', '^' or '_' (0x58, 0x5e, 0x5f). The string content
// is discarded; the 8-bit ST (0x9c) returns to Ground.
// NOTE(review): bytes 0x80..0xff other than 0x9c have no rule in this state;
// what happens to them depends on how the generator treats unmatched input.
SosPmApcString {
    [0x00..0x17] => (_, _)
    0x19         => (_, _)
    [0x1c..0x1f] => (_, _)

    [0x20..0x7f] => (_, _)

    0x9c         => (Ground, _)
}
// Reached from Escape on ']' (0x5d). OscStart and OscEnd are declared as the
// entry and exit actions; the string payload is fed to OscPut.
OscString {
    @entry OscStart

    // C0 controls other than BEL are ignored.
    [0x00..0x06] => (_, _)
    [0x08..0x17] => (_, _)
    0x19         => (_, _)
    [0x1c..0x1f] => (_, _)

    // The standard only allows ST as the string terminator, but xterm also
    // accepts BEL (0x07), so it ends the string here as well.
    0x07         => (Ground, _)

    // The payload range extends up to 0xff so that UTF-8 encoded text is kept.
    // As a consequence the single-byte ST (0x9c) is part of the payload and
    // does not terminate the string in this state.
    [0x20..0xff] => (_, OscPut)

    @exit OscEnd
}