mirror of
https://github.com/wez/wezterm.git
synced 2024-12-23 13:21:38 +03:00
fix an issue with utf-8 in OSC sequences
I've noticed this off and on for a while, and thought it was something
fishy with my shell dotfiles.
Tracing through I found that the final byte in the "Face with head
bandage" emoji 🤕 U+1F915 was being interpreted as the MW control
code and causing the vt parser to jump out of the OSC state.
The solution for this is to hook up proper UTF-8 processing in the
same way that it is applied in the ground state.
Since we don't have enough bits to introduce new state values (we're
pretty tightly packed in the 16 bits available), I've introduced a
memory of the state to which the utf8 parser needs to return once
a complete sequence is detected.
This commit is contained in:
parent
40386b964c
commit
23b4876d75
@ -34,7 +34,7 @@ smallvec = "0.6"
|
||||
terminfo = "0.6"
|
||||
unicode-segmentation = "1.2"
|
||||
unicode-width = "0.1"
|
||||
vtparse = "0.1"
|
||||
vtparse = { version="0.1", path="../vtparse" }
|
||||
|
||||
[dev-dependencies]
|
||||
varbincode = "0.1"
|
||||
|
@ -741,6 +741,11 @@ mod test {
|
||||
OperatingSystemCommand::SetIconNameAndWindowTitle("hello".into())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse(&["0", "hello \u{1f915}"], "\x1b]0;hello \u{1f915}\x07"),
|
||||
OperatingSystemCommand::SetIconNameAndWindowTitle("hello \u{1f915}".into())
|
||||
);
|
||||
|
||||
// Missing title parameter
|
||||
assert_eq!(
|
||||
parse(&["0"], "\x1b]0\x07"),
|
||||
|
@ -267,6 +267,20 @@ mod test {
|
||||
assert_eq!(encode(&actions), "\x1b]532534523;hello\x07");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_emoji_title_osc() {
|
||||
let input = "\x1b]0;\u{1f915}\x07";
|
||||
let mut p = Parser::new();
|
||||
let actions = p.parse_as_vec(input.as_bytes());
|
||||
assert_eq!(
|
||||
vec![Action::OperatingSystemCommand(Box::new(
|
||||
OperatingSystemCommand::SetIconNameAndWindowTitle("\u{1f915}".to_owned()),
|
||||
))],
|
||||
actions
|
||||
);
|
||||
assert_eq!(encode(&actions), input);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_esc() {
|
||||
let mut p = Parser::new();
|
||||
|
@ -317,7 +317,9 @@ fn build_tables() -> Tables {
|
||||
// This extended range allows for UTF-8 characters
|
||||
// to be embedded in OSC parameters. It is not
|
||||
// part of the base state machine.
|
||||
0x80..=0xff => (OscPut, OscString),
|
||||
0xc2..=0xdf => (Utf8, Utf8Sequence),
|
||||
0xe0..=0xef => (Utf8, Utf8Sequence),
|
||||
0xf0..=0xf4 => (Utf8, Utf8Sequence),
|
||||
},
|
||||
),
|
||||
);
|
||||
|
@ -278,6 +278,38 @@ const MAX_INTERMEDIATES: usize = 2;
|
||||
const MAX_OSC: usize = 16;
|
||||
const MAX_PARAMS: usize = 16;
|
||||
|
||||
struct OscState {
|
||||
buffer: Vec<u8>,
|
||||
param_indices: [usize; MAX_OSC],
|
||||
num_params: usize,
|
||||
full: bool,
|
||||
}
|
||||
|
||||
impl OscState {
|
||||
fn put(&mut self, param: char) {
|
||||
if param == ';' {
|
||||
match self.num_params {
|
||||
MAX_OSC => {
|
||||
self.full = true;
|
||||
return;
|
||||
}
|
||||
num => {
|
||||
self.param_indices[num - 1] = self.buffer.len();
|
||||
self.num_params += 1;
|
||||
}
|
||||
}
|
||||
} else if !self.full {
|
||||
if self.num_params == 0 {
|
||||
self.num_params = 1;
|
||||
}
|
||||
|
||||
let mut buf = [0u8; 8];
|
||||
self.buffer
|
||||
.extend_from_slice(param.encode_utf8(&mut buf).as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The virtual terminal parser. It works together with an implementation of `VTActor`.
|
||||
pub struct VTParser {
|
||||
state: State,
|
||||
@ -286,10 +318,7 @@ pub struct VTParser {
|
||||
num_intermediates: usize,
|
||||
ignored_excess_intermediates: bool,
|
||||
|
||||
osc_buffer: Vec<u8>,
|
||||
osc_param_indices: [usize; MAX_OSC],
|
||||
osc_num_params: usize,
|
||||
osc_full: bool,
|
||||
osc: OscState,
|
||||
|
||||
params: [i64; MAX_PARAMS],
|
||||
num_params: usize,
|
||||
@ -297,24 +326,28 @@ pub struct VTParser {
|
||||
params_full: bool,
|
||||
|
||||
utf8_parser: Utf8Parser,
|
||||
utf8_return_state: State,
|
||||
}
|
||||
|
||||
impl VTParser {
|
||||
pub fn new() -> Self {
|
||||
let osc_param_indices = [0usize; MAX_OSC];
|
||||
let param_indices = [0usize; MAX_OSC];
|
||||
let params = [0i64; MAX_PARAMS];
|
||||
|
||||
Self {
|
||||
state: State::Ground,
|
||||
utf8_return_state: State::Ground,
|
||||
|
||||
intermediates: [0, 0],
|
||||
num_intermediates: 0,
|
||||
ignored_excess_intermediates: false,
|
||||
|
||||
osc_buffer: Vec::new(),
|
||||
osc_param_indices,
|
||||
osc_num_params: 0,
|
||||
osc_full: false,
|
||||
osc: OscState {
|
||||
buffer: Vec::new(),
|
||||
param_indices,
|
||||
num_params: 0,
|
||||
full: false,
|
||||
},
|
||||
|
||||
params,
|
||||
num_params: 0,
|
||||
@ -342,8 +375,8 @@ impl VTParser {
|
||||
Action::Clear => {
|
||||
self.num_intermediates = 0;
|
||||
self.ignored_excess_intermediates = false;
|
||||
self.osc_num_params = 0;
|
||||
self.osc_full = false;
|
||||
self.osc.num_params = 0;
|
||||
self.osc.full = false;
|
||||
self.num_params = 0;
|
||||
self.params_full = false;
|
||||
self.current_param.take();
|
||||
@ -406,47 +439,31 @@ impl VTParser {
|
||||
}
|
||||
Action::Unhook => actor.dcs_unhook(),
|
||||
Action::OscStart => {
|
||||
self.osc_buffer.clear();
|
||||
self.osc_num_params = 0;
|
||||
self.osc_full = false;
|
||||
}
|
||||
Action::OscPut => {
|
||||
if param == b';' {
|
||||
match self.osc_num_params {
|
||||
MAX_OSC => {
|
||||
self.osc_full = true;
|
||||
return;
|
||||
}
|
||||
num => {
|
||||
self.osc_param_indices[num - 1] = self.osc_buffer.len();
|
||||
self.osc_num_params += 1;
|
||||
}
|
||||
}
|
||||
} else if !self.osc_full {
|
||||
if self.osc_num_params == 0 {
|
||||
self.osc_num_params = 1;
|
||||
}
|
||||
self.osc_buffer.push(param);
|
||||
}
|
||||
self.osc.buffer.clear();
|
||||
self.osc.num_params = 0;
|
||||
self.osc.full = false;
|
||||
}
|
||||
Action::OscPut => self.osc.put(param as char),
|
||||
|
||||
Action::OscEnd => {
|
||||
if self.osc_num_params == 0 {
|
||||
if self.osc.num_params == 0 {
|
||||
actor.osc_dispatch(&[]);
|
||||
} else {
|
||||
let mut params: [&[u8]; MAX_OSC] = [b""; MAX_OSC];
|
||||
let mut offset = 0usize;
|
||||
let mut slice = self.osc_buffer.as_slice();
|
||||
let limit = self.osc_num_params.min(MAX_OSC);
|
||||
let mut slice = self.osc.buffer.as_slice();
|
||||
let limit = self.osc.num_params.min(MAX_OSC);
|
||||
for i in 0..limit - 1 {
|
||||
let (a, b) = slice.split_at(self.osc_param_indices[i] - offset);
|
||||
let (a, b) = slice.split_at(self.osc.param_indices[i] - offset);
|
||||
params[i] = a;
|
||||
slice = b;
|
||||
offset = self.osc_param_indices[i];
|
||||
offset = self.osc.param_indices[i];
|
||||
}
|
||||
params[limit - 1] = slice;
|
||||
actor.osc_dispatch(¶ms[0..limit]);
|
||||
}
|
||||
}
|
||||
|
||||
Action::Utf8 => self.next_utf8(actor, param),
|
||||
}
|
||||
}
|
||||
@ -463,12 +480,18 @@ impl VTParser {
|
||||
struct Decoder<'a> {
|
||||
state: &'a mut State,
|
||||
actor: &'a mut dyn VTActor,
|
||||
return_state: State,
|
||||
osc: &'a mut OscState,
|
||||
}
|
||||
|
||||
impl<'a> utf8parse::Receiver for Decoder<'a> {
|
||||
fn codepoint(&mut self, c: char) {
|
||||
self.actor.print(c);
|
||||
*self.state = State::Ground;
|
||||
match self.return_state {
|
||||
State::Ground => self.actor.print(c),
|
||||
State::OscString => self.osc.put(c),
|
||||
state => panic!("unreachable state {:?}", state),
|
||||
};
|
||||
*self.state = self.return_state;
|
||||
}
|
||||
|
||||
fn invalid_sequence(&mut self) {
|
||||
@ -479,6 +502,8 @@ impl VTParser {
|
||||
let mut decoder = Decoder {
|
||||
state: &mut self.state,
|
||||
actor,
|
||||
return_state: self.utf8_return_state,
|
||||
osc: &mut self.osc,
|
||||
};
|
||||
self.utf8_parser.advance(&mut decoder, byte);
|
||||
}
|
||||
@ -499,9 +524,12 @@ impl VTParser {
|
||||
let (action, state) = lookup(self.state, byte);
|
||||
|
||||
if state != self.state {
|
||||
self.action(lookup_exit(self.state), 0, actor);
|
||||
if state != State::Utf8Sequence {
|
||||
self.action(lookup_exit(self.state), 0, actor);
|
||||
}
|
||||
self.action(action, byte, actor);
|
||||
self.action(lookup_entry(state), 0, actor);
|
||||
self.utf8_return_state = self.state;
|
||||
self.state = state;
|
||||
} else {
|
||||
self.action(action, byte, actor);
|
||||
|
Loading…
Reference in New Issue
Block a user