mirror of
https://github.com/wez/wezterm.git
synced 2024-12-23 21:32:13 +03:00
improve emoji width calculation
I noticed while scrolling `emoji-test.txt` that some of the combined emoji sequences rendered very poorly. This was due to the unicode width being reported as up to 4 in some cases. Digging into it, I discovered that the unicode width crate uses a standard calculation that doesn't take emoji combination sequences into account (see https://github.com/unicode-rs/unicode-width/issues/4). This commit takes a dep on the xi-unicode crate as a lightweight way to gain access to emoji tables and test whether a given grapheme is part of a combining sequence of emoji.
This commit is contained in:
parent
a9bdca6d84
commit
1ab438c1e2
@ -46,8 +46,8 @@ impl GlyphInfo {
|
||||
info: &harfbuzz::hb_glyph_info_t,
|
||||
pos: &harfbuzz::hb_glyph_position_t,
|
||||
) -> GlyphInfo {
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
let num_cells = UnicodeWidthStr::width(text) as u8;
|
||||
use termwiz::cell::unicode_column_width;
|
||||
let num_cells = unicode_column_width(text) as u8;
|
||||
GlyphInfo {
|
||||
#[cfg(debug_assertions)]
|
||||
text: text.into(),
|
||||
|
@ -17,7 +17,6 @@ use termwiz::escape::osc::{ChangeColorPair, ColorOrQuery, ITermFileData, ITermPr
|
||||
use termwiz::escape::{Action, ControlCode, Esc, EscCode, OneBased, OperatingSystemCommand, CSI};
|
||||
use termwiz::hyperlink::Rule as HyperlinkRule;
|
||||
use termwiz::image::{ImageCell, ImageData, TextureCoordinate};
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
struct TabStop {
|
||||
tabs: Vec<bool>,
|
||||
@ -1988,7 +1987,7 @@ impl<'a> Performer<'a> {
|
||||
// they occupy a cell so that we can re-emit them when we output them.
|
||||
// If we didn't do this, then we'd effectively filter them out from
|
||||
// the model, which seems like a lossy design choice.
|
||||
let print_width = UnicodeWidthStr::width(g).max(1);
|
||||
let print_width = unicode_column_width(g).max(1);
|
||||
|
||||
if !self.insert && x + print_width >= width {
|
||||
pen.set_wrapped(true);
|
||||
|
@ -318,7 +318,14 @@ fn assert_lines_equal(lines: &[Line], expect_lines: &[Line], compare: Compare) {
|
||||
if compare.contains(Compare::TEXT) {
|
||||
let line_str = line.as_str();
|
||||
let expect_str = expect.as_str();
|
||||
assert_eq!(line_str, expect_str, "line {} text didn't match", idx,);
|
||||
assert_eq!(
|
||||
line_str,
|
||||
expect_str,
|
||||
"line {} text didn't match '{}' vs '{}'",
|
||||
idx,
|
||||
line_str.escape_default(),
|
||||
expect_str.escape_default()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -543,6 +550,26 @@ fn test_scroll_margins() {
|
||||
assert_all_contents(&term, &["1", "2", "3", "W", " ", "a"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_emoji_with_modifier() {
|
||||
let waving_hand = "\u{1f44b}";
|
||||
let waving_hand_dark_tone = "\u{1f44b}\u{1f3ff}";
|
||||
|
||||
let mut term = TestTerm::new(3, 5, 0);
|
||||
term.print(waving_hand);
|
||||
term.print("\r\n");
|
||||
term.print(waving_hand_dark_tone);
|
||||
|
||||
assert_all_contents(
|
||||
&term,
|
||||
&[
|
||||
&format!("{} ", waving_hand),
|
||||
&format!("{} ", waving_hand_dark_tone),
|
||||
" ",
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hyperlinks() {
|
||||
let mut term = TestTerm::new(3, 5, 0);
|
||||
|
@ -34,6 +34,7 @@ smallvec = "0.6"
|
||||
terminfo = "0.6"
|
||||
unicode-segmentation = "1.5"
|
||||
unicode-width = "0.1"
|
||||
xi-unicode = "0.2"
|
||||
vtparse = { version="0.1", path="../vtparse" }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -288,7 +288,7 @@ impl Cell {
|
||||
|
||||
/// Returns the number of cells visually occupied by this grapheme
|
||||
pub fn width(&self) -> usize {
|
||||
UnicodeWidthStr::width(self.str())
|
||||
grapheme_column_width(self.str())
|
||||
}
|
||||
|
||||
/// Returns the attributes of the cell
|
||||
@ -301,6 +301,31 @@ impl Cell {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of cells visually occupied by a sequence
|
||||
/// of graphemes
|
||||
pub fn unicode_column_width(s: &str) -> usize {
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
s.graphemes(true).map(grapheme_column_width).sum()
|
||||
}
|
||||
|
||||
/// Returns the number of cells visually occupied by a grapheme.
|
||||
/// The input string must be a single grapheme.
|
||||
pub fn grapheme_column_width(s: &str) -> usize {
|
||||
// Due to this issue:
|
||||
// https://github.com/unicode-rs/unicode-width/issues/4
|
||||
// we cannot simply use the unicode-width crate to compute
|
||||
// the desired value.
|
||||
// Let's check for emoji-ness for ourselves first
|
||||
use xi_unicode::EmojiExt;
|
||||
for c in s.chars() {
|
||||
if c.is_emoji_modifier_base() || c.is_emoji_modifier() {
|
||||
// treat modifier sequences as double wide
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
UnicodeWidthStr::width(s)
|
||||
}
|
||||
|
||||
/// Models a change in the attributes of a cell in a stream of changes.
|
||||
/// Each variant specifies one of the possible attributes; the corresponding
|
||||
/// value holds the new value to be used for that attribute.
|
||||
@ -334,4 +359,38 @@ mod test {
|
||||
assert_eq!(cell.str(), " ");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_width() {
|
||||
let foot = "\u{1f9b6}";
|
||||
eprintln!("foot chars");
|
||||
for c in foot.chars() {
|
||||
eprintln!("char: {:?}", c);
|
||||
use xi_unicode::EmojiExt;
|
||||
eprintln!("xi emoji: {}", c.is_emoji());
|
||||
eprintln!("xi emoji_mod: {}", c.is_emoji_modifier());
|
||||
eprintln!("xi emoji_mod_base: {}", c.is_emoji_modifier_base());
|
||||
}
|
||||
assert_eq!(unicode_column_width(foot), 2, "{} should be 2", foot);
|
||||
|
||||
let women_holding_hands_dark_skin_tone_medium_light_skin_tone =
|
||||
"\u{1F469}\u{1F3FF}\u{200D}\u{1F91D}\u{200D}\u{1F469}\u{1F3FC}";
|
||||
|
||||
// Ensure that we can hold this longer grapheme sequence in the cell
|
||||
// and correctly return its string contents!
|
||||
let cell = Cell::new_grapheme(
|
||||
women_holding_hands_dark_skin_tone_medium_light_skin_tone,
|
||||
CellAttributes::default(),
|
||||
);
|
||||
assert_eq!(
|
||||
cell.str(),
|
||||
women_holding_hands_dark_skin_tone_medium_light_skin_tone
|
||||
);
|
||||
assert_eq!(
|
||||
cell.width(),
|
||||
2,
|
||||
"width of {} should be 2",
|
||||
women_holding_hands_dark_skin_tone_medium_light_skin_tone
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -36,12 +36,12 @@
|
||||
//! Alt-b, Alt-Left | Move the cursor backwards one word
|
||||
//! Alt-f, Alt-Right | Move the cursor forwards one word
|
||||
use crate::caps::{Capabilities, ProbeHintsBuilder};
|
||||
use crate::cell::unicode_column_width;
|
||||
use crate::input::{InputEvent, KeyCode, KeyEvent, Modifiers};
|
||||
use crate::surface::{Change, Position};
|
||||
use crate::terminal::{new_terminal, Terminal};
|
||||
use failure::{err_msg, Fallible};
|
||||
use unicode_segmentation::GraphemeCursor;
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
mod actions;
|
||||
mod history;
|
||||
@ -157,7 +157,7 @@ impl<T: Terminal> LineEditor<T> {
|
||||
let mut prompt_width = 0;
|
||||
for ele in host.render_prompt(&self.prompt) {
|
||||
if let OutputElement::Text(ref t) = ele {
|
||||
prompt_width += UnicodeWidthStr::width(t.as_str());
|
||||
prompt_width += unicode_column_width(t.as_str());
|
||||
}
|
||||
changes.push(ele.into());
|
||||
}
|
||||
@ -174,7 +174,7 @@ impl<T: Terminal> LineEditor<T> {
|
||||
// It might feel more right to count the number of graphemes in
|
||||
// the string, but this doesn't render correctly for glyphs that
|
||||
// are double-width. Nothing about unicode is easy :-/
|
||||
let grapheme_count = UnicodeWidthStr::width(&self.line[0..self.cursor]);
|
||||
let grapheme_count = unicode_column_width(&self.line[0..self.cursor]);
|
||||
changes.push(Change::CursorPosition {
|
||||
x: Position::Absolute(prompt_width + grapheme_count),
|
||||
y: Position::NoChange,
|
||||
|
Loading…
Reference in New Issue
Block a user