1
1
mirror of https://github.com/wez/wezterm.git synced 2024-12-23 21:32:13 +03:00

improve emoji width calculation

I noticed while scrolling `emoji-test.txt` that some of the combined
emoji sequences rendered very poorly.  This was due to the unicode
width being reported as up to 4 in some cases.

Digging into it, I discovered that the unicode width crate uses a
standard calculation that doesn't take emoji combination sequences
into account (see https://github.com/unicode-rs/unicode-width/issues/4).

This commit takes a dependency on the xi-unicode crate as a lightweight way
to gain access to emoji tables and test whether a given grapheme is
part of an emoji combining sequence.
This commit is contained in:
Wez Furlong 2019-11-05 08:45:08 -08:00
parent a9bdca6d84
commit 1ab438c1e2
6 changed files with 95 additions and 9 deletions

View File

@ -46,8 +46,8 @@ impl GlyphInfo {
info: &harfbuzz::hb_glyph_info_t,
pos: &harfbuzz::hb_glyph_position_t,
) -> GlyphInfo {
use unicode_width::UnicodeWidthStr;
let num_cells = UnicodeWidthStr::width(text) as u8;
use termwiz::cell::unicode_column_width;
let num_cells = unicode_column_width(text) as u8;
GlyphInfo {
#[cfg(debug_assertions)]
text: text.into(),

View File

@ -17,7 +17,6 @@ use termwiz::escape::osc::{ChangeColorPair, ColorOrQuery, ITermFileData, ITermPr
use termwiz::escape::{Action, ControlCode, Esc, EscCode, OneBased, OperatingSystemCommand, CSI};
use termwiz::hyperlink::Rule as HyperlinkRule;
use termwiz::image::{ImageCell, ImageData, TextureCoordinate};
use unicode_width::UnicodeWidthStr;
struct TabStop {
tabs: Vec<bool>,
@ -1988,7 +1987,7 @@ impl<'a> Performer<'a> {
// they occupy a cell so that we can re-emit them when we output them.
// If we didn't do this, then we'd effectively filter them out from
// the model, which seems like a lossy design choice.
let print_width = UnicodeWidthStr::width(g).max(1);
let print_width = unicode_column_width(g).max(1);
if !self.insert && x + print_width >= width {
pen.set_wrapped(true);

View File

@ -318,7 +318,14 @@ fn assert_lines_equal(lines: &[Line], expect_lines: &[Line], compare: Compare) {
if compare.contains(Compare::TEXT) {
let line_str = line.as_str();
let expect_str = expect.as_str();
assert_eq!(line_str, expect_str, "line {} text didn't match", idx,);
assert_eq!(
line_str,
expect_str,
"line {} text didn't match '{}' vs '{}'",
idx,
line_str.escape_default(),
expect_str.escape_default()
);
}
}
@ -543,6 +550,26 @@ fn test_scroll_margins() {
assert_all_contents(&term, &["1", "2", "3", "W", " ", "a"]);
}
/// Prints a bare emoji and the same emoji followed by a tone modifier on
/// separate lines, then checks the resulting screen contents.
#[test]
fn test_emoji_with_modifier() {
    let waving_hand = "\u{1f44b}";
    let waving_hand_dark_tone = "\u{1f44b}\u{1f3ff}";

    let mut term = TestTerm::new(3, 5, 0);
    term.print(waving_hand);
    term.print("\r\n");
    term.print(waving_hand_dark_tone);

    // Build the expected rows up front so the final assertion reads as a
    // plain list of lines.
    let plain_row = format!("{} ", waving_hand);
    let toned_row = format!("{} ", waving_hand_dark_tone);
    assert_all_contents(&term, &[&plain_row, &toned_row, " "]);
}
#[test]
fn test_hyperlinks() {
let mut term = TestTerm::new(3, 5, 0);

View File

@ -34,6 +34,7 @@ smallvec = "0.6"
terminfo = "0.6"
unicode-segmentation = "1.5"
unicode-width = "0.1"
xi-unicode = "0.2"
vtparse = { version="0.1", path="../vtparse" }
[dev-dependencies]

View File

@ -288,7 +288,7 @@ impl Cell {
/// Returns the number of cells visually occupied by this grapheme
pub fn width(&self) -> usize {
UnicodeWidthStr::width(self.str())
grapheme_column_width(self.str())
}
/// Returns the attributes of the cell
@ -301,6 +301,31 @@ impl Cell {
}
}
/// Returns the number of cells visually occupied by a sequence
/// of graphemes
pub fn unicode_column_width(s: &str) -> usize {
use unicode_segmentation::UnicodeSegmentation;
s.graphemes(true).map(grapheme_column_width).sum()
}
/// Computes how many terminal cells a single grapheme occupies.
/// The caller must pass exactly one grapheme in `s`.
pub fn grapheme_column_width(s: &str) -> usize {
    use xi_unicode::EmojiExt;

    // The unicode-width crate does not take emoji combination sequences
    // into account (https://github.com/unicode-rs/unicode-width/issues/4),
    // so look for emoji modifier characters ourselves before deferring to it.
    let in_modifier_sequence = s
        .chars()
        .any(|ch| ch.is_emoji_modifier_base() || ch.is_emoji_modifier());

    if in_modifier_sequence {
        // Modifier sequences are treated as double wide.
        2
    } else {
        UnicodeWidthStr::width(s)
    }
}
/// Models a change in the attributes of a cell in a stream of changes.
/// Each variant specifies one of the possible attributes; the corresponding
/// value holds the new value to be used for that attribute.
@ -334,4 +359,38 @@ mod test {
assert_eq!(cell.str(), " ");
}
}
/// Checks the column width reported for a lone emoji and for a long
/// multi-codepoint emoji sequence stored in a single cell.
#[test]
fn test_width() {
    use xi_unicode::EmojiExt;

    let foot = "\u{1f9b6}";
    eprintln!("foot chars");
    // Dump the emoji classification of every char to aid debugging a failure.
    foot.chars().for_each(|c| {
        eprintln!("char: {:?}", c);
        eprintln!("xi emoji: {}", c.is_emoji());
        eprintln!("xi emoji_mod: {}", c.is_emoji_modifier());
        eprintln!("xi emoji_mod_base: {}", c.is_emoji_modifier_base());
    });
    assert_eq!(unicode_column_width(foot), 2, "{} should be 2", foot);

    // Women holding hands: dark skin tone, medium-light skin tone.
    let holding_hands = "\u{1F469}\u{1F3FF}\u{200D}\u{1F91D}\u{200D}\u{1F469}\u{1F3FC}";

    // The cell must be able to hold this longer grapheme sequence and
    // hand its string contents back unchanged.
    let cell = Cell::new_grapheme(holding_hands, CellAttributes::default());
    assert_eq!(cell.str(), holding_hands);
    assert_eq!(cell.width(), 2, "width of {} should be 2", holding_hands);
}
}

View File

@ -36,12 +36,12 @@
//! Alt-b, Alt-Left | Move the cursor backwards one word
//! Alt-f, Alt-Right | Move the cursor forwards one word
use crate::caps::{Capabilities, ProbeHintsBuilder};
use crate::cell::unicode_column_width;
use crate::input::{InputEvent, KeyCode, KeyEvent, Modifiers};
use crate::surface::{Change, Position};
use crate::terminal::{new_terminal, Terminal};
use failure::{err_msg, Fallible};
use unicode_segmentation::GraphemeCursor;
use unicode_width::UnicodeWidthStr;
mod actions;
mod history;
@ -157,7 +157,7 @@ impl<T: Terminal> LineEditor<T> {
let mut prompt_width = 0;
for ele in host.render_prompt(&self.prompt) {
if let OutputElement::Text(ref t) = ele {
prompt_width += UnicodeWidthStr::width(t.as_str());
prompt_width += unicode_column_width(t.as_str());
}
changes.push(ele.into());
}
@ -174,7 +174,7 @@ impl<T: Terminal> LineEditor<T> {
// It might feel more right to count the number of graphemes in
// the string, but this doesn't render correctly for glyphs that
// are double-width. Nothing about unicode is easy :-/
let grapheme_count = UnicodeWidthStr::width(&self.line[0..self.cursor]);
let grapheme_count = unicode_column_width(&self.line[0..self.cursor]);
changes.push(Change::CursorPosition {
x: Position::Absolute(prompt_width + grapheme_count),
y: Position::NoChange,