introduce unicode_version config

This is a fairly far-reaching commit. The idea is: * Introduce a unicode_version config that specifies the default level of unicode conformance for each newly created Terminal (each Pane). * The unicode_version is passed down to the `grapheme_column_width` function, which interprets the width based on the version. * `Cell` records the width so that later calculations don't need to know the unicode version. In a subsequent diff, I will introduce an escape sequence that allows setting/pushing/popping the unicode version so that it can be overridden via, e.g., a shell alias prior to launching an application that uses a different version of unicode from the default. This approach allows output from multiple applications with differing understanding of unicode to coexist on the same screen a little more sanely. Note that the default `unicode_version` is set to 9, which means that emoji presentation selectors are now ignored by default. This was selected to better match the level of support in widely deployed applications. I expect to raise that default version in the future. Also worth noting: there are a number of callers of `unicode_column_width` in things like overlays and lua helper functions that pass `None` for the unicode version; these will assume that the latest version of unicode known to wezterm/termwiz is desired. If those overlays do things with emoji presentation selectors, then there may be some alignment artifacts. That can be tackled in a follow-up commit. refs: #1231 refs: #997
2024-12-22 21:01:36 +03:00 · 2021-11-25 08:53:07 -07:00 · 2021-11-25 08:53:07 -07:00 · 225e7a1243
commit 225e7a1243
parent 591e1f593c
31 changed files with 260 additions and 97 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4023,7 +4023,7 @@ checksum = "13a4ec180a2de59b57434704ccfad967f789b12737738798fa08798cd5824c16"

 [[package]]
 name = "termwiz"
-version = "0.13.0"
+version = "0.14.0"
 dependencies = [
 "anyhow",
 "base64",
--- a/config/src/lib.rs
+++ b/config/src/lib.rs
@ -1249,9 +1249,17 @@ pub struct Config {

    #[serde(default = "default_canonicalize_pasted_newlines")]
    pub canonicalize_pasted_newlines: bool,
+
+    #[serde(default = "default_unicode_version")]
+    pub unicode_version: u8,
 }
 impl_lua_conversion!(Config);

+// Coupled with term/src/config.rs:TerminalConfiguration::unicode_version
+fn default_unicode_version() -> u8 {
+    9
+}
+
 fn default_canonicalize_pasted_newlines() -> bool {
    cfg!(windows)
 }
--- a/config/src/lua.rs
+++ b/config/src/lua.rs
@ -164,13 +164,13 @@ pub fn make_lua_context(config_file: &Path) -> anyhow::Result<Lua> {

        wezterm_mod.set(
            "column_width",
-            lua.create_function(|_, s: String| Ok(unicode_column_width(&s)))?,
+            lua.create_function(|_, s: String| Ok(unicode_column_width(&s, None)))?,
        )?;

        wezterm_mod.set(
            "pad_right",
            lua.create_function(|_, (mut result, width): (String, usize)| {
-                let mut len = unicode_column_width(&result);
+                let mut len = unicode_column_width(&result, None);
                while len < width {
                    result.push(' ');
                    len += 1;
@ -183,7 +183,7 @@ pub fn make_lua_context(config_file: &Path) -> anyhow::Result<Lua> {
        wezterm_mod.set(
            "pad_left",
            lua.create_function(|_, (mut result, width): (String, usize)| {
-                let mut len = unicode_column_width(&result);
+                let mut len = unicode_column_width(&result, None);
                while len < width {
                    result.insert(0, ' ');
                    len += 1;
@ -199,7 +199,7 @@ pub fn make_lua_context(config_file: &Path) -> anyhow::Result<Lua> {
                let mut result = String::new();
                let mut len = 0;
                for g in s.graphemes(true) {
-                    let g_len = grapheme_column_width(g);
+                    let g_len = grapheme_column_width(g, None);
                    if g_len + len > max_width {
                        break;
                    }
@ -217,7 +217,7 @@ pub fn make_lua_context(config_file: &Path) -> anyhow::Result<Lua> {
                let mut result = vec![];
                let mut len = 0;
                for g in s.graphemes(true).rev() {
-                    let g_len = grapheme_column_width(g);
+                    let g_len = grapheme_column_width(g, None);
                    if g_len + len > max_width {
                        break;
                    }
--- a/config/src/terminal.rs
+++ b/config/src/terminal.rs
@ -74,4 +74,8 @@ impl wezterm_term::TerminalConfiguration for TermConfig {
    fn canonicalize_pasted_newlines(&self) -> bool {
        self.configuration().canonicalize_pasted_newlines
    }
+
+    fn unicode_version(&self) -> u8 {
+        self.configuration().unicode_version
+    }
 }
--- a/mux/src/connui.rs
+++ b/mux/src/connui.rs
@ -25,12 +25,15 @@ impl LineEditorHost for PasswordPromptHost {
    // characters when output to the terminal widget
    fn highlight_line(&self, line: &str, cursor_position: usize) -> (Vec<OutputElement>, usize) {
        let placeholder = "🔑";
-        let grapheme_count = unicode_column_width(line);
+        let grapheme_count = unicode_column_width(line, None);
        let mut output = vec![];
        for _ in 0..grapheme_count {
            output.push(OutputElement::Text(placeholder.to_string()));
        }
-        (output, unicode_column_width(placeholder) * cursor_position)
+        (
+            output,
+            unicode_column_width(placeholder, None) * cursor_position,
+        )
    }
 }

--- a/mux/src/ssh.rs
+++ b/mux/src/ssh.rs
@ -43,12 +43,15 @@ impl LineEditorHost for PasswordPromptHost {
            // Rewrite the input so that we can obscure the password
            // characters when output to the terminal widget
            let placeholder = "🔑";
-            let grapheme_count = unicode_column_width(line);
+            let grapheme_count = unicode_column_width(line, None);
            let mut output = vec![];
            for _ in 0..grapheme_count {
                output.push(OutputElement::Text(placeholder.to_string()));
            }
-            (output, unicode_column_width(placeholder) * cursor_position)
+            (
+                output,
+                unicode_column_width(placeholder, None) * cursor_position,
+            )
        }
    }
 }
--- a/tabout/Cargo.toml
+++ b/tabout/Cargo.toml
@ -9,4 +9,4 @@ license = "MIT"
 documentation = "https://docs.rs/tabout"

 [dependencies]
-termwiz = { path = "../termwiz", version="0.13"}
+termwiz = { path = "../termwiz", version="0.14"}
--- a/tabout/src/lib.rs
+++ b/tabout/src/lib.rs
@ -28,7 +28,7 @@ fn emit_column<W: std::io::Write>(
    alignment: Alignment,
    output: &mut W,
 ) -> Result<(), std::io::Error> {
-    let text_width = unicode_column_width(text);
+    let text_width = unicode_column_width(text, None);
    let (left_pad, right_pad) = match alignment {
        Alignment::Left => (0, max_width - text_width),
        Alignment::Center => {
@ -66,14 +66,14 @@ pub fn tabulate_output<S: std::string::ToString, W: std::io::Write>(
 ) -> Result<(), std::io::Error> {
    let mut col_widths: Vec<usize> = columns
        .iter()
-        .map(|c| unicode_column_width(&c.name))
+        .map(|c| unicode_column_width(&c.name, None))
        .collect();

    let mut display_rows: Vec<Vec<String>> = vec![];
    for src_row in rows {
        let dest_row: Vec<String> = src_row.iter().map(|col| col.to_string()).collect();
        for (idx, col) in dest_row.iter().enumerate() {
-            let col_width = unicode_column_width(col);
+            let col_width = unicode_column_width(col, None);
            if let Some(width) = col_widths.get_mut(idx) {
                *width = (*width).max(col_width);
            } else {
@ -116,7 +116,7 @@ pub fn unicode_column_width_of_change_slice(s: &[Change]) -> usize {
    s.iter()
        .map(|c| {
            if c.is_text() {
-                unicode_column_width(c.text())
+                unicode_column_width(c.text(), None)
            } else {
                0
            }
@ -170,7 +170,7 @@ pub fn tabulate_for_terminal(
 ) {
    let mut col_widths: Vec<usize> = columns
        .iter()
-        .map(|c| unicode_column_width(&c.name))
+        .map(|c| unicode_column_width(&c.name, None))
        .collect();

    for row in rows {
--- a/term/Cargo.toml
+++ b/term/Cargo.toml
@ -37,6 +37,6 @@ pretty_env_logger = "0.4"
 k9 = "0.11.0"

 [dependencies.termwiz]
-version = "0.13"
+version = "0.14"
 path = "../termwiz"
 features = ["use_image"]
--- a/term/src/config.rs
+++ b/term/src/config.rs
@ -84,4 +84,15 @@ pub trait TerminalConfiguration: std::fmt::Debug {
    fn enable_kitty_graphics(&self) -> bool {
        false
    }
+
+    /// The default unicode version to assume.
+    /// This affects how the width of certain sequences is interpreted.
+    /// At the time of writing, we default to 9 even though the current
+    /// version of unicode is 14.  14 introduced emoji presentation selectors
+    /// that also alter the width of certain sequences, and that is too
+    /// new for most deployed applications.
+    // Coupled with config/src/lib.rs:default_unicode_version
+    fn unicode_version(&self) -> u8 {
+        9
+    }
 }
--- a/term/src/terminalstate/mod.rs
+++ b/term/src/terminalstate/mod.rs
@ -9,6 +9,7 @@ use std::collections::HashMap;
 use std::sync::mpsc::{channel, Sender};
 use std::sync::Arc;
 use terminfo::{Database, Value};
+use termwiz::cell::UnicodeVersion;
 use termwiz::escape::csi::{
    Cursor, CursorStyle, DecPrivateMode, DecPrivateModeCode, Device, Edit, EraseInDisplay,
    EraseInLine, Mode, Sgr, TabulationClear, TerminalMode, TerminalModeCode, Window, XtSmGraphics,
@ -341,6 +342,9 @@ pub struct TerminalState {

    kitty_img: KittyImageState,
    seqno: SequenceNo,
+
+    /// The unicode version that is in effect
+    unicode_version: UnicodeVersion,
 }

 fn default_color_map() -> HashMap<u16, RgbColor> {
@ -414,6 +418,8 @@ impl TerminalState {

        let color_map = default_color_map();

+        let unicode_version = UnicodeVersion(config.unicode_version());
+
        TerminalState {
            config,
            screen,
@ -467,6 +473,7 @@ impl TerminalState {
            user_vars: HashMap::new(),
            kitty_img: Default::default(),
            seqno: 0,
+            unicode_version,
        }
    }

--- a/term/src/terminalstate/performer.rs
+++ b/term/src/terminalstate/performer.rs
@ -134,7 +134,7 @@ impl<'a> Performer<'a> {
            // they occupy a cell so that we can re-emit them when we output them.
            // If we didn't do this, then we'd effectively filter them out from
            // the model, which seems like a lossy design choice.
-            let print_width = grapheme_column_width(g).max(1);
+            let print_width = grapheme_column_width(g, Some(self.unicode_version)).max(1);
            let wrappable = x + print_width >= width;

            let cell = Cell::new_grapheme_with_width(g, print_width, pen);
--- a/termwiz/Cargo.toml
+++ b/termwiz/Cargo.toml
@ -1,7 +1,7 @@
 [package]
 authors = ["Wez Furlong"]
 name = "termwiz"
-version = "0.13.0"
+version = "0.14.0"
 edition = "2018"
 repository = "https://github.com/wez/wezterm"
 description = "Terminal Wizardry for Unix and Windows"
--- a/termwiz/src/cell.rs
+++ b/termwiz/src/cell.rs
@ -517,6 +517,11 @@ where
 /// has length 2, otherwise, it has length 1 (we don't allow zero-length
 /// strings).
 struct TeenyString(usize);
+struct TeenyStringHeap {
+    bytes: Vec<u8>,
+    width: usize,
+}
+
 impl TeenyString {
    const fn marker_mask() -> usize {
        if cfg!(target_endian = "little") {
@ -591,9 +596,9 @@ impl TeenyString {

        let bytes = s.as_bytes();
        let len = bytes.len();
+        let width = width.unwrap_or_else(|| grapheme_column_width(s, None));

        if len < std::mem::size_of::<usize>() {
-            let width = width.unwrap_or_else(|| grapheme_column_width(s));
            debug_assert!(width < 3);

            let mut word = 0usize;
@ -607,7 +612,10 @@ impl TeenyString {
            let word = Self::set_marker_bit(word, width);
            Self(word)
        } else {
-            let vec = Box::new(bytes.to_vec());
+            let vec = Box::new(TeenyStringHeap {
+                bytes: bytes.to_vec(),
+                width,
+            });
            let ptr = Box::into_raw(vec);
            Self(ptr as usize)
        }
@ -654,7 +662,8 @@ impl TeenyString {
                1
            }
        } else {
-            grapheme_column_width(self.str())
+            let heap = self.0 as *const usize as *const TeenyStringHeap;
+            unsafe { (*heap).width }
        }
    }

@ -676,8 +685,8 @@ impl TeenyString {

            &bytes[0..len]
        } else {
-            let vec = self.0 as *const usize as *const Vec<u8>;
-            unsafe { (*vec).as_slice() }
+            let heap = self.0 as *const usize as *const TeenyStringHeap;
+            unsafe { (*heap).bytes.as_slice() }
        }
    }
 }
@ -819,26 +828,77 @@ impl Cell {
    }
 }

+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct UnicodeVersion(pub u8);
+
+pub const LATEST_UNICODE_VERSION: UnicodeVersion = UnicodeVersion(14);
+
 /// Returns the number of cells visually occupied by a sequence
-/// of graphemes
-pub fn unicode_column_width(s: &str) -> usize {
+/// of graphemes.
+/// Calls through to `grapheme_column_width` for each grapheme
+/// and sums up the length.
+pub fn unicode_column_width(s: &str, version: Option<UnicodeVersion>) -> usize {
    use unicode_segmentation::UnicodeSegmentation;
-    s.graphemes(true).map(grapheme_column_width).sum()
+    s.graphemes(true)
+        .map(|g| grapheme_column_width(g, version))
+        .sum()
 }

 /// Returns the number of cells visually occupied by a grapheme.
 /// The input string must be a single grapheme.
-pub fn grapheme_column_width(s: &str) -> usize {
-    let width = s
+///
+/// There are some frustrating dragons in the realm of terminal cell widths:
+///
+/// a) wcwidth and wcswidth are widely used by applications and may be
+///    several versions of unicode behind the current version
+/// b) The width of characters has changed in the past and will change again.
+///    Unicode Version 8 -> 9 made some characters wider.
+///    Unicode 14 defines Emoji variation selectors that change the
+///    width depending on trailing context in the unicode sequence.
+///
+/// Differing opinions about the width lead to visual artifacts in
+/// text and line editors, especially with respect to cursor placement.
+///
+/// There aren't any really great solutions to this problem, as a given
+/// terminal emulator may be fine locally but essentially breaks when
+/// ssh'ing into a remote system with a divergent wcwidth implementation.
+///
+/// This means that a global understanding of the unicode version that
+/// is in use isn't a good solution.
+///
+/// The approach that wezterm wants to take here is to define a
+/// configuration value that sets the starting level of unicode conformance,
+/// and to define an escape sequence that can push/pop a desired conformance
+/// level onto a stack maintained by the terminal emulator.
+///
+/// The terminal emulator can then pass the unicode version through to
+/// the Cell that is used to hold a grapheme, and that per-Cell version
+/// can then be used to calculate width.
+pub fn grapheme_column_width(s: &str, version: Option<UnicodeVersion>) -> usize {
+    let version = version.unwrap_or(LATEST_UNICODE_VERSION).0;
+
+    let width: usize = s
        .chars()
-        .map(|c| WcWidth::from_char(c).width_unicode_9_or_later())
-        .max()
-        .unwrap_or(0);
-    match Presentation::for_grapheme(s) {
-        (_, Some(Presentation::Emoji)) => 2,
-        (_, Some(Presentation::Text)) => 1,
-        (Presentation::Emoji, None) => 2,
-        (Presentation::Text, None) => width.into(),
+        .map(|c| {
+            let c = WcWidth::from_char(c);
+            if version >= 9 {
+                c.width_unicode_9_or_later()
+            } else {
+                c.width_unicode_8_or_earlier()
+            }
+        })
+        .sum::<u8>()
+        .into();
+
+    if version >= 14 {
+        match Presentation::for_grapheme(s) {
+            (_, Some(Presentation::Emoji)) => 2,
+            (_, Some(Presentation::Text)) => 1,
+            (Presentation::Emoji, None) => 2,
+            (Presentation::Text, None) => width,
+        }
+    } else {
+        width
    }
 }

@ -911,7 +971,7 @@ mod test {
        for c in foot.chars() {
            eprintln!("char: {:?}", c);
        }
-        assert_eq!(unicode_column_width(foot), 2, "{} should be 2", foot);
+        assert_eq!(unicode_column_width(foot, None), 2, "{} should be 2", foot);

        let women_holding_hands_dark_skin_tone_medium_light_skin_tone =
            "\u{1F469}\u{1F3FF}\u{200D}\u{1F91D}\u{200D}\u{1F469}\u{1F3FC}";
@ -938,18 +998,31 @@ mod test {
        for c in deaf_man.chars() {
            eprintln!("char: {:?}", c);
        }
-        assert_eq!(unicode_column_width(deaf_man), 2);
+        assert_eq!(unicode_column_width(deaf_man, None), 2);
+
+        let man_dancing = "\u{1F57A}";
+        assert_eq!(
+            unicode_column_width(man_dancing, Some(UnicodeVersion(9))),
+            2
+        );
+        assert_eq!(
+            unicode_column_width(man_dancing, Some(UnicodeVersion(8))),
+            1
+        );

        // This is a codepoint in the private use area
        let font_awesome_star = "\u{f005}";
        eprintln!("font_awesome_star {}", font_awesome_star.escape_debug());
-        assert_eq!(unicode_column_width(font_awesome_star), 1);
+        assert_eq!(unicode_column_width(font_awesome_star, None), 1);
+
+        let england_flag = "\u{1f3f4}\u{e0067}\u{e0062}\u{e0065}\u{e006e}\u{e0067}\u{e007f}";
+        assert_eq!(unicode_column_width(england_flag, None), 2);
    }

    #[test]
    fn issue_1161() {
        let x_ideographic_space_x = "x\u{3000}x";
-        assert_eq!(unicode_column_width(x_ideographic_space_x), 4);
+        assert_eq!(unicode_column_width(x_ideographic_space_x, None), 4);
        assert_eq!(
            x_ideographic_space_x.graphemes(true).collect::<Vec<_>>(),
            vec!["x".to_string(), "\u{3000}".to_string(), "x".to_string()],
@ -964,8 +1037,11 @@ mod test {
        let victory_hand = "\u{270c}";
        let victory_hand_text_presentation = "\u{270c}\u{fe0e}";

-        assert_eq!(unicode_column_width(victory_hand_text_presentation), 1);
-        assert_eq!(unicode_column_width(victory_hand), 1);
+        assert_eq!(
+            unicode_column_width(victory_hand_text_presentation, None),
+            1
+        );
+        assert_eq!(unicode_column_width(victory_hand, None), 1);

        assert_eq!(
            victory_hand_text_presentation
@ -985,7 +1061,11 @@ mod test {
                .collect::<Vec<_>>(),
            vec![copyright_emoji_presentation.to_string()]
        );
-        assert_eq!(unicode_column_width(copyright_emoji_presentation), 2);
+        assert_eq!(unicode_column_width(copyright_emoji_presentation, None), 2);
+        assert_eq!(
+            unicode_column_width(copyright_emoji_presentation, Some(UnicodeVersion(9))),
+            1
+        );

        let copyright_text_presentation = "\u{00A9}";
        assert_eq!(
@ -994,7 +1074,7 @@ mod test {
                .collect::<Vec<_>>(),
            vec![copyright_text_presentation.to_string()]
        );
-        assert_eq!(unicode_column_width(copyright_text_presentation), 1);
+        assert_eq!(unicode_column_width(copyright_text_presentation, None), 1);

        let raised_fist = "\u{270a}";
        let raised_fist_text = "\u{270a}\u{fe0e}";
@ -1002,12 +1082,12 @@ mod test {
            Presentation::for_grapheme(raised_fist),
            (Presentation::Emoji, None)
        );
-        assert_eq!(unicode_column_width(raised_fist), 2);
+        assert_eq!(unicode_column_width(raised_fist, None), 2);
        assert_eq!(
            Presentation::for_grapheme(raised_fist_text),
            (Presentation::Emoji, Some(Presentation::Text))
        );
-        assert_eq!(unicode_column_width(raised_fist_text), 1);
+        assert_eq!(unicode_column_width(raised_fist_text, None), 1);

        assert_eq!(
            raised_fist_text.graphemes(true).collect::<Vec<_>>(),
--- a/termwiz/src/cellcluster.rs
+++ b/termwiz/src/cellcluster.rs
@ -11,8 +11,10 @@ use std::borrow::Cow;
 pub struct CellCluster {
    pub attrs: CellAttributes,
    pub text: String,
+    pub width: usize,
    pub presentation: Presentation,
    byte_to_cell_idx: Vec<usize>,
+    byte_to_cell_width: Vec<u8>,
    pub first_cell_idx: usize,
 }

@ -27,6 +29,14 @@ impl CellCluster {
        }
    }

+    pub fn byte_to_cell_width(&self, byte_idx: usize) -> u8 {
+        if self.byte_to_cell_width.is_empty() {
+            1
+        } else {
+            self.byte_to_cell_width[byte_idx]
+        }
+    }
+
    /// Compute the list of CellClusters from a set of visible cells.
    /// The input is typically the result of calling `Line::visible_cells()`.
    pub fn make_cluster<'a>(
@ -60,6 +70,7 @@ impl CellCluster {
                        normalized_attr.into_owned(),
                        cell_str,
                        cell_idx,
+                        c.width(),
                    ))
                }
                Some(mut last) => {
@ -75,6 +86,7 @@ impl CellCluster {
                            normalized_attr.into_owned(),
                            cell_str,
                            cell_idx,
+                            c.width(),
                        ))
                    } else {
                        // Add to current cluster.
@ -102,9 +114,10 @@ impl CellCluster {
                                normalized_attr.into_owned(),
                                cell_str,
                                cell_idx,
+                                c.width(),
                            ))
                        } else {
-                            last.add(cell_str, cell_idx);
+                            last.add(cell_str, cell_idx, c.width());
                            Some(last)
                        }
                    }
@ -127,6 +140,7 @@ impl CellCluster {
        attrs: CellAttributes,
        text: &str,
        cell_idx: usize,
+        width: usize,
    ) -> CellCluster {
        let mut idx = Vec::new();
        if text.len() > 1 {
@ -137,20 +151,30 @@ impl CellCluster {
                idx.push(cell_idx);
            }
        }
+
+        let mut byte_to_cell_width = Vec::new();
+        if width > 1 {
+            for _ in 0..text.len() {
+                byte_to_cell_width.push(width as u8);
+            }
+        }
        let mut storage = String::with_capacity(hint);
        storage.push_str(text);

        CellCluster {
            attrs,
+            width,
            text: storage,
            presentation,
            byte_to_cell_idx: idx,
+            byte_to_cell_width,
            first_cell_idx: cell_idx,
        }
    }

    /// Add to this cluster
-    fn add(&mut self, text: &str, cell_idx: usize) {
+    fn add(&mut self, text: &str, cell_idx: usize, width: usize) {
+        self.width += width;
        if !self.byte_to_cell_idx.is_empty() {
            // We had at least one multi-byte cell in the past
            for _ in 0..text.len() {
@ -166,6 +190,22 @@ impl CellCluster {
                self.byte_to_cell_idx.push(cell_idx);
            }
        }
+
+        if !self.byte_to_cell_width.is_empty() {
+            // We had at least one double-wide cell in the past
+            for _ in 0..text.len() {
+                self.byte_to_cell_width.push(width as u8);
+            }
+        } else if width > 1 {
+            // Extrapolate the widths so far; they must all be single width
+            for _ in 0..self.text.len() {
+                self.byte_to_cell_width.push(1);
+            }
+            // and add the current double width cell
+            for _ in 0..text.len() {
+                self.byte_to_cell_width.push(width as u8);
+            }
+        }
        self.text.push_str(text);
    }
 }
--- a/termwiz/src/lineedit/host.rs
+++ b/termwiz/src/lineedit/host.rs
@ -67,7 +67,7 @@ pub trait LineEditorHost {
    /// as well as textual output.
    /// The default implementation returns the line as-is with no coloring.
    fn highlight_line(&self, line: &str, cursor_position: usize) -> (Vec<OutputElement>, usize) {
-        let cursor_x_pos = crate::cell::unicode_column_width(&line[0..cursor_position]);
+        let cursor_x_pos = crate::cell::unicode_column_width(&line[0..cursor_position], None);
        (vec![OutputElement::Text(line.to_owned())], cursor_x_pos)
    }

--- a/termwiz/src/surface/change.rs
+++ b/termwiz/src/surface/change.rs
@ -203,7 +203,7 @@ impl ChangeSequence {
                        self.cursor_y += 1;
                        self.cursor_x = 0;
                    } else {
-                        let len = unicode_column_width(g);
+                        let len = unicode_column_width(g, None);
                        self.cursor_x += len;
                    }
                }
--- a/test-data/emoji-presentation.sh
+++ b/test-data/emoji-presentation.sh
@ -7,3 +7,8 @@ echo -e "\u270c\ufe0f  Victory hand, explicit emoji presentation"
 echo -e "\u270a  Raised fist, emoji presentation by default"
 echo -e "\u270a\ufe0e  Raised fist, explicit text presentation"
 echo -e "\u270a\ufe0f  Raised fist, explicit emoji presentation"
+
+echo -e "\u2716  Multiply, text presentation by default"
+echo -e "\u2716\ufe0e  Multiply, explicit text presentation"
+echo -e "\u2716\ufe0f  Multiply, explicit emoji presentation"
+
--- a/wezterm-client/src/pane/renderable.rs
+++ b/wezterm-client/src/pane/renderable.rs
@ -659,7 +659,7 @@ impl RenderableState {
                    let col = inner
                        .dimensions
                        .cols
-                        .saturating_sub(wezterm_term::unicode_column_width(&status));
+                        .saturating_sub(wezterm_term::unicode_column_width(&status, None));

                    let mut attr = CellAttributes::default();
                    attr.set_foreground(AnsiColor::White);
--- a/wezterm-font/src/shaper/harfbuzz.rs
+++ b/wezterm-font/src/shaper/harfbuzz.rs
@ -9,7 +9,7 @@ use log::error;
 use ordered_float::NotNan;
 use std::cell::{RefCell, RefMut};
 use std::collections::HashMap;
-use termwiz::cell::{unicode_column_width, Presentation};
+use termwiz::cell::Presentation;
 use thiserror::Error;
 use unicode_segmentation::UnicodeSegmentation;

@ -25,13 +25,11 @@ struct Info {
 }

 fn make_glyphinfo(text: &str, font_idx: usize, info: &Info) -> GlyphInfo {
-    let num_cells = unicode_column_width(text) as u8;
    let is_space = text == " ";
    GlyphInfo {
        #[cfg(debug_assertions)]
        text: text.into(),
        is_space,
-        num_cells,
        font_idx,
        glyph_pos: info.codepoint,
        cluster: info.cluster as u32,
@ -585,7 +583,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 180,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "a".into(),
                        x_advance: PixelLength::new(6.),
@ -598,7 +595,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 205,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "b".into(),
                        x_advance: PixelLength::new(6.),
@ -611,7 +607,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 206,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "c".into(),
                        x_advance: PixelLength::new(6.),
@ -633,7 +628,6 @@ mod test {
                    is_space: false,
                    font_idx: 0,
                    glyph_pos: 726,
-                    num_cells: 1,
                    #[cfg(debug_assertions)]
                    text: "<".into(),
                    x_advance: PixelLength::new(6.),
@ -657,7 +651,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 1212,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "<".into(),
                        x_advance: PixelLength::new(6.),
@ -670,7 +663,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 1065,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "-".into(),
                        x_advance: PixelLength::new(6.),
@ -693,7 +685,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 726,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "<".into(),
                        x_advance: PixelLength::new(6.),
@ -706,7 +697,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 1212,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "-".into(),
                        x_advance: PixelLength::new(6.),
@ -719,7 +709,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 623,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "-".into(),
                        x_advance: PixelLength::new(6.),
@ -743,7 +732,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 350,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "x".into(),
                        x_advance: PixelLength::new(6.),
@ -756,7 +744,6 @@ mod test {
                        text: " ".into(),
                        is_space: true,
                        cluster: 1,
-                        num_cells: 1,
                        font_idx: 0,
                        glyph_pos: 686,
                        x_advance: PixelLength::new(6.),
@ -769,7 +756,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 350,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "x".into(),
                        x_advance: PixelLength::new(6.),
@ -795,7 +781,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 350,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "x".into(),
                        x_advance: PixelLength::new(6.),
@ -808,7 +793,6 @@ mod test {
                        text: "\u{3000}".into(),
                        is_space: false,
                        cluster: 1,
-                        num_cells: 2,
                        font_idx: 0,
                        glyph_pos: 686,
                        x_advance: PixelLength::new(10.),
@ -821,7 +805,6 @@ mod test {
                        is_space: false,
                        font_idx: 0,
                        glyph_pos: 350,
-                        num_cells: 1,
                        #[cfg(debug_assertions)]
                        text: "x".into(),
                        x_advance: PixelLength::new(6.),
--- a/wezterm-font/src/shaper/mod.rs
+++ b/wezterm-font/src/shaper/mod.rs
@ -13,8 +13,6 @@ pub struct GlyphInfo {
    pub is_space: bool,
    /// Offset within text
    pub cluster: u32,
-    /// How many cells/columns this glyph occupies horizontally
-    pub num_cells: u8,
    /// Which font alternative to use; index into Font.fonts
    pub font_idx: FallbackIdx,
    /// Which freetype glyph to load
--- a/wezterm-gui/src/glyphcache.rs
+++ b/wezterm-gui/src/glyphcache.rs
@ -56,6 +56,7 @@ pub struct SizedBlockKey {
 pub struct GlyphKey {
    pub font_idx: usize,
    pub glyph_pos: u32,
+    pub num_cells: u8,
    pub style: TextStyle,
    pub followed_by_space: bool,
    pub metric: CellMetricKey,
@ -72,6 +73,7 @@ pub struct GlyphKey {
 pub struct BorrowedGlyphKey<'a> {
    pub font_idx: usize,
    pub glyph_pos: u32,
+    pub num_cells: u8,
    pub style: &'a TextStyle,
    pub followed_by_space: bool,
    pub metric: CellMetricKey,
@ -84,6 +86,7 @@ impl<'a> BorrowedGlyphKey<'a> {
        GlyphKey {
            font_idx: self.font_idx,
            glyph_pos: self.glyph_pos,
+            num_cells: self.num_cells,
            style: self.style.clone(),
            followed_by_space: self.followed_by_space,
            metric: self.metric,
@ -101,6 +104,7 @@ impl GlyphKeyTrait for GlyphKey {
        BorrowedGlyphKey {
            font_idx: self.font_idx,
            glyph_pos: self.glyph_pos,
+            num_cells: self.num_cells,
            style: &self.style,
            followed_by_space: self.followed_by_space,
            metric: self.metric,
@ -345,10 +349,12 @@ impl<T: Texture2d> GlyphCache<T> {
        followed_by_space: bool,
        font: &Rc<LoadedFont>,
        metrics: &RenderMetrics,
+        num_cells: u8,
    ) -> anyhow::Result<Rc<CachedGlyph<T>>> {
        let key = BorrowedGlyphKey {
            font_idx: info.font_idx,
            glyph_pos: info.glyph_pos,
+            num_cells,
            style,
            followed_by_space,
            metric: metrics.into(),
@ -361,7 +367,7 @@ impl<T: Texture2d> GlyphCache<T> {
        }
        metrics::histogram!("glyph_cache.glyph_cache.miss.rate", 1.);

-        let glyph = match self.load_glyph(info, font, followed_by_space) {
+        let glyph = match self.load_glyph(info, font, followed_by_space, num_cells) {
            Ok(g) => g,
            Err(err) => {
                if err
@ -407,6 +413,7 @@ impl<T: Texture2d> GlyphCache<T> {
        info: &GlyphInfo,
        font: &Rc<LoadedFont>,
        followed_by_space: bool,
+        num_cells: u8,
    ) -> anyhow::Result<Rc<CachedGlyph<T>>> {
        let base_metrics;
        let idx_metrics;
@ -442,7 +449,7 @@ impl<T: Texture2d> GlyphCache<T> {
        // can happen somehow; see <https://github.com/wez/wezterm/issues/1042>
        // so let's treat 0 cells as 1 cell so that we don't try to divide by
        // zero below.
-        let num_cells = info.num_cells.max(1) as f64;
+        let num_cells = num_cells.max(1) as f64;

        // Maximum width allowed for this glyph based on its unicode width and
        // the dimensions of a cell
--- a/wezterm-gui/src/markdown.rs
+++ b/wezterm-gui/src/markdown.rs
@ -54,7 +54,7 @@ impl RenderState {

    fn wrap_text(&mut self, text: &str) {
        for word in text.split_word_bounds() {
-            let len = unicode_column_width(word);
+            let len = unicode_column_width(word, None);
            if self.x_pos + len < self.wrap_width {
                if !(self.x_pos == 0 && is_whitespace_word(word)) {
                    self.changes.push(word.into());
@ -110,7 +110,7 @@ impl RenderState {
                } else {
                    "  * ".to_owned()
                };
-                let indent_width = unicode_column_width(&list_item_prefix);
+                let indent_width = unicode_column_width(&list_item_prefix, None);
                self.current_indent.replace(indent_width);
                self.changes.push(list_item_prefix.into());
                self.x_pos += indent_width;
--- a/wezterm-gui/src/overlay/copy.rs
+++ b/wezterm-gui/src/overlay/copy.rs
@ -296,7 +296,7 @@ impl CopyRenderable {
            let mut last_was_whitespace = false;

            for (idx, word) in s.split_word_bounds().rev().enumerate() {
-                let width = unicode_column_width(word);
+                let width = unicode_column_width(word, None);

                if is_whitespace_word(word) {
                    self.cursor.x = self.cursor.x.saturating_sub(width);
@ -335,13 +335,13 @@ impl CopyRenderable {
            let mut words = s.split_word_bounds();

            if let Some(word) = words.next() {
-                self.cursor.x += unicode_column_width(word);
+                self.cursor.x += unicode_column_width(word, None);
                if !is_whitespace_word(word) {
                    // We were part-way through a word, so look
                    // at the next word
                    if let Some(word) = words.next() {
                        if is_whitespace_word(word) {
-                            self.cursor.x += unicode_column_width(word);
+                            self.cursor.x += unicode_column_width(word, None);
                            // If we advance off the RHS, move to the start of the word on the
                            // next line, if any!
                            if self.cursor.x >= width {
--- a/wezterm-gui/src/overlay/quickselect.rs
+++ b/wezterm-gui/src/overlay/quickselect.rs
@ -400,7 +400,7 @@ impl Pane for QuickSelectOverlay {
        // move to the search box
        let renderer = self.renderer.borrow();
        StableCursorPosition {
-            x: 8 + wezterm_term::unicode_column_width(&renderer.selection),
+            x: 8 + wezterm_term::unicode_column_width(&renderer.selection, None),
            y: renderer.compute_search_row(),
            shape: termwiz::surface::CursorShape::SteadyBlock,
            visibility: termwiz::surface::CursorVisibility::Visible,
--- a/wezterm-gui/src/overlay/search.rs
+++ b/wezterm-gui/src/overlay/search.rs
@ -271,7 +271,7 @@ impl Pane for SearchOverlay {
        // move to the search box
        let renderer = self.renderer.borrow();
        StableCursorPosition {
-            x: 8 + wezterm_term::unicode_column_width(&renderer.pattern),
+            x: 8 + wezterm_term::unicode_column_width(&renderer.pattern, None),
            y: renderer.compute_search_row(),
            shape: termwiz::surface::CursorShape::SteadyBlock,
            visibility: termwiz::surface::CursorVisibility::Visible,
--- a/wezterm-gui/src/shapecache.rs
+++ b/wezterm-gui/src/shapecache.rs
@ -45,7 +45,7 @@ where
    /// This function's goal is to handle those two cases.
    pub fn process(
        render_metrics: &RenderMetrics,
-        _cluster: &CellCluster,
+        cluster: &CellCluster,
        infos: &[GlyphInfo],
        glyphs: &[Rc<CachedGlyph<T>>],
    ) -> Vec<ShapedInfo<T>> {
@ -57,6 +57,7 @@ where
        let simple_mode = !config::configuration().experimental_shape_post_processing;

        for (info, glyph) in infos.iter().zip(glyphs.iter()) {
+            let info_num_cells = cluster.byte_to_cell_width(info.cluster as usize);
            if simple_mode {
                pos.push(Some(ShapedInfo {
                    pos: GlyphPosition {
@ -65,7 +66,7 @@ where
                            .texture
                            .as_ref()
                            .map_or(0, |t| t.coords.width() as u32),
-                        num_cells: info.num_cells,
+                        num_cells: info_num_cells,
                        x_offset: info.x_offset,
                        bearing_x: glyph.bearing_x.get() as f32,
                    },
@ -131,16 +132,16 @@ where
                    .texture
                    .as_ref()
                    .map_or(0, |t| t.coords.width() as u32);
-                let num_cells = if info.num_cells == 1
+                let num_cells = if info_num_cells == 1
                    // Only adjust the cell count if this glyph is wide enough
                    && glyph_width > (1.5 * render_metrics.cell_size.width as f64)
                {
                    (glyph_width / render_metrics.cell_size.width as f64).ceil() as u8
                } else {
-                    info.num_cells
+                    info_num_cells
                };
-                let bearing_x = if num_cells > info.num_cells && glyph.bearing_x.get() < 0. {
-                    ((num_cells - info.num_cells) as f64 * render_metrics.cell_size.width as f64)
+                let bearing_x = if num_cells > info_num_cells && glyph.bearing_x.get() < 0. {
+                    ((num_cells - info_num_cells) as f64 * render_metrics.cell_size.width as f64)
                        + glyph.bearing_x.get()
                } else {
                    glyph.bearing_x.get()
@ -163,7 +164,7 @@ where
                            .texture
                            .as_ref()
                            .map_or(0, |t| t.coords.width() as u32),
-                        num_cells: info.num_cells,
+                        num_cells: info_num_cells,
                        x_offset: info.x_offset,
                        bearing_x: glyph.bearing_x.get() as f32,
                    },
@ -281,6 +282,7 @@ mod test {
            .iter()
            .map(|info| {
                let cell_idx = cluster.byte_to_cell_idx(info.cluster as usize);
+                let num_cells = cluster.byte_to_cell_width(info.cluster as usize);

                let followed_by_space = match line.cells().get(cell_idx + 1) {
                    Some(cell) => cell.str() == " ",
@ -288,7 +290,14 @@ mod test {
                };

                glyph_cache
-                    .cached_glyph(info, &style, followed_by_space, font, render_metrics)
+                    .cached_glyph(
+                        info,
+                        &style,
+                        followed_by_space,
+                        font,
+                        render_metrics,
+                        num_cells,
+                    )
                    .unwrap()
            })
            .collect::<Vec<_>>();
--- a/wezterm-gui/src/tabbar.rs
+++ b/wezterm-gui/src/tabbar.rs
@ -81,7 +81,7 @@ fn call_format_tab_title(
                _ => {
                    let s = String::from_lua(v, &*lua)?;
                    Ok(Some(TitleText {
-                        len: unicode_column_width(&s),
+                        len: unicode_column_width(&s, None),
                        items: vec![FormatItem::Text(s)],
                    }))
                }
@ -133,7 +133,7 @@ fn compute_tab_title(
                // this if there are too many tabs to fit the window at
                // this width.
                if !config.use_fancy_tab_bar {
-                    while unicode_column_width(&title) < 5 {
+                    while unicode_column_width(&title, None) < 5 {
                        title.push(' ');
                    }
                }
@ -143,7 +143,7 @@ fn compute_tab_title(
            };

            TitleText {
-                len: unicode_column_width(&title),
+                len: unicode_column_width(&title, None),
                items: vec![FormatItem::Text(title)],
            }
        }
--- a/wezterm-gui/src/termwindow/render.rs
+++ b/wezterm-gui/src/termwindow/render.rs
@ -30,7 +30,7 @@ use smol::Timer;
 use std::ops::Range;
 use std::rc::Rc;
 use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
-use termwiz::cell::{unicode_column_width, Blink};
+use termwiz::cell::Blink;
 use termwiz::cellcluster::CellCluster;
 use termwiz::surface::{CursorShape, CursorVisibility};
 use wezterm_font::units::{IntPixelLength, PixelLength};
@ -1762,7 +1762,7 @@ impl super::TermWindow {
        for item in shaped {
            let cluster = &item.cluster;
            let attrs = &cluster.attrs;
-            let cluster_width = unicode_column_width(&cluster.text);
+            let cluster_width = cluster.width;

            let bg_is_default = attrs.background() == ColorAttribute::Default;
            let bg_color = params.palette.resolve_bg(attrs.background());
@ -2586,6 +2586,7 @@ impl super::TermWindow {
        let mut glyphs = Vec::with_capacity(infos.len());
        for info in infos {
            let cell_idx = cluster.byte_to_cell_idx(info.cluster as usize);
+            let num_cells = cluster.byte_to_cell_width(info.cluster as usize);

            if self.config.custom_block_glyphs {
                if let Some(cell) = line.cells().get(cell_idx) {
@ -2620,6 +2621,7 @@ impl super::TermWindow {
                followed_by_space,
                font,
                metrics,
+                num_cells,
            )?);
        }
        Ok(glyphs)
--- a/wezterm-ssh/Cargo.toml
+++ b/wezterm-ssh/Cargo.toml
@ -45,5 +45,5 @@ rstest = "0.11"
 shell-words = "1.0"
 smol-potat = "1.1.2"
 structopt = "0.3"
-termwiz = { version = "0.13", path = "../termwiz" }
+termwiz = { version = "0.14", path = "../termwiz" }
 whoami = "1.1"
--- a/wezterm-ssh/examples/ssh.rs
+++ b/wezterm-ssh/examples/ssh.rs
@ -26,12 +26,15 @@ impl LineEditorHost for PasswordPromptHost {
            // Rewrite the input so that we can obscure the password
            // characters when output to the terminal widget
            let placeholder = "🔑";
-            let grapheme_count = unicode_column_width(line);
+            let grapheme_count = unicode_column_width(line, None);
            let mut output = vec![];
            for _ in 0..grapheme_count {
                output.push(OutputElement::Text(placeholder.to_string()));
            }
-            (output, unicode_column_width(placeholder) * cursor_position)
+            (
+                output,
+                unicode_column_width(placeholder, None) * cursor_position,
+            )
        }
    }
 }