diff --git a/Cargo.lock b/Cargo.lock index 2f9ca065c..d64d2114f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1392,6 +1392,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "finl_unicode" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69152938fc3cf544c50065ed78d321d0c5bf3433344f227eb2323bcf78370d34" + [[package]] name = "fixedbitset" version = "0.4.2" @@ -2628,6 +2634,7 @@ dependencies = [ "crossbeam", "downcast-rs", "filedescriptor", + "finl_unicode", "flume", "hostname", "k9", @@ -2654,7 +2661,6 @@ dependencies = [ "termwiz-funcs", "textwrap 0.15.0", "thiserror", - "unicode-segmentation", "url", "wezterm-dynamic", "wezterm-ssh", @@ -4650,6 +4656,7 @@ dependencies = [ "criterion", "env_logger", "filedescriptor", + "finl_unicode", "fixedbitset", "fnv", "hex", @@ -4691,11 +4698,11 @@ version = "0.1.0" dependencies = [ "anyhow", "config", + "finl_unicode", "lazy_static", "luahelper", "terminfo", "termwiz", - "unicode-segmentation", "wezterm-dynamic", ] @@ -5488,6 +5495,7 @@ dependencies = [ "enum-display-derive", "env_logger", "euclid", + "finl_unicode", "fontconfig", "freetype", "harfbuzz", @@ -5500,7 +5508,6 @@ dependencies = [ "rangeset", "termwiz", "thiserror", - "unicode-segmentation", "walkdir", "wezterm-bidi", "wezterm-color-types", @@ -5533,6 +5540,7 @@ dependencies = [ "euclid", "fastrand", "filedescriptor", + "finl_unicode", "fnv", "frecency", "futures", @@ -5701,6 +5709,7 @@ dependencies = [ "bitflags", "csscolorparser", "env_logger", + "finl_unicode", "hex", "image", "k9", @@ -5714,7 +5723,6 @@ dependencies = [ "terminfo", "termwiz", "unicode-normalization", - "unicode-segmentation", "url", "wezterm-bidi", "wezterm-dynamic", diff --git a/lua-api-crates/termwiz-funcs/Cargo.toml b/lua-api-crates/termwiz-funcs/Cargo.toml index f98c9aca9..9177f55f2 100644 --- a/lua-api-crates/termwiz-funcs/Cargo.toml +++ b/lua-api-crates/termwiz-funcs/Cargo.toml @@ -8,9 +8,9 @@ edition = "2021" [dependencies] anyhow = "1.0" config = { path = "../../config" } +finl_unicode = "1.0.2" terminfo = "0.7" wezterm-dynamic = { path = "../../wezterm-dynamic" } luahelper = { path = "../../luahelper" } termwiz = { path = "../../termwiz", features=["use_serde"] } -unicode-segmentation = "1.8" lazy_static = "1.4" diff --git a/lua-api-crates/termwiz-funcs/src/lib.rs b/lua-api-crates/termwiz-funcs/src/lib.rs index 7b96b494f..46df11237 100644 --- a/lua-api-crates/termwiz-funcs/src/lib.rs +++ b/lua-api-crates/termwiz-funcs/src/lib.rs @@ -1,5 +1,6 @@ use config::lua::get_or_create_module; use config::lua::mlua::{self, Lua, ToLua}; +use finl_unicode::grapheme_clusters::Graphemes; use luahelper::impl_lua_conversion_dynamic; use std::str::FromStr; use termwiz::caps::{Capabilities, ColorLevel, ProbeHints}; @@ -8,7 +9,6 @@ use termwiz::color::{AnsiColor, ColorAttribute, ColorSpec, SrgbaTuple}; use termwiz::input::Modifiers; use termwiz::render::terminfo::TerminfoRenderer; use termwiz::surface::change::Change; -use unicode_segmentation::UnicodeSegmentation; use wezterm_dynamic::{FromDynamic, ToDynamic}; pub fn register(lua: &Lua) -> anyhow::Result<()> { @@ -170,7 +170,8 @@ pub fn pad_left(mut result: String, width: usize) -> String { pub fn truncate_left(s: &str, max_width: usize) -> String { let mut result = vec![]; let mut len = 0; - for g in s.graphemes(true).rev() { + let graphemes: Vec<_> = Graphemes::new(s).collect(); + for &g in graphemes.iter().rev() { let g_len = grapheme_column_width(g, None); if g_len + len > max_width { break; @@ -186,7 +187,7 @@ pub fn truncate_left(s: &str, max_width: usize) -> String { pub fn truncate_right(s: &str, max_width: usize) -> String { let mut result = String::new(); let mut len = 0; - for g in s.graphemes(true) { + for g in Graphemes::new(s) { let g_len = grapheme_column_width(g, None); if g_len + len > max_width { break; diff --git a/mux/Cargo.toml b/mux/Cargo.toml index 5440ab8c6..df6ae7601 100644 --- a/mux/Cargo.toml +++ b/mux/Cargo.toml @@ -17,6 +17,7 @@ config = { path = "../config" } crossbeam = "0.8" downcast-rs = "1.0" filedescriptor = { version="0.8", path = "../filedescriptor" } +finl_unicode = "1.0.2" hostname = "0.3" lazy_static = "1.4" libc = "0.2" @@ -40,7 +41,6 @@ termwiz = { path = "../termwiz" } termwiz-funcs = { path = "../lua-api-crates/termwiz-funcs" } textwrap = "0.15" thiserror = "1.0" -unicode-segmentation = "1.8" url = "2" wezterm-ssh = { path = "../wezterm-ssh" } wezterm-dynamic = { path = "../wezterm-dynamic" } diff --git a/mux/src/connui.rs b/mux/src/connui.rs index f5b52cf16..7a0a651ea 100644 --- a/mux/src/connui.rs +++ b/mux/src/connui.rs @@ -1,6 +1,7 @@ use crate::termwiztermtab; use anyhow::{anyhow, bail, Context as _}; use crossbeam::channel::{unbounded, Receiver, Sender}; +use finl_unicode::grapheme_clusters::Graphemes; use promise::spawn::block_on; use promise::Promise; use std::sync::Mutex; @@ -9,7 +10,6 @@ use termwiz::cell::{unicode_column_width, CellAttributes}; use termwiz::lineedit::*; use termwiz::surface::{Change, Position}; use termwiz::terminal::*; -use unicode_segmentation::UnicodeSegmentation; use wezterm_term::TerminalSize; #[derive(Default)] @@ -146,7 +146,7 @@ impl ConnectionUIImpl { let mut reversed_string = String::new(); let mut default_string = String::new(); let mut col = 0; - for grapheme in message.graphemes(true) { + for grapheme in Graphemes::new(&message) { // Once we've passed the elapsed column, full up the string // that we'll render with default attributes instead. if col > prog_width { diff --git a/term/Cargo.toml b/term/Cargo.toml index 7ebb1c4b9..a4a7cb62e 100644 --- a/term/Cargo.toml +++ b/term/Cargo.toml @@ -18,6 +18,7 @@ anyhow = "1.0" bitflags = "1.3" csscolorparser = "0.6" miniz_oxide = "0.4" +finl_unicode = "1.0.2" hex = "0.4" image = "0.24" lazy_static = "1.4" @@ -28,7 +29,6 @@ ordered-float = "3.0" serde = {version="1.0", features = ["rc"]} terminfo = "0.7" unicode-normalization = "0.1.21" -unicode-segmentation = "1.8" url = "2" wezterm-bidi = { path = "../bidi" } wezterm-dynamic = { path = "../wezterm-dynamic" } @@ -36,7 +36,6 @@ wezterm-dynamic = { path = "../wezterm-dynamic" } [dev-dependencies] env_logger = "0.9" k9 = "0.11.0" -unicode-normalization = "0.1" [dependencies.termwiz] version = "0.17" diff --git a/term/src/terminalstate/performer.rs b/term/src/terminalstate/performer.rs index ec19bf8ea..d37bc211f 100644 --- a/term/src/terminalstate/performer.rs +++ b/term/src/terminalstate/performer.rs @@ -3,6 +3,7 @@ use crate::terminalstate::{ default_color_map, CharSet, MouseEncoding, TabStop, UnicodeVersionStackEntry, }; use crate::{ClipboardSelection, Position, TerminalState, VisibleRowIndex, DCS, ST}; +use finl_unicode::grapheme_clusters::Graphemes; use log::{debug, error}; use num_traits::FromPrimitive; use ordered_float::NotNan; @@ -126,7 +127,7 @@ impl<'a> Performer<'a> { p.as_str() }; - for g in unicode_segmentation::UnicodeSegmentation::graphemes(text, true) { + for g in Graphemes::new(text) { let g = self.remap_grapheme(g); let print_width = grapheme_column_width(g, Some(self.unicode_version)); diff --git a/term/src/test/mod.rs b/term/src/test/mod.rs index 8384a462e..56b441440 100644 --- a/term/src/test/mod.rs +++ b/term/src/test/mod.rs @@ -1177,8 +1177,8 @@ fn test_1573() { let recomposed: String = sequence.nfc().collect(); assert_eq!(recomposed, "\u{d55c}"); - use unicode_segmentation::UnicodeSegmentation; - let graphemes: Vec<_> = sequence.graphemes(true).collect(); + use finl_unicode::grapheme_clusters::Graphemes; + let graphemes: Vec<_> = Graphemes::new(sequence).collect(); assert_eq!(graphemes, vec![sequence]); } diff --git a/termwiz/Cargo.toml b/termwiz/Cargo.toml index 8c427bb82..9c6deb88c 100644 --- a/termwiz/Cargo.toml +++ b/termwiz/Cargo.toml @@ -18,6 +18,7 @@ cassowary = {version="0.3", optional=true} cfg-if = "1.0" anyhow = "1.0" filedescriptor = { version="0.8", path = "../filedescriptor" } +finl_unicode = "1.0.2" fixedbitset = "0.4" fnv = {version="1.0", optional=true} hex = "0.4" diff --git a/termwiz/src/cell.rs b/termwiz/src/cell.rs index e69da2bd5..3aa657b41 100644 --- a/termwiz/src/cell.rs +++ b/termwiz/src/cell.rs @@ -5,6 +5,7 @@ use crate::emoji_variation::WCWIDTH_TABLE; pub use crate::escape::osc::Hyperlink; use crate::image::ImageCell; use crate::widechar_width::WcWidth; +use finl_unicode::grapheme_clusters::Graphemes; #[cfg(feature = "use_serde")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::hash::{Hash, Hasher}; @@ -922,8 +923,7 @@ pub const LATEST_UNICODE_VERSION: UnicodeVersion = UnicodeVersion { /// Calls through to `grapheme_column_width` for each grapheme /// and sums up the length. pub fn unicode_column_width(s: &str, version: Option) -> usize { - use unicode_segmentation::UnicodeSegmentation; - s.graphemes(true) + Graphemes::new(s) .map(|g| grapheme_column_width(g, version)) .sum() } @@ -1023,7 +1023,6 @@ pub enum AttributeChange { #[cfg(test)] mod test { use super::*; - use unicode_segmentation::UnicodeSegmentation; #[test] fn teeny_string() { @@ -1135,7 +1134,7 @@ mod test { let x_ideographic_space_x = "x\u{3000}x"; assert_eq!(unicode_column_width(x_ideographic_space_x, None), 4); assert_eq!( - x_ideographic_space_x.graphemes(true).collect::>(), + Graphemes::new(x_ideographic_space_x).collect::>(), vec!["x".to_string(), "\u{3000}".to_string(), "x".to_string()], ); @@ -1155,21 +1154,17 @@ mod test { assert_eq!(unicode_column_width(victory_hand, None), 1); assert_eq!( - victory_hand_text_presentation - .graphemes(true) - .collect::>(), + Graphemes::new(victory_hand_text_presentation).collect::>(), vec![victory_hand_text_presentation.to_string()] ); assert_eq!( - victory_hand.graphemes(true).collect::>(), + Graphemes::new(victory_hand).collect::>(), vec![victory_hand.to_string()] ); let copyright_emoji_presentation = "\u{00A9}\u{FE0F}"; assert_eq!( - copyright_emoji_presentation - .graphemes(true) - .collect::>(), + Graphemes::new(copyright_emoji_presentation).collect::>(), vec![copyright_emoji_presentation.to_string()] ); assert_eq!(unicode_column_width(copyright_emoji_presentation, None), 2); @@ -1180,9 +1175,7 @@ mod test { let copyright_text_presentation = "\u{00A9}"; assert_eq!( - copyright_text_presentation - .graphemes(true) - .collect::>(), + Graphemes::new(copyright_text_presentation).collect::>(), vec![copyright_text_presentation.to_string()] ); assert_eq!(unicode_column_width(copyright_text_presentation, None), 1); @@ -1202,11 +1195,11 @@ mod test { assert_eq!(unicode_column_width(raised_fist_text, None), 2); assert_eq!( - raised_fist_text.graphemes(true).collect::>(), + Graphemes::new(raised_fist_text).collect::>(), vec![raised_fist_text.to_string()] ); assert_eq!( - raised_fist.graphemes(true).collect::>(), + Graphemes::new(raised_fist).collect::>(), vec![raised_fist.to_string()] ); } diff --git a/termwiz/src/surface/change.rs b/termwiz/src/surface/change.rs index b15eb73d7..8c4979028 100644 --- a/termwiz/src/surface/change.rs +++ b/termwiz/src/surface/change.rs @@ -2,10 +2,10 @@ use crate::cell::{unicode_column_width, AttributeChange, CellAttributes}; use crate::color::ColorAttribute; pub use crate::image::{ImageData, TextureCoordinate}; use crate::surface::{CursorShape, CursorVisibility, Position}; +use finl_unicode::grapheme_clusters::Graphemes; #[cfg(feature = "use_serde")] use serde::{Deserialize, Serialize}; use std::sync::Arc; -use unicode_segmentation::UnicodeSegmentation; /// `Change` describes an update operation to be applied to a `Surface`. /// Changes to the active attributes (color, style), moving the cursor @@ -190,7 +190,7 @@ impl ChangeSequence { | Change::Title(_) | Change::ClearToEndOfScreen(_) => {} Change::Text(t) => { - for g in t.as_str().graphemes(true) { + for g in Graphemes::new(t.as_str()) { if self.cursor_x == self.screen_cols { self.cursor_y += 1; self.cursor_x = 0; diff --git a/termwiz/src/surface/line/clusterline.rs b/termwiz/src/surface/line/clusterline.rs index a8c7b3667..b6631b3b4 100644 --- a/termwiz/src/surface/line/clusterline.rs +++ b/termwiz/src/surface/line/clusterline.rs @@ -1,11 +1,11 @@ use crate::cell::{Cell, CellAttributes}; use crate::surface::line::CellRef; +use finl_unicode::grapheme_clusters::Graphemes; use fixedbitset::FixedBitSet; #[cfg(feature = "use_serde")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::convert::TryInto; use std::num::NonZeroU8; -use unicode_segmentation::UnicodeSegmentation; #[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, PartialEq)] @@ -166,7 +166,7 @@ impl ClusteredLine { let mut clusters = self.clusters.iter(); let cluster = clusters.next(); ClusterLineCellIter { - graphemes: self.text.graphemes(true), + graphemes: Graphemes::new(&self.text), clusters, cluster, idx: 0, @@ -307,7 +307,7 @@ impl ClusteredLine { } pub(crate) struct ClusterLineCellIter<'a> { - graphemes: unicode_segmentation::Graphemes<'a>, + graphemes: Graphemes<'a>, clusters: std::slice::Iter<'a, Cluster>, cluster: Option<&'a Cluster>, idx: usize, diff --git a/termwiz/src/surface/line/line.rs b/termwiz/src/surface/line/line.rs index 5549732f8..64c03bb1d 100644 --- a/termwiz/src/surface/line/line.rs +++ b/termwiz/src/surface/line/line.rs @@ -7,6 +7,7 @@ use crate::surface::line::linebits::LineBits; use crate::surface::line::storage::{CellStorage, VisibleCellIter}; use crate::surface::line::vecstorage::{VecStorage, VecStorageIter}; use crate::surface::{Change, SequenceNo, SEQ_ZERO}; +use finl_unicode::grapheme_clusters::Graphemes; #[cfg(feature = "use_serde")] use serde::{Deserialize, Serialize}; use siphasher::sip128::{Hasher128, SipHasher}; @@ -15,7 +16,6 @@ use std::borrow::Cow; use std::hash::Hash; use std::ops::Range; use std::sync::{Arc, Mutex, Weak}; -use unicode_segmentation::UnicodeSegmentation; use wezterm_bidi::{Direction, ParagraphDirectionHint}; #[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))] @@ -137,7 +137,7 @@ impl Line { ) -> Line { let mut cells = Vec::new(); - for sub in s.graphemes(true) { + for sub in Graphemes::new(s) { let cell = Cell::new_grapheme(sub, attrs.clone(), unicode_version); let width = cell.width(); cells.push(cell); @@ -850,7 +850,7 @@ impl Line { attr: CellAttributes, seqno: SequenceNo, ) { - for (i, c) in text.graphemes(true).enumerate() { + for (i, c) in Graphemes::new(text).enumerate() { let cell = Cell::new_grapheme(c, attr.clone(), None); let width = cell.width(); self.set_cell(i + start_idx, cell, seqno); diff --git a/termwiz/src/surface/mod.rs b/termwiz/src/surface/mod.rs index 52e50b253..ff35e5b3b 100644 --- a/termwiz/src/surface/mod.rs +++ b/termwiz/src/surface/mod.rs @@ -2,12 +2,12 @@ use crate::cell::{AttributeChange, Cell, CellAttributes}; use crate::color::ColorAttribute; use crate::image::ImageCell; use crate::surface::line::CellRef; +use finl_unicode::grapheme_clusters::Graphemes; use ordered_float::NotNan; #[cfg(feature = "use_serde")] use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::cmp::min; -use unicode_segmentation::UnicodeSegmentation; use wezterm_dynamic::{FromDynamic, ToDynamic}; pub mod change; @@ -407,7 +407,7 @@ impl Surface { } fn print_text(&mut self, text: &str) { - for g in UnicodeSegmentation::graphemes(text, true) { + for g in Graphemes::new(text) { if g == "\r\n" { self.xpos = 0; let new_y = self.ypos + 1; diff --git a/wezterm-font/Cargo.toml b/wezterm-font/Cargo.toml index 4135f9278..b66af542e 100644 --- a/wezterm-font/Cargo.toml +++ b/wezterm-font/Cargo.toml @@ -19,6 +19,7 @@ config = { path = "../config" } encoding_rs = "0.8" enum-display-derive = "0.1" euclid = "0.22" +finl_unicode = "1.0.2" freetype = { path = "../deps/freetype" } harfbuzz = { path = "../deps/harfbuzz" } lazy_static = "1.4" @@ -29,7 +30,6 @@ ordered-float = "3.0" rangeset = { path = "../rangeset" } termwiz = { path = "../termwiz" } thiserror = "1.0" -unicode-segmentation = "1.8" walkdir = "2" wezterm-color-types = { path = "../color-types" } wezterm-input-types = { path = "../wezterm-input-types" } diff --git a/wezterm-font/src/shaper/harfbuzz.rs b/wezterm-font/src/shaper/harfbuzz.rs index 4679d2685..91dbd1108 100644 --- a/wezterm-font/src/shaper/harfbuzz.rs +++ b/wezterm-font/src/shaper/harfbuzz.rs @@ -4,13 +4,13 @@ use crate::units::*; use crate::{ftwrap, hbwrap as harfbuzz}; use anyhow::{anyhow, Context}; use config::ConfigHandle; +use finl_unicode::grapheme_clusters::Graphemes; use log::error; use ordered_float::NotNan; use std::cell::{RefCell, RefMut}; use std::collections::HashMap; use std::ops::Range; use termwiz::cell::{unicode_column_width, Presentation}; -use unicode_segmentation::UnicodeSegmentation; use wezterm_bidi::Direction; // Changing these will switch to using harfbuzz's opentype functions. @@ -80,7 +80,7 @@ pub struct HarfbuzzShaper { /// original string. That isn't perfect, but it should /// be good enough to indicate that something isn't right. fn make_question_string(s: &str) -> String { - let len = s.graphemes(true).count(); + let len = Graphemes::new(s).count(); let mut result = String::new(); let c = if !is_question_string(s) { std::char::REPLACEMENT_CHARACTER diff --git a/wezterm-gui/Cargo.toml b/wezterm-gui/Cargo.toml index 9d4df417d..e187bd928 100644 --- a/wezterm-gui/Cargo.toml +++ b/wezterm-gui/Cargo.toml @@ -48,6 +48,7 @@ env-bootstrap = { path = "../env-bootstrap" } euclid = "0.22" fastrand = "1.6" filedescriptor = { version="0.8", path = "../filedescriptor" } +finl_unicode = "1.0.2" fnv = "1.0" frecency = { path = "../frecency" } futures = "0.3" diff --git a/wezterm-gui/src/tabbar.rs b/wezterm-gui/src/tabbar.rs index 57e14b6a4..37f63a4a7 100644 --- a/wezterm-gui/src/tabbar.rs +++ b/wezterm-gui/src/tabbar.rs @@ -1,5 +1,6 @@ use crate::termwindow::{PaneInformation, TabInformation, UIItem, UIItemType}; use config::{ConfigHandle, TabBarColors}; +use finl_unicode::grapheme_clusters::Graphemes; use mlua::FromLua; use termwiz::cell::{unicode_column_width, Cell, CellAttributes}; use termwiz::color::ColorSpec; @@ -401,7 +402,7 @@ pub fn parse_status_text(text: &str, default_cell: CellAttributes) -> Line { let mut print_buffer = String::new(); fn flush_print(buf: &mut String, cells: &mut Vec, pen: &CellAttributes) { - for g in unicode_segmentation::UnicodeSegmentation::graphemes(buf.as_str(), true) { + for g in Graphemes::new(buf.as_str()) { let cell = Cell::new_grapheme(g, pen.clone(), None); let width = cell.width(); cells.push(cell); diff --git a/wezterm-gui/src/termwindow/box_model.rs b/wezterm-gui/src/termwindow/box_model.rs index 638b2b963..14329ff64 100644 --- a/wezterm-gui/src/termwindow/box_model.rs +++ b/wezterm-gui/src/termwindow/box_model.rs @@ -11,11 +11,11 @@ use crate::utilsprites::RenderMetrics; use ::window::{RectF, WindowOps}; use anyhow::anyhow; use config::{Dimension, DimensionContext}; +use finl_unicode::grapheme_clusters::Graphemes; use std::cell::RefCell; use std::rc::Rc; use termwiz::cell::{grapheme_column_width, Presentation}; use termwiz::surface::Line; -use unicode_segmentation::UnicodeSegmentation; use wezterm_font::units::PixelUnit; use wezterm_font::LoadedFont; use wezterm_term::color::{ColorAttribute, ColorPalette}; @@ -583,7 +583,7 @@ impl super::TermWindow { for info in infos { let cell_start = &s[info.cluster as usize..]; - let mut iter = cell_start.graphemes(true).peekable(); + let mut iter = Graphemes::new(cell_start).peekable(); let grapheme = iter .next() .ok_or_else(|| anyhow!("info.cluster didn't map into string"))?;