1
1
mirror of https://github.com/wez/wezterm.git synced 2024-12-23 21:32:13 +03:00

termwiz: better fidelity Emoji_Presentation logic

Make a distinction between default and selected presentation,
and account for that in the cell width.

Add a method to the cell that returns the effective presentation.

refs: https://github.com/wez/wezterm/issues/997
This commit is contained in:
Wez Furlong 2021-08-04 22:36:17 -07:00
parent e3acbd594f
commit 28e8b5ff2f
4 changed files with 1608 additions and 48 deletions

1261
termwiz/data/emoji-data.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
//! Model a cell in the terminal display
use crate::color::{ColorAttribute, PaletteIndex};
pub use crate::emoji::Presentation;
pub use crate::escape::osc::Hyperlink;
use crate::image::ImageCell;
#[cfg(feature = "use_serde")]
@ -647,6 +648,16 @@ impl Cell {
}
}
/// Reports the effective presentation (text or emoji) of this cell.
///
/// An explicit variation selector in the cell's grapheme takes
/// precedence; without one, the grapheme's default presentation is
/// returned. The cell width already accounts for this choice, and the
/// result is additionally useful for picking a suitable font.
pub fn presentation(&self) -> Presentation {
    let (default, selected) = Presentation::for_grapheme(self.str());
    selected.unwrap_or(default)
}
/// Create a new cell holding the specified grapheme.
/// The grapheme is passed as a string slice and is intended to hold
/// double-width characters, or combining unicode sequences, that need
@ -700,46 +711,11 @@ pub fn unicode_column_width(s: &str) -> usize {
/// Returns the number of cells visually occupied by a grapheme.
/// The input string must be a single grapheme.
pub fn grapheme_column_width(s: &str) -> usize {
// Due to this issue:
// https://github.com/unicode-rs/unicode-width/issues/4
// we cannot simply use the unicode-width crate to compute
// the desired value.
// Let's check for emoji-ness for ourselves first
use xi_unicode::EmojiExt;
let mut emoji = false;
let mut implied_emoji_presentation = false;
for c in s.chars() {
if c == '\u{FE0F}' {
// Explicit emoji presentation
return 2;
} else if c == '\u{FE0E}' {
// Explicit text presentation
return 1;
} else if c.is_emoji_modifier_base() || c.is_emoji_modifier() {
// We'll probably use emoji presentation for this,
// but defer the decision until we've had a chance
// to look for an explicit presentation selection.
implied_emoji_presentation = true;
} else if c.is_emoji() {
emoji = true;
}
}
if implied_emoji_presentation {
return 2;
}
let width = UnicodeWidthStr::width(s);
if emoji {
// For sequences such as "deaf man", UnicodeWidthStr::width()
// returns 3 because of the widths of the component glyphs,
// rather than 2 for a single double width grapheme.
// If we saw any emoji within the characters then we assume
// that it can be a maximum of 2 cells in width.
width.min(2)
} else {
width
match Presentation::for_grapheme(s) {
(_, Some(Presentation::Emoji)) => 2,
(_, Some(Presentation::Text)) => 1,
(Presentation::Emoji, None) => 2,
(Presentation::Text, None) => UnicodeWidthStr::width(s).min(2),
}
}
@ -852,21 +828,21 @@ mod test {
#[test]
fn issue_997() {
use unicode_segmentation::UnicodeSegmentation;
let waving_hand = "\u{270c}";
let waving_hand_text_presentation = "\u{270c}\u{fe0e}";
let victory_hand = "\u{270c}";
let victory_hand_text_presentation = "\u{270c}\u{fe0e}";
assert_eq!(unicode_column_width(waving_hand_text_presentation), 1);
assert_eq!(unicode_column_width(waving_hand), 2);
assert_eq!(unicode_column_width(victory_hand_text_presentation), 1);
assert_eq!(unicode_column_width(victory_hand), 1);
assert_eq!(
waving_hand_text_presentation
victory_hand_text_presentation
.graphemes(true)
.collect::<Vec<_>>(),
vec![waving_hand_text_presentation.to_string()]
vec![victory_hand_text_presentation.to_string()]
);
assert_eq!(
waving_hand.graphemes(true).collect::<Vec<_>>(),
vec![waving_hand.to_string()]
victory_hand.graphemes(true).collect::<Vec<_>>(),
vec![victory_hand.to_string()]
);
let copyright_emoji_presentation = "\u{00A9}\u{FE0F}";
@ -886,5 +862,27 @@ mod test {
vec![copyright_text_presentation.to_string()]
);
assert_eq!(unicode_column_width(copyright_text_presentation), 1);
let raised_fist = "\u{270a}";
let raised_fist_text = "\u{270a}\u{fe0e}";
assert_eq!(
Presentation::for_grapheme(raised_fist),
(Presentation::Emoji, None)
);
assert_eq!(unicode_column_width(raised_fist), 2);
assert_eq!(
Presentation::for_grapheme(raised_fist_text),
(Presentation::Emoji, Some(Presentation::Text))
);
assert_eq!(unicode_column_width(raised_fist_text), 1);
assert_eq!(
raised_fist_text.graphemes(true).collect::<Vec<_>>(),
vec![raised_fist_text.to_string()]
);
assert_eq!(
raised_fist.graphemes(true).collect::<Vec<_>>(),
vec![raised_fist.to_string()]
);
}
}

299
termwiz/src/emoji.rs Normal file
View File

@ -0,0 +1,299 @@
/// The presentation style of a character or grapheme.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Presentation {
    /// Text presentation
    Text,
    /// Emoji presentation
    Emoji,
}

impl Presentation {
    /// Computes the presentation of the grapheme `s`.
    ///
    /// The first element of the result is the default presentation: it is
    /// `Emoji` when any char in `s` has emoji presentation according to
    /// [`Presentation::for_char`], and `Text` otherwise.
    ///
    /// The second element is the presentation explicitly requested by a
    /// variation selector (`U+FE0F` selects emoji, `U+FE0E` selects text),
    /// or `None` when `s` holds no selector. If several selectors are
    /// present the last one wins.
    pub fn for_grapheme(s: &str) -> (Self, Option<Self>) {
        s.chars()
            .fold((Self::Text, None), |(default, selected), ch| match ch {
                // Explicit emoji presentation
                '\u{FE0F}' => (default, Some(Self::Emoji)),
                // Explicit text presentation
                '\u{FE0E}' => (default, Some(Self::Text)),
                _ if Self::for_char(ch) == Self::Emoji => (Self::Emoji, selected),
                // `ch` may be some other combining sequence that says
                // nothing definitive about being text, so the default
                // is only ever promoted to emoji, never demoted.
                _ => (default, selected),
            })
    }

    /// Returns the default presentation of a single char.
    ///
    /// The table matches the Emoji_Presentation property from
    /// Unicode 13.0; adjacent entries are expressed as single ranges.
    pub fn for_char(c: char) -> Self {
        match u32::from(c) {
            0x231A..=0x231B
            | 0x23E9..=0x23EC
            | 0x23F0
            | 0x23F3
            | 0x25FD..=0x25FE
            | 0x2614..=0x2615
            | 0x2648..=0x2653
            | 0x267F
            | 0x2693
            | 0x26A1
            | 0x26AA..=0x26AB
            | 0x26BD..=0x26BE
            | 0x26C4..=0x26C5
            | 0x26CE
            | 0x26D4
            | 0x26EA
            | 0x26F2..=0x26F3
            | 0x26F5
            | 0x26FA
            | 0x26FD
            | 0x2705
            | 0x270A..=0x270B
            | 0x2728
            | 0x274C
            | 0x274E
            | 0x2753..=0x2755
            | 0x2757
            | 0x2795..=0x2797
            | 0x27B0
            | 0x27BF
            | 0x2B1B..=0x2B1C
            | 0x2B50
            | 0x2B55
            | 0x1F004
            | 0x1F0CF
            | 0x1F18E
            | 0x1F191..=0x1F19A
            | 0x1F1E6..=0x1F1FF
            | 0x1F201
            | 0x1F21A
            | 0x1F22F
            | 0x1F232..=0x1F236
            | 0x1F238..=0x1F23A
            | 0x1F250..=0x1F251
            | 0x1F300..=0x1F320
            | 0x1F32D..=0x1F335
            | 0x1F337..=0x1F37C
            | 0x1F37E..=0x1F393
            | 0x1F3A0..=0x1F3CA
            | 0x1F3CF..=0x1F3D3
            | 0x1F3E0..=0x1F3F0
            | 0x1F3F4
            | 0x1F3F8..=0x1F43E
            | 0x1F440
            | 0x1F442..=0x1F4FC
            | 0x1F4FF..=0x1F53D
            | 0x1F54B..=0x1F54E
            | 0x1F550..=0x1F567
            | 0x1F57A
            | 0x1F595..=0x1F596
            | 0x1F5A4
            | 0x1F5FB..=0x1F64F
            | 0x1F680..=0x1F6C5
            | 0x1F6CC
            | 0x1F6D0..=0x1F6D2
            | 0x1F6D5..=0x1F6D7
            | 0x1F6EB..=0x1F6EC
            | 0x1F6F4..=0x1F6FC
            | 0x1F7E0..=0x1F7EB
            | 0x1F90C..=0x1F93A
            | 0x1F93C..=0x1F945
            | 0x1F947..=0x1F978
            | 0x1F97A..=0x1F9CB
            | 0x1F9CD..=0x1F9FF
            | 0x1FA70..=0x1FA74
            | 0x1FA78..=0x1FA7A
            | 0x1FA80..=0x1FA86
            | 0x1FA90..=0x1FAA8
            | 0x1FAB0..=0x1FAB6
            | 0x1FAC0..=0x1FAC2
            | 0x1FAD0..=0x1FAD6 => Self::Emoji,
            _ => Self::Text,
        }
    }
}

View File

@ -40,6 +40,8 @@
//! * `widgets` - enables the widget layout and related traits
//! * `use_serde` - makes a number of structs serde serializable
mod emoji;
pub mod caps;
pub mod cell;
pub mod cellcluster;