diff --git a/Cargo.lock b/Cargo.lock index ce85ffd1a..3b57c49a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3947,9 +3947,9 @@ checksum = "3e5d7cd7ab3e47dda6e56542f4bbf3824c15234958c6e1bd6aaa347e93499fdc" [[package]] name = "ttf-parser" -version = "0.8.3" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7622061403fd00f0820df288e5a580e87d3ce15a1c4313c59fd1ffb77129903f" +checksum = "62ddb402ac6c2af6f7a2844243887631c4e94b51585b229fcfddb43958cd55ca" [[package]] name = "typenum" @@ -4459,7 +4459,7 @@ dependencies = [ "termwiz", "thiserror", "tinyvec", - "ttf-parser 0.8.3", + "ttf-parser 0.9.0", "unicode-general-category 0.3.0", "unicode-segmentation", "walkdir", diff --git a/wezterm-font/Cargo.toml b/wezterm-font/Cargo.toml index f33fa4936..21a127b0f 100644 --- a/wezterm-font/Cargo.toml +++ b/wezterm-font/Cargo.toml @@ -22,7 +22,7 @@ rangeset = { path = "../rangeset" } termwiz = { path = "../termwiz" } thiserror = "1.0" tinyvec = "1.1" # Note: constrained by the allsorts crate -ttf-parser = "0.8" +ttf-parser = "0.9" unicode-segmentation = "1.7" unicode-general-category = "0.3" walkdir = "2" diff --git a/wezterm-font/src/parser.rs b/wezterm-font/src/parser.rs index b521613bc..ced553f86 100644 --- a/wezterm-font/src/parser.rs +++ b/wezterm-font/src/parser.rs @@ -1,23 +1,9 @@ -//! This module uses the allsorts crate to parse font data. -//! At this time it is used only to extract name information, -//! but in the future I'd like to use its shaping functionality -#![allow(dead_code)] use crate::locator::FontDataHandle; use crate::shaper::GlyphInfo; -use allsorts::binary::read::{ReadScope, ReadScopeOwned}; -use allsorts::font_data_impl::read_cmap_subtable; -use allsorts::layout::{new_layout_cache, GDEFTable, LayoutCache, LayoutTable, GPOS, GSUB}; -use allsorts::post::PostTable; -use allsorts::tables::cmap::{Cmap, CmapSubtable}; -use allsorts::tables::{ - HeadTable, HheaTable, HmtxTable, MaxpTable, OffsetTable, OpenTypeFile, OpenTypeFont, -}; -use allsorts::tag; -use anyhow::{anyhow, Context}; +use anyhow::anyhow; use config::FontAttributes; -use std::collections::HashSet; -use std::convert::TryInto; use std::path::{Path, PathBuf}; +use ttf_parser::{fonts_in_collection, Face, Name, PlatformId}; #[derive(Debug)] pub enum MaybeShaped { @@ -28,19 +14,6 @@ pub enum MaybeShaped { /// Represents a parsed font pub struct ParsedFont { names: Names, - - cmap_subtable: CmapSubtable<'static>, - gpos_cache: Option>, - gsub_cache: Option>, - gdef_table: Option, - hmtx: HmtxTable<'static>, - post: PostTable<'static>, - hhea: HheaTable, - num_glyphs: u16, - units_per_em: u16, - - // Must be last: this keeps the 'static items alive - _scope: ReadScopeOwned, } #[derive(Debug)] @@ -52,168 +25,221 @@ pub struct Names { pub postscript_name: Option, } +/// Computes a score for a given name record; font files can contain +/// multiple variants of the same logical name encoded differently +/// for various operating systems and languages. +/// This function assigns a weight to each of the combinations; +/// we generally prefer the English rendition of the name in unicode. +/// +/// Borrowed from a similar bit of code in the allsorts crate. +fn score(name: &Name) -> Option { + match (name.platform_id(), name.encoding_id(), name.language_id()) { + (PlatformId::Windows, 10, _) => Some(1000), + (PlatformId::Unicode, 6, 0) => Some(900), + (PlatformId::Unicode, 4, 0) => Some(800), + (PlatformId::Windows, 1, 0x409) => Some(750), + (PlatformId::Windows, 1, lang) if lang != 0x409 => Some(700), + (PlatformId::Unicode, 3, 0) => Some(600), + (PlatformId::Unicode, 2, 0) => Some(500), + (PlatformId::Unicode, 1, 0) => Some(400), + (PlatformId::Unicode, 0, 0) => Some(300), + (PlatformId::Windows, 0, _) => Some(200), + (PlatformId::Macintosh, 0, 0) => Some(150), + (PlatformId::Macintosh, 0, lang) if lang != 0 => Some(100), + _ => None, + } +} + +/// Maybe convert a MacRoman byte to a unicode char. +/// Borrowed from the allsorts crate. +fn macroman_to_char(b: u8) -> Option { + match b { + 0..=127 => Some(b as char), + 128 => Some('Ä'), // A dieresis + 129 => Some('Å'), // A ring + 130 => Some('Ç'), // C cedilla + 131 => Some('É'), // E acute + 132 => Some('Ñ'), // N tilde + 133 => Some('Ö'), // O dieresis + 134 => Some('Ü'), // U dieresis + 135 => Some('á'), // a acute + 136 => Some('à'), // a grave + 137 => Some('â'), // a circumflex + 138 => Some('ä'), // a dieresis + 139 => Some('ã'), // a tilde + 140 => Some('å'), // a ring + 141 => Some('ç'), // c cedilla + 142 => Some('é'), // e acute + 143 => Some('è'), // e grave + 144 => Some('ê'), // e circumflex + 145 => Some('ë'), // e dieresis + 146 => Some('í'), // i acute + 147 => Some('ì'), // i grave + 148 => Some('î'), // i circumflex + 149 => Some('ï'), // i dieresis + 150 => Some('ñ'), // n tilde + 151 => Some('ó'), // o acute + 152 => Some('ò'), // o grave + 153 => Some('ô'), // o circumflex + 154 => Some('ö'), // o dieresis + 155 => Some('õ'), // o tilde + 156 => Some('ú'), // u acute + 157 => Some('ù'), // u grave + 158 => Some('û'), // u circumflex + 159 => Some('ü'), // u dieresis + 160 => Some('†'), // dagger + 161 => Some('°'), // degree + 162 => Some('¢'), // cent + 163 => Some('£'), // sterling + 164 => Some('§'), // section + 165 => Some('•'), // bullet + 166 => Some('¶'), // paragraph + 167 => Some('ß'), // German double s + 168 => Some('®'), // registered + 169 => Some('©'), // copyright + 170 => Some('™'), // trademark + 171 => Some('´'), // acute + 172 => Some('¨'), // diaeresis + 174 => Some('Æ'), // AE + 175 => Some('Ø'), // O slash + 177 => Some('±'), // plusminus + 180 => Some('¥'), // yen + 181 => Some('µ'), // micro + 187 => Some('ª'), // ordfeminine + 188 => Some('º'), // ordmasculine + 190 => Some('æ'), // ae + 191 => Some('ø'), // o slash + 192 => Some('¿'), // question down + 193 => Some('¡'), // exclamation down + 194 => Some('¬'), // not + 196 => Some('ƒ'), // florin + 199 => Some('«'), // left guille + 200 => Some('»'), // right guille + 201 => Some('…'), // ellipsis + 202 => Some(' '), // non-breaking space + 203 => Some('À'), // A grave + 204 => Some('Ã'), // A tilde + 205 => Some('Õ'), // O tilde + 206 => Some('Œ'), // OE + 207 => Some('œ'), // oe + 208 => Some('–'), // endash + 209 => Some('—'), // emdash + 210 => Some('“'), // ldquo + 211 => Some('”'), // rdquo + 212 => Some('‘'), // lsquo + 213 => Some('’'), // rsquo + 214 => Some('÷'), // divide + 216 => Some('ÿ'), // y dieresis + 217 => Some('Ÿ'), // Y dieresis + 218 => Some('⁄'), // fraction + 219 => Some('¤'), // currency + 220 => Some('‹'), // left single guille + 221 => Some('›'), // right single guille + 222 => Some('fi'), // fi + 223 => Some('fl'), // fl + 224 => Some('‡'), // double dagger + 225 => Some('·'), // middle dot + 226 => Some('‚'), // single quote base + 227 => Some('„'), // double quote base + 228 => Some('‰'), // perthousand + 229 => Some('Â'), // A circumflex + 230 => Some('Ê'), // E circumflex + 231 => Some('Á'), // A acute + 232 => Some('Ë'), // E dieresis + 233 => Some('È'), // E grave + 234 => Some('Í'), // I acute + 235 => Some('Î'), // I circumflex + 236 => Some('Ï'), // I dieresis + 237 => Some('Ì'), // I grave + 238 => Some('Ó'), // O acute + 239 => Some('Ô'), // O circumflex + 241 => Some('Ò'), // O grave + 242 => Some('Ú'), // U acute + 243 => Some('Û'), // U circumflex + 244 => Some('Ù'), // U grave + 245 => Some('ı'), // dot-less i + 246 => Some('^'), // circumflex + 247 => Some('˜'), // tilde + 248 => Some('¯'), // macron + 249 => Some('˘'), // breve + 250 => Some('˙'), // dot accent + 251 => Some('˚'), // ring + 252 => Some('¸'), // cedilla + 253 => Some('˝'), // Hungarian umlaut (double acute accent) + 254 => Some('˛'), // ogonek + 255 => Some('ˇ'), // caron + _ => None, + } +} + +/// Return a unicode version of the name +fn decode_name(name: &Name) -> Option { + if name.platform_id() == PlatformId::Macintosh { + Some( + name.name() + .iter() + .filter_map(|&b| macroman_to_char(b)) + .collect::(), + ) + } else { + name.to_string() + } +} + +/// Resolve a given name id from the face +fn get_name(face: &Face, id: u16) -> Option { + let mut best = 0; + let mut result = None; + + for name in face.names() { + if name.name_id() != id { + continue; + } + + if let Some(v) = score(&name) { + if v > best { + if let Some(value) = decode_name(&name) { + result.replace(value); + best = v; + } + } + } + } + + result +} + impl Names { - fn from_name_table_data(name_table: &[u8]) -> anyhow::Result { + fn from_face(face: &Face) -> anyhow::Result { Ok(Names { - full_name: get_name(name_table, 4).context("full_name")?, - unique: get_name(name_table, 3).ok(), - family: get_name(name_table, 1).ok(), - sub_family: get_name(name_table, 2).ok(), - postscript_name: get_name(name_table, 6).ok(), + full_name: get_name(face, ttf_parser::name_id::FULL_NAME) + .ok_or_else(|| anyhow!("missing full name"))?, + unique: get_name(face, ttf_parser::name_id::UNIQUE_ID), + family: get_name(face, ttf_parser::name_id::FAMILY), + sub_family: get_name(face, ttf_parser::name_id::SUBFAMILY), + postscript_name: get_name(face, ttf_parser::name_id::POST_SCRIPT_NAME), }) } } impl ParsedFont { - fn match_font_info( - fonts_selection: &[FontAttributes], - mut font_info: Vec<(Names, std::path::PathBuf, FontDataHandle)>, - loaded: &mut HashSet, - ) -> anyhow::Result> { - font_info.sort_by_key(|(names, _, _)| names.full_name.clone()); - for (names, _, _) in &font_info { - log::warn!("available font: {}", names.full_name); - } - - // Second, apply matching rules in order. We can't match - // against the font files as we discover them because the - // filesystem iteration order is arbitrary whereas our - // fonts_selection is strictly ordered - let mut handles = vec![]; - for attr in fonts_selection { - for (names, path, handle) in &font_info { - if font_info_matches(attr, &names) { - log::warn!( - "Using {} from {} for {:?}", - names.full_name, - path.display(), - attr - ); - handles.push(handle.clone()); - loaded.insert(attr.clone()); - break; - } - } - } - Ok(handles) - } - pub fn from_locator(handle: &FontDataHandle) -> anyhow::Result { - let (data, index) = match handle { - FontDataHandle::Memory { data, index, .. } => (data.to_vec(), *index), + match handle { FontDataHandle::OnDisk { path, index } => { let data = std::fs::read(path)?; - (data, *index) + let face = Face::from_slice(&data, *index)?; + Ok(Self { + names: Names::from_face(&face)?, + }) } - }; - let index = index as usize; - - let owned_scope = ReadScopeOwned::new(ReadScope::new(&data)); - - // This unsafe block and transmute are present so that we can - // extend the lifetime of the OpenTypeFile that we produce here. - // That in turn allows us to store all of these derived items - // into a struct and manage their lifetimes together. - let file: OpenTypeFile<'static> = unsafe { - std::mem::transmute( - owned_scope - .scope() - .read::() - .context("read OpenTypeFile")?, - ) - }; - - let otf = locate_offset_table(&file, index).context("locate_offset_table")?; - let name_table = name_table_data(&otf, &file.scope).context("name_table_data")?; - let names = - Names::from_name_table_data(name_table).context("Names::from_name_table_data")?; - - let head = otf - .read_table(&file.scope, tag::HEAD)? - .ok_or_else(|| anyhow!("HEAD table missing or broken"))? - .read::() - .context("read HeadTable")?; - let cmap = otf - .read_table(&file.scope, tag::CMAP)? - .ok_or_else(|| anyhow!("CMAP table missing or broken"))? - .read::() - .context("read Cmap")?; - let cmap_subtable: CmapSubtable<'static> = read_cmap_subtable(&cmap)? - .ok_or_else(|| anyhow!("CMAP subtable not found"))? - .1; - - let maxp = otf - .read_table(&file.scope, tag::MAXP)? - .ok_or_else(|| anyhow!("MAXP table not found"))? - .read::() - .context("read MaxpTable")?; - let num_glyphs = maxp.num_glyphs; - - let post = otf - .read_table(&file.scope, tag::POST)? - .ok_or_else(|| anyhow!("POST table not found"))? - .read::() - .context("read PostTable")?; - - let hhea = otf - .read_table(&file.scope, tag::HHEA)? - .ok_or_else(|| anyhow!("HHEA table not found"))? - .read::() - .context("read HheaTable")?; - let hmtx = otf - .read_table(&file.scope, tag::HMTX)? - .ok_or_else(|| anyhow!("HMTX table not found"))? - .read_dep::(( - usize::from(maxp.num_glyphs), - usize::from(hhea.num_h_metrics), - )) - .context("read_dep HmtxTable")?; - - let gdef_table: Option = otf - .find_table_record(tag::GDEF) - .map(|gdef_record| -> anyhow::Result { - Ok(gdef_record - .read_table(&file.scope)? - .read::() - .context("read GDEFTable")?) - }) - .transpose()?; - let opt_gpos_table = otf - .find_table_record(tag::GPOS) - .map(|gpos_record| -> anyhow::Result> { - Ok(gpos_record - .read_table(&file.scope)? - .read::>() - .context("read LayoutTable")?) - }) - .transpose()?; - let gpos_cache = opt_gpos_table.map(new_layout_cache); - - let gsub_cache = otf - .find_table_record(tag::GSUB) - .map(|gsub| -> anyhow::Result> { - Ok(gsub - .read_table(&file.scope)? - .read::>() - .context("read LayoutTable")?) - }) - .transpose()? - .map(new_layout_cache); - - Ok(Self { - names, - cmap_subtable, - post, - hmtx, - hhea, - gpos_cache, - gsub_cache, - gdef_table, - num_glyphs, - units_per_em: head.units_per_em, - _scope: owned_scope, - }) + FontDataHandle::Memory { data, index, .. } => { + let face = Face::from_slice(data, *index)?; + Ok(Self { + names: Names::from_face(&face)?, + }) + } + } } pub fn names(&self) -> &Names { @@ -221,24 +247,6 @@ impl ParsedFont { } } -fn collect_font_info( - name_table_data: &[u8], - path: &Path, - index: usize, - infos: &mut Vec<(Names, PathBuf, FontDataHandle)>, -) -> anyhow::Result<()> { - let names = Names::from_name_table_data(name_table_data)?; - infos.push(( - names, - path.to_path_buf(), - FontDataHandle::OnDisk { - path: path.to_path_buf(), - index: index.try_into()?, - }, - )); - Ok(()) -} - pub fn font_info_matches(attr: &FontAttributes, names: &Names) -> bool { if let Some(fam) = names.family.as_ref() { // TODO: correctly match using family and sub-family; @@ -271,36 +279,21 @@ pub fn resolve_font_from_ttc_data( attr: &FontAttributes, data: &[u8], ) -> anyhow::Result> { - let scope = allsorts::binary::read::ReadScope::new(&data); - let file = scope.read::()?; - - match &file.font { - OpenTypeFont::Single(ttf) => { - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - - let names = Names::from_name_table_data(name_table_data.data())?; + if let Some(size) = fonts_in_collection(data) { + for index in 0..size { + let face = Face::from_slice(data, index)?; + let names = Names::from_face(&face)?; if font_info_matches(attr, &names) { - Ok(Some(0)) - } else { - Ok(None) + return Ok(Some(index as usize)); } } - OpenTypeFont::Collection(ttc) => { - for (index, offset_table_offset) in ttc.offset_tables.iter().enumerate() { - let ttf = file - .scope - .offset(offset_table_offset as usize) - .read::()?; - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - let names = Names::from_name_table_data(name_table_data.data())?; - if font_info_matches(attr, &names) { - return Ok(Some(index)); - } - } + Ok(None) + } else { + let face = Face::from_slice(data, 0)?; + let names = Names::from_face(&face)?; + if font_info_matches(attr, &names) { + Ok(Some(0)) + } else { Ok(None) } } @@ -336,49 +329,17 @@ pub(crate) fn load_built_in_fonts( font!("../../assets/fonts/NotoColorEmoji.ttf"), font!("../../assets/fonts/LastResortHE-Regular.ttf"), ] { - let scope = allsorts::binary::read::ReadScope::new(&data); - let file = scope.read::()?; - let path = Path::new("memory"); - - match &file.font { - OpenTypeFont::Single(ttf) => { - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - - let names = Names::from_name_table_data(name_table_data.data())?; - font_info.push(( - names, - path.to_path_buf(), - FontDataHandle::Memory { - data: data.to_vec(), - index: 0, - name: name.to_string(), - }, - )); - } - OpenTypeFont::Collection(ttc) => { - for (index, offset_table_offset) in ttc.offset_tables.iter().enumerate() { - let ttf = file - .scope - .offset(offset_table_offset as usize) - .read::()?; - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - let names = Names::from_name_table_data(name_table_data.data())?; - font_info.push(( - names, - path.to_path_buf(), - FontDataHandle::Memory { - data: data.to_vec(), - index: index.try_into()?, - name: name.to_string(), - }, - )); - } - } - } + let face = Face::from_slice(data, 0)?; + let names = Names::from_face(&face)?; + font_info.push(( + names, + PathBuf::from(name), + FontDataHandle::Memory { + data: data.to_vec(), + index: 0, + name: name.to_string(), + }, + )); } Ok(()) @@ -389,69 +350,37 @@ pub(crate) fn parse_and_collect_font_info( font_info: &mut Vec<(Names, PathBuf, FontDataHandle)>, ) -> anyhow::Result<()> { let data = std::fs::read(path)?; - let scope = allsorts::binary::read::ReadScope::new(&data); - let file = scope.read::()?; + let size = fonts_in_collection(&data).unwrap_or(0); - match &file.font { - OpenTypeFont::Single(ttf) => { - let data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - collect_font_info(data.data(), path, 0, font_info)?; - } - OpenTypeFont::Collection(ttc) => { - for (index, offset_table_offset) in ttc.offset_tables.iter().enumerate() { - let ttf = file - .scope - .offset(offset_table_offset as usize) - .read::()?; - let data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - collect_font_info(data.data(), path, index, font_info).ok(); - } + fn load_one( + data: &[u8], + path: &Path, + index: u32, + font_info: &mut Vec<(Names, PathBuf, FontDataHandle)>, + ) -> anyhow::Result<()> { + let face = Face::from_slice(data, index)?; + let names = Names::from_face(&face)?; + font_info.push(( + names, + path.to_path_buf(), + FontDataHandle::OnDisk { + path: path.to_path_buf(), + index, + }, + )); + Ok(()) + } + + for index in 0..=size { + if let Err(err) = load_one(&data, path, index, font_info) { + log::trace!( + "error while parsing {} index {}: {}", + path.display(), + index, + err + ); } } Ok(()) } - -fn locate_offset_table<'a>(f: &OpenTypeFile<'a>, idx: usize) -> anyhow::Result> { - match &f.font { - OpenTypeFont::Single(ttf) if idx == 0 => Ok(ttf.clone()), - OpenTypeFont::Single(_) => Err(anyhow!("requested idx {} not present in single ttf", idx)), - OpenTypeFont::Collection(ttc) => { - // Ideally `read_item` would simply error when idx is out of range, - // but it generates a panic, so we need to check for ourselves. - if idx >= ttc.offset_tables.len() { - anyhow::bail!("requested idx {} out of range for ttc", idx); - } - let offset_table_offset = ttc - .offset_tables - .read_item(idx) - .map_err(|e| anyhow!("font idx={} is not present in ttc file: {}", idx, e))?; - let ttf = f - .scope - .offset(offset_table_offset as usize) - .read::()?; - Ok(ttf.clone()) - } - } -} - -/// Extract the name table data from a font -fn name_table_data<'a>(otf: &OffsetTable<'a>, scope: &ReadScope<'a>) -> anyhow::Result<&'a [u8]> { - let data = otf - .read_table(scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - Ok(data.data()) -} - -/// Extract a name from the name table -fn get_name(name_table_data: &[u8], name_id: u16) -> anyhow::Result { - let cstr = allsorts::get_name::fontcode_get_name(name_table_data, name_id) - .with_context(|| anyhow!("fontcode_get_name name_id:{}", name_id))? - .ok_or_else(|| anyhow!("name_id {} not found", name_id))?; - cstr.into_string() - .map_err(|e| anyhow!("name_id {} is not representable as String: {}", name_id, e)) -}