From eff3a13847f63f93fc958eeef775cef6971ed3eb Mon Sep 17 00:00:00 2001 From: Wez Furlong Date: Sat, 19 Dec 2020 21:55:24 -0800 Subject: [PATCH] font: switch parser to ttf_parser My original goal was to update to allsorts 0.5 but the API changes are significant and not clearly described. To make that transition easier, the prior commit moved the shaping logic into our allsorts shaper module, leaving the name parsing here in parser.rs. This commit now replaces that logic with ttf_parser, which is potentially faster (there's more emphasis on optimal code in that crate than in allsorts) but definitely simpler. It's not a slam-dunk transition: ttf_parser doesn't know how to decode MacRoman encoded text, so there's a bit of logic borrowed from allsorts here to handle that. --- Cargo.lock | 6 +- wezterm-font/Cargo.toml | 2 +- wezterm-font/src/parser.rs | 583 ++++++++++++++++--------------------- 3 files changed, 260 insertions(+), 331 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ce85ffd1a..3b57c49a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3947,9 +3947,9 @@ checksum = "3e5d7cd7ab3e47dda6e56542f4bbf3824c15234958c6e1bd6aaa347e93499fdc" [[package]] name = "ttf-parser" -version = "0.8.3" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7622061403fd00f0820df288e5a580e87d3ce15a1c4313c59fd1ffb77129903f" +checksum = "62ddb402ac6c2af6f7a2844243887631c4e94b51585b229fcfddb43958cd55ca" [[package]] name = "typenum" @@ -4459,7 +4459,7 @@ dependencies = [ "termwiz", "thiserror", "tinyvec", - "ttf-parser 0.8.3", + "ttf-parser 0.9.0", "unicode-general-category 0.3.0", "unicode-segmentation", "walkdir", diff --git a/wezterm-font/Cargo.toml b/wezterm-font/Cargo.toml index f33fa4936..21a127b0f 100644 --- a/wezterm-font/Cargo.toml +++ b/wezterm-font/Cargo.toml @@ -22,7 +22,7 @@ rangeset = { path = "../rangeset" } termwiz = { path = "../termwiz" } thiserror = "1.0" tinyvec = "1.1" # Note: constrained by the allsorts crate -ttf-parser = "0.8" +ttf-parser = "0.9" unicode-segmentation = "1.7" unicode-general-category = "0.3" walkdir = "2" diff --git a/wezterm-font/src/parser.rs b/wezterm-font/src/parser.rs index b521613bc..ced553f86 100644 --- a/wezterm-font/src/parser.rs +++ b/wezterm-font/src/parser.rs @@ -1,23 +1,9 @@ -//! This module uses the allsorts crate to parse font data. -//! At this time it is used only to extract name information, -//! but in the future I'd like to use its shaping functionality -#![allow(dead_code)] use crate::locator::FontDataHandle; use crate::shaper::GlyphInfo; -use allsorts::binary::read::{ReadScope, ReadScopeOwned}; -use allsorts::font_data_impl::read_cmap_subtable; -use allsorts::layout::{new_layout_cache, GDEFTable, LayoutCache, LayoutTable, GPOS, GSUB}; -use allsorts::post::PostTable; -use allsorts::tables::cmap::{Cmap, CmapSubtable}; -use allsorts::tables::{ - HeadTable, HheaTable, HmtxTable, MaxpTable, OffsetTable, OpenTypeFile, OpenTypeFont, -}; -use allsorts::tag; -use anyhow::{anyhow, Context}; +use anyhow::anyhow; use config::FontAttributes; -use std::collections::HashSet; -use std::convert::TryInto; use std::path::{Path, PathBuf}; +use ttf_parser::{fonts_in_collection, Face, Name, PlatformId}; #[derive(Debug)] pub enum MaybeShaped { @@ -28,19 +14,6 @@ pub enum MaybeShaped { /// Represents a parsed font pub struct ParsedFont { names: Names, - - cmap_subtable: CmapSubtable<'static>, - gpos_cache: Option>, - gsub_cache: Option>, - gdef_table: Option, - hmtx: HmtxTable<'static>, - post: PostTable<'static>, - hhea: HheaTable, - num_glyphs: u16, - units_per_em: u16, - - // Must be last: this keeps the 'static items alive - _scope: ReadScopeOwned, } #[derive(Debug)] @@ -52,168 +25,221 @@ pub struct Names { pub postscript_name: Option, } +/// Computes a score for a given name record; font files can contain +/// multiple variants of the same logical name encoded differently +/// for various operating systems and languages. +/// This function assigns a weight to each of the combinations; +/// we generally prefer the English rendition of the name in unicode. +/// +/// Borrowed from a similar bit of code in the allsorts crate. +fn score(name: &Name) -> Option { + match (name.platform_id(), name.encoding_id(), name.language_id()) { + (PlatformId::Windows, 10, _) => Some(1000), + (PlatformId::Unicode, 6, 0) => Some(900), + (PlatformId::Unicode, 4, 0) => Some(800), + (PlatformId::Windows, 1, 0x409) => Some(750), + (PlatformId::Windows, 1, lang) if lang != 0x409 => Some(700), + (PlatformId::Unicode, 3, 0) => Some(600), + (PlatformId::Unicode, 2, 0) => Some(500), + (PlatformId::Unicode, 1, 0) => Some(400), + (PlatformId::Unicode, 0, 0) => Some(300), + (PlatformId::Windows, 0, _) => Some(200), + (PlatformId::Macintosh, 0, 0) => Some(150), + (PlatformId::Macintosh, 0, lang) if lang != 0 => Some(100), + _ => None, + } +} + +/// Maybe convert a MacRoman byte to a unicode char. +/// Borrowed from the allsorts crate. +fn macroman_to_char(b: u8) -> Option { + match b { + 0..=127 => Some(b as char), + 128 => Some('Ä'), // A dieresis + 129 => Some('Å'), // A ring + 130 => Some('Ç'), // C cedilla + 131 => Some('É'), // E acute + 132 => Some('Ñ'), // N tilde + 133 => Some('Ö'), // O dieresis + 134 => Some('Ü'), // U dieresis + 135 => Some('á'), // a acute + 136 => Some('à'), // a grave + 137 => Some('â'), // a circumflex + 138 => Some('ä'), // a dieresis + 139 => Some('ã'), // a tilde + 140 => Some('å'), // a ring + 141 => Some('ç'), // c cedilla + 142 => Some('é'), // e acute + 143 => Some('è'), // e grave + 144 => Some('ê'), // e circumflex + 145 => Some('ë'), // e dieresis + 146 => Some('í'), // i acute + 147 => Some('ì'), // i grave + 148 => Some('î'), // i circumflex + 149 => Some('ï'), // i dieresis + 150 => Some('ñ'), // n tilde + 151 => Some('ó'), // o acute + 152 => Some('ò'), // o grave + 153 => Some('ô'), // o circumflex + 154 => Some('ö'), // o dieresis + 155 => Some('õ'), // o tilde + 156 => Some('ú'), // u acute + 157 => Some('ù'), // u grave + 158 => Some('û'), // u circumflex + 159 => Some('ü'), // u dieresis + 160 => Some('†'), // dagger + 161 => Some('°'), // degree + 162 => Some('¢'), // cent + 163 => Some('£'), // sterling + 164 => Some('§'), // section + 165 => Some('•'), // bullet + 166 => Some('¶'), // paragraph + 167 => Some('ß'), // German double s + 168 => Some('®'), // registered + 169 => Some('©'), // copyright + 170 => Some('™'), // trademark + 171 => Some('´'), // acute + 172 => Some('¨'), // diaeresis + 174 => Some('Æ'), // AE + 175 => Some('Ø'), // O slash + 177 => Some('±'), // plusminus + 180 => Some('¥'), // yen + 181 => Some('µ'), // micro + 187 => Some('ª'), // ordfeminine + 188 => Some('º'), // ordmasculine + 190 => Some('æ'), // ae + 191 => Some('ø'), // o slash + 192 => Some('¿'), // question down + 193 => Some('¡'), // exclamation down + 194 => Some('¬'), // not + 196 => Some('ƒ'), // florin + 199 => Some('«'), // left guille + 200 => Some('»'), // right guille + 201 => Some('…'), // ellipsis + 202 => Some(' '), // non-breaking space + 203 => Some('À'), // A grave + 204 => Some('Ã'), // A tilde + 205 => Some('Õ'), // O tilde + 206 => Some('Œ'), // OE + 207 => Some('œ'), // oe + 208 => Some('–'), // endash + 209 => Some('—'), // emdash + 210 => Some('“'), // ldquo + 211 => Some('”'), // rdquo + 212 => Some('‘'), // lsquo + 213 => Some('’'), // rsquo + 214 => Some('÷'), // divide + 216 => Some('ÿ'), // y dieresis + 217 => Some('Ÿ'), // Y dieresis + 218 => Some('⁄'), // fraction + 219 => Some('¤'), // currency + 220 => Some('‹'), // left single guille + 221 => Some('›'), // right single guille + 222 => Some('fi'), // fi + 223 => Some('fl'), // fl + 224 => Some('‡'), // double dagger + 225 => Some('·'), // middle dot + 226 => Some('‚'), // single quote base + 227 => Some('„'), // double quote base + 228 => Some('‰'), // perthousand + 229 => Some('Â'), // A circumflex + 230 => Some('Ê'), // E circumflex + 231 => Some('Á'), // A acute + 232 => Some('Ë'), // E dieresis + 233 => Some('È'), // E grave + 234 => Some('Í'), // I acute + 235 => Some('Î'), // I circumflex + 236 => Some('Ï'), // I dieresis + 237 => Some('Ì'), // I grave + 238 => Some('Ó'), // O acute + 239 => Some('Ô'), // O circumflex + 241 => Some('Ò'), // O grave + 242 => Some('Ú'), // U acute + 243 => Some('Û'), // U circumflex + 244 => Some('Ù'), // U grave + 245 => Some('ı'), // dot-less i + 246 => Some('^'), // circumflex + 247 => Some('˜'), // tilde + 248 => Some('¯'), // macron + 249 => Some('˘'), // breve + 250 => Some('˙'), // dot accent + 251 => Some('˚'), // ring + 252 => Some('¸'), // cedilla + 253 => Some('˝'), // Hungarian umlaut (double acute accent) + 254 => Some('˛'), // ogonek + 255 => Some('ˇ'), // caron + _ => None, + } +} + +/// Return a unicode version of the name +fn decode_name(name: &Name) -> Option { + if name.platform_id() == PlatformId::Macintosh { + Some( + name.name() + .iter() + .filter_map(|&b| macroman_to_char(b)) + .collect::(), + ) + } else { + name.to_string() + } +} + +/// Resolve a given name id from the face +fn get_name(face: &Face, id: u16) -> Option { + let mut best = 0; + let mut result = None; + + for name in face.names() { + if name.name_id() != id { + continue; + } + + if let Some(v) = score(&name) { + if v > best { + if let Some(value) = decode_name(&name) { + result.replace(value); + best = v; + } + } + } + } + + result +} + impl Names { - fn from_name_table_data(name_table: &[u8]) -> anyhow::Result { + fn from_face(face: &Face) -> anyhow::Result { Ok(Names { - full_name: get_name(name_table, 4).context("full_name")?, - unique: get_name(name_table, 3).ok(), - family: get_name(name_table, 1).ok(), - sub_family: get_name(name_table, 2).ok(), - postscript_name: get_name(name_table, 6).ok(), + full_name: get_name(face, ttf_parser::name_id::FULL_NAME) + .ok_or_else(|| anyhow!("missing full name"))?, + unique: get_name(face, ttf_parser::name_id::UNIQUE_ID), + family: get_name(face, ttf_parser::name_id::FAMILY), + sub_family: get_name(face, ttf_parser::name_id::SUBFAMILY), + postscript_name: get_name(face, ttf_parser::name_id::POST_SCRIPT_NAME), }) } } impl ParsedFont { - fn match_font_info( - fonts_selection: &[FontAttributes], - mut font_info: Vec<(Names, std::path::PathBuf, FontDataHandle)>, - loaded: &mut HashSet, - ) -> anyhow::Result> { - font_info.sort_by_key(|(names, _, _)| names.full_name.clone()); - for (names, _, _) in &font_info { - log::warn!("available font: {}", names.full_name); - } - - // Second, apply matching rules in order. We can't match - // against the font files as we discover them because the - // filesystem iteration order is arbitrary whereas our - // fonts_selection is strictly ordered - let mut handles = vec![]; - for attr in fonts_selection { - for (names, path, handle) in &font_info { - if font_info_matches(attr, &names) { - log::warn!( - "Using {} from {} for {:?}", - names.full_name, - path.display(), - attr - ); - handles.push(handle.clone()); - loaded.insert(attr.clone()); - break; - } - } - } - Ok(handles) - } - pub fn from_locator(handle: &FontDataHandle) -> anyhow::Result { - let (data, index) = match handle { - FontDataHandle::Memory { data, index, .. } => (data.to_vec(), *index), + match handle { FontDataHandle::OnDisk { path, index } => { let data = std::fs::read(path)?; - (data, *index) + let face = Face::from_slice(&data, *index)?; + Ok(Self { + names: Names::from_face(&face)?, + }) } - }; - let index = index as usize; - - let owned_scope = ReadScopeOwned::new(ReadScope::new(&data)); - - // This unsafe block and transmute are present so that we can - // extend the lifetime of the OpenTypeFile that we produce here. - // That in turn allows us to store all of these derived items - // into a struct and manage their lifetimes together. - let file: OpenTypeFile<'static> = unsafe { - std::mem::transmute( - owned_scope - .scope() - .read::() - .context("read OpenTypeFile")?, - ) - }; - - let otf = locate_offset_table(&file, index).context("locate_offset_table")?; - let name_table = name_table_data(&otf, &file.scope).context("name_table_data")?; - let names = - Names::from_name_table_data(name_table).context("Names::from_name_table_data")?; - - let head = otf - .read_table(&file.scope, tag::HEAD)? - .ok_or_else(|| anyhow!("HEAD table missing or broken"))? - .read::() - .context("read HeadTable")?; - let cmap = otf - .read_table(&file.scope, tag::CMAP)? - .ok_or_else(|| anyhow!("CMAP table missing or broken"))? - .read::() - .context("read Cmap")?; - let cmap_subtable: CmapSubtable<'static> = read_cmap_subtable(&cmap)? - .ok_or_else(|| anyhow!("CMAP subtable not found"))? - .1; - - let maxp = otf - .read_table(&file.scope, tag::MAXP)? - .ok_or_else(|| anyhow!("MAXP table not found"))? - .read::() - .context("read MaxpTable")?; - let num_glyphs = maxp.num_glyphs; - - let post = otf - .read_table(&file.scope, tag::POST)? - .ok_or_else(|| anyhow!("POST table not found"))? - .read::() - .context("read PostTable")?; - - let hhea = otf - .read_table(&file.scope, tag::HHEA)? - .ok_or_else(|| anyhow!("HHEA table not found"))? - .read::() - .context("read HheaTable")?; - let hmtx = otf - .read_table(&file.scope, tag::HMTX)? - .ok_or_else(|| anyhow!("HMTX table not found"))? - .read_dep::(( - usize::from(maxp.num_glyphs), - usize::from(hhea.num_h_metrics), - )) - .context("read_dep HmtxTable")?; - - let gdef_table: Option = otf - .find_table_record(tag::GDEF) - .map(|gdef_record| -> anyhow::Result { - Ok(gdef_record - .read_table(&file.scope)? - .read::() - .context("read GDEFTable")?) - }) - .transpose()?; - let opt_gpos_table = otf - .find_table_record(tag::GPOS) - .map(|gpos_record| -> anyhow::Result> { - Ok(gpos_record - .read_table(&file.scope)? - .read::>() - .context("read LayoutTable")?) - }) - .transpose()?; - let gpos_cache = opt_gpos_table.map(new_layout_cache); - - let gsub_cache = otf - .find_table_record(tag::GSUB) - .map(|gsub| -> anyhow::Result> { - Ok(gsub - .read_table(&file.scope)? - .read::>() - .context("read LayoutTable")?) - }) - .transpose()? - .map(new_layout_cache); - - Ok(Self { - names, - cmap_subtable, - post, - hmtx, - hhea, - gpos_cache, - gsub_cache, - gdef_table, - num_glyphs, - units_per_em: head.units_per_em, - _scope: owned_scope, - }) + FontDataHandle::Memory { data, index, .. } => { + let face = Face::from_slice(data, *index)?; + Ok(Self { + names: Names::from_face(&face)?, + }) + } + } } pub fn names(&self) -> &Names { @@ -221,24 +247,6 @@ impl ParsedFont { } } -fn collect_font_info( - name_table_data: &[u8], - path: &Path, - index: usize, - infos: &mut Vec<(Names, PathBuf, FontDataHandle)>, -) -> anyhow::Result<()> { - let names = Names::from_name_table_data(name_table_data)?; - infos.push(( - names, - path.to_path_buf(), - FontDataHandle::OnDisk { - path: path.to_path_buf(), - index: index.try_into()?, - }, - )); - Ok(()) -} - pub fn font_info_matches(attr: &FontAttributes, names: &Names) -> bool { if let Some(fam) = names.family.as_ref() { // TODO: correctly match using family and sub-family; @@ -271,36 +279,21 @@ pub fn resolve_font_from_ttc_data( attr: &FontAttributes, data: &[u8], ) -> anyhow::Result> { - let scope = allsorts::binary::read::ReadScope::new(&data); - let file = scope.read::()?; - - match &file.font { - OpenTypeFont::Single(ttf) => { - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - - let names = Names::from_name_table_data(name_table_data.data())?; + if let Some(size) = fonts_in_collection(data) { + for index in 0..size { + let face = Face::from_slice(data, index)?; + let names = Names::from_face(&face)?; if font_info_matches(attr, &names) { - Ok(Some(0)) - } else { - Ok(None) + return Ok(Some(index as usize)); } } - OpenTypeFont::Collection(ttc) => { - for (index, offset_table_offset) in ttc.offset_tables.iter().enumerate() { - let ttf = file - .scope - .offset(offset_table_offset as usize) - .read::()?; - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - let names = Names::from_name_table_data(name_table_data.data())?; - if font_info_matches(attr, &names) { - return Ok(Some(index)); - } - } + Ok(None) + } else { + let face = Face::from_slice(data, 0)?; + let names = Names::from_face(&face)?; + if font_info_matches(attr, &names) { + Ok(Some(0)) + } else { Ok(None) } } @@ -336,49 +329,17 @@ pub(crate) fn load_built_in_fonts( font!("../../assets/fonts/NotoColorEmoji.ttf"), font!("../../assets/fonts/LastResortHE-Regular.ttf"), ] { - let scope = allsorts::binary::read::ReadScope::new(&data); - let file = scope.read::()?; - let path = Path::new("memory"); - - match &file.font { - OpenTypeFont::Single(ttf) => { - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - - let names = Names::from_name_table_data(name_table_data.data())?; - font_info.push(( - names, - path.to_path_buf(), - FontDataHandle::Memory { - data: data.to_vec(), - index: 0, - name: name.to_string(), - }, - )); - } - OpenTypeFont::Collection(ttc) => { - for (index, offset_table_offset) in ttc.offset_tables.iter().enumerate() { - let ttf = file - .scope - .offset(offset_table_offset as usize) - .read::()?; - let name_table_data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - let names = Names::from_name_table_data(name_table_data.data())?; - font_info.push(( - names, - path.to_path_buf(), - FontDataHandle::Memory { - data: data.to_vec(), - index: index.try_into()?, - name: name.to_string(), - }, - )); - } - } - } + let face = Face::from_slice(data, 0)?; + let names = Names::from_face(&face)?; + font_info.push(( + names, + PathBuf::from(name), + FontDataHandle::Memory { + data: data.to_vec(), + index: 0, + name: name.to_string(), + }, + )); } Ok(()) @@ -389,69 +350,37 @@ pub(crate) fn parse_and_collect_font_info( font_info: &mut Vec<(Names, PathBuf, FontDataHandle)>, ) -> anyhow::Result<()> { let data = std::fs::read(path)?; - let scope = allsorts::binary::read::ReadScope::new(&data); - let file = scope.read::()?; + let size = fonts_in_collection(&data).unwrap_or(0); - match &file.font { - OpenTypeFont::Single(ttf) => { - let data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - collect_font_info(data.data(), path, 0, font_info)?; - } - OpenTypeFont::Collection(ttc) => { - for (index, offset_table_offset) in ttc.offset_tables.iter().enumerate() { - let ttf = file - .scope - .offset(offset_table_offset as usize) - .read::()?; - let data = ttf - .read_table(&file.scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - collect_font_info(data.data(), path, index, font_info).ok(); - } + fn load_one( + data: &[u8], + path: &Path, + index: u32, + font_info: &mut Vec<(Names, PathBuf, FontDataHandle)>, + ) -> anyhow::Result<()> { + let face = Face::from_slice(data, index)?; + let names = Names::from_face(&face)?; + font_info.push(( + names, + path.to_path_buf(), + FontDataHandle::OnDisk { + path: path.to_path_buf(), + index, + }, + )); + Ok(()) + } + + for index in 0..=size { + if let Err(err) = load_one(&data, path, index, font_info) { + log::trace!( + "error while parsing {} index {}: {}", + path.display(), + index, + err + ); } } Ok(()) } - -fn locate_offset_table<'a>(f: &OpenTypeFile<'a>, idx: usize) -> anyhow::Result> { - match &f.font { - OpenTypeFont::Single(ttf) if idx == 0 => Ok(ttf.clone()), - OpenTypeFont::Single(_) => Err(anyhow!("requested idx {} not present in single ttf", idx)), - OpenTypeFont::Collection(ttc) => { - // Ideally `read_item` would simply error when idx is out of range, - // but it generates a panic, so we need to check for ourselves. - if idx >= ttc.offset_tables.len() { - anyhow::bail!("requested idx {} out of range for ttc", idx); - } - let offset_table_offset = ttc - .offset_tables - .read_item(idx) - .map_err(|e| anyhow!("font idx={} is not present in ttc file: {}", idx, e))?; - let ttf = f - .scope - .offset(offset_table_offset as usize) - .read::()?; - Ok(ttf.clone()) - } - } -} - -/// Extract the name table data from a font -fn name_table_data<'a>(otf: &OffsetTable<'a>, scope: &ReadScope<'a>) -> anyhow::Result<&'a [u8]> { - let data = otf - .read_table(scope, allsorts::tag::NAME)? - .ok_or_else(|| anyhow!("name table is not present"))?; - Ok(data.data()) -} - -/// Extract a name from the name table -fn get_name(name_table_data: &[u8], name_id: u16) -> anyhow::Result { - let cstr = allsorts::get_name::fontcode_get_name(name_table_data, name_id) - .with_context(|| anyhow!("fontcode_get_name name_id:{}", name_id))? - .ok_or_else(|| anyhow!("name_id {} not found", name_id))?; - cstr.into_string() - .map_err(|e| anyhow!("name_id {} is not representable as String: {}", name_id, e)) -}