1
0
mirror of https://github.com/google/fonts.git synced 2025-01-05 17:05:26 +03:00

[tools/encoding] Script to generate filter lists from existing Namelists. #678

This commit is contained in:
Lasse Fister 2017-03-15 23:43:24 +01:00
parent f62e3bc9ae
commit 5790bdf8ce
2 changed files with 88 additions and 14 deletions

View File

@ -26,8 +26,17 @@
# # To reformat all Namelists in a directory.
# $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
# bash -c './namelist.py reformat "{}" > "{}__tmp" && mv "{}__tmp" "{}"' \;
#
# # To generate "uni names" and "nice names" filter lists for a Namelist
# # This will create the needed directories if missing
# $ ./namelist.py generate-filter-lists NameList.nam
#
# # To generate "uni names" and "nice names" filter lists for all Namelists
# $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
# ./namelist.py generate-filter-lists "{}" \;
from __future__ import print_function, unicode_literals
import sys
import os
from fontTools.ttLib import TTFont
from fontTools.unicode import Unicode
import codecs
@ -73,7 +82,7 @@ def reformat_namelist(filename, out=None):
if filename == '-':
_reformat_namelist(codecs.getreader('utf8')(sys.stdin), out)
return
with codecs.open(filename, 'r', 'utf-8') as f:
with codecs.open(filename, 'r', encoding='utf-8') as f:
_reformat_namelist(f, out)
def _reformat_namelist(f, out=None):
@ -92,7 +101,7 @@ def _reformat_namelist(f, out=None):
entry = (codepoint, None, line)
elif line.startswith(' '):
# unencoded name
name = filter_lists.production_name_to_friendly_name(line.rsplit(' ', 1)[1])
name = filter_lists.translate_name(line.rsplit(' ', 1)[1])
entry = (None, name, line)
if entry is not None:
@ -115,6 +124,47 @@ def _reformat_namelist(f, out=None):
# output left over lines at the end of the file
map(_print, before)
def _names_generator(filename):
with codecs.open(filename, 'r', encoding='utf-8') as f:
for line in f:
line = line.rstrip()
if line.startswith('0x'):
# uni chr
codepoint = google_fonts.get_codepoint_from_line(line)
name = filter_lists.get_name_by_unicode(codepoint)
if name is None:
prefix = 'u' if codepoint > 0xFFFF else 'uni'
name = '{0}{1:04X}'.format(prefix, codepoint)
yield name
elif line.startswith(' ' * 6):
# unencoded name
yield line.rsplit(' ', 1)[1]
def _mkdir(path):
try:
os.makedirs(path)
except OSError as exc:
if not os.path.isdir(path):
raise exc
def generate_filter_lists(filename):
    """Write the "nice names" and "uni names" filter lists for a Namelist.

    For the Namelist at *filename* two text files are written next to it:

    * ``<dir>/filter lists/nice names/<basename>.txt``
    * ``<dir>/filter lists/uni names/<basename>.txt``

    ``<basename>`` is the Namelist file name without its extension and
    without the leading ``GF-`` component. Missing directories are created.
    Every name yielded by ``_names_generator`` is written on its own line,
    translated with ``filter_lists.translate_name`` using
    ``production_name=False`` for the "nice names" file and
    ``production_name=True`` for the "uni names" file.
    """
    # 'GF-{script}-rest.nam' => '{script}-rest'
    # Only the first '-' separated component is dropped. Splitting twice and
    # taking the last part would also drop '{script}', so the Namelists of
    # different scripts would write to the same filter list file.
    stem = os.path.basename(filename).split('.', 1)[0]
    basename = stem.split('-', 1)[-1]
    filter_list_filename = '{0}.txt'.format(basename)

    dirname = os.path.dirname(filename)
    nice_names_filename = os.path.join(dirname, 'filter lists', 'nice names',
                                       filter_list_filename)
    prod_names_filename = os.path.join(dirname, 'filter lists', 'uni names',
                                       filter_list_filename)

    _mkdir(os.path.dirname(nice_names_filename))
    _mkdir(os.path.dirname(prod_names_filename))

    with codecs.open(nice_names_filename, 'w', encoding='utf-8') as nice_names_file, \
            codecs.open(prod_names_filename, 'w', encoding='utf-8') as prod_names_file:
        for name in _names_generator(filename):
            print(filter_lists.translate_name(name, production_name=False),
                  file=nice_names_file)
            print(filter_lists.translate_name(name, production_name=True),
                  file=prod_names_file)
def _format_codepoint(codepoint):
if 0xE000 <= codepoint <= 0xF8FF:
item_description = 'PRIVATE USE AREA U+{0:04X}'.format(codepoint)
@ -154,6 +204,8 @@ def namelist_from_font(file_name, out=None):
def main(*args):
    """Dispatch the command line arguments to the requested action.

    ``args[0]`` selects what is done:

    * ``reformat``: call ``reformat_namelist`` with ``args[1]``.
    * ``generate-filter-lists``: call ``generate_filter_lists`` with
      ``args[1]``.
    * anything else: pass ``args[0]`` itself to ``namelist_from_font``.
    """
    command = args[0]
    # The branches must be mutually exclusive. With two independent ``if``
    # statements the 'reformat' command also reached the final ``else`` and
    # the literal string 'reformat' was handed to namelist_from_font.
    if command == 'reformat':
        reformat_namelist(args[1])
    elif command == 'generate-filter-lists':
        generate_filter_lists(args[1])
    else:
        namelist_from_font(args[0])

View File

@ -62,7 +62,7 @@ def get_namelist_for_filterlist(filterlistFilename):
def get_name_by_unicode(search_codepoint, production_name=False):
"""
If this returns None GlyphsData.xml doesn't contain search_codepoint.
Returns None if GlyphsData.xml doesn't contain search_codepoint.
"""
entry = (None, None)
glyph = GlyphData.by_unicode.get(search_codepoint, None)
@ -70,8 +70,23 @@ def get_name_by_unicode(search_codepoint, production_name=False):
entry = (glyph.name, glyph.production_name)
return entry[1] if production_name else entry[0]
def get_name_by_name(search_name, production_name=False):
    """
    Look up a glyph by a name of unknown flavour and return one of its names.

    Use this when the names in your source are a mix of friendly names and
    production names: ``GlyphData.by_name`` is consulted first and
    ``GlyphData.by_prodname`` is the fallback.

    Returns the production name of the glyph if production_name is true,
    otherwise its ``name``.
    Returns None if GlyphsData.xml doesn't contain search_name.
    """
    glyph = GlyphData.by_name.get(search_name, None)
    if not glyph:
        glyph = GlyphData.by_prodname.get(search_name, None)
    if glyph is None:
        return None
    friendly, production = glyph.name, glyph.production_name
    if production_name:
        return production
    return friendly
def get_unicode_by_name(name):
glyph = GlyphData.by_name.get(name, None)
glyph = GlyphData.by_name.get(name, None) \
or GlyphData.by_prodname.get(name, None)
if glyph is not None and glyph.unicode is not None:
return glyph.unicode
match = PURE_UNI_CHR.match(name)
@ -96,27 +111,34 @@ def read_filterlist(filterListFileName):
noncodes.append(name)
return codepoints, noncodes
def translate_name(name, production_name=False):
    """Translate a glyph name to its friendly or production form.

    The lookups are tried in this order and the first result wins:

    1. ``get_name_by_name(name)``: a direct lookup of the whole name.
    2. ``get_unicode_by_name(name)`` followed by ``get_name_by_unicode``
       for the codepoint that was found.
    3. If the name contains ``_``, every ``_`` separated component is
       translated on its own and the results are joined with ``_`` again.
    4. If the name contains ``.``, only the part before the first ``.`` is
       translated; the rest is kept as is.
    5. If the name contains ``-``, only the part before the first ``-`` is
       translated; the rest is kept as is.

    If nothing applies, *name* is returned unchanged.

    production_name: if true, production names are requested from the
    lookups, otherwise friendly names (the default).
    """
    new_name = get_name_by_name(name, production_name=production_name)
    if new_name is not None:
        return new_name

    # Calling get_unicode_by_name before the name is taken apart is more
    # expensive, but it may get fringe cases where names with ".", "-"
    # or "_" have a unicode.
    codepoint = get_unicode_by_name(name)
    if codepoint is not None:
        new_name = get_name_by_unicode(codepoint, production_name=production_name)
        if new_name is not None:
            return new_name

    if '_' in name:
        return '_'.join(translate_name(component, production_name=production_name)
                        for component in name.split('_'))
    if '.' in name:
        basename, extension = name.split('.', 1)
        return '.'.join([translate_name(basename, production_name=production_name),
                         extension])
    # "brevecomb-cy" did not produce a friendly name
    if '-' in name:
        basename, extension = name.split('-', 1)
        return '-'.join([translate_name(basename, production_name=production_name),
                         extension])
    return name
def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
@ -128,7 +150,7 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
useProductionNames = 'uni names' in filterListFileName or 'uni-names' in filterListFileName
if useProductionNames:
prod_noncodes = noncodes;
noncodes = [production_name_to_friendly_name(name) for name in noncodes]
noncodes = [translate_name(name) for name in noncodes]
noncodes2prodcodes = dict(zip(noncodes, prod_noncodes))
namelist = fonts.readNamelist(namelistFilename, cache=namelistCache)
@ -146,7 +168,7 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
'\n[{names}]'.format(names=names))
missingNoncodes = []
namelistNoCharcode = set(production_name_to_friendly_name(name) for name in namelist['noCharcode'])
namelistNoCharcode = set(translate_name(name) for name in namelist['noCharcode'])
for noncode in noncodes:
if noncode not in namelistNoCharcode:
missingNoncodes.append(noncode)
@ -260,7 +282,7 @@ def _build_friendly_names_production_names_equal(pathparts, prod_names_file, nic
log_message('nice names filter list:', nice_names_file)
prod_names = get_filterlist_names(prod_names_file)
nice_prod_names = [production_name_to_friendly_name(name)
nice_prod_names = [translate_name(name)
for name in prod_names]
nice_names = get_filterlist_names(nice_names_file)