mirror of
https://github.com/google/fonts.git
synced 2024-12-01 03:06:03 +03:00
[tools/encoding] Script to generate filter lists from existing Namelists. #678
This commit is contained in:
parent
f62e3bc9ae
commit
5790bdf8ce
@ -26,8 +26,17 @@
|
||||
# # To reformat all Namelists in a directory.
|
||||
# $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
|
||||
# bash -c './namelist.py reformat "{}" > "{}__tmp" && mv "{}__tmp" "{}"' \;
|
||||
#
|
||||
# # To generate "uni names" and "nice names" filter lists from a Namelist
|
||||
# # This will create the needed directories if missing
|
||||
# $ ./namelist.py generate-filter-lists NameList.nam
|
||||
#
|
||||
# # To generate "uni names" and "nice names" filter lists for all Namelists
|
||||
# $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
|
||||
# ./namelist.py generate-filter-lists "{}" \;
|
||||
from __future__ import print_function, unicode_literals
|
||||
import sys
|
||||
import os
|
||||
from fontTools.ttLib import TTFont
|
||||
from fontTools.unicode import Unicode
|
||||
import codecs
|
||||
@ -73,7 +82,7 @@ def reformat_namelist(filename, out=None):
|
||||
if filename == '-':
|
||||
_reformat_namelist(codecs.getreader('utf8')(sys.stdin), out)
|
||||
return
|
||||
with codecs.open(filename, 'r', 'utf-8') as f:
|
||||
with codecs.open(filename, 'r', encoding='utf-8') as f:
|
||||
_reformat_namelist(f, out)
|
||||
|
||||
def _reformat_namelist(f, out=None):
|
||||
@ -92,7 +101,7 @@ def _reformat_namelist(f, out=None):
|
||||
entry = (codepoint, None, line)
|
||||
elif line.startswith(' '):
|
||||
# unencoded name
|
||||
name = filter_lists.production_name_to_friendly_name(line.rsplit(' ', 1)[1])
|
||||
name = filter_lists.translate_name(line.rsplit(' ', 1)[1])
|
||||
entry = (None, name, line)
|
||||
|
||||
if entry is not None:
|
||||
@ -115,6 +124,47 @@ def _reformat_namelist(f, out=None):
|
||||
# output left over lines at the end of the file
|
||||
map(_print, before)
|
||||
|
||||
|
||||
def _names_generator(filename):
    """Yield one glyph name for every entry of the Namelist *filename*.

    The file is read as UTF-8, line by line, with trailing whitespace
    stripped.  Two kinds of lines produce a name:

    * lines starting with ``0x`` (encoded entries): the codepoint is taken
      from ``google_fonts.get_codepoint_from_line`` and looked up with
      ``filter_lists.get_name_by_unicode``.  When that lookup returns None
      a name is synthesized from the codepoint as ``uniXXXX``, or
      ``uXXXXX`` for codepoints above 0xFFFF.
    * lines starting with six spaces (unencoded entries): the text after
      the last space is yielded unchanged.

    All other lines are ignored.
    """
    unencoded_indent = ' ' * 6
    with codecs.open(filename, 'r', encoding='utf-8') as namelist:
        for raw_line in namelist:
            entry = raw_line.rstrip()

            if entry.startswith('0x'):
                # encoded entry
                codepoint = google_fonts.get_codepoint_from_line(entry)
                glyph_name = filter_lists.get_name_by_unicode(codepoint)
                if glyph_name is None:
                    # no name known for this codepoint: build a generic one
                    if codepoint > 0xFFFF:
                        prefix = 'u'
                    else:
                        prefix = 'uni'
                    glyph_name = '{0}{1:04X}'.format(prefix, codepoint)
                yield glyph_name
                continue

            if entry.startswith(unencoded_indent):
                # unencoded entry: the name is the last space separated token
                yield entry.rsplit(' ', 1)[1]
|
||||
|
||||
def _mkdir(path):
    """Create the directory *path*, including missing parent directories.

    Nothing happens when *path* already exists as a directory.

    Raises:
        OSError: when ``os.makedirs`` fails and *path* is not a directory
            afterwards (e.g. a regular file is in the way).
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            # Bare ``raise`` re-raises the active exception with its
            # original traceback (``raise exc`` would reset it on Python 2).
            raise
|
||||
|
||||
def generate_filter_lists(filename):
    """Write the "nice names" and "uni names" filter lists for a Namelist.

    Two UTF-8 text files with the same file name are written below the
    directory that contains *filename*: one in ``filter lists/nice names``
    and one in ``filter lists/uni names``.  Missing directories are created.
    Every name yielded by ``_names_generator(filename)`` is passed through
    ``filter_lists.translate_name`` with ``production_name=False`` for the
    first file and ``production_name=True`` for the second, one name per
    line.
    """
    # 'GF-{script}-rest.nam' => {script}-rest
    # NOTE(review): split('-', 2)[-1] keeps only the text after the second
    # '-' (e.g. 'GF-latin-core' -> 'core'), which does not match the mapping
    # described above -- confirm which of the two is intended.
    stem = os.path.basename(filename).split('.', 1)[0]
    list_file_name = '{0}.txt'.format(stem.split('-', 2)[-1])

    lists_root = os.path.join(os.path.dirname(filename), 'filter lists')
    nice_names_filename = os.path.join(lists_root, 'nice names', list_file_name)
    prod_names_filename = os.path.join(lists_root, 'uni names', list_file_name)

    for target in (nice_names_filename, prod_names_filename):
        _mkdir(os.path.dirname(target))

    with codecs.open(nice_names_filename, 'w', encoding='utf-8') as nice_out:
        with codecs.open(prod_names_filename, 'w', encoding='utf-8') as prod_out:
            for glyph_name in _names_generator(filename):
                print(filter_lists.translate_name(glyph_name, production_name=False),
                      file=nice_out)
                print(filter_lists.translate_name(glyph_name, production_name=True),
                      file=prod_out)
|
||||
|
||||
def _format_codepoint(codepoint):
|
||||
if 0xE000 <= codepoint <= 0xF8FF:
|
||||
item_description = 'PRIVATE USE AREA U+{0:04X}'.format(codepoint)
|
||||
@ -154,6 +204,8 @@ def namelist_from_font(file_name, out=None):
|
||||
def main(*args):
    """Run the action selected by the command line arguments.

    ``args[0]`` selects what to do:

    * ``'reformat'``: call ``reformat_namelist(args[1])``.
    * ``'generate-filter-lists'``: call ``generate_filter_lists(args[1])``.
    * anything else: ``args[0]`` itself is passed to ``namelist_from_font``
      as the file name.
    """
    # The three branches are mutually exclusive.  With two independent
    # ``if`` statements a 'reformat' call would also reach the final
    # ``else`` and hand the string 'reformat' to namelist_from_font.
    if args[0] == 'reformat':
        reformat_namelist(args[1])
    elif args[0] == 'generate-filter-lists':
        generate_filter_lists(args[1])
    else:
        namelist_from_font(args[0])
|
||||
|
||||
|
@ -62,7 +62,7 @@ def get_namelist_for_filterlist(filterlistFilename):
|
||||
|
||||
def get_name_by_unicode(search_codepoint, production_name=False):
|
||||
"""
|
||||
If this returns None GlyphsData.xml doesn't contain search_codepoint.
|
||||
Returns None if GlyphsData.xml doesn't contain search_codepoint.
|
||||
"""
|
||||
entry = (None, None)
|
||||
glyph = GlyphData.by_unicode.get(search_codepoint, None)
|
||||
@ -70,8 +70,23 @@ def get_name_by_unicode(search_codepoint, production_name=False):
|
||||
entry = (glyph.name, glyph.production_name)
|
||||
return entry[1] if production_name else entry[0]
|
||||
|
||||
def get_name_by_name(search_name, production_name=False):
    """
    Look up a glyph by a name of unknown kind and return one of its names.

    Use this if you don't know what exact type your name is, e.g. when
    the names in your source are a mix of friendly names and production
    names.  ``GlyphData.by_name`` is consulted first, then
    ``GlyphData.by_prodname``.

    Returns the glyph's production name if production_name is true,
    otherwise its ``name`` attribute.

    Returns None if GlyphsData.xml doesn't contain search_name.
    """
    glyph = GlyphData.by_name.get(search_name, None)
    if not glyph:
        # not found by its regular name, try the production names
        glyph = GlyphData.by_prodname.get(search_name, None)

    if glyph is None:
        return None

    friendly, production = glyph.name, glyph.production_name
    if production_name:
        return production
    return friendly
|
||||
|
||||
def get_unicode_by_name(name):
|
||||
glyph = GlyphData.by_name.get(name, None)
|
||||
glyph = GlyphData.by_name.get(name, None) \
|
||||
or GlyphData.by_prodname.get(name, None)
|
||||
if glyph is not None and glyph.unicode is not None:
|
||||
return glyph.unicode
|
||||
match = PURE_UNI_CHR.match(name)
|
||||
@ -96,27 +111,34 @@ def read_filterlist(filterListFileName):
|
||||
noncodes.append(name)
|
||||
return codepoints, noncodes
|
||||
|
||||
def production_name_to_friendly_name(name):
|
||||
def translate_name(name, production_name=False):
|
||||
# The call to get_unicode_by_name at the beginning of this recursive
|
||||
# function is more expensive, but it may catch fringe cases
|
||||
# where names with ".", "-" or "_" have a unicode.
|
||||
|
||||
new_name = get_name_by_name(name,production_name=production_name)
|
||||
if new_name is not None:
|
||||
return new_name
|
||||
|
||||
codepoint = get_unicode_by_name(name)
|
||||
if codepoint is not None:
|
||||
friendly_name = get_name_by_unicode(codepoint, production_name=False) \
|
||||
new_name = get_name_by_unicode(codepoint, production_name=production_name) \
|
||||
if codepoint is not None else None
|
||||
if friendly_name is not None:
|
||||
return friendly_name
|
||||
if new_name is not None:
|
||||
return new_name
|
||||
|
||||
if '_' in name:
|
||||
return '_'.join(production_name_to_friendly_name(component)
|
||||
return '_'.join(translate_name(component, production_name=production_name)
|
||||
for component in name.split('_'))
|
||||
if '.' in name:
|
||||
basename, extension = name.split('.', 1)
|
||||
return '.'.join([production_name_to_friendly_name(basename), extension])
|
||||
return '.'.join([translate_name(basename, production_name=production_name)
|
||||
, extension])
|
||||
# "brevecomb-cy" did not produce a friendly name
|
||||
if '-' in name:
|
||||
basename, extension = name.split('-', 1)
|
||||
return '-'.join([production_name_to_friendly_name(basename), extension])
|
||||
return '-'.join([translate_name(basename, production_name=production_name)
|
||||
, extension])
|
||||
return name
|
||||
|
||||
def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
|
||||
@ -128,7 +150,7 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
|
||||
useProductionNames = 'uni names' in filterListFileName or 'uni-names' in filterListFileName
|
||||
if useProductionNames:
|
||||
prod_noncodes = noncodes;
|
||||
noncodes = [production_name_to_friendly_name(name) for name in noncodes]
|
||||
noncodes = [translate_name(name) for name in noncodes]
|
||||
noncodes2prodcodes = dict(zip(noncodes, prod_noncodes))
|
||||
|
||||
namelist = fonts.readNamelist(namelistFilename, cache=namelistCache)
|
||||
@ -146,7 +168,7 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
|
||||
'\n[{names}]'.format(names=names))
|
||||
|
||||
missingNoncodes = []
|
||||
namelistNoCharcode = set(production_name_to_friendly_name(name) for name in namelist['noCharcode'])
|
||||
namelistNoCharcode = set(translate_name(name) for name in namelist['noCharcode'])
|
||||
for noncode in noncodes:
|
||||
if noncode not in namelistNoCharcode:
|
||||
missingNoncodes.append(noncode)
|
||||
@ -260,7 +282,7 @@ def _build_friendly_names_production_names_equal(pathparts, prod_names_file, nic
|
||||
log_message('nice names filter list:', nice_names_file)
|
||||
|
||||
prod_names = get_filterlist_names(prod_names_file)
|
||||
nice_prod_names = [production_name_to_friendly_name(name)
|
||||
nice_prod_names = [translate_name(name)
|
||||
for name in prod_names]
|
||||
nice_names = get_filterlist_names(nice_names_file)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user