[tools/encoding] Script to generate filter lists from existing Namelists. #678

2025-01-05 17:05:26 +03:00 · 2017-03-15 23:43:24 +01:00 · 2017-03-15 23:43:24 +01:00 · 5790bdf8ce
commit 5790bdf8ce
parent f62e3bc9ae
2 changed files with 88 additions and 14 deletions
--- a/tools/namelist.py
+++ b/tools/namelist.py
@ -26,8 +26,17 @@
 #   # To reformat all Namelists in a directory.
 #   $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
 #       bash -c './namelist.py reformat "{}" > "{}__tmp" && mv "{}__tmp" "{}"' \;
+#
+#   # To generate "uni names" and "nice names" filter lists for a Namelist
+#   # This will create the needed directories if missing
+#   $ ./namelist.py generate-filter-lists NameList.nam
+#
+#   # To generate "uni names" and "nice names" filter lists for all Namelists
+#   $ find encodings/GF\ Glyph\ Sets/ -type f -name "*.nam" -exec \
+#       ./namelist.py generate-filter-lists "{}" \;
 from __future__ import print_function, unicode_literals
 import sys
+import os
 from fontTools.ttLib import TTFont
 from fontTools.unicode import Unicode
 import codecs
@ -73,7 +82,7 @@ def reformat_namelist(filename, out=None):
    if filename == '-':
        _reformat_namelist(codecs.getreader('utf8')(sys.stdin), out)
        return
-    with codecs.open(filename, 'r', 'utf-8') as f:
+    with codecs.open(filename, 'r', encoding='utf-8') as f:
        _reformat_namelist(f, out)

 def _reformat_namelist(f, out=None):
@ -92,7 +101,7 @@ def _reformat_namelist(f, out=None):
            entry = (codepoint, None, line)
        elif line.startswith('      '):
            # unencoded name
-            name = filter_lists.production_name_to_friendly_name(line.rsplit(' ', 1)[1])
+            name = filter_lists.translate_name(line.rsplit(' ', 1)[1])
            entry = (None, name, line)

        if entry is not None:
@ -115,6 +124,47 @@ def _reformat_namelist(f, out=None):
    # output left over lines at the end of the file
    map(_print, before)

+
+def _names_generator(filename):
+    """
+    Yield one glyph name per entry of the Namelist file `filename`.
+
+    Lines starting with "0x" are encoded entries: the name returned by
+    filter_lists.get_name_by_unicode is yielded or, when that lookup
+    returns None, a generated "uniXXXX" name ("uXXXXX" for codepoints
+    above 0xFFFF). Lines starting with six spaces are unencoded entries:
+    the text after their last space is yielded. Other lines are skipped.
+    """
+    unencoded_indent = ' ' * 6
+    with codecs.open(filename, 'r', encoding='utf-8') as namelist:
+        for raw_line in namelist:
+            entry = raw_line.rstrip()
+            if entry.startswith('0x'):
+                # encoded glyph
+                codepoint = google_fonts.get_codepoint_from_line(entry)
+                known_name = filter_lists.get_name_by_unicode(codepoint)
+                if known_name is not None:
+                    yield known_name
+                    continue
+                # no name on record, build one from the codepoint
+                prefix = 'uni'
+                if codepoint > 0xFFFF:
+                    prefix = 'u'
+                yield '{0}{1:04X}'.format(prefix, codepoint)
+            elif entry.startswith(unencoded_indent):
+                # unencoded glyph, the name is the last token of the line
+                yield entry.rsplit(' ', 1)[1]
+
+def _mkdir(path):
+    """
+    Create the directory `path`, including missing parent directories.
+
+    An OSError raised by os.makedirs is ignored if `path` is a directory
+    afterwards (e.g. because it already existed) and re-raised otherwise.
+    """
+    try:
+        os.makedirs(path)
+    except OSError:
+        if not os.path.isdir(path):
+            # a bare raise keeps the traceback of the original error
+            raise
+
+def generate_filter_lists(filename):
+    """
+    Write the "nice names" and "uni names" filter lists for a Namelist.
+
+    Both files are created below a 'filter lists' directory next to
+    `filename`; missing directories are created. Every name yielded by
+    _names_generator is written to both files, one name per line, after
+    being passed through filter_lists.translate_name with
+    production_name=False ("nice names") or production_name=True
+    ("uni names").
+    """
+    # 'GF-{script}-rest.nam' => {script}-rest
+    # NOTE(review): split('-', 2)[-1] keeps only what follows the second
+    # dash if there is one, e.g. 'GF-a-b' => 'b'; confirm that this is
+    # what the example above is meant to describe.
+    stem = os.path.basename(filename).split('.', 1)[0]
+    list_name = '{0}.txt'.format(stem.split('-', 2)[-1])
+    filter_lists_dir = os.path.join(os.path.dirname(filename), 'filter lists')
+    nice_names_filename = os.path.join(filter_lists_dir, 'nice names', list_name)
+    prod_names_filename = os.path.join(filter_lists_dir, 'uni names', list_name)
+
+    for target in (nice_names_filename, prod_names_filename):
+        _mkdir(os.path.dirname(target))
+
+    with codecs.open(nice_names_filename, 'w', encoding='utf-8') as niceNamesFile, \
+            codecs.open(prod_names_filename, 'w', encoding='utf-8') as prodNamesFile:
+        for glyph_name in _names_generator(filename):
+            nice_name = filter_lists.translate_name(glyph_name, production_name=False)
+            print(nice_name, file=niceNamesFile)
+            prod_name = filter_lists.translate_name(glyph_name, production_name=True)
+            print(prod_name, file=prodNamesFile)
+
 def _format_codepoint(codepoint):
    if 0xE000 <= codepoint <= 0xF8FF:
        item_description = 'PRIVATE USE AREA U+{0:04X}'.format(codepoint)
@ -154,6 +204,8 @@ def namelist_from_font(file_name, out=None):
 def main(*args):
+    """
+    Dispatch on the first command line argument.
+
+    'reformat' and 'generate-filter-lists' use the second argument as the
+    Namelist file name; any other first argument is passed on to
+    namelist_from_font.
+    """
    if args[0] == 'reformat':
        reformat_namelist(args[1])
+    # elif, so that 'reformat' doesn't also end up in the else branch
+    elif args[0] == 'generate-filter-lists':
+        generate_filter_lists(args[1])
    else:
        namelist_from_font(args[0])

--- a/tools/util/filter_lists.py
+++ b/tools/util/filter_lists.py
@ -62,7 +62,7 @@ def get_namelist_for_filterlist(filterlistFilename):

 def get_name_by_unicode(search_codepoint, production_name=False):
    """
-    If this returns None GlyphsData.xml doesn't contain search_codepoint.
+    Returns None if GlyphsData.xml doesn't contain search_codepoint.
    """
    entry = (None, None)
    glyph = GlyphData.by_unicode.get(search_codepoint, None)
@ -70,8 +70,23 @@ def get_name_by_unicode(search_codepoint, production_name=False):
        entry = (glyph.name, glyph.production_name)
    return entry[1] if production_name else entry[0]

+def get_name_by_name(search_name, production_name=False):
+    """
+    Use this if you don't know what exact type your name is. E.g. when
+    the names in your source are a mix of friendly names and production
+    names.
+
+    search_name is looked up in GlyphData.by_name first and then in
+    GlyphData.by_prodname. The name of the glyph found is returned, or
+    its production name if production_name is True.
+
+    Returns None if GlyphsData.xml doesn't contain search_name.
+    """
+    glyph = GlyphData.by_name.get(search_name, None)
+    if not glyph:
+        glyph = GlyphData.by_prodname.get(search_name, None)
+    if glyph is None:
+        return None
+    if production_name:
+        return glyph.production_name
+    return glyph.name
+
 def get_unicode_by_name(name):
-    glyph = GlyphData.by_name.get(name, None)
+    glyph = GlyphData.by_name.get(name, None) \
+                                or GlyphData.by_prodname.get(name, None)
    if glyph is not None and glyph.unicode is not None:
        return glyph.unicode
    match = PURE_UNI_CHR.match(name)
@ -96,27 +111,34 @@ def read_filterlist(filterListFileName):
        noncodes.append(name)
    return codepoints, noncodes

-def production_name_to_friendly_name(name):
+def translate_name(name, production_name=False):
+    """
+    Translate `name` into a friendly name or, if `production_name` is
+    True, into a production name.
+
+    A direct lookup with get_name_by_name is tried first, then a lookup
+    via the codepoint returned by get_unicode_by_name. If both fail, the
+    name is split at "_" (every component is translated), or at the first
+    "." or the first "-" (only the part before it is translated). A name
+    that can't be translated is returned unchanged.
+    """
    # The call to get_unicode_by_name at the beginning of this recursive
    # function is more expensive, but it may get fringe cases
    # where names with ".", "-" or "_" have a unicode.
+
+    new_name = get_name_by_name(name,production_name=production_name)
+    if new_name is not None:
+        return new_name
+
    codepoint = get_unicode_by_name(name)
    if codepoint is not None:
-        friendly_name = get_name_by_unicode(codepoint, production_name=False) \
+        new_name = get_name_by_unicode(codepoint, production_name=production_name) \
                                        if codepoint is not None else None
-        if friendly_name is not None:
-            return friendly_name
+        if new_name is not None:
+            return new_name

    if '_' in name:
-        return '_'.join(production_name_to_friendly_name(component)
+        return '_'.join(translate_name(component, production_name=production_name)
                                    for component in name.split('_'))
    if '.' in name:
        basename, extension = name.split('.', 1)
-        return '.'.join([production_name_to_friendly_name(basename), extension])
+        return '.'.join([translate_name(basename, production_name=production_name)
+                                                            , extension])
    # "brevecomb-cy" did not produce a friendly name
    if '-' in name:
        basename, extension = name.split('-', 1)
-        return '-'.join([production_name_to_friendly_name(basename), extension])
+        return '-'.join([translate_name(basename, production_name=production_name)
+                                                            , extension])
    return name

 def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
@ -128,7 +150,7 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
    useProductionNames = 'uni names' in filterListFileName or 'uni-names' in filterListFileName
    if useProductionNames:
        prod_noncodes = noncodes;
-        noncodes = [production_name_to_friendly_name(name) for name in noncodes]
+        noncodes = [translate_name(name) for name in noncodes]
        noncodes2prodcodes = dict(zip(noncodes, prod_noncodes))

    namelist = fonts.readNamelist(namelistFilename, cache=namelistCache)
@ -146,7 +168,7 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
                       '\n[{names}]'.format(names=names))

    missingNoncodes = []
-    namelistNoCharcode = set(production_name_to_friendly_name(name) for name in namelist['noCharcode'])
+    namelistNoCharcode = set(translate_name(name) for name in namelist['noCharcode'])
    for noncode in noncodes:
        if noncode not in namelistNoCharcode:
            missingNoncodes.append(noncode)
@ -260,7 +282,7 @@ def _build_friendly_names_production_names_equal(pathparts, prod_names_file, nic
        log_message('nice names filter list:', nice_names_file)

        prod_names = get_filterlist_names(prod_names_file)
-        nice_prod_names = [production_name_to_friendly_name(name)
+        nice_prod_names = [translate_name(name)
                                                    for name in prod_names]
        nice_names = get_filterlist_names(nice_names_file)