From 92ce1ab8aa907f522bd4f1ac8b9b2a78bd128d18 Mon Sep 17 00:00:00 2001
From: Lasse Fister <commander@graphicore.de>
Date: Tue, 7 Mar 2017 01:32:11 +0100
Subject: [PATCH] [tools/encodings] use unittest for filter lists tests. #678

---
 tools/util/filter_lists.py | 178 ++++++++++++++++++++++---------------
 1 file changed, 104 insertions(+), 74 deletions(-)

diff --git a/tools/util/filter_lists.py b/tools/util/filter_lists.py
index 561628ba2..4efe45123 100755
--- a/tools/util/filter_lists.py
+++ b/tools/util/filter_lists.py
@@ -4,18 +4,20 @@
 # for the glyphsLib dependency do: pip install GlyphsLib
 
 from __future__ import print_function, unicode_literals
-from glyphsLib import glyphdata, glyphdata_generated
 from fontTools.misc.py23 import unichr, byteord
 import sys, os, subprocess
 import re
 import codecs
+import logging as log
+import unittest
+from collections import Counter
 
 if __name__ == '__main__':
-  # some of the imports here wouldn't work otherwise
+  # the following imports wouldn't work otherwise
   sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import google_fonts as fonts
-
+from glyphdata import DATA as GlyphData
 
 
 PURE_UNI_CHR = re.compile('^uni([0-9A-F]{4,5})$', re.IGNORECASE)
@@ -49,7 +51,7 @@ def get_namelist_for_filterlist(filterlistFilename):
     while(len(markerDir) and markerDir != FILTER_LISTS_DIR_NAME):
         namelistDir, markerDir = os.path.split(namelistDir)
     if markerDir != FILTER_LISTS_DIR_NAME:
-        raise Exception('charset directory not fond in "{path}".'.format(path=dirname))
+        raise Exception('charset directory not found in "{path}".'.format(path=dirname))
 
     # get all the Namelist files from
     for root, dirs, files in os.walk(namelistDir):
@@ -73,26 +75,19 @@ def get_namelist_for_filterlist(filterlistFilename):
 
 _UNICDE2GLYPHNAME = {}
 def get_name_by_unicode(search_codepoint, production_name=False):
-    if not len(_UNICDE2GLYPHNAME):
-        for name in glyphdata_generated.PRODUCTION_NAMES:
-            pname = glyphdata_generated.PRODUCTION_NAMES[name]
-            codepoint = glyphdata.get_glyph(name).unicode
-            if codepoint is not None:
-                _UNICDE2GLYPHNAME[byteord(codepoint)] = (name, pname)
-
-        for name in glyphdata_generated.IRREGULAR_UNICODE_STRINGS:
-            codepoint = glyphdata.get_glyph(name).unicode
-            if codepoint is not None:
-                _UNICDE2GLYPHNAME[byteord(codepoint)] = (name, None)
-
-    entry = _UNICDE2GLYPHNAME.get(search_codepoint, (None, None))
-    index = 1 if production_name else 0
-    return entry[index]
+    """
+    Return the name (or, if production_name is truthy, the production name)
+    for search_codepoint. None: GlyphsData.xml doesn't contain the codepoint.
+    """
+    glyph = GlyphData.by_unicode.get(search_codepoint, None)
+    if glyph is None:
+        return None
+    return glyph.production_name if production_name else glyph.name
 
 def get_unicode_by_name(name):
-    codepoint = glyphdata.get_glyph(name).unicode
-    if codepoint is not None:
-        return byteord(codepoint)
+    glyph = GlyphData.by_name.get(name, None)
+    if glyph is not None and glyph.unicode is not None:
+        return glyph.unicode
     match = PURE_UNI_CHR.match(name)
     if match is not None:
         return int(match.groups()[0], base=16)
@@ -105,7 +100,6 @@ def get_filterlist_names(filterListFileName):
     with codecs.open(filterListFileName, 'r', encoding='utf-8') as f:
         return [line.strip() for line in f]
 
-
 def read_filterlist(filterListFileName):
     names = get_filterlist_names(filterListFileName)
     codepoints = []
@@ -141,7 +135,6 @@ def production_name_to_friendly_name(name):
     friendly_name = get_name_by_unicode(codepoint, production_name=False)
     return friendly_name if friendly_name is not None else name
 
-
 def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
     namelistFilename = get_namelist_for_filterlist(filterListFileName)
     if not namelistFilename:
@@ -183,41 +176,31 @@ def check_filterlist_in_namelist(filterListFileName, namelistCache=None):
         return False, '\n'.join(message), namelistFilename
     return True, None, namelistFilename
 
-def check_filterlists_in_namelists(files):
-    print('*'*30)
-    print('Checking filterlists in namelists...')
-    print('*'*30)
-    namelistCache = {}
+
+def _build_filterlists_in_namelists(f):
+    """
+        Build a test method that runs check_filterlist_in_namelist on the
+        filter list file `f`. Returns a (test name, test method) pair.
+    """
+    def test_filterlists_in_namelists(self):
+        # the third item (the namelist file name) is not used by the test
+        outcome = check_filterlist_in_namelist(f, self._cache)
+        ok, failure_report, _namelist = outcome
+        self.assertTrue(ok, msg=failure_report)
+    return 'test_filterlists_in_namelists {0}'.format(f), test_filterlists_in_namelists
+
+def build_filterlists_in_namelists(files):  # generator: one (test name, test method) pair per entry of files
     for f in files:
-        print('='*30)
-        print ('Checking filter list:', f)
-        passed, message, namelist = check_filterlist_in_namelist(f, namelistCache)
-        print ('Namelist:', namelist)
-        if not passed:
-            print('Failed')
-            print(message)
-        else:
-            print('Passed')
+        yield _build_filterlists_in_namelists(f)
 
-def check_friendly_names_production_names_equal(files):
-    print('*'*30)
-    print('Check if nice names and uni names filter lists are in sync.')
-    print('*'*30)
-    nice_names_dir = 'nice names'
-    prod_names_dir = 'uni names'
-    nice_names_parts = {tuple(f.split(nice_names_dir, 1)) for f in files
-                                                if nice_names_dir in f}
-    prod_names_parts = {tuple(f.split(prod_names_dir, 1)) for f in files
-                                                if prod_names_dir in f}
+def _build_friendly_names_production_names_equal(pathparts, prod_names_file, nice_names_file):
+    test_name = 'test_nice_names_uni_names_equal {0}'.format('{marker dir}'.join(pathparts))
+    def test_friendly_names_production_names_equal(self):
+        message = []
+        log_message = lambda *args: message.append(' '.join(map(unicode, args)))
 
-    # filter to check only files that have a counterpart
-    matches = sorted(list(nice_names_parts & prod_names_parts))
-    for pathparts in matches:
-        print('='*30)
-        prod_names_file = prod_names_dir.join(pathparts)
-        nice_names_file = nice_names_dir.join(pathparts)
-        print('uni names filter list:', prod_names_file)
-        print('nice names filter list:', nice_names_file)
+        log_message('uni names filter list:', prod_names_file)
+        log_message('nice names filter list:', nice_names_file)
 
         prod_names = get_filterlist_names(prod_names_file)
         nice_prod_names = [production_name_to_friendly_name(name)
@@ -229,40 +212,87 @@ def check_friendly_names_production_names_equal(files):
 
         not_in_both = nice_names_set ^ prod_names_set
         if not len(not_in_both):
-            print('PASS\n')
-            continue
+            return
 
-        print('FAIL')
-        print('# uni names', len(prod_names), '# duplicates', len(prod_names) - len(prod_names_set))
-        print('# nice names', len(nice_names), '# duplicates',len(nice_names) - len(nice_names_set))
+        log_message('# uni names', len(prod_names))
+        log_message('# nice names', len(nice_names))
+
+        dupes_in_prod_names = len(prod_names) - len(prod_names_set)
+        dupes_in_nice_names = len(nice_names) - len(nice_names_set)
+
+        if dupes_in_prod_names:
+            log_message('# duplicates in uni names', dupes_in_prod_names, '\n'
+                    , *[item for item, count in Counter(prod_names).items() if count > 1])
+        if dupes_in_nice_names:
+            log_message('# duplicates in nice names', dupes_in_nice_names, '\n'
+                    , *[item for item, count in Counter(nice_names).items() if count > 1])
+
+
+        self.assertTrue(dupes_in_prod_names == 0 and dupes_in_nice_names == 0
+                                                ,  msg='\n'.join(message))
 
         not_in_nice = sorted(not_in_both - nice_names_set)
         not_in_prod = sorted(not_in_both - prod_names_set)
 
         if len(not_in_prod):
-            print ('Entries in nice names but not in uni names(#{1}):\n{0}\n' \
+            log_message('Entries in nice names but not in uni names(#{1}):\n{0}\n' \
                                 .format(', '.join(not_in_prod), len(not_in_prod)))
 
         if len(not_in_nice):
             nice2prod = dict(zip(nice_prod_names, prod_names))
             items = ','.join('{0} as {1}'.format(nice2prod[n],n)
                                                     for n in not_in_nice)
-            print ('Entries in uni names but not in nice names (#{1}):\n{0}\n' \
-                                        .format(items, len(not_in_nice)))
+            log_message('Entries in uni names but not in nice names (#{1}):\n{0}\n' \
+                                    .format(items, len(not_in_nice)))
+
+        self.assertTrue(False, msg='\n'.join(message))
+    return test_name, test_friendly_names_production_names_equal
 
 
-def check_files(files):
-    check_filterlists_in_namelists(files)
-    check_friendly_names_production_names_equal(files)
+def build_friendly_names_production_names_equal(files):
+    """
+        Yield a (test name, test method) pair for each filter list that
+        exists both as a 'nice names' and as a 'uni names' file, to check
+        that the two are in sync. `files` is an iterable of file paths.
+    """
+    marker_dirs = ('nice names', 'uni names')
+    nice_parts, prod_parts = set(), set()
+    # a single pass, so `files` may also be a one-shot iterator
+    for f in files:
+        for marker, parts in zip(marker_dirs, (nice_parts, prod_parts)):
+            if marker in f:
+                parts.add(tuple(f.split(marker, 1)))
+    # only files that have a counterpart in the other directory are checked
+    for pathparts in sorted(nice_parts & prod_parts):
+        nice_file, prod_file = (marker.join(pathparts) for marker in marker_dirs)
+        yield _build_friendly_names_production_names_equal(pathparts, prod_file, nice_file)
+
+def initTestProperties(cls, files):  # attach every generated (name, test method) pair to cls
+  files = list(files)  # both generators below iterate files; a one-shot iterator would starve the second
+  for test_generator in (build_filterlists_in_namelists
+                       , build_friendly_names_production_names_equal):
+    for testName, test in test_generator(files):
+        setattr(cls, testName, test)
+
+
+class TestFilterLists(unittest.TestCase):  # the test methods are attached at runtime by initTestProperties()
+  def setUp(self):
+    self._cache = {}  # NOTE(review): setUp runs before every test, so this cache is never shared between tests - confirm intended
+
+  def tearDown(self):
+    self._cache = None  # release the cache reference after each test
+
 
 def main(args):
-    if len(args) < 1:
-        raise Excepion('The first argument must be the search directory'\
+    if len(args) < 2:  # args[0] is the script path, args[1] the search directory
+        raise Exception('The first argument must be the search directory'\
                                                         ' for nam files.')
-    searchDirectory = args[0]
-    files = subprocess.check_output(['find', searchDirectory, '-type', 'f', '-path', '*/filter lists/*.txt']);
-    check_files( filter(len, files.split('\n')) )
+    searchDirectory = args[1]
+    files = subprocess.check_output(['find', searchDirectory, '-type', 'f', '-path', '*/filter lists/*.txt'])
+    files = filter(len, files.split('\n'))  # drop the empty entries left by splitting on newlines
+    initTestProperties(TestFilterLists, files)  # the generated tests must be on the class before unittest.main runs
+    unittest.main(argv=args[:1] + args[2:], verbosity=2)  # the search directory is removed from the argv handed to unittest
+
 
 if __name__ == '__main__':
-    main(sys.argv[1:])
-
+    main(sys.argv)  # full argv, because main() forwards args[0] to unittest.main