From 58900d30f7eb77fb28c59fa3f189b644cc73014c Mon Sep 17 00:00:00 2001
From: Elliot Glaysher <erg@google.com>
Date: Thu, 18 Jan 2018 22:40:24 -0800
Subject: [PATCH] Further %143-ization and general cleanup.

---
 gen/capitalize.hoon   |  3 +++
 mar/unicode-data.hoon |  5 +----
 sur/unicode-data.hoon | 52 +++++++++++++++++++++----------------------
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/gen/capitalize.hoon b/gen/capitalize.hoon
index b31c5e1eb..f5bff918d 100644
--- a/gen/capitalize.hoon
+++ b/gen/capitalize.hoon
@@ -1,3 +1,6 @@
+::  to use, download UnicodeData.txt and place it in `%/lib/unicode-data/txt`.
+::
+::::
 ::
 ::  part 1: parse the file into {uppers}
 ::
diff --git a/mar/unicode-data.hoon b/mar/unicode-data.hoon
index 7a24846eb..61564ffa7 100644
--- a/mar/unicode-data.hoon
+++ b/mar/unicode-data.hoon
@@ -22,7 +22,7 @@
         hex                       :: code/@c        codepoint in hex format
         name-string               :: name/tape      character name
         general-category          :: gen/general    type of character
-        (bass 10 (plus dit))      :: can/@ud        canonical combining class 
+        (bass 10 (plus dit))      :: can/@ud        canonical combining class
         bidi-category             :: bi/bidi        bidirectional category
         decomposition-mapping     :: de/decomp      decomposition mapping
       ::
@@ -45,7 +45,6 @@
       %+  cook
         |=(a=tape a)
       (star ;~(less sem prn))
-      :: (star ;~(pose alp (mask " <>()")))
     ::
     :>  parses a unicode general category abbreviation to symbol
     ++  general-category
@@ -57,7 +56,6 @@
       %+  sear  (soft bidi:unicode-data)
       :(cook crip cass (star hig))
     ::
-    ::  TODO: This seems to be where the nest-fail is. There's an extra @ here?
     ++  decomposition-mapping
       %-  punt  :: optional
       :: a tag and a list of characters to decompose to
@@ -68,7 +66,6 @@
     ::
     ++  decomp-tag
       %+  sear  (soft decomp-tag:unicode-data)
-      %+  cook  |=(a=term ?+(a a $nobreak %no-break))  ::REVIEW or just change the type
       :(cook crip cass (star alf))
     ::
     ++  string-number
diff --git a/sur/unicode-data.hoon b/sur/unicode-data.hoon
index 6b4345c47..a333455c8 100644
--- a/sur/unicode-data.hoon
+++ b/sur/unicode-data.hoon
@@ -5,23 +5,23 @@
 ++  line
   :>    an individual codepoint definition
   :>
-  $:  code/@c               :< codepoint in hexadecimal format
-      name/tape             :< character name
-      gen/general           :< type of character this is
+  $:  code=@c               :<  codepoint in hexadecimal format
+      name=tape             :<  character name
+      gen=general           :<  type of character this is
       :>  canonical combining class for ordering algorithms
-      can/@ud          
-      bi/bidi               :< bidirectional category of this character
-      de/decomp             :< character decomposition mapping
-      ::  todo: decimal/digit/numeric need to be parsed.                    
-      decimal/tape          :< decimal digit value (or ~)
-      digit/tape            :< digit value, covering non decimal radix forms
-      numeric/tape          :< numeric value, including fractions
-      mirrored/?            :< whether char is mirrored in bidirectional text
-      old-name/tape         :< unicode 1.0 compatibility name
-      iso/tape              :< iso 10646 comment field
-      up/(unit @c)          :< uppercase mapping codepoint
-      low/(unit @c)         :< lowercase mapping codepoint
-      title/(unit @c)       :< titlecase mapping codepoint
+      can=@ud
+      bi=bidi               :<  bidirectional category of this character
+      de=decomp             :<  character decomposition mapping
+      ::  todo: decimal/digit/numeric need to be parsed.
+      decimal=tape          :<  decimal digit value (or ~)
+      digit=tape            :<  digit value, covering non-decimal radix forms
+      numeric=tape          :<  numeric value, including fractions
+      mirrored=?            :<  whether char is mirrored in bidirectional text
+      old-name=tape         :<  unicode 1.0 compatibility name
+      iso=tape              :<  iso 10646 comment field
+      up=(unit @c)          :<  uppercase mapping codepoint
+      low=(unit @c)         :<  lowercase mapping codepoint
+      title=(unit @c)       :<  titlecase mapping codepoint
   ==
 ::
 ++  general
@@ -99,7 +99,7 @@
 ++  decomp-tag
   :>  tag that describes the type of a character decomposition.
   $?  $font      :<  a font variant
-      $no-break  :<  a no-break version of a space or hyphen
+      $nobreak   :<  a no-break version of a space or hyphen
       $initial   :<  an initial presentation form (arabic)
       $medial    :<  a medial presentation form (arabic)
       $final     :<  a final presentation form (arabic)
@@ -124,24 +124,24 @@
 ++  case-offset
   :>  case offsets can be in either direction
   $%  :>  add {a} to get the new character
-      {$add a/@u}
+      [%add a=@u]
       :>  subtract {a} to get the new character
-      {$sub s/@u}
+      [%sub s=@u]
       :>  take no action; return self
-      {$none $~}
+      [%none $~]
       :>  represents series of alternating uppercase/lowercase characters
-      {$uplo $~}
+      [%uplo $~]
   ==
 ::
 ++  case-node
   :>    a node in a case-tree.
   :>
   :>  represents a range of
-  $:  start/@ux
-      end/@ux
-      upper/case-offset
-      lower/case-offset
-      title/case-offset
+  $:  start=@ux
+      end=@ux
+      upper=case-offset
+      lower=case-offset
+      title=case-offset
   ==
 ::
 ++  case-tree