Further %143-ization and general cleanup.

2024-11-28 19:55:53 +03:00 · 2018-01-18 22:40:24 -08:00 · 2018-01-18 22:40:24 -08:00 · 58900d30f7
commit 58900d30f7
parent d0b34fdae8
3 changed files with 30 additions and 30 deletions
--- a/gen/capitalize.hoon
+++ b/gen/capitalize.hoon
@ -1,3 +1,6 @@
+::  to use, download UnicodeData.txt and place it in `%/lib/unicode-data/txt`.
+::
+::::
 ::
 ::  part 1: parse the file into {uppers}
 ::
--- a/mar/unicode-data.hoon
+++ b/mar/unicode-data.hoon
@ -22,7 +22,7 @@
        hex                       :: code/@c        codepoint in hex format
        name-string               :: name/tape      character name
        general-category          :: gen/general    type of character
-        (bass 10 (plus dit))      :: can/@ud        canonical combining class 
+        (bass 10 (plus dit))      :: can/@ud        canonical combining class
        bidi-category             :: bi/bidi        bidirectional category
        decomposition-mapping     :: de/decomp      decomposition mapping
      ::
@ -45,7 +45,6 @@
      %+  cook
        |=(a=tape a)
      (star ;~(less sem prn))
-      :: (star ;~(pose alp (mask " <>()")))
    ::
    :>  parses a unicode general category abbreviation to symbol
    ++  general-category
@ -57,7 +56,6 @@
      %+  sear  (soft bidi:unicode-data)
      :(cook crip cass (star hig))
    ::
-    ::  TODO: This seems to be where the nest-fail is. There's an extra @ here?
    ++  decomposition-mapping
      %-  punt  :: optional
      :: a tag and a list of characters to decompose to
@ -68,7 +66,6 @@
    ::
    ++  decomp-tag
      %+  sear  (soft decomp-tag:unicode-data)
-      %+  cook  |=(a=term ?+(a a $nobreak %no-break))  ::REVIEW or just change the type
      :(cook crip cass (star alf))
    ::
    ++  string-number
--- a/sur/unicode-data.hoon
+++ b/sur/unicode-data.hoon
@ -5,23 +5,23 @@
 ++  line
  :>    an individual codepoint definition
  :>
-  $:  code/@c               :< codepoint in hexadecimal format
-      name/tape             :< character name
-      gen/general           :< type of character this is
+  $:  code=@c               :<  codepoint in hexadecimal format
+      name=tape             :<  character name
+      gen=general           :<  type of character this is
      :>  canonical combining class for ordering algorithms
-      can/@ud          
-      bi/bidi               :< bidirectional category of this character
-      de/decomp             :< character decomposition mapping
-      ::  todo: decimal/digit/numeric need to be parsed.                    
-      decimal/tape          :< decimal digit value (or ~)
-      digit/tape            :< digit value, covering non decimal radix forms
-      numeric/tape          :< numeric value, including fractions
-      mirrored/?            :< whether char is mirrored in bidirectional text
-      old-name/tape         :< unicode 1.0 compatibility name
-      iso/tape              :< iso 10646 comment field
-      up/(unit @c)          :< uppercase mapping codepoint
-      low/(unit @c)         :< lowercase mapping codepoint
-      title/(unit @c)       :< titlecase mapping codepoint
+      can=@ud
+      bi=bidi               :<  bidirectional category of this character
+      de=decomp             :<  character decomposition mapping
+      ::  todo: decimal/digit/numeric need to be parsed.
+      decimal=tape          :<  decimal digit value (or ~)
+      digit=tape            :<  digit value, covering non decimal radix forms
+      numeric=tape          :<  numeric value, including fractions
+      mirrored=?            :<  whether char is mirrored in bidirectional text
+      old-name=tape         :<  unicode 1.0 compatibility name
+      iso=tape              :<  iso 10646 comment field
+      up=(unit @c)          :<  uppercase mapping codepoint
+      low=(unit @c)         :<  lowercase mapping codepoint
+      title=(unit @c)       :<  titlecase mapping codepoint
  ==
 ::
 ++  general
@ -99,7 +99,7 @@
 ++  decomp-tag
  :>  tag that describes the type of a character decomposition.
  $?  $font      :<  a font variant
-      $no-break  :<  a no-break version of a space or hyphen
+      $nobreak   :<  a no-break version of a space or hyphen
      $initial   :<  an initial presentation form (arabic)
      $medial    :<  a medial presentation form (arabic)
      $final     :<  a final presentation form (arabic)
@ -124,24 +124,24 @@
 ++  case-offset
  :>  case offsets can be in either direction
  $%  :>  add {a} to get the new character
-      {$add a/@u}
+      [%add a=@u]
      :>  subtract {a} to get the new character
-      {$sub s/@u}
+      [%sub s=@u]
      :>  take no action; return self
-      {$none $~}
+      [%none $~]
      :>  represents series of alternating uppercase/lowercase characters
-      {$uplo $~}
+      [%uplo $~]
  ==
 ::
 ++  case-node
  :>    a node in a case-tree.
  :>
  :>  represents a range of
-  $:  start/@ux
-      end/@ux
-      upper/case-offset
-      lower/case-offset
-      title/case-offset
+  $:  start=@ux
+      end=@ux
+      upper=case-offset
+      lower=case-offset
+      title=case-offset
  ==
 ::
 ++  case-tree