From 58900d30f7eb77fb28c59fa3f189b644cc73014c Mon Sep 17 00:00:00 2001 From: Elliot Glaysher Date: Thu, 18 Jan 2018 22:40:24 -0800 Subject: [PATCH] Further %143-ization and general cleanup. --- gen/capitalize.hoon | 3 +++ mar/unicode-data.hoon | 5 +---- sur/unicode-data.hoon | 52 +++++++++++++++++++++---------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/gen/capitalize.hoon b/gen/capitalize.hoon index b31c5e1eb..f5bff918d 100644 --- a/gen/capitalize.hoon +++ b/gen/capitalize.hoon @@ -1,3 +1,6 @@ +:: to use, download UnicodeData.txt and place it in `%/lib/unicode-data/txt`. +:: +:::: :: :: part 1: parse the file into {uppers} :: diff --git a/mar/unicode-data.hoon b/mar/unicode-data.hoon index 7a24846eb..61564ffa7 100644 --- a/mar/unicode-data.hoon +++ b/mar/unicode-data.hoon @@ -22,7 +22,7 @@ hex :: code/@c codepoint in hex format name-string :: name/tape character name general-category :: gen/general type of character - (bass 10 (plus dit)) :: can/@ud canonical combining class + (bass 10 (plus dit)) :: can/@ud canonical combining class bidi-category :: bi/bidi bidirectional category decomposition-mapping :: de/decomp decomposition mapping :: @@ -45,7 +45,6 @@ %+ cook |=(a=tape a) (star ;~(less sem prn)) - :: (star ;~(pose alp (mask " <>()"))) :: :> parses a unicode general category abbreviation to symbol ++ general-category @@ -57,7 +56,6 @@ %+ sear (soft bidi:unicode-data) :(cook crip cass (star hig)) :: - :: TODO: This seems to be where the nest-fail is. There's an extra @ here? 
++ decomposition-mapping %- punt :: optional :: a tag and a list of characters to decompose to @@ -68,7 +66,6 @@ :: ++ decomp-tag %+ sear (soft decomp-tag:unicode-data) - %+ cook |=(a=term ?+(a a $nobreak %no-break)) ::REVIEW or just change the type :(cook crip cass (star alf)) :: ++ string-number diff --git a/sur/unicode-data.hoon b/sur/unicode-data.hoon index 6b4345c47..a333455c8 100644 --- a/sur/unicode-data.hoon +++ b/sur/unicode-data.hoon @@ -5,23 +5,23 @@ ++ line :> an individual codepoint definition :> - $: code/@c :< codepoint in hexadecimal format - name/tape :< character name - gen/general :< type of character this is + $: code=@c :< codepoint in hexadecimal format + name=tape :< character name + gen=general :< type of character this is :> canonical combining class for ordering algorithms - can/@ud - bi/bidi :< bidirectional category of this character - de/decomp :< character decomposition mapping - :: todo: decimal/digit/numeric need to be parsed. - decimal/tape :< decimal digit value (or ~) - digit/tape :< digit value, covering non decimal radix forms - numeric/tape :< numeric value, including fractions - mirrored/? :< whether char is mirrored in bidirectional text - old-name/tape :< unicode 1.0 compatibility name - iso/tape :< iso 10646 comment field - up/(unit @c) :< uppercase mapping codepoint - low/(unit @c) :< lowercase mapping codepoint - title/(unit @c) :< titlecase mapping codepoint + can=@ud + bi=bidi :< bidirectional category of this character + de=decomp :< character decomposition mapping + :: todo: decimal/digit/numeric need to be parsed. + decimal=tape :< decimal digit value (or ~) + digit=tape :< digit value, covering non decimal radix forms + numeric=tape :< numeric value, including fractions + mirrored=? 
:< whether char is mirrored in bidirectional text + old-name=tape :< unicode 1.0 compatibility name + iso=tape :< iso 10646 comment field + up=(unit @c) :< uppercase mapping codepoint + low=(unit @c) :< lowercase mapping codepoint + title=(unit @c) :< titlecase mapping codepoint == :: ++ general @@ -99,7 +99,7 @@ ++ decomp-tag :> tag that describes the type of a character decomposition. $? $font :< a font variant - $no-break :< a no-break version of a space or hyphen + $nobreak :< a no-break version of a space or hyphen $initial :< an initial presentation form (arabic) $medial :< a medial presentation form (arabic) $final :< a final presentation form (arabic) @@ -124,24 +124,24 @@ ++ case-offset :> case offsets can be in either direction $% :> add {a} to get the new character - {$add a/@u} + [%add a=@u] :> subtract {a} to get the new character - {$sub s/@u} + [%sub s=@u] :> take no action; return self - {$none $~} + [%none $~] :> represents series of alternating uppercase/lowercase characters - {$uplo $~} + [%uplo $~] == :: ++ case-node :> a node in a case-tree. :> :> represents a range of - $: start/@ux - end/@ux - upper/case-offset - lower/case-offset - title/case-offset + $: start=@ux + end=@ux + upper=case-offset + lower=case-offset + title=case-offset == :: ++ case-tree