mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-11-28 19:55:53 +03:00
Further %143-ization and general cleanup.
This commit is contained in:
parent
d0b34fdae8
commit
58900d30f7
@ -1,3 +1,6 @@
|
||||
:: to use, download UnicodeData.txt and place it in `%/lib/unicode-data/txt`.
|
||||
::
|
||||
::::
|
||||
::
|
||||
:: part 1: parse the file into {uppers}
|
||||
::
|
||||
|
@ -22,7 +22,7 @@
|
||||
hex :: code/@c codepoint in hex format
|
||||
name-string :: name/tape character name
|
||||
general-category :: gen/general type of character
|
||||
(bass 10 (plus dit)) :: can/@ud canonical combining class
|
||||
(bass 10 (plus dit)) :: can/@ud canonical combining class
|
||||
bidi-category :: bi/bidi bidirectional category
|
||||
decomposition-mapping :: de/decomp decomposition mapping
|
||||
::
|
||||
@ -45,7 +45,6 @@
|
||||
%+ cook
|
||||
|=(a=tape a)
|
||||
(star ;~(less sem prn))
|
||||
:: (star ;~(pose alp (mask " <>()")))
|
||||
::
|
||||
:> parses a unicode general category abbreviation to symbol
|
||||
++ general-category
|
||||
@ -57,7 +56,6 @@
|
||||
%+ sear (soft bidi:unicode-data)
|
||||
:(cook crip cass (star hig))
|
||||
::
|
||||
:: TODO: This seems to be where the nest-fail is. There's an extra @ here?
|
||||
++ decomposition-mapping
|
||||
%- punt :: optional
|
||||
:: a tag and a list of characters to decompose to
|
||||
@ -68,7 +66,6 @@
|
||||
::
|
||||
++ decomp-tag
|
||||
%+ sear (soft decomp-tag:unicode-data)
|
||||
%+ cook |=(a=term ?+(a a $nobreak %no-break)) ::REVIEW or just change the type
|
||||
:(cook crip cass (star alf))
|
||||
::
|
||||
++ string-number
|
||||
|
@ -5,23 +5,23 @@
|
||||
++ line
|
||||
:> an individual codepoint definition
|
||||
:>
|
||||
$: code/@c :< codepoint in hexadecimal format
|
||||
name/tape :< character name
|
||||
gen/general :< type of character this is
|
||||
$: code=@c :< codepoint in hexadecimal format
|
||||
name=tape :< character name
|
||||
gen=general :< type of character this is
|
||||
:> canonical combining class for ordering algorithms
|
||||
can/@ud
|
||||
bi/bidi :< bidirectional category of this character
|
||||
de/decomp :< character decomposition mapping
|
||||
:: todo: decimal/digit/numeric need to be parsed.
|
||||
decimal/tape :< decimal digit value (or ~)
|
||||
digit/tape :< digit value, covering non decimal radix forms
|
||||
numeric/tape :< numeric value, including fractions
|
||||
mirrored/? :< whether char is mirrored in bidirectional text
|
||||
old-name/tape :< unicode 1.0 compatibility name
|
||||
iso/tape :< iso 10646 comment field
|
||||
up/(unit @c) :< uppercase mapping codepoint
|
||||
low/(unit @c) :< lowercase mapping codepoint
|
||||
title/(unit @c) :< titlecase mapping codepoint
|
||||
can=@ud
|
||||
bi=bidi :< bidirectional category of this character
|
||||
de=decomp :< character decomposition mapping
|
||||
:: todo: decimal/digit/numeric need to be parsed.
|
||||
decimal=tape :< decimal digit value (or ~)
|
||||
digit=tape :< digit value, covering non decimal radix forms
|
||||
numeric=tape :< numeric value, including fractions
|
||||
mirrored=? :< whether char is mirrored in bidirectional text
|
||||
old-name=tape :< unicode 1.0 compatibility name
|
||||
iso=tape :< iso 10646 comment field
|
||||
up=(unit @c) :< uppercase mapping codepoint
|
||||
low=(unit @c) :< lowercase mapping codepoint
|
||||
title=(unit @c) :< titlecase mapping codepoint
|
||||
==
|
||||
::
|
||||
++ general
|
||||
@ -99,7 +99,7 @@
|
||||
++ decomp-tag
|
||||
:> tag that describes the type of a character decomposition.
|
||||
$? $font :< a font variant
|
||||
$no-break :< a no-break version of a space or hyphen
|
||||
$nobreak :< a no-break version of a space or hyphen
|
||||
$initial :< an initial presentation form (arabic)
|
||||
$medial :< a medial presentation form (arabic)
|
||||
$final :< a final presentation form (arabic)
|
||||
@ -124,24 +124,24 @@
|
||||
++ case-offset
|
||||
:> case offsets can be in either direction
|
||||
$% :> add {a} to get the new character
|
||||
{$add a/@u}
|
||||
[%add a=@u]
|
||||
:> subtract {a} to get the new character
|
||||
{$sub s/@u}
|
||||
[%sub s=@u]
|
||||
:> take no action; return self
|
||||
{$none $~}
|
||||
[%none $~]
|
||||
:> represents series of alternating uppercase/lowercase characters
|
||||
{$uplo $~}
|
||||
[%uplo $~]
|
||||
==
|
||||
::
|
||||
++ case-node
|
||||
:> a node in a case-tree.
|
||||
:>
|
||||
:> represents a range of codepoints, {start} to {end}, sharing case offsets
|
||||
$: start/@ux
|
||||
end/@ux
|
||||
upper/case-offset
|
||||
lower/case-offset
|
||||
title/case-offset
|
||||
$: start=@ux
|
||||
end=@ux
|
||||
upper=case-offset
|
||||
lower=case-offset
|
||||
title=case-offset
|
||||
==
|
||||
::
|
||||
++ case-tree
|
||||
|
Loading…
Reference in New Issue
Block a user