::  mirror of https://github.com/urbit/shrub.git
::  synced 2024-12-14 11:08:45 +03:00
::  80 lines, 2.6 KiB, plaintext
/-  unicode-data
=,  eyre
=,  format
::
::  unicode-data mark: a list of parsed lines of the unicode data file.
::
::    +grab converts into this mark from %mime (by splitting the octs
::    into lines with +to-wain) and from %txt (by parsing every
::    non-empty line with +line).  +grad names the %txt mark.
::
|_  all=(list line:unicode-data)
++  grab
  ::  converts from mark to unicode-data.
  |%
  ::  ignores the mime type, splits the payload into lines, and parses
  ::  them exactly like %txt.
  ++  mime  |=([* a=octs] (txt (to-wain q.a)))       ::  XX mark translation
  ::
  ::  parses a list of text lines; the remaining arms of this core are
  ::  the parsers for the individual fields of a line.
  ++  txt
    |^  |=  a=wain
        ^+  all
        ::  +murn drops every ~ result, so empty lines are skipped.
        %+  murn  a
        |=  b=cord
        ^-  (unit line:unicode-data)
        ?~  b  ~
        ::  +rash crashes on a failed parse instead of producing ~, so
        ::  one malformed line fails the whole conversion.
        `(rash b line)
    ::
    ::  parses a single character information line of the unicode data file.
    ::
    ::    the fields are separated by semicolons (+mic) and must appear in
    ::    exactly the order listed below.
    ++  line
      ;~  (glue mic)
        hex                       ::  code/@c        codepoint in hex format
        name-string               ::  name/tape      character name
        general-category          ::  gen/general    type of character
        (bass 10 (plus dit))      ::  can/@ud        canonical combining class
        bidi-category             ::  bi/bidi        bidirectional category
        decomposition-mapping     ::  de/decomp      decomposition mapping
      ::
      ::  todo: decimal/digit/numeric need to be parsed.
      ::
        string-number             ::  decimal/tape   decimal digit value (or ~)
        string-number             ::  digit/tape     digit value, even if non-decimal
        string-number             ::  numeric/tape   numeric value, including fractions
      ::
        (fuss 'Y' 'N')            ::  mirrored/?     is char mirrored in bidi text?
        name-string               ::  old-name/tape  unicode 1.0 compatibility name
        name-string               ::  iso/tape       iso 10646 comment field
        (punt hex)                ::  up/(unit @c)   uppercase mapping codepoint
        (punt hex)                ::  low/(unit @c)  lowercase mapping codepoint
        (punt hex)                ::  title/(unit @c)  titlecase mapping codepoint
      ==
    ::
    ::  parses a single name or comment string.
    ::
    ::    accepts any run (possibly empty) of printable characters up to,
    ::    but not including, the next semicolon.
    ++  name-string
      %+  cook
        |=(a=tape a)
      (star ;~(less mic prn))
    ::
    ::  parses a unicode general category abbreviation to symbol.
    ::
    ::    one uppercase then one lowercase letter; (easy ~) terminates the
    ::    pair so it forms a tape, which is lowercased, turned into a cord,
    ::    and rejected unless it fits general:unicode-data.
    ++  general-category
      %+  sear  (soft general:unicode-data)
      :(cook crip cass ;~(plug hig low (easy ~)))
    ::
    ::  parses a bidirectional category abbreviation to symbol.
    ::
    ::    a run of uppercase letters, lowercased and turned into a cord,
    ::    rejected unless it fits bidi:unicode-data.
    ++  bidi-category
      %+  sear  (soft bidi:unicode-data)
      :(cook crip cass (star hig))
    ::
    ::  parses the optional decomposition mapping field.
    ++  decomposition-mapping
      %-  punt  :: optional
      ::  a tag and a list of characters to decompose to
      ;~  plug
        ::  the tag is itself optional: "<tag> " with a trailing space.
        (punt (ifix [gal ;~(plug gar ace)] decomp-tag))
        ::  one or more space-separated hex codepoints.
        (cook |=(a=(list @c) a) (most ace hex))
      ==
    ::
    ::  parses the name inside a decomposition tag to symbol.
    ::
    ::    a run of letters, lowercased and turned into a cord, rejected
    ::    unless it fits decomp-tag:unicode-data.
    ++  decomp-tag
      %+  sear  (soft decomp-tag:unicode-data)
      :(cook crip cass (star alf))
    ::
    ::  parses a numeric field as raw text.
    ::
    ::    accepts any run (possibly empty) of digits, '/' and '-'; the
    ::    value is kept as a tape and not interpreted (see todo in +line).
    ++  string-number
      %+  cook
        |=(a=tape a)
      (star ;~(pose nud fas hep))
    ::
    --
  --
::  NOTE(review): presumably delegates diff/merge handling to the %txt
::  mark -- confirm against the clay mark conventions.
++  grad  %txt
--