mirror of
https://github.com/urbit/shrub.git
synced 2024-12-26 13:31:36 +03:00
jets: hand rolled c parser for (slaw %p ...)
This completes the c jetting for parsing %p strings in slaw.
This commit is contained in:
parent
91badd6376
commit
1ef953c9ac
@ -70,8 +70,8 @@
|
||||
u3_noun u3qc_swp(u3_atom, u3_atom);
|
||||
u3_noun u3qc_sqt(u3_atom);
|
||||
|
||||
u3_noun u3qc_po_ind(u3_atom);
|
||||
u3_noun u3qc_po_ins(u3_atom);
|
||||
u3_noun po_find_prefix(char one, char two, char three);
|
||||
u3_noun po_find_suffix(char one, char two, char three);
|
||||
|
||||
u3_noun u3qc_ob_fynd(u3_atom);
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
*/
|
||||
#include "all.h"
|
||||
|
||||
static u3_noun
|
||||
find_prefix(char one, char two, char three) {
|
||||
u3_noun
|
||||
po_find_prefix(char one, char two, char three) {
|
||||
switch (one) {
|
||||
case 'b': switch (two) {
|
||||
case 'a': switch (three) {
|
||||
@ -410,8 +410,8 @@ find_prefix(char one, char two, char three) {
|
||||
}
|
||||
}
|
||||
|
||||
static u3_noun
|
||||
find_suffix(char one, char two, char three) {
|
||||
u3_noun
|
||||
po_find_suffix(char one, char two, char three) {
|
||||
switch (one) {
|
||||
case 'b': switch (two) {
|
||||
case 'e': switch (three) {
|
||||
@ -852,7 +852,7 @@ u3qc_po_ins(u3_noun a)
|
||||
c3_y byt_y[3];
|
||||
u3r_bytes(0, 3, byt_y, a);
|
||||
|
||||
return find_prefix(byt_y[0], byt_y[1], byt_y[2]);
|
||||
return po_find_prefix(byt_y[0], byt_y[1], byt_y[2]);
|
||||
}
|
||||
|
||||
u3_noun
|
||||
@ -875,7 +875,7 @@ u3qc_po_ind(u3_noun a)
|
||||
c3_y byt_y[3];
|
||||
u3r_bytes(0, 3, byt_y, a);
|
||||
|
||||
return find_suffix(byt_y[0], byt_y[1], byt_y[2]);
|
||||
return po_find_suffix(byt_y[0], byt_y[1], byt_y[2]);
|
||||
}
|
||||
|
||||
u3_noun
|
||||
|
@ -7,72 +7,299 @@
|
||||
|
||||
/* functions
|
||||
*/
|
||||
u3_noun
|
||||
_parse_ud(u3_noun txt) {
|
||||
c3_c* c = u3r_string(txt);
|
||||
|
||||
u3_noun
|
||||
_parse_ud(u3_noun txt) {
|
||||
c3_c* c = u3r_string(txt);
|
||||
// First character must represent a digit
|
||||
c3_c* cur = c;
|
||||
if (cur[0] > '9' || cur[0] < '0') {
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
c3_w total = cur[0] - '0';
|
||||
cur++;
|
||||
|
||||
int since_last_period = 0;
|
||||
while (cur[0] != 0) {
|
||||
since_last_period++;
|
||||
if (cur[0] == '.') {
|
||||
since_last_period = 0;
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// First character must represent a digit
|
||||
c3_c* cur = c;
|
||||
if (cur[0] > '9' || cur[0] < '0') {
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
c3_w total = cur[0] - '0';
|
||||
|
||||
total = u3qa_mul(total, 10);
|
||||
total = u3qa_add(total, cur[0] - '0');
|
||||
cur++;
|
||||
|
||||
int since_last_period = 0;
|
||||
while (cur[0] != 0) {
|
||||
since_last_period++;
|
||||
if (cur[0] == '.') {
|
||||
since_last_period = 0;
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cur[0] > '9' || cur[0] < '0') {
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
total = u3qa_mul(total, 10);
|
||||
total = u3qa_add(total, cur[0] - '0');
|
||||
cur++;
|
||||
|
||||
if (since_last_period > 3) {
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
if (since_last_period > 3) {
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
c3_free(c);
|
||||
return u3nc(0, total);
|
||||
}
|
||||
|
||||
// parsing @p:
|
||||
//
|
||||
// +slaw calls +fed:ag directly. +fed:ag:
|
||||
//
|
||||
// - parses the text first into a number.
|
||||
// - runs fynd:ob, which is the scrambler restore structure. And +fynd is
|
||||
// unjetted.
|
||||
//
|
||||
// The actual +po stuff, like +ins:po, is jetted but it's jetted such that it
|
||||
// pulls the tables out of the sample, so we can't just reuse it from other
|
||||
// jets.
|
||||
c3_free(c);
|
||||
return u3nc(0, total);
|
||||
}
|
||||
|
||||
/* u3_noun */
|
||||
/* _parse_p(u3_noun txt) { */
|
||||
/* // The current parsing text code for @p in hoon is kinda nuts. it parses */
|
||||
/* // arbitrary lowercase ascii characters and then does a linear walk through */
|
||||
/* // the */
|
||||
// Parsing @p:
//
// +slaw calls +fed:ag directly. +fed:ag:
//
// - first parses the text into a number;
// - then runs +fynd:ob, which restores the number from its scrambled form.
//   +fynd itself is unjetted.
//
// The actual +po arms, like +ins:po, are jetted, but they are jetted such
// that they pull the tables out of the sample, so we can't just reuse them
// from other jets.
|
||||
|
||||
/* get_syllable(): read one three-letter syllable at *cur_ptr.
**
**   If the next three bytes are all in 'a'..'z', copies them to *one, *two
**   and *three, advances *cur_ptr past them and returns c3y. Otherwise
**   returns c3n and leaves *cur_ptr and the three outputs untouched.
*/
u3_noun get_syllable(c3_c** cur_ptr, c3_c* one, c3_c* two, c3_c* three) {
  const c3_c* syl_c = *cur_ptr;

  // The tests short-circuit left to right, so a NUL terminator fails the
  // first range check that reaches it and no byte past the end of the string
  // is ever read.
  //
  // Plain range comparisons are used rather than islower(): handing a
  // negative char (any byte >= 0x80 where char is signed) to a <ctype.h>
  // function is undefined behaviour, and islower() also varies with the
  // process locale.
  if ( (syl_c[0] < 'a') || (syl_c[0] > 'z') ||
       (syl_c[1] < 'a') || (syl_c[1] > 'z') ||
       (syl_c[2] < 'a') || (syl_c[2] > 'z') )
  {
    return c3n;
  }

  *one   = syl_c[0];
  *two   = syl_c[1];
  *three = syl_c[2];
  (*cur_ptr) += 3;

  return c3y;
}
|
||||
|
||||
/* // Run the result through the scrambler to resolve the textual name to the */
|
||||
/* // number. */
|
||||
/* return u3nc(0, u3qc_ob_fynd(raw)); */
|
||||
/* } */
|
||||
static
|
||||
u3_noun combine(u3_noun p, u3_noun q)
|
||||
{
|
||||
if (_(u3a_is_atom(p))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (_(u3a_is_atom(q))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u3_noun ret = u3nc(0, u3qa_add(u3k(u3t(p)), u3qa_mul(256, u3k(u3t(q)))));
|
||||
u3z(p);
|
||||
u3z(q);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define ENSURE_NOT_END() do { \
|
||||
if (*cur == 0) { \
|
||||
c3_free(c); \
|
||||
return 0; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CONSUME_HEP() do { \
|
||||
if (*cur != '-') { \
|
||||
c3_free(c); \
|
||||
return 0; \
|
||||
} \
|
||||
cur++; \
|
||||
} while (0)
|
||||
|
||||
#define TRY_GET_SYLLABLE(prefix) \
|
||||
c3_c prefix##_one, prefix##_two, prefix##_three; \
|
||||
if (c3n == get_syllable(&cur, & prefix##_one, & prefix##_two, & prefix##_three)) { \
|
||||
c3_free(c); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
u3_noun
|
||||
_parse_p(u3_noun txt) {
|
||||
c3_c* c = u3r_string(txt);
|
||||
|
||||
c3_c* cur = c;
|
||||
if (cur[0] != '~') {
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
cur++;
|
||||
|
||||
// We at least have a sig prefix. We're now going to parse tuples of three
|
||||
// lowercase letters. Our naming pattern for the pieces we read is [a b c d
|
||||
// ...] as we read them.
|
||||
TRY_GET_SYLLABLE(a);
|
||||
|
||||
// There was only one syllable. If it's a valid suffix syllable, then
|
||||
// it's a galaxy. We don't even have to run this through the scrambler or
|
||||
// check for validity since its already a (unit @).
|
||||
if (*cur == 0) {
|
||||
c3_free(c);
|
||||
return po_find_suffix(a_one, a_two, a_three);
|
||||
}
|
||||
|
||||
TRY_GET_SYLLABLE(b);
|
||||
|
||||
// There were only two syllables. If they are a valid prefix and suffix, then
|
||||
// it's a star.
|
||||
if (*cur == 0) {
|
||||
u3_noun a_part = po_find_prefix(a_one, a_two, a_three);
|
||||
u3_noun b_part = po_find_suffix(b_one, b_two, b_three);
|
||||
u3_atom combined = combine(b_part, a_part);
|
||||
c3_free(c);
|
||||
return combined;
|
||||
}
|
||||
|
||||
// There must now be a - or it is invalid
|
||||
CONSUME_HEP();
|
||||
|
||||
TRY_GET_SYLLABLE(c);
|
||||
|
||||
ENSURE_NOT_END();
|
||||
|
||||
TRY_GET_SYLLABLE(d);
|
||||
|
||||
if (*cur == 0) {
|
||||
u3_noun a_part = po_find_prefix(a_one, a_two, a_three);
|
||||
u3_noun b_part = po_find_suffix(b_one, b_two, b_three);
|
||||
u3_noun c_part = po_find_prefix(c_one, c_two, c_three);
|
||||
u3_noun d_part = po_find_suffix(d_one, d_two, d_three);
|
||||
|
||||
u3_noun m = combine(d_part, combine(c_part, combine(b_part, a_part)));
|
||||
c3_free(c);
|
||||
|
||||
if (_(u3a_is_atom(m))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u3_atom raw = u3k(u3t(m));
|
||||
u3z(m);
|
||||
return u3nc(0, u3qc_ob_fynd(raw));
|
||||
}
|
||||
|
||||
// There must now be a - or it is invalid.
|
||||
CONSUME_HEP();
|
||||
|
||||
// The next possible case is a "short" moon. (~ab-cd-ef)
|
||||
TRY_GET_SYLLABLE(e);
|
||||
|
||||
ENSURE_NOT_END();
|
||||
|
||||
TRY_GET_SYLLABLE(f);
|
||||
|
||||
if (*cur == 0) {
|
||||
u3_noun a_part = po_find_prefix(a_one, a_two, a_three);
|
||||
u3_noun b_part = po_find_suffix(b_one, b_two, b_three);
|
||||
u3_noun c_part = po_find_prefix(c_one, c_two, c_three);
|
||||
u3_noun d_part = po_find_suffix(d_one, d_two, d_three);
|
||||
u3_noun e_part = po_find_prefix(e_one, e_two, e_three);
|
||||
u3_noun f_part = po_find_suffix(f_one, f_two, f_three);
|
||||
|
||||
u3_noun m = combine(f_part, combine(e_part, combine(d_part,
|
||||
combine(c_part, combine(b_part, a_part)))));
|
||||
c3_free(c);
|
||||
|
||||
if (_(u3a_is_atom(m))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u3_atom raw = u3k(u3t(m));
|
||||
u3z(m);
|
||||
return u3nc(0, u3qc_ob_fynd(raw));
|
||||
}
|
||||
|
||||
// There must now be a - or it is invalid.
|
||||
CONSUME_HEP();
|
||||
|
||||
// The next possible case is a "long" moon. (~ab-cd-ef-gh)
|
||||
TRY_GET_SYLLABLE(g);
|
||||
|
||||
ENSURE_NOT_END();
|
||||
|
||||
TRY_GET_SYLLABLE(h);
|
||||
|
||||
if (*cur == 0) {
|
||||
u3_noun a_part = po_find_prefix(a_one, a_two, a_three);
|
||||
u3_noun b_part = po_find_suffix(b_one, b_two, b_three);
|
||||
u3_noun c_part = po_find_prefix(c_one, c_two, c_three);
|
||||
u3_noun d_part = po_find_suffix(d_one, d_two, d_three);
|
||||
u3_noun e_part = po_find_prefix(e_one, e_two, e_three);
|
||||
u3_noun f_part = po_find_suffix(f_one, f_two, f_three);
|
||||
u3_noun g_part = po_find_prefix(g_one, g_two, g_three);
|
||||
u3_noun h_part = po_find_suffix(h_one, h_two, h_three);
|
||||
|
||||
u3_noun m = combine(h_part, combine(g_part, combine(f_part,
|
||||
combine(e_part, combine(d_part, combine(c_part,
|
||||
combine(b_part, a_part)))))));
|
||||
c3_free(c);
|
||||
|
||||
if (_(u3a_is_atom(m))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u3_atom raw = u3k(u3t(m));
|
||||
u3z(m);
|
||||
return u3nc(0, u3qc_ob_fynd(raw));
|
||||
}
|
||||
|
||||
// At this point, the only thing it could be is a long comet, of the form
|
||||
// ~ab-cd-ef-gh--ij-kl-mn-op
|
||||
|
||||
CONSUME_HEP();
|
||||
CONSUME_HEP();
|
||||
|
||||
TRY_GET_SYLLABLE(i);
|
||||
ENSURE_NOT_END();
|
||||
TRY_GET_SYLLABLE(j);
|
||||
CONSUME_HEP();
|
||||
TRY_GET_SYLLABLE(k);
|
||||
ENSURE_NOT_END();
|
||||
TRY_GET_SYLLABLE(l);
|
||||
CONSUME_HEP();
|
||||
TRY_GET_SYLLABLE(m);
|
||||
ENSURE_NOT_END();
|
||||
TRY_GET_SYLLABLE(n);
|
||||
CONSUME_HEP();
|
||||
TRY_GET_SYLLABLE(o);
|
||||
ENSURE_NOT_END();
|
||||
TRY_GET_SYLLABLE(p);
|
||||
|
||||
if (*cur != 0) {
|
||||
// We've parsed all of a comet shape, and there's still more in the
|
||||
// string. Error.
|
||||
c3_free(c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We have a long comet. Time to jam it all together. We rely on combine()
|
||||
// for the error checking and we don't have to scramble comet names.
|
||||
u3_noun a_part = po_find_prefix(a_one, a_two, a_three);
|
||||
u3_noun b_part = po_find_suffix(b_one, b_two, b_three);
|
||||
u3_noun c_part = po_find_prefix(c_one, c_two, c_three);
|
||||
u3_noun d_part = po_find_suffix(d_one, d_two, d_three);
|
||||
u3_noun e_part = po_find_prefix(e_one, e_two, e_three);
|
||||
u3_noun f_part = po_find_suffix(f_one, f_two, f_three);
|
||||
u3_noun g_part = po_find_prefix(g_one, g_two, g_three);
|
||||
u3_noun h_part = po_find_suffix(h_one, h_two, h_three);
|
||||
u3_noun i_part = po_find_prefix(i_one, i_two, i_three);
|
||||
u3_noun j_part = po_find_suffix(j_one, j_two, j_three);
|
||||
u3_noun k_part = po_find_prefix(k_one, k_two, k_three);
|
||||
u3_noun l_part = po_find_suffix(l_one, l_two, l_three);
|
||||
u3_noun m_part = po_find_prefix(m_one, m_two, m_three);
|
||||
u3_noun n_part = po_find_suffix(n_one, n_two, n_three);
|
||||
u3_noun o_part = po_find_prefix(o_one, o_two, o_three);
|
||||
u3_noun p_part = po_find_suffix(p_one, p_two, p_three);
|
||||
|
||||
c3_free(c);
|
||||
|
||||
return combine(p_part, combine(o_part, combine(n_part, combine(m_part,
|
||||
combine(l_part, combine(k_part, combine(j_part, combine(i_part,
|
||||
combine(h_part, combine(g_part, combine(f_part, combine(e_part,
|
||||
combine(d_part, combine(c_part, combine(b_part, a_part)))))))))))))));
|
||||
}
|
||||
|
||||
#undef ENSURE_NOT_END
|
||||
#undef CONSUME_HEP
|
||||
#undef TRY_GET_SYLLABLE
|
||||
|
||||
u3_noun
|
||||
_parse_tas(u3_noun txt) {
|
||||
@ -98,7 +325,7 @@
|
||||
}
|
||||
|
||||
c3_free(c);
|
||||
return u3nc(0, txt);
|
||||
return u3nc(0, u3k(txt));
|
||||
}
|
||||
|
||||
u3_noun
|
||||
@ -114,7 +341,8 @@
|
||||
}
|
||||
|
||||
switch (mod) {
|
||||
/* TODO: case c3__p. Need background jets first. */
|
||||
case 'p':
|
||||
return _parse_p(txt);
|
||||
|
||||
case c3__ud:
|
||||
return _parse_ud(txt);
|
||||
|
Loading…
Reference in New Issue
Block a user