Merge pull request #4952 from urbit/mp/tokenize-mention-links

tokenizer: please I beg no more
This commit is contained in:
matildepark 2021-05-27 20:50:12 -04:00 committed by GitHub
commit 68cd853fb5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 38 deletions

View File

@ -17,6 +17,36 @@ export const isUrl = (str) => {
}
};
/**
 * Runs the URL, group-reference and mention regexes against `str` and picks
 * the candidate whose leading capture group (the text before the token) is
 * shortest. Candidates are considered in the order URL, group reference,
 * mention; a later candidate only replaces the current one when its prefix
 * is strictly shorter, so ties keep the earlier candidate.
 *
 * @param {string} str - text to scan
 * @returns {Array} `[pfix, content, sfix]`: the text before the winning
 *   token, the token object, and the text after it. When nothing matches,
 *   `pfix` is `str` itself and `content` / `sfix` are `undefined`.
 */
const raceRegexes = (str) => {
  const urlMatch = str.match(URL_REGEX);
  const groupMatch = str.match(GROUP_REGEX);
  const patpMatch = str.match(PATP_REGEX);

  // Current winner; with no match at all the whole string is the prefix
  let best = { pfix: str, content: undefined, sfix: undefined };

  if (urlMatch) {
    const [, before, url, , after] = urlMatch;
    const perma = parsePermalink(url);
    best = {
      pfix: before,
      // URLs that parse as permalinks become references, anything else stays a plain url
      content: perma ? permalinkToReference(perma) : { url },
      sfix: after
    };
  }

  if (groupMatch && groupMatch[1].length < best.pfix?.length) {
    const [, before, group, after] = groupMatch;
    // NOTE(review): the parsed permalink is passed on unchecked — confirm it cannot be empty here
    const perma = parsePermalink(convertToGroupRef(group));
    best = {
      pfix: before,
      content: permalinkToReference(perma),
      sfix: after
    };
  }

  // Only regex hits that urbitOb accepts as a valid patp count as mentions
  if (
    patpMatch &&
    urbitOb.isValidPatp(patpMatch[2]) &&
    patpMatch[1].length < best.pfix?.length
  ) {
    const [, before, ship, after] = patpMatch;
    best = {
      pfix: before,
      content: { mention: ship },
      sfix: after
    };
  }

  return [best.pfix, best.content, best.sfix];
};
const tokenizeMessage = (text) => {
const messages = [];
// by line
@ -35,50 +65,38 @@ const tokenizeMessage = (text) => {
}
while(str.length > 0) {
const resetAndPush = (content) => {
blocks.push(currBlock.join(''));
messages.push({ text: blocks.join('`') });
if(currBlock.length > 0) {
blocks.push(currBlock.join(''));
}
if(blocks.length > 0) {
// ended on a `
if(blocks.length % 2 === 0) {
blocks.push('');
}
messages.push({ text: blocks.join('`') });
}
currBlock = [];
blocks = [];
messages.push(content);
};
const link = str.match(URL_REGEX);
if(link) {
const [,pfix, url, protocol, sfix] = link;
const perma = parsePermalink(url);
currBlock.push(pfix);
if(protocol === 'web+urbitgraph://' && perma) {
resetAndPush(permalinkToReference(perma));
} else {
resetAndPush({ url });
}
const [pfix, content, sfix] = raceRegexes(str);
if(content) {
pfix?.length > 0 && currBlock.push(pfix);
resetAndPush(content);
str = sfix;
continue;
} else {
currBlock.push(str);
str = '';
}
const groupRef = str.match(GROUP_REGEX);
if(groupRef) {
const [,pfix, group, sfix] = groupRef;
currBlock.push(pfix);
const perma = parsePermalink(convertToGroupRef(group));
resetAndPush(permalinkToReference(perma));
str = sfix;
continue;
}
const patp = str.match(PATP_REGEX);
if(Boolean(patp) && urbitOb.isValidPatp(patp[2])) {
const [,pfix, mention, sfix] = patp;
currBlock.push(pfix);
resetAndPush({ mention });
str = sfix;
continue;
}
currBlock.push(str);
str = '';
}
blocks.push(currBlock.join(''));
currBlock = [];
});
// ended on a `
if(blocks.length % 2 === 0) {
blocks.push('');
}
messages.push({ text: blocks.join('`') });
console.log(messages);
return messages;
};

View File

@ -34,7 +34,8 @@ describe('tokenizeMessage', () => {
it('should autoexpand group references', () => {
const example = 'test ~bitbet-bolbel/urbit-community foo';
const [{ text }, { reference }, { text: foo }] = tokenizeMessage(example);
const result = tokenizeMessage(example);
const [{ text }, { reference }, { text: foo }] = result;
expect(text).toEqual('test ');
expect(reference.group).toEqual('/ship/~bitbet-bolbel/urbit-community');
expect(foo).toEqual(' foo');
@ -48,8 +49,7 @@ describe('tokenizeMessage', () => {
it('should handle multiline messages with references', () => {
const example = 'web+urbitgraph://group/~fabled-faster/interface-testing-facility/graph/~hastuc-dibtux/test-book-7531/170141184505064871297992714192687202304\n\nlol here [is a link](https://urbit.org)';
const [{ text }, { reference }, { text: text2 }] = tokenizeMessage(example);
expect(text).toEqual('');
const [{ reference }, { text: text2 }] = tokenizeMessage(example);
expect(reference.graph.graph).toEqual('/ship/~hastuc-dibtux/test-book-7531');
expect(reference.graph.index).toEqual('/170141184505064871297992714192687202304');
expect(text2).toEqual('\n\nlol here [is a link](https://urbit.org)');
@ -57,12 +57,40 @@ describe('tokenizeMessage', () => {
it('should handle links on newlines after references', () => {
const example = 'web+urbitgraph://group/~fabled-faster/interface-testing-facility/graph/~hastuc-dibtux/test-book-7531/170141184505064871297992714192687202304\n\nhttps://urbit.org a link is here!';
const [{ text }, { reference }, { text: text2 }, { url }, { text: text3 }] = tokenizeMessage(example);
expect(text).toEqual('');
const [{ reference }, { text: text2 }, { url }, { text: text3 }] = tokenizeMessage(example);
expect(reference.graph.graph).toEqual('/ship/~hastuc-dibtux/test-book-7531');
expect(reference.graph.index).toEqual('/170141184505064871297992714192687202304');
expect(text2).toEqual('\n\n');
expect(url).toEqual('https://urbit.org');
expect(text3).toEqual(' a link is here!');
});
it('should tokenize mention at start of a line', () => {
  // A patp that opens the message yields a mention token followed by the remaining text
  const tokens = tokenizeMessage('~haddef-sigwen test');
  const [first, second] = tokens;
  expect(first.mention).toEqual('~haddef-sigwen');
  expect(second.text).toEqual(' test');
});
it('should tokenize both mentions and links', () => {
  // Expected token order: mention, text, url, text
  const tokens = tokenizeMessage('~haddef-sigwen have you looked at https://urbit.org lately?');
  const [ship, before, link, after] = tokens;
  expect(ship.mention).toEqual('~haddef-sigwen');
  expect(before.text).toEqual(' have you looked at ');
  expect(link.url).toEqual('https://urbit.org');
  expect(after.text).toEqual(' lately?');
});
it('should tokenize two links and a mention', () => {
  // Expected token order: mention, text, url, text, url, text, mention
  const tokens = tokenizeMessage('~haddef-sigwen, test https://tlon.io test https://urbit.org test ~hastuc-dibtux');
  const [firstShip, one, tlon, two, urbit, three, lastShip] = tokens;
  expect(firstShip.mention).toEqual('~haddef-sigwen');
  expect(one.text).toEqual(', test ');
  expect(tlon.url).toEqual('https://tlon.io');
  expect(two.text).toEqual(' test ');
  expect(urbit.url).toEqual('https://urbit.org');
  expect(three.text).toEqual(' test ');
  expect(lastShip.mention).toEqual('~hastuc-dibtux');
});
});