From e704d90e06e1bf7bedb0923aa599be60dfcbca26 Mon Sep 17 00:00:00 2001
From: Matilde Park
Date: Thu, 27 May 2021 17:00:54 -0400
Subject: [PATCH 1/3] interface: add new test for mention+link msgs

---
 pkg/interface/src/logic/lib/tokenizeMessage.test.js | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/interface/src/logic/lib/tokenizeMessage.test.js b/pkg/interface/src/logic/lib/tokenizeMessage.test.js
index 4a5451ca1..7b477a3a4 100644
--- a/pkg/interface/src/logic/lib/tokenizeMessage.test.js
+++ b/pkg/interface/src/logic/lib/tokenizeMessage.test.js
@@ -65,4 +65,12 @@ describe('tokenizeMessage', () => {
     expect(url).toEqual('https://urbit.org');
     expect(text3).toEqual(' a link is here!');
   });
+  it('should tokenize both mentions and links', () => {
+    const example = '~haddef-sigwen have you looked at https://urbit.org lately?';
+    const [{ mention }, { text }, { url }, { text: text2 }] = tokenizeMessage(example);
+    expect(mention).toEqual('~haddef-sigwen');
+    expect(text).toEqual(' have you looked at ');
+    expect(url).toEqual('https://urbit.org');
+    expect(text2).toEqual(' lately?');
+  });
 });

From 3d9036b10f59e475b2e0dd6310fa12faed5675f2 Mon Sep 17 00:00:00 2001
From: Liam Fitzgerald
Date: Fri, 28 May 2021 09:51:55 +1000
Subject: [PATCH 2/3] tokenizeMessage: race regexes, choose closest match

---
 .../src/logic/lib/tokenizeMessage.js      | 88 +++++++++++--------
 .../src/logic/lib/tokenizeMessage.test.js | 19 ++--
 2 files changed, 66 insertions(+), 41 deletions(-)

diff --git a/pkg/interface/src/logic/lib/tokenizeMessage.js b/pkg/interface/src/logic/lib/tokenizeMessage.js
index f3750c0c4..7ee41b553 100644
--- a/pkg/interface/src/logic/lib/tokenizeMessage.js
+++ b/pkg/interface/src/logic/lib/tokenizeMessage.js
@@ -3,9 +3,9 @@ import { parsePermalink, permalinkToReference } from '~/logic/lib/permalinks';
 
 const URL_REGEX = new RegExp(String(/^([^[\]]*?)(([\w\-\+]+:\/\/)[-a-zA-Z0-9:@;?&=\/%\+\.\*!'\(\),\$_\{\}\^~\[\]`#|]+[\w/])([\s\S]*)/.source));
 
-const PATP_REGEX = /^([\s\S]*?)(~[a-z_-]+)([\s\S]*)/;
+const PATP_REGEX = /^([\s\S]*)(~[a-z_-]+)([\s\S]*)/;
 
-const GROUP_REGEX = new RegExp(String(/^([\s\S ]*?)(~[-a-z_]+\/[-a-z]+)([\s\S]*)/.source));
+const GROUP_REGEX = new RegExp(String(/^([\s\S ]*)(~[-a-z_]+\/[-a-z]+)([\s\S]*)/.source));
 
 const convertToGroupRef = group => `web+urbitgraph://group/${group}`;
 
@@ -17,6 +17,36 @@ export const isUrl = (str) => {
   }
 };
 
+const raceRegexes = (str) => {
+  const link = str.match(URL_REGEX);
+  const groupRef = str.match(GROUP_REGEX);
+  const mention = str.match(PATP_REGEX);
+  let pfix = str;
+  let content, sfix;
+  if(link) {
+    pfix = link[1];
+    sfix = link[4];
+    const perma = parsePermalink(link[2]);
+    if(perma) {
+      content = permalinkToReference(perma);
+    } else {
+      content = { url: link[2] };
+    }
+  }
+  if(groupRef && groupRef[1].length < pfix?.length) {
+    pfix = groupRef[1];
+    const perma = parsePermalink(convertToGroupRef(groupRef[2]));
+    content = permalinkToReference(perma);
+    sfix = groupRef[3];
+  }
+  if(mention && urbitOb.isValidPatp(mention[2]) && mention[1].length < pfix?.length) {
+    pfix = mention[1];
+    content = { mention: mention[2] };
+    sfix = mention[3];
+  }
+  return [pfix, content, sfix];
+};
+
 const tokenizeMessage = (text) => {
   const messages = [];
   // by line
@@ -35,50 +65,38 @@ const tokenizeMessage = (text) => {
     }
     while(str.length > 0) {
       const resetAndPush = (content) => {
-        blocks.push(currBlock.join(''));
-        messages.push({ text: blocks.join('`') });
+        if(currBlock.length > 0) {
+          blocks.push(currBlock.join(''));
+        }
+        if(blocks.length > 0) {
+          // ended on a `
+          if(blocks.length % 2 === 0) {
+            blocks.push('');
+          }
+          messages.push({ text: blocks.join('`') });
+        }
         currBlock = [];
         blocks = [];
         messages.push(content);
       };
-      const link = str.match(URL_REGEX);
-      if(link) {
-        const [,pfix, url, protocol, sfix] = link;
-        const perma = parsePermalink(url);
-        currBlock.push(pfix);
-        if(protocol === 'web+urbitgraph://' && perma) {
-          resetAndPush(permalinkToReference(perma));
-        } else {
-          resetAndPush({ url });
-        }
+      const [pfix, content, sfix] = raceRegexes(str);
+      if(content) {
+        pfix?.length > 0 && currBlock.push(pfix);
+        resetAndPush(content);
         str = sfix;
-        continue;
+      } else {
+        currBlock.push(str);
+        str = '';
       }
-      const groupRef = str.match(GROUP_REGEX);
-      if(groupRef) {
-        const [,pfix, group, sfix] = groupRef;
-        currBlock.push(pfix);
-        const perma = parsePermalink(convertToGroupRef(group));
-        resetAndPush(permalinkToReference(perma));
-        str = sfix;
-        continue;
-      }
-      const patp = str.match(PATP_REGEX);
-      if(Boolean(patp) && urbitOb.isValidPatp(patp[2])) {
-        const [,pfix, mention, sfix] = patp;
-        currBlock.push(pfix);
-        resetAndPush({ mention });
-        str = sfix;
-        continue;
-      }
-      currBlock.push(str);
-      str = '';
     }
     blocks.push(currBlock.join(''));
     currBlock = [];
   });
+  // ended on a `
+  if(blocks.length % 2 === 0) {
+    blocks.push('');
+  }
   messages.push({ text: blocks.join('`') });
-  console.log(messages);
   return messages;
 };
 
diff --git a/pkg/interface/src/logic/lib/tokenizeMessage.test.js b/pkg/interface/src/logic/lib/tokenizeMessage.test.js
index 7b477a3a4..29f37584c 100644
--- a/pkg/interface/src/logic/lib/tokenizeMessage.test.js
+++ b/pkg/interface/src/logic/lib/tokenizeMessage.test.js
@@ -34,7 +34,8 @@ describe('tokenizeMessage', () => {
   });
   it('should autoexpand group references', () => {
     const example = 'test ~bitbet-bolbel/urbit-community foo';
-    const [{ text }, { reference }, { text: foo }] = tokenizeMessage(example);
+    const result = tokenizeMessage(example);
+    const [{ text }, { reference }, { text: foo }] = result;
     expect(text).toEqual('test ');
     expect(reference.group).toEqual('/ship/~bitbet-bolbel/urbit-community');
     expect(foo).toEqual(' foo');
@@ -49,8 +50,7 @@ describe('tokenizeMessage', () => {
   });
   it('should handle multiline messages with references', () => {
     const example = 'web+urbitgraph://group/~fabled-faster/interface-testing-facility/graph/~hastuc-dibtux/test-book-7531/170141184505064871297992714192687202304\n\nlol here [is a link](https://urbit.org)';
-    const [{ text }, { reference }, { text: text2 }] = tokenizeMessage(example);
-    expect(text).toEqual('');
+    const [{ reference }, { text: text2 }] = tokenizeMessage(example);
     expect(reference.graph.graph).toEqual('/ship/~hastuc-dibtux/test-book-7531');
     expect(reference.graph.index).toEqual('/170141184505064871297992714192687202304');
     expect(text2).toEqual('\n\nlol here [is a link](https://urbit.org)');
@@ -57,17 +57,24 @@ describe('tokenizeMessage', () => {
   });
   it('should handle links on newlines after references', () => {
     const example = 'web+urbitgraph://group/~fabled-faster/interface-testing-facility/graph/~hastuc-dibtux/test-book-7531/170141184505064871297992714192687202304\n\nhttps://urbit.org a link is here!';
-    const [{ text }, { reference }, { text: text2 }, { url }, { text: text3 }] = tokenizeMessage(example);
-    expect(text).toEqual('');
+    const [{ reference }, { text: text2 }, { url }, { text: text3 }] = tokenizeMessage(example);
     expect(reference.graph.graph).toEqual('/ship/~hastuc-dibtux/test-book-7531');
     expect(reference.graph.index).toEqual('/170141184505064871297992714192687202304');
     expect(text2).toEqual('\n\n');
     expect(url).toEqual('https://urbit.org');
     expect(text3).toEqual(' a link is here!');
   });
+  it('should tokenize mention at start of a line', () => {
+    const example = '~haddef-sigwen test';
+    const result = tokenizeMessage(example);
+    const [{ mention }, { text }] = result;
+    expect(mention).toEqual('~haddef-sigwen');
+    expect(text).toEqual(' test');
+  });
   it('should tokenize both mentions and links', () => {
     const example = '~haddef-sigwen have you looked at https://urbit.org lately?';
-    const [{ mention }, { text }, { url }, { text: text2 }] = tokenizeMessage(example);
+    const result = tokenizeMessage(example);
+    const [{ mention }, { text }, { url }, { text: text2 }] = result;
     expect(mention).toEqual('~haddef-sigwen');
     expect(text).toEqual(' have you looked at ');
     expect(url).toEqual('https://urbit.org');

From 623da874e5311359599894ba1ef0aefc301658b7 Mon Sep 17 00:00:00 2001
From: Liam Fitzgerald
Date: Fri, 28 May 2021 10:47:51 +1000
Subject: [PATCH 3/3] tokenizeMessage: less greedy

---
 pkg/interface/src/logic/lib/tokenizeMessage.js      |  4 ++--
 pkg/interface/src/logic/lib/tokenizeMessage.test.js | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/pkg/interface/src/logic/lib/tokenizeMessage.js b/pkg/interface/src/logic/lib/tokenizeMessage.js
index 7ee41b553..e2f46ad94 100644
--- a/pkg/interface/src/logic/lib/tokenizeMessage.js
+++ b/pkg/interface/src/logic/lib/tokenizeMessage.js
@@ -3,9 +3,9 @@ import { parsePermalink, permalinkToReference } from '~/logic/lib/permalinks';
 
 const URL_REGEX = new RegExp(String(/^([^[\]]*?)(([\w\-\+]+:\/\/)[-a-zA-Z0-9:@;?&=\/%\+\.\*!'\(\),\$_\{\}\^~\[\]`#|]+[\w/])([\s\S]*)/.source));
 
-const PATP_REGEX = /^([\s\S]*)(~[a-z_-]+)([\s\S]*)/;
+const PATP_REGEX = /^([\s\S]*?)(~[a-z_-]+)([\s\S]*)/;
 
-const GROUP_REGEX = new RegExp(String(/^([\s\S ]*)(~[-a-z_]+\/[-a-z]+)([\s\S]*)/.source));
+const GROUP_REGEX = new RegExp(String(/^([\s\S ]*?)(~[-a-z_]+\/[-a-z]+)([\s\S]*)/.source));
 
 const convertToGroupRef = group => `web+urbitgraph://group/${group}`;
 
diff --git a/pkg/interface/src/logic/lib/tokenizeMessage.test.js b/pkg/interface/src/logic/lib/tokenizeMessage.test.js
index 29f37584c..98a4559e5 100644
--- a/pkg/interface/src/logic/lib/tokenizeMessage.test.js
+++ b/pkg/interface/src/logic/lib/tokenizeMessage.test.js
@@ -80,4 +80,17 @@ describe('tokenizeMessage', () => {
     expect(url).toEqual('https://urbit.org');
     expect(text2).toEqual(' lately?');
   });
+
+  it('should tokenize two links and a mention', () => {
+    const example = '~haddef-sigwen, test https://tlon.io test https://urbit.org test ~hastuc-dibtux';
+    const result = tokenizeMessage(example);
+    const [{ mention }, { text: one }, { url: tlon }, { text: two }, { url: urbit }, { text: three }, { mention: hastuc }] = result;
+    expect(mention).toEqual('~haddef-sigwen');
+    expect(one).toEqual(', test ');
+    expect(tlon).toEqual('https://tlon.io');
+    expect(two).toEqual(' test ');
+    expect(urbit).toEqual('https://urbit.org');
+    expect(three).toEqual(' test ');
+    expect(hastuc).toEqual('~hastuc-dibtux');
+  });
 });
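
For illustration, a minimal sketch of the "race the regexes, choose closest match" approach this series converges on. URL_RE, PATP_RE and race() below are simplified stand-ins invented for the sketch, not the shipped URL_REGEX, PATP_REGEX or raceRegexes, and it skips the group-reference and permalink handling that relies on urbit-ob and ~/logic/lib/permalinks. The point it shows: each pattern uses a lazy *? prefix so it reports its first occurrence (the "less greedy" change in PATCH 3/3), and the candidate with the shortest prefix is the token that appears earliest in the string.

    // Illustrative sketch only, plain JavaScript; simplified patterns, not the shipped ones.
    const URL_RE = /^([\s\S]*?)(https?:\/\/[^\s]+)([\s\S]*)/;
    const PATP_RE = /^([\s\S]*?)(~[a-z-]+)([\s\S]*)/;

    const race = (str) => {
      // Each match is [full, prefix, token, suffix]; the lazy prefix keeps
      // group 1 minimal, so every candidate reports its first occurrence.
      const candidates = [
        { match: str.match(URL_RE), make: m => ({ url: m[2] }) },
        { match: str.match(PATP_RE), make: m => ({ mention: m[2] }) }
      ].filter(c => c.match);
      if (candidates.length === 0) {
        return null;
      }
      // Closest match wins: the shortest prefix belongs to the earliest token.
      candidates.sort((a, b) => a.match[1].length - b.match[1].length);
      const { match, make } = candidates[0];
      return [match[1], make(match), match[3]];
    };

    console.log(race('~haddef-sigwen have you looked at https://urbit.org lately?'));
    // -> [ '', { mention: '~haddef-sigwen' }, ' have you looked at https://urbit.org lately?' ]

With greedy * prefixes (the PATCH 2/3 versions) each regex would report its last occurrence instead, so the shortest-prefix comparison could pick the wrong token; the "two links and a mention" test added in PATCH 3/3 exercises exactly that case.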