From 964b8d1ba49f51329fe57cc618681fdc994c9afe Mon Sep 17 00:00:00 2001
From: Alexander Onnikov
Date: Mon, 9 Sep 2024 14:07:38 +0700
Subject: [PATCH] fix: implement better todos parsing (#6497)

Signed-off-by: Alexander Onnikov
---
 .../src/markdown/__tests__/markdown.test.ts   | 410 ++++++++++++++++++
 packages/text/src/markdown/parser.ts          | 206 +++++----
 2 files changed, 527 insertions(+), 89 deletions(-)
 create mode 100644 packages/text/src/markdown/__tests__/markdown.test.ts

diff --git a/packages/text/src/markdown/__tests__/markdown.test.ts b/packages/text/src/markdown/__tests__/markdown.test.ts
new file mode 100644
index 0000000000..2c47c91979
--- /dev/null
+++ b/packages/text/src/markdown/__tests__/markdown.test.ts
@@ -0,0 +1,410 @@
+//
+// Copyright © 2024 Hardcore Engineering Inc.
+//
+// Licensed under the Eclipse Public License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may
+// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+import { ServerKit } from '../../kits/server-kit'
+import { parseMessageMarkdown } from '..'
+
+const refUrl: string = 'ref://'
+const imageUrl: string = 'http://localhost'
+
+const extensions = [ServerKit]
+
+// Test cases: `markdown` is the parser input, `markup` is the expected parsed document.
+const tests: Array<{ name: string, markdown: string, markup: object }> = [
+  {
+    name: 'simple text',
+    markdown: 'Lorem ipsum dolor sit amet.',
+    markup: {
+      type: 'doc',
+      content: [
+        {
+          type: 'paragraph',
+          content: [
+            {
+              type: 'text',
+              text: 'Lorem ipsum dolor sit amet.',
+              marks: []
+            }
+          ]
+        }
+      ]
+    }
+  },
+  {
+    name: 'text with heading',
+    markdown: `# Lorem ipsum
+
+Lorem ipsum dolor sit amet.
+`,
+    markup: {
+      type: 'doc',
+      content: [
+        {
+          type: 'heading',
+          attrs: { level: 1 },
+          content: [
+            {
+              type: 'text',
+              text: 'Lorem ipsum',
+              marks: []
+            }
+          ]
+        },
+        {
+          type: 'paragraph',
+          content: [
+            {
+              type: 'text',
+              text: 'Lorem ipsum dolor sit amet.',
+              marks: []
+            }
+          ]
+        }
+      ]
+    }
+  },
+  {
+    name: 'bullet list',
+    markdown: `# bullet list
+- list item 1
+- list item 2
+`,
+    markup: {
+      type: 'doc',
+      content: [
+        {
+          type: 'heading',
+          attrs: { level: 1 },
+          content: [
+            {
+              type: 'text',
+              text: 'bullet list',
+              marks: []
+            }
+          ]
+        },
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'list item 1',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            },
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'list item 2',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  },
+  {
+    name: 'todos',
+    markdown: `# TODO
+- [ ] todo 1
+- [x] todo 2
+`,
+    markup: {
+      type: 'doc',
+      content: [
+        {
+          type: 'heading',
+          attrs: { level: 1 },
+          content: [
+            {
+              type: 'text',
+              text: 'TODO',
+              marks: []
+            }
+          ]
+        },
+        {
+          type: 'todoList',
+          content: [
+            {
+              type: 'todoItem',
+              attrs: { checked: false },
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'todo 1',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            },
+            {
+              type: 'todoItem',
+              attrs: { checked: true },
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'todo 2',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  },
+  {
+    name: 'todos followed by list items',
+    markdown: `# todo and list
+- [ ] todo 1
+- [x] todo 2
+- list item 1
+- list item 2
+`,
+    markup: {
+      type: 'doc',
+      content: [
+        {
+          type: 'heading',
+          attrs: { level: 1 },
+          content: [
+            {
+              type: 'text',
+              text: 'todo and list',
+              marks: []
+            }
+          ]
+        },
+        {
+          type: 'todoList',
+          content: [
+            {
+              type: 'todoItem',
+              attrs: { checked: false },
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'todo 1',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            },
+            {
+              type: 'todoItem',
+              attrs: { checked: true },
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'todo 2',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'list item 1',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            },
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'list item 2',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  },
+  {
+    name: 'todos mixed with list items',
+    markdown: `# mixed lists
+- [ ] todo 1
+- list item 1
+- [x] todo 2
+- list item 2
+`,
+    markup: {
+      type: 'doc',
+      content: [
+        {
+          type: 'heading',
+          attrs: { level: 1 },
+          content: [
+            {
+              type: 'text',
+              text: 'mixed lists',
+              marks: []
+            }
+          ]
+        },
+        {
+          type: 'todoList',
+          content: [
+            {
+              type: 'todoItem',
+              attrs: { checked: false },
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'todo 1',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'list item 1',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        {
+          type: 'todoList',
+          content: [
+            {
+              type: 'todoItem',
+              attrs: { checked: true },
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'todo 2',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [
+                    {
+                      type: 'text',
+                      text: 'list item 2',
+                      marks: []
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  }
+]
+
+// Runs every case through parseMessageMarkdown and compares the result with its markup.
+describe('markdown', () => {
+  tests.forEach(({ name, markdown, markup }) => {
+    it(name, () => {
+      const parsed = parseMessageMarkdown(markdown, refUrl, imageUrl, extensions)
+      expect(parsed).toEqual(markup)
+    })
+  })
+})
diff --git a/packages/text/src/markdown/parser.ts b/packages/text/src/markdown/parser.ts
index 40e1a5f506..e4c2f88c60 100644
--- a/packages/text/src/markdown/parser.ts
+++ b/packages/text/src/markdown/parser.ts
@@ -589,12 +589,6 @@ interface TaskListStateCore extends StateCore {
 const startsWithTodoMarkdown = (token: Token): boolean => /^\[[xX \u00A0]\][ \u00A0]/.test(token.content)
 const isCheckedTodoItem = (token: Token): boolean => /^\[[xX]\][ \u00A0]/.test(token.content)
 
-const isTodoListItemInline = (tokens: Token[], index: number): boolean =>
-  isInlineToken(tokens[index]) &&
-  isParagraphToken(tokens[index - 1]) &&
-  isListItemToken(tokens[index - 2]) &&
-  startsWithTodoMarkdown(tokens[index])
-
 export class MarkdownParser {
   tokenizer: MarkdownIt
   tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>
@@ -607,7 +601,7 @@ export class MarkdownParser {
     this.tokenizer = MarkdownIt('default', {
       html: true
     })
-    this.tokenizer.core.ruler.after('inline', 'task_list', this.taskListRule)
+    this.tokenizer.core.ruler.after('inline', 'task_list', this.listRule)
 
     this.tokenHandlers = tokenHandlers(tokensBlock, tokensNode, tokensMark, specialRule, ignoreRule, extensions)
   }
@@ -625,98 +619,186 @@ export class MarkdownParser {
     return doc
   }
 
-  taskListRule: RuleCore = (state: TaskListStateCore): boolean => {
-    const tokens = state.tokens
-
-    interface TodoListItemDescriptor {
-      start?: number
-      end?: number
-    }
-
-    let todoListStartIdx: number | undefined
-    let todoListItems: TodoListItemDescriptor[] = []
-    let todoListItem: TodoListItemDescriptor | undefined
-    let isTodoList = false
-    for (let i = 0; i < tokens.length; i++) {
-      if (tokens[i].type === 'bullet_list_open') {
-        todoListStartIdx = i
-        isTodoList = true
-      }
-
-      if (tokens[i].type === 'list_item_open') {
-        todoListItem = {
-          start: i
-        }
-      }
-
-      if (tokens[i].type === 'inline') {
-        if (todoListItem === undefined || !isTodoListItemInline(tokens, i)) {
-          isTodoList = false
-        }
-      }
-
-      if (tokens[i].type === 'list_item_close' && todoListItem !== undefined) {
-        todoListItem.end = i
-        if (isTodoList) {
-          todoListItems.push(todoListItem)
-        }
-        todoListItem = undefined
-      }
-
-      if (tokens[i].type === 'bullet_list_close') {
-        if (isTodoList && todoListStartIdx !== undefined) {
-          // Transform tokens
-          tokens[todoListStartIdx].type = 'todo_list_open'
-          tokens[i].type = 'todo_list_close'
-
-          for (const item of todoListItems) {
-            if (item.start !== undefined && item.end !== undefined) {
-              tokens[item.start].type = 'todo_item_open'
-              tokens[item.end].type = 'todo_item_close'
-
-              const inline = tokens[item.start + 2]
-
-              if (tokens[item.start].attrs == null) {
-                tokens[item.start].attrs = []
-              }
-
-              if (isCheckedTodoItem(inline)) {
-                ;(tokens[item.start].attrs as any).push(['checked', 'true'])
-              }
-
-              if (inline.children !== null) {
-                const newContent = inline.children[0].content.slice(4)
-                if (newContent.length > 0) {
-                  inline.children[0].content = newContent
-                } else {
-                  inline.children = inline.children.slice(1)
-                }
-
-                const metaTok = inline.children.find(
-                  (tok) => tok.type === 'html_inline' && tok.content.startsWith('<!--')
-                )
-                if (metaTok !== undefined) {
-                  const metaValues = metaTok.content.slice(5, -4).split(',')
-                  for (const mv of metaValues) {
-                    if (mv.startsWith('todoid')) {
-                      ;(tokens[item.start].attrs as any).push(['todoid', mv.slice(7)])
-                    }
-                    if (mv.startsWith('userid')) {
-                      ;(tokens[item.start].attrs as any).push(['userid', mv.slice(7)])
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-
-        todoListStartIdx = undefined
-        todoListItems = []
-        isTodoList = false
-      }
-    }
-
-    return true
-  }
+  /**
+   * Core rule converting markdown task items (`- [ ] text`, `- [x] text`) into todo tokens.
+   *
+   * Step #1 retypes matching items of bullet lists to `todo_item_open` / `todo_item_close`.
+   * Step #2 retypes bullet lists holding todo items to `todo_list_open` / `todo_list_close`;
+   * a bullet list mixing todo items with regular items is split into consecutive lists, so
+   * that every resulting list holds items of a single kind.
+   *
+   * Ordered lists are left untouched; nested lists are handled independently of their parents.
+   *
+   * @param state - markdown-it core state; its `tokens` array is modified in place
+   * @returns always `true`
+   */
+  listRule: RuleCore = (state: TaskListStateCore): boolean => {
+    // a list the backward walk of step #2 is currently inside of
+    interface ListState {
+      close: Token
+      bullet: boolean
+      lastItem?: Token
+    }
+
+    const tokens = state.tokens
+
+    // step #1 - convert list items to todo items
+    // only direct children of bullet lists are converted, hence the stack of enclosing lists
+    const parents: string[] = []
+    for (let open = 0; open < tokens.length; open++) {
+      const type = tokens[open].type
+      if (type === 'bullet_list_open' || type === 'ordered_list_open') {
+        parents.push(type)
+      } else if (type === 'bullet_list_close' || type === 'ordered_list_close') {
+        parents.pop()
+      } else if (parents[parents.length - 1] === 'bullet_list_open' && isTodoListItem(tokens, open)) {
+        convertTodoItem(tokens, open)
+      }
+    }
+
+    // step #2 - convert lists to proper type
+    // tokens are walked backwards, so a list is entered at its close token and left at its
+    // open token; the stack holds one entry per list the walk is currently inside of
+    const lists: ListState[] = []
+    for (let i = tokens.length - 1; i >= 0; i--) {
+      const token = tokens[i]
+      if (token.type === 'bullet_list_close' || token.type === 'ordered_list_close') {
+        lists.push({ close: token, bullet: token.type === 'bullet_list_close' })
+      } else if (token.type === 'list_item_close' || token.type === 'todo_item_close') {
+        const list = lists[lists.length - 1]
+        if (list === undefined || !list.bullet) {
+          continue
+        }
+        if (list.lastItem === undefined) {
+          list.lastItem = token
+        } else if (token.type !== list.lastItem.type) {
+          // when found item close token of different type, split the list right after it
+          const close = new state.Token('bullet_list_close', 'ul', -1)
+          const open = new state.Token('bullet_list_open', 'ul', 1)
+          for (const inserted of [close, open]) {
+            inserted.block = true
+            inserted.level = list.close.level
+            inserted.markup = list.close.markup
+          }
+          tokens.splice(i + 1, 0, close, open)
+          convertTodoList(open, list.close, list.lastItem)
+          list.close = close
+          list.lastItem = token
+        }
+      } else if (token.type === 'bullet_list_open' || token.type === 'ordered_list_open') {
+        // when found list open token, the list is complete - decide what to do
+        const list = lists.pop()
+        if (list !== undefined && list.bullet && list.lastItem !== undefined) {
+          convertTodoList(token, list.close, list.lastItem)
+        }
+      }
+    }
+
+    return true
+  }
 }
+
+/**
+ * Retypes a bullet list to a todo list when the items it holds are todo items.
+ *
+ * @param open - open token of the bullet list
+ * @param close - close token of the bullet list
+ * @param item - close token of one of the list items; the caller guarantees that all items of
+ * the list are of the same kind
+ */
+function convertTodoList (open: Token, close: Token, item: Token): void {
+  if (item.type === 'todo_item_close') {
+    open.type = 'todo_list_open'
+    close.type = 'todo_list_close'
+  }
+}
+
+/**
+ * Converts the list item opened at `open` to a todo item: retypes its open and close tokens,
+ * stores the `checked` flag (plus `todoid` / `userid` metadata, when present) as attributes of
+ * the open token and strips the todo marker from the item text.
+ *
+ * @param tokens - token stream, modified in place
+ * @param open - index of a `list_item_open` token for which `isTodoListItem` holds
+ * @returns `true` when the item was converted, `false` when it has no matching close token
+ * @throws Error when the token at `open` is not a `list_item_open` token
+ */
+function convertTodoItem (tokens: Token[], open: number): boolean {
+  const close = findListItemCloseToken(tokens, open)
+  if (close === -1) {
+    return false
+  }
+
+  const item = tokens[open]
+  const inline = tokens[open + 2]
+
+  item.type = 'todo_item_open'
+  tokens[close].type = 'todo_item_close'
+  item.attrPush(['checked', isCheckedTodoItem(inline) ? 'true' : 'false'])
+
+  if (inline.children !== null && inline.children.length > 0) {
+    // the todo marker is always 4 characters long, e.g. "[ ] " or "[x] "
+    const newContent = inline.children[0].content.slice(4)
+    if (newContent.length > 0) {
+      inline.children[0].content = newContent
+    } else {
+      inline.children = inline.children.slice(1)
+    }
+
+    // metadata comes in an inline HTML comment holding comma-separated entries, an entry is
+    // its key ("todoid" or "userid"), one separator character and the value
+    const metaTok = inline.children.find(
+      (tok) => tok.type === 'html_inline' && tok.content.startsWith('<!--')
+    )
+    if (metaTok !== undefined) {
+      const metaValues = metaTok.content.slice(5, -4).split(',')
+      for (const mv of metaValues) {
+        if (mv.startsWith('todoid')) {
+          item.attrPush(['todoid', mv.slice(7)])
+        }
+        if (mv.startsWith('userid')) {
+          item.attrPush(['userid', mv.slice(7)])
+        }
+      }
+    }
+  }
+
+  return true
+}
+
+/**
+ * Finds the close token matching the list item opened at `open`.
+ *
+ * @param tokens - token stream
+ * @param open - index of a `list_item_open` token
+ * @returns index of the first `list_item_close` token after `open` that has the same nesting
+ * level, or -1 when there is none
+ * @throws Error when the token at `open` is not a `list_item_open` token
+ */
+function findListItemCloseToken (tokens: Token[], open: number): number {
+  if (tokens[open].type !== 'list_item_open') {
+    throw new Error('list_item_open token expected')
+  }
+
+  const level = tokens[open].level
+  for (let close = open + 1; close < tokens.length; close++) {
+    if (tokens[close].type === 'list_item_close' && tokens[close].level === level) {
+      return close
+    }
+  }
+
+  return -1
+}
+
+// todo token structure
+// tokens[i].type === list_item_open
+// tokens[i + 1].type === paragraph
+// tokens[i + 2].type === inline
+function isTodoListItem (tokens: Token[], pos: number): boolean {
+  return (
+    isListItemToken(tokens[pos]) &&
+    isParagraphToken(tokens[pos + 1]) &&
+    isInlineToken(tokens[pos + 2]) &&
+    startsWithTodoMarkdown(tokens[pos + 2])
+  )
+}