Fix pasting tables from Google Sheets; update tests. (#10327)

Part of #10275 (AG Grid support will be a separate PR).
This commit is contained in:
Kaz Wesley 2024-06-21 13:30:47 -07:00 committed by GitHub
parent bcbdda5f70
commit b8a1b0c366
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 159 additions and 56 deletions

View File

@ -14,6 +14,8 @@
For example, `locale` parameter of `Equal_Ignore_Case` kind in join component.
- [Node previews][10310]: Node may be previewed by hovering output port while
pressing <kbd>Ctrl</kbd> key (<kbd>Cmd</kbd> on macOS).
- [Google Sheets clipboard support][10327]: Create a Table component when cells
are pasted from Google Sheets.
- [Fixed issue with two arrows being visible at once in drop-down
widget.][10337]
- [Fixed issue where picking "<Numeric literal>" variant in some ports
@ -27,6 +29,7 @@
[10243]: https://github.com/enso-org/enso/pull/10243
[10297]: https://github.com/enso-org/enso/pull/10297
[10310]: https://github.com/enso-org/enso/pull/10310
[10327]: https://github.com/enso-org/enso/pull/10327
[10337]: https://github.com/enso-org/enso/pull/10337
#### Enso Standard Library

View File

@ -1,7 +1,9 @@
import testCases from '@/components/GraphEditor/__tests__/clipboardTestCases.json' assert { type: 'json' }
import {
excelTableToEnso,
isSpreadsheetTsv,
nodesFromClipboardContent,
nodesToClipboardData,
tsvToEnsoTable,
} from '@/components/GraphEditor/clipboard'
import { type Node } from '@/stores/graph'
import { Ast } from '@/util/ast'
@ -42,7 +44,7 @@ test.each([
"'\\t36\\t52\\n11\\t\\t4.727272727\\n12\\t\\t4.333333333\\n13\\t2.769230769\\t4\\n14\\t2.571428571\\t3.714285714\\n15\\t2.4\\t3.466666667\\n16\\t2.25\\t3.25\\n17\\t2.117647059\\t3.058823529\\n19\\t1.894736842\\t2.736842105\\n21\\t1.714285714\\t2.476190476\\n24\\t1.5\\t2.166666667\\n27\\t1.333333333\\t1.925925926\\n30\\t1.2\\t'.to Table",
},
])('Enso expression from Excel data: $description', ({ tableData, expectedEnsoExpression }) => {
expect(excelTableToEnso(tableData)).toEqual(expectedEnsoExpression)
expect(tsvToEnsoTable(tableData)).toEqual(expectedEnsoExpression)
})
class MockClipboardItem {
@ -52,10 +54,10 @@ class MockClipboardItem {
this.types = Object.keys(data)
}
getType(type: string): Blob {
getType(type: string): Promise<Blob> {
const blob = this.data[type]
assertDefined(blob)
return blob
return Promise.resolve(blob)
}
}
@ -95,3 +97,59 @@ test.each([...testNodes.map((node) => [node]), testNodes])(
})
},
)
/**
 * Build a mock clipboard item for tests from a mapping of clipboard type to text content.
 *
 * Each text value is wrapped in a `Blob`; the result is a `MockClipboardItem` cast so that it can be passed where a
 * `ClipboardItem` is expected.
 */
function clipboardItemFromTypes(types: Record<string, string>): ClipboardItem {
  const blobEntries = Object.entries(types).map(([mimeType, content]) => [
    mimeType,
    new Blob([content]),
  ])
  const blobsByType = Object.fromEntries(blobEntries)
  return new MockClipboardItem(blobsByType) as any
}
/* Creating a new test case:
*
* Obtaining `raw` clipboard HTML data from a spreadsheet:
* - Copy a range of a spreadsheet in the source application
* - In Chrome/Chromium, go to `https://evercoder.github.io/clipboard-inspector/`
* - Paste into the page with the keyboard (not the button on the page)
* [the keyboard binding uses the legacy API, which Chromium doesn't sanitize]
* - In the resulting type/getData(type) table, click "Copy as plain text" in the `text/html` row.
*
* Obtaining browser-sanitized HTML data from raw data:
* 1. Load the raw data into the clipboard as 'text/html':
* - In Chromium, create a new tab and open the Developer Console
* - In the console, set `htmlData = <pasted raw HTML>`
* - Run: `setTimeout(async () => { await window.navigator.clipboard.write([new ClipboardItem({ 'text/html': new Blob([htmlData], { type: 'text/html' })})]); console.log('ok') }, 2000)`
* - After pressing Enter, quickly click the document background
* - Wait for 'ok' to be logged to the console
* 2. In the target browser, go to `https://evercoder.github.io/clipboard-inspector/`
* - Click the "Paste using the Clipboard API" button
* [the button reads the clipboard via the async API with the default options, obtaining sanitized data]
* - Copy the `text/html` data
*/
/** Key identifying the browser that produced a sanitized HTML sample: two strings joined by `-`. */
type BrowserNameAndVersion = `${string}-${string}`
/** A test case that only checks that clipboard HTML is recognized as spreadsheet data. */
interface RecognitionCase {
/** Name of the case; interpolated into the test title. */
spreadsheet: string
/** HTML clipboard contents to check: the `raw` data, plus one entry per browser-sanitized variant. */
html: Record<BrowserNameAndVersion, string> & { raw: string }
}
/** A test case that additionally checks the node expression produced when the data is pasted. */
interface FullStackCase extends RecognitionCase {
/** Content supplied as the `text/plain` clipboard type alongside the HTML. */
plainText: string
/** Expression the single resulting node is expected to have. */
ensoCode: string
}
type SpreadsheetTestCase = RecognitionCase | FullStackCase
/** Test cases loaded from `clipboardTestCases.json`. */
const spreadsheetTestCases: SpreadsheetTestCase[] = testCases.spreadsheetTestCases
// For every HTML variant of a test case: check that it is recognized as spreadsheet data and, when the case also
// provides plain text and expected code, that pasting it yields exactly one node with the expected expression.
test.each(spreadsheetTestCases)('Spreadsheet test case: $spreadsheet', async (testCase) => {
  for (const [version, htmlContent] of Object.entries(testCase.html)) {
    expect(isSpreadsheetTsv(htmlContent), `${version} version`).toBe(true)
    // Recognition-only cases have nothing further to check.
    if (!('plainText' in testCase)) continue
    const clipboardItem = clipboardItemFromTypes({
      'text/html': htmlContent,
      'text/plain': testCase.plainText,
    })
    const nodes = await nodesFromClipboardContent([clipboardItem])
    expect(nodes.length).toBe(1)
    const [onlyNode] = nodes
    assertDefined(onlyNode)
    expect(onlyNode.expression).toBe(testCase.ensoCode)
  }
})

File diff suppressed because one or more lines are too long

View File

@ -2,6 +2,7 @@ import type { NodeCreationOptions } from '@/composables/nodeCreation'
import type { GraphStore, Node, NodeId } from '@/stores/graph'
import { Ast } from '@/util/ast'
import { Pattern } from '@/util/ast/match'
import { filterDefined } from '@/util/data/iterable'
import { Vec2 } from '@/util/data/vec2'
import type { ToValue } from '@/util/reactivity'
import type { NodeMetadataFields } from 'shared/ast'
@ -38,62 +39,22 @@ function nodeDataFromExpressionText(expression: string): CopiedNode {
return { expression }
}
const toTable = computed(() => Pattern.parse('__.to Table'))
/**
 * Produce the code of a `<text literal>.to Table` expression whose text literal holds the given data.
 *
 * @internal Exported for testing.
 */
export function excelTableToEnso(excelData: string) {
  const literal = Ast.TextLiteral.new(excelData)
  const tableExpression = toTable.value.instantiate(literal.module, [literal])
  return tableExpression.code()
}
/** @internal Exported for testing. */
export async function nodesFromClipboardContent(
clipboardItems: ClipboardItems,
): Promise<CopiedNode[]> {
let fallbackItem: ClipboardItem | undefined
for (const clipboardItem of clipboardItems) {
for (const type of clipboardItem.types) {
if (type === ENSO_MIME_TYPE) {
const blob = await clipboardItem.getType(type)
return JSON.parse(await blob.text()).nodes
}
if (type === 'text/html') {
const blob = await clipboardItem.getType(type)
const htmlContent = await blob.text()
const excelNode = await nodeDataFromExcelClipboard(htmlContent, clipboardItem)
if (excelNode) {
return [excelNode]
}
}
if (type === 'text/plain') {
fallbackItem = clipboardItem
}
}
}
if (fallbackItem) {
const fallbackData = await fallbackItem.getType('text/plain')
return [nodeDataFromExpressionText(await fallbackData.text())]
}
return []
return [
...(await decodeClipboard(clipboardItems, [ensoDecoder, spreadsheetDecoder, plainTextDecoder])),
].flat()
}
// Excel data starts with a `table` tag; Google Sheets starts with its own marker.
const spreadsheetHtmlRegex = /^(?:<table |<google-sheets-html-origin>).*<\/table>$/
async function nodeDataFromExcelClipboard(
htmlContent: string,
clipboardItem: ClipboardItem,
): Promise<CopiedNode | undefined> {
// Check if the contents look like HTML tables produced by spreadsheet software known to provide a plain-text
// version of the table with tab separators, as Excel does.
if (clipboardItem.types.includes('text/plain') && spreadsheetHtmlRegex.test(htmlContent)) {
const textData = await clipboardItem.getType('text/plain')
const expression = excelTableToEnso(await textData.text())
return nodeDataFromExpressionText(expression)
}
return undefined
/**
 * Decoder for clipboard data of type `ENSO_MIME_TYPE`: parses the blob's text as JSON and returns its `nodes`
 * property. The parsed payload is not validated.
 */
const ensoDecoder: ClipboardDecoder<CopiedNode[]> = {
  mimeType: ENSO_MIME_TYPE,
  decode: async (blob) => {
    const payload = JSON.parse(await blob.text())
    return payload.nodes
  },
}
/** Decoder for `text/plain` clipboard data: the whole text becomes the expression of a single node. */
const plainTextDecoder: ClipboardDecoder<CopiedNode[]> = {
  mimeType: 'text/plain',
  decode: async (blob) => {
    const expressionText = await blob.text()
    return [nodeDataFromExpressionText(expressionText)]
  },
}
/** A function building a `ClipboardItem` from a record of `Blob`s (presumably keyed by MIME type; confirm at call sites). */
type clipboardItemFactory = (itemData: Record<string, Blob>) => ClipboardItem
@ -142,7 +103,12 @@ export function useGraphEditorClipboard(
/** Read the clipboard and if it contains valid data, create nodes from the content. */
async function createNodesFromClipboard() {
const clipboardItems = await getClipboard().read()
const clipboardItems = await getClipboard().read({
// Chromium-based browsers support reading unsanitized HTML data, so we can obtain predictable data for
// spreadsheet recognition in that case; other browsers, including Firefox (as of v127), do not, and should have
// their sanitized data included in test cases in `clipboardTestCases.json`.
unsanitized: ['text/html'],
})
const clipboardData = await nodesFromClipboardContent(clipboardItems)
if (!clipboardData.length) {
console.warn('No valid node in clipboard.')
@ -170,3 +136,58 @@ export function useGraphEditorClipboard(
createNodesFromClipboard,
}
}
// === Clipboard decoding ===
/** A strategy for turning one type of clipboard data into a value of type `T`. */
interface ClipboardDecoder<T> {
/** Clipboard type this decoder handles; it is only tried on items whose `types` include this value. */
mimeType: string
/**
 * Decode the item's data. Receives the blob obtained for `mimeType` and the whole item, so that other types of the
 * same item can be consulted. Resolving to `undefined` means the data was not accepted by this decoder.
 */
decode: (blob: Blob, item: ClipboardItem) => Promise<T | undefined>
}
/**
 * Decode every clipboard item with the first decoder that accepts it.
 *
 * For each item, the decoders are tried in the order given; a decoder is only attempted when the item offers the
 * decoder's `mimeType`, and the first truthy result is used for that item. Items are processed concurrently, and
 * items for which no decoder produced a result are dropped from the output.
 */
async function decodeClipboard<T>(
  clipboardItems: ClipboardItems,
  decoders: ClipboardDecoder<T>[],
): Promise<IterableIterator<T>> {
  async function firstAcceptedResult(item: ClipboardItem) {
    for (const decoder of decoders) {
      // Skip decoders whose type the item does not provide.
      if (!item.types.includes(decoder.mimeType)) continue
      const data = await item.getType(decoder.mimeType)
      const result = await decoder.decode(data, item)
      if (result) return result
    }
  }
  const pendingResults = clipboardItems.map((item) => firstAcceptedResult(item))
  const results = await Promise.all(pendingResults)
  return filterDefined(results)
}
// === Spreadsheet clipboard decoder ===
/**
 * Decoder for `text/html` clipboard data that looks like it came from a spreadsheet.
 *
 * The HTML is only used for recognition: when the item also offers `text/plain` and the HTML passes
 * `isSpreadsheetTsv`, the plain-text content is converted with `tsvToEnsoTable` into the expression of a single node.
 * Otherwise the decoder yields `undefined`.
 */
const spreadsheetDecoder: ClipboardDecoder<CopiedNode[]> = {
  mimeType: 'text/html',
  decode: async (blob, item) => {
    const html = await blob.text()
    const hasPlainText = item.types.includes('text/plain')
    if (!hasPlainText || !isSpreadsheetTsv(html)) return
    const plainTextBlob = await item.getType('text/plain')
    const tsv = await plainTextBlob.text()
    return [nodeDataFromExpressionText(tsvToEnsoTable(tsv))]
  },
}
/** The `__.to Table` pattern, parsed inside a `computed` wrapper. */
const toTable = computed(() => Pattern.parse('__.to Table'))
/**
 * Produce the code of a `<text literal>.to Table` expression whose text literal holds the given data.
 *
 * @internal Exported for testing.
 */
export function tsvToEnsoTable(tsvData: string) {
  const tsvLiteral = Ast.TextLiteral.new(tsvData)
  const instantiated = toTable.value.instantiate(tsvLiteral.module, [tsvLiteral])
  return instantiated.code()
}
/**
 * Check whether HTML clipboard content looks like it contains a table, as spreadsheet applications produce.
 *
 * @param htmlContent - The `text/html` clipboard data to inspect.
 * @returns `true` if the content contains an opening `table` tag (matched case-insensitively).
 *
 * @internal Exported for testing.
 */
export function isSpreadsheetTsv(htmlContent: string): boolean {
  // This is a very general criterion that can have some false positives (e.g. pasting rich text that includes a
  // table). However, due to non-standardized browser HTML sanitization it is difficult to precisely recognize
  // spreadsheet clipboard data. We want to avoid false negatives (even if a browser changes its sanitization), and in
  // case of a false positive the user is pasting data we don't have any good way to handle, so trying to make a Table
  // from it is acceptable.
  //
  // A tag name may be terminated by any HTML whitespace (space, tab, line feed, form feed), by `/`, or by `>`; accept
  // all of them so that e.g. an attribute list starting on the next line is still recognized.
  return /<table[\s/>]/i.test(htmlContent)
}

View File

@ -9,7 +9,8 @@
"src/util/theme.json",
"stories/mockSuggestions.json",
"mock/**/*",
"mock/**/*.vue"
"mock/**/*.vue",
"src/**/__tests__/*.json"
],
"exclude": ["src/**/__tests__/*", "shared/**/__tests__/*", "public/**/__tests__/*"],
"compilerOptions": {