Stricter URL recognition for autolinking (#11871)

Documentation editor: Use only regex-based URL recognition for pasted text; increase strictness of regex. Fixes #11697.
This commit is contained in:
Kaz Wesley 2024-12-17 06:13:30 -08:00 committed by GitHub
parent cf82c8c3c7
commit 30075d26bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 8 additions and 38 deletions

View File

@@ -10,7 +10,6 @@ test.each([
}, },
{ {
clipboard: 'example.com', clipboard: 'example.com',
inserted: '<https://example.com>',
}, },
{ {
clipboard: 'http://example.com', clipboard: 'http://example.com',
@@ -22,15 +21,12 @@ test.each([
}, },
{ {
clipboard: 'example.com/Address containing spaces and a < character', clipboard: 'example.com/Address containing spaces and a < character',
inserted: '<https://example.com/Address containing spaces and a %3C character>',
}, },
{ {
clipboard: 'example.com/Address resembling *bold syntax*', clipboard: 'example.com/Address resembling *bold syntax*',
inserted: '<https://example.com/Address resembling %2Abold syntax%2A>',
}, },
{ {
clipboard: 'Url: www.a.example.com, another: www.b.example.com', clipboard: 'Url: www.a.example.com, another: www.b.example.com',
inserted: 'Url: <https://www.a.example.com>, another: <https://www.b.example.com>',
}, },
{ {
clipboard: 'gopher:///no/autolinking/unusual/protocols', clipboard: 'gopher:///no/autolinking/unusual/protocols',
@@ -53,6 +49,9 @@ test.each([
{ {
clipboard: 'example.com with trailing text', clipboard: 'example.com with trailing text',
}, },
{
clipboard: 'Standard.Base.Math',
},
])('Auto-linking pasted text: $clipboard', ({ clipboard, inserted }) => { ])('Auto-linking pasted text: $clipboard', ({ clipboard, inserted }) => {
expect(transformPastedText(clipboard)).toBe(inserted ?? clipboard) expect(transformPastedText(clipboard)).toBe(inserted ?? clipboard)
}) })

View File

@@ -5,30 +5,10 @@ function uriEscapeChar(char: string) {
} }
function toAutoLink(text: string) { function toAutoLink(text: string) {
return `<${addProtocolIfMissing(text).replaceAll(/[\][<>*`]/g, uriEscapeChar)}>` return `<${text.replaceAll(/[\][<>*`]/g, uriEscapeChar)}>`
}
function addProtocolIfMissing(url: string) {
return (URL.canParse(url) ? '' : 'https://') + url
}
/**
* Return whether the input is likely to be a URL, possibly with the protocol omitted. This matches more aggressively
* than {@link LINKABLE_URL_REGEX}, but rejects some inputs that would technically make valid URLs but are more likely
* to be other text.
*/
function isReasonableUrl(text: string) {
const textWithProto = addProtocolIfMissing(text)
let textAsUrl: URL | undefined
try {
textAsUrl = new URL(textWithProto)
} catch {
return false
}
return textAsUrl.protocol.match(/https?:/) && textAsUrl.hostname.match(/\.[a-z]/)
} }
/** Convert the input to Markdown. This includes converting any likely URLs to <autolink>s. */ /** Convert the input to Markdown. This includes converting any likely URLs to <autolink>s. */
export function transformPastedText(text: string): string { export function transformPastedText(text: string): string {
return isReasonableUrl(text) ? toAutoLink(text) : text.replaceAll(LINKABLE_URL_REGEX, toAutoLink) return text.replaceAll(LINKABLE_URL_REGEX, toAutoLink)
} }

View File

@@ -50,15 +50,6 @@ test.each([
}, },
], ],
}, },
{
text: 'Url: www.example.com',
expectedLinks: [
{
text: 'www.example.com',
href: 'https://www.example.com',
},
],
},
{ {
text: 'Email: user@example.com', text: 'Email: user@example.com',
expectedLinks: [ expectedLinks: [

View File

@@ -3,7 +3,6 @@ import { LINKABLE_EMAIL_REGEX, LINKABLE_URL_REGEX } from '../link'
const cases = { const cases = {
urls: [ urls: [
'www.a.b',
'http://example.com', 'http://example.com',
'https://a.b', 'https://a.b',
'https://some.local', 'https://some.local',
@@ -19,6 +18,7 @@ const cases = {
'(a@b.cd)', '(a@b.cd)',
], ],
neither: [ neither: [
'www.a.b',
'https://💩.la/', 'https://💩.la/',
'a.b', 'a.b',
'http://AsDf', 'http://AsDf',

View File

@@ -1,9 +1,9 @@
/** /**
* Heuristic that matches strings suitable to be automatically interpreted as links. Recognizes absolute URLs with * Heuristic that matches strings suitable to be automatically interpreted as links. Recognizes absolute URLs with
* `http` and `https` protocols, and some protocol-less strings that are likely to be URLs. * `http` and `https` protocols.
*/ */
export const LINKABLE_URL_REGEX = export const LINKABLE_URL_REGEX =
/(?:https?:\/\/(?:www\.)?|www\.)[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_+.~#?&/=]*/g /https?:\/\/[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_+.~#?&/=]*/g
/** Heuristic that matches strings suitable to be automatically interpreted as email addresses. */ /** Heuristic that matches strings suitable to be automatically interpreted as email addresses. */
export const LINKABLE_EMAIL_REGEX = export const LINKABLE_EMAIL_REGEX =