2022-09-19 18:12:54 +03:00
|
|
|
class LinkReplacer {
    /**
     * Rewrites the `href` of every `<a>` element found in the given HTML.
     *
     * Each href is entity-decoded and parsed as an absolute URL. Anchors with
     * no href, an empty href, or an href that does not parse as a URL are left
     * untouched. For every other anchor, `replaceLink` is awaited (one anchor
     * at a time, in document order) and the string form of its result becomes
     * the new href.
     *
     * This method never rejects: if anything inside the processing throws
     * (the parser, or `replaceLink` itself), the original `html` is returned
     * unchanged and any replacements made up to that point are discarded.
     *
     * @param {string} html HTML to process
     * @param {(url: URL) => Promise<URL|string>} replaceLink Receives the parsed URL of a link and resolves to its replacement
     * @returns {Promise<string>} The HTML with replaced links, or the original `html` on error
     */
    async replace(html, replaceLink) {
        // Loaded inside the method, so the dependencies are only required when replace() is called
        const cheerio = require('cheerio');
        const entities = require('entities');

        // Returns the parsed URL for an href, or undefined when it is not a valid URL
        const parseHref = (rawHref) => {
            try {
                return new URL(entities.decode(rawHref));
            } catch (err) {
                // Invalid URLs are ignored
                return undefined;
            }
        };

        try {
            const parserOptions = {
                xml: {
                    // This makes sure we use the faster and less destructive htmlparser2 parser
                    xmlMode: false
                },
                // Do not replace &, ', " and others with HTML entities (is bugged because it replaces &map_ with something weird (↦))
                decodeEntities: false
            };

            // Third argument is cheerio's `isDocument` flag; false is passed here
            const $ = cheerio.load(html, parserOptions, false);
            const anchors = $('a').toArray();

            for (const anchor of anchors) {
                const $anchor = $(anchor);
                const rawHref = $anchor.attr('href');
                if (!rawHref) {
                    continue;
                }

                const parsedUrl = parseHref(rawHref);
                if (!parsedUrl) {
                    continue;
                }

                const replacement = await replaceLink(parsedUrl);
                $anchor.attr('href', replacement.toString());
            }

            return $.html();
        } catch (err) {
            // Catch errors from cheerio (this also covers anything thrown while replacing a link)
            return html;
        }
    }
}
|
|
|
|
|
|
|
|
// Export an instance of LinkReplacer (constructed when this module is loaded), not the class itself
module.exports = new LinkReplacer();
|