markdown-link-check/index.js
Rafael Kitover b1da4b118a
Check GitHub markdown section links
Extract markdown heading lines and convert to section link names, check
all section links against this list, first removing any code blocks, and
return a 404 for any that do not have a heading.

Also check section links that are prefixed with the `baseUrl` option.

Add tests for this functionality.

Make some minor adjustments for the tests to pass on Windows.

Fix #250

Signed-off-by: Rafael Kitover <rkitover@gmail.com>
2024-04-12 00:43:34 +00:00

172 lines
6.2 KiB
JavaScript

'use strict';
const _ = require('lodash');
const async = require('async');
const linkCheck = require('link-check');
const LinkCheckResult = require('link-check').LinkCheckResult;
const markdownLinkExtractor = require('markdown-link-extractor');
const ProgressBar = require('progress');
// Matches one `{{env.<env_var_name>}}` placeholder. The named groups expose the
// whole placeholder (`pattern`) and the environment variable name (`name`).
const envVarPatternMatcher = /(?<pattern>{{env\.(?<name>[a-zA-Z0-9\-_]+)}})/;

/**
 * Performs some special replacements for the following patterns:
 * - {{BASEURL}} - replaced with opts.projectBaseUrl (every occurrence)
 * - {{env.<env_var_name>}} - replaced with the environment variable named
 *   <env_var_name>, or with an empty string if that variable is not set
 *
 * Substituted values are inserted literally: `$` sequences such as `$$` or
 * `$&` inside a base URL or an environment variable are NOT treated as
 * String.prototype.replace replacement patterns.
 *
 * @param {string} str - Text that may contain the placeholders above.
 * @param {object} opts - Options object; only `opts.projectBaseUrl` is read.
 * @returns {string} The text with all placeholders substituted.
 */
function performSpecialReplacements(str, opts) {
    // Helpful to build absolute urls "relative" to project roots.
    // A replacer function (rather than a replacement string) keeps `$` in the
    // value literal. If opts.projectBaseUrl is unset the placeholder becomes
    // the text "undefined", exactly as a string replacement would produce.
    str = str.replace(/\{\{BASEURL\}\}/g, () => opts.projectBaseUrl);

    // The string is re-scanned from the start after every substitution, so a
    // placeholder that only appears once a value has been inserted is
    // expanded as well.
    // NOTE(review): an environment variable whose value contains its own
    // placeholder would therefore never terminate - confirm whether nested
    // expansion is actually wanted.
    let envVarMatch = envVarPatternMatcher.exec(str);
    while (envVarMatch) {
        const { pattern, name } = envVarMatch.groups;
        // Only real (string) environment values count; inherited object
        // members such as `constructor` are treated as "not set".
        const value = process.env[name];
        const replacement = typeof value === 'string' ? value : '';
        str = str.replace(pattern, () => replacement);
        envVarMatch = envVarPatternMatcher.exec(str);
    }
    return str;
}
/**
 * Builds the list of section anchor names for the headings of a markdown
 * document, following GitHub's heading-anchor scheme.
 *
 * Fenced (```) code blocks are removed first so that `#` lines inside them
 * are not mistaken for headings. Each `#`-style heading line is then turned
 * into a slug: surrounding non-word characters are trimmed, remaining
 * punctuation is dropped, every whitespace character becomes one `-`, and the
 * result is lower-cased. Repeated slugs get a numeric suffix (`-1`, `-2`, ...).
 *
 * @param {string} markdown - Markdown source text.
 * @returns {string[]} Unique anchor names, without the leading `#`.
 */
function extractSections(markdown) {
    // First remove code blocks.
    const withoutCodeBlocks = markdown.replace(/^```[\S\s]+?^```$/mg, '');
    const headingLines = withoutCodeBlocks.match(/^#+ .*$/gm) || [];

    const slugs = headingLines.map((heading) =>
        heading
            .replace(/^\W+/, '')
            .replace(/\W+$/, '')
            .replace(/[^\w\s-]+/g, '')
            // One hyphen per whitespace character (not per run): after the
            // punctuation is dropped, "Foo & Bar" must become "foo--bar".
            .replace(/\s/g, '-')
            .toLowerCase()
    );

    // A Map (not a plain object) so that slugs such as "constructor" or
    // "__proto__" cannot collide with inherited object members.
    const occurrences = new Map();
    for (const slug of slugs) {
        let anchor = slug;
        // Keep counting up until the candidate is free, so a generated
        // "foo-1" never overwrites a heading literally named "foo-1".
        while (occurrences.has(anchor)) {
            const count = occurrences.get(slug) + 1;
            occurrences.set(slug, count);
            anchor = `${slug}-${count}`;
        }
        occurrences.set(anchor, 0);
    }
    return [...occurrences.keys()];
}
module.exports = function markdownLinkCheck(markdown, opts, callback) {
if (arguments.length === 2 && typeof opts === 'function') {
// optional 'opts' not supplied.
callback = opts;
opts = {};
}
if(!opts.ignoreDisable) {
markdown = [
/(<!--[ \t]+markdown-link-check-disable[ \t]+-->[\S\s]*?<!--[ \t]+markdown-link-check-enable[ \t]+-->)/mg,
/(<!--[ \t]+markdown-link-check-disable[ \t]+-->[\S\s]*(?!<!--[ \t]+markdown-link-check-enable[ \t]+-->))/mg,
/(<!--[ \t]+markdown-link-check-disable-next-line[ \t]+-->\r?\n[^\r\n]*)/mg,
/([^\r\n]*<!--[ \t]+markdown-link-check-disable-line[ \t]+-->[^\r\n]*)/mg
].reduce(function(_markdown, disablePattern) {
return _markdown.replace(new RegExp(disablePattern), '');
}, markdown);
}
const links = markdownLinkExtractor(markdown);
const sections = extractSections(markdown);
const linksCollection = _.uniq(links);
const bar = (opts.showProgressBar) ?
new ProgressBar('Checking... [:bar] :percent', {
complete: '=',
incomplete: ' ',
width: 25,
total: linksCollection.length
}) : undefined;
async.mapLimit(linksCollection, 2, function (link, callback) {
if (opts.ignorePatterns) {
const shouldIgnore = opts.ignorePatterns.some(function(ignorePattern) {
return ignorePattern.pattern instanceof RegExp ? ignorePattern.pattern.test(link) : (new RegExp(ignorePattern.pattern)).test(link) ? true : false;
});
if (shouldIgnore) {
const result = new LinkCheckResult(opts, link, 0, undefined);
result.status = 'ignored'; // custom status for ignored links
callback(null, result);
return;
}
}
if (opts.replacementPatterns) {
for (let replacementPattern of opts.replacementPatterns) {
let pattern = replacementPattern.pattern instanceof RegExp ? replacementPattern.pattern : new RegExp(replacementPattern.pattern, replacementPattern.global ? 'g' : '');
link = link.replace(pattern, performSpecialReplacements(replacementPattern.replacement, opts));
}
}
// Make sure it is not undefined and that the appropriate headers are always recalculated for a given link.
opts.headers = {};
if (opts.httpHeaders) {
for (const httpHeader of opts.httpHeaders) {
if (httpHeader.headers) {
for (const header of Object.keys(httpHeader.headers)) {
httpHeader.headers[header] = performSpecialReplacements(httpHeader.headers[header], opts);
}
}
for (const url of httpHeader.urls) {
if (link.startsWith(url)) {
Object.assign(opts.headers, httpHeader.headers);
// The headers of this httpHeader has been applied, the other URLs of this httpHeader don't need to be evaluated any further.
break;
}
}
}
}
let sectionLink = null;
if (link.startsWith('#')) {
sectionLink = link;
}
else if ('baseUrl' in opts && link.startsWith(opts.baseUrl)) {
if (link.substring(opts.baseUrl.length).match(/^\/*#/)) {
sectionLink = link.replace(/^[^#]+/, '');
}
}
if (sectionLink) {
const result = new LinkCheckResult(opts, sectionLink, sections.includes(sectionLink.substring(1)) ? 200 : 404, undefined);
callback(null, result);
return;
}
linkCheck(link, opts, function (err, result) {
if (opts.showProgressBar) {
bar.tick();
}
if (err) {
result = new LinkCheckResult(opts, link, 500, err);
result.status = 'error'; // custom status for errored links
}
callback(null, result);
});
}, callback);
};