node/tools/eslint/node_modules/eslint-plugin-markdown/lib/processor.js
Michaël Zasso 2eff28fb7a
tools: move ESLint to tools/eslint
Greatly simplify how ESLint and its plugins are installed.

PR-URL: https://github.com/nodejs/node/pull/53413
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
2024-06-19 19:54:08 +00:00

421 lines
14 KiB
JavaScript

/**
* @fileoverview Processes Markdown files for consumption by ESLint.
* @author Brandon Mills
*/
/**
* @typedef {import('eslint/lib/shared/types').LintMessage} Message
* @typedef {Object} ASTNode
* @property {string} type The type of node.
* @property {string} [lang] The language that the node is in
* @typedef {Object} RangeMap
* @property {number} indent Number of code block indent characters trimmed from
* the beginning of the line during extraction.
* @property {number} js Offset from the start of the code block's range in the
* extracted JS.
* @property {number} md Offset from the start of the code block's range in the
* original Markdown.
* @typedef {Object} BlockBase
* @property {string} baseIndentText Leading whitespace text for the block.
* @property {string[]} comments Comments inside of the JavaScript code.
* @property {RangeMap[]} rangeMap A list of offset-based adjustments, where
* lookups are done based on the `js` key, which represents the range in the
* linted JS, and the `md` key is the offset delta that, when added to the JS
* range, returns the corresponding location in the original Markdown source.
* @typedef {ASTNode & BlockBase} Block
*/
"use strict";
const parse = require("mdast-util-from-markdown");
const pkg = require("../package.json");
const UNSATISFIABLE_RULES = new Set([
"eol-last", // The Markdown parser strips trailing newlines in code fences
"unicode-bom" // Code blocks will begin in the middle of Markdown files
]);
const SUPPORTS_AUTOFIX = true;
/**
* @type {Map<string, Block[]>}
*/
const blocksCache = new Map();
/**
* Performs a depth-first traversal of the Markdown AST.
* @param {ASTNode} node A Markdown AST node.
* @param {{[key: string]: (node: ASTNode) => void}} callbacks A map of node types to callbacks.
* @returns {void}
*/
function traverse(node, callbacks) {
if (callbacks[node.type]) {
callbacks[node.type](node);
} else {
callbacks["*"]();
}
if (typeof node.children !== "undefined") {
for (let i = 0; i < node.children.length; i++) {
traverse(node.children[i], callbacks);
}
}
}
/**
* Extracts `eslint-*` or `global` comments from HTML comments if present.
* @param {string} html The text content of an HTML AST node.
* @returns {string} The comment's text without the opening and closing tags or
* an empty string if the text is not an ESLint HTML comment.
*/
function getComment(html) {
const commentStart = "<!--";
const commentEnd = "-->";
const regex = /^(eslint\b|global\s)/u;
if (
html.slice(0, commentStart.length) !== commentStart ||
html.slice(-commentEnd.length) !== commentEnd
) {
return "";
}
const comment = html.slice(commentStart.length, -commentEnd.length);
if (!regex.test(comment.trim())) {
return "";
}
return comment;
}
// Before a code block, blockquote characters (`>`) are also considered
// "whitespace".
const leadingWhitespaceRegex = /^[>\s]*/u;
/**
* Gets the offset for the first column of the node's first line in the
* original source text.
* @param {ASTNode} node A Markdown code block AST node.
* @returns {number} The offset for the first column of the node's first line.
*/
function getBeginningOfLineOffset(node) {
return node.position.start.offset - node.position.start.column + 1;
}
/**
* Gets the leading text, typically whitespace with possible blockquote chars,
* used to indent a code block.
* @param {string} text The text of the file.
* @param {ASTNode} node A Markdown code block AST node.
* @returns {string} The text from the start of the first line to the opening
* fence of the code block.
*/
function getIndentText(text, node) {
return leadingWhitespaceRegex.exec(
text.slice(getBeginningOfLineOffset(node))
)[0];
}
/**
* When applying fixes, the postprocess step needs to know how to map fix ranges
* from their location in the linted JS to the original offset in the Markdown.
* Configuration comments and indentation trimming both complicate this process.
*
* Configuration comments appear in the linted JS but not in the Markdown code
* block. Fixes to configuration comments would cause undefined behavior and
* should be ignored during postprocessing. Fixes to actual code after
* configuration comments need to be mapped back to the code block after
* removing any offset due to configuration comments.
*
* Fenced code blocks can be indented by up to three spaces at the opening
* fence. Inside of a list, for example, this indent can be in addition to the
* indent already required for list item children. Leading whitespace inside
* indented code blocks is trimmed up to the level of the opening fence and does
* not appear in the linted code. Further, lines can have less leading
* whitespace than the opening fence, so not all lines are guaranteed to have
* the same column offset as the opening fence.
*
* The source code of a non-configuration-comment line in the linted JS is a
* suffix of the corresponding line in the Markdown code block. There are no
* differences within the line, so the mapping need only provide the offset
* delta at the beginning of each line.
* @param {string} text The text of the file.
* @param {ASTNode} node A Markdown code block AST node.
* @param {string[]} comments List of configuration comment strings that will be
* inserted at the beginning of the code block.
* @returns {RangeMap[]} A list of offset-based adjustments, where lookups are
* done based on the `js` key, which represents the range in the linted JS,
* and the `md` key is the offset delta that, when added to the JS range,
* returns the corresponding location in the original Markdown source.
*/
function getBlockRangeMap(text, node, comments) {
/*
* The parser sets the fenced code block's start offset to wherever content
* should normally begin (typically the first column of the line, but more
* inside a list item, for example). The code block's opening fence may be
* further indented by up to three characters. If the code block has
* additional indenting, the opening fence's first backtick may be up to
* three whitespace characters after the start offset.
*/
const startOffset = getBeginningOfLineOffset(node);
/*
* Extract the Markdown source to determine the leading whitespace for each
* line.
*/
const code = text.slice(startOffset, node.position.end.offset);
const lines = code.split("\n");
/*
* The parser trims leading whitespace from each line of code within the
* fenced code block up to the opening fence's first backtick. The first
* backtick's column is the AST node's starting column plus any additional
* indentation.
*/
const baseIndent = getIndentText(text, node).length;
/*
* Track the length of any inserted configuration comments at the beginning
* of the linted JS and start the JS offset lookup keys at this index.
*/
const commentLength = comments.reduce((len, comment) => len + comment.length + 1, 0);
/*
* In case there are configuration comments, initialize the map so that the
* first lookup index is always 0. If there are no configuration comments,
* the lookup index will also be 0, and the lookup should always go to the
* last range that matches, skipping this initialization entry.
*/
const rangeMap = [{
indent: baseIndent,
js: 0,
md: 0
}];
// Start the JS offset after any configuration comments.
let jsOffset = commentLength;
/*
* Start the Markdown offset at the beginning of the block's first line of
* actual code. The first line of the block is always the opening fence, so
* the code begins on the second line.
*/
let mdOffset = startOffset + lines[0].length + 1;
/*
* For each line, determine how much leading whitespace was trimmed due to
* indentation. Increase the JS lookup offset by the length of the line
* post-trimming and the Markdown offset by the total line length.
*/
for (let i = 0; i + 1 < lines.length; i++) {
const line = lines[i + 1];
const leadingWhitespaceLength = leadingWhitespaceRegex.exec(line)[0].length;
// The parser trims leading whitespace up to the level of the opening
// fence, so keep any additional indentation beyond that.
const trimLength = Math.min(baseIndent, leadingWhitespaceLength);
rangeMap.push({
indent: trimLength,
js: jsOffset,
// Advance `trimLength` character from the beginning of the Markdown
// line to the beginning of the equivalent JS line, then compute the
// delta.
md: mdOffset + trimLength - jsOffset
});
// Accumulate the current line in the offsets, and don't forget the
// newline.
mdOffset += line.length + 1;
jsOffset += line.length - trimLength + 1;
}
return rangeMap;
}
const languageToFileExtension = {
javascript: "js",
ecmascript: "js",
typescript: "ts",
markdown: "md"
};
/**
* Extracts lintable code blocks from Markdown text.
* @param {string} text The text of the file.
* @param {string} filename The filename of the file
* @returns {Array<{ filename: string, text: string }>} Source code blocks to lint.
*/
function preprocess(text, filename) {
const ast = parse(text);
const blocks = [];
blocksCache.set(filename, blocks);
/**
* During the depth-first traversal, keep track of any sequences of HTML
* comment nodes containing `eslint-*` or `global` comments. If a code
* block immediately follows such a sequence, insert the comments at the
* top of the code block. Any non-ESLint comment or other node type breaks
* and empties the sequence.
* @type {string[]}
*/
let htmlComments = [];
traverse(ast, {
"*"() {
htmlComments = [];
},
code(node) {
if (node.lang) {
const comments = [];
for (const comment of htmlComments) {
if (comment.trim() === "eslint-skip") {
htmlComments = [];
return;
}
comments.push(`/*${comment}*/`);
}
htmlComments = [];
blocks.push({
...node,
baseIndentText: getIndentText(text, node),
comments,
rangeMap: getBlockRangeMap(text, node, comments)
});
}
},
html(node) {
const comment = getComment(node.value);
if (comment) {
htmlComments.push(comment);
} else {
htmlComments = [];
}
}
});
return blocks.map((block, index) => {
const [language] = block.lang.trim().split(" ");
const fileExtension = Object.hasOwn(languageToFileExtension, language) ? languageToFileExtension[language] : language;
return {
filename: `${index}.${fileExtension}`,
text: [
...block.comments,
block.value,
""
].join("\n")
};
});
}
/**
* Creates a map function that adjusts messages in a code block.
* @param {Block} block A code block.
* @returns {(message: Message) => Message} A function that adjusts messages in a code block.
*/
function adjustBlock(block) {
const leadingCommentLines = block.comments.reduce((count, comment) => count + comment.split("\n").length, 0);
const blockStart = block.position.start.line;
/**
* Adjusts ESLint messages to point to the correct location in the Markdown.
* @param {Message} message A message from ESLint.
* @returns {Message} The same message, but adjusted to the correct location.
*/
return function adjustMessage(message) {
if (!Number.isInteger(message.line)) {
return {
...message,
line: blockStart,
column: block.position.start.column
};
}
const lineInCode = message.line - leadingCommentLines;
if (lineInCode < 1 || lineInCode >= block.rangeMap.length) {
return null;
}
const out = {
line: lineInCode + blockStart,
column: message.column + block.rangeMap[lineInCode].indent
};
if (Number.isInteger(message.endLine)) {
out.endLine = message.endLine - leadingCommentLines + blockStart;
}
const adjustedFix = {};
if (message.fix) {
adjustedFix.fix = {
range: message.fix.range.map(range => {
// Advance through the block's range map to find the last
// matching range by finding the first range too far and
// then going back one.
let i = 1;
while (i < block.rangeMap.length && block.rangeMap[i].js <= range) {
i++;
}
// Apply the mapping delta for this range.
return range + block.rangeMap[i - 1].md;
}),
text: message.fix.text.replace(/\n/gu, `\n${block.baseIndentText}`)
};
}
return { ...message, ...out, ...adjustedFix };
};
}
/**
* Excludes unsatisfiable rules from the list of messages.
* @param {Message} message A message from the linter.
* @returns {boolean} True if the message should be included in output.
*/
function excludeUnsatisfiableRules(message) {
return message && !UNSATISFIABLE_RULES.has(message.ruleId);
}
/**
* Transforms generated messages for output.
* @param {Array<Message[]>} messages An array containing one array of messages
* for each code block returned from `preprocess`.
* @param {string} filename The filename of the file
* @returns {Message[]} A flattened array of messages with mapped locations.
*/
function postprocess(messages, filename) {
const blocks = blocksCache.get(filename);
blocksCache.delete(filename);
return messages.flatMap((group, i) => {
const adjust = adjustBlock(blocks[i]);
return group.map(adjust).filter(excludeUnsatisfiableRules);
});
}
module.exports = {
meta: {
name: `${pkg.name}/markdown`,
version: pkg.version
},
preprocess,
postprocess,
supportsAutofix: SUPPORTS_AUTOFIX
};