setup-node/node_modules/jsdom/lib/jsdom/browser/htmltodom.js
eric sciple e7ed525da5 .
2020-01-24 16:30:50 -05:00

271 lines
8.2 KiB
JavaScript

"use strict";
const parse5 = require("parse5");
const sax = require("sax");
const attributes = require("../living/attributes");
const DocumentType = require("../living/generated/DocumentType");
const JSDOMParse5Adapter = require("./parse5-adapter-parsing");
const { HTML_NS } = require("../living/helpers/namespaces");
// Horrible monkey-patch to implement https://github.com/inikulin/parse5/issues/237
const OpenElementStack = require("parse5/lib/parser/open_element_stack");
const originalPop = OpenElementStack.prototype.pop;
// Replacement pop(): delegates to the original implementation, then calls the
// _poppedOffStackOfOpenElements() hook on the item that was at the top of the stack, if that item
// defines one. The original's return value is passed through unchanged.
OpenElementStack.prototype.pop = function (...args) {
  // Read the current top before delegating to the original pop.
  const before = this.items[this.stackTop];
  const result = originalPop.apply(this, args);
  // `before` is undefined when there was no item at stackTop; in that case there is nothing to
  // notify, and the wrapper must not throw where the unpatched method would not.
  if (before && before._poppedOffStackOfOpenElements) {
    before._poppedOffStackOfOpenElements();
  }
  return result;
};
const originalPush = OpenElementStack.prototype.push;
// Replacement push(): runs the original push first, then looks at whatever is now at the top of
// the stack and, when that item exposes a _pushedOnStackOfOpenElements() hook, invokes it.
OpenElementStack.prototype.push = function (...pushArgs) {
  Reflect.apply(originalPush, this, pushArgs);

  const pushed = this.items[this.stackTop];
  if (!pushed._pushedOnStackOfOpenElements) {
    return;
  }
  pushed._pushedOnStackOfOpenElements();
};
module.exports = class HTMLToDOM {
constructor(parsingMode) {
this.parser = parsingMode === "xml" ? sax : parse5;
}
appendToNode(html, node) {
html = String(html);
return this._doParse(html, true, node);
}
appendToDocument(html, documentImpl) {
html = String(html);
return this._doParse(html, false, documentImpl, documentImpl._parseOptions);
}
_doParse(...args) {
return this.parser === parse5 ? this._parseWithParse5(...args) : this._parseWithSax(...args);
}
_parseWithParse5(html, isFragment, contextNode, options = {}) {
const adapter = new JSDOMParse5Adapter(contextNode._ownerDocument || contextNode);
options.treeAdapter = adapter;
if (isFragment) {
const fragment = this.parser.parseFragment(contextNode, html, options);
if (contextNode._templateContents) {
contextNode._templateContents.appendChild(fragment);
} else {
contextNode.appendChild(fragment);
}
} else {
this.parser.parse(html, options);
}
return contextNode;
}
_parseWithSax(html, isFragment, contextNode) {
const SaxParser = this.parser.parser;
const parser = new SaxParser(/* strict = */true, { xmlns: true, strictEntities: true });
parser.noscript = false;
parser.looseCase = "toString";
const openStack = [contextNode];
parser.ontext = text => {
setChildForSax(openStack[openStack.length - 1], {
type: "text",
data: text
});
};
parser.oncdata = cdata => {
setChildForSax(openStack[openStack.length - 1], {
type: "cdata",
data: cdata
});
};
parser.onopentag = arg => {
const attrs = Object.keys(arg.attributes).map(key => {
const rawAttribute = arg.attributes[key];
let { prefix } = rawAttribute;
let localName = rawAttribute.local;
if (prefix === "xmlns" && localName === "") {
// intended weirdness in node-sax, see https://github.com/isaacs/sax-js/issues/165
localName = prefix;
prefix = null;
}
if (prefix === "") {
prefix = null;
}
const namespace = rawAttribute.uri === "" ? null : rawAttribute.uri;
return { name: rawAttribute.name, value: rawAttribute.value, prefix, localName, namespace };
});
const tag = {
type: "tag",
name: arg.local,
prefix: arg.prefix,
namespace: arg.uri,
attributes: attrs
};
if (arg.local === "script" && arg.uri === HTML_NS) {
openStack.push(tag);
} else {
const elem = setChildForSax(openStack[openStack.length - 1], tag);
openStack.push(elem);
}
};
parser.onclosetag = () => {
const elem = openStack.pop();
if (elem.constructor.name === "Object") { // we have an empty script tag
setChildForSax(openStack[openStack.length - 1], elem);
}
};
parser.onscript = scriptText => {
const tag = openStack.pop();
tag.children = [{ type: "text", data: scriptText }];
const elem = setChildForSax(openStack[openStack.length - 1], tag);
openStack.push(elem);
};
parser.oncomment = comment => {
setChildForSax(openStack[openStack.length - 1], {
type: "comment",
data: comment
});
};
parser.onprocessinginstruction = pi => {
setChildForSax(openStack[openStack.length - 1], {
type: "directive",
name: "?" + pi.name,
data: "?" + pi.name + " " + pi.body + "?"
});
};
parser.ondoctype = dt => {
setChildForSax(openStack[openStack.length - 1], {
type: "directive",
name: "!doctype",
data: "!doctype " + dt
});
const entityMatcher = /<!ENTITY ([^ ]+) "([^"]+)">/g;
let result;
while ((result = entityMatcher.exec(dt))) {
const [, name, value] = result;
if (!(name in parser.ENTITIES)) {
parser.ENTITIES[name] = value;
}
}
};
parser.onerror = err => {
throw err;
};
parser.write(html).close();
}
};
/**
 * Creates the DOM node described by `node` and attaches it under `parentImpl`.
 *
 * @param {object} parentImpl - Parent node impl; its `_ownerDocument` (or the parent itself when
 *   that is missing) is used as the factory for new nodes.
 * @param {object} node - Descriptor with a `type` of "tag", "script", "style", "root", "text",
 *   "cdata", "comment" or "directive", plus the fields that type needs (`name`, `namespace`,
 *   `prefix`, `data`, `attributes`, `children`).
 * @returns {object|null} The created (or, for a template "root", reused) node; null when the
 *   descriptor yields no node.
 */
function setChildForSax(parentImpl, node) {
  const ownerDoc = (parentImpl && parentImpl._ownerDocument) || parentImpl;
  const { type } = node;
  let created;
  let reusesTemplateContents = false;

  if (type === "tag" || type === "script" || type === "style") {
    created = ownerDoc._createElementWithCorrectElementInterface(node.name, node.namespace);
    created._prefix = node.prefix || null;
    created._namespaceURI = node.namespace || null;
  } else if (type === "root") {
    // Inside an HTML <template> the virtual root is skipped: children go straight into the
    // parent's _templateContents.
    if (parentImpl.tagName === "TEMPLATE" && parentImpl._namespaceURI === HTML_NS) {
      created = parentImpl._templateContents;
      reusesTemplateContents = true;
    }
  } else if (type === "text") {
    // The parser is expected to have decoded entities already, so the data is used verbatim.
    created = ownerDoc.createTextNode(node.data);
  } else if (type === "cdata") {
    created = ownerDoc.createCDATASection(node.data);
  } else if (type === "comment") {
    created = ownerDoc.createComment(node.data);
  } else if (type === "directive") {
    const directiveName = node.name;
    const lowered = directiveName.toLowerCase();
    if (directiveName[0] === "?" && lowered !== "?xml") {
      // Strip the leading "?target " and the trailing "?" to recover the instruction body.
      const body = node.data.slice(directiveName.length + 1, -1);
      created = ownerDoc.createProcessingInstruction(directiveName.substring(1), body);
    } else if (lowered === "!doctype") {
      created = parseDocType(ownerDoc, "<" + node.data + ">");
    }
  }

  if (!created) {
    return null;
  }

  for (const attr of node.attributes || []) {
    attributes.setAttributeValue(created, attr.localName, attr.value, attr.prefix, attr.namespace);
  }

  for (const child of node.children || []) {
    setChildForSax(created, child);
  }

  if (reusesTemplateContents) {
    return created;
  }

  // Setting innerHTML on a <template> targets its contents rather than the element itself.
  const attachTo = parentImpl._templateContents || parentImpl;
  attachTo.appendChild(created);
  return created;
}
// Bare HTML doctype, tolerating any amount of whitespace around the name.
const HTML5_DOCTYPE = /^<!doctype\s+html\s*>$/i;
// <!doctype NAME PUBLIC "publicId" "systemId"; each literal may be double- or single-quoted.
const PUBLIC_DOCTYPE = /^<!doctype\s+([^\s]+)\s+public\s+(?:"([^"]*)"|'([^']*)')\s+(?:"([^"]*)"|'([^']*)')/i;
// <!doctype NAME SYSTEM "systemId"; the literal may be double- or single-quoted.
const SYSTEM_DOCTYPE = /^<!doctype\s+([^\s]+)\s+system\s+(?:"([^"]*)"|'([^']*)')/i;
// Just the declared name; stops at whitespace, ">" or the "[" that opens an internal subset.
const DOCTYPE_NAME = /^<!doctype\s+([^\s>\[]+)/i;

// Returns whichever of the two alternative capture groups (double- or single-quoted) matched.
function quotedLiteral(doubleQuoted, singleQuoted) {
  return doubleQuoted !== undefined ? doubleQuoted : singleQuoted;
}

/**
 * Builds a DocumentType node from the text of a doctype declaration.
 *
 * @param {object} doc - Owner document passed through to createDocumentTypeInternal().
 * @param {string} html - Declaration text starting with "<!doctype" (matched case-insensitively).
 * @returns {object} Whatever createDocumentTypeInternal() returns for the extracted name,
 *   public ID and system ID. IDs that are absent are passed as "".
 */
function parseDocType(doc, html) {
  if (HTML5_DOCTYPE.test(html)) {
    return createDocumentTypeInternal(doc, "html", "", "");
  }

  const publicPieces = PUBLIC_DOCTYPE.exec(html);
  if (publicPieces) {
    const [, name, publicDq, publicSq, systemDq, systemSq] = publicPieces;
    return createDocumentTypeInternal(
      doc,
      name,
      quotedLiteral(publicDq, publicSq),
      quotedLiteral(systemDq, systemSq)
    );
  }

  const systemPieces = SYSTEM_DOCTYPE.exec(html);
  if (systemPieces) {
    const [, name, systemDq, systemSq] = systemPieces;
    return createDocumentTypeInternal(doc, name, "", quotedLiteral(systemDq, systemSq));
  }

  // No external ID was recognised (name-only doctype, or one that only carries an internal
  // subset): keep the declared name instead of assuming "html".
  const namePieces = DOCTYPE_NAME.exec(html);
  if (namePieces) {
    return createDocumentTypeInternal(doc, namePieces[1], "", "");
  }

  // Shouldn't get here (the parser shouldn't let us know about invalid doctypes), but our logic
  // likely isn't real-world perfect, so let's fallback.
  return createDocumentTypeInternal(doc, "html", "", "");
}
/**
 * Creates a DocumentType impl through DocumentType.createImpl() with no constructor arguments
 * and the given fields as private data.
 *
 * @param {object} ownerDocument - Document the doctype belongs to.
 * @param {string} name - Doctype name.
 * @param {string} publicId - Public identifier.
 * @param {string} systemId - System identifier.
 * @returns {object} The value returned by DocumentType.createImpl().
 */
function createDocumentTypeInternal(ownerDocument, name, publicId, systemId) {
  const constructorArgs = [];
  const privateData = {
    ownerDocument: ownerDocument,
    name: name,
    publicId: publicId,
    systemId: systemId
  };
  return DocumentType.createImpl(constructorArgs, privateData);
}