taoqf / node-html-parser

A very fast HTML parser, generating a simplified DOM, with basic element query support.
MIT License
1.11k stars 107 forks source link

add node.replaceWith + add innerHTML setter #104

Closed milahu closed 3 years ago

milahu commented 3 years ago
const { parse } = require('node-html-parser');

// Minimal document with a single <div class="main"> used to demonstrate the
// two requested features: an `innerHTML` setter and `node.replaceWith`.
const root = parse(`\
<html>
  <div class="main">yay</div>
</html>
`);

// Requested feature 1: assigning to `innerHTML` replaces the element's content.
root.querySelector('div.main').innerHTML = 'innerHTML setter was here';
console.log(root.toString());
// Expected output of the log above. Kept for reference only; it is not compared
// programmatically. It has its own name so the second expectation below does
// not overwrite it.
const expectedAfterInnerHTML = `\
<html>
  <div class="main">innerHTML setter was here</div>
</html>
`;

// Requested feature 2: `replaceWith` swaps the element itself for new markup.
root.querySelector('div.main').replaceWith('<pre>replaceWith was here</pre>');
console.log(root.toString());
// Expected output of the second log (reference only).
const expectedAfterReplaceWith = `\
<html>
  <pre>replaceWith was here</pre>
</html>
`;

use case: transform custom elements

```js var root = parse(`\ yay `); root.querySelectorAll('some-custom-element').forEach(node => { //const nodeNew = parse('
') // not working, we cannot change root node const nodeNew = parse('
').querySelector('div') for (const [name, value] of Object.entries(node.attributes)) { nodeNew.setAttribute(name, value); } // standard DOM method //for (let a = 0; a < node.attributes.length; a++) { // const attr = node.attributes[a]; // nodeNew.setAttribute(attr.name, attr.value); // copy attribute //} //const c = nodeNew.getAttribute('class'); //nodeNew.setAttribute('class', c ? (c + ' ' + node.tagName.toLowerCase()) : c); nodeNew.classList.add(node.localName); nodeNew.innerHTML = node.innerHTML; node.replaceWith(nodeNew); }); console.log(root.toString()); var result = `\
yay
`; ```

Edit: use classList and localName. Todo: maybe implement element.className. Todo: implement document.createElement.

milahu commented 3 years ago

problem: multiple transforms will trigger some bug, and only the first transform works

what does work is node.insertAdjacentHTML with the full html source of the new node

workaround ```js const { parse } = require('node-html-parser') const insrc = `\

gutentag

hello

gutentag2

hello2

`; var expected_result = `\ gutentag hello gutentag2 hello2 `; var root = parse(insrc); console.log(root.toString()); // note. there could be problems with added whitespace (not sure what exactly is the cause) function replaceNodes(root, selector, name = 'div', attributes = {}) { const getName = (typeof name == 'function') ? (node => name(node)) : (() => name); const getAttributes = (typeof attributes == 'function') ? (node => attributes(node)) : (() => attributes); const result = []; root.querySelectorAll(selector).forEach(node => { const nameNew = getName(node); if (!nameNew || !node.innerHTML) return; // no replace const attributesNew = getAttributes(node); const nodeNew = parse(`<${nameNew}>`).childNodes[0].childNodes[0]; for (const [name, value] of Object.entries(attributesNew)) { nodeNew.setAttribute(name, value); } nodeNew.innerHTML = node.innerHTML; if (1) { // working node.insertAdjacentHTML('afterend', nodeNew.toString()); const nodeNewReal = node.nextElementSibling; result.push(nodeNewReal); node.remove(); } else if (0) { // breaks when called multiple times node.replaceWith(nodeNew); result.push(nodeNew); } else { // breaks when called multiple times node.insertAdjacentHTML('afterend', `<${nameNew}>`); const nodeNew = node.nextElementSibling; for (const [name, value] of Object.entries(attributesNew)) { nodeNew.setAttribute(name, value); } nodeNew.innerHTML = node.innerHTML; result.push(nodeNew); node.remove(); } }); return result; } replaceNodes(root, 'div.fragment', 'lang') .forEach(nodeNew => { console.dir(nodeNew.querySelectorAll('p').map(p => p.rawAttrs)) // empty when using node.replaceWith(nodeNew) replaceNodes(nodeNew, 'p', (node => node.attributes && node.attributes.lang)); }); console.log(root.toString()); ```
taoqf commented 3 years ago

Thanks so much for your work, I will check this some time when I got time.

milahu commented 3 years ago

when I got time.

haha, same. it's only a side project of a side project of a side project ....

but it's low priority, because we already have node.insertAdjacentHTML to transform HTML / XML / custom element nodes; see my workaround