taoqf / node-html-parser

A very fast HTML parser, generating a simplified DOM, with basic element query support.
MIT License
1.11k stars 107 forks source link

Get line number of node #226

Closed psugihara closed 1 year ago

psugihara commented 1 year ago

I'm making a tiny HTML email linter (https://github.com/sofn-xyz/mailing/issues/291).

Is there a fast way to get the line number of a node in the HTML? My best idea is to search the HTML lines for a string, as shown below, but that will have trouble with repeated lines and big HTML files:

function lintHtml(html: string) {
  const lines = html.split("\n");
  const lint = [];
  // check html for images with relative paths or localhost
  const root = parse(html);
  const images = root.querySelectorAll("img");
  for (const image of images) {
    const src = image.getAttribute("src");
    if (!src) continue;
    if (src.startsWith("http://localhost")) {
      lint.push({
        line: lines.findIndex((line) => line.includes(src)),
        message: `image src "${src}" uses localhost`,
      });
    } else if (!src.startsWith("http")) {
      lint.push({
        line: lines.findIndex((line) => line.includes(src)),
        message: `image src "${src}" is relative and must be absolute`,
      });
    }
  }
...

Also, this lib is incredibly fast. Thank you so much for maintaining it!

taoqf commented 1 year ago
/**
 * Lints an HTML document for `<img>` elements whose `src` points at localhost
 * or is not an absolute `http...` URL.
 *
 * Images without a `src` attribute (or with an empty one) are ignored.
 *
 * @param html - The HTML source to check.
 * @returns One entry per offending image, in the order the images are returned
 *   by `querySelectorAll("img")`. `line` is the 1-based line on which the
 *   image starts, where lines are separated by "\n".
 */
function lintHtml(html: string): Array<{ line: number; message: string }> {
    const lint: Array<{ line: number; message: string }> = [];
    const root = parse(html);

    // Index every "\n" once, in ascending order, so each line lookup is a
    // binary search instead of re-slicing and re-scanning the document prefix
    // for every finding (which is quadratic on large files).
    const newlineOffsets: number[] = [];
    for (let at = html.indexOf("\n"); at !== -1; at = html.indexOf("\n", at + 1)) {
        newlineOffsets.push(at);
    }

    /**
     * Returns the 1-based line number of the node's start position.
     * `node.range[0]` is used as a character offset into `html`.
     */
    function getLine(node: { range: readonly [number, number] }): number {
        const start = node.range[0];
        // Count the newlines located strictly before `start`.
        let lo = 0;
        let hi = newlineOffsets.length;
        while (lo < hi) {
            const mid = (lo + hi) >>> 1;
            if (newlineOffsets[mid] < start) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        return lo + 1;
    }

    // check html for images with relative paths or localhost
    for (const image of root.querySelectorAll("img")) {
        const src = image.getAttribute("src");
        if (!src) continue;

        let message: string | undefined;
        if (src.startsWith("http://localhost")) {
            message = `image src "${src}" uses localhost`;
        } else if (!src.startsWith("http")) {
            message = `image src "${src}" is relative and must be absolute`;
        }

        if (message !== undefined) {
            lint.push({ line: getLine(image), message });
        }
    }
    return lint;
}