Tencent / hel

A runtime module federation SDK that is toolchain-agnostic for module consumers. 工具链无关的运行时模块联邦sdk.
https://tencent.github.io/hel/
Other
931 stars 79 forks source link

【bug】html parse 报错修复方案 #86

Closed fantasticsoul closed 1 month ago

fantasticsoul commented 7 months ago

目前内置的 htmlparser 在某些场景下有问题，可以用此办法修复

后续会使用 DOMParser 来修复

import * as htmlparser2 from "htmlparser2";

/**
 * Extracts the `<script>` and `<link>` elements from an HTML string.
 * Intended to be plugged into `custom.parseHtml`:
 * ```ts
 * helMicro.preFetchLib('xxx',  { custom: { parseHtml } })
 * ```
 *
 * Every `<script>` is recorded; a `<link>` is recorded unless its `rel`
 * attribute is exactly `icon`. Entries are returned in document order.
 *
 * @param {string} html - HTML source, handed as-is to `htmlparser2.Parser#write`.
 * @returns {Array<{ tag: string, attrs: object, innerText: string, head: boolean }>}
 *   One entry per recorded element. `innerText` holds the inline content of a
 *   `<script>` (empty for `<link>` and for scripts without a body). `head` is
 *   `true` when the element was opened before `</head>` was seen.
 */
export function parseHtml(html) {
  // Stays true until the closing </head> is seen, so when the document has no
  // <head> at all every recorded element is flagged as a head asset.
  let isHeadOpen = true;
  // Entry of the <script> currently being parsed, or null when the parser is
  // not inside a <script>. Text is only collected while this is set; otherwise
  // whitespace or body text following an asset would be attributed to it.
  let openScriptItem = null;
  const tagDataList = [];

  const parser = new htmlparser2.Parser({
    onopentag(tag, attrs) {
      if (tag === 'script') {
        openScriptItem = { data: { tag, attrs, innerText: '' }, toHead: isHeadOpen };
        tagDataList.push(openScriptItem);
        return;
      }
      if (tag === 'link' && attrs.rel !== 'icon') {
        tagDataList.push({ data: { tag, attrs, innerText: '' }, toHead: isHeadOpen });
      }
    },
    ontext(text) {
      // Append rather than assign: the parser may report a single text node in
      // several chunks. The text belongs on `data`, which is what gets
      // returned, not on the wrapper entry.
      if (openScriptItem) {
        openScriptItem.data.innerText += text;
      }
    },
    onclosetag(tag) {
      if (tag === 'script') {
        openScriptItem = null;
      }
      if (tag === 'head') {
        isHeadOpen = false;
      }
    },
  });
  parser.write(html);
  parser.end();

  return tagDataList.map(({ data, toHead }) => ({ ...data, head: toHead }));
}
fantasticsoul commented 7 months ago

出现错误的片段

<!doctype html>
<html lang="zh">
    <head>
        <meta charset="UTF-8"/>
        <meta http-equiv="X-UA-Compatible" content="IE=edge"/>
        <meta name="viewport" content="width=device-width,initial-scale=1"/>
        <title>中台</title>
        <script defer="defer" src="https://cdn.staticfile.org/vue/2.6.14/vue.min.js"></script>
        <script defer="defer" src="https://cdn.staticfile.org/vue-router/3.6.5/vue-router.min.js"></script>
        <script defer="defer" src="https://cdn.staticfile.org/moment.js/2.29.3/moment.min.js"></script>
        <script defer="defer" src="http://127.0.0.1:5438/js/runtime-index.d154bbc9aa4a6ad57a2e.js"></script>
        <script defer="defer" src="http://127.0.0.1:5438/js/vendors-node_modules_pnpm_core-js_3_25_2_node_modules_core-js_modules_es_object_to-string_js--170f15.e7448cf8a83e13d55703.js"></script>
        <script defer="defer" src="http://127.0.0.1:5438/js/index.ba485e0f34152cd5e814.js"></script>
        <link href="http://127.0.0.1:5438/css/index.cd4a8a64c0ef12a2f379.css" rel="stylesheet">
    </head>
    <body>
        <div id="app"></div>
    </body>
</html>
fantasticsoul commented 1 month ago

最新版本已优先使用DOMParser ,将不会出现此类问题,见custom里的nativeParse方法