oven-sh / bun

Incredibly fast JavaScript runtime, bundler, test runner, and package manager – all in one
https://bun.sh
Other
74.53k stars 2.79k forks source link

`Bun.build` interprets some static require() calls as dynamic #11024

Open ashtonsix opened 6 months ago

ashtonsix commented 6 months ago

What version of Bun is running?

1.1.7+b0b7db5c0

What platform is your computer?

Darwin 23.1.0 arm64 arm

What steps can reproduce the bug?

import { tmpdir } from "os";

const fpath = `${tmpdir()}/${Math.random().toFixed(36).slice(2)}.js`;
const outdir = `${tmpdir()}/${Math.random().toFixed(36).slice(2)}`;

await Bun.write(fpath, `var x = require("mod")`);

const output = await Bun.build({
  entrypoints: [fpath],
  outdir,
  target: "browser",
  external: ["mod"], // resolved using https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script/type/importmap
});

const outpath = output.outputs.find((o) => o.kind === "entry-point")!.path;

console.log(await Bun.file(outpath).text());

What is the expected behavior?

Build output that doesn't fail upon execution when require() is unavailable

What do you see instead?

An error message that misidentifies the require() call as dynamic in the build output, when it is clearly static:

throw Error('Dynamic require of "' + x + '" is not supported');

The artifact throws this in environments where require() is unavailable (such as the browser).

Additional information

I created a plugin that replaces static require() calls with ESM imports, since Bun seems to have no problem with these. The correctness and performance are decent. And thanks to the plugin I'm not affected by this issue, however I think the community would be better-served if Bun.build were modified rather than extended.

For the plugin to consider a require call static it must:

  1. Be top-level eg, not nested like if (condition) var x = require("mod");
  2. Be a simple declaration eg, var x = require("mod");

Here's the plugin source code (taken from a private repository):

Replace static require() plugin ```ts import type { BunPlugin } from "bun"; import jsTokens from "js-tokens"; // uses tokens rather than AST for perf type TokenConsumer = (tokens: jsTokens.Token[]) => number; // consumes parens and any preceding content, such as "for await (...)" const consumeParens: TokenConsumer = (tokens) => { let length = 0; let token: jsTokens.Token; let level = 0; let seen = 0; while ((token = tokens.pop()!)) { length += token.value.length; if (token.type === "Punctuator" && /^[()]$/.test(token.value)) { seen += 1; level += token.value === "(" ? 1 : -1; } if (seen >= 1 && level === 0) { return length; } } return length; }; // consumes content until end of statement, terminated by "}", ";" or "\n" const consumeStatement: TokenConsumer = (tokens) => { let length = 0; let token: jsTokens.Token; let level = 0; enum State { Operator, NotOperator, NotOperatorNewline, } let state = State.NotOperator; while ((token = tokens.pop()!)) { length += token.value.length; let isOpenTag = false; let isClosedTag = false; if (/^Punctuator$|^Template(Head|Tail)$/.test(token.type)) { isOpenTag = /^[({[]$/.test(token.value) || /Head$/.test(token.type); isClosedTag = /^[\]})]$/.test(token.value) || /Tail$/.test(token.type); state = isOpenTag || isClosedTag ? State.NotOperator : State.Operator; } if (isOpenTag) level++; else if (isClosedTag) level--; if (level || /^WhiteSpace$|Comment$/.test(token.type)) continue; // match statement-terminating "}" or ";" if ( token.type === "Punctuator" && (token.value === "}" || token.value === ";") ) { return length; } // match statement-terminating newline using state machine // looking for sequence NotOperator -> Newline -> IdentifierOrLiteral if ( state === State.NotOperator && token.type === "LineTerminatorSequence" ) { state = State.NotOperatorNewline; } else if ( state === State.NotOperatorNewline && /^IdentifierName$|Literal$/.test(token.type) ) { tokens.push(token); // restore lookahead return length - token.value.length; } else if ( state === State.NotOperatorNewline && token.type !== "LineTerminatorSequence" ) { state = State.NotOperator; } } return length; }; const consumeSeqs = { if: [consumeParens, consumeStatement], else: [consumeStatement], for: [consumeParens, consumeStatement], while: [consumeParens, consumeStatement], do: [consumeStatement, consumeParens], with: [consumeParens, consumeStatement], } as Record; // We consider a CJS import to be static if it's top-level and part of a simple declaration statement export function extractStaticCjs(src: string) { if (!src.includes("require")) return []; // early exit for perf let index = 0; let level = 0; let token: jsTokens.Token; const tokens = Array.from(jsTokens(src)).reverse(); const importRefs = [] as { start: number; end: number; esmVersion: string }[]; while ((token = tokens.pop()!)) { index += token.value.length; // skip blocks if (token.type === "Punctuator" && /^[({[\]})]$/.test(token.value)) { level += /^[({[]$/.test(token.value) ? 1 : -1; continue; } else if (level) continue; // skip nested statements not part of a block if ( token.type === "IdentifierName" && {}.hasOwnProperty.call(consumeSeqs, token.value) ) { for (let consumer of consumeSeqs[token.value]) index += consumer(tokens); continue; } // identify possible start of simple require declaration if ( token.type !== "IdentifierName" || !/^(var|let|const)$/.test(token.value) ) { continue; } // \s*( // \w+|{[^}]+} - simple or destructured identifier // )\s*=\s* // require\(\s*['"]([^'"]+)['"]\s*\) - require call // \s*(?:;|$|\n(?=\s*[\w'"])) - end of statement const reg = /\s*(\w+|{[^}]+})\s*=\s*require\(\s*['"]([^'"]+)['"]\s*\)\s*(?:;|$|\n(?=\s*[\w'"]))/y; reg.lastIndex = index; const match = reg.exec(src); if (!match) continue; const [statement, value, path] = match; let start = index - token.value.length; let end = index + statement.length; let esmVersion: string; if (value[0] === "{") { const imports = value .replace(/[{}]/g, "") .split(",") .map((part) => { let [imprt, alias] = part.split(":"); imprt = imprt.replace(/['"]/g, "").trim(); if (alias) imprt += ` as ${alias.trim()}`; return imprt; }); esmVersion = `import {${imports.join(",")}} from "${path}";`; } else { esmVersion = `import * as ${value}_MFALL from "${path}";\n` + `const ${value} = ${value}_MFALL?.default || ${value}_MFALL;`; } const original = src.slice(start, end); importRefs.push({ start, end, original, esmVersion }); } return importRefs; } export const rewriteImportsBunPlugin: BunPlugin = { name: "Rewrite imports", setup(build) { // We consider require() calls to be static when they are top-level and expressed as a simple statement // However Bun occasionally interprets these calls as dynamic, so we convert them to ESM imports for // improved consistency in the build output build.onLoad( { filter: /\.(mjs|cjs|js|jsx|ts|mts|cts|tsx)$/ }, async ({ path }) => { let contents = await Bun.file(path).text(); let cjs = extractStaticCjs(contents); for (let { start: s, end: e, esmVersion } of cjs.reverse()) { contents = contents.slice(0, s) + esmVersion + contents.slice(e); } // Bun doesn't always handle "exports.x = y" correctly, // so we prefix "exports" with "module." to help it out // https://github.com/oven-sh/bun/issues/11032 contents = contents.replace( /([^.]\s*[^.\w])(exports(?:\.\w+)?\s*=)/g, "$1module.$2", ); // prettier-ignore // https://bun.sh/docs/bundler/loaders const loader = ({ mjs: "js", cjs: "js", js: "jsx", jsx: "jsx", ts: "ts", mts: "ts", cts: "ts", tsx: "tsx", } as const)[path.match(/\.([^.]+)$/)![1]]!; return { contents, loader, }; }, ); }, }; ```
extractStaticCjs test suite ```ts import { expect, describe, it } from "bun:test"; import { extractStaticCjs } from "./parse-js"; describe("extractStaticCjs", () => { it("should return an empty array if no require statements are found", () => { const src = "const x = 10;"; const result = extractStaticCjs(src); expect(result.map((e) => e.original)).toEqual([]); }); it("should skip require calls without a declaration", () => { const src = `require("mod");`; const result = extractStaticCjs(src); expect(result.map((e) => e.original)).toEqual([]); }); it("should extract simple require declarations", () => { const src = ` const mod = require("mod"); some nonsense to ignore const {mod: alias} = require("mod"); const { mod3, mod4: alias2 } = require("mod"); `; const result = extractStaticCjs(src); expect(result.map((e) => e.original.replace(/\s+/g, " "))).toEqual([ 'const mod = require("mod");', 'const {mod: alias} = require("mod");', 'const { mod3, mod4: alias2 } = require("mod");', ]); }); it("should skip complex declarations", () => { // maybe _some_ of these should be extracted? const src = ` const mod1 = condition ? require("mod1") : require("mod1-alt"); const mod2 = require("mod2") + require("mod2-alt"); const mod3 = require("mod3").item; const mod4 = require("mod4"); `; const result = extractStaticCjs(src); expect(result.map((e) => e.original)).toEqual([ `const mod4 = require("mod4");`, ]); }); it("should skip require statements inside block statements", () => { const src = ` if (condition) { const modI = require("modI"); } label: { const modI = require("modI"); } const mod1 = require("mod1"); try { const modIgnore = require("modIgnore"); } catch (e) {} const mod2 = require("mod2"); function() { const modI = require("modI"); } `; const result = extractStaticCjs(src); expect(result.map((e) => e.original)).toEqual([ `const mod1 = require("mod1");`, `const mod2 = require("mod2");`, ]); }); it("should skip require statements inside nested statements not demarcated by blocks", () => { const src = ` const mod1 = require("mod1") if (condition); else var mod2 = require("mod2") for (let i = 0; i < 10; i++) var mod3 = require("mod3") // no semicolon const mod4 = require("mod4"); (sequence, var mod5 = require("mod5")); [array, var mod6 = require("mod6")]; // comment; var mod7 = require("mod7"); if (condition) something + // operator prevents termination by newline var mod8 = require("mod8") do var mod9 = require("mod9"); while (condition) const { modA } = require("modA") `; const result = extractStaticCjs(src); expect(result.map((e) => e.original.trim())).toEqual([ 'const mod1 = require("mod1")', 'const mod4 = require("mod4");', 'const { modA } = require("modA")', ]); }); }); ```
paperdave commented 5 months ago

the current behavior of non-resolved imports in --target=browser is intentional. it is to use a function which checks for a global require function, otherwise throw with an error message. the reason mod is a dynamic import is because it is marked external.

relevant codegen: (this is almost identical to what esbuild outputs)

// top of file:
var __require = ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
  if (typeof require !== "undefined")
    return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});

// callsite:
var x = __require("mod");