ericcornelissen / pp-runtime-gadgets

Gadgets in the JavaScript runtime based on the ECMAScript specification
0 stars 0 forks source link

Finding gadgets using test262 #1

Open ericcornelissen opened 1 month ago

ericcornelissen commented 1 month ago

The tc39/test262 suite contains many tests for the JavaScript runtime functionality. By instrumenting it, it could be used to find gadget candidates automatically (instead of reviewing the spec manually).

As a quick hacky check for feasibility I ran the following script, which replaces all empty objects {} by a proxy. Besides missing objects with properties, this also replaces non-objects (e.g. in function f() {}), so it breaks lots of tests too. Nevertheless, it does find some pollutable properties already discovered in this project (e.g. maxByteLength for ArrayBuffer).

bash

git clone git@github.com:tc39/test262.git
cd test262

# Wrap every empty object literal `{}` in a Proxy that logs reads of
# properties the object does not own.
#
# `find` recurses into subdirectories on its own, so one invocation covers
# every directory depth without relying on how the shell expands `**`.
# The pattern is written `{\}` rather than `{}` - presumably so that `find`
# does not substitute the file name into the sed expression.
find ./test -type f -name '*.js' -exec sed -i 's/{\}/new Proxy(\0,{get(t, p){if(!Object.hasOwn(t, p)){console.log("looked up:",p)}return t[p]}})/g' {} +

# Run the instrumented tests in batches, keeping one log file per batch.
# With bash's default globbing each `**` matches a single path component, so
# every command below selects the test files at one directory depth.
npx test262-harness ./test/**/*.js | tee proxy-0.log
npx test262-harness ./test/**/**/*.js | tee proxy-1.log
npx test262-harness ./test/built-ins/**/*.js | tee proxy-1-1.log
npx test262-harness ./test/**/**/**/*.js | tee proxy-2.log
npx test262-harness ./test/**/**/**/**/*.js | tee proxy-3.log
npx test262-harness ./test/**/**/**/**/**/*.js | tee proxy-4.log
npx test262-harness ./test/**/**/**/**/**/**/*.js | tee proxy-5.log
npx test262-harness ./test/**/**/**/**/**/**/**/*.js | tee proxy-6.log
ericcornelissen commented 1 month ago

Put together a basic CLI to perform proper rewriting of JavaScript code. Haven't run as extensive a test as in the original comment, but it worked well on the ArrayBuffer test suite.

The CLI program is:

// used `"acorn": "8.11.3", "escodegen": "2.1.0", "estraverse": "5.3.0"`
// ran on Node.js v22.0.0

import * as fs from "node:fs/promises";
import * as path from "node:path";
import * as process from "node:process";

import * as acorn from "acorn";
import * as escodegen from "escodegen";
import * as estraverse from "estraverse";

/**
 * Build the source text of an expression that wraps `obj` in a logging Proxy.
 *
 * The generated `get` trap prints `looked up: <property>` whenever a property
 * that is not an own property of the wrapped value is read, and then performs
 * the read on the wrapped value.
 *
 * The read is forwarded with `Reflect.get` and the original receiver so that
 * getters run with the same `this` they would see without the proxy (the
 * proxy itself, or an object that inherits from it) instead of the raw target.
 *
 * @param {string} obj - Source text of the expression to wrap.
 * @returns {string} Source text of a `new Proxy(...)` expression, surrounded
 *   by whitespace.
 */
function createProxyCode(obj) {
    return `
    new Proxy(${obj}, {
      get(target, property, receiver) {
        if (!Object.hasOwn(target, property)) {
          console.log("looked up:", property);
        }

        return Reflect.get(target, property, receiver);
      },
    })
  `;
}

/**
 * Rewrite `code` so that every array and object literal is wrapped in the
 * expression produced by `createProxyCode`.
 *
 * The code is parsed with acorn, each literal node is swapped for the parsed
 * form of its wrapped source text while leaving the node (so nested literals
 * are handled before the literal containing them), and the resulting tree is
 * printed back to source text with escodegen.
 *
 * @param {string} code - The JavaScript source to rewrite.
 * @param {object} acornOptions - Options passed to every `acorn.parse` call.
 * @returns {string} The regenerated source text.
 */
function transform(code, acornOptions) {
    const wrappedTypes = new Set(["ArrayExpression", "ObjectExpression"]);

    // Print the literal, wrap its text, and parse the wrapped text back into
    // an expression node that can take the literal's place in the tree.
    const wrapInProxy = (literal) => {
        const wrappedSource = createProxyCode(escodegen.generate(literal));
        const [statement] = acorn.parse(wrappedSource, acornOptions).body;
        return statement.expression;
    };

    const visitor = {
        leave(node) {
            // Returning `undefined` keeps the visited node unchanged.
            return wrappedTypes.has(node.type) ? wrapInProxy(node) : undefined;
        },
    };

    const rewritten = estraverse.replace(acorn.parse(code, acornOptions), visitor);
    return escodegen.generate(rewritten);
}

// CLI entry point: rewrite, in place, the file named by the first argument.
const file = process.argv[2];
if (file === undefined) {
    // `path.resolve(undefined)` would throw before any error handling runs.
    console.error("usage: node cli.js <file>");
    process.exitCode = 1;
} else {
    const filepath = path.resolve(file);
    try {
        // Fail before doing any work if the file cannot be read and written back.
        await fs.access(filepath, fs.constants.R_OK | fs.constants.W_OK);
        const code = await fs.readFile(filepath, { encoding: "utf-8" });
        const transformed = transform(code, { ecmaVersion: 2020 });
        await fs.writeFile(filepath, transformed);
    } catch (error) {
        // Name the file: when run over many files the error alone may not
        // say which one failed. Report on stderr and signal failure to the
        // caller through the exit code.
        console.error(`error for ${file}:`, error.message);
        console.error(error);
        process.exitCode = 1;
    }
}

It can be applied to a bunch of files easily on a Unix system with:

cd test262
find ./test/built-ins/ArrayBuffer/ -type f -name "*.js" -exec node cli.js {} \;

Then a shell with recursive glob expansion like Zsh can be used to run the ArrayBuffer tests:

npx test262-harness test/built-ins/ArrayBuffer/**/*.js

This particular approach seems to cause some issues, which I think is because the rewriting doesn't preserve comments...

ericcornelissen commented 1 month ago

Preserving comments results in more passing tests. The following iteration of the above CLI achieves this. It also improves the detection of missing property lookups by 1) checking whether the property exists anywhere in the prototype hierarchy (to avoid reporting things expected to be in the prototype, which aren't surprising), and 2) ignoring some uninteresting cases.

This iteration still has some problems, as it 1) can generate incorrect code (because it does naive replacement, which goes wrong for nested objects), 2) does not support some modern language features, and 3) does not support both CJS and ESM.

// used `"esprima": "4.0.1"`
// ran on Node.js v22.0.0

import * as fs from "node:fs/promises";
import * as path from "node:path";
import * as process from "node:process";

import * as esprima from "esprima";

/**
 * Build the source text of an expression that wraps `obj` in a logging Proxy.
 *
 * The generated `get` trap prints `looked up: <property>` when a property is
 * read that exists nowhere on the wrapped value or its prototype chain.
 * Nothing is reported for values whose prototype is `null`, nor for the
 * well-known symbols `Symbol.toStringTag` and `Symbol.toPrimitive`.
 *
 * The `in` operator performs the prototype-chain search. The read itself is
 * forwarded with `Reflect.get` and the original receiver so that getters run
 * with the same `this` they would see without the proxy (the proxy itself, or
 * an object that inherits from it) instead of the raw target.
 *
 * @param {string} obj - Source text of the expression to wrap.
 * @returns {string} Source text of a `new Proxy(...)` expression.
 */
function createProxyCode(obj) {
  return `new Proxy(${obj}, {
    get(target, property, receiver) {
      if (Object.getPrototypeOf(target) !== null) {
        const allowlist = [
          Symbol.toStringTag,
          Symbol.toPrimitive,
        ];

        if (!(property in target) && !allowlist.includes(property)) {
          console.log("looked up:", property);
        }
      }

      return Reflect.get(target, property, receiver);
    }
  })`;
}

/**
 * Wrap every array and object literal in `source` in the expression produced
 * by `createProxyCode`, leaving all other text (including comments) untouched.
 *
 * The literals' character ranges are collected while esprima parses the
 * source. The output is then assembled in a single pass in source order:
 * each literal's text is rewritten recursively before it is wrapped, so
 * nested literals are wrapped inside the literal that contains them. Every
 * offset is only ever used against the original, unmodified source, so it
 * cannot go stale as replacements change the length of the text.
 *
 * @param {string} source - The JavaScript (script) source to rewrite.
 * @returns {string} The rewritten source text.
 */
function transform(source) {
  const literals = [];
  esprima.parseScript(source, {}, (node, meta) => {
    if (node.type === "ArrayExpression" || node.type === "ObjectExpression") {
      literals.push({ start: meta.start.offset, end: meta.end.offset });
    }
  });

  // In source order a literal is directly followed by the literals nested in
  // it, which is the order in which `rewrite` consumes the list.
  literals.sort((a, b) => a.start - b.start);

  let next = 0;

  // Return the text of source[from, to) with every literal that starts
  // inside that range wrapped.
  function rewrite(from, to) {
    let output = "";
    let cursor = from;
    while (next < literals.length && literals[next].start < to) {
      const { start, end } = literals[next];
      next += 1;
      output += source.slice(cursor, start) + createProxyCode(rewrite(start, end));
      cursor = end;
    }

    return output + source.slice(cursor, to);
  }

  return rewrite(0, source.length);
}

// -----------------------------------------------------------------------------

// CLI entry point: rewrite, in place, the file named by the first argument.
const file = process.argv[2];
if (file === undefined) {
  // `path.resolve(undefined)` would throw before any error handling runs.
  console.error("usage: node cli.js <file>");
  process.exitCode = 1;
} else {
  const filepath = path.resolve(file);
  try {
    // Fail before doing any work if the file cannot be read and written back.
    await fs.access(filepath, fs.constants.R_OK | fs.constants.W_OK);
    const code = await fs.readFile(filepath, { encoding: "utf-8" });
    const transformed = transform(code);
    await fs.writeFile(filepath, transformed);
  } catch (error) {
    // Report on stderr and signal failure to the caller through the exit
    // code instead of exiting successfully.
    console.error(`error for ${file}:`, error.message);
    console.error(error);
    process.exitCode = 1;
  }
}

which can be used in the same way as the CLI script from the previous comment.