emscripten-core / emscripten

Emscripten: An LLVM-to-WebAssembly Compiler
Other
25.88k stars 3.32k forks source link

Code size regressions with WASI, environ, libc/filesystem #22534

Open juj opened 2 months ago

juj commented 2 months ago

I've got a Flappy Bird game that I last compiled circa 2020, when I optimized it for minimal code size.

It's been a while since I touched the codebase, but tonight I set out to update the code and build it with the latest Emscripten compiler version.

Some code size creep has appeared in the build. Here are some examples:

// Flag that __emscripten_runtime_keepalive_clear later sets to false.
// Because the right-hand operand of `||` is the literal `true`, the initial
// value is always truthy: a missing or falsy Module["noExitRuntime"] falls
// through to `true`.
var noExitRuntime = Module["noExitRuntime"] || true;
/**
 * Forwards to the global `abort` with an empty message.
 * Nothing is returned to the caller.
 */
var __abort_js = () => void abort("");
/**
 * Resets the `noExitRuntime` flag to false.
 * Nothing is returned to the caller.
 */
var __emscripten_runtime_keepalive_clear = () => void (noExitRuntime = false);
// Active timers keyed by `which`; each entry stores the setTimeout handle and
// the delay it was armed with.
var timers = {};
// Invokes the supplied function immediately and returns its result.
var callUserCallback = func => func();
/**
 * Arms, re-arms or cancels the timer slot identified by `which`.
 *
 * Any timer already stored for the slot is cleared first. A falsy
 * `timeout_ms` then leaves the slot empty (cancel only); otherwise a new
 * timeout is scheduled which, on expiry, removes the slot and calls
 * `__emscripten_timeout(which, _emscripten_get_now())` via callUserCallback.
 *
 * @param {number} which - Timer slot key.
 * @param {number} timeout_ms - Delay passed to setTimeout; falsy means cancel.
 * @returns {number} Always 0.
 */
var __setitimer_js = (which, timeout_ms) => {
  var pending = timers[which];
  if (pending) {
    clearTimeout(pending.id);
    delete timers[which];
  }
  if (timeout_ms) {
    var onExpire = () => {
      // Drop the bookkeeping entry before notifying, as the timer is one-shot.
      delete timers[which];
      callUserCallback(() => __emscripten_timeout(which, _emscripten_get_now()));
    };
    timers[which] = {
      id: setTimeout(onExpire, timeout_ms),
      timeout_ms: timeout_ms
    };
  }
  return 0;
};
// Overrides merged on top of the default environment; a key whose value is
// undefined removes the corresponding default entry.
var ENV = {};
// Fixed program name reported through the "_" environment variable.
var getExecutableName = () => "./this.program";
/**
 * Returns the list of "NAME=value" environment strings.
 *
 * The list is built on the first call and memoized on
 * `getEnvStrings.strings`, so later changes to ENV are not picked up.
 * LANG is derived from `navigator.languages[0]` when available (first "-"
 * replaced by "_"), otherwise "C", with ".UTF-8" appended.
 *
 * @returns {string[]} The cached array of environment strings.
 */
var getEnvStrings = () => {
  // Serve the memoized list when it has already been built.
  if (getEnvStrings.strings) return getEnvStrings.strings;

  var hasLanguages = typeof navigator == "object" && navigator.languages;
  var language = hasLanguages && navigator.languages[0] || "C";
  var env = {
    USER: "web_user",
    LOGNAME: "web_user",
    PATH: "/",
    PWD: "/",
    HOME: "/home/web_user",
    LANG: language.replace("-", "_") + ".UTF-8",
    _: getExecutableName()
  };

  // Apply overrides: undefined deletes a default, anything else replaces/adds.
  for (var name in ENV) {
    if (ENV[name] === undefined) {
      delete env[name];
    } else {
      env[name] = ENV[name];
    }
  }

  var list = [];
  for (var key in env) {
    list.push(`${key}=${env[key]}`);
  }
  getEnvStrings.strings = list;
  return list;
};
/**
 * Copies the UTF-16 code units of `str` into HEAP8 starting at index `buffer`,
 * one unit per element, and writes a terminating 0 right after them.
 *
 * @param {string} str - Text to copy.
 * @param {number} buffer - Index into HEAP8 where the first unit is stored.
 */
var stringToAscii = (str, buffer) => {
  var length = str.length;
  var index = 0;
  while (index < length) {
    HEAP8[buffer + index] = str.charCodeAt(index);
    index++;
  }
  // NUL terminator follows the last copied unit.
  HEAP8[buffer + length] = 0;
};
/**
 * Writes every environment string into memory and records where each starts.
 *
 * Strings are laid out back to back from `environ_buf` (each followed by its
 * terminator, written by stringToAscii), and the start address of string `i`
 * is stored as a 32-bit value at byte offset `__environ + i * 4`.
 *
 * @param {number} __environ - Byte address of the pointer table to fill.
 * @param {number} environ_buf - Byte address where string data begins.
 * @returns {number} Always 0.
 */
var _environ_get = (__environ, environ_buf) => {
  var strings = getEnvStrings();
  var cursor = environ_buf;
  for (var [index, text] of strings.entries()) {
    HEAPU32[__environ + index * 4 >> 2] = cursor;
    stringToAscii(text, cursor);
    // Advance past the string and its terminator.
    cursor += text.length + 1;
  }
  return 0;
};
/**
 * Reports how much space the environment needs.
 *
 * Stores the number of environment strings at `penviron_count` and the total
 * byte size of all strings (each counted with one extra terminator byte) at
 * `penviron_buf_size`, both as 32-bit values.
 *
 * @param {number} penviron_count - Byte address receiving the string count.
 * @param {number} penviron_buf_size - Byte address receiving the total size.
 * @returns {number} Always 0.
 */
var _environ_sizes_get = (penviron_count, penviron_buf_size) => {
  var strings = getEnvStrings();
  var totalBytes = 0;
  for (var entry of strings) {
    totalBytes += entry.length + 1;
  }
  HEAPU32[penviron_count >> 2] = strings.length;
  HEAPU32[penviron_buf_size >> 2] = totalBytes;
  return 0;
};
/**
 * Stub close handler: ignores `fd` and always reports the same status code.
 * NOTE(review): 52 is presumably an errno value — confirm against
 * Emscripten's errno table.
 *
 * @param {number} fd - File descriptor (unused).
 * @returns {number} Always 52.
 */
var _fd_close = (fd) => {
  return 52;
};
/**
 * Combines the low and high 32-bit halves of a 64-bit integer into a single
 * JavaScript number, or yields NaN when the value is outside the accepted
 * range.
 *
 * The high word is biased by 2^21 and compared as unsigned, which accepts
 * `hi` in [-2^21, 2^21 - 1], plus `hi == 2^21` only when `lo` is zero; the
 * result therefore lies within [-2^53, 2^53].
 *
 * @param {number} lo - Low 32 bits (interpreted as unsigned).
 * @param {number} hi - High 32 bits (signed).
 * @returns {number} `(lo >>> 0) + hi * 2^32`, or NaN if out of range.
 */
var convertI32PairToI53Checked = (lo, hi) => {
  var biasedHi = (hi + 2097152) >>> 0;
  // A non-zero low word tightens the upper bound by one.
  var limit = 4194305 - !!lo;
  if (biasedHi < limit) {
    return (lo >>> 0) + hi * 4294967296;
  }
  return NaN;
};

/**
 * Stub seek handler: seeking is not performed and every call reports the same
 * status code, regardless of its arguments.
 *
 * The previous body decoded the 64-bit offset into a local that was never
 * read; that dead computation (and the call it required) has been removed.
 * NOTE(review): 70 is presumably an errno value — confirm against
 * Emscripten's errno table.
 *
 * @param {number} fd - File descriptor (unused).
 * @param {number} offset_low - Low 32 bits of the offset (unused).
 * @param {number} offset_high - High 32 bits of the offset (unused).
 * @param {number} whence - Seek origin (unused).
 * @param {number} newOffset - Address for the resulting offset (unused).
 * @returns {number} Always 70.
 */
function _fd_seek(fd, offset_low, offset_high, whence, newOffset) {
  return 70;
}
/**
 * Emits the bytes described by an array of I/O vectors through `printChar`.
 *
 * Each vector entry occupies 8 bytes: a 32-bit data address followed by a
 * 32-bit length. Every byte of every entry is read from HEAPU8 and handed to
 * `printChar(fd, byte)`; the total number of bytes is then stored as a 32-bit
 * value at `pnum`.
 *
 * @param {number} fd - Descriptor forwarded to printChar.
 * @param {number} iov - Byte address of the first vector entry.
 * @param {number} iovcnt - Number of vector entries.
 * @param {number} pnum - Byte address receiving the total byte count.
 * @returns {number} Always 0.
 */
var _fd_write = (fd, iov, iovcnt, pnum) => {
  var total = 0;
  var entry = iov;
  for (var remaining = iovcnt; remaining > 0; remaining--) {
    var start = HEAPU32[entry >> 2];
    var length = HEAPU32[entry + 4 >> 2];
    entry += 8;
    for (var at = start, end = start + length; at < end; at++) {
      printChar(fd, HEAPU8[at]);
    }
    total += length;
  }
  HEAPU32[pnum >> 2] = total;
  return 0;
};
/**
 * Terminates execution by throwing.
 * The thrown value is the plain string "exit(<code>)", not an Error object.
 *
 * @param {number} code - Exit status embedded in the thrown string.
 * @throws {string} Always.
 */
var _proc_exit = (code) => {
  var reason = `exit(${code})`;
  throw reason;
};

// Table mapping short import keys to the JavaScript implementations defined
// earlier in this listing.
// NOTE(review): the keys look like minified import names that must match the
// compiled .wasm module — confirm before renaming or reordering entries.
var wasmImports = {
  nb: __emscripten_runtime_keepalive_clear,
  mb: __setitimer_js,
  tb: _environ_get,
  sb: _environ_sizes_get,
  rb: _fd_close,
  K: _fd_seek,
  qb: _fd_write,
  pb: _proc_exit
};

and in Wasm:

FlappyBird.wasm/function __wasi_syscall_ret: 23
FlappyBird.wasm/function action_terminate: 14
FlappyBird.wasm/function __stdio_close: 13
FlappyBird.wasm/function _emscripten_timeout: 108
FlappyBird.wasm/function action_abort: 7

The project itself does not use WASI, libc filesystem, or most other parts of libc much at all (e.g. no printf() and no environ() or related code), and it is compiled with -sEXIT_RUNTIME=0 in MINIMAL_RUNTIME build mode.

When I look into why these functions are present, I get odd circular trails:

It seems that the code size tests aren't catching real-world use case regressions as much as I would have hoped. I wonder if there are past changes in these areas that would come to mind?

sbc100 commented 2 months ago

One way to figure out why a given function is included would be to use -Wl,--trace-symbol. For example -Wl,--trace-symbol=raise might help understand why raise is being included. That does seem like a symbol that should only be included if it's used directly.