voltrondata-labs / arrowbench

R package for benchmarking
Other
13 stars 9 forks source link

Populate result properly when erroring so it can be posted #121

Closed alistaire47 closed 1 year ago

alistaire47 commented 1 year ago

Right now, when run with run_benchmark(), benchmarks that error do not properly populate all metadata fields to identify them with the correct history (tags, info, context, machine_info). Instead, with placebo, you get something like

{
  "timestamp": "2022-12-09T19:43:23.942867+0000",
  "error": {
    "log": ["22: stop(fallback)", "21: signal_abort(cnd, .file)", "20: rlang::abort(msg)", "19: placebo_func()", "18: eval(bm$run, envir = ctx)", "17: eval(bm$run, envir = ctx) at run.R#261", "16: eval(expr, p)", "15: eval.parent(...)", "14: as_bench_time(.Call(system_time_, substitute(expr), parent.frame()))", "13: stats::setNames(as_bench_time(.Call(system_time_, substitute(expr), ", "        parent.frame())), c(\"process\", \"real\"))", "12: bench::bench_time(eval.parent(...)) at measure.R#12", "11: eval(expr, p)", "10: eval.parent(expr) at measure.R#35", "9: with_profiling(profiling, {", "       timings <- bench::bench_time(eval.parent(...))", "   }) at measure.R#11", "8: eval(expr, p)", "7: eval.parent(expr) at measure.R#41", "6: force(expr)", "5: with_gcinfo(eval.parent(expr)) at measure.R#41", "4: with_gc_info({", "       prof_file <- with_profiling(profiling, {", "           timings <- bench::bench_time(eval.parent(...))", "       })", "   }) at measure.R#10", "3: measure(eval(bm$run, envir = ctx), profiling = profiling) at run.R#261", "2: run_iteration(bm, ctx, profiling = profiling) at run.R#197", "1: run_bm(duration = 0.01, grid = TRUE, error = \"rlang::abort\", ", "       bm = structure(list(name = \"placebo\", setup = function(duration = 0.01, ", "           error_type = NULL, output_type = NULL, grid = TRUE) {", "           BenchEnvironment(placebo_func = function() {", "               if (!is.null(output_type)) {", "                   msg <- \"here's some output\"", "                   if (output_type == \"message\") {", "                     message(\"A message: \", msg)", "                   } else if (output_type == \"warning\") {", "                     warning(\"A warning:\", msg)", "                   } else if (output_type == \"cat\") {", "                     cat(\"A cat:\", msg)", "                   }", "               }", "               if (!is.null(error_type)) {", "                   msg <- \"something went wrong (but I knew that)\"", "                   if (error_type == \"rlang::abort\") {", "                     rlang::abort(msg)", "                   }", "                   stop(msg)", "               }", "               Sys.sleep(duration)", "           })", "       }, before_each = TRUE, run = quote({", "           placebo_func()", "       }), after_each = TRUE, teardown = TRUE, valid_params = function(params) {", "           params", "       }, case_version = function(params) NULL, batch_id_fun = function(params) uuid(), ", "           tags_fun = function(params) params, packages_used = function(params) {", "               \"base\"", "           }), class = \"Benchmark\"), n_iter = 1, batch_id = \"065ce6d778a54d98bf1d1d4e0951f8b1\", ", "       profiling = FALSE, global_params = list(cpu_count = 10L, ", "           lib_path = \"latest\"))"]
  },
  "optional_benchmark_info": {
    "params": {
      "duration": 0.01,
      "grid": true,
      "error": "rlang::abort",
      "cpu_count": 10,
      "name": "placebo"
    }
  },
  "batch_id": "065ce6d778a54d98bf1d1d4e0951f8b1"
}

When placebo does not error, you instead get something like this:

```json { "batch_id": "065ce6d778a54d98bf1d1d4e0951f8b1", "timestamp": "2022-12-09T19:43:23.415977+0000", "stats": { "data": [ 0.0106903400737792 ], "unit": "s", "times": [], "time_unit": "s", "iterations": 1 }, "tags": { "duration": 0.01, "grid": true, "cpu_count": 10, "name": "placebo", "language": "R" }, "info": { "arrow_version": "10.0.0", "arrow_compiler_id": "AppleClang", "arrow_compiler_version": "13.0.0.13000029", "benchmark_language_version": "R version 4.2.2 (2022-10-31)", "arrow_version_r": "10.0.0" }, "optional_benchmark_info": { "result": [ { "process": 6.89999999999857e-05, "real": 0.0106903400737792, "start_mem_bytes": 113278976, "end_mem_bytes": 141950976, "max_mem_bytes": 141950976, "gc_level0": 0, "gc_level1": 0, "gc_level2": 0 } ], "params": { "duration": 0.01, "grid": true, "cpu_count": 10, "lib_path": "latest", "packages": [ { "package": "arrow", "version": "10.0.0", "date": "2022-10-26", "source": "CRAN (R 4.2.0)" }, { "package": "arrowbench", "version": "0.2.0", "date": "2022-12-05", "source": "local" }, { "package": "assertthat", "version": "0.2.1", "date": "2019-03-21", "source": "CRAN (R 4.2.0)" }, { "package": "bench", "version": "1.1.2", "date": "2021-11-30", "source": "CRAN (R 4.2.0)" }, { "package": "bit", "version": "4.0.5", "date": "2022-11-15", "source": "CRAN (R 4.2.0)" }, { "package": "bit64", "version": "4.0.5", "date": "2020-08-30", "source": "CRAN (R 4.2.0)" }, { "package": "cli", "version": "3.4.1", "date": "2022-09-23", "source": "CRAN (R 4.2.0)" }, { "package": "crayon", "version": "1.5.2", "date": "2022-09-29", "source": "CRAN (R 4.2.0)" }, { "package": "distro", "version": "0.1.0", "date": "2020-11-09", "source": "CRAN (R 4.2.0)" }, { "package": "ellipsis", "version": "0.3.2", "date": "2021-04-29", "source": "CRAN (R 4.2.0)" }, { "package": "fansi", "version": "1.0.3", "date": "2022-03-24", "source": "CRAN (R 4.2.0)" }, { "package": "glue", "version": "1.6.2", "date": "2022-02-24", "source": "CRAN (R 4.2.0)" }, { "package": "hms", "version": "1.1.2", "date": "2022-08-19", "source": "CRAN (R 4.2.0)" }, { "package": "jsonlite", "version": "1.8.3", "date": "2022-10-21", "source": "CRAN (R 4.2.0)" }, { "package": "lifecycle", "version": "1.0.3", "date": "2022-10-07", "source": "CRAN (R 4.2.0)" }, { "package": "magrittr", "version": "2.0.3", "date": "2022-03-30", "source": "CRAN (R 4.2.0)" }, { "package": "pillar", "version": "1.8.1", "date": "2022-08-19", "source": "CRAN (R 4.2.0)" }, { "package": "pkgconfig", "version": "2.0.3", "date": "2019-09-22", "source": "CRAN (R 4.2.0)" }, { "package": "prettyunits", "version": "1.1.1", "date": "2020-01-24", "source": "CRAN (R 4.2.0)" }, { "package": "progress", "version": "1.2.2", "date": "2019-05-16", "source": "CRAN (R 4.2.0)" }, { "package": "purrr", "version": "0.3.5", "date": "2022-10-06", "source": "CRAN (R 4.2.0)" }, { "package": "R.methodsS3", "version": "1.8.2", "date": "2022-06-13", "source": "CRAN (R 4.2.0)" }, { "package": "R.oo", "version": "1.25.0", "date": "2022-06-12", "source": "CRAN (R 4.2.0)" }, { "package": "R.utils", "version": "2.12.2", "date": "2022-11-11", "source": "CRAN (R 4.2.0)" }, { "package": "R6", "version": "2.5.1", "date": "2021-08-19", "source": "CRAN (R 4.2.0)" }, { "package": "remotes", "version": "2.4.2", "date": "2021-11-30", "source": "CRAN (R 4.2.0)" }, { "package": "rlang", "version": "1.0.6", "date": "2022-09-24", "source": "CRAN (R 4.2.0)" }, { "package": "rstudioapi", "version": "0.14", "date": "2022-08-22", "source": "CRAN (R 4.2.0)" }, { "package": "sessioninfo", "version": "1.2.2", "date": "2021-12-06", "source": "CRAN (R 4.2.0)" }, { "package": "tibble", "version": "3.1.8", "date": "2022-07-22", "source": "CRAN (R 4.2.0)" }, { "package": "tidyselect", "version": "1.2.0", "date": "2022-10-10", "source": "CRAN (R 4.2.0)" }, { "package": "utf8", "version": "1.2.2", "date": "2021-07-24", "source": "CRAN (R 4.2.0)" }, { "package": "vctrs", "version": "0.5.1", "date": "2022-11-16", "source": "CRAN (R 4.2.0)" }, { "package": "waldo", "version": "0.4.0", "date": "2022-03-16", "source": "CRAN (R 4.2.0)" }, { "package": "withr", "version": "2.5.0", "date": "2022-03-03", "source": "CRAN (R 4.2.0)" } ] }, "options": { "iterations": 1, "drop_caches": false, "cpu_count": 10 }, "output": "### RESULTS HAVE BEEN PARSED ###", "rscript": ["", "options(error = function() { traceback(3); q(status = 1)})", "options(Ncpus = 10)", "options(HTTPUserAgent = sprintf(\"R/%s R (%s)\", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os)))", "Sys.setenv(VROOM_THREADS = 10)", "Sys.setenv(R_DATATABLE_NUM_THREADS = 10)", "arrow:::SetCpuThreadPoolCapacity(as.integer(10))", "library(arrowbench)", "out <- run_bm(duration = 0.01, grid = TRUE, error = NULL, bm = structure(list(", " name = \"placebo\", setup = function(duration = 0.01, error_type = NULL, output_type = NULL, grid = TRUE) {", " BenchEnvironment(placebo_func = function() {", " if (!is.null(output_type)) {", " msg <- \"here's some output\"", " if (output_type == \"message\") {", " message(\"A message: \", msg)", " } else if (output_type == \"warning\") {", " warning(\"A warning:\", msg)", " } else if (output_type == \"cat\") {", " cat(\"A cat:\", msg)", " }", " }", " ", " if (!is.null(error_type)) {", " msg <- \"something went wrong (but I knew that)\"", " if (error_type == \"rlang::abort\") {", " rlang::abort(msg)", " }", " stop(msg)", " }", " Sys.sleep(duration)", " })", " }, before_each = TRUE, run = quote({", " placebo_func()", " }), after_each = TRUE, teardown = TRUE, valid_params = function(params) {", " params", " }, case_version = function(params) NULL, batch_id_fun = function(params) uuid(), ", " tags_fun = function(params) params, packages_used = function(params) {", " \"base\"", " }), class = \"Benchmark\"), n_iter = 1, batch_id = \"065ce6d778a54d98bf1d1d4e0951f8b1\", ", " profiling = FALSE, global_params = list(cpu_count = 10L, ", " lib_path = \"latest\"))", "cat(\"\n##### RESULTS FOLLOW\n\")", "cat(out$json)", "cat(\"\n##### RESULTS END\n\")"] }, "context": { "arrow_compiler_flags": " -Qunused-arguments -fcolor-diagnostics -O2 -DNDEBUG", "benchmark_language": "R" }, "github": { "repository": "https://github.com/apache/arrow", "commit": null, "pr_number": null } } ```

The key problem is that these lines: https://github.com/voltrondata-labs/arrowbench/blob/main/R/run.R#L431-L436 need to be improved (which may require moving them so more metadata is available).

jonkeane commented 1 year ago

🎉 I'm glad to see this rough edge refiend!