Closed alistaire47 closed 1 year ago
Right now, when run with run_benchmark(), benchmarks that error do not properly populate all metadata fields to identify them with the correct history (tags, info, context, machine_info). Instead, with placebo, you get something like
run_benchmark()
placebo
{ "timestamp": "2022-12-09T19:43:23.942867+0000", "error": { "log": ["22: stop(fallback)", "21: signal_abort(cnd, .file)", "20: rlang::abort(msg)", "19: placebo_func()", "18: eval(bm$run, envir = ctx)", "17: eval(bm$run, envir = ctx) at run.R#261", "16: eval(expr, p)", "15: eval.parent(...)", "14: as_bench_time(.Call(system_time_, substitute(expr), parent.frame()))", "13: stats::setNames(as_bench_time(.Call(system_time_, substitute(expr), ", " parent.frame())), c(\"process\", \"real\"))", "12: bench::bench_time(eval.parent(...)) at measure.R#12", "11: eval(expr, p)", "10: eval.parent(expr) at measure.R#35", "9: with_profiling(profiling, {", " timings <- bench::bench_time(eval.parent(...))", " }) at measure.R#11", "8: eval(expr, p)", "7: eval.parent(expr) at measure.R#41", "6: force(expr)", "5: with_gcinfo(eval.parent(expr)) at measure.R#41", "4: with_gc_info({", " prof_file <- with_profiling(profiling, {", " timings <- bench::bench_time(eval.parent(...))", " })", " }) at measure.R#10", "3: measure(eval(bm$run, envir = ctx), profiling = profiling) at run.R#261", "2: run_iteration(bm, ctx, profiling = profiling) at run.R#197", "1: run_bm(duration = 0.01, grid = TRUE, error = \"rlang::abort\", ", " bm = structure(list(name = \"placebo\", setup = function(duration = 0.01, ", " error_type = NULL, output_type = NULL, grid = TRUE) {", " BenchEnvironment(placebo_func = function() {", " if (!is.null(output_type)) {", " msg <- \"here's some output\"", " if (output_type == \"message\") {", " message(\"A message: \", msg)", " } else if (output_type == \"warning\") {", " warning(\"A warning:\", msg)", " } else if (output_type == \"cat\") {", " cat(\"A cat:\", msg)", " }", " }", " if (!is.null(error_type)) {", " msg <- \"something went wrong (but I knew that)\"", " if (error_type == \"rlang::abort\") {", " rlang::abort(msg)", " }", " stop(msg)", " }", " Sys.sleep(duration)", " })", " }, before_each = TRUE, run = quote({", " placebo_func()", " }), after_each = TRUE, teardown = 
TRUE, valid_params = function(params) {", " params", " }, case_version = function(params) NULL, batch_id_fun = function(params) uuid(), ", " tags_fun = function(params) params, packages_used = function(params) {", " \"base\"", " }), class = \"Benchmark\"), n_iter = 1, batch_id = \"065ce6d778a54d98bf1d1d4e0951f8b1\", ", " profiling = FALSE, global_params = list(cpu_count = 10L, ", " lib_path = \"latest\"))"] }, "optional_benchmark_info": { "params": { "duration": 0.01, "grid": true, "error": "rlang::abort", "cpu_count": 10, "name": "placebo" } }, "batch_id": "065ce6d778a54d98bf1d1d4e0951f8b1" }
When placebo does not error, you instead get something like this:
The key problem is that these lines: https://github.com/voltrondata-labs/arrowbench/blob/main/R/run.R#L431-L436 need to be improved (which may require moving them so more metadata is available).
🎉 I'm glad to see this rough edge refined!
Right now, when run with
run_benchmark()
, benchmarks that error do not properly populate all metadata fields to identify them with the correct history (tags, info, context, machine_info). Instead, with placebo
, you get something like the above. When
placebo
does not error, you instead get something like this: The key problem is that these lines: https://github.com/voltrondata-labs/arrowbench/blob/main/R/run.R#L431-L436 need to be improved (which may require moving them so more metadata is available).