jrmuizel / raqote

Rust 2D graphics library
BSD 3-Clause "New" or "Revised" License
1.04k stars 59 forks source link

Some blending modes are extremely slow #156

Open RazrFalcon opened 4 years ago

RazrFalcon commented 4 years ago

I've finally run benchmarks for all blending modes.

Code:

use bencher::{benchmark_group, benchmark_main, Bencher};

/// Benchmarks a single raqote path fill performed with `blend_mode`.
///
/// A 1000x1000 draw target first receives one backdrop shape, filled with
/// `BlendMode::SrcOver`. The measured closure then fills a second shape with
/// the requested blend mode on every iteration. The target is not reset
/// between iterations.
fn fill_raqote(blend_mode: raqote::BlendMode, bencher: &mut Bencher) {
    use raqote::*;

    let mut target = DrawTarget::new(1000, 1000);

    // Backdrop: drawn exactly once, outside of the measured closure.
    let mut outline = PathBuilder::new();
    outline.move_to(60.0, 60.0);
    outline.line_to(160.0, 940.0);
    outline.cubic_to(380.0, 840.0, 660.0, 800.0, 940.0, 800.0);
    outline.cubic_to(740.0, 460.0, 440.0, 160.0, 60.0, 60.0);
    outline.close();
    let backdrop_path = outline.finish();

    // raqote uses ARGB order.
    let backdrop_paint = Source::from(Color::new(200, 50, 127, 150));
    let backdrop_options = DrawOptions {
        antialias: AntialiasMode::None,
        alpha: 1.0,
        blend_mode: BlendMode::SrcOver,
    };
    target.fill(&backdrop_path, &backdrop_paint, &backdrop_options);

    // Measured shape: built from the same kinds of segments as the backdrop,
    // but starting from the opposite top corner.
    let mut outline = PathBuilder::new();
    outline.move_to(940.0, 60.0);
    outline.line_to(840.0, 940.0);
    outline.cubic_to(620.0, 840.0, 340.0, 800.0, 60.0, 800.0);
    outline.cubic_to(260.0, 460.0, 560.0, 160.0, 940.0, 60.0);
    outline.close();
    let measured_path = outline.finish();

    // Also ARGB.
    let measured_paint = Source::from(Color::new(180, 220, 140, 75));
    let measured_options = DrawOptions {
        antialias: AntialiasMode::None,
        alpha: 1.0,
        blend_mode, // the mode under test
    };

    bencher.iter(|| target.fill(&measured_path, &measured_paint, &measured_options));
}

fn clear_raqote(bencher: &mut Bencher)               { fill_raqote(raqote::BlendMode::Clear, bencher); }
fn source_raqote(bencher: &mut Bencher)              { fill_raqote(raqote::BlendMode::Src, bencher); }
fn destination_raqote(bencher: &mut Bencher)         { fill_raqote(raqote::BlendMode::Dst, bencher); }
fn source_over_raqote(bencher: &mut Bencher)         { fill_raqote(raqote::BlendMode::SrcOver, bencher); }
fn destination_over_raqote(bencher: &mut Bencher)    { fill_raqote(raqote::BlendMode::DstOver, bencher); }
fn source_in_raqote(bencher: &mut Bencher)           { fill_raqote(raqote::BlendMode::SrcIn, bencher); }
fn destination_in_raqote(bencher: &mut Bencher)      { fill_raqote(raqote::BlendMode::DstIn, bencher); }
fn source_out_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::SrcOut, bencher); }
fn destination_out_raqote(bencher: &mut Bencher)     { fill_raqote(raqote::BlendMode::DstOut, bencher); }
fn source_atop_raqote(bencher: &mut Bencher)         { fill_raqote(raqote::BlendMode::SrcAtop, bencher); }
fn destination_atop_raqote(bencher: &mut Bencher)    { fill_raqote(raqote::BlendMode::DstAtop, bencher); }
fn xor_raqote(bencher: &mut Bencher)                 { fill_raqote(raqote::BlendMode::Xor, bencher); }
fn plus_raqote(bencher: &mut Bencher)                { fill_raqote(raqote::BlendMode::Add, bencher); }
// fn modulate_raqote(bencher: &mut Bencher)            { fill_raqote(raqote::BlendMode::Modulate, bencher); } // TODO: missing?
fn screen_raqote(bencher: &mut Bencher)              { fill_raqote(raqote::BlendMode::Screen, bencher); }
fn overlay_raqote(bencher: &mut Bencher)             { fill_raqote(raqote::BlendMode::Overlay, bencher); }
fn darken_raqote(bencher: &mut Bencher)              { fill_raqote(raqote::BlendMode::Darken, bencher); }
fn lighten_raqote(bencher: &mut Bencher)             { fill_raqote(raqote::BlendMode::Lighten, bencher); }
fn color_dodge_raqote(bencher: &mut Bencher)         { fill_raqote(raqote::BlendMode::ColorDodge, bencher); }
fn color_burn_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::ColorBurn, bencher); }
fn hard_light_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::HardLight, bencher); }
fn soft_light_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::SoftLight, bencher); }
fn difference_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::Difference, bencher); }
fn exclusion_raqote(bencher: &mut Bencher)           { fill_raqote(raqote::BlendMode::Exclusion, bencher); }
fn multiply_raqote(bencher: &mut Bencher)            { fill_raqote(raqote::BlendMode::Multiply, bencher); }
fn hue_raqote(bencher: &mut Bencher)                 { fill_raqote(raqote::BlendMode::Hue, bencher); }
fn saturation_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::Saturation, bencher); }
fn color_raqote(bencher: &mut Bencher)               { fill_raqote(raqote::BlendMode::Color, bencher); }
fn luminosity_raqote(bencher: &mut Bencher)          { fill_raqote(raqote::BlendMode::Luminosity, bencher); }

// Collects every blend-mode benchmark function into a single group named `benches`.
// `modulate_raqote` stays commented out here because its wrapper function is
// commented out as well.
benchmark_group!(benches,
    clear_raqote,
    source_raqote,
    destination_raqote,
    source_over_raqote,
    destination_over_raqote,
    source_in_raqote,
    destination_in_raqote,
    source_out_raqote,
    destination_out_raqote,
    source_atop_raqote,
    destination_atop_raqote,
    xor_raqote,
    plus_raqote,
    // modulate_raqote,
    screen_raqote,
    overlay_raqote,
    darken_raqote,
    lighten_raqote,
    color_dodge_raqote,
    color_burn_raqote,
    hard_light_raqote,
    soft_light_raqote,
    difference_raqote,
    exclusion_raqote,
    multiply_raqote,
    hue_raqote,
    saturation_raqote,
    color_raqote,
    luminosity_raqote
);

// Passes the `benches` group to bencher's `benchmark_main!` macro, which
// provides the entry point of the benchmark binary.
benchmark_main!(benches);

Results, sorted roughly by raqote's time per iteration (slowest first):

test saturation_raqote          ... bench:  13,752,382 ns/iter (+/- 111,587)
test saturation_cairo           ... bench:   7,443,261 ns/iter (+/- 80,673)
test saturation_skia            ... bench:   1,411,117 ns/iter (+/- 15,472)

test hue_raqote                 ... bench:  13,716,827 ns/iter (+/- 217,137)
test hue_cairo                  ... bench:   7,517,902 ns/iter (+/- 33,529)
test hue_skia                   ... bench:   1,413,791 ns/iter (+/- 19,080)

test soft_light_raqote          ... bench:  11,941,630 ns/iter (+/- 291,833)
test soft_light_cairo           ... bench:   5,900,415 ns/iter (+/- 26,181)
test soft_light_skia            ... bench:     986,403 ns/iter (+/- 10,433)

test color_raqote               ... bench:  10,537,391 ns/iter (+/- 75,747)
test color_cairo                ... bench:   6,070,058 ns/iter (+/- 38,963)
test color_skia                 ... bench:   1,115,800 ns/iter (+/- 4,762)

test luminosity_raqote          ... bench:  10,488,916 ns/iter (+/- 129,615)
test luminosity_cairo           ... bench:   6,124,294 ns/iter (+/- 46,113)
test luminosity_skia            ... bench:   1,090,402 ns/iter (+/- 12,929)

test color_dodge_raqote         ... bench:   9,634,447 ns/iter (+/- 80,584)
test color_dodge_cairo          ... bench:   5,151,488 ns/iter (+/- 93,180)
test color_dodge_skia           ... bench:     679,996 ns/iter (+/- 8,442)

test color_burn_raqote          ... bench:   9,617,102 ns/iter (+/- 177,719)
test color_burn_cairo           ... bench:   5,007,181 ns/iter (+/- 36,599)
test color_burn_skia            ... bench:     708,962 ns/iter (+/- 9,257)

test hard_light_raqote          ... bench:   7,034,417 ns/iter (+/- 70,127)
test hard_light_cairo           ... bench:   3,442,584 ns/iter (+/- 37,358)
test hard_light_skia            ... bench:     370,307 ns/iter (+/- 6,300)

test overlay_raqote             ... bench:   7,016,443 ns/iter (+/- 69,148)
test overlay_cairo              ... bench:   3,412,192 ns/iter (+/- 38,283)
test overlay_skia               ... bench:     385,684 ns/iter (+/- 3,953)

test exclusion_raqote           ... bench:   6,082,393 ns/iter (+/- 104,945)
test exclusion_cairo            ... bench:   3,842,131 ns/iter (+/- 15,905)
test exclusion_skia             ... bench:     316,235 ns/iter (+/- 2,617)

test multiply_raqote            ... bench:   5,986,364 ns/iter (+/- 76,662)
test multiply_cairo             ... bench:   3,608,817 ns/iter (+/- 21,714)
test multiply_skia              ... bench:     316,841 ns/iter (+/- 4,439)

test difference_raqote          ... bench:   5,718,776 ns/iter (+/- 56,842)
test difference_cairo           ... bench:   3,936,284 ns/iter (+/- 38,808)
test difference_skia            ... bench:     326,285 ns/iter (+/- 766)

test lighten_raqote             ... bench:   5,255,202 ns/iter (+/- 44,182)
test lighten_cairo              ... bench:   3,584,409 ns/iter (+/- 15,092)
test lighten_skia               ... bench:     319,867 ns/iter (+/- 5,679)

test darken_raqote              ... bench:   5,230,384 ns/iter (+/- 79,313)
test darken_cairo               ... bench:   3,579,931 ns/iter (+/- 28,375)
test darken_skia                ... bench:     317,215 ns/iter (+/- 4,315)

test plus_raqote                ... bench:   4,892,311 ns/iter (+/- 561,835)
test plus_cairo                 ... bench:     122,496 ns/iter (+/- 3,177)
test plus_skia                  ... bench:     286,504 ns/iter (+/- 1,964)

test screen_cairo               ... bench:   3,837,010 ns/iter (+/- 22,167)
test screen_raqote              ... bench:   1,459,500 ns/iter (+/- 31,974)
test screen_skia                ... bench:     312,507 ns/iter (+/- 1,955)

test destination_over_raqote    ... bench:   3,446,445 ns/iter (+/- 43,492)
test destination_over_cairo     ... bench:     236,970 ns/iter (+/- 5,202)
test destination_over_skia      ... bench:     306,542 ns/iter (+/- 2,728)

test source_over_raqote         ... bench:   2,645,207 ns/iter (+/- 254,594)
test source_over_cairo          ... bench:     212,838 ns/iter (+/- 4,089)
test source_over_skia           ... bench:     261,943 ns/iter (+/- 1,591)

test xor_raqote                 ... bench:   1,634,236 ns/iter (+/- 168,279)
test xor_cairo                  ... bench:     671,439 ns/iter (+/- 4,048)
test xor_skia                   ... bench:     310,439 ns/iter (+/- 4,453)

test destination_atop_raqote    ... bench:   1,511,405 ns/iter (+/- 48,424)
test destination_atop_cairo     ... bench:   1,220,442 ns/iter (+/- 10,316)
test destination_atop_skia      ... bench:     306,610 ns/iter (+/- 1,755)

test source_atop_raqote         ... bench:   1,501,419 ns/iter (+/- 31,417)
test source_atop_cairo          ... bench:     663,254 ns/iter (+/- 3,940)
test source_atop_skia           ... bench:     307,222 ns/iter (+/- 1,550)

test source_in_raqote           ... bench:   1,163,684 ns/iter (+/- 57,542)
test source_in_cairo            ... bench:   1,104,817 ns/iter (+/- 16,719)
test source_in_skia             ... bench:     291,008 ns/iter (+/- 3,723)

test source_out_raqote          ... bench:   1,182,724 ns/iter (+/- 26,307)
test source_out_cairo           ... bench:   1,122,571 ns/iter (+/- 9,405)
test source_out_skia            ... bench:     297,537 ns/iter (+/- 3,833)

test source_raqote              ... bench:   1,114,946 ns/iter (+/- 57,617)
test source_cairo               ... bench:      62,548 ns/iter (+/- 2,803)
test source_skia                ... bench:      50,949 ns/iter (+/- 2,114)

test destination_in_raqote      ... bench:   1,193,288 ns/iter (+/- 61,003)
test destination_in_cairo       ... bench:   1,103,758 ns/iter (+/- 16,687)
test destination_in_skia        ... bench:     293,412 ns/iter (+/- 2,173)

test destination_out_raqote     ... bench:   1,193,241 ns/iter (+/- 302,027)
test destination_out_cairo      ... bench:     556,088 ns/iter (+/- 3,815)
test destination_out_skia       ... bench:     299,481 ns/iter (+/- 2,212)

test clear_raqote               ... bench:   1,043,659 ns/iter (+/- 259,923)
test clear_cairo                ... bench:      62,331 ns/iter (+/- 758)
test clear_skia                 ... bench:      50,203 ns/iter (+/- 1,209)

test destination_raqote         ... bench:     980,993 ns/iter (+/- 47,829)
test destination_cairo          ... bench:           4 ns/iter (+/- 0)      (wut?!)
test destination_skia           ... bench:       5,318 ns/iter (+/- 187)

PS: surprisingly, cairo is also extremely slow.

PPS: As far as I understand, we cannot reach Skia's performance in Rust, because Rust doesn't support clang's vector extensions.

PPPS: cairo and Skia are built with -march=native, and raqote is built with -Ctarget-cpu=native (znver2, to be more precise).

jrmuizel commented 4 years ago

Is there a way I can run the skia versions?

RazrFalcon commented 4 years ago

It's not trivial. Here is how I'm building it on Linux:

# Fetch the Skia sources and enter the checkout.
git clone https://skia.googlesource.com/skia.git
cd skia
git fetch --all
# Create a local `m85` branch from origin/chrome/m85 and switch to it.
git checkout -b m85 origin/chrome/m85
python2 tools/git-sync-deps # python3 will not work
# Generate the build configuration in out/Shared: a non-debug component build
# compiled with clang/clang++ and -march=native, forcing the raster pipeline
# blitter (SK_FORCE_RASTER_PIPELINE_BLITTER) and switching off the optional
# features listed below.
bin/gn gen out/Shared --args='
    is_official_build=false
    is_component_build=true
    is_debug=false
    cc="clang"
    cxx="clang++"
    extra_cflags_cc=["-march=native", "-DSK_FORCE_RASTER_PIPELINE_BLITTER"]
    werror=false
    paragraph_gms_enabled=false
    paragraph_tests_enabled=false
    skia_enable_android_utils=false
    skia_enable_discrete_gpu=false
    skia_enable_gpu=false
    skia_enable_nvpr=false
    skia_enable_particles=false
    skia_enable_pdf=false
    skia_enable_skottie=false
    skia_enable_skrive=false
    skia_enable_skshaper=false
    skia_enable_sksl_interpreter=false
    skia_enable_skvm_jit=false
    skia_enable_tools=false
    skia_use_expat=false
    skia_use_gl=false
    skia_use_harfbuzz=false
    skia_use_icu=false
    skia_use_libgifcodec=false
    skia_use_libheif=false
    skia_use_libjpeg_turbo_decode=false
    skia_use_libjpeg_turbo_encode=false
    skia_use_libwebp_decode=false
    skia_use_libwebp_encode=false
    skia_use_lua=false
    skia_use_piex=false'
# Run the build in the directory generated above.
ninja -C out/Shared

PS: Yes, you must use clang; otherwise it's pointless.

RazrFalcon commented 4 years ago

Benchmarks can be found here: tiny-skia/benches