akaihola / shootadoc-rust

A Rust implementation of Shoot A Doc
BSD 3-Clause "New" or "Revised" License
0 stars 1 forks source link

Profile and optimize #7

Open akaihola opened 3 years ago

akaihola commented 3 years ago

`cargo flamegraph` shows, as expected, that 54% of the running time is spent in the `extreme()` function.

Screenshot from 2020-11-17 09-20-48

Interesting resources:

akaihola commented 3 years ago

I measured three different implementations of apply2():

$ time ( for i in 1 2 3; do target/release/shootadoc-rust /tmp/paper.jpg /tmp/paper.jpg /tmp/paper.jpg; done )
// 4.54s 4.78s 4.69s 4.61s 4.60s
/// Combines two images pixel by pixel into a newly allocated buffer.
///
/// The output has the width and height of `img1`. For every coordinate, `func`
/// receives the pixel of `img1` and the pixel of `img2` at that coordinate (in
/// that order), and its return value becomes the output pixel.
///
/// This variant lets `ImageBuffer::from_fn` drive the iteration.
///
/// NOTE(review): `img2` is read at `img1`'s coordinates without a size check;
/// presumably callers always pass images of equal dimensions — confirm.
fn apply2<I, P, S, F>(img1: I, img2: &I, func: F) -> ImageBuffer<P, Vec<S>>
where
    I: GenericImageView<Pixel = P>,
    P: Pixel<Subpixel = S> + 'static,
    S: Primitive + 'static,
    F: Fn(P, P) -> P,
{
    let width = img1.width();
    let height = img1.height();
    // Read `img1` first, then `img2`, and hand both pixels to the caller's function.
    let combine = |x: u32, y: u32| -> P { func(img1.get_pixel(x, y), img2.get_pixel(x, y)) };
    ImageBuffer::from_fn(width, height, combine)
}
// 4.58s 4.54s 4.58s 4.35s 4.58s
/// Combines two images pixel by pixel into a newly allocated buffer.
///
/// The output has the dimensions of `img1`. For every coordinate, `func`
/// receives the pixel of `img1` and the pixel of `img2` at that coordinate (in
/// that order), and its return value is stored in the output.
///
/// This variant walks the coordinates with explicit nested loops (rows in the
/// outer loop, columns in the inner loop) and writes each result with
/// `put_pixel`.
///
/// NOTE(review): `img2` is read at `img1`'s coordinates without a size check;
/// presumably callers always pass images of equal dimensions — confirm.
fn apply2<I, P, S, F>(img1: I, img2: &I, func: F) -> ImageBuffer<P, Vec<S>>
where
    I: GenericImageView<Pixel = P>,
    P: Pixel<Subpixel = S> + 'static,
    S: Primitive + 'static,
    F: Fn(P, P) -> P,
{
    let (width, height) = img1.dimensions();
    let mut out = ImageBuffer::new(width, height);
    for row in 0..height {
        for col in 0..width {
            let first = img1.get_pixel(col, row);
            let second = img2.get_pixel(col, row);
            out.put_pixel(col, row, func(first, second));
        }
    }
    out
}
// 5.82s 5.93s 6.29s 6.07s 6.08s
/// Combines two images pixel by pixel into a newly allocated buffer.
///
/// The output has the dimensions of `img1`. The output's mutable pixels are
/// walked in lockstep with the pixel iterators of `img1` and `img2`; each
/// output pixel is overwritten with `func(p1, p2)`, where `p1` comes from
/// `img1` and `p2` from `img2`. The coordinates yielded by the iterators are
/// ignored.
///
/// Iteration stops as soon as the shortest of the three iterators is
/// exhausted.
///
/// NOTE(review): the three iterators are paired purely by position, with no
/// size check; presumably callers always pass images of equal dimensions —
/// confirm.
fn apply2<I, P, S, F>(img1: I, img2: &I, func: F) -> ImageBuffer<P, Vec<S>>
where
    I: GenericImageView<Pixel = P>,
    P: Pixel<Subpixel = S> + 'static,
    S: Primitive + 'static,
    F: Fn(P, P) -> P,
{
    let (width, height) = img1.dimensions();
    let mut out = ImageBuffer::new(width, height);
    // Destination first, then `img1`, then `img2`: the same pairing order the
    // three-way zip macro produces.
    let lockstep = out
        .enumerate_pixels_mut()
        .zip(img1.pixels())
        .zip(img2.pixels());
    for (((_, _, dst), (_, _, first)), (_, _, second)) in lockstep {
        *dst = func(first, second);
    }
    out
}