The first step in Sobel is inadvertently unoptimized:
#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerGGRow_NEON;
}
}
#endif
In addition, the last step does not handle odd widths.
Testing the C version against the assembly version should show a large difference.
It does show a difference, but not as large as it should be:
set LIBYUV_DISABLE_ASM=0
set LIBYUV_WIDTH=4096
set LIBYUV_HEIGHT=2048
set LIBYUV_REPEAT=999
set LIBYUV_FLAGS=-1
out\release\libyuv_unittest --gtest_filter=*ARGBSobelXY_Opt | findstr /r
"^[^_]*_[^_]*ms"
ARGBSobelXY_Opt (12539 ms)
set LIBYUV_DISABLE_ASM=1
set LIBYUV_WIDTH=4094
set LIBYUV_HEIGHT=2048
set LIBYUV_REPEAT=999
set LIBYUV_FLAGS=0
out\release\libyuv_unittest --gtest_filter=*ARGBSobelXY_Opt | findstr /r
"^[^_]*_[^_]*ms"
ARGBSobelXY_Opt (57926 ms)
set LIBYUV_DISABLE_ASM=0
set LIBYUV_WIDTH=4094
set LIBYUV_HEIGHT=2048
set LIBYUV_REPEAT=999
set LIBYUV_FLAGS=0
out\release\libyuv_unittest --gtest_filter=*ARGBSobelXY_Opt | findstr /r
"^[^_]*_[^_]*ms"
ARGBSobelXY_Opt (22634 ms)
Original issue reported on code.google.com by fbarch...@chromium.org on 26 May 2015 at 11:55
Original issue reported on code.google.com by
fbarch...@chromium.org
on 26 May 2015 at 11:55