Open GoogleCodeExporter opened 9 years ago
Passes on Intel
LIBYUV_WIDTH=130 LIBYUV_HEIGHT=80 out/Release/libyuv_unittest
--gtest_filter=*ARGBToI411*
Note: Google Test filter = *ARGBToI411*
[==========] Running 4 tests from 1 test case.
[----------] Global test environment set-up.
[----------] 4 tests from libyuvTest
[ RUN ] libyuvTest.ARGBToI411_Any
[ OK ] libyuvTest.ARGBToI411_Any (2 ms)
[ RUN ] libyuvTest.ARGBToI411_Unaligned
[ OK ] libyuvTest.ARGBToI411_Unaligned (1 ms)
[ RUN ] libyuvTest.ARGBToI411_Invert
[ OK ] libyuvTest.ARGBToI411_Invert (1 ms)
[ RUN ] libyuvTest.ARGBToI411_Opt
[ OK ] libyuvTest.ARGBToI411_Opt (1 ms)
[----------] 4 tests from libyuvTest (5 ms total)
[----------] Global test environment tear-down
[==========] 4 tests from 1 test case ran. (5 ms total)
[ PASSED ] 4 tests.
Original comment by fbarch...@google.com
on 17 Sep 2015 at 8:04
Original comment by fbarch...@google.com
on 17 Sep 2015 at 8:09
Original comment by fbarch...@chromium.org
on 12 Oct 2015 at 11:48
With default parameters, this function passes:
[ RUN ] LibYUVConvertTest.ARGBToI411_Any
[ OK ] LibYUVConvertTest.ARGBToI411_Any (5 ms)
[ RUN ] LibYUVConvertTest.ARGBToI411_Unaligned
[ OK ] LibYUVConvertTest.ARGBToI411_Unaligned (4 ms)
[ RUN ] LibYUVConvertTest.ARGBToI411_Invert
[ OK ] LibYUVConvertTest.ARGBToI411_Invert (4 ms)
[ RUN ] LibYUVConvertTest.ARGBToI411_Opt
[ OK ] LibYUVConvertTest.ARGBToI411_Opt (4 ms)
[----------] 4 tests from LibYUVConvertTest (18 ms total)
Original comment by fbarch...@chromium.org
on 14 Oct 2015 at 10:33
LIBYUV_WIDTH=130 LIBYUV_HEIGHT=80 LIBYUV_REPEAT=1 LIBYUV_DISABLE_ASM=1
out/Release/libyuv_unittest
This is the Intel code (disassembly of ARGBToUV411Row_C):
0000000000556f00 <ARGBToUV411Row_C>:
push %r13
cmp $0x3,%ecx
push %r12
push %rbp
push %rbx
↓ jle 2c3
lea -0x4(%rcx),%r12d
mov %rsi,%rbx
mov %rdi,%rax
shr $0x2,%r12d
add $0x1,%r12
lea (%rdx,%r12,1),%rbp
nop
28: movzbl (%rax),%r9d
movzbl 0x4(%rax),%r8d
add $0x1,%rdx
movzbl 0x1(%rax),%r10d
movzbl 0x2(%rax),%r11d
add $0x1,%rbx
add $0x10,%rax
add %r9d,%r8d
movzbl -0x8(%rax),%r9d
add %r9d,%r8d
movzbl -0x4(%rax),%r9d
add %r9d,%r8d
movzbl -0xb(%rax),%r9d
sar $0x2,%r8d
mov %r8d,%r13d
lea (%r8,%r8,8),%r8d
add %r10d,%r9d
movzbl -0x7(%rax),%r10d
add %r8d,%r8d
add %r10d,%r9d
movzbl -0x3(%rax),%r10d
add %r10d,%r9d
movzbl -0xa(%rax),%r10d
sar $0x2,%r9d
add %r11d,%r10d
movzbl -0x6(%rax),%r11d
add %r11d,%r10d
movzbl -0x2(%rax),%r11d
add %r11d,%r10d
mov $0x70,%r11d
imul %r11d,%r13d
sar $0x2,%r10d
imul $0xffffffb6,%r9d,%r11d
add %r11d,%r13d
imul $0xffffffda,%r10d,%r11d
lea 0x8080(%r13,%r11,1),%r11d
mov $0xffffffa2,%r13d
imul %r13d,%r9d
sar $0x8,%r11d
mov %r11b,-0x1(%rbx)
mov $0x70,%r11d
imul %r11d,%r10d
add %r10d,%r9d
sub %r8d,%r9d
add $0x8080,%r9d
sar $0x8,%r9d
mov %r9b,-0x1(%rdx)
cmp %rbp,%rdx
↑ jne 28
mov %r12,%rax
add %r12,%rsi
shl $0x4,%rax
add %rax,%rdi
104: and $0x3,%ecx
cmp $0x3,%ecx
↓ je 198
cmp $0x2,%ecx
↓ je 238
cmp $0x1,%ecx
↓ je 128
pop %rbx
pop %rbp
pop %r12
pop %r13
← retq
nop
Original comment by fbarch...@chromium.org
on 16 Oct 2015 at 6:16
I've reproduced the error. I had to hard-code the unittest/unittest.cc
dimensions to 130x80. 130x72 also fails, so it's the width not being a multiple
of 4.
411 subsampling is really not used in practice, so it may be better to remove
the functionality than spend time fixing it.
Original comment by fbarch...@google.com
on 20 Oct 2015 at 1:37
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUV411ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
if (IS_ALIGNED(width, 32)) {
ARGBToUV411Row = ARGBToUV411Row_NEON;
}
}
#endif
Original comment by fbarch...@google.com
on 20 Oct 2015 at 1:45
Original issue reported on code.google.com by
zhongwei...@linaro.org
on 15 Sep 2015 at 7:55