Open fredericlarue opened 2 years ago
I tried to fix the bug
using the temporary arrays you suggested.
Unfortunately this leads to strong artifacts at vertical lines.
Example: left new code with temporary arrays, right old code
I can provide a patch with the temporary array if someone is willing to test/review...
May I have a look your patch? I did some test too to fix this, and did not end up to such an issue. Actually, I dig a lot in the code for a few days now, and I found several other code issues like this one.
May I have a look your patch?
Sure:
diff --git a/rtengine/amaze_demosaic_RT.cc b/rtengine/amaze_demosaic_RT.cc
index 2041f3130..01f74370a 100644
--- a/rtengine/amaze_demosaic_RT.cc
+++ b/rtengine/amaze_demosaic_RT.cc
@@ -135,7 +135,7 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
constexpr int cldf = 2; // factor to multiply cache line distance. 1 = 64 bytes, 2 = 128 bytes ...
// assign working space
- char *buffer = (char *) calloc(14 * sizeof(float) * ts * ts + sizeof(char) * ts * tsh + 18 * cldf * 64 + 63, 1);
+ char *buffer = (char *) calloc(16 * sizeof(float) * ts * ts + sizeof(char) * ts * tsh + 20 * cldf * 64 + 63, 1);
// aligned to 64 byte boundary
char *data = (char*)( ( uintptr_t(buffer) + uintptr_t(63)) / 64 * 64);
@@ -148,8 +148,10 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
float *dirwts1 = (float (*)) ((char*)dirwts0 + sizeof(float) * ts * ts + cldf * 64); // 1
// vertically interpolated colour differences G-R, G-B
float *vcd = (float (*)) ((char*)dirwts1 + sizeof(float) * ts * ts + cldf * 64); // 1
+ float *vcdtmp = (float (*)) ((char*)vcd + sizeof(float) * ts * ts + cldf * 64); // 1
// horizontally interpolated colour differences
- float *hcd = (float (*)) ((char*)vcd + sizeof(float) * ts * ts + cldf * 64); // 1
+ float *hcd = (float (*)) ((char*)vcdtmp + sizeof(float) * ts * ts + cldf * 64); // 1
+ float *hcdtmp = (float (*)) ((char*)hcd + sizeof(float) * ts * ts + cldf * 64); // 1
// alternative vertical interpolation
float *vcdalt = (float (*)) ((char*)hcd + sizeof(float) * ts * ts + cldf * 64); // 1
// alternative horizontal interpolation
@@ -435,8 +437,12 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
grarv = vself( clipmask, grhav, grarv);
//use HA if highlights are (nearly) clipped
- STVF(vcd[indx], vself( clipmask, vcdaltv, sgnv * (vintpf(vwtv, gdarv, guarv) - cfav)));
- STVF(hcd[indx], vself( clipmask, hcdaltv, sgnv * (vintpf(hwtv, grarv, glarv) - cfav)));
+ vfloat tmpv = vself( clipmask, vcdaltv, sgnv * (vintpf(vwtv, gdarv, guarv) - cfav));
+ STVF(vcd[indx], tmpv);
+ STVF(vcdtmp[indx], tmpv);
+ vfloat tmph = vself( clipmask, hcdaltv, sgnv * (vintpf(hwtv, grarv, glarv) - cfav));
+ STVF(hcd[indx], tmph);
+ STVF(hcdtmp[indx], tmph);
//differences of interpolations in opposite directions
STVF(dgintv[indx], vminf(SQRV(guhav - gdhav), SQRV(guarv - gdarv)));
@@ -500,14 +506,14 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
//interpolated colour differences
if (fcswitch) {
- vcd[indx] = cfa[indx] - (vwt * gdar + (1.f - vwt) * guar);
- hcd[indx] = cfa[indx] - (hwt * grar + (1.f - hwt) * glar);
+ vcdtmp[indx] = vcd[indx] = cfa[indx] - (vwt * gdar + (1.f - vwt) * guar);
+ hcdtmp[indx] = hcd[indx] = cfa[indx] - (hwt * grar + (1.f - hwt) * glar);
vcdalt[indx] = cfa[indx] - Gintvha;
hcdalt[indx] = cfa[indx] - Ginthha;
} else {
//interpolated colour differences
- vcd[indx] = (vwt * gdar + (1.f - vwt) * guar) - cfa[indx];
- hcd[indx] = (hwt * grar + (1.f - hwt) * glar) - cfa[indx];
+ vcdtmp[indc] = vcd[indx] = (vwt * gdar + (1.f - vwt) * guar) - cfa[indx];
+ hcdtmp[indx] = hcd[indx] = (hwt * grar + (1.f - hwt) * glar) - cfa[indx];
vcdalt[indx] = Gintvha - cfa[indx];
hcdalt[indx] = Ginthha - cfa[indx];
}
@@ -520,8 +526,8 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
gdar = gdha;
glar = glha;
grar = grha;
- vcd[indx] = vcdalt[indx];
- hcd[indx] = hcdalt[indx];
+ vcdtmp[indx] = vcd[indx] = vcdalt[indx];
+ hcdtmp[indc] = hcd[indx] = hcdalt[indx];
}
//differences of interpolations in opposite directions
@@ -553,12 +559,12 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
sgn3v = -sgn3v;
for (int indx = rr * ts + 4; indx < rr * ts + cc1 - 4; indx += 4) {
- vfloat hcdv = LVF( hcd[indx] );
- vfloat hcdvarv = SQRV(LVFU(hcd[indx - 2]) - hcdv) + SQRV(LVFU(hcd[indx - 2]) - LVFU(hcd[indx + 2])) + SQRV(hcdv - LVFU(hcd[indx + 2]));
+ vfloat hcdv = LVF(hcdtmp[indx] );
+ vfloat hcdvarv = SQRV(LVFU(hcdtmp[indx - 2]) - hcdv) + SQRV(LVFU(hcdtmp[indx - 2]) - LVFU(hcdtmp[indx + 2])) + SQRV(hcdv - LVFU(hcdtmp[indx + 2]));
vfloat hcdaltv = LVF( hcdalt[indx] );
vfloat hcdaltvarv = SQRV(LVFU(hcdalt[indx - 2]) - hcdaltv) + SQRV(LVFU(hcdalt[indx - 2]) - LVFU(hcdalt[indx + 2])) + SQRV(hcdaltv - LVFU(hcdalt[indx + 2]));
- vfloat vcdv = LVF( vcd[indx] );
- vfloat vcdvarv = SQRV(LVF(vcd[indx - v2]) - vcdv) + SQRV(LVF(vcd[indx - v2]) - LVF(vcd[indx + v2])) + SQRV(vcdv - LVF(vcd[indx + v2]));
+ vfloat vcdv = LVF(vcdtmp[indx] );
+ vfloat vcdvarv = SQRV(LVF(vcdtmp[indx - v2]) - vcdv) + SQRV(LVF(vcdtmp[indx - v2]) - LVF(vcdtmp[indx + v2])) + SQRV(vcdv - LVF(vcdtmp[indx + v2]));
vfloat vcdaltv = LVF( vcdalt[indx] );
vfloat vcdaltvarv = SQRV(LVF(vcdalt[indx - v2]) - vcdaltv) + SQRV(LVF(vcdalt[indx - v2]) - LVF(vcdalt[indx + v2])) + SQRV(vcdaltv - LVF(vcdalt[indx + v2]));
@@ -598,9 +604,9 @@ void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, c
for (int rr = 4; rr < rr1 - 4; rr++) {
for (int cc = 4, indx = rr * ts + cc, c = fc(cfarray, rr, cc) & 1; cc < cc1 - 4; cc++, indx++) {
- float hcdvar = 3.f * (SQR(hcd[indx - 2]) + SQR(hcd[indx]) + SQR(hcd[indx + 2])) - SQR(hcd[indx - 2] + hcd[indx] + hcd[indx + 2]);
+ float hcdvar = 3.f * (SQR(hcdtmp[indx - 2]) + SQR(hcdtmp[indx]) + SQR(hcdtmp[indx + 2])) - SQR(hcdtmp[indx - 2] + hcdtmp[indx] + hcdtmp[indx + 2]);
float hcdaltvar = 3.f * (SQR(hcdalt[indx - 2]) + SQR(hcdalt[indx]) + SQR(hcdalt[indx + 2])) - SQR(hcdalt[indx - 2] + hcdalt[indx] + hcdalt[indx + 2]);
- float vcdvar = 3.f * (SQR(vcd[indx - v2]) + SQR(vcd[indx]) + SQR(vcd[indx + v2])) - SQR(vcd[indx - v2] + vcd[indx] + vcd[indx + v2]);
+ float vcdvar = 3.f * (SQR(vcdtmp[indx - v2]) + SQR(vcdtmp[indx]) + SQR(vcdtmp[indx + v2])) - SQR(vcdtmp[indx - v2] + vcdtmp[indx] + vcdtmp[indx + v2]);
float vcdaltvar = 3.f * (SQR(vcdalt[indx - v2]) + SQR(vcdalt[indx]) + SQR(vcdalt[indx + v2])) - SQR(vcdalt[indx - v2] + vcdalt[indx] + vcdalt[indx + v2]);
//choose the smallest variance; this yields a smoother interpolation
An error:
float *vcdalt = (float (*)) ((char*)hcd + ...
should be:
float *vcdalt = (float (*)) ((char*)hcdtmp + ...
Thanks for spotting 👍 For eyes are better than two...
Sure! 😉
By the way, below are two locations in the file where I noticed the same problem: https://github.com/Beep6581/RawTherapee/blob/9a9e0acbf27d589dbf6e964fd20a25893eedf749/rtengine/amaze_demosaic_RT.cc#L972-L978
@fredericlarue Let's make a better amaze together. I have to stop for today, but will continue tomorrow.
Let's make a better amaze together.
Would be... amazing! 😛
And probably there too...
Well, after having looked at the code more thoroughly, it appears that this last one is not an issue: the array is split between red and blue just before, and this loop only updates undefined values without erasing any valid one.
Sure! 😉
By the way, below are two locations in the file where I noticed the same problem:
But for these two, there is still a problem! :) The results of these operations are indeed different with and without the use of a temporary array.
Sorry for the delay. I was on gcc 11 to test another issue. Now I downgraded to gcc 10.3 and continue work. Working on amaze using a buggy gcc 11 didn't make sense...
No hurry ;)
I could have done the modifications by myself, but I must admit that I'm too lazy to get and compile the whole thing 🤣
Variance of "cd" is computed from neighborhing pixels, but "cd" values are modified at the same time instead of using a temporary array. (The related SSE code should be checked too)
https://github.com/Beep6581/RawTherapee/blob/9a9e0acbf27d589dbf6e964fd20a25893eedf749/rtengine/amaze_demosaic_RT.cc#L599-L613