darktable-org · jenshannoschwalm · Mar 3, 2026 · Mar 7, 2026 · Mar 4, 2026 · Mar 8, 2026
diff --git a/data/kernels/basic.cl b/data/kernels/basic.cl
@@ -2972,11 +2972,11 @@ static inline float get_image_channel(read_only image2d_t in,
                                       const int c)
 {
   float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
-  if(c == 0)
+  if(c == RED)
     return pixel.x;
-  else if(c == 1)
+  else if(c == GREEN)
     return pixel.y;
-  else if(c == 2)
+  else if(c == BLUE)
     return pixel.z;
 
   return pixel.w;
@@ -3846,7 +3846,7 @@ interpolation_resample (read_only image2d_t in,
     for (int ix = 0; ix < hl && yvalid; ix++)
     {
       const int xx = lindex[ix];
-      float4 hpixel = read_imagef(in, sampleri,(int2)(xx, yy));
+      float4 hpixel = readpixel(in, xx, yy);
       vpixel += hpixel * lkernel[ix];
     }
 
@@ -3868,12 +3868,8 @@ interpolation_resample (read_only image2d_t in,
   }
 
   // store final result
-  if (iy == 0 && x < width && y < height)
-  {
-    // Clip negative RGB that may be produced by Lanczos undershooting
-    // Negative RGB are invalid values no matter the RGB space (light is positive)
-    write_imagef (out, (int2)(x, y), fmax(buffer[ylid], 0.f));
-  }
+  if(iy == 0 && x < width && y < height)
+    write_imagef(out, (int2)(x, y), buffer[ylid]);
 }
 
 /* kernel for the interpolation copy helper */
@@ -3890,7 +3886,7 @@ interpolation_copy(read_only image2d_t dev_in,
   const int ocol = get_global_id(0);
   const int orow = get_global_id(1);
 
-  if(ocol >= owidth || orow >= oheight) return;
+  if(ocol < 0 || ocol >= owidth || orow < 0 || orow >= oheight) return;
 
   float4 pix = (float4)( 0.0f, 0.0f, 0.0f, 0.0f );
 
@@ -3899,7 +3895,7 @@ interpolation_copy(read_only image2d_t dev_in,
 
   if(irow < iheight && irow >= 0 && icol < iwidth && icol >= 0)
   {
-    pix = read_imagef(dev_in, samplerA, (int2)(icol, irow));
+    pix = readpixel(dev_in, icol, irow);
   }
   write_imagef(dev_out, (int2)(ocol, orow), pix);
 }

diff --git a/data/kernels/capture.cl b/data/kernels/capture.cl
@@ -1,6 +1,6 @@
 /*
     This file is part of darktable,
-    copyright (c) 2025 darktable developer.
+    copyright (c) 2026 darktable developer.
 
     darktable is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -185,15 +185,17 @@ __kernel void prepare_blend(__read_only image2d_t cfa,
                             global const unsigned char (*const xtrans)[6],
                             global float *mask,
                             global float *Yold,
-                            global float *whites,
+                            const float4 wb,
                             const int w,
                             const int height)
 {
   const int col = get_global_id(0);
   const int row = get_global_id(1);
   if(col >= w || row >= height) return;
 
-  float4 rgb = read_imagef(dev_out, samplerA, (int2)(col, row));
+  float whites[4] = { wb.x, wb.y, wb.z, wb.w };
+
+  float4 rgb = readpixel(dev_out, col, row);
   // Photometric/digital ITU BT.709
   const float4 flum = (float4)( 0.212671f, 0.715160f, 0.072169f, 0.0f );
   rgb *= flum;
@@ -205,7 +207,7 @@ __kernel void prepare_blend(__read_only image2d_t cfa,
   {
     const int w2 = 2 * w;
     const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
-    const float val = read_imagef(cfa, samplerA, (int2)(col, row)).x;
+    const float val = readsingle(cfa, col, row);
     if(val > whites[color] || Y < CAPTURE_YMIN)
     {
       mask[k-w2-1] = mask[k-w2]  = mask[k-w2+1] =
@@ -289,7 +291,7 @@ __kernel void show_blend_mask(__read_only image2d_t in,
   const int row = get_global_id(1);
   if(col >= width || row >= height) return;
 
-  float4 pix = read_imagef(in, samplerA, (int2)(col, row));
+  float4 pix = readpixel(in, col, row);
   const float blend = blender ? blend_mask[mad24(row, width, col)]
                               : (float)sigma_mask[mad24(row, width, col)] / 255.0f;
   pix.w = blend;
@@ -308,7 +310,7 @@ __kernel void capture_result( __read_only image2d_t in,
   const int row = get_global_id(1);
   if(col >= width || row >= height) return;
 
-  float4 pix = read_imagef(in, samplerA, (int2)(col, row));
+  float4 pix = readpixel(in, col, row);
   const int k = mad24(row, width, col);
 
   if(blendmask[k] > 0.0f)

diff --git a/data/kernels/common.h b/data/kernels/common.h
@@ -63,6 +63,15 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
   #pragma OPENCL FP_CONTRACT OFF
 #endif
 
+// Kahan summation step: add `add` to the running sum m; c carries the rounding error between calls
+#define Kahan_sum(m, c, add)      /* m and c must be assignable; each is expanded more than once */ \
+  {                                                                             \
+    const float t1 = (add) - (c);   /* addend corrected by the carried error */ \
+    const float t2 = (m) + t1;      /* new sum after rounding to float */       \
+    (c) = (t2 - (m)) - t1;          /* error introduced by that rounding */     \
+    (m) = t2;                                                                   \
+  }
+
 static inline int
 FC(const int row, const int col, const unsigned int filters)
 {
@@ -159,3 +168,23 @@ static inline float clipf(const float a)
 {
   return clamp(a, 0.0f, 1.0f);
 }
+
+/* Some inline functions making life easier when reading photosites
+   or pixels from cl_mem images.
+   We had images with NaNs in the data, and at least AMD systems suffered from
+   reading those NaNs instead of falling back to 0 (nvidia seems to do so),
+   so we always read the data and make sure the values are valid floats.
+*/
+static inline float readsingle(read_only image2d_t in, int col, int row)
+{ // returns channel x of pixel (col, row); fmax() maps NaN and -inf to -FLT_MAX, +inf is passed on
+  return fmax(read_imagef(in, sampleri, (int2)(col, row)).x, -FLT_MAX);
+} // NOTE(review): a NaN comes back as -FLT_MAX, not 0 -- confirm this is the intended fallback
+
+static inline float4 readpixel(read_only image2d_t in, int col, int row)
+{ // returns all four channels of pixel (col, row); per channel fmax() maps NaN and -inf to -FLT_MAX
+  return fmax(read_imagef(in, sampleri, (int2)(col, row)), -FLT_MAX); // (vector, scalar) overload
+} // NOTE(review): a NaN channel comes back as -FLT_MAX, not 0 -- confirm this is intended
+static inline float readalpha(read_only image2d_t in, int col, int row)
+{ // returns channel w of pixel (col, row), clamped to the range [0, 1]
+  return clamp(read_imagef(in, sampleri, (int2)(col, row)).w, 0.0f, 1.0f);
+}
diff --git a/data/kernels/demosaic_markesteijn.cl b/data/kernels/demosaic_markesteijn.cl
@@ -50,7 +50,7 @@ markesteijn_initial_copy(read_only image2d_t in, global float *rgb, const int wi
 
   const int f = FCxtrans(y, x, xtrans);
 
-  const float p = fmax(0.0f, read_imagef(in, sampleri, (int2)(x, y)).x);
+  const float p = readsingle(in, x, y);
 
   for(int c = 0; c < 3; c++)
     pix[c] = (c == f) ? p : 0.0f;
@@ -897,7 +897,7 @@ markesteijn_accu(read_only image2d_t in, write_only image2d_t out, global float
 
   const int glidx = mad24(y, width, x);
 
-  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
+  float4 pixel = readpixel(in, x, y);
   float4 add = vload4(glidx, rgb);
   add.w = 1.0f;
 
@@ -918,11 +918,11 @@ markesteijn_final(read_only image2d_t in, write_only image2d_t out, const int wi
   // take sufficient border into account
   if(x < border || x >= width-border || y < border || y >= height-border) return;
 
-  float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
+  float4 pixel = readpixel(in, x, y);
 
   pixel = (pixel.w > 0.0f) ? pixel/pixel.w : (float4)0.0f;
   pixel.w = 0.0f;
 
-  write_imagef(out, (int2)(x, y), fmax(0.0f, pixel));
+  write_imagef(out, (int2)(x, y), pixel);
 }