Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions data/kernels/basic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -2972,11 +2972,11 @@ static inline float get_image_channel(read_only image2d_t in,
const int c)
{
float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
if(c == 0)
if(c == RED)
return pixel.x;
else if(c == 1)
else if(c == GREEN)
return pixel.y;
else if(c == 2)
else if(c == BLUE)
return pixel.z;

return pixel.w;
Expand Down Expand Up @@ -3846,7 +3846,7 @@ interpolation_resample (read_only image2d_t in,
for (int ix = 0; ix < hl && yvalid; ix++)
{
const int xx = lindex[ix];
float4 hpixel = read_imagef(in, sampleri,(int2)(xx, yy));
float4 hpixel = readpixel(in, xx, yy);
vpixel += hpixel * lkernel[ix];
}

Expand All @@ -3868,12 +3868,8 @@ interpolation_resample (read_only image2d_t in,
}

// store final result
if (iy == 0 && x < width && y < height)
{
// Clip negative RGB that may be produced by Lanczos undershooting
// Negative RGB are invalid values no matter the RGB space (light is positive)
write_imagef (out, (int2)(x, y), fmax(buffer[ylid], 0.f));
}
if(iy == 0 && x < width && y < height)
write_imagef(out, (int2)(x, y), buffer[ylid]);
}

/* kernel for the interpolation copy helper */
Expand All @@ -3890,7 +3886,7 @@ interpolation_copy(read_only image2d_t dev_in,
const int ocol = get_global_id(0);
const int orow = get_global_id(1);

if(ocol >= owidth || orow >= oheight) return;
if(ocol < 0 || ocol >= owidth || orow < 0 || orow >= oheight) return;

float4 pix = (float4)( 0.0f, 0.0f, 0.0f, 0.0f );

Expand All @@ -3899,7 +3895,7 @@ interpolation_copy(read_only image2d_t dev_in,

if(irow < iheight && irow >= 0 && icol < iwidth && icol >= 0)
{
pix = read_imagef(dev_in, samplerA, (int2)(icol, irow));
pix = readpixel(dev_in, icol, irow);
}
write_imagef(dev_out, (int2)(ocol, orow), pix);
}
Expand Down
14 changes: 8 additions & 6 deletions data/kernels/capture.cl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of darktable,
copyright (c) 2025 darktable developer.
copyright (c) 2026 darktable developer.

darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -185,15 +185,17 @@ __kernel void prepare_blend(__read_only image2d_t cfa,
global const unsigned char (*const xtrans)[6],
global float *mask,
global float *Yold,
global float *whites,
const float4 wb,
const int w,
const int height)
{
const int col = get_global_id(0);
const int row = get_global_id(1);
if(col >= w || row >= height) return;

float4 rgb = read_imagef(dev_out, samplerA, (int2)(col, row));
float whites[4] = { wb.x, wb.y, wb.z, wb.w };

float4 rgb = readpixel(dev_out, col, row);
// Photometric/digital ITU BT.709
const float4 flum = (float4)( 0.212671f, 0.715160f, 0.072169f, 0.0f );
rgb *= flum;
Expand All @@ -205,7 +207,7 @@ __kernel void prepare_blend(__read_only image2d_t cfa,
{
const int w2 = 2 * w;
const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
const float val = read_imagef(cfa, samplerA, (int2)(col, row)).x;
const float val = readsingle(cfa, col, row);
if(val > whites[color] || Y < CAPTURE_YMIN)
{
mask[k-w2-1] = mask[k-w2] = mask[k-w2+1] =
Expand Down Expand Up @@ -289,7 +291,7 @@ __kernel void show_blend_mask(__read_only image2d_t in,
const int row = get_global_id(1);
if(col >= width || row >= height) return;

float4 pix = read_imagef(in, samplerA, (int2)(col, row));
float4 pix = readpixel(in, col, row);
const float blend = blender ? blend_mask[mad24(row, width, col)]
: (float)sigma_mask[mad24(row, width, col)] / 255.0f;
pix.w = blend;
Expand All @@ -308,7 +310,7 @@ __kernel void capture_result( __read_only image2d_t in,
const int row = get_global_id(1);
if(col >= width || row >= height) return;

float4 pix = read_imagef(in, samplerA, (int2)(col, row));
float4 pix = readpixel(in, col, row);
const int k = mad24(row, width, col);

if(blendmask[k] > 0.0f)
Expand Down
29 changes: 29 additions & 0 deletions data/kernels/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ constant sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
#pragma OPENCL FP_CONTRACT OFF
#endif

/* Kahan (compensated) summation step.
   Adds `add` to the running sum `m` while carrying the rounding error of
   the addition in the compensation term `c`.

   m   : running sum, must be a modifiable float lvalue (read and written)
   c   : compensation term, must be a modifiable float lvalue (read and
         written); start it at 0.0f
   add : value to accumulate, evaluated exactly once

   `m` and `c` are expanded more than once, so do not pass expressions with
   side effects for them.
   The temporaries carry a `kahan_` prefix so that a caller variable named
   like them (e.g. `t1`) is not captured by the expansion.
   The expansion deliberately stays a plain brace block so call sites written
   with or without a trailing semicolon keep compiling.
*/
#define Kahan_sum(m, c, add)                      \
  {                                               \
    const float kahan_y_ = (add) - (c);           \
    const float kahan_t_ = (m) + kahan_y_;        \
    (c) = (kahan_t_ - (m)) - kahan_y_;            \
    (m) = kahan_t_;                               \
  }

static inline int
FC(const int row, const int col, const unsigned int filters)
{
Expand Down Expand Up @@ -159,3 +168,23 @@ static inline float clipf(const float a)
{
return clamp(a, 0.0f, 1.0f);
}

/* Some inline functions making life easier when reading photosites
or pixels from cl_mem images.
We have seen images containing NaNs in their data, and at least AMD systems
suffered because they read those NaNs as-is instead of falling back to 0
(NVIDIA seems to do so), so we always sanitize the data while reading to
make sure the values are valid floats.
*/
/* Read one scalar sample from `in` at integer position (col, row).
   Only the first (.x) channel of the texel fetched through `sampleri` is
   used. The value is passed through fmax() against -FLT_MAX before it is
   returned, so the result is never below -FLT_MAX.
   NOTE(review): per the OpenCL fmax() definition a NaN input yields the
   other argument, i.e. -FLT_MAX here - confirm that is the intended
   replacement value.
*/
static inline float readsingle(read_only image2d_t in, int col, int row)
{
  const int2 pos = (int2)(col, row);
  const float value = read_imagef(in, sampleri, pos).x;
  return fmax(-FLT_MAX, value);
}

/* Read the full four-channel texel from `in` at integer position
   (col, row) through `sampleri`.
   Every channel is passed through fmax() against -FLT_MAX before it is
   returned, so no channel of the result is below -FLT_MAX.
   NOTE(review): per the OpenCL fmax() definition a NaN channel yields the
   other argument, i.e. -FLT_MAX here - confirm that is the intended
   replacement value.
*/
static inline float4 readpixel(read_only image2d_t in, int col, int row)
{
  const float4 pixel = read_imagef(in, sampleri, (int2)(col, row));
  // Vector first, scalar second: this is the mixed overload the OpenCL C
  // specification documents, fmax(gentypef, float). fmax() treats both
  // arguments alike, so the result equals the former fmax(scalar, vector).
  return fmax(pixel, -FLT_MAX);
}
/* Read the fourth (.w) channel of the texel at integer position (col, row)
   of `in` through `sampleri` and return it limited to [0, 1] by clipf().
*/
static inline float readalpha(read_only image2d_t in, int col, int row)
{
  const float4 texel = read_imagef(in, sampleri, (int2)(col, row));
  return clipf(texel.w);
}
8 changes: 4 additions & 4 deletions data/kernels/demosaic_markesteijn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ markesteijn_initial_copy(read_only image2d_t in, global float *rgb, const int wi

const int f = FCxtrans(y, x, xtrans);

const float p = fmax(0.0f, read_imagef(in, sampleri, (int2)(x, y)).x);
const float p = readsingle(in, x, y);

for(int c = 0; c < 3; c++)
pix[c] = (c == f) ? p : 0.0f;
Expand Down Expand Up @@ -897,7 +897,7 @@ markesteijn_accu(read_only image2d_t in, write_only image2d_t out, global float

const int glidx = mad24(y, width, x);

float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
float4 pixel = readpixel(in, x, y);
float4 add = vload4(glidx, rgb);
add.w = 1.0f;

Expand All @@ -918,11 +918,11 @@ markesteijn_final(read_only image2d_t in, write_only image2d_t out, const int wi
// take sufficient border into account
if(x < border || x >= width-border || y < border || y >= height-border) return;

float4 pixel = read_imagef(in, sampleri, (int2)(x, y));
float4 pixel = readpixel(in, x, y);

pixel = (pixel.w > 0.0f) ? pixel/pixel.w : (float4)0.0f;
pixel.w = 0.0f;

write_imagef(out, (int2)(x, y), fmax(0.0f, pixel));
write_imagef(out, (int2)(x, y), pixel);
}

Loading
Loading