forked from TeachingUndergradsCHC/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnoRed.cu
More file actions
74 lines (53 loc) · 2.46 KB
/
noRed.cu
File metadata and controls
74 lines (53 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "/content/ppmFile.c"
/*
 * Writes a copy of an interleaved 3-bytes-per-pixel image into d_output with
 * byte 0 (the red channel) of every pixel set to 0; bytes 1 (green) and
 * 2 (blue) are copied unchanged from d_input.
 *
 * Launch layout: 2D grid of 2D blocks. Each thread handles the single pixel at
 *   column = blockIdx.x * blockDim.x + threadIdx.x
 *   row    = blockIdx.y * blockDim.y + threadIdx.y
 * Threads whose pixel lies outside width x height do nothing, so the grid may
 * overshoot the image.
 *
 * Both buffers are indexed up to (width * height * 3 - 1).
 */
__global__ void kernel(int width, int height, unsigned char *d_input, unsigned char* d_output){
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard clause: bail out for threads that fall outside the image.
    if (col < 0 || col >= width || row < 0 || row >= height) {
        return;
    }

    // Index of this pixel's first byte; the three channels are adjacent.
    const int pixelBase = (row * width + col) * 3;

    d_output[pixelBase]     = 0;                        // red: dropped
    d_output[pixelBase + 1] = d_input[pixelBase + 1];   // green: kept
    d_output[pixelBase + 2] = d_input[pixelBase + 2];   // blue: kept
}
/*
 * Terminates the program with a diagnostic when a CUDA runtime call failed.
 *
 * status: value returned by the CUDA call (or by cudaGetLastError()).
 * what:   short description of the step, included in the error message.
 */
static void checkCudaOrDie(cudaError_t status, const char *what){
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Reads "640x426.ppm", runs `kernel` on the GPU to zero the red channel of
 * every pixel, and writes the result to "out.ppm".
 *
 * Command-line arguments are accepted but not used.
 * Returns 0 on success; exits with EXIT_FAILURE if the image cannot be
 * loaded/created or if any CUDA call fails.
 */
int main (int argc, char *argv[]){
    const char* inFile = "640x426.ppm";   //file names for input and output files
    const char* outFile = "out.ppm";

    int width;                            //image size
    int height;
    Image *inImage, *outImage;            //image structs (type comes from the included ppmFile code)
    unsigned char *data;                  //input image data (host)

    //Device variables:
    unsigned char *d_input = NULL;        //input image data
    unsigned char *d_output = NULL;       //output image data

    inImage = ImageRead(inFile);          //get input image and its attributes
    // NOTE(review): ImageRead's failure behaviour is not visible in this file;
    // guard against a NULL result rather than dereferencing it blindly.
    if (inImage == NULL) {
        fprintf(stderr, "Could not read input image %s\n", inFile);
        return EXIT_FAILURE;
    }
    width = inImage->width;
    height = inImage->height;
    data = inImage->data;

    // A zero-sized image would yield a zero-sized grid, which is an invalid launch.
    if (width <= 0 || height <= 0 || data == NULL) {
        fprintf(stderr, "Input image %s has no pixel data (%d x %d)\n", inFile, width, height);
        return EXIT_FAILURE;
    }

    //size of image in bytes; 3 is # channels. Computed in size_t so that large
    //images cannot overflow int arithmetic.
    const size_t image_size = (size_t)width * (size_t)height * 3;

    //allocate memory for GPU: one unsigned char per byte of image data
    checkCudaOrDie(cudaMalloc((void**)&d_input, sizeof(unsigned char) * image_size),
                   "cudaMalloc(d_input)");
    checkCudaOrDie(cudaMalloc((void**)&d_output, sizeof(unsigned char) * image_size),
                   "cudaMalloc(d_output)");

    //copy values to GPU
    checkCudaOrDie(cudaMemcpy(d_input, data, image_size, cudaMemcpyHostToDevice),
                   "copy of input image to device");

    //call kernel using block size 32x32; grid is rounded up to cover the whole image
    dim3 blockD(32,32);
    dim3 gridD((width + blockD.x - 1)/blockD.x, (height + blockD.y - 1)/blockD.y);
    kernel<<<gridD, blockD>>>(width, height, d_input, d_output);
    // Launch-configuration errors are only reported through cudaGetLastError().
    checkCudaOrDie(cudaGetLastError(), "kernel launch");

    //create and clear image variable for use as the result
    //(done on the host while the kernel is still running on the device)
    outImage = ImageCreate(width,height);
    // NOTE(review): as with ImageRead, a NULL return is assumed to signal failure.
    if (outImage == NULL) {
        fprintf(stderr, "Could not create output image (%d x %d)\n", width, height);
        return EXIT_FAILURE;
    }
    ImageClear(outImage,255,255,255);

    // Errors raised while the kernel executes surface at this synchronization.
    checkCudaOrDie(cudaDeviceSynchronize(), "kernel execution");

    //copy output image from gpu
    checkCudaOrDie(cudaMemcpy(outImage->data, d_output, image_size, cudaMemcpyDeviceToHost),
                   "copy of output image from device");

    ImageWrite(outImage, outFile);        //write output image to file

    //free device memory
    checkCudaOrDie(cudaFree(d_input), "cudaFree(d_input)");
    checkCudaOrDie(cudaFree(d_output), "cudaFree(d_output)");

    //free host memory
    // NOTE(review): only the pixel buffers are freed, as in the original; how the
    // Image structs themselves are allocated is not visible here.
    free(inImage->data);
    free(outImage->data);

    return 0;
}