CoE Cluster november 2011/CUDA
From Teknologisk videncenter
// Read image from file, remove blue channel and write onto another file
#include <cstdio>
#include <iostream>
#include <string>
#include <sstream>
#include <cv.h>
#include <highgui.h>
using namespace cv; using namespace std;
#include "PixelMat.hpp"
// Defined in the CUDA part of the program (removeblue.cu): copies the pixel
// buffer `img` to the GPU, launches the kernel with `blocks` blocks and a block
// size derived from `threads`, then copies the result back into `img`.
// `step` is the distance in bytes between the starts of two consecutive rows.
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step);
// Entry point.
//
// Usage: <program> <blocks> <threads> <inputfile> <outputfile>
//
// Loads <inputfile>, hands its pixel buffer to startCUDA() for in-place
// processing on the GPU and writes the result to <outputfile>.
//
// Returns 0 on success and 1 on a usage error, an invalid numeric argument,
// or when the image cannot be read or written.
int main(int argc, char* argv[]) {
    if (argc <= 4)
    {
        cout << "Usage: " << argv[0] << " <blocks> <threads> <inputfile> <outputfile>" << endl;
        return 1;
    }

    int blocks = 0;
    int threads = 0;
    istringstream buf1(argv[1]);
    istringstream buf2(argv[2]);
    // Reject unparsable or non-positive values up front: they would otherwise
    // reach the kernel launch as an invalid configuration.
    if (!(buf1 >> blocks) || !(buf2 >> threads) || blocks <= 0 || threads <= 0)
    {
        cerr << "Error: <blocks> and <threads> must be positive integers" << endl;
        return 1;
    }

    // Flag 1 asks OpenCV for an 8-bit, 3-channel colour image. The GPU code
    // addresses pixels as 3 bytes each, so grayscale or alpha images loaded
    // "unchanged" (flag -1) would be indexed incorrectly.
    PixelMat img = (PixelMat) imread(argv[3], 1);
    if (!img.data)
    {
        cerr << "Error: cannot read image " << argv[3] << endl;
        return 1;
    }

    int rows = img.rows;
    int cols = img.cols;
    int step = img.step;
    // Diagnostic output: row pitch of the loaded image in bytes.
    fprintf(stderr, "%d\n", step);

    startCUDA(blocks, threads, img.data, rows, cols, step);

    if (!imwrite(argv[4], img))
    {
        cerr << "Error: cannot write image " << argv[4] << endl;
        return 1;
    }
    return 0;
}
// removeblue.cu
// Read image from file, remove blue channel and write onto another file
//#include <stdio.h>
//#include <stdlib.h>
//#include <unistd.h>
// Writes one 3-byte pixel from separate colour components.
//   x     index multiplied by `step` (selects the row)
//   y     index multiplied by 3 (selects the pixel inside the row)
//   m     base address of the image buffer
//   step  distance in bytes between consecutive rows
//   r,g,b component values; each is truncated to one byte
// Bytes are stored in the order b, g, r.
__device__ void RGB(int x, int y, unsigned char* m, int step, int r, int g, int b)
{
    unsigned char* const px = m + step*x + 3*y;
    px[0] = (unsigned char) b;
    px[1] = (unsigned char) g;
    px[2] = (unsigned char) r;
}
// Writes one 3-byte pixel from a packed integer.
//   x     index multiplied by `step` (selects the row)
//   y     index multiplied by 3 (selects the pixel inside the row)
//   m     base address of the image buffer
//   step  distance in bytes between consecutive rows
//   rgb   packed value; bits 0-7 go to byte 0, bits 8-15 to byte 1 and
//         bits 16-23 to byte 2 of the pixel
__device__ void RGB(int x, int y, unsigned char* m, int step, int rgb)
{
    unsigned char* const px = m + step*x + 3*y;
    // Peel off one byte of the packed value per pixel byte, lowest first.
    for (int k = 0; k < 3; ++k)
        px[k] = (unsigned char) ((rgb >> (8*k)) & 0xff);
}
// Reads one 3-byte pixel and returns it as a packed integer.
//   x     index multiplied by `step` (selects the row)
//   y     index multiplied by 3 (selects the pixel inside the row)
//   m     base address of the image buffer
//   step  distance in bytes between consecutive rows
// Returns byte 0 in bits 0-7, byte 1 in bits 8-15 and byte 2 in bits 16-23.
__device__ int RGB(int x, int y, unsigned char* m, int step)
{
    const unsigned char* const px = m + step*x + 3*y;
    // The three fields occupy disjoint bit ranges, so OR-ing them together
    // yields the same value as adding them.
    unsigned int packed = px[0];
    packed |= px[1] << 8;
    packed |= px[2] << 16;
    return packed;
}
// Kernel: clears the blue byte of every pixel of a 3-bytes-per-pixel image.
//
// Launch layout: a 1D grid and a 1D block are expected. The rows are split
// into gridDim.x contiguous bands (one per block) and the columns into
// blockDim.x contiguous strips (one per thread); each thread processes the
// intersection of its band and strip. Threads or blocks with a non-zero y/z
// index exit immediately so that a multi-dimensional launch does not make
// several threads rewrite the same pixels.
//
//   img   image buffer the kernel reads and writes
//   rows  number of image rows
//   cols  number of pixels per row
//   step  distance in bytes between consecutive rows
__global__ void Count(unsigned char *img, int rows, int cols, int step)
{
    // Work is partitioned along x only.
    if (threadIdx.y != 0 || threadIdx.z != 0 || blockIdx.y != 0 || blockIdx.z != 0)
        return;

    // Integer partitioning: the last band/strip ends exactly at rows/cols.
    // A float scale factor can round the final bound down and skip the last
    // row or column.
    const int rowstart = (int) (((long long) blockIdx.x * rows) / gridDim.x);
    const int rowend   = (int) (((long long) (blockIdx.x + 1) * rows) / gridDim.x);
    const int colstart = (int) (((long long) threadIdx.x * cols) / blockDim.x);
    const int colend   = (int) (((long long) (threadIdx.x + 1) * cols) / blockDim.x);

    for (int i = rowstart; i < rowend; i++)
    {
        for (int j = colstart; j < colend; j++)
        {
            // The packed pixel holds blue in bits 0-7 (the b,g,r setter stores
            // b in byte 0). Masking with 0xffff00 zeroes blue and keeps the
            // other two components; the former mask 0x0000ff did the opposite.
            RGB(i, j, img, step, RGB(i, j, img, step) & 0xffff00);
        }
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool checkCuda(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Runs the Count kernel over a host image buffer, in place.
//
//   blocks   number of blocks in the (1D) grid
//   threads  number of threads per (1D) block
//   img      host pixel buffer, 3 bytes per pixel; nothing happens if NULL
//   rows     number of image rows
//   cols     number of pixels per row
//   step     distance in bytes between consecutive rows (>= cols*3)
//
// On invalid arguments or any CUDA failure a message is printed to stderr and
// `img` is left unchanged.
void startCUDA(int blocks, int threads, unsigned char* img, int rows, int cols, int step)
{
    if (img == NULL)
        return;

    if (blocks <= 0 || threads <= 0 || rows <= 0 || cols <= 0 || cols > step / 3)
    {
        fprintf(stderr, "startCUDA: invalid launch configuration or image geometry\n");
        return;
    }

    // The kernel addresses pixel (row, col) at row*step + col*3, so the buffer
    // must span every full row pitch plus the pixels of the last row. Sizing
    // it as rows*cols*3 is only correct when step == cols*3.
    const size_t bytes = (size_t) (rows - 1) * (size_t) step + (size_t) cols * 3;

    // The kernel partitions work along x only; a threads x threads block would
    // just repeat the same work and fails to launch once threads exceeds 32.
    dim3 dimBlock(threads, 1, 1);
    dim3 dimGrid(blocks, 1, 1);

    unsigned char *CUDAimg = NULL;
    if (!checkCuda(cudaMalloc((void**) &CUDAimg, bytes), "cudaMalloc"))
        return;

    if (checkCuda(cudaMemcpy(CUDAimg, img, bytes, cudaMemcpyHostToDevice),
                  "cudaMemcpy (host to device)"))
    {
        Count<<<dimGrid, dimBlock>>>(CUDAimg, rows, cols, step);

        // cudaGetLastError catches a rejected launch configuration;
        // cudaDeviceSynchronize surfaces faults raised while the kernel ran.
        if (checkCuda(cudaGetLastError(), "Count launch") &&
            checkCuda(cudaDeviceSynchronize(), "Count execution"))
        {
            checkCuda(cudaMemcpy(img, CUDAimg, bytes, cudaMemcpyDeviceToHost),
                      "cudaMemcpy (device to host)");
        }
    }

    // Release the device buffer on every path once it has been allocated.
    checkCuda(cudaFree(CUDAimg), "cudaFree");
}