

/* Sizing parameters; NPROC and NBPROC below are derived from these. */
#define SIMDS 4  
#define MAXBLOCK 8  


/* Element count of every host array and device buffer in this file; also
 * used as the global work size of each kernel launch. */
const int NPROC = SIMDS * MAXBLOCK;
/* Used as the local work-group size of each kernel launch. */
const int NBPROC = MAXBLOCK;


#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <CL/opencl.h>
#include "Example.h"
// Global OpenCL environment vars
// Platform, device, context, command queue and program handles. main()
// fills these in during setup and releases the context, queue and program
// before returning.
cl_platform_id platid;
cl_device_id cldev;
cl_context context;
cl_command_queue cmdQ;
cl_program clprogram;
// Status of the most recent OpenCL call. NOTE: checkError() has a parameter
// with the same name, which shadows this global inside that function.
cl_int errCode;
// Platform count reported by clGetPlatformIDs() in main().
cl_uint numPlats;
// Receives the CL_PLATFORM_NAME string of the selected platform.
char platName[256];
// One kernel object per entry point; created in main() from clprogram.
cl_kernel kernel_VecAdd;
cl_kernel kernel_VecMul;
cl_kernel kernel_VecSqr;
// Kernel source text and its size, passed to clCreateProgramWithSource().
// clString is not defined in this file - presumably it comes from Example.h.
// NOTE(review): sizeof only yields the source length if clString is a char
// array (and then it counts the terminating NUL too); if it is a pointer this
// is just the pointer size - confirm against Example.h.
char *cl_src = clString;
size_t ksize = sizeof(clString);
// Global and local work sizes handed to clEnqueueNDRangeKernel(); reassigned
// before every launch.
size_t glb_sz;
size_t lcl_sz;
/*
 * Test an OpenCL status code and report it when it signals a failure.
 *
 * errCode - status returned by (or through) an OpenCL call.
 * fun     - label identifying the call; printed in front of the code.
 *
 * Returns 1 when errCode is CL_SUCCESS. Otherwise prints
 * "Error in <fun> <errCode>" to stdout and returns 0.
 */
int checkError(cl_int errCode, char *fun)
{
	const int succeeded = (errCode == CL_SUCCESS);

	if (!succeeded)
	{
		printf("Error in %s %d\n", fun, errCode);
	}

	return succeeded;
}






/*
 * Run the VecSqr kernel over NPROC floats.
 *
 * op  - host input array; its first NPROC floats are uploaded to the device.
 * res - host output array; receives NPROC floats read back from the device.
 *
 * op and res may point to the same array: the upload of op is a blocking
 * write, so it has completed before anything is stored through res.
 *
 * Relies on the file-level context, cmdQ and kernel_VecSqr having been set
 * up by the caller, and overwrites the file-level errCode, glb_sz and lcl_sz.
 * Any OpenCL failure is reported through checkError() and terminates the
 * process with a failure exit status.
 */
void expVec(float *op, float *res)
{
	/* Buffer sizes in bytes; size_t matches what the OpenCL calls expect. */
	const size_t cl_op_sz = sizeof(cl_float) * NPROC;
	const size_t cl_res_sz = sizeof(cl_float) * NPROC;
	cl_mem cl_op;
	cl_mem cl_res;

	/* Device buffers: the kernel only reads cl_op and only writes cl_res. */
	cl_op = clCreateBuffer(context, CL_MEM_READ_ONLY,
		cl_op_sz, NULL, &errCode);
	if (!checkError(errCode, "$clalloc(op,cl_float,NPROC,ro):+")) { exit(EXIT_FAILURE); }

	cl_res = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
		cl_res_sz, NULL, &errCode);
	if (!checkError(errCode, "$clalloc(res,cl_float,NPROC,wo):+")) { exit(EXIT_FAILURE); }

	/* Blocking upload: op is fully copied when this call returns. */
	errCode = clEnqueueWriteBuffer(cmdQ, cl_op, CL_TRUE, 0,
		cl_op_sz, op, 0, NULL, NULL);
	if (!checkError(errCode, "$clwrite(...op...):")) { exit(EXIT_FAILURE); }

	/* Bind the kernel arguments: 0 = input buffer, 1 = output buffer. */
	errCode = clSetKernelArg(kernel_VecSqr, 0, sizeof(cl_mem), &cl_op);
	if (!checkError(errCode, "clSetKernelArg(VecSqr,op):")) { exit(EXIT_FAILURE); }

	errCode = clSetKernelArg(kernel_VecSqr, 1, sizeof(cl_mem), &cl_res);
	if (!checkError(errCode, "clSetKernelArg(VecSqr,res):")) { exit(EXIT_FAILURE); }

	/* One-dimensional launch: NPROC work-items in groups of NBPROC. */
	glb_sz = NPROC;
	lcl_sz = NBPROC;
	errCode = clEnqueueNDRangeKernel(cmdQ, kernel_VecSqr, 1, NULL,
		&glb_sz, &lcl_sz, 0, NULL, NULL);
	if (!checkError(errCode, "$call(VecSqr):")) { exit(EXIT_FAILURE); }

	/* Blocking download of the result into the caller's array. */
	errCode = clEnqueueReadBuffer(cmdQ, cl_res, CL_TRUE, 0,
		cl_res_sz, res, 0, NULL, NULL);
	if (!checkError(errCode, "$clread(...res...):")) { exit(EXIT_FAILURE); }

	if (cl_op) clReleaseMemObject(cl_op);
	if (cl_res) clReleaseMemObject(cl_res);
}

int main(int argc, char **argv)
{
	float a[NPROC];
	float b[NPROC];
	float out[NPROC];

	
	int i;
	for (i = 0; i < NPROC; ++i)
	{a[i] = i; b[i] = i; out[i] = 0;}	

	
	for (i = 0; i < NPROC; ++i)
	{printf("%2.2f + %2.2f = %2.2f\n", a[i], b[i], out[i]);}

 /* OpenCL Environment Setup */

	printf("Size of kernel code: %d bytes\n", (int)ksize);

		errCode =
	clGetPlatformIDs(0, NULL, &numPlats);
	if (!checkError(errCode, "clGetPlatformIDs:")){exit(0);} 

	printf("Number of platforms: %d\n", numPlats);

	errCode =
	clGetPlatformIDs(1, &platid, NULL);
	if (!checkError(errCode, "clGetPlatformIDs:")){exit(0);} 

		errCode =
	clGetPlatformInfo(platid, CL_PLATFORM_NAME, sizeof(platName),
		platName, NULL);
	if (!checkError(errCode, "clGetPlatformInfo:")){exit(0);} 

	printf("Platform = %s\n", platName);

		errCode =
	clGetDeviceIDs(platid, CL_DEVICE_TYPE_DEFAULT, 1, &cldev, NULL);
	if (!checkError(errCode, "clGetDeviceIDs:")){exit(0);} 

	context =
	clCreateContext(0, 1, &cldev, NULL, NULL, &errCode);
	if (!checkError(errCode, "clCreateContext:")){exit(0);} 

	cmdQ =
	clCreateCommandQueue(context, cldev, 0, &errCode);
	if (!checkError(errCode, "clCreateCommandQueue:")){exit(0);} 


	clprogram =
	clCreateProgramWithSource(context, 1, (const char **)&cl_src,
		&ksize, &errCode);
	if (!checkError(errCode, "clCreateProgramWithSource:")){exit(0);} 
	
	int buildError = 
	clBuildProgram(clprogram, 0, NULL, "" , NULL, NULL);
	cl_build_status stat;
	char opt[1000];
	char log[4000];

		errCode =
	clGetProgramBuildInfo (clprogram, cldev, CL_PROGRAM_BUILD_STATUS,
		sizeof(cl_build_status), &stat, NULL);
	if (!checkError(errCode, "clGetProgramInfo:")){exit(0);} 

		errCode =
	clGetProgramBuildInfo (clprogram, cldev, CL_PROGRAM_BUILD_OPTIONS,
		sizeof(opt), opt, NULL);
	if (!checkError(errCode, "clGetProgramInfo:")){exit(0);} 
   
		errCode =
	clGetProgramBuildInfo (clprogram, cldev, CL_PROGRAM_BUILD_LOG,
		sizeof(log), log, NULL);
	if (!checkError(errCode, "clGetProgramBuildInfo:")){exit(0);} 

	printf("Program Build Info:\n");
	printf("Build Status\n%d\n", stat);
	printf("Build Flags\n%s\n", opt);
	printf("Build Log\n%s\n", log);
	if (!checkError(buildError, "clBuildProgram:")){exit(0);} 

	kernel_VecAdd =
	clCreateKernel(clprogram, "VecAdd", &errCode);
	if(!checkError(errCode, "clCreateKernel(VecAdd):")){exit(0);}

	kernel_VecMul =
	clCreateKernel(clprogram, "VecMul", &errCode);
	if(!checkError(errCode, "clCreateKernel(VecMul):")){exit(0);}

	kernel_VecSqr =
	clCreateKernel(clprogram, "VecSqr", &errCode);
	if(!checkError(errCode, "clCreateKernel(VecSqr):")){exit(0);}


	

	
	cl_mem cl_a;
	int cl_a_sz = (sizeof(cl_float)*NPROC);
	cl_a=clCreateBuffer(context,CL_MEM_READ_ONLY,
		cl_a_sz,NULL,&errCode);
	if(!checkError(errCode, "$clalloc(a,cl_float,NPROC,ro):+")){exit(0);}

	cl_mem cl_b;
	int cl_b_sz = (sizeof(cl_float)*NPROC);
	cl_b=clCreateBuffer(context,CL_MEM_READ_ONLY,
		cl_b_sz,NULL,&errCode);
	if(!checkError(errCode, "$clalloc(b,cl_float,NPROC,ro):+")){exit(0);}

	cl_mem cl_out;
	int cl_out_sz = (sizeof(cl_float)*NPROC);
	cl_out=clCreateBuffer(context,CL_MEM_WRITE_ONLY,
		cl_out_sz,NULL,&errCode);
	if(!checkError(errCode, "$clalloc(out,cl_float,NPROC,wo):+")){exit(0);}


	
	errCode =
	clEnqueueWriteBuffer(cmdQ, cl_a, CL_TRUE, 0,
		cl_a_sz,a, 0, NULL, NULL);
	if(!checkError(errCode,"$clwrite(...a...):")){exit(0);}

	errCode =
	clEnqueueWriteBuffer(cmdQ, cl_b, CL_TRUE, 0,
		cl_b_sz,b, 0, NULL, NULL);
	if(!checkError(errCode,"$clwrite(...b...):")){exit(0);}

	struct timespec getCallStart;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &getCallStart);

	
		errCode =
	clSetKernelArg(kernel_VecAdd,0, sizeof(cl_mem),(void*)&cl_a);
	if(!checkError(errCode,"clSetKernelArg(VecAdd,a):")){exit(0);}

		errCode =
	clSetKernelArg(kernel_VecAdd,1, sizeof(cl_mem),(void*)&cl_b);
	if(!checkError(errCode,"clSetKernelArg(VecAdd,b):")){exit(0);}

		errCode =
	clSetKernelArg(kernel_VecAdd,2, sizeof(cl_mem),(void*)&cl_out);
	if(!checkError(errCode,"clSetKernelArg(VecAdd,out):")){exit(0);}

	glb_sz = NPROC;
	lcl_sz = NBPROC;
		errCode =
	clEnqueueNDRangeKernel(cmdQ, kernel_VecAdd, 1, NULL,
		&glb_sz, &lcl_sz,0, NULL, NULL);
	if(!checkError(errCode, "$call(VecAdd):")){exit(0);}

	
	a[0] = out[0];

	
	errCode =
	clEnqueueWriteBuffer(cmdQ, cl_a, CL_TRUE, 0,
		cl_a_sz,a, 0, NULL, NULL);
	if(!checkError(errCode,"$clwrite(...a...):")){exit(0);}


	
		errCode =
	clSetKernelArg(kernel_VecMul,0, sizeof(cl_mem),(void*)&cl_a);
	if(!checkError(errCode,"clSetKernelArg(VecMul,a):")){exit(0);}

		errCode =
	clSetKernelArg(kernel_VecMul,1, sizeof(cl_mem),(void*)&cl_b);
	if(!checkError(errCode,"clSetKernelArg(VecMul,b):")){exit(0);}

		errCode =
	clSetKernelArg(kernel_VecMul,2, sizeof(cl_mem),(void*)&cl_out);
	if(!checkError(errCode,"clSetKernelArg(VecMul,out):")){exit(0);}

	glb_sz = NPROC;
	lcl_sz = NBPROC;
		errCode =
	clEnqueueNDRangeKernel(cmdQ, kernel_VecMul, 1, NULL,
		&glb_sz, &lcl_sz,0, NULL, NULL);
	if(!checkError(errCode, "$call(VecMul):")){exit(0);}


	
	clFinish(cmdQ);

	struct timespec getCallFinish;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &getCallFinish);

	
	errCode =
	clEnqueueReadBuffer(cmdQ, cl_out, CL_TRUE, 0,
		cl_out_sz,out, 0, NULL, NULL);
	if(!checkError(errCode,"$clread(...out...):")){exit(0);}



	
	expVec(out, out);


	
	for (i = 0; i < NPROC; ++i)
	{printf("%lf\n", out[i]);}

	
	if(cl_a) clReleaseMemObject(cl_a);
	if(cl_b) clReleaseMemObject(cl_b);
	if(cl_out) clReleaseMemObject(cl_out);

	double callTime = ((getCallFinish.tv_sec - getCallStart.tv_sec)+
			(getCallFinish.tv_nsec - getCallStart.tv_nsec) / 1000000000.0);

	printf("Call Time: %2.9lf\n", callTime);

	
	if(kernel_VecAdd) clReleaseKernel(kernel_VecAdd);
	if(kernel_VecMul) clReleaseKernel(kernel_VecMul);
	if(kernel_VecSqr) clReleaseKernel(kernel_VecSqr);
	if(clprogram) clReleaseProgram(clprogram);
	if(cmdQ) clReleaseCommandQueue(cmdQ);
	if(context) clReleaseContext(context);
	return 0;
}

