Написал программу для «рисования» множества Мандельброта, но она не работает как надо. Я, честно говоря, не совсем понимаю, как в OpenCL можно писать, а как нельзя. Чтение документации (и официальных спецификаций, и AMD'шной) ничего не прояснило, поэтому прошу помощи.
test.c:
Код:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <CL/cl.h>
/* Upper bound, in bytes, on the kernel source read from disk (1 MiB). */
#define MAX_SOURCE_SIZE (0x100000)
/* Image width and height in pixels. */
#define resox 300
#define resoy 200
/* Total pixel count; parenthesized so it expands safely inside larger
 * expressions such as `n / LIST_SIZE`. */
#define LIST_SIZE (resox * resoy)
/* Divisor used to normalize per-pixel counts when mapping them to colors. */
#define iterations 256.0
#define PI 3.1415926535897932384626433832795
int main(void)
{
// Create the two input vectors
int i;
// Load the kernel source code into the array source_str
FILE *fp;
char *source_str;
size_t source_size;
fp = fopen("vector_add_kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
// Get platform and device information
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
// Create an OpenCL context
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
// Create a command queue
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
// Create memory buffers on the device for each vector
cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(unsigned int), NULL, &ret);
// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
// Build the program
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
// Set the arguments of the kernel
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&c_mem_obj);
// Execute the OpenCL kernel on the list
/* size_t global_item_size = LIST_SIZE; // Process the entire lists
size_t local_item_size = 1; // Process one item at a time
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);*/
// Read the memory buffer C on the device to the local variable C
unsigned int *C = (unsigned int*)malloc(sizeof(unsigned int)*LIST_SIZE);
ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, LIST_SIZE * sizeof(unsigned int), C, 0, NULL, NULL);
// Display the result to the screen
FILE *part1;
part1=fopen("out.ppm", "w");
unsigned char color[2];
fprintf(part1, "P6\n %s\n %d\n %d\n %d\n", "#asd", resox, resoy, 255);
for (unsigned int y=0; y < resoy; y++)
for (unsigned int x=0; x < resox; x++)
{
color[0]=0;
color[1]=(int)((sin((double)C[x+y*resox]/iterations*PI)/2.0)*255);
color[2]=(int)(sin((double)C[x+y*resox]/iterations*PI)*255);
fwrite(color, 1, 3, part1);
}
// Clean up
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(c_mem_obj);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free©;
return 0;
}
vector_add_kernel.cl:
Код:
/* Image width and height in pixels. */
#define resox 300
#define resoy 200
/* Bounds of the rendered region on the real axis and the per-pixel step
 * ("shag" is transliterated Russian for "step"). Negative constants and
 * quotients are parenthesized so they expand safely in any expression. */
#define CXmin (-2.0)
#define CXmax 1.0
#define CXshag (3.0 / resox)
/* Bounds of the rendered region on the imaginary axis and the per-pixel step. */
#define CYmin (-1.0)
#define CYmax 1.0
#define CYshag (2.0 / resoy)
/* Maximum number of iterations per pixel. */
#define iterations 256.0
/* The kernel uses double precision; on OpenCL 1.0/1.1 devices this requires
 * the cl_khr_fp64 extension to be enabled explicitly. */
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

/*
 * Mandelbrot escape-time kernel.
 *
 * C: output buffer of resox*resoy elements in row-major order; C[x + y*resox]
 *    receives the number of iterations z = z*z + c performed for the pixel
 *    (x, y) before |z| reached 2, capped at `iterations`.
 *
 * Each work-item starts at its global id and advances by the global size, so
 * the result is the same whether the kernel is launched with one work-item,
 * one work-item per pixel, or anything in between.
 */
__kernel void vector_add(__global unsigned int *C)
{
    const size_t total = resox * resoy;
    const double step_x = CXshag;
    const double step_y = CYshag;
    const unsigned int max_iter = (unsigned int)iterations;

    for (size_t idx = get_global_id(0); idx < total; idx += get_global_size(0))
    {
        const unsigned int x = (unsigned int)(idx % resox);
        const unsigned int y = (unsigned int)(idx / resox);
        const double c_re = x * step_x + CXmin;
        const double c_im = y * step_y + CYmin;

        double re = 0.0;
        double im = 0.0;
        unsigned int count = 0; /* private counter: no global-memory traffic per iteration */

        /* |z| < 2 is tested as |z|^2 < 4 to avoid the square root. */
        while (re * re + im * im < 4.0 && count < max_iter)
        {
            /* The new real part must not be used when computing the new
             * imaginary part, hence the temporary. */
            const double next_re = re * re - im * im + c_re;
            im = 2.0 * re * im + c_im;
            re = next_re;
            count++;
        }
        C[idx] = count;
    }
}
Если нужно, вот «эталонная» версия на OpenMP, которая работает правильно:
Код:
#include <stdio.h>
#include <math.h>
#include <complex.h>
#include <omp.h>
#define PROCESSING 1
#define PROCESSING_COLOR_WRITE_ON_DISK 1
#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
#include <ncurses.h>
#endif
/* Maximum number of iterations per pixel; also the divisor used when
 * mapping counts to colors. */
#define iterations 256.0
#define PI 3.1415926535897932384626433832795
/* Image width and height in pixels. */
#define resox 300
#define resoy 200
/* Bounds of the rendered region and the per-pixel steps ("shag" is
 * transliterated Russian for "step"). Negative constants and quotients are
 * parenthesized so they expand safely in any expression. */
#define CXmin (-2.0)
#define CXmax 1.0
#define CXshag (3.0 / resox)
#define CYmin (-1.0)
#define CYmax 1.0
#define CYshag (2.0 / resoy)
/* Per-pixel iteration counts, row-major: index is x + y*resox. */
unsigned short int matrix[resox*resoy];
/*
 * Reference implementation: computes the Mandelbrot iteration count for every
 * pixel in parallel with OpenMP, stores the counts in `matrix`, and writes
 * them as a binary PPM image to "out.ppm". When PROCESSING or
 * PROCESSING_COLOR_WRITE_ON_DISK is enabled, progress percentages are shown
 * through ncurses.
 *
 * Returns 0 on success and 1 when the output file cannot be opened or written.
 */
int main(void)
{
    /* P6 carries raw bytes, so the file is opened in binary mode. */
    FILE *part1 = fopen("out.ppm", "wb");
    if (!part1) {
        fprintf(stderr, "Failed to open out.ppm for writing.\n");
        return 1;
    }
    omp_set_dynamic(0);
    omp_set_num_threads(16);
#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
    /* ecx counts finished pixels; oecx/ooecx are the current and the last
     * displayed percentage, so the screen is refreshed only on change. */
    unsigned int ecx=0, oecx=0, ooecx=-1;
    initscr();
#endif
#pragma omp parallel for
    for (unsigned int y=0; y < resoy; y++)
    {
        for (unsigned int x=0; x < resox; x++)
        {
            const double complex c = (double)(x*CXshag-2) + ((double)y*CYshag-1)*I;
            double complex z = 0.0;
            /* Count in a local variable and store once, instead of using the
             * shared array element as the loop counter. */
            unsigned short int count = 0;
            while (count < iterations && cabs(z) < 2.0)
            {
                z = z*z + c;
                count++;
            }
            matrix[x+y*resox] = count;
#if PROCESSING
            /* The progress state and the ncurses calls are shared by all threads. */
#pragma omp critical
            {
                oecx=(unsigned int)(++ecx)*100.0/(resox*resoy);
                if(oecx!=ooecx) {move(LINES-1, 0); printw("Processing: %u", oecx); ooecx=oecx; refresh();}
            }
#endif
        }
    }
#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
    ecx=oecx=0;
    ooecx=-1;
#endif
    int write_failed = 0;
    unsigned char color[3]; /* R, G, B: three bytes are written per pixel */
    fprintf(part1, "P6\n %s\n %d\n %d\n %d\n", "#asd", resox, resoy, 255);
    for (unsigned int y=0; y < resoy; y++)
    {
        for (unsigned int x=0; x < resox; x++)
        {
            const double shade = sin((double)matrix[x+y*resox]/iterations*PI);
            color[0]=0;
            color[1]=(unsigned char)(int)(shade/2.0*255);
            color[2]=(unsigned char)(int)(shade*255);
            if (fwrite(color, 1, 3, part1) != 3) write_failed = 1;
#if PROCESSING_COLOR_WRITE_ON_DISK
            oecx=(unsigned int)(++ecx)*100.0/(double)(resox*resoy);
            if(oecx!=ooecx) {move(LINES-1, 0); printw("Write on disk: %u", oecx); ooecx=oecx; refresh();}
#endif
        }
    }
    if (fclose(part1) != 0) write_failed = 1;
#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
    endwin();
#endif
    if (write_failed) {
        fprintf(stderr, "Failed to write out.ppm.\n");
        return 1;
    }
    return 0;
}