OpenCL

BratSinot · Сообщение **BratSinot** » 14.08.2011 14:13

Доброго времени суток!

Написал программу для "рисования" множество Мандельброта, но она не работает как надо. Я честно, не совсем понимаю как в OCL можно писать, а как нет. Чтение доков(и оффициальных спецификаций, и AMD'шный) ничего не прояснило, поэтому прошу помощи.

test.c:

Код:


#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <CL/cl.h>

#define MAX_SOURCE_SIZE (0x100000)

#define resox 300
#define resoy 200
#define LIST_SIZE resox*resoy
#define iterations 256.0
#define PI 3.1415926535897932384626433832795

int main(void)
{
    // Create the two input vectors
    int i;

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("vector_add_kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);

    // Create an OpenCL context
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(unsigned int), NULL, &ret);

     // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&c_mem_obj);

    // Execute the OpenCL kernel on the list
/*    size_t global_item_size = LIST_SIZE; // Process the entire lists
    size_t local_item_size = 1; // Process one item at a time
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL);*/

    // Read the memory buffer C on the device to the local variable C
    unsigned int *C = (unsigned int*)malloc(sizeof(unsigned int)*LIST_SIZE);
    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, LIST_SIZE * sizeof(unsigned int), C, 0, NULL, NULL);

    // Display the result to the screen
    FILE *part1;
    part1=fopen("out.ppm", "w");

 unsigned char color[2];
 fprintf(part1, "P6\n %s\n %d\n %d\n %d\n", "#asd", resox, resoy, 255);
  for (unsigned int y=0; y < resoy; y++)
   for (unsigned int x=0; x < resox; x++)
   {
    color[0]=0;
    color[1]=(int)((sin((double)C[x+y*resox]/iterations*PI)/2.0)*255);
    color[2]=(int)(sin((double)C[x+y*resox]/iterations*PI)*255);
    fwrite(color, 1, 3, part1);
   }

    // Clean up
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(c_mem_obj);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
    free©;
    return 0;
}

vector_add_kernel.cl:

Код:


#define resox 300
#define resoy 200

#define CXmin -2.0
#define CXmax 1.0
#define CXshag 3.0/resox
#define CYmin -1.0
#define CYmax 1.0
#define CYshag 2.0/resoy
#define iterations 256.0

__kernel void vector_add(__global unsigned int *C)
{
 for (unsigned	int y=0; y < resoy; y++)
  for (unsigned int x=0; x < resox; x++)
  {
   double realZ, imagZ;
   C[x+y*resox]=0;
   realZ=0;
   imagZ=0;
   while(sqrt(realZ*realZ+imagZ*imagZ) < 2.0 && C[x+y*resox]++<iterations)
   {
    realZ=realZ*realZ-imagZ*imagZ+(x*CXshag-2);
    imagZ=2*realZ*imagZ+((double)y*CYshag-1);
   }
  }
}

Если надо, то есть "эталонная" на OpenMP:

Код:


#include <stdio.h>
#include <math.h>
#include <complex.h>
#include <omp.h>

#define PROCESSING 1
#define PROCESSING_COLOR_WRITE_ON_DISK 1

#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
 #include <ncurses.h>
#endif

#define iterations 256.0
#define PI 3.1415926535897932384626433832795

#define resox 300
#define resoy 200

#define CXmin -2.0
#define CXmax 1.0
#define CXshag 3.0/resox
#define CYmin -1.0
#define CYmax 1.0
#define CYshag 2.0/resoy

unsigned short int matrix[resox*resoy];

int main(void)
{
 FILE *part1;
 part1=fopen("out.ppm", "w");

 omp_set_dynamic(0);
 omp_set_num_threads(16);

#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
 unsigned int ecx=0, oecx, ooecx=-1;
 initscr();
#endif

 #pragma omp parallel for
 for (unsigned	int y=0; y < resoy; y++)
 {
  for (unsigned int x=0; x < resox; x++)
  {
   matrix[x+y*resox]=0;
   complex double z=0+0i;
   for(matrix[x+y*resox]=0; matrix[x+y*resox]<iterations && cabs(z) < 2.0; matrix[x+y*resox]++) z=z*z+(double)(x*CXshag-2)+((double)y*CYshag-1)*I;
  #if PROCESSING
   #pragma omp critical
   {
    oecx=(unsigned int)(++ecx)*100.0/(resox*resoy);
    if(oecx!=ooecx) {move(LINES-1, 0); printw("Processing: %u", oecx); ooecx=oecx; refresh();}
   }
  #endif
  }
 }

#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
 ecx=oecx=0;
 ooecx=-1;
#endif

 unsigned char color[2];
 fprintf(part1, "P6\n %s\n %d\n %d\n %d\n", "#asd", resox, resoy, 255);
  for (unsigned int y=0; y < resoy; y++)
   for (unsigned int x=0; x < resox; x++)
   {
    color[0]=0;
    color[1]=(int)((sin((double)matrix[x+y*resox]/iterations*PI)/2.0)*255);
    color[2]=(int)(sin((double)matrix[x+y*resox]/iterations*PI)*255);
    fwrite(color, 1, 3, part1);
    #if PROCESSING_COLOR_WRITE_ON_DISK
     oecx=(unsigned int)(++ecx)*100.0/(double)(resox*resoy);
     if(oecx!=ooecx) {move(LINES-1, 0); printw("Write on disk: %u", oecx); ooecx=oecx; refresh();}
    #endif
   }

#if PROCESSING||PROCESSING_COLOR_WRITE_ON_DISK
 endwin();
#endif

 return 0;
}

IMB · Сообщение **IMB** » 14.08.2011 15:14

Небольшое замечание:

Код: Выделить всё

$ grep M_PI /usr/include/math.h
# define M_PI           3.14159265358979323846  /* pi */
# define M_PI_2         1.57079632679489661923  /* pi/2 */
# define M_PI_4         0.78539816339744830962  /* pi/4 */
# define M_PIl          3.1415926535897932384626433832795029L  /* pi */
# define M_PI_2l        1.5707963267948966192313216916397514L  /* pi/2 */
# define M_PI_4l        0.7853981633974483096156608458198757L  /* pi/4 */

http://www.linux.org.ru/forum/development/3088094

BratSinot · Сообщение **BratSinot** » 14.08.2011 15:45

IMB писал(а): ↑

14.08.2011 15:14

Небольшое замечание:

Код: Выделить всё

$ grep M_PI /usr/include/math.h
# define M_PI           3.14159265358979323846  /* pi */
# define M_PI_2         1.57079632679489661923  /* pi/2 */
# define M_PI_4         0.78539816339744830962  /* pi/4 */
# define M_PIl          3.1415926535897932384626433832795029L  /* pi */
# define M_PI_2l        1.5707963267948966192313216916397514L  /* pi/2 */
# define M_PI_4l        0.7853981633974483096156608458198757L  /* pi/4 */

http://www.linux.org.ru/forum/development/3088094

Ваше небольшое замечание не входит в стандарт.

BratSinot · Сообщение **BratSinot** » 22.08.2011 21:56

Короче, меняем в *.cl файле double на float и все.

eddy · Сообщение **eddy** » 22.08.2011 23:42

(оффтоп)

Spoiler

Хотелось бы узнать у ТС: действительно ли openCV такой тормозной, как о нем отзываются?
// сам пользуюсь собственными велосипедами (CUDA, openmp, ручное распараллеливание), но все равно производительность не нравится.

BratSinot · Сообщение **BratSinot** » 09.09.2011 21:01

А вообще можно расширение для double и векторов doublen активировать, но ATI это дело не сделало еще.

unixforum.org

OpenCL

OpenCL

Re: OpenCL

Re: OpenCL

Re: OpenCL

Re: OpenCL

Re: OpenCL