Сразу скажу, что это дело гоняется под Windows XP с MSVC 2010, ибо CUDA Toolkit не поддерживает GCC 4.7 и новые glibc, а чтобы использовать старые это нужно откатывать GCC, glibc и кучу программ, которые будут ругаться на glibc.
Так вот, собрал я zlib 1.2.7, libpng15 и программу:
Код:
/*
Copyright © 2012 DolphinCommode
This is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
mieconsole is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cuComplex.h>
#include <png.h>
#define iterations (1024)
#define resox (2048)
#define resoy (resox)
#define mag_ratio (1.0)
#define X0 (0.0)
#define Y0 (0.0)
#define Xshift ((-1.5/mag_ratio)+X0)
#define Yshift ((-1.5/mag_ratio)+Y0)
#define CXmin (-1.5/mag_ratio)
#define CXmax (1.5/mag_ratio)
#define CXshag ((fabs(CXmin)+fabs(CXmax))/resox)
#define CYmin (-1.5/mag_ratio)
#define CYmax (1.5/mag_ratio)
#define CYshag ((fabs(CYmin)+fabs(CYmax))/resoy)
#define C make_cuFloatComplex(0.285, 0.01)
__global__ void eval(unsigned short int *row)
{
int x = threadIdx.x, y = threadIdx.y;
cuFloatComplex Z = make_cuFloatComplex(y*CYshag+Yshift, x*CXshag+Xshift);
unsigned short int iter=0;
while(++iter<iterations && cuCabsf(Z) < 4.0) Z = cuCaddf(cuCmulf(Z, Z), C);
float tmp2 = /*fmod(*/(iter-log2(log(cuCabsf(Z))))/*, 32.0)*/;
row[y*3] = floor(fmod((tmp2*1792.0), 0xFFFF));
row[y*3] = (row[y*3]<<8) | (row[y*3]>>8);
row[y*3+1] = floor(fmod((tmp2*3584.0), 0xFFFF));
row[y*3+1] = (row[y*3+1]<<8) | (row[y*3+1]>>8);
row[y*3+2] = floor(fmod((tmp2*512.0), 0xFFFF));
row[y*3+2] = (row[y*3+2]<<8) | (row[y*3+2]>>8);
}
__host__ int main(void)
{
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
png_infop info_ptr = png_create_info_struct(png_ptr);
FILE *fp = fopen("out.png", "wb");
png_init_io(png_ptr, fp);
png_set_IHDR(png_ptr, info_ptr, resox, resoy, 16, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
//png_set_compression_level(png_ptr, Z_BEST_COMPRESSION);
png_write_info(png_ptr, info_ptr);
for(unsigned short int x=0; x<resox; x++)
{
unsigned short int *row_device;
cudaMalloc(&row_device, resox*4*16);
eval<<<1, 512>>>(row_device);
cudaEvent_t syncEvent;
cudaEventCreate(&syncEvent);
cudaEventRecord(syncEvent, 0);
cudaEventSynchronize(syncEvent);
unsigned short int row[resox*4];
cudaMemcpy(row, row_device, resox*4*16, cudaMemcpyDeviceToHost);
png_write_row(png_ptr, (png_bytep)row);
cudaEventDestroy(syncEvent);
cudaFree(row_device);
}
png_write_end(png_ptr, NULL);
fclose(fp);
return 0;
}
И эта зараза вылетает на png_write_info. Причем файл открыт и в него даже можно что-нибудь записать (через fprint например) и оно запишется.