#include <Define.h>

#ifndef CPU_MODE

// includes, project
#include <MersenneTwister.h>
#include <cuda_gl_interop.h>
#include <cutil_inline.h>

// includes, kernels
#include <ParticleEngineKernel.cu>



// Global CUDA variables.
// Device pointers to the tracker tables. initCuda() allocates each one with
// room for 3*TRACKER_COUNT floats, resetCuda() fills them from host arrays,
// runCuda() hands them to the kernel and closeCuda() frees them.
float* points_cuda;
float* range_cuda;
float* colors_cuda;



// Random number generator used on the host side; runCuda() draws three values
// from it on every call that is not paused and passes them to the kernel.
MTRand rand_gen;

// Allocates the three device tracker tables (points_cuda, range_cuda,
// colors_cuda). Every table gets room for 3*TRACKER_COUNT floats; any
// allocation failure is reported through cutilSafeCall.
extern "C" void initCuda()
{
	// All three tables share the same size in bytes.
	const size_t table_bytes = 3*TRACKER_COUNT*sizeof(float);

	// Allocate in the same order as the globals are declared.
	float** const tables[] = { &points_cuda, &range_cuda, &colors_cuda };
	const int table_count = sizeof(tables) / sizeof(tables[0]);

	for (int t = 0; t < table_count; ++t)
	{
		cutilSafeCall(cudaMalloc((void**)tables[t], table_bytes));
	}
}

// Uploads the host tracker tables into the device tables allocated by
// initCuda().
//   points, ranges, colors : host arrays; 3*TRACKER_COUNT floats are read
//                            from each of them.
//   size_trackers          : not used by this function; the copy size is
//                            always derived from TRACKER_COUNT.
extern "C" void resetCuda(float* points, float* ranges, float* colors, int size_trackers)
{
	// Same byte count for every table.
	const size_t table_bytes = 3*TRACKER_COUNT*sizeof(float);

	// Pair each device table with the host array it is filled from.
	float* const device_tables[] = { points_cuda, range_cuda, colors_cuda };
	float* const host_tables[]   = { points,      ranges,     colors      };
	const int table_count = sizeof(device_tables) / sizeof(device_tables[0]);

	for (int t = 0; t < table_count; ++t)
	{
		cutilSafeCall(cudaMemcpy(device_tables[t], host_tables[t], table_bytes,
								 cudaMemcpyHostToDevice));
	}
}


// Registers an OpenGL buffer object with CUDA and maps it for device access.
// Returns the device pointer of the mapped buffer; failures go through
// cutilSafeCall.
static void* mapVboForCuda(GLuint vbo)
{
	void* device_ptr = 0;
	cutilSafeCall(cudaGLRegisterBufferObject(vbo));
	cutilSafeCall(cudaGLMapBufferObject(&device_ptr, vbo));
	return device_ptr;
}

// Unmaps a buffer object mapped by mapVboForCuda() and unregisters it from CUDA.
static void unmapVboFromCuda(GLuint vbo)
{
	cutilSafeCall(cudaGLUnmapBufferObject(vbo));
	cutilSafeCall(cudaGLUnregisterBufferObject(vbo));
}

// Runs one step of the particle kernel on the given OpenGL buffer objects.
//   vbo_location, vbo_color, vbo_attributes, vbo_vector_cuda :
//       OpenGL buffer objects; each is registered, mapped, handed to the
//       kernel and then unmapped/unregistered again within this call.
//   nb_props, sizeu, sizev, sizew, velocity :
//       forwarded unchanged to the kernel.
//   pause  : when true the call returns immediately without touching anything.
//   pcount : particle count; drives the launch geometry and is forwarded to
//            the kernel. Values <= 0 make the call a no-op.
extern "C" void runCuda(GLuint vbo_location, GLuint vbo_color, GLuint vbo_attributes,
						GLuint vbo_vector_cuda, int nb_props, int sizeu, int sizev, int sizew,
						int velocity, bool pause, int pcount)
{
	// Nothing to do while paused. A non-positive particle count would yield an
	// empty (invalid) grid, so it is skipped as well.
	if ( pause || pcount <= 0 ) return;

	// Launch geometry (unchanged): a grid_size x grid_size grid of 512-thread
	// blocks, i.e. roughly 512 * pcount threads in total.
	// NOTE(review): the previous code also derived grid_size/32 and grid_size/16
	// (whose product times 512 is about pcount) but never passed them to the
	// launch; those dead locals were removed. Switching to that tighter grid
	// requires checking the kernel's index computation first.
	const int grid_size = (int)sqrt((float)pcount);
	dim3 grid(grid_size,grid_size,1);
	dim3 block(512,1,1);

	// Map the OpenGL buffers for writing from CUDA
	float3* location    = (float3*)mapVboForCuda(vbo_location);
	float4* color       = (float4*)mapVboForCuda(vbo_color);
	float3* attributes  = (float3*)mapVboForCuda(vbo_attributes);
	float3* vector_cuda = (float3*)mapVboForCuda(vbo_vector_cuda);

	// Initialize random parameters (three draws per call; the scaling below
	// assumes rand_gen.rand() returns values in [0, 1] -- TODO confirm)
	float3 random;
	random.x = 0.2 + 0.8 * rand_gen.rand();
	random.y = rand_gen.rand();
	random.z = ( 0.2 + 0.8 * rand_gen.rand() ) / 10000.0;

	// Call the cuda kernel
	kernel<<< grid, block >>>(location, color, attributes, vector_cuda,
								nb_props, sizeu, sizev, sizew, velocity, random,
								points_cuda, range_cuda, colors_cuda, pcount);

	// A kernel launch returns no status itself; launch failures (e.g. an
	// invalid configuration) are only visible through cudaGetLastError().
	cutilSafeCall(cudaGetLastError());

	// Release the buffers in reverse order of mapping
	unmapVboFromCuda(vbo_vector_cuda);
	unmapVboFromCuda(vbo_attributes);
	unmapVboFromCuda(vbo_color);
	unmapVboFromCuda(vbo_location);
}


// Frees the device tracker tables allocated by initCuda() and clears the
// global pointers so they no longer refer to freed device memory.
extern "C" void closeCuda() {
	// Return codes are intentionally left unchecked here, as before: this is
	// best-effort teardown.
	cudaFree(points_cuda);
	cudaFree(range_cuda);
	cudaFree(colors_cuda);

	// Use the literal null pointer constant instead of `nil`, which is not a
	// C/C++/CUDA identifier and only compiles if some header defines it.
	points_cuda = 0;
	range_cuda = 0;
	colors_cuda = 0;
}

#endif
