#ifndef _PARTICLEENGINEKERNEL_H_
#define _PARTICLEENGINEKERNEL_H_

#ifndef CPU_MODE

// MOD(X,Y): floating-point remainder X - Y * floor(X / Y), evaluated in float.
// Because floor() is used, the result carries the sign of Y. Both arguments
// are expanded twice, so they should be free of side effects.
#define MOD(X,Y) \
	( (float)(X) - (float)floor( (float)(X) / (float)(Y) ) * (float)(Y) )
// RANDOM(VAR,X): stores a pseudo-random value, clamped to [-1, 1], into VAR.
//
// Reads a variable named `random` (members .x, .y, .z) from the scope in
// which the macro is expanded and relies on the MOD macro. X is mixed with
// random.x / random.y, folded by random.z and then by 0.99 before being
// scaled to the output range. X is read before VAR is written, so the same
// lvalue may be passed for both arguments.
//
// All literals are single precision so the arithmetic is not promoted to
// double in device code.
#define RANDOM(VAR,X)\
	{\
		const float rnd_mixed = random.x * (float)(X) + random.y;\
		const float rnd_folded = MOD(rnd_mixed, random.z) / random.z;\
		VAR = -1.0f + 2.0f * MOD(rnd_folded, 0.99f);\
		if ( VAR > 1.0f ) { VAR = 1.0f; }\
		else if ( VAR < -1.0f ) { VAR = -1.0f; }\
	}
// CHECK_RESET: re-tests whether particle `thread` is inside the cube and
// updates its age accordingly.
//
// Requires `location`, `attributes`, `thread` and a bool `reset` in the
// expansion scope. Each coordinate of location[thread] is mapped from
// [-1, 1] to [0, 1]; `reset` becomes true when any mapped coordinate is
// <= 0 or >= 1, and false otherwise. attributes[thread].x is then set to
// attributes[thread].y + 1 when `reset` is true, or to 0 when it is false.
//
// The macro declares index_x_tmp, index_y_tmp and index_z_tmp in the
// caller's scope and is not wrapped in a block: expand it at most once per
// scope and never as the unbraced body of an if/else/loop. It already ends
// with a semicolon.
//
// Literals are single precision so the arithmetic is not promoted to double
// in device code.
#define CHECK_RESET\
	reset = false;\
	float index_x_tmp = (location[thread].x + 1.0f) / 2.0f;\
	float index_y_tmp = (location[thread].y + 1.0f) / 2.0f;\
	float index_z_tmp = (location[thread].z + 1.0f) / 2.0f;\
	if ( index_x_tmp >= 1.0f || index_x_tmp <= 0.0f ||\
	     index_y_tmp >= 1.0f || index_y_tmp <= 0.0f ||\
	     index_z_tmp >= 1.0f || index_z_tmp <= 0.0f ) reset = true;\
	attributes[thread].x = reset ? ( attributes[thread].y + 1.0f ) : 0.0f;


// Particle update kernel.
//
// Each thread handles one particle. The particle index is obtained by
// flattening the block grid (blockIdx.x, blockIdx.y) together with
// threadIdx.x; threadIdx.y/z are not used. Threads whose index is not below
// pcount return without touching memory.
//
// Per particle:
//   1. attributes.x (the age) is incremented.
//   2. The particle is re-seeded when any coordinate, mapped from [-1, 1]
//      to [0, 1], is <= 0 or >= 1, or when the age exceeds attributes.y.
//      Re-seeding draws a new position with RANDOM, lets CHECK_RESET set
//      the age, writes the PARTICLES_COLOR_* colour and returns.
//   3. Otherwise the mapped position selects a cell of the
//      sizeu x sizev x sizew table vector_cuda (x varies fastest, then y,
//      then z) and that cell's vector divided by velocity is added to the
//      position.
//   4. For each of the TRACKER_COUNT trackers k, a particle lying inside
//      points[3k..3k+2] +/- range[3k..3k+2] on all three axes takes the
//      colour colors[3k..3k+2] with w = 100. Later trackers overwrite
//      earlier ones.
//
// Parameters:
//   location      per-particle position (read and written)
//   color         per-particle RGBA (written)
//   attributes    per-particle .x = age, .y = age limit; .z is not used here
//   vector_cuda   displacement table, indexed as described above
//   nb_props      not used by this kernel
//   sizeu/v/w     table dimensions
//   velocity      divisor applied to the displacement; a value of zero
//                 produces a non-finite displacement
//   random        values consumed by the RANDOM macro
//   points, range, colors   three floats per tracker
//   pcount        treated as the number of particles (exclusive upper
//                 bound on the particle index)
__global__ void
kernel(float3* location, float4* color,
	   float3* attributes, float3* vector_cuda,
	   int nb_props, int sizeu, int sizev, int sizew,
	   int velocity, float3 random,
	   float* points, float* range, float* colors, int pcount)
{
	// Flatten (blockIdx.y, blockIdx.x, threadIdx.x) into a particle index.
	const unsigned int thread = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;

	// Valid indices are 0 .. pcount-1. Index == pcount would read and write
	// one element past the end of the particle arrays, and a non-positive
	// pcount must not be converted to a huge unsigned bound.
	if ( pcount <= 0 || thread >= (unsigned int)pcount ) return;

	// Increment particle age
	attributes[thread].x += 1.0f;

	// Map coordinates from [-1, 1] to [0, 1]
	const float index_x = (location[thread].x + 1.0f) / 2.0f;
	const float index_y = (location[thread].y + 1.0f) / 2.0f;
	const float index_z = (location[thread].z + 1.0f) / 2.0f;

	// Check coordinates range and particle age
	bool reset =
		index_x >= 1.0f || index_x <= 0.0f ||
		index_y >= 1.0f || index_y <= 0.0f ||
		index_z >= 1.0f || index_z <= 0.0f ||
		attributes[thread].x > attributes[thread].y;

	// Reset particle when out of range or too old
	if ( reset ) {
		// Each coordinate is used as the input for its own replacement.
		RANDOM(location[thread].x, location[thread].x);
		RANDOM(location[thread].y, location[thread].y);
		RANDOM(location[thread].z, location[thread].z);

		// Re-test the new position and set the age (see CHECK_RESET).
		CHECK_RESET

		// Restore the default particle colour
		color[thread].x = PARTICLES_COLOR_R;
		color[thread].y = PARTICLES_COLOR_G;
		color[thread].z = PARTICLES_COLOR_B;
		color[thread].w = PARTICLES_COLOR_A;
		return;
	}

	// Get table index; the mapped coordinates are inside (0, 1) here, and
	// the clamp covers products that round up to the table size.
	int index_xi = (int)( index_x * (float)( sizeu ) );
	int index_yi = (int)( index_y * (float)( sizev ) );
	int index_zi = (int)( index_z * (float)( sizew ) );
	if ( index_xi >= sizeu ) index_xi = sizeu - 1;
	if ( index_yi >= sizev ) index_yi = sizev - 1;
	if ( index_zi >= sizew ) index_zi = sizew - 1;
	const int index_array = index_zi * sizeu * sizev + index_yi * sizeu + index_xi;

	// Move particle; keep the new position in a local so the tracker loop
	// below does not re-read it from global memory.
	const float3 displacement = vector_cuda[index_array];
	float3 position = location[thread];
	position.x += displacement.x / (float)velocity;
	position.y += displacement.y / (float)velocity;
	position.z += displacement.z / (float)velocity;
	location[thread] = position;

	// Color particles moving close to the tracker points
	for ( int k = 0; k < TRACKER_COUNT; ++k ) {
		const float* center = points + 3*k;
		const float* extent = range + 3*k;
		if ( position.x < ( center[0] - extent[0] ) ) continue;
		if ( position.x > ( center[0] + extent[0] ) ) continue;
		if ( position.y < ( center[1] - extent[1] ) ) continue;
		if ( position.y > ( center[1] + extent[1] ) ) continue;
		if ( position.z < ( center[2] - extent[2] ) ) continue;
		if ( position.z > ( center[2] + extent[2] ) ) continue;
		color[thread].x = colors[3*k+0];
		color[thread].y = colors[3*k+1];
		color[thread].z = colors[3*k+2];
		color[thread].w = 100.0f;
	}
}

#endif

#endif // #ifndef _PARTICLEENGINEKERNEL_H_
