b24387f1eb28ea2a3d4fc1681d68201948b4e82c7838344a88937312f99744af850f8b2fba4190c4
1
#include "Solvers/WSPHGPUSolver.h"
1
#include "Solvers/WSPHGPUSolver.h"
2
 
2
 
3
#include "System.h"
3
#include "System.h"
4
#include "CL/cl.h"
4
#include "SysCalls.h"
 
 
5
#include <iostream>
 
 
6
 
 
 
7
#include <CL/cl.h>
5
 
8
 
6
#define MAX_SOURCE_SIZE (0x100000)
 
 
7
#define MAT0 0
9
#define MAT0 0
8
 
10
 
9
using namespace std;
11
using namespace std;
 
 
12
using namespace Squishy::CL;
10
 
13
 
11
namespace Squishy { namespace SPH { namespace Solvers {
14
namespace Squishy { namespace SPH { namespace Solvers {
12
 
15
 
...
 
...
 
39
    {
42
    {
40
        kernelRadius = max(kernelRadius, fluid->GetMaterial()->ComputeKernelRadius(GetConfig().AverageKernelParticleCount));
43
        kernelRadius = max(kernelRadius, fluid->GetMaterial()->ComputeKernelRadius(GetConfig().AverageKernelParticleCount));
41
        kernelRadiusSq = kernelRadius * kernelRadius;
44
        kernelRadiusSq = kernelRadius * kernelRadius;
42
        
45
 
43
        // fix up kernels:
46
        // fix up kernels:
44
        gaussianKernel->SetRadius(kernelRadius);
47
        gaussianKernel->SetRadius(kernelRadius);
45
        viscosityKernel->SetRadius(kernelRadius);
48
        viscosityKernel->SetRadius(kernelRadius);
...
 
...
 
59
 
62
 
60
            particles.push_back(&*p);
63
            particles.push_back(&*p);
61
        }
64
        }
62
        
65
 
63
        ParticlePtrIterator from = particles.begin();
66
        ParticlePtrIterator from = particles.begin();
64
        ParticlePtrIterator to = particles.end();
67
        ParticlePtrIterator to = particles.end();
65
        
68
 
66
        FindNeighbors(from, to);
69
        FindNeighbors(from, to);
67
 
70
 
68
        ComputeDensityPressure(from, to);
71
        ComputeDensityPressure(from, to);
...
 
...
 
85
        //          1. All particle density  2. all particle pressure
88
        //          1. All particle density  2. all particle pressure
86
        //      After invoking the kernel, set the density and pressure of all particles accordingly
89
        //      After invoking the kernel, set the density and pressure of all particles accordingly
87
 
90
 
88
    
91
 
89
        // Load the kernel source code into the array source_str
92
        // Load the kernel source code into the array source_str
90
        FILE *fp;
93
        FILE *fp;
91
        char *source_str;
94
        char *source_str;
92
        size_t source_size;
95
 
93
 
96
        const char kernelFile[] = "../Squishy.SPH/OpenCL/kernel.cl";
94
        fp = fopen("OpenCL/kernel.cl", "r");
97
        fp = fopen(kernelFile, "r");
95
        if (!fp) {
98
        if (!fp)
96
            fprintf(stderr, "Failed to load kernel.\n");
 
 
97
            exit(1);
 
 
98
        }
 
 
99
        source_str = (char*)malloc(MAX_SOURCE_SIZE);
 
 
100
        source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
 
 
101
        fclose( fp );
 
 
102
 
 
 
103
        // Get platform and device information
 
 
104
        cl_platform_id platform_id = NULL;
 
 
105
        cl_device_id device_id = NULL;   
 
 
106
        cl_uint ret_num_devices;
 
 
107
        cl_uint ret_num_platforms;
 
 
108
        cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
 
 
109
        ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
 
 
110
 
 
 
111
        // Create an OpenCL context
 
 
112
        cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
 
 
113
 
 
 
114
        // Create memory buffers on the device for each vector 
 
 
115
        ParticleVector& particles = system->GetParticles();
 
 
116
        int nParticles = particles.size();
 
 
117
 
 
 
118
        const FluidVector& fluids = system->GetFluids();
 
 
119
        int nFluids = fluids.size();
 
 
120
        Materials materialParam;
 
 
121
        materialParam.masses = new Real[nFluids];
 
 
122
        materialParam.gasStiffness=  new Real[nFluids];
 
 
123
 
 
 
124
        for (int fluid = 0; fluid<nFluids; ++fluid)
 
 
125
        {
99
        {
126
            materialParam.masses[fluid] = fluids[fluid]->GetMaterial()->GetMass();
100
            fprintf(stderr, "Could not open kernel file: %s\n", kernelFile);
127
            materialParam.gasStiffness[fluid] = fluids[fluid]->GetMaterial()->GetGasStiffness();
101
            return;
128
         }
102
        }
 
 
103
 
 
 
104
        // Read kernel into memory
 
 
105
        size_t source_size = ComputeFileSize(fp) + 1;
 
 
106
        source_str = new char[source_size];
 
 
107
        source_size = fread( source_str, 1, source_size, fp);
 
 
108
        fclose( fp );
 
 
109
 
 
 
110
        source_str[source_size] = 0;
 
 
111
 
 
 
112
        // Get platform and device information
 
 
113
        cl_platform_id platform_id = NULL;
 
 
114
        cl_device_id device_id = NULL;   
 
 
115
        cl_uint ret_num_devices;
 
 
116
        cl_uint ret_num_platforms;
 
 
117
 
 
 
118
        cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
 
 
119
        ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
 
 
120
        assert(ret_num_devices > 0);
 
 
121
        CL_SUCCESS_OR_ELSE(ret, return);   
 
 
122
 
 
 
123
        char name[100];
 
 
124
        ret = clGetDeviceInfo (device_id, CL_DEVICE_NAME, sizeof(name), name, NULL);
 
 
125
        CL_SUCCESS_OR_ELSE(ret, return);
 
 
126
        printf ("Device found: %s\n", name);
 
 
127
 
 
 
128
        // Create an OpenCL context
 
 
129
        cl_context_properties contextProperties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
 
 
130
        cl_context context = clCreateContext( contextProperties, 1, &device_id, NULL, NULL, &ret);
 
 
131
        CL_SUCCESS_OR_ELSE(ret, return);
 
 
132
 
 
 
133
        // Create a command queue
 
 
134
        cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
 
 
135
        CL_SUCCESS_OR_ELSE(ret, return);
 
 
136
        
 
 
137
        // Create program object
 
 
138
        cl_program program = clCreateProgramWithSource(context, 1, (const char**)&source_str, NULL, &ret);
 
 
139
        CL_SUCCESS_OR_ELSE(ret, return);
 
 
140
 
 
 
141
        // Compile & link the program
 
 
142
        ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
 
143
        //ret = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
 
 
144
        if (ret != CL_SUCCESS)
 
 
145
        {
 
 
146
            cout << "Could not build CL Program: " << GetCLErrorString(ret) << "\n";
 
 
147
            /*cl_build_status status;
 
 
148
            ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr);
 
 
149
            assert(ret == CL_SUCCESS);*/
 
 
150
            //CL_SUCCESS_OR_ELSE(status, return);
 
 
151
            
 
 
152
            size_t length;
 
 
153
            static const int bufferSize = 8096;
 
 
154
            char buffer[bufferSize];
 
 
155
            ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, bufferSize, buffer, &length);
 
 
156
            assert(ret == CL_SUCCESS);
 
 
157
 
 
 
158
            buffer[length] = 0;
 
 
159
 
 
 
160
            cout << "--- Build log (" << length << " bytes) ---\n "<<buffer<<endl;
 
 
161
            return;
 
 
162
        }
 
 
163
 
 
 
164
        // Create the OpenCL kernel
 
 
165
        cl_kernel kernel = clCreateKernel(program, "solver", &ret);
 
 
166
        CL_SUCCESS_OR_ELSE(ret, return);
 
 
167
 
 
 
168
        // delete the source code
 
 
169
        delete [] source_str;
 
 
170
 
 
 
171
 
 
 
172
        // Prepare the data
 
 
173
        ParticleVector& particles = system->GetParticles();
 
 
174
        int nParticles = particles.size();
129
 
175
 
 
 
176
        const FluidVector& fluids = system->GetFluids();
 
 
177
        int nFluids = fluids.size();
 
 
178
        MaterialProperties materialProps;
 
 
179
        materialProps.masses = new Real[nFluids];
 
 
180
        materialProps.gasStiffness=  new Real[nFluids];
130
 
181
 
131
        Vector* positions = new Vector[nParticles];
182
        Vector* positions = new Vector[nParticles];
132
        Real* densities = new Real[nParticles];
183
        Real* densities = new Real[nParticles];
133
        int* materialIndexes = new int[nParticles];
184
        int* materialIndexes = new int[nParticles];
134
        
185
 
135
        //Particle neighbors[n_particles];
186
        //Particle neighbors[n_particles];
136
        int index = 0;
 
 
137
        for (int p = 0; p < nParticles; ++p)
187
        for (int p = 0; p < nParticles; ++p)
138
        {
188
        {
139
             positions[p] = particles[p].position;
189
            positions[p] = particles[p].position;
140
             densities[p] = particles[p].density;
190
            densities[p] = particles[p].density;
141
             materialIndexes[p] = MAT0;
191
            materialIndexes[p] = MAT0;
142
             index++;
192
        }
 
 
193
 
 
 
194
        for (int fluid = 0; fluid < nFluids; ++fluid)
 
 
195
        {
 
 
196
            materialProps.masses[fluid] = fluids[fluid]->GetMaterial()->GetMass();
 
 
197
            materialProps.gasStiffness[fluid] = fluids[fluid]->GetMaterial()->GetGasStiffness();
143
        }
198
        }
144
 
199
 
145
 
200
        // Allocate buffers on device
146
        int size_4_new_densities = nParticles * sizeof(Real);
201
        int sizePositions = sizeof(Vector) * nParticles;
147
        int size_4_pressure = nParticles * sizeof(Real);
202
        int sizeParticleScalar = sizeof(Real) * nParticles;
148
                
203
        int sizeMatIndices = sizeof(int) * nParticles;
149
        cl_mem d_positions = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(positions), NULL, &ret);
204
        int sizeMaterials = sizeof(MaterialProperties) * nFluids;
150
        cl_mem d_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(densities), NULL, &ret);
205
 
151
        cl_mem d_materialIndexes = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(materialIndexes), NULL, &ret);
206
        cl_mem d_positions = clCreateBuffer(context, CL_MEM_READ_ONLY, sizePositions, NULL, &ret);
152
        cl_mem d_materials = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(materialParam), NULL, &ret);
207
        cl_mem d_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeParticleScalar, NULL, &ret);
153
 
208
        cl_mem d_materialIndexes = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeMatIndices, NULL, &ret);
154
        cl_mem d_new_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, size_4_new_densities, NULL, &ret);
209
        cl_mem d_materials = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeMaterials, NULL, &ret);
155
        cl_mem d_pressure = clCreateBuffer(context, CL_MEM_READ_ONLY, size_4_pressure, NULL, &ret);
210
 
156
 
211
        cl_mem d_new_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeParticleScalar, NULL, &ret);
157
        // Create a command queue
212
        cl_mem d_pressure = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeParticleScalar, NULL, &ret);
158
        cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
213
 
159
 
214
        // Copy the data to their respective device buffers
160
        // Copy the lists to their respective memory buffers
215
        static const int blocking = CL_TRUE;
161
        ret = clEnqueueWriteBuffer(command_queue, d_positions, CL_TRUE, 0, sizeof(positions), positions, 0, NULL, NULL);
216
        ret = clEnqueueWriteBuffer(command_queue, d_positions, blocking, 0, sizePositions, positions, 0, NULL, NULL);
162
        ret = clEnqueueWriteBuffer(command_queue, d_densities, CL_TRUE, 0, sizeof(densities), densities, 0, NULL, NULL);
217
        ret = clEnqueueWriteBuffer(command_queue, d_densities, blocking, 0, sizeParticleScalar, densities, 0, NULL, NULL);
163
        ret = clEnqueueWriteBuffer(command_queue, d_materialIndexes, CL_TRUE, 0, sizeof(materialIndexes), materialIndexes, 0, NULL, NULL);
218
        ret = clEnqueueWriteBuffer(command_queue, d_materialIndexes, blocking, 0, sizeMatIndices, materialIndexes, 0, NULL, NULL);
164
        ret = clEnqueueWriteBuffer(command_queue, d_materials, CL_TRUE, 0, sizeof(materialParam), &materialParam, 0, NULL, NULL);
219
        ret = clEnqueueWriteBuffer(command_queue, d_materials, blocking, 0, sizeMaterials, &materialProps, 0, NULL, NULL);
165
        
220
 
166
        // Create a program from the kernel source
221
 
167
        cl_program program = clCreateProgramWithSource(context, 1, 
222
        // Set the arguments of the kernel
168
            (const char **)&source_str, (const size_t *)&source_size, &ret);
223
        ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_positions);
169
 
224
        ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_densities);
170
        //Build the program
225
        ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&d_materialIndexes);
171
        ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
226
        ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&d_materials);
172
 
227
 
173
        // Create the OpenCL kernel
228
        ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&d_new_densities);
174
        cl_kernel kernel = clCreateKernel(program, "solver", &ret);
229
        ret = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&d_pressure);
175
    
230
 
176
        // Set the arguments of the kernel
231
        ret = clSetKernelArg(kernel, 6, sizeof(nParticles), &nParticles);
177
        ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_positions);
232
 
178
        ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_densities);
233
 
179
        ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&d_materialIndexes);
234
        // Execute the OpenCL kernel on the list
180
        ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&d_materials);
235
        size_t local_work_size = 64; // Process one item at a time //Neither this
181
 
236
        size_t global_work_size = ((int)((int) local_work_size/ nParticles))+1;
182
        ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&d_new_densities);
237
        ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
183
        ret = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&d_pressure);
238
 
184
 
239
        // Read the memory buffer C on the device to the local variable C
185
        ret = clSetKernelArg(kernel, 6, sizeof(nParticles), &nParticles);
240
        Real* newDensities = new Real[nParticles];
186
 
241
        Real* newPressures = new Real[nParticles];
187
 
242
        ret = clEnqueueReadBuffer(command_queue, d_new_densities, CL_TRUE, 0, nParticles * sizeof(Real), newDensities, 0, NULL, NULL);
188
         // Execute the OpenCL kernel on the list
243
        ret = clEnqueueReadBuffer(command_queue, d_pressure, CL_TRUE, 0, nParticles * sizeof(Real), newPressures, 0, NULL, NULL);
189
        size_t local_work_size = 64; // Process one item at a time //Neither this
244
 
190
        size_t global_work_size = ((int)((int) local_work_size/ nParticles))+1;
 
 
191
        ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
 
 
192
 
 
 
193
        // Read the memory buffer C on the device to the local variable C
 
 
194
        Real* newDensities = new Real[nParticles];
 
 
195
        Real* newPressures = new Real[nParticles];
 
 
196
        ret = clEnqueueReadBuffer(command_queue, d_new_densities, CL_TRUE, 0, nParticles * sizeof(Real), newDensities, 0, NULL, NULL);
 
 
197
        ret = clEnqueueReadBuffer(command_queue, d_pressure, CL_TRUE, 0, nParticles * sizeof(Real), newPressures, 0, NULL, NULL);
 
 
198
        
 
 
199
        // Cleanup allocated objects
245
        // Cleanup allocated objects
200
        clReleaseKernel (kernel);  
246
        clReleaseKernel (kernel);  
201
        clReleaseProgram (program);
247
        clReleaseProgram (program);
202
        clReleaseCommandQueue (command_queue);
248
        clReleaseCommandQueue (command_queue);
203
        clReleaseContext (context);
249
        clReleaseContext (context);
204
        clReleaseMemObject (d_positions);
250
        clReleaseMemObject (d_positions);
205
        clReleaseMemObject (d_densities);
251
        clReleaseMemObject (d_densities);
206
        clReleaseMemObject (d_materialIndexes);
252
        clReleaseMemObject (d_materialIndexes);
207
        clReleaseMemObject (d_materials);
253
        clReleaseMemObject (d_materials);
208
        clReleaseMemObject (d_new_densities);
254
        clReleaseMemObject (d_new_densities);
209
        clReleaseMemObject (d_pressure);
255
        clReleaseMemObject (d_pressure);
210
        //free (cdDevices);
256
        //free (cdDevices);
211
        //free (cPathAndName);
257
        //free (cPathAndName);
212
        free (source_str);
258
        // Free host memory
213
        // Free host memory
259
        //free(srcA); 
214
        //free(srcA); 
260
        //free(srcB);
215
        //free(srcB);
261
        //free (dst);
216
        //free (dst);
262
 
217
 
263
        /*for (; _p != to; ++_p)
218
        /*for (; _p != to; ++_p)
 
 
219
        {
264
        {
220
            Particle& p = **_p;
265
        Particle& p = **_p;
221
 
266
 
222
            Real density = 0;
267
        Real density = 0;
223
            
 
 
224
            ParticlePtrVector neis = GetNeighbors(p);
 
 
225
 
268
 
226
            for (ParticlePtrIteratorConst neighbor = neis.begin(), end = neis.end(); neighbor != end; ++neighbor)
269
        ParticlePtrVector neis = GetNeighbors(p);
227
            {
270
        for (ParticlePtrIteratorConst neighbor = neis.begin(), end = neis.end(); neighbor != end; ++neighbor)
228
                Particle& nei = **neighbor;
271
        {
229
                density += gaussianKernel->evaluate(p.predictedPosition - nei.predictedPosition);
272
        Particle& nei = **neighbor;
230
            }
273
        density += gaussianKernel->evaluate(p.position - nei.position);
231
            density *= p.GetMass();
274
        }
 
 
275
        density *= p.GetMass();
 
 
276
        p.density = density;
232
 
277
 
233
            p.density = density;
278
        p.pressure = p.GetMaterial()->GetGasStiffness() * density;
234
            p.pressure = p.GetMaterial()->GetGasStiffness() * density;
 
 
235
        }*/
279
        }*/
236
    }
280
    }
237
 
281
 
...
 
...
 
265
            // Leapfrog integration
309
            // Leapfrog integration
266
            // see http://en.wikipedia.org/wiki/Leapfrog_integration
310
            // see http://en.wikipedia.org/wiki/Leapfrog_integration
267
 
311
 
268
            p.acceleration = p.GetSumOfAllForces() / p.GetMaterial()->GetRestDensity();
312
            p.acceleration = p.GetSumOfAllForces() / p.GetMaterial()->GetRestDensity();
269
 
313
 
270
            //newPos = p.position + p.velocity * GetConfig().GetDeltaT() + accel * GetConfig().GetDeltaT()HalfSquared;
314
            //newPos = p.position + p.velocity * GetConfig().GetDeltaT() + accel * GetConfig().GetDeltaT()HalfSquared;
271
            //newVelocity = p.velocity + GetConfig().GetDeltaT()Half * (accel + newAccel);
315
            //newVelocity = p.velocity + GetConfig().GetDeltaT()Half * (accel + newAccel);