b24387f1eb28ea2a3d4fc1681d68201948b4e82c | 7838344a88937312f99744af850f8b2fba4190c4 | ||
---|---|---|---|
1 | #include "Solvers/WSPHGPUSolver.h" | 1 | #include "Solvers/WSPHGPUSolver.h" |
2 | 2 | ||
3 | #include "System.h" | 3 | #include "System.h" |
4 | #include "CL/cl.h" | 4 | #include "SysCalls.h" |
5 | #include <iostream> | ||
6 | |||
7 | #include <CL/cl.h> | ||
5 | 8 | ||
6 | #define MAX_SOURCE_SIZE (0x100000) | ||
7 | #define MAT0 0 | 9 | #define MAT0 0 |
8 | 10 | ||
9 | using namespace std; | 11 | using namespace std; |
12 | using namespace Squishy::CL; | ||
10 | 13 | ||
11 | namespace Squishy { namespace SPH { namespace Solvers { | 14 | namespace Squishy { namespace SPH { namespace Solvers { |
12 | 15 | ||
... | ... | ||
39 | { | 42 | { |
40 | kernelRadius = max(kernelRadius, fluid->GetMaterial()->ComputeKernelRadius(GetConfig().AverageKernelParticleCount)); | 43 | kernelRadius = max(kernelRadius, fluid->GetMaterial()->ComputeKernelRadius(GetConfig().AverageKernelParticleCount)); |
41 | kernelRadiusSq = kernelRadius * kernelRadius; | 44 | kernelRadiusSq = kernelRadius * kernelRadius; |
42 | | 45 | |
43 | // fix up kernels: | 46 | // fix up kernels: |
44 | gaussianKernel->SetRadius(kernelRadius); | 47 | gaussianKernel->SetRadius(kernelRadius); |
45 | viscosityKernel->SetRadius(kernelRadius); | 48 | viscosityKernel->SetRadius(kernelRadius); |
... | ... | ||
59 | 62 | ||
60 | particles.push_back(&*p); | 63 | particles.push_back(&*p); |
61 | } | 64 | } |
62 | | 65 | |
63 | ParticlePtrIterator from = particles.begin(); | 66 | ParticlePtrIterator from = particles.begin(); |
64 | ParticlePtrIterator to = particles.end(); | 67 | ParticlePtrIterator to = particles.end(); |
65 | | 68 | |
66 | FindNeighbors(from, to); | 69 | FindNeighbors(from, to); |
67 | 70 | ||
68 | ComputeDensityPressure(from, to); | 71 | ComputeDensityPressure(from, to); |
... | ... | ||
85 | // 1. All particle density 2. all particle pressure | 88 | // 1. All particle density 2. all particle pressure |
86 | // After invoking the kernel, set the density and pressure of all particles accordingly | 89 | // After invoking the kernel, set the density and pressure of all particles accordingly |
87 | 90 | ||
88 | | 91 | |
89 | // Load the kernel source code into the array source_str | 92 | // Load the kernel source code into the array source_str |
90 | FILE *fp; | 93 | FILE *fp; |
91 | char *source_str; | 94 | char *source_str; |
92 | size_t source_size; | 95 | |
93 | 96 | const char kernelFile[] = "../Squishy.SPH/OpenCL/kernel.cl"; | |
94 | fp = fopen("OpenCL/kernel.cl", "r"); | 97 | fp = fopen(kernelFile, "r"); |
95 | if (!fp) { | 98 | if (!fp) |
96 | fprintf(stderr, "Failed to load kernel.\n"); | ||
97 | exit(1); | ||
98 | } | ||
99 | source_str = (char*)malloc(MAX_SOURCE_SIZE); | ||
100 | source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp); | ||
101 | fclose( fp ); | ||
102 | |||
103 | // Get platform and device information | ||
104 | cl_platform_id platform_id = NULL; | ||
105 | cl_device_id device_id = NULL; | ||
106 | cl_uint ret_num_devices; | ||
107 | cl_uint ret_num_platforms; | ||
108 | cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); | ||
109 | ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); | ||
110 | |||
111 | // Create an OpenCL context | ||
112 | cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); | ||
113 | |||
114 | // Create memory buffers on the device for each vector | ||
115 | ParticleVector& particles = system->GetParticles(); | ||
116 | int nParticles = particles.size(); | ||
117 | |||
118 | const FluidVector& fluids = system->GetFluids(); | ||
119 | int nFluids = fluids.size(); | ||
120 | Materials materialParam; | ||
121 | materialParam.masses = new Real[nFluids]; | ||
122 | materialParam.gasStiffness= new Real[nFluids]; | ||
123 | |||
124 | for (int fluid = 0; fluid<nFluids; ++fluid) | ||
125 | { | 99 | { |
126 | materialParam.masses[fluid] = fluids[fluid]->GetMaterial()->GetMass(); | 100 | fprintf(stderr, "Could not open kernel file: %s\n", kernelFile); |
127 | materialParam.gasStiffness[fluid] = fluids[fluid]->GetMaterial()->GetGasStiffness(); | 101 | return; |
128 | } | 102 | } |
103 | |||
104 | // Read kernel into memory | ||
105 | size_t source_size = ComputeFileSize(fp) + 1; | ||
106 | source_str = new char[source_size]; | ||
107 | source_size = fread( source_str, 1, source_size, fp); | ||
108 | fclose( fp ); | ||
109 | |||
110 | source_str[source_size] = 0; | ||
111 | |||
112 | // Get platform and device information | ||
113 | cl_platform_id platform_id = NULL; | ||
114 | cl_device_id device_id = NULL; | ||
115 | cl_uint ret_num_devices; | ||
116 | cl_uint ret_num_platforms; | ||
117 | |||
118 | cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); | ||
119 | ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices); | ||
120 | assert(ret_num_devices > 0); | ||
121 | CL_SUCCESS_OR_ELSE(ret, return); | ||
122 | |||
123 | char name[100]; | ||
124 | ret = clGetDeviceInfo (device_id, CL_DEVICE_NAME, sizeof(name), name, NULL); | ||
125 | CL_SUCCESS_OR_ELSE(ret, return); | ||
126 | printf ("Device found: %s\n", name); | ||
127 | |||
128 | // Create an OpenCL context | ||
129 | cl_context_properties contextProperties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 }; | ||
130 | cl_context context = clCreateContext( contextProperties, 1, &device_id, NULL, NULL, &ret); | ||
131 | CL_SUCCESS_OR_ELSE(ret, return); | ||
132 | |||
133 | // Create a command queue | ||
134 | cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); | ||
135 | CL_SUCCESS_OR_ELSE(ret, return); | ||
136 | | ||
137 | // Create program object | ||
138 | cl_program program = clCreateProgramWithSource(context, 1, (const char**)&source_str, NULL, &ret); | ||
139 | CL_SUCCESS_OR_ELSE(ret, return); | ||
140 | |||
141 | // Compile & link the program | ||
142 | ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | ||
143 | //ret = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); | ||
144 | if (ret != CL_SUCCESS) | ||
145 | { | ||
146 | cout << "Could not build CL Program: " << GetCLErrorString(ret) << "\n"; | ||
147 | /*cl_build_status status; | ||
148 | ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr); | ||
149 | assert(ret == CL_SUCCESS);*/ | ||
150 | //CL_SUCCESS_OR_ELSE(status, return); | ||
151 | | ||
152 | size_t length; | ||
153 | static const int bufferSize = 8096; | ||
154 | char buffer[bufferSize]; | ||
155 | ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, bufferSize, buffer, &length); | ||
156 | assert(ret == CL_SUCCESS); | ||
157 | |||
158 | buffer[length] = 0; | ||
159 | |||
160 | cout << "--- Build log (" << length << " bytes) ---\n "<<buffer<<endl; | ||
161 | return; | ||
162 | } | ||
163 | |||
164 | // Create the OpenCL kernel | ||
165 | cl_kernel kernel = clCreateKernel(program, "solver", &ret); | ||
166 | CL_SUCCESS_OR_ELSE(ret, return); | ||
167 | |||
168 | // delete the source code | ||
169 | delete [] source_str; | ||
170 | |||
171 | |||
172 | // Prepare the data | ||
173 | ParticleVector& particles = system->GetParticles(); | ||
174 | int nParticles = particles.size(); | ||
129 | 175 | ||
176 | const FluidVector& fluids = system->GetFluids(); | ||
177 | int nFluids = fluids.size(); | ||
178 | MaterialProperties materialProps; | ||
179 | materialProps.masses = new Real[nFluids]; | ||
180 | materialProps.gasStiffness= new Real[nFluids]; | ||
130 | 181 | ||
131 | Vector* positions = new Vector[nParticles]; | 182 | Vector* positions = new Vector[nParticles]; |
132 | Real* densities = new Real[nParticles]; | 183 | Real* densities = new Real[nParticles]; |
133 | int* materialIndexes = new int[nParticles]; | 184 | int* materialIndexes = new int[nParticles]; |
134 | | 185 | |
135 | //Particle neighbors[n_particles]; | 186 | //Particle neighbors[n_particles]; |
136 | int index = 0; | ||
137 | for (int p = 0; p < nParticles; ++p) | 187 | for (int p = 0; p < nParticles; ++p) |
138 | { | 188 | { |
139 | positions[p] = particles[p].position; | 189 | positions[p] = particles[p].position; |
140 | densities[p] = particles[p].density; | 190 | densities[p] = particles[p].density; |
141 | materialIndexes[p] = MAT0; | 191 | materialIndexes[p] = MAT0; |
142 | index++; | 192 | } |
193 | |||
194 | for (int fluid = 0; fluid < nFluids; ++fluid) | ||
195 | { | ||
196 | materialProps.masses[fluid] = fluids[fluid]->GetMaterial()->GetMass(); | ||
197 | materialProps.gasStiffness[fluid] = fluids[fluid]->GetMaterial()->GetGasStiffness(); | ||
143 | } | 198 | } |
144 | 199 | ||
145 | 200 | // Allocate buffers on device | |
146 | int size_4_new_densities = nParticles * sizeof(Real); | 201 | int sizePositions = sizeof(Vector) * nParticles; |
147 | int size_4_pressure = nParticles * sizeof(Real); | 202 | int sizeParticleScalar = sizeof(Real) * nParticles; |
148 | | 203 | int sizeMatIndices = sizeof(int) * nParticles; |
149 | cl_mem d_positions = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(positions), NULL, &ret); | 204 | int sizeMaterials = sizeof(MaterialProperties) * nFluids; |
150 | cl_mem d_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(densities), NULL, &ret); | 205 | |
151 | cl_mem d_materialIndexes = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(materialIndexes), NULL, &ret); | 206 | cl_mem d_positions = clCreateBuffer(context, CL_MEM_READ_ONLY, sizePositions, NULL, &ret); |
152 | cl_mem d_materials = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(materialParam), NULL, &ret); | 207 | cl_mem d_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeParticleScalar, NULL, &ret); |
153 | 208 | cl_mem d_materialIndexes = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeMatIndices, NULL, &ret); | |
154 | cl_mem d_new_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, size_4_new_densities, NULL, &ret); | 209 | cl_mem d_materials = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeMaterials, NULL, &ret); |
155 | cl_mem d_pressure = clCreateBuffer(context, CL_MEM_READ_ONLY, size_4_pressure, NULL, &ret); | 210 | |
156 | 211 | cl_mem d_new_densities = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeParticleScalar, NULL, &ret); | |
157 | // Create a command queue | 212 | cl_mem d_pressure = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeParticleScalar, NULL, &ret); |
158 | cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); | 213 | |
159 | 214 | // Copy the data to their respective device buffers | |
160 | // Copy the lists to their respective memory buffers | 215 | static const int blocking = CL_TRUE; |
161 | ret = clEnqueueWriteBuffer(command_queue, d_positions, CL_TRUE, 0, sizeof(positions), positions, 0, NULL, NULL); | 216 | ret = clEnqueueWriteBuffer(command_queue, d_positions, blocking, 0, sizePositions, positions, 0, NULL, NULL); |
162 | ret = clEnqueueWriteBuffer(command_queue, d_densities, CL_TRUE, 0, sizeof(densities), densities, 0, NULL, NULL); | 217 | ret = clEnqueueWriteBuffer(command_queue, d_densities, blocking, 0, sizeParticleScalar, densities, 0, NULL, NULL); |
163 | ret = clEnqueueWriteBuffer(command_queue, d_materialIndexes, CL_TRUE, 0, sizeof(materialIndexes), materialIndexes, 0, NULL, NULL); | 218 | ret = clEnqueueWriteBuffer(command_queue, d_materialIndexes, blocking, 0, sizeMatIndices, materialIndexes, 0, NULL, NULL); |
164 | ret = clEnqueueWriteBuffer(command_queue, d_materials, CL_TRUE, 0, sizeof(materialParam), &materialParam, 0, NULL, NULL); | 219 | ret = clEnqueueWriteBuffer(command_queue, d_materials, blocking, 0, sizeMaterials, &materialProps, 0, NULL, NULL); |
165 | | 220 | |
166 | // Create a program from the kernel source | 221 | |
167 | cl_program program = clCreateProgramWithSource(context, 1, | 222 | // Set the arguments of the kernel |
168 | (const char **)&source_str, (const size_t *)&source_size, &ret); | 223 | ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_positions); |
169 | 224 | ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_densities); | |
170 | //Build the program | 225 | ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&d_materialIndexes); |
171 | ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | 226 | ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&d_materials); |
172 | | 227 | |
173 | // Create the OpenCL kernel | 228 | ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&d_new_densities); |
174 | cl_kernel kernel = clCreateKernel(program, "solver", &ret); | 229 | ret = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&d_pressure); |
175 | | 230 | |
176 | // Set the arguments of the kernel | 231 | ret = clSetKernelArg(kernel, 6, sizeof(nParticles), &nParticles); |
177 | ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_positions); | 232 | |
178 | ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_densities); | 233 | |
179 | ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&d_materialIndexes); | 234 | // Execute the OpenCL kernel on the list |
180 | ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&d_materials); | 235 | size_t local_work_size = 64; // Process one item at a time //Neither this |
181 | 236 | size_t global_work_size = ((int)((int) local_work_size/ nParticles))+1; | |
182 | ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&d_new_densities); | 237 | ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL); |
183 | ret = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&d_pressure); | 238 | |
184 | 239 | // Read the memory buffer C on the device to the local variable C | |
185 | ret = clSetKernelArg(kernel, 6, sizeof(nParticles), &nParticles); | 240 | Real* newDensities = new Real[nParticles]; |
186 | 241 | Real* newPressures = new Real[nParticles]; | |
187 | 242 | ret = clEnqueueReadBuffer(command_queue, d_new_densities, CL_TRUE, 0, nParticles * sizeof(Real), newDensities, 0, NULL, NULL); | |
188 | // Execute the OpenCL kernel on the list | 243 | ret = clEnqueueReadBuffer(command_queue, d_pressure, CL_TRUE, 0, nParticles * sizeof(Real), newPressures, 0, NULL, NULL); |
189 | size_t local_work_size = 64; // Process one item at a time //Neither this | 244 | |
190 | size_t global_work_size = ((int)((int) local_work_size/ nParticles))+1; | ||
191 | ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL); | ||
192 | | ||
193 | // Read the memory buffer C on the device to the local variable C | ||
194 | Real* newDensities = new Real[nParticles]; | ||
195 | Real* newPressures = new Real[nParticles]; | ||
196 | ret = clEnqueueReadBuffer(command_queue, d_new_densities, CL_TRUE, 0, nParticles * sizeof(Real), newDensities, 0, NULL, NULL); | ||
197 | ret = clEnqueueReadBuffer(command_queue, d_pressure, CL_TRUE, 0, nParticles * sizeof(Real), newPressures, 0, NULL, NULL); | ||
198 | | ||
199 | // Cleanup allocated objects | 245 | // Cleanup allocated objects |
200 | clReleaseKernel (kernel); | 246 | clReleaseKernel (kernel); |
201 | clReleaseProgram (program); | 247 | clReleaseProgram (program); |
202 | clReleaseCommandQueue (command_queue); | 248 | clReleaseCommandQueue (command_queue); |
203 | clReleaseContext (context); | 249 | clReleaseContext (context); |
204 | clReleaseMemObject (d_positions); | 250 | clReleaseMemObject (d_positions); |
205 | clReleaseMemObject (d_densities); | 251 | clReleaseMemObject (d_densities); |
206 | clReleaseMemObject (d_materialIndexes); | 252 | clReleaseMemObject (d_materialIndexes); |
207 | clReleaseMemObject (d_materials); | 253 | clReleaseMemObject (d_materials); |
208 | clReleaseMemObject (d_new_densities); | 254 | clReleaseMemObject (d_new_densities); |
209 | clReleaseMemObject (d_pressure); | 255 | clReleaseMemObject (d_pressure); |
210 | //free (cdDevices); | 256 | //free (cdDevices); |
211 | //free (cPathAndName); | 257 | //free (cPathAndName); |
212 | free (source_str); | 258 | // Free host memory |
213 | // Free host memory | 259 | //free(srcA); |
214 | //free(srcA); | 260 | //free(srcB); |
215 | //free(srcB); | 261 | //free (dst); |
216 | //free (dst); | 262 | |
217 | 263 | /*for (; _p != to; ++_p) | |
218 | /*for (; _p != to; ++_p) | ||
219 | { | 264 | { |
220 | Particle& p = **_p; | 265 | Particle& p = **_p; |
221 | 266 | ||
222 | Real density = 0; | 267 | Real density = 0; |
223 | | ||
224 | ParticlePtrVector neis = GetNeighbors(p); | ||
225 | 268 | ||
226 | for (ParticlePtrIteratorConst neighbor = neis.begin(), end = neis.end(); neighbor != end; ++neighbor) | 269 | ParticlePtrVector neis = GetNeighbors(p); |
227 | { | 270 | for (ParticlePtrIteratorConst neighbor = neis.begin(), end = neis.end(); neighbor != end; ++neighbor) |
228 | Particle& nei = **neighbor; | 271 | { |
229 | density += gaussianKernel->evaluate(p.predictedPosition - nei.predictedPosition); | 272 | Particle& nei = **neighbor; |
230 | } | 273 | density += gaussianKernel->evaluate(p.position - nei.position); |
231 | density *= p.GetMass(); | 274 | } |
275 | density *= p.GetMass(); | ||
276 | p.density = density; | ||
232 | 277 | ||
233 | p.density = density; | 278 | p.pressure = p.GetMaterial()->GetGasStiffness() * density; |
234 | p.pressure = p.GetMaterial()->GetGasStiffness() * density; | ||
235 | }*/ | 279 | }*/ |
236 | } | 280 | } |
237 | 281 | ||
... | ... | ||
265 | // Leapfrog integration | 309 | // Leapfrog integration |
266 | // see http://en.wikipedia.org/wiki/Leapfrog_integration | 310 | // see http://en.wikipedia.org/wiki/Leapfrog_integration |
267 | 311 | ||
268 | p.acceleration = p.GetSumOfAllForces() / p.GetMaterial()->GetRestDensity(); | 312 | p.acceleration = p.GetSumOfAllForces() / p.GetMaterial()->GetRestDensity(); |
269 | 313 | ||
270 | //newPos = p.position + p.velocity * GetConfig().GetDeltaT() + accel * GetConfig().GetDeltaT()HalfSquared; | 314 | //newPos = p.position + p.velocity * GetConfig().GetDeltaT() + accel * GetConfig().GetDeltaT()HalfSquared; |
271 | //newVelocity = p.velocity + GetConfig().GetDeltaT()Half * (accel + newAccel); | 315 | //newVelocity = p.velocity + GetConfig().GetDeltaT()Half * (accel + newAccel); |