1 package net.bmahe.genetics4j.gpu;
2
3 import java.io.IOException;
4 import java.nio.charset.StandardCharsets;
5 import java.util.ArrayList;
6 import java.util.HashMap;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10 import java.util.concurrent.CompletableFuture;
11 import java.util.concurrent.ExecutorService;
12
13 import org.apache.commons.collections4.ListUtils;
14 import org.apache.commons.io.IOUtils;
15 import org.apache.commons.lang3.Validate;
16 import org.apache.commons.lang3.tuple.Pair;
17 import org.apache.logging.log4j.LogManager;
18 import org.apache.logging.log4j.Logger;
19 import org.jocl.CL;
20 import org.jocl.cl_command_queue;
21 import org.jocl.cl_context;
22 import org.jocl.cl_context_properties;
23 import org.jocl.cl_device_id;
24 import org.jocl.cl_kernel;
25 import org.jocl.cl_platform_id;
26 import org.jocl.cl_program;
27 import org.jocl.cl_queue_properties;
28
29 import net.bmahe.genetics4j.core.Genotype;
30 import net.bmahe.genetics4j.core.evaluation.FitnessEvaluator;
31 import net.bmahe.genetics4j.gpu.opencl.DeviceReader;
32 import net.bmahe.genetics4j.gpu.opencl.DeviceUtils;
33 import net.bmahe.genetics4j.gpu.opencl.KernelInfoReader;
34 import net.bmahe.genetics4j.gpu.opencl.OpenCLExecutionContext;
35 import net.bmahe.genetics4j.gpu.opencl.PlatformReader;
36 import net.bmahe.genetics4j.gpu.opencl.PlatformUtils;
37 import net.bmahe.genetics4j.gpu.opencl.model.Device;
38 import net.bmahe.genetics4j.gpu.opencl.model.KernelInfo;
39 import net.bmahe.genetics4j.gpu.opencl.model.Platform;
40 import net.bmahe.genetics4j.gpu.spec.GPUEAConfiguration;
41 import net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext;
42 import net.bmahe.genetics4j.gpu.spec.Program;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130 public class GPUFitnessEvaluator<T extends Comparable<T>> implements FitnessEvaluator<T> {
131 public static final Logger logger = LogManager.getLogger(GPUFitnessEvaluator.class);
132
133 private final GPUEAExecutionContext<T> gpuEAExecutionContext;
134 private final GPUEAConfiguration<T> gpuEAConfiguration;
135 private final ExecutorService executorService;
136
137 private List<Pair<Platform, Device>> selectedPlatformToDevice;
138
139 final List<cl_context> clContexts = new ArrayList<>();
140 final List<cl_command_queue> clCommandQueues = new ArrayList<>();
141 final List<cl_program> clPrograms = new ArrayList<>();
142 final List<Map<String, cl_kernel>> clKernels = new ArrayList<>();
143 final List<OpenCLExecutionContext> clExecutionContexts = new ArrayList<>();
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160 public GPUFitnessEvaluator(final GPUEAExecutionContext<T> _gpuEAExecutionContext,
161 final GPUEAConfiguration<T> _gpuEAConfiguration, final ExecutorService _executorService) {
162 Validate.notNull(_gpuEAExecutionContext);
163 Validate.notNull(_gpuEAConfiguration);
164 Validate.notNull(_executorService);
165
166 this.gpuEAExecutionContext = _gpuEAExecutionContext;
167 this.gpuEAConfiguration = _gpuEAConfiguration;
168 this.executorService = _executorService;
169
170 CL.setExceptionsEnabled(true);
171 }
172
173 private String loadResource(final String filename) {
174 Validate.notBlank(filename);
175
176 try {
177 return IOUtils.resourceToString(filename, StandardCharsets.UTF_8);
178 } catch (IOException e) {
179 throw new IllegalStateException("Unable to load resource " + filename, e);
180 }
181 }
182
183 private List<String> grabProgramSources() {
184 final Program programSpec = gpuEAConfiguration.program();
185
186 logger.info("Load program source: {}", programSpec);
187
188 final List<String> sources = new ArrayList<>();
189
190 sources.addAll(programSpec.content());
191
192 programSpec.resources()
193 .stream()
194 .map(resource -> loadResource(resource))
195 .forEach(program -> {
196 sources.add(program);
197 });
198
199 return sources;
200 }
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231 @Override
232 public void preEvaluation() {
233 logger.trace("Init...");
234 FitnessEvaluator.super.preEvaluation();
235
236 final var platformReader = new PlatformReader();
237 final var deviceReader = new DeviceReader();
238 final var kernelInfoReader = new KernelInfoReader();
239
240 final int numPlatforms = PlatformUtils.numPlatforms();
241 logger.info("Found {} platforms", numPlatforms);
242
243 final List<cl_platform_id> platformIds = PlatformUtils.platformIds(numPlatforms);
244
245 logger.info("Selecting platform and devices");
246 final var platformFilters = gpuEAExecutionContext.platformFilters();
247 final var deviceFilters = gpuEAExecutionContext.deviceFilters();
248
249 selectedPlatformToDevice = platformIds.stream()
250 .map(platformReader::read)
251 .filter(platformFilters)
252 .flatMap(platform -> {
253 final var platformId = platform.platformId();
254 final int numDevices = DeviceUtils.numDevices(platformId);
255 logger.trace("\tPlatform {}: {} devices", platform.name(), numDevices);
256
257 final var deviceIds = DeviceUtils.getDeviceIds(platformId, numDevices);
258 return deviceIds.stream()
259 .map(deviceId -> Pair.of(platform, deviceId));
260 })
261 .map(platformToDeviceId -> {
262 final var platform = platformToDeviceId.getLeft();
263 final var platformId = platform.platformId();
264 final var deviceID = platformToDeviceId.getRight();
265
266 return Pair.of(platform, deviceReader.read(platformId, deviceID));
267 })
268 .filter(platformToDevice -> deviceFilters.test(platformToDevice.getRight()))
269 .toList();
270
271 if (logger.isTraceEnabled()) {
272 logger.trace("============================");
273 logger.trace("Selected devices:");
274 selectedPlatformToDevice.forEach(pd -> {
275 logger.trace("{}", pd.getLeft());
276 logger.trace("\t{}", pd.getRight());
277 });
278 logger.trace("============================");
279 }
280
281 Validate.isTrue(selectedPlatformToDevice.size() > 0);
282
283 final List<String> programs = grabProgramSources();
284 final String[] programsArr = programs.toArray(new String[programs.size()]);
285
286 for (final var platformAndDevice : selectedPlatformToDevice) {
287 final var platform = platformAndDevice.getLeft();
288 final var device = platformAndDevice.getRight();
289
290 logger.info("Processing platform [{}] / device [{}]", platform.name(), device.name());
291
292 logger.info("\tCreating context");
293 cl_context_properties contextProperties = new cl_context_properties();
294 contextProperties.addProperty(CL.CL_CONTEXT_PLATFORM, platform.platformId());
295
296 final cl_context context = CL
297 .clCreateContext(contextProperties, 1, new cl_device_id[] { device.deviceId() }, null, null, null);
298
299 logger.info("\tCreating command queue");
300 final cl_queue_properties queueProperties = new cl_queue_properties();
301 queueProperties.addProperty(CL.CL_QUEUE_PROPERTIES,
302 CL.CL_QUEUE_PROFILING_ENABLE | CL.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
303 final cl_command_queue commandQueue = CL
304 .clCreateCommandQueueWithProperties(context, device.deviceId(), queueProperties, null);
305
306 logger.info("\tCreate program");
307 final cl_program program = CL.clCreateProgramWithSource(context, programsArr.length, programsArr, null, null);
308
309 final var programSpec = gpuEAConfiguration.program();
310 final var buildOptions = programSpec.buildOptions()
311 .orElse(null);
312 logger.info("\tBuilding program with options: {}", buildOptions);
313 CL.clBuildProgram(program, 0, null, buildOptions, null, null);
314
315 final Set<String> kernelNames = gpuEAConfiguration.program()
316 .kernelNames();
317
318 final Map<String, cl_kernel> kernels = new HashMap<>();
319 final Map<String, KernelInfo> kernelInfos = new HashMap<>();
320 for (final String kernelName : kernelNames) {
321
322 logger.info("\tCreate kernel {}", kernelName);
323 final cl_kernel kernel = CL.clCreateKernel(program, kernelName, null);
324 Validate.notNull(kernel);
325
326 kernels.put(kernelName, kernel);
327
328 final var kernelInfo = kernelInfoReader.read(device.deviceId(), kernel, kernelName);
329 logger.trace("\t{}", kernelInfo);
330 kernelInfos.put(kernelName, kernelInfo);
331 }
332
333 clContexts.add(context);
334 clCommandQueues.add(commandQueue);
335 clKernels.add(kernels);
336 clPrograms.add(program);
337
338 final var openCLExecutionContext = OpenCLExecutionContext.builder()
339 .platform(platform)
340 .device(device)
341 .clContext(context)
342 .clCommandQueue(commandQueue)
343 .kernels(kernels)
344 .kernelInfos(kernelInfos)
345 .clProgram(program)
346 .build();
347
348 clExecutionContexts.add(openCLExecutionContext);
349 }
350
351 final var fitness = gpuEAConfiguration.fitness();
352 fitness.beforeAllEvaluations();
353 for (final OpenCLExecutionContext clExecutionContext : clExecutionContexts) {
354 fitness.beforeAllEvaluations(clExecutionContext, executorService);
355 }
356 }
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400 @Override
401 public List<T> evaluate(final long generation, final List<Genotype> genotypes) {
402
403 final var fitness = gpuEAConfiguration.fitness();
404
405
406
407
408 final int partitionSize = (int) (Math.ceil((double) genotypes.size() / clExecutionContexts.size()));
409 final var subGenotypes = ListUtils.partition(genotypes, partitionSize);
410 logger.debug("Genotype decomposed in {} partition(s)", subGenotypes.size());
411 if (logger.isTraceEnabled()) {
412 for (int i = 0; i < subGenotypes.size(); i++) {
413 final List<Genotype> subGenotype = subGenotypes.get(i);
414 logger.trace("\tPartition {} with {} elements", i, subGenotype.size());
415 }
416 }
417
418 final List<CompletableFuture<List<T>>> subResultsCF = new ArrayList<>();
419 for (int i = 0; i < subGenotypes.size(); i++) {
420 final var openCLExecutionContext = clExecutionContexts.get(i % clExecutionContexts.size());
421 final var subGenotype = subGenotypes.get(i);
422
423 fitness.beforeEvaluation(generation, subGenotype);
424 fitness.beforeEvaluation(openCLExecutionContext, executorService, generation, subGenotype);
425
426 final var resultsCF = fitness.compute(openCLExecutionContext, executorService, generation, subGenotype)
427 .thenApply((results) -> {
428
429 fitness.afterEvaluation(openCLExecutionContext, executorService, generation, subGenotype);
430 fitness.afterEvaluation(generation, subGenotype);
431
432 return results;
433 });
434
435 subResultsCF.add(resultsCF);
436 }
437
438 final List<T> resultsEvaluation = new ArrayList<>(genotypes.size());
439 for (final CompletableFuture<List<T>> subResultCF : subResultsCF) {
440 final var fitnessResults = subResultCF.join();
441 resultsEvaluation.addAll(fitnessResults);
442 }
443 return resultsEvaluation;
444 }
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477 @Override
478 public void postEvaluation() {
479
480 final var fitness = gpuEAConfiguration.fitness();
481
482 for (final OpenCLExecutionContext clExecutionContext : clExecutionContexts) {
483 fitness.afterAllEvaluations(clExecutionContext, executorService);
484 }
485 fitness.afterAllEvaluations();
486
487 logger.debug("Releasing kernels");
488
489 for (final Map<String, cl_kernel> kernels : clKernels) {
490 for (final cl_kernel clKernel : kernels.values()) {
491 CL.clReleaseKernel(clKernel);
492 }
493 }
494 clKernels.clear();
495
496 logger.debug("Releasing programs");
497 for (final cl_program clProgram : clPrograms) {
498 CL.clReleaseProgram(clProgram);
499 }
500 clPrograms.clear();
501
502 logger.debug("Releasing command queues");
503 for (final cl_command_queue clCommandQueue : clCommandQueues) {
504 CL.clReleaseCommandQueue(clCommandQueue);
505 }
506 clCommandQueues.clear();
507
508 logger.debug("Releasing contexts");
509 for (final cl_context clContext : clContexts) {
510 CL.clReleaseContext(clContext);
511 }
512 clContexts.clear();
513
514 clExecutionContexts.clear();
515 selectedPlatformToDevice = null;
516
517 FitnessEvaluator.super.postEvaluation();
518 }
519 }