1 package net.bmahe.genetics4j.gpu;
2
3 import java.io.IOException;
4 import java.nio.charset.StandardCharsets;
5 import java.util.ArrayList;
6 import java.util.HashMap;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10 import java.util.concurrent.CompletableFuture;
11 import java.util.concurrent.ExecutorService;
12
13 import org.apache.commons.collections4.ListUtils;
14 import org.apache.commons.io.IOUtils;
15 import org.apache.commons.lang3.Validate;
16 import org.apache.commons.lang3.tuple.Pair;
17 import org.apache.logging.log4j.LogManager;
18 import org.apache.logging.log4j.Logger;
19 import org.jocl.CL;
20 import org.jocl.cl_command_queue;
21 import org.jocl.cl_context;
22 import org.jocl.cl_context_properties;
23 import org.jocl.cl_device_id;
24 import org.jocl.cl_kernel;
25 import org.jocl.cl_platform_id;
26 import org.jocl.cl_program;
27 import org.jocl.cl_queue_properties;
28
29 import net.bmahe.genetics4j.core.Genotype;
30 import net.bmahe.genetics4j.core.evaluation.FitnessEvaluator;
31 import net.bmahe.genetics4j.gpu.opencl.DeviceReader;
32 import net.bmahe.genetics4j.gpu.opencl.DeviceUtils;
33 import net.bmahe.genetics4j.gpu.opencl.KernelInfoReader;
34 import net.bmahe.genetics4j.gpu.opencl.OpenCLExecutionContext;
35 import net.bmahe.genetics4j.gpu.opencl.PlatformReader;
36 import net.bmahe.genetics4j.gpu.opencl.PlatformUtils;
37 import net.bmahe.genetics4j.gpu.opencl.model.Device;
38 import net.bmahe.genetics4j.gpu.opencl.model.KernelInfo;
39 import net.bmahe.genetics4j.gpu.opencl.model.Platform;
40 import net.bmahe.genetics4j.gpu.spec.GPUEAConfiguration;
41 import net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext;
42 import net.bmahe.genetics4j.gpu.spec.Program;
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133 public class GPUFitnessEvaluator<T extends Comparable<T>> implements FitnessEvaluator<T> {
134 public static final Logger logger = LogManager.getLogger(GPUFitnessEvaluator.class);
135
136 private final GPUEAExecutionContext<T> gpuEAExecutionContext;
137 private final GPUEAConfiguration<T> gpuEAConfiguration;
138 private final ExecutorService executorService;
139
140 private List<Pair<Platform, Device>> selectedPlatformToDevice;
141
142 final List<cl_context> clContexts = new ArrayList<>();
143 final List<cl_command_queue> clCommandQueues = new ArrayList<>();
144 final List<cl_program> clPrograms = new ArrayList<>();
145 final List<Map<String, cl_kernel>> clKernels = new ArrayList<>();
146 final List<OpenCLExecutionContext> clExecutionContexts = new ArrayList<>();
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162 public GPUFitnessEvaluator(final GPUEAExecutionContext<T> _gpuEAExecutionContext,
163 final GPUEAConfiguration<T> _gpuEAConfiguration, final ExecutorService _executorService) {
164 Validate.notNull(_gpuEAExecutionContext);
165 Validate.notNull(_gpuEAConfiguration);
166 Validate.notNull(_executorService);
167
168 this.gpuEAExecutionContext = _gpuEAExecutionContext;
169 this.gpuEAConfiguration = _gpuEAConfiguration;
170 this.executorService = _executorService;
171
172 CL.setExceptionsEnabled(true);
173 }
174
175 private String loadResource(final String filename) {
176 Validate.notBlank(filename);
177
178 try {
179 return IOUtils.resourceToString(filename, StandardCharsets.UTF_8);
180 } catch (IOException e) {
181 throw new IllegalStateException("Unable to load resource " + filename, e);
182 }
183 }
184
185 private List<String> grabProgramSources() {
186 final Program programSpec = gpuEAConfiguration.program();
187
188 logger.info("Load program source: {}", programSpec);
189
190 final List<String> sources = new ArrayList<>();
191
192 sources.addAll(programSpec.content());
193
194 programSpec.resources()
195 .stream()
196 .map(resource -> loadResource(resource))
197 .forEach(program -> {
198 sources.add(program);
199 });
200
201 return sources;
202 }
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232 @Override
233 public void preEvaluation() {
234 logger.trace("Init...");
235 FitnessEvaluator.super.preEvaluation();
236
237 final var platformReader = new PlatformReader();
238 final var deviceReader = new DeviceReader();
239 final var kernelInfoReader = new KernelInfoReader();
240
241 final int numPlatforms = PlatformUtils.numPlatforms();
242 logger.info("Found {} platforms", numPlatforms);
243
244 final List<cl_platform_id> platformIds = PlatformUtils.platformIds(numPlatforms);
245
246 logger.info("Selecting platform and devices");
247 final var platformFilters = gpuEAExecutionContext.platformFilters();
248 final var deviceFilters = gpuEAExecutionContext.deviceFilters();
249
250 selectedPlatformToDevice = platformIds.stream()
251 .map(platformReader::read)
252 .filter(platformFilters)
253 .flatMap(platform -> {
254 final var platformId = platform.platformId();
255 final int numDevices = DeviceUtils.numDevices(platformId);
256 logger.trace("\tPlatform {}: {} devices", platform.name(), numDevices);
257
258 final var deviceIds = DeviceUtils.getDeviceIds(platformId, numDevices);
259 return deviceIds.stream()
260 .map(deviceId -> Pair.of(platform, deviceId));
261 })
262 .map(platformToDeviceId -> {
263 final var platform = platformToDeviceId.getLeft();
264 final var platformId = platform.platformId();
265 final var deviceID = platformToDeviceId.getRight();
266
267 return Pair.of(platform, deviceReader.read(platformId, deviceID));
268 })
269 .filter(platformToDevice -> deviceFilters.test(platformToDevice.getRight()))
270 .toList();
271
272 if (logger.isTraceEnabled()) {
273 logger.trace("============================");
274 logger.trace("Selected devices:");
275 selectedPlatformToDevice.forEach(pd -> {
276 logger.trace("{}", pd.getLeft());
277 logger.trace("\t{}", pd.getRight());
278 });
279 logger.trace("============================");
280 }
281
282 Validate.isTrue(selectedPlatformToDevice.size() > 0);
283
284 final List<String> programs = grabProgramSources();
285 final String[] programsArr = programs.toArray(new String[programs.size()]);
286
287 for (final var platformAndDevice : selectedPlatformToDevice) {
288 final var platform = platformAndDevice.getLeft();
289 final var device = platformAndDevice.getRight();
290
291 logger.info("Processing platform [{}] / device [{}]", platform.name(), device.name());
292
293 logger.info("\tCreating context");
294 cl_context_properties contextProperties = new cl_context_properties();
295 contextProperties.addProperty(CL.CL_CONTEXT_PLATFORM, platform.platformId());
296
297 final cl_context context = CL
298 .clCreateContext(contextProperties, 1, new cl_device_id[] { device.deviceId() }, null, null, null);
299
300 logger.info("\tCreating command queue");
301 final cl_queue_properties queueProperties = new cl_queue_properties();
302 queueProperties.addProperty(CL.CL_QUEUE_PROPERTIES,
303 CL.CL_QUEUE_PROFILING_ENABLE | CL.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
304 final cl_command_queue commandQueue = CL
305 .clCreateCommandQueueWithProperties(context, device.deviceId(), queueProperties, null);
306
307 logger.info("\tCreate program");
308 final cl_program program = CL.clCreateProgramWithSource(context, programsArr.length, programsArr, null, null);
309
310 final var programSpec = gpuEAConfiguration.program();
311 final var buildOptions = programSpec.buildOptions()
312 .orElse(null);
313 logger.info("\tBuilding program with options: {}", buildOptions);
314 CL.clBuildProgram(program, 0, null, buildOptions, null, null);
315
316 final Set<String> kernelNames = gpuEAConfiguration.program()
317 .kernelNames();
318
319 final Map<String, cl_kernel> kernels = new HashMap<>();
320 final Map<String, KernelInfo> kernelInfos = new HashMap<>();
321 for (final String kernelName : kernelNames) {
322
323 logger.info("\tCreate kernel {}", kernelName);
324 final cl_kernel kernel = CL.clCreateKernel(program, kernelName, null);
325 Validate.notNull(kernel);
326
327 kernels.put(kernelName, kernel);
328
329 final var kernelInfo = kernelInfoReader.read(device.deviceId(), kernel, kernelName);
330 logger.trace("\t{}", kernelInfo);
331 kernelInfos.put(kernelName, kernelInfo);
332 }
333
334 clContexts.add(context);
335 clCommandQueues.add(commandQueue);
336 clKernels.add(kernels);
337 clPrograms.add(program);
338
339 final var openCLExecutionContext = OpenCLExecutionContext.builder()
340 .platform(platform)
341 .device(device)
342 .clContext(context)
343 .clCommandQueue(commandQueue)
344 .kernels(kernels)
345 .kernelInfos(kernelInfos)
346 .clProgram(program)
347 .build();
348
349 clExecutionContexts.add(openCLExecutionContext);
350 }
351
352 final var fitness = gpuEAConfiguration.fitness();
353 fitness.beforeAllEvaluations();
354 for (final OpenCLExecutionContext clExecutionContext : clExecutionContexts) {
355 fitness.beforeAllEvaluations(clExecutionContext, executorService);
356 }
357 }
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400 @Override
401 public List<T> evaluate(final long generation, final List<Genotype> genotypes) {
402
403 final var fitness = gpuEAConfiguration.fitness();
404
405
406
407
408 final int partitionSize = (int) (Math.ceil((double) genotypes.size() / clExecutionContexts.size()));
409 final var subGenotypes = ListUtils.partition(genotypes, partitionSize);
410 logger.debug("Genotype decomposed in {} partition(s)", subGenotypes.size());
411 if (logger.isTraceEnabled()) {
412 for (int i = 0; i < subGenotypes.size(); i++) {
413 final List<Genotype> subGenotype = subGenotypes.get(i);
414 logger.trace("\tPartition {} with {} elements", i, subGenotype.size());
415 }
416 }
417
418 final List<CompletableFuture<List<T>>> subResultsCF = new ArrayList<>();
419 for (int i = 0; i < subGenotypes.size(); i++) {
420 final var openCLExecutionContext = clExecutionContexts.get(i % clExecutionContexts.size());
421 final var subGenotype = subGenotypes.get(i);
422
423 fitness.beforeEvaluation(generation, subGenotype);
424 fitness.beforeEvaluation(openCLExecutionContext, executorService, generation, subGenotype);
425
426 final var resultsCF = fitness.compute(openCLExecutionContext, executorService, generation, subGenotype)
427 .thenApply((results) -> {
428
429 fitness.afterEvaluation(openCLExecutionContext, executorService, generation, subGenotype);
430 fitness.afterEvaluation(generation, subGenotype);
431
432 return results;
433 });
434
435 subResultsCF.add(resultsCF);
436 }
437
438 final List<T> resultsEvaluation = new ArrayList<>(genotypes.size());
439 for (final CompletableFuture<List<T>> subResultCF : subResultsCF) {
440 final var fitnessResults = subResultCF.join();
441 resultsEvaluation.addAll(fitnessResults);
442 }
443 return resultsEvaluation;
444 }
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475 @Override
476 public void postEvaluation() {
477
478 final var fitness = gpuEAConfiguration.fitness();
479
480 for (final OpenCLExecutionContext clExecutionContext : clExecutionContexts) {
481 fitness.afterAllEvaluations(clExecutionContext, executorService);
482 }
483 fitness.afterAllEvaluations();
484
485 logger.debug("Releasing kernels");
486
487 for (final Map<String, cl_kernel> kernels : clKernels) {
488 for (final cl_kernel clKernel : kernels.values()) {
489 CL.clReleaseKernel(clKernel);
490 }
491 }
492 clKernels.clear();
493
494 logger.debug("Releasing programs");
495 for (final cl_program clProgram : clPrograms) {
496 CL.clReleaseProgram(clProgram);
497 }
498 clPrograms.clear();
499
500 logger.debug("Releasing command queues");
501 for (final cl_command_queue clCommandQueue : clCommandQueues) {
502 CL.clReleaseCommandQueue(clCommandQueue);
503 }
504 clCommandQueues.clear();
505
506 logger.debug("Releasing contexts");
507 for (final cl_context clContext : clContexts) {
508 CL.clReleaseContext(clContext);
509 }
510 clContexts.clear();
511
512 clExecutionContexts.clear();
513 selectedPlatformToDevice = null;
514
515 FitnessEvaluator.super.postEvaluation();
516 }
517 }