package net.bmahe.genetics4j.gpu.opencl.model;

import java.util.Set;

import org.immutables.value.Value;
import org.jocl.cl_device_id;

/**
 * Represents an OpenCL compute device with its capabilities and characteristics for GPU-accelerated evolutionary algorithms.
 *
 * <p>Device encapsulates the properties and capabilities of an OpenCL compute device (GPU, CPU, or accelerator)
 * that can be used for fitness evaluation in evolutionary algorithms. This information is essential for
 * device selection, kernel optimization, and workload configuration to achieve optimal performance.
 *
 * <p>Key device characteristics include:
 * <ul>
 * <li><strong>Device identification</strong>: Name, vendor, and version information</li>
 * <li><strong>Compute capabilities</strong>: Number of compute units and maximum work group sizes</li>
 * <li><strong>Memory hierarchy</strong>: Global, local, and constant memory sizes and characteristics</li>
 * <li><strong>Processing features</strong>: Vector width preferences, image support, and built-in kernels</li>
 * <li><strong>Performance metrics</strong>: Clock frequency and execution capabilities</li>
 * </ul>
 *
 * <p>Device selection considerations for evolutionary algorithms:
 * <ul>
 * <li><strong>Device type</strong>: GPU devices typically provide the highest parallelism for large populations</li>
 * <li><strong>Compute units</strong>: More compute units allow more work groups to execute concurrently, which benefits large populations</li>
 * <li><strong>Work group sizes</strong>: Must accommodate the parallelism patterns of fitness kernels</li>
 * <li><strong>Memory capacity</strong>: Must be sufficient for population data and intermediate results</li>
 * <li><strong>Vector operations</strong>: Vector width preferences can guide the layout of numerical computations</li>
 * </ul>
 *
 * <p>Common device filtering patterns:
 * <pre>{@code
 * // Select GPU devices with sufficient parallel processing capability
 * Predicate<Device> gpuFilter = device ->
 *     device.deviceType().contains(DeviceType.GPU) &&
 *     device.maxComputeUnits() >= 8;
 *
 * // Select devices with large work group support for population processing
 * Predicate<Device> workGroupFilter = device ->
 *     device.maxWorkGroupSize() >= 256;
 *
 * // Select devices with high clock frequency for compute-intensive fitness
 * Predicate<Device> performanceFilter = device ->
 *     device.maxClockFrequency() >= 1000; // MHz
 *
 * // Select devices that support floating-point vector operations
 * Predicate<Device> vectorFilter = device ->
 *     device.preferredVectorWidthFloat() >= 4;
 *
 * // Comprehensive filter for evolutionary algorithm suitability
 * Predicate<Device> eaOptimizedFilter = device ->
 *     device.deviceType().contains(DeviceType.GPU) &&
 *     device.maxComputeUnits() >= 4 &&
 *     device.maxWorkGroupSize() >= 128 &&
 *     device.preferredVectorWidthFloat() >= 2;
 * }</pre>
 *
 * <p>Performance optimization using device information (a configuration sketch follows this list):
 * <ul>
 * <li><strong>Work group sizing</strong>: Configure kernel work groups based on {@link #maxWorkGroupSize()}</li>
 * <li><strong>Parallel dispatch</strong>: Scale parallelism based on {@link #maxComputeUnits()}</li>
 * <li><strong>Vector operations</strong>: Optimize data layouts for {@link #preferredVectorWidthFloat()}</li>
 * <li><strong>Memory access patterns</strong>: Design kernels considering memory hierarchy characteristics</li>
 * </ul>
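 *
 * <p>For example, a launch configuration can be derived from these limits. This is a minimal sketch;
 * {@code populationSize} and the target work group size of 256 are illustrative assumptions, not
 * values defined by this API:
 * <pre>{@code
 * long populationSize = 10_000;                                      // assumed problem size
 * long localSize = Math.min(256L, device.maxWorkGroupSize());        // respect the device limit
 * long globalSize = ((populationSize + localSize - 1) / localSize)
 *     * localSize;                                                   // round up to a multiple of localSize
 * int vectorWidth = Math.max(1, device.preferredVectorWidthFloat()); // guide float data layout
 * }</pre>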
 *
 * <p>Device capability assessment workflow (a filtering sketch follows this list):
 * <ol>
 * <li><strong>Device discovery</strong>: Enumerate devices from selected platforms</li>
 * <li><strong>Capability query</strong>: Read device properties from OpenCL runtime</li>
 * <li><strong>Model creation</strong>: Create device objects with discovered capabilities</li>
 * <li><strong>Filtering</strong>: Apply user-defined predicates to select suitable devices</li>
 * <li><strong>Context creation</strong>: Create OpenCL contexts for selected devices</li>
 * </ol>
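 *
 * <p>The filtering step can be expressed with standard streams. This sketch assumes a {@code devices}
 * list has already been populated by the discovery and model-creation steps above:
 * <pre>{@code
 * List<Device> selected = devices.stream()
 *     .filter(device -> device.deviceType().contains(DeviceType.GPU))
 *     .filter(device -> device.maxComputeUnits() >= 4)
 *     .collect(Collectors.toList());
 * }</pre>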
 *
 * <p>Common device types in evolutionary computation:
 * <ul>
 * <li><strong>GPU devices</strong>: Provide massive parallelism for large population fitness evaluation</li>
 * <li><strong>CPU devices</strong>: Offer good sequential performance and large memory capacity</li>
 * <li><strong>Accelerator devices</strong>: Specialized hardware for specific computational patterns</li>
 * <li><strong>Custom devices</strong>: FPGA or other specialized compute devices</li>
 * </ul>
 *
 * <p>Error handling and compatibility (a validation sketch follows this list):
 * <ul>
 * <li><strong>Device availability</strong>: Devices may become unavailable during execution</li>
 * <li><strong>Capability validation</strong>: Ensure the device supports required kernel features</li>
 * <li><strong>Memory constraints</strong>: Validate that device memory is sufficient for the population size</li>
 * <li><strong>Work group limits</strong>: Ensure kernels respect device work group size limits</li>
 * </ul>
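 *
 * <p>For instance, a requested two-dimensional work group can be checked against the device limits
 * before launching a fitness kernel. The {@code localSizeX} and {@code localSizeY} values are
 * illustrative assumptions:
 * <pre>{@code
 * long localSizeX = 16;
 * long localSizeY = 16;
 * boolean fits = device.maxWorkItemDimensions() >= 2
 *     && localSizeX * localSizeY <= device.maxWorkGroupSize()
 *     && localSizeX <= device.maxWorkItemSizes()[0]
 *     && localSizeY <= device.maxWorkItemSizes()[1];
 * }</pre>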
 *
 * @see Platform
 * @see DeviceType
 * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters()
 * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils
 */
@Value.Immutable
public interface Device {

    /**
     * Returns the native OpenCL device identifier.
     *
     * @return the OpenCL device ID for low-level operations
     */
    cl_device_id deviceId();

    /**
     * Returns the device name provided by the vendor.
     *
     * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7")
     */
    String name();

    /**
     * Returns the device vendor name.
     *
     * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD")
     */
    String vendor();

    /**
     * Returns the OpenCL version supported by this device.
     *
     * @return the device OpenCL version string (e.g., "OpenCL 2.1")
     */
    String deviceVersion();

    /**
     * Returns the device driver version.
     *
     * @return the driver version string provided by the vendor
     */
    String driverVersion();

    /**
     * Returns the maximum configured clock frequency of the device compute units in MHz.
     *
     * @return the maximum clock frequency in megahertz
     */
    int maxClockFrequency();

    /**
     * Returns the set of device types that classify this device.
     *
     * @return set of device types (e.g., GPU, CPU, ACCELERATOR)
     */
    Set<DeviceType> deviceType();

    /**
     * Returns the set of built-in kernel names available on this device.
     *
     * @return set of built-in kernel names provided by the device
     */
    Set<String> builtInKernels();

    /**
     * Returns the number of parallel compute units on the device.
     *
     * <p>Compute units represent the primary parallel processing elements and directly
     * impact the device's ability to execute work groups concurrently.
     *
     * @return the number of parallel compute units available
     */
    int maxComputeUnits();

    /**
     * Returns the maximum number of work-item dimensions supported by the device.
     *
     * @return the maximum number of dimensions for work-item indexing
     */
    int maxWorkItemDimensions();

    /**
     * Returns the maximum number of work-items in a work group for kernel execution.
     *
     * <p>This limit constrains the local work group size that can be used when
     * launching kernels on this device. Larger work groups can improve memory
     * locality and reduce synchronization overhead.
     *
     * @return the maximum work group size for kernel execution
     */
    long maxWorkGroupSize();

    /**
     * Returns the maximum number of work-items in each dimension of a work group.
     *
     * <p>The array contains the maximum work-item count for each dimension,
     * providing more granular control over work group configuration than
     * the overall {@link #maxWorkGroupSize()} limit.
     *
     * @return array of maximum work-item counts per dimension
     */
    long[] maxWorkItemSizes();

    /**
     * Returns whether the device supports image objects in kernels.
     *
     * @return true if the device supports image processing operations
     */
    boolean imageSupport();

    /**
     * Returns the preferred vector width for float operations.
     *
     * <p>This indicates the optimal vector width for floating-point operations
     * on this device, which can be used to optimize numerical computations
     * in fitness evaluation kernels.
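     *
     * <p>For example, per-individual float buffers can be padded to a multiple of the preferred
     * width. This is a minimal sketch; {@code chromosomeLength} is an illustrative assumption:
     * <pre>{@code
     * int width = Math.max(1, device.preferredVectorWidthFloat());
     * int paddedLength = ((chromosomeLength + width - 1) / width) * width;
     * }</pre>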
     *
     * @return the preferred vector width for float operations
     */
    int preferredVectorWidthFloat();

    /**
     * Creates a new builder for constructing Device instances.
     *
     * @return a new builder for creating device objects
     */
    static ImmutableDevice.Builder builder() {
        return ImmutableDevice.builder();
    }
}