package net.bmahe.genetics4j.gpu.opencl.model;

import java.util.Set;

import org.immutables.value.Value;
import org.jocl.cl_device_id;

8 /**
9 * Represents an OpenCL compute device with its capabilities and characteristics for GPU-accelerated evolutionary
10 * algorithms.
11 *
12 * <p>Device encapsulates the properties and capabilities of an OpenCL compute device (GPU, CPU, or accelerator) that
13 * can be used for fitness evaluation in evolutionary algorithms. This information is essential for device selection,
14 * kernel optimization, and workload configuration to achieve optimal performance.
15 *
16 * <p>Key device characteristics include:
17 * <ul>
18 * <li><strong>Device identification</strong>: Name, vendor, and version information</li>
19 * <li><strong>Compute capabilities</strong>: Number of compute units and maximum work group sizes</li>
20 * <li><strong>Memory hierarchy</strong>: Global, local, and constant memory sizes and characteristics</li>
21 * <li><strong>Processing features</strong>: Vector width preferences, image support, and built-in kernels</li>
22 * <li><strong>Performance metrics</strong>: Clock frequency and execution capabilities</li>
23 * </ul>
24 *
25 * <p>Device selection considerations for evolutionary algorithms:
26 * <ul>
27 * <li><strong>Device type</strong>: GPU devices typically provide highest parallelism for large populations</li>
28 * <li><strong>Compute units</strong>: More compute units allow better utilization of large populations</li>
29 * <li><strong>Work group sizes</strong>: Must accommodate the parallelism patterns of fitness kernels</li>
30 * <li><strong>Memory capacity</strong>: Must be sufficient for population data and intermediate results</li>
31 * <li><strong>Vector operations</strong>: Vector width preferences can optimize numerical computations</li>
32 * </ul>
33 *
34 * <p>Common device filtering patterns:
35 *
36 * <pre>{@code
37 * // Select GPU devices with sufficient parallel processing capability
38 * Predicate<Device> gpuFilter = device -> device.deviceType()
39 * .contains(DeviceType.GPU) && device.maxComputeUnits() >= 8;
40 *
41 * // Select devices with large work group support for population processing
42 * Predicate<Device> workGroupFilter = device -> device.maxWorkGroupSize() >= 256;
43 *
44 * // Select devices with high clock frequency for compute-intensive fitness
45 * Predicate<Device> performanceFilter = device -> device.maxClockFrequency() >= 1000; // MHz
46 *
47 * // Select devices that support floating-point vector operations
48 * Predicate<Device> vectorFilter = device -> device.preferredVectorWidthFloat() >= 4;
49 *
50 * // Comprehensive filter for evolutionary algorithm suitability
51 * Predicate<Device> eaOptimizedFilter = device -> device.deviceType()
52 * .contains(DeviceType.GPU) && device.maxComputeUnits() >= 4 && device.maxWorkGroupSize() >= 128
53 * && device.preferredVectorWidthFloat() >= 2;
54 * }</pre>
55 *
56 * <p>Performance optimization using device information:
57 * <ul>
58 * <li><strong>Work group sizing</strong>: Configure kernel work groups based on {@link #maxWorkGroupSize()}</li>
59 * <li><strong>Parallel dispatch</strong>: Scale parallelism based on {@link #maxComputeUnits()}</li>
60 * <li><strong>Vector operations</strong>: Optimize data layouts for {@link #preferredVectorWidthFloat()}</li>
61 * <li><strong>Memory access patterns</strong>: Design kernels considering memory hierarchy characteristics</li>
62 * </ul>
63 *
64 * <p>Device capability assessment workflow:
65 * <ol>
66 * <li><strong>Device discovery</strong>: Enumerate devices from selected platforms</li>
67 * <li><strong>Capability query</strong>: Read device properties from OpenCL runtime</li>
68 * <li><strong>Model creation</strong>: Create device objects with discovered capabilities</li>
69 * <li><strong>Filtering</strong>: Apply user-defined predicates to select suitable devices</li>
70 * <li><strong>Context creation</strong>: Create OpenCL contexts for selected devices</li>
71 * </ol>
72 *
73 * <p>Common device types in evolutionary computation:
74 * <ul>
75 * <li><strong>GPU devices</strong>: Provide massive parallelism for large population fitness evaluation</li>
76 * <li><strong>CPU devices</strong>: Offer good sequential performance and large memory capacity</li>
77 * <li><strong>Accelerator devices</strong>: Specialized hardware for specific computational patterns</li>
78 * <li><strong>Custom devices</strong>: FPGA or other specialized compute devices</li>
79 * </ul>
80 *
81 * <p>Error handling and compatibility:
82 * <ul>
83 * <li><strong>Device availability</strong>: Devices may become unavailable during execution</li>
84 * <li><strong>Capability validation</strong>: Ensure device supports required kernel features</li>
85 * <li><strong>Memory constraints</strong>: Validate device memory is sufficient for population size</li>
86 * <li><strong>Work group limits</strong>: Ensure kernels respect device work group size limits</li>
87 * </ul>
88 *
89 * @see Platform
90 * @see DeviceType
91 * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters()
92 * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils
93 */
94 @Value.Immutable
95 public interface Device {
96
97 /**
98 * Returns the native OpenCL device identifier.
99 *
100 * @return the OpenCL device ID for low-level operations
101 */
102 cl_device_id deviceId();
103
104 /**
105 * Returns the device name provided by the vendor.
106 *
107 * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7")
108 */
109 String name();
110
111 /**
112 * Returns the device vendor name.
113 *
114 * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD")
115 */
116 String vendor();
117
118 /**
119 * Returns the OpenCL version supported by this device.
120 *
121 * @return the device OpenCL version string (e.g., "OpenCL 2.1")
122 */
123 String deviceVersion();
124
125 /**
126 * Returns the device driver version.
127 *
128 * @return the driver version string provided by the vendor
129 */
130 String driverVersion();
131
132 /**
133 * Returns the maximum configured clock frequency of the device compute units in MHz.
134 *
135 * @return the maximum clock frequency in megahertz
136 */
137 int maxClockFrequency();
138
139 /**
140 * Returns the set of device types that classify this device.
141 *
142 * @return set of device types (e.g., GPU, CPU, ACCELERATOR)
143 */
144 Set<DeviceType> deviceType();
145
146 /**
147 * Returns the set of built-in kernel names available on this device.
148 *
149 * @return set of built-in kernel names provided by the device
150 */
151 Set<String> builtInKernels();
152
153 /**
154 * Returns the number of parallel compute units on the device.
155 *
156 * <p>Compute units represent the primary parallel processing elements and directly impact the device's ability to
157 * execute work groups concurrently.
158 *
159 * @return the number of parallel compute units available
160 */
161 int maxComputeUnits();
162
163 /**
164 * Returns the maximum number of work-item dimensions supported by the device.
165 *
166 * @return the maximum number of dimensions for work-item indexing
167 */
168 int maxWorkItemDimensions();
169
170 /**
171 * Returns the maximum number of work-items in a work group for kernel execution.
172 *
173 * <p>This limit constrains the local work group size that can be used when launching kernels on this device. Larger
174 * work groups can improve memory locality and reduce synchronization overhead.
175 *
176 * @return the maximum work group size for kernel execution
177 */
178 long maxWorkGroupSize();
179
180 /**
181 * Returns the maximum number of work-items in each dimension of a work group.
182 *
183 * <p>The array contains the maximum work-item count for each dimension, providing more granular control over work
184 * group configuration than the overall {@link #maxWorkGroupSize()} limit.
185 *
186 * @return array of maximum work-item counts per dimension
187 */
188 long[] maxWorkItemSizes();
189
190 /**
191 * Returns whether the device supports image objects in kernels.
192 *
193 * @return true if the device supports image processing operations
194 */
195 boolean imageSupport();
196
197 /**
198 * Returns the preferred vector width for float operations.
199 *
200 * <p>This indicates the optimal vector width for floating-point operations on this device, which can be used to
201 * optimize numerical computations in fitness evaluation kernels.
202 *
203 * @return the preferred vector width for float operations
204 */
205 int preferredVectorWidthFloat();
206
207 /**
208 * Creates a new builder for constructing Device instances.
209 *
210 * @return a new builder for creating device objects
211 */
212 static ImmutableDevice.Builder builder() {
213 return ImmutableDevice.builder();
214 }
215 }