package net.bmahe.genetics4j.gpu.opencl.model;

import java.util.Set;

import org.immutables.value.Value;
import org.jocl.cl_device_id;

/**
 * Immutable description of an OpenCL compute device (GPU, CPU, or accelerator) available for
 * GPU-accelerated evolutionary algorithms.
 *
 * <p>A {@code Device} captures the identification, compute, and memory characteristics reported by
 * the OpenCL runtime for a single device. This metadata drives three activities in the GPU backend:
 * device <em>selection</em> (filtering candidates with user predicates), kernel
 * <em>optimization</em> (sizing work groups and data layouts), and workload
 * <em>configuration</em> (scaling parallel dispatch to the hardware).
 *
 * <p>Captured characteristics:
 * <ul>
 * <li><strong>Identification</strong>: device name, vendor, OpenCL version, and driver version</li>
 * <li><strong>Compute</strong>: compute unit count, work-item dimensions, and work group limits</li>
 * <li><strong>Features</strong>: device type classification, image support, built-in kernels, and
 * preferred float vector width</li>
 * <li><strong>Performance</strong>: maximum clock frequency</li>
 * </ul>
 *
 * <p>Typical selection criteria for evolutionary algorithms:
 * <ul>
 * <li><strong>Device type</strong>: GPUs usually offer the highest parallelism for large
 * populations, while CPUs provide strong sequential throughput and larger memory</li>
 * <li><strong>Compute units</strong>: more units let more work groups execute concurrently</li>
 * <li><strong>Work group size</strong>: must accommodate the parallelism pattern of the fitness
 * kernels</li>
 * <li><strong>Vector width</strong>: wider preferred float vectors can speed up numerical fitness
 * computations</li>
 * </ul>
 *
 * <p>Example filter predicates:
 *
 * <pre>{@code
 * // GPU devices with sufficient parallel processing capability
 * Predicate<Device> gpuFilter = device -> device.deviceType()
 *     .contains(DeviceType.GPU) && device.maxComputeUnits() >= 8;
 *
 * // Devices supporting large work groups for population processing
 * Predicate<Device> workGroupFilter = device -> device.maxWorkGroupSize() >= 256;
 *
 * // High clock frequency for compute-intensive fitness functions
 * Predicate<Device> performanceFilter = device -> device.maxClockFrequency() >= 1000; // MHz
 *
 * // Devices with floating-point vector support
 * Predicate<Device> vectorFilter = device -> device.preferredVectorWidthFloat() >= 4;
 *
 * // Combined suitability filter for evolutionary algorithms
 * Predicate<Device> eaOptimizedFilter = device -> device.deviceType()
 *     .contains(DeviceType.GPU) && device.maxComputeUnits() >= 4
 *     && device.maxWorkGroupSize() >= 128 && device.preferredVectorWidthFloat() >= 2;
 * }</pre>
 *
 * <p>Performance tuning hints derived from this model:
 * <ul>
 * <li>Size kernel work groups from {@link #maxWorkGroupSize()} and
 * {@link #maxWorkItemSizes()}</li>
 * <li>Scale parallel dispatch from {@link #maxComputeUnits()}</li>
 * <li>Lay out numeric data according to {@link #preferredVectorWidthFloat()}</li>
 * </ul>
 *
 * <p>Lifecycle: devices are enumerated from selected platforms, their capabilities are queried from
 * the OpenCL runtime, instances of this model are built, user-defined predicates filter the
 * candidates, and OpenCL contexts are finally created for the chosen devices. Note that a device
 * may become unavailable after discovery, and callers should validate memory capacity, required
 * kernel features, and work group limits before launching kernels.
 *
 * @see Platform
 * @see DeviceType
 * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters()
 * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils
 */
@Value.Immutable
public interface Device {

	/**
	 * Returns the native OpenCL device identifier.
	 *
	 * @return the OpenCL device ID for low-level operations
	 */
	cl_device_id deviceId();

	/**
	 * Returns the vendor-supplied device name.
	 *
	 * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7")
	 */
	String name();

	/**
	 * Returns the name of the device vendor.
	 *
	 * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD")
	 */
	String vendor();

	/**
	 * Returns the OpenCL version implemented by this device.
	 *
	 * @return the device OpenCL version string (e.g., "OpenCL 2.1")
	 */
	String deviceVersion();

	/**
	 * Returns the version of the device driver.
	 *
	 * @return the vendor-provided driver version string
	 */
	String driverVersion();

	/**
	 * Returns the maximum configured clock frequency of the device compute units.
	 *
	 * @return the maximum clock frequency in megahertz
	 */
	int maxClockFrequency();

	/**
	 * Returns the classification of this device.
	 *
	 * @return set of device types (e.g., GPU, CPU, ACCELERATOR)
	 */
	Set<DeviceType> deviceType();

	/**
	 * Returns the names of the built-in kernels available on this device.
	 *
	 * @return set of built-in kernel names provided by the device
	 */
	Set<String> builtInKernels();

	/**
	 * Returns the number of parallel compute units on the device.
	 *
	 * <p>Compute units are the primary parallel processing elements and determine how many work
	 * groups the device can execute concurrently.
	 *
	 * @return the number of parallel compute units available
	 */
	int maxComputeUnits();

	/**
	 * Returns the maximum number of work-item dimensions supported by the device.
	 *
	 * @return the maximum number of dimensions for work-item indexing
	 */
	int maxWorkItemDimensions();

	/**
	 * Returns the maximum total number of work-items allowed in a single work group.
	 *
	 * <p>This bounds the local work group size usable when launching kernels on this device. Larger
	 * work groups can improve memory locality and reduce synchronization overhead.
	 *
	 * @return the maximum work group size for kernel execution
	 */
	long maxWorkGroupSize();

	/**
	 * Returns the per-dimension limits on work-items within a work group.
	 *
	 * <p>Each array element gives the maximum work-item count for the corresponding dimension,
	 * allowing finer-grained work group configuration than the aggregate
	 * {@link #maxWorkGroupSize()} limit.
	 *
	 * @return array of maximum work-item counts per dimension
	 */
	long[] maxWorkItemSizes();

	/**
	 * Returns whether kernels on this device may use image objects.
	 *
	 * @return true if the device supports image processing operations
	 */
	boolean imageSupport();

	/**
	 * Returns the preferred native vector width for {@code float} operations.
	 *
	 * <p>Indicates the optimal float vector width for this device, useful for optimizing numeric
	 * data layouts in fitness evaluation kernels.
	 *
	 * @return the preferred vector width for float operations
	 */
	int preferredVectorWidthFloat();

	/**
	 * Creates a new builder for constructing {@link Device} instances.
	 *
	 * @return a new builder for creating device objects
	 */
	static ImmutableDevice.Builder builder() {
		return ImmutableDevice.builder();
	}
}