| 1 | package net.bmahe.genetics4j.gpu.opencl.model; | |
| 2 | ||
| 3 | import java.util.Set; | |
| 4 | ||
| 5 | import org.immutables.value.Value; | |
| 6 | import org.jocl.cl_device_id; | |
| 7 | ||
| 8 | /** | |
| 9 | * Represents an OpenCL compute device with its capabilities and characteristics for GPU-accelerated evolutionary | |
| 10 | * algorithms. | |
| 11 | * | |
| 12 | * <p>Device encapsulates the properties and capabilities of an OpenCL compute device (GPU, CPU, or accelerator) that | |
| 13 | * can be used for fitness evaluation in evolutionary algorithms. This information is essential for device selection, | |
| 14 | * kernel optimization, and workload configuration to achieve optimal performance. | |
| 15 | * | |
| 16 | * <p>Key device characteristics include: | |
| 17 | * <ul> | |
| 18 | * <li><strong>Device identification</strong>: Name, vendor, and version information</li> | |
| 19 | * <li><strong>Compute capabilities</strong>: Number of compute units and maximum work group sizes</li> | |
| 20 | * <li><strong>Memory hierarchy</strong>: Global, local, and constant memory sizes and characteristics</li> | |
| 21 | * <li><strong>Processing features</strong>: Vector width preferences, image support, and built-in kernels</li> | |
| 22 | * <li><strong>Performance metrics</strong>: Clock frequency and execution capabilities</li> | |
| 23 | * </ul> | |
| 24 | * | |
| 25 | * <p>Device selection considerations for evolutionary algorithms: | |
| 26 | * <ul> | |
| 27 | * <li><strong>Device type</strong>: GPU devices typically provide highest parallelism for large populations</li> | |
| 28 | * <li><strong>Compute units</strong>: More compute units allow better utilization of large populations</li> | |
| 29 | * <li><strong>Work group sizes</strong>: Must accommodate the parallelism patterns of fitness kernels</li> | |
| 30 | * <li><strong>Memory capacity</strong>: Must be sufficient for population data and intermediate results</li> | |
| 31 | * <li><strong>Vector operations</strong>: Vector width preferences can optimize numerical computations</li> | |
| 32 | * </ul> | |
| 33 | * | |
| 34 | * <p>Common device filtering patterns: | |
| 35 | * | |
| 36 | * <pre>{@code | |
| 37 | * // Select GPU devices with sufficient parallel processing capability | |
| 38 | * Predicate<Device> gpuFilter = device -> device.deviceType() | |
| 39 | * .contains(DeviceType.GPU) && device.maxComputeUnits() >= 8; | |
| 40 | * | |
| 41 | * // Select devices with large work group support for population processing | |
| 42 | * Predicate<Device> workGroupFilter = device -> device.maxWorkGroupSize() >= 256; | |
| 43 | * | |
| 44 | * // Select devices with high clock frequency for compute-intensive fitness | |
| 45 | * Predicate<Device> performanceFilter = device -> device.maxClockFrequency() >= 1000; // MHz | |
| 46 | * | |
| 47 | * // Select devices that support floating-point vector operations | |
| 48 | * Predicate<Device> vectorFilter = device -> device.preferredVectorWidthFloat() >= 4; | |
| 49 | * | |
| 50 | * // Comprehensive filter for evolutionary algorithm suitability | |
| 51 | * Predicate<Device> eaOptimizedFilter = device -> device.deviceType() | |
| 52 | * .contains(DeviceType.GPU) && device.maxComputeUnits() >= 4 && device.maxWorkGroupSize() >= 128 | |
| 53 | * && device.preferredVectorWidthFloat() >= 2; | |
| 54 | * }</pre> | |
| 55 | * | |
| 56 | * <p>Performance optimization using device information: | |
| 57 | * <ul> | |
| 58 | * <li><strong>Work group sizing</strong>: Configure kernel work groups based on {@link #maxWorkGroupSize()}</li> | |
| 59 | * <li><strong>Parallel dispatch</strong>: Scale parallelism based on {@link #maxComputeUnits()}</li> | |
| 60 | * <li><strong>Vector operations</strong>: Optimize data layouts for {@link #preferredVectorWidthFloat()}</li> | |
| 61 | * <li><strong>Memory access patterns</strong>: Design kernels considering memory hierarchy characteristics</li> | |
| 62 | * </ul> | |
| 63 | * | |
| 64 | * <p>Device capability assessment workflow: | |
| 65 | * <ol> | |
| 66 | * <li><strong>Device discovery</strong>: Enumerate devices from selected platforms</li> | |
| 67 | * <li><strong>Capability query</strong>: Read device properties from OpenCL runtime</li> | |
| 68 | * <li><strong>Model creation</strong>: Create device objects with discovered capabilities</li> | |
| 69 | * <li><strong>Filtering</strong>: Apply user-defined predicates to select suitable devices</li> | |
| 70 | * <li><strong>Context creation</strong>: Create OpenCL contexts for selected devices</li> | |
| 71 | * </ol> | |
| 72 | * | |
| 73 | * <p>Common device types in evolutionary computation: | |
| 74 | * <ul> | |
| 75 | * <li><strong>GPU devices</strong>: Provide massive parallelism for large population fitness evaluation</li> | |
| 76 | * <li><strong>CPU devices</strong>: Offer good sequential performance and large memory capacity</li> | |
| 77 | * <li><strong>Accelerator devices</strong>: Specialized hardware for specific computational patterns</li> | |
| 78 | * <li><strong>Custom devices</strong>: FPGA or other specialized compute devices</li> | |
| 79 | * </ul> | |
| 80 | * | |
| 81 | * <p>Error handling and compatibility: | |
| 82 | * <ul> | |
| 83 | * <li><strong>Device availability</strong>: Devices may become unavailable during execution</li> | |
| 84 | * <li><strong>Capability validation</strong>: Ensure device supports required kernel features</li> | |
| 85 | * <li><strong>Memory constraints</strong>: Validate device memory is sufficient for population size</li> | |
| 86 | * <li><strong>Work group limits</strong>: Ensure kernels respect device work group size limits</li> | |
| 87 | * </ul> | |
| 88 | * | |
| 89 | * @see Platform | |
| 90 | * @see DeviceType | |
| 91 | * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters() | |
| 92 | * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils | |
| 93 | */ | |
| 94 | @Value.Immutable | |
| 95 | public interface Device { | |
| 96 | ||
| 97 | /** | |
| 98 | * Returns the native OpenCL device identifier. | |
| 99 | * | |
| 100 | * @return the OpenCL device ID for low-level operations | |
| 101 | */ | |
| 102 | cl_device_id deviceId(); | |
| 103 | ||
| 104 | /** | |
| 105 | * Returns the device name provided by the vendor. | |
| 106 | * | |
| 107 | * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7") | |
| 108 | */ | |
| 109 | String name(); | |
| 110 | ||
| 111 | /** | |
| 112 | * Returns the device vendor name. | |
| 113 | * | |
| 114 | * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD") | |
| 115 | */ | |
| 116 | String vendor(); | |
| 117 | ||
| 118 | /** | |
| 119 | * Returns the OpenCL version supported by this device. | |
| 120 | * | |
| 121 | * @return the device OpenCL version string (e.g., "OpenCL 2.1") | |
| 122 | */ | |
| 123 | String deviceVersion(); | |
| 124 | ||
| 125 | /** | |
| 126 | * Returns the device driver version. | |
| 127 | * | |
| 128 | * @return the driver version string provided by the vendor | |
| 129 | */ | |
| 130 | String driverVersion(); | |
| 131 | ||
| 132 | /** | |
| 133 | * Returns the maximum configured clock frequency of the device compute units in MHz. | |
| 134 | * | |
| 135 | * @return the maximum clock frequency in megahertz | |
| 136 | */ | |
| 137 | int maxClockFrequency(); | |
| 138 | ||
| 139 | /** | |
| 140 | * Returns the set of device types that classify this device. | |
| 141 | * | |
| 142 | * @return set of device types (e.g., GPU, CPU, ACCELERATOR) | |
| 143 | */ | |
| 144 | Set<DeviceType> deviceType(); | |
| 145 | ||
| 146 | /** | |
| 147 | * Returns the set of built-in kernel names available on this device. | |
| 148 | * | |
| 149 | * @return set of built-in kernel names provided by the device | |
| 150 | */ | |
| 151 | Set<String> builtInKernels(); | |
| 152 | ||
| 153 | /** | |
| 154 | * Returns the number of parallel compute units on the device. | |
| 155 | * | |
| 156 | * <p>Compute units represent the primary parallel processing elements and directly impact the device's ability to | |
| 157 | * execute work groups concurrently. | |
| 158 | * | |
| 159 | * @return the number of parallel compute units available | |
| 160 | */ | |
| 161 | int maxComputeUnits(); | |
| 162 | ||
| 163 | /** | |
| 164 | * Returns the maximum number of work-item dimensions supported by the device. | |
| 165 | * | |
| 166 | * @return the maximum number of dimensions for work-item indexing | |
| 167 | */ | |
| 168 | int maxWorkItemDimensions(); | |
| 169 | ||
| 170 | /** | |
| 171 | * Returns the maximum number of work-items in a work group for kernel execution. | |
| 172 | * | |
| 173 | * <p>This limit constrains the local work group size that can be used when launching kernels on this device. Larger | |
| 174 | * work groups can improve memory locality and reduce synchronization overhead. | |
| 175 | * | |
| 176 | * @return the maximum work group size for kernel execution | |
| 177 | */ | |
| 178 | long maxWorkGroupSize(); | |
| 179 | ||
| 180 | /** | |
| 181 | * Returns the maximum number of work-items in each dimension of a work group. | |
| 182 | * | |
| 183 | * <p>The array contains the maximum work-item count for each dimension, providing more granular control over work | |
| 184 | * group configuration than the overall {@link #maxWorkGroupSize()} limit. | |
| 185 | * | |
| 186 | * @return array of maximum work-item counts per dimension | |
| 187 | */ | |
| 188 | long[] maxWorkItemSizes(); | |
| 189 | ||
| 190 | /** | |
| 191 | * Returns whether the device supports image objects in kernels. | |
| 192 | * | |
| 193 | * @return true if the device supports image processing operations | |
| 194 | */ | |
| 195 | boolean imageSupport(); | |
| 196 | ||
| 197 | /** | |
| 198 | * Returns the preferred vector width for float operations. | |
| 199 | * | |
| 200 | * <p>This indicates the optimal vector width for floating-point operations on this device, which can be used to | |
| 201 | * optimize numerical computations in fitness evaluation kernels. | |
| 202 | * | |
| 203 | * @return the preferred vector width for float operations | |
| 204 | */ | |
| 205 | int preferredVectorWidthFloat(); | |
| 206 | ||
| 207 | /** | |
| 208 | * Creates a new builder for constructing Device instances. | |
| 209 | * | |
| 210 | * @return a new builder for creating device objects | |
| 211 | */ | |
| 212 | static ImmutableDevice.Builder builder() { | |
| 213 |
2
1. builder : replaced return value with null for net/bmahe/genetics4j/gpu/opencl/model/Device::builder → NO_COVERAGE 2. builder : removed call to net/bmahe/genetics4j/gpu/opencl/model/ImmutableDevice::builder → NO_COVERAGE |
return ImmutableDevice.builder(); |
| 214 | } | |
| 215 | } | |
Mutations | ||
| 213 |
1.1 2.2 |