package net.bmahe.genetics4j.gpu.opencl.model;

import java.util.Set;

import org.immutables.value.Value;
import org.jocl.cl_device_id;
/**
 * Represents an OpenCL compute device and its capabilities for GPU-accelerated evolutionary algorithms.
 * 
 * <p>Device encapsulates the properties and capabilities of an OpenCL compute device (GPU, CPU, or accelerator)
 * that can be used for fitness evaluation in evolutionary algorithms. This information is essential for
 * device selection, kernel optimization, and workload configuration to achieve optimal performance.
 * 
 * <p>Key device characteristics include:
 * <ul>
 * <li><strong>Device identification</strong>: Name, vendor, and version information</li>
 * <li><strong>Compute capabilities</strong>: Number of compute units and maximum work group sizes</li>
 * <li><strong>Memory hierarchy</strong>: Global, local, and constant memory sizes and characteristics</li>
 * <li><strong>Processing features</strong>: Vector width preferences, image support, and built-in kernels</li>
 * <li><strong>Performance metrics</strong>: Clock frequency and execution capabilities</li>
 * </ul>
 * 
 * <p>Device selection considerations for evolutionary algorithms:
 * <ul>
 * <li><strong>Device type</strong>: GPU devices typically provide the highest parallelism for large populations</li>
 * <li><strong>Compute units</strong>: More compute units allow more work groups to execute concurrently, improving utilization with large populations</li>
 * <li><strong>Work group sizes</strong>: Must accommodate the parallelism patterns of fitness kernels</li>
 * <li><strong>Memory capacity</strong>: Must be sufficient for population data and intermediate results</li>
 * <li><strong>Vector operations</strong>: Vector width preferences can optimize numerical computations</li>
 * </ul>
 * 
 * <p>Common device filtering patterns:
 * <pre>{@code
 * // Select GPU devices with sufficient parallel processing capability
 * Predicate<Device> gpuFilter = device ->
 *     device.deviceType().contains(DeviceType.GPU) &&
 *     device.maxComputeUnits() >= 8;
 * 
 * // Select devices with large work group support for population processing
 * Predicate<Device> workGroupFilter = device ->
 *     device.maxWorkGroupSize() >= 256;
 * 
 * // Select devices with high clock frequency for compute-intensive fitness
 * Predicate<Device> performanceFilter = device ->
 *     device.maxClockFrequency() >= 1000; // MHz
 * 
 * // Select devices that support floating-point vector operations
 * Predicate<Device> vectorFilter = device ->
 *     device.preferredVectorWidthFloat() >= 4;
 * 
 * // Comprehensive filter for evolutionary algorithm suitability
 * Predicate<Device> eaOptimizedFilter = device ->
 *     device.deviceType().contains(DeviceType.GPU) &&
 *     device.maxComputeUnits() >= 4 &&
 *     device.maxWorkGroupSize() >= 128 &&
 *     device.preferredVectorWidthFloat() >= 2;
 * }</pre>
 * 
 * <p>Performance optimization using device information:
 * <ul>
 * <li><strong>Work group sizing</strong>: Configure kernel work groups based on {@link #maxWorkGroupSize()}, as sketched below</li>
 * <li><strong>Parallel dispatch</strong>: Scale parallelism based on {@link #maxComputeUnits()}</li>
 * <li><strong>Vector operations</strong>: Optimize data layouts for {@link #preferredVectorWidthFloat()}</li>
 * <li><strong>Memory access patterns</strong>: Design kernels considering memory hierarchy characteristics</li>
 * </ul>
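 * 
 * <p>As an illustration of the work group sizing point, a minimal sketch; the
 * desired local size of 256 and the population size are assumptions for the example:
 * <pre>{@code
 * // Clamp the local work group size to the device limit
 * long localSize = Math.min(256, device.maxWorkGroupSize());
 * 
 * // Round the global size up to a whole number of work groups
 * long populationSize = 10_000;
 * long globalSize = ((populationSize + localSize - 1) / localSize) * localSize;
 * }</pre>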
 * 
 * <p>Device capability assessment workflow (condensed in the sketch below):
 * <ol>
 * <li><strong>Device discovery</strong>: Enumerate devices from selected platforms</li>
 * <li><strong>Capability query</strong>: Read device properties from the OpenCL runtime</li>
 * <li><strong>Model creation</strong>: Create device objects with discovered capabilities</li>
 * <li><strong>Filtering</strong>: Apply user-defined predicates to select suitable devices</li>
 * <li><strong>Context creation</strong>: Create OpenCL contexts for selected devices</li>
 * </ol>
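 * 
 * <p>A condensed sketch of this workflow using the raw JOCL bindings; the
 * property queries behind {@code readDevice} are omitted, and that helper,
 * like the GPU-only filter, is a hypothetical example rather than the
 * library's actual discovery code:
 * <pre>{@code
 * // 1-2. Enumerate devices on a platform
 * int[] numDevices = new int[1];
 * CL.clGetDeviceIDs(platformId, CL.CL_DEVICE_TYPE_ALL, 0, null, numDevices);
 * cl_device_id[] deviceIds = new cl_device_id[numDevices[0]];
 * CL.clGetDeviceIDs(platformId, CL.CL_DEVICE_TYPE_ALL, deviceIds.length, deviceIds, null);
 * 
 * // 3-4. Build Device models and filter them with a predicate
 * List<Device> selected = Arrays.stream(deviceIds)
 *     .map(deviceId -> readDevice(deviceId))
 *     .filter(device -> device.deviceType().contains(DeviceType.GPU))
 *     .collect(Collectors.toList());
 * 
 * // 5. Create an OpenCL context per selected device
 * for (Device device : selected) {
 *     cl_context context = CL.clCreateContext(null, 1,
 *         new cl_device_id[] { device.deviceId() }, null, null, null);
 * }
 * }</pre>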
 * 
 * <p>Common device types in evolutionary computation:
 * <ul>
 * <li><strong>GPU devices</strong>: Provide massive parallelism for large population fitness evaluation</li>
 * <li><strong>CPU devices</strong>: Offer good sequential performance and large memory capacity</li>
 * <li><strong>Accelerator devices</strong>: Specialized hardware for specific computational patterns</li>
 * <li><strong>Custom devices</strong>: FPGA or other specialized compute devices</li>
 * </ul>
 * 
 * <p>Error handling and compatibility (see the validation sketch below):
 * <ul>
 * <li><strong>Device availability</strong>: Devices may become unavailable during execution</li>
 * <li><strong>Capability validation</strong>: Ensure the device supports required kernel features</li>
 * <li><strong>Memory constraints</strong>: Validate that device memory is sufficient for the population size</li>
 * <li><strong>Work group limits</strong>: Ensure kernels respect device work group size limits</li>
 * </ul>
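 * 
 * <p>A minimal validation sketch along these lines; the required work group
 * size and dimensionality are assumptions for the example:
 * <pre>{@code
 * long requiredWorkGroupSize = 128;
 * if (device.maxWorkGroupSize() < requiredWorkGroupSize) {
 *     throw new IllegalArgumentException("Device " + device.name()
 *         + " only supports work groups up to " + device.maxWorkGroupSize());
 * }
 * if (device.maxWorkItemDimensions() < 2) {
 *     throw new IllegalArgumentException("Fitness kernel requires 2D work-items");
 * }
 * }</pre>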
 * 
 * @see Platform
 * @see DeviceType
 * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters()
 * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils
 */
@Value.Immutable
public interface Device {

	/**
	 * Returns the native OpenCL device identifier.
	 * 
	 * @return the OpenCL device ID for low-level operations
	 */
	cl_device_id deviceId();

	/**
	 * Returns the device name provided by the vendor.
	 * 
	 * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7")
	 */
	String name();

	/**
	 * Returns the device vendor name.
	 * 
	 * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD")
	 */
	String vendor();

	/**
	 * Returns the OpenCL version supported by this device.
	 * 
	 * @return the device OpenCL version string (e.g., "OpenCL 2.1")
	 */
	String deviceVersion();

	/**
	 * Returns the device driver version.
	 * 
	 * @return the driver version string provided by the vendor
	 */
	String driverVersion();

	/**
	 * Returns the maximum configured clock frequency of the device compute units in MHz.
	 * 
	 * @return the maximum clock frequency in megahertz
	 */
	int maxClockFrequency();

	/**
	 * Returns the set of device types that classify this device.
	 * 
	 * @return set of device types (e.g., GPU, CPU, ACCELERATOR)
	 */
	Set<DeviceType> deviceType();

	/**
	 * Returns the set of built-in kernel names available on this device.
	 * 
	 * @return set of built-in kernel names provided by the device
	 */
	Set<String> builtInKernels();

	/**
	 * Returns the number of parallel compute units on the device.
	 * 
	 * <p>Compute units represent the primary parallel processing elements and directly
	 * impact the device's ability to execute work groups concurrently.
	 * 
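	 * <p>A common heuristic, not a library rule, is to oversubscribe the compute
	 * units by a small factor when choosing how many work groups to dispatch:
	 * <pre>{@code
	 * // Oversubscription factor of 4 is an assumption for the example
	 * long workGroups = (long) device.maxComputeUnits() * 4;
	 * }</pre>
	 * 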
	 * @return the number of parallel compute units available
	 */
	int maxComputeUnits();

	/**
	 * Returns the maximum number of work-item dimensions supported by the device.
	 * 
	 * @return the maximum number of dimensions for work-item indexing
	 */
	int maxWorkItemDimensions();

	/**
	 * Returns the maximum number of work-items in a work group for kernel execution.
	 * 
	 * <p>This limit constrains the local work group size that can be used when
	 * launching kernels on this device. Larger work groups can improve memory
	 * locality and reduce synchronization overhead.
	 * 
	 * @return the maximum work group size for kernel execution
	 */
	long maxWorkGroupSize();

	/**
	 * Returns the maximum number of work-items in each dimension of a work group.
	 * 
	 * <p>The array contains the maximum work-item count for each dimension,
	 * providing more granular control over work group configuration than
	 * the overall {@link #maxWorkGroupSize()} limit.
	 * 
	 * @return array of maximum work-item counts per dimension
	 */
	long[] maxWorkItemSizes();

	/**
	 * Returns whether the device supports image objects in kernels.
	 * 
	 * @return true if the device supports image processing operations
	 */
	boolean imageSupport();

	/**
	 * Returns the preferred vector width for float operations.
	 * 
	 * <p>This indicates the optimal vector width for floating-point operations
	 * on this device, which can be used to optimize numerical computations
	 * in fitness evaluation kernels.
	 * 
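	 * <p>For instance, data lengths can be padded to a multiple of the preferred
	 * width; the padding strategy itself is an example, not library behavior:
	 * <pre>{@code
	 * int width = Math.max(1, device.preferredVectorWidthFloat());
	 * int paddedLength = ((length + width - 1) / width) * width;
	 * }</pre>
	 * 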
	 * @return the preferred vector width for float operations
	 */
	int preferredVectorWidthFloat();

	/**
	 * Creates a new builder for constructing Device instances.
	 * 
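	 * <p>A usage sketch; setter names follow the standard Immutables-generated
	 * builder, and all values below are illustrative:
	 * <pre>{@code
	 * Device device = Device.builder()
	 * 		.deviceId(deviceId) // cl_device_id from clGetDeviceIDs
	 * 		.name("Example GPU")
	 * 		.vendor("Example Vendor")
	 * 		.deviceVersion("OpenCL 2.1")
	 * 		.driverVersion("1.0")
	 * 		.maxClockFrequency(1500)
	 * 		.addDeviceType(DeviceType.GPU)
	 * 		.maxComputeUnits(16)
	 * 		.maxWorkItemDimensions(3)
	 * 		.maxWorkGroupSize(1024)
	 * 		.maxWorkItemSizes(new long[] { 1024, 1024, 64 })
	 * 		.imageSupport(true)
	 * 		.preferredVectorWidthFloat(4)
	 * 		.build();
	 * }</pre>
	 * 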
	 * @return a new builder for creating device objects
	 */
	static ImmutableDevice.Builder builder() {
		return ImmutableDevice.builder();
	}
}