View Javadoc
1   package net.bmahe.genetics4j.gpu.opencl.model;
2   
3   import java.util.Set;
4   
5   import org.immutables.value.Value;
6   import org.jocl.cl_device_id;
7   
8   /**
9    * Represents an OpenCL compute device with its capabilities and characteristics for GPU-accelerated evolutionary
10   * algorithms.
11   * 
12   * <p>Device encapsulates the properties and capabilities of an OpenCL compute device (GPU, CPU, or accelerator) that
13   * can be used for fitness evaluation in evolutionary algorithms. This information is essential for device selection,
14   * kernel optimization, and workload configuration to achieve optimal performance.
15   * 
16   * <p>Key device characteristics include:
17   * <ul>
18   * <li><strong>Device identification</strong>: Name, vendor, and version information</li>
19   * <li><strong>Compute capabilities</strong>: Number of compute units and maximum work group sizes</li>
20   * <li><strong>Memory hierarchy</strong>: Global, local, and constant memory sizes and characteristics</li>
21   * <li><strong>Processing features</strong>: Vector width preferences, image support, and built-in kernels</li>
22   * <li><strong>Performance metrics</strong>: Clock frequency and execution capabilities</li>
23   * </ul>
24   * 
25   * <p>Device selection considerations for evolutionary algorithms:
26   * <ul>
27   * <li><strong>Device type</strong>: GPU devices typically provide highest parallelism for large populations</li>
28   * <li><strong>Compute units</strong>: More compute units allow better utilization of large populations</li>
29   * <li><strong>Work group sizes</strong>: Must accommodate the parallelism patterns of fitness kernels</li>
30   * <li><strong>Memory capacity</strong>: Must be sufficient for population data and intermediate results</li>
31   * <li><strong>Vector operations</strong>: Vector width preferences can optimize numerical computations</li>
32   * </ul>
33   * 
34   * <p>Common device filtering patterns:
35   * 
36   * <pre>{@code
37   * // Select GPU devices with sufficient parallel processing capability
38   * Predicate<Device> gpuFilter = device -> device.deviceType()
39   * 		.contains(DeviceType.GPU) && device.maxComputeUnits() >= 8;
40   * 
41   * // Select devices with large work group support for population processing
42   * Predicate<Device> workGroupFilter = device -> device.maxWorkGroupSize() >= 256;
43   * 
44   * // Select devices with high clock frequency for compute-intensive fitness
45   * Predicate<Device> performanceFilter = device -> device.maxClockFrequency() >= 1000; // MHz
46   * 
47   * // Select devices that support floating-point vector operations
48   * Predicate<Device> vectorFilter = device -> device.preferredVectorWidthFloat() >= 4;
49   * 
50   * // Comprehensive filter for evolutionary algorithm suitability
51   * Predicate<Device> eaOptimizedFilter = device -> device.deviceType()
52   * 		.contains(DeviceType.GPU) && device.maxComputeUnits() >= 4 && device.maxWorkGroupSize() >= 128
53   * 		&& device.preferredVectorWidthFloat() >= 2;
54   * }</pre>
55   * 
56   * <p>Performance optimization using device information:
57   * <ul>
58   * <li><strong>Work group sizing</strong>: Configure kernel work groups based on {@link #maxWorkGroupSize()}</li>
59   * <li><strong>Parallel dispatch</strong>: Scale parallelism based on {@link #maxComputeUnits()}</li>
60   * <li><strong>Vector operations</strong>: Optimize data layouts for {@link #preferredVectorWidthFloat()}</li>
61   * <li><strong>Memory access patterns</strong>: Design kernels considering memory hierarchy characteristics</li>
62   * </ul>
63   * 
64   * <p>Device capability assessment workflow:
65   * <ol>
66   * <li><strong>Device discovery</strong>: Enumerate devices from selected platforms</li>
67   * <li><strong>Capability query</strong>: Read device properties from OpenCL runtime</li>
68   * <li><strong>Model creation</strong>: Create device objects with discovered capabilities</li>
69   * <li><strong>Filtering</strong>: Apply user-defined predicates to select suitable devices</li>
70   * <li><strong>Context creation</strong>: Create OpenCL contexts for selected devices</li>
71   * </ol>
72   * 
73   * <p>Common device types in evolutionary computation:
74   * <ul>
75   * <li><strong>GPU devices</strong>: Provide massive parallelism for large population fitness evaluation</li>
76   * <li><strong>CPU devices</strong>: Offer good sequential performance and large memory capacity</li>
77   * <li><strong>Accelerator devices</strong>: Specialized hardware for specific computational patterns</li>
78   * <li><strong>Custom devices</strong>: FPGA or other specialized compute devices</li>
79   * </ul>
80   * 
81   * <p>Error handling and compatibility:
82   * <ul>
83   * <li><strong>Device availability</strong>: Devices may become unavailable during execution</li>
84   * <li><strong>Capability validation</strong>: Ensure device supports required kernel features</li>
85   * <li><strong>Memory constraints</strong>: Validate device memory is sufficient for population size</li>
86   * <li><strong>Work group limits</strong>: Ensure kernels respect device work group size limits</li>
87   * </ul>
88   * 
89   * @see Platform
90   * @see DeviceType
91   * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters()
92   * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils
93   */
94  @Value.Immutable
95  public interface Device {
96  
97  	/**
98  	 * Returns the native OpenCL device identifier.
99  	 * 
100 	 * @return the OpenCL device ID for low-level operations
101 	 */
102 	cl_device_id deviceId();
103 
104 	/**
105 	 * Returns the device name provided by the vendor.
106 	 * 
107 	 * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7")
108 	 */
109 	String name();
110 
111 	/**
112 	 * Returns the device vendor name.
113 	 * 
114 	 * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD")
115 	 */
116 	String vendor();
117 
118 	/**
119 	 * Returns the OpenCL version supported by this device.
120 	 * 
121 	 * @return the device OpenCL version string (e.g., "OpenCL 2.1")
122 	 */
123 	String deviceVersion();
124 
125 	/**
126 	 * Returns the device driver version.
127 	 * 
128 	 * @return the driver version string provided by the vendor
129 	 */
130 	String driverVersion();
131 
132 	/**
133 	 * Returns the maximum configured clock frequency of the device compute units in MHz.
134 	 * 
135 	 * @return the maximum clock frequency in megahertz
136 	 */
137 	int maxClockFrequency();
138 
139 	/**
140 	 * Returns the set of device types that classify this device.
141 	 * 
142 	 * @return set of device types (e.g., GPU, CPU, ACCELERATOR)
143 	 */
144 	Set<DeviceType> deviceType();
145 
146 	/**
147 	 * Returns the set of built-in kernel names available on this device.
148 	 * 
149 	 * @return set of built-in kernel names provided by the device
150 	 */
151 	Set<String> builtInKernels();
152 
153 	/**
154 	 * Returns the number of parallel compute units on the device.
155 	 * 
156 	 * <p>Compute units represent the primary parallel processing elements and directly impact the device's ability to
157 	 * execute work groups concurrently.
158 	 * 
159 	 * @return the number of parallel compute units available
160 	 */
161 	int maxComputeUnits();
162 
163 	/**
164 	 * Returns the maximum number of work-item dimensions supported by the device.
165 	 * 
166 	 * @return the maximum number of dimensions for work-item indexing
167 	 */
168 	int maxWorkItemDimensions();
169 
170 	/**
171 	 * Returns the maximum number of work-items in a work group for kernel execution.
172 	 * 
173 	 * <p>This limit constrains the local work group size that can be used when launching kernels on this device. Larger
174 	 * work groups can improve memory locality and reduce synchronization overhead.
175 	 * 
176 	 * @return the maximum work group size for kernel execution
177 	 */
178 	long maxWorkGroupSize();
179 
180 	/**
181 	 * Returns the maximum number of work-items in each dimension of a work group.
182 	 * 
183 	 * <p>The array contains the maximum work-item count for each dimension, providing more granular control over work
184 	 * group configuration than the overall {@link #maxWorkGroupSize()} limit.
185 	 * 
186 	 * @return array of maximum work-item counts per dimension
187 	 */
188 	long[] maxWorkItemSizes();
189 
190 	/**
191 	 * Returns whether the device supports image objects in kernels.
192 	 * 
193 	 * @return true if the device supports image processing operations
194 	 */
195 	boolean imageSupport();
196 
197 	/**
198 	 * Returns the preferred vector width for float operations.
199 	 * 
200 	 * <p>This indicates the optimal vector width for floating-point operations on this device, which can be used to
201 	 * optimize numerical computations in fitness evaluation kernels.
202 	 * 
203 	 * @return the preferred vector width for float operations
204 	 */
205 	int preferredVectorWidthFloat();
206 
207 	/**
208 	 * Creates a new builder for constructing Device instances.
209 	 * 
210 	 * @return a new builder for creating device objects
211 	 */
212 	static ImmutableDevice.Builder builder() {
213 		return ImmutableDevice.builder();
214 	}
215 }