Device.java

package net.bmahe.genetics4j.gpu.opencl.model;

import java.util.Set;

import org.immutables.value.Value;
import org.jocl.cl_device_id;

/**
 * Represents an OpenCL compute device, with its capabilities and characteristics, for GPU-accelerated evolutionary algorithms.
 * 
 * <p>Device encapsulates the properties and capabilities of an OpenCL compute device (GPU, CPU, or accelerator)
 * that can be used for fitness evaluation in evolutionary algorithms. This information is essential for
 * device selection, kernel optimization, and workload configuration to achieve optimal performance.
 * 
 * <p>Key device characteristics include:
 * <ul>
 * <li><strong>Device identification</strong>: Name, vendor, and version information</li>
 * <li><strong>Compute capabilities</strong>: Number of compute units and maximum work group sizes</li>
 * <li><strong>Memory hierarchy</strong>: Global, local, and constant memory sizes and characteristics</li>
 * <li><strong>Processing features</strong>: Vector width preferences, image support, and built-in kernels</li>
 * <li><strong>Performance metrics</strong>: Clock frequency and execution capabilities</li>
 * </ul>
 * 
 * <p>Device selection considerations for evolutionary algorithms:
 * <ul>
 * <li><strong>Device type</strong>: GPU devices typically provide the highest parallelism for large populations</li>
 * <li><strong>Compute units</strong>: More compute units allow more work groups to execute concurrently, improving throughput for large populations</li>
 * <li><strong>Work group sizes</strong>: Must accommodate the parallelism patterns of fitness kernels</li>
 * <li><strong>Memory capacity</strong>: Must be sufficient for population data and intermediate results</li>
 * <li><strong>Vector operations</strong>: Vector width preferences can guide data layout for numerical computations</li>
 * </ul>
 * 
 * <p>Common device filtering patterns:
 * <pre>{@code
 * // Select GPU devices with sufficient parallel processing capability
 * Predicate<Device> gpuFilter = device -> 
 *     device.deviceType().contains(DeviceType.GPU) &&
 *     device.maxComputeUnits() >= 8;
 * 
 * // Select devices with large work group support for population processing
 * Predicate<Device> workGroupFilter = device ->
 *     device.maxWorkGroupSize() >= 256;
 * 
 * // Select devices with a high clock frequency for compute-intensive fitness
 * Predicate<Device> performanceFilter = device ->
 *     device.maxClockFrequency() >= 1000; // MHz
 * 
 * // Select devices that prefer wide floating-point vector operations
 * Predicate<Device> vectorFilter = device ->
 *     device.preferredVectorWidthFloat() >= 4;
 * 
 * // Comprehensive filter for evolutionary algorithm suitability
 * Predicate<Device> eaOptimizedFilter = device ->
 *     device.deviceType().contains(DeviceType.GPU) &&
 *     device.maxComputeUnits() >= 4 &&
 *     device.maxWorkGroupSize() >= 128 &&
 *     device.preferredVectorWidthFloat() >= 2;
 * }</pre>
 * 
 * <p>Performance optimization using device information (a sizing sketch follows this list):
 * <ul>
 * <li><strong>Work group sizing</strong>: Configure kernel work groups based on {@link #maxWorkGroupSize()}</li>
 * <li><strong>Parallel dispatch</strong>: Scale parallelism based on {@link #maxComputeUnits()}</li>
 * <li><strong>Vector operations</strong>: Optimize data layouts for {@link #preferredVectorWidthFloat()}</li>
 * <li><strong>Memory access patterns</strong>: Design kernels considering memory hierarchy characteristics</li>
 * </ul>
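 * 
 * <p>A minimal sketch of work group sizing against the device limit. The desired local
 * size of 64 and {@code populationSize} are assumed, kernel-specific values, not data
 * exposed by this interface:
 * <pre>{@code
 * // Clamp the desired local size to what the device supports
 * long localSize = Math.min(64, device.maxWorkGroupSize());
 * 
 * // Round the global size up to a multiple of the local work group size
 * long globalSize = ((populationSize + localSize - 1) / localSize) * localSize;
 * }</pre>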
 * 
 * <p>Device capability assessment workflow (an illustrative filtering step follows this list):
 * <ol>
 * <li><strong>Device discovery</strong>: Enumerate devices from selected platforms</li>
 * <li><strong>Capability query</strong>: Read device properties from the OpenCL runtime</li>
 * <li><strong>Model creation</strong>: Create device objects with the discovered capabilities</li>
 * <li><strong>Filtering</strong>: Apply user-defined predicates to select suitable devices</li>
 * <li><strong>Context creation</strong>: Create OpenCL contexts for the selected devices</li>
 * </ol>
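 * 
 * <p>An illustrative filtering step, assuming a list of previously discovered devices
 * (the discovery itself is elided):
 * <pre>{@code
 * List<Device> devices = ...; // populated during device discovery
 * List<Device> selected = devices.stream()
 *     .filter(device -> device.deviceType().contains(DeviceType.GPU))
 *     .filter(device -> device.maxComputeUnits() >= 4)
 *     .collect(Collectors.toList());
 * }</pre>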
 * 
 * <p>Common device types in evolutionary computation:
 * <ul>
 * <li><strong>GPU devices</strong>: Provide massive parallelism for large-population fitness evaluation</li>
 * <li><strong>CPU devices</strong>: Offer good sequential performance and large memory capacity</li>
 * <li><strong>Accelerator devices</strong>: Specialized hardware for specific computational patterns</li>
 * <li><strong>Custom devices</strong>: FPGAs or other specialized compute devices</li>
 * </ul>
 * 
 * <p>Error handling and compatibility (a validation sketch follows this list):
 * <ul>
 * <li><strong>Device availability</strong>: Devices may become unavailable during execution</li>
 * <li><strong>Capability validation</strong>: Ensure the device supports required kernel features</li>
 * <li><strong>Memory constraints</strong>: Validate that device memory is sufficient for the population size</li>
 * <li><strong>Work group limits</strong>: Ensure kernels respect device work group size limits</li>
 * </ul>
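 * 
 * <p>A sketch of pre-launch validation against the limits reported here (the required
 * work group size of 128 is an assumed kernel requirement):
 * <pre>{@code
 * if (device.maxWorkGroupSize() < 128) {
 *     throw new IllegalStateException("Device " + device.name()
 *             + " only supports work groups up to " + device.maxWorkGroupSize());
 * }
 * }</pre>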
 * 
 * @see Platform
 * @see DeviceType
 * @see net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext#deviceFilters()
 * @see net.bmahe.genetics4j.gpu.opencl.DeviceUtils
 */
@Value.Immutable
public interface Device {

	/**
	 * Returns the native OpenCL device identifier.
	 * 
	 * @return the OpenCL device ID for low-level operations
	 */
	cl_device_id deviceId();

	/**
	 * Returns the device name provided by the vendor.
	 * 
	 * @return the human-readable device name (e.g., "GeForce RTX 3080", "Intel Core i7")
	 */
	String name();

	/**
	 * Returns the device vendor name.
	 * 
	 * @return the vendor name (e.g., "NVIDIA Corporation", "Intel", "AMD")
	 */
	String vendor();

	/**
	 * Returns the OpenCL version supported by this device.
	 * 
	 * @return the device OpenCL version string (e.g., "OpenCL 2.1")
	 */
	String deviceVersion();

	/**
	 * Returns the device driver version.
	 * 
	 * @return the driver version string provided by the vendor
	 */
	String driverVersion();

	/**
	 * Returns the maximum configured clock frequency of the device's compute units, in MHz.
	 * 
	 * @return the maximum clock frequency in megahertz
	 */
	int maxClockFrequency();

	/**
	 * Returns the set of device types that classify this device.
	 * 
	 * @return set of device types (e.g., GPU, CPU, ACCELERATOR)
	 */
	Set<DeviceType> deviceType();

	/**
	 * Returns the set of built-in kernel names available on this device.
	 * 
	 * @return set of built-in kernel names provided by the device
	 */
	Set<String> builtInKernels();

	/**
	 * Returns the number of parallel compute units on the device.
	 * 
	 * <p>Compute units are the primary parallel processing elements and directly
	 * affect how many work groups the device can execute concurrently.
	 * 
	 * @return the number of parallel compute units available
	 */
	int maxComputeUnits();

	/**
	 * Returns the maximum number of work-item dimensions supported by the device.
	 * 
	 * @return the maximum number of dimensions for work-item indexing
	 */
	int maxWorkItemDimensions();

	/**
	 * Returns the maximum number of work-items in a work group for kernel execution.
	 * 
	 * <p>This limit constrains the local work group size that can be used when
	 * launching kernels on this device. Larger work groups can improve memory
	 * locality and reduce synchronization overhead.
	 * 
	 * @return the maximum work group size for kernel execution
	 */
	long maxWorkGroupSize();

	/**
	 * Returns the maximum number of work-items in each dimension of a work group.
	 * 
	 * <p>The array contains the maximum work-item count for each dimension,
	 * providing more granular control over work group configuration than
	 * the overall {@link #maxWorkGroupSize()} limit.
	 * 
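	 * <p>For example, clamping a requested 2D local size per dimension (the requested
	 * value of 16 is illustrative, not derived from this interface):
	 * <pre>{@code
	 * long[] limits = device.maxWorkItemSizes();
	 * long localX = Math.min(16, limits[0]);
	 * long localY = Math.min(16, limits[1]);
	 * }</pre>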
	 * 
	 * @return array of maximum work-item counts per dimension
	 */
	long[] maxWorkItemSizes();

	/**
	 * Returns whether the device supports image objects in kernels.
	 * 
	 * @return true if the device supports image processing operations
	 */
	boolean imageSupport();

	/**
	 * Returns the preferred vector width for float operations.
	 * 
	 * <p>This indicates the optimal vector width for floating-point operations
	 * on this device, which can be used to optimize numerical computations
	 * in fitness evaluation kernels.
	 * 
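	 * <p>For instance, padding an element count to a multiple of the preferred width
	 * ({@code n} is a hypothetical number of floats):
	 * <pre>{@code
	 * int width = device.preferredVectorWidthFloat();
	 * int paddedLength = ((n + width - 1) / width) * width;
	 * }</pre>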
	 * 
	 * @return the preferred vector width for float operations
	 */
	int preferredVectorWidthFloat();

	/**
	 * Creates a new builder for constructing Device instances.
	 * 
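	 * <p>Typical usage, assuming the accessor-named initializers that the Immutables
	 * processor generates for this interface:
	 * <pre>{@code
	 * Device device = Device.builder()
	 *     .deviceId(deviceId) // a previously obtained cl_device_id
	 *     .name("Example Device")
	 *     .vendor("Example Vendor")
	 *     .deviceVersion("OpenCL 2.1")
	 *     .driverVersion("1.0")
	 *     .maxClockFrequency(1000)
	 *     .addDeviceType(DeviceType.GPU)
	 *     .maxComputeUnits(8)
	 *     .maxWorkItemDimensions(3)
	 *     .maxWorkGroupSize(256)
	 *     .maxWorkItemSizes(new long[] { 256, 256, 64 })
	 *     .imageSupport(true)
	 *     .preferredVectorWidthFloat(4)
	 *     .build();
	 * }</pre>
	 * 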
	 * @return a new builder for creating device objects
	 */
	static ImmutableDevice.Builder builder() {
		return ImmutableDevice.builder();
	}
}
