1 package net.bmahe.genetics4j.gpu.opencl;
2
3 import org.apache.commons.lang3.Validate;
4 import org.jocl.CL;
5 import org.jocl.Pointer;
6 import org.jocl.Sizeof;
7 import org.jocl.cl_device_id;
8 import org.jocl.cl_kernel;
9
10 /**
11 * Utility class providing convenient methods for querying OpenCL kernel work group information.
12 *
13 * <p>KernelInfoUtils encapsulates the low-level OpenCL API calls required for retrieving kernel-specific execution
14 * characteristics on target devices. This information is essential for optimizing kernel launch parameters and ensuring
15 * efficient resource utilization in GPU-accelerated evolutionary algorithms.
16 *
17 * <p>Key functionality includes:
18 * <ul>
19 * <li><strong>Work group queries</strong>: Retrieve kernel-specific work group size limits and preferences</li>
20 * <li><strong>Memory usage queries</strong>: Query local and private memory requirements per work-item</li>
21 * <li><strong>Performance optimization</strong>: Access preferred work group size multiples for optimal execution</li>
22 * <li><strong>Resource validation</strong>: Obtain kernel resource requirements for launch parameter validation</li>
23 * </ul>
24 *
25 * <p>Common usage patterns:
26 *
27 * <pre>{@code
28 * // Query kernel work group characteristics
29 * long maxWorkGroupSize = KernelInfoUtils.getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_WORK_GROUP_SIZE);
30 *
31 * long preferredMultiple = KernelInfoUtils
32 * .getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
33 *
34 * // Query memory requirements
35 * long localMemSize = KernelInfoUtils.getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_LOCAL_MEM_SIZE);
36 *
37 * long privateMemSize = KernelInfoUtils.getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_PRIVATE_MEM_SIZE);
38 *
39 * // Optimize work group size based on kernel characteristics
40 * long optimalWorkGroupSize = (maxWorkGroupSize / preferredMultiple) * preferredMultiple;
41 * }</pre>
42 *
43 * <p>Kernel optimization workflow:
44 * <ol>
45 * <li><strong>Kernel compilation</strong>: Compile kernel for target device</li>
46 * <li><strong>Characteristic query</strong>: Retrieve kernel-specific execution parameters</li>
47 * <li><strong>Launch optimization</strong>: Configure work group sizes based on kernel requirements</li>
48 * <li><strong>Resource validation</strong>: Ensure memory requirements don't exceed device limits</li>
49 * </ol>
50 *
51 * <p>Error handling:
52 * <ul>
53 * <li><strong>Parameter validation</strong>: Validates all input parameters</li>
54 * <li><strong>OpenCL error propagation</strong>: OpenCL errors are propagated as runtime exceptions</li>
55 * <li><strong>Memory management</strong>: Automatically handles buffer allocation and cleanup</li>
56 * </ul>
57 *
58 * @see KernelInfo
59 * @see KernelInfoReader
60 * @see net.bmahe.genetics4j.gpu.opencl.model.Device
61 */
62 public class KernelInfoUtils {
63
64 private KernelInfoUtils() {
65
66 }
67
68 /**
69 * Queries and returns a long value for kernel work group information on the specified device.
70 *
71 * <p>This method retrieves kernel-specific execution characteristics that vary by device, such as maximum work group
72 * size, preferred work group size multiples, and memory usage requirements. This information is essential for
73 * optimizing kernel launch parameters.
74 *
75 * @param deviceId the OpenCL device to query
76 * @param kernel the compiled OpenCL kernel
77 * @param parameter the OpenCL parameter constant (e.g., CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_LOCAL_MEM_SIZE)
78 * @return the long value of the requested kernel work group property
79 * @throws IllegalArgumentException if deviceId or kernel is null
80 */
81 public static long getKernelWorkGroupInfoLong(final cl_device_id deviceId, final cl_kernel kernel,
82 final int parameter) {
83 Validate.notNull(deviceId);
84 Validate.notNull(kernel);
85
86 final long[] values = new long[1];
87 CL.clGetKernelWorkGroupInfo(kernel, deviceId, parameter, Sizeof.cl_long, Pointer.to(values), null);
88
89 return values[0];
90 }
91 }