1 package net.bmahe.genetics4j.gpu.opencl; 2 3 import org.apache.commons.lang3.Validate; 4 import org.jocl.CL; 5 import org.jocl.Pointer; 6 import org.jocl.Sizeof; 7 import org.jocl.cl_device_id; 8 import org.jocl.cl_kernel; 9 10 /** 11 * Utility class providing convenient methods for querying OpenCL kernel work group information. 12 * 13 * <p>KernelInfoUtils encapsulates the low-level OpenCL API calls required for retrieving kernel-specific execution 14 * characteristics on target devices. This information is essential for optimizing kernel launch parameters and ensuring 15 * efficient resource utilization in GPU-accelerated evolutionary algorithms. 16 * 17 * <p>Key functionality includes: 18 * <ul> 19 * <li><strong>Work group queries</strong>: Retrieve kernel-specific work group size limits and preferences</li> 20 * <li><strong>Memory usage queries</strong>: Query local and private memory requirements per work-item</li> 21 * <li><strong>Performance optimization</strong>: Access preferred work group size multiples for optimal execution</li> 22 * <li><strong>Resource validation</strong>: Obtain kernel resource requirements for launch parameter validation</li> 23 * </ul> 24 * 25 * <p>Common usage patterns: 26 * 27 * <pre>{@code 28 * // Query kernel work group characteristics 29 * long maxWorkGroupSize = KernelInfoUtils.getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_WORK_GROUP_SIZE); 30 * 31 * long preferredMultiple = KernelInfoUtils 32 * .getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE); 33 * 34 * // Query memory requirements 35 * long localMemSize = KernelInfoUtils.getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_LOCAL_MEM_SIZE); 36 * 37 * long privateMemSize = KernelInfoUtils.getKernelWorkGroupInfoLong(deviceId, kernel, CL.CL_KERNEL_PRIVATE_MEM_SIZE); 38 * 39 * // Optimize work group size based on kernel characteristics 40 * long optimalWorkGroupSize = (maxWorkGroupSize / preferredMultiple) * preferredMultiple; 41 * }</pre> 42 * 43 * <p>Kernel optimization workflow: 44 * <ol> 45 * <li><strong>Kernel compilation</strong>: Compile kernel for target device</li> 46 * <li><strong>Characteristic query</strong>: Retrieve kernel-specific execution parameters</li> 47 * <li><strong>Launch optimization</strong>: Configure work group sizes based on kernel requirements</li> 48 * <li><strong>Resource validation</strong>: Ensure memory requirements don't exceed device limits</li> 49 * </ol> 50 * 51 * <p>Error handling: 52 * <ul> 53 * <li><strong>Parameter validation</strong>: Validates all input parameters</li> 54 * <li><strong>OpenCL error propagation</strong>: OpenCL errors are propagated as runtime exceptions</li> 55 * <li><strong>Memory management</strong>: Automatically handles buffer allocation and cleanup</li> 56 * </ul> 57 * 58 * @see KernelInfo 59 * @see KernelInfoReader 60 * @see net.bmahe.genetics4j.gpu.opencl.model.Device 61 */ 62 public class KernelInfoUtils { 63 64 private KernelInfoUtils() { 65 66 } 67 68 /** 69 * Queries and returns a long value for kernel work group information on the specified device. 70 * 71 * <p>This method retrieves kernel-specific execution characteristics that vary by device, such as maximum work group 72 * size, preferred work group size multiples, and memory usage requirements. This information is essential for 73 * optimizing kernel launch parameters. 74 * 75 * @param deviceId the OpenCL device to query 76 * @param kernel the compiled OpenCL kernel 77 * @param parameter the OpenCL parameter constant (e.g., CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_LOCAL_MEM_SIZE) 78 * @return the long value of the requested kernel work group property 79 * @throws IllegalArgumentException if deviceId or kernel is null 80 */ 81 public static long getKernelWorkGroupInfoLong(final cl_device_id deviceId, final cl_kernel kernel, 82 final int parameter) { 83 Validate.notNull(deviceId); 84 Validate.notNull(kernel); 85 86 final long[] values = new long[1]; 87 CL.clGetKernelWorkGroupInfo(kernel, deviceId, parameter, Sizeof.cl_long, Pointer.to(values), null); 88 89 return values[0]; 90 } 91 }