1 package net.bmahe.genetics4j.gpu.opencl; 2 3 import org.apache.commons.lang3.Validate; 4 import org.jocl.CL; 5 import org.jocl.Pointer; 6 import org.jocl.Sizeof; 7 import org.jocl.cl_device_id; 8 import org.jocl.cl_kernel; 9 10 /** 11 * Utility class providing convenient methods for querying OpenCL kernel work group information. 12 * 13 * <p>KernelInfoUtils encapsulates the low-level OpenCL API calls required for retrieving kernel-specific 14 * execution characteristics on target devices. This information is essential for optimizing kernel 15 * launch parameters and ensuring efficient resource utilization in GPU-accelerated evolutionary algorithms. 16 * 17 * <p>Key functionality includes: 18 * <ul> 19 * <li><strong>Work group queries</strong>: Retrieve kernel-specific work group size limits and preferences</li> 20 * <li><strong>Memory usage queries</strong>: Query local and private memory requirements per work-item</li> 21 * <li><strong>Performance optimization</strong>: Access preferred work group size multiples for optimal execution</li> 22 * <li><strong>Resource validation</strong>: Obtain kernel resource requirements for launch parameter validation</li> 23 * </ul> 24 * 25 * <p>Common usage patterns: 26 * <pre>{@code 27 * // Query kernel work group characteristics 28 * long maxWorkGroupSize = KernelInfoUtils.getKernelWorkGroupInfoLong( 29 * deviceId, kernel, CL.CL_KERNEL_WORK_GROUP_SIZE); 30 * 31 * long preferredMultiple = KernelInfoUtils.getKernelWorkGroupInfoLong( 32 * deviceId, kernel, CL.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE); 33 * 34 * // Query memory requirements 35 * long localMemSize = KernelInfoUtils.getKernelWorkGroupInfoLong( 36 * deviceId, kernel, CL.CL_KERNEL_LOCAL_MEM_SIZE); 37 * 38 * long privateMemSize = KernelInfoUtils.getKernelWorkGroupInfoLong( 39 * deviceId, kernel, CL.CL_KERNEL_PRIVATE_MEM_SIZE); 40 * 41 * // Optimize work group size based on kernel characteristics 42 * long optimalWorkGroupSize = (maxWorkGroupSize / preferredMultiple) * preferredMultiple; 43 * }</pre> 44 * 45 * <p>Kernel optimization workflow: 46 * <ol> 47 * <li><strong>Kernel compilation</strong>: Compile kernel for target device</li> 48 * <li><strong>Characteristic query</strong>: Retrieve kernel-specific execution parameters</li> 49 * <li><strong>Launch optimization</strong>: Configure work group sizes based on kernel requirements</li> 50 * <li><strong>Resource validation</strong>: Ensure memory requirements don't exceed device limits</li> 51 * </ol> 52 * 53 * <p>Error handling: 54 * <ul> 55 * <li><strong>Parameter validation</strong>: Validates all input parameters</li> 56 * <li><strong>OpenCL error propagation</strong>: OpenCL errors are propagated as runtime exceptions</li> 57 * <li><strong>Memory management</strong>: Automatically handles buffer allocation and cleanup</li> 58 * </ul> 59 * 60 * @see KernelInfo 61 * @see KernelInfoReader 62 * @see net.bmahe.genetics4j.gpu.opencl.model.Device 63 */ 64 public class KernelInfoUtils { 65 66 private KernelInfoUtils() { 67 68 } 69 70 /** 71 * Queries and returns a long value for kernel work group information on the specified device. 72 * 73 * <p>This method retrieves kernel-specific execution characteristics that vary by device, 74 * such as maximum work group size, preferred work group size multiples, and memory usage 75 * requirements. This information is essential for optimizing kernel launch parameters. 76 * 77 * @param deviceId the OpenCL device to query 78 * @param kernel the compiled OpenCL kernel 79 * @param parameter the OpenCL parameter constant (e.g., CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_LOCAL_MEM_SIZE) 80 * @return the long value of the requested kernel work group property 81 * @throws IllegalArgumentException if deviceId or kernel is null 82 */ 83 public static long getKernelWorkGroupInfoLong(final cl_device_id deviceId, final cl_kernel kernel, 84 final int parameter) { 85 Validate.notNull(deviceId); 86 Validate.notNull(kernel); 87 88 final long[] values = new long[1]; 89 CL.clGetKernelWorkGroupInfo(kernel, deviceId, parameter, Sizeof.cl_long, Pointer.to(values), null); 90 91 return values[0]; 92 } 93 }