// OpenCLFitness.java
package net.bmahe.genetics4j.gpu.spec.fitness;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import net.bmahe.genetics4j.core.Genotype;
import net.bmahe.genetics4j.gpu.opencl.OpenCLExecutionContext;
/**
* Abstract base class for implementing OpenCL-based fitness evaluation in GPU-accelerated evolutionary algorithms.
*
* <p>OpenCLFitness provides the framework for evaluating population fitness using OpenCL kernels executed
* on GPU devices. This class defines the lifecycle and coordination patterns needed for efficient GPU-based
* fitness computation, including resource management, data transfer, and kernel execution orchestration.
*
* <p>The fitness evaluation lifecycle consists of several phases:
* <ol>
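* <li><strong>Global initialization</strong>: One-time setup before any evaluations ({@link #beforeAllEvaluations()})</li>
* <li><strong>Per-device initialization</strong>: Setup for each OpenCL device context ({@link #beforeAllEvaluations(OpenCLExecutionContext, ExecutorService)})</li>
* <li><strong>Generation setup</strong>: Preparation before each generation evaluation ({@link #beforeEvaluation(long, List)}, then {@link #beforeEvaluation(OpenCLExecutionContext, ExecutorService, long, List)} for each device)</li>
* <li><strong>Computation</strong>: Actual fitness evaluation using OpenCL kernels ({@link #compute(OpenCLExecutionContext, ExecutorService, long, List)})</li>
* <li><strong>Generation cleanup</strong>: Cleanup after each generation evaluation ({@link #afterEvaluation(OpenCLExecutionContext, ExecutorService, long, List)} for each device, then {@link #afterEvaluation(long, List)})</li>
* <li><strong>Per-device cleanup</strong>: Cleanup for each OpenCL device context ({@link #afterAllEvaluations(OpenCLExecutionContext, ExecutorService)})</li>
* <li><strong>Global cleanup</strong>: Final cleanup after all evaluations ({@link #afterAllEvaluations()})</li>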
* </ol>
*
* <p>Key responsibilities for implementations:
* <ul>
* <li><strong>Data preparation</strong>: Convert genotypes to GPU-compatible data formats</li>
* <li><strong>Memory management</strong>: Allocate and manage GPU memory buffers</li>
* <li><strong>Kernel execution</strong>: Configure and execute OpenCL kernels with appropriate parameters</li>
* <li><strong>Result extraction</strong>: Retrieve and convert fitness values from GPU memory</li>
* <li><strong>Resource cleanup</strong>: Ensure proper cleanup of GPU resources</li>
* </ul>
*
* <p>Common implementation patterns:
* <pre>{@code
* public class MyGPUFitness extends OpenCLFitness<Double> {
*
* private CLData inputBuffer;
* private CLData outputBuffer;
*
* @Override
* public void beforeAllEvaluations(OpenCLExecutionContext context, ExecutorService executor) {
* // Allocate GPU memory buffers that persist across generations
* int maxPopulationSize = getMaxPopulationSize();
* inputBuffer = CLData.allocateFloat(context, maxPopulationSize * chromosomeSize);
* outputBuffer = CLData.allocateFloat(context, maxPopulationSize);
* }
*
* @Override
* public CompletableFuture<List<Double>> compute(OpenCLExecutionContext context,
* ExecutorService executor, long generation, List<Genotype> genotypes) {
*
* return CompletableFuture.supplyAsync(() -> {
* // Transfer genotype data to GPU
* transferGenotypesToGPU(context, genotypes, inputBuffer);
*
* // Execute fitness evaluation kernel
* executeKernel(context, "fitness_kernel", genotypes.size());
*
* // Retrieve results from GPU
* return extractFitnessValues(context, outputBuffer, genotypes.size());
* }, executor);
* }
*
* @Override
* public void afterAllEvaluations(OpenCLExecutionContext context, ExecutorService executor) {
* // Clean up GPU memory
* inputBuffer.release();
* outputBuffer.release();
* }
* }
* }</pre>
*
* <p>Performance optimization strategies:
* <ul>
* <li><strong>Memory reuse</strong>: Allocate buffers once in {@link #beforeAllEvaluations(OpenCLExecutionContext, ExecutorService)} and reuse them across generations</li>
* <li><strong>Asynchronous execution</strong>: Use CompletableFuture for non-blocking GPU operations</li>
* <li><strong>Batch processing</strong>: Process entire populations in single kernel launches</li>
* <li><strong>Memory coalescing</strong>: Organize data layouts for optimal GPU memory access patterns</li>
* <li><strong>Kernel optimization</strong>: Design kernels to maximize GPU utilization and minimize divergence</li>
* </ul>
*
* <p>Error handling and robustness:
* <ul>
* <li><strong>GPU errors</strong>: Handle OpenCL errors gracefully and provide meaningful error messages</li>
* <li><strong>Memory management</strong>: Ensure proper cleanup even in exceptional circumstances</li>
* <li><strong>Device failures</strong>: Support graceful degradation when GPU devices fail</li>
* <li><strong>Timeout handling</strong>: Implement appropriate timeouts for long-running kernels</li>
* </ul>
*
* <p>Multi-device considerations:
* <ul>
* <li><strong>Device-specific setup</strong>: Separate contexts and buffers for each device</li>
* <li><strong>Load balancing</strong>: Coordinate with the framework's automatic population partitioning</li>
* <li><strong>Resource isolation</strong>: Ensure proper isolation of resources between devices</li>
* <li><strong>Synchronization</strong>: Coordinate results from multiple devices</li>
* </ul>
*
* @param <T> the type of fitness values produced, must be comparable for selection operations
* @see net.bmahe.genetics4j.gpu.GPUFitnessEvaluator
* @see OpenCLExecutionContext
* @see net.bmahe.genetics4j.gpu.opencl.model.CLData
*/
public abstract class OpenCLFitness<T extends Comparable<T>> {

    /** Log4j logger named after {@code OpenCLFitness}. */
    public static final Logger logger = LogManager.getLogger(OpenCLFitness.class);

    /**
     * Runs global, one-time setup ahead of every fitness evaluation.
     *
     * <p>Invoked a single time when the evolutionary algorithm starts, prior to the creation of
     * any OpenCL context and prior to any evaluation. Whatever is prepared here is shared by all
     * devices and all generations, for instance:
     * <ul>
     * <li>initializing problem-specific constants or parameters</li>
     * <li>loading reference data or configuration</li>
     * <li>setting up logging or monitoring infrastructure</li>
     * <li>validating problem constraints or requirements</li>
     * </ul>
     *
     * <p>The call is made on the main thread, before concurrent operations start.
     *
     * <p>The default implementation does nothing.
     *
     * @see #beforeAllEvaluations(OpenCLExecutionContext, ExecutorService)
     * @see #afterAllEvaluations()
     */
    public void beforeAllEvaluations() {
        // No-op by default; subclasses override when they need global setup.
    }

    /**
     * Runs one-time setup for a single OpenCL execution context.
     *
     * <p>Invoked once per OpenCL device taking part in fitness evaluation, so that resources
     * tied to that device can be prepared, for instance:
     * <ul>
     * <li>allocating GPU memory buffers that live across generations</li>
     * <li>pre-loading static data into GPU memory</li>
     * <li>initializing device-specific data structures</li>
     * <li>setting up device-specific kernels or configurations</li>
     * </ul>
     *
     * <p>Anything allocated here is normally released in the matching
     * {@link #afterAllEvaluations(OpenCLExecutionContext, ExecutorService)} call.
     *
     * <p>The default implementation does nothing.
     *
     * @param openCLExecutionContext the OpenCL execution context of the device being set up
     * @param executorService        the executor service available for asynchronous work
     * @see #afterAllEvaluations(OpenCLExecutionContext, ExecutorService)
     */
    public void beforeAllEvaluations(
            final OpenCLExecutionContext openCLExecutionContext,
            final ExecutorService executorService) {
        // No-op by default; subclasses override when they need per-device setup.
    }

    /**
     * Prepares global state ahead of the evaluation of one generation.
     *
     * <p>Invoked before the fitness of each generation is evaluated, with the generation number
     * and the whole population, so that preparation common to all devices can take place, for
     * instance:
     * <ul>
     * <li>updating generation-specific parameters or configurations</li>
     * <li>logging the start of the generation or population statistics</li>
     * <li>preparing global data structures for the coming evaluation</li>
     * <li>adapting behavior to the generation number</li>
     * </ul>
     *
     * <p>The default implementation does nothing.
     *
     * @param generation the current generation number (0-based)
     * @param genotypes  the complete population about to be evaluated
     * @see #beforeEvaluation(OpenCLExecutionContext, ExecutorService, long, List)
     * @see #afterEvaluation(long, List)
     */
    public void beforeEvaluation(final long generation, final List<Genotype> genotypes) {
        // No-op by default; subclasses override when they need per-generation setup.
    }

    /**
     * Prepares one device ahead of the evaluation of its share of the population.
     *
     * <p>Invoked for each device before its assigned partition is evaluated, with both the
     * device context and the genotypes destined for that device. Typical work includes:
     * <ul>
     * <li>transferring genotype data to device memory</li>
     * <li>updating device-specific parameters for the generation</li>
     * <li>filling input buffers with population data</li>
     * <li>setting kernel arguments that change from one generation to the next</li>
     * </ul>
     *
     * <p>The default implementation does nothing.
     *
     * @param openCLExecutionContext the OpenCL execution context of the device
     * @param executorService        the executor service available for asynchronous work
     * @param generation             the current generation number (0-based)
     * @param genotypes              the partition of genotypes this device will evaluate
     * @see #afterEvaluation(OpenCLExecutionContext, ExecutorService, long, List)
     */
    public void beforeEvaluation(
            final OpenCLExecutionContext openCLExecutionContext,
            final ExecutorService executorService,
            final long generation,
            final List<Genotype> genotypes) {
        // No-op by default; subclasses override when they need per-device, per-generation setup.
    }

    /**
     * Computes the fitness of the supplied genotypes with OpenCL kernels on the given device.
     *
     * <p>This is the one method every subclass has to provide. It is handed a partition of the
     * population and must produce the matching fitness values by running OpenCL kernels on the
     * device behind {@code openCLExecutionContext}.
     *
     * <p>What an implementation must guarantee:
     * <ul>
     * <li><strong>Ordering</strong>: the fitness at a given index belongs to the genotype at that index</li>
     * <li><strong>Cardinality</strong>: exactly one fitness value is returned per input genotype</li>
     * <li><strong>Asynchrony</strong>: GPU work goes through the executor service rather than blocking</li>
     * <li><strong>Failure reporting</strong>: GPU errors are handled gracefully and surfaced as meaningful exceptions</li>
     * </ul>
     *
     * <p>An implementation usually proceeds as follows:
     * <ol>
     * <li>copy the genotype data to GPU memory</li>
     * <li>configure kernel arguments and work group parameters</li>
     * <li>launch the OpenCL kernels that compute the fitness</li>
     * <li>read the fitness values back from GPU memory</li>
     * <li>convert the raw results to the fitness type {@code T}</li>
     * </ol>
     *
     * @param openCLExecutionContext the OpenCL execution context giving access to the device
     * @param executorService        the executor service available for asynchronous work
     * @param generation             the current generation number, supplied for context
     * @param genotypes              the genotypes to evaluate on this device
     * @return a {@link CompletableFuture} completing with one fitness value per genotype
     * @throws RuntimeException if GPU evaluation fails or setup errors occur
     */
    public abstract CompletableFuture<List<T>> compute(
            final OpenCLExecutionContext openCLExecutionContext,
            final ExecutorService executorService,
            final long generation,
            final List<Genotype> genotypes);

    /**
     * Cleans up one device once the evaluation of its share of the population is over.
     *
     * <p>Invoked for each device after its partition has been evaluated. Typical work includes:
     * <ul>
     * <li>releasing temporary GPU memory allocations</li>
     * <li>logging device-specific performance metrics</li>
     * <li>updating device-specific statistics or state</li>
     * <li>running device-specific validation or debugging</li>
     * </ul>
     *
     * <p>The default implementation does nothing.
     *
     * @param openCLExecutionContext the OpenCL execution context of the device
     * @param executorService        the executor service available for asynchronous work
     * @param generation             the current generation number (0-based)
     * @param genotypes              the partition of genotypes this device evaluated
     * @see #beforeEvaluation(OpenCLExecutionContext, ExecutorService, long, List)
     */
    public void afterEvaluation(
            final OpenCLExecutionContext openCLExecutionContext,
            final ExecutorService executorService,
            final long generation,
            final List<Genotype> genotypes) {
        // No-op by default; subclasses override when they need per-device, per-generation cleanup.
    }

    /**
     * Cleans up global state once the evaluation of one generation is over.
     *
     * <p>Invoked after the fitness of a generation has been evaluated on every device, so that
     * cleanup and bookkeeping covering the whole population can take place, for instance:
     * <ul>
     * <li>logging the end of the generation and performance metrics</li>
     * <li>updating global statistics or progress tracking</li>
     * <li>running global validation or debugging</li>
     * <li>releasing global resources tied to the generation</li>
     * </ul>
     *
     * <p>The default implementation does nothing.
     *
     * @param generation the current generation number (0-based)
     * @param genotypes  the complete population that was evaluated
     * @see #beforeEvaluation(long, List)
     */
    public void afterEvaluation(final long generation, final List<Genotype> genotypes) {
        // No-op by default; subclasses override when they need per-generation cleanup.
    }

    /**
     * Runs final cleanup for a single OpenCL execution context.
     *
     * <p>Invoked once per OpenCL device when fitness evaluation has finished, giving the
     * implementation the chance to free what it allocated in
     * {@link #beforeAllEvaluations(OpenCLExecutionContext, ExecutorService)}, for instance:
     * <ul>
     * <li>releasing GPU memory buffers and resources</li>
     * <li>tearing down device-specific data structures</li>
     * <li>logging device-specific performance summaries</li>
     * <li>making sure no GPU memory is leaked</li>
     * </ul>
     *
     * <p>Overrides should clean up properly even when an exception occurred during evaluation,
     * since this may be the last opportunity to avoid a resource leak.
     *
     * <p>The default implementation does nothing.
     *
     * @param openCLExecutionContext the OpenCL execution context of the device being torn down
     * @param executorService        the executor service available for asynchronous work
     * @see #beforeAllEvaluations(OpenCLExecutionContext, ExecutorService)
     */
    public void afterAllEvaluations(
            final OpenCLExecutionContext openCLExecutionContext,
            final ExecutorService executorService) {
        // No-op by default; subclasses override when they need per-device cleanup.
    }

    /**
     * Runs global, one-time cleanup after every fitness evaluation has finished.
     *
     * <p>Invoked a single time when the evolutionary algorithm ends, once all OpenCL contexts
     * have been cleaned up and all evaluations are done. Typical work includes:
     * <ul>
     * <li>releasing global resources and data structures</li>
     * <li>logging final performance summaries and statistics</li>
     * <li>saving results or generating reports</li>
     * <li>running final validation or cleanup</li>
     * </ul>
     *
     * <p>The call is made on the main thread, after all concurrent operations have completed.
     *
     * <p>The default implementation does nothing.
     *
     * @see #beforeAllEvaluations()
     */
    public void afterAllEvaluations() {
        // No-op by default; subclasses override when they need global cleanup.
    }
}