View Javadoc
1   package net.bmahe.genetics4j.samples.mixturemodel;
2   
3   import java.io.IOException;
4   import java.util.Collection;
5   import java.util.Comparator;
6   import java.util.HashSet;
7   import java.util.List;
8   import java.util.Set;
9   import java.util.concurrent.ExecutionException;
10  import java.util.concurrent.Executors;
11  import java.util.stream.IntStream;
12  
13  import org.apache.commons.collections4.CollectionUtils;
14  import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
15  import org.apache.commons.math3.exception.MathUnsupportedOperationException;
16  import org.apache.commons.math3.linear.Array2DRowRealMatrix;
17  import org.apache.commons.math3.linear.EigenDecomposition;
18  import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException;
19  import org.apache.commons.math3.linear.SingularMatrixException;
20  import org.apache.logging.log4j.LogManager;
21  import org.apache.logging.log4j.Logger;
22  
23  import net.bmahe.genetics4j.core.Genotype;
24  import net.bmahe.genetics4j.core.chromosomes.FloatChromosome;
25  import net.bmahe.genetics4j.core.evolutionlisteners.EvolutionListeners;
26  import net.bmahe.genetics4j.core.spec.EvolutionResult;
27  import net.bmahe.genetics4j.core.spec.chromosome.FloatChromosomeSpec;
28  import net.bmahe.genetics4j.core.spec.combination.MultiCombinations;
29  import net.bmahe.genetics4j.core.spec.combination.MultiPointArithmetic;
30  import net.bmahe.genetics4j.core.spec.combination.MultiPointCrossover;
31  import net.bmahe.genetics4j.core.spec.combination.SinglePointArithmetic;
32  import net.bmahe.genetics4j.core.spec.combination.SinglePointCrossover;
33  import net.bmahe.genetics4j.core.spec.mutation.CreepMutation;
34  import net.bmahe.genetics4j.core.spec.mutation.MultiMutations;
35  import net.bmahe.genetics4j.core.spec.mutation.RandomMutation;
36  import net.bmahe.genetics4j.core.spec.mutation.SwapMutation;
37  import net.bmahe.genetics4j.core.spec.replacement.Elitism;
38  import net.bmahe.genetics4j.core.spec.statistics.distributions.NormalDistribution;
39  import net.bmahe.genetics4j.core.termination.Terminations;
40  import net.bmahe.genetics4j.extras.evolutionlisteners.CSVEvolutionListener;
41  import net.bmahe.genetics4j.extras.evolutionlisteners.ColumnExtractor;
42  import net.bmahe.genetics4j.gpu.GPUEASystemFactory;
43  import net.bmahe.genetics4j.gpu.spec.DeviceFilters;
44  import net.bmahe.genetics4j.gpu.spec.GPUEAConfiguration;
45  import net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext;
46  import net.bmahe.genetics4j.gpu.spec.Program;
47  import net.bmahe.genetics4j.gpu.spec.fitness.FitnessExtractor;
48  import net.bmahe.genetics4j.gpu.spec.fitness.SingleKernelFitness;
49  import net.bmahe.genetics4j.gpu.spec.fitness.SingleKernelFitnessDescriptor;
50  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.DataLoaders;
51  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.DataSupplier;
52  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.ResultAllocators;
53  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.StaticDataLoaders;
54  import net.bmahe.genetics4j.gpu.spec.fitness.kernelcontext.KernelExecutionContextComputers;
55  import net.bmahe.genetics4j.moo.FitnessVector;
56  import net.bmahe.genetics4j.moo.nsga2.impl.NSGA2SelectionPolicyHandler;
57  import net.bmahe.genetics4j.moo.nsga2.impl.TournamentNSGA2SelectionPolicyHandler;
58  import net.bmahe.genetics4j.moo.nsga2.spec.NSGA2Selection;
59  import net.bmahe.genetics4j.moo.nsga2.spec.TournamentNSGA2Selection;
60  import net.bmahe.genetics4j.moo.spea2.replacement.SPEA2ReplacementStrategyHandler;
61  
62  public class MooGPU {
63  	final static public Logger logger = LogManager.getLogger(MooGPU.class);
64  
65  	private final int distributionNumParameters;
66  	private final Comparator<Genotype> deduplicator;
67  	private final String baseDir;
68  	private final int maxGenerations;
69  
70  	public MooGPU(final int _distributionNumParameters,
71  			final Comparator<Genotype> _deduplicator,
72  			final String _baseDir,
73  			final int _maxGenerations) {
74  
75  		this.distributionNumParameters = _distributionNumParameters;
76  		this.deduplicator = _deduplicator;
77  		this.baseDir = _baseDir;
78  		this.maxGenerations = _maxGenerations;
79  	}
80  
81  	// tag::moo_gpu_fitness_extractor[]
82  	public final FitnessExtractor<FitnessVector<Float>> fitnessExtractor(final int maxPossibleDistributions,
83  			final double[][] samples) {
84  		return (openCLExecutionContext, kernelExecutionContext, executorService, generation, genotypes,
85  				resultExtractor) -> {
86  
87  			final float[] results = resultExtractor.extractFloatArray(openCLExecutionContext, 5);
88  
89  			try {
90  				final var extractionTask = executorService.submit(() -> {
91  					return IntStream.range(0, genotypes.size()).boxed().parallel().map(genotypeIndex -> {
92  						float tally = 0;
93  
94  						final int startIndex = genotypeIndex * samples.length;
95  						final int endIndex = startIndex + samples.length;
96  						int notFiniteCount = 0;
97  						for (int i = startIndex; i < endIndex; i++) {
98  							if (Float.isFinite(results[i])) {
99  								tally += results[i];
100 							} else {
101 								notFiniteCount++;
102 								tally -= 10_000f;
103 							}
104 						}
105 						final int[] assignedClusters = ClusteringUtils.assignClustersFloatChromosome(
106 								distributionNumParameters,
107 									samples,
108 									genotypes.get(genotypeIndex));
109 						final Set<Integer> uniqueAssigned = new HashSet<>();
110 						uniqueAssigned.addAll(IntStream.of(assignedClusters).boxed().toList());
111 
112 						if (notFiniteCount > 0 || notFiniteCount == samples.length || uniqueAssigned.size() == 0) {
113 							return new FitnessVector<>(-100_000_000f, -100f);
114 						}
115 
116 						final float numUnusedCluster = (float) (maxPossibleDistributions - uniqueAssigned.size());
117 						return new FitnessVector<>(tally / samples.length, numUnusedCluster);
118 					}).toList();
119 				});
120 
121 				return extractionTask.get();
122 			} catch (InterruptedException | ExecutionException e) {
123 				logger.error("Could not extract results", e);
124 				throw new RuntimeException(e);
125 			}
126 		};
127 	}
128 	// end::moo_gpu_fitness_extractor[]
129 
130 	public DataSupplier<float[]> computeSideData(final int maxPossibleDistributions) {
131 		return (openCLExecutionContext, generation, genotypes) -> {
132 			/**
133 			 * We will provide: <br>
134 			 * - Whether the values are valid<br>
135 			 * - Value of the determinant<br>
136 			 * - A linearized 2x2 matrix for the inverse covariance
137 			 */
138 			final float[] densityDataHelper = new float[genotypes.size() * maxPossibleDistributions * (2 + 4)];
139 			int genotypeIndex = 0;
140 			for (final var genotype : genotypes) {
141 				final var fChromosome = genotype.getChromosome(0, FloatChromosome.class);
142 
143 				for (int distributionIndex = 0; distributionIndex < maxPossibleDistributions; distributionIndex++) {
144 					final int baseDistributionIndex = distributionIndex * distributionNumParameters;
145 
146 					final double[] mean = new double[] { fChromosome.getAllele(baseDistributionIndex + 1),
147 							fChromosome.getAllele(baseDistributionIndex + 2) };
148 					final double[][] covariances = new double[2][2];
149 					covariances[0][0] = fChromosome.getAllele(baseDistributionIndex + 3) - 15;
150 					covariances[0][1] = fChromosome.getAllele(baseDistributionIndex + 4) - 15;
151 					covariances[1][0] = fChromosome.getAllele(baseDistributionIndex + 4) - 15;
152 					covariances[1][1] = fChromosome.getAllele(baseDistributionIndex + 5) - 15;
153 
154 					try {
155 						/**
156 						 * Will throw exception is the covariances aren't valid
157 						 */
158 						final var multivariateNormalDistribution = new MultivariateNormalDistribution(mean, covariances);
159 
160 						final var covarianceMatrix = new Array2DRowRealMatrix(covariances);
161 						final var eigenDecomposition = new EigenDecomposition(covarianceMatrix);
162 						final var eigenDecompositionSolver = eigenDecomposition.getSolver();
163 						final var covarianceInverse = eigenDecompositionSolver.getInverse();
164 						final var covarianceDeterminant = eigenDecomposition.getDeterminant();
165 
166 						final int baseDensityDataHelperIndex = genotypeIndex * maxPossibleDistributions * 6
167 								+ distributionIndex * 6;
168 						densityDataHelper[baseDensityDataHelperIndex + 1] = (float) covarianceDeterminant;
169 						densityDataHelper[baseDensityDataHelperIndex + 2] = (float) covarianceInverse.getEntry(0, 0);
170 						densityDataHelper[baseDensityDataHelperIndex + 3] = (float) covarianceInverse.getEntry(0, 1);
171 						densityDataHelper[baseDensityDataHelperIndex + 4] = (float) covarianceInverse.getEntry(1, 0);
172 						densityDataHelper[baseDensityDataHelperIndex + 5] = (float) covarianceInverse.getEntry(1, 1);
173 
174 						densityDataHelper[baseDensityDataHelperIndex + 0] = 10;
175 
176 					} catch (NonPositiveDefiniteMatrixException | MathUnsupportedOperationException
177 							| SingularMatrixException e) {
178 
179 					}
180 				}
181 
182 				genotypeIndex++;
183 			}
184 			return densityDataHelper;
185 		};
186 	}
187 
188 	public void run(final int maxPossibleDistributions, final int numDistributions, final double[][] samplesDouble,
189 			final float[][] samples, final float[] x, final float[] y, final String algorithmName,
190 			final Collection<Genotype> seedPopulation, final EvolutionResult<FitnessVector<Float>> bestCPUResult)
191 			throws IOException {
192 
193 		final var kernelName = "mixtureModelKernel";
194 		final int chromosomeSize = distributionNumParameters * maxPossibleDistributions;
195 
196 		// tag::moo_gpu_skfd[]
197 		final var singleKernelFitnessDescriptorBuilder = SingleKernelFitnessDescriptor.builder();
198 		singleKernelFitnessDescriptorBuilder.kernelName(kernelName)
199 				.kernelExecutionContextComputer(KernelExecutionContextComputers.ofGlobalWorkSize1D(samples.length))
200 				.putStaticDataLoaders(0, StaticDataLoaders.ofLinearize(samples))
201 				.putStaticDataLoaders(
202 						1,
203 							StaticDataLoaders
204 									.of(distributionNumParameters, maxPossibleDistributions, chromosomeSize, samples.length))
205 				.putDataLoaders(2, DataLoaders.ofGenerationAndPopulationSize())
206 				.putDataLoaders(3, DataLoaders.ofLinearizeFloatChromosome(0))
207 				.putDataLoaders(4, DataLoaders.ofFloatSupplier(computeSideData(maxPossibleDistributions)))
208 				.putResultAllocators(5, ResultAllocators.ofMultiplePopulationSizeFloat(samples.length));
209 		final var singleKernelFitnessDescriptor = singleKernelFitnessDescriptorBuilder.build();
210 		// end::moo_gpu_skfd[]
211 
212 		// tag::moo_gpu_config[]
213 		final var singleKernelFitness = SingleKernelFitness
214 				.of(singleKernelFitnessDescriptor, fitnessExtractor(maxPossibleDistributions, samplesDouble));
215 
216 		final var eaConfigurationBuilder = GPUEAConfiguration.<FitnessVector<Float>>builder()
217 				.chromosomeSpecs(FloatChromosomeSpec.of(chromosomeSize, 0, 30))
218 				.parentSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 5, deduplicator))
219 				.replacementStrategy(
220 						Elitism.builder()
221 								.offspringRatio(0.950)
222 								.offspringSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 5, deduplicator))
223 								.survivorSelectionPolicy(NSGA2Selection.ofFitnessVector(2, deduplicator))
224 								.build())
225 				.combinationPolicy(
226 						MultiCombinations.of(
227 								SinglePointArithmetic.of(0.9),
228 									SinglePointCrossover.build(),
229 									MultiPointCrossover.of(3),
230 									MultiPointArithmetic.of(3, 0.9)))
231 				.mutationPolicies(
232 						MultiMutations.of(
233 								RandomMutation.of(0.40),
234 									CreepMutation.of(0.40, NormalDistribution.of(0.0, 1)),
235 									SwapMutation.of(0.40, 5, false)))
236 				.program(Program.ofResource("/opencl/mixturemodel/main.cl", kernelName, ""))
237 				.fitness(singleKernelFitness)
238 				.termination(Terminations.ofMaxGeneration(maxGenerations));
239 
240 		if (CollectionUtils.isNotEmpty(seedPopulation)) {
241 			eaConfigurationBuilder.seedPopulation(seedPopulation);
242 		}
243 
244 		final var eaConfiguration = eaConfigurationBuilder.build();
245 		// end::moo_gpu_config[]
246 
247 		// tag::moo_gpu_eaexeccontext[]
248 		final var csvEvolutionListener = CSVEvolutionListener.<FitnessVector<Float>, Void>of(
249 				baseDir + algorithmName + ".csv",
250 					List.of(
251 							ColumnExtractor.of("generation", e -> e.generation()),
252 								ColumnExtractor.of("fitness", e -> e.fitness().get(0)),
253 								ColumnExtractor.of("numUnusedCluster", e -> e.fitness().get(1)),
254 								ColumnExtractor.of("numCluster", e -> maxPossibleDistributions - e.fitness().get(1))));
255 
256 		final var topNloggerEvolutionListener = EvolutionListeners.<FitnessVector<Float>>ofLogTopN(
257 				logger,
258 					5,
259 					Comparator.<FitnessVector<Float>, Float>comparing(fv -> fv.get(0)));
260 
261 		final var eaExecutionContextBuilder = GPUEAExecutionContext.<FitnessVector<Float>>builder()
262 				.deviceFilters(DeviceFilters.ofGPU())
263 				.populationSize(250)
264 				.addEvolutionListeners(csvEvolutionListener, topNloggerEvolutionListener);
265 
266 		/**
267 		 * TODO should be in the library
268 		 */
269 		eaExecutionContextBuilder.addSelectionPolicyHandlerFactories(
270 				(gsd) -> new NSGA2SelectionPolicyHandler<>(),
271 					gsd -> new TournamentNSGA2SelectionPolicyHandler<>(gsd.randomGenerator()));
272 
273 		eaExecutionContextBuilder
274 				.addReplacementStrategyHandlerFactories((gsd) -> new SPEA2ReplacementStrategyHandler<>());
275 
276 		final var eaExecutionContext = eaExecutionContextBuilder.build();
277 		// end::moo_gpu_eaexeccontext[]
278 
279 		final var executorService = Executors.newWorkStealingPool();
280 		final var eaSystem = GPUEASystemFactory.from(eaConfiguration, eaExecutionContext, executorService);
281 
282 		final EvolutionResult<FitnessVector<Float>> evolutionResult = eaSystem.evolve();
283 		logger.info("Best genotype: {}", evolutionResult.bestGenotype());
284 		logger.info("  with fitness: {}", evolutionResult.bestFitness());
285 		logger.info("  at generation: {}", evolutionResult.generation());
286 
287 		ClusteringUtils.categorizeByNumClusters(
288 				distributionNumParameters,
289 					maxPossibleDistributions,
290 					x,
291 					y,
292 					samplesDouble,
293 					evolutionResult,
294 					baseDir,
295 					algorithmName);
296 	}
297 }