View Javadoc
1   package net.bmahe.genetics4j.samples.mixturemodel;
2   
3   import java.io.IOException;
4   import java.util.Collection;
5   import java.util.Comparator;
6   import java.util.HashSet;
7   import java.util.List;
8   import java.util.Set;
9   import java.util.concurrent.ExecutionException;
10  import java.util.concurrent.Executors;
11  import java.util.stream.IntStream;
12  
13  import org.apache.commons.collections4.CollectionUtils;
14  import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
15  import org.apache.commons.math3.exception.MathUnsupportedOperationException;
16  import org.apache.commons.math3.linear.Array2DRowRealMatrix;
17  import org.apache.commons.math3.linear.EigenDecomposition;
18  import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException;
19  import org.apache.commons.math3.linear.SingularMatrixException;
20  import org.apache.logging.log4j.LogManager;
21  import org.apache.logging.log4j.Logger;
22  
23  import net.bmahe.genetics4j.core.Genotype;
24  import net.bmahe.genetics4j.core.chromosomes.FloatChromosome;
25  import net.bmahe.genetics4j.core.evolutionlisteners.EvolutionListeners;
26  import net.bmahe.genetics4j.core.spec.EvolutionResult;
27  import net.bmahe.genetics4j.core.spec.chromosome.FloatChromosomeSpec;
28  import net.bmahe.genetics4j.core.spec.combination.MultiCombinations;
29  import net.bmahe.genetics4j.core.spec.combination.MultiPointArithmetic;
30  import net.bmahe.genetics4j.core.spec.combination.MultiPointCrossover;
31  import net.bmahe.genetics4j.core.spec.combination.SinglePointArithmetic;
32  import net.bmahe.genetics4j.core.spec.combination.SinglePointCrossover;
33  import net.bmahe.genetics4j.core.spec.mutation.CreepMutation;
34  import net.bmahe.genetics4j.core.spec.mutation.MultiMutations;
35  import net.bmahe.genetics4j.core.spec.mutation.RandomMutation;
36  import net.bmahe.genetics4j.core.spec.mutation.SwapMutation;
37  import net.bmahe.genetics4j.core.spec.replacement.Elitism;
38  import net.bmahe.genetics4j.core.spec.statistics.distributions.NormalDistribution;
39  import net.bmahe.genetics4j.core.termination.Terminations;
40  import net.bmahe.genetics4j.extras.evolutionlisteners.CSVEvolutionListener;
41  import net.bmahe.genetics4j.extras.evolutionlisteners.ColumnExtractor;
42  import net.bmahe.genetics4j.gpu.GPUEASystemFactory;
43  import net.bmahe.genetics4j.gpu.spec.DeviceFilters;
44  import net.bmahe.genetics4j.gpu.spec.GPUEAConfiguration;
45  import net.bmahe.genetics4j.gpu.spec.GPUEAExecutionContext;
46  import net.bmahe.genetics4j.gpu.spec.Program;
47  import net.bmahe.genetics4j.gpu.spec.fitness.FitnessExtractor;
48  import net.bmahe.genetics4j.gpu.spec.fitness.SingleKernelFitness;
49  import net.bmahe.genetics4j.gpu.spec.fitness.SingleKernelFitnessDescriptor;
50  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.DataLoaders;
51  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.DataSupplier;
52  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.ResultAllocators;
53  import net.bmahe.genetics4j.gpu.spec.fitness.cldata.StaticDataLoaders;
54  import net.bmahe.genetics4j.gpu.spec.fitness.kernelcontext.KernelExecutionContextComputers;
55  import net.bmahe.genetics4j.moo.FitnessVector;
56  import net.bmahe.genetics4j.moo.nsga2.impl.NSGA2SelectionPolicyHandler;
57  import net.bmahe.genetics4j.moo.nsga2.impl.TournamentNSGA2SelectionPolicyHandler;
58  import net.bmahe.genetics4j.moo.nsga2.spec.NSGA2Selection;
59  import net.bmahe.genetics4j.moo.nsga2.spec.TournamentNSGA2Selection;
60  import net.bmahe.genetics4j.moo.spea2.replacement.SPEA2ReplacementStrategyHandler;
61  
62  public class MooGPU {
63  	final static public Logger logger = LogManager.getLogger(MooGPU.class);
64  
65  	private final int distributionNumParameters;
66  	private final Comparator<Genotype> deduplicator;
67  	private final String baseDir;
68  	private final int maxGenerations;
69  
70  	public MooGPU(final int _distributionNumParameters, final Comparator<Genotype> _deduplicator, final String _baseDir,
71  			final int _maxGenerations) {
72  
73  		this.distributionNumParameters = _distributionNumParameters;
74  		this.deduplicator = _deduplicator;
75  		this.baseDir = _baseDir;
76  		this.maxGenerations = _maxGenerations;
77  	}
78  
79  	// tag::moo_gpu_fitness_extractor[]
80  	public final FitnessExtractor<FitnessVector<Float>> fitnessExtractor(final int maxPossibleDistributions,
81  			final double[][] samples) {
82  		return (openCLExecutionContext, kernelExecutionContext, executorService, generation, genotypes,
83  				resultExtractor) -> {
84  
85  			final float[] results = resultExtractor.extractFloatArray(openCLExecutionContext, 5);
86  
87  			try {
88  				final var extractionTask = executorService.submit(() -> {
89  					return IntStream.range(0, genotypes.size())
90  							.boxed()
91  							.parallel()
92  							.map(genotypeIndex -> {
93  								float tally = 0;
94  
95  								final int startIndex = genotypeIndex * samples.length;
96  								final int endIndex = startIndex + samples.length;
97  								int notFiniteCount = 0;
98  								for (int i = startIndex; i < endIndex; i++) {
99  									if (Float.isFinite(results[i])) {
100 										tally += results[i];
101 									} else {
102 										notFiniteCount++;
103 										tally -= 10_000f;
104 									}
105 								}
106 								final int[] assignedClusters = ClusteringUtils.assignClustersFloatChromosome(
107 										distributionNumParameters,
108 										samples,
109 										genotypes.get(genotypeIndex));
110 								final Set<Integer> uniqueAssigned = new HashSet<>();
111 								uniqueAssigned.addAll(IntStream.of(assignedClusters)
112 										.boxed()
113 										.toList());
114 
115 								if (notFiniteCount > 0 || notFiniteCount == samples.length || uniqueAssigned.size() == 0) {
116 									return new FitnessVector<>(-100_000_000f, -100f);
117 								}
118 
119 								final float numUnusedCluster = (float) (maxPossibleDistributions - uniqueAssigned.size());
120 								return new FitnessVector<>(tally / samples.length, numUnusedCluster);
121 							})
122 							.toList();
123 				});
124 
125 				return extractionTask.get();
126 			} catch (InterruptedException | ExecutionException e) {
127 				logger.error("Could not extract results", e);
128 				throw new RuntimeException(e);
129 			}
130 		};
131 	}
132 	// end::moo_gpu_fitness_extractor[]
133 
134 	public DataSupplier<float[]> computeSideData(final int maxPossibleDistributions) {
135 		return (openCLExecutionContext, generation, genotypes) -> {
136 			/**
137 			 * We will provide: <br>
138 			 * - Whether the values are valid<br>
139 			 * - Value of the determinant<br>
140 			 * - A linearized 2x2 matrix for the inverse covariance
141 			 */
142 			final float[] densityDataHelper = new float[genotypes.size() * maxPossibleDistributions * (2 + 4)];
143 			int genotypeIndex = 0;
144 			for (final var genotype : genotypes) {
145 				final var fChromosome = genotype.getChromosome(0, FloatChromosome.class);
146 
147 				for (int distributionIndex = 0; distributionIndex < maxPossibleDistributions; distributionIndex++) {
148 					final int baseDistributionIndex = distributionIndex * distributionNumParameters;
149 
150 					final double[] mean = new double[] { fChromosome.getAllele(baseDistributionIndex + 1),
151 							fChromosome.getAllele(baseDistributionIndex + 2) };
152 					final double[][] covariances = new double[2][2];
153 					covariances[0][0] = fChromosome.getAllele(baseDistributionIndex + 3) - 15;
154 					covariances[0][1] = fChromosome.getAllele(baseDistributionIndex + 4) - 15;
155 					covariances[1][0] = fChromosome.getAllele(baseDistributionIndex + 4) - 15;
156 					covariances[1][1] = fChromosome.getAllele(baseDistributionIndex + 5) - 15;
157 
158 					try {
159 						/**
160 						 * Will throw exception is the covariances aren't valid
161 						 */
162 						final var multivariateNormalDistribution = new MultivariateNormalDistribution(mean, covariances);
163 
164 						final var covarianceMatrix = new Array2DRowRealMatrix(covariances);
165 						final var eigenDecomposition = new EigenDecomposition(covarianceMatrix);
166 						final var eigenDecompositionSolver = eigenDecomposition.getSolver();
167 						final var covarianceInverse = eigenDecompositionSolver.getInverse();
168 						final var covarianceDeterminant = eigenDecomposition.getDeterminant();
169 
170 						final int baseDensityDataHelperIndex = genotypeIndex * maxPossibleDistributions * 6
171 								+ distributionIndex * 6;
172 						densityDataHelper[baseDensityDataHelperIndex + 1] = (float) covarianceDeterminant;
173 						densityDataHelper[baseDensityDataHelperIndex + 2] = (float) covarianceInverse.getEntry(0, 0);
174 						densityDataHelper[baseDensityDataHelperIndex + 3] = (float) covarianceInverse.getEntry(0, 1);
175 						densityDataHelper[baseDensityDataHelperIndex + 4] = (float) covarianceInverse.getEntry(1, 0);
176 						densityDataHelper[baseDensityDataHelperIndex + 5] = (float) covarianceInverse.getEntry(1, 1);
177 
178 						densityDataHelper[baseDensityDataHelperIndex + 0] = 10;
179 
180 					} catch (NonPositiveDefiniteMatrixException | MathUnsupportedOperationException
181 							| SingularMatrixException e) {
182 
183 					}
184 				}
185 
186 				genotypeIndex++;
187 			}
188 			return densityDataHelper;
189 		};
190 	}
191 
192 	public void run(final int maxPossibleDistributions, final int numDistributions, final double[][] samplesDouble,
193 			final float[][] samples, final float[] x, final float[] y, final String algorithmName,
194 			final Collection<Genotype> seedPopulation, final EvolutionResult<FitnessVector<Float>> bestCPUResult)
195 			throws IOException {
196 
197 		final var kernelName = "mixtureModelKernel";
198 		final int chromosomeSize = distributionNumParameters * maxPossibleDistributions;
199 
200 		// tag::moo_gpu_skfd[]
201 		final var singleKernelFitnessDescriptorBuilder = SingleKernelFitnessDescriptor.builder();
202 		singleKernelFitnessDescriptorBuilder.kernelName(kernelName)
203 				.kernelExecutionContextComputer(KernelExecutionContextComputers.ofGlobalWorkSize1D(samples.length))
204 				.putStaticDataLoaders(0, StaticDataLoaders.ofLinearize(samples))
205 				.putStaticDataLoaders(1,
206 						StaticDataLoaders
207 								.of(distributionNumParameters, maxPossibleDistributions, chromosomeSize, samples.length))
208 				.putDataLoaders(2, DataLoaders.ofGenerationAndPopulationSize())
209 				.putDataLoaders(3, DataLoaders.ofLinearizeFloatChromosome(0))
210 				.putDataLoaders(4, DataLoaders.ofFloatSupplier(computeSideData(maxPossibleDistributions)))
211 				.putResultAllocators(5, ResultAllocators.ofMultiplePopulationSizeFloat(samples.length));
212 		final var singleKernelFitnessDescriptor = singleKernelFitnessDescriptorBuilder.build();
213 		// end::moo_gpu_skfd[]
214 
215 		// tag::moo_gpu_config[]
216 		final var singleKernelFitness = SingleKernelFitness.of(singleKernelFitnessDescriptor,
217 				fitnessExtractor(maxPossibleDistributions, samplesDouble));
218 
219 		final var eaConfigurationBuilder = GPUEAConfiguration.<FitnessVector<Float>>builder()
220 				.chromosomeSpecs(FloatChromosomeSpec.of(chromosomeSize, 0, 30))
221 				.parentSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 5, deduplicator))
222 				.replacementStrategy(Elitism.builder()
223 						.offspringRatio(0.950)
224 						.offspringSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 5, deduplicator))
225 						.survivorSelectionPolicy(NSGA2Selection.ofFitnessVector(2, deduplicator))
226 						.build())
227 				.combinationPolicy(MultiCombinations.of(SinglePointArithmetic.of(0.9),
228 						SinglePointCrossover.build(),
229 						MultiPointCrossover.of(3),
230 						MultiPointArithmetic.of(3, 0.9)))
231 				.mutationPolicies(MultiMutations.of(RandomMutation.of(0.40),
232 						CreepMutation.of(0.40, NormalDistribution.of(0.0, 1)),
233 						SwapMutation.of(0.40, 5, false)))
234 				.program(Program.ofResource("/opencl/mixturemodel/main.cl", kernelName, ""))
235 				.fitness(singleKernelFitness)
236 				.termination(Terminations.ofMaxGeneration(maxGenerations));
237 
238 		if (CollectionUtils.isNotEmpty(seedPopulation)) {
239 			eaConfigurationBuilder.seedPopulation(seedPopulation);
240 		}
241 
242 		final var eaConfiguration = eaConfigurationBuilder.build();
243 		// end::moo_gpu_config[]
244 
245 		// tag::moo_gpu_eaexeccontext[]
246 		final var csvEvolutionListener = CSVEvolutionListener.<FitnessVector<Float>, Void>of(
247 				baseDir + algorithmName + ".csv",
248 				List.of(ColumnExtractor.of("generation", e -> e.generation()),
249 						ColumnExtractor.of("fitness",
250 								e -> e.fitness()
251 										.get(0)),
252 						ColumnExtractor.of("numUnusedCluster",
253 								e -> e.fitness()
254 										.get(1)),
255 						ColumnExtractor.of("numCluster",
256 								e -> maxPossibleDistributions - e.fitness()
257 										.get(1))));
258 
259 		final var topNloggerEvolutionListener = EvolutionListeners.<FitnessVector<Float>>ofLogTopN(logger,
260 				5,
261 				Comparator.<FitnessVector<Float>, Float>comparing(fv -> fv.get(0)));
262 
263 		final var eaExecutionContextBuilder = GPUEAExecutionContext.<FitnessVector<Float>>builder()
264 				.deviceFilters(DeviceFilters.ofGPU())
265 				.populationSize(250)
266 				.addEvolutionListeners(csvEvolutionListener, topNloggerEvolutionListener);
267 
268 		/**
269 		 * TODO should be in the library
270 		 */
271 		eaExecutionContextBuilder.addSelectionPolicyHandlerFactories((gsd) -> new NSGA2SelectionPolicyHandler<>(),
272 				gsd -> new TournamentNSGA2SelectionPolicyHandler<>(gsd.randomGenerator()));
273 
274 		eaExecutionContextBuilder
275 				.addReplacementStrategyHandlerFactories((gsd) -> new SPEA2ReplacementStrategyHandler<>());
276 
277 		final var eaExecutionContext = eaExecutionContextBuilder.build();
278 		// end::moo_gpu_eaexeccontext[]
279 
280 		final var executorService = Executors.newWorkStealingPool();
281 		final var eaSystem = GPUEASystemFactory.from(eaConfiguration, eaExecutionContext, executorService);
282 
283 		final EvolutionResult<FitnessVector<Float>> evolutionResult = eaSystem.evolve();
284 		logger.info("Best genotype: {}", evolutionResult.bestGenotype());
285 		logger.info("  with fitness: {}", evolutionResult.bestFitness());
286 		logger.info("  at generation: {}", evolutionResult.generation());
287 
288 		ClusteringUtils.categorizeByNumClusters(distributionNumParameters,
289 				maxPossibleDistributions,
290 				x,
291 				y,
292 				samplesDouble,
293 				evolutionResult,
294 				baseDir,
295 				algorithmName);
296 	}
297 }