View Javadoc
1   package net.bmahe.genetics4j.samples.mixturemodel;
2   
3   import java.io.IOException;
4   import java.util.Collection;
5   import java.util.Comparator;
6   import java.util.HashSet;
7   import java.util.List;
8   import java.util.Set;
9   import java.util.stream.IntStream;
10  
11  import org.apache.commons.collections4.CollectionUtils;
12  import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
13  import org.apache.commons.math3.exception.MathUnsupportedOperationException;
14  import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException;
15  import org.apache.commons.math3.linear.SingularMatrixException;
16  import org.apache.logging.log4j.LogManager;
17  import org.apache.logging.log4j.Logger;
18  
19  import net.bmahe.genetics4j.core.EASystem;
20  import net.bmahe.genetics4j.core.EASystemFactory;
21  import net.bmahe.genetics4j.core.Fitness;
22  import net.bmahe.genetics4j.core.Genotype;
23  import net.bmahe.genetics4j.core.chromosomes.FloatChromosome;
24  import net.bmahe.genetics4j.core.evolutionlisteners.EvolutionListeners;
25  import net.bmahe.genetics4j.core.spec.EAConfiguration;
26  import net.bmahe.genetics4j.core.spec.EAConfiguration.Builder;
27  import net.bmahe.genetics4j.core.spec.EAExecutionContexts;
28  import net.bmahe.genetics4j.core.spec.EvolutionResult;
29  import net.bmahe.genetics4j.core.spec.chromosome.FloatChromosomeSpec;
30  import net.bmahe.genetics4j.core.spec.combination.MultiCombinations;
31  import net.bmahe.genetics4j.core.spec.combination.MultiPointArithmetic;
32  import net.bmahe.genetics4j.core.spec.combination.MultiPointCrossover;
33  import net.bmahe.genetics4j.core.spec.combination.SinglePointArithmetic;
34  import net.bmahe.genetics4j.core.spec.combination.SinglePointCrossover;
35  import net.bmahe.genetics4j.core.spec.mutation.CreepMutation;
36  import net.bmahe.genetics4j.core.spec.mutation.MultiMutations;
37  import net.bmahe.genetics4j.core.spec.mutation.RandomMutation;
38  import net.bmahe.genetics4j.core.spec.mutation.SwapMutation;
39  import net.bmahe.genetics4j.core.spec.replacement.Elitism;
40  import net.bmahe.genetics4j.core.spec.statistics.distributions.NormalDistribution;
41  import net.bmahe.genetics4j.core.termination.Terminations;
42  import net.bmahe.genetics4j.extras.evolutionlisteners.CSVEvolutionListener;
43  import net.bmahe.genetics4j.extras.evolutionlisteners.ColumnExtractor;
44  import net.bmahe.genetics4j.moo.FitnessVector;
45  import net.bmahe.genetics4j.moo.nsga2.spec.NSGA2Selection;
46  import net.bmahe.genetics4j.moo.nsga2.spec.TournamentNSGA2Selection;
47  
48  public class MooCPU {
49  	final static public Logger logger = LogManager.getLogger(MooCPU.class);
50  
51  	private final int distributionNumParameters;
52  	private final Comparator<Genotype> deduplicator;
53  	private final String baseDir;
54  	private final int maxGenerations;
55  
56  	public MooCPU(final int _distributionNumParameters, final Comparator<Genotype> _deduplicator, final String _baseDir,
57  			final int _maxGenerations) {
58  
59  		this.distributionNumParameters = _distributionNumParameters;
60  		this.deduplicator = _deduplicator;
61  		this.baseDir = _baseDir;
62  		this.maxGenerations = _maxGenerations;
63  	}
64  
65  	// tag::moo_cpu_fitness[]
66  	public Fitness<FitnessVector<Float>> fitnessCPU(final int numDistributions, final double[][] samples) {
67  		return (genotype) -> {
68  			final var fChromosome = genotype.getChromosome(0, FloatChromosome.class);
69  
70  			float sumAlpha = 0.0f;
71  			int k = 0;
72  			while (k < fChromosome.getSize()) {
73  				final float alpha = fChromosome.getAllele(k);
74  				sumAlpha += alpha;
75  
76  				k += distributionNumParameters;
77  			}
78  
79  			float[] likelyhoods = new float[samples.length];
80  			int i = 0;
81  			while (i < fChromosome.getSize()) {
82  
83  				final float alpha = fChromosome.getAllele(i) / sumAlpha;
84  				if (alpha > 0.0001) {
85  					final double[] mean = new double[] { fChromosome.getAllele(i + 1), fChromosome.getAllele(i + 2) };
86  					final double[][] covariances = new double[][] {
87  							{ fChromosome.getAllele(i + 3) - 15, fChromosome.getAllele(i + 4) - 15 },
88  							{ fChromosome.getAllele(i + 4) - 15, fChromosome.getAllele(i + 5) - 15 } };
89  
90  					try {
91  						final var multivariateNormalDistribution = new MultivariateNormalDistribution(mean, covariances);
92  
93  						for (int j = 0; j < samples.length; j++) {
94  							final var density = multivariateNormalDistribution.density(samples[j]);
95  							likelyhoods[j] += alpha * density;
96  						}
97  					} catch (NonPositiveDefiniteMatrixException | MathUnsupportedOperationException
98  							| SingularMatrixException e) {
99  					}
100 				}
101 				i += distributionNumParameters;
102 			}
103 
104 			float sumLogs = 0.0f;
105 			for (int j = 0; j < samples.length; j++) {
106 				sumLogs += Math.log(likelyhoods[j]);
107 			}
108 
109 			final int[] assigned = ClusteringUtils
110 					.assignClustersFloatChromosome(distributionNumParameters, samples, genotype);
111 			final Set<Integer> uniqueAssigned = new HashSet<>();
112 			uniqueAssigned.addAll(IntStream.of(assigned)
113 					.boxed()
114 					.toList());
115 
116 			final float numUnusedCluster = uniqueAssigned.size() == 0 ? -10f
117 					: (float) (numDistributions - uniqueAssigned.size());
118 
119 			return new FitnessVector<>(sumLogs / samples.length, numUnusedCluster);
120 		};
121 	}
122 	// end::moo_cpu_fitness[]
123 
124 	public EvolutionResult<FitnessVector<Float>> run(final int maxPossibleDistributions, final double[][] samples,
125 			final float[] x, final float[] y, final String algorithmName, final Collection<Genotype> seedPopulation)
126 			throws IOException {
127 
128 		// tag::moo_cpu_config[]
129 		final var fitnessFunc = fitnessCPU(maxPossibleDistributions, samples);
130 
131 		final Builder<FitnessVector<Float>> eaConfigurationBuilder = new EAConfiguration.Builder<>();
132 		eaConfigurationBuilder
133 				.chromosomeSpecs(FloatChromosomeSpec.of(distributionNumParameters * maxPossibleDistributions, 0, 30))
134 				.parentSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
135 				.replacementStrategy(Elitism.builder()
136 						.offspringRatio(0.950)
137 						.offspringSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
138 						.survivorSelectionPolicy(NSGA2Selection.ofFitnessVector(2, deduplicator))
139 						.build())
140 				.combinationPolicy(MultiCombinations.of(SinglePointArithmetic.of(0.9),
141 						SinglePointCrossover.build(),
142 						MultiPointCrossover.of(2),
143 						MultiPointArithmetic.of(2, 0.9)))
144 				.mutationPolicies(MultiMutations.of(RandomMutation.of(0.40),
145 						CreepMutation.of(0.40, NormalDistribution.of(0.0, 1)),
146 						SwapMutation.of(0.40, 5, false)))
147 				.fitness(fitnessFunc)
148 				.termination(Terminations.ofMaxGeneration(maxGenerations));
149 
150 		if (CollectionUtils.isNotEmpty(seedPopulation)) {
151 			eaConfigurationBuilder.seedPopulation(seedPopulation);
152 		}
153 
154 		final var eaConfiguration = eaConfigurationBuilder.build();
155 		// end::moo_cpu_config[]
156 
157 		// tag::moo_cpu_eaexeccontext[]
158 		final var csvEvolutionListener = CSVEvolutionListener.<FitnessVector<Float>, Void>of(
159 				baseDir + "mixturemodel-moo-cpu.csv",
160 				List.of(ColumnExtractor.of("generation", e -> e.generation()),
161 						ColumnExtractor.of("fitness",
162 								e -> e.fitness()
163 										.get(0)),
164 						ColumnExtractor.of("numUnusedCluster",
165 								e -> e.fitness()
166 										.get(1))));
167 
168 		final var eaExecutionContextBuilder = EAExecutionContexts.<FitnessVector<Float>>standard();
169 		eaExecutionContextBuilder.populationSize(250)
170 				.addEvolutionListeners(csvEvolutionListener,
171 						EvolutionListeners.<FitnessVector<Float>>ofLogTopN(logger,
172 								5,
173 								Comparator.<FitnessVector<Float>, Float>comparing(fv -> fv.get(0))))
174 				.build();
175 		final var eaExecutionContext = eaExecutionContextBuilder.build();
176 		// end::moo_cpu_eaexeccontext[]
177 
178 		final EASystem<FitnessVector<Float>> eaSystem = EASystemFactory.from(eaConfiguration, eaExecutionContext);
179 
180 		final EvolutionResult<FitnessVector<Float>> evolutionResult = eaSystem.evolve();
181 		logger.info("Best genotype: {}", evolutionResult.bestGenotype());
182 		logger.info("  with fitness: {}", evolutionResult.bestFitness());
183 		logger.info("  at generation: {}", evolutionResult.generation());
184 
185 		ClusteringUtils.categorizeByNumClusters(distributionNumParameters,
186 				maxPossibleDistributions,
187 				x,
188 				y,
189 				samples,
190 				evolutionResult,
191 				baseDir,
192 				"moo-cpu");
193 
194 		return evolutionResult;
195 	}
196 
197 }