1 package net.bmahe.genetics4j.samples.mixturemodel;
2
3 import java.io.IOException;
4 import java.util.Collection;
5 import java.util.Comparator;
6 import java.util.HashSet;
7 import java.util.List;
8 import java.util.Set;
9 import java.util.stream.IntStream;
10
11 import org.apache.commons.collections4.CollectionUtils;
12 import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
13 import org.apache.commons.math3.exception.MathUnsupportedOperationException;
14 import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException;
15 import org.apache.commons.math3.linear.SingularMatrixException;
16 import org.apache.logging.log4j.LogManager;
17 import org.apache.logging.log4j.Logger;
18
19 import net.bmahe.genetics4j.core.EASystem;
20 import net.bmahe.genetics4j.core.EASystemFactory;
21 import net.bmahe.genetics4j.core.Fitness;
22 import net.bmahe.genetics4j.core.Genotype;
23 import net.bmahe.genetics4j.core.chromosomes.FloatChromosome;
24 import net.bmahe.genetics4j.core.evolutionlisteners.EvolutionListeners;
25 import net.bmahe.genetics4j.core.spec.EAConfiguration;
26 import net.bmahe.genetics4j.core.spec.EAConfiguration.Builder;
27 import net.bmahe.genetics4j.core.spec.EAExecutionContexts;
28 import net.bmahe.genetics4j.core.spec.EvolutionResult;
29 import net.bmahe.genetics4j.core.spec.chromosome.FloatChromosomeSpec;
30 import net.bmahe.genetics4j.core.spec.combination.MultiCombinations;
31 import net.bmahe.genetics4j.core.spec.combination.MultiPointArithmetic;
32 import net.bmahe.genetics4j.core.spec.combination.MultiPointCrossover;
33 import net.bmahe.genetics4j.core.spec.combination.SinglePointArithmetic;
34 import net.bmahe.genetics4j.core.spec.combination.SinglePointCrossover;
35 import net.bmahe.genetics4j.core.spec.mutation.CreepMutation;
36 import net.bmahe.genetics4j.core.spec.mutation.MultiMutations;
37 import net.bmahe.genetics4j.core.spec.mutation.RandomMutation;
38 import net.bmahe.genetics4j.core.spec.mutation.SwapMutation;
39 import net.bmahe.genetics4j.core.spec.replacement.Elitism;
40 import net.bmahe.genetics4j.core.spec.statistics.distributions.NormalDistribution;
41 import net.bmahe.genetics4j.core.termination.Terminations;
42 import net.bmahe.genetics4j.extras.evolutionlisteners.CSVEvolutionListener;
43 import net.bmahe.genetics4j.extras.evolutionlisteners.ColumnExtractor;
44 import net.bmahe.genetics4j.moo.FitnessVector;
45 import net.bmahe.genetics4j.moo.nsga2.spec.NSGA2Selection;
46 import net.bmahe.genetics4j.moo.nsga2.spec.TournamentNSGA2Selection;
47
48 public class MooCPU {
49 final static public Logger logger = LogManager.getLogger(MooCPU.class);
50
51 private final int distributionNumParameters;
52 private final Comparator<Genotype> deduplicator;
53 private final String baseDir;
54 private final int maxGenerations;
55
56 public MooCPU(final int _distributionNumParameters, final Comparator<Genotype> _deduplicator, final String _baseDir,
57 final int _maxGenerations) {
58
59 this.distributionNumParameters = _distributionNumParameters;
60 this.deduplicator = _deduplicator;
61 this.baseDir = _baseDir;
62 this.maxGenerations = _maxGenerations;
63 }
64
65
66 public Fitness<FitnessVector<Float>> fitnessCPU(final int numDistributions, final double[][] samples) {
67 return (genotype) -> {
68 final var fChromosome = genotype.getChromosome(0, FloatChromosome.class);
69
70 float sumAlpha = 0.0f;
71 int k = 0;
72 while (k < fChromosome.getSize()) {
73 final float alpha = fChromosome.getAllele(k);
74 sumAlpha += alpha;
75
76 k += distributionNumParameters;
77 }
78
79 float[] likelyhoods = new float[samples.length];
80 int i = 0;
81 while (i < fChromosome.getSize()) {
82
83 final float alpha = fChromosome.getAllele(i) / sumAlpha;
84 if (alpha > 0.0001) {
85 final double[] mean = new double[] { fChromosome.getAllele(i + 1), fChromosome.getAllele(i + 2) };
86 final double[][] covariances = new double[][] {
87 { fChromosome.getAllele(i + 3) - 15, fChromosome.getAllele(i + 4) - 15 },
88 { fChromosome.getAllele(i + 4) - 15, fChromosome.getAllele(i + 5) - 15 } };
89
90 try {
91 final var multivariateNormalDistribution = new MultivariateNormalDistribution(mean, covariances);
92
93 for (int j = 0; j < samples.length; j++) {
94 final var density = multivariateNormalDistribution.density(samples[j]);
95 likelyhoods[j] += alpha * density;
96 }
97 } catch (NonPositiveDefiniteMatrixException | MathUnsupportedOperationException
98 | SingularMatrixException e) {
99 }
100 }
101 i += distributionNumParameters;
102 }
103
104 float sumLogs = 0.0f;
105 for (int j = 0; j < samples.length; j++) {
106 sumLogs += Math.log(likelyhoods[j]);
107 }
108
109 final int[] assigned = ClusteringUtils
110 .assignClustersFloatChromosome(distributionNumParameters, samples, genotype);
111 final Set<Integer> uniqueAssigned = new HashSet<>();
112 uniqueAssigned.addAll(IntStream.of(assigned)
113 .boxed()
114 .toList());
115
116 final float numUnusedCluster = uniqueAssigned.size() == 0 ? -10f
117 : (float) (numDistributions - uniqueAssigned.size());
118
119 return new FitnessVector<>(sumLogs / samples.length, numUnusedCluster);
120 };
121 }
122
123
124 public EvolutionResult<FitnessVector<Float>> run(final int maxPossibleDistributions, final double[][] samples,
125 final float[] x, final float[] y, final String algorithmName, final Collection<Genotype> seedPopulation)
126 throws IOException {
127
128
129 final var fitnessFunc = fitnessCPU(maxPossibleDistributions, samples);
130
131 final Builder<FitnessVector<Float>> eaConfigurationBuilder = new EAConfiguration.Builder<>();
132 eaConfigurationBuilder
133 .chromosomeSpecs(FloatChromosomeSpec.of(distributionNumParameters * maxPossibleDistributions, 0, 30))
134 .parentSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
135 .replacementStrategy(Elitism.builder()
136 .offspringRatio(0.950)
137 .offspringSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
138 .survivorSelectionPolicy(NSGA2Selection.ofFitnessVector(2, deduplicator))
139 .build())
140 .combinationPolicy(MultiCombinations.of(SinglePointArithmetic.of(0.9),
141 SinglePointCrossover.build(),
142 MultiPointCrossover.of(2),
143 MultiPointArithmetic.of(2, 0.9)))
144 .mutationPolicies(MultiMutations.of(RandomMutation.of(0.40),
145 CreepMutation.of(0.40, NormalDistribution.of(0.0, 1)),
146 SwapMutation.of(0.40, 5, false)))
147 .fitness(fitnessFunc)
148 .termination(Terminations.ofMaxGeneration(maxGenerations));
149
150 if (CollectionUtils.isNotEmpty(seedPopulation)) {
151 eaConfigurationBuilder.seedPopulation(seedPopulation);
152 }
153
154 final var eaConfiguration = eaConfigurationBuilder.build();
155
156
157
158 final var csvEvolutionListener = CSVEvolutionListener.<FitnessVector<Float>, Void>of(
159 baseDir + "mixturemodel-moo-cpu.csv",
160 List.of(ColumnExtractor.of("generation", e -> e.generation()),
161 ColumnExtractor.of("fitness",
162 e -> e.fitness()
163 .get(0)),
164 ColumnExtractor.of("numUnusedCluster",
165 e -> e.fitness()
166 .get(1))));
167
168 final var eaExecutionContextBuilder = EAExecutionContexts.<FitnessVector<Float>>standard();
169 eaExecutionContextBuilder.populationSize(250)
170 .addEvolutionListeners(csvEvolutionListener,
171 EvolutionListeners.<FitnessVector<Float>>ofLogTopN(logger,
172 5,
173 Comparator.<FitnessVector<Float>, Float>comparing(fv -> fv.get(0))))
174 .build();
175 final var eaExecutionContext = eaExecutionContextBuilder.build();
176
177
178 final EASystem<FitnessVector<Float>> eaSystem = EASystemFactory.from(eaConfiguration, eaExecutionContext);
179
180 final EvolutionResult<FitnessVector<Float>> evolutionResult = eaSystem.evolve();
181 logger.info("Best genotype: {}", evolutionResult.bestGenotype());
182 logger.info(" with fitness: {}", evolutionResult.bestFitness());
183 logger.info(" at generation: {}", evolutionResult.generation());
184
185 ClusteringUtils.categorizeByNumClusters(distributionNumParameters,
186 maxPossibleDistributions,
187 x,
188 y,
189 samples,
190 evolutionResult,
191 baseDir,
192 "moo-cpu");
193
194 return evolutionResult;
195 }
196
197 }