1 package net.bmahe.genetics4j.samples.mixturemodel;
2
3 import java.io.IOException;
4 import java.util.Collection;
5 import java.util.Comparator;
6 import java.util.HashSet;
7 import java.util.List;
8 import java.util.Set;
9 import java.util.stream.IntStream;
10
11 import org.apache.commons.collections4.CollectionUtils;
12 import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
13 import org.apache.commons.math3.exception.MathUnsupportedOperationException;
14 import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException;
15 import org.apache.commons.math3.linear.SingularMatrixException;
16 import org.apache.logging.log4j.LogManager;
17 import org.apache.logging.log4j.Logger;
18
19 import net.bmahe.genetics4j.core.EASystem;
20 import net.bmahe.genetics4j.core.EASystemFactory;
21 import net.bmahe.genetics4j.core.Fitness;
22 import net.bmahe.genetics4j.core.Genotype;
23 import net.bmahe.genetics4j.core.chromosomes.FloatChromosome;
24 import net.bmahe.genetics4j.core.evolutionlisteners.EvolutionListeners;
25 import net.bmahe.genetics4j.core.spec.EAConfiguration;
26 import net.bmahe.genetics4j.core.spec.EAConfiguration.Builder;
27 import net.bmahe.genetics4j.core.spec.EAExecutionContexts;
28 import net.bmahe.genetics4j.core.spec.EvolutionResult;
29 import net.bmahe.genetics4j.core.spec.chromosome.FloatChromosomeSpec;
30 import net.bmahe.genetics4j.core.spec.combination.MultiCombinations;
31 import net.bmahe.genetics4j.core.spec.combination.MultiPointArithmetic;
32 import net.bmahe.genetics4j.core.spec.combination.MultiPointCrossover;
33 import net.bmahe.genetics4j.core.spec.combination.SinglePointArithmetic;
34 import net.bmahe.genetics4j.core.spec.combination.SinglePointCrossover;
35 import net.bmahe.genetics4j.core.spec.mutation.CreepMutation;
36 import net.bmahe.genetics4j.core.spec.mutation.MultiMutations;
37 import net.bmahe.genetics4j.core.spec.mutation.RandomMutation;
38 import net.bmahe.genetics4j.core.spec.mutation.SwapMutation;
39 import net.bmahe.genetics4j.core.spec.replacement.Elitism;
40 import net.bmahe.genetics4j.core.spec.statistics.distributions.NormalDistribution;
41 import net.bmahe.genetics4j.core.termination.Terminations;
42 import net.bmahe.genetics4j.extras.evolutionlisteners.CSVEvolutionListener;
43 import net.bmahe.genetics4j.extras.evolutionlisteners.ColumnExtractor;
44 import net.bmahe.genetics4j.moo.FitnessVector;
45 import net.bmahe.genetics4j.moo.nsga2.spec.NSGA2Selection;
46 import net.bmahe.genetics4j.moo.nsga2.spec.TournamentNSGA2Selection;
47
48 public class MooCPU {
49 final static public Logger logger = LogManager.getLogger(MooCPU.class);
50
51 private final int distributionNumParameters;
52 private final Comparator<Genotype> deduplicator;
53 private final String baseDir;
54 private final int maxGenerations;
55
56 public MooCPU(final int _distributionNumParameters,
57 final Comparator<Genotype> _deduplicator,
58 final String _baseDir,
59 final int _maxGenerations) {
60
61 this.distributionNumParameters = _distributionNumParameters;
62 this.deduplicator = _deduplicator;
63 this.baseDir = _baseDir;
64 this.maxGenerations = _maxGenerations;
65 }
66
67
68 public Fitness<FitnessVector<Float>> fitnessCPU(final int numDistributions, final double[][] samples) {
69 return (genotype) -> {
70 final var fChromosome = genotype.getChromosome(0, FloatChromosome.class);
71
72 float sumAlpha = 0.0f;
73 int k = 0;
74 while (k < fChromosome.getSize()) {
75 final float alpha = fChromosome.getAllele(k);
76 sumAlpha += alpha;
77
78 k += distributionNumParameters;
79 }
80
81 float[] likelyhoods = new float[samples.length];
82 int i = 0;
83 while (i < fChromosome.getSize()) {
84
85 final float alpha = fChromosome.getAllele(i) / sumAlpha;
86 if (alpha > 0.0001) {
87 final double[] mean = new double[] { fChromosome.getAllele(i + 1), fChromosome.getAllele(i + 2) };
88 final double[][] covariances = new double[][] {
89 { fChromosome.getAllele(i + 3) - 15, fChromosome.getAllele(i + 4) - 15 },
90 { fChromosome.getAllele(i + 4) - 15, fChromosome.getAllele(i + 5) - 15 } };
91
92 try {
93 final var multivariateNormalDistribution = new MultivariateNormalDistribution(mean, covariances);
94
95 for (int j = 0; j < samples.length; j++) {
96 final var density = multivariateNormalDistribution.density(samples[j]);
97 likelyhoods[j] += alpha * density;
98 }
99 } catch (NonPositiveDefiniteMatrixException | MathUnsupportedOperationException
100 | SingularMatrixException e) {
101 }
102 }
103 i += distributionNumParameters;
104 }
105
106 float sumLogs = 0.0f;
107 for (int j = 0; j < samples.length; j++) {
108 sumLogs += Math.log(likelyhoods[j]);
109 }
110
111 final int[] assigned = ClusteringUtils
112 .assignClustersFloatChromosome(distributionNumParameters, samples, genotype);
113 final Set<Integer> uniqueAssigned = new HashSet<>();
114 uniqueAssigned.addAll(IntStream.of(assigned).boxed().toList());
115
116 final float numUnusedCluster = uniqueAssigned.size() == 0 ? -10f
117 : (float) (numDistributions - uniqueAssigned.size());
118
119 return new FitnessVector<>(sumLogs / samples.length, numUnusedCluster);
120 };
121 }
122
123
124 public EvolutionResult<FitnessVector<Float>> run(final int maxPossibleDistributions, final double[][] samples,
125 final float[] x, final float[] y, final String algorithmName, final Collection<Genotype> seedPopulation)
126 throws IOException {
127
128
129 final var fitnessFunc = fitnessCPU(maxPossibleDistributions, samples);
130
131 final Builder<FitnessVector<Float>> eaConfigurationBuilder = new EAConfiguration.Builder<>();
132 eaConfigurationBuilder
133 .chromosomeSpecs(FloatChromosomeSpec.of(distributionNumParameters * maxPossibleDistributions, 0, 30))
134 .parentSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
135 .replacementStrategy(
136 Elitism.builder()
137 .offspringRatio(0.950)
138 .offspringSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
139 .survivorSelectionPolicy(NSGA2Selection.ofFitnessVector(2, deduplicator))
140 .build())
141 .combinationPolicy(
142 MultiCombinations.of(
143 SinglePointArithmetic.of(0.9),
144 SinglePointCrossover.build(),
145 MultiPointCrossover.of(2),
146 MultiPointArithmetic.of(2, 0.9)))
147 .mutationPolicies(
148 MultiMutations.of(
149 RandomMutation.of(0.40),
150 CreepMutation.of(0.40, NormalDistribution.of(0.0, 1)),
151 SwapMutation.of(0.40, 5, false)))
152 .fitness(fitnessFunc)
153 .termination(Terminations.ofMaxGeneration(maxGenerations));
154
155 if (CollectionUtils.isNotEmpty(seedPopulation)) {
156 eaConfigurationBuilder.seedPopulation(seedPopulation);
157 }
158
159 final var eaConfiguration = eaConfigurationBuilder.build();
160
161
162
163 final var csvEvolutionListener = CSVEvolutionListener.<FitnessVector<Float>, Void>of(
164 baseDir + "mixturemodel-moo-cpu.csv",
165 List.of(
166 ColumnExtractor.of("generation", e -> e.generation()),
167 ColumnExtractor.of("fitness", e -> e.fitness().get(0)),
168 ColumnExtractor.of("numUnusedCluster", e -> e.fitness().get(1))));
169
170 final var eaExecutionContextBuilder = EAExecutionContexts.<FitnessVector<Float>>standard();
171 eaExecutionContextBuilder.populationSize(250)
172 .addEvolutionListeners(
173 csvEvolutionListener,
174 EvolutionListeners.<FitnessVector<Float>>ofLogTopN(
175 logger,
176 5,
177 Comparator.<FitnessVector<Float>, Float>comparing(fv -> fv.get(0))))
178 .build();
179 final var eaExecutionContext = eaExecutionContextBuilder.build();
180
181
182 final EASystem<FitnessVector<Float>> eaSystem = EASystemFactory.from(eaConfiguration, eaExecutionContext);
183
184 final EvolutionResult<FitnessVector<Float>> evolutionResult = eaSystem.evolve();
185 logger.info("Best genotype: {}", evolutionResult.bestGenotype());
186 logger.info(" with fitness: {}", evolutionResult.bestFitness());
187 logger.info(" at generation: {}", evolutionResult.generation());
188
189 ClusteringUtils.categorizeByNumClusters(
190 distributionNumParameters,
191 maxPossibleDistributions,
192 x,
193 y,
194 samples,
195 evolutionResult,
196 baseDir,
197 "moo-cpu");
198
199 return evolutionResult;
200 }
201
202 }