View Javadoc
1   package net.bmahe.genetics4j.samples.mixturemodel;
2   
3   import java.io.IOException;
4   import java.util.Collection;
5   import java.util.Comparator;
6   import java.util.HashSet;
7   import java.util.List;
8   import java.util.Set;
9   import java.util.stream.IntStream;
10  
11  import org.apache.commons.collections4.CollectionUtils;
12  import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
13  import org.apache.commons.math3.exception.MathUnsupportedOperationException;
14  import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException;
15  import org.apache.commons.math3.linear.SingularMatrixException;
16  import org.apache.logging.log4j.LogManager;
17  import org.apache.logging.log4j.Logger;
18  
19  import net.bmahe.genetics4j.core.EASystem;
20  import net.bmahe.genetics4j.core.EASystemFactory;
21  import net.bmahe.genetics4j.core.Fitness;
22  import net.bmahe.genetics4j.core.Genotype;
23  import net.bmahe.genetics4j.core.chromosomes.FloatChromosome;
24  import net.bmahe.genetics4j.core.evolutionlisteners.EvolutionListeners;
25  import net.bmahe.genetics4j.core.spec.EAConfiguration;
26  import net.bmahe.genetics4j.core.spec.EAConfiguration.Builder;
27  import net.bmahe.genetics4j.core.spec.EAExecutionContexts;
28  import net.bmahe.genetics4j.core.spec.EvolutionResult;
29  import net.bmahe.genetics4j.core.spec.chromosome.FloatChromosomeSpec;
30  import net.bmahe.genetics4j.core.spec.combination.MultiCombinations;
31  import net.bmahe.genetics4j.core.spec.combination.MultiPointArithmetic;
32  import net.bmahe.genetics4j.core.spec.combination.MultiPointCrossover;
33  import net.bmahe.genetics4j.core.spec.combination.SinglePointArithmetic;
34  import net.bmahe.genetics4j.core.spec.combination.SinglePointCrossover;
35  import net.bmahe.genetics4j.core.spec.mutation.CreepMutation;
36  import net.bmahe.genetics4j.core.spec.mutation.MultiMutations;
37  import net.bmahe.genetics4j.core.spec.mutation.RandomMutation;
38  import net.bmahe.genetics4j.core.spec.mutation.SwapMutation;
39  import net.bmahe.genetics4j.core.spec.replacement.Elitism;
40  import net.bmahe.genetics4j.core.spec.statistics.distributions.NormalDistribution;
41  import net.bmahe.genetics4j.core.termination.Terminations;
42  import net.bmahe.genetics4j.extras.evolutionlisteners.CSVEvolutionListener;
43  import net.bmahe.genetics4j.extras.evolutionlisteners.ColumnExtractor;
44  import net.bmahe.genetics4j.moo.FitnessVector;
45  import net.bmahe.genetics4j.moo.nsga2.spec.NSGA2Selection;
46  import net.bmahe.genetics4j.moo.nsga2.spec.TournamentNSGA2Selection;
47  
48  public class MooCPU {
49  	final static public Logger logger = LogManager.getLogger(MooCPU.class);
50  
51  	private final int distributionNumParameters;
52  	private final Comparator<Genotype> deduplicator;
53  	private final String baseDir;
54  	private final int maxGenerations;
55  
56  	public MooCPU(final int _distributionNumParameters,
57  			final Comparator<Genotype> _deduplicator,
58  			final String _baseDir,
59  			final int _maxGenerations) {
60  
61  		this.distributionNumParameters = _distributionNumParameters;
62  		this.deduplicator = _deduplicator;
63  		this.baseDir = _baseDir;
64  		this.maxGenerations = _maxGenerations;
65  	}
66  
67  	// tag::moo_cpu_fitness[]
68  	public Fitness<FitnessVector<Float>> fitnessCPU(final int numDistributions, final double[][] samples) {
69  		return (genotype) -> {
70  			final var fChromosome = genotype.getChromosome(0, FloatChromosome.class);
71  
72  			float sumAlpha = 0.0f;
73  			int k = 0;
74  			while (k < fChromosome.getSize()) {
75  				final float alpha = fChromosome.getAllele(k);
76  				sumAlpha += alpha;
77  
78  				k += distributionNumParameters;
79  			}
80  
81  			float[] likelyhoods = new float[samples.length];
82  			int i = 0;
83  			while (i < fChromosome.getSize()) {
84  
85  				final float alpha = fChromosome.getAllele(i) / sumAlpha;
86  				if (alpha > 0.0001) {
87  					final double[] mean = new double[] { fChromosome.getAllele(i + 1), fChromosome.getAllele(i + 2) };
88  					final double[][] covariances = new double[][] {
89  							{ fChromosome.getAllele(i + 3) - 15, fChromosome.getAllele(i + 4) - 15 },
90  							{ fChromosome.getAllele(i + 4) - 15, fChromosome.getAllele(i + 5) - 15 } };
91  
92  					try {
93  						final var multivariateNormalDistribution = new MultivariateNormalDistribution(mean, covariances);
94  
95  						for (int j = 0; j < samples.length; j++) {
96  							final var density = multivariateNormalDistribution.density(samples[j]);
97  							likelyhoods[j] += alpha * density;
98  						}
99  					} catch (NonPositiveDefiniteMatrixException | MathUnsupportedOperationException
100 							| SingularMatrixException e) {
101 					}
102 				}
103 				i += distributionNumParameters;
104 			}
105 
106 			float sumLogs = 0.0f;
107 			for (int j = 0; j < samples.length; j++) {
108 				sumLogs += Math.log(likelyhoods[j]);
109 			}
110 
111 			final int[] assigned = ClusteringUtils
112 					.assignClustersFloatChromosome(distributionNumParameters, samples, genotype);
113 			final Set<Integer> uniqueAssigned = new HashSet<>();
114 			uniqueAssigned.addAll(IntStream.of(assigned).boxed().toList());
115 
116 			final float numUnusedCluster = uniqueAssigned.size() == 0 ? -10f
117 					: (float) (numDistributions - uniqueAssigned.size());
118 
119 			return new FitnessVector<>(sumLogs / samples.length, numUnusedCluster);
120 		};
121 	}
122 	// end::moo_cpu_fitness[]
123 
124 	public EvolutionResult<FitnessVector<Float>> run(final int maxPossibleDistributions, final double[][] samples,
125 			final float[] x, final float[] y, final String algorithmName, final Collection<Genotype> seedPopulation)
126 			throws IOException {
127 
128 		// tag::moo_cpu_config[]
129 		final var fitnessFunc = fitnessCPU(maxPossibleDistributions, samples);
130 
131 		final Builder<FitnessVector<Float>> eaConfigurationBuilder = new EAConfiguration.Builder<>();
132 		eaConfigurationBuilder
133 				.chromosomeSpecs(FloatChromosomeSpec.of(distributionNumParameters * maxPossibleDistributions, 0, 30))
134 				.parentSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
135 				.replacementStrategy(
136 						Elitism.builder()
137 								.offspringRatio(0.950)
138 								.offspringSelectionPolicy(TournamentNSGA2Selection.ofFitnessVector(2, 3, deduplicator))
139 								.survivorSelectionPolicy(NSGA2Selection.ofFitnessVector(2, deduplicator))
140 								.build())
141 				.combinationPolicy(
142 						MultiCombinations.of(
143 								SinglePointArithmetic.of(0.9),
144 									SinglePointCrossover.build(),
145 									MultiPointCrossover.of(2),
146 									MultiPointArithmetic.of(2, 0.9)))
147 				.mutationPolicies(
148 						MultiMutations.of(
149 								RandomMutation.of(0.40),
150 									CreepMutation.of(0.40, NormalDistribution.of(0.0, 1)),
151 									SwapMutation.of(0.40, 5, false)))
152 				.fitness(fitnessFunc)
153 				.termination(Terminations.ofMaxGeneration(maxGenerations));
154 
155 		if (CollectionUtils.isNotEmpty(seedPopulation)) {
156 			eaConfigurationBuilder.seedPopulation(seedPopulation);
157 		}
158 
159 		final var eaConfiguration = eaConfigurationBuilder.build();
160 		// end::moo_cpu_config[]
161 
162 		// tag::moo_cpu_eaexeccontext[]
163 		final var csvEvolutionListener = CSVEvolutionListener.<FitnessVector<Float>, Void>of(
164 				baseDir + "mixturemodel-moo-cpu.csv",
165 					List.of(
166 							ColumnExtractor.of("generation", e -> e.generation()),
167 								ColumnExtractor.of("fitness", e -> e.fitness().get(0)),
168 								ColumnExtractor.of("numUnusedCluster", e -> e.fitness().get(1))));
169 
170 		final var eaExecutionContextBuilder = EAExecutionContexts.<FitnessVector<Float>>standard();
171 		eaExecutionContextBuilder.populationSize(250)
172 				.addEvolutionListeners(
173 						csvEvolutionListener,
174 							EvolutionListeners.<FitnessVector<Float>>ofLogTopN(
175 									logger,
176 										5,
177 										Comparator.<FitnessVector<Float>, Float>comparing(fv -> fv.get(0))))
178 				.build();
179 		final var eaExecutionContext = eaExecutionContextBuilder.build();
180 		// end::moo_cpu_eaexeccontext[]
181 
182 		final EASystem<FitnessVector<Float>> eaSystem = EASystemFactory.from(eaConfiguration, eaExecutionContext);
183 
184 		final EvolutionResult<FitnessVector<Float>> evolutionResult = eaSystem.evolve();
185 		logger.info("Best genotype: {}", evolutionResult.bestGenotype());
186 		logger.info("  with fitness: {}", evolutionResult.bestFitness());
187 		logger.info("  at generation: {}", evolutionResult.generation());
188 
189 		ClusteringUtils.categorizeByNumClusters(
190 				distributionNumParameters,
191 					maxPossibleDistributions,
192 					x,
193 					y,
194 					samples,
195 					evolutionResult,
196 					baseDir,
197 					"moo-cpu");
198 
199 		return evolutionResult;
200 	}
201 
202 }