/// <summary>
/// Restricts the training data to the tournament results of the current generation.
/// </summary>
/// <param name="data">All historical training data.</param>
/// <returns>
/// One converted genome row and one target per tournament result whose
/// <see cref="GenomeTournamentResult.Generation"/> equals <see cref="TrainingDataWrapper.CurrentGeneration"/>.
/// A genome with several matching results contributes several rows; a genome without any contributes none.
/// </returns>
public AggregatedTrainingDataWrapper AggregateTargets(TrainingDataWrapper data)
{
    // For every genome (in the order of data.Genomes) collect its results of the current generation.
    // Genomes are not necessarily unique, so duplicates have to be tolerated here.
    // The original author additionally assumes that the known results of a genome are ordered.
    var newestResultsPerGenome = data.Genomes
        .Select(genome => data.TournamentResults[genome].Where(observation => observation.Generation == data.CurrentGeneration).ToList())
        .ToArray();

    var totalObservations = newestResultsPerGenome.Sum(results => results.Count);
    var rowIndices = new int[totalObservations];
    var targets = new double[totalObservations];

    var writePosition = 0;
    for (var genomeIndex = 0; genomeIndex < newestResultsPerGenome.Length; genomeIndex++)
    {
        // The list may be empty, in which case the genome is skipped.
        foreach (var observation in newestResultsPerGenome[genomeIndex])
        {
            // Repeat the genome's row index once per observed result.
            rowIndices[writePosition] = genomeIndex;
            targets[writePosition] = observation.TournamentRank;
            writePosition++;
        }
    }

    // Sanity check: every slot of the preallocated arrays must have been filled.
    Debug.Assert(writePosition == totalObservations, "Each feature column should have been handled.");

    var selectedRows = data.ConvertedGenomes.Rows(rowIndices);
    return new AggregatedTrainingDataWrapper()
               {
                   RelevantConvertedGenomes = selectedRows,
                   RelevantTargets = targets,
               };
}
/// <summary>
/// Runs the genetic engineering step: trains the forest if the configuration asks for it,
/// then engineers the requested number of offspring.
/// </summary>
/// <param name="engineeredGenomeNumber">
/// The required number of engineered genomes.
/// </param>
/// <param name="competitiveParents">
/// The competitive parents.
/// </param>
/// <returns>
/// The <see cref="IEnumerable{Genome}"/> containing the engineered genomes;
/// an empty array if <paramref name="engineeredGenomeNumber"/> is not positive.
/// </returns>
private IEnumerable<Genome> PerformGeneticEngineering(int engineeredGenomeNumber, List<Genome> competitiveParents)
{
    // Training happens whenever engineered genomes may be needed later on,
    // and it is also required when sexual selection is enabled.
    var configuration = this._configuration;
    var modelIsNeeded = configuration.TrainModel
                        || configuration.EngineeredPopulationRatio > 0
                        || configuration.EnableSexualSelection;
    if (modelIsNeeded)
    {
        var trainingData = new TrainingDataWrapper(this.AllKnownRanks, this._currentGeneration);
        this._geneticEngineering.TrainForest(trainingData);
    }

    // Nothing to engineer: hand back an empty dummy enumerable.
    if (engineeredGenomeNumber <= 0)
    {
        return new Genome[0];
    }

    var inflatedParents = competitiveParents.InflateAndShuffle(engineeredGenomeNumber);
    return this._geneticEngineering.EngineerGenomes(
        inflatedParents,
        this._population.GetNonCompetitiveMates(),
        this._population.AllGenomes);
}
/// <summary>
/// OPTANO Algorithm Tuner specific entry point that trains the <see cref="StandardRandomForestLearner{TSamplingStrategy}"/>.
/// The sampling strategy first aggregates the targets, then the forest is learned on the aggregated data,
/// and finally the sampling strategy post-processes the resulting model.
/// </summary>
/// <param name="data">
/// All historical tournament data.
/// </param>
/// <returns>
/// A trained and post-processed <see cref="GenomePredictionForestModel{TWeakPredictor}"/>.
/// </returns>
public GenomePredictionForestModel<GenomePredictionTree> Learn(TrainingDataWrapper data)
{
    var trainingSet = this.SamplingStrategy.AggregateTargets(data);
    var features = trainingSet.RelevantConvertedGenomes;
    var targets = trainingSet.RelevantTargets;

    var trainedForest = this.Learn(features, targets);
    this.SamplingStrategy.PostProcessModel(trainedForest);

    return trainedForest;
}
/// <summary>
/// Averages the <see cref="GenomeTournamentResult.TournamentRank"/> over all <see cref="GenomeTournamentResult"/>
/// of each <see cref="Genome"/> in <see cref="TrainingDataWrapper.Genomes"/>.
/// </summary>
/// <param name="data">
/// The training data.
/// </param>
/// <returns>
/// The aggregated training data: the unmodified converted genomes together with one average rank per genome.
/// </returns>
public AggregatedTrainingDataWrapper AggregateTargets(TrainingDataWrapper data)
{
    // One target per genome, in the order of data.Genomes.
    var averageRanks =
        (from genome in data.Genomes
         select data.TournamentResults[genome].Average(observation => observation.TournamentRank)).ToArray();

    return new AggregatedTrainingDataWrapper()
               {
                   RelevantConvertedGenomes = data.ConvertedGenomes,
                   RelevantTargets = averageRanks,
               };
}
/// <summary>
/// Simulates a tuner run over the given number of generations and records the outcome in a fresh <see cref="TrainingDataWrapper"/>.
/// </summary>
/// <param name="tree"><see cref="ParameterTree"/> to base genomes on.</param>
/// <param name="encoder">Strategy to convert genomes to double arrays.</param>
/// <param name="genomeCount">Number of genomes to add to result per generation.</param>
/// <param name="generations">Number of generations to simulate.</param>
/// <param name="config"><see cref="AlgorithmTunerConfiguration"/>, required to generate new genomes.</param>
/// <returns>The created <see cref="TrainingDataWrapper"/>.</returns>
public static TrainingDataWrapper GenerateTrainingData(
    ParameterTree tree,
    IBulkGenomeTransformation encoder,
    int genomeCount,
    int generations,
    AlgorithmTunerConfiguration config)
{
    // Share of the population that is exchanged for new genomes after every generation.
    const double ReplacementShare = 0.3;

    var ranksByGenome = new Dictionary<Genome, List<GenomeTournamentRank>>(Genome.GenomeComparer);
    var trainingData = new TrainingDataWrapper(ranksByGenome, generations - 1);

    // Initial population of random genomes.
    var population = TestDataUtils.GenerateGenomes(tree, config, genomeCount);
    var replaceCount = (int)Math.Ceiling(ReplacementShare * genomeCount);

    for (var generation = 0; generation < generations; generation++)
    {
        var fitness = TestDataUtils.EvaluateTargetFunction(encoder, population);

        // Record one result per genome of the current population.
        for (var slot = 0; slot < genomeCount; slot++)
        {
            var genome = population[slot];
            if (!trainingData.TournamentResults.ContainsKey(genome))
            {
                trainingData.TournamentResults[genome] = new List<GenomeTournamentRank>();
            }

            // The generation index doubles as tournament id; the fitness value is stored as rank.
            var observation = new GenomeTournamentRank()
                                  {
                                      GenerationId = generation,
                                      TournamentId = generation,
                                      TournamentRank = fitness[slot],
                                  };
            trainingData.TournamentResults[genome].Add(observation);
        }

        // Exchange a random subset of the population for newly generated genomes.
        var slotsToReplace = Randomizer.Instance.ChooseRandomSubset(
            Enumerable.Range(0, genomeCount),
            replaceCount);
        var freshGenomes = TestDataUtils.GenerateGenomes(tree, config, replaceCount);

        var nextFresh = 0;
        foreach (var slot in slotsToReplace)
        {
            population[slot] = freshGenomes[nextFresh];
            nextFresh++;
        }
    }

    return trainingData;
}
/// <summary>
/// Writes all training data to a semicolon separated file.
/// Each line holds a genome id, generation, tournament id, the genome's feature values and the rank;
/// the genome data is repeated for every tournament result of that genome.
/// </summary>
/// <param name="data">
/// The data.
/// </param>
/// <param name="pathAndFile">
/// The path and file. The containing directory is created if necessary.
/// </param>
internal static void WriteAllTrainingData(TrainingDataWrapper data, string pathAndFile)
{
    var target = new FileInfo(pathAndFile);
    var targetDirectory = target.DirectoryName;
    if (targetDirectory != null)
    {
        Directory.CreateDirectory(targetDirectory);
    }

    var featureMatrix = data.ConvertedGenomes;

    // Header: genome id, generation, tournament id, one column per feature, rank.
    var featureHeaders = Enumerable.Range(1, featureMatrix.ColumnCount).Select(column => $"Feature_{column}");
    var headerLine = string.Concat(
        "UniqueGenomeId;",
        "Generation;",
        "TournamentId;",
        string.Join(";", featureHeaders),
        ";Rank");
    var csv = new StringBuilder(headerLine);
    csv.AppendLine();

    Debug.Assert(
        data.Genomes.GroupBy(g => g, g => g, new Genome.GeneValueComparator()).Count() == data.Count,
        "Found 2 separate genomes in list data.Genomes that are equal. This should not occur.");

    // Placeholders:
    // {0}-{2}: genome id, generation, tournament id
    // {3}:     the genome's double representation as separated string
    // {4}:     rank
    const string RowTemplate = "{0};{1};{2};{3};{4}";

    // Assumption: data.Genomes is distinct with respect to Genome.GeneValueComparator,
    // so the row index can serve as unique genome id.
    var distinctGenomes = data.Genomes.ToArray();
    for (var genomeId = 0; genomeId < data.Count; genomeId++)
    {
        var genome = distinctGenomes[genomeId];
        var featuresAsCsv = featureMatrix.GetRowAsCsv(genomeId, ";");

        // One output line per observed tournament result of this genome.
        foreach (var outcome in data.TournamentResults[genome])
        {
            csv.AppendLine(
                string.Format(
                    CultureInfo.InvariantCulture,
                    RowTemplate,
                    genomeId,
                    outcome.Generation,
                    outcome.TournamentId,
                    featuresAsCsv,
                    outcome.TournamentRank));
        }
    }

    File.WriteAllText(target.FullName, csv.ToString(), Encoding.UTF8);
}