コード例 #1
0
        /// <summary>
        /// Restricts the training data to the tournament results of the current generation.
        /// </summary>
        /// <param name="data">All historical training data.</param>
        /// <returns>
        /// One row and one target per tournament result whose <see cref="GenomeTournamentResult.Generation"/>
        /// equals <see cref="TrainingDataWrapper.CurrentGeneration"/>.
        /// </returns>
        public AggregatedTrainingDataWrapper AggregateTargets(TrainingDataWrapper data)
        {
            // Keep, per genome, only the results observed in the current generation.
            // Genomes are not necessarily unique, and a genome may have several (or no) matching results.
            var newestResultsPerGenome = data.Genomes
                .Select(genome => data.TournamentResults[genome].Where(result => result.Generation == data.CurrentGeneration).ToList())
                .ToArray();

            var totalObservations = newestResultsPerGenome.Sum(results => results.Count);
            var rowIndices = new int[totalObservations];
            var targets = new double[totalObservations];

            // Flatten: every matching result contributes the index of its genome and its rank.
            var writePosition = 0;
            for (var genomeIndex = 0; genomeIndex < newestResultsPerGenome.Length; genomeIndex++)
            {
                // The list may be empty, in which case the genome contributes no rows at all.
                foreach (var observation in newestResultsPerGenome[genomeIndex])
                {
                    rowIndices[writePosition] = genomeIndex;
                    targets[writePosition] = observation.TournamentRank;
                    writePosition++;
                }
            }

            // Sanity check: both arrays must have been filled completely.
            Debug.Assert(writePosition == totalObservations, "Each feature column should have been handled.");

            // A genome index occurs once per matching result, so its row is repeated accordingly.
            return new AggregatedTrainingDataWrapper()
                       {
                           RelevantConvertedGenomes = data.ConvertedGenomes.Rows(rowIndices),
                           RelevantTargets = targets,
                       };
        }
コード例 #2
0
        /// <summary>
        /// Trains the forest (if the configuration asks for it) and afterwards engineers
        /// the requested number of offspring.
        /// </summary>
        /// <param name="engineeredGenomeNumber">
        /// The number of engineered genomes to create.
        /// </param>
        /// <param name="competitiveParents">
        /// The competitive genomes from which the parents are drawn.
        /// </param>
        /// <returns>
        /// The engineered genomes; an empty array if <paramref name="engineeredGenomeNumber"/> is not positive.
        /// </returns>
        private IEnumerable<Genome> PerformGeneticEngineering(int engineeredGenomeNumber, List<Genome> competitiveParents)
        {
            // Training happens whenever engineered genomes may be needed later on,
            // and it is also required when sexual selection is enabled.
            var trainingRequired = this._configuration.TrainModel
                                   || this._configuration.EngineeredPopulationRatio > 0
                                   || this._configuration.EnableSexualSelection;
            if (trainingRequired)
            {
                this._geneticEngineering.TrainForest(new TrainingDataWrapper(this.AllKnownRanks, this._currentGeneration));
            }

            // Nothing to engineer: hand back an empty dummy collection.
            if (engineeredGenomeNumber <= 0)
            {
                return new Genome[0];
            }

            var selectedParents = competitiveParents.InflateAndShuffle(engineeredGenomeNumber);
            return this._geneticEngineering.EngineerGenomes(
                selectedParents,
                this._population.GetNonCompetitiveMates(),
                this._population.AllGenomes);
        }
コード例 #3
0
        /// <summary>
        /// OPTANO Algorithm Tuner specific entry point that trains the <see cref="StandardRandomForestLearner{TSamplingStrategy}"/>
        /// on tournament data.
        /// </summary>
        /// <param name="data">
        /// All historical tournament data.
        /// </param>
        /// <returns>
        /// The <see cref="GenomePredictionForestModel{TWeakPredictor}"/> after training and post-processing
        /// by the sampling strategy.
        /// </returns>
        public GenomePredictionForestModel<GenomePredictionTree> Learn(TrainingDataWrapper data)
        {
            // The sampling strategy decides which observations and targets are used for training.
            var trainingSet = this.SamplingStrategy.AggregateTargets(data);
            var observations = trainingSet.RelevantConvertedGenomes;
            var targets = trainingSet.RelevantTargets;

            var trainedForest = this.Learn(observations, targets);

            // Give the sampling strategy the chance to adjust the trained model.
            this.SamplingStrategy.PostProcessModel(trainedForest);
            return trainedForest;
        }
コード例 #4
0
        /// <summary>
        /// Uses, for every <see cref="Genome"/> in <see cref="TrainingDataWrapper.Genomes"/>, the mean
        /// <see cref="GenomeTournamentResult.TournamentRank"/> over all of its <see cref="GenomeTournamentResult"/>s as target.
        /// </summary>
        /// <param name="data">
        /// The training data.
        /// </param>
        /// <returns>
        /// The unmodified converted genomes together with one averaged target per genome.
        /// </returns>
        public AggregatedTrainingDataWrapper AggregateTargets(TrainingDataWrapper data)
        {
            // NOTE(review): Average throws on an empty sequence, so every genome is presumably
            // expected to have at least one recorded result; confirm against the callers.
            var averageRanks = data.Genomes
                .Select(genome => data.TournamentResults[genome])
                .Select(results => results.Average(result => result.TournamentRank))
                .ToArray();

            return new AggregatedTrainingDataWrapper()
            {
                RelevantConvertedGenomes = data.ConvertedGenomes,
                RelevantTargets = averageRanks,
            };
        }
コード例 #5
0
        /// <summary>
        /// Simulates a tuner run for the specified number of generations and stores results in a new <see cref="TrainingDataWrapper"/>.
        /// </summary>
        /// <remarks>
        /// Every generation, each genome of the current population receives one result whose rank is the value
        /// returned by <see cref="TestDataUtils.EvaluateTargetFunction"/>; afterwards 30 % (rounded up) of the
        /// population is replaced by newly generated genomes.
        /// </remarks>
        /// <param name="tree"><see cref="ParameterTree"/> to base genomes on.</param>
        /// <param name="encoder">Strategy to convert genomes to double arrays.</param>
        /// <param name="genomeCount">Number of genomes to add to result per generation.</param>
        /// <param name="generations">Number of generations to simulate.</param>
        /// <param name="config"><see cref="AlgorithmTunerConfiguration"/>, required to generate new genomes.</param>
        /// <returns>
        /// The created <see cref="TrainingDataWrapper"/>, constructed with <paramref name="generations"/> - 1
        /// as its generation argument.
        /// </returns>
        public static TrainingDataWrapper GenerateTrainingData(
            ParameterTree tree,
            IBulkGenomeTransformation encoder,
            int genomeCount,
            int generations,
            AlgorithmTunerConfiguration config)
        {
            var result = new TrainingDataWrapper(
                new Dictionary<Genome, List<GenomeTournamentRank>>(Genome.GenomeComparer),
                generations - 1);

            // Start with correct number of random genomes.
            var randomGenomes = TestDataUtils.GenerateGenomes(tree, config, genomeCount);

            // Then simulate the correct number of generations.
            for (var currentGen = 0; currentGen < generations; currentGen++)
            {
                var fitness = TestDataUtils.EvaluateTargetFunction(encoder, randomGenomes);

                // Add a result for every genome of the current population.
                for (var genomeIndex = 0; genomeIndex < genomeCount; genomeIndex++)
                {
                    var currentGenome = randomGenomes[genomeIndex];

                    // Single lookup: fetch the genome's result list, creating it on first sight.
                    List<GenomeTournamentRank> knownResults;
                    if (!result.TournamentResults.TryGetValue(currentGenome, out knownResults))
                    {
                        knownResults = new List<GenomeTournamentRank>();
                        result.TournamentResults[currentGenome] = knownResults;
                    }

                    knownResults.Add(
                        new GenomeTournamentRank()
                        {
                            GenerationId = currentGen,
                            TournamentId = currentGen,
                            TournamentRank = fitness[genomeIndex],
                        });
                }

                // Swap out some genomes. This also happens after the last generation, which keeps the
                // number of random draws independent of the position in the loop.
                var replaceCount = (int)Math.Ceiling(0.3 * genomeCount);
                var indicesToReplace = Randomizer.Instance.ChooseRandomSubset(
                    Enumerable.Range(0, genomeCount),
                    replaceCount);

                var newGenomes = TestDataUtils.GenerateGenomes(tree, config, replaceCount);
                var replacementIndex = 0;
                foreach (var indexToReplace in indicesToReplace)
                {
                    randomGenomes[indexToReplace] = newGenomes[replacementIndex++];
                }
            }

            return result;
        }
コード例 #6
0
        /// <summary>
        /// Dumps the complete training data into a semicolon-separated, UTF-8 encoded text file.
        /// One line is written per observed tournament result, so a genome's feature values are
        /// repeated for each of its results.
        /// </summary>
        /// <param name="data">
        /// The training data to write.
        /// </param>
        /// <param name="pathAndFile">
        /// Path of the file to write; its directory is created if necessary.
        /// </param>
        internal static void WriteAllTrainingData(TrainingDataWrapper data, string pathAndFile)
        {
            var targetFile = new FileInfo(pathAndFile);
            var targetDirectory = targetFile.DirectoryName;
            if (targetDirectory != null)
            {
                Directory.CreateDirectory(targetDirectory);
            }

            var featureMatrix = data.ConvertedGenomes;

            // Header: genome id, generation, tournament id, Feature_1..Feature_n, rank.
            var featureHeader = string.Join(
                ";",
                Enumerable.Range(1, featureMatrix.ColumnCount).Select(column => $"Feature_{column}"));
            var csv = new StringBuilder();
            csv.Append("UniqueGenomeId;")
                .Append("Generation;")
                .Append("TournamentId;")
                .Append(featureHeader)
                .Append(";Rank")
                .AppendLine();

            Debug.Assert(
                data.Genomes.GroupBy(genome => genome, genome => genome, new Genome.GeneValueComparator()).Count() == data.Count,
                "Found 2 separate genomes in list data.Genomes that are equal. This should not occur.");

            // Placeholders:
            // 0-2: genome id, generation, tournament id
            // 3: the genome's double representation, already joined by ';'
            // 4: rank
            var rowTemplate = "{0};{1};{2};{3};{4}";
            var genomeArray = data.Genomes.ToArray();

            for (var genomeRow = 0; genomeRow < data.Count; genomeRow++)
            {
                // Assumption: data.Genomes is distinct with respect to Genome.GeneValueComparator,
                // so the row index can serve as unique genome id.
                var featureText = featureMatrix.GetRowAsCsv(genomeRow, ";");

                // The feature text is repeated for every tournament result of the genome.
                foreach (var observation in data.TournamentResults[genomeArray[genomeRow]])
                {
                    csv.AppendFormat(
                            CultureInfo.InvariantCulture,
                            rowTemplate,
                            genomeRow,
                            observation.Generation,
                            observation.TournamentId,
                            featureText,
                            observation.TournamentRank)
                        .AppendLine();
                }
            }

            File.WriteAllText(targetFile.FullName, csv.ToString(), Encoding.UTF8);
        }