public void Hyper_Parameter_Tuning()
        {
            #region Read data

            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read regression targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            #endregion

            // metric to minimize
            var metric = new MeanSquaredErrorRegressionMetric();

            // Parameter ranges for the optimizer
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(min: 1, max: 100, transform: Transform.Linear), // maximumTreeDepth
                new ParameterBounds(min: 1, max: 16, transform: Transform.Linear),  // minimumSplitSize
            };

            // create random search optimizer
            var optimizer = new RandomSearchOptimizer(parameters, iterations: 30, runParallel: true);

            // other available optimizers
            // GridSearchOptimizer
            // GlobalizedBoundedNelderMeadOptimizer
            // ParticleSwarmOptimizer
            // BayesianOptimizer
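
            // Any of these can be swapped in with the same parameter bounds. A minimal
            // sketch, assuming the positional constructor arguments used in the
            // BayesianOptimizer tests further down:
            // var optimizer = new BayesianOptimizer(parameters, 30, 5, 1);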

            // function to minimize
            Func <double[], OptimizerResult> minimize = p =>
            {
                var cv          = new RandomCrossValidation <double>(crossValidationFolds: 5, seed: 42);
                var optlearner  = new RegressionDecisionTreeLearner(maximumTreeDepth: (int)p[0], minimumSplitSize: (int)p[1]);
                var predictions = cv.CrossValidate(optlearner, observations, targets);
                var error       = metric.Error(targets, predictions);
                Trace.WriteLine("Error: " + error);
                return(new OptimizerResult(p, error));
            };

            // run optimizer
            var result         = optimizer.OptimizeBest(minimize);
            var bestParameters = result.ParameterSet;

            Trace.WriteLine("Result: " + result.Error);

            // create learner with found parameters
            var learner = new RegressionDecisionTreeLearner(maximumTreeDepth: (int)bestParameters[0], minimumSplitSize: (int)bestParameters[1]);

            // learn model with found parameters
            var model = learner.Learn(observations, targets);
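
            // Quick in-sample sanity check (a sketch only; a proper evaluation would use
            // a held-out test set, as in the gradient boost example further down).
            var insamplePredictions = model.Predict(observations);
            Trace.WriteLine("In-sample error: " + metric.Error(targets, insamplePredictions));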
        }
        public void RandomSearchOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(0.0, 100.0, Transform.Linear)
            };
            var sut    = new RandomSearchOptimizer(parameters, 100);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(110.67173923600831, actual.Error, 0.00001);
            Assert.AreEqual(37.533294194160632, actual.ParameterSet.Single(), 0.00001);
        }
Example 3
        public void GlobalizedBoundedNelderMeadOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
            };
            var sut    = new GlobalizedBoundedNelderMeadOptimizer(parameters, 5, 1e-5, 10);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(-0.99999927563662372, actual.Error, 0.0000001);
            Assert.AreEqual(3, actual.ParameterSet.Length);

            Assert.AreEqual(1.5710337846223761, actual.ParameterSet[0], 0.0000001);
            Assert.AreEqual(3.1421855980282225, actual.ParameterSet[1], 0.0000001);
            Assert.AreEqual(5.203790999662519E-07, actual.ParameterSet[2], 0.0000001);
        }
        public void ParticleSwarmOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
            };
            var sut    = new ParticleSwarmOptimizer(parameters, 100);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(-0.64324321766401094, actual.Error, 0.0000001);
            Assert.AreEqual(3, actual.ParameterSet.Length);

            Assert.AreEqual(-4.92494268653156, actual.ParameterSet[0], 0.0000001);
            Assert.AreEqual(10, actual.ParameterSet[1], 0.0000001);
            Assert.AreEqual(-0.27508308116943514, actual.ParameterSet[2], 0.0000001);
        }
        public void GlobalizedBoundedNelderMeadOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
            };
            var sut    = new GlobalizedBoundedNelderMeadOptimizer(parameters, 5, 1e-5, 10);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(-0.99999949547279676, actual.Error, 0.0000001);
            Assert.AreEqual(3, actual.ParameterSet.Length);

            Assert.AreEqual(-7.8547285710964134, actual.ParameterSet[0], 0.0000001);
            Assert.AreEqual(6.2835515298977995, actual.ParameterSet[1], 0.0000001);
            Assert.AreEqual(-1.5851024386788885E-07, actual.ParameterSet[2], 0.0000001);
        }
Example 6
        public void ParticleSwarmOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
            };
            var sut    = new ParticleSwarmOptimizer(parameters, 100);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(-0.45484916939206588, actual.Error, 0.0000001);
            Assert.AreEqual(3, actual.ParameterSet.Length);

            Assert.AreEqual(-10, actual.ParameterSet[0], 0.0000001);
            Assert.AreEqual(-10, actual.ParameterSet[1], 0.0000001);
            Assert.AreEqual(0.0035692182837614439, actual.ParameterSet[2], 0.0000001);
        }
Example 7
        public void BayesianOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
            };
            var sut    = new BayesianOptimizer(parameters, 100, 5, 1);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(-0.74765422244251278, actual.Error, 0.0001);
            Assert.AreEqual(3, actual.ParameterSet.Length);

            Assert.AreEqual(-5.0065683270835173, actual.ParameterSet[0], m_delta);
            Assert.AreEqual(-9.67008227467075, actual.ParameterSet[1], m_delta);
            Assert.AreEqual(-0.24173704452893574, actual.ParameterSet[2], m_delta);
        }
        public void BayesianOptimizer_OptimizeBest()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
                new ParameterBounds(-10.0, 10.0, Transform.Linear),
            };
            var sut    = new BayesianOptimizer(parameters, 100, 5, 1);
            var actual = sut.OptimizeBest(Minimize);

            Assert.AreEqual(-0.92327107866106661, actual.Error, 0.0001);
            Assert.AreEqual(3, actual.ParameterSet.Length);

            Assert.AreEqual(8.1239613509382878, actual.ParameterSet[0], 0.0001);
            Assert.AreEqual(-9.2896384835660637, actual.ParameterSet[1], 0.0001);
            Assert.AreEqual(-0.03435398919245003, actual.ParameterSet[2], 0.0001);
        }
Example 9
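 /// <summary>
 /// Upserts the given bound: adds it if no bound exists for the given DefinitionID,
 /// otherwise updates the stored min, max, and result values.
 /// </summary>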
 public bool AddParameterBound(ParameterBounds bounds)
 {
     using (KraContext Context = new KraContext())
     {
         ParameterBounds bound = Context.Bounds.Find(bounds.DefinitionID);
         if (bound == null)
         {
             Context.Bounds.Add(bounds);
             Context.SaveChanges();
         }
         else
         {
             bound.MinValue = bounds.MinValue;
             bound.MaxValue = bounds.MaxValue;
             bound.Result   = bounds.Result;
             Context.SaveChanges();
         }
     }
     return(true);
 }
Example 10
        public void ParameterBounds_NextValue()
        {
            var sut     = new ParameterBounds(min: 20, max: 200, transform: Transform.Linear);
            var sampler = new RandomUniform(seed: 32);

            var actual = new double[10];

            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = sut.NextValue(sampler: sampler);
            }

            var expected = new double[] { 99.8935983236384, 57.2098020451189, 44.4149092419142, 89.9002946307418, 137.643828772774, 114.250629522954, 63.8914499915631, 109.294177409864, 188.567149950455, 33.2731248034505 };

            Assert.AreEqual(expected.Length, actual.Length);
            for (int i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 0.000001);
            }
        }
        public void RandomSearchOptimizer_Optimize()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(10.0, 37.5, Transform.Linear)
            };
            var sut    = new RandomSearchOptimizer(parameters, 2);
            var actual = sut.Optimize(Minimize);

            var expected = new OptimizerResult[]
            {
                new OptimizerResult(new double[] { 28.372927812567415 }, 3690.8111981874217),
                new OptimizerResult(new double[] { 13.874950705270725 }, 23438.215764163542)
            };

            Assert.AreEqual(expected.First().Error, actual.First().Error, 0.0001);
            Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), 0.0001);

            Assert.AreEqual(expected.Last().Error, actual.Last().Error, 0.0001);
            Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), 0.0001);
        }
        public void ParticleSwarmOptimizer_Optimize()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(0.0, 100.0, Transform.Linear)
            };
            var sut     = new ParticleSwarmOptimizer(parameters, 100);
            var results = sut.Optimize(Minimize2);
            var actual  = new OptimizerResult[] { results.First(), results.Last() };

            var expected = new OptimizerResult[]
            {
                new OptimizerResult(new double[] { 37.660092259635064 }, 109.45936368750877),
                new OptimizerResult(new double[] { 39.038272502859328 }, 181.43166846962754)
            };

            Assert.AreEqual(expected.First().Error, actual.First().Error, 0.0001);
            Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), 0.0001);

            Assert.AreEqual(expected.Last().Error, actual.Last().Error, 0.0001);
            Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), 0.0001);
        }
        public void BayesianOptimizer_Optimize()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(0.0, 100.0, Transform.Linear)
            };
            var sut     = new BayesianOptimizer(parameters, 120, 5, 1);
            var results = sut.Optimize(Minimize2);
            var actual  = new OptimizerResult[] { results.First(), results.Last() };

            var expected = new OptimizerResult[]
            {
                new OptimizerResult(new double[] { 37.710969353891429 }, 109.34400835405613),
                new OptimizerResult(new double[] { 99.646240426062718 }, 157577.44222424511)
            };

            Assert.AreEqual(expected.First().Error, actual.First().Error, 0.0001);
            Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), 0.0001);

            Assert.AreEqual(expected.Last().Error, actual.Last().Error, 0.0001);
            Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), 0.0001);
        }
Example 14
        public void ParticleSwarmOptimizer_Optimize()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(0.0, 100.0, Transform.Linear)
            };
            var sut     = new ParticleSwarmOptimizer(parameters, 100);
            var results = sut.Optimize(Minimize2);
            var actual  = new OptimizerResult[] { results.First(), results.Last() };

            var expected = new OptimizerResult[]
            {
                new OptimizerResult(new double[] { 37.804275358363732 }, 109.68474734728727),
                new OptimizerResult(new double[] { 35.942821697748165 }, 238.00642904844648)
            };

            Assert.AreEqual(expected.First().Error, actual.First().Error, 0.0001);
            Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), 0.0001);

            Assert.AreEqual(expected.Last().Error, actual.Last().Error, 0.0001);
            Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), 0.0001);
        }
Example 15
        public void GlobalizedBoundedNelderMeadOptimizer_Optimize()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(0.0, 100.0, Transform.Linear)
            };
            var sut     = new GlobalizedBoundedNelderMeadOptimizer(parameters, 5, 1e-5, 10);
            var results = sut.Optimize(Minimize2);
            var actual  = new OptimizerResult[] { results.First(), results.Last() };

            var expected = new OptimizerResult[]
            {
                new OptimizerResult(new double[] { 37.71314535727786 }, 109.34381396310141),
                new OptimizerResult(new double[] { 37.7131485180996 }, 109.34381396350526)
            };

            Assert.AreEqual(expected.First().Error, actual.First().Error, 0.0001);
            Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), 0.0001);

            Assert.AreEqual(expected.Last().Error, actual.Last().Error, 0.0001);
            Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), 0.0001);
        }
Example 16
        public void BayesianOptimizer_Optimize()
        {
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(0.0, 100.0, Transform.Linear)
            };
            var sut     = new BayesianOptimizer(parameters, 120, 5, 1);
            var results = sut.Optimize(Minimize2);
            var actual  = new OptimizerResult[] { results.First(), results.Last() };

            var expected = new OptimizerResult[]
            {
                new OptimizerResult(new double[] { 42.323589763754789 }, 981.97873691815118),
                new OptimizerResult(new double[] { 99.110398813667885 }, 154864.41962974239)
            };

            Assert.AreEqual(expected.First().Error, actual.First().Error, m_delta);
            Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), m_delta);

            Assert.AreEqual(expected.Last().Error, actual.Last().Error, m_delta);
            Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), m_delta);
        }
        /// <summary>
        /// This method takes a list of peptides and creates a subset list of peptides to normalize with, to avoid
        /// excessive computation time in normalization functions.
        /// </summary>
        //private List<Peptide> SubsetData(List<Peptide> initialList, List<SpectraFileInfo> spectraFiles)
        //{
        //    List<SpectraFileInfo>[] bothBioreps = new List<SpectraFileInfo>[2];
        //    var temp1 = spectraFiles.GroupBy(p => p.Condition).ToList();
        //    if (temp1.Count() == 1)
        //    {
        //        // normalizing bioreps within a condition
        //        var temp2 = spectraFiles.GroupBy(p => p.BiologicalReplicate).ToList();
        //        bothBioreps[0] = temp2[0].ToList();
        //        bothBioreps[1] = temp2[1].ToList();
        //    }
        //    else
        //    {
        //        // normalizing bioreps between conditions
        //        bothBioreps[0] = temp1[0].ToList();
        //        bothBioreps[1] = temp1[1].ToList();
        //    }

        //    HashSet<Peptide> subsetList = new HashSet<Peptide>();
        //    int maxFractionIndex = bothBioreps.SelectMany(p => p).Max(v => v.Fraction);

        //    foreach (var biorep in bothBioreps)
        //    {
        //        List<int> fractions = biorep.Select(p => p.Fraction).Distinct().ToList();

        //        int numToAddPerFraction = numPeptidesDesiredInMatrix / fractions.Count;
        //        if (numToAddPerFraction < numPeptidesDesiredFromEachFraction)
        //        {
        //            numToAddPerFraction = numPeptidesDesiredFromEachFraction;
        //        }

        //        int[] peptidesAddedPerFraction = new int[fractions.Count];
        //        Queue<Peptide>[] peptidesObservedInEachFraction = new Queue<Peptide>[fractions.Count];

        //        foreach (var file in biorep)
        //        {
        //            if (peptidesObservedInEachFraction[file.Fraction] == null)
        //            {
        //                peptidesObservedInEachFraction[file.Fraction] = new Queue<Peptide>(initialList.Where(p => p.GetIntensity(file) > 0)
        //                    .OrderByDescending(p => p.GetIntensity(file)));
        //            }
        //        }

        //        foreach (var fraction in fractions)
        //        {
        //            while (peptidesAddedPerFraction[fraction] < numToAddPerFraction && peptidesObservedInEachFraction[fraction].Any())
        //            {
        //                var peptide = peptidesObservedInEachFraction[fraction].Dequeue();

        //                // don't need to check if the return list already contains the peptide because it's a HashSet (no duplicates are allowed)
        //                subsetList.Add(peptide);

        //                // this peptide is in the return list regardless of whether or not it was actually just added;
        //                // we just want to guarantee this fraction has 500 peptides in the return list to normalize with
        //                peptidesAddedPerFraction[fraction]++;
        //            }
        //        }
        //    }

        //    return subsetList.ToList();
        //}

        /// <summary>
        /// Calculates normalization factors for fractionated data using a bounded Nelder-Mead optimization algorithm.
        /// Called by NormalizeFractions().
        /// </summary>
        private static double[] GetNormalizationFactors(double[,,] peptideIntensities, int numP, int numF)
        {
            object locker = new object();

            double[] referenceSample = new double[numP];
            double[,] sampleToNormalize = new double[numP, numF];

            // populate the peptide sample quantity array for normalization calculations
            for (int p = 0; p < numP; p++)
            {
                for (int f = 0; f < numF; f++)
                {
                    referenceSample[p]     += peptideIntensities[p, 0, f];
                    sampleToNormalize[p, f] = peptideIntensities[p, 1, f];
                }
            }

            // initialize normalization factors to 1.0
            // normalization factor optimization must improve on these to be valid
            double[] bestNormFactors = new double[numF];
            for (int i = 0; i < bestNormFactors.Length; i++)
            {
                bestNormFactors[i] = 1.0;
            }

            // calculate the error between bioreps if all normalization factors are 1 (initial error)
            double[] initialErrors = new double[numP];
            double   bestError     = CalculateNormalizationFactorError(ref referenceSample, ref sampleToNormalize, ref bestNormFactors);

            // constraint (normalization factors must be > 0.3 and < 3)
            var parameterArray = new ParameterBounds[numF];

            for (int f = 0; f < numF; f++)
            {
                parameterArray[f] = new ParameterBounds(0.3, 3, Transform.Linear);
            }

            // find approximate best starting area for each fraction normalization factor
            for (int f = 0; f < numF; f++)
            {
                double   bestFractionError = double.PositiveInfinity;
                double   start             = parameterArray[0].Min;
                double   end     = parameterArray[0].Max;
                double[] factors = new double[numF];
                for (int i = 0; i < factors.Length; i++)
                {
                    factors[i] = 1.0;
                }

                for (double n = start; n <= end; n += 0.01)
                {
                    factors[f] = Math.Round(n, 2);

                    double error = CalculateNormalizationFactorError(ref referenceSample, ref sampleToNormalize, ref factors);

                    if (error < bestFractionError)
                    {
                        bestFractionError  = error;
                        bestNormFactors[f] = factors[f];
                    }
                }
            }
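
            // The coarse 0.01-step scan above seeds the bounded Nelder-Mead run below with
            // a reasonable starting value per fraction, reducing the chance of the simplex
            // converging to a poor local minimum.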

            // find the best normalization factors (minimize error)
            double[] errors = new double[numP];

            // define minimization metric
            Func <double[], OptimizerResult> minimize = v =>
            {
                // calculate error with these normalization factors
                double candidateError = CalculateNormalizationFactorError(ref referenceSample, ref sampleToNormalize, ref v);

                return(new OptimizerResult(v, candidateError));
            };

            // create optimizer
            OptimizerResult result = new NelderMeadWithStartPoints(
                parameters: parameterArray,
                startingValue: bestNormFactors,
                maxIterationsWithoutImprovement: 10
                ).OptimizeBest(minimize);

            double sampleError = result.Error;

            double[] normalizationFactors = result.ParameterSet;

            if (sampleError < bestError)
            {
                lock (locker)
                {
                    if (sampleError < bestError)
                    {
                        bestError       = sampleError;
                        bestNormFactors = normalizationFactors;
                    }
                }
            }

            return(bestNormFactors);
        }
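
        // CalculateNormalizationFactorError is not shown in this listing. A minimal sketch,
        // assuming the same error metric used inline in the overload below (sum of squared
        // coefficients of variation between the reference biorep and the normalized biorep):
        private static double CalculateNormalizationFactorError(
            ref double[] referenceSample, ref double[,] sampleToNormalize, ref double[] normFactors)
        {
            double error = 0;

            for (int p = 0; p < referenceSample.Length; p++)
            {
                // sum the normalized fraction intensities for this peptide
                // (layout from the caller: sampleToNormalize[peptide, fraction], one factor per fraction)
                double normalizedIntensity = 0;
                for (int f = 0; f < normFactors.Length; f++)
                {
                    normalizedIntensity += sampleToNormalize[p, f] * normFactors[f];
                }

                // squared coefficient of variation between the reference and normalized sample
                var pair = new[] { referenceSample[p], normalizedIntensity };
                double coefficientOfVariation = Statistics.StandardDeviation(pair) / pair.Average();
                error += Math.Pow(coefficientOfVariation, 2);
            }

            return error;
        }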
        /// <summary>
        /// Calculates normalization factors for fractionated data using a bounded Nelder-Mead optimization algorithm.
        /// Called by NormalizeFractions().
        /// </summary>
        private static double[] GetNormalizationFactors(double[,,] peptideIntensities, int numP, int numB, int numF, int maxThreads)
        {
            double step   = 0.01;
            object locker = new object();

            // initialize normalization factors to 1.0
            // normalization factor optimization must improve on these to be valid
            double bestError = 0;

            double[] bestNormFactors = new double[numF];
            for (int i = 0; i < bestNormFactors.Length; i++)
            {
                bestNormFactors[i] = 1.0;
            }

            // constraint (normalization factors must be > 0.3 and < 3)
            var parameterArray = new ParameterBounds[numF];

            for (int f = 0; f < numF; f++)
            {
                parameterArray[f] = new ParameterBounds(0.3, 3, Transform.Linear);
            }

            //TODO: put this into a helper method to avoid code repetition...
            // calculate the error between bioreps if all norm factors are 1 (initial error)
            {
                double[,] originalBiorepIntensities = new double[numP, numB];
                double[] temp = new double[2];

                for (int p = 0; p < numP; p++)
                {
                    for (int b = 0; b < numB; b++)
                    {
                        for (int f = 0; f < numF; f++)
                        {
                            originalBiorepIntensities[p, b] += peptideIntensities[p, b, f];
                        }
                    }
                }

                for (int p = 0; p < numP; p++)
                {
                    for (int b2 = 1; b2 < numB; b2++)
                    {
                        temp[0] = originalBiorepIntensities[p, 0];
                        temp[1] = originalBiorepIntensities[p, b2];

                        // calculate initial error (error if all norm factors are 1)
                        // error metric is sum square error of coefficient of variation of each peptide
                        double coefficientOfVariation = Statistics.StandardDeviation(temp) / temp.Average();
                        bestError += Math.Pow(coefficientOfVariation, 2);
                    }
                }
            }

            int startN = (int)(parameterArray[0].Min / step);
            int endN   = (int)(parameterArray[0].Max / step);
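
            // Each iteration of the parallel loop below seeds a bounded Nelder-Mead run from
            // a different starting value within the bounds (a simple multi-start strategy);
            // the best result across all starts is kept.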

            Parallel.For(startN, endN, new ParallelOptions {
                MaxDegreeOfParallelism = maxThreads
            }, n =>
            {
                double startPosition = n * step;

                double[,] biorepIntensities = new double[numP, numB];
                double[] temp = new double[2];

                // define minimization metric
                Func <double[], OptimizerResult> minimize = v =>
                {
                    // sum the intensities with the current normalization factors
                    Array.Clear(biorepIntensities, 0, biorepIntensities.Length);

                    for (int p = 0; p < numP; p++)
                    {
                        for (int b = 0; b < numB; b++)
                        {
                            for (int f = 0; f < numF; f++)
                            {
                                if (b == 0)
                                {
                                    biorepIntensities[p, b] += peptideIntensities[p, b, f];
                                }
                                else
                                {
                                    biorepIntensities[p, b] += peptideIntensities[p, b, f] * v[f];
                                }
                            }
                        }
                    }

                    // calculate the error for these normalization factors
                    double candidateError = 0;
                    for (int p = 0; p < numP; p++)
                    {
                        for (int b2 = 1; b2 < numB; b2++)
                        {
                            temp[0] = biorepIntensities[p, 0];
                            temp[1] = biorepIntensities[p, b2];

                            // error metric is sum square error of coefficient of variation of each peptide
                            double coefficientOfVariation = Statistics.StandardDeviation(temp) / temp.Average();
                            candidateError += Math.Pow(coefficientOfVariation, 2);
                        }
                    }

                    return(new OptimizerResult(v, candidateError));
                };

                // create optimizer
                OptimizerResult result = new NelderMeadWithStartPoints(
                    parameters: parameterArray,
                    startingValue: startPosition,
                    maxIterationsWithoutImprovement: 10
                    ).OptimizeBest(minimize);

                double error         = result.Error;
                double[] normFactors = result.ParameterSet;

                if (error < bestError)
                {
                    lock (locker)
                    {
                        if (error < bestError)
                        {
                            bestError       = error;
                            bestNormFactors = normFactors;
                        }
                    }
                }
            });

            return(bestNormFactors);
        }
Example 19
        public void GradientBoost_Optimize_Hyperparameters()
        {
            #region read and split data
            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // read feature matrix
            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();

            // read regression targets
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // creates training test splitter,
            // Since this is a regression problem, we use the random training/test set splitter.
            // 30 % of the data is used for the test set.
            var splitter = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24);

            var trainingTestSplit = splitter.SplitSet(observations, targets);
            var trainSet          = trainingTestSplit.TrainingSet;
            var testSet           = trainingTestSplit.TestSet;
            #endregion

            // since this is a regression problem we are using square error as metric
            // for evaluating how well the model performs.
            var metric = new MeanSquaredErrorRegressionMetric();

            // Usually better results can be achieved by tuning the gradient boost learner's hyperparameters

            var numberOfFeatures = trainSet.Observations.ColumnCount;

            // Parameter ranges for the optimizer
            // These cover the most commonly tuned gradient boost parameters.
            var parameters = new ParameterBounds[]
            {
                new ParameterBounds(min: 80, max: 300, transform: Transform.Linear),             // iterations
                new ParameterBounds(min: 0.02, max:  0.2, transform: Transform.Logarithmic),     // learning rate
                new ParameterBounds(min: 8, max: 15, transform: Transform.Linear),               // maximumTreeDepth
                new ParameterBounds(min: 0.5, max: 0.9, transform: Transform.Linear),            // subSampleRatio
                new ParameterBounds(min: 1, max: numberOfFeatures, transform: Transform.Linear), // featuresPrSplit
            };
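
            // Note: Transform.Logarithmic samples the learning rate on a log scale, so small
            // rates near the lower bound are explored as densely as rates near the upper bound.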

            // Further split the training data to have a validation set to measure
            // how well the model generalizes to unseen data during the optimization.
            var validationSplit = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24)
                                  .SplitSet(trainSet.Observations, trainSet.Targets);


            // Define optimizer objective (function to minimize)
            Func <double[], OptimizerResult> minimize = p =>
            {
                // create the candidate learner using the current optimization parameters.
                var candidateLearner = new RegressionSquareLossGradientBoostLearner(
                    iterations: (int)p[0],
                    learningRate: p[1],
                    maximumTreeDepth: (int)p[2],
                    subSampleRatio: p[3],
                    featuresPrSplit: (int)p[4],
                    runParallel: false);

                var candidateModel = candidateLearner.Learn(validationSplit.TrainingSet.Observations,
                                                            validationSplit.TrainingSet.Targets);

                var validationPredictions = candidateModel.Predict(validationSplit.TestSet.Observations);
                var candidateError        = metric.Error(validationSplit.TestSet.Targets, validationPredictions);

                // trace current error
                Trace.WriteLine(string.Format("Candidate Error: {0:0.0000}, Candidate Parameters: {1}",
                                              candidateError, string.Join(", ", p)));

                return(new OptimizerResult(p, candidateError));
            };

            // create random search optimizer
            var optimizer = new RandomSearchOptimizer(parameters, iterations: 30, runParallel: true);

            // find best hyperparameters
            var result = optimizer.OptimizeBest(minimize);
            var best   = result.ParameterSet;

            // create the final learner using the best hyperparameters.
            var learner = new RegressionSquareLossGradientBoostLearner(
                iterations: (int)best[0],
                learningRate: best[1],
                maximumTreeDepth: (int)best[2],
                subSampleRatio: best[3],
                featuresPrSplit: (int)best[4],
                runParallel: false);

            // learn model with found parameters
            var model = learner.Learn(trainSet.Observations, trainSet.Targets);

            // predict the training and test set.
            var trainPredictions = model.Predict(trainSet.Observations);
            var testPredictions  = model.Predict(testSet.Observations);

            // measure the error on training and test set.
            var trainError = metric.Error(trainSet.Targets, trainPredictions);
            var testError  = metric.Error(testSet.Targets, testPredictions);

            // Optimizer found hyperparameters.
            Trace.WriteLine(string.Format("Found parameters, iterations:  {0}, learning rate {1:0.000}:  maximumTreeDepth: {2}, subSampleRatio {3:0.000}, featuresPrSplit: {4} ",
                                          (int)best[0], best[1], (int)best[2], best[3], (int)best[4]));
            TraceTrainingAndTestError(trainError, testError);
        }
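
        // TraceTrainingAndTestError is a helper not shown in this listing. A minimal sketch
        // (an assumption, not the original implementation) could be:
        private static void TraceTrainingAndTestError(double trainError, double testError)
        {
            Trace.WriteLine(string.Format("Train error: {0:0.0000} - Test error: {1:0.0000}",
                                          trainError, testError));
        }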