예제 #1
0
        /// <summary>
        /// Recalibrates every spectrum in myMsDataFile in parallel: MS2 scans get both
        /// their fragment and precursor m/z values corrected, while all other scans are
        /// corrected with the MS1 model only.
        /// </summary>
        /// <param name="ms1predictor">Forest model predicting the m/z error of MS1 peaks.</param>
        /// <param name="ms2predictor">Forest model predicting the m/z error of MS2 peaks.</param>
        private void CalibrateSpectra(RegressionForestModel ms1predictor, RegressionForestModel ms2predictor)
        {
            // Partition the one-based scan range and process partitions in parallel.
            Parallel.ForEach(Partitioner.Create(1, myMsDataFile.NumSpectra + 1), fff =>
            {
                for (int i = fff.Item1; i < fff.Item2; i++)
                {
                    var scan = myMsDataFile.GetOneBasedScan(i);

                    if (scan is IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > ms2Scan)
                    {
                        var precursorScan = myMsDataFile.GetOneBasedScan(ms2Scan.OneBasedPrecursorScanNumber.Value);

                        // Fill in the monoisotopic peak intensity when only the m/z guess is present.
                        if (!ms2Scan.SelectedIonMonoisotopicGuessIntensity.HasValue && ms2Scan.SelectedIonMonoisotopicGuessMz.HasValue)
                        {
                            ms2Scan.ComputeMonoisotopicPeakIntensity(precursorScan.MassSpectrum);
                        }

                        // Corrected m/z = observed m/z minus the model-predicted error; features are
                        // m/z, retention time, log(TIC), log(injection time) (NaN when absent), log(intensity).
                        double theFunc(IPeak x) => x.X - ms2predictor.Predict(new double[] { x.X, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });

                        // Same correction for the precursor, built from the precursor scan's metadata.
                        double theFuncForPrecursor(IPeak x) => x.X - ms1predictor.Predict(new double[] { x.X, precursorScan.RetentionTime, Math.Log(precursorScan.TotalIonCurrent), precursorScan.InjectionTime.HasValue ? Math.Log(precursorScan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });

                        ms2Scan.TransformMzs(theFunc, theFuncForPrecursor);
                    }
                    else
                    {
                        // Non-MS2 scans: apply the MS1 correction directly to the spectrum peaks.
                        Func <IPeak, double> theFunc = x => x.X - ms1predictor.Predict(new double[] { x.X, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });
                        scan.MassSpectrum.ReplaceXbyApplyingFunction(theFunc);
                    }
                }
            }
                             );
        }
        /// <summary>
        /// Learns a 100-tree regression random forest on a deterministically shuffled
        /// 70% index subset of the Glass dataset and verifies the full-set MSE.
        /// </summary>
        public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.Glass));
            var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
            var targets      = parser.EnumerateRows("Target").ToF64Vector();

            var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

            // Shuffle all row indices with a fixed seed and keep the first 70%.
            // (Removed the unused local 'rows'.)
            var indices = Enumerable.Range(0, targets.Length).ToArray();
            indices.Shuffle(new Random(42));
            indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

            var decisionTreeModels = sut.Learn(observations, targets, indices, out var rawVariableImportance).ToArray();
            var model = new RegressionForestModel(decisionTreeModels, rawVariableImportance);

            var predictions = model.Predict(observations);

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.49709813080602938, error, m_delta);
        }
예제 #3
0
        /// <summary>
        /// Applies the calibration model to top-down hits, deconvolution components, and
        /// all MS1 spectra, subtracting the model-predicted m/z error from each value.
        /// </summary>
        /// <param name="bestCf">Calibration forest; features are m/z, retention time,
        /// log(TIC), and log(injection time) (NaN when injection time is absent).</param>
        public void CalibrateHitsAndComponents(RegressionForestModel bestCf)
        {
            // Correct the observed m/z of every top-down hit using its MS1 scan metadata.
            foreach (SpectrumMatch hit in all_topdown_hits)
            {
                hit.mz = hit.mz - bestCf.Predict(new double[] { hit.mz, hit.ms1_scan.RetentionTime, Math.Log(hit.ms1_scan.TotalIonCurrent), hit.ms1_scan.InjectionTime.HasValue ? Math.Log(hit.ms1_scan.InjectionTime.Value) : double.NaN });
            }
            // Only components from the same condition/replicate/fraction as this raw file.
            foreach (Component c in Sweet.lollipop.calibration_components.Where(h => h.input_file.lt_condition == raw_file.lt_condition && h.input_file.biological_replicate == raw_file.biological_replicate && h.input_file.fraction == raw_file.fraction && h.input_file.technical_replicate == raw_file.technical_replicate))
            {
                foreach (ChargeState cs in c.charge_states)
                {
                    int  scanNumber = myMsDataFile.GetClosestOneBasedSpectrumNumber(c.rt_apex);
                    var  scan       = myMsDataFile.GetOneBasedScan(scanNumber);
                    bool ms1Scan    = scan.MsnOrder == 1;
                    // Walk backwards to the nearest preceding MS1 scan.
                    // NOTE(review): assumes an MS1 scan always exists at or before scanNumber;
                    // otherwise scanNumber underflows past scan 1 — confirm.
                    while (!ms1Scan)
                    {
                        scanNumber--;
                        scan    = myMsDataFile.GetOneBasedScan(scanNumber);
                        ms1Scan = scan.MsnOrder == 1;
                    }

                    cs.mz_centroid = cs.mz_centroid - bestCf.Predict(new double[] { cs.mz_centroid, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN });
                }
            }
            // Finally, recalibrate every peak of every MS1 spectrum in the file.
            foreach (var a in myMsDataFile.GetAllScansList().Where(s => s.MsnOrder == 1))
            {
                Func <MzPeak, double> theFunc = x => x.Mz - bestCf.Predict(new double[] { x.Mz, a.RetentionTime, Math.Log(a.TotalIonCurrent), a.InjectionTime.HasValue ? Math.Log(a.InjectionTime.Value) : double.NaN });
                a.MassSpectrum.ReplaceXbyApplyingFunction(theFunc);
            }
        }
예제 #4
0
        /// <summary>
        /// Generates candidate parameter sets by combining local search around the
        /// parent sets with additional random samples, ranked by expected improvement.
        /// </summary>
        /// <param name="parentParameterSets">Parameter sets to search around.</param>
        /// <param name="model">Surrogate model used to score candidates.</param>
        /// <param name="parameterSetCount">Number of candidate sets to return.</param>
        /// <param name="previousResults">Previous evaluations; the minimum error is the incumbent best.</param>
        /// <returns>The parameterSetCount sets with the highest expected improvement.</returns>
        double[][] GreedyPlusRandomSearch(double[][] parentParameterSets, RegressionForestModel model,
                                          int parameterSetCount, IReadOnlyList <OptimizerResult> previousResults)
        {
            // TODO: Handle maximization and minimization. Currently minimizes.
            var best = previousResults.Min(v => v.Error);

            var parameterSets = new List <(double[] parameterSet, double EI)>();

            // Perform local search.
            // NOTE(review): the loop variable 'parameterSet' is unused — LocalSearch is
            // given the whole parent array each iteration, so every iteration appends the
            // same result. Confirm whether 'parameterSet' was meant to be passed instead.
            foreach (var parameterSet in parentParameterSets)
            {
                var bestParameterSet = LocalSearch(parentParameterSets, model, best, m_epsilon);
                parameterSets.Add(bestParameterSet);
            }

            // Additional set of random parameterSets to choose from during local search.
            for (int i = 0; i < m_randomSearchPointCount; i++)
            {
                var parameterSet = RandomSearchOptimizer
                                   .SampleParameterSet(m_parameters, m_sampler);

                var expectedImprovement = ComputeExpectedImprovement(best, parameterSet, model);
                parameterSets.Add((parameterSet, expectedImprovement));
            }

            // Take the best parameterSets. Here we want the max expected improvement.
            return(parameterSets.OrderByDescending(v => v.EI)
                   .Take(parameterSetCount).Select(v => v.parameterSet)
                   .ToArray());
        }
예제 #5
0
        /// <summary>
        /// Computes the expected-improvement acquisition value for a candidate
        /// parameter set from the forest model's mean/variance prediction.
        /// </summary>
        /// <param name="best">Incumbent best objective value.</param>
        /// <param name="parameterSet">Candidate parameter set to score.</param>
        /// <param name="model">Surrogate forest model.</param>
        double ComputeExpectedImprovement(double best, double[] parameterSet, RegressionForestModel model)
        {
            var certainty = model.PredictCertainty(parameterSet);
            return AcquisitionFunctions.ExpectedImprovement(best, certainty.Prediction, certainty.Variance);
        }
예제 #6
0
        /// <summary>
        /// Finds candidate minima by optimizing the expected-improvement criterion
        /// over the surrogate model, returning the configured number of candidates.
        /// </summary>
        OptimizerResult[] FindMinimumCandidates(RegressionForestModel model, double bestScore)
        {
            // Objective handed to the inner optimizer: EI criterion for each parameter set.
            OptimizerResult minimize(double[] param) =>
                new OptimizerResult(param, ExpectedImprovementCriterion(param, model, bestScore));

            return m_optimizer.Optimize(minimize)
                   .Take(m_numberOfCandidatesEvaluatedPrIteration)
                   .ToArray();
        }
예제 #7
0
        /// <summary>
        /// End-to-end sample: train a default random forest on the white wine quality
        /// data, report train/test error, then round-trip the model through the static
        /// Save/Load helpers (default xml format).
        /// </summary>
        public void RandomForest_Default_Parameters_Save_Load_Model_Using_Static_Methods()
        {
            #region read and split data
            // Use StreamReader(filepath) when running from filesystem
            var parser     = new CsvParser(() => new StringReader(Resources.winequality_white));
            var targetName = "quality";

            // Feature matrix and regression targets.
            var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();
            var targets      = parser.EnumerateRows(targetName).ToF64Vector();

            // Random training/test split; 30 % of the data goes to the test set.
            var splitter = new RandomTrainingTestIndexSplitter <double>(trainingPercentage: 0.7, seed: 24);
            var split    = splitter.SplitSet(observations, targets);
            var trainSet = split.TrainingSet;
            var testSet  = split.TestSet;
            #endregion

            // Learn a forest with default parameters.
            var learner = new RegressionRandomForestLearner(trees: 100);
            var model   = learner.Learn(trainSet.Observations, trainSet.Targets);

            // Predict both sets and evaluate with mean squared error.
            var trainPredictions = model.Predict(trainSet.Observations);
            var testPredictions  = model.Predict(testSet.Observations);

            var metric     = new MeanSquaredErrorRegressionMetric();
            var trainError = metric.Error(trainSet.Targets, trainPredictions);
            var testError  = metric.Error(testSet.Targets, testPredictions);

            TraceTrainingAndTestError(trainError, testError);

            // Save model; in the file system use new StreamWriter(filePath).
            // Default format is xml.
            var savedModel = new StringWriter();
            model.Save(() => savedModel);

            // Load model; in the file system use new StreamReader(filePath).
            var loadedModel = RegressionForestModel.Load(() => new StringReader(savedModel.ToString()));
        }
예제 #8
0
        /// <summary>
        /// Upper-confidence-bound acquisition function; an alternative to
        /// ExpectedImprovementCriterion.
        /// </summary>
        /// <param name="observation">Parameter set to score.</param>
        /// <param name="model">Surrogate forest model.</param>
        /// <param name="kappa">Exploration/exploitation trade-off factor.</param>
        /// <returns>Predicted mean plus kappa standard deviations.</returns>
        double UpperConfidenceBound(double[] observation, RegressionForestModel model, double kappa = 2.56)
        {
            var prediction = model.PredictCertainty(observation);
            var mean       = prediction.Prediction;

            // Clamp variance away from zero to avoid degenerate points.
            var safeVariance = Math.Max(prediction.Variance, 1e-9);

            return mean + kappa * Math.Sqrt(safeVariance);
        }
예제 #9
0
        /// <summary>
        /// Alternative to ExpectedImprovementCriterion: returns the cumulative density
        /// of the standardized difference between the predicted mean and yMax.
        /// </summary>
        /// <param name="observation">Parameter set to score.</param>
        /// <param name="model">Surrogate forest model.</param>
        /// <param name="yMax">Reference objective value to improve upon.</param>
        /// <param name="xi">Optional exploration margin.</param>
        /// <returns>CDF of the standardized improvement.</returns>
        double PExpectedImprovementCriterion(double[] observation, RegressionForestModel model, double yMax, double xi = 0.0)
        {
            var certainty = model.PredictCertainty(observation);

            // Clamp variance away from zero so the standard deviation never vanishes.
            var stdDev = Math.Sqrt(Math.Max(certainty.Variance, 1e-9));

            var z = (certainty.Prediction - yMax - xi) / stdDev;

            return CumulativeDensityFunction(z);
        }
        /// <summary>
        /// Verifies that a serialized regression forest model can be loaded and
        /// reproduces the expected prediction error on the aptitude data set.
        /// </summary>
        public void RegressionForestModel_Load()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var sut = RegressionForestModel.Load(() => new StringReader(m_regressionForestModelString));

            var predictions = sut.Predict(observations);
            var error       = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, error, m_delta);
        }
예제 #11
0
        /// <summary>
        /// Finds the next candidate parameter sets by maximizing the acquisition
        /// function over the surrogate model's mean/variance predictions.
        /// </summary>
        OptimizerResult[] FindNextCandidates(RegressionForestModel model, double bestScore)
        {
            OptimizerResult minimize(double[] param)
            {
                // Predict expected performance (mean and variance) for the parameter set.
                var certainty = model.PredictCertainty(param);

                // Negated, since the optimizer minimizes but we want to maximize the acquisition.
                return new OptimizerResult(param,
                    -m_acquisitionFunc(bestScore, certainty.Prediction, certainty.Variance));
            }

            return m_maximizer.Optimize(minimize)
                   .Take(m_numberOfCandidatesEvaluatedPrIteration)
                   .ToArray();
        }
        /// <summary>
        /// Samples a batch of random parameter sets and scores each one by its
        /// expected improvement over the current best score.
        /// </summary>
        private OptimizerResult[] FindNextCandidates(RegressionForestModel model, double bestScore)
        {
            // Sample-then-score, one parameter set at a time (deferred LINQ preserves
            // the original sample/score interleaving).
            return Enumerable.Range(0, m_randomSearchPointCount)
                   .Select(_ => RandomSearchOptimizer.SampleParameterSet(m_parameters, m_sampler))
                   .Select(set => new OptimizerResult(set, ComputeExpectedImprovement(bestScore, set, model)))
                   .ToArray();
        }
        /// <summary>
        /// Finds the next function evaluations: maximizes the acquisition function via
        /// the inner optimizer, drops NaN scores, and keeps the best results.
        /// </summary>
        OptimizerResult[] FindNextCandidates(RegressionForestModel model, double bestScore)
        {
            Func<double[], OptimizerResult> minimize = param =>
            {
                // Predict expected performance (mean and variance) for the parameter set.
                var certainty = model.PredictCertainty(param);

                // Negated, since the optimizer minimizes but we want to maximize the acquisition.
                return new OptimizerResult(param,
                    -m_acquisitionFunc(bestScore, certainty.Prediction, certainty.Variance));
            };

            var ranked = m_maximizer.Optimize(minimize)
                         .Where(r => !double.IsNaN(r.Error))
                         .OrderBy(r => r.Error);

            return ranked.Take(m_functionEvaluationsPerIteration).ToArray();
        }
        /// <summary>
        /// Loads a serialized forest model from ClassificationForestModelString and
        /// verifies its prediction error on the aptitude data.
        /// </summary>
        public void RegressionForestModel_Load()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var sut = RegressionForestModel.Load(() => new StringReader(ClassificationForestModelString));

            var predictions = sut.Predict(observations);
            var error       = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, error, 0.0000001);
        }
        /// <summary>
        /// Round-trips a learned forest model through Save/Load and verifies the
        /// reloaded model reproduces the expected training-set error.
        /// </summary>
        public void RegressionForestModel_Save()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var learner = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            // Serialize the model to an in-memory writer.
            var writer = new StringWriter();
            sut.Save(() => writer);

            // Reload from the serialized text and assert prediction results.
            sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var actual = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, actual, m_delta);
        }
        /// <summary>
        /// Generates up to parameterSetCount candidate parameter sets, ranked by the
        /// expected improvement that FindNextCandidates stores in OptimizerResult.Error.
        /// </summary>
        private double[][] GenerateCandidateParameterSets(
            int parameterSetCount,
            IReadOnlyList <OptimizerResult> previousResults,
            RegressionForestModel model)
        {
            // TODO: Handle maximization and minimization. Currently minimizes.
            var incumbent = previousResults.Min(v => v.Error);

            // Sample new candidates around the incumbent.
            var results = FindNextCandidates(model, incumbent);

            // Error stores ExpectedImprovement here, so drop NaNs and rank descending
            // (we want the maximum, not the minimum).
            return results
                   .Where(r => !double.IsNaN(r.Error))
                   .OrderByDescending(r => r.Error)
                   .Take(parameterSetCount)
                   .Select(r => r.ParameterSet)
                   .ToArray();
        }
        /// <summary>
        /// Verifies that averaging the individual tree predictions of a loaded forest
        /// reproduces the expected ensemble error.
        /// </summary>
        public void RegressionForestModel_Trees()
        {
            var(observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

            var reader = new StringReader(m_regressionForestModelString);
            var sut    = RegressionForestModel.Load(() => reader);

            // Ensemble prediction for each row = mean of the per-tree predictions.
            var predictions = Enumerable.Range(0, observations.RowCount)
                .Select(row =>
                {
                    var observation = observations.Row(row);
                    return sut.Trees.Select(t => t.Predict(observation)).Average();
                })
                .ToArray();

            var evaluator = new MeanSquaredErrorRegressionMetric();
            var error     = evaluator.Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, error, m_delta);
        }
        /// <summary>
        /// Learns a small forest on the aptitude data, round-trips it through
        /// Save/Load, and verifies the reloaded model's prediction error.
        /// </summary>
        public void RegressionForestModel_Save()
        {
            var parser       = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
            var targets      = parser.EnumerateRows("Pass").ToF64Vector();

            var learner = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
            var sut     = learner.Learn(observations, targets);

            // Serialize the model to an in-memory writer.
            var writer = new StringWriter();
            sut.Save(() => writer);

            // Reload from the serialized text and assert prediction results.
            sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));
            var predictions = sut.Predict(observations);

            var actual = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

            Assert.AreEqual(0.14547628738104926, actual, 0.0001);
        }
예제 #19
0
        /// <summary>
        /// Predicts a value for each warehouse using the persisted random-forest model,
        /// training one first if no saved model exists on disk.
        /// </summary>
        /// <param name="warehouses">Warehouses whose public properties form the feature vector.</param>
        /// <returns>One prediction per input warehouse, in input order.</returns>
        private double[] GetPrediction(List <WarehousePredict> warehouses)
        {
            string modelFile = machineLearningModelsAbsolutePath + "random_forest_model.xml";

            if (!File.Exists(modelFile))
            {
                Train();
            }

            // Reuse the already-built path (it was re-concatenated inline before).
            var loadedModel = RegressionForestModel.Load(() => new StreamReader(modelFile));

            var results = new List<double>(warehouses.Count);

            foreach (var warehouse in warehouses)
            {
                // Reflectively flatten every public property into the observation vector.
                // NOTE(review): assumes all public properties are numeric-convertible — confirm.
                var observation = warehouse.GetType()
                    .GetProperties()
                    .Select(p => Convert.ToDouble(p.GetValue(warehouse, null)))
                    .ToArray();

                results.Add(loadedModel.Predict(observation));
            }

            return results.ToArray();
        }
예제 #20
0
        /// <summary>
        /// Builds the next generation of candidate parameter sets: roughly half from
        /// greedy-plus-random search seeded by the best previous results, the rest
        /// purely random, interleaved together.
        /// </summary>
        double[][] GenerateCandidateParameterSets(int parameterSetCount,
                                                  IReadOnlyList <OptimizerResult> previousResults, RegressionForestModel model)
        {
            // Best-performing parameter sets from previous runs seed the local search.
            var topParameterSets = previousResults
                                   .OrderBy(v => v.Error)
                                   .Take(m_localSearchPointCount)
                                   .Select(v => v.ParameterSet)
                                   .ToArray();

            // Challengers come from local search around the top sets (ceil of half the request).
            var challengerCount = (int)Math.Ceiling(parameterSetCount / 2.0F);
            var challengers     = GreedyPlusRandomSearch(topParameterSets, model,
                                                         challengerCount, previousResults);

            // Fill the remainder with random parameter sets.
            var randomCount       = parameterSetCount - challengers.Length;
            var randomChallengers = RandomSearchOptimizer.SampleRandomParameterSets(
                randomCount, m_parameters, m_sampler);

            // Interleave model-based challengers and random parameter sets.
            return InterLeaveModelBasedAndRandomParameterSets(challengers, randomChallengers);
        }
예제 #21
0
        /// <summary>
        /// Trains and evaluates three regression learners — random forest, neural
        /// network, and AdaBoost — on the training/test CSV data, storing the models
        /// and their train/test errors in fields.
        /// </summary>
        public void RegressionLearner_Learn_And_Predict()
        {
            #region Random forest training
            var parser     = new CsvParser(() => new StringReader(treinamento));
            var targetName = "T";

            var observations = parser.EnumerateRows(c => c != targetName)
                               .ToF64Matrix();
            var targets = parser.EnumerateRows(targetName)
                          .ToF64Vector();

            // Snapshot of the last observed row plus its target.
            // NOTE(review): column index 1 is skipped here, as in the original — confirm intended.
            int lastRow = observations.RowCount - 1;
            UltimaObservacao = new double[]
            {
                observations[lastRow, 0], observations[lastRow, 2], observations[lastRow, 3],
                observations[lastRow, 4], observations[lastRow, 5], targets[targets.Length - 1]
            };

            var learner = new RegressionRandomForestLearner(trees: 500);
            model = learner.Learn(observations, targets);
            #endregion

            #region Random forest test
            parser = new CsvParser(() => new StringReader(teste));
            var observationsTeste = parser.EnumerateRows(c => c != targetName)
                                    .ToF64Matrix();
            var targetsTeste = parser.EnumerateRows(targetName)
                               .ToF64Vector();

            // Predict the training and test sets and measure mean squared error.
            var trainPredictions = model.Predict(observations);
            var testPredictions  = model.Predict(observationsTeste);

            var metric = new MeanSquaredErrorRegressionMetric();

            trainError = metric.Error(targets, trainPredictions);
            testError  = metric.Error(targetsTeste, testPredictions);
            #endregion

            #region Neural network training
            var net = new NeuralNet();
            net.Add(new InputLayer(6));
            net.Add(new DropoutLayer(0.2));
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new DropoutLayer(0.5));
            net.Add(new DenseLayer(800, Activation.Relu));
            net.Add(new DropoutLayer(0.5));
            net.Add(new SquaredErrorRegressionLayer());

            var learnernet = new RegressionNeuralNetLearner(net, iterations: 500, loss: new SquareLoss());
            modelnet = learnernet.Learn(observations, targets);
            #endregion

            #region Neural network test
            trainPredictions = modelnet.Predict(observations);
            testPredictions  = modelnet.Predict(observationsTeste);

            trainErrorNet = metric.Error(targets, trainPredictions);
            testErrorNet  = metric.Error(targetsTeste, testPredictions);
            #endregion

            #region AdaBoost training
            var learnerada = new RegressionAdaBoostLearner(maximumTreeDepth: 35, iterations: 2000, learningRate: 0.1);
            modelada = learnerada.Learn(observations, targets);
            #endregion

            #region AdaBoost test
            trainPredictions = modelada.Predict(observations);
            testPredictions  = modelada.Predict(observationsTeste);

            trainErrorAda = metric.Error(targets, trainPredictions);
            testErrorAda  = metric.Error(targetsTeste, testPredictions);

            // Semicolon-terminated dumps of targets/predictions, kept for parity with
            // the original (the strings are not read further in this method).
            // string.Concat replaces the previous O(n²) "+=" loops.
            string stargets          = string.Concat(targets.Select(v => v + ";"));
            string strainPredictions = string.Concat(trainPredictions.Select(v => v + ";"));
            string stargetsTeste     = string.Concat(targetsTeste.Select(v => v + ";"));
            string stestPredictions  = string.Concat(testPredictions.Select(v => v + ";"));
            #endregion
        }