// Applies the learned calibration models to every spectrum in the data file, in parallel.
// MS2 scans have their fragment m/z values corrected with ms2predictor and their precursor
// m/z values with ms1predictor; all other scans are corrected in place with ms1predictor.
private void CalibrateSpectra(RegressionForestModel ms1predictor, RegressionForestModel ms2predictor)
{
    // Partition the 1-based scan range so worker threads process disjoint chunks.
    Parallel.ForEach(Partitioner.Create(1, myMsDataFile.NumSpectra + 1), fff =>
    {
        for (int i = fff.Item1; i < fff.Item2; i++)
        {
            var scan = myMsDataFile.GetOneBasedScan(i);
            if (scan is IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> ms2Scan)
            {
                var precursorScan = myMsDataFile.GetOneBasedScan(ms2Scan.OneBasedPrecursorScanNumber.Value);

                // Fill in the monoisotopic-peak intensity when only the m/z guess is present.
                if (!ms2Scan.SelectedIonMonoisotopicGuessIntensity.HasValue && ms2Scan.SelectedIonMonoisotopicGuessMz.HasValue)
                {
                    ms2Scan.ComputeMonoisotopicPeakIntensity(precursorScan.MassSpectrum);
                }

                // Feature vector: m/z, retention time, log(TIC), log(injection time)
                // (NaN when injection time is absent), log(peak intensity).
                double theFunc(IPeak x) => x.X - ms2predictor.Predict(new double[] { x.X, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });

                // Same features, but taken from the precursor (MS1) scan.
                double theFuncForPrecursor(IPeak x) => x.X - ms1predictor.Predict(new double[] { x.X, precursorScan.RetentionTime, Math.Log(precursorScan.TotalIonCurrent), precursorScan.InjectionTime.HasValue ? Math.Log(precursorScan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });

                ms2Scan.TransformMzs(theFunc, theFuncForPrecursor);
            }
            else
            {
                // Non-precursor scan: shift every peak's m/z by the MS1 model prediction.
                Func<IPeak, double> theFunc = x => x.X - ms1predictor.Predict(new double[] { x.X, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN, Math.Log(x.Y) });
                scan.MassSpectrum.ReplaceXbyApplyingFunction(theFunc);
            }
        }
    }
    );
}
// Learns a 100-tree regression forest on a shuffled 70% index subset of the Glass
// data set and pins the resulting mean squared error on the full data set.
public void RegressionRandomForestLearnerTest_Learn_Glass_100_Indices()
{
    // Load the Glass data: features are every column except "Target".
    var parser = new CsvParser(() => new StringReader(Resources.Glass));
    var observations = parser.EnumerateRows(v => v != "Target").ToF64Matrix();
    var targets = parser.EnumerateRows("Target").ToF64Vector();

    // 100 trees, seeded for reproducibility. (Removed the unused `rows` local.)
    var sut = new RegressionRandomForestLearner(100, 1, 100, 1, 0.0001, 1.0, 42, false);

    // Train on a deterministically shuffled 70% subset of the row indices.
    var indices = Enumerable.Range(0, targets.Length).ToArray();
    indices.Shuffle(new Random(42));
    indices = indices.Take((int)(targets.Length * 0.7)).ToArray();

    var decisionTreeModels = sut.Learn(observations, targets, indices, out var rawVariableImportance).ToArray();
    var model = new RegressionForestModel(decisionTreeModels, rawVariableImportance);

    // Evaluate on ALL observations (including rows not used for training).
    var predictions = model.Predict(observations);
    var evaluator = new MeanSquaredErrorRegressionMetric();
    var error = evaluator.Error(targets, predictions);
    Assert.AreEqual(0.49709813080602938, error, m_delta);
}
// Applies the calibration model to (1) top-down hit m/z values, (2) charge-state
// centroids of components matching this raw file's condition/replicate/fraction,
// and (3) every peak of every MS1 spectrum in the data file.
public void CalibrateHitsAndComponents(RegressionForestModel bestCf)
{
    // Correct each top-down hit's m/z; features are m/z, retention time,
    // log(TIC) and log(injection time) (NaN when injection time is missing).
    foreach (SpectrumMatch hit in all_topdown_hits)
    {
        hit.mz = hit.mz - bestCf.Predict(new double[] { hit.mz, hit.ms1_scan.RetentionTime, Math.Log(hit.ms1_scan.TotalIonCurrent), hit.ms1_scan.InjectionTime.HasValue ? Math.Log(hit.ms1_scan.InjectionTime.Value) : double.NaN });
    }

    // Correct charge-state centroids for components from the same condition,
    // biological/technical replicate and fraction as this raw file.
    foreach (Component c in Sweet.lollipop.calibration_components.Where(h => h.input_file.lt_condition == raw_file.lt_condition && h.input_file.biological_replicate == raw_file.biological_replicate && h.input_file.fraction == raw_file.fraction && h.input_file.technical_replicate == raw_file.technical_replicate))
    {
        foreach (ChargeState cs in c.charge_states)
        {
            // Find the scan closest to the component's RT apex, then walk
            // backwards to the nearest MS1 scan.
            int scanNumber = myMsDataFile.GetClosestOneBasedSpectrumNumber(c.rt_apex);
            var scan = myMsDataFile.GetOneBasedScan(scanNumber);
            bool ms1Scan = scan.MsnOrder == 1;
            while (!ms1Scan)
            {
                scanNumber--;
                scan = myMsDataFile.GetOneBasedScan(scanNumber);
                ms1Scan = scan.MsnOrder == 1;
            }
            cs.mz_centroid = cs.mz_centroid - bestCf.Predict(new double[] { cs.mz_centroid, scan.RetentionTime, Math.Log(scan.TotalIonCurrent), scan.InjectionTime.HasValue ? Math.Log(scan.InjectionTime.Value) : double.NaN });
        }
    }

    // Finally, recalibrate every peak of every MS1 spectrum in place.
    foreach (var a in myMsDataFile.GetAllScansList().Where(s => s.MsnOrder == 1))
    {
        Func<MzPeak, double> theFunc = x => x.Mz - bestCf.Predict(new double[] { x.Mz, a.RetentionTime, Math.Log(a.TotalIonCurrent), a.InjectionTime.HasValue ? Math.Log(a.InjectionTime.Value) : double.NaN });
        a.MassSpectrum.ReplaceXbyApplyingFunction(theFunc);
    }
}
// Generates candidate parameter sets by combining local search around the parent
// sets with purely random samples, then returns the parameterSetCount candidates
// with the highest expected improvement over the best error seen so far.
double[][] GreedyPlusRandomSearch(double[][] parentParameterSets, RegressionForestModel model, int parameterSetCount, IReadOnlyList<OptimizerResult> previousResults)
{
    // TODO: Handle maximization and minimization. Currently minimizes.
    var best = previousResults.Min(v => v.Error);
    var parameterSets = new List<(double[] parameterSet, double EI)>();

    // Perform local search.
    // NOTE(review): the loop variable `parameterSet` is never used — LocalSearch is
    // handed the entire `parentParameterSets` array on every iteration, so this loop
    // adds the same result once per parent. Confirm whether LocalSearch was meant to
    // receive the individual `parameterSet` instead.
    foreach (var parameterSet in parentParameterSets)
    {
        var bestParameterSet = LocalSearch(parentParameterSets, model, best, m_epsilon);
        parameterSets.Add(bestParameterSet);
    }

    // Additional set of random parameterSets to choose from during local search.
    for (int i = 0; i < m_randomSearchPointCount; i++)
    {
        var parameterSet = RandomSearchOptimizer.SampleParameterSet(m_parameters, m_sampler);
        var expectedImprovement = ComputeExpectedImprovement(best, parameterSet, model);
        parameterSets.Add((parameterSet, expectedImprovement));
    }

    // Take the best parameterSets. Here we want the max expected improvement.
    return (parameterSets.OrderByDescending(v => v.EI)
        .Take(parameterSetCount).Select(v => v.parameterSet)
        .ToArray());
}
// Scores a candidate parameter set by its expected improvement over the current
// best error, based on the surrogate model's mean and variance at that point.
double ComputeExpectedImprovement(double best, double[] parameterSet, RegressionForestModel model)
{
    var certainty = model.PredictCertainty(parameterSet);
    return AcquisitionFunctions.ExpectedImprovement(best, certainty.Prediction, certainty.Variance);
}
// Runs the inner optimizer over the expected-improvement criterion and returns
// the configured number of top candidates.
OptimizerResult[] FindMinimumCandidates(RegressionForestModel model, double bestScore)
{
    // Wrap the acquisition criterion so the optimizer can minimize it directly.
    OptimizerResult minimize(double[] param) =>
        new OptimizerResult(param, ExpectedImprovementCriterion(param, model, bestScore));

    return m_optimizer.Optimize(minimize)
        .Take(m_numberOfCandidatesEvaluatedPrIteration)
        .ToArray();
}
// Trains a default-parameter random forest on the white-wine data, reports train
// and test error, then round-trips the model through its xml serialization.
public void RandomForest_Default_Parameters_Save_Load_Model_Using_Static_Methods()
{
    #region read and split data
    // Use StreamReader(filepath) when running from the file system.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // Feature matrix: every column except the regression target.
    var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();
    var targets = parser.EnumerateRows(targetName).ToF64Vector();

    // Regression problem, so use the random training/test splitter:
    // 70% training, 30% test.
    var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);
    var split = splitter.SplitSet(observations, targets);
    var trainSet = split.TrainingSet;
    var testSet = split.TestSet;
    #endregion

    // Learner with default parameters.
    var learner = new RegressionRandomForestLearner(trees: 100);
    var model = learner.Learn(trainSet.Observations, trainSet.Targets);

    // Predict both sets.
    var trainPredictions = model.Predict(trainSet.Observations);
    var testPredictions = model.Predict(testSet.Observations);

    // Mean squared error is the evaluation metric for this regression problem.
    var metric = new MeanSquaredErrorRegressionMetric();
    var trainError = metric.Error(trainSet.Targets, trainPredictions);
    var testError = metric.Error(testSet.Targets, testPredictions);
    TraceTrainingAndTestError(trainError, testError);

    // Save the model (xml by default); use new StreamWriter(filePath) on disk.
    var savedModel = new StringWriter();
    model.Save(() => savedModel);

    // Reload the model; use new StreamReader(filePath) on disk.
    var loadedModel = RegressionForestModel.Load(() => new StringReader(savedModel.ToString()));
}
/// <summary>
/// Alternative to ExpectedImprovementCriterion.
/// Scores a point as the predicted mean plus kappa standard deviations,
/// favoring points that are either promising or uncertain.
/// </summary>
/// <param name="observation"></param>
/// <param name="model"></param>
/// <param name="kappa"></param>
/// <returns></returns>
double UpperConfidenceBound(double[] observation, RegressionForestModel model, double kappa = 2.56)
{
    var prediction = model.PredictCertainty(observation);

    // Clamp the variance away from zero so the bound never collapses to the bare mean.
    var variance = Math.Max(prediction.Variance, 1e-9);

    return prediction.Prediction + kappa * Math.Sqrt(variance);
}
/// <summary>
/// Alternative to ExpectedImprovementCriterion.
/// Probability-of-improvement style criterion: standardizes the predicted mean
/// against yMax (plus margin xi) and maps it through the cumulative density function.
/// </summary>
/// <param name="observation"></param>
/// <param name="model"></param>
/// <param name="yMax"></param>
/// <param name="xi"></param>
/// <returns></returns>
double PExpectedImprovementCriterion(double[] observation, RegressionForestModel model, double yMax, double xi = 0.0)
{
    var prediction = model.PredictCertainty(observation);

    // Clamp the variance away from zero to avoid division by zero below.
    var variance = Math.Max(prediction.Variance, 1e-9);

    var standardized = (prediction.Prediction - yMax - xi) / Math.Sqrt(variance);
    return CumulativeDensityFunction(standardized);
}
// Loads a serialized regression forest from the embedded model string and pins
// its mean squared error on the aptitude data set.
public void RegressionForestModel_Load()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var sut = RegressionForestModel.Load(() => new StringReader(m_regressionForestModelString));

    var predictions = sut.Predict(observations);
    var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

    Assert.AreEqual(0.14547628738104926, error, m_delta);
}
// Maximizes the acquisition function via the configured maximizer and returns
// the configured number of top candidate parameter sets.
OptimizerResult[] FindNextCandidates(RegressionForestModel model, double bestScore)
{
    // Use the model to predict the expected performance (mean and variance)
    // of a parameter set. The acquisition value is negated because we want to
    // "maximize" the acquisition function through a minimizing interface.
    OptimizerResult minimize(double[] param)
    {
        var certainty = model.PredictCertainty(param);
        return new OptimizerResult(param,
            -m_acquisitionFunc(bestScore, certainty.Prediction, certainty.Variance));
    }

    return m_maximizer.Optimize(minimize)
        .Take(m_numberOfCandidatesEvaluatedPrIteration)
        .ToArray();
}
// Samples random parameter sets and scores each by its expected improvement
// over the best score seen so far.
private OptimizerResult[] FindNextCandidates(RegressionForestModel model, double bestScore)
{
    // Preallocate: exactly m_randomSearchPointCount candidates are produced.
    var candidates = new OptimizerResult[m_randomSearchPointCount];

    for (var i = 0; i < m_randomSearchPointCount; i++)
    {
        var parameterSet = RandomSearchOptimizer.SampleParameterSet(m_parameters, m_sampler);
        var expectedImprovement = ComputeExpectedImprovement(bestScore, parameterSet, model);
        candidates[i] = new OptimizerResult(parameterSet, expectedImprovement);
    }

    return candidates;
}
// Runs the maximizer over the acquisition function and returns the best
// non-NaN candidates, lowest (most negative, i.e. best) error first.
OptimizerResult[] FindNextCandidates(RegressionForestModel model, double bestScore)
{
    // Use the model to predict the expected performance (mean and variance) of
    // a parameter set; negated since we "maximize" through a minimizing interface.
    Func<double[], OptimizerResult> evaluate = param =>
    {
        var certainty = model.PredictCertainty(param);
        var score = -m_acquisitionFunc(bestScore, certainty.Prediction, certainty.Variance);
        return new OptimizerResult(param, score);
    };

    var ranked = m_maximizer.Optimize(evaluate)
        .Where(result => !double.IsNaN(result.Error))
        .OrderBy(result => result.Error)
        .Take(m_functionEvaluationsPerIteration);

    return ranked.ToArray();
}
// Loads a serialized forest from the embedded model string and pins its mean
// squared error on the aptitude data set.
public void RegressionForestModel_Load()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var sut = RegressionForestModel.Load(() => new StringReader(ClassificationForestModelString));

    var predictions = sut.Predict(observations);
    var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);

    Assert.AreEqual(0.14547628738104926, error, 0.0000001);
}
// Trains a small forest, serializes it, reloads it, and verifies predictions
// survive the round trip by pinning the resulting error.
public void RegressionForestModel_Save()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var learner = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
    var sut = learner.Learn(observations, targets);

    // Round-trip: save to a string, then load the model back from it.
    var writer = new StringWriter();
    sut.Save(() => writer);
    sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));

    var actual = new MeanSquaredErrorRegressionMetric().Error(targets, sut.Predict(observations));
    Assert.AreEqual(0.14547628738104926, actual, m_delta);
}
// Produces up to parameterSetCount new candidate parameter sets by sampling
// around the best result observed so far.
private double[][] GenerateCandidateParameterSets(
    int parameterSetCount,
    IReadOnlyList<OptimizerResult> previousResults,
    RegressionForestModel model)
{
    // TODO: Handle maximization and minimization. Currently minimizes.
    var bestError = previousResults.Min(r => r.Error);

    // Sample new candidates via the acquisition function.
    var candidates = FindNextCandidates(model, bestError);

    // Error stores the expected improvement here, so keep the LARGEST values,
    // dropping any NaN scores.
    return candidates
        .Where(c => !double.IsNaN(c.Error))
        .OrderByDescending(c => c.Error)
        .Take(parameterSetCount)
        .Select(c => c.ParameterSet)
        .ToArray();
}
// Verifies that averaging the individual trees' predictions reproduces the
// forest's known error on the aptitude data set.
public void RegressionForestModel_Trees()
{
    var (observations, targets) = DataSetUtilities.LoadAptitudeDataSet();

    var sut = RegressionForestModel.Load(() => new StringReader(m_regressionForestModelString));

    // Predict each row manually as the mean of the per-tree predictions.
    var rowCount = observations.RowCount;
    var predictions = new double[rowCount];
    for (var row = 0; row < rowCount; row++)
    {
        predictions[row] = sut.Trees
            .Select(tree => tree.Predict(observations.Row(row)))
            .Average();
    }

    var error = new MeanSquaredErrorRegressionMetric().Error(targets, predictions);
    Assert.AreEqual(0.14547628738104926, error, m_delta);
}
// Trains a small forest on the aptitude data, serializes it, reloads it, and
// verifies the reloaded model's error matches the expected value.
public void RegressionForestModel_Save()
{
    var parser = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var observations = parser.EnumerateRows(v => v != "Pass").ToF64Matrix();
    var targets = parser.EnumerateRows("Pass").ToF64Vector();

    var learner = new RegressionRandomForestLearner(2, 5, 100, 1, 0.0001, 1.0, 42, false);
    var sut = learner.Learn(observations, targets);

    // Round-trip: save to a string, then load the model back from it.
    var writer = new StringWriter();
    sut.Save(() => writer);
    sut = RegressionForestModel.Load(() => new StringReader(writer.ToString()));

    var actual = new MeanSquaredErrorRegressionMetric().Error(targets, sut.Predict(observations));
    Assert.AreEqual(0.14547628738104926, actual, 0.0001);
}
/// <summary>
/// Predicts one value per warehouse using the persisted random-forest model,
/// training a model first when no saved file exists on disk.
/// </summary>
/// <param name="warehouses">Warehouses whose public properties form the observation vector.</param>
/// <returns>Predictions in the same order as the input list.</returns>
private double[] GetPrediction(List<WarehousePredict> warehouses)
{
    string modelFile = machineLearningModelsAbsolutePath + "random_forest_model.xml";
    if (!File.Exists(modelFile))
    {
        Train();
    }

    // FIX: reuse the already-computed modelFile (the path was previously rebuilt
    // inline) and dispose the StreamReader, which was leaked before.
    RegressionForestModel loadedModel;
    using (var reader = new StreamReader(modelFile))
    {
        loadedModel = RegressionForestModel.Load(() => reader);
    }

    List<double> results = new List<double>(warehouses.Count);
    foreach (var warehouse in warehouses)
    {
        // Build the observation from every public property via reflection,
        // in declaration order (must match the order used during training).
        List<double> observation = new List<double>();
        var properties = warehouse.GetType().GetProperties();
        foreach (var property in properties)
        {
            observation.Add(Convert.ToDouble(property.GetValue(warehouse, null)));
        }
        results.Add(loadedModel.Predict(observation.ToArray()));
    }
    return results.ToArray();
}
// Builds the next batch of candidate parameter sets by mixing model-guided
// challengers (greedy + random search seeded from the best previous results)
// with purely random samples, interleaved.
double[][] GenerateCandidateParameterSets(int parameterSetCount, IReadOnlyList<OptimizerResult> previousResults, RegressionForestModel model)
{
    // Lowest-error parameter sets from earlier iterations seed the local search.
    var topParameterSets = previousResults
        .OrderBy(r => r.Error)
        .Take(m_localSearchPointCount)
        .Select(r => r.ParameterSet)
        .ToArray();

    // Roughly half of the requested sets come from greedy + random search...
    var challengerCount = (int)Math.Ceiling(parameterSetCount / 2.0F);
    var challengers = GreedyPlusRandomSearch(topParameterSets, model, challengerCount, previousResults);

    // ...and the remainder are sampled uniformly at random.
    var randomCount = parameterSetCount - challengers.Length;
    var randomChallengers = RandomSearchOptimizer.SampleRandomParameterSets(randomCount, m_parameters, m_sampler);

    // Interleave model-based and random candidates.
    return InterLeaveModelBasedAndRandomParameterSets(challengers, randomChallengers);
}
/// <summary>
/// Random forest and neural network prediction ("Predição de Floresta Aleatória
/// e Rede Neural"): trains a random forest, a neural net and an AdaBoost model
/// on the same train/test data, storing the models and their train/test errors
/// in fields.
/// </summary>
public void RegressionLearner_Learn_And_Predict()
{
    #region Treinamento da Floresta Aleatória
    var parser = new CsvParser(() => new StringReader(treinamento));
    var targetName = "T";

    var observations = parser.EnumerateRows(c => c != targetName).ToF64Matrix();
    var targets = parser.EnumerateRows(targetName).ToF64Vector();

    // Keep the most recent observation for later prediction.
    // NOTE(review): column index 1 is skipped here — confirm that is intentional.
    UltimaObservacao = new double[]
    {
        observations[observations.RowCount - 1, 0],
        observations[observations.RowCount - 1, 2],
        observations[observations.RowCount - 1, 3],
        observations[observations.RowCount - 1, 4],
        observations[observations.RowCount - 1, 5],
        targets[targets.Count() - 1]
    };

    var learner = new RegressionRandomForestLearner(trees: 500);
    model = learner.Learn(observations, targets);
    #endregion

    #region Teste da Floresta Aleatória
    parser = new CsvParser(() => new StringReader(teste));
    var observationsTeste = parser.EnumerateRows(c => c != targetName).ToF64Matrix();
    var targetsTeste = parser.EnumerateRows(targetName).ToF64Vector();

    // predict the training and test set.
    var trainPredictions = model.Predict(observations);
    var testPredictions = model.Predict(observationsTeste);

    // create the metric
    var metric = new MeanSquaredErrorRegressionMetric();

    // measure the error on training and test set.
    trainError = metric.Error(targets, trainPredictions);
    testError = metric.Error(targetsTeste, testPredictions);
    #endregion

    #region Treinamento da Rede Neural
    var net = new NeuralNet();
    net.Add(new InputLayer(6));
    net.Add(new DropoutLayer(0.2));
    net.Add(new DenseLayer(800, Activation.Relu));
    net.Add(new DropoutLayer(0.5));
    net.Add(new DenseLayer(800, Activation.Relu));
    net.Add(new DropoutLayer(0.5));
    net.Add(new SquaredErrorRegressionLayer());

    var learnernet = new RegressionNeuralNetLearner(net, iterations: 500, loss: new SquareLoss());
    modelnet = learnernet.Learn(observations, targets);
    #endregion

    #region Teste da Rede Neural
    trainPredictions = modelnet.Predict(observations);
    testPredictions = modelnet.Predict(observationsTeste);

    trainErrorNet = metric.Error(targets, trainPredictions);
    testErrorNet = metric.Error(targetsTeste, testPredictions);
    #endregion

    #region Treinamento Ada
    var learnerada = new RegressionAdaBoostLearner(maximumTreeDepth: 35, iterations: 2000, learningRate: 0.1);
    modelada = learnerada.Learn(observations, targets);
    #endregion

    #region Teste Ada
    trainPredictions = modelada.Predict(observations);
    testPredictions = modelada.Predict(observationsTeste);

    trainErrorAda = metric.Error(targets, trainPredictions);
    testErrorAda = metric.Error(targetsTeste, testPredictions);

    // FIX: replaced O(n^2) string concatenation in loops with string.Concat,
    // preserving the exact output (trailing ';' included).
    // NOTE(review): these locals are never read afterwards — candidates for removal.
    string stargets = string.Concat(targets.Select(i => i + ";"));
    string strainPredictions = string.Concat(trainPredictions.Select(i => i + ";"));
    string stargetsTeste = string.Concat(targetsTeste.Select(i => i + ";"));
    string stestPredictions = string.Concat(testPredictions.Select(i => i + ";"));
    #endregion
}