/// <summary>
/// Creates a new mProphet peak scoring model with the given name, trains it on the
/// transition groups obtained from the current document, and stores the trained
/// model in the settings' peak scoring model list.
/// </summary>
/// <param name="modelName">Name for the new scoring model; also echoed to the output.</param>
/// <param name="decoys">
/// When true, the document must supply at least one decoy transition group, otherwise the
/// method reports an error and returns null. When false, training receives an empty decoy list.
/// </param>
/// <param name="secondBest">Forwarded to the model constructor and to training.</param>
/// <returns>
/// The trained model paired with the generator's peak group features, or null when
/// there are no decoys to train on or any exception is thrown along the way.
/// </returns>
private ModelAndFeatures CreateScoringModel(string modelName, bool decoys, bool secondBest)
{
    _out.WriteLine(Resources.CommandLine_CreateScoringModel_Creating_scoring_model__0_, modelName);

    try
    {
        // Start from an untrained model (no parameters, no explicit calculator list).
        var model = new MProphetPeakScoringModel(modelName, (LinearModelParams)null, null, decoys, secondBest);
        var monitor = new CommandProgressMonitor(_out, new ProgressStatus(String.Empty));
        var generator = new TargetDecoyGenerator(_doc, model, monitor);

        // Collect the target and decoy groups from the generator.
        List<IList<float[]>> targetGroups;
        List<IList<float[]>> decoyGroups;
        generator.GetTransitionGroups(out targetGroups, out decoyGroups);

        if (!decoys)
        {
            // Decoys were not requested, so none take part in training.
            decoyGroups = new List<IList<float[]>>();
        }
        else if (decoyGroups.Count == 0)
        {
            // Decoys were requested but the document has none: report and give up.
            _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__There_are_no_decoy_peptides_in_the_document__Failed_to_create_scoring_model_);
            return null;
        }

        // Every weight starts at zero; scores the generator marks as
        // ineligible for this data set are flagged with NaN instead.
        var startWeights = new double[model.PeakFeatureCalculators.Count];
        for (int index = 0; index < startWeights.Length; index++)
        {
            if (generator.EligibleScores[index])
            {
                continue;
            }
            startWeights[index] = double.NaN;
        }
        var startParams = new LinearModelParams(startWeights);

        // Train, then store the result in the settings list.
        var trainedModel = (MProphetPeakScoringModel)model.Train(targetGroups, decoyGroups, startParams, secondBest, true, monitor);
        Settings.Default.PeakScoringModelList.SetValue(trainedModel);

        return new ModelAndFeatures(trainedModel, generator.PeakGroupFeatures);
    }
    catch (Exception failure)
    {
        _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__Failed_to_create_scoring_model_);
        _out.WriteLine(failure.Message);
        return null;
    }
}
/// <summary>
/// Returns a copy of this model whose weights are taken from <c>DEFAULT_WEIGHTS</c>
/// rather than learned: every weight that is NaN in <paramref name="initParameters"/>
/// stays NaN, and every other weight is replaced by the default at the same index.
/// The resulting parameters are rescaled using the decoy set's mean and standard
/// deviation, and percent contributions are then calculated.
/// </summary>
/// <param name="targets">Target groups to score.</param>
/// <param name="decoys">Decoy groups to score; the copy's <c>UsesDecoys</c> is true when this is non-empty.</param>
/// <param name="targetDecoyGenerator">Passed through to the percent contribution calculation.</param>
/// <param name="initParameters">Only its NaN pattern and weight count are used.</param>
/// <param name="iterations">Not used by this implementation.</param>
/// <param name="includeSecondBest">
/// When true, the scored targets are split via <c>SelectTargetsAndDecoys</c> and the
/// second-best groups are appended to the decoy set before it is scored.
/// </param>
/// <param name="preTrain">Not used by this implementation.</param>
/// <param name="progressMonitor">Not used by this implementation.</param>
/// <param name="documentPath">Not used by this implementation.</param>
/// <returns>The modified copy of this model.</returns>
public override IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, TargetDecoyGenerator targetDecoyGenerator, LinearModelParams initParameters, int?iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null)
{
    return ChangeProp(ImClone(this), im =>
    {
        // Build the weight vector: defaults everywhere except where the caller supplied NaN.
        int weightCount = initParameters.Weights.Count;
        var fixedWeights = new double[weightCount];
        for (int index = 0; index < weightCount; index++)
        {
            if (double.IsNaN(initParameters.Weights[index]))
            {
                fixedWeights[index] = double.NaN;
            }
            else
            {
                fixedWeights[index] = DEFAULT_WEIGHTS[index];
            }
        }
        var fixedParams = new LinearModelParams(fixedWeights);

        var decoyPeaks = new ScoredGroupPeaksSet(decoys, decoys.Count);
        var targetPeaks = new ScoredGroupPeaksSet(targets, targets.Count);

        // Targets are scored first because the second-best selection below works on the scored set.
        targetPeaks.ScorePeaks(fixedParams.Weights);

        if (includeSecondBest)
        {
            ScoredGroupPeaksSet secondBestPeaks;
            targetPeaks.SelectTargetsAndDecoys(out targetPeaks, out secondBestPeaks);
            foreach (var runnerUp in secondBestPeaks.ScoredGroupPeaksList)
            {
                decoyPeaks.Add(runnerUp);
            }
        }

        // Decoys are scored only after any second-best groups have joined them.
        decoyPeaks.ScorePeaks(fixedParams.Weights);

        im.UsesDecoys = decoys.Count > 0;
        im.UsesSecondBest = includeSecondBest;
        im.Parameters = fixedParams.RescaleParameters(decoyPeaks.Mean, decoyPeaks.Stdev);
        im.Parameters = im.Parameters.CalculatePercentContributions(im, targetDecoyGenerator);
    });
}
/// <summary>
/// Trains a peak scoring model from target and decoy groups. Abstract: each concrete
/// scoring model supplies its own training procedure.
/// </summary>
/// <param name="targets">Target groups; each group is a list of <c>float[]</c> (presumably per-peak feature scores — TODO confirm).</param>
/// <param name="decoys">Decoy groups, in the same shape as <paramref name="targets"/>.</param>
/// <param name="targetDecoyGenerator">Generator made available to the implementation (its use is implementation-specific).</param>
/// <param name="initParameters">Initial linear model parameters handed to the implementation.</param>
/// <param name="cutoffs">NOTE(review): a list of double cut-off values; their meaning is not visible from this declaration — confirm against implementations.</param>
/// <param name="iterations">Optional iteration count; defaults to null.</param>
/// <param name="includeSecondBest">Defaults to false. Presumably asks the implementation to treat second-best target peaks as decoys — verify against implementations.</param>
/// <param name="preTrain">Defaults to true. Presumably enables bootstrapping from a pre-trained model — verify against implementations.</param>
/// <param name="progressMonitor">Optional progress monitor; defaults to null.</param>
/// <param name="documentPath">Optional document path; defaults to null.</param>
/// <returns>The resulting scoring model.</returns>
public abstract IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, TargetDecoyGenerator targetDecoyGenerator, LinearModelParams initParameters, IList <double> cutoffs, int?iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null);
/// <summary>
/// Returns a copy of these parameters with a freshly computed percent contribution
/// for each existing percent-contribution slot.
/// </summary>
/// <param name="model">Scoring model handed to the generator for each contribution lookup.</param>
/// <param name="targetDecoyGenerator">
/// Source of the contribution values. When null, every contribution is NaN.
/// </param>
/// <returns>
/// A modified copy of this instance whose <c>PercentContributions</c> holds the new values;
/// any value the generator reports as null is stored as NaN.
/// </returns>
public LinearModelParams CalculatePercentContributions(IPeakScoringModel model, TargetDecoyGenerator targetDecoyGenerator)
{
    int slotCount = _percentContributions.Count;
    var contributions = new double[slotCount];

    for (int slot = 0; slot < slotCount; slot++)
    {
        if (targetDecoyGenerator == null)
        {
            // No generator to ask, so the contribution is unknown.
            contributions[slot] = double.NaN;
            continue;
        }

        contributions[slot] = targetDecoyGenerator.GetPercentContribution(model, slot) ?? double.NaN;
    }

    return ChangeProp(ImClone(this), im =>
    {
        im.PercentContributions = contributions;
    });
}
/// <summary>
/// Train the model by iteratively calculating weights to separate target and decoy transition groups.
/// </summary>
/// <remarks>
/// Works on a clone of this model. Peaks are first scored either with the initial weights
/// (<paramref name="preTrain"/> false) or with weights taken from the legacy model's defaults
/// (<paramref name="preTrain"/> true). Weights are then recalculated for up to
/// <paramref name="iterations"/> (or MAX_ITERATIONS) rounds. The q value cut-off starts at
/// 0.15 when pre-training (0.01 otherwise) and is tightened to 0.02 and then 0.01; training stops
/// early once the cut-off is at 0.01 or below and the true peak count no longer increases.
/// </remarks>
/// <param name="targetsIn">Target transition groups.</param>
/// <param name="decoysIn">Decoy transition groups.</param>
/// <param name="targetDecoyGenerator">Target decoy generator used to calculate contribution percentages</param>
/// <param name="initParameters">Initial model parameters (weights and bias); when null, parameters sized by the number of peak feature calculators are created</param>
/// <param name="iterations">Optional specific number of iterations to use in training</param>
/// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
/// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
/// <param name="progressMonitor">Used to report progress to the calling context</param>
/// <param name="documentPath">The path to the current document for writing score distributions</param>
/// <returns>Immutable model with new weights.</returns>
/// <exception cref="OperationCanceledException">Thrown when <paramref name="progressMonitor"/> reports cancellation during an iteration.</exception>
public override IPeakScoringModel Train(IList <IList <float[]> > targetsIn, IList <IList <float[]> > decoysIn, TargetDecoyGenerator targetDecoyGenerator, LinearModelParams initParameters, int?iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null)
{
    // Fall back to fresh parameters sized to this model's feature calculators.
    if (initParameters == null)
    {
        initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
    }
    return(ChangeProp(ImClone(this), im =>
    {
        // This may take a long time between progress updates, but just measure progress by cycles through the training
        IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
        if (progressMonitor != null)
        {
            progressMonitor.UpdateProgress(status);
        }
        // Skip empty groups; note the unfiltered counts are still what is passed as the
        // second constructor argument below.
        var targets = targetsIn.Where(list => list.Count > 0);
        var decoys = decoysIn.Where(list => list.Count > 0);
        var targetTransitionGroups = new ScoredGroupPeaksSet(targets, targetsIn.Count);
        var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys, decoysIn.Count);
        // Iteratively refine the weights through multiple iterations.
        // calcWeights starts as a private copy of the initial weights.
        var calcWeights = new double[initParameters.Weights.Count];
        Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
        double qValueCutoff = 0.01; // First iteration cut-off - if not pretraining, just start at 0.01
        // Start with scores calculated from the initial weights
        if (!preTrain)
        {
            targetTransitionGroups.ScorePeaks(calcWeights);
            decoyTransitionGroups.ScorePeaks(calcWeights);
        }
        // Bootstrap from the pre-trained legacy model
        else
        {
            // Pre-training starts with a looser cut-off, tightened in the loop below.
            qValueCutoff = 0.15;
            // New array elements are zero; positions whose initial weight is NaN are kept NaN
            // (NaN presumably marks a feature that is unavailable/disabled — TODO confirm).
            var preTrainedWeights = new double[initParameters.Weights.Count];
            for (int i = 0; i < preTrainedWeights.Length; ++i)
            {
                if (double.IsNaN(initParameters.Weights[i]))
                {
                    preTrainedWeights[i] = double.NaN;
                }
            }
            // Choose the legacy calculator set (standard vs. analyte) with the larger enabled
            // count for these weights; ties go to the standard set.
            int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
            int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
            bool hasStandards = standardEnabledCount >= analyteEnabledCount;
            var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
            for (int i = 0; i < calculators.Length; ++i)
            {
                // The retention time prediction calculator is left out of the bootstrap weights.
                // NOTE(review): the reason is not visible in this block.
                if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                {
                    continue;
                }
                // Hand the legacy default weight at the same index to SetCalculatorValue, keyed by calculator type.
                SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
            }
            targetTransitionGroups.ScorePeaks(preTrainedWeights);
            decoyTransitionGroups.ScorePeaks(preTrainedWeights);
        }
        // State accepted from the most recent non-converged iteration. These are only
        // advanced at the bottom of the loop, so on an early break they still describe
        // the iteration that produced lastWeights.
        double decoyMean = 0;
        double decoyStdev = 0;
        bool colinearWarning = false;
        int iterationCount = iterations ?? MAX_ITERATIONS;
        int truePeaksCount = 0;
        var lastWeights = new double[calcWeights.Length];
        for (int i = 0; i < iterationCount; i++)
        {
            int percentComplete = 0;
            double decoyMeanNew, decoyStdevNew;
            bool colinearWarningNew = colinearWarning;
            // calcWeights is presumably updated in place by CalculateWeights (it is copied
            // into lastWeights afterwards) — TODO confirm.
            int truePeaksCountNew = im.CalculateWeights(documentPath, targetTransitionGroups, decoyTransitionGroups, includeSecondBest, i == 0, // Use non-parametric q values for first round, when normality assumption may not hold
                qValueCutoff, calcWeights, out decoyMeanNew, out decoyStdevNew, ref colinearWarningNew);
            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled)
                {
                    throw new OperationCanceledException();
                }
                // Calculate progress, but wait to make sure convergence has not occurred before setting it
                string formatText = qValueCutoff > 0.02 ? Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1__ : Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1_____2______peaks_at__3_0_____FDR_;
                // Dividing by iterationCount + 1 keeps the value below 100 inside the loop.
                percentComplete = (i + 1) * 100 / (iterationCount + 1);
                status = status.ChangeMessage(string.Format(formatText, i + 1, iterationCount, truePeaksCountNew, qValueCutoff))
                    .ChangePercentComplete(percentComplete);
            }
            if (qValueCutoff > 0.02)
            {
                // Tighten the q value cut-off for "truth" to 2% FDR
                qValueCutoff = 0.02;
                // And allow the true peaks count to go down in the next iteration
                // Though it rarely will
                truePeaksCountNew = 0;
            }
            // Decided in 2018 that equal should be counted as converging, since otherwise training can just get stuck,
            // and go to full iteration count without progressing
            else if (truePeaksCountNew <= truePeaksCount)
            {
                // The model has leveled off enough to begin losing discriminant value
                if (qValueCutoff > 0.01)
                {
                    // Tighten the q value cut-off for "truth" to 1% FDR
                    qValueCutoff = 0.01;
                    // And allow the true peaks count to go down in the next iteration
                    truePeaksCountNew = 0;
                }
                else
                {
                    // Converged: report at least 95% complete with the previously accepted peak count.
                    if (progressMonitor != null)
                    {
                        progressMonitor.UpdateProgress(status = status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Scoring_model_converged__iteration__0_____1______peaks_at__2_0_____FDR_, i + 1, truePeaksCount, qValueCutoff))
                            .ChangePercentComplete(Math.Max(95, percentComplete)));
                    }
                    // Discard this iteration's weights and return to the last accepted ones.
                    calcWeights = lastWeights;
                    break;
                }
            }
            // Accept this iteration: remember its count, weights and decoy statistics.
            truePeaksCount = truePeaksCountNew;
            Array.Copy(calcWeights, lastWeights, calcWeights.Length);
            decoyMean = decoyMeanNew;
            decoyStdev = decoyStdevNew;
            colinearWarning = colinearWarningNew;
            if (progressMonitor != null)
            {
                progressMonitor.UpdateProgress(status);
            }
        }
        if (progressMonitor != null)
        {
            progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
        }
        var parameters = new LinearModelParams(calcWeights);
        parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
        // Parameters is assigned before the contribution calculation because im itself is
        // passed to it (presumably so it sees the rescaled parameters — TODO confirm), and
        // is then replaced with the version carrying percent contributions.
        im.Parameters = parameters;
        im.ColinearWarning = colinearWarning;
        im.UsesSecondBest = includeSecondBest;
        im.UsesDecoys = decoysIn.Count > 0;
        im.Parameters = parameters.CalculatePercentContributions(im, targetDecoyGenerator);
    }));
}