public override void ReadXml(XmlReader reader)
{
    // Read tag attributes
    base.ReadXml(reader);
    // Earlier versions always used decoys only
    UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
    UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets, false);
    double bias = reader.GetDoubleAttribute(ATTR.bias);

    bool isEmpty = reader.IsEmptyElement;

    // Consume tag
    reader.Read();

    if (!isEmpty)
    {
        // Read calculators
        var calculators = new List<FeatureCalculator>();
        reader.ReadElements(calculators);
        var weights = new double[calculators.Count];
        for (int i = 0; i < calculators.Count; i++)
        {
            if (calculators[i].Type != PeakFeatureCalculators[i].GetType())
                throw new InvalidDataException(Resources.LegacyScoringModel_ReadXml_Invalid_legacy_model_);
            weights[i] = calculators[i].Weight;
        }
        Parameters = new LinearModelParams(weights, bias);

        reader.ReadEndElement();
    }

    DoValidate();
}
public override IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
    bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
{
    return ChangeProp(ImClone(this), im =>
    {
        int nWeights = initParameters.Weights.Count;
        var weights = new double[nWeights];
        for (int i = 0; i < initParameters.Weights.Count; ++i)
        {
            // NaN marks a disabled calculator; every enabled calculator gets its legacy default weight
            weights[i] = double.IsNaN(initParameters.Weights[i]) ? double.NaN : DEFAULT_WEIGHTS[i];
        }
        var parameters = new LinearModelParams(weights);

        ScoredGroupPeaksSet decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
        ScoredGroupPeaksSet targetTransitionGroups = new ScoredGroupPeaksSet(targets);
        targetTransitionGroups.ScorePeaks(parameters.Weights);

        if (includeSecondBest)
        {
            ScoredGroupPeaksSet secondBestTransitionGroups;
            targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
            foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
                decoyTransitionGroups.Add(secondBestGroup);
        }
        decoyTransitionGroups.ScorePeaks(parameters.Weights);

        im.UsesDecoys = decoys.Count > 0;
        im.UsesSecondBest = includeSecondBest;
        im.Parameters = parameters.RescaleParameters(decoyTransitionGroups.Mean, decoyTransitionGroups.Stdev);
    });
}
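// The RescaleParameters call above standardizes the model against the decoy score
// distribution. Assuming a plain z-score transform, score' = (score - mean) / stdev,
// which for a linear model w·f + b reduces to scaling each weight and shifting the
// bias. This is a hedged sketch of that algebra, not the library's actual
// RescaleParameters body; the helper name is illustrative.
static LinearModelParams RescaleSketch(IList<double> weights, double bias, double mean, double stdev)
{
    // (w·f + b - mean) / stdev  ==  (w / stdev)·f + (b - mean) / stdev
    var scaled = new double[weights.Count];
    for (int i = 0; i < weights.Count; i++)
        scaled[i] = double.IsNaN(weights[i]) ? double.NaN : weights[i] / stdev;
    return new LinearModelParams(scaled, (bias - mean) / stdev);
}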
private void GetActiveScoredValues(IPeakScoringModel peakScoringModel,
                                   LinearModelParams scoringParams,
                                   LinearModelParams calculatorParams,
                                   out List<double> targetScores,
                                   out List<double> activeDecoyScores)
{
    targetScores = new List<double>(TargetCount);
    List<double> decoyScores = peakScoringModel.UsesDecoys ? new List<double>(DecoyCount) : null;
    List<double> secondBestScores = peakScoringModel.UsesSecondBest ? new List<double>(TargetCount) : null;

    GetScores(scoringParams, calculatorParams, targetScores, decoyScores, secondBestScores);

    if (peakScoringModel.UsesDecoys && !peakScoringModel.UsesSecondBest)
    {
        activeDecoyScores = decoyScores;
    }
    else if (peakScoringModel.UsesSecondBest && !peakScoringModel.UsesDecoys)
    {
        activeDecoyScores = secondBestScores;
    }
    else
    {
        activeDecoyScores = new List<double>();
        if (decoyScores != null)
            activeDecoyScores.AddRange(decoyScores);
        if (secondBestScores != null)
            activeDecoyScores.AddRange(secondBestScores);
    }
}
public override void ReadXml(XmlReader reader)
{
    // Read tag attributes
    base.ReadXml(reader);
    ColinearWarning = reader.GetBoolAttribute(ATTR.colinear_warning);
    // Earlier versions always used decoys only
    UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
    UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets);
    double bias = reader.GetDoubleAttribute(ATTR.bias);

    // Consume tag
    reader.Read();

    // Read calculators
    var calculators = new List<FeatureCalculator>();
    reader.ReadElements(calculators);
    var peakFeatureCalculators = new List<IPeakFeatureCalculator>(calculators.Count);
    var weights = new double[calculators.Count];
    for (int i = 0; i < calculators.Count; i++)
    {
        weights[i] = calculators[i].Weight;
        peakFeatureCalculators.Add(PeakFeatureCalculator.GetCalculator(calculators[i].Type));
    }
    SetPeakFeatureCalculators(peakFeatureCalculators);
    Parameters = new LinearModelParams(weights, bias);

    reader.ReadEndElement();

    DoValidate();
}
/// <summary>
/// Calculate scores for targets and decoys. A peak is selected from each transition group using the
/// scoring weights, and then its score is calculated using the calculator weights applied to each feature.
/// </summary>
/// <param name="scoringParams">Parameters to choose the best peak.</param>
/// <param name="calculatorParams">Parameters to calculate the score of the best peak.</param>
/// <param name="targetScores">Output list of target scores.</param>
/// <param name="decoyScores">Output list of decoy scores.</param>
/// <param name="secondBestScores">Output list of false target scores.</param>
/// <param name="invert">If true, select minimum rather than maximum scores.</param>
public void GetScores(LinearModelParams scoringParams, LinearModelParams calculatorParams, out List<double> targetScores,
    out List<double> decoyScores, out List<double> secondBestScores, bool invert = false)
{
    targetScores = new List<double>();
    decoyScores = new List<double>();
    secondBestScores = new List<double>();
    int invertSign = invert ? -1 : 1;

    foreach (var peakTransitionGroupFeatures in _peakTransitionGroupFeaturesList)
    {
        PeakGroupFeatures maxFeatures = null;
        PeakGroupFeatures nextFeatures = null;
        double maxScore = Double.MinValue;
        double nextScore = Double.MinValue;

        // No peaks in this transition group record
        if (peakTransitionGroupFeatures.PeakGroupFeatures.Count == 0)
            continue;

        // Find the highest and second highest scores among the peaks in this group.
        foreach (var peakGroupFeatures in peakTransitionGroupFeatures.PeakGroupFeatures)
        {
            double score = invertSign * GetScore(scoringParams, peakGroupFeatures);
            if (nextScore < score)
            {
                if (maxScore < score)
                {
                    nextScore = maxScore;
                    maxScore = score;
                    nextFeatures = maxFeatures;
                    maxFeatures = peakGroupFeatures;
                }
                else
                {
                    nextScore = score;
                    nextFeatures = peakGroupFeatures;
                }
            }
        }

        double currentScore = maxFeatures == null ? Double.NaN : GetScore(calculatorParams, maxFeatures);
        if (peakTransitionGroupFeatures.Id.NodePep.IsDecoy)
        {
            decoyScores.Add(currentScore);
        }
        else
        {
            targetScores.Add(currentScore);
            // Skip if only one peak
            if (peakTransitionGroupFeatures.PeakGroupFeatures.Count == 1)
                continue;
            double secondBestScore = nextFeatures == null ? Double.NaN : GetScore(calculatorParams, nextFeatures);
            secondBestScores.Add(secondBestScore);
        }
    }
}
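// The inner loop above tracks the best and second-best peak in a single pass. A
// minimal, self-contained sketch of that pattern on plain doubles (names here are
// illustrative, not from the source):
static void FindTopTwo(IEnumerable<double> scores, out double max, out double next)
{
    max = double.MinValue;
    next = double.MinValue;
    foreach (var score in scores)
    {
        if (score > max)
        {
            next = max;       // previous best becomes second best
            max = score;
        }
        else if (score > next)
        {
            next = score;     // beats only the second best
        }
    }
}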
public LegacyScoringModel(string name, LinearModelParams parameters = null, bool usesDecoys = true, bool usesSecondBest = false)
    : base(name)
{
    SetPeakFeatureCalculators();
    Parameters = parameters;
    UsesDecoys = usesDecoys;
    UsesSecondBest = usesSecondBest;
}
/// <summary>
/// Recalculate the scores of each peak by applying the given feature weighting factors.
/// </summary>
/// <param name="weights">Array of weight factors applied to each feature.</param>
public void ScorePeaks(IList<double> weights)
{
    foreach (var peak in _scoredGroupPeaksList.SelectMany(scoredGroupPeaks => scoredGroupPeaks.ScoredPeaks))
        peak.Score = LinearModelParams.Score(peak.Features, weights, 0);

    // Calculate mean and stdev for top-scoring peaks in each transition group.
    var scores = GetMaxScores();
    var stats = new Statistics(scores);
    Mean = stats.Mean();
    Stdev = stats.StdDev();
}
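// GetMaxScores is assumed to return one score per transition group: the score of
// that group's best peak, which is what Mean and Stdev above are computed over.
// A hedged sketch of that assumption; the body is not the source's actual code.
private IList<double> GetMaxScoresSketch()
{
    return _scoredGroupPeaksList
        .Where(g => g.ScoredPeaks.Any())                  // skip empty groups
        .Select(g => g.ScoredPeaks.Max(p => p.Score))     // best peak per group
        .ToList();
}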
protected bool Equals(LinearModelParams other)
{
    if (Weights.Count != other.Weights.Count)
        return false;
    for (int i = 0; i < Weights.Count; ++i)
    {
        if (Weights[i] != other.Weights[i])
            return false;
    }
    return Bias == other.Bias;
}
public MProphetPeakScoringModel(
    string name,
    LinearModelParams parameters,
    IList<IPeakFeatureCalculator> peakFeatureCalculators = null,
    bool usesDecoys = false,
    bool usesSecondBest = false,
    bool colinearWarning = false)
    : base(name)
{
    SetPeakFeatureCalculators(peakFeatureCalculators ?? DEFAULT_CALCULATORS);
    Parameters = parameters;
    UsesDecoys = usesDecoys;
    UsesSecondBest = usesSecondBest;
    ColinearWarning = colinearWarning;
    Lambda = DEFAULT_R_LAMBDA;  // Default from R
    DoValidate();
}
public static double Score(IList<float> features, LinearModelParams parameters)
{
    return parameters.Score(features);
}
/// <summary>
/// Train the model by iteratively calculating weights to separate target and decoy transition groups.
/// </summary>
/// <param name="targets">Target transition groups.</param>
/// <param name="decoys">Decoy transition groups.</param>
/// <param name="initParameters">Initial model parameters (weights and bias).</param>
/// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
/// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
/// <param name="progressMonitor">Used to report progress to the calling context.</param>
/// <returns>Immutable model with new weights.</returns>
public override IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
    bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
{
    if (initParameters == null)
        initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
    return ChangeProp(ImClone(this), im =>
    {
        targets = targets.Where(list => list.Count > 0).ToList();
        decoys = decoys.Where(list => list.Count > 0).ToList();
        var targetTransitionGroups = new ScoredGroupPeaksSet(targets);
        var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
        // Bootstrap from the pre-trained legacy model
        if (preTrain)
        {
            var preTrainedWeights = new double[initParameters.Weights.Count];
            for (int i = 0; i < preTrainedWeights.Length; ++i)
            {
                if (double.IsNaN(initParameters.Weights[i]))
                    preTrainedWeights[i] = double.NaN;
            }
            int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
            int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
            bool hasStandards = standardEnabledCount >= analyteEnabledCount;
            var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
            for (int i = 0; i < calculators.Length; ++i)
            {
                if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                    continue;
                SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
            }
            targetTransitionGroups.ScorePeaks(preTrainedWeights);
            decoyTransitionGroups.ScorePeaks(preTrainedWeights);
        }

        // Iteratively refine the weights through multiple iterations.
        var calcWeights = new double[initParameters.Weights.Count];
        Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
        double decoyMean = 0;
        double decoyStdev = 0;
        bool colinearWarning = false;
        // This may take a long time between progress updates, but just measure progress by cycles through the training
        IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
        if (progressMonitor != null)
            progressMonitor.UpdateProgress(status);
        for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++)
        {
            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled)
                    throw new OperationCanceledException();
                progressMonitor.UpdateProgress(status =
                    status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model__iteration__0__of__1__, iteration + 1, MAX_ITERATIONS))
                          .ChangePercentComplete((iteration + 1) * 100 / (MAX_ITERATIONS + 1)));
            }

            im.CalculateWeights(iteration, targetTransitionGroups, decoyTransitionGroups, includeSecondBest, calcWeights,
                out decoyMean, out decoyStdev, ref colinearWarning);

            GC.Collect();   // Each loop generates a number of large objects. GC helps to keep private bytes under control
        }
        if (progressMonitor != null)
            progressMonitor.UpdateProgress(status.ChangePercentComplete(100));

        var parameters = new LinearModelParams(calcWeights);
        parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
        im.Parameters = parameters;
        im.ColinearWarning = colinearWarning;
        im.UsesSecondBest = includeSecondBest;
        im.UsesDecoys = decoys.Count > 0;
    });
}
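// Hypothetical usage sketch of the method above. The inputs targetFeatures and
// decoyFeatures are illustrative: one float[] of calculator outputs per candidate
// peak, grouped into one inner list per transition group. Passing null initial
// parameters enables every calculator with NaN-free default handling.
var model = new MProphetPeakScoringModel(@"trained model", null);
IPeakScoringModel trained = model.Train(targetFeatures, decoyFeatures, null,
    includeSecondBest: true,    // fold second-best target peaks into the decoys
    preTrain: true);            // bootstrap from the legacy default weights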
public abstract IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, TargetDecoyGenerator targetDecoyGenerator,
    LinearModelParams initParameters, IList<double> cutoffs, int? iterations = null, bool includeSecondBest = false,
    bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null);
public ScoredPeak CalcScore(IList<double> weights)
{
    return new ScoredPeak(Features, LinearModelParams.Score(Features, weights, 0));
}
/// <summary>
/// Calculate the score of a set of features given an array of weighting coefficients.
/// </summary>
private static double GetScore(IList<double> weights, PeakGroupFeatures peakGroupFeatures, double bias)
{
    return LinearModelParams.Score(peakGroupFeatures.Features, weights, bias);
}
private static double GetScore(LinearModelParams parameters, PeakGroupFeatures peakGroupFeatures)
{
    return GetScore(parameters.Weights, peakGroupFeatures, parameters.Bias);
}
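// A minimal sketch of what LinearModelParams.Score(features, weights, bias) is
// assumed to compute: a weighted sum plus bias, skipping disabled (NaN) weights.
// This illustrates the convention used throughout; it is not the actual body.
static double ScoreSketch(IList<float> features, IList<double> weights, double bias)
{
    double score = bias;
    for (int i = 0; i < weights.Count; i++)
    {
        if (!double.IsNaN(weights[i]))
            score += weights[i] * features[i];
    }
    return score;
}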
/// <summary>
/// Train the model by iteratively calculating weights to separate target and decoy transition groups.
/// </summary>
/// <param name="targetsIn">Target transition groups.</param>
/// <param name="decoysIn">Decoy transition groups.</param>
/// <param name="initParameters">Initial model parameters (weights and bias).</param>
/// <param name="iterations">Optional specific number of iterations to use in training.</param>
/// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
/// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
/// <param name="progressMonitor">Used to report progress to the calling context.</param>
/// <param name="documentPath">The path to the current document for writing score distributions.</param>
/// <returns>Immutable model with new weights.</returns>
public override IPeakScoringModel Train(IList<IList<float[]>> targetsIn, IList<IList<float[]>> decoysIn, LinearModelParams initParameters,
    int? iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null,
    string documentPath = null)
{
    if (initParameters == null)
        initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
    return ChangeProp(ImClone(this), im =>
    {
        // This may take a long time between progress updates, but just measure progress by cycles through the training
        IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
        if (progressMonitor != null)
            progressMonitor.UpdateProgress(status);

        var targets = targetsIn.Where(list => list.Count > 0);
        var decoys = decoysIn.Where(list => list.Count > 0);
        var targetTransitionGroups = new ScoredGroupPeaksSet(targets, targetsIn.Count);
        var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys, decoysIn.Count);

        // Iteratively refine the weights through multiple iterations.
        var calcWeights = new double[initParameters.Weights.Count];
        Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
        double qValueCutoff = 0.01;   // First iteration cut-off - if not pretraining, just start at 0.01
        // Start with scores calculated from the initial weights
        if (!preTrain)
        {
            targetTransitionGroups.ScorePeaks(calcWeights);
            decoyTransitionGroups.ScorePeaks(calcWeights);
        }
        // Bootstrap from the pre-trained legacy model
        else
        {
            qValueCutoff = 0.15;
            var preTrainedWeights = new double[initParameters.Weights.Count];
            for (int i = 0; i < preTrainedWeights.Length; ++i)
            {
                if (double.IsNaN(initParameters.Weights[i]))
                    preTrainedWeights[i] = double.NaN;
            }
            int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
            int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
            bool hasStandards = standardEnabledCount >= analyteEnabledCount;
            var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
            for (int i = 0; i < calculators.Length; ++i)
            {
                if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                    continue;
                SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
            }
            targetTransitionGroups.ScorePeaks(preTrainedWeights);
            decoyTransitionGroups.ScorePeaks(preTrainedWeights);
        }

        double decoyMean = 0;
        double decoyStdev = 0;
        bool colinearWarning = false;
        int iterationCount = iterations ?? MAX_ITERATIONS;
        int truePeaksCount = 0;
        var lastWeights = new double[calcWeights.Length];
        for (int i = 0; i < iterationCount; i++)
        {
            int percentComplete = 0;
            double decoyMeanNew, decoyStdevNew;
            bool colinearWarningNew = colinearWarning;
            int truePeaksCountNew = im.CalculateWeights(documentPath, targetTransitionGroups, decoyTransitionGroups, includeSecondBest,
                i == 0,   // Use non-parametric q values for first round, when normality assumption may not hold
                qValueCutoff, calcWeights, out decoyMeanNew, out decoyStdevNew, ref colinearWarningNew);
            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled)
                    throw new OperationCanceledException();
                // Calculate progress, but wait to make sure convergence has not occurred before setting it
                string formatText = qValueCutoff > 0.02
                    ? Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1__
                    : Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1_____2______peaks_at__3_0_____FDR_;
                percentComplete = (i + 1) * 100 / (iterationCount + 1);
                status = status.ChangeMessage(string.Format(formatText, i + 1, iterationCount, truePeaksCountNew, qValueCutoff))
                               .ChangePercentComplete(percentComplete);
            }

            if (qValueCutoff > 0.02)
            {
                // Tighten the q value cut-off for "truth" to 2% FDR
                qValueCutoff = 0.02;
                // And allow the true peaks count to go down in the next iteration
                // Though it rarely will
                truePeaksCountNew = 0;
            }
            else if (truePeaksCountNew < truePeaksCount)
            {
                // The model has leveled off enough to begin losing discriminant value
                if (qValueCutoff > 0.01)
                {
                    // Tighten the q value cut-off for "truth" to 1% FDR
                    qValueCutoff = 0.01;
                    // And allow the true peaks count to go down in the next iteration
                    truePeaksCountNew = 0;
                }
                else
                {
                    if (progressMonitor != null)
                    {
                        progressMonitor.UpdateProgress(status =
                            status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Scoring_model_converged__iteration__0_____1______peaks_at__2_0_____FDR_, i + 1, truePeaksCount, qValueCutoff))
                                  .ChangePercentComplete(Math.Max(95, percentComplete)));
                    }
                    calcWeights = lastWeights;
                    break;
                }
            }
            truePeaksCount = truePeaksCountNew;
            Array.Copy(calcWeights, lastWeights, calcWeights.Length);
            decoyMean = decoyMeanNew;
            decoyStdev = decoyStdevNew;
            colinearWarning = colinearWarningNew;
            if (progressMonitor != null)
                progressMonitor.UpdateProgress(status);
        }

        if (progressMonitor != null)
            progressMonitor.UpdateProgress(status.ChangePercentComplete(100));

        var parameters = new LinearModelParams(calcWeights);
        parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
        im.Parameters = parameters;
        im.ColinearWarning = colinearWarning;
        im.UsesSecondBest = includeSecondBest;
        im.UsesDecoys = decoysIn.Count > 0;
    });
}
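// A hedged sketch of the cut-off schedule driving the loop above, with values taken
// from the source: pre-trained runs start at q <= 0.15, tighten to 0.02, then 0.01,
// and training stops (restoring the previous iteration's weights) once the count of
// "true" peaks at 1% FDR starts to drop. The helper name here is illustrative.
static bool TryTightenCutoff(ref double qValueCutoff, ref int truePeaksCountNew, int truePeaksCount)
{
    if (qValueCutoff > 0.02)
    {
        qValueCutoff = 0.02;      // first tightening step
        truePeaksCountNew = 0;    // let the count reset under the new cut-off
        return true;
    }
    if (truePeaksCountNew < truePeaksCount && qValueCutoff > 0.01)
    {
        qValueCutoff = 0.01;      // final tightening step
        truePeaksCountNew = 0;
        return true;
    }
    return false;  // no tightening left; a drop at 1% FDR is treated as convergence
}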
public abstract IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
    bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null);