/// <summary>
/// Creates a legacy (pre-mProphet default) peak scoring model using the fixed
/// built-in legacy feature calculator set.
/// </summary>
/// <param name="name">Name of the model.</param>
/// <param name="parameters">Linear weights and bias, or null for an untrained model.</param>
/// <param name="usesDecoys">True if the model scores against decoy peptides.</param>
/// <param name="usesSecondBest">True if second-best target peaks are used as false targets.</param>
public LegacyScoringModel(string name, LinearModelParams parameters = null, bool usesDecoys = true, bool usesSecondBest = false)
    : base(name)
{
    // No calculator list argument: the legacy model always uses its fixed set.
    SetPeakFeatureCalculators();
    Parameters = parameters;
    UsesDecoys = usesDecoys;
    UsesSecondBest = usesSecondBest;
}
/// <summary>
/// Background loader that auto-trains an mProphet peak scoring model for the document
/// and then reintegrates peaks with it, retrying the document update until the trained
/// settings can be applied to the (possibly concurrently changing) current document.
/// </summary>
/// <exception cref="InvalidDataException">
/// Thrown when the document has no decoy transition groups, or when scoring leaves
/// some peptides without scores (incompatible model).
/// </exception>
protected override bool LoadBackground(IDocumentContainer container, SrmDocument document, SrmDocument docCurrent)
{
    var loadMonitor = new LoadMonitor(this, container, container.Document);
    // Start from an untrained mProphet model (null parameters) named after the document
    // file, with the default calculators for this document and usesDecoys = true.
    IPeakScoringModel scoringModel = new MProphetPeakScoringModel(
        Path.GetFileNameWithoutExtension(container.DocumentFilePath), null as LinearModelParams,
        MProphetPeakScoringModel.GetDefaultCalculators(docCurrent), true);
    var targetDecoyGenerator = new TargetDecoyGenerator(docCurrent, scoringModel, this, loadMonitor);
    // Get scores for target and decoy groups.
    List<IList<float[]>> targetTransitionGroups, decoyTransitionGroups;
    targetDecoyGenerator.GetTransitionGroups(out targetTransitionGroups, out decoyTransitionGroups);
    // Auto-training requires decoys in the document.
    if (!decoyTransitionGroups.Any())
    {
        throw new InvalidDataException();
    }
    // Set initial weights based on previous model (with NaN's reset to 0)
    var initialWeights = new double[scoringModel.PeakFeatureCalculators.Count];
    // But then set to NaN the weights that have unknown values for this dataset,
    // which excludes those calculators from training.
    for (var i = 0; i < initialWeights.Length; ++i)
    {
        if (!targetDecoyGenerator.EligibleScores[i])
        {
            initialWeights[i] = double.NaN;
        }
    }
    var initialParams = new LinearModelParams(initialWeights);
    // Train the model.
    scoringModel = scoringModel.Train(targetTransitionGroups, decoyTransitionGroups, targetDecoyGenerator, initialParams,
        null, null, scoringModel.UsesSecondBest, true, loadMonitor);
    SrmDocument docNew;
    do
    {
        // Re-read the live document on every attempt; CompleteProcessing below fails
        // if the document changed since docCurrent was captured, forcing a retry.
        docCurrent = container.Document;
        // Turn auto-train off and install the newly trained model in the settings.
        docNew = docCurrent.ChangeSettings(docCurrent.Settings.ChangePeptideIntegration(i =>
            i.ChangeAutoTrain(false).ChangePeakScoringModel((PeakScoringModelSpec)scoringModel)));
        // Reintegrate peaks
        var resultsHandler = new MProphetResultsHandler(docNew, (PeakScoringModelSpec)scoringModel, _cachedFeatureScores);
        resultsHandler.ScoreFeatures(loadMonitor);
        if (resultsHandler.IsMissingScores())
        {
            throw new InvalidDataException(Resources.ImportPeptideSearchManager_LoadBackground_The_current_peak_scoring_model_is_incompatible_with_one_or_more_peptides_in_the_document_);
        }
        docNew = resultsHandler.ChangePeaks(loadMonitor);
    }
    while (!CompleteProcessing(container, docNew, docCurrent));
    return (true);
}
// Test that the dialog behaves correctly when opening a model
// that is incompatible with the dataset (some or all composite scores are NaN's)
protected void TestIncompatibleDataSet()
{
    // Define an incompatible model: NaN weights mark calculators whose scores
    // are unknown for this dataset.
    var weights = new[] { 0.5322, -1.0352, double.NaN, double.NaN, 1.4744, 0.0430, 0.0477, -0.2740,
                          double.NaN, 2.0096, 7.7726, -0.0566, 0.4751, 0.5, 0.5,
                          double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN };
    var parameters = new LinearModelParams(weights, -2.5);
    var incompatibleModel = new MProphetPeakScoringModel("incompatible", parameters, null, true);
    Settings.Default.PeakScoringModelList.Add(incompatibleModel);
    // Select the incompatible model in peptide settings.
    RunDlg<PeptideSettingsUI>(SkylineWindow.ShowPeptideSettingsUI, peptideSettingsDlg =>
    {
        peptideSettingsDlg.ComboPeakScoringModelSelected = "incompatible";
        peptideSettingsDlg.OkDialog();
    });
    // Open the model for editing from the Reintegrate dialog's model list.
    var reintegrateDlgIncompatible = ShowDialog<ReintegrateDlg>(SkylineWindow.ShowReintegrateDialog);
    var editList = ShowDialog<EditListDlg<SettingsListBase<PeakScoringModelSpec>, PeakScoringModelSpec>>(
        reintegrateDlgIncompatible.EditPeakScoringModel);
    RunUI(() => editList.SelectItem("incompatible")); // Not L10N
    RunDlg<EditPeakScoringModelDlg>(editList.EditItem, editDlgTemp =>
    {
        // All of the percentage fields should be null
        VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[10], 0.0);
        editDlgTemp.TrainModelClick();
        // Cell values go back to the standard trained model after we train and enable calculators,
        // despite having been loaded with weird values
        editDlgTemp.SetChecked(3, true);
        editDlgTemp.TrainModelClick();
        VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[1], 1.0, false);
        editDlgTemp.CancelDialog();
    });
    OkDialog(editList, editList.OkDialog);
    // Trying to reintegrate gives an error because the model is incompatible
    RunDlg<MessageDlg>(reintegrateDlgIncompatible.OkDialog, messageDlg =>
    {
        Assert.AreEqual(TextUtil.LineSeparate(string.Format(Resources.ReintegrateDlg_OkDialog_Failed_attempting_to_reintegrate_peaks_),
            Resources.ReintegrateDlg_OkDialog_The_current_peak_scoring_model_is_incompatible_with_one_or_more_peptides_in_the_document___Please_train_a_new_model_),
            messageDlg.Message);
        messageDlg.OkDialog();
    });
    OkDialog(reintegrateDlgIncompatible, reintegrateDlgIncompatible.CancelDialog);
}
/// <summary>
/// "Trains" the legacy model. Weights are not learned from the data: each weight is
/// fixed at DEFAULT_WEIGHTS, except entries that are NaN in
/// <paramref name="initParameters"/> (disabled calculators), which stay NaN. The data
/// are used only to rescale the parameters against the decoy score distribution.
/// </summary>
public override IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
    bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
{
    return ChangeProp(ImClone(this), im =>
    {
        int nWeights = initParameters.Weights.Count;
        var weights = new double[nWeights];
        // Preserve NaN (disabled) markers; otherwise use the fixed default weight.
        for (int i = 0; i < initParameters.Weights.Count; ++i)
        {
            weights[i] = double.IsNaN(initParameters.Weights[i]) ? double.NaN : DEFAULT_WEIGHTS[i];
        }
        var parameters = new LinearModelParams(weights);
        ScoredGroupPeaksSet decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
        ScoredGroupPeaksSet targetTransitionGroups = new ScoredGroupPeaksSet(targets);
        // Targets must be scored before second-best selection can pick peaks below.
        targetTransitionGroups.ScorePeaks(parameters.Weights);
        if (includeSecondBest)
        {
            // Treat the second-best target peaks as additional decoys.
            ScoredGroupPeaksSet secondBestTransitionGroups;
            targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
            foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
            {
                decoyTransitionGroups.Add(secondBestGroup);
            }
        }
        // Score decoys after any second-best groups have been added.
        decoyTransitionGroups.ScorePeaks(parameters.Weights);
        im.UsesDecoys = decoys.Count > 0;
        im.UsesSecondBest = includeSecondBest;
        // Normalize so that decoy scores have zero mean and unit standard deviation.
        im.Parameters = parameters.RescaleParameters(decoyTransitionGroups.Mean, decoyTransitionGroups.Stdev);
    });
}
/// <summary>
/// Deserializes the legacy scoring model: decoy/false-target flags and bias come from
/// attributes; per-calculator weights, when present, come from child elements.
/// </summary>
/// <exception cref="InvalidDataException">
/// Thrown when a serialized calculator type does not match the fixed legacy calculator
/// at the same position.
/// </exception>
public override void ReadXml(XmlReader reader)
{
    // Read tag attributes
    base.ReadXml(reader);
    // Earlier versions always used decoys only
    UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
    UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets, false);
    double bias = reader.GetDoubleAttribute(ATTR.bias);
    // Must be captured before Read() advances the cursor past the element tag.
    bool isEmpty = reader.IsEmptyElement;
    // Consume tag
    reader.Read();
    if (!isEmpty)
    {
        // Read calculators
        var calculators = new List<FeatureCalculator>();
        reader.ReadElements(calculators);
        var weights = new double[calculators.Count];
        for (int i = 0; i < calculators.Count; i++)
        {
            // The legacy calculator list is fixed; the serialized order must match it.
            if (calculators[i].Type != PeakFeatureCalculators[i].GetType())
                throw new InvalidDataException(Resources.LegacyScoringModel_ReadXml_Invalid_legacy_model_);
            weights[i] = calculators[i].Weight;
        }
        Parameters = new LinearModelParams(weights, bias);
        reader.ReadEndElement();
    }
    DoValidate();
}
/// <summary>
/// Trains a new mProphet scoring model from the current document for command-line use
/// and saves it to the user's model list. Writes progress and errors to _out; returns
/// null on failure instead of throwing.
/// </summary>
/// <param name="modelName">Name for the new model.</param>
/// <param name="decoys">True to train against decoy peptides (errors if the document has none).</param>
/// <param name="secondBest">True to also use second-best target peaks as false targets.</param>
private ModelAndFeatures CreateScoringModel(string modelName, bool decoys, bool secondBest)
{
    _out.WriteLine(Resources.CommandLine_CreateScoringModel_Creating_scoring_model__0_, modelName);
    try
    {
        // Create new scoring model using the default calculators.
        var scoringModel = new MProphetPeakScoringModel(modelName, null as LinearModelParams, null, decoys, secondBest);
        var progressMonitor = new CommandProgressMonitor(_out, new ProgressStatus(String.Empty));
        var targetDecoyGenerator = new TargetDecoyGenerator(_doc, scoringModel, progressMonitor);
        // Get scores for target and decoy groups.
        List<IList<float[]>> targetTransitionGroups;
        List<IList<float[]>> decoyTransitionGroups;
        targetDecoyGenerator.GetTransitionGroups(out targetTransitionGroups, out decoyTransitionGroups);
        // If decoy box is checked and no decoys, throw an error
        if (decoys && decoyTransitionGroups.Count == 0)
        {
            _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__There_are_no_decoy_peptides_in_the_document__Failed_to_create_scoring_model_);
            return null;
        }
        // Use decoys for training only if decoy box is checked
        if (!decoys)
            decoyTransitionGroups = new List<IList<float[]>>();
        // Set initial weights based on previous model (with NaN's reset to 0)
        var initialWeights = new double[scoringModel.PeakFeatureCalculators.Count];
        // But then set to NaN the weights that have unknown values for this dataset,
        // which excludes those calculators from training.
        for (int i = 0; i < initialWeights.Length; ++i)
        {
            if (!targetDecoyGenerator.EligibleScores[i])
                initialWeights[i] = double.NaN;
        }
        var initialParams = new LinearModelParams(initialWeights);
        // Train the model.
        scoringModel = (MProphetPeakScoringModel)scoringModel.Train(targetTransitionGroups, decoyTransitionGroups, initialParams,
            secondBest, true, progressMonitor);
        // Make the trained model available in the user's saved settings.
        Settings.Default.PeakScoringModelList.SetValue(scoringModel);
        return new ModelAndFeatures(scoringModel, targetDecoyGenerator.PeakGroupFeatures);
    }
    catch (Exception x)
    {
        // Command-line tool: report and continue rather than crash the process.
        _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__Failed_to_create_scoring_model_);
        _out.WriteLine(x.Message);
        return null;
    }
}
/// <summary>
/// Calculate scores for targets and decoys. A transition is selected from each transition group using the
/// scoring weights, and then its score is calculated using the calculator weights applied to each feature.
/// For target groups with more than one peak, the second-best peak's score is also recorded
/// as a false-target score.
/// </summary>
/// <param name="scoringParams">Parameters to choose the best peak</param>
/// <param name="calculatorParams">Parameters to calculate the score of the best peak.</param>
/// <param name="targetScores">Output list of target scores.</param>
/// <param name="decoyScores">Output list of decoy scores.</param>
/// <param name="secondBestScores">Output list of false target scores.</param>
/// <param name="invert">If true, select minimum rather than maximum scores</param>
public void GetScores(LinearModelParams scoringParams, LinearModelParams calculatorParams, out List<double> targetScores,
    out List<double> decoyScores, out List<double> secondBestScores, bool invert = false)
{
    targetScores = new List<double>();
    decoyScores = new List<double>();
    secondBestScores = new List<double>();
    // Negating the selection score turns "pick maximum" into "pick minimum".
    int invertSign = invert ? -1 : 1;
    foreach (var peakTransitionGroupFeatures in _peakTransitionGroupFeaturesList)
    {
        PeakGroupFeatures maxFeatures = null;
        PeakGroupFeatures nextFeatures = null;
        double maxScore = Double.MinValue;
        double nextScore = Double.MinValue;
        // No peaks in this transition group record
        if (peakTransitionGroupFeatures.PeakGroupFeatures.Count == 0)
            continue;
        // Find the highest and second highest scores among the transitions in this group.
        foreach (var peakGroupFeatures in peakTransitionGroupFeatures.PeakGroupFeatures)
        {
            double score = invertSign * GetScore(scoringParams, peakGroupFeatures);
            if (nextScore < score)
            {
                if (maxScore < score)
                {
                    // New best peak: the previous best becomes second best.
                    nextScore = maxScore;
                    maxScore = score;
                    nextFeatures = maxFeatures;
                    maxFeatures = peakGroupFeatures;
                }
                else
                {
                    nextScore = score;
                    nextFeatures = peakGroupFeatures;
                }
            }
        }
        // Score the selected peak with the calculator weights; NaN if none selected.
        double currentScore = maxFeatures == null ?
            Double.NaN : GetScore(calculatorParams, maxFeatures);
        if (peakTransitionGroupFeatures.Id.NodePep.IsDecoy)
            decoyScores.Add(currentScore);
        else
        {
            targetScores.Add(currentScore);
            // Skip if only one peak
            if (peakTransitionGroupFeatures.PeakGroupFeatures.Count == 1)
                continue;
            double secondBestScore = nextFeatures == null ?
                Double.NaN : GetScore(calculatorParams, nextFeatures);
            secondBestScores.Add(secondBestScore);
        }
    }
}
/// <summary>
/// Scores a single candidate peak by applying the model's linear weights and bias
/// to the peak's feature values (delegates to the weights/bias overload).
/// </summary>
private static double GetScore(LinearModelParams parameters, PeakGroupFeatures peakGroupFeatures)
{
    var modelWeights = parameters.Weights;
    double modelBias = parameters.Bias;
    return GetScore(modelWeights, peakGroupFeatures, modelBias);
}
/// <summary>
/// Train the model by iterative calculating weights to separate target and decoy transition groups.
/// </summary>
/// <param name="targets">Target transition groups.</param>
/// <param name="decoys">Decoy transition groups.</param>
/// <param name="initParameters">Initial model parameters (weights and bias)</param>
/// <param name="includeSecondBest"> Include the second best peaks in the targets as decoys?</param>
/// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
/// <param name="progressMonitor">Optional progress/cancel monitor; cancellation throws OperationCanceledException.</param>
/// <returns>Immutable model with new weights.</returns>
public override IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
    bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
{
    if (initParameters == null)
        initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
    return ChangeProp(ImClone(this), im =>
    {
        // Drop empty transition groups; they contribute nothing to training.
        targets = targets.Where(list => list.Count > 0).ToList();
        decoys = decoys.Where(list => list.Count > 0).ToList();
        var targetTransitionGroups = new ScoredGroupPeaksSet(targets);
        var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
        // Bootstrap from the pre-trained legacy model
        if (preTrain)
        {
            // Weights default to 0; NaN entries from initParameters mark calculators
            // that are disabled for this dataset and stay NaN.
            var preTrainedWeights = new double[initParameters.Weights.Count];
            for (int i = 0; i < preTrainedWeights.Length; ++i)
            {
                if (double.IsNaN(initParameters.Weights[i]))
                {
                    preTrainedWeights[i] = double.NaN;
                }
            }
            // Pick whichever legacy calculator set (standard vs analyte) has more
            // enabled calculators for this dataset.
            int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
            int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
            bool hasStandards = standardEnabledCount >= analyteEnabledCount;
            var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
            for (int i = 0; i < calculators.Length; ++i)
            {
                // Retention time prediction is excluded from the legacy bootstrap weights.
                if (calculators[i].GetType() == typeof (MQuestRetentionTimePredictionCalc))
                    continue;
                SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
            }
            targetTransitionGroups.ScorePeaks(preTrainedWeights);
            decoyTransitionGroups.ScorePeaks(preTrainedWeights);
        }
        // Iteratively refine the weights through multiple iterations.
        var calcWeights = new double[initParameters.Weights.Count];
        Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
        double decoyMean = 0;
        double decoyStdev = 0;
        bool colinearWarning = false;
        // This may take a long time between progress updates, but just measure progress by cycles through the training
        var status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
        if (progressMonitor != null)
            progressMonitor.UpdateProgress(status);
        for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++)
        {
            if (progressMonitor != null)
            {
                if (progressMonitor.IsCanceled)
                    throw new OperationCanceledException();
                progressMonitor.UpdateProgress(status =
                    status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model__iteration__0__of__1__, iteration + 1, MAX_ITERATIONS))
                          .ChangePercentComplete((iteration + 1) * 100 / (MAX_ITERATIONS + 1)));
            }
            im.CalculateWeights(iteration, targetTransitionGroups, decoyTransitionGroups, includeSecondBest, calcWeights,
                out decoyMean, out decoyStdev, ref colinearWarning);
            GC.Collect(); // Each loop generates a number of large objects. GC helps to keep private bytes under control
        }
        if (progressMonitor != null)
            progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
        var parameters = new LinearModelParams(calcWeights);
        // Normalize so decoy scores have zero mean and unit standard deviation.
        parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
        im.Parameters = parameters;
        im.ColinearWarning = colinearWarning;
        im.UsesSecondBest = includeSecondBest;
        im.UsesDecoys = decoys.Count > 0;
    });
}
/// <summary>
/// Deserializes the mProphet scoring model: flags and bias from attributes, then the
/// calculator types and their weights from child elements. Unlike the legacy model,
/// the calculator list itself is serialized and is rebuilt from the stored types.
/// </summary>
public override void ReadXml(XmlReader reader)
{
    // Read tag attributes
    base.ReadXml(reader);
    ColinearWarning = reader.GetBoolAttribute(ATTR.colinear_warning);
    // Earlier versions always used decoys only
    UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
    UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets);
    double bias = reader.GetDoubleAttribute(ATTR.bias);
    // Consume tag
    reader.Read();
    // Read calculators
    // NOTE(review): no IsEmptyElement check here (the legacy ReadXml has one) — an
    // element serialized without child calculators would fail; confirm models are
    // always written with their calculator list.
    var calculators = new List<FeatureCalculator>();
    reader.ReadElements(calculators);
    var peakFeatureCalculators = new List<IPeakFeatureCalculator>(calculators.Count);
    var weights = new double[calculators.Count];
    for (int i = 0; i < calculators.Count; i++)
    {
        weights[i] = calculators[i].Weight;
        peakFeatureCalculators.Add(PeakFeatureCalculator.GetCalculator(calculators[i].Type));
    }
    SetPeakFeatureCalculators(peakFeatureCalculators);
    Parameters = new LinearModelParams(weights, bias);
    reader.ReadEndElement();
    DoValidate();
}
/// <summary>
/// Creates an mProphet peak scoring model and validates it immediately.
/// </summary>
/// <param name="name">Name of the model.</param>
/// <param name="parameters">Linear weights and bias, or null for an untrained model.</param>
/// <param name="peakFeatureCalculators">Feature calculators to use, or null for the defaults.</param>
/// <param name="usesDecoys">True if the model is trained against decoy peptides.</param>
/// <param name="usesSecondBest">True if second-best target peaks are used as false targets.</param>
/// <param name="colinearWarning">True if training detected colinear features.</param>
public MProphetPeakScoringModel(
    string name,
    LinearModelParams parameters,
    IList<IPeakFeatureCalculator> peakFeatureCalculators = null,
    bool usesDecoys = false,
    bool usesSecondBest = false,
    bool colinearWarning = false)
    : base(name)
{
    // Fall back to the default calculator set when none was supplied.
    var calculators = peakFeatureCalculators;
    if (calculators == null)
        calculators = DEFAULT_CALCULATORS;
    SetPeakFeatureCalculators(calculators);
    Parameters = parameters;
    UsesDecoys = usesDecoys;
    UsesSecondBest = usesSecondBest;
    ColinearWarning = colinearWarning;
    Lambda = DEFAULT_R_LAMBDA; // Default from R
    DoValidate();
}
// Test that the dialog behaves correctly when opening a model
// that is incompatible with the dataset (some or all composite scores are NaN's)
protected void TestIncompatibleDataSet()
{
    // Define an incompatible model: NaN weights mark calculators whose scores
    // are unknown for this dataset.
    var weights = new[] {0.5322, -1.0352, double.NaN, double.NaN, 1.4744, 0.0430, 0.0477, -0.2740,
                         double.NaN, 2.0096, 7.7726, -0.0566, 0.4751, 0.5, 0.5,
                         double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN};
    var parameters = new LinearModelParams(weights, -2.5);
    var incompatibleModel = new MProphetPeakScoringModel("incompatible", parameters, null, true);
    Settings.Default.PeakScoringModelList.Add(incompatibleModel);
    // Select the incompatible model in peptide settings.
    RunDlg<PeptideSettingsUI>(SkylineWindow.ShowPeptideSettingsUI, peptideSettingsDlg =>
    {
        peptideSettingsDlg.ComboPeakScoringModelSelected = "incompatible";
        peptideSettingsDlg.OkDialog();
    });
    // Open the model for editing from the Reintegrate dialog's model list.
    var reintegrateDlgIncompatible = ShowDialog<ReintegrateDlg>(SkylineWindow.ShowReintegrateDialog);
    var editList = ShowDialog<EditListDlg<SettingsListBase<PeakScoringModelSpec>, PeakScoringModelSpec>>(
        reintegrateDlgIncompatible.EditPeakScoringModel);
    RunUI(() => editList.SelectItem("incompatible")); // Not L10N
    RunDlg<EditPeakScoringModelDlg>(editList.EditItem, editDlgTemp =>
    {
        // All of the percentage fields should be null
        VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[10], 0.0);
        editDlgTemp.TrainModelClick();
        // Cell values go back to the standard trained model after we train and enable calculators,
        // despite having been loaded with weird values
        editDlgTemp.SetChecked(3, true);
        editDlgTemp.TrainModelClick();
        VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[1], 1.0, false);
        editDlgTemp.CancelDialog();
    });
    OkDialog(editList, editList.OkDialog);
    // Trying to reintegrate gives an error because the model is incompatible
    RunDlg<MessageDlg>(reintegrateDlgIncompatible.OkDialog, messageDlg =>
    {
        Assert.AreEqual(TextUtil.LineSeparate(string.Format(Resources.ReintegrateDlg_OkDialog_Failed_attempting_to_reintegrate_peaks_),
            Resources.ReintegrateDlg_OkDialog_The_current_peak_scoring_model_is_incompatible_with_one_or_more_peptides_in_the_document___Please_train_a_new_model_),
            messageDlg.Message);
        messageDlg.OkDialog();
    });
    OkDialog(reintegrateDlgIncompatible, reintegrateDlgIncompatible.CancelDialog);
}