示例#1
0
        public LegacyScoringModel(string name, LinearModelParams parameters = null, bool usesDecoys = true, bool usesSecondBest = false)
            : base(name)
        {
            SetPeakFeatureCalculators();

            Parameters = parameters;
            UsesDecoys = usesDecoys;
            UsesSecondBest = usesSecondBest;
        }
示例#2
0
        protected override bool LoadBackground(IDocumentContainer container, SrmDocument document, SrmDocument docCurrent)
        {
            var loadMonitor = new LoadMonitor(this, container, container.Document);

            IPeakScoringModel scoringModel = new MProphetPeakScoringModel(
                Path.GetFileNameWithoutExtension(container.DocumentFilePath), null as LinearModelParams,
                MProphetPeakScoringModel.GetDefaultCalculators(docCurrent), true);

            var targetDecoyGenerator = new TargetDecoyGenerator(docCurrent, scoringModel, this, loadMonitor);

            // Get scores for target and decoy groups.
            List <IList <float[]> > targetTransitionGroups, decoyTransitionGroups;

            targetDecoyGenerator.GetTransitionGroups(out targetTransitionGroups, out decoyTransitionGroups);
            if (!decoyTransitionGroups.Any())
            {
                throw new InvalidDataException();
            }

            // Set intial weights based on previous model (with NaN's reset to 0)
            var initialWeights = new double[scoringModel.PeakFeatureCalculators.Count];

            // But then set to NaN the weights that have unknown values for this dataset
            for (var i = 0; i < initialWeights.Length; ++i)
            {
                if (!targetDecoyGenerator.EligibleScores[i])
                {
                    initialWeights[i] = double.NaN;
                }
            }
            var initialParams = new LinearModelParams(initialWeights);

            // Train the model.
            scoringModel = scoringModel.Train(targetTransitionGroups, decoyTransitionGroups, targetDecoyGenerator, initialParams, null, null, scoringModel.UsesSecondBest, true, loadMonitor);

            SrmDocument docNew;

            do
            {
                docCurrent = container.Document;
                docNew     = docCurrent.ChangeSettings(docCurrent.Settings.ChangePeptideIntegration(i =>
                                                                                                    i.ChangeAutoTrain(false).ChangePeakScoringModel((PeakScoringModelSpec)scoringModel)));

                // Reintegrate peaks
                var resultsHandler = new MProphetResultsHandler(docNew, (PeakScoringModelSpec)scoringModel, _cachedFeatureScores);
                resultsHandler.ScoreFeatures(loadMonitor);
                if (resultsHandler.IsMissingScores())
                {
                    throw new InvalidDataException(Resources.ImportPeptideSearchManager_LoadBackground_The_current_peak_scoring_model_is_incompatible_with_one_or_more_peptides_in_the_document_);
                }
                docNew = resultsHandler.ChangePeaks(loadMonitor);
            }while (!CompleteProcessing(container, docNew, docCurrent));

            return(true);
        }
示例#3
0
        // Test that the dialog behaves correctly when opening a model
        // that is incompatible with the dataset (some or all composite scores are NaN's)
        protected void TestIncompatibleDataSet()
        {
            // Define an incompatible model
            var weights = new[] { 0.5322, -1.0352, double.NaN, double.NaN, 1.4744, 0.0430, 0.0477, -0.2740, double.NaN,
                                  2.0096, 7.7726, -0.0566, 0.4751, 0.5, 0.5, double.NaN, double.NaN,
                                  double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN };
            var parameters        = new LinearModelParams(weights, -2.5);
            var incompatibleModel = new MProphetPeakScoringModel("incompatible", parameters, null, true);

            Settings.Default.PeakScoringModelList.Add(incompatibleModel);
            RunDlg <PeptideSettingsUI>(SkylineWindow.ShowPeptideSettingsUI, peptideSettingsDlg =>
            {
                peptideSettingsDlg.ComboPeakScoringModelSelected = "incompatible";
                peptideSettingsDlg.OkDialog();
            });

            var reintegrateDlgIncompatible = ShowDialog <ReintegrateDlg>(SkylineWindow.ShowReintegrateDialog);

            var editList = ShowDialog <EditListDlg <SettingsListBase <PeakScoringModelSpec>, PeakScoringModelSpec> >(
                reintegrateDlgIncompatible.EditPeakScoringModel);

            RunUI(() => editList.SelectItem("incompatible")); // Not L10N

            RunDlg <EditPeakScoringModelDlg>(editList.EditItem, editDlgTemp =>
            {
                // All of the percentage fields should be null
                VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[10], 0.0);
                editDlgTemp.TrainModelClick();
                // Cell values go back to the standard trained model after we train and enable calculators,
                // despite having been loaded with weird values
                editDlgTemp.SetChecked(3, true);
                editDlgTemp.TrainModelClick();
                VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[1], 1.0, false);
                editDlgTemp.CancelDialog();
            });
            OkDialog(editList, editList.OkDialog);
            // Trying to reintegrate gives an error because the model is incompatible
            RunDlg <MessageDlg>(reintegrateDlgIncompatible.OkDialog, messageDlg =>
            {
                Assert.AreEqual(TextUtil.LineSeparate(string.Format(Resources.ReintegrateDlg_OkDialog_Failed_attempting_to_reintegrate_peaks_),
                                                      Resources.ReintegrateDlg_OkDialog_The_current_peak_scoring_model_is_incompatible_with_one_or_more_peptides_in_the_document___Please_train_a_new_model_),
                                messageDlg.Message);
                messageDlg.OkDialog();
            });
            OkDialog(reintegrateDlgIncompatible, reintegrateDlgIncompatible.CancelDialog);
        }
示例#4
0
        public override IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
            bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
        {
            return ChangeProp(ImClone(this), im =>
            {
                    int nWeights = initParameters.Weights.Count;
                    var weights = new double [nWeights];
                    for (int i = 0; i < initParameters.Weights.Count; ++i)
                    {
                        weights[i] = double.IsNaN(initParameters.Weights[i]) ? double.NaN : DEFAULT_WEIGHTS[i];
                    }
                    var parameters = new LinearModelParams(weights);
                    ScoredGroupPeaksSet decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
                    ScoredGroupPeaksSet targetTransitionGroups = new ScoredGroupPeaksSet(targets);
                    targetTransitionGroups.ScorePeaks(parameters.Weights);

                    if (includeSecondBest)
                    {
                        ScoredGroupPeaksSet secondBestTransitionGroups;
                        targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
                        foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
                        {
                            decoyTransitionGroups.Add(secondBestGroup);
                        }
                    }
                    decoyTransitionGroups.ScorePeaks(parameters.Weights);
                    im.UsesDecoys = decoys.Count > 0;
                    im.UsesSecondBest = includeSecondBest;
                    im.Parameters = parameters.RescaleParameters(decoyTransitionGroups.Mean, decoyTransitionGroups.Stdev);
                });
        }
示例#5
0
        public override void ReadXml(XmlReader reader)
        {
            // Read tag attributes
            base.ReadXml(reader);
            // Earlier versions always used decoys only
            UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
            UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets, false);
            double bias = reader.GetDoubleAttribute(ATTR.bias);

            bool isEmpty = reader.IsEmptyElement;

            // Consume tag
            reader.Read();

            if (!isEmpty)
            {
                // Read calculators
                var calculators = new List<FeatureCalculator>();
                reader.ReadElements(calculators);
                var weights = new double[calculators.Count];
                for (int i = 0; i < calculators.Count; i++)
                {
                    if (calculators[i].Type != PeakFeatureCalculators[i].GetType())
                        throw new InvalidDataException(Resources.LegacyScoringModel_ReadXml_Invalid_legacy_model_);
                    weights[i] = calculators[i].Weight;
                }
                Parameters = new LinearModelParams(weights, bias);

                reader.ReadEndElement();
            }

            DoValidate();
        }
示例#6
0
        private ModelAndFeatures CreateScoringModel(string modelName, bool decoys, bool secondBest)
        {
            _out.WriteLine(Resources.CommandLine_CreateScoringModel_Creating_scoring_model__0_, modelName);

            try
            {
                // Create new scoring model using the default calculators.
                var scoringModel = new MProphetPeakScoringModel(modelName, null as LinearModelParams, null, decoys, secondBest);
                var progressMonitor = new CommandProgressMonitor(_out, new ProgressStatus(String.Empty));
                var targetDecoyGenerator = new TargetDecoyGenerator(_doc, scoringModel, progressMonitor);

                // Get scores for target and decoy groups.
                List<IList<float[]>> targetTransitionGroups;
                List<IList<float[]>> decoyTransitionGroups;
                targetDecoyGenerator.GetTransitionGroups(out targetTransitionGroups, out decoyTransitionGroups);
                // If decoy box is checked and no decoys, throw an error
                if (decoys && decoyTransitionGroups.Count == 0)
                {
                    _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__There_are_no_decoy_peptides_in_the_document__Failed_to_create_scoring_model_);
                    return null;
                }
                // Use decoys for training only if decoy box is checked
                if (!decoys)
                    decoyTransitionGroups = new List<IList<float[]>>();

                // Set intial weights based on previous model (with NaN's reset to 0)
                var initialWeights = new double[scoringModel.PeakFeatureCalculators.Count];
                // But then set to NaN the weights that have unknown values for this dataset
                for (int i = 0; i < initialWeights.Length; ++i)
                {
                    if (!targetDecoyGenerator.EligibleScores[i])
                        initialWeights[i] = double.NaN;
                }
                var initialParams = new LinearModelParams(initialWeights);

                // Train the model.
                scoringModel = (MProphetPeakScoringModel)scoringModel.Train(targetTransitionGroups,
                    decoyTransitionGroups, initialParams, secondBest, true, progressMonitor);

                Settings.Default.PeakScoringModelList.SetValue(scoringModel);

                return new ModelAndFeatures(scoringModel, targetDecoyGenerator.PeakGroupFeatures);
            }
            catch (Exception x)
            {
                _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__Failed_to_create_scoring_model_);
                _out.WriteLine(x.Message);
                return null;
            }
        }
        /// <summary>
        /// Calculate scores for targets and decoys.  A transition is selected from each transition group using the
        /// scoring weights, and then its score is calculated using the calculator weights applied to each feature.
        /// </summary>
        /// <param name="scoringParams">Parameters to choose the best peak</param>
        /// <param name="calculatorParams">Parameters to calculate the score of the best peak.</param>
        /// <param name="targetScores">Output list of target scores.</param>
        /// <param name="decoyScores">Output list of decoy scores.</param>
        /// <param name="secondBestScores">Output list of false target scores.</param>
        /// <param name="invert">If true, select minimum rather than maximum scores</param>
        public void GetScores(LinearModelParams scoringParams, LinearModelParams calculatorParams, out List<double> targetScores, out List<double> decoyScores,
            out List<double> secondBestScores, bool invert = false)
        {
            targetScores = new List<double>();
            decoyScores = new List<double>();
            secondBestScores = new List<double>();
            int invertSign = invert ? -1 : 1;

            foreach (var peakTransitionGroupFeatures in _peakTransitionGroupFeaturesList)
            {
                PeakGroupFeatures maxFeatures = null;
                PeakGroupFeatures nextFeatures = null;
                double maxScore = Double.MinValue;
                double nextScore = Double.MinValue;

                // No peaks in this transition group record
                if (peakTransitionGroupFeatures.PeakGroupFeatures.Count == 0)
                    continue;

                // Find the highest and second highest scores among the transitions in this group.
                foreach (var peakGroupFeatures in peakTransitionGroupFeatures.PeakGroupFeatures)
                {
                    double score = invertSign * GetScore(scoringParams, peakGroupFeatures);
                    if (nextScore < score)
                    {
                        if (maxScore < score)
                        {
                            nextScore = maxScore;
                            maxScore = score;
                            nextFeatures = maxFeatures;
                            maxFeatures = peakGroupFeatures;
                        }
                        else
                        {
                            nextScore = score;
                            nextFeatures = peakGroupFeatures;
                        }
                    }
                }

                double currentScore = maxFeatures == null ? Double.NaN : GetScore(calculatorParams, maxFeatures);
                if (peakTransitionGroupFeatures.Id.NodePep.IsDecoy)
                    decoyScores.Add(currentScore);
                else
                {
                    targetScores.Add(currentScore);
                    // Skip if only one peak
                    if (peakTransitionGroupFeatures.PeakGroupFeatures.Count == 1)
                        continue;
                    double secondBestScore = nextFeatures == null ? Double.NaN : GetScore(calculatorParams, nextFeatures);
                    secondBestScores.Add(secondBestScore);

                }
            }
        }
 private static double GetScore(LinearModelParams parameters, PeakGroupFeatures peakGroupFeatures)
 {
     return GetScore(parameters.Weights, peakGroupFeatures, parameters.Bias);
 }
        /// <summary>
        /// Train the model by iterative calculating weights to separate target and decoy transition groups.
        /// </summary>
        /// <param name="targets">Target transition groups.</param>
        /// <param name="decoys">Decoy transition groups.</param>
        /// <param name="initParameters">Initial model parameters (weights and bias)</param>
        /// <param name="includeSecondBest"> Include the second best peaks in the targets as decoys?</param>
        /// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
        /// <param name="progressMonitor"></param>
        /// <returns>Immutable model with new weights.</returns>
        public override IPeakScoringModel Train(IList<IList<float[]>> targets, IList<IList<float[]>> decoys, LinearModelParams initParameters,
            bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
        {
            if(initParameters == null)
                initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
            return ChangeProp(ImClone(this), im =>
                {
                    targets = targets.Where(list => list.Count > 0).ToList();
                    decoys = decoys.Where(list => list.Count > 0).ToList();
                    var targetTransitionGroups = new ScoredGroupPeaksSet(targets);
                    var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
                    // Bootstrap from the pre-trained legacy model
                    if (preTrain)
                    {
                        var preTrainedWeights = new double[initParameters.Weights.Count];
                        for (int i = 0; i < preTrainedWeights.Length; ++i)
                        {
                            if (double.IsNaN(initParameters.Weights[i]))
                            {
                                preTrainedWeights[i] = double.NaN;
                            }
                        }
                        int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
                        int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
                        bool hasStandards = standardEnabledCount >= analyteEnabledCount;
                        var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
                        for (int i = 0; i < calculators.Length; ++i)
                        {
                            if (calculators[i].GetType() == typeof (MQuestRetentionTimePredictionCalc))
                                continue;
                            SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
                        }
                        targetTransitionGroups.ScorePeaks(preTrainedWeights);
                        decoyTransitionGroups.ScorePeaks(preTrainedWeights);
                    }

                    // Iteratively refine the weights through multiple iterations.
                    var calcWeights = new double[initParameters.Weights.Count];
                    Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
                    double decoyMean = 0;
                    double decoyStdev = 0;
                    bool colinearWarning = false;
                    // This may take a long time between progress updates, but just measure progress by cycles through the training
                    var status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
                    if (progressMonitor != null)
                        progressMonitor.UpdateProgress(status);
                    for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++)
                    {
                        if (progressMonitor != null)
                        {
                            if (progressMonitor.IsCanceled)
                                throw new OperationCanceledException();

                            progressMonitor.UpdateProgress(status =
                                status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model__iteration__0__of__1__, iteration + 1, MAX_ITERATIONS))
                                      .ChangePercentComplete((iteration + 1) * 100 / (MAX_ITERATIONS + 1)));
                        }

                        im.CalculateWeights(iteration, targetTransitionGroups, decoyTransitionGroups,
                                            includeSecondBest, calcWeights, out decoyMean, out decoyStdev, ref colinearWarning);

                        GC.Collect();   // Each loop generates a number of large objects. GC helps to keep private bytes under control
                    }
                    if (progressMonitor != null)
                        progressMonitor.UpdateProgress(status.ChangePercentComplete(100));

                    var parameters = new LinearModelParams(calcWeights);
                    parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
                    im.Parameters = parameters;
                    im.ColinearWarning = colinearWarning;
                    im.UsesSecondBest = includeSecondBest;
                    im.UsesDecoys = decoys.Count > 0;
                });
        }
示例#10
0
        public override void ReadXml(XmlReader reader)
        {
            // Read tag attributes
            base.ReadXml(reader);
            ColinearWarning = reader.GetBoolAttribute(ATTR.colinear_warning);
            // Earlier versions always used decoys only
            UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
            UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets);
            double bias = reader.GetDoubleAttribute(ATTR.bias);

            // Consume tag
            reader.Read();

            // Read calculators
            var calculators = new List<FeatureCalculator>();
            reader.ReadElements(calculators);
            var peakFeatureCalculators = new List<IPeakFeatureCalculator>(calculators.Count);
            var weights = new double[calculators.Count];
            for (int i = 0; i < calculators.Count; i++)
            {
                weights[i] = calculators[i].Weight;
                peakFeatureCalculators.Add(PeakFeatureCalculator.GetCalculator(calculators[i].Type));
            }
            SetPeakFeatureCalculators(peakFeatureCalculators);
            Parameters = new LinearModelParams(weights, bias);

            reader.ReadEndElement();

            DoValidate();
        }
示例#11
0
 public MProphetPeakScoringModel(
     string name,
     LinearModelParams parameters,
     IList<IPeakFeatureCalculator> peakFeatureCalculators = null,
     bool usesDecoys = false,
     bool usesSecondBest = false,
     bool colinearWarning = false)
     : base(name)
 {
     SetPeakFeatureCalculators(peakFeatureCalculators ?? DEFAULT_CALCULATORS);
     Parameters = parameters;
     UsesDecoys = usesDecoys;
     UsesSecondBest = usesSecondBest;
     ColinearWarning = colinearWarning;
     Lambda = DEFAULT_R_LAMBDA;   // Default from R
     DoValidate();
 }
示例#12
0
        // Test that the dialog behaves correctly when opening a model
        // that is incompatible with the dataset (some or all composite scores are NaN's)
        protected void TestIncompatibleDataSet()
        {
            // Define an incompatible model
            var weights = new[] {0.5322, -1.0352, double.NaN, double.NaN, 1.4744, 0.0430, 0.0477, -0.2740, double.NaN,
                                 2.0096, 7.7726, -0.0566, 0.4751, 0.5, 0.5, double.NaN, double.NaN,
                                 double.NaN, double.NaN, double.NaN, double.NaN, double.NaN, double.NaN};
            var parameters = new LinearModelParams(weights, -2.5);
            var incompatibleModel = new MProphetPeakScoringModel("incompatible", parameters, null, true);
            Settings.Default.PeakScoringModelList.Add(incompatibleModel);
            RunDlg<PeptideSettingsUI>(SkylineWindow.ShowPeptideSettingsUI, peptideSettingsDlg =>
                {
                    peptideSettingsDlg.ComboPeakScoringModelSelected = "incompatible";
                    peptideSettingsDlg.OkDialog();
                });

            var reintegrateDlgIncompatible = ShowDialog<ReintegrateDlg>(SkylineWindow.ShowReintegrateDialog);

            var editList = ShowDialog<EditListDlg<SettingsListBase<PeakScoringModelSpec>, PeakScoringModelSpec>>(
                    reintegrateDlgIncompatible.EditPeakScoringModel);
            RunUI(() => editList.SelectItem("incompatible")); // Not L10N

            RunDlg<EditPeakScoringModelDlg>(editList.EditItem, editDlgTemp =>
            {
                // All of the percentage fields should be null
                VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[10], 0.0);
                editDlgTemp.TrainModelClick();
                // Cell values go back to the standard trained model after we train and enable calculators,
                // despite having been loaded with weird values
                editDlgTemp.SetChecked(3, true);
                editDlgTemp.TrainModelClick();
                VerifyCellValues(editDlgTemp, SCORES_AND_WEIGHTS[1], 1.0, false);
                editDlgTemp.CancelDialog();
            });
            OkDialog(editList, editList.OkDialog);
            // Trying to reintegrate gives an error because the model is incompatible
            RunDlg<MessageDlg>(reintegrateDlgIncompatible.OkDialog, messageDlg =>
            {
                Assert.AreEqual(TextUtil.LineSeparate(string.Format(Resources.ReintegrateDlg_OkDialog_Failed_attempting_to_reintegrate_peaks_),
                                                      Resources.ReintegrateDlg_OkDialog_The_current_peak_scoring_model_is_incompatible_with_one_or_more_peptides_in_the_document___Please_train_a_new_model_),
                                messageDlg.Message);
                messageDlg.OkDialog();
            });
            OkDialog(reintegrateDlgIncompatible, reintegrateDlgIncompatible.CancelDialog);
        }