Exemplo n.º 1
0
        private ModelAndFeatures CreateScoringModel(string modelName, bool decoys, bool secondBest)
        {
            _out.WriteLine(Resources.CommandLine_CreateScoringModel_Creating_scoring_model__0_, modelName);

            try
            {
                // Create new scoring model using the default calculators.
                var scoringModel = new MProphetPeakScoringModel(modelName, null as LinearModelParams, null, decoys, secondBest);
                var progressMonitor = new CommandProgressMonitor(_out, new ProgressStatus(String.Empty));
                var targetDecoyGenerator = new TargetDecoyGenerator(_doc, scoringModel, progressMonitor);

                // Get scores for target and decoy groups.
                List<IList<float[]>> targetTransitionGroups;
                List<IList<float[]>> decoyTransitionGroups;
                targetDecoyGenerator.GetTransitionGroups(out targetTransitionGroups, out decoyTransitionGroups);
                // If decoy box is checked and no decoys, throw an error
                if (decoys && decoyTransitionGroups.Count == 0)
                {
                    _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__There_are_no_decoy_peptides_in_the_document__Failed_to_create_scoring_model_);
                    return null;
                }
                // Use decoys for training only if decoy box is checked
                if (!decoys)
                    decoyTransitionGroups = new List<IList<float[]>>();

                // Set intial weights based on previous model (with NaN's reset to 0)
                var initialWeights = new double[scoringModel.PeakFeatureCalculators.Count];
                // But then set to NaN the weights that have unknown values for this dataset
                for (int i = 0; i < initialWeights.Length; ++i)
                {
                    if (!targetDecoyGenerator.EligibleScores[i])
                        initialWeights[i] = double.NaN;
                }
                var initialParams = new LinearModelParams(initialWeights);

                // Train the model.
                scoringModel = (MProphetPeakScoringModel)scoringModel.Train(targetTransitionGroups,
                    decoyTransitionGroups, initialParams, secondBest, true, progressMonitor);

                Settings.Default.PeakScoringModelList.SetValue(scoringModel);

                return new ModelAndFeatures(scoringModel, targetDecoyGenerator.PeakGroupFeatures);
            }
            catch (Exception x)
            {
                _out.WriteLine(Resources.CommandLine_CreateScoringModel_Error__Failed_to_create_scoring_model_);
                _out.WriteLine(x.Message);
                return null;
            }
        }
Exemplo n.º 2
0
        public override IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, TargetDecoyGenerator targetDecoyGenerator, LinearModelParams initParameters,
                                                int?iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null)
        {
            return(ChangeProp(ImClone(this), im =>
            {
                int nWeights = initParameters.Weights.Count;
                var weights = new double [nWeights];
                for (int i = 0; i < initParameters.Weights.Count; ++i)
                {
                    weights[i] = double.IsNaN(initParameters.Weights[i]) ? double.NaN : DEFAULT_WEIGHTS[i];
                }
                var parameters = new LinearModelParams(weights);
                ScoredGroupPeaksSet decoyTransitionGroups = new ScoredGroupPeaksSet(decoys, decoys.Count);
                ScoredGroupPeaksSet targetTransitionGroups = new ScoredGroupPeaksSet(targets, targets.Count);
                targetTransitionGroups.ScorePeaks(parameters.Weights);

                if (includeSecondBest)
                {
                    ScoredGroupPeaksSet secondBestTransitionGroups;
                    targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
                    foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
                    {
                        decoyTransitionGroups.Add(secondBestGroup);
                    }
                }
                decoyTransitionGroups.ScorePeaks(parameters.Weights);
                im.UsesDecoys = decoys.Count > 0;
                im.UsesSecondBest = includeSecondBest;
                im.Parameters = parameters.RescaleParameters(decoyTransitionGroups.Mean, decoyTransitionGroups.Stdev);
                im.Parameters = im.Parameters.CalculatePercentContributions(im, targetDecoyGenerator);
            }));
        }
Exemplo n.º 3
0
 public abstract IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, TargetDecoyGenerator targetDecoyGenerator, LinearModelParams initParameters,
                                         IList <double> cutoffs, int?iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null);
Exemplo n.º 4
0
        public LinearModelParams CalculatePercentContributions(IPeakScoringModel model, TargetDecoyGenerator targetDecoyGenerator)
        {
            var percentContributions = new double[_percentContributions.Count];

            for (var i = 0; i < _percentContributions.Count; ++i)
            {
                percentContributions[i] = (targetDecoyGenerator != null ? targetDecoyGenerator.GetPercentContribution(model, i) : null) ?? double.NaN;
            }

            return(ChangeProp(ImClone(this), im =>
            {
                im.PercentContributions = percentContributions;
            }));
        }
Exemplo n.º 5
0
        /// <summary>
        /// Train the model by iterative calculating weights to separate target and decoy transition groups.
        /// </summary>
        /// <param name="targetsIn">Target transition groups.</param>
        /// <param name="decoysIn">Decoy transition groups.</param>
        /// <param name="targetDecoyGenerator">Target decoy generator used to calculate contribution percentages</param>
        /// <param name="initParameters">Initial model parameters (weights and bias)</param>
        /// <param name="iterations">Optional specific number of iterations to use in training</param>
        /// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
        /// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
        /// <param name="progressMonitor">Used to report progress to the calling context</param>
        /// <param name="documentPath">The path to the current document for writing score distributions</param>
        /// <returns>Immutable model with new weights.</returns>
        public override IPeakScoringModel Train(IList <IList <float[]> > targetsIn,
                                                IList <IList <float[]> > decoysIn,
                                                TargetDecoyGenerator targetDecoyGenerator,
                                                LinearModelParams initParameters,
                                                int?iterations                   = null,
                                                bool includeSecondBest           = false,
                                                bool preTrain                    = true,
                                                IProgressMonitor progressMonitor = null,
                                                string documentPath              = null)
        {
            if (initParameters == null)
            {
                initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
            }
            return(ChangeProp(ImClone(this), im =>
            {
                // This may take a long time between progress updates, but just measure progress by cycles through the training
                IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status);
                }

                var targets = targetsIn.Where(list => list.Count > 0);
                var decoys = decoysIn.Where(list => list.Count > 0);
                var targetTransitionGroups = new ScoredGroupPeaksSet(targets, targetsIn.Count);
                var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys, decoysIn.Count);
                // Iteratively refine the weights through multiple iterations.
                var calcWeights = new double[initParameters.Weights.Count];
                Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
                double qValueCutoff = 0.01; // First iteration cut-off - if not pretraining, just start at 0.01
                // Start with scores calculated from the initial weights
                if (!preTrain)
                {
                    targetTransitionGroups.ScorePeaks(calcWeights);
                    decoyTransitionGroups.ScorePeaks(calcWeights);
                }
                // Bootstrap from the pre-trained legacy model
                else
                {
                    qValueCutoff = 0.15;
                    var preTrainedWeights = new double[initParameters.Weights.Count];
                    for (int i = 0; i < preTrainedWeights.Length; ++i)
                    {
                        if (double.IsNaN(initParameters.Weights[i]))
                        {
                            preTrainedWeights[i] = double.NaN;
                        }
                    }
                    int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
                    int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
                    bool hasStandards = standardEnabledCount >= analyteEnabledCount;
                    var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
                    for (int i = 0; i < calculators.Length; ++i)
                    {
                        if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                        {
                            continue;
                        }
                        SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
                    }
                    targetTransitionGroups.ScorePeaks(preTrainedWeights);
                    decoyTransitionGroups.ScorePeaks(preTrainedWeights);
                }

                double decoyMean = 0;
                double decoyStdev = 0;
                bool colinearWarning = false;
                int iterationCount = iterations ?? MAX_ITERATIONS;
                int truePeaksCount = 0;
                var lastWeights = new double[calcWeights.Length];
                for (int i = 0; i < iterationCount; i++)
                {
                    int percentComplete = 0;
                    double decoyMeanNew, decoyStdevNew;
                    bool colinearWarningNew = colinearWarning;
                    int truePeaksCountNew = im.CalculateWeights(documentPath,
                                                                targetTransitionGroups,
                                                                decoyTransitionGroups,
                                                                includeSecondBest,
                                                                i == 0, // Use non-parametric q values for first round, when normality assumption may not hold
                                                                qValueCutoff,
                                                                calcWeights,
                                                                out decoyMeanNew,
                                                                out decoyStdevNew,
                                                                ref colinearWarningNew);

                    if (progressMonitor != null)
                    {
                        if (progressMonitor.IsCanceled)
                        {
                            throw new OperationCanceledException();
                        }

                        // Calculate progress, but wait to make sure convergence has not occurred before setting it
                        string formatText = qValueCutoff > 0.02
                            ? Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1__
                            : Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1_____2______peaks_at__3_0_____FDR_;
                        percentComplete = (i + 1) * 100 / (iterationCount + 1);
                        status = status.ChangeMessage(string.Format(formatText, i + 1, iterationCount, truePeaksCountNew, qValueCutoff))
                                 .ChangePercentComplete(percentComplete);
                    }

                    if (qValueCutoff > 0.02)
                    {
                        // Tighten the q value cut-off for "truth" to 2% FDR
                        qValueCutoff = 0.02;
                        // And allow the true peaks count to go down in the next iteration
                        // Though it rarely will
                        truePeaksCountNew = 0;
                    }
                    // Decided in 2018 that equal should be counted as converging, since otherwise training can just get stuck,
                    // and go to full iteration count without progressing
                    else if (truePeaksCountNew <= truePeaksCount)
                    {
                        // The model has leveled off enough to begin losing discriminant value
                        if (qValueCutoff > 0.01)
                        {
                            // Tighten the q value cut-off for "truth" to 1% FDR
                            qValueCutoff = 0.01;
                            // And allow the true peaks count to go down in the next iteration
                            truePeaksCountNew = 0;
                        }
                        else
                        {
                            if (progressMonitor != null)
                            {
                                progressMonitor.UpdateProgress(status =
                                                                   status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Scoring_model_converged__iteration__0_____1______peaks_at__2_0_____FDR_, i + 1, truePeaksCount, qValueCutoff))
                                                                   .ChangePercentComplete(Math.Max(95, percentComplete)));
                            }
                            calcWeights = lastWeights;
                            break;
                        }
                    }
                    truePeaksCount = truePeaksCountNew;
                    Array.Copy(calcWeights, lastWeights, calcWeights.Length);
                    decoyMean = decoyMeanNew;
                    decoyStdev = decoyStdevNew;
                    colinearWarning = colinearWarningNew;

                    if (progressMonitor != null)
                    {
                        progressMonitor.UpdateProgress(status);
                    }
                }
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
                }

                var parameters = new LinearModelParams(calcWeights);
                parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
                im.Parameters = parameters;
                im.ColinearWarning = colinearWarning;
                im.UsesSecondBest = includeSecondBest;
                im.UsesDecoys = decoysIn.Count > 0;
                im.Parameters = parameters.CalculatePercentContributions(im, targetDecoyGenerator);
            }));
        }