Пример #1
0
        /// <summary>
        /// Deserializes this model from XML. The decoy / second-best flags and the bias come from the
        /// element's attributes; the weights come from the child calculator elements, when there are any.
        /// </summary>
        /// <param name="reader">Reader positioned on the model's start tag.</param>
        /// <exception cref="InvalidDataException">
        /// A stored calculator's type differs from the type of the calculator at the same index
        /// in <c>PeakFeatureCalculators</c>.
        /// </exception>
        public override void ReadXml(XmlReader reader)
        {
            // Let the base class read its attributes first
            base.ReadXml(reader);

            // Earlier versions always used decoys only, hence the defaults
            UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
            UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets, false);
            double storedBias = reader.GetDoubleAttribute(ATTR.bias);

            // Has to be sampled while the reader is still on the start tag
            bool hasChildren = !reader.IsEmptyElement;

            // Move past the start tag
            reader.Read();

            if (hasChildren)
            {
                var storedCalculators = new List<FeatureCalculator>();
                reader.ReadElements(storedCalculators);

                var storedWeights = new double[storedCalculators.Count];
                for (int index = 0; index < storedCalculators.Count; index++)
                {
                    var stored = storedCalculators[index];
                    // Stored calculators must line up, position by position, with this model's calculators
                    if (stored.Type != PeakFeatureCalculators[index].GetType())
                    {
                        throw new InvalidDataException(Resources.LegacyScoringModel_ReadXml_Invalid_legacy_model_);
                    }
                    storedWeights[index] = stored.Weight;
                }
                Parameters = new LinearModelParams(storedWeights, storedBias);

                reader.ReadEndElement();
            }

            DoValidate();
        }
Пример #2
0
        /// <summary>
        /// Returns a copy of this model whose parameters are the default legacy weights, rescaled by the
        /// mean and standard deviation obtained from scoring the decoy groups.
        /// </summary>
        /// <param name="targets">Target transition groups.</param>
        /// <param name="decoys">Decoy transition groups.</param>
        /// <param name="initParameters">Initial parameters; a NaN weight stays NaN in the result's unscaled weights.</param>
        /// <param name="includeSecondBest">When true, second-best target groups are added to the decoy set.</param>
        /// <param name="preTrain">Not read by this implementation.</param>
        /// <param name="progressMonitor">Not read by this implementation.</param>
        /// <returns>The modified clone.</returns>
        public override IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, LinearModelParams initParameters,
                                                bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
        {
            return ChangeProp(ImClone(this), im =>
            {
                // NaN initial weights carry over as NaN; every other position takes its default weight
                int weightCount = initParameters.Weights.Count;
                var legacyWeights = new double[weightCount];
                for (int w = 0; w < weightCount; ++w)
                {
                    if (double.IsNaN(initParameters.Weights[w]))
                    {
                        legacyWeights[w] = double.NaN;
                    }
                    else
                    {
                        legacyWeights[w] = DEFAULT_WEIGHTS[w];
                    }
                }
                var legacyParams = new LinearModelParams(legacyWeights);

                var decoyGroups = new ScoredGroupPeaksSet(decoys);
                var targetGroups = new ScoredGroupPeaksSet(targets);
                targetGroups.ScorePeaks(legacyParams.Weights);

                if (includeSecondBest)
                {
                    // Split the scored targets and move the second-best groups over to the decoy side
                    ScoredGroupPeaksSet secondBestGroups;
                    targetGroups.SelectTargetsAndDecoys(out targetGroups, out secondBestGroups);
                    foreach (var group in secondBestGroups.ScoredGroupPeaksList)
                    {
                        decoyGroups.Add(group);
                    }
                }

                // Score the decoys last so Mean/Stdev reflect any second-best groups added above
                decoyGroups.ScorePeaks(legacyParams.Weights);

                im.UsesDecoys = decoys.Count > 0;
                im.UsesSecondBest = includeSecondBest;
                im.Parameters = legacyParams.RescaleParameters(decoyGroups.Mean, decoyGroups.Stdev);
            });
        }
Пример #3
0
        /// <summary>
        /// Collects the target scores and the list of scores the model treats as decoys: real decoys,
        /// second-best target peaks, or both, depending on the model's flags.
        /// </summary>
        /// <param name="peakScoringModel">Model whose UsesDecoys / UsesSecondBest flags select the decoy source.</param>
        /// <param name="scoringParams">Passed through to GetScores.</param>
        /// <param name="calculatorParams">Passed through to GetScores.</param>
        /// <param name="targetScores">Receives the target scores.</param>
        /// <param name="activeDecoyScores">Receives the decoy scores selected by the model's flags.</param>
        private void GetActiveScoredValues(IPeakScoringModel peakScoringModel,
                                           LinearModelParams scoringParams,
                                           LinearModelParams calculatorParams,
                                           out List <double> targetScores,
                                           out List <double> activeDecoyScores)
        {
            bool wantDecoys = peakScoringModel.UsesDecoys;
            bool wantSecondBest = peakScoringModel.UsesSecondBest;

            targetScores = new List<double>(TargetCount);
            // A list is only allocated for a score source the model actually uses
            List<double> decoyList = null;
            if (wantDecoys)
            {
                decoyList = new List<double>(DecoyCount);
            }
            List<double> secondBestList = null;
            if (wantSecondBest)
            {
                secondBestList = new List<double>(TargetCount);
            }

            GetScores(scoringParams, calculatorParams, targetScores, decoyList, secondBestList);

            // Exactly one source enabled: hand that list back directly
            if (wantDecoys != wantSecondBest)
            {
                activeDecoyScores = wantDecoys ? decoyList : secondBestList;
                return;
            }

            // Both or neither enabled: concatenate whatever lists exist
            activeDecoyScores = new List<double>();
            if (decoyList != null)
            {
                activeDecoyScores.AddRange(decoyList);
            }
            if (secondBestList != null)
            {
                activeDecoyScores.AddRange(secondBestList);
            }
        }
Пример #4
0
        /// <summary>
        /// Deserializes this model from XML: flags and bias from the element's attributes, then one
        /// calculator and one weight for each child calculator element.
        /// </summary>
        /// <param name="reader">Reader positioned on the model's start tag.</param>
        public override void ReadXml(XmlReader reader)
        {
            // Let the base class read its attributes first
            base.ReadXml(reader);
            ColinearWarning = reader.GetBoolAttribute(ATTR.colinear_warning);
            // Earlier versions always used decoys only, hence the default of true
            UsesDecoys = reader.GetBoolAttribute(ATTR.uses_decoys, true);
            UsesSecondBest = reader.GetBoolAttribute(ATTR.uses_false_targets);
            double storedBias = reader.GetDoubleAttribute(ATTR.bias);

            // Move past the start tag
            reader.Read();

            // Child elements describe the calculators and their weights
            var storedCalculators = new List<FeatureCalculator>();
            reader.ReadElements(storedCalculators);

            int count = storedCalculators.Count;
            var resolvedCalculators = new List<IPeakFeatureCalculator>(count);
            var storedWeights = new double[count];
            int position = 0;
            foreach (var stored in storedCalculators)
            {
                storedWeights[position++] = stored.Weight;
                // Look up the calculator instance that corresponds to the stored type
                resolvedCalculators.Add(PeakFeatureCalculator.GetCalculator(stored.Type));
            }

            SetPeakFeatureCalculators(resolvedCalculators);
            Parameters = new LinearModelParams(storedWeights, storedBias);

            reader.ReadEndElement();

            DoValidate();
        }
Пример #5
0
        /// <summary>
        /// Produces score lists for targets, decoys and second-best target peaks. Within each transition
        /// group the best and runner-up peaks are ranked with <paramref name="scoringParams"/>; the values
        /// written to the output lists are computed with <paramref name="calculatorParams"/>.
        /// </summary>
        /// <param name="scoringParams">Parameters used to rank the peaks of a group.</param>
        /// <param name="calculatorParams">Parameters used to compute the reported score of a chosen peak.</param>
        /// <param name="targetScores">Receives one score per non-decoy group that has peaks.</param>
        /// <param name="decoyScores">Receives one score per decoy group that has peaks.</param>
        /// <param name="secondBestScores">Receives the runner-up score of each non-decoy group with more than one peak.</param>
        /// <param name="invert">If true, ranking scores are negated so the minimum is chosen instead of the maximum.</param>
        public void GetScores(LinearModelParams scoringParams, LinearModelParams calculatorParams, out List <double> targetScores, out List <double> decoyScores,
                              out List <double> secondBestScores, bool invert = false)
        {
            targetScores = new List<double>();
            decoyScores = new List<double>();
            secondBestScores = new List<double>();
            int sign = invert ? -1 : 1;

            foreach (var groupFeatures in _peakTransitionGroupFeaturesList)
            {
                // Groups without any peaks contribute to none of the lists
                if (groupFeatures.PeakGroupFeatures.Count == 0)
                {
                    continue;
                }

                PeakGroupFeatures bestPeak = null;
                PeakGroupFeatures runnerUpPeak = null;
                double bestRank = Double.MinValue;
                double runnerUpRank = Double.MinValue;

                // Single pass that tracks the two highest-ranked peaks; runnerUpRank never exceeds bestRank
                foreach (var candidate in groupFeatures.PeakGroupFeatures)
                {
                    double rank = sign * GetScore(scoringParams, candidate);
                    if (rank > bestRank)
                    {
                        // New leader: the previous leader becomes the runner-up
                        runnerUpRank = bestRank;
                        runnerUpPeak = bestPeak;
                        bestRank = rank;
                        bestPeak = candidate;
                    }
                    else if (rank > runnerUpRank)
                    {
                        runnerUpRank = rank;
                        runnerUpPeak = candidate;
                    }
                }

                // NaN stands in when no peak could be selected (no rank exceeded the initial minimum)
                double bestScore = bestPeak == null ? Double.NaN : GetScore(calculatorParams, bestPeak);
                if (groupFeatures.Id.NodePep.IsDecoy)
                {
                    decoyScores.Add(bestScore);
                    continue;
                }

                targetScores.Add(bestScore);
                // A second-best entry exists only when the group has more than one peak
                if (groupFeatures.PeakGroupFeatures.Count != 1)
                {
                    double runnerUpScore = runnerUpPeak == null ? Double.NaN : GetScore(calculatorParams, runnerUpPeak);
                    secondBestScores.Add(runnerUpScore);
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Creates a legacy scoring model with the given name, parameters and decoy-source flags.
        /// </summary>
        /// <param name="name">Model name, forwarded to the base constructor.</param>
        /// <param name="parameters">Model parameters; may be null.</param>
        /// <param name="usesDecoys">Initial value of <c>UsesDecoys</c>.</param>
        /// <param name="usesSecondBest">Initial value of <c>UsesSecondBest</c>.</param>
        public LegacyScoringModel(string name, LinearModelParams parameters = null, bool usesDecoys = true, bool usesSecondBest = false)
            : base(name)
        {
            // Set up the calculator list before any model state is assigned
            SetPeakFeatureCalculators();

            Parameters = parameters;
            UsesDecoys = usesDecoys;
            UsesSecondBest = usesSecondBest;
        }
Пример #7
0
        /// <summary>
        /// Rescores every peak in every group with the given feature weights (bias 0), then refreshes
        /// <c>Mean</c> and <c>Stdev</c> from the values returned by <c>GetMaxScores</c>.
        /// </summary>
        /// <param name="weights">Weight factors applied to each feature.</param>
        public void ScorePeaks(IList <double> weights)
        {
            foreach (var scoredGroup in _scoredGroupPeaksList)
            {
                foreach (var scoredPeak in scoredGroup.ScoredPeaks)
                {
                    scoredPeak.Score = LinearModelParams.Score(scoredPeak.Features, weights, 0);
                }
            }

            // Mean and stdev are taken over the top-scoring peak of each transition group
            var maxScoreStats = new Statistics(GetMaxScores());
            Mean = maxScoreStats.Mean();
            Stdev = maxScoreStats.StdDev();
        }
Пример #8
0
 /// <summary>
 /// Value equality for parameter sets: the same number of weights, pairwise-equal weights, and an
 /// equal bias. NaN compares equal to NaN, so a parameter set holding NaN weights is equal to itself
 /// and to an identical copy.
 /// </summary>
 /// <param name="other">Parameter set to compare against; null compares unequal.</param>
 /// <returns>True when all weights and the bias match.</returns>
 protected bool Equals(LinearModelParams other)
 {
     if (ReferenceEquals(null, other))
     {
         return(false);
     }
     if (ReferenceEquals(this, other))
     {
         return(true);
     }
     if (Weights.Count != other.Weights.Count)
     {
         return(false);
     }
     for (int i = 0; i < Weights.Count; ++i)
     {
         // double.Equals (unlike the != operator) treats NaN as equal to NaN; with != any
         // NaN weight made the comparison fail even against the very same values.
         if (!Weights[i].Equals(other.Weights[i]))
         {
             return(false);
         }
     }
     return(Bias.Equals(other.Bias));
 }
Пример #9
0
 /// <summary>
 /// Creates an mProphet peak scoring model and validates it.
 /// </summary>
 /// <param name="name">Model name, forwarded to the base constructor.</param>
 /// <param name="parameters">Model parameters.</param>
 /// <param name="peakFeatureCalculators">Calculators to use; <c>DEFAULT_CALCULATORS</c> when null.</param>
 /// <param name="usesDecoys">Initial value of <c>UsesDecoys</c>.</param>
 /// <param name="usesSecondBest">Initial value of <c>UsesSecondBest</c>.</param>
 /// <param name="colinearWarning">Initial value of <c>ColinearWarning</c>.</param>
 public MProphetPeakScoringModel(string name,
                                 LinearModelParams parameters,
                                 IList<IPeakFeatureCalculator> peakFeatureCalculators = null,
                                 bool usesDecoys = false,
                                 bool usesSecondBest = false,
                                 bool colinearWarning = false)
     : base(name)
 {
     // Fall back to the default calculator set when the caller supplies none
     var calculators = peakFeatureCalculators ?? DEFAULT_CALCULATORS;
     SetPeakFeatureCalculators(calculators);

     Parameters = parameters;
     UsesDecoys = usesDecoys;
     UsesSecondBest = usesSecondBest;
     ColinearWarning = colinearWarning;
     // Default from R
     Lambda = DEFAULT_R_LAMBDA;

     DoValidate();
 }
Пример #10
0
 /// <summary>
 /// Static convenience wrapper: scores a feature vector with the given parameter set.
 /// </summary>
 /// <param name="features">Feature values to score.</param>
 /// <param name="parameters">Parameter set whose instance <c>Score</c> method does the work.</param>
 /// <returns>The value returned by <c>parameters.Score(features)</c>.</returns>
 public static double Score(IList <float> features, LinearModelParams parameters)
 {
     var score = parameters.Score(features);
     return score;
 }
Пример #11
0
        /// <summary>
        /// Trains a copy of this model by repeatedly recalculating the weights that separate target and
        /// decoy transition groups, for a fixed number of iterations (<c>MAX_ITERATIONS</c>).
        /// </summary>
        /// <param name="targets">Target transition groups; groups with no peaks are ignored.</param>
        /// <param name="decoys">Decoy transition groups; groups with no peaks are ignored.</param>
        /// <param name="initParameters">Initial model parameters (weights and bias); a new set sized to the calculator list is used when null.</param>
        /// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
        /// <param name="preTrain">Score the groups with legacy default weights before the first iteration.</param>
        /// <param name="progressMonitor">Optional monitor that receives progress updates and is polled for cancellation.</param>
        /// <returns>Immutable model with new weights.</returns>
        /// <exception cref="OperationCanceledException">The progress monitor reported cancellation.</exception>
        public override IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, LinearModelParams initParameters,
                                                bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
        {
            initParameters = initParameters ?? new LinearModelParams(_peakFeatureCalculators.Count);

            return ChangeProp(ImClone(this), im =>
            {
                // Only groups that actually contain peaks take part in training
                IList<IList<float[]>> usableTargets = targets.Where(list => list.Count > 0).ToList();
                IList<IList<float[]>> usableDecoys = decoys.Where(list => list.Count > 0).ToList();
                var targetGroups = new ScoredGroupPeaksSet(usableTargets);
                var decoyGroups = new ScoredGroupPeaksSet(usableDecoys);
                int weightCount = initParameters.Weights.Count;

                // Bootstrap from the pre-trained legacy model
                if (preTrain)
                {
                    // Positions that are NaN in the initial weights stay NaN; the rest start at zero
                    var bootstrapWeights = new double[weightCount];
                    for (int w = 0; w < bootstrapWeights.Length; ++w)
                    {
                        if (double.IsNaN(initParameters.Weights[w]))
                        {
                            bootstrapWeights[w] = double.NaN;
                        }
                    }

                    // Pick whichever legacy calculator set has at least as many enabled calculators (ties go to standards)
                    int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
                    int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
                    var legacyCalculators = standardEnabledCount >= analyteEnabledCount
                        ? LegacyScoringModel.StandardFeatureCalculators
                        : LegacyScoringModel.AnalyteFeatureCalculators;
                    for (int c = 0; c < legacyCalculators.Length; ++c)
                    {
                        var calculatorType = legacyCalculators[c].GetType();
                        // The retention time prediction calculator gets no bootstrap weight
                        if (calculatorType != typeof(MQuestRetentionTimePredictionCalc))
                        {
                            SetCalculatorValue(calculatorType, LegacyScoringModel.DEFAULT_WEIGHTS[c], bootstrapWeights);
                        }
                    }
                    targetGroups.ScorePeaks(bootstrapWeights);
                    decoyGroups.ScorePeaks(bootstrapWeights);
                }

                // Working copy of the initial weights, handed to every CalculateWeights call
                var workingWeights = new double[weightCount];
                Array.Copy(initParameters.Weights.ToArray(), workingWeights, weightCount);
                double decoyMean = 0;
                double decoyStdev = 0;
                bool colinearWarning = false;

                // This may take a long time between progress updates, but just measure progress by cycles through the training
                bool reportProgress = progressMonitor != null;
                IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
                if (reportProgress)
                {
                    progressMonitor.UpdateProgress(status);
                }

                for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++)
                {
                    if (reportProgress)
                    {
                        if (progressMonitor.IsCanceled)
                        {
                            throw new OperationCanceledException();
                        }

                        string message = string.Format(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model__iteration__0__of__1__, iteration + 1, MAX_ITERATIONS);
                        status = status.ChangeMessage(message)
                                       .ChangePercentComplete((iteration + 1) * 100 / (MAX_ITERATIONS + 1));
                        progressMonitor.UpdateProgress(status);
                    }

                    im.CalculateWeights(iteration, targetGroups, decoyGroups,
                                        includeSecondBest, workingWeights, out decoyMean, out decoyStdev, ref colinearWarning);

                    // Each loop generates a number of large objects. GC helps to keep private bytes under control
                    GC.Collect();
                }

                if (reportProgress)
                {
                    progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
                }

                // Rescale the final weights by the decoy statistics from the last iteration
                var trainedParams = new LinearModelParams(workingWeights).RescaleParameters(decoyMean, decoyStdev);
                im.Parameters = trainedParams;
                im.ColinearWarning = colinearWarning;
                im.UsesSecondBest = includeSecondBest;
                im.UsesDecoys = usableDecoys.Count > 0;
            });
        }
Пример #12
0
 /// <summary>
 /// Trains a peak scoring model from target and decoy transition groups. Implemented by derived classes.
 /// </summary>
 /// <param name="targets">Target transition groups.</param>
 /// <param name="decoys">Decoy transition groups.</param>
 /// <param name="targetDecoyGenerator">NOTE(review): role not visible from this declaration - confirm against implementations.</param>
 /// <param name="initParameters">Initial model parameters.</param>
 /// <param name="cutoffs">NOTE(review): presumably score or q value cut-offs used during training - confirm against implementations.</param>
 /// <param name="iterations">Optional number of training iterations; defaults to null.</param>
 /// <param name="includeSecondBest">Whether second-best target peaks take part in training; defaults to false.</param>
 /// <param name="preTrain">Whether to pre-train before the main training; defaults to true.</param>
 /// <param name="progressMonitor">Optional progress monitor; defaults to null.</param>
 /// <param name="documentPath">Optional document path; defaults to null.</param>
 /// <returns>The trained model.</returns>
 public abstract IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, TargetDecoyGenerator targetDecoyGenerator, LinearModelParams initParameters,
                                         IList <double> cutoffs, int?iterations = null, bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null, string documentPath = null);
Пример #13
0
 /// <summary>
 /// Builds a new <c>ScoredPeak</c> for this peak's features, scored with the given weights and a bias of 0.
 /// </summary>
 /// <param name="weights">Weight factors applied to each feature.</param>
 /// <returns>A new scored peak carrying this peak's features and the computed score.</returns>
 public ScoredPeak CalcScore(IList <double> weights)
 {
     var rescoredPeak = new ScoredPeak(Features, LinearModelParams.Score(Features, weights, 0));
     return rescoredPeak;
 }
Пример #14
0
 /// <summary>
 /// Scores a peak group's features with explicit weighting coefficients and bias.
 /// </summary>
 /// <param name="weights">Weighting coefficients, one per feature.</param>
 /// <param name="peakGroupFeatures">Peak group whose <c>Features</c> are scored.</param>
 /// <param name="bias">Bias term passed to the scoring function.</param>
 /// <returns>The value returned by <c>LinearModelParams.Score</c>.</returns>
 private static double GetScore(IList <double> weights, PeakGroupFeatures peakGroupFeatures, double bias)
 {
     var features = peakGroupFeatures.Features;
     return LinearModelParams.Score(features, weights, bias);
 }
Пример #15
0
 /// <summary>
 /// Scores a peak group's features with the weights and bias of a parameter set.
 /// </summary>
 /// <param name="parameters">Parameter set supplying <c>Weights</c> and <c>Bias</c>.</param>
 /// <param name="peakGroupFeatures">Peak group to score.</param>
 /// <returns>The score from the weights/bias overload of <c>GetScore</c>.</returns>
 private static double GetScore(LinearModelParams parameters, PeakGroupFeatures peakGroupFeatures)
 {
     var weights = parameters.Weights;
     var bias = parameters.Bias;
     return GetScore(weights, peakGroupFeatures, bias);
 }
Пример #16
0
        /// <summary>
        /// Train the model by iterative calculating weights to separate target and decoy transition groups.
        /// Iteration stops early when the number of true peaks reported by CalculateWeights drops while
        /// the q value cut-off is already at 0.01; the weights of the previous iteration are then used.
        /// </summary>
        /// <param name="targetsIn">Target transition groups.</param>
        /// <param name="decoysIn">Decoy transition groups.</param>
        /// <param name="initParameters">Initial model parameters (weights and bias)</param>
        /// <param name="iterations">Optional specific number of iterations to use in training</param>
        /// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
        /// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
        /// <param name="progressMonitor">Used to report progress to the calling context</param>
        /// <param name="documentPath">The path to the current document for writing score distributions</param>
        /// <returns>Immutable model with new weights.</returns>
        /// <exception cref="OperationCanceledException">The progress monitor reported cancellation.</exception>
        public override IPeakScoringModel Train(IList <IList <float[]> > targetsIn,
                                                IList <IList <float[]> > decoysIn,
                                                LinearModelParams initParameters,
                                                int?iterations                   = null,
                                                bool includeSecondBest           = false,
                                                bool preTrain                    = true,
                                                IProgressMonitor progressMonitor = null,
                                                string documentPath              = null)
        {
            // With no initial parameters, start from a new parameter set constructed from the calculator count
            if (initParameters == null)
            {
                initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
            }
            return(ChangeProp(ImClone(this), im =>
            {
                // This may take a long time between progress updates, but just measure progress by cycles through the training
                IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status);
                }

                // Groups without peaks are filtered out lazily; the unfiltered counts are passed alongside
                // NOTE(review): the second constructor argument is presumably a capacity hint - confirm
                var targets = targetsIn.Where(list => list.Count > 0);
                var decoys = decoysIn.Where(list => list.Count > 0);
                var targetTransitionGroups = new ScoredGroupPeaksSet(targets, targetsIn.Count);
                var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys, decoysIn.Count);
                // Iteratively refine the weights through multiple iterations.
                // calcWeights begins as a private copy of the initial weights
                var calcWeights = new double[initParameters.Weights.Count];
                Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
                double qValueCutoff = 0.01; // First iteration cut-off - if not pretraining, just start at 0.01
                // Start with scores calculated from the initial weights
                if (!preTrain)
                {
                    targetTransitionGroups.ScorePeaks(calcWeights);
                    decoyTransitionGroups.ScorePeaks(calcWeights);
                }
                // Bootstrap from the pre-trained legacy model
                else
                {
                    // Looser first cut-off when bootstrapping; tightened to 0.02 after the first iteration below
                    qValueCutoff = 0.15;
                    // Positions that are NaN in the initial weights stay NaN; all others start at zero
                    var preTrainedWeights = new double[initParameters.Weights.Count];
                    for (int i = 0; i < preTrainedWeights.Length; ++i)
                    {
                        if (double.IsNaN(initParameters.Weights[i]))
                        {
                            preTrainedWeights[i] = double.NaN;
                        }
                    }
                    // Use the legacy calculator set with at least as many enabled calculators (ties go to standards)
                    int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
                    int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
                    bool hasStandards = standardEnabledCount >= analyteEnabledCount;
                    var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
                    for (int i = 0; i < calculators.Length; ++i)
                    {
                        // The retention time prediction calculator gets no pre-trained weight
                        if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                        {
                            continue;
                        }
                        SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
                    }
                    targetTransitionGroups.ScorePeaks(preTrainedWeights);
                    decoyTransitionGroups.ScorePeaks(preTrainedWeights);
                }

                // State accepted from the most recent completed iteration
                double decoyMean = 0;
                double decoyStdev = 0;
                bool colinearWarning = false;
                int iterationCount = iterations ?? MAX_ITERATIONS;
                int truePeaksCount = 0;
                // Snapshot of calcWeights after the last accepted iteration; restored on convergence
                var lastWeights = new double[calcWeights.Length];
                for (int i = 0; i < iterationCount; i++)
                {
                    int percentComplete = 0;
                    // Results go into "New" locals first and are only committed if the loop does not break
                    double decoyMeanNew, decoyStdevNew;
                    bool colinearWarningNew = colinearWarning;
                    int truePeaksCountNew = im.CalculateWeights(documentPath,
                                                                targetTransitionGroups,
                                                                decoyTransitionGroups,
                                                                includeSecondBest,
                                                                i == 0, // Use non-parametric q values for first round, when normality assumption may not hold
                                                                qValueCutoff,
                                                                calcWeights,
                                                                out decoyMeanNew,
                                                                out decoyStdevNew,
                                                                ref colinearWarningNew);

                    if (progressMonitor != null)
                    {
                        if (progressMonitor.IsCanceled)
                        {
                            throw new OperationCanceledException();
                        }

                        // Calculate progress, but wait to make sure convergence has not occurred before setting it
                        string formatText = qValueCutoff > 0.02
                            ? Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1__
                            : Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1_____2______peaks_at__3_0_____FDR_;
                        percentComplete = (i + 1) * 100 / (iterationCount + 1);
                        status = status.ChangeMessage(string.Format(formatText, i + 1, iterationCount, truePeaksCountNew, qValueCutoff))
                                 .ChangePercentComplete(percentComplete);
                    }

                    if (qValueCutoff > 0.02)
                    {
                        // Tighten the q value cut-off for "truth" to 2% FDR
                        qValueCutoff = 0.02;
                        // And allow the true peaks count to go down in the next iteration
                        // Though it rarely will
                        truePeaksCountNew = 0;
                    }
                    else if (truePeaksCountNew < truePeaksCount)
                    {
                        // The model has leveled off enough to begin losing discriminant value
                        if (qValueCutoff > 0.01)
                        {
                            // Tighten the q value cut-off for "truth" to 1% FDR
                            qValueCutoff = 0.01;
                            // And allow the true peaks count to go down in the next iteration
                            truePeaksCountNew = 0;
                        }
                        else
                        {
                            // Converged: report it, fall back to the previous iteration's weights and stop
                            if (progressMonitor != null)
                            {
                                progressMonitor.UpdateProgress(status =
                                                                   status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Scoring_model_converged__iteration__0_____1______peaks_at__2_0_____FDR_, i + 1, truePeaksCount, qValueCutoff))
                                                                   .ChangePercentComplete(Math.Max(95, percentComplete)));
                            }
                            // decoyMean, decoyStdev and colinearWarning also keep their previous-iteration values
                            calcWeights = lastWeights;
                            break;
                        }
                    }
                    // Commit this iteration's results
                    truePeaksCount = truePeaksCountNew;
                    Array.Copy(calcWeights, lastWeights, calcWeights.Length);
                    decoyMean = decoyMeanNew;
                    decoyStdev = decoyStdevNew;
                    colinearWarning = colinearWarningNew;

                    if (progressMonitor != null)
                    {
                        progressMonitor.UpdateProgress(status);
                    }
                }
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
                }

                // Rescale the accepted weights by the accepted decoy statistics
                var parameters = new LinearModelParams(calcWeights);
                parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
                im.Parameters = parameters;
                im.ColinearWarning = colinearWarning;
                im.UsesSecondBest = includeSecondBest;
                // Based on the unfiltered decoy list, so empty decoy groups still count here
                im.UsesDecoys = decoysIn.Count > 0;
            }));
        }
Пример #17
0
 /// <summary>
 /// Trains a peak scoring model from target and decoy transition groups. Implemented by derived classes.
 /// </summary>
 /// <param name="targets">Target transition groups.</param>
 /// <param name="decoys">Decoy transition groups.</param>
 /// <param name="initParameters">Initial model parameters.</param>
 /// <param name="includeSecondBest">Whether second-best target peaks take part in training; defaults to false.</param>
 /// <param name="preTrain">Whether to pre-train before the main training; defaults to true.</param>
 /// <param name="progressMonitor">Optional progress monitor; defaults to null.</param>
 /// <returns>The trained model.</returns>
 public abstract IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, LinearModelParams initParameters,
                                         bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null);