Beispiel #1
0
        /// <summary>
        /// Split the groups in this set into a target set and a decoy set: each
        /// group's second-highest peak (when present) becomes a single-peak decoy
        /// group, and every other peak in the group is copied to the corresponding
        /// target group. Peaks are matched by reference identity of their Features.
        /// </summary>
        /// <param name="targetScoredGroupPeaksSet">Receives the target groups.</param>
        /// <param name="decoyScoredGroupPeaksSet">Receives the decoy groups.</param>
        public void SelectTargetsAndDecoys(out ScoredGroupPeaksSet targetScoredGroupPeaksSet, out ScoredGroupPeaksSet decoyScoredGroupPeaksSet)
        {
            targetScoredGroupPeaksSet = new ScoredGroupPeaksSet();
            decoyScoredGroupPeaksSet = new ScoredGroupPeaksSet();
            foreach (var groupPeaks in _scoredGroupPeaksList)
            {
                var runnerUp = groupPeaks.SecondHighestPeak;

                // The second-highest peak, if the group has one, becomes a decoy
                // in its own single-peak group.
                if (!runnerUp.IsEmpty)
                {
                    var decoyGroup = new ScoredGroupPeaks();
                    decoyGroup.Add(runnerUp);
                    decoyScoredGroupPeaksSet.Add(decoyGroup);
                }

                // All remaining peaks go to the target group.
                var targetGroup = new ScoredGroupPeaks();
                foreach (var peak in groupPeaks.ScoredPeaks)
                {
                    if (ReferenceEquals(peak.Features, runnerUp.Features))
                        continue;
                    targetGroup.Add(peak);
                }
                targetScoredGroupPeaksSet.Add(targetGroup);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Build a model whose weights are the model defaults (preserving NaN markers
        /// for disabled features from <paramref name="initParameters"/>), rescaled so
        /// decoy scores have zero mean and unit standard deviation.
        /// NOTE(review): the preTrain and progressMonitor parameters are unused in
        /// this override.
        /// </summary>
        public override IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, LinearModelParams initParameters,
                                                bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
        {
            return(ChangeProp(ImClone(this), im =>
            {
                int nWeights = initParameters.Weights.Count;
                var weights = new double [nWeights];
                // Keep NaN (disabled-feature) markers from initParameters; every
                // enabled feature gets the model's default weight instead.
                for (int i = 0; i < initParameters.Weights.Count; ++i)
                {
                    weights[i] = double.IsNaN(initParameters.Weights[i]) ? double.NaN : DEFAULT_WEIGHTS[i];
                }
                var parameters = new LinearModelParams(weights);
                ScoredGroupPeaksSet decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
                ScoredGroupPeaksSet targetTransitionGroups = new ScoredGroupPeaksSet(targets);
                // Targets must be scored before second-best peaks can be selected below.
                targetTransitionGroups.ScorePeaks(parameters.Weights);

                if (includeSecondBest)
                {
                    // Use each target group's second-best peak as an additional decoy.
                    ScoredGroupPeaksSet secondBestTransitionGroups;
                    targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
                    foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
                    {
                        decoyTransitionGroups.Add(secondBestGroup);
                    }
                }
                // Score decoys only after any second-best additions, so the mean and
                // stdev used for rescaling cover the full decoy set.
                decoyTransitionGroups.ScorePeaks(parameters.Weights);
                im.UsesDecoys = decoys.Count > 0;
                im.UsesSecondBest = includeSecondBest;
                im.Parameters = parameters.RescaleParameters(decoyTransitionGroups.Mean, decoyTransitionGroups.Stdev);
            }));
        }
Beispiel #3
0
        /// <summary>
        /// Split the groups in this set into a target set and a decoy set: each
        /// group's second-highest peak (when non-null) becomes a single-peak decoy
        /// group, and every other peak in the group is copied to the corresponding
        /// target group.
        /// </summary>
        /// <param name="targetScoredGroupPeaksSet">Receives the target groups.</param>
        /// <param name="decoyScoredGroupPeaksSet">Receives the decoy groups.</param>
        public void SelectTargetsAndDecoys(out ScoredGroupPeaksSet targetScoredGroupPeaksSet, out ScoredGroupPeaksSet decoyScoredGroupPeaksSet)
        {
            targetScoredGroupPeaksSet = new ScoredGroupPeaksSet();
            decoyScoredGroupPeaksSet = new ScoredGroupPeaksSet();
            foreach (var groupPeaks in _scoredGroupPeaksList)
            {
                var runnerUp = groupPeaks.SecondHighestPeak;

                // The second-highest peak, when one exists, becomes a decoy in its
                // own single-peak group.
                if (runnerUp != null)
                {
                    var decoyGroup = new ScoredGroupPeaks();
                    decoyGroup.Add(runnerUp);
                    decoyScoredGroupPeaksSet.Add(decoyGroup);
                }

                // All remaining peaks go to the target group.
                var targetGroup = new ScoredGroupPeaks();
                foreach (var peak in groupPeaks.ScoredPeaks)
                {
                    if (peak == runnerUp)
                        continue;
                    targetGroup.Add(peak);
                }
                targetScoredGroupPeaksSet.Add(targetGroup);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Return a list of peaks, where each peak has the maximum score in its
        /// transition group and its q-value is less than or equal to the cutoff.
        /// P-values are computed against the decoy score distribution assuming
        /// normality.
        /// </summary>
        /// <param name="qValueCutoff">Cutoff q-value.</param>
        /// <param name="lambda">Optional p-value cutoff for calculating Pi-zero.</param>
        /// <param name="decoyScoredGroupPeaks">Decoy transition groups.</param>
        /// <returns>List of peaks that meet the criteria.</returns>
        public List <ScoredPeak> SelectTruePeaks(double qValueCutoff, double?lambda, ScoredGroupPeaksSet decoyScoredGroupPeaks)
        {
            // Statistics over the max peak score of each transition group.
            var statTarget = new Statistics(GetMaxScores());
            var statDecoys = new Statistics(decoyScoredGroupPeaks.GetMaxScores());

            // Normal-distribution p-values against the decoy set, then q-values.
            var qvalues = new Statistics(statDecoys.PvaluesNorm(statTarget)).Qvalues(lambda);

            // Keep each group's max peak when its q-value passes the cutoff.
            var passing = new List<ScoredPeak>(_scoredGroupPeaksList.Count);
            for (int groupIndex = 0; groupIndex < _scoredGroupPeaksList.Count; groupIndex++)
            {
                if (qvalues[groupIndex] <= qValueCutoff)
                    passing.Add(_scoredGroupPeaksList[groupIndex].MaxPeak);
            }
            return passing;
        }
Beispiel #5
0
        /// <summary>
        /// Write the best-peak score distributions of the target and decoy sets to
        /// uniquely named text files next to the document. Does nothing when the
        /// document path has no directory component.
        /// </summary>
        private void WriteDistributionInfo(string documentPath, ScoredGroupPeaksSet targetTransitionGroups, ScoredGroupPeaksSet decoyTransitionGroups)
        {
            var documentDir = Path.GetDirectoryName(documentPath);
            if (documentDir == null)
                return;

            // Pick a base name that does not collide with existing output files.
            string distBase = Helpers.GetUniqueName(Path.Combine(documentDir, "dist1"),           // Not L10N
                                                    value => !File.Exists(value + "Targets.txt")); // Not L10N
            targetTransitionGroups.WriteBest(distBase + "Targets.txt");                           // Not L10N
            decoyTransitionGroups.WriteBest(distBase + "Decoys.txt");                             // Not L10N
        }
Beispiel #6
0
        /// <summary>
        /// Compute a p-value for each target transition group's maximum peak score
        /// against the decoy max-score distribution.
        /// </summary>
        /// <param name="decoyScoredGroupPeaks">Decoy transition groups providing the null distribution.</param>
        /// <param name="nonParametric">Use a non-parametric null if true; otherwise assume a normal distribution.</param>
        private double[] CalcPValues(ScoredGroupPeaksSet decoyScoredGroupPeaks, bool nonParametric = false)
        {
            // Statistics over the max peak score of each transition group.
            var statTarget = new Statistics(GetMaxScores());
            var statDecoys = new Statistics(decoyScoredGroupPeaks.GetMaxScores());

            if (nonParametric)
                return statDecoys.PvaluesNull(statTarget);
            return statDecoys.PvaluesNorm(statTarget);
        }
Beispiel #7
0
        /// <summary>
        /// Return a list of peaks, where each peak has the maximum score in its
        /// transition group and its q-value is less than or equal to the cutoff.
        /// </summary>
        /// <param name="decoyScoredGroupPeaks">Decoy transition groups.</param>
        /// <param name="qValueCutoff">Cutoff q-value.</param>
        /// <param name="lambda">Optional p-value cutoff for calculating Pi-zero.</param>
        /// <param name="nonParametric">Non-parametric p-value calculation if true; normal-distribution based if false.</param>
        /// <returns>List of peaks that meet the criteria.</returns>
        public List <ScoredPeak> SelectTruePeaks(ScoredGroupPeaksSet decoyScoredGroupPeaks, double qValueCutoff, double?lambda, bool nonParametric)
        {
            var qvalues = new Statistics(CalcPValues(decoyScoredGroupPeaks, nonParametric))
                .Qvalues(lambda, MProphetPeakScoringModel.PI_ZERO_MIN);

            // Keep each group's max peak when its q-value passes the cutoff.
            // Capacity of one fifth of the groups is just a sizing heuristic.
            var passing = new List<ScoredPeak>(_scoredGroupPeaksList.Count / 5);
            for (int groupIndex = 0; groupIndex < _scoredGroupPeaksList.Count; groupIndex++)
            {
                if (qvalues[groupIndex] <= qValueCutoff)
                    passing.Add(_scoredGroupPeaksList[groupIndex].MaxPeak);
            }
            return passing;
        }
Beispiel #8
0
        private const int MAX_TRAINING_MEMORY = 512 * 1024 * 1024; // 512 MB cap on the LDA training matrix

        /// <summary>
        /// Calculate new weight factors for one iteration of the refinement process.  This is the heart
        /// of the MProphet algorithm.
        /// </summary>
        /// <param name="iteration">Iteration number (special processing happens for iteration 0).</param>
        /// <param name="targetTransitionGroups">Target transition groups.</param>
        /// <param name="decoyTransitionGroups">Decoy transition groups.</param>
        /// <param name="includeSecondBest">Include the second best peaks in the targets as additional decoys?</param>
        /// <param name="weights">Array of weights per calculator; rewritten in place with the new LDA weights.</param>
        /// <param name="decoyMean">Output mean of decoy transition groups.</param>
        /// <param name="decoyStdev">Output standard deviation of decoy transition groups.</param>
        /// <param name="colinearWarning">Set to true if colinearity was detected.</param>
        private void CalculateWeights(
            int iteration,
            ScoredGroupPeaksSet targetTransitionGroups,
            ScoredGroupPeaksSet decoyTransitionGroups,
            bool includeSecondBest,
            double[] weights,
            out double decoyMean,
            out double decoyStdev,
            ref bool colinearWarning)
        {
            if (includeSecondBest)
            {
                // NOTE(review): this rebinds only the local targetTransitionGroups
                // reference, but decoyTransitionGroups.Add mutates the caller's set.
                ScoredGroupPeaksSet secondBestTransitionGroups;
                targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
                foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
                {
                    decoyTransitionGroups.Add(secondBestGroup);
                }
            }

            // Select true target peaks using a q-value cutoff filter.
            // Looser cutoff on iteration 0, when scores come from the initial model.
            var qValueCutoff = (iteration == 0 ? 0.15 : 0.02);
            var truePeaks    = targetTransitionGroups.SelectTruePeaks(qValueCutoff, Lambda, decoyTransitionGroups);
            var decoyPeaks   = decoyTransitionGroups.SelectMaxPeaks();

            // Omit first feature during first iteration, since it is used as the initial score value.
            weights[0] = (iteration == 0) ? double.NaN : 0;
            // NaN weights mark disabled features; only the rest are trained.
            var featureCount = weights.Count(w => !double.IsNaN(w));

            // Copy target and decoy peaks to training data array.
            // NOTE(review): counts decoyTransitionGroups.Count rather than
            // decoyPeaks.Count; equal only if SelectMaxPeaks yields one peak per
            // group — verify.
            int totalTrainingPeaks = truePeaks.Count + decoyTransitionGroups.Count;
            // Calculate the maximum number of training peaks (8 bytes per score - double, featureCount + 1 scores per peak)
            int maxTrainingPeaks = MAX_TRAINING_MEMORY / 8 / (featureCount + 1);

            // One row per training peak: featureCount feature scores plus one more
            // column, presumably the class label (1 = target, 0 = decoy) passed to
            // CopyToTrainData — TODO confirm.
            var trainData = new double[Math.Min(totalTrainingPeaks, maxTrainingPeaks), featureCount + 1];

            if (totalTrainingPeaks < maxTrainingPeaks)
            {
                // Everything fits: copy all true peaks, then all decoy peaks.
                for (int i = 0; i < truePeaks.Count; i++)
                {
                    CopyToTrainData(truePeaks[i].Features, trainData, weights, i, 1);
                }
                for (int i = 0; i < decoyPeaks.Count; i++)
                {
                    CopyToTrainData(decoyPeaks[i].Features, trainData, weights, i + truePeaks.Count, 0);
                }
            }
            else
            {
                // Too many peaks: randomly subsample both sets, keeping the
                // original target/decoy proportions.
                double proportionTrue = truePeaks.Count * 1.0 / totalTrainingPeaks;
                int    truePeakCount  = (int)Math.Round(maxTrainingPeaks * proportionTrue);
                int    i = 0;
                foreach (var peak in truePeaks.RandomOrder())
                {
                    if (i < truePeakCount)
                    {
                        CopyToTrainData(peak.Features, trainData, weights, i, 1);
                    }
                    else
                    {
                        break;
                    }
                    i++;
                }
                int decoyPeakCount = maxTrainingPeaks - truePeakCount;
                i = 0;
                foreach (var peak in decoyPeaks.RandomOrder())
                {
                    if (i < decoyPeakCount)
                    {
                        CopyToTrainData(peak.Features, trainData, weights, i + truePeakCount, 0);
                    }
                    else
                    {
                        break;
                    }
                    i++;
                }
            }

            // Use Linear Discriminant Analysis to find weights that separate true and decoy peak scores.
            int info;

            double[] weightsFromLda;
            alglib.fisherlda(
                trainData,
                trainData.GetLength(0),
                trainData.GetLength(1) - 1,
                2,
                out info,
                out weightsFromLda);

            // Check for colinearity.
            if (info == 2)
            {
                colinearWarning = true;
            }

            // Unpack weights array.  LDA only returned weights for the enabled
            // (non-NaN) features, so walk both arrays with separate indexes.
            for (int i = 0, j = 0; i < weights.Length; i++)
            {
                if (!double.IsNaN(weights[i]))
                {
                    weights[i] = weightsFromLda[j++];
                }
            }

            // Recalculate all peak scores.
            targetTransitionGroups.ScorePeaks(weights);
            decoyTransitionGroups.ScorePeaks(weights);

            // If the mean target score is less than the mean decoy score, then the
            // weights came out negative, and all the weights and scores must be negated to
            // restore the proper ordering.
            if (targetTransitionGroups.Mean < decoyTransitionGroups.Mean)
            {
                for (int i = 0; i < weights.Length; i++)
                {
                    weights[i] *= -1;
                }
                targetTransitionGroups.ScorePeaks(weights);
                decoyTransitionGroups.ScorePeaks(weights);
            }

            decoyMean  = decoyTransitionGroups.Mean;
            decoyStdev = decoyTransitionGroups.Stdev;
        }
Beispiel #9
0
        /// <summary>
        /// Train the model by iterative calculating weights to separate target and decoy transition groups.
        /// </summary>
        /// <param name="targets">Target transition groups.</param>
        /// <param name="decoys">Decoy transition groups.</param>
        /// <param name="initParameters">Initial model parameters (weights and bias)</param>
        /// <param name="includeSecondBest"> Include the second best peaks in the targets as decoys?</param>
        /// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
        /// <param name="progressMonitor">Optional monitor for progress reporting and cancellation.</param>
        /// <returns>Immutable model with new weights.</returns>
        public override IPeakScoringModel Train(IList <IList <float[]> > targets, IList <IList <float[]> > decoys, LinearModelParams initParameters,
                                                bool includeSecondBest = false, bool preTrain = true, IProgressMonitor progressMonitor = null)
        {
            if (initParameters == null)
            {
                initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
            }
            return(ChangeProp(ImClone(this), im =>
            {
                // Drop empty transition groups before building the scored sets.
                targets = targets.Where(list => list.Count > 0).ToList();
                decoys = decoys.Where(list => list.Count > 0).ToList();
                var targetTransitionGroups = new ScoredGroupPeaksSet(targets);
                var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys);
                // Bootstrap from the pre-trained legacy model
                if (preTrain)
                {
                    // Entries default to 0; only the NaN (disabled-feature) markers
                    // from initParameters are carried over.
                    var preTrainedWeights = new double[initParameters.Weights.Count];
                    for (int i = 0; i < preTrainedWeights.Length; ++i)
                    {
                        if (double.IsNaN(initParameters.Weights[i]))
                        {
                            preTrainedWeights[i] = double.NaN;
                        }
                    }
                    // Use the standard calculators when at least as many of them are
                    // enabled as analyte calculators; otherwise fall back to analyte.
                    int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
                    int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
                    bool hasStandards = standardEnabledCount >= analyteEnabledCount;
                    var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
                    for (int i = 0; i < calculators.Length; ++i)
                    {
                        // Skip the retention time prediction calculator in the bootstrap.
                        if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                        {
                            continue;
                        }
                        SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
                    }
                    targetTransitionGroups.ScorePeaks(preTrainedWeights);
                    decoyTransitionGroups.ScorePeaks(preTrainedWeights);
                }

                // Iteratively refine the weights through multiple iterations.
                var calcWeights = new double[initParameters.Weights.Count];
                Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
                double decoyMean = 0;
                double decoyStdev = 0;
                bool colinearWarning = false;
                // This may take a long time between progress updates, but just measure progress by cycles through the training
                IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status);
                }
                for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++)
                {
                    if (progressMonitor != null)
                    {
                        if (progressMonitor.IsCanceled)
                        {
                            throw new OperationCanceledException();
                        }

                        progressMonitor.UpdateProgress(status =
                                                           status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model__iteration__0__of__1__, iteration + 1, MAX_ITERATIONS))
                                                           .ChangePercentComplete((iteration + 1) * 100 / (MAX_ITERATIONS + 1)));
                    }

                    // Each call rewrites calcWeights in place and reports the decoy
                    // distribution; the values from the final iteration are used below.
                    im.CalculateWeights(iteration, targetTransitionGroups, decoyTransitionGroups,
                                        includeSecondBest, calcWeights, out decoyMean, out decoyStdev, ref colinearWarning);

                    GC.Collect();       // Each loop generates a number of large objects. GC helps to keep private bytes under control
                }
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
                }

                // Rescale against the final decoy distribution so decoy scores have
                // zero mean and unit stdev.
                var parameters = new LinearModelParams(calcWeights);
                parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
                im.Parameters = parameters;
                im.ColinearWarning = colinearWarning;
                im.UsesSecondBest = includeSecondBest;
                im.UsesDecoys = decoys.Count > 0;
            }));
        }
Beispiel #10
0
        private const int MAX_TRAINING_MEMORY = 512 * 1024 * 1024; // 512 MB cap on the LDA training matrix

        /// <summary>
        /// Calculate new weight factors for one iteration of the refinement process.  This is the heart
        /// of the MProphet algorithm.
        /// </summary>
        /// <param name="documentPath">The path to the current document for writing score distributions</param>
        /// <param name="targetTransitionGroups">Target transition groups.</param>
        /// <param name="decoyTransitionGroups">Decoy transition groups.</param>
        /// <param name="includeSecondBest">Include the second best peaks in the targets as additional decoys?</param>
        /// <param name="nonParametricPValues">Non-parametric p values used in selecting true peaks if true</param>
        /// <param name="qValueCutoff">The q value cut-off for true peaks in the training</param>
        /// <param name="weights">Array of weights per calculator; rewritten in place with the new LDA weights.</param>
        /// <param name="decoyMean">Output mean of decoy transition groups.</param>
        /// <param name="decoyStdev">Output standard deviation of decoy transition groups.</param>
        /// <param name="colinearWarning">Set to true if colinearity was detected.</param>
        /// <returns>The number of true target peaks selected at the q-value cutoff.</returns>
        private int CalculateWeights(string documentPath,
                                     ScoredGroupPeaksSet targetTransitionGroups,
                                     ScoredGroupPeaksSet decoyTransitionGroups,
                                     bool includeSecondBest,
                                     bool nonParametricPValues,
                                     double qValueCutoff,
                                     double[] weights,
                                     out double decoyMean,
                                     out double decoyStdev,
                                     ref bool colinearWarning)
        {
            if (includeSecondBest)
            {
                // NOTE(review): this rebinds only the local targetTransitionGroups
                // reference, but decoyTransitionGroups.Add mutates the caller's set.
                ScoredGroupPeaksSet secondBestTransitionGroups;
                targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
                foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
                {
                    decoyTransitionGroups.Add(secondBestGroup);
                }
            }

            // Select true target peaks using a q-value cutoff filter.
            var truePeaks  = targetTransitionGroups.SelectTruePeaks(decoyTransitionGroups, qValueCutoff, Lambda, nonParametricPValues);
            var decoyPeaks = decoyTransitionGroups.SelectMaxPeaks();

            WriteDistributionInfo(documentPath, targetTransitionGroups, decoyTransitionGroups); // Only if asked to do so in command-line arguments

            // Better to let a really poor model through for the user to see than to give an error message here
            if (((double)truePeaks.Count) * 10 * 1000 < decoyPeaks.Count) // Targets must be at least 0.01% of decoys (still rejects zero)
            {
                throw new InvalidDataException(string.Format(Resources.MProphetPeakScoringModel_CalculateWeights_Insufficient_target_peaks___0__with__1__decoys__detected_at__2___FDR_to_continue_training_, truePeaks.Count, decoyPeaks.Count, qValueCutoff * 100));
            }
            if (((double)decoyPeaks.Count) * 1000 < truePeaks.Count) // Decoys must be at least 0.1% of targets
            {
                throw new InvalidDataException(string.Format(Resources.MProphetPeakScoringModel_CalculateWeights_Insufficient_decoy_peaks___0__with__1__targets__to_continue_training_, decoyPeaks.Count, truePeaks.Count));
            }

            // NaN weights mark disabled features; only the rest are trained.
            var featureCount = weights.Count(w => !double.IsNaN(w));

            // Copy target and decoy peaks to training data array.
            // NOTE(review): counts decoyTransitionGroups.Count rather than
            // decoyPeaks.Count; equal only if SelectMaxPeaks yields one peak per
            // group — verify.
            int totalTrainingPeaks = truePeaks.Count + decoyTransitionGroups.Count;
            // Calculate the maximum number of training peaks (8 bytes per score - double, featureCount + 1 scores per peak)
            int maxTrainingPeaks = MAX_TRAINING_MEMORY / 8 / (featureCount + 1);

            // One row per training peak: featureCount feature scores plus one more
            // column, presumably the class label (1 = target, 0 = decoy) passed to
            // CopyToTrainData — TODO confirm.
            var trainData = new double[Math.Min(totalTrainingPeaks, maxTrainingPeaks), featureCount + 1];

            if (totalTrainingPeaks < maxTrainingPeaks)
            {
                // Everything fits: copy all true peaks, then all decoy peaks.
                for (int i = 0; i < truePeaks.Count; i++)
                {
                    CopyToTrainData(truePeaks[i].Features, trainData, weights, i, 1);
                }
                for (int i = 0; i < decoyPeaks.Count; i++)
                {
                    CopyToTrainData(decoyPeaks[i].Features, trainData, weights, i + truePeaks.Count, 0);
                }
            }
            else
            {
                // Too many peaks: randomly subsample both sets with a fixed seed
                // (reproducible), keeping the original target/decoy proportions.
                double proportionTrue = truePeaks.Count * 1.0 / totalTrainingPeaks;
                int    truePeakCount  = (int)Math.Round(maxTrainingPeaks * proportionTrue);
                int    i = 0;
                foreach (var peak in truePeaks.RandomOrder(ArrayUtil.RANDOM_SEED))
                {
                    if (i < truePeakCount)
                    {
                        CopyToTrainData(peak.Features, trainData, weights, i, 1);
                    }
                    else
                    {
                        break;
                    }
                    i++;
                }
                int decoyPeakCount = maxTrainingPeaks - truePeakCount;
                i = 0;
                foreach (var peak in decoyPeaks.RandomOrder(ArrayUtil.RANDOM_SEED))
                {
                    if (i < decoyPeakCount)
                    {
                        CopyToTrainData(peak.Features, trainData, weights, i + truePeakCount, 0);
                    }
                    else
                    {
                        break;
                    }
                    i++;
                }
            }

            // Use Linear Discriminant Analysis to find weights that separate true and decoy peak scores.
            int info;

            double[] weightsFromLda;
            alglib.fisherlda(
                trainData,
                trainData.GetLength(0),
                trainData.GetLength(1) - 1,
                2,
                out info,
                out weightsFromLda);

            // Check for colinearity.
            if (info == 2)
            {
                colinearWarning = true;
            }

            // Unpack weights array.  LDA only returned weights for the enabled
            // (non-NaN) features, so walk both arrays with separate indexes.
            for (int i = 0, j = 0; i < weights.Length; i++)
            {
                if (!double.IsNaN(weights[i]))
                {
                    weights[i] = weightsFromLda[j++];
                }
            }

            // Recalculate all peak scores.
            targetTransitionGroups.ScorePeaks(weights);
            decoyTransitionGroups.ScorePeaks(weights);

            // If the mean target score is less than the mean decoy score, then the
            // weights came out negative, and all the weights and scores must be negated to
            // restore the proper ordering.
            if (targetTransitionGroups.Mean < decoyTransitionGroups.Mean)
            {
                for (int i = 0; i < weights.Length; i++)
                {
                    weights[i] *= -1;
                }
                targetTransitionGroups.ScorePeaks(weights);
                decoyTransitionGroups.ScorePeaks(weights);
            }

            decoyMean  = decoyTransitionGroups.Mean;
            decoyStdev = decoyTransitionGroups.Stdev;
            return(truePeaks.Count);
        }
Beispiel #11
0
        /// <summary>
        /// Train the model by iterative calculating weights to separate target and decoy transition groups.
        /// </summary>
        /// <param name="targetsIn">Target transition groups.</param>
        /// <param name="decoysIn">Decoy transition groups.</param>
        /// <param name="initParameters">Initial model parameters (weights and bias)</param>
        /// <param name="iterations">Optional specific number of iterations to use in training</param>
        /// <param name="includeSecondBest">Include the second best peaks in the targets as decoys?</param>
        /// <param name="preTrain">Use a pre-trained model to bootstrap the learning.</param>
        /// <param name="progressMonitor">Used to report progress to the calling context</param>
        /// <param name="documentPath">The path to the current document for writing score distributions</param>
        /// <returns>Immutable model with new weights.</returns>
        public override IPeakScoringModel Train(IList <IList <float[]> > targetsIn,
                                                IList <IList <float[]> > decoysIn,
                                                LinearModelParams initParameters,
                                                int?iterations                   = null,
                                                bool includeSecondBest           = false,
                                                bool preTrain                    = true,
                                                IProgressMonitor progressMonitor = null,
                                                string documentPath              = null)
        {
            if (initParameters == null)
            {
                initParameters = new LinearModelParams(_peakFeatureCalculators.Count);
            }
            return(ChangeProp(ImClone(this), im =>
            {
                // This may take a long time between progress updates, but just measure progress by cycles through the training
                IProgressStatus status = new ProgressStatus(Resources.MProphetPeakScoringModel_Train_Training_peak_scoring_model);
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status);
                }

                // Drop transition groups with no candidate peaks, but pass the ORIGINAL group
                // counts to ScoredGroupPeaksSet so statistics stay based on the full number of
                // groups rather than only the non-empty ones.
                var targets = targetsIn.Where(list => list.Count > 0);
                var decoys = decoysIn.Where(list => list.Count > 0);
                var targetTransitionGroups = new ScoredGroupPeaksSet(targets, targetsIn.Count);
                var decoyTransitionGroups = new ScoredGroupPeaksSet(decoys, decoysIn.Count);
                // Iteratively refine the weights through multiple iterations.
                var calcWeights = new double[initParameters.Weights.Count];
                Array.Copy(initParameters.Weights.ToArray(), calcWeights, initParameters.Weights.Count);
                double qValueCutoff = 0.01; // First iteration cut-off - if not pretraining, just start at 0.01
                // Start with scores calculated from the initial weights
                if (!preTrain)
                {
                    targetTransitionGroups.ScorePeaks(calcWeights);
                    decoyTransitionGroups.ScorePeaks(calcWeights);
                }
                // Bootstrap from the pre-trained legacy model
                else
                {
                    // Pre-training starts with a looser 15% FDR cut-off; the loop below
                    // tightens it to 2% and then 1%.
                    qValueCutoff = 0.15;
                    // NaN in the initial weights marks a disabled calculator; copy that marker
                    // so disabled calculators stay disabled in the pre-trained weights.
                    // (All other entries start at 0 via array default initialization.)
                    var preTrainedWeights = new double[initParameters.Weights.Count];
                    for (int i = 0; i < preTrainedWeights.Length; ++i)
                    {
                        if (double.IsNaN(initParameters.Weights[i]))
                        {
                            preTrainedWeights[i] = double.NaN;
                        }
                    }
                    // Seed from whichever legacy calculator set (standard vs. analyte) has
                    // more enabled feature calculators.
                    int standardEnabledCount = GetEnabledCount(LegacyScoringModel.StandardFeatureCalculators, initParameters.Weights);
                    int analyteEnabledCount = GetEnabledCount(LegacyScoringModel.AnalyteFeatureCalculators, initParameters.Weights);
                    bool hasStandards = standardEnabledCount >= analyteEnabledCount;
                    var calculators = hasStandards ? LegacyScoringModel.StandardFeatureCalculators : LegacyScoringModel.AnalyteFeatureCalculators;
                    for (int i = 0; i < calculators.Length; ++i)
                    {
                        // NOTE(review): retention time prediction is deliberately skipped when
                        // seeding the bootstrap weights - presumably because it is unreliable
                        // before the model is trained; confirm intent with model authors.
                        if (calculators[i].GetType() == typeof(MQuestRetentionTimePredictionCalc))
                        {
                            continue;
                        }
                        SetCalculatorValue(calculators[i].GetType(), LegacyScoringModel.DEFAULT_WEIGHTS[i], preTrainedWeights);
                    }
                    targetTransitionGroups.ScorePeaks(preTrainedWeights);
                    decoyTransitionGroups.ScorePeaks(preTrainedWeights);
                }

                double decoyMean = 0;
                double decoyStdev = 0;
                bool colinearWarning = false;
                int iterationCount = iterations ?? MAX_ITERATIONS;
                int truePeaksCount = 0;
                // Weights from the previous iteration, restored if the model begins to degrade.
                var lastWeights = new double[calcWeights.Length];
                for (int i = 0; i < iterationCount; i++)
                {
                    int percentComplete = 0;
                    double decoyMeanNew, decoyStdevNew;
                    bool colinearWarningNew = colinearWarning;
                    int truePeaksCountNew = im.CalculateWeights(documentPath,
                                                                targetTransitionGroups,
                                                                decoyTransitionGroups,
                                                                includeSecondBest,
                                                                i == 0, // Use non-parametric q values for first round, when normality assumption may not hold
                                                                qValueCutoff,
                                                                calcWeights,
                                                                out decoyMeanNew,
                                                                out decoyStdevNew,
                                                                ref colinearWarningNew);

                    if (progressMonitor != null)
                    {
                        if (progressMonitor.IsCanceled)
                        {
                            throw new OperationCanceledException();
                        }

                        // Calculate progress, but wait to make sure convergence has not occurred before setting it
                        string formatText = qValueCutoff > 0.02
                            ? Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1__
                            : Resources.MProphetPeakScoringModel_Train_Training_scoring_model__iteration__0__of__1_____2______peaks_at__3_0_____FDR_;
                        percentComplete = (i + 1) * 100 / (iterationCount + 1);
                        status = status.ChangeMessage(string.Format(formatText, i + 1, iterationCount, truePeaksCountNew, qValueCutoff))
                                 .ChangePercentComplete(percentComplete);
                    }

                    if (qValueCutoff > 0.02)
                    {
                        // Tighten the q value cut-off for "truth" to 2% FDR
                        qValueCutoff = 0.02;
                        // And allow the true peaks count to go down in the next iteration
                        // Though it rarely will
                        truePeaksCountNew = 0;
                    }
                    else if (truePeaksCountNew < truePeaksCount)
                    {
                        // The model has leveled off enough to begin losing discriminant value
                        if (qValueCutoff > 0.01)
                        {
                            // Tighten the q value cut-off for "truth" to 1% FDR
                            qValueCutoff = 0.01;
                            // And allow the true peaks count to go down in the next iteration
                            truePeaksCountNew = 0;
                        }
                        else
                        {
                            // Converged at the tightest cut-off: report (jumping progress to at
                            // least 95%, since training is essentially finished) and roll back
                            // to the previous iteration's better-performing weights.
                            if (progressMonitor != null)
                            {
                                progressMonitor.UpdateProgress(status =
                                                                   status.ChangeMessage(string.Format(Resources.MProphetPeakScoringModel_Train_Scoring_model_converged__iteration__0_____1______peaks_at__2_0_____FDR_, i + 1, truePeaksCount, qValueCutoff))
                                                                   .ChangePercentComplete(Math.Max(95, percentComplete)));
                            }
                            calcWeights = lastWeights;
                            break;
                        }
                    }
                    // This iteration improved (or held) the true-peak count: accept its results
                    // and snapshot the weights for possible rollback next iteration.
                    truePeaksCount = truePeaksCountNew;
                    Array.Copy(calcWeights, lastWeights, calcWeights.Length);
                    decoyMean = decoyMeanNew;
                    decoyStdev = decoyStdevNew;
                    colinearWarning = colinearWarningNew;

                    if (progressMonitor != null)
                    {
                        progressMonitor.UpdateProgress(status);
                    }
                }
                if (progressMonitor != null)
                {
                    progressMonitor.UpdateProgress(status.ChangePercentComplete(100));
                }

                // Rescale the final weights relative to the decoy score distribution and store
                // the results on the cloned model instance returned by ChangeProp.
                var parameters = new LinearModelParams(calcWeights);
                parameters = parameters.RescaleParameters(decoyMean, decoyStdev);
                im.Parameters = parameters;
                im.ColinearWarning = colinearWarning;
                im.UsesSecondBest = includeSecondBest;
                im.UsesDecoys = decoysIn.Count > 0;
            }));
        }