/// <summary>
/// Produces a changed clone of this model (via <c>ChangeProp(ImClone(this), ...)</c>) whose parameters
/// are the default weights, rescaled with the mean and standard deviation of the decoy scores.
/// </summary>
/// <param name="targets">Feature scores for the target transition groups.</param>
/// <param name="decoys">Feature scores for the decoy transition groups.</param>
/// <param name="initParameters">Initial parameters; only used to find which weights are NaN, and those stay NaN.</param>
/// <param name="includeSecondBest">If true, the second-best groups split off from the targets are added to the decoys.</param>
/// <param name="preTrain">Not used by this implementation.</param>
/// <param name="progressMonitor">Not used by this implementation.</param>
/// <returns>The value returned by <c>ChangeProp</c> for the modified clone.</returns>
public override IPeakScoringModel Train(
    IList<IList<float[]>> targets,
    IList<IList<float[]>> decoys,
    LinearModelParams initParameters,
    bool includeSecondBest = false,
    bool preTrain = true,
    IProgressMonitor progressMonitor = null)
{
    return ChangeProp(ImClone(this), im =>
    {
        // A NaN initial weight stays NaN; every other slot takes the default weight.
        var initialWeights = initParameters.Weights;
        var modelWeights = new double[initialWeights.Count];
        for (int w = 0; w < initialWeights.Count; w++)
        {
            if (double.IsNaN(initialWeights[w]))
            {
                modelWeights[w] = double.NaN;
            }
            else
            {
                modelWeights[w] = DEFAULT_WEIGHTS[w];
            }
        }
        var defaultParams = new LinearModelParams(modelWeights);

        var decoyGroups = new ScoredGroupPeaksSet(decoys);
        var targetGroups = new ScoredGroupPeaksSet(targets);

        // Targets are scored first so the best / second-best split below has scores to work with.
        targetGroups.ScorePeaks(defaultParams.Weights);

        if (includeSecondBest)
        {
            ScoredGroupPeaksSet bestGroups;
            ScoredGroupPeaksSet secondBestGroups;
            targetGroups.SelectTargetsAndDecoys(out bestGroups, out secondBestGroups);
            targetGroups = bestGroups;

            // Second-best groups are added to the decoy set.
            foreach (var runnerUp in secondBestGroups.ScoredGroupPeaksList)
            {
                decoyGroups.Add(runnerUp);
            }
        }

        // Decoys are scored after any second-best groups have been added to them.
        decoyGroups.ScorePeaks(defaultParams.Weights);

        im.UsesDecoys = decoys.Count > 0;
        im.UsesSecondBest = includeSecondBest;
        im.Parameters = defaultParams.RescaleParameters(decoyGroups.Mean, decoyGroups.Stdev);
    });
}
private const int MAX_TRAINING_MEMORY = 512 * 1024 * 1024; // 512 MB

/// <summary>
/// Calculate new weight factors for one iteration of the refinement process. This is the heart
/// of the MProphet algorithm.
/// </summary>
/// <param name="iteration">Iteration number (special processing happens for iteration 0).</param>
/// <param name="targetTransitionGroups">Target transition groups.</param>
/// <param name="decoyTransitionGroups">Decoy transition groups. Second-best groups are added to this set when <paramref name="includeSecondBest"/> is true.</param>
/// <param name="includeSecondBest">Include the second best peaks in the targets as additional decoys?</param>
/// <param name="weights">Array of weights per calculator. Updated in place; NaN entries are left untouched.</param>
/// <param name="decoyMean">Output mean of decoy transition groups.</param>
/// <param name="decoyStdev">Output standard deviation of decoy transition groups.</param>
/// <param name="colinearWarning">Set to true if colinearity was detected.</param>
private void CalculateWeights(
    int iteration,
    ScoredGroupPeaksSet targetTransitionGroups,
    ScoredGroupPeaksSet decoyTransitionGroups,
    bool includeSecondBest,
    double[] weights,
    out double decoyMean,
    out double decoyStdev,
    ref bool colinearWarning)
{
    if (includeSecondBest)
    {
        // NOTE(review): this Add mutates the caller's decoy set. If the caller passes the same set on
        // every iteration, second-best groups may accumulate across calls - confirm against the caller.
        ScoredGroupPeaksSet secondBestTransitionGroups;
        targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
        foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
        {
            decoyTransitionGroups.Add(secondBestGroup);
        }
    }

    // Select true target peaks using a q-value cutoff filter (looser on the first iteration).
    var qValueCutoff = (iteration == 0 ? 0.15 : 0.02);
    var truePeaks = targetTransitionGroups.SelectTruePeaks(qValueCutoff, Lambda, decoyTransitionGroups);
    var decoyPeaks = decoyTransitionGroups.SelectMaxPeaks();

    // Omit first feature during first iteration, since it is used as the initial score value.
    weights[0] = (iteration == 0) ? double.NaN : 0;
    var featureCount = weights.Count(w => !double.IsNaN(w));

    // Copy target and decoy peaks to training data array.
    // Size the array from the peaks that are actually copied (decoyPeaks), not from the decoy group
    // count, so a mismatch cannot leave unfilled rows or overrun the array.
    int totalTrainingPeaks = truePeaks.Count + decoyPeaks.Count;
    // Calculate the maximum number of training peaks (8 bytes per score - double, featureCount + 1 scores per peak)
    int maxTrainingPeaks = MAX_TRAINING_MEMORY / 8 / (featureCount + 1);

    var trainData = new double[Math.Min(totalTrainingPeaks, maxTrainingPeaks), featureCount + 1];
    if (totalTrainingPeaks < maxTrainingPeaks)
    {
        // Everything fits: true peaks first (last argument 1), then decoy peaks (last argument 0).
        for (int i = 0; i < truePeaks.Count; i++)
        {
            CopyToTrainData(truePeaks[i].Features, trainData, weights, i, 1);
        }
        for (int i = 0; i < decoyPeaks.Count; i++)
        {
            CopyToTrainData(decoyPeaks[i].Features, trainData, weights, i + truePeaks.Count, 0);
        }
    }
    else
    {
        // Too many peaks for the memory budget: take a random sample that keeps the
        // true/decoy proportion of the full data set.
        double proportionTrue = truePeaks.Count * 1.0 / totalTrainingPeaks;
        int truePeakCount = (int)Math.Round(maxTrainingPeaks * proportionTrue);
        int i = 0;
        foreach (var peak in truePeaks.RandomOrder())
        {
            if (i >= truePeakCount)
            {
                break;
            }
            CopyToTrainData(peak.Features, trainData, weights, i, 1);
            i++;
        }

        int decoyPeakCount = maxTrainingPeaks - truePeakCount;
        i = 0;
        foreach (var peak in decoyPeaks.RandomOrder())
        {
            if (i >= decoyPeakCount)
            {
                break;
            }
            CopyToTrainData(peak.Features, trainData, weights, i + truePeakCount, 0);
            i++;
        }
    }

    // Use Linear Discriminant Analysis to find weights that separate true and decoy peak scores.
    int info;
    double[] weightsFromLda;
    alglib.fisherlda(
        trainData,
        trainData.GetLength(0),
        trainData.GetLength(1) - 1,
        2,
        out info,
        out weightsFromLda);

    // Check for colinearity.
    if (info == 2)
    {
        colinearWarning = true;
    }

    // Unpack weights array: LDA weights go back into the non-NaN slots, in order.
    for (int i = 0, j = 0; i < weights.Length; i++)
    {
        if (!double.IsNaN(weights[i]))
        {
            weights[i] = weightsFromLda[j++];
        }
    }

    // Recalculate all peak scores.
    targetTransitionGroups.ScorePeaks(weights);
    decoyTransitionGroups.ScorePeaks(weights);

    // If the mean target score is less than the mean decoy score, then the
    // weights came out negative, and all the weights and scores must be negated to
    // restore the proper ordering.
    if (targetTransitionGroups.Mean < decoyTransitionGroups.Mean)
    {
        for (int i = 0; i < weights.Length; i++)
        {
            weights[i] *= -1;
        }
        targetTransitionGroups.ScorePeaks(weights);
        decoyTransitionGroups.ScorePeaks(weights);
    }

    decoyMean = decoyTransitionGroups.Mean;
    decoyStdev = decoyTransitionGroups.Stdev;
}
private const int MAX_TRAINING_MEMORY = 512 * 1024 * 1024; // 512 MB

/// <summary>
/// Calculate new weight factors for one iteration of the refinement process. This is the heart
/// of the MProphet algorithm.
/// </summary>
/// <param name="documentPath">The path to the current document for writing score distributions</param>
/// <param name="targetTransitionGroups">Target transition groups.</param>
/// <param name="decoyTransitionGroups">Decoy transition groups. Second-best groups are added to this set when <paramref name="includeSecondBest"/> is true.</param>
/// <param name="includeSecondBest">Include the second best peaks in the targets as additional decoys?</param>
/// <param name="nonParametricPValues">Non-parametric p values used in selecting true peaks if true</param>
/// <param name="qValueCutoff">The q value cut-off for true peaks in the training</param>
/// <param name="weights">Array of weights per calculator. Updated in place; NaN entries are left untouched.</param>
/// <param name="decoyMean">Output mean of decoy transition groups.</param>
/// <param name="decoyStdev">Output standard deviation of decoy transition groups.</param>
/// <param name="colinearWarning">Set to true if colinearity was detected.</param>
/// <returns>The number of true target peaks selected at the q value cut-off.</returns>
/// <exception cref="InvalidDataException">
/// Thrown when the true peaks are fewer than 0.01% of the decoy peaks, or the decoy peaks are
/// fewer than 0.1% of the true peaks.
/// </exception>
private int CalculateWeights(
    string documentPath,
    ScoredGroupPeaksSet targetTransitionGroups,
    ScoredGroupPeaksSet decoyTransitionGroups,
    bool includeSecondBest,
    bool nonParametricPValues,
    double qValueCutoff,
    double[] weights,
    out double decoyMean,
    out double decoyStdev,
    ref bool colinearWarning)
{
    if (includeSecondBest)
    {
        // NOTE(review): this Add mutates the caller's decoy set. If the caller passes the same set on
        // every iteration, second-best groups may accumulate across calls - confirm against the caller.
        ScoredGroupPeaksSet secondBestTransitionGroups;
        targetTransitionGroups.SelectTargetsAndDecoys(out targetTransitionGroups, out secondBestTransitionGroups);
        foreach (var secondBestGroup in secondBestTransitionGroups.ScoredGroupPeaksList)
        {
            decoyTransitionGroups.Add(secondBestGroup);
        }
    }

    // Select true target peaks using a q-value cutoff filter.
    var truePeaks = targetTransitionGroups.SelectTruePeaks(decoyTransitionGroups, qValueCutoff, Lambda, nonParametricPValues);
    var decoyPeaks = decoyTransitionGroups.SelectMaxPeaks();

    WriteDistributionInfo(documentPath, targetTransitionGroups, decoyTransitionGroups); // Only if asked to do so in command-line arguments

    // Better to let a really poor model through for the user to see than to give an error message here
    if (((double)truePeaks.Count) * 10 * 1000 < decoyPeaks.Count) // Targets must be at least 0.01% of decoys (still rejects zero)
    {
        throw new InvalidDataException(string.Format(Resources.MProphetPeakScoringModel_CalculateWeights_Insufficient_target_peaks___0__with__1__decoys__detected_at__2___FDR_to_continue_training_, truePeaks.Count, decoyPeaks.Count, qValueCutoff * 100));
    }
    if (((double)decoyPeaks.Count) * 1000 < truePeaks.Count) // Decoys must be at least 0.1% of targets
    {
        throw new InvalidDataException(string.Format(Resources.MProphetPeakScoringModel_CalculateWeights_Insufficient_decoy_peaks___0__with__1__targets__to_continue_training_, decoyPeaks.Count, truePeaks.Count));
    }

    var featureCount = weights.Count(w => !double.IsNaN(w));

    // Copy target and decoy peaks to training data array.
    // Size the array from the peaks that are actually copied (decoyPeaks), not from the decoy group
    // count, so a mismatch cannot leave unfilled rows or overrun the array.
    int totalTrainingPeaks = truePeaks.Count + decoyPeaks.Count;
    // Calculate the maximum number of training peaks (8 bytes per score - double, featureCount + 1 scores per peak)
    int maxTrainingPeaks = MAX_TRAINING_MEMORY / 8 / (featureCount + 1);

    var trainData = new double[Math.Min(totalTrainingPeaks, maxTrainingPeaks), featureCount + 1];
    if (totalTrainingPeaks < maxTrainingPeaks)
    {
        // Everything fits: true peaks first (last argument 1), then decoy peaks (last argument 0).
        for (int i = 0; i < truePeaks.Count; i++)
        {
            CopyToTrainData(truePeaks[i].Features, trainData, weights, i, 1);
        }
        for (int i = 0; i < decoyPeaks.Count; i++)
        {
            CopyToTrainData(decoyPeaks[i].Features, trainData, weights, i + truePeaks.Count, 0);
        }
    }
    else
    {
        // Too many peaks for the memory budget: take a random sample that keeps the true/decoy
        // proportion of the full data set. Both RandomOrder calls are given ArrayUtil.RANDOM_SEED.
        double proportionTrue = truePeaks.Count * 1.0 / totalTrainingPeaks;
        int truePeakCount = (int)Math.Round(maxTrainingPeaks * proportionTrue);
        int i = 0;
        foreach (var peak in truePeaks.RandomOrder(ArrayUtil.RANDOM_SEED))
        {
            if (i >= truePeakCount)
            {
                break;
            }
            CopyToTrainData(peak.Features, trainData, weights, i, 1);
            i++;
        }

        int decoyPeakCount = maxTrainingPeaks - truePeakCount;
        i = 0;
        foreach (var peak in decoyPeaks.RandomOrder(ArrayUtil.RANDOM_SEED))
        {
            if (i >= decoyPeakCount)
            {
                break;
            }
            CopyToTrainData(peak.Features, trainData, weights, i + truePeakCount, 0);
            i++;
        }
    }

    // Use Linear Discriminant Analysis to find weights that separate true and decoy peak scores.
    int info;
    double[] weightsFromLda;
    alglib.fisherlda(
        trainData,
        trainData.GetLength(0),
        trainData.GetLength(1) - 1,
        2,
        out info,
        out weightsFromLda);

    // Check for colinearity.
    if (info == 2)
    {
        colinearWarning = true;
    }

    // Unpack weights array: LDA weights go back into the non-NaN slots, in order.
    for (int i = 0, j = 0; i < weights.Length; i++)
    {
        if (!double.IsNaN(weights[i]))
        {
            weights[i] = weightsFromLda[j++];
        }
    }

    // Recalculate all peak scores.
    targetTransitionGroups.ScorePeaks(weights);
    decoyTransitionGroups.ScorePeaks(weights);

    // If the mean target score is less than the mean decoy score, then the
    // weights came out negative, and all the weights and scores must be negated to
    // restore the proper ordering.
    if (targetTransitionGroups.Mean < decoyTransitionGroups.Mean)
    {
        for (int i = 0; i < weights.Length; i++)
        {
            weights[i] *= -1;
        }
        targetTransitionGroups.ScorePeaks(weights);
        decoyTransitionGroups.ScorePeaks(weights);
    }

    decoyMean = decoyTransitionGroups.Mean;
    decoyStdev = decoyTransitionGroups.Stdev;
    return truePeaks.Count;
}