/// <summary>
/// Estimates the false discovery rate of a subset of this score distribution.
/// </summary>
/// <remarks>
/// For every condition in <paramref name="subset"/>, the subset entries are joined with this
/// distribution's entries for the same condition on equal <c>Score</c>. Each matched pair
/// contributes the subset's target count, plus a decoy estimate computed as the subset's
/// target count scaled by the decoy/target ratio of the matching entry in this distribution.
/// The accumulated totals are truncated to <c>int</c> before being handed to
/// <paramref name="calc"/>.
/// </remarks>
/// <param name="subset">Subset distribution; its keys are looked up in this distribution via the indexer.</param>
/// <param name="calc">Calculator that turns (decoy count, target count) into an FDR value.</param>
/// <returns>The value returned by <paramref name="calc"/> for the accumulated counts.</returns>
public double CalculateSubsetFdr(ScoreDistribution subset, IFalseDiscoveryRateCalculator calc)
{
    double targetCount = 0;
    double decoyCount = 0;

    foreach (OptimalResultCondition cond in subset.Keys)
    {
        var subsetOrs = subset[cond];
        var totalOrs = this[cond];

        // Materialize the join once: the query is deferred, and summing two projections
        // of it would otherwise execute the whole join twice per condition.
        var counts = (from s in subsetOrs
                      join t in totalOrs on s.Score equals t.Score
                      select new
                      {
                          TargetCount = (int)s.PeptideCountFromTargetDB,
                          // A zero target count in the full distribution would make the ratio
                          // NaN or Infinity, which then poisons the (int) cast on return;
                          // such an entry contributes no estimated decoys instead.
                          DecoyCount = t.PeptideCountFromTargetDB == 0
                              ? 0.0
                              : (double)s.PeptideCountFromTargetDB * t.PeptideCountFromDecoyDB / t.PeptideCountFromTargetDB
                      }).ToList();

        targetCount += counts.Sum(c => c.TargetCount);
        decoyCount += counts.Sum(c => c.DecoyCount);
    }

    return calc.Calculate((int)decoyCount, (int)targetCount);
}
/// <summary>
/// Counts decoy and target spectra, considering only the first spectrum seen for each
/// distinct <c>Query.FileScan.LongFileName</c>, and stores the counts together with the
/// resulting false discovery rate in <c>Result</c>.
/// </summary>
/// <param name="calc">Calculator that turns (decoy count, target count) into an FDR value.</param>
public void CalculateFdr(IFalseDiscoveryRateCalculator calc)
{
    int targetTotal = 0;
    int decoyTotal = 0;
    var seenFileNames = new HashSet<string>();

    foreach (var spectrum in this.Spectra)
    {
        // HashSet.Add returns false when the name is already present, so later
        // spectra sharing a long file name are skipped.
        if (!seenFileNames.Add(spectrum.Query.FileScan.LongFileName))
        {
            continue;
        }

        if (spectrum.FromDecoy)
        {
            decoyTotal++;
        }
        else
        {
            targetTotal++;
        }
    }

    this.Result.PeptideCountFromDecoyDB = decoyTotal;
    this.Result.PeptideCountFromTargetDB = targetTotal;
    this.Result.FalseDiscoveryRate = calc.Calculate(decoyTotal, targetTotal);
}
/// <summary>
/// Removes the given conflicted spectra from the optimal results and updates the
/// decoy/target counts and false discovery rate of every affected result.
/// </summary>
/// <remarks>
/// The conflicted spectra are binned through <c>Options.Parent.Classification</c>; each bin
/// is matched to the existing optimal result with the same <c>Condition</c>.
/// </remarks>
/// <param name="conflicted">Spectra to remove; nothing happens when the list is empty.</param>
public void RemoveConflictSpectrum(List<IIdentifiedSpectrum> conflicted)
{
    if (conflicted.Count == 0)
    {
        return;
    }

    IFalseDiscoveryRateCalculator calc = Options.Parent.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
    var resultsByCondition = OptimalResults.ToDictionary(m => m.Condition);
    var conflictBins = Options.Parent.Classification.BuildSpectrumBin(conflicted);

    foreach (var conflictBin in conflictBins)
    {
        var existing = resultsByCondition[conflictBin.Condition];

        foreach (var spectrum in conflictBin.Spectra)
        {
            // NOTE(review): the count is decremented whether or not Remove actually
            // finds the spectrum in the existing result - confirm callers guarantee membership.
            if (spectrum.FromDecoy)
            {
                existing.Result.PeptideCountFromDecoyDB--;
            }
            else
            {
                existing.Result.PeptideCountFromTargetDB--;
            }

            existing.Spectra.Remove(spectrum);
        }

        existing.Result.FalseDiscoveryRate = calc.Calculate(
            existing.Result.PeptideCountFromDecoyDB,
            existing.Result.PeptideCountFromTargetDB);
    }
}
/// <summary>
/// Computes the false discovery rate of a list of protein groups: groups accepted by
/// <c>decoyFilter</c> count as decoys, all remaining groups count as targets.
/// </summary>
/// <param name="groups">Protein groups to evaluate.</param>
/// <returns>The value returned by <c>fdrCalc</c> for the decoy and target counts.</returns>
private double CalculateProteinGroupFdr(List<IIdentifiedProteinGroup> groups)
{
    int decoys = 0;
    foreach (var proteinGroup in groups)
    {
        if (decoyFilter.Accept(proteinGroup))
        {
            decoys++;
        }
    }

    int targets = groups.Count - decoys;
    return fdrCalc.Calculate(decoys, targets);
}
/// <summary>
/// Computes the false discovery rate over every entry of every list stored in this
/// collection's <c>Values</c>, by summing the per-entry target and decoy peptide counts.
/// </summary>
/// <param name="calc">Calculator that turns (decoy count, target count) into an FDR value.</param>
/// <returns>The value returned by <paramref name="calc"/> for the summed counts.</returns>
public double CalculateFdr(IFalseDiscoveryRateCalculator calc)
{
    // Each per-entry count is cast to int before summing.
    int targetTotal = this.Values
        .SelectMany(entries => entries)
        .Sum(entry => (int)entry.PeptideCountFromTargetDB);

    int decoyTotal = this.Values
        .SelectMany(entries => entries)
        .Sum(entry => (int)entry.PeptideCountFromDecoyDB);

    return calc.Calculate(decoyTotal, targetTotal);
}
/// <summary>
/// Computes the false discovery rate of a list of protein groups and reports how many of
/// them were classified as targets.
/// </summary>
/// <param name="groups">Protein groups to evaluate.</param>
/// <param name="decoyFilter">Filter whose <c>Accept</c> result marks a group as decoy.</param>
/// <param name="calc">Calculator that turns (decoy count, target count) into an FDR value.</param>
/// <param name="targetCount">Receives the number of groups not accepted by <paramref name="decoyFilter"/>.</param>
/// <returns>The value returned by <paramref name="calc"/> for the decoy and target counts.</returns>
private static double CalculateProteinFdr(List<IIdentifiedProteinGroup> groups, IIdentifiedProteinGroupFilter decoyFilter, IFalseDiscoveryRateCalculator calc, out int targetCount)
{
    targetCount = 0;

    int decoys = 0;
    int targets = 0;
    foreach (var proteinGroup in groups)
    {
        bool isDecoy = decoyFilter.Accept(proteinGroup);
        if (isDecoy)
        {
            decoys++;
        }
        else
        {
            targets++;
        }
    }

    targetCount = targets;
    return calc.Calculate(decoys, targets);
}
/// <summary>
/// Assigns decoy flags to the given spectra, computes their false discovery rate and
/// writes the filter strings, the decoy/target counts and the FDR to a file named after
/// <paramref name="parameterFile"/> with the extension changed to <c>.optimal</c>.
/// </summary>
/// <param name="parameterFile">Path whose extension is replaced to build the output file name.</param>
/// <param name="conf">Options supplying the FDR calculator, the decoy filter and the filter strings.</param>
/// <param name="result">Spectra to classify and count.</param>
protected void WriteFdrFile(string parameterFile, BuildSummaryOptions conf, List<IIdentifiedSpectrum> result)
{
    Progress.SetMessage("Calculating identified peptide false discovery rate ...");

    IFalseDiscoveryRateCalculator calc = conf.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
    DecoyPeptideBuilder.AssignDecoy(result, conf.GetDecoySpectrumFilter());

    // Count decoys directly; every other spectrum is a target.
    int decoyCount = 0;
    foreach (IIdentifiedSpectrum identified in result)
    {
        if (identified.FromDecoy)
        {
            decoyCount++;
        }
    }
    int targetCount = result.Count - decoyCount;

    double fdr = calc.Calculate(decoyCount, targetCount);

    string optimalResultFile = FileUtils.ChangeExtension(parameterFile, ".optimal");
    using (var writer = new StreamWriter(optimalResultFile))
    {
        // One filter description per line, followed by the summary rows.
        foreach (string filterLine in conf.GetFilterString())
        {
            writer.WriteLine(filterLine);
        }

        writer.WriteLine("DecoyCount\t{0}", decoyCount);
        writer.WriteLine("TargetCount\t{0}", targetCount);
        writer.WriteLine("FDR\t{0:0.######}", fdr);
    }
}
/// <summary>
/// Assigns a QValue to every spectrum based on the number of distinct peptide sequences
/// (<c>Peptide.PureSequence</c>) seen so far among decoy and target spectra.
/// </summary>
/// <remarks>
/// The list is sorted in place by <paramref name="scoreFuncs"/> and walked from the first
/// element to the last. Spectra sharing the same score form one group; when the score
/// changes, the group receives the FDR computed from the distinct decoy and target
/// sequences accumulated up to and including that group.
/// </remarks>
/// <param name="peptides">Spectra to process; sorted in place. Nothing happens when empty.</param>
/// <param name="scoreFuncs">Provides the sort order and the score of a spectrum.</param>
/// <param name="fdrCalc">Calculator that turns (decoy count, target count) into an FDR value.</param>
public static void CalculateUniqueQValue(List<IIdentifiedSpectrum> peptides, IScoreFunction scoreFuncs, IFalseDiscoveryRateCalculator fdrCalc)
{
    if (peptides.Count == 0)
    {
        return;
    }

    scoreFuncs.SortSpectrum(peptides);

    var pendingGroup = new List<IIdentifiedSpectrum>();
    var targetSequences = new HashSet<string>();
    var decoySequences = new HashSet<string>();
    double groupScore = scoreFuncs.GetScore(peptides[0]);

    foreach (IIdentifiedSpectrum spectrum in peptides)
    {
        double score = scoreFuncs.GetScore(spectrum);

        if (score != groupScore)
        {
            // Score changed: close the previous group before the current spectrum
            // is added to the sequence sets.
            double groupQValue = fdrCalc.Calculate(decoySequences.Count, targetSequences.Count);
            foreach (IIdentifiedSpectrum member in pendingGroup)
            {
                member.QValue = groupQValue;
            }

            pendingGroup.Clear();
            groupScore = score;
        }

        pendingGroup.Add(spectrum);
        (spectrum.FromDecoy ? decoySequences : targetSequences).Add(spectrum.Peptide.PureSequence);
    }

    // The last group is never closed inside the loop.
    double finalQValue = fdrCalc.Calculate(decoySequences.Count, targetSequences.Count);
    foreach (IIdentifiedSpectrum member in pendingGroup)
    {
        member.QValue = finalQValue;
    }
}
/// <summary>
/// Calculates the QValue of each identified spectrum in the list using the given score
/// sorting function and FDR calculator.
/// </summary>
/// <remarks>
/// The list is sorted in place and every QValue is reset to 0. Totals of decoy and target
/// spectra are counted once per distinct <c>Query.FileScan.LongFileName</c>. The list is
/// then walked from the last element to the first: each spectrum receives the FDR computed
/// from the running totals at the point where its score was first encountered, and the
/// walk stops as soon as a newly computed FDR equals 0.
/// </remarks>
/// <param name="peptides">Spectrum list; sorted in place. Nothing happens when empty.</param>
/// <param name="scoreFuncs">Class responsible for score extraction and sorting.</param>
/// <param name="fdrCalc">FDR calculator.</param>
public static void CalculateQValue(List<IIdentifiedSpectrum> peptides, IScoreFunction scoreFuncs, IFalseDiscoveryRateCalculator fdrCalc)
{
    if (peptides.Count == 0)
    {
        return;
    }

    scoreFuncs.SortSpectrum(peptides);

    int remainingTarget = 0;
    int remainingDecoy = 0;
    var seenFileNames = new HashSet<string>();

    foreach (IIdentifiedSpectrum spectrum in peptides)
    {
        spectrum.QValue = 0.0;

        // Only the first spectrum of each long file name contributes to the totals.
        if (!seenFileNames.Add(spectrum.Query.FileScan.LongFileName))
        {
            continue;
        }

        if (spectrum.FromDecoy)
        {
            remainingDecoy++;
        }
        else
        {
            remainingTarget++;
        }
    }

    int index = peptides.Count - 1;
    double currentScore = scoreFuncs.GetScore(peptides[index]);
    double currentQValue = fdrCalc.Calculate(remainingDecoy, remainingTarget);

    while (index >= 0)
    {
        IIdentifiedSpectrum current = peptides[index];
        double score = scoreFuncs.GetScore(current);

        if (score != currentScore)
        {
            currentScore = score;
            currentQValue = fdrCalc.Calculate(remainingDecoy, remainingTarget);

            // Remaining spectra keep the QValue of 0 assigned above.
            if (currentQValue == 0.0)
            {
                break;
            }
        }

        current.QValue = currentQValue;

        // NOTE(review): totals were counted per distinct long file name, but are
        // decremented for every spectrum - confirm the input never has duplicates.
        if (current.FromDecoy)
        {
            remainingDecoy--;
        }
        else
        {
            remainingTarget--;
        }

        index--;
    }
}
/// <summary>
/// Collects the spectra on which enough search engines agree, resolves engine conflicts,
/// and computes the false discovery rate of the collected set.
/// </summary>
/// <remarks>
/// For every entry of <c>OverlapBySearchEngine</c>, the unconflicted optimal spectra are
/// grouped by experiment and then by first scan. A group with more than one spectrum that
/// <c>IsEngineConflict</c> reports as conflicting is recorded in
/// <c>result.ConflictSpectra</c> and replaced by whatever <c>conflictFunc.Process</c>
/// returns (nothing when it returns null). A group is kept when it is non-empty and has at
/// least <c>MinimumEngineAgreeCount</c> spectra; it counts once as decoy or target based on
/// its first spectrum. When the minimum agree count is at most 1, the optimal spectra of
/// <c>NoOverlaps</c> are added as well, each counted individually.
/// </remarks>
/// <param name="individualFdr">Not used by this method.</param>
/// <returns>The collected spectra, the recorded conflict spectra and the computed FDR.</returns>
private FdrResult DoCalculateOverlappedPeptideFdr(double individualFdr)
{
    FdrResult result = new FdrResult();
    List<IIdentifiedSpectrum> spectra = new List<IIdentifiedSpectrum>();
    int decoyCount = 0;
    int targetCount = 0;

    foreach (var dsList in OverlapBySearchEngine)
    {
        List<IIdentifiedSpectrum> peps = dsList.GetUnconflictedOptimalSpectra();

        // Group by experiment file name first; this keeps the sets to compare small.
        var expGroups = peps.GroupBy(m => m.Query.FileScan.Experimental);
        foreach (var group in expGroups)
        {
            // Then group by scan.
            var spGroup = group.GroupBy(m => m.Query.FileScan.FirstScan);
            foreach (var sp in spGroup)
            {
                var lst = sp.ToList();
                if (lst.Count > 1 && IsEngineConflict(lst))
                {
                    result.ConflictSpectra.AddRange(lst);
                    var spectrum = conflictFunc.Process(lst);
                    lst.Clear();
                    if (spectrum != null)
                    {
                        lst.AddRange(spectrum);
                    }
                }

                // Conflict resolution can leave the list empty. Without the non-empty
                // check, a minimum agree count of 0 or less would reach lst[0] and throw.
                if (lst.Count > 0 && lst.Count >= dsOptions.Options.MinimumEngineAgreeCount)
                {
                    spectra.AddRange(lst);
                    if (lst[0].FromDecoy)
                    {
                        decoyCount++;
                    }
                    else
                    {
                        targetCount++;
                    }
                }
            }
        }
    }

    if (dsOptions.Options.MinimumEngineAgreeCount <= 1)
    {
        var noOverlapSpectra = NoOverlaps.GetOptimalSpectra();
        foreach (var s in noOverlapSpectra)
        {
            if (s.FromDecoy)
            {
                decoyCount++;
            }
            else
            {
                targetCount++;
            }
        }

        spectra.AddRange(noOverlapSpectra);
    }

    result.Spectra = spectra;
    result.Fdr = fdrCalc.Calculate(decoyCount, targetCount);
    return result;
}