/// <summary>
/// Runs <c>IdentifiedSpectrumUtils.CalculateUniqueQValue</c> on the spectrum list of every
/// entry in <paramref name="peptideBin"/>, using this instance's score function and
/// FDR calculator.
/// </summary>
/// <param name="peptideBin">Spectrum lists keyed by their condition; each list is processed independently.</param>
public void CalculateUniqueQValue(Dictionary<OptimalResultCondition, List<IIdentifiedSpectrum>> peptideBin)
{
    foreach (KeyValuePair<OptimalResultCondition, List<IIdentifiedSpectrum>> bin in peptideBin)
    {
        IdentifiedSpectrumUtils.CalculateUniqueQValue(bin.Value, scoreFunc, fdrCalc);
    }
}
/// <summary>
/// Compares two ways of reducing spectra read from two peptide files
/// (tagged "deisotopic" and "deisotopic-top"):
/// (1) KeepTopPeptideFromSameEngineDifferentParameters on the whole set, then binning;
/// (2) KeepUnconflictPeptidesFromSameEngineDifferentParameters on the whole set, binning,
///     then KeepTopPeptideFromSameEngineDifferentParameters inside each bin.
/// Every bin produced by (2) must have a bin with the same condition in (1) holding the
/// same number of spectra.
/// </summary>
public void TestSameEngineDifferentParameters()
{
    var options = new ClassificationOptions();
    options.ClassifyByCharge = true;
    options.ClassifyByMissCleavage = true;
    options.ClassifyByModification = true;
    options.ModifiedAminoacids = "STY";
    options.ClassifyByNumProteaseTermini = true;

    var s1 = new MascotPeptideTextFormat().ReadFromFile(TestContext.CurrentContext.TestDirectory + "/../../../data/deisotopic.peptides");
    IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s1);
    s1.ForEach(m => m.Tag = "deisotopic");

    var s2 = new MascotPeptideTextFormat().ReadFromFile(TestContext.CurrentContext.TestDirectory + "/../../../data/deisotopic-top10.peptides");
    IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s2);
    s2.ForEach(m => m.Tag = "deisotopic-top");

    var all = s1.Union(s2).ToList();

    // Path 1: keep the top peptide over the whole set, then classify.
    var p1 = new List<IIdentifiedSpectrum>(all);
    IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(p1, new ScoreFunction());
    p1.ForEach(m => m.ClassificationTag = "deisotopic/deisotopic-top");
    var bin1 = options.BuildSpectrumBin(p1);

    // Path 2: keep unconflicting peptides, classify, then keep the top peptide per bin.
    var p2 = new List<IIdentifiedSpectrum>(all);
    IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(p2, new ScoreFunction());
    p2.ForEach(m => m.ClassificationTag = "deisotopic/deisotopic-top");
    var bin2 = options.BuildSpectrumBin(p2);

    foreach (var unconflictBin in bin2)
    {
        IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(unconflictBin.Spectra, new ScoreFunction());

        // Bins are matched by the textual form of their condition.
        var conditionName = unconflictBin.Condition.ToString();
        var topBin = bin1.Find(a => a.Condition.ToString().Equals(conditionName));

        // Fail with a readable message instead of a NullReferenceException when no bin matches.
        Assert.IsNotNull(topBin, "No bin built from the top-peptide set matches condition: " + conditionName);
        Assert.AreEqual(unconflictBin.Spectra.Count, topBin.Spectra.Count, conditionName);

        // Disabled diagnostics, kept for investigating a count mismatch:
        //
        // if (conditionName.Equals("deisotopic/deisotopic-top; Charge=2; MissCleavage=0; Modification=1; NumProteaseTermini=2"))
        // {
        //     Assert.IsTrue(topBin.Spectra.Any(k => k.Query.FileScan.ShortFileName.Equals("20111128_CLi_v_4-2k_2mg_TiO2_iTRAQ,4992")));
        // }
        // var diff1 = unconflictBin.Spectra.Except(topBin.Spectra).ToList();
        // Console.WriteLine(conditionName + " : " + diff1.Count.ToString());
        // diff1.ForEach(k =>
        // {
        //     var lst = all.FindAll(l => l.Query.FileScan.LongFileName.Equals(k.Query.FileScan.LongFileName));
        //     lst.ForEach(q => Console.WriteLine(q.Query.FileScan.ShortFileName + "\t" + q.Tag + "\t" + q.Score.ToString() + "\t" + q.Sequence));
        // });
    }
}
/// <summary>
/// Given the fixture spectra s1, s2, s4 and s5, expects
/// <c>IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters</c>
/// to leave exactly two entries in the list: s1 and s4.
/// </summary>
public void KeepTopPeptideFromSameEngineDifferentParameters()
{
    var candidates = new List<IIdentifiedSpectrum> { s1, s2, s4, s5 };

    IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(candidates, new ScoreFunction());

    Assert.AreEqual(2, candidates.Count);
    Assert.IsTrue(candidates.Contains(s1));
    Assert.IsTrue(candidates.Contains(s4));
}
/// <summary>
/// Reads a peptide file whose peptides carry no proteins, then expects
/// <c>IdentifiedSpectrumUtils.FillProteinInformation</c> to attach at least one protein
/// to every peptide using the companion ".proteins" file.
/// </summary>
public void TestFillProteinInformation()
{
    var testDirectory = TestContext.CurrentContext.TestDirectory;
    var peptideFile = testDirectory + "/../../../data/Test.output.xml.FDR0.01.peptides";
    var proteinFile = testDirectory + "/../../../data//Test.output.xml.FDR0.01.peptides.proteins";

    var spectra = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

    // Precondition: nothing has protein information before the fill.
    Assert.IsTrue(spectra.All(s => s.Peptide.Proteins.Count == 0));

    IdentifiedSpectrumUtils.FillProteinInformation(spectra, proteinFile);

    Assert.IsTrue(spectra.All(s => s.Peptide.Proteins.Count > 0));
}
/// <summary>
/// Given the fixture spectra s1, s2, s6 and s7, expects
/// <c>IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters</c>
/// to leave exactly three entries in the list: s1, s6 and s7.
/// </summary>
public void KeepUnconflictPeptidesFromSameEngineDifferentParameters()
{
    var candidates = new List<IIdentifiedSpectrum> { s1, s2, s6, s7 };

    IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(candidates, new ScoreFunction());

    Assert.AreEqual(3, candidates.Count);
    Assert.IsTrue(candidates.Contains(s1));
    Assert.IsTrue(candidates.Contains(s6));
    Assert.IsTrue(candidates.Contains(s7));
}
/// <summary>
/// Loads a peptide file, drops entries with an expect value above 0.05 or a pure sequence
/// shorter than 6, flags entries having a protein name containing "REVERSE_" as decoys,
/// and expects the first spectrum's q-value to be 0.0267 (tolerance 0.0001) after
/// <c>IdentifiedSpectrumUtils.CalculateQValue</c>.
/// </summary>
public void TestCalculateQValue()
{
    var peptideFile = TestContext.CurrentContext.TestDirectory + "/../../../data/QTOF_Ecoli.LowRes.t.xml.peptides";
    var spectra = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

    spectra.RemoveAll(s => s.ExpectValue > 0.05 || s.Peptide.PureSequence.Length < 6);

    foreach (var spectrum in spectra)
    {
        spectrum.FromDecoy = spectrum.Proteins.Any(name => name.Contains("REVERSE_"));
    }

    IdentifiedSpectrumUtils.CalculateQValue(spectra, new ExpectValueFunction(), new TargetFalseDiscoveryRateCalculator());

    Assert.AreEqual(0.0267, spectra[0].QValue, 0.0001);
}
/// <summary>
/// Exercises <c>IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge</c> twice on
/// the fixture spectra s1..s6 with two different score assignments and checks which three
/// spectra remain each time.
/// </summary>
public void TestRemoveSameSpectrumWithDifferentCharge()
{
    // Round 1: expected survivors are s1, s2 and s6.
    s1.Score = 3.0;
    s2.Score = 2.0;
    s3.Score = 2.0;
    s6.Score = 7.0;

    var remaining = new List<IIdentifiedSpectrum> { s1, s2, s3, s4, s5, s6 };
    IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge(remaining);

    Assert.AreEqual(3, remaining.Count);
    Assert.IsTrue(remaining.Contains(s1));
    Assert.IsTrue(remaining.Contains(s2));
    Assert.IsTrue(remaining.Contains(s6));

    // Round 2: after changing the scores of s3 and s6, expected survivors are s3, s4 and s5.
    s3.Score = 4.0;
    s6.Score = 3.0;

    remaining = new List<IIdentifiedSpectrum> { s1, s2, s3, s4, s5, s6 };
    IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge(remaining);

    Assert.AreEqual(3, remaining.Count);
    Assert.IsTrue(remaining.Contains(s3));
    Assert.IsTrue(remaining.Contains(s4));
    Assert.IsTrue(remaining.Contains(s5));
}
/// <summary>
/// Stores the unique peptide count and the spectrum count derived from
/// <paramref name="peps"/> in <c>uniquePeptideCount</c> and <c>peptideCount</c>.
/// </summary>
/// <param name="peps">
/// Peptides to count. The sequence is handed to two helpers (once directly, once through a
/// deferred projection to each peptide's spectrum).
/// NOTE(review): a lazily evaluated sequence may therefore be evaluated more than once; verify against callers.
/// </param>
private void DoInitUniquePeptideCount(IEnumerable<IIdentifiedPeptide> peps)
{
    this.uniquePeptideCount = IdentifiedPeptideUtils.GetUniquePeptideCount(peps);

    var spectra = peps.Select(p => p.Spectrum);
    this.peptideCount = IdentifiedSpectrumUtils.GetSpectrumCount(spectra);
}
/// <summary>
/// Builds an <see cref="IdentificationSummary"/> from a protein result file and the
/// sibling files that share its path but use the ".param" and ".peptides" extensions.
/// Sections whose file does not exist are skipped and leave the corresponding
/// summary members untouched.
/// </summary>
/// <param name="proteinFile">Path of the protein result file; also the base for the ".param" and ".peptides" paths.</param>
/// <param name="defaultDecoyPattern">
/// Regex pattern passed to <c>GetTargetSpectra</c> for the spectrum/peptide counts, and used to
/// build the protein group decoy filter when the param file does not provide one.
/// </param>
/// <param name="defaultCalc">
/// FDR calculator used for spectrum/peptide FDR when the param file does not provide one,
/// and always used for the protein-level FDR.
/// </param>
/// <returns>The populated summary.</returns>
public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
{
    var summary = new IdentificationSummary();
    summary.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

    var decoyPattern = new Regex(defaultDecoyPattern);

    // Prefer the decoy filter / FDR calculator recorded in the ".param" file, if it filters by FDR.
    IIdentifiedProteinGroupFilter groupDecoyFilter = null;
    IFalseDiscoveryRateCalculator fdrCalculator = null;

    var paramPath = FileUtils.ChangeExtension(proteinFile, ".param");
    if (File.Exists(paramPath))
    {
        BuildSummaryOptions buildOptions = BuildSummaryOptionsUtils.LoadFromFile(paramPath);
        if (buildOptions.FalseDiscoveryRate.FilterByFdr)
        {
            groupDecoyFilter = buildOptions.GetDecoyGroupFilter();
            fdrCalculator = buildOptions.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
        }
    }

    // Otherwise fall back to the caller-supplied defaults (both together).
    if (groupDecoyFilter == null)
    {
        groupDecoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
        fdrCalculator = defaultCalc;
    }

    var peptidePath = FileUtils.ChangeExtension(proteinFile, ".peptides");
    if (File.Exists(peptidePath))
    {
        var spectra = new MascotPeptideTextFormat().ReadFromFile(peptidePath);

        // The second argument is the NPT value to select (2 for the "full" set, 1 for the "semi" set).
        var full = GetSpectraByNPT(spectra, 2);
        var fullTarget = GetTargetSpectra(decoyPattern, full);
        var semi = GetSpectraByNPT(spectra, 1);
        var semiTarget = GetTargetSpectra(decoyPattern, semi);

        summary.FullSpectrumCount = GetSpectrumCount(full);
        summary.FullTargetSpectrumCount = GetSpectrumCount(fullTarget);
        summary.SemiSpectrumCount = GetSpectrumCount(semi);
        summary.SemiTargetSpectrumCount = GetSpectrumCount(semiTarget);

        summary.FullPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(full);
        summary.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTarget);
        summary.SemiPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semi);
        summary.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTarget);

        // First argument is (total - target), second is the target count.
        summary.FullSpectrumFdr = fdrCalculator.Calculate(
            summary.FullSpectrumCount - summary.FullTargetSpectrumCount,
            summary.FullTargetSpectrumCount);
        summary.SemiSpectrumFdr = fdrCalculator.Calculate(
            summary.SemiSpectrumCount - summary.SemiTargetSpectrumCount,
            summary.SemiTargetSpectrumCount);
        summary.FullPeptideFdr = fdrCalculator.Calculate(
            summary.FullPeptideCount - summary.FullTargetPeptideCount,
            summary.FullTargetPeptideCount);
        summary.SemiPeptideFdr = fdrCalculator.Calculate(
            summary.SemiPeptideCount - summary.SemiTargetPeptideCount,
            summary.SemiTargetPeptideCount);
    }

    if (File.Exists(proteinFile))
    {
        var groups = new MascotResultTextFormat().ReadFromFile(proteinFile);
        groups.InitUniquePeptideCount();

        // Split groups by the unique peptide count of their first protein.
        var multiPeptideGroups = groups.Where(g => g[0].UniquePeptideCount > 1).ToList();
        var singlePeptideGroups = groups.Where(g => g[0].UniquePeptideCount == 1).ToList();

        summary.ProteinGroupCount = groups.Count;
        summary.Unique2ProteinGroupCount = multiPeptideGroups.Count;

        // NOTE(review): protein FDR is computed with defaultCalc even when the param file
        // supplied a calculator (used above for spectrum/peptide FDR) - confirm this is intended.
        int unique2Targets;
        summary.Unique2ProteinFdr = CalculateProteinFdr(multiPeptideGroups, groupDecoyFilter, defaultCalc, out unique2Targets);
        summary.Unique2ProteinGroupTargetCount = unique2Targets;

        int unique1Targets;
        summary.Unique1ProteinFdr = CalculateProteinFdr(singlePeptideGroups, groupDecoyFilter, defaultCalc, out unique1Targets);
        summary.Unique1ProteinGroupTargetCount = unique1Targets;
    }

    return summary;
}
/// <summary>
/// Assigns q-values to <paramref name="preFiltered"/> (computed on the top-scoring subset and
/// propagated to the rest), then returns the leading part of the sorted list up to the last
/// spectrum whose q-value is within <c>FdrValue</c>, recording the cut-off in
/// <paramref name="optimalResult"/>.
/// </summary>
/// <param name="preFiltered">Spectra to evaluate; sorted in place and their QValue overwritten.</param>
/// <param name="optimalResult">
/// Receives the decoy/target counts (reset to 0 first) and, when a cut-off is found, the
/// score, expect value and q-value of the spectrum at the cut-off.
/// </param>
/// <returns>The accepted spectra; empty when no spectrum has a q-value within <c>FdrValue</c>.</returns>
private List<IIdentifiedSpectrum> DoCalculateDuplicated(List<IIdentifiedSpectrum> preFiltered, OptimalResult optimalResult)
{
    // -1 marks "no q-value assigned yet".
    foreach (var spectrum in preFiltered)
    {
        spectrum.QValue = -1;
    }

    ScoreFunc.SortSpectrum(preFiltered);

    // Use the top score spectra for the q-value calculation.
    var topSpectra = new List<IIdentifiedSpectrum>(preFiltered);
    IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(topSpectra, ScoreFunc);

    // Calculate the q-values.
    QValueFunc(topSpectra, ScoreFunc, FdrCalc);

    // Fill in the q-value of the non-top peptides from the preceding entry in sorted order.
    for (int index = 1; index < preFiltered.Count; index++)
    {
        if (preFiltered[index].QValue == -1)
        {
            preFiltered[index].QValue = preFiltered[index - 1].QValue;
        }
    }

    var accepted = new List<IIdentifiedSpectrum>();
    optimalResult.PeptideCountFromDecoyDB = 0;
    optimalResult.PeptideCountFromTargetDB = 0;

    // Scan from the end for the last spectrum that still satisfies the FDR threshold.
    int cutoff = preFiltered.FindLastIndex(s => s.QValue <= FdrValue);
    if (cutoff < 0)
    {
        return accepted;
    }

    var boundary = preFiltered[cutoff];
    accepted.AddRange(preFiltered.GetRange(0, cutoff + 1));

    optimalResult.Score = ScoreFunc.GetScore(boundary);
    optimalResult.ExpectValue = boundary.ExpectValue;
    optimalResult.FalseDiscoveryRate = boundary.QValue;

    // Count each long file name once; only its first occurrence decides decoy vs. target.
    int decoys = 0;
    int targets = 0;
    var seenFiles = new HashSet<string>();
    foreach (IIdentifiedSpectrum spectrum in accepted)
    {
        if (!seenFiles.Add(spectrum.Query.FileScan.LongFileName))
        {
            continue;
        }

        if (spectrum.FromDecoy)
        {
            decoys++;
        }
        else
        {
            targets++;
        }
    }

    optimalResult.PeptideCountFromDecoyDB = decoys;
    optimalResult.PeptideCountFromTargetDB = targets;

    Console.WriteLine("{0} -> {1} ==> {2} / {3}", preFiltered.Count, topSpectra.Count, decoys, targets);

    return accepted;
}
/// <summary>
/// Delegates to <c>IdentifiedSpectrumUtils.CalculateQValue</c> with this instance's
/// score function and FDR calculator.
/// </summary>
/// <param name="spectra">Spectra handed to the q-value calculation.</param>
public void CalculateQValue(List<IIdentifiedSpectrum> spectra)
{
    IdentifiedSpectrumUtils.CalculateQValue(
        spectra,
        scoreFunc,
        fdrCalc);
}