// Parses the hppp_mascot.noredundant fixture located relative to the test directory,
// using "^REV" as the decoy pattern and a TotalFalseDiscoveryRateCalculator, then
// checks the spectrum, peptide and protein-group counts reported by the summary.
public void TestParse()
{
    // Arrange: fixture path is built by plain concatenation onto the test directory.
    string proteinFile = TestContext.CurrentContext.TestDirectory + "/../../../data/buildsummary/hppp_mascot.noredundant";
    var calculator = new TotalFalseDiscoveryRateCalculator();

    // Act
    var summary = IdentificationSummary.Parse(proteinFile, "^REV", calculator);

    // Assert
    Assert.AreEqual(381, summary.FullSpectrumCount);
    Assert.AreEqual(62, summary.FullPeptideCount);
    Assert.AreEqual(31, summary.ProteinGroupCount);
    Assert.AreEqual(16, summary.Unique2ProteinGroupCount);
}
/// <summary>
/// Builds an <see cref="IdentificationSummary"/> from a protein result file and the
/// ".param" and ".peptides" files that share its base name.
/// </summary>
/// <param name="proteinFile">
/// Path of the protein result file. The ".param" and ".peptides" companions are located by
/// changing this path's extension; each of the three files is only read when it exists.
/// </param>
/// <param name="defaultDecoyPattern">
/// Regular expression passed to <c>GetTargetSpectra</c> for the spectrum/peptide counts, and
/// used to build the decoy group filter when the ".param" file does not supply one.
/// </param>
/// <param name="defaultCalc">
/// FDR calculator used when the ".param" file is missing, does not enable FDR filtering,
/// or yields no decoy group filter.
/// </param>
/// <returns>
/// The populated summary. Sections whose input file does not exist are left at the values
/// assigned by the <see cref="IdentificationSummary"/> constructor.
/// </returns>
public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
{
    IdentificationSummary result = new IdentificationSummary();
    result.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

    // Built up front so an invalid pattern fails regardless of which files exist.
    Regex decoyReg = new Regex(defaultDecoyPattern);

    IIdentifiedProteinGroupFilter decoyFilter = null;
    IFalseDiscoveryRateCalculator curCalc = null;

    // Prefer the decoy filter and calculator recorded in the ".param" file, when FDR filtering was enabled.
    var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");
    if (File.Exists(paramFile))
    {
        BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
        if (options.FalseDiscoveryRate.FilterByFdr)
        {
            decoyFilter = options.GetDecoyGroupFilter();
            curCalc = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
        }
    }

    // No usable filter from the options: fall back to the caller-supplied defaults for both.
    if (decoyFilter == null)
    {
        decoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
        curCalc = defaultCalc;
    }

    var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");
    if (File.Exists(peptideFile))
    {
        var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

        // NOTE(review): the second argument is presumably the number of proteolytic termini
        // (2 = "full", 1 = "semi"), judging by the result property names - confirm against GetSpectraByNPT.
        var fullSpectra = GetSpectraByNPT(peptides, 2);
        var fullTargetSpectra = GetTargetSpectra(decoyReg, fullSpectra);
        var semiSpectra = GetSpectraByNPT(peptides, 1);
        var semiTargetSpectra = GetTargetSpectra(decoyReg, semiSpectra);

        result.FullSpectrumCount = GetSpectrumCount(fullSpectra);
        result.FullTargetSpectrumCount = GetSpectrumCount(fullTargetSpectra);
        result.SemiSpectrumCount = GetSpectrumCount(semiSpectra);
        result.SemiTargetSpectrumCount = GetSpectrumCount(semiTargetSpectra);

        result.FullPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullSpectra);
        result.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTargetSpectra);
        result.SemiPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiSpectra);
        result.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTargetSpectra);

        // The calculator receives (total - target, target) for each category.
        result.FullSpectrumFdr = curCalc.Calculate(result.FullSpectrumCount - result.FullTargetSpectrumCount, result.FullTargetSpectrumCount);
        result.SemiSpectrumFdr = curCalc.Calculate(result.SemiSpectrumCount - result.SemiTargetSpectrumCount, result.SemiTargetSpectrumCount);
        result.FullPeptideFdr = curCalc.Calculate(result.FullPeptideCount - result.FullTargetPeptideCount, result.FullTargetPeptideCount);
        result.SemiPeptideFdr = curCalc.Calculate(result.SemiPeptideCount - result.SemiTargetPeptideCount, result.SemiTargetPeptideCount);
    }

    if (File.Exists(proteinFile))
    {
        var ir = new MascotResultTextFormat().ReadFromFile(proteinFile);
        ir.InitUniquePeptideCount();

        // Split groups by the unique peptide count of their first protein.
        var u2proteins = (from p in ir where p[0].UniquePeptideCount > 1 select p).ToList();
        var u1proteins = (from p in ir where p[0].UniquePeptideCount == 1 select p).ToList();

        result.ProteinGroupCount = ir.Count;
        result.Unique2ProteinGroupCount = u2proteins.Count;

        // Use the effective calculator (curCalc) so protein-level FDR honours the ".param"
        // settings exactly like the spectrum- and peptide-level FDR values above.
        int targetCount;
        result.Unique2ProteinFdr = CalculateProteinFdr(u2proteins, decoyFilter, curCalc, out targetCount);
        result.Unique2ProteinGroupTargetCount = targetCount;

        result.Unique1ProteinFdr = CalculateProteinFdr(u1proteins, decoyFilter, curCalc, out targetCount);
        result.Unique1ProteinGroupTargetCount = targetCount;
    }

    return result;
}