/// <summary>
/// Builds an <see cref="IdentificationSummary"/> for one identification result by reading
/// the protein file and, when present, its sibling ".param" and ".peptides" files.
/// </summary>
/// <param name="proteinFile">
/// Path of the protein result file. The ".param" and ".peptides" file names are derived
/// from it through <c>FileUtils.ChangeExtension</c>.
/// </param>
/// <param name="defaultDecoyPattern">
/// Regular expression used to separate target from decoy entries. It is always used for the
/// spectrum/peptide counts, and for the protein groups when the ".param" file does not
/// supply a decoy group filter.
/// </param>
/// <param name="defaultCalc">
/// FDR calculator used when the ".param" file is missing, does not filter by FDR, or does
/// not supply a calculator.
/// </param>
/// <returns>
/// The populated summary. Sections whose input file does not exist are left at the values
/// assigned by the <see cref="IdentificationSummary"/> constructor.
/// </returns>
public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
{
    IdentificationSummary result = new IdentificationSummary();
    result.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

    Regex decoyReg = new Regex(defaultDecoyPattern);

    IIdentifiedProteinGroupFilter decoyFilter = null;
    IFalseDiscoveryRateCalculator curCalc = null;

    // Prefer the decoy filter / FDR calculator the result was originally built with.
    var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");
    if (File.Exists(paramFile))
    {
        BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
        if (options.FalseDiscoveryRate.FilterByFdr)
        {
            decoyFilter = options.GetDecoyGroupFilter();
            curCalc = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
        }
    }

    // No usable filter from the options: fall back to the caller-supplied defaults as a pair.
    if (decoyFilter == null)
    {
        decoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
        curCalc = defaultCalc;
    }

    // Guard against options that provide a filter but no calculator.
    if (curCalc == null)
    {
        curCalc = defaultCalc;
    }

    var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");
    if (File.Exists(peptideFile))
    {
        var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

        // "Full" uses GetSpectraByNPT(..., 2) and "Semi" uses GetSpectraByNPT(..., 1).
        // NOTE(review): NPT presumably means number of proteolytic termini - confirm.
        // NOTE(review): target spectra are always selected with defaultDecoyPattern, even
        // when the ".param" file supplied its own decoy group filter - confirm intended.
        var fullSpectra = GetSpectraByNPT(peptides, 2);
        var fullTargetSpectra = GetTargetSpectra(decoyReg, fullSpectra);
        var semiSpectra = GetSpectraByNPT(peptides, 1);
        var semiTargetSpectra = GetTargetSpectra(decoyReg, semiSpectra);

        result.FullSpectrumCount = GetSpectrumCount(fullSpectra);
        result.FullTargetSpectrumCount = GetSpectrumCount(fullTargetSpectra);
        result.SemiSpectrumCount = GetSpectrumCount(semiSpectra);
        result.SemiTargetSpectrumCount = GetSpectrumCount(semiTargetSpectra);

        result.FullPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullSpectra);
        result.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTargetSpectra);
        result.SemiPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiSpectra);
        result.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTargetSpectra);

        // First argument is the non-target count (total - target), second is the target count.
        result.FullSpectrumFdr = curCalc.Calculate(result.FullSpectrumCount - result.FullTargetSpectrumCount, result.FullTargetSpectrumCount);
        result.SemiSpectrumFdr = curCalc.Calculate(result.SemiSpectrumCount - result.SemiTargetSpectrumCount, result.SemiTargetSpectrumCount);
        result.FullPeptideFdr = curCalc.Calculate(result.FullPeptideCount - result.FullTargetPeptideCount, result.FullTargetPeptideCount);
        result.SemiPeptideFdr = curCalc.Calculate(result.SemiPeptideCount - result.SemiTargetPeptideCount, result.SemiTargetPeptideCount);
    }

    if (File.Exists(proteinFile))
    {
        var ir = new MascotResultTextFormat().ReadFromFile(proteinFile);
        ir.InitUniquePeptideCount();

        // Split groups by the unique peptide count of their first protein.
        var u2proteins = ir.Where(p => p[0].UniquePeptideCount > 1).ToList();
        var u1proteins = ir.Where(p => p[0].UniquePeptideCount == 1).ToList();

        result.ProteinGroupCount = ir.Count;
        result.Unique2ProteinGroupCount = u2proteins.Count;

        // Use the calculator that matches decoyFilter (curCalc), so protein-level FDR is
        // computed the same way as the spectrum/peptide FDRs above.
        int targetCount;
        result.Unique2ProteinFdr = CalculateProteinFdr(u2proteins, decoyFilter, curCalc, out targetCount);
        result.Unique2ProteinGroupTargetCount = targetCount;

        result.Unique1ProteinFdr = CalculateProteinFdr(u1proteins, decoyFilter, curCalc, out targetCount);
        result.Unique1ProteinGroupTargetCount = targetCount;
    }

    return result;
}
/// <summary>
/// Runs one summary build: obtains the identified spectra, applies the configured
/// contamination and one-hit-wonder filters, optionally writes the ".peptides" file, then
/// builds protein groups and writes the ".noredundant" result file.
/// </summary>
/// <param name="parameterFile">
/// Parameter file passed to the spectrum builder; output file names are derived from it
/// through <c>FileUtils.ChangeExtension</c>.
/// </param>
/// <param name="result">List that receives the names of the files produced by this run.</param>
/// <param name="conf">
/// Build options. <c>SavePeptidesFile</c> is set to false on this object when peptides are
/// read from <c>options.PeptideFile</c>.
/// </param>
private void RunCurrentParameter(string parameterFile, List<string> result, BuildSummaryOptions conf)
{
    // The parser is not used below; the call is retained as in the original.
    IStringParser<string> acParser = conf.Database.GetAccessNumberParser();

    IIdentifiedProteinBuilder proteinBuilder = new IdentifiedProteinBuilder();
    IIdentifiedProteinGroupBuilder groupBuilder = new IdentifiedProteinGroupBuilder()
    {
        Progress = this.Progress
    };

    IdentifiedSpectrumBuilderResult isbr = null;
    List<IIdentifiedSpectrum> finalPeptides;

    if (!string.IsNullOrEmpty(options.PeptideFile))
    {
        // Peptides were supplied directly: read them and do not write a peptides file again.
        Progress.SetMessage("Reading peptides from {0} ...", options.PeptideFile);
        finalPeptides = new MascotPeptideTextFormat().ReadFromFile(options.PeptideFile);
        conf.SavePeptidesFile = false;
    }
    else
    {
        // Parse from configuration: build the spectrum list.
        IIdentifiedSpectrumBuilder spectrumBuilder = conf.GetSpectrumBuilder();
        var progressAware = spectrumBuilder as IProgress;
        if (progressAware != null)
        {
            progressAware.Progress = this.Progress;
        }

        isbr = spectrumBuilder.Build(parameterFile);
        finalPeptides = isbr.Spectra;
    }

    CalculateIsoelectricPoint(finalPeptides);

    // When contamination has to be removed by protein description, it must first be
    // removed at the peptide level.
    if (conf.Database.HasContaminationDescriptionFilter() && (conf.FalseDiscoveryRate.FdrLevel != FalseDiscoveryRateLevel.Protein))
    {
        Progress.SetMessage("Removing contamination by description ...");

        var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);
        var tempResultBuilder = new IdentifiedResultBuilder(null, null);

        // Regroup and filter repeatedly until a pass removes no spectrum.
        bool spectraRemoved;
        do
        {
            List<IIdentifiedProtein> tempProteins = proteinBuilder.Build(finalPeptides);
            List<IIdentifiedProteinGroup> tempGroups = groupBuilder.Build(tempProteins);
            IIdentifiedResult tempResult = tempResultBuilder.Build(tempGroups);

            HashSet<IIdentifiedSpectrum> keptSpectra = new HashSet<IIdentifiedSpectrum>();
            foreach (var proteinGroup in tempResult)
            {
                if (notConGroupFilter.Accept(proteinGroup))
                {
                    keptSpectra.UnionWith(proteinGroup[0].GetSpectra());
                }
            }

            spectraRemoved = keptSpectra.Count != finalPeptides.Count;
            if (spectraRemoved)
            {
                finalPeptides = keptSpectra.ToList();
            }
        }
        while (spectraRemoved);
    }

    if (conf.FalseDiscoveryRate.FilterOneHitWonder && conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount > 1)
    {
        Progress.SetMessage("Filtering single wonders ...");

        var proteinFilter = new IdentifiedProteinSingleWonderPeptideCountFilter(conf.FalseDiscoveryRate.MinOneHitWonderPeptideCount);
        List<IIdentifiedProtein> candidateProteins = proteinBuilder.Build(finalPeptides);

        // Only rebuild the spectrum list when at least one protein was rejected.
        int rejectedCount = candidateProteins.RemoveAll(l => !proteinFilter.Accept(l));
        if (rejectedCount > 0)
        {
            HashSet<IIdentifiedSpectrum> survivingSpectra = new HashSet<IIdentifiedSpectrum>();
            foreach (var survivor in candidateProteins)
            {
                survivingSpectra.UnionWith(survivor.GetSpectra());
            }

            finalPeptides = survivingSpectra.ToList();
        }
    }

    if (conf.SavePeptidesFile)
    {
        if (conf.Database.RemovePeptideFromDecoyDB)
        {
            DecoyPeptideBuilder.AssignDecoy(finalPeptides, conf.GetDecoySpectrumFilter());
            finalPeptides.RemoveAll(spectrum => spectrum.FromDecoy);
        }

        finalPeptides.Sort();

        // Save the peptides file.
        IFileFormat<List<IIdentifiedSpectrum>> peptideFormat = conf.GetIdentifiedSpectrumFormat();
        string peptideFile = FileUtils.ChangeExtension(parameterFile, ".peptides");
        Progress.SetMessage("Writing peptides file...");
        peptideFormat.WriteToFile(peptideFile, finalPeptides);
        result.Add(peptideFile);

        bool fdrFileWanted = !conf.FalseDiscoveryRate.FilterByFdr && conf.Database.DecoyPatternDefined;
        if (fdrFileWanted)
        {
            WriteFdrFile(parameterFile, conf, finalPeptides);
        }

        Progress.SetMessage("Calculating precursor offset...");
        var offsetCalculator = new PrecursorOffsetCalculator(finalPeptides);
        result.AddRange(offsetCalculator.Process(peptideFile));
    }

    // Build the protein list.
    Progress.SetMessage("Building protein...");
    List<IIdentifiedProtein> finalProteins = proteinBuilder.Build(finalPeptides);

    // Build the protein group list.
    Progress.SetMessage("Building protein group...");
    List<IIdentifiedProteinGroup> finalGroups = groupBuilder.Build(finalProteins);

    if (conf.Database.HasContaminationDescriptionFilter())
    {
        var notConGroupFilter = conf.Database.GetNotContaminationDescriptionFilter(Progress);

        // Walk from the end so removals do not shift the indices still to be visited.
        int groupIndex = finalGroups.Count - 1;
        while (groupIndex >= 0)
        {
            if (!notConGroupFilter.Accept(finalGroups[groupIndex]))
            {
                finalGroups.RemoveAt(groupIndex);
            }

            groupIndex--;
        }
    }

    // Build the final identification result.
    var resultBuilder = conf.GetIdentifiedResultBuilder();
    resultBuilder.Progress = Progress;
    IIdentifiedResult finalResult = resultBuilder.Build(finalGroups);
    finalResult.BuildGroupIndex();

    if (conf.FalseDiscoveryRate.FilterByFdr)
    {
        // Flag every group, and each protein in it, as decoy or not.
        var decoyGroupFilter = conf.GetDecoyGroupFilter();
        foreach (var proteinGroup in finalResult)
        {
            proteinGroup.FromDecoy = decoyGroupFilter.Accept(proteinGroup);
            foreach (var member in proteinGroup)
            {
                member.FromDecoy = proteinGroup.FromDecoy;
            }
        }

        var fdrCalculator = conf.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
        int decoyGroupCount = finalResult.Count(l => l[0].FromDecoy);
        int targetGroupCount = finalResult.Count(l => !l[0].FromDecoy);
        finalResult.ProteinFDR = fdrCalculator.Calculate(decoyGroupCount, targetGroupCount);
    }

    CalculateIsoelectricPoint(finalResult.GetProteins());

    // Peptide FDR is only available when the spectra were built in this run.
    if (isbr != null)
    {
        finalResult.PeptideFDR = isbr.PeptideFDR;
    }

    // Save the non-redundant protein list file.
    var resultFormat = conf.GetIdetifiedResultFormat(finalResult, this.Progress);
    string noredundantFile = FileUtils.ChangeExtension(parameterFile, ".noredundant");
    Progress.SetMessage("Writing noredundant file...");
    resultFormat.WriteToFile(noredundantFile, finalResult);
    result.Add(noredundantFile);

    Progress.SetMessage("Finished!");
}