/// <summary>
/// Creates a fresh <see cref="BuildSummaryOptions"/> and fills it from the given file
/// by delegating to the two-argument <c>LoadFromFile</c> overload.
/// </summary>
/// <param name="fileName">Path of the file to read the options from.</param>
/// <returns>The newly created options object after it has been loaded.</returns>
public static BuildSummaryOptions LoadFromFile(string fileName)
{
    var options = new BuildSummaryOptions();
    LoadFromFile(fileName, options);
    return options;
}
/// <summary>
/// Replaces <c>Options</c> with a new <see cref="BuildSummaryOptions"/> constructed from
/// <paramref name="parameterFile"/>, calls <c>RemoveDisabled()</c> on its dataset list,
/// and returns whatever <c>DoBuild</c> produces for the same file.
/// </summary>
/// <param name="parameterFile">Path of the parameter file passed to the options constructor and to <c>DoBuild</c>.</param>
/// <returns>The result returned by <c>DoBuild</c>.</returns>
public IdentifiedSpectrumBuilderResult Build(string parameterFile)
{
    Options = new BuildSummaryOptions(parameterFile);
    Options.DatasetList.RemoveDisabled();

    return DoBuild(parameterFile);
}
/// <summary>
/// Groups the selected rows of <c>lvDatFiles</c> by dataset name and stores the groups in
/// <paramref name="conf"/>.
/// </summary>
/// <remarks>
/// The dataset name is read from sub-item index 2 of each row; rows with fewer than three
/// sub-items are grouped under the empty name. The first time a name is seen, a new
/// <typeparamref name="T"/> is created, given that name, parented to <paramref name="conf"/>
/// and appended to <c>conf.DatasetList</c>. The text of sub-item 0 of every selected row is
/// added to the <c>PathNames</c> of its dataset. Unselected rows are ignored. Entries already
/// present in <c>conf.DatasetList</c> are not removed by this method.
/// </remarks>
/// <typeparam name="T">Concrete dataset options type to instantiate for each distinct name.</typeparam>
/// <param name="conf">Options object that receives the created datasets.</param>
public virtual void SaveDatasetList<T>(BuildSummaryOptions conf) where T : IDatasetOptions, new()
{
    var datasetsByName = new Dictionary<string, IDatasetOptions>();

    foreach (ListViewItem item in this.lvDatFiles.Items)
    {
        if (!item.Selected)
        {
            continue;
        }

        string key = item.SubItems.Count >= 3 ? item.SubItems[2].Text : string.Empty;

        // One dictionary lookup instead of ContainsKey followed by repeated indexer access.
        IDatasetOptions dataset;
        if (!datasetsByName.TryGetValue(key, out dataset))
        {
            var created = new T();
            created.Name = key;
            created.Parent = conf;

            dataset = created;
            datasetsByName[key] = dataset;
            conf.DatasetList.Add(created);
        }

        dataset.PathNames.Add(item.SubItems[0].Text);
    }
}
/// <summary>
/// Loads the options from <paramref name="parameterFile"/>, initializes <c>BuildResult</c>
/// from the enabled datasets, runs a single protein-FDR filtering pass and returns the
/// accepted spectra together with the resulting peptide and protein FDR values.
/// </summary>
/// <remarks>
/// A report is written to a file whose name is <paramref name="parameterFile"/> with its
/// extension changed to <c>.optimal</c>: first the output of <c>OptimalFileTextWriter</c>,
/// then a summary line for the filter result, then one spectrum-bin section per dataset.
/// When <c>Options.FalseDiscoveryRate.FilterOneHitWonder</c> is false, a null group filter
/// is passed to the calculator.
/// </remarks>
/// <param name="parameterFile">Path of the parameter file; also the base name of the <c>.optimal</c> report.</param>
/// <returns>
/// A result whose <c>Spectra</c> is a list copy of the accepted spectra and whose
/// <c>PeptideFDR</c> / <c>ProteinFDR</c> are taken from the calculator result.
/// </returns>
public IdentifiedSpectrumBuilderResult Build(string parameterFile)
{
    Options = new BuildSummaryOptions(parameterFile);
    Options.DatasetList.RemoveDisabled();

    var fdrCalc = Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

    BuildResult = new DatasetList();

    // Initialize from the configuration.
    BuildResult.InitFromOptions(Options.DatasetList, this.Progress, parameterFile);

    // The returned count is not used here; the call is retained in case it has side
    // effects on BuildResult. NOTE(review): confirm and drop the call if it is a pure getter.
    BuildResult.GetOptimalSpectrumCount();

    string optimalResultFile = FileUtils.ChangeExtension(parameterFile, ".optimal");
    using (var sw = new StreamWriter(optimalResultFile))
    {
        new OptimalFileTextWriter().WriteToStream(sw, BuildResult);

        UniformProteinFdrOptimalResultCalculator proteinCalc = new UniformProteinFdrOptimalResultCalculator(fdrCalc, Options.GetDecoyGroupFilter())
        {
            Progress = this.Progress
        };

        Progress.SetMessage("Filtering PSMs by protein fdr {0}, using peptide fdr {1}...", Options.FalseDiscoveryRate.FdrValue, Options.FalseDiscoveryRate.MaxPeptideFdr);

        // The one-hit-wonder filter is optional; null means "no group filter".
        var groupFilter = Options.FalseDiscoveryRate.FilterOneHitWonder
            ? new IdentifiedProteinGroupSingleWonderPeptideCountFilter(Options.FalseDiscoveryRate.MinOneHitWonderPeptideCount)
            : null;

        var ret = proteinCalc.GetOptimalResultForGroupFilter(BuildResult, Options.FalseDiscoveryRate.MaxPeptideFdr, Options.FalseDiscoveryRate.FdrValue, groupFilter);

        // Keep only the PSMs contained in proteins that passed the filter.
        BuildResult.KeepOptimalResultInSetOnly(ret.AcceptedSpectra);

        // Explicit collection kept from the original implementation.
        GC.Collect();
        GC.WaitForPendingFinalizers();

        sw.WriteLine("After SimpleProteinFDR filter {0} with condition {1}, required peptide fdr = {2} ", ret.ProteinFdr, ret.ProteinCondition, ret.PeptideFdr);

        BuildResult.ForEach(ds =>
        {
            sw.WriteLine("Dataset {0}", ds.Options.Name);
            OptimalResultConditionUtils.WriteSpectrumBin(sw, ds, f1, f2);
        });

        return new IdentifiedSpectrumBuilderResult()
        {
            Spectra = ret.AcceptedSpectra.ToList(),
            PeptideFDR = ret.PeptideFdr,
            ProteinFDR = ret.ProteinFdr
        };
    }
}
/// <summary>
/// Clears <c>lvDatFiles</c> and repopulates it with one selected row per path name of
/// every dataset in <c>options.DatasetList</c>.
/// </summary>
/// <remarks>
/// Each row is laid out as: main text = path name, sub-item 1 = empty string,
/// sub-item 2 = dataset name. Each element of the dataset list is cast to
/// <typeparamref name="T"/> by the enumeration.
/// </remarks>
/// <typeparam name="T">Dataset options type the list elements are cast to.</typeparam>
/// <param name="options">Options object whose dataset list is displayed.</param>
public virtual void LoadDatasetList<T>(BuildSummaryOptions options) where T : IDatasetOptions
{
    this.lvDatFiles.Items.Clear();

    foreach (T ds in options.DatasetList)
    {
        foreach (var pathName in ds.PathNames)
        {
            // The row is added first; its columns and selection state are set afterwards.
            ListViewItem row = this.lvDatFiles.Items.Add(pathName);
            row.SubItems.Add("");
            row.SubItems.Add(ds.Name);
            row.Selected = true;
        }
    }
}
/// <summary>
/// Populates <paramref name="result"/> from the XML file <paramref name="fileName"/>.
/// </summary>
/// <remarks>
/// If the root element has no <c>SearchEngine</c> child, <paramref name="result"/> loads the
/// file itself. Otherwise the element's value is parsed as a <c>SearchEngineType</c>
/// (falling back to <c>Unknown</c>), the matching engine-specific summary configuration
/// loads the file, and that configuration is converted into <paramref name="result"/>.
/// </remarks>
/// <param name="fileName">Path of the XML file to read.</param>
/// <param name="result">Options object that receives the loaded settings.</param>
/// <exception cref="ArgumentException">
/// The search engine value parses to <c>Unknown</c>, or to an engine type for which no
/// summary configuration is defined in this method.
/// </exception>
public static void LoadFromFile(string fileName, BuildSummaryOptions result)
{
    XElement root = XElement.Load(fileName);
    var engineNode = root.Element("SearchEngine");

    if (engineNode == null)
    {
        // No engine marker: the options object reads the file directly.
        result.LoadFromFile(fileName);
        return;
    }

    var engineName = engineNode.Value;
    SearchEngineType engineType = EnumUtils.StringToEnum(engineName, SearchEngineType.Unknown);

    AbstractSummaryConfiguration configuration;
    switch (engineType)
    {
        case SearchEngineType.Unknown:
            throw new ArgumentException("Unknown search engine " + engineName);

        case SearchEngineType.MASCOT:
            configuration = new MascotDatSummaryConfiguration("");
            break;

        case SearchEngineType.SEQUEST:
            configuration = new SequestSummaryConfiguration("");
            break;

        case SearchEngineType.XTandem:
            configuration = new XTandemXmlSummaryConfiguration("");
            break;

        case SearchEngineType.PFind:
            configuration = new PFindSummaryConfiguration("");
            break;

        // NOTE(review): a PEPTIDEPHOPHET case existed here only as commented-out code;
        // any engine type not listed above ends up in the default branch.
        default:
            throw new ArgumentException("It's not defined that how to get SummaryConfiguration for search engine " + engineName);
    }

    configuration.LoadFromFile(fileName);
    configuration.ConvertTo(result);
}
/// <summary>
/// Loads the options from <paramref name="parameterFile"/>, initializes <c>BuildResult</c>
/// from the enabled datasets and runs an iterative, multi-step protein-FDR filtering,
/// returning the spectra of the iteration that yielded the most proteins.
/// </summary>
/// <remarks>
/// <para>
/// The peptide-count threshold starts at the larger of
/// <c>Options.FalseDiscoveryRate.FdrPeptideCount</c> and 2 and is decremented down to 2.
/// For every threshold: (1) when the threshold is above 2, groups are filtered by unique
/// peptide count and peptide count; (2) groups are filtered by unique peptide count only;
/// (3) <c>FilterOneHitWonders</c> is run. After steps 1 and 2 only the rejected spectra are
/// kept in <c>BuildResult</c> for the following step. The item with the strictly largest
/// <c>TotalProteinCount</c> is remembered; on ties the earlier item wins.
/// </para>
/// <para>
/// When <c>Options.PeptideRetrieval</c> is set, spectra that were not part of the final
/// selection are added back if their proteins map to exactly one final protein group and
/// the spectrum lists every protein of that group.
/// </para>
/// <para>
/// A report is written to <paramref name="parameterFile"/> with its extension changed to
/// <c>.optimal</c>.
/// </para>
/// </remarks>
/// <param name="parameterFile">Path of the parameter file; also the base name of the <c>.optimal</c> report.</param>
/// <returns>
/// The final spectra with the peptide FDR of the unique-peptide step and the configured
/// protein FDR value, or a result with an empty spectrum list if no item was selected.
/// </returns>
public IdentifiedSpectrumBuilderResult Build(string parameterFile)
{
    Options = new BuildSummaryOptions(parameterFile);
    Options.DatasetList.RemoveDisabled();

    IIdentifiedProteinGroupFilter conFilter = Options.Database.GetNotContaminationDescriptionFilter(this.Progress);
    var fdrCalc = Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

    BuildResult = new DatasetList();

    // Initialize from the configuration.
    BuildResult.InitFromOptions(Options.DatasetList, this.Progress, parameterFile);

    // The returned count is not used; the call itself is kept unchanged.
    BuildResult.GetOptimalSpectrumCount();

    string optimalResultFile = FileUtils.ChangeExtension(parameterFile, ".optimal");
    using (var sw = new StreamWriter(optimalResultFile))
    {
        new OptimalFileTextWriter().WriteToStream(sw, BuildResult);

        UniformProteinFdrOptimalResultCalculator proteinCalc = new UniformProteinFdrOptimalResultCalculator(fdrCalc, Options.GetDecoyGroupFilter())
        {
            Progress = this.Progress
        };

        sw.WriteLine(OptimalFilteredItem.GetHeader());

        var uniqueFilter = new IdentifiedProteinGroupUniquePeptideCountFilter(2);
        OptimalFilteredItem finalItem = null;

        // Snapshot of all spectra, taken only when peptide retrieval is requested.
        List<IIdentifiedSpectrum> allSpectrum = null;
        if (Options.PeptideRetrieval)
        {
            allSpectrum = BuildResult.GetSpectra();
        }

        // The starting threshold is never below 2, so the loop runs at least once.
        int fdrPeptideCount = Math.Max(Options.FalseDiscoveryRate.FdrPeptideCount, 2);
        double firstStepFdr = Options.FalseDiscoveryRate.MaxPeptideFdr;

        for (int curPeptideCount = fdrPeptideCount; curPeptideCount >= 2; curPeptideCount--)
        {
            // Rebuild the spectrum bins from the retained spectra (every pass except the first).
            if (curPeptideCount != fdrPeptideCount)
            {
                BuildResult.BuildSpectrumBin();
            }

            var curItem = new OptimalFilteredItem();
            IIdentifiedProteinGroupFilter groupFilter;
            bool bNeedFirstStep = curPeptideCount > 2;

            if (bNeedFirstStep)
            {
                Progress.SetMessage("Filtering PSMs by protein fdr {0}, unique peptide count >= 2 and peptide count >= {1} using peptide fdr {2}...", Options.FalseDiscoveryRate.FdrValue, curPeptideCount, firstStepFdr);

                // Step 1: filter by unique peptide count and peptide count to obtain the
                // peptide FDR that corresponds to the required protein FDR.
                var countFilter = new IdentifiedProteinGroupPeptideCountFilter(curPeptideCount);

                var stepOneFilters = new List<IIdentifiedProteinGroupFilter>();
                if (conFilter != null)
                {
                    stepOneFilters.Add(conFilter);
                }
                stepOneFilters.Add(uniqueFilter);
                stepOneFilters.Add(countFilter);
                groupFilter = new AndIdentifiedProteinGroupFilter(stepOneFilters.ToArray());

                curItem.Unique2CountResult = proteinCalc.GetOptimalResultForGroupFilter(BuildResult, firstStepFdr, Options.FalseDiscoveryRate.FdrValue, groupFilter);
                firstStepFdr = curItem.Unique2CountResult.PeptideFdr;

                // Keep only the PSMs that are not contained in proteins that passed the filter.
                BuildResult.KeepOptimalResultInSetOnly(new HashSet<IIdentifiedSpectrum>(curItem.Unique2CountResult.RejectedSpectra));

                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
            else
            {
                curItem.Unique2CountResult = new ProteinFdrFilteredItem();
            }

            Progress.SetMessage("Filtering PSMs by protein fdr {0}, unique peptide count >= 2 using peptide fdr {1}...", Options.FalseDiscoveryRate.FdrValue, firstStepFdr);

            // Step 2: filter by unique peptide count to compute the result that satisfies
            // the given protein FDR.
            double secondStepFdr = bNeedFirstStep ? Options.FalseDiscoveryRate.MaxPeptideFdr : firstStepFdr;

            if (conFilter != null)
            {
                groupFilter = new AndIdentifiedProteinGroupFilter(new IIdentifiedProteinGroupFilter[] { conFilter, uniqueFilter });
            }
            else
            {
                groupFilter = uniqueFilter;
            }

            curItem.Unique2Result = proteinCalc.GetOptimalResultForGroupFilter(BuildResult, secondStepFdr, Options.FalseDiscoveryRate.FdrValue, groupFilter);

            // Keep only the PSMs that are not contained in proteins that passed the filter.
            BuildResult.KeepOptimalResultInSetOnly(new HashSet<IIdentifiedSpectrum>(curItem.Unique2Result.RejectedSpectra));

            GC.Collect();
            GC.WaitForPendingFinalizers();

            curItem.Unique1Result = FilterOneHitWonders(conFilter, proteinCalc);

            GC.Collect();
            GC.WaitForPendingFinalizers();

            sw.WriteLine(curItem.ToString());

            // Remember the item with the strictly largest protein count.
            if (finalItem == null || finalItem.TotalProteinCount < curItem.TotalProteinCount)
            {
                finalItem = curItem;
            }

            curItem = null;

            GC.Collect();
            GC.WaitForPendingFinalizers();

            Console.WriteLine(MyConvert.Format("Filtering PSMs by protein fdr {0}, unique peptide count >= 2 and peptide count >= {1} using peptide fdr {2}...cost {3}.", Options.FalseDiscoveryRate.FdrValue, curPeptideCount, firstStepFdr, SystemUtils.CostMemory()));
        }

        Progress.SetMessage("Filtering PSMs by protein fdr {0} finished, free memory...", Options.FalseDiscoveryRate.FdrValue);

        if (finalItem == null)
        {
            return new IdentifiedSpectrumBuilderResult()
            {
                Spectra = new List<IIdentifiedSpectrum>()
            };
        }

        sw.WriteLine();
        sw.WriteLine("Final result : ");
        WriteScoreMap(sw, BuildResult, finalItem.Unique2CountResult);
        WriteScoreMap(sw, BuildResult, finalItem.Unique2Result);
        WriteScoreMap(sw, BuildResult, finalItem.Unique1Result);

        var finalSpectra = finalItem.GetSpectra();

        if (Options.PeptideRetrieval)
        {
            Progress.SetMessage("Retrivaling peptides passed maximum peptide FDR for proteins passed protein FDR...");

            var proteinBuilder = new IdentifiedProteinBuilder();
            var groupBuilder = new IdentifiedProteinGroupBuilder();

            List<IIdentifiedProtein> proteins = proteinBuilder.Build(finalSpectra);
            List<IIdentifiedProteinGroup> groups = groupBuilder.Build(proteins);

            // Protein name -> the group that contains it.
            var proteinMap = new Dictionary<string, IIdentifiedProteinGroup>();
            foreach (var proteinGroup in groups)
            {
                foreach (var member in proteinGroup)
                {
                    proteinMap[member.Name] = proteinGroup;
                }
            }

            var savedSpectra = new HashSet<IIdentifiedSpectrum>(finalItem.GetSpectra());

            foreach (var spectrum in allSpectrum)
            {
                if (savedSpectra.Contains(spectrum))
                {
                    continue;
                }

                var matchedGroups = new HashSet<IIdentifiedProteinGroup>();
                foreach (var proteinName in spectrum.Proteins)
                {
                    IIdentifiedProteinGroup matched;
                    if (proteinMap.TryGetValue(proteinName, out matched))
                    {
                        matchedGroups.Add(matched);
                    }
                }

                // Ignore the spectrum if it maps to no protein that passed the FDR filter,
                // or if it maps to more than one group.
                if (matchedGroups.Count != 1)
                {
                    continue;
                }

                // The spectrum must map to all proteins in the group.
                if (matchedGroups.First().All(l => spectrum.Proteins.Contains(l.Name)))
                {
                    finalSpectra.Add(spectrum);
                }
            }
        }

        BuildResult.ClearSpectra();

        GC.Collect();
        GC.WaitForPendingFinalizers();

        return new IdentifiedSpectrumBuilderResult()
        {
            Spectra = finalSpectra,
            PeptideFDR = finalItem.Unique2Result.PeptideFdr,
            ProteinFDR = Options.FalseDiscoveryRate.FdrValue
        };
    }
}