/// <summary>
/// Creates a new <see cref="DatasetListOptions"/> instance and populates it by
/// calling its <c>Load</c> method with the supplied XML node.
/// </summary>
/// <param name="parentNode">The XML element handed to <c>Load</c>.</param>
/// <returns>The newly created and loaded options object.</returns>
public static DatasetListOptions LoadOptions(XElement parentNode)
{
    var options = new DatasetListOptions();
    options.Load(parentNode);
    return options;
}
/// <summary>
/// Initializes a new <see cref="BuildSummaryOptions"/> with its default flag
/// values and freshly constructed sub-option objects.
/// </summary>
public BuildSummaryOptions()
{
    // Boolean switches that are enabled by default.
    this.MergeResult = true;
    this.KeepTopPeptideFromSameEngineButDifferentSearchParameters = true;
    this.SavePeptidesFile = true;
    this.IsSemiPeptideConsiderAsUnique = true;

    // Nested option groups, each starting from its own defaults.
    this.Database = new DatabaseOptions();
    this.FalseDiscoveryRate = new FalseDiscoveryRateOptions();
    this.Classification = new ClassificationOptions();
    this.PeptideFilter = new PeptideFilterOptions();
    this.DatasetList = new DatasetListOptions();

    // Defaults for resolving conflicts between search engines.
    this.ConflictType = ResolveSearchEngineConflictTypeFactory.QValue;
    this.MinimumEngineAgreeCount = 1;

    this.PeptideRetrieval = false;
}
/// <summary>
/// Rebuilds this dataset collection from <paramref name="dsOptions"/>: clears the
/// current content, parses the search results of every dataset option, optionally
/// applies per-dataset FDR filtering, and then works out which datasets overlap
/// across different search engines.
/// </summary>
/// <param name="dsOptions">
/// The dataset option list. It is stored on this instance and drives the conflict
/// function, the FDR calculator and the per-dataset builders.
/// </param>
/// <param name="progress">
/// Progress sink. It is assigned to each builder and receives status messages;
/// it is dereferenced without a null check.
/// </param>
/// <param name="paramFile">
/// Path whose extension is replaced by ".conflicted.peps" when conflicting spectra
/// have to be written out after the multi-engine FDR filtering.
/// </param>
/// <exception cref="Exception">
/// Thrown when FDR filtering is enabled and a dataset contains no decoy spectra.
/// </exception>
public void InitFromOptions(DatasetListOptions dsOptions, IProgressCallback progress, string paramFile)
{
    this.Clear();

    this.conflictFunc = dsOptions.Options.GetConflictFunc();
    this.fdrCalc = dsOptions.Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

    IFilter<IIdentifiedSpectrum> decoyFilter = null;
    if (dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
    {
        decoyFilter = dsOptions.Options.GetDecoySpectrumFilter();
    }

    this.dsOptions = dsOptions;

    // Baseline taken after the first dataset; later datasets are measured against it
    // to estimate memory use and finishing time.
    long afterFirstMemory = 0;
    DateTime afterFirstTime = DateTime.Now;
    var totalCount = dsOptions.Sum(l => l.PathNames.Count);
    var usedCount = 0;

    for (int i = 0; i < dsOptions.Count; i++)
    {
        var m = dsOptions[i];
        var builder = m.GetBuilder();
        builder.Progress = progress;

        Dataset ds = new Dataset(m);

        // First, get all spectra that passed the fixed filter criteria.
        ds.Spectra = builder.ParseFromSearchResult();
        ds.PSMPassedFixedCriteriaCount = ds.Spectra.Count;

        if (dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
        {
            // Flag each spectrum as coming from the decoy database or not.
            progress.SetMessage("Assigning decoy information...");
            DecoyPeptideBuilder.AssignDecoy(ds.Spectra, decoyFilter);
            var decoyCount = ds.Spectra.Count(l => l.FromDecoy);
            if (decoyCount == 0)
            {
                throw new Exception(string.Format("No decoy protein found at dataset {0}, make sure the protein access number parser and the decoy pattern are correctly defined!", m.Name));
            }

            progress.SetMessage("{0} decoys out of {1} hits found", decoyCount, ds.Spectra.Count);

            ds.BuildSpectrumBin();
            ds.CalculateCurrentFdr();
            ds.PushCurrentOptimalResults(string.Format("Before maximum peptide fdr {0}", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr));

            progress.SetMessage("Filtering by maximum peptide fdr {0} ...", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
            ds.FilterByFdr(dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
            ds.Spectra = ds.GetUnconflictedOptimalSpectra();

            ds.BuildSpectrumBin();
            ds.CalculateCurrentFdr();
            ds.PushCurrentOptimalResults(string.Format("After maximum peptide fdr {0}", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr));
        }

        this.Add(ds);

        if (i == 0)
        {
            afterFirstMemory = GetCurrentWorkingSetInMegabytes();
            afterFirstTime = DateTime.Now;
        }
        else
        {
            usedCount += m.PathNames.Count;
            long currMemory = GetCurrentWorkingSetInMegabytes();
            DateTime currTime = DateTime.Now;

            // Guard the per-path averages: with usedCount == 0 the divisions would
            // produce non-finite values, and DateTime.AddMinutes can throw on them,
            // which would abort the whole build because of a progress message.
            double averageCost = 0;
            double averageTime = 0;
            if (usedCount > 0)
            {
                averageCost = (double)(currMemory - afterFirstMemory) / usedCount;
                averageTime = currTime.Subtract(afterFirstTime).TotalMinutes / usedCount;
            }

            double estimatedCost = afterFirstMemory + averageCost * totalCount;
            var finishTime = afterFirstTime.AddMinutes(averageTime * (totalCount - dsOptions[0].PathNames.Count));
            progress.SetMessage("{0}/{1} datasets, cost {2}M, avg {3:0.0}M, need {4:0.0}M, will finish at {5:MM-dd HH:mm:ss}", (i + 1), dsOptions.Count, currMemory, averageCost, estimatedCost, finishTime);
        }
    }

    // Initialize the experimental list of every dataset.
    this.ForEach(m => m.InitExperimentals());

    if (dsOptions.Count > 1)
    {
        if (dsOptions.Options.KeepTopPeptideFromSameEngineButDifferentSearchParameters)
        {
            // Merge/remove results obtained by the same search engine with different parameters.
            ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(peptides, score), false);
        }
        else
        {
            ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(peptides, score), true);
        }

        // Initialize the overlap relationship between datasets searched by different search engines.
        this.OverlapBySearchEngine = FindOverlap((m1, m2) => m1.Options.SearchEngine != m2.Options.SearchEngine);

        // Initialize the datasets that take part in no overlap.
        var overlaps = new HashSet<Dataset>(from m in OverlapBySearchEngine
                                            from s in m
                                            select s);
        this.NoOverlaps = this.Where(m => !overlaps.Contains(m)).ToList();

        if (OverlapBySearchEngine.Count > 0 && dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
        {
            // Filter by the maximum fdr.
            progress.SetMessage("Filtering PSMs by maximum fdr {0}, considering multiple engine overlap...", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
            var realFdr = this.FilterByFdr(dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
            if (realFdr.ConflictSpectra.Count > 0)
            {
                new MascotPeptideTextFormat(UniformHeader.PEPTIDE_HEADER).WriteToFile(Path.ChangeExtension(paramFile, ".conflicted.peps"), realFdr.ConflictSpectra);
            }

            // Keep each dataset's spectra as the filtered result, for use in later iterations.
            this.ForEach(m =>
            {
                m.Spectra = m.GetUnconflictedOptimalSpectra();
            });
        }
    }
    else
    {
        this.NoOverlaps = new List<Dataset>(this);
        this.OverlapBySearchEngine = new List<List<Dataset>>();
    }
}

/// <summary>
/// Returns the working set of the current process in whole megabytes, disposing
/// the <see cref="Process"/> object used to read it.
/// </summary>
private static long GetCurrentWorkingSetInMegabytes()
{
    using (var process = Process.GetCurrentProcess())
    {
        return process.WorkingSet64 / (1024 * 1024);
    }
}