Пример #1
0
        /// <summary>
        /// Creates a fresh <see cref="DatasetListOptions"/> instance and populates it
        /// by calling its <c>Load</c> method with the supplied XML element.
        /// </summary>
        /// <param name="parentNode">The XML element handed to <c>Load</c>.</param>
        /// <returns>The newly created options object after <c>Load</c> has run.</returns>
        public static DatasetListOptions LoadOptions(XElement parentNode)
        {
            DatasetListOptions options = new DatasetListOptions();
            options.Load(parentNode);

            return options;
        }
 /// <summary>
 /// Initializes a new options instance with its default settings: the boolean
 /// switches below, freshly constructed nested option objects, the QValue
 /// conflict type and a minimum engine agreement count of 1.
 /// </summary>
 public BuildSummaryOptions()
 {
     // Boolean switches that are enabled by default.
     this.MergeResult = true;
     this.KeepTopPeptideFromSameEngineButDifferentSearchParameters = true;
     this.SavePeptidesFile = true;
     this.IsSemiPeptideConsiderAsUnique = true;

     // Nested option groups, each starting from its own defaults.
     this.Database = new DatabaseOptions();
     this.FalseDiscoveryRate = new FalseDiscoveryRateOptions();
     this.Classification = new ClassificationOptions();
     this.PeptideFilter = new PeptideFilterOptions();
     this.DatasetList = new DatasetListOptions();

     // Remaining scalar defaults.
     this.ConflictType = ResolveSearchEngineConflictTypeFactory.QValue;
     this.MinimumEngineAgreeCount = 1;
     this.PeptideRetrieval = false;
 }
        /// <summary>
        /// Rebuilds this dataset collection from <paramref name="dsOptions"/>.
        /// For every entry in the option list a <see cref="Dataset"/> is created, its spectra are
        /// parsed from the search result and, when FDR filtering is enabled, decoys are assigned and
        /// the dataset is filtered by the configured maximum peptide FDR. Afterwards, when more than
        /// one dataset is present, results from the same search engine are reconciled, the overlap
        /// between datasets of different search engines is computed and (if FDR filtering is enabled
        /// and overlaps exist) a combined FDR filter is applied.
        /// </summary>
        /// <param name="dsOptions">Dataset option list; also supplies the shared summary options.</param>
        /// <param name="progress">Progress sink; assigned to each builder and used for status messages.</param>
        /// <param name="paramFile">
        /// Parameter file path; its extension is replaced with <c>.conflicted.peps</c> to name the
        /// file that receives conflicting spectra, if any are found.
        /// </param>
        /// <exception cref="Exception">
        /// Thrown when FDR filtering is enabled and a dataset contains no spectrum flagged as decoy.
        /// </exception>
        public void InitFromOptions(DatasetListOptions dsOptions, IProgressCallback progress, string paramFile)
        {
            this.Clear();

            this.conflictFunc = dsOptions.Options.GetConflictFunc();

            this.fdrCalc = dsOptions.Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
            IFilter <IIdentifiedSpectrum> decoyFilter = null;

            if (dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
            {
                decoyFilter = dsOptions.Options.GetDecoySpectrumFilter();
            }

            this.dsOptions = dsOptions;

            // Baseline taken after the first dataset; later datasets are measured relative to it
            // to estimate the total memory need and the finish time.
            long     afterFirstMemory = 0;
            DateTime afterFirstTime   = DateTime.Now;
            var      totalCount       = dsOptions.Sum(l => l.PathNames.Count);
            var      usedCount        = 0;

            for (int i = 0; i < dsOptions.Count; i++)
            {
                var m       = dsOptions[i];
                var builder = m.GetBuilder();

                builder.Progress = progress;

                Dataset ds = new Dataset(m);

                // First, get all spectra that passed the fixed filter criteria.
                ds.Spectra = builder.ParseFromSearchResult();
                ds.PSMPassedFixedCriteriaCount = ds.Spectra.Count;

                if (dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
                {
                    // Flag each spectrum as coming from the decoy database or not.
                    progress.SetMessage("Assigning decoy information...");
                    DecoyPeptideBuilder.AssignDecoy(ds.Spectra, decoyFilter);
                    var decoyCount = ds.Spectra.Count(l => l.FromDecoy);
                    if (decoyCount == 0)
                    {
                        throw new Exception(string.Format("No decoy protein found at dataset {0}, make sure the protein access number parser and the decoy pattern are correctly defined!", m.Name));
                    }

                    progress.SetMessage("{0} decoys out of {1} hits found", decoyCount, ds.Spectra.Count);

                    ds.BuildSpectrumBin();
                    ds.CalculateCurrentFdr();
                    ds.PushCurrentOptimalResults(string.Format("Before maximum peptide fdr {0}", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr));

                    progress.SetMessage("Filtering by maximum peptide fdr {0} ...", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    ds.FilterByFdr(dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    ds.Spectra = ds.GetUnconflictedOptimalSpectra();
                    ds.BuildSpectrumBin();
                    ds.CalculateCurrentFdr();
                    ds.PushCurrentOptimalResults(string.Format("After maximum peptide fdr {0}", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr));
                }

                this.Add(ds);

                if (i == 0)
                {
                    afterFirstMemory = GetWorkingSetInMegabytes();
                    afterFirstTime   = DateTime.Now;
                }
                else
                {
                    usedCount += m.PathNames.Count;

                    // Without any processed file after the first dataset there is nothing to
                    // average over; dividing by zero would produce Infinity/NaN and make
                    // DateTime.AddMinutes throw, so the estimate is skipped in that case.
                    if (usedCount > 0)
                    {
                        long currMemory = GetWorkingSetInMegabytes();

                        // The baseline already covers the first dataset, so both the memory and
                        // the time projection only extrapolate over the files that follow it.
                        var remainingCount = totalCount - dsOptions[0].PathNames.Count;

                        double averageCost   = (double)(currMemory - afterFirstMemory) / usedCount;
                        double estimatedCost = afterFirstMemory + averageCost * remainingCount;

                        DateTime currTime    = DateTime.Now;
                        var      averageTime = currTime.Subtract(afterFirstTime).TotalMinutes / usedCount;
                        var      finishTime  = afterFirstTime.AddMinutes(averageTime * remainingCount);
                        progress.SetMessage("{0}/{1} datasets, cost {2}M, avg {3:0.0}M, need {4:0.0}M, will finish at {5:MM-dd HH:mm:ss}", (i + 1), dsOptions.Count, currMemory, averageCost, estimatedCost, finishTime);
                    }
                }
            }

            // Initialize the experimental list of every dataset.
            this.ForEach(m => m.InitExperimentals());

            if (dsOptions.Count > 1)
            {
                if (dsOptions.Options.KeepTopPeptideFromSameEngineButDifferentSearchParameters)
                {
                    // Merge/remove results obtained by the same search engine with different parameters.
                    ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(peptides, score), false);
                }
                else
                {
                    ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(peptides, score), true);
                }

                // Initialize the overlap relationship between datasets searched by different search engines.
                this.OverlapBySearchEngine = FindOverlap((m1, m2) => m1.Options.SearchEngine != m2.Options.SearchEngine);


                // Collect the datasets that take part in no overlap group.
                var overlaps = new HashSet <Dataset>(from m in OverlapBySearchEngine
                                                     from s in m
                                                     select s);
                this.NoOverlaps = this.Where(m => !overlaps.Contains(m)).ToList();

                if (OverlapBySearchEngine.Count > 0 && dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
                {
                    // Filter by the maximum FDR.
                    progress.SetMessage("Filtering PSMs by maximum fdr {0}, considering multiple engine overlap...", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    var realFdr = this.FilterByFdr(dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    if (realFdr.ConflictSpectra.Count > 0)
                    {
                        new MascotPeptideTextFormat(UniformHeader.PEPTIDE_HEADER).WriteToFile(Path.ChangeExtension(paramFile, ".conflicted.peps"), realFdr.ConflictSpectra);
                    }

                    // Keep each dataset's spectra as the filtered result so later iterations start from it.
                    this.ForEach(m =>
                    {
                        m.Spectra = m.GetUnconflictedOptimalSpectra();
                    });
                }
            }
            else
            {
                this.NoOverlaps            = new List <Dataset>(this);
                this.OverlapBySearchEngine = new List <List <Dataset> >();
            }
        }

        /// <summary>
        /// Returns the working set of the current process in whole megabytes and disposes the
        /// <see cref="Process"/> object used to read it.
        /// </summary>
        private static long GetWorkingSetInMegabytes()
        {
            using (Process current = Process.GetCurrentProcess())
            {
                return current.WorkingSet64 / (1024 * 1024);
            }
        }