Exemple #1
0
        /// <summary>
        /// Creates a new <see cref="BuildSummaryOptions"/> and hands it, together with
        /// <paramref name="fileName"/>, to the two-argument <c>LoadFromFile</c> overload.
        /// </summary>
        /// <param name="fileName">Path of the file to load the options from.</param>
        /// <returns>The newly created options instance after the overload has processed it.</returns>
        public static BuildSummaryOptions LoadFromFile(string fileName)
        {
            var options = new BuildSummaryOptions();
            LoadFromFile(fileName, options);
            return options;
        }
Exemple #2
0
        /// <summary>
        /// Replaces <c>Options</c> with a fresh <see cref="BuildSummaryOptions"/> constructed from
        /// <paramref name="parameterFile"/>, calls <c>RemoveDisabled()</c> on its dataset list,
        /// and then delegates the actual work to <c>DoBuild</c>.
        /// </summary>
        /// <param name="parameterFile">Path of the parameter file; also forwarded to <c>DoBuild</c>.</param>
        /// <returns>Whatever <c>DoBuild</c> returns for the same parameter file.</returns>
        public IdentifiedSpectrumBuilderResult Build(string parameterFile)
        {
            Options = new BuildSummaryOptions(parameterFile);
            Options.DatasetList.RemoveDisabled();

            var built = DoBuild(parameterFile);
            return built;
        }
        /// <summary>
        /// Groups the selected rows of <c>lvDatFiles</c> by dataset name and appends one
        /// dataset of type <typeparamref name="T"/> per distinct name to
        /// <c>conf.DatasetList</c>.
        /// </summary>
        /// <typeparam name="T">Concrete dataset options type to instantiate for each new name.</typeparam>
        /// <param name="conf">Options object that receives the datasets and becomes their <c>Parent</c>.</param>
        /// <remarks>
        /// The dataset name is read from the third sub-item of a row; rows with fewer than
        /// three sub-items use the empty string. The text of the first sub-item is added to
        /// the dataset's <c>PathNames</c>. Rows that are not selected are skipped.
        /// </remarks>
        public virtual void SaveDatasetList <T>(BuildSummaryOptions conf) where T : IDatasetOptions, new()
        {
            var datasetsByName = new Dictionary <string, IDatasetOptions>();

            foreach (ListViewItem row in this.lvDatFiles.Items)
            {
                if (!row.Selected)
                {
                    continue;
                }

                string datasetName = row.SubItems.Count >= 3 ? row.SubItems[2].Text : string.Empty;

                IDatasetOptions dataset;
                if (!datasetsByName.TryGetValue(datasetName, out dataset))
                {
                    // First row seen for this name: create the dataset and register it.
                    var created = new T();
                    created.Name   = datasetName;
                    created.Parent = conf;

                    datasetsByName[datasetName] = created;
                    conf.DatasetList.Add(created);
                    dataset = created;
                }

                dataset.PathNames.Add(row.SubItems[0].Text);
            }
        }
        /// <summary>
        /// Loads the build options from <paramref name="parameterFile"/>, initializes
        /// <c>BuildResult</c> from the enabled datasets, runs a single
        /// <c>UniformProteinFdrOptimalResultCalculator</c> pass, and returns the accepted spectra
        /// together with the peptide and protein FDR reported by that pass.
        /// </summary>
        /// <param name="parameterFile">
        /// Path of the parameter file. It is used to construct the options, is passed to
        /// <c>InitFromOptions</c>, and is the base for the report file name
        /// (<c>FileUtils.ChangeExtension(parameterFile, ".optimal")</c>).
        /// </param>
        /// <returns>
        /// A result whose <c>Spectra</c>, <c>PeptideFDR</c> and <c>ProteinFDR</c> are taken from the
        /// calculator's return value.
        /// </returns>
        public IdentifiedSpectrumBuilderResult Build(string parameterFile)
        {
            Options = new BuildSummaryOptions(parameterFile);
            Options.DatasetList.RemoveDisabled();

            // NOTE(review): proteinBuilder and groupBuilder are not referenced again in this method.
            IIdentifiedProteinBuilder      proteinBuilder = new IdentifiedProteinBuilder();
            IIdentifiedProteinGroupBuilder groupBuilder   = new IdentifiedProteinGroupBuilder();

            var fdrCalc = Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

            BuildResult = new DatasetList();

            // Initialize from the configuration.
            BuildResult.InitFromOptions(Options.DatasetList, this.Progress, parameterFile);

            // NOTE(review): totalCount is not read afterwards; whether the call itself has
            // side effects is not visible here — confirm before removing.
            var totalCount = BuildResult.GetOptimalSpectrumCount();

            string optimalResultFile = FileUtils.ChangeExtension(parameterFile, ".optimal");

            using (var sw = new StreamWriter(optimalResultFile))
            {
                // Written before filtering, so this part of the report reflects the unfiltered BuildResult.
                new OptimalFileTextWriter().WriteToStream(sw, BuildResult);

                UniformProteinFdrOptimalResultCalculator proteinCalc = new UniformProteinFdrOptimalResultCalculator(fdrCalc, Options.GetDecoyGroupFilter())
                {
                    Progress = this.Progress
                };

                Progress.SetMessage("Filtering PSMs by protein fdr {0}, using peptide fdr {1}...", Options.FalseDiscoveryRate.FdrValue, Options.FalseDiscoveryRate.MaxPeptideFdr);

                // groupFilter is null when FilterOneHitWonder is off; null is passed through to the calculator.
                var groupFilter = Options.FalseDiscoveryRate.FilterOneHitWonder ? new IdentifiedProteinGroupSingleWonderPeptideCountFilter(Options.FalseDiscoveryRate.MinOneHitWonderPeptideCount) : null;
                var ret         = proteinCalc.GetOptimalResultForGroupFilter(BuildResult, Options.FalseDiscoveryRate.MaxPeptideFdr, Options.FalseDiscoveryRate.FdrValue, groupFilter);

                // Keep only the PSMs contained in proteins that passed the filter.
                BuildResult.KeepOptimalResultInSetOnly(ret.AcceptedSpectra);

                GC.Collect();
                GC.WaitForPendingFinalizers();

                sw.WriteLine("After SimpleProteinFDR filter {0} with condition {1}, required peptide fdr = {2} ", ret.ProteinFdr, ret.ProteinCondition, ret.PeptideFdr);
                BuildResult.ForEach(ds =>
                {
                    sw.WriteLine("Dataset {0}", ds.Options.Name);
                    // NOTE(review): f1 and f2 are not declared in this method; presumably members
                    // declared elsewhere in the class — confirm.
                    OptimalResultConditionUtils.WriteSpectrumBin(sw, ds, f1, f2);
                });

                //sw.WriteLine();
                //new OptimalFileTextWriter().WriteToStream(sw, BuildResult);

                return(new IdentifiedSpectrumBuilderResult()
                {
                    Spectra = ret.AcceptedSpectra.ToList(),
                    PeptideFDR = ret.PeptideFdr,
                    ProteinFDR = ret.ProteinFdr
                });
            }
        }
 /// <summary>
 /// Clears <c>lvDatFiles</c> and refills it with one selected row per path name of every
 /// dataset in <c>options.DatasetList</c>.
 /// </summary>
 /// <typeparam name="T">Dataset options type each list entry is cast to while iterating.</typeparam>
 /// <param name="options">Options whose dataset list is displayed.</param>
 /// <remarks>
 /// Each row gets the path name as its text, an empty second column, and the dataset name
 /// as its third column.
 /// </remarks>
 public virtual void LoadDatasetList <T>(BuildSummaryOptions options) where T : IDatasetOptions
 {
     this.lvDatFiles.Items.Clear();

     foreach (T ds in options.DatasetList)
     {
         foreach (var pathName in ds.PathNames)
         {
             var row = this.lvDatFiles.Items.Add(pathName);

             // Column 2 is left blank; column 3 carries the dataset name.
             row.SubItems.Add(string.Empty);
             row.SubItems.Add(ds.Name);

             row.Selected = true;
         }
     }
 }
Exemple #6
0
        /// <summary>
        /// Fills <paramref name="result"/> from the XML file <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Path of the XML file to read.</param>
        /// <param name="result">Options object that receives the loaded settings.</param>
        /// <remarks>
        /// When the root element has no <c>SearchEngine</c> child, <paramref name="result"/> loads
        /// the file itself. Otherwise the element's value selects an engine-specific summary
        /// configuration, which loads the file and is then converted into <paramref name="result"/>.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The <c>SearchEngine</c> value parses to <c>SearchEngineType.Unknown</c>, or to an engine
        /// for which no summary configuration is mapped here.
        /// </exception>
        public static void LoadFromFile(string fileName, BuildSummaryOptions result)
        {
            var engineElement = XElement.Load(fileName).Element("SearchEngine");

            if (engineElement == null)
            {
                // No search engine declared: the options object reads the file directly.
                result.LoadFromFile(fileName);
                return;
            }

            string           engineName = engineElement.Value;
            SearchEngineType engineType = EnumUtils.StringToEnum(engineName, SearchEngineType.Unknown);

            if (engineType == SearchEngineType.Unknown)
            {
                throw new ArgumentException("Unknown search engine " + engineName);
            }

            AbstractSummaryConfiguration configuration;
            switch (engineType)
            {
            case SearchEngineType.MASCOT:
                configuration = new MascotDatSummaryConfiguration("");
                break;

            case SearchEngineType.SEQUEST:
                configuration = new SequestSummaryConfiguration("");
                break;

            case SearchEngineType.XTandem:
                configuration = new XTandemXmlSummaryConfiguration("");
                break;

            case SearchEngineType.PFind:
                configuration = new PFindSummaryConfiguration("");
                break;

            default:
                // Any other engine has no configuration mapped in this method.
                throw new ArgumentException("It's not defined that how to get SummaryConfiguration for search engine " + engineName);
            }

            configuration.LoadFromFile(fileName);
            configuration.ConvertTo(result);
        }
Exemple #7
0
        /// <summary>
        /// Loads the build options from <paramref name="parameterFile"/> and filters PSMs in
        /// rounds, one per peptide-count threshold from <c>max(FdrPeptideCount, 2)</c> down to 2.
        /// Each round's summary line is written to a report file, and the round with the largest
        /// <c>TotalProteinCount</c> supplies the returned spectra.
        /// </summary>
        /// <param name="parameterFile">
        /// Path of the parameter file. It is used to construct the options, is passed to
        /// <c>InitFromOptions</c>, and is the base for the report file name
        /// (<c>FileUtils.ChangeExtension(parameterFile, ".optimal")</c>).
        /// </param>
        /// <returns>
        /// A result holding the spectra of the chosen round (plus any spectra added by the optional
        /// peptide-retrieval step), the peptide FDR of that round's <c>Unique2Result</c>, and the
        /// configured protein FDR value. If no round produced an item, a result with an empty
        /// spectrum list is returned.
        /// </returns>
        public IdentifiedSpectrumBuilderResult Build(string parameterFile)
        {
            Options = new BuildSummaryOptions(parameterFile);
            Options.DatasetList.RemoveDisabled();

            // May be null; every use below checks for null before combining it with other filters.
            IIdentifiedProteinGroupFilter conFilter = Options.Database.GetNotContaminationDescriptionFilter(this.Progress);

            var fdrCalc = Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

            BuildResult = new DatasetList();

            // Initialize from the configuration.
            BuildResult.InitFromOptions(Options.DatasetList, this.Progress, parameterFile);

            // NOTE(review): totalCount is not read afterwards; whether the call itself has
            // side effects is not visible here — confirm before removing.
            var totalCount = BuildResult.GetOptimalSpectrumCount();

            string optimalResultFile = FileUtils.ChangeExtension(parameterFile, ".optimal");

            using (var sw = new StreamWriter(optimalResultFile))
            {
                // Written before any filtering round runs.
                new OptimalFileTextWriter().WriteToStream(sw, BuildResult);

                UniformProteinFdrOptimalResultCalculator proteinCalc = new UniformProteinFdrOptimalResultCalculator(fdrCalc, Options.GetDecoyGroupFilter())
                {
                    Progress = this.Progress
                };

                sw.WriteLine(OptimalFilteredItem.GetHeader());

                var uniqueFilter = new IdentifiedProteinGroupUniquePeptideCountFilter(2);

                OptimalFilteredItem finalItem = null;

                // Captured before the rounds narrow BuildResult; null when peptide retrieval is off.
                // It is only dereferenced inside the Options.PeptideRetrieval branch further down.
                List <IIdentifiedSpectrum> allSpectrum = Options.PeptideRetrieval ? BuildResult.GetSpectra() : null;

                // Start from the configured peptide count, but never below 2.
                int    fdrPeptideCount = Options.FalseDiscoveryRate.FdrPeptideCount > 2 ? Options.FalseDiscoveryRate.FdrPeptideCount : 2;
                double firstStepFdr    = Options.FalseDiscoveryRate.MaxPeptideFdr;
                bool   bFirst          = true;
                // NOTE(review): BuildResult is narrowed by KeepOptimalResultInSetOnly inside each round
                // and is not reloaded within this loop, so later rounds appear to operate on what the
                // earlier rounds left behind. FilterOneHitWonders is not visible here — confirm intent.
                for (int curPeptideCount = fdrPeptideCount; curPeptideCount >= 2; curPeptideCount--)
                {
                    // Rebuild the SpectrumBin from the retained spectra (skipped on the first round).
                    if (!bFirst)
                    {
                        BuildResult.BuildSpectrumBin();
                    }
                    bFirst = false;

                    var curItem = new OptimalFilteredItem();

                    IIdentifiedProteinGroupFilter groupFilter;

                    // The peptide-count step only runs for thresholds above 2.
                    bool bNeedFirstStep = curPeptideCount > 2;
                    if (bNeedFirstStep)
                    {
                        Progress.SetMessage("Filtering PSMs by protein fdr {0}, unique peptide count >= 2 and peptide count >= {1} using peptide fdr {2}...", Options.FalseDiscoveryRate.FdrValue, curPeptideCount, firstStepFdr);

                        // Step 1: filter by UniquePeptideCount and PeptideCount to obtain the peptide FDR
                        // that corresponds to the required protein FDR.
                        var countFilter = new IdentifiedProteinGroupPeptideCountFilter(curPeptideCount);

                        if (conFilter != null)
                        {
                            groupFilter = new AndIdentifiedProteinGroupFilter(new IIdentifiedProteinGroupFilter[] { conFilter, uniqueFilter, countFilter });
                        }
                        else
                        {
                            groupFilter = new AndIdentifiedProteinGroupFilter(new IIdentifiedProteinGroupFilter[] { uniqueFilter, countFilter });
                        }

                        curItem.Unique2CountResult = proteinCalc.GetOptimalResultForGroupFilter(BuildResult, firstStepFdr, Options.FalseDiscoveryRate.FdrValue, groupFilter);
                        // The peptide FDR found here is carried over as the starting value for the next round's step 1.
                        firstStepFdr = curItem.Unique2CountResult.PeptideFdr;

                        // Keep only the PSMs that are not contained in proteins that passed the filter.
                        BuildResult.KeepOptimalResultInSetOnly(new HashSet <IIdentifiedSpectrum>(curItem.Unique2CountResult.RejectedSpectra));

                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                    else
                    {
                        // No step 1 for this round: use a default-constructed placeholder result.
                        curItem.Unique2CountResult = new ProteinFdrFilteredItem();
                    }

                    Progress.SetMessage("Filtering PSMs by protein fdr {0}, unique peptide count >= 2 using peptide fdr {1}...", Options.FalseDiscoveryRate.FdrValue, firstStepFdr);

                    // Step 2: filter by UniquePeptideCount and compute the result that satisfies the given protein FDR.
                    // After a step 1 this starts from the configured maximum peptide FDR; otherwise it uses
                    // the current firstStepFdr value.
                    double secondStepFdr = bNeedFirstStep ? Options.FalseDiscoveryRate.MaxPeptideFdr : firstStepFdr;

                    if (conFilter != null)
                    {
                        groupFilter = new AndIdentifiedProteinGroupFilter(new IIdentifiedProteinGroupFilter[] { conFilter, uniqueFilter });
                    }
                    else
                    {
                        groupFilter = uniqueFilter;
                    }

                    curItem.Unique2Result = proteinCalc.GetOptimalResultForGroupFilter(BuildResult, secondStepFdr, Options.FalseDiscoveryRate.FdrValue, groupFilter);

                    // Keep only the PSMs that are not contained in proteins that passed the filter.
                    BuildResult.KeepOptimalResultInSetOnly(new HashSet <IIdentifiedSpectrum>(curItem.Unique2Result.RejectedSpectra));
                    GC.Collect();
                    GC.WaitForPendingFinalizers();

                    // FilterOneHitWonders is defined outside this block.
                    curItem.Unique1Result = FilterOneHitWonders(conFilter, proteinCalc);

                    GC.Collect();
                    GC.WaitForPendingFinalizers();

                    sw.WriteLine(curItem.ToString());

                    // Strict comparison: on equal TotalProteinCount the earlier round is kept.
                    if (finalItem == null || finalItem.TotalProteinCount < curItem.TotalProteinCount)
                    {
                        finalItem = curItem;
                    }

                    // Drop the local reference before forcing a collection (finalItem may still hold the item).
                    curItem = null;

                    GC.Collect();
                    GC.WaitForPendingFinalizers();

                    Console.WriteLine(MyConvert.Format("Filtering PSMs by protein fdr {0}, unique peptide count >= 2 and peptide count >= {1} using peptide fdr {2}...cost {3}.", Options.FalseDiscoveryRate.FdrValue, curPeptideCount, firstStepFdr, SystemUtils.CostMemory()));
                }

                Progress.SetMessage("Filtering PSMs by protein fdr {0} finished, free memory...", Options.FalseDiscoveryRate.FdrValue);

                if (finalItem != null)
                {
                    sw.WriteLine();
                    sw.WriteLine("Final result : ");

                    WriteScoreMap(sw, BuildResult, finalItem.Unique2CountResult);
                    WriteScoreMap(sw, BuildResult, finalItem.Unique2Result);
                    WriteScoreMap(sw, BuildResult, finalItem.Unique1Result);

                    var finalSpectra = finalItem.GetSpectra();
                    if (Options.PeptideRetrieval)
                    {
                        Progress.SetMessage("Retrivaling peptides passed maximum peptide FDR for proteins passed protein FDR...");
                        var proteinBuilder = new IdentifiedProteinBuilder();
                        var groupBuilder   = new IdentifiedProteinGroupBuilder();
                        List <IIdentifiedProtein>      proteins = proteinBuilder.Build(finalSpectra);
                        List <IIdentifiedProteinGroup> groups   = groupBuilder.Build(proteins);

                        // Map every protein name to the group that contains it.
                        var proteinMap = new Dictionary <string, IIdentifiedProteinGroup>();
                        foreach (var g in groups)
                        {
                            foreach (var p in g)
                            {
                                proteinMap[p.Name] = g;
                            }
                        }

                        // Separate set built before the loop, so spectra added to finalSpectra below do not affect it.
                        // NOTE(review): GetSpectra() is called a second time here; whether it returns the
                        // same list instance as finalSpectra is not visible in this block.
                        var savedSpectra = new HashSet <IIdentifiedSpectrum>(finalItem.GetSpectra());
                        foreach (var spectrum in allSpectrum)
                        {
                            // Already part of the chosen round's result.
                            if (savedSpectra.Contains(spectrum))
                            {
                                continue;
                            }

                            var pgs = new HashSet <IIdentifiedProteinGroup>();
                            foreach (var protein in spectrum.Proteins)
                            {
                                IIdentifiedProteinGroup pg;
                                if (proteinMap.TryGetValue(protein, out pg))
                                {
                                    pgs.Add(pg);
                                }
                            }

                            //if the spectrum doesn't map to protein passed FDR filter, ignore
                            //if the spectrum maps to multiple groups, ignore
                            if (pgs.Count == 0 || pgs.Count > 1)
                            {
                                continue;
                            }

                            //The spectrum should map to all proteins in the group
                            if (pgs.First().All(l => spectrum.Proteins.Contains(l.Name)))
                            {
                                finalSpectra.Add(spectrum);
                            }
                        }
                    }

                    BuildResult.ClearSpectra();
                    GC.Collect();
                    GC.WaitForPendingFinalizers();

                    // PeptideFDR comes from the chosen round's Unique2Result; ProteinFDR is the configured value.
                    return(new IdentifiedSpectrumBuilderResult()
                    {
                        Spectra = finalSpectra,
                        PeptideFDR = finalItem.Unique2Result.PeptideFdr,
                        ProteinFDR = Options.FalseDiscoveryRate.FdrValue
                    });
                }
                else
                {
                    // No round produced an item: return an empty spectrum list with the FDR fields left unset.
                    return(new IdentifiedSpectrumBuilderResult()
                    {
                        Spectra = new List <IIdentifiedSpectrum>()
                    });
                }
            }
        }