Esempio n. 1
0
        /// <summary>
        /// Removes the given conflicted spectra from the optimal results that share their
        /// condition, adjusting the target/decoy counts and recomputing the false discovery
        /// rate of every affected result.
        /// </summary>
        /// <param name="conflicted">Spectra to remove. Nothing is done when the list is empty.</param>
        public void RemoveConflictSpectrum(List <IIdentifiedSpectrum> conflicted)
        {
            if (conflicted.Count <= 0)
            {
                return;
            }

            IFalseDiscoveryRateCalculator fdrCalculator = Options.Parent.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

            // Index the current optimal results by condition so each conflict bin finds its counterpart.
            var resultsByCondition = OptimalResults.ToDictionary(m => m.Condition);
            var conflictBins = Options.Parent.Classification.BuildSpectrumBin(conflicted);

            foreach (var conflictBin in conflictBins)
            {
                var affected = resultsByCondition[conflictBin.Condition];

                foreach (var spectrum in conflictBin.Spectra)
                {
                    if (!spectrum.FromDecoy)
                    {
                        affected.Result.PeptideCountFromTargetDB--;
                    }
                    else
                    {
                        affected.Result.PeptideCountFromDecoyDB--;
                    }

                    affected.Spectra.Remove(spectrum);
                }

                // Counts changed, so the stored FDR has to be refreshed.
                affected.Result.FalseDiscoveryRate = fdrCalculator.Calculate(
                    affected.Result.PeptideCountFromDecoyDB,
                    affected.Result.PeptideCountFromTargetDB);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Estimates the false discovery rate of a subset distribution. For every condition
        /// in <paramref name="subset"/>, subset entries are joined with the entries of this
        /// distribution that have the same score; the subset target count is summed directly
        /// and the decoy count is estimated by scaling the subset target count with the
        /// decoy/target ratio of the matching entry of this distribution.
        /// </summary>
        /// <param name="subset">Subset distribution whose conditions are looked up in this distribution.</param>
        /// <param name="calc">Calculator that turns the (decoy, target) totals into an FDR.</param>
        /// <returns>The value returned by <paramref name="calc"/> for the accumulated totals.</returns>
        public double CalculateSubsetFdr(ScoreDistribution subset, IFalseDiscoveryRateCalculator calc)
        {
            double targetCount = 0;
            double decoyCount  = 0;

            foreach (OptimalResultCondition cond in subset.Keys)
            {
                var subsetOrs = subset[cond];
                var totalOrs  = this[cond];

                // Materialize the join once; the result is consumed twice below and a deferred
                // query would otherwise re-run the whole join for each Sum.
                var counts =
                    (from s in subsetOrs
                     join t in totalOrs on s.Score equals t.Score
                     select new
                     {
                         TargetCount = (int)s.PeptideCountFromTargetDB,
                         // A matching entry without targets has no usable decoy/target ratio; contribute
                         // nothing instead of letting NaN/Infinity poison the running decoy total.
                         DecoyCount = t.PeptideCountFromTargetDB == 0
                             ? 0.0
                             : (double)s.PeptideCountFromTargetDB * t.PeptideCountFromDecoyDB / t.PeptideCountFromTargetDB
                     }).ToList();

                targetCount += counts.Sum(c => c.TargetCount);
                decoyCount  += counts.Sum(c => c.DecoyCount);
            }

            return(calc.Calculate((int)decoyCount, (int)targetCount));
        }
Esempio n. 3
0
        /// <summary>
        /// Counts target and decoy spectra, considering only the first spectrum seen for each
        /// long file name, then stores both counts and the resulting false discovery rate
        /// in <c>Result</c>.
        /// </summary>
        /// <param name="calc">Calculator used to derive the FDR from the (decoy, target) counts.</param>
        public void CalculateFdr(IFalseDiscoveryRateCalculator calc)
        {
            var seenFileNames = new HashSet <string>();
            int targetHits = 0;
            int decoyHits  = 0;

            foreach (var hit in this.Spectra)
            {
                // Add returns false when the name was already recorded, i.e. a repeated file scan.
                if (!seenFileNames.Add(hit.Query.FileScan.LongFileName))
                {
                    continue;
                }

                if (hit.FromDecoy)
                {
                    decoyHits++;
                }
                else
                {
                    targetHits++;
                }
            }

            this.Result.PeptideCountFromDecoyDB  = decoyHits;
            this.Result.PeptideCountFromTargetDB = targetHits;
            this.Result.FalseDiscoveryRate       = calc.Calculate(decoyHits, targetHits);
        }
 /// <summary>
 /// Creates a parser whose options use target-based FDR exactly when
 /// <paramref name="calc"/> is a <c>TargetFalseDiscoveryRateCalculator</c>.
 /// </summary>
 /// <param name="calc">Calculator whose runtime type decides the <c>TargetFDR</c> option.</param>
 /// <param name="decoyPattern">Value stored as the <c>DecoyPattern</c> option.</param>
 public BuildSummaryResultParser(IFalseDiscoveryRateCalculator calc, string decoyPattern)
 {
     var parserOptions = new BuildSummaryResultParserOptions();
     parserOptions.TargetFDR    = calc is TargetFalseDiscoveryRateCalculator;
     parserOptions.DecoyPattern = decoyPattern;

     this.options = parserOptions;
 }
Esempio n. 5
0
        /// <summary>
        /// Recomputes the FDR of every optimal result with the calculator configured in the
        /// parent options, then refreshes the tolerance score.
        /// </summary>
        public void CalculateCurrentFdr()
        {
            IFalseDiscoveryRateCalculator fdrCalculator = Options.Parent.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

            foreach (var optimalResult in OptimalResults)
            {
                optimalResult.CalculateFdr(fdrCalculator);
            }

            CalculateToleranceScore();
        }
        /// <summary>
        /// Stores the FDR calculator and decoy group filter, and creates the protein and
        /// protein-group builders used by this calculator.
        /// </summary>
        /// <param name="fdrCalc">False discovery rate calculator to keep.</param>
        /// <param name="decoyFilter">Protein-group filter to keep for decoy detection.</param>
        public UniformProteinFdrOptimalResultCalculator(IFalseDiscoveryRateCalculator fdrCalc, IIdentifiedProteinGroupFilter decoyFilter)
        {
            // Injected collaborators.
            this.fdrCalc     = fdrCalc;
            this.decoyFilter = decoyFilter;

            // Internally owned builders.
            proteinBuilder = new IdentifiedProteinBuilder();
            groupBuilder   = new IdentifiedProteinGroupBuilder();
        }
        /// <summary>
        /// Assigns q-values to the given spectra by invoking the q-value function configured
        /// in the parent FDR options with this instance's score function and the configured
        /// FDR calculator.
        /// </summary>
        /// <param name="spectra">Spectra passed to the q-value function.</param>
        public virtual void InitializeQValue(List <IIdentifiedSpectrum> spectra)
        {
            IScoreFunction scoring = Options.ScoreFunction;

            var fdrOptions = Options.Parent.FalseDiscoveryRate;
            CalculateQValueFunc computeQValues = fdrOptions.GetQValueFunction();
            IFalseDiscoveryRateCalculator calculator = fdrOptions.GetFalseDiscoveryRateCalculator();

            computeQValues.Invoke(spectra, scoring, calculator);
        }
Esempio n. 8
0
        /// <summary>
        /// Computes the overall false discovery rate by summing the target and decoy peptide
        /// counts (each truncated to <c>int</c>) of every entry in every value of this distribution.
        /// </summary>
        /// <param name="calc">Calculator that turns the (decoy, target) totals into an FDR.</param>
        /// <returns>The value returned by <paramref name="calc"/> for the summed counts.</returns>
        public double CalculateFdr(IFalseDiscoveryRateCalculator calc)
        {
            int targetTotal = this.Values
                              .SelectMany(bin => bin, (bin, entry) => (int)entry.PeptideCountFromTargetDB)
                              .Sum();

            int decoyTotal = this.Values
                             .SelectMany(bin => bin, (bin, entry) => (int)entry.PeptideCountFromDecoyDB)
                             .Sum();

            return calc.Calculate(decoyTotal, targetTotal);
        }
        /// <summary>
        /// Assigns decoy flags to the spectra, computes their false discovery rate and writes
        /// the filter strings plus decoy count, target count and FDR to a file named after
        /// <paramref name="parameterFile"/> with the ".optimal" extension.
        /// </summary>
        /// <param name="parameterFile">Parameter file whose name is the basis of the output file name.</param>
        /// <param name="conf">Options providing the FDR calculator, decoy spectrum filter and filter strings.</param>
        /// <param name="result">Spectra to classify and count.</param>
        protected void WriteFdrFile(string parameterFile, BuildSummaryOptions conf, List <IIdentifiedSpectrum> result)
        {
            Progress.SetMessage("Calculating identified peptide false discovery rate ...");

            IFalseDiscoveryRateCalculator fdrCalculator = conf.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();

            DecoyPeptideBuilder.AssignDecoy(result, conf.GetDecoySpectrumFilter());

            // Every spectrum is either decoy or target, so counting decoys is enough.
            int decoyCount = 0;
            for (int index = 0; index < result.Count; index++)
            {
                if (result[index].FromDecoy)
                {
                    decoyCount++;
                }
            }
            int targetCount = result.Count - decoyCount;

            double fdr = fdrCalculator.Calculate(decoyCount, targetCount);

            string optimalResultFile = FileUtils.ChangeExtension(parameterFile, ".optimal");

            using (var writer = new StreamWriter(optimalResultFile))
            {
                foreach (string filterLine in conf.GetFilterString())
                {
                    writer.WriteLine(filterLine);
                }

                writer.WriteLine("DecoyCount\t{0}", decoyCount);
                writer.WriteLine("TargetCount\t{0}", targetCount);
                writer.WriteLine("FDR\t{0:0.######}", fdr);
            }
        }
        /// <summary>
        /// Splits the protein groups into decoys (accepted by <paramref name="decoyFilter"/>)
        /// and targets (everything else) and returns the FDR computed from both counts.
        /// </summary>
        /// <param name="groups">Protein groups to classify.</param>
        /// <param name="decoyFilter">Filter whose <c>Accept</c> marks a group as decoy.</param>
        /// <param name="calc">Calculator that turns the (decoy, target) counts into an FDR.</param>
        /// <param name="targetCount">Receives the number of groups not accepted by the filter.</param>
        /// <returns>The value returned by <paramref name="calc"/>.</returns>
        private static double CalculateProteinFdr(List <IIdentifiedProteinGroup> groups, IIdentifiedProteinGroupFilter decoyFilter, IFalseDiscoveryRateCalculator calc, out int targetCount)
        {
            int decoys = 0;
            targetCount = 0;

            for (int index = 0; index < groups.Count; index++)
            {
                if (!decoyFilter.Accept(groups[index]))
                {
                    targetCount++;
                }
                else
                {
                    decoys++;
                }
            }

            return calc.Calculate(decoys, targetCount);
        }
        /// <summary>
        /// Builds an identification summary for a protein result file and its sibling
        /// ".peptides" file: spectrum/peptide counts and FDRs for NPT 2 ("full") and NPT 1
        /// ("semi") spectra, plus protein-group counts and FDRs for groups with more than one
        /// and exactly one unique peptide.
        /// </summary>
        /// <remarks>
        /// When a sibling ".param" file exists and has FDR filtering enabled, its decoy group
        /// filter and FDR calculator are used; otherwise the defaults passed in are used.
        /// The selected calculator is applied to spectrum, peptide and protein level FDRs alike.
        /// </remarks>
        /// <param name="proteinFile">Path of the protein result file; sibling files are derived from it.</param>
        /// <param name="defaultDecoyPattern">Regex pattern used to separate target spectra and, as fallback, decoy protein groups.</param>
        /// <param name="defaultCalc">FDR calculator used when the parameter file provides none.</param>
        /// <returns>The populated summary; sections whose input file is missing keep their default values.</returns>
        public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
        {
            IdentificationSummary result = new IdentificationSummary();

            result.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

            Regex decoyReg = new Regex(defaultDecoyPattern);

            IIdentifiedProteinGroupFilter decoyFilter = null;
            IFalseDiscoveryRateCalculator curCalc     = null;

            var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");

            if (File.Exists(paramFile))
            {
                BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
                if (options.FalseDiscoveryRate.FilterByFdr)
                {
                    decoyFilter = options.GetDecoyGroupFilter();
                    curCalc     = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
                }
            }

            // Fall back to the caller-supplied defaults as a pair, so filter and calculator always match.
            if (decoyFilter == null)
            {
                decoyFilter = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
                curCalc     = defaultCalc;
            }

            var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");

            if (File.Exists(peptideFile))
            {
                var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

                var fullSpectra       = GetSpectraByNPT(peptides, 2);
                var fullTargetSpectra = GetTargetSpectra(decoyReg, fullSpectra);
                var semiSpectra       = GetSpectraByNPT(peptides, 1);
                var semiTargetSpectra = GetTargetSpectra(decoyReg, semiSpectra);

                result.FullSpectrumCount       = GetSpectrumCount(fullSpectra);
                result.FullTargetSpectrumCount = GetSpectrumCount(fullTargetSpectra);
                result.SemiSpectrumCount       = GetSpectrumCount(semiSpectra);
                result.SemiTargetSpectrumCount = GetSpectrumCount(semiTargetSpectra);

                result.FullPeptideCount       = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullSpectra);
                result.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTargetSpectra);
                result.SemiPeptideCount       = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiSpectra);
                result.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTargetSpectra);

                // Decoy count is derived as total minus target for each category.
                result.FullSpectrumFdr = curCalc.Calculate(result.FullSpectrumCount - result.FullTargetSpectrumCount, result.FullTargetSpectrumCount);
                result.SemiSpectrumFdr = curCalc.Calculate(result.SemiSpectrumCount - result.SemiTargetSpectrumCount, result.SemiTargetSpectrumCount);
                result.FullPeptideFdr  = curCalc.Calculate(result.FullPeptideCount - result.FullTargetPeptideCount, result.FullTargetPeptideCount);
                result.SemiPeptideFdr  = curCalc.Calculate(result.SemiPeptideCount - result.SemiTargetPeptideCount, result.SemiTargetPeptideCount);
            }

            if (File.Exists(proteinFile))
            {
                var ir = new MascotResultTextFormat().ReadFromFile(proteinFile);
                ir.InitUniquePeptideCount();

                var u2proteins = (from p in ir
                                  where p[0].UniquePeptideCount > 1
                                  select p).ToList();

                var u1proteins = (from p in ir
                                  where p[0].UniquePeptideCount == 1
                                  select p).ToList();

                result.ProteinGroupCount        = ir.Count;
                result.Unique2ProteinGroupCount = u2proteins.Count;

                // Use the calculator selected above (curCalc), not the fallback, so protein FDR
                // honors the parameter file the same way the spectrum/peptide FDRs do.
                int targetCount;
                result.Unique2ProteinFdr = CalculateProteinFdr(u2proteins, decoyFilter, curCalc, out targetCount);
                result.Unique2ProteinGroupTargetCount = targetCount;

                result.Unique1ProteinFdr = CalculateProteinFdr(u1proteins, decoyFilter, curCalc, out targetCount);
                result.Unique1ProteinGroupTargetCount = targetCount;
            }

            return(result);
        }
        /// <summary>
        /// Sorts the spectra with <paramref name="scoreFuncs"/> and assigns each one a q-value
        /// based on the cumulative number of unique target and decoy peptide sequences seen up
        /// to and including its score group. Spectra with equal scores share one q-value.
        /// </summary>
        /// <param name="peptides">Spectra to process; sorted in place. Nothing is done for an empty list.</param>
        /// <param name="scoreFuncs">Provides the sort order and the score of a spectrum.</param>
        /// <param name="fdrCalc">Calculator applied to the (unique decoy, unique target) sequence counts.</param>
        public static void CalculateUniqueQValue(List <IIdentifiedSpectrum> peptides, IScoreFunction scoreFuncs, IFalseDiscoveryRateCalculator fdrCalc)
        {
            if (peptides.Count == 0)
            {
                return;
            }

            scoreFuncs.SortSpectrum(peptides);

            var pendingGroup  = new List <IIdentifiedSpectrum>();
            var uniqueTargets = new HashSet <string>();
            var uniqueDecoys  = new HashSet <string>();

            double groupScore = scoreFuncs.GetScore(peptides[0]);

            foreach (IIdentifiedSpectrum current in peptides)
            {
                double currentScore = scoreFuncs.GetScore(current);

                if (currentScore != groupScore)
                {
                    // Score changed: the previous group is complete, so publish its q-value
                    // before the current spectrum contributes to the running counts.
                    double groupQValue = fdrCalc.Calculate(uniqueDecoys.Count, uniqueTargets.Count);
                    foreach (IIdentifiedSpectrum member in pendingGroup)
                    {
                        member.QValue = groupQValue;
                    }

                    pendingGroup.Clear();
                    groupScore = currentScore;
                }

                pendingGroup.Add(current);
                (current.FromDecoy ? uniqueDecoys : uniqueTargets).Add(current.Peptide.PureSequence);
            }

            // The last group is never followed by a score change, so flush it here.
            double finalQValue = fdrCalc.Calculate(uniqueDecoys.Count, uniqueTargets.Count);
            foreach (IIdentifiedSpectrum member in pendingGroup)
            {
                member.QValue = finalQValue;
            }
        }
        /// <summary>
        /// Calculates the QValue of each identified spectrum in the list, using the given
        /// score sorting function and FDR calculator.
        /// </summary>
        /// <remarks>
        /// Target/decoy totals count only the first entry seen for each long file name. The
        /// list is then walked from its last element to its first; at every score change the
        /// q-value is recomputed from the counts of the entries not yet passed. The walk stops
        /// as soon as a recomputed q-value is 0, leaving the remaining entries at 0.
        /// </remarks>
        /// <param name="peptides">Spectrum list; sorted in place. Nothing is done for an empty list.</param>
        /// <param name="scoreFuncs">Object providing score extraction and sorting.</param>
        /// <param name="fdrCalc">FDR calculator.</param>
        public static void CalculateQValue(List <IIdentifiedSpectrum> peptides, IScoreFunction scoreFuncs, IFalseDiscoveryRateCalculator fdrCalc)
        {
            if (peptides.Count == 0)
            {
                return;
            }

            scoreFuncs.SortSpectrum(peptides);

            int totalTarget = 0;
            int totalDecoy  = 0;

            HashSet <string> filenames = new HashSet <string>();

            // Remembers which entries contributed to the totals, so the backward pass removes
            // exactly those and repeated file scans cannot drive the counts below the true value.
            bool[] counted = new bool[peptides.Count];

            for (int i = 0; i < peptides.Count; i++)
            {
                IIdentifiedSpectrum spectrum = peptides[i];
                spectrum.QValue = 0.0;

                // Add returns false when this file scan was already counted.
                if (!filenames.Add(spectrum.Query.FileScan.LongFileName))
                {
                    continue;
                }

                counted[i] = true;

                if (spectrum.FromDecoy)
                {
                    totalDecoy++;
                }
                else
                {
                    totalTarget++;
                }
            }

            double lastScore  = scoreFuncs.GetScore(peptides[peptides.Count - 1]);
            double lastQvalue = fdrCalc.Calculate(totalDecoy, totalTarget);

            for (int i = peptides.Count - 1; i >= 0; i--)
            {
                double score = scoreFuncs.GetScore(peptides[i]);
                if (score != lastScore)
                {
                    lastScore  = score;
                    lastQvalue = fdrCalc.Calculate(totalDecoy, totalTarget);
                    if (lastQvalue == 0.0)
                    {
                        // Every remaining entry already carries QValue 0 from the first pass.
                        break;
                    }
                }

                peptides[i].QValue = lastQvalue;

                if (!counted[i])
                {
                    continue;
                }

                if (peptides[i].FromDecoy)
                {
                    totalDecoy--;
                }
                else
                {
                    totalTarget--;
                }
            }
        }
        /// <summary>
        /// Rebuilds this dataset collection from the given options: parses each dataset's
        /// search results, optionally assigns decoys and filters by the maximum peptide FDR,
        /// then resolves results from the same search engine and computes the overlap between
        /// datasets from different search engines.
        /// </summary>
        /// <param name="dsOptions">Dataset options to build from; also stored on this instance.</param>
        /// <param name="progress">Callback that receives progress messages and is handed to each dataset builder.</param>
        /// <param name="paramFile">Parameter file path; conflicted spectra are written next to it with the ".conflicted.peps" extension.</param>
        /// <exception cref="Exception">Thrown when FDR filtering is enabled and a dataset contains no decoy spectrum.</exception>
        public void InitFromOptions(DatasetListOptions dsOptions, IProgressCallback progress, string paramFile)
        {
            this.Clear();

            this.conflictFunc = dsOptions.Options.GetConflictFunc();

            this.fdrCalc = dsOptions.Options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
            IFilter <IIdentifiedSpectrum> decoyFilter = null;

            if (dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
            {
                decoyFilter = dsOptions.Options.GetDecoySpectrumFilter();
            }

            this.dsOptions = dsOptions;

            // Baselines taken after the first dataset; used below to estimate memory use and finish time.
            long     afterFirstMemory = 0;
            DateTime afterFirstTime   = DateTime.Now;
            var      totalCount       = dsOptions.Sum(l => l.PathNames.Count);
            var      usedCount        = 0;

            for (int i = 0; i < dsOptions.Count; i++)
            {
                var m       = dsOptions[i];
                var builder = m.GetBuilder();

                builder.Progress = progress;

                Dataset ds = new Dataset(m);

                // First, get all spectra that passed the fixed filtering criteria.
                ds.Spectra = builder.ParseFromSearchResult();
                ds.PSMPassedFixedCriteriaCount = ds.Spectra.Count;

                if (dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
                {
                    // Mark each spectrum as coming from the decoy database or not.
                    progress.SetMessage("Assigning decoy information...");
                    DecoyPeptideBuilder.AssignDecoy(ds.Spectra, decoyFilter);
                    var decoyCount = ds.Spectra.Count(l => l.FromDecoy);
                    if (decoyCount == 0)
                    {
                        throw new Exception(string.Format("No decoy protein found at dataset {0}, make sure the protein access number parser and the decoy pattern are correctly defined!", m.Name));
                    }

                    progress.SetMessage("{0} decoys out of {1} hits found", decoyCount, ds.Spectra.Count);

                    // Record the optimal results before filtering.
                    ds.BuildSpectrumBin();
                    ds.CalculateCurrentFdr();
                    ds.PushCurrentOptimalResults(string.Format("Before maximum peptide fdr {0}", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr));

                    // Filter, keep the unconflicted spectra, and record the optimal results again.
                    progress.SetMessage("Filtering by maximum peptide fdr {0} ...", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    ds.FilterByFdr(dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    ds.Spectra = ds.GetUnconflictedOptimalSpectra();
                    ds.BuildSpectrumBin();
                    ds.CalculateCurrentFdr();
                    ds.PushCurrentOptimalResults(string.Format("After maximum peptide fdr {0}", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr));
                }

                this.Add(ds);

                if (i == 0)
                {
                    // Working set in megabytes and wall-clock time once the first dataset is loaded.
                    afterFirstMemory = Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024);
                    afterFirstTime   = DateTime.Now;
                }
                else
                {
                    // usedCount only covers datasets after the first one, matching the baselines above.
                    usedCount += m.PathNames.Count;

                    long   currMemory    = Process.GetCurrentProcess().WorkingSet64 / (1024 * 1024);
                    double averageCost   = (double)(currMemory - afterFirstMemory) / usedCount;
                    double estimatedCost = afterFirstMemory + averageCost * totalCount;

                    DateTime currTime    = DateTime.Now;
                    var      averageTime = currTime.Subtract(afterFirstTime).TotalMinutes / usedCount;
                    var      finishTime  = afterFirstTime.AddMinutes(averageTime * (totalCount - dsOptions[0].PathNames.Count));
                    progress.SetMessage("{0}/{1} datasets, cost {2}M, avg {3:0.0}M, need {4:0.0}M, will finish at {5:MM-dd HH:mm:ss}", (i + 1), dsOptions.Count, currMemory, averageCost, estimatedCost, finishTime);
                }
            }

            // Initialize the experimental list.
            this.ForEach(m => m.InitExperimentals());

            if (dsOptions.Count > 1)
            {
                if (dsOptions.Options.KeepTopPeptideFromSameEngineButDifferentSearchParameters)
                {
                    // Merge/remove results obtained from the same search engine with different parameters.
                    ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(peptides, score), false);
                }
                else
                {
                    ProcessDatasetFromSameEngine(progress, (peptides, score) => IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(peptides, score), true);
                }

                // Initialize the overlap relationships between datasets searched by different search engines.
                this.OverlapBySearchEngine = FindOverlap((m1, m2) => m1.Options.SearchEngine != m2.Options.SearchEngine);


                // Initialize the datasets that have no overlap.
                var overlaps = new HashSet <Dataset>(from m in OverlapBySearchEngine
                                                     from s in m
                                                     select s);
                this.NoOverlaps = this.Where(m => !overlaps.Contains(m)).ToList();

                if (OverlapBySearchEngine.Count > 0 && dsOptions.Options.FalseDiscoveryRate.FilterByFdr)
                {
                    // Filter by the maximum FDR.
                    progress.SetMessage("Filtering PSMs by maximum fdr {0}, considering multiple engine overlap...", dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    var realFdr = this.FilterByFdr(dsOptions.Options.FalseDiscoveryRate.MaxPeptideFdr);
                    if (realFdr.ConflictSpectra.Count > 0)
                    {
                        new MascotPeptideTextFormat(UniformHeader.PEPTIDE_HEADER).WriteToFile(Path.ChangeExtension(paramFile, ".conflicted.peps"), realFdr.ConflictSpectra);
                    }

                    // Keep each dataset's spectra as the filtered result, for use in later iterations.
                    this.ForEach(m =>
                    {
                        m.Spectra = m.GetUnconflictedOptimalSpectra();
                    });
                }
            }
            else
            {
                // With a single dataset (or none) there is nothing to overlap with.
                this.NoOverlaps            = new List <Dataset>(this);
                this.OverlapBySearchEngine = new List <List <Dataset> >();
            }
        }
 /// <summary>
 /// Creates a q-value calculator that keeps the given score function and FDR calculator.
 /// </summary>
 /// <param name="scoreFunc">Score function stored in <c>scoreFunc</c>.</param>
 /// <param name="fdrCalc">FDR calculator stored in <c>fdrCalc</c>.</param>
 public QValueCalculator(IScoreFunction scoreFunc, IFalseDiscoveryRateCalculator fdrCalc)
 {
     this.fdrCalc = fdrCalc;
     this.scoreFunc = scoreFunc;
 }