/// <summary>
/// Runs IdentifiedSpectrumUtils.CalculateUniqueQValue on every spectrum list held in
/// <paramref name="peptideBin"/>, using this instance's scoreFunc and fdrCalc.
/// </summary>
/// <param name="peptideBin">Spectrum lists keyed by OptimalResultCondition; each list is processed on its own.</param>
public void CalculateUniqueQValue(Dictionary <OptimalResultCondition, List <IIdentifiedSpectrum> > peptideBin)
 {
     foreach (var binEntry in peptideBin)
     {
         // The key is not needed here; only the grouped spectra are handed on.
         var binSpectra = binEntry.Value;
         IdentifiedSpectrumUtils.CalculateUniqueQValue(binSpectra, scoreFunc, fdrCalc);
     }
 }
예제 #2
0
        /// <summary>
        /// Verifies that two processing orders end with the same spectrum count in every bin:
        /// (1) KeepTopPeptideFromSameEngineDifferentParameters on the merged list, then binning;
        /// (2) KeepUnconflictPeptidesFromSameEngineDifferentParameters on the merged list, then binning,
        ///     then KeepTopPeptideFromSameEngineDifferentParameters inside each bin.
        /// </summary>
        public void TestSameEngineDifferentParameters()
        {
            ClassificationOptions co = new ClassificationOptions();

            co.ClassifyByCharge             = true;
            co.ClassifyByMissCleavage       = true;
            co.ClassifyByModification       = true;
            co.ModifiedAminoacids           = "STY";
            co.ClassifyByNumProteaseTermini = true;

            // Load both result sets, drop ambiguous assignments and tag each set with its origin.
            var s1 = new MascotPeptideTextFormat().ReadFromFile(TestContext.CurrentContext.TestDirectory + "/../../../data/deisotopic.peptides");
            IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s1);
            s1.ForEach(m => m.Tag = "deisotopic");

            var s2 = new MascotPeptideTextFormat().ReadFromFile(TestContext.CurrentContext.TestDirectory + "/../../../data/deisotopic-top10.peptides");
            IdentifiedSpectrumUtils.RemoveSpectrumWithAmbigiousAssignment(s2);
            s2.ForEach(m => m.Tag = "deisotopic-top");

            var all = s1.Union(s2).ToList();

            // Path 1: keep the top peptide first, then bin.
            var p1 = new List <IIdentifiedSpectrum>(all);
            IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(p1, new ScoreFunction());
            p1.ForEach(m => m.ClassificationTag = "deisotopic/deisotopic-top");
            var bin1 = co.BuildSpectrumBin(p1);

            // Path 2: keep unconflicting peptides first, bin, then keep the top peptide per bin.
            var p2 = new List <IIdentifiedSpectrum>(all);
            IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(p2, new ScoreFunction());
            p2.ForEach(m => m.ClassificationTag = "deisotopic/deisotopic-top");
            var bin2 = co.BuildSpectrumBin(p2);

            bin2.ForEach(m =>
            {
                IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(m.Spectra, new ScoreFunction());

                var condition = m.Condition.ToString();
                var n = bin1.Find(a => a.Condition.ToString().Equals(condition));

                // Find returns null when no bin matches; fail with a readable message
                // instead of a NullReferenceException on n.Spectra.
                Assert.IsNotNull(n, "No bin from the top-peptide path matches condition: " + condition);
                Assert.AreEqual(m.Spectra.Count, n.Spectra.Count);
            });
        }
예제 #3
0
        /// <summary>
        /// Starting from s1, s2, s4 and s5, expects KeepTopPeptideFromSameEngineDifferentParameters
        /// to leave exactly two spectra: s1 and s4.
        /// </summary>
        public void KeepTopPeptideFromSameEngineDifferentParameters()
        {
            var candidates = new List<IIdentifiedSpectrum> { s1, s2, s4, s5 };

            IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(candidates, new ScoreFunction());

            Assert.AreEqual(2, candidates.Count);
            foreach (var survivor in new IIdentifiedSpectrum[] { s1, s4 })
            {
                Assert.IsTrue(candidates.Contains(survivor));
            }
        }
예제 #4
0
        /// <summary>
        /// Loads a peptide file whose peptides carry no proteins, calls FillProteinInformation with the
        /// companion ".proteins" file, and expects every peptide to have at least one protein afterwards.
        /// </summary>
        public void TestFillProteinInformation()
        {
            var testDir     = TestContext.CurrentContext.TestDirectory;
            var peptideFile = testDir + "/../../../data/Test.output.xml.FDR0.01.peptides";
            var proteinFile = testDir + "/../../../data//Test.output.xml.FDR0.01.peptides.proteins";

            var loaded = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

            // Precondition: nothing has been filled in yet.
            Assert.IsTrue(loaded.All(pep => pep.Peptide.Proteins.Count == 0));

            IdentifiedSpectrumUtils.FillProteinInformation(loaded, proteinFile);

            Assert.IsTrue(loaded.All(pep => pep.Peptide.Proteins.Count > 0));
        }
예제 #5
0
        /// <summary>
        /// Starting from s1, s2, s6 and s7, expects KeepUnconflictPeptidesFromSameEngineDifferentParameters
        /// to leave exactly three spectra: s1, s6 and s7.
        /// </summary>
        public void KeepUnconflictPeptidesFromSameEngineDifferentParameters()
        {
            var candidates = new List<IIdentifiedSpectrum> { s1, s2, s6, s7 };

            IdentifiedSpectrumUtils.KeepUnconflictPeptidesFromSameEngineDifferentParameters(candidates, new ScoreFunction());

            Assert.AreEqual(3, candidates.Count);
            foreach (var survivor in new IIdentifiedSpectrum[] { s1, s6, s7 })
            {
                Assert.IsTrue(candidates.Contains(survivor));
            }
        }
예제 #6
0
        /// <summary>
        /// Loads a peptide file, removes entries with ExpectValue above 0.05 or a pure sequence shorter
        /// than 6, flags entries whose protein names contain "REVERSE_" as decoys, runs CalculateQValue,
        /// and checks that the first entry's QValue is 0.0267 within a tolerance of 0.0001.
        /// </summary>
        public void TestCalculateQValue()
        {
            var peptideFile = TestContext.CurrentContext.TestDirectory + "/../../../data/QTOF_Ecoli.LowRes.t.xml.peptides";
            var loaded      = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

            loaded.RemoveAll(pep => pep.ExpectValue > 0.05 || pep.Peptide.PureSequence.Length < 6);

            foreach (var pep in loaded)
            {
                pep.FromDecoy = pep.Proteins.Any(proteinName => proteinName.Contains("REVERSE_"));
            }

            var scoring = new ExpectValueFunction();
            var fdr     = new TargetFalseDiscoveryRateCalculator();
            IdentifiedSpectrumUtils.CalculateQValue(loaded, scoring, fdr);

            Assert.AreEqual(0.0267, loaded[0].QValue, 0.0001);
        }
예제 #7
0
        /// <summary>
        /// Runs FilterSameSpectrumWithDifferentCharge twice over s1..s6 with different score
        /// assignments and checks which three spectra remain each time.
        /// </summary>
        public void TestRemoveSameSpectrumWithDifferentCharge()
        {
            // Round 1: with these scores s1, s2 and s6 are expected to remain.
            s1.Score = 3.0;
            s2.Score = 2.0;
            s3.Score = 2.0;
            s6.Score = 7.0;

            var firstRound = new List<IIdentifiedSpectrum> { s1, s2, s3, s4, s5, s6 };
            IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge(firstRound);

            Assert.AreEqual(3, firstRound.Count);
            foreach (var survivor in new IIdentifiedSpectrum[] { s1, s2, s6 })
            {
                Assert.IsTrue(firstRound.Contains(survivor));
            }

            // Round 2: after changing the scores of s3 and s6, s3, s4 and s5 are expected to remain.
            s3.Score = 4.0;
            s6.Score = 3.0;

            var secondRound = new List<IIdentifiedSpectrum> { s1, s2, s3, s4, s5, s6 };
            IdentifiedSpectrumUtils.FilterSameSpectrumWithDifferentCharge(secondRound);

            Assert.AreEqual(3, secondRound.Count);
            foreach (var survivor in new IIdentifiedSpectrum[] { s3, s4, s5 })
            {
                Assert.IsTrue(secondRound.Contains(survivor));
            }
        }
 /// <summary>
 /// Stores the unique peptide count of <paramref name="peps"/> in uniquePeptideCount and the
 /// spectrum count of their Spectrum values in peptideCount.
 /// </summary>
 /// <param name="peps">Peptides to count; the sequence is enumerated once for each count.</param>
 private void DoInitUniquePeptideCount(IEnumerable <IIdentifiedPeptide> peps)
 {
     this.uniquePeptideCount = IdentifiedPeptideUtils.GetUniquePeptideCount(peps);

     // Project each peptide onto its spectrum before counting spectra.
     var spectraOfPeptides = peps.Select(pep => pep.Spectrum);
     this.peptideCount = IdentifiedSpectrumUtils.GetSpectrumCount(spectraOfPeptides);
 }
        /// <summary>
        /// Builds an <see cref="IdentificationSummary"/> for a protein result file, using the sibling
        /// ".param" and ".peptides" files obtained by changing the extension of <paramref name="proteinFile"/>.
        /// </summary>
        /// <param name="proteinFile">Path of the protein result file; companion file names are derived from it.</param>
        /// <param name="defaultDecoyPattern">Regular expression passed to GetTargetSpectra, and used to build the
        /// decoy group filter when the param file does not provide one.</param>
        /// <param name="defaultCalc">FDR calculator used when the param file does not provide one.</param>
        /// <returns>The summary; the peptide and protein sections are only filled when the respective file exists.</returns>
        public static IdentificationSummary Parse(string proteinFile, string defaultDecoyPattern, IFalseDiscoveryRateCalculator defaultCalc)
        {
            var summary = new IdentificationSummary();
            summary.FileName = FileUtils.ChangeExtension(new FileInfo(proteinFile).Name, "");

            var decoyPattern = new Regex(defaultDecoyPattern);

            IIdentifiedProteinGroupFilter groupFilter   = null;
            IFalseDiscoveryRateCalculator fdrCalculator = null;

            // Prefer the filter and calculator recorded in the ".param" file, when it enables FDR filtering.
            var paramFile = FileUtils.ChangeExtension(proteinFile, ".param");
            if (File.Exists(paramFile))
            {
                BuildSummaryOptions options = BuildSummaryOptionsUtils.LoadFromFile(paramFile);
                if (options.FalseDiscoveryRate.FilterByFdr)
                {
                    groupFilter   = options.GetDecoyGroupFilter();
                    fdrCalculator = options.FalseDiscoveryRate.GetFalseDiscoveryRateCalculator();
                }
            }

            // Otherwise fall back to the caller-supplied defaults.
            if (groupFilter == null)
            {
                groupFilter   = new IdentifiedProteinGroupNameRegexFilter(defaultDecoyPattern, false);
                fdrCalculator = defaultCalc;
            }

            var peptideFile = FileUtils.ChangeExtension(proteinFile, ".peptides");
            if (File.Exists(peptideFile))
            {
                var peptides = new MascotPeptideTextFormat().ReadFromFile(peptideFile);

                // Spectra selected by GetSpectraByNPT with 2 ("full") and 1 ("semi"), each with its target subset.
                var fullAll    = GetSpectraByNPT(peptides, 2);
                var fullTarget = GetTargetSpectra(decoyPattern, fullAll);
                var semiAll    = GetSpectraByNPT(peptides, 1);
                var semiTarget = GetTargetSpectra(decoyPattern, semiAll);

                summary.FullSpectrumCount       = GetSpectrumCount(fullAll);
                summary.FullTargetSpectrumCount = GetSpectrumCount(fullTarget);
                summary.SemiSpectrumCount       = GetSpectrumCount(semiAll);
                summary.SemiTargetSpectrumCount = GetSpectrumCount(semiTarget);

                summary.FullPeptideCount       = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullAll);
                summary.FullTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(fullTarget);
                summary.SemiPeptideCount       = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiAll);
                summary.SemiTargetPeptideCount = IdentifiedSpectrumUtils.GetUniquePeptideCount(semiTarget);

                // First argument is (total - target), second is the target count.
                summary.FullSpectrumFdr = fdrCalculator.Calculate(
                    summary.FullSpectrumCount - summary.FullTargetSpectrumCount,
                    summary.FullTargetSpectrumCount);
                summary.SemiSpectrumFdr = fdrCalculator.Calculate(
                    summary.SemiSpectrumCount - summary.SemiTargetSpectrumCount,
                    summary.SemiTargetSpectrumCount);
                summary.FullPeptideFdr = fdrCalculator.Calculate(
                    summary.FullPeptideCount - summary.FullTargetPeptideCount,
                    summary.FullTargetPeptideCount);
                summary.SemiPeptideFdr = fdrCalculator.Calculate(
                    summary.SemiPeptideCount - summary.SemiTargetPeptideCount,
                    summary.SemiTargetPeptideCount);
            }

            if (File.Exists(proteinFile))
            {
                var groups = new MascotResultTextFormat().ReadFromFile(proteinFile);
                groups.InitUniquePeptideCount();

                // Split groups by the unique peptide count of their first protein: more than one vs. exactly one.
                var multiUniqueGroups  = groups.Where(g => g[0].UniquePeptideCount > 1).ToList();
                var singleUniqueGroups = groups.Where(g => g[0].UniquePeptideCount == 1).ToList();

                summary.ProteinGroupCount        = groups.Count;
                summary.Unique2ProteinGroupCount = multiUniqueGroups.Count;

                // NOTE(review): the protein-level FDR is computed with defaultCalc, not the calculator
                // selected above from the param file — confirm this is intentional.
                int targetGroups;

                summary.Unique2ProteinFdr = CalculateProteinFdr(multiUniqueGroups, groupFilter, defaultCalc, out targetGroups);
                summary.Unique2ProteinGroupTargetCount = targetGroups;

                summary.Unique1ProteinFdr = CalculateProteinFdr(singleUniqueGroups, groupFilter, defaultCalc, out targetGroups);
                summary.Unique1ProteinGroupTargetCount = targetGroups;
            }

            return summary;
        }
예제 #10
0
        /// <summary>
        /// Sorts <paramref name="preFiltered"/>, computes Q-values on a copy reduced by
        /// KeepTopPeptideFromSameEngineDifferentParameters, propagates those Q-values to the remaining
        /// entries, and returns the leading part of the sorted list up to the last entry whose Q-value
        /// is not greater than FdrValue.
        /// </summary>
        /// <param name="preFiltered">Spectra to evaluate; sorted in place and their QValue is overwritten.</param>
        /// <param name="optimalResult">Receives the score, expect value, FDR and target/decoy counts at the cut-off.
        /// The two counts are reset to 0 when no entry passes.</param>
        /// <returns>The accepted spectra, or an empty list when no entry passes.</returns>
        private List <IIdentifiedSpectrum> DoCalculateDuplicated(List <IIdentifiedSpectrum> preFiltered, OptimalResult optimalResult)
        {
            // -1 marks "no Q-value assigned yet".
            foreach (var candidate in preFiltered)
            {
                candidate.QValue = -1;
            }

            ScoreFunc.SortSpectrum(preFiltered);

            // Use the top score spectra for the Q-value calculation.
            var topSpectra = new List<IIdentifiedSpectrum>(preFiltered);
            IdentifiedSpectrumUtils.KeepTopPeptideFromSameEngineDifferentParameters(topSpectra, ScoreFunc);

            // Calculate the Q-values.
            QValueFunc(topSpectra, ScoreFunc, FdrCalc);

            // Fill in the Q-value of the non-top peptides from the entry just before them.
            for (int pos = 1; pos < preFiltered.Count; pos++)
            {
                var current = preFiltered[pos];
                if (current.QValue == -1)
                {
                    current.QValue = preFiltered[pos - 1].QValue;
                }
            }

            optimalResult.PeptideCountFromDecoyDB  = 0;
            optimalResult.PeptideCountFromTargetDB = 0;

            // Cut-off: the last entry (searching from the end) whose Q-value is within the threshold.
            int cutoff = preFiltered.FindLastIndex(m => m.QValue <= FdrValue);
            if (cutoff < 0)
            {
                return new List<IIdentifiedSpectrum>();
            }

            var accepted  = preFiltered.GetRange(0, cutoff + 1);
            var threshold = preFiltered[cutoff];

            optimalResult.Score              = ScoreFunc.GetScore(threshold);
            optimalResult.ExpectValue        = threshold.ExpectValue;
            optimalResult.FalseDiscoveryRate = threshold.QValue;

            // Count target/decoy hits, taking each LongFileName only the first time it appears.
            var seenFiles = new HashSet<string>();
            int decoys    = 0;
            int targets   = 0;
            foreach (var spectrum in accepted)
            {
                if (!seenFiles.Add(spectrum.Query.FileScan.LongFileName))
                {
                    continue;
                }

                if (spectrum.FromDecoy)
                {
                    decoys++;
                }
                else
                {
                    targets++;
                }
            }

            optimalResult.PeptideCountFromDecoyDB  = decoys;
            optimalResult.PeptideCountFromTargetDB = targets;

            Console.WriteLine("{0} -> {1} ==> {2} / {3}", preFiltered.Count, topSpectra.Count, decoys, targets);

            return accepted;
        }
 /// <summary>
 /// Calculates Q-values for the given spectra by delegating to
 /// IdentifiedSpectrumUtils.CalculateQValue with this instance's scoreFunc and fdrCalc.
 /// </summary>
 /// <param name="spectra">Spectra handed to the calculation.</param>
 public void CalculateQValue(List <IIdentifiedSpectrum> spectra)
 {
     IdentifiedSpectrumUtils.CalculateQValue(spectra, scoreFunc, fdrCalc);
 }