Пример #1
0
        public static void TestCombos()
        {
            List <PeptideSpectralMatch> allIdentifications = null;

            ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
            ModificationMotif.TryGetMotif("P", out ModificationMotif motifP);
            var gptmdModifications = new List <ModificationWithMass> {
                new ModificationWithMass("21", "mt", motifN, TerminusLocalization.Any, 21.981943, null),
                new ModificationWithMass("16", "mt", motifP, TerminusLocalization.Any, 15.994915, null)
            };
            IEnumerable <Tuple <double, double> > combos = new List <Tuple <double, double> > {
                new Tuple <double, double>(21.981943, 15.994915)
            };
            Tolerance precursorMassTolerance = new PpmTolerance(10);

            MsDataScan dfd = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfd, (651.297638557 + 21.981943 + 15.994915).ToMz(1), 1, "filepath");

            var                         parentProtein         = new Protein("NNNPPP", "accession");
            DigestionParams             digestionParams       = new DigestionParams(minPeptideLength: 5);
            List <ModificationWithMass> variableModifications = new List <ModificationWithMass>();
            var                         modPep = parentProtein.Digest(digestionParams, new List <ModificationWithMass>(), variableModifications).First();

            var peptidesWithSetModifications = new List <PeptideWithSetModifications> {
                modPep
            };
            PeptideSpectralMatch match  = new PeptideSpectralMatch(peptidesWithSetModifications.First().CompactPeptide(TerminusType.None), 0, 0, 0, scan, digestionParams);
            PeptideSpectralMatch newPsm = new PeptideSpectralMatch(peptidesWithSetModifications.First().CompactPeptide(TerminusType.None), 0, 0, 0, scan, digestionParams);
            Dictionary <ModificationWithMass, ushort> modsDictionary = new Dictionary <ModificationWithMass, ushort>();
            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { peptidesWithSetModifications.First().CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      peptidesWithSetModifications.First()
                  } }
            };

            List <ProductType> lp = new List <ProductType> {
                ProductType.B, ProductType.Y
            };

            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            match.MatchToProteinLinkedPeptides(matching);

            match.SetFdrValues(1, 0, 0, 1, 0, 0, 0, 0, 0, false);
            allIdentifications = new List <PeptideSpectralMatch> {
                match
            };

            var engine = new GptmdEngine(allIdentifications, gptmdModifications, combos, new Dictionary <string, Tolerance> {
                { "filepath", precursorMassTolerance }
            }, new CommonParameters(), new List <string>());
            var res = (GptmdResults)engine.Run();

            Assert.AreEqual(1, res.Mods.Count);
            Assert.AreEqual(6, res.Mods["accession"].Count);
            Assert.AreEqual(3, res.Mods["accession"].Where(b => b.Item2.id.Equals("21")).Count());
            Assert.AreEqual(3, res.Mods["accession"].Where(b => b.Item2.id.Equals("16")).Count());
        }
Пример #2
0
        public static void TestPsmHeader()
        {
            DigestionParams             digestionParams = new DigestionParams();
            PeptideWithSetModifications pepWithSetMods  = new Protein(
                "MQQQQQQQ",
                "accession1",
                "org",
                new List <Tuple <string, string> > {
                new Tuple <string, string>("geneNameType", "geneName")
            },
                new Dictionary <int, List <Modification> > {
                { 2, new List <Modification> {
                      new Modification("mod", "mod")
                  } }
            },
                name: "name",
                full_name: "fullName",
                sequenceVariations: new List <SequenceVariation> {
                new SequenceVariation(2, "P", "Q", "changed this sequence")
            })
                                                          .Digest(digestionParams, new List <ModificationWithMass>(), new List <ModificationWithMass>()).First();
            MsDataFile myMsDataFile      = new TestDataFile(pepWithSetMods, "quadratic");
            MsDataScan scann             = myMsDataFile.GetOneBasedScan(2);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(scann, 4, 1, null);
            PeptideSpectralMatch    psm  = new PeptideSpectralMatch(pepWithSetMods.CompactPeptide(TerminusType.None), 1, 2, 3, scan, digestionParams);

            var t            = psm.ToString();
            var tabsepheader = PeptideSpectralMatch.GetTabSeparatedHeader();

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { pepWithSetMods.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pepWithSetMods
                  } }
            };

            psm.MatchToProteinLinkedPeptides(matching);

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            Tolerance          fragmentTolerance = new PpmTolerance(10);
            List <ProductType> lp = new List <ProductType> {
                ProductType.B
            };

            new LocalizationEngine(new List <PeptideSpectralMatch> {
                psm
            }, lp, myMsDataFile, new CommonParameters(productMassTolerance: fragmentTolerance), new List <string>()).Run();

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            psm.SetFdrValues(6, 6, 6, 6, 6, 6, 0, 0, 0, true);

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));
        }
Пример #3
0
        public static void TestGptmdEngine()
        {
            List <PeptideSpectralMatch> allResultingIdentifications = null;

            ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
            var gptmdModifications = new List <ModificationWithMass> {
                new ModificationWithMass("21", "mt", motifN, TerminusLocalization.Any, 21.981943)
            };
            IEnumerable <Tuple <double, double> > combos = new List <Tuple <double, double> >();
            Tolerance precursorMassTolerance             = new PpmTolerance(10);

            allResultingIdentifications = new List <PeptideSpectralMatch>();
            var engine = new GptmdEngine(allResultingIdentifications, gptmdModifications, combos, precursorMassTolerance, new List <string>());
            var res    = (GptmdResults)engine.Run();

            Assert.AreEqual(0, res.Mods.Count);

            //PsmParent newPsm = new TestParentSpectrumMatch(588.22520189093 + 21.981943);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(new MzmlScanWithPrecursor(0, new MzmlMzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null, null, "scan=1"), (588.22520189093 + 21.981943).ToMz(1), 1, null);

            var parentProtein = new Protein("NNNNN", "accession");
            var protease      = new Protease("Custom Protease", new List <string> {
                "K"
            }, new List <string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null);

            DigestionParams             digestionParams       = new DigestionParams();
            List <ModificationWithMass> variableModifications = new List <ModificationWithMass>();
            var modPep = parentProtein.Digest(digestionParams, new List <ModificationWithMass>(), variableModifications).First();

            var peptidesWithSetModifications = new List <PeptideWithSetModifications> {
                modPep
            };
            PeptideSpectralMatch newPsm = new PeptideSpectralMatch(peptidesWithSetModifications.First().CompactPeptide(TerminusType.None), 0, 0, 0, scan);

            Dictionary <ModificationWithMass, ushort> modsDictionary = new Dictionary <ModificationWithMass, ushort>();
            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { peptidesWithSetModifications.First().CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      peptidesWithSetModifications.First()
                  } }
            };
            List <ProductType> lp = new List <ProductType> {
                ProductType.B, ProductType.Y
            };
            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            newPsm.MatchToProteinLinkedPeptides(matching);

            newPsm.SetFdrValues(1, 0, 0, 1, 0, 0, 0, 0, 0, false);
            allResultingIdentifications.Add(newPsm);

            engine = new GptmdEngine(allResultingIdentifications, gptmdModifications, combos, precursorMassTolerance, new List <string>());
            res    = (GptmdResults)engine.Run();
            Assert.AreEqual(1, res.Mods.Count);
            Assert.AreEqual(5, res.Mods["accession"].Count);
        }
Пример #4
0
        public static void TestGptmdEngine(string proteinSequence, string accession, string sequenceVariantDescription, int numModifiedResidues)
        {
            List <PeptideSpectralMatch> allResultingIdentifications = null;

            ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
            var gptmdModifications = new List <Modification> {
                new Modification(_originalId: "21", _modificationType: "mt", _target: motifN, _locationRestriction: "Anywhere.", _monoisotopicMass: 21.981943)
            };
            IEnumerable <Tuple <double, double> > combos = new List <Tuple <double, double> >();
            Tolerance precursorMassTolerance             = new PpmTolerance(10);

            allResultingIdentifications = new List <PeptideSpectralMatch>();

            var fsp = new List <(string fileName, CommonParameters fileSpecificParameters)>();

            fsp.Add(("", new CommonParameters()));

            var engine = new GptmdEngine(allResultingIdentifications, gptmdModifications, combos, new Dictionary <string, Tolerance> {
                { "filepath", precursorMassTolerance }
            }, new CommonParameters(), fsp, new List <string>());
            var res = (GptmdResults)engine.Run();

            Assert.AreEqual(0, res.Mods.Count);

            var parentProtein = new Protein(proteinSequence, accession, sequenceVariations: new List <SequenceVariation> {
                new SequenceVariation(1, "N", "A", sequenceVariantDescription)
            });
            var variantProteins = parentProtein.GetVariantProteins();
            CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

            List <Modification> variableModifications = new List <Modification>();
            var modPep = variantProteins.SelectMany(p => p.Digest(commonParameters.DigestionParams, new List <Modification>(), variableModifications)).First();

            //PsmParent newPsm = new TestParentSpectrumMatch(588.22520189093 + 21.981943);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null), (new Proteomics.AminoAcidPolymer.Peptide(modPep.BaseSequence).MonoisotopicMass + 21.981943).ToMz(1), 1, "filepath", new CommonParameters());

            var peptidesWithSetModifications = new List <PeptideWithSetModifications> {
                modPep
            };
            PeptideSpectralMatch newPsm = new PeptideSpectralMatch(peptidesWithSetModifications.First(), 0, 0, 0, scan, commonParameters, new List <MatchedFragmentIon>());

            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            newPsm.SetFdrValues(1, 0, 0, 1, 0, 0, 0, 0);
            allResultingIdentifications.Add(newPsm);

            engine = new GptmdEngine(allResultingIdentifications, gptmdModifications, combos, new Dictionary <string, Tolerance> {
                { "filepath", precursorMassTolerance }
            }, new CommonParameters(), null, new List <string>());
            res = (GptmdResults)engine.Run();
            Assert.AreEqual(1, res.Mods.Count);
            Assert.AreEqual(numModifiedResidues, res.Mods["accession"].Count);
        }
Пример #5
0
        public static void TestPsmHeader()
        {
            CommonParameters            commonParameters = new CommonParameters();
            PeptideWithSetModifications pepWithSetMods   = new Protein(
                "MQQQQQQQ",
                "accession1",
                "org",
                new List <Tuple <string, string> > {
                new Tuple <string, string>("geneNameType", "geneName")
            },
                new Dictionary <int, List <Modification> > {
                { 2, new List <Modification> {
                      new Modification("mod", "mod")
                  } }
            },
                name: "name",
                fullName: "fullName",
                sequenceVariations: new List <SequenceVariation> {
                new SequenceVariation(2, "P", "Q", "changed this sequence")
            })
                                                           .Digest(commonParameters.DigestionParams, new List <Modification>(), new List <Modification>()).First();
            MsDataFile myMsDataFile      = new TestDataFile(pepWithSetMods, "quadratic");
            MsDataScan scann             = myMsDataFile.GetOneBasedScan(2);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(scann, 4, 1, null, new CommonParameters());

            var theoreticalIons = new List <Product>();

            pepWithSetMods.Fragment(DissociationType.HCD, FragmentationTerminus.Both, theoreticalIons);
            var matchedIons          = MetaMorpheusEngine.MatchFragmentIons(scan, theoreticalIons, new CommonParameters());
            PeptideSpectralMatch psm = new PeptideSpectralMatch(pepWithSetMods, 1, 2, 3, scan, commonParameters, matchedIons);

            psm.ResolveAllAmbiguities();

            var t            = psm.ToString();
            var tabsepheader = PeptideSpectralMatch.GetTabSeparatedHeader();

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            Tolerance fragmentTolerance = new PpmTolerance(10);

            new LocalizationEngine(new List <PeptideSpectralMatch> {
                psm
            }, myMsDataFile, new CommonParameters(productMassTolerance: fragmentTolerance), null, new List <string>()).Run();

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            psm.SetFdrValues(6, 6, 6, 6, 6, 0, 0, 0);

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));
        }
        public static void TestCombos(string proteinSequence, string accession, string variantAA, string sequenceVariantDescription, int numModHashes, int numModifiedResidues, int numModifiedResiduesN, int numModifiedResiduesP, int numModifiedResiduesNP)
        {
            List <PeptideSpectralMatch> allIdentifications = null;

            ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
            ModificationMotif.TryGetMotif("P", out ModificationMotif motifP);
            var gptmdModifications = new List <Modification> {
                new Modification(_originalId: "21", _modificationType: "mt", _target: motifN, _locationRestriction: "Anywhere.", _monoisotopicMass: 21.981943),
                new Modification(_originalId: "16", _modificationType: "mt", _target: motifP, _locationRestriction: "Anywhere.", _monoisotopicMass: 15.994915)
            };
            IEnumerable <Tuple <double, double> > combos = new List <Tuple <double, double> > {
                new Tuple <double, double>(21.981943, 15.994915)
            };
            Tolerance precursorMassTolerance = new PpmTolerance(10);

            var parentProtein = new Protein(proteinSequence, accession, sequenceVariations: new List <SequenceVariation> {
                new SequenceVariation(1, "N", variantAA, sequenceVariantDescription)
            });
            var variantProteins = parentProtein.GetVariantProteins();

            DigestionParams     digestionParams       = new DigestionParams(minPeptideLength: 5);
            List <Modification> variableModifications = new List <Modification>();
            var modPep = variantProteins.SelectMany(p => p.Digest(digestionParams, new List <Modification>(), variableModifications)).First();

            MsDataScan dfd = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfd, (new Proteomics.AminoAcidPolymer.Peptide(modPep.BaseSequence).MonoisotopicMass + 21.981943 + 15.994915).ToMz(1), 1, "filepath", new CommonParameters());

            var peptidesWithSetModifications = new List <PeptideWithSetModifications> {
                modPep
            };
            PeptideSpectralMatch match  = new PeptideSpectralMatch(peptidesWithSetModifications.First(), 0, 0, 0, scan, digestionParams, new List <MatchedFragmentIon>());
            PeptideSpectralMatch newPsm = new PeptideSpectralMatch(peptidesWithSetModifications.First(), 0, 0, 0, scan, digestionParams, new List <MatchedFragmentIon>());

            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            match.SetFdrValues(1, 0, 0, 1, 0, 0, 0, 0);
            allIdentifications = new List <PeptideSpectralMatch> {
                match
            };

            var engine = new GptmdEngine(allIdentifications, gptmdModifications, combos, new Dictionary <string, Tolerance> {
                { "filepath", precursorMassTolerance }
            }, new CommonParameters(), new List <string>());
            var res = (GptmdResults)engine.Run();

            Assert.AreEqual(numModHashes, res.Mods.Count);
            Assert.AreEqual(numModifiedResidues, res.Mods["accession"].Count);
            Assert.AreEqual(numModifiedResiduesN, res.Mods["accession"].Where(b => b.Item2.OriginalId.Equals("21")).Count());
            Assert.AreEqual(numModifiedResiduesP, res.Mods["accession"].Where(b => b.Item2.OriginalId.Equals("16")).Count());
            res.Mods.TryGetValue("accession_N1P", out var hash);
            Assert.AreEqual(numModifiedResiduesNP, (hash ?? new HashSet <Tuple <int, Modification> >()).Count);
        }
Пример #7
0
        public static void TestMzIdentMlWriterWithUniprotPsiMod()
        {
            Protein protein = new Protein("PEPTIDE", "", databaseFilePath: "temp");

            ModificationMotif.TryGetMotif("T", out var motif);

            Modification fakeMod = new Modification(_originalId: "FAKE", _accession: "FAKE_MOD_ACCESSION", _modificationType: "fake",
                                                    _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 0,
                                                    _databaseReference: new Dictionary <string, IList <string> > {
                { "PSI-MOD", new List <string> {
                      "FAKE_MOD_ACCESSION"
                  } }
            });

            string resIdAccession = fakeMod.DatabaseReference["PSI-MOD"].First();
            var    peptide        = protein.Digest(new DigestionParams(), new List <Modification> {
                fakeMod
            }, new List <Modification>()).First();

            MsDataScan dfb = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null,
                                            null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfb, 2, 0, "File", new CommonParameters());

            var psm = new PeptideSpectralMatch(peptide, 0, 1, 0, scan, new DigestionParams(), new List <MatchedFragmentIon>());

            psm.ResolveAllAmbiguities();
            psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);

            string path = Path.Combine(TestContext.CurrentContext.TestDirectory, "ResIdOutput.mzID");

            MzIdentMLWriter.WriteMzIdentMl(new List <PeptideSpectralMatch> {
                psm
            }, new List <ProteinGroup>(), new List <Modification>(),
                                           new List <Modification>(), new List <SilacLabel>(), new List <Protease>(), 0, new PpmTolerance(20), new PpmTolerance(20),
                                           0, path);

            var  file  = File.ReadAllLines(path);
            bool found = false;

            foreach (var line in file)
            {
                if (line.Contains("FAKE on T") && line.Contains("PSI-MOD:" + resIdAccession))
                {
                    found = true;
                }
            }
            Assert.That(found);

            File.Delete(path);
        }
Пример #8
0
        public static void TestMzIdentMlWriterWithUniprotResId()
        {
            Protein protein = new Protein("PEPTIDE", "", databaseFilePath: "temp");

            Modification uniProtMod = GlobalVariables.AllModsKnown.First(p =>
                                                                         p.IdWithMotif == "FMN phosphoryl threonine on T" &&
                                                                         p.ModificationType == "UniProt" &&
                                                                         p.Target.ToString() == "T" &&
                                                                         p.DatabaseReference.ContainsKey("RESID") &&
                                                                         p.LocationRestriction == "Anywhere.");

            string resIdAccession = uniProtMod.DatabaseReference["RESID"].First();
            var    peptide        = protein.Digest(new DigestionParams(), new List <Modification> {
                uniProtMod
            }, new List <Modification>()).First();

            MsDataScan dfb = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null,
                                            null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfb, 2, 0, "File", new CommonParameters());

            var psm = new PeptideSpectralMatch(peptide, 0, 1, 0, scan, new DigestionParams(), new List <MatchedFragmentIon>());

            psm.ResolveAllAmbiguities();
            psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);

            string path = Path.Combine(TestContext.CurrentContext.TestDirectory, "ResIdOutput.mzID");

            MzIdentMLWriter.WriteMzIdentMl(new List <PeptideSpectralMatch> {
                psm
            }, new List <ProteinGroup>(), new List <Modification>(),
                                           new List <Modification>(), new List <SilacLabel>(), new List <Protease>(), 0, new PpmTolerance(20), new PpmTolerance(20),
                                           0, path);

            var  file  = File.ReadAllLines(path);
            bool found = false;

            foreach (var line in file)
            {
                if (line.Contains("FMN phosphoryl threonine on T") && line.Contains("RESID:" + resIdAccession))
                {
                    found = true;
                }
            }
            Assert.That(found);

            File.Delete(path);
        }
Пример #9
0
        public static void TestPsmHeader()
        {
            DigestionParams             digestionParams = new DigestionParams();
            PeptideWithSetModifications pepWithSetMods  = new Protein("MQQQQQQQ", "accession1").Digest(digestionParams, new List <ModificationWithMass>(), new List <ModificationWithMass>()).First();
            IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = new TestDataFile(pepWithSetMods, "quadratic");
            IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >   scann        = myMsDataFile.GetOneBasedScan(2) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >;
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(scann, 4, 1, null);
            PeptideSpectralMatch    psm  = new PeptideSpectralMatch(pepWithSetMods.CompactPeptide(TerminusType.None), 1, 2, 3, scan);

            var t            = psm.ToString();
            var tabsepheader = PeptideSpectralMatch.GetTabSeparatedHeader();

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { pepWithSetMods.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pepWithSetMods
                  } }
            };

            psm.MatchToProteinLinkedPeptides(matching);

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            Tolerance          fragmentTolerance = new PpmTolerance(10);
            List <ProductType> lp = new List <ProductType> {
                ProductType.B
            };

            new LocalizationEngine(new List <PeptideSpectralMatch> {
                psm
            }, lp, myMsDataFile, fragmentTolerance, new List <string>(), false).Run();

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));

            psm.SetFdrValues(6, 6, 6, 6, 6, 6, 0, 0, 0, false);

            Assert.AreEqual(psm.ToString().Count(f => f == '\t'), PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'));
        }
Пример #10
0
        public static void TryFailSequenceCoverage()
        {
            var prot1 = new Protein("MMKMMK", "prot1");

            ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
            Modification mod1 = new Modification(_originalId: "mod1", _modificationType: "mt", _target: motifM, _locationRestriction: "N-terminal.", _monoisotopicMass: 10);
            Modification mod2 = new Modification(_originalId: "mod2", _modificationType: "mt", _target: motifM, _locationRestriction: "Peptide N-terminal.", _monoisotopicMass: 10);
            Modification mod3 = new Modification(_originalId: "mod3", _modificationType: "mt", _target: motifM, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);

            ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
            Modification mod4 = new Modification(_originalId: "mod4", _modificationType: "mt", _target: motifK, _locationRestriction: "Peptide C-terminal.", _monoisotopicMass: 10);
            Modification mod5 = new Modification(_originalId: "mod5", _modificationType: "mt", _target: motifK, _locationRestriction: "C-terminal.", _monoisotopicMass: 10);

            Dictionary <int, Modification> modsFor1 = new Dictionary <int, Modification>
            {
                { 1, mod1 },
                { 3, mod3 },
                { 5, mod4 },
            };
            Dictionary <int, Modification> modsFor2 = new Dictionary <int, Modification>
            {
                { 1, mod2 },
                { 5, mod5 },
            };
            Dictionary <int, Modification> modsFor3 = new Dictionary <int, Modification>
            {
                { 1, mod1 },
                { 5, mod3 },
                { 8, mod5 }
            };

            DigestionParams digestionParams = new DigestionParams();
            var             pwsm1           = new PeptideWithSetModifications(prot1, digestionParams, 1, 3, CleavageSpecificity.Unknown, "", 0, modsFor1, 0);
            var             pwsm2           = new PeptideWithSetModifications(prot1, digestionParams, 4, 6, CleavageSpecificity.Unknown, "", 0, modsFor2, 0);
            var             pwsm3           = new PeptideWithSetModifications(prot1, digestionParams, 1, 6, CleavageSpecificity.Unknown, "", 0, modsFor3, 0);

            HashSet <PeptideWithSetModifications> peptides = new HashSet <PeptideWithSetModifications>
            {
                pwsm1,
                pwsm2,
                pwsm3,
            };

            IScan scan = new ThisTestScan();
            var   psm1 = new PeptideSpectralMatch(pwsm1, 0, 1, 0, scan, digestionParams, new List <MatchedFragmentIon>());

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var psm2 = new PeptideSpectralMatch(pwsm2, 0, 1, 0, scan, digestionParams, new List <MatchedFragmentIon>());

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var psm3 = new PeptideSpectralMatch(pwsm3, 0, 1, 0, scan, digestionParams, new List <MatchedFragmentIon>());

            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);


            List <PeptideSpectralMatch> newPsms = new List <PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3,
            };

            newPsms.ForEach(p => p.ResolveAllAmbiguities());

            ProteinParsimonyEngine  ppe  = new ProteinParsimonyEngine(newPsms, true, new CommonParameters(), new List <string>());
            ProteinParsimonyResults fjkd = (ProteinParsimonyResults)ppe.Run();

            ProteinScoringAndFdrEngine psafe = new ProteinScoringAndFdrEngine(fjkd.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List <string>());

            psafe.Run();

            fjkd.ProteinGroups.First().CalculateSequenceCoverage();

            var firstSequenceCoverageDisplayList = fjkd.ProteinGroups.First().SequenceCoverageDisplayList.First();

            Assert.AreEqual("MMKMMK", firstSequenceCoverageDisplayList);
            var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First();

            Assert.AreEqual("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]", firstSequenceCoverageDisplayListWithMods);

            var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First();

            Assert.IsTrue(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa3"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa5"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]"));
        }
Пример #11
0
        public static void TryFailSequenceCoverage()
        {
            var prot1 = new Protein("MMKMMK", "prot1");

            ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
            ModificationWithMass mod1 = new ModificationWithMass("mod1", "mt", motifM, TerminusLocalization.NProt, 10);
            ModificationWithMass mod2 = new ModificationWithMass("mod2", "mt", motifM, TerminusLocalization.NPep, 10);
            ModificationWithMass mod3 = new ModificationWithMass("mod3", "mt", motifM, TerminusLocalization.Any, 10);

            ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
            ModificationWithMass mod4 = new ModificationWithMass("mod4", "mt", motifK, TerminusLocalization.PepC, 10);
            ModificationWithMass mod5 = new ModificationWithMass("mod5", "mt", motifK, TerminusLocalization.ProtC, 10);

            Dictionary <int, ModificationWithMass> modsFor1 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod1 },
                { 3, mod3 },
                { 5, mod4 },
            };
            Dictionary <int, ModificationWithMass> modsFor2 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod2 },
                { 5, mod5 },
            };
            Dictionary <int, ModificationWithMass> modsFor3 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod1 },
                { 5, mod3 },
                { 8, mod5 }
            };

            DigestionParams digestionParams = new DigestionParams();
            var             pwsm1           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 3, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor1, numFixedMods: 0);
            var             pwsm2           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 4, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor2, numFixedMods: 0);
            var             pwsm3           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor3, numFixedMods: 0);

            HashSet <PeptideWithSetModifications> peptides = new HashSet <PeptideWithSetModifications>
            {
                pwsm1,
                pwsm2,
                pwsm3,
            };

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { pwsm1.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm1
                  } },
                { pwsm2.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm2
                  } },
                { pwsm3.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm3
                  } },
            };

            IScan scan = new ThisTestScan();
            var   psm1 = new PeptideSpectralMatch(pwsm1.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm1.MatchToProteinLinkedPeptides(matching);
            var psm2 = new PeptideSpectralMatch(pwsm2.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm2.MatchToProteinLinkedPeptides(matching);
            var psm3 = new PeptideSpectralMatch(pwsm3.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm3.MatchToProteinLinkedPeptides(matching);

            List <PeptideSpectralMatch> newPsms = new List <PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3,
            };

            ProteinParsimonyEngine  ppe  = new ProteinParsimonyEngine(matching, true, new CommonParameters(), new List <string>());
            ProteinParsimonyResults fjkd = (ProteinParsimonyResults)ppe.Run();

            ProteinScoringAndFdrEngine psafe = new ProteinScoringAndFdrEngine(fjkd.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List <string>());

            psafe.Run();

            fjkd.ProteinGroups.First().CalculateSequenceCoverage();

            var firstSequenceCoverageDisplayList = fjkd.ProteinGroups.First().SequenceCoverageDisplayList.First();

            Assert.AreEqual("MMKMMK", firstSequenceCoverageDisplayList);
            var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First();

            Assert.AreEqual("[mod1]-MM[mod3]KM[mod3]MK-[mod5]", firstSequenceCoverageDisplayListWithMods);

            var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First();

            Assert.IsTrue(firstModInfo.Contains(@"#aa1[mod1,info:occupancy=1.00(2/2)]"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa2[mod3,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa3"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa4[mod3,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa5"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa6[mod5,info:occupancy=1.00(2/2)]"));
        }
Пример #12
0
        private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
            var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

            foreach (var proteasePsms in psmsGroupedByProtease)
            {
                var psms = proteasePsms.ToList();

                // generate the null distribution for e-value calculations
                double globalMeanScore = 0;
                int    globalMeanCount = 0;

                if (CalculateEValue && psms.Any())
                {
                    List <double> combinedScores = new List <double>();

                    foreach (PeptideSpectralMatch psm in psms)
                    {
                        psm.AllScores.Sort();
                        combinedScores.AddRange(psm.AllScores);

                        //remove top scoring peptide
                        if (combinedScores.Any())
                        {
                            combinedScores.RemoveAt(combinedScores.Count - 1);
                        }
                    }

                    if (combinedScores.Any())
                    {
                        globalMeanScore = combinedScores.Average();
                        globalMeanCount = (int)((double)combinedScores.Count / psms.Count);
                    }
                    else
                    {
                        // should be a very rare case... if there are PSMs but each PSM only has one hit
                        globalMeanScore = 0;
                        globalMeanCount = 0;
                    }
                }

                //Calculate delta scores for the psms (regardless of if we are using them)
                foreach (PeptideSpectralMatch psm in psms)
                {
                    if (psm != null)
                    {
                        psm.CalculateDeltaScore(ScoreCutoff);
                    }
                }

                //determine if Score or DeltaScore performs better
                if (UseDeltaScore)
                {
                    const double qValueCutoff = 0.01; //optimize to get the most PSMs at a 1% FDR

                    List <PeptideSpectralMatch> scoreSorted = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int ScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);
                    scoreSorted = psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int DeltaScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

                    //sort by best method
                    myAnalysisResults.DeltaScoreImprovement = DeltaScorePSMs > ScorePSMs;
                    psms = myAnalysisResults.DeltaScoreImprovement ?
                           psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList() :
                           psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }
                else //sort by score
                {
                    psms = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }

                double cumulativeTarget = 0;
                double cumulativeDecoy  = 0;

                //set up arrays for local FDRs
                double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
                double[] cumulativeDecoyPerNotch  = new double[MassDiffAcceptorNumNotches + 1];

                //Assign FDR values to PSMs
                for (int i = 0; i < psms.Count; i++)
                {
                    // Stop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }

                    PeptideSpectralMatch psm = psms[i];
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.IsDecoy)
                    {
                        // the PSM can be ambiguous between a target and a decoy sequence
                        // in that case, count it as the fraction of decoy hits
                        // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                        double decoyHits = 0;
                        double totalHits = 0;
                        var    hits      = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);
                        foreach (var hit in hits)
                        {
                            if (hit.First().Peptide.Protein.IsDecoy)
                            {
                                decoyHits++;
                            }
                            totalHits++;
                        }

                        cumulativeDecoy += decoyHits / totalHits;
                        cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
                    }
                    else
                    {
                        cumulativeTarget++;
                        cumulativeTargetPerNotch[notch]++;
                    }

                    double qValue      = Math.Min(1, cumulativeDecoy / cumulativeTarget);
                    double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

                    double maximumLikelihood = 0;
                    double eValue            = 0;
                    double eScore            = 0;
                    if (CalculateEValue)
                    {
                        eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
                        eScore = -Math.Log(eValue, 10);
                    }

                    psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
                }

                // set q-value thresholds such that a lower scoring PSM can't have
                // a higher confidence than a higher scoring PSM
                //Populate min qValues
                double   qValueThreshold      = 1.0;
                double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
                for (int i = 0; i < qValueNotchThreshold.Length; i++)
                {
                    qValueNotchThreshold[i] = 1.0;
                }

                for (int i = psms.Count - 1; i >= 0; i--)
                {
                    PeptideSpectralMatch psm = psms[i];

                    // threshold q-values
                    if (psm.FdrInfo.QValue > qValueThreshold)
                    {
                        psm.FdrInfo.QValue = qValueThreshold;
                    }
                    else if (psm.FdrInfo.QValue < qValueThreshold)
                    {
                        qValueThreshold = psm.FdrInfo.QValue;
                    }

                    // threshold notch q-values
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
                    {
                        psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
                    }
                    else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
                    {
                        qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
                    }
                }
            }

            if (AnalysisType == "PSM")
            {
                CountPsm();
            }
        }
Пример #13
0
        public static void TestPTMOutput()
        {
            List <ModificationWithMass> variableModifications = new List <ModificationWithMass>();
            List <ModificationWithMass> fixedModifications    = new List <ModificationWithMass>();

            ModificationMotif.TryGetMotif("S", out ModificationMotif motif);
            variableModifications.Add(new ModificationWithMassAndCf("resMod", "HaHa", motif, TerminusLocalization.Any, ChemicalFormula.ParseFormula("H")));

            var proteinList = new List <Protein> {
                new Protein("MNNNSKQQQ", "accession")
            };
            var protease = new Protease("CustomProtease", new List <Tuple <string, TerminusType> > {
                new Tuple <string, TerminusType>("K", TerminusType.C)
            }, new List <Tuple <string, TerminusType> >(), CleavageSpecificity.Full, null, null, null);

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);
            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >();
            Dictionary <ModificationWithMass, ushort> modsDictionary = new Dictionary <ModificationWithMass, ushort>
            {
                { variableModifications.Last(), 1 }
            };

            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1);

            var modPep = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).Last();
            HashSet <PeptideWithSetModifications> value = new HashSet <PeptideWithSetModifications> {
                modPep
            };
            CompactPeptide compactPeptide1 = new CompactPeptide(value.First(), TerminusType.None);

            Assert.AreEqual("QQQ", value.First().Sequence);

            var firstProtDigest = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).ToList();
            HashSet <PeptideWithSetModifications> value2 = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[0]
            };
            CompactPeptide compactPeptide2 = new CompactPeptide(value2.First(), TerminusType.None);

            Assert.AreEqual("MNNNSK", value2.First().Sequence);

            HashSet <PeptideWithSetModifications> value2mod = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[1]
            };
            CompactPeptide compactPeptide2mod = new CompactPeptide(value2mod.Last(), TerminusType.None);

            Assert.AreEqual("MNNNS[HaHa:resMod]K", value2mod.Last().Sequence);

            HashSet <PeptideWithSetModifications> value3 = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[2]
            };
            CompactPeptide compactPeptide3 = new CompactPeptide(value3.First(), TerminusType.None);

            Assert.AreEqual("NNNSK", value3.First().Sequence);
            HashSet <PeptideWithSetModifications> value3mod = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[3]
            };

            CompactPeptide compactPeptide3mod = new CompactPeptide(value3mod.Last(), TerminusType.None);

            Assert.AreEqual("NNNS[HaHa:resMod]K", value3mod.Last().Sequence);

            var peptideList = new HashSet <PeptideWithSetModifications>();

            foreach (var protein in proteinList)
            {
                foreach (var peptide in protein.Digest(digestionParams, new List <ModificationWithMass>(), variableModifications))
                {
                    peptideList.Add(peptide);
                }
            }

            compactPeptideToProteinPeptideMatching.Add(compactPeptide1, value);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide2, value2);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide3, value3);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide2mod, value2mod);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide3mod, value3mod);

            ProteinParsimonyEngine engine = new ProteinParsimonyEngine(compactPeptideToProteinPeptideMatching, true, new CommonParameters(), new List <string> {
                "ff"
            });
            var cool          = (ProteinParsimonyResults)engine.Run();
            var proteinGroups = cool.ProteinGroups;

            MsDataScan jdfk = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(jdfk, 2, 0, "File");

            List <ProductType> lp = new List <ProductType> {
                ProductType.B, ProductType.Y
            };
            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            var match1 = new PeptideSpectralMatch(peptideList.ElementAt(0).CompactPeptide(TerminusType.None), 0, 10, 0, ms2scan, digestionParams)
            {
            };

            match1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            var match2 = new PeptideSpectralMatch(peptideList.ElementAt(1).CompactPeptide(TerminusType.None), 0, 10, 0, ms2scan, digestionParams)
            {
            };

            match2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            var match3 = new PeptideSpectralMatch(peptideList.ElementAt(1).CompactPeptide(TerminusType.None), 0, 10, 0, ms2scan, digestionParams)
            {
            };

            match3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            match1.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            match2.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            match3.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);

            List <PeptideSpectralMatch> psms = new List <PeptideSpectralMatch>
            {
                match1,
                match2,
                match3
            };
            ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), new List <string>());

            f.Run();

            Assert.AreEqual("#aa5[resMod,info:occupancy=0.67(2/3)];", proteinGroups.First().ModsInfo[0]);
        }
Пример #14
0
        public static void TestPTMOutput()
        {
            List <Modification> variableModifications = new List <Modification>();
            List <Modification> fixedModifications    = new List <Modification>();

            ModificationMotif.TryGetMotif("S", out ModificationMotif motifS);
            ModificationMotif.TryGetMotif("I", out ModificationMotif motifI);
            variableModifications.Add(new Modification(_originalId: "resMod", _modificationType: "HaHa", _target: motifS, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));
            variableModifications.Add(new Modification(_originalId: "iModOne", _modificationType: "HaHa", _target: motifI, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));
            variableModifications.Add(new Modification(_originalId: "iModTwo", _modificationType: "HaHa", _target: motifI, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));

            var proteinList = new List <Protein> {
                new Protein("MNNNSKQQQI", "accession")
            };
            var protease = new Protease("CustomProtease", CleavageSpecificity.Full, null, null, new List <DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            });

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            Dictionary <Modification, ushort> modsDictionary = new Dictionary <Modification, ushort>
            {
                { variableModifications.Last(), 1 }
            };

            CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1));
            var protDigest = proteinList.First().Digest(commonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

            int idx = 0;

            var pep1 = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("MNNNSK", pep1.Single().FullSequence);//this might be base

            var pep1mod = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("MNNNS[HaHa:resMod on S]K", pep1mod.Single().FullSequence);//this might be base

            var pep3 = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("NNNSK", pep3.Single().FullSequence);//this might be base

            var pep3mod = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("NNNS[HaHa:resMod on S]K", pep3mod.Single().FullSequence);//this might be base

            var pep4 = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("QQQI", pep4.Single().FullSequence);//this might be base

            var pep4mod1 = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("QQQI[HaHa:iModOne on I]", pep4mod1.Single().FullSequence);//this might be base

            var pep4mod2 = new HashSet <PeptideWithSetModifications> {
                protDigest[idx++]
            };

            Assert.AreEqual("QQQI[HaHa:iModTwo on I]", pep4mod2.Single().FullSequence);//this might be base

            var peptideList = new HashSet <PeptideWithSetModifications>();

            foreach (var peptide in proteinList.SelectMany(protein => protein.Digest(commonParameters.DigestionParams, new List <Modification>(), variableModifications)))
            {
                peptideList.Add(peptide);
            }

            MsDataScan jdfk = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(jdfk, 2, 0, "File", new CommonParameters());

            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            var match1 = new PeptideSpectralMatch(peptideList.ElementAt(0), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match2 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match3 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match4 = new PeptideSpectralMatch(peptideList.ElementAt(4), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match4.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match5 = new PeptideSpectralMatch(peptideList.ElementAt(5), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match5.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match6 = new PeptideSpectralMatch(peptideList.ElementAt(6), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match6.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match44 = new PeptideSpectralMatch(peptideList.ElementAt(4), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match44.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match55 = new PeptideSpectralMatch(peptideList.ElementAt(5), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match55.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            var match66 = new PeptideSpectralMatch(peptideList.ElementAt(6), 0, 10, 0, ms2scan, commonParameters, new List <MatchedFragmentIon>())
            {
            };

            match66.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);


            List <PeptideSpectralMatch> psms = new List <PeptideSpectralMatch>
            {
                match1,
                match2,
                match3,
                match4,
                match44,
                match5,
                match55,
                match6,
                match66
            };

            psms.ForEach(p => p.ResolveAllAmbiguities());

            ProteinParsimonyEngine engine = new ProteinParsimonyEngine(psms, true, new CommonParameters(), null, new List <string> {
                "ff"
            });
            var cool          = (ProteinParsimonyResults)engine.Run();
            var proteinGroups = cool.ProteinGroups;

            ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), null, new List <string>());

            f.Run();

            Assert.AreEqual("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]", proteinGroups.First().ModsInfo[0]);
        }
Пример #15
0
        private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
            var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

            foreach (var proteasePsms in psmsGroupedByProtease)
            {
                var psms = proteasePsms.ToList();

                //determine if Score or DeltaScore performs better
                if (UseDeltaScore)
                {
                    const double qValueCutoff = 0.01; //optimize to get the most PSMs at a 1% FDR

                    List <PeptideSpectralMatch> scoreSorted = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int ScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);
                    scoreSorted = psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();
                    int DeltaScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

                    //sort by best method
                    myAnalysisResults.DeltaScoreImprovement = DeltaScorePSMs > ScorePSMs;
                    psms = myAnalysisResults.DeltaScoreImprovement ?
                           psms.OrderByDescending(b => b.DeltaScore).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList() :
                           psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }
                else //sort by score
                {
                    psms = psms.OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).ToList();
                }

                double cumulativeTarget = 0;
                double cumulativeDecoy  = 0;

                //set up arrays for local FDRs
                double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
                double[] cumulativeDecoyPerNotch  = new double[MassDiffAcceptorNumNotches + 1];

                //Assign FDR values to PSMs
                for (int i = 0; i < psms.Count; i++)
                {
                    // Stop if canceled
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }

                    PeptideSpectralMatch psm = psms[i];
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.IsDecoy)
                    {
                        // the PSM can be ambiguous between a target and a decoy sequence
                        // in that case, count it as the fraction of decoy hits
                        // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                        double decoyHits = 0;
                        double totalHits = 0;
                        var    hits      = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);
                        foreach (var hit in hits)
                        {
                            if (hit.First().Peptide.Protein.IsDecoy)
                            {
                                decoyHits++;
                            }
                            totalHits++;
                        }

                        cumulativeDecoy += decoyHits / totalHits;
                        cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
                    }
                    else
                    {
                        cumulativeTarget++;
                        cumulativeTargetPerNotch[notch]++;
                    }

                    double qValue      = Math.Min(1, cumulativeDecoy / cumulativeTarget);
                    double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

                    double pep       = psm.FdrInfo == null ? double.NaN : psm.FdrInfo.PEP;
                    double pepQValue = psm.FdrInfo == null ? double.NaN : psm.FdrInfo.PEP_QValue;

                    psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, pep, pepQValue);
                }

                // set q-value thresholds such that a lower scoring PSM can't have
                // a higher confidence than a higher scoring PSM
                //Populate min qValues
                double   qValueThreshold      = 1.0;
                double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
                for (int i = 0; i < qValueNotchThreshold.Length; i++)
                {
                    qValueNotchThreshold[i] = 1.0;
                }

                for (int i = psms.Count - 1; i >= 0; i--)
                {
                    PeptideSpectralMatch psm = psms[i];

                    // threshold q-values
                    if (psm.FdrInfo.QValue > qValueThreshold)
                    {
                        psm.FdrInfo.QValue = qValueThreshold;
                    }
                    else if (psm.FdrInfo.QValue < qValueThreshold)
                    {
                        qValueThreshold = psm.FdrInfo.QValue;
                    }

                    // threshold notch q-values
                    int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
                    if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
                    {
                        psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
                    }
                    else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
                    {
                        qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
                    }
                }
            }

            if (AnalysisType == "PSM")
            {
                CountPsm();
                if (AllPsms.Count > 0)
                {
                    myAnalysisResults.BinarySearchTreeMetrics = PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(AllPsms);
                    Compute_PEPValue_Based_QValue(AllPsms);
                }
            }

            if (AnalysisType == "Peptide")
            {
                Compute_PEPValue_Based_QValue(AllPsms);
            }
        }
Пример #16
0
        public static void TestPTMOutput()
        {
            List <Modification> variableModifications = new List <Modification>();
            List <Modification> fixedModifications    = new List <Modification>();

            ModificationMotif.TryGetMotif("S", out ModificationMotif motif);
            variableModifications.Add(new Modification(_originalId: "resMod", _modificationType: "HaHa", _target: motif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));

            var proteinList = new List <Protein> {
                new Protein("MNNNSKQQQ", "accession")
            };
            var protease = new Protease("CustomProtease", CleavageSpecificity.Full, null, null, new List <DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            });

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            Dictionary <Modification, ushort> modsDictionary = new Dictionary <Modification, ushort>
            {
                { variableModifications.Last(), 1 }
            };

            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1);

            var modPep = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).Last();
            HashSet <PeptideWithSetModifications> value = new HashSet <PeptideWithSetModifications> {
                modPep
            };
            PeptideWithSetModifications compactPeptide1 = value.First();

            Assert.AreEqual("QQQ", value.First().FullSequence);//this might be base

            var firstProtDigest = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).ToList();
            HashSet <PeptideWithSetModifications> value2 = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[0]
            };
            PeptideWithSetModifications compactPeptide2 = value2.First();

            Assert.AreEqual("MNNNSK", value2.First().FullSequence);//this might be base

            HashSet <PeptideWithSetModifications> value2mod = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[1]
            };
            PeptideWithSetModifications compactPeptide2mod = value2mod.Last();

            Assert.AreEqual("MNNNS[HaHa:resMod on S]K", value2mod.Last().FullSequence);//this might be base

            HashSet <PeptideWithSetModifications> value3 = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[2]
            };
            PeptideWithSetModifications compactPeptide3 = value3.First();

            Assert.AreEqual("NNNSK", value3.First().FullSequence);//this might be base
            HashSet <PeptideWithSetModifications> value3mod = new HashSet <PeptideWithSetModifications> {
                firstProtDigest[3]
            };

            PeptideWithSetModifications compactPeptide3mod = value3mod.Last();

            Assert.AreEqual("NNNS[HaHa:resMod on S]K", value3mod.Last().FullSequence);//this might be base

            var peptideList = new HashSet <PeptideWithSetModifications>();

            foreach (var protein in proteinList)
            {
                foreach (var peptide in protein.Digest(digestionParams, new List <Modification>(), variableModifications))
                {
                    peptideList.Add(peptide);
                }
            }

            MsDataScan jdfk = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(jdfk, 2, 0, "File", new CommonParameters());

            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            var match1 = new PeptideSpectralMatch(peptideList.ElementAt(0), 0, 10, 0, ms2scan, digestionParams, new List <MatchedFragmentIon>())
            {
            };

            match1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            var match2 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, digestionParams, new List <MatchedFragmentIon>())
            {
            };

            match2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            var match3 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, digestionParams, new List <MatchedFragmentIon>())
            {
            };

            match3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            List <PeptideSpectralMatch> psms = new List <PeptideSpectralMatch>
            {
                match1,
                match2,
                match3
            };

            psms.ForEach(p => p.ResolveAllAmbiguities());

            ProteinParsimonyEngine engine = new ProteinParsimonyEngine(psms, true, new CommonParameters(), new List <string> {
                "ff"
            });
            var cool          = (ProteinParsimonyResults)engine.Run();
            var proteinGroups = cool.ProteinGroups;

            ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), new List <string>());

            f.Run();

            Assert.AreEqual("#aa5[resMod on S,info:occupancy=0.67(2/3)];", proteinGroups.First().ModsInfo[0]);
        }
Пример #17
0
        private static Tuple <List <PeptideSpectralMatch>, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase> GetInfo(bool localizeable)
        {
            CommonParameters CommonParameters = new CommonParameters(digestionParams: new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModificationIsoforms: 2, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain, maxModsForPeptides: 1), scoreCutoff: 1);


            // Alanine = Glycine + CH2
            Protein protein1 = new Protein("MA", "protein1");
            Protein protein2 = new Protein("MG", "protein2");
            Protein protein3;
            double  monoisotopicMass = Chemistry.ChemicalFormula.ParseFormula("CH2").MonoisotopicMass;

            ModificationMotif.TryGetMotif("G", out ModificationMotif motif1);
            ModificationMotif.TryGetMotif("A", out ModificationMotif motif2);
            TerminusLocalization        modificationSites          = TerminusLocalization.Any;
            List <ModificationWithMass> allKnownFixedModifications = new List <ModificationWithMass>
            {
                new ModificationWithMass("CH2 on Glycine", null, motif1, modificationSites, monoisotopicMass)
            };
            List <ModificationWithMass> variableModifications;

            ModificationWithMass alanineMod = new ModificationWithMass("CH2 on Alanine", null, motif2, modificationSites, monoisotopicMass);

            if (localizeable)
            {
                variableModifications = new List <ModificationWithMass>();
                IDictionary <int, List <Modification> > oneBasedModifications = new Dictionary <int, List <Modification> >
                {
                    { 2, new List <Modification> {
                          alanineMod
                      } }
                };
                protein3 = new Protein("MA", "protein3", oneBasedModifications: oneBasedModifications);
            }
            else
            {
                variableModifications = new List <ModificationWithMass>();
                variableModifications = new List <ModificationWithMass> {
                    alanineMod
                };
                protein3 = new Protein("MA", "protein3");
            }

            var pepWithSetModifications1 = protein1.Digest(CommonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First();

            var pepWithSetModifications2 = protein2.Digest(CommonParameters.DigestionParams, allKnownFixedModifications, variableModifications).First();

            var pepWithSetModifications3 = protein3.Digest(CommonParameters.DigestionParams, allKnownFixedModifications, variableModifications).Last();

            CompactPeptide compactPeptide1         = new CompactPeptide(pepWithSetModifications1, TerminusType.None);
            CompactPeptide compactPeptideDuplicate = new CompactPeptide(pepWithSetModifications2, TerminusType.None);

            Assert.AreEqual(compactPeptide1, compactPeptideDuplicate);
            CompactPeptide compactPeptide2 = new CompactPeptide(pepWithSetModifications3, TerminusType.None);

            string                  fullFilePath    = null;
            int                     precursorCharge = 0;
            TestDataFile            testDataFile    = new TestDataFile();
            MsDataScan              mzLibScan       = testDataFile.GetOneBasedScan(2);
            Ms2ScanWithSpecificMass scan            = new Ms2ScanWithSpecificMass(mzLibScan, 0, precursorCharge, fullFilePath);
            int                     scanIndex       = 0;
            double                  score           = 0;
            int                     notch           = 0;
            PeptideSpectralMatch    psm1            = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, CommonParameters.DigestionParams);

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            PeptideSpectralMatch psm2 = new PeptideSpectralMatch(compactPeptide1, notch, score, scanIndex, scan, CommonParameters.DigestionParams);

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            PeptideSpectralMatch psm3 = new PeptideSpectralMatch(compactPeptide2, notch, score, scanIndex, scan, CommonParameters.DigestionParams);

            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            var newPsms = new List <PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3
            };

            MassDiffAcceptor massDiffAcceptors            = new SinglePpmAroundZeroSearchMode(5);
            SequencesToActualProteinPeptidesEngine stappe = new SequencesToActualProteinPeptidesEngine(newPsms, new List <Protein> {
                protein1, protein2, protein3
            },
                                                                                                       allKnownFixedModifications, variableModifications, new List <ProductType> {
                ProductType.B, ProductType.Y
            }, new List <DigestionParams> {
                CommonParameters.DigestionParams
            }, CommonParameters.ReportAllAmbiguity, CommonParameters, new List <string>());

            var haha = (SequencesToActualProteinPeptidesEngineResults)stappe.Run();
            var compactPeptideToProteinPeptideMatching = haha.CompactPeptideToProteinPeptideMatching;

            Assert.AreEqual(2, compactPeptideToProteinPeptideMatching.Count);

            psm1.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);

            bool noOneHitWonders = false;

            return(new Tuple <List <PeptideSpectralMatch>, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase>
                   (
                       newPsms, compactPeptideToProteinPeptideMatching, massDiffAcceptors, noOneHitWonders, compactPeptide1, compactPeptide2
                   ));
        }
        public static void ParsimonyDontTreatModifiedFormsAsUnique()
        {
            bool modPeptidesAreUnique = false;

            // set up mods
            var modDictionary = new Dictionary <int, List <Modification> >();

            ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
            var mod = new Modification(_originalId: "Oxidation of M", _modificationType: "Common Variable", _target: motif1, _locationRestriction: "Anywhere.", _monoisotopicMass: 15.99491461957);

            // modified version of protein
            var protein1 = new Protein("PEPTIDEM", "accession1");

            // unmodified version of protein
            var protein2 = new Protein("YYYKPEPTIDEM", "accession2");

            List <PeptideWithSetModifications> pwsmsFromProtein1 = protein1.Digest(new DigestionParams(protease: "trypsin", minPeptideLength: 1), new List <Modification> {
                mod
            }, new List <Modification>()).ToList();                                                                                                                                                                   //this is a fixed mod
            List <PeptideWithSetModifications> pwsmsFromProtein2 = protein2.Digest(new DigestionParams(protease: "trypsin", minPeptideLength: 1), new List <Modification>(), new List <Modification>()).ToList();

            // check to make sure mod is present
            PeptideWithSetModifications modifiedPeptide   = pwsmsFromProtein1[0];
            PeptideWithSetModifications unmodifiedPeptide = pwsmsFromProtein2[1];

            Assert.That(!modifiedPeptide.FullSequence.Equals(unmodifiedPeptide.FullSequence)); // sequences should not be equal (one has a mod)
            Assert.That(modifiedPeptide.BaseSequence.Equals(unmodifiedPeptide.BaseSequence));  // base sequences should be equal
            Assert.That(modifiedPeptide.NumMods == 1);                                         // methionine was oxidized on this protein
            Assert.That(unmodifiedPeptide.NumMods == 0);                                       // there was no modification on this protein

            // build PSMs for parsimony
            List <PeptideSpectralMatch> psmsForParsimony = new List <PeptideSpectralMatch>();

            MsDataScan fakeScan = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false),
                                                 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null,
                                                 null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);

            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(fakeScan, 2, 0, "File", new CommonParameters());

            PeptideSpectralMatch psm1 = new PeptideSpectralMatch(modifiedPeptide, 0, 10, 1, scan, new DigestionParams(), new List <MatchedFragmentIon>());

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            psm1.ResolveAllAmbiguities();

            PeptideSpectralMatch psm2 = new PeptideSpectralMatch(unmodifiedPeptide, 0, 10, 2, scan, new DigestionParams(), new List <MatchedFragmentIon>());

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            psm2.ResolveAllAmbiguities();

            psmsForParsimony.Add(psm1);
            psmsForParsimony.Add(psm2);

            // apply parsimony
            ProteinParsimonyEngine pae = new ProteinParsimonyEngine(psmsForParsimony, modPeptidesAreUnique, new CommonParameters(), new List <string>());

            // because the two chosen peptides are the same, we should end up with both protein accessions still in the list
            var proteinParsimonyResult = (ProteinParsimonyResults)pae.Run();

            // score protein groups and merge indistinguishable ones
            ProteinScoringAndFdrEngine proteinScoringEngine = new ProteinScoringAndFdrEngine(proteinParsimonyResult.ProteinGroups, psmsForParsimony, false, true, true, new CommonParameters(), new List <string>());
            var results = (ProteinScoringAndFdrResults)proteinScoringEngine.Run();

            int countOfProteinGroups = results.SortedAndScoredProteinGroups.Count;

            // because modified peptides were NOT considered as unique,
            // then there should be one ambiguous protein group after parsimony,
            // and two protein accessions for each peptide
            Assert.AreEqual(1, countOfProteinGroups);
            Assert.AreEqual(2, results.SortedAndScoredProteinGroups.First().Proteins.Count);
            Assert.IsNull(psm1.ProteinAccession);
            Assert.IsNull(psm2.ProteinAccession);
        }