Exemple #1
0
        /// <summary>
        /// Round-trips the three-residue protein "KKR", carrying one modification per residue,
        /// through <c>ProteinDbWriter.WriteXmlDatabase</c> and <c>ProteinDbLoader.LoadProteinXML</c>
        /// and asserts that three modified positions are read back.
        /// </summary>
        public static void Test_CustumPrunedDatabaseWriteAndRead()
        {
            ModificationMotif.TryGetMotif("K", out ModificationMotif lysine);
            ModificationMotif.TryGetMotif("R", out ModificationMotif arginine);

            // Two of the modifications share the id "Methyl" but target different residues.
            var acetylOnLysine   = new Modification(_originalId: "Acetyl", _accession: null, _modificationType: "testModType", _featureType: null, _locationRestriction: "Anywhere.", _target: lysine, _monoisotopicMass: 42);
            var methylOnLysine   = new Modification(_originalId: "Methyl", _accession: null, _modificationType: "testModType", _featureType: null, _locationRestriction: "Anywhere.", _target: lysine, _monoisotopicMass: 14);
            var methylOnArginine = new Modification(_originalId: "Methyl", _accession: null, _modificationType: "testModType", _featureType: null, _locationRestriction: "Anywhere.", _target: arginine, _monoisotopicMass: 14);

            // One-based residue position -> modifications at that position.
            var modsByPosition = new Dictionary<int, List<Modification>>
            {
                [1] = new List<Modification> { acetylOnLysine },
                [2] = new List<Modification> { methylOnLysine },
                [3] = new List<Modification> { methylOnArginine }
            };

            var protein         = new Protein("KKR", "accession", null, null, modsByPosition, null, null, null, false, false, null, null, null, null);
            var proteinsToWrite = new List<Protein> { protein };

            string xmlPath     = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"redundant.xml");
            var    noExtraMods = new Dictionary<string, HashSet<Tuple<int, Modification>>>();

            ProteinDbWriter.WriteXmlDatabase(noExtraMods, proteinsToWrite, xmlPath);

            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(
                xmlPath, true, DecoyType.None, new List<Modification>(), false, new List<string>(),
                out Dictionary<string, Modification> unusedOut);

            Assert.AreEqual(3, reloaded[0].OneBasedPossibleLocalizedModifications.Count());
        }
Exemple #2
0
        /// <summary>
        /// Writes a protein whose single modification declares diagnostic ions, then reloads the XML
        /// twice (once supplying the modification, once with an empty modification list) and checks
        /// that the first diagnostic-ion entry still holds two values each time.
        /// </summary>
        public void TestWritePtmWithDiagnosticIons()
        {
            string filename = "test_diagnostic_ion_mod.xml";
            string xmlPath  = Path.Combine(TestContext.CurrentContext.TestDirectory, filename);

            ModificationMotif.TryGetMotif("T", out var threonine);

            var diagnosticIons = new Dictionary<DissociationType, List<double>>
            {
                { DissociationType.HCD, new List<double> { 80.0, 0 } },
                { DissociationType.ETD, new List<double> { 70.0, 0 } }
            };
            Modification phospho = new Modification(_originalId: "Phospho", _modificationType: "Test", _target: threonine, _locationRestriction: "Anywhere.", _monoisotopicMass: 80.0, _diagnosticIons: diagnosticIons);

            Assert.That(phospho.ValidModification);

            // Position 4 of "PEPTIDE" is the threonine.
            var modsByPosition = new Dictionary<int, List<Modification>>
            {
                { 4, new List<Modification> { phospho } }
            };

            Protein protein = new Protein("PEPTIDE", "accession", oneBasedModifications: modsByPosition);

            Assert.That(protein.OneBasedPossibleLocalizedModifications.Count == 1);
            Assert.That(protein.OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);

            ProteinDbWriter.WriteXmlDatabase(
                new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
                new List<Protein> { protein },
                xmlPath);

            // First pass: the modification is handed to the loader.
            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(
                xmlPath, true, DecoyType.None, new List<Modification> { phospho }, false, new List<string>(),
                out Dictionary<string, Modification> um);

            Assert.That(reloaded.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);

            // Second pass: no modifications supplied, so the loader has to pick the mod up from the file itself.
            reloaded = ProteinDbLoader.LoadProteinXML(
                xmlPath, true, DecoyType.None, new List<Modification>(), false, new List<string>(), out um);

            Assert.That(reloaded.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);
        }
        /// <summary>
        /// Loads seqvartests.xml, rewrites it with <c>ProteinDbWriter.WriteXmlDatabase</c>, reloads the
        /// rewritten file, and checks that the first protein's first sequence variation is unchanged.
        /// </summary>
        public void TestReadWriteSeqVars2()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var knownMods = new List<Modification>
            {
                new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, null, null, null, null, null, null, null)
            };

            string inputPath   = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"seqvartests.xml");
            string rewritePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_seqvartests.xml");

            List<Protein> before = ProteinDbLoader.LoadProteinXML(
                inputPath, true, DecoyType.None, knownMods, false, new List<string>(),
                out Dictionary<string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), before, rewritePath);

            List<Protein> after = ProteinDbLoader.LoadProteinXML(
                rewritePath, true, DecoyType.None, knownMods, false, new List<string>(), out un);

            Assert.AreEqual(before[0].SequenceVariations.Count(), after[0].SequenceVariations.Count());

            // Taken only after the count check so an empty collection still fails in the same place as before.
            var expectedVariation = before[0].SequenceVariations.First();
            var actualVariation   = after[0].SequenceVariations.First();

            Assert.AreEqual(expectedVariation.OneBasedBeginPosition, actualVariation.OneBasedBeginPosition);
            Assert.AreEqual(expectedVariation.OneBasedEndPosition, actualVariation.OneBasedEndPosition);
            Assert.AreEqual(expectedVariation.Description, actualVariation.Description);
            Assert.AreEqual(expectedVariation.OriginalSequence, actualVariation.OriginalSequence);
            Assert.AreEqual(expectedVariation.VariantSequence, actualVariation.VariantSequence);
        }
        /// <summary>
        /// Annotates the proteins of a destination database with information from a source XML
        /// database and writes the results next to the destination file.
        /// </summary>
        /// <remarks>
        /// Three files are written into the destination's directory, named after the destination
        /// file without its last extension: <c>.withmods.xml</c>, <c>.fasta</c> and
        /// <c>.withdecoys.fasta</c>. In the decoy FASTA every occurrence of <c>mz|DECOY_</c> is
        /// rewritten to <c>rev_mz|</c>.
        /// </remarks>
        /// <param name="sourceXmlPath">XML database loaded with the UniProt modifications.</param>
        /// <param name="destinationXmlPath">
        /// Database to annotate. Loaded as XML when the path ends with ".xml" or ".xml.gz"
        /// (case-sensitive), otherwise loaded as FASTA.
        /// </param>
        /// <returns>Path of the written <c>.withmods.xml</c> file.</returns>
        public static string TransferModifications(string sourceXmlPath, string destinationXmlPath)
        {
            var uniprotPtms = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);
            var uniprot     = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out var un);

            // All outputs share the destination's directory and extension-less file name.
            string outputDirectory = Path.GetDirectoryName(destinationXmlPath);
            string outputStem      = Path.GetFileNameWithoutExtension(destinationXmlPath);
            string outxml          = Path.Combine(outputDirectory, outputStem + ".withmods.xml");

            // File extensions are identifiers, not linguistic text: compare ordinally and short-circuit.
            bool destinationIsXml = destinationXmlPath.EndsWith(".xml", StringComparison.Ordinal)
                                    || destinationXmlPath.EndsWith(".xml.gz", StringComparison.Ordinal);

            var nonVariantProts = destinationIsXml ?
                                  ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out un).Select(p => p.NonVariantProtein).Distinct() :
                                  ProteinDbLoader.LoadProteinFasta(destinationXmlPath, true, DecoyType.None, false, ProteinDbLoader.UniprotAccessionRegex, PgmNameRegex, PgmNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var un2).Select(p => p.NonVariantProtein).Distinct();
            var newProts = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.ToList());

            ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);

            string outfasta           = Path.Combine(outputDirectory, outputStem + ".fasta");
            string outfastaWithDecoys = Path.Combine(outputDirectory, outputStem + ".withdecoys.fasta");

            // Sequences ending in '?' are left out of both FASTA outputs.
            var protsForFasta = newProts.SelectMany(p => p.GetVariantProteins()).Where(p => !p.BaseSequence.EndsWith('?')).ToList();

            // NOTE(review): the decoy set is always read with LoadProteinXML from the destination path,
            // even when the destination was loaded as FASTA above — confirm this is intended.
            var decoyProtsForFasta = ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.Reverse, uniprotPtms, false, null, out un).Where(p => !p.BaseSequence.EndsWith('?')).ToList();

            ProteinDbWriter.WriteFastaDatabase(protsForFasta, outfasta, "|");
            ProteinDbWriter.WriteFastaDatabase(decoyProtsForFasta, outfastaWithDecoys, "|");

            // Rewrite the decoy marker in the header lines of the file that was just written.
            File.WriteAllLines(outfastaWithDecoys, File.ReadAllLines(outfastaWithDecoys).Select(line => line.Replace("mz|DECOY_", "rev_mz|")));
            return outxml;
        }
Exemple #5
0
        /// <summary>
        /// Builds a protein carrying the modification(s) read from z.txt at position 2, then asks the
        /// writer to add an equal modification at the same position. Asserts that the writer reports
        /// no new entries and that the reloaded protein still has exactly one modification.
        /// </summary>
        public void DoNotWriteSameModTwiceAndDoNotWriteInHeaderSinceDifferent()
        {
            Loaders.LoadElements();

            string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "z.txt");
            var    modsFromFile = PtmListLoader.ReadModsFromFile(modFilePath, out var errors).ToList();

            var modsByPosition = new Dictionary<int, List<Modification>>
            {
                { 2, modsFromFile.OfType<Modification>().ToList() }
            };
            Protein protein = new Protein("MCSSSSSSSSSS", "accession", "organism", new List<Tuple<string, string>>(), modsByPosition,
                                          null, "name", "full_name", false, false, new List<DatabaseReference>(), new List<SequenceVariation>(), disulfideBonds: new List<DisulfideBond>());

            Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType<Modification>().Count());

            var firstFromFile = modsFromFile.First() as Modification;

            // Hand-built modification that copies the formula and mass of the one read from file.
            ModificationMotif.TryGetMotif("C", out ModificationMotif cysteine);
            Modification duplicate = new Modification(_originalId: "Palmitoylation of C", _modificationType: "Type", _target: cysteine, _locationRestriction: "Anywhere.", _chemicalFormula: firstFromFile.ChemicalFormula, _monoisotopicMass: firstFromFile.MonoisotopicMass, _featureType: "MOD_RES", _fileOrigin: "E:\\GitClones\\mzLib\\Test\\bin\\x64\\Debug\\DatabaseTests\\z.txt");

            // Equality has to hold in both directions.
            Assert.IsTrue(duplicate.Equals(modsFromFile.First()));
            Assert.AreEqual(duplicate, modsFromFile.First());
            Assert.AreEqual(modsFromFile.First(), duplicate);

            var extraModsByAccession = new Dictionary<string, HashSet<Tuple<int, Modification>>>
            {
                { "accession", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, duplicate) } }
            };

            string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins3.xml");
            var    newModResEntries = ProteinDbWriter.WriteXmlDatabase(extraModsByAccession, new List<Protein> { protein }, xmlPath);

            Assert.AreEqual(0, newModResEntries.Count);

            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(
                xmlPath, true, DecoyType.None, new List<Modification>(), false, new List<string>(),
                out Dictionary<string, Modification> um);

            Assert.AreEqual(1, reloaded.Count);
            Assert.AreEqual(1, reloaded[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(1, reloaded[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
        }
Exemple #6
0
        /// <summary>
        /// Loads SeqVarSymbolWeirdness2.xml and checks that it yields two proteins: the first matches
        /// its non-variant protein (name, full name, accession, 'R' at index 2386), while the last
        /// differs in all three identifiers and has 'H' at that index. Finally digests both proteins.
        /// </summary>
        public void VariantSymbolWeirdness2Xml()
        {
            string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "SeqVarSymbolWeirdness2.xml");
            List<Protein> loaded = ProteinDbLoader.LoadProteinXML(xmlPath, true, DecoyType.None, null, false, null, out var un);

            var nonVariant = loaded.First().NonVariantProtein;

            Assert.AreEqual(1, nonVariant.SequenceVariations.Count());
            Assert.AreEqual(2, loaded.Count); // there is only one unique amino acid change
            Assert.AreEqual(1, loaded.Count(v => v.BaseSequence == nonVariant.BaseSequence));

            var reference = loaded.First();
            var alternate = loaded.Last();

            // Zero-based index 2386 is the residue touched by the variation.
            Assert.AreEqual('R', nonVariant.BaseSequence[2386]);
            Assert.AreEqual('R', reference.BaseSequence[2386]);
            Assert.AreEqual('H', alternate.BaseSequence[2386]);

            Assert.AreEqual(nonVariant.Name, reference.Name);
            Assert.AreNotEqual(nonVariant.Name, alternate.Name);
            Assert.AreEqual(nonVariant.FullName, reference.FullName);
            Assert.AreNotEqual(nonVariant.FullName, alternate.FullName);
            Assert.AreEqual(nonVariant.Accession, reference.Accession);
            Assert.AreNotEqual(nonVariant.Accession, alternate.Accession);

            // The result is not inspected; the call only verifies that digestion completes.
            List<PeptideWithSetModifications> digested = loaded
                .SelectMany(vp => vp.Digest(new DigestionParams(), null, null))
                .ToList();
        }
        /// <summary>
        /// Loads a protein database as FASTA (".fasta"/".fa") or, for any other extension, as XML,
        /// and returns only the proteins with a non-empty base sequence.
        /// </summary>
        /// <param name="fileName">
        /// Path to the database. When the extension ends in "gz", the extension underneath it decides the format.
        /// </param>
        /// <returns>The loaded proteins whose <c>BaseSequence</c> is not empty.</returns>
        private static List <Protein> LoadProteinDb(string fileName)
        {
            string extension = Path.GetExtension(fileName).ToLowerInvariant();

            // allows for .bgz and .tgz, too which are used on occasion
            if (extension.EndsWith("gz"))
            {
                // Look through the compression suffix to the real format extension.
                extension = Path.GetExtension(Path.GetFileNameWithoutExtension(fileName)).ToLowerInvariant();
            }

            bool isFasta = extension == ".fasta" || extension == ".fa";

            List<Protein> loaded;
            if (isFasta)
            {
                loaded = ProteinDbLoader.LoadProteinFasta(
                    fileName, true, DecoyType.None, false,
                    ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex,
                    ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex,
                    out List<string> dbErrors);
            }
            else
            {
                loaded = ProteinDbLoader.LoadProteinXML(fileName, true, DecoyType.None, null, false, null, out var um);
            }

            return loaded.Where(p => p.BaseSequence.Length > 0).ToList();
        }
Exemple #8
0
        /// <summary>
        /// Loads the gzip-compressed xml.xml.gz and checks the first two proteins' leading residue and
        /// description, plus the first protein's Ensembl references and gene names.
        /// </summary>
        public void XmlGzTest()
        {
            var knownMods = new List<Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            string gzPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml.xml.gz");
            var    ok     = ProteinDbLoader.LoadProteinXML(gzPath, true, knownMods, false, null, out Dictionary<string, Modification> un);

            Assert.AreEqual('M', ok[0][0]);
            Assert.AreEqual('M', ok[1][0]);

            Assert.AreEqual("P62805|H4_HUMAN|Histone H4", ok[0].FullDescription);
            Assert.AreEqual("DECOY_P62805|H4_HUMAN|Histone H4", ok[1].FullDescription);

            // First Ensembl cross-reference and its first property.
            var firstEnsembl = ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "Ensembl");
            Assert.AreEqual("ENST00000244537", firstEnsembl.Id);
            Assert.AreEqual("protein sequence ID", firstEnsembl.Properties.First().Item1);
            Assert.AreEqual("ENSP00000244537", firstEnsembl.Properties.First().Item2);

            Assert.AreEqual(42, ok[0].GeneNames.Count());

            var primaryGeneNames = ok[0].GeneNames.Where(t => t.Item1 == "primary").ToList();
            Assert.AreEqual(14, primaryGeneNames.Count);
            Assert.AreEqual("HIST1H4A", primaryGeneNames.First().Item2);

            Assert.AreEqual(23, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "Ensembl"));
        }
Exemple #9
0
        /// <summary>
        /// Loads three FASTA files with different header conventions (UniProt, GENCODE, Ensembl)
        /// through the short <c>LoadProteinFasta</c> overload with reverse decoys, and checks the
        /// fields read for the single target protein of each file.
        /// </summary>
        public static void TestDifferentHeaderStyles()
        {
            string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

            // uniprot database
            string uniprotPath    = Path.Combine(databaseDirectory, "uniprot_aifm1.fasta");
            var    uniprotEntries = ProteinDbLoader.LoadProteinFasta(uniprotPath, true, DecoyType.Reverse, false, out var errors);

            // One target plus its decoy.
            Assert.That(uniprotEntries.Count == 2);

            var uniprotTarget = uniprotEntries.Where(p => !p.IsDecoy).First();

            Assert.That(uniprotTarget.Accession == "Q9Z0X1");
            Assert.That(uniprotTarget.GeneNames.Count() == 1);
            Assert.That(uniprotTarget.GeneNames.First().Item2 == "Aifm1");
            Assert.That(uniprotTarget.FullName == "Apoptosis-inducing factor 1, mitochondrial");
            Assert.That(uniprotTarget.Name == "AIFM1_MOUSE");
            Assert.That(uniprotTarget.Organism == "Mus musculus");

            // gencode database
            string gencodePath    = Path.Combine(databaseDirectory, "gencode_mmp20.fa");
            var    gencodeEntries = ProteinDbLoader.LoadProteinFasta(gencodePath, true, DecoyType.Reverse, false, out errors);

            Assert.That(gencodeEntries.Count == 2);

            var gencodeTarget = gencodeEntries.Where(p => !p.IsDecoy).First();

            Assert.That(gencodeTarget.Accession == "ENSMUSP00000034487.2");
            Assert.That(gencodeTarget.GeneNames.Count() == 1);
            Assert.That(gencodeTarget.GeneNames.First().Item2 == "Mmp20");
            Assert.That(gencodeTarget.FullName == "Mmp20-201");

            // ensembl database
            string ensemblPath    = Path.Combine(databaseDirectory, "ensembl_prrc2a.fa");
            var    ensemblEntries = ProteinDbLoader.LoadProteinFasta(ensemblPath, true, DecoyType.Reverse, false, out errors);

            Assert.That(ensemblEntries.Count == 2);

            var ensemblTarget = ensemblEntries.Where(p => !p.IsDecoy).First();

            Assert.That(ensemblTarget.Accession == "ENSP00000372947.2");
            Assert.That(ensemblTarget.GeneNames.Count() == 1);
            Assert.That(ensemblTarget.GeneNames.First().Item2 == "ENSG00000206427.11");
        }
        /// <summary>
        /// Reads an Ensembl pep.all FASTA file, writes it out as XML, reloads the XML, and checks that
        /// both lists agree on sequences and carry the expected accessions, gene names, full names,
        /// source file paths, and in-bounds proteolysis product positions.
        /// </summary>
        public void Test_read_Ensembl_pepAllFasta()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var knownMods = new List<Modification>
            {
                new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, null, null, null, null, null, null, null)
            };

            string fastaPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"test_ensembl.pep.all.fasta");
            string xmlPath   = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_test_ensembl.pep.all.xml");

            List<Protein> fromFasta = ProteinDbLoader.LoadProteinFasta(
                fastaPath, true, DecoyType.None, false,
                ProteinDbLoader.EnsemblAccessionRegex, ProteinDbLoader.EnsemblFullNameRegex, ProteinDbLoader.EnsemblAccessionRegex, ProteinDbLoader.EnsemblGeneNameRegex, null,
                out var a);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), fromFasta, xmlPath);

            List<Protein> fromXml = ProteinDbLoader.LoadProteinXML(
                xmlPath, true, DecoyType.None, knownMods, false, null,
                out Dictionary<string, Modification> un);

            Assert.AreEqual(fromFasta.Count, fromXml.Count);
            Assert.True(Enumerable.Range(0, fromFasta.Count).All(i => fromFasta[i].BaseSequence == fromXml[i].BaseSequence));

            // The same header-derived expectations apply to the original and to the rewritten list.
            void AssertExpectedEntries(List<Protein> proteins, string expectedSourcePath)
            {
                Assert.AreEqual("ENSP00000381386", proteins[0].Accession);
                Assert.AreEqual("ENSP00000215773", proteins[1].Accession);
                Assert.AreEqual("ENSG00000099977", proteins[0].GeneNames.First().Item2);
                Assert.AreEqual("ENSG00000099977", proteins[1].GeneNames.First().Item2);
                Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding", proteins[0].FullName);
                Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24322019:-1 gene:ENSG00000099977 transcript:ENST00000350608 gene_biotype:protein_coding transcript_biotype:protein_coding", proteins[1].FullName);
                Assert.AreEqual(expectedSourcePath, proteins[0].DatabaseFilePath);
            }

            // A product position is acceptable when absent or within 1..protein length.
            void AssertProductsWithinProtein(List<Protein> proteins)
            {
                Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || (prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length))));
                Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || (prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length))));
            }

            AssertExpectedEntries(fromFasta, fastaPath);
            AssertExpectedEntries(fromXml, xmlPath);

            AssertProductsWithinProtein(fromFasta);
            AssertProductsWithinProtein(fromXml);
        }
        /// <summary>
        /// Writes two minimally constructed proteins (one without a name, one with) to XML and checks
        /// that accession and name are preserved when the file is read back.
        /// </summary>
        public void TestEmptyProteins()
        {
            Protein unnamed = new Protein("SEQENCE", "p1");

            // With only an accession given, the remaining description fields are empty.
            Assert.AreEqual("p1||", unnamed.FullDescription);

            Protein named = new Protein("SEQENCE", "p2", name: "namep2");

            string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"differentlyConstuctedProteins.xml");

            // Generate data for files
            ProteinDbWriter.WriteXmlDatabase(
                new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
                new List<Protein> { unnamed, named },
                xmlPath);

            IEnumerable<string>       modTypesToExclude     = new List<string>();
            IEnumerable<Modification> allKnownModifications = new List<Modification>();

            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(
                xmlPath, true, DecoyType.None, allKnownModifications, false, modTypesToExclude,
                out Dictionary<string, Modification> un);

            Assert.AreEqual(unnamed.Accession, reloaded[0].Accession);
            Assert.AreEqual(named.Accession, reloaded[1].Accession);
            Assert.AreEqual(unnamed.Name, reloaded[0].Name);
            Assert.AreEqual(named.Name, reloaded[1].Name);
        }
Exemple #12
0
        /// <summary>
        /// Loads two XML databases with <c>DecoyType.Slide</c> and checks the generated decoy (index 1)
        /// against its target (index 0): exact decoy sequence, matching disulfide-bond and proteolysis
        /// product counts and positions, cysteines at every disulfide position, and the expected
        /// modification and sequence-variation positions on the decoy.
        /// </summary>
        public static void TestSlideDecoyXML()
        {
            //sequence, disulfides
            var ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"disulfidetests.xml"), true, DecoyType.Slide, UniProtPtms, false,
                                                     new string[] { "exclude_me" }, out Dictionary <string, Modification> un);

            Assert.AreEqual("MALLVHFLPLLALLALWEPKPTQAFVKQHLCGPHLVEALYLVCGERGFFYTPKSRREVEDPQVEQLELGGSPGDLQTLALEVARQKRGIVDQCCTSICSLYQLENYCN", ok2[0].BaseSequence);
            Assert.AreEqual("MTKAEVLQLLAGLHLVHALYAVLGVRFFPYLPLSARWVPDPQQEFLKLHGCPPDLQELLLLVCREKGGFVTQKCRSECELPQVEQYENGCSNGLLYTSAIETACQDRI", ok2[1].BaseSequence);
            Assert.AreEqual(ok2[0].DisulfideBonds.Count(), ok2[1].DisulfideBonds.Count());

            // Materialize once instead of calling ToArray() on every loop iteration.
            var targetProducts = ok2[0].ProteolysisProducts.ToArray();
            var decoyProducts  = ok2[1].ProteolysisProducts.ToArray();

            Assert.AreEqual(targetProducts.Length, decoyProducts.Length);
            for (int i = 0; i < targetProducts.Length; i++)
            {
                Assert.AreEqual(targetProducts[i].OneBasedBeginPosition, decoyProducts[i].OneBasedBeginPosition);
                Assert.AreEqual(targetProducts[i].OneBasedEndPosition, decoyProducts[i].OneBasedEndPosition);
            }

            // Every disulfide bond must sit on a cysteine at both ends, in the target and in the decoy.
            // NUnit's AreEqual takes (expected, actual); the constant goes first so failure messages read correctly.
            foreach (var protein in new[] { ok2[0], ok2[1] })
            {
                foreach (DisulfideBond bond in protein.DisulfideBonds)
                {
                    Assert.AreEqual('C', protein.BaseSequence[bond.OneBasedBeginPosition - 1]);
                    Assert.AreEqual('C', protein.BaseSequence[bond.OneBasedEndPosition - 1]);
                }
            }

            //sequence variants, modifications
            ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"O43653.xml"), true, DecoyType.Slide, UniProtPtms, false,
                                                 new string[] { "exclude_me" }, out un);

            Assert.AreEqual(13, ok2[1].OneBasedPossibleLocalizedModifications.First().Key);
            var decoyVariants = ok2[1].SequenceVariations.ToList();

            Assert.AreEqual("MLAAKLVMLL", decoyVariants[0].VariantSequence); //variant should shuffle but keep initiator methionine
            Assert.AreEqual(1, decoyVariants[0].OneBasedBeginPosition);      //shouldn't have changed
            Assert.AreEqual(10, decoyVariants[1].OneBasedBeginPosition);     //30-20
        }
Exemple #13
0
        // First argument: the modification type to exclude; second argument: the expected outcome of the assertions.
        [TestCase("exclude_me", false)]
        //[TestCase("exclude_me_not", true)]
        public static void Read_xml_exclude_mods(string excludeString, bool isExcluded)
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);

            // Same id, two different modification types; only the type named by excludeString is excluded.
            var knownMods = new List<Modification>
            {
                new Modification("N-acetylserine", null, "exclude_me", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null),
                new Modification("N-acetylserine", null, "exclude_me_not", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null)
            };

            Assert.That(knownMods[0].ValidModification);

            string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml.xml");
            var    loaded  = ProteinDbLoader.LoadProteinXML(xmlPath, true, DecoyType.Reverse, knownMods, false,
                                                            new[] { excludeString }, out Dictionary<string, Modification> un);

            // Distinct modification types per position of the first protein, flattened into one list.
            List<string> modTypes = loaded[0].OneBasedPossibleLocalizedModifications
                                    .SelectMany(entry => entry.Value.Select(m => m.ModificationType).Distinct())
                                    .ToList();

            Assert.AreEqual(isExcluded, modTypes.Contains("exclude_me"));
            Assert.AreEqual(!isExcluded, modTypes.Contains("exclude_me_not"));
        }
Exemple #14
0
        /// <summary>
        /// Loads xml2.xml and checks that proteolysis product positions are within bounds, that no
        /// base sequence contains whitespace, and that each of the two target proteins has exactly
        /// one "EnsemblFungi" database reference.
        /// </summary>
        public void XmlTest_2entry()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            var ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), true, nice, false, null, out Dictionary <string, Modification> un);

            Assert.True(ok.All(p => p.ProteolysisProducts.All(d => d.OneBasedBeginPosition == null || d.OneBasedBeginPosition > 0)));

            Assert.True(ok.All(p => p.ProteolysisProducts.All(d => d.OneBasedEndPosition == null || d.OneBasedEndPosition <= p.Length)));

            // No sequence may contain whitespace. The check uses Any rather than All: with All the
            // assertion would still pass as long as a single protein happened to be clean.
            Assert.False(ok.Any(p => p.BaseSequence.Contains(" ")));
            Assert.False(ok.Any(p => p.BaseSequence.Contains("\t")));
            Assert.False(ok.Any(p => p.BaseSequence.Contains("\n")));

            //GoTerm checks
            List <Protein> targets = ok.Where(p => !p.IsDecoy).ToList();

            Assert.AreEqual(2, targets.Count);
            Assert.AreEqual(1, targets[0].DatabaseReferences.Count(dbRef => dbRef.Type == "EnsemblFungi"));
            Assert.AreEqual(1, targets[1].DatabaseReferences.Count(dbRef => dbRef.Type == "EnsemblFungi"));
        }
        /// <summary>
        /// Loads xml2.xml, rewrites it while asking the writer to add one extra modification to
        /// accession "P53863" at position 2, and checks that the writer reports one new entry and
        /// that the reloaded first protein has three modified positions instead of two.
        /// </summary>
        public void Test_write_with_custom_mods()
        {
            ModificationMotif.TryGetMotif("S", out ModificationMotif serine);
            ModificationMotif.TryGetMotif("T", out ModificationMotif threonine);
            ModificationMotif.TryGetMotif("X", out ModificationMotif anyResidue);

            var knownMods = new List<Modification>
            {
                new ModificationWithLocation("fayk", "mt", anyResidue, TerminusLocalization.Any, null),
                new ModificationWithLocation("Phosphoserine", "mt", serine, TerminusLocalization.Any, null),
                new ModificationWithLocation("Phosphothreonine", "mt", threonine, TerminusLocalization.Any, null)
            };

            ModificationMotif.TryGetMotif("K", out ModificationMotif lysine);
            ModificationWithMass extraMod = new ModificationWithMass("mod", "mt", lysine, TerminusLocalization.Any, 1, neutralLosses: new List<double> { -1 });

            // Accession -> (one-based position, modification) pairs the writer should add.
            var extraModsByAccession = new Dictionary<string, HashSet<Tuple<int, Modification>>>
            {
                { "P53863", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, extraMod) } }
            };

            string inputPath   = Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml");
            string rewritePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml");

            List<Protein> before = ProteinDbLoader.LoadProteinXML(
                inputPath, true, DecoyType.None, knownMods, false, new List<string>(),
                out Dictionary<string, Modification> un);
            var newModResEntries = ProteinDbWriter.WriteXmlDatabase(extraModsByAccession, before, rewritePath);

            Assert.AreEqual(1, newModResEntries.Count);

            List<Protein> after = ProteinDbLoader.LoadProteinXML(
                rewritePath, true, DecoyType.None, knownMods, false, new List<string>(), out un);

            Assert.AreEqual(before.Count, after.Count);
            Assert.True(Enumerable.Range(0, before.Count).All(i => before[i].BaseSequence == after[i].BaseSequence));

            // The extra modification shows up as one additional modified position after the rewrite.
            Assert.AreEqual(2, before[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(3, after[0].OneBasedPossibleLocalizedModifications.Count);
        }
Exemple #16
0
        /// <summary>
        /// Writes a single protein with one modification to XML, reads the raw file lines, and reloads
        /// the database with reverse decoys. The test makes no assertions; it passes when none of
        /// these steps throws.
        /// </summary>
        public static void Test_MetaMorpheusStyleProteinDatabaseWriteAndREad()
        {
            string proteinDbFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestProteinSplitAcrossFiles.xml");

            ModificationMotif.TryGetMotif("D", out ModificationMotif aspartate);
            Modification modOnD = new Modification(_originalId: "mod1", _modificationType: "mt", _target: aspartate, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);

            // Position 3 of "MEDEEK" is the aspartate.
            IDictionary<int, List<Modification>> modsByPosition = new Dictionary<int, List<Modification>>
            {
                [3] = new List<Modification> { modOnD }
            };

            Protein protein         = new Protein("MEDEEK", "prot1", oneBasedModifications: modsByPosition);
            var     proteinsToWrite = new List<Protein> { protein };

            ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), proteinsToWrite, proteinDbFilePath);

            // Neither result is inspected; both calls only confirm that the written file is readable.
            string[]      rawLines = File.ReadAllLines(proteinDbFilePath);
            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(proteinDbFilePath, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out var um, -1);
        }
Exemple #17
0
        /// <summary>
        /// Round-trips splice-site annotations through the XML writer and loader. The first
        /// pass leaves the data untouched and expects identical splice sites with an unset
        /// <c>Novel</c> flag; the second pass sets <c>Novel</c> to true before writing and
        /// expects the flag to be read back as true.
        /// </summary>
        public void TestReadWriteSpliceSites()
        {
            string databaseFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");
            string inputPath      = Path.Combine(databaseFolder, @"splices1.xml");
            string rewrittenPath  = Path.Combine(databaseFolder, @"rewrite_spliceSite.xml");

            List<Protein> loaded = ProteinDbLoader.LoadProteinXML(inputPath, true, DecoyType.None, null, false, new List<string>(), out Dictionary<string, Modification> unknownMods);

            // The flag is unset in the input file.
            Assert.IsNull(loaded[0].SpliceSites.First().Description.Novel);

            // Pass 1: write unchanged, read back, compare the first splice site field by field.
            ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), loaded, rewrittenPath);
            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(rewrittenPath, true, DecoyType.None, null, false, new List<string>(), out unknownMods);

            Assert.AreEqual(loaded[0].SpliceSites.Count(), reloaded[0].SpliceSites.Count());
            Assert.AreEqual(loaded[0].SpliceSites.First().OneBasedBeginPosition, reloaded[0].SpliceSites.First().OneBasedBeginPosition);
            Assert.AreEqual(loaded[0].SpliceSites.First().OneBasedEndPosition, reloaded[0].SpliceSites.First().OneBasedEndPosition);
            Assert.AreEqual(loaded[0].SpliceSites.First().Description, reloaded[0].SpliceSites.First().Description);
            Assert.IsNull(reloaded[0].SpliceSites.First().Description.Novel);

            // Pass 2: flip the flag in memory, overwrite the same file, read it back.
            loaded[0].SpliceSites.First().Description.Novel = true;
            ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), loaded, rewrittenPath);
            reloaded = ProteinDbLoader.LoadProteinXML(rewrittenPath, true, DecoyType.None, null, false, new List<string>(), out unknownMods);

            Assert.IsTrue(reloaded[0].SpliceSites.First().Description.Novel);
        }
        /// <summary>
        /// Attaches the modifications read from <c>z.txt</c> to residue 2 of a protein, then
        /// asks the writer to add an equal, separately constructed modification at the same
        /// residue. Expects the writer to report no new modification entries and the reloaded
        /// protein to carry exactly one modification at one position.
        /// </summary>
        public void DoNotWriteSameModTwiceAndDoNotWriteInHeaderSinceDifferent()
        {
            string testDirectory = TestContext.CurrentContext.TestDirectory;
            string outputPath    = Path.Combine(testDirectory, "test_modifications_with_proteins3.xml");

            Loaders.LoadElements(Path.Combine(testDirectory, "elements2.dat"));
            var modsFromFile = PtmListLoader.ReadModsFromFile(Path.Combine(testDirectory, "z.txt")).ToList();

            var modsOnProtein = new Dictionary<int, List<Modification>>
            {
                { 2, modsFromFile.OfType<Modification>().ToList() }
            };
            Protein protein = new Protein("MCSSSSSSSSSS", "accession", "organism", new List<Tuple<string, string>>(), modsOnProtein,
                                          null, "name", "full_name", false, false, new List<DatabaseReference>(), new List<SequenceVariation>(), new List<DisulfideBond>());

            Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType<ModificationWithMass>().Count());

            // Rebuild the file's first modification from its own fields so the two objects
            // are distinct instances that must still compare equal in both directions.
            var modReadFromFile = modsFromFile.First() as ModificationWithMassAndCf;
            ModificationMotif.TryGetMotif("C", out ModificationMotif motifC);
            ModificationWithMass rebuiltMod = new ModificationWithMassAndCf("Palmitoylation of C", modReadFromFile.modificationType, motifC, TerminusLocalization.Any, modReadFromFile.chemicalFormula, modReadFromFile.monoisotopicMass, null, null, null);

            Assert.AreEqual(rebuiltMod, modsFromFile.First());
            Assert.AreEqual(modsFromFile.First(), rebuiltMod);

            // Request the rebuilt modification at residue 2 of the same accession.
            var additionalMods = new Dictionary<string, HashSet<Tuple<int, Modification>>>
            {
                { "accession", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, rebuiltMod) } }
            };

            var newModResEntries = ProteinDbWriter.WriteXmlDatabase(additionalMods, new List<Protein> { protein }, outputPath);

            Assert.AreEqual(0, newModResEntries.Count);

            List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(outputPath, true, DecoyType.None, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> unknownMods);

            Assert.AreEqual(1, reloaded.Count);
            Assert.AreEqual(1, reloaded[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(1, reloaded[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
        }
Exemple #19
0
        /// <summary>
        /// Loads <c>MultipleAlternateAlleles.xml</c> twice. With default settings it expects
        /// two proteins of length 72 that differ at residue 63 ('K' versus 'R'); with
        /// <c>minAlleleDepth: 10</c> it expects a single protein with no applied variation.
        /// </summary>
        public static void MultipleAlternateAlleles()
        {
            const int variantSite = 63; // one-based residue where the two loaded proteins differ
            string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "MultipleAlternateAlleles.xml");

            var proteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out var unknownModifications);
            Assert.AreEqual(2, proteins.Count);

            var first  = proteins[0];
            var second = proteins[1];

            // Total number of variations, then the number of distinct ones.
            Assert.AreEqual(2, first.SequenceVariations.Count());
            Assert.AreEqual(2, first.SequenceVariations.Select(v => v.SimpleString()).Distinct().Count());

            // There are two alternate alleles (1 and 2), but only 2 is in the genotype, so only that one is applied.
            Assert.IsTrue(first.SequenceVariations.All(v => v.OneBasedBeginPosition == variantSite));
            Assert.AreEqual(1, second.AppliedSequenceVariations.Count());
            Assert.AreEqual(1, second.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
            Assert.AreEqual(72, first.Length);
            Assert.AreEqual(72, second.Length);
            Assert.AreEqual('K', first[variantSite - 1]);
            Assert.AreEqual('R', second[variantSite - 1]);

            // Reload with a minimum allele depth of 10.
            var depthFiltered = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out unknownModifications, minAlleleDepth: 10);
            Assert.AreEqual(1, depthFiltered.Count);

            var onlyProtein = depthFiltered[0];
            Assert.AreEqual(0, onlyProtein.AppliedSequenceVariations.Count());
            Assert.AreEqual(0, onlyProtein.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
            Assert.AreEqual('K', onlyProtein[variantSite - 1]); // reference only
        }
Exemple #20
0
        /// <summary>
        /// Loads <c>StopGained.xml</c> twice. With default settings it expects a 191-residue
        /// protein without applied variations plus a 160-residue protein with one applied
        /// variation; with <c>minAlleleDepth: 400</c> it expects only the 160-residue protein.
        /// </summary>
        public static void StopGained()
        {
            const int stopSite = 161; // one-based residue; the shorter protein ends just before it
            string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "StopGained.xml");

            var proteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out var unknownModifications);
            Assert.AreEqual(2, proteins.Count);

            var fullLength = proteins[0];
            var shortened  = proteins[1];

            // Each pair below checks the total count, then the count of distinct variations.
            Assert.AreEqual(1, fullLength.SequenceVariations.Count());
            Assert.AreEqual(1, fullLength.SequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
            Assert.AreEqual(0, fullLength.AppliedSequenceVariations.Count());
            Assert.AreEqual(0, fullLength.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
            Assert.AreEqual(1, shortened.AppliedSequenceVariations.Count());
            Assert.AreEqual(1, shortened.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());

            Assert.AreEqual(191, fullLength.Length);
            Assert.AreEqual('Q', fullLength[stopSite - 1]);
            Assert.AreEqual(stopSite - 1, shortened.Length);
            Assert.AreNotEqual(fullLength.Length, shortened.Length);

            // Reload with a minimum allele depth of 400.
            var depthFiltered = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out unknownModifications, minAlleleDepth: 400);
            Assert.AreEqual(1, depthFiltered.Count);

            var onlyProtein = depthFiltered[0];
            Assert.AreEqual(1, onlyProtein.AppliedSequenceVariations.Count());
            Assert.AreEqual(1, onlyProtein.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
            Assert.AreEqual(stopSite - 1, onlyProtein.Length);
        }
        /// <summary>
        /// Prints a summary of a Spritz protein database to the console: the number of
        /// canonical and variant-containing entries, and the number of unique applied
        /// sequence variants broken down by the effect named in each variant's description.
        /// </summary>
        /// <param name="sourceXmlPath">
        /// Path of the source protein XML. It is loaded, but its contents do not contribute
        /// to the printed summary.
        /// </param>
        /// <param name="destinationXmlPath">
        /// Path of the Spritz protein XML whose entries and applied variants are summarized.
        /// </param>
        /// <remarks>
        /// A variant is assigned to the first effect, in the order checked below, that its
        /// description contains. Variants whose description names none of the effects are not
        /// counted at all, not even in the total.
        /// </remarks>
        public static void DatabaseSummary(string sourceXmlPath, string destinationXmlPath)
        {
            var uniprotPtms = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);

            // The source database is still loaded (so a bad path fails exactly as before),
            // although nothing below reads the result.
            var uniprot         = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out var un);
            var spritz          = ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out un);
            var spritzCanonical = spritz.Select(p => p.NonVariantProtein).Distinct().ToList();
            int numberOfCanonicalProteinEntries = spritzCanonical.Count;
            int numberOfVariantProteinEntries   = spritz.Count - spritzCanonical.Count;

            // Unique applied variants, grouped by the accession of the canonical protein.
            var allVariants = new Dictionary<string, List<SequenceVariation>>();

            foreach (var spritzEntry in spritz)
            {
                if (spritzEntry.AppliedSequenceVariations.Count == 0)
                {
                    continue;
                }

                string accession = spritzEntry.NonVariantProtein.Accession;
                if (allVariants.TryGetValue(accession, out List<SequenceVariation> knownVariants))
                {
                    foreach (var variant in spritzEntry.AppliedSequenceVariations)
                    {
                        if (!knownVariants.Contains(variant))
                        {
                            knownVariants.Add(variant);
                        }
                    }
                }
                else
                {
                    // Store a copy. Storing the protein's own list would make the Add above
                    // append other entries' variants to that protein's AppliedSequenceVariations.
                    allVariants.Add(accession, new List<SequenceVariation>(spritzEntry.AppliedSequenceVariations));
                }
            }

            // Effect names are machine identifiers, so match them ordinally rather than with
            // the current UI culture. A missing description matches nothing.
            bool Mentions(string description, string effect)
            {
                return description != null && description.IndexOf(effect, StringComparison.OrdinalIgnoreCase) >= 0;
            }

            int synonymousCount  = 0;
            int totalVariants    = 0;
            int missenseSnvCount = 0;
            int missenseMnvCount = 0;
            int insertionCount   = 0;
            int deletionCount    = 0;
            int frameshiftCount  = 0;
            int stopGainCount    = 0;
            int stopLossCount    = 0;

            foreach (var entry in allVariants)
            {
                foreach (var variant in entry.Value)
                {
                    string description = variant.Description.Description;

                    if (Mentions(description, "synonymous_variant"))
                    {
                        synonymousCount++;
                    }
                    else if (Mentions(description, "missense_variant"))
                    {
                        // Single-character reference and alternate alleles count as an SNV;
                        // every other missense variant counts as an MNV.
                        bool singleCharacterAlleles = variant.Description.ReferenceAlleleString.Length == 1
                                                      && variant.Description.AlternateAlleleString.Length == 1;
                        if (singleCharacterAlleles)
                        {
                            missenseSnvCount++;
                        }
                        else
                        {
                            missenseMnvCount++;
                        }
                    }
                    else if (Mentions(description, "frameshift_variant"))
                    {
                        frameshiftCount++;
                    }
                    else if (Mentions(description, "stop_gained"))
                    {
                        stopGainCount++;
                    }
                    else if (Mentions(description, "conservative_inframe_insertion") || Mentions(description, "disruptive_inframe_insertion"))
                    {
                        insertionCount++;
                    }
                    else if (Mentions(description, "conservative_inframe_deletion") || Mentions(description, "disruptive_inframe_deletion"))
                    {
                        deletionCount++;
                    }
                    else if (Mentions(description, "stop_lost") || Mentions(description, "stop_loss"))
                    {
                        // SnpEff / Sequence Ontology spell this effect "stop_lost". The
                        // previously used "stop_loss" spelling is still accepted.
                        stopLossCount++;
                    }
                    else
                    {
                        // Unrecognized effect: not counted anywhere.
                        continue;
                    }

                    totalVariants++;
                }
            }

            Console.WriteLine("Spritz Database Summary");
            Console.WriteLine("--------------------------------------------------------------");
            Console.WriteLine($"{numberOfCanonicalProteinEntries}\tTotal number of canonical protein entries (before applying variations)");
            Console.WriteLine($"{spritz.Count}\tTotal number of protein entries");
            Console.WriteLine($"{numberOfVariantProteinEntries}\tTotal number of variant containing protein entries");
            Console.WriteLine($"{totalVariants}\tTotal number of unique variants");
            Console.WriteLine($"{synonymousCount}\tTotal number of unique synonymous variants");
            Console.WriteLine($"{(totalVariants - synonymousCount)}\tTotal number of unique nonsynonymous variants");
            Console.WriteLine($"{missenseSnvCount}\tNumber of unique SNV missense variants");
            Console.WriteLine($"{missenseMnvCount}\tNumber of unique MNV missense variants");
            Console.WriteLine($"{frameshiftCount}\tNumber of unique frameshift variants");
            Console.WriteLine($"{insertionCount}\tNumber of unique insertion variants");
            Console.WriteLine($"{deletionCount}\tNumber of unique deletion variants");
            Console.WriteLine($"{stopGainCount}\tNumber of unique stop gain variants");
            Console.WriteLine($"{stopLossCount}\tNumber of unique stop loss variants");
        }
        /// <summary>
        /// End-to-end check of pruned-database writing. A protein with the same custom
        /// modification at residues 1 and 3 is written to an XML database, a peptide carrying
        /// the modification at residue 3 is turned into an mzML file, a search task with
        /// <c>WritePrunedDatabase</c> enabled is run on both, and the resulting pruned
        /// database is expected to keep only the modification at residue 3.
        /// </summary>
        /// <remarks>
        /// The database file, the mzML file and the output folder are removed in a
        /// <c>finally</c> block so that a failing assertion does not leave them behind.
        /// </remarks>
        public static void TestPrunedDatabase()
        {
            // Artifacts created by this test; declared up front so cleanup can always reach them.
            string xmlName      = "okkk.xml";
            string mzmlName     = @"hello.mzML";
            string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedDatabase");

            try
            {
                //Create Search Task
                SearchTask task1 = new SearchTask
                {
                    SearchParameters = new SearchParameters
                    {
                        WritePrunedDatabase  = true,
                        SearchTarget         = true,
                        MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                        ModsToWriteSelection = new Dictionary<string, int>
                        {
                            { "ConnorModType", 1 }
                        }
                    },
                    CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5))
                };

                //add task to task list
                List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>
                {
                    ("task1", task1)
                };

                ModificationMotif.TryGetMotif("P", out ModificationMotif motif);

                var connorMod = new Modification(_originalId: "ConnorMod on P", _modificationType: "ConnorModType", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);

                GlobalVariables.AddMods(new List<Modification>
                {
                    connorMod
                }, false);

                //create modification lists
                List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
                                                           .Where(b => task1.CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif))).ToList();

                //add the same modification to residues 1 and 3 of the protein
                var dictHere = new Dictionary<int, List<Modification>>
                {
                    { 1, new List<Modification> { connorMod } },
                    { 3, new List<Modification> { connorMod } }
                };

                //protein creation
                Protein testProteinWithMod = new Protein("PEPTID", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);

                //Add Mod to list and write XML input database
                var modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>
                {
                    { "test", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(3, connorMod) } }
                };

                ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> {
                    testProteinWithMod
                }, xmlName);

                //read the database back, asking for reversed decoys
                var protein = ProteinDbLoader.LoadProteinXML(xmlName, true,
                                                             DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);

                //Dictionary 'ok' contains unknown modifications. There are no unknown modifications in this test.
                Assert.AreEqual(0, ok.Count);
                //One protein is read from the .xml database and one decoy is created. Therefore, the list of proteins contains 2 entries.
                Assert.AreEqual(2, protein.Count);
                //The original database had two localized mods on the protein. Therefore, both protein and decoy should have two mods.
                Assert.AreEqual(2, protein[0].OneBasedPossibleLocalizedModifications.Count);
                List<int> foundResidueIndices    = protein[0].OneBasedPossibleLocalizedModifications.Select(k => k.Key).ToList();
                List<int> expectedResidueIndices = new List<int>()
                {
                    1, 3
                };

                Assert.That(foundResidueIndices, Is.EquivalentTo(expectedResidueIndices));
                Assert.AreEqual(2, protein[1].OneBasedPossibleLocalizedModifications.Count);
                foundResidueIndices    = protein[1].OneBasedPossibleLocalizedModifications.Select(k => k.Key).ToList();
                expectedResidueIndices = new List<int>()
                {
                    4, 6
                };                                                 //originally modified residues are now at the end in the decoy
                Assert.That(foundResidueIndices, Is.EquivalentTo(expectedResidueIndices));

                var digestedList = protein[0].Digest(task1.CommonParameters.DigestionParams, new List<Modification>(),
                                                     variableModifications).ToList();

                Assert.AreEqual(4, digestedList.Count);

                //Set Peptide with 1 mod at position 3
                PeptideWithSetModifications pepWithSetMods1 = digestedList[1];

                //Finally Write MZML file
                Assert.AreEqual("PEP[ConnorModType:ConnorMod on P]TID", pepWithSetMods1.FullSequence);
                MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> {
                    pepWithSetMods1
                });

                IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

                //run!
                var engine = new EverythingRunnerEngine(taskList, new List<string> {
                    mzmlName
                },
                                                        new List<DbForTask> {
                    new DbForTask(xmlName, false)
                }, outputFolder);

                engine.Run();

                string final = Path.Combine(MySetUpClass.outputFolder, "task1", "okkkpruned.xml");

                var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);

                //check length
                Assert.AreEqual(1, proteins[0].OneBasedPossibleLocalizedModifications.Count);
                //check location (key)
                Assert.IsTrue(proteins[0].OneBasedPossibleLocalizedModifications.ContainsKey(3));
                List<Modification> listOfMods = proteins[0].OneBasedPossibleLocalizedModifications[3];

                //check Type, count, ID (expected value first, so failure messages read correctly)
                Assert.AreEqual("ConnorModType", listOfMods[0].ModificationType);
                Assert.AreEqual("ConnorMod on P", listOfMods[0].IdWithMotif);
                Assert.AreEqual(1, listOfMods.Count);
            }
            finally
            {
                // The folder may not exist yet if the test failed before the engine ran.
                if (Directory.Exists(outputFolder))
                {
                    Directory.Delete(outputFolder, true);
                }

                // File.Delete does not throw when the file does not exist.
                File.Delete(xmlName);
                File.Delete(mzmlName);
            }
        }
        /// <summary>
        /// Builds two proteins (one with a sequence variation), writes them plus the variant
        /// proteins to XML, fabricates a single PSM for the peptide "PEPT" taken from the first
        /// variant protein, runs a search task and a post-search analysis task with
        /// WritePrunedDatabase enabled, and then checks the modification counts in the
        /// original, protein-pruned and mod-pruned databases.
        /// </summary>
        public static void TestProteinPrunedWithModSelectionAndVariants()
        {
            // Pick one known UniProt mod targeting T and one "Common Artifact" mod targeting X.
            var modToWrite    = GlobalVariables.AllModsKnown.Where(p => p.ModificationType == "UniProt" && p.Target.ToString() == "T").First();
            var modToNotWrite = GlobalVariables.AllModsKnown.Where(p => p.ModificationType == "Common Artifact" && p.Target.ToString() == "X").First();
            Dictionary <int, List <Modification> > variantMods = new Dictionary <int, List <Modification> >();

            variantMods.Add(1, new List <Modification>()
            {
                modToNotWrite
            });

            // Variation spanning position 4..4 with sequences "V" and "T"; position 4 of protein1 below is 'V'.
            // NOTE(review): presumably V is replaced by T, which matches the "PEPT" peptide looked up later — confirm.
            List <SequenceVariation> variants = new List <SequenceVariation> {
                new SequenceVariation(4, 4, "V", "T", @"20\t41168825\t.\tT\tC\t14290.77\t.\tANN=C|missense_variant|MODERATE|PLCG1|ENSG00000124181|transcript|ENST00000244007.7|protein_coding|22/33|c.2438T>C|p.Ile813Thr|2635/5285|2438/3876|813/1291||\tGT:AD:DP:GQ:PL\t1/1:1,392:393:99:14319,1142,0", variantMods)
            };

            // Both proteins carry modToNotWrite on residue 1 and modToWrite on their last residue.
            var protein1 = new Protein("PEPVIDEKPEPT", "1", oneBasedModifications: new Dictionary <int, List <Modification> > {
                { 1, new List <Modification> {
                      modToNotWrite
                  } }, { 12, new List <Modification> {
                             modToWrite
                         } }
            }, sequenceVariations: variants);
            var protein2 = new Protein("PEPIDPEPT", "2", oneBasedModifications: new Dictionary <int, List <Modification> > {
                { 1, new List <Modification> {
                      modToNotWrite
                  } }, { 9, new List <Modification> {
                             modToWrite
                         } }
            });
            var protein1Variants = protein1.GetVariantProteins(1, 0);

            // Relative path: the file is written to the process's current working directory.
            string path = @"temp";

            var proteinList = new List <Protein> {
                protein1, protein2
            };

            proteinList.AddRange(protein1Variants);


            // First copy of the database, written with no additional modifications; reloaded near the end of the test.
            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), proteinList, path);

            Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTestVariant"));

            // modList is still empty when it is passed to the writer below; Hash is only added to it further down.
            Dictionary <string, HashSet <Tuple <int, Modification> > > modList = new Dictionary <string, HashSet <Tuple <int, Modification> > >();
            var Hash = new HashSet <Tuple <int, Modification> >
            {
                new Tuple <int, Modification>(1, modToWrite),
                new Tuple <int, Modification>(2, modToNotWrite),
            };

            // NOTE(review): this writes PrunedDbTestVariant/fakeDb.xml, but every later read in this method
            // uses PrunedDbTest/fakeDb.xml — looks like it relies on a file produced elsewhere; confirm.
            var db = ProteinDbWriter.WriteXmlDatabase(modList, proteinList, Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTestVariant/fakeDb.xml"));

            // Digest the first variant protein and take the peptide whose base sequence is "PEPT".
            var peptideObserved = protein1Variants.First().Digest(new DigestionParams(minPeptideLength: 1), new List <Modification>(), new List <Modification>())
                                  .Where(p => p.BaseSequence == "PEPT").First();
            PostSearchAnalysisParameters testPostTaskParameters = new PostSearchAnalysisParameters();
            CommonParameters             commonParam            = new CommonParameters(useDeltaScore: false);

            // NOTE(review): 10000 x 10000 doubles (8 bytes each) is roughly 800 MB for a single non-zero value — confirm this size is needed.
            double[,] noiseData = new double[10000, 10000];
            noiseData[0, 0]     = 1.0;
            List <Proteomics.Fragmentation.MatchedFragmentIon> matchedFragmentIons = new List <Proteomics.Fragmentation.MatchedFragmentIon>()
            {
            };
            MzSpectrum spectrum = new MzSpectrum(noiseData);
            MsDataScan scan     = new MsDataScan(spectrum, 1, 1, true, Polarity.Unknown, 2, new MzLibUtil.MzRange(10, 1000), "", MZAnalyzerType.Orbitrap, 10000, null, noiseData, "");

            // Hand-built inputs for the post-search analysis: one PSM for the observed peptide with no matched fragment ions.
            testPostTaskParameters.ProteinList = proteinList;
            testPostTaskParameters.AllPsms     = new List <PeptideSpectralMatch> {
                new PeptideSpectralMatch(peptideObserved, 0, 20, 1, new Ms2ScanWithSpecificMass(scan, 100, 1, @"", commonParam), commonParam, matchedFragmentIons)
            };
            testPostTaskParameters.SearchParameters = new SearchParameters();
            testPostTaskParameters.SearchParameters.WritePrunedDatabase = true;
            testPostTaskParameters.SearchParameters.DoQuantification    = false;
            testPostTaskParameters.SearchParameters.WriteMzId           = false;
            testPostTaskParameters.DatabaseFilenameList = new List <DbForTask>()
            {
                new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDb.xml"), false)
            };
            testPostTaskParameters.OutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest");
            Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/individual"));
            testPostTaskParameters.IndividualResultsOutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/individual");
            // Spectra counts keyed by file name; a single entry under the empty string with both values set to 10.
            int[] stuffForSpectraFile = new int[2];
            stuffForSpectraFile[0] = 10;
            stuffForSpectraFile[1] = 10;
            Dictionary <string, int[]> numSpectraPerFile = new Dictionary <string, int[]>();

            numSpectraPerFile.Add("", stuffForSpectraFile);
            testPostTaskParameters.NumMs2SpectraPerFile = numSpectraPerFile;

            // Write an mzML file built from the observed peptide (relative path, current working directory).
            MsDataFile myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications>
            {
                peptideObserved
            });
            string mzmlName = @"newMzml.mzML";

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

            // Added after both database writes above; modList is not passed to anything else in this method.
            modList.Add("test", Hash);

            testPostTaskParameters.CurrentRawFileList = new List <string>()
            {
                mzmlName
            };

            SearchTask task5 = new SearchTask
            {
                SearchParameters = new SearchParameters
                {
                    WritePrunedDatabase  = true,
                    SearchTarget         = true,
                    MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                },
                CommonParameters = new CommonParameters()
            };

            // Run the search task on PrunedDbTest/fakeDb.xml and feed its results into the post-search parameters.
            var test = task5.RunTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest"), new List <DbForTask>()
            {
                new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDb.xml"), false)
            }, new List <string>()
            {
                mzmlName
            }, "name");

            testPostTaskParameters.SearchTaskResults = test;

            PostSearchAnalysisTask testPostTask = new PostSearchAnalysisTask();

            testPostTask.Parameters             = testPostTaskParameters;
            testPostTask.CommonParameters       = commonParam;
            // NOTE(review): "newMzMl.mzml" differs in letter case from mzmlName ("newMzml.mzML") — confirm this is intended.
            testPostTask.FileSpecificParameters = new List <(string FileName, CommonParameters Parameters)> {
                ("newMzMl.mzml", commonParam)
            };
            testPostTask.Run();

            // Reload the first database copy written to 'path' at the top of the test.
            var proteinsLoaded = ProteinDbLoader.LoadProteinXML(path, true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods);

            // assert that mods on proteins are the same before/after task is run
            Assert.AreEqual(protein1Variants.First().Accession, proteinsLoaded.First().Accession);
            Assert.AreEqual(protein1Variants.First().OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.First().OneBasedPossibleLocalizedModifications.Count());
            Assert.AreEqual(protein2.OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.ElementAt(1).OneBasedPossibleLocalizedModifications.Count());

            // assert that protein pruned DB has correct proteins mods
            var proteinPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDbproteinPruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods1);

            Assert.That(proteinPruned.Count().Equals(1));
            Assert.That(proteinPruned.FirstOrDefault().OneBasedPossibleLocalizedModifications.Count().Equals(1));
            // assert that mod-pruned DB has correct proteins and mods
            var modPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDbpruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods2);

            Assert.That(modPruned.Count().Equals(2));
            Assert.That(modPruned.ElementAt(0).OneBasedPossibleLocalizedModifications.Count().Equals(1));
            Assert.That(modPruned.ElementAt(1).OneBasedPossibleLocalizedModifications.Count().Equals(1));
        }
        /// <summary>
        /// Runs a search task with <c>WritePrunedDatabase</c> enabled and a per-modification-type
        /// <c>ModsToWriteSelection</c>, then reloads the pruned XML database and checks which
        /// modifications ended up in it.
        /// </summary>
        /// <remarks>
        /// Writes "selectedMods.xml", "selectedModsObvs.xml" and "newMzml.mzML" using relative paths and
        /// deletes them, together with the output folder, after the last assertion. If an assertion
        /// fails, that cleanup is not reached.
        /// </remarks>
        public static void TestUserModSelectionInPrunedDB()
        {
            List<(string, string)> listOfModsFixed = new List<(string, string)>
            {
                ("Common Fixed", "Carbamidomethyl of C"), ("Common Fixed", "Carbamidomethyl of U")
            };

            // Create the search task that writes the pruned database
            SearchTask task5 = new SearchTask
            {
                SearchParameters = new SearchParameters
                {
                    WritePrunedDatabase = true,
                    SearchTarget = true,
                    MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                },
                CommonParameters = new CommonParameters(listOfModsFixed: listOfModsFixed)
            };

            // One write-selection value per modification type used below.
            // NOTE(review): judging by the mod names, 0 = never write, 1 = default, 2 = always write,
            // 3 = write when observed - confirm against SearchParameters.
            task5.SearchParameters.ModsToWriteSelection["Mod"] = 0;
            task5.SearchParameters.ModsToWriteSelection["Common Fixed"] = 1;
            task5.SearchParameters.ModsToWriteSelection["Glycan"] = 2;
            task5.SearchParameters.ModsToWriteSelection["missing"] = 3;

            // add task5 to the task list
            List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>
            {
                ("task5", task5)
            };

            ModificationMotif.TryGetMotif("P", out ModificationMotif motif);
            ModificationMotif.TryGetMotif("E", out ModificationMotif motif2);

            // One modification per type configured in ModsToWriteSelection above
            var connorMod = new Modification(_originalId: "ModToNotAppear", _modificationType: "Mod", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
            var connorMod2 = new Modification(_originalId: "Default(Mod in DB and Observed)", _modificationType: "Common Fixed", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
            var connorMod3 = new Modification(_originalId: "ModToAlwaysAppear", _modificationType: "Glycan", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
            var connorMod4 = new Modification(_originalId: "ModObservedNotinDB", _modificationType: "missing", _target: motif2, _locationRestriction: "Anywhere.", _monoisotopicMass: 5);

            GlobalVariables.AddMods(new List<Modification>
            {
                connorMod,
                connorMod2,
                connorMod3,
                connorMod4
            }, false);

            // create modification lists from the task's configured (type, id) pairs
            List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
                .Where(b => task5.CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif)))
                .ToList();
            List<Modification> fixedModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
                .Where(b => task5.CommonParameters.ListOfModsFixed.Contains((b.ModificationType, b.IdWithMotif)))
                .ToList();

            // add modification to Protein object
            var dictHere = new Dictionary<int, List<Modification>>();
            Modification modToAdd = connorMod;
            Modification modToAdd2 = connorMod2;
            Modification modToAdd3 = connorMod3;
            Modification modToAdd4 = connorMod4;

            // add the "missing" mod as a fixed modification so a mod that is observed but not in the DB can be tested
            fixedModifications.Add(connorMod4);
            listOfModsFixed.Add((connorMod4.ModificationType, connorMod4.IdWithMotif));

            dictHere.Add(1, new List<Modification> { modToAdd });
            dictHere.Add(2, new List<Modification> { modToAdd2 }); // default
            dictHere.Add(3, new List<Modification> { modToAdd3 }); // always appear

            var dictHere2 = new Dictionary<int, List<Modification>>
            {
                { 1, new List<Modification> { modToAdd } },
                { 2, new List<Modification> { modToAdd2 } }, // default
                { 3, new List<Modification> { modToAdd3 } }, // always appear
                { 4, new List<Modification> { modToAdd4 } }  // observed
            };

            // protein creation: one for the search database, one (with the extra mod at position 4) used to generate spectra
            Protein TestProteinWithModForDB = new Protein("PPPPPPPPPPE", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);
            Protein TestProteinWithModObsevred = new Protein("PPPPPPPPPPE", "accession1", "organism", new List<Tuple<string, string>>(), dictHere2);

            // First write XML database
            string xmlName = "selectedMods.xml";
            string xmlName2 = "selectedModsObvs.xml";

            // Add mods to list and write XML input database
            Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
            var Hash = new HashSet<Tuple<int, Modification>>
            {
                new Tuple<int, Modification>(1, modToAdd),
                new Tuple<int, Modification>(2, modToAdd2),
                new Tuple<int, Modification>(3, modToAdd3),
                new Tuple<int, Modification>(4, modToAdd4), // observed only
            };

            modList.Add("test", Hash);
            ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithModForDB }, xmlName);

            // Add observed only
            modList.Add("test2", Hash);
            ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithModObsevred }, xmlName2);

            // now create mzML data from the "observed" database
            var protein = ProteinDbLoader.LoadProteinXML(xmlName2, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);
            var digestedList = protein[0].Digest(task5.CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

            // take the first five digestion products
            PeptideWithSetModifications pepWithSetMods1 = digestedList[0];
            PeptideWithSetModifications pepWithSetMods2 = digestedList[1];
            PeptideWithSetModifications pepWithSetMods3 = digestedList[2];
            PeptideWithSetModifications pepWithSetMods4 = digestedList[3];
            PeptideWithSetModifications pepWithSetMods5 = digestedList[4];

            // CUSTOM PEP
            MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
            {
                pepWithSetMods1, pepWithSetMods2, pepWithSetMods3, pepWithSetMods4, pepWithSetMods5
            });
            string mzmlName = @"newMzml.mzML";

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

            // run the task against the database that does NOT contain the position-4 mod
            string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestUserModSelectionInPrunedDB");
            var engine = new EverythingRunnerEngine(
                taskList,
                new List<string> { mzmlName },
                new List<DbForTask> { new DbForTask(xmlName, false) },
                outputFolder);

            engine.Run();

            // reload the pruned database written by the task
            string final = Path.Combine(MySetUpClass.outputFolder, "task5", "selectedModspruned.xml");
            var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);
            var Dlist = proteins[0].GetVariantProteins().SelectMany(vp => vp.Digest(task5.CommonParameters.DigestionParams, fixedModifications, variableModifications)).ToList();

            // NUnit's Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly
            Assert.AreEqual(1, Dlist[0].NumFixedMods);

            // check length
            Assert.AreEqual(3, proteins[0].OneBasedPossibleLocalizedModifications.Count);
            List<Modification> listOfLocalMods = new List<Modification>();

            listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[2]);
            listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[3]);
            listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[11]);

            // check type, count, ID
            Assert.AreEqual("Common Fixed", listOfLocalMods[0].ModificationType);
            Assert.AreEqual("missing", listOfLocalMods[2].ModificationType);
            Assert.IsFalse(listOfLocalMods.Contains(connorMod)); // make sure that mod set not to show up is not in mod list

            Assert.AreEqual("Default(Mod in DB and Observed) on P", listOfLocalMods[0].IdWithMotif);
            Assert.AreEqual("ModToAlwaysAppear on P", listOfLocalMods[1].IdWithMotif);
            // makes sure the mod that was not in the DB but was observed is in the pruned DB
            Assert.AreEqual("ModObservedNotinDB on E", listOfLocalMods[2].IdWithMotif);
            Assert.AreEqual(3, listOfLocalMods.Count);

            // clean up the files written by this test
            Directory.Delete(outputFolder, true);
            File.Delete(mzmlName);
            File.Delete(xmlName);
            File.Delete(xmlName2);
        }
        /// <summary>
        /// Builds an index with <c>IndexingEngine</c> using <c>DissociationType.LowCID</c> and checks that
        /// every peptide digested from the first two loaded proteins is in the peptide index, and that each
        /// of its b and y fragments is listed in the fragment-index bin for its rounded mass.
        /// </summary>
        public static void TestIndexEngineLowRes()
        {
            var proteinList = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"indexEngineTestFasta.fasta"), true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                               ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);

            // no modifications are used in this test
            var variableModifications = new List<Modification>();
            var fixedModifications = new List<Modification>();

            CommonParameters commonParameters = new CommonParameters(dissociationType: DissociationType.LowCID, maxThreadsToUsePerFile: 1, scoreCutoff: 1, digestionParams: new DigestionParams(protease: "trypsin", minPeptideLength: 1));

            var engine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.Reverse, commonParameters, 30000, false, new List<FileInfo>(), new List<string>());

            var results = (IndexingResults)engine.Run();

            Assert.AreEqual(10, results.PeptideIndex.Count);

            // digest the first two proteins independently of the engine to get the expected peptides
            var digestedList = proteinList[0].Digest(commonParameters.DigestionParams, new List<Modification>(), variableModifications).ToList();

            digestedList.AddRange(proteinList[1].Digest(commonParameters.DigestionParams, new List<Modification>(), variableModifications));

            Assert.AreEqual(10, digestedList.Count);
            foreach (PeptideWithSetModifications peptide in digestedList)
            {
                Assert.Contains(peptide, results.PeptideIndex);

                var fragments = peptide.Fragment(commonParameters.DissociationType, FragmentationTerminus.Both).ToList();

                int positionInPeptideIndex = results.PeptideIndex.IndexOf(peptide);

                foreach (Product fragment in fragments.Where(f => f.ProductType == ProductType.b || f.ProductType == ProductType.y))
                {
                    // fragment mass rounded to the nearest multiple of 1.0005079
                    double fragmentMass = Math.Round(fragment.NeutralMass / 1.0005079, 0) * 1.0005079;
                    int integerMassRepresentation = (int)Math.Round(fragmentMass * 1000);

                    // look up the peptides that have fragments with this mass
                    // the result of the lookup is a list of peptide IDs that have this fragment mass
                    List<int> fragmentBin = results.FragmentIndex[integerMassRepresentation];

                    // this list should contain this peptide!
                    Assert.Contains(positionInPeptideIndex, fragmentBin);
                }
            }
        }
Exemple #26
0
        /// <summary>
        /// Checks the variant proteins produced for five proteins that each carry one sequence variation
        /// (single-residue substitution, two-residue substitution, insertion, deletion, and an insertion
        /// that carries a modification). The same expectations are applied to two in-memory expansions
        /// and to the list loaded back from an XML database written from the original proteins.
        /// </summary>
        public static void AppliedVariants()
        {
            // every sequence variation below uses this same description string
            const string variantDescription = @"1\t50000000\t.\tA\tG\t.\tPASS\tANN=G||||||||||||||||\tGT:AD:DP\t1/1:30,30:30";

            ModificationMotif.TryGetMotif("P", out ModificationMotif prolineMotif);
            Modification modOnP = new Modification("mod", null, "type", null, prolineMotif, "Anywhere.", null, 42.01, new Dictionary<string, IList<string>>(), null, null, null, null, null);

            var substitution = new Protein("MPEPTIDE", "protein1", sequenceVariations: new List<SequenceVariation>
            {
                new SequenceVariation(4, 4, "P", "V", variantDescription, null)
            });
            var multiResidueSubstitution = new Protein("MPEPTIDE", "protein2", sequenceVariations: new List<SequenceVariation>
            {
                new SequenceVariation(4, 5, "PT", "KT", variantDescription, null)
            });
            var insertion = new Protein("MPEPTIDE", "protein3", sequenceVariations: new List<SequenceVariation>
            {
                new SequenceVariation(4, 4, "P", "PPP", variantDescription, null)
            });
            var deletion = new Protein("MPEPPPTIDE", "protein4", sequenceVariations: new List<SequenceVariation>
            {
                new SequenceVariation(4, 6, "PPP", "P", variantDescription, null)
            });
            var insertionWithMod = new Protein("MPEPTIDE", "protein5", sequenceVariations: new List<SequenceVariation>
            {
                new SequenceVariation(4, 4, "P", "PPP", variantDescription, new Dictionary<int, List<Modification>>
                {
                    { 5, new List<Modification> { modOnP } }
                })
            });

            List<Protein> proteinsWithSeqVars = new List<Protein>
            {
                substitution,
                multiResidueSubstitution,
                insertion,
                deletion,
                insertionWithMod,
            };

            // expanding twice should give the same answer both times
            var firstExpansion = proteinsWithSeqVars.SelectMany(p => p.GetVariantProteins()).ToList();
            var secondExpansion = proteinsWithSeqVars.SelectMany(p => p.GetVariantProteins()).ToList();

            // write the proteins out and load them back
            string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "AppliedVariants.xml");

            ProteinDbWriter.WriteXmlDatabase(null, proteinsWithSeqVars, xmlPath);
            var loadedFromXml = ProteinDbLoader.LoadProteinXML(xmlPath, true, DecoyType.None, null, false, null, out var unknownMods);

            foreach (var variants in new[] { firstExpansion, secondExpansion, loadedFromXml })
            {
                // sequences
                Assert.AreEqual("MPEVTIDE", variants[0].BaseSequence);
                Assert.AreEqual("MPEKTIDE", variants[1].BaseSequence);
                Assert.AreEqual("MPEPPPTIDE", variants[2].BaseSequence);
                Assert.AreEqual("MPEPTIDE", variants[3].BaseSequence);
                Assert.AreEqual("MPEPPPTIDE", variants[4].BaseSequence);
                Assert.AreEqual(5, variants[4].OneBasedPossibleLocalizedModifications.Single().Key);

                // SAV
                Assert.AreEqual(4, variants[0].AppliedSequenceVariations.Single().OneBasedBeginPosition);
                Assert.AreEqual(4, variants[0].AppliedSequenceVariations.Single().OneBasedEndPosition);

                // MNV
                Assert.AreEqual(4, variants[1].AppliedSequenceVariations.Single().OneBasedBeginPosition);
                Assert.AreEqual(5, variants[1].AppliedSequenceVariations.Single().OneBasedEndPosition);

                // insertion
                Assert.AreEqual(4, variants[2].AppliedSequenceVariations.Single().OneBasedBeginPosition);
                Assert.AreEqual(6, variants[2].AppliedSequenceVariations.Single().OneBasedEndPosition);

                // deletion
                Assert.AreEqual(4, variants[3].AppliedSequenceVariations.Single().OneBasedBeginPosition);
                Assert.AreEqual(4, variants[3].AppliedSequenceVariations.Single().OneBasedEndPosition);
            }
        }
 /// <summary>
 /// Loads "DatabaseTests/xml2.xml" while passing null for both list arguments of
 /// <c>LoadProteinXML</c>. There are no assertions, so the test only fails if loading throws.
 /// </summary>
 public void ReadXmlNulls()
 {
     string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml");

     var loadedProteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null,
                                                         out Dictionary<string, Modification> unknownModifications);
 }
        /// <summary>
        /// Builds a protein with gene names, modifications, a proteolysis product, database references,
        /// sequence variations and disulfide bonds, writes it to "DatabaseTests/bnueiwhf.xml", loads it
        /// back and compares the reloaded protein with the original field by field.
        /// </summary>
        public void TestFullProteinReadWrite()
        {
            // the same file is used as the protein's database path, the write target and the read source
            string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"bnueiwhf.xml");

            // three modifications are supplied; the assertions further down expect only position 5 to be kept
            Modification modWithoutMotif = new Modification("mod1", null, "modType1", null, null, null, null, null, null, null, null, null, null, null);

            ModificationMotif.TryGetMotif("E", out ModificationMotif motifE);
            Modification modOnE = new Modification("mod2 on E", null, "modType1", null, motifE, "Anywhere.", null, null, null, null, null, null, null, null);

            ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
            Modification modOnN = new Modification("mod3 on N", null, "modType1", null, motifN, "Anywhere.", null, 10, null, null, null, null, null, null);

            List<Tuple<string, string>> geneNames = new List<Tuple<string, string>>
            {
                new Tuple<string, string>("a", "b")
            };

            IDictionary<int, List<Modification>> oneBasedModifications = new Dictionary<int, List<Modification>>
            {
                { 3, new List<Modification> { modWithoutMotif } },
                { 4, new List<Modification> { modOnE } },
                { 5, new List<Modification> { modOnN } }
            };

            List<ProteolysisProduct> proteolysisProducts = new List<ProteolysisProduct>
            {
                new ProteolysisProduct(1, 2, "propeptide")
            };

            string proteinName = "testName";
            string proteinFullName = "testFullName";

            List<DatabaseReference> databaseReferences = new List<DatabaseReference>
            {
                new DatabaseReference("type1", "id1", new List<Tuple<string, string>>
                {
                    new Tuple<string, string>("e1", "e2")
                })
            };

            List<SequenceVariation> sequenceVariations = new List<SequenceVariation>
            {
                new SequenceVariation(3, "Q", "N", "replace Q by N"),
                new SequenceVariation(3, 4, "QE", "NN", "replace QE by NN")
            };

            List<DisulfideBond> disulfideBonds = new List<DisulfideBond>
            {
                new DisulfideBond(1, "ds1"),
                new DisulfideBond(2, 3, "ds2")
            };

            Protein p1 = new Protein(
                "SEQENCE",
                "a1",
                geneNames: geneNames,
                oneBasedModifications: oneBasedModifications,
                proteolysisProducts: proteolysisProducts,
                name: proteinName,
                fullName: proteinFullName,
                isDecoy: false,
                isContaminant: true,
                databaseReferences: databaseReferences,
                sequenceVariations: sequenceVariations,
                disulfideBonds: disulfideBonds,
                databaseFilePath: databasePath);

            // write the protein out ...
            ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), new List<Protein> { p1 }, databasePath);

            // ... and read it back with empty known-modification and excluded-type lists
            IEnumerable<string> modTypesToExclude = new List<string>();
            IEnumerable<Modification> allKnownModifications = new List<Modification>();
            List<Protein> loadedProteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None,
                                                                          allKnownModifications, true, modTypesToExclude, out Dictionary<string, Modification> unknownModifications);
            Protein reloaded = loadedProteins[0];

            // identity
            Assert.AreEqual(p1.Accession, reloaded.Accession);
            Assert.AreEqual(p1.BaseSequence, reloaded.BaseSequence);

            // database reference
            var expectedReference = p1.DatabaseReferences.First();
            var actualReference = reloaded.DatabaseReferences.First();
            Assert.AreEqual(expectedReference.Id, actualReference.Id);
            Assert.AreEqual(expectedReference.Properties.First().Item1, actualReference.Properties.First().Item1);
            Assert.AreEqual(expectedReference.Properties.First().Item2, actualReference.Properties.First().Item2);
            Assert.AreEqual(expectedReference.Type, actualReference.Type);

            // disulfide bonds (first and last)
            var expectedFirstBond = p1.DisulfideBonds.First();
            var actualFirstBond = reloaded.DisulfideBonds.First();
            Assert.AreEqual(expectedFirstBond.Description, actualFirstBond.Description);
            Assert.AreEqual(expectedFirstBond.OneBasedBeginPosition, actualFirstBond.OneBasedBeginPosition);
            Assert.AreEqual(expectedFirstBond.OneBasedEndPosition, actualFirstBond.OneBasedEndPosition);

            var expectedLastBond = p1.DisulfideBonds.Last();
            var actualLastBond = reloaded.DisulfideBonds.Last();
            Assert.AreEqual(expectedLastBond.Description, actualLastBond.Description);
            Assert.AreEqual(expectedLastBond.OneBasedBeginPosition, actualLastBond.OneBasedBeginPosition);
            Assert.AreEqual(expectedLastBond.OneBasedEndPosition, actualLastBond.OneBasedEndPosition);

            // scalar properties
            Assert.AreEqual(p1.FullDescription, reloaded.FullDescription);
            Assert.AreEqual(p1.FullName, reloaded.FullName);
            Assert.AreEqual(p1.GeneNames, reloaded.GeneNames);
            Assert.AreEqual(p1.IsContaminant, reloaded.IsContaminant);
            Assert.AreEqual(p1.IsDecoy, reloaded.IsDecoy);
            Assert.AreEqual(p1.Length, reloaded.Length);
            Assert.AreEqual(p1.Name, reloaded.Name);
            Assert.AreEqual(p1.Organism, reloaded.Organism);
            Assert.AreEqual(p1.DatabaseFilePath, reloaded.DatabaseFilePath);

            // modifications: a single position is expected on both sides, and the mod at position 5 must match
            Assert.AreEqual(1, p1.OneBasedPossibleLocalizedModifications.Keys.Count);
            Assert.AreEqual(1, reloaded.OneBasedPossibleLocalizedModifications.Keys.Count);
            Assert.AreEqual(p1.OneBasedPossibleLocalizedModifications.Keys.First(), reloaded.OneBasedPossibleLocalizedModifications.Keys.First());
            Assert.IsTrue(p1.OneBasedPossibleLocalizedModifications[5][0].Equals(reloaded.OneBasedPossibleLocalizedModifications[5][0]));

            // proteolysis product
            var expectedProduct = p1.ProteolysisProducts.First();
            var actualProduct = reloaded.ProteolysisProducts.First();
            Assert.AreEqual(expectedProduct.OneBasedBeginPosition, actualProduct.OneBasedBeginPosition);
            Assert.AreEqual(expectedProduct.OneBasedEndPosition, actualProduct.OneBasedEndPosition);
            Assert.AreEqual(expectedProduct.Type, actualProduct.Type);

            // sequence variations (first and last)
            var expectedFirstVariation = p1.SequenceVariations.First();
            var actualFirstVariation = reloaded.SequenceVariations.First();
            Assert.AreEqual(expectedFirstVariation.Description, actualFirstVariation.Description);
            Assert.AreEqual(expectedFirstVariation.OneBasedBeginPosition, actualFirstVariation.OneBasedBeginPosition);
            Assert.AreEqual(expectedFirstVariation.OneBasedEndPosition, actualFirstVariation.OneBasedEndPosition);
            Assert.AreEqual(expectedFirstVariation.OriginalSequence, actualFirstVariation.OriginalSequence);
            Assert.AreEqual(expectedFirstVariation.VariantSequence, actualFirstVariation.VariantSequence);

            var expectedLastVariation = p1.SequenceVariations.Last();
            var actualLastVariation = reloaded.SequenceVariations.Last();
            Assert.AreEqual(expectedLastVariation.Description, actualLastVariation.Description);
            Assert.AreEqual(expectedLastVariation.OneBasedBeginPosition, actualLastVariation.OneBasedBeginPosition);
            Assert.AreEqual(expectedLastVariation.OneBasedEndPosition, actualLastVariation.OneBasedEndPosition);
            Assert.AreEqual(expectedLastVariation.OriginalSequence, actualLastVariation.OriginalSequence);
            Assert.AreEqual(expectedLastVariation.VariantSequence, actualLastVariation.VariantSequence);
        }
Exemple #29
0
        /// <summary>
        /// Runs a classic search and FDR analysis on a small mzML/FASTA pair, checks that the
        /// <c>PEP_Analysis</c> feature record built for the top-scoring PSM agrees with that PSM,
        /// then computes PEP values for all PSMs and bounds how many have PEP of at least 0.5.
        /// </summary>
        public static void TestComputePEPValue()
        {
            var variableModifications = new List<Modification>();
            var fixedModifications = new List<Modification>();

            // Path segments are passed separately so the platform's directory separator is used;
            // a literal backslash inside a segment only resolves on Windows.
            var origDataFile = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "TaGe_SA_HeLa_04_subset_longestSeq.mzML");
            string fastaPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "hela_snip_for_unitTest.fasta");

            MyFileManager myFileManager = new MyFileManager(true);
            CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams());
            var myMsDataFile = myFileManager.LoadFile(origDataFile, commonParameters);
            var searchModes = new SinglePpmAroundZeroSearchMode(5);
            List<Protein> proteinList = ProteinDbLoader.LoadProteinFasta(fastaPath, true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                         ProteinDbLoader.UniprotOrganismRegex, out var dbErrors, -1);
            var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, commonParameters).OrderBy(b => b.PrecursorMass).ToArray();

            // the search fills one slot per scan; slots may stay null, hence the null filters below
            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, commonParameters, new List<string>()).Run();

            // the FDR engine is run on its own list instance; its returned results object is not needed here
            new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, commonParameters, new List<string>()).Run();

            var nonNullPsms = allPsmsArray.Where(p => p != null).ToList();
            var accessionCounts = PEP_Analysis.GetAccessionCounts(nonNullPsms);

            var maxScore = nonNullPsms.Select(n => n.Score).Max();
            var maxScorePsm = nonNullPsms.First(n => n.Score == maxScore);

            // count PSMs per sequence key, where the key is the "|"-joined full sequences of a PSM's best matches
            Dictionary<string, int> sequenceToPsmCount = nonNullPsms
                .Select(psm => String.Join("|", psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList()))
                .GroupBy(sequence => sequence)
                .ToDictionary(grp => grp.Key, grp => grp.Count());

            var maxPsmData = PEP_Analysis.CreateOnePsmDataFromPsm(maxScorePsm, accessionCounts, sequenceToPsmCount);

            Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count, Is.EqualTo(maxPsmData.Ambiguity));
            Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
            // the fractional part of the score is compared against the record's Intensity
            Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));

            var bestPeptide = maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First();
            Assert.That(bestPeptide.MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
            Assert.That(bestPeptide.AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
            Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
            Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount));
            Assert.That(maxScorePsm.ScanPrecursorCharge, Is.EqualTo(maxPsmData.ScanPrecursorCharge));

            PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms);

            int trueCount = allPsmsArray.Where(p => p != null).Count(p => p.FdrInfo.PEP >= 0.5);

            // GreaterOrEqual(arg1, arg2) asserts arg1 >= arg2, i.e. at most 32 PSMs may have PEP >= 0.5
            Assert.GreaterOrEqual(32, trueCount);
        }
Exemple #30
0
        /// <summary>
        /// Reads the modification list from "cRAP_databaseGPTMD.xml" in the test directory via
        /// <c>ProteinDbLoader.GetPtmListFromProteinXml</c> and expects it to contain 70 entries.
        /// </summary>
        public void Test_getptms_from_mzLibxml_without_prep()
        {
            string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"cRAP_databaseGPTMD.xml");
            List<Modification> ptms = ProteinDbLoader.GetPtmListFromProteinXml(databasePath);

            Assert.AreEqual(70, ptms.Count);
        }