Beispiel #1
0
        public void Test_read_write_read_xml()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), false, nice, false, null, out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), false, nice, false, null, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(9, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok[0].GeneNames.Count());
            Assert.AreEqual("primary", ok[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok[0].GeneNames.First().Item2);

            Assert.AreEqual(9, ok2[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok2[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok2[0].GeneNames.Count());
            Assert.AreEqual("primary", ok2[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok2[0].GeneNames.First().Item2);

            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
Beispiel #2
0
        public void test_read_Ensembl_pepAllFasta()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            Dictionary <string, Modification> un;
            List <Protein> ok = ProteinDbLoader.LoadProteinDb(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_ensembl.pep.all.fasta"), false, nice, false, out un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinDb(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.xml"), false, nice, false, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual("ENSP00000381386", ok[0].Accession);
            Assert.AreEqual("ENSP00000215773", ok[1].Accession);
            Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding", ok[0].FullName);
            Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24322019:-1 gene:ENSG00000099977 transcript:ENST00000350608 gene_biotype:protein_coding transcript_biotype:protein_coding", ok[1].FullName);

            Assert.True(ok.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
            Assert.True(ok.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
            Assert.True(ok2.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
            Assert.True(ok2.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
        }
        public void DoNotWriteSameModTwiceButWriteInHeaderSinceDifferent()
        {
            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements2.dat"));
            var     sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(TestContext.CurrentContext.TestDirectory, "z.txt")).ToList();
            Protein protein       = new Protein("MCSSSSSSSSSS", "accession", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> > {
                { 2, sampleModList.OfType <Modification>().ToList() }
            }, null, "name", "full_name", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType <ModificationWithMass>().Count());

            Dictionary <string, HashSet <Tuple <int, Modification> > > dictWithThisMod = new Dictionary <string, HashSet <Tuple <int, Modification> > >();

            HashSet <Tuple <int, Modification> > value = new HashSet <Tuple <int, Modification> >();

            ModificationMotif.TryGetMotif("C", out ModificationMotif motif);
            ModificationWithMass newMod = new ModificationWithMass("Palmitoylation of C", "mt", motif, TerminusLocalization.Any, double.NaN, null, null);

            Assert.AreNotEqual(newMod, sampleModList.First());

            value.Add(new Tuple <int, Modification>(2, newMod));

            dictWithThisMod.Add("accession", value);

            var newModResEntries = ProteinDbWriter.WriteXmlDatabase(dictWithThisMod, new List <Protein> {
                protein
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins2.xml"));

            Assert.AreEqual(0, newModResEntries.Count);
            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins2.xml"), true, DecoyType.None, new List <Modification>(), false, new List <string>(), out Dictionary <string, Modification> um);

            Assert.AreEqual(1, new_proteins.Count);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(2, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
        }
Beispiel #4
0
        public void Test_write_with_custom_mods()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            ModificationMotif.TryGetMotif("K", out ModificationMotif motif);
            ModificationWithMass m = new ModificationWithMass("mod", new Tuple <string, string>("", ""), motif, ModificationSites.Any, 1, new Dictionary <string, IList <string> >(), new List <double> {
                -1
            }, new List <double>(), "");

            Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > > new_mods = new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >
            {
                { "P53863", new HashSet <Tuple <int, ModificationWithMass> > {
                      new Tuple <int, ModificationWithMass>(2, m)
                  } }
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), false, nice, false, null, out Dictionary <string, Modification> un);
            var            newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));

            Assert.AreEqual(1, newModResEntries.Count);
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), false, nice, false, null, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(2, ok[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(3, ok2[0].OneBasedPossibleLocalizedModifications.Count);
        }
Beispiel #5
0
        [TestCase("ranges2.xml", 1, 1, 5, 5)] // with starting methionine
        public static void ReverseDecoyProteolysisProducts(string databaseName, int beginIdx, int reversedBeginIdx, int endIdx, int reversedEndIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.ProteolysisProducts.Count());
            Assert.AreEqual(beginIdx, target.ProteolysisProducts.Single().OneBasedBeginPosition); //P[start]EPTI[end]D, M[start]EPTI[end]D
            Assert.AreEqual(endIdx, target.ProteolysisProducts.Single().OneBasedEndPosition);
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.ProteolysisProducts.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.ProteolysisProducts.Single().OneBasedBeginPosition); //DI[start]TPEP[end], M[start]DITP[end]E
            Assert.AreEqual(reversedEndIdx, decoy.ProteolysisProducts.Single().OneBasedEndPosition);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.ProteolysisProducts.Count());
            Assert.AreEqual(beginIdx, target.ProteolysisProducts.Single().OneBasedBeginPosition);
            Assert.AreEqual(endIdx, target.ProteolysisProducts.Single().OneBasedEndPosition);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.ProteolysisProducts.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.ProteolysisProducts.Single().OneBasedBeginPosition);
            Assert.AreEqual(reversedEndIdx, decoy.ProteolysisProducts.Single().OneBasedEndPosition);
        }
        public void Modification_read_write_into_proteinDb()
        {
            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements2.dat"));
            var sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(TestContext.CurrentContext.TestDirectory, "z.txt")).ToList();

            Assert.AreEqual(1, sampleModList.OfType <ModificationWithMass>().Count());
            Protein protein = new Protein("MCSSSSSSSSSS", "accession", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> > {
                { 2, sampleModList.OfType <Modification>().ToList() }
            }, null, "name", "full_name", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType <ModificationWithMass>().Count());
            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                protein
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins.xml"));
            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins.xml"), true, DecoyType.None, new List <Modification>(), false, new List <string>(), out Dictionary <string, Modification> um);

            Assert.AreEqual(1, new_proteins.Count);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
            Assert.AreEqual("Type", new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).OfType <ModificationWithMass>().First().modificationType);
            Assert.AreEqual("Palmitoylation of C", new_proteins[0].OneBasedPossibleLocalizedModifications[2][0].id);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications[2].OfType <ModificationWithMass>().Count());

            // Check that Modifications were saved after last load
            Assert.AreEqual(1, ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_modifications_with_proteins.xml")).Count);
            Assert.True(ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_modifications_with_proteins.xml"))[0] == new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).First());

            //But that we can still read modifications from other protein XMLs that exist
            Assert.AreEqual(0, ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(TestContext.CurrentContext.TestDirectory, "xml.xml")).Count);
        }
Beispiel #7
0
        [TestCase("bonds2.xml", 2, 4, "MDICPC", 4, 6)] // with starting methionine
        public static void ReverseDecoyDisulfideBonds(string databaseName, int beginIdx, int reversedBeginIdx, string reversedSequence, int endIdx, int reversedEndIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.DisulfideBonds.Count());
            Assert.AreEqual(beginIdx, target.DisulfideBonds.Single().OneBasedBeginPosition); //PC[start]PC[end]ID, MC[start]PC[end]ID
            Assert.AreEqual(endIdx, target.DisulfideBonds.Single().OneBasedEndPosition);
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.DisulfideBonds.Count());
            Assert.AreEqual(reversedSequence, decoy.BaseSequence);
            Assert.AreEqual(reversedBeginIdx, decoy.DisulfideBonds.Single().OneBasedBeginPosition); //DIC[start]PC[end]P, MDIC[start]PC[end]
            Assert.AreEqual(reversedEndIdx, decoy.DisulfideBonds.Single().OneBasedEndPosition);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.DisulfideBonds.Count());
            Assert.AreEqual(beginIdx, target.DisulfideBonds.Single().OneBasedBeginPosition);
            Assert.AreEqual(endIdx, target.DisulfideBonds.Single().OneBasedEndPosition);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.DisulfideBonds.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.DisulfideBonds.Single().OneBasedBeginPosition);
            Assert.AreEqual(reversedEndIdx, decoy.DisulfideBonds.Single().OneBasedEndPosition);
        }
        public void TestEmptyProteins()
        {
            Protein p1 = new Protein("SEQENCE", "p1");

            Assert.AreEqual("p1||", p1.FullDescription);
            Protein p2 = new Protein("SEQENCE", "p2", name: "namep2");

            var proteinListToWrite = new List <Protein> {
                p1, p2
            };

            // Generate data for files
            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), proteinListToWrite,
                                             Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"differentlyConstuctedProteins.xml"));

            IEnumerable <string>       modTypesToExclude     = new List <string>();
            IEnumerable <Modification> allKnownModifications = new List <Modification>();
            List <Protein>             ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"differentlyConstuctedProteins.xml"), true, DecoyType.None,
                                                                           allKnownModifications, false, modTypesToExclude, out Dictionary <string, Modification> un);

            Assert.AreEqual(p1.Accession, ok[0].Accession);
            Assert.AreEqual(p2.Accession, ok[1].Accession);
            Assert.AreEqual(p1.Name, ok[0].Name);
            Assert.AreEqual(p2.Name, ok[1].Name);
        }
Beispiel #9
0
        public static void LoadIsoforms()
        {
            var protein = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "Isoform.fasta"), true, DecoyType.None, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var errors);

            Assert.AreEqual("Q13409", protein[0].Accession);
            Assert.AreEqual("Q13409_2", protein[1].Accession);
            Assert.AreEqual("Q13409_3", protein[2].Accession);
            Assert.AreEqual("Q13813", protein[3].Accession);
            Assert.AreEqual("Q13813_2", protein[4].Accession);
            Assert.AreEqual("Q13813_3", protein[5].Accession);
            Assert.AreEqual("Q14103", protein[6].Accession);
            Assert.AreEqual("Q14103_2", protein[7].Accession);
            Assert.AreEqual("Q14103_3", protein[8].Accession);
            Assert.AreEqual("Q14103_4", protein[9].Accession);
            Dictionary <string, HashSet <Tuple <int, Modification> > > mods = new Dictionary <string, HashSet <Tuple <int, Modification> > >();

            ProteinDbWriter.WriteXmlDatabase(mods, protein, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "IsoformTest.xml"));
            var proteinXml = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "IsoformTest.xml"), true, DecoyType.None, null, false, null, out var unknownMod);

            Assert.AreEqual("Q13409", proteinXml[0].Accession);
            Assert.AreEqual("Q13409_2", proteinXml[1].Accession);
            Assert.AreEqual("Q13409_3", proteinXml[2].Accession);
            Assert.AreEqual("Q13813", proteinXml[3].Accession);
            Assert.AreEqual("Q13813_2", proteinXml[4].Accession);
            Assert.AreEqual("Q13813_3", proteinXml[5].Accession);
            Assert.AreEqual("Q14103", proteinXml[6].Accession);
            Assert.AreEqual("Q14103_2", proteinXml[7].Accession);
            Assert.AreEqual("Q14103_3", proteinXml[8].Accession);
            Assert.AreEqual("Q14103_4", proteinXml[9].Accession);
        }
        public static void TestContaminantAmbiguity()
        {
            //create an ms file and a database for the peptide
            Protein targetProtein = new Protein("PEPTIDE", "target");
            string  xmlName       = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PEPTIDE.xml");

            ProteinDbWriter.WriteXmlDatabase(null, new List <Protein> {
                targetProtein
            }, xmlName);
            PeptideWithSetModifications pepWithSetMods = targetProtein.Digest(new DigestionParams(), null, null).First();
            TestDataFile msFile   = new TestDataFile(pepWithSetMods);
            string       mzmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PEPTIDE.mzML");

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msFile, mzmlName, false);
            string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestContaminantAmbiguityOutput");

            //run a full modern search using two databases (the same database) but one is called a target and the other is called a contaminant
            //KEEP BOTH TARGET AND CONTAMINANT
            SearchParameters modernSearchParams = new SearchParameters();

            modernSearchParams.SearchType  = SearchType.Modern;
            modernSearchParams.TCAmbiguity = TargetContaminantAmbiguity.RenameProtein;
            SearchTask modernTask = new SearchTask();

            modernTask.SearchParameters = modernSearchParams;

            EverythingRunnerEngine engine = new EverythingRunnerEngine(new List <(string, MetaMorpheusTask)> {
                ("task1", modernTask)
            }, new List <string> {
Beispiel #11
0
        [TestCase("splices8.xml", 1, 5, 2, 6)] // range with start with starting methionine
        public static void ReverseDecoySpliceSites(string databaseName, int beginIdx, int reversedBeginIdx, int endIdx, int reversedEndIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.SpliceSites.Count());
            Assert.AreEqual(beginIdx, target.SpliceSites.Single().OneBasedBeginPosition); //PE[start]P[end]TID, ME[start]P[start]TID, PE[site]PTID, ME[site]PTID, P[site]EPTID, M[site]EPTID
            Assert.AreEqual(endIdx, target.SpliceSites.Single().OneBasedEndPosition);
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.SpliceSites.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.SpliceSites.Single().OneBasedBeginPosition); //DITP[start]E[end]P, MDITP[start]E[end], DITPE[site]P, MDITPE[site], DITPEP[site], M[site]DITPE
            Assert.AreEqual(reversedEndIdx, decoy.SpliceSites.Single().OneBasedEndPosition);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.SpliceSites.Count());
            Assert.AreEqual(beginIdx, target.SpliceSites.Single().OneBasedBeginPosition);
            Assert.AreEqual(endIdx, target.SpliceSites.Single().OneBasedEndPosition);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.SpliceSites.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.SpliceSites.Single().OneBasedBeginPosition);
            Assert.AreEqual(reversedEndIdx, decoy.SpliceSites.Single().OneBasedEndPosition);

            List <PeptideWithSetModifications> peptides = proteins.SelectMany(vp => vp.Digest(new DigestionParams(), null, null)).ToList();
        }
Beispiel #12
0
        public List <string> TransferModifications(string spritzDirectory, string sourceXmlPath, List <string> destinationXmlPaths, List <Protein> additionalProteins)
        {
            var           uniprotPtms = ProteinAnnotation.GetUniProtMods(spritzDirectory);
            List <string> outxmls     = new List <string>();

            var uniprot = File.Exists(sourceXmlPath) ?
                          ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out Dictionary <string, Modification> un) :
                          new List <Protein>();

            foreach (var xml in destinationXmlPaths)
            {
                if (xml == null || !File.Exists(xml))
                {
                    continue;
                }
                string outxml          = Path.Combine(Path.GetDirectoryName(xml), Path.GetFileNameWithoutExtension(xml) + ".withmods.xml");
                var    nonVariantProts = ProteinDbLoader.LoadProteinXML(xml, true, DecoyType.None, uniprotPtms, false, null, out un).Select(p => p.NonVariantProtein).Distinct();
                var    newProts        = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.Concat(additionalProteins).ToList());
                ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);
                string outfasta = Path.Combine(Path.GetDirectoryName(xml), Path.GetFileNameWithoutExtension(xml) + ".spritz.fasta");
                ProteinDbWriter.WriteFastaDatabase(newProts.SelectMany(p => p.GetVariantProteins()).ToList(), outfasta, "|");
                outxmls.Add(outxml);
            }
            return(outxmls);
        }
        public void Test_read_write_read_xml()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", "mt", motif, TerminusLocalization.Any, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), true, DecoyType.None, nice, false, null, out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), true, DecoyType.None, nice, false, new List <string>(), out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(9, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok[0].GeneNames.Count());
            Assert.AreEqual("primary", ok[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), ok[0].DatabaseFilePath);
            Assert.AreEqual(9, ok2[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok2[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok2[0].GeneNames.Count());
            Assert.AreEqual("primary", ok2[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok2[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok2[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), ok2[0].DatabaseFilePath);
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
        /// <summary>
        /// Takes coding effect files from star-fusion to produce proteins
        /// </summary>
        /// <param name="codingEffectFilePath"></param>
        /// <returns></returns>
        public static string ParseCodingEffectsToXml(string codingEffectFilePaths, int minPeptideLength = 7, string organism = "H**o sapiens")
        {
            List <Protein>   fusionProteins = new List <Protein>();
            HashSet <string> usedAccessions = new HashSet <string>();
            var paths = codingEffectFilePaths.Split(",");

            foreach (string path in paths)
            {
                using (StreamReader reader = new StreamReader(path))
                {
                    while (true)
                    {
                        string line = reader.ReadLine();
                        if (line == null)
                        {
                            break;
                        }
                        if (line.StartsWith("#"))
                        {
                            break;
                        }                                    // skip header
                        string[] s          = line.Split('\t');
                        var      fusionName = s[0];
                        var      junctionSpanningReadCount = s[1];
                        var      spliceType               = s[3];
                        var      leftGene                 = s[4];
                        var      leftBreakpoint           = s[5];
                        var      rightGene                = s[6];
                        var      rightBreakpoint          = s[7];
                        var      ffpm                     = s[9];
                        var      annots                   = s[14];
                        var      proteinFusionType        = s[19];
                        var      proteinFusionTranslation = s[22];
                        var      proteinSequence          = proteinFusionTranslation.Split('*')[0];
                        if (proteinSequence.Length < minPeptideLength)
                        {
                            continue;
                        }
                        string accession = fusionName;
                        int    i         = 1;
                        while (usedAccessions.Contains(accession))
                        {
                            accession = fusionName + "_" + i++.ToString();
                        }
                        usedAccessions.Add(accession);
                        fusionProteins.Add(new Protein(proteinSequence, accession, organism, new List <Tuple <string, string> > {
                            new Tuple <string, string>("fusion", leftGene + "--" + rightGene)
                        }));
                    }
                }
            }
            string fusionProteinXmlPath = Path.Combine(Path.GetDirectoryName(paths[0]), "FusionProteins.xml");

            ProteinDbWriter.WriteXmlDatabase(null, fusionProteins, fusionProteinXmlPath);
            return(fusionProteinXmlPath);
        }
Beispiel #15
0
        public void AnotherTest()
        {
            List <ModificationWithMass> variableModifications = new List <ModificationWithMass>();
            List <ModificationWithMass> fixedModifications    = new List <ModificationWithMass>();

            // Generate data for files
            Protein ParentProtein = new Protein("MPEPTIDEKANTHE", "accession1", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), new int?[0], new int?[0], new string[0], "name1", "fullname1", false, false, new List <DatabaseReference>(), new List <SequenceVariation>());

            Protein proteinWithChain = new Protein("MAACNNNCAA", "accession3", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), new int?[] { 4 }, new int?[] { 8 }, new string[] { "chain" }, "name2", "fullname2", false, false, new List <DatabaseReference>(), new List <SequenceVariation>());

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), new List <Protein> {
                ParentProtein, proteinWithChain
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, @"fdsfsd.xml"));
        }
Beispiel #16
0
        [TestCase("oblm3.xml", 3, 5)] // with starting methionine
        public static void LoadSeqVarModifications(string databaseName, int modIdx, int reversedModIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(modIdx, target.OneBasedPossibleLocalizedModifications.Single().Key);
            Assert.AreEqual(1, target.AppliedSequenceVariations.Count());
            Assert.AreEqual(modIdx, target.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Count());
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedModifications.Single().Key); //PEP[mod]TID, MEP[mod]TID
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.OneBasedPossibleLocalizedModifications.Single().Key); //DITP[mod]EP, MDITP[mod]E
            Assert.AreEqual(1, decoy.AppliedSequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedModifications.Single().Key);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(modIdx, target.OneBasedPossibleLocalizedModifications.Single().Key);
            Assert.AreEqual(1, target.AppliedSequenceVariations.Count());
            Assert.AreEqual(modIdx, target.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Count());
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedModifications.Single().Key);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.OneBasedPossibleLocalizedModifications.Single().Key);
            Assert.AreEqual(1, decoy.AppliedSequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedModifications.Single().Key);
        }
        public static string TransferModifications(string sourceXmlPath, string destinationXmlPath)
        {
            var    uniprotPtms     = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);
            var    uniprot         = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out var un);
            string outxml          = Path.Combine(Path.GetDirectoryName(destinationXmlPath), Path.GetFileNameWithoutExtension(destinationXmlPath) + ".withmods.xml");
            var    nonVariantProts = ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out un).Select(p => p.NonVariantProtein).Distinct();
            var    newProts        = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.ToList());

            ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);
            string outfasta = Path.Combine(Path.GetDirectoryName(destinationXmlPath), Path.GetFileNameWithoutExtension(destinationXmlPath) + ".fasta");
            var    prot     = newProts.FirstOrDefault(p => p.Accession.Contains("_"));

            ProteinDbWriter.WriteFastaDatabase(newProts.SelectMany(p => p.GetVariantProteins()).ToList(), outfasta, "|");
            return(outxml);
        }
Beispiel #18
0
        public void TestWritePtmWithNeutralLossAndDiagnosticIons()
        {
            string filename = "test_neutral_loss_diagnostic_ion_mod.xml";
            Dictionary <int, List <Modification> > mods = new Dictionary <int, List <Modification> >();

            ModificationMotif.TryGetMotif("T", out var motif);
            Modification m = new Modification(_originalId: "Phospho", _modificationType: "Test", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 80.0, _neutralLosses: new Dictionary <DissociationType, List <double> > {
                { DissociationType.HCD, new List <double> {
                      80.0, 0
                  } }, { DissociationType.ETD, new List <double> {
                             70.0, 0
                         } }
            }, _diagnosticIons: new Dictionary <DissociationType, List <double> > {
                { DissociationType.CID, new List <double> {
                      60.0, 0
                  } }, { DissociationType.EThcD, new List <double> {
                             40.0, 0
                         } }
            });

            Assert.That(m.ValidModification);

            mods.Add(4, new List <Modification> {
                m
            });

            Protein protein = new Protein("PEPTIDE", "accession", oneBasedModifications: mods);

            Assert.That(protein.OneBasedPossibleLocalizedModifications.Count == 1);
            Assert.That(protein.OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                protein
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, filename));

            // with passed-in mods
            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, filename), true, DecoyType.None, new List <Modification> {
                m
            }, false, new List <string>(), out Dictionary <string, Modification> um);

            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);
            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);

            // should be able to read mod from top of database...
            new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, filename), true, DecoyType.None, new List <Modification>(), false, new List <string>(), out um);
            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);
            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);
        }
Beispiel #19
0
        public void TestReadWriteSpliceVars()
        {
            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"spliceVar.xml"), true, DecoyType.None,
                                                               null, false, new List <string>(), out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_spliceVar.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_spliceVar.xml"), true, DecoyType.None,
                                                                null, false, new List <string>(), out un);

            Assert.AreEqual(ok[0].SpliceVariants.Count(), ok2[0].SpliceVariants.Count());
            Assert.AreEqual(ok[0].SpliceVariants.First().OneBasedBeginPosition, ok2[0].SpliceVariants.First().OneBasedBeginPosition);
            Assert.AreEqual(ok[0].SpliceVariants.First().OneBasedEndPosition, ok2[0].SpliceVariants.First().OneBasedEndPosition);
            Assert.AreEqual(ok[0].SpliceVariants.First().Description, ok2[0].SpliceVariants.First().Description);
            Assert.AreEqual(ok[0].SpliceVariants.First().OriginalSequence, ok2[0].SpliceVariants.First().OriginalSequence);
            Assert.AreEqual(ok[0].SpliceVariants.First().VariantSequence, ok2[0].SpliceVariants.First().VariantSequence);
        }
        public void Test_readUniProtXML_writeProteinXml()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var nice = new List <Modification>
            {
                new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null)
            };

            var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(TestContext.CurrentContext.TestDirectory, "PSI-MOD.obo2.xml"));
            Dictionary <string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
            var uniprotPtms = Loaders.LoadUniprot(Path.Combine(TestContext.CurrentContext.TestDirectory, "ptmlist2.txt"), formalChargesDictionary).ToList();

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), true, DecoyType.None, uniprotPtms.Concat(nice), false, null,
                                                               out Dictionary <string, Modification> un);
            Protein zero = ok[0];
            Protein one  = ok[1];
            Dictionary <int, List <Modification> > zero_mods = zero.OneBasedPossibleLocalizedModifications as Dictionary <int, List <Modification> >;
            Dictionary <int, List <Modification> > one_mods  = one.OneBasedPossibleLocalizedModifications as Dictionary <int, List <Modification> >;



            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"), true, DecoyType.None, nice, false,
                                                                new List <string>(), out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(9, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(1, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GeneID"));
            Assert.AreEqual(3, ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok[0].GeneNames.Count());
            Assert.AreEqual("primary", ok[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), ok[0].DatabaseFilePath);
            Assert.AreEqual(9, ok2[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok2[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok2[0].GeneNames.Count());
            Assert.AreEqual("primary", ok2[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok2[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok2[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"), ok2[0].DatabaseFilePath);
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
        public void Test_write_with_custom_mods()
        {
            ModificationMotif.TryGetMotif("S", out ModificationMotif m1);
            ModificationMotif.TryGetMotif("T", out ModificationMotif m2);
            ModificationMotif.TryGetMotif("X", out ModificationMotif motiff);

            var nice = new List <Modification>
            {
                new Modification("fayk", null, "mt", null, motiff, "Anywhere.", null, 10, null, null, null, null, null, null),
                new Modification("Phosphoserine", null, "mt", null, m1, "Anywhere.", null, 80, null, null, null, null, null, null),
                new Modification("Phosphothreonine", null, "mt", null, m2, "Anywhere.", null, 80, null, null, null, null, null, null)
            };

            ModificationMotif.TryGetMotif("K", out ModificationMotif motif);
            Modification m = new Modification("mod", null, "mt", null, motif, "Anywhere.", null, 1, null, null, null, new Dictionary <DissociationType, List <double> >()
            {
                { DissociationType.AnyActivationType, new List <double> {
                      -1
                  } }
            }, null, null);

            Dictionary <string, HashSet <Tuple <int, Modification> > > new_mods = new Dictionary <string, HashSet <Tuple <int, Modification> > >
            {
                { "P53863", new HashSet <Tuple <int, Modification> > {
                      new Tuple <int, Modification>(2, m)
                  } }
            };

            var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(TestContext.CurrentContext.TestDirectory, "PSI-MOD.obo2.xml"));
            Dictionary <string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
            var uniprotPtms = Loaders.LoadUniprot(Path.Combine(TestContext.CurrentContext.TestDirectory, "ptmlist2.txt"), formalChargesDictionary).ToList();


            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), true, DecoyType.None, uniprotPtms.Concat(nice), false, new List <string>(),
                                                               out Dictionary <string, Modification> un);
            var newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"));

            Assert.AreEqual(1, newModResEntries.Count);
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"), true, DecoyType.None,
                                                                nice, false, new List <string>(), out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(2, ok[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(3, ok2[0].OneBasedPossibleLocalizedModifications.Count);
        }
        public static void TestSilacWhenProteinIsMissing()
        {
            //make heavy residue and add to search task
            Residue heavyLysine = new Residue("a", 'a', "a", Chemistry.ChemicalFormula.ParseFormula("C{13}6H12N{15}2O"), ModificationSites.All); //+8 lysine
            Residue lightLysine = Residue.GetResidue('K');

            SearchTask task = new SearchTask
            {
                SearchParameters = new SearchParameters
                {
                    SilacLabels = new List <SilacLabel> {
                        new SilacLabel(lightLysine.Letter, heavyLysine.Letter, heavyLysine.ThisChemicalFormula.Formula, heavyLysine.MonoisotopicMass - lightLysine.MonoisotopicMass)
                    },
                    NoOneHitWonders = true
                                      //The NoOneHitWonders=true doesn't really seem like a SILAC test, but we're testing that there's no crash if a quantified peptide's proteinGroup isn't quantified
                                      //This happens if somebody messed with parsimony (picked TDS) or from requiring two peptides per protein (and we're only finding one). We're testing the second case here.
                }
            };

            PeptideWithSetModifications lightPeptide = new PeptideWithSetModifications("PEPTIDEK", new Dictionary <string, Modification>());

            List <double> massDifferences = new List <double> {
                heavyLysine.MonoisotopicMass - lightLysine.MonoisotopicMass
            };
            MsDataFile myMsDataFile1 = new TestDataFile(lightPeptide, massDifferences);
            string     mzmlName      = @"silac.mzML";

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile1, mzmlName, false);

            string  xmlName    = "SilacDb.xml";
            Protein theProtein = new Protein("PEPTIDEK", "accession1");

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                theProtein
            }, xmlName);

            string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSilac");

            Directory.CreateDirectory(outputFolder);
            var theStringResult = task.RunTask(outputFolder, new List <DbForTask> {
                new DbForTask(xmlName, false)
            }, new List <string> {
                mzmlName
            }, "taskId1").ToString();
        }
Beispiel #23
0
        public static void TestStringSanitation()
        {
            string messedUpSequence = @"PRO�EIN�";

            // just test the string sanitation method alone
            var sanitized = ProteinDbLoader.SanitizeAminoAcidSequence(messedUpSequence, 'X');

            Assert.That(sanitized == "PROXEINX");

            // test reading from a fasta
            Protein protein = new Protein(messedUpSequence, "accession");

            string fastaPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"messedUp.fasta");

            ProteinDbWriter.WriteFastaDatabase(new List <Protein> {
                protein
            }, fastaPath, "|");

            var fastaProteins = ProteinDbLoader.LoadProteinFasta(fastaPath, true, DecoyType.Reverse, false, ProteinDbLoader.UniprotAccessionRegex,
                                                                 ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                 ProteinDbLoader.UniprotOrganismRegex, out var a);

            Assert.That(fastaProteins.First(p => !p.IsDecoy).BaseSequence == "PROXEINX");

            // digest and fragment to check that there isn't a crash
            var peptides = fastaProteins.First().Digest(new DigestionParams(), new List <Modification>(), new List <Modification>()).ToList();

            foreach (PeptideWithSetModifications peptide in peptides)
            {
                List <Product> fragments = new List <Product>();
                peptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, fragments);
            }

            // test reading from an XML
            string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"messedUp.xml");

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                protein
            }, xmlPath);
            var xmlProteins = ProteinDbLoader.LoadProteinXML(xmlPath, true, DecoyType.Reverse, new List <Modification>(), false, new List <string>(), out var unk);

            Assert.That(xmlProteins.First(p => !p.IsDecoy).BaseSequence == "PROXEINX");
        }
        public void AnotherTest()
        {
            List <Modification> variableModifications = new List <Modification>();
            List <Modification> fixedModifications    = new List <Modification>();

            // Generate data for files
            Protein ParentProtein = new Protein("MPEPTIDEKANTHE", "accession1", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), null,
                                                "name1", "fullname1", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            List <ProteolysisProduct> pp = new List <ProteolysisProduct> {
                new ProteolysisProduct(4, 8, "chain")
            };
            Protein proteinWithChain = new Protein("MAACNNNCAA", "accession3", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), pp,
                                                   "name2", "fullname2", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                ParentProtein, proteinWithChain
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, @"fdsfsd.xml"));
        }
Beispiel #25
0
        public void TestReadWriteSeqVars2()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"seqvartests.xml"), false, nice, false, null, out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_seqvartests.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_seqvartests.xml"), false, nice, false, null, out un);

            Assert.AreEqual(ok[0].SequenceVariations.Count(), ok2[0].SequenceVariations.Count());
            Assert.AreEqual(ok[0].SequenceVariations.First().OneBasedBeginPosition, ok2[0].SequenceVariations.First().OneBasedBeginPosition);
            Assert.AreEqual(ok[0].SequenceVariations.First().OneBasedEndPosition, ok2[0].SequenceVariations.First().OneBasedEndPosition);
            Assert.AreEqual(ok[0].SequenceVariations.First().Description, ok2[0].SequenceVariations.First().Description);
            Assert.AreEqual(ok[0].SequenceVariations.First().OriginalSequence, ok2[0].SequenceVariations.First().OriginalSequence);
            Assert.AreEqual(ok[0].SequenceVariations.First().VariantSequence, ok2[0].SequenceVariations.First().VariantSequence);
        }
        static void Main(string[] args)
        {
            List <string> files = args.Where(f => File.Exists(f) & (Path.GetExtension(f) == ".xml" || Path.GetExtension(f) == ".xml.gz")).ToList();

            if (files.Count < 2)
            {
                Console.WriteLine("Please enter at least two protein .xml or .xml.gz databases.");
                return;
            }

            // check that file path is valid
            string timestamp = DateTime.Now.Year.ToString("0000") + "-" + DateTime.Now.Month.ToString("00") + "-" + DateTime.Now.Day.ToString("00") + "-" + DateTime.Now.Hour.ToString("00") + "-" + DateTime.Now.Minute.ToString("00") + "-" + DateTime.Now.Second.ToString("00");
            string outpath   = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "merged_database_" + timestamp + ".xml");

            // merge databases
            Loaders.LoadElements(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "elements.dat"));
            List <Protein> merged = ProteinDbLoader.merge_proteins(files.SelectMany(f => ProteinDbLoader.LoadProteinXML(f, false, new List <Modification>(), false, new List <string>(), out Dictionary <string, Modification> un))).ToList();

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), merged, outpath);
        }
        public void TestReadWriteSeqVars2()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", "mt", motif, TerminusLocalization.Any, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"seqvartests.xml"), true, DecoyType.None, nice, false, new List <string>(), out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_seqvartests.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_seqvartests.xml"), true, DecoyType.None, nice, false, new List <string>(), out un);

            Assert.AreEqual(ok[0].SequenceVariations.Count(), ok2[0].SequenceVariations.Count());
            Assert.AreEqual(ok[0].SequenceVariations.First().OneBasedBeginPosition, ok2[0].SequenceVariations.First().OneBasedBeginPosition);
            Assert.AreEqual(ok[0].SequenceVariations.First().OneBasedEndPosition, ok2[0].SequenceVariations.First().OneBasedEndPosition);
            Assert.AreEqual(ok[0].SequenceVariations.First().Description, ok2[0].SequenceVariations.First().Description);
            Assert.AreEqual(ok[0].SequenceVariations.First().OriginalSequence, ok2[0].SequenceVariations.First().OriginalSequence);
            Assert.AreEqual(ok[0].SequenceVariations.First().VariantSequence, ok2[0].SequenceVariations.First().VariantSequence);
        }
Beispiel #28
0
        public void test_read_write_read_xml()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            Dictionary <string, Modification> un;
            List <Protein> ok = ProteinDbLoader.LoadProteinDb(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), false, nice, false, out un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinDb(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), false, nice, false, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));

            Assert.True(ok.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
            Assert.True(ok.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
            Assert.True(ok2.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
            Assert.True(ok2.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
        }
Beispiel #29
0
        private static void BenchmarkDatabaseLoadWrite()
        {
            Console.WriteLine("Starting benchmark BenchmarkDatabaseLoadWrite");

            Stopwatch stopWatch = new Stopwatch();

            Loaders.LoadElements("elements2.dat");
            IEnumerable <Modification> ya = PtmListLoader.ReadModsFromFile(@"ptmlist.txt").ToList();

            stopWatch.Restart();
            var a = ProteinDbLoader.LoadProteinXML(@"yeast_160126.xml.gz", true, ya, false, null, out Dictionary <string, Modification> um);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <System.Tuple <int, ModificationWithMass> > >(), a.Where(p => !p.IsDecoy).ToList(), "rewrite_yeast.xml");
            var b = ProteinDbLoader.LoadProteinXML(@"rewrite_yeast.xml", true, ya, false, null, out um);

            stopWatch.Stop();

            Console.WriteLine("Time for getting formulas: " + stopWatch.Elapsed);

            Console.WriteLine("Benchmark BenchmarkDatabaseLoadWrite finished");
        }
        public static void Test_CustumPrunedDatabaseWriteAndRead()
        {
            ModificationMotif.TryGetMotif("K", out ModificationMotif K);
            ModificationMotif.TryGetMotif("R", out ModificationMotif R);

            Modification acOnK = new Modification(_originalId: "Acetyl", _accession: null, _modificationType: "testModType", _featureType: null, _locationRestriction: "Anywhere.", _target: K, _monoisotopicMass: 42);
            Modification meOnK = new Modification(_originalId: "Methyl", _accession: null, _modificationType: "testModType", _featureType: null, _locationRestriction: "Anywhere.", _target: K, _monoisotopicMass: 14);
            Modification meOnR = new Modification(_originalId: "Methyl", _accession: null, _modificationType: "testModType", _featureType: null, _locationRestriction: "Anywhere.", _target: R, _monoisotopicMass: 14);

            Dictionary <int, List <Modification> > obm = new Dictionary <int, List <Modification> >();

            obm.Add(1, new List <Modification>()
            {
                acOnK
            });
            obm.Add(2, new List <Modification>()
            {
                meOnK
            });
            obm.Add(3, new List <Modification>()
            {
                meOnR
            });

            Protein        p     = new Protein("KKR", "accession", null, null, obm, null, null, null, false, false, null, null, null, null);
            List <Protein> pList = new List <Protein>()
            {
                p
            };

            string outputFileName = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"redundant.xml");

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), pList, outputFileName);

            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(outputFileName,
                                                                         true, DecoyType.None, new List <Modification>(), false, new List <string>(), out Dictionary <string, Modification> proteinXmlModList);


            Assert.AreEqual(3, new_proteins[0].OneBasedPossibleLocalizedModifications.Count());
        }