public void OneTranscriptOneHeterozygousSynonymous()
        {
            Genome         genome   = new Genome(Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "chr1_sample.fa"));
            VCFParser      vcf      = new VCFParser(Path.Combine(TestContext.CurrentContext.TestDirectory, "TestVcfs", "chr_1_one_heterozygous_synonymous.vcf"));
            List <Variant> variants = vcf.Select(x => new Variant(null, x, genome)).ToList();

            Assert.AreEqual(1, variants.Count);

            GeneModel         geneModel           = new GeneModel(genome, Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "chr1_one_transcript.gtf"));
            List <Protein>    proteins_wo_variant = geneModel.Translate(true).ToList();
            List <Transcript> transcripts         = geneModel.ApplyVariants(variants);
            List <Protein>    proteins            = transcripts.Select(t => t.Protein()).ToList();

            Assert.AreEqual(1, geneModel.Genes.Count);
            Assert.AreEqual(1, proteins.Count);
            Assert.AreEqual(1, proteins_wo_variant.Count);
            Assert.AreEqual(1, new HashSet <string> {
                proteins[0].BaseSequence, proteins_wo_variant[0].BaseSequence
            }.Count);
            Assert.IsTrue(proteins.Any(p => p.FullName.Contains(FunctionalClass.SILENT.ToString())));    // synonymous
            Assert.IsTrue(proteins.Any(p => p.FullName.Contains(GenotypeType.HETEROZYGOUS.ToString()))); // synonymous
            Assert.IsTrue(proteins.Any(p => p.FullName.Contains("1:69666")));

            string proteinFasta = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestVcfs", "chr_1_one_heterozygous_synonymous.fasta");

            ProteinDbWriter.WriteFastaDatabase(proteins, proteinFasta, " ");
            string[] proteinFastaLines = File.ReadLines(proteinFasta).ToArray();
            Assert.IsTrue(proteinFastaLines[0].Contains(FunctionalClass.SILENT.ToString())); // synonymous
            Assert.IsTrue(proteinFastaLines[0].Contains("1:69666"));
        }
Exemplo n.º 2
0
        public List <string> TransferModifications(string spritzDirectory, string sourceXmlPath, List <string> destinationXmlPaths, List <Protein> additionalProteins)
        {
            var           uniprotPtms = ProteinAnnotation.GetUniProtMods(spritzDirectory);
            List <string> outxmls     = new List <string>();

            var uniprot = File.Exists(sourceXmlPath) ?
                          ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out Dictionary <string, Modification> un) :
                          new List <Protein>();

            foreach (var xml in destinationXmlPaths)
            {
                if (xml == null || !File.Exists(xml))
                {
                    continue;
                }
                string outxml          = Path.Combine(Path.GetDirectoryName(xml), Path.GetFileNameWithoutExtension(xml) + ".withmods.xml");
                var    nonVariantProts = ProteinDbLoader.LoadProteinXML(xml, true, DecoyType.None, uniprotPtms, false, null, out un).Select(p => p.NonVariantProtein).Distinct();
                var    newProts        = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.Concat(additionalProteins).ToList());
                ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);
                string outfasta = Path.Combine(Path.GetDirectoryName(xml), Path.GetFileNameWithoutExtension(xml) + ".spritz.fasta");
                ProteinDbWriter.WriteFastaDatabase(newProts.SelectMany(p => p.GetVariantProteins()).ToList(), outfasta, "|");
                outxmls.Add(outxml);
            }
            return(outxmls);
        }
Exemplo n.º 3
0
        public void test_read_Ensembl_pepAllFasta()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            Dictionary <string, Modification> un;
            List <Protein> ok = ProteinDbLoader.LoadProteinDb(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_ensembl.pep.all.fasta"), false, nice, false, out un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinDb(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.xml"), false, nice, false, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual("ENSP00000381386", ok[0].Accession);
            Assert.AreEqual("ENSP00000215773", ok[1].Accession);
            Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding", ok[0].FullName);
            Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24322019:-1 gene:ENSG00000099977 transcript:ENST00000350608 gene_biotype:protein_coding transcript_biotype:protein_coding", ok[1].FullName);

            Assert.True(ok.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
            Assert.True(ok.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
            Assert.True(ok2.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
            Assert.True(ok2.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
        }
        public void Test_read_write_read_xml()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", "mt", motif, TerminusLocalization.Any, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), true, DecoyType.None, nice, false, null, out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), true, DecoyType.None, nice, false, new List <string>(), out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(9, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok[0].GeneNames.Count());
            Assert.AreEqual("primary", ok[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), ok[0].DatabaseFilePath);
            Assert.AreEqual(9, ok2[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok2[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok2[0].GeneNames.Count());
            Assert.AreEqual("primary", ok2[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok2[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok2[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), ok2[0].DatabaseFilePath);
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
        public void TestEmptyProteins()
        {
            Protein p1 = new Protein("SEQENCE", "p1");

            Assert.AreEqual("p1||", p1.FullDescription);
            Protein p2 = new Protein("SEQENCE", "p2", name: "namep2");

            var proteinListToWrite = new List <Protein> {
                p1, p2
            };

            // Generate data for files
            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), proteinListToWrite,
                                             Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"differentlyConstuctedProteins.xml"));

            IEnumerable <string>       modTypesToExclude     = new List <string>();
            IEnumerable <Modification> allKnownModifications = new List <Modification>();
            List <Protein>             ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"differentlyConstuctedProteins.xml"), true, DecoyType.None,
                                                                           allKnownModifications, false, modTypesToExclude, out Dictionary <string, Modification> un);

            Assert.AreEqual(p1.Accession, ok[0].Accession);
            Assert.AreEqual(p2.Accession, ok[1].Accession);
            Assert.AreEqual(p1.Name, ok[0].Name);
            Assert.AreEqual(p2.Name, ok[1].Name);
        }
        public void Test_read_xml_write_read_fasta()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var nice = new List <Modification>
            {
                new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, null, null, null, null, null, null, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), true, DecoyType.None, nice, false, null,
                                                               out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteFastaDatabase(ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml_test.fasta"), "|");
            List <Protein> ok2 = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml_test.fasta"), true, DecoyType.None, false,
                                                                  ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var b);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].Name == ok2[i].Name));
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].Organism == ok2[i].Organism));
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].GeneNames.First().Item2 == ok2[i].GeneNames.First().Item2));

            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
Exemplo n.º 7
0
        public void Modification_read_write_into_proteinDb()
        {
            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements2.dat"));
            var sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(TestContext.CurrentContext.TestDirectory, "z.txt")).ToList();

            Assert.AreEqual(1, sampleModList.OfType <ModificationWithMass>().Count());
            Protein protein = new Protein("MCSSSSSSSSSS", "accession", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> > {
                { 2, sampleModList.OfType <Modification>().ToList() }
            }, null, "name", "full_name", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType <ModificationWithMass>().Count());
            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                protein
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins.xml"));
            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins.xml"), true, DecoyType.None, new List <Modification>(), false, new List <string>(), out Dictionary <string, Modification> um);

            Assert.AreEqual(1, new_proteins.Count);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
            Assert.AreEqual("Type", new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).OfType <ModificationWithMass>().First().modificationType);
            Assert.AreEqual("Palmitoylation of C", new_proteins[0].OneBasedPossibleLocalizedModifications[2][0].id);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications[2].OfType <ModificationWithMass>().Count());

            // Check that Modifications were saved after last load
            Assert.AreEqual(1, ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_modifications_with_proteins.xml")).Count);
            Assert.True(ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_modifications_with_proteins.xml"))[0] == new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).First());

            //But that we can still read modifications from other protein XMLs that exist
            Assert.AreEqual(0, ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(TestContext.CurrentContext.TestDirectory, "xml.xml")).Count);
        }
        public static void FastaTest()
        {
            List <Protein> prots = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"fasta.fasta"), true, DecoyType.Reverse, false,
                                                                    ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                                                                    ProteinDbLoader.UniprotOrganismRegex, out var a);

            ProteinDbWriter.WriteFastaDatabase(prots, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_fasta.fasta"), "|");
            List <Protein> prots2 = ProteinDbLoader.LoadProteinFasta(
                Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_fasta.fasta"),
                true,
                DecoyType.None,
                false,
                ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex,
                ProteinDbLoader.UniprotOrganismRegex,
                out var un);

            Assert.AreEqual("P62805", prots.First().Accession);
            Assert.AreEqual("H4_HUMAN", prots.First().Name);
            Assert.AreEqual("Histone H4", prots.First().FullName);
            Assert.AreEqual("HIST1H4A", prots.First().GeneNames.First().Item2);
            Assert.AreEqual("H**o sapiens", prots.First().Organism);

            Assert.AreEqual("P62805", prots2.First().Accession);
            Assert.AreEqual("H4_HUMAN", prots2.First().Name);
            Assert.AreEqual("Histone H4", prots2.First().FullName);
            Assert.AreEqual("HIST1H4A", prots2.First().GeneNames.First().Item2);
            Assert.AreEqual("H**o sapiens", prots2.First().Organism);
        }
Exemplo n.º 9
0
        [TestCase("bonds2.xml", 2, 4, "MDICPC", 4, 6)] // with starting methionine
        public static void ReverseDecoyDisulfideBonds(string databaseName, int beginIdx, int reversedBeginIdx, string reversedSequence, int endIdx, int reversedEndIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.DisulfideBonds.Count());
            Assert.AreEqual(beginIdx, target.DisulfideBonds.Single().OneBasedBeginPosition); //PC[start]PC[end]ID, MC[start]PC[end]ID
            Assert.AreEqual(endIdx, target.DisulfideBonds.Single().OneBasedEndPosition);
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.DisulfideBonds.Count());
            Assert.AreEqual(reversedSequence, decoy.BaseSequence);
            Assert.AreEqual(reversedBeginIdx, decoy.DisulfideBonds.Single().OneBasedBeginPosition); //DIC[start]PC[end]P, MDIC[start]PC[end]
            Assert.AreEqual(reversedEndIdx, decoy.DisulfideBonds.Single().OneBasedEndPosition);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.DisulfideBonds.Count());
            Assert.AreEqual(beginIdx, target.DisulfideBonds.Single().OneBasedBeginPosition);
            Assert.AreEqual(endIdx, target.DisulfideBonds.Single().OneBasedEndPosition);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.DisulfideBonds.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.DisulfideBonds.Single().OneBasedBeginPosition);
            Assert.AreEqual(reversedEndIdx, decoy.DisulfideBonds.Single().OneBasedEndPosition);
        }
Exemplo n.º 10
0
        public void Test_read_write_read_xml()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), false, nice, false, null, out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), false, nice, false, null, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(9, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok[0].GeneNames.Count());
            Assert.AreEqual("primary", ok[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok[0].GeneNames.First().Item2);

            Assert.AreEqual(9, ok2[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok2[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok2[0].GeneNames.Count());
            Assert.AreEqual("primary", ok2[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok2[0].GeneNames.First().Item2);

            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
Exemplo n.º 11
0
        public void Test_write_with_custom_mods()
        {
            var nice = new List <Modification>
            {
                new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
            };

            ModificationMotif.TryGetMotif("K", out ModificationMotif motif);
            ModificationWithMass m = new ModificationWithMass("mod", new Tuple <string, string>("", ""), motif, ModificationSites.Any, 1, new Dictionary <string, IList <string> >(), new List <double> {
                -1
            }, new List <double>(), "");

            Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > > new_mods = new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >
            {
                { "P53863", new HashSet <Tuple <int, ModificationWithMass> > {
                      new Tuple <int, ModificationWithMass>(2, m)
                  } }
            };

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"), false, nice, false, null, out Dictionary <string, Modification> un);
            var            newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"));

            Assert.AreEqual(1, newModResEntries.Count);
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml"), false, nice, false, null, out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(2, ok[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(3, ok2[0].OneBasedPossibleLocalizedModifications.Count);
        }
Exemplo n.º 12
0
        [TestCase("ranges2.xml", 1, 1, 5, 5)] // with starting methionine
        public static void ReverseDecoyProteolysisProducts(string databaseName, int beginIdx, int reversedBeginIdx, int endIdx, int reversedEndIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.ProteolysisProducts.Count());
            Assert.AreEqual(beginIdx, target.ProteolysisProducts.Single().OneBasedBeginPosition); //P[start]EPTI[end]D, M[start]EPTI[end]D
            Assert.AreEqual(endIdx, target.ProteolysisProducts.Single().OneBasedEndPosition);
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.ProteolysisProducts.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.ProteolysisProducts.Single().OneBasedBeginPosition); //DI[start]TPEP[end], M[start]DITP[end]E
            Assert.AreEqual(reversedEndIdx, decoy.ProteolysisProducts.Single().OneBasedEndPosition);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.ProteolysisProducts.Count());
            Assert.AreEqual(beginIdx, target.ProteolysisProducts.Single().OneBasedBeginPosition);
            Assert.AreEqual(endIdx, target.ProteolysisProducts.Single().OneBasedEndPosition);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.ProteolysisProducts.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.ProteolysisProducts.Single().OneBasedBeginPosition);
            Assert.AreEqual(reversedEndIdx, decoy.ProteolysisProducts.Single().OneBasedEndPosition);
        }
Exemplo n.º 13
0
        public static void LoadIsoforms()
        {
            var protein = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "Isoform.fasta"), true, DecoyType.None, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var errors);

            Assert.AreEqual("Q13409", protein[0].Accession);
            Assert.AreEqual("Q13409_2", protein[1].Accession);
            Assert.AreEqual("Q13409_3", protein[2].Accession);
            Assert.AreEqual("Q13813", protein[3].Accession);
            Assert.AreEqual("Q13813_2", protein[4].Accession);
            Assert.AreEqual("Q13813_3", protein[5].Accession);
            Assert.AreEqual("Q14103", protein[6].Accession);
            Assert.AreEqual("Q14103_2", protein[7].Accession);
            Assert.AreEqual("Q14103_3", protein[8].Accession);
            Assert.AreEqual("Q14103_4", protein[9].Accession);
            Dictionary <string, HashSet <Tuple <int, Modification> > > mods = new Dictionary <string, HashSet <Tuple <int, Modification> > >();

            ProteinDbWriter.WriteXmlDatabase(mods, protein, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "IsoformTest.xml"));
            var proteinXml = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "IsoformTest.xml"), true, DecoyType.None, null, false, null, out var unknownMod);

            Assert.AreEqual("Q13409", proteinXml[0].Accession);
            Assert.AreEqual("Q13409_2", proteinXml[1].Accession);
            Assert.AreEqual("Q13409_3", proteinXml[2].Accession);
            Assert.AreEqual("Q13813", proteinXml[3].Accession);
            Assert.AreEqual("Q13813_2", proteinXml[4].Accession);
            Assert.AreEqual("Q13813_3", proteinXml[5].Accession);
            Assert.AreEqual("Q14103", proteinXml[6].Accession);
            Assert.AreEqual("Q14103_2", proteinXml[7].Accession);
            Assert.AreEqual("Q14103_3", proteinXml[8].Accession);
            Assert.AreEqual("Q14103_4", proteinXml[9].Accession);
        }
Exemplo n.º 14
0
        public static void TestContaminantAmbiguity()
        {
            //create an ms file and a database for the peptide
            Protein targetProtein = new Protein("PEPTIDE", "target");
            string  xmlName       = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PEPTIDE.xml");

            ProteinDbWriter.WriteXmlDatabase(null, new List <Protein> {
                targetProtein
            }, xmlName);
            PeptideWithSetModifications pepWithSetMods = targetProtein.Digest(new DigestionParams(), null, null).First();
            TestDataFile msFile   = new TestDataFile(pepWithSetMods);
            string       mzmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PEPTIDE.mzML");

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msFile, mzmlName, false);
            string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestContaminantAmbiguityOutput");

            //run a full modern search using two databases (the same database) but one is called a target and the other is called a contaminant
            //KEEP BOTH TARGET AND CONTAMINANT
            SearchParameters modernSearchParams = new SearchParameters();

            modernSearchParams.SearchType  = SearchType.Modern;
            modernSearchParams.TCAmbiguity = TargetContaminantAmbiguity.RenameProtein;
            SearchTask modernTask = new SearchTask();

            modernTask.SearchParameters = modernSearchParams;

            EverythingRunnerEngine engine = new EverythingRunnerEngine(new List <(string, MetaMorpheusTask)> {
                ("task1", modernTask)
            }, new List <string> {
Exemplo n.º 15
0
        public void DoNotWriteSameModTwiceButWriteInHeaderSinceDifferent()
        {
            Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements2.dat"));
            var     sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(TestContext.CurrentContext.TestDirectory, "z.txt")).ToList();
            Protein protein       = new Protein("MCSSSSSSSSSS", "accession", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> > {
                { 2, sampleModList.OfType <Modification>().ToList() }
            }, null, "name", "full_name", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType <ModificationWithMass>().Count());

            Dictionary <string, HashSet <Tuple <int, Modification> > > dictWithThisMod = new Dictionary <string, HashSet <Tuple <int, Modification> > >();

            HashSet <Tuple <int, Modification> > value = new HashSet <Tuple <int, Modification> >();

            ModificationMotif.TryGetMotif("C", out ModificationMotif motif);
            ModificationWithMass newMod = new ModificationWithMass("Palmitoylation of C", "mt", motif, TerminusLocalization.Any, double.NaN, null, null);

            Assert.AreNotEqual(newMod, sampleModList.First());

            value.Add(new Tuple <int, Modification>(2, newMod));

            dictWithThisMod.Add("accession", value);

            var newModResEntries = ProteinDbWriter.WriteXmlDatabase(dictWithThisMod, new List <Protein> {
                protein
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins2.xml"));

            Assert.AreEqual(0, newModResEntries.Count);
            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins2.xml"), true, DecoyType.None, new List <Modification>(), false, new List <string>(), out Dictionary <string, Modification> um);

            Assert.AreEqual(1, new_proteins.Count);
            Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(2, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
        }
Exemplo n.º 16
0
        [TestCase("splices8.xml", 1, 5, 2, 6)] // range with start with starting methionine
        public static void ReverseDecoySpliceSites(string databaseName, int beginIdx, int reversedBeginIdx, int endIdx, int reversedEndIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.SpliceSites.Count());
            Assert.AreEqual(beginIdx, target.SpliceSites.Single().OneBasedBeginPosition); //PE[start]P[end]TID, ME[start]P[start]TID, PE[site]PTID, ME[site]PTID, P[site]EPTID, M[site]EPTID
            Assert.AreEqual(endIdx, target.SpliceSites.Single().OneBasedEndPosition);
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.SpliceSites.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.SpliceSites.Single().OneBasedBeginPosition); //DITP[start]E[end]P, MDITP[start]E[end], DITPE[site]P, MDITPE[site], DITPEP[site], M[site]DITPE
            Assert.AreEqual(reversedEndIdx, decoy.SpliceSites.Single().OneBasedEndPosition);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.SpliceSites.Count());
            Assert.AreEqual(beginIdx, target.SpliceSites.Single().OneBasedBeginPosition);
            Assert.AreEqual(endIdx, target.SpliceSites.Single().OneBasedEndPosition);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.SpliceSites.Count());
            Assert.AreEqual(reversedBeginIdx, decoy.SpliceSites.Single().OneBasedBeginPosition);
            Assert.AreEqual(reversedEndIdx, decoy.SpliceSites.Single().OneBasedEndPosition);

            List <PeptideWithSetModifications> peptides = proteins.SelectMany(vp => vp.Digest(new DigestionParams(), null, null)).ToList();
        }
Exemplo n.º 17
0
        /// <summary>
        /// Takes coding effect files from star-fusion to produce proteins
        /// </summary>
        /// <param name="codingEffectFilePath"></param>
        /// <returns></returns>
        public static string ParseCodingEffectsToXml(string codingEffectFilePaths, int minPeptideLength = 7, string organism = "H**o sapiens")
        {
            List <Protein>   fusionProteins = new List <Protein>();
            HashSet <string> usedAccessions = new HashSet <string>();
            var paths = codingEffectFilePaths.Split(",");

            foreach (string path in paths)
            {
                using (StreamReader reader = new StreamReader(path))
                {
                    while (true)
                    {
                        string line = reader.ReadLine();
                        if (line == null)
                        {
                            break;
                        }
                        if (line.StartsWith("#"))
                        {
                            break;
                        }                                    // skip header
                        string[] s          = line.Split('\t');
                        var      fusionName = s[0];
                        var      junctionSpanningReadCount = s[1];
                        var      spliceType               = s[3];
                        var      leftGene                 = s[4];
                        var      leftBreakpoint           = s[5];
                        var      rightGene                = s[6];
                        var      rightBreakpoint          = s[7];
                        var      ffpm                     = s[9];
                        var      annots                   = s[14];
                        var      proteinFusionType        = s[19];
                        var      proteinFusionTranslation = s[22];
                        var      proteinSequence          = proteinFusionTranslation.Split('*')[0];
                        if (proteinSequence.Length < minPeptideLength)
                        {
                            continue;
                        }
                        string accession = fusionName;
                        int    i         = 1;
                        while (usedAccessions.Contains(accession))
                        {
                            accession = fusionName + "_" + i++.ToString();
                        }
                        usedAccessions.Add(accession);
                        fusionProteins.Add(new Protein(proteinSequence, accession, organism, new List <Tuple <string, string> > {
                            new Tuple <string, string>("fusion", leftGene + "--" + rightGene)
                        }));
                    }
                }
            }
            string fusionProteinXmlPath = Path.Combine(Path.GetDirectoryName(paths[0]), "FusionProteins.xml");

            ProteinDbWriter.WriteXmlDatabase(null, fusionProteins, fusionProteinXmlPath);
            return(fusionProteinXmlPath);
        }
Exemplo n.º 18
0
        public void Test_accession_regex_weird()
        {
            Regex          bad = new Regex(@"/()/");
            List <Protein> ok  = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_ensembl.pep.all.fasta"), false, false, bad, bad, bad, bad);

            ProteinDbWriter.WriteFastaDatabase(ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.fasta"), " ");
            List <Protein> ok2 = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.fasta"), false, false, bad, bad, bad, bad);

            Assert.AreEqual("ENSP00000381386 pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding", ok[0].Accession);
            Assert.AreEqual("ENSP00000381386 pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding", ok2[0].Accession);
            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
        }
Exemplo n.º 19
0
        public void AnotherTest()
        {
            List <ModificationWithMass> variableModifications = new List <ModificationWithMass>();
            List <ModificationWithMass> fixedModifications    = new List <ModificationWithMass>();

            // Generate data for files
            Protein ParentProtein = new Protein("MPEPTIDEKANTHE", "accession1", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), new int?[0], new int?[0], new string[0], "name1", "fullname1", false, false, new List <DatabaseReference>(), new List <SequenceVariation>());

            Protein proteinWithChain = new Protein("MAACNNNCAA", "accession3", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), new int?[] { 4 }, new int?[] { 8 }, new string[] { "chain" }, "name2", "fullname2", false, false, new List <DatabaseReference>(), new List <SequenceVariation>());

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, ModificationWithMass> > >(), new List <Protein> {
                ParentProtein, proteinWithChain
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, @"fdsfsd.xml"));
        }
Exemplo n.º 20
0
        public void MakeVariantFasta()
        {
            List <Protein> proteins = ProteinDbLoader.LoadProteinXML(@"E:\ProjectsActive\JurkatProteogenomics\180413\combined_1-trimmed-pair1Aligned.sortedByCoord.outProcessed.out.fixedQuals.split.concat.sorted.snpEffAnnotated.protein.xml",
                                                                     true, DecoyType.None, null, false, null, out var un);
            List <ProteinWithAppliedVariants> variantProteins  = proteins.SelectMany(p => p.GetVariantProteins()).ToList();
            List <ProteinWithAppliedVariants> variantProteins2 = variantProteins.Select(p =>
                                                                                        new ProteinWithAppliedVariants(
                                                                                            p.BaseSequence,
                                                                                            new Protein(p.Protein.BaseSequence, p.Protein.Accession, p.Protein.Organism, p.Protein.GeneNames.ToList(), p.Protein.OneBasedPossibleLocalizedModifications, p.Protein.ProteolysisProducts.ToList(), p.Protein.Name + string.Join(",", p.AppliedSequenceVariations.Select(d => d.Description)), p.Protein.FullName + string.Join(",", p.AppliedSequenceVariations.Select(d => d.Description)), p.IsDecoy, p.IsContaminant, p.DatabaseReferences.ToList(), p.SequenceVariations.ToList(), p.DisulfideBonds.ToList(), p.DatabaseFilePath),
                                                                                            p.AppliedSequenceVariations, p.Individual)
                                                                                        ).ToList();

            ProteinDbWriter.WriteFastaDatabase(variantProteins2.OfType <Protein>().ToList(), @"E:\ProjectsActive\Spritz\mmTesting\variantproteins.fasta", "|");
        }
Exemplo n.º 21
0
        [TestCase("oblm3.xml", 3, 5)] // with starting methionine
        public static void LoadSeqVarModifications(string databaseName, int modIdx, int reversedModIdx)
        {
            var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", databaseName), true,
                                                          DecoyType.Reverse, null, false, null, out var unknownModifications);
            var target = proteins[0];

            Assert.AreEqual(1, target.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(modIdx, target.OneBasedPossibleLocalizedModifications.Single().Key);
            Assert.AreEqual(1, target.AppliedSequenceVariations.Count());
            Assert.AreEqual(modIdx, target.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Count());
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedModifications.Single().Key); //PEP[mod]TID, MEP[mod]TID
            var decoy = proteins[1];

            Assert.AreEqual(1, decoy.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.OneBasedPossibleLocalizedModifications.Single().Key); //DITP[mod]EP, MDITP[mod]E
            Assert.AreEqual(1, decoy.AppliedSequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedModifications.Single().Key);

            string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";

            ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName));
            proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", rewriteDbName), true,
                                                      DecoyType.Reverse, null, false, null, out unknownModifications);
            target = proteins[0];
            Assert.AreEqual(1, target.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(modIdx, target.OneBasedPossibleLocalizedModifications.Single().Key);
            Assert.AreEqual(1, target.AppliedSequenceVariations.Count());
            Assert.AreEqual(modIdx, target.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Count());
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, target.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedModifications.Single().Key);
            decoy = proteins[1];
            Assert.AreEqual(1, decoy.OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.OneBasedPossibleLocalizedModifications.Single().Key);
            Assert.AreEqual(1, decoy.AppliedSequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.AppliedSequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Count());
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedBeginPosition);
            Assert.AreEqual(1, decoy.SequenceVariations.Single().OneBasedModifications.Count);
            Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedModifications.Single().Key);
        }
Exemplo n.º 22
0
        public void Test_read_write_read_fasta()
        {
            List <Protein> ok = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"test_ensembl.pep.all.fasta"), false, false, ProteinDbLoader.ensembl_accession_expression, ProteinDbLoader.ensembl_fullName_expression, ProteinDbLoader.ensembl_accession_expression, ProteinDbLoader.ensembl_gene_expression);

            ProteinDbWriter.WriteFastaDatabase(ok, Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.fasta"), " ");
            List <Protein> ok2 = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_test_ensembl.pep.all.fasta"), false, false, ProteinDbLoader.ensembl_accession_expression, ProteinDbLoader.ensembl_fullName_expression, ProteinDbLoader.ensembl_accession_expression, ProteinDbLoader.ensembl_gene_expression);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));

            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
        public static string TransferModifications(string sourceXmlPath, string destinationXmlPath)
        {
            var    uniprotPtms     = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);
            var    uniprot         = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out var un);
            string outxml          = Path.Combine(Path.GetDirectoryName(destinationXmlPath), Path.GetFileNameWithoutExtension(destinationXmlPath) + ".withmods.xml");
            var    nonVariantProts = ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out un).Select(p => p.NonVariantProtein).Distinct();
            var    newProts        = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.ToList());

            ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);
            string outfasta = Path.Combine(Path.GetDirectoryName(destinationXmlPath), Path.GetFileNameWithoutExtension(destinationXmlPath) + ".fasta");
            var    prot     = newProts.FirstOrDefault(p => p.Accession.Contains("_"));

            ProteinDbWriter.WriteFastaDatabase(newProts.SelectMany(p => p.GetVariantProteins()).ToList(), outfasta, "|");
            return(outxml);
        }
Exemplo n.º 24
0
        public void TestWritePtmWithNeutralLossAndDiagnosticIons()
        {
            string filename = "test_neutral_loss_diagnostic_ion_mod.xml";
            Dictionary <int, List <Modification> > mods = new Dictionary <int, List <Modification> >();

            ModificationMotif.TryGetMotif("T", out var motif);
            Modification m = new Modification(_originalId: "Phospho", _modificationType: "Test", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 80.0, _neutralLosses: new Dictionary <DissociationType, List <double> > {
                { DissociationType.HCD, new List <double> {
                      80.0, 0
                  } }, { DissociationType.ETD, new List <double> {
                             70.0, 0
                         } }
            }, _diagnosticIons: new Dictionary <DissociationType, List <double> > {
                { DissociationType.CID, new List <double> {
                      60.0, 0
                  } }, { DissociationType.EThcD, new List <double> {
                             40.0, 0
                         } }
            });

            Assert.That(m.ValidModification);

            mods.Add(4, new List <Modification> {
                m
            });

            Protein protein = new Protein("PEPTIDE", "accession", oneBasedModifications: mods);

            Assert.That(protein.OneBasedPossibleLocalizedModifications.Count == 1);
            Assert.That(protein.OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                protein
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, filename));

            // with passed-in mods
            List <Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, filename), true, DecoyType.None, new List <Modification> {
                m
            }, false, new List <string>(), out Dictionary <string, Modification> um);

            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);
            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);

            // should be able to read mod from top of database...
            new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, filename), true, DecoyType.None, new List <Modification>(), false, new List <string>(), out um);
            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);
            Assert.That(new_proteins.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);
        }
Exemplo n.º 25
0
        public void TestReadWriteSpliceVars()
        {
            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"spliceVar.xml"), true, DecoyType.None,
                                                               null, false, new List <string>(), out Dictionary <string, Modification> un);

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_spliceVar.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_spliceVar.xml"), true, DecoyType.None,
                                                                null, false, new List <string>(), out un);

            Assert.AreEqual(ok[0].SpliceVariants.Count(), ok2[0].SpliceVariants.Count());
            Assert.AreEqual(ok[0].SpliceVariants.First().OneBasedBeginPosition, ok2[0].SpliceVariants.First().OneBasedBeginPosition);
            Assert.AreEqual(ok[0].SpliceVariants.First().OneBasedEndPosition, ok2[0].SpliceVariants.First().OneBasedEndPosition);
            Assert.AreEqual(ok[0].SpliceVariants.First().Description, ok2[0].SpliceVariants.First().Description);
            Assert.AreEqual(ok[0].SpliceVariants.First().OriginalSequence, ok2[0].SpliceVariants.First().OriginalSequence);
            Assert.AreEqual(ok[0].SpliceVariants.First().VariantSequence, ok2[0].SpliceVariants.First().VariantSequence);
        }
Exemplo n.º 26
0
        public void Test_readUniProtXML_writeProteinXml()
        {
            ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
            var nice = new List <Modification>
            {
                new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null)
            };

            var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(TestContext.CurrentContext.TestDirectory, "PSI-MOD.obo2.xml"));
            Dictionary <string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
            var uniprotPtms = Loaders.LoadUniprot(Path.Combine(TestContext.CurrentContext.TestDirectory, "ptmlist2.txt"), formalChargesDictionary).ToList();

            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), true, DecoyType.None, uniprotPtms.Concat(nice), false, null,
                                                               out Dictionary <string, Modification> un);
            Protein zero = ok[0];
            Protein one  = ok[1];
            Dictionary <int, List <Modification> > zero_mods = zero.OneBasedPossibleLocalizedModifications as Dictionary <int, List <Modification> >;
            Dictionary <int, List <Modification> > one_mods  = one.OneBasedPossibleLocalizedModifications as Dictionary <int, List <Modification> >;



            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"));
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"), true, DecoyType.None, nice, false,
                                                                new List <string>(), out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(9, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(1, ok[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GeneID"));
            Assert.AreEqual(3, ok[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok[0].GeneNames.Count());
            Assert.AreEqual("primary", ok[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), ok[0].DatabaseFilePath);
            Assert.AreEqual(9, ok2[0].DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
            Assert.AreEqual(3, ok2[0].DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
            Assert.AreEqual(3, ok2[0].GeneNames.Count());
            Assert.AreEqual("primary", ok2[0].GeneNames.First().Item1);
            Assert.AreEqual("JJJ1", ok2[0].GeneNames.First().Item2);
            Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", ok2[0].Organism);
            Assert.AreEqual(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"), ok2[0].DatabaseFilePath);
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
Exemplo n.º 27
0
        public void Test_write_with_custom_mods()
        {
            ModificationMotif.TryGetMotif("S", out ModificationMotif m1);
            ModificationMotif.TryGetMotif("T", out ModificationMotif m2);
            ModificationMotif.TryGetMotif("X", out ModificationMotif motiff);

            var nice = new List <Modification>
            {
                new Modification("fayk", null, "mt", null, motiff, "Anywhere.", null, 10, null, null, null, null, null, null),
                new Modification("Phosphoserine", null, "mt", null, m1, "Anywhere.", null, 80, null, null, null, null, null, null),
                new Modification("Phosphothreonine", null, "mt", null, m2, "Anywhere.", null, 80, null, null, null, null, null, null)
            };

            ModificationMotif.TryGetMotif("K", out ModificationMotif motif);
            Modification m = new Modification("mod", null, "mt", null, motif, "Anywhere.", null, 1, null, null, null, new Dictionary <DissociationType, List <double> >()
            {
                { DissociationType.AnyActivationType, new List <double> {
                      -1
                  } }
            }, null, null);

            Dictionary <string, HashSet <Tuple <int, Modification> > > new_mods = new Dictionary <string, HashSet <Tuple <int, Modification> > >
            {
                { "P53863", new HashSet <Tuple <int, Modification> > {
                      new Tuple <int, Modification>(2, m)
                  } }
            };

            var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(TestContext.CurrentContext.TestDirectory, "PSI-MOD.obo2.xml"));
            Dictionary <string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
            var uniprotPtms = Loaders.LoadUniprot(Path.Combine(TestContext.CurrentContext.TestDirectory, "ptmlist2.txt"), formalChargesDictionary).ToList();


            List <Protein> ok = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml"), true, DecoyType.None, uniprotPtms.Concat(nice), false, new List <string>(),
                                                               out Dictionary <string, Modification> un);
            var newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"));

            Assert.AreEqual(1, newModResEntries.Count);
            List <Protein> ok2 = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_xml2.xml"), true, DecoyType.None,
                                                                nice, false, new List <string>(), out un);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
            Assert.AreEqual(2, ok[0].OneBasedPossibleLocalizedModifications.Count);
            Assert.AreEqual(3, ok2[0].OneBasedPossibleLocalizedModifications.Count);
        }
Exemplo n.º 28
0
        public void Test_read_write_read_fasta()
        {
            List <Protein> ok = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"test_ensembl.pep.all.fasta"), true, DecoyType.None, false,
                                                                 ProteinDbLoader.EnsemblAccessionRegex, ProteinDbLoader.EnsemblFullNameRegex, ProteinDbLoader.EnsemblAccessionRegex, ProteinDbLoader.EnsemblGeneNameRegex, null, out var a);

            ProteinDbWriter.WriteFastaDatabase(ok, Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_test_ensembl.pep.all.fasta"), " ");
            List <Protein> ok2 = ProteinDbLoader.LoadProteinFasta(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_test_ensembl.pep.all.fasta"), true, DecoyType.None, false,
                                                                  ProteinDbLoader.EnsemblAccessionRegex, ProteinDbLoader.EnsemblFullNameRegex, ProteinDbLoader.EnsemblAccessionRegex, ProteinDbLoader.EnsemblGeneNameRegex, null, out var b);

            Assert.AreEqual(ok.Count, ok2.Count);
            Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));

            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
            Assert.True(ok2.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
        }
Exemplo n.º 29
0
        public static void TestSilacWhenProteinIsMissing()
        {
            //make heavy residue and add to search task
            Residue heavyLysine = new Residue("a", 'a', "a", Chemistry.ChemicalFormula.ParseFormula("C{13}6H12N{15}2O"), ModificationSites.All); //+8 lysine
            Residue lightLysine = Residue.GetResidue('K');

            SearchTask task = new SearchTask
            {
                SearchParameters = new SearchParameters
                {
                    SilacLabels = new List <SilacLabel> {
                        new SilacLabel(lightLysine.Letter, heavyLysine.Letter, heavyLysine.ThisChemicalFormula.Formula, heavyLysine.MonoisotopicMass - lightLysine.MonoisotopicMass)
                    },
                    NoOneHitWonders = true
                                      //The NoOneHitWonders=true doesn't really seem like a SILAC test, but we're testing that there's no crash if a quantified peptide's proteinGroup isn't quantified
                                      //This happens if somebody messed with parsimony (picked TDS) or from requiring two peptides per protein (and we're only finding one). We're testing the second case here.
                }
            };

            PeptideWithSetModifications lightPeptide = new PeptideWithSetModifications("PEPTIDEK", new Dictionary <string, Modification>());

            List <double> massDifferences = new List <double> {
                heavyLysine.MonoisotopicMass - lightLysine.MonoisotopicMass
            };
            MsDataFile myMsDataFile1 = new TestDataFile(lightPeptide, massDifferences);
            string     mzmlName      = @"silac.mzML";

            IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile1, mzmlName, false);

            string  xmlName    = "SilacDb.xml";
            Protein theProtein = new Protein("PEPTIDEK", "accession1");

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                theProtein
            }, xmlName);

            string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSilac");

            Directory.CreateDirectory(outputFolder);
            var theStringResult = task.RunTask(outputFolder, new List <DbForTask> {
                new DbForTask(xmlName, false)
            }, new List <string> {
                mzmlName
            }, "taskId1").ToString();
        }
Exemplo n.º 30
0
        public void AnotherTest()
        {
            List <Modification> variableModifications = new List <Modification>();
            List <Modification> fixedModifications    = new List <Modification>();

            // Generate data for files
            Protein ParentProtein = new Protein("MPEPTIDEKANTHE", "accession1", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), null,
                                                "name1", "fullname1", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            List <ProteolysisProduct> pp = new List <ProteolysisProduct> {
                new ProteolysisProduct(4, 8, "chain")
            };
            Protein proteinWithChain = new Protein("MAACNNNCAA", "accession3", "organism", new List <Tuple <string, string> >(), new Dictionary <int, List <Modification> >(), pp,
                                                   "name2", "fullname2", false, false, new List <DatabaseReference>(), new List <SequenceVariation>(), new List <DisulfideBond>());

            ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), new List <Protein> {
                ParentProtein, proteinWithChain
            }, Path.Combine(TestContext.CurrentContext.TestDirectory, @"fdsfsd.xml"));
        }