/// <summary>
/// Loads xml2.xml, writes it back out as rewrite_xml2.xml, reloads the rewritten file and
/// checks that both protein lists agree on count, base sequences, GO database references,
/// gene names and in-range proteolysis product positions.
/// </summary>
public void Test_read_write_read_xml()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string sourcePath = Path.Combine(testDirectory, @"xml2.xml");
    string rewrittenPath = Path.Combine(testDirectory, @"rewrite_xml2.xml");

    var knownMods = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };

    // Read -> write -> read round trip.
    List<Protein> loaded = ProteinDbLoader.LoadProteinXML(sourcePath, false, knownMods, false, null, out Dictionary<string, Modification> unknownMods);
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, ModificationWithMass>>>(), loaded, rewrittenPath);
    List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(rewrittenPath, false, knownMods, false, null, out unknownMods);

    Assert.AreEqual(loaded.Count, reloaded.Count);
    Assert.True(Enumerable.Range(0, loaded.Count).All(i => loaded[i].BaseSequence == reloaded[i].BaseSequence));

    // Identical annotation checks on the first protein: first as loaded, then as reloaded.
    foreach (Protein first in new[] { loaded[0], reloaded[0] })
    {
        Assert.AreEqual(9, first.DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
        Assert.AreEqual(3, first.DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
        Assert.AreEqual(3, first.GeneNames.Count());
        Assert.AreEqual("primary", first.GeneNames.First().Item1);
        Assert.AreEqual("JJJ1", first.GeneNames.First().Item2);
    }

    // Every proteolysis product position is either unset or lies within its protein.
    foreach (List<Protein> proteins in new[] { loaded, reloaded })
    {
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod =>
            prod.OneBasedBeginPosition == null || (prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length))));
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod =>
            prod.OneBasedEndPosition == null || (prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length))));
    }
}
/// <summary>
/// Loads an Ensembl pep.all FASTA file, writes it out as an XML database, reloads that XML
/// and checks counts, sequences, accessions, full names and begin/end position ranges.
/// </summary>
public void test_read_Ensembl_pepAllFasta()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string fastaPath = Path.Combine(testDirectory, @"test_ensembl.pep.all.fasta");
    string rewrittenPath = Path.Combine(testDirectory, @"rewrite_test_ensembl.pep.all.xml");

    var knownMods = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };
    Dictionary<string, Modification> unknownMods;

    List<Protein> loaded = ProteinDbLoader.LoadProteinDb(fastaPath, false, knownMods, false, out unknownMods);
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, ModificationWithMass>>>(), loaded, rewrittenPath);
    List<Protein> reloaded = ProteinDbLoader.LoadProteinDb(rewrittenPath, false, knownMods, false, out unknownMods);

    Assert.AreEqual(loaded.Count, reloaded.Count);
    Assert.True(Enumerable.Range(0, loaded.Count).All(i => loaded[i].BaseSequence == reloaded[i].BaseSequence));

    // Header fields parsed from the FASTA file.
    Assert.AreEqual("ENSP00000381386", loaded[0].Accession);
    Assert.AreEqual("ENSP00000215773", loaded[1].Accession);
    Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding", loaded[0].FullName);
    Assert.AreEqual("pep:known chromosome:GRCh37:22:24313554:24322019:-1 gene:ENSG00000099977 transcript:ENST00000350608 gene_biotype:protein_coding transcript_biotype:protein_coding", loaded[1].FullName);

    // Every begin/end position is either unset or lies within its protein.
    foreach (List<Protein> proteins in new[] { loaded, reloaded })
    {
        Assert.True(proteins.All(p => p.OneBasedBeginPositions.All(begin => begin == null || (begin > 0 && begin <= p.Length))));
        Assert.True(proteins.All(p => p.OneBasedEndPositions.All(end => end == null || (end > 0 && end <= p.Length))));
    }
}
/// <summary>
/// Writes a protein that already carries a modification at position 2 together with an
/// additional, non-equal modification keyed to the same accession and position, then checks
/// that the writer reports no new entries and that the reloaded protein has two
/// modifications at a single position.
/// </summary>
public void DoNotWriteSameModTwiceButWriteInHeaderSinceDifferent()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string outputPath = Path.Combine(testDirectory, "test_modifications_with_proteins2.xml");

    Loaders.LoadElements(Path.Combine(testDirectory, "elements2.dat"));
    var sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(testDirectory, "z.txt")).ToList();

    var modsOnProtein = new Dictionary<int, List<Modification>>
    {
        { 2, sampleModList.OfType<Modification>().ToList() }
    };
    Protein protein = new Protein("MCSSSSSSSSSS", "accession", "organism", new List<Tuple<string, string>>(), modsOnProtein, null, "name", "full_name", false, false, new List<DatabaseReference>(), new List<SequenceVariation>(), new List<DisulfideBond>());
    Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType<ModificationWithMass>().Count());

    // A second modification that must not compare equal to the one read from z.txt.
    ModificationMotif.TryGetMotif("C", out ModificationMotif motif);
    ModificationWithMass newMod = new ModificationWithMass("Palmitoylation of C", "mt", motif, TerminusLocalization.Any, double.NaN, null, null);
    Assert.AreNotEqual(newMod, sampleModList.First());

    var dictWithThisMod = new Dictionary<string, HashSet<Tuple<int, Modification>>>
    {
        { "accession", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, newMod) } }
    };

    var newModResEntries = ProteinDbWriter.WriteXmlDatabase(dictWithThisMod, new List<Protein> { protein }, outputPath);
    Assert.AreEqual(0, newModResEntries.Count);

    List<Protein> new_proteins = ProteinDbLoader.LoadProteinXML(outputPath, true, DecoyType.None, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> um);
    Assert.AreEqual(1, new_proteins.Count);

    var reloadedMods = new_proteins[0].OneBasedPossibleLocalizedModifications;
    Assert.AreEqual(1, reloadedMods.Count);
    Assert.AreEqual(2, reloadedMods.SelectMany(kv => kv.Value).Count());
}
/// <summary>
/// Rewrites xml2.xml while attaching one extra modification to accession P53863 at
/// position 2, and checks that the writer reports one new entry and that the first protein
/// goes from two to three modified positions after the round trip.
/// </summary>
public void Test_write_with_custom_mods()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string sourcePath = Path.Combine(testDirectory, @"xml2.xml");
    string rewrittenPath = Path.Combine(testDirectory, @"rewrite_xml2.xml");

    var knownMods = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };

    ModificationMotif.TryGetMotif("K", out ModificationMotif motif);
    ModificationWithMass m = new ModificationWithMass("mod", new Tuple<string, string>("", ""), motif, ModificationSites.Any, 1, new Dictionary<string, IList<string>>(), new List<double> { -1 }, new List<double>(), "");

    var extraModsForProtein = new HashSet<Tuple<int, ModificationWithMass>>
    {
        new Tuple<int, ModificationWithMass>(2, m)
    };
    var new_mods = new Dictionary<string, HashSet<Tuple<int, ModificationWithMass>>>
    {
        { "P53863", extraModsForProtein }
    };

    List<Protein> loaded = ProteinDbLoader.LoadProteinXML(sourcePath, false, knownMods, false, null, out Dictionary<string, Modification> unknownMods);
    var newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, loaded, rewrittenPath);
    Assert.AreEqual(1, newModResEntries.Count);

    List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(rewrittenPath, false, knownMods, false, null, out unknownMods);
    Assert.AreEqual(loaded.Count, reloaded.Count);
    Assert.True(Enumerable.Range(0, loaded.Count).All(i => loaded[i].BaseSequence == reloaded[i].BaseSequence));

    // The custom modification adds one modified position to the first protein.
    Assert.AreEqual(2, loaded[0].OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(3, reloaded[0].OneBasedPossibleLocalizedModifications.Count);
}
/// <summary>
/// Checks the one-based begin/end positions of the single proteolysis product on the target
/// protein and on its reversed decoy, both when read from <paramref name="databaseName"/>
/// and after the targets are written back out and reloaded.
/// </summary>
/// <param name="databaseName">XML database file name inside the DatabaseTests directory.</param>
/// <param name="beginIdx">Expected begin position on the target.</param>
/// <param name="reversedBeginIdx">Expected begin position on the decoy.</param>
/// <param name="endIdx">Expected end position on the target.</param>
/// <param name="reversedEndIdx">Expected end position on the decoy.</param>
[TestCase("ranges2.xml", 1, 1, 5, 5)] // with starting methionine
public static void ReverseDecoyProteolysisProducts(string databaseName, int beginIdx, int reversedBeginIdx, int endIdx, int reversedEndIdx)
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

    var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(databaseDirectory, databaseName), true, DecoyType.Reverse, null, false, null, out var unknownModifications);

    var target = proteins[0];
    Assert.AreEqual(1, target.ProteolysisProducts.Count());
    Assert.AreEqual(beginIdx, target.ProteolysisProducts.Single().OneBasedBeginPosition); //P[start]EPTI[end]D, M[start]EPTI[end]D
    Assert.AreEqual(endIdx, target.ProteolysisProducts.Single().OneBasedEndPosition);

    var decoy = proteins[1];
    Assert.AreEqual(1, decoy.ProteolysisProducts.Count());
    Assert.AreEqual(reversedBeginIdx, decoy.ProteolysisProducts.Single().OneBasedBeginPosition); //DI[start]TPEP[end], M[start]DITP[end]E
    Assert.AreEqual(reversedEndIdx, decoy.ProteolysisProducts.Single().OneBasedEndPosition);

    // Write only the targets, then reload with decoy generation so the decoy is rebuilt
    // from the rewritten annotations.
    string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";
    string rewritePath = Path.Combine(databaseDirectory, rewriteDbName);
    ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), rewritePath);
    proteins = ProteinDbLoader.LoadProteinXML(rewritePath, true, DecoyType.Reverse, null, false, null, out unknownModifications);

    target = proteins[0];
    Assert.AreEqual(1, target.ProteolysisProducts.Count());
    Assert.AreEqual(beginIdx, target.ProteolysisProducts.Single().OneBasedBeginPosition);
    Assert.AreEqual(endIdx, target.ProteolysisProducts.Single().OneBasedEndPosition);

    decoy = proteins[1];
    Assert.AreEqual(1, decoy.ProteolysisProducts.Count());
    Assert.AreEqual(reversedBeginIdx, decoy.ProteolysisProducts.Single().OneBasedBeginPosition);
    Assert.AreEqual(reversedEndIdx, decoy.ProteolysisProducts.Single().OneBasedEndPosition);
}
/// <summary>
/// Writes a protein carrying one modification (read from z.txt) at position 2 into an XML
/// database, reloads it without any passed-in modifications, and checks that the
/// modification survives the round trip and is reported by
/// <c>ProteinDbLoader.GetPtmListFromProteinXml</c>.
/// </summary>
public void Modification_read_write_into_proteinDb()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;

    Loaders.LoadElements(Path.Combine(testDirectory, "elements2.dat"));
    var sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(testDirectory, "z.txt")).ToList();
    Assert.AreEqual(1, sampleModList.OfType<ModificationWithMass>().Count());

    Protein protein = new Protein("MCSSSSSSSSSS", "accession", "organism", new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>> { { 2, sampleModList.OfType<Modification>().ToList() } }, null, "name", "full_name", false, false, new List<DatabaseReference>(), new List<SequenceVariation>(), new List<DisulfideBond>());
    Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType<ModificationWithMass>().Count());

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), new List<Protein> { protein }, Path.Combine(testDirectory, "test_modifications_with_proteins.xml"));

    List<Protein> new_proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(testDirectory, "test_modifications_with_proteins.xml"), true, DecoyType.None, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> um);
    Assert.AreEqual(1, new_proteins.Count);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
    Assert.AreEqual("Type", new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).OfType<ModificationWithMass>().First().modificationType);
    Assert.AreEqual("Palmitoylation of C", new_proteins[0].OneBasedPossibleLocalizedModifications[2][0].id);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications[2].OfType<ModificationWithMass>().Count());

    // Check that Modifications were saved after last load
    Assert.AreEqual(1, ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(testDirectory, @"test_modifications_with_proteins.xml")).Count);
    Assert.True(ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(testDirectory, @"test_modifications_with_proteins.xml"))[0] == new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).First());

    // But that we can still read modifications from other protein XMLs that exist
    Assert.AreEqual(0, ProteinDbLoader.GetPtmListFromProteinXml(Path.Combine(testDirectory, "xml.xml")).Count);
}
/// <summary>
/// Checks the one-based begin/end positions of the single disulfide bond on the target
/// protein and on its reversed decoy (plus the decoy's base sequence), both when read from
/// <paramref name="databaseName"/> and after the targets are written back out and reloaded.
/// </summary>
/// <param name="databaseName">XML database file name inside the DatabaseTests directory.</param>
/// <param name="beginIdx">Expected bond begin position on the target.</param>
/// <param name="reversedBeginIdx">Expected bond begin position on the decoy.</param>
/// <param name="reversedSequence">Expected base sequence of the decoy.</param>
/// <param name="endIdx">Expected bond end position on the target.</param>
/// <param name="reversedEndIdx">Expected bond end position on the decoy.</param>
[TestCase("bonds2.xml", 2, 4, "MDICPC", 4, 6)] // with starting methionine
public static void ReverseDecoyDisulfideBonds(string databaseName, int beginIdx, int reversedBeginIdx, string reversedSequence, int endIdx, int reversedEndIdx)
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

    var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(databaseDirectory, databaseName), true, DecoyType.Reverse, null, false, null, out var unknownModifications);

    var target = proteins[0];
    Assert.AreEqual(1, target.DisulfideBonds.Count());
    Assert.AreEqual(beginIdx, target.DisulfideBonds.Single().OneBasedBeginPosition); //PC[start]PC[end]ID, MC[start]PC[end]ID
    Assert.AreEqual(endIdx, target.DisulfideBonds.Single().OneBasedEndPosition);

    var decoy = proteins[1];
    Assert.AreEqual(1, decoy.DisulfideBonds.Count());
    Assert.AreEqual(reversedSequence, decoy.BaseSequence);
    Assert.AreEqual(reversedBeginIdx, decoy.DisulfideBonds.Single().OneBasedBeginPosition); //DIC[start]PC[end]P, MDIC[start]PC[end]
    Assert.AreEqual(reversedEndIdx, decoy.DisulfideBonds.Single().OneBasedEndPosition);

    // Write only the targets, then reload with decoy generation so the decoy is rebuilt
    // from the rewritten annotations.
    string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";
    string rewritePath = Path.Combine(databaseDirectory, rewriteDbName);
    ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), rewritePath);
    proteins = ProteinDbLoader.LoadProteinXML(rewritePath, true, DecoyType.Reverse, null, false, null, out unknownModifications);

    target = proteins[0];
    Assert.AreEqual(1, target.DisulfideBonds.Count());
    Assert.AreEqual(beginIdx, target.DisulfideBonds.Single().OneBasedBeginPosition);
    Assert.AreEqual(endIdx, target.DisulfideBonds.Single().OneBasedEndPosition);

    decoy = proteins[1];
    Assert.AreEqual(1, decoy.DisulfideBonds.Count());
    Assert.AreEqual(reversedBeginIdx, decoy.DisulfideBonds.Single().OneBasedBeginPosition);
    Assert.AreEqual(reversedEndIdx, decoy.DisulfideBonds.Single().OneBasedEndPosition);
}
/// <summary>
/// Writes two proteins built with the short constructor (one without and one with a name)
/// to an XML database and checks that accession and name are preserved after reloading.
/// </summary>
public void TestEmptyProteins()
{
    Protein p1 = new Protein("SEQENCE", "p1");
    Assert.AreEqual("p1||", p1.FullDescription);
    Protein p2 = new Protein("SEQENCE", "p2", name: "namep2");
    var proteinListToWrite = new List<Protein> { p1, p2 };

    // Generate data for files
    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"differentlyConstuctedProteins.xml");
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), proteinListToWrite, databasePath);

    IEnumerable<string> modTypesToExclude = new List<string>();
    IEnumerable<Modification> allKnownModifications = new List<Modification>();
    List<Protein> ok = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, allKnownModifications, false, modTypesToExclude, out Dictionary<string, Modification> un);

    Assert.AreEqual(p1.Accession, ok[0].Accession);
    Assert.AreEqual(p2.Accession, ok[1].Accession);
    Assert.AreEqual(p1.Name, ok[0].Name);
    Assert.AreEqual(p2.Name, ok[1].Name);
}
/// <summary>
/// Loads Isoform.fasta with the UniProt header regexes, checks the ten expected accessions
/// in order, then writes the proteins to IsoformTest.xml and checks that the same
/// accessions come back in the same order from the XML file.
/// </summary>
public static void LoadIsoforms()
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");
    string fastaPath = Path.Combine(databaseDirectory, "Isoform.fasta");
    string xmlPath = Path.Combine(databaseDirectory, "IsoformTest.xml");

    string[] expectedAccessions =
    {
        "Q13409", "Q13409_2", "Q13409_3",
        "Q13813", "Q13813_2", "Q13813_3",
        "Q14103", "Q14103_2", "Q14103_3", "Q14103_4"
    };

    var protein = ProteinDbLoader.LoadProteinFasta(fastaPath, true, DecoyType.None, false, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var errors);
    for (int idx = 0; idx < expectedAccessions.Length; idx++)
    {
        Assert.AreEqual(expectedAccessions[idx], protein[idx].Accession);
    }

    var mods = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
    ProteinDbWriter.WriteXmlDatabase(mods, protein, xmlPath);

    var proteinXml = ProteinDbLoader.LoadProteinXML(xmlPath, true, DecoyType.None, null, false, null, out var unknownMod);
    for (int idx = 0; idx < expectedAccessions.Length; idx++)
    {
        Assert.AreEqual(expectedAccessions[idx], proteinXml[idx].Accession);
    }
}
public static void TestContaminantAmbiguity() { //create an ms file and a database for the peptide Protein targetProtein = new Protein("PEPTIDE", "target"); string xmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PEPTIDE.xml"); ProteinDbWriter.WriteXmlDatabase(null, new List <Protein> { targetProtein }, xmlName); PeptideWithSetModifications pepWithSetMods = targetProtein.Digest(new DigestionParams(), null, null).First(); TestDataFile msFile = new TestDataFile(pepWithSetMods); string mzmlName = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PEPTIDE.mzML"); IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(msFile, mzmlName, false); string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestContaminantAmbiguityOutput"); //run a full modern search using two databases (the same database) but one is called a target and the other is called a contaminant //KEEP BOTH TARGET AND CONTAMINANT SearchParameters modernSearchParams = new SearchParameters(); modernSearchParams.SearchType = SearchType.Modern; modernSearchParams.TCAmbiguity = TargetContaminantAmbiguity.RenameProtein; SearchTask modernTask = new SearchTask(); modernTask.SearchParameters = modernSearchParams; EverythingRunnerEngine engine = new EverythingRunnerEngine(new List <(string, MetaMorpheusTask)> { ("task1", modernTask) }, new List <string> {
/// <summary>
/// Checks the one-based begin/end positions of the single splice site on the target
/// protein and on its reversed decoy, both when read from <paramref name="databaseName"/>
/// and after the targets are written back out and reloaded; finally digests every loaded
/// protein.
/// </summary>
/// <param name="databaseName">XML database file name inside the DatabaseTests directory.</param>
/// <param name="beginIdx">Expected splice site begin position on the target.</param>
/// <param name="reversedBeginIdx">Expected splice site begin position on the decoy.</param>
/// <param name="endIdx">Expected splice site end position on the target.</param>
/// <param name="reversedEndIdx">Expected splice site end position on the decoy.</param>
[TestCase("splices8.xml", 1, 5, 2, 6)] // range with start with starting methionine
public static void ReverseDecoySpliceSites(string databaseName, int beginIdx, int reversedBeginIdx, int endIdx, int reversedEndIdx)
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

    var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(databaseDirectory, databaseName), true, DecoyType.Reverse, null, false, null, out var unknownModifications);

    var target = proteins[0];
    Assert.AreEqual(1, target.SpliceSites.Count());
    Assert.AreEqual(beginIdx, target.SpliceSites.Single().OneBasedBeginPosition); //PE[start]P[end]TID, ME[start]P[end]TID, PE[site]PTID, ME[site]PTID, P[site]EPTID, M[site]EPTID
    Assert.AreEqual(endIdx, target.SpliceSites.Single().OneBasedEndPosition);

    var decoy = proteins[1];
    Assert.AreEqual(1, decoy.SpliceSites.Count());
    Assert.AreEqual(reversedBeginIdx, decoy.SpliceSites.Single().OneBasedBeginPosition); //DITP[start]E[end]P, MDITP[start]E[end], DITPE[site]P, MDITPE[site], DITPEP[site], M[site]DITPE
    Assert.AreEqual(reversedEndIdx, decoy.SpliceSites.Single().OneBasedEndPosition);

    // Write only the targets, then reload with decoy generation so the decoy is rebuilt
    // from the rewritten annotations.
    string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";
    string rewritePath = Path.Combine(databaseDirectory, rewriteDbName);
    ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), rewritePath);
    proteins = ProteinDbLoader.LoadProteinXML(rewritePath, true, DecoyType.Reverse, null, false, null, out unknownModifications);

    target = proteins[0];
    Assert.AreEqual(1, target.SpliceSites.Count());
    Assert.AreEqual(beginIdx, target.SpliceSites.Single().OneBasedBeginPosition);
    Assert.AreEqual(endIdx, target.SpliceSites.Single().OneBasedEndPosition);

    decoy = proteins[1];
    Assert.AreEqual(1, decoy.SpliceSites.Count());
    Assert.AreEqual(reversedBeginIdx, decoy.SpliceSites.Single().OneBasedBeginPosition);
    Assert.AreEqual(reversedEndIdx, decoy.SpliceSites.Single().OneBasedEndPosition);

    // The result is not inspected; ToList() forces the digestion to run for every protein.
    // NOTE(review): presumably a smoke check that digestion does not throw - confirm.
    List<PeptideWithSetModifications> peptides = proteins.SelectMany(vp => vp.Digest(new DigestionParams(), null, null)).ToList();
}
/// <summary>
/// For every existing destination XML database, combines its non-variant proteins (plus
/// <paramref name="additionalProteins"/>) with the proteins of the source XML database via
/// <c>ProteinAnnotation.CombineAndAnnotateProteins</c>, and writes the result next to the
/// destination as "&lt;name&gt;.withmods.xml" and "&lt;name&gt;.spritz.fasta".
/// </summary>
/// <param name="spritzDirectory">Directory handed to <c>ProteinAnnotation.GetUniProtMods</c>.</param>
/// <param name="sourceXmlPath">Source XML database; when the file does not exist an empty protein list is used.</param>
/// <param name="destinationXmlPaths">Destination XML databases; null or missing entries are skipped.</param>
/// <param name="additionalProteins">Extra proteins appended to each destination's non-variant proteins.</param>
/// <returns>Paths of the ".withmods.xml" files that were written.</returns>
public List<string> TransferModifications(string spritzDirectory, string sourceXmlPath, List<string> destinationXmlPaths, List<Protein> additionalProteins)
{
    var uniprotPtms = ProteinAnnotation.GetUniProtMods(spritzDirectory);
    List<string> writtenXmlPaths = new List<string>();

    Dictionary<string, Modification> unknownMods;
    List<Protein> sourceProteins;
    if (File.Exists(sourceXmlPath))
    {
        sourceProteins = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out unknownMods);
    }
    else
    {
        sourceProteins = new List<Protein>();
    }

    foreach (var xml in destinationXmlPaths.Where(candidate => candidate != null && File.Exists(candidate)))
    {
        string directory = Path.GetDirectoryName(xml);
        string baseName = Path.GetFileNameWithoutExtension(xml);
        string annotatedXmlPath = Path.Combine(directory, baseName + ".withmods.xml");
        string fastaPath = Path.Combine(directory, baseName + ".spritz.fasta");

        var nonVariantProteins = ProteinDbLoader
            .LoadProteinXML(xml, true, DecoyType.None, uniprotPtms, false, null, out unknownMods)
            .Select(p => p.NonVariantProtein)
            .Distinct();
        var destinationProteins = nonVariantProteins.Concat(additionalProteins).ToList();
        var annotatedProteins = ProteinAnnotation.CombineAndAnnotateProteins(sourceProteins, destinationProteins);

        ProteinDbWriter.WriteXmlDatabase(null, annotatedProteins, annotatedXmlPath);
        ProteinDbWriter.WriteFastaDatabase(annotatedProteins.SelectMany(p => p.GetVariantProteins()).ToList(), fastaPath, "|");
        writtenXmlPaths.Add(annotatedXmlPath);
    }

    return writtenXmlPaths;
}
/// <summary>
/// Loads xml2.xml, writes it back out as rewrite_xml2.xml, reloads the rewritten file and
/// checks that both protein lists agree on count, base sequences, GO database references,
/// gene names, organism, recorded database file path and in-range proteolysis product
/// positions.
/// </summary>
public void Test_read_write_read_xml()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string sourcePath = Path.Combine(testDirectory, @"xml2.xml");
    string rewrittenPath = Path.Combine(testDirectory, @"rewrite_xml2.xml");

    ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
    var knownMods = new List<Modification>
    {
        new ModificationWithLocation("fayk", "mt", motif, TerminusLocalization.Any, null)
    };

    // Read -> write -> read round trip.
    List<Protein> loaded = ProteinDbLoader.LoadProteinXML(sourcePath, true, DecoyType.None, knownMods, false, null, out Dictionary<string, Modification> unknownMods);
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), loaded, rewrittenPath);
    List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(rewrittenPath, true, DecoyType.None, knownMods, false, new List<string>(), out unknownMods);

    Assert.AreEqual(loaded.Count, reloaded.Count);
    Assert.True(Enumerable.Range(0, loaded.Count).All(i => loaded[i].BaseSequence == reloaded[i].BaseSequence));

    // Identical annotation checks on the first protein of each list; each protein must
    // report the file it was actually read from.
    Protein[] firstProteins = { loaded[0], reloaded[0] };
    string[] expectedPaths = { sourcePath, rewrittenPath };
    for (int idx = 0; idx < firstProteins.Length; idx++)
    {
        Protein first = firstProteins[idx];
        Assert.AreEqual(9, first.DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
        Assert.AreEqual(3, first.DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
        Assert.AreEqual(3, first.GeneNames.Count());
        Assert.AreEqual("primary", first.GeneNames.First().Item1);
        Assert.AreEqual("JJJ1", first.GeneNames.First().Item2);
        Assert.AreEqual("Saccharomyces cerevisiae (strain ATCC 204508 / S288c)", first.Organism);
        Assert.AreEqual(expectedPaths[idx], first.DatabaseFilePath);
    }

    // Every proteolysis product position is either unset or lies within its protein.
    foreach (List<Protein> proteins in new[] { loaded, reloaded })
    {
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod =>
            prod.OneBasedBeginPosition == null || (prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length))));
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod =>
            prod.OneBasedEndPosition == null || (prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length))));
    }
}
/// <summary>
/// Takes coding effect files from star-fusion to produce proteins. Each tab-separated data
/// line contributes one protein whose sequence is the fusion translation (column 22) up to
/// the first '*'; the proteins are written to "FusionProteins.xml" in the directory of the
/// first input file.
/// </summary>
/// <param name="codingEffectFilePaths">Comma-separated list of coding effect file paths.</param>
/// <param name="minPeptideLength">Sequences shorter than this are skipped.</param>
/// <param name="organism">Organism passed to every created protein.</param>
/// <returns>Path of the written FusionProteins.xml file.</returns>
/// <remarks>
/// NOTE(review): the default value of <paramref name="organism"/> looks garbled (probably
/// meant "Homo sapiens"). It is left unchanged here because callers may rely on the
/// default - confirm and fix at the source.
/// </remarks>
public static string ParseCodingEffectsToXml(string codingEffectFilePaths, int minPeptideLength = 7, string organism = "H**o sapiens")
{
    List<Protein> fusionProteins = new List<Protein>();
    HashSet<string> usedAccessions = new HashSet<string>();
    var paths = codingEffectFilePaths.Split(",");
    foreach (string path in paths)
    {
        using (StreamReader reader = new StreamReader(path))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // Skip header lines and keep reading. The previous `break` here stopped at
                // the first '#' line and dropped every data line that followed it.
                if (line.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // Blank lines (e.g. a trailing newline) carry no columns to index into.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                string[] s = line.Split('\t');
                var fusionName = s[0];
                var leftGene = s[4];
                var rightGene = s[6];
                var proteinFusionTranslation = s[22];

                // Keep only the part of the translation before the first '*'.
                var proteinSequence = proteinFusionTranslation.Split('*')[0];
                if (proteinSequence.Length < minPeptideLength)
                {
                    continue;
                }

                // Make the accession unique across all input files by appending _1, _2, ...
                // HashSet.Add returns false while the candidate is already taken.
                string accession = fusionName;
                int i = 1;
                while (!usedAccessions.Add(accession))
                {
                    accession = fusionName + "_" + i++.ToString();
                }

                fusionProteins.Add(new Protein(proteinSequence, accession, organism, new List<Tuple<string, string>> { new Tuple<string, string>("fusion", leftGene + "--" + rightGene) }));
            }
        }
    }

    string fusionProteinXmlPath = Path.Combine(Path.GetDirectoryName(paths[0]), "FusionProteins.xml");
    ProteinDbWriter.WriteXmlDatabase(null, fusionProteins, fusionProteinXmlPath);
    return fusionProteinXmlPath;
}
/// <summary>
/// Writes two proteins, one without annotated ranges and one with a single "chain" range
/// from position 4 to 8, to fdsfsd.xml. The test makes no assertions; it only passes when
/// constructing the proteins and writing the database complete without throwing.
/// </summary>
public void AnotherTest()
{
    // Generate data for files
    Protein parentProtein = new Protein("MPEPTIDEKANTHE", "accession1", new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), new int?[0], new int?[0], new string[0], "name1", "fullname1", false, false, new List<DatabaseReference>(), new List<SequenceVariation>());
    Protein proteinWithChain = new Protein("MAACNNNCAA", "accession3", new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), new int?[] { 4 }, new int?[] { 8 }, new string[] { "chain" }, "name2", "fullname2", false, false, new List<DatabaseReference>(), new List<SequenceVariation>());

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, ModificationWithMass>>>(), new List<Protein> { parentProtein, proteinWithChain }, Path.Combine(TestContext.CurrentContext.TestDirectory, @"fdsfsd.xml"));
}
/// <summary>
/// Checks that a modification attached to a sequence variation sits at the expected
/// one-based position on the target protein and on its reversed decoy, both when read from
/// <paramref name="databaseName"/> and after the targets are written back out and reloaded.
/// </summary>
/// <param name="databaseName">XML database file name inside the DatabaseTests directory.</param>
/// <param name="modIdx">Expected position of the modification and variation on the target.</param>
/// <param name="reversedModIdx">Expected position of the modification and variation on the decoy.</param>
[TestCase("oblm3.xml", 3, 5)] // with starting methionine
public static void LoadSeqVarModifications(string databaseName, int modIdx, int reversedModIdx)
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

    var proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(databaseDirectory, databaseName), true, DecoyType.Reverse, null, false, null, out var unknownModifications);

    var target = proteins[0];
    Assert.AreEqual(1, target.OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(modIdx, target.OneBasedPossibleLocalizedModifications.Single().Key);
    Assert.AreEqual(1, target.AppliedSequenceVariations.Count());
    Assert.AreEqual(modIdx, target.AppliedSequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, target.SequenceVariations.Count());
    Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, target.SequenceVariations.Single().OneBasedModifications.Count);
    Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedModifications.Single().Key); //PEP[mod]TID, MEP[mod]TID

    var decoy = proteins[1];
    Assert.AreEqual(1, decoy.OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(reversedModIdx, decoy.OneBasedPossibleLocalizedModifications.Single().Key); //DITP[mod]EP, MDITP[mod]E
    Assert.AreEqual(1, decoy.AppliedSequenceVariations.Count());
    Assert.AreEqual(reversedModIdx, decoy.AppliedSequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, decoy.SequenceVariations.Count());
    Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, decoy.SequenceVariations.Single().OneBasedModifications.Count);
    Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedModifications.Single().Key);

    // Write only the targets, then reload with decoy generation so the decoy is rebuilt
    // from the rewritten annotations.
    string rewriteDbName = $"{Path.GetFileNameWithoutExtension(databaseName)}rewrite.xml";
    string rewritePath = Path.Combine(databaseDirectory, rewriteDbName);
    ProteinDbWriter.WriteXmlDatabase(null, proteins.Where(p => !p.IsDecoy).ToList(), rewritePath);
    proteins = ProteinDbLoader.LoadProteinXML(rewritePath, true, DecoyType.Reverse, null, false, null, out unknownModifications);

    target = proteins[0];
    Assert.AreEqual(1, target.OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(modIdx, target.OneBasedPossibleLocalizedModifications.Single().Key);
    Assert.AreEqual(1, target.AppliedSequenceVariations.Count());
    Assert.AreEqual(modIdx, target.AppliedSequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, target.SequenceVariations.Count());
    Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, target.SequenceVariations.Single().OneBasedModifications.Count);
    Assert.AreEqual(modIdx, target.SequenceVariations.Single().OneBasedModifications.Single().Key);

    decoy = proteins[1];
    Assert.AreEqual(1, decoy.OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(reversedModIdx, decoy.OneBasedPossibleLocalizedModifications.Single().Key);
    Assert.AreEqual(1, decoy.AppliedSequenceVariations.Count());
    Assert.AreEqual(reversedModIdx, decoy.AppliedSequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, decoy.SequenceVariations.Count());
    Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedBeginPosition);
    Assert.AreEqual(1, decoy.SequenceVariations.Single().OneBasedModifications.Count);
    Assert.AreEqual(reversedModIdx, decoy.SequenceVariations.Single().OneBasedModifications.Single().Key);
}
/// <summary>
/// Combines the non-variant proteins of the destination XML database with the proteins of
/// the source XML database via <c>ProteinAnnotation.CombineAndAnnotateProteins</c>, and
/// writes the result next to the destination as "&lt;name&gt;.withmods.xml" and
/// "&lt;name&gt;.fasta".
/// </summary>
/// <param name="sourceXmlPath">XML database whose proteins are passed as the first argument to the combine step.</param>
/// <param name="destinationXmlPath">XML database whose non-variant proteins are passed as the second argument.</param>
/// <returns>Path of the written ".withmods.xml" file.</returns>
public static string TransferModifications(string sourceXmlPath, string destinationXmlPath)
{
    var uniprotPtms = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);
    var uniprot = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out var un);

    string outputDirectory = Path.GetDirectoryName(destinationXmlPath);
    string outputBaseName = Path.GetFileNameWithoutExtension(destinationXmlPath);

    string outxml = Path.Combine(outputDirectory, outputBaseName + ".withmods.xml");
    var nonVariantProts = ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out un).Select(p => p.NonVariantProtein).Distinct();
    var newProts = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.ToList());
    ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);

    // The FASTA output holds the variant proteins of every annotated protein.
    // (An unused lookup of the first accession containing "_" was removed here.)
    string outfasta = Path.Combine(outputDirectory, outputBaseName + ".fasta");
    ProteinDbWriter.WriteFastaDatabase(newProts.SelectMany(p => p.GetVariantProteins()).ToList(), outfasta, "|");
    return outxml;
}
/// <summary>
/// Writes a protein carrying one modification with neutral losses and diagnostic ions to an XML database,
/// then reloads it twice (first passing the modification in, then with an empty modification list)
/// and checks that the first neutral-loss and diagnostic-ion lists each still hold two values.
/// </summary>
public void TestWritePtmWithNeutralLossAndDiagnosticIons()
{
    string dbPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_neutral_loss_diagnostic_ion_mod.xml");

    ModificationMotif.TryGetMotif("T", out var motif);
    var neutralLosses = new Dictionary<DissociationType, List<double>>
    {
        { DissociationType.HCD, new List<double> { 80.0, 0 } },
        { DissociationType.ETD, new List<double> { 70.0, 0 } }
    };
    var diagnosticIons = new Dictionary<DissociationType, List<double>>
    {
        { DissociationType.CID, new List<double> { 60.0, 0 } },
        { DissociationType.EThcD, new List<double> { 40.0, 0 } }
    };
    Modification phospho = new Modification(
        _originalId: "Phospho",
        _modificationType: "Test",
        _target: motif,
        _locationRestriction: "Anywhere.",
        _monoisotopicMass: 80.0,
        _neutralLosses: neutralLosses,
        _diagnosticIons: diagnosticIons);
    Assert.That(phospho.ValidModification);

    var modsByPosition = new Dictionary<int, List<Modification>>
    {
        { 4, new List<Modification> { phospho } }
    };
    Protein protein = new Protein("PEPTIDE", "accession", oneBasedModifications: modsByPosition);
    Assert.That(protein.OneBasedPossibleLocalizedModifications.Count == 1);
    Assert.That(protein.OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);

    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        new List<Protein> { protein },
        dbPath);

    var suppliedModLists = new[]
    {
        new List<Modification> { phospho }, // with passed-in mods
        new List<Modification>()            // should be able to read mod from top of database...
    };
    foreach (List<Modification> suppliedMods in suppliedModLists)
    {
        List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(
            dbPath, true, DecoyType.None, suppliedMods, false, new List<string>(),
            out Dictionary<string, Modification> um);

        Assert.That(reloaded.First().OneBasedPossibleLocalizedModifications.First().Value.First().NeutralLosses.First().Value.Count == 2);
        Assert.That(reloaded.First().OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count == 2);
    }
}
/// <summary>
/// Loads spliceVar.xml, rewrites it, reloads the rewritten file, and checks that the first protein's
/// splice-variant count and the fields of its first splice variant are unchanged.
/// </summary>
public void TestReadWriteSpliceVars()
{
    string folder = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");
    string inputPath = Path.Combine(folder, @"spliceVar.xml");
    string rewrittenPath = Path.Combine(folder, @"rewrite_spliceVar.xml");

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, null, false, new List<string>(),
        out Dictionary<string, Modification> un);

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, rewrittenPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, null, false, new List<string>(), out un);

    Assert.AreEqual(ok[0].SpliceVariants.Count(), ok2[0].SpliceVariants.Count());

    // Field-by-field comparison of the first splice variant before and after the rewrite.
    var expected = ok[0].SpliceVariants.First();
    var actual = ok2[0].SpliceVariants.First();
    Assert.AreEqual(expected.OneBasedBeginPosition, actual.OneBasedBeginPosition);
    Assert.AreEqual(expected.OneBasedEndPosition, actual.OneBasedEndPosition);
    Assert.AreEqual(expected.Description, actual.Description);
    Assert.AreEqual(expected.OriginalSequence, actual.OriginalSequence);
    Assert.AreEqual(expected.VariantSequence, actual.VariantSequence);
}
/// <summary>
/// Loads xml2.xml with UniProt PTMs plus one custom modification, rewrites it, reloads the rewritten file,
/// and checks database references, gene names, organism, file path and proteolysis-product bounds on both loads.
/// </summary>
public void Test_readUniProtXML_writeProteinXml()
{
    string testDir = TestContext.CurrentContext.TestDirectory;
    string inputPath = Path.Combine(testDir, "DatabaseTests", @"xml2.xml");
    string rewrittenPath = Path.Combine(testDir, "DatabaseTests", @"rewrite_xml2.xml");
    const string expectedOrganism = "Saccharomyces cerevisiae (strain ATCC 204508 / S288c)";

    ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
    var nice = new List<Modification>
    {
        new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null)
    };

    var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(testDir, "PSI-MOD.obo2.xml"));
    Dictionary<string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
    var uniprotPtms = Loaders.LoadUniprot(Path.Combine(testDir, "ptmlist2.txt"), formalChargesDictionary).ToList();

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, uniprotPtms.Concat(nice), false, null,
        out Dictionary<string, Modification> un);

    // Indexing the first two entries up front; the modification dictionaries are not asserted on below.
    Protein zero = ok[0];
    Protein one = ok[1];
    Dictionary<int, List<Modification>> zero_mods = zero.OneBasedPossibleLocalizedModifications as Dictionary<int, List<Modification>>;
    Dictionary<int, List<Modification>> one_mods = one.OneBasedPossibleLocalizedModifications as Dictionary<int, List<Modification>>;

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, rewrittenPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, nice, false, new List<string>(), out un);

    Assert.AreEqual(ok.Count, ok2.Count);
    Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));

    // First protein as originally loaded.
    Assert.AreEqual(9, zero.DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
    Assert.AreEqual(1, zero.DatabaseReferences.Count(dbRef => dbRef.Type == "GeneID"));
    Assert.AreEqual(3, zero.DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
    Assert.AreEqual(3, zero.GeneNames.Count());
    Assert.AreEqual("primary", zero.GeneNames.First().Item1);
    Assert.AreEqual("JJJ1", zero.GeneNames.First().Item2);
    Assert.AreEqual(expectedOrganism, zero.Organism);
    Assert.AreEqual(inputPath, zero.DatabaseFilePath);

    // First protein after the write/read round trip.
    Protein rewrittenZero = ok2[0];
    Assert.AreEqual(9, rewrittenZero.DatabaseReferences.Count(dbRef => dbRef.Type == "GO"));
    Assert.AreEqual(3, rewrittenZero.DatabaseReferences.First(dbRef => dbRef.Type == "GO").Properties.Count());
    Assert.AreEqual(3, rewrittenZero.GeneNames.Count());
    Assert.AreEqual("primary", rewrittenZero.GeneNames.First().Item1);
    Assert.AreEqual("JJJ1", rewrittenZero.GeneNames.First().Item2);
    Assert.AreEqual(expectedOrganism, rewrittenZero.Organism);
    Assert.AreEqual(rewrittenPath, rewrittenZero.DatabaseFilePath);

    // Every proteolysis product must start and end inside its protein (or leave the position unset).
    foreach (List<Protein> proteins in new[] { ok, ok2 })
    {
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedBeginPosition == null || prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length)));
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod => prod.OneBasedEndPosition == null || prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length)));
    }
}
/// <summary>
/// Loads xml2.xml, writes it back out together with one additional modification on accession "P53863",
/// and checks that the writer reports one new entry and that the reloaded first protein
/// has three modified positions where the original had two.
/// </summary>
public void Test_write_with_custom_mods()
{
    string testDir = TestContext.CurrentContext.TestDirectory;
    string inputPath = Path.Combine(testDir, "DatabaseTests", @"xml2.xml");
    string rewrittenPath = Path.Combine(testDir, "DatabaseTests", @"rewrite_xml2.xml");

    ModificationMotif.TryGetMotif("S", out ModificationMotif serineMotif);
    ModificationMotif.TryGetMotif("T", out ModificationMotif threonineMotif);
    ModificationMotif.TryGetMotif("X", out ModificationMotif anyMotif);
    var nice = new List<Modification>
    {
        new Modification("fayk", null, "mt", null, anyMotif, "Anywhere.", null, 10, null, null, null, null, null, null),
        new Modification("Phosphoserine", null, "mt", null, serineMotif, "Anywhere.", null, 80, null, null, null, null, null, null),
        new Modification("Phosphothreonine", null, "mt", null, threonineMotif, "Anywhere.", null, 80, null, null, null, null, null, null)
    };

    // The extra modification that is attached to P53863 at position 2 when writing.
    ModificationMotif.TryGetMotif("K", out ModificationMotif lysineMotif);
    var extraModLosses = new Dictionary<DissociationType, List<double>>()
    {
        { DissociationType.AnyActivationType, new List<double> { -1 } }
    };
    Modification extraMod = new Modification("mod", null, "mt", null, lysineMotif, "Anywhere.", null, 1, null, null, null, extraModLosses, null, null);
    var new_mods = new Dictionary<string, HashSet<Tuple<int, Modification>>>
    {
        { "P53863", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, extraMod) } }
    };

    var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(testDir, "PSI-MOD.obo2.xml"));
    Dictionary<string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
    var uniprotPtms = Loaders.LoadUniprot(Path.Combine(testDir, "ptmlist2.txt"), formalChargesDictionary).ToList();

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, uniprotPtms.Concat(nice), false, new List<string>(),
        out Dictionary<string, Modification> un);

    var newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, ok, rewrittenPath);
    Assert.AreEqual(1, newModResEntries.Count);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, nice, false, new List<string>(), out un);

    Assert.AreEqual(ok.Count, ok2.Count);
    Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
    Assert.AreEqual(2, ok[0].OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(3, ok2[0].OneBasedPossibleLocalizedModifications.Count);
}
/// <summary>
/// Runs a search task configured with one SILAC label and NoOneHitWonders enabled against a
/// single-protein database and a generated mzML file; the test passes if the run completes.
/// </summary>
public static void TestSilacWhenProteinIsMissing()
{
    //make heavy residue and add to search task
    Residue heavyLysine = new Residue("a", 'a', "a", Chemistry.ChemicalFormula.ParseFormula("C{13}6H12N{15}2O"), ModificationSites.All); //+8 lysine
    Residue lightLysine = Residue.GetResidue('K');

    var silacLabels = new List<SilacLabel>
    {
        new SilacLabel(lightLysine.Letter, heavyLysine.Letter, heavyLysine.ThisChemicalFormula.Formula, heavyLysine.MonoisotopicMass - lightLysine.MonoisotopicMass)
    };
    var searchParameters = new SearchParameters
    {
        SilacLabels = silacLabels,
        //The NoOneHitWonders=true doesn't really seem like a SILAC test, but we're testing that there's no crash if a quantified peptide's proteinGroup isn't quantified
        //This happens if somebody messed with parsimony (picked TDS) or from requiring two peptides per protein (and we're only finding one). We're testing the second case here.
        NoOneHitWonders = true
    };
    SearchTask task = new SearchTask { SearchParameters = searchParameters };

    // Spectra file built from the light peptide plus the heavy/light mass difference.
    PeptideWithSetModifications lightPeptide = new PeptideWithSetModifications("PEPTIDEK", new Dictionary<string, Modification>());
    List<double> massDifferences = new List<double> { heavyLysine.MonoisotopicMass - lightLysine.MonoisotopicMass };
    MsDataFile myMsDataFile1 = new TestDataFile(lightPeptide, massDifferences);
    string mzmlName = @"silac.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile1, mzmlName, false);

    // Database containing only the one protein.
    string xmlName = "SilacDb.xml";
    Protein theProtein = new Protein("PEPTIDEK", "accession1");
    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        new List<Protein> { theProtein },
        xmlName);

    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSilac");
    Directory.CreateDirectory(outputFolder);

    var databases = new List<DbForTask> { new DbForTask(xmlName, false) };
    var spectraFiles = new List<string> { mzmlName };
    var theStringResult = task.RunTask(outputFolder, databases, spectraFiles, "taskId1").ToString();
}
/// <summary>
/// Checks that a sequence containing invalid characters is sanitized to "PROXEINX" by the sanitizer itself,
/// when read back from a FASTA file, and when read back from an XML database; also digests and fragments
/// the loaded protein to confirm that does not throw.
/// </summary>
public static void TestStringSanitation()
{
    string messedUpSequence = @"PRO�EIN�";
    string folder = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

    // just test the string sanitation method alone
    var sanitized = ProteinDbLoader.SanitizeAminoAcidSequence(messedUpSequence, 'X');
    Assert.That(sanitized == "PROXEINX");

    // test reading from a fasta
    Protein protein = new Protein(messedUpSequence, "accession");
    var proteinsToWrite = new List<Protein> { protein };
    string fastaPath = Path.Combine(folder, @"messedUp.fasta");
    ProteinDbWriter.WriteFastaDatabase(proteinsToWrite, fastaPath, "|");

    var fastaProteins = ProteinDbLoader.LoadProteinFasta(
        fastaPath, true, DecoyType.Reverse, false,
        ProteinDbLoader.UniprotAccessionRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotNameRegex,
        ProteinDbLoader.UniprotGeneNameRegex,
        ProteinDbLoader.UniprotOrganismRegex,
        out var a);
    Assert.That(fastaProteins.First(p => !p.IsDecoy).BaseSequence == "PROXEINX");

    // digest and fragment to check that there isn't a crash
    var peptides = fastaProteins.First()
        .Digest(new DigestionParams(), new List<Modification>(), new List<Modification>())
        .ToList();
    foreach (PeptideWithSetModifications peptide in peptides)
    {
        List<Product> fragments = new List<Product>();
        peptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, fragments);
    }

    // test reading from an XML
    string xmlPath = Path.Combine(folder, @"messedUp.xml");
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), proteinsToWrite, xmlPath);
    var xmlProteins = ProteinDbLoader.LoadProteinXML(
        xmlPath, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out var unk);
    Assert.That(xmlProteins.First(p => !p.IsDecoy).BaseSequence == "PROXEINX");
}
/// <summary>
/// Writes a two-protein XML database (one plain protein, one carrying a proteolysis product spanning
/// residues 4-8) to "fdsfsd.xml" in the test directory. Contains no assertions; it only checks
/// that writing completes.
/// </summary>
public void AnotherTest()
{
    // Generate data for files
    Protein parentProtein = new Protein(
        "MPEPTIDEKANTHE", "accession1", "organism",
        new List<Tuple<string, string>>(),
        new Dictionary<int, List<Modification>>(),
        null, "name1", "fullname1", false, false,
        new List<DatabaseReference>(),
        new List<SequenceVariation>(),
        new List<DisulfideBond>());

    List<ProteolysisProduct> pp = new List<ProteolysisProduct> { new ProteolysisProduct(4, 8, "chain") };
    Protein proteinWithChain = new Protein(
        "MAACNNNCAA", "accession3", "organism",
        new List<Tuple<string, string>>(),
        new Dictionary<int, List<Modification>>(),
        pp, "name2", "fullname2", false, false,
        new List<DatabaseReference>(),
        new List<SequenceVariation>(),
        new List<DisulfideBond>());

    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        new List<Protein> { parentProtein, proteinWithChain },
        Path.Combine(TestContext.CurrentContext.TestDirectory, @"fdsfsd.xml"));
}
/// <summary>
/// Loads seqvartests.xml, rewrites it, reloads the rewritten file, and checks that the first protein's
/// sequence-variation count and the fields of its first sequence variation are unchanged.
/// </summary>
public void TestReadWriteSeqVars2()
{
    string testDir = TestContext.CurrentContext.TestDirectory;
    string inputPath = Path.Combine(testDir, @"seqvartests.xml");
    string rewrittenPath = Path.Combine(testDir, @"rewrite_seqvartests.xml");

    var nice = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, false, nice, false, null, out Dictionary<string, Modification> un);

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, ModificationWithMass>>>(), ok, rewrittenPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(rewrittenPath, false, nice, false, null, out un);

    Assert.AreEqual(ok[0].SequenceVariations.Count(), ok2[0].SequenceVariations.Count());

    // Field-by-field comparison of the first sequence variation before and after the rewrite.
    var expected = ok[0].SequenceVariations.First();
    var actual = ok2[0].SequenceVariations.First();
    Assert.AreEqual(expected.OneBasedBeginPosition, actual.OneBasedBeginPosition);
    Assert.AreEqual(expected.OneBasedEndPosition, actual.OneBasedEndPosition);
    Assert.AreEqual(expected.Description, actual.Description);
    Assert.AreEqual(expected.OriginalSequence, actual.OriginalSequence);
    Assert.AreEqual(expected.VariantSequence, actual.VariantSequence);
}
/// <summary>
/// Merges the protein databases named on the command line into a single timestamped XML file
/// written to the application's base directory.
/// </summary>
/// <param name="args">Candidate file paths; only existing files ending in ".xml" or ".xml.gz" are used.</param>
static void Main(string[] args)
{
    // Path.GetExtension returns only the last extension (".gz" for "db.xml.gz"), so it can never
    // equal ".xml.gz"; match on the file-name suffix instead so gzipped databases are accepted.
    List<string> files = args
        .Where(f => File.Exists(f)
            && (f.EndsWith(".xml", StringComparison.Ordinal) || f.EndsWith(".xml.gz", StringComparison.Ordinal)))
        .ToList();

    if (files.Count < 2)
    {
        Console.WriteLine("Please enter at least two protein .xml or .xml.gz databases.");
        return;
    }

    // Read the clock once so the timestamp parts cannot straddle a second/minute/day boundary.
    string timestamp = DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss", System.Globalization.CultureInfo.InvariantCulture);
    string outpath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "merged_database_" + timestamp + ".xml");

    // merge databases
    Loaders.LoadElements(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "elements.dat"));
    List<Protein> merged = ProteinDbLoader.merge_proteins(
        files.SelectMany(f => ProteinDbLoader.LoadProteinXML(f, false, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> un))).ToList();

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), merged, outpath);
}
/// <summary>
/// Loads seqvartests.xml, rewrites it, reloads the rewritten file, and checks that the first protein's
/// sequence-variation count and the fields of its first sequence variation are unchanged.
/// </summary>
public void TestReadWriteSeqVars2()
{
    string testDir = TestContext.CurrentContext.TestDirectory;
    string inputPath = Path.Combine(testDir, @"seqvartests.xml");
    string rewrittenPath = Path.Combine(testDir, @"rewrite_seqvartests.xml");

    ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
    var nice = new List<Modification>
    {
        new ModificationWithLocation("fayk", "mt", motif, TerminusLocalization.Any, null)
    };

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, nice, false, new List<string>(),
        out Dictionary<string, Modification> un);

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, rewrittenPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, nice, false, new List<string>(), out un);

    Assert.AreEqual(ok[0].SequenceVariations.Count(), ok2[0].SequenceVariations.Count());

    // Field-by-field comparison of the first sequence variation before and after the rewrite.
    var expected = ok[0].SequenceVariations.First();
    var actual = ok2[0].SequenceVariations.First();
    Assert.AreEqual(expected.OneBasedBeginPosition, actual.OneBasedBeginPosition);
    Assert.AreEqual(expected.OneBasedEndPosition, actual.OneBasedEndPosition);
    Assert.AreEqual(expected.Description, actual.Description);
    Assert.AreEqual(expected.OriginalSequence, actual.OriginalSequence);
    Assert.AreEqual(expected.VariantSequence, actual.VariantSequence);
}
/// <summary>
/// Loads xml2.xml, rewrites it, reloads the rewritten file, and checks that protein count and base
/// sequences match and that every begin/end position on both loads is unset or within the protein.
/// </summary>
public void test_read_write_read_xml()
{
    string testDir = TestContext.CurrentContext.TestDirectory;
    string inputPath = Path.Combine(testDir, @"xml2.xml");
    string rewrittenPath = Path.Combine(testDir, @"rewrite_xml2.xml");

    var nice = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };

    Dictionary<string, Modification> un;
    List<Protein> ok = ProteinDbLoader.LoadProteinDb(inputPath, false, nice, false, out un);

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, ModificationWithMass>>>(), ok, rewrittenPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinDb(rewrittenPath, false, nice, false, out un);

    Assert.AreEqual(ok.Count, ok2.Count);
    Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));

    // Begin positions are checked before end positions, first for the original load, then the reload.
    foreach (List<Protein> proteins in new[] { ok, ok2 })
    {
        Assert.True(proteins.All(p => p.OneBasedBeginPositions.All(begin => begin == null || begin > 0 && begin <= p.Length)));
        Assert.True(proteins.All(p => p.OneBasedEndPositions.All(end => end == null || end > 0 && end <= p.Length)));
    }
}
/// <summary>
/// Times loading yeast_160126.xml.gz, writing its non-decoy proteins to rewrite_yeast.xml, and
/// loading that file again, then prints the elapsed time to the console.
/// </summary>
private static void BenchmarkDatabaseLoadWrite()
{
    Console.WriteLine("Starting benchmark BenchmarkDatabaseLoadWrite");

    Stopwatch timer = new Stopwatch();

    // Setup that is excluded from the timed section.
    Loaders.LoadElements("elements2.dat");
    IEnumerable<Modification> knownMods = PtmListLoader.ReadModsFromFile(@"ptmlist.txt").ToList();

    timer.Restart();

    var loaded = ProteinDbLoader.LoadProteinXML(@"yeast_160126.xml.gz", true, knownMods, false, null, out Dictionary<string, Modification> um);
    var targetsOnly = loaded.Where(p => !p.IsDecoy).ToList();
    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<System.Tuple<int, ModificationWithMass>>>(),
        targetsOnly,
        "rewrite_yeast.xml");
    // The reloaded list is not inspected; only the time spent loading matters here.
    var reloaded = ProteinDbLoader.LoadProteinXML(@"rewrite_yeast.xml", true, knownMods, false, null, out um);

    timer.Stop();

    Console.WriteLine("Time for getting formulas: " + timer.Elapsed);
    Console.WriteLine("Benchmark BenchmarkDatabaseLoadWrite finished");
}
/// <summary>
/// Writes protein "KKR" with one modification on each of its three residues (two of the modifications
/// share the id "Methyl" but target different motifs) and checks that all three modified positions
/// are present after reloading with an empty modification list.
/// </summary>
public static void Test_CustumPrunedDatabaseWriteAndRead()
{
    ModificationMotif.TryGetMotif("K", out ModificationMotif lysine);
    ModificationMotif.TryGetMotif("R", out ModificationMotif arginine);

    Modification acOnK = new Modification(
        _originalId: "Acetyl", _accession: null, _modificationType: "testModType", _featureType: null,
        _locationRestriction: "Anywhere.", _target: lysine, _monoisotopicMass: 42);
    Modification meOnK = new Modification(
        _originalId: "Methyl", _accession: null, _modificationType: "testModType", _featureType: null,
        _locationRestriction: "Anywhere.", _target: lysine, _monoisotopicMass: 14);
    Modification meOnR = new Modification(
        _originalId: "Methyl", _accession: null, _modificationType: "testModType", _featureType: null,
        _locationRestriction: "Anywhere.", _target: arginine, _monoisotopicMass: 14);

    // One modification per one-based residue position of "KKR".
    var modsByPosition = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification>() { acOnK } },
        { 2, new List<Modification>() { meOnK } },
        { 3, new List<Modification>() { meOnR } }
    };

    Protein protein = new Protein("KKR", "accession", null, null, modsByPosition, null, null, null, false, false, null, null, null, null);
    List<Protein> proteinsToWrite = new List<Protein>() { protein };

    string outputFileName = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"redundant.xml");
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), proteinsToWrite, outputFileName);

    List<Protein> new_proteins = ProteinDbLoader.LoadProteinXML(
        outputFileName, true, DecoyType.None, new List<Modification>(), false, new List<string>(),
        out Dictionary<string, Modification> proteinXmlModList);

    Assert.AreEqual(3, new_proteins[0].OneBasedPossibleLocalizedModifications.Count());
}