/// <summary>
/// Writes a three-residue protein that carries one modification on each residue to an
/// XML database, reads the file back, and checks that three modified positions remain.
/// </summary>
public static void Test_CustumPrunedDatabaseWriteAndRead()
{
    ModificationMotif.TryGetMotif("K", out ModificationMotif lysineMotif);
    ModificationMotif.TryGetMotif("R", out ModificationMotif arginineMotif);

    // Two of the three modifications share the id "Methyl" but differ in target motif.
    var acetylOnLysine = new Modification(
        _originalId: "Acetyl",
        _accession: null,
        _modificationType: "testModType",
        _featureType: null,
        _locationRestriction: "Anywhere.",
        _target: lysineMotif,
        _monoisotopicMass: 42);
    var methylOnLysine = new Modification(
        _originalId: "Methyl",
        _accession: null,
        _modificationType: "testModType",
        _featureType: null,
        _locationRestriction: "Anywhere.",
        _target: lysineMotif,
        _monoisotopicMass: 14);
    var methylOnArginine = new Modification(
        _originalId: "Methyl",
        _accession: null,
        _modificationType: "testModType",
        _featureType: null,
        _locationRestriction: "Anywhere.",
        _target: arginineMotif,
        _monoisotopicMass: 14);

    // One-based position -> modifications at that position of "KKR".
    var modsByPosition = new Dictionary<int, List<Modification>>
    {
        [1] = new List<Modification> { acetylOnLysine },
        [2] = new List<Modification> { methylOnLysine },
        [3] = new List<Modification> { methylOnArginine }
    };

    var protein = new Protein("KKR", "accession", null, null, modsByPosition, null, null, null, false, false, null, null, null, null);
    var proteinsToWrite = new List<Protein> { protein };

    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"redundant.xml");
    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        proteinsToWrite,
        databasePath);

    List<Protein> reloaded = ProteinDbLoader.LoadProteinXML(
        databasePath,
        true,
        DecoyType.None,
        new List<Modification>(),
        false,
        new List<string>(),
        out Dictionary<string, Modification> ignoredMods);

    Assert.AreEqual(3, reloaded[0].OneBasedPossibleLocalizedModifications.Count());
}
/// <summary>
/// Checks that the diagnostic ions of a modification are still present after the owning
/// protein is written to an XML database and read back, both when the modification is
/// passed to the loader and when the loader is given an empty modification list.
/// </summary>
public void TestWritePtmWithDiagnosticIons()
{
    string filename = "test_diagnostic_ion_mod.xml";
    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, filename);

    // Number of diagnostic ions in the first dissociation-type entry of the first
    // modification at the first modified position of the given protein.
    int FirstDiagnosticIonCount(Protein p) =>
        p.OneBasedPossibleLocalizedModifications.First().Value.First().DiagnosticIons.First().Value.Count;

    ModificationMotif.TryGetMotif("T", out var motif);
    var diagnosticIons = new Dictionary<DissociationType, List<double>>
    {
        { DissociationType.HCD, new List<double> { 80.0, 0 } },
        { DissociationType.ETD, new List<double> { 70.0, 0 } }
    };
    Modification m = new Modification(
        _originalId: "Phospho",
        _modificationType: "Test",
        _target: motif,
        _locationRestriction: "Anywhere.",
        _monoisotopicMass: 80.0,
        _diagnosticIons: diagnosticIons);
    Assert.That(m.ValidModification);

    var mods = new Dictionary<int, List<Modification>>
    {
        { 4, new List<Modification> { m } }
    };
    Protein protein = new Protein("PEPTIDE", "accession", oneBasedModifications: mods);
    Assert.That(protein.OneBasedPossibleLocalizedModifications.Count == 1);
    Assert.That(FirstDiagnosticIonCount(protein) == 2);

    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        new List<Protein> { protein },
        databasePath);

    // Read back with the modification passed in to the loader.
    List<Protein> new_proteins = ProteinDbLoader.LoadProteinXML(
        databasePath,
        true,
        DecoyType.None,
        new List<Modification> { m },
        false,
        new List<string>(),
        out Dictionary<string, Modification> um);
    Assert.That(FirstDiagnosticIonCount(new_proteins.First()) == 2);

    // Read back with no known modifications: the mod should be readable from the
    // top of the database file itself.
    new_proteins = ProteinDbLoader.LoadProteinXML(
        databasePath,
        true,
        DecoyType.None,
        new List<Modification>(),
        false,
        new List<string>(),
        out um);
    Assert.That(FirstDiagnosticIonCount(new_proteins.First()) == 2);
}
/// <summary>
/// Loads seqvartests.xml, rewrites it, reloads the rewritten file, and checks that the
/// first sequence variation of the first protein is unchanged by the round trip.
/// </summary>
public void TestReadWriteSeqVars2()
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");
    string inputPath = Path.Combine(databaseDirectory, @"seqvartests.xml");
    string rewrittenPath = Path.Combine(databaseDirectory, @"rewrite_seqvartests.xml");

    ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
    var nice = new List<Modification>
    {
        new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, null, null, null, null, null, null, null)
    };

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, nice, false, new List<string>(),
        out Dictionary<string, Modification> un);

    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, rewrittenPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, nice, false, new List<string>(), out un);

    Assert.AreEqual(ok[0].SequenceVariations.Count(), ok2[0].SequenceVariations.Count());

    // Compare the first variation field by field.
    var before = ok[0].SequenceVariations.First();
    var after = ok2[0].SequenceVariations.First();
    Assert.AreEqual(before.OneBasedBeginPosition, after.OneBasedBeginPosition);
    Assert.AreEqual(before.OneBasedEndPosition, after.OneBasedEndPosition);
    Assert.AreEqual(before.Description, after.Description);
    Assert.AreEqual(before.OriginalSequence, after.OriginalSequence);
    Assert.AreEqual(before.VariantSequence, after.VariantSequence);
}
/// <summary>
/// Combines the proteins loaded from <paramref name="sourceXmlPath"/> with the non-variant
/// proteins loaded from <paramref name="destinationXmlPath"/> via
/// <c>ProteinAnnotation.CombineAndAnnotateProteins</c> and writes the result next to the
/// destination file as "&lt;name&gt;.withmods.xml". Two FASTA files are also written beside it:
/// "&lt;name&gt;.fasta" (variant proteins) and "&lt;name&gt;.withdecoys.fasta" (reverse-decoy load
/// of the destination), skipping sequences that end in '?'.
/// </summary>
/// <param name="sourceXmlPath">Protein XML database that is loaded with the UniProt modification list.</param>
/// <param name="destinationXmlPath">
/// Database to annotate. Paths ending in ".xml" or ".xml.gz" are loaded as XML; anything
/// else is loaded as FASTA.
/// </param>
/// <returns>The path of the written ".withmods.xml" database.</returns>
public static string TransferModifications(string sourceXmlPath, string destinationXmlPath)
{
    var uniprotPtms = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);
    var uniprot = ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out var un);

    // All output files live beside the destination file and share its base name.
    string outputDirectory = Path.GetDirectoryName(destinationXmlPath);
    string outputBaseName = Path.GetFileNameWithoutExtension(destinationXmlPath);
    string outxml = Path.Combine(outputDirectory, outputBaseName + ".withmods.xml");

    // File extensions are not linguistic text: compare ordinally, and short-circuit.
    bool destinationIsXml =
        destinationXmlPath.EndsWith(".xml", StringComparison.Ordinal) ||
        destinationXmlPath.EndsWith(".xml.gz", StringComparison.Ordinal);

    var nonVariantProts = destinationIsXml
        ? ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out un)
            .Select(p => p.NonVariantProtein).Distinct()
        : ProteinDbLoader.LoadProteinFasta(destinationXmlPath, true, DecoyType.None, false,
                ProteinDbLoader.UniprotAccessionRegex, PgmNameRegex, PgmNameRegex,
                ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var un2)
            .Select(p => p.NonVariantProtein).Distinct();

    var newProts = ProteinAnnotation.CombineAndAnnotateProteins(uniprot, nonVariantProts.ToList());
    ProteinDbWriter.WriteXmlDatabase(null, newProts, outxml);

    string outfasta = Path.Combine(outputDirectory, outputBaseName + ".fasta");
    string outfastaWithDecoys = Path.Combine(outputDirectory, outputBaseName + ".withdecoys.fasta");

    var protsForFasta = newProts
        .SelectMany(p => p.GetVariantProteins())
        .Where(p => !p.BaseSequence.EndsWith('?'))
        .ToList();

    // NOTE(review): this always uses the XML loader on the destination path, even when the
    // destination was loaded as FASTA above — confirm whether that is intended.
    var decoyProtsForFasta = ProteinDbLoader
        .LoadProteinXML(destinationXmlPath, true, DecoyType.Reverse, uniprotPtms, false, null, out un)
        .Where(p => !p.BaseSequence.EndsWith('?'))
        .ToList();

    ProteinDbWriter.WriteFastaDatabase(protsForFasta, outfasta, "|");
    ProteinDbWriter.WriteFastaDatabase(decoyProtsForFasta, outfastaWithDecoys, "|");

    // Rewrite the decoy header prefix in place; ReadAllLines loads the whole file before
    // WriteAllLines reopens it.
    File.WriteAllLines(
        outfastaWithDecoys,
        File.ReadAllLines(outfastaWithDecoys).Select(line => line.Replace("mz|DECOY_", "rev_mz|")));

    return outxml;
}
/// <summary>
/// Builds a protein whose position 2 carries the modification read from z.txt, passes an
/// equal, separately constructed modification for the same position to the writer, and
/// checks that the writer reports no new entries and that the reloaded protein has
/// exactly one modification at one position.
/// </summary>
public void DoNotWriteSameModTwiceAndDoNotWriteInHeaderSinceDifferent()
{
    Loaders.LoadElements();

    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "z.txt");
    var sampleModList = PtmListLoader.ReadModsFromFile(modFilePath, out var errors).ToList();

    var modsByPosition = new Dictionary<int, List<Modification>>
    {
        { 2, sampleModList.OfType<Modification>().ToList() }
    };
    Protein protein = new Protein(
        "MCSSSSSSSSSS",
        "accession",
        "organism",
        new List<Tuple<string, string>>(),
        modsByPosition,
        null,
        "name",
        "full_name",
        false,
        false,
        new List<DatabaseReference>(),
        new List<SequenceVariation>(),
        disulfideBonds: new List<DisulfideBond>());
    Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType<Modification>().Count());

    // Construct a second modification from the formula and mass of the one read from file.
    var modReadFromFile = sampleModList.First() as Modification;
    ModificationMotif.TryGetMotif("C", out ModificationMotif motif);
    Modification newMod = new Modification(
        _originalId: "Palmitoylation of C",
        _modificationType: "Type",
        _target: motif,
        _locationRestriction: "Anywhere.",
        _chemicalFormula: modReadFromFile.ChemicalFormula,
        _monoisotopicMass: modReadFromFile.MonoisotopicMass,
        _featureType: "MOD_RES",
        _fileOrigin: "E:\\GitClones\\mzLib\\Test\\bin\\x64\\Debug\\DatabaseTests\\z.txt");

    // Equality is asserted in both directions.
    Assert.IsTrue(newMod.Equals(sampleModList.First()));
    Assert.AreEqual(newMod, sampleModList.First());
    Assert.AreEqual(sampleModList.First(), newMod);

    var dictWithThisMod = new Dictionary<string, HashSet<Tuple<int, Modification>>>
    {
        {
            "accession",
            new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, newMod) }
        }
    };

    string outputPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins3.xml");
    var newModResEntries = ProteinDbWriter.WriteXmlDatabase(dictWithThisMod, new List<Protein> { protein }, outputPath);
    Assert.AreEqual(0, newModResEntries.Count);

    List<Protein> new_proteins = ProteinDbLoader.LoadProteinXML(
        outputPath,
        true,
        DecoyType.None,
        new List<Modification>(),
        false,
        new List<string>(),
        out Dictionary<string, Modification> um);

    Assert.AreEqual(1, new_proteins.Count);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
}
/// <summary>
/// Loads SeqVarSymbolWeirdness2.xml and checks that it yields two proteins: one matching
/// the non-variant protein (sequence, name, full name, accession) and one that differs
/// from it, with 'R' versus 'H' at zero-based index 2386. Finally digests every loaded
/// protein with default digestion parameters.
/// </summary>
public void VariantSymbolWeirdness2Xml()
{
    string file = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "SeqVarSymbolWeirdness2.xml");
    List<Protein> variantProteins = ProteinDbLoader.LoadProteinXML(file, true, DecoyType.None, null, false, null, out var un);

    var nonVariant = variantProteins.First().NonVariantProtein;
    Assert.AreEqual(1, nonVariant.SequenceVariations.Count());

    // There is only one unique amino acid change, so two proteins are expected and only
    // one of them shares the non-variant base sequence.
    Assert.AreEqual(2, variantProteins.Count);
    Assert.AreEqual(1, variantProteins.Count(v => v.BaseSequence == nonVariant.BaseSequence));

    var variantProteinRef = variantProteins.First();
    var variantProteinAlt = variantProteins.Last();
    const int changedIndex = 2386;

    Assert.AreEqual('R', nonVariant.BaseSequence[changedIndex]);
    Assert.AreEqual('R', variantProteinRef.BaseSequence[changedIndex]);
    Assert.AreEqual('H', variantProteinAlt.BaseSequence[changedIndex]);

    Assert.AreEqual(nonVariant.Name, variantProteinRef.Name);
    Assert.AreNotEqual(nonVariant.Name, variantProteinAlt.Name);

    Assert.AreEqual(nonVariant.FullName, variantProteinRef.FullName);
    Assert.AreNotEqual(nonVariant.FullName, variantProteinAlt.FullName);

    Assert.AreEqual(nonVariant.Accession, variantProteinRef.Accession);
    Assert.AreNotEqual(nonVariant.Accession, variantProteinAlt.Accession);

    // Result is not inspected; materializing it forces the digestion to run.
    List<PeptideWithSetModifications> peptides = variantProteins
        .SelectMany(vp => vp.Digest(new DigestionParams(), null, null))
        .ToList();
}
/// <summary>
/// Loads a protein database, choosing the FASTA or XML loader from the file extension,
/// and returns only the proteins with a non-empty base sequence.
/// </summary>
/// <param name="fileName">
/// Path of the database. An extension ending in "gz" is treated as compressed, in which
/// case the extension underneath it decides the format. ".fasta" and ".fa" select the
/// FASTA loader; everything else goes to the XML loader.
/// </param>
/// <returns>The loaded proteins whose <c>BaseSequence</c> has length greater than zero.</returns>
private static List<Protein> LoadProteinDb(string fileName)
{
    string extension = Path.GetExtension(fileName).ToLowerInvariant();

    // "gz" suffix check also allows for .bgz and .tgz, which are used on occasion.
    if (extension.EndsWith("gz"))
    {
        extension = Path.GetExtension(Path.GetFileNameWithoutExtension(fileName)).ToLowerInvariant();
    }

    List<Protein> loaded;
    if (extension.Equals(".fasta") || extension.Equals(".fa"))
    {
        loaded = ProteinDbLoader.LoadProteinFasta(
            fileName,
            true,
            DecoyType.None,
            false,
            ProteinDbLoader.UniprotAccessionRegex,
            ProteinDbLoader.UniprotFullNameRegex,
            ProteinDbLoader.UniprotFullNameRegex,
            ProteinDbLoader.UniprotGeneNameRegex,
            ProteinDbLoader.UniprotOrganismRegex,
            out List<string> dbErrors);
    }
    else
    {
        loaded = ProteinDbLoader.LoadProteinXML(fileName, true, DecoyType.None, null, false, null, out var um);
    }

    return loaded.Where(p => p.BaseSequence.Length > 0).ToList();
}
/// <summary>
/// Loads the gzip-named database xml.xml.gz and checks the first residue and full
/// description of the first two entries (the second is expected to carry the "DECOY_"
/// prefix), plus the Ensembl database references and gene names of the first entry.
/// </summary>
public void XmlGzTest()
{
    var nice = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };

    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml.xml.gz");
    var ok = ProteinDbLoader.LoadProteinXML(databasePath, true, nice, false, null, out Dictionary<string, Modification> un);

    // First residue and description of the first two loaded entries.
    Assert.AreEqual('M', ok[0][0]);
    Assert.AreEqual('M', ok[1][0]);
    Assert.AreEqual("P62805|H4_HUMAN|Histone H4", ok[0].FullDescription);
    Assert.AreEqual("DECOY_P62805|H4_HUMAN|Histone H4", ok[1].FullDescription);

    // First Ensembl cross-reference of the first entry.
    var firstEntry = ok[0];
    var firstEnsembl = firstEntry.DatabaseReferences.First(dbRef => dbRef.Type == "Ensembl");
    Assert.AreEqual("ENST00000244537", firstEnsembl.Id);
    Assert.AreEqual("protein sequence ID", firstEnsembl.Properties.First().Item1);
    Assert.AreEqual("ENSP00000244537", firstEnsembl.Properties.First().Item2);

    // Gene names, overall and restricted to those labelled "primary".
    Assert.AreEqual(42, firstEntry.GeneNames.Count());
    Assert.AreEqual(14, firstEntry.GeneNames.Count(t => t.Item1 == "primary"));
    Assert.AreEqual("HIST1H4A", firstEntry.GeneNames.First(t => t.Item1 == "primary").Item2);

    Assert.AreEqual(23, firstEntry.DatabaseReferences.Count(dbRef => dbRef.Type == "Ensembl"));
}
/// <summary>
/// Loads one FASTA file each in UniProt, GENCODE and Ensembl header style with reverse
/// decoys and checks the fields parsed from the header of the non-decoy protein.
/// </summary>
public static void TestDifferentHeaderStyles()
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");

    // UniProt-style header.
    string fastaFile = Path.Combine(databaseDirectory, "uniprot_aifm1.fasta");
    var proteins = ProteinDbLoader.LoadProteinFasta(fastaFile, true, DecoyType.Reverse, false, out var errors);
    Assert.That(proteins.Count == 2);

    var targetProtein = proteins.First(p => !p.IsDecoy);
    Assert.That(targetProtein.Accession == "Q9Z0X1");
    Assert.That(targetProtein.GeneNames.Count() == 1);
    Assert.That(targetProtein.GeneNames.First().Item2 == "Aifm1");
    Assert.That(targetProtein.FullName == "Apoptosis-inducing factor 1, mitochondrial");
    Assert.That(targetProtein.Name == "AIFM1_MOUSE");
    Assert.That(targetProtein.Organism == "Mus musculus");

    // GENCODE-style header.
    fastaFile = Path.Combine(databaseDirectory, "gencode_mmp20.fa");
    proteins = ProteinDbLoader.LoadProteinFasta(fastaFile, true, DecoyType.Reverse, false, out errors);
    Assert.That(proteins.Count == 2);

    targetProtein = proteins.First(p => !p.IsDecoy);
    Assert.That(targetProtein.Accession == "ENSMUSP00000034487.2");
    Assert.That(targetProtein.GeneNames.Count() == 1);
    Assert.That(targetProtein.GeneNames.First().Item2 == "Mmp20");
    Assert.That(targetProtein.FullName == "Mmp20-201");

    // Ensembl-style header.
    fastaFile = Path.Combine(databaseDirectory, "ensembl_prrc2a.fa");
    proteins = ProteinDbLoader.LoadProteinFasta(fastaFile, true, DecoyType.Reverse, false, out errors);
    Assert.That(proteins.Count == 2);

    targetProtein = proteins.First(p => !p.IsDecoy);
    Assert.That(targetProtein.Accession == "ENSP00000372947.2");
    Assert.That(targetProtein.GeneNames.Count() == 1);
    Assert.That(targetProtein.GeneNames.First().Item2 == "ENSG00000206427.11");
}
/// <summary>
/// Loads an Ensembl pep.all FASTA file, writes it as an XML database, reloads that XML,
/// and checks that accessions, gene names, full names, database file paths and base
/// sequences agree, and that every proteolysis product position lies within its protein.
/// </summary>
public void Test_read_Ensembl_pepAllFasta()
{
    const string firstFullName = "pep:known chromosome:GRCh37:22:24313554:24316773:-1 gene:ENSG00000099977 transcript:ENST00000398344 gene_biotype:protein_coding transcript_biotype:protein_coding";
    const string secondFullName = "pep:known chromosome:GRCh37:22:24313554:24322019:-1 gene:ENSG00000099977 transcript:ENST00000350608 gene_biotype:protein_coding transcript_biotype:protein_coding";

    string fastaPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"test_ensembl.pep.all.fasta");
    string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"rewrite_test_ensembl.pep.all.xml");

    ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
    var nice = new List<Modification>
    {
        new Modification("fayk", null, "mt", null, motif, "Anywhere.", null, null, null, null, null, null, null, null)
    };

    List<Protein> ok = ProteinDbLoader.LoadProteinFasta(
        fastaPath,
        true,
        DecoyType.None,
        false,
        ProteinDbLoader.EnsemblAccessionRegex,
        ProteinDbLoader.EnsemblFullNameRegex,
        ProteinDbLoader.EnsemblAccessionRegex,
        ProteinDbLoader.EnsemblGeneNameRegex,
        null,
        out var a);

    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, xmlPath);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        xmlPath, true, DecoyType.None, nice, false, null, out Dictionary<string, Modification> un);

    Assert.AreEqual(ok.Count, ok2.Count);
    Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));

    // Proteins as read from the FASTA file.
    Assert.AreEqual("ENSP00000381386", ok[0].Accession);
    Assert.AreEqual("ENSP00000215773", ok[1].Accession);
    Assert.AreEqual("ENSG00000099977", ok[0].GeneNames.First().Item2);
    Assert.AreEqual("ENSG00000099977", ok[1].GeneNames.First().Item2);
    Assert.AreEqual(firstFullName, ok[0].FullName);
    Assert.AreEqual(secondFullName, ok[1].FullName);
    Assert.AreEqual(fastaPath, ok[0].DatabaseFilePath);

    // Proteins as read back from the rewritten XML file.
    Assert.AreEqual("ENSP00000381386", ok2[0].Accession);
    Assert.AreEqual("ENSP00000215773", ok2[1].Accession);
    Assert.AreEqual("ENSG00000099977", ok2[0].GeneNames.First().Item2);
    Assert.AreEqual("ENSG00000099977", ok2[1].GeneNames.First().Item2);
    Assert.AreEqual(firstFullName, ok2[0].FullName);
    Assert.AreEqual(secondFullName, ok2[1].FullName);
    Assert.AreEqual(xmlPath, ok2[0].DatabaseFilePath);

    // Each proteolysis product boundary is either absent or a valid one-based position.
    foreach (List<Protein> proteins in new[] { ok, ok2 })
    {
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod =>
            prod.OneBasedBeginPosition == null
            || (prod.OneBasedBeginPosition > 0 && prod.OneBasedBeginPosition <= p.Length))));
        Assert.True(proteins.All(p => p.ProteolysisProducts.All(prod =>
            prod.OneBasedEndPosition == null
            || (prod.OneBasedEndPosition > 0 && prod.OneBasedEndPosition <= p.Length))));
    }
}
/// <summary>
/// Writes two minimally constructed proteins (one without and one with a name) to an XML
/// database and checks that accession and name are the same after reading them back.
/// </summary>
public void TestEmptyProteins()
{
    Protein p1 = new Protein("SEQENCE", "p1");
    Assert.AreEqual("p1||", p1.FullDescription);

    Protein p2 = new Protein("SEQENCE", "p2", name: "namep2");

    // Generate the database file from both proteins.
    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"differentlyConstuctedProteins.xml");
    var proteinListToWrite = new List<Protein> { p1, p2 };
    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        proteinListToWrite,
        databasePath);

    IEnumerable<string> modTypesToExclude = new List<string>();
    IEnumerable<Modification> allKnownModifications = new List<Modification>();
    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        databasePath,
        true,
        DecoyType.None,
        allKnownModifications,
        false,
        modTypesToExclude,
        out Dictionary<string, Modification> un);

    Assert.AreEqual(p1.Accession, ok[0].Accession);
    Assert.AreEqual(p2.Accession, ok[1].Accession);
    Assert.AreEqual(p1.Name, ok[0].Name);
    Assert.AreEqual(p2.Name, ok[1].Name);
}
/// <summary>
/// Loads two XML databases with <c>DecoyType.Slide</c> and compares the second loaded
/// protein against expectations: its base sequence, disulfide bond and proteolysis
/// product counts and positions (disulfidetests.xml), and its modification position and
/// sequence variations (O43653.xml).
/// </summary>
public static void TestSlideDecoyXML()
{
    // Sequence and disulfides.
    var ok2 = ProteinDbLoader.LoadProteinXML(
        Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"disulfidetests.xml"),
        true,
        DecoyType.Slide,
        UniProtPtms,
        false,
        new string[] { "exclude_me" },
        out Dictionary<string, Modification> un);

    Assert.AreEqual("MALLVHFLPLLALLALWEPKPTQAFVKQHLCGPHLVEALYLVCGERGFFYTPKSRREVEDPQVEQLELGGSPGDLQTLALEVARQKRGIVDQCCTSICSLYQLENYCN", ok2[0].BaseSequence);
    Assert.AreEqual("MTKAEVLQLLAGLHLVHALYAVLGVRFFPYLPLSARWVPDPQQEFLKLHGCPPDLQELLLLVCREKGGFVTQKCRSECELPQVEQYENGCSNGLLYTSAIETACQDRI", ok2[1].BaseSequence);
    Assert.AreEqual(ok2[0].DisulfideBonds.Count(), ok2[1].DisulfideBonds.Count());

    // Materialize once instead of calling ToArray() on every loop iteration.
    var firstProducts = ok2[0].ProteolysisProducts.ToArray();
    var secondProducts = ok2[1].ProteolysisProducts.ToArray();
    Assert.AreEqual(firstProducts.Length, secondProducts.Length);
    for (int i = 0; i < firstProducts.Length; i++)
    {
        Assert.AreEqual(firstProducts[i].OneBasedBeginPosition, secondProducts[i].OneBasedBeginPosition);
        Assert.AreEqual(firstProducts[i].OneBasedEndPosition, secondProducts[i].OneBasedEndPosition);
    }

    // Every disulfide bond in both proteins must start and end on a cysteine.
    // Expected value goes first so that a failure message reads correctly.
    foreach (var protein in new[] { ok2[0], ok2[1] })
    {
        foreach (DisulfideBond bond in protein.DisulfideBonds)
        {
            Assert.AreEqual('C', protein.BaseSequence[bond.OneBasedBeginPosition - 1]);
            Assert.AreEqual('C', protein.BaseSequence[bond.OneBasedEndPosition - 1]);
        }
    }

    // Sequence variants and modifications.
    ok2 = ProteinDbLoader.LoadProteinXML(
        Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"O43653.xml"),
        true,
        DecoyType.Slide,
        UniProtPtms,
        false,
        new string[] { "exclude_me" },
        out un);

    Assert.AreEqual(13, ok2[1].OneBasedPossibleLocalizedModifications.First().Key);

    var decoyVariants = ok2[1].SequenceVariations.ToList();
    Assert.AreEqual("MLAAKLVMLL", decoyVariants[0].VariantSequence); // variant should shuffle but keep initiator methionine
    Assert.AreEqual(1, decoyVariants[0].OneBasedBeginPosition); // shouldn't have changed
    Assert.AreEqual(10, decoyVariants[1].OneBasedBeginPosition); // 30-20
}
/// <summary>
/// Loads xml.xml with two known modifications that share an id but have the types
/// "exclude_me" and "exclude_me_not", excluding the type given by
/// <paramref name="excludeString"/>, and checks which of the two types show up on the
/// first loaded protein.
/// </summary>
/// <param name="excludeString">Modification type passed to the loader as the single excluded type.</param>
/// <param name="isExcluded">
/// Expected result of looking for "exclude_me" among the loaded modification types; the
/// opposite value is expected for "exclude_me_not".
/// </param>
[TestCase("exclude_me", false)] // first argument is the test input, second is the expected result of the assertion
//[TestCase("exclude_me_not", true)]
public static void Read_xml_exclude_mods(string excludeString, bool isExcluded)
{
    ModificationMotif.TryGetMotif("X", out ModificationMotif motif);
    var nice = new List<Modification>
    {
        new Modification("N-acetylserine", null, "exclude_me", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null),
        new Modification("N-acetylserine", null, "exclude_me_not", null, motif, "Anywhere.", null, 10, null, null, null, null, null, null)
    };
    Assert.That(nice[0].ValidModification);

    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml.xml");
    var ok2 = ProteinDbLoader.LoadProteinXML(
        databasePath,
        true,
        DecoyType.Reverse,
        nice,
        false,
        new[] { excludeString },
        out Dictionary<string, Modification> un);

    // Collect the modification types seen at each position (distinct within a position).
    List<string> modTypes = ok2[0].OneBasedPossibleLocalizedModifications
        .SelectMany(entry => entry.Value.Select(m => m.ModificationType).Distinct())
        .ToList();

    Assert.AreEqual(isExcluded, modTypes.Contains("exclude_me"));
    Assert.AreEqual(!isExcluded, modTypes.Contains("exclude_me_not"));
}
/// <summary>
/// Loads the two-entry database xml2.xml and checks that proteolysis product positions
/// are in range, that no base sequence contains a space, tab or newline, and that each
/// of the two non-decoy proteins has exactly one "EnsemblFungi" database reference.
/// </summary>
public void XmlTest_2entry()
{
    var nice = new List<Modification>
    {
        new ModificationWithLocation("fayk", null, null, ModificationSites.A, null, null)
    };

    var ok = ProteinDbLoader.LoadProteinXML(
        Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml"),
        true,
        nice,
        false,
        null,
        out Dictionary<string, Modification> un);

    Assert.True(ok.All(p => p.ProteolysisProducts.All(d => d.OneBasedBeginPosition == null || d.OneBasedBeginPosition > 0)));
    Assert.True(ok.All(p => p.ProteolysisProducts.All(d => d.OneBasedEndPosition == null || d.OneBasedEndPosition <= p.Length)));

    // No sequence may contain whitespace. This uses Any rather than All: with All the
    // assertion would pass as long as a single sequence was clean.
    Assert.False(ok.Any(p => p.BaseSequence.Contains(" ")));
    Assert.False(ok.Any(p => p.BaseSequence.Contains("\t")));
    Assert.False(ok.Any(p => p.BaseSequence.Contains("\n")));

    // GoTerm checks
    List<Protein> targets = ok.Where(p => !p.IsDecoy).ToList();
    Assert.AreEqual(2, targets.Count);
    Assert.AreEqual(1, targets[0].DatabaseReferences.Count(dbRef => dbRef.Type == "EnsemblFungi"));
    Assert.AreEqual(1, targets[1].DatabaseReferences.Count(dbRef => dbRef.Type == "EnsemblFungi"));
}
/// <summary>
/// Loads xml2.xml, rewrites it with one extra modification supplied for accession
/// "P53863" at position 2, and checks that the writer reports one new entry and that the
/// first reloaded protein has three modified positions instead of two.
/// </summary>
public void Test_write_with_custom_mods()
{
    ModificationMotif.TryGetMotif("S", out ModificationMotif m1);
    ModificationMotif.TryGetMotif("T", out ModificationMotif m2);
    ModificationMotif.TryGetMotif("X", out ModificationMotif motiff);
    var nice = new List<Modification>
    {
        new ModificationWithLocation("fayk", "mt", motiff, TerminusLocalization.Any, null),
        new ModificationWithLocation("Phosphoserine", "mt", m1, TerminusLocalization.Any, null),
        new ModificationWithLocation("Phosphothreonine", "mt", m2, TerminusLocalization.Any, null)
    };

    // The additional modification handed to the writer.
    ModificationMotif.TryGetMotif("K", out ModificationMotif motif);
    ModificationWithMass m = new ModificationWithMass("mod", "mt", motif, TerminusLocalization.Any, 1, neutralLosses: new List<double> { -1 });
    var new_mods = new Dictionary<string, HashSet<Tuple<int, Modification>>>
    {
        { "P53863", new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, m) } }
    };

    string inputPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"xml2.xml");
    string rewrittenPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"rewrite_xml2.xml");

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, nice, false, new List<string>(),
        out Dictionary<string, Modification> un);

    var newModResEntries = ProteinDbWriter.WriteXmlDatabase(new_mods, ok, rewrittenPath);
    Assert.AreEqual(1, newModResEntries.Count);

    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, nice, false, new List<string>(), out un);

    Assert.AreEqual(ok.Count, ok2.Count);
    Assert.True(Enumerable.Range(0, ok.Count).All(i => ok[i].BaseSequence == ok2[i].BaseSequence));
    Assert.AreEqual(2, ok[0].OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(3, ok2[0].OneBasedPossibleLocalizedModifications.Count);
}
/// <summary>
/// Writes a single protein with one modification at position 3 to an XML database, reads
/// the raw lines of the file, and loads it back with reverse decoys. The method makes no
/// assertions; it only exercises the write and read calls.
/// </summary>
public static void Test_MetaMorpheusStyleProteinDatabaseWriteAndREad()
{
    string proteinDbFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestProteinSplitAcrossFiles.xml");

    ModificationMotif.TryGetMotif("D", out ModificationMotif motif);
    Modification mod = new Modification(
        _originalId: "mod1",
        _modificationType: "mt",
        _target: motif,
        _locationRestriction: "Anywhere.",
        _monoisotopicMass: 10);

    IDictionary<int, List<Modification>> oneBasedModification = new Dictionary<int, List<Modification>>
    {
        [3] = new List<Modification> { mod }
    };

    Protein prot1 = new Protein("MEDEEK", "prot1", oneBasedModifications: oneBasedModification);
    var proteinList = new List<Protein> { prot1 };

    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        proteinList,
        proteinDbFilePath);

    // Neither result is inspected; both calls throw if the file cannot be read.
    var lines = File.ReadAllLines(proteinDbFilePath);
    List<Protein> newProteinList = ProteinDbLoader.LoadProteinXML(
        proteinDbFilePath,
        true,
        DecoyType.Reverse,
        new List<Modification>(),
        false,
        new List<string>(),
        out var um,
        -1);
}
/// <summary>
/// Round-trips splices1.xml through the XML writer and loader and checks that the first
/// splice site of the first protein keeps its positions and description, that its
/// <c>Novel</c> value is null after the first round trip, and that setting it to true
/// is preserved by a second write and read.
/// </summary>
public void TestReadWriteSpliceSites()
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests");
    string inputPath = Path.Combine(databaseDirectory, @"splices1.xml");
    string rewrittenPath = Path.Combine(databaseDirectory, @"rewrite_spliceSite.xml");

    List<Protein> ok = ProteinDbLoader.LoadProteinXML(
        inputPath, true, DecoyType.None, null, false, new List<string>(),
        out Dictionary<string, Modification> un);
    Assert.IsNull(ok[0].SpliceSites.First().Description.Novel);

    // First round trip: the splice site is written and read back unchanged.
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, rewrittenPath);
    List<Protein> ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, null, false, new List<string>(), out un);

    Assert.AreEqual(ok[0].SpliceSites.Count(), ok2[0].SpliceSites.Count());
    Assert.AreEqual(ok[0].SpliceSites.First().OneBasedBeginPosition, ok2[0].SpliceSites.First().OneBasedBeginPosition);
    Assert.AreEqual(ok[0].SpliceSites.First().OneBasedEndPosition, ok2[0].SpliceSites.First().OneBasedEndPosition);
    Assert.AreEqual(ok[0].SpliceSites.First().Description, ok2[0].SpliceSites.First().Description);
    Assert.IsNull(ok2[0].SpliceSites.First().Description.Novel);

    // Second round trip: flag the splice site as novel and overwrite the same file.
    ok[0].SpliceSites.First().Description.Novel = true;
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), ok, rewrittenPath);
    ok2 = ProteinDbLoader.LoadProteinXML(
        rewrittenPath, true, DecoyType.None, null, false, new List<string>(), out un);

    Assert.IsTrue(ok2[0].SpliceSites.First().Description.Novel);
}
/// <summary>
/// Builds a protein whose position 2 carries the modification read from z.txt, passes an
/// equal, separately constructed modification for the same position to the writer, and
/// checks that the writer reports no new entries and that the reloaded protein has
/// exactly one modification at one position.
/// </summary>
public void DoNotWriteSameModTwiceAndDoNotWriteInHeaderSinceDifferent()
{
    Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements2.dat"));

    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "z.txt");
    var sampleModList = PtmListLoader.ReadModsFromFile(modFilePath).ToList();

    var modsByPosition = new Dictionary<int, List<Modification>>
    {
        { 2, sampleModList.OfType<Modification>().ToList() }
    };
    Protein protein = new Protein(
        "MCSSSSSSSSSS",
        "accession",
        "organism",
        new List<Tuple<string, string>>(),
        modsByPosition,
        null,
        "name",
        "full_name",
        false,
        false,
        new List<DatabaseReference>(),
        new List<SequenceVariation>(),
        new List<DisulfideBond>());
    Assert.AreEqual(1, protein.OneBasedPossibleLocalizedModifications[2].OfType<ModificationWithMass>().Count());

    // Construct a second modification from the type, formula and mass of the one read from file.
    var modReadFromFile = sampleModList.First() as ModificationWithMassAndCf;
    ModificationMotif.TryGetMotif("C", out ModificationMotif motif);
    ModificationWithMass newMod = new ModificationWithMassAndCf(
        "Palmitoylation of C",
        modReadFromFile.modificationType,
        motif,
        TerminusLocalization.Any,
        modReadFromFile.chemicalFormula,
        modReadFromFile.monoisotopicMass,
        null,
        null,
        null);

    // Equality is asserted in both directions.
    Assert.AreEqual(newMod, sampleModList.First());
    Assert.AreEqual(sampleModList.First(), newMod);

    var dictWithThisMod = new Dictionary<string, HashSet<Tuple<int, Modification>>>
    {
        {
            "accession",
            new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(2, newMod) }
        }
    };

    string outputPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_modifications_with_proteins3.xml");
    var newModResEntries = ProteinDbWriter.WriteXmlDatabase(dictWithThisMod, new List<Protein> { protein }, outputPath);
    Assert.AreEqual(0, newModResEntries.Count);

    List<Protein> new_proteins = ProteinDbLoader.LoadProteinXML(
        outputPath,
        true,
        DecoyType.None,
        new List<Modification>(),
        false,
        new List<string>(),
        out Dictionary<string, Modification> um);

    Assert.AreEqual(1, new_proteins.Count);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.Count);
    Assert.AreEqual(1, new_proteins[0].OneBasedPossibleLocalizedModifications.SelectMany(kv => kv.Value).Count());
}
/// <summary>
/// Loads MultipleAlternateAlleles.xml twice: once with the default allele depth, where a
/// reference protein and one variant protein ('K' versus 'R' at position 63) are
/// expected, and once with <c>minAlleleDepth: 10</c>, where only the reference protein
/// without applied variations is expected.
/// </summary>
public static void MultipleAlternateAlleles()
{
    const int variantPosition = 63; // one-based
    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "MultipleAlternateAlleles.xml");

    var proteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out var unknownModifications);
    Assert.AreEqual(2, proteins.Count);

    // The first protein lists two variations, both distinct and both at the same position.
    Assert.AreEqual(2, proteins[0].SequenceVariations.Count());
    Assert.AreEqual(2, proteins[0].SequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
    Assert.IsTrue(proteins[0].SequenceVariations.All(v => v.OneBasedBeginPosition == variantPosition));

    // There are two alternate alleles (1 and 2), but only 2 is in the genotype, so only
    // that one is applied to the second protein.
    Assert.AreEqual(1, proteins[1].AppliedSequenceVariations.Count());
    Assert.AreEqual(1, proteins[1].AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());

    Assert.AreEqual(72, proteins[0].Length);
    Assert.AreEqual(72, proteins[1].Length);
    Assert.AreEqual('K', proteins[0][variantPosition - 1]);
    Assert.AreEqual('R', proteins[1][variantPosition - 1]);

    // With a minimum allele depth of 10 only one protein remains, with nothing applied.
    proteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out unknownModifications, minAlleleDepth: 10);
    Assert.AreEqual(1, proteins.Count);
    Assert.AreEqual(0, proteins[0].AppliedSequenceVariations.Count());
    Assert.AreEqual(0, proteins[0].AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count());
    Assert.AreEqual('K', proteins[0][variantPosition - 1]); // reference only
}
// Loads "StopGained.xml" twice: once with the default allele-depth filter and once with
// minAlleleDepth: 400, asserting on protein counts, applied variations and sequence lengths.
public static void StopGained()
{
    const int stopPosition = 161; // one-based residue referenced by the length and residue checks
    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "StopGained.xml");

    var loaded = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out _);
    Assert.AreEqual(2, loaded.Count);

    var first = loaded[0];
    var second = loaded[1];

    Assert.AreEqual(1, first.SequenceVariations.Count()); // some redundant
    Assert.AreEqual(1, first.SequenceVariations.Select(v => v.SimpleString()).Distinct().Count()); // unique changes
    Assert.AreEqual(0, first.AppliedSequenceVariations.Count()); // some redundant
    Assert.AreEqual(0, first.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count()); // unique changes
    Assert.AreEqual(1, second.AppliedSequenceVariations.Count()); // some redundant
    Assert.AreEqual(1, second.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count()); // unique changes

    // The first protein keeps its full length; the second ends just before the stop position.
    Assert.AreEqual(191, first.Length);
    Assert.AreEqual('Q', first[stopPosition - 1]);
    Assert.AreEqual(stopPosition - 1, second.Length);
    Assert.AreNotEqual(first.Length, second.Length);

    // Second load with a minimum allele depth of 400: only the truncated protein is returned.
    loaded = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.None, null, false, null, out _, minAlleleDepth: 400);
    Assert.AreEqual(1, loaded.Count);

    var only = loaded[0];
    Assert.AreEqual(1, only.AppliedSequenceVariations.Count()); // some redundant
    Assert.AreEqual(1, only.AppliedSequenceVariations.Select(v => v.SimpleString()).Distinct().Count()); // unique changes
    Assert.AreEqual(stopPosition - 1, only.Length);
}
// Prints a summary of a Spritz protein XML database to the console: the number of canonical and
// variant-containing entries, and the number of unique applied sequence variations broken down
// by the annotation term found in each variation's description.
//
// sourceXmlPath:      path of the source XML database. It is loaded, but its contents are not
//                     used in the summary.
// destinationXmlPath: path of the Spritz XML database that is summarized.
public static void DatabaseSummary(string sourceXmlPath, string destinationXmlPath)
{
    // Variation descriptions are machine-generated annotation text, so terms are matched
    // ordinally (ignoring case) instead of with the rules of whatever culture is current.
    var termComparer = CultureInfo.InvariantCulture.CompareInfo;

    var uniprotPtms = ProteinAnnotation.GetUniProtMods(Environment.CurrentDirectory);

    // The result of loading the source database is not used below. The call is kept so the
    // method's observable behavior (including a failure to load the source) is unchanged.
    ProteinDbLoader.LoadProteinXML(sourceXmlPath, true, DecoyType.None, uniprotPtms, false, null, out _);
    var spritz = ProteinDbLoader.LoadProteinXML(destinationXmlPath, true, DecoyType.None, uniprotPtms, false, null, out _);

    var spritzCanonical = spritz.Select(p => p.NonVariantProtein).Distinct().ToList();
    int numberOfCanonicalProteinEntries = spritzCanonical.Count;
    int numberOfVariantProteinEntries = spritz.Count - spritzCanonical.Count;

    // Collect the applied variations per canonical accession, skipping repeats from later entries.
    var variantsByAccession = new Dictionary<string, List<SequenceVariation>>();
    foreach (var spritzEntry in spritz)
    {
        if (spritzEntry.AppliedSequenceVariations.Count == 0)
        {
            continue;
        }

        string accession = spritzEntry.NonVariantProtein.Accession;
        if (variantsByAccession.TryGetValue(accession, out List<SequenceVariation> known))
        {
            foreach (var variant in spritzEntry.AppliedSequenceVariations)
            {
                if (!known.Contains(variant))
                {
                    known.Add(variant);
                }
            }
        }
        else
        {
            // Store a copy: the list is appended to above, and appending to the protein's own
            // list would silently modify the loaded protein.
            variantsByAccession.Add(accession, new List<SequenceVariation>(spritzEntry.AppliedSequenceVariations));
        }
    }

    // True when the variation's description text contains the given annotation term.
    bool Mentions(SequenceVariation variant, string term) =>
        termComparer.IndexOf(variant.Description.Description, term, CompareOptions.OrdinalIgnoreCase) >= 0;

    int synonymousCount = 0;
    int missenseSnvCount = 0;
    int missenseMnvCount = 0;
    int insertionCount = 0;
    int deletionCount = 0;
    int frameshiftCount = 0;
    int stopGainCount = 0;
    int stopLossCount = 0;

    // Each variation is counted in the first category that matches; the order below matters.
    foreach (var variant in variantsByAccession.Values.SelectMany(variants => variants))
    {
        if (Mentions(variant, "synonymous_variant"))
        {
            synonymousCount++;
        }
        else if (Mentions(variant, "missense_variant"))
        {
            // A missense change of exactly one reference and one alternate character is an SNV,
            // anything else is counted as an MNV.
            bool isSingle = variant.Description.ReferenceAlleleString.Length == 1
                && variant.Description.AlternateAlleleString.Length == 1;
            if (isSingle)
            {
                missenseSnvCount++;
            }
            else
            {
                missenseMnvCount++;
            }
        }
        else if (Mentions(variant, "frameshift_variant"))
        {
            frameshiftCount++;
        }
        else if (Mentions(variant, "stop_gained"))
        {
            stopGainCount++;
        }
        else if (Mentions(variant, "conservative_inframe_insertion") || Mentions(variant, "disruptive_inframe_insertion"))
        {
            insertionCount++;
        }
        else if (Mentions(variant, "conservative_inframe_deletion") || Mentions(variant, "disruptive_inframe_deletion"))
        {
            deletionCount++;
        }
        // NOTE(review): the original only searched for "stop_loss"; the Sequence Ontology term
        // emitted by annotators such as SnpEff is "stop_lost". Both are accepted here - confirm
        // against the annotation source.
        else if (Mentions(variant, "stop_loss") || Mentions(variant, "stop_lost"))
        {
            stopLossCount++;
        }
    }

    // Only variations that matched one of the categories above count towards the total.
    int totalVariants = synonymousCount + missenseSnvCount + missenseMnvCount + frameshiftCount
        + stopGainCount + insertionCount + deletionCount + stopLossCount;

    Console.WriteLine("Spritz Database Summary");
    Console.WriteLine("--------------------------------------------------------------");
    Console.WriteLine($"{numberOfCanonicalProteinEntries}\tTotal number of canonical protein entries (before applying variations)");
    Console.WriteLine($"{spritz.Count}\tTotal number of protein entries");
    Console.WriteLine($"{numberOfVariantProteinEntries}\tTotal number of variant containing protein entries");
    Console.WriteLine($"{totalVariants}\tTotal number of unique variants");
    Console.WriteLine($"{synonymousCount}\tTotal number of unique synonymous variants");
    Console.WriteLine($"{(totalVariants - synonymousCount)}\tTotal number of unique nonsynonymous variants");
    Console.WriteLine($"{missenseSnvCount}\tNumber of unique SNV missense variants");
    Console.WriteLine($"{missenseMnvCount}\tNumber of unique MNV missense variants");
    Console.WriteLine($"{frameshiftCount}\tNumber of unique frameshift variants");
    Console.WriteLine($"{insertionCount}\tNumber of unique insertion variants");
    Console.WriteLine($"{deletionCount}\tNumber of unique deletion variants");
    Console.WriteLine($"{stopGainCount}\tNumber of unique stop gain variants");
    Console.WriteLine($"{stopLossCount}\tNumber of unique stop loss variants");
}
// Runs a search task with WritePrunedDatabase enabled against a one-protein XML database whose
// protein carries "ConnorMod on P" at residues 1 and 3, using a spectra file generated from the
// peptide that is modified at residue 3. Checks that the pruned database written by the task
// keeps only the modification at residue 3.
public static void TestPrunedDatabase()
{
    // Files and folders this test creates; they are removed in the finally block below so that
    // a failing assertion does not leave artifacts behind in the working directory.
    const string xmlName = "okkk.xml";
    const string mzmlName = @"hello.mzML";
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedDatabase");

    try
    {
        //Create Search Task
        SearchTask task1 = new SearchTask
        {
            SearchParameters = new SearchParameters
            {
                WritePrunedDatabase = true,
                SearchTarget = true,
                MassDiffAcceptorType = MassDiffAcceptorType.Exact,
                ModsToWriteSelection = new Dictionary<string, int> { { "ConnorModType", 1 } }
            },
            CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5))
        };

        //add task to task list
        List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", task1) };

        ModificationMotif.TryGetMotif("P", out ModificationMotif motif);
        var connorMod = new Modification(_originalId: "ConnorMod on P", _modificationType: "ConnorModType", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
        GlobalVariables.AddMods(new List<Modification> { connorMod }, false);

        //create modification lists
        List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
            .Where(b => task1.CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif))).ToList();

        //add the same modification to residues 1 and 3 of the protein
        var dictHere = new Dictionary<int, List<Modification>>
        {
            { 1, new List<Modification> { connorMod } },
            { 3, new List<Modification> { connorMod } }
        };
        Protein TestProteinWithMod = new Protein("PEPTID", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);

        //write the XML input database, passing one additional mod entry under the key "test"
        Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
        var Hash = new HashSet<Tuple<int, Modification>> { new Tuple<int, Modification>(3, connorMod) };
        modList.Add("test", Hash);
        ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithMod }, xmlName);

        //read the database back, generating reverse decoys
        var protein = ProteinDbLoader.LoadProteinXML(xmlName, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);

        //Dictionary 'ok' contains unknown modifications. There are no unknown modifications in this test.
        Assert.AreEqual(0, ok.Count);

        //One protein is read from the .xml database and one decoy is created. Therefore, the list of proteins contains 2 entries.
        Assert.AreEqual(2, protein.Count);

        //The original database had two localized mods on the protein. Therefore, both protein and decoy should have two mods.
        Assert.AreEqual(2, protein[0].OneBasedPossibleLocalizedModifications.Count);
        List<int> foundResidueIndicies = protein[0].OneBasedPossibleLocalizedModifications.Select(k => k.Key).ToList();
        List<int> expectedResidueIndices = new List<int>() { 1, 3 };
        Assert.That(foundResidueIndicies, Is.EquivalentTo(expectedResidueIndices));

        Assert.AreEqual(2, protein[1].OneBasedPossibleLocalizedModifications.Count);
        foundResidueIndicies = protein[1].OneBasedPossibleLocalizedModifications.Select(k => k.Key).ToList();
        expectedResidueIndices = new List<int>() { 4, 6 }; //originally modified residues are now at the end in the decoy
        Assert.That(foundResidueIndicies, Is.EquivalentTo(expectedResidueIndices));

        var digestedList = protein[0].Digest(task1.CommonParameters.DigestionParams, new List<Modification> { }, variableModifications).ToList();
        Assert.AreEqual(4, digestedList.Count);

        //pick the peptide with 1 mod at position 3
        PeptideWithSetModifications pepWithSetMods1 = digestedList[1];
        Assert.AreEqual("PEP[ConnorModType:ConnorMod on P]TID", pepWithSetMods1.FullSequence);

        //write the mzML file containing that peptide
        MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> { pepWithSetMods1 });
        IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

        //run!
        var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(xmlName, false) }, outputFolder);
        engine.Run();

        // NOTE(review): the pruned database is looked up under MySetUpClass.outputFolder rather
        // than the local outputFolder; presumably the two refer to the same folder after the
        // run - confirm.
        string final = Path.Combine(MySetUpClass.outputFolder, "task1", "okkkpruned.xml");
        var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);

        //check length
        Assert.AreEqual(1, proteins[0].OneBasedPossibleLocalizedModifications.Count);

        //check location (key)
        Assert.IsTrue(proteins[0].OneBasedPossibleLocalizedModifications.ContainsKey(3));
        List<Modification> listOfMods = proteins[0].OneBasedPossibleLocalizedModifications[3];

        //check Type, ID, count (expected value first, so failure messages read correctly)
        Assert.AreEqual("ConnorModType", listOfMods[0].ModificationType);
        Assert.AreEqual("ConnorMod on P", listOfMods[0].IdWithMotif);
        Assert.AreEqual(1, listOfMods.Count);
    }
    finally
    {
        // Directory.Delete throws for a missing directory, which would hide the original failure.
        if (Directory.Exists(outputFolder))
        {
            Directory.Delete(outputFolder, true);
        }
        File.Delete(xmlName);
        File.Delete(mzmlName);
    }
}
// Builds two proteins (one with a sequence variation) plus the variant proteins derived from the
// first, writes them to XML, runs a search task and a post-search analysis task with
// WritePrunedDatabase enabled, and then asserts on the contents of three databases: the one
// written to "temp", and the "fakeDbproteinPruned.xml" / "fakeDbpruned.xml" files under the
// "PrunedDbTest" folder.
//
// NOTE(review): this method writes its fakeDb.xml under "PrunedDbTestVariant" but points both
// tasks at "PrunedDbTest/fakeDb.xml", which is not written here; presumably that file must
// already exist from elsewhere - confirm.
// NOTE(review): nothing created here ("temp", "newMzml.mzML", the output folders) is deleted.
public static void TestProteinPrunedWithModSelectionAndVariants() {
    // Pick two known modifications by type and target motif.
    var modToWrite = GlobalVariables.AllModsKnown.Where(p => p.ModificationType == "UniProt" && p.Target.ToString() == "T").First();
    var modToNotWrite = GlobalVariables.AllModsKnown.Where(p => p.ModificationType == "Common Artifact" && p.Target.ToString() == "X").First();

    // Modifications attached to the sequence variation itself.
    Dictionary <int, List <Modification> > variantMods = new Dictionary <int, List <Modification> >();
    variantMods.Add(1, new List <Modification>() { modToNotWrite });
    List <SequenceVariation> variants = new List <SequenceVariation> { new SequenceVariation(4, 4, "V", "T", @"20\t41168825\t.\tT\tC\t14290.77\t.\tANN=C|missense_variant|MODERATE|PLCG1|ENSG00000124181|transcript|ENST00000244007.7|protein_coding|22/33|c.2438T>C|p.Ile813Thr|2635/5285|2438/3876|813/1291||\tGT:AD:DP:GQ:PL\t1/1:1,392:393:99:14319,1142,0", variantMods) };

    // protein1 carries the variation; protein2 has none. Both have modToNotWrite at residue 1
    // and modToWrite at their last residue.
    var protein1 = new Protein("PEPVIDEKPEPT", "1", oneBasedModifications: new Dictionary <int, List <Modification> > { { 1, new List <Modification> { modToNotWrite } }, { 12, new List <Modification> { modToWrite } } }, sequenceVariations: variants);
    var protein2 = new Protein("PEPIDPEPT", "2", oneBasedModifications: new Dictionary <int, List <Modification> > { { 1, new List <Modification> { modToNotWrite } }, { 9, new List <Modification> { modToWrite } } });
    var protein1Variants = protein1.GetVariantProteins(1, 0);

    // First database: written to the relative path "temp" and read back near the end as proteinsLoaded.
    string path = @"temp";
    var proteinList = new List <Protein> { protein1, protein2 };
    proteinList.AddRange(protein1Variants);
    ProteinDbWriter.WriteXmlDatabase(new Dictionary <string, HashSet <Tuple <int, Modification> > >(), proteinList, path);

    // Second database: written under "PrunedDbTestVariant". modList is still empty at this point.
    Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTestVariant"));
    Dictionary <string, HashSet <Tuple <int, Modification> > > modList = new Dictionary <string, HashSet <Tuple <int, Modification> > >();
    var Hash = new HashSet <Tuple <int, Modification> > { new Tuple <int, Modification>(1, modToWrite), new Tuple <int, Modification>(2, modToNotWrite), };
    // The return value (db) is not used afterwards in this method.
    var db = ProteinDbWriter.WriteXmlDatabase(modList, proteinList, Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTestVariant/fakeDb.xml"));

    // The "observed" peptide: the digestion product of the first variant protein with base sequence PEPT.
    var peptideObserved = protein1Variants.First().Digest(new DigestionParams(minPeptideLength: 1), new List <Modification>(), new List <Modification>()) .Where(p => p.BaseSequence == "PEPT").First();

    PostSearchAnalysisParameters testPostTaskParameters = new PostSearchAnalysisParameters();
    CommonParameters commonParam = new CommonParameters(useDeltaScore: false);

    // NOTE(review): 10000 x 10000 doubles is an 800 MB allocation, and only element [0, 0] is
    // set in this method. Whether MzSpectrum / MsDataScan need this shape is not visible here.
    double[,] noiseData = new double[10000, 10000];
    noiseData[0, 0] = 1.0;
    List <Proteomics.Fragmentation.MatchedFragmentIon> matchedFragmentIons = new List <Proteomics.Fragmentation.MatchedFragmentIon>() { };
    MzSpectrum spectrum = new MzSpectrum(noiseData);
    MsDataScan scan = new MsDataScan(spectrum, 1, 1, true, Polarity.Unknown, 2, new MzLibUtil.MzRange(10, 1000), "", MZAnalyzerType.Orbitrap, 10000, null, noiseData, "");

    // Post-search analysis parameters: one PSM for the observed peptide, pruned-database writing
    // on, quantification and mzID writing off.
    testPostTaskParameters.ProteinList = proteinList;
    testPostTaskParameters.AllPsms = new List <PeptideSpectralMatch> { new PeptideSpectralMatch(peptideObserved, 0, 20, 1, new Ms2ScanWithSpecificMass(scan, 100, 1, @"", commonParam), commonParam, matchedFragmentIons) };
    testPostTaskParameters.SearchParameters = new SearchParameters();
    testPostTaskParameters.SearchParameters.WritePrunedDatabase = true;
    testPostTaskParameters.SearchParameters.DoQuantification = false;
    testPostTaskParameters.SearchParameters.WriteMzId = false;
    // Points at "PrunedDbTest", not at the "PrunedDbTestVariant" folder written above.
    testPostTaskParameters.DatabaseFilenameList = new List <DbForTask>() { new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDb.xml"), false) };
    testPostTaskParameters.OutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest");
    Directory.CreateDirectory(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/individual"));
    testPostTaskParameters.IndividualResultsOutputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/individual");

    // Spectra counts per file, keyed by an empty file name.
    int[] stuffForSpectraFile = new int[2];
    stuffForSpectraFile[0] = 10;
    stuffForSpectraFile[1] = 10;
    Dictionary <string, int[]> numSpectraPerFile = new Dictionary <string, int[]>();
    numSpectraPerFile.Add("", stuffForSpectraFile);
    testPostTaskParameters.NumMs2SpectraPerFile = numSpectraPerFile;

    // Spectra file generated from the observed peptide, written to the relative path "newMzml.mzML".
    MsDataFile myMsDataFile = new TestDataFile(new List <PeptideWithSetModifications> { peptideObserved });
    string mzmlName = @"newMzml.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

    // This entry is added after modList was passed to WriteXmlDatabase, and modList is not used
    // again in this method.
    modList.Add("test", Hash);
    testPostTaskParameters.CurrentRawFileList = new List <string>() { mzmlName };

    // Run the search task first; its results feed the post-search analysis task.
    SearchTask task5 = new SearchTask { SearchParameters = new SearchParameters { WritePrunedDatabase = true, SearchTarget = true, MassDiffAcceptorType = MassDiffAcceptorType.Exact, }, CommonParameters = new CommonParameters() };
    var test = task5.RunTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest"), new List <DbForTask>() { new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDb.xml"), false) }, new List <string>() { mzmlName }, "name");
    testPostTaskParameters.SearchTaskResults = test;

    PostSearchAnalysisTask testPostTask = new PostSearchAnalysisTask();
    testPostTask.Parameters = testPostTaskParameters;
    testPostTask.CommonParameters = commonParam;
    // NOTE(review): "newMzMl.mzml" differs in letter case from mzmlName ("newMzml.mzML") - confirm this is intended.
    testPostTask.FileSpecificParameters = new List <(string FileName, CommonParameters Parameters)> { ("newMzMl.mzml", commonParam) };
    testPostTask.Run();

    var proteinsLoaded = ProteinDbLoader.LoadProteinXML(path, true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods);

    // assert that mods on proteins are the same before/after task is run
    Assert.AreEqual(protein1Variants.First().Accession, proteinsLoaded.First().Accession);
    Assert.AreEqual(protein1Variants.First().OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.First().OneBasedPossibleLocalizedModifications.Count());
    Assert.AreEqual(protein2.OneBasedPossibleLocalizedModifications.Count(), proteinsLoaded.ElementAt(1).OneBasedPossibleLocalizedModifications.Count());

    // assert that protein pruned DB has correct proteins mods
    var proteinPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDbproteinPruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods1);
    Assert.That(proteinPruned.Count().Equals(1));
    Assert.That(proteinPruned.FirstOrDefault().OneBasedPossibleLocalizedModifications.Count().Equals(1));

    // assert that mod-pruned DB has correct proteins and mods
    var modPruned = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, @"PrunedDbTest/fakeDbpruned.xml"), true, DecoyType.None, GlobalVariables.AllModsKnown, false, new List <string>(), out var unknownMods2);
    Assert.That(modPruned.Count().Equals(2));
    Assert.That(modPruned.ElementAt(0).OneBasedPossibleLocalizedModifications.Count().Equals(1));
    Assert.That(modPruned.ElementAt(1).OneBasedPossibleLocalizedModifications.Count().Equals(1));
}
// Runs a search task with WritePrunedDatabase enabled and a different ModsToWriteSelection value
// (0..3) for each of four modification types, then checks which modifications end up in the
// pruned database: "ModToNotAppear" must be absent, while "Default(Mod in DB and Observed)",
// "ModToAlwaysAppear" and "ModObservedNotinDB" must be present at residues 2, 3 and 11.
public static void TestUserModSelectionInPrunedDB()
{
    // Files and folders this test creates; they are removed in the finally block below so that
    // a failing assertion does not leave artifacts behind in the working directory.
    const string xmlName = "selectedMods.xml";
    const string xmlName2 = "selectedModsObvs.xml";
    const string mzmlName = @"newMzml.mzML";
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestUserModSelectionInPrunedDB");

    try
    {
        List<(string, string)> listOfModsFixed = new List<(string, string)>
        {
            ("Common Fixed", "Carbamidomethyl of C"),
            ("Common Fixed", "Carbamidomethyl of U")
        };

        //Create Search Task
        SearchTask task5 = new SearchTask
        {
            SearchParameters = new SearchParameters
            {
                WritePrunedDatabase = true,
                SearchTarget = true,
                MassDiffAcceptorType = MassDiffAcceptorType.Exact,
            },
            CommonParameters = new CommonParameters(listOfModsFixed: listOfModsFixed)
        };

        //one selection value per modification type used below
        task5.SearchParameters.ModsToWriteSelection["Mod"] = 0;
        task5.SearchParameters.ModsToWriteSelection["Common Fixed"] = 1;
        task5.SearchParameters.ModsToWriteSelection["Glycan"] = 2;
        task5.SearchParameters.ModsToWriteSelection["missing"] = 3;

        //add task to task list
        List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task5", task5) };

        ModificationMotif.TryGetMotif("P", out ModificationMotif motif);
        ModificationMotif.TryGetMotif("E", out ModificationMotif motif2);

        var connorMod = new Modification(_originalId: "ModToNotAppear", _modificationType: "Mod", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
        var connorMod2 = new Modification(_originalId: "Default(Mod in DB and Observed)", _modificationType: "Common Fixed", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
        var connorMod3 = new Modification(_originalId: "ModToAlwaysAppear", _modificationType: "Glycan", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
        var connorMod4 = new Modification(_originalId: "ModObservedNotinDB", _modificationType: "missing", _target: motif2, _locationRestriction: "Anywhere.", _monoisotopicMass: 5);

        GlobalVariables.AddMods(new List<Modification> { connorMod, connorMod2, connorMod3, connorMod4 }, false);

        //create modification lists (materialized before connorMod4 is registered as fixed below)
        List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
            .Where(b => task5.CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif))).ToList();
        List<Modification> fixedModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
            .Where(b => task5.CommonParameters.ListOfModsFixed.Contains((b.ModificationType, b.IdWithMotif))).ToList();

        //add a fixed modification so the "observed but not in DB" case can be tested
        fixedModifications.Add(connorMod4);
        listOfModsFixed.Add((connorMod4.ModificationType, connorMod4.IdWithMotif));

        //modifications of the protein written to the searched database
        var dictHere = new Dictionary<int, List<Modification>>
        {
            { 1, new List<Modification> { connorMod } },
            { 2, new List<Modification> { connorMod2 } }, //default
            { 3, new List<Modification> { connorMod3 } }  //always appear
        };

        //modifications of the protein used to generate the spectra
        var dictHere2 = new Dictionary<int, List<Modification>>
        {
            { 1, new List<Modification> { connorMod } },
            { 2, new List<Modification> { connorMod2 } }, //default
            { 3, new List<Modification> { connorMod3 } }, //always appear
            { 4, new List<Modification> { connorMod4 } }  //observed
        };

        //protein creation (one for the searched database, one for the observed peptides)
        Protein TestProteinWithModForDB = new Protein("PPPPPPPPPPE", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);
        Protein TestProteinWithModObsevred = new Protein("PPPPPPPPPPE", "accession1", "organism", new List<Tuple<string, string>>(), dictHere2);

        //Add Mod to list and write XML input database
        Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
        var Hash = new HashSet<Tuple<int, Modification>>
        {
            new Tuple<int, Modification>(1, connorMod),
            new Tuple<int, Modification>(2, connorMod2),
            new Tuple<int, Modification>(3, connorMod3),
            new Tuple<int, Modification>(4, connorMod4), //Observed Only
        };
        modList.Add("test", Hash);
        ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithModForDB }, xmlName);

        //Add Observed Only
        modList.Add("test2", Hash);
        ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithModObsevred }, xmlName2);

        //now create MZML data from the first five digestion products of the "observed" protein
        var protein = ProteinDbLoader.LoadProteinXML(xmlName2, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);
        var digestedList = protein[0].Digest(task5.CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList();
        List<PeptideWithSetModifications> observedPeptides = digestedList.GetRange(0, 5);

        MsDataFile myMsDataFile = new TestDataFile(observedPeptides);
        IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

        //run!
        var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(xmlName, false) }, outputFolder);
        engine.Run();

        // NOTE(review): the pruned database is looked up under MySetUpClass.outputFolder rather
        // than the local outputFolder; presumably the two refer to the same folder after the
        // run - confirm.
        string final = Path.Combine(MySetUpClass.outputFolder, "task5", "selectedModspruned.xml");
        var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);
        var Dlist = proteins[0].GetVariantProteins()
            .SelectMany(vp => vp.Digest(task5.CommonParameters.DigestionParams, fixedModifications, variableModifications)).ToList();

        // Expected values come first in the assertions below so failure messages read correctly.
        Assert.AreEqual(1, Dlist[0].NumFixedMods);

        //check length
        Assert.AreEqual(3, proteins[0].OneBasedPossibleLocalizedModifications.Count);

        List<Modification> listOfLocalMods = new List<Modification>();
        listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[2]);
        listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[3]);
        listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[11]);

        //check Type, count, ID
        Assert.AreEqual("Common Fixed", listOfLocalMods[0].ModificationType);
        Assert.AreEqual("missing", listOfLocalMods[2].ModificationType);
        Assert.IsFalse(listOfLocalMods.Contains(connorMod)); //make sure that mod set not to show up is not in mod list
        Assert.AreEqual("Default(Mod in DB and Observed) on P", listOfLocalMods[0].IdWithMotif);
        Assert.AreEqual("ModToAlwaysAppear on P", listOfLocalMods[1].IdWithMotif);

        //Makes sure Mod that was not in the DB but was observed is in pruned DB
        Assert.AreEqual("ModObservedNotinDB on E", listOfLocalMods[2].IdWithMotif);
        Assert.AreEqual(3, listOfLocalMods.Count);
    }
    finally
    {
        // Directory.Delete throws for a missing directory, which would hide the original failure.
        if (Directory.Exists(outputFolder))
        {
            Directory.Delete(outputFolder, true);
        }
        File.Delete(mzmlName);
        File.Delete(xmlName);
        File.Delete(xmlName2);
    }
}
// Indexes the proteins of "indexEngineTestFasta.fasta" (plus reverse decoys) with LowCID
// dissociation and checks that every digested peptide is in the peptide index and that each of
// its b and y fragments is registered in the fragment-index bin computed from the fragment mass.
public static void TestIndexEngineLowRes()
{
    // Fragment masses are snapped to multiples of this value before the bin is computed.
    // NOTE(review): presumably this mirrors the binning the indexing engine applies for
    // low-resolution fragments - confirm against the engine.
    const double massScalingFactor = 1.0005079;

    var proteinList = ProteinDbLoader.LoadProteinFasta(
        Path.Combine(TestContext.CurrentContext.TestDirectory, @"indexEngineTestFasta.fasta"),
        true, DecoyType.Reverse, false,
        ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out _, -1);

    // No modifications are used in this test.
    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();

    CommonParameters commonParameters = new CommonParameters(
        dissociationType: DissociationType.LowCID,
        maxThreadsToUsePerFile: 1,
        scoreCutoff: 1,
        digestionParams: new DigestionParams(protease: "trypsin", minPeptideLength: 1));

    var engine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.Reverse, commonParameters, 30000, false, new List<FileInfo>(), new List<string>());
    var results = (IndexingResults)engine.Run();

    Assert.AreEqual(10, results.PeptideIndex.Count);

    // Digest the first two loaded proteins independently of the engine.
    var digestedList = proteinList[0].Digest(commonParameters.DigestionParams, new List<Modification>(), variableModifications).ToList();
    digestedList.AddRange(proteinList[1].Digest(commonParameters.DigestionParams, new List<Modification>(), variableModifications));

    Assert.AreEqual(10, digestedList.Count);

    foreach (PeptideWithSetModifications peptide in digestedList)
    {
        Assert.Contains(peptide, results.PeptideIndex);

        var fragments = peptide.Fragment(commonParameters.DissociationType, FragmentationTerminus.Both).ToList();
        int positionInPeptideIndex = results.PeptideIndex.IndexOf(peptide);

        foreach (Product fragment in fragments.Where(f => f.ProductType == ProductType.b || f.ProductType == ProductType.y))
        {
            // mass of the fragment, snapped to the nearest multiple of the scaling factor
            double fragmentMass = Math.Round(fragment.NeutralMass / massScalingFactor, 0) * massScalingFactor;
            int integerMassRepresentation = (int)Math.Round(fragmentMass * 1000);

            // look up the peptides that have fragments with this mass
            // the result of the lookup is a list of peptide IDs that have this fragment mass
            List<int> fragmentBin = results.FragmentIndex[integerMassRepresentation];

            // this list should contain this peptide!
            Assert.Contains(positionInPeptideIndex, fragmentBin);
        }
    }
}
// Builds five proteins, each with one sequence variation (single-residue substitution,
// multi-residue substitution, insertion, deletion, and an insertion that carries a
// modification), and checks the sequences and variation coordinates of the resulting variant
// proteins. The same assertions run against two in-memory applications of the variations and
// against the proteins reloaded from an XML database written from the originals.
public static void AppliedVariants()
{
    // Description text shared by every sequence variation below.
    const string variantDescription = @"1\t50000000\t.\tA\tG\t.\tPASS\tANN=G||||||||||||||||\tGT:AD:DP\t1/1:30,30:30";

    ModificationMotif.TryGetMotif("P", out ModificationMotif motifP);
    Modification mp = new Modification("mod", null, "type", null, motifP, "Anywhere.", null, 42.01, new Dictionary<string, IList<string>>(), null, null, null, null, null);

    var modsOnInsertion = new Dictionary<int, List<Modification>>
    {
        { 5, new List<Modification> { mp } }
    };

    List<Protein> proteinsWithSeqVars = new List<Protein>
    {
        new Protein("MPEPTIDE", "protein1",
            sequenceVariations: new List<SequenceVariation> { new SequenceVariation(4, 4, "P", "V", variantDescription, null) }),
        new Protein("MPEPTIDE", "protein2",
            sequenceVariations: new List<SequenceVariation> { new SequenceVariation(4, 5, "PT", "KT", variantDescription, null) }),
        new Protein("MPEPTIDE", "protein3",
            sequenceVariations: new List<SequenceVariation> { new SequenceVariation(4, 4, "P", "PPP", variantDescription, null) }),
        new Protein("MPEPPPTIDE", "protein4",
            sequenceVariations: new List<SequenceVariation> { new SequenceVariation(4, 6, "PPP", "P", variantDescription, null) }),
        new Protein("MPEPTIDE", "protein5",
            sequenceVariations: new List<SequenceVariation> { new SequenceVariation(4, 4, "P", "PPP", variantDescription, modsOnInsertion) }),
    };

    // Apply the variations twice in memory; the result should be stable.
    var appliedOnce = proteinsWithSeqVars.SelectMany(p => p.GetVariantProteins()).ToList();
    var appliedTwice = proteinsWithSeqVars.SelectMany(p => p.GetVariantProteins()).ToList();

    // Round-trip the original proteins through an XML database.
    string xml = Path.Combine(TestContext.CurrentContext.TestDirectory, "AppliedVariants.xml");
    ProteinDbWriter.WriteXmlDatabase(null, proteinsWithSeqVars, xml);
    var appliedOnReload = ProteinDbLoader.LoadProteinXML(xml, true, DecoyType.None, null, false, null, out _);

    foreach (var variantProteins in new[] { appliedOnce, appliedTwice, appliedOnReload })
    {
        // sequences
        Assert.AreEqual("MPEVTIDE", variantProteins[0].BaseSequence);
        Assert.AreEqual("MPEKTIDE", variantProteins[1].BaseSequence);
        Assert.AreEqual("MPEPPPTIDE", variantProteins[2].BaseSequence);
        Assert.AreEqual("MPEPTIDE", variantProteins[3].BaseSequence);
        Assert.AreEqual("MPEPPPTIDE", variantProteins[4].BaseSequence);
        Assert.AreEqual(5, variantProteins[4].OneBasedPossibleLocalizedModifications.Single().Key);

        // SAV
        Assert.AreEqual(4, variantProteins[0].AppliedSequenceVariations.Single().OneBasedBeginPosition);
        Assert.AreEqual(4, variantProteins[0].AppliedSequenceVariations.Single().OneBasedEndPosition);

        // MNV
        Assert.AreEqual(4, variantProteins[1].AppliedSequenceVariations.Single().OneBasedBeginPosition);
        Assert.AreEqual(5, variantProteins[1].AppliedSequenceVariations.Single().OneBasedEndPosition);

        // insertion
        Assert.AreEqual(4, variantProteins[2].AppliedSequenceVariations.Single().OneBasedBeginPosition);
        Assert.AreEqual(6, variantProteins[2].AppliedSequenceVariations.Single().OneBasedEndPosition);

        // deletion
        Assert.AreEqual(4, variantProteins[3].AppliedSequenceVariations.Single().OneBasedBeginPosition);
        Assert.AreEqual(4, variantProteins[3].AppliedSequenceVariations.Single().OneBasedEndPosition);
    }
}
// Loads "xml2.xml" while passing null for both the known-modification list and the
// modification-type exclusion list. There are no assertions: the test only requires that the
// load completes without throwing.
public void ReadXmlNulls()
{
    string xmlPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"xml2.xml");

    ProteinDbLoader.LoadProteinXML(xmlPath, true, DecoyType.None, null, false, null, out Dictionary<string, Modification> _);
}
public void TestFullProteinReadWrite()
{
    // Builds a protein with every optional annotation populated, writes it to an
    // XML database, loads it back, and compares the loaded protein field by field
    // against the one that was written.
    string dbPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"bnueiwhf.xml");

    // Three modifications: one without a motif, one on E without a mass, one on N with a mass.
    Modification modWithoutMotif = new Modification("mod1", null, "modType1", null, null, null, null, null, null, null, null, null, null, null);

    ModificationMotif.TryGetMotif("E", out ModificationMotif motifE);
    Modification modOnE = new Modification("mod2 on E", null, "modType1", null, motifE, "Anywhere.", null, null, null, null, null, null, null, null);

    ModificationMotif.TryGetMotif("N", out ModificationMotif motifN);
    Modification modOnN = new Modification("mod3 on N", null, "modType1", null, motifN, "Anywhere.", null, 10, null, null, null, null, null, null);

    List<Tuple<string, string>> geneNames = new List<Tuple<string, string>>
    {
        new Tuple<string, string>("a", "b")
    };

    IDictionary<int, List<Modification>> modsByPosition = new Dictionary<int, List<Modification>>
    {
        { 3, new List<Modification> { modWithoutMotif } },
        { 4, new List<Modification> { modOnE } },
        { 5, new List<Modification> { modOnN } }
    };

    List<ProteolysisProduct> products = new List<ProteolysisProduct>
    {
        new ProteolysisProduct(1, 2, "propeptide")
    };

    string proteinName = "testName";
    string proteinFullName = "testFullName";

    List<DatabaseReference> dbReferences = new List<DatabaseReference>
    {
        new DatabaseReference("type1", "id1", new List<Tuple<string, string>> { new Tuple<string, string>("e1", "e2") })
    };

    List<SequenceVariation> variations = new List<SequenceVariation>
    {
        new SequenceVariation(3, "Q", "N", "replace Q by N"),
        new SequenceVariation(3, 4, "QE", "NN", "replace QE by NN")
    };

    List<DisulfideBond> bonds = new List<DisulfideBond>
    {
        new DisulfideBond(1, "ds1"),
        new DisulfideBond(2, 3, "ds2")
    };

    Protein expected = new Protein(
        "SEQENCE",
        "a1",
        geneNames: geneNames,
        oneBasedModifications: modsByPosition,
        proteolysisProducts: products,
        name: proteinName,
        fullName: proteinFullName,
        isDecoy: false,
        isContaminant: true,
        databaseReferences: dbReferences,
        sequenceVariations: variations,
        disulfideBonds: bonds,
        databaseFilePath: dbPath);

    // Generate data for files
    ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), new List<Protein> { expected }, dbPath);

    IEnumerable<string> modTypesToExclude = new List<string>();
    IEnumerable<Modification> allKnownModifications = new List<Modification>();
    List<Protein> loadedProteins = ProteinDbLoader.LoadProteinXML(dbPath, true, DecoyType.None, allKnownModifications, true, modTypesToExclude, out Dictionary<string, Modification> unknownModifications);
    Protein actual = loadedProteins[0];

    // Identity and database references
    Assert.AreEqual(expected.Accession, actual.Accession);
    Assert.AreEqual(expected.BaseSequence, actual.BaseSequence);
    Assert.AreEqual(expected.DatabaseReferences.First().Id, actual.DatabaseReferences.First().Id);
    Assert.AreEqual(expected.DatabaseReferences.First().Properties.First().Item1, actual.DatabaseReferences.First().Properties.First().Item1);
    Assert.AreEqual(expected.DatabaseReferences.First().Properties.First().Item2, actual.DatabaseReferences.First().Properties.First().Item2);
    Assert.AreEqual(expected.DatabaseReferences.First().Type, actual.DatabaseReferences.First().Type);

    // Disulfide bonds (first and last)
    Assert.AreEqual(expected.DisulfideBonds.First().Description, actual.DisulfideBonds.First().Description);
    Assert.AreEqual(expected.DisulfideBonds.First().OneBasedBeginPosition, actual.DisulfideBonds.First().OneBasedBeginPosition);
    Assert.AreEqual(expected.DisulfideBonds.First().OneBasedEndPosition, actual.DisulfideBonds.First().OneBasedEndPosition);
    Assert.AreEqual(expected.DisulfideBonds.Last().Description, actual.DisulfideBonds.Last().Description);
    Assert.AreEqual(expected.DisulfideBonds.Last().OneBasedBeginPosition, actual.DisulfideBonds.Last().OneBasedBeginPosition);
    Assert.AreEqual(expected.DisulfideBonds.Last().OneBasedEndPosition, actual.DisulfideBonds.Last().OneBasedEndPosition);

    // Scalar protein properties
    Assert.AreEqual(expected.FullDescription, actual.FullDescription);
    Assert.AreEqual(expected.FullName, actual.FullName);
    Assert.AreEqual(expected.GeneNames, actual.GeneNames);
    Assert.AreEqual(expected.IsContaminant, actual.IsContaminant);
    Assert.AreEqual(expected.IsDecoy, actual.IsDecoy);
    Assert.AreEqual(expected.Length, actual.Length);
    Assert.AreEqual(expected.Name, actual.Name);
    Assert.AreEqual(expected.Organism, actual.Organism);
    Assert.AreEqual(expected.DatabaseFilePath, actual.DatabaseFilePath);

    // Only one of the three supplied modification positions (position 5) is
    // expected on the protein, both before and after the round trip.
    // NOTE(review): presumably the other two are rejected as invalid when the protein is built - confirm.
    Assert.AreEqual(1, expected.OneBasedPossibleLocalizedModifications.Keys.Count);
    Assert.AreEqual(1, actual.OneBasedPossibleLocalizedModifications.Keys.Count);
    Assert.AreEqual(expected.OneBasedPossibleLocalizedModifications.Keys.First(), actual.OneBasedPossibleLocalizedModifications.Keys.First());
    Assert.IsTrue(expected.OneBasedPossibleLocalizedModifications[5][0].Equals(actual.OneBasedPossibleLocalizedModifications[5][0]));

    // Proteolysis products
    Assert.AreEqual(expected.ProteolysisProducts.First().OneBasedBeginPosition, actual.ProteolysisProducts.First().OneBasedBeginPosition);
    Assert.AreEqual(expected.ProteolysisProducts.First().OneBasedEndPosition, actual.ProteolysisProducts.First().OneBasedEndPosition);
    Assert.AreEqual(expected.ProteolysisProducts.First().Type, actual.ProteolysisProducts.First().Type);

    // Sequence variations (first and last)
    Assert.AreEqual(expected.SequenceVariations.First().Description, actual.SequenceVariations.First().Description);
    Assert.AreEqual(expected.SequenceVariations.First().OneBasedBeginPosition, actual.SequenceVariations.First().OneBasedBeginPosition);
    Assert.AreEqual(expected.SequenceVariations.First().OneBasedEndPosition, actual.SequenceVariations.First().OneBasedEndPosition);
    Assert.AreEqual(expected.SequenceVariations.First().OriginalSequence, actual.SequenceVariations.First().OriginalSequence);
    Assert.AreEqual(expected.SequenceVariations.First().VariantSequence, actual.SequenceVariations.First().VariantSequence);
    Assert.AreEqual(expected.SequenceVariations.Last().Description, actual.SequenceVariations.Last().Description);
    Assert.AreEqual(expected.SequenceVariations.Last().OneBasedBeginPosition, actual.SequenceVariations.Last().OneBasedBeginPosition);
    Assert.AreEqual(expected.SequenceVariations.Last().OneBasedEndPosition, actual.SequenceVariations.Last().OneBasedEndPosition);
    Assert.AreEqual(expected.SequenceVariations.Last().OriginalSequence, actual.SequenceVariations.Last().OriginalSequence);
    Assert.AreEqual(expected.SequenceVariations.Last().VariantSequence, actual.SequenceVariations.Last().VariantSequence);
}
public static void TestComputePEPValue()
{
    // Runs a classic search over a small mzML/FASTA pair, checks that the PEP
    // feature record built for the top-scoring PSM mirrors that PSM's own
    // properties, then computes PEP values for all PSMs and checks how many of
    // them end up with PEP >= 0.5.
    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();

    // Build the paths segment by segment so the directory separator is chosen by
    // Path.Combine instead of a hard-coded Windows '\' inside a path segment.
    string testDataDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData");
    string origDataFile = Path.Combine(testDataDirectory, "TaGe_SA_HeLa_04_subset_longestSeq.mzML");
    string fastaFile = Path.Combine(testDataDirectory, "hela_snip_for_unitTest.fasta");

    MyFileManager myFileManager = new MyFileManager(true);
    CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams());
    var myMsDataFile = myFileManager.LoadFile(origDataFile, commonParameters);
    var searchModes = new SinglePpmAroundZeroSearchMode(5);

    List<Protein> proteinList = ProteinDbLoader.LoadProteinFasta(
        fastaFile,
        true,
        DecoyType.Reverse,
        false,
        ProteinDbLoader.UniprotAccessionRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotGeneNameRegex,
        ProteinDbLoader.UniprotOrganismRegex,
        out var dbErrors,
        -1);

    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, commonParameters).OrderBy(b => b.PrecursorMass).ToArray();

    // The search engine fills this array in place; slots without a match stay null.
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, commonParameters, new List<string>()).Run();

    // The FDR results object is not inspected here; the engine is run for its
    // effect on the PSMs (presumably it populates FdrInfo, which is read below - confirm).
    new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, commonParameters, new List<string>()).Run();

    var nonNullPsms = allPsmsArray.Where(p => p != null).ToList();

    var accessionCounts = PEP_Analysis.GetAccessionCounts(nonNullPsms);

    var maxScore = nonNullPsms.Select(n => n.Score).Max();
    var maxScorePsm = nonNullPsms.First(n => n.Score == maxScore);

    // Count PSMs per sequence key, where the key is the '|'-joined full
    // sequences of a PSM's best matching peptides.
    Dictionary<string, int> sequenceToPsmCount = nonNullPsms
        .Select(psm => string.Join("|", psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence)))
        .GroupBy(sequence => sequence)
        .ToDictionary(grp => grp.Key, grp => grp.Count());

    var maxPsmData = PEP_Analysis.CreateOnePsmDataFromPsm(maxScorePsm, accessionCounts, sequenceToPsmCount);

    Assert.That(maxScorePsm.PeptidesToMatchingFragments.Count, Is.EqualTo(maxPsmData.Ambiguity));
    Assert.That(maxScorePsm.DeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05));
    // The fractional part of the score is compared against the Intensity feature.
    Assert.That((float)(maxScorePsm.Score - (int)maxScorePsm.Score), Is.EqualTo(maxPsmData.Intensity).Within(0.05));
    Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount));
    Assert.That(maxScorePsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(maxPsmData.ModsCount));
    Assert.That(maxScorePsm.Notch ?? 0, Is.EqualTo(maxPsmData.Notch));
    Assert.That(maxScorePsm.PsmCount, Is.EqualTo(maxPsmData.PsmCount));
    Assert.That(maxScorePsm.ScanPrecursorCharge, Is.EqualTo(maxPsmData.ScanPrecursorCharge));

    PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(nonNullPsms);

    int trueCount = allPsmsArray.Where(p => p != null).Count(p => p.FdrInfo.PEP >= 0.5);

    // Argument order matters: this asserts 32 >= trueCount, i.e. at most 32 PSMs have PEP >= 0.5.
    Assert.GreaterOrEqual(32, trueCount);
}
public void Test_getptms_from_mzLibxml_without_prep()
{
    // Reads the modification list directly from cRAP_databaseGPTMD.xml in the
    // test directory and checks that exactly 70 modifications are returned.
    string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"cRAP_databaseGPTMD.xml");

    List<Modification> ptms = ProteinDbLoader.GetPtmListFromProteinXml(databasePath);

    Assert.AreEqual(70, ptms.Count);
}