/// <summary>
/// Test helper: builds a TheoreticalProteoform whose PtmSet holds a single "oxidation" modification.
/// </summary>
/// <param name="a">Forwarded as the first argument of the TheoreticalProteoform constructor.</param>
/// <param name="d">Forwarded as the second argument of the TheoreticalProteoform constructor.</param>
/// <param name="mass">Forwarded to the TheoreticalProteoform constructor.</param>
/// <param name="p">The only protein placed in the proteoform's protein list.</param>
/// <param name="dict">Forwarded unchanged as the last constructor argument.</param>
/// <returns>The newly constructed TheoreticalProteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, string d, double mass, ProteinWithGoTerms p, Dictionary<InputFile, Protein[]> dict)
{
    ModificationMotif motif;
    ModificationMotif.TryGetMotif("X", out motif);

    string mod_title = "oxidation";
    Modification oxidation = new Modification(mod_title, _modificationType: "modtype", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 1);

    // Template protein: only its BaseSequence is handed to the proteoform below;
    // the protein list itself contains the caller-supplied p.
    List<Tuple<string, string>> gene_names = new List<Tuple<string, string>> { new Tuple<string, string>("ordered locus", "GENE") };
    Dictionary<int, List<Modification>> localized_mods = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification> { oxidation } }
    };
    List<ProteolysisProduct> products = new List<ProteolysisProduct> { new ProteolysisProduct(1, 12, "") };
    List<DatabaseReference> db_refs = new List<DatabaseReference>
    {
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
    };
    List<GoTerm> go_terms = new List<GoTerm>
    {
        new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
    };
    ProteinWithGoTerms template = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", gene_names, localized_mods, products, "T2", "T3", true, false, db_refs, go_terms);

    PtmSet ptm_set = new PtmSet(new List<Ptm> { new Ptm(0, oxidation) });
    return new TheoreticalProteoform(a, d, template.BaseSequence, new List<ProteinWithGoTerms> { p }, mass, 0, ptm_set, true, false, dict);
}
/// <summary>
/// Test helper: builds a TheoreticalProteoform around a fixed template protein ("MSSSSSSSSSSS", accession "T1")
/// and then overwrites its modified_mass with <paramref name="mass"/>.
/// </summary>
/// <param name="a">Forwarded as the first argument of the TheoreticalProteoform constructor.</param>
/// <param name="mass">Forwarded to the constructor and also assigned to modified_mass afterwards.</param>
/// <param name="lysine_count">Forwarded to the TheoreticalProteoform constructor.</param>
/// <returns>The newly constructed TheoreticalProteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, double mass, int lysine_count)
{
    ModificationMotif motif;
    ModificationMotif.TryGetMotif("X", out motif);
    Modification unmodification = new Modification("Unmodified", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 0);

    List<Tuple<string, string>> gene_names = new List<Tuple<string, string>> { new Tuple<string, string>("", "") };
    Dictionary<int, List<Modification>> localized_mods = new Dictionary<int, List<Modification>>
    {
        { 0, new List<Modification> { unmodification } }
    };
    List<ProteolysisProduct> products = new List<ProteolysisProduct> { new ProteolysisProduct(1, 12, "") };
    List<DatabaseReference> db_refs = new List<DatabaseReference>
    {
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
    };
    List<GoTerm> go_terms = new List<GoTerm>
    {
        new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
    };
    ProteinWithGoTerms template = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", gene_names, localized_mods, products, "T2", "T3", true, false, db_refs, go_terms);

    // The PtmSet holds one default-constructed Ptm.
    PtmSet ptm_set = new PtmSet(new List<Ptm> { new Ptm() });
    TheoreticalProteoform theoretical = new TheoreticalProteoform(a, "", template.BaseSequence, new List<ProteinWithGoTerms> { template }, mass, lysine_count, ptm_set, true, false, new Dictionary<InputFile, Protein[]>());
    theoretical.modified_mass = mass;
    return theoretical;
}
/// <summary>
/// Test helper: builds a TheoreticalProteoform whose PtmSet holds a single "oxidation" ModificationWithMass.
/// </summary>
/// <param name="a">Forwarded as the first argument of the TheoreticalProteoform constructor.</param>
/// <param name="d">Forwarded as the second argument of the TheoreticalProteoform constructor.</param>
/// <param name="mass">Forwarded to the TheoreticalProteoform constructor.</param>
/// <param name="p">The only protein placed in the proteoform's protein list.</param>
/// <param name="dict">Forwarded unchanged as the last constructor argument.</param>
/// <returns>The newly constructed TheoreticalProteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, string d, double mass, ProteinWithGoTerms p, Dictionary<InputFile, Protein[]> dict)
{
    ModificationMotif motif;
    ModificationMotif.TryGetMotif("K", out motif);

    string mod_title = "oxidation";
    ModificationWithMass oxidation = new ModificationWithMass(mod_title, new Tuple<string, string>("", mod_title), motif, ModificationSites.K, 1, new Dictionary<string, IList<string>>(), new List<double>(), new List<double>(), "");

    // NOTE(review): this template protein is constructed but never passed to the returned
    // proteoform; it is kept so the helper performs exactly the same work as before.
    List<Tuple<string, string>> gene_names = new List<Tuple<string, string>> { new Tuple<string, string>("ordered locus", "GENE") };
    Dictionary<int, List<Modification>> localized_mods = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification> { oxidation } }
    };
    List<DatabaseReference> db_refs = new List<DatabaseReference>
    {
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
    };
    List<GoTerm> go_terms = new List<GoTerm>
    {
        new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
    };
    ProteinWithGoTerms p1 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", gene_names, localized_mods, new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false, db_refs, go_terms);

    PtmSet ptm_set = new PtmSet(new List<Ptm> { new Ptm(0, oxidation) });
    return new TheoreticalProteoform(a, d, new List<ProteinWithGoTerms> { p }, mass, 0, ptm_set, true, false, dict);
}
// Checks ProteinSequenceGroup on three proteins with the same sequence (3 GoTerms, 3 gene names,
// accession "T1_3G", not a contaminant), then checks that group_proteins_by_sequence returns two
// groups once one protein is given a different sequence.
public void test_protein_grouping_by_sequence()
{
    Func<DatabaseReference> go_reference = () =>
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });
    DatabaseReference d1 = go_reference();
    DatabaseReference d2 = go_reference();
    DatabaseReference d3 = go_reference();

    // NOTE(review): all three GoTerms are built from d1 (not d2/d3) - preserved as in the original.
    GoTerm g1 = new GoTerm(d1);
    GoTerm g2 = new GoTerm(d1);
    GoTerm g3 = new GoTerm(d1);

    // Every protein in this test differs only in sequence, accession, database reference and GoTerm.
    Func<string, string, DatabaseReference, GoTerm, ProteinWithGoTerms> build_protein = (sequence, accession, db_ref, go_term) =>
        new ProteinWithGoTerms(
            sequence,
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new int?[] { 0 },
            new int?[] { 0 },
            new string[] { "" },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference> { db_ref },
            new List<GoTerm> { go_term });

    ProteinWithGoTerms p1 = build_protein("MSSSSSSSSSSS", "T1", d1, g1);
    ProteinWithGoTerms p2 = build_protein("MSSSSSSSSSSS", "T2", d2, g2);
    ProteinWithGoTerms p3 = build_protein("MSSSSSSSSSSS", "T3", d3, g3);

    List<ProteinWithGoTerms> same_sequence = new List<ProteinWithGoTerms> { p1, p2, p3 };
    ProteinSequenceGroup psg = new ProteinSequenceGroup(same_sequence.OrderByDescending(p => p.IsContaminant ? 1 : 0));
    Assert.AreEqual(3, psg.GoTerms.Count());
    Assert.AreEqual(3, psg.GeneNames.Count());
    Assert.AreEqual("T1_3G", psg.Accession);
    Assert.False(psg.IsContaminant);

    // Replace p3 with a protein whose sequence differs ("MC..." instead of "MS...").
    p3 = build_protein("MCSSSSSSSSSS", "T3", d3, g3);
    ProteinSequenceGroup[] psgs = SaveState.lollipop.theoretical_database.group_proteins_by_sequence(new List<ProteinWithGoTerms> { p1, p2, p3 });
    Assert.AreEqual(2, psgs.Length);
}
// One accepted experimental-theoretical relation between an experimental proteoform and a
// TheoreticalProteoformGroup; the assertions expect a single family containing both.
public void test_construct_one_proteform_family_from_ET_with_theoretical_pf_group()
{
    ProteoformCommunity test_community = new ProteoformCommunity();
    SaveState.lollipop.theoretical_database.uniprotModifications = new Dictionary<string, List<Modification>>
    {
        { "unmodified", new List<Modification> { new Modification("unmodified", "unknown") } }
    };

    InputFile f = new InputFile("fake.txt", Purpose.ProteinDatabase);
    ProteinWithGoTerms p1 = new ProteinWithGoTerms(
        "",
        "T1",
        new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
        new Dictionary<int, List<Modification>>(),
        new int?[] { 0 },
        new int?[] { 0 },
        new string[] { "" },
        "name",
        "full_name",
        true,
        false,
        new List<DatabaseReference>(),
        new List<GoTerm>());
    Dictionary<InputFile, Protein[]> dict = new Dictionary<InputFile, Protein[]>
    {
        { f, new Protein[] { p1 } }
    };
    TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, dict);

    //One accepted ET relation; should give one ProteoformFamily
    SaveState.lollipop.min_peak_count_et = 1;
    ExperimentalProteoform pf1 = ConstructorsForTesting.ExperimentalProteoform("E1");
    TheoreticalProteoformGroup pf2 = new TheoreticalProteoformGroup(new List<TheoreticalProteoform> { t });
    ProteoformComparison comparison = ProteoformComparison.ExperimentalTheoretical;
    ProteoformRelation pr1 = new ProteoformRelation(pf1, pf2, comparison, 0, TestContext.CurrentContext.TestDirectory);
    pr1.Accepted = true;

    List<ProteoformRelation> prs = new List<ProteoformRelation> { pr1 };
    foreach (ProteoformRelation pr in prs)
    {
        pr.set_nearby_group(prs, prs.Select(r => r.InstanceId).ToList());
    }

    DeltaMassPeak peak = new DeltaMassPeak(prs[0], prs);
    SaveState.lollipop.et_peaks = new List<DeltaMassPeak> { peak };
    test_community.experimental_proteoforms = new ExperimentalProteoform[] { pf1 };
    test_community.theoretical_proteoforms = new TheoreticalProteoform[] { pf2 };
    test_community.construct_families();

    Assert.AreEqual(1, test_community.families.Count);
    Assert.AreEqual(2, test_community.families[0].proteoforms.Count);
    Assert.AreEqual(1, test_community.families.First().experimental_proteoforms.Count);
    Assert.AreEqual(1, test_community.families.First().theoretical_proteoforms.Count);
    Assert.AreEqual("E1", test_community.families.First().experimentals_list);
    Assert.AreEqual(p1.Name, test_community.families.First().name_list);
    Assert.AreEqual(pf2.accession, test_community.families.First().accession_list);
}
//MAKE THEORETICAL
/// <summary>
/// Test helper: builds a TheoreticalProteoform for <paramref name="p"/> whose PtmSet contains one
/// Ptm (position argument 0) for every ModificationWithMass listed in
/// p.OneBasedPossibleLocalizedModifications.
/// </summary>
/// <param name="a">Forwarded as the first argument of the TheoreticalProteoform constructor.</param>
/// <param name="p">Protein whose localized modifications populate the PtmSet; also the only protein in the list.</param>
/// <param name="dict">Forwarded unchanged as the last constructor argument.</param>
/// <returns>The newly constructed TheoreticalProteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, ProteinWithGoTerms p, Dictionary<InputFile, Protein[]> dict)
{
    ModificationMotif motif;
    ModificationMotif.TryGetMotif("K", out motif);

    // Flatten every position's modification list, keeping only ModificationWithMass entries.
    List<Ptm> ptms = p.OneBasedPossibleLocalizedModifications
        .SelectMany(position_mods => position_mods.Value.OfType<ModificationWithMass>())
        .Select(mod_with_mass => new Ptm(0, mod_with_mass))
        .ToList();
    PtmSet ptm_set = new PtmSet(ptms);

    return new TheoreticalProteoform(a, "", new List<ProteinWithGoTerms> { p }, 100, 0, ptm_set, true, true, dict);
}
// Calls EnterTheoreticalProteformFamily twice through Parallel.Invoke with the same arguments,
// including the same output list. There are no assertions; per the test name, the check is
// that the calls complete without throwing.
public void parallel_enter_theoreticals_doesnt_crash()
{
    TheoreticalProteoformDatabase db = new TheoreticalProteoformDatabase();
    db.populate_aa_mass_dictionary();

    List<Modification> modifications = new List<Modification>();
    List<TheoreticalProteoform> theoreticals = new List<TheoreticalProteoform>();
    ProteinWithGoTerms p = ConstructorsForTesting.make_a_theoretical().ExpandedProteinList.First();

    Action enter_family = () => db.EnterTheoreticalProteformFamily("SEQ", p, p.OneBasedPossibleLocalizedModifications, p.Accession, theoreticals, 1, modifications);
    Parallel.Invoke(enter_family, enter_family);
}
// Builds a ProteinSequenceGroup from three same-sequence proteins, the last constructed with its
// second boolean argument set to true, ordered so contaminants come first. The assertions expect
// accession "T3_3G" and IsContaminant == true.
public void test_protein_grouping_by_sequence_contaminant()
{
    Func<DatabaseReference> go_reference = () =>
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });
    DatabaseReference d1 = go_reference();
    DatabaseReference d2 = go_reference();
    DatabaseReference d3 = go_reference();

    // NOTE(review): all three GoTerms are built from d1 (not d2/d3) - preserved as in the original.
    GoTerm g1 = new GoTerm(d1);
    GoTerm g2 = new GoTerm(d1);
    GoTerm g3 = new GoTerm(d1);

    // The proteins differ only in accession, the second boolean flag, database reference and GoTerm.
    Func<string, bool, DatabaseReference, GoTerm, ProteinWithGoTerms> build_protein = (accession, second_flag, db_ref, go_term) =>
        new ProteinWithGoTerms(
            "MSSSSSSSSSSS",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
            "T2",
            "T3",
            true,
            second_flag,
            new List<DatabaseReference> { db_ref },
            new List<GoTerm> { go_term });

    ProteinWithGoTerms p1 = build_protein("T1", false, d1, g1);
    ProteinWithGoTerms p2 = build_protein("T2", false, d2, g2);
    ProteinWithGoTerms p3 = build_protein("T3", true, d3, g3);

    List<ProteinWithGoTerms> proteins = new List<ProteinWithGoTerms> { p1, p2, p3 };
    ProteinSequenceGroup psg = new ProteinSequenceGroup(proteins.OrderByDescending(p => p.IsContaminant ? 1 : 0));

    Assert.AreEqual(3, psg.GoTerms.Count());
    Assert.AreEqual(3, psg.GeneNames.Count());
    Assert.AreEqual("T3_3G", psg.Accession);
    Assert.True(psg.IsContaminant);
}
//MAKE THEORETICAL
/// <summary>
/// Test helper: builds a TheoreticalProteoform for <paramref name="p"/> whose PtmSet contains one
/// Ptm (position argument 0) for every modification in p.OneBasedPossibleLocalizedModifications,
/// then sets begin = 1 and end = 4 on the result.
/// </summary>
/// <param name="a">Forwarded as the first argument of the TheoreticalProteoform constructor.</param>
/// <param name="p">Protein supplying the base sequence and modifications; also the only protein in the list.</param>
/// <param name="dict">Forwarded unchanged as the last constructor argument.</param>
/// <returns>The newly constructed TheoreticalProteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, ProteinWithGoTerms p, Dictionary<InputFile, Protein[]> dict)
{
    ModificationMotif motif;
    ModificationMotif.TryGetMotif("X", out motif);

    // Flatten every position's modification list into one Ptm per modification.
    List<Ptm> ptms = p.OneBasedPossibleLocalizedModifications
        .SelectMany(position_mods => position_mods.Value)
        .Select(mod => new Ptm(0, mod))
        .ToList();
    PtmSet ptm_set = new PtmSet(ptms);

    TheoreticalProteoform theoretical = new TheoreticalProteoform(a, "", p.BaseSequence, new List<ProteinWithGoTerms> { p }, 100, 0, ptm_set, true, true, dict);
    theoretical.begin = 1;
    theoretical.end = 4;
    return theoretical;
}
// Three proteins are registered under three InputFiles, only the first of which has
// ContaminantDB set. A TheoreticalProteoformGroup that includes the proteoform built from p1 is
// expected to be a contaminant; a group without it is not.
public void test_contaminant_check()
{
    InputFile f = new InputFile("fake.txt", Purpose.ProteinDatabase);
    f.ContaminantDB = true;
    InputFile g = new InputFile("fake.txt", Purpose.ProteinDatabase);
    InputFile h = new InputFile("fake.txt", Purpose.ProteinDatabase);

    // The three proteins differ only in accession.
    Func<string, ProteinWithGoTerms> build_protein = accession =>
        new ProteinWithGoTerms(
            "",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference>(),
            new List<GoTerm>());

    ProteinWithGoTerms p1 = build_protein("T1");
    ProteinWithGoTerms p2 = build_protein("T2");
    ProteinWithGoTerms p3 = build_protein("T3");

    Dictionary<InputFile, Protein[]> dict = new Dictionary<InputFile, Protein[]>
    {
        { f, new Protein[] { p1 } },
        { g, new Protein[] { p2 } },
        { h, new Protein[] { p3 } },
    };

    TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, dict);
    TheoreticalProteoform u = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", p2, dict);
    TheoreticalProteoform v = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", p3, dict);

    // Contaminant: the group is built with contaminant proteoforms ordered first.
    List<TheoreticalProteoform> with_contaminant = new List<TheoreticalProteoform> { v, u, t };
    TheoreticalProteoform w = new TheoreticalProteoformGroup(with_contaminant.OrderByDescending(theo => theo.contaminant ? 1 : 0));
    Assert.True(w.contaminant);
    Assert.True(w.accession.Contains(p1.Accession));

    //Not contaminant
    TheoreticalProteoform x = new TheoreticalProteoformGroup(new List<TheoreticalProteoform> { v, u });
    Assert.False(x.contaminant);

    //PTM mass test
    Assert.AreEqual(0, t.ptm_mass);
}
/// <summary>
/// Test helper: builds a TheoreticalProteoform with an empty PtmSet around a fixed template protein
/// ("MSSSSSSSSSSS", accession "T1") that lists an "Unmodified" ModificationWithMass under key 0.
/// </summary>
/// <param name="a">Forwarded as the first argument of the TheoreticalProteoform constructor.</param>
/// <param name="mass">Forwarded to the TheoreticalProteoform constructor.</param>
/// <param name="lysine_count">Forwarded to the TheoreticalProteoform constructor.</param>
/// <returns>The newly constructed TheoreticalProteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, double mass, int lysine_count)
{
    ModificationWithMass unmodification = new ModificationWithMass("Unmodified", new Tuple<string, string>("N/A", "Unmodified"), null, ModificationSites.Any, 0, null, null, null, null);

    List<Tuple<string, string>> gene_names = new List<Tuple<string, string>> { new Tuple<string, string>("", "") };
    Dictionary<int, List<Modification>> localized_mods = new Dictionary<int, List<Modification>>
    {
        { 0, new List<Modification> { unmodification } }
    };
    List<DatabaseReference> db_refs = new List<DatabaseReference>
    {
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
    };
    List<GoTerm> go_terms = new List<GoTerm>
    {
        new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
    };
    ProteinWithGoTerms template = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", gene_names, localized_mods, new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false, db_refs, go_terms);

    PtmSet empty_ptm_set = new PtmSet(new List<Ptm>());
    return new TheoreticalProteoform(a, "", new List<ProteinWithGoTerms> { template }, mass, lysine_count, empty_ptm_set, true, false, new Dictionary<InputFile, Protein[]>());
}
// Builds two single-relation families (e1-t and e2-u), treats e1 as the only significant
// experimental proteoform, and checks the resulting GO term numbers and "interesting" families.
public void get_interesting_goterm_families()
{
    Sweet.lollipop = new Lollipop();
    Sweet.lollipop.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
    Sweet.lollipop.significance_by_permutation = true;
    Sweet.lollipop.significance_by_log2FC = false;

    Func<string, string, DatabaseReference> go_reference = (id, term) =>
        new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });
    DatabaseReference d1 = go_reference("GO:1", "P:1");
    DatabaseReference d2 = go_reference("GO:2", "P:2");
    DatabaseReference d3 = go_reference("GO:1", "P:1");
    GoTerm g1 = new GoTerm(d1);
    GoTerm g2 = new GoTerm(d2);
    GoTerm g3 = new GoTerm(d3);

    // The three proteins differ only in accession, database reference and GoTerm.
    Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> build_protein = (accession, db_ref, go_term) =>
        new ProteinWithGoTerms(
            "ASDF",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference> { db_ref },
            new List<GoTerm> { go_term });
    ProteinWithGoTerms p1 = build_protein("T1", d1, g1);
    ProteinWithGoTerms p2 = build_protein("T2", d2, g2);
    ProteinWithGoTerms p3 = build_protein("T3", d3, g3);

    Dictionary<InputFile, Protein[]> dict = new Dictionary<InputFile, Protein[]>
    {
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p1 } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p2 } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p3 } },
    };

    ExperimentalProteoform e1 = ConstructorsForTesting.ExperimentalProteoform("E");
    ExperimentalProteoform e2 = ConstructorsForTesting.ExperimentalProteoform("E");
    foreach (ExperimentalProteoform experimental in new ExperimentalProteoform[] { e1, e2 })
    {
        experimental.quant.intensitySum = 1;
        experimental.quant.TusherValues1.significant = true;
        experimental.quant.tusherlogFoldChange = 1;
    }

    TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, dict);
    TheoreticalProteoform u = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", p2, dict);
    TheoreticalProteoform v = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", p3, dict);
    t.ExpandedProteinList = new List<ProteinWithGoTerms> { p1 };
    u.ExpandedProteinList = new List<ProteinWithGoTerms> { p2 };
    v.ExpandedProteinList = new List<ProteinWithGoTerms> { p3 };
    foreach (TheoreticalProteoform theoretical in new TheoreticalProteoform[] { t, u, v })
    {
        theoretical.begin = 1;
        theoretical.end = 1;
    }

    make_relation(e1, t);
    //make_relation(e1, v); // we don't allow this to happen anymore... we only allow one ET connection per E
    make_relation(e2, u);

    // two theoreticals with the same GoTerms... expecting one GoTerm number but two theoretical proteins (now only one)
    ProteoformFamily f = new ProteoformFamily(e1);
    ProteoformFamily h = new ProteoformFamily(e2);
    f.construct_family();
    f.identify_experimentals();
    h.construct_family();
    h.identify_experimentals();
    List<ProteoformFamily> families = new List<ProteoformFamily> { f, h };
    t.family = f;
    v.family = f;
    e1.family = f;
    u.family = h;
    e2.family = h;

    List<ExperimentalProteoform> fake_significant = new List<ExperimentalProteoform> { e1 };
    List<ProteinWithGoTerms> significant_proteins = Sweet.lollipop.getInducedOrRepressedProteins(fake_significant, Sweet.lollipop.TusherAnalysis1.GoAnalysis);
    List<GoTermNumber> gtn = Sweet.lollipop.TusherAnalysis1.GoAnalysis.getGoTermNumbers(significant_proteins, new List<ProteinWithGoTerms> { p1, p2, p3 });
    Assert.AreEqual(1, significant_proteins.Count);
    Assert.AreEqual(1, gtn.Count);
    Assert.AreEqual("1", gtn.First().Id);
    Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), gtn.First().log_odds_ratio);

    List<ProteoformFamily> fams = Sweet.lollipop.getInterestingFamilies(gtn, families);
    Assert.AreEqual(1, fams.Count);
    Assert.AreEqual(1, fams[0].theoretical_proteoforms.Count);
}
// Runs GO_analysis with backgroundProteinsList pointing at "test_protein_list.txt" in the test
// directory, after wiring e1 to two theoreticals (t and v) and e2 to one (u), then checks the
// GO term numbers and the "interesting" families.
public void test_goterm_analysis_with_custom_list()
{
    Sweet.lollipop = new Lollipop();
    Sweet.lollipop.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
    Sweet.lollipop.significance_by_permutation = true;
    Sweet.lollipop.significance_by_log2FC = false;

    Func<string, string, DatabaseReference> go_reference = (id, term) =>
        new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });
    DatabaseReference d1 = go_reference("GO:1", "P:1");
    DatabaseReference d2 = go_reference("GO:2", "P:2");
    DatabaseReference d3 = go_reference("GO:1", "P:1");
    GoTerm g1 = new GoTerm(d1);
    GoTerm g2 = new GoTerm(d2);
    GoTerm g3 = new GoTerm(d3);

    // The three proteins differ only in accession, database reference and GoTerm.
    Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> build_protein = (accession, db_ref, go_term) =>
        new ProteinWithGoTerms(
            "ASDF",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference> { db_ref },
            new List<GoTerm> { go_term });
    ProteinWithGoTerms p1 = build_protein("T1", d1, g1);
    ProteinWithGoTerms p2 = build_protein("T2", d2, g2);
    ProteinWithGoTerms p3 = build_protein("T3", d3, g3);

    Dictionary<InputFile, Protein[]> dict = new Dictionary<InputFile, Protein[]>
    {
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p1 } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p2 } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p3 } },
    };

    ExperimentalProteoform e1 = ConstructorsForTesting.ExperimentalProteoform("E");
    ExperimentalProteoform e2 = ConstructorsForTesting.ExperimentalProteoform("E");
    foreach (ExperimentalProteoform experimental in new ExperimentalProteoform[] { e1, e2 })
    {
        experimental.quant.intensitySum = 1;
        experimental.quant.TusherValues1.significant = true;
        experimental.quant.tusherlogFoldChange = 1;
    }

    TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, dict);
    TheoreticalProteoform u = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", p2, dict);
    TheoreticalProteoform v = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", p3, dict);
    t.ExpandedProteinList = new List<ProteinWithGoTerms> { p1 };
    u.ExpandedProteinList = new List<ProteinWithGoTerms> { p2 };
    v.ExpandedProteinList = new List<ProteinWithGoTerms> { p3 };

    make_relation(e1, t);
    make_relation(e1, v);
    make_relation(e2, u);

    // two theoreticals with the same GoTerms... expecting one GoTerm number but two theoretical proteins
    ProteoformFamily f = new ProteoformFamily(e1);
    ProteoformFamily h = new ProteoformFamily(e2);
    f.construct_family();
    f.identify_experimentals();
    h.construct_family();
    h.identify_experimentals();
    List<ProteoformFamily> families = new List<ProteoformFamily> { f, h };
    t.family = f;
    v.family = f;
    e1.family = f;
    u.family = h;
    e2.family = h;

    Sweet.lollipop.TusherAnalysis1.inducedOrRepressedProteins = Sweet.lollipop.getInducedOrRepressedProteins(new List<ExperimentalProteoform> { e1 }, Sweet.lollipop.TusherAnalysis1.GoAnalysis);
    Sweet.lollipop.TusherAnalysis1.GoAnalysis.allTheoreticalProteins = true;
    Sweet.lollipop.theoretical_database.expanded_proteins = new ProteinWithGoTerms[] { p1, p2, p3 };
    Sweet.lollipop.TusherAnalysis1.GoAnalysis.backgroundProteinsList = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_protein_list.txt");
    Sweet.lollipop.TusherAnalysis1.GoAnalysis.GO_analysis(Sweet.lollipop.TusherAnalysis1.inducedOrRepressedProteins);

    // only taking one ET connection by definition in forming ET relations; only one is used in identify theoreticals
    Assert.AreEqual(1, Sweet.lollipop.TusherAnalysis1.inducedOrRepressedProteins.Count);
    Assert.AreEqual(1, Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.Count);
    Assert.AreEqual("1", Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.First().Id);
    Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.First().log_odds_ratio);

    List<ProteoformFamily> fams = Sweet.lollipop.getInterestingFamilies(Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers, families);
    Assert.AreEqual(1, fams.Count);
    Assert.AreEqual(2, fams[0].theoretical_proteoforms.Count);
}
/// <summary>
/// Builds and returns a ProteoformCommunity with three theoretical and six experimental proteoforms
/// linked by three ET and five EE relations, accepts delta-mass peaks, constructs families, and
/// asserts on the intermediate state along the way.
/// </summary>
/// <remarks>
/// Replaces SaveState.lollipop with a fresh Lollipop and configures it, so it affects global test state.
/// Original intent, per the author's note: the EE relation created with 19 is not accepted into a
/// peak, which should give 2 families.
/// </remarks>
/// <returns>The community after construct_families() has run.</returns>
public static ProteoformCommunity construct_two_families_with_potentially_colliding_theoreticals()
{
    ProteoformCommunity community = new ProteoformCommunity();
    SaveState.lollipop = new Lollipop();
    SaveState.lollipop.target_proteoform_community = community;
    SaveState.lollipop.theoretical_database.uniprotModifications = new Dictionary<string, List<Modification>>
    {
        { "unmodified", new List<Modification> { ConstructorsForTesting.get_modWithMass("unmodified", 0) } },
        { "fake", new List<Modification> { ConstructorsForTesting.get_modWithMass("fake", 19) } },
    };
    SaveState.lollipop.modification_ranks = new Dictionary<double, int> { { 0, 1 }, { 19, 2 } };
    SaveState.lollipop.mod_rank_sum_threshold = 2;
    SaveState.lollipop.theoretical_database.all_possible_ptmsets = PtmCombos.generate_all_ptmsets(
        1,
        SaveState.lollipop.theoretical_database.uniprotModifications.SelectMany(kv => kv.Value).OfType<ModificationWithMass>().ToList(),
        SaveState.lollipop.modification_ranks,
        1);
    SaveState.lollipop.theoretical_database.all_mods_with_mass = SaveState.lollipop.theoretical_database.uniprotModifications.SelectMany(kv => kv.Value).OfType<ModificationWithMass>().ToList();
    SaveState.lollipop.theoretical_database.possible_ptmset_dictionary = SaveState.lollipop.theoretical_database.make_ptmset_dictionary();
    SaveState.lollipop.ee_max_mass_difference = 20;
    SaveState.lollipop.peak_width_base_ee = 0.015;
    SaveState.lollipop.min_peak_count_ee = 3; //needs to be high so that 0 peak accepted, other peak isn't....
    SaveState.lollipop.min_peak_count_et = 2; //needs to be lower so the ET relations are accepted

    //TheoreticalProteoformGroup
    InputFile f = new InputFile("fake.txt", Purpose.ProteinDatabase);
    ProteinWithGoTerms p1 = new ProteinWithGoTerms(
        "",
        p1_accession,
        new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
        new Dictionary<int, List<Modification>>(),
        new int?[] { 0 },
        new int?[] { 0 },
        new string[] { "" },
        p1_name,
        p1_fullName,
        true,
        false,
        new List<DatabaseReference> { p1_dbRef },
        new List<GoTerm> { p1_goterm });
    Dictionary<InputFile, Protein[]> dict = new Dictionary<InputFile, Protein[]>
    {
        { f, new Protein[] { p1 } }
    };
    TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_asdf", "T1_asdf", 1234.56, p1, dict);
    TheoreticalProteoformGroup pf1 = new TheoreticalProteoformGroup(new List<TheoreticalProteoform> { t });
    TheoreticalProteoform pf2 = ConstructorsForTesting.make_a_theoretical("T1_asdf_pf2", "T1_asdf_1", 1234.56, dict);

    //ExperimentalProteoforms
    ExperimentalProteoform pf3 = ConstructorsForTesting.ExperimentalProteoform("E1", 0, 0, true);
    ExperimentalProteoform pf4 = ConstructorsForTesting.ExperimentalProteoform("E2", 0, 0, true);
    ExperimentalProteoform pf5 = ConstructorsForTesting.ExperimentalProteoform("E3", 0, 0, true);
    ExperimentalProteoform pf6 = ConstructorsForTesting.ExperimentalProteoform("E4", 0, 0, true);
    ExperimentalProteoform pf7 = ConstructorsForTesting.ExperimentalProteoform("E5", 0, 0, true);
    ExperimentalProteoform pf8 = ConstructorsForTesting.ExperimentalProteoform("E6", 0, 0, true);
    TheoreticalProteoform pf9 = ConstructorsForTesting.make_a_theoretical("T1_asdf_pf9", "T1_asdf_1", 1253.56, dict);

    community.theoretical_proteoforms = new TheoreticalProteoform[] { pf1, pf2, pf9 };
    community.experimental_proteoforms = new ExperimentalProteoform[] { pf3, pf4, pf5, pf6, pf7, pf8 };
    pf3.agg_mass = 1234.56;
    pf4.agg_mass = 1234.56;
    pf5.agg_mass = 1234.56;
    pf6.agg_mass = 1253.56;
    pf7.agg_mass = 1253.56;
    pf8.agg_mass = 1253.56;

    // Relations: pf3 is tied to both pf1 and pf2, the experimentals are chained pf3..pf8, and pf8 is tied to pf9.
    ProteoformComparison et = ProteoformComparison.ExperimentalTheoretical;
    ProteoformComparison ee = ProteoformComparison.ExperimentalExperimental;
    ConstructorsForTesting.make_relation(pf3, pf1, et, 0);
    ConstructorsForTesting.make_relation(pf3, pf2, et, 0);
    ConstructorsForTesting.make_relation(pf3, pf4, ee, 0);
    ConstructorsForTesting.make_relation(pf4, pf5, ee, 0);
    ConstructorsForTesting.make_relation(pf5, pf6, ee, 19); //not accepted
    ConstructorsForTesting.make_relation(pf6, pf7, ee, 0);
    ConstructorsForTesting.make_relation(pf7, pf8, ee, 0);
    ConstructorsForTesting.make_relation(pf8, pf9, et, 0);

    // Collect each relation once, even though it is reachable from both of its proteoforms.
    List<ProteoformRelation> prs = new HashSet<ProteoformRelation>(
        community.experimental_proteoforms.SelectMany(p => p.relationships)
            .Concat(community.theoretical_proteoforms.SelectMany(p => p.relationships))).ToList();
    foreach (Proteoform p in prs.SelectMany(r => r.connected_proteoforms))
    {
        Assert.IsNotNull(p);
    }

    List<ProteoformRelation> prs_et = prs.Where(r => r.RelationType == ProteoformComparison.ExperimentalTheoretical).OrderBy(r => r.DeltaMass).ToList();
    SaveState.lollipop.et_relations = prs_et;
    List<ProteoformRelation> prs_ee = prs.Where(r => r.RelationType == ProteoformComparison.ExperimentalExperimental).OrderBy(r => r.DeltaMass).ToList();
    SaveState.lollipop.ee_relations = prs_ee;

    foreach (ProteoformRelation pr in prs_et)
    {
        pr.set_nearby_group(prs_et, prs_et.Select(r => r.InstanceId).ToList());
    }
    foreach (ProteoformRelation pr in prs_ee)
    {
        pr.set_nearby_group(prs_ee, prs_ee.Select(r => r.InstanceId).ToList());
    }

    // Three ET relations were created above, each with 0 as the last make_relation argument.
    Assert.AreEqual(3, pf1.relationships.First().nearby_relations_count);
    Assert.AreEqual(3, pf2.relationships.First().nearby_relations_count);
    // Four EE relations were created with 0 ...
    Assert.AreEqual(4, pf4.relationships.First().nearby_relations_count);
    Assert.AreEqual(4, pf5.relationships.First().nearby_relations_count);
    // ... and one EE relation with 19.
    Assert.AreEqual(1, pf6.relationships.First().nearby_relations_count);
    Assert.AreEqual(4, pf7.relationships.First().nearby_relations_count);
    Assert.AreEqual(4, pf8.relationships.First().nearby_relations_count);

    community.accept_deltaMass_peaks(prs_et, new List<ProteoformRelation>());
    community.accept_deltaMass_peaks(prs_ee, new List<ProteoformRelation>());
    Assert.AreEqual(3, SaveState.lollipop.et_peaks.Count + SaveState.lollipop.ee_peaks.Count);
    Assert.AreEqual(1, SaveState.lollipop.et_peaks.Count(peak => peak.Accepted)); // 1 ET peak
    Assert.AreEqual(1, SaveState.lollipop.ee_peaks.Count(peak => peak.Accepted)); // 1 EE peak accepted
    Assert.AreEqual(4, SaveState.lollipop.ee_peaks.First(peak => peak.Accepted && peak.RelationType == ProteoformComparison.ExperimentalExperimental).grouped_relations.Count());
    Assert.AreEqual(3, SaveState.lollipop.et_peaks.First(peak => peak.Accepted && peak.RelationType == ProteoformComparison.ExperimentalTheoretical).grouped_relations.Count());

    community.construct_families();

    //Testing the identification of experimentals
    //test with a modificationwithmass that's 0 mass, and then see that it crawls around and labels them each with growing ptm sets with that modification
    //test that the relation.represented_modification gets set
    Assert.True(SaveState.lollipop.et_relations.All(r => r.peak.DeltaMass != 19 || r.represented_ptmset == null));
    Assert.True(SaveState.lollipop.et_relations.All(r => r.peak.DeltaMass != 0 || r.represented_ptmset.ptm_combination.First().modification.id == "unmodified"));
    Assert.True(pf1 == pf3.linked_proteoform_references.First() || pf2 == pf3.linked_proteoform_references.First());

    //test I don't get re-reassignments
    Assert.AreEqual(pf3, pf4.linked_proteoform_references.Last()); //test that the proteoform.theoretical_reference gets set to each successive PF base
    Assert.AreEqual(
        (pf3.linked_proteoform_references.First() as TheoreticalProteoform).accession,
        (pf4.linked_proteoform_references.First() as TheoreticalProteoform).accession);
    Assert.AreEqual(
        (pf3.linked_proteoform_references.First() as TheoreticalProteoform).fragment,
        (pf4.linked_proteoform_references.First() as TheoreticalProteoform).fragment);
    Assert.AreEqual(pf4, pf5.linked_proteoform_references.Last());
    Assert.AreEqual(
        (pf3.linked_proteoform_references.First() as TheoreticalProteoform).accession,
        (pf5.linked_proteoform_references.First() as TheoreticalProteoform).accession); //test that the accession gets carried all the way through the depth of connections
    Assert.AreEqual(
        (pf3.linked_proteoform_references.First() as TheoreticalProteoform).fragment,
        (pf5.linked_proteoform_references.First() as TheoreticalProteoform).fragment);
    Assert.AreEqual(pf9, pf8.linked_proteoform_references.Last());

    return community;
}
/// <summary>
/// Builds three GO-annotated proteins (the first and third share the GO id "GO:1" / term "P:1"),
/// two experimental proteoforms and three theoretical proteoforms, relates e1-t and e2-u,
/// constructs one family per experimental, and then checks the results of
/// <c>getInducedOrRepressedProteins</c>, <c>getGoTermNumbers</c> and <c>getInterestingFamilies</c>.
/// Expected: one significant protein, one GO term number with Id "1" and a log odds ratio of
/// -log2(2/3), and a single interesting family holding one theoretical proteoform.
/// </summary>
public void get_interesting_goterm_families()
{
    SaveState.lollipop = new Lollipop();

    // GO database references; goRef1 and goRef3 are separate instances built from identical values.
    DatabaseReference goRef1 = new DatabaseReference(
        "GO", "GO:1",
        new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:1") });
    DatabaseReference goRef2 = new DatabaseReference(
        "GO", "GO:2",
        new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:2") });
    DatabaseReference goRef3 = new DatabaseReference(
        "GO", "GO:1",
        new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:1") });

    GoTerm goTerm1 = new GoTerm(goRef1);
    GoTerm goTerm2 = new GoTerm(goRef2);
    GoTerm goTerm3 = new GoTerm(goRef3);

    // One protein per GO reference; only the accession and the GO annotation differ between them.
    ProteinWithGoTerms protein1 = new ProteinWithGoTerms(
        "", "T1",
        new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
        new Dictionary<int, List<Modification>>(),
        new int?[] { 0 }, new int?[] { 0 }, new string[] { "" },
        "T2", "T3", true, false,
        new List<DatabaseReference> { goRef1 },
        new List<GoTerm> { goTerm1 });
    ProteinWithGoTerms protein2 = new ProteinWithGoTerms(
        "", "T2",
        new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
        new Dictionary<int, List<Modification>>(),
        new int?[] { 0 }, new int?[] { 0 }, new string[] { "" },
        "T2", "T3", true, false,
        new List<DatabaseReference> { goRef2 },
        new List<GoTerm> { goTerm2 });
    ProteinWithGoTerms protein3 = new ProteinWithGoTerms(
        "", "T3",
        new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
        new Dictionary<int, List<Modification>>(),
        new int?[] { 0 }, new int?[] { 0 }, new string[] { "" },
        "T2", "T3", true, false,
        new List<DatabaseReference> { goRef3 },
        new List<GoTerm> { goTerm3 });

    // Each protein is registered under its own InputFile instance.
    Dictionary<InputFile, Protein[]> proteinsByFile = new Dictionary<InputFile, Protein[]>
    {
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { protein1 } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { protein2 } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { protein3 } },
    };

    ExperimentalProteoform experimental1 = ConstructorsForTesting.ExperimentalProteoform("E");
    ExperimentalProteoform experimental2 = ConstructorsForTesting.ExperimentalProteoform("E");

    // Give both experimentals the same quantification values.
    foreach (ExperimentalProteoform experimental in new[] { experimental1, experimental2 })
    {
        experimental.quant.intensitySum = 1;
        experimental.quant.FDR = 0;
        experimental.quant.logFoldChange = 1;
    }

    TheoreticalProteoform theoretical1 = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", protein1, proteinsByFile);
    TheoreticalProteoform theoretical2 = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", protein2, proteinsByFile);
    TheoreticalProteoform theoretical3 = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", protein3, proteinsByFile);
    theoretical1.ExpandedProteinList = new List<ProteinWithGoTerms> { protein1 };
    theoretical2.ExpandedProteinList = new List<ProteinWithGoTerms> { protein2 };
    theoretical3.ExpandedProteinList = new List<ProteinWithGoTerms> { protein3 };

    // Only one ET connection per experimental is allowed now, so experimental1 is no longer
    // also related to theoretical3 (that relation used to be created here).
    make_relation(experimental1, theoretical1);
    make_relation(experimental2, theoretical2);

    // The first family used to hold two theoreticals sharing the same GO term (one GoTermNumber,
    // two theoretical proteins); with the single-ET rule it now holds only one.
    ProteoformFamily family1 = new ProteoformFamily(experimental1);
    ProteoformFamily family2 = new ProteoformFamily(experimental2);
    List<ProteoformFamily> families = new List<ProteoformFamily> { family1, family2 };
    foreach (ProteoformFamily family in families)
    {
        family.construct_family();
        family.identify_experimentals();
    }

    theoretical1.family = family1;
    theoretical3.family = family1;
    experimental1.family = family1;
    theoretical2.family = family2;
    experimental2.family = family2;

    // Only experimental1 is passed in as significant.
    List<ExperimentalProteoform> fakeSignificant = new List<ExperimentalProteoform> { experimental1 };
    List<ProteinWithGoTerms> significantProteins =
        SaveState.lollipop.getInducedOrRepressedProteins(fakeSignificant, 0, 1, 0);
    List<GoTermNumber> goTermNumbers =
        SaveState.lollipop.getGoTermNumbers(
            significantProteins,
            new List<ProteinWithGoTerms> { protein1, protein2, protein3 });

    Assert.AreEqual(1, significantProteins.Count);
    Assert.AreEqual(1, goTermNumbers.Count);
    Assert.AreEqual("1", goTermNumbers.First().Id);

    decimal expectedLogOdds = 0 - (decimal)Math.Log(2d / 3d, 2);
    Assert.AreEqual(expectedLogOdds, goTermNumbers.First().log_odds_ratio);

    List<ProteoformFamily> interestingFamilies =
        SaveState.lollipop.getInterestingFamilies(goTermNumbers, families);
    Assert.AreEqual(1, interestingFamilies.Count);
    Assert.AreEqual(1, interestingFamilies[0].theoretical_proteoforms.Count);
}