/// <summary>
/// Test helper: creates a <see cref="TheoreticalProteoform"/> whose PTM set holds a single "oxidation" modification.
/// </summary>
/// <remarks>
/// A throwaway protein is built locally only to supply <c>BaseSequence</c>; the protein list handed to the
/// proteoform contains just the caller's <paramref name="p"/>.
/// </remarks>
/// <param name="a">Forwarded as the first proteoform constructor argument (presumably the accession; confirm against TheoreticalProteoform).</param>
/// <param name="d">Forwarded as the second proteoform constructor argument (presumably a description; confirm).</param>
/// <param name="mass">Mass value forwarded to the proteoform constructor.</param>
/// <param name="p">Protein placed in the proteoform's protein list.</param>
/// <param name="dict">Input-file-to-proteins map forwarded to the proteoform constructor.</param>
/// <returns>The newly constructed proteoform.</returns>
public static TheoreticalProteoform make_a_theoretical(string a, string d, double mass, ProteinWithGoTerms p, Dictionary<InputFile, Protein[]> dict)
        {
            ModificationMotif anyResidue;
            ModificationMotif.TryGetMotif("X", out anyResidue);

            Modification oxidation = new Modification("oxidation", _modificationType: "modtype", _target: anyResidue, _locationRestriction: "Anywhere.", _monoisotopicMass: 1);

            // Pieces of the placeholder protein, created in the same order the constructor call consumes them.
            List<Tuple<string, string>> geneNames = new List<Tuple<string, string>>
            {
                new Tuple<string, string>("ordered locus", "GENE")
            };
            Dictionary<int, List<Modification>> modsByPosition = new Dictionary<int, List<Modification>>();
            modsByPosition.Add(1, new List<Modification> { oxidation });
            List<ProteolysisProduct> products = new List<ProteolysisProduct>
            {
                new ProteolysisProduct(1, 12, "")
            };
            List<DatabaseReference> references = new List<DatabaseReference>
            {
                new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
            };
            List<GoTerm> goTerms = new List<GoTerm>
            {
                new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
            };

            ProteinWithGoTerms sequenceDonor = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", geneNames, modsByPosition, products, "T2", "T3", true, false, references, goTerms);

            List<Ptm> ptms = new List<Ptm> { new Ptm(0, oxidation) };
            PtmSet ptmSet = new PtmSet(ptms);

            List<ProteinWithGoTerms> proteins = new List<ProteinWithGoTerms> { p };
            return new TheoreticalProteoform(a, d, sequenceDonor.BaseSequence, proteins, mass, 0, ptmSet, true, false, dict);
        }
        /// <summary>
        /// Test helper: creates a <see cref="TheoreticalProteoform"/> backed by a locally built protein, with a PTM set
        /// containing one default-constructed <see cref="Ptm"/>, and sets its <c>modified_mass</c> to <paramref name="mass"/>.
        /// </summary>
        /// <param name="a">Forwarded as the first proteoform constructor argument (presumably the accession; confirm).</param>
        /// <param name="mass">Passed to the proteoform constructor and also assigned to <c>modified_mass</c>.</param>
        /// <param name="lysine_count">Forwarded to the proteoform constructor.</param>
        /// <returns>The newly constructed proteoform.</returns>
        public static TheoreticalProteoform make_a_theoretical(string a, double mass, int lysine_count)
        {
            ModificationMotif anyResidue;
            ModificationMotif.TryGetMotif("X", out anyResidue);

            Modification noModification = new Modification("Unmodified", _target: anyResidue, _locationRestriction: "Anywhere.", _monoisotopicMass: 0);

            List<Tuple<string, string>> geneNames = new List<Tuple<string, string>>
            {
                new Tuple<string, string>("", "")
            };
            Dictionary<int, List<Modification>> modsByPosition = new Dictionary<int, List<Modification>>();
            modsByPosition.Add(0, new List<Modification> { noModification });
            List<ProteolysisProduct> products = new List<ProteolysisProduct>
            {
                new ProteolysisProduct(1, 12, "")
            };
            List<DatabaseReference> references = new List<DatabaseReference>
            {
                new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
            };
            List<GoTerm> goTerms = new List<GoTerm>
            {
                new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
            };

            ProteinWithGoTerms protein = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", geneNames, modsByPosition, products, "T2", "T3", true, false, references, goTerms);

            PtmSet ptmSet = new PtmSet(new List<Ptm> { new Ptm() });
            List<ProteinWithGoTerms> proteins = new List<ProteinWithGoTerms> { protein };

            TheoreticalProteoform theoretical = new TheoreticalProteoform(a, "", protein.BaseSequence, proteins, mass, lysine_count, ptmSet, true, false, new Dictionary<InputFile, Protein[]>());
            theoretical.modified_mass = mass;
            return theoretical;
        }
        /// <summary>
        /// Test helper: creates a <see cref="TheoreticalProteoform"/> for the caller's protein with a PTM set holding
        /// a single "oxidation" <see cref="ModificationWithMass"/> targeted at motif "K".
        /// </summary>
        /// <remarks>
        /// Earlier revisions also built a placeholder <see cref="ProteinWithGoTerms"/> here that was never read;
        /// that dead construction has been removed.
        /// </remarks>
        /// <param name="a">Forwarded as the first proteoform constructor argument (presumably the accession; confirm).</param>
        /// <param name="d">Forwarded as the second proteoform constructor argument (presumably a description; confirm).</param>
        /// <param name="mass">Mass value forwarded to the proteoform constructor.</param>
        /// <param name="p">Protein placed in the proteoform's protein list.</param>
        /// <param name="dict">Input-file-to-proteins map forwarded to the proteoform constructor.</param>
        /// <returns>The newly constructed proteoform.</returns>
        public static TheoreticalProteoform make_a_theoretical(string a, string d, double mass, ProteinWithGoTerms p, Dictionary <InputFile, Protein[]> dict)
        {
            ModificationMotif motif;

            ModificationMotif.TryGetMotif("K", out motif);
            string mod_title       = "oxidation";
            ModificationWithMass m = new ModificationWithMass(mod_title, new Tuple <string, string>("", mod_title), motif, ModificationSites.K, 1, new Dictionary <string, IList <string> >(), new List <double>(), new List <double>(), "");

            PtmSet set = new PtmSet(new List <Ptm> {
                new Ptm(0, m)
            });

            return new TheoreticalProteoform(a, d, new List <ProteinWithGoTerms> {
                p
            }, mass, 0, set, true, false, dict);
        }
Exemple #4
0
        /// <summary>
        /// Checks that three proteins with the same sequence collapse into one <see cref="ProteinSequenceGroup"/>
        /// (accession "T1_3G", three GO terms, three gene names, not a contaminant), and that
        /// <c>group_proteins_by_sequence</c> yields two groups once one protein's sequence differs.
        /// </summary>
        public void test_protein_grouping_by_sequence()
        {
            DatabaseReference d1 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });
            DatabaseReference d2 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });
            DatabaseReference d3 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });

            // Each GO term is derived from the reference attached to the same protein
            // (previously all three were built from d1, leaving d2/d3 unused for this purpose).
            GoTerm g1 = new GoTerm(d1);
            GoTerm g2 = new GoTerm(d2);
            GoTerm g3 = new GoTerm(d3);

            ProteinWithGoTerms p1 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false,
                new List<DatabaseReference> { d1 }, new List<GoTerm> { g1 });
            ProteinWithGoTerms p2 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T2", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false,
                new List<DatabaseReference> { d2 }, new List<GoTerm> { g2 });
            ProteinWithGoTerms p3 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T3", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false,
                new List<DatabaseReference> { d3 }, new List<GoTerm> { g3 });

            // Contaminants (none here) are ordered first before grouping.
            ProteinSequenceGroup psg = new ProteinSequenceGroup(new List<ProteinWithGoTerms> { p1, p2, p3 }.OrderByDescending(p => p.IsContaminant ? 1 : 0));

            Assert.AreEqual(3, psg.GoTerms.Count());
            Assert.AreEqual(3, psg.GeneNames.Count());
            Assert.AreEqual("T1_3G", psg.Accession);
            Assert.False(psg.IsContaminant);

            // Give p3 a different sequence so it can no longer share a group with p1 and p2.
            p3 = new ProteinWithGoTerms("MCSSSSSSSSSS", "T3", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false,
                new List<DatabaseReference> { d3 }, new List<GoTerm> { g3 });

            ProteinSequenceGroup[] psgs = SaveState.lollipop.theoretical_database.group_proteins_by_sequence(new List<ProteinWithGoTerms> { p1, p2, p3 });
            Assert.AreEqual(2, psgs.Length);
        }
        /// <summary>
        /// Builds one accepted experimental–theoretical relation whose theoretical side is a
        /// <see cref="TheoreticalProteoformGroup"/>, then checks that <c>construct_families</c> produces exactly
        /// one family containing both proteoforms with the expected name, accession and experimental lists.
        /// </summary>
        public void test_construct_one_proteform_family_from_ET_with_theoretical_pf_group()
        {
            ProteoformCommunity community = new ProteoformCommunity();

            Dictionary<string, List<Modification>> knownMods = new Dictionary<string, List<Modification>>();
            knownMods.Add("unmodified", new List<Modification> { new Modification("unmodified", "unknown") });
            SaveState.lollipop.theoretical_database.uniprotModifications = knownMods;

            InputFile databaseFile = new InputFile("fake.txt", Purpose.ProteinDatabase);
            ProteinWithGoTerms protein = new ProteinWithGoTerms("", "T1", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "name", "full_name", true, false,
                new List<DatabaseReference>(), new List<GoTerm>());
            Dictionary<InputFile, Protein[]> proteinsByFile = new Dictionary<InputFile, Protein[]>();
            proteinsByFile.Add(databaseFile, new Protein[] { protein });
            TheoreticalProteoform theoretical = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", protein, proteinsByFile);

            // One accepted ET relation; should give one ProteoformFamily.
            SaveState.lollipop.min_peak_count_et = 1;
            ExperimentalProteoform experimental = ConstructorsForTesting.ExperimentalProteoform("E1");
            TheoreticalProteoformGroup theoreticalGroup = new TheoreticalProteoformGroup(new List<TheoreticalProteoform> { theoretical });
            ProteoformRelation relation = new ProteoformRelation(experimental, theoreticalGroup, ProteoformComparison.ExperimentalTheoretical, 0, TestContext.CurrentContext.TestDirectory);
            relation.Accepted = true;

            List<ProteoformRelation> relations = new List<ProteoformRelation> { relation };
            foreach (ProteoformRelation current in relations)
            {
                List<int> relationIds = relations.Select(r => r.InstanceId).ToList();
                current.set_nearby_group(relations, relationIds);
            }

            DeltaMassPeak peak = new DeltaMassPeak(relations[0], relations);
            SaveState.lollipop.et_peaks = new List<DeltaMassPeak> { peak };

            community.experimental_proteoforms = new ExperimentalProteoform[] { experimental };
            community.theoretical_proteoforms  = new TheoreticalProteoform[] { theoreticalGroup };
            community.construct_families();

            Assert.AreEqual(1, community.families.Count);
            Assert.AreEqual(2, community.families[0].proteoforms.Count);

            var onlyFamily = community.families.First();
            Assert.AreEqual(1, onlyFamily.experimental_proteoforms.Count);
            Assert.AreEqual(1, onlyFamily.theoretical_proteoforms.Count);
            Assert.AreEqual("E1", onlyFamily.experimentals_list);
            Assert.AreEqual(protein.Name, onlyFamily.name_list);
            Assert.AreEqual(theoreticalGroup.accession, onlyFamily.accession_list);
        }
        /// <summary>
        /// Test helper: creates a <see cref="TheoreticalProteoform"/> for <paramref name="p"/> whose PTM set contains
        /// one <see cref="Ptm"/> (first argument 0) for every <see cref="ModificationWithMass"/> listed in
        /// <c>p.OneBasedPossibleLocalizedModifications</c>. Modifications of other types are skipped.
        /// </summary>
        /// <param name="a">Forwarded as the first proteoform constructor argument (presumably the accession; confirm).</param>
        /// <param name="p">Protein that supplies the modifications and becomes the sole entry of the protein list.</param>
        /// <param name="dict">Input-file-to-proteins map forwarded to the proteoform constructor.</param>
        /// <returns>The newly constructed proteoform, created with a fixed mass argument of 100.</returns>
        public static TheoreticalProteoform make_a_theoretical(string a, ProteinWithGoTerms p, Dictionary <InputFile, Protein[]> dict)
        {
            // Flatten the per-position modification lists, keeping dictionary enumeration order.
            List<Ptm> ptms = p.OneBasedPossibleLocalizedModifications
                .SelectMany(entry => entry.Value.OfType<ModificationWithMass>())
                .Select(mod => new Ptm(0, mod))
                .ToList();
            PtmSet set = new PtmSet(ptms);

            return new TheoreticalProteoform(a, "", new List<ProteinWithGoTerms> { p }, 100, 0, set, true, true, dict);
        }
Exemple #7
0
        /// <summary>
        /// Smoke test: invokes <c>EnterTheoreticalProteformFamily</c> twice in parallel with the same protein and
        /// the same shared result list. The test has no assertions; it passes if no exception escapes.
        /// </summary>
        public void parallel_enter_theoreticals_doesnt_crash()
        {
            TheoreticalProteoformDatabase database = new TheoreticalProteoformDatabase();
            database.populate_aa_mass_dictionary();

            List<Modification>          emptyModifications = new List<Modification>();
            List<TheoreticalProteoform> sharedResults      = new List<TheoreticalProteoform>();
            ProteinWithGoTerms          protein            = ConstructorsForTesting.make_a_theoretical().ExpandedProteinList.First();

            // Both delegates deliberately target the same list instances.
            Parallel.Invoke(
                () => database.EnterTheoreticalProteformFamily("SEQ", protein, protein.OneBasedPossibleLocalizedModifications, protein.Accession, sharedResults, 1, emptyModifications),
                () => database.EnterTheoreticalProteformFamily("SEQ", protein, protein.OneBasedPossibleLocalizedModifications, protein.Accession, sharedResults, 1, emptyModifications)
                );
        }
Exemple #8
0
        /// <summary>
        /// Checks that when one of three same-sequence proteins is constructed with the second boolean flag set,
        /// the resulting <see cref="ProteinSequenceGroup"/> reports <c>IsContaminant</c> and takes its accession
        /// ("T3_3G") from that protein, which the contaminant-first ordering places at the front.
        /// </summary>
        public void test_protein_grouping_by_sequence_contaminant()
        {
            DatabaseReference d1 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });
            DatabaseReference d2 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });
            DatabaseReference d3 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });

            // Each GO term is derived from the reference attached to the same protein
            // (previously all three were built from d1, leaving d2/d3 unused for this purpose).
            GoTerm g1 = new GoTerm(d1);
            GoTerm g2 = new GoTerm(d2);
            GoTerm g3 = new GoTerm(d3);

            ProteinWithGoTerms p1 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") }, "T2", "T3", true, false,
                new List<DatabaseReference> { d1 }, new List<GoTerm> { g1 });
            ProteinWithGoTerms p2 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T2", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") }, "T2", "T3", true, false,
                new List<DatabaseReference> { d2 }, new List<GoTerm> { g2 });

            // p3 differs from p1/p2 in the second boolean argument (true here); presumably the contaminant flag, given the assertions below.
            ProteinWithGoTerms p3 = new ProteinWithGoTerms("MSSSSSSSSSSS", "T3", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") }, "T2", "T3", true, true,
                new List<DatabaseReference> { d3 }, new List<GoTerm> { g3 });

            ProteinSequenceGroup psg = new ProteinSequenceGroup(new List<ProteinWithGoTerms> { p1, p2, p3 }.OrderByDescending(p => p.IsContaminant ? 1 : 0));

            Assert.AreEqual(3, psg.GoTerms.Count());
            Assert.AreEqual(3, psg.GeneNames.Count());
            Assert.AreEqual("T3_3G", psg.Accession);
            Assert.True(psg.IsContaminant);
        }
        /// <summary>
        /// Test helper: creates a <see cref="TheoreticalProteoform"/> for <paramref name="p"/> whose PTM set contains
        /// one <see cref="Ptm"/> (first argument 0) for every modification listed in
        /// <c>p.OneBasedPossibleLocalizedModifications</c>, and sets <c>begin</c> = 1 and <c>end</c> = 4.
        /// </summary>
        /// <param name="a">Forwarded as the first proteoform constructor argument (presumably the accession; confirm).</param>
        /// <param name="p">Protein that supplies the sequence and modifications and becomes the sole entry of the protein list.</param>
        /// <param name="dict">Input-file-to-proteins map forwarded to the proteoform constructor.</param>
        /// <returns>The newly constructed proteoform, created with a fixed mass argument of 100.</returns>
        public static TheoreticalProteoform make_a_theoretical(string a, ProteinWithGoTerms p, Dictionary <InputFile, Protein[]> dict)
        {
            // Flatten the per-position modification lists, keeping dictionary enumeration order.
            List<Ptm> ptms = p.OneBasedPossibleLocalizedModifications
                .SelectMany(entry => entry.Value)
                .Select(mod => new Ptm(0, mod))
                .ToList();
            PtmSet set = new PtmSet(ptms);

            TheoreticalProteoform t = new TheoreticalProteoform(a, "", p.BaseSequence, new List<ProteinWithGoTerms> { p }, 100, 0, set, true, true, dict);

            t.begin = 1;
            t.end   = 4;
            return t;
        }
Exemple #10
0
        /// <summary>
        /// Checks contaminant propagation into <see cref="TheoreticalProteoformGroup"/>: a group that includes the
        /// proteoform whose protein is listed under an <see cref="InputFile"/> with <c>ContaminantDB</c> set is a
        /// contaminant and carries that protein's accession; a group without it is not. Also checks that the
        /// first proteoform's <c>ptm_mass</c> is 0.
        /// </summary>
        public void test_contaminant_check()
        {
            InputFile contaminantFile = new InputFile("fake.txt", Purpose.ProteinDatabase) { ContaminantDB = true };
            InputFile regularFileA    = new InputFile("fake.txt", Purpose.ProteinDatabase);
            InputFile regularFileB    = new InputFile("fake.txt", Purpose.ProteinDatabase);

            ProteinWithGoTerms p1 = new ProteinWithGoTerms("", "T1", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") }, "T2", "T3", true, false,
                new List<DatabaseReference>(), new List<GoTerm>());
            ProteinWithGoTerms p2 = new ProteinWithGoTerms("", "T2", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") }, "T2", "T3", true, false,
                new List<DatabaseReference>(), new List<GoTerm>());
            ProteinWithGoTerms p3 = new ProteinWithGoTerms("", "T3", new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") }, "T2", "T3", true, false,
                new List<DatabaseReference>(), new List<GoTerm>());

            Dictionary<InputFile, Protein[]> proteinsByFile = new Dictionary<InputFile, Protein[]>();
            proteinsByFile.Add(contaminantFile, new Protein[] { p1 });
            proteinsByFile.Add(regularFileA, new Protein[] { p2 });
            proteinsByFile.Add(regularFileB, new Protein[] { p3 });

            TheoreticalProteoform fromContaminantFile = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, proteinsByFile);
            TheoreticalProteoform fromRegularFileA    = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", p2, proteinsByFile);
            TheoreticalProteoform fromRegularFileB    = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", p3, proteinsByFile);

            // Contaminant: the group is built with contaminant proteoforms ordered first.
            List<TheoreticalProteoform> allThree = new List<TheoreticalProteoform> { fromRegularFileB, fromRegularFileA, fromContaminantFile };
            TheoreticalProteoform mixedGroup = new TheoreticalProteoformGroup(allThree.OrderByDescending(theo => theo.contaminant));

            Assert.True(mixedGroup.contaminant);
            Assert.True(mixedGroup.accession.Contains(p1.Accession));

            // Not contaminant
            List<TheoreticalProteoform> regularOnly = new List<TheoreticalProteoform> { fromRegularFileB, fromRegularFileA };
            TheoreticalProteoform cleanGroup = new TheoreticalProteoformGroup(regularOnly);

            Assert.False(cleanGroup.contaminant);

            // PTM mass test
            Assert.AreEqual(0, fromContaminantFile.ptm_mass);
        }
        /// <summary>
        /// Test helper: creates a <see cref="TheoreticalProteoform"/> backed by a locally built protein that lists
        /// an "Unmodified" <see cref="ModificationWithMass"/> under key 0, paired with an empty PTM set.
        /// </summary>
        /// <param name="a">Forwarded as the first proteoform constructor argument (presumably the accession; confirm).</param>
        /// <param name="mass">Mass value forwarded to the proteoform constructor.</param>
        /// <param name="lysine_count">Forwarded to the proteoform constructor.</param>
        /// <returns>The newly constructed proteoform.</returns>
        public static TheoreticalProteoform make_a_theoretical(string a, double mass, int lysine_count)
        {
            ModificationWithMass noModification = new ModificationWithMass("Unmodified", new Tuple<string, string>("N/A", "Unmodified"), null, ModificationSites.Any, 0, null, null, null, null);

            List<Tuple<string, string>> geneNames = new List<Tuple<string, string>>
            {
                new Tuple<string, string>("", "")
            };
            Dictionary<int, List<Modification>> modsByPosition = new Dictionary<int, List<Modification>>();
            modsByPosition.Add(0, new List<Modification> { noModification });
            List<DatabaseReference> references = new List<DatabaseReference>
            {
                new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") })
            };
            List<GoTerm> goTerms = new List<GoTerm>
            {
                new GoTerm(new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") }))
            };

            ProteinWithGoTerms protein = new ProteinWithGoTerms("MSSSSSSSSSSS", "T1", geneNames, modsByPosition, new int?[] { 0 }, new int?[] { 0 }, new string[] { "" }, "T2", "T3", true, false, references, goTerms);

            PtmSet emptyPtmSet = new PtmSet(new List<Ptm>());
            List<ProteinWithGoTerms> proteins = new List<ProteinWithGoTerms> { protein };

            return new TheoreticalProteoform(a, "", proteins, mass, lysine_count, emptyPtmSet, true, false, new Dictionary<InputFile, Protein[]>());
        }
Exemple #12
0
        /// <summary>
        /// Builds two single-relation families (e1–t and e2–u), treats e1 as the only significant experimental
        /// proteoform, and checks that GO-term counting yields exactly one GO term number (Id "1") and that
        /// <c>getInterestingFamilies</c> returns one family holding one theoretical proteoform.
        /// </summary>
        public void get_interesting_goterm_families()
        {
            // Start from a fresh global Lollipop so settings from other tests do not carry over.
            Sweet.lollipop = new Lollipop();
            Sweet.lollipop.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
            Sweet.lollipop.significance_by_permutation            = true;
            Sweet.lollipop.significance_by_log2FC = false;
            // d1 and d3 carry the same GO id/term ("GO:1" / "P:1"); d2 is a different one ("GO:2" / "P:2").
            DatabaseReference d1 = new DatabaseReference("GO", "GO:1", new List <Tuple <string, string> > {
                new Tuple <string, string>("term", "P:1")
            });
            DatabaseReference d2 = new DatabaseReference("GO", "GO:2", new List <Tuple <string, string> > {
                new Tuple <string, string>("term", "P:2")
            });
            DatabaseReference d3 = new DatabaseReference("GO", "GO:1", new List <Tuple <string, string> > {
                new Tuple <string, string>("term", "P:1")
            });
            GoTerm             g1 = new GoTerm(d1);
            GoTerm             g2 = new GoTerm(d2);
            GoTerm             g3 = new GoTerm(d3);
            // Three proteins with the same sequence "ASDF" and accessions T1..T3, each with one GO reference/term.
            ProteinWithGoTerms p1 = new ProteinWithGoTerms("ASDF", "T1", new List <Tuple <string, string> > {
                new Tuple <string, string>("", "")
            }, new Dictionary <int, List <Modification> >(), new List <ProteolysisProduct> {
                new ProteolysisProduct(0, 0, "")
            }, "T2", "T3", true, false, new List <DatabaseReference> {
                d1
            }, new List <GoTerm> {
                g1
            });
            ProteinWithGoTerms p2 = new ProteinWithGoTerms("ASDF", "T2", new List <Tuple <string, string> > {
                new Tuple <string, string>("", "")
            }, new Dictionary <int, List <Modification> >(), new List <ProteolysisProduct> {
                new ProteolysisProduct(0, 0, "")
            }, "T2", "T3", true, false, new List <DatabaseReference> {
                d2
            }, new List <GoTerm> {
                g2
            });
            ProteinWithGoTerms p3 = new ProteinWithGoTerms("ASDF", "T3", new List <Tuple <string, string> > {
                new Tuple <string, string>("", "")
            }, new Dictionary <int, List <Modification> >(), new List <ProteolysisProduct> {
                new ProteolysisProduct(0, 0, "")
            }, "T2", "T3", true, false, new List <DatabaseReference> {
                d3
            }, new List <GoTerm> {
                g3
            });
            // Each protein is listed under its own InputFile instance (all created with the same file name).
            Dictionary <InputFile, Protein[]> dict = new Dictionary <InputFile, Protein[]>
            {
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p1 } },
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p2 } },
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p3 } },
            };
            ExperimentalProteoform e1 = ConstructorsForTesting.ExperimentalProteoform("E");
            ExperimentalProteoform e2 = ConstructorsForTesting.ExperimentalProteoform("E");

            // Give both experimentals identical quantification values, flagged significant.
            e1.quant.intensitySum = 1;
            e1.quant.TusherValues1.significant = true;
            e1.quant.tusherlogFoldChange       = 1;
            e2.quant.intensitySum = 1;
            e2.quant.TusherValues1.significant = true;
            e2.quant.tusherlogFoldChange       = 1;
            TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, dict);
            TheoreticalProteoform u = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", p2, dict);
            TheoreticalProteoform v = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", p3, dict);

            // Pin each theoretical to exactly one protein, overriding whatever the helper set.
            t.ExpandedProteinList = new List <ProteinWithGoTerms> {
                p1
            };
            u.ExpandedProteinList = new List <ProteinWithGoTerms> {
                p2
            };
            v.ExpandedProteinList = new List <ProteinWithGoTerms> {
                p3
            };
            t.begin = 1;
            t.end   = 1;
            u.begin = 1;
            u.end   = 1;
            v.begin = 1;
            v.end   = 1;
            make_relation(e1, t);
            //make_relation(e1, v); // we don't allow this to happen anymore... we only allow one ET connection per E
            make_relation(e2, u);
            ProteoformFamily f = new ProteoformFamily(e1); // two theoreticals with the same GoTerms... expecting one GoTerm number but two theoretical proteins (now only one)
            ProteoformFamily h = new ProteoformFamily(e2);

            f.construct_family();
            f.identify_experimentals();
            h.construct_family();
            h.identify_experimentals();
            List <ProteoformFamily> families = new List <ProteoformFamily> {
                f, h
            };

            // Family back-references are assigned by hand; v is pointed at f even though it has no relation to e1.
            t.family  = f;
            v.family  = f;
            e1.family = f;
            u.family  = h;
            e2.family = h;
            // Only e1 is handed over as significant; p1, p2 and p3 form the background list.
            List <ExperimentalProteoform> fake_significant = new List <ExperimentalProteoform> {
                e1
            };
            List <ProteinWithGoTerms> significant_proteins = Sweet.lollipop.getInducedOrRepressedProteins(fake_significant, Sweet.lollipop.TusherAnalysis1.GoAnalysis);
            List <GoTermNumber>       gtn = Sweet.lollipop.TusherAnalysis1.GoAnalysis.getGoTermNumbers(significant_proteins, new List <ProteinWithGoTerms> {
                p1, p2, p3
            });

            Assert.AreEqual(1, significant_proteins.Count);
            Assert.AreEqual(1, gtn.Count);
            Assert.AreEqual("1", gtn.First().Id);
            // Expected value is -log2(2/3); presumably 2 of the 3 background proteins carry GO:1 (confirm against GoTermNumber).
            Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), gtn.First().log_odds_ratio);

            List <ProteoformFamily> fams = Sweet.lollipop.getInterestingFamilies(gtn, families);

            Assert.AreEqual(1, fams.Count);
            Assert.AreEqual(1, fams[0].theoretical_proteoforms.Count);
        }
Exemple #13
0
        /// <summary>
        /// Runs <c>GO_analysis</c> through the global Tusher analysis with <c>allTheoreticalProteins</c> enabled and a
        /// background protein list file path set, for a family in which e1 is related to two theoreticals (t and v).
        /// Checks one induced/repressed protein, one GO term number (Id "1"), and one interesting family with two
        /// theoretical proteoforms.
        /// </summary>
        public void test_goterm_analysis_with_custom_list()
        {
            // Start from a fresh global Lollipop so settings from other tests do not carry over.
            Sweet.lollipop = new Lollipop();
            Sweet.lollipop.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(Sweet.lollipop.carbamidomethylation, Sweet.lollipop.neucode_labeled).AA_Masses;
            Sweet.lollipop.significance_by_permutation            = true;
            Sweet.lollipop.significance_by_log2FC = false;
            // d1 and d3 carry the same GO id/term ("GO:1" / "P:1"); d2 is a different one ("GO:2" / "P:2").
            DatabaseReference d1 = new DatabaseReference("GO", "GO:1", new List <Tuple <string, string> > {
                new Tuple <string, string>("term", "P:1")
            });
            DatabaseReference d2 = new DatabaseReference("GO", "GO:2", new List <Tuple <string, string> > {
                new Tuple <string, string>("term", "P:2")
            });
            DatabaseReference d3 = new DatabaseReference("GO", "GO:1", new List <Tuple <string, string> > {
                new Tuple <string, string>("term", "P:1")
            });
            GoTerm             g1 = new GoTerm(d1);
            GoTerm             g2 = new GoTerm(d2);
            GoTerm             g3 = new GoTerm(d3);
            // Three proteins with the same sequence "ASDF" and accessions T1..T3, each with one GO reference/term.
            ProteinWithGoTerms p1 = new ProteinWithGoTerms("ASDF", "T1", new List <Tuple <string, string> > {
                new Tuple <string, string>("", "")
            }, new Dictionary <int, List <Modification> >(), new List <ProteolysisProduct> {
                new ProteolysisProduct(0, 0, "")
            }, "T2", "T3", true, false, new List <DatabaseReference> {
                d1
            }, new List <GoTerm> {
                g1
            });
            ProteinWithGoTerms p2 = new ProteinWithGoTerms("ASDF", "T2", new List <Tuple <string, string> > {
                new Tuple <string, string>("", "")
            }, new Dictionary <int, List <Modification> >(), new List <ProteolysisProduct> {
                new ProteolysisProduct(0, 0, "")
            }, "T2", "T3", true, false, new List <DatabaseReference> {
                d2
            }, new List <GoTerm> {
                g2
            });
            ProteinWithGoTerms p3 = new ProteinWithGoTerms("ASDF", "T3", new List <Tuple <string, string> > {
                new Tuple <string, string>("", "")
            }, new Dictionary <int, List <Modification> >(), new List <ProteolysisProduct> {
                new ProteolysisProduct(0, 0, "")
            }, "T2", "T3", true, false, new List <DatabaseReference> {
                d3
            }, new List <GoTerm> {
                g3
            });
            // Each protein is listed under its own InputFile instance (all created with the same file name).
            Dictionary <InputFile, Protein[]> dict = new Dictionary <InputFile, Protein[]> {
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p1 } },
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p2 } },
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { p3 } },
            };
            ExperimentalProteoform e1 = ConstructorsForTesting.ExperimentalProteoform("E");
            ExperimentalProteoform e2 = ConstructorsForTesting.ExperimentalProteoform("E");

            // Give both experimentals identical quantification values, flagged significant.
            e1.quant.intensitySum = 1;
            e1.quant.TusherValues1.significant = true;
            e1.quant.tusherlogFoldChange       = 1;
            e2.quant.intensitySum = 1;
            e2.quant.TusherValues1.significant = true;
            e2.quant.tusherlogFoldChange       = 1;
            TheoreticalProteoform t = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", p1, dict);
            TheoreticalProteoform u = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", p2, dict);
            TheoreticalProteoform v = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", p3, dict);

            // Pin each theoretical to exactly one protein, overriding whatever the helper set.
            t.ExpandedProteinList = new List <ProteinWithGoTerms> {
                p1
            };
            u.ExpandedProteinList = new List <ProteinWithGoTerms> {
                p2
            };
            v.ExpandedProteinList = new List <ProteinWithGoTerms> {
                p3
            };
            // e1 is related to both t and v here; e2 only to u.
            make_relation(e1, t);
            make_relation(e1, v);
            make_relation(e2, u);
            ProteoformFamily f = new ProteoformFamily(e1); // two theoreticals with the same GoTerms... expecting one GoTerm number but two theoretical proteins
            ProteoformFamily h = new ProteoformFamily(e2);

            f.construct_family();
            f.identify_experimentals();
            h.construct_family();
            h.identify_experimentals();
            List <ProteoformFamily> families = new List <ProteoformFamily> {
                f, h
            };

            // Family back-references are assigned by hand after the families have been constructed.
            t.family  = f;
            v.family  = f;
            e1.family = f;
            u.family  = h;
            e2.family = h;
            // Only e1 is handed over as significant.
            Sweet.lollipop.TusherAnalysis1.inducedOrRepressedProteins = Sweet.lollipop.getInducedOrRepressedProteins(new List <ExperimentalProteoform> {
                e1
            }, Sweet.lollipop.TusherAnalysis1.GoAnalysis);
            // Configure the analysis: all-theoretical-proteins flag on, expanded protein list p1..p3, and a background list file in the test directory.
            // NOTE(review): how GO_analysis combines these three settings is not visible here; confirm against GoAnalysis.
            Sweet.lollipop.TusherAnalysis1.GoAnalysis.allTheoreticalProteins = true;
            Sweet.lollipop.theoretical_database.expanded_proteins            = new ProteinWithGoTerms[] { p1, p2, p3 };
            Sweet.lollipop.TusherAnalysis1.GoAnalysis.backgroundProteinsList = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_protein_list.txt");
            Sweet.lollipop.TusherAnalysis1.GoAnalysis.GO_analysis(Sweet.lollipop.TusherAnalysis1.inducedOrRepressedProteins);
            Assert.AreEqual(1, Sweet.lollipop.TusherAnalysis1.inducedOrRepressedProteins.Count);  // only taking one ET connection by definition in forming ET relations; only one is used in identify theoreticals
            Assert.AreEqual(1, Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.Count);
            Assert.AreEqual("1", Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.First().Id);
            // Expected value is -log2(2/3); presumably 2 of the 3 background proteins carry GO:1 (confirm against GoTermNumber).
            Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.First().log_odds_ratio);

            List <ProteoformFamily> fams = Sweet.lollipop.getInterestingFamilies(Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers, families);

            Assert.AreEqual(1, fams.Count);
            Assert.AreEqual(2, fams[0].theoretical_proteoforms.Count);
        }
        // Test fixture. Resets SaveState.lollipop and builds a community of six experimental proteoforms
        // (E1-E3 at 1234.56, E4-E6 at 1253.56) joined in one linear chain by five EE relations, plus three
        // ET relations (two on E1, one on E6). The lone EE relation at delta mass 19 (E3-E4) is expected to
        // fall into a peak that is not accepted; per the method name this is meant to leave two families,
        // although the family count itself is not asserted here.
        // Returns the community after construct_families() has been called on it.
        public static ProteoformCommunity construct_two_families_with_potentially_colliding_theoreticals()
        {
            ProteoformCommunity community = new ProteoformCommunity();
            Lollipop lollipop = new Lollipop();

            SaveState.lollipop = lollipop;
            lollipop.target_proteoform_community = community;

            // Two known modifications: a zero-mass "unmodified" one and a "fake" one of mass 19.
            var database = lollipop.theoretical_database;
            database.uniprotModifications = new Dictionary<string, List<Modification>>
            {
                { "unmodified", new List<Modification> { ConstructorsForTesting.get_modWithMass("unmodified", 0) } },
                { "fake", new List<Modification> { ConstructorsForTesting.get_modWithMass("fake", 19) } },
            };

            lollipop.modification_ranks = new Dictionary<double, int> { { 0, 1 }, { 19, 2 } };
            lollipop.mod_rank_sum_threshold = 2;

            // Deferred query: each ToList() below materializes its own, independent list.
            var mods_with_mass = database.uniprotModifications.SelectMany(entry => entry.Value).OfType<ModificationWithMass>();
            database.all_possible_ptmsets = PtmCombos.generate_all_ptmsets(1, mods_with_mass.ToList(), lollipop.modification_ranks, 1);
            database.all_mods_with_mass = mods_with_mass.ToList();
            database.possible_ptmset_dictionary = database.make_ptmset_dictionary();

            lollipop.ee_max_mass_difference = 20;
            lollipop.peak_width_base_ee = 0.015;
            lollipop.min_peak_count_ee = 3; // high enough that the single-relation EE peak at 19 is rejected while the 4-relation peak at 0 is accepted
            lollipop.min_peak_count_et = 2; // low enough that the ET peak is accepted

            // Theoretical proteoforms: one wrapped in a group, plus two stand-alone ones.
            InputFile database_file = new InputFile("fake.txt", Purpose.ProteinDatabase);
            ProteinWithGoTerms seed_protein = new ProteinWithGoTerms(
                "",
                p1_accession,
                new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                p1_name,
                p1_fullName,
                true,
                false,
                new List<DatabaseReference> { p1_dbRef },
                new List<GoTerm> { p1_goterm });
            Dictionary<InputFile, Protein[]> proteins_by_file = new Dictionary<InputFile, Protein[]>
            {
                { database_file, new Protein[] { seed_protein } }
            };
            TheoreticalProteoform grouped_theoretical = ConstructorsForTesting.make_a_theoretical("T1_asdf", "T1_asdf", 1234.56, seed_protein, proteins_by_file);
            TheoreticalProteoformGroup theo_group = new TheoreticalProteoformGroup(new List<TheoreticalProteoform> { grouped_theoretical });
            TheoreticalProteoform theo_light = ConstructorsForTesting.make_a_theoretical("T1_asdf_pf2", "T1_asdf_1", 1234.56, proteins_by_file);

            // Experimental proteoforms, followed by the theoretical at the higher mass.
            ExperimentalProteoform exp1 = ConstructorsForTesting.ExperimentalProteoform("E1", 0, 0, true);
            ExperimentalProteoform exp2 = ConstructorsForTesting.ExperimentalProteoform("E2", 0, 0, true);
            ExperimentalProteoform exp3 = ConstructorsForTesting.ExperimentalProteoform("E3", 0, 0, true);
            ExperimentalProteoform exp4 = ConstructorsForTesting.ExperimentalProteoform("E4", 0, 0, true);
            ExperimentalProteoform exp5 = ConstructorsForTesting.ExperimentalProteoform("E5", 0, 0, true);
            ExperimentalProteoform exp6 = ConstructorsForTesting.ExperimentalProteoform("E6", 0, 0, true);
            TheoreticalProteoform theo_heavy = ConstructorsForTesting.make_a_theoretical("T1_asdf_pf9", "T1_asdf_1", 1253.56, proteins_by_file);

            community.theoretical_proteoforms = new TheoreticalProteoform[] { theo_group, theo_light, theo_heavy };
            community.experimental_proteoforms = new ExperimentalProteoform[] { exp1, exp2, exp3, exp4, exp5, exp6 };
            exp1.agg_mass = 1234.56;
            exp2.agg_mass = 1234.56;
            exp3.agg_mass = 1234.56;
            exp4.agg_mass = 1253.56;
            exp5.agg_mass = 1253.56;
            exp6.agg_mass = 1253.56;

            // Relations are created in chain order; the asserts below read relationships.First(), so this order matters.
            ProteoformComparison et = ProteoformComparison.ExperimentalTheoretical;
            ProteoformComparison ee = ProteoformComparison.ExperimentalExperimental;

            ConstructorsForTesting.make_relation(exp1, theo_group, et, 0);
            ConstructorsForTesting.make_relation(exp1, theo_light, et, 0);
            ConstructorsForTesting.make_relation(exp1, exp2, ee, 0);
            ConstructorsForTesting.make_relation(exp2, exp3, ee, 0);
            ConstructorsForTesting.make_relation(exp3, exp4, ee, 19); // the one relation that should not be accepted
            ConstructorsForTesting.make_relation(exp4, exp5, ee, 0);
            ConstructorsForTesting.make_relation(exp5, exp6, ee, 0);
            ConstructorsForTesting.make_relation(exp6, theo_heavy, et, 0);

            // Every relation hangs off both of its endpoints, so de-duplicate through a set.
            var experimental_side = community.experimental_proteoforms.SelectMany(e => e.relationships);
            var theoretical_side = community.theoretical_proteoforms.SelectMany(t => t.relationships);
            List<ProteoformRelation> all_relations = new HashSet<ProteoformRelation>(experimental_side.Concat(theoretical_side)).ToList();

            foreach (Proteoform connected in all_relations.SelectMany(rel => rel.connected_proteoforms))
            {
                Assert.IsNotNull(connected);
            }

            List<ProteoformRelation> et_relations = all_relations.Where(rel => rel.RelationType == et).OrderBy(rel => rel.DeltaMass).ToList();
            lollipop.et_relations = et_relations;

            List<ProteoformRelation> ee_relations = all_relations.Where(rel => rel.RelationType == ee).OrderBy(rel => rel.DeltaMass).ToList();
            lollipop.ee_relations = ee_relations;

            // A fresh id list is built for every call, exactly one call per relation.
            et_relations.ForEach(rel => rel.set_nearby_group(et_relations, et_relations.Select(other => other.InstanceId).ToList()));
            ee_relations.ForEach(rel => rel.set_nearby_group(ee_relations, ee_relations.Select(other => other.InstanceId).ToList()));

            Assert.AreEqual(3, theo_group.relationships.First().nearby_relations_count); // 3 ET relations at 0 delta mass
            Assert.AreEqual(3, theo_light.relationships.First().nearby_relations_count);
            Assert.AreEqual(4, exp2.relationships.First().nearby_relations_count); // 4 EE relations at 0 delta mass
            Assert.AreEqual(4, exp3.relationships.First().nearby_relations_count);
            Assert.AreEqual(1, exp4.relationships.First().nearby_relations_count); // 1 EE relation at 19 delta mass
            Assert.AreEqual(4, exp5.relationships.First().nearby_relations_count);
            Assert.AreEqual(4, exp6.relationships.First().nearby_relations_count);

            community.accept_deltaMass_peaks(et_relations, new List<ProteoformRelation>());
            community.accept_deltaMass_peaks(ee_relations, new List<ProteoformRelation>());
            Assert.AreEqual(3, lollipop.et_peaks.Count + lollipop.ee_peaks.Count);
            Assert.AreEqual(1, lollipop.et_peaks.Count(peak => peak.Accepted)); // 1 ET peak accepted
            Assert.AreEqual(1, lollipop.ee_peaks.Count(peak => peak.Accepted)); // 1 EE peak accepted
            Assert.AreEqual(4, lollipop.ee_peaks.First(peak => peak.Accepted && peak.RelationType == ProteoformComparison.ExperimentalExperimental).grouped_relations.Count());
            Assert.AreEqual(3, lollipop.et_peaks.First(peak => peak.Accepted && peak.RelationType == ProteoformComparison.ExperimentalTheoretical).grouped_relations.Count());

            community.construct_families();

            // Identification of experimentals:
            // no ET relation in a 19-delta peak may carry a ptm set, and every ET relation in a 0-delta peak
            // must be represented by a ptm set whose first modification is "unmodified".
            Assert.True(lollipop.et_relations.All(r => r.peak.DeltaMass != 19 || r.represented_ptmset == null));
            Assert.True(lollipop.et_relations.All(r => r.peak.DeltaMass != 0 || r.represented_ptmset.ptm_combination.First().modification.id == "unmodified"));

            // E1 must be rooted at one of its two candidate theoreticals.
            var exp1_root = exp1.linked_proteoform_references.First();
            Assert.True(theo_group == exp1_root || theo_light == exp1_root);

            // No re-assignment: each experimental's last reference is its predecessor in the chain,
            // and the root theoretical's accession/fragment are carried down the whole chain.
            Assert.AreEqual(exp1, exp2.linked_proteoform_references.Last());
            TheoreticalProteoform root_theoretical = exp1_root as TheoreticalProteoform;
            Assert.AreEqual(root_theoretical.accession, (exp2.linked_proteoform_references.First() as TheoreticalProteoform).accession);
            Assert.AreEqual(root_theoretical.fragment, (exp2.linked_proteoform_references.First() as TheoreticalProteoform).fragment);
            Assert.AreEqual(exp2, exp3.linked_proteoform_references.Last());
            Assert.AreEqual(root_theoretical.accession, (exp3.linked_proteoform_references.First() as TheoreticalProteoform).accession);
            Assert.AreEqual(root_theoretical.fragment, (exp3.linked_proteoform_references.First() as TheoreticalProteoform).fragment);
            Assert.AreEqual(theo_heavy, exp6.linked_proteoform_references.Last());

            return community;
        }
        // Test: builds three proteins (T1 and T3 annotated with GO:1, T2 with GO:2), links one experimental
        // proteoform to the T1 theoretical and another to the T2 theoretical, then checks that treating only
        // the first experimental as significant yields one significant protein, one GoTermNumber with Id "1",
        // and one interesting family containing a single theoretical proteoform.
        public void get_interesting_goterm_families()
        {
            SaveState.lollipop = new Lollipop();

            // Database references; the first and third are separate instances describing the same GO term.
            DatabaseReference ref_go1 = new DatabaseReference("GO", "GO:1", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:1") });
            DatabaseReference ref_go2 = new DatabaseReference("GO", "GO:2", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:2") });
            DatabaseReference ref_go1_again = new DatabaseReference("GO", "GO:1", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:1") });
            GoTerm term_go1 = new GoTerm(ref_go1);
            GoTerm term_go2 = new GoTerm(ref_go2);
            GoTerm term_go1_again = new GoTerm(ref_go1_again);

            ProteinWithGoTerms protein1 = new ProteinWithGoTerms(
                "",
                "T1",
                new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref_go1 },
                new List<GoTerm> { term_go1 });
            ProteinWithGoTerms protein2 = new ProteinWithGoTerms(
                "",
                "T2",
                new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref_go2 },
                new List<GoTerm> { term_go2 });
            ProteinWithGoTerms protein3 = new ProteinWithGoTerms(
                "",
                "T3",
                new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref_go1_again },
                new List<GoTerm> { term_go1_again });

            // One InputFile instance per protein, all pointing at the same fake path.
            Dictionary<InputFile, Protein[]> proteins_by_file = new Dictionary<InputFile, Protein[]>
            {
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { protein1 } },
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { protein2 } },
                { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { protein3 } },
            };

            ExperimentalProteoform exp_a = ConstructorsForTesting.ExperimentalProteoform("E");
            ExperimentalProteoform exp_b = ConstructorsForTesting.ExperimentalProteoform("E");

            // Both experimentals get the same quantification values.
            foreach (ExperimentalProteoform quantified in new[] { exp_a, exp_b })
            {
                quantified.quant.intensitySum = 1;
                quantified.quant.FDR = 0;
                quantified.quant.logFoldChange = 1;
            }

            TheoreticalProteoform theo1 = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", protein1, proteins_by_file);
            TheoreticalProteoform theo2 = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", protein2, proteins_by_file);
            TheoreticalProteoform theo3 = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", protein3, proteins_by_file);

            theo1.ExpandedProteinList = new List<ProteinWithGoTerms> { protein1 };
            theo2.ExpandedProteinList = new List<ProteinWithGoTerms> { protein2 };
            theo3.ExpandedProteinList = new List<ProteinWithGoTerms> { protein3 };

            // theo3 is intentionally not related to exp_a: only one ET connection per experimental is allowed.
            make_relation(exp_a, theo1);
            make_relation(exp_b, theo2);

            ProteoformFamily family_a = new ProteoformFamily(exp_a);
            ProteoformFamily family_b = new ProteoformFamily(exp_b);

            family_a.construct_family();
            family_a.identify_experimentals();
            family_b.construct_family();
            family_b.identify_experimentals();
            List<ProteoformFamily> families = new List<ProteoformFamily> { family_a, family_b };

            theo1.family = family_a;
            theo3.family = family_a; // NOTE(review): theo3 has no relation into family_a any more; assignment kept as-is, confirm it is still wanted
            exp_a.family = family_a;
            theo2.family = family_b;
            exp_b.family = family_b;

            List<ExperimentalProteoform> fake_significant = new List<ExperimentalProteoform> { exp_a };
            List<ProteinWithGoTerms> significant_proteins = SaveState.lollipop.getInducedOrRepressedProteins(fake_significant, 0, 1, 0);
            List<ProteinWithGoTerms> background_proteins = new List<ProteinWithGoTerms> { protein1, protein2, protein3 };
            List<GoTermNumber> go_term_numbers = SaveState.lollipop.getGoTermNumbers(significant_proteins, background_proteins);

            // Expected log odds ratio is -log2(2/3); presumably 2 of the 3 background proteins carry GO:1 (confirm against getGoTermNumbers).
            decimal expected_log_odds = 0 - (decimal)Math.Log(2d / 3d, 2);

            Assert.AreEqual(1, significant_proteins.Count);
            Assert.AreEqual(1, go_term_numbers.Count);
            Assert.AreEqual("1", go_term_numbers.First().Id);
            Assert.AreEqual(expected_log_odds, go_term_numbers.First().log_odds_ratio);

            List<ProteoformFamily> interesting_families = SaveState.lollipop.getInterestingFamilies(go_term_numbers, families);

            Assert.AreEqual(1, interesting_families.Count);
            Assert.AreEqual(1, interesting_families[0].theoretical_proteoforms.Count);
        }