コード例 #1
0
ファイル: AddGoTermsForm.cs プロジェクト: Gorbachevaa/diploma
        /// <summary>
        /// Handles a click on the "add GO term" button: validates the text boxes, asks the
        /// user to confirm, stores the new GO term when confirmed, and closes the form.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (not used).</param>
        private void metroAddGoTermButton_Click(object sender, EventArgs e)
        {
            // Every MetroTextBox placed directly on this form must contain some text.
            bool allFieldsFilled = this.Controls.OfType<MetroTextBox>().All(textBox => textBox.Text != String.Empty);

            // TryParse rather than Parse: a non-numeric or out-of-range GO id is reported
            // as invalid input instead of escaping the handler as an unhandled exception.
            int goId;
            if (!allFieldsFilled || !Int32.TryParse(metroGoIdTextBox.Text, out goId))
            {
                MessageBox.Show(@"Invalid input values");
                return;
            }

            GoTerm currentGoTerm = new GoTerm();
            currentGoTerm.GO_ID         = goId;
            currentGoTerm.name          = metroNameTextBox.Text;
            currentGoTerm.Go_definition = metroDefinitionTextBox.Text;
            currentGoTerm.ontology      = metroOntologyTextBox.Text;

            Confirm confirm = new Confirm();
            confirm.ShowDialog();
            if (confirm.Result)
            {
                // The unit of work is created only for the save itself, so no database
                // context is held open while the modal confirmation dialog is waiting.
                using (var unitOfWork = new UnitOWork(new GoDBContext()))
                {
                    unitOfWork.GoTerms.Add(currentGoTerm);
                    unitOfWork.Complete();
                }
            }

            // The form closes whether or not the user confirmed.
            this.Close();
        }
コード例 #2
0
        /// <summary>
        /// Builds a <c>ProteinSequenceGroup</c> from three proteins that share one sequence and
        /// checks its GO term count, gene name count, accession and contaminant flag. Then swaps
        /// in a third protein with a different sequence and checks that
        /// <c>group_proteins_by_sequence</c> returns two groups.
        /// </summary>
        public void test_protein_grouping_by_sequence()
        {
            var ref1 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { Tuple.Create("term", "P:") });
            var ref2 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { Tuple.Create("term", "P:") });
            var ref3 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { Tuple.Create("term", "P:") });

            // All three GO terms are created from the first reference.
            var term1 = new GoTerm(ref1);
            var term2 = new GoTerm(ref1);
            var term3 = new GoTerm(ref1);

            var first = new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                "T1",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref1 },
                new List<GoTerm> { term1 });
            var second = new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                "T2",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref2 },
                new List<GoTerm> { term2 });
            var third = new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                "T3",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref3 },
                new List<GoTerm> { term3 });

            // Contaminants (if any) are ordered to the front before the group is built.
            var sameSequence = new List<ProteinWithGoTerms> { first, second, third };
            var group = new ProteinSequenceGroup(sameSequence.OrderByDescending(protein => protein.IsContaminant ? 1 : 0));

            Assert.AreEqual(3, group.GoTerms.Count());
            Assert.AreEqual(3, group.GeneNames.Count());
            Assert.AreEqual("T1_3G", group.Accession);
            Assert.False(group.IsContaminant);

            // Same accession "T3", but the sequence now differs in its second residue.
            third = new ProteinWithGoTerms(
                "MCSSSSSSSSSS",
                "T3",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref3 },
                new List<GoTerm> { term3 });

            ProteinSequenceGroup[] groups = SaveState.lollipop.theoretical_database.group_proteins_by_sequence(
                new List<ProteinWithGoTerms> { first, second, third });
            Assert.AreEqual(2, groups.Length);
        }
コード例 #3
0
        /// <summary>
        /// Checks that the id, description and aspect assigned to a <c>GoTerm</c> read back unchanged.
        /// </summary>
        public void testGoTermCreation()
        {
            var term = new GoTerm
            {
                id          = "id",
                description = "description",
                aspect      = aspect.biologicalProcess
            };

            Assert.AreEqual("id", term.id);
            Assert.AreEqual("description", term.description);
            Assert.AreEqual(aspect.biologicalProcess, term.aspect);
        }
コード例 #4
0
        /// <summary>
        /// Builds a <c>ProteinSequenceGroup</c> from three same-sequence proteins, the third of
        /// which is flagged as a contaminant, and checks that the group reports three GO terms,
        /// three gene names, the accession "T3_3G" and the contaminant flag.
        /// </summary>
        public void test_protein_grouping_by_sequence_contaminant()
        {
            var ref1 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { Tuple.Create("term", "P:") });
            var ref2 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { Tuple.Create("term", "P:") });
            var ref3 = new DatabaseReference("GO", ":", new List<Tuple<string, string>> { Tuple.Create("term", "P:") });

            // All three GO terms are created from the first reference.
            var term1 = new GoTerm(ref1);
            var term2 = new GoTerm(ref1);
            var term3 = new GoTerm(ref1);

            var first = new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                "T1",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref1 },
                new List<GoTerm> { term1 });
            var second = new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                "T2",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { ref2 },
                new List<GoTerm> { term2 });
            // The only protein created with the contaminant argument set to true.
            var contaminant = new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                "T3",
                new List<Tuple<string, string>> { Tuple.Create("", "") },
                new Dictionary<int, List<Modification>>(),
                new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
                "T2",
                "T3",
                true,
                true,
                new List<DatabaseReference> { ref3 },
                new List<GoTerm> { term3 });

            // Ordering puts the contaminant first before the group is built.
            var members = new List<ProteinWithGoTerms> { first, second, contaminant };
            var group = new ProteinSequenceGroup(members.OrderByDescending(protein => protein.IsContaminant ? 1 : 0));

            Assert.AreEqual(3, group.GoTerms.Count());
            Assert.AreEqual(3, group.GeneNames.Count());
            Assert.AreEqual("T3_3G", group.Accession);
            Assert.True(group.IsContaminant);
        }
コード例 #5
0
        /// <summary>
        /// Checks <c>log_odds_ratio</c> and the p-value (rounded to six decimals) of
        /// <c>GoTermNumber</c> for three hand-picked count combinations.
        /// The four counts passed after the GO term are, in order: enriched proteins with the
        /// term, enriched proteins, background proteins with the term, background proteins.
        /// </summary>
        public void testLogOddsRatio()
        {
            var reference = new DatabaseReference("GO", ":1", new List<Tuple<string, string>> { Tuple.Create("term", "P:one") });
            var term = new GoTerm(reference);

            // 1 of 2 enriched proteins carry the term; 2 of 4 background proteins do.
            var sameProportion = new GoTermNumber(term, 1, 2, 2, 4);
            Assert.AreEqual(0, sameProportion.log_odds_ratio);
            Assert.AreEqual(0.833333m, Math.Round(sameProportion.p_value, 6));

            // 1 of 2 enriched proteins carry the term; all 4 background proteins do.
            var lowerProportion = new GoTermNumber(term, 1, 2, 4, 4);
            Assert.AreEqual(-1, lowerProportion.log_odds_ratio);
            Assert.AreEqual(1m, Math.Round(lowerProportion.p_value, 6));

            // Both enriched proteins carry the term; 2 of 4 background proteins do.
            var higherProportion = new GoTermNumber(term, 2, 2, 2, 4);
            Assert.AreEqual(1, higherProportion.log_odds_ratio);
            Assert.AreEqual(0.166667m, Math.Round(higherProportion.p_value, 6));
        }
コード例 #6
0
        /// <summary>
        /// Checks that constructing a <c>GoTermNumber</c> for a term carried by four sample
        /// proteins, while the master set records a count of one for that term, throws an
        /// <see cref="ArgumentOutOfRangeException"/> with the message "GO Term Range is illegal".
        /// </summary>
        public void testGoTermNumberClass()
        {
            var term = new GoTerm
            {
                id          = "id",
                description = "description",
                aspect      = aspect.biologicalProcess
            };

            // Every sample protein shares this single-term list.
            var termsOfEachProtein = new List<GoTerm> { term };
            var masterSetCounts    = new Dictionary<GoTerm, int> { { term, 1 } };

            var sampleProteins = new List<Protein>();
            for (int index = 0; index < 4; index++)
            {
                sampleProteins.Add(new Protein("accession_" + index, termsOfEachProtein));
            }

            Assert.That(() => new GoTermNumber(term, sampleProteins, masterSetCounts), Throws.TypeOf<ArgumentOutOfRangeException>()
                        .With.Message
                        .EqualTo("GO Term Range is illegal"));
        }
コード例 #7
0
        /// <summary>
        /// Loads proteins from a protein database file. A path ending in ".fasta" is read as
        /// FASTA; any other path is read as XML (decompressed first when the path ends in ".gz").
        /// </summary>
        /// <typeparam name="T">Type of the known modifications.</typeparam>
        /// <param name="proteinDbLocation">Path of the database file; opened read-only.</param>
        /// <param name="onTheFlyDecoys">When true, a reversed-sequence protein with accession prefix "DECOY_" is added right after each loaded protein.</param>
        /// <param name="allKnownModifications">Known modifications; they are matched by <c>id</c> against the description of "modified residue" features.</param>
        /// <param name="IsContaminant">Value passed through to every created protein.</param>
        /// <param name="unknownModifications">Receives the modifications whose feature description matched no known modification id, keyed by that description.</param>
        /// <returns>The loaded proteins in file order (each followed by its decoy when requested). An empty file yields an empty list.</returns>
        public static List<Protein> LoadProteinDb<T>(string proteinDbLocation, bool onTheFlyDecoys, IEnumerable<T> allKnownModifications, bool IsContaminant, out Dictionary<string, Modification> unknownModifications)
            where T : Modification
        {
            // Index the known modifications by id; several modifications may share one id.
            var mod_dict = new Dictionary<string, IList<Modification>>();

            foreach (var nice in allKnownModifications)
            {
                IList<Modification> val;
                if (mod_dict.TryGetValue(nice.id, out val))
                {
                    val.Add(nice);
                }
                else
                {
                    mod_dict.Add(nice.id, new List<Modification> { nice });
                }
            }

            List<Protein> result = new List<Protein>();

            unknownModifications = new Dictionary<string, Modification>();

            // Only read access is requested, so read-only database files can be opened too
            // (the two-argument FileStream constructor asks for read/write access).
            using (var stream = new FileStream(proteinDbLocation, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                string accession = null;
                string name      = null;
                string full_name = null;

                var oneBasedBeginPositions = new List<int?>();
                var oneBasedEndPositions   = new List<int?>();
                var peptideTypes           = new List<string>();
                var oneBasedModifications  = new Dictionary<int, List<Modification>>();

                // xml db
                if (!proteinDbLocation.EndsWith(".fasta"))
                {
                    Stream uniprotXmlFileStream = stream;
                    if (proteinDbLocation.EndsWith(".gz"))
                    {
                        uniprotXmlFileStream = new GZipStream(stream, CompressionMode.Decompress);
                    }

                    string       sequence                 = null;
                    string       feature_type             = null;
                    string       feature_description      = null;
                    string       dbReference_type         = null;
                    string       dbReference_id           = null;
                    string       property_type            = null;
                    string       property_value           = null;
                    int          oneBasedfeature_position = -1;
                    int?         oneBasedbeginPosition    = null;
                    int?         oneBasedendPosition      = null;
                    List<GoTerm> goTerms                  = new List<GoTerm>();

                    // Disposing the (possibly gzip) stream here releases the decompressor as well;
                    // the outer using disposing the file stream a second time is harmless.
                    using (uniprotXmlFileStream)
                    using (XmlReader xml = XmlReader.Create(uniprotXmlFileStream))
                    {
                        // NOTE(review): ReadElementString leaves the reader on the node after the
                        // element's end tag and the loop then calls Read() again, so the node that
                        // directly follows such an element is skipped. That looks harmless for
                        // indented XML (the skipped node would be whitespace) - confirm for
                        // unindented input.
                        while (xml.Read())
                        {
                            switch (xml.NodeType)
                            {
                                case XmlNodeType.Element:
                                    int outValue;
                                    switch (xml.Name)
                                    {
                                        case "accession":
                                            // Only the first accession of an entry is kept.
                                            if (accession == null)
                                            {
                                                accession = xml.ReadElementString();
                                            }
                                            break;

                                        case "name":
                                            if (xml.Depth == 2)
                                            {
                                                name = xml.ReadElementString();
                                            }
                                            break;

                                        case "fullName":
                                            // Only the first full name of an entry is kept.
                                            if (full_name == null)
                                            {
                                                full_name = xml.ReadElementString();
                                            }
                                            break;

                                        case "feature":
                                            feature_type        = xml.GetAttribute("type");
                                            feature_description = xml.GetAttribute("description");
                                            break;

                                        case "dbReference":
                                            dbReference_type = xml.GetAttribute("type");
                                            dbReference_id   = xml.GetAttribute("id");
                                            break;

                                        case "property":
                                            property_type  = xml.GetAttribute("type");
                                            property_value = xml.GetAttribute("value");
                                            if (dbReference_type == "GO" && property_type == "term")
                                            {
                                                GoTerm go = new GoTerm();
                                                // The id is the part after the first ':' of the reference id.
                                                go.id = dbReference_id.Split(':')[1];

                                                // The value has the form "<aspect letter>:<description>"; the
                                                // description is only read for the three recognised letters.
                                                string[] aspectAndDescription = property_value.Split(':');
                                                switch (aspectAndDescription[0])
                                                {
                                                    case "C":
                                                        go.aspect      = Aspect.cellularComponent;
                                                        go.description = aspectAndDescription[1];
                                                        break;

                                                    case "F":
                                                        go.aspect      = Aspect.molecularFunction;
                                                        go.description = aspectAndDescription[1];
                                                        break;

                                                    case "P":
                                                        go.aspect      = Aspect.biologicalProcess;
                                                        go.description = aspectAndDescription[1];
                                                        break;
                                                }
                                                goTerms.Add(go);
                                            }
                                            break;

                                        case "position":
                                            oneBasedfeature_position = int.Parse(xml.GetAttribute("position"));
                                            break;

                                        case "begin":
                                            oneBasedbeginPosition = int.TryParse(xml.GetAttribute("position"), out outValue) ? (int?)outValue : null;
                                            break;

                                        case "end":
                                            oneBasedendPosition = int.TryParse(xml.GetAttribute("position"), out outValue) ? (int?)outValue : null;
                                            break;

                                        case "sequence":
                                            sequence = xml.ReadElementString().Replace("\n", null).Replace(" ", null);
                                            break;
                                    }
                                    break;

                                case XmlNodeType.EndElement:
                                    switch (xml.Name)
                                    {
                                        case "feature":
                                            if (feature_type == "modified residue")
                                            {
                                                // Only the text before the first ';' identifies the modification.
                                                feature_description = feature_description.Split(';')[0];

                                                // Create new entry for this residue, if needed
                                                List<Modification> residue_modifications;
                                                if (!oneBasedModifications.TryGetValue(oneBasedfeature_position, out residue_modifications))
                                                {
                                                    residue_modifications = new List<Modification>();
                                                    oneBasedModifications.Add(oneBasedfeature_position, residue_modifications);
                                                }

                                                IList<Modification> knownModifications;
                                                Modification        seenUnknownModification;
                                                if (mod_dict.TryGetValue(feature_description, out knownModifications))
                                                {
                                                    // Known
                                                    residue_modifications.AddRange(knownModifications);
                                                }
                                                else if (unknownModifications.TryGetValue(feature_description, out seenUnknownModification))
                                                {
                                                    // Not known but seen
                                                    residue_modifications.Add(seenUnknownModification);
                                                }
                                                else
                                                {
                                                    // Not known and not seen
                                                    var newUnknownModification = new Modification(feature_description);
                                                    unknownModifications[feature_description] = newUnknownModification;
                                                    residue_modifications.Add(newUnknownModification);
                                                }
                                            }
                                            else if (feature_type == "peptide" || feature_type == "propeptide" || feature_type == "chain" || feature_type == "signal peptide")
                                            {
                                                oneBasedBeginPositions.Add(oneBasedbeginPosition);
                                                oneBasedEndPositions.Add(oneBasedendPosition);
                                                peptideTypes.Add(feature_type);
                                            }
                                            oneBasedbeginPosition    = null;
                                            oneBasedendPosition      = null;
                                            oneBasedfeature_position = -1;
                                            break;

                                        case "dbReference":
                                            dbReference_type = null;
                                            dbReference_id   = null;
                                            break;

                                        case "entry":
                                            if (accession != null && sequence != null)
                                            {
                                                var protein = new Protein(sequence, accession, oneBasedModifications, oneBasedBeginPositions.ToArray(), oneBasedEndPositions.ToArray(), peptideTypes.ToArray(), name, full_name, false, IsContaminant, goTerms);

                                                result.Add(protein);

                                                if (onTheFlyDecoys)
                                                {
                                                    char[] sequence_array = sequence.ToCharArray();
                                                    Dictionary<int, List<Modification>> decoy_modifications = null;
                                                    if (sequence.StartsWith("M", StringComparison.InvariantCulture))
                                                    {
                                                        // Do not include the initiator methionine in reversal!!!
                                                        Array.Reverse(sequence_array, 1, sequence.Length - 1);
                                                        if (oneBasedModifications != null)
                                                        {
                                                            decoy_modifications = new Dictionary<int, List<Modification>>(oneBasedModifications.Count);
                                                            foreach (var kvp in oneBasedModifications)
                                                            {
                                                                if (kvp.Key == 1)
                                                                {
                                                                    // Position 1 stays in place together with the methionine.
                                                                    decoy_modifications.Add(1, kvp.Value);
                                                                }
                                                                else if (kvp.Key > 1)
                                                                {
                                                                    decoy_modifications.Add(sequence.Length - kvp.Key + 2, kvp.Value);
                                                                }
                                                            }
                                                        }
                                                    }
                                                    else
                                                    {
                                                        Array.Reverse(sequence_array);
                                                        if (oneBasedModifications != null)
                                                        {
                                                            decoy_modifications = new Dictionary<int, List<Modification>>(oneBasedModifications.Count);
                                                            foreach (var kvp in oneBasedModifications)
                                                            {
                                                                decoy_modifications.Add(sequence.Length - kvp.Key + 1, kvp.Value);
                                                            }
                                                        }
                                                    }
                                                    var      reversed_sequence    = new string(sequence_array);
                                                    int?[]   decoybeginPositions  = new int?[oneBasedBeginPositions.Count];
                                                    int?[]   decoyendPositions    = new int?[oneBasedEndPositions.Count];
                                                    string[] decoyBigPeptideTypes = new string[oneBasedEndPositions.Count];

                                                    // Mirror each begin/end pair and store the pairs in reverse order.
                                                    for (int i = 0; i < decoybeginPositions.Length; i++)
                                                    {
                                                        decoybeginPositions[oneBasedBeginPositions.Count - i - 1]  = sequence.Length - oneBasedEndPositions[i] + 1;
                                                        decoyendPositions[oneBasedBeginPositions.Count - i - 1]    = sequence.Length - oneBasedBeginPositions[i] + 1;
                                                        decoyBigPeptideTypes[oneBasedBeginPositions.Count - i - 1] = peptideTypes[i];
                                                    }
                                                    var decoy_protein = new Protein(reversed_sequence, "DECOY_" + accession, decoy_modifications, decoybeginPositions, decoyendPositions, decoyBigPeptideTypes, name, full_name, true, IsContaminant, null);

                                                    result.Add(decoy_protein);
                                                }
                                            }

                                            // Reset all per-entry state before the next entry starts.
                                            accession                = null;
                                            name                     = null;
                                            full_name                = null;
                                            sequence                 = null;
                                            feature_type             = null;
                                            feature_description      = null;
                                            dbReference_type         = null;
                                            dbReference_id           = null;
                                            property_type            = null;
                                            property_value           = null;
                                            oneBasedfeature_position = -1;
                                            oneBasedModifications    = new Dictionary<int, List<Modification>>();
                                            oneBasedBeginPositions   = new List<int?>();
                                            oneBasedEndPositions     = new List<int?>();
                                            peptideTypes             = new List<string>();
                                            goTerms                  = new List<GoTerm>();
                                            break;
                                    }
                                    break;
                            }
                        }
                    }
                }

                // fasta db
                else
                {
                    using (StreamReader fasta = new StreamReader(stream))
                    {
                        StringBuilder sb = null;
                        string        line;

                        // ReadLine returns null at end of input, so an empty file simply yields no proteins.
                        while ((line = fasta.ReadLine()) != null)
                        {
                            if (line.StartsWith(">"))
                            {
                                // fasta protein only has accession, fullname, sequence (no mods)
                                string[] delimiters         = { ">", "|", " OS=" };
                                string[] delimiters_ensembl = { ">", " ", "\t" };
                                string[] output             = line.Split(delimiters, StringSplitOptions.None);
                                string[] output_ensembl     = line.Split(delimiters_ensembl, StringSplitOptions.None);
                                if (output.Length > 4)
                                {
                                    accession = output[2];
                                    name      = accession;
                                    full_name = output[3];
                                }
                                else if (output_ensembl.Length > 2)
                                {
                                    accession = output_ensembl[1];
                                    name      = accession;
                                    full_name = String.Join(" ", Enumerable.Range(2, output_ensembl.Length - 2).Select(i => output_ensembl[i]));
                                }
                                else
                                {
                                    // can't read protein description
                                    full_name = line.Substring(1);
                                    accession = line.Substring(1);
                                    // Clear the name so this entry does not inherit the previous entry's name.
                                    name      = null;
                                }

                                // new protein
                                sb = new StringBuilder();
                            }
                            else if (sb != null)
                            {
                                sb.Append(line.Trim());
                            }

                            // The current entry is complete when the next line is a header or the input ends.
                            if ((fasta.Peek() == '>' || fasta.Peek() == -1) && accession != null && sb != null)
                            {
                                var sequence = sb.ToString();
                                var protein  = new Protein(sequence, accession, oneBasedModifications, oneBasedBeginPositions.ToArray(), oneBasedEndPositions.ToArray(), peptideTypes.ToArray(), name, full_name, false, IsContaminant, new List<GoTerm>());

                                result.Add(protein);

                                if (onTheFlyDecoys)
                                {
                                    char[] sequence_array  = sequence.ToCharArray();
                                    int    starts_with_met = Convert.ToInt32(sequence.StartsWith("M", StringComparison.InvariantCulture));
                                    Array.Reverse(sequence_array, starts_with_met, sequence.Length - starts_with_met); // Do not include the initiator methionine in reversal!!!
                                    var reversed_sequence = new string(sequence_array);
                                    var decoy_protein     = new Protein(reversed_sequence, "DECOY_" + accession, oneBasedModifications, oneBasedBeginPositions.ToArray(), oneBasedEndPositions.ToArray(), peptideTypes.ToArray(), name, full_name, true, IsContaminant, null);

                                    result.Add(decoy_protein);
                                }
                            }

                            // no input left
                            if (fasta.Peek() == -1)
                            {
                                break;
                            }
                        }
                    }
                }
            }
            return(result);
        }
コード例 #8
0
        public void get_interesting_goterm_families()
        {
            // Scenario: three proteins share the sequence "ASDF". The first and third carry a
            // GO reference with id "GO:1", the second one with id "GO:2". Two experimental
            // proteoforms are each related to exactly one theoretical proteoform, and only the
            // first experimental is handed to the GO analysis as significant.

            // Start from a fresh shared Lollipop configured for permutation-based significance.
            Lollipop workspace = new Lollipop();
            Sweet.lollipop = workspace;
            workspace.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(workspace.carbamidomethylation, workspace.neucode_labeled).AA_Masses;
            workspace.significance_by_permutation = true;
            workspace.significance_by_log2FC = false;

            // Local factories for the repetitive fixture objects.
            Func<string, string, DatabaseReference> goReference = (id, term) =>
                new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });
            Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> goProtein = (accession, reference, goTerm) =>
                new ProteinWithGoTerms(
                    "ASDF",
                    accession,
                    new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                    new Dictionary<int, List<Modification>>(),
                    new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
                    "T2",
                    "T3",
                    true,
                    false,
                    new List<DatabaseReference> { reference },
                    new List<GoTerm> { goTerm });

            DatabaseReference referenceA = goReference("GO:1", "P:1");
            DatabaseReference referenceB = goReference("GO:2", "P:2");
            DatabaseReference referenceC = goReference("GO:1", "P:1");
            GoTerm termA = new GoTerm(referenceA);
            GoTerm termB = new GoTerm(referenceB);
            GoTerm termC = new GoTerm(referenceC);
            ProteinWithGoTerms proteinA = goProtein("T1", referenceA, termA);
            ProteinWithGoTerms proteinB = goProtein("T2", referenceB, termB);
            ProteinWithGoTerms proteinC = goProtein("T3", referenceC, termC);

            // One single-protein "database" entry per protein.
            var proteinsByFile = new Dictionary<InputFile, Protein[]>();
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinA });
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinB });
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinC });

            // Both experimentals get identical quantification values and are flagged significant.
            ExperimentalProteoform experimentalA = ConstructorsForTesting.ExperimentalProteoform("E");
            ExperimentalProteoform experimentalB = ConstructorsForTesting.ExperimentalProteoform("E");
            foreach (ExperimentalProteoform experimental in new[] { experimentalA, experimentalB })
            {
                experimental.quant.intensitySum = 1;
                experimental.quant.TusherValues1.significant = true;
                experimental.quant.tusherlogFoldChange = 1;
            }

            TheoreticalProteoform theoreticalA = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", proteinA, proteinsByFile);
            TheoreticalProteoform theoreticalB = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", proteinB, proteinsByFile);
            TheoreticalProteoform theoreticalC = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", proteinC, proteinsByFile);
            theoreticalA.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinA };
            theoreticalB.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinB };
            theoreticalC.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinC };
            foreach (TheoreticalProteoform theoretical in new[] { theoreticalA, theoreticalB, theoreticalC })
            {
                theoretical.begin = 1;
                theoretical.end = 1;
            }

            // Only one experimental-theoretical connection is allowed per experimental, so the
            // first experimental is deliberately NOT related to the third theoretical as well.
            make_relation(experimentalA, theoreticalA);
            make_relation(experimentalB, theoreticalB);

            // One family per experimental; the first family therefore holds a single theoretical.
            ProteoformFamily familyA = new ProteoformFamily(experimentalA);
            ProteoformFamily familyB = new ProteoformFamily(experimentalB);
            familyA.construct_family();
            familyA.identify_experimentals();
            familyB.construct_family();
            familyB.identify_experimentals();
            List<ProteoformFamily> allFamilies = new List<ProteoformFamily> { familyA, familyB };

            theoreticalA.family = familyA;
            theoreticalC.family = familyA;
            experimentalA.family = familyA;
            theoreticalB.family = familyB;
            experimentalB.family = familyB;

            // Act: derive the significant proteins from the first experimental only, then count
            // GO terms against a background made of all three proteins.
            List<ExperimentalProteoform> significantExperimentals = new List<ExperimentalProteoform> { experimentalA };
            List<ProteinWithGoTerms> significantProteins = workspace.getInducedOrRepressedProteins(significantExperimentals, workspace.TusherAnalysis1.GoAnalysis);
            List<ProteinWithGoTerms> backgroundProteins = new List<ProteinWithGoTerms> { proteinA, proteinB, proteinC };
            List<GoTermNumber> goTermNumbers = workspace.TusherAnalysis1.GoAnalysis.getGoTermNumbers(significantProteins, backgroundProteins);

            // Assert: a single significant protein and a single GO term number with id "1".
            Assert.AreEqual(1, significantProteins.Count);
            Assert.AreEqual(1, goTermNumbers.Count);
            GoTermNumber onlyTerm = goTermNumbers.First();
            Assert.AreEqual("1", onlyTerm.Id);
            decimal expectedLogOdds = 0 - (decimal)Math.Log(2d / 3d, 2); // -log2(2/3)
            Assert.AreEqual(expectedLogOdds, goTermNumbers.First().log_odds_ratio);

            // Exactly one family is reported as interesting, and it holds one theoretical.
            List<ProteoformFamily> interestingFamilies = workspace.getInterestingFamilies(goTermNumbers, allFamilies);
            Assert.AreEqual(1, interestingFamilies.Count);
            Assert.AreEqual(1, interestingFamilies[0].theoretical_proteoforms.Count);
        }
コード例 #9
0
        public void test_goterm_analysis_with_custom_list()
        {
            // Scenario: same three-protein fixture as the GO family tests (first and third
            // protein reference "GO:1", second references "GO:2"), but here the first
            // experimental is related to two theoreticals and the analysis is run through
            // GO_analysis with a background protein list file configured.

            // Start from a fresh shared Lollipop configured for permutation-based significance.
            Lollipop workspace = new Lollipop();
            Sweet.lollipop = workspace;
            workspace.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(workspace.carbamidomethylation, workspace.neucode_labeled).AA_Masses;
            workspace.significance_by_permutation = true;
            workspace.significance_by_log2FC = false;

            // Local factories for the repetitive fixture objects.
            Func<string, string, DatabaseReference> goReference = (id, term) =>
                new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });
            Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> goProtein = (accession, reference, goTerm) =>
                new ProteinWithGoTerms(
                    "ASDF",
                    accession,
                    new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                    new Dictionary<int, List<Modification>>(),
                    new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
                    "T2",
                    "T3",
                    true,
                    false,
                    new List<DatabaseReference> { reference },
                    new List<GoTerm> { goTerm });

            DatabaseReference referenceA = goReference("GO:1", "P:1");
            DatabaseReference referenceB = goReference("GO:2", "P:2");
            DatabaseReference referenceC = goReference("GO:1", "P:1");
            GoTerm termA = new GoTerm(referenceA);
            GoTerm termB = new GoTerm(referenceB);
            GoTerm termC = new GoTerm(referenceC);
            ProteinWithGoTerms proteinA = goProtein("T1", referenceA, termA);
            ProteinWithGoTerms proteinB = goProtein("T2", referenceB, termB);
            ProteinWithGoTerms proteinC = goProtein("T3", referenceC, termC);

            // One single-protein "database" entry per protein.
            var proteinsByFile = new Dictionary<InputFile, Protein[]>();
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinA });
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinB });
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinC });

            // Both experimentals get identical quantification values and are flagged significant.
            ExperimentalProteoform experimentalA = ConstructorsForTesting.ExperimentalProteoform("E");
            ExperimentalProteoform experimentalB = ConstructorsForTesting.ExperimentalProteoform("E");
            foreach (ExperimentalProteoform experimental in new[] { experimentalA, experimentalB })
            {
                experimental.quant.intensitySum = 1;
                experimental.quant.TusherValues1.significant = true;
                experimental.quant.tusherlogFoldChange = 1;
            }

            TheoreticalProteoform theoreticalA = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", proteinA, proteinsByFile);
            TheoreticalProteoform theoreticalB = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", proteinB, proteinsByFile);
            TheoreticalProteoform theoreticalC = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", proteinC, proteinsByFile);
            theoreticalA.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinA };
            theoreticalB.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinB };
            theoreticalC.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinC };

            // The first experimental is related to two theoreticals whose proteins share GO id
            // "GO:1"; the second experimental is related to the "GO:2" theoretical.
            make_relation(experimentalA, theoreticalA);
            make_relation(experimentalA, theoreticalC);
            make_relation(experimentalB, theoreticalB);

            // One family per experimental.
            ProteoformFamily familyA = new ProteoformFamily(experimentalA);
            ProteoformFamily familyB = new ProteoformFamily(experimentalB);
            familyA.construct_family();
            familyA.identify_experimentals();
            familyB.construct_family();
            familyB.identify_experimentals();
            List<ProteoformFamily> allFamilies = new List<ProteoformFamily> { familyA, familyB };

            theoreticalA.family = familyA;
            theoreticalC.family = familyA;
            experimentalA.family = familyA;
            theoreticalB.family = familyB;
            experimentalB.family = familyB;

            // Act: store the significant proteins on the Tusher analysis, configure the GO
            // analysis (all-theoretical flag, expanded proteins, background list path) and run it.
            var tusher = workspace.TusherAnalysis1;
            var goAnalysis = tusher.GoAnalysis;
            List<ExperimentalProteoform> significantExperimentals = new List<ExperimentalProteoform> { experimentalA };
            tusher.inducedOrRepressedProteins = workspace.getInducedOrRepressedProteins(significantExperimentals, goAnalysis);
            goAnalysis.allTheoreticalProteins = true;
            workspace.theoretical_database.expanded_proteins = new ProteinWithGoTerms[] { proteinA, proteinB, proteinC };
            goAnalysis.backgroundProteinsList = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_protein_list.txt");
            goAnalysis.GO_analysis(tusher.inducedOrRepressedProteins);

            // Assert: only one ET connection is used per experimental, hence one significant
            // protein and a single GO term number with id "1".
            Assert.AreEqual(1, tusher.inducedOrRepressedProteins.Count);
            Assert.AreEqual(1, goAnalysis.goTermNumbers.Count);
            Assert.AreEqual("1", goAnalysis.goTermNumbers.First().Id);
            decimal expectedLogOdds = 0 - (decimal)Math.Log(2d / 3d, 2); // -log2(2/3)
            Assert.AreEqual(expectedLogOdds, goAnalysis.goTermNumbers.First().log_odds_ratio);

            // Exactly one family is reported as interesting, and it holds both theoreticals.
            List<ProteoformFamily> interestingFamilies = workspace.getInterestingFamilies(goAnalysis.goTermNumbers, allFamilies);
            Assert.AreEqual(1, interestingFamilies.Count);
            Assert.AreEqual(2, interestingFamilies[0].theoretical_proteoforms.Count);
        }
コード例 #10
0
        public void get_interesting_goterm_families()
        {
            // Scenario: three proteins with empty sequences. The first and third carry a GO
            // reference with id "GO:1", the second one with id "GO:2". Two experimental
            // proteoforms are each related to exactly one theoretical proteoform, and only the
            // first experimental is handed to the GO analysis as significant.

            // Start from a fresh shared Lollipop.
            Lollipop workspace = new Lollipop();
            SaveState.lollipop = workspace;

            // Local factories for the repetitive fixture objects.
            Func<string, string, DatabaseReference> goReference = (id, term) =>
                new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });
            Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> goProtein = (accession, reference, goTerm) =>
                new ProteinWithGoTerms(
                    "",
                    accession,
                    new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                    new Dictionary<int, List<Modification>>(),
                    new int?[] { 0 },
                    new int?[] { 0 },
                    new string[] { "" },
                    "T2",
                    "T3",
                    true,
                    false,
                    new List<DatabaseReference> { reference },
                    new List<GoTerm> { goTerm });

            DatabaseReference referenceA = goReference("GO:1", "P:1");
            DatabaseReference referenceB = goReference("GO:2", "P:2");
            DatabaseReference referenceC = goReference("GO:1", "P:1");
            GoTerm termA = new GoTerm(referenceA);
            GoTerm termB = new GoTerm(referenceB);
            GoTerm termC = new GoTerm(referenceC);
            ProteinWithGoTerms proteinA = goProtein("T1", referenceA, termA);
            ProteinWithGoTerms proteinB = goProtein("T2", referenceB, termB);
            ProteinWithGoTerms proteinC = goProtein("T3", referenceC, termC);

            // One single-protein "database" entry per protein.
            var proteinsByFile = new Dictionary<InputFile, Protein[]>();
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinA });
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinB });
            proteinsByFile.Add(new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { proteinC });

            // Both experimentals get identical quantification values.
            ExperimentalProteoform experimentalA = ConstructorsForTesting.ExperimentalProteoform("E");
            ExperimentalProteoform experimentalB = ConstructorsForTesting.ExperimentalProteoform("E");
            foreach (ExperimentalProteoform experimental in new[] { experimentalA, experimentalB })
            {
                experimental.quant.intensitySum = 1;
                experimental.quant.FDR = 0;
                experimental.quant.logFoldChange = 1;
            }

            TheoreticalProteoform theoreticalA = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", proteinA, proteinsByFile);
            TheoreticalProteoform theoreticalB = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", proteinB, proteinsByFile);
            TheoreticalProteoform theoreticalC = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", proteinC, proteinsByFile);
            theoreticalA.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinA };
            theoreticalB.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinB };
            theoreticalC.ExpandedProteinList = new List<ProteinWithGoTerms> { proteinC };

            // Only one experimental-theoretical connection is allowed per experimental, so the
            // first experimental is deliberately NOT related to the third theoretical as well.
            make_relation(experimentalA, theoreticalA);
            make_relation(experimentalB, theoreticalB);

            // One family per experimental; the first family therefore holds a single theoretical.
            ProteoformFamily familyA = new ProteoformFamily(experimentalA);
            ProteoformFamily familyB = new ProteoformFamily(experimentalB);
            familyA.construct_family();
            familyA.identify_experimentals();
            familyB.construct_family();
            familyB.identify_experimentals();
            List<ProteoformFamily> allFamilies = new List<ProteoformFamily> { familyA, familyB };

            theoreticalA.family = familyA;
            theoreticalC.family = familyA;
            experimentalA.family = familyA;
            theoreticalB.family = familyB;
            experimentalB.family = familyB;

            // Act: derive the significant proteins from the first experimental only, then count
            // GO terms against a background made of all three proteins.
            List<ExperimentalProteoform> significantExperimentals = new List<ExperimentalProteoform> { experimentalA };
            List<ProteinWithGoTerms> significantProteins = workspace.getInducedOrRepressedProteins(significantExperimentals, 0, 1, 0);
            List<ProteinWithGoTerms> backgroundProteins = new List<ProteinWithGoTerms> { proteinA, proteinB, proteinC };
            List<GoTermNumber> goTermNumbers = workspace.getGoTermNumbers(significantProteins, backgroundProteins);

            // Assert: a single significant protein and a single GO term number with id "1".
            Assert.AreEqual(1, significantProteins.Count);
            Assert.AreEqual(1, goTermNumbers.Count);
            GoTermNumber onlyTerm = goTermNumbers.First();
            Assert.AreEqual("1", onlyTerm.Id);
            decimal expectedLogOdds = 0 - (decimal)Math.Log(2d / 3d, 2); // -log2(2/3)
            Assert.AreEqual(expectedLogOdds, goTermNumbers.First().log_odds_ratio);

            // Exactly one family is reported as interesting, and it holds one theoretical.
            List<ProteoformFamily> interestingFamilies = workspace.getInterestingFamilies(goTermNumbers, allFamilies);
            Assert.AreEqual(1, interestingFamilies.Count);
            Assert.AreEqual(1, interestingFamilies[0].theoretical_proteoforms.Count);
        }