/// <summary>
/// Click handler for the "add GO term" button. Validates the form's text boxes, asks the
/// user for confirmation and, when confirmed, stores the new GO term. The form is closed
/// after the confirmation dialog regardless of the user's answer.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void metroAddGoTermButton_Click(object sender, EventArgs e)
{
    // Only MetroTextBox controls that are direct children of this form are inspected.
    bool everyFieldFilled = this.Controls.OfType<MetroTextBox>().All(textBox => textBox.Text != String.Empty);

    // TryParse instead of Parse: a non-numeric GO id is reported through the same
    // "invalid input" message rather than surfacing as an unhandled FormatException.
    int goId;
    if (!everyFieldFilled || !Int32.TryParse(metroGoIdTextBox.Text, out goId))
    {
        MessageBox.Show(@"Invalid input values");
        return;
    }

    GoTerm currentGoTerm = new GoTerm();
    currentGoTerm.GO_ID = goId;
    currentGoTerm.name = metroNameTextBox.Text;
    currentGoTerm.Go_definition = metroDefinitionTextBox.Text;
    currentGoTerm.ontology = metroOntologyTextBox.Text;

    // NOTE(review): if Confirm derives from Form it should be disposed after ShowDialog() - confirm.
    Confirm confirm = new Confirm();
    confirm.ShowDialog();

    if (confirm.Result)
    {
        // The unit of work (and its database context) is only created when something is
        // actually going to be written.
        using (var unitOfWork = new UnitOWork(new GoDBContext()))
        {
            unitOfWork.GoTerms.Add(currentGoTerm);
            unitOfWork.Complete();
        }
    }

    this.Close();
}
/// <summary>
/// Groups three proteins sharing one sequence into a single ProteinSequenceGroup and checks
/// its GO terms, gene names, accession and contaminant flag; then changes the sequence of
/// the third protein and expects group_proteins_by_sequence to return two groups.
/// </summary>
public void test_protein_grouping_by_sequence()
{
    // Every database reference in this test has the same content.
    Func<DatabaseReference> makeReference = () =>
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });

    DatabaseReference firstReference = makeReference();
    DatabaseReference secondReference = makeReference();
    DatabaseReference thirdReference = makeReference();

    // All three GO terms are built from the first reference.
    GoTerm firstTerm = new GoTerm(firstReference);
    GoTerm secondTerm = new GoTerm(firstReference);
    GoTerm thirdTerm = new GoTerm(firstReference);

    // The proteins differ only in sequence, accession, database reference and GO term.
    Func<string, string, DatabaseReference, GoTerm, ProteinWithGoTerms> makeProtein =
        (sequence, accession, reference, term) =>
            new ProteinWithGoTerms(
                sequence,
                accession,
                new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(),
                new int?[] { 0 },
                new int?[] { 0 },
                new string[] { "" },
                "T2",
                "T3",
                true,
                false,
                new List<DatabaseReference> { reference },
                new List<GoTerm> { term });

    ProteinWithGoTerms firstProtein = makeProtein("MSSSSSSSSSSS", "T1", firstReference, firstTerm);
    ProteinWithGoTerms secondProtein = makeProtein("MSSSSSSSSSSS", "T2", secondReference, secondTerm);
    ProteinWithGoTerms thirdProtein = makeProtein("MSSSSSSSSSSS", "T3", thirdReference, thirdTerm);

    var identicalSequenceProteins = new List<ProteinWithGoTerms> { firstProtein, secondProtein, thirdProtein };
    ProteinSequenceGroup group = new ProteinSequenceGroup(
        identicalSequenceProteins.OrderByDescending(protein => protein.IsContaminant ? 1 : 0));

    Assert.AreEqual(3, group.GoTerms.Count());
    Assert.AreEqual(3, group.GeneNames.Count());
    Assert.AreEqual("T1_3G", group.Accession);
    Assert.False(group.IsContaminant);

    // Same accession and annotations, but a different sequence: must end up in its own group.
    thirdProtein = makeProtein("MCSSSSSSSSSS", "T3", thirdReference, thirdTerm);

    ProteinSequenceGroup[] groups = SaveState.lollipop.theoretical_database.group_proteins_by_sequence(
        new List<ProteinWithGoTerms> { firstProtein, secondProtein, thirdProtein });

    Assert.AreEqual(2, groups.Length);
}
/// <summary>
/// Verifies that the id, description and aspect assigned to a GoTerm are read back unchanged.
/// </summary>
public void testGoTermCreation()
{
    GoTerm term = new GoTerm
    {
        id = "id",
        description = "description",
        aspect = aspect.biologicalProcess
    };

    Assert.AreEqual("id", term.id);
    Assert.AreEqual("description", term.description);
    Assert.AreEqual(aspect.biologicalProcess, term.aspect);
}
/// <summary>
/// Groups three proteins with the same sequence, the third flagged as a contaminant, and
/// checks that the group (built with contaminants ordered first) takes the contaminant's
/// accession and is itself flagged as a contaminant.
/// </summary>
public void test_protein_grouping_by_sequence_contaminant()
{
    // Every database reference in this test has the same content.
    Func<DatabaseReference> makeReference = () =>
        new DatabaseReference("GO", ":", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:") });

    DatabaseReference firstReference = makeReference();
    DatabaseReference secondReference = makeReference();
    DatabaseReference thirdReference = makeReference();

    // All three GO terms are built from the first reference.
    GoTerm firstTerm = new GoTerm(firstReference);
    GoTerm secondTerm = new GoTerm(firstReference);
    GoTerm thirdTerm = new GoTerm(firstReference);

    // The proteins differ only in accession, contaminant flag, database reference and GO term.
    Func<string, bool, DatabaseReference, GoTerm, ProteinWithGoTerms> makeProtein =
        (accession, isContaminant, reference, term) =>
            new ProteinWithGoTerms(
                "MSSSSSSSSSSS",
                accession,
                new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
                new Dictionary<int, List<Modification>>(),
                new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
                "T2",
                "T3",
                true,
                isContaminant,
                new List<DatabaseReference> { reference },
                new List<GoTerm> { term });

    ProteinWithGoTerms firstProtein = makeProtein("T1", false, firstReference, firstTerm);
    ProteinWithGoTerms secondProtein = makeProtein("T2", false, secondReference, secondTerm);
    ProteinWithGoTerms contaminantProtein = makeProtein("T3", true, thirdReference, thirdTerm);

    var identicalSequenceProteins = new List<ProteinWithGoTerms> { firstProtein, secondProtein, contaminantProtein };
    ProteinSequenceGroup group = new ProteinSequenceGroup(
        identicalSequenceProteins.OrderByDescending(protein => protein.IsContaminant ? 1 : 0));

    Assert.AreEqual(3, group.GoTerms.Count());
    Assert.AreEqual(3, group.GeneNames.Count());
    Assert.AreEqual("T3_3G", group.Accession);
    Assert.True(group.IsContaminant);
}
/// <summary>
/// Checks log_odds_ratio and the p_value (rounded to six decimals) of GoTermNumber for
/// three combinations of enriched and background protein counts.
/// </summary>
public void testLogOddsRatio()
{
    DatabaseReference reference = new DatabaseReference("GO", ":1", new List<Tuple<string, string>> { new Tuple<string, string>("term", "P:one") });
    GoTerm term = new GoTerm(reference);

    // Columns of each scenario:
    //   [0] q - number of enriched proteins with the term
    //   [1] k - number of enriched proteins
    //   [2] m - number of proteins in the background with the term
    //   [3] t - number of proteins in the background
    //   [4] expected log odds ratio
    int[][] scenarios =
    {
        new[] { 1, 2, 2, 4, 0 },
        new[] { 1, 2, 4, 4, -1 },
        new[] { 2, 2, 2, 4, 1 },
    };
    decimal[] expectedPValues = { 0.833333m, 1m, 0.166667m };

    for (int index = 0; index < scenarios.Length; index++)
    {
        int[] scenario = scenarios[index];
        int expectedLogOdds = scenario[4];

        GoTermNumber number = new GoTermNumber(term, scenario[0], scenario[1], scenario[2], scenario[3]);

        Assert.AreEqual(expectedLogOdds, number.log_odds_ratio);
        Assert.AreEqual(expectedPValues[index], Math.Round(number.p_value, 6));
    }
}
/// <summary>
/// Expects the GoTermNumber constructor to throw ArgumentOutOfRangeException with the
/// message "GO Term Range is illegal" when four sample proteins carry a GO term whose
/// count in the master set is 1.
/// </summary>
public void testGoTermNumberClass()
{
    GoTerm term = new GoTerm();
    term.id = "id";
    term.description = "description";
    term.aspect = aspect.biologicalProcess;

    // The same single-term list is shared by every protein in the sample.
    List<GoTerm> oneProteinGoTerms = new List<GoTerm> { term };
    Dictionary<GoTerm, int> goMasterSet = new Dictionary<GoTerm, int> { { term, 1 } };

    List<Protein> proteinsInSample = new List<Protein>();
    int proteinIndex = 0;
    while (proteinIndex < 4)
    {
        proteinsInSample.Add(new Protein("accession_" + proteinIndex.ToString(), oneProteinGoTerms));
        proteinIndex++;
    }

    Assert.That(() => new GoTermNumber(term, proteinsInSample, goMasterSet),
        Throws.TypeOf<ArgumentOutOfRangeException>()
            .With.Message
            .EqualTo("GO Term Range is illegal"));
}
/// <summary>
/// Reads proteins from a database file. A path ending in ".fasta" is parsed as FASTA; any
/// other path is parsed as XML (gzip-decompressed first when the path ends in ".gz").
/// </summary>
/// <typeparam name="T">Modification type of the known-modification collection.</typeparam>
/// <param name="proteinDbLocation">Path of the database file; its extension selects the parser.</param>
/// <param name="onTheFlyDecoys">When true, a reversed "DECOY_" protein is added right after each target protein.</param>
/// <param name="allKnownModifications">Known modifications; matched by <c>id</c> against the description of "modified residue" features.</param>
/// <param name="IsContaminant">Flag passed unchanged to every protein that is created.</param>
/// <param name="unknownModifications">Receives the modifications created for feature descriptions not found in <paramref name="allKnownModifications"/>, keyed by description.</param>
/// <returns>The proteins in file order, each followed by its decoy when decoys are requested.</returns>
public static List<Protein> LoadProteinDb<T>(string proteinDbLocation, bool onTheFlyDecoys, IEnumerable<T> allKnownModifications, bool IsContaminant, out Dictionary<string, Modification> unknownModifications)
    where T : Modification
{
    // Index the known modifications by id; several modifications may share one id.
    var mod_dict = new Dictionary<string, IList<Modification>>();
    foreach (var nice in allKnownModifications)
    {
        IList<Modification> val;
        if (mod_dict.TryGetValue(nice.id, out val))
        {
            val.Add(nice);
        }
        else
        {
            mod_dict.Add(nice.id, new List<Modification> { nice });
        }
    }

    List<Protein> result = new List<Protein>();
    unknownModifications = new Dictionary<string, Modification>();

    // Read-only access: the two-argument FileStream constructor requests read/write access,
    // which fails for read-only files even though the database is never written.
    using (var stream = new FileStream(proteinDbLocation, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        string accession = null;
        string name = null;
        string full_name = null;
        var oneBasedBeginPositions = new List<int?>();
        var oneBasedEndPositions = new List<int?>();
        var peptideTypes = new List<string>();
        var oneBasedModifications = new Dictionary<int, List<Modification>>();

        // xml db
        if (!proteinDbLocation.EndsWith(".fasta", StringComparison.Ordinal))
        {
            // Dispose the decompressor deterministically; a null resource is skipped by 'using'.
            GZipStream gzip = proteinDbLocation.EndsWith(".gz", StringComparison.Ordinal)
                ? new GZipStream(stream, CompressionMode.Decompress)
                : null;

            string sequence = null;
            string feature_type = null;
            string feature_description = null;
            string dbReference_type = null;
            string dbReference_id = null;
            string property_type = null;
            string property_value = null;
            int oneBasedfeature_position = -1;
            int? oneBasedbeginPosition = null;
            int? oneBasedendPosition = null;
            List<GoTerm> goTerms = new List<GoTerm>();
            int outValue;

            using (gzip)
            using (XmlReader xml = XmlReader.Create((Stream)gzip ?? stream))
            {
                // NOTE(review): ReadElementString() leaves the reader on the node after the end
                // tag, so the following Read() skips that node. Harmless while that node is
                // whitespace - confirm for XML written without indentation.
                while (xml.Read())
                {
                    switch (xml.NodeType)
                    {
                        case XmlNodeType.Element:
                            switch (xml.Name)
                            {
                                case "accession":
                                    // Only the first accession of an entry is kept.
                                    if (accession == null)
                                    {
                                        accession = xml.ReadElementString();
                                    }
                                    break;

                                case "name":
                                    if (xml.Depth == 2)
                                    {
                                        name = xml.ReadElementString();
                                    }
                                    break;

                                case "fullName":
                                    // Only the first full name of an entry is kept.
                                    if (full_name == null)
                                    {
                                        full_name = xml.ReadElementString();
                                    }
                                    break;

                                case "feature":
                                    feature_type = xml.GetAttribute("type");
                                    feature_description = xml.GetAttribute("description");
                                    break;

                                case "dbReference":
                                    dbReference_type = xml.GetAttribute("type");
                                    dbReference_id = xml.GetAttribute("id");
                                    break;

                                case "property":
                                    property_type = xml.GetAttribute("type");
                                    property_value = xml.GetAttribute("value");
                                    if (dbReference_type == "GO" && property_type == "term")
                                    {
                                        GoTerm go = new GoTerm();
                                        go.id = dbReference_id.Split(':')[1];

                                        // Split on the first ':' only, so a description that itself
                                        // contains ':' is kept whole instead of being cut short.
                                        string[] aspectAndDescription = property_value.Split(new[] { ':' }, 2);
                                        switch (aspectAndDescription[0])
                                        {
                                            case "C":
                                                go.aspect = Aspect.cellularComponent;
                                                go.description = aspectAndDescription[1];
                                                break;

                                            case "F":
                                                go.aspect = Aspect.molecularFunction;
                                                go.description = aspectAndDescription[1];
                                                break;

                                            case "P":
                                                go.aspect = Aspect.biologicalProcess;
                                                go.description = aspectAndDescription[1];
                                                break;
                                        }
                                        goTerms.Add(go);
                                    }
                                    break;

                                case "position":
                                    oneBasedfeature_position = int.Parse(xml.GetAttribute("position"));
                                    break;

                                case "begin":
                                    oneBasedbeginPosition = int.TryParse(xml.GetAttribute("position"), out outValue) ? (int?)outValue : null;
                                    break;

                                case "end":
                                    oneBasedendPosition = int.TryParse(xml.GetAttribute("position"), out outValue) ? (int?)outValue : null;
                                    break;

                                case "sequence":
                                    sequence = xml.ReadElementString().Replace("\n", null).Replace(" ", null);
                                    break;
                            }
                            break;

                        case XmlNodeType.EndElement:
                            switch (xml.Name)
                            {
                                case "feature":
                                    if (feature_type == "modified residue")
                                    {
                                        feature_description = feature_description.Split(';')[0];

                                        // Create new entry for this residue, if needed
                                        List<Modification> residue_modifications;
                                        if (!oneBasedModifications.TryGetValue(oneBasedfeature_position, out residue_modifications))
                                        {
                                            residue_modifications = new List<Modification>();
                                            oneBasedModifications.Add(oneBasedfeature_position, residue_modifications);
                                        }

                                        IList<Modification> knownModifications;
                                        if (mod_dict.TryGetValue(feature_description, out knownModifications))
                                        {
                                            // Known
                                            residue_modifications.AddRange(knownModifications);
                                        }
                                        else
                                        {
                                            // Not known: reuse the placeholder if this description was
                                            // seen before, otherwise create and remember one.
                                            Modification unknownModification;
                                            if (!unknownModifications.TryGetValue(feature_description, out unknownModification))
                                            {
                                                unknownModification = new Modification(feature_description);
                                                unknownModifications[feature_description] = unknownModification;
                                            }
                                            residue_modifications.Add(unknownModification);
                                        }
                                    }
                                    else if (feature_type == "peptide" || feature_type == "propeptide" || feature_type == "chain" || feature_type == "signal peptide")
                                    {
                                        oneBasedBeginPositions.Add(oneBasedbeginPosition);
                                        oneBasedEndPositions.Add(oneBasedendPosition);
                                        peptideTypes.Add(feature_type);
                                    }
                                    oneBasedbeginPosition = null;
                                    oneBasedendPosition = null;
                                    oneBasedfeature_position = -1;
                                    break;

                                case "dbReference":
                                    dbReference_type = null;
                                    dbReference_id = null;
                                    break;

                                case "entry":
                                    if (accession != null && sequence != null)
                                    {
                                        var protein = new Protein(sequence, accession, oneBasedModifications, oneBasedBeginPositions.ToArray(), oneBasedEndPositions.ToArray(), peptideTypes.ToArray(), name, full_name, false, IsContaminant, goTerms);
                                        result.Add(protein);

                                        if (onTheFlyDecoys)
                                        {
                                            char[] sequence_array = sequence.ToCharArray();
                                            Dictionary<int, List<Modification>> decoy_modifications = null;
                                            if (sequence.StartsWith("M", StringComparison.InvariantCulture))
                                            {
                                                // Do not include the initiator methionine in reversal!!!
                                                Array.Reverse(sequence_array, 1, sequence.Length - 1);
                                                if (oneBasedModifications != null)
                                                {
                                                    decoy_modifications = new Dictionary<int, List<Modification>>(oneBasedModifications.Count);
                                                    foreach (var kvp in oneBasedModifications)
                                                    {
                                                        if (kvp.Key == 1)
                                                        {
                                                            // Position 1 stays in place with the methionine.
                                                            decoy_modifications.Add(1, kvp.Value);
                                                        }
                                                        else if (kvp.Key > 1)
                                                        {
                                                            decoy_modifications.Add(sequence.Length - kvp.Key + 2, kvp.Value);
                                                        }
                                                    }
                                                }
                                            }
                                            else
                                            {
                                                Array.Reverse(sequence_array);
                                                if (oneBasedModifications != null)
                                                {
                                                    decoy_modifications = new Dictionary<int, List<Modification>>(oneBasedModifications.Count);
                                                    foreach (var kvp in oneBasedModifications)
                                                    {
                                                        decoy_modifications.Add(sequence.Length - kvp.Key + 1, kvp.Value);
                                                    }
                                                }
                                            }
                                            var reversed_sequence = new string(sequence_array);

                                            // Mirror the peptide ranges: begin and end swap roles and
                                            // the list order is reversed.
                                            int?[] decoybeginPositions = new int?[oneBasedBeginPositions.Count];
                                            int?[] decoyendPositions = new int?[oneBasedEndPositions.Count];
                                            string[] decoyBigPeptideTypes = new string[oneBasedEndPositions.Count];
                                            for (int i = 0; i < decoybeginPositions.Length; i++)
                                            {
                                                decoybeginPositions[oneBasedBeginPositions.Count - i - 1] = sequence.Length - oneBasedEndPositions[i] + 1;
                                                decoyendPositions[oneBasedBeginPositions.Count - i - 1] = sequence.Length - oneBasedBeginPositions[i] + 1;
                                                decoyBigPeptideTypes[oneBasedBeginPositions.Count - i - 1] = peptideTypes[i];
                                            }

                                            var decoy_protein = new Protein(reversed_sequence, "DECOY_" + accession, decoy_modifications, decoybeginPositions, decoyendPositions, decoyBigPeptideTypes, name, full_name, true, IsContaminant, null);
                                            result.Add(decoy_protein);
                                        }
                                    }

                                    // Reset the per-entry state; fresh collections are created because
                                    // the previous ones were handed to the protein just built.
                                    accession = null;
                                    name = null;
                                    full_name = null;
                                    sequence = null;
                                    feature_type = null;
                                    feature_description = null;
                                    dbReference_type = null;
                                    dbReference_id = null;
                                    property_type = null;
                                    property_value = null;
                                    oneBasedfeature_position = -1;
                                    oneBasedModifications = new Dictionary<int, List<Modification>>();
                                    oneBasedBeginPositions = new List<int?>();
                                    oneBasedEndPositions = new List<int?>();
                                    peptideTypes = new List<string>();
                                    goTerms = new List<GoTerm>();
                                    break;
                            }
                            break;
                    }
                }
            }
        }
        // fasta db
        else
        {
            StreamReader fasta = new StreamReader(stream);
            StringBuilder sb = null;

            // ReadLine() returns null at end of input, so an empty file yields an empty result
            // instead of a NullReferenceException.
            string line;
            while ((line = fasta.ReadLine()) != null)
            {
                if (line.StartsWith(">", StringComparison.Ordinal))
                {
                    // fasta protein only has accession, fullname, sequence (no mods)
                    string[] delimiters = { ">", "|", " OS=" };
                    string[] delimiters_ensembl = { ">", " ", "\t" };
                    string[] output = line.Split(delimiters, StringSplitOptions.None);
                    string[] output_ensembl = line.Split(delimiters_ensembl, StringSplitOptions.None);

                    if (output.Length > 4)
                    {
                        accession = output[2];
                        name = accession;
                        full_name = output[3];
                    }
                    else if (output_ensembl.Length > 2)
                    {
                        accession = output_ensembl[1];
                        name = accession;
                        full_name = String.Join(" ", Enumerable.Range(2, output_ensembl.Length - 2).Select(i => output_ensembl[i]));
                    }
                    else
                    {
                        // can't read protein description
                        full_name = line.Substring(1);
                        accession = line.Substring(1);
                        // Set the name as the other branches do, so it cannot carry over from
                        // the previous protein.
                        name = accession;
                    }

                    // new protein
                    sb = new StringBuilder();
                }
                else if (sb != null)
                {
                    sb.Append(line.Trim());
                }

                // A protein is complete when the next line is a header or the input has ended.
                int nextChar = fasta.Peek();
                if ((nextChar == '>' || nextChar == -1) && accession != null && sb != null)
                {
                    var sequence = sb.ToString();
                    var protein = new Protein(sequence, accession, oneBasedModifications, oneBasedBeginPositions.ToArray(), oneBasedEndPositions.ToArray(), peptideTypes.ToArray(), name, full_name, false, IsContaminant, new List<GoTerm>());
                    result.Add(protein);

                    if (onTheFlyDecoys)
                    {
                        char[] sequence_array = sequence.ToCharArray();
                        int starts_with_met = Convert.ToInt32(sequence.StartsWith("M", StringComparison.InvariantCulture));
                        Array.Reverse(sequence_array, starts_with_met, sequence.Length - starts_with_met); // Do not include the initiator methionine in reversal!!!
                        var reversed_sequence = new string(sequence_array);
                        var decoy_protein = new Protein(reversed_sequence, "DECOY_" + accession, oneBasedModifications, oneBasedBeginPositions.ToArray(), oneBasedEndPositions.ToArray(), peptideTypes.ToArray(), name, full_name, true, IsContaminant, null);
                        result.Add(decoy_protein);
                    }
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Builds three GO-annotated proteins and two proteoform families (one experimental and one
/// theoretical proteoform each), then checks the induced/repressed proteins, the resulting
/// GO term number and the families reported as interesting for it.
/// </summary>
public void get_interesting_goterm_families()
{
    Sweet.lollipop = new Lollipop();
    var sweet = Sweet.lollipop;
    sweet.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(sweet.carbamidomethylation, sweet.neucode_labeled).AA_Masses;
    sweet.significance_by_permutation = true;
    sweet.significance_by_log2FC = false;

    Func<string, string, DatabaseReference> makeReference = (id, term) =>
        new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });

    // The first and third references describe the same GO term.
    DatabaseReference firstReference = makeReference("GO:1", "P:1");
    DatabaseReference secondReference = makeReference("GO:2", "P:2");
    DatabaseReference thirdReference = makeReference("GO:1", "P:1");

    GoTerm firstTerm = new GoTerm(firstReference);
    GoTerm secondTerm = new GoTerm(secondReference);
    GoTerm thirdTerm = new GoTerm(thirdReference);

    // The proteins differ only in accession, database reference and GO term.
    Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> makeProtein = (accession, reference, term) =>
        new ProteinWithGoTerms(
            "ASDF",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference> { reference },
            new List<GoTerm> { term });

    ProteinWithGoTerms firstProtein = makeProtein("T1", firstReference, firstTerm);
    ProteinWithGoTerms secondProtein = makeProtein("T2", secondReference, secondTerm);
    ProteinWithGoTerms thirdProtein = makeProtein("T3", thirdReference, thirdTerm);

    Dictionary<InputFile, Protein[]> proteinsByFile = new Dictionary<InputFile, Protein[]>
    {
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { firstProtein } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { secondProtein } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { thirdProtein } },
    };

    ExperimentalProteoform firstExperimental = ConstructorsForTesting.ExperimentalProteoform("E");
    ExperimentalProteoform secondExperimental = ConstructorsForTesting.ExperimentalProteoform("E");

    firstExperimental.quant.intensitySum = 1;
    firstExperimental.quant.TusherValues1.significant = true;
    firstExperimental.quant.tusherlogFoldChange = 1;

    secondExperimental.quant.intensitySum = 1;
    secondExperimental.quant.TusherValues1.significant = true;
    secondExperimental.quant.tusherlogFoldChange = 1;

    TheoreticalProteoform firstTheoretical = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", firstProtein, proteinsByFile);
    TheoreticalProteoform secondTheoretical = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", secondProtein, proteinsByFile);
    TheoreticalProteoform thirdTheoretical = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", thirdProtein, proteinsByFile);

    firstTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { firstProtein };
    secondTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { secondProtein };
    thirdTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { thirdProtein };

    firstTheoretical.begin = 1;
    firstTheoretical.end = 1;
    secondTheoretical.begin = 1;
    secondTheoretical.end = 1;
    thirdTheoretical.begin = 1;
    thirdTheoretical.end = 1;

    // Only one experimental-theoretical connection is allowed per experimental proteoform,
    // so the third theoretical is deliberately not related to the first experimental.
    make_relation(firstExperimental, firstTheoretical);
    make_relation(secondExperimental, secondTheoretical);

    ProteoformFamily firstFamily = new ProteoformFamily(firstExperimental);
    ProteoformFamily secondFamily = new ProteoformFamily(secondExperimental);
    firstFamily.construct_family();
    firstFamily.identify_experimentals();
    secondFamily.construct_family();
    secondFamily.identify_experimentals();
    List<ProteoformFamily> families = new List<ProteoformFamily> { firstFamily, secondFamily };

    firstTheoretical.family = firstFamily;
    thirdTheoretical.family = firstFamily;
    firstExperimental.family = firstFamily;
    secondTheoretical.family = secondFamily;
    secondExperimental.family = secondFamily;

    List<ExperimentalProteoform> fakeSignificant = new List<ExperimentalProteoform> { firstExperimental };
    List<ProteinWithGoTerms> significantProteins = sweet.getInducedOrRepressedProteins(fakeSignificant, sweet.TusherAnalysis1.GoAnalysis);
    List<GoTermNumber> goTermNumbers = sweet.TusherAnalysis1.GoAnalysis.getGoTermNumbers(
        significantProteins,
        new List<ProteinWithGoTerms> { firstProtein, secondProtein, thirdProtein });

    Assert.AreEqual(1, significantProteins.Count);
    Assert.AreEqual(1, goTermNumbers.Count);
    Assert.AreEqual("1", goTermNumbers.First().Id);
    Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), goTermNumbers.First().log_odds_ratio);

    List<ProteoformFamily> interestingFamilies = sweet.getInterestingFamilies(goTermNumbers, families);
    Assert.AreEqual(1, interestingFamilies.Count);
    Assert.AreEqual(1, interestingFamilies[0].theoretical_proteoforms.Count);
}
/// <summary>
/// Runs GO_analysis with a background protein list file ("test_protein_list.txt" in the
/// test directory) and checks the induced/repressed proteins, the single resulting GO term
/// number and the family reported as interesting for it.
/// </summary>
public void test_goterm_analysis_with_custom_list()
{
    Sweet.lollipop = new Lollipop();
    var sweet = Sweet.lollipop;
    sweet.theoretical_database.aaIsotopeMassList = new AminoAcidMasses(sweet.carbamidomethylation, sweet.neucode_labeled).AA_Masses;
    sweet.significance_by_permutation = true;
    sweet.significance_by_log2FC = false;

    Func<string, string, DatabaseReference> makeReference = (id, term) =>
        new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });

    // The first and third references describe the same GO term.
    DatabaseReference firstReference = makeReference("GO:1", "P:1");
    DatabaseReference secondReference = makeReference("GO:2", "P:2");
    DatabaseReference thirdReference = makeReference("GO:1", "P:1");

    GoTerm firstTerm = new GoTerm(firstReference);
    GoTerm secondTerm = new GoTerm(secondReference);
    GoTerm thirdTerm = new GoTerm(thirdReference);

    // The proteins differ only in accession, database reference and GO term.
    Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> makeProtein = (accession, reference, term) =>
        new ProteinWithGoTerms(
            "ASDF",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new List<ProteolysisProduct> { new ProteolysisProduct(0, 0, "") },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference> { reference },
            new List<GoTerm> { term });

    ProteinWithGoTerms firstProtein = makeProtein("T1", firstReference, firstTerm);
    ProteinWithGoTerms secondProtein = makeProtein("T2", secondReference, secondTerm);
    ProteinWithGoTerms thirdProtein = makeProtein("T3", thirdReference, thirdTerm);

    Dictionary<InputFile, Protein[]> proteinsByFile = new Dictionary<InputFile, Protein[]>
    {
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { firstProtein } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { secondProtein } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { thirdProtein } },
    };

    ExperimentalProteoform firstExperimental = ConstructorsForTesting.ExperimentalProteoform("E");
    ExperimentalProteoform secondExperimental = ConstructorsForTesting.ExperimentalProteoform("E");

    firstExperimental.quant.intensitySum = 1;
    firstExperimental.quant.TusherValues1.significant = true;
    firstExperimental.quant.tusherlogFoldChange = 1;

    secondExperimental.quant.intensitySum = 1;
    secondExperimental.quant.TusherValues1.significant = true;
    secondExperimental.quant.tusherlogFoldChange = 1;

    TheoreticalProteoform firstTheoretical = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", firstProtein, proteinsByFile);
    TheoreticalProteoform secondTheoretical = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", secondProtein, proteinsByFile);
    TheoreticalProteoform thirdTheoretical = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", thirdProtein, proteinsByFile);

    firstTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { firstProtein };
    secondTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { secondProtein };
    thirdTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { thirdProtein };

    // Two theoreticals with the same GO term are related to the first experimental:
    // expecting one GoTerm number but two theoretical proteins.
    make_relation(firstExperimental, firstTheoretical);
    make_relation(firstExperimental, thirdTheoretical);
    make_relation(secondExperimental, secondTheoretical);

    ProteoformFamily firstFamily = new ProteoformFamily(firstExperimental);
    ProteoformFamily secondFamily = new ProteoformFamily(secondExperimental);
    firstFamily.construct_family();
    firstFamily.identify_experimentals();
    secondFamily.construct_family();
    secondFamily.identify_experimentals();
    List<ProteoformFamily> families = new List<ProteoformFamily> { firstFamily, secondFamily };

    firstTheoretical.family = firstFamily;
    thirdTheoretical.family = firstFamily;
    firstExperimental.family = firstFamily;
    secondTheoretical.family = secondFamily;
    secondExperimental.family = secondFamily;

    sweet.TusherAnalysis1.inducedOrRepressedProteins = sweet.getInducedOrRepressedProteins(
        new List<ExperimentalProteoform> { firstExperimental },
        sweet.TusherAnalysis1.GoAnalysis);
    sweet.TusherAnalysis1.GoAnalysis.allTheoreticalProteins = true;
    sweet.theoretical_database.expanded_proteins = new ProteinWithGoTerms[] { firstProtein, secondProtein, thirdProtein };
    sweet.TusherAnalysis1.GoAnalysis.backgroundProteinsList = Path.Combine(TestContext.CurrentContext.TestDirectory, "test_protein_list.txt");
    sweet.TusherAnalysis1.GoAnalysis.GO_analysis(sweet.TusherAnalysis1.inducedOrRepressedProteins);

    // Only taking one ET connection by definition in forming ET relations; only one is used
    // in identify theoreticals.
    Assert.AreEqual(1, sweet.TusherAnalysis1.inducedOrRepressedProteins.Count);
    Assert.AreEqual(1, sweet.TusherAnalysis1.GoAnalysis.goTermNumbers.Count);
    Assert.AreEqual("1", sweet.TusherAnalysis1.GoAnalysis.goTermNumbers.First().Id);
    Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), sweet.TusherAnalysis1.GoAnalysis.goTermNumbers.First().log_odds_ratio);

    List<ProteoformFamily> interestingFamilies = sweet.getInterestingFamilies(sweet.TusherAnalysis1.GoAnalysis.goTermNumbers, families);
    Assert.AreEqual(1, interestingFamilies.Count);
    Assert.AreEqual(2, interestingFamilies[0].theoretical_proteoforms.Count);
}
/// <summary>
/// Builds three GO-annotated proteins and two proteoform families (one experimental and one
/// theoretical proteoform each), then checks the induced/repressed proteins, the resulting
/// GO term number and the families reported as interesting for it.
/// </summary>
public void get_interesting_goterm_families()
{
    SaveState.lollipop = new Lollipop();
    var lollipop = SaveState.lollipop;

    Func<string, string, DatabaseReference> makeReference = (id, term) =>
        new DatabaseReference("GO", id, new List<Tuple<string, string>> { new Tuple<string, string>("term", term) });

    // The first and third references describe the same GO term.
    DatabaseReference firstReference = makeReference("GO:1", "P:1");
    DatabaseReference secondReference = makeReference("GO:2", "P:2");
    DatabaseReference thirdReference = makeReference("GO:1", "P:1");

    GoTerm firstTerm = new GoTerm(firstReference);
    GoTerm secondTerm = new GoTerm(secondReference);
    GoTerm thirdTerm = new GoTerm(thirdReference);

    // The proteins differ only in accession, database reference and GO term.
    Func<string, DatabaseReference, GoTerm, ProteinWithGoTerms> makeProtein = (accession, reference, term) =>
        new ProteinWithGoTerms(
            "",
            accession,
            new List<Tuple<string, string>> { new Tuple<string, string>("", "") },
            new Dictionary<int, List<Modification>>(),
            new int?[] { 0 },
            new int?[] { 0 },
            new string[] { "" },
            "T2",
            "T3",
            true,
            false,
            new List<DatabaseReference> { reference },
            new List<GoTerm> { term });

    ProteinWithGoTerms firstProtein = makeProtein("T1", firstReference, firstTerm);
    ProteinWithGoTerms secondProtein = makeProtein("T2", secondReference, secondTerm);
    ProteinWithGoTerms thirdProtein = makeProtein("T3", thirdReference, thirdTerm);

    Dictionary<InputFile, Protein[]> proteinsByFile = new Dictionary<InputFile, Protein[]>
    {
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { firstProtein } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { secondProtein } },
        { new InputFile("fake.txt", Purpose.ProteinDatabase), new Protein[] { thirdProtein } },
    };

    ExperimentalProteoform firstExperimental = ConstructorsForTesting.ExperimentalProteoform("E");
    ExperimentalProteoform secondExperimental = ConstructorsForTesting.ExperimentalProteoform("E");

    firstExperimental.quant.intensitySum = 1;
    firstExperimental.quant.FDR = 0;
    firstExperimental.quant.logFoldChange = 1;

    secondExperimental.quant.intensitySum = 1;
    secondExperimental.quant.FDR = 0;
    secondExperimental.quant.logFoldChange = 1;

    TheoreticalProteoform firstTheoretical = ConstructorsForTesting.make_a_theoretical("T1_T1_asdf", firstProtein, proteinsByFile);
    TheoreticalProteoform secondTheoretical = ConstructorsForTesting.make_a_theoretical("T2_T1_asdf_asdf", secondProtein, proteinsByFile);
    TheoreticalProteoform thirdTheoretical = ConstructorsForTesting.make_a_theoretical("T3_T1_asdf_Asdf_Asdf", thirdProtein, proteinsByFile);

    firstTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { firstProtein };
    secondTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { secondProtein };
    thirdTheoretical.ExpandedProteinList = new List<ProteinWithGoTerms> { thirdProtein };

    // Only one experimental-theoretical connection is allowed per experimental proteoform,
    // so the third theoretical is deliberately not related to the first experimental.
    make_relation(firstExperimental, firstTheoretical);
    make_relation(secondExperimental, secondTheoretical);

    ProteoformFamily firstFamily = new ProteoformFamily(firstExperimental);
    ProteoformFamily secondFamily = new ProteoformFamily(secondExperimental);
    firstFamily.construct_family();
    firstFamily.identify_experimentals();
    secondFamily.construct_family();
    secondFamily.identify_experimentals();
    List<ProteoformFamily> families = new List<ProteoformFamily> { firstFamily, secondFamily };

    firstTheoretical.family = firstFamily;
    thirdTheoretical.family = firstFamily;
    firstExperimental.family = firstFamily;
    secondTheoretical.family = secondFamily;
    secondExperimental.family = secondFamily;

    List<ExperimentalProteoform> fakeSignificant = new List<ExperimentalProteoform> { firstExperimental };
    List<ProteinWithGoTerms> significantProteins = lollipop.getInducedOrRepressedProteins(fakeSignificant, 0, 1, 0);
    List<GoTermNumber> goTermNumbers = lollipop.getGoTermNumbers(
        significantProteins,
        new List<ProteinWithGoTerms> { firstProtein, secondProtein, thirdProtein });

    Assert.AreEqual(1, significantProteins.Count);
    Assert.AreEqual(1, goTermNumbers.Count);
    Assert.AreEqual("1", goTermNumbers.First().Id);
    Assert.AreEqual(0 - (decimal)Math.Log(2d / 3d, 2), goTermNumbers.First().log_odds_ratio);

    List<ProteoformFamily> interestingFamilies = lollipop.getInterestingFamilies(goTermNumbers, families);
    Assert.AreEqual(1, interestingFamilies.Count);
    Assert.AreEqual(1, interestingFamilies[0].theoretical_proteoforms.Count);
}