// Writes a Cytoscape script for one family built from a single top-down proteoform, then reads the
// generated edge and node tables back from the test directory and checks that both tables name the
// same set of exactly two nodes.
public void cytoscape_script_from_topdown()
{
    Sweet.lollipop = new Lollipop();
    ProteoformCommunity td_community = TestProteoformFamilies.construct_community_with_td_proteoforms(-1);
    Sweet.lollipop.target_proteoform_community = td_community;

    TopDownProteoform topdown = ConstructorsForTesting.TopDownProteoform("ASDF", 1000, 50);
    topdown.gene_name = new GeneName(new List<Tuple<string, string>> { new Tuple<string, string>("genename", "genename") });
    ProteoformFamily family = new ProteoformFamily(topdown);
    family.construct_family();

    string output_directory = TestContext.CurrentContext.TestDirectory;
    CytoscapeScript.write_cytoscape_script(
        new List<ProteoformFamily> { family },
        new List<ProteoformFamily> { family },
        output_directory, "", "test", null, false, false,
        CytoscapeScript.color_scheme_names[0],
        Lollipop.edge_labels[0],
        Lollipop.node_labels[0],
        CytoscapeScript.node_label_positions[0],
        Lollipop.node_positioning[0],
        2, true,
        Lollipop.gene_name_labels[1]);

    // Edge table: skip the first line, stop at the first empty line, and collect the values found
    // in the first and third tab-separated columns.
    string edge_path = Path.Combine(output_directory, CytoscapeScript.edge_file_prefix + "test" + CytoscapeScript.edge_file_extension);
    HashSet<string> names_in_edges = new HashSet<string>();
    foreach (string row in File.ReadAllLines(edge_path).Skip(1).TakeWhile(r => r != ""))
    {
        string[] columns = row.Split('\t');
        names_in_edges.Add(columns[0]);
        names_in_edges.Add(columns[2]);
    }

    // Node table: same skipping rules; only the first column is collected.
    string node_path = Path.Combine(output_directory, CytoscapeScript.node_file_prefix + "test" + CytoscapeScript.node_file_extension);
    HashSet<string> names_in_nodes = new HashSet<string>();
    foreach (string row in File.ReadAllLines(node_path).Skip(1).TakeWhile(r => r != ""))
    {
        names_in_nodes.Add(row.Split('\t')[0]);
    }

    // Both tables must refer to exactly the same names, and there must be two of them.
    Assert.True(names_in_nodes.All(name => names_in_edges.Contains(name)));
    Assert.True(names_in_edges.All(name => names_in_nodes.Contains(name)));
    Assert.AreEqual(2, names_in_nodes.Count);
    Assert.AreEqual(2, names_in_edges.Count);
}
// Builds result tables for a community holding an experimental family, then a top-down family,
// then both, and checks the number of text lines produced and that the expected accessions appear.
public void results_dataframe_with_something()
{
    Sweet.lollipop = new Lollipop();
    Sweet.lollipop.input_files.Add(ConstructorsForTesting.InputFile("fake.txt", Labeling.NeuCode, Purpose.Identification, "n", "s", "1", "1", "1"));

    // Every table is rendered to one string and split on Environment.NewLine; empty entries are kept,
    // so the counts asserted below include any empty trailing entry the renderer produces.
    Func<string, string[]> split_lines = table_text => table_text.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);

    // Experimental proteoform linked to a theoretical reference.
    ExperimentalProteoform e = ConstructorsForTesting.ExperimentalProteoform("E1");
    e.linked_proteoform_references = new List<Proteoform> { ConstructorsForTesting.make_a_theoretical() };
    e.ptm_set = e.linked_proteoform_references.Last().ptm_set;
    ProteoformFamily f = new ProteoformFamily(e);
    f.construct_family();
    Sweet.lollipop.target_proteoform_community.families = new List<ProteoformFamily> { f };

    string[] lines = split_lines(ResultsSummaryGenerator.datatable_tostring(ResultsSummaryGenerator.experimental_results_dataframe(Sweet.lollipop.target_proteoform_community, Sweet.lollipop.TusherAnalysis1)));
    Assert.AreEqual(3, lines.Length);
    Assert.True(lines.Any(a => a.Contains("E1")));

    // Top-down proteoform linked to its own theoretical reference.
    TopDownProteoform td = ConstructorsForTesting.TopDownProteoform("TD1", 1000, 10);
    td.linked_proteoform_references = new List<Proteoform> { ConstructorsForTesting.make_a_theoretical() };
    // NOTE(review): this takes the PTM set from e's reference, not td's own - confirm that is intended.
    td.ptm_set = e.linked_proteoform_references.Last().ptm_set;
    ProteoformFamily f2 = new ProteoformFamily(td);
    f2.construct_family();
    Sweet.lollipop.target_proteoform_community.families = new List<ProteoformFamily> { f2 };
    Sweet.lollipop.topdown_proteoforms = new List<TopDownProteoform> { td };

    lines = split_lines(ResultsSummaryGenerator.datatable_tostring(ResultsSummaryGenerator.experimental_results_dataframe(Sweet.lollipop.target_proteoform_community, Sweet.lollipop.TusherAnalysis1)));
    Assert.AreEqual(3, lines.Length);

    lines = split_lines(ResultsSummaryGenerator.datatable_tostring(ResultsSummaryGenerator.topdown_results_dataframe()));
    Assert.AreEqual(3, lines.Length);
    Assert.True(lines.Any(a => a.Contains("TD1")));

    // With both families present, the intensities table must mention both proteoforms.
    Sweet.lollipop.target_proteoform_community.families = new List<ProteoformFamily> { f, f2 };
    lines = split_lines(ResultsSummaryGenerator.datatable_tostring(ResultsSummaryGenerator.experimental_intensities_dataframe()));
    Assert.AreEqual(4, lines.Length); // expected value first, so a failure message reads correctly
    Assert.True(lines.Any(a => a.Contains("E1")));
    Assert.True(lines.Any(a => a.Contains("TD1")));
}
// Renders the Cytoscape node table for a family built from one top-down proteoform and checks
// that the text contains the accession and the mass the proteoform was constructed with.
public void nodes_table_gives_meaningful_topdown()
{
    TopDownProteoform topdown = ConstructorsForTesting.TopDownProteoform("ACC", 999.99, 50);
    ProteoformFamily family = new ProteoformFamily(topdown);
    family.construct_family();

    string nodes_tsv = CytoscapeScript.get_cytoscape_nodes_tsv(
        new List<ProteoformFamily> { family },
        null,
        CytoscapeScript.color_scheme_names[0],
        Lollipop.edge_labels[0],
        Lollipop.node_labels[0],
        Lollipop.node_positioning[0],
        2,
        family.theoretical_proteoforms,
        false,
        Lollipop.gene_name_labels[1]);

    foreach (string expected_text in new[] { "ACC", "999.99" })
    {
        Assert.True(nodes_tsv.Contains(expected_text));
    }
}
// Smoke test: populates a fresh Lollipop with one quantitative value set, one GO term number and
// one top-down proteoform, then calls ResultsSummaryGenerator.save_all against the test directory.
// There are no assertions; the test only fails if the call throws.
public void saveall()
{
    Sweet.lollipop = new Lollipop();

    ExperimentalProteoform experimental = ConstructorsForTesting.ExperimentalProteoform("asdf");
    Sweet.lollipop.qVals.Add(experimental.quant);

    GoTermNumber go_term_number = new GoTermNumber(new GoTerm("id", "desc", Aspect.BiologicalProcess), 0, 0, 0, 0) { by = -1 };
    Sweet.lollipop.TusherAnalysis1.GoAnalysis.goTermNumbers.Add(go_term_number);

    Sweet.lollipop.topdown_proteoforms = new List<TopDownProteoform> { ConstructorsForTesting.TopDownProteoform("td1", 1000, 10) };

    // The same analysis object is passed twice, once viewed through each type save_all asks for.
    IGoAnalysis go_analysis = Sweet.lollipop.TusherAnalysis1 as IGoAnalysis;
    TusherAnalysis tusher_analysis = Sweet.lollipop.TusherAnalysis1 as TusherAnalysis;
    ResultsSummaryGenerator.save_all(TestContext.CurrentContext.TestDirectory, Sweet.time_stamp(), go_analysis, tusher_analysis);
}
// Loads a small protein database and PTM list, generates theoretical proteoforms, then supplies five
// top-down proteoforms and checks which of them stay accepted and which extra proteins / theoretical
// proteoforms are created by make_theoretical_proteoforms.
public void test_add_topdown_theoreticals()
{
    const string database_sequence = "ADGYEEIIITNQQSFYSVDLEVGTPPQNVTVLVDTGSSDLWIMGSDNPYCSSNSMGSSRRR";
    const string topdown_only_sequence = "VKLTSIAAGVAAIAATASATTTLAQSDERVNLVELGVYVSDIRAHLA";

    Sweet.lollipop = new Lollipop();

    // Intent of each proteoform, as described by the original author:
    TopDownProteoform t = ConstructorsForTesting.TopDownProteoform("P32329_1", 1000, 10);      // sequence not in database
    TopDownProteoform t2 = ConstructorsForTesting.TopDownProteoform("BADACCESSION", 1000, 10); // accession not in database
    TopDownProteoform t3 = ConstructorsForTesting.TopDownProteoform("P32329_3", 1000, 10);     // ptmset not in database with this sequence
    TopDownProteoform t4 = ConstructorsForTesting.TopDownProteoform("P32329_4", 1000, 10);     // in database --> won't make a theoretical proteoform
    TopDownProteoform t5 = ConstructorsForTesting.TopDownProteoform("P32329_5", 1000, 10);     // sequence not in database, with a ptmset

    t3.sequence = database_sequence;
    t4.sequence = database_sequence;
    t.sequence = topdown_only_sequence;
    t5.sequence = topdown_only_sequence;

    foreach (TopDownProteoform topdown in new[] { t, t2, t3, t4, t5 })
    {
        topdown.accepted = true;
    }

    t3.topdown_begin = 68;
    t3.topdown_end = 128;

    ModificationMotif motif;
    ModificationMotif.TryGetMotif("K", out motif);
    Modification t3_modification = new Modification("Acetylation", _modificationType: "Unlocalized", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 79.96);
    t3.topdown_ptm_set = new PtmSet(new List<Ptm> { new Ptm(10, t3_modification) });
    // NOTE(review): "Unloaclized" is spelled differently from t3's "Unlocalized"; kept verbatim - confirm whether the difference matters.
    Modification t5_modification = new Modification("Acetylation", _modificationType: "Unloaclized", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 42.02);
    t5.topdown_ptm_set = new PtmSet(new List<Ptm> { new Ptm(15, t5_modification) });

    // Database-generation settings.
    Sweet.lollipop.methionine_oxidation = false;
    Sweet.lollipop.carbamidomethylation = false;
    Sweet.lollipop.methionine_cleavage = true;
    Sweet.lollipop.combine_identical_sequences = false; // NOTE(review): overwritten with true a few lines below
    Sweet.lollipop.combine_theoretical_proteoforms_byMass = false;
    Sweet.lollipop.max_ptms = 3;
    Sweet.lollipop.decoy_databases = 1;
    Sweet.lollipop.min_peptide_length = 7;
    Sweet.lollipop.ptmset_mass_tolerance = 0.00001;
    Sweet.lollipop.combine_identical_sequences = true;
    Sweet.lollipop.theoretical_database.limit_triples_and_greater = false;

    // Register the protein database and the PTM list, then build the theoretical proteoforms.
    string test_directory = TestContext.CurrentContext.TestDirectory;
    string database_path = Path.Combine(test_directory, "uniprot_yeast_test_12entries.xml");
    string ptm_list_path = Path.Combine(test_directory, "ptmlist.txt");
    Sweet.lollipop.enter_input_files(new string[] { database_path }, Lollipop.acceptable_extensions[2], Lollipop.file_types[2], Sweet.lollipop.input_files, false);
    Sweet.lollipop.enter_input_files(new string[] { ptm_list_path }, Lollipop.acceptable_extensions[2], Lollipop.file_types[2], Sweet.lollipop.input_files, false);
    Sweet.lollipop.theoretical_database.theoretical_proteins.Clear();
    Sweet.lollipop.theoretical_database.get_theoretical_proteoforms(Path.Combine(test_directory));
    Assert.AreEqual(28, Sweet.lollipop.target_proteoform_community.theoretical_proteoforms.Length);

    // Add the top-down proteoforms and regenerate.
    Sweet.lollipop.topdown_proteoforms = new List<TopDownProteoform> { t, t2, t3, t4, t5 };
    Sweet.lollipop.theoretical_database.make_theoretical_proteoforms();

    // Only t2 is expected to lose its accepted flag.
    Assert.IsTrue(t.accepted);
    Assert.IsFalse(t2.accepted);
    Assert.IsTrue(t3.accepted);
    Assert.IsTrue(t4.accepted);
    Assert.IsTrue(t5.accepted);

    // Exactly one protein flagged topdown_protein is expected, carrying the top-down-only sequence.
    Assert.AreEqual(26, Sweet.lollipop.theoretical_database.expanded_proteins.Length);
    Assert.AreEqual(1, Sweet.lollipop.theoretical_database.expanded_proteins.Count(p => p.topdown_protein));
    Assert.AreEqual(topdown_only_sequence, Sweet.lollipop.theoretical_database.expanded_proteins.First(p => p.topdown_protein).BaseSequence);

    List<TheoreticalProteoform> td_theoreticals = Sweet.lollipop.target_proteoform_community.theoretical_proteoforms
        .Where(p => p.topdown_theoretical)
        .OrderBy(p => p.accession)
        .ToList();

    // The unmodified database sequence is still represented once by a non-top-down theoretical.
    Assert.AreEqual(1, Sweet.lollipop.target_proteoform_community.theoretical_proteoforms.Count(p =>
        !p.topdown_theoretical && p.sequence == database_sequence && p.ptm_set.ptm_combination.Count == 0));

    // Top-down theoreticals: new sequence unmodified, new sequence acetylated, database sequence acetylated.
    Assert.AreEqual(1, td_theoreticals.Count(p => p.sequence == topdown_only_sequence && p.ptm_set.ptm_combination.Count == 0));
    Assert.AreEqual(1, td_theoreticals.Count(p => p.sequence == topdown_only_sequence && p.ptm_set.ptm_description == "Acetylation"));
    Assert.AreEqual(1, td_theoreticals.Count(p => p.sequence == database_sequence && p.ptm_set.ptm_description == "Acetylation"));

    Assert.AreEqual(31, Sweet.lollipop.target_proteoform_community.theoretical_proteoforms.Length);
    Assert.AreEqual(2, Sweet.lollipop.target_proteoform_community.theoretical_proteoforms.Count(p => p.ExpandedProteinList.Any(e => e.topdown_protein)));
}
// Exercises TopDownProteoform.set_correct_id over combinations of begin/end positions and PTM sets
// on the proteoform (begin, end, ptm_set) versus its top-down values (topdown_begin, topdown_end,
// topdown_ptm_set), asserting the resulting correct_id flag after each change.
public void TestCorrectTopDownID()
{
    TopDownProteoform td = ConstructorsForTesting.TopDownProteoform("TD1", 1000, 40);

    // No linked reference has been assigned yet: expect false.
    td.set_correct_id();
    Assert.IsFalse(td.correct_id);

    TheoreticalProteoform theoretical = ConstructorsForTesting.make_a_theoretical();
    theoretical.ExpandedProteinList.First().AccessionList.Add("TD1");
    td.linked_proteoform_references = new List<Proteoform> { theoretical };

    // No PTMs, same begin (10/10), different end (20 vs 30): expect false.
    td.ptm_set = new PtmSet(new List<Ptm>());
    td.topdown_ptm_set = new PtmSet(new List<Ptm>());
    td.begin = 10;
    td.topdown_begin = 10;
    td.end = 20;
    td.topdown_end = 30;
    td.set_correct_id();
    Assert.IsFalse(td.correct_id);

    // No PTMs, different begin (10 vs 20), same end (30/30): expect false.
    td.begin = 10;
    td.topdown_begin = 20;
    td.end = 30;
    td.topdown_end = 30;
    td.set_correct_id();
    Assert.IsFalse(td.correct_id);

    // No PTMs, same begin and end: expect true.
    td.begin = 10;
    td.topdown_begin = 10;
    td.end = 30;
    td.topdown_end = 30;
    td.set_correct_id();
    Assert.IsTrue(td.correct_id);

    ModificationMotif motif;
    ModificationMotif.TryGetMotif("K", out motif);
    // Builds a fresh PTM set (new Ptm and Modification instances on every call) with one acetylation at position 15.
    Func<PtmSet> acetylated_at_15 = () => new PtmSet(new List<Ptm>
    {
        new Ptm(15, new Modification("Acetylation", _modificationType: "type", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 42.02))
    });

    // Same begin and end; ptm_set has a PTM that topdown_ptm_set lacks: expect false.
    td.ptm_set = acetylated_at_15();
    td.set_correct_id();
    Assert.IsFalse(td.correct_id);

    // Same begin and end; topdown_ptm_set has a PTM that ptm_set lacks: expect false.
    td.ptm_set = new PtmSet(new List<Ptm>());
    td.topdown_ptm_set = acetylated_at_15();
    td.set_correct_id();
    Assert.IsFalse(td.correct_id);

    // Same begin and end, and equivalent PTMs on both sides: expect true.
    td.ptm_set = acetylated_at_15();
    td.topdown_ptm_set = acetylated_at_15();
    td.set_correct_id();
    Assert.IsTrue(td.correct_id);
}
// Aggregates five raw components together with top-down proteoforms, relates the resulting
// experimental proteoforms to a single theoretical proteoform, and checks how many experimental
// proteoforms are flagged topdown_id and which top-down proteoforms receive relations.
// A second pass checks that, of two competing top-down proteoforms, the one whose hit has the
// higher score ends up with a matching experimental proteoform.
public void TestRelateTD()
{
    Sweet.lollipop = new Lollipop();
    Sweet.lollipop.neucode_labeled = false;
    Sweet.lollipop.maximum_missed_monos = 1;
    Sweet.lollipop.agg_minBiorepsWithObservations = 0;
    InputFile f = new InputFile("path", Purpose.Identification);
    Sweet.lollipop.input_files.Add(f);

    // Raw components. c4 belongs to a separate InputFile instance that is not added to input_files.
    Component c1 = relate_td_component(f, "1", 1000.0, 45);
    Component c2 = relate_td_component(f, "2", 1000.0, 85);
    Component c3 = relate_td_component(f, "3", 1131.04, 45);
    Component c4 = relate_td_component(new InputFile("path", Purpose.Identification), "4", 2000.00, 45);
    // NOTE(review): c5 reuses id "2" (same as c2) exactly as in the original test - confirm this is intended.
    Component c5 = relate_td_component(f, "2", 1001.0, 45);
    Sweet.lollipop.raw_experimental_components = new List<Component> { c1, c2, c3, c4, c5 };

    TopDownProteoform td1 = ConstructorsForTesting.TopDownProteoform("ACCESSION_1", 1000.0, 45);
    TopDownProteoform td2 = ConstructorsForTesting.TopDownProteoform("ACCESSION_2", 1001.0, 85);
    TopDownProteoform td3 = ConstructorsForTesting.TopDownProteoform("ACCESSION_3", 1131.04, 45);
    TheoreticalProteoform t1 = ConstructorsForTesting.make_a_theoretical("ACCESSION", 1000.0, 1);

    // Build the accession lookup by hand for community number -100.
    TestProteoformCommunityRelate.prepare_for_et(new List<double>() { 0 });
    Sweet.lollipop.target_proteoform_community.community_number = -100;
    Sweet.lollipop.theoretical_database.theoreticals_by_accession = new Dictionary<int, Dictionary<string, List<TheoreticalProteoform>>>();
    Sweet.lollipop.theoretical_database.theoreticals_by_accession.Add(-100, new Dictionary<string, List<TheoreticalProteoform>>());
    Sweet.lollipop.theoretical_database.theoreticals_by_accession[-100].Add(t1.accession, new List<TheoreticalProteoform>() { t1 });

    // Register a PTM set under key -1.0 (described in the original comments as the "deconvolution error" entry).
    ModificationMotif motif;
    ModificationMotif.TryGetMotif("S", out motif);
    Modification m = new Modification("id", _modificationType: "modtype", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 1);
    Sweet.lollipop.theoretical_database.all_mods_with_mass.Add(m);
    PtmSet set = new PtmSet(new List<Ptm> { new Ptm(-1, m) });
    Sweet.lollipop.theoretical_database.all_possible_ptmsets.Add(set);
    Sweet.lollipop.modification_ranks.Add(-1.0023, 2);
    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.Add(-1.0, new List<PtmSet>() { set });

    // Register a second PTM set under key -87.0 (described in the original comments as the "missing error" entry).
    Modification m2 = new Modification("id", _modificationType: "modtype", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 1);
    Sweet.lollipop.theoretical_database.all_mods_with_mass.Add(m2);
    PtmSet set2 = new PtmSet(new List<Ptm> { new Ptm(-1, m2) });
    Sweet.lollipop.theoretical_database.all_possible_ptmsets.Add(set2);
    Sweet.lollipop.modification_ranks.Add(-87.03, 2);
    Sweet.lollipop.theoretical_database.possible_ptmset_dictionary.Add(-87.0, new List<PtmSet>() { set2 });

    Sweet.lollipop.target_proteoform_community.theoretical_proteoforms = new List<TheoreticalProteoform>() { t1 }.ToArray();
    Sweet.lollipop.topdown_proteoforms = new List<TopDownProteoform> { td1, td2, td3 };
    Sweet.lollipop.add_td_proteoforms = true;
    Sweet.lollipop.aggregate_proteoforms(Sweet.lollipop.validate_proteoforms, Sweet.lollipop.raw_neucode_pairs, Sweet.lollipop.raw_experimental_components, Sweet.lollipop.raw_quantification_components, 0);
    List<ProteoformRelation> relations = Sweet.lollipop.target_proteoform_community.relate(Sweet.lollipop.target_proteoform_community.experimental_proteoforms, Sweet.lollipop.target_proteoform_community.theoretical_proteoforms, ProteoformComparison.ExperimentalTheoretical, true, Environment.CurrentDirectory, true);
    // The returned peak list is not inspected; the call is kept because the original test made it.
    Sweet.lollipop.target_proteoform_community.accept_deltaMass_peaks(relations, new Dictionary<string, List<ProteoformRelation>>());

    // Expect 4 experimental proteoforms in total: 3 flagged topdown_id and 1 that is not.
    Assert.AreEqual(3, Sweet.lollipop.target_proteoform_community.experimental_proteoforms.Count(e => e.topdown_id));
    Assert.AreEqual(4, Sweet.lollipop.target_proteoform_community.experimental_proteoforms.Count());
    Assert.AreEqual(1, relations.Count);
    // A direct cast is used so that an unexpected type fails with InvalidCastException rather than a null dereference.
    Assert.AreEqual(1, relations.Count(r => r.RelationType == ProteoformComparison.ExperimentalTheoretical && ((ExperimentalProteoform)r.connected_proteoforms[0]).topdown_id));
    Assert.AreEqual(1, td1.relationships.Count(r => r.RelationType == ProteoformComparison.ExperimentalTheoretical && ((ExperimentalProteoform)r.connected_proteoforms[0]).topdown_id));
    Assert.AreEqual(0, td2.relationships.Count());
    Assert.AreEqual(0, td3.relationships.Count());

    // Second pass: ACCESSION_3 has the higher-scoring hit, so it should get the experimental match.
    Sweet.lollipop.clear_td();
    td3 = ConstructorsForTesting.TopDownProteoform("ACCESSION_3", 1000.0, 45);
    TopDownHit h3 = new TopDownHit();
    h3.score = 100;
    td3.topdown_hits = new List<TopDownHit>() { h3 };
    TopDownHit h4 = new TopDownHit();
    h4.score = 1;
    TopDownProteoform td4 = ConstructorsForTesting.TopDownProteoform("ACCESSION_4", 1001.0, 45);
    td4.topdown_hits = new List<TopDownHit>() { h4 };
    // The lower-scoring td4 is listed first. The original followed this with a bare
    // `topdown_proteoforms.OrderBy(...)` whose result was discarded; LINQ OrderBy never reorders
    // its source, so that statement had no effect and has been removed.
    Sweet.lollipop.topdown_proteoforms = new List<TopDownProteoform> { td4, td3 };
    Sweet.lollipop.aggregate_proteoforms(Sweet.lollipop.validate_proteoforms, Sweet.lollipop.raw_neucode_pairs, Sweet.lollipop.raw_experimental_components, Sweet.lollipop.raw_quantification_components, 0);
    Assert.AreEqual(0, Math.Round(td3.modified_mass - td3.matching_experimental.modified_mass, 0));
    Assert.IsNull(td4.matching_experimental);
}

// Creates an accepted Component for TestRelateTD with the given file, id, mass and retention-time apex,
// an intensity sum of 1e6, and a single charge state (charge 1) built from that intensity and mass.
private static Component relate_td_component(InputFile input_file, string id, double mass, double rt_apex)
{
    Component component = new Component();
    component.weighted_monoisotopic_mass = mass;
    component.rt_apex = rt_apex;
    component.accepted = true;
    component.id = id;
    component.intensity_sum = 1e6;
    component.input_file = input_file;
    component.charge_states = new List<ChargeState>() { new ChargeState(1, component.intensity_sum, component.weighted_monoisotopic_mass) };
    return component;
}