/// <summary>
/// Initializes this instance from an ontology association and an optional
/// list of background symbols.
/// </summary>
/// <param name="mbco_association">Association source; a deep copy of it is stored, the argument itself is not kept.</param>
/// <param name="ontology">Ontology type stored on this instance.</param>
/// <param name="bg_symbols">
/// Background symbols. A copy is stored in <c>Bg_genes</c>; when at least one
/// symbol is supplied, the copied association is passed through
/// <c>Keep_only_bg_symbols</c> with these symbols.
/// </param>
public void Generate(MBCO_association_class mbco_association, Ontology_type_enum ontology, params string[] bg_symbols)
{
    this.Ontology = ontology;
    this.Bg_genes = Array_class.Deep_copy_string_array(bg_symbols);
    this.MBCO_association = mbco_association.Deep_copy();

    // Without background symbols the association copy is left unfiltered.
    if (this.Bg_genes.Length <= 0)
    {
        return;
    }

    this.MBCO_association.Keep_only_bg_symbols(bg_symbols);
}
/// <summary>
/// Copies process id, process level and parent process name from the
/// ontology association onto every enrichment line whose <c>Scp_name</c>
/// equals an association's <c>ProcessName</c>.
/// </summary>
/// <param name="enrichment_lines">Lines to annotate; the line objects are modified in place.</param>
/// <returns>The same line objects in a new array ordered by <c>Scp_name</c>.</returns>
/// <remarks>
/// Both sequences are walked once with a shared cursor (sorted merge). This
/// relies on <c>Order_by_processName_symbol</c> producing an order compatible
/// with <c>string.CompareTo</c> on the process name - TODO confirm against
/// that method's implementation.
/// </remarks>
private Ontology_enrichment_line_class[] Add_missing_process_information(Ontology_enrichment_line_class[] enrichment_lines)
{
    enrichment_lines = enrichment_lines.OrderBy(line => line.Scp_name).ToArray();

    MBCO_association.Order_by_processName_symbol();
    int association_count = MBCO_association.MBCO_associations.Length;
    int association_index = 0;

    foreach (Ontology_enrichment_line_class current_line in enrichment_lines)
    {
        while (association_index < association_count)
        {
            MBCO_association_line_class candidate = MBCO_association.MBCO_associations[association_index];
            int comparison = candidate.ProcessName.CompareTo(current_line.Scp_name);

            if (comparison > 0)
            {
                // The association cursor is already past this process name: nothing to copy.
                break;
            }

            if (comparison == 0)
            {
                if (!string.IsNullOrEmpty(candidate.ProcessID))
                {
                    current_line.Scp_id = (string)candidate.ProcessID.Clone();
                }
                current_line.ProcessLevel = candidate.ProcessLevel;
                if (!string.IsNullOrEmpty(candidate.Parent_processName))
                {
                    current_line.Parent_scp_name = (string)candidate.Parent_processName.Clone();
                }
                // The cursor is intentionally not advanced, so a following
                // enrichment line with the same name matches this association again.
                break;
            }

            // The association sorts before the current name: skip it.
            association_index++;
        }
    }

    return enrichment_lines;
}
/// <summary>
/// Builds one enrichment line for every process that shares at least one
/// symbol with <paramref name="experimental_symbols"/>, then fills in the
/// p-value, -log10(p-value), q-value and FDR of each line.
/// </summary>
/// <param name="experimental_symbols">Experimental symbols; duplicates are removed and the list is sorted before use.</param>
/// <param name="entryType">Entry type copied onto every generated line.</param>
/// <param name="timepoint">Timepoint copied onto every generated line.</param>
/// <param name="sample_name">Sample name copied onto every generated line.</param>
/// <returns>The generated lines ordered by ascending p-value.</returns>
/// <exception cref="InvalidOperationException">
/// The association contains the same symbol/process pair twice, or one of the
/// contingency-table cells of a line is negative.
/// </exception>
private Ontology_enrichment_line_class[] Generate_enrichment_lines_and_calculate_pvalues(string[] experimental_symbols, Entry_type_enum entryType, int timepoint, string sample_name)
{
    string[] sorted_experimental_symbols = experimental_symbols.Distinct().OrderBy(l => l).ToArray();

    Dictionary<string, int> processName_symbol_count;
    Dictionary<string, List<string>> processName_overlap_symbols;
    Count_process_symbols_and_overlaps(sorted_experimental_symbols, out processName_symbol_count, out processName_overlap_symbols);

    Ontology_enrichment_line_class[] enrich_lines = Create_enrichment_lines(processName_symbol_count, processName_overlap_symbols, sorted_experimental_symbols.Length, entryType, timepoint, sample_name);
    Calculate_enrichment_pvalues(enrich_lines);
    return Calculate_qvalues_and_fdr(enrich_lines);
}

/// <summary>
/// Walks the association (ordered via <c>Order_by_symbol_processName</c>) once
/// and collects, per process name, the number of associated symbols and the
/// experimental symbols that are associated with it.
/// </summary>
/// <param name="sorted_experimental_symbols">Distinct experimental symbols in sorted order.</param>
/// <param name="processName_symbol_count">Receives the number of association lines per process name.</param>
/// <param name="processName_overlap_symbols">Receives the matching experimental symbols per process name; processes without a match get no entry.</param>
/// <remarks>
/// The merge relies on <c>Order_by_symbol_processName</c> yielding an order
/// compatible with <c>string.CompareTo</c> on the symbol - TODO confirm.
/// </remarks>
private void Count_process_symbols_and_overlaps(string[] sorted_experimental_symbols, out Dictionary<string, int> processName_symbol_count, out Dictionary<string, List<string>> processName_overlap_symbols)
{
    processName_symbol_count = new Dictionary<string, int>();
    processName_overlap_symbols = new Dictionary<string, List<string>>();

    MBCO_association.Order_by_symbol_processName();

    int experimental_symbols_length = sorted_experimental_symbols.Length;
    int mbco_associations_length = MBCO_association.MBCO_associations.Length;
    int indexSymbol = 0;
    int stringCompare;
    string experimental_symbol;
    MBCO_association_line_class mbco_association_line;
    MBCO_association_line_class previous_line;
    List<string> overlap_symbols;
    int symbol_count;

    for (int indexMBCO = 0; indexMBCO < mbco_associations_length; indexMBCO++)
    {
        mbco_association_line = MBCO_association.MBCO_associations[indexMBCO];

        // Advance the experimental-symbol cursor until it reaches or passes the
        // symbol of the current association line. On a match the cursor is not
        // advanced, so the next association line with the same symbol matches too.
        stringCompare = -2;
        while ((indexSymbol < experimental_symbols_length) && (stringCompare < 0))
        {
            experimental_symbol = sorted_experimental_symbols[indexSymbol];
            stringCompare = experimental_symbol.CompareTo(mbco_association_line.Symbol);
            if (stringCompare < 0)
            {
                indexSymbol++;
            }
            else if (stringCompare == 0)
            {
                if (!processName_overlap_symbols.TryGetValue(mbco_association_line.ProcessName, out overlap_symbols))
                {
                    overlap_symbols = new List<string>();
                    processName_overlap_symbols.Add(mbco_association_line.ProcessName, overlap_symbols);
                }
                overlap_symbols.Add(experimental_symbol);
            }
        }

        // A repeated symbol/process pair would inflate the process size, so it is rejected.
        if (indexMBCO != 0)
        {
            previous_line = MBCO_association.MBCO_associations[indexMBCO - 1];
            if ((mbco_association_line.ProcessName.Equals(previous_line.ProcessName))
                && (mbco_association_line.Symbol.Equals(previous_line.Symbol)))
            {
                throw new InvalidOperationException("Duplicated association of symbol '" + mbco_association_line.Symbol + "' with process '" + mbco_association_line.ProcessName + "'");
            }
        }

        // TryGetValue leaves symbol_count at 0 for a process seen for the first time.
        processName_symbol_count.TryGetValue(mbco_association_line.ProcessName, out symbol_count);
        processName_symbol_count[mbco_association_line.ProcessName] = symbol_count + 1;
    }
}

/// <summary>
/// Creates one enrichment line per process name that has overlapping symbols
/// and fills in all count and annotation fields (no statistics yet).
/// </summary>
private Ontology_enrichment_line_class[] Create_enrichment_lines(Dictionary<string, int> processName_symbol_count, Dictionary<string, List<string>> processName_overlap_symbols, int experimental_symbols_length, Entry_type_enum entryType, int timepoint, string sample_name)
{
    Ontology_enrichment_line_class[] enrich_lines = new Ontology_enrichment_line_class[processName_overlap_symbols.Count];
    Ontology_enrichment_line_class new_enrich_line;
    string[] sorted_overlap_symbols;
    int indexP = 0;

    foreach (KeyValuePair<string, List<string>> process_overlap in processName_overlap_symbols)
    {
        sorted_overlap_symbols = process_overlap.Value.OrderBy(l => l).ToArray();

        new_enrich_line = new Ontology_enrichment_line_class();
        new_enrich_line.Ontology_type = this.Ontology;
        new_enrich_line.Scp_name = (string)process_overlap.Key.Clone();
        new_enrich_line.Experimental_symbols_count = experimental_symbols_length;
        new_enrich_line.Process_symbols_count = processName_symbol_count[process_overlap.Key];
        new_enrich_line.Bg_symbol_count = this.Bg_genes.Length;
        new_enrich_line.Overlap_symbols = sorted_overlap_symbols;
        new_enrich_line.Overlap_count = sorted_overlap_symbols.Length;
        new_enrich_line.EntryType = entryType;
        new_enrich_line.Timepoint = timepoint;
        new_enrich_line.Sample_name = (string)sample_name.Clone();

        enrich_lines[indexP] = new_enrich_line;
        indexP++;
    }

    return enrich_lines;
}

/// <summary>
/// Sets <c>Pvalue</c> and <c>Minus_log10_pvalue</c> on every line from the 2x2
/// table a = overlap, b = experimental only, c = process only, d = remaining
/// background, using <c>Fisher_exact_test_class.Get_rightTailed_p_value</c>.
/// </summary>
/// <exception cref="InvalidOperationException">One of the four table cells is negative.</exception>
private void Calculate_enrichment_pvalues(Ontology_enrichment_line_class[] enrich_lines)
{
    Fisher_exact_test_class fisher = new Fisher_exact_test_class(this.Bg_genes.Length, false);
    int a;
    int b;
    int c;
    int d;

    foreach (Ontology_enrichment_line_class enrich_line in enrich_lines)
    {
        a = enrich_line.Overlap_count;
        b = enrich_line.Experimental_symbols_count - a;
        c = enrich_line.Process_symbols_count - a;
        d = enrich_line.Bg_symbol_count - a - b - c;
        if ((a < 0) || (b < 0) || (c < 0) || (d < 0))
        {
            throw new InvalidOperationException("negative values");
        }
        enrich_line.Pvalue = fisher.Get_rightTailed_p_value(a, b, c, d);
        enrich_line.Minus_log10_pvalue = -(float)Math.Log10(enrich_line.Pvalue);
    }
}

/// <summary>
/// Orders the lines by ascending p-value and sets <c>Qvalue</c> and <c>FDR</c>.
/// </summary>
/// <remarks>
/// Qvalue = Pvalue * (number of lines / rank), capped at 1. FDR is the smallest
/// Qvalue found at the same or any higher rank, which makes it non-decreasing
/// with rank. The number of lines is the number of processes that had at least
/// one overlapping symbol.
/// </remarks>
/// <returns>The lines in a new array ordered by ascending p-value.</returns>
private Ontology_enrichment_line_class[] Calculate_qvalues_and_fdr(Ontology_enrichment_line_class[] enrich_lines)
{
    Ontology_enrichment_line_class[] ranked_lines = enrich_lines.OrderBy(l => l.Pvalue).ToArray();
    int ranked_lines_length = ranked_lines.Length;
    Ontology_enrichment_line_class enrich_line;

    for (int indexE = 0; indexE < ranked_lines_length; indexE++)
    {
        enrich_line = ranked_lines[indexE];
        // Rank is one-based: the smallest p-value has rank 1.
        enrich_line.Qvalue = enrich_line.Pvalue * ((double)ranked_lines_length / (double)(indexE + 1));
        if (enrich_line.Qvalue > 1)
        {
            enrich_line.Qvalue = 1;
        }
    }

    // -1 marks "no q-value seen yet"; real q-values are never negative here
    // unless a p-value is, which the Fisher test is not expected to return.
    double lowest_qvalue = -1;
    for (int indexE = ranked_lines_length - 1; indexE >= 0; indexE--)
    {
        enrich_line = ranked_lines[indexE];
        if ((lowest_qvalue == -1) || (lowest_qvalue > enrich_line.Qvalue))
        {
            lowest_qvalue = enrich_line.Qvalue;
        }
        enrich_line.FDR = lowest_qvalue;
    }

    return ranked_lines;
}
/// <summary>
/// Forwards to <c>Keep_only_lines_with_indicated_level</c> on the stored
/// ontology association.
/// </summary>
/// <param name="level">Level value passed through unchanged.</param>
public void Keep_only_indicated_level(int level)
{
    this.MBCO_association.Keep_only_lines_with_indicated_level(level);
}