/// <summary>
/// Extends this.Enrich with the given lines. Entries already held in this.Enrich are carried
/// over by reference; every entry of <paramref name="other_lines"/> is appended as the result
/// of its Deep_copy() call, in the order given.
/// </summary>
/// <param name="other_lines">Enrichment lines to append behind the existing ones.</param>
public void Add_other_lines(Ontology_enrichment_line_class[] other_lines)
{
    int existing_count = this.Enrich.Length;
    int added_count = other_lines.Length;
    int merged_count = existing_count + added_count;
    Ontology_enrichment_line_class[] merged_lines = new Ontology_enrichment_line_class[merged_count];

    for (int indexMerged = 0; indexMerged < merged_count; indexMerged++)
    {
        if (indexMerged < existing_count)
        {
            // Existing lines keep their identity (no copy).
            merged_lines[indexMerged] = this.Enrich[indexMerged];
        }
        else
        {
            // Incoming lines are duplicated before they are stored.
            merged_lines[indexMerged] = other_lines[indexMerged - existing_count].Deep_copy();
        }
    }

    this.Enrich = merged_lines;
}
/// <summary>
/// Computes, for every process in this.Ontology_association.Process_gene_associations that
/// shares at least one gene with <paramref name="inputGenes"/>, a right-tailed Fisher p-value
/// and three multiple-testing corrections (Bonferroni, Q_value, False_discovery_rate).
/// </summary>
/// <remarks>
/// The association array is walked once in a merge-join against the sorted input genes, so it
/// is expected to be grouped by ProcessName and, within a process, strictly ascending by
/// GeneSymbol; a violation of the gene symbol order throws. Gene symbols are matched after
/// ToUpper() on both sides.
/// Duplicate entries in <paramref name="inputGenes"/> are not removed: they count towards the
/// input set size but can match a process gene only once.
/// NOTE(review): Bonferroni multiplies by the number of processes with an overlap, whereas
/// Q_value multiplies by the number of all processes seen - confirm this asymmetry is intended.
/// </remarks>
/// <param name="inputGenes">Gene symbols to test; sorted internally, the caller's array is not modified.</param>
/// <param name="first_deg_line">Supplies Sequencing_run, Cell, Condition1 and Condition2 that are copied onto every result line.</param>
/// <returns>One line per process with at least one overlapping gene, ordered by ascending P_value.</returns>
/// <exception cref="Exception">
/// Thrown if gene symbols within a process are duplicated or unsorted, if an input gene
/// overlaps twice with the same process, if a contingency table cell is negative, or if a
/// q-value equals the internal sentinel -1.
/// </exception>
public Ontology_enrichment_line_class[] Calculate_p_values_and_do_mutliple_hypothesis_correcion_for_input_genes(string[] inputGenes, Deg_line_class first_deg_line)
{
    inputGenes = inputGenes.OrderBy(l => l).ToArray();
    int inputGenes_length = inputGenes.Length;
    int background_genes_length = this.Background_genes.Length;
    Ontology_association_line_class[] associations = this.Ontology_association.Process_gene_associations;
    int process_gene_association_length = associations.Length;

    string inputGene;
    int indexInput = 0;
    int stringCompare;
    int overlap_count = 0;
    int processGenes_count = 0;
    int processes_count = 0;
    int a;
    int b;
    int c;
    int d;
    Ontology_association_line_class onto_asso_line;
    Ontology_enrichment_line_class onto_enrichment_line;
    List<Ontology_enrichment_line_class> enrichment_list = new List<Ontology_enrichment_line_class>();
    List<string> overlap_genes = new List<string>();
    // Mirrors overlap_genes so that a repeated overlap is detected in O(1) per gene.
    HashSet<string> overlap_gene_set = new HashSet<string>();

    #region Calculate p-values
    for (int indexOnto = 0; indexOnto < process_gene_association_length; indexOnto++)
    {
        onto_asso_line = associations[indexOnto];

        bool is_first_line_of_process = (indexOnto == 0)
                                        || (!onto_asso_line.ProcessName.Equals(associations[indexOnto - 1].ProcessName));
        if (is_first_line_of_process)
        {
            // New process: reset the per-process counters and restart the scan of the input genes.
            processes_count++;
            processGenes_count = 0;
            overlap_count = 0;
            overlap_genes.Clear();
            overlap_gene_set.Clear();
            indexInput = 0;
        }
        else if (onto_asso_line.GeneSymbol.CompareTo(associations[indexOnto - 1].GeneSymbol) <= 0)
        {
            // The merge-join below relies on strictly ascending gene symbols within a process.
            throw new Exception("Process '" + onto_asso_line.ProcessName + "' has a duplicated gene association or its gene symbols are not sorted properly (gene symbol '" + onto_asso_line.GeneSymbol + "').");
        }

        processGenes_count++;
        stringCompare = -2;
        while ((indexInput < inputGenes_length) && (stringCompare < 0))
        {
            inputGene = inputGenes[indexInput];
            stringCompare = inputGene.ToUpper().CompareTo(onto_asso_line.GeneSymbol.ToUpper());
            if (stringCompare < 0)
            {
                indexInput++;
            }
            else if (stringCompare == 0)
            {
                overlap_count++;
                overlap_genes.Add(inputGene);
                if (!overlap_gene_set.Add(inputGene))
                {
                    throw new Exception("Input gene '" + inputGene + "' overlaps more than once with process '" + onto_asso_line.ProcessName + "'.");
                }
            }
        }

        bool is_last_line_of_process = (indexOnto == process_gene_association_length - 1)
                                       || (!onto_asso_line.ProcessName.Equals(associations[indexOnto + 1].ProcessName));
        if (is_last_line_of_process && (overlap_count > 0))
        {
            // 2x2 contingency table for the Fisher test.
            a = overlap_count;
            b = processGenes_count - overlap_count;
            c = inputGenes_length - overlap_count;
            d = background_genes_length - a - b - c;
            if ((a < 0) || (b < 0) || (c < 0) || (d < 0))
            {
                throw new Exception("Negative value in contingency table of process '" + onto_asso_line.ProcessName + "': a=" + a + ", b=" + b + ", c=" + c + ", d=" + d + ".");
            }

            onto_enrichment_line = new Ontology_enrichment_line_class();
            onto_enrichment_line.Ontology = this.Ontology_association.Ontology;
            onto_enrichment_line.Overlap_count = overlap_count;
            onto_enrichment_line.ProcessName = (string)onto_asso_line.ProcessName.Clone();
            onto_enrichment_line.P_value = Fisher.Get_rightTailed_p_value(a, b, c, d);
            onto_enrichment_line.Minus_log10_pvalue = -Math.Log10(onto_enrichment_line.P_value);
            onto_enrichment_line.Sequencing_run = (string)first_deg_line.Sequencing_run.Clone();
            onto_enrichment_line.Cell = (string)first_deg_line.Cell.Clone();
            onto_enrichment_line.Condition1 = (string)first_deg_line.Condition1.Clone();
            onto_enrichment_line.Condition2 = (string)first_deg_line.Condition2.Clone();
            onto_enrichment_line.Overlap_genes = overlap_genes.OrderBy(l => l).ToArray();
            enrichment_list.Add(onto_enrichment_line);
        }
    }
    #endregion

    #region Calculate q-values and bonferroni
    enrichment_list = enrichment_list.OrderBy(l => l.P_value).ToList();
    int enrichment_length = enrichment_list.Count;
    List<int> rank_list = new List<int>();
    int rank = 0;
    int first_index_of_identical_p_value = 0;
    Ontology_enrichment_line_class inner_onto_line;
    for (int indexO = 0; indexO < enrichment_length; indexO++)
    {
        onto_enrichment_line = enrichment_list[indexO];

        // Bonferroni, capped at 1
        onto_enrichment_line.Bonferroni = onto_enrichment_line.P_value * enrichment_length;
        if (onto_enrichment_line.Bonferroni > 1)
        {
            onto_enrichment_line.Bonferroni = 1;
        }

        // Q-value: lines with exactly identical p-values form a tie group and share the
        // average of their ranks. Exact equality is intended here.
        if ((indexO == 0) || (!onto_enrichment_line.P_value.Equals(enrichment_list[indexO - 1].P_value)))
        {
            rank_list.Clear();
            first_index_of_identical_p_value = indexO;
        }
        rank++;
        rank_list.Add(rank);
        if ((indexO == enrichment_length - 1) || (!onto_enrichment_line.P_value.Equals(enrichment_list[indexO + 1].P_value)))
        {
            // Tie group is complete: compute the shared rank once and apply it to every member.
            float rank_average = (float)rank_list.Average();
            for (int indexInnerO = first_index_of_identical_p_value; indexInnerO <= indexO; indexInnerO++)
            {
                inner_onto_line = enrichment_list[indexInnerO];
                inner_onto_line.Q_value = inner_onto_line.P_value * (processes_count / rank_average);
                if (inner_onto_line.Q_value > 1)
                {
                    inner_onto_line.Q_value = 1;
                }
            }
        }
    }
    #endregion

    #region Calculate FDR
    // enrichment_list is still ordered by ascending P_value from the region above.
    // Walking from the largest p-value down, each line receives the smallest q-value seen so
    // far, which makes False_discovery_rate non-decreasing in P_value.
    double smallest_q_value = -1;
    for (int indexO = enrichment_length - 1; indexO >= 0; indexO--)
    {
        onto_enrichment_line = enrichment_list[indexO];
        if ((indexO == enrichment_length - 1) || (onto_enrichment_line.Q_value < smallest_q_value))
        {
            smallest_q_value = onto_enrichment_line.Q_value;
        }
        if (smallest_q_value == -1)
        {
            throw new Exception("Unexpected q-value of -1 while calculating false discovery rates.");
        }
        onto_enrichment_line.False_discovery_rate = smallest_q_value;
    }
    #endregion

    return enrichment_list.ToArray();
}
/// <summary>
/// Builds one enrichment line for every process name in MBCO_association that contains at
/// least one of the given symbols, then fills in the right-tailed Fisher p-value, the Qvalue
/// and the FDR of each line.
/// </summary>
/// <remarks>
/// Calls MBCO_association.Order_by_symbol_processName() first; the merge-join and the
/// adjacent-duplicate check below rely on the resulting order.
/// Symbols are compared with String.CompareTo, i.e. without any case normalisation.
/// Qvalue is Pvalue * (number of result lines / rank), capped at 1; tied p-values are not
/// rank-averaged. FDR is the smallest Qvalue among the lines of equal or higher rank.
/// </remarks>
/// <param name="experimental_symbols">Symbols to test; de-duplicated and sorted internally, the caller's array is not modified.</param>
/// <param name="entryType">Stored unchanged in EntryType of every result line.</param>
/// <param name="timepoint">Stored unchanged in Timepoint of every result line.</param>
/// <param name="sample_name">Stored in Sample_name of every result line.</param>
/// <returns>The enrichment lines ordered by ascending Pvalue.</returns>
/// <exception cref="Exception">
/// Thrown if the same symbol is associated twice with the same process, or if a contingency
/// table cell is negative.
/// </exception>
private Ontology_enrichment_line_class[] Generate_enrichment_lines_and_calculate_pvalues(string[] experimental_symbols, Entry_type_enum entryType, int timepoint, string sample_name)
{
    experimental_symbols = experimental_symbols.Distinct().OrderBy(l => l).ToArray();
    int experimental_symbols_length = experimental_symbols.Length;
    Dictionary<string, int> processName_symbol_count = new Dictionary<string, int>();
    Dictionary<string, List<string>> processName_symbol_overlap_symbols = new Dictionary<string, List<string>>();
    MBCO_association.Order_by_symbol_processName();

    #region Count overlap between process genes and experimental genes
    string experimental_symbol;
    int mbco_associations_length = MBCO_association.MBCO_associations.Length;
    int indexSymbol = 0;
    int stringCompare;
    int current_symbol_count;
    List<string> current_overlap_symbols;
    MBCO_association_line_class mbco_association_line;
    MBCO_association_line_class previous_association_line;
    for (int indexMBCO = 0; indexMBCO < mbco_associations_length; indexMBCO++)
    {
        mbco_association_line = MBCO_association.MBCO_associations[indexMBCO];

        // Merge-join: indexSymbol only moves forward. It is not advanced on a match, so the
        // same experimental symbol can match the following association lines as well.
        stringCompare = -2;
        while ((indexSymbol < experimental_symbols_length) && (stringCompare < 0))
        {
            experimental_symbol = experimental_symbols[indexSymbol];
            stringCompare = experimental_symbol.CompareTo(mbco_association_line.Symbol);
            if (stringCompare < 0)
            {
                indexSymbol++;
            }
            else if (stringCompare == 0)
            {
                if (!processName_symbol_overlap_symbols.TryGetValue(mbco_association_line.ProcessName, out current_overlap_symbols))
                {
                    current_overlap_symbols = new List<string>();
                    processName_symbol_overlap_symbols.Add(mbco_association_line.ProcessName, current_overlap_symbols);
                }
                current_overlap_symbols.Add(experimental_symbol);
            }
        }

        #region Process symbol count
        if (indexMBCO != 0)
        {
            previous_association_line = MBCO_association.MBCO_associations[indexMBCO - 1];
            if ((mbco_association_line.ProcessName.Equals(previous_association_line.ProcessName))
                && (mbco_association_line.Symbol.Equals(previous_association_line.Symbol)))
            {
                throw new Exception("Symbol '" + mbco_association_line.Symbol + "' is associated more than once with process '" + mbco_association_line.ProcessName + "'.");
            }
        }
        if (processName_symbol_count.TryGetValue(mbco_association_line.ProcessName, out current_symbol_count))
        {
            processName_symbol_count[mbco_association_line.ProcessName] = current_symbol_count + 1;
        }
        else
        {
            processName_symbol_count.Add(mbco_association_line.ProcessName, 1);
        }
        #endregion
    }
    #endregion

    #region Generate enrichment lines
    // Only processes with at least one overlapping symbol have an entry in the overlap dictionary.
    string[] processNames = processName_symbol_overlap_symbols.Keys.ToArray();
    string processName;
    int processNames_length = processNames.Length;
    Ontology_enrichment_line_class[] enrich_lines = new Ontology_enrichment_line_class[processNames_length];
    Ontology_enrichment_line_class new_enrich_line;
    for (int indexP = 0; indexP < processNames_length; indexP++)
    {
        processName = processNames[indexP];
        new_enrich_line = new Ontology_enrichment_line_class();
        new_enrich_line.Ontology_type = this.Ontology;
        new_enrich_line.Scp_name = (string)processName.Clone();
        new_enrich_line.Experimental_symbols_count = experimental_symbols_length;
        new_enrich_line.Process_symbols_count = processName_symbol_count[processName];
        new_enrich_line.Bg_symbol_count = this.Bg_genes.Length;
        new_enrich_line.Overlap_symbols = processName_symbol_overlap_symbols[processName].OrderBy(l => l).ToArray();
        new_enrich_line.Overlap_count = new_enrich_line.Overlap_symbols.Length;
        new_enrich_line.EntryType = entryType;
        new_enrich_line.Timepoint = timepoint;
        new_enrich_line.Sample_name = (string)sample_name.Clone();
        enrich_lines[indexP] = new_enrich_line;
    }
    #endregion

    #region Calculate pvalues
    int enrich_length = enrich_lines.Length;
    Ontology_enrichment_line_class enrich_line;
    int a;
    int b;
    int c;
    int d;
    Fisher_exact_test_class fisher = new Fisher_exact_test_class(this.Bg_genes.Length, false);
    for (int indexE = 0; indexE < enrich_length; indexE++)
    {
        enrich_line = enrich_lines[indexE];
        // 2x2 contingency table for the Fisher test.
        a = enrich_line.Overlap_count;
        b = enrich_line.Experimental_symbols_count - a;
        c = enrich_line.Process_symbols_count - a;
        d = enrich_line.Bg_symbol_count - a - b - c;
        if ((a < 0) || (b < 0) || (c < 0) || (d < 0))
        {
            throw new Exception("negative values");
        }
        enrich_line.Pvalue = fisher.Get_rightTailed_p_value(a, b, c, d);
        enrich_line.Minus_log10_pvalue = -(float)Math.Log10(enrich_line.Pvalue);
    }
    #endregion

    #region Calculate qvalues and FDR
    enrich_lines = enrich_lines.OrderBy(l => l.Pvalue).ToArray();
    for (int indexE = 0; indexE < enrich_length; indexE++)
    {
        enrich_line = enrich_lines[indexE];
        int rank = indexE + 1;
        enrich_line.Qvalue = enrich_line.Pvalue * ((double)processNames_length / (double)rank);
        if (enrich_line.Qvalue > 1)
        {
            enrich_line.Qvalue = 1;
        }
    }

    // Walk from the largest p-value down and hand every line the smallest Qvalue seen so far.
    // -1 marks "nothing seen yet".
    double lowest_qvalue = -1;
    for (int indexE = enrich_length - 1; indexE >= 0; indexE--)
    {
        enrich_line = enrich_lines[indexE];
        if ((lowest_qvalue == -1) || (lowest_qvalue > enrich_line.Qvalue))
        {
            lowest_qvalue = enrich_line.Qvalue;
        }
        enrich_line.FDR = lowest_qvalue;
    }
    #endregion

    return enrich_lines;
}
/// <summary>
/// Replaces Enrich with the array returned by
/// Ontology_enrichment_line_class.Order_by_complete_sample_pvalue for the current Enrich.
/// The ordering itself is defined by that static method (presumably by sample, then p-value -
/// confirm there).
/// </summary>
public void Order_by_complete_sample_pvalue()
{
    Ontology_enrichment_line_class[] ordered_lines = Ontology_enrichment_line_class.Order_by_complete_sample_pvalue(this.Enrich);
    Enrich = ordered_lines;
}
/// <summary>
/// Replaces Enrich with the array returned by
/// Ontology_enrichment_line_class.Order_by_sample_and_scpName for the current Enrich.
/// The ordering itself is defined by that static method (presumably by sample, then SCP name -
/// confirm there).
/// </summary>
public void Order_by_sample_scpName()
{
    Ontology_enrichment_line_class[] ordered_lines = Ontology_enrichment_line_class.Order_by_sample_and_scpName(this.Enrich);
    Enrich = ordered_lines;
}