示例#1
0
        /// <summary>
        /// Appends deep copies of the given lines to <c>Enrich</c>.
        /// </summary>
        /// <remarks>
        /// The existing entries are carried over by reference; only the incoming lines are copied via
        /// <c>Deep_copy()</c>. <c>Enrich</c> is replaced by a newly allocated array once all copies succeeded.
        /// </remarks>
        /// <param name="other_lines">Lines to append; the array and its elements are not modified here.</param>
        public void Add_other_lines(Ontology_enrichment_line_class[] other_lines)
        {
            int existingCount = this.Enrich.Length;
            int incomingCount = other_lines.Length;

            Ontology_enrichment_line_class[] merged = new Ontology_enrichment_line_class[existingCount + incomingCount];

            // Front part: the lines already held by this instance (same references).
            this.Enrich.CopyTo(merged, 0);

            // Back part: independent copies of the incoming lines, placed right behind the existing ones.
            for (int offset = 0; offset < incomingCount; offset++)
            {
                merged[existingCount + offset] = other_lines[offset].Deep_copy();
            }

            this.Enrich = merged;
        }
示例#2
0
        /// <summary>
        /// Tests every process in <c>this.Ontology_association</c> for over-representation among the input genes
        /// (right-tailed Fisher's exact test) and annotates each result with Bonferroni, q-value and FDR.
        /// </summary>
        /// <remarks>
        /// The process-gene associations are traversed once and merged against the sorted input genes. They must
        /// therefore be grouped by process name with strictly ascending gene symbols inside each process;
        /// a violation throws. Only processes that share at least one gene with the input produce a result line.
        /// NOTE(review): the input is sorted with the default string comparer but matched on upper-cased symbols;
        /// this presumes both orderings agree for the symbols in use - confirm for mixed-case input.
        /// </remarks>
        /// <param name="inputGenes">Gene symbols to test. A sorted copy is used; the caller's array is not reordered.</param>
        /// <param name="first_deg_line">Line whose Sequencing_run, Cell, Condition1 and Condition2 are copied onto every result line.</param>
        /// <returns>The enrichment lines ordered by ascending p-value.</returns>
        /// <exception cref="Exception">
        /// Thrown when the associations are unsorted or duplicated, when an input gene is matched twice within one
        /// process, or when a cell of the contingency table is negative.
        /// </exception>
        public Ontology_enrichment_line_class[] Calculate_p_values_and_do_mutliple_hypothesis_correcion_for_input_genes(string[] inputGenes, Deg_line_class first_deg_line)
        {
            inputGenes = inputGenes.OrderBy(l => l).ToArray();
            string inputGene;
            int    inputGenes_length = inputGenes.Length;
            int    indexInput        = 0;
            int    stringCompare;

            int overlap_count = 0;
            int processGenes_count = 0;
            int a; int b; int c; int d;
            int background_genes_length = this.Background_genes.Length;
            int processes_count         = 0;

            int process_gene_association_length = this.Ontology_association.Process_gene_associations.Length;
            Ontology_association_line_class onto_asso_line;

            Ontology_enrichment_line_class        onto_enrichment_line;
            List <Ontology_enrichment_line_class> enrichment_list = new List <Ontology_enrichment_line_class>();
            List <string> overlap_genes = new List <string>();
            // Mirrors overlap_genes so that a repeated match is detected in constant time.
            HashSet <string> seen_overlap_genes = new HashSet <string>();


            #region Calculate p-values
            for (int indexOnto = 0; indexOnto < process_gene_association_length; indexOnto++)
            {
                onto_asso_line = this.Ontology_association.Process_gene_associations[indexOnto];
                if ((indexOnto == 0) || (!onto_asso_line.ProcessName.Equals(this.Ontology_association.Process_gene_associations[indexOnto - 1].ProcessName)))
                {
                    // First association of a new process: reset the per-process state and
                    // restart the scan of the input genes from the beginning.
                    processes_count++;
                    processGenes_count = 0;
                    overlap_count      = 0;
                    overlap_genes.Clear();
                    seen_overlap_genes.Clear();
                    indexInput = 0;
                }
                if ((indexOnto != 0) &&
                    (onto_asso_line.ProcessName.Equals(this.Ontology_association.Process_gene_associations[indexOnto - 1].ProcessName)) &&
                    (onto_asso_line.GeneSymbol.CompareTo(this.Ontology_association.Process_gene_associations[indexOnto - 1].GeneSymbol) <= 0))
                {
                    throw new Exception("Duplicated gene association within a process, or gene symbols are not sorted in ascending order.");
                }
                stringCompare = -2;
                processGenes_count++;
                // Advance through the sorted input genes until one is equal to or greater than the process gene.
                while ((indexInput < inputGenes_length) && (stringCompare < 0))
                {
                    inputGene     = inputGenes[indexInput];
                    stringCompare = inputGene.ToUpper().CompareTo(onto_asso_line.GeneSymbol.ToUpper());
                    if (stringCompare < 0)
                    {
                        indexInput++;
                    }
                    else if (stringCompare == 0)
                    {
                        overlap_count++;
                        overlap_genes.Add(inputGene);
                        if (!seen_overlap_genes.Add(inputGene))
                        {
                            throw new Exception("Input gene '" + inputGene + "' overlaps more than once with process '" + onto_asso_line.ProcessName + "'.");
                        }
                    }
                }
                if ((indexOnto == process_gene_association_length - 1) || (!onto_asso_line.ProcessName.Equals(this.Ontology_association.Process_gene_associations[indexOnto + 1].ProcessName)))
                {
                    // Last association of the current process: build the 2x2 contingency table.
                    if (overlap_count > 0)
                    {
                        a = overlap_count;                           // in process and in input
                        b = processGenes_count - overlap_count;      // in process, not in input
                        c = inputGenes_length - overlap_count;       // in input, not in process
                        d = background_genes_length - a - b - c;     // remaining background genes

                        if ((a < 0) || (b < 0) || (c < 0) || (d < 0))
                        {
                            throw new Exception("Negative cell in the contingency table for process '" + onto_asso_line.ProcessName + "'.");
                        }
                        onto_enrichment_line                    = new Ontology_enrichment_line_class();
                        onto_enrichment_line.Ontology           = this.Ontology_association.Ontology;
                        onto_enrichment_line.Overlap_count      = overlap_count;
                        onto_enrichment_line.ProcessName        = (string)onto_asso_line.ProcessName.Clone();
                        onto_enrichment_line.P_value            = Fisher.Get_rightTailed_p_value(a, b, c, d);
                        onto_enrichment_line.Minus_log10_pvalue = -Math.Log10(onto_enrichment_line.P_value);
                        onto_enrichment_line.Sequencing_run     = (string)first_deg_line.Sequencing_run.Clone();
                        onto_enrichment_line.Cell               = (string)first_deg_line.Cell.Clone();
                        onto_enrichment_line.Condition1         = (string)first_deg_line.Condition1.Clone();
                        onto_enrichment_line.Condition2         = (string)first_deg_line.Condition2.Clone();
                        onto_enrichment_line.Overlap_genes      = overlap_genes.OrderBy(l => l).ToArray();

                        enrichment_list.Add(onto_enrichment_line);
                    }
                }
            }
            #endregion

            #region Calculate q-values and bonferroni
            enrichment_list = enrichment_list.OrderBy(l => l.P_value).ToList();
            int        enrichment_length = enrichment_list.Count;
            List <int> rank_list         = new List <int>();
            int        rank = 0;
            int        first_index_of_identical_p_value = 0;
            Ontology_enrichment_line_class inner_onto_line;
            for (int indexO = 0; indexO < enrichment_length; indexO++)
            {
                onto_enrichment_line = enrichment_list[indexO];

                //Bonferroni: p-value times the number of reported lines, capped at 1
                onto_enrichment_line.Bonferroni = onto_enrichment_line.P_value * enrichment_length;
                if (onto_enrichment_line.Bonferroni > 1)
                {
                    onto_enrichment_line.Bonferroni = 1;
                }

                //Qvalue: a run of identical p-values shares the average of its ranks
                if ((indexO == 0) ||
                    (onto_enrichment_line.P_value != enrichment_list[indexO - 1].P_value))
                {
                    rank_list.Clear();
                    first_index_of_identical_p_value = indexO;
                }

                rank++;
                rank_list.Add(rank);

                if ((indexO == enrichment_length - 1) ||
                    (onto_enrichment_line.P_value != enrichment_list[indexO + 1].P_value))
                {
                    // End of a run of tied p-values: compute the shared average rank once.
                    float rank_average = (float)rank_list.Average();
                    for (int indexInnerO = first_index_of_identical_p_value; indexInnerO <= indexO; indexInnerO++)
                    {
                        inner_onto_line         = enrichment_list[indexInnerO];
                        // Scaled by the number of all processes seen, not only by those with overlap.
                        inner_onto_line.Q_value = inner_onto_line.P_value * (processes_count / rank_average);
                        if (inner_onto_line.Q_value > 1)
                        {
                            inner_onto_line.Q_value = 1;
                        }
                    }
                }
            }
            #endregion

            #region Calculate FDR
            // The list is still ordered by ascending p-value. Walking it backwards and carrying the smallest
            // q-value seen so far makes the FDR non-decreasing with the p-value.
            double smallest_q_value = -1;
            for (int indexO = enrichment_length - 1; indexO >= 0; indexO--)
            {
                onto_enrichment_line = enrichment_list[indexO];
                if ((indexO == enrichment_length - 1) || (onto_enrichment_line.Q_value < smallest_q_value))
                {
                    smallest_q_value = onto_enrichment_line.Q_value;
                }
                if (smallest_q_value == -1)
                {
                    throw new Exception("Smallest q-value was not initialized.");
                }
                onto_enrichment_line.False_discovery_rate = smallest_q_value;
            }
            #endregion

            return(enrichment_list.ToArray());
        }
示例#3
0
        /// <summary>
        /// Builds one enrichment line per process that shares at least one symbol with the experimental symbols
        /// and fills in p-value (right-tailed Fisher's exact test), -log10(p), q-value and FDR.
        /// </summary>
        /// <remarks>
        /// <c>MBCO_association</c> is re-ordered via <c>Order_by_symbol_processName()</c> as a side effect.
        /// The overlap is found by a single merge pass over the ordered associations and the sorted, de-duplicated
        /// experimental symbols.
        /// </remarks>
        /// <param name="experimental_symbols">Symbols to test; duplicates are removed and a sorted copy is used.</param>
        /// <param name="entryType">Stored unchanged on every result line.</param>
        /// <param name="timepoint">Stored unchanged on every result line.</param>
        /// <param name="sample_name">Copied onto every result line.</param>
        /// <returns>The enrichment lines ordered by ascending p-value.</returns>
        /// <exception cref="Exception">
        /// Thrown when two consecutive associations have the same process name and symbol, or when a cell of the
        /// contingency table is negative.
        /// </exception>
        private Ontology_enrichment_line_class[] Generate_enrichment_lines_and_calculate_pvalues(string[] experimental_symbols, Entry_type_enum entryType, int timepoint, string sample_name)
        {
            experimental_symbols = experimental_symbols.Distinct().OrderBy(symbol => symbol).ToArray();
            int symbolTotal = experimental_symbols.Length;
            Dictionary <string, int>            symbolsPerProcess = new Dictionary <string, int>();
            Dictionary <string, List <string> > overlapPerProcess = new Dictionary <string, List <string> >();

            MBCO_association.Order_by_symbol_processName();

            // ---- Step 1: count process sizes and collect the overlapping symbols per process ----
            int associationTotal = MBCO_association.MBCO_associations.Length;
            int symbolCursor     = 0;   // never reset: both sequences are walked in symbol order
            for (int associationIndex = 0; associationIndex < associationTotal; associationIndex++)
            {
                MBCO_association_line_class association = MBCO_association.MBCO_associations[associationIndex];
                string associatedProcess = association.ProcessName;

                // Skip experimental symbols that sort before the association's symbol; stop at the first
                // symbol that is equal (overlap) or greater (no overlap for this association).
                while (symbolCursor < symbolTotal)
                {
                    string candidate  = experimental_symbols[symbolCursor];
                    int    comparison = candidate.CompareTo(association.Symbol);
                    if (comparison < 0)
                    {
                        symbolCursor++;
                        continue;
                    }
                    if (comparison == 0)
                    {
                        List <string> sharedSymbols;
                        if (!overlapPerProcess.TryGetValue(associatedProcess, out sharedSymbols))
                        {
                            sharedSymbols = new List <string>();
                            overlapPerProcess.Add(associatedProcess, sharedSymbols);
                        }
                        sharedSymbols.Add(candidate);
                    }
                    break;
                }

                // The same (process, symbol) pair must not appear twice in a row.
                if (associationIndex > 0)
                {
                    MBCO_association_line_class previous = MBCO_association.MBCO_associations[associationIndex - 1];
                    if (association.ProcessName.Equals(previous.ProcessName) &&
                        association.Symbol.Equals(previous.Symbol))
                    {
                        throw new Exception();
                    }
                }

                int knownCount;
                if (symbolsPerProcess.TryGetValue(associatedProcess, out knownCount))
                {
                    symbolsPerProcess[associatedProcess] = knownCount + 1;
                }
                else
                {
                    symbolsPerProcess.Add(associatedProcess, 1);
                }
            }

            // ---- Step 2: one enrichment line per process that has any overlap ----
            string[] processNames = overlapPerProcess.Keys.ToArray();
            int      processTotal = processNames.Length;
            Ontology_enrichment_line_class[] enrich_lines = new Ontology_enrichment_line_class[processTotal];
            for (int processIndex = 0; processIndex < processTotal; processIndex++)
            {
                string currentProcess = processNames[processIndex];
                Ontology_enrichment_line_class line = new Ontology_enrichment_line_class();
                line.Ontology_type              = this.Ontology;
                line.Scp_name                   = (string)currentProcess.Clone();
                line.Experimental_symbols_count = symbolTotal;
                line.Process_symbols_count      = symbolsPerProcess[currentProcess];
                line.Bg_symbol_count            = this.Bg_genes.Length;
                line.Overlap_symbols            = overlapPerProcess[currentProcess].OrderBy(symbol => symbol).ToArray();
                line.Overlap_count              = line.Overlap_symbols.Length;
                line.EntryType                  = entryType;
                line.Timepoint                  = timepoint;
                line.Sample_name                = (string)sample_name.Clone();
                enrich_lines[processIndex]      = line;
            }

            // ---- Step 3: p-values from the 2x2 contingency table ----
            Fisher_exact_test_class fisher = new Fisher_exact_test_class(this.Bg_genes.Length, false);
            foreach (Ontology_enrichment_line_class line in enrich_lines)
            {
                int overlap        = line.Overlap_count;
                int experimentOnly = line.Experimental_symbols_count - overlap;
                int processOnly    = line.Process_symbols_count - overlap;
                int neither        = line.Bg_symbol_count - overlap - experimentOnly - processOnly;
                if ((overlap < 0) || (experimentOnly < 0) || (processOnly < 0) || (neither < 0))
                {
                    throw new Exception("negative values");
                }
                line.Pvalue             = fisher.Get_rightTailed_p_value(overlap, experimentOnly, processOnly, neither);
                line.Minus_log10_pvalue = -(float)Math.Log10(line.Pvalue);
            }

            // ---- Step 4: q-values (p * number of lines / rank), capped at 1 ----
            enrich_lines = enrich_lines.OrderBy(entry => entry.Pvalue).ToArray();
            int sortedTotal = enrich_lines.Length;
            for (int position = 0; position < sortedTotal; position++)
            {
                Ontology_enrichment_line_class ranked = enrich_lines[position];
                int rank = position + 1;

                ranked.Qvalue = ranked.Pvalue * ((double)processTotal / (double)rank);
                if (ranked.Qvalue > 1)
                {
                    ranked.Qvalue = 1;
                }
            }

            // ---- Step 5: FDR = smallest q-value among this line and all lines with larger p-values ----
            double lowest_qvalue = -1;   // -1 marks "nothing seen yet"
            for (int position = sortedTotal - 1; position >= 0; position--)
            {
                Ontology_enrichment_line_class ranked = enrich_lines[position];
                bool nothingSeenYet = lowest_qvalue == -1;
                if (nothingSeenYet || (lowest_qvalue > ranked.Qvalue))
                {
                    lowest_qvalue = ranked.Qvalue;
                }
                ranked.FDR = lowest_qvalue;
            }

            return(enrich_lines);
        }
示例#4
0
 /// <summary>
 /// Replaces <c>Enrich</c> with the result of the static
 /// <c>Ontology_enrichment_line_class.Order_by_complete_sample_pvalue</c> applied to the current lines.
 /// </summary>
 /// <remarks>
 /// Judging by the helper's name the lines are ordered by complete sample and then p-value;
 /// the exact sort keys are defined in the helper - confirm there.
 /// </remarks>
 public void Order_by_complete_sample_pvalue()
 {
     this.Enrich = Ontology_enrichment_line_class.Order_by_complete_sample_pvalue(Enrich);
 }
示例#5
0
 /// <summary>
 /// Replaces <c>Enrich</c> with the result of the static
 /// <c>Ontology_enrichment_line_class.Order_by_sample_and_scpName</c> applied to the current lines.
 /// </summary>
 /// <remarks>
 /// Judging by the helper's name the lines are ordered by sample and then SCP name;
 /// the exact sort keys are defined in the helper - confirm there.
 /// </remarks>
 public void Order_by_sample_scpName()
 {
     this.Enrich = Ontology_enrichment_line_class.Order_by_sample_and_scpName(Enrich);
 }