Example #1
        /**
         * Computes the Fisher scores for all the permutations in a single pass.
         * The algorithm starts from the most probable table (to avoid numerical problems)
         * and walks outward in both directions (towards max a and towards min a),
         * computing each probability incrementally from its neighbor.
         * The probabilities are then sorted in increasing order and accumulated in that order.
         * The result is a mapping between the permutation probabilities and p-values.
         * TODO - we may want to cache the resulting list in case of multiple calls
         * */
        public double[,] computeAllPermutationsScores()
        {
            int           cPermutations   = getMaxPossibleA() - getMinPossibleA() + 1;
            List <double> alProbabilities = new List <double>();

            double[,] adScores = new double[cPermutations, 2];
            //We start from the table with the maximal probability to avoid numerical computation problems
            ContingencyTable ctMaxValue = getMaxValueTable();
            ContingencyTable ctIterator = ctMaxValue;
            double           pStart     = ctIterator.getHypergeometricProbability();

            double pCurrent = pStart, dSum = 0.0;
            int    iCurrent = 0;

            //iterate to the right side
            while (ctIterator != null)
            {
                //Add the probability of the current permutation to the list
                alProbabilities.Add(pCurrent);
                //Compute the probability of the next table incrementally
                pCurrent = ctIterator.incrementalHypergeometricProbability(pCurrent);
                //Advance the table - next() returns null once a exceeds the max value
                ctIterator = ctIterator.next();
            }

            //iterate to the left side
            ctIterator = ctMaxValue;
            pCurrent   = ctIterator.decrementalHypergeometricProbability(pStart);
            ctIterator = ctIterator.previous();
            while (ctIterator != null)
            {
                //Add the probability of the current permutation to the list
                alProbabilities.Add(pCurrent);
                //Compute the probability of the previous table incrementally
                pCurrent = ctIterator.decrementalHypergeometricProbability(pCurrent);
                //Move back a table - previous() returns null once a drops below the min value
                ctIterator = ctIterator.previous();
            }

            //Sort the observed probabilities in increasing order
            alProbabilities.Sort();
            //BUGBUG - identical entries may not be handled correctly here. Not sure whether this bug actually occurs.
            dSum = 0.0;
            //Sum the probabilities in increasing order, computing two-sided p-values
            //BUGBUG - Not sure how to make this work for one-sided tests.
            for (iCurrent = 0; iCurrent < alProbabilities.Count; iCurrent++)
            {
                pCurrent = (double)alProbabilities[iCurrent];
                dSum    += pCurrent;
                if (dSum > 1.0)
                {
                    dSum = 1.0;
                }
                adScores[iCurrent, 0] = pCurrent;
                adScores[iCurrent, 1] = dSum;
            }
            return(adScores);
        }
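The heart of this method is the final sort-and-accumulate step: after sorting ascending, the running (clamped) sum at each position is the two-sided p-value of that permutation. A minimal standalone sketch of just that step, with a hypothetical helper name:

using System;
using System.Collections.Generic;

// Hypothetical helper: maps permutation probabilities to two-sided p-values
// by sorting ascending and taking a clamped prefix sum (same logic as above).
static double[,] probabilitiesToPValues(List<double> alProbabilities)
{
    alProbabilities.Sort(); // most extreme (least probable) permutations first
    double[,] adScores = new double[alProbabilities.Count, 2];
    double dSum = 0.0;
    for (int iCurrent = 0; iCurrent < alProbabilities.Count; iCurrent++)
    {
        dSum = Math.Min(1.0, dSum + alProbabilities[iCurrent]); // clamp numeric drift
        adScores[iCurrent, 0] = alProbabilities[iCurrent]; // permutation probability
        adScores[iCurrent, 1] = dSum;                      // two-sided p-value
    }
    return adScores;
}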
Example #2
 public double getCachedValue(ContingencyTable ct)
 {
     int[] aKey = new int[] { ct.getA(), ct.getB(), ct.getC(), ct.getD() };
     if (m_slContingencyTables.ContainsKey(aKey))
     {
         return(m_slContingencyTables[aKey]);
     }
     return(double.NaN);
 }
Example #3
        /**
         * Computes the hypergeometric probability using the following factorization:
         * (a+b)!(a+c)!(b+d)!(c+d)!     (a+b)!    (a+c)!    (c+d)!     (b+d)!
         * ------------------------ =  ------- * ------- * -------- * --------
         *       a!b!c!d!n!              a!b!       c!        d!         n!
         *  The assumption is that (a+b) is the smallest marginal.
         *  A better implementation would check for the smallest marginal and factor according to it, but the current implementation seems fast enough.
         * */
        public static double pr(ContingencyTable ct)
        {
            double pt = 1;
            double iFactorial = 0;
            int    a = ct.getA(), b = ct.getB(), c = ct.getC(), d = ct.getD();
            double iDenominator    = a + b + c + d;
            double iMinDenominator = b + d;

            for (iFactorial = a + 1; iFactorial <= a + b; iFactorial++) // (a+b)!/a!b!
            {
                pt *= iFactorial / (iFactorial - a);
                while ((pt > 1) && (iDenominator > iMinDenominator))
                {
                    pt /= iDenominator;
                    iDenominator--;
                }
            }
            for (iFactorial = c + 1; iFactorial <= a + c; iFactorial++) // (a+c)!/c!
            {
                pt *= iFactorial;
                while ((pt > 1) && (iDenominator > iMinDenominator))
                {
                    pt /= iDenominator;
                    iDenominator--;
                }
            }
            for (iFactorial = d + 1; iFactorial <= c + d; iFactorial++) // (c+d)!/d!
            {
                pt *= iFactorial;
                while ((pt > 1) && (iDenominator > iMinDenominator))
                {
                    pt /= iDenominator;
                    iDenominator--;
                }
            }

            if (pt == 0.0) //underflow
            {
                return(double.Epsilon);
            }

            while ((iDenominator > iMinDenominator) && (pt > 0.0))
            {
                pt /= iDenominator;
                if (pt == 0.0) //underflow
                {
                    return(double.Epsilon);
                }
                iDenominator--;
            }
            if (pt > 1.0) //numerical error
            {
                pt = 1.0;
            }

            return(pt);
        }
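Because pr interleaves multiplications with divisions by the n!/(b+d)! terms to keep pt inside double range, a slower but simpler cross-check is useful in testing. A sketch using log-factorials (class and helper names are hypothetical, not part of these examples):

using System;

static class HypergeometricCheck
{
    // ln(n!) by direct summation; adequate for test-sized tables.
    static double logFactorial(int n)
    {
        double dSum = 0.0;
        for (int i = 2; i <= n; i++)
        {
            dSum += Math.Log(i);
        }
        return dSum;
    }

    // pr(a,b,c,d) = (a+b)!(c+d)!(a+c)!(b+d)! / (a!b!c!d!n!), evaluated in log space.
    public static double pr(int a, int b, int c, int d)
    {
        int n = a + b + c + d;
        double dLogP = logFactorial(a + b) + logFactorial(c + d)
                     + logFactorial(a + c) + logFactorial(b + d)
                     - logFactorial(a) - logFactorial(b)
                     - logFactorial(c) - logFactorial(d)
                     - logFactorial(n);
        return Math.Exp(dLogP);
    }
}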
Example #4
 public void setCachedValue(ContingencyTable ct, double dValue)
 {
     int[] aKey = new int[] { ct.getA(), ct.getB(), ct.getC(), ct.getD() };
     if (!m_slContingencyTables.ContainsKey(aKey))
     {
         m_slContingencyTables.Add(aKey, dValue);
     }
     else
     {
         m_slContingencyTables[aKey] = dValue;
     }
 }
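Both cache methods key on an int[] of the four cell counts. .NET arrays compare by reference, so lookups like ContainsKey only behave as intended if m_slContingencyTables (whose declaration is not shown in these examples) was created with an element-wise comparer. A sketch of such a comparer, assuming the cache is a SortedList<int[], double>:

using System.Collections.Generic;

// Lexicographic comparer so that two int[] keys holding the same cell
// counts address the same cache entry. Assumption: the cache container
// is a SortedList<int[], double> constructed with this comparer.
class IntArrayComparer : IComparer<int[]>
{
    public int Compare(int[] x, int[] y)
    {
        for (int i = 0; i < x.Length && i < y.Length; i++)
        {
            if (x[i] != y[i])
            {
                return x[i].CompareTo(y[i]);
            }
        }
        return x.Length.CompareTo(y.Length);
    }
}

// Usage sketch: m_slContingencyTables = new SortedList<int[], double>(new IntArrayComparer());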
        private Map <double, FDRData> initFDRMap(List <ContingencyTable> actTables)
        {
            Map <double, FDRData> slFDR = new Map <double, FDRData>();
            int iTable = 0;
            ContingencyTable ctCurrent    = null;
            double           dFisherScore = 0.0;

            for (iTable = 0; iTable < actTables.Count; iTable++)
            {
                ctCurrent    = (ContingencyTable)actTables[iTable];
                dFisherScore = round(ctCurrent.getFisher2TailPermutationTest());
                if (!slFDR.ContainsKey(dFisherScore))
                {
                    slFDR.Add(dFisherScore, new FDRData(dFisherScore));
                }
            }
            slFDR.Add(10.0, new FDRData(10.0)); // sentinel entry with a Fisher score larger than any achievable p-value
            return(slFDR);
        }
        //Loading the contingency tables from the input file
        //Implements the sampling techniques
        private List <ContingencyTable> loadTables()
        {
            try
            {
                StreamReader     sr        = m_fiInput.OpenText();
                ContingencyTable ctCurrent = null;
                if (m_bReportProgress)
                {
                    m_bContinue = m_prReport.reportPhase("Loading data");
                    m_bContinue = m_prReport.reportMessage("Loading data from file " + m_fiInput.Name, true);
                }
                string sLine = "";
                List <ContingencyTable> actTables = new List <ContingencyTable>();
                int    cTables = 0;
                long   cCharacters = 0;
                bool   bUseTable = true;
                double dSampleProbability = 0.0, dProb = 0.0;
                Random rnd = new Random();
                int    iLineNumber = 0;

                if (m_bHuge)
                {
                    m_ctcTableCounts = new ContingencyTableCache();
                }
                else
                {
                    m_ctcTableCounts = null;
                }

                //On the first pass, go through the file to count the number of rows (tables)
                if (m_cTables == -1)
                {
                    m_cTables = 0;
                    sLine     = sr.ReadLine();
                    initColumnHeaders(sLine);
                    while (!sr.EndOfStream)
                    {
                        sLine = sr.ReadLine();
                        m_cTables++;
                        if (m_bReportProgress)
                        {
                            if (m_cTables % MAX_REPROT_POINT == 0)
                            {
                                m_bContinue = m_prReport.reportMessage(".", false);
                            }
                        }
                    }
                    if (m_bReportProgress)
                    {
                        m_bContinue = m_prReport.reportMessage("", true);
                        m_bContinue = m_prReport.reportMessage("Found " + m_cTables + " data rows.", true);
                    }
                }
                //Instead of enforcing a hard sample size, we keep each table independently with the given sample probability
                dSampleProbability = m_iSampleSize / (double)m_cTables;
                sr.Close();
                sr = m_fiInput.OpenText();
                if (m_bReportProgress)
                {
                    if (m_bSampling)
                    {
                        m_bContinue = m_prReport.reportPhase("Sampling tables");
                        m_bContinue = m_prReport.reportMessage("Sampling " + m_iSampleSize + " tables.", true);
                    }
                }

                if (m_bHasColumnHeaders)
                {
                    sr.ReadLine();
                }
                while (!sr.EndOfStream && m_bContinue)
                {
                    sLine = sr.ReadLine().Trim();
                    iLineNumber++;
                    if (sLine.Length > 0)
                    {
                        bUseTable = true;//general use flag - sampling, validation, ...
                        if (m_bSampling)
                        {
                            dProb = rnd.NextDouble();
                            if (dProb > dSampleProbability)
                            {
                                bUseTable = false;
                            }
                        }
                        if (bUseTable)
                        {
                            ctCurrent = new ContingencyTable(sLine, m_cTableNamesColumns);
                            bUseTable = ctCurrent.validate();
                        }
                        if (bUseTable)
                        {
                            if (m_bHuge)//instead of maintaining all the tables try to see whether we already loaded a table with the same counts
                            {
                                double dCount = m_ctcTableCounts.getCachedValue(ctCurrent);
                                if (double.IsNaN(dCount))//First time table was observed
                                {
                                    dCount = 0;
                                    actTables.Add(ctCurrent);
                                }
                                m_ctcTableCounts.setCachedValue(ctCurrent, dCount + 1);//increment the table count
                            }
                            else//not huge - maintain all tables (including duplicates)
                            {
                                actTables.Add(ctCurrent);
                            }
                        }
                        cTables++;
                    }
                    if ((cTables > 0) && (cTables % MAX_REPROT_POINT == 0))
                    {
                        if (m_bReportProgress)
                        {
                            m_bContinue = m_prReport.reportProcessedTables(cTables, m_cTables);
                            m_bContinue = m_prReport.reportMessage("Loaded " + cTables + " tables.", false);
                            if (m_bHuge)
                            {
                                m_bContinue = m_prReport.reportMessage(" Found " + actTables.Count + " distinct tables.", false);
                            }
                            m_bContinue = m_prReport.reportMessage("", true);
                        }
                    }
                    cCharacters += sLine.Length + 2;
                }
                if (m_bReportProgress)
                {
                    m_bContinue = m_prReport.reportMessage("Done loading data. Found " + actTables.Count + " distinct tables.", true);
                }
                sr.Close();
                return(actTables);
            }
            catch (Exception e)
            {
                m_bContinue = m_prReport.reportError("Could not load data : " + e.Message);
            }
            return(null);
        }
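Note that the sampling above is Bernoulli rather than fixed-size: each row is kept independently with probability m_iSampleSize / m_cTables, so only the expected sample size equals m_iSampleSize. The idea in isolation (a sketch with hypothetical names):

using System;
using System.Collections.Generic;

// Keep each row independently with probability cSampleSize / cPopulation.
// The number of rows kept is binomially distributed around cSampleSize.
static List<string> bernoulliSample(IEnumerable<string> lRows, int cSampleSize, int cPopulation)
{
    double dSampleProbability = cSampleSize / (double)cPopulation;
    Random rnd = new Random();
    List<string> lKept = new List<string>();
    foreach (string sRow in lRows)
    {
        if (rnd.NextDouble() <= dSampleProbability)
        {
            lKept.Add(sRow);
        }
    }
    return lKept;
}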
        /*
         * Computes pi in the filtering case.
         * Here we compute a different pi for each p-value.
         * A table is considered relevant for the pi computation of a p-value p only if its marginals support a p-value more extreme than p.
         * */
        private Map <double, double> computeFilteringPi(List <ContingencyTable> actTables, List <double> lPValues)
        {
            Map <double, List <ContingencyTable> > slRelevantTables = new Map <double, List <ContingencyTable> >();
            double dSumObservedPValuesInRange = 0.0, dCurrentTableFisherTestPValue = 0.0;
            int    cObservedTablesInRange = 0;
            double dFisherScore = 0.0, dHyperProbability = 0.0, dMinimalPossiblePValue = 0.0, dFirstLargerKey = 0.0;
            double dSumExpectedNullsInRange = 0;
            double dSumNullProbsInRange = 0.0;
            int    cNullsInRange = 0;
            int    iTable = 0;
            Map <double, double> slPi = new Map <double, double>();
            ContingencyTable     ctCurrent = null;

            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportPhase("Computing relevant tables.");
                m_bContinue = m_prReport.reportMessage("Started computing relevant tables for PI computation.", true);
            }

            //We first compute the list of relevant tables.
            //For each table we compute its minimal achievable p-value and assign the table to the next p-value on the list.
            //Then, the relevant tables for a given p-value are all the tables assigned to a p-value that is more extreme than the current one.
            for (iTable = 0; iTable < actTables.Count && m_bContinue; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];
                dMinimalPossiblePValue = ctCurrent.getMinimalAchievablePValue();
                dFirstLargerKey        = getNextKey(lPValues, dMinimalPossiblePValue);
                if (!slRelevantTables.ContainsKey(dFirstLargerKey))
                {
                    slRelevantTables.Add(dFirstLargerKey, new List <ContingencyTable>());
                }
                slRelevantTables[dFirstLargerKey].Add(ctCurrent);
                if (m_bReportProgress && (iTable > 0) && (iTable % 1000 == 0))
                {
                    m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
                }
            }

            //We iterate from smallest p-value to largest. The order is important because we want the relevant tables list to grow all the time.
            for (iTable = 0; iTable < actTables.Count && m_bContinue; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];

                dCurrentTableFisherTestPValue = round(ctCurrent.getFisher2TailPermutationTest());

                if (slRelevantTables.ContainsKey(dCurrentTableFisherTestPValue))
                {
                    //Now we iterate over the list of relevant tables
                    //Note - a table never becomes irrelevant. Therefore we always accumulate more observations and never remove any.
                    foreach (ContingencyTable ctRelevant in slRelevantTables[dCurrentTableFisherTestPValue])
                    {
                        dFisherScore = ctRelevant.getFisher2TailPermutationTest();

                        dSumObservedPValuesInRange += dFisherScore;
                        cObservedTablesInRange++;
                        //TODO - calling computeAllPermutationsScores twice - inefficient
                        double[,] adScores = ctRelevant.computeAllPermutationsScores();

                        for (int iCurrent = 0; iCurrent < adScores.GetLength(0); iCurrent++)
                        {
                            dHyperProbability = adScores[iCurrent, 0];
                            dFisherScore      = adScores[iCurrent, 1];

                            dSumNullProbsInRange     += dHyperProbability;
                            dSumExpectedNullsInRange += dFisherScore * dHyperProbability;
                            cNullsInRange++;
                        }
                    }
                    slRelevantTables.Remove(dCurrentTableFisherTestPValue);
                }
                //After iterating over all the relevant tables we compute the PI for that p-value
                //using the weighted sum method
                slPi[dCurrentTableFisherTestPValue] = (dSumObservedPValuesInRange / cObservedTablesInRange) /
                                                      (dSumExpectedNullsInRange / dSumNullProbsInRange);
                if (m_bReportProgress && (iTable > 0) && (iTable % 1000 == 0))
                {
                    m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
                }
            }
            slPi[10.0] = 1.0;
            return(slPi);
        }
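getNextKey is used throughout these examples but never shown. From its call sites it returns the smallest key that is not smaller than the given value; the sentinel entry at 10.0 guarantees such a key always exists. A plausible sketch under those assumptions:

using System.Collections.Generic;

// Sketch of getNextKey as used above (the original is not shown).
// Assumes lKeys is sorted ascending and contains a sentinel (10.0)
// larger than any achievable p-value, so a match always exists.
static double getNextKey(List<double> lKeys, double dValue)
{
    int iLow = 0, iHigh = lKeys.Count - 1;
    while (iLow < iHigh)
    {
        int iMid = (iLow + iHigh) / 2;
        if (lKeys[iMid] < dValue)
        {
            iLow = iMid + 1;
        }
        else
        {
            iHigh = iMid;
        }
    }
    return lKeys[iLow]; // first key >= dValue
}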
        /*
         * Main FDR computation function.
         * Takes as input an array of tables, already sorted by Fisher scores.
         * Outputs a map from p-value to FDR.
         * */
        private Map <double, FDRData> computeFDR(List <ContingencyTable> actTables)
        {
            int iTable = 0, cTables = actTables.Count;
            ContingencyTable ctCurrent = null;
            double           dFirstLargerKey = 0.0;
            double           dHyperProbability = 0.0, dFisherScore = 0.0;
            DateTime         dtBefore = DateTime.Now, dtAfter = DateTime.Now;
            TimeSpan         tsCurrent = TimeSpan.Zero, tsTotal = TimeSpan.Zero;
            int cTableCount = 1;
            int cReportInterval = 0;
            Map <double, FDRData> slFDR = null;
            double dSumObservedPValues = 0.0, dCurrentTableFisherTestPValue = 0.0;
            double dSumNullPValues = 0.0, dExpectedNullPValue = 0.0;
            double dEPhiNull = 0.0, dEPhiObserved = 0.0;
            int    cNullPValues     = 0;
            int    cObservedPValues = 0;

            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportPhase("Computing pooled p-values.");
                m_bContinue = m_prReport.reportMessage("Started computing pooled p-values values.", true);
            }

            slFDR = initFDRMap(actTables);

            cReportInterval = Math.Min(actTables.Count / 10, MAX_REPROT_POINT);

            for (iTable = 0; iTable < cTables && m_bContinue; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];

                dCurrentTableFisherTestPValue = ctCurrent.getFisher2TailPermutationTest();

                dSumObservedPValues += dCurrentTableFisherTestPValue;

                //dEPhiObserved += -Math.Log(1 - 0.99999999 * dCurrentTableFisherTestPValue);
                dEPhiObserved += Math.Sqrt(dCurrentTableFisherTestPValue);


                cObservedPValues++;

                double[,] adScores = ctCurrent.computeAllPermutationsScores();
                int iCurrent = 0;
                if (m_bHuge)
                {
                    cTableCount = (int)m_ctcTableCounts.getCachedValue(ctCurrent);
                }
                else
                {
                    cTableCount = 1;
                }

                for (iCurrent = 0; iCurrent < adScores.GetLength(0); iCurrent++)
                {
                    dHyperProbability = adScores[iCurrent, 0];
                    dFisherScore      = adScores[iCurrent, 1];

                    dSumNullPValues     += dHyperProbability;
                    dExpectedNullPValue += dFisherScore * dHyperProbability;
                    //dEPhiNull += -Math.Log(1 - 0.99999999 * dFisherScore) * dHyperProbability;
                    dEPhiNull += Math.Sqrt(dFisherScore) * dHyperProbability;
                    cNullPValues++;

                    dFirstLargerKey = getNextKey(slFDR.KeyList, dFisherScore);

                    slFDR[dFirstLargerKey].PooledPValue += (dHyperProbability * cTableCount);
                }

                if ((iTable > 0) && (iTable % cReprotInterval == 0))
                {
                    if (m_bReportProgress)
                    {
                        dtAfter     = DateTime.Now;
                        tsCurrent   = dtAfter.Subtract(dtBefore);
                        tsTotal    += tsCurrent;
                        m_bContinue = m_prReport.reportProcessedTables(iTable, cTables);
                        m_bContinue = m_prReport.reportMessage("Done " + iTable + " tables, avg time (ms) " + Math.Round(tsTotal.TotalMilliseconds / (iTable + 1)) +
                                                               ", total time " + tsTotal, true);
                    }
                }
            }

            double dPi = 1.0;

            if ((m_pmEvaluatePi == PiMethod.WeightedSum) || (m_pmEvaluatePi == PiMethod.DoubleAverage))
            {
                if (m_pmEvaluatePi == PiMethod.WeightedSum)
                {
                    dPi = (dSumObservedPValues / cObservedPValues) / (dExpectedNullPValue / dSumNullPValues); // \pi_0 = (\sum_T p(T))/(\sum_T p(T)pr(T|H=0))
                }
                else if (m_pmEvaluatePi == PiMethod.DoubleAverage)
                {
                    dPi = 2.0 * (dSumObservedPValues / cObservedPValues); // \pi_0 = 2 * avg(p)
                }
                double dPhiPi = dEPhiObserved / dEPhiNull; //alternative phi-based estimate, currently unused


                m_bContinue = m_prReport.reportMessage("Estimating PI = " + dPi, true);
            }
            else if (m_pmEvaluatePi == PiMethod.Filtering)
            {
                Map <double, double> slPi  = computeFilteringPi(actTables, slFDR.KeyList);
                List <double>        lKeys = new List <double>(slFDR.Keys);
                foreach (double dKey in lKeys)
                {
                    slFDR[dKey].FilteringPi = slPi[dKey];
                }
            }
            m_dPi = dPi;
            sumFDRs(actTables, slFDR, dPi);
            return(slFDR);
        }
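The two pi_0 estimators used above reduce to short formulas: WeightedSum divides the average observed p-value by the null-expected p-value, and DoubleAverage takes 2 * avg(p), which is exact when null p-values are uniform on [0,1]. The arithmetic in isolation (sketch; inputs are the sums accumulated in the main loop):

// WeightedSum: pi_0 = (sum_T p(T) / #T) / (sum_perm p * Pr(perm|H0) / sum_perm Pr(perm|H0))
static double weightedSumPi(double dSumObservedPValues, int cObservedPValues,
                            double dExpectedNullPValue, double dSumNullPValues)
{
    return (dSumObservedPValues / cObservedPValues)
           / (dExpectedNullPValue / dSumNullPValues);
}

// DoubleAverage: pi_0 = 2 * avg(p); exact for Uniform(0,1) null p-values.
static double doubleAveragePi(double dSumObservedPValues, int cObservedPValues)
{
    return 2.0 * (dSumObservedPValues / cObservedPValues);
}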
        /*
         * Builds the result lines, which are later written to a file.
         * The first line is the headers (if they exist) with the new columns added.
         * Each following line is a contingency table with its Fisher score, FDR and q-value.
         * */
        private List <string> getResults(List <ContingencyTable> actTables
                                         , Map <double, FDRData> slFDR)
        {
            int iTable = 0;
            ContingencyTable ctCurrent = null;
            double           dFisherTest = 0.0, dCurrentQValue = 0.0;
            double           dNextKey    = 0;
            string           sHeader     = "";
            FDRData          fdCurrent   = null;
            string           sOutputLine = "";
            List <string>    lResults    = new List <string>();
            bool             bFiltering  = m_pmEvaluatePi == PiMethod.Filtering;

            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportPhase("Writing results.");
            }

            sHeader = m_sColumnHeaders + "\tp-value";
            if (m_bFullOutput)
            {
                sHeader += "\tpooled p-value\t";
                if (bFiltering)
                {
                    sHeader += "filtering pi\t";
                }
                if (m_bPositiveFDR)
                {
                    sHeader += "pr(R(p)>0)\tpFDR";
                }
                else
                {
                    sHeader += "FDR";
                }
            }
            sHeader += "\tq-value";
            lResults.Add(sHeader);


            List <KeyValuePair <double, double> > lPToQMappings = new List <KeyValuePair <double, double> >();

            //When the huge flag is used, the tables are not kept.
            //We now have to go over the entire input file, read each table,
            //compute p-value for it, and map it into FDR and q-value.
            if (m_bHuge)
            {
                StreamReader sr    = m_fiInput.OpenText();
                string       sLine = "";
                double       dFisherScoreCutoff = 0.0;
                bool         bUseTable          = true;

                if (m_dFDRCutoff > 0.0)
                {
                    dFisherScoreCutoff = mapFDR2FisherScore(slFDR, m_dFDRCutoff);
                }

                iTable = 0;
                while (!sr.EndOfStream)
                {
                    sLine = sr.ReadLine();
                    if (sLine.Length > 0)
                    {
                        ctCurrent = new ContingencyTable(sLine, m_cTableNamesColumns);
                        bUseTable = ctCurrent.validate();
                        if (bUseTable)
                        {
                            dFisherTest    = round(ctCurrent.getFisher2TailPermutationTest(dFisherScoreCutoff));
                            dNextKey       = getNextKey(slFDR.KeyList, dFisherTest);
                            fdCurrent      = slFDR[dNextKey];
                            dCurrentQValue = round(fdCurrent.QValue);
                            if (dCurrentQValue <= m_dFDRCutoff)
                            {
                                sOutputLine  = ctCurrent.ToString() + "\t";
                                sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                                lResults.Add(sOutputLine);
                                lPToQMappings.Add(new KeyValuePair <double, double>(dNextKey, dCurrentQValue));//will not work for huge because multiple tables will be missed
                            }
                        }
                        iTable++;
                        if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
                        {
                            m_bContinue = m_prReport.reportProcessedTables(iTable, m_cTables);
                            m_bContinue = m_prReport.reportMessage("Written " + iTable + " tables.", true);
                        }
                    }
                }
                sr.Close();
            }
            else//Not huge - all data is already in memory - just write the tables.
            {
                for (iTable = 0; iTable < actTables.Count; iTable++)
                {
                    ctCurrent      = (ContingencyTable)actTables[iTable];
                    dFisherTest    = ctCurrent.getFisher2TailPermutationTest();
                    dNextKey       = getNextKey(slFDR.KeyList, dFisherTest);
                    fdCurrent      = slFDR[dNextKey];
                    dCurrentQValue = floor(fdCurrent.QValue);
                    if (dCurrentQValue <= m_dFDRCutoff)
                    {
                        sOutputLine  = ctCurrent.ToString() + "\t";
                        sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                        lPToQMappings.Add(new KeyValuePair <double, double>(dNextKey, dCurrentQValue));
                        lResults.Add(sOutputLine);
                    }
                    if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
                    {
                        m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
                    }


                    //swMarginalPValues.WriteLine(fdCurrent.PValue);
                }
            }
            PToQMapping = lPToQMappings;
            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportMessage("Done writing results", true);
            }



            //swMarginalPValues.Close();


            return(lResults);
        }
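As a worked example of the header assembly above: with m_bFullOutput and m_bPositiveFDR set and filtering off, the header row is the input column headers followed by the tab-separated columns p-value, pooled p-value, pr(R(p)>0), pFDR, q-value. With m_bFullOutput off, only p-value and q-value are appended.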
        /**
         * When computing, we only add the probability of each permutation to the closest higher p-value entry.
         * Now we need to go over all these entries and sum everything that has a smaller p-value (more significant).
         * We take a mapping from p-value to single-appearance probabilities
         * and return a mapping from p-value to the sum of all more significant probabilities.
         * */
        private void sumFDRs(List <ContingencyTable> actTables, Map <double, FDRData> slFDR, double dPiEstimation)
        {
            double           dSum = 0;
            int              iTable = 0;
            long             cAllTables = actTables.Count;
            long             cTables = 0;
            ContingencyTable ctCurrent = null, ctNext = null;
            double           dFisherScore = 0.0, dNextFisherScore = 0.0;

            int iSample = 0;

            //First, sum all the pooled p-values that are lower than the current table
            foreach (FDRData data in slFDR.Values)
            {
                dSum += data.PooledPValue;
                data.PooledPValue  = dSum;
                data.PooledPValue /= cAllTables;
                if (data.FilteringPi > 0.0)
                {
                    data.FDR = dSum * data.FilteringPi;
                }
                else
                {
                    data.FDR = dSum * dPiEstimation;
                }
                if (m_bPositiveFDR)
                {
                    data.RejectionAreaProb = OneMinusOneMinuXToTheM(data.PooledPValue, cAllTables);
                    data.FDR /= data.RejectionAreaProb;
                }
                iSample++;
            }

            dSum = 0;
            //We now have to divide by the number of more significant tables to move from pooled p-values to FDR
            for (iTable = 0; iTable < actTables.Count; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];
                if (iTable < actTables.Count - 1)
                {
                    ctNext = (ContingencyTable)actTables[iTable + 1];
                }
                else
                {
                    ctNext = null;
                }
                dFisherScore = round(ctCurrent.getFisher2TailPermutationTest());
                if (m_bHuge)//special case to huge datasets where the same table can appear multiple times
                {
                    cTables += (long)m_ctcTableCounts.getCachedValue(ctCurrent);
                }
                else
                {
                    cTables++;
                }
                if (ctNext != null)
                {
                    dNextFisherScore = round(ctNext.getFisher2TailPermutationTest());
                }
                if ((ctNext == null) || (dFisherScore != dNextFisherScore))
                {
                    slFDR[dFisherScore].FDR /= cTables;
                }
            }
        }
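OneMinusOneMinuXToTheM, used for the pFDR correction above, is not shown in these examples; from its name and use it evaluates pr(R(p)>0) = 1 - (1 - p)^m, the probability that at least one of m null p-values is rejected at level p, which converts FDR into pFDR. A plausible sketch:

using System;

// Sketch of the helper used above (the original is not shown):
// pr(at least one of m independent null p-values <= x) = 1 - (1 - x)^m.
static double OneMinusOneMinuXToTheM(double x, long m)
{
    // For very small x, (1 - x) loses precision; a log1p-style expansion
    // would be more accurate, but this matches the name directly.
    return 1.0 - Math.Pow(1.0 - x, m);
}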
Example #11
        /**
         * Computes the Fisher two-sided p-value.
         * We iterate over all the permutations and sum the ones that have a lower probability (more extreme).
         * We compute from scratch only a single hypergeometric probability - the probability of the "real" table.
         * Then we iterate by incrementing the table and the probability (right side) and by decrementing the table and the probability (left side).
         * The algorithm has complexity O(n), but usually runs much faster.
         * Another possible optimization - when the p-value exceeds a cutoff, return 1. This is useful when we only need to know whether one value is larger than the other.
         * When the values are too small to be represented by a double (smaller than about 1E-302) the computation returns an upper bound on the real value.
         * */
        private double computeFisher2TailPermutationTest(double dObservedTableProbability, double dCutoff)
        {
            double           p0 = dObservedTableProbability;
            double           p0Epsilon = p0 * 0.00001;
            double           p = p0, pt = 0, pAbove = 0.0, pLeft = p0, pRight = 0.0;
            ContingencyTable ctIterator = null;
            int cPermutations = 0, cRemainingPermutations = getMaxPossibleA() - getMinPossibleA();

            m_dMinimalPValue = p0;

            if (p0 == double.Epsilon)
            {
                return(p0 * cRemainingPermutations); //an upper bound estimation
            }
            //Iterate to the right side - increasing values of a
            if (g_ttTestType == TestType.Right || g_ttTestType == TestType.TwoSided)
            {
                ctIterator = next();
                pt         = incrementalHypergeometricProbability(p0);
                while (ctIterator != null)
                {
                    if (pt < m_dMinimalPValue)
                    {
                        m_dMinimalPValue = pt;
                    }
                    cPermutations++;

                    if (p0 + p0Epsilon >= pt)
                    {
                        p      = p + pt;
                        pLeft += pt;
                        if (p > dCutoff)
                        {
                            return(1.0);
                        }
                    }
                    else
                    {
                        pAbove += pt;
                    }
                    pt         = ctIterator.incrementalHypergeometricProbability(pt);
                    ctIterator = ctIterator.next();

                    if ((ctIterator != null) && (pt <= p0Epsilon))
                    {
                        pt        *= (getMaxPossibleA() - ctIterator.getA() + 1);
                        p         += pt;
                        pLeft     += pt;
                        ctIterator = null;
                    }
                }
            }
            //Iterate to the left side - decreasing values of a
            if (g_ttTestType == TestType.Left || g_ttTestType == TestType.TwoSided)
            {
                ctIterator = previous();
                pt         = decrementalHypergeometricProbability(p0);
                while (ctIterator != null)
                {
                    if (pt < m_dMinimalPValue)
                    {
                        m_dMinimalPValue = pt;
                    }
                    cPermutations++;
                    if (p0 + p0Epsilon >= pt)
                    {
                        p       = p + pt;
                        pRight += pt;
                        if (p > dCutoff)
                        {
                            return(1.0);
                        }
                    }
                    else
                    {
                        pAbove += pt;
                    }
                    pt         = ctIterator.decrementalHypergeometricProbability(pt);
                    ctIterator = ctIterator.previous();

                    if ((ctIterator != null) && (pt <= p0Epsilon))
                    {
                        pt        *= (ctIterator.getA() - getMinPossibleA());
                        p         += pt;
                        pRight    += pt;
                        ctIterator = null;
                    }
                }
            }

            m_cComputedFisherScores++;

            return(p);
        }
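The incremental updates that drive these loops follow from the ratio of adjacent tables: with all four marginals fixed, moving a to a+1 forces b and c down by one and d up by one, so P(a+1) = P(a) * b*c / ((a+1)*(d+1)), and symmetrically for the decrement. A sketch of what incrementalHypergeometricProbability / decrementalHypergeometricProbability presumably compute (the originals are not shown):

// P(a+1) from P(a) for fixed marginals: one unit moves from cells b,c into a,d.
static double incrementalProbability(double p, int a, int b, int c, int d)
{
    return p * b * c / ((double)(a + 1) * (d + 1));
}

// P(a-1) from P(a): the inverse move, from cells a,d into b,c.
static double decrementalProbability(double p, int a, int b, int c, int d)
{
    return p * a * d / ((double)(b + 1) * (c + 1));
}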
Example #12
        public double[] computeAllFisherStatistics(double[] adResults, ref bool bApproximated)
        {
            double           p0 = getHypergeometricProbability();// probability of seeing the actual data
            double           p0Epsilon = p0 * 0.00001;
            double           p = p0, pt = 0, ptMax = 0.0, pLeft = p0, pRight = p0;
            ContingencyTable ctMaxTable = getMaxValueTable(), ctIterator = ctMaxTable;
            int iMaxA = getMaxPossibleA(), iMinA = getMinPossibleA();
            int cPermutations = 0, cRemainingPermutations = iMaxA - iMinA;
            int iCurrentA = 0;

            double[] adMapping = new double[iMaxA + 1];

            adResults[0] = p0;

            ptMax = ctIterator.getHypergeometricProbability();
            pt    = ptMax;
            while (ctIterator != null)
            {
                cPermutations++;
                iCurrentA            = ctIterator.getA();
                adMapping[iCurrentA] = pt;

                if (iCurrentA > m_iA)
                {
                    pRight += pt;
                }
                if (iCurrentA < m_iA)
                {
                    pLeft += pt;
                }

                if (p0 + p0Epsilon >= pt && iCurrentA != m_iA)
                {
                    p = p + pt;
                }
                pt         = ctIterator.incrementalHypergeometricProbability(pt);
                ctIterator = ctIterator.next();

                if ((ctIterator != null) && (pt == double.Epsilon))
                {
                    pt           *= (iMaxA - ctIterator.getA() + 1);
                    p            += pt;
                    pRight       += pt;
                    bApproximated = true;
                    for (iCurrentA = ctIterator.getA(); iCurrentA <= iMaxA; iCurrentA++)
                    {
                        adMapping[iCurrentA] = double.Epsilon;
                    }
                    ctIterator = null;
                }
            }
            //Iterate to the left side - decreasing values of a
            ctIterator = ctMaxTable.previous();
            pt         = ctMaxTable.decrementalHypergeometricProbability(ptMax);
            while (ctIterator != null)
            {
                cPermutations++;
                iCurrentA            = ctIterator.getA();
                adMapping[iCurrentA] = pt;

                if (iCurrentA > m_iA)
                {
                    pRight += pt;
                }
                if (iCurrentA < m_iA)
                {
                    pLeft += pt;
                }

                if (p0 + p0Epsilon >= pt && iCurrentA != m_iA)
                {
                    p = p + pt;
                }
                pt         = ctIterator.decrementalHypergeometricProbability(pt);
                ctIterator = ctIterator.previous();

                if ((ctIterator != null) && (pt == double.Epsilon))
                {
                    pt           *= (ctIterator.getA() - getMinPossibleA());
                    p            += pt;
                    pLeft        += pt;
                    bApproximated = true;
                    for (iCurrentA = ctIterator.getA(); iCurrentA >= iMinA; iCurrentA--)
                    {
                        adMapping[iCurrentA] = double.Epsilon;
                    }
                    ctIterator = null;
                }
            }
            for (iCurrentA = iMinA - 1; iCurrentA >= 0; iCurrentA--)
            {
                adMapping[iCurrentA] = 0.0;
            }

            adResults[1] = pLeft;
            adResults[2] = pRight;
            adResults[3] = p;

            return(adMapping);
        }
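A usage sketch for this method (hypothetical calling code): adResults must have room for the four summary values filled in above, and the returned array maps each achievable value of a to its point probability.

// Hypothetical caller; ct is a ContingencyTable built as in the other examples.
double[] adResults = new double[4];
bool bApproximated = false;
double[] adMapping = ct.computeAllFisherStatistics(adResults, ref bApproximated);

double p0        = adResults[0]; // probability of the observed table
double pLeft     = adResults[1]; // left-sided p-value,  P(A <= observed a)
double pRight    = adResults[2]; // right-sided p-value, P(A >= observed a)
double pTwoSided = adResults[3]; // two-sided p-value
// adMapping[a] holds the hypergeometric probability of the table with top-left
// cell a; double.Epsilon marks underflowed tails (bApproximated is then true)
// and 0.0 marks values of a below the minimum possible.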