// Example No. 1
        /**
         * Orders tables by their two-tailed Fisher permutation-test p-values
         * so lists of tables can be sorted.
         * Accepts either another ContingencyTable or a boxed double; any other
         * argument (or an equal p-value) compares as equal and yields 0.
         * */

        public int CompareTo(object obj)
        {
            double dMine = getFisher2TailPermutationTest();
            double dOther;

            if (obj is ContingencyTable)
            {
                dOther = ((ContingencyTable)obj).getFisher2TailPermutationTest();
            }
            else if (obj is double)
            {
                dOther = (double)obj;
            }
            else
            {
                return(0);
            }

            if (dOther < dMine)
            {
                return(1);
            }
            if (dOther > dMine)
            {
                return(-1);
            }
            return(0);
        }
        /**
         * Builds the FDR map keyed by every distinct (rounded) Fisher p-value
         * appearing in the table list, plus a sentinel entry at 10.0 that is
         * guaranteed to be larger than any real p-value.
         * */
        private Map <double, FDRData> initFDRMap(List <ContingencyTable> actTables)
        {
            Map <double, FDRData> slFDR = new Map <double, FDRData>();

            foreach (ContingencyTable ctTable in actTables)
            {
                double dScore = round(ctTable.getFisher2TailPermutationTest());
                if (!slFDR.ContainsKey(dScore))
                {
                    slFDR.Add(dScore, new FDRData(dScore));
                }
            }
            slFDR.Add(10.0, new FDRData(10.0)); // sentinel entry with a huge fisher score
            return(slFDR);
        }
        /*
         * Computing pi in the filtering case.
         * In this case we compute a different pi for each p-value.
         * A table is considered relevant for the pi computation of a p-value p only if its marginals support a p-value that is more extreme than p.
         * Returns a map from (rounded) p-value to its pi estimate, plus a
         * sentinel of 1.0 at key 10.0 (matching the sentinel in initFDRMap).
         * Assumes actTables is sorted from most to least significant p-value —
         * the second loop relies on that ordering. (NOTE(review): confirm with
         * the caller; computeFDR's header says tables arrive sorted.)
         * */
        private Map <double, double> computeFilteringPi(List <ContingencyTable> actTables, List <double> lPValues)
        {
            Map <double, List <ContingencyTable> > slRelevantTables = new Map <double, List <ContingencyTable> >();
            double dSumObservedPValuesInRange = 0.0, dCurrentTableFisherTestPValue = 0.0;
            int    cObservedTablesInRange = 0;
            double dFisherScore = 0.0, dHyperProbability = 0.0, dMinimalPossiblePValue = 0.0, dFirstLargerKey = 0.0;
            double dSumExpectedNullsInRange = 0;
            double dSumNullProbsInRange = 0.0;
            int    cNullsInRange = 0; //counted but never read (kept for parity with original)
            int    iTable = 0;
            Map <double, double> slPi = new Map <double, double>();
            ContingencyTable     ctCurrent = null;

            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportPhase("Computing relevant tables.");
                m_bContinue = m_prReport.reportMessage("Started computing relevant tables for PI computation.", true);
            }

            //We first compute the list of relevant tables.
            //For each table we compute its minimal achievable p-value and add it to the next p-value on the list.
            //Now, the relevant tables are all the tables that belong to a p-value that is more extreme than the current one.
            for (iTable = 0; iTable < actTables.Count && m_bContinue; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];
                dMinimalPossiblePValue = ctCurrent.getMinimalAchievablePValue();
                //Bucket the table under the first list p-value >= its minimal achievable p-value.
                dFirstLargerKey        = getNextKey(lPValues, dMinimalPossiblePValue);
                if (!slRelevantTables.ContainsKey(dFirstLargerKey))
                {
                    slRelevantTables.Add(dFirstLargerKey, new List <ContingencyTable>());
                }
                slRelevantTables[dFirstLargerKey].Add(ctCurrent);
                if (m_bReportProgress && (iTable > 0) && (iTable % 1000 == 0))
                {
                    m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
                }
            }

            //We iterate from smallest p-value to largest. The order is important because we want the relevant tables list to grow all the time.
            for (iTable = 0; iTable < actTables.Count && m_bContinue; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];

                dCurrentTableFisherTestPValue = round(ctCurrent.getFisher2TailPermutationTest());

                if (slRelevantTables.ContainsKey(dCurrentTableFisherTestPValue))
                {
                    //Fold this p-value's bucket of newly-relevant tables into the
                    //running sums. A table never becomes irrelevant: the sums only
                    //grow; the bucket itself is removed below merely to avoid
                    //folding it in twice when the same rounded p-value repeats.
                    foreach (ContingencyTable ctRelevant in slRelevantTables[dCurrentTableFisherTestPValue])
                    {
                        dFisherScore = ctRelevant.getFisher2TailPermutationTest();

                        dSumObservedPValuesInRange += dFisherScore;
                        cObservedTablesInRange++;
                        //TODO - calling computeAllPermutationsScores twice - inefficient
                        //Columns: [iCurrent, 0] = hypergeometric probability of the
                        //permutation, [iCurrent, 1] = its Fisher score (see the
                        //identical usage in computeFDR).
                        double[,] adScores = ctRelevant.computeAllPermutationsScores();

                        for (int iCurrent = 0; iCurrent < adScores.GetLength(0); iCurrent++)
                        {
                            dHyperProbability = adScores[iCurrent, 0];
                            dFisherScore      = adScores[iCurrent, 1];

                            dSumNullProbsInRange     += dHyperProbability;
                            dSumExpectedNullsInRange += dFisherScore * dHyperProbability;
                            cNullsInRange++;
                        }
                    }
                    slRelevantTables.Remove(dCurrentTableFisherTestPValue);
                }
                //After iterating over all the relevant tables we compute the PI for that p-value
                //using the weighted sum method:
                //pi = avg(observed p) / (E[null p] = sum(p*pr) / sum(pr)).
                //NOTE(review): if no bucket has been folded in yet, this is 0/0;
                //C# double division yields NaN/Infinity rather than throwing —
                //confirm callers tolerate that, or that the first key always has
                //at least one relevant table.
                slPi[dCurrentTableFisherTestPValue] = (dSumObservedPValuesInRange / cObservedTablesInRange) /
                                                      (dSumExpectedNullsInRange / dSumNullProbsInRange);
                if (m_bReportProgress && (iTable > 0) && (iTable % 1000 == 0))
                {
                    m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
                }
            }
            //Sentinel: the 10.0 key added by initFDRMap gets a neutral pi of 1.
            slPi[10.0] = 1.0;
            return(slPi);
        }
        /*
         * Main FDR computation function.
         * Takes as input a list of tables, already sorted by Fisher scores.
         * Outputs a map from (rounded) p-value to FDRData.
         *
         * For every table the full permutation distribution of its marginals is
         * enumerated (computeAllPermutationsScores) and each permutation's
         * probability mass is pooled into the FDR entry of the next key at or
         * above that permutation's Fisher score. Pi (the estimated fraction of
         * true nulls) is then computed per m_pmEvaluatePi, and sumFDRs converts
         * the pooled masses into FDR values.
         * */
        private Map <double, FDRData> computeFDR(List <ContingencyTable> actTables)
        {
            int iTable = 0, cTables = actTables.Count;
            ContingencyTable ctCurrent = null;
            double           dFirstLargerKey = 0.0;
            double           dHyperProbability = 0.0, dFisherScore = 0.0;
            DateTime         dtBefore = DateTime.Now, dtAfter = DateTime.Now;
            TimeSpan         tsCurrent = TimeSpan.Zero, tsTotal = TimeSpan.Zero;
            int cTableCount = 1;
            int cReportInterval = 0; //renamed from misspelled local "cReprotInterval"
            Map <double, FDRData> slFDR = null;
            double dSumObservedPValues = 0.0, dCurrentTableFisherTestPValue = 0.0;
            double dSumNullPValues = 0.0, dExpectedNullPValue = 0.0;
            double dEPhiNull = 0.0, dEPhiObserved = 0.0;
            int    cNullPValues     = 0;
            int    cObservedPValues = 0;

            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportPhase("Computing pooled p-values.");
                m_bContinue = m_prReport.reportMessage("Started computing pooled p-values values.", true);
            }

            slFDR = initFDRMap(actTables);

            //Guard against a zero interval (fewer than 10 tables): the original
            //code could compute 0 here, making "iTable % cReportInterval" throw
            //a DivideByZeroException.
            cReportInterval = Math.Max(1, Math.Min(actTables.Count / 10, MAX_REPROT_POINT));

            for (iTable = 0; iTable < cTables && m_bContinue; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];

                dCurrentTableFisherTestPValue = ctCurrent.getFisher2TailPermutationTest();

                dSumObservedPValues += dCurrentTableFisherTestPValue;

                //phi(p) = sqrt(p) accumulator over observed tables.
                //(The commented-out variant used -log(1 - p) instead.)
                //dEPhiObserved += -Math.Log(1 - 0.99999999 * dCurrentTableFisherTestPValue);
                dEPhiObserved += Math.Sqrt(dCurrentTableFisherTestPValue);

                cObservedPValues++;

                double[,] adScores = ctCurrent.computeAllPermutationsScores();
                int iCurrent = 0;
                if (m_bHuge)
                {
                    //Huge mode collapses identical tables; weight this table by
                    //its multiplicity in the input.
                    cTableCount = (int)m_ctcTableCounts.getCachedValue(ctCurrent);
                }
                else
                {
                    cTableCount = 1;
                }

                //Pool the null distribution of this table's marginals.
                //Column 0: hypergeometric probability of the permutation.
                //Column 1: its Fisher score (p-value).
                for (iCurrent = 0; iCurrent < adScores.GetLength(0); iCurrent++)
                {
                    dHyperProbability = adScores[iCurrent, 0];
                    dFisherScore      = adScores[iCurrent, 1];

                    dSumNullPValues     += dHyperProbability;
                    dExpectedNullPValue += dFisherScore * dHyperProbability;
                    //dEPhiNull += -Math.Log(1 - 0.99999999 * dFisherScore) * dHyperProbability;
                    dEPhiNull += Math.Sqrt(dFisherScore) * dHyperProbability;
                    cNullPValues++;

                    //Mass goes to the nearest key >= this score; sumFDRs later
                    //accumulates over all more significant keys.
                    dFirstLargerKey = getNextKey(slFDR.KeyList, dFisherScore);

                    slFDR[dFirstLargerKey].PooledPValue += (dHyperProbability * cTableCount);
                }

                if ((iTable > 0) && (iTable % cReportInterval == 0))
                {
                    if (m_bReportProgress)
                    {
                        dtAfter     = DateTime.Now;
                        tsCurrent   = dtAfter.Subtract(dtBefore);
                        tsTotal    += tsCurrent;
                        //Reset the interval start so each span is counted once.
                        //The original never reset dtBefore, so tsTotal re-counted
                        //every earlier interval and grew quadratically.
                        dtBefore    = dtAfter;
                        m_bContinue = m_prReport.reportProcessedTables(iTable, cTables);
                        m_bContinue = m_prReport.reportMessage("Done " + iTable + " tables, avg time (ms) " + Math.Round(tsTotal.TotalMilliseconds / (iTable + 1)) +
                                                               ", total time " + tsTotal, true);
                    }
                }
            }

            double dPi = 1.0;

            if ((m_pmEvaluatePi == PiMethod.WeightedSum) || (m_pmEvaluatePi == PiMethod.DoubleAverage))
            {
                if (m_pmEvaluatePi == PiMethod.WeightedSum)
                {
                    dPi = (dSumObservedPValues / cObservedPValues) / (dExpectedNullPValue / dSumNullPValues); // \pi_0 = (\sum_T p(T))/(\sum_T p(T)pr(T|H=0))
                }
                else if (m_pmEvaluatePi == PiMethod.DoubleAverage)
                {
                    dPi = 2.0 * (dSumObservedPValues / cObservedPValues); // \pi_0 = 2 * avg(p)
                }
                //NOTE(review): the phi-based estimate dEPhiObserved / dEPhiNull
                //was computed into an unused local here; removed as dead code.
                //The accumulators are kept for debugging parity.

                m_bContinue = m_prReport.reportMessage("Estimating PI = " + dPi, true);
            }
            else if (m_pmEvaluatePi == PiMethod.Filtering)
            {
                //Filtering mode: a separate pi per p-value cutoff; dPi stays 1.0.
                Map <double, double> slPi  = computeFilteringPi(actTables, slFDR.KeyList);
                List <double>        lKeys = new List <double>(slFDR.Keys);
                foreach (double dKey in lKeys)
                {
                    slFDR[dKey].FilteringPi = slPi[dKey];
                }
            }
            m_dPi = dPi;
            sumFDRs(actTables, slFDR, dPi);
            return(slFDR);
        }
        /*
         * Formats the results of the computation for output.
         * First line is the headers (if exist) with the new columns added.
         * Each following line is a contingency table with its Fisher score, FDR
         * and q-value, filtered by m_dFDRCutoff. Also populates PToQMapping
         * with (p-value key, q-value) pairs for the emitted rows.
         * */
        private List <string> getResults(List <ContingencyTable> actTables
                                         , Map <double, FDRData> slFDR)
        {
            int iTable = 0;
            ContingencyTable ctCurrent = null;
            double           dFisherTest = 0.0, dCurrentQValue = 0.0;
            double           dNextKey    = 0;
            string           sHeader     = "";
            FDRData          fdCurrent   = null;
            string           sOutputLine = "";
            List <string>    lResults    = new List <string>();
            bool             bFiltering  = m_pmEvaluatePi == PiMethod.Filtering;

            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportPhase("Writing results.");
            }

            //Header row; optional columns depend on the output flags.
            sHeader = m_sColumnHeaders + "\tp-value";
            if (m_bFullOutput)
            {
                sHeader += "\tpooled p-value\t";
                if (bFiltering)
                {
                    sHeader += "filtering pi\t";
                }
                if (m_bPositiveFDR)
                {
                    sHeader += "pr(R(p)>0)\tpFDR";
                }
                else
                {
                    sHeader += "FDR";
                }
            }
            sHeader += "\tq-value";
            lResults.Add(sHeader);


            List <KeyValuePair <double, double> > lPToQMappings = new List <KeyValuePair <double, double> >();

            //When the huge flag is used, the tables are not kept in memory.
            //We have to go over the entire input file again, read each table,
            //compute its p-value, and map it into FDR and q-value.
            if (m_bHuge)
            {
                string sLine = "";
                double dFisherScoreCutoff = 0.0;
                bool   bUseTable          = true;

                if (m_dFDRCutoff > 0.0)
                {
                    dFisherScoreCutoff = mapFDR2FisherScore(slFDR, m_dFDRCutoff);
                }

                iTable = 0;
                //using guarantees the reader is disposed even if a line fails to
                //parse; the original closed it only on the success path.
                using (StreamReader sr = m_fiInput.OpenText())
                {
                    while (!sr.EndOfStream)
                    {
                        sLine = sr.ReadLine();
                        if (sLine.Length > 0)
                        {
                            ctCurrent = new ContingencyTable(sLine, m_cTableNamesColumns);
                            bUseTable = ctCurrent.validate();
                            if (bUseTable)
                            {
                                dFisherTest    = round(ctCurrent.getFisher2TailPermutationTest(dFisherScoreCutoff));
                                dNextKey       = getNextKey(slFDR.KeyList, dFisherTest);
                                fdCurrent      = slFDR[dNextKey];
                                //NOTE(review): this branch rounds the q-value while
                                //the in-memory branch floors it — confirm intended.
                                dCurrentQValue = round(fdCurrent.QValue);
                                if (dCurrentQValue <= m_dFDRCutoff)
                                {
                                    sOutputLine  = ctCurrent.ToString() + "\t";
                                    sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                                    lResults.Add(sOutputLine);
                                    lPToQMappings.Add(new KeyValuePair <double, double>(dNextKey, dCurrentQValue));//will not work for huge because multiple tables will be missed
                                }
                            }
                            iTable++;
                            if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
                            {
                                m_bContinue = m_prReport.reportProcessedTables(iTable, m_cTables);
                                m_bContinue = m_prReport.reportMessage("Written " + iTable + " tables.", true);
                            }
                        }
                    }
                }
            }
            else//Not huge - all data is already in memory - just write the tables.
            {
                for (iTable = 0; iTable < actTables.Count; iTable++)
                {
                    ctCurrent      = (ContingencyTable)actTables[iTable];
                    //NOTE(review): the Fisher score is not rounded here, unlike the
                    //huge branch above — confirm intended.
                    dFisherTest    = ctCurrent.getFisher2TailPermutationTest();
                    dNextKey       = getNextKey(slFDR.KeyList, dFisherTest);
                    fdCurrent      = slFDR[dNextKey];
                    dCurrentQValue = floor(fdCurrent.QValue);
                    if (dCurrentQValue <= m_dFDRCutoff)
                    {
                        sOutputLine  = ctCurrent.ToString() + "\t";
                        sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                        lPToQMappings.Add(new KeyValuePair <double, double>(dNextKey, dCurrentQValue));
                        lResults.Add(sOutputLine);
                    }
                    if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
                    {
                        m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
                    }
                }
            }
            PToQMapping = lPToQMappings;
            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportMessage("Done writing results", true);
            }

            return(lResults);
        }
        /**
         * When computing we only add the probabilities of the permutations to the
         * closest higher p-value entry. Here we go over all entries and sum
         * everything that has a smaller (more significant) p-value:
         * takes a mapping from p-value to single-appearance probabilities and
         * turns it into a mapping from p-value to the sum of all more significant
         * probabilities, then converts pooled p-values to FDR using the pi
         * estimate and divides by the number of tables at or below each score.
         * */
        private void sumFDRs(List <ContingencyTable> actTables, Map <double, FDRData> slFDR, double dPiEstimation)
        {
            double           dSum = 0;
            int              iTable = 0;
            long             cAllTables = actTables.Count;
            long             cTables = 0;
            ContingencyTable ctCurrent = null, ctNext = null;
            double           dFisherScore = 0.0, dNextFisherScore = 0.0;

            //First, accumulate the pooled p-values so each entry holds the total
            //mass of all p-values at or below its own. (Relies on slFDR.Values
            //iterating in ascending key order.)
            foreach (FDRData data in slFDR.Values)
            {
                dSum += data.PooledPValue;
                data.PooledPValue  = dSum;
                //NOTE(review): in huge mode cAllTables counts distinct tables,
                //not the multiplicity-weighted count used below — confirm intended.
                data.PooledPValue /= cAllTables;
                if (data.FilteringPi > 0.0)
                {
                    //Filtering mode: per-cutoff pi computed by computeFilteringPi.
                    data.FDR = dSum * data.FilteringPi;
                }
                else
                {
                    data.FDR = dSum * dPiEstimation;
                }
                if (m_bPositiveFDR)
                {
                    //pFDR: condition on at least one rejection, pr(R(p) > 0).
                    data.RejectionAreaProb = OneMinusOneMinuXToTheM(data.PooledPValue, cAllTables);
                    data.FDR /= data.RejectionAreaProb;
                }
            }
            //(Removed dead code from the original: an iSample counter that was
            //never read and a "dSum = 0;" reset after which dSum was never used.)

            //Divide by the number of tables that are at least as significant to
            //move from pooled p-values to FDR. The division happens only on the
            //last table of a run of equal (rounded) scores, so cTables covers
            //every table in the run.
            for (iTable = 0; iTable < actTables.Count; iTable++)
            {
                ctCurrent = (ContingencyTable)actTables[iTable];
                if (iTable < actTables.Count - 1)
                {
                    ctNext = (ContingencyTable)actTables[iTable + 1];
                }
                else
                {
                    ctNext = null;
                }
                dFisherScore = round(ctCurrent.getFisher2TailPermutationTest());
                if (m_bHuge)//special case for huge datasets where the same table can appear multiple times
                {
                    cTables += (long)m_ctcTableCounts.getCachedValue(ctCurrent);
                }
                else
                {
                    cTables++;
                }
                if (ctNext != null)
                {
                    dNextFisherScore = round(ctNext.getFisher2TailPermutationTest());
                }
                if ((ctNext == null) || (dFisherScore != dNextFisherScore))
                {
                    slFDR[dFisherScore].FDR /= cTables;
                }
            }
        }