/**
 * IComparable implementation so result tables can be sorted by p-value.
 * Compares this table's two-tailed Fisher permutation p-value against either
 * another ContingencyTable or a boxed double.
 * Returns a positive value when this table's p-value is larger (less
 * significant), a negative value when it is smaller, and 0 when the values
 * are equal or obj is of an unsupported type.
 */
public int CompareTo(object obj)
{
    if (obj is ContingencyTable ctOther)
    {
        if (ctOther.getFisher2TailPermutationTest() < getFisher2TailPermutationTest())
        {
            return 1;
        }
        if (ctOther.getFisher2TailPermutationTest() > getFisher2TailPermutationTest())
        {
            return -1;
        }
        return 0;
    }
    if (obj is double dOther)
    {
        if (dOther < getFisher2TailPermutationTest())
        {
            return 1;
        }
        if (dOther > getFisher2TailPermutationTest())
        {
            return -1;
        }
    }
    return 0;
}
/*
 * Builds the initial p-value -> FDRData map: one entry per distinct
 * (rounded) two-tailed Fisher p-value observed in actTables, plus a
 * sentinel entry keyed 10.0 that is guaranteed to be larger than any real
 * p-value (p-values are at most 1), so lookups for "next larger key"
 * always succeed.
 * Cleanup vs. original: dropped the redundant (ContingencyTable) cast on
 * an already-typed List<ContingencyTable> and replaced the manual index
 * loop with foreach.
 */
private Map <double, FDRData> initFDRMap(List <ContingencyTable> actTables)
{
    Map <double, FDRData> slFDR = new Map <double, FDRData>();

    foreach (ContingencyTable ctCurrent in actTables)
    {
        double dFisherScore = round(ctCurrent.getFisher2TailPermutationTest());
        if (!slFDR.ContainsKey(dFisherScore))
        {
            slFDR.Add(dFisherScore, new FDRData(dFisherScore));
        }
    }

    slFDR.Add(10.0, new FDRData(10.0)); // add a last entry with a huge fisher score (sentinel)
    return slFDR;
}
/*
 * Computing pi in the filtering case.
 * In this case we compute a different pi for each p-value.
 * A table is considered relevant for the pi computation of a p-value p only
 * if its marginals support a p-value that is more extreme than p.
 *
 * Parameters:
 *   actTables - input tables (iterated in order; assumed sorted by Fisher
 *               score as elsewhere in this class — TODO confirm).
 *   lPValues  - the sorted key list of the FDR map; used to bucket each
 *               table under the next key >= its minimal achievable p-value.
 * Returns a map from (rounded) p-value to its pi estimate, with a sentinel
 * entry pi(10.0) = 1.0 matching the sentinel key added by initFDRMap.
 */
private Map <double, double> computeFilteringPi(List <ContingencyTable> actTables, List <double> lPValues)
{
    // Pending tables, bucketed by the first p-value key at which they become relevant.
    Map <double, List <ContingencyTable> > slRelevantTables = new Map <double, List <ContingencyTable> >();
    double dSumObservedPValuesInRange = 0.0, dCurrentTableFisherTestPValue = 0.0;
    int cObservedTablesInRange = 0;
    double dFisherScore = 0.0, dHyperProbability = 0.0, dMinimalPossiblePValue = 0.0, dFirstLargerKey = 0.0;
    double dSumExpectedNullsInRange = 0;
    double dSumNullProbsInRange = 0.0;
    int cNullsInRange = 0;
    int iTable = 0;
    Map <double, double> slPi = new Map <double, double>();
    ContingencyTable ctCurrent = null;

    if (m_bReportProgress)
    {
        m_bContinue = m_prReport.reportPhase("Computing relevant tables.");
        m_bContinue = m_prReport.reportMessage("Started computing relevant tables for PI computation.", true);
    }

    // We first compute the list of relevant tables.
    // For each table we compute its minimal achievable p-value and add the table
    // under the next p-value key on the list.
    // The relevant tables for a p-value are then all the tables that belong to a
    // p-value that is more extreme than the current one.
    for (iTable = 0; iTable < actTables.Count && m_bContinue; iTable++)
    {
        ctCurrent = (ContingencyTable)actTables[iTable];
        dMinimalPossiblePValue = ctCurrent.getMinimalAchievablePValue();
        dFirstLargerKey = getNextKey(lPValues, dMinimalPossiblePValue);
        if (!slRelevantTables.ContainsKey(dFirstLargerKey))
        {
            slRelevantTables.Add(dFirstLargerKey, new List <ContingencyTable>());
        }
        slRelevantTables[dFirstLargerKey].Add(ctCurrent);
        if (m_bReportProgress && (iTable > 0) && (iTable % 1000 == 0))
        {
            m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
        }
    }

    // We iterate from the smallest p-value to the largest. The order is important
    // because we want the set of accumulated relevant tables to grow monotonically.
    for (iTable = 0; iTable < actTables.Count && m_bContinue; iTable++)
    {
        ctCurrent = (ContingencyTable)actTables[iTable];
        dCurrentTableFisherTestPValue = round(ctCurrent.getFisher2TailPermutationTest());
        if (slRelevantTables.ContainsKey(dCurrentTableFisherTestPValue))
        {
            // Now we iterate over the list of tables that become relevant at this
            // p-value. Note - a table never becomes irrelevant again, so the running
            // sums only ever accumulate; the bucket is removed below merely because
            // its contribution has been folded into the accumulators.
            foreach (ContingencyTable ctRelevant in slRelevantTables[dCurrentTableFisherTestPValue])
            {
                dFisherScore = ctRelevant.getFisher2TailPermutationTest();
                dSumObservedPValuesInRange += dFisherScore;
                cObservedTablesInRange++;
                //TODO - calling computeAllPermutationsScores twice - inefficient
                double[,] adScores = ctRelevant.computeAllPermutationsScores();
                // adScores[i, 0] = permutation probability, adScores[i, 1] = its Fisher score.
                for (int iCurrent = 0; iCurrent < adScores.GetLength(0); iCurrent++)
                {
                    dHyperProbability = adScores[iCurrent, 0];
                    dFisherScore = adScores[iCurrent, 1];
                    dSumNullProbsInRange += dHyperProbability;
                    dSumExpectedNullsInRange += dFisherScore * dHyperProbability;
                    cNullsInRange++;
                }
            }
            slRelevantTables.Remove(dCurrentTableFisherTestPValue);
        }
        // After folding in all newly relevant tables we compute the PI for that
        // p-value using the weighted sum method.
        // NOTE(review): if no table has become relevant yet, both ratios are 0/0
        // and this stores NaN for the key — confirm callers tolerate that.
        slPi[dCurrentTableFisherTestPValue] = (dSumObservedPValuesInRange / cObservedTablesInRange) / (dSumExpectedNullsInRange / dSumNullProbsInRange);
        if (m_bReportProgress && (iTable > 0) && (iTable % 1000 == 0))
        {
            m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
        }
    }
    slPi[10.0] = 1.0; // sentinel key, mirrors initFDRMap's 10.0 entry
    return(slPi);
}
/*
 * Main FDR computation function.
 * Takes as input an array of tables, already sorted by Fisher scores.
 * Outputs a map from (rounded) p-value to FDRData.
 *
 * For every table it accumulates the observed p-value, and for every
 * permutation of the table's marginals it pools the permutation probability
 * into the FDR bin of the next larger p-value key. It then estimates pi_0
 * according to m_pmEvaluatePi and delegates to sumFDRs to turn the pooled
 * probabilities into FDR values.
 */
private Map <double, FDRData> computeFDR(List <ContingencyTable> actTables)
{
    int iTable = 0, cTables = actTables.Count;
    ContingencyTable ctCurrent = null;
    double dFirstLargerKey = 0.0;
    double dHyperProbability = 0.0, dFisherScore = 0.0;
    DateTime dtBefore = DateTime.Now, dtAfter = DateTime.Now;
    TimeSpan tsCurrent = TimeSpan.Zero, tsTotal = TimeSpan.Zero;
    int cTableCount = 1;
    int cReprotInterval = 0;
    Map <double, FDRData> slFDR = null;
    double dSumObservedPValues = 0.0, dCurrentTableFisherTestPValue = 0.0;
    double dSumNullPValues = 0.0, dExpectedNullPValue = 0.0;
    double dEPhiNull = 0.0, dEPhiObserved = 0.0;
    int cNullPValues = 0;
    int cObservedPValues = 0;

    if (m_bReportProgress)
    {
        m_bContinue = m_prReport.reportPhase("Computing pooled p-values.");
        m_bContinue = m_prReport.reportMessage("Started computing pooled p-values values.", true);
    }

    slFDR = initFDRMap(actTables);

    // Progress is reported every cReprotInterval tables.
    // BUG FIX: clamp to at least 1 — for inputs of fewer than 10 tables the
    // original computed Min(Count / 10, MAX_REPROT_POINT) == 0 and the
    // "iTable % cReprotInterval" test below threw DivideByZeroException.
    cReprotInterval = Math.Max(1, Math.Min(actTables.Count / 10, MAX_REPROT_POINT));

    for (iTable = 0; iTable < cTables && m_bContinue; iTable++)
    {
        ctCurrent = (ContingencyTable)actTables[iTable];
        dCurrentTableFisherTestPValue = ctCurrent.getFisher2TailPermutationTest();

        // Observed-side accumulators for the pi_0 estimators below.
        dSumObservedPValues += dCurrentTableFisherTestPValue;
        //dEPhiObserved += -Math.Log(1 - 0.99999999 * dCurrentTableFisherTestPValue);
        dEPhiObserved += Math.Sqrt(dCurrentTableFisherTestPValue);
        cObservedPValues++;

        double[,] adScores = ctCurrent.computeAllPermutationsScores();
        int iCurrent = 0;
        if (m_bHuge)
        {
            // In huge mode identical tables are collapsed to one representative;
            // weigh this table's contribution by its multiplicity in the input.
            cTableCount = (int)m_ctcTableCounts.getCachedValue(ctCurrent);
        }
        else
        {
            cTableCount = 1;
        }
        // adScores[i, 0] = permutation probability, adScores[i, 1] = its Fisher score
        // (per computeAllPermutationsScores' layout, as used throughout this class).
        for (iCurrent = 0; iCurrent < adScores.GetLength(0); iCurrent++)
        {
            dHyperProbability = adScores[iCurrent, 0];
            dFisherScore = adScores[iCurrent, 1];
            dSumNullPValues += dHyperProbability;
            dExpectedNullPValue += dFisherScore * dHyperProbability;
            //dEPhiNull += -Math.Log(1 - 0.99999999 * dFisherScore) * dHyperProbability;
            dEPhiNull += Math.Sqrt(dFisherScore) * dHyperProbability;
            cNullPValues++;
            // Pool the permutation probability into the closest bin with a key
            // >= this Fisher score; sumFDRs later converts pools into FDRs.
            dFirstLargerKey = getNextKey(slFDR.KeyList, dFisherScore);
            slFDR[dFirstLargerKey].PooledPValue += (dHyperProbability * cTableCount);
        }
        if ((iTable > 0) && (iTable % cReprotInterval == 0))
        {
            if (m_bReportProgress)
            {
                // NOTE(review): dtBefore is never advanced, so tsTotal accumulates
                // ever-growing spans and the reported "avg time" overstates — the
                // timing affects log text only, so it is left as-is here.
                dtAfter = DateTime.Now;
                tsCurrent = dtAfter.Subtract(dtBefore);
                tsTotal += tsCurrent;
                m_bContinue = m_prReport.reportProcessedTables(iTable, cTables);
                m_bContinue = m_prReport.reportMessage("Done " + iTable + " tables, avg time (ms) " + Math.Round(tsTotal.TotalMilliseconds / (iTable + 1)) + ", total time " + tsTotal, true);
            }
        }
    }

    double dPi = 1.0;
    if ((m_pmEvaluatePi == PiMethod.WeightedSum) || (m_pmEvaluatePi == PiMethod.DoubleAverage))
    {
        if (m_pmEvaluatePi == PiMethod.WeightedSum)
        {
            dPi = (dSumObservedPValues / cObservedPValues) / (dExpectedNullPValue / dSumNullPValues); // \pi_0 = (\sum_T p(T))/(\sum_T p(T)pr(T|H=0))
        }
        else if (m_pmEvaluatePi == PiMethod.DoubleAverage)
        {
            dPi = 2.0 * (dSumObservedPValues / cObservedPValues); // \pi_0 = 2 * avg(p)
        }
        // Removed dead local "dPhiPi = dEPhiObserved / dEPhiNull": the phi-based
        // estimate was computed but never read. The accumulators are kept so the
        // estimate can be reinstated if needed.
        m_bContinue = m_prReport.reportMessage("Estimating PI = " + dPi, true);
    }
    else if (m_pmEvaluatePi == PiMethod.Filtering)
    {
        // Filtering mode: a separate pi per p-value, attached to each FDR bin.
        Map <double, double> slPi = computeFilteringPi(actTables, slFDR.KeyList);
        List <double> lKeys = new List <double>(slFDR.Keys);
        foreach (double dKey in lKeys)
        {
            slFDR[dKey].FilteringPi = slPi[dKey];
        }
    }
    m_dPi = dPi;

    sumFDRs(actTables, slFDR, dPi);
    return slFDR;
}
/*
 * Writes the results of the computation to a file.
 * First line is the headers (if exist) with the new columns added.
 * Each following line is the contingency table with the Fisher scores, FDR
 * and q-value.
 *
 * Parameters:
 *   actTables - the in-memory tables (ignored in huge mode, where tables are
 *               re-read from the input file instead).
 *   slFDR     - map from p-value key to computed FDRData.
 * Returns the output lines (header first); also stores the p-value -> q-value
 * pairs of the emitted rows into PToQMapping as a side effect.
 */
private List <string> getResults(List <ContingencyTable> actTables , Map <double, FDRData> slFDR)
{
    int iTable = 0;
    ContingencyTable ctCurrent = null;
    double dFisherTest = 0.0, dCurrentQValue = 0.0;
    double dNextKey = 0;
    string sHeader = "";
    FDRData fdCurrent = null;
    string sOutputLine = "";
    List <string> lResults = new List <string>();
    bool bFiltering = m_pmEvaluatePi == PiMethod.Filtering;

    if (m_bReportProgress)
    {
        m_bContinue = m_prReport.reportPhase("Writing results.");
    }

    // Build the header row; optional columns depend on the output flags.
    sHeader = m_sColumnHeaders + "\tp-value";
    if (m_bFullOutput)
    {
        sHeader += "\tpooled p-value\t";
        if (bFiltering)
        {
            sHeader += "filtering pi\t";
        }
        if (m_bPositiveFDR)
        {
            sHeader += "pr(R(p)>0)\tpFDR";
        }
        else
        {
            sHeader += "FDR";
        }
    }
    sHeader += "\tq-value";
    lResults.Add(sHeader);

    List <KeyValuePair <double, double> > lPToQMappings = new List <KeyValuePair <double, double> >();

    // When the huge flag is used, the tables are not kept in memory.
    // We now have to go over the entire input file, read each table,
    // compute its p-value, and map it into FDR and q-value.
    if (m_bHuge)
    {
        StreamReader sr = m_fiInput.OpenText();
        string sLine = "";
        double dFisherScoreCutoff = 0.0;
        bool bUseTable = true;
        if (m_dFDRCutoff > 0.0)
        {
            // Translate the FDR cutoff into a Fisher-score cutoff so the
            // per-table permutation test can stop early.
            dFisherScoreCutoff = mapFDR2FisherScore(slFDR, m_dFDRCutoff);
        }
        iTable = 0;
        while (!sr.EndOfStream)
        {
            sLine = sr.ReadLine();
            if (sLine.Length > 0)
            {
                ctCurrent = new ContingencyTable(sLine, m_cTableNamesColumns);
                bUseTable = ctCurrent.validate();
                if (bUseTable)
                {
                    dFisherTest = round(ctCurrent.getFisher2TailPermutationTest(dFisherScoreCutoff));
                    dNextKey = getNextKey(slFDR.KeyList, dFisherTest);
                    fdCurrent = slFDR[dNextKey];
                    dCurrentQValue = round(fdCurrent.QValue);
                    if (dCurrentQValue <= m_dFDRCutoff)
                    {
                        sOutputLine = ctCurrent.ToString() + "\t";
                        sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                        lResults.Add(sOutputLine);
                        lPToQMappings.Add(new KeyValuePair <double, double>(dNextKey, dCurrentQValue));//will not work for huge because multiple tables will be missed
                    }
                }
                iTable++;
                if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
                {
                    m_bContinue = m_prReport.reportProcessedTables(iTable, m_cTables);
                    m_bContinue = m_prReport.reportMessage("Written " + iTable + " tables.", true);
                }
            }
        }
        sr.Close();
    }
    else // Not huge - all data is already in memory - just write the tables.
    {
        for (iTable = 0; iTable < actTables.Count; iTable++)
        {
            ctCurrent = (ContingencyTable)actTables[iTable];
            // NOTE(review): unlike the huge branch, the p-value here is not
            // passed through round() before the key lookup, and the q-value
            // uses floor() rather than round() — confirm whether this
            // asymmetry between the two branches is intentional.
            dFisherTest = ctCurrent.getFisher2TailPermutationTest();
            dNextKey = getNextKey(slFDR.KeyList, dFisherTest);
            fdCurrent = slFDR[dNextKey];
            dCurrentQValue = floor(fdCurrent.QValue);
            if (dCurrentQValue <= m_dFDRCutoff)
            {
                sOutputLine = ctCurrent.ToString() + "\t";
                sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                lPToQMappings.Add(new KeyValuePair <double, double>(dNextKey, dCurrentQValue));
                lResults.Add(sOutputLine);
            }
            if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
            {
                m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
            }
            //swMarginalPValues.WriteLine(fdCurrent.PValue);
        }
    }
    PToQMapping = lPToQMappings;
    if (m_bReportProgress)
    {
        m_bContinue = m_prReport.reportMessage("Done writing results", true);
    }
    //swMarginalPValues.Close();
    return(lResults);
}
/**
 * When computing pooled p-values we only add the probabilities of the
 * permutations to the closest higher p-value bin. Here we go over all the
 * bins (in ascending key order) and sum everything that has a smaller
 * p-value (more significant), turning per-bin probabilities into a
 * cumulative pooled p-value, an FDR estimate (scaled by pi), and — in a
 * second pass — dividing each bin's FDR by the number of tables at least
 * as significant, which converts pooled p-values into FDR.
 *
 * Parameters:
 *   actTables     - tables sorted by Fisher score (iteration order relies on this).
 *   slFDR         - map from p-value key to FDRData; mutated in place.
 *   dPiEstimation - global pi_0 estimate; used only for bins without a
 *                   per-bin FilteringPi.
 */
private void sumFDRs(List <ContingencyTable> actTables, Map <double, FDRData> slFDR, double dPiEstimation)
{
    double dSum = 0;
    int iTable = 0;
    long cAllTables = actTables.Count;
    long cTables = 0; // running count of tables seen so far (the "rank")
    ContingencyTable ctCurrent = null, ctNext = null;
    double dFisherScore = 0.0, dNextFisherScore = 0.0;
    int iSample = 0; // NOTE(review): incremented but never read

    // First pass: cumulative sum over the bins. Relies on slFDR.Values
    // enumerating in ascending key order — presumably Map is a sorted map;
    // TODO confirm.
    foreach (FDRData data in slFDR.Values)
    {
        dSum += data.PooledPValue;
        data.PooledPValue = dSum;
        data.PooledPValue /= cAllTables;
        if (data.FilteringPi > 0.0)
        {
            // Filtering mode set a per-bin pi; prefer it over the global estimate.
            data.FDR = dSum * data.FilteringPi;
        }
        else
        {
            data.FDR = dSum * dPiEstimation;
        }
        if (m_bPositiveFDR)
        {
            // pFDR: condition on at least one rejection, pr(R(p)>0) = 1-(1-p)^m.
            data.RejectionAreaProb = OneMinusOneMinuXToTheM(data.PooledPValue, cAllTables);
            data.FDR /= data.RejectionAreaProb;
        }
        iSample++;
    }
    dSum = 0; // NOTE(review): dSum is not used after this point

    // Second pass: divide by the number of more significant tables to move
    // from pooled p-values to FDR. The division is applied once per distinct
    // score, at the last table of each run of equal (rounded) scores.
    for (iTable = 0; iTable < actTables.Count; iTable++)
    {
        ctCurrent = (ContingencyTable)actTables[iTable];
        if (iTable < actTables.Count - 1)
        {
            ctNext = (ContingencyTable)actTables[iTable + 1];
        }
        else
        {
            ctNext = null;
        }
        dFisherScore = round(ctCurrent.getFisher2TailPermutationTest());
        if (m_bHuge) // special case for huge datasets where the same table can appear multiple times
        {
            cTables += (long)m_ctcTableCounts.getCachedValue(ctCurrent);
        }
        else
        {
            cTables++;
        }
        if (ctNext != null)
        {
            dNextFisherScore = round(ctNext.getFisher2TailPermutationTest());
        }
        if ((ctNext == null) || (dFisherScore != dNextFisherScore))
        {
            slFDR[dFisherScore].FDR /= cTables;
        }
    }
}