// Loads the contingency tables from the input file, applying the sampling
// technique when enabled.
//
// On the first invocation (m_cTables == -1) an extra pass is made over the
// file to count the data rows and initialize the column headers; the count
// turns the requested sample size into a per-row sampling probability.
// In "huge" mode duplicate tables are collapsed into m_ctcTableCounts
// (table -> occurrence count) instead of being stored individually, so the
// returned list holds only distinct tables in that mode.
//
// Returns the loaded tables, or null if an error occurred (the error is
// routed through m_prReport.reportError).
private List<ContingencyTable> loadTables()
{
    StreamReader sr = null;
    try
    {
        sr = m_fiInput.OpenText();
        ContingencyTable ctCurrent = null;
        if (m_bReportProgress)
        {
            m_bContinue = m_prReport.reportPhase("Loading data");
            m_bContinue = m_prReport.reportMessage("Loading data from file " + m_fiInput.Name, true);
        }
        string sLine = "";
        List<ContingencyTable> actTables = new List<ContingencyTable>();
        int cTables = 0;
        long cCharacters = 0;
        bool bUseTable = true;
        double dSampleProbability = 0.0, dProb = 0.0;
        Random rnd = new Random();
        int iLineNumber = 0;
        if (m_bHuge)
        {
            m_ctcTableCounts = new ContingencyTableCache();
        }
        else
        {
            m_ctcTableCounts = null;
        }
        // First iteration only: go through the file to count the number of
        // rows (tables) and pick up the column headers from the first line.
        if (m_cTables == -1)
        {
            m_cTables = 0;
            sLine = sr.ReadLine();
            initColumnHeaders(sLine);
            while (!sr.EndOfStream)
            {
                sLine = sr.ReadLine();
                m_cTables++;
                if (m_bReportProgress && (m_cTables % MAX_REPROT_POINT == 0))
                {
                    m_bContinue = m_prReport.reportMessage(".", false);
                }
            }
            if (m_bReportProgress)
            {
                m_bContinue = m_prReport.reportMessage("", true);
                m_bContinue = m_prReport.reportMessage("Found " + m_cTables + " data rows.", true);
            }
        }
        // Instead of enforcing a hard sample size, each row is sampled
        // independently with probability sampleSize / totalRows.
        dSampleProbability = m_iSampleSize / (double)m_cTables;
        // Rewind by reopening the file for the actual loading pass.
        sr.Close();
        sr = m_fiInput.OpenText();
        if (m_bReportProgress && m_bSampling)
        {
            m_bContinue = m_prReport.reportPhase("Sampling tables");
            m_bContinue = m_prReport.reportMessage("Sampling " + m_iSampleSize + " tables.", true);
        }
        if (m_bHasColumnHeaders)
        {
            sr.ReadLine();
        }
        while (!sr.EndOfStream && m_bContinue)
        {
            sLine = sr.ReadLine().Trim();
            iLineNumber++;
            if (sLine.Length > 0)
            {
                bUseTable = true; // general use flag - sampling, validation, ...
                if (m_bSampling)
                {
                    dProb = rnd.NextDouble();
                    if (dProb > dSampleProbability)
                    {
                        bUseTable = false;
                    }
                }
                if (bUseTable)
                {
                    ctCurrent = new ContingencyTable(sLine, m_cTableNamesColumns);
                    bUseTable = ctCurrent.validate();
                }
                if (bUseTable)
                {
                    if (m_bHuge)
                    {
                        // Instead of maintaining all the tables, check whether a
                        // table with the same counts was already loaded.
                        double dCount = m_ctcTableCounts.getCachedValue(ctCurrent);
                        if (double.IsNaN(dCount)) // first time this table was observed
                        {
                            dCount = 0;
                            actTables.Add(ctCurrent);
                        }
                        m_ctcTableCounts.setCachedValue(ctCurrent, dCount + 1); // increment the table count
                    }
                    else // not huge - maintain all tables (including duplicates)
                    {
                        actTables.Add(ctCurrent);
                    }
                }
                cTables++;
            }
            if ((cTables > 0) && (cTables % MAX_REPROT_POINT == 0) && m_bReportProgress)
            {
                m_bContinue = m_prReport.reportProcessedTables(cTables, m_cTables);
                m_bContinue = m_prReport.reportMessage("Loaded " + cTables + " tables.", false);
                if (m_bHuge)
                {
                    m_bContinue = m_prReport.reportMessage(" Found " + actTables.Count + " distinct tables.", false);
                }
                m_bContinue = m_prReport.reportMessage("", true);
            }
            // NOTE(review): "+ 2" presumably accounts for a CR/LF line ending;
            // cCharacters is not read in this method - confirm it is needed.
            cCharacters += sLine.Length + 2;
        }
        if (m_bReportProgress)
        {
            m_bContinue = m_prReport.reportMessage("Done loading data. Found " + actTables.Count + " distinct tables.", true);
        }
        return actTables;
    }
    catch (Exception e)
    {
        m_bContinue = m_prReport.reportError("Could not load data : " + e.Message);
    }
    finally
    {
        // The original code leaked the reader when an exception was thrown
        // mid-load; always release it here.
        if (sr != null)
        {
            sr.Close();
        }
    }
    return null;
}
/*
 * Builds the result lines of the computation.
 * The first line is the headers (if they exist) with the new columns added.
 * Each following line is a contingency table with its Fisher p-value,
 * FDR and q-value, filtered by the FDR cutoff (m_dFDRCutoff).
 *
 * actTables - the tables loaded in memory (ignored in huge mode).
 * slFDR     - map from (rounded) p-value to its FDRData.
 * Returns the list of output lines; also stores the p-value -> q-value
 * pairs of the written tables in PToQMapping as a side effect.
 */
private List<string> getResults(List<ContingencyTable> actTables, Map<double, FDRData> slFDR)
{
    int iTable = 0;
    ContingencyTable ctCurrent = null;
    double dFisherTest = 0.0, dCurrentQValue = 0.0;
    double dNextKey = 0;
    string sHeader = "";
    FDRData fdCurrent = null;
    string sOutputLine = "";
    List<string> lResults = new List<string>();
    bool bFiltering = m_pmEvaluatePi == PiMethod.Filtering;
    if (m_bReportProgress)
    {
        m_bContinue = m_prReport.reportPhase("Writing results.");
    }
    // Assemble the header row: original columns plus the computed statistics.
    sHeader = m_sColumnHeaders + "\tp-value";
    if (m_bFullOutput)
    {
        sHeader += "\tpooled p-value\t";
        if (bFiltering)
        {
            sHeader += "filtering pi\t";
        }
        if (m_bPositiveFDR)
        {
            sHeader += "pr(R(p)>0)\tpFDR";
        }
        else
        {
            sHeader += "FDR";
        }
    }
    sHeader += "\tq-value";
    lResults.Add(sHeader);
    List<KeyValuePair<double, double>> lPToQMappings = new List<KeyValuePair<double, double>>();
    if (m_bHuge)
    {
        // When the huge flag is used the tables were not kept in memory.
        // Re-read the entire input file, recompute the p-value of each table
        // and map it to its FDR and q-value.
        double dFisherScoreCutoff = 0.0;
        bool bUseTable = true;
        if (m_dFDRCutoff > 0.0)
        {
            dFisherScoreCutoff = mapFDR2FisherScore(slFDR, m_dFDRCutoff);
        }
        iTable = 0;
        // using ensures the reader is released even if an exception propagates
        // (there is no enclosing try/catch in this method).
        using (StreamReader sr = m_fiInput.OpenText())
        {
            string sLine = "";
            // NOTE(review): unlike loadTables, the header line is not skipped
            // here - presumably validate() rejects it; confirm.
            while (!sr.EndOfStream)
            {
                sLine = sr.ReadLine();
                if (sLine.Length > 0)
                {
                    ctCurrent = new ContingencyTable(sLine, m_cTableNamesColumns);
                    bUseTable = ctCurrent.validate();
                    if (bUseTable)
                    {
                        dFisherTest = round(ctCurrent.getFisher2TailPermutationTest(dFisherScoreCutoff));
                        dNextKey = getNextKey(slFDR.KeyList, dFisherTest);
                        fdCurrent = slFDR[dNextKey];
                        dCurrentQValue = round(fdCurrent.QValue);
                        if (dCurrentQValue <= m_dFDRCutoff)
                        {
                            sOutputLine = ctCurrent.ToString() + "\t";
                            sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                            lResults.Add(sOutputLine);
                            // Will not work for huge because multiple (duplicate) tables will be missed.
                            lPToQMappings.Add(new KeyValuePair<double, double>(dNextKey, dCurrentQValue));
                        }
                    }
                    iTable++;
                    if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
                    {
                        m_bContinue = m_prReport.reportProcessedTables(iTable, m_cTables);
                        m_bContinue = m_prReport.reportMessage("Written " + iTable + " tables.", true);
                    }
                }
            }
        }
    }
    else // not huge - all data is already in memory - just write the tables
    {
        for (iTable = 0; iTable < actTables.Count; iTable++)
        {
            ctCurrent = (ContingencyTable)actTables[iTable];
            // NOTE(review): the huge path rounds this score before the key
            // lookup; here it is used unrounded - confirm which is intended.
            dFisherTest = ctCurrent.getFisher2TailPermutationTest();
            dNextKey = getNextKey(slFDR.KeyList, dFisherTest);
            fdCurrent = slFDR[dNextKey];
            // BUGFIX: was floor(...) while the huge path uses round(...) for
            // the same quantity; floor could misclassify tables against the
            // FDR cutoff. Use round for consistency between the two paths.
            dCurrentQValue = round(fdCurrent.QValue);
            if (dCurrentQValue <= m_dFDRCutoff)
            {
                sOutputLine = ctCurrent.ToString() + "\t";
                sOutputLine += fdCurrent.getData(m_bFullOutput, bFiltering, m_bPositiveFDR);
                lPToQMappings.Add(new KeyValuePair<double, double>(dNextKey, dCurrentQValue));
                lResults.Add(sOutputLine);
            }
            if (m_bReportProgress && (iTable % MAX_REPROT_POINT == 0))
            {
                m_bContinue = m_prReport.reportProcessedTables(iTable, actTables.Count);
            }
        }
    }
    PToQMapping = lPToQMappings;
    if (m_bReportProgress)
    {
        m_bContinue = m_prReport.reportMessage("Done writing results", true);
    }
    return lResults;
}