/// <summary>
/// Gathers the rows matched by every input file pattern, computes a q-value for each real row
/// (separately for each split), and writes the rows, ordered by q-value, to <paramref name="outputFileName"/>.
/// If any narrow pattern's tabulator reports that it is incomplete, the tabulator's skip range collection
/// is written to the output file instead and the method returns false.
/// </summary>
/// <param name="dirinfo">Directory handed to TryCreateTabulateReportInternal (presumably where the input files are searched for; confirm there).</param>
/// <param name="inputFilePatternCollection">"Broad" input patterns. Each one is split on '+' into "narrow" patterns that are tabulated one at a time.</param>
/// <param name="outputFileName">File to create. Receives either the finished report or, on an incomplete tabulation, the skip range collection.</param>
/// <param name="globalKeepTest">Passed through to TryCreateTabulateReportInternal.</param>
/// <param name="splitKeepTestList">Passed through to TryCreateTabulateReportInternal. The number of splits processed is its Count plus one.</param>
/// <param name="maxPValue">Passed through to TryCreateTabulateReportInternal.</param>
/// <param name="auditRowIndexValues">When true, every broad pattern must have covered index -1 (the real data). Also passed through to TryCreateTabulateReportInternal.</param>
/// <param name="useStoreyTibsharaniMethod">When true, q-values come from SpecialFunctions.ComputeQValuesUseStoreyTibsharani; otherwise from SpecialFunctions.ComputeQValuesUseNulls.</param>
/// <param name="numTestsStoreyTibsOverride">Number of tests to hand to the Storey-Tibshirani computation, or -1 to use the count observed for each split.</param>
/// <param name="doLocalTabulation">Passed through to TryCreateTabulateReportInternal and to SpecialFunctions.ComputeQValuesUseNulls.</param>
/// <returns>bool indicating successful tabulate. False indicates the audit failed, in which case the outputFileName will be used as the skip file.</returns>
/// <exception cref="InvalidDataException">The null-based method was requested but no randomization runs were detected.</exception>
public static bool CreateTabulateReport(DirectoryInfo dirinfo, ICollection<string> inputFilePatternCollection, string outputFileName,
    KeepTest<Dictionary<string, string>> globalKeepTest, List<KeepTest<Dictionary<string, string>>> splitKeepTestList,
    double maxPValue, bool auditRowIndexValues, bool useStoreyTibsharaniMethod, int numTestsStoreyTibsOverride, bool doLocalTabulation)
{
    using (TextWriter textWriter = File.CreateText(outputFileName)) // Do this early so that if it fails, we'll know
    {
        int splitCount = splitKeepTestList.Count + 1;

        // One accumulator per split; they are filled in by TryCreateTabulateReportInternal.
        List<KeyValuePair<Dictionary<string, string>, double>>[] realRowCollectionToSortArray = new List<KeyValuePair<Dictionary<string, string>, double>>[splitCount];
        Dictionary<int, List<double>>[] nullValueCollectionToBeSortedArray = new Dictionary<int, List<double>>[splitCount];
        int[] totalPValueCount = new int[splitCount];
        for (int i = 0; i < splitCount; i++)
        {
            realRowCollectionToSortArray[i] = new List<KeyValuePair<Dictionary<string, string>, double>>(10000);
            nullValueCollectionToBeSortedArray[i] = new Dictionary<int, List<double>>();
        }

        string headerSoFar = null;
        Set<int> broadRealAndNullIndexSetSoFar = null;

        foreach (string broadInputFilePattern in inputFilePatternCollection)
        {
            Set<int> narrowRealAndNullIndexSetSetSoFar = Set<int>.GetInstance();

            foreach (string narrowInputFilePattern in broadInputFilePattern.Split('+'))
            {
                Set<int> realAndNullIndexSet;
                RowIndexTabulator tabulator = TryCreateTabulateReportInternal(out realAndNullIndexSet, dirinfo, narrowInputFilePattern,
                    globalKeepTest, splitKeepTestList, maxPValue, auditRowIndexValues, useStoreyTibsharaniMethod,
                    ref realRowCollectionToSortArray, ref nullValueCollectionToBeSortedArray, ref totalPValueCount, ref headerSoFar,
                    doLocalTabulation);

                if (!tabulator.IsComplete())
                {
                    // Turn the output file into a skip file and report failure to the caller.
                    textWriter.WriteLine(tabulator.GetSkipRangeCollection());
                    Console.WriteLine("Not all needed rows were found in {0}.", narrowInputFilePattern);
                    Console.WriteLine("Found rows:\n{0}", tabulator.GetSkipRangeCollection());
                    Console.WriteLine("{0} created as skip file.", outputFileName);
                    return false;
                }

                //Instead of throwing an error, we could filter out the duplicated null indexes
                Helper.CheckCondition(narrowRealAndNullIndexSetSetSoFar.IntersectionIsEmpty(realAndNullIndexSet),
                    string.Format("Within inputFilePattern {0}, multiple '+'-connected parts cover the same nullIndex(s), {1}",
                        broadInputFilePattern, narrowRealAndNullIndexSetSetSoFar.Intersection(realAndNullIndexSet)));
                narrowRealAndNullIndexSetSetSoFar.AddNewRange(realAndNullIndexSet);
            }

            Helper.CheckCondition(!auditRowIndexValues || narrowRealAndNullIndexSetSetSoFar.Contains(-1),
                string.Format("The 'null' index -1 for the real data was not seen in {0}", broadInputFilePattern));

            // Only the first broad pattern's index set is remembered; later broad patterns are
            // not compared against it.
            if (broadRealAndNullIndexSetSoFar == null)
            {
                broadRealAndNullIndexSetSoFar = narrowRealAndNullIndexSetSetSoFar;
            }
        }

        // With no input patterns there is no index set at all; treat that as zero randomization
        // runs so the check below reports it instead of a NullReferenceException.
        double numberOfRandomizationRuns = (useStoreyTibsharaniMethod || broadRealAndNullIndexSetSoFar == null)
            ? 0
            : broadRealAndNullIndexSetSoFar.Count - 1;
        Console.WriteLine("Detected {0} randomized runs relative to the number of real runs.", numberOfRandomizationRuns);
        Helper.CheckCondition<InvalidDataException>(useStoreyTibsharaniMethod || numberOfRandomizationRuns > 0,
            "No randomization runs detected. Did you mean to include a -{0} flag?", Tabulate.STOREY_METHOD_NAME);

        //Compute q-values from p-values (and p-values from test statistic)
        List<KeyValuePair<Dictionary<string, string>, double>> rowAndQValues = new List<KeyValuePair<Dictionary<string, string>, double>>(1000);

        // Accumulated over ALL splits. Each ComputeQValuesUseNulls call hands back a fresh map through
        // its out parameter, so keeping only the latest one would lose the entries of earlier splits.
        Dictionary<double, double> rowToPvalFromRandomizations = new Dictionary<double, double>();

        for (int i = 0; i < splitCount; i++)
        {
            int numTestsToUse;
            if (numTestsStoreyTibsOverride != -1)
            {
                Console.WriteLine("Using " + numTestsStoreyTibsOverride + " p-values for computation of q-values rather than the observed number (" + totalPValueCount[i] + ")");
                numTestsToUse = numTestsStoreyTibsOverride;
            }
            else
            {
                numTestsToUse = totalPValueCount[i];
            }

            Dictionary<Dictionary<string, string>, double> qValueList;
            if (useStoreyTibsharaniMethod)
            {
                qValueList = SpecialFunctions.ComputeQValuesUseStoreyTibsharani(ref realRowCollectionToSortArray[i], row => row.Value, numTestsToUse)
                    .ToDictionary(entry => entry.Key.Key, entry => entry.Value);
            }
            else
            {
                // The local and non-local tabulation paths make the same call; doLocalTabulation itself
                // tells ComputeQValuesUseNulls which mode is wanted.
                Dictionary<double, double> splitRowToPvalFromRandomizations;
                qValueList = SpecialFunctions.ComputeQValuesUseNulls(ref realRowCollectionToSortArray[i], row => row.Value,
                        row => int.Parse(((KeyValuePair<System.Collections.Generic.Dictionary<string, string>, double>)row).Key["groupId"]),
                        row => int.Parse(((KeyValuePair<System.Collections.Generic.Dictionary<string, string>, double>)row).Key["rowIndex"]),
                        ref nullValueCollectionToBeSortedArray[i], numberOfRandomizationRuns, out splitRowToPvalFromRandomizations, doLocalTabulation)
                    .ToDictionary(entry => entry.Key.Key, entry => entry.Value);

                if (splitRowToPvalFromRandomizations != null)
                {
                    foreach (KeyValuePair<double, double> rowAndPval in splitRowToPvalFromRandomizations)
                    {
                        // Later splits win on a key collision, matching what a lookup saw before.
                        rowToPvalFromRandomizations[rowAndPval.Key] = rowAndPval.Value;
                    }
                }
            }

            rowAndQValues.AddRange(qValueList);
        }

        // Order by q-value, breaking ties with the row's own p-value.
        rowAndQValues.Sort((row1, row2) => row1.Value == row2.Value
            ? AccessPValueFromPhylotreeRow(row1.Key).CompareTo(AccessPValueFromPhylotreeRow(row2.Key))
            : row1.Value.CompareTo(row2.Value));

        //!!!this code is repeated elsewhere
        bool tabulatingTestStatistic = COL_TO_TABULATE.TESTSTATISTIC == _columnToTabulate;
        if (tabulatingTestStatistic)
        {
            Helper.CheckCondition(!useStoreyTibsharaniMethod, "the way its set up now, cannot use TestStatistic column with useStoreyTibshirani");
            textWriter.WriteLine(Helper.CreateTabString(headerSoFar, "pValFromRandomizations", "qValue"));
        }
        else
        {
            textWriter.WriteLine(Helper.CreateTabString(headerSoFar, "qValue"));
        }

        foreach (KeyValuePair<Dictionary<string, string>, double> rowAndQValue in rowAndQValues)
        {
            if (tabulatingTestStatistic)
            {
                double thisRow = double.Parse(rowAndQValue.Key["rowIndex"]);
                double thisPvalFromRandomization = rowToPvalFromRandomizations[thisRow];
                textWriter.WriteLine(Helper.CreateTabString(rowAndQValue.Key[""], thisPvalFromRandomization, rowAndQValue.Value));
            }
            else
            {
                textWriter.WriteLine(Helper.CreateTabString(rowAndQValue.Key[""], rowAndQValue.Value));
            }
        }
    }

    return true;
}