Example #1
0
        /// <summary>
        /// Tabulates the rows collected for every input file pattern, computes a q-value for each real row,
        /// and writes the rows to <paramref name="outputFileName"/> ordered by q-value (ties broken by the
        /// value returned from AccessPValueFromPhylotreeRow).
        /// </summary>
        /// <param name="dirinfo">Passed through to TryCreateTabulateReportInternal; presumably the directory that is searched for the input files (confirm against that method).</param>
        /// <param name="inputFilePatternCollection">The "broad" input file patterns. Each one may consist of several "narrow" patterns joined with '+'; the narrow parts of one broad pattern must not cover the same null index.</param>
        /// <param name="outputFileName">File that is created up front. It receives the report or, when a tabulator is incomplete, the skip range collection.</param>
        /// <param name="globalKeepTest">Passed through to TryCreateTabulateReportInternal.</param>
        /// <param name="splitKeepTestList">Passed through to TryCreateTabulateReportInternal. Results are kept in <c>splitKeepTestList.Count + 1</c> separate groups and q-values are computed per group.</param>
        /// <param name="maxPValue">Passed through to TryCreateTabulateReportInternal.</param>
        /// <param name="auditRowIndexValues">When true, every broad pattern must have produced the index -1 (the real data); also passed through to TryCreateTabulateReportInternal.</param>
        /// <param name="useStoreyTibsharaniMethod">When true, q-values come from SpecialFunctions.ComputeQValuesUseStoreyTibsharani; otherwise from SpecialFunctions.ComputeQValuesUseNulls.</param>
        /// <param name="numTestsStoreyTibsOverride">When not -1, this value replaces the observed p-value count handed to the Storey-Tibshirani computation.</param>
        /// <param name="doLocalTabulation">Passed through to TryCreateTabulateReportInternal and SpecialFunctions.ComputeQValuesUseNulls.</param>
        /// <returns>
        /// True when the report was written. False when a tabulator reported that it is not complete; in that
        /// case <paramref name="outputFileName"/> contains the skip range collection instead of a report.
        /// </returns>
        public static bool CreateTabulateReport(DirectoryInfo dirinfo, ICollection<string> inputFilePatternCollection, string outputFileName,
                                                KeepTest<Dictionary<string, string>> globalKeepTest, List<KeepTest<Dictionary<string, string>>> splitKeepTestList, double maxPValue,
                                                bool auditRowIndexValues, bool useStoreyTibsharaniMethod, int numTestsStoreyTibsOverride, bool doLocalTabulation)
        {
            using (TextWriter textWriter = File.CreateText(outputFileName)) // Do this early so that if it fails, we'll know
            {
                // One result group per split keep-test, plus one additional group.
                int splitCount = splitKeepTestList.Count + 1;
                List<KeyValuePair<Dictionary<string, string>, double>>[] realRowCollectionToSortArray = new List<KeyValuePair<Dictionary<string, string>, double>>[splitCount];
                Dictionary<int, List<double>>[] nullValueCollectionToBeSortedArray = new Dictionary<int, List<double>>[splitCount];
                int[] totalPValueCount = new int[splitCount];

                for (int i = 0; i < splitCount; i++)
                {
                    realRowCollectionToSortArray[i] = new List<KeyValuePair<Dictionary<string, string>, double>>(10000);
                    nullValueCollectionToBeSortedArray[i] = new Dictionary<int, List<double>>();
                }

                string headerSoFar = null;

                // Index set of the first broad pattern; its size determines the number of randomization runs.
                Set<int> broadRealAndNullIndexSetSoFar = null;

                foreach (string broadInputFilePattern in inputFilePatternCollection)
                {
                    Set<int> narrowRealAndNullIndexSetSetSoFar = Set<int>.GetInstance();

                    foreach (string narrowInputFilePattern in broadInputFilePattern.Split('+'))
                    {
                        Set<int> realAndNullIndexSet;
                        RowIndexTabulator tabulator = TryCreateTabulateReportInternal(out realAndNullIndexSet, dirinfo, narrowInputFilePattern,
                                                                                      globalKeepTest, splitKeepTestList, maxPValue, auditRowIndexValues, useStoreyTibsharaniMethod,
                                                                                      ref realRowCollectionToSortArray, ref nullValueCollectionToBeSortedArray, ref totalPValueCount, ref headerSoFar, doLocalTabulation);
                        if (!tabulator.IsComplete())
                        {
                            // The output file doubles as a skip file when rows are missing.
                            textWriter.WriteLine(tabulator.GetSkipRangeCollection());
                            Console.WriteLine("Not all needed rows were found in {0}.", narrowInputFilePattern);
                            Console.WriteLine("Found rows:\n{0}", tabulator.GetSkipRangeCollection());
                            Console.WriteLine("{0} created as skip file.", outputFileName);
                            return false;
                        }

                        //Instead of throwing an error, we could filter out the duplicated null indexes
                        Helper.CheckCondition(narrowRealAndNullIndexSetSetSoFar.IntersectionIsEmpty(realAndNullIndexSet),
                                              string.Format("Within inputFilePattern {0}, multiple '+'-connected parts cover the same nullIndex(s), {1}",
                                                            broadInputFilePattern,
                                                            narrowRealAndNullIndexSetSetSoFar.Intersection(realAndNullIndexSet)));

                        narrowRealAndNullIndexSetSetSoFar.AddNewRange(realAndNullIndexSet);
                    }

                    Helper.CheckCondition(!auditRowIndexValues || narrowRealAndNullIndexSetSetSoFar.Contains(-1),
                                          string.Format("The 'null' index -1 for the real data was not seen in {0}", broadInputFilePattern));

                    // Only the first broad pattern's index set is remembered. A check that later broad
                    // patterns cover the same set of null indexes used to live here and is currently disabled.
                    if (broadRealAndNullIndexSetSoFar == null)
                    {
                        broadRealAndNullIndexSetSoFar = narrowRealAndNullIndexSetSetSoFar;
                    }
                }

                double numberOfRandomizationRuns = 0;
                if (!useStoreyTibsharaniMethod)
                {
                    // With no input patterns the set was never assigned; fail with a clear message
                    // instead of a NullReferenceException.
                    Helper.CheckCondition(broadRealAndNullIndexSetSoFar != null,
                                          "No input file patterns were given, so the number of randomization runs cannot be determined.");
                    numberOfRandomizationRuns = broadRealAndNullIndexSetSoFar.Count - 1;
                }
                Console.WriteLine("Detected {0} randomized runs relative to the number of real runs.", numberOfRandomizationRuns);
                Helper.CheckCondition<InvalidDataException>(useStoreyTibsharaniMethod || numberOfRandomizationRuns > 0, "No randomization runs detected. Did you mean to include a -{0} flag?", Tabulate.STOREY_METHOD_NAME);

                //Compute q-values from p-values (and p-values from test statistic)
                List<KeyValuePair<Dictionary<string, string>, double>> rowAndQValues = new List<KeyValuePair<Dictionary<string, string>, double>>(1000);

                // Accumulated over ALL splits. Previously each split overwrote this map, so only the last
                // split's rows could be looked up when writing the pValFromRandomizations column.
                Dictionary<double, double> rowToPvalFromRandomizations = new Dictionary<double, double>();

                for (int i = 0; i < splitCount; i++)
                {
                    int numTestsToUse;
                    if (numTestsStoreyTibsOverride != -1)
                    {
                        Console.WriteLine("Using " + numTestsStoreyTibsOverride + " p-values for computation of q-values rather than the observed number (" + totalPValueCount[i] + ")");
                        numTestsToUse = numTestsStoreyTibsOverride;
                    }
                    else
                    {
                        numTestsToUse = totalPValueCount[i];
                    }

                    Dictionary<Dictionary<string, string>, double> qValueList;
                    if (useStoreyTibsharaniMethod)
                    {
                        qValueList = SpecialFunctions.ComputeQValuesUseStoreyTibsharani(ref realRowCollectionToSortArray[i], row => row.Value, numTestsToUse)
                                     .ToDictionary(entry => entry.Key.Key, entry => entry.Value);
                    }
                    else
                    {
                        // Local and non-local tabulation make the same call; doLocalTabulation is simply forwarded.
                        Dictionary<double, double> splitRowToPvalFromRandomizations;
                        qValueList = SpecialFunctions.ComputeQValuesUseNulls(ref realRowCollectionToSortArray[i],
                                                                             row => row.Value,
                                                                             row => int.Parse(((KeyValuePair<Dictionary<string, string>, double>)row).Key["groupId"]),
                                                                             row => int.Parse(((KeyValuePair<Dictionary<string, string>, double>)row).Key["rowIndex"]),
                                                                             ref nullValueCollectionToBeSortedArray[i], numberOfRandomizationRuns, out splitRowToPvalFromRandomizations, doLocalTabulation)
                                     .ToDictionary(entry => entry.Key.Key, entry => entry.Value);

                        if (splitRowToPvalFromRandomizations != null)
                        {
                            // Later splits win on a duplicate key, which matches the old last-assignment behaviour.
                            foreach (KeyValuePair<double, double> rowAndPval in splitRowToPvalFromRandomizations)
                            {
                                rowToPvalFromRandomizations[rowAndPval.Key] = rowAndPval.Value;
                            }
                        }
                    }

                    rowAndQValues.AddRange(qValueList);
                }

                // Order by q-value; rows with equal q-values are ordered by AccessPValueFromPhylotreeRow.
                rowAndQValues.Sort((row1, row2) =>
                                   row1.Value == row2.Value ?
                                   AccessPValueFromPhylotreeRow(row1.Key).CompareTo(AccessPValueFromPhylotreeRow(row2.Key)) :
                                   row1.Value.CompareTo(row2.Value));

                //!!!this code is repeated elsewhere
                bool tabulatingTestStatistic = COL_TO_TABULATE.TESTSTATISTIC == _columnToTabulate;
                if (tabulatingTestStatistic)
                {
                    Helper.CheckCondition(!useStoreyTibsharaniMethod, "the way its set up now, cannot use TestStatistic column with useStoreyTibshirani");
                    textWriter.WriteLine(Helper.CreateTabString(headerSoFar, "pValFromRandomizations", "qValue"));
                }
                else
                {
                    textWriter.WriteLine(Helper.CreateTabString(headerSoFar, "qValue"));
                }

                foreach (KeyValuePair<Dictionary<string, string>, double> rowAndQValue in rowAndQValues)
                {
                    // The "" key is read as the row's text to emit (presumably the original input line - confirm where rows are built).
                    if (tabulatingTestStatistic)
                    {
                        double thisRow = double.Parse(rowAndQValue.Key["rowIndex"]);
                        double thisPvalFromRandomization = rowToPvalFromRandomizations[thisRow];
                        textWriter.WriteLine(Helper.CreateTabString(rowAndQValue.Key[""], thisPvalFromRandomization, rowAndQValue.Value));
                    }
                    else
                    {
                        textWriter.WriteLine(Helper.CreateTabString(rowAndQValue.Key[""], rowAndQValue.Value));
                    }
                }
            }
            return true;
        }