Example #1
0
        /// <summary>
        /// Merges the files in <paramref name="dirinfo"/> that match <paramref name="inputFilePattern"/> into
        /// <paramref name="mergedFileName"/> (according to the original notes, an existing merged file is appended to),
        /// then attempts to tabulate the merged file with row-index auditing switched on.
        /// On success, a skip file left over from an earlier run is deleted. On failure, the tabulation output is
        /// handed to <c>SpecialFunctions.MoveAndReplace</c> so that it ends up at <paramref name="skipFileName"/>,
        /// where it can serve as a skip file for a later run.
        /// </summary>
        /// <param name="dirinfo">Location where all the files can be found.</param>
        /// <param name="inputFilePattern">Pattern selecting the files to merge.</param>
        /// <param name="mergedFileName">Name of the merged file that is then tabulated.</param>
        /// <param name="tabulateResultFileName">Name of the file the tabulation writes to.</param>
        /// <param name="skipFileName">Skip file to delete on success or to (re)create on failure.</param>
        /// <param name="keepTest">Keep test forwarded to the tabulation.</param>
        /// <param name="splitKeepTestList">Split keep tests forwarded to the tabulation.</param>
        /// <param name="maxPForTabulate">Maximum p-value forwarded to the tabulation.</param>
        /// <param name="useStoreyMethod">Forwarded to the tabulation unchanged.</param>
        /// <returns>True if tabulate was successful, false otherwise.</returns>
        public static bool MergeThenTabulateOrCreateSkipFile(DirectoryInfo dirinfo, string inputFilePattern, string mergedFileName, string tabulateResultFileName,
                                                             string skipFileName, KeepTest <Dictionary <string, string> > keepTest, List <KeepTest <Dictionary <string, string> > > splitKeepTestList, double maxPForTabulate, bool useStoreyMethod)
        {
            Console.Write("Merging files...");
            Tabulate.MergeFilesUsedToTabulate(dirinfo, inputFilePattern, mergedFileName, true);
            Console.WriteLine("done merging.");

            bool succeeded = Tabulate.CreateTabulateReport(
                dirinfo, mergedFileName, tabulateResultFileName,
                keepTest, splitKeepTestList,
                maxPForTabulate, true /* audit */, useStoreyMethod);

            if (!succeeded)
            {
                Console.WriteLine("Tabulation failed. Missing rows placed in {0}.", skipFileName);
                SpecialFunctions.MoveAndReplace(tabulateResultFileName, skipFileName);
                return false;
            }

            // Everything is done at this point, so a skip file from a previous run is obsolete.
            if (File.Exists(skipFileName))
            {
                File.Delete(skipFileName);
            }

            return true;
        }
Example #2
0
        /// <summary>
        /// Convenience overload: forwards every argument to the overload that additionally takes a
        /// Storey-Tibshirani test-count override, passing -1 for that override.
        /// </summary>
        /// <returns>Whatever the forwarded-to overload returns.</returns>
        public static bool CreateTabulateReport(DirectoryInfo dirinfo, ICollection <string> inputFilePatternCollection, string outputFileName,
                                                KeepTest <Dictionary <string, string> > globalKeepTest, List <KeepTest <Dictionary <string, string> > > splitKeepTestList, double maxPValue,
                                                bool auditRowIndexValues, bool useStoreyTibsharaniMethod, bool doLocalTabulation)
        {
            // NOTE(review): -1 presumably means "no override" -- confirm against the full overload.
            int testCountOverride = -1;

            return CreateTabulateReport(
                dirinfo,
                inputFilePatternCollection,
                outputFileName,
                globalKeepTest,
                splitKeepTestList,
                maxPValue,
                auditRowIndexValues,
                useStoreyTibsharaniMethod,
                testCountOverride,
                doLocalTabulation);
        }
        /// <summary>
        /// Factory for work lists. Returns the instance produced by <c>UniversalWorkListPredTargPairs.GetInstance</c>
        /// when <paramref name="keepTest"/> is a <c>KeepPredictorTargetPairs</c>, or is a <c>KeepCollection</c> that
        /// directly contains one; otherwise returns a plain <c>UniversalWorkList</c>.
        /// </summary>
        /// <returns>The work list built from the supplied arguments.</returns>
        public static UniversalWorkList GetInstance(
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest <Dictionary <string, string> > keepTest
            )
        {
            bool usePairEnumeration = keepTest is KeepPredictorTargetPairs;

            // Only the collection's direct members are inspected; nested collections are not searched.
            KeepCollection <Dictionary <string, string> > collection = keepTest as KeepCollection <Dictionary <string, string> >;
            if ((object)collection != null)
            {
                foreach (KeepTest <Dictionary <string, string> > member in collection.KeepTestCollection)
                {
                    if (member is KeepPredictorTargetPairs)
                    {
                        usePairEnumeration = true;
                    }
                }
            }

            if (usePairEnumeration)
            {
                return UniversalWorkListPredTargPairs.GetInstance(
                    predictorNameAndCaseIdToNonMissingValueEnumeration,
                    targetNameAndCaseIdToNonMissingValueEnumeration,
                    nullDataCollection,
                    nullIndexRange,
                    keepTest);
            }

            return new UniversalWorkList(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataCollection,
                nullIndexRange,
                keepTest);
        }
        /// <summary>
        /// Stores the predictor and target enumerations, the null data collection, the null-index range and the
        /// keep test in the corresponding fields. No other work is done here.
        /// </summary>
        protected UniversalWorkList(
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest <Dictionary <string, string> > keepTest
            )
        {
            // Filtering and null-data configuration.
            _keepTest = keepTest;
            _nullDataCollection = nullDataCollection;
            _nullIndexRange = nullIndexRange;

            // Data sources.
            _predictorNameAndCaseIdToNonMissingValueEnumeration = predictorNameAndCaseIdToNonMissingValueEnumeration;
            _targetNameAndCaseIdToNonMissingValueEnumeration = targetNameAndCaseIdToNonMissingValueEnumeration;
        }
 /// <summary>
 /// Passes all arguments straight through to the base-class constructor; this constructor adds no state of its own.
 /// </summary>
 protected UniversalWorkListPredTargPairs(
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
     NullDataCollection nullDataCollection,
     RangeCollection nullIndexRange,
     KeepTest <Dictionary <string, string> > keepTest
     )
     : base(predictorNameAndCaseIdToNonMissingValueEnumeration,
            targetNameAndCaseIdToNonMissingValueEnumeration,
            nullDataCollection,
            nullIndexRange,
            keepTest)
 {
 }
        /// <summary>
        /// Factory that hides the base-class <c>GetInstance</c> and always creates a
        /// <c>UniversalWorkListPredTargPairs</c> from the supplied arguments.
        /// </summary>
        /// <returns>The new instance, typed as <c>UniversalWorkList</c>.</returns>
        public static new UniversalWorkList GetInstance(
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest <Dictionary <string, string> > keepTest
            )
        {
            return new UniversalWorkListPredTargPairs(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataCollection,
                nullIndexRange,
                keepTest);
        }
        /// <summary>
        /// Decides whether this keep test is compatible with <paramref name="keepTestNew"/>.
        /// </summary>
        /// <param name="keepTestNew">The candidate keep test.</param>
        /// <returns>
        /// For a <c>K1</c> candidate: whether this instance's <c>k1</c> is less than or equal to the candidate's.
        /// For an <c>And</c> candidate: true only if every conjunct is compatible (an empty conjunction yields true).
        /// For anything else: false.
        /// </returns>
        public override bool IsCompatibleWithNewKeepTest(KeepTest <Dictionary <string, string> > keepTestNew)
        {
            //!!!would be nice if class didn't have to know all these classes it was compatible with
            K1 candidateK1 = keepTestNew as K1;
            if ((object)candidateK1 != null)
            {
                return k1 <= candidateK1.k1;
            }

            //!!!This code is duplicate many times
            And <Dictionary <string, string> > conjunction = keepTestNew as And <Dictionary <string, string> >;
            if ((object)conjunction == null)
            {
                return false;
            }

            // A conjunction is compatible only when each of its parts is.
            foreach (KeepTest <Dictionary <string, string> > part in conjunction.KeepTestCollection)
            {
                if (!IsCompatibleWithNewKeepTest(part))
                {
                    return false;
                }
            }

            return true;
        }
 /// <summary>
 /// Always reports incompatibility, whatever <paramref name="keepTestNew"/> is.
 /// </summary>
 /// <returns>Always false.</returns>
 public override bool IsCompatibleWithNewKeepTest(KeepTest <Dictionary <string, string> > keepTestNew)
 {
     //!!!could be made tighter
     return false;
 }
        /// <summary>
        /// Builds the work list from the predictor and target sparse files, then writes one report line for every
        /// work item that belongs to the requested pieces into a text file inside <paramref name="outputDirectoryName"/>.
        /// </summary>
        /// <remarks>
        /// The output file name is composed from <paramref name="shortName"/>, <paramref name="leafDistributionName"/>,
        /// <paramref name="nullDataGeneratorName"/>, <paramref name="nullIndexRangeCollection"/>,
        /// <paramref name="pieceCount"/>, <paramref name="pieceIndexRangeCollection"/> and, when a skip collection is
        /// supplied, a ".Skip&lt;count&gt;" suffix. Rows whose index is in the skip collection are not assigned a work
        /// index at all. Each written line is flushed immediately. The elapsed time is printed to the console at the end.
        /// </remarks>
        /// <param name="modelScorer">Scorer passed to the report-line creation; its cache is cleared on out-of-memory.</param>
        /// <param name="phyloTree">Tree passed to the null-data generator and the report-line creation.</param>
        /// <param name="predictorSparseFileName">Sparse file holding the predictor variables.</param>
        /// <param name="targetSparseFileName">Sparse file holding the target variables.</param>
        /// <param name="leafDistributionName">Used only as part of the output file name here.</param>
        /// <param name="nullDataGeneratorName">Selects the null-data generator; also part of the output file name.</param>
        /// <param name="keepTest">Keep test handed to the work list.</param>
        /// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or null to skip nothing.</param>
        /// <param name="shortName">First component of the output file name.</param>
        /// <param name="outputDirectoryName">Directory for the output file; created if necessary.</param>
        /// <param name="pieceIndexRangeCollection">Work indexes that this call should process.</param>
        /// <param name="pieceCount">Total number of pieces the work is divided into.</param>
        /// <param name="nullIndexRangeCollection">Null indexes handed to the null-data generator and the work list.</param>
        /// <param name="optimizerName">Not used inside this method.</param>
        public void Run(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);

            // Path.Combine supplies the platform's directory separator instead of a hard-coded backslash.
            string outputFileName = Path.Combine(
                outputDirectoryName,
                string.Format(@"{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
                              shortName,
                              leafDistributionName, nullDataGeneratorName,
                              nullIndexRangeCollection,
                              pieceCount,
                              pieceIndexRangeCollection,
                              skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()
                              ));

            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            // The list is enumerated once here just to count it, and a second time below to do the work.
            int workListCount = SpecialFunctions.Count(workList.List());

            // The effective count is the number of rows that remain after removing the skipped row indexes.
            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(Header);
                int rowIndex          = -1; // index over every row of the work list
                int effectiveRowIndex = -1; // index over the rows that are not skipped

                foreach (RowData rowAndTargetData in workList.List())
                {
                    //!!!make all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        // The work index is derived from the non-skipped position, so skipped rows do not
                        // influence how the remaining rows are divided among the pieces.
                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                // Best effort: drop the scorer's cache and retry this row once. A second failure propagates.
                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelScorer.ClearCache();
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            // Flush after every line so that completed rows are on disk even if a later row fails.
                            textWriter.WriteLine(reportLine);
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: " + stopwatch.Elapsed);
        }
 /// <summary>
 /// Not implemented for this keep test; every call throws.
 /// </summary>
 /// <param name="keepTestNew">Ignored.</param>
 /// <returns>Never returns.</returns>
 /// <exception cref="NotImplementedException">Always thrown.</exception>
 public override bool IsCompatibleWithNewKeepTest(KeepTest <Dictionary <string, string> > keepTestNew)
 {
     // NotImplementedException derives from Exception, so existing catch blocks keep working,
     // while callers can now tell "not implemented" apart from other failures.
     throw new NotImplementedException("The method or operation is not implemented.");
 }
 /// <summary>
 /// Reports that this keep test is never compatible with a new keep test.
 /// </summary>
 /// <param name="keepTestNew">Ignored.</param>
 /// <returns>Always false.</returns>
 public override bool IsCompatibleWithNewKeepTest(KeepTest <Dictionary <string, string> > keepTestNew)
 {
     return false;
 }
 /// <summary>
 /// Compatibility checking has not been worked out for this keep test, so the answer is always "not compatible".
 /// </summary>
 /// <param name="keepTestNew">Ignored.</param>
 /// <returns>Always false.</returns>
 public override bool IsCompatibleWithNewKeepTest(KeepTest <Dictionary <string, string> > keepTestNew)
 {
     // Nothing has been implemented here yet.
     return false;
 }
Example #13
0
        /// <summary>
        /// Reads every file in the current directory that matches <paramref name="inputFilePattern"/> and sorts the
        /// accepted rows into real rows (null index -1) and null p-values (any other null index).
        /// </summary>
        /// <remarks>
        /// A row is accepted when the row-index tabulator's <c>TryAdd</c> returns true for it and it passes
        /// <paramref name="keepTest"/>. Every accepted row must carry the null-index column. Only rows whose p-value is
        /// less than or equal to <paramref name="maxPValue"/> are collected. A warning is printed for a file whose
        /// header differs from the first header seen; the header check happens when a file's first row is read.
        /// After all files are read the tabulator's completeness check is run.
        /// </remarks>
        /// <param name="inputFilePattern">File pattern, resolved against the current directory.</param>
        /// <param name="keepTest">Filter applied to each row.</param>
        /// <param name="maxPValue">Largest p-value that is still collected.</param>
        /// <param name="auditRowIndexValues">Passed to <c>RowIndexTabulator.GetInstance</c>.</param>
        /// <param name="realRowCollectionToSort">Receives the rows with null index -1.</param>
        /// <param name="nullValueCollectionToBeSorted">Receives the p-values of rows with any other null index.</param>
        /// <param name="headerSoFar">Header of the first file read; set here when it is still null.</param>
        /// <returns>The set of all null indexes seen on accepted rows, regardless of p-value.</returns>
        private static Set <int> CreateTabulateReportInternal(
            string inputFilePattern,
            KeepTest <Dictionary <string, string> > keepTest,
            double maxPValue,
            bool auditRowIndexValues,
            ref List <Dictionary <string, string> > realRowCollectionToSort,
            ref List <double> nullValueCollectionToBeSorted,
            ref string headerSoFar)
        {
            Set <int> seenNullIndexes = Set <int> .GetInstance();

            //!!!very similar code elsewhere
            RowIndexTabulator tabulator = RowIndexTabulator.GetInstance(auditRowIndexValues);

            string[] matchingFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), inputFilePattern);
            foreach (string fileName in matchingFiles)
            {
                Debug.WriteLine(fileName);

                string headerOnFile;
                bool   headerChecked = false;
                foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(fileName, /*includeWholeLine*/ true, out headerOnFile))
                {
                    if (!headerChecked)
                    {
                        headerChecked = true;
                        if (headerSoFar == null)
                        {
                            headerSoFar = headerOnFile;
                        }
                        else if (headerSoFar != headerOnFile)
                        {
                            Console.WriteLine("Warning: The header for file {0} is different from the 1st file read in", fileName);
                        }
                    }

                    // The tabulator must see the row before the keep test is consulted.
                    if (!tabulator.TryAdd(row, fileName) || !keepTest.Test(row))
                    {
                        continue;
                    }

                    SpecialFunctions.CheckCondition(row.ContainsKey(NullIndexColumnName), string.Format(@"When tabulating a ""{0}"" column is required. (File ""{1}"")", NullIndexColumnName, fileName));

                    int nullIndex = int.Parse(row[NullIndexColumnName]);
                    seenNullIndexes.AddNewOrOld(nullIndex);

                    double pValue = AccessPValueFromPhylotreeRow(row);

                    // Written as a negated "<=" so that a NaN p-value is dropped, exactly like too-large values.
                    if (!(pValue <= maxPValue))
                    {
                        continue;
                    }

                    if (nullIndex == -1)
                    {
                        realRowCollectionToSort.Add(row);
                    }
                    else
                    {
                        nullValueCollectionToBeSorted.Add(pValue);
                    }
                }
            }

            tabulator.CheckIsComplete(inputFilePattern);

            return seenNullIndexes;
        }
Example #14
0
        /// <summary>
        /// Does the work: runs the driver for this task inside a temporary sandbox directory while capturing
        /// everything written to the console, then stores the content of the produced output file in this object.
        /// </summary>
        /// <remarks>
        /// <para>The input data field is cleared first so that it is not serialized back.</para>
        /// <para>
        /// Standard output and standard error are redirected into memory while the driver runs and are always copied
        /// to <c>TaskResult</c>, even when the run fails. The console writers that were active on entry are
        /// restored before the temporary writers are disposed.
        /// </para>
        /// <para>
        /// If the driver finishes but the expected output file is missing, the task result is marked as failed with
        /// <c>TaskFailureReason.MissingOutput</c> and no attempt is made to read the file.
        /// </para>
        /// <para>The sandbox directory is removed afterwards if it exists.</para>
        /// </remarks>
        public override void DoWork()
        {
            // get our input data and null the field to make sure we don't serialize it back
            InputData inputData = mInputData;

            mInputData = null;

            // get the job-specific names of input files
            FileDefCollection fileDefs             = Job.FileDefs;
            string            treeFileName         = Utility.GetNamedFileDef(fileDefs, Constants.TreeFileDefName).LocalName;
            string            predictorFileName    = Utility.GetNamedFileDef(fileDefs, Constants.PredictorFileDefName).LocalName;
            string            targetFileName       = Utility.GetNamedFileDef(fileDefs, Constants.TargetFileDefName).LocalName;
            string            skipRowIndexFileName = Utility.GetNamedFileDef(fileDefs, Constants.SkipRowIndexFileDefName).LocalName;

            // construct RangeCollections
            RangeCollection pieceIndexRangeCollection = RangeCollection.Parse(inputData.PieceIndexRange);
            RangeCollection nullIndexRangeCollection  = RangeCollection.Parse(inputData.NullIndexRange);
            RangeCollection skipRowIndexRangeCollection;
            FileInfo        fileInfo = new FileInfo(skipRowIndexFileName);

            // an empty skip file means "skip nothing", which the driver expects as null
            if (fileInfo.Length > 0)
            {
                skipRowIndexRangeCollection = RangeCollection.Parse(File.ReadAllText(skipRowIndexFileName));
            }
            else
            {
                skipRowIndexRangeCollection = null;
            }

            // do the rest
            PhyloTree   aPhyloTree  = PhyloTree.GetInstance(treeFileName, null);
            ModelScorer modelScorer =
                ModelScorer.GetInstance(aPhyloTree, inputData.LeafDistributionName, inputData.OptimizerName);
            ModelEvaluator modelEvaluator = ModelEvaluator.GetInstance(inputData.LeafDistributionName, modelScorer);
            KeepTest <Dictionary <string, string> > keepTest =
                KeepTest <Dictionary <string, string> > .GetInstance(null, inputData.KeepTestName);

            PhyloDDriver driver = PhyloDDriver.GetInstance();

            // create a name for the temporary job sandbox.  This directory gets created by driver.Run(...)
            string agentOutputDirectoryName =
                Path.Combine(Environment.CurrentDirectory, String.Format(CultureInfo.InvariantCulture, "{0}.{1}", Job.JobId, Task.TaskId));

            // remember the current console writers so they can be put back once the run is over
            TextWriter originalOut   = Console.Out;
            TextWriter originalError = Console.Error;

            // save the standard out and standard error in memory streams
            using (MemoryStream streamOut = new MemoryStream(), streamError = new MemoryStream())
            {
                try
                {
                    // redirect the outputs
                    using (
                        StreamWriter writerOut = new StreamWriter(streamOut),
                        writerError = new StreamWriter(streamError))
                    {
                        Console.SetOut(writerOut);
                        Console.SetError(writerError);

                        try
                        {
                            // run the model
                            string outputFileName = driver.Run(
                                modelEvaluator,
                                predictorFileName, targetFileName,
                                inputData.LeafDistributionName, inputData.NullDataGeneratorName,
                                keepTest, skipRowIndexRangeCollection,
                                inputData.NiceName,
                                agentOutputDirectoryName,
                                pieceIndexRangeCollection, inputData.PieceCount,
                                nullIndexRangeCollection,
                                inputData.OptimizerName);

                            // this is the expected output file name -- save this so it can be written on the master side with the same name.
                            mOutputFileName = Path.GetFileName(outputFileName);

                            mLocalOutputFileName = Path.Combine(inputData.LocalOutputDirectoryName, mOutputFileName);

                            // get the output data
                            string fullOutputPath = Path.Combine(agentOutputDirectoryName, mOutputFileName);
                            if (File.Exists(fullOutputPath))
                            {
                                mOutputData = File.ReadAllText(fullOutputPath);
                            }
                            else
                            {
                                // Report the failure through the task result instead of crashing on the
                                // missing file, and name the file that is actually missing.
                                TaskResult.FailureReason  = TaskFailureReason.MissingOutput;
                                TaskResult.FailureMessage = String.Format(CultureInfo.CurrentCulture, "Cannot find output file '{0}'", fullOutputPath);
                                TaskResult.Status         = TaskAssignmentStatus.Failed;
                            }
                        }
                        finally
                        {
                            // Put the console back first, so that it never stays attached to the writers
                            // that are disposed right after this block, even if the cleanup below throws.
                            Console.SetOut(originalOut);
                            Console.SetError(originalError);

                            // get rid of the sandbox; it may not exist if the run failed before creating it,
                            // and deleting a missing directory would hide the original exception
                            if (Directory.Exists(agentOutputDirectoryName))
                            {
                                Directory.Delete(agentOutputDirectoryName, true);
                            }
                        }
                    }
                }
                finally
                {
                    // this finally is to make sure we get console output
                    // ToArray returns only the bytes actually written (GetBuffer includes unused capacity),
                    // and the writers above encode as UTF-8, so that is the encoding to decode with.
                    Encoding encoding = Encoding.UTF8;
                    TaskResult.StandardOutput = encoding.GetString(streamOut.ToArray());
                    TaskResult.StandardError  = encoding.GetString(streamError.ToArray());
                }
            }
        }
Example #15
0
        /// <summary>
        /// currently hard-coded to map the rowId to a groupId, used for localTabulation, by using hypothesisId = rowId % numRealHypotheses
        /// </summary>
        /// <param name="nullIndexSet"></param>
        /// <param name="dirinfo"></param>
        /// <param name="inputFilePattern"></param>
        /// <param name="globalKeepTest"></param>
        /// <param name="splitKeepTestList"></param>
        /// <param name="maxPValue"></param>
        /// <param name="auditRowIndexValues"></param>
        /// <param name="useStoreyTibsharaniMethod"></param>
        /// <param name="realRowCollectionToSortArray"></param>
        /// <param name="nullValueCollectionToBeSortedArrayDict"></param>
        /// <param name="totalPValueCount"></param>
        /// <param name="headerSoFar"></param>
        /// <param name="doLocalTabulationOfPermutationsToGetPvaluesFromRandomizations"></param>
        /// <returns></returns>
        private static RowIndexTabulator TryCreateTabulateReportInternal(out Set <int> nullIndexSet, DirectoryInfo dirinfo,
                                                                         string inputFilePattern,
                                                                         KeepTest <Dictionary <string, string> > globalKeepTest,
                                                                         List <KeepTest <Dictionary <string, string> > > splitKeepTestList,
                                                                         double maxPValue,
                                                                         bool auditRowIndexValues,
                                                                         bool useStoreyTibsharaniMethod,
                                                                         ref List <KeyValuePair <Dictionary <string, string>, double> >[] realRowCollectionToSortArray,
                                                                         ref Dictionary <int, List <double> >[] nullValueCollectionToBeSortedArrayDict,
                                                                         ref int[] totalPValueCount,
                                                                         ref string headerSoFar,
                                                                         bool doLocalTabulationOfPermutationsToGetPvaluesFromRandomizations
                                                                         )
        {
            //int splitCount=splitKeepTestList.Count;
            //List<double>[] nullValueCollectionToBeSortedArray = new List<double>[splitCount];
            //for (int j = 0; j < splitCount; j++) nullValueCollectionToBeSortedArray[j] = new List<double>();

            nullIndexSet = Set <int> .GetInstance();

            //!!!very similar code elsewhere
            RowIndexTabulator rowIndexTabulator = RowIndexTabulator.GetInstance(auditRowIndexValues);
            //RangeCollection unfilteredRowIndexRangeCollection = new RangeCollection();
            int lastWriteLineLength = 0;
            int nullValueCount      = 0;

            foreach (FileInfo fileinfo in dirinfo.GetFiles(inputFilePattern))
            {
                try
                {
                    int sigLines = realRowCollectionToSortArray.Select(split => split.Count).Sum();
                    //nullValueCount = nullValueCollectionToBeSortedArray.Select(split => split.Count).Sum();
                    int totalLines = sigLines + nullValueCount + totalPValueCount.Sum();

                    string writeLine = string.Format("{0}/{1} lines have p<=1. Now reading {2}", sigLines, totalLines, fileinfo.FullName);
                    Console.Write("\r{0,-" + lastWriteLineLength + "}", writeLine);
                    lastWriteLineLength = writeLine.Length;

                    string headerOnFile;
                    using (TextReader reader = SpecialFunctions.GetTextReaderWithExternalReadWriteAccess(fileinfo.FullName))
                    {
                        headerOnFile = reader.ReadLine();
                        if (headerSoFar == null)
                        {
                            headerSoFar = headerOnFile;
                        }
                        else if (headerSoFar != headerOnFile)
                        {
                            Console.WriteLine("Warning: The header for file {0} is different from the 1st file read in", fileinfo.Name);
                        }
                    }

                    //KeepAa2AaOnly keepAa = KeepAa2AaOnly.GetInstance();
                    //Console.WriteLine(keepAa);

                    using (TextReader reader = SpecialFunctions.GetTextReaderWithExternalReadWriteAccess(fileinfo.FullName))
                    {
                        foreach (Dictionary <string, string> row in SpecialFunctions.TabFileTable(reader, headerOnFile, /*includeWholeLine*/ true))
                        {
                            if (rowIndexTabulator.TryAdd(row, fileinfo.FullName) && globalKeepTest.Test(row))
                            {
                                //Helper.CheckCondition(row.ContainsKey(NullIndexColumnName), string.Format(@"When tabulating a ""{0}"" column is required. (File ""{1}"")", NullIndexColumnName, fileinfo.Name));

                                //int nullIndex = int.Parse(row[NullIndexColumnName]);
                                int nullIndex = !row.ContainsKey(NullIndexColumnName) && useStoreyTibsharaniMethod ? -1 : int.Parse(row[NullIndexColumnName]);
                                nullIndexSet.AddNewOrOld(nullIndex);

                                double pValue = AccessPValueFromPhylotreeRow(row);
                                if (useStoreyTibsharaniMethod && nullIndex == -1)
                                {
                                    int splitIdx = GetSplitTabulateIndex(row, splitKeepTestList);
                                    if (pValue <= maxPValue)
                                    {
                                        realRowCollectionToSortArray[splitIdx].Add(new KeyValuePair <Dictionary <string, string>, double>(row, pValue));
                                    }
                                    //nullValueCollectionToBeSortedArray[splitIdx].Add(pValue);
                                    totalPValueCount[splitIdx]++;
                                }
                                else if (!useStoreyTibsharaniMethod)
                                {
                                    if (pValue <= maxPValue)
                                    {
                                        int splitIdx = GetSplitTabulateIndex(row, splitKeepTestList);
                                        if (nullIndex == -1)
                                        {
                                            realRowCollectionToSortArray[splitIdx].Add(new KeyValuePair <Dictionary <string, string>, double>(row, pValue));
                                            //realRowCollectionToSortArray[splitIdx].Add(row);
                                        }
                                        else
                                        {
                                            int groupId;
                                            if (!doLocalTabulationOfPermutationsToGetPvaluesFromRandomizations)
                                            {
                                                //always add it to the zero key if not doing local tabulations
                                                groupId = 0;
                                            }
                                            else
                                            {
                                                groupId = int.Parse(row[GroupIdColumnName]);
                                            }

                                            nullValueCollectionToBeSortedArrayDict[splitIdx].GetValueOrDefault(groupId).Add(pValue);
                                            nullValueCount++;
                                            //nullValueCollectionToBeSortedArray[splitIdx].Add(pValue);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                catch
                {
                    Console.WriteLine("\nFailure parsing {0}.", fileinfo.Name);
                    throw;
                }
            }

            Console.WriteLine("\r{0,-" + lastWriteLineLength + "}", "Read all files.");
            return(rowIndexTabulator);
            //rowIndexTabulator.CheckIsComplete(inputFilePattern);

            //return nullIndexSet;
        }
Example #16
0
        /// <summary>
        /// Command-line entry point. Reads the optional settings and the positional arguments, validates them,
        /// builds the tree, model scorer and model evaluator, and hands everything to <c>PhyloDDriver.Run</c>.
        /// When the "help" flag is present it only prints the usage and help messages.
        /// Any exception is reported on the console (message, inner message, usage message) and then rethrown.
        /// </summary>
        /// <param name="args">The raw command-line arguments.</param>
        static void Main(string[] args)
        {
            try
            {
                ArgCollection commandLine = ArgCollection.GetInstance(args);

                if (commandLine.ExtractOptionalFlag("help"))
                {
                    Console.WriteLine("");
                    Console.WriteLine(UsageMessage);
                    Console.WriteLine(HelpMessage);
                    return;
                }

                // Named, optional settings are pulled out first; anything option-like left over is rejected.
                string optimizer      = commandLine.ExtractOptional<string>("optimizer", "BrentThenGrid");
                string keepTestSpec   = commandLine.ExtractOptional<string>("keepTest", "AlwaysKeep");
                string skipFileOrNull = commandLine.ExtractOptional<string>("skipRowIndexFile", null);
                commandLine.CheckNoMoreOptions();

                // Positional arguments are consumed strictly in this order.
                string          treeFile          = commandLine.ExtractNext<string>("treeFile");
                string          predictorFile     = commandLine.ExtractNext<string>("predictorFile");
                string          targetFile        = commandLine.ExtractNext<string>("targetFile");
                string          leafDistribution  = commandLine.ExtractNext<string>("leafDistribution");
                string          nullDataGenerator = commandLine.ExtractNext<string>("nullDataGenerator");
                string          runName           = commandLine.ExtractNext<string>("niceName");
                string          outputDir         = commandLine.ExtractNext<string>("outputDirectory");
                RangeCollection pieceIndexes      = commandLine.ExtractNext<RangeCollection>("pieceIndexRange");
                int             totalPieces       = commandLine.ExtractNext<int>("pieceCount");
                RangeCollection nullIndexes       = commandLine.ExtractNext<RangeCollection>("nullIndexRange");
                commandLine.CheckThatEmpty();

                if (!PhyloDDriver.ValidateDistribution(leafDistribution))
                {
                    Console.WriteLine("{0} is not a recognized distribution name. Please choose a name from the following list:", leafDistribution);
                    foreach (string knownName in PhyloDDriver.GetDistributionNames())
                    {
                        Console.WriteLine("\t{0}", knownName);
                    }
                    throw new ArgumentException("Invalid distribution name.");
                }

                // A missing skip file, or the literal text "null", both mean "skip nothing".
                RangeCollection skipRowsOrNull = null;
                if (skipFileOrNull != null && skipFileOrNull != "null")
                {
                    skipRowsOrNull = RangeCollection.Parse(File.ReadAllText(skipFileOrNull));
                }
                KeepTest<Dictionary<string, string>> rowFilter = KeepTest<Dictionary<string, string>>.GetInstance(null, keepTestSpec);

                SpecialFunctions.CheckCondition(pieceIndexes.IsBetween(0, totalPieces - 1), "pieceIndex must be at least 0 and less than pieceCount");
                SpecialFunctions.CheckCondition(nullIndexes.IsBetween(-1, int.MaxValue), "nullIndex must be at least -1");

                PhyloTree      tree      = PhyloTree.GetInstance(treeFile, null);
                ModelScorer    scorer    = ModelScorer.GetInstance(tree, leafDistribution, optimizer);
                ModelEvaluator evaluator = ModelEvaluator.GetInstance(leafDistribution, scorer);
                PhyloDDriver   phyloD    = PhyloDDriver.GetInstance();

                phyloD.Run(
                    evaluator,
                    predictorFile, targetFile,
                    leafDistribution, nullDataGenerator,
                    rowFilter, skipRowsOrNull,
                    runName,
                    outputDir,
                    pieceIndexes, totalPieces,
                    nullIndexes,
                    optimizer);
            }
            catch (Exception failure)
            {
                // Show what went wrong plus the usage text, then let the exception propagate.
                Console.WriteLine("");
                Console.WriteLine(failure.Message);

                Exception cause = failure.InnerException;
                if (cause != null)
                {
                    Console.WriteLine(cause.Message);
                }

                Console.WriteLine("");
                Console.WriteLine(UsageMessage);
                throw;
            }
        }
        /// <summary>
        /// Command-line entry point for Tabulate. Strips the optional <c>-NoAudit</c>, <c>-MaxPValue value</c> and
        /// <c>-KeepTest name</c> switches out of the argument list, treats the last remaining argument as the output
        /// file name and every argument before it as a broad input file pattern, then calls
        /// <c>Tabulate.CreateTabulateReport</c>. On any exception the message(s) and the usage text are printed
        /// and the exception is rethrown.
        /// </summary>
        /// <param name="argsx">The raw command-line arguments.</param>
        static void Main(string[] argsx)
        {
            try
            {
                List<string> argumentCollection = new List<string>(argsx);

                // Auditing is on unless -NoAudit is given.
                bool   auditRowIndexValues = true;
                string noAuditFlag         = "-NoAudit";
                if (argumentCollection.Contains(noAuditFlag))
                {
                    argumentCollection.Remove(noAuditFlag);
                    auditRowIndexValues = false;
                }

                double maxPValue         = 1.0; // Ignore pValues greater than this
                string maxPValueFlag     = "-MaxPValue";
                int    maxPValuePosition = argumentCollection.IndexOf(maxPValueFlag);
                if (maxPValuePosition >= 0)
                {
                    // Once the flag is removed, its value has shifted into the flag's old position.
                    argumentCollection.RemoveAt(maxPValuePosition);
                    SpecialFunctions.CheckCondition(maxPValuePosition < argumentCollection.Count, "pValue expected after -MaxPValue");
                    maxPValue = double.Parse(argumentCollection[maxPValuePosition]);
                    argumentCollection.RemoveAt(maxPValuePosition);
                }

                KeepTest<Dictionary<string, string>> keepTest; // Row filter; AlwaysKeep unless -KeepTest is given
                string keepTestFlag     = "-KeepTest";
                int    keepTestPosition = argumentCollection.IndexOf(keepTestFlag);
                if (keepTestPosition >= 0)
                {
                    argumentCollection.RemoveAt(keepTestPosition);
                    // The message used to name -MaxPValue, which pointed the user at the wrong switch.
                    SpecialFunctions.CheckCondition(keepTestPosition < argumentCollection.Count, "KeepTest expected after -KeepTest");
                    keepTest = KeepTest<Dictionary<string, string>>.GetInstance(null, argumentCollection[keepTestPosition]);

                    argumentCollection.RemoveAt(keepTestPosition);
                }
                else
                {
                    keepTest = new AlwaysKeep<Dictionary<string, string>>();
                }


                // What remains is: one or more input patterns followed by the output file name.
                SpecialFunctions.CheckCondition(argumentCollection.Count > 1, "Expect 2 or more parameters");
                string outputFileName = argumentCollection[argumentCollection.Count - 1];
                argumentCollection.RemoveAt(argumentCollection.Count - 1);

                Tabulate.CreateTabulateReport(argumentCollection, outputFileName, keepTest, maxPValue, auditRowIndexValues);
            }
            catch (Exception e)
            {
                Console.WriteLine("");
                Console.WriteLine(e.Message);
                if (e.InnerException != null)
                {
                    Console.WriteLine(e.InnerException.Message);
                }
                Console.WriteLine(@"
Usage:
Tabulate {-NoAudit} {-MaxPValue maxPValue} {-KeepTest keeptest} broadInputFileNamePattern1 {broadInputFileNamePattern2 ...} outputFileName
 
Each broadInputFileNamePattern1 is of the form 
narrowInputFileNamePattern1{+narrowInputFileNamePattern2...}
 
Each broadInputFileNamePattern must cover the same range of nullIndexes (including -1, the real index).

Each narrowInputFileNamePattern within a broadInputFileNamePattern must cover a disjoint
set of nullIndexes.

For example 
Tabulate -MaxPValue .05  raw\GagEscape0606*-1-19*.txt raw\GagReversion0606*-1-9*.txt+raw\GagReversion0606*10-19*.txt AllGag.qValue.txt 
Notice that broad pattern
    raw\GagEscape0606*-1-19*.txt 
has one narrow pattern and covers nullIndex's -1 to 19

While broad pattern
    raw\GagReversion0606*-1-9*.txt+raw\GagReversion0606*10-19*.txt
has two narrow patterns:
   raw\GagReversion0606*-1-9*.txt, which covers nullIndexes -1 to 9 
   raw\GagReversion0606*10-19*.txt which covers nullIndexes 10 to 19 

 

By default, ""Tabulate"" will audit the ""rowIndex"" and ""rowCount"" values
in the input to remove duplicates and check that all rows are present.
Use ""-NoAudit"" when this is not desired.

Use ""-MaxPValue maxPValue"", where maxPValue is a double, to ignore rows with obviously bad rows 
");
                throw;
            }
        }
Example #18
0
        ///// <summary>
        /////
        ///// </summary>
        ///// <returns>bool indicating successful tabulate. False indicates the audit failed, in which case the outputFileName will be used
        ///// to create a skip file.</returns>
        //public static bool CreateTabulateReport(DirectoryInfo dirinfo, ICollection<string> inputFilePatternCollection, string outputFileName,
        //    KeepTest<Dictionary<string, string>> globalKeepTest, double maxPValue, bool auditRowIndexValues, bool useStoreyTibsharaniMethod)
        //{
        //    return CreateTabulateReport(dirinfo, inputFilePatternCollection, outputFileName, globalKeepTest, new List<KeepTest<Dictionary<string,string>>>(),
        //        maxPValue, auditRowIndexValues, useStoreyTibsharaniMethod);
        //}


        /// <summary>
        /// Convenience overload for a single input file pattern: wraps <paramref name="inputFilePattern"/> in a
        /// one-element list and forwards every argument unchanged to the pattern-collection overload.
        /// </summary>
        /// <returns>Whatever the pattern-collection overload returns.</returns>
        public static bool CreateTabulateReport(DirectoryInfo dirinfo, string inputFilePattern, string outputFileName,
                                                KeepTest<Dictionary<string, string>> globalKeepTest, List<KeepTest<Dictionary<string, string>>> splitKeepTestList, double maxPValue,
                                                bool auditRowIndexValues, bool useStoreyTibsharaniMethod)
        {
            return CreateTabulateReport(
                dirinfo,
                SpecialFunctions.CreateSingletonList(inputFilePattern),
                outputFileName,
                globalKeepTest,
                splitKeepTestList,
                maxPValue,
                auditRowIndexValues,
                useStoreyTibsharaniMethod);
        }
 /// <summary>
 /// Creates the keep-test selected by <paramref name="keepTestName"/>. The name is matched against each known
 /// prefix in a fixed order and the first match wins; for tests that take an argument, the text after the
 /// prefix is handed to that test's factory. Names matching none of the prefixes are delegated to
 /// <c>KeepTest&lt;TRow&gt;.GetInstance</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): <paramref name="hlaFileName"/> is never read here; the fallback call passes null in its
 /// place. Confirm whether that is intentional.
 /// Dispatch for K2, KeepPollockOneDirection and KeepFisherOneDirection used to live here and is disabled.
 /// </remarks>
 new public static KeepTest <Dictionary <string, string> > GetInstance(string inputDirectory,
                                                                       string binarySeqFileName, string hlaFileName, string keepTestName, int merSize, Dictionary <int, string> pidToCaseName)
 {
     //!!!would be nice of classes could parse themselves

     // The order of these checks is significant: the first matching prefix decides the result.
     if (keepTestName.StartsWith(KeepEndOfGag.Prefix))
     {
         string flagText = keepTestName.Substring(KeepEndOfGag.Prefix.Length);
         return KeepEndOfGag.GetInstance(bool.Parse(flagText));
     }

     if (keepTestName.StartsWith(K1.Prefix))
     {
         string numberText = keepTestName.Substring(K1.Prefix.Length);
         return K1.GetInstance(int.Parse(numberText));
     }

     if (keepTestName.StartsWith(KeepNonOverlappingAA.Prefix))
     {
         return KeepNonOverlappingAA.GetInstance();
     }

     if (keepTestName.StartsWith(KeepGene.Prefix))
     {
         return KeepGene.GetInstance(keepTestName.Substring(KeepGene.Prefix.Length));
     }

     // KeepSpecificRows is tested before KeepSpecificRow, exactly as in the original chain.
     if (keepTestName.StartsWith(KeepSpecificRows.Prefix))
     {
         string rowsArgument = keepTestName.Substring(KeepSpecificRows.Prefix.Length);
         return KeepSpecificRows.GetInstance(rowsArgument);
     }

     if (keepTestName.StartsWith(KeepSpecificRow.Prefix))
     {
         string rowArgument = keepTestName.Substring(KeepSpecificRow.Prefix.Length);
         return KeepSpecificRow.GetInstance(rowArgument);
     }

     if (keepTestName.StartsWith(KeepSpecificGenes.Prefix))
     {
         string genesArgument = keepTestName.Substring(KeepSpecificGenes.Prefix.Length);
         return KeepSpecificGenes.GetInstance(genesArgument);
     }

     if (keepTestName.StartsWith(KeepOneOfAAPair.Prefix))
     {
         return KeepOneOfAAPair.GetInstance();
     }

     if (keepTestName.StartsWith(KeepAllButSamePosition.Prefix))
     {
         return KeepAllButSamePosition.GetInstance();
     }

     if (keepTestName.StartsWith(KeepAllButSameDeletion.Prefix))
     {
         return KeepAllButSameDeletion.GetInstance();
     }

     if (keepTestName.StartsWith(KeepNonTrivialRows.Prefix))
     {
         return new KeepNonTrivialRows();
     }

     if (keepTestName.StartsWith(KeepTestTemp.Prefix))
     {
         return KeepTestTemp.GetInstance();
     }

     if (keepTestName.StartsWith(KeepNonRare.Prefix))
     {
         string rareArgument = keepTestName.Substring(KeepNonRare.Prefix.Length);
         return KeepNonRare.GetInstance(rareArgument);
     }

     if (keepTestName.StartsWith(KeepPredictorTargetPairs.Prefix))
     {
         string pairsArgument = keepTestName.Substring(KeepPredictorTargetPairs.Prefix.Length);
         return KeepPredictorTargetPairs.GetInstance(pairsArgument);
     }

     if (keepTestName.StartsWith("JointGagPolTest"))
     {
         // Composite test: all three parts are combined through And.
         //KeepRandom<Dictionary<string,string>>.GetInstance(0, 0.001), // how do we make it the same when we count and when we really run through it?
         return And <Dictionary <string, string> > .GetInstance(
                    KeepOneOfAAPair.GetInstance(),
                    KeepNonOverlappingAA.GetInstance(),
                    KeepSpecificGenes.GetInstance(keepTestName.Substring("JointGagPolTest".Length)));
     }

     // Unknown here: let the other factory decide (note the null third argument).
     return KeepTest <TRow> .GetInstance(inputDirectory, binarySeqFileName, null, keepTestName, merSize, pidToCaseName);
 }
Example #20
0
        /// <summary>
        /// Reads the rows matched by every input file pattern, computes a q-value for each real row, and writes the
        /// rows to <paramref name="outputFileName"/> ordered by q-value and, for equal q-values, by p-value.
        /// </summary>
        /// <param name="dirinfo">Directory handed to the per-pattern reader.</param>
        /// <param name="inputFilePatternCollection">Broad input file patterns. Each one may consist of several narrow
        /// patterns joined by '+'; the narrow patterns of one broad pattern must cover disjoint sets of nullIndexes.</param>
        /// <param name="outputFileName">File created up front. It receives the report, or the skip ranges when rows are missing.</param>
        /// <param name="globalKeepTest">Row filter handed to the per-pattern reader.</param>
        /// <param name="splitKeepTestList">Keep-tests that define the splits; there is one more split than there are
        /// entries, and q-values are computed separately for each split.</param>
        /// <param name="maxPValue">p-value cut-off handed to the per-pattern reader.</param>
        /// <param name="auditRowIndexValues">When true, every broad pattern must contain nullIndex -1 (the real data).
        /// The flag is also handed to the per-pattern reader.</param>
        /// <param name="useStoreyTibsharaniMethod">True to compute q-values with <c>ComputeQValuesUseStoreyTibsharani</c>;
        /// false to compute them from the null (randomized) runs with <c>ComputeQValuesUseNulls</c>.</param>
        /// <param name="numTestsStoreyTibsOverride">Number of tests to pass to the Storey-Tibshirani computation, or -1
        /// to use the number of p-values observed in each split.</param>
        /// <param name="doLocalTabulation">Passed through to the per-pattern reader and to <c>ComputeQValuesUseNulls</c>.</param>
        /// <returns>True when the report was written. False when some narrow pattern did not yield all needed rows; in
        /// that case the ranges of the rows that were found are written to <paramref name="outputFileName"/>, which then
        /// serves as a skip file.</returns>
        public static bool CreateTabulateReport(DirectoryInfo dirinfo, ICollection<string> inputFilePatternCollection, string outputFileName,
                                                KeepTest<Dictionary<string, string>> globalKeepTest, List<KeepTest<Dictionary<string, string>>> splitKeepTestList, double maxPValue,
                                                bool auditRowIndexValues, bool useStoreyTibsharaniMethod, int numTestsStoreyTibsOverride, bool doLocalTabulation)
        {
            using (TextWriter textWriter = File.CreateText(outputFileName)) // Do this early so that if it fails, well know
            {
                int splitCount = splitKeepTestList.Count + 1;
                List<KeyValuePair<Dictionary<string, string>, double>>[] realRowCollectionToSortArray = new List<KeyValuePair<Dictionary<string, string>, double>>[splitCount];
                Dictionary<int, List<double>>[] nullValueCollectionToBeSortedArray = new Dictionary<int, List<double>>[splitCount];
                int[] totalPValueCount = new int[splitCount];

                for (int i = 0; i < splitCount; i++)
                {
                    realRowCollectionToSortArray[i]       = new List<KeyValuePair<Dictionary<string, string>, double>>(10000);
                    nullValueCollectionToBeSortedArray[i] = new Dictionary<int, List<double>>();
                }

                string headerSoFar = null;

                // Index set of the first broad pattern; stays null when no pattern is supplied at all.
                Set<int> broadRealAndNullIndexSetSoFar = null;

                foreach (string broadInputFilePattern in inputFilePatternCollection)
                {
                    Set<int> narrowRealAndNullIndexSetSetSoFar = Set<int>.GetInstance();

                    foreach (string narrowInputFilePattern in broadInputFilePattern.Split('+'))
                    {
                        Set<int>          realAndNullIndexSet;
                        RowIndexTabulator tabulator = TryCreateTabulateReportInternal(out realAndNullIndexSet, dirinfo, narrowInputFilePattern,
                                                                                      globalKeepTest, splitKeepTestList, maxPValue, auditRowIndexValues, useStoreyTibsharaniMethod,
                                                                                      ref realRowCollectionToSortArray, ref nullValueCollectionToBeSortedArray, ref totalPValueCount, ref headerSoFar, doLocalTabulation);
                        if (!tabulator.IsComplete())
                        {
                            // Rows are missing: turn the output file into a skip file and report failure.
                            textWriter.WriteLine(tabulator.GetSkipRangeCollection());
                            Console.WriteLine("Not all needed rows were found in {0}.", narrowInputFilePattern);
                            Console.WriteLine("Found rows:\n{0}", tabulator.GetSkipRangeCollection());
                            Console.WriteLine("{0} created as skip file.", outputFileName);
                            return false;
                        }

                        //Instead of throwing an error, we could filter out the duplicated null indexes
                        Helper.CheckCondition(narrowRealAndNullIndexSetSetSoFar.IntersectionIsEmpty(realAndNullIndexSet),
                                              string.Format("Within inputFilePattern {0}, multiple '+'-connected parts cover the same nullIndex(s), {1}",
                                                            broadInputFilePattern,
                                                            narrowRealAndNullIndexSetSetSoFar.Intersection(realAndNullIndexSet)));

                        narrowRealAndNullIndexSetSetSoFar.AddNewRange(realAndNullIndexSet);
                    }

                    Helper.CheckCondition(!auditRowIndexValues || narrowRealAndNullIndexSetSetSoFar.Contains(-1),
                                          string.Format("The 'null' index -1 for the real data was not seen in {0}", broadInputFilePattern));

                    if (broadRealAndNullIndexSetSoFar == null)
                    {
                        broadRealAndNullIndexSetSoFar = narrowRealAndNullIndexSetSetSoFar;
                    }
                    //else
                    //{
                    //	Helper.CheckCondition(broadRealAndNullIndexSetSoFar.Equals(narrowRealAndNullIndexSetSetSoFar),
                    //		string.Format("The broad inputFilePattern {0} covers a different set of nullIndexes ({1}) than its predecessors ({2})",
                    //		broadInputFilePattern, narrowRealAndNullIndexSetSetSoFar, broadRealAndNullIndexSetSoFar));
                    //}
                }

                // With no input patterns the index set was never assigned; fail with a clear message
                // instead of a NullReferenceException on the Count access below.
                Helper.CheckCondition(useStoreyTibsharaniMethod || broadRealAndNullIndexSetSoFar != null,
                                      "No input file patterns were given, so the number of randomization runs cannot be determined.");

                // The set holds -1 (the real run) plus one entry per randomized run.
                double numberOfRandomizationRuns = useStoreyTibsharaniMethod ? 0 : broadRealAndNullIndexSetSoFar.Count - 1;
                Console.WriteLine("Detected {0} randomized runs relative to the number of real runs.", numberOfRandomizationRuns);
                Helper.CheckCondition<InvalidDataException>(useStoreyTibsharaniMethod || numberOfRandomizationRuns > 0, "No randomization runs detected. Did you mean to include a -{0} flag?", Tabulate.STOREY_METHOD_NAME);

                //Compute q-values from p-values (and p-values from test statistic)
                List<KeyValuePair<Dictionary<string, string>, double>> rowAndQValues = new List<KeyValuePair<Dictionary<string, string>, double>>(1000);
                Dictionary<double, double> rowToPvalFromRandomizations = null;
                for (int i = 0; i < splitCount; i++)
                {
                    int numTestsToUse;
                    if (numTestsStoreyTibsOverride != -1)
                    {
                        Console.WriteLine("Using " + numTestsStoreyTibsOverride + " p-values for computation of q-values rather than the observed number (" + totalPValueCount[i] + ")");
                        numTestsToUse = numTestsStoreyTibsOverride;
                    }
                    else
                    {
                        numTestsToUse = totalPValueCount[i];
                    }

                    Dictionary<Dictionary<string, string>, double> qValueList;
                    if (useStoreyTibsharaniMethod)
                    {
                        qValueList = SpecialFunctions.ComputeQValuesUseStoreyTibsharani(ref realRowCollectionToSortArray[i], row => row.Value, numTestsToUse)
                                     .ToDictionary(entry => entry.Key.Key, entry => entry.Value);
                    }
                    else
                    {
                        // The local and non-local tabulation branches were identical calls;
                        // doLocalTabulation itself is forwarded, so a single call covers both.
                        qValueList = SpecialFunctions.ComputeQValuesUseNulls(ref realRowCollectionToSortArray[i],
                                                                             row => row.Value,
                                                                             row => int.Parse(((KeyValuePair<System.Collections.Generic.Dictionary<string, string>, double>)row).Key["groupId"]),
                                                                             row => int.Parse(((KeyValuePair<System.Collections.Generic.Dictionary<string, string>, double>)row).Key["rowIndex"]),
                                                                             ref nullValueCollectionToBeSortedArray[i], numberOfRandomizationRuns, out rowToPvalFromRandomizations, doLocalTabulation)
                                     .ToDictionary(entry => entry.Key.Key, entry => entry.Value);
                    }

                    rowAndQValues.AddRange(qValueList);
                }

                // Order by q-value; break ties with the row's p-value.
                rowAndQValues.Sort((row1, row2) =>
                                   row1.Value == row2.Value ?
                                   AccessPValueFromPhylotreeRow(row1.Key).CompareTo(AccessPValueFromPhylotreeRow(row2.Key)) :
                                   row1.Value.CompareTo(row2.Value));

                //!!!this code is repeated elsewhere
                bool tabulatingTestStatistic = COL_TO_TABULATE.TESTSTATISTIC == _columnToTabulate;
                if (tabulatingTestStatistic)
                {
                    Helper.CheckCondition(!useStoreyTibsharaniMethod, "the way its set up now, cannot use TestStatistic column with useStoreyTibshirani");
                    textWriter.WriteLine(Helper.CreateTabString(headerSoFar, "pValFromRandomizations", "qValue"));
                }
                else
                {
                    textWriter.WriteLine(Helper.CreateTabString(headerSoFar, "qValue"));
                }

                foreach (KeyValuePair<Dictionary<string, string>, double> rowAndQValue in rowAndQValues)
                {
                    // The row's "" entry is written first, followed by the appended column(s).
                    if (tabulatingTestStatistic)
                    {
                        double thisRow = double.Parse(rowAndQValue.Key["rowIndex"]);
                        double thisPvalFromRandomization = rowToPvalFromRandomizations[thisRow];
                        textWriter.WriteLine(Helper.CreateTabString(rowAndQValue.Key[""], thisPvalFromRandomization, rowAndQValue.Value));
                    }
                    else
                    {
                        textWriter.WriteLine(Helper.CreateTabString(rowAndQValue.Key[""], rowAndQValue.Value));
                    }
                }
            }
            return true;
        }
Example #21
0
        /// <summary>
        /// Similar to the other tabulators, but can work with multiple sets of pValues files. Reads every file
        /// pattern, computes a q-value for each real row from the real and null p-values, and writes the header
        /// plus one line per real row (with its q-value appended) to <paramref name="outputFileName"/>.
        /// </summary>
        /// <param name="inputFilePatternCollection">Broad input file patterns; at least one is required. Each may consist
        /// of several narrow patterns joined by '+', which must cover disjoint sets of nullIndexes. Every broad
        /// pattern must cover the same set of nullIndexes as the first one.</param>
        /// <param name="outputFileName">File created up front to receive the report.</param>
        /// <param name="keepTest">Row filter handed to the per-pattern reader.</param>
        /// <param name="maxPValue">p-value cut-off handed to the per-pattern reader.</param>
        /// <param name="auditRowIndexValues">When true, every broad pattern must contain nullIndex -1 (the real data).
        /// The flag is also handed to the per-pattern reader.</param>
        //!!!would be better if could cut off really bad pValues to save memory
        //!!! also would be nice to have filters
        public static void CreateTabulateReport(ICollection<string> inputFilePatternCollection, string outputFileName,
                                                KeepTest<Dictionary<string, string>> keepTest, double maxPValue, bool auditRowIndexValues)
        {
            //SpecialFunctions.CheckCondition(!File.Exists(outputFileName), "Output file already exists: " + outputFileName);
            using (TextWriter textWriter = File.CreateText(outputFileName)) // Do this early so that if it fails, well know
            {
                List<Dictionary<string, string>> realRowCollectionToSort = new List<Dictionary<string, string>>();
                List<double> nullValueCollectionToBeSorted = new List<double>();

                string headerSoFar = null;

                // Index set of the first broad pattern; stays null when no pattern is supplied at all.
                Set<int> broadRealAndNullIndexSetSoFar = null;

                foreach (string broadInputFilePattern in inputFilePatternCollection)
                {
                    Set<int> narrowRealAndNullIndexSetSetSoFar = Set<int>.GetInstance();

                    foreach (string narrowInputFilePattern in broadInputFilePattern.Split('+'))
                    {
                        Set<int> realAndNullIndexSet =
                            CreateTabulateReportInternal(narrowInputFilePattern, keepTest, maxPValue, auditRowIndexValues,
                                                         ref realRowCollectionToSort, ref nullValueCollectionToBeSorted, ref headerSoFar);

                        //Instead of throwing an error, we could filter out the duplicated null indexes
                        SpecialFunctions.CheckCondition(narrowRealAndNullIndexSetSetSoFar.IntersectionIsEmpty(realAndNullIndexSet),
                                                        string.Format("Within inputFilePattern {0}, multiple '+'-connected parts cover the same nullIndex(s), {1}",
                                                                      broadInputFilePattern,
                                                                      narrowRealAndNullIndexSetSetSoFar.Intersection(realAndNullIndexSet)));

                        narrowRealAndNullIndexSetSetSoFar.AddNewRange(realAndNullIndexSet);
                    }

                    SpecialFunctions.CheckCondition(!auditRowIndexValues || narrowRealAndNullIndexSetSetSoFar.Contains(-1),
                                                    string.Format("The 'null' index -1 for the real data was not seen in {0}", broadInputFilePattern));


                    if (broadRealAndNullIndexSetSoFar == null)
                    {
                        broadRealAndNullIndexSetSoFar = narrowRealAndNullIndexSetSetSoFar;
                    }
                    else
                    {
                        SpecialFunctions.CheckCondition(broadRealAndNullIndexSetSoFar.Equals(narrowRealAndNullIndexSetSetSoFar),
                                                        string.Format("The broad inputFilePattern {0} covers a different set of nullIndexes ({1}) than its predecessors ({2})",
                                                                      broadInputFilePattern, narrowRealAndNullIndexSetSetSoFar, broadRealAndNullIndexSetSoFar));
                    }
                }

                // With no input patterns the index set was never assigned; fail with a clear message
                // instead of a NullReferenceException on the Count access below.
                SpecialFunctions.CheckCondition(broadRealAndNullIndexSetSoFar != null,
                                                "No input file patterns were given, so the number of randomization runs cannot be determined.");

                // The set holds -1 (the real run) plus one entry per randomized run.
                double numberOfRandomizationRuns = broadRealAndNullIndexSetSoFar.Count - 1;
                Console.WriteLine("Detected {0} randomized runs relative to the number of real runs.", numberOfRandomizationRuns);
                Dictionary<Dictionary<string, string>, double> qValueList = SpecialFunctions.ComputeQValues(ref realRowCollectionToSort, AccessPValueFromPhylotreeRow, ref nullValueCollectionToBeSorted, numberOfRandomizationRuns);

                //!!!this code is repeated elsewhere
                textWriter.WriteLine(SpecialFunctions.CreateTabString(headerSoFar, "qValue"));
                foreach (Dictionary<string, string> row in realRowCollectionToSort)
                {
                    // The row's "" entry is written first, followed by the q-value.
                    double qValue = qValueList[row];
                    textWriter.WriteLine(SpecialFunctions.CreateTabString(row[""], qValue));
                }
            }
        }