/// <summary>
        /// Factory for work lists. Returns the result of
        /// <c>UniversalWorkListPredTargPairs.GetInstance</c> when <paramref name="keepTest"/> is a
        /// <c>KeepPredictorTargetPairs</c>, or is a <c>KeepCollection</c> whose direct members include
        /// one; otherwise returns a plain <c>UniversalWorkList</c>.
        /// </summary>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data, forwarded unchanged.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data, forwarded unchanged.</param>
        /// <param name="nullDataCollection">Null data collection, forwarded unchanged.</param>
        /// <param name="nullIndexRange">Null index range, forwarded unchanged.</param>
        /// <param name="keepTest">Keep test that is inspected to choose the work list type, then forwarded.</param>
        /// <returns>The newly created work list.</returns>
        public static UniversalWorkList GetInstance(
            IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest<Dictionary<string, string>> keepTest
            )
        {
            // The keep test itself may be the pair test ...
            bool usePairWorkList = keepTest is KeepPredictorTargetPairs;

            // ... or it may be a collection with a pair test among its direct members.
            // Only one level of the collection is inspected.
            if (keepTest is KeepCollection<Dictionary<string, string>>)
            {
                KeepCollection<Dictionary<string, string>> keepCollection =
                    (KeepCollection<Dictionary<string, string>>)keepTest;

                foreach (KeepTest<Dictionary<string, string>> memberTest in keepCollection.KeepTestCollection)
                {
                    usePairWorkList = usePairWorkList || (memberTest is KeepPredictorTargetPairs);
                }
            }

            if (!usePairWorkList)
            {
                return new UniversalWorkList(
                    predictorNameAndCaseIdToNonMissingValueEnumeration,
                    targetNameAndCaseIdToNonMissingValueEnumeration,
                    nullDataCollection,
                    nullIndexRange,
                    keepTest);
            }

            return UniversalWorkListPredTargPairs.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataCollection,
                nullIndexRange,
                keepTest);
        }
 /// <summary>
 /// Creates a <c>NullDataCollection</c> by obtaining a generator from
 /// <c>NullDataGenerator.GetInstance</c> (passing the generator name, the scorer, the tree and this
 /// instance) and handing it, with the null index ranges and both data enumerations, to
 /// <c>NullDataCollection.GetInstance</c>.
 /// </summary>
 /// <param name="nullDataGeneratorName">Name passed to <c>NullDataGenerator.GetInstance</c> to select the generator.</param>
 /// <param name="modelScorer">Scorer passed to the generator factory.</param>
 /// <param name="phyloTree">Tree passed to the generator factory.</param>
 /// <param name="nullIndexRangeCollection">Null index ranges passed to the collection factory.</param>
 /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data passed to the collection factory.</param>
 /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data passed to the collection factory.</param>
 /// <returns>The collection produced by <c>NullDataCollection.GetInstance</c>.</returns>
 protected NullDataCollection CreateNullDataGenerator(
     string nullDataGeneratorName,
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RangeCollection nullIndexRangeCollection,
     IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
 {
     NullDataCollection nullDataCollection = NullDataCollection.GetInstance(
         NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this),
         nullIndexRangeCollection,
         predictorNameAndCaseIdToNonMissingValueEnumeration,
         targetNameAndCaseIdToNonMissingValueEnumeration);

     return nullDataCollection;
 }
        /// <summary>
        /// Stores the supplied data sources, null-data settings and keep test on the new instance.
        /// Protected: instances are obtained through the static <c>GetInstance</c> factory.
        /// </summary>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data enumeration to store.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data enumeration to store.</param>
        /// <param name="nullDataCollection">Null data collection to store.</param>
        /// <param name="nullIndexRange">Null index range to store.</param>
        /// <param name="keepTest">Keep test to store.</param>
        protected UniversalWorkList(
            IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest<Dictionary<string, string>> keepTest
            )
        {
            // Data sources.
            this._predictorNameAndCaseIdToNonMissingValueEnumeration = predictorNameAndCaseIdToNonMissingValueEnumeration;
            this._targetNameAndCaseIdToNonMissingValueEnumeration = targetNameAndCaseIdToNonMissingValueEnumeration;

            // Null-data configuration.
            this._nullDataCollection = nullDataCollection;
            this._nullIndexRange = nullIndexRange;

            // Row filter.
            this._keepTest = keepTest;
        }
 /// <summary>
 /// Forwards every argument unchanged to the base-class constructor; adds no state of its own.
 /// </summary>
 /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data enumeration, passed to the base constructor.</param>
 /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data enumeration, passed to the base constructor.</param>
 /// <param name="nullDataCollection">Null data collection, passed to the base constructor.</param>
 /// <param name="nullIndexRange">Null index range, passed to the base constructor.</param>
 /// <param name="keepTest">Keep test, passed to the base constructor.</param>
 protected UniversalWorkListPredTargPairs(
     IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
     NullDataCollection nullDataCollection,
     RangeCollection nullIndexRange,
     KeepTest<Dictionary<string, string>> keepTest)
     : base(predictorNameAndCaseIdToNonMissingValueEnumeration,
            targetNameAndCaseIdToNonMissingValueEnumeration,
            nullDataCollection,
            nullIndexRange,
            keepTest)
 {
     // Intentionally empty: all initialization happens in the base constructor.
 }
        /// <summary>
        /// Creates a <c>UniversalWorkListPredTargPairs</c> from the supplied arguments and returns it
        /// typed as <c>UniversalWorkList</c>. Hides the base-class static factory of the same name.
        /// </summary>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data enumeration, forwarded to the constructor.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data enumeration, forwarded to the constructor.</param>
        /// <param name="nullDataCollection">Null data collection, forwarded to the constructor.</param>
        /// <param name="nullIndexRange">Null index range, forwarded to the constructor.</param>
        /// <param name="keepTest">Keep test, forwarded to the constructor.</param>
        /// <returns>The new pair-enumerating work list.</returns>
        new public static UniversalWorkList GetInstance(
            IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest<Dictionary<string, string>> keepTest
            )
        {
            return new UniversalWorkListPredTargPairs(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataCollection,
                nullIndexRange,
                keepTest);
        }
        /// <summary>
        /// Builds a work list from the predictor and target sparse files, then writes <c>Header</c>
        /// followed by one report line per processed row to an output file in
        /// <paramref name="outputDirectoryName"/>. Rows listed in
        /// <paramref name="skipRowIndexRangeCollectionOrNull"/> are skipped; of the remaining rows, only
        /// those whose work index (from <c>ExtractWorkIndex</c>) is contained in
        /// <paramref name="pieceIndexRangeCollection"/> are processed. Progress and total running time
        /// are written to the console.
        /// </summary>
        /// <param name="modelScorer">Scorer used to build the null data and each report line; its cache is cleared on an out-of-memory retry.</param>
        /// <param name="phyloTree">Tree passed to the null data generator and to report-line creation.</param>
        /// <param name="predictorSparseFileName">File from which the predictor enumeration is created.</param>
        /// <param name="targetSparseFileName">File from which the target enumeration is created.</param>
        /// <param name="leafDistributionName">Used only as a component of the output file name.</param>
        /// <param name="nullDataGeneratorName">Selects the null data generator; also a component of the output file name.</param>
        /// <param name="keepTest">Keep test passed to the work list factory.</param>
        /// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or null to skip none.</param>
        /// <param name="shortName">First component of the output file name.</param>
        /// <param name="outputDirectoryName">Directory for the output file; created if it does not exist.</param>
        /// <param name="pieceIndexRangeCollection">Work indexes to process in this run.</param>
        /// <param name="pieceCount">Passed to <c>ExtractWorkIndex</c>; also a component of the output file name.</param>
        /// <param name="nullIndexRangeCollection">Null index ranges for the null data and work list; also a component of the output file name.</param>
        /// <param name="optimizerName">Not referenced by this method.</param>
        public void Run(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);

            // Build the leaf file name separately and join it to the directory with Path.Combine,
            // so the platform's directory separator is used instead of a hard-coded backslash.
            string outputLeafFileName = string.Format("{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
                                                      shortName,
                                                      leafDistributionName, nullDataGeneratorName,
                                                      nullIndexRangeCollection,
                                                      pieceCount,
                                                      pieceIndexRangeCollection,
                                                      skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()
                                                      );
            string outputFileName = Path.Combine(outputDirectoryName, outputLeafFileName);

            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            // First pass over the work list: total number of rows.
            int workListCount = SpecialFunctions.Count(workList.List());

            // Number of rows left after removing the skipped row indexes.
            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(Header);
                int rowIndex          = -1; // index among all rows
                int effectiveRowIndex = -1; // index among non-skipped rows only

                // Second pass over the work list: produce the report lines.
                foreach (RowData rowAndTargetData in workList.List())
                {
                    //!!!make all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                // Best-effort recovery: free the scorer's cache and retry this row once.
                                // A second failure propagates to the caller.
                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelScorer.ClearCache();
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            // Flush after every line so completed rows reach the file even if a later row fails.
                            textWriter.WriteLine(reportLine);
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: " + stopwatch.Elapsed);
        }
        /// <summary>
        /// For every work-list row whose predictor and target variable names equal the supplied names,
        /// computes and prints to the console three log-likelihood scores using caller-supplied
        /// parameters: two under <c>NullModelDistribution</c> (labelled "Target" and "Predictor") and
        /// one under <c>AltModelDistribution</c>.
        /// </summary>
        /// <param name="modelScorer">Scorer used to create message initializers and compute log-likelihoods.</param>
        /// <param name="phyloTree">Tree passed to the null data generator.</param>
        /// <param name="predictorSparseFileName">File from which the predictor enumeration is loaded.</param>
        /// <param name="targetSparseFileName">File from which the target enumeration is loaded.</param>
        /// <param name="predictorVariableName">Predictor variable name a row must have to be scored.</param>
        /// <param name="targetVariableName">Target variable name a row must have to be scored.</param>
        /// <param name="nullModelArgs">Arguments converted to parameters by <c>NullModelDistribution.GetParameters</c>.</param>
        /// <param name="altModelArgs">Arguments converted to parameters by <c>AltModelDistribution.GetParameters</c>.</param>
        public void ScoreTree(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string predictorVariableName,
            string targetVariableName,
            double[] nullModelArgs,
            double[] altModelArgs)
        {
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration    = LoadSparseFileEnumeration(targetSparseFileName);

            // NOTE(review): the range (-1, -1) presumably selects only the real, non-permuted data - confirm.
            RangeCollection    nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
            NullDataCollection nullDataGenerator        =
                CreateNullDataGenerator("PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, AlwaysKeep <Dictionary <string, string> > .GetInstance());


            foreach (RowData rowAndTargetData in workList.List())
            {
                if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
                    rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
                {
                    Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;
                    Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

                    Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
                    Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
                    Converter <Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);
                    double                    logLikelihood;
                    Score                     scoreIndTarget, scoreIndPredictor, scoreAlt;
                    MessageInitializer        messageInitializer;
                    OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
                    OptimizationParameterList altParams  = AltModelDistribution.GetParameters(altModelArgs);

                    // Null model, reported as "Target".
                    Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndTarget     = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Target\t" + scoreIndTarget);

                    // Null model with the two maps swapped, reported as "Predictor".
                    // The likelihood is computed once; the earlier code repeated the identical call
                    // and discarded its result.
                    messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndPredictor  = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Predictor\t" + scoreIndPredictor);

                    // Alternative model: no first map (null), combined predictor/target map second.
                    Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
                    scoreAlt           = Score.GetInstance(logLikelihood, altParams);
                    Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
                }
            }
        }