/// <summary>
        /// Builds one <see cref="NullDataGenerator"/> per null index in <paramref name="nullIndexRange"/>.
        /// Each generator is a clone of <paramref name="nullDataGenerator"/> that is given its own preseed
        /// and the predictor/target enumerations, and is stored in <c>_nullIndexToNullDataGenerator</c>.
        /// </summary>
        /// <param name="nullDataGenerator">Prototype generator; supplies <c>Name</c> and is cloned once per null index.</param>
        /// <param name="nullIndexRange">The null indices for which generators are created.</param>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data handed to every clone; also counted to derive the preseed.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data handed to every clone.</param>
        protected NullDataCollection(
            NullDataGenerator nullDataGenerator,
            RangeCollection nullIndexRange,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration
            )
        {
            Console.WriteLine(nullDataGenerator);
            Name = nullDataGenerator.Name;
            _nullIndexToNullDataGenerator = new Dictionary <int, NullDataGenerator>();

            // The predictor count does not depend on the null index, so it is computed once here
            // instead of once per loop iteration (presumably each count walks the whole enumeration).
            int predCount = SpecialFunctions.Count(predictorNameAndCaseIdToNonMissingValueEnumeration);

            // Index-independent part of the preseed. XOR is associative, so folding these two terms
            // first yields exactly the same preseed as the original single expression.
            int seedBase = predCount.GetHashCode() ^ "NullDataCollection".GetHashCode();

            foreach (int nullIndex in nullIndexRange.Elements)
            {
                // The bitwise complement applies to the null index hash only, then it is XOR-ed with the shared part.
                int preseed = ~nullIndex.GetHashCode() ^ seedBase;

                NullDataGenerator newNullDataGenerator = (NullDataGenerator)nullDataGenerator.Clone();
                newNullDataGenerator.SetPreseed(preseed);
                newNullDataGenerator.SetPredictorNameAndCaseIdToNonMissingValueEnumeration(predictorNameAndCaseIdToNonMissingValueEnumeration);
                newNullDataGenerator.SetTargetNameAndCaseIdToNonMissingValueEnumeration(targetNameAndCaseIdToNonMissingValueEnumeration);
                _nullIndexToNullDataGenerator.Add(nullIndex, newNullDataGenerator);
            }
        }
        /// <summary>
        /// Parses a comma-separated list of "begin-end" ranges (for example "1-5,7-12" or "-10--5,-1-14")
        /// into a new <see cref="RangeCollection"/>.
        /// </summary>
        /// <param name="ranges">The text to parse. Each comma-separated piece must contain a begin and an end separated by '-'; either may be negative.</param>
        /// <param name="mergeOverlappingRanges">
        /// When true, a range whose begin is no more than one past the previous range's end extends the
        /// previous range instead of starting a new one.
        /// </param>
        /// <returns>The populated collection.</returns>
        /// <remarks>
        /// Each range's end must be greater than the previous range's end and not less than its own begin;
        /// both conditions are enforced through <c>SpecialFunctions.CheckCondition</c>.
        /// </remarks>
        public static RangeCollection Parse(string ranges, bool mergeOverlappingRanges)
        {
            RangeCollection aRangeCollection = GetInstance();

            // Start item of the range most recently ADDED to the collection (not merely parsed).
            int lastBegin = int.MaxValue;
            int lastEnd   = int.MinValue;

            string[] contiguousRanges = ranges.Split(',');
            foreach (string r in contiguousRanges)
            {
                string range = r;   // a foreach iteration variable cannot be reassigned, so work on a copy

                bool beginIsNegative = false;
                bool endIsNegative   = false;

                // A leading '-' is the sign of the begin value; strip it so that the remaining '-' is the separator.
                if (range[0] == '-')
                {
                    beginIsNegative = true;
                    range           = range.Substring(1);
                }
                // "--" means separator followed by a negative end; collapse it to the plain separator.
                if (range.IndexOf("--") > 0)
                {
                    endIsNegative = true;
                    range         = range.Replace("--", "-");
                }

                string[] rangeBeginAndEnd = range.Split('-');

                int begin = int.Parse(rangeBeginAndEnd[0].Trim());
                if (beginIsNegative)
                {
                    begin *= -1;
                }

                int end = int.Parse(rangeBeginAndEnd[1].Trim());
                if (endIsNegative)
                {
                    end *= -1;
                }

                SpecialFunctions.CheckCondition(end > lastEnd, "Illformed Range. " + ranges);
                SpecialFunctions.CheckCondition(end >= begin, range + " is not a valid range. End is before begin!");

                if (begin > lastEnd + 1 || !mergeOverlappingRanges) // this is the beginning of a new range
                {
                    aRangeCollection.StartItems.Add(begin);
                    aRangeCollection.ItemToLength.Add(begin, end - begin + 1);

                    // Only a newly added range changes the start item that later merges must extend.
                    lastBegin = begin;
                }
                else // we're really just extending the previous range
                {
                    // lastBegin deliberately stays untouched here: previously it was overwritten with the
                    // absorbed range's begin, so a second consecutive merge (e.g. "1-5,6-8,9-10") wrote a
                    // length under a key that is not a start item instead of extending the stored range.
                    aRangeCollection.ItemToLength[lastBegin] = end - lastBegin + 1;
                }

                lastEnd = end;
            }

            return(aRangeCollection);
        }
 /// <summary>
 /// Factory method: forwards all four arguments, unchanged, to the <see cref="NullDataCollection"/> constructor.
 /// </summary>
 /// <param name="nullDataGenerator">Prototype generator passed to the constructor.</param>
 /// <param name="nullIndexRange">Null indices passed to the constructor.</param>
 /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data passed to the constructor.</param>
 /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data passed to the constructor.</param>
 /// <returns>The newly constructed collection.</returns>
 public static NullDataCollection GetInstance(
     NullDataGenerator nullDataGenerator,
     RangeCollection nullIndexRange,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration)
 {
     NullDataCollection instance = new NullDataCollection(
         nullDataGenerator,
         nullIndexRange,
         predictorNameAndCaseIdToNonMissingValueEnumeration,
         targetNameAndCaseIdToNonMissingValueEnumeration);

     return instance;
 }
        /// <summary>
        /// Ad-hoc self test for <see cref="RangeCollection"/> and <see cref="RangeCollectionCollection"/>.
        /// The Add sequence is verified through <c>SpecialFunctions.CheckCondition</c>; everything else is
        /// only written to the console for manual inspection.
        /// </summary>
        public static void Test()
        {
            RangeCollection aRangeCollection = RangeCollection.GetInstance();

            // Items are added one at a time; after each Add the string form must match the entry at the same index.
            int[] itemsToAdd =
            {
                0, 1, 4, 5, 7, 2, 3, 6, -10, -5
            };
            string[] expectedAfterAdd =
            {
                "0-0",
                "0-1",
                "0-1,4-4",
                "0-1,4-5",
                "0-1,4-5,7-7",
                "0-2,4-5,7-7",
                "0-5,7-7",
                "0-7",
                "-10--10,0-7",
                "-10--10,-5--5,0-7"
            };
            for (int step = 0; step < itemsToAdd.Length; step++)
            {
                aRangeCollection.Add(itemsToAdd[step]);
                SpecialFunctions.CheckCondition(expectedAfterAdd[step] == aRangeCollection.ToString());
            }

            // Parsing of non-negative ranges, followed by membership probes.
            aRangeCollection = RangeCollection.Parse("1-5,7-12,13-14");

            Console.WriteLine(aRangeCollection);
            int[] positiveProbes = { 3, 12, 13, 6 };
            foreach (int probe in positiveProbes)
            {
                Console.WriteLine(aRangeCollection.Contains(probe));
            }

            // Parsing of ranges with negative bounds, followed by membership probes.
            aRangeCollection = RangeCollection.Parse("-10--5,-1-14");

            Console.WriteLine(aRangeCollection);
            int[] negativeProbes = { -12, -10, -7, -5, -4, 0, 1, -2 };
            foreach (int probe in negativeProbes)
            {
                Console.WriteLine(aRangeCollection.Contains(probe));
            }

            Console.WriteLine("Count: " + aRangeCollection.Count());
            Console.WriteLine("Count -5 to 2: " + aRangeCollection.Count(-5, 2));

            // Wrap the last collection and look up the containing range collection for several items.
            RangeCollectionCollection rcc = RangeCollectionCollection.GetInstance(aRangeCollection);

            Console.WriteLine(rcc);
            int[] containingProbes = { -12, -10, -5, 3, 15 };
            foreach (int probe in containingProbes)
            {
                Console.WriteLine(rcc.GetContainingRangeCollection(probe));
            }
        }
        /// <summary>
        /// Creates the work list that matches <paramref name="keepTest"/>: a
        /// <see cref="UniversalWorkListPredTargPairs"/> when the keep test is a
        /// <see cref="KeepPredictorTargetPairs"/> (or is a keep collection that contains one),
        /// otherwise a plain <see cref="UniversalWorkList"/>.
        /// </summary>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data forwarded to the work list.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data forwarded to the work list.</param>
        /// <param name="nullDataCollection">Null data collection forwarded to the work list.</param>
        /// <param name="nullIndexRange">Null index range forwarded to the work list.</param>
        /// <param name="keepTest">Keep test that selects the work list type and is forwarded to it.</param>
        /// <returns>The created work list.</returns>
        public static UniversalWorkList GetInstance(
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest <Dictionary <string, string> > keepTest
            )
        {
            bool usePairEnumeration = keepTest is KeepPredictorTargetPairs;

            // A keep collection also asks for pair enumeration if any of its members does.
            KeepCollection <Dictionary <string, string> > keepCollection = keepTest as KeepCollection <Dictionary <string, string> >;
            if (keepCollection != null)
            {
                foreach (KeepTest <Dictionary <string, string> > memberTest in keepCollection.KeepTestCollection)
                {
                    usePairEnumeration |= memberTest is KeepPredictorTargetPairs;
                }
            }

            if (!usePairEnumeration)
            {
                return new UniversalWorkList(
                    predictorNameAndCaseIdToNonMissingValueEnumeration,
                    targetNameAndCaseIdToNonMissingValueEnumeration,
                    nullDataCollection,
                    nullIndexRange,
                    keepTest);
            }

            return UniversalWorkListPredTargPairs.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataCollection,
                nullIndexRange,
                keepTest);
        }
 /// <summary>
 /// Obtains a <see cref="NullDataGenerator"/> for <paramref name="nullDataGeneratorName"/> (passing this
 /// instance along) and wraps it in a <see cref="NullDataCollection"/> for the given null indices and data.
 /// </summary>
 /// <param name="nullDataGeneratorName">Name passed to <c>NullDataGenerator.GetInstance</c>.</param>
 /// <param name="modelScorer">Scorer passed to <c>NullDataGenerator.GetInstance</c>.</param>
 /// <param name="phyloTree">Tree passed to <c>NullDataGenerator.GetInstance</c>.</param>
 /// <param name="nullIndexRangeCollection">Null indices passed to <c>NullDataCollection.GetInstance</c>.</param>
 /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor data passed to <c>NullDataCollection.GetInstance</c>.</param>
 /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target data passed to <c>NullDataCollection.GetInstance</c>.</param>
 /// <returns>The resulting null data collection.</returns>
 protected NullDataCollection CreateNullDataGenerator(
     string nullDataGeneratorName,
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RangeCollection nullIndexRangeCollection,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration)
 {
     NullDataGenerator prototypeGenerator =
         NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this);

     return NullDataCollection.GetInstance(
         prototypeGenerator,
         nullIndexRangeCollection,
         predictorNameAndCaseIdToNonMissingValueEnumeration,
         targetNameAndCaseIdToNonMissingValueEnumeration);
 }
        /// <summary>
        /// Decides whether a row should be processed: it must not be listed in the optional skip
        /// collection, and its work index must be contained in the piece collection.
        /// </summary>
        /// <param name="skipRowIndexRangeCollectionOrNull">Row indices to skip, or null when nothing is skipped.</param>
        /// <param name="pieceIndexRangeCollection">Work indices that are to be processed.</param>
        /// <param name="rowIndex">Index checked against the skip collection.</param>
        /// <param name="workIndex">Index checked against the piece collection.</param>
        /// <returns>True when the row is not skipped and its work index is in the piece collection.</returns>
        private static bool ProcessRow(RangeCollection skipRowIndexRangeCollectionOrNull, RangeCollection pieceIndexRangeCollection, int rowIndex, int workIndex)
        {
            bool rowIsSkipped = skipRowIndexRangeCollectionOrNull != null
                                && skipRowIndexRangeCollectionOrNull.Contains(rowIndex);

            // Evaluated unconditionally, exactly like the two independent checks it replaces.
            bool workIndexIsInPiece = pieceIndexRangeCollection.Contains(workIndex);

            return !rowIsSkipped && workIndexIsInPiece;
        }
        /// <summary>
        /// Stores the supplied data sources and settings in the corresponding private fields; performs no other work.
        /// </summary>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Stored in <c>_predictorNameAndCaseIdToNonMissingValueEnumeration</c>.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Stored in <c>_targetNameAndCaseIdToNonMissingValueEnumeration</c>.</param>
        /// <param name="nullDataCollection">Stored in <c>_nullDataCollection</c>.</param>
        /// <param name="nullIndexRange">Stored in <c>_nullIndexRange</c>.</param>
        /// <param name="keepTest">Stored in <c>_keepTest</c>.</param>
        protected UniversalWorkList(
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest <Dictionary <string, string> > keepTest
            )
        {
            // Null-data configuration.
            _nullDataCollection = nullDataCollection;
            _nullIndexRange = nullIndexRange;

            // Row filter.
            _keepTest = keepTest;

            // Data sources.
            _targetNameAndCaseIdToNonMissingValueEnumeration = targetNameAndCaseIdToNonMissingValueEnumeration;
            _predictorNameAndCaseIdToNonMissingValueEnumeration = predictorNameAndCaseIdToNonMissingValueEnumeration;
        }
 /// <summary>
 /// Passes every argument straight through to the base-class constructor; adds no initialization of its own.
 /// </summary>
 /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Forwarded to the base constructor.</param>
 /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Forwarded to the base constructor.</param>
 /// <param name="nullDataCollection">Forwarded to the base constructor.</param>
 /// <param name="nullIndexRange">Forwarded to the base constructor.</param>
 /// <param name="keepTest">Forwarded to the base constructor.</param>
 protected UniversalWorkListPredTargPairs(
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
     NullDataCollection nullDataCollection,
     RangeCollection nullIndexRange,
     KeepTest <Dictionary <string, string> > keepTest)
     : base(predictorNameAndCaseIdToNonMissingValueEnumeration,
            targetNameAndCaseIdToNonMissingValueEnumeration,
            nullDataCollection,
            nullIndexRange,
            keepTest)
 {
     // Intentionally empty: all state lives in the base class.
 }
        /// <summary>
        /// Factory method that hides the base-class <c>GetInstance</c> and always constructs a
        /// <see cref="UniversalWorkListPredTargPairs"/>, returned as a <see cref="UniversalWorkList"/>.
        /// </summary>
        /// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Forwarded to the constructor.</param>
        /// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Forwarded to the constructor.</param>
        /// <param name="nullDataCollection">Forwarded to the constructor.</param>
        /// <param name="nullIndexRange">Forwarded to the constructor.</param>
        /// <param name="keepTest">Forwarded to the constructor.</param>
        /// <returns>The newly constructed work list.</returns>
        public new static UniversalWorkList GetInstance(
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration,
            NullDataCollection nullDataCollection,
            RangeCollection nullIndexRange,
            KeepTest <Dictionary <string, string> > keepTest
            )
        {
            return new UniversalWorkListPredTargPairs(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataCollection,
                nullIndexRange,
                keepTest);
        }
        /// <summary>
        /// Evaluates every row of the work list that belongs to the requested pieces and writes one
        /// report line per evaluated row to a text file in <paramref name="outputDirectoryName"/>.
        /// </summary>
        /// <param name="modelScorer">Scorer passed to the report-line computation; its cache is cleared if an out-of-memory condition occurs.</param>
        /// <param name="phyloTree">Tree passed to the null data generator and the report-line computation.</param>
        /// <param name="predictorSparseFileName">File from which the predictor enumeration is created.</param>
        /// <param name="targetSparseFileName">File from which the target enumeration is created.</param>
        /// <param name="leafDistributionName">Used only as part of the output file name.</param>
        /// <param name="nullDataGeneratorName">Selects the null data generator; also part of the output file name.</param>
        /// <param name="keepTest">Keep test handed to the work list.</param>
        /// <param name="skipRowIndexRangeCollectionOrNull">Row indices to skip, or null to skip nothing.</param>
        /// <param name="shortName">First component of the output file name.</param>
        /// <param name="outputDirectoryName">Directory for the output file; created if necessary.</param>
        /// <param name="pieceIndexRangeCollection">Work indices (pieces) to process in this run.</param>
        /// <param name="pieceCount">Total number of pieces the work is divided into.</param>
        /// <param name="nullIndexRangeCollection">Null indices to evaluate.</param>
        /// <param name="optimizerName">Not used by this method.</param>
        public void Run(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);

            // Build the file name separately and let Path.Combine insert the directory separator,
            // rather than embedding a literal backslash in the format string.
            string skipSuffix = skipRowIndexRangeCollectionOrNull == null
                ? ""
                : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString();
            string outputLeafName = string.Format("{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
                                                  shortName,
                                                  leafDistributionName, nullDataGeneratorName,
                                                  nullIndexRangeCollection,
                                                  pieceCount,
                                                  pieceIndexRangeCollection,
                                                  skipSuffix);
            string outputFileName = System.IO.Path.Combine(outputDirectoryName, outputLeafName);

            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            // First pass over the work list: only the number of rows is needed here.
            int workListCount = SpecialFunctions.Count(workList.List());

            // Number of rows that remain once the skipped row indices are removed.
            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(Header);
                int rowIndex          = -1; // index over all rows of the work list
                int effectiveRowIndex = -1; // index over the non-skipped rows only

                // Second pass over the work list: evaluate the rows that belong to the requested pieces.
                foreach (RowData rowAndTargetData in workList.List())
                {
                    //!!!make all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                // Best-effort recovery: drop the scorer's cache and retry this row once.
                                // A second failure propagates to the caller.
                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelScorer.ClearCache();
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            textWriter.WriteLine(reportLine);
                            // Flush after every row so completed results are on disk even if a later row fails.
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: " + stopwatch.Elapsed);
        }
        /// <summary>
        /// Finds the work-list rows for the given predictor/target variable pair and, for each one,
        /// prints to the console the log likelihood of the null model (once with the target and once
        /// with the predictor as the scored variable) and of the alternative model, all at the supplied
        /// fixed parameter values.
        /// </summary>
        /// <param name="modelScorer">Scorer used to create message initializers and compute log likelihoods.</param>
        /// <param name="phyloTree">Tree passed to the null data generator.</param>
        /// <param name="predictorSparseFileName">File from which the predictor enumeration is loaded.</param>
        /// <param name="targetSparseFileName">File from which the target enumeration is loaded.</param>
        /// <param name="predictorVariableName">Predictor variable a row must have to be scored.</param>
        /// <param name="targetVariableName">Target variable a row must have to be scored.</param>
        /// <param name="nullModelArgs">Values passed to <c>NullModelDistribution.GetParameters</c>.</param>
        /// <param name="altModelArgs">Values passed to <c>AltModelDistribution.GetParameters</c>.</param>
        public void ScoreTree(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string predictorVariableName,
            string targetVariableName,
            double[] nullModelArgs,
            double[] altModelArgs)
        {
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration    = LoadSparseFileEnumeration(targetSparseFileName);

            // Restrict the null index range to the single index -1 (presumably the real, non-permuted data -- TODO confirm).
            RangeCollection    nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
            NullDataCollection nullDataGenerator        =
                CreateNullDataGenerator("PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, AlwaysKeep <Dictionary <string, string> > .GetInstance());


            foreach (RowData rowAndTargetData in workList.List())
            {
                // Only rows for the requested predictor/target pair are scored.
                if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
                    rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
                {
                    Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;
                    Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

                    Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
                    Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
                    Converter <Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);
                    double                    logLikelihood;
                    Score                     scoreIndTarget, scoreIndPredictor, scoreAlt;
                    MessageInitializer        messageInitializer;
                    OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
                    OptimizationParameterList altParams  = AltModelDistribution.GetParameters(altModelArgs);

                    // Null model, reported under the label "Target".
                    Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndTarget     = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Target\t" + scoreIndTarget);

                    // Null model with the two maps swapped, reported under the label "Predictor".
                    // The log likelihood is computed once; the original repeated the identical call
                    // immediately afterwards and discarded its result.
                    messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndPredictor  = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Predictor\t" + scoreIndPredictor);

                    // Alternative model over the combined predictor/target map.
                    Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
                    scoreAlt           = Score.GetInstance(logLikelihood, altParams);
                    Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
                }
            }
        }