protected NullDataCollection(
            NullDataGenerator nullDataGenerator,
            RangeCollection nullIndexRange,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration
            //Dictionary<string, Dictionary<string, SufficientStatistics>> predictorVariableToCaseIdToRealNonMissingValue,
            //Dictionary<string, Dictionary<string, SufficientStatistics>> targetVariableToCaseIdToRealNonMissingValue
            )
        {
            Console.WriteLine(nullDataGenerator);
            Name = nullDataGenerator.Name;
            _nullIndexToNullDataGenerator = new Dictionary <int, NullDataGenerator>();
            foreach (int nullIndex in nullIndexRange.Elements)
            {
                int predCount = SpecialFunctions.Count(predictorNameAndCaseIdToNonMissingValueEnumeration);
                int preseed   = ~nullIndex.GetHashCode() ^ predCount.GetHashCode()
                                ^ "NullDataCollection".GetHashCode();

                NullDataGenerator newNullDataGenerator = (NullDataGenerator)nullDataGenerator.Clone();
                newNullDataGenerator.SetPreseed(preseed);
                newNullDataGenerator.SetPredictorNameAndCaseIdToNonMissingValueEnumeration(predictorNameAndCaseIdToNonMissingValueEnumeration);
                newNullDataGenerator.SetTargetNameAndCaseIdToNonMissingValueEnumeration(targetNameAndCaseIdToNonMissingValueEnumeration);
                //newNullDataGenerator.RealPredictorVariableToCaseIdToNonMissingValue = predictorVariableToCaseIdToRealNonMissingValue;
                //newNullDataGenerator.RealTargetVariableToCaseIdToNonMissingValue = targetVariableToCaseIdToRealNonMissingValue;
                _nullIndexToNullDataGenerator.Add(nullIndex, newNullDataGenerator);
            }
        }
示例#2
0
        private static Dictionary <string, int> SufficientStatisticsMapToIntDictionaryMap(Converter <Leaf, SufficientStatistics> leafToStatsMap, IEnumerable <Leaf> fullLeafCollection)
        {
            Dictionary <string, int> result = new Dictionary <string, int>(SpecialFunctions.Count(fullLeafCollection));

            foreach (Leaf leaf in fullLeafCollection)
            {
                SufficientStatistics value = leafToStatsMap(leaf);
                if (!value.IsMissing())
                {
                    result.Add(leaf.CaseName, (int)(BooleanStatistics)value);
                }
            }
            return(result);
        }
        public override Dictionary <string, SufficientStatistics> GenerateRandomMapping(Dictionary <string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
        {
            //!!!!put check in to make sure ISufficientSTatistics is reall BooleanStatistics
            Converter <Leaf, SufficientStatistics> leafToDistnClassFunction = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

            PhyloTree tree = _modelScorer.PhyloTree;

            MessageInitializer messageInitializer = MessageInitializerDiscrete.GetInstance(leafToDistnClassFunction, _discreteDistribution, new int[] { 1, 1 }, tree.LeafCollection);

            Score score = _modelScorer.MaximizeLikelihood(messageInitializer);

            double percentNonMissing = (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue) /
                                       (double)SpecialFunctions.Count(tree.LeafCollection);
            double equilibrium = score.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
            double lambda      = score.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

            Dictionary <string, BooleanStatistics> randomCaseIdToNonMissingValue = tree.EvolveBinaryTree(equilibrium, lambda, 1 - percentNonMissing, ref random);

            Dictionary <string, SufficientStatistics> converted;

            SpecialFunctions.ConvertDictionaryToBaseClasses(randomCaseIdToNonMissingValue, out converted);

            return(converted);
        }
        public void Run(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);


            string outputFileName = string.Format(@"{0}\{1}.{2}.{3}.{4}.{5}.{6}{7}.txt",
                                                  outputDirectoryName, shortName,
                                                  leafDistributionName, nullDataGeneratorName,
                                                  nullIndexRangeCollection,
                                                  pieceCount,
                                                  pieceIndexRangeCollection,
                                                  skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()
                                                  );

            #region from PhyloTree refactor
            //Dictionary<string, Dictionary<string, bool>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory<bool>(predictorSparseFileName);
            //IEnumerable<Pair<string, Dictionary<string, T>>> targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration<T>(targetSparseFileName);

            //NullDataCollection nullDataGenerator =
            //    NullDataCollection.GetInstance(this, modelTester, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);

            //UniversalWorkList<T> workList = UniversalWorkList<T>.GetInstance(
            //    predictorVariableToCaseIdToRealNonMissingValue,
            //    targetNameAndCaseIdToNonMissingValueEnumeration,
            //    nullDataGenerator, nullIndexRangeCollection, keepTest);
            #endregion
            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            int workListCount = SpecialFunctions.Count(workList.List());

            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(Header);
                int rowIndex          = -1;
                int effectiveRowIndex = -1;

                foreach (RowData rowAndTargetData in workList.List())
                {
                    //!!!make all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelScorer.ClearCache();
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            textWriter.WriteLine(reportLine);
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: " + stopwatch.Elapsed);
        }