protected NullDataCollection( NullDataGenerator nullDataGenerator, RangeCollection nullIndexRange, IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration, IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration //Dictionary<string, Dictionary<string, SufficientStatistics>> predictorVariableToCaseIdToRealNonMissingValue, //Dictionary<string, Dictionary<string, SufficientStatistics>> targetVariableToCaseIdToRealNonMissingValue ) { Console.WriteLine(nullDataGenerator); Name = nullDataGenerator.Name; _nullIndexToNullDataGenerator = new Dictionary <int, NullDataGenerator>(); foreach (int nullIndex in nullIndexRange.Elements) { int predCount = SpecialFunctions.Count(predictorNameAndCaseIdToNonMissingValueEnumeration); int preseed = ~nullIndex.GetHashCode() ^ predCount.GetHashCode() ^ "NullDataCollection".GetHashCode(); NullDataGenerator newNullDataGenerator = (NullDataGenerator)nullDataGenerator.Clone(); newNullDataGenerator.SetPreseed(preseed); newNullDataGenerator.SetPredictorNameAndCaseIdToNonMissingValueEnumeration(predictorNameAndCaseIdToNonMissingValueEnumeration); newNullDataGenerator.SetTargetNameAndCaseIdToNonMissingValueEnumeration(targetNameAndCaseIdToNonMissingValueEnumeration); //newNullDataGenerator.RealPredictorVariableToCaseIdToNonMissingValue = predictorVariableToCaseIdToRealNonMissingValue; //newNullDataGenerator.RealTargetVariableToCaseIdToNonMissingValue = targetVariableToCaseIdToRealNonMissingValue; _nullIndexToNullDataGenerator.Add(nullIndex, newNullDataGenerator); } }
private static Dictionary <string, int> SufficientStatisticsMapToIntDictionaryMap(Converter <Leaf, SufficientStatistics> leafToStatsMap, IEnumerable <Leaf> fullLeafCollection) { Dictionary <string, int> result = new Dictionary <string, int>(SpecialFunctions.Count(fullLeafCollection)); foreach (Leaf leaf in fullLeafCollection) { SufficientStatistics value = leafToStatsMap(leaf); if (!value.IsMissing()) { result.Add(leaf.CaseName, (int)(BooleanStatistics)value); } } return(result); }
public override Dictionary <string, SufficientStatistics> GenerateRandomMapping(Dictionary <string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random) { //!!!!put check in to make sure ISufficientSTatistics is reall BooleanStatistics Converter <Leaf, SufficientStatistics> leafToDistnClassFunction = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue); PhyloTree tree = _modelScorer.PhyloTree; MessageInitializer messageInitializer = MessageInitializerDiscrete.GetInstance(leafToDistnClassFunction, _discreteDistribution, new int[] { 1, 1 }, tree.LeafCollection); Score score = _modelScorer.MaximizeLikelihood(messageInitializer); double percentNonMissing = (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue) / (double)SpecialFunctions.Count(tree.LeafCollection); double equilibrium = score.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value; double lambda = score.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value; Dictionary <string, BooleanStatistics> randomCaseIdToNonMissingValue = tree.EvolveBinaryTree(equilibrium, lambda, 1 - percentNonMissing, ref random); Dictionary <string, SufficientStatistics> converted; SpecialFunctions.ConvertDictionaryToBaseClasses(randomCaseIdToNonMissingValue, out converted); return(converted); }
public void Run( ModelScorer modelScorer, PhyloTree phyloTree, string predictorSparseFileName, string targetSparseFileName, string leafDistributionName, string nullDataGeneratorName, KeepTest <Dictionary <string, string> > keepTest, RangeCollection skipRowIndexRangeCollectionOrNull, string shortName, string outputDirectoryName, RangeCollection pieceIndexRangeCollection, int pieceCount, RangeCollection nullIndexRangeCollection, string optimizerName) { Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); Directory.CreateDirectory(outputDirectoryName); string outputFileName = string.Format(@"{0}\{1}.{2}.{3}.{4}.{5}.{6}{7}.txt", outputDirectoryName, shortName, leafDistributionName, nullDataGeneratorName, nullIndexRangeCollection, pieceCount, pieceIndexRangeCollection, skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString() ); #region from PhyloTree refactor //Dictionary<string, Dictionary<string, bool>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory<bool>(predictorSparseFileName); //IEnumerable<Pair<string, Dictionary<string, T>>> targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration<T>(targetSparseFileName); //NullDataCollection nullDataGenerator = // NullDataCollection.GetInstance(this, modelTester, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue); //UniversalWorkList<T> workList = UniversalWorkList<T>.GetInstance( // predictorVariableToCaseIdToRealNonMissingValue, // targetNameAndCaseIdToNonMissingValueEnumeration, // nullDataGenerator, nullIndexRangeCollection, keepTest); #endregion bool speedOverMemory = true; IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory); IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory); NullDataCollection nullDataGenerator = CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection, predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration); UniversalWorkList workList = UniversalWorkList.GetInstance( predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration, nullDataGenerator, nullIndexRangeCollection, keepTest); int workListCount = SpecialFunctions.Count(workList.List()); int effectiveWorkListCount; if (skipRowIndexRangeCollectionOrNull == null) { effectiveWorkListCount = workListCount; } else { effectiveWorkListCount = 0; for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++) { if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex)) { effectiveWorkListCount++; } } } Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount); using (TextWriter textWriter = File.CreateText(outputFileName)) { textWriter.WriteLine(Header); int rowIndex = -1; int effectiveRowIndex = -1; foreach (RowData rowAndTargetData in workList.List()) { //!!!make all these parameters and the calculation a class ++rowIndex; Debug.Assert(rowIndex < workListCount); // real assert if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex)) { ++effectiveRowIndex; int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount); if (pieceIndexRangeCollection.Contains(workIndex)) { Debug.WriteLine("WorkItemIndex " + rowIndex.ToString()); string reportLine; try { reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex); } catch (OutOfMemoryException) { Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off."); modelScorer.ClearCache(); reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex); } textWriter.WriteLine(reportLine); textWriter.Flush(); } } } } stopwatch.Stop(); Console.WriteLine("Running time: " + stopwatch.Elapsed); }