/// <summary>
/// Builds a <see cref="NullDataCollection"/> around a null-data generator obtained from
/// <c>NullDataGenerator.GetInstance</c>.
/// </summary>
/// <param name="nullDataGeneratorName">Name handed to <c>NullDataGenerator.GetInstance</c> to select the generator.</param>
/// <param name="modelScorer">Scorer forwarded to the generator factory.</param>
/// <param name="phyloTree">Tree forwarded to the generator factory.</param>
/// <param name="nullIndexRangeCollection">Null-index ranges forwarded to <c>NullDataCollection.GetInstance</c>.</param>
/// <param name="predictorNameAndCaseIdToNonMissingValueEnumeration">Predictor name / per-case value pairs forwarded to the collection factory.</param>
/// <param name="targetNameAndCaseIdToNonMissingValueEnumeration">Target name / per-case value pairs forwarded to the collection factory.</param>
/// <returns>The collection produced by <c>NullDataCollection.GetInstance</c>.</returns>
protected NullDataCollection CreateNullDataGenerator(
    string nullDataGeneratorName,
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RangeCollection nullIndexRangeCollection,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration,
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration)
{
    // The generator is created first; this instance is passed along as its last argument.
    NullDataGenerator generator =
        NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this);

    NullDataCollection collection = NullDataCollection.GetInstance(
        generator,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    return collection;
}
/// <summary>
/// Builds the work list from the predictor and target sparse files, evaluates the rows that belong
/// to the requested pieces, and writes one report line per evaluated row to a text file inside
/// <paramref name="outputDirectoryName"/>. Timing and (when a scorer is present) cache statistics
/// are written to the console at the end.
/// </summary>
/// <param name="modelEvaluator">Evaluator used to create the null-data generator, the header line and each report line.</param>
/// <param name="predictorSparseFileName">Sparse file from which the predictor enumeration is created.</param>
/// <param name="targetSparseFileName">Sparse file from which the target enumeration is created.</param>
/// <param name="leafDistributionName">Not read by this method body.</param>
/// <param name="nullDataGeneratorName">Name passed to <c>modelEvaluator.CreateNullDataGenerator</c>.</param>
/// <param name="keepTest">Passed to <c>UniversalWorkList.GetInstance</c>.</param>
/// <param name="skipRowIndexRangeCollectionOrNull">Row indexes (0-based positions in the work list) to skip, or <c>null</c> to skip nothing.</param>
/// <param name="shortName">First component of the output file name.</param>
/// <param name="outputDirectoryName">Directory that receives the report file; created if it does not exist.</param>
/// <param name="pieceIndexRangeCollection">Only rows whose work index (from <c>ExtractWorkIndex</c>) is contained here are evaluated.</param>
/// <param name="pieceCount">Piece count passed to <c>ExtractWorkIndex</c>; also part of the output file name.</param>
/// <param name="nullIndexRangeCollection">Null-index ranges passed to the null-data collection and the work list; also part of the output file name.</param>
/// <param name="optimizerName">Not read by this method body.</param>
/// <returns>The path of the report file that was written.</returns>
public string Run(
    ModelEvaluator modelEvaluator,
    string predictorSparseFileName,
    string targetSparseFileName,
    string leafDistributionName,
    string nullDataGeneratorName,
    KeepTest<Dictionary<string, string>> keepTest,
    RangeCollection skipRowIndexRangeCollectionOrNull,
    string shortName,
    string outputDirectoryName,
    RangeCollection pieceIndexRangeCollection,
    int pieceCount,
    RangeCollection nullIndexRangeCollection,
    string optimizerName)
{
    Stopwatch stopwatch = Stopwatch.StartNew();
    Directory.CreateDirectory(outputDirectoryName);

    bool speedOverMemory = true;
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

    NullDataCollection nullDataGenerator = NullDataCollection.GetInstance(
        modelEvaluator.CreateNullDataGenerator(nullDataGeneratorName),
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator,
        nullIndexRangeCollection,
        keepTest);

    // Total number of rows, and the number that remain once skipped row indexes are excluded.
    int workListCount = SpecialFunctions.Count(workList.List());
    int effectiveWorkListCount;
    if (skipRowIndexRangeCollectionOrNull == null)
    {
        effectiveWorkListCount = workListCount;
    }
    else
    {
        effectiveWorkListCount = 0;
        for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
        {
            if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
            {
                effectiveWorkListCount++;
            }
        }
    }
    Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

    // Path.Combine supplies the directory separator instead of a hard-coded backslash.
    string outputFileName = Path.Combine(
        outputDirectoryName,
        string.Format("{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
            shortName,
            modelEvaluator.Name,
            nullDataGenerator.Name,
            nullIndexRangeCollection,
            pieceCount,
            pieceIndexRangeCollection,
            skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()));

    using (TextWriter textWriter = File.CreateText(outputFileName))
    {
        textWriter.WriteLine(GetHeaderString(modelEvaluator));
        textWriter.Flush();

        int rowIndex = -1;          // position in the full work list
        int effectiveRowIndex = -1; // position among the rows that are not skipped
        foreach (RowData rowAndTargetData in workList.List())
        {
            // TODO: make all these parameters and the calculation a class
            ++rowIndex;
            Debug.Assert(rowIndex < workListCount); // real assert

            if (skipRowIndexRangeCollectionOrNull != null && skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
            {
                continue;
            }

            ++effectiveRowIndex;
            int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);
            if (!pieceIndexRangeCollection.Contains(workIndex))
            {
                continue;
            }

            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
            string reportLine;
            try
            {
                reportLine = CreateReportLine(modelEvaluator, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }
            catch (OutOfMemoryException)
            {
                if (modelEvaluator.ModelScorer == null)
                {
                    // There is no scorer cache to clear, so surface the original failure
                    // rather than masking it with a NullReferenceException.
                    throw;
                }

                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                modelEvaluator.ModelScorer.ClearCache();

                // Single retry of the same row after clearing the cache; a second failure propagates.
                reportLine = CreateReportLine(modelEvaluator, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
            }

            // Flush after every line so completed rows reach the file as soon as they are produced.
            textWriter.WriteLine(reportLine);
            textWriter.Flush();
        }
    }

    stopwatch.Stop();
    Console.WriteLine("Running time: {0}", stopwatch.Elapsed);

    if (modelEvaluator.ModelScorer != null)
    {
        // The casts to double make these floating-point divisions, so a zero denominator
        // prints NaN/Infinity instead of throwing.
        Console.WriteLine("Function calls (per ML call): {0} ({1:f4})",
            modelEvaluator.ModelScorer.FuncCalls,
            modelEvaluator.ModelScorer.FuncCalls / (double)(modelEvaluator.ModelScorer.CacheMisses));
        Console.WriteLine("Cache hits (%): {0} ({1:f4})",
            modelEvaluator.ModelScorer.CacheHits,
            100 * modelEvaluator.ModelScorer.CacheHits / (double)(modelEvaluator.ModelScorer.CacheHits + modelEvaluator.ModelScorer.CacheMisses));
        Console.WriteLine("Cache clears: {0}", modelEvaluator.ModelScorer.CacheClears);
    }

    return outputFileName;
}