public void Run( ModelScorer modelScorer, PhyloTree phyloTree, string predictorSparseFileName, string targetSparseFileName, string leafDistributionName, string nullDataGeneratorName, KeepTest <Dictionary <string, string> > keepTest, RangeCollection skipRowIndexRangeCollectionOrNull, string shortName, string outputDirectoryName, RangeCollection pieceIndexRangeCollection, int pieceCount, RangeCollection nullIndexRangeCollection, string optimizerName) { Stopwatch stopwatch = new Stopwatch(); stopwatch.Start(); Directory.CreateDirectory(outputDirectoryName); string outputFileName = string.Format(@"{0}\{1}.{2}.{3}.{4}.{5}.{6}{7}.txt", outputDirectoryName, shortName, leafDistributionName, nullDataGeneratorName, nullIndexRangeCollection, pieceCount, pieceIndexRangeCollection, skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString() ); #region from PhyloTree refactor //Dictionary<string, Dictionary<string, bool>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory<bool>(predictorSparseFileName); //IEnumerable<Pair<string, Dictionary<string, T>>> targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration<T>(targetSparseFileName); //NullDataCollection nullDataGenerator = // NullDataCollection.GetInstance(this, modelTester, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue); //UniversalWorkList<T> workList = UniversalWorkList<T>.GetInstance( // predictorVariableToCaseIdToRealNonMissingValue, // targetNameAndCaseIdToNonMissingValueEnumeration, // nullDataGenerator, nullIndexRangeCollection, keepTest); #endregion bool speedOverMemory = true; IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory); IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory); NullDataCollection nullDataGenerator = CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection, predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration); UniversalWorkList workList = UniversalWorkList.GetInstance( predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration, nullDataGenerator, nullIndexRangeCollection, keepTest); int workListCount = SpecialFunctions.Count(workList.List()); int effectiveWorkListCount; if (skipRowIndexRangeCollectionOrNull == null) { effectiveWorkListCount = workListCount; } else { effectiveWorkListCount = 0; for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++) { if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex)) { effectiveWorkListCount++; } } } Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount); using (TextWriter textWriter = File.CreateText(outputFileName)) { textWriter.WriteLine(Header); int rowIndex = -1; int effectiveRowIndex = -1; foreach (RowData rowAndTargetData in workList.List()) { //!!!make all these parameters and the calculation a class ++rowIndex; Debug.Assert(rowIndex < workListCount); // real assert if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex)) { ++effectiveRowIndex; int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount); if (pieceIndexRangeCollection.Contains(workIndex)) { Debug.WriteLine("WorkItemIndex " + rowIndex.ToString()); string reportLine; try { reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex); } catch (OutOfMemoryException) { Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off."); modelScorer.ClearCache(); reportLine = CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex); } textWriter.WriteLine(reportLine); textWriter.Flush(); } } } } stopwatch.Stop(); Console.WriteLine("Running time: " + stopwatch.Elapsed); }
public void ScoreTree( ModelScorer modelScorer, PhyloTree phyloTree, string predictorSparseFileName, string targetSparseFileName, string predictorVariableName, string targetVariableName, double[] nullModelArgs, double[] altModelArgs) { //Dictionary<string, Dictionary<string, SufficientStatistics>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory(predictorSparseFileName); IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(predictorSparseFileName); IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(targetSparseFileName); RangeCollection nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1); NullDataCollection nullDataGenerator = CreateNullDataGenerator("PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection, predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration); UniversalWorkList workList = UniversalWorkList.GetInstance( predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration, //targetNameAndCaseIdToNonMissingValueEnumeration, nullDataGenerator, nullIndexRangeCollection, AlwaysKeep <Dictionary <string, string> > .GetInstance()); foreach (RowData rowAndTargetData in workList.List()) { if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName && rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName) { Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;//workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(-1, predictorVariableName); Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData; Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue); Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue); Converter <Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap); double logLikelihood; Score scoreIndTarget, scoreIndPredictor, scoreAlt; MessageInitializer messageInitializer; OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs); OptimizationParameterList altParams = AltModelDistribution.GetParameters(altModelArgs); Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL")); messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution); logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams); scoreIndTarget = Score.GetInstance(logLikelihood, nullParams); Console.WriteLine("Target\t" + scoreIndTarget); messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution); logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams); modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams); scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams); Console.WriteLine("Predictor\t" + scoreIndPredictor); Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL")); messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution); logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams); scoreAlt = Score.GetInstance(logLikelihood, altParams); Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt)); } } }