/// <summary>
/// Builds a test-given-train result by subtracting, score by score, the log-likelihoods of
/// <paramref name="train"/> from those of <paramref name="testAndTrain"/>.
/// </summary>
/// <param name="modelEval">Evaluator handed to the constructed result.</param>
/// <param name="train">Results whose log-likelihoods are subtracted; its null-score count determines how many null scores are produced.</param>
/// <param name="testAndTrain">Results supplying the log-likelihoods that are subtracted from, as well as the optimization parameters and distribution copied into each new score.</param>
/// <returns>A new <see cref="EvaluationResultsTestGivenTrain"/> wrapping the differenced null and alternative scores.</returns>
public static EvaluationResultsTestGivenTrain GetInstance(ModelEvaluator modelEval, EvaluationResults train, EvaluationResults testAndTrain)
{
    int nullCount = train.NullScores.Count;
    List<Score> differencedNullScores = new List<Score>(nullCount);

    for (int nullIdx = 0; nullIdx < nullCount; ++nullIdx)
    {
        Score joint = testAndTrain.NullScores[nullIdx];
        Score trainOnly = train.NullScores[nullIdx];

        // Only the log-likelihood is differenced; parameters and distribution come from the joint score.
        differencedNullScores.Add(
            Score.GetInstance(
                joint.Loglikelihood - trainOnly.Loglikelihood,
                joint.OptimizationParameters,
                joint.Distribution));
    }

    Score jointAlt = testAndTrain.AltScore;
    Score differencedAltScore = Score.GetInstance(
        jointAlt.Loglikelihood - train.AltScore.Loglikelihood,
        jointAlt.OptimizationParameters,
        jointAlt.Distribution);

    return new EvaluationResultsTestGivenTrain(modelEval, testAndTrain, differencedNullScores, differencedAltScore);
}
/// <summary>
/// Evaluates the model on one work item and formats the outcome as a single report line.
/// </summary>
/// <param name="modelEvaluator">Evaluator whose <c>EvaluateModelOnData</c> is invoked on the row's predictor and target data.</param>
/// <param name="rowAndTargetData">Work item supplying the row fields plus the predictor and target data.</param>
/// <param name="workList">Not used by this method.</param>
/// <param name="rowIndex">Row index written into the report line.</param>
/// <param name="workListCount">Work-list count written into the report line.</param>
/// <param name="workIndex">Work index written into the report line.</param>
/// <returns>The string produced by <c>SpecialFunctions.CreateTabString</c> for this work item.</returns>
private string CreateReportLine(
    ModelEvaluator modelEvaluator,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    // Pull the identifying fields out of the row first.
    Dictionary<string, string> fields = rowAndTargetData.Row;
    string predictorName = fields[Tabulate.PredictorVariableColumnName];
    string targetName = fields[Tabulate.TargetVariableColumnName];
    int nullIndex = int.Parse(fields[Tabulate.NullIndexColumnName]);

    // Wrap the per-case data in the leaf-to-statistics converters the evaluator expects.
    Dictionary<string, SufficientStatistics> predictorByCaseId = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetByCaseId = rowAndTargetData.TargetData;
    Converter<Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorByCaseId);
    Converter<Leaf, SufficientStatistics> targetMap = CreateSufficientStatisticsMap(targetByCaseId);

    EvaluationResults evaluation = modelEvaluator.EvaluateModelOnData(predictorMap, targetMap);

    return SpecialFunctions.CreateTabString(
        evaluation.ModelEvaluator.Name,
        rowIndex,
        workListCount,
        workIndex,
        nullIndex,
        predictorName,
        targetName,
        evaluation.ToString());
}
/// <summary>
/// Creates an <see cref="EvaluationResultsDiscrete"/> from a list of null scores.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the constructor.</param>
/// <param name="nullScores">Null scores forwarded to the constructor.</param>
/// <param name="altScore">Alternative score forwarded to the constructor.</param>
/// <param name="fisherCounts">Counts forwarded to the constructor.</param>
/// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the constructor.</param>
/// <returns>The newly constructed instance.</returns>
public static EvaluationResultsDiscrete GetInstance(
    ModelEvaluator modelEval,
    List<Score> nullScores,
    Score altScore,
    int[] fisherCounts,
    int chiSquareDegreesOfFreedom)
{
    EvaluationResultsDiscrete instance = new EvaluationResultsDiscrete(
        modelEval,
        nullScores,
        altScore,
        fisherCounts,
        chiSquareDegreesOfFreedom);
    return instance;
}
/// <summary>
/// Creates an <see cref="EvaluationResultsDiscrete"/> from a single null score, which is wrapped
/// via <c>SpecialFunctions.CreateSingletonList</c> before being passed to the constructor.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the constructor.</param>
/// <param name="nullScore">The one null score to wrap.</param>
/// <param name="altScore">Alternative score forwarded to the constructor.</param>
/// <param name="fisherCounts">Counts forwarded to the constructor.</param>
/// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the constructor.</param>
/// <returns>The newly constructed instance.</returns>
public static EvaluationResultsDiscrete GetInstance(
    ModelEvaluator modelEval,
    Score nullScore,
    Score altScore,
    int[] fisherCounts,
    int chiSquareDegreesOfFreedom)
{
    List<Score> wrappedNullScore = SpecialFunctions.CreateSingletonList(nullScore);

    EvaluationResultsDiscrete instance = new EvaluationResultsDiscrete(
        modelEval,
        wrappedNullScore,
        altScore,
        fisherCounts,
        chiSquareDegreesOfFreedom);
    return instance;
}
/// <summary>
/// Initializes the discrete results and stores the supplied counts.
/// The base constructor receives <c>SpecialFunctions.Sum(fisherCounts)</c> as its last argument.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the base constructor.</param>
/// <param name="nullScores">Null scores forwarded to the base constructor.</param>
/// <param name="altScore">Alternative score forwarded to the base constructor.</param>
/// <param name="fisherCounts">Counts kept by this instance; their sum is passed to the base constructor.</param>
/// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the base constructor.</param>
protected EvaluationResultsDiscrete(
    ModelEvaluator modelEval,
    List<Score> nullScores,
    Score altScore,
    int[] fisherCounts,
    int chiSquareDegreesOfFreedom)
    : base(
        modelEval,
        nullScores,
        altScore,
        chiSquareDegreesOfFreedom,
        SpecialFunctions.Sum(fisherCounts))
{
    this._fisherCounts = fisherCounts;
}
/// <summary>
/// Builds the work list from the predictor and target sparse files, evaluates every work item
/// selected by the skip and piece ranges, and writes a header plus one report line per
/// evaluated item to an output file.
/// </summary>
/// <param name="modelEvaluator">Evaluator used to create the null data generator, to score each work item, and to name the output file.</param>
/// <param name="predictorSparseFileName">File from which the predictor enumeration is created.</param>
/// <param name="targetSparseFileName">File from which the target enumeration is created.</param>
/// <param name="leafDistributionName">Not used by this method.</param>
/// <param name="nullDataGeneratorName">Name passed to <c>modelEvaluator.CreateNullDataGenerator</c>.</param>
/// <param name="keepTest">Filter handed to the work list.</param>
/// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or <c>null</c> to skip none.</param>
/// <param name="shortName">First component of the output file name.</param>
/// <param name="outputDirectoryName">Directory that receives the output file; created if it does not exist.</param>
/// <param name="pieceIndexRangeCollection">Work indexes that this call should process.</param>
/// <param name="pieceCount">Piece count passed to <c>ExtractWorkIndex</c> and embedded in the output file name.</param>
/// <param name="nullIndexRangeCollection">Null indexes handed to the null data collection and the work list.</param>
/// <param name="optimizerName">Not used by this method.</param>
/// <returns>The path of the output file that was written.</returns>
public string Run(
    ModelEvaluator modelEvaluator,
    string predictorSparseFileName,
    string targetSparseFileName,
    string leafDistributionName,
    string nullDataGeneratorName,
    KeepTest<Dictionary<string, string>> keepTest,
    RangeCollection skipRowIndexRangeCollectionOrNull,
    string shortName,
    string outputDirectoryName,
    RangeCollection pieceIndexRangeCollection,
    int pieceCount,
    RangeCollection nullIndexRangeCollection,
    string optimizerName)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    Directory.CreateDirectory(outputDirectoryName);

    bool speedOverMemory = true;

    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

    NullDataCollection nullDataGenerator =
        NullDataCollection.GetInstance(
            modelEvaluator.CreateNullDataGenerator(nullDataGeneratorName),
            nullIndexRangeCollection,
            predictorNameAndCaseIdToNonMissingValueEnumeration,
            targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator, nullIndexRangeCollection, keepTest);

    int workListCount = SpecialFunctions.Count(workList.List());

    // The effective count excludes skipped rows; it is what the work index is computed against.
    int effectiveWorkListCount;
    if (skipRowIndexRangeCollectionOrNull == null)
    {
        effectiveWorkListCount = workListCount;
    }
    else
    {
        effectiveWorkListCount = 0;
        for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
        {
            if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
            {
                effectiveWorkListCount++;
            }
        }
    }
    Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

    // Path.Combine supplies the platform's directory separator instead of a hard-coded '\'
    // and does not double the separator when outputDirectoryName already ends with one.
    string outputFileName = Path.Combine(
        outputDirectoryName,
        string.Format("{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
            shortName,
            modelEvaluator.Name,
            nullDataGenerator.Name,
            nullIndexRangeCollection,
            pieceCount,
            pieceIndexRangeCollection,
            skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()));

    using (TextWriter textWriter = File.CreateText(outputFileName))
    {
        textWriter.WriteLine(GetHeaderString(modelEvaluator));
        textWriter.Flush();

        int rowIndex = -1;          // position in the full work list
        int effectiveRowIndex = -1; // position among the rows that are not skipped

        foreach (RowData rowAndTargetData in workList.List())
        {
            // TODO: make all these parameters and the calculation a class
            ++rowIndex;

            Debug.Assert(rowIndex < workListCount); // real assert

            if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
            {
                ++effectiveRowIndex;

                int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                if (pieceIndexRangeCollection.Contains(workIndex))
                {
                    Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());

                    string reportLine;
                    try
                    {
                        reportLine = CreateReportLine(modelEvaluator, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                    }
                    catch (OutOfMemoryException)
                    {
                        // Best-effort recovery: drop the scorer's cache and retry this row once.
                        // A second failure propagates to the caller.
                        Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                        modelEvaluator.ModelScorer.ClearCache();
                        reportLine = CreateReportLine(modelEvaluator, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                    }

                    // Flush after every line so finished rows reach the file right away.
                    textWriter.WriteLine(reportLine);
                    textWriter.Flush();
                }
            }
        }
    }

    stopwatch.Stop();
    Console.WriteLine("Running time: {0}", stopwatch.Elapsed);

    if (modelEvaluator.ModelScorer != null)
    {
        Console.WriteLine("Function calls (per ML call): {0} ({1:f4})",
            modelEvaluator.ModelScorer.FuncCalls,
            modelEvaluator.ModelScorer.FuncCalls / (double)(modelEvaluator.ModelScorer.CacheMisses));
        Console.WriteLine("Cache hits (%): {0} ({1:f4})",
            modelEvaluator.ModelScorer.CacheHits,
            100 * modelEvaluator.ModelScorer.CacheHits / (double)(modelEvaluator.ModelScorer.CacheHits + modelEvaluator.ModelScorer.CacheMisses));
        Console.WriteLine("Cache clears: {0}", modelEvaluator.ModelScorer.CacheClears);
    }

    return outputFileName;
}
/// <summary>
/// Initializes the Fisher results: all arguments except <paramref name="pValue"/> go to the
/// base constructor, and <paramref name="pValue"/> is stored on this instance.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the base constructor.</param>
/// <param name="nullScores">Null scores forwarded to the base constructor.</param>
/// <param name="altScore">Alternative score forwarded to the base constructor.</param>
/// <param name="fisherCounts">Counts forwarded to the base constructor.</param>
/// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the base constructor.</param>
/// <param name="pValue">P-value kept by this instance.</param>
protected EvaluationResultsFisher(
    ModelEvaluator modelEval,
    List<Score> nullScores,
    Score altScore,
    int[] fisherCounts,
    int chiSquareDegreesOfFreedom,
    double pValue)
    : base(
        modelEval,
        nullScores,
        altScore,
        fisherCounts,
        chiSquareDegreesOfFreedom)
{
    this._pValue = pValue;
}
/// <summary>
/// Creates an <see cref="EvaluationResults"/> by forwarding every argument to the constructor.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the constructor.</param>
/// <param name="nullScores">Null scores forwarded to the constructor.</param>
/// <param name="altScore">Alternative score forwarded to the constructor.</param>
/// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the constructor.</param>
/// <param name="globalNonMissingCount">Non-missing count forwarded to the constructor.</param>
/// <returns>The newly constructed instance.</returns>
public static EvaluationResults GetInstance(
    ModelEvaluator modelEval,
    List<Score> nullScores,
    Score altScore,
    int chiSquareDegreesOfFreedom,
    int globalNonMissingCount)
{
    EvaluationResults instance = new EvaluationResults(
        modelEval,
        nullScores,
        altScore,
        chiSquareDegreesOfFreedom,
        globalNonMissingCount);
    return instance;
}
/// <summary>
/// Creates a <see cref="ModelEvaluatorReverse"/> around the given evaluator.
/// </summary>
/// <param name="modelEvaluator">Evaluator passed to the constructor.</param>
/// <returns>The newly constructed instance.</returns>
public static ModelEvaluatorReverse GetInstance(ModelEvaluator modelEvaluator)
{
    ModelEvaluatorReverse reversed = new ModelEvaluatorReverse(modelEvaluator);
    return reversed;
}
/// <summary>
/// Initializes the instance with the wrapped evaluator's null distributions, alternative
/// distribution and scorer, and keeps a reference to the wrapped evaluator.
/// </summary>
/// <param name="evaluator">Evaluator whose members seed the base constructor and which is stored on this instance.</param>
protected ModelEvaluatorReverse(ModelEvaluator evaluator)
    : base(
        evaluator.NullDistns,
        evaluator.AltDistn,
        evaluator.ModelScorer)
{
    this._internalEvaluator = evaluator;
}
/// <summary>
/// Creates a cross-validating evaluator for the evaluator described by a name-and-parameters string.
/// The inner evaluator is obtained from <c>ModelEvaluator.GetInstance</c> and then handed to the
/// single-argument <c>GetInstance</c> overload.
/// </summary>
/// <param name="nameAndParametersOfEvaluatorToCrossValidate">Description passed to <c>ModelEvaluator.GetInstance</c>.</param>
/// <param name="scorer">Scorer passed to <c>ModelEvaluator.GetInstance</c>.</param>
/// <returns>The cross-validating evaluator returned by the single-argument overload.</returns>
new public static ModelEvaluatorCrossValidate GetInstance(
    string nameAndParametersOfEvaluatorToCrossValidate,
    ModelScorer scorer)
{
    ModelEvaluator innerEvaluator = ModelEvaluator.GetInstance(
        nameAndParametersOfEvaluatorToCrossValidate,
        scorer);

    return GetInstance(innerEvaluator);
}
/// <summary>
/// Creates a cross-validating evaluator around the given evaluator, using
/// <c>DefaultCrossValidateCount</c> as the second constructor argument.
/// </summary>
/// <param name="modelToCrossValidate">Evaluator passed to the constructor.</param>
/// <returns>The newly constructed instance.</returns>
public static ModelEvaluatorCrossValidate GetInstance(ModelEvaluator modelToCrossValidate)
{
    ModelEvaluatorCrossValidate crossValidator =
        new ModelEvaluatorCrossValidate(modelToCrossValidate, DefaultCrossValidateCount);
    return crossValidator;
}
/// <summary>
/// Creates an <see cref="EvaluationResultsCrossValidate"/> from a representative result,
/// reusing that result's null scores and alternative score.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the constructor.</param>
/// <param name="representativeResults">Result whose scores are reused and which is itself forwarded to the constructor.</param>
/// <returns>The newly constructed instance.</returns>
public static EvaluationResultsCrossValidate GetInstance(
    ModelEvaluator modelEval,
    EvaluationResults representativeResults)
{
    // NOTE(review): the trailing literal 1 is passed as-is; its meaning is defined by the
    // constructor, which is not visible here.
    EvaluationResultsCrossValidate instance = new EvaluationResultsCrossValidate(
        modelEval,
        representativeResults.NullScores,
        representativeResults.AltScore,
        representativeResults,
        1);
    return instance;
}
/// <summary>
/// Initializes the instance with the supplied test-given-train scores, taking the degrees of
/// freedom and global non-missing count from <paramref name="testAndTrain"/>, and keeps a
/// reference to <paramref name="testAndTrain"/>.
/// </summary>
/// <param name="modelEval">Evaluator forwarded to the base constructor.</param>
/// <param name="testAndTrain">Results supplying the degrees of freedom and non-missing count; also stored on this instance.</param>
/// <param name="testGiveTrainNullScores">Null scores forwarded to the base constructor.</param>
/// <param name="testGivenTrainAltScore">Alternative score forwarded to the base constructor.</param>
protected EvaluationResultsTestGivenTrain(
    ModelEvaluator modelEval,
    EvaluationResults testAndTrain,
    List<Score> testGiveTrainNullScores,
    Score testGivenTrainAltScore)
    : base(
        modelEval,
        testGiveTrainNullScores,
        testGivenTrainAltScore,
        testAndTrain.ChiSquareDegreesOfFreedom,
        testAndTrain.GlobalNonMissingCount)
{
    this._testAndTrain = testAndTrain;
}