/// <summary>
/// Builds a result whose scores carry the "test and train" log-likelihoods minus the
/// matching "train" log-likelihoods.
/// </summary>
/// <param name="modelEval">Evaluator handed to the constructed result.</param>
/// <param name="train">Results whose log-likelihoods are subtracted; its null-score count determines how many null scores are produced.</param>
/// <param name="testAndTrain">Results that supply the log-likelihoods being reduced, plus the optimization parameters and distribution of every new score.</param>
/// <returns>A new EvaluationResultsTestGivenTrain holding the difference scores.</returns>
public static EvaluationResultsTestGivenTrain GetInstance(ModelEvaluator modelEval, EvaluationResults train, EvaluationResults testAndTrain)
        {
            int nullCount = train.NullScores.Count;
            List <Score> differenceNullScores = new List <Score>(nullCount);

            // One difference score per null score in the training results.
            for (int nullIdx = 0; nullIdx < nullCount; ++nullIdx)
            {
                Score combinedNull  = testAndTrain.NullScores[nullIdx];
                Score trainOnlyNull = train.NullScores[nullIdx];

                differenceNullScores.Add(
                    Score.GetInstance(
                        combinedNull.Loglikelihood - trainOnlyNull.Loglikelihood,
                        combinedNull.OptimizationParameters,
                        combinedNull.Distribution));
            }

            // The alternative score is reduced the same way as the null scores.
            Score combinedAlt        = testAndTrain.AltScore;
            Score differenceAltScore = Score.GetInstance(
                combinedAlt.Loglikelihood - train.AltScore.Loglikelihood,
                combinedAlt.OptimizationParameters,
                combinedAlt.Distribution);

            return new EvaluationResultsTestGivenTrain(modelEval, testAndTrain, differenceNullScores, differenceAltScore);
        }
        /// <summary>
        /// Evaluates the model on a single work item and formats the outcome as one
        /// tab-delimited report line.
        /// </summary>
        /// <param name="modelEvaluator">Evaluator that scores the predictor/target pair.</param>
        /// <param name="rowAndTargetData">Work item providing the row fields and the predictor and target data.</param>
        /// <param name="workList">Not used by this method.</param>
        /// <param name="rowIndex">Row index written into the report line.</param>
        /// <param name="workListCount">Work-list size written into the report line.</param>
        /// <param name="workIndex">Work index written into the report line.</param>
        /// <returns>The tab-delimited line built by SpecialFunctions.CreateTabString.</returns>
        private string CreateReportLine(
            ModelEvaluator modelEvaluator,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            // Identifying columns are read from the row before any evaluation work starts.
            Dictionary <string, string> rowFields = rowAndTargetData.Row;
            string predictorName = rowFields[Tabulate.PredictorVariableColumnName];
            string targetName    = rowFields[Tabulate.TargetVariableColumnName];
            int    nullIndex     = int.Parse(rowFields[Tabulate.NullIndexColumnName]);

            Dictionary <string, SufficientStatistics> predictorByCaseId = rowAndTargetData.PredictorData;
            Dictionary <string, SufficientStatistics> targetByCaseId    = rowAndTargetData.TargetData;

            // Each data dictionary is wrapped as a Leaf -> SufficientStatistics lookup for the evaluator.
            Converter <Leaf, SufficientStatistics> predictorLookup = CreateSufficientStatisticsMap(predictorByCaseId);
            Converter <Leaf, SufficientStatistics> targetLookup    = CreateSufficientStatisticsMap(targetByCaseId);

            EvaluationResults evaluation = modelEvaluator.EvaluateModelOnData(predictorLookup, targetLookup);

            return SpecialFunctions.CreateTabString(
                evaluation.ModelEvaluator.Name,
                rowIndex,
                workListCount,
                workIndex,
                nullIndex,
                predictorName,
                targetName,
                evaluation.ToString());
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Factory for an EvaluationResultsDiscrete built from a list of null scores.
 /// </summary>
 /// <param name="modelEval">Evaluator forwarded to the constructor.</param>
 /// <param name="nullScores">Null scores forwarded to the constructor.</param>
 /// <param name="altScore">Alternative score forwarded to the constructor.</param>
 /// <param name="fisherCounts">Counts forwarded to the constructor.</param>
 /// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the constructor.</param>
 /// <returns>The newly constructed instance.</returns>
 public static EvaluationResultsDiscrete GetInstance(ModelEvaluator modelEval, List <Score> nullScores, Score altScore, int[] fisherCounts, int chiSquareDegreesOfFreedom)
 {
     EvaluationResultsDiscrete instance =
         new EvaluationResultsDiscrete(modelEval, nullScores, altScore, fisherCounts, chiSquareDegreesOfFreedom);

     return instance;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Factory for an EvaluationResultsDiscrete that has exactly one null score.
        /// </summary>
        /// <param name="modelEval">Evaluator forwarded to the constructor.</param>
        /// <param name="nullScore">The single null score; it is wrapped in a list via SpecialFunctions.CreateSingletonList.</param>
        /// <param name="altScore">Alternative score forwarded to the constructor.</param>
        /// <param name="fisherCounts">Counts forwarded to the constructor.</param>
        /// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the constructor.</param>
        /// <returns>The newly constructed instance.</returns>
        public static EvaluationResultsDiscrete GetInstance(ModelEvaluator modelEval, Score nullScore, Score altScore, int[] fisherCounts, int chiSquareDegreesOfFreedom)
        {
            // The constructor expects a list, so the lone score is wrapped first.
            List <Score> onlyNullScore = SpecialFunctions.CreateSingletonList(nullScore);
            EvaluationResultsDiscrete instance =
                new EvaluationResultsDiscrete(modelEval, onlyNullScore, altScore, fisherCounts, chiSquareDegreesOfFreedom);

            return instance;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Initializes the discrete results and keeps the supplied counts.
 /// The sum of <paramref name="fisherCounts"/> is passed as the base constructor's final argument.
 /// </summary>
 /// <param name="modelEval">Evaluator forwarded to the base constructor.</param>
 /// <param name="nullScores">Null scores forwarded to the base constructor.</param>
 /// <param name="altScore">Alternative score forwarded to the base constructor.</param>
 /// <param name="fisherCounts">Counts stored on this instance and summed for the base constructor.</param>
 /// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the base constructor.</param>
 protected EvaluationResultsDiscrete(ModelEvaluator modelEval, List <Score> nullScores, Score altScore, int[] fisherCounts, int chiSquareDegreesOfFreedom)
     : base(modelEval, nullScores, altScore, chiSquareDegreesOfFreedom, SpecialFunctions.Sum(fisherCounts))
 {
     this._fisherCounts = fisherCounts;
 }
        /// <summary>
        /// Evaluates every selected work item and writes one report line per item to a
        /// text file inside <paramref name="outputDirectoryName"/>, then prints timing
        /// and (when a scorer is present) cache statistics to the console.
        /// </summary>
        /// <param name="modelEvaluator">Evaluator used to score each work item; also supplies the null-data generator, the report header and part of the file name.</param>
        /// <param name="predictorSparseFileName">File from which the predictor enumeration is created.</param>
        /// <param name="targetSparseFileName">File from which the target enumeration is created.</param>
        /// <param name="leafDistributionName">Not used by this method.</param>
        /// <param name="nullDataGeneratorName">Name handed to <c>modelEvaluator.CreateNullDataGenerator</c>.</param>
        /// <param name="keepTest">Filter passed to the work list.</param>
        /// <param name="skipRowIndexRangeCollectionOrNull">Row indexes to skip, or null to skip none.</param>
        /// <param name="shortName">First component of the output file name.</param>
        /// <param name="outputDirectoryName">Directory that receives the output file; it is created if it does not exist.</param>
        /// <param name="pieceIndexRangeCollection">Only rows whose work index is contained in this collection are evaluated.</param>
        /// <param name="pieceCount">Piece count passed to ExtractWorkIndex and embedded in the file name.</param>
        /// <param name="nullIndexRangeCollection">Null indexes passed to the null-data collection and the work list; embedded in the file name.</param>
        /// <param name="optimizerName">Not used by this method.</param>
        /// <returns>The path of the report file that was written.</returns>
        /// <exception cref="OutOfMemoryException">
        /// Propagated when a row runs out of memory and either no scorer cache exists to clear
        /// or the retry after clearing the cache fails as well.
        /// </exception>
        public string Run(
            ModelEvaluator modelEvaluator,
            //PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);



            #region from PhyloTree refactor
            //Dictionary<string, Dictionary<string, bool>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory<bool>(predictorSparseFileName);
            //IEnumerable<Pair<string, Dictionary<string, T>>> targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration<T>(targetSparseFileName);

            //NullDataCollection nullDataGenerator =
            //    NullDataCollection.GetInstance(this, modelTester, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);

            //UniversalWorkList<T> workList = UniversalWorkList<T>.GetInstance(
            //    predictorVariableToCaseIdToRealNonMissingValue,
            //    targetNameAndCaseIdToNonMissingValueEnumeration,
            //    nullDataGenerator, nullIndexRangeCollection, keepTest);
            #endregion
            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                NullDataCollection.GetInstance(modelEvaluator.CreateNullDataGenerator(nullDataGeneratorName), nullIndexRangeCollection,
                                               predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);


            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            // First pass over the work list: only the row count is needed here.
            int workListCount = SpecialFunctions.Count(workList.List());

            // The effective count excludes skipped rows; it is what the work is partitioned over.
            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            // Path.Combine inserts the platform's directory separator instead of a hard-coded '\'.
            string outputFileName = Path.Combine(
                outputDirectoryName,
                string.Format("{0}.{1}.{2}.{3}.{4}.{5}{6}.txt",
                              shortName,
                              modelEvaluator.Name, nullDataGenerator.Name,
                              nullIndexRangeCollection,
                              pieceCount,
                              pieceIndexRangeCollection,
                              skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()
                              ));

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(GetHeaderString(modelEvaluator));
                textWriter.Flush();
                int rowIndex          = -1;
                int effectiveRowIndex = -1;

                // Second pass over the work list: evaluate the rows that belong to the requested pieces.
                foreach (RowData rowAndTargetData in workList.List())
                {
                    //TODOmake all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelEvaluator, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                // ModelScorer may be null (see the statistics block below). Without a scorer
                                // there is no cache to clear, so let the original exception propagate rather
                                // than masking it with a NullReferenceException.
                                if (modelEvaluator.ModelScorer == null)
                                {
                                    throw;
                                }

                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelEvaluator.ModelScorer.ClearCache();

                                // Single retry of the same row; a second failure propagates to the caller.
                                reportLine =
                                    CreateReportLine(modelEvaluator, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            // Flush per line so completed rows reach the file promptly.
                            textWriter.WriteLine(reportLine);
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: {0}", stopwatch.Elapsed);
            if (modelEvaluator.ModelScorer != null)
            {
                Console.WriteLine("Function calls (per ML call): {0} ({1:f4})", modelEvaluator.ModelScorer.FuncCalls, modelEvaluator.ModelScorer.FuncCalls / (double)(modelEvaluator.ModelScorer.CacheMisses));
                // Multiply in floating point so a large hit count cannot overflow integer arithmetic.
                Console.WriteLine("Cache hits (%): {0} ({1:f4})", modelEvaluator.ModelScorer.CacheHits, 100.0 * modelEvaluator.ModelScorer.CacheHits / (double)(modelEvaluator.ModelScorer.CacheHits + modelEvaluator.ModelScorer.CacheMisses));
                Console.WriteLine("Cache clears: {0}", modelEvaluator.ModelScorer.CacheClears);
            }

            return(outputFileName);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Initializes the Fisher results: everything except <paramref name="pValue"/> is
 /// forwarded to the base constructor, and the p-value is stored on this instance.
 /// </summary>
 /// <param name="modelEval">Evaluator forwarded to the base constructor.</param>
 /// <param name="nullScores">Null scores forwarded to the base constructor.</param>
 /// <param name="altScore">Alternative score forwarded to the base constructor.</param>
 /// <param name="fisherCounts">Counts forwarded to the base constructor.</param>
 /// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the base constructor.</param>
 /// <param name="pValue">P-value kept in <c>_pValue</c>.</param>
 protected EvaluationResultsFisher(ModelEvaluator modelEval, List <Score> nullScores, Score altScore, int[] fisherCounts, int chiSquareDegreesOfFreedom, double pValue)
     : base(modelEval, nullScores, altScore, fisherCounts, chiSquareDegreesOfFreedom)
 {
     this._pValue = pValue;
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Factory for an EvaluationResults; all arguments are forwarded to the constructor unchanged.
 /// </summary>
 /// <param name="modelEval">Evaluator forwarded to the constructor.</param>
 /// <param name="nullScores">Null scores forwarded to the constructor.</param>
 /// <param name="altScore">Alternative score forwarded to the constructor.</param>
 /// <param name="chiSquareDegreesOfFreedom">Degrees of freedom forwarded to the constructor.</param>
 /// <param name="globalNonMissingCount">Count forwarded to the constructor.</param>
 /// <returns>The newly constructed instance.</returns>
 public static EvaluationResults GetInstance(ModelEvaluator modelEval, List <Score> nullScores, Score altScore, int chiSquareDegreesOfFreedom, int globalNonMissingCount)
 {
     EvaluationResults instance =
         new EvaluationResults(modelEval, nullScores, altScore, chiSquareDegreesOfFreedom, globalNonMissingCount);

     return instance;
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Factory for a ModelEvaluatorReverse that wraps the given evaluator.
 /// </summary>
 /// <param name="modelEvaluator">Evaluator passed to the constructor.</param>
 /// <returns>The newly constructed instance.</returns>
 public static ModelEvaluatorReverse GetInstance(ModelEvaluator modelEvaluator)
 {
     ModelEvaluatorReverse reversed = new ModelEvaluatorReverse(modelEvaluator);

     return reversed;
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Initializes the base with the wrapped evaluator's null distributions, alternative
 /// distribution and scorer, and keeps a reference to the wrapped evaluator.
 /// </summary>
 /// <param name="evaluator">Evaluator to wrap; it is dereferenced while building the base arguments.</param>
 protected ModelEvaluatorReverse(ModelEvaluator evaluator)
     : base(evaluator.NullDistns, evaluator.AltDistn, evaluator.ModelScorer)
 {
     this._internalEvaluator = evaluator;
 }
        /// <summary>
        /// Creates the evaluator described by the given name and parameters, then wraps it
        /// through the single-argument GetInstance overload.
        /// </summary>
        /// <param name="nameAndParametersOfEvaluatorToCrossValidate">Description passed to ModelEvaluator.GetInstance.</param>
        /// <param name="scorer">Scorer passed to ModelEvaluator.GetInstance.</param>
        /// <returns>The cross-validating evaluator returned by the single-argument overload.</returns>
        public new static ModelEvaluatorCrossValidate GetInstance(string nameAndParametersOfEvaluatorToCrossValidate, ModelScorer scorer)
        {
            // Typed as ModelEvaluator so the call below binds to the ModelEvaluator overload.
            ModelEvaluator wrappedEvaluator =
                ModelEvaluator.GetInstance(nameAndParametersOfEvaluatorToCrossValidate, scorer);

            return GetInstance(wrappedEvaluator);
        }
 /// <summary>
 /// Factory for a ModelEvaluatorCrossValidate that uses DefaultCrossValidateCount.
 /// </summary>
 /// <param name="modelToCrossValidate">Evaluator passed to the constructor.</param>
 /// <returns>The newly constructed instance.</returns>
 public static ModelEvaluatorCrossValidate GetInstance(ModelEvaluator modelToCrossValidate)
 {
     ModelEvaluatorCrossValidate crossValidator =
         new ModelEvaluatorCrossValidate(modelToCrossValidate, DefaultCrossValidateCount);

     return crossValidator;
 }
 /// <summary>
 /// Factory for an EvaluationResultsCrossValidate that takes its null scores and alternative
 /// score from <paramref name="representativeResults"/>.
 /// </summary>
 /// <param name="modelEval">Evaluator forwarded to the constructor.</param>
 /// <param name="representativeResults">Results whose scores are reused; the object itself is also forwarded to the constructor.</param>
 /// <returns>The newly constructed instance.</returns>
 public static EvaluationResultsCrossValidate GetInstance(ModelEvaluator modelEval, EvaluationResults representativeResults)
 {
     // The trailing literal 1 is forwarded as-is; its meaning is defined by the constructor, which is not visible here.
     EvaluationResultsCrossValidate instance = new EvaluationResultsCrossValidate(
         modelEval,
         representativeResults.NullScores,
         representativeResults.AltScore,
         representativeResults,
         1);

     return instance;
 }
 /// <summary>
 /// Initializes the base with the supplied test-given-train scores and with the degrees of
 /// freedom and global non-missing count read from <paramref name="testAndTrain"/>, then
 /// keeps a reference to <paramref name="testAndTrain"/>.
 /// </summary>
 /// <param name="modelEval">Evaluator forwarded to the base constructor.</param>
 /// <param name="testAndTrain">Results stored in <c>_testAndTrain</c>; also the source of ChiSquareDegreesOfFreedom and GlobalNonMissingCount.</param>
 /// <param name="testGiveTrainNullScores">Null scores forwarded to the base constructor.</param>
 /// <param name="testGivenTrainAltScore">Alternative score forwarded to the base constructor.</param>
 protected EvaluationResultsTestGivenTrain(ModelEvaluator modelEval, EvaluationResults testAndTrain, List <Score> testGiveTrainNullScores, Score testGivenTrainAltScore)
     : base(modelEval, testGiveTrainNullScores, testGivenTrainAltScore, testAndTrain.ChiSquareDegreesOfFreedom, testAndTrain.GlobalNonMissingCount)
 {
     this._testAndTrain = testAndTrain;
 }