/// <summary>
/// Recomputes the null and alternative log likelihoods on the supplied data using the
/// parameters already stored in <paramref name="previousResults"/>. No optimization is run here.
/// </summary>
/// <param name="predMap">Maps a leaf to its predictor sufficient statistics.</param>
/// <param name="targMap">Maps a leaf to its target sufficient statistics.</param>
/// <param name="previousResults">
/// Earlier results; the first null score and the alt score supply the parameters and distributions.
/// </param>
/// <returns>
/// Gaussian evaluation results built from the recomputed scores and the non-missing leaf counts.
/// </returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predMap,
    Converter<Leaf, SufficientStatistics> targMap,
    EvaluationResults previousResults)
{
    // Leaf counts reported alongside the scores.
    int predictorLeafCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
    int targetLeafCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
    int jointLeafCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

    // Null model: evaluate the likelihood at the previously stored null parameters.
    DistributionGaussianConditional nullDistribution = (DistributionGaussianConditional)NullDistns[0];
    MessageInitializerGaussian nullInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        nullDistribution,
        ModelScorer.PhyloTree.LeafCollection);
    OptimizationParameterList storedNullParams = previousResults.NullScores[0].OptimizationParameters;
    double nullLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(nullInitializer, storedNullParams);
    Score recomputedNull = Score.GetInstance(
        nullLogLikelihood,
        storedNullParams,
        previousResults.NullScores[0].Distribution);

    // Alternative model: same procedure with the previously stored alt parameters.
    DistributionGaussianConditional altDistribution = (DistributionGaussianConditional)AltDistn;
    MessageInitializerGaussian altInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        altDistribution,
        ModelScorer.PhyloTree.LeafCollection);
    OptimizationParameterList storedAltParams = previousResults.AltScore.OptimizationParameters;
    double altLogLikelihood = ModelScorer.ComputeLogLikelihoodModelGivenData(altInitializer, storedAltParams);
    Score recomputedAlt = Score.GetInstance(
        altLogLikelihood,
        storedAltParams,
        previousResults.AltScore.Distribution);

    return EvaluationResultsGaussian.GetInstance(
        this,
        recomputedNull,
        recomputedAlt,
        predictorLeafCount,
        targetLeafCount,
        jointLeafCount,
        ChiSquareDegreesOfFreedom);
}
//public static Converter<Leaf, SufficientStatistics> TESTPRED, TESTTARG;
//public static EvaluationResults TESTEVALRESULTS;

/// <summary>
/// Recomputes the discrete null and conditional (alternative) log likelihoods on the supplied
/// data using the parameters stored in <paramref name="previousResults"/>.
/// </summary>
/// <param name="predictorMap">Maps a leaf to its predictor sufficient statistics.</param>
/// <param name="targetMap">Maps a leaf to its target sufficient statistics.</param>
/// <param name="previousResults">
/// Earlier results. When <c>_includePredictorInScore</c> is set, null score 0 is read as the
/// predictor score and null score 1 as the target score; otherwise null score 0 is the target score.
/// </param>
/// <returns>Discrete evaluation results holding the recomputed scores and the Fisher counts.</returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    EvaluationResults previousResults)
{
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // The target's null score sits after the predictor's when the predictor is included.
    int targetNullIndex;
    if (_includePredictorInScore)
    {
        targetNullIndex = 1;
    }
    else
    {
        targetNullIndex = 0;
    }

    // Null model for the target variable.
    OptimizationParameterList targetNullParams = previousResults.NullScores[targetNullIndex].OptimizationParameters;
    MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        NullDistn,
        new int[0],
        ModelScorer.PhyloTree.LeafCollection);
    double targetNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams);
    Score targetNullScore = Score.GetInstance(targetNullLL, targetNullParams, NullDistn);

    // Conditional (alternative) model.
    OptimizationParameterList conditionalParams = previousResults.AltScore.OptimizationParameters;
    MessageInitializerDiscrete conditionalInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        (DistributionDiscreteConditional)AltDistn,
        new int[0],
        ModelScorer.PhyloTree.LeafCollection);
    double conditionalLL = ModelScorer.ComputeLogLikelihoodModelGivenData(conditionalInitializer, conditionalParams);
    Score altScore = Score.GetInstance(conditionalLL, conditionalParams, AltDistn);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        // Null model for the predictor variable: note the maps are passed in swapped order.
        OptimizationParameterList predictorNullParams = previousResults.NullScores[0].OptimizationParameters;
        MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(
            targetMap,
            predictorMap,
            NullDistn,
            new int[0],
            ModelScorer.PhyloTree.LeafCollection);
        double predictorNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams);
        Score predictorNullScore = Score.GetInstance(predictorNullLL, predictorNullParams, NullDistn);
        nullScores.Add(predictorNullScore);

        // The conditional model's alt score doesn't include the predictor LL. If we're here,
        // we want to add it to make it comparable to joint or reverseConditional.
        double combinedLL = altScore.Loglikelihood + predictorNullScore.Loglikelihood;
        altScore = Score.GetInstance(combinedLL, altScore.OptimizationParameters, altScore.Distribution);
    }
    nullScores.Add(targetNullScore);

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Recomputes the two single-variable null log likelihoods and the joint (alternative) log
/// likelihood on the supplied data using the parameters stored in <paramref name="previousResults"/>.
/// </summary>
/// <param name="predictorMap">Maps a leaf to its predictor sufficient statistics.</param>
/// <param name="targetMap">Maps a leaf to its target sufficient statistics.</param>
/// <param name="previousResults">
/// Earlier results; null score 0 is read as the predictor score, null score 1 as the target score.
/// </param>
/// <returns>
/// Discrete evaluation results with null scores ordered (predictor, target) and the recomputed alt score.
/// </returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    EvaluationResults previousResults)
{
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // Target variable under its single-variable null model.
    OptimizationParameterList targetNullParams = previousResults.NullScores[1].OptimizationParameters;
    MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        (DistributionDiscreteSingleVariable)NullDistns[0],
        fisherCounts,
        ModelScorer.PhyloTree.LeafCollection);
    double targetNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams);
    Score targetNullScore = Score.GetInstance(targetNullLL, targetNullParams, previousResults.NullScores[1].Distribution);

    // Predictor variable under its single-variable null model (maps passed in swapped order).
    OptimizationParameterList predictorNullParams = previousResults.NullScores[0].OptimizationParameters;
    MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(
        targetMap,
        predictorMap,
        (DistributionDiscreteSingleVariable)NullDistns[1],
        fisherCounts,
        ModelScorer.PhyloTree.LeafCollection);
    double predictorNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams);
    Score predictorNullScore = Score.GetInstance(predictorNullLL, predictorNullParams, previousResults.NullScores[0].Distribution);

    List<Score> nullScores = new List<Score>();
    nullScores.Add(predictorNullScore);
    nullScores.Add(targetNullScore);

    // Joint (alternative) model.
    OptimizationParameterList jointParams = previousResults.AltScore.OptimizationParameters;
    DistributionDiscreteJoint jointDistribution = (DistributionDiscreteJoint)AltDistn;
    double jointLL;
    if (!jointDistribution.ParametersCannotBeEvaluated(jointParams))
    {
        MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(
            CreateJointMap(predictorMap, targetMap),
            jointDistribution,
            fisherCounts,
            ModelScorer.PhyloTree.LeafCollection);
        jointLL = ModelScorer.ComputeLogLikelihoodModelGivenData(jointInitializer, jointParams);
    }
    else
    {
        // We'll get here only if one of the variables is always (or never) true.
        // In this case, the variables must be independent.
        jointLL = targetNullLL + predictorNullLL;
    }
    Score altScore = Score.GetInstance(jointLL, jointParams, previousResults.AltScore.Distribution);

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Scores the single-variable null model and the conditional (alternative) model on the
/// supplied data and packages the result.
/// </summary>
/// <param name="predictorMap">Maps a leaf to its predictor sufficient statistics.</param>
/// <param name="targetMap">Maps a leaf to its target sufficient statistics.</param>
/// <returns>
/// Discrete evaluation results. When <c>_includePredictorInScore</c> is set, the predictor's
/// null score is listed first and its log likelihood is added to the alt score.
/// </returns>
/// <remarks>
/// In DEBUG builds the log likelihoods are recomputed from the fitted parameters and
/// asserted to match the scores produced here.
/// </remarks>
public override EvaluationResults EvaluateModelOnData(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap)
{
    // Counts reported in the results; never altered by NAIVE_EQUILIBRIUM.
    int[] realFisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
    // Counts handed to the scoring helpers.
    int[] fisherCounts = realFisherCounts;

#if NAIVE_EQUILIBRIUM
    // Backwards compatibility: score with the target's leaf counts instead of the Fisher counts.
    fisherCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
#endif

    bool targetIsInvariant;
    Score nullScoreTarg = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, fisherCounts, out targetIsInvariant);
    Score altScore = ComputeConditionalVariableScore(predictorMap, targetMap, nullScoreTarg, fisherCounts);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        // Entries 1 and 2 are swapped; presumably this re-expresses the counts for the
        // swapped (target, predictor) argument order used below - verify against FisherCounts.
        int[] predFisherCounts = new int[]
        {
            realFisherCounts[0],
            realFisherCounts[2],
            realFisherCounts[1],
            realFisherCounts[3]
        };
        bool predictorIsInvariant;
        Score predNullScore = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, predFisherCounts, out predictorIsInvariant);
        nullScores.Add(predNullScore);

        // The conditional model's alt score doesn't include the predictor LL. If we're here,
        // we want to add it to make it comparable to joint or reverseConditional.
        double combinedLL = altScore.Loglikelihood + predNullScore.Loglikelihood;
        altScore = Score.GetInstance(combinedLL, altScore.OptimizationParameters, altScore.Distribution);
    }
    nullScores.Add(nullScoreTarg);

    EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, realFisherCounts, ChiSquareDegreesOfFreedom);

#if DEBUG
    // Consistency check: evaluating the likelihood at the fitted parameters must reproduce the fitted scores.
    MessageInitializerDiscrete checkNullInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        NullDistn,
        fisherCounts,
        ModelScorer.PhyloTree.LeafCollection);
    MessageInitializerDiscrete checkAltInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        (DistributionDiscreteConditional)AltDistn,
        nullScoreTarg.OptimizationParameters,
        ModelScorer.PhyloTree.LeafCollection);
    double recomputedNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkNullInitializer, nullScoreTarg.OptimizationParameters);
    double recomputedAltLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkAltInitializer, altScore.OptimizationParameters);

    if (_includePredictorInScore)
    {
        int[] predFisherCounts = new int[]
        {
            realFisherCounts[0],
            realFisherCounts[2],
            realFisherCounts[1],
            realFisherCounts[3]
        };
        MessageInitializerDiscrete checkPredInitializer = MessageInitializerDiscrete.GetInstance(
            targetMap,
            predictorMap,
            NullDistn,
            predFisherCounts,
            ModelScorer.PhyloTree.LeafCollection);
        double recomputedPredLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkPredInitializer, nullScores[0].OptimizationParameters);
        recomputedAltLL += recomputedPredLL;
    }

    EvaluationResults reevaluated = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);

    double tolerance = 1E-10;
    Debug.Assert(ComplexNumber.ApproxEqual(recomputedNullLL, nullScoreTarg.Loglikelihood, tolerance));
    Debug.Assert(ComplexNumber.ApproxEqual(recomputedAltLL, altScore.Loglikelihood, tolerance));
    Debug.Assert(
        ComplexNumber.ApproxEqual(evalResults.NullLL, reevaluated.NullLL, tolerance)
            && ComplexNumber.ApproxEqual(evalResults.AltLL, reevaluated.AltLL, tolerance),
        "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

    return evalResults;
}
/// <summary>
/// Loads the predictor and target sparse files, finds every work-list row whose predictor and
/// target variable names match the requested ones, and writes to the console the log likelihood
/// of the null model (for the target and for the predictor) and of the alternative model,
/// each evaluated at the caller-supplied parameter values.
/// </summary>
/// <param name="modelScorer">Scorer used to build message initializers and compute log likelihoods.</param>
/// <param name="phyloTree">Tree forwarded to the null data generator.</param>
/// <param name="predictorSparseFileName">Sparse file holding the predictor variables.</param>
/// <param name="targetSparseFileName">Sparse file holding the target variables.</param>
/// <param name="predictorVariableName">Name of the predictor variable to score.</param>
/// <param name="targetVariableName">Name of the target variable to score.</param>
/// <param name="nullModelArgs">Values turned into null-model parameters via <c>NullModelDistribution.GetParameters</c>.</param>
/// <param name="altModelArgs">Values turned into alt-model parameters via <c>AltModelDistribution.GetParameters</c>.</param>
public void ScoreTree(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    string predictorSparseFileName,
    string targetSparseFileName,
    string predictorVariableName,
    string targetVariableName,
    double[] nullModelArgs,
    double[] altModelArgs)
{
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> predictorNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(predictorSparseFileName);
    IEnumerable<Pair<string, Dictionary<string, SufficientStatistics>>> targetNameAndCaseIdToNonMissingValueEnumeration =
        LoadSparseFileEnumeration(targetSparseFileName);

    // The range (-1, -1) is the same index range handed to both the generator and the work list.
    RangeCollection nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
    NullDataCollection nullDataGenerator = CreateNullDataGenerator(
        "PredictorPermutation",
        modelScorer,
        phyloTree,
        nullIndexRangeCollection,
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration);

    UniversalWorkList workList = UniversalWorkList.GetInstance(
        predictorNameAndCaseIdToNonMissingValueEnumeration,
        targetNameAndCaseIdToNonMissingValueEnumeration,
        nullDataGenerator,
        nullIndexRangeCollection,
        AlwaysKeep<Dictionary<string, string>>.GetInstance());

    foreach (RowData rowAndTargetData in workList.List())
    {
        if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
            rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
        {
            Dictionary<string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;
            Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

            Converter<Leaf, SufficientStatistics> targetDistributionMap =
                CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction =
                CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
            Converter<Leaf, SufficientStatistics> altDistributionMap =
                CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);

            double logLikelihood;
            Score scoreIndTarget, scoreIndPredictor, scoreAlt;
            MessageInitializer messageInitializer;
            OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
            OptimizationParameterList altParams = AltModelDistribution.GetParameters(altModelArgs);

            Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));

            // Null model applied to the target variable.
            messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
            logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
            scoreIndTarget = Score.GetInstance(logLikelihood, nullParams);
            Console.WriteLine("Target\t" + scoreIndTarget);

            // Null model applied to the predictor variable (maps passed in swapped order).
            // The original code repeated this likelihood computation a second time and threw the
            // result away; the redundant call has been removed.
            messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
            logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
            scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams);
            Console.WriteLine("Predictor\t" + scoreIndPredictor);

            Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));

            // Alternative model over the combined predictor/target map.
            messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
            logLikelihood = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
            scoreAlt = Score.GetInstance(logLikelihood, altParams);
            Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
        }
    }
}