/// <summary>
/// Scores the Gaussian null distribution and the Gaussian alternative distribution against the
/// supplied leaf data, then packages both scores together with the non-missing leaf counts.
/// </summary>
/// <param name="predMap">Maps a leaf to the predictor's sufficient statistics.</param>
/// <param name="targMap">Maps a leaf to the target's sufficient statistics.</param>
/// <returns>The result produced by <c>EvaluationResultsGaussian.GetInstance</c>.</returns>
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predMap, Converter<Leaf, SufficientStatistics> targMap)
{
    // Non-missing leaf counts: predictor alone, target alone, then the two-map overload.
    int nonMissingPredictorLeaves = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
    int nonMissingTargetLeaves = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
    int nonMissingInBoth = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

    // Null model: the first (and only one used here) entry of NullDistns.
    MessageInitializerGaussian nullInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        (DistributionGaussianConditional)NullDistns[0],
        ModelScorer.PhyloTree.LeafCollection);
    Score scoreUnderNull = ModelScorer.MaximizeLikelihood(nullInitializer);

    // Alternative model.
    MessageInitializerGaussian alternativeInitializer = MessageInitializerGaussian.GetInstance(
        predMap,
        targMap,
        (DistributionGaussianConditional)AltDistn,
        ModelScorer.PhyloTree.LeafCollection);
    Score scoreUnderAlternative = ModelScorer.MaximizeLikelihood(alternativeInitializer);

    return EvaluationResultsGaussian.GetInstance(
        this,
        scoreUnderNull,
        scoreUnderAlternative,
        nonMissingPredictorLeaves,
        nonMissingTargetLeaves,
        nonMissingInBoth,
        ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Scores each variable on its own under the two single-variable null distributions, scores the
/// joint alternative distribution, and packages the results with the observed 2x2 counts.
/// </summary>
/// <param name="predictorMap">Maps a leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps a leaf to the target's sufficient statistics.</param>
/// <returns>The result produced by <c>EvaluationResultsDiscrete.GetInstance</c>.</returns>
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predictorMap, Converter<Leaf, SufficientStatistics> targetMap)
{
    int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
    int tt = observedCounts[(int)TwoByTwo.ParameterIndex.TT];
    int tf = observedCounts[(int)TwoByTwo.ParameterIndex.TF];
    int ft = observedCounts[(int)TwoByTwo.ParameterIndex.FT];
    int ff = observedCounts[(int)TwoByTwo.ParameterIndex.FF];

    // The same table with the TF and FT cells exchanged; this stands in for calling
    // ModelScorer.PhyloTree.FisherCounts(targetMap, predictorMap).
    int[] countsForPredictor = new int[] { tt, ft, tf, ff };
    int[] countsForTarget = observedCounts;

#if NAIVE_EQUILIBRIUM
    // Backwards-compatibility mode: replace both tables with the plain per-variable leaf counts.
    int[] leafCountsForPredictor = ModelScorer.PhyloTree.CountsOfLeaves(predictorMap);
    int[] leafCountsForTarget = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
    countsForPredictor = leafCountsForPredictor;
    countsForTarget = leafCountsForTarget;
#endif

    // Each single-variable score is computed with the other variable's map in the first slot.
    bool predictorInvariant;
    bool targetInvariant;
    Score predictorNullScore = ComputeSingleVariableScore(
        targetMap,
        predictorMap,
        (DistributionDiscreteSingleVariable)NullDistns[0],
        countsForPredictor,
        out predictorInvariant);
    Score targetNullScore = ComputeSingleVariableScore(
        predictorMap,
        targetMap,
        (DistributionDiscreteSingleVariable)NullDistns[1],
        countsForTarget,
        out targetInvariant);

    List<Score> nullScores = new List<Score>(2);
    nullScores.Add(predictorNullScore);
    nullScores.Add(targetNullScore);

    // Seed the joint model from the two single-variable fits.
    OptimizationParameterList seedParams = ((DistributionDiscreteJoint)AltDistn).GenerateInitialParams(
        predictorNullScore.OptimizationParameters,
        targetNullScore.OptimizationParameters);

    Score jointScore;
    if (!predictorInvariant && !targetInvariant)
    {
        MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(
            CreateJointMap(predictorMap, targetMap),
            (DistributionDiscreteJoint)AltDistn,
            seedParams,
            ModelScorer.PhyloTree.LeafCollection);
        jointScore = ModelScorer.MaximizeLikelihood(jointInitializer);
    }
    else
    {
        // Joint parameters cannot be computed when either variable is invariant. They come
        // directly from the single-variable parameters, and the joint log-likelihood is the
        // sum of the two single-variable log-likelihoods.
        double summedLogLikelihood = predictorNullScore.Loglikelihood + targetNullScore.Loglikelihood;
        jointScore = Score.GetInstance(summedLogLikelihood, seedParams, AltDistn);
    }

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, jointScore, observedCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Produces the alternative-model score for the target given the predictor. Two degenerate
/// layouts of the 2x2 counts are answered directly; every other layout is handed to
/// <c>ModelScorer.MaximizeLikelihood</c>.
/// </summary>
/// <param name="predictorMap">Maps a leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps a leaf to the target's sufficient statistics.</param>
/// <param name="nullScore">Previously computed null score; its parameters are reused here.</param>
/// <param name="fisherCounts">2x2 counts indexed by <c>TwoByTwo.ParameterIndex</c>.</param>
/// <returns>The alternative-model score.</returns>
private Score ComputeConditionalVariableScore(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    Score nullScore,
    int[] fisherCounts)
{
    int predictor1Index = (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1;
    int predictor2Index = (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2;
    int lambdaIndex = (int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda;
    int equilibriumIndex = (int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium;

    int total = SpecialFunctions.Sum(fisherCounts);
    int targetTrueCount = fisherCounts[(int)TwoByTwo.ParameterIndex.TT] + fisherCounts[(int)TwoByTwo.ParameterIndex.FT];
    int predictorTrueCount = fisherCounts[(int)TwoByTwo.ParameterIndex.TT] + fisherCounts[(int)TwoByTwo.ParameterIndex.TF];

    // Case 1: the target is always true or always false.
    if (targetTrueCount == total || targetTrueCount == 0)
    {
        // With no observations at all, lambda and the log-likelihood are reported as NaN.
        bool noObservations = total == 0;
        double lambdaOrLogLikelihood = noObservations ? double.NaN : 0;

        OptimizationParameterList fixedParams = AltDistn.GetParameters();
        fixedParams[predictor1Index].Value = 0;
        fixedParams[predictor2Index].Value = 0;
        fixedParams[lambdaIndex].Value = lambdaOrLogLikelihood;
        fixedParams[equilibriumIndex].Value = (double)targetTrueCount / total;
        return Score.GetInstance(lambdaOrLogLikelihood, fixedParams, AltDistn);
    }

    // Case 2: the predictor is always true or always false, so the null fit is reused as-is.
    if (predictorTrueCount == 0 || predictorTrueCount == total)
    {
        OptimizationParameterList sourceParams = nullScore.OptimizationParameters;
        OptimizationParameterList copiedParams = AltDistn.GetParameters();
        int[] indicesToCopy = new int[] { predictor1Index, predictor2Index, lambdaIndex, equilibriumIndex };
        foreach (int index in indicesToCopy)
        {
            copiedParams[index].Value = sourceParams[index].Value;
        }
        return Score.GetInstance(nullScore.Loglikelihood, copiedParams, AltDistn);
    }

    // General case: compute the maximum-likelihood score with ModelScorer, starting from the
    // null score's parameters.
    MessageInitializerDiscrete alternativeInitializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        (DistributionDiscreteConditional)AltDistn,
        nullScore.OptimizationParameters,
        ModelScorer.PhyloTree.LeafCollection);
    return ModelScorer.MaximizeLikelihood(alternativeInitializer);
}
/// <summary>
/// Fits the discrete distribution to the real data, then evolves a random binary labelling on
/// the tree using the fitted equilibrium and lambda and the real data's missing fraction.
/// </summary>
/// <param name="realCaseIdToNonMissingValue">Real data: case id to its non-missing value.</param>
/// <param name="random">Random source, forwarded by reference to <c>EvolveBinaryTree</c>.</param>
/// <returns>The evolved labelling, converted to the base <c>SufficientStatistics</c> value type.</returns>
public override Dictionary<string, SufficientStatistics> GenerateRandomMapping(Dictionary<string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
{
    // TODO: add a check that the supplied SufficientStatistics values are really BooleanStatistics.
    Converter<Leaf, SufficientStatistics> leafToStatistics = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

    PhyloTree tree = _modelScorer.PhyloTree;
    MessageInitializer initializer = MessageInitializerDiscrete.GetInstance(
        leafToStatistics,
        _discreteDistribution,
        new int[] { 1, 1 },
        tree.LeafCollection);
    Score fittedScore = _modelScorer.MaximizeLikelihood(initializer);

    // Fraction of the tree's leaves that carry a value in the real data.
    double nonMissingLeafCount = (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue);
    double totalLeafCount = (double)SpecialFunctions.Count(tree.LeafCollection);
    double nonMissingFraction = nonMissingLeafCount / totalLeafCount;

    OptimizationParameterList fittedParams = fittedScore.OptimizationParameters;
    double equilibrium = fittedParams[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
    double lambda = fittedParams[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

    Dictionary<string, BooleanStatistics> evolved = tree.EvolveBinaryTree(equilibrium, lambda, 1 - nonMissingFraction, ref random);

    Dictionary<string, SufficientStatistics> result;
    SpecialFunctions.ConvertDictionaryToBaseClasses(evolved, out result);
    return result;
}
/// <summary>
/// Scores a single variable under the given null distribution. The score is taken straight
/// from the counts when <c>TryGetSingleVariableScoreFromCounts</c> succeeds; otherwise it is
/// obtained from <c>ModelScorer.MaximizeLikelihood</c>.
/// </summary>
/// <param name="predictorMap">Maps a leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps a leaf to the target's sufficient statistics.</param>
/// <param name="nullDistn">Single-variable null distribution to score.</param>
/// <param name="fisherCounts">Counts passed to the initializer and used to derive the ratio below.</param>
/// <param name="variableIsInvariant">Set to true exactly when the score came from the counts.</param>
/// <returns>The null score.</returns>
protected Score ComputeSingleVariableScore(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    DistributionDiscreteSingleVariable nullDistn,
    int[] fisherCounts,
    out bool variableIsInvariant)
{
    MessageInitializerDiscrete initializer = MessageInitializerDiscrete.GetInstance(
        predictorMap,
        targetMap,
        nullDistn,
        fisherCounts,
        ModelScorer.PhyloTree.LeafCollection);

    // Ratio of TwoByTwo.GetRightSum(fisherCounts) to the total of all counts.
    double rightSumFraction = (double)TwoByTwo.GetRightSum(fisherCounts) / SpecialFunctions.Sum(fisherCounts);

    Score result;
    variableIsInvariant = TryGetSingleVariableScoreFromCounts(initializer, rightSumFraction, out result);
    if (!variableIsInvariant)
    {
        result = ModelScorer.MaximizeLikelihood(initializer);
    }
    return result;
}