/// <summary>
/// Fits a single-variable null model to the predictor and to the target, then scores the
/// joint alternative model, and bundles everything into an evaluation result.
/// </summary>
/// <param name="predictorMap">Maps each leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps each leaf to the target's sufficient statistics.</param>
/// <returns>
/// Results built from the two null scores (predictor first, target second), the joint score
/// and the Fisher counts of the predictor/target pair.
/// </returns>
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predictorMap, Converter<Leaf, SufficientStatistics> targetMap)
{
    int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // The predictor's table is the observed table with the TF and FT cells exchanged;
    // this stands in for calling FisherCounts(targetMap, predictorMap).
    int[] predictorCounts = new int[]
    {
        observedCounts[(int)TwoByTwo.ParameterIndex.TT],
        observedCounts[(int)TwoByTwo.ParameterIndex.FT],
        observedCounts[(int)TwoByTwo.ParameterIndex.TF],
        observedCounts[(int)TwoByTwo.ParameterIndex.FF]
    };
    int[] targetCounts = observedCounts;

#if NAIVE_EQUILIBRIUM
    // Backwards-compatibility mode: seed the single-variable fits with plain leaf counts.
    int[] leafCountsPredictor = ModelScorer.PhyloTree.CountsOfLeaves(predictorMap);
    int[] leafCountsTarget = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
    predictorCounts = leafCountsPredictor;
    targetCounts = leafCountsTarget;
#endif

    bool predictorInvariant;
    bool targetInvariant;
    Score predictorNull = ComputeSingleVariableScore(targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[0], predictorCounts, out predictorInvariant);
    Score targetNull = ComputeSingleVariableScore(predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[1], targetCounts, out targetInvariant);

    List<Score> nullScores = new List<Score>(2);
    nullScores.Add(predictorNull);
    nullScores.Add(targetNull);

    DistributionDiscreteJoint jointDistn = (DistributionDiscreteJoint)AltDistn;
    OptimizationParameterList initParams = jointDistn.GenerateInitialParams(predictorNull.OptimizationParameters, targetNull.OptimizationParameters);

    Score jointScore;
    if (!predictorInvariant && !targetInvariant)
    {
        MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(CreateJointMap(predictorMap, targetMap), jointDistn, initParams, ModelScorer.PhyloTree.LeafCollection);
        jointScore = ModelScorer.MaximizeLikelihood(jointInitializer);
    }
    else
    {
        // Joint parameters cannot be computed when either variable is invariant; they come
        // directly from the single-variable parameters and the log-likelihoods simply add.
        double summedLL = predictorNull.Loglikelihood + targetNull.Loglikelihood;
        jointScore = Score.GetInstance(summedLL, initParams, AltDistn);
    }

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, jointScore, observedCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Produces the alternative (conditional) score for the target given the predictor,
/// short-circuiting the optimizer when either variable takes a single value in the counts.
/// </summary>
/// <param name="predictorMap">Maps each leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps each leaf to the target's sufficient statistics.</param>
/// <param name="nullScore">Already-computed null score; its parameters seed or replace the alternative parameters.</param>
/// <param name="fisherCounts">Two-by-two counts indexed by <c>TwoByTwo.ParameterIndex</c>.</param>
/// <returns>The alternative-model score.</returns>
private Score ComputeConditionalVariableScore(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    Score nullScore,
    int[] fisherCounts)
{
    int bothTrue = fisherCounts[(int)TwoByTwo.ParameterIndex.TT];
    int predictorOnly = fisherCounts[(int)TwoByTwo.ParameterIndex.TF];
    int targetOnly = fisherCounts[(int)TwoByTwo.ParameterIndex.FT];
    int total = SpecialFunctions.Sum(fisherCounts);

    int targetTrue = bothTrue + targetOnly;
    int predictorTrue = bothTrue + predictorOnly;

    // Case 1: the target is always true or always false.
    if (targetTrue == total || targetTrue == 0)
    {
        // With no observations at all, lambda and the log-likelihood are reported as NaN.
        bool noObservations = total == 0;
        double degenerateValue = noObservations ? double.NaN : 0;

        OptimizationParameterList fixedParams = AltDistn.GetParameters();
        fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1].Value = 0;
        fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2].Value = 0;
        fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value = degenerateValue;
        fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = (double)targetTrue / total;
        return Score.GetInstance(degenerateValue, fixedParams, AltDistn);
    }

    // Case 2: the predictor is always true or always false, so the alternative
    // reuses the null model's parameters and log-likelihood.
    if (predictorTrue == 0 || predictorTrue == total)
    {
        OptimizationParameterList sourceParams = nullScore.OptimizationParameters;
        OptimizationParameterList copiedParams = AltDistn.GetParameters();
        int[] slotsToCopy = new int[]
        {
            (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1,
            (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2,
            (int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda,
            (int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium
        };
        foreach (int slot in slotsToCopy)
        {
            copiedParams[slot].Value = sourceParams[slot].Value;
        }
        return Score.GetInstance(nullScore.Loglikelihood, copiedParams, AltDistn);
    }

    // Case 3: both variables vary, so maximize the likelihood with ModelScorer.
    MessageInitializerDiscrete conditionalInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
    return ModelScorer.MaximizeLikelihood(conditionalInitializer);
}
//public static Converter<Leaf, SufficientStatistics> TESTPRED, TESTTARG;
//public static EvaluationResults TESTEVALRESULTS;

/// <summary>
/// Re-scores the null and conditional alternative models on the given data using the
/// parameters stored in <paramref name="previousResults"/>; nothing is re-optimized.
/// </summary>
/// <param name="predictorMap">Maps each leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps each leaf to the target's sufficient statistics.</param>
/// <param name="previousResults">Earlier results supplying the null and alternative parameters.</param>
/// <returns>
/// Results holding the recomputed null score(s) (predictor null first when it is included,
/// target null last), the alternative score and the Fisher counts.
/// </returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    EvaluationResults previousResults)
{
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
    int[] noCounts = new int[0];

    // When the predictor null is included it occupies slot 0, pushing the target null to slot 1.
    int targetSlot = _includePredictorInScore ? 1 : 0;
    OptimizationParameterList targetNullParams = previousResults.NullScores[targetSlot].OptimizationParameters;
    MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, noCounts, ModelScorer.PhyloTree.LeafCollection);
    double targetNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams);
    Score targetNull = Score.GetInstance(targetNullLL, targetNullParams, NullDistn);

    OptimizationParameterList altParams = previousResults.AltScore.OptimizationParameters;
    MessageInitializerDiscrete altInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, noCounts, ModelScorer.PhyloTree.LeafCollection);
    double conditionalLL = ModelScorer.ComputeLogLikelihoodModelGivenData(altInitializer, altParams);
    Score altScore = Score.GetInstance(conditionalLL, altParams, AltDistn);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        OptimizationParameterList predictorNullParams = previousResults.NullScores[0].OptimizationParameters;
        MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, NullDistn, noCounts, ModelScorer.PhyloTree.LeafCollection);
        double predictorNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams);
        Score predictorNull = Score.GetInstance(predictorNullLL, predictorNullParams, NullDistn);
        nullScores.Add(predictorNull);

        // The conditional alternative score does not include the predictor's log-likelihood.
        // Add it here so the result is comparable to the joint or reverse-conditional models.
        altScore = Score.GetInstance(altScore.Loglikelihood + predictorNull.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
    }
    nullScores.Add(targetNull);

    return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Re-scores both single-variable null models and the joint alternative model on the given
/// data using the parameters stored in <paramref name="previousResults"/>; nothing is re-optimized.
/// </summary>
/// <param name="predictorMap">Maps each leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps each leaf to the target's sufficient statistics.</param>
/// <param name="previousResults">Earlier results; null slot 0 is read as the predictor, slot 1 as the target.</param>
/// <returns>Results holding the recomputed null scores (predictor, target), the joint score and the Fisher counts.</returns>
public override EvaluationResults EvaluateModelOnDataGivenParams(Converter<Leaf, SufficientStatistics> predictorMap, Converter<Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
{
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

    // NOTE(review): the target null is evaluated with NullDistns[0] although its parameters
    // come from NullScores[1] (and the predictor null uses NullDistns[1] with NullScores[0]).
    // Confirm that this pairing is intended.
    Score previousTargetNull = previousResults.NullScores[1];
    OptimizationParameterList targetNullParams = previousTargetNull.OptimizationParameters;
    MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[0], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    double targetNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams);
    Score targetNull = Score.GetInstance(targetNullLL, targetNullParams, previousResults.NullScores[1].Distribution);

    OptimizationParameterList predictorNullParams = previousResults.NullScores[0].OptimizationParameters;
    MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[1], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    double predictorNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams);
    Score predictorNull = Score.GetInstance(predictorNullLL, predictorNullParams, previousResults.NullScores[0].Distribution);

    List<Score> nullScores = new List<Score>(2);
    nullScores.Add(predictorNull);
    nullScores.Add(targetNull);

    OptimizationParameterList altParams = previousResults.AltScore.OptimizationParameters;
    double jointLL;
    if (!((DistributionDiscreteJoint)AltDistn).ParametersCannotBeEvaluated(altParams))
    {
        MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(CreateJointMap(predictorMap, targetMap), (DistributionDiscreteJoint)AltDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
        jointLL = ModelScorer.ComputeLogLikelihoodModelGivenData(jointInitializer, altParams);
    }
    else
    {
        // Reached only if one of the variables is always (or never) true. The variables
        // must then be independent, so the joint log-likelihood is the sum of the two nulls.
        jointLL = targetNullLL + predictorNullLL;
    }

    Score altScore = Score.GetInstance(jointLL, altParams, previousResults.AltScore.Distribution);
    return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);
}
/// <summary>
/// Fits the discrete distribution to the real data, then evolves a random binary mapping
/// on the tree using the fitted equilibrium and lambda and the observed missing-data fraction.
/// </summary>
/// <param name="realCaseIdToNonMissingValue">Observed, non-missing value for each case id.</param>
/// <param name="random">Random source passed by reference to the tree's evolve routine.</param>
/// <returns>The randomly generated values, converted to the base statistics type.</returns>
public override Dictionary<string, SufficientStatistics> GenerateRandomMapping(Dictionary<string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
{
    // TODO: add a check that the supplied statistics really are BooleanStatistics.
    Converter<Leaf, SufficientStatistics> leafToStatistics = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);
    PhyloTree tree = _modelScorer.PhyloTree;

    MessageInitializer fitInitializer = MessageInitializerDiscrete.GetInstance(leafToStatistics, _discreteDistribution, new int[] { 1, 1 }, tree.LeafCollection);
    Score fitted = _modelScorer.MaximizeLikelihood(fitInitializer);

    double nonMissingLeaves = (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue);
    double allLeaves = (double)SpecialFunctions.Count(tree.LeafCollection);
    double percentNonMissing = nonMissingLeaves / allLeaves;

    OptimizationParameterList fittedParams = fitted.OptimizationParameters;
    double equilibrium = fittedParams[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
    double lambda = fitted.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

    Dictionary<string, BooleanStatistics> evolved = tree.EvolveBinaryTree(equilibrium, lambda, 1 - percentNonMissing, ref random);

    Dictionary<string, SufficientStatistics> result;
    SpecialFunctions.ConvertDictionaryToBaseClasses(evolved, out result);
    return result;
}
/// <summary>
/// Scores a single-variable null model, taking the closed-form shortcut when the counts
/// show the variable to be degenerate and otherwise maximizing the likelihood.
/// </summary>
/// <param name="predictorMap">Passed first to the message initializer.</param>
/// <param name="targetMap">Passed second to the message initializer.</param>
/// <param name="nullDistn">Single-variable distribution to fit.</param>
/// <param name="fisherCounts">Two-by-two counts used for the initializer and the shortcut test.</param>
/// <param name="variableIsInvariant">Set to true when the shortcut applied, false when the optimizer ran.</param>
/// <returns>The null-model score.</returns>
protected Score ComputeSingleVariableScore(
    Converter<Leaf, SufficientStatistics> predictorMap,
    Converter<Leaf, SufficientStatistics> targetMap,
    DistributionDiscreteSingleVariable nullDistn,
    int[] fisherCounts,
    out bool variableIsInvariant)
{
    MessageInitializerDiscrete initializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, nullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);

    // Ratio of TwoByTwo.GetRightSum to the total count; NaN when the total is zero.
    double fraction = (double)TwoByTwo.GetRightSum(fisherCounts) / SpecialFunctions.Sum(fisherCounts);

    Score result;
    variableIsInvariant = TryGetSingleVariableScoreFromCounts(initializer, fraction, out result);
    if (!variableIsInvariant)
    {
        result = ModelScorer.MaximizeLikelihood(initializer);
    }
    return result;
}
/// <summary>
/// Builds a score directly from the observed fraction when it is exactly 0, exactly 1 or NaN,
/// so that no optimization is needed.
/// </summary>
/// <param name="singleVariableMessageInitializer">Supplies the distribution whose parameters are filled in.</param>
/// <param name="pVar">Observed fraction for the variable.</param>
/// <param name="score">The shortcut score on success; null otherwise.</param>
/// <returns>True when a shortcut score was produced; false when the caller must optimize.</returns>
private bool TryGetSingleVariableScoreFromCounts(MessageInitializerDiscrete singleVariableMessageInitializer, double pVar, out Score score)
{
    bool undefined = double.IsNaN(pVar);
    bool invariant = pVar == 1 || pVar == 0;
    if (!invariant && !undefined)
    {
        score = null;
        return false;
    }

    // Invariant data gets lambda 0, equilibrium pVar and log-likelihood 0.
    // A NaN fraction propagates NaN into all three values.
    double lambda = undefined ? double.NaN : 0;
    double equilibrium = undefined ? double.NaN : pVar;
    double logLikelihood = undefined ? double.NaN : 0;

    OptimizationParameterList shortcutParams = singleVariableMessageInitializer.DiscreteDistribution.GetParameters();
    shortcutParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value = lambda;
    shortcutParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = equilibrium;
    score = Score.GetInstance(logLikelihood, shortcutParams, singleVariableMessageInitializer.DiscreteDistribution);
    return true;
}
/// <summary>
/// Fits the target's null model and the conditional alternative model, optionally adding the
/// predictor's null model, and bundles the scores into an evaluation result.
/// </summary>
/// <param name="predictorMap">Maps each leaf to the predictor's sufficient statistics.</param>
/// <param name="targetMap">Maps each leaf to the target's sufficient statistics.</param>
/// <returns>
/// Results holding the null score(s) (predictor null first when it is included, target null
/// last), the alternative score and the Fisher counts of the predictor/target pair.
/// </returns>
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predictorMap, Converter<Leaf, SufficientStatistics> targetMap)
{
    // realFisherCounts always holds the true table; fittingCounts is what the fits are
    // seeded with and differs only when NAIVE_EQUILIBRIUM is defined.
    int[] realFisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
    int[] fittingCounts = realFisherCounts;

#if NAIVE_EQUILIBRIUM
    // Backwards-compatibility mode: seed the fits with plain leaf counts of the target.
    int[] leafCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
    fittingCounts = leafCounts;
#endif

    bool unusedInvariantFlag;
    Score targetNull = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, fittingCounts, out unusedInvariantFlag);
    Score altScore = ComputeConditionalVariableScore(predictorMap, targetMap, targetNull, fittingCounts);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        // Exchange entries 1 and 2 to get the table from the predictor's point of view.
        int[] predictorCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
        Score predictorNull = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, predictorCounts, out unusedInvariantFlag);
        nullScores.Add(predictorNull);

        // The conditional alternative score does not include the predictor's log-likelihood.
        // Add it here so the result is comparable to the joint or reverse-conditional models.
        altScore = Score.GetInstance(altScore.Loglikelihood + predictorNull.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
    }
    nullScores.Add(targetNull);

    EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, realFisherCounts, ChiSquareDegreesOfFreedom);

#if DEBUG
    // Sanity check: recomputing the log-likelihoods from the fitted parameters must
    // reproduce the values obtained by maximization.
    MessageInitializerDiscrete checkNullInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fittingCounts, ModelScorer.PhyloTree.LeafCollection);
    MessageInitializerDiscrete checkAltInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, targetNull.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
    double checkNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkNullInitializer, targetNull.OptimizationParameters);
    double checkAltLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkAltInitializer, altScore.OptimizationParameters);

    if (_includePredictorInScore)
    {
        int[] checkPredictorCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
        MessageInitializerDiscrete checkPredictorInitializer = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, NullDistn, checkPredictorCounts, ModelScorer.PhyloTree.LeafCollection);
        double checkPredictorLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkPredictorInitializer, nullScores[0].OptimizationParameters);
        checkAltLL += checkPredictorLL;
    }

    EvaluationResults recomputed = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);

    double tolerance = 1E-10;
    Debug.Assert(ComplexNumber.ApproxEqual(checkNullLL, targetNull.Loglikelihood, tolerance));
    Debug.Assert(ComplexNumber.ApproxEqual(checkAltLL, altScore.Loglikelihood, tolerance));
    Debug.Assert(
        ComplexNumber.ApproxEqual(evalResults.NullLL, recomputed.NullLL, tolerance) && ComplexNumber.ApproxEqual(evalResults.AltLL, recomputed.AltLL, tolerance),
        "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

    return evalResults;
}