예제 #1
0
        /// <summary>
        /// Evaluates the joint model for one predictor/target pair. A single-variable null score
        /// is obtained for each variable through <c>ComputeSingleVariableScore</c>; the alternative
        /// score is either the sum of those two log-likelihoods (when either variable is flagged
        /// invariant) or the result of maximizing the likelihood over the joint map.
        /// </summary>
        /// <param name="predictorMap">Leaf-to-statistics lookup for the predictor variable.</param>
        /// <param name="targetMap">Leaf-to-statistics lookup for the target variable.</param>
        /// <returns>
        /// Evaluation results built from the null scores (predictor first, target second), the
        /// joint score, and the counts returned by <c>FisherCounts(predictorMap, targetMap)</c>.
        /// </returns>
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            // These counts are always the ones reported, even when NAIVE_EQUILIBRIUM replaces the working copies below.
            int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            int bothTrue  = observedCounts[(int)TwoByTwo.ParameterIndex.TT];
            int trueFalse = observedCounts[(int)TwoByTwo.ParameterIndex.TF];
            int falseTrue = observedCounts[(int)TwoByTwo.ParameterIndex.FT];
            int bothFalse = observedCounts[(int)TwoByTwo.ParameterIndex.FF];

            // Predictor-side counts: the same cells with TF and FT exchanged. This is used in place of
            // a second FisherCounts(targetMap, predictorMap) call.
            int[] predictorCounts = new int[] { bothTrue, falseTrue, trueFalse, bothFalse };
            int[] targetCounts    = observedCounts;

#if NAIVE_EQUILIBRIUM
            // Backwards-compatibility mode: score each variable with its own leaf counts instead.
            predictorCounts = ModelScorer.PhyloTree.CountsOfLeaves(predictorMap);
            targetCounts    = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
#endif

            // Note the swapped map order for the predictor: it is scored as if it were the target.
            bool predictorIsInvariant;
            Score predictorNull = ComputeSingleVariableScore(
                targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[0], predictorCounts, out predictorIsInvariant);

            bool targetIsInvariant;
            Score targetNull = ComputeSingleVariableScore(
                predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[1], targetCounts, out targetIsInvariant);

            List<Score> nullScores = new List<Score>();
            nullScores.Add(predictorNull);
            nullScores.Add(targetNull);

            // Starting point for the joint model, derived from the two single-variable fits.
            OptimizationParameterList startingParams =
                ((DistributionDiscreteJoint)AltDistn).GenerateInitialParams(predictorNull.OptimizationParameters, targetNull.OptimizationParameters);

            Score jointScore;
            if (!predictorIsInvariant && !targetIsInvariant)
            {
                MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(
                    CreateJointMap(predictorMap, targetMap),
                    (DistributionDiscreteJoint)AltDistn,
                    startingParams,
                    ModelScorer.PhyloTree.LeafCollection);
                jointScore = ModelScorer.MaximizeLikelihood(jointInitializer);
            }
            else
            {
                // Joint parameters cannot be computed here; they come straight from the single-variable
                // parameters, and the log-likelihood is the sum of the two null log-likelihoods.
                double summedLL = predictorNull.Loglikelihood + targetNull.Loglikelihood;
                jointScore = Score.GetInstance(summedLL, startingParams, AltDistn);
            }

            return EvaluationResultsDiscrete.GetInstance(this, nullScores, jointScore, observedCounts, ChiSquareDegreesOfFreedom);
        }
예제 #2
0
        /// <summary>
        /// Produces the alternative (conditional) score for the target given the predictor.
        /// Two shortcuts are taken from the counts alone; otherwise the likelihood is maximized
        /// through <c>ModelScorer</c>, starting from the null score's parameters.
        /// </summary>
        /// <param name="predictorMap">Leaf-to-statistics lookup for the predictor variable.</param>
        /// <param name="targetMap">Leaf-to-statistics lookup for the target variable.</param>
        /// <param name="nullScore">Previously computed null score for the target.</param>
        /// <param name="fisherCounts">Counts indexed by <c>TwoByTwo.ParameterIndex</c>.</param>
        /// <returns>The alternative-model score.</returns>
        private Score ComputeConditionalVariableScore(
            Converter <Leaf, SufficientStatistics> predictorMap,
            Converter <Leaf, SufficientStatistics> targetMap,
            Score nullScore,
            int[] fisherCounts)
        {
            int bothTrue  = fisherCounts[(int)TwoByTwo.ParameterIndex.TT];
            int trueFalse = fisherCounts[(int)TwoByTwo.ParameterIndex.TF];
            int falseTrue = fisherCounts[(int)TwoByTwo.ParameterIndex.FT];
            int total     = SpecialFunctions.Sum(fisherCounts);

            int targetTrueCount    = bothTrue + falseTrue;
            int predictorTrueCount = bothTrue + trueFalse;

            // Case 1: the target is always true or always false (this also catches an all-zero table).
            if (targetTrueCount == total || targetTrueCount == 0)
            {
                // With no observations at all, the score and lambda are reported as NaN rather than 0.
                bool noObservations = total == 0;

                OptimizationParameterList fixedParams = AltDistn.GetParameters();
                fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1].Value  = 0;
                fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2].Value  = 0;
                fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value      = noObservations ? double.NaN : 0;
                // Floating-point division: yields NaN when total is 0.
                fixedParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = (double)targetTrueCount / total;

                return Score.GetInstance(noObservations ? double.NaN : 0, fixedParams, AltDistn);
            }

            // Case 2: the predictor is always true or always false, so the alternative score
            // reuses the null score's parameter values and log-likelihood.
            if (predictorTrueCount == 0 || predictorTrueCount == total)
            {
                OptimizationParameterList sourceParams = nullScore.OptimizationParameters;
                OptimizationParameterList copiedParams = AltDistn.GetParameters();

                int[] slotsToCopy = new int[]
                {
                    (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1,
                    (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2,
                    (int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda,
                    (int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium
                };
                foreach (int slot in slotsToCopy)
                {
                    copiedParams[slot].Value = sourceParams[slot].Value;
                }

                return Score.GetInstance(nullScore.Loglikelihood, copiedParams, AltDistn);
            }

            // General case: maximize the likelihood using ModelScorer.
            MessageInitializerDiscrete conditionalInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap,
                targetMap,
                (DistributionDiscreteConditional)AltDistn,
                nullScore.OptimizationParameters,
                ModelScorer.PhyloTree.LeafCollection);
            return ModelScorer.MaximizeLikelihood(conditionalInitializer);
        }
예제 #3
0
        //public static Converter<Leaf, SufficientStatistics> TESTPRED, TESTTARG;
        //public static EvaluationResults TESTEVALRESULTS;

        /// <summary>
        /// Re-scores the conditional model on the given data using the parameters stored in
        /// <paramref name="previousResults"/> instead of re-optimizing them.
        /// </summary>
        /// <param name="predictorMap">Leaf-to-statistics lookup for the predictor variable.</param>
        /// <param name="targetMap">Leaf-to-statistics lookup for the target variable.</param>
        /// <param name="previousResults">Earlier results that supply the null and alternative parameters.</param>
        /// <returns>
        /// Evaluation results whose null scores are the optional predictor score followed by the
        /// target score, paired with the recomputed alternative score.
        /// </returns>
        public override EvaluationResults EvaluateModelOnDataGivenParams(
            Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
        {
            int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            // When the predictor is scored as well, its null score sits at index 0 and the target's at index 1.
            int targetSlot;
            if (_includePredictorInScore)
            {
                targetSlot = 1;
            }
            else
            {
                targetSlot = 0;
            }

            // Target null model. An empty count array is passed to the initializer in this method.
            OptimizationParameterList  targetNullParams      = previousResults.NullScores[targetSlot].OptimizationParameters;
            MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, NullDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
            Score targetNullScore = Score.GetInstance(
                ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams),
                targetNullParams,
                NullDistn);

            // Alternative (conditional) model.
            OptimizationParameterList  storedAltParams        = previousResults.AltScore.OptimizationParameters;
            MessageInitializerDiscrete conditionalInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
            Score altScore = Score.GetInstance(
                ModelScorer.ComputeLogLikelihoodModelGivenData(conditionalInitializer, storedAltParams),
                storedAltParams,
                AltDistn);

            List<Score> nullScores = new List<Score>();

            if (_includePredictorInScore)
            {
                // Predictor null model: the maps are passed in swapped order.
                OptimizationParameterList  predictorNullParams      = previousResults.NullScores[0].OptimizationParameters;
                MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(
                    targetMap, predictorMap, NullDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
                Score predictorNullScore = Score.GetInstance(
                    ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams),
                    predictorNullParams,
                    NullDistn);
                nullScores.Add(predictorNullScore);

                // The conditional score does not contain the predictor's log-likelihood; add it so the
                // total is comparable to the joint or reverse-conditional models.
                double combinedLL = altScore.Loglikelihood + predictorNullScore.Loglikelihood;
                altScore = Score.GetInstance(combinedLL, altScore.OptimizationParameters, altScore.Distribution);
            }

            nullScores.Add(targetNullScore);

            return EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, observedCounts, ChiSquareDegreesOfFreedom);
        }
예제 #4
0
        /// <summary>
        /// Re-scores the joint model on the given data using the parameters stored in
        /// <paramref name="previousResults"/> instead of re-optimizing them.
        /// </summary>
        /// <param name="predictorMap">Leaf-to-statistics lookup for the predictor variable.</param>
        /// <param name="targetMap">Leaf-to-statistics lookup for the target variable.</param>
        /// <param name="previousResults">Earlier results; null score 0 is read as the predictor's, 1 as the target's.</param>
        /// <returns>Evaluation results holding the recomputed null scores (predictor, then target) and joint score.</returns>
        public override EvaluationResults EvaluateModelOnDataGivenParams(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
        {
            int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            // NOTE(review): the target parameters come from NullScores[1] but are evaluated under
            // NullDistns[0], and the predictor parameters from NullScores[0] under NullDistns[1].
            // The joint EvaluateModelOnData in this listing pairs the indices the other way round
            // (predictor with NullDistns[0], target with NullDistns[1]). Presumably harmless only if
            // both null distributions behave the same; confirm before relying on it.
            Score storedTargetNull = previousResults.NullScores[1];
            OptimizationParameterList  targetNullParams      = storedTargetNull.OptimizationParameters;
            MessageInitializerDiscrete targetNullInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[0], observedCounts, ModelScorer.PhyloTree.LeafCollection);
            double targetNullLL    = ModelScorer.ComputeLogLikelihoodModelGivenData(targetNullInitializer, targetNullParams);
            Score  targetNullScore = Score.GetInstance(targetNullLL, targetNullParams, previousResults.NullScores[1].Distribution);

            // Predictor null model: the maps are passed in swapped order, with the same (unswapped) counts.
            Score storedPredictorNull = previousResults.NullScores[0];
            OptimizationParameterList  predictorNullParams      = storedPredictorNull.OptimizationParameters;
            MessageInitializerDiscrete predictorNullInitializer = MessageInitializerDiscrete.GetInstance(
                targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[1], observedCounts, ModelScorer.PhyloTree.LeafCollection);
            double predictorNullLL    = ModelScorer.ComputeLogLikelihoodModelGivenData(predictorNullInitializer, predictorNullParams);
            Score  predictorNullScore = Score.GetInstance(predictorNullLL, predictorNullParams, previousResults.NullScores[0].Distribution);

            List<Score> nullScores = new List<Score>();
            nullScores.Add(predictorNullScore);
            nullScores.Add(targetNullScore);

            OptimizationParameterList storedJointParams = previousResults.AltScore.OptimizationParameters;
            bool jointParamsUsable = !((DistributionDiscreteJoint)AltDistn).ParametersCannotBeEvaluated(storedJointParams);

            double jointLL;
            if (jointParamsUsable)
            {
                MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(
                    CreateJointMap(predictorMap, targetMap), (DistributionDiscreteJoint)AltDistn, observedCounts, ModelScorer.PhyloTree.LeafCollection);
                jointLL = ModelScorer.ComputeLogLikelihoodModelGivenData(jointInitializer, storedJointParams);
            }
            else
            {
                // Reached only when one of the variables is always (or never) true. The variables must
                // then be independent, so the joint log-likelihood is the sum of the two null ones.
                jointLL = targetNullLL + predictorNullLL;
            }

            Score jointScore = Score.GetInstance(jointLL, storedJointParams, previousResults.AltScore.Distribution);

            return EvaluationResultsDiscrete.GetInstance(this, nullScores, jointScore, observedCounts, ChiSquareDegreesOfFreedom);
        }
예제 #5
0
        /// <summary>
        /// Builds a random case-id-to-value mapping: the discrete distribution is fitted to the real
        /// data by likelihood maximization, and the fitted equilibrium and lambda are then passed to
        /// <c>EvolveBinaryTree</c> together with the fraction of leaves that are missing.
        /// </summary>
        /// <param name="realCaseIdToNonMissingValue">Observed (non-missing) values keyed by case id.</param>
        /// <param name="random">Random source, forwarded by reference to <c>EvolveBinaryTree</c>.</param>
        /// <returns>The generated mapping, with values converted to <see cref="SufficientStatistics"/>.</returns>
        public override Dictionary <string, SufficientStatistics> GenerateRandomMapping(Dictionary <string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
        {
            // TODO: add a check that the supplied statistics really are BooleanStatistics.
            Converter<Leaf, SufficientStatistics> leafLookup = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

            PhyloTree phyloTree = _modelScorer.PhyloTree;

            // Fit the distribution to the real data; { 1, 1 } is the count array handed to the initializer.
            MessageInitializer fitInitializer = MessageInitializerDiscrete.GetInstance(
                leafLookup, _discreteDistribution, new int[] { 1, 1 }, phyloTree.LeafCollection);
            Score fittedScore = _modelScorer.MaximizeLikelihood(fitInitializer);

            // Share of leaves with an observed value; the complement is passed on as the missing fraction.
            double observedFraction = (double)phyloTree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue)
                                      / SpecialFunctions.Count(phyloTree.LeafCollection);

            OptimizationParameterList fittedParams = fittedScore.OptimizationParameters;
            double fittedEquilibrium = fittedParams[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
            double fittedLambda      = fittedParams[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

            Dictionary<string, BooleanStatistics> simulated =
                phyloTree.EvolveBinaryTree(fittedEquilibrium, fittedLambda, 1 - observedFraction, ref random);

            // Re-type the values as the base class expected by callers.
            Dictionary<string, SufficientStatistics> result;
            SpecialFunctions.ConvertDictionaryToBaseClasses(simulated, out result);
            return result;
        }
예제 #6
0
        /// <summary>
        /// Scores a single variable under <paramref name="nullDistn"/>. If the counts alone settle
        /// the answer (see <c>TryGetSingleVariableScoreFromCounts</c>), that score is returned and the
        /// variable is flagged invariant; otherwise the likelihood is maximized.
        /// </summary>
        /// <param name="predictorMap">First map handed to the message initializer.</param>
        /// <param name="targetMap">Second map handed to the message initializer.</param>
        /// <param name="nullDistn">Single-variable distribution to score under.</param>
        /// <param name="fisherCounts">Counts used both for the initializer and for the shortcut test.</param>
        /// <param name="variableIsInvariant">Set to true when the shortcut score was used, false otherwise.</param>
        /// <returns>The single-variable score.</returns>
        protected Score ComputeSingleVariableScore(
            Converter <Leaf, SufficientStatistics> predictorMap,
            Converter <Leaf, SufficientStatistics> targetMap,
            DistributionDiscreteSingleVariable nullDistn,
            int[] fisherCounts,
            out bool variableIsInvariant)
        {
            MessageInitializerDiscrete initializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, nullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);

            // Ratio of the "right sum" to the total count; floating-point division, so an all-zero table gives NaN.
            double rightFraction = (double)TwoByTwo.GetRightSum(fisherCounts) / SpecialFunctions.Sum(fisherCounts);

            Score result;
            variableIsInvariant = TryGetSingleVariableScoreFromCounts(initializer, rightFraction, out result);
            if (!variableIsInvariant)
            {
                result = ModelScorer.MaximizeLikelihood(initializer);
            }

            return result;
        }
예제 #7
0
 /// <summary>
 /// Attempts to build a single-variable score directly from the proportion <paramref name="pVar"/>,
 /// without any optimization. This succeeds when the proportion is exactly 0 or 1, or is NaN.
 /// </summary>
 /// <param name="singleVariableMessageInitializer">Supplies the distribution whose parameters are filled in.</param>
 /// <param name="pVar">Proportion to test.</param>
 /// <param name="score">The shortcut score on success; null otherwise.</param>
 /// <returns>True when a shortcut score was produced; false when the caller has to optimize.</returns>
 private bool TryGetSingleVariableScoreFromCounts(MessageInitializerDiscrete singleVariableMessageInitializer, double pVar, out Score score)
 {
     // Exact comparison is intentional: only a proportion of exactly 0 or 1 qualifies.
     bool isConstant  = pVar == 1 || pVar == 0;
     bool isUndefined = double.IsNaN(pVar);

     if (!isConstant && !isUndefined)
     {
         score = null;
         return false;
     }

     OptimizationParameterList shortcutParams = singleVariableMessageInitializer.DiscreteDistribution.GetParameters();

     if (isConstant)
     {
         // Lambda 0, equilibrium equal to the proportion, score value 0.
         shortcutParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value      = 0;
         shortcutParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = pVar;
         score = Score.GetInstance(0, shortcutParams, singleVariableMessageInitializer.DiscreteDistribution);
     }
     else
     {
         // Undefined proportion: parameters and score are all reported as NaN.
         shortcutParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value      = double.NaN;
         shortcutParams[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = double.NaN;
         score = Score.GetInstance(double.NaN, shortcutParams, singleVariableMessageInitializer.DiscreteDistribution);
     }

     return true;
 }
예제 #8
0
        /// <summary>
        /// Evaluates the conditional model for one predictor/target pair: a null score for the target,
        /// an alternative score from <c>ComputeConditionalVariableScore</c>, and, when
        /// <c>_includePredictorInScore</c> is set, a predictor null score whose log-likelihood is added
        /// to the alternative score.
        /// </summary>
        /// <param name="predictorMap">Leaf-to-statistics lookup for the predictor variable.</param>
        /// <param name="targetMap">Leaf-to-statistics lookup for the target variable.</param>
        /// <returns>
        /// Evaluation results holding the null scores (optional predictor score first, then the
        /// target score), the alternative score and the counts from <c>FisherCounts</c>.
        /// </returns>
        /// <remarks>
        /// In DEBUG builds the log-likelihoods are recomputed from the fitted parameters and via
        /// <c>EvaluateModelOnDataGivenParams</c>, and asserted to agree within 1E-10.
        /// </remarks>
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            // observedCounts is always what gets reported; scoringCounts is the working copy handed to
            // the scoring helpers and only differs when NAIVE_EQUILIBRIUM is defined.
            int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
            int[] scoringCounts  = observedCounts;

#if NAIVE_EQUILIBRIUM
            // Backwards-compatibility mode: score with the target's leaf counts instead.
            scoringCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
#endif

            // Required by the out parameter; the value is not used in this method.
            bool invariantFlag;

            Score targetNullScore = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, scoringCounts, out invariantFlag);
            Score altScore        = ComputeConditionalVariableScore(predictorMap, targetMap, targetNullScore, scoringCounts);

            List<Score> nullScores = new List<Score>();
            if (_includePredictorInScore)
            {
                // Observed counts with elements 1 and 2 exchanged, matching the swapped map order below.
                int[] predictorCounts = new int[] { observedCounts[0], observedCounts[2], observedCounts[1], observedCounts[3] };
                Score predictorNullScore = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, predictorCounts, out invariantFlag);
                nullScores.Add(predictorNullScore);

                // The conditional score does not contain the predictor's log-likelihood; add it so the
                // total is comparable to the joint or reverse-conditional models.
                double combinedLL = altScore.Loglikelihood + predictorNullScore.Loglikelihood;
                altScore = Score.GetInstance(combinedLL, altScore.OptimizationParameters, altScore.Distribution);
            }
            nullScores.Add(targetNullScore);

            EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, observedCounts, ChiSquareDegreesOfFreedom);

#if DEBUG
            // Consistency checks: recompute each log-likelihood from the fitted parameters.
            MessageInitializerDiscrete checkNullInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, NullDistn, scoringCounts, ModelScorer.PhyloTree.LeafCollection);
            MessageInitializerDiscrete checkAltInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, targetNullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
            double recomputedNullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(checkNullInitializer, targetNullScore.OptimizationParameters);
            double recomputedAltLL  = ModelScorer.ComputeLogLikelihoodModelGivenData(checkAltInitializer, altScore.OptimizationParameters);

            if (_includePredictorInScore)
            {
                // altScore already includes the predictor term, so the recomputed value must include it too.
                int[] checkPredictorCounts = new int[] { observedCounts[0], observedCounts[2], observedCounts[1], observedCounts[3] };
                MessageInitializerDiscrete checkPredictorInitializer = MessageInitializerDiscrete.GetInstance(
                    targetMap, predictorMap, NullDistn, checkPredictorCounts, ModelScorer.PhyloTree.LeafCollection);
                recomputedAltLL += ModelScorer.ComputeLogLikelihoodModelGivenData(checkPredictorInitializer, nullScores[0].OptimizationParameters);
            }

            EvaluationResults roundTripResults = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);

            const double tolerance = 1E-10;
            Debug.Assert(ComplexNumber.ApproxEqual(recomputedNullLL, targetNullScore.Loglikelihood, tolerance));
            Debug.Assert(ComplexNumber.ApproxEqual(recomputedAltLL, altScore.Loglikelihood, tolerance));
            Debug.Assert(
                ComplexNumber.ApproxEqual(evalResults.NullLL, roundTripResults.NullLL, tolerance) && ComplexNumber.ApproxEqual(evalResults.AltLL, roundTripResults.AltLL, tolerance),
                "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

            return evalResults;
        }