Beispiel #1
0
        /// <summary>
        /// Fits the null Gaussian distribution and then the alternative Gaussian distribution
        /// to the given predictor/target data, and bundles both scores with the non-missing
        /// leaf counts into an evaluation result.
        /// </summary>
        /// <param name="predMap">Maps a leaf to the predictor's sufficient statistics.</param>
        /// <param name="targMap">Maps a leaf to the target's sufficient statistics.</param>
        /// <returns>The object produced by <c>EvaluationResultsGaussian.GetInstance</c>.</returns>
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predMap, Converter <Leaf, SufficientStatistics> targMap)
        {
            // Leaf counts: predictor only, target only, and both maps together.
            int nonMissingPredictors = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
            int nonMissingTargets    = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
            int nonMissingInBoth     = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

            // Null model: the first entry of NullDistns.
            DistributionGaussianConditional nullDistribution = (DistributionGaussianConditional)NullDistns[0];
            MessageInitializerGaussian nullInitializer =
                MessageInitializerGaussian.GetInstance(predMap, targMap, nullDistribution, ModelScorer.PhyloTree.LeafCollection);
            Score scoreUnderNull = ModelScorer.MaximizeLikelihood(nullInitializer);

            // Alternative model: AltDistn, fitted after the null model.
            DistributionGaussianConditional altDistribution = (DistributionGaussianConditional)AltDistn;
            MessageInitializerGaussian altInitializer =
                MessageInitializerGaussian.GetInstance(predMap, targMap, altDistribution, ModelScorer.PhyloTree.LeafCollection);
            Score scoreUnderAlt = ModelScorer.MaximizeLikelihood(altInitializer);

            return EvaluationResultsGaussian.GetInstance(
                this,
                scoreUnderNull,
                scoreUnderAlt,
                nonMissingPredictors,
                nonMissingTargets,
                nonMissingInBoth,
                ChiSquareDegreesOfFreedom);
        }
Beispiel #2
0
        /// <summary>
        /// Scores each variable on its own under its null distribution, then scores the joint
        /// (alternative) distribution, and packages everything into an evaluation result.
        /// </summary>
        /// <param name="predictorMap">Maps a leaf to the predictor's sufficient statistics.</param>
        /// <param name="targetMap">Maps a leaf to the target's sufficient statistics.</param>
        /// <returns>The object produced by <c>EvaluationResultsDiscrete.GetInstance</c>.</returns>
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            int[] observedCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            // Same four counts with the TF and FT entries exchanged, used when the predictor
            // is scored as the single variable. NOTE(review): presumably equivalent to
            // ModelScorer.PhyloTree.FisherCounts(targetMap, predictorMap) -- confirm.
            int[] countsForPredictor = new int[]
            {
                observedCounts[(int)TwoByTwo.ParameterIndex.TT],
                observedCounts[(int)TwoByTwo.ParameterIndex.FT],
                observedCounts[(int)TwoByTwo.ParameterIndex.TF],
                observedCounts[(int)TwoByTwo.ParameterIndex.FF]
            };
            int[] countsForTarget = observedCounts;

#if NAIVE_EQUILIBRIUM
            // Backwards-compatibility mode: use per-variable leaf counts instead.
            int[] naivePredictorCounts = ModelScorer.PhyloTree.CountsOfLeaves(predictorMap);
            int[] naiveTargetCounts    = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
            countsForPredictor = naivePredictorCounts;
            countsForTarget    = naiveTargetCounts;
#endif
            bool predictorInvariant;
            bool targetInvariant;

            // Note the swapped map order in the first call: there the predictor is the variable being scored.
            Score predictorNullScore = ComputeSingleVariableScore(
                targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[0], countsForPredictor, out predictorInvariant);
            Score targetNullScore = ComputeSingleVariableScore(
                predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[1], countsForTarget, out targetInvariant);

            List<Score> singleVariableScores = new List<Score>(2);
            singleVariableScores.Add(predictorNullScore);
            singleVariableScores.Add(targetNullScore);

            // Starting parameters for the joint model are derived from the two single-variable fits.
            DistributionDiscreteJoint jointDistribution = (DistributionDiscreteJoint)AltDistn;
            OptimizationParameterList startingParams = jointDistribution.GenerateInitialParams(
                predictorNullScore.OptimizationParameters, targetNullScore.OptimizationParameters);

            Score scoreUnderJoint;
            if (!predictorInvariant && !targetInvariant)
            {
                MessageInitializerDiscrete jointInitializer = MessageInitializerDiscrete.GetInstance(
                    CreateJointMap(predictorMap, targetMap), jointDistribution, startingParams, ModelScorer.PhyloTree.LeafCollection);
                scoreUnderJoint = ModelScorer.MaximizeLikelihood(jointInitializer);
            }
            else
            {
                // At least one variable is invariant, so the joint parameters cannot be fitted;
                // they are taken directly from the single-variable parameters and the
                // log-likelihoods are simply added.
                double summedLogLikelihood = predictorNullScore.Loglikelihood + targetNullScore.Loglikelihood;
                scoreUnderJoint = Score.GetInstance(summedLogLikelihood, startingParams, AltDistn);
            }

            return EvaluationResultsDiscrete.GetInstance(this, singleVariableScores, scoreUnderJoint, observedCounts, ChiSquareDegreesOfFreedom);
        }
Beispiel #3
0
        /// <summary>
        /// Produces the score of the alternative (conditional) distribution. Two degenerate
        /// cases are answered directly from the counts; otherwise the likelihood is maximized
        /// through <c>ModelScorer</c>.
        /// </summary>
        /// <param name="predictorMap">Maps a leaf to the predictor's sufficient statistics.</param>
        /// <param name="targetMap">Maps a leaf to the target's sufficient statistics.</param>
        /// <param name="nullScore">Score of the null model; its parameters seed the optimization
        /// and are reused verbatim when the predictor never varies.</param>
        /// <param name="fisherCounts">Counts indexed by <c>TwoByTwo.ParameterIndex</c>.</param>
        /// <returns>The score under the alternative distribution.</returns>
        private Score ComputeConditionalVariableScore(
            Converter <Leaf, SufficientStatistics> predictorMap,
            Converter <Leaf, SufficientStatistics> targetMap,
            Score nullScore,
            int[] fisherCounts)
        {
            int predictor1Slot  = (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1;
            int predictor2Slot  = (int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2;
            int lambdaSlot      = (int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda;
            int equilibriumSlot = (int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium;

            int countTT = fisherCounts[(int)TwoByTwo.ParameterIndex.TT];
            int countTF = fisherCounts[(int)TwoByTwo.ParameterIndex.TF];
            int countFT = fisherCounts[(int)TwoByTwo.ParameterIndex.FT];
            int total   = SpecialFunctions.Sum(fisherCounts);

            // Case 1: the target is always true or always false (this also covers total == 0).
            if (countTT + countFT == total || countTT + countFT == 0)
            {
                // With no observations at all, lambda and the log-likelihood are reported as NaN.
                double lambdaAndLogLikelihood = (total == 0) ? double.NaN : 0;

                OptimizationParameterList fixedParams = AltDistn.GetParameters();
                fixedParams[predictor1Slot].Value  = 0;
                fixedParams[predictor2Slot].Value  = 0;
                fixedParams[lambdaSlot].Value      = lambdaAndLogLikelihood;
                fixedParams[equilibriumSlot].Value = (double)(countTT + countFT) / total;
                return Score.GetInstance(lambdaAndLogLikelihood, fixedParams, AltDistn);
            }

            // Case 2: the predictor is always true or always false, so the alternative model
            // takes the null model's parameter values and log-likelihood unchanged.
            if (countTT + countTF == 0 || countTT + countTF == total)
            {
                OptimizationParameterList sourceParams = nullScore.OptimizationParameters;
                OptimizationParameterList copiedParams = AltDistn.GetParameters();

                int[] slotsToCopy = new int[] { predictor1Slot, predictor2Slot, lambdaSlot, equilibriumSlot };
                foreach (int slot in slotsToCopy)
                {
                    copiedParams[slot].Value = sourceParams[slot].Value;
                }

                return Score.GetInstance(nullScore.Loglikelihood, copiedParams, AltDistn);
            }

            // General case: maximize the likelihood, starting from the null model's parameters.
            MessageInitializerDiscrete altInitializer = MessageInitializerDiscrete.GetInstance(
                predictorMap,
                targetMap,
                (DistributionDiscreteConditional)AltDistn,
                nullScore.OptimizationParameters,
                ModelScorer.PhyloTree.LeafCollection);
            return ModelScorer.MaximizeLikelihood(altInitializer);
        }
        /// <summary>
        /// Fits the discrete distribution to the real data, then evolves a random binary
        /// assignment over the tree using the fitted equilibrium and lambda, with a missing
        /// fraction equal to one minus the real data's non-missing fraction.
        /// </summary>
        /// <param name="realCaseIdToNonMissingValue">Real, non-missing statistics keyed by case id.</param>
        /// <param name="random">Random source handed by reference to <c>EvolveBinaryTree</c>.</param>
        /// <returns>The evolved mapping, converted to a dictionary of base-class statistics.</returns>
        public override Dictionary <string, SufficientStatistics> GenerateRandomMapping(Dictionary <string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
        {
            // TODO: add a check that the supplied statistics really are BooleanStatistics.
            Converter<Leaf, SufficientStatistics> leafToStatistics = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

            PhyloTree phyloTree = _modelScorer.PhyloTree;

            // Fit the distribution to the real data.
            MessageInitializer initializer = MessageInitializerDiscrete.GetInstance(
                leafToStatistics, _discreteDistribution, new int[] { 1, 1 }, phyloTree.LeafCollection);
            Score fittedScore = _modelScorer.MaximizeLikelihood(initializer);

            // Fraction of leaves that carry a value in the real data.
            double nonMissingLeaves   = (double)phyloTree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue);
            double allLeaves          = (double)SpecialFunctions.Count(phyloTree.LeafCollection);
            double nonMissingFraction = nonMissingLeaves / allLeaves;

            double fittedEquilibrium = fittedScore.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
            double fittedLambda      = fittedScore.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

            Dictionary<string, BooleanStatistics> evolved =
                phyloTree.EvolveBinaryTree(fittedEquilibrium, fittedLambda, 1 - nonMissingFraction, ref random);

            Dictionary<string, SufficientStatistics> result;
            SpecialFunctions.ConvertDictionaryToBaseClasses(evolved, out result);
            return result;
        }
Beispiel #5
0
        /// <summary>
        /// Scores a single variable under <paramref name="nullDistn"/>. The score is first
        /// requested from <c>TryGetSingleVariableScoreFromCounts</c>; only if that fails is
        /// the likelihood maximized through <c>ModelScorer</c>.
        /// </summary>
        /// <param name="predictorMap">Maps a leaf to the predictor's sufficient statistics.</param>
        /// <param name="targetMap">Maps a leaf to the target's sufficient statistics.</param>
        /// <param name="nullDistn">The single-variable distribution to score.</param>
        /// <param name="fisherCounts">Counts passed to the message initializer and used to compute the proportion.</param>
        /// <param name="variableIsInvariant">Set to true exactly when the score came from the counts shortcut.</param>
        /// <returns>The score of the single-variable model.</returns>
        protected Score ComputeSingleVariableScore(
            Converter <Leaf, SufficientStatistics> predictorMap,
            Converter <Leaf, SufficientStatistics> targetMap,
            DistributionDiscreteSingleVariable nullDistn,
            int[] fisherCounts,
            out bool variableIsInvariant)
        {
            MessageInitializerDiscrete initializer = MessageInitializerDiscrete.GetInstance(
                predictorMap, targetMap, nullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);

            // Proportion handed to the shortcut: the table's "right sum" divided by the total count.
            double proportion = (double)TwoByTwo.GetRightSum(fisherCounts) / SpecialFunctions.Sum(fisherCounts);

            Score result;
            variableIsInvariant = TryGetSingleVariableScoreFromCounts(initializer, proportion, out result);
            if (!variableIsInvariant)
            {
                // The shortcut did not apply: fall back to full likelihood maximization.
                result = ModelScorer.MaximizeLikelihood(initializer);
            }

            return result;
        }