Esempio n. 1
0
        public override EvaluationResults EvaluateModelOnDataGivenParams(Converter <Leaf, SufficientStatistics> predMap, Converter <Leaf, SufficientStatistics> targMap, EvaluationResults previousResults)
        {
            int predCount             = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
            int targCount             = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
            int globalNonMissingCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

            MessageInitializerGaussian nullMessageInitializer = MessageInitializerGaussian.GetInstance(
                predMap, targMap, (DistributionGaussianConditional)NullDistns[0], ModelScorer.PhyloTree.LeafCollection);
            double nullLL    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializer, previousResults.NullScores[0].OptimizationParameters);
            Score  nullScore = Score.GetInstance(nullLL, previousResults.NullScores[0].OptimizationParameters, previousResults.NullScores[0].Distribution);

            MessageInitializerGaussian altMessageInitializer = MessageInitializerGaussian.GetInstance(
                predMap, targMap, (DistributionGaussianConditional)AltDistn, ModelScorer.PhyloTree.LeafCollection);
            double altLL    = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, previousResults.AltScore.OptimizationParameters);
            Score  altScore = Score.GetInstance(altLL, previousResults.AltScore.OptimizationParameters, previousResults.AltScore.Distribution);

            EvaluationResults evalResults = EvaluationResultsGaussian.GetInstance(this, nullScore, altScore, predCount, targCount, globalNonMissingCount, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Esempio n. 2
0
        //public static Converter<Leaf, SufficientStatistics> TESTPRED, TESTTARG;
        //public static EvaluationResults TESTEVALRESULTS;

        public override EvaluationResults EvaluateModelOnDataGivenParams(
            Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
        {
            int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            int targNullIdx = _includePredictorInScore ? 1 : 0;

            OptimizationParameterList  nullParamsTarg             = previousResults.NullScores[targNullIdx].OptimizationParameters;
            MessageInitializerDiscrete nullMessageInitializerTarg = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap,
                                                                                                           NullDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
            double nullLLTarg    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerTarg, nullParamsTarg);
            Score  nullScoreTarg = Score.GetInstance(nullLLTarg, nullParamsTarg, NullDistn);


            OptimizationParameterList  altParams             = previousResults.AltScore.OptimizationParameters;
            MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap,
                                                                                                      (DistributionDiscreteConditional)AltDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
            double condLL   = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altParams);
            Score  altScore = Score.GetInstance(condLL, altParams, AltDistn);

            List <Score> nullScores = new List <Score>();

            if (_includePredictorInScore)
            {
                OptimizationParameterList  nullParamsPred             = previousResults.NullScores[0].OptimizationParameters;
                MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap,
                                                                                                               NullDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
                double nullLLPred    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullParamsPred);
                Score  nullScorePred = Score.GetInstance(nullLLPred, nullParamsPred, NullDistn);
                nullScores.Add(nullScorePred);
                // conditional model altScore doesn't include predLL. If we're here, we want to add it to make it comparable to joint or reverseConditional
                altScore = Score.GetInstance(altScore.Loglikelihood + nullScorePred.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
            }
            nullScores.Add(nullScoreTarg);


            EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Esempio n. 3
0
        public override EvaluationResults EvaluateModelOnDataGivenParams(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
        {
            int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            OptimizationParameterList  nullParamsTarg             = previousResults.NullScores[1].OptimizationParameters;
            MessageInitializerDiscrete nullMessageInitializerTarg = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[0], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            double nullLLTarg    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerTarg, nullParamsTarg);
            Score  nullScoreTarg = Score.GetInstance(nullLLTarg, nullParamsTarg, previousResults.NullScores[1].Distribution);

            OptimizationParameterList  nullParamsPred             = previousResults.NullScores[0].OptimizationParameters;
            MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[1], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            double nullLLPred    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullParamsPred);
            Score  nullScorePred = Score.GetInstance(nullLLPred, nullParamsPred, previousResults.NullScores[0].Distribution);

            List <Score> nullScores = new List <Score>(new Score[] { nullScorePred, nullScoreTarg });

            OptimizationParameterList altParams = previousResults.AltScore.OptimizationParameters;

            double altLL;

            if (((DistributionDiscreteJoint)AltDistn).ParametersCannotBeEvaluated(altParams))
            {
                // we'll get here only if one of the variables is always (or never) true. In this case, the variables must be independent.
                altLL = nullLLTarg + nullLLPred;
            }
            else
            {
                MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(CreateJointMap(predictorMap, targetMap), (DistributionDiscreteJoint)AltDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
                altLL = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altParams);
            }

            Score altScore = Score.GetInstance(altLL, altParams, previousResults.AltScore.Distribution);

            EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Esempio n. 4
0
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            EvaluationResults evalResults;

            int[] fisherCounts     = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
            int[] realFisherCounts = fisherCounts;  // for compatability when NAIVE_EQUILIBRIUM is set

#if NAIVE_EQUILIBRIUM
            //USE THIS FOR BACKWARDS COMPATABILITY
            int[] tempCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
            fisherCounts = tempCounts;
#endif

            //MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            //if (TryShortCutFromCounts(realFisherCounts, nullMessageInitializer, out evalResults))
            //{
            //    return evalResults;
            //}

            //Score nullScore = ModelScorer.MaximizeLikelihood(nullMessageInitializer);
            bool isInvariant;

            Score nullScoreTarg = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, fisherCounts, out isInvariant);
            Score altScore      = ComputeConditionalVariableScore(predictorMap, targetMap, nullScoreTarg, fisherCounts);

            //(realFisherCounts, nullScoreTarg, out evalResults))
            //{
            //    return evalResults;
            //}

            //MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
            //Score condScore = ModelScorer.MaximizeLikelihood(altMessageInitializer);

            List <Score> nullScores = new List <Score>();
            if (_includePredictorInScore)
            {
                int[] predFisherCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
                Score predNullScore    = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, predFisherCounts, out isInvariant);
                nullScores.Add(predNullScore);
                // conditional model altScore doesn't include predLL. If we're here, we want to add it to make it comparable to joint or reverseConditional
                altScore = Score.GetInstance(altScore.Loglikelihood + predNullScore.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
            }
            nullScores.Add(nullScoreTarg);

            evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, realFisherCounts, ChiSquareDegreesOfFreedom);



#if DEBUG
            MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            MessageInitializerDiscrete altMessageInitializer  = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScoreTarg.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
            double nullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializer, nullScoreTarg.OptimizationParameters);
            double altLL  = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altScore.OptimizationParameters);

            if (_includePredictorInScore)
            {
                int[] predFisherCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
                MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, NullDistn, predFisherCounts, ModelScorer.PhyloTree.LeafCollection);
                double nullLLPred = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullScores[0].OptimizationParameters);
                altLL += nullLLPred;
            }

            EvaluationResults evalResults2 = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);

            double eps = 1E-10;
            Debug.Assert(ComplexNumber.ApproxEqual(nullLL, nullScoreTarg.Loglikelihood, eps));
            Debug.Assert(ComplexNumber.ApproxEqual(altLL, altScore.Loglikelihood, eps));
            Debug.Assert(ComplexNumber.ApproxEqual(evalResults.NullLL, evalResults2.NullLL, eps) && ComplexNumber.ApproxEqual(evalResults.AltLL, evalResults2.AltLL, eps), "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

            return(evalResults);
        }
        public void ScoreTree(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string predictorVariableName,
            string targetVariableName,
            double[] nullModelArgs,
            double[] altModelArgs)
        {
            //Dictionary<string, Dictionary<string, SufficientStatistics>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration    = LoadSparseFileEnumeration(targetSparseFileName);

            RangeCollection    nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
            NullDataCollection nullDataGenerator        =
                CreateNullDataGenerator("PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                //targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, AlwaysKeep <Dictionary <string, string> > .GetInstance());


            foreach (RowData rowAndTargetData in workList.List())
            {
                if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
                    rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
                {
                    Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;//workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(-1, predictorVariableName);
                    Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

                    Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
                    Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
                    Converter <Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);
                    double                    logLikelihood;
                    Score                     scoreIndTarget, scoreIndPredictor, scoreAlt;
                    MessageInitializer        messageInitializer;
                    OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
                    OptimizationParameterList altParams  = AltModelDistribution.GetParameters(altModelArgs);

                    Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndTarget     = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Target\t" + scoreIndTarget);

                    messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Predictor\t" + scoreIndPredictor);

                    Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
                    scoreAlt           = Score.GetInstance(logLikelihood, altParams);
                    Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
                }
            }
        }