protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal,
                                             Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            Converter <Leaf, SufficientStatistics> LeafToJointDistributionClass =
                CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionClassFunction);

            double             logLikelihoodIndependentModel, logLikelihoodJointModel;
            Score              scoreIndTarget, scoreIndPredictor, scoreJoint;
            MessageInitializer messageInitializer;

            // first score the target.
            NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
            messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);
            scoreIndTarget     = modelScorer.ScoreModel(messageInitializer, false);

            NullModelDistribution.EmpiricalEquilibrium = predictorMarginal;
            messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionClassFunction, predictorDistributionClassFunction, NullModelDistribution);
            scoreIndPredictor  = modelScorer.ScoreModel(messageInitializer, false);

            DistributionDiscreteJointBinary jointDistn = (DistributionDiscreteJointBinary)AlternativeModelDistribution;

            jointDistn.SetInitialParams(scoreIndPredictor.OptimizationParameters, scoreIndTarget.OptimizationParameters);
            messageInitializer = modelScorer.CreateMessageInitializer(null, LeafToJointDistributionClass, jointDistn);
            scoreJoint         = modelScorer.ScoreModel(messageInitializer, false);

            logLikelihoodIndependentModel = scoreIndTarget.Loglikelihood + scoreIndPredictor.Loglikelihood;
            logLikelihoodJointModel       = scoreJoint.Loglikelihood;

            stringBuilder.Append(SpecialFunctions.CreateTabString(scoreIndPredictor.ToString(NullModelDistribution), scoreIndTarget.ToString(NullModelDistribution),
                                                                  logLikelihoodIndependentModel, scoreJoint.ToString(jointDistn), ""));

            double diff = logLikelihoodJointModel - logLikelihoodIndependentModel;

            return(diff);
        }
Ejemplo n.º 2
0
        // !! Add support for ModelTesterGaussian
        public static NullDataGenerator GetInstance(string generatorName, ModelScorer modelScorer, IDistribution distribution)
        {
            switch (generatorName.ToLower())
            {
            case "predictorpermutation":
                return(new NullDataGeneratorPredictorPermutation());

            case "targetpermutation":
                return(new NullDataGeneratorTargetPermutation());

            case "predictorparametric":
                //!!HACK!! We can currently only evolve binary data. But even if modelTester is gaussian, the predictor is still binary.
                // so all we need is a discrete model tester with any distribution (they all have the same null distribution, which is all
                // that's used.
                SpecialFunctions.CheckCondition(distribution is DistributionDiscrete, "Parametric data generation is currently only supported for discrete variables");
                return(new NullDataGeneratorPredictorParametric(modelScorer, (DistributionDiscrete)distribution));

            case "targetparametric":
                SpecialFunctions.CheckCondition(distribution is DistributionDiscrete, "Parametric data generation is currently only supported for discrete variables");
                return(new NullDataGeneratorTargetParametric(modelScorer, (DistributionDiscrete)distribution));

            default:
                throw new ArgumentException("Cannot parse " + generatorName + " into a NullDataGenerator");
            }
        }
Ejemplo n.º 3
0
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            Dictionary <string, string> row = rowAndTargetData.Row;
            string predictorVariable        = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
            string targetVariable           = row[PhyloTree.TargetVariableColumnName];    // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

            //Dictionary<string, bool> caseIdToNonMissingPredictorValue = workList.NullIndexToPredictorToCaseIdToNonMissingValue[nullIndex][predictorVariable];
            Dictionary <string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData; //workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(nullIndex, predictorVariable);
            Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue    = rowAndTargetData.TargetData;

            TwoByTwo fishers2by2 = TwoByTwo.GetInstance(
                SufficientStatisticsMapToIntMap(caseIdToNonMissingPredictorValue),
                SufficientStatisticsMapToIntMap(caseIdToNonMissingTargetValue));

            double pValue = fishers2by2.FisherExactTest;

            string reportLine = SpecialFunctions.CreateTabString(this, rowIndex, workListCount, workIndex, nullIndex,
                                                                 predictorVariable,
                                                                 targetVariable,
                                                                 fishers2by2.CountsString(),
                                                                 fishers2by2.FisherExactTest);

            return(reportLine);
        }
Ejemplo n.º 4
0
 protected ModelEvaluatorDiscreteConditional(
     DistributionDiscreteSingleVariable nullDistn,
     DistributionDiscreteConditional conditionalDistn,
     ModelScorer scorer,
     bool includePredictorInScore)
     : base(SpecialFunctions.CreateSingletonList <IDistributionSingleVariable>(nullDistn), conditionalDistn, scorer)
 {
     _includePredictorInScore = includePredictorInScore;
 }
 protected NullDataCollection CreateNullDataGenerator(
     string nullDataGeneratorName,
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RangeCollection nullIndexRangeCollection,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration,
     IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration)
 {
     return(NullDataCollection.GetInstance(
                NullDataGenerator.GetInstance(nullDataGeneratorName, modelScorer, phyloTree, this),
                nullIndexRangeCollection,
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration));
 }
Ejemplo n.º 6
0
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predMap, Converter <Leaf, SufficientStatistics> targMap)
        {
            int predCount             = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
            int targCount             = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
            int globalNonMissingCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

            MessageInitializerGaussian nullMessageInitializer = MessageInitializerGaussian.GetInstance(
                predMap, targMap, (DistributionGaussianConditional)NullDistns[0], ModelScorer.PhyloTree.LeafCollection);
            Score nullScore = ModelScorer.MaximizeLikelihood(nullMessageInitializer);

            MessageInitializerGaussian altMessageInitializer = MessageInitializerGaussian.GetInstance(
                predMap, targMap, (DistributionGaussianConditional)AltDistn, ModelScorer.PhyloTree.LeafCollection);
            Score altScore = ModelScorer.MaximizeLikelihood(altMessageInitializer);

            EvaluationResults evalResults = EvaluationResultsGaussian.GetInstance(this, nullScore, altScore, predCount, targCount, globalNonMissingCount, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Ejemplo n.º 7
0
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            int[] realFisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            int tt = realFisherCounts[(int)TwoByTwo.ParameterIndex.TT];
            int tf = realFisherCounts[(int)TwoByTwo.ParameterIndex.TF];
            int ft = realFisherCounts[(int)TwoByTwo.ParameterIndex.FT];
            int ff = realFisherCounts[(int)TwoByTwo.ParameterIndex.FF];

            int[] fisherCountsPred = new int[] { tt, ft, tf, ff };  //ModelScorer.PhyloTree.FisherCounts(targetMap, predictorMap);
            int[] fisherCountsTarg = realFisherCounts;

#if NAIVE_EQUILIBRIUM
            //USE THIS FOR BACKWARDS COMPATABILITY
            int[] tempCountsPred = ModelScorer.PhyloTree.CountsOfLeaves(predictorMap);
            int[] tempCountsTarg = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
            fisherCountsPred = tempCountsPred;
            fisherCountsTarg = tempCountsTarg;
#endif
            bool predIsInvariant, targIsInvariant;

            Score nullScorePred = ComputeSingleVariableScore(targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[0], fisherCountsPred, out predIsInvariant);
            Score nullScoreTarg = ComputeSingleVariableScore(predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[1], fisherCountsTarg, out targIsInvariant);

            List <Score> nullScores = new List <Score>(new Score[] { nullScorePred, nullScoreTarg });
            OptimizationParameterList initParams = ((DistributionDiscreteJoint)AltDistn).GenerateInitialParams(nullScorePred.OptimizationParameters, nullScoreTarg.OptimizationParameters);
            Score jointScore;

            if (predIsInvariant || targIsInvariant)  // cannot compute parameters in this case. They come directly from the single variable params
            {
                double jointLL = nullScorePred.Loglikelihood + nullScoreTarg.Loglikelihood;
                jointScore = Score.GetInstance(jointLL, initParams, AltDistn);
            }
            else
            {
                MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(CreateJointMap(predictorMap, targetMap), (DistributionDiscreteJoint)AltDistn, initParams, ModelScorer.PhyloTree.LeafCollection);
                jointScore = ModelScorer.MaximizeLikelihood(altMessageInitializer);
            }

            EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, jointScore, realFisherCounts, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Ejemplo n.º 8
0
        public override EvaluationResults EvaluateModelOnDataGivenParams(Converter <Leaf, SufficientStatistics> predMap, Converter <Leaf, SufficientStatistics> targMap, EvaluationResults previousResults)
        {
            int predCount             = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap);
            int targCount             = ModelScorer.PhyloTree.CountOfNonMissingLeaves(targMap);
            int globalNonMissingCount = ModelScorer.PhyloTree.CountOfNonMissingLeaves(predMap, targMap);

            MessageInitializerGaussian nullMessageInitializer = MessageInitializerGaussian.GetInstance(
                predMap, targMap, (DistributionGaussianConditional)NullDistns[0], ModelScorer.PhyloTree.LeafCollection);
            double nullLL    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializer, previousResults.NullScores[0].OptimizationParameters);
            Score  nullScore = Score.GetInstance(nullLL, previousResults.NullScores[0].OptimizationParameters, previousResults.NullScores[0].Distribution);

            MessageInitializerGaussian altMessageInitializer = MessageInitializerGaussian.GetInstance(
                predMap, targMap, (DistributionGaussianConditional)AltDistn, ModelScorer.PhyloTree.LeafCollection);
            double altLL    = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, previousResults.AltScore.OptimizationParameters);
            Score  altScore = Score.GetInstance(altLL, previousResults.AltScore.OptimizationParameters, previousResults.AltScore.Distribution);

            EvaluationResults evalResults = EvaluationResultsGaussian.GetInstance(this, nullScore, altScore, predCount, targCount, globalNonMissingCount, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Ejemplo n.º 9
0
        private Score ComputeConditionalVariableScore(
            Converter <Leaf, SufficientStatistics> predictorMap,
            Converter <Leaf, SufficientStatistics> targetMap,
            Score nullScore,
            int[] fisherCounts)
        {
            int tt  = fisherCounts[(int)TwoByTwo.ParameterIndex.TT];
            int tf  = fisherCounts[(int)TwoByTwo.ParameterIndex.TF];
            int ft  = fisherCounts[(int)TwoByTwo.ParameterIndex.FT];
            int sum = SpecialFunctions.Sum(fisherCounts);

            Score altScore;

            if (tt + ft == sum || tt + ft == 0) // target is always true or false
            {
                bool isNaN = sum == 0;
                OptimizationParameterList altParamList = AltDistn.GetParameters();
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1].Value  = 0;
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2].Value  = 0;
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value      = isNaN ? double.NaN : 0;
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = (double)(tt + ft) / sum;
                altScore = Score.GetInstance(isNaN ? double.NaN : 0, altParamList, AltDistn);
            }
            else if (tt + tf == 0 || tt + tf == sum) // predictor is always true or false
            {
                OptimizationParameterList nullParamList = nullScore.OptimizationParameters;
                OptimizationParameterList altParamList  = AltDistn.GetParameters();
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1].Value  = nullParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor1].Value;
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2].Value  = nullParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Predictor2].Value;
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value      = nullParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Lambda].Value;
                altParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value = nullParamList[(int)DistributionDiscreteSingleVariable.ParameterIndex.Equilibrium].Value;

                altScore = Score.GetInstance(nullScore.Loglikelihood, altParamList, AltDistn);
            }
            else // compute ML using ModelScorer
            {
                MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
                altScore = ModelScorer.MaximizeLikelihood(altMessageInitializer);
            }
            return(altScore);
        }
Ejemplo n.º 10
0
        //public static Converter<Leaf, SufficientStatistics> TESTPRED, TESTTARG;
        //public static EvaluationResults TESTEVALRESULTS;

        public override EvaluationResults EvaluateModelOnDataGivenParams(
            Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
        {
            int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            int targNullIdx = _includePredictorInScore ? 1 : 0;

            OptimizationParameterList  nullParamsTarg             = previousResults.NullScores[targNullIdx].OptimizationParameters;
            MessageInitializerDiscrete nullMessageInitializerTarg = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap,
                                                                                                           NullDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
            double nullLLTarg    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerTarg, nullParamsTarg);
            Score  nullScoreTarg = Score.GetInstance(nullLLTarg, nullParamsTarg, NullDistn);


            OptimizationParameterList  altParams             = previousResults.AltScore.OptimizationParameters;
            MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap,
                                                                                                      (DistributionDiscreteConditional)AltDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
            double condLL   = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altParams);
            Score  altScore = Score.GetInstance(condLL, altParams, AltDistn);

            List <Score> nullScores = new List <Score>();

            if (_includePredictorInScore)
            {
                OptimizationParameterList  nullParamsPred             = previousResults.NullScores[0].OptimizationParameters;
                MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap,
                                                                                                               NullDistn, new int[0], ModelScorer.PhyloTree.LeafCollection);
                double nullLLPred    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullParamsPred);
                Score  nullScorePred = Score.GetInstance(nullLLPred, nullParamsPred, NullDistn);
                nullScores.Add(nullScorePred);
                // conditional model altScore doesn't include predLL. If we're here, we want to add it to make it comparable to joint or reverseConditional
                altScore = Score.GetInstance(altScore.Loglikelihood + nullScorePred.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
            }
            nullScores.Add(nullScoreTarg);


            EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Ejemplo n.º 11
0
        public override EvaluationResults EvaluateModelOnDataGivenParams(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap, EvaluationResults previousResults)
        {
            int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);

            OptimizationParameterList  nullParamsTarg             = previousResults.NullScores[1].OptimizationParameters;
            MessageInitializerDiscrete nullMessageInitializerTarg = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteSingleVariable)NullDistns[0], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            double nullLLTarg    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerTarg, nullParamsTarg);
            Score  nullScoreTarg = Score.GetInstance(nullLLTarg, nullParamsTarg, previousResults.NullScores[1].Distribution);

            OptimizationParameterList  nullParamsPred             = previousResults.NullScores[0].OptimizationParameters;
            MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, (DistributionDiscreteSingleVariable)NullDistns[1], fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            double nullLLPred    = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullParamsPred);
            Score  nullScorePred = Score.GetInstance(nullLLPred, nullParamsPred, previousResults.NullScores[0].Distribution);

            List <Score> nullScores = new List <Score>(new Score[] { nullScorePred, nullScoreTarg });

            OptimizationParameterList altParams = previousResults.AltScore.OptimizationParameters;

            double altLL;

            if (((DistributionDiscreteJoint)AltDistn).ParametersCannotBeEvaluated(altParams))
            {
                // we'll get here only if one of the variables is always (or never) true. In this case, the variables must be independent.
                altLL = nullLLTarg + nullLLPred;
            }
            else
            {
                MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(CreateJointMap(predictorMap, targetMap), (DistributionDiscreteJoint)AltDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
                altLL = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altParams);
            }

            Score altScore = Score.GetInstance(altLL, altParams, previousResults.AltScore.Distribution);

            EvaluationResults evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, fisherCounts, ChiSquareDegreesOfFreedom);

            return(evalResults);
        }
Ejemplo n.º 12
0
 public static ModelEvaluator GetInstance(string nameAndParameters, ModelScorer scorer)
 {
     nameAndParameters = nameAndParameters.ToLower();
     if (nameAndParameters.StartsWith(ModelEvaluatorCrossValidate.BaseName.ToLower()))
     {
         return(ModelEvaluatorCrossValidate.GetInstance(nameAndParameters.Substring(ModelEvaluatorCrossValidate.BaseName.Length), scorer));
     }
     else if (nameAndParameters.StartsWith(ModelEvaluatorDiscreteConditionalCollection.BaseName.ToLower()))
     {
         return(ModelEvaluatorDiscreteConditionalCollection.GetInstance(nameAndParameters.Substring(ModelEvaluatorDiscreteConditionalCollection.BaseName.Length), scorer));
     }
     else if (nameAndParameters.StartsWith(ModelEvaluatorDiscrete.BaseName.ToLower()))
     {
         return(ModelEvaluatorDiscrete.GetInstance(nameAndParameters.Substring(ModelEvaluatorDiscrete.BaseName.Length), scorer));
     }
     else if (nameAndParameters.StartsWith(ModelEvaluatorGaussian.BaseName.ToLower()))
     {
         return(ModelEvaluatorGaussian.GetInstance(nameAndParameters.Substring(ModelEvaluatorGaussian.BaseName.Length), scorer));
     }
     else
     {
         throw new ArgumentException("ModelEvaluator cannot parse " + nameAndParameters);
     }
 }
        protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal,
                                             Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
            NullModelDistribution.InitialParamVals     = null;

            MessageInitializer messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);

            List <double> logLikelihoodList = new List <double>();

            foreach (bool useParameter in new bool[] { false, true })
            {
                Score score = modelScorer.ScoreModel(messageInitializer, useParameter);

                stringBuilder.Append(SpecialFunctions.CreateTabString(score.ToString(useParameter ? AlternativeModelDistribution : NullModelDistribution), ""));
                logLikelihoodList.Add(score.Loglikelihood);
                AltModelDistribution.InitialParamVals = score.OptimizationParameters;
                Debug.WriteLine(SpecialFunctions.CreateTabString("AltModelDistribution.InitialParamVals = score.OptimizationParameters", score.OptimizationParameters));
            }

            double diff = logLikelihoodList[1] - logLikelihoodList[0];

            return(diff);
        }
Ejemplo n.º 14
0
        protected Score ComputeSingleVariableScore(
            Converter <Leaf, SufficientStatistics> predictorMap,
            Converter <Leaf, SufficientStatistics> targetMap,
            DistributionDiscreteSingleVariable nullDistn,
            int[] fisherCounts,
            out bool variableIsInvariant)
        {
            MessageInitializerDiscrete nullMessageInitializer =
                MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, nullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);

            double p = (double)TwoByTwo.GetRightSum(fisherCounts) / SpecialFunctions.Sum(fisherCounts);
            Score  nullScore;

            if (TryGetSingleVariableScoreFromCounts(nullMessageInitializer, p, out nullScore))
            {
                variableIsInvariant = true;
            }
            else
            {
                variableIsInvariant = false;
                nullScore           = ModelScorer.MaximizeLikelihood(nullMessageInitializer);
            }
            return(nullScore);
        }
Ejemplo n.º 15
0
        public static ModelEvaluatorDiscreteConditional GetInstance(string leafDistributionName, ModelScorer modelScorer, bool includePredictorInScore)
        {
            DistributionDiscreteSingleVariable nullDistn = DistributionDiscreteSingleVariable.GetInstance();
            DistributionDiscreteConditional    condDistn = DistributionDiscreteConditional.GetInstance(leafDistributionName);

            return(new ModelEvaluatorDiscreteConditional(nullDistn, condDistn, modelScorer, includePredictorInScore));
        }
Ejemplo n.º 16
0
 new public static ModelEvaluatorDiscreteConditional GetInstance(string leafDistributionName, ModelScorer modelScorer)
 {
     return(GetInstance(leafDistributionName, modelScorer, false));
 }
Ejemplo n.º 17
0
 protected ModelEvaluator(List <IDistributionSingleVariable> nullDistns, IDistribution altDistn, ModelScorer scorer)
 {
     _scorer     = scorer;
     _nullDistns = nullDistns;
     _altDistn   = altDistn;
 }
Ejemplo n.º 18
0
 protected ModelEvaluatorGaussian(List <IDistributionSingleVariable> nullDistns, DistributionGaussianConditional altDistn, ModelScorer scorer)
     : base(nullDistns, altDistn, scorer)
 {
 }
Ejemplo n.º 19
0
        new public static ModelEvaluatorGaussian GetInstance(string nameAndParameters, ModelScorer scorer)
        {
            IDistributionSingleVariable     nullDistn = DistributionGaussianConditional.GetSingleVariableInstance(nameAndParameters);
            DistributionGaussianConditional altDistn  = DistributionGaussianConditional.GetInstance(nameAndParameters);

            return(new ModelEvaluatorGaussian(SpecialFunctions.CreateSingletonList(nullDistn), altDistn, scorer));
        }
        public void Run(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string leafDistributionName,
            string nullDataGeneratorName,
            KeepTest <Dictionary <string, string> > keepTest,
            RangeCollection skipRowIndexRangeCollectionOrNull,
            string shortName,
            string outputDirectoryName,
            RangeCollection pieceIndexRangeCollection, int pieceCount,
            RangeCollection nullIndexRangeCollection,
            string optimizerName)
        {
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();
            Directory.CreateDirectory(outputDirectoryName);


            string outputFileName = string.Format(@"{0}\{1}.{2}.{3}.{4}.{5}.{6}{7}.txt",
                                                  outputDirectoryName, shortName,
                                                  leafDistributionName, nullDataGeneratorName,
                                                  nullIndexRangeCollection,
                                                  pieceCount,
                                                  pieceIndexRangeCollection,
                                                  skipRowIndexRangeCollectionOrNull == null ? "" : ".Skip" + skipRowIndexRangeCollectionOrNull.Count().ToString()
                                                  );

            #region from PhyloTree refactor
            //Dictionary<string, Dictionary<string, bool>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory<bool>(predictorSparseFileName);
            //IEnumerable<Pair<string, Dictionary<string, T>>> targetNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration<T>(targetSparseFileName);

            //NullDataCollection nullDataGenerator =
            //    NullDataCollection.GetInstance(this, modelTester, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);

            //UniversalWorkList<T> workList = UniversalWorkList<T>.GetInstance(
            //    predictorVariableToCaseIdToRealNonMissingValue,
            //    targetNameAndCaseIdToNonMissingValueEnumeration,
            //    nullDataGenerator, nullIndexRangeCollection, keepTest);
            #endregion
            bool speedOverMemory = true;

            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            predictorNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(predictorSparseFileName, speedOverMemory);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > >
            targetNameAndCaseIdToNonMissingValueEnumeration = CreateNameAndCaseIdToNonMissingValueEnumeration(targetSparseFileName, speedOverMemory);

            NullDataCollection nullDataGenerator =
                CreateNullDataGenerator(nullDataGeneratorName, modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, keepTest);

            int workListCount = SpecialFunctions.Count(workList.List());

            int effectiveWorkListCount;
            if (skipRowIndexRangeCollectionOrNull == null)
            {
                effectiveWorkListCount = workListCount;
            }
            else
            {
                effectiveWorkListCount = 0;
                for (int iRowIndex = 0; iRowIndex < workListCount; iRowIndex++)
                {
                    if (!skipRowIndexRangeCollectionOrNull.Contains(iRowIndex))
                    {
                        effectiveWorkListCount++;
                    }
                }
            }
            Console.WriteLine("{0} Total rows. Skipping {1} of them.", workListCount, workListCount - effectiveWorkListCount);

            using (TextWriter textWriter = File.CreateText(outputFileName))
            {
                textWriter.WriteLine(Header);
                int rowIndex          = -1;
                int effectiveRowIndex = -1;

                foreach (RowData rowAndTargetData in workList.List())
                {
                    //!!!make all these parameters and the calculation a class
                    ++rowIndex;
                    Debug.Assert(rowIndex < workListCount); // real assert

                    if (skipRowIndexRangeCollectionOrNull == null || !skipRowIndexRangeCollectionOrNull.Contains(rowIndex))
                    {
                        ++effectiveRowIndex;

                        int workIndex = ExtractWorkIndex(effectiveRowIndex, pieceCount, effectiveWorkListCount);

                        if (pieceIndexRangeCollection.Contains(workIndex))
                        {
                            Debug.WriteLine("WorkItemIndex " + rowIndex.ToString());
                            string reportLine;
                            try
                            {
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }
                            catch (OutOfMemoryException)
                            {
                                Console.WriteLine("OUT OF MEMORY!! Clearing cache and trying to recover where we left off.");
                                modelScorer.ClearCache();
                                reportLine =
                                    CreateReportLine(modelScorer, phyloTree, rowAndTargetData, workList, rowIndex, workListCount, workIndex);
                            }

                            textWriter.WriteLine(reportLine);
                            textWriter.Flush();
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Running time: " + stopwatch.Elapsed);
        }
Ejemplo n.º 21
0
        new public static ModelEvaluatorDiscrete GetInstance(string nameAndParameters, ModelScorer scorer)
        {
            nameAndParameters = nameAndParameters.ToLower();

            if (nameAndParameters.StartsWith(ModelEvaluatorDiscreteConditional.BaseName.ToLower()))
            {
                return(ModelEvaluatorDiscreteConditional.GetInstance(nameAndParameters.Substring(ModelEvaluatorDiscreteConditional.BaseName.Length), scorer));
            }
            else if (nameAndParameters.StartsWith(ModelEvaluatorDiscreteJoint.BaseName.ToLower()))
            {
                return(ModelEvaluatorDiscreteJoint.GetInstance(nameAndParameters.Substring(ModelEvaluatorDiscreteJoint.BaseName.Length), scorer));
            }
            else if (nameAndParameters.Equals(ModelEvaluatorDiscreteFisher.BaseName.ToLower()))
            {
                return(ModelEvaluatorDiscreteFisher.GetInstance(scorer.PhyloTree.LeafCollection));
            }
            throw new ArgumentException("Cold not parse " + nameAndParameters + " into a ModelEvaluatorDiscrete.");
        }
        public void ScoreTree(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            string predictorSparseFileName,
            string targetSparseFileName,
            string predictorVariableName,
            string targetVariableName,
            double[] nullModelArgs,
            double[] altModelArgs)
        {
            //Dictionary<string, Dictionary<string, SufficientStatistics>> predictorVariableToCaseIdToRealNonMissingValue = LoadSparseFileInMemory(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > predictorNameAndCaseIdToNonMissingValueEnumeration = LoadSparseFileEnumeration(predictorSparseFileName);
            IEnumerable <Pair <string, Dictionary <string, SufficientStatistics> > > targetNameAndCaseIdToNonMissingValueEnumeration    = LoadSparseFileEnumeration(targetSparseFileName);

            RangeCollection    nullIndexRangeCollection = RangeCollection.GetInstance(-1, -1);
            NullDataCollection nullDataGenerator        =
                CreateNullDataGenerator("PredictorPermutation", modelScorer, phyloTree, nullIndexRangeCollection,
                                        predictorNameAndCaseIdToNonMissingValueEnumeration, targetNameAndCaseIdToNonMissingValueEnumeration);

            UniversalWorkList workList = UniversalWorkList.GetInstance(
                predictorNameAndCaseIdToNonMissingValueEnumeration,
                targetNameAndCaseIdToNonMissingValueEnumeration,
                //targetNameAndCaseIdToNonMissingValueEnumeration,
                nullDataGenerator, nullIndexRangeCollection, AlwaysKeep <Dictionary <string, string> > .GetInstance());


            foreach (RowData rowAndTargetData in workList.List())
            {
                if (rowAndTargetData.Row[PhyloTree.PredictorVariableColumnName] == predictorVariableName &&
                    rowAndTargetData.Row[PhyloTree.TargetVariableColumnName] == targetVariableName)
                {
                    Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData;//workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(-1, predictorVariableName);
                    Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

                    Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
                    Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);
                    Converter <Leaf, SufficientStatistics> altDistributionMap = CreateAlternativeSufficientStatisticsMap(predictorDistributionClassFunction, targetDistributionMap);
                    double                    logLikelihood;
                    Score                     scoreIndTarget, scoreIndPredictor, scoreAlt;
                    MessageInitializer        messageInitializer;
                    OptimizationParameterList nullParams = NullModelDistribution.GetParameters(nullModelArgs);
                    OptimizationParameterList altParams  = AltModelDistribution.GetParameters(altModelArgs);

                    Console.WriteLine(SpecialFunctions.CreateTabString("Variable", nullParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndTarget     = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Target\t" + scoreIndTarget);

                    messageInitializer = modelScorer.CreateMessageInitializer(targetDistributionMap, predictorDistributionClassFunction, NullModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, nullParams);
                    scoreIndPredictor = Score.GetInstance(logLikelihood, nullParams);
                    Console.WriteLine("Predictor\t" + scoreIndPredictor);

                    Console.WriteLine("\n" + SpecialFunctions.CreateTabString("Variable", altParams.ToStringHeader(), "LogL"));
                    messageInitializer = modelScorer.CreateMessageInitializer(null, altDistributionMap, AltModelDistribution);
                    logLikelihood      = modelScorer.ComputeLogLikelihoodModelGivenData(messageInitializer, altParams);
                    scoreAlt           = Score.GetInstance(logLikelihood, altParams);
                    Console.WriteLine(SpecialFunctions.CreateTabString(AltModelDistribution, scoreAlt));
                }
            }
        }
 protected abstract string CreateReportLine(
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RowData rowAndTargetData,
     UniversalWorkList workList,
     int rowIndex, int workListCount, int workIndex);
Ejemplo n.º 24
0
        new public static ModelEvaluatorDiscreteJoint GetInstance(string leafDistributionName, ModelScorer modelScorer)
        {
            DistributionDiscreteSingleVariable nullDistn  = DistributionDiscreteSingleVariable.GetInstance();
            DistributionDiscreteJoint          jointDistn = DistributionDiscreteJoint.GetInstance(leafDistributionName);
            List <IDistributionSingleVariable> nullDistns = new List <IDistributionSingleVariable>();

            nullDistns.Add(nullDistn);
            nullDistns.Add(nullDistn);

            return(new ModelEvaluatorDiscreteJoint(nullDistns, jointDistn, modelScorer));
        }
Ejemplo n.º 25
0
 public NullDataGeneratorTargetParametric(ModelScorer modelScorer, DistributionDiscrete distribution)
     :
     base(modelScorer, distribution)
 {
 }
Ejemplo n.º 26
0
 public NullDataGeneratorParametric(ModelScorer modelScorer, DistributionDiscrete discreteDistn)
 {
     _modelScorer          = modelScorer;
     _discreteDistribution = discreteDistn;
 }
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterDiscrete.cs

            Dictionary <string, string> row = rowAndTargetData.Row;
            string predictorVariable        = row[PhyloTree.PredictorVariableColumnName];
            string targetVariable           = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

            //Dictionary<string, bool> caseIdToNonNullPredictorValue = workList.NullIndexToPredictorToCaseIdToNonMissingValue[nullIndex][predictorVariable];
            Dictionary <string, SufficientStatistics> caseIdToNonNullPredictorValue = rowAndTargetData.PredictorData; //workList.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariable(nullIndex, predictorVariable);
            Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

            Converter <Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonNullPredictorValue);

            int[] predictorCounts = phyloTree.CountsOfLeaves(predictorDistributionClassFunction);

            int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int predictorTrueNameCount  = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetNonMissingCount   = phyloTree.CountOfNonMissingLeaves(caseIdToNonMissingTargetValue);
            int globalNonMissingCount   = phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionMap);

            StringBuilder stringBuilder = new StringBuilder(
                SpecialFunctions.CreateTabString(
                    this, rowIndex, workListCount, workIndex, nullIndex, predictorVariable,
                    predictorFalseNameCount,
                    predictorTrueNameCount,
                    predictorTrueNameCount + predictorFalseNameCount,
                    targetVariable,
                    targetNonMissingCount,
                    globalNonMissingCount,
                    ""));

            bool ignoreRow = false;

            foreach (int count in predictorCounts)
            {
                if (count == 0)
                {
                    ignoreRow = true;
                }
            }

            if (ignoreRow)
            {
                CompleteRowWithNaN(stringBuilder);
            }
            else
            {
                List <double>      logLikelihoodList  = new List <double>();
                MessageInitializer messageInitializer =
                    modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
                NullModelDistribution.InitialParamVals = null;
                foreach (bool useParameter in new bool[] { false, true })
                {
                    Score score = modelScorer.ScoreModel(messageInitializer, useParameter);
                    stringBuilder.Append(SpecialFunctions.CreateTabString(score, ""));
                    Debug.Write(SpecialFunctions.CreateTabString(score, ""));
                    logLikelihoodList.Add(score.Loglikelihood);
                    AltModelDistribution.InitialParamVals = score.OptimizationParameters;
                }

                double diff   = logLikelihoodList[1] - logLikelihoodList[0];
                double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

                stringBuilder.Append(SpecialFunctions.CreateTabString(diff, pValue));
                Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));
            }
            return(stringBuilder.ToString());
        }
Ejemplo n.º 28
0
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            EvaluationResults evalResults;

            int[] fisherCounts     = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
            int[] realFisherCounts = fisherCounts;  // for compatability when NAIVE_EQUILIBRIUM is set

#if NAIVE_EQUILIBRIUM
            //USE THIS FOR BACKWARDS COMPATABILITY
            int[] tempCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
            fisherCounts = tempCounts;
#endif

            //MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            //if (TryShortCutFromCounts(realFisherCounts, nullMessageInitializer, out evalResults))
            //{
            //    return evalResults;
            //}

            //Score nullScore = ModelScorer.MaximizeLikelihood(nullMessageInitializer);
            bool isInvariant;

            Score nullScoreTarg = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, fisherCounts, out isInvariant);
            Score altScore      = ComputeConditionalVariableScore(predictorMap, targetMap, nullScoreTarg, fisherCounts);

            //(realFisherCounts, nullScoreTarg, out evalResults))
            //{
            //    return evalResults;
            //}

            //MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
            //Score condScore = ModelScorer.MaximizeLikelihood(altMessageInitializer);

            List <Score> nullScores = new List <Score>();
            if (_includePredictorInScore)
            {
                int[] predFisherCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
                Score predNullScore    = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, predFisherCounts, out isInvariant);
                nullScores.Add(predNullScore);
                // conditional model altScore doesn't include predLL. If we're here, we want to add it to make it comparable to joint or reverseConditional
                altScore = Score.GetInstance(altScore.Loglikelihood + predNullScore.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
            }
            nullScores.Add(nullScoreTarg);

            evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, realFisherCounts, ChiSquareDegreesOfFreedom);



#if DEBUG
            MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
            MessageInitializerDiscrete altMessageInitializer  = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScoreTarg.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
            double nullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializer, nullScoreTarg.OptimizationParameters);
            double altLL  = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altScore.OptimizationParameters);

            if (_includePredictorInScore)
            {
                int[] predFisherCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
                MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, NullDistn, predFisherCounts, ModelScorer.PhyloTree.LeafCollection);
                double nullLLPred = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullScores[0].OptimizationParameters);
                altLL += nullLLPred;
            }

            EvaluationResults evalResults2 = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);

            double eps = 1E-10;
            Debug.Assert(ComplexNumber.ApproxEqual(nullLL, nullScoreTarg.Loglikelihood, eps));
            Debug.Assert(ComplexNumber.ApproxEqual(altLL, altScore.Loglikelihood, eps));
            Debug.Assert(ComplexNumber.ApproxEqual(evalResults.NullLL, evalResults2.NullLL, eps) && ComplexNumber.ApproxEqual(evalResults.AltLL, evalResults2.AltLL, eps), "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

            return(evalResults);
        }
Ejemplo n.º 29
0
 private ModelEvaluatorDiscreteJoint(List <IDistributionSingleVariable> nullDistns, DistributionDiscreteJoint jointDistn, ModelScorer scorer)
     : base(nullDistns, jointDistn, scorer)
 {
 }
Ejemplo n.º 30
0
 protected ModelEvaluatorDiscrete(List <IDistributionSingleVariable> nullDistns, DistributionDiscrete altDistn, ModelScorer scorer)
     : base(nullDistns, altDistn, scorer)
 {
 }