public override double[] GetPriorProbabilities(OptimizationParameterList discreteParameters)
{
    const double eps = 0.0001;
    double[] priors = new double[NonMissingClassCount];
    try
    {
        // The prior is the stationary distribution of the transition matrix: the
        // eigenvector of the transposed matrix whose eigenvalue is (approximately) 1.
        EigenPair eig = LinearAlgebra.ComputeSparseEigenPair(
            LinearAlgebra.Transpose(GetTransitionProbabilityMatrix(discreteParameters, 1)));
        ComplexNumber[] eigenValues = eig.EigenValues;
        for (int i = 0; i < 4; i++)
        {
            if (ComplexNumber.ApproxEqual(eigenValues[i], 1, eps))
            {
                priors = LinearAlgebra.Abs(LinearAlgebra.ComplexToDouble(LinearAlgebra.Transpose(eig.EigenVectors)[i]));
                break;
            }
        }
        priors = LinearAlgebra.Normalize(priors);
    }
    catch (Exception e)
    {
        throw new NotComputableException("Problem computing the prior: " + e.Message);
    }
    return priors;
}
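// Illustrative sketch (not part of the library): the prior computed above is the stationary
// distribution of the transition matrix, i.e. the left eigenvector with eigenvalue 1. The same
// vector can be approximated without an eigensolver by iterating pi <- pi * P until it settles.
// The helper name StationaryByPowerIteration and the 2-state example matrix are hypothetical.
private static double[] StationaryByPowerIteration(double[][] p, int iterations = 1000)
{
    int n = p.Length;
    double[] pi = new double[n];
    for (int i = 0; i < n; i++) pi[i] = 1.0 / n;   // start from the uniform distribution
    for (int iter = 0; iter < iterations; iter++)
    {
        double[] next = new double[n];
        for (int j = 0; j < n; j++)
            for (int i = 0; i < n; i++)
                next[j] += pi[i] * p[i][j];        // left-multiply: pi' = pi * P
        pi = next;
    }
    double sum = 0;
    foreach (double x in pi) sum += x;
    for (int i = 0; i < n; i++) pi[i] /= sum;      // renormalize, as Normalize() does above
    return pi;
}
// Example: for P = [[0.9, 0.1], [0.2, 0.8]] this converges to (2/3, 1/3), the same vector the
// eigenvalue-1 eigenvector approach would give for that matrix.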
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predMap, Converter<Leaf, SufficientStatistics> targMap)
{
    // Tabulate the predictor/target pair into a 2x2 contingency table over the full leaf collection.
    TwoByTwo fishers2by2 = TwoByTwo.GetInstance(
        SufficientStatisticsMapToIntDictionaryMap(predMap, _fullLeafCollection),
        SufficientStatisticsMapToIntDictionaryMap(targMap, _fullLeafCollection));
    int[] fisherCounts = fishers2by2.ToOneDArray();

    List<Score> nullScores;
    Score altScore;
    ComputeIidScores(fisherCounts, out nullScores, out altScore);

    EvaluationResultsFisher results = EvaluationResultsFisher.GetInstance(this, nullScores, altScore, fishers2by2);

#if DEBUG
    // Re-evaluating with the fitted parameters must reproduce the same likelihoods and p-value.
    EvaluationResults results2 = EvaluateModelOnDataGivenParams(predMap, targMap, results);
    double eps = 1E-14;
    Debug.Assert(ComplexNumber.ApproxEqual(results.AltLL, results2.AltLL, eps));
    Debug.Assert(ComplexNumber.ApproxEqual(results.NullLL, results2.NullLL, eps));
    Debug.Assert(ComplexNumber.ApproxEqual(results.ComputePValue(), results2.ComputePValue(), eps));
#endif

    return results;
}
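// Illustrative sketch (not part of the library): the four counts pulled out of the TwoByTwo above
// are the cells of a 2x2 contingency table. For reference, a one-sided Fisher exact p-value for
// such a table is the hypergeometric tail probability computed below. Whether EvaluationResultsFisher
// reports a one- or two-sided value is not shown here; FisherOneSidedPValue is a hypothetical helper.
private static double FisherOneSidedPValue(int a, int b, int c, int d)
{
    // Log-factorials keep the binomial coefficients from overflowing.
    int n = a + b + c + d;
    double[] logFact = new double[n + 1];
    for (int i = 2; i <= n; i++) logFact[i] = logFact[i - 1] + Math.Log(i);
    Func<int, int, double> logChoose = (m, k) => logFact[m] - logFact[k] - logFact[m - k];

    // P(table) under fixed margins = C(a+b, aa) * C(c+d, cc) / C(n, a+c).
    double pValue = 0;
    int maxA = Math.Min(a + b, a + c);
    for (int aa = a; aa <= maxA; aa++)   // sum over tables at least as extreme in one direction
    {
        int bb = a + b - aa, cc = a + c - aa, dd = d - a + aa;
        if (bb < 0 || cc < 0 || dd < 0) continue;
        pValue += Math.Exp(logChoose(a + b, aa) + logChoose(c + d, cc) - logChoose(n, a + c));
    }
    return pValue;
}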
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> predictorMap, Converter<Leaf, SufficientStatistics> targetMap)
{
    EvaluationResults evalResults;
    int[] fisherCounts = ModelScorer.PhyloTree.FisherCounts(predictorMap, targetMap);
    int[] realFisherCounts = fisherCounts; // for compatibility when NAIVE_EQUILIBRIUM is set
#if NAIVE_EQUILIBRIUM
    // USE THIS FOR BACKWARDS COMPATIBILITY
    int[] tempCounts = ModelScorer.PhyloTree.CountsOfLeaves(targetMap);
    fisherCounts = tempCounts;
#endif

    //MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    //if (TryShortCutFromCounts(realFisherCounts, nullMessageInitializer, out evalResults))
    //{
    //    return evalResults;
    //}
    //Score nullScore = ModelScorer.MaximizeLikelihood(nullMessageInitializer);

    bool isInvariant;
    Score nullScoreTarg = ComputeSingleVariableScore(predictorMap, targetMap, NullDistn, fisherCounts, out isInvariant);
    Score altScore = ComputeConditionalVariableScore(predictorMap, targetMap, nullScoreTarg, fisherCounts);

    //(realFisherCounts, nullScoreTarg, out evalResults))
    //{
    //    return evalResults;
    //}
    //MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScore.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
    //Score condScore = ModelScorer.MaximizeLikelihood(altMessageInitializer);

    List<Score> nullScores = new List<Score>();
    if (_includePredictorInScore)
    {
        // Reorder the counts so predictor and target swap roles (transpose of the 2x2 table).
        int[] predFisherCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
        Score predNullScore = ComputeSingleVariableScore(targetMap, predictorMap, NullDistn, predFisherCounts, out isInvariant);
        nullScores.Add(predNullScore);

        // The conditional model's altScore doesn't include predLL. If we're here, we want to add it
        // to make it comparable to the joint or reverseConditional models.
        altScore = Score.GetInstance(altScore.Loglikelihood + predNullScore.Loglikelihood, altScore.OptimizationParameters, altScore.Distribution);
    }
    nullScores.Add(nullScoreTarg);

    evalResults = EvaluationResultsDiscrete.GetInstance(this, nullScores, altScore, realFisherCounts, ChiSquareDegreesOfFreedom);

#if DEBUG
    // Recompute both likelihoods from the fitted parameters and check they match the maximized scores.
    MessageInitializerDiscrete nullMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, NullDistn, fisherCounts, ModelScorer.PhyloTree.LeafCollection);
    MessageInitializerDiscrete altMessageInitializer = MessageInitializerDiscrete.GetInstance(predictorMap, targetMap, (DistributionDiscreteConditional)AltDistn, nullScoreTarg.OptimizationParameters, ModelScorer.PhyloTree.LeafCollection);
    double nullLL = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializer, nullScoreTarg.OptimizationParameters);
    double altLL = ModelScorer.ComputeLogLikelihoodModelGivenData(altMessageInitializer, altScore.OptimizationParameters);
    if (_includePredictorInScore)
    {
        int[] predFisherCounts = new int[] { realFisherCounts[0], realFisherCounts[2], realFisherCounts[1], realFisherCounts[3] };
        MessageInitializerDiscrete nullMessageInitializerPred = MessageInitializerDiscrete.GetInstance(targetMap, predictorMap, NullDistn, predFisherCounts, ModelScorer.PhyloTree.LeafCollection);
        double nullLLPred = ModelScorer.ComputeLogLikelihoodModelGivenData(nullMessageInitializerPred, nullScores[0].OptimizationParameters);
        altLL += nullLLPred;
    }
    EvaluationResults evalResults2 = EvaluateModelOnDataGivenParams(predictorMap, targetMap, evalResults);
    double eps = 1E-10;
    Debug.Assert(ComplexNumber.ApproxEqual(nullLL, nullScoreTarg.Loglikelihood, eps));
    Debug.Assert(ComplexNumber.ApproxEqual(altLL, altScore.Loglikelihood, eps));
    Debug.Assert(ComplexNumber.ApproxEqual(evalResults.NullLL, evalResults2.NullLL, eps) && ComplexNumber.ApproxEqual(evalResults.AltLL, evalResults2.AltLL, eps),
        "In ModelEvaluatorCond, results of maximizing LL and computing LL from same params are not the same.");
#endif

    return evalResults;
}
public override EvaluationResults EvaluateModelOnData(Converter<Leaf, SufficientStatistics> v1, Converter<Leaf, SufficientStatistics> v2)
{
    // Collect the leaves for which both variables are observed, building a deterministic
    // seed from their names and values so the shuffle is reproducible.
    List<Leaf> nonMissingLeaves = new List<Leaf>(100);
    int seed = 0;
    foreach (Leaf leaf in ModelScorer.PhyloTree.LeafCollection)
    {
        SufficientStatistics class1 = v1(leaf);
        SufficientStatistics class2 = v2(leaf);
        if (!class1.IsMissing() && !class2.IsMissing())
        {
            nonMissingLeaves.Add(leaf);
            seed ^= (leaf.CaseName + class1.ToString() + class2.ToString()).GetHashCode();
        }
    }
    Random rand = new Random(seed);
    nonMissingLeaves = SpecialFunctions.Shuffle(nonMissingLeaves, ref rand);

    int groupSize = nonMissingLeaves.Count / _crossValidateCount;
    EvaluationResultsCrossValidate combinedResults = null;
    double testAltLLSum = 0;  // for debugging
    double testNullLLSum = 0; // for debugging

    for (int i = 0; i < _crossValidateCount; i++)
    {
        // Fold i tests on the contiguous block [testStart, trainStart) and trains on everything else.
        int testStart = i * groupSize;
        int trainStart = testStart + groupSize;

        Set<Leaf> trainSet = new Set<Leaf>(SpecialFunctions.SubList(nonMissingLeaves, trainStart, nonMissingLeaves.Count - trainStart));
        trainSet.AddNewRange(SpecialFunctions.SubList(nonMissingLeaves, 0, testStart));

        Converter<Leaf, SufficientStatistics> v1Train = CreateFilteredMap(v1, trainSet);
        Converter<Leaf, SufficientStatistics> v2Train = CreateFilteredMap(v2, trainSet);

        EvaluationResults trainingResults = InternalEvaluator.EvaluateModelOnData(v1Train, v2Train);
        EvaluationResults testAndTrainResult = InternalEvaluator.EvaluateModelOnDataGivenParams(v1, v2, trainingResults);
        EvaluationResultsTestGivenTrain testGivenTrainResult = EvaluationResultsTestGivenTrain.GetInstance(this, trainingResults, testAndTrainResult);

        if (combinedResults == null)
        {
            combinedResults = EvaluationResultsCrossValidate.GetInstance(this, testGivenTrainResult);
        }
        else
        {
            combinedResults = combinedResults.AddNewResults(testGivenTrainResult);
        }

        if (double.IsInfinity(combinedResults.AltLL)) // no point in continuing...infinity will kill everything.
        {
            break;
        }

#if DEBUG
        double eps = 1E-10;
        EvaluationResults testTrainingResults = InternalEvaluator.EvaluateModelOnDataGivenParams(v1Train, v2Train, trainingResults);
        Debug.Assert(ComplexNumber.ApproxEqual(testTrainingResults.AltLL, trainingResults.AltLL, eps) && ComplexNumber.ApproxEqual(testTrainingResults.NullLL, trainingResults.NullLL, eps));
        //Debug.Assert(testTrainingResults.Equals(trainingResults));

        // The test-only log likelihood is the full-data LL minus the training-only LL.
        double newNullLL = testAndTrainResult.NullLL - trainingResults.NullLL;
        double newAltLL = testAndTrainResult.AltLL - trainingResults.AltLL;
        Debug.Assert(ComplexNumber.ApproxEqual(newNullLL, testGivenTrainResult.NullLL, eps));
        Debug.Assert(ComplexNumber.ApproxEqual(newAltLL, testGivenTrainResult.AltLL, eps));

        testNullLLSum += newNullLL;
        testAltLLSum += newAltLL;
        Debug.Assert(ComplexNumber.ApproxEqual(testNullLLSum, combinedResults.NullLL, eps), "Combined result has wrong NullLL");
        Debug.Assert(ComplexNumber.ApproxEqual(testAltLLSum, combinedResults.AltLL, eps), "Combined result has wrong AltLL");
#endif
    }
    return combinedResults;
}
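// Illustrative sketch (not part of the library): the fold layout used above. For fold i, the test
// block is the contiguous range [i*groupSize, (i+1)*groupSize) of the shuffled leaves, and the
// training set is everything else (the tail after the test block plus the head before it); any
// remainder from the integer division always stays in training. FoldIndices is a hypothetical helper.
private static void FoldIndices(int count, int foldCount, int fold, out List<int> trainIdx, out List<int> testIdx)
{
    int groupSize = count / foldCount;
    int testStart = fold * groupSize;
    int trainStart = testStart + groupSize;
    trainIdx = new List<int>();
    testIdx = new List<int>();
    for (int i = 0; i < count; i++)
    {
        if (i >= testStart && i < trainStart) testIdx.Add(i);   // the test block for this fold
        else trainIdx.Add(i);                                   // everything else is training data
    }
}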