/// <summary>
/// Determines whether <paramref name="obj"/> is a MessageInitializer that matches this one.
/// </summary>
/// <remarks>
/// The comparison succeeds only when the _hashCode fields, the DependsOnMoreThanOneVariable flags and the
/// string forms of the two distributions agree, and when every leaf of this instance's _fullLeafCollection
/// has the same missing status and target statistics on both sides. For distributions that depend on more
/// than one variable, the predictor statistics of each leaf are compared as well.
/// </remarks>
/// <param name="obj">Object to compare with; anything that is not a MessageInitializer compares unequal.</param>
/// <returns>true when every check passes; otherwise false.</returns>
public override bool Equals(object obj)
{
    MessageInitializer that = obj as MessageInitializer;
    if (that == null)
    {
        return false;
    }

    // Whole-object checks, cheapest first, in the same order as before.
    if (_hashCode != that._hashCode)
    {
        return false;
    }
    if (_distribution.DependsOnMoreThanOneVariable != that._distribution.DependsOnMoreThanOneVariable)
    {
        return false;
    }
    if (_distribution.ToString() != that._distribution.ToString())
    {
        return false;
    }

    // Per-leaf checks. Only this instance's leaf collection is walked.
    foreach (Leaf leaf in _fullLeafCollection)
    {
        if (IsMissing(leaf) != that.IsMissing(leaf))
        {
            return false;
        }
        if (LeafToTargetStatistics(leaf) != that.LeafToTargetStatistics(leaf))
        {
            return false;
        }

        if (!_distribution.DependsOnMoreThanOneVariable)
        {
            continue;
        }

        // The distribution depends on the predictor variables, so each pair of predictor maps
        // must agree on this leaf. How the two lists are paired up is decided by
        // SpecialFunctions.EnumerateTwo (presumably positionally; verify against that helper).
        foreach (KeyValuePair<Converter<Leaf, SufficientStatistics>, Converter<Leaf, SufficientStatistics>> predictorMaps
                 in SpecialFunctions.EnumerateTwo(LeafToPredictorStatisticsList, that.LeafToPredictorStatisticsList))
        {
            if (predictorMaps.Key(leaf) != predictorMaps.Value(leaf))
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Computes the log likelihood of the model given the data, trying useLogMethod = false first and
/// repeating the computation with useLogMethod = true when the first attempt yields negative infinity.
/// </summary>
/// <param name="messageInitializer">Passed unchanged to the three-argument overload.</param>
/// <param name="paramList">Passed unchanged to the three-argument overload.</param>
/// <returns>
/// The result of the first attempt unless it is negative infinity, in which case the result of the retry.
/// </returns>
public double ComputeLogLikelihoodModelGivenData(MessageInitializer messageInitializer, OptimizationParameterList paramList)
{
    double firstAttempt = ComputeLogLikelihoodModelGivenData(messageInitializer, paramList, false);
    if (!double.IsNegativeInfinity(firstAttempt))
    {
        return firstAttempt;
    }

    // The first attempt produced -infinity, so fall back to the log method.
    return ComputeLogLikelihoodModelGivenData(messageInitializer, paramList, true);
}
/// <summary>
/// Learns the optimal parameters for the data contained in the messageInitializer and returns the corresponding Score.
/// </summary>
/// <remarks>
/// Scores are memoized in _cache, keyed by the messageInitializer itself. CacheHits and CacheMisses are
/// incremented accordingly. In DEBUG builds every cache hit is re-computed and checked against the cached value.
/// </remarks>
/// <param name="messageInitializer">Data to fit; also used as the cache key.</param>
/// <returns>The cached score when one exists, otherwise a freshly optimized score.</returns>
public Score MaximizeLikelihood(MessageInitializer messageInitializer)
{
    if (!_cache.ContainsKey(messageInitializer))
    {
        CacheMisses++;
        Score freshScore = MaximizeLikelihoodInternal(messageInitializer);

        // The size check runs before the insert, so the cache is cleared only once it already
        // holds more than MAX_CACHE_SIZE entries.
        if (_cache.Count > MAX_CACHE_SIZE)
        {
            ClearCache();
        }
        _cache.Add(messageInitializer, freshScore);
        return freshScore;
    }

    CacheHits++;
    Score cachedScore = _cache[messageInitializer];

#if (DEBUG)
    // Sanity check: recompute the score and make sure the cached value still matches it.
    Score scoreLive = MaximizeLikelihoodInternal(messageInitializer);
    double deviation = Math.Abs(cachedScore.Loglikelihood - scoreLive.Loglikelihood);
    if (deviation >= 10e-7)
    {
        double diff = scoreLive.Loglikelihood - cachedScore.Loglikelihood;
        Debug.WriteLine("Cache differs from computed score by " + diff);
    }

    // Note: minute (10E-14) differences sometimes arise. The original explanation was that these were
    // rounding differences caused when missing data was caught in different places. Localizing the catch
    // of missing data did not remove them, so the cause is still unknown. This is why a tolerance
    // (10e-7, i.e. 1e-6) is used instead of exact equality.
    SpecialFunctions.CheckCondition(
        deviation < 10e-7,
        "Cached score " + cachedScore.Loglikelihood + " doesn't match live score " + scoreLive.Loglikelihood);
#endif

    return cachedScore;
}
/// <summary>
/// Scores the model twice through <paramref name="modelScorer"/> (first with useParameter = false, then with
/// useParameter = true) and returns the difference of the two log likelihoods.
/// </summary>
/// <param name="modelScorer">Creates the message initializer and scores both passes.</param>
/// <param name="phyloTree">Not used by this implementation.</param>
/// <param name="stringBuilder">Receives a tab-delimited rendering of each pass's score.</param>
/// <param name="targetMarginal">Stored in NullModelDistribution.EmpiricalEquilibrium before scoring.</param>
/// <param name="predictorMarginal">Not used by this implementation.</param>
/// <param name="predictorDistributionClassFunction">Maps a leaf to its predictor statistics.</param>
/// <param name="targetDistributionClassFunction">Maps a leaf to its target statistics.</param>
/// <returns>Log likelihood of the second pass minus log likelihood of the first pass.</returns>
protected override double ComputeLLR(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    StringBuilder stringBuilder,
    double targetMarginal,
    double predictorMarginal,
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction,
    Converter<Leaf, SufficientStatistics> targetDistributionClassFunction)
{
    NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
    NullModelDistribution.InitialParamVals = null;

    MessageInitializer messageInitializer = modelScorer.CreateMessageInitializer(
        predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);

    // Slot 0 holds the useParameter = false pass, slot 1 the useParameter = true pass.
    double[] passLogLikelihoods = new double[2];
    for (int pass = 0; pass < passLogLikelihoods.Length; ++pass)
    {
        bool useParameter = (pass == 1);
        Score score = modelScorer.ScoreModel(messageInitializer, useParameter);

        stringBuilder.Append(SpecialFunctions.CreateTabString(
            score.ToString(useParameter ? AlternativeModelDistribution : NullModelDistribution), ""));
        passLogLikelihoods[pass] = score.Loglikelihood;

        // The parameters of whichever pass was just scored become the starting values of the
        // alternative model. This assignment happens after both passes.
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
        Debug.WriteLine(SpecialFunctions.CreateTabString(
            "AltModelDistribution.InitialParamVals = score.OptimizationParameters", score.OptimizationParameters));
    }

    return passLogLikelihoods[1] - passLogLikelihoods[0];
}
/// <summary>
/// Generates a random case-id to statistics mapping by fitting the discrete model to the real data and then
/// evolving a binary tree with the fitted parameters.
/// </summary>
/// <param name="realCaseIdToNonMissingValue">Observed non-missing value for each case id.</param>
/// <param name="random">Random number generator handed by reference to the tree evolution.</param>
/// <returns>The evolved mapping, converted to a dictionary of SufficientStatistics.</returns>
public override Dictionary<string, SufficientStatistics> GenerateRandomMapping(
    Dictionary<string, SufficientStatistics> realCaseIdToNonMissingValue,
    ref Random random)
{
    // TODO: add a check that the supplied SufficientStatistics really are BooleanStatistics.
    Converter<Leaf, SufficientStatistics> leafToStatistics =
        PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);
    PhyloTree tree = _modelScorer.PhyloTree;

    // Fit the model to the real data to obtain the parameters used for the simulation.
    MessageInitializer fitInitializer = MessageInitializerDiscrete.GetInstance(
        leafToStatistics, _discreteDistribution, new int[] { 1, 1 }, tree.LeafCollection);
    Score fittedScore = _modelScorer.MaximizeLikelihood(fitInitializer);

    // Fraction (not a percentage) of the tree's leaves that have a non-missing value.
    double fractionNonMissing =
        (double)tree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue)
        / (double)SpecialFunctions.Count(tree.LeafCollection);

    double equilibrium =
        fittedScore.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
    double lambda =
        fittedScore.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

    // The third argument is the complement of the non-missing fraction.
    Dictionary<string, BooleanStatistics> evolved =
        tree.EvolveBinaryTree(equilibrium, lambda, 1 - fractionNonMissing, ref random);

    Dictionary<string, SufficientStatistics> result;
    SpecialFunctions.ConvertDictionaryToBaseClasses(evolved, out result);
    return result;
}
/// <summary>
/// Optimizes the parameters supplied by <paramref name="messageInitializer"/> with GridSearch and wraps the
/// resulting log likelihood in a Score. No caching is done here.
/// </summary>
/// <param name="messageInitializer">
/// Supplies the parameters to optimize and the propagation distribution recorded in the returned Score.
/// </param>
/// <returns>
/// A Score built from the value returned by GridSearch.Optimize, the parameter list that was handed to the
/// optimizer, and the initializer's propagation distribution.
/// </returns>
protected Score MaximizeLikelihoodInternal(MessageInitializer messageInitializer)
{
    OptimizationParameterList parameters = messageInitializer.GetOptimizationParameters();
    int evaluationCount = 0;

    // Captured by the delegate below. Once an evaluation has had to fall back to the log method,
    // every later evaluation in this optimization run uses it too.
    bool useLogMethod = false;

    Converter<OptimizationParameterList, double> objective = delegate(OptimizationParameterList candidate)
    {
        FuncCalls++;
        evaluationCount++;

        double value = ComputeLogLikelihoodModelGivenData(messageInitializer, candidate, useLogMethod);
        if (useLogMethod || !double.IsNegativeInfinity(value))
        {
            // NaN results are passed to the optimizer unchanged.
            return value;
        }

        // The non-log method produced -infinity, so switch to the log method and re-evaluate.
        useLogMethod = true;
        return ComputeLogLikelihoodModelGivenData(messageInitializer, candidate, true);
    };

    double bestLogLikelihood = GridSearch.Optimize(objective, parameters, 10, 5);
    Score result = Score.GetInstance(bestLogLikelihood, parameters, messageInitializer.PropogationDistribution);

    Debug.WriteLine(SpecialFunctions.CreateTabString(GridSearch.DebugCount, result, evaluationCount));
    return result;
}
/// <summary>
/// Computes the log likelihood by delegating to PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian.
/// </summary>
/// <param name="messageInitializer">Forwarded to the Gaussian computation.</param>
/// <param name="paramList">Forwarded to the Gaussian computation.</param>
/// <param name="useLogMethod">Ignored by this override; the value is not forwarded.</param>
/// <returns>The value returned by the Gaussian computation.</returns>
public override double ComputeLogLikelihoodModelGivenData(
    MessageInitializer messageInitializer,
    OptimizationParameterList paramList,
    bool useLogMethod)
{
    // useLogMethod has no effect here: the Gaussian routine is called the same way for both values.
    double gaussianLogLikelihood = PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian(messageInitializer, paramList);
    return gaussianLogLikelihood;
}
/// <summary>
/// Computes a log-likelihood value for the given message initializer and parameter list.
/// The computation itself is supplied by the overriding class.
/// </summary>
/// <param name="messageInitializer">Message initializer to evaluate.</param>
/// <param name="paramList">Parameter list to evaluate.</param>
/// <param name="useLogMethod">
/// Selects the implementation's "log method"; its exact effect is defined by the override.
/// NOTE(review): callers visible in this file pass false first and retry with true when the result is
/// negative infinity, so this is presumably the variant that is more robust to underflow. Confirm.
/// </param>
/// <returns>The computed log likelihood as a double.</returns>
public abstract double ComputeLogLikelihoodModelGivenData(MessageInitializer messageInitializer, OptimizationParameterList paramList, bool useLogMethod);
/// <summary>
/// Builds one tab-delimited report line for the predictor/target pair described by
/// <paramref name="rowAndTargetData"/>.
/// </summary>
/// <remarks>
/// The line starts with identifying columns and leaf counts. If any entry of the predictor leaf counts is
/// zero, the rest of the row is filled in by CompleteRowWithNaN. Otherwise the model is scored twice
/// (useParameter = false, then true) and both scores, their log-likelihood difference and the resulting
/// p-value are appended.
/// </remarks>
/// <param name="modelScorer">Creates the message initializer and scores both passes.</param>
/// <param name="phyloTree">Tree used to count leaves.</param>
/// <param name="rowAndTargetData">Supplies the row columns plus the predictor and target data.</param>
/// <param name="workList">Not used by this implementation.</param>
/// <param name="rowIndex">Written to the report line.</param>
/// <param name="workListCount">Written to the report line.</param>
/// <param name="workIndex">Written to the report line.</param>
/// <returns>The completed report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    // NOTE: there is very similar code in ModelTesterDiscrete.cs
    Dictionary<string, string> row = rowAndTargetData.Row;
    string predictorVariable = row[PhyloTree.PredictorVariableColumnName];
    string targetVariable = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

    Dictionary<string, SufficientStatistics> predictorData = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetData = rowAndTargetData.TargetData;

    Converter<Leaf, SufficientStatistics> targetDistributionMap = CreateSufficientStatisticsMap(targetData);
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(predictorData);

    // Leaf counts reported in the leading columns.
    int[] predictorCounts = phyloTree.CountsOfLeaves(predictorDistributionClassFunction);
    int falseCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int trueCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
    int targetNonMissingCount = phyloTree.CountOfNonMissingLeaves(targetData);
    int globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionMap);

    StringBuilder line = new StringBuilder(
        SpecialFunctions.CreateTabString(
            this,
            rowIndex,
            workListCount,
            workIndex,
            nullIndex,
            predictorVariable,
            falseCount,
            trueCount,
            trueCount + falseCount,
            targetVariable,
            targetNonMissingCount,
            globalNonMissingCount,
            ""));

    // A row is not scored when any predictor count is zero.
    bool hasEmptyPredictorClass = false;
    for (int i = 0; i < predictorCounts.Length; ++i)
    {
        if (predictorCounts[i] == 0)
        {
            hasEmptyPredictorClass = true;
            break;
        }
    }

    if (hasEmptyPredictorClass)
    {
        CompleteRowWithNaN(line);
        return line.ToString();
    }

    MessageInitializer messageInitializer = modelScorer.CreateMessageInitializer(
        predictorDistributionClassFunction, targetDistributionMap, NullModelDistribution);
    NullModelDistribution.InitialParamVals = null;

    // Slot 0 holds the useParameter = false pass, slot 1 the useParameter = true pass.
    double[] passLogLikelihoods = new double[2];
    for (int pass = 0; pass < passLogLikelihoods.Length; ++pass)
    {
        bool useParameter = (pass == 1);
        Score score = modelScorer.ScoreModel(messageInitializer, useParameter);

        line.Append(SpecialFunctions.CreateTabString(score, ""));
        Debug.Write(SpecialFunctions.CreateTabString(score, ""));
        passLogLikelihoods[pass] = score.Loglikelihood;

        // The parameters of the pass just scored become the alternative model's starting values.
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
    }

    double diff = passLogLikelihoods[1] - passLogLikelihoods[0];

    // A negative difference is clamped to zero for the test; the unclamped difference is reported.
    double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);
    line.Append(SpecialFunctions.CreateTabString(diff, pValue));
    Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));

    return line.ToString();
}