/// <summary>
/// Produces a randomized case-id-to-value mapping: the discrete model is first fitted to the
/// real data, and the fitted parameters are then passed to <c>PhyloTree.EvolveBinaryTree</c>.
/// </summary>
/// <param name="realCaseIdToNonMissingValue">Observed non-missing value for each case id.</param>
/// <param name="random">Random number generator, forwarded by reference to <c>EvolveBinaryTree</c>.</param>
/// <returns>The evolved mapping, converted to the <c>SufficientStatistics</c> base type.</returns>
public override Dictionary<string, SufficientStatistics> GenerateRandomMapping(
    Dictionary<string, SufficientStatistics> realCaseIdToNonMissingValue,
    ref Random random)
{
    // TODO: add a check that the supplied SufficientStatistics values really are BooleanStatistics.
    Converter<Leaf, SufficientStatistics> leafToStatistics =
        PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

    PhyloTree phyloTree = _modelScorer.PhyloTree;

    // Fit the model to the real data; the fitted parameters drive the simulation below.
    MessageInitializer initializer = MessageInitializerDiscrete.GetInstance(
        leafToStatistics,
        _discreteDistribution,
        new int[] { 1, 1 },
        phyloTree.LeafCollection);
    Score fittedScore = _modelScorer.MaximizeLikelihood(initializer);

    // Fraction of leaves that carry a non-missing value in the real data.
    double nonMissingLeafCount = (double)phyloTree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue);
    double totalLeafCount = (double)SpecialFunctions.Count(phyloTree.LeafCollection);
    double fractionNonMissing = nonMissingLeafCount / totalLeafCount;

    double fittedEquilibrium =
        fittedScore.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
    double fittedLambda =
        fittedScore.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

    // The third argument is the complement of the observed non-missing fraction.
    double fractionMissing = 1 - fractionNonMissing;
    Dictionary<string, BooleanStatistics> evolvedMapping =
        phyloTree.EvolveBinaryTree(fittedEquilibrium, fittedLambda, fractionMissing, ref random);

    Dictionary<string, SufficientStatistics> result;
    SpecialFunctions.ConvertDictionaryToBaseClasses(evolvedMapping, out result);
    return result;
}
/// <summary>
/// Builds the report line for one predictor/target row: identifying columns and leaf counts,
/// followed either by the output of <c>CompleteRowWithNaN</c> (when any predictor class has no
/// leaves) or by the two model scores, their log-likelihood difference and the p-value.
/// </summary>
/// <param name="modelScorer">Scorer used to create the message initializer and score both models.</param>
/// <param name="phyloTree">Tree on which the leaf counts are computed.</param>
/// <param name="rowAndTargetData">Supplies the row's column values plus its predictor and target data.</param>
/// <param name="workList">Not referenced by this implementation.</param>
/// <param name="rowIndex">Written into the leading columns of the line.</param>
/// <param name="workListCount">Written into the leading columns of the line.</param>
/// <param name="workIndex">Written into the leading columns of the line.</param>
/// <returns>The assembled report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    // NOTE: there is very similar code in ModelTesterDiscrete.cs.
    Dictionary<string, string> columns = rowAndTargetData.Row;
    string predictorName = columns[PhyloTree.PredictorVariableColumnName];
    string targetName = columns[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(columns[PhyloTree.NullIndexColumnName]);

    // Predictor and target values are taken directly from the row data; workList is not consulted.
    Dictionary<string, SufficientStatistics> predictorValues = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetValues = rowAndTargetData.TargetData;

    Converter<Leaf, SufficientStatistics> targetMap = CreateSufficientStatisticsMap(targetValues);
    Converter<Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorValues);

    int[] predictorCounts = phyloTree.CountsOfLeaves(predictorMap);
    int falseCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int trueCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
    int predictorTotalCount = trueCount + falseCount;
    int targetNonMissingCount = phyloTree.CountOfNonMissingLeaves(targetValues);
    int globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorMap, targetMap);

    // Leading columns; the trailing "" argument is kept exactly as the original passed it.
    string leadingColumns = SpecialFunctions.CreateTabString(
        this,
        rowIndex,
        workListCount,
        workIndex,
        nullIndex,
        predictorName,
        falseCount,
        trueCount,
        predictorTotalCount,
        targetName,
        targetNonMissingCount,
        globalNonMissingCount,
        "");
    StringBuilder reportLine = new StringBuilder(leadingColumns);

    // If any predictor class has zero leaves, the models are not scored for this row.
    bool anyClassEmpty = Array.IndexOf(predictorCounts, 0) >= 0;
    if (anyClassEmpty)
    {
        CompleteRowWithNaN(reportLine);
        return reportLine.ToString();
    }

    MessageInitializer initializer =
        modelScorer.CreateMessageInitializer(predictorMap, targetMap, NullModelDistribution);
    NullModelDistribution.InitialParamVals = null;

    // Slot 0 holds the score obtained with useParameter == false, slot 1 with useParameter == true.
    double[] logLikelihoods = new double[2];
    for (int pass = 0; pass < logLikelihoods.Length; ++pass)
    {
        bool useParameter = (pass == 1);
        Score score = modelScorer.ScoreModel(initializer, useParameter);

        string scoreColumns = SpecialFunctions.CreateTabString(score, "");
        reportLine.Append(scoreColumns);
        Debug.Write(scoreColumns);

        logLikelihoods[pass] = score.Loglikelihood;

        // Each pass hands its optimized parameters to the alternative model as initial values.
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
    }

    double logLikelihoodGain = logLikelihoods[1] - logLikelihoods[0];

    // The test receives the gain clamped at zero, while the report shows the raw gain.
    double pValue = SpecialFunctions.LogLikelihoodRatioTest(
        Math.Max(logLikelihoodGain, 0),
        ChiSquareDegreesOfFreedom);

    string resultColumns = SpecialFunctions.CreateTabString(logLikelihoodGain, pValue);
    reportLine.Append(resultColumns);
    Debug.WriteLine(resultColumns);

    return reportLine.ToString();
}