/// <summary>
/// Builds the tab-delimited report line for one work item. The line starts with the
/// identifying columns and the leaf counts. If any predictor count is zero the line is
/// finished by <c>CompleteRowWithNaN</c>; otherwise the model is scored twice
/// (<c>useParameter</c> false, then true) and the two scores, the difference of their
/// log-likelihoods and the result of <c>SpecialFunctions.LogLikelihoodRatioTest</c> are appended.
/// </summary>
/// <remarks>There is very similar code in ModelTesterDiscrete.cs.</remarks>
/// <param name="modelScorer">Creates the message initializer and scores the model.</param>
/// <param name="phyloTree">Tree whose leaves are counted.</param>
/// <param name="rowAndTargetData">Provides the row fields, the predictor data and the target data.</param>
/// <param name="workList">Not read by this implementation.</param>
/// <param name="rowIndex">Copied into the leading columns of the line.</param>
/// <param name="workListCount">Copied into the leading columns of the line.</param>
/// <param name="workIndex">Copied into the leading columns of the line.</param>
/// <returns>The assembled report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    // Identifying fields of this row.
    Dictionary<string, string> fields = rowAndTargetData.Row;
    string predictorName = fields[PhyloTree.PredictorVariableColumnName];
    string targetName = fields[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(fields[PhyloTree.NullIndexColumnName]);

    // Per-case values are taken directly from the row object rather than from workList.
    Dictionary<string, SufficientStatistics> predictorByCaseId = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> targetByCaseId = rowAndTargetData.TargetData;

    // Leaf -> statistics maps (target first, then predictor, as before).
    Converter<Leaf, SufficientStatistics> targetMap = CreateSufficientStatisticsMap(targetByCaseId);
    Converter<Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorByCaseId);

    // Counts reported in the leading columns.
    int[] predictorCounts = phyloTree.CountsOfLeaves(predictorMap);
    int falseLeafCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int trueLeafCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
    int targetNonMissingCount = phyloTree.CountOfNonMissingLeaves(targetByCaseId);
    int globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorMap, targetMap);

    StringBuilder line = new StringBuilder(
        SpecialFunctions.CreateTabString(
            this,
            rowIndex,
            workListCount,
            workIndex,
            nullIndex,
            predictorName,
            falseLeafCount,
            trueLeafCount,
            trueLeafCount + falseLeafCount,
            targetName,
            targetNonMissingCount,
            globalNonMissingCount,
            ""));

    // A row is not scored when any predictor count is zero.
    bool hasEmptyPredictorClass = false;
    for (int i = 0; i < predictorCounts.Length && !hasEmptyPredictorClass; i++)
    {
        hasEmptyPredictorClass = predictorCounts[i] == 0;
    }

    if (hasEmptyPredictorClass)
    {
        CompleteRowWithNaN(line);
        return line.ToString();
    }

    MessageInitializer messageInitializer =
        modelScorer.CreateMessageInitializer(predictorMap, targetMap, NullModelDistribution);
    NullModelDistribution.InitialParamVals = null;

    // Slot 0 holds the log-likelihood for useParameter == false, slot 1 for useParameter == true.
    double[] logLikelihoods = new double[2];
    for (int pass = 0; pass < logLikelihoods.Length; pass++)
    {
        bool useParameter = pass == 1;
        Score score = modelScorer.ScoreModel(messageInitializer, useParameter);
        line.Append(SpecialFunctions.CreateTabString(score, ""));
        Debug.Write(SpecialFunctions.CreateTabString(score, ""));
        logLikelihoods[pass] = score.Loglikelihood;

        // Each pass stores its optimized parameters as AltModelDistribution's initial values.
        AltModelDistribution.InitialParamVals = score.OptimizationParameters;
    }

    double logLikelihoodGain = logLikelihoods[1] - logLikelihoods[0];

    // A negative difference is clamped to zero before it is handed to the ratio test.
    double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(logLikelihoodGain, 0), ChiSquareDegreesOfFreedom);
    line.Append(SpecialFunctions.CreateTabString(logLikelihoodGain, pValue));
    Debug.WriteLine(SpecialFunctions.CreateTabString(logLikelihoodGain, pValue));

    return line.ToString();
}
//protected override NullDataCollection CreateNullDataGenerator(ModelScorer modelScorer, PhyloTree phyloTree, RangeCollection nullIndexRangeCollection, Dictionary<string, Dictionary<string, BooleanStatistics>> predictorVariableToCaseIdToRealNonMissingValue)
//{
//    if (DateTime.Now.Date == new DateTime(2006, 6, 28).Date)  // for testing, force it to use the parametric bootstrap
//    {
//        return NullDataCollection.GetInstance(
//            new NullDataGeneratorAlongTree(modelScorer, phyloTree, (ModelTesterDiscrete)this),
//            nullIndexRangeCollection,
//            predictorVariableToCaseIdToRealNonMissingValue);
//    }
//    return base.CreateNullDataGenerator(modelScorer, phyloTree, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);
//}

//public override Converter<Leaf, SufficientStatistics> CreateTargetSufficientStatisticsMap(Dictionary<string, ISufficientStatistics> caseIdToNonMissingValue)
//{
//    return ISufficientStatistics.DictionaryToLeafMap(caseIdToNonMissingValue);
//}
//public override Converter<Leaf, SufficientStatistics> CreatePredictorSufficientStatisticsMap(Dictionary<string, BooleanStatistics> caseIdToNonMissingValue)
//{
//    return CreateTargetSufficientStatisticsMap(caseIdToNonMissingValue);
//}

/// <summary>
/// Builds the tab-delimited report line for one work item. The line starts with the
/// identifying columns, the predictor and target leaf counts, the four Fisher counts and the
/// global non-missing count. If any checked count is zero the line is finished by
/// <c>CompleteRowWithNaN</c>; otherwise the value returned by <c>ComputeLLR</c> and the result of
/// <c>SpecialFunctions.LogLikelihoodRatioTest</c> are appended.
/// </summary>
/// <remarks>There is very similar code in ModelTesterGaussian.cs.</remarks>
/// <param name="modelScorer">Forwarded to <c>ComputeLLR</c>.</param>
/// <param name="phyloTree">Tree whose leaves are counted; also forwarded to <c>ComputeLLR</c>.</param>
/// <param name="rowAndTargetData">Provides the row fields, the predictor data and the target data.</param>
/// <param name="workList">Not read by this implementation.</param>
/// <param name="rowIndex">Copied into the leading columns of the line.</param>
/// <param name="workListCount">Copied into the leading columns of the line.</param>
/// <param name="workIndex">Copied into the leading columns of the line.</param>
/// <returns>The assembled report line.</returns>
protected override string CreateReportLine(
    ModelScorer modelScorer,
    PhyloTree phyloTree,
    RowData rowAndTargetData,
    UniversalWorkList workList,
    int rowIndex,
    int workListCount,
    int workIndex)
{
    //!!!there is very similar code in ModelTesterGaussian.cs

    // We're iterating over each predictor (e.g. hla), each target (e.g. position in the sequence),
    // and each possible substring at that position.
    // Then we ask the question: does the presence of the predictor (e.g. hla)
    // influence the probability that the target (e.g. mer in position n1pos) will show up?
    // nullIndex specifies whether this is the true data or randomized data.
    Dictionary<string, string> row = rowAndTargetData.Row;
    string predictorVariable = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
    string targetVariable = row[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
    int nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

    // Per-case values are taken directly from the row object rather than from workList.
    Dictionary<string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData;
    Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue = rowAndTargetData.TargetData;

    // Look at one predictor value to decide whether the predictor is Boolean. Using foreach
    // (instead of a hand-driven enumerator) disposes the enumerator and never reads Current
    // after a failed MoveNext; with no values the representative stays null and the
    // predictor is treated as non-Boolean.
    SufficientStatistics representative = null;
    foreach (SufficientStatistics predictorValue in caseIdToNonMissingPredictorValue.Values)
    {
        representative = predictorValue;
        break;
    }
    bool predictorIsBoolean = representative is BooleanStatistics;

    Converter<Leaf, SufficientStatistics> targetDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
    Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonMissingPredictorValue);

    // For a non-Boolean predictor the predictor counts are a zero-filled placeholder.
    int[] predictorCounts = predictorIsBoolean
        ? phyloTree.CountsOfLeaves(predictorDistributionClassFunction, NullModelDistribution)
        : new int[2];
    int[] targetCounts = phyloTree.CountsOfLeaves(targetDistributionClassFunction, NullModelDistribution);

    int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int predictorTrueNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
    int targetFalseNameCount = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
    int targetTrueNameCount = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.True];

    // Fisher counts are only computed for Boolean predictors; otherwise four zeros are reported.
    int[] fisherCounts = predictorIsBoolean
        ? phyloTree.FisherCounts(predictorDistributionClassFunction, targetDistributionClassFunction)
        : new int[4];

    int globalNonMissingCount = predictorIsBoolean
        ? fisherCounts[0] + fisherCounts[1] + fisherCounts[2] + fisherCounts[3]
        : phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionClassFunction);

    StringBuilder stringBuilder = new StringBuilder(
        SpecialFunctions.CreateTabString(
            this,
            rowIndex,
            workListCount,
            workIndex,
            nullIndex,
            predictorVariable,
            predictorFalseNameCount,
            predictorTrueNameCount,
            predictorTrueNameCount + predictorFalseNameCount,
            targetVariable,
            targetFalseNameCount,
            targetTrueNameCount,
            targetTrueNameCount + targetFalseNameCount,
            fisherCounts[0],
            fisherCounts[1],
            fisherCounts[2],
            fisherCounts[3],
            globalNonMissingCount,
            ""));

    // The row is not scored when any checked count is zero. For a non-Boolean predictor the
    // placeholder { 1, 1 } stands in for the predictor counts so only the target counts matter.
    bool ignoreRow = false;
    foreach (int[] counts in new int[][] { predictorIsBoolean ? predictorCounts : new int[] { 1, 1 }, targetCounts })
    {
        foreach (int count in counts)
        {
            if (count == 0)
            {
                ignoreRow = true;
            }
        }
    }

    if (ignoreRow)
    {
        CompleteRowWithNaN(stringBuilder);
    }
    else
    {
        double targetMarginal = (double)targetTrueNameCount / (double)(targetTrueNameCount + targetFalseNameCount);

        // NOTE(review): for a non-Boolean predictor both predictor counts are 0, so this is
        // 0.0 / 0.0 = NaN; presumably ComputeLLR does not use it in that case - confirm.
        double predictorMarginal = (double)predictorTrueNameCount / (double)(predictorTrueNameCount + predictorFalseNameCount);

        double diff = ComputeLLR(modelScorer, phyloTree, stringBuilder, targetMarginal, predictorMarginal, predictorDistributionClassFunction, targetDistributionClassFunction);

        // A negative difference is clamped to zero before it is handed to the ratio test.
        double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);
        stringBuilder.Append(SpecialFunctions.CreateTabString(diff, pValue));
    }

    return stringBuilder.ToString();
}