/// <summary>
/// Builds one report line for a single (predictor, target, nullIndex) work item.
/// The line starts with the identifying columns and leaf counts; it is then completed
/// either with NaN placeholders (when any predictor class has no leaves) or with the
/// two model scores, their log-likelihood difference and the resulting p-value.
/// </summary>
/// <param name="modelScorer">Creates the message initializer and scores the model.</param>
/// <param name="phyloTree">Tree used to count leaves for the predictor and target.</param>
/// <param name="rowAndTargetData">Supplies the row fields plus predictor and target data.</param>
/// <param name="workList">Not used by this implementation.</param>
/// <param name="rowIndex">Passed through to the report line.</param>
/// <param name="workListCount">Passed through to the report line.</param>
/// <param name="workIndex">Passed through to the report line.</param>
/// <returns>The report line produced via <c>SpecialFunctions.CreateTabString</c>.</returns>
protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            // NOTE: ModelTesterDiscrete.cs contains very similar code.

            // Identify the work item from the row fields.
            Dictionary<string, string> fields = rowAndTargetData.Row;
            string predictorName = fields[PhyloTree.PredictorVariableColumnName];
            string targetName    = fields[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex     = int.Parse(fields[PhyloTree.NullIndexColumnName]);

            Dictionary<string, SufficientStatistics> predictorByCaseId = rowAndTargetData.PredictorData;
            Dictionary<string, SufficientStatistics> targetByCaseId    = rowAndTargetData.TargetData;

            // Leaf -> statistics lookups (target first, then predictor, as before).
            Converter<Leaf, SufficientStatistics> targetMap    = CreateSufficientStatisticsMap(targetByCaseId);
            Converter<Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorByCaseId);

            int[] predictorCounts = phyloTree.CountsOfLeaves(predictorMap);

            int falseCount            = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int trueCount             = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetNonMissingCount = phyloTree.CountOfNonMissingLeaves(targetByCaseId);
            int globalNonMissingCount = phyloTree.GlobalNonMissingCount(predictorMap, targetMap);

            // Leading columns shared by every row, whether or not it gets scored.
            string header = SpecialFunctions.CreateTabString(
                this, rowIndex, workListCount, workIndex, nullIndex, predictorName,
                falseCount,
                trueCount,
                trueCount + falseCount,
                targetName,
                targetNonMissingCount,
                globalNonMissingCount,
                "");
            StringBuilder reportLine = new StringBuilder(header);

            // A predictor class with no leaves means the row is not scored.
            bool anyEmptyPredictorClass = Array.IndexOf(predictorCounts, 0) >= 0;
            if (anyEmptyPredictorClass)
            {
                CompleteRowWithNaN(reportLine);
                return reportLine.ToString();
            }

            MessageInitializer messageInitializer =
                modelScorer.CreateMessageInitializer(predictorMap, targetMap, NullModelDistribution);
            NullModelDistribution.InitialParamVals = null;

            // Pass 0 scores with useParameter == false, pass 1 with useParameter == true.
            // After each pass the optimized parameters are stored as the initial values
            // of AltModelDistribution.
            double[] logLikelihoods = new double[2];
            for (int pass = 0; pass < logLikelihoods.Length; ++pass)
            {
                bool useParameter = (pass == 1);
                Score score = modelScorer.ScoreModel(messageInitializer, useParameter);

                reportLine.Append(SpecialFunctions.CreateTabString(score, ""));
                Debug.Write(SpecialFunctions.CreateTabString(score, ""));

                logLikelihoods[pass] = score.Loglikelihood;
                AltModelDistribution.InitialParamVals = score.OptimizationParameters;
            }

            // The difference is clamped at zero before it is handed to the ratio test.
            double diff   = logLikelihoods[1] - logLikelihoods[0];
            double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

            string tail = SpecialFunctions.CreateTabString(diff, pValue);
            reportLine.Append(tail);
            Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));

            return reportLine.ToString();
        }
Exemple #2
0
        //protected override NullDataCollection CreateNullDataGenerator(ModelScorer modelScorer, PhyloTree phyloTree, RangeCollection nullIndexRangeCollection, Dictionary<string, Dictionary<string, BooleanStatistics>> predictorVariableToCaseIdToRealNonMissingValue)
        //{
        //    if (DateTime.Now.Date == new DateTime(2006, 6, 28).Date)  // for testing, force it to use the parametric bootstrap
        //    {
        //        return NullDataCollection.GetInstance(
        //            new NullDataGeneratorAlongTree(modelScorer, phyloTree, (ModelTesterDiscrete)this),
        //            nullIndexRangeCollection,
        //            predictorVariableToCaseIdToRealNonMissingValue);
        //    }


        //    return base.CreateNullDataGenerator(modelScorer, phyloTree, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);
        //}

        //public override Converter<Leaf, SufficientStatistics> CreateTargetSufficientStatisticsMap(Dictionary<string, ISufficientStatistics> caseIdToNonMissingValue)
        //{
        //    return ISufficientStatistics.DictionaryToLeafMap(caseIdToNonMissingValue);
        //}

        //public override Converter<Leaf, SufficientStatistics> CreatePredictorSufficientStatisticsMap(Dictionary<string, BooleanStatistics> caseIdToNonMissingValue)
        //{
        //    return CreateTargetSufficientStatisticsMap(caseIdToNonMissingValue);
        //}

        /// <summary>
        /// Builds one report line for a single (predictor, target, nullIndex) work item.
        /// The line starts with identifying columns, predictor/target leaf counts, the four
        /// Fisher counts and the global non-missing count. It is then completed either with
        /// NaN placeholders (when a relevant class has no leaves) or with the output of
        /// <c>ComputeLLR</c> followed by the log-likelihood difference and its p-value.
        /// </summary>
        /// <param name="modelScorer">Forwarded to <c>ComputeLLR</c>.</param>
        /// <param name="phyloTree">Tree used for leaf counts; also forwarded to <c>ComputeLLR</c>.</param>
        /// <param name="rowAndTargetData">Supplies the row fields plus predictor and target data.</param>
        /// <param name="workList">Not used by this implementation.</param>
        /// <param name="rowIndex">Passed through to the report line.</param>
        /// <param name="workListCount">Passed through to the report line.</param>
        /// <param name="workIndex">Passed through to the report line.</param>
        /// <returns>The report line produced via <c>SpecialFunctions.CreateTabString</c>.</returns>
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterGaussian.cs

            // We're iterating over each predictor (e.g. hla), each target (e.g. position in the sequence),
            // and each possible substring at that position.
            // Then we ask the question: does the presence of the predictor (e.g. hla)
            // influence the probability that the target (e.g. mer in position n1pos) will show up?
            // nullIndex specifies whether this is the true data or randomized data.
            Dictionary<string, string> row = rowAndTargetData.Row;
            string predictorVariable = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
            string targetVariable    = row[PhyloTree.TargetVariableColumnName];    // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex         = int.Parse(row[PhyloTree.NullIndexColumnName]);

            Dictionary<string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData;
            Dictionary<string, SufficientStatistics> caseIdToNonMissingTargetValue    = rowAndTargetData.TargetData;

            // Look at the first predictor value to decide whether the predictor is Boolean.
            // foreach disposes the enumerator and never reads Current after a failed MoveNext;
            // with no predictor values the representative stays null and the predictor is
            // treated as non-Boolean.
            SufficientStatistics representative = null;
            foreach (SufficientStatistics predictorValue in caseIdToNonMissingPredictorValue.Values)
            {
                representative = predictorValue;
                break;
            }
            bool predictorIsBoolean = representative is BooleanStatistics;

            Converter<Leaf, SufficientStatistics> targetDistributionClassFunction    = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter<Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonMissingPredictorValue);

            // For a non-Boolean predictor the class counts are reported as zeros.
            int[] predictorCounts = predictorIsBoolean
                ? phyloTree.CountsOfLeaves(predictorDistributionClassFunction, NullModelDistribution)
                : new int[2];
            int[] targetCounts = phyloTree.CountsOfLeaves(targetDistributionClassFunction, NullModelDistribution);

            int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int predictorTrueNameCount  = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetFalseNameCount    = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int targetTrueNameCount     = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.True];

            // Fisher counts are only computed for a Boolean predictor; otherwise zeros are reported.
            int[] fisherCounts = predictorIsBoolean
                ? phyloTree.FisherCounts(predictorDistributionClassFunction, targetDistributionClassFunction)
                : new int[4];

            int globalNonMissingCount = predictorIsBoolean
                ? fisherCounts[0] + fisherCounts[1] + fisherCounts[2] + fisherCounts[3]
                : phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionClassFunction);

            StringBuilder stringBuilder = new StringBuilder(
                SpecialFunctions.CreateTabString(this, rowIndex, workListCount, workIndex, nullIndex, predictorVariable,
                                                 predictorFalseNameCount,
                                                 predictorTrueNameCount,
                                                 predictorTrueNameCount + predictorFalseNameCount,
                                                 targetVariable,
                                                 targetFalseNameCount,
                                                 targetTrueNameCount,
                                                 targetTrueNameCount + targetFalseNameCount,
                                                 fisherCounts[0], fisherCounts[1], fisherCounts[2], fisherCounts[3],
                                                 globalNonMissingCount,
                                                 ""));

            // Skip scoring when any target class is empty, or (Boolean predictors only)
            // when any predictor class is empty.
            bool ignoreRow = Array.IndexOf(targetCounts, 0) >= 0
                             || (predictorIsBoolean && Array.IndexOf(predictorCounts, 0) >= 0);

            if (ignoreRow)
            {
                CompleteRowWithNaN(stringBuilder);
                return stringBuilder.ToString();
            }

            double targetMarginal = (double)targetTrueNameCount / (double)(targetTrueNameCount + targetFalseNameCount);
            // For a non-Boolean predictor both counts are 0, so this evaluates to NaN.
            double predictorMarginal = (double)predictorTrueNameCount / (double)(predictorTrueNameCount + predictorFalseNameCount);

            // ComputeLLR receives the StringBuilder; presumably it appends its own columns — confirm in its definition.
            double diff = ComputeLLR(modelScorer, phyloTree, stringBuilder, targetMarginal, predictorMarginal, predictorDistributionClassFunction, targetDistributionClassFunction);

            // The difference is clamped at zero before it is handed to the ratio test.
            double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

            stringBuilder.Append(SpecialFunctions.CreateTabString(diff, pValue));

            return stringBuilder.ToString();
        }