/// <summary>
/// Lazily yields one <see cref="RowData"/> for every (null index, predictor, target)
/// combination that is accepted by <c>_keepTest</c>.
/// </summary>
/// <remarks>
/// The outer loop walks <c>_nullIndexRange.Elements</c>; for each null index the
/// predictor/target pairings are produced by <c>SpecialFunctions.EnumerateTwo</c>.
/// <c>_keepTest.Reset()</c> is invoked once after each null index has been processed.
/// </remarks>
/// <returns>A deferred sequence of the accepted rows.</returns>
public override IEnumerable<RowData> List()
        {
            foreach (int nullIndex in _nullIndexRange.Elements)
            {
                foreach (
                    KeyValuePair<Pair<string, Dictionary<string, SufficientStatistics>>, Pair<string, Dictionary<string, SufficientStatistics>>>
                    pairing
                    in SpecialFunctions.EnumerateTwo(_predictorNameAndCaseIdToNonMissingValueEnumeration, _targetNameAndCaseIdToNonMissingValueEnumeration))
                {
                    Pair<string, Dictionary<string, SufficientStatistics>> predictorEntry = pairing.Key;
                    Pair<string, Dictionary<string, SufficientStatistics>> targetEntry = pairing.Value;

                    string targetName = targetEntry.First;
                    string predictorName = predictorEntry.First;

                    // The row carries only the identifying columns; the data itself is supplied
                    // through the two delegates handed to RowData.GetInstance below.
                    Dictionary<string, string> columns = new Dictionary<string, string>();
                    columns.Add(Tabulate.PredictorVariableColumnName, predictorName);
                    columns.Add(Tabulate.TargetVariableColumnName, targetName);
                    columns.Add(Tabulate.NullIndexColumnName, nullIndex.ToString());

                    if (!_keepTest.Test(columns))
                    {
                        continue;
                    }

                    yield return RowData.GetInstance(
                        columns,
                        delegate()
                        {
                            return _nullDataCollection.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariableOrReal(
                                nullIndex, predictorName, predictorEntry.Second);
                        },
                        delegate()
                        {
                            return _nullDataCollection.GetCaseIdToNonMissingValueForNullIndexAndTargetVariableOrReal(
                                nullIndex, targetName, targetEntry.Second);
                        });
                }

                // Some KeepTests need to be reset before they run through the set again.
                _keepTest.Reset();
            }
        }
        // iterates over all possible targets, predictors and nullIndex values. That is, performs
        // an exhaustive combination analysis.

        /// <summary>
        /// Lazily yields one <see cref="RowData"/> for every target / null index / predictor
        /// combination (nested in that order) that is accepted by <c>_keepTest</c>.
        /// </summary>
        /// <remarks>
        /// <c>_keepTest.Reset()</c> is invoked once, after the outermost loop has finished.
        /// </remarks>
        /// <returns>A deferred sequence of the accepted rows.</returns>
        public virtual IEnumerable<RowData> List()
        {
            foreach (Pair<string, Dictionary<string, SufficientStatistics>> targetEntry in _targetNameAndCaseIdToNonMissingValueEnumeration)
            {
                string targetName = targetEntry.First;

                foreach (int nullIndex in _nullIndexRange.Elements)
                {
                    foreach (Pair<string, Dictionary<string, SufficientStatistics>> predictorEntry in _predictorNameAndCaseIdToNonMissingValueEnumeration)
                    {
                        string predictorName = predictorEntry.First;

                        // The row carries only the identifying columns; the data itself is supplied
                        // through the two delegates handed to RowData.GetInstance below.
                        Dictionary<string, string> columns = new Dictionary<string, string>();
                        columns.Add(Tabulate.PredictorVariableColumnName, predictorName);
                        columns.Add(Tabulate.TargetVariableColumnName, targetName);
                        columns.Add(Tabulate.NullIndexColumnName, nullIndex.ToString());

                        if (!_keepTest.Test(columns))
                        {
                            continue;
                        }

                        yield return RowData.GetInstance(
                            columns,
                            delegate()
                            {
                                return _nullDataCollection.GetCaseIdToNonMissingValueForNullIndexAndPredictorVariableOrReal(
                                    nullIndex, predictorName, predictorEntry.Second);
                            },
                            delegate()
                            {
                                return _nullDataCollection.GetCaseIdToNonMissingValueForNullIndexAndTargetVariableOrReal(
                                    nullIndex, targetName, targetEntry.Second);
                            });
                    }
                }
            }

            // Some KeepTests need to be reset before they run through the set again.
            _keepTest.Reset();
        }
        /// <summary>
        /// Evaluates <paramref name="modelEvaluator"/> on the predictor and target data of one row
        /// and formats the outcome with <c>SpecialFunctions.CreateTabString</c>.
        /// </summary>
        /// <param name="modelEvaluator">Evaluator whose <c>EvaluateModelOnData</c> produces the results.</param>
        /// <param name="rowAndTargetData">Supplies the row columns plus the predictor and target data.</param>
        /// <param name="workList">Not read by this method.</param>
        /// <param name="rowIndex">Passed through to the report line.</param>
        /// <param name="workListCount">Passed through to the report line.</param>
        /// <param name="workIndex">Passed through to the report line.</param>
        /// <returns>
        /// The string built from the evaluator name, the three index values, the null index,
        /// the predictor and target names, and the textual form of the evaluation results.
        /// </returns>
        private string CreateReportLine(
            ModelEvaluator modelEvaluator,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            Dictionary<string, string> columns = rowAndTargetData.Row;
            string predictorName = columns[Tabulate.PredictorVariableColumnName];
            string targetName = columns[Tabulate.TargetVariableColumnName];
            int nullIndex = int.Parse(columns[Tabulate.NullIndexColumnName]);

            // Both data sets are fetched before either map is built, predictor first.
            Dictionary<string, SufficientStatistics> predictorByCaseId = rowAndTargetData.PredictorData;
            Dictionary<string, SufficientStatistics> targetByCaseId = rowAndTargetData.TargetData;

            Converter<Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorByCaseId);
            Converter<Leaf, SufficientStatistics> targetMap = CreateSufficientStatisticsMap(targetByCaseId);

            EvaluationResults evaluation = modelEvaluator.EvaluateModelOnData(predictorMap, targetMap);

            return SpecialFunctions.CreateTabString(
                evaluation.ModelEvaluator.Name, rowIndex, workListCount, workIndex, nullIndex, predictorName, targetName, evaluation.ToString());
        }
 /// <summary>
 /// Produces the report line for a single work item. Declared abstract, so every
 /// concrete derived class must supply the implementation.
 /// </summary>
 /// <param name="modelScorer">Model scorer made available to the implementation.</param>
 /// <param name="phyloTree">Tree made available to the implementation.</param>
 /// <param name="rowAndTargetData">Row data for the work item being reported.</param>
 /// <param name="workList">Work list made available to the implementation.</param>
 /// <param name="rowIndex">NOTE(review): presumably the index of the row within the full enumeration; confirm against callers.</param>
 /// <param name="workListCount">NOTE(review): presumably the number of work items; confirm against callers.</param>
 /// <param name="workIndex">NOTE(review): presumably the index of this work item; confirm against callers.</param>
 /// <returns>The report line as a string; its format is defined by the override.</returns>
 protected abstract string CreateReportLine(
     ModelScorer modelScorer,
     PhyloTree phyloTree,
     RowData rowAndTargetData,
     UniversalWorkList workList,
     int rowIndex, int workListCount, int workIndex);
        /// <summary>
        /// Builds the report line for one predictor/target/null-index row: the leading
        /// count columns, followed either by NaN filler or by the null-model and
        /// alternative-model scores, their log-likelihood difference and its p-value.
        /// </summary>
        /// <param name="modelScorer">Creates the message initializer and scores both models.</param>
        /// <param name="phyloTree">Tree used to count leaves for the predictor and target data.</param>
        /// <param name="rowAndTargetData">Supplies the row columns plus the predictor and target data.</param>
        /// <param name="workList">Not read by this method.</param>
        /// <param name="rowIndex">Passed through to the report line.</param>
        /// <param name="workListCount">Passed through to the report line.</param>
        /// <param name="workIndex">Passed through to the report line.</param>
        /// <returns>The assembled report line.</returns>
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterDiscrete.cs

            Dictionary<string, string> columns = rowAndTargetData.Row;
            string predictorName = columns[PhyloTree.PredictorVariableColumnName];
            string targetName = columns[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
            int nullIndex = int.Parse(columns[PhyloTree.NullIndexColumnName]);

            Dictionary<string, SufficientStatistics> predictorByCaseId = rowAndTargetData.PredictorData;
            Dictionary<string, SufficientStatistics> targetByCaseId = rowAndTargetData.TargetData;

            // The target map is built before the predictor map.
            Converter<Leaf, SufficientStatistics> targetMap = CreateSufficientStatisticsMap(targetByCaseId);
            Converter<Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorByCaseId);

            int[] predictorLeafCounts = phyloTree.CountsOfLeaves(predictorMap);

            int predictorFalseCount = predictorLeafCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int predictorTrueCount = predictorLeafCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetNonMissing = phyloTree.CountOfNonMissingLeaves(targetByCaseId);
            int globalNonMissing = phyloTree.GlobalNonMissingCount(predictorMap, targetMap);

            StringBuilder line = new StringBuilder(
                SpecialFunctions.CreateTabString(
                    this, rowIndex, workListCount, workIndex, nullIndex, predictorName,
                    predictorFalseCount,
                    predictorTrueCount,
                    predictorTrueCount + predictorFalseCount,
                    targetName,
                    targetNonMissing,
                    globalNonMissing,
                    ""));

            // A predictor class with no leaves means the row is not scored; the remaining
            // columns are filled in by CompleteRowWithNaN instead.
            if (Array.IndexOf(predictorLeafCounts, 0) >= 0)
            {
                CompleteRowWithNaN(line);
                return line.ToString();
            }

            MessageInitializer messageInitializer =
                modelScorer.CreateMessageInitializer(predictorMap, targetMap, NullModelDistribution);
            NullModelDistribution.InitialParamVals = null;

            // Index 0 holds the score obtained with useParameter == false, index 1 with true.
            bool[] useParameterChoices = new bool[] { false, true };
            double[] logLikelihoods = new double[useParameterChoices.Length];
            for (int pass = 0; pass < useParameterChoices.Length; ++pass)
            {
                Score score = modelScorer.ScoreModel(messageInitializer, useParameterChoices[pass]);
                line.Append(SpecialFunctions.CreateTabString(score, ""));
                Debug.Write(SpecialFunctions.CreateTabString(score, ""));
                logLikelihoods[pass] = score.Loglikelihood;
                // Each pass's optimized parameters become the alternative model's starting values.
                AltModelDistribution.InitialParamVals = score.OptimizationParameters;
            }

            double diff = logLikelihoods[1] - logLikelihoods[0];
            // The difference is clamped at zero before it is handed to the ratio test.
            double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

            line.Append(SpecialFunctions.CreateTabString(diff, pValue));
            Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));

            return line.ToString();
        }
// Example #6
// 0
        //protected override NullDataCollection CreateNullDataGenerator(ModelScorer modelScorer, PhyloTree phyloTree, RangeCollection nullIndexRangeCollection, Dictionary<string, Dictionary<string, BooleanStatistics>> predictorVariableToCaseIdToRealNonMissingValue)
        //{
        //    if (DateTime.Now.Date == new DateTime(2006, 6, 28).Date)  // for testing, force it to use the parametric bootstrap
        //    {
        //        return NullDataCollection.GetInstance(
        //            new NullDataGeneratorAlongTree(modelScorer, phyloTree, (ModelTesterDiscrete)this),
        //            nullIndexRangeCollection,
        //            predictorVariableToCaseIdToRealNonMissingValue);
        //    }


        //    return base.CreateNullDataGenerator(modelScorer, phyloTree, nullIndexRangeCollection, predictorVariableToCaseIdToRealNonMissingValue);
        //}

        //public override Converter<Leaf, SufficientStatistics> CreateTargetSufficientStatisticsMap(Dictionary<string, ISufficientStatistics> caseIdToNonMissingValue)
        //{
        //    return ISufficientStatistics.DictionaryToLeafMap(caseIdToNonMissingValue);
        //}

        //public override Converter<Leaf, SufficientStatistics> CreatePredictorSufficientStatisticsMap(Dictionary<string, BooleanStatistics> caseIdToNonMissingValue)
        //{
        //    return CreateTargetSufficientStatisticsMap(caseIdToNonMissingValue);
        //}

        /// <summary>
        /// Builds the report line for one predictor/target/null-index row: the leading
        /// count columns, followed either by NaN filler or by the output of
        /// <c>ComputeLLR</c>, the log-likelihood difference and its p-value.
        /// </summary>
        /// <param name="modelScorer">Forwarded to <c>ComputeLLR</c>.</param>
        /// <param name="phyloTree">Tree used to count leaves and to compute the Fisher counts.</param>
        /// <param name="rowAndTargetData">Supplies the row columns plus the predictor and target data.</param>
        /// <param name="workList">Not read by this method.</param>
        /// <param name="rowIndex">Passed through to the report line.</param>
        /// <param name="workListCount">Passed through to the report line.</param>
        /// <param name="workIndex">Passed through to the report line.</param>
        /// <returns>The assembled report line.</returns>
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterGaussian.cs

            // we're iterating over each predictor (e.g. hla), each target (e.g. position in the sequence,
            // and each possible substring at that position).
            // Then we ask the question, Does the presence of predictor (e.g. hla)
            // influence the probability that target (e.g. mer in position n1pos) will show up?
            // nullIndex specifies whether this is the true data or randomized data.
            Dictionary <string, string> row = rowAndTargetData.Row;
            string predictorVariable        = row[PhyloTree.PredictorVariableColumnName]; // e.g. hla
            string targetVariable           = row[PhyloTree.TargetVariableColumnName];    // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex = int.Parse(row[PhyloTree.NullIndexColumnName]);

            Dictionary <string, SufficientStatistics> caseIdToNonMissingPredictorValue = rowAndTargetData.PredictorData;
            Dictionary <string, SufficientStatistics> caseIdToNonMissingTargetValue    = rowAndTargetData.TargetData;

            // The predictor is classified by its first value only. When the dictionary is empty
            // there is no representative value, so the predictor is treated as non-boolean
            // explicitly; reading Current after MoveNext() returned false is undefined.
            // The enumerator is disposed once the check is done.
            bool predictorIsBoolean;
            using (IEnumerator <SufficientStatistics> enumerator = caseIdToNonMissingPredictorValue.Values.GetEnumerator())
            {
                predictorIsBoolean = enumerator.MoveNext() && enumerator.Current is BooleanStatistics;
            }

            Converter <Leaf, SufficientStatistics> targetDistributionClassFunction    = CreateSufficientStatisticsMap(caseIdToNonMissingTargetValue);
            Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction = CreateSufficientStatisticsMap(caseIdToNonMissingPredictorValue);

            // Per-class leaf counts are only computed for a boolean predictor; otherwise
            // both predictor counts are reported as zero.
            int[] predictorCounts = predictorIsBoolean ?
                                    phyloTree.CountsOfLeaves(predictorDistributionClassFunction, NullModelDistribution) : new int[2];
            int[] targetCounts = phyloTree.CountsOfLeaves(targetDistributionClassFunction, NullModelDistribution);

            int predictorFalseNameCount = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int predictorTrueNameCount  = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int targetFalseNameCount    = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int targetTrueNameCount     = targetCounts[(int)DistributionDiscreteBinary.DistributionClass.True];

            // Likewise, the four Fisher counts are all zero for a non-boolean predictor.
            int[] fisherCounts = predictorIsBoolean ?
                                 phyloTree.FisherCounts(predictorDistributionClassFunction, targetDistributionClassFunction) : new int[4];

            int globalNonMissingCount = predictorIsBoolean ?
                                        fisherCounts[0] + fisherCounts[1] + fisherCounts[2] + fisherCounts[3] :
                                        phyloTree.GlobalNonMissingCount(predictorDistributionClassFunction, targetDistributionClassFunction);

            StringBuilder stringBuilder = new StringBuilder(
                SpecialFunctions.CreateTabString(this, rowIndex, workListCount, workIndex, nullIndex, predictorVariable,
                                                 predictorFalseNameCount,
                                                 predictorTrueNameCount,
                                                 predictorTrueNameCount + predictorFalseNameCount,
                                                 targetVariable,
                                                 targetFalseNameCount,
                                                 targetTrueNameCount,
                                                 targetTrueNameCount + targetFalseNameCount,
                                                 fisherCounts[0], fisherCounts[1], fisherCounts[2], fisherCounts[3],
                                                 globalNonMissingCount,
                                                 ""));

            // The row is not scored when any examined count is zero. For a non-boolean
            // predictor the placeholder { 1, 1 } is examined instead of its (all-zero)
            // counts, so only the target counts can trigger the NaN path in that case.
            bool ignoreRow = false;

            foreach (int[] counts in new int[][] { predictorIsBoolean ? predictorCounts : new int[] { 1, 1 }, targetCounts })
            {
                if (Array.IndexOf(counts, 0) >= 0)
                {
                    ignoreRow = true;
                    break;
                }
            }

            if (ignoreRow)
            {
                CompleteRowWithNaN(stringBuilder);
            }
            else
            {
                double targetMarginal    = (double)targetTrueNameCount / (double)(targetTrueNameCount + targetFalseNameCount);
                // NOTE(review): for a non-boolean predictor both counts are zero, so this
                // evaluates to 0.0 / 0.0 (NaN) -- confirm ComputeLLR expects that.
                double predictorMarginal = (double)predictorTrueNameCount / (double)(predictorTrueNameCount + predictorFalseNameCount);

                double diff = ComputeLLR(modelScorer, phyloTree, stringBuilder, targetMarginal, predictorMarginal, predictorDistributionClassFunction, targetDistributionClassFunction);

                // The difference is clamped at zero before it is handed to the ratio test.
                double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

                stringBuilder.Append(SpecialFunctions.CreateTabString(diff, pValue));
            }

            return(stringBuilder.ToString());
        }