/// <summary>
        /// Determines whether <paramref name="obj"/> is a MessageInitializer equivalent to this one.
        /// Two initializers are considered equal when their cached hash codes agree, their distributions
        /// agree on DependsOnMoreThanOneVariable and on their string form, and every leaf of this instance's
        /// full leaf collection has the same missing flag, the same target statistics and (when the
        /// distribution depends on more than one variable) the same predictor statistics in both.
        /// </summary>
        /// <param name="obj">The object to compare against; anything that is not a MessageInitializer compares unequal.</param>
        /// <returns>true when all of the checks above pass; otherwise false.</returns>
        public override bool Equals(object obj)
        {
            MessageInitializer that = obj as MessageInitializer;

            // Inexpensive rejections, in order: wrong type/null, cached hash, distribution flag, distribution text.
            if (that == null)
            {
                return false;
            }
            if (_hashCode != that._hashCode)
            {
                return false;
            }
            if (_distribution.DependsOnMoreThanOneVariable != that._distribution.DependsOnMoreThanOneVariable)
            {
                return false;
            }
            if (_distribution.ToString() != that._distribution.ToString())
            {
                return false;
            }

            // Per-leaf comparison over this instance's leaves.
            foreach (Leaf currentLeaf in _fullLeafCollection)
            {
                if (IsMissing(currentLeaf) != that.IsMissing(currentLeaf))
                {
                    return false;
                }
                if (LeafToTargetStatistics(currentLeaf) != that.LeafToTargetStatistics(currentLeaf))
                {
                    return false;
                }

                // Predictor statistics only need to agree when the distribution depends on the predictor variables.
                if (!_distribution.DependsOnMoreThanOneVariable)
                {
                    continue;
                }

                foreach (KeyValuePair <Converter <Leaf, SufficientStatistics>, Converter <Leaf, SufficientStatistics> > mapPair in SpecialFunctions.EnumerateTwo(LeafToPredictorStatisticsList, that.LeafToPredictorStatisticsList))
                {
                    // Key is this instance's predictor map, Value is the other instance's map at the same position.
                    if (mapPair.Key(currentLeaf) != mapPair.Value(currentLeaf))
                    {
                        return false;
                    }
                }
            }

            return true;
        }
Пример #2
0
        /// <summary>
        /// Computes the log likelihood for the given parameters, first with useLogMethod set to false and,
        /// only if that attempt yields negative infinity, a second time with useLogMethod set to true.
        /// </summary>
        /// <param name="messageInitializer">Forwarded unchanged to the three-argument overload.</param>
        /// <param name="paramList">Forwarded unchanged to the three-argument overload.</param>
        /// <returns>The first result unless it is negative infinity; otherwise the result of the retry.</returns>
        public double ComputeLogLikelihoodModelGivenData(MessageInitializer messageInitializer, OptimizationParameterList paramList)
        {
            double firstAttempt = ComputeLogLikelihoodModelGivenData(messageInitializer, paramList, false);

            if (!double.IsNegativeInfinity(firstAttempt))
            {
                return firstAttempt;
            }

            // The first attempt produced negative infinity: retry with the log method enabled.
            return ComputeLogLikelihoodModelGivenData(messageInitializer, paramList, true);
        }
Пример #3
0
        /// <summary>
        /// Learns the optimal parameters for the data contained in the messageInitializer and returns the corresponding Score.
        /// Scores are cached per messageInitializer: a cache hit increments CacheHits and returns the stored score,
        /// a miss increments CacheMisses, computes the score via MaximizeLikelihoodInternal and stores it.
        /// When the cache already holds more than MAX_CACHE_SIZE entries, ClearCache() is called before the new entry is added.
        /// </summary>
        /// <param name="messageInitializer">The cache key and the input handed to MaximizeLikelihoodInternal.</param>
        /// <returns>The cached or freshly computed Score.</returns>
        public Score MaximizeLikelihood(MessageInitializer messageInitializer)
        {
            Score score;

            #region Caching details
            // A single TryGetValue replaces ContainsKey + indexer, so the key is hashed and compared
            // once per hit instead of twice.
            if (_cache.TryGetValue(messageInitializer, out score))
            {
                CacheHits++;
#if (DEBUG)
                // Debug builds recompute the score and verify that the cached value still agrees with it.
                Score scoreLive = MaximizeLikelihoodInternal(messageInitializer);
                double diff = scoreLive.Loglikelihood - score.Loglikelihood;
                bool withinTolerance = Math.Abs(diff) < 10e-7;

                if (Math.Abs(diff) >= 10e-7)
                {
                    Debug.WriteLine("Cache differs from computed score by " + diff);
                }
                // note: minute (10E-14) differences sometimes arise. The original explanation was that these were differences in rounding
                // errors caused when missing data was caught in different places. I have tried to localize the catch of missing data and
                // throw errors elsewhere but still have the same rounding errors. Not sure what else the cause could be.
                SpecialFunctions.CheckCondition(
                    withinTolerance,
                    "Cached score " + score.Loglikelihood + " doesn't match live score " + scoreLive.Loglikelihood);
#endif
            }
            else
            {
                CacheMisses++;
                score = MaximizeLikelihoodInternal(messageInitializer);

                if (_cache.Count > MAX_CACHE_SIZE)
                {
                    ClearCache();
                }
                _cache.Add(messageInitializer, score);
            }
            #endregion

            return(score);
        }
        /// <summary>
        /// Scores the model twice against the same message initializer, first with useParameter false and then
        /// with useParameter true, and returns the second log likelihood minus the first.
        /// Before scoring, NullModelDistribution.EmpiricalEquilibrium is set to <paramref name="targetMarginal"/>
        /// and NullModelDistribution.InitialParamVals is reset to null. After each scoring pass the score's tab string
        /// is appended to <paramref name="stringBuilder"/> and AltModelDistribution.InitialParamVals is set to the
        /// score's optimization parameters.
        /// </summary>
        /// <param name="modelScorer">Creates the message initializer and scores the model.</param>
        /// <param name="phyloTree">Not read by this implementation.</param>
        /// <param name="stringBuilder">Receives the tab-separated text of both scores.</param>
        /// <param name="targetMarginal">Assigned to NullModelDistribution.EmpiricalEquilibrium.</param>
        /// <param name="predictorMarginal">Not read by this implementation.</param>
        /// <param name="predictorDistributionClassFunction">Leaf-to-statistics map for the predictor.</param>
        /// <param name="targetDistributionClassFunction">Leaf-to-statistics map for the target.</param>
        /// <returns>Log likelihood of the useParameter=true pass minus that of the useParameter=false pass.</returns>
        protected override double ComputeLLR(ModelScorer modelScorer, PhyloTree phyloTree, StringBuilder stringBuilder, double targetMarginal, double predictorMarginal,
                                             Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction, Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            NullModelDistribution.EmpiricalEquilibrium = targetMarginal;
            NullModelDistribution.InitialParamVals     = null;

            MessageInitializer messageInitializer = modelScorer.CreateMessageInitializer(predictorDistributionClassFunction, targetDistributionClassFunction, NullModelDistribution);

            // Slot 0 holds the useParameter=false result, slot 1 the useParameter=true result.
            bool[]   useParameterSettings = new bool[] { false, true };
            double[] logLikelihoods       = new double[useParameterSettings.Length];

            for (int pass = 0; pass < useParameterSettings.Length; ++pass)
            {
                bool  useParameter = useParameterSettings[pass];
                Score passScore    = modelScorer.ScoreModel(messageInitializer, useParameter);

                string scoreText = passScore.ToString(useParameter ? AlternativeModelDistribution : NullModelDistribution);
                stringBuilder.Append(SpecialFunctions.CreateTabString(scoreText, ""));

                logLikelihoods[pass] = passScore.Loglikelihood;

                // The parameters found in this pass become the alternative model's initial values.
                AltModelDistribution.InitialParamVals = passScore.OptimizationParameters;
                Debug.WriteLine(SpecialFunctions.CreateTabString("AltModelDistribution.InitialParamVals = score.OptimizationParameters", passScore.OptimizationParameters));
            }

            return logLikelihoods[1] - logLikelihoods[0];
        }
Пример #5
0
        /// <summary>
        /// Produces a random case-id-to-value mapping by fitting the discrete distribution to the real data
        /// and then calling EvolveBinaryTree on the model scorer's tree with the fitted equilibrium and lambda
        /// and with the fraction of leaves that are missing in the real data.
        /// </summary>
        /// <param name="realCaseIdToNonMissingValue">The observed non-missing values, keyed by case id.</param>
        /// <param name="random">Random source, passed by reference to EvolveBinaryTree.</param>
        /// <returns>The evolved boolean values, converted to a dictionary of the base SufficientStatistics type.</returns>
        public override Dictionary <string, SufficientStatistics> GenerateRandomMapping(Dictionary <string, SufficientStatistics> realCaseIdToNonMissingValue, ref Random random)
        {
            // TODO: add a check that the incoming SufficientStatistics values really are BooleanStatistics.
            Converter <Leaf, SufficientStatistics> leafToStatistics = PhyloDDriver.CreateSufficientStatisticsMap(realCaseIdToNonMissingValue);

            PhyloTree phyloTree = _modelScorer.PhyloTree;

            // Fit the model to the real data to obtain the parameters used for the simulation.
            MessageInitializer initializer = MessageInitializerDiscrete.GetInstance(leafToStatistics, _discreteDistribution, new int[] { 1, 1 }, phyloTree.LeafCollection);
            Score fitted = _modelScorer.MaximizeLikelihood(initializer);

            double nonMissingLeafCount = (double)phyloTree.CountOfNonMissingLeaves(realCaseIdToNonMissingValue);
            double totalLeafCount      = (double)SpecialFunctions.Count(phyloTree.LeafCollection);
            double percentNonMissing   = nonMissingLeafCount / totalLeafCount;

            double equilibrium = fitted.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Equilibrium].Value;
            double lambda      = fitted.OptimizationParameters[(int)DistributionDiscreteConditional.ParameterIndex.Lambda].Value;

            Dictionary <string, BooleanStatistics> evolved = phyloTree.EvolveBinaryTree(equilibrium, lambda, 1 - percentNonMissing, ref random);

            Dictionary <string, SufficientStatistics> result;
            SpecialFunctions.ConvertDictionaryToBaseClasses(evolved, out result);

            return result;
        }
Пример #6
0
        /// <summary>
        /// Optimizes the parameters supplied by the message initializer with GridSearch.Optimize and wraps the
        /// resulting log likelihood in a Score. The objective starts out calling the likelihood computation with
        /// useLogMethod false; the first time that returns negative infinity it switches to useLogMethod true and
        /// stays there for all later evaluations in this call.
        /// </summary>
        /// <param name="messageInitializer">Supplies the parameters to optimize and the propagation distribution.</param>
        /// <returns>A Score built from the optimized log likelihood, the parameter list and the propagation distribution.</returns>
        protected Score MaximizeLikelihoodInternal(MessageInitializer messageInitializer)
        {
            OptimizationParameterList paramsToOptimize = messageInitializer.GetOptimizationParameters();

            int  evaluationsThisCall = 0;
            bool logMethodEnabled    = false; // captured by the objective below; never reset once set

            Converter <OptimizationParameterList, double> objective =
                delegate(OptimizationParameterList candidate)
            {
                FuncCalls++;
                ++evaluationsThisCall;

                if (logMethodEnabled)
                {
                    return ComputeLogLikelihoodModelGivenData(messageInitializer, candidate, true);
                }

                double loglikelihood = ComputeLogLikelihoodModelGivenData(messageInitializer, candidate, false);

                if (double.IsNegativeInfinity(loglikelihood))
                {
                    // Switch to the log method for this and every subsequent evaluation.
                    logMethodEnabled = true;
                    loglikelihood    = ComputeLogLikelihoodModelGivenData(messageInitializer, candidate, true);
                }

                // Note: NaN results are returned to the optimizer as-is; no NaN check is applied here.
                return loglikelihood;
            };

            double optimizedLogLikelihood = GridSearch.Optimize(objective, paramsToOptimize, 10, 5);

            Score result = Score.GetInstance(optimizedLogLikelihood, paramsToOptimize, messageInitializer.PropogationDistribution);

            Debug.WriteLine(SpecialFunctions.CreateTabString(GridSearch.DebugCount, result, evaluationsThisCall));

            return result;
        }
Пример #7
0
 /// <summary>
 /// Computes the log likelihood by delegating to PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian.
 /// </summary>
 /// <param name="messageInitializer">Forwarded to the Gaussian computation.</param>
 /// <param name="paramList">Forwarded to the Gaussian computation.</param>
 /// <param name="useLogMethod">Not read by this override.</param>
 /// <returns>The value returned by the Gaussian computation.</returns>
 public override double ComputeLogLikelihoodModelGivenData(MessageInitializer messageInitializer, OptimizationParameterList paramList, bool useLogMethod)
 {
     double gaussianLogLikelihood = PhyloTree.ComputeLogLikelihoodModelGivenDataGaussian(messageInitializer, paramList);

     return gaussianLogLikelihood;
 }
Пример #8
0
 /// <summary>
 /// Computes a log likelihood for the given message initializer and parameter list.
 /// Implemented by derived classes.
 /// </summary>
 /// <param name="messageInitializer">The message initializer to evaluate.</param>
 /// <param name="paramList">The parameter values to evaluate.</param>
 /// <param name="useLogMethod">
 /// Selects an alternative computation path in implementations that support one.
 /// NOTE(review): presumably a log-space variant used to avoid underflow; confirm against the implementations.
 /// </param>
 /// <returns>The log likelihood as a double.</returns>
 public abstract double ComputeLogLikelihoodModelGivenData(MessageInitializer messageInitializer, OptimizationParameterList paramList, bool useLogMethod);
        /// <summary>
        /// Builds one tab-separated report line for a predictor/target pair.
        /// The line starts with identifying columns and leaf counts. If any entry of the predictor leaf counts
        /// is zero the rest of the row is filled in by CompleteRowWithNaN; otherwise the model is scored with
        /// useParameter false and then true, both scores are appended, and the line ends with the log likelihood
        /// difference (second minus first) and the p-value from SpecialFunctions.LogLikelihoodRatioTest.
        /// </summary>
        /// <param name="modelScorer">Creates the message initializer and scores the model.</param>
        /// <param name="phyloTree">Used to count leaves for the report columns.</param>
        /// <param name="rowAndTargetData">Supplies the row's column values and the predictor and target data.</param>
        /// <param name="workList">Not read by this implementation.</param>
        /// <param name="rowIndex">Written to the report line.</param>
        /// <param name="workListCount">Written to the report line.</param>
        /// <param name="workIndex">Written to the report line.</param>
        /// <returns>The completed report line.</returns>
        protected override string CreateReportLine(
            ModelScorer modelScorer,
            PhyloTree phyloTree,
            RowData rowAndTargetData,
            UniversalWorkList workList,
            int rowIndex, int workListCount, int workIndex)
        {
            //!!!there is very similar code in ModelTesterDiscrete.cs

            Dictionary <string, string> rowValues = rowAndTargetData.Row;
            string predictorName = rowValues[PhyloTree.PredictorVariableColumnName];
            string targetName    = rowValues[PhyloTree.TargetVariableColumnName]; // e.g. A@182 (amino acid "A" at position 182)
            int    nullIndex     = int.Parse(rowValues[PhyloTree.NullIndexColumnName]);

            Dictionary <string, SufficientStatistics> predictorData = rowAndTargetData.PredictorData;
            Dictionary <string, SufficientStatistics> targetData    = rowAndTargetData.TargetData;

            Converter <Leaf, SufficientStatistics> targetMap    = CreateSufficientStatisticsMap(targetData);
            Converter <Leaf, SufficientStatistics> predictorMap = CreateSufficientStatisticsMap(predictorData);

            // Leaf counts reported in the leading columns.
            int[] predictorCounts  = phyloTree.CountsOfLeaves(predictorMap);
            int   falseCount       = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.False];
            int   trueCount        = predictorCounts[(int)DistributionDiscreteBinary.DistributionClass.True];
            int   targetNonMissing = phyloTree.CountOfNonMissingLeaves(targetData);
            int   globalNonMissing = phyloTree.GlobalNonMissingCount(predictorMap, targetMap);

            string leadingColumns = SpecialFunctions.CreateTabString(
                this, rowIndex, workListCount, workIndex, nullIndex, predictorName,
                falseCount,
                trueCount,
                trueCount + falseCount,
                targetName,
                targetNonMissing,
                globalNonMissing,
                "");
            StringBuilder line = new StringBuilder(leadingColumns);

            // A row is not scored when any predictor count is zero.
            bool hasEmptyPredictorClass = Array.IndexOf(predictorCounts, 0) >= 0;

            if (hasEmptyPredictorClass)
            {
                CompleteRowWithNaN(line);
                return line.ToString();
            }

            // Slot 0 holds the useParameter=false result, slot 1 the useParameter=true result.
            double[] logLikelihoods = new double[2];
            int      pass           = 0;

            MessageInitializer messageInitializer =
                modelScorer.CreateMessageInitializer(predictorMap, targetMap, NullModelDistribution);
            NullModelDistribution.InitialParamVals = null;

            foreach (bool useParameter in new bool[] { false, true })
            {
                Score passScore = modelScorer.ScoreModel(messageInitializer, useParameter);

                line.Append(SpecialFunctions.CreateTabString(passScore, ""));
                Debug.Write(SpecialFunctions.CreateTabString(passScore, ""));

                logLikelihoods[pass++] = passScore.Loglikelihood;

                // The parameters found in this pass become the alternative model's initial values.
                AltModelDistribution.InitialParamVals = passScore.OptimizationParameters;
            }

            double diff = logLikelihoods[1] - logLikelihoods[0];

            // Negative differences are clamped to zero before the ratio test.
            double pValue = SpecialFunctions.LogLikelihoodRatioTest(Math.Max(diff, 0), ChiSquareDegreesOfFreedom);

            line.Append(SpecialFunctions.CreateTabString(diff, pValue));
            Debug.WriteLine(SpecialFunctions.CreateTabString(diff, pValue));

            return line.ToString();
        }