//private bool AllVarianceZero(Dictionary<string, GaussianStatistics> caseNameToTarget)
        /// <summary>
        /// Reports whether the non-missing Gaussian statistics of the given leaves have zero variance.
        /// </summary>
        /// <remarks>
        /// Two consistency conditions are passed to SpecialFunctions.CheckCondition for every non-missing leaf:
        /// the variance is zero exactly when the sample size is 1, and every leaf agrees with the first
        /// non-missing leaf on whether its variance is zero.
        /// </remarks>
        /// <param name="LeafCollection">Leaves to inspect; leaves whose statistics are missing are skipped.</param>
        /// <param name="caseNameToTarget">Maps a leaf to its sufficient statistics.</param>
        /// <returns>
        /// The zero-variance flag of the first non-missing leaf, or false when no leaf has non-missing statistics.
        /// </returns>
        private static bool AllVarianceZero(IEnumerable <Leaf> LeafCollection, Converter <Leaf, SufficientStatistics> caseNameToTarget)
        {
            // Remains null until the first non-missing leaf has been examined.
            bool? sharedZeroFlag = null;

            foreach (Leaf candidate in LeafCollection)
            {
                SufficientStatistics rawStats = caseNameToTarget(candidate);
                if (!rawStats.IsMissing())
                {
                    GaussianStatistics gaussian = (GaussianStatistics)rawStats;
                    bool hasZeroVariance = (gaussian.Variance == 0);

                    SpecialFunctions.CheckCondition(hasZeroVariance == (gaussian.SampleSize == 1), "Variance must be zero exactly when the sample size is 1");

                    if (sharedZeroFlag.HasValue)
                    {
                        // Every later leaf has to agree with the first one.
                        SpecialFunctions.CheckCondition(sharedZeroFlag.Value == hasZeroVariance, "If any variances are zero, then all must be zero");
                    }
                    else
                    {
                        sharedZeroFlag = hasZeroVariance;
                    }
                }
            }

            // No non-missing leaf at all: report false.
            return sharedZeroFlag ?? false;
        }
Пример #2
0
        /// <summary>
        /// Builds a dictionary from case name to integer value for every leaf whose statistics are not missing.
        /// </summary>
        /// <param name="leafToStatsMap">Maps a leaf to its sufficient statistics.</param>
        /// <param name="fullLeafCollection">Leaves to convert; it is enumerated once for sizing and once for filling.</param>
        /// <returns>
        /// A dictionary keyed by <c>Leaf.CaseName</c> whose values are the statistics cast to
        /// <c>BooleanStatistics</c> and then to <c>int</c>. Leaves with missing statistics have no entry.
        /// </returns>
        private static Dictionary <string, int> SufficientStatisticsMapToIntDictionaryMap(Converter <Leaf, SufficientStatistics> leafToStatsMap, IEnumerable <Leaf> fullLeafCollection)
        {
            // Size the dictionary for the case in which no leaf is missing.
            int capacity = SpecialFunctions.Count(fullLeafCollection);
            Dictionary <string, int> caseNameToInt = new Dictionary <string, int>(capacity);

            foreach (Leaf current in fullLeafCollection)
            {
                SufficientStatistics stats = leafToStatsMap(current);
                if (stats.IsMissing())
                {
                    continue;
                }

                string caseName = current.CaseName;
                BooleanStatistics asBoolean = (BooleanStatistics)stats;
                int asInt = (int)asBoolean;
                caseNameToInt.Add(caseName, asInt);
            }

            return caseNameToInt;
        }
        /// <summary>
        /// Creates a leaf map that combines the predictor and target maps into a single joint binary class.
        /// </summary>
        /// <param name="predictorDistributionClassFunction">Maps a leaf to the predictor's statistics.</param>
        /// <param name="targetDistributionClassFunction">Maps a leaf to the target's statistics.</param>
        /// <returns>
        /// A map that yields the joint <c>Missing</c> class when either input is missing. Otherwise it yields
        /// FalseFalse, FalseTrue, TrueFalse or TrueTrue (predictor first, target second), where any value that
        /// does not compare equal to the binary <c>False</c> class is treated as true.
        /// </returns>
        public override Converter <Leaf, SufficientStatistics> CreateAlternativeSufficientStatisticsMap(
            Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction,
            Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            return delegate(Leaf leaf)
            {
                SufficientStatistics predictorStats = predictorDistributionClassFunction(leaf);
                SufficientStatistics targetStats = targetDistributionClassFunction(leaf);

                // A leaf that is missing on either side is missing in the joint map as well.
                if (predictorStats.IsMissing() || targetStats.IsMissing())
                {
                    return (DiscreteStatistics)(int)DistributionDiscreteJointBinary.DistributionClass.Missing;
                }

                DiscreteStatistics predictorValue = (DiscreteStatistics)predictorStats;
                DiscreteStatistics targetValue = (DiscreteStatistics)targetStats;

                DistributionDiscreteJointBinary.DistributionClass combined;
                if (predictorValue == (int)DistributionDiscreteBinary.DistributionClass.False)
                {
                    combined = (targetValue == (int)DistributionDiscreteBinary.DistributionClass.False)
                        ? DistributionDiscreteJointBinary.DistributionClass.FalseFalse
                        : DistributionDiscreteJointBinary.DistributionClass.FalseTrue;
                }
                else
                {
                    combined = (targetValue == (int)DistributionDiscreteBinary.DistributionClass.False)
                        ? DistributionDiscreteJointBinary.DistributionClass.TrueFalse
                        : DistributionDiscreteJointBinary.DistributionClass.TrueTrue;
                }

                return (DiscreteStatistics)(int)combined;
            };
        }
Пример #4
0
        /// <summary>
        /// Creates a leaf map that combines a predictor map and a target map into a single joint class.
        /// </summary>
        /// <param name="predictorMap">Maps a leaf to the predictor's statistics.</param>
        /// <param name="targetMap">Maps a leaf to the target's statistics.</param>
        /// <returns>
        /// A map that yields the joint <c>Missing</c> class when either input is missing. Otherwise it yields
        /// FalseFalse, FalseTrue, TrueFalse or TrueTrue (predictor first, target second), where any value that
        /// does not compare equal to the conditional <c>False</c> class is treated as true.
        /// </returns>
        public static Converter <Leaf, SufficientStatistics> CreateJointMap(Converter <Leaf, SufficientStatistics> predictorMap, Converter <Leaf, SufficientStatistics> targetMap)
        {
            return delegate(Leaf leaf)
            {
                SufficientStatistics predictorStats = predictorMap(leaf);
                SufficientStatistics targetStats = targetMap(leaf);

                DistributionDiscreteJoint.DistributionClass combined;

                if (predictorStats.IsMissing() || targetStats.IsMissing())
                {
                    combined = DistributionDiscreteJoint.DistributionClass.Missing;
                }
                else
                {
                    DiscreteStatistics predictorValue = (DiscreteStatistics)predictorStats;
                    DiscreteStatistics targetValue = (DiscreteStatistics)targetStats;

                    // The predictor selects a pair of candidate classes; the target then picks one of the pair.
                    DistributionDiscreteJoint.DistributionClass whenTargetFalse;
                    DistributionDiscreteJoint.DistributionClass whenTargetTrue;
                    if (predictorValue == (int)DistributionDiscreteConditional.DistributionClass.False)
                    {
                        whenTargetFalse = DistributionDiscreteJoint.DistributionClass.FalseFalse;
                        whenTargetTrue = DistributionDiscreteJoint.DistributionClass.FalseTrue;
                    }
                    else
                    {
                        whenTargetFalse = DistributionDiscreteJoint.DistributionClass.TrueFalse;
                        whenTargetTrue = DistributionDiscreteJoint.DistributionClass.TrueTrue;
                    }

                    combined = (targetValue == (int)DistributionDiscreteConditional.DistributionClass.False)
                        ? whenTargetFalse
                        : whenTargetTrue;
                }

                return (DiscreteStatistics)(int)combined;
            };
        }
        //public override Converter<Leaf, SufficientStatistics> CreateTargetSufficientStatisticsMap(Dictionary<string, ISufficientStatistics> caseIdToNonMissingValue)
        //{
        //    return ISufficientStatistics.DictionaryToLeafMap(caseIdToNonMissingValue);
        //}

        /// <summary>
        /// Creates a leaf map that passes the target statistics through, masked by missing data on either side.
        /// </summary>
        /// <param name="predictorDistributionClassFunction">Maps a leaf to the predictor's statistics; only its missing state is consulted.</param>
        /// <param name="targetDistributionClassFunction">Maps a leaf to the target's statistics.</param>
        /// <returns>
        /// A map that yields the target statistics unchanged when both inputs are present, and
        /// <c>GaussianStatistics.GetMissingInstance()</c> when either one is missing.
        /// </returns>
        public override Converter <Leaf, SufficientStatistics> CreateAlternativeSufficientStatisticsMap(
            Converter <Leaf, SufficientStatistics> predictorDistributionClassFunction,
            Converter <Leaf, SufficientStatistics> targetDistributionClassFunction)
        {
            return delegate(Leaf leaf)
            {
                SufficientStatistics predictorValue = predictorDistributionClassFunction(leaf);
                SufficientStatistics targetValue = targetDistributionClassFunction(leaf);

                // Both sides present: the target statistics are returned as they are.
                if (!predictorValue.IsMissing() && !targetValue.IsMissing())
                {
                    return targetValue;
                }

                // Otherwise bail out with the missing marker.
                return GaussianStatistics.GetMissingInstance();
            };
        }
        /// <summary>
        /// Cross-validated evaluation: for each of <c>_crossValidateCount</c> folds, evaluates the internal
        /// evaluator on a training subset of the leaves, re-evaluates on the unfiltered maps using the
        /// training results, and accumulates the per-fold test-given-train results.
        /// </summary>
        /// <param name="v1">First leaf-to-statistics map handed to the internal evaluator.</param>
        /// <param name="v2">Second leaf-to-statistics map handed to the internal evaluator.</param>
        /// <returns>
        /// The results combined over the folds that were processed. Processing stops early once the combined
        /// AltLL becomes infinite. The value is null if the fold loop body never executes.
        /// </returns>
        public override EvaluationResults EvaluateModelOnData(Converter <Leaf, SufficientStatistics> v1, Converter <Leaf, SufficientStatistics> v2)
        {
            List <Leaf> nonMissingLeaves = new List <Leaf>(100);
            int         seed             = 0;

            // Keep only the leaves for which both maps have data, and fold each kept leaf's
            // name and values into the shuffle seed.
            foreach (Leaf leaf in ModelScorer.PhyloTree.LeafCollection)
            {
                SufficientStatistics class1 = v1(leaf);
                SufficientStatistics class2 = v2(leaf);
                if (!class1.IsMissing() && !class2.IsMissing())
                {
                    nonMissingLeaves.Add(leaf);
                    // XOR is commutative, so the seed does not depend on the order in which leaves are visited.
                    // NOTE(review): relies on string.GetHashCode() being stable between runs -- confirm for the target runtime.
                    seed ^= (leaf.CaseName + class1.ToString() + class2.ToString()).GetHashCode();
                }
            }

            // Seed the generator from the data rather than from the clock.
            Random rand = new Random(seed);

            nonMissingLeaves = SpecialFunctions.Shuffle(nonMissingLeaves, ref rand);

            // Integer division: any leaves left over at the end of the shuffled list (Count % _crossValidateCount
            // of them) fall outside every held-out range below.
            // NOTE(review): a _crossValidateCount of zero would make this division fail -- confirm it is validated elsewhere.
            int groupSize = nonMissingLeaves.Count / _crossValidateCount;

            EvaluationResultsCrossValidate combinedResults = null;
            double testAltLLSum  = 0;   // for debugging
            double testNullLLSum = 0;   // for debugging

            for (int i = 0; i < _crossValidateCount; i++)
            {
                // Fold i holds out positions [testStart, trainStart); the training set is built from
                // everything after that range plus everything before it.
                // Assumes SubList takes (list, startIndex, count) -- TODO confirm.
                int        testStart  = i * groupSize;
                int        trainStart = testStart + groupSize;
                Set <Leaf> trainSet   = new Set <Leaf>(SpecialFunctions.SubList(nonMissingLeaves, trainStart, nonMissingLeaves.Count - trainStart));
                trainSet.AddNewRange(SpecialFunctions.SubList(nonMissingLeaves, 0, testStart));

                // Presumably these maps expose only the training leaves -- verify against CreateFilteredMap.
                Converter <Leaf, SufficientStatistics> v1Train = CreateFilteredMap(v1, trainSet);
                Converter <Leaf, SufficientStatistics> v2Train = CreateFilteredMap(v2, trainSet);

                // Evaluate on the training maps, then re-evaluate on the unfiltered maps with the training results.
                EvaluationResults trainingResults    = InternalEvaluator.EvaluateModelOnData(v1Train, v2Train);
                EvaluationResults testAndTrainResult = InternalEvaluator.EvaluateModelOnDataGivenParams(v1, v2, trainingResults);
                // The DEBUG checks below expect this result's NullLL/AltLL to equal
                // (testAndTrainResult - trainingResults) for the respective value.
                EvaluationResultsTestGivenTrain testGivenTrainResult = EvaluationResultsTestGivenTrain.GetInstance(this, trainingResults, testAndTrainResult);

                // The first fold creates the accumulator; later folds are added to it.
                if (combinedResults == null)
                {
                    combinedResults = EvaluationResultsCrossValidate.GetInstance(this, testGivenTrainResult);
                }
                else
                {
                    combinedResults = combinedResults.AddNewResults(testGivenTrainResult);
                }

                // Note: this break also skips the DEBUG consistency checks for the current fold.
                if (double.IsInfinity(combinedResults.AltLL))   // no point in continuing...infinity will kill everything.
                {
                    break;
                }
#if DEBUG
                // Check 1: re-evaluating the training maps with the training results must reproduce
                // the training AltLL and NullLL (within eps).
                double            eps = 1E-10;
                EvaluationResults testTrainingResults = InternalEvaluator.EvaluateModelOnDataGivenParams(v1Train, v2Train, trainingResults);
                Debug.Assert(ComplexNumber.ApproxEqual(testTrainingResults.AltLL, trainingResults.AltLL, eps) &&
                             ComplexNumber.ApproxEqual(testTrainingResults.NullLL, trainingResults.NullLL, eps));
                //Debug.Assert(testTrainingResults.Equals(trainingResults));

                // Check 2: the per-fold result must equal the difference between the train+test and train-only values.
                double newNullLL = testAndTrainResult.NullLL - trainingResults.NullLL;
                double newAltLL  = testAndTrainResult.AltLL - trainingResults.AltLL;

                Debug.Assert(ComplexNumber.ApproxEqual(newNullLL, testGivenTrainResult.NullLL, eps));
                Debug.Assert(ComplexNumber.ApproxEqual(newAltLL, testGivenTrainResult.AltLL, eps));

                // Check 3: the accumulator must equal the running sum of the per-fold differences.
                testNullLLSum += newNullLL;
                testAltLLSum  += newAltLL;

                Debug.Assert(ComplexNumber.ApproxEqual(testNullLLSum, combinedResults.NullLL, eps), "Combined result has wrong NullLL");
                Debug.Assert(ComplexNumber.ApproxEqual(testAltLLSum, combinedResults.AltLL, eps), "Combined result has wrong AltLL");
#endif
            }
            return(combinedResults);
        }