Пример #1
0
        protected internal static ICollection <K> GetSetOfAllKeys <K>(Distribution <K> d1, Distribution <K> d2)
        {
            if (d1.GetNumberOfKeys() != d2.GetNumberOfKeys())
            {
                throw new Exception("Tried to compare two Distribution<K> objects but d1.numberOfKeys != d2.numberOfKeys");
            }
            ICollection <K> allKeys = Generics.NewHashSet(d1.GetCounter().KeySet());

            Sharpen.Collections.AddAll(allKeys, d2.GetCounter().KeySet());
            if (allKeys.Count > d1.GetNumberOfKeys())
            {
                throw new Exception("Tried to compare two Distribution<K> objects but d1.counter intersect d2.counter > numberOfKeys");
            }
            return(allKeys);
        }
Пример #2
0
        /// <summary>Calculates the KL divergence between the two distributions.</summary>
        /// <remarks>
        /// Calculates the KL divergence between the two distributions.
        /// That is, it calculates KL(from || to).
        /// In other words, how well can d1 be represented by d2.
        /// if there is some value in d1 that gets zero prob in d2, then return positive infinity.
        /// </remarks>
        /// <returns>The KL divergence between the distributions</returns>
        public static double KlDivergence <K>(Distribution <K> from, Distribution <K> to)
        {
            ICollection <K> allKeys          = GetSetOfAllKeys(from, to);
            int             numKeysRemaining = from.GetNumberOfKeys();
            double          result           = 0.0;
            double          assignedMass1    = 0.0;
            double          assignedMass2    = 0.0;
            double          log2             = Math.Log(2.0);
            double          p1;
            double          p2;
            double          epsilon = 1e-10;

            foreach (K key in allKeys)
            {
                p1 = from.ProbabilityOf(key);
                p2 = to.ProbabilityOf(key);
                numKeysRemaining--;
                assignedMass1 += p1;
                assignedMass2 += p2;
                if (p1 < epsilon)
                {
                    continue;
                }
                double logFract = Math.Log(p1 / p2);
                if (logFract == double.PositiveInfinity)
                {
                    System.Console.Out.WriteLine("Didtributions.kldivergence returning +inf: p1=" + p1 + ", p2=" + p2);
                    System.Console.Out.Flush();
                    return(double.PositiveInfinity);
                }
                // can't recover
                result += p1 * (logFract / log2);
            }
            // express it in log base 2
            if (numKeysRemaining != 0)
            {
                p1 = (1.0 - assignedMass1) / numKeysRemaining;
                if (p1 > epsilon)
                {
                    p2 = (1.0 - assignedMass2) / numKeysRemaining;
                    double logFract = Math.Log(p1 / p2);
                    if (logFract == double.PositiveInfinity)
                    {
                        System.Console.Out.WriteLine("Distributions.klDivergence (remaining mass) returning +inf: p1=" + p1 + ", p2=" + p2);
                        System.Console.Out.Flush();
                        return(double.PositiveInfinity);
                    }
                    // can't recover
                    result += numKeysRemaining * p1 * (logFract / log2);
                }
            }
            // express it in log base 2
            return(result);
        }
Пример #3
0
        /// <summary>Returns a new Distribution<K> with counts averaged from the two given Distributions.</summary>
        /// <remarks>
        /// Returns a new Distribution<K> with counts averaged from the two given Distributions.
        /// The average Distribution<K> will contain the union of keys in both
        /// source Distributions, and each count will be the weighted average of the two source
        /// counts for that key,  a missing count in one Distribution
        /// is treated as if it has probability equal to that returned by the probabilityOf() function.
        /// </remarks>
        /// <returns>
        /// A new distribution with counts that are the mean of the resp. counts
        /// in the given distributions with the remaining probability mass adjusted accordingly.
        /// </returns>
        public static Distribution <K> WeightedAverage <K>(Distribution <K> d1, double w1, Distribution <K> d2)
        {
            double          w2      = 1.0 - w1;
            ICollection <K> allKeys = GetSetOfAllKeys(d1, d2);
            int             numKeys = d1.GetNumberOfKeys();
            ICounter <K>    c       = new ClassicCounter <K>();

            foreach (K key in allKeys)
            {
                double newProbability = d1.ProbabilityOf(key) * w1 + d2.ProbabilityOf(key) * w2;
                c.SetCount(key, newProbability);
            }
            return(Distribution.GetDistributionFromPartiallySpecifiedCounter(c, numKeys));
        }