コード例 #1
0
        /// <summary>
        /// Checks that <c>Distribution.GetDistributionFromLogValues</c> turns a counter of
        /// log-counts back into a normalized distribution.
        /// </summary>
        /// <remarks>
        /// The raw counts 1, 2, 3 and 4 sum to 10, so the expected probabilities are
        /// 0.1, 0.2, 0.3 and 0.4.
        /// </remarks>
        public virtual void TestGetDistributionFromLogValues()
        {
            ICounter <string> c1 = new ClassicCounter <string>();

            c1.SetCount("p", 1.0);
            c1.SetCount("q", 2.0);
            c1.SetCount("r", 3.0);
            c1.SetCount("s", 4.0);
            // Replace every count by its logarithm, then rebuild the distribution from the logs.
            Counters.LogInPlace(c1);
            Distribution <string> distribution = Distribution.GetDistributionFromLogValues(c1);

            // NUnit's AreEqual signature is (expected, actual); keeping that order makes
            // failure messages report the right value as "expected".
            // size
            NUnit.Framework.Assert.AreEqual(4, distribution.KeySet().Count);
            // keys
            NUnit.Framework.Assert.IsTrue(distribution.ContainsKey("p"));
            NUnit.Framework.Assert.IsTrue(distribution.ContainsKey("q"));
            NUnit.Framework.Assert.IsTrue(distribution.ContainsKey("r"));
            NUnit.Framework.Assert.IsTrue(distribution.ContainsKey("s"));
            // values
            NUnit.Framework.Assert.AreEqual(1.0E-1, distribution.GetCount("p"), 1E-10);
            NUnit.Framework.Assert.AreEqual(2.0E-1, distribution.GetCount("q"), 1E-10);
            NUnit.Framework.Assert.AreEqual(3.0E-1, distribution.GetCount("r"), 1E-10);
            NUnit.Framework.Assert.AreEqual(4.0E-1, distribution.GetCount("s"), 1E-10);
        }
コード例 #2
0
        /// <summary>
        /// Exercises <c>Counters.ToSortedString</c> (with and without a wrapper format) and
        /// <c>Counters.ToSortedByKeysString</c> on a three-entry counter.
        /// </summary>
        public virtual void TestToSortedString()
        {
            ICounter <string> counter = new ClassicCounter <string>();
            counter.SetCount("b", 0.25);
            counter.SetCount("a", 0.5);
            counter.SetCount("c", 1.0);

            // Full-argument overload: item format, joiner and wrapper format are all supplied.
            string wrapped = Counters.ToSortedString(counter, 5, "%s%.1f", ":", "{%s}");
            NUnit.Framework.Assert.AreEqual("{c1.0:a0.5:b0.3}", wrapped);

            // Overload that takes no wrapper format.
            string unwrapped = Counters.ToSortedString(counter, 2, "%2$f %1$s", "\n");
            NUnit.Framework.Assert.AreEqual("1.000000 c\n0.500000 a", unwrapped);

            // The generic formatter should reproduce the output of the specialised helpers.
            const int topK = 2;
            string bracketed = Counters.ToSortedString(counter, topK, "%s=%s", ", ", "[%s]");
            NUnit.Framework.Assert.AreEqual(Counters.ToString(counter, topK), bracketed);
            NUnit.Framework.Assert.AreEqual(Counters.ToBiggestValuesFirstString(counter, topK), bracketed);

            string vertical = Counters.ToSortedString(counter, topK, "%2$g\t%1$s", "\n", "%s\n");
            NUnit.Framework.Assert.AreEqual(Counters.ToVerticalString(counter, topK), vertical);

            // Ordering by key rather than by value.
            string byKey = Counters.ToSortedByKeysString(counter, "%s=>%.2f", "; ", "<%s>");
            NUnit.Framework.Assert.AreEqual("<a=>0.50; b=>0.25; c=>1.00>", byKey);
        }
コード例 #3
0
        /// <summary>
        /// Draws a value by sampling a two-outcome Dirichlet whose parameters are
        /// <paramref name="a"/> (for <c>true</c>) and <paramref name="b"/> (for <c>false</c>).
        /// </summary>
        /// <param name="a">Parameter stored under the key <c>true</c>.</param>
        /// <param name="b">Parameter stored under the key <c>false</c>.</param>
        /// <param name="random">Random source handed to the Dirichlet sampler.</param>
        /// <returns>The probability the sampled multinomial assigns to <c>true</c>.</returns>
        public static double SampleBeta(double a, double b, Random random)
        {
            ICounter <bool> pseudoCounts = new ClassicCounter <bool>();
            pseudoCounts.SetCount(true, a);
            pseudoCounts.SetCount(false, b);

            Edu.Stanford.Nlp.Stats.Dirichlet <bool> dirichlet = new Edu.Stanford.Nlp.Stats.Dirichlet <bool>(pseudoCounts);
            Multinomial <bool> draw = dirichlet.DrawSample(random);

            return draw.ProbabilityOf(true);
        }
コード例 #4
0
        /// <summary>
        /// Round-trips a counter with a wide range of magnitudes through
        /// <c>Counters.SerializeStringCounter</c> / <c>Counters.DeserializeStringCounter</c>
        /// and checks that every value read back is within a relative error of 1e-5.
        /// </summary>
        public virtual void TestSerializeStringCounter()
        {
            ICounter <string> counts = new ClassicCounter <string>();

            for (int @base = -10; @base < 10; ++@base)
            {
                // A zero base would give 0 or infinity for every exponent; skip it.
                if (@base == 0)
                {
                    continue;
                }
                for (int exponent = -100; exponent < 100; ++exponent)
                {
                    // System.Math exposes the constant as PI; there is no Math.Pi member.
                    double number = Math.Pow(Math.PI * @base, exponent);
                    // double has no static ToString(double). Use the instance overload with the
                    // round-trip format and the invariant culture so that keys are unique per
                    // value and do not depend on the machine's locale.
                    string key = number.ToString("R", System.Globalization.CultureInfo.InvariantCulture);
                    counts.SetCount(key, number);
                }
            }
            File tmp = File.CreateTempFile("counts", ".tab.gz");

            tmp.DeleteOnExit();
            Counters.SerializeStringCounter(counts, tmp.GetPath());
            ICounter <string> reread = Counters.DeserializeStringCounter(tmp.GetPath());

            foreach (KeyValuePair <string, double> entry in reread.EntrySet())
            {
                double old = counts.GetCount(entry.Key);
                // Tolerance is relative to the magnitude of the original value.
                NUnit.Framework.Assert.AreEqual(old, entry.Value, Math.Abs(old) / 1e5);
            }
        }
コード例 #5
0
        /// <summary>
        /// Draws a multinomial by sampling one Gamma variate per key of
        /// <paramref name="parameters"/> and dividing each variate by their sum.
        /// </summary>
        /// <param name="random">Random source passed to <c>Gamma.DrawSample</c>.</param>
        /// <param name="parameters">Counter whose count for each key is used as that key's Gamma parameter.</param>
        /// <returns>A new <c>Multinomial</c> built from the normalized variates.</returns>
        public static Multinomial <F> DrawSample <F>(Random random, ICounter <F> parameters)
        {
            ICounter <F> multParameters = new ClassicCounter <F>();
            double       sum            = 0.0;

            foreach (F o in parameters.KeySet())
            {
                double parameter = Gamma.DrawSample(random, parameters.GetCount(o));
                sum += parameter;
                multParameters.SetCount(o, parameter);
            }
            // Normalize. Iterate over the keys of `parameters` (the same key set that was just
            // copied into multParameters) so that multParameters is never written to while its
            // own key collection is being enumerated.
            foreach (F o_1 in parameters.KeySet())
            {
                multParameters.SetCount(o_1, multParameters.GetCount(o_1) / sum);
            }
            return(new Multinomial <F>(multParameters));
        }
コード例 #6
0
        /// <summary>
        /// Stores <paramref name="count"/> for the key pair (<paramref name="o1"/>,
        /// <paramref name="o2"/>) and adjusts the running <c>total</c> accordingly.
        /// </summary>
        /// <param name="o1">First-level key.</param>
        /// <param name="o2">Second-level key.</param>
        /// <param name="count">New count for the pair.</param>
        public virtual void SetCount(K1 o1, K2 o2, double count)
        {
            // Look up the inner counter first, then read the value that is about to be replaced.
            ClassicCounter <K2> inner    = GetCounter(o1);
            double              previous = GetCount(o1, o2);

            // Swap the old contribution for the new one in the running total.
            total = total - previous;
            inner.SetCount(o2, count);
            total = total + count;
        }
コード例 #7
0
        /// <summary>
        /// Collects the per-key result of <c>LastF1(key)</c> for every key that appears in
        /// either <c>previousGuessed</c> or <c>previousGold</c>.
        /// </summary>
        /// <returns>A new counter mapping each such key to its <c>LastF1(key)</c> value.</returns>
        public virtual ClassicCounter <OUT> LastF1()
        {
            ICollection <OUT>    labels    = Sets.Union(previousGuessed.KeySet(), previousGold.KeySet());
            ClassicCounter <OUT> f1ByLabel = new ClassicCounter <OUT>();

            foreach (OUT label in labels)
            {
                double f1 = LastF1(label);
                f1ByLabel.SetCount(label, f1);
            }
            return f1ByLabel;
        }
コード例 #8
0
        /// <summary>
        /// Checks <c>Counters.HIndex</c> on an empty counter and on counters that are grown
        /// step by step to h-indices of 2, 14 and 15.
        /// </summary>
        public virtual void TestHIndex()
        {
            ICounter <string> citations = new ClassicCounter <string>();

            // Nothing counted yet.
            NUnit.Framework.Assert.AreEqual(0, Counters.HIndex(citations));

            // Exactly two entries have at least 2 citations.
            citations.SetCount("X", 3);
            citations.SetCount("Y", 2);
            citations.SetCount("Z", 1);
            NUnit.Framework.Assert.AreEqual(2, Counters.HIndex(citations));

            // Add 14 entries ("0".."13") with 15 citations each: 14 entries have >= 14 citations.
            for (int paper = 0; paper < 14; paper++)
            {
                string paperId = paper.ToString();
                citations.SetCount(paperId, 15);
            }
            NUnit.Framework.Assert.AreEqual(14, Counters.HIndex(citations));

            // One more entry with 15 citations: 15 entries have >= 15 citations.
            citations.SetCount("15", 15);
            NUnit.Framework.Assert.AreEqual(15, Counters.HIndex(citations));
        }
コード例 #9
0
        /// <summary>
        /// Checks <c>Counters.JensenShannonDivergence</c> on two overlapping counters and on
        /// two counters that share no keys.
        /// </summary>
        public virtual void TestJensenShannonDivergence()
        {
            // Same figures as the corresponding ArrayMathTest case.
            ICounter <string> left = new ClassicCounter <string>();
            left.SetCount("a", 1.0);
            left.SetCount("b", 1.0);
            left.SetCount("c", 7.0);
            left.SetCount("d", 1.0);

            ICounter <string> right = new ClassicCounter <string>();
            right.SetCount("b", 1.0);
            right.SetCount("c", 1.0);
            right.SetCount("d", 7.0);
            right.SetCount("e", 1.0);
            right.SetCount("f", 0.0);

            double overlapping = Counters.JensenShannonDivergence(left, right);
            NUnit.Framework.Assert.AreEqual(0.46514844544032313, overlapping, 1e-5);

            // Counters built from key collections with no key in common.
            ICounter <string> onlyA  = new ClassicCounter <string>(Java.Util.Collections.SingletonList("A"));
            ICounter <string> onlyBC = new ClassicCounter <string>(Arrays.AsList("B", "C"));

            double disjoint = Counters.JensenShannonDivergence(onlyA, onlyBC);
            NUnit.Framework.Assert.AreEqual(1.0, disjoint, 1e-5);
        }
コード例 #10
0
        /// <summary>
        /// Checks that <c>Counters.Flatten</c> merges a map of counters into a single counter:
        /// six distinct keys overall, and the shared key "b" ends up with a count of 2.0.
        /// </summary>
        public virtual void TestFlatten()
        {
            ICounter <string> first = new ClassicCounter <string>();
            first.SetCount("a", 1.0);
            first.SetCount("b", 1.0);
            first.SetCount("c", 7.0);
            first.SetCount("d", 1.0);

            ICounter <string> second = new ClassicCounter <string>();
            second.SetCount("b", 1.0);
            second.SetCount("c", 1.0);
            second.SetCount("d", 7.0);
            second.SetCount("e", 1.0);
            second.SetCount("f", 1.0);

            IDictionary <string, ICounter <string> > byName = new Dictionary <string, ICounter <string> >();
            byName["first"]  = first;
            byName["second"] = second;

            ICounter <string> merged = Counters.Flatten(byName);

            // Keys a..f, with "b" present in both inputs at 1.0 each.
            NUnit.Framework.Assert.AreEqual(6, merged.Size());
            NUnit.Framework.Assert.AreEqual(2.0, merged.GetCount("b"));
        }
コード例 #11
0
        /// <summary>
        /// Builds a new distribution whose probability for each key is the weighted average
        /// of the probabilities the two given distributions assign to that key.
        /// </summary>
        /// <remarks>
        /// The keys considered are those returned by <c>GetSetOfAllKeys(d1, d2)</c>. For each
        /// key the new probability is <c>d1.ProbabilityOf(key) * w1 + d2.ProbabilityOf(key) * (1 - w1)</c>,
        /// so a key that is absent from one distribution contributes whatever that
        /// distribution's <c>ProbabilityOf</c> returns for it. The number of keys of the
        /// result is taken from <paramref name="d1"/> only.
        /// </remarks>
        /// <param name="d1">First distribution; also supplies the number of keys.</param>
        /// <param name="w1">Weight applied to <paramref name="d1"/>; <paramref name="d2"/> receives <c>1 - w1</c>.</param>
        /// <param name="d2">Second distribution.</param>
        /// <returns>
        /// The distribution produced by <c>GetDistributionFromPartiallySpecifiedCounter</c>
        /// from the averaged probabilities.
        /// </returns>
        public static Distribution <K> WeightedAverage <K>(Distribution <K> d1, double w1, Distribution <K> d2)
        {
            double          remainingWeight = 1.0 - w1;
            ICollection <K> keys            = GetSetOfAllKeys(d1, d2);
            int             keyCount        = d1.GetNumberOfKeys();
            ICounter <K>    averaged        = new ClassicCounter <K>();

            foreach (K key in keys)
            {
                averaged.SetCount(key, d1.ProbabilityOf(key) * w1 + d2.ProbabilityOf(key) * remainingWeight);
            }

            Distribution <K> combined = Distribution.GetDistributionFromPartiallySpecifiedCounter(averaged, keyCount);
            return combined;
        }
コード例 #12
0
        /// <summary>
        /// Copies every (first key, second key) count into a single counter keyed by
        /// <c>Pair</c>s.
        /// </summary>
        /// <returns>
        /// A new counter whose default return value is this counter's <c>defaultValue</c> and
        /// which holds one entry per key pair found in the inner counters.
        /// </returns>
        public virtual ClassicCounter <Pair <K1, K2> > Flatten()
        {
            ClassicCounter <Pair <K1, K2> > flat = new ClassicCounter <Pair <K1, K2> >();
            flat.SetDefaultReturnValue(defaultValue);

            foreach (K1 outerKey in FirstKeySet())
            {
                ClassicCounter <K2> row = GetCounter(outerKey);
                foreach (K2 innerKey in row.KeySet())
                {
                    Pair <K1, K2> pair = new Pair <K1, K2>(outerKey, innerKey);
                    flat.SetCount(pair, row.GetCount(innerKey));
                }
            }
            return flat;
        }
コード例 #13
0
        /// <summary>
        /// Creates a Distribution from a counter whose values are logarithms: each value is
        /// exponentiated into a fresh counter, which is then passed to <c>GetDistribution</c>.
        /// </summary>
        /// <remarks>
        /// Before exponentiating, the maximum value of <paramref name="counter"/> is subtracted
        /// from every entry to reduce the chance of underflow. The input counter is not modified.
        /// </remarks>
        /// <param name="counter">Counter holding log-values.</param>
        /// <returns>a new Distribution</returns>
        public static Edu.Stanford.Nlp.Stats.Distribution <E> GetDistributionFromLogValues <E>(ICounter <E> counter)
        {
            // The shift must come from the input counter, not from the (still empty) copy.
            double largest = Counters.Max(counter);

            ICounter <E> shifted = new ClassicCounter <E>();
            foreach (E key in counter.KeySet())
            {
                double logValue = counter.GetCount(key);
                shifted.SetCount(key, Math.Exp(logValue - largest));
            }
            return GetDistribution(shifted);
        }
コード例 #14
0
        /// <summary>
        /// Converts from the format printed by the toString method back into
        /// a Counter&lt;String&gt;.
        /// </summary>
        /// <remarks>
        /// The expected shape is <c>{key1=value1, key2=value2}</c>: the text must start with
        /// '{' and end with '}', entries are separated by ", " and each entry is split on "=".
        /// Nothing is unescaped, so this only works when the keys contain neither commas nor
        /// equals signs. Values are parsed with the invariant culture ('.' as the decimal
        /// separator) regardless of the current thread culture. The empty form <c>{}</c>
        /// yields an empty counter.
        /// </remarks>
        /// <param name="s">A String representation of a Counter</param>
        /// <returns>The Counter</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
        /// <exception cref="System.Exception">
        /// The text is not wrapped in braces, or an entry does not split into exactly two
        /// fields on "=".
        /// </exception>
        public static ClassicCounter <string> FromString(string s)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }
            ClassicCounter <string> result = new ClassicCounter <string>();

            // Ordinal comparison: the braces are syntax, not linguistic text.
            if (!s.StartsWith("{", StringComparison.Ordinal) || !s.EndsWith("}", StringComparison.Ordinal))
            {
                throw new Exception("invalid format: ||" + s + "||");
            }
            s = Sharpen.Runtime.Substring(s, 1, s.Length - 1);
            if (s.Length == 0)
            {
                // "{}" carries no entries. Splitting the empty string would produce a single
                // empty "line" and be rejected below, so return the empty counter directly.
                return(result);
            }
            string[] lines = s.Split(", ");
            foreach (string line in lines)
            {
                string[] fields = line.Split("=");
                if (fields.Length != 2)
                {
                    throw new Exception("Got unsplittable line: \"" + line + '\"');
                }
                // Invariant culture so that "0.5" parses the same way on every machine.
                result.SetCount(fields[0], double.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture));
            }
            return(result);
        }
コード例 #15
0
        // ----------------------------------------------------------------------------
        /// <summary>
        /// Creates a Distribution from the given counter using Gale &amp; Sampsons'
        /// "simple Good-Turing" smoothing.
        /// </summary>
        /// <param name="counter">
        /// Observed counts. It is first passed to <c>ValidateCounter</c>; each count is rounded
        /// to an integer when looking up its smoothed probability.
        /// </param>
        /// <param name="numberOfKeys">
        /// Total number of keys, seen and unseen; must be strictly greater than the size of
        /// <paramref name="counter"/>.
        /// </param>
        /// <returns>a new simple Good-Turing smoothed Distribution.</returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="numberOfKeys"/> leaves no room for at least one unseen key.
        /// </exception>
        public static Edu.Stanford.Nlp.Stats.Distribution <E> SimpleGoodTuring <E>(ICounter <E> counter, int numberOfKeys)
        {
            // check arguments
            ValidateCounter(counter);
            int numUnseen = numberOfKeys - counter.Size();

            if (numUnseen < 1)
            {
                // .NET composite formatting uses {0}/{1}; Java-style %d would be printed
                // literally and the offending values would never appear in the message.
                throw new ArgumentException(string.Format("ERROR: numberOfKeys {0} must be > size of counter {1}!", numberOfKeys, counter.Size()));
            }
            // do smoothing
            int[][] cc = CountCounts2IntArrays(CollectCountCounts(counter));
            int[]   r  = cc[0];
            // counts
            int[] n = cc[1];
            // counts of counts
            Edu.Stanford.Nlp.Stats.SimpleGoodTuring sgt = new Edu.Stanford.Nlp.Stats.SimpleGoodTuring(r, n);
            // collate results: map each observed count value to its smoothed probability
            ICounter <int> probsByCount = new ClassicCounter <int>();

            double[] probs = sgt.GetProbabilities();
            for (int i = 0; i < probs.Length; i++)
            {
                probsByCount.SetCount(r[i], probs[i]);
            }
            // make smoothed distribution
            Edu.Stanford.Nlp.Stats.Distribution <E> dist = new Edu.Stanford.Nlp.Stats.Distribution <E>();
            dist.counter = new ClassicCounter <E>();
            foreach (KeyValuePair <E, double> entry in counter.EntrySet())
            {
                E   item  = entry.Key;
                int count = (int)Math.Round(entry.Value);
                dist.counter.SetCount(item, probsByCount.GetCount(count));
            }
            dist.numberOfKeys = numberOfKeys;
            // probability mass set aside for keys that were never observed
            dist.reservedMass = sgt.GetProbabilityForUnseen();
            return(dist);
        }
コード例 #16
0
        // EXTRA I/O METHODS
        /// <summary>Returns the Counter over Strings specified by this String.</summary>
        /// <remarks>
        /// The String is often the whole contents of a file.
        /// The file can include comments if each line of comment starts with
        /// a hash (#) symbol, and does not contain any TAB characters.
        /// Otherwise, the format is one entry per line.  Each line must contain
        /// precisely one tab separating a key and a value, giving a format of:
        /// <blockquote>
        /// StringKey\tdoubleValue\n
        /// </blockquote>
        /// Empty lines at the very end of the input (for example the one produced by a
        /// final newline) are ignored. Values are parsed with the invariant culture.
        /// </remarks>
        /// <param name="s">
        /// String representation of a Counter, where entries are one per
        /// line such that each line is either a comment (begins with #)
        /// or key \t value
        /// </param>
        /// <returns>The Counter with String keys</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
        /// <exception cref="System.Exception">
        /// A non-comment line does not split into exactly two fields on a tab.
        /// </exception>
        public static ClassicCounter <string> ValueOfIgnoreComments(string s)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }
            ClassicCounter <string> result = new ClassicCounter <string>();

            string[] lines = s.Split("\n");
            // .NET's Split keeps trailing empty entries, so text that ends with a newline
            // yields a final empty "line". Drop trailing empties so that such input is accepted.
            int lineCount = lines.Length;
            while (lineCount > 0 && lines[lineCount - 1].Length == 0)
            {
                lineCount--;
            }
            for (int i = 0; i < lineCount; i++)
            {
                string   line   = lines[i];
                string[] fields = line.Split("\t");
                if (fields.Length != 2)
                {
                    // Only lines without exactly one tab can be comments.
                    if (line.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue;
                    }
                    throw new Exception("Got unsplittable line: \"" + line + '\"');
                }
                // Invariant culture so that "0.5" parses the same way on every machine.
                result.SetCount(fields[0], double.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture));
            }
            return(result);
        }
コード例 #17
0
        /// <summary>For internal testing purposes only.</summary>
        /// <remarks>
        /// Fills a counter with roughly Zipfian counts, builds six differently smoothed
        /// distributions from it and prints a comparison table to standard output.
        /// </remarks>
        /// <param name="args">Not used.</param>
        public static void Main(string[] args)
        {
            // Scratch counter; it is filled here but not consulted by anything below.
            ICounter <string> c2 = new ClassicCounter <string>();
            c2.IncrementCount("p", 13);
            c2.SetCount("q", 12);
            c2.SetCount("w", 5);
            c2.IncrementCount("x", 7.5);

            const string HeaderFormat = "%10s %10s %10s %10s %10s %10s %10s%n";
            const string ValueFormat  = "%10.8f ";
            const string Rule         = "----------";
            const string Unk          = "!*UNKNOWN*!";

            ClassicCounter <string> counts     = new ClassicCounter <string>();
            double                  p          = 1000;
            ICollection <string>    vocabulary = Generics.NewHashSet();

            vocabulary.Add(Unk);
            // Roughly Zipfian counts: the word named after rank r gets round(1000 / r)
            // for r = 1..1999 and is added to the vocabulary.
            for (int rank = 1; rank < 2000; rank++)
            {
                string word = rank.ToString();
                counts.SetCount(word, Math.Round(p / rank));
                vocabulary.Add(word);
            }
            // Ranks 2000..4000 are in the vocabulary only; they receive no count.
            for (int rank = 2000; rank <= 4000; rank++)
            {
                vocabulary.Add(rank.ToString());
            }

            Distribution <string> n     = GetDistribution(counts);
            Distribution <string> prior = GetUniformDistribution(vocabulary);
            Distribution <string> dir1  = DistributionWithDirichletPrior(counts, prior, 4000);
            Distribution <string> dir2  = DynamicCounterWithDirichletPrior(counts, prior, 4000);
            Distribution <string> add1;
            Distribution <string> gt;

            // Hard-wired switch between the "implicit unknown" and "explicit unknown" variants.
            if (true)
            {
                add1 = LaplaceSmoothedDistribution(counts, 4000);
                gt   = GoodTuringSmoothedCounter(counts, 4000);
            }
            else
            {
                counts.SetCount(Unk, 45);
                add1 = LaplaceWithExplicitUnknown(counts, 0.5, Unk);
                gt   = GoodTuringWithExplicitUnknown(counts, Unk);
            }
            Distribution <string> sgt = SimpleGoodTuring(counts, 4000);

            // Table columns, in print order: Norm, Add1, Dir1, Dir2, GT, SGT.
            Distribution <string>[] columns = { n, add1, dir1, dir2, gt, sgt };

            System.Console.Out.Printf(HeaderFormat, "Freq", "Norm", "Add1", "Dir1", "Dir2", "GT", "SGT");
            System.Console.Out.Printf(HeaderFormat, Rule, Rule, Rule, Rule, Rule, Rule, Rule);

            // One row for each of the four most frequent words.
            for (int top = 1; top < 5; top++)
            {
                System.Console.Out.Printf("%10d ", Math.Round(p / top));
                string word = top.ToString();
                foreach (Distribution <string> column in columns)
                {
                    System.Console.Out.Printf(ValueFormat, column.ProbabilityOf(word));
                }
                System.Console.Out.WriteLine();
            }

            // A word from the tail of the counted range.
            System.Console.Out.Printf(HeaderFormat, Rule, Rule, Rule, Rule, Rule, Rule, Rule);
            System.Console.Out.Printf("%10d ", 1);
            string last = 1500.ToString();
            foreach (Distribution <string> column in columns)
            {
                System.Console.Out.Printf(ValueFormat, column.ProbabilityOf(last));
            }
            System.Console.Out.WriteLine();

            // The explicit unknown token.
            System.Console.Out.Printf(HeaderFormat, Rule, Rule, Rule, Rule, Rule, Rule, Rule);
            System.Console.Out.Printf("%10s ", "UNK");
            foreach (Distribution <string> column in columns)
            {
                System.Console.Out.Printf(ValueFormat, column.ProbabilityOf(Unk));
            }
            System.Console.Out.WriteLine();

            // Reserved mass reported by each distribution.
            System.Console.Out.Printf(HeaderFormat, Rule, Rule, Rule, Rule, Rule, Rule, Rule);
            System.Console.Out.Printf("%10s ", "RESERVE");
            foreach (Distribution <string> column in columns)
            {
                System.Console.Out.Printf(ValueFormat, column.GetReservedMass());
            }
            System.Console.Out.WriteLine();

            // Total count reported by each distribution.
            System.Console.Out.Printf(HeaderFormat, Rule, Rule, Rule, Rule, Rule, Rule, Rule);
            System.Console.Out.Printf("%10s ", "Total");
            foreach (Distribution <string> column in columns)
            {
                System.Console.Out.Printf(ValueFormat, column.TotalCount());
            }
            System.Console.Out.WriteLine();
        }