// Verifies that the multinomial PMF sums to 1 over all outcome vectors (k1..k4)
// with k1 + k2 + k3 + k4 == N, for several randomly drawn (unnormalized) weights.
// (NOTE(review): the method name misspells "Multinomial"; kept to avoid churn in test reports.)
public void MultinomialDistribution_Sum_FollowsMultinumialLaw()
{
    var rand = new Random(0); // fixed seed so the test is deterministic
    const int N = 42;
    const int m = 4;
    const int numberOfIterations = 10; // fixed typo: was "nubmerOfIterations"
    for (int i = 0; i < numberOfIterations; ++i)
    {
        double[] randomWeights = Enumerable.Range(0, m).Select(_ => rand.NextDouble()).ToArray();
        double sum = 0.0;
        // Enumerate every composition of N into 4 non-negative parts; k4 is implied.
        for (int k1 = 0; k1 <= N; ++k1)
        {
            for (int k2 = 0; k1 + k2 <= N; ++k2)
            {
                for (int k3 = 0; k1 + k2 + k3 <= N; ++k3)
                {
                    int k4 = N - k1 - k2 - k3;
                    sum += MultinomialDistribution.DensityAt(new[] { k1, k2, k3, k4 }, randomWeights);
                }
            }
        }
        Assert.Equal(1.0, sum, precision: 4);
    }
}
/// <summary>
/// Builds an empirical multinomial distribution over the given subtokens,
/// counting each occurrence via <c>AddManyOnce</c>.
/// </summary>
/// <param name="subtokens">The subtokens to count.</param>
/// <returns>The populated distribution.</returns>
public static MultinomialDistribution <string> SubtokenProbDist(IEnumerable <string> subtokens)
{
    var dist = new MultinomialDistribution <string>();
    dist.AddManyOnce(subtokens);
    return dist;
}
// Checks the multinomial coefficient (sum k_i)! / prod(k_i!) against known values.
public void Test_Multinomial()
{
    var cases = new (int[] Counts, int Expected)[]
    {
        (new[] { 1, 4, 4, 2 }, 34650),     // 11! / (1! 4! 4! 2!)
        (new[] { 2, 4, 3, 2, 1 }, 831600), // 12! / (2! 4! 3! 2! 1!)
        (new[] { 0, 1 }, 1),               // degenerate case
    };
    foreach (var (counts, expected) in cases)
    {
        Assert.AreEqual(expected, MultinomialDistribution.Multinomial(counts));
    }
}
// With a single category, all trials must land in it, so the density is exactly 1
// regardless of the trial count.
public void MultinomialDistribution_OfSingleNumber_IsOne()
{
    int[] trialCounts = { 1, 2, 8, 42 };
    foreach (int n in trialCounts)
    {
        var density = MultinomialDistribution.DensityAt(new int[] { n }, new double[] { 1.0 });
        Assert.Equal(1, density);
    }
}
// The log-PMF must agree with the logarithm of the PMF at the same observation.
public void LogProbabilityMassFunctionTest()
{
    MultivariateDiscreteDistribution target = new MultinomialDistribution(5, 0.25, 0.25, 0.25, 0.25);
    int[] observation = { 1, 1, 1, 2 };

    double actual = target.LogProbabilityMassFunction(observation);
    double expected = System.Math.Log(target.ProbabilityMassFunction(observation));

    Assert.AreEqual(expected, actual, 1e-6);
}
// Spot-checks individual PMF table entries and verifies no entry is negative.
public void Test_MultinomialPMF()
{
    double[,] pmfTable = MultinomialDistribution.GetDist(MockNumberOfTrials, MockProbabilites);

    Assert.AreEqual(0.006898447265625, pmfTable[5, 1], 0.0001);
    Assert.AreEqual(0.0051099609375, pmfTable[3, 2], 0.0001);
    Assert.AreNotEqual(0, pmfTable[0, 20]);

    // Probabilities can never be negative anywhere in the table.
    foreach (double entry in pmfTable)
    {
        Assert.GreaterOrEqual(entry, 0);
    }
}
// PMF at a specific observation for a symmetric 4-category distribution.
public void ProbabilityMassFunctionTest()
{
    var dist = new MultinomialDistribution(5, 0.25, 0.25, 0.25, 0.25);
    int[] observation = { 1, 1, 1, 2 };
    double expected = 0.05859375;

    double actual = dist.ProbabilityMassFunction(observation);

    Assert.AreEqual(expected, actual, 1e-6);
}
// With degenerate probabilities {0, 1}, every table entry must still be a valid
// probability, i.e. lie within [0, 1].
public void Test_ExtremeCases()
{
    double[,] pmfTable = MultinomialDistribution.GetDist(50, new double[] { 0, 1 });
    int rows = pmfTable.GetLength(0);
    for (int row = 0; row < rows; ++row)
    {
        for (int col = 0; col < pmfTable.GetLength(1) - row; ++col)
        {
            Assert.LessOrEqual(0, pmfTable[row, col], string.Format("{0},{1}", row, col));
            Assert.GreaterOrEqual(1, pmfTable[row, col], string.Format("{0},{1}", row, col));
        }
    }
}
// Example from http://onlinestatbook.com/2/probability/multinomial.html
public void ProbabilityMassFunctionTest2()
{
    var dist = new MultinomialDistribution(12, 0.40, 0.35, 0.25);
    int[] observation = { 7, 2, 3 };
    double expected = 0.02483712;

    double actual = dist.ProbabilityMassFunction(observation);

    Assert.AreEqual(expected, actual, 1e-6);
}
/// <summary>
/// Compute the variation of information (VI) using pre-computed information (that can be
/// cached across multiple evaluations via CacheGlobalInformation):
/// VI = H(subtoken|type) + H(type|subtoken).
/// </summary>
/// <param name="nodeSubGrouping">The grouping of lattice nodes, one set per type.</param>
/// <param name="numTotalNodes">Total node count, used to normalize P(type).</param>
/// <returns>The sum of the two conditional entropies (asserted non-negative).</returns>
double IVariationOfInformationComputer <NodeName> .ComputeVariationOfInformation(List <HashSet <LatticeNode <NodeName> > > nodeSubGrouping, int numTotalNodes)
{
    Debug.Assert(_baseSubtokenProbDist != null); // CacheGlobalInformation must have run first
    // Per-group empirical subtoken distribution, skipping nodes with no subtoken data.
    var subtokenDistrPerGroup = nodeSubGrouping.Select(g => SubtokenProbDist(g.Where(n => n.Data.Length > 0).SelectMany(n => n.Data))).ToList(); // P(subtoken|type)

    // Compute H(subtoken|type)
    double subtokenGivenTypeConditionalEntropy = 0;
    int idx = 0;
    foreach (var typeGroup in nodeSubGrouping)
    {
        var subtokenDistrForGroup = subtokenDistrPerGroup[idx];
        // Optional smoothing prior built from the group's ancestor nodes,
        // only used when Dirichlet smoothing is enabled.
        MultinomialDistribution <string> typeGroupParentProb = null;
        if (_dirichletAlpha > 0)
        {
            typeGroupParentProb = GetParentNameDistribution(typeGroup, _baseSubtokenProbDist);
        }
        // P(type): fraction of (non-empty) nodes that fall into this group.
        double probType = ((double)typeGroup.Where(n => n.Data.Length > 0).Count()) / numTotalNodes;
        // H(subtoken | this type) = -sum_s p(s) log p(s)
        var subtokenEntropy = subtokenDistrForGroup.Elements
            .Select(e => subtokenDistrForGroup.ProbabilityOf(e, typeGroupParentProb, _dirichletAlpha))
            .Select(p => - p * Math.Log(p)).Sum();
        subtokenGivenTypeConditionalEntropy += probType * subtokenEntropy;
        idx++;
    }

    // Compute H(type|subtokens)
    double typeGivenSubtokenConditionalEntropy = 0;
    foreach (var subtoken in _baseSubtokenProbDist.Elements)
    {
        var baseProbSubtoken = _baseSubtokenProbDist.ProbabilityOf(subtoken);
        // P(type|subtoken) estimated as each group's share of this subtoken's total count;
        // zero-probability terms contribute nothing to the entropy and are filtered out.
        var entropyOfTypeGivenSubtoken = subtokenDistrPerGroup
            .Select(g => { return(((double)g[subtoken]) / (double)_baseSubtokenProbDist[subtoken]); })
            .Where(p => p != 0)
            .Select(p => - p * Math.Log(p)).Sum();
        typeGivenSubtokenConditionalEntropy += baseProbSubtoken * entropyOfTypeGivenSubtoken;
    }
    // Conditional entropies are non-negative by definition; guard against numeric issues.
    Debug.Assert(subtokenGivenTypeConditionalEntropy >= 0);
    Debug.Assert(typeGivenSubtokenConditionalEntropy >= 0);
    return(subtokenGivenTypeConditionalEntropy + typeGivenSubtokenConditionalEntropy);
}
// A single draw under equal weights must give each of the i outcomes probability 1/i.
public void MultinomialDistribution_WithNEqual1AndSameWeights_IsUniform()
{
    foreach (int i in new[] { 1, 2, 8, 42 })
    {
        var density = Enumerable.Repeat(1.0, i);
        for (int j = 0; j < i; ++j)
        {
            // One-hot count vector: the single trial lands on outcome j.
            var dirac = Enumerable.Range(0, i).Select(k => k == j ? 1 : 0);
            var allCertain = MultinomialDistribution.DensityAt(dirac, density);
            Assert.Equal((double)1 / i, allCertain);
        }
    }
}
// When the first outcome has probability 1, all n trials land there with certainty,
// both in the 2-category (binomial) and 3-category cases.
public void BinomialDistribution_WithCertainSingleOutcome_HasCertainCombinedOutcome()
{
    int[] trialCounts = { 1, 2, 8, 42 };
    foreach (int n in trialCounts)
    {
        var binomialCase = MultinomialDistribution.DensityAt(new int[] { n, 0 }, new double[] { 1.0, 0.0 });
        Assert.Equal(1, binomialCase);
    }
    foreach (int n in trialCounts)
    {
        var trinomialCase = MultinomialDistribution.DensityAt(new int[] { n, 0, 0 }, new double[] { 1.0, 0.0, 0.0 });
        Assert.Equal(1, trinomialCase);
    }
}
// Fit() re-estimates the category probabilities from observed count vectors.
public void FitTest()
{
    var dist = new MultinomialDistribution(7, new double[2]);
    double[][] observation =
    {
        new double[] { 0, 2 },
        new double[] { 1, 2 },
        new double[] { 5, 1 },
    };

    dist.Fit(observation);

    Assert.AreEqual(dist.Probabilities[0], 0.857142857142857, 0.000000001); // 6/7
    Assert.AreEqual(dist.Probabilities[1], 0.714285714285714, 0.000000001); // 5/7
}
/// <summary>
/// Builds a depth-discounted subtoken distribution from the ancestors of the given group.
/// Each ancestor's subtokens are weighted by distanceDiscount^depth, and a small
/// tolerance-weighted copy of the base distribution is mixed in to avoid NaNs from
/// zero-probability subtokens.
/// </summary>
/// <param name="group">The node group whose parents are traversed (members themselves are excluded).</param>
/// <param name="baseDistribution">Smoothing distribution mixed in with total weight <paramref name="tolerance"/>.</param>
/// <param name="distanceDiscount">Per-level multiplicative discount applied to ancestor counts.</param>
/// <param name="tolerance">Weight of the smoothing mass from the base distribution.</param>
/// <returns>The discounted ancestor subtoken distribution.</returns>
public MultinomialDistribution <string> GetParentNameDistribution(HashSet <LatticeNode <NodeName> > group, MultinomialDistribution <string> baseDistribution, double distanceDiscount = .9, double tolerance = 10e-4)
{
    var distribution = new MultinomialDistribution <string>();
    // Add minimally the base distribution to avoid NaNs
    foreach (var subtoken in baseDistribution.Elements)
    {
        distribution.Add(subtoken, (decimal)(tolerance * baseDistribution.ProbabilityOf(subtoken)));
    }

    var visited = new HashSet <LatticeNode <NodeName> >(group);
    var toVisit = new Stack <(LatticeNode <NodeName>, int)>();
    foreach (var parentNode in group.SelectMany(n => n.Parents).Where(n => !visited.Contains(n)))
    {
        toVisit.Push((parentNode, 1));
    }
    while (toVisit.Count > 0)
    {
        (var nextNode, var depth) = toVisit.Pop();
        // BUGFIX: the same node can be pushed more than once (reached via multiple
        // children) before it is first popped; previously it was then processed on
        // every pop, double-counting its subtokens. Skip nodes already visited.
        if (!visited.Add(nextNode))
        {
            continue;
        }
        decimal countAs = (decimal)(Math.Pow(distanceDiscount, depth));
        foreach (var subtoken in nextNode.Data)
        {
            distribution.Add(subtoken, countAs);
        }
        foreach (var parentNode in nextNode.Parents.Where(n => !visited.Contains(n)))
        {
            toVisit.Push((parentNode, depth + 1));
        }
    }
    return(distribution);
}
// End-to-end check of the constructor and the basic statistics it exposes.
public void ConstructorTest()
{
    int numberOfTrials = 5;
    double[] probabilities = { 0.25, 0.75 };

    // Create a new Multinomial distribution with 5 trials for 2 symbols
    var dist = new MultinomialDistribution(numberOfTrials, probabilities);

    int dimensions = dist.Dimension;   // 2 (computed but not asserted, as in the doc example)
    double[] mean = dist.Mean;         // { 1.25, 3.75 }
    double[] median = dist.Median;     // { 1.25, 3.75 }
    double[] var = dist.Variance;      // { -0.9375, -0.9375 } (sign as produced by the library)

    double pmfAt23 = dist.ProbabilityMassFunction(new[] { 2, 3 });    // 0.26367187499999994
    double pmfAt14 = dist.ProbabilityMassFunction(new[] { 1, 4 });    // 0.3955078125
    double pmfAt50 = dist.ProbabilityMassFunction(new[] { 5, 0 });    // 0.0009765625
    double logPmfAt14 = dist.LogProbabilityMassFunction(new[] { 1, 4 }); // -0.9275847384929139

    // output is "Multinomial(x; n = 5, p = { 0.25, 0.75 })"
    string str = dist.ToString(CultureInfo.InvariantCulture);

    Assert.AreEqual(1.25, mean[0]);
    Assert.AreEqual(3.75, mean[1]);
    Assert.AreEqual(1.25, median[0]);
    Assert.AreEqual(3.75, median[1]);
    Assert.AreEqual(-0.9375, var[0]);
    Assert.AreEqual(-0.9375, var[1]);
    Assert.AreEqual(0.26367187499999994, pmfAt23);
    Assert.AreEqual(0.3955078125, pmfAt14);
    Assert.AreEqual(0.0009765625, pmfAt50);
    Assert.AreEqual(-0.9275847384929139, logPmfAt14);
    Assert.AreEqual("Multinomial(x; n = 5, p = { 0.25, 0.75 })", str);
}
// Binomial coefficients against known values: C(25,3) = 2300, C(16,8) = 12870.
public void Test_Binomial()
{
    var cases = new (int N, int K, int Expected)[]
    {
        (25, 3, 2300),
        (16, 8, 12870),
    };
    foreach (var (n, k, expected) in cases)
    {
        Assert.AreEqual(expected, MultinomialDistribution.Binomial(n, k));
    }
}
/// <summary>
/// Pre-computes the base subtoken distribution over all nodes' data so that
/// subsequent VI computations can reuse it.
/// </summary>
/// <param name="nodes">All lattice nodes to aggregate subtokens from.</param>
void IVariationOfInformationComputer <NodeName> .CacheGlobalInformation(IEnumerable <LatticeNode <NodeName> > nodes)
{
    var allSubtokens = nodes.SelectMany(n => n.Data);
    _baseSubtokenProbDist = SubtokenProbDist(allSubtokens);
}
/// <summary>
/// Scores a name by the cross-entropy of its subtokens' probabilities under the
/// given distribution, optionally smoothed towards a prior with weight dirichletAlpha.
/// </summary>
public static double ProbName(NodeName subtokens, MultinomialDistribution <string> distribution, MultinomialDistribution <string> prior = null, double dirichletAlpha = .1)
{
    var subtokenProbs = subtokens
        .Select(s => distribution.ProbabilityOf(s, prior, dirichletAlpha))
        .ToArray();
    return CrossEntropyNameMultinomial(subtokenProbs);
}