/// <summary>
/// Samples a <c>Softmax</c> policy many times over a fixed set of Q-values
/// (121, 231, 425, 676) and checks that, when actions are ranked by how often
/// they were selected, the ranking is 0, 1, 2, 3 — i.e. actions with larger
/// Q-values were picked more often.
/// </summary>
/// <remarks>
/// The <see cref="Random"/> is seeded with a constant so the sampling sequence
/// is reproducible from run to run.
/// </remarks>
public void TestRandomProbability()
{
    // Arrange: fixed seed, fixed temperature, fixed Q-values.
    var rng = new Random(1337);
    int temperature = 200;
    var policy = new Softmax(temperature, rng);
    var values = new QValue(new double[] { 121, 231, 425, 676 });

    // The result is not inspected, but the call receives the shared Random
    // instance, so it is kept in place to leave the draw sequence untouched.
    var greedyAction = PolicyHelpers.SelectMax(values, rng);

    // One tally entry per action, each remembering the action it belongs to.
    TestInstance[] tally = Enumerable.Range(0, values.Count)
        .Select(actionIndex => new TestInstance() { Action = actionIndex })
        .ToArray();

    // Act: draw repeatedly and count how often each action comes back.
    const int trials = 3000;
    for (int trial = 0; trial < trials; trial++)
    {
        int chosen = policy.Select(values);
        tally[chosen].Count++;
    }

    // Assert: ranked from least to most selected, the actions must appear
    // in the same order as their Q-values.
    TestInstance[] ranked = tally.OrderBy(entry => entry.Count).ToArray();
    int[] expectedOrder = { 0, 1, 2, 3 };
    for (int rank = 0; rank < expectedOrder.Length; rank++)
    {
        Assert.AreEqual(expectedOrder[rank], ranked[rank].Action);
    }
}