/// <summary>
/// Initializes a Witten-Bell distribution over <paramref name="freqDist"/> and precomputes
/// the probability assigned to each bin that has no observed sample.
/// </summary>
/// <param name="freqDist">The frequency distribution the probabilities are derived from.</param>
/// <param name="binCount">
/// The total number of bins; must be strictly greater than the number of observed samples.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="freqDist"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="binCount"/> is not greater than the number of observed samples.
/// </exception>
public WittenBellProbabilityDistribution(FrequencyDistribution<TSample> freqDist, int binCount)
{
    if (freqDist == null)
    {
        throw new ArgumentNullException("freqDist");
    }

    int observedCount = freqDist.ObservedSamples.Count;
    if (binCount <= observedCount)
    {
        throw new ArgumentOutOfRangeException("binCount");
    }

    _freqDist = freqDist;

    // With no observations _probZero keeps its default value.
    if (observedCount > 0)
    {
        // Do the arithmetic in double: the product of the unseen-bin count and
        // (outcomes + observed samples) can exceed the integer range for large bin
        // counts, and an integer product would wrap silently before the division.
        double unseenBins = (double)binCount - observedCount;
        double outcomesPlusObserved = (double)freqDist.SampleOutcomeCount + observedCount;
        _probZero = observedCount / (unseenBins * outcomesPlusObserved);
    }
}
/// <summary>
/// Initializes a Lidstone distribution over <paramref name="freqDist"/> and precomputes
/// the divisor <c>SampleOutcomeCount + binCount * gamma</c>.
/// </summary>
/// <param name="freqDist">The frequency distribution the probabilities are derived from.</param>
/// <param name="gamma">The additive constant; must be a finite, non-negative number.</param>
/// <param name="binCount">
/// The total number of bins; must be strictly greater than the number of observed samples.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="freqDist"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="binCount"/> is not greater than the number of observed samples, or
/// <paramref name="gamma"/> is negative, infinite or NaN.
/// </exception>
public LidstoneProbabilityDistribution(FrequencyDistribution<TSample> freqDist, double gamma, int binCount)
{
    // Validate freqDist the same way as the other arguments instead of letting the
    // first member access below fail with a NullReferenceException.
    if (freqDist == null)
    {
        throw new ArgumentNullException("freqDist");
    }

    if (binCount <= freqDist.ObservedSamples.Count)
    {
        throw new ArgumentOutOfRangeException("binCount");
    }

    if (double.IsInfinity(gamma) || double.IsNaN(gamma) || gamma < 0)
    {
        throw new ArgumentOutOfRangeException("gamma");
    }

    _freqDist = freqDist;
    _gamma = gamma;
    _binCount = binCount;
    _divisor = _freqDist.SampleOutcomeCount + (_binCount * gamma);
}
/// <summary>
/// Creates the shared <c>_fd</c> frequency distribution and fills it by calling
/// <c>Increment</c> once per sample "a" through "q", in that order, with the value
/// listed alongside each sample below.
/// </summary>
public void FixtureSetUp()
{
    // Parallel tables: amounts[k] is the value passed to Increment for samples[k].
    string[] samples =
    {
        "a", "b", "c", "d", "e", "f", "g", "h", "i",
        "j", "k", "l", "m", "n", "o", "p", "q"
    };
    int[] amounts =
    {
        1, 1, 2, 3, 4, 4, 4, 5, 5,
        6, 6, 6, 7, 7, 8, 9, 10
    };

    _fd = new FrequencyDistribution<string>();
    for (int k = 0; k < samples.Length; k++)
    {
        _fd.Increment(samples[k], amounts[k]);
    }
}
/// <summary>
/// Copy constructor: creates a distribution whose per-sample counts are a new dictionary
/// populated from <paramref name="fd"/> and whose <c>SampleOutcomeCount</c> matches it.
/// </summary>
/// <param name="fd">The distribution to copy from.</param>
public FrequencyDistribution(FrequencyDistribution<TSample> fd)
{
    // A fresh dictionary, so later changes to this instance's table do not touch fd's.
    var copiedCounts = new Dictionary<TSample, int>(fd._sampleCounts);

    _sampleCounts = copiedCounts;
    SampleOutcomeCount = fd.SampleOutcomeCount;
}
/// <summary>
/// Initializes the distribution by storing the supplied frequency distribution.
/// </summary>
/// <param name="freqDist">
/// The frequency distribution to keep a reference to; it is neither copied nor validated here.
/// </param>
public MaxLikelihoodProbabilityDistribution(FrequencyDistribution<TSample> freqDist)
{
    this._freqDist = freqDist;
}
/// <summary>
/// Initializes a Simple Good-Turing distribution over <paramref name="freqDist"/>.
/// Fits a line to the log-log "frequency of frequencies" data, derives an adjusted count
/// for every observed count, and stores the resulting per-count probabilities in
/// <c>_probs</c> and the total probability reserved for unseen samples in <c>_probZero</c>.
/// </summary>
/// <param name="freqDist">The frequency distribution the probabilities are derived from.</param>
/// <param name="binCount">
/// The total number of bins; must be strictly greater than the number of observed samples.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="freqDist"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="binCount"/> is not greater than the number of observed samples.
/// </exception>
public SimpleGoodTuringProbabilityDistribution(FrequencyDistribution<TSample> freqDist, int binCount)
{
    if (freqDist == null)
    {
        throw new ArgumentNullException("freqDist");
    }

    if (binCount <= freqDist.ObservedSamples.Count)
    {
        throw new ArgumentOutOfRangeException("binCount");
    }

    _freqDist = freqDist;
    _binCount = binCount;
    _probs = new Dictionary<int, double>();

    // Nothing observed: leave _probs empty and the remaining fields at their defaults.
    if (freqDist.ObservedSamples.Count == 0)
    {
        return;
    }

    // Tally how many samples share each count in one pass over the samples. The sorted
    // map yields the counts in ascending order, the order the interpolation below needs.
    // (Scanning every sample once per integer up to the largest count would cost
    // O(maxCount * samples).)
    var samplesPerCount = new SortedDictionary<int, int>();
    foreach (TSample sample in _freqDist.ObservedSamples)
    {
        int count = _freqDist[sample];
        int tally;
        samplesPerCount.TryGetValue(count, out tally); // tally is 0 when the count is new
        samplesPerCount[count] = tally + 1;
    }

    // r[j] is the j-th distinct count, nr[j] the number of samples having that count.
    var r = new List<int>(samplesPerCount.Keys);
    var nr = new List<int>(samplesPerCount.Values);
    int rowCount = r.Count;

    // Average each nr[j] over the gap between the neighbouring counts; the last row
    // mirrors the distance to its predecessor. Only the logarithms are needed afterwards.
    var logr = new double[rowCount];
    var logzr = new double[rowCount];
    for (int j = 0; j < rowCount; j++)
    {
        int previous = j > 0 ? r[j - 1] : 0;
        int next = j == rowCount - 1 ? 2 * r[j] - previous : r[j + 1];
        double zr = 2.0 * nr[j] / (next - previous);
        logr[j] = Math.Log(r[j]);
        logzr[j] = Math.Log(zr);
    }

    // Least-squares fit of log(zr) against log(r).
    double xmean = 0, ymean = 0;
    for (int j = 0; j < rowCount; j++)
    {
        xmean += logr[j];
        ymean += logzr[j];
    }
    xmean /= rowCount;
    ymean /= rowCount;

    double xycov = 0, xvar = 0;
    for (int j = 0; j < rowCount; j++)
    {
        xycov += (logr[j] - xmean) * (logzr[j] - ymean);
        xvar += Math.Pow(logr[j] - xmean, 2);
    }

    // A (near-)zero variance means there is no usable slope; fall back to a flat line.
    // NOTE(review): GetSmoothedSamplesCount is defined elsewhere and presumably reads
    // _slope and _intercept, so both must be assigned before the loop below.
    _slope = Math.Abs(xvar) > double.Epsilon ? xycov / xvar : 0;
    _intercept = ymean - _slope * xmean;

    // Adjusted counts: use the unsmoothed estimate only when the next count is the
    // adjacent integer and the two estimates differ by more than 1.96 standard deviations.
    var rstar = new double[rowCount];
    for (int j = 0; j < rowCount; j++)
    {
        double smoothRstar = (r[j] + 1) * GetSmoothedSamplesCount(r[j] + 1) / GetSmoothedSamplesCount(r[j]);
        if (rowCount == j + 1 || r[j + 1] != r[j] + 1)
        {
            rstar[j] = smoothRstar;
        }
        else
        {
            double unsmoothRstar = (double)(r[j] + 1) * nr[j + 1] / nr[j];
            double std = Math.Sqrt(GetVariance(r[j], nr[j], nr[j + 1]));
            rstar[j] = Math.Abs(unsmoothRstar - smoothRstar) <= 1.96 * std ? smoothRstar : unsmoothRstar;
        }
    }

    double samplesCountPrime = 0;
    for (int j = 0; j < rowCount; j++)
    {
        samplesCountPrime += nr[j] * rstar[j];
    }

    // Mass reserved for unseen samples: (samples seen exactly once) / (total outcomes).
    int singletonCount;
    samplesPerCount.TryGetValue(1, out singletonCount);
    _probZero = (double)singletonCount / _freqDist.SampleOutcomeCount;

    // Renormalise the adjusted counts so the seen samples share the remaining mass.
    for (int j = 0; j < rowCount; j++)
    {
        _probs[r[j]] = (1.0 - _probZero) * rstar[j] / samplesCountPrime;
    }
}