Exemple #1
0
        /// <summary>
        /// Creates a Witten-Bell probability distribution over the given frequency distribution.
        /// </summary>
        /// <param name="freqDist">The frequency distribution that supplies the observed samples and the total outcome count.</param>
        /// <param name="binCount">The total number of bins; must be strictly greater than the number of observed samples.</param>
        /// <exception cref="ArgumentNullException"><paramref name="freqDist"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="binCount"/> is not greater than the number of observed samples.</exception>
        public WittenBellProbabilityDistribution(FrequencyDistribution <TSample> freqDist, int binCount)
        {
            if (freqDist == null)
            {
                throw new ArgumentNullException("freqDist");
            }

            int observedCount = freqDist.ObservedSamples.Count;
            if (binCount <= observedCount)
            {
                throw new ArgumentOutOfRangeException("binCount");
            }

            _freqDist = freqDist;

            // With no observed samples _probZero is left at its default value.
            if (observedCount > 0)
            {
                // Number of bins that have no observed sample; positive because of the range check above.
                double unseenBins = (double)binCount - observedCount;

                // Do the arithmetic in double: the product unseenBins * (outcomes + observed) can exceed
                // the range of a 32-bit integer for large bin counts, which would silently wrap.
                double outcomesPlusObserved = (double)_freqDist.SampleOutcomeCount + observedCount;
                _probZero = observedCount / (unseenBins * outcomesPlusObserved);
            }
        }
Exemple #2
0
        /// <summary>
        /// Creates a Lidstone probability distribution over the given frequency distribution.
        /// </summary>
        /// <param name="freqDist">The frequency distribution that supplies the observed samples and the total outcome count.</param>
        /// <param name="gamma">The additive constant; must be a finite, non-negative number.</param>
        /// <param name="binCount">The total number of bins; must be strictly greater than the number of observed samples.</param>
        /// <exception cref="ArgumentNullException"><paramref name="freqDist"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="binCount"/> is not greater than the number of observed samples, or
        /// <paramref name="gamma"/> is negative, infinite or NaN.
        /// </exception>
        public LidstoneProbabilityDistribution(FrequencyDistribution <TSample> freqDist, double gamma, int binCount)
        {
            // Fail with an argument exception instead of a NullReferenceException from the check below.
            if (freqDist == null)
            {
                throw new ArgumentNullException("freqDist");
            }
            if (binCount <= freqDist.ObservedSamples.Count)
            {
                throw new ArgumentOutOfRangeException("binCount");
            }
            if (double.IsInfinity(gamma) || double.IsNaN(gamma) || gamma < 0)
            {
                throw new ArgumentOutOfRangeException("gamma");
            }

            _freqDist = freqDist;
            _gamma    = gamma;
            _binCount = binCount;

            // Cached denominator: total outcome count plus gamma for every bin.
            _divisor  = _freqDist.SampleOutcomeCount + (_binCount * gamma);
        }
 /// <summary>
 /// Populates the shared frequency distribution with seventeen samples ("a" through "q")
 /// whose counts range from 1 to 10.
 /// </summary>
 public void FixtureSetUp()
 {
     // Parallel tables: sampleNames[n] is incremented by sampleCounts[n], in this order.
     string[] sampleNames  = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q" };
     int[]    sampleCounts = { 1, 1, 2, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 8, 9, 10 };

     _fd = new FrequencyDistribution <string>();
     for (int n = 0; n < sampleNames.Length; n++)
     {
         _fd.Increment(sampleNames[n], sampleCounts[n]);
     }
 }
Exemple #4
0
		/// <summary>
		/// Copy constructor: creates a frequency distribution with the same per-sample counts
		/// and the same total outcome count as <paramref name="fd"/>.
		/// </summary>
		/// <param name="fd">The frequency distribution to copy.</param>
		public FrequencyDistribution(FrequencyDistribution<TSample> fd)
		{
			// Copy the entries into a fresh dictionary so the two instances do not share storage.
			var copiedCounts = new Dictionary<TSample, int>(fd._sampleCounts);
			this._sampleCounts = copiedCounts;
			this.SampleOutcomeCount = fd.SampleOutcomeCount;
		}
 /// <summary>
 /// Creates a maximum-likelihood probability distribution backed by the given frequency distribution.
 /// </summary>
 /// <param name="freqDist">The frequency distribution to use; the reference is stored, not copied.</param>
 public MaxLikelihoodProbabilityDistribution(FrequencyDistribution <TSample> freqDist)
 {
     this._freqDist = freqDist;
 }
        /// <summary>
        /// Creates a Simple Good-Turing probability distribution over the given frequency distribution.
        /// All per-frequency probabilities are computed once, here, and stored in <c>_probs</c>.
        /// </summary>
        /// <param name="freqDist">The frequency distribution that supplies the observed samples and their counts.</param>
        /// <param name="binCount">The total number of bins; must be strictly greater than the number of observed samples.</param>
        /// <exception cref="ArgumentNullException"><paramref name="freqDist"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="binCount"/> is not greater than the number of observed samples.</exception>
        public SimpleGoodTuringProbabilityDistribution(FrequencyDistribution <TSample> freqDist, int binCount)
        {
            if (freqDist == null)
            {
                throw new ArgumentNullException("freqDist");
            }
            if (binCount <= freqDist.ObservedSamples.Count)
            {
                throw new ArgumentOutOfRangeException("binCount");
            }

            _freqDist = freqDist;
            _binCount = binCount;
            _probs    = new Dictionary <int, double>();

            // Nothing to estimate from an empty distribution; _probs stays empty.
            if (freqDist.ObservedSamples.Count == 0)
            {
                return;
            }

            // r[j]  : the j-th distinct frequency, in ascending order.
            // nr[j] : how many observed samples have exactly that frequency.
            // A single grouping pass replaces probing every integer from 0 up to the largest
            // frequency and rescanning all samples for each one.
            var r = new List <int>();
            var nr = new List <int>();
            foreach (var bucket in _freqDist.ObservedSamples.GroupBy(s => _freqDist[s]).OrderBy(g => g.Key))
            {
                r.Add(bucket.Key);
                nr.Add(bucket.Count());
            }

            var zr    = new double[r.Count];
            var logr  = new double[r.Count];
            var logzr = new double[r.Count];

            // zr[j] spreads nr[j] over half the gap between the neighbouring frequencies.
            // The lower neighbour of the first entry is 0; the upper neighbour of the last
            // entry is mirrored (2 * r[j] - lower).
            for (int j = 0; j < r.Count; j++)
            {
                int lower = j > 0 ? r[j - 1] : 0;
                int upper = j == r.Count - 1 ? 2 * r[j] - lower : r[j + 1];
                zr[j]    = 2.0 * nr[j] / (upper - lower);
                logr[j]  = Math.Log(r[j]);
                logzr[j] = Math.Log(zr[j]);
            }

            // Least-squares line through the points (log r, log zr).
            double xycov = 0, xvar = 0, xmean = 0, ymean = 0;

            for (int j = 0; j < r.Count; j++)
            {
                xmean += logr[j];
                ymean += logzr[j];
            }
            xmean /= r.Count;
            ymean /= r.Count;
            for (int j = 0; j < logr.Length; j++)
            {
                xycov += (logr[j] - xmean) * (logzr[j] - ymean);
                xvar  += Math.Pow(logr[j] - xmean, 2);
            }
            // A zero variance (e.g. a single distinct frequency) would divide by zero; use slope 0 then.
            _slope     = Math.Abs(xvar - 0) > double.Epsilon ? xycov / xvar : 0;
            _intercept = ymean - _slope * xmean;

            // NOTE(review): GetSmoothedSamplesCount presumably evaluates the fitted line and therefore
            // needs _slope and _intercept assigned first - confirm before reordering.
            var rstar = new double[r.Count];

            for (int j = 0; j < r.Count; j++)
            {
                double smoothRstar = (r[j] + 1) * GetSmoothedSamplesCount(r[j] + 1) / GetSmoothedSamplesCount(r[j]);
                if (r.Count == j + 1 || r[j + 1] != r[j] + 1)
                {
                    // No data for frequency r[j] + 1, so only the smoothed estimate is available.
                    rstar[j] = smoothRstar;
                }
                else
                {
                    double unsmoothRstar = (double)(r[j] + 1) * nr[j + 1] / nr[j];
                    double std           = Math.Sqrt(GetVariance(r[j], nr[j], nr[j + 1]));
                    // Keep the smoothed estimate unless the two differ by more than 1.96 standard deviations.
                    if (Math.Abs(unsmoothRstar - smoothRstar) <= 1.96 * std)
                    {
                        rstar[j] = smoothRstar;
                    }
                    else
                    {
                        rstar[j] = unsmoothRstar;
                    }
                }
            }

            // Total of the adjusted counts, used to renormalise the probabilities below.
            double samplesCountPrime = 0;

            for (int j = 0; j < r.Count; j++)
            {
                samplesCountPrime += nr[j] * rstar[j];
            }

            // Share of outcomes that belong to samples seen exactly once.
            _probZero = (double)_freqDist.ObservedSamples.Count(s => _freqDist[s] == 1) / _freqDist.SampleOutcomeCount;
            for (int j = 0; j < r.Count; j++)
            {
                _probs[r[j]] = (1.0 - _probZero) * rstar[j] / samplesCountPrime;
            }
        }