/// <summary>
        /// Returns the beta value for <paramref name="ngram"/>, computed as
        /// <c>(1 - a) / (1 - b)</c>, where <c>a</c> and <c>b</c> are sums of
        /// <c>GetProbabilityIfPresent</c> over every segment of the reference source
        /// substituted into the last position of the n-gram.
        /// </summary>
        /// <param name="ngram">
        /// The n-gram to evaluate. Its last element is ignored (each candidate segment is
        /// substituted in its place). The array itself is never modified.
        /// </param>
        /// <returns>
        /// The cached value when a non-zero entry exists in <c>_betaCache</c> for the
        /// shortened n-gram; otherwise the freshly computed value.
        /// </returns>
        private double GetBeta(string[] ngram)
        {
            // Cache key: ngram.GetNGram(0, Length - 1) -- presumably the n-gram without
            // its last segment (confirm against the GetNGram extension).
            string[] shortGram = ngram.GetNGram(0, ngram.Length - 1);

            double beta;

            // A cached value of exactly 0 is treated as "not cached" and recomputed.
            if (_betaCache.TryFindValue(shortGram, out beta) && beta != 0)
            {
                return beta;
            }

            // Work on a private copy: the loop below overwrites the last slot for every
            // candidate segment, and doing that on the caller's array would leave it
            // ending in an arbitrary segment after this method returns.
            var candidate = (string[])ngram.Clone();
            int lastIndex = candidate.Length - 1;

            double a = 0.0, b = 0.0;

            foreach (var item in _referenceSource.GetAllSegments())
            {
                candidate[lastIndex] = item; // try this segment as the last element
                int frequencyA = _referenceSource.FindOccurrences(candidate);

                // Only candidates that occur in the reference source contribute.
                if (frequencyA > 0)
                {
                    var bNGram     = candidate.GetNGram(1, candidate.Length - 1);
                    int frequencyB = _referenceSource.FindOccurrences(bNGram);

                    a += GetProbabilityIfPresent(candidate, frequencyA);
                    b += GetProbabilityIfPresent(bNGram, frequencyB);
                }
            }

            // NOTE(review): if b sums to exactly 1.0 this division yields Infinity or NaN
            // rather than throwing; left as in the original -- confirm the intended handling.
            beta = (1.0 - a) / (1.0 - b);
            _betaCache.Increment(shortGram, beta);
            return beta;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// One-time initialisation of <c>_factor</c>. Only the first call does any work;
        /// later calls return immediately.
        /// </summary>
        /// <param name="referenceSource">Source providing the n-gram cache and the segment list.</param>
        /// <param name="n">
        /// Depth to descend in the n-gram cache. For values below 1 the factor is set to 1.
        /// </param>
        public void Init(TextSource referenceSource, int n)
        {
            if (_initPassed)
            {
                return;
            }

            // Flag is raised before any work, so a second call never recomputes.
            _initPassed = true;

            if (n < 1)
            {
                _factor = 1;
                return;
            }

            // Start at the first level of the cache and step down (n - 1) more levels.
            IEnumerable<NGramCache> level = referenceSource.GetNGramCache().NextSegment.Values;
            for (int depth = 1; depth < n; depth++)
            {
                level = level.SelectMany(node => node.NextSegment.Values);
            }

            // Number of entries at that level whose value converts to exactly 1.
            int singletons = level.Count(node => Convert.ToInt32(node.Value) == 1);

            double totalSegments = (double)referenceSource.GetAllSegments().Count();

            _factor = 1.0 - ((double)singletons / totalSegments);
        }