/// <summary>
/// Computes the beta coefficient associated with the prefix of <paramref name="ngram"/>
/// (the n-gram without its final segment), using <c>_betaCache</c> to avoid recomputation.
/// </summary>
/// <remarks>
/// For every segment returned by <c>_referenceSource.GetAllSegments()</c>, the final position of the
/// n-gram is replaced by that segment. When the resulting n-gram occurs in the reference source,
/// its probability is added to <c>a</c>, and the probability of the derived n-gram obtained via
/// <c>GetNGram(1, Length - 1)</c> is added to <c>b</c>. The result is <c>(1 - a) / (1 - b)</c>.
/// The caller's array is never modified.
/// </remarks>
/// <param name="ngram">The n-gram whose prefix determines the coefficient. Left untouched.</param>
/// <returns>The cached value when it is present and non-zero; otherwise the freshly computed value.</returns>
private double GetBeta(string[] ngram)
{
    // Cache key: presumably the n-gram minus its last segment
    // (assumes GetNGram takes (start, length) — TODO confirm against the extension method).
    string[] shortGram = ngram.GetNGram(0, ngram.Length - 1);

    double beta;
    // A stored value of exactly 0 is treated the same as a cache miss.
    if (_betaCache.TryFindValue(shortGram, out beta) && beta != 0)
    {
        return beta;
    }

    // Work on a private copy: the loop below overwrites the last segment on every
    // iteration, and doing that on the caller's array would leave it corrupted
    // (ending in whichever segment happened to be enumerated last).
    string[] candidate = (string[])ngram.Clone();
    int lastIndex = candidate.Length - 1;

    double a = 0.0, b = 0.0;
    foreach (var item in _referenceSource.GetAllSegments())
    {
        candidate[lastIndex] = item; // try this segment in the final position

        int frequencyA = _referenceSource.FindOccurrences(candidate);
        if (frequencyA > 0)
        {
            var bNGram = candidate.GetNGram(1, candidate.Length - 1);
            int frequencyB = _referenceSource.FindOccurrences(bNGram);

            a += GetProbabilityIfPresent(candidate, frequencyA);
            b += GetProbabilityIfPresent(bNGram, frequencyB);
        }
    }

    // NOTE(review): when b == 1.0 this division produces Infinity or NaN, which is then
    // stored in the cache — confirm whether callers need a guarded fallback here.
    beta = (1.0 - a) / (1.0 - b);
    _betaCache.Increment(shortGram, beta);
    return beta;
}
/// <summary>
/// One-time initialisation of <c>_factor</c> from the given reference source.
/// Calls after the first one return immediately without doing anything.
/// </summary>
/// <remarks>
/// When <paramref name="n"/> is below 1, <c>_factor</c> is set to 1. Otherwise the n-gram cache is
/// walked down to depth <paramref name="n"/> (the root's <c>NextSegment</c> values count as depth 1),
/// the entries at that depth whose <c>Value</c> converts to 1 are counted, and <c>_factor</c> becomes
/// one minus that count divided by the number of segments in the reference source.
/// </remarks>
/// <param name="referenceSource">Source providing the n-gram cache and the segment list.</param>
/// <param name="n">Depth of the cache level to inspect.</param>
public void Init(TextSource referenceSource, int n)
{
    if (_initPassed)
    {
        return;
    }

    // The flag is raised before any work is done, exactly as the first call requires.
    _initPassed = true;

    if (n < 1)
    {
        _factor = 1;
        return;
    }

    // Depth 1 is the set of children directly under the cache root;
    // each further step flattens the children of the current level.
    IEnumerable<NGramCache> level = referenceSource.GetNGramCache().NextSegment.Values;
    for (int depth = 1; depth < n; depth++)
    {
        level = level.SelectMany(node => node.NextSegment.Values);
    }

    // Number of entries at the target depth whose value is exactly one.
    int singletons = level.Count(node => Convert.ToInt32(node.Value) == 1);

    int segmentTotal = referenceSource.GetAllSegments().Count();
    double singletonShare = (double)singletons / (double)segmentTotal;
    _factor = 1.0 - singletonShare;
}