// Disabled: average type-token ratio calculation, kept for reference.
//
// <summary>
// Calculates the average type token ratio.
// </summary>
// <param name="tokens">The tokens.</param>
//private void CalculateAverageTypeTokenRatio(FrequencyResults returnValue, Token[] tokens)
//{
//    List<decimal> TTRValues = new List<decimal>();
//    for (int x = 0; x < tokens.Length - returnValue.WindowSize; ++x)
//    {
//        var TempWordCount = new SortedDictionary<string, int>();
//        for (int y = 0; y < returnValue.WindowSize; ++y)
//        {
//            var CurrentToken = tokens[x + y];
//            var TempWord = (CurrentToken.StemmedValue ?? CurrentToken.Value).ToLower();
//            if (TempWordCount.ContainsKey(TempWord))
//            {
//                ++TempWordCount[TempWord];
//            }
//            else
//            {
//                TempWordCount.Add(TempWord, 1);
//            }
//        }
//        TTRValues.Add((decimal)TempWordCount.Keys.Count / returnValue.WindowSize);
//    }
//    if (TTRValues.Count > 0)
//        returnValue.AverageTypeTokenRatio = TTRValues.Average();
//}

/// <summary>
/// Fills <c>TermFrequency</c> on the supplied results: for every key in
/// <c>WordCount</c>, an entry is added whose value is that key's count divided by
/// <c>NumberOfWords</c>.
/// </summary>
/// <param name="returnValue">
/// The results object that is read (<c>WordCount</c>, <c>NumberOfWords</c>) and
/// written (<c>TermFrequency</c>).
/// </param>
private void CalculateTermFrequency(FrequencyResults returnValue)
{
    foreach (var word in returnValue.WordCount.Keys)
    {
        var occurrences = returnValue.WordCount[word];

        // The cast forces floating-point division so the ratio is not truncated.
        var frequency = occurrences / (double)returnValue.NumberOfWords;

        returnValue.TermFrequency.Add(word, frequency);
    }
}
/// <summary>
/// Builds frequency statistics for the word-like tokens in the given sequence.
/// </summary>
/// <param name="tokens">
/// The tokens to analyze. Only tokens whose type is <c>TokenType.Word</c> or
/// <c>TokenType.Abbreviation</c> are counted.
/// </param>
/// <param name="windowSize">
/// Size of the window. Values of zero or less are replaced by 1.
/// </param>
/// <returns>
/// A <c>FrequencyResults</c> with the window size, word counts, number of words,
/// number of distinct types and term frequencies populated.
/// </returns>
public FrequencyResults Analyze(IEnumerable<Token> tokens, int windowSize)
{
    var results = new FrequencyResults
    {
        // A non-positive window makes no sense, so fall back to a window of one.
        WindowSize = windowSize > 0 ? windowSize : 1
    };

    // Keep only the tokens that count as words for frequency purposes.
    var wordTokens =
        (from token in tokens
         where token.TokenType == TokenType.Word || token.TokenType == TokenType.Abbreviation
         select token).ToArray();

    CalculateWordCount(wordTokens, results.WordCount);

    results.NumberOfWords = wordTokens.Length;
    results.NumberOfTypes = results.WordCount.Keys.Count;

    // Average type-token ratio is currently disabled:
    //CalculateAverageTypeTokenRatio(results, wordTokens);

    CalculateTermFrequency(results);

    return results;
}