/// <summary>
/// Computes a chi-squared score for the association between <paramref name="word"/>
/// and <paramref name="tag"/> over <paramref name="documents"/>, memoizing the result
/// in <c>cache</c>.
/// </summary>
/// <param name="word">Word whose presence in a document is tested.</param>
/// <param name="documents">Documents the four contingency counts are taken from.</param>
/// <param name="tag">Tag whose presence on a document is tested.</param>
/// <returns>
/// <c>N * (A*D - C*B)^2 / ((A+C) * (B+D) * (A+B) * (C+D))</c>, where N is the number of
/// documents and A..D are the four filtered counts below, each increased by 0.5.
/// </returns>
/// <remarks>
/// NOTE(review): the cache key is built from <paramref name="word"/> and
/// <paramref name="tag"/> only, so a value cached for one document set is returned
/// for any later call with the same word/tag, regardless of <paramref name="documents"/>.
/// Confirm callers always pass the same corpus.
/// </remarks>
private double CalculateChiSquared(string word, TextDocument[] documents, string tag)
{
    var cacheKey = string.Format("{0}:{1}", word, tag);

    // Single lookup instead of ContainsKey followed by the indexer.
    double cached;
    if (cache.TryGetValue(cacheKey, out cached))
    {
        return cached;
    }

    // N is the raw document count; the +0.5 added to each cell is not reflected in it.
    var N = (double)documents.Length;

    // 2x2 contingency cells (marked / not marked with tag  x  contains / does not contain word).
    // The 0.5 added to every cell keeps each denominator factor strictly positive.
    var A = (double)documents.WhereMarkedWith(tag).WhereContains(word).Count() + 0.5;
    var B = (double)documents.WhereNotMarkedWith(tag).WhereContains(word).Count() + 0.5;
    var C = (double)documents.WhereMarkedWith(tag).WhereNotContains(word).Count() + 0.5;
    var D = (double)documents.WhereNotMarkedWith(tag).WhereNotContains(word).Count() + 0.5;

    // Computed once; it appears squared in the numerator.
    var crossDifference = A * D - C * B;
    var chiSquared = N * crossDifference * crossDifference / ((A + C) * (B + D) * (A + B) * (C + D));

    cache[cacheKey] = chiSquared;
    return chiSquared;
}
/// <summary>
/// Writes one line to the console containing <paramref name="message"/> followed by
/// how many of the given documents are, and are not, marked with
/// <paramref name="targetTag"/>.
/// </summary>
/// <param name="message">Text printed at the start of the line.</param>
/// <param name="textDocument">Documents to count.</param>
/// <param name="targetTag">Tag used to split the documents into the two counts.</param>
private void PrintStats(string message, TextDocument[] textDocument, string targetTag)
{
    // "Pos" = documents returned by WhereMarkedWith for the tag.
    var markedCount = textDocument.WhereMarkedWith(targetTag).Count();

    // "Neg" = documents returned by WhereNotMarkedWith for the tag.
    var unmarkedCount = textDocument.WhereNotMarkedWith(targetTag).Count();

    Console.WriteLine(
        "{0} Pos = {1}, Neg = {2}",
        message,
        markedCount,
        unmarkedCount);
}