/// <summary>
        /// Logs, for every distinct tag found in <paramref name="documents"/>, how many documents
        /// carry that tag, listing the most frequent tags first, and then prints a delimiter.
        /// </summary>
        /// <param name="documents">Documents whose tags are counted.</param>
        private void PrintNormalizedTagsStats(TextDocument[] documents)
        {
            // Single pass over all tags instead of rescanning every document for every tag.
            // De-duplicating inside each document keeps the count equal to "number of documents
            // containing the tag" even when a document lists the same tag more than once.
            var stats = documents
                .SelectMany(document => document.Tags.Distinct())
                .GroupBy(tag => tag)
                .Select(tagGroup => new TagStatistics
                {
                    Name = tagGroup.Key,
                    Count = tagGroup.Count()
                })
                .ToArray();

            Logger.Log("Normalized tags stats");

            // OrderByDescending is a stable sort, so tags with equal counts stay in the order
            // in which they were first encountered.
            foreach (var tag in stats.OrderByDescending(x => x.Count))
            {
                Logger.Log("Tag = {0}, count = {1}", tag.Name, tag.Count);
            }

            PrintDelimeter();
        }
        /// <summary>
        /// Ranks words by the value that <c>GetDocumentsCountByWord</c> reports for them over the
        /// documents carrying the target tag, and returns the slice of that ranking bounded by the
        /// configured percentages.
        /// </summary>
        /// <param name="documents">Documents supplying both the vocabulary and the tag-filtered subset.</param>
        /// <param name="featureSelectionParams">Provides the target tag and the upper/lower percentage bounds.</param>
        /// <returns>A result whose <c>FeaturedWords</c> holds the selected weighted words.</returns>
        public FeatureSelectionResult Select(TextDocument[] documents, FeatureSelectionParams featureSelectionParams)
        {
            var vocabulary = documents.SelectMany(document => document.Words).Distinct().ToArray();

            // Only documents tagged with the target tag contribute to the per-word value.
            var taggedDocuments = documents.Where(document => document.Tags.Contains(featureSelectionParams.TargetTag));
            var valueByWord = taggedDocuments.GetDocumentsCountByWord();

            // Words with no entry in the lookup are dropped before ranking.
            var ranking = vocabulary
                .Where(word => valueByWord.ContainsKey(word))
                .Select(word => new WeightedWord { Word = word, Metric = valueByWord[word] })
                .OrderByDescending(weighted => weighted.Metric);

            // Both bounds are percentages of the full vocabulary size, not of the filtered ranking.
            var takeCount = vocabulary.Length * featureSelectionParams.UpperBoundPercent / 100;
            var skipCount = vocabulary.Length * featureSelectionParams.LowerBoundPercent / 100;

            // Take first, then skip: keeps ranking positions [skipCount, takeCount).
            var selected = ranking
                .Take(takeCount)
                .Skip(skipCount)
                .ToArray();

            var result = new FeatureSelectionResult();
            result.FeaturedWords = selected;
            return result;
        }
        /// <summary>
        /// Ranks words by the score <c>CalculateChiSquared</c> returns for the target tag and
        /// returns the slice of that ranking bounded by the configured percentages. Only words
        /// whose value from <c>GetDocumentsCountByWord</c> lies within the configured
        /// minimum/maximum are scored.
        /// </summary>
        /// <param name="documents">Documents supplying the words; also passed to the scoring call.</param>
        /// <param name="featureSelectionParams">Target tag, document-count limits and percentage bounds.</param>
        /// <returns>A result whose <c>FeaturedWords</c> holds the selected weighted words.</returns>
        public FeatureSelectionResult Select(TextDocument[] documents, FeatureSelectionParams featureSelectionParams)
        {
            // Every call starts with an empty cache.
            // NOTE(review): presumably consulted by CalculateChiSquared - confirm.
            cache = new Dictionary<string, double>();

            var vocabulary = documents.SelectMany(document => document.Words).Distinct().ToArray();
            var valueByWord = documents.GetDocumentsCountByWord();

            // Keep only words present in the lookup whose value is inside [MinDocCount, MaxDocCount].
            var candidates = vocabulary
                .Where(word =>
                {
                    if (!valueByWord.ContainsKey(word))
                    {
                        return false;
                    }

                    var value = valueByWord[word];
                    return value >= featureSelectionParams.MinDocCount
                        && value <= featureSelectionParams.MaxDocCount;
                })
                .ToArray();

            var ranking = candidates
                .Select(word => new WeightedWord
                {
                    Word = word,
                    Metric = CalculateChiSquared(word, documents, featureSelectionParams.TargetTag)
                })
                .OrderByDescending(weighted => weighted.Metric);

            // Bounds are percentages of the number of candidates that survived the filter.
            var takeCount = candidates.Length * featureSelectionParams.UpperBoundPercent / 100;
            var skipCount = candidates.Length * featureSelectionParams.LowerBoundPercent / 100;

            // Take first, then skip: keeps ranking positions [skipCount, takeCount).
            var selected = ranking
                .Take(takeCount)
                .Skip(skipCount)
                .ToArray();

            var result = new FeatureSelectionResult();
            result.FeaturedWords = selected;
            return result;
        }