/// <summary>
/// Runs the whole pipeline: reads words from the word source, builds occurrence
/// statistics, joins grammatical forms, applies every configured word filter,
/// keeps the <c>wordCount</c> most frequent surviving words and hands them through
/// font generation, cloud layout and colouring to the renderer.
/// </summary>
public void Process()
{
    // Grammar info is requested for the raw (pre-join) set of distinct words.
    var rawStatistics = new OccurrenceStatistics(wordSource.GetWords());
    var grammarInfo = grammarInfoParser.GetGrammarInfo(rawStatistics.OccurrenceCount.Keys);
    var joinedStatistics = grammarFormJoiner.Join(rawStatistics, grammarInfo);

    // Each filter receives the output of the previous one, in wordFilters order.
    IEnumerable<string> survivingWords = joinedStatistics.OccurrenceCount.Keys;
    foreach (var wordFilter in wordFilters)
    {
        survivingWords = wordFilter.Filter(survivingWords, grammarInfo);
    }

    var allowedWords = new HashSet<string>(survivingWords);

    // Rank the words that passed every filter by occurrence count, highest first.
    var rankedRatings =
        from rating in joinedStatistics.OccurrenceCount
            .Select(pair => new WordRating(pair.Key, pair.Value))
        where allowedWords.Contains(rating.Word)
        orderby rating.OccurencesCount descending
        select rating;
    var topRatings = rankedRatings.Take(wordCount).ToArray();

    var wordRectangles = fontManager.GenerateFonts(topRatings);
    var layout = cloudGenerator.Generate(wordRectangles);
    var coloredLayout = colorManager.GenerateColors(layout);
    cloudRenderer.Render(coloredLayout);
}
/// <summary>
/// Merges words that share the same initial form into a single entry.
/// </summary>
/// <param name="statistics">Occurrence counts per word, as observed in the text.</param>
/// <param name="grammarInfo">Grammar information per word; supplies each word's initial form.</param>
/// <returns>
/// New statistics holding one entry per initial form. The key is the form with the
/// highest occurrence count in its group and the value is the sum of the counts of
/// all forms in that group. Words that have no entry in <paramref name="grammarInfo"/>
/// are not carried over.
/// </returns>
public OccurrenceStatistics Join(OccurrenceStatistics statistics, IReadOnlyDictionary<string, WordGrammarInfo> grammarInfo)
{
    var occurrences = statistics.OccurrenceCount;

    // Only words with known grammar info can be grouped by initial form.
    var formsByInitialForm =
        from word in occurrences.Keys
        where grammarInfo.ContainsKey(word)
        group word by grammarInfo[word].InitialForm;

    // The sort is stable, so among equally frequent forms the one enumerated first wins.
    var joinedOccurrences = formsByInitialForm.ToDictionary(
        forms => forms.OrderByDescending(form => occurrences[form]).First(),
        forms => forms.Select(form => occurrences[form]).Sum());

    return new OccurrenceStatistics(joinedOccurrences);
}
// Checks that, when several words share an initial form, the joined statistics
// are keyed by the form that has the highest occurrence count.
public void Join_selectsMostCommonForm()
{
    // Arrange: two forms of one adjective (the rarer one listed first) and one noun.
    var occurrenceCounts = new Dictionary<string, int>
    {
        { "активный", 10 },
        { "активное", 20 },
        { "команд", 40 },
    };
    var grammarInfo = new Dictionary<string, WordGrammarInfo>
    {
        { "активный", new WordGrammarInfo("активный", PartOfSpeech.Adjective) },
        { "активное", new WordGrammarInfo("активный", PartOfSpeech.Adjective) },
        { "команд", new WordGrammarInfo("команда", PartOfSpeech.Noun) },
    };
    var initialStatistics = new OccurrenceStatistics(occurrenceCounts);
    var joiner = new GrammarFormJoiner();

    // Act
    var joinedStatistics = joiner.Join(initialStatistics, grammarInfo);

    // Assert
    joinedStatistics.OccurrenceCount.Keys.Should().BeEquivalentTo("активное", "команд");
}
// Checks that forms sharing an initial form collapse into a single entry whose
// count is the sum of the individual counts, while unrelated words stay as they are.
public void Join_mergesGrammarForms()
{
    // Arrange: two forms of one adjective (10 + 20) and one noun (40).
    var occurrenceCounts = new Dictionary<string, int>
    {
        { "активный", 10 },
        { "активное", 20 },
        { "команд", 40 },
    };
    var grammarInfo = new Dictionary<string, WordGrammarInfo>
    {
        { "активный", new WordGrammarInfo("активный", PartOfSpeech.Adjective) },
        { "активное", new WordGrammarInfo("активный", PartOfSpeech.Adjective) },
        { "команд", new WordGrammarInfo("команда", PartOfSpeech.Noun) },
    };
    var initialStatistics = new OccurrenceStatistics(occurrenceCounts);
    var joiner = new GrammarFormJoiner();

    // Act
    var joinedStatistics = joiner.Join(initialStatistics, grammarInfo);

    // Assert: one entry per initial form, each carrying the summed count.
    var joinedKeys = joinedStatistics.OccurrenceCount.Keys;
    joinedKeys.Should().HaveCount(2);
    joinedKeys.Should().Contain(key =>
        grammarInfo[key].InitialForm == "активный" && joinedStatistics.OccurrenceCount[key] == 30);
    joinedKeys.Should().Contain(key =>
        grammarInfo[key].InitialForm == "команда" && joinedStatistics.OccurrenceCount[key] == 40);
}