/// <summary>
/// Builds the tag cloud for a document. The candidate tags are the document's AI tags
/// (as reported by the library's <c>AITagManager</c>) plus the tags stored on the document
/// itself. Each candidate is looked up in the OCR text of the leading pages, and every tag
/// that is found is turned into a <see cref="TagCloudEntry"/>.
/// </summary>
/// <param name="web_library_detail">Not read by this method; kept so the signature stays unchanged for callers.</param>
/// <param name="pdf_document">The document whose tags and per-page OCR text are examined.</param>
/// <returns>
/// One entry per tag that occurs on at least one scanned page, ordered by descending
/// <c>importance</c> (pages containing the tag divided by the tag's document count plus one).
/// </returns>
public static List<TagCloudEntry> BuildTagCloud(WebLibraryDetail web_library_detail, PDFDocument pdf_document)
{
    // NOTE(review): pages are numbered from 1 and the loop bound below is strict, so only
    // pages 1..19 are scanned - confirm whether 20 pages were intended.
    const int MAX_PAGE_LIMIT = 20;

    AITags ai_tags = pdf_document.LibraryRef.Xlibrary.AITagManager.AITags;

    // Work on a copy (keeping the source set's comparer) so that adding the document's own
    // tags below cannot modify a collection that may be owned by the AI tag manager.
    HashSet<string> document_ai_tags = ai_tags.GetTagsWithDocument(pdf_document.Fingerprint);
    HashSet<string> autotags = new HashSet<string>(document_ai_tags, document_ai_tags.Comparer);
    foreach (var tag in TagTools.ConvertTagBundleToTags(pdf_document.Tags))
    {
        autotags.Add(tag);
    }

    CountingDictionary<string> word_counts = new CountingDictionary<string>();
    {
        Logging.Info("+Counting the autotags");
        int total_tags = 0;

        for (int page = 1; page <= pdf_document.PageCount && page < MAX_PAGE_LIMIT; ++page)
        {
            string page_text = pdf_document.GetFullOCRText(page);
            foreach (string autotag in autotags)
            {
                // A tag is tallied once per page on which it occurs, no matter how many
                // times it occurs on that page.
                int word_count = StringTools.CountStringOccurence(page_text, autotag);
                if (0 < word_count)
                {
                    ++total_tags;
                    word_counts.TallyOne(autotag);
                }
            }
        }

        Logging.Info("-Counting the autotags: total_occurences={0} unique_tags={1}", total_tags, word_counts.Count);
    }

    Logging.Info("+Building the ratios");
    List<TagCloudEntry> entries = new List<TagCloudEntry>();
    foreach (var pair in word_counts)
    {
        // The +1 keeps the divisor positive for a tag whose reported count is zero.
        int document_count = ai_tags.GetTagCount(pair.Key) + 1;

        // The tally is used as-is; no capping of very frequent tags is applied here.
        int word_count = pair.Value;

        TagCloudEntry entry = new TagCloudEntry();
        entry.word = pair.Key;
        entry.word_count = word_count;
        entry.document_count = document_count;
        entry.importance = word_count / (double)document_count;
        entries.Add(entry);
    }
    Logging.Info("-Building the ratios");

    // Descending by importance: swap the operands rather than negating the comparison result.
    entries.Sort((a, b) => Sorting.Compare(b.importance, a.importance));

    return entries;
}
/// <summary>
/// Mouse-up handler for a word in the tag cloud. Toggles the selection of the clicked tag
/// (after clearing all other selections unless CTRL is held), mirrors the selection in the
/// word's font weight, and then passes the words of all selected tags to <c>TagClick</c>.
/// Does nothing when <c>TagClick</c> has no subscriber.
/// </summary>
/// <param name="sender">The <see cref="TextBlock"/> that was clicked; its <c>Tag</c> holds the <see cref="TagCloudEntry"/>.</param>
/// <param name="e">Mouse event data (not used).</param>
private void text_block_word_MouseUp(object sender, MouseButtonEventArgs e)
{
    // Take a local copy so the delegate that was null-checked is the one that gets invoked.
    var tag_click_handler = TagClick;
    if (null == tag_click_handler)
    {
        return;
    }

    FeatureTrackingManager.Instance.UseFeature(Features.Document_TagCloud);

    // If they are not holding down CTRL, clear down the already selected items
    if (!KeyboardTools.IsCTRLDown())
    {
        // entries is null-checked here as well, matching the check made when the
        // selected tags are collected further down.
        if (null != entries)
        {
            foreach (var tag in entries)
            {
                tag.selected = false;
            }
        }

        foreach (TextBlock text_block_word in TagPanel.Children)
        {
            text_block_word.FontWeight = FontWeights.Normal;
        }
    }

    {
        // Find the clicked tag and flip its selection state
        TextBlock text_block_word = (TextBlock)sender;
        TagCloudEntry entry = (TagCloudEntry)text_block_word.Tag;
        entry.selected = !entry.selected;
        text_block_word.FontWeight = entry.selected ? FontWeights.Bold : FontWeights.Normal;
    }

    // Get all the selected tags
    List<string> tags_selected = new List<string>();
    if (null != entries)
    {
        foreach (var tag in entries)
        {
            if (tag.selected)
            {
                tags_selected.Add(tag.word);
            }
        }
    }

    tag_click_handler(tags_selected);
}