Beispiel #1
0
        /// <summary>
        /// Builds the tag-cloud entries for one document.  The candidate tags are the
        /// document's autotags (from the library's AITags) plus the tags stored on the
        /// document itself.  Each candidate is tallied once for every scanned page whose
        /// OCR text contains it, and the tally is divided by (library tag count + 1) to
        /// produce the entry's importance.
        /// </summary>
        /// <param name="web_library_detail">Not read by this method; retained so existing callers keep compiling.</param>
        /// <param name="pdf_document">The document whose tags and page text are analysed.</param>
        /// <returns>One entry per tag found on at least one scanned page, ordered by descending importance.</returns>
        public static List<TagCloudEntry> BuildTagCloud(WebLibraryDetail web_library_detail, PDFDocument pdf_document)
        {
            // NOTE(review): pages are 1-based and the loop below compares with '<', so at most
            // MAX_PAGE_LIMIT - 1 pages are scanned.  Kept as-is; confirm whether that is intended.
            const int MAX_PAGE_LIMIT = 20;

            AITags ai_tags = pdf_document.LibraryRef.Xlibrary.AITagManager.AITags;

            // Work on a private copy of the autotag set: the document's own tags are merged in
            // below, and adding them to the set handed back by AITags could alter state owned
            // by the tag manager (presumably shared - verify against GetTagsWithDocument).
            HashSet<string> document_autotags = ai_tags.GetTagsWithDocument(pdf_document.Fingerprint);
            HashSet<string> autotags = (null != document_autotags)
                ? new HashSet<string>(document_autotags, document_autotags.Comparer)
                : new HashSet<string>();

            foreach (var tag in TagTools.ConvertTagBundleToTags(pdf_document.Tags))
            {
                autotags.Add(tag);
            }

            // Tally, per tag, the number of scanned pages on which the tag occurs at least once.
            CountingDictionary<string> word_counts = new CountingDictionary<string>();
            {
                Logging.Info("+Counting the autotags");
                int total_tags = 0;

                for (int page = 1; page <= pdf_document.PageCount && page < MAX_PAGE_LIMIT; ++page)
                {
                    string page_text = pdf_document.GetFullOCRText(page);
                    foreach (string autotag in autotags)
                    {
                        int word_count = StringTools.CountStringOccurence(page_text, autotag);
                        if (0 < word_count)
                        {
                            // Only the presence on the page is recorded, not the number of hits.
                            ++total_tags;
                            word_counts.TallyOne(autotag);
                        }
                    }
                }
                Logging.Info("-Counting the autotags: total_occurences={0} unique_tags={1}", total_tags, word_counts.Count);
            }

            Logging.Info("+Building the ratios");
            List<TagCloudEntry> entries = new List<TagCloudEntry>();

            foreach (var pair in word_counts)
            {
                // The +1 keeps the divisor non-zero when the library reports a count of zero for this tag.
                int document_count = ai_tags.GetTagCount(pair.Key) + 1;

                // The tally is used as-is; no upper limit is applied to it here.
                int word_count = pair.Value;

                TagCloudEntry entry = new TagCloudEntry();
                entry.word           = pair.Key;
                entry.word_count     = word_count;
                entry.document_count = document_count;
                entry.importance     = word_count / (double)document_count;

                entries.Add(entry);
            }
            Logging.Info("-Building the ratios");

            // Negating the comparison puts the most important entries first.
            entries.Sort((a, b) => - Sorting.Compare(a.importance, b.importance));
            return entries;
        }
        /// <summary>
        /// Mouse-up handler for a tag word in the cloud.  Toggles the selection of the clicked
        /// tag (clearing every other selection first unless CTRL is held), updates the font
        /// weight of the affected text blocks, and passes the words of all selected tags to
        /// <c>TagClick</c>.  Does nothing when <c>TagClick</c> is null.
        /// </summary>
        /// <param name="sender">The clicked TextBlock; its Tag is cast to the TagCloudEntry it displays.</param>
        /// <param name="e">Mouse event data (not read).</param>
        private void text_block_word_MouseUp(object sender, MouseButtonEventArgs e)
        {
            if (null == TagClick)
            {
                return;
            }

            FeatureTrackingManager.Instance.UseFeature(Features.Document_TagCloud);

            // If they are not holding down CTRL, clear down the already selected items
            if (!KeyboardTools.IsCTRLDown())
            {
                // entries is null-checked when the selection is collected below, so it is
                // guarded here as well instead of throwing a NullReferenceException.
                if (null != entries)
                {
                    foreach (var tag in entries)
                    {
                        tag.selected = false;
                    }
                }

                foreach (TextBlock panel_text_block in TagPanel.Children)
                {
                    panel_text_block.FontWeight = FontWeights.Normal;
                }
            }

            // Toggle the clicked tag and reflect its new state in the font weight
            TextBlock     clicked_text_block = (TextBlock)sender;
            TagCloudEntry clicked_entry      = (TagCloudEntry)clicked_text_block.Tag;
            clicked_entry.selected        = !clicked_entry.selected;
            clicked_text_block.FontWeight = clicked_entry.selected ? FontWeights.Bold : FontWeights.Normal;

            // Get all the selected tags
            List<string> tags_selected = new List<string>();
            if (null != entries)
            {
                foreach (var tag in entries)
                {
                    if (tag.selected)
                    {
                        tags_selected.Add(tag.word);
                    }
                }
            }

            TagClick(tags_selected);
        }