Exemplo n.º 1
0
        // -----------------------------

        /// <summary>
        /// Builds a map from author last name to the fingerprints of the documents that
        /// list an author with that last name.
        /// </summary>
        /// <param name="library">Library that supplies the documents to inspect.</param>
        /// <param name="parent_fingerprints">
        /// When null, every document in <c>library.PDFDocuments</c> is inspected; otherwise only
        /// the documents returned by <c>library.GetDocumentByFingerprints</c> for this set.
        /// </param>
        /// <returns>
        /// A multimap whose keys are author last names and whose values are document fingerprints.
        /// </returns>
        internal static MultiMapSet <string, string> GetNodeItems(Library library, HashSet <string> parent_fingerprints)
        {
            Logging.Info("+Getting node items for " + "Authors");

            // Pick the candidate documents: a restricted set if the caller gave one, else the whole library.
            List <PDFDocument> candidate_documents;
            if (null != parent_fingerprints)
            {
                candidate_documents = library.GetDocumentByFingerprints(parent_fingerprints);
            }
            else
            {
                candidate_documents = library.PDFDocuments;
            }

            MultiMapSet <string, string> surnames_to_fingerprints = new MultiMapSet <string, string>();

            foreach (PDFDocument document in candidate_documents)
            {
                // One entry per (author last name, document) pair.
                foreach (NameTools.Name author in SimilarAuthors.GetAuthorsForPDFDocument(document))
                {
                    surnames_to_fingerprints.Add(author.last_name, document.Fingerprint);
                }
            }

            Logging.Info("-Getting node items");
            return surnames_to_fingerprints;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Records use of the "author documents" brainstorm feature, then adds one child
        /// document node to <c>node_control</c> for each document returned by
        /// <c>SimilarAuthors.GetDocumentsBySameAuthorsSurnameAndInitial</c> for this node's author.
        /// </summary>
        private void ExpandDocuments()
        {
            FeatureTrackingManager.Instance.UseFeature(Features.Brainstorm_ExploreLibrary_Author_Documents);

            // Gather the lookup arguments from the author node in the same order they are passed on.
            var library_ref    = pdf_author_node_content.LibraryRef;
            var author_surname = pdf_author_node_content.Surname;
            var author_initial = pdf_author_node_content.Initial;

            List <PDFDocument> documents_by_author = SimilarAuthors.GetDocumentsBySameAuthorsSurnameAndInitial(library_ref, author_surname, author_initial);

            foreach (PDFDocument document in documents_by_author)
            {
                // Each matching document becomes a child node identified by fingerprint and library id.
                PDFDocumentNodeContent child_content = new PDFDocumentNodeContent(document.Fingerprint, document.LibraryRef.Id);
                NodeControlAddingByKeyboard.AddChildToNodeControl(node_control, child_content, false);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Looks up the authors of <paramref name="pdf_document"/>, asks
        /// <c>SimilarAuthors.GetDocumentsBySameAuthors</c> for the documents by those authors in the
        /// document's library, and passes the result to the reading control's similar-authors control
        /// through <c>WPFDoEvents.InvokeAsyncInUIThread</c>.
        /// </summary>
        /// <param name="pdf_reading_control">Reading control whose <c>SimilarAuthorsControl</c> receives the items.</param>
        /// <param name="pdf_document">Document whose authors drive the lookup.</param>
        /// <remarks>
        /// Any exception raised while gathering the data is logged and swallowed, so a failure here
        /// does not propagate to the caller.
        /// </remarks>
        private static void DoInterestingAnalysis_SimilarAuthors(PDFReadingControl pdf_reading_control, PDFDocument pdf_document)
        {
            // Populate the similar authors
            try
            {
                List <NameTools.Name>          authors           = SimilarAuthors.GetAuthorsForPDFDocument(pdf_document);
                MultiMap <string, PDFDocument> authors_documents = SimilarAuthors.GetDocumentsBySameAuthors(pdf_document.LibraryRef, pdf_document, authors);

                WPFDoEvents.InvokeAsyncInUIThread(() => {
                    pdf_reading_control.SimilarAuthorsControl.SetItems(authors_documents);
                });
            }
            catch (Exception ex)
            {
                // Resolve the fingerprint defensively: if the failure was caused by a null document,
                // dereferencing it again here would throw out of the handler and lose the original error.
                string fingerprint = (null != pdf_document) ? pdf_document.Fingerprint : "(unknown)";

                Logging.Error(ex, "There was a problem finding the similar authors for document {0}", fingerprint);
            }
        }
        /// <summary>
        /// Looks up the authors of the document held by <paramref name="pdf_renderer_control_stats"/>,
        /// asks <c>SimilarAuthors.GetDocumentsBySameAuthors</c> for the documents by those authors in the
        /// document's library, and passes the result to the reading control's similar-authors control
        /// via the renderer control's dispatcher.
        /// </summary>
        /// <param name="pdf_reading_control">Reading control whose <c>SimilarAuthorsControl</c> receives the items.</param>
        /// <param name="pdf_renderer_control">Control whose <c>Dispatcher</c> is used to run the update.</param>
        /// <param name="pdf_renderer_control_stats">Holder of the <c>pdf_document</c> being analysed.</param>
        /// <remarks>
        /// Any exception raised while gathering or publishing the data is logged and swallowed, so a
        /// failure here does not propagate to the caller.
        /// </remarks>
        private static void DoInterestingAnalysis_SimilarAuthors(PDFReadingControl pdf_reading_control, PDFRendererControl pdf_renderer_control, PDFRendererControlStats pdf_renderer_control_stats)
        {
            // Populate the similar authors
            try
            {
                List <NameTools.Name>          authors           = SimilarAuthors.GetAuthorsForPDFDocument(pdf_renderer_control_stats.pdf_document);
                MultiMap <string, PDFDocument> authors_documents = SimilarAuthors.GetDocumentsBySameAuthors(pdf_renderer_control_stats.pdf_document.Library, pdf_renderer_control_stats.pdf_document, authors);

                // Dispatcher.Invoke blocks until the delegate has run on the dispatcher's thread.
                pdf_renderer_control.Dispatcher.Invoke(new Action(() =>
                {
                    pdf_reading_control.SimilarAuthorsControl.SetItems(authors_documents);
                }));
            }
            catch (Exception ex)
            {
                // Resolve the fingerprint defensively: if the failure was caused by a missing stats
                // object or document, dereferencing it again here would throw out of the handler.
                string fingerprint = "(unknown)";
                if (null != pdf_renderer_control_stats && null != pdf_renderer_control_stats.pdf_document)
                {
                    fingerprint = pdf_renderer_control_stats.pdf_document.Fingerprint;
                }

                Logging.Error(ex, "There was a problem finding the similar authors for document {0}", fingerprint);
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Searches the document's library for other documents that cite <paramref name="pdf_document"/>.
        /// Candidates are the documents whose index matches a query built from up to the first three
        /// author last names; a candidate counts as citing when the normalised text of one of its
        /// matching pages contains the normalised title of <paramref name="pdf_document"/>.
        /// Each hit is recorded as an inbound citation on <paramref name="pdf_document"/> and an
        /// outbound citation on the citing document.
        /// </summary>
        /// <param name="pdf_document">Document whose citing documents are being looked for.</param>
        /// <returns>The number of citations recorded by this call.</returns>
        /// <remarks>
        /// Failures while processing a single candidate are logged as warnings and do not stop the
        /// remaining candidates from being processed.
        /// </remarks>
        public static int FindCitations(PDFDocument pdf_document)
        {
            int total_found = 0;

            string target_title = GenerateTextOnlyTitle(pdf_document.TitleCombined);

            List <NameTools.Name> names = SimilarAuthors.GetAuthorsForPDFDocument(pdf_document);

            // Without a usable title there is nothing to search for. An empty title must be rejected
            // explicitly: String.Contains("") is true for any text, so every author match would
            // otherwise be recorded as a citation (and a null title would throw for every candidate).
            if (0 < names.Count && !String.IsNullOrEmpty(target_title))
            {
                // Look for all other docs that mention this author (at most the first three authors)
                string author_query = null;
                {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < 3 && i < names.Count; ++i)
                    {
                        sb.AppendFormat("+{0} ", names[i].last_name);
                    }
                    author_query = sb.ToString();
                }

                StatusManager.Instance.UpdateStatusBusy("CitationDocumentFinder", String.Format("Looking for new citations in \"{0}\" by \"{1}\"", pdf_document.TitleCombined, author_query));

                List <IndexPageResult> index_page_results_with_author = LibrarySearcher.FindAllPagesMatchingQuery(pdf_document.Library, author_query);
                Logging.Info("  **** We have {0} documents matching {1}", index_page_results_with_author.Count, author_query);
                foreach (var index_page_result in index_page_results_with_author)
                {
                    try
                    {
                        string fingerprint = index_page_result.fingerprint;

                        // Check that the other one exists
                        PDFDocument pdf_document_other = pdf_document.Library.GetDocumentByFingerprint(fingerprint);
                        if (null == pdf_document_other || !pdf_document_other.DocumentExists)
                        {
                            continue;
                        }

                        // Let's not work on the same document
                        if (pdf_document.Fingerprint == pdf_document_other.Fingerprint)
                        {
                            continue;
                        }

                        // Let's not do work that has already been done before: the citation is known
                        // only when BOTH directions have been recorded.
                        bool already_found =
                            pdf_document.PDFDocumentCitationManager.ContainsInboundCitation(pdf_document_other.Fingerprint) &&
                            pdf_document_other.PDFDocumentCitationManager.ContainsOutboundCitation(pdf_document.Fingerprint);
                        if (already_found)
                        {
                            Logging.Info("Skipping check for citation from {0} to {1} because we know it already.", pdf_document_other.Fingerprint, pdf_document.Fingerprint);
                            continue;
                        }

                        // Now search each page for the title of the paper
                        foreach (PageResult page_result in index_page_result.page_results)
                        {
                            // Don't process the metadata "page"
                            if (0 == page_result.page)
                            {
                                continue;
                            }

                            WordList word_list_page = pdf_document_other.PDFRenderer.GetOCRText(page_result.page);
                            if (null != word_list_page)
                            {
                                // Concatenate the page's words and normalise them the same way as the title.
                                StringBuilder sb = new StringBuilder();
                                foreach (var word in word_list_page)
                                {
                                    sb.Append(word.Text);
                                }
                                string text_to_search_for_title = GenerateTextOnlyTitle(sb.ToString());

                                // If we have a match, record it!
                                if (text_to_search_for_title.Contains(target_title))
                                {
                                    pdf_document.PDFDocumentCitationManager.AddInboundCitation(pdf_document_other.Fingerprint);
                                    pdf_document_other.PDFDocumentCitationManager.AddOutboundCitation(pdf_document.Fingerprint);
                                    ++total_found;

                                    // One hit per citing document is enough; skip its remaining pages.
                                    break;
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Warn(ex, "There was a problem during citation finding while processing one of the matching author documents.");
                    }
                }
            }

            StatusManager.Instance.UpdateStatus("CitationDocumentFinder", String.Format("Found {0} new citations of '{1}'", total_found, pdf_document.TitleCombined));

            return total_found;
        }