/// <summary>
/// Background worker loop for context recommendations.
/// Each pass claims the pending context string (under <c>context_thread_lock</c>), queries the
/// current library for documents matching it, and posts the matches to the GUI via the Dispatcher.
/// The loop ends, after clearing <c>context_thread_running</c>, as soon as no context is pending.
/// </summary>
private void word_connector_ContextChanged_BACKGROUND_FindRecommendations()
        {
            // Upper bound on how many documents are handed to the GUI per context.
            const int MAX_RECOMMENDATIONS = 20;

            for (;;)
            {
                // Claim the pending context; the read and the reset happen under the same lock.
                string pending_context;

                Utilities.LockPerfTimer lock_timer = Utilities.LockPerfChecker.Start();
                lock (context_thread_lock)
                {
                    lock_timer.LockPerfTimerStop();

                    pending_context = context_thread_next_context;
                    context_thread_next_context = null;

                    if (pending_context == null)
                    {
                        // Nothing queued: mark the worker as stopped while still holding the lock, then leave.
                        context_thread_running = false;
                        return;
                    }
                }

                // Run the query for the claimed context.
                List<PDFDocument> recommended_documents = new List<PDFDocument>();

                // Work from a local copy of the field for the duration of this pass.
                WebLibraryDetail current_library_detail = this.web_library_detail;
                if (current_library_detail != null)
                {
                    string lucene_query = PolishContextForLucene(pending_context).Trim();
                    if (!String.IsNullOrEmpty(lucene_query))
                    {
                        List<IndexResult> hits = LibrarySearcher.FindAllFingerprintsMatchingQuery(current_library_detail.library, lucene_query);
                        if (hits != null && hits.Count != 0)
                        {
                            foreach (IndexResult hit in hits)
                            {
                                if (recommended_documents.Count >= MAX_RECOMMENDATIONS)
                                {
                                    break;
                                }

                                // Hits whose document cannot be resolved are silently dropped.
                                PDFDocument matched_document = current_library_detail.library.GetDocumentByFingerprint(hit.fingerprint);
                                if (matched_document != null)
                                {
                                    recommended_documents.Add(matched_document);
                                }
                            }
                        }
                    }
                }

                // Hand the (possibly empty) result list to the GUI at background priority.
                Action populate_gui = () =>
                {
                    word_connector_ContextChanged_BACKGROUND_PopulateRecommendations(recommended_documents);
                };
                Dispatcher.BeginInvoke(populate_gui, DispatcherPriority.Background);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the quick-search text against every working web library, merges the hits from all
        /// libraries, orders them by descending score and hands them to the library catalog.
        /// Beeps and returns without searching when the query text is empty.
        /// </summary>
        public void DoSearch()
        {
            string query = SearchQuick.Text;

            // Do we have anything to do?
            if (String.IsNullOrEmpty(query))
            {
                SystemSounds.Beep.Play();
                return;
            }

            // Search each library
            List<CombinedSearchResultItem> results = new List<CombinedSearchResultItem>();

            foreach (WebLibraryDetail web_library_detail in WebLibraryManager.Instance.WebLibraryDetails_WorkingWebLibraries)
            {
                Logging.Info("Searching library {0}", web_library_detail.Title);

                Library library = web_library_detail.Xlibrary;

                List<IndexResult> index_results = LibrarySearcher.FindAllFingerprintsMatchingQuery(web_library_detail, query);

                // Treat a null result as "no hits in this library" rather than letting the
                // foreach below throw and abort the search across the remaining libraries.
                if (null == index_results)
                {
                    Logging.Info("Search of library {0} returned no result list", web_library_detail.Title);
                    continue;
                }

                foreach (IndexResult index_result in index_results)
                {
                    PDFDocument pdf_document = library.GetDocumentByFingerprint(index_result.fingerprint);
                    if (null != pdf_document)
                    {
                        CombinedSearchResultItem result = new CombinedSearchResultItem
                        {
                            fingerprint  = index_result.fingerprint,
                            score        = index_result.score,
                            pdf_document = pdf_document
                        };
                        results.Add(result);
                    }
                    else
                    {
                        Logging.Debug特("Received a null document from library search?! (Fingerprint: {0})", index_result.fingerprint);
                    }
                }
            }

            // Sort the results: the comparison is negated so that the highest score comes first
            results.Sort(delegate(CombinedSearchResultItem p1, CombinedSearchResultItem p2) { return(-Sorting.Compare(p1.score, p2.score)); });

            // Create the ordered results
            List<PDFDocument>          pdf_documents = new List<PDFDocument>();
            Dictionary<string, double> search_scores = new Dictionary<string, double>();

            foreach (CombinedSearchResultItem result in results)
            {
                pdf_documents.Add(result.pdf_document);

                // Indexer assignment: if the same fingerprint occurs more than once, the last (lowest) score wins
                search_scores[result.fingerprint] = result.score;
            }

            ObjLibraryCatalog.SetPDFDocuments(pdf_documents, null, query, search_scores);
        }
        /// <summary>
        /// Applies a quick-search query as a library filter: stores the query, the matching
        /// fingerprints and their scores on <c>library_filter_control</c>, then rebuilds the
        /// feedback span that describes the active search filter.
        /// An empty query clears the stored query, fingerprints and scores.
        /// </summary>
        /// <param name="query">The search text; asserted to be non-null.</param>
        internal void ExecuteSearchQuick(string query)
        {
            ASSERT.Test(query != null);
            SearchQuick.Text = query;

            if (!String.IsNullOrEmpty(query))
            {
                FeatureTrackingManager.Instance.UseFeature(Features.Library_KeywordFilter);

                List<IndexResult> index_results = LibrarySearcher.FindAllFingerprintsMatchingQuery(library_filter_control.web_library_detail, query);

                library_filter_control.search_quick_query        = query;
                library_filter_control.search_quick_scores       = new Dictionary<string, double>();
                library_filter_control.search_quick_fingerprints = new HashSet<string>();

                // A null result list is treated as "no matches": the filter stays active with
                // empty fingerprint/score sets instead of the foreach throwing.
                if (null != index_results)
                {
                    foreach (var index_result in index_results)
                    {
                        library_filter_control.search_quick_fingerprints.Add(index_result.fingerprint);
                        library_filter_control.search_quick_scores[index_result.fingerprint] = index_result.score;
                    }
                }
            }
            else
            {
                library_filter_control.search_quick_query        = null;
                library_filter_control.search_quick_fingerprints = null;
                library_filter_control.search_quick_scores       = null;
            }

            // Create the feedback (built for both the active-filter and the cleared case)
            Span feedback_span = new Span();
            library_filter_control.search_quick_fingerprints_span = feedback_span;

            Bold bold = new Bold();
            bold.Inlines.Add("Search");

            feedback_span.Inlines.Add(bold);
            feedback_span.Inlines.Add(" (click search score for details)");
            feedback_span.Inlines.Add(": ");
            feedback_span.Inlines.Add("'");
            feedback_span.Inlines.Add(query);
            feedback_span.Inlines.Add("'");
            feedback_span.Inlines.Add(LibraryFilterHelpers.GetClearImageInline("Clear this filter.", hyperlink_search_quick_fingerprints_span_OnClick));
        }
Exemplo n.º 4
0
        /// <summary>
        /// Looks through the other documents in the same library for ones that cite
        /// <paramref name="pdf_document"/>. Candidates are found by querying the index for the
        /// last names of up to the first three authors; a candidate counts as citing when the
        /// normalised text of one of its matching pages contains the normalised title of
        /// <paramref name="pdf_document"/>. Each new citation is recorded as inbound on
        /// <paramref name="pdf_document"/> and outbound on the citing document.
        /// </summary>
        /// <param name="pdf_document">The document whose inbound citations are being searched for.</param>
        /// <returns>The number of citations recorded during this call.</returns>
        public static int FindCitations(PDFDocument pdf_document)
        {
            int total_found = 0;

            string target_title = GenerateTextOnlyTitle(pdf_document.TitleCombined);

            List<NameTools.Name> names = SimilarAuthors.GetAuthorsForPDFDocument(pdf_document);

            // An empty title would be "contained" in every page (String.Contains("") is always true),
            // turning every author match into a bogus citation; a null title would throw on every page.
            // Without usable title text there is nothing meaningful to search for.
            bool have_target_title = !String.IsNullOrEmpty(target_title);
            if (!have_target_title)
            {
                Logging.Info("Skipping citation search for document {0} because it has no usable title text.", pdf_document.Fingerprint);
            }

            if (have_target_title && 0 < names.Count)
            {
                // Look for all other docs that mention this author
                string author_query = null;
                {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < 3 && i < names.Count; ++i)
                    {
                        sb.AppendFormat("+{0} ", names[i].last_name);
                    }
                    author_query = sb.ToString();
                }

                StatusManager.Instance.UpdateStatusBusy("CitationDocumentFinder", String.Format("Looking for new citations in \"{0}\" by \"{1}\"", pdf_document.TitleCombined, author_query));

                List<IndexPageResult> index_page_results_with_author = LibrarySearcher.FindAllPagesMatchingQuery(pdf_document.Library, author_query);
                Logging.Info("  **** We have {0} documents matching {1}", index_page_results_with_author.Count, author_query);
                foreach (var index_page_result in index_page_results_with_author)
                {
                    // A failure on one candidate is logged and must not stop the remaining candidates
                    try
                    {
                        string fingerprint = index_page_result.fingerprint;

                        // Check that the other one exists
                        PDFDocument pdf_document_other = pdf_document.Library.GetDocumentByFingerprint(fingerprint);
                        if (null == pdf_document_other || !pdf_document_other.DocumentExists)
                        {
                            continue;
                        }

                        // Let's not work on the same document
                        if (pdf_document.Fingerprint == pdf_document_other.Fingerprint)
                        {
                            continue;
                        }

                        // Let's not do work that has already been done before:
                        // skip only when BOTH directions of the citation are already recorded.
                        bool already_found =
                            pdf_document.PDFDocumentCitationManager.ContainsInboundCitation(pdf_document_other.Fingerprint)
                            && pdf_document_other.PDFDocumentCitationManager.ContainsOutboundCitation(pdf_document.Fingerprint);
                        if (already_found)
                        {
                            Logging.Info("Skipping check for citation from {0} to {1} because we know it already.", pdf_document_other.Fingerprint, pdf_document.Fingerprint);
                            continue;
                        }

                        // Now search each page for the title of the paper
                        foreach (PageResult page_result in index_page_result.page_results)
                        {
                            // Don't process the metadata "page"
                            if (0 == page_result.page)
                            {
                                continue;
                            }

                            WordList word_list_page = pdf_document_other.PDFRenderer.GetOCRText(page_result.page);
                            if (null == word_list_page)
                            {
                                continue;
                            }

                            // Concatenate the page's words and normalise them the same way as the title
                            StringBuilder sb = new StringBuilder();
                            foreach (var word in word_list_page)
                            {
                                sb.Append(word.Text);
                            }
                            string text_to_search_for_title = GenerateTextOnlyTitle(sb.ToString());

                            // If we have a match, record it! One matching page is enough for this candidate.
                            if (text_to_search_for_title.Contains(target_title))
                            {
                                pdf_document.PDFDocumentCitationManager.AddInboundCitation(pdf_document_other.Fingerprint);
                                pdf_document_other.PDFDocumentCitationManager.AddOutboundCitation(pdf_document.Fingerprint);
                                ++total_found;

                                break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Warn(ex, "There was a problem during citation finding while processing one of the matching author documents.");
                    }
                }
            }

            StatusManager.Instance.UpdateStatus("CitationDocumentFinder", String.Format("Found {0} new citations of '{1}'", total_found, pdf_document.TitleCombined));

            return(total_found);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Regenerates the AutoTags for the library: builds candidate tags from document titles
        /// (honouring the black/whitelists), searches the index for each tag, and records which
        /// documents carry it. Only one regeneration runs at a time; a call made while another is
        /// in progress logs a message and returns immediately without invoking the callback.
        /// If the user cancels part-way, they are asked whether to keep the partial result.
        /// </summary>
        /// <param name="callback">Optional callback invoked (with a null argument) after the regeneration run completes.</param>
        public void Regenerate(AsyncCallback callback)
        {
            Utilities.LockPerfTimer l1_clk = Utilities.LockPerfChecker.Start();
            lock (in_progress_lock)
            {
                l1_clk.LockPerfTimerStop();
                if (regenerating_in_progress)
                {
                    Logging.Info("Not regenerating AutoTags because a regeneration is already in progress.");
                    return;
                }

                regenerating_in_progress = true;
            }

            Stopwatch clk = Stopwatch.StartNew();

            try
            {
                Logging.Info("+AITagManager is starting regenerating");

                StatusManager.Instance.UpdateStatusBusy("AITags", "Loading documents");
                List<PDFDocument> pdf_documents = library.PDFDocuments;

                // Use suggested (non-user) titles only when they outnumber the user-supplied ones
                int count_title_by_user    = 0;
                int could_title_by_suggest = 0;
                StatusManager.Instance.UpdateStatusBusy("AITags", "Deciding whether to use suggested titles");
                foreach (PDFDocument pdf_document in pdf_documents)
                {
                    if (pdf_document.IsTitleGeneratedByUser)
                    {
                        ++count_title_by_user;
                    }
                    else
                    {
                        ++could_title_by_suggest;
                    }
                }

                bool use_suggested_titles = could_title_by_suggest > count_title_by_user;

                StatusManager.Instance.UpdateStatusBusy("AITags", "Scanning titles");
                List<string> titles = new List<string>();
                foreach (PDFDocument pdf_document in pdf_documents)
                {
                    if (use_suggested_titles || pdf_document.IsTitleGeneratedByUser)
                    {
                        titles.Add(pdf_document.TitleCombined);
                    }
                }

                StatusManager.Instance.UpdateStatusBusy("AITags", "Generating AutoTags");

                // Get the black/whitelists
                List<string> words_blacklist = new List<string>();
                List<string> words_whitelist = new List<string>();
                {
                    List<BlackWhiteListEntry> entries = library.BlackWhiteListManager.ReadList();
                    foreach (var entry in entries)
                    {
                        if (entry.is_deleted)
                        {
                            continue;
                        }

                        switch (entry.list_type)
                        {
                        case BlackWhiteListEntry.ListType.White:
                            words_whitelist.Add(entry.word);
                            break;

                        case BlackWhiteListEntry.ListType.Black:
                            words_blacklist.Add(entry.word);
                            break;

                        default:
                            Logging.Warn("Unknown black/whitelist type " + entry.list_type);
                            break;
                        }
                    }
                }

                // Generate them; if the first pass yields fewer than 20 tags, retry with the last flag switched off
                CountingDictionary<NGram> ai_tags = BuzzwordGenerator.GenerateBuzzwords(titles, words_blacklist, words_whitelist, true);
                Logging.Info("Generated {0} autotags", ai_tags.Count);
                if (ai_tags.Count < 20)
                {
                    Logging.Warn("There are too few autotags (only {0}), so not supressing Scrabble words...", ai_tags.Count);
                    ai_tags = BuzzwordGenerator.GenerateBuzzwords(titles, words_blacklist, words_whitelist, false);
                    Logging.Info("Generated {0} autotags without Scrabble suppression", ai_tags.Count);
                }

                StatusManager.Instance.UpdateStatusBusy("AITags", "AutoTagging documents");
                AITags ai_tags_record = new AITags();

                // Go through each ngram and see what documents contain it
                StatusManager.Instance.ClearCancelled("AITags");
                List<NGram> ai_tags_list = new List<NGram>(ai_tags.Keys);
                for (int i = 0; i < ai_tags_list.Count; ++i)
                {
                    // A failure on one tag is logged and must not stop the remaining tags
                    try
                    {
                        NGram  ai_tag = ai_tags_list[i];
                        string tag    = ai_tag.text;

                        if (StatusManager.Instance.IsCancelled("AITags"))
                        {
                            break;
                        }

                        StatusManager.Instance.UpdateStatusBusy("AITags", String.Format("AutoTagging papers with '{0}'", tag), i, ai_tags_list.Count, true);

                        // Surround the tag with quotes and search the index
                        string search_tag = "\"" + tag + "\"";
                        List<IndexPageResult> fingerprints_potential = LibrarySearcher.FindAllPagesMatchingQuery(library, search_tag);
                        if (null == fingerprints_potential)
                        {
                            continue;
                        }

                        // Skip this (acronym) tag if more than 5% of the documents have it...
                        if (ai_tag.is_acronym && fingerprints_potential.Count > 0.05 * pdf_documents.Count)
                        {
                            Logging.Info("Skipping AutoTag {0} because too many documents have it...", tag);
                            continue;
                        }

                        foreach (var fingerprint_potential in fingerprints_potential)
                        {
                            // Non-acronyms are definitely tagged
                            if (!ai_tag.is_acronym)
                            {
                                ai_tags_record.Associate(tag, fingerprint_potential.fingerprint);
                                continue;
                            }

                            // Acronyms need to be done manually because we only want the upper case ones...
                            PDFDocument pdf_document = library.GetDocumentByFingerprint(fingerprint_potential.fingerprint);
                            if (null == pdf_document || pdf_document.Deleted)
                            {
                                // Log the fingerprint string itself, not the index result object
                                Logging.Warn("Could not find a document matching fingerprint {0}", fingerprint_potential.fingerprint);
                                continue;
                            }

                            // If we have this tag, record it
                            if (DocumentContainsAcronym(pdf_document, tag, fingerprint_potential))
                            {
                                ai_tags_record.Associate(tag, fingerprint_potential.fingerprint);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Logging.Error(ex, "There was an exception while processing one of the autotags");
                    }
                }

                bool use_new_autotags = true;

                if (StatusManager.Instance.IsCancelled("AITags"))
                {
                    if (!MessageBoxes.AskQuestion("You cancelled the generation of your AutoTags.  Do you want to use the partially generated AutoTags (YES) or keep your old AutoTags (NO)?"))
                    {
                        use_new_autotags = false;
                    }
                }

                if (use_new_autotags)
                {
                    StatusManager.Instance.UpdateStatusBusy("AITags", "Saving AutoTags");
                    SerializeFile.ProtoSave<AITags>(Filename_Store, ai_tags_record);
                    current_ai_tags_record = ai_tags_record;
                }

                StatusManager.Instance.UpdateStatus("AITags", "AutoTags generated!");
            }
            finally
            {
                // Always release the in-progress flag, even when regeneration failed
                Utilities.LockPerfTimer l2_clk = Utilities.LockPerfChecker.Start();
                lock (in_progress_lock)
                {
                    l2_clk.LockPerfTimerStop();
                    regenerating_in_progress = false;
                }

                Logging.Info("-AITagManager is finished regenerating (time spent: {0} ms)", clk.ElapsedMilliseconds);
            }

            // Call any callback that might be interested
            callback?.Invoke(null);
        }

        /// <summary>
        /// Case-sensitive check for whether an acronym tag occurs in a document: first in its
        /// combined title, then in its comments, and finally (only if the document exists) as an
        /// exact word in the OCR text of the pages listed in the index result.
        /// </summary>
        /// <param name="pdf_document">The document to inspect; must not be null.</param>
        /// <param name="tag">The acronym text to look for.</param>
        /// <param name="index_page_result">The index hit for this document, supplying the pages to scan.</param>
        /// <returns>True as soon as the tag is found in any of the places checked; otherwise false.</returns>
        private static bool DocumentContainsAcronym(PDFDocument pdf_document, string tag, IndexPageResult index_page_result)
        {
            string doc_title = pdf_document.TitleCombined;
            if (!String.IsNullOrEmpty(doc_title) && doc_title.Contains(tag))
            {
                return true;
            }

            string doc_comment = pdf_document.Comments;
            if (!String.IsNullOrEmpty(doc_comment) && doc_comment.Contains(tag))
            {
                return true;
            }

            // The page scan is only attempted when the document exists
            if (!pdf_document.DocumentExists)
            {
                return false;
            }

            foreach (var page_result in index_page_result.page_results)
            {
                WordList page_word_list = pdf_document.PDFRenderer.GetOCRText(page_result.page);
                if (null == page_word_list)
                {
                    continue;
                }

                foreach (Word word in page_word_list)
                {
                    // Whole-word, case-sensitive match
                    if (tag == word.Text)
                    {
                        return true;
                    }
                }
            }

            return false;
        }