/// <summary>
/// Suggests other documents from the same library that are related to <paramref name="pdf_document"/>,
/// using the topic/document densities of the library's Expedition LDA analysis.
/// </summary>
/// <remarks>
/// Candidates are gathered from the first few entries of the document's sorted topic list; for each such
/// topic the first entries of that topic's sorted document list are taken, and every candidate is scored
/// as sqrt(topic probability * document probability).
/// Any exception is logged and swallowed, so the caller receives whatever was collected up to that point.
/// NOTE(review): a document that appears under several of the inspected topics is added once per topic, and
/// the source document itself is not filtered out - confirm that callers expect this.
/// </remarks>
/// <param name="pdf_document">The document to find related documents for.</param>
/// <param name="NUM_OTHERS">Upper bound on the number of ranked candidates that are looked up.</param>
/// <returns>
/// The suggestions found (never null). Empty when the library has no Expedition data source or when the
/// document is not part of its index.
/// </returns>
public static List<Result> GetRelevantOthers(PDFDocument pdf_document, int NUM_OTHERS)
{
    // How many of the document's topics are inspected, and how many documents are taken per topic.
    const int MAX_TOPICS = 3;
    const int MAX_DOCS_PER_TOPIC = 50;

    List<Result> results = new List<Result>();

    try
    {
        // Read the data source once and keep using that same instance throughout.
        ExpeditionDataSource eds = pdf_document.Library.ExpeditionManager.ExpeditionDataSource;
        if (null == eds)
        {
            return results;
        }

        LDAAnalysis lda_analysis = eds.LDAAnalysis;

        // Single lookup instead of ContainsKey followed by the indexer.
        int doc_id;
        if (!eds.docs_index.TryGetValue(pdf_document.Fingerprint, out doc_id))
        {
            return results;
        }

        // Fill the similar papers
        TopicProbability[] topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];
        List<DocProbability> similar_docs = new List<DocProbability>();

        // Only look at the first MAX_TOPICS topics
        for (int t = 0; t < topics.Length && t < MAX_TOPICS; ++t)
        {
            int topic = topics[t].topic;
            double topic_prob = topics[t].prob;

            // Look at the first MAX_DOCS_PER_TOPIC docs in each topic (if there are that many)
            DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[topic];
            for (int d = 0; d < docs.Length && d < MAX_DOCS_PER_TOPIC; ++d)
            {
                // Geometric mean of "how much this topic matters to our document" and
                // "how much the candidate document belongs to this topic".
                double combined_prob = Math.Sqrt(topic_prob * docs[d].prob);
                similar_docs.Add(new DocProbability(combined_prob, docs[d].doc));
            }
        }

        // Now take the top N docs.
        // Ordering comes from DocProbability's own comparison (presumably most probable first - TODO confirm).
        similar_docs.Sort();
        for (int i = 0; i < similar_docs.Count && i < NUM_OTHERS; ++i)
        {
            string fingerprint_to_look_for = eds.docs[similar_docs[i].doc];
            PDFDocument pdf_document_similar = pdf_document.Library.GetDocumentByFingerprint(fingerprint_to_look_for);

            // The lookup can come back empty; never hand a Result without a document to the caller.
            if (null == pdf_document_similar)
            {
                Logging.Warn("ExpeditionPaperSuggestions: Cannot find similar document anymore for fingerprint {0}", fingerprint_to_look_for);
                continue;
            }

            results.Add(new Result { pdf_document = pdf_document_similar, relevance = similar_docs[i].prob });
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem getting the relevant others for document {0}", pdf_document.Fingerprint);
    }

    return results;
}
/// <summary>
/// Rebuilds this control's header and document list for the topic held in the <c>DataContext</c>.
/// </summary>
/// <remarks>
/// The header, tooltip and paper list are always cleared first. Nothing further is shown when the
/// <c>DataContext</c> is not a <c>TopicOverviewData</c> or when no Expedition data source is available.
/// </remarks>
/// <param name="detailed_mode">
/// When true, up to 50 documents are listed and no "Show me more" button is added;
/// when false, up to 10 documents are listed and the button is added if more documents exist.
/// </param>
private void PopulateDetail(bool detailed_mode)
{
    // Clear the old
    ObjHeader.Header = null;
    ObjHeader.ToolTip = null;
    ObjPapers.Children.Clear();

    // Try to get the context
    TopicOverviewData tod = DataContext as TopicOverviewData;
    if (null == tod)
    {
        return;
    }

    // Quick refs
    ExpeditionDataSource eds = tod.web_library_detail.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
    if (null == eds)
    {
        return;
    }

    LDAAnalysis lda_analysis = eds.LDAAnalysis;

    // First the terms header
    {
        string header = eds.GetDescriptionForTopic(tod.topic);
        ObjHeader.Header = header;
        ObjHeader.ToolTip = header;
        ObjHeader.HeaderBackground = new SolidColorBrush(eds.Colours[tod.topic]);
    }

    // Then the docs
    int NUM_DOCS = Math.Min(detailed_mode ? 50 : 10, lda_analysis.NUM_DOCS);
    ASSERT.Test(tod.topic >= 0);
    ASSERT.Test(tod.topic < lda_analysis.NUM_TOPICS);

    // The per-topic document list does not change while we iterate, so fetch and sanity-check it once
    // instead of once per row. The guard keeps the fetch from happening when no row would be produced.
    if (NUM_DOCS > 0 && eds.docs.Count > 0)
    {
        DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[tod.topic];
        ASSERT.Test(docs != null);
        ASSERT.Test(docs.Length == lda_analysis.NUM_DOCS);

        for (int d = 0; d < NUM_DOCS && d < eds.docs.Count; ++d)
        {
            DocProbability lda_elem = docs[d];
            ASSERT.Test(lda_elem != null);

            // NOTE(review): if this lookup can come back null (e.g. the document has since left the
            // library), confirm that GetDocumentTextBlock tolerates a null document.
            PDFDocument pdf_document = tod.web_library_detail.Xlibrary.GetDocumentByFingerprint(eds.docs[lda_elem.doc]);

            string doc_percentage = String.Format("{0:N0}%", 100 * lda_elem.prob);

            // A fresh flag is handed over for every row, so nothing carries over between rows.
            // NOTE(review): confirm that is intended.
            bool alternator = false;
            TextBlock text_doc = ListFormattingTools.GetDocumentTextBlock(pdf_document, ref alternator, Features.Expedition_TopicDocument, TopicDocumentPressed_MouseButtonEventHandler, doc_percentage + " - ");
            ObjPapers.Children.Add(text_doc);
        }
    }

    // The MORE button
    if (!detailed_mode && NUM_DOCS < eds.docs.Count)
    {
        AugmentedButton button_more = new AugmentedButton();
        button_more.Caption = "Show me more";
        button_more.Click += button_more_Click;
        ObjPapers.Children.Add(button_more);
    }

    // The BRAINSTORM button
    {
        AugmentedButton button_brainstorm = new AugmentedButton();
        button_brainstorm.Caption = "Show me in Brainstorm";
        button_brainstorm.Click += button_brainstorm_Click;
        // The click handler gets the topic data through the button's Tag.
        button_brainstorm.Tag = tod;
        ObjPapers.Children.Add(button_brainstorm);
    }
}
/// <summary>
/// Suggests other documents from the same library that are related to <paramref name="pdf_document"/>,
/// using the topic/document densities of the library's Expedition LDA analysis.
/// </summary>
/// <remarks>
/// Candidates are gathered from the first few entries of the document's sorted topic list; for each such
/// topic the first entries of that topic's sorted document list are taken, and every candidate is scored
/// as sqrt(topic probability * document probability). Candidates whose document can no longer be found in
/// the library are logged and left out.
/// Any exception is logged and swallowed, so the caller receives whatever was collected up to that point.
/// NOTE(review): a document that appears under several of the inspected topics is added once per topic, and
/// the source document itself is not filtered out - confirm that callers expect this.
/// </remarks>
/// <param name="pdf_document">The document to find related documents for.</param>
/// <param name="NUM_OTHERS">
/// Upper bound on the number of ranked candidates that are looked up; candidates that cannot be found
/// still count towards this bound.
/// </param>
/// <returns>
/// The suggestions found (never null). Empty when the library has no Expedition data source or when the
/// document is not part of its index.
/// </returns>
public static List<Result> GetRelevantOthers(PDFDocument pdf_document, int NUM_OTHERS)
{
    // How many of the document's topics are inspected, and how many documents are taken per topic.
    const int MAX_TOPICS = 3;
    const int MAX_DOCS_PER_TOPIC = 50;

    List<Result> results = new List<Result>();

    try
    {
        ExpeditionDataSource eds = pdf_document.LibraryRef?.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
        if (null == eds)
        {
            return results;
        }

        LDAAnalysis lda_analysis = eds.LDAAnalysis;

        // Single lookup instead of ContainsKey followed by the indexer.
        int doc_id;
        if (!eds.docs_index.TryGetValue(pdf_document.Fingerprint, out doc_id))
        {
            return results;
        }

        // Fill the similar papers
        TopicProbability[] topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];
        List<DocProbability> similar_docs = new List<DocProbability>();

        // Only look at the first MAX_TOPICS topics
        for (int t = 0; t < topics.Length && t < MAX_TOPICS; ++t)
        {
            int topic = topics[t].topic;
            double topic_prob = topics[t].prob;

            ASSERT.Test(topic >= 0);
            ASSERT.Test(topic < lda_analysis.NUM_TOPICS);

            // Look at the first MAX_DOCS_PER_TOPIC docs in each topic (if there are that many)
            DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[topic];
            ASSERT.Test(docs != null);
            ASSERT.Test(docs.Length == lda_analysis.NUM_DOCS);

            // Bound computed once per topic rather than on every loop test.
            int doc_limit = Math.Min(docs.Length, MAX_DOCS_PER_TOPIC);
            for (int d = 0; d < doc_limit; ++d)
            {
                // Geometric mean of "how much this topic matters to our document" and
                // "how much the candidate document belongs to this topic".
                double combined_prob = Math.Sqrt(topic_prob * docs[d].prob);
                similar_docs.Add(new DocProbability(combined_prob, docs[d].doc));
            }
        }

        // Now take the top N docs.
        // Ordering comes from DocProbability's own comparison (presumably most probable first - TODO confirm).
        similar_docs.Sort();
        for (int i = 0; i < similar_docs.Count && i < NUM_OTHERS; ++i)
        {
            string fingerprint_to_look_for = eds.docs[similar_docs[i].doc];
            PDFDocument pdf_document_similar = pdf_document.LibraryRef.Xlibrary.GetDocumentByFingerprint(fingerprint_to_look_for);

            if (null == pdf_document_similar)
            {
                Logging.Warn("ExpeditionPaperSuggestions: Cannot find similar document anymore for fingerprint {0}", fingerprint_to_look_for);
                continue;
            }

            results.Add(new Result { pdf_document = pdf_document_similar, relevance = similar_docs[i].prob });
        }
    }
    catch (Exception ex)
    {
        // Null-conditional access so that a null argument cannot make the error handler itself throw.
        Logging.Error(ex, "There was a problem getting the relevant others for document {0}", pdf_document?.Fingerprint);
    }

    return results;
}