Esempio n. 1
0
        /// <summary>
        /// Suggests up to <paramref name="NUM_OTHERS"/> documents from the same library that are
        /// related to <paramref name="pdf_document"/>, using the library's Expedition LDA analysis.
        /// </summary>
        /// <remarks>
        /// Candidates are the first 50 documents of each of the document's first 3 topics (in the
        /// order supplied by the analysis).  Each candidate is scored as
        /// sqrt(topic probability * document probability).
        /// Any exception is logged and the results gathered so far are returned.
        /// </remarks>
        /// <param name="pdf_document">The document to find related documents for.</param>
        /// <param name="NUM_OTHERS">The maximum number of candidates to turn into results.</param>
        /// <returns>
        /// A list (never null, possibly empty) of documents with their relevance score.  It is empty
        /// when there is no Expedition data source or the document is not part of its index.
        /// </returns>
        public static List<Result> GetRelevantOthers(PDFDocument pdf_document, int NUM_OTHERS)
        {
            const int MAX_TOPICS_TO_SCAN = 3;
            const int MAX_DOCS_PER_TOPIC = 50;

            List<Result> results = new List<Result>();

            try
            {
                // Read the data source once so that every later step works on the same instance
                ExpeditionDataSource eds = pdf_document.Library.ExpeditionManager.ExpeditionDataSource;
                if (null == eds)
                {
                    return results;
                }

                LDAAnalysis lda_analysis = eds.LDAAnalysis;

                // A document that is not in the index has no topics to compare against
                if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
                {
                    return results;
                }

                int doc_id = eds.docs_index[pdf_document.Fingerprint];
                TopicProbability[] topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];

                // NOTE(review): a document listed under several of the scanned topics is added once
                // per topic, and the source document itself is not filtered out - confirm that
                // callers expect this.
                List<DocProbability> similar_docs = new List<DocProbability>();

                // Only look at the first few topics of this document
                for (int t = 0; t < topics.Length && t < MAX_TOPICS_TO_SCAN; ++t)
                {
                    int topic = topics[t].topic;
                    double topic_prob = topics[t].prob;

                    // Look at the first docs in each topic (if there are that many)
                    DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[topic];
                    for (int d = 0; d < docs.Length && d < MAX_DOCS_PER_TOPIC; ++d)
                    {
                        // Geometric mean of "how much this topic matters to our document" and
                        // "how much the candidate matters to this topic"
                        double score = Math.Sqrt(topic_prob * docs[d].prob);
                        similar_docs.Add(new DocProbability(score, docs[d].doc));
                    }
                }

                // Now take the top N docs.
                // The ordering comes from DocProbability's own comparison - presumably most probable first; verify.
                similar_docs.Sort();
                for (int i = 0; i < similar_docs.Count && i < NUM_OTHERS; ++i)
                {
                    var fingerprint_to_look_for = eds.docs[similar_docs[i].doc];
                    PDFDocument pdf_document_similar = pdf_document.Library.GetDocumentByFingerprint(fingerprint_to_look_for);

                    // The analysis can refer to a document the library can no longer supply;
                    // never hand a null document back to the caller.
                    if (null == pdf_document_similar)
                    {
                        Logging.Warn("Cannot find similar document anymore for fingerprint {0}", fingerprint_to_look_for);
                        continue;
                    }

                    results.Add(new Result
                    {
                        pdf_document = pdf_document_similar,
                        relevance = similar_docs[i].prob
                    });
                }
            }
            catch (Exception ex)
            {
                // pdf_document may itself be the null that got us here, so do not dereference it blindly
                Logging.Error(ex, "There was a problem getting the relevant others for document {0}", pdf_document?.Fingerprint);
            }

            return results;
        }
        /// <summary>
        /// Rebuilds the topic header and the list of papers from the <c>TopicOverviewData</c>
        /// currently held in <c>DataContext</c>.
        /// </summary>
        /// <param name="detailed_mode">
        /// When true, up to 50 documents are listed and no "Show me more" button is offered;
        /// when false, up to 10 documents are listed.
        /// </param>
        private void PopulateDetail(bool detailed_mode)
        {
            // Wipe whatever was displayed before
            ObjHeader.Header = null;
            ObjHeader.ToolTip = null;
            ObjPapers.Children.Clear();

            // Without topic data there is nothing to show
            TopicOverviewData tod = DataContext as TopicOverviewData;
            if (tod == null)
            {
                return;
            }

            // Without an Expedition data source there is nothing to show either
            ExpeditionDataSource eds = tod.web_library_detail.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
            if (eds == null)
            {
                return;
            }

            LDAAnalysis lda_analysis = eds.LDAAnalysis;

            // Header: the topic description, tinted with the topic's colour
            string topic_description = eds.GetDescriptionForTopic(tod.topic);
            ObjHeader.Header = topic_description;
            ObjHeader.ToolTip = topic_description;
            ObjHeader.HeaderBackground = new SolidColorBrush(eds.Colours[tod.topic]);

            // Papers: never ask for more rows than the analysis holds
            int max_rows = Math.Min(detailed_mode ? 50 : 10, lda_analysis.NUM_DOCS);

            ASSERT.Test(tod.topic >= 0);
            ASSERT.Test(tod.topic < lda_analysis.NUM_TOPICS);

            for (int row = 0; row < max_rows && row < eds.docs.Count; ++row)
            {
                // Looked up (and sanity-checked) on every pass, exactly as before
                DocProbability[] topic_docs = lda_analysis.DensityOfDocsInTopicsSorted[tod.topic];
                ASSERT.Test(topic_docs != null);
                ASSERT.Test(topic_docs.Length == lda_analysis.NUM_DOCS);

                DocProbability entry = topic_docs[row];
                ASSERT.Test(entry != null);

                var fingerprint = eds.docs[entry.doc];
                PDFDocument pdf_document = tod.web_library_detail.Xlibrary.GetDocumentByFingerprint(fingerprint);

                // Each row is prefixed with the probability as a whole-number percentage
                string row_prefix = String.Format("{0:N0}%", 100 * entry.prob) + " - ";

                // NOTE(review): alternator restarts at false for every row, so anything
                // GetDocumentTextBlock writes back through the ref is discarded - confirm this is intended.
                bool alternator = false;
                TextBlock row_text = ListFormattingTools.GetDocumentTextBlock(
                    pdf_document,
                    ref alternator,
                    Features.Expedition_TopicDocument,
                    TopicDocumentPressed_MouseButtonEventHandler,
                    row_prefix);
                ObjPapers.Children.Add(row_text);
            }

            // "Show me more" only makes sense in the short view, and only if there are more documents
            if (!detailed_mode && max_rows < eds.docs.Count)
            {
                AugmentedButton more_button = new AugmentedButton { Caption = "Show me more" };
                more_button.Click += button_more_Click;
                ObjPapers.Children.Add(more_button);
            }

            // The Brainstorm button is always offered; it carries the topic data in its Tag
            AugmentedButton brainstorm_button = new AugmentedButton { Caption = "Show me in Brainstorm" };
            brainstorm_button.Click += button_brainstorm_Click;
            brainstorm_button.Tag = tod;
            ObjPapers.Children.Add(brainstorm_button);
        }
        /// <summary>
        /// Suggests up to <paramref name="NUM_OTHERS"/> documents from the same library that are
        /// related to <paramref name="pdf_document"/>, using the library's Expedition LDA analysis.
        /// </summary>
        /// <remarks>
        /// Candidates are the first 50 documents of each of the document's first 3 topics (in the
        /// order supplied by the analysis).  Each candidate is scored as
        /// sqrt(topic probability * document probability).
        /// Candidates whose document can no longer be found in the library are logged and skipped.
        /// Any exception is logged and the results gathered so far are returned.
        /// </remarks>
        /// <param name="pdf_document">The document to find related documents for.</param>
        /// <param name="NUM_OTHERS">The maximum number of candidates to turn into results.</param>
        /// <returns>
        /// A list (never null, possibly empty) of documents with their relevance score.  It is empty
        /// when there is no Expedition data source or the document is not part of its index.
        /// </returns>
        public static List<Result> GetRelevantOthers(PDFDocument pdf_document, int NUM_OTHERS)
        {
            const int MAX_TOPICS_TO_SCAN = 3;
            const int MAX_DOCS_PER_TOPIC = 50;

            List<Result> results = new List<Result>();

            try
            {
                // Capture the library once: the null-safe read here and the document lookups below
                // must go through the same instance.
                var library = pdf_document.LibraryRef?.Xlibrary;
                ExpeditionDataSource eds = library?.ExpeditionManager?.ExpeditionDataSource;
                if (null == eds)
                {
                    return results;
                }

                LDAAnalysis lda_analysis = eds.LDAAnalysis;

                // A document that is not in the index has no topics to compare against
                if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
                {
                    return results;
                }

                int doc_id = eds.docs_index[pdf_document.Fingerprint];
                TopicProbability[] topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];

                // NOTE(review): a document listed under several of the scanned topics is added once
                // per topic, and the source document itself is not filtered out - confirm that
                // callers expect this.
                List<DocProbability> similar_docs = new List<DocProbability>();

                // Only look at the first few topics of this document
                for (int t = 0; t < topics.Length && t < MAX_TOPICS_TO_SCAN; ++t)
                {
                    int topic = topics[t].topic;
                    double topic_prob = topics[t].prob;

                    ASSERT.Test(topic >= 0);
                    ASSERT.Test(topic < lda_analysis.NUM_TOPICS);

                    // Look at the first docs in each topic (if there are that many)
                    DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[topic];
                    ASSERT.Test(docs != null);
                    ASSERT.Test(docs.Length == lda_analysis.NUM_DOCS);

                    int docs_to_scan = Math.Min(docs.Length, MAX_DOCS_PER_TOPIC);
                    for (int d = 0; d < docs_to_scan; ++d)
                    {
                        // Geometric mean of "how much this topic matters to our document" and
                        // "how much the candidate matters to this topic"
                        double score = Math.Sqrt(topic_prob * docs[d].prob);
                        similar_docs.Add(new DocProbability(score, docs[d].doc));
                    }
                }

                // Now take the top N docs.
                // The ordering comes from DocProbability's own comparison - presumably most probable first; verify.
                similar_docs.Sort();
                for (int i = 0; i < similar_docs.Count && i < NUM_OTHERS; ++i)
                {
                    string fingerprint_to_look_for = eds.docs[similar_docs[i].doc];
                    PDFDocument pdf_document_similar = library.GetDocumentByFingerprint(fingerprint_to_look_for);

                    if (null == pdf_document_similar)
                    {
                        Logging.Warn("ExpeditionPaperSuggestions: Cannot find similar document anymore for fingerprint {0}", fingerprint_to_look_for);
                        continue;
                    }

                    results.Add(new Result
                    {
                        pdf_document = pdf_document_similar,
                        relevance = similar_docs[i].prob
                    });
                }
            }
            catch (Exception ex)
            {
                // pdf_document may itself be the null that got us here, so do not dereference it blindly
                Logging.Error(ex, "There was a problem getting the relevant others for document {0}", pdf_document?.Fingerprint);
            }

            return results;
        }