/// <summary>
/// Looks up the per-topic densities that the Expedition LDA analysis holds for one document.
/// </summary>
/// <param name="pdf_document">The document to look up; may be null.</param>
/// <param name="doc_id">Receives the document's index in the Expedition data source, or -1 when nothing was found.</param>
/// <param name="num_topics">Receives the number of topics in the analysis, or -1 when nothing was found.</param>
/// <param name="density_of_topics_in_document">Receives one density per topic for this document, or null when nothing was found.</param>
private static void GetDensityForDocument(PDFDocument pdf_document, out int doc_id, out int num_topics, out float[] density_of_topics_in_document)
        {
            // Defaults that tell the caller "no result"
            doc_id     = -1;
            num_topics = -1;
            density_of_topics_in_document = null;

            // Every link of the chain is optional (LibraryRef included), so a document that is not
            // attached to a library yields "no result" instead of a NullReferenceException.
            ExpeditionDataSource eds = pdf_document?.LibraryRef?.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
            if (null == eds)
            {
                return;
            }

            if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
            {
                return;
            }

            // Result! Only now is the analysis itself needed.
            LDAAnalysis lda_analysis = eds.LDAAnalysis;

            doc_id     = eds.docs_index[pdf_document.Fingerprint];
            num_topics = lda_analysis.NUM_TOPICS;
            density_of_topics_in_document = new float[num_topics];

            // Copy this document's row out of the [doc, topic] density matrix
            for (int i = 0; i < num_topics; ++i)
            {
                density_of_topics_in_document[i] = lda_analysis.DensityOfTopicsInDocuments[doc_id, i];
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a text report of how many documents list each topic at each rank of their sorted top-topics list.
        /// The report has two sections: a descriptive one (topic keywords followed by the per-rank counts) and a
        /// plain tab-separated table of the same counts.
        /// </summary>
        /// <returns>The report text.</returns>
        public string DumpTopicsPopularity()
        {
            LDAAnalysis analysis = LDAAnalysis;

            // Number of ranks tallied per document: five, or fewer when there are fewer topics
            int rank_count = Math.Min(5, analysis.NUM_TOPICS);

            // hits[topic, rank] = number of documents that list the topic at that rank
            int[,] hits = new int[analysis.NUM_TOPICS, rank_count];

            TopicProbability[][] top_topics_by_doc = analysis.DensityOfTop5TopicsInDocsSorted; // [doc][rank<5]
            for (int d = 0; d < analysis.NUM_DOCS; ++d)
            {
                for (int rank = 0; rank < rank_count; ++rank)
                {
                    int ranked_topic = top_topics_by_doc[d][rank].topic;
                    ++hits[ranked_topic, rank];
                }
            }

            StringBuilder report = new StringBuilder();

            // Section one: the descriptive keywords of each topic, then its labelled counts
            for (int t = 0; t < analysis.NUM_TOPICS; ++t)
            {
                report.AppendFormat("{0}", GetDescriptionForTopic(t, true, ";", false));
                report.AppendLine();

                for (int rank = 0; rank < rank_count; ++rank)
                {
                    report.AppendFormat("{0}de:{1}\t", rank + 1, hits[t, rank]);
                }
                report.AppendLine();
            }

            report.AppendLine();

            // Section two: the same counts as a bare table, one row per topic
            for (int t = 0; t < analysis.NUM_TOPICS; ++t)
            {
                report.AppendFormat("{0}\t", t);

                for (int rank = 0; rank < rank_count; ++rank)
                {
                    report.AppendFormat("{0}\t", hits[t, rank]);
                }
                report.AppendLine();
            }

            return report.ToString();
        }
Beispiel #3
0
        /// <summary>
        /// Rebuilds the PDF preview area whenever the control's DataContext changes.
        /// Nothing is shown when the DataContext is not a bindable PDF document, when the library has no
        /// Expedition data source, or when Expedition does not know the document (the user is warned in that case).
        /// </summary>
        private void TopicOverviewControl_DataContextChanged(object sender, DependencyPropertyChangedEventArgs e)
        {
            // Clear the old
            ObjPDFRendererControlPlaceholder.Children.Clear();

            AugmentedBindable<PDFDocument> pdf_document_bindable = DataContext as AugmentedBindable<PDFDocument>;
            if (null == pdf_document_bindable)
            {
                return;
            }

            PDFDocument pdf_document = pdf_document_bindable.Underlying;

            // Read the data source exactly once: re-walking the chain for every use could observe a
            // different (or null) instance between the null check and the lookup below.
            ExpeditionDataSource eds = pdf_document.Library?.ExpeditionManager?.ExpeditionDataSource;
            if (null == eds)
            {
                return;
            }

            try
            {
                if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
                {
                    MessageBoxes.Warn("Expedition doesn't have any information about this paper.  Please Refresh your Expedition.");
                    return;
                }
            }
            catch (Exception ex)
            {
                // Best effort: a failing lookup is logged and the preview is still attempted below
                Logging.Error(ex, "There was a problem with Expedition for document {0}", pdf_document.Fingerprint);
            }

            if (pdf_document.DocumentExists)
            {
                // Give the preview row all remaining space and host a fresh renderer in it
                ObjPDFRendererControlPlaceholderBorder.Visibility = Visibility.Visible;
                ObjPDFRendererControlPlaceholderRow.Height        = new GridLength(1, GridUnitType.Star);

                PDFRendererControl pdf_renderer_control = new PDFRendererControl(pdf_document, false, PDFRendererControl.ZoomType.Zoom1Up);
                ObjPDFRendererControlPlaceholder.Children.Add(pdf_renderer_control);
            }
            else
            {
                // No file to render: collapse the preview row entirely
                ObjPDFRendererControlPlaceholderBorder.Visibility = Visibility.Collapsed;
                ObjPDFRendererControlPlaceholderRow.Height        = new GridLength(0, GridUnitType.Pixel);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Writes every topic to the console: its description, followed by up to ten entries from the front of
        /// the topic's sorted word list as "rank &amp; word &amp; probability" lines.
        /// </summary>
        public void PrintStats_TOPICS()
        {
            LDAAnalysis lda = LDAAnalysis;

            for (int topic = 0; topic < lda.NUM_TOPICS; ++topic)
            {
                Console.WriteLine("Topic: {0}", GetDescriptionForTopic(topic));

                // Fetch the sorted list once per topic, and never read past its end when it holds fewer than ten words
                WordProbability[] words_in_topic = lda.DensityOfWordsInTopicsSorted[topic];
                int words_to_print = Math.Min(10, words_in_topic.Length);

                for (int word = 0; word < words_to_print; ++word)
                {
                    Console.WriteLine("{0} & {1} & {2}", word + 1, words[words_in_topic[word].word], words_in_topic[word].prob);
                }
                Console.WriteLine();
            }
        }
Beispiel #5
0
        /// <summary>
        /// Writes one console line per document: "Doc N:" followed by a tab-separated list of that document's
        /// topic densities, each multiplied by 100 and printed without decimals.
        /// </summary>
        public void PrintStats_DOCS()
        {
            LDAAnalysis analysis = LDAAnalysis;

            for (int doc_index = 0; doc_index < analysis.NUM_DOCS; ++doc_index)
            {
                Console.Write("Doc {0}:", doc_index);

                for (int topic_index = 0; topic_index < analysis.NUM_TOPICS; ++topic_index)
                {
                    // Scale the density to a percentage-style figure before printing
                    var scaled_density = 100 * analysis.DensityOfTopicsInDocuments[doc_index, topic_index];
                    Console.Write("\t{0:0}", scaled_density);
                }

                // Finish this document's line
                Console.WriteLine();
            }
        }
Beispiel #6
0
        /// <summary>
        /// Builds a short human-readable description of a topic from up to five entries at the front of the
        /// topic's sorted word list.
        /// </summary>
        /// <param name="topic">Zero-based topic index.</param>
        /// <param name="include_topic_number">When true, the description starts with the one-based topic number and ". ".</param>
        /// <param name="separator">Text placed between consecutive words.</param>
        /// <param name="stop_at_word_probability_jump">
        /// When true, the list is cut at the first word whose probability is more than ten times smaller than the
        /// previous word's; when false, such a drop is marked with " // " and the list continues.
        /// </param>
        /// <returns>The description, without a trailing separator.</returns>
        public string GetDescriptionForTopic(int topic, bool include_topic_number = true, string separator = "; ", bool stop_at_word_probability_jump = true)
        {
            StringBuilder sb = new StringBuilder();

            if (include_topic_number)
            {
                sb.AppendFormat("{0}. ", topic + 1);
            }

            LDAAnalysis lda = LDAAnalysis;

            WordProbability[] lda_wordprobs = lda.DensityOfWordsInTopicsSorted[topic];
            ASSERT.Test(lda_wordprobs != null);

            double last_term_prob = 0;

            // Set once a word (and therefore a trailing separator) has actually been written
            bool ends_with_separator = false;

            for (int t = 0; t < 5 && t < lda.NUM_WORDS; ++t)
            {
                WordProbability lda_node = lda_wordprobs[t];
                ASSERT.Test(lda_node != null);

                // A more than ten-fold drop relative to the previous word counts as a "jump".
                // For the first word last_term_prob is still 0, so the test cannot fire.
                if (last_term_prob / lda_node.prob > 10)
                {
                    if (stop_at_word_probability_jump)
                    {
                        break;
                    }
                    else
                    {
                        sb.Append(" // ");
                    }
                }
                last_term_prob = lda_node.prob;

                sb.AppendFormat("{0}", words[lda_node.word]);
                sb.Append(separator);
                ends_with_separator = true;
            }

            // Drop the separator written after the last word. Tracking it explicitly (instead of a culture-sensitive
            // EndsWith on the finished string) guarantees that exactly the separator is removed, never part of the
            // "N. " prefix when no word was emitted, and tolerates a null or empty separator.
            if (ends_with_separator && !String.IsNullOrEmpty(separator))
            {
                sb.Length -= separator.Length;
            }

            return sb.ToString();
        }
        /// <summary>
        /// Exports the description of every Expedition topic, one per line, to a temporary text file and opens
        /// that file. Shows an error message when the library has no Expedition data source.
        /// </summary>
        void ButtonExportTopics_Click(object sender, RoutedEventArgs e)
        {
            // Take one snapshot of the data source so that the topic count and the descriptions are
            // guaranteed to come from the same instance.
            ExpeditionDataSource eds = library.ExpeditionManager?.ExpeditionDataSource;
            if (null == eds)
            {
                MessageBoxes.Error("You need to first run Expedition for this library.");
                return;
            }

            int num_topics = eds.LDAAnalysis.NUM_TOPICS;

            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < num_topics; ++i)
            {
                string topic_description = eds.GetDescriptionForTopic(i);
                sb.AppendFormat("{0}\r\n", topic_description);
            }

            // Write the export and open it via the file name
            string filename = TempFile.GenerateTempFilename("txt");
            File.WriteAllText(filename, sb.ToString());
            Process.Start(filename);
        }
Beispiel #8
0
        /// <summary>
        /// Adds documents similar to <paramref name="doc"/> to the given node, using the document's topic
        /// distribution from the Expedition analysis of its library. Must not be called on the UI thread.
        /// Logs a warning when the library has no Expedition data source; does nothing when Expedition does not
        /// know the document.
        /// </summary>
        private static void ExpandSimilars(PDFDocument doc, NodeControl node_control)
        {
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
            ASSERT.Test(doc != null);

            FeatureTrackingManager.Instance.UseFeature(Features.Brainstorm_ExploreLibrary_Document_Similars);

            if (null == doc)
            {
                return;
            }

            ASSERT.Test(doc.LibraryRef.Xlibrary != null);

            ExpeditionDataSource data_source = doc.LibraryRef.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
            if (null == data_source)
            {
                Logging.Warn("Expedition has not been run for library '{0}'.", doc.LibraryRef.Title);
                return;
            }

            if (!data_source.docs_index.ContainsKey(doc.Fingerprint))
            {
                return;
            }

            int         document_index = data_source.docs_index[doc.Fingerprint];
            LDAAnalysis analysis       = data_source.LDAAnalysis;

            float[,] densities = analysis.DensityOfTopicsInDocuments;

            // Copy this document's row of the [doc, topic] density matrix
            float[] topic_profile = new float[analysis.NUM_TOPICS];
            for (int k = 0; k < analysis.NUM_TOPICS; ++k)
            {
                topic_profile[k] = densities[document_index, k];
            }

            ThemeNodeContentControl.AddDocumentsSimilarToDistribution(node_control, doc.LibraryRef, data_source, topic_profile);
        }
        /// <summary>
        /// Fills the control with the details of the topic in the DataContext: the topic description as header,
        /// entries from the front of the topic's sorted document list, and the action buttons.
        /// </summary>
        /// <param name="detailed_mode">When true up to 50 documents are listed, otherwise up to 10 plus a "Show me more" button.</param>
        private void PopulateDetail(bool detailed_mode)
        {
            // Clear the old
            ObjHeader.Header  = null;
            ObjHeader.ToolTip = null;
            ObjPapers.Children.Clear();

            // Try to get the context
            TopicOverviewData tod = DataContext as TopicOverviewData;

            if (null == tod)
            {
                return;
            }

            // Quick refs
            ExpeditionDataSource eds = tod.web_library_detail.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;

            if (null != eds)
            {
                LDAAnalysis lda_analysis = eds.LDAAnalysis;

                // First the terms header
                {
                    string header = eds.GetDescriptionForTopic(tod.topic);
                    ObjHeader.Header           = header;
                    ObjHeader.ToolTip          = header;
                    ObjHeader.HeaderBackground = new SolidColorBrush(eds.Colours[tod.topic]);
                }

                // Then the docs
                {
                    int NUM_DOCS = Math.Min(detailed_mode ? 50 : 10, lda_analysis.NUM_DOCS);

                    ASSERT.Test(tod.topic >= 0);
                    ASSERT.Test(tod.topic < lda_analysis.NUM_TOPICS);

                    for (int d = 0; d < NUM_DOCS && d < eds.docs.Count; ++d)
                    {
                        DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[tod.topic];
                        ASSERT.Test(docs != null);
                        ASSERT.Test(docs.Length == lda_analysis.NUM_DOCS);
                        DocProbability lda_elem = docs[d];
                        ASSERT.Test(lda_elem != null);

                        string      fingerprint  = eds.docs[lda_elem.doc];
                        PDFDocument pdf_document = tod.web_library_detail.Xlibrary.GetDocumentByFingerprint(fingerprint);
                        if (null == pdf_document)
                        {
                            // The lookup can come back empty; skip the entry instead of building a text block for null
                            Logging.Warn("TopicOverview: Cannot find document anymore for fingerprint {0}", fingerprint);
                            continue;
                        }

                        string doc_percentage = String.Format("{0:N0}%", 100 * lda_elem.prob);

                        bool      alternator = false;
                        TextBlock text_doc   = ListFormattingTools.GetDocumentTextBlock(pdf_document, ref alternator, Features.Expedition_TopicDocument, TopicDocumentPressed_MouseButtonEventHandler, doc_percentage + " - ");
                        ObjPapers.Children.Add(text_doc);
                    }

                    // The MORE button
                    if (!detailed_mode && NUM_DOCS < eds.docs.Count)
                    {
                        AugmentedButton button_more = new AugmentedButton();
                        button_more.Caption = "Show me more";
                        button_more.Click  += button_more_Click;
                        ObjPapers.Children.Add(button_more);
                    }

                    // The BRAINSTORM button
                    {
                        AugmentedButton button_brainstorm = new AugmentedButton();
                        button_brainstorm.Caption = "Show me in Brainstorm";
                        button_brainstorm.Click  += button_brainstorm_Click;
                        button_brainstorm.Tag     = tod;
                        ObjPapers.Children.Add(button_brainstorm);
                    }
                }
            }
        }
Beispiel #10
0
        /// <summary>
        /// Suggests documents related to <paramref name="pdf_document"/>: candidates are taken from the front of
        /// the sorted document lists of the document's first three topics and scored with
        /// sqrt(topic probability * document probability).
        /// </summary>
        /// <param name="pdf_document">The document to find related documents for.</param>
        /// <param name="NUM_OTHERS">Maximum number of candidates to turn into results.</param>
        /// <returns>The results found; empty when Expedition has no data for the document or when an error occurred (errors are logged).</returns>
        public static List<Result> GetRelevantOthers(PDFDocument pdf_document, int NUM_OTHERS)
        {
            List<Result> results = new List<Result>();

            try
            {
                // Read the data source once and use that one instance throughout
                ExpeditionDataSource eds = pdf_document.Library.ExpeditionManager.ExpeditionDataSource;
                if (null == eds)
                {
                    return results;
                }

                if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
                {
                    return results;
                }

                LDAAnalysis lda_analysis = eds.LDAAnalysis;

                // Fill the similar papers
                int doc_id = eds.docs_index[pdf_document.Fingerprint];
                TopicProbability[] topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];

                List<DocProbability> similar_docs = new List<DocProbability>();

                // Only look at the first 3 topics
                for (int t = 0; t < topics.Length && t < 3; ++t)
                {
                    int    topic      = topics[t].topic;
                    double topic_prob = topics[t].prob;

                    // Look at the first 50 docs in each topic (if there are that many)
                    DocProbability[] docs = lda_analysis.DensityOfDocsInTopicsSorted[topic];
                    for (int d = 0; d < docs.Length && d < 50; ++d)
                    {
                        int    doc      = docs[d].doc;
                        double doc_prob = docs[d].prob;

                        DocProbability dp = new DocProbability(Math.Sqrt(topic_prob * doc_prob), doc);
                        similar_docs.Add(dp);
                    }
                }

                // Now take the top N docs
                similar_docs.Sort();
                for (int i = 0; i < similar_docs.Count && i < NUM_OTHERS; ++i)
                {
                    string      fingerprint_to_look_for = eds.docs[similar_docs[i].doc];
                    PDFDocument pdf_document_similar    = pdf_document.Library.GetDocumentByFingerprint(fingerprint_to_look_for);
                    if (null == pdf_document_similar)
                    {
                        // Do not hand a null document to the caller
                        Logging.Warn("ExpeditionPaperSuggestions: Cannot find similar document anymore for fingerprint {0}", fingerprint_to_look_for);
                        continue;
                    }

                    results.Add(new Result {
                        pdf_document = pdf_document_similar, relevance = similar_docs[i].prob
                    });
                }
            }
            catch (Exception ex)
            {
                // pdf_document itself may be the null that got us here, so do not dereference it blindly
                Logging.Error(ex, "There was a problem getting the relevant others for document {0}", pdf_document?.Fingerprint);
            }

            return results;
        }
Beispiel #11
0
        /// <summary>
        /// Redraws the topics pie chart whenever the control's DataContext changes. The chart shows the
        /// document's first three topics from its sorted topic list plus an "Others" segment for the remainder.
        /// The "please run Expedition" text stays visible whenever no chart could be produced.
        /// </summary>
        private void ExpeditionPaperThemesControl_DataContextChanged(object sender, DependencyPropertyChangedEventArgs e)
        {
            // Clear the old
            ObjSeriesTopics.DataSource        = null;
            TxtPleaseRunExpedition.Visibility = Visibility.Visible;
            ChartTopics.Visibility            = Visibility.Collapsed;

            AugmentedBindable<PDFDocument> pdf_document_bindable = DataContext as AugmentedBindable<PDFDocument>;
            if (null == pdf_document_bindable)
            {
                return;
            }

            PDFDocument pdf_document = pdf_document_bindable.Underlying;

            // Read the data source once; any missing link simply means there is nothing to chart
            ExpeditionDataSource eds = pdf_document.Library?.ExpeditionManager?.ExpeditionDataSource;
            if (null == eds)
            {
                return;
            }

            // Draw the pie chart
            try
            {
                if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
                {
                    return;
                }

                LDAAnalysis lda_analysis = eds.LDAAnalysis;

                int doc_id = eds.docs_index[pdf_document.Fingerprint];
                TopicProbability[] topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];

                // One brush per charted topic plus one for the "Others" segment
                int     ITEMS_IN_CHART = Math.Min(topics.Length, 3);
                Brush[] brushes        = new Brush[ITEMS_IN_CHART + 1];

                List<ChartItem> chart_items = new List<ChartItem>();
                double          remaining_segment_percentage = 1.0;
                for (int t = 0; t < ITEMS_IN_CHART; ++t)
                {
                    string topic_name = eds.GetDescriptionForTopic(topics[t].topic);
                    double percentage = topics[t].prob;

                    chart_items.Add(new ChartItem {
                        Topic = topic_name, Percentage = percentage
                    });
                    brushes[t] = new SolidColorBrush(eds.Colours[topics[t].topic]);

                    remaining_segment_percentage -= percentage;
                }

                chart_items.Add(new ChartItem {
                    Topic = "Others", Percentage = remaining_segment_percentage
                });
                brushes[ITEMS_IN_CHART] = new SolidColorBrush(Colors.White);

                ObjChartTopicsArea.ColorModel.CustomPalette = brushes;
                ObjChartTopicsArea.ColorModel.Palette       = ChartColorPalette.Custom;
                ObjSeriesTopics.DataSource = chart_items;

                // Silly: EnableAnimation is switched off and back on around the animation settings
                ObjSeriesTopics.AnimationDuration = TimeSpan.FromMilliseconds(1000);
                ObjSeriesTopics.EnableAnimation   = false;
                ObjSeriesTopics.AnimateOneByOne   = true;
                ObjSeriesTopics.AnimateOption     = AnimationOptions.Fade;
                ObjSeriesTopics.EnableAnimation   = true;

                // Reveal the chart only once it has been built successfully
                TxtPleaseRunExpedition.Visibility = Visibility.Collapsed;
                ChartTopics.Visibility            = Visibility.Visible;
            }
            catch (Exception ex)
            {
                // The chart stays collapsed, so a half-built chart is never shown
                Logging.Error(ex, "There was a problem while generating the topics chart for document {0}", pdf_document.Fingerprint);
            }
        }
        /// <summary>
        /// Suggests documents related to <paramref name="pdf_document"/>. Candidates come from the front of the
        /// sorted document lists (at most 50 each) of the document's first three topics, scored with
        /// sqrt(topic probability * document probability). Candidates whose document can no longer be found are
        /// logged and skipped.
        /// </summary>
        /// <param name="pdf_document">The document to find related documents for.</param>
        /// <param name="NUM_OTHERS">Maximum number of candidates inspected after sorting.</param>
        /// <returns>The related documents found; empty when there is no Expedition data or an error occurred (errors are logged).</returns>
        public static List<Result> GetRelevantOthers(PDFDocument pdf_document, int NUM_OTHERS)
        {
            List<Result> relevant = new List<Result>();

            try
            {
                ExpeditionDataSource eds = pdf_document.LibraryRef?.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
                if (null == eds)
                {
                    return relevant;
                }

                LDAAnalysis lda_analysis = eds.LDAAnalysis;

                if (!eds.docs_index.ContainsKey(pdf_document.Fingerprint))
                {
                    return relevant;
                }

                int doc_id = eds.docs_index[pdf_document.Fingerprint];
                TopicProbability[] ranked_topics = lda_analysis.DensityOfTopicsInDocsSorted[doc_id];

                List<DocProbability> candidates = new List<DocProbability>();

                // Gather candidates from the first 3 topics only
                int topic_limit = Math.Min(ranked_topics.Length, 3);
                for (int t = 0; t < topic_limit; ++t)
                {
                    int    topic      = ranked_topics[t].topic;
                    double topic_prob = ranked_topics[t].prob;

                    ASSERT.Test(topic >= 0);
                    ASSERT.Test(topic < lda_analysis.NUM_TOPICS);

                    DocProbability[] topic_docs = lda_analysis.DensityOfDocsInTopicsSorted[topic];
                    ASSERT.Test(topic_docs != null);
                    ASSERT.Test(topic_docs.Length == lda_analysis.NUM_DOCS);

                    // Take the first 50 docs of the topic (or all of them when there are fewer)
                    int doc_limit = Math.Min(topic_docs.Length, 50);
                    for (int d = 0; d < doc_limit; ++d)
                    {
                        int    candidate_doc  = topic_docs[d].doc;
                        double candidate_prob = topic_docs[d].prob;

                        candidates.Add(new DocProbability(Math.Sqrt(topic_prob * candidate_prob), candidate_doc));
                    }
                }

                // Sort the candidates and resolve the leading ones back into documents
                candidates.Sort();
                int result_limit = Math.Min(candidates.Count, NUM_OTHERS);
                for (int i = 0; i < result_limit; ++i)
                {
                    DocProbability candidate = candidates[i];

                    string      fingerprint_to_look_for = eds.docs[candidate.doc];
                    PDFDocument pdf_document_similar    = pdf_document.LibraryRef.Xlibrary.GetDocumentByFingerprint(fingerprint_to_look_for);
                    if (null != pdf_document_similar)
                    {
                        relevant.Add(new Result {
                            pdf_document = pdf_document_similar, relevance = candidate.prob
                        });
                    }
                    else
                    {
                        Logging.Warn("ExpeditionPaperSuggestions: Cannot find similar document anymore for fingerprint {0}", fingerprint_to_look_for);
                    }
                }
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "There was a problem getting the relevant others for document {0}", pdf_document.Fingerprint);
            }

            return relevant;
        }
Beispiel #13
0
        /// <summary>
        /// Ranks the documents of an Expedition data source with a theme-biased PageRank over their outbound
        /// citations and adds the ten top-ranked documents as child nodes of <paramref name="node_control_"/>.
        /// Must not be called on the UI thread; the nodes themselves are added through the UI thread.
        /// </summary>
        /// <param name="node_control_">The node that receives the resulting document nodes.</param>
        /// <param name="web_library_detail">The library used to resolve fingerprints to documents.</param>
        /// <param name="eds">The Expedition data source; must not be null.</param>
        /// <param name="tags_distribution">The theme as one weight per topic.</param>
        internal static void AddDocumentsInfluentialInDistribution(NodeControl node_control_, WebLibraryDetail web_library_detail, ExpeditionDataSource eds, float[] tags_distribution)
        {
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
            ASSERT.Test(eds != null);

            Logging.Info("+Performing ThemedPageRank on {0} documents", eds.LDAAnalysis.NUM_DOCS);

            // We have the distribution of the topic in tags_distribution

            LDAAnalysis lda = eds.LDAAnalysis;

            float[,] density_of_topics_in_docs = lda.DensityOfTopicsInDocuments;

            // The squared length of the tags distribution is the same for every document, so compute it once
            double bias_den_tags = 0;
            for (int topic = 0; topic < lda.NUM_TOPICS; ++topic)
            {
                bias_den_tags += tags_distribution[topic] * tags_distribution[topic];
            }

            // The bias of a document is the cosine similarity between its topic densities and the tags distribution
            double[] biases = new double[lda.NUM_DOCS];
            for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
            {
                double bias_num     = 0;
                double bias_den_doc = 0;

                for (int topic = 0; topic < lda.NUM_TOPICS; ++topic)
                {
                    bias_num     += density_of_topics_in_docs[doc, topic] * tags_distribution[topic];
                    bias_den_doc += density_of_topics_in_docs[doc, topic] * density_of_topics_in_docs[doc, topic];
                }

                // An all-zero vector on either side would give 0/0 = NaN, which then poisons the
                // normalisation and the sort below; treat such a document as having no bias instead.
                double bias_den = Math.Sqrt(bias_den_doc) * Math.Sqrt(bias_den_tags);
                biases[doc] = (0 < bias_den) ? bias_num / bias_den : 0;
            }

            // Then build up a matrix FROM each document -
            List<int>[] references_outbound = new List<int>[lda.NUM_DOCS];
            for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
            {
                references_outbound[doc] = new List<int>();

                string      fingerprint  = eds.docs[doc];
                PDFDocument pdf_document = web_library_detail.Xlibrary.GetDocumentByFingerprint(fingerprint);
                if (null == pdf_document)
                {
                    Logging.Warn("ThemeExplorer::AddInInfluential: Cannot find document anymore for fingerprint {0}", fingerprint);
                }
                else
                {
                    // Only citations that point at documents known to Expedition take part in the ranking
                    List<Citation> citations_outbound = pdf_document.PDFDocumentCitationManager.GetOutboundCitations();
                    foreach (Citation citation in citations_outbound)
                    {
                        string fingerprint_inbound = citation.fingerprint_inbound;
                        if (eds.docs_index.ContainsKey(fingerprint_inbound))
                        {
                            int doc_inbound = eds.docs_index[fingerprint_inbound];
                            references_outbound[doc].Add(doc_inbound);
                        }
                    }
                }
            }

            // Space for the pageranks
            double[] pageranks_current = new double[lda.NUM_DOCS];
            double[] pageranks_next    = new double[lda.NUM_DOCS];

            // Initialise
            for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
            {
                pageranks_current[doc] = biases[doc];
            }

            // Iterate
            int NUM_ITERATIONS = 20;

            for (int iteration = 0; iteration < NUM_ITERATIONS; ++iteration)
            {
                Logging.Info("Performing ThemedPageRank iteration {0}", iteration);

                // The two buffers are swapped at the end of every iteration, so pageranks_next still holds
                // the values of an earlier iteration here. Reset it before accumulating into it.
                Array.Clear(pageranks_next, 0, pageranks_next.Length);

                // Spread out the activation pageranks
                // NOTE(review): the spread is driven by the fixed biases rather than by pageranks_current, so
                // every iteration yields the same vector - confirm whether spreading pageranks_current was intended.
                for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
                {
                    foreach (int doc_inbound in references_outbound[doc])
                    {
                        pageranks_next[doc_inbound] += biases[doc] / references_outbound[doc].Count;
                    }
                }

                // Mix the spread out pageranks with the initial bias pageranks
                double ALPHA = 0.5;
                for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
                {
                    pageranks_next[doc] = (1 - ALPHA) * pageranks_next[doc] + ALPHA * biases[doc];
                }

                // Normalise the next pageranks
                double total = 0;
                for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
                {
                    total += pageranks_next[doc];
                }
                if (0 < total)
                {
                    for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
                    {
                        pageranks_next[doc] /= total;
                    }
                }

                // Switch in the next pageranks; the old buffer is reused (after clearing) in the next iteration
                double[] pageranks_temp = pageranks_current;
                pageranks_current = pageranks_next;
                pageranks_next    = pageranks_temp;
            }

            // Sort the pageranks, descending: sort ascending with the document ids as companion array, then reverse both
            int[] docs = new int[lda.NUM_DOCS];
            for (int doc = 0; doc < lda.NUM_DOCS; ++doc)
            {
                docs[doc] = doc;
            }
            Array.Sort(pageranks_current, docs);
            Array.Reverse(pageranks_current);
            Array.Reverse(docs);

            WPFDoEvents.InvokeInUIThread(() =>
            {
                // Make the nodes
                for (int doc = 0; doc < 10 && doc < docs.Length; ++doc)
                {
                    int doc_id         = docs[doc];
                    string fingerprint = eds.docs[doc_id];

                    PDFDocument pdf_document = web_library_detail.Xlibrary.GetDocumentByFingerprint(fingerprint);
                    if (null == pdf_document)
                    {
                        Logging.Warn("Couldn't find similar document with fingerprint {0}", fingerprint);
                    }
                    else
                    {
                        PDFDocumentNodeContent content = new PDFDocumentNodeContent(pdf_document.Fingerprint, pdf_document.LibraryRef.Id);
                        NodeControlAddingByKeyboard.AddChildToNodeControl(node_control_, content, false);
                    }
                }
            });
        }
Beispiel #14
0
        // ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        /// <summary>
        /// Turns the tags of the theme node into a topic distribution and hands it to
        /// <paramref name="distribution_use"/>. Must be called on the UI thread; the computation and the
        /// callback run on a queued work item. The distribution is the average, over all tags known to the
        /// Expedition word index, of each tag's per-topic pseudo-density. Unknown tags are logged and ignored.
        /// </summary>
        /// <param name="distribution_use">Receives the node control, the library, the data source and the distribution.</param>
        private void ApplyTagsDistribution(DistributionUseDelegate distribution_use)
        {
            WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

            // Read what we need from the node before queueing: one tag per line, plus the owning library
            string[] tag_list   = theme_node_content.Underlying.Tags.Split('\n');
            string   library_id = theme_node_content.Underlying.library_fingerprint;

            SafeThreadPool.QueueUserWorkItem(o =>
            {
                WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

                WebLibraryDetail library_detail = WebLibraryManager.Instance.GetLibrary(library_id);
                if (null == library_detail)
                {
                    Logging.Warn("Unable to locate library " + library_id);
                    return;
                }

                ExpeditionDataSource eds = library_detail.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
                if (null == eds)
                {
                    Logging.Warn("Expedition has not been run for library '{0}'.", library_detail.Title);
                    return;
                }

                LDAAnalysis analysis = eds.LDAAnalysis;

                float[] topic_weights   = new float[analysis.NUM_TOPICS];
                int     recognised_tags = 0;

                foreach (string tag in tag_list)
                {
                    if (!eds.words_index.ContainsKey(tag))
                    {
                        Logging.Warn("Ignoring tag {0} which we don't recognise.", tag);
                        continue;
                    }

                    ++recognised_tags;

                    // Add this tag's per-topic weights to the running totals
                    int word_id = eds.words_index[tag];
                    for (int k = 0; k < analysis.NUM_TOPICS; ++k)
                    {
                        topic_weights[k] += analysis.PseudoDensityOfTopicsInWords[word_id, k];
                    }
                }

                // Turn the totals into an average; with no recognised tag the weights stay all zero
                if (0 < recognised_tags)
                {
                    for (int k = 0; k < analysis.NUM_TOPICS; ++k)
                    {
                        topic_weights[k] /= recognised_tags;
                    }
                }

                distribution_use(node_control, library_detail, eds, topic_weights);
            });
        }