/// <summary>
/// Records use of the "similar documents" feature and then, when the library's
/// Expedition data source indexes the PDF document behind this node, hands that
/// document's topic distribution to
/// <c>ThemeNodeContentControl.AddDocumentsSimilarToDistribution</c>.
/// Nothing is added when the Expedition manager or data source is missing, or when the
/// document's fingerprint is not in the data source's index.
/// </summary>
private void ExpandSimilars()
{
    FeatureTrackingManager.Instance.UseFeature(Features.Brainstorm_ExploreLibrary_Document_Similars);

    var pdf_document = pdf_document_node_content.PDFDocument;

    var expedition_manager = pdf_document.Library.ExpeditionManager;
    if (null == expedition_manager)
    {
        return;
    }

    ExpeditionDataSource data_source = expedition_manager.ExpeditionDataSource;
    if (null == data_source)
    {
        return;
    }

    if (!data_source.docs_index.ContainsKey(pdf_document.Fingerprint))
    {
        return;
    }

    // Copy this document's row out of the [document, topic] density matrix.
    int document_index = data_source.docs_index[pdf_document.Fingerprint];
    float[,] topic_densities = data_source.LDAAnalysis.DensityOfTopicsInDocuments;
    float[] document_distribution = new float[data_source.LDAAnalysis.NUM_TOPICS];
    for (int topic = 0; topic < document_distribution.Length; ++topic)
    {
        document_distribution[topic] = topic_densities[document_index, topic];
    }

    ThemeNodeContentControl.AddDocumentsSimilarToDistribution(node_control, pdf_document.Library, data_source, document_distribution);
}
/// <summary>
/// Records use of the "document themes" feature and adds a theme child node for each of
/// the first five entries (at most) of this document's sorted topic list, as provided by
/// the library's Expedition data source. Shows a warning message box when no theme node
/// was added.
/// </summary>
private void ExpandThemes()
{
    FeatureTrackingManager.Instance.UseFeature(Features.Brainstorm_ExploreLibrary_Document_Themes);

    var pdf_document = pdf_document_node_content.PDFDocument;
    int themes_added = 0;

    var expedition_manager = pdf_document.Library.ExpeditionManager;
    ExpeditionDataSource data_source = (null != expedition_manager) ? expedition_manager.ExpeditionDataSource : null;

    if (null != data_source && data_source.docs_index.ContainsKey(pdf_document.Fingerprint))
    {
        int document_index = data_source.docs_index[pdf_document.Fingerprint];
        TopicProbability[] sorted_topics = data_source.LDAAnalysis.DensityOfTopicsInDocsSorted[document_index];

        // Never show more than five themes for a single document.
        int theme_limit = (sorted_topics.Length < 5) ? sorted_topics.Length : 5;
        for (int rank = 0; rank < theme_limit; ++rank)
        {
            string description = data_source.GetDescriptionForTopic(sorted_topics[rank].topic, false, "\n");
            ThemeNodeContent theme_content = new ThemeNodeContent(description, pdf_document.Library.WebLibraryDetail.Id);
            NodeControlAddingByKeyboard.AddChildToNodeControl(node_control, theme_content, false);
            ++themes_added;
        }
    }

    if (0 == themes_added)
    {
        MessageBoxes.Warn("There were no themes available for this document. Please run Expedition against your library.");
    }
}
// ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

/// <summary>
/// Builds a topic distribution for this theme node by averaging, over every tag that the
/// library's Expedition data source recognises, that tag's row of
/// <c>PseudoDensityOfTopicsInWords</c>, and passes the result to
/// <paramref name="distribution_use"/>.
/// Logs a warning and returns without calling the delegate when the library cannot be
/// found or has no Expedition data source. Unrecognised tags are logged and skipped; if
/// no tag is recognised the delegate receives an all-zero distribution.
/// </summary>
/// <param name="distribution_use">Callback that consumes the computed distribution.</param>
private void ApplyTagsDistribution(DistributionUseDelegate distribution_use)
{
    // The theme's tags are stored as a single newline-separated string.
    string[] tag_list = theme_node_content.Underlying.Tags.Split('\n');
    string library_fingerprint = theme_node_content.Underlying.library_fingerprint;

    Library library = WebLibraryManager.Instance.GetLibrary(library_fingerprint);
    if (null == library)
    {
        Logging.Warn("Unable to locate library " + library_fingerprint);
        return;
    }

    ExpeditionDataSource eds = (null != library.ExpeditionManager) ? library.ExpeditionManager.ExpeditionDataSource : null;
    if (null == eds)
    {
        Logging.Warn("Expedition has not been run for library '{0}'.", library.WebLibraryDetail.Title);
        return;
    }

    float[] topic_sums = new float[eds.LDAAnalysis.NUM_TOPICS];
    int recognised_tags = 0;

    foreach (string tag in tag_list)
    {
        if (!eds.words_index.ContainsKey(tag))
        {
            Logging.Warn("Ignoring tag {0} which we don't recognise.", tag);
            continue;
        }

        ++recognised_tags;
        int word_id = eds.words_index[tag];
        for (int topic = 0; topic < eds.LDAAnalysis.NUM_TOPICS; ++topic)
        {
            topic_sums[topic] += eds.LDAAnalysis.PseudoDensityOfTopicsInWords[word_id, topic];
        }
    }

    // Turn the per-topic sums into a mean over the recognised tags.
    if (recognised_tags > 0)
    {
        for (int topic = 0; topic < eds.LDAAnalysis.NUM_TOPICS; ++topic)
        {
            topic_sums[topic] /= recognised_tags;
        }
    }

    distribution_use(node_control, library, eds, topic_sums);
}
/// <summary>
/// Opens a new brainstorm containing a single theme node for <paramref name="topic"/>,
/// then expands it by selecting all nodes and raising synthetic key-down events on the
/// scene (D and S, then A, then D), and finally selects the original theme node.
/// </summary>
/// <param name="library">Library whose Expedition data source describes the topic.</param>
/// <param name="topic">Topic number passed to <c>GetDescriptionForTopic</c>.</param>
internal void ExploreTopicInBrainstorm(Library library, int topic)
{
    ExpeditionDataSource eds = library.ExpeditionManager.ExpeditionDataSource;
    string topic_name = eds.GetDescriptionForTopic(topic, false, "\n");

    BrainstormControl brainstorm_control = Instance.OpenNewBrainstorm();
    var scene = brainstorm_control.SceneRenderingControl;

    ThemeNodeContent theme_content = new ThemeNodeContent(topic_name, library.WebLibraryDetail.Id);
    NodeControl theme_node = scene.AddNewNodeControlInScreenCentre(theme_content);
    brainstorm_control.AutoArrange = true;

    // Selects every node in the scene and then raises one key-down event per given key.
    void SelectAllAndPress(params Key[] keys)
    {
        scene.SelectAll();
        foreach (Key key in keys)
        {
            KeyEventArgs key_down = new KeyEventArgs(Keyboard.PrimaryDevice, PresentationSource.FromVisual(scene), 0, key)
            {
                RoutedEvent = Keyboard.KeyDownEvent
            };
            scene.RaiseEvent(key_down);
        }
    }

    // Expand the interesting documents by driving the keyboard shortcuts. This replaces the
    // older approach of calling ExpandSpecificDocuments() / ExpandInfluentialDocuments()
    // directly on the node's ThemeNodeContentControl.

    // Theme docs
    SelectAllAndPress(Key.D, Key.S);

    // Authors
    SelectAllAndPress(Key.A);

    // Their docs
    SelectAllAndPress(Key.D);

    scene.SetSelectedNodeControl(theme_node, false);
}
/// <summary>
/// Adds up to five theme child nodes to <paramref name="node_control"/> for
/// <paramref name="doc"/>, using the first entries of the document's sorted topic list
/// from the library's Expedition data source. Must be called off the UI thread; the nodes
/// themselves are created via <c>WPFDoEvents.InvokeInUIThread</c>.
/// Shows a warning message box when no theme is available for the document.
/// </summary>
/// <param name="doc">Document whose themes are expanded; asserted non-null, and the method returns early if it is null.</param>
/// <param name="node_control">Node that receives the theme child nodes.</param>
private static void ExpandThemes(PDFDocument doc, NodeControl node_control)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
    ASSERT.Test(doc != null);

    FeatureTrackingManager.Instance.UseFeature(Features.Brainstorm_ExploreLibrary_Document_Themes);

    if (doc == null)
    {
        return;
    }

    ASSERT.Test(doc.LibraryRef.Xlibrary != null);

    bool added_at_least_one_theme = false;

    ExpeditionDataSource eds = doc.LibraryRef.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
    if (null == eds)
    {
        // No data source at all: this is the case the "not been run" warning describes.
        Logging.Warn("Expedition has not been run for library '{0}'.", doc.LibraryRef.Title);
    }
    else if (!eds.docs_index.ContainsKey(doc.Fingerprint))
    {
        // Expedition has run, but this particular document is not part of its index.
        Logging.Warn("Document {0} is not present in the Expedition index.", doc.Fingerprint);
    }
    else
    {
        int doc_id = eds.docs_index[doc.Fingerprint];
        TopicProbability[] topics = eds.LDAAnalysis.DensityOfTopicsInDocsSorted[doc_id];
        int num_themes = Math.Min(topics.Length, 5);

        // Decide this before dispatching so the result does not depend on when the
        // UI-thread delegate actually runs.
        added_at_least_one_theme = (0 < num_themes);

        WPFDoEvents.InvokeInUIThread(() =>
        {
            for (int t = 0; t < num_themes; ++t)
            {
                string topic_name = eds.GetDescriptionForTopic(topics[t].topic, include_topic_number: false, "\n");
                ThemeNodeContent tnc = new ThemeNodeContent(topic_name, doc.LibraryRef.Id);
                NodeControlAddingByKeyboard.AddChildToNodeControl(node_control, tnc, false);
            }
        });
    }

    if (!added_at_least_one_theme)
    {
        MessageBoxes.Warn("There were no themes available for this document. Please run Expedition against your library.");
    }
}
/// <summary>
/// Opens a new brainstorm with a node for <paramref name="library"/>. When the library has
/// an Expedition data source, one theme child node is added per topic; otherwise three
/// connected text nodes are added asking the user to run Expedition.
/// </summary>
/// <param name="library">Library to explore.</param>
internal void ExploreLibraryInBrainstorm(Library library)
{
    BrainstormControl brainstorm_control = Instance.OpenNewBrainstorm();

    int WIDTH = 320;
    int HEIGHT = 240;

    LibraryNodeContent content_library = new LibraryNodeContent(library.WebLibraryDetail.Title, library.WebLibraryDetail.Id);
    NodeControl node_library = brainstorm_control.SceneRenderingControl.AddNewNodeControl(content_library, 0, 0, WIDTH, HEIGHT);

    // The Expedition manager itself may be absent; treat that the same as a missing data
    // source so the "please run Expedition" hints below are shown instead of throwing.
    ExpeditionDataSource eds = (null != library.ExpeditionManager) ? library.ExpeditionManager.ExpeditionDataSource : null;

    if (null != eds)
    {
        for (int topic = 0; topic < eds.lda_sampler.NumTopics; ++topic)
        {
            string topic_name = eds.GetDescriptionForTopic(topic, false, "\n");
            ThemeNodeContent tnc = new ThemeNodeContent(topic_name, library.WebLibraryDetail.Id);
            NodeControlAddingByKeyboard.AddChildToNodeControl(node_library, tnc);
        }
    }
    else
    {
        // Three hint nodes, each connected to the library node.
        {
            StringNodeContent content_warning = new StringNodeContent("Please run Expedition on your library.");
            NodeControl node_warning = brainstorm_control.SceneRenderingControl.AddNewNodeControl(content_warning, 0, -2 * HEIGHT);
            brainstorm_control.SceneRenderingControl.AddNewConnectorControl(node_library, node_warning);
        }
        {
            StringNodeContent content_warning = new StringNodeContent("Then you will get to\nexplore its themes.");
            NodeControl node_warning = brainstorm_control.SceneRenderingControl.AddNewNodeControl(content_warning, -WIDTH, +2 * HEIGHT);
            brainstorm_control.SceneRenderingControl.AddNewConnectorControl(node_library, node_warning);
        }
        {
            StringNodeContent content_warning = new StringNodeContent("And you will get to\nexplore its documents.");
            NodeControl node_warning = brainstorm_control.SceneRenderingControl.AddNewNodeControl(content_warning, +WIDTH, +2 * HEIGHT);
            brainstorm_control.SceneRenderingControl.AddNewConnectorControl(node_library, node_warning);
        }
    }

    brainstorm_control.AutoArrange = true;
}
/// <summary>
/// Maps each Expedition topic description to the fingerprints of the leading documents in
/// that topic's sorted document list. The number of documents taken per topic is one tenth
/// of the document count, but at least three (and never more than the document count).
/// </summary>
/// <param name="library">Library whose Expedition data source is read.</param>
/// <param name="parent_fingerprints">
/// When non-null, only documents whose fingerprint is in this set are included.
/// </param>
/// <returns>
/// The topic-to-fingerprints map; empty when Expedition data is unavailable. Any exception
/// is logged and whatever was collected up to that point is returned.
/// </returns>
public static MultiMapSet<string, string> GetNodeItems_STATIC(Library library, HashSet<string> parent_fingerprints)
{
    MultiMapSet<string, string> results = new MultiMapSet<string, string>();

    try
    {
        // Check that expedition has been run...
        if (null == library.ExpeditionManager || null == library.ExpeditionManager.ExpeditionDataSource)
        {
            return results;
        }

        ExpeditionDataSource eds = library.ExpeditionManager.ExpeditionDataSource;

        // Show the top % of docs (the limit is the same for every topic).
        int total_docs = eds.LDAAnalysis.NUM_DOCS;
        int num_docs = Math.Max(total_docs / 10, 3);

        for (int t = 0; t < eds.LDAAnalysis.NUM_TOPICS; ++t)
        {
            string topic_name = eds.GetDescriptionForTopic(t, false, "; ");

            for (int d = 0; d < total_docs && d < num_docs; ++d)
            {
                PDFDocument pdf_document = library.GetDocumentByFingerprint(eds.docs[eds.LDAAnalysis.DensityOfDocsInTopicsSorted[t][d].doc]);

                // The document may no longer be in the library; skip it rather than let a
                // null dereference abort the remaining documents and topics.
                if (null == pdf_document)
                {
                    continue;
                }

                if (null == parent_fingerprints || parent_fingerprints.Contains(pdf_document.Fingerprint))
                {
                    results.Add(topic_name, pdf_document.Fingerprint);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "There was a problem while loading the themes for the library explorer.");
    }

    return results;
}
/// <summary>
/// Passes the topic distribution of <paramref name="doc"/> to
/// <c>ThemeNodeContentControl.AddDocumentsSimilarToDistribution</c> so that similar
/// documents are added under <paramref name="node_control"/>. Must be called off the UI
/// thread. Logs a warning when the library has no Expedition data source, and does
/// nothing further when the document's fingerprint is not in the data source's index.
/// </summary>
/// <param name="doc">Document to find similar documents for; asserted non-null, and the method returns early if it is null.</param>
/// <param name="node_control">Node handed on to <c>AddDocumentsSimilarToDistribution</c>.</param>
private static void ExpandSimilars(PDFDocument doc, NodeControl node_control)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
    ASSERT.Test(doc != null);

    FeatureTrackingManager.Instance.UseFeature(Features.Brainstorm_ExploreLibrary_Document_Similars);

    if (doc == null)
    {
        return;
    }

    ASSERT.Test(doc.LibraryRef.Xlibrary != null);

    ExpeditionDataSource data_source = doc.LibraryRef.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
    if (null == data_source)
    {
        Logging.Warn("Expedition has not been run for library '{0}'.", doc.LibraryRef.Title);
        return;
    }

    if (!data_source.docs_index.ContainsKey(doc.Fingerprint))
    {
        return;
    }

    // Copy this document's row out of the [document, topic] density matrix.
    int document_index = data_source.docs_index[doc.Fingerprint];
    LDAAnalysis analysis = data_source.LDAAnalysis;
    float[,] topic_densities = analysis.DensityOfTopicsInDocuments;
    float[] document_distribution = new float[analysis.NUM_TOPICS];
    for (int topic = 0; topic < analysis.NUM_TOPICS; ++topic)
    {
        document_distribution[topic] = topic_densities[document_index, topic];
    }

    ThemeNodeContentControl.AddDocumentsSimilarToDistribution(node_control, doc.LibraryRef, data_source, document_distribution);
}
/// <summary>
/// Ranks the documents of the Expedition data source with a bias-weighted PageRank over
/// the library's citation graph and adds the ten top-ranked documents (at most) as child
/// nodes of <paramref name="node_control_"/>.
/// </summary>
/// <remarks>
/// Each document's bias is the cosine similarity between its topic densities and
/// <paramref name="tags_distribution"/>. A document with a zero-length topic vector, or an
/// all-zero <paramref name="tags_distribution"/>, gets a bias of 0 rather than NaN.
/// Each iteration starts from a cleared buffer, spreads every document's current rank
/// equally over the documents it cites, mixes the result with the bias and normalises it.
/// </remarks>
/// <param name="node_control_">Node that receives the resulting document nodes.</param>
/// <param name="library">Library used to resolve fingerprints to documents.</param>
/// <param name="eds">Expedition data source providing the LDA analysis and document index.</param>
/// <param name="tags_distribution">Topic distribution to bias the ranking towards; indexed by topic.</param>
internal static void AddDocumentsInfluentialInDistribution(NodeControl node_control_, Library library, ExpeditionDataSource eds, float[] tags_distribution)
{
    var lda = eds.LDAAnalysis;
    int num_docs = lda.NUM_DOCS;
    int num_topics = lda.NUM_TOPICS;

    Logging.Info("+Performing ThemedPageRank on {0} documents", num_docs);

    // We have the distribution of the topic in tags_distribution.
    // The bias of each document is the dot product of its topic densities with that
    // distribution, normalised by both vector lengths (i.e. their cosine similarity).
    float[,] density_of_topics_in_docs = lda.DensityOfTopicsInDocuments;

    // The length of the tags distribution is the same for every document, so compute it once.
    double bias_den_tags = 0;
    for (int topic = 0; topic < num_topics; ++topic)
    {
        bias_den_tags += tags_distribution[topic] * tags_distribution[topic];
    }
    double tags_norm = Math.Sqrt(bias_den_tags);

    double[] biases = new double[num_docs];
    for (int doc = 0; doc < num_docs; ++doc)
    {
        double bias_num = 0;
        double bias_den_doc = 0;
        for (int topic = 0; topic < num_topics; ++topic)
        {
            float density = density_of_topics_in_docs[doc, topic];
            bias_num += density * tags_distribution[topic];
            bias_den_doc += density * density;
        }

        // Guard against 0/0: an empty vector on either side means "no similarity".
        double denominator = Math.Sqrt(bias_den_doc) * tags_norm;
        biases[doc] = (0 < denominator) ? bias_num / denominator : 0;
    }

    // Then build up the outbound citation list FROM each document, restricted to
    // documents that are present in the Expedition index.
    List<int>[] references_outbound = new List<int>[num_docs];
    for (int doc = 0; doc < num_docs; ++doc)
    {
        references_outbound[doc] = new List<int>();

        string fingerprint = eds.docs[doc];
        PDFDocument pdf_document = library.GetDocumentByFingerprint(fingerprint);
        if (null == pdf_document)
        {
            Logging.Warn("ThemeExplorer::AddInInfluential: Cannot find document anymore for fingerprint {0}", fingerprint);
            continue;
        }

        List<Citation> citations_outbound = pdf_document.PDFDocumentCitationManager.GetOutboundCitations();
        foreach (Citation citation in citations_outbound)
        {
            string fingerprint_inbound = citation.fingerprint_inbound;
            if (eds.docs_index.ContainsKey(fingerprint_inbound))
            {
                int doc_inbound = eds.docs_index[fingerprint_inbound];
                references_outbound[doc].Add(doc_inbound);
            }
        }
    }

    // Space for the pageranks
    double[] pageranks_current = new double[num_docs];
    double[] pageranks_next = new double[num_docs];

    // Initialise
    for (int doc = 0; doc < num_docs; ++doc)
    {
        pageranks_current[doc] = biases[doc];
    }

    // Iterate
    const int NUM_ITERATIONS = 20;
    const double ALPHA = 0.5;
    for (int iteration = 0; iteration < NUM_ITERATIONS; ++iteration)
    {
        Logging.Info("Performing ThemedPageRank iteration {0}", iteration);

        // The "next" buffer is the swapped-out buffer of an earlier iteration; wipe it so
        // the accumulation below does not build on stale values.
        Array.Clear(pageranks_next, 0, pageranks_next.Length);

        // Spread out the current pageranks along the outbound citations
        for (int doc = 0; doc < num_docs; ++doc)
        {
            int outbound_count = references_outbound[doc].Count;
            foreach (int doc_inbound in references_outbound[doc])
            {
                pageranks_next[doc_inbound] += pageranks_current[doc] / outbound_count;
            }
        }

        // Mix the spread out pageranks with the initial bias pageranks
        for (int doc = 0; doc < num_docs; ++doc)
        {
            pageranks_next[doc] = (1 - ALPHA) * pageranks_next[doc] + ALPHA * biases[doc];
        }

        // Normalise the next pageranks
        double total = 0;
        for (int doc = 0; doc < num_docs; ++doc)
        {
            total += pageranks_next[doc];
        }
        if (0 < total)
        {
            for (int doc = 0; doc < num_docs; ++doc)
            {
                pageranks_next[doc] /= total;
            }
        }

        // Swap the buffers: the freshly computed ranks become current.
        double[] pageranks_temp = pageranks_current;
        pageranks_current = pageranks_next;
        pageranks_next = pageranks_temp;
    }

    // Sort the pageranks, descending, carrying the document ids along
    int[] docs = new int[num_docs];
    for (int doc = 0; doc < num_docs; ++doc)
    {
        docs[doc] = doc;
    }
    Array.Sort(pageranks_current, docs);
    Array.Reverse(pageranks_current);
    Array.Reverse(docs);

    // Make the nodes
    for (int doc = 0; doc < 10 && doc < docs.Length; ++doc)
    {
        int doc_id = docs[doc];
        string fingerprint = eds.docs[doc_id];

        PDFDocument pdf_document = library.GetDocumentByFingerprint(fingerprint);
        if (null == pdf_document)
        {
            Logging.Warn("Couldn't find similar document with fingerprint {0}", fingerprint);
        }
        else
        {
            PDFDocumentNodeContent content = new PDFDocumentNodeContent(pdf_document.Fingerprint, pdf_document.Library.WebLibraryDetail.Id);
            NodeControlAddingByKeyboard.AddChildToNodeControl(node_control_, content, false);
        }
    }
}
/// <summary>
/// Sets the text border to 0.8 opacity and paints its background with the brush that
/// <c>ThemeBrushes.GetBrushForDistribution</c> returns for
/// <paramref name="tags_distribution"/>, falling back to
/// <c>NodeThemes.background_brush</c> when that brush is <c>ThemeBrushes.UNKNOWN_BRUSH</c>.
/// </summary>
/// <param name="node_control_">Not used by this method.</param>
/// <param name="library">Library passed to the brush lookup.</param>
/// <param name="eds">Not used by this method.</param>
/// <param name="tags_distribution">Topic distribution that selects the brush.</param>
internal void ColourNodeBackground(NodeControl node_control_, Library library, ExpeditionDataSource eds, float[] tags_distribution)
{
    TextBorder.Opacity = 0.8;

    var distribution_brush = ThemeBrushes.GetBrushForDistribution(library, tags_distribution.Length, tags_distribution);
    if (ThemeBrushes.UNKNOWN_BRUSH == distribution_brush)
    {
        TextBorder.Background = NodeThemes.background_brush;
    }
    else
    {
        TextBorder.Background = distribution_brush;
    }
}
/// <summary>
/// Adds a document child node to <paramref name="node_control_"/> for each of the first
/// ten document ids (at most) returned by
/// <c>LDAAnalysisTools.GetDocumentsSimilarToDistribution</c>. Ids whose fingerprint no
/// longer resolves to a document in <paramref name="library"/> are logged and skipped.
/// </summary>
/// <param name="node_control_">Node that receives the document child nodes.</param>
/// <param name="library">Library used to resolve fingerprints to documents.</param>
/// <param name="eds">Expedition data source providing the LDA analysis and fingerprints.</param>
/// <param name="tags_distribution">Topic distribution to compare documents against.</param>
internal static void AddDocumentsSimilarToDistribution(NodeControl node_control_, Library library, ExpeditionDataSource eds, float[] tags_distribution)
{
    int[] similar_doc_ids = LDAAnalysisTools.GetDocumentsSimilarToDistribution(eds.LDAAnalysis, tags_distribution);

    // Only the first ten results are turned into nodes.
    int node_limit = (similar_doc_ids.Length < 10) ? similar_doc_ids.Length : 10;
    for (int rank = 0; rank < node_limit; ++rank)
    {
        string fingerprint = eds.docs[similar_doc_ids[rank]];

        PDFDocument pdf_document = library.GetDocumentByFingerprint(fingerprint);
        if (null == pdf_document)
        {
            Logging.Warn("Couldn't find similar document with fingerprint {0}", fingerprint);
            continue;
        }

        PDFDocumentNodeContent document_content = new PDFDocumentNodeContent(pdf_document.Fingerprint, pdf_document.Library.WebLibraryDetail.Id);
        NodeControlAddingByKeyboard.AddChildToNodeControl(node_control_, document_content, false);
    }
}
/// <summary>
/// Must be called off the UI thread. Via <c>WPFDoEvents.InvokeInUIThread</c>, sets the
/// text border to 0.8 opacity and paints its background with the brush that
/// <c>ThemeBrushes.GetBrushForDistribution</c> returns for
/// <paramref name="tags_distribution"/>, falling back to
/// <c>NodeThemes.background_brush</c> when that brush is <c>ThemeBrushes.UNKNOWN_BRUSH</c>.
/// </summary>
/// <param name="node_control_">Not used by this method.</param>
/// <param name="web_library_detail">Library detail passed to the brush lookup.</param>
/// <param name="eds">Only asserted to be non-null.</param>
/// <param name="tags_distribution">Topic distribution that selects the brush.</param>
internal void ColourNodeBackground(NodeControl node_control_, WebLibraryDetail web_library_detail, ExpeditionDataSource eds, float[] tags_distribution)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
    ASSERT.Test(eds != null);

    WPFDoEvents.InvokeInUIThread(() =>
    {
        TextBorder.Opacity = 0.8;

        var distribution_brush = ThemeBrushes.GetBrushForDistribution(web_library_detail, tags_distribution.Length, tags_distribution);
        if (ThemeBrushes.UNKNOWN_BRUSH == distribution_brush)
        {
            TextBorder.Background = NodeThemes.background_brush;
        }
        else
        {
            TextBorder.Background = distribution_brush;
        }
    });
}
// ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

/// <summary>
/// Must be called on the UI thread. Reads this theme node's tags and library fingerprint,
/// then queues a work item on <c>SafeThreadPool</c> that builds a topic distribution by
/// averaging, over every tag the library's Expedition data source recognises, that tag's
/// row of <c>PseudoDensityOfTopicsInWords</c>, and passes the result to
/// <paramref name="distribution_use"/>.
/// The work item logs a warning and stops without calling the delegate when the library
/// cannot be found or has no Expedition data source. Unrecognised tags are logged and
/// skipped; if no tag is recognised the delegate receives an all-zero distribution.
/// </summary>
/// <param name="distribution_use">Callback that consumes the computed distribution; invoked from the queued work item.</param>
private void ApplyTagsDistribution(DistributionUseDelegate distribution_use)
{
    WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

    // Read what we need from the node before the work is queued.
    // The theme's tags are stored as a single newline-separated string.
    string[] tag_list = theme_node_content.Underlying.Tags.Split('\n');
    string library_fingerprint = theme_node_content.Underlying.library_fingerprint;

    SafeThreadPool.QueueUserWorkItem(state =>
    {
        WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

        WebLibraryDetail web_library_detail = WebLibraryManager.Instance.GetLibrary(library_fingerprint);
        if (null == web_library_detail)
        {
            Logging.Warn("Unable to locate library " + library_fingerprint);
            return;
        }

        ExpeditionDataSource eds = web_library_detail.Xlibrary?.ExpeditionManager?.ExpeditionDataSource;
        if (null == eds)
        {
            Logging.Warn("Expedition has not been run for library '{0}'.", web_library_detail.Title);
            return;
        }

        LDAAnalysis analysis = eds.LDAAnalysis;
        float[] topic_sums = new float[analysis.NUM_TOPICS];
        int recognised_tags = 0;

        foreach (string tag in tag_list)
        {
            if (!eds.words_index.ContainsKey(tag))
            {
                Logging.Warn("Ignoring tag {0} which we don't recognise.", tag);
                continue;
            }

            ++recognised_tags;
            int word_id = eds.words_index[tag];
            for (int topic = 0; topic < analysis.NUM_TOPICS; ++topic)
            {
                topic_sums[topic] += analysis.PseudoDensityOfTopicsInWords[word_id, topic];
            }
        }

        // Turn the per-topic sums into a mean over the recognised tags.
        if (recognised_tags > 0)
        {
            for (int topic = 0; topic < analysis.NUM_TOPICS; ++topic)
            {
                topic_sums[topic] /= recognised_tags;
            }
        }

        distribution_use(node_control, web_library_detail, eds, topic_sums);
    });
}
/// <summary>
/// Must be called off the UI thread. Asks
/// <c>LDAAnalysisTools.GetDocumentsSimilarToDistribution</c> for document ids and then, via
/// <c>WPFDoEvents.InvokeInUIThread</c>, adds a document child node to
/// <paramref name="node_control_"/> for each of the first ten ids (at most). Ids whose
/// fingerprint no longer resolves to a document in the library are logged and skipped.
/// </summary>
/// <param name="node_control_">Node that receives the document child nodes.</param>
/// <param name="web_library_detail">Library detail whose <c>Xlibrary</c> resolves fingerprints to documents.</param>
/// <param name="eds">Expedition data source providing the LDA analysis and fingerprints; asserted non-null.</param>
/// <param name="tags_distribution">Topic distribution to compare documents against.</param>
internal static void AddDocumentsSimilarToDistribution(NodeControl node_control_, WebLibraryDetail web_library_detail, ExpeditionDataSource eds, float[] tags_distribution)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();
    ASSERT.Test(eds != null);

    // The similarity lookup happens here, before the UI-thread delegate is dispatched.
    int[] similar_doc_ids = LDAAnalysisTools.GetDocumentsSimilarToDistribution(eds.LDAAnalysis, tags_distribution);

    WPFDoEvents.InvokeInUIThread(() =>
    {
        WPFDoEvents.AssertThisCodeIsRunningInTheUIThread();

        // Only the first ten results are turned into nodes.
        int node_limit = (similar_doc_ids.Length < 10) ? similar_doc_ids.Length : 10;
        for (int rank = 0; rank < node_limit; ++rank)
        {
            string fingerprint = eds.docs[similar_doc_ids[rank]];

            PDFDocument pdf_document = web_library_detail.Xlibrary.GetDocumentByFingerprint(fingerprint);
            if (null == pdf_document)
            {
                Logging.Warn("Couldn't find similar document with fingerprint {0}", fingerprint);
                continue;
            }

            PDFDocumentNodeContent document_content = new PDFDocumentNodeContent(pdf_document.Fingerprint, pdf_document.LibraryRef.Id);
            NodeControlAddingByKeyboard.AddChildToNodeControl(node_control_, document_content, false);
        }
    });
}