CreateCountersForWordsAndWordPairs
(
    String searchTerm,
    CalculateGraphMetricsContext calculateGraphMetricsContext,
    out WordCounter wordCounter,
    out WordPairCounter wordPairCounter
)
{
    // Creates the two term counters, each with its own list of words to
    // skip:
    //
    //   wordPairCounter: skips the user-supplied words only.
    //
    //   wordCounter: skips the user-supplied words, plus every
    //   space-separated word of searchTerm (when searchTerm is not null or
    //   empty), plus the literal "rt".

    Debug.Assert(calculateGraphMetricsContext != null);

    String[] userSuppliedWordsToSkip = StringUtil.SplitOnSpaces(
        calculateGraphMetricsContext.GraphMetricUserSettings
            .WordMetricUserSettings.WordsToSkip);

    wordPairCounter = new WordPairCounter(userSuppliedWordsToSkip);

    // Start the single-word skip list as a copy of the user's list, then
    // extend it.

    List<String> singleWordSkipList =
        new List<String>(userSuppliedWordsToSkip);

    if (!String.IsNullOrEmpty(searchTerm))
    {
        String[] searchTermWords = StringUtil.SplitOnSpaces(searchTerm);
        singleWordSkipList.AddRange(searchTermWords);
    }

    // NOTE(review): "rt" is presumably the Twitter retweet marker -- confirm.

    singleWordSkipList.Add("rt");

    wordCounter = new WordCounter(singleWordSkipList.ToArray());
}
AddGraphMetricValuesForTopWordsAndWordPairs
(
    IEnumerable<IEdge> edges,
    String statusEdgeColumnName,
    Int32 maximumTopTerms,
    WordCounter wordCounter,
    WordPairCounter wordPairCounter,
    Int32 vertexRowID,
    List<GraphMetricValueWithID> topWordsInTweetByCountGraphMetricValues,
    List<GraphMetricValueWithID> topWordsInTweetBySalienceGraphMetricValues,
    List<GraphMetricValueWithID> topWordPairsInTweetByCountGraphMetricValues,
    List<GraphMetricValueWithID> topWordPairsInTweetBySalienceGraphMetricValues
)
{
    // Obtains four concatenated "top terms" strings for the given edges via
    // ConcatenateTopWordsAndWordPairs(), then appends one
    // GraphMetricValueWithID per string, keyed by vertexRowID, to the
    // corresponding output list.  Each string is passed through
    // ExcelTextUtil.ForceCellText() before being stored.

    Debug.Assert(edges != null);
    Debug.Assert(!String.IsNullOrEmpty(statusEdgeColumnName));
    Debug.Assert(maximumTopTerms > 0);
    Debug.Assert(wordCounter != null);
    Debug.Assert(wordPairCounter != null);
    Debug.Assert(topWordsInTweetByCountGraphMetricValues != null);
    Debug.Assert(topWordsInTweetBySalienceGraphMetricValues != null);
    Debug.Assert(topWordPairsInTweetByCountGraphMetricValues != null);
    Debug.Assert(topWordPairsInTweetBySalienceGraphMetricValues != null);

    String wordsByCount;
    String wordsBySalience;
    String wordPairsByCount;
    String wordPairsBySalience;

    ConcatenateTopWordsAndWordPairs(
        edges, statusEdgeColumnName, maximumTopTerms,
        wordCounter, wordPairCounter,
        out wordsByCount, out wordsBySalience,
        out wordPairsByCount, out wordPairsBySalience);

    GraphMetricValueWithID wordsByCountValue = new GraphMetricValueWithID(
        vertexRowID, ExcelTextUtil.ForceCellText(wordsByCount));

    topWordsInTweetByCountGraphMetricValues.Add(wordsByCountValue);

    GraphMetricValueWithID wordsBySalienceValue = new GraphMetricValueWithID(
        vertexRowID, ExcelTextUtil.ForceCellText(wordsBySalience));

    topWordsInTweetBySalienceGraphMetricValues.Add(wordsBySalienceValue);

    GraphMetricValueWithID wordPairsByCountValue = new GraphMetricValueWithID(
        vertexRowID, ExcelTextUtil.ForceCellText(wordPairsByCount));

    topWordPairsInTweetByCountGraphMetricValues.Add(wordPairsByCountValue);

    GraphMetricValueWithID wordPairsBySalienceValue =
        new GraphMetricValueWithID(
            vertexRowID, ExcelTextUtil.ForceCellText(wordPairsBySalience));

    topWordPairsInTweetBySalienceGraphMetricValues.Add(
        wordPairsBySalienceValue);
}
TestCountTermsInDocument11()
{
    // Test Jana Diesner's example for mutual information, which doesn't
    // skip words.  Five documents are counted, the totals are checked, and
    // then each of the twelve expected word pairs is looked up (it must
    // match exactly once) and its mutual information compared to the
    // expected value within a tolerance of 0.001.

    WordPairCounter counter = new WordPairCounter(new String[0]);

    String[] documents =
    {
        "Tim and Ben play soccer.",
        "Tim and Sue play soccer.",
        "Yes we can.",
        "epic fail",
        "Tim is available now.",
    };

    foreach (String document in documents)
    {
        counter.CountTermsInDocument(document);
    }

    counter.CalculateMutualInformationOfCountedTerms();

    Assert.AreEqual(5, counter.TotalDocuments);
    Assert.AreEqual(19, counter.TotalWordsInDocuments);

    IEnumerable<CountedWordPair> countedPairs = counter.CountedTerms;

    Assert.AreEqual(12, countedPairs.Count());

    // One row per expected pair, in the order the checks are performed.

    var expectedPairs = new[]
    {
        new { Word1 = "tim", Word2 = "and", Count = 2, Documents = 2, MutualInformation = 0.669 },
        new { Word1 = "and", Word2 = "ben", Count = 1, Documents = 1, MutualInformation = 0.845 },
        new { Word1 = "ben", Word2 = "play", Count = 1, Documents = 1, MutualInformation = 0.845 },
        new { Word1 = "and", Word2 = "sue", Count = 1, Documents = 1, MutualInformation = 0.845 },
        new { Word1 = "sue", Word2 = "play", Count = 1, Documents = 1, MutualInformation = 0.845 },
        new { Word1 = "play", Word2 = "soccer", Count = 2, Documents = 2, MutualInformation = 0.845 },
        new { Word1 = "yes", Word2 = "we", Count = 1, Documents = 1, MutualInformation = 1.146 },
        new { Word1 = "we", Word2 = "can", Count = 1, Documents = 1, MutualInformation = 1.146 },
        new { Word1 = "epic", Word2 = "fail", Count = 1, Documents = 1, MutualInformation = 1.146 },
        new { Word1 = "tim", Word2 = "is", Count = 1, Documents = 1, MutualInformation = 0.669 },
        new { Word1 = "is", Word2 = "available", Count = 1, Documents = 1, MutualInformation = 1.146 },
        new { Word1 = "available", Word2 = "now", Count = 1, Documents = 1, MutualInformation = 1.146 },
    };

    foreach (var expected in expectedPairs)
    {
        // Single() throws unless exactly one counted pair matches all four
        // criteria.

        CountedWordPair match = countedPairs.Single(
            pair =>
                pair.Word1 == expected.Word1
                && pair.Word2 == expected.Word2
                && pair.Count == expected.Count
                && pair.DocumentsInWhichTermWasCounted == expected.Documents
            );

        Assert.AreEqual(
            expected.MutualInformation, match.MutualInformation, 0.001);
    }
}
SetUp()
{
    // Replace m_oWordPairCounter with a newly constructed WordPairCounter
    // that skips the words in WordsToSkip.

    WordPairCounter freshCounter = new WordPairCounter(WordsToSkip);

    m_oWordPairCounter = freshCounter;
}
CountTermsInEdgeOrVertex
(
    IMetadataProvider oEdgeOrVertex,
    String sTextColumnName,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter
)
{
    // Reads the String value stored under sTextColumnName on the given edge
    // or vertex and, when that value exists and is not empty, feeds it as
    // one document to both the word counter and the word pair counter.
    // Does nothing otherwise.

    Debug.Assert(oEdgeOrVertex != null);
    Debug.Assert(!String.IsNullOrEmpty(sTextColumnName));
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    Object cellValue;

    if (!oEdgeOrVertex.TryGetValue(
            sTextColumnName, typeof(String), out cellValue))
    {
        // The edge or vertex has no value for the text column.

        return;
    }

    String cellText = (String)cellValue;

    if (String.IsNullOrEmpty(cellText))
    {
        return;
    }

    oWordCounter.CountTermsInDocument(cellText);
    oWordPairCounter.CountTermsInDocument(cellText);
}
TryCountTermsNoGroups
(
    IGraph oGraph,
    WordMetricUserSettings oWordMetricUserSettings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    HashSet<String> oUniqueImportedIDs,
    out GraphMetricColumn [] oGraphMetricColumns
)
{
    // Counts words and word pairs in the configured text column across the
    // graph's edges or vertices (whichever worksheet holds the column)
    // without regard to groups, has the counters calculate salience and
    // mutual information, and converts the counted terms into graph metric
    // columns.  This method always returns true.

    Debug.Assert(oGraph != null);
    Debug.Assert(oWordMetricUserSettings != null);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    Boolean textIsOnEdgeWorksheet =
        oWordMetricUserSettings.TextColumnIsOnEdgeWorksheet;

    System.Collections.IEnumerable itemsToScan;

    if (textIsOnEdgeWorksheet)
    {
        itemsToScan = (System.Collections.IEnumerable)oGraph.Edges;
    }
    else
    {
        itemsToScan = (System.Collections.IEnumerable)oGraph.Vertices;
    }

    // Count the terms in each of the column's cells.

    foreach (IMetadataProvider item in EnumerateEdgesOrVertices(
        itemsToScan, textIsOnEdgeWorksheet, oGraph, oUniqueImportedIDs))
    {
        CountTermsInEdgeOrVertex(
            item, oWordMetricUserSettings.TextColumnName,
            oWordCounter, oWordPairCounter);
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateMutualInformationOfCountedTerms();

    // Transfer the words and word pairs to graph metric value lists.

    List<GraphMetricValueOrdered> wordValues;
    List<GraphMetricValueOrdered> wordCountValues;
    List<GraphMetricValueOrdered> wordSalienceValues;
    List<GraphMetricValueOrdered> pairWord1Values;
    List<GraphMetricValueOrdered> pairWord2Values;
    List<GraphMetricValueOrdered> pairCountValues;
    List<GraphMetricValueOrdered> pairSalienceValues;
    List<GraphMetricValueOrdered> pairMutualInformationValues;

    CreateGraphMetricValueLists(
        out wordValues, out wordCountValues, out wordSalienceValues,
        out pairWord1Values, out pairWord2Values, out pairCountValues,
        out pairSalienceValues, out pairMutualInformationValues);

    foreach (CountedWord countedWord in oWordCounter.CountedTerms)
    {
        AddCountedWordToValueLists(
            countedWord, oWordMetricUserSettings,
            wordValues, wordCountValues, wordSalienceValues);
    }

    foreach (CountedWordPair countedPair in oWordPairCounter.CountedTerms)
    {
        AddCountedWordPairToValueLists(
            countedPair, oWordMetricUserSettings,
            pairWord1Values, pairWord2Values, pairCountValues,
            pairSalienceValues, pairMutualInformationValues);
    }

    // The two null arguments occupy the positions where the by-group
    // methods pass their group-name value lists.

    oGraphMetricColumns = CreateGraphMetricColumns(
        wordValues, wordCountValues, wordSalienceValues, null,
        pairWord1Values, pairWord2Values, pairCountValues,
        pairSalienceValues, pairMutualInformationValues, null);

    return true;
}
TryCountVertexTermsByGroup
(
    IGraph oGraph,
    WordMetricUserSettings oWordMetricUserSettings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    HashSet<String> oUniqueImportedIDs,
    out GraphMetricColumn [] oGraphMetricColumns
)
{
    // Counts words and word pairs in the configured vertex text column
    // separately for each group returned by
    // EnumerateGroupsForCountingVertexTerms(), appending each group's
    // results (tagged with the group name) to shared value lists, and then
    // converts those lists into graph metric columns.  This method always
    // returns true.

    Debug.Assert(oGraph != null);
    Debug.Assert(oWordMetricUserSettings != null);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    List<GraphMetricValueOrdered> wordValues;
    List<GraphMetricValueOrdered> wordCountValues;
    List<GraphMetricValueOrdered> wordSalienceValues;
    List<GraphMetricValueOrdered> pairWord1Values;
    List<GraphMetricValueOrdered> pairWord2Values;
    List<GraphMetricValueOrdered> pairCountValues;
    List<GraphMetricValueOrdered> pairSalienceValues;
    List<GraphMetricValueOrdered> pairMutualInformationValues;

    CreateGraphMetricValueLists(
        out wordValues, out wordCountValues, out wordSalienceValues,
        out pairWord1Values, out pairWord2Values, out pairCountValues,
        out pairSalienceValues, out pairMutualInformationValues);

    List<GraphMetricValueOrdered> wordGroupNameValues =
        new List<GraphMetricValueOrdered>();

    List<GraphMetricValueOrdered> pairGroupNameValues =
        new List<GraphMetricValueOrdered>();

    // Get a list of the graph's groups, adding a dummy group for the
    // entire graph and another to contain any non-grouped vertices.
    //
    // NOTE(review): TryCountEdgeTermsByGroup() clears oUniqueImportedIDs
    // after processing its whole-graph dummy group; this method never
    // clears it.  Confirm whether that is intentional or handled
    // elsewhere.

    foreach (GroupInfo groupInfo in
        EnumerateGroupsForCountingVertexTerms(oGraph))
    {
        // Each group is counted from scratch.

        oWordCounter.Clear();
        oWordPairCounter.Clear();

        foreach (IVertex vertex in EnumerateEdgesOrVertices(
            groupInfo.Vertices, false, oGraph, oUniqueImportedIDs))
        {
            CountTermsInEdgeOrVertex(
                vertex, oWordMetricUserSettings.TextColumnName,
                oWordCounter, oWordPairCounter);
        }

        oWordCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateMutualInformationOfCountedTerms();

        // Transfer this group's words and word pairs to the graph metric
        // value lists.

        AddCountedWordsToValueLists(
            oWordCounter.CountedTerms, oWordMetricUserSettings,
            groupInfo.Name,
            wordValues, wordCountValues, wordSalienceValues,
            wordGroupNameValues);

        AddCountedWordPairsToValueLists(
            oWordPairCounter.CountedTerms, oWordMetricUserSettings,
            groupInfo.Name,
            pairWord1Values, pairWord2Values, pairCountValues,
            pairSalienceValues, pairMutualInformationValues,
            pairGroupNameValues);
    }

    oGraphMetricColumns = CreateGraphMetricColumns(
        wordValues, wordCountValues, wordSalienceValues,
        wordGroupNameValues,
        pairWord1Values, pairWord2Values, pairCountValues,
        pairSalienceValues, pairMutualInformationValues,
        pairGroupNameValues);

    return true;
}
TryCountEdgeTermsByGroup
(
    IGraph oGraph,
    WordMetricUserSettings oWordMetricUserSettings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    HashSet<String> oUniqueImportedIDs,
    out GraphMetricColumn [] oGraphMetricColumns
)
{
    // Counts words and word pairs in the configured edge text column
    // separately for each edge group returned by
    // GroupEdgeSorter.SortGroupEdges(), appending each group's results
    // (tagged with the group name) to shared value lists, and then converts
    // those lists into graph metric columns.  This method always returns
    // true.

    Debug.Assert(oGraph != null);
    Debug.Assert(oWordMetricUserSettings != null);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    List<GraphMetricValueOrdered> wordValues;
    List<GraphMetricValueOrdered> wordCountValues;
    List<GraphMetricValueOrdered> wordSalienceValues;
    List<GraphMetricValueOrdered> pairWord1Values;
    List<GraphMetricValueOrdered> pairWord2Values;
    List<GraphMetricValueOrdered> pairCountValues;
    List<GraphMetricValueOrdered> pairSalienceValues;
    List<GraphMetricValueOrdered> pairMutualInformationValues;

    CreateGraphMetricValueLists(
        out wordValues, out wordCountValues, out wordSalienceValues,
        out pairWord1Values, out pairWord2Values, out pairCountValues,
        out pairSalienceValues, out pairMutualInformationValues);

    List<GraphMetricValueOrdered> wordGroupNameValues =
        new List<GraphMetricValueOrdered>();

    List<GraphMetricValueOrdered> pairGroupNameValues =
        new List<GraphMetricValueOrdered>();

    // Get the edges in each of the graph's groups.  Include a "dummy"
    // group that contains the edges that aren't contained in any real
    // groups.

    foreach (GroupEdgeInfo groupEdges in
        GroupEdgeSorter.SortGroupEdges(oGraph, Int32.MaxValue, true, true))
    {
        // Each group is counted from scratch.

        oWordCounter.Clear();
        oWordPairCounter.Clear();

        foreach (IEdge edge in EnumerateEdgesOrVertices(
            groupEdges.Edges, true, oGraph, oUniqueImportedIDs))
        {
            CountTermsInEdgeOrVertex(
                edge, oWordMetricUserSettings.TextColumnName,
                oWordCounter, oWordPairCounter);
        }

        oWordCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateMutualInformationOfCountedTerms();

        // Transfer this group's words and word pairs to the graph metric
        // value lists.

        String groupName = groupEdges.GroupName;

        AddCountedWordsToValueLists(
            oWordCounter.CountedTerms, oWordMetricUserSettings, groupName,
            wordValues, wordCountValues, wordSalienceValues,
            wordGroupNameValues);

        AddCountedWordPairsToValueLists(
            oWordPairCounter.CountedTerms, oWordMetricUserSettings,
            groupName,
            pairWord1Values, pairWord2Values, pairCountValues,
            pairSalienceValues, pairMutualInformationValues,
            pairGroupNameValues);

        if (groupName == GroupEdgeSorter.DummyGroupNameForEntireGraph)
        {
            // This is the dummy group that stores all the edges in the
            // graph.  Note that SortGroupEdges() guarantees that this is
            // the first group, so the imported IDs need to be cleared only
            // once within this loop.

            if (oUniqueImportedIDs != null)
            {
                oUniqueImportedIDs.Clear();
            }
        }
    }

    oGraphMetricColumns = CreateGraphMetricColumns(
        wordValues, wordCountValues, wordSalienceValues,
        wordGroupNameValues,
        pairWord1Values, pairWord2Values, pairCountValues,
        pairSalienceValues, pairMutualInformationValues,
        pairGroupNameValues);

    return true;
}
TryCalculateGraphMetrics
(
    IGraph graph,
    CalculateGraphMetricsContext calculateGraphMetricsContext,
    out GraphMetricColumn [] graphMetricColumns
)
{
    // Entry point for the word metrics.  Returns true with an empty column
    // array when word metrics are not requested or no text column name is
    // configured.  Otherwise creates the word and word pair counters from
    // the user's words-to-skip setting and delegates to one of the three
    // TryCount...() methods, chosen by the CountByGroup and
    // TextColumnIsOnEdgeWorksheet settings, returning that method's result.

    Debug.Assert(graph != null);
    Debug.Assert(calculateGraphMetricsContext != null);
    AssertValid();

    // Default result for the early-exit path.

    graphMetricColumns = new GraphMetricColumn[0];

    WordMetricUserSettings wordSettings =
        calculateGraphMetricsContext.GraphMetricUserSettings
            .WordMetricUserSettings;

    if (!calculateGraphMetricsContext.ShouldCalculateGraphMetrics(
            GraphMetrics.Words)
        || String.IsNullOrEmpty(wordSettings.TextColumnName))
    {
        return true;
    }

    String[] wordsToSkip = StringUtil.SplitOnCommonDelimiters(
        wordSettings.WordsToSkip);

    WordCounter wordCounter = new WordCounter(wordsToSkip);
    WordPairCounter wordPairCounter = new WordPairCounter(wordsToSkip);

    // The edges or vertices may have unique imported IDs.  If so, this
    // becomes a collection of the IDs; otherwise it stays null.

    HashSet<String> uniqueImportedIDs = null;

    if (EdgesOrVerticesHaveImportedIDs(
            graph, wordSettings.TextColumnIsOnEdgeWorksheet))
    {
        uniqueImportedIDs = new HashSet<String>();
    }

    if (!wordSettings.CountByGroup)
    {
        return TryCountTermsNoGroups(
            graph, wordSettings, wordCounter, wordPairCounter,
            uniqueImportedIDs, out graphMetricColumns);
    }

    if (wordSettings.TextColumnIsOnEdgeWorksheet)
    {
        return TryCountEdgeTermsByGroup(
            graph, wordSettings, wordCounter, wordPairCounter,
            uniqueImportedIDs, out graphMetricColumns);
    }

    return TryCountVertexTermsByGroup(
        graph, wordSettings, wordCounter, wordPairCounter,
        uniqueImportedIDs, out graphMetricColumns);
}
TestCountTermsInDocument11()
{
    // Test Jana Diesner's example for mutual information, which doesn't
    // skip words.  After counting five documents, the document and word
    // totals are verified, and then every expected word pair is required to
    // appear exactly once with the expected count, document count, and
    // mutual information (tolerance 0.001).

    WordPairCounter pairCounter = new WordPairCounter(new String[0]);

    String[] sampleDocuments =
    {
        "Tim and Ben play soccer.",
        "Tim and Sue play soccer.",
        "Yes we can.",
        "epic fail",
        "Tim is available now.",
    };

    foreach (String sampleDocument in sampleDocuments)
    {
        pairCounter.CountTermsInDocument(sampleDocument);
    }

    pairCounter.CalculateMutualInformationOfCountedTerms();

    Assert.AreEqual(5, pairCounter.TotalDocuments);
    Assert.AreEqual(19, pairCounter.TotalWordsInDocuments);

    IEnumerable<CountedWordPair> actualPairs = pairCounter.CountedTerms;

    Assert.AreEqual(12, actualPairs.Count());

    // Expected pairs, listed in the order in which they are verified.

    var expectations = new[]
    {
        new { First = "tim", Second = "and", Count = 2, Docs = 2, MI = 0.669 },
        new { First = "and", Second = "ben", Count = 1, Docs = 1, MI = 0.845 },
        new { First = "ben", Second = "play", Count = 1, Docs = 1, MI = 0.845 },
        new { First = "and", Second = "sue", Count = 1, Docs = 1, MI = 0.845 },
        new { First = "sue", Second = "play", Count = 1, Docs = 1, MI = 0.845 },
        new { First = "play", Second = "soccer", Count = 2, Docs = 2, MI = 0.845 },
        new { First = "yes", Second = "we", Count = 1, Docs = 1, MI = 1.146 },
        new { First = "we", Second = "can", Count = 1, Docs = 1, MI = 1.146 },
        new { First = "epic", Second = "fail", Count = 1, Docs = 1, MI = 1.146 },
        new { First = "tim", Second = "is", Count = 1, Docs = 1, MI = 0.669 },
        new { First = "is", Second = "available", Count = 1, Docs = 1, MI = 1.146 },
        new { First = "available", Second = "now", Count = 1, Docs = 1, MI = 1.146 },
    };

    foreach (var expectation in expectations)
    {
        // Single() fails the test by throwing if zero or several counted
        // pairs satisfy the predicate.

        CountedWordPair found = actualPairs.Single(
            candidate =>
                candidate.Word1 == expectation.First
                && candidate.Word2 == expectation.Second
                && candidate.Count == expectation.Count
                && candidate.DocumentsInWhichTermWasCounted
                    == expectation.Docs
            );

        Assert.AreEqual(expectation.MI, found.MutualInformation, 0.001);
    }
}
ConcatenateTopWordsAndWordPairs
(
    IEnumerable<IEdge> oEdges,
    String sStatusEdgeColumnName,
    Int32 iMaximumTopStrings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    out String sTopWordsInTweetByCount,
    out String sTopWordsInTweetBySalience,
    out String sTopWordPairsInTweetByCount,
    out String sTopWordPairsInTweetBySalience
)
{
    // Clears both counters, counts the terms in the non-empty status text
    // of each edge, has the counters calculate salience, and produces four
    // joined strings: words ordered by descending count, words ordered by
    // descending salience, and word pairs ordered the same two ways.  Each
    // ordered sequence is passed with iMaximumTopStrings to
    // TakeTopStringsAsArray() before being joined.

    Debug.Assert(oEdges != null);
    Debug.Assert(!String.IsNullOrEmpty(sStatusEdgeColumnName));
    Debug.Assert(iMaximumTopStrings > 0);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);

    oWordCounter.Clear();
    oWordPairCounter.Clear();

    foreach (IEdge edge in oEdges)
    {
        String statusText;

        if (!edge.TryGetNonEmptyStringValue(
                sStatusEdgeColumnName, out statusText))
        {
            // This edge has no usable status text.

            continue;
        }

        oWordCounter.CountTermsInDocument(statusText);
        oWordPairCounter.CountTermsInDocument(statusText);
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();

    sTopWordsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordCounter.CountedTerms
                .Cast<CountedWord>()
                .OrderByDescending(countedWord => countedWord.Count)
                .Select(countedWord => countedWord.Word),
            iMaximumTopStrings));

    sTopWordsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordCounter.CountedTerms
                .Cast<CountedWord>()
                .OrderByDescending(countedWord => countedWord.Salience)
                .Select(countedWord => countedWord.Word),
            iMaximumTopStrings));

    sTopWordPairsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairCounter.CountedTerms
                .Cast<CountedWordPair>()
                .OrderByDescending(countedPair => countedPair.Count)
                .Select(countedPair =>
                    TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                        countedPair)),
            iMaximumTopStrings));

    sTopWordPairsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairCounter.CountedTerms
                .Cast<CountedWordPair>()
                .OrderByDescending(countedPair => countedPair.Salience)
                .Select(countedPair =>
                    TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                        countedPair)),
            iMaximumTopStrings));
}
CreateCountersForWordsAndWordPairs
(
    String searchTerm,
    CalculateGraphMetricsContext calculateGraphMetricsContext,
    out WordCounter wordCounter,
    out WordPairCounter wordPairCounter
)
{
    // Builds the word pair counter and the word counter.  They are given
    // different skip lists:
    //
    //   Word pairs: the user-supplied words to skip.
    //
    //   Words: the user-supplied words to skip, the space-separated words
    //   of searchTerm (if searchTerm is not null or empty), and "rt".

    Debug.Assert(calculateGraphMetricsContext != null);

    String[] wordsSkippedByUser = StringUtil.SplitOnSpaces(
        calculateGraphMetricsContext.GraphMetricUserSettings
            .WordMetricUserSettings.WordsToSkip);

    wordPairCounter = new WordPairCounter(wordsSkippedByUser);

    // Copy the user's list so it can be extended for the word counter.

    List<String> wordsSkippedWhenCountingWords =
        new List<String>(wordsSkippedByUser);

    if (!String.IsNullOrEmpty(searchTerm))
    {
        String[] wordsInSearchTerm = StringUtil.SplitOnSpaces(searchTerm);
        wordsSkippedWhenCountingWords.AddRange(wordsInSearchTerm);
    }

    // NOTE(review): "rt" is presumably the Twitter retweet marker -- confirm.

    wordsSkippedWhenCountingWords.Add("rt");

    wordCounter = new WordCounter(wordsSkippedWhenCountingWords.ToArray());
}
AddGraphMetricValuesForTopWordsAndWordPairs
(
    IEnumerable<IEdge> edges,
    String statusEdgeColumnName,
    Int32 maximumTopTerms,
    WordCounter wordCounter,
    WordPairCounter wordPairCounter,
    Int32 vertexRowID,
    List<GraphMetricValueWithID> topWordsInTweetByCountGraphMetricValues,
    List<GraphMetricValueWithID> topWordsInTweetBySalienceGraphMetricValues,
    List<GraphMetricValueWithID> topWordPairsInTweetByCountGraphMetricValues,
    List<GraphMetricValueWithID> topWordPairsInTweetBySalienceGraphMetricValues
)
{
    // Calls ConcatenateTopWordsAndWordPairs() for the given edges to get
    // four concatenated "top terms" strings, then adds one
    // GraphMetricValueWithID per string, keyed by vertexRowID, to the
    // matching output list.  Each string goes through
    // ExcelUtil.ForceCellText() before it is stored.

    Debug.Assert(edges != null);
    Debug.Assert(!String.IsNullOrEmpty(statusEdgeColumnName));
    Debug.Assert(maximumTopTerms > 0);
    Debug.Assert(wordCounter != null);
    Debug.Assert(wordPairCounter != null);
    Debug.Assert(topWordsInTweetByCountGraphMetricValues != null);
    Debug.Assert(topWordsInTweetBySalienceGraphMetricValues != null);
    Debug.Assert(topWordPairsInTweetByCountGraphMetricValues != null);
    Debug.Assert(topWordPairsInTweetBySalienceGraphMetricValues != null);

    String topWordsByCount;
    String topWordsBySalience;
    String topWordPairsByCount;
    String topWordPairsBySalience;

    ConcatenateTopWordsAndWordPairs(
        edges, statusEdgeColumnName, maximumTopTerms,
        wordCounter, wordPairCounter,
        out topWordsByCount, out topWordsBySalience,
        out topWordPairsByCount, out topWordPairsBySalience);

    GraphMetricValueWithID topWordsByCountValue =
        new GraphMetricValueWithID(
            vertexRowID, ExcelUtil.ForceCellText(topWordsByCount));

    topWordsInTweetByCountGraphMetricValues.Add(topWordsByCountValue);

    GraphMetricValueWithID topWordsBySalienceValue =
        new GraphMetricValueWithID(
            vertexRowID, ExcelUtil.ForceCellText(topWordsBySalience));

    topWordsInTweetBySalienceGraphMetricValues.Add(topWordsBySalienceValue);

    GraphMetricValueWithID topWordPairsByCountValue =
        new GraphMetricValueWithID(
            vertexRowID, ExcelUtil.ForceCellText(topWordPairsByCount));

    topWordPairsInTweetByCountGraphMetricValues.Add(
        topWordPairsByCountValue);

    GraphMetricValueWithID topWordPairsBySalienceValue =
        new GraphMetricValueWithID(
            vertexRowID, ExcelUtil.ForceCellText(topWordPairsBySalience));

    topWordPairsInTweetBySalienceGraphMetricValues.Add(
        topWordPairsBySalienceValue);
}
ConcatenateTopWordsAndWordPairs
(
    IEnumerable<IEdge> oEdges,
    String sStatusEdgeColumnName,
    Int32 iMaximumTopStrings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    out String sTopWordsInTweetByCount,
    out String sTopWordsInTweetBySalience,
    out String sTopWordPairsInTweetByCount,
    out String sTopWordPairsInTweetBySalience
)
{
    // Resets both counters, counts the terms in each edge's non-empty
    // status text, has the counters calculate salience, and then builds
    // four joined strings from the counted terms: words by descending
    // count, words by descending salience, word pairs by descending count,
    // and word pairs by descending salience.  Every ordered sequence goes
    // through TakeTopStringsAsArray() with iMaximumTopStrings before it is
    // joined.

    Debug.Assert(oEdges != null);
    Debug.Assert(!String.IsNullOrEmpty(sStatusEdgeColumnName));
    Debug.Assert(iMaximumTopStrings > 0);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);

    oWordCounter.Clear();
    oWordPairCounter.Clear();

    foreach (IEdge currentEdge in oEdges)
    {
        String status;

        Boolean hasStatus = currentEdge.TryGetNonEmptyStringValue(
            sStatusEdgeColumnName, out status);

        if (hasStatus)
        {
            oWordCounter.CountTermsInDocument(status);
            oWordPairCounter.CountTermsInDocument(status);
        }
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();

    // Words, most frequent first.

    sTopWordsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordCounter.CountedTerms
                .Cast<CountedWord>()
                .OrderByDescending(word => word.Count)
                .Select(word => word.Word),
            iMaximumTopStrings));

    // Words, most salient first.

    sTopWordsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordCounter.CountedTerms
                .Cast<CountedWord>()
                .OrderByDescending(word => word.Salience)
                .Select(word => word.Word),
            iMaximumTopStrings));

    // Word pairs, most frequent first.

    sTopWordPairsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairCounter.CountedTerms
                .Cast<CountedWordPair>()
                .OrderByDescending(wordPair => wordPair.Count)
                .Select(wordPair =>
                    TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                        wordPair)),
            iMaximumTopStrings));

    // Word pairs, most salient first.

    sTopWordPairsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairCounter.CountedTerms
                .Cast<CountedWordPair>()
                .OrderByDescending(wordPair => wordPair.Salience)
                .Select(wordPair =>
                    TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                        wordPair)),
            iMaximumTopStrings));
}