TryCountVertexTermsByGroup
(
    IGraph oGraph,
    WordMetricUserSettings oWordMetricUserSettings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    HashSet<String> oUniqueImportedIDs,
    out GraphMetricColumn [] oGraphMetricColumns
)
{
    // For every group yielded by EnumerateGroupsForCountingVertexTerms(),
    // counts the words and word pairs of the group's vertices (using the
    // column named by oWordMetricUserSettings.TextColumnName), then hands
    // the accumulated value lists to CreateGraphMetricColumns().  The
    // result is stored at oGraphMetricColumns.  This method always returns
    // true.
    //
    // oWordCounter and oWordPairCounter are cleared and reused for each
    // group, so any terms they held on entry are discarded.

    Debug.Assert(oGraph != null);
    Debug.Assert(oWordMetricUserSettings != null);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    // One group-name list for the words, one for the word pairs.

    List<GraphMetricValueOrdered> oGroupNamesForWords =
        new List<GraphMetricValueOrdered>();

    List<GraphMetricValueOrdered> oGroupNamesForWordPairs =
        new List<GraphMetricValueOrdered>();

    // The remaining value lists are created by a shared helper.

    List<GraphMetricValueOrdered> oWords, oWordCounts, oWordSaliences;

    List<GraphMetricValueOrdered> oPairFirstWords, oPairSecondWords,
        oPairCounts, oPairSaliences, oPairMutualInformations;

    CreateGraphMetricValueLists(
        out oWords, out oWordCounts, out oWordSaliences,
        out oPairFirstWords, out oPairSecondWords, out oPairCounts,
        out oPairSaliences, out oPairMutualInformations
        );

    // The enumerated groups include a dummy group for the entire graph
    // and another that contains any non-grouped vertices.
    //
    // NOTE(review): unlike TryCountEdgeTermsByGroup(), oUniqueImportedIDs
    // is never cleared between groups here -- confirm this is intentional.

    foreach ( GroupInfo oGroup in
        EnumerateGroupsForCountingVertexTerms(oGraph) )
    {
        String sGroupName = oGroup.Name;

        // Start from empty counters, then count this group's vertices.

        oWordCounter.Clear();
        oWordPairCounter.Clear();

        foreach ( IVertex oVertex in EnumerateEdgesOrVertices(
            oGroup.Vertices, false, oGraph, oUniqueImportedIDs) )
        {
            CountTermsInEdgeOrVertex(oVertex,
                oWordMetricUserSettings.TextColumnName,
                oWordCounter, oWordPairCounter);
        }

        oWordCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateMutualInformationOfCountedTerms();

        // Append this group's words and word pairs to the value lists.

        AddCountedWordsToValueLists(
            oWordCounter.CountedTerms, oWordMetricUserSettings, sGroupName,
            oWords, oWordCounts, oWordSaliences, oGroupNamesForWords);

        AddCountedWordPairsToValueLists(
            oWordPairCounter.CountedTerms, oWordMetricUserSettings,
            sGroupName, oPairFirstWords, oPairSecondWords, oPairCounts,
            oPairSaliences, oPairMutualInformations,
            oGroupNamesForWordPairs);
    }

    oGraphMetricColumns = CreateGraphMetricColumns(
        oWords, oWordCounts, oWordSaliences, oGroupNamesForWords,
        oPairFirstWords, oPairSecondWords, oPairCounts, oPairSaliences,
        oPairMutualInformations, oGroupNamesForWordPairs
        );

    return true;
}
ConcatenateTopWordsAndWordPairs
(
    IEnumerable<IEdge> oEdges,
    String sStatusEdgeColumnName,
    Int32 iMaximumTopStrings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    out String sTopWordsInTweetByCount,
    out String sTopWordsInTweetBySalience,
    out String sTopWordPairsInTweetByCount,
    out String sTopWordPairsInTweetBySalience
)
{
    // Counts the words and word pairs in the sStatusEdgeColumnName value
    // of each edge, then builds four delimited strings from the counted
    // terms: words ordered by count, words ordered by salience, word
    // pairs ordered by count, and word pairs ordered by salience (all
    // descending).
    //
    // Both counters are cleared first, so any terms they held on entry
    // are discarded.  iMaximumTopStrings is forwarded to
    // TakeTopStringsAsArray(), which presumably limits how many strings
    // are kept -- confirm against that helper.

    Debug.Assert(oEdges != null);
    Debug.Assert( !String.IsNullOrEmpty(sStatusEdgeColumnName) );
    Debug.Assert(iMaximumTopStrings > 0);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);

    oWordCounter.Clear();
    oWordPairCounter.Clear();

    foreach (IEdge oEdge in oEdges)
    {
        String sStatus;

        if ( !oEdge.TryGetNonEmptyStringValue(sStatusEdgeColumnName,
            out sStatus) )
        {
            // Nothing to count for this edge.

            continue;
        }

        oWordCounter.CountTermsInDocument(sStatus);
        oWordPairCounter.CountTermsInDocument(sStatus);
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();

    // Words, most frequent first.

    var oWordsByCount = oWordCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oCountedWord => oCountedWord.Count)
        .Select(oCountedWord => oCountedWord.Word);

    sTopWordsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsByCount, iMaximumTopStrings) );

    // Words, most salient first.

    var oWordsBySalience = oWordCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oCountedWord => oCountedWord.Salience)
        .Select(oCountedWord => oCountedWord.Word);

    sTopWordsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsBySalience, iMaximumTopStrings) );

    // Word pairs, most frequent first.

    var oWordPairsByCount = oWordPairCounter.CountedTerms
        .Cast<CountedWordPair>()
        .OrderByDescending(oCountedWordPair => oCountedWordPair.Count)
        .Select(oCountedWordPair =>
            TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                oCountedWordPair) );

    sTopWordPairsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairsByCount, iMaximumTopStrings) );

    // Word pairs, most salient first.

    var oWordPairsBySalience = oWordPairCounter.CountedTerms
        .Cast<CountedWordPair>()
        .OrderByDescending(oCountedWordPair => oCountedWordPair.Salience)
        .Select(oCountedWordPair =>
            TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                oCountedWordPair) );

    sTopWordPairsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairsBySalience, iMaximumTopStrings) );
}
TryCountEdgeTermsByGroup
(
    IGraph oGraph,
    WordMetricUserSettings oWordMetricUserSettings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    HashSet<String> oUniqueImportedIDs,
    out GraphMetricColumn [] oGraphMetricColumns
)
{
    // For every group of edges returned by GroupEdgeSorter.SortGroupEdges(),
    // counts the words and word pairs of the group's edges (using the
    // column named by oWordMetricUserSettings.TextColumnName), then hands
    // the accumulated value lists to CreateGraphMetricColumns().  The
    // result is stored at oGraphMetricColumns.  This method always returns
    // true.
    //
    // oWordCounter and oWordPairCounter are cleared and reused for each
    // group.  oUniqueImportedIDs may be null; when it is not, it is
    // cleared once the entire-graph dummy group has been processed.

    Debug.Assert(oGraph != null);
    Debug.Assert(oWordMetricUserSettings != null);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    // One group-name list for the words, one for the word pairs.

    List<GraphMetricValueOrdered> oGroupNamesForWords =
        new List<GraphMetricValueOrdered>();

    List<GraphMetricValueOrdered> oGroupNamesForWordPairs =
        new List<GraphMetricValueOrdered>();

    // The remaining value lists are created by a shared helper.

    List<GraphMetricValueOrdered> oWords, oWordCounts, oWordSaliences;

    List<GraphMetricValueOrdered> oPairFirstWords, oPairSecondWords,
        oPairCounts, oPairSaliences, oPairMutualInformations;

    CreateGraphMetricValueLists(
        out oWords, out oWordCounts, out oWordSaliences,
        out oPairFirstWords, out oPairSecondWords, out oPairCounts,
        out oPairSaliences, out oPairMutualInformations
        );

    // Walk the edges of each of the graph's groups.  This includes a
    // "dummy" group holding the edges that aren't contained in any real
    // group.

    foreach ( GroupEdgeInfo oGroupEdgeInfo in
        GroupEdgeSorter.SortGroupEdges(oGraph, Int32.MaxValue, true, true) )
    {
        String sGroupName = oGroupEdgeInfo.GroupName;

        // Start from empty counters, then count this group's edges.

        oWordCounter.Clear();
        oWordPairCounter.Clear();

        foreach ( IEdge oEdge in EnumerateEdgesOrVertices(
            oGroupEdgeInfo.Edges, true, oGraph, oUniqueImportedIDs) )
        {
            CountTermsInEdgeOrVertex(oEdge,
                oWordMetricUserSettings.TextColumnName,
                oWordCounter, oWordPairCounter);
        }

        oWordCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateMutualInformationOfCountedTerms();

        // Append this group's words and word pairs to the value lists.

        AddCountedWordsToValueLists(
            oWordCounter.CountedTerms, oWordMetricUserSettings, sGroupName,
            oWords, oWordCounts, oWordSaliences, oGroupNamesForWords);

        AddCountedWordPairsToValueLists(
            oWordPairCounter.CountedTerms, oWordMetricUserSettings,
            sGroupName, oPairFirstWords, oPairSecondWords, oPairCounts,
            oPairSaliences, oPairMutualInformations,
            oGroupNamesForWordPairs);

        Boolean bIsEntireGraphGroup =
            (sGroupName == GroupEdgeSorter.DummyGroupNameForEntireGraph);

        if (bIsEntireGraphGroup && oUniqueImportedIDs != null)
        {
            // This dummy group stores all the edges in the graph.
            // SortGroupEdges() guarantees that it is the first group, so
            // the imported IDs need to be cleared only once within this
            // loop.

            oUniqueImportedIDs.Clear();
        }
    }

    oGraphMetricColumns = CreateGraphMetricColumns(
        oWords, oWordCounts, oWordSaliences, oGroupNamesForWords,
        oPairFirstWords, oPairSecondWords, oPairCounts, oPairSaliences,
        oPairMutualInformations, oGroupNamesForWordPairs
        );

    return true;
}
ConcatenateTopWordsAndWordPairs
(
    IEnumerable<IEdge> oEdges,
    String sStatusEdgeColumnName,
    Int32 iMaximumTopStrings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    out String sTopWordsInTweetByCount,
    out String sTopWordsInTweetBySalience,
    out String sTopWordPairsInTweetByCount,
    out String sTopWordPairsInTweetBySalience
)
{
    // Feeds the sStatusEdgeColumnName value of every edge into the two
    // counters, then produces four joined strings from the counted terms:
    // the words and the word pairs, each ranked once by descending count
    // and once by descending salience.
    //
    // The counters are emptied before counting starts.  The number of
    // strings kept is governed by TakeTopStringsAsArray(), to which
    // iMaximumTopStrings is passed (presumably as an upper limit --
    // confirm against that helper).

    Debug.Assert(oEdges != null);
    Debug.Assert( !String.IsNullOrEmpty(sStatusEdgeColumnName) );
    Debug.Assert(iMaximumTopStrings > 0);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);

    oWordCounter.Clear();
    oWordPairCounter.Clear();

    foreach (IEdge oEdge in oEdges)
    {
        String sStatusText;

        Boolean bHasStatusText = oEdge.TryGetNonEmptyStringValue(
            sStatusEdgeColumnName, out sStatusText);

        if (bHasStatusText)
        {
            oWordCounter.CountTermsInDocument(sStatusText);
            oWordPairCounter.CountTermsInDocument(sStatusText);
        }
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();

    // Single words: ranked by count, then by salience.

    var oRankedWordsByCount = oWordCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oWord => oWord.Count)
        .Select(oWord => oWord.Word);

    sTopWordsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oRankedWordsByCount, iMaximumTopStrings) );

    var oRankedWordsBySalience = oWordCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oWord => oWord.Salience)
        .Select(oWord => oWord.Word);

    sTopWordsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oRankedWordsBySalience, iMaximumTopStrings) );

    // Word pairs: ranked by count, then by salience.  Each pair is
    // rendered with FormatWordPair() before being joined.

    var oRankedPairsByCount = oWordPairCounter.CountedTerms
        .Cast<CountedWordPair>()
        .OrderByDescending(oPair => oPair.Count)
        .Select(oPair =>
            TwitterSearchNetworkWordMetricUtil.FormatWordPair(oPair) );

    sTopWordPairsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oRankedPairsByCount, iMaximumTopStrings) );

    var oRankedPairsBySalience = oWordPairCounter.CountedTerms
        .Cast<CountedWordPair>()
        .OrderByDescending(oPair => oPair.Salience)
        .Select(oPair =>
            TwitterSearchNetworkWordMetricUtil.FormatWordPair(oPair) );

    sTopWordPairsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oRankedPairsBySalience, iMaximumTopStrings) );
}