ConcatenateTopStringsBySalience
(
    IEnumerable<IEdge> edges,
    String edgeColumnName,
    Int32 maximumTopStrings
)
{
    // Counts the terms found in the edgeColumnName cell of every edge,
    // ranks the counted terms by descending salience, and returns the
    // result of TakeTopStringsAsArray() joined with WordSeparator.
    //
    // edges: the edges whose cells are read.
    // edgeColumnName: the edge column holding the text to count.
    // maximumTopStrings: passed through to TakeTopStringsAsArray().

    Debug.Assert(edges != null);
    Debug.Assert( !String.IsNullOrEmpty(edgeColumnName) );
    Debug.Assert(maximumTopStrings > 0);

    // Case is preserved (bitly URLs are case-sensitive, for example), and
    // no words are skipped.

    WordCounter oTermCounter = new WordCounter( false, new String[0] );
    oTermCounter.SkipUrlsAndPunctuation = false;

    foreach (IEdge oCurrentEdge in edges)
    {
        String sCellValue;

        if ( !oCurrentEdge.TryGetNonEmptyStringValue(edgeColumnName,
            out sCellValue) )
        {
            // This edge has no usable text in the column.

            continue;
        }

        oTermCounter.CountTermsInDocument(sCellValue);
    }

    oTermCounter.CalculateSalienceOfCountedTerms();

    // This is a deferred query; it is enumerated by whatever
    // TakeTopStringsAsArray() does with it.

    var oWordsBySalience = oTermCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oTerm => oTerm.Salience)
        .Select(oTerm => oTerm.Word);

    return String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsBySalience, maximumTopStrings)
        );
}
CountTermsInEdgeOrVertex
(
    IMetadataProvider oEdgeOrVertex,
    String sTextColumnName,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter
)
{
    // Reads the String value stored under sTextColumnName on the edge or
    // vertex and, when that value exists and is non-empty, counts its terms
    // with both the word counter and the word pair counter.
    //
    // oEdgeOrVertex: the object whose metadata value is read.
    // sTextColumnName: the key of the text value.
    // oWordCounter: receives the text as one document.
    // oWordPairCounter: receives the same text as one document.

    Debug.Assert(oEdgeOrVertex != null);
    Debug.Assert( !String.IsNullOrEmpty(sTextColumnName) );
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);
    AssertValid();

    Object oStoredValue;

    if ( !oEdgeOrVertex.TryGetValue(sTextColumnName, typeof(String),
        out oStoredValue) )
    {
        // There is nothing stored under that key.

        return;
    }

    String sDocument = (String)oStoredValue;

    if ( String.IsNullOrEmpty(sDocument) )
    {
        // A null or empty value is not counted as a document.

        return;
    }

    oWordCounter.CountTermsInDocument(sDocument);
    oWordPairCounter.CountTermsInDocument(sDocument);
}
TestCountTermsInDocument13()
{
    // Verifies term counting when the counter does not convert to lower
    // case: the same words in different case must be counted as distinct
    // terms.
    //
    // The two documents contain eight words in total, but only seven are
    // expected to be counted and lower-case "the" is not among the expected
    // terms.  Presumably WordsToSkip contains lower-case "the" only --
    // confirm against the WordsToSkip definition.

    WordCounter oCaseSensitiveCounter = new WordCounter(false, WordsToSkip);

    oCaseSensitiveCounter.CountTermsInDocument("the brown jumping fox");
    oCaseSensitiveCounter.CountTermsInDocument("FOX JUMPING BROWN THE");

    Assert.AreEqual(2, oCaseSensitiveCounter.TotalDocuments);
    Assert.AreEqual(7, oCaseSensitiveCounter.TotalWordsInDocuments);

    IEnumerable<CountedWord> oTerms = oCaseSensitiveCounter.CountedTerms;

    Assert.AreEqual( 7, oTerms.Count() );

    // Each expected term must appear exactly once, with a count of 1 in
    // exactly 1 document.  Single() throws if there is no match or more
    // than one match, which fails the test.

    String[] asExpectedWords =
    {
        "brown", "jumping", "fox", "BROWN", "JUMPING", "FOX", "THE"
    };

    foreach (String sExpectedWord in asExpectedWords)
    {
        String sWord = sExpectedWord;

        oTerms.Single(oTerm =>
            oTerm.Word == sWord
            && oTerm.Count == 1
            && oTerm.DocumentsInWhichTermWasCounted == 1
            );
    }
}
TestCountTermsInDocument11()
{
    // Test Jana Diesner's example, which doesn't skip words.
    //
    // Three sentences are counted as three documents (20 words, 13 distinct
    // terms), salience is calculated, and then the count, document count
    // and salience of every term are checked.

    WordCounter oCounter = new WordCounter( new String[0] );

    oCounter.CountTermsInDocument("Ben and Bob went out for dinner.");
    oCounter.CountTermsInDocument("Ben and Mary share a driveway.");
    oCounter.CountTermsInDocument("Brent and Bob went out for lunch.");

    oCounter.CalculateSalienceOfCountedTerms();

    Assert.AreEqual(3, oCounter.TotalDocuments);
    Assert.AreEqual(20, oCounter.TotalWordsInDocuments);

    IEnumerable<CountedWord> oTerms = oCounter.CountedTerms;

    Assert.AreEqual( 13, oTerms.Count() );

    // Expected results, one column per term.  In this example every term's
    // expected document count equals its expected term count, so one array
    // serves for both.

    String[] asExpectedWords =
    {
        "ben", "and", "bob", "went", "out", "for",
        "dinner", "mary", "share", "a", "driveway", "brent", "lunch"
    };

    Int32[] aiExpectedCounts =
    {
        2, 3, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1
    };

    Double[] adExpectedSaliences =
    {
        0.018, 0.0, 0.018, 0.018, 0.018, 0.018,
        0.024, 0.024, 0.024, 0.024, 0.024, 0.024, 0.024
    };

    for (Int32 i = 0; i < asExpectedWords.Length; i++)
    {
        String sWord = asExpectedWords[i];
        Int32 iCount = aiExpectedCounts[i];

        // Single() throws if the term is missing or has different counts.

        CountedWord oMatch = oTerms.Single(oTerm =>
            oTerm.Word == sWord
            && oTerm.Count == iCount
            && oTerm.DocumentsInWhichTermWasCounted == iCount
            );

        Assert.AreEqual(adExpectedSaliences[i], oMatch.Salience, 0.001);
    }
}
ConcatenateTopWordsAndWordPairs
(
    IEnumerable<IEdge> oEdges,
    String sStatusEdgeColumnName,
    Int32 iMaximumTopStrings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    out String sTopWordsInTweetByCount,
    out String sTopWordsInTweetBySalience,
    out String sTopWordPairsInTweetByCount,
    out String sTopWordPairsInTweetBySalience
)
{
    // Clears both counters, counts the terms in the sStatusEdgeColumnName
    // cell of every edge, calculates salience, and then produces four
    // joined strings: words ranked by count, words ranked by salience, word
    // pairs ranked by count, and word pairs ranked by salience.
    //
    // Words are joined with WordSeparator and formatted word pairs with
    // WordPairSeparator.  iMaximumTopStrings is passed through to
    // TakeTopStringsAsArray().

    Debug.Assert(oEdges != null);
    Debug.Assert( !String.IsNullOrEmpty(sStatusEdgeColumnName) );
    Debug.Assert(iMaximumTopStrings > 0);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);

    // The counters are supplied by the caller, so discard whatever they
    // counted previously.

    oWordCounter.Clear();
    oWordPairCounter.Clear();

    foreach (IEdge oCurrentEdge in oEdges)
    {
        String sStatusText;

        if ( !oCurrentEdge.TryGetNonEmptyStringValue(sStatusEdgeColumnName,
            out sStatusText) )
        {
            continue;
        }

        oWordCounter.CountTermsInDocument(sStatusText);
        oWordPairCounter.CountTermsInDocument(sStatusText);
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();

    // Words, ranked by count.

    var oWordsByCount = oWordCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oWord => oWord.Count)
        .Select(oWord => oWord.Word);

    sTopWordsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsByCount, iMaximumTopStrings)
        );

    // Words, ranked by salience.

    var oWordsBySalience = oWordCounter.CountedTerms
        .Cast<CountedWord>()
        .OrderByDescending(oWord => oWord.Salience)
        .Select(oWord => oWord.Word);

    sTopWordsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsBySalience, iMaximumTopStrings)
        );

    // Word pairs, ranked by count.

    var oWordPairsByCount = oWordPairCounter.CountedTerms
        .Cast<CountedWordPair>()
        .OrderByDescending(oPair => oPair.Count)
        .Select(oPair =>
            TwitterSearchNetworkWordMetricUtil.FormatWordPair(oPair) );

    sTopWordPairsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairsByCount, iMaximumTopStrings)
        );

    // Word pairs, ranked by salience.

    var oWordPairsBySalience = oWordPairCounter.CountedTerms
        .Cast<CountedWordPair>()
        .OrderByDescending(oPair => oPair.Salience)
        .Select(oPair =>
            TwitterSearchNetworkWordMetricUtil.FormatWordPair(oPair) );

    sTopWordPairsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairsBySalience, iMaximumTopStrings)
        );
}
ConcatenateTopStringsBySalience
(
    IEnumerable<IEdge> edges,
    String edgeColumnName,
    Int32 maximumTopStrings
)
{
    // Returns the terms found in one edge column, ranked from most to least
    // salient and joined into a single string with WordSeparator.  The
    // ranked terms are passed through TakeTopStringsAsArray() together with
    // maximumTopStrings before being joined.

    Debug.Assert(edges != null);
    Debug.Assert( !String.IsNullOrEmpty(edgeColumnName) );
    Debug.Assert(maximumTopStrings > 0);

    // Don't convert to lower case (bitly URLs are case-sensitive, for
    // example), and don't skip any words.

    WordCounter oCounter = new WordCounter( false, new String[0] );
    oCounter.SkipUrlsAndPunctuation = false;

    // Each edge's non-empty cell value is counted as one document.

    foreach (IEdge oEdgeToCount in edges)
    {
        String sText;

        Boolean bHasText = oEdgeToCount.TryGetNonEmptyStringValue(
            edgeColumnName, out sText);

        if (bHasText)
        {
            oCounter.CountTermsInDocument(sText);
        }
    }

    oCounter.CalculateSalienceOfCountedTerms();

    var oRankedWords =
        from CountedWord oRankedWord in oCounter.CountedTerms
        orderby oRankedWord.Salience descending
        select oRankedWord.Word;

    var oTopWords = TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
        oRankedWords, maximumTopStrings);

    return String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator, oTopWords);
}
ConcatenateTopWordsAndWordPairs
(
    IEnumerable<IEdge> oEdges,
    String sStatusEdgeColumnName,
    Int32 iMaximumTopStrings,
    WordCounter oWordCounter,
    WordPairCounter oWordPairCounter,
    out String sTopWordsInTweetByCount,
    out String sTopWordsInTweetBySalience,
    out String sTopWordPairsInTweetByCount,
    out String sTopWordPairsInTweetBySalience
)
{
    // Counts the words and word pairs in the sStatusEdgeColumnName cell of
    // the given edges and reports the top-ranked ones through the four out
    // parameters:
    //
    //   sTopWordsInTweetByCount:        words, highest count first.
    //   sTopWordsInTweetBySalience:     words, highest salience first.
    //   sTopWordPairsInTweetByCount:    word pairs, highest count first.
    //   sTopWordPairsInTweetBySalience: word pairs, highest salience first.
    //
    // Both counters are cleared first, so anything they counted before this
    // call is discarded.

    Debug.Assert(oEdges != null);
    Debug.Assert( !String.IsNullOrEmpty(sStatusEdgeColumnName) );
    Debug.Assert(iMaximumTopStrings > 0);
    Debug.Assert(oWordCounter != null);
    Debug.Assert(oWordPairCounter != null);

    oWordCounter.Clear();
    oWordPairCounter.Clear();

    foreach (IEdge oStatusEdge in oEdges)
    {
        String sTweetText;

        Boolean bHasTweetText = oStatusEdge.TryGetNonEmptyStringValue(
            sStatusEdgeColumnName, out sTweetText);

        if (bHasTweetText)
        {
            oWordCounter.CountTermsInDocument(sTweetText);
            oWordPairCounter.CountTermsInDocument(sTweetText);
        }
    }

    oWordCounter.CalculateSalienceOfCountedTerms();
    oWordPairCounter.CalculateSalienceOfCountedTerms();

    // Each ranking below is built immediately before it is consumed.

    var oWordsRankedByCount =
        from CountedWord oRankedWord in oWordCounter.CountedTerms
        orderby oRankedWord.Count descending
        select oRankedWord.Word;

    sTopWordsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsRankedByCount, iMaximumTopStrings)
        );

    var oWordsRankedBySalience =
        from CountedWord oRankedWord in oWordCounter.CountedTerms
        orderby oRankedWord.Salience descending
        select oRankedWord.Word;

    sTopWordsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordsRankedBySalience, iMaximumTopStrings)
        );

    var oWordPairsRankedByCount =
        from CountedWordPair oRankedPair in oWordPairCounter.CountedTerms
        orderby oRankedPair.Count descending
        select TwitterSearchNetworkWordMetricUtil.FormatWordPair(
            oRankedPair);

    sTopWordPairsInTweetByCount = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairsRankedByCount, iMaximumTopStrings)
        );

    var oWordPairsRankedBySalience =
        from CountedWordPair oRankedPair in oWordPairCounter.CountedTerms
        orderby oRankedPair.Salience descending
        select TwitterSearchNetworkWordMetricUtil.FormatWordPair(
            oRankedPair);

    sTopWordPairsInTweetBySalience = String.Join(
        TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
        TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
            oWordPairsRankedBySalience, iMaximumTopStrings)
        );
}
TestCountTermsInDocument13()
{
    // Counts two documents that contain the same four words in different
    // case, using a counter that does not convert to lower case, and checks
    // that the lower-case and upper-case forms are counted as separate
    // terms.
    //
    // NOTE(review): lower-case "the" is not among the seven expected terms
    // while "THE" is; presumably WordsToSkip lists only lower-case "the" --
    // confirm against its definition.

    WordCounter oCounter = new WordCounter(false, WordsToSkip);

    oCounter.CountTermsInDocument("the brown jumping fox");
    oCounter.CountTermsInDocument("FOX JUMPING BROWN THE");

    Assert.AreEqual(2, oCounter.TotalDocuments);
    Assert.AreEqual(7, oCounter.TotalWordsInDocuments);

    IEnumerable<CountedWord> oResults = oCounter.CountedTerms;

    Assert.AreEqual( 7, oResults.Count() );

    // Builds the predicate "this is the given word, counted once, in one
    // document".  Single() throws unless exactly one term satisfies it.

    Func< String, Func<CountedWord, Boolean> > countedOnce =
        sWord => ( oResult =>
            oResult.Word == sWord
            && oResult.Count == 1
            && oResult.DocumentsInWhichTermWasCounted == 1 );

    oResults.Single( countedOnce("brown") );
    oResults.Single( countedOnce("jumping") );
    oResults.Single( countedOnce("fox") );
    oResults.Single( countedOnce("BROWN") );
    oResults.Single( countedOnce("JUMPING") );
    oResults.Single( countedOnce("FOX") );
    oResults.Single( countedOnce("THE") );
}
TestCountTermsInDocument11()
{
    // Test Jana Diesner's example, which doesn't skip words.
    //
    // The three sentences below are counted as separate documents.  After
    // salience is calculated, every one of the 13 distinct terms is checked
    // for its count, the number of documents it was counted in, and its
    // salience (to within 0.001).

    WordCounter oExampleCounter = new WordCounter( new String[] {} );

    oExampleCounter.CountTermsInDocument("Ben and Bob went out for dinner.");
    oExampleCounter.CountTermsInDocument("Ben and Mary share a driveway.");

    oExampleCounter.CountTermsInDocument(
        "Brent and Bob went out for lunch.");

    oExampleCounter.CalculateSalienceOfCountedTerms();

    Assert.AreEqual(3, oExampleCounter.TotalDocuments);
    Assert.AreEqual(20, oExampleCounter.TotalWordsInDocuments);

    IEnumerable<CountedWord> oResults = oExampleCounter.CountedTerms;

    Assert.AreEqual( 13, oResults.Count() );

    // Finds the one term with the given word, count and document count
    // (Single() throws if there isn't exactly one) and checks its salience.

    Action<String, Int32, Int32, Double> assertTerm =
        (sWord, iCount, iDocuments, dSalience) =>
        {
            CountedWord oFound = oResults.Single(oResult =>
                oResult.Word == sWord
                && oResult.Count == iCount
                && oResult.DocumentsInWhichTermWasCounted == iDocuments
                );

            Assert.AreEqual(dSalience, oFound.Salience, 0.001);
        };

    assertTerm("ben", 2, 2, 0.018);
    assertTerm("and", 3, 3, 0.0);
    assertTerm("bob", 2, 2, 0.018);
    assertTerm("went", 2, 2, 0.018);
    assertTerm("out", 2, 2, 0.018);
    assertTerm("for", 2, 2, 0.018);

    assertTerm("dinner", 1, 1, 0.024);
    assertTerm("mary", 1, 1, 0.024);
    assertTerm("share", 1, 1, 0.024);
    assertTerm("a", 1, 1, 0.024);
    assertTerm("driveway", 1, 1, 0.024);
    assertTerm("brent", 1, 1, 0.024);
    assertTerm("lunch", 1, 1, 0.024);
}