/// <summary>
/// Analyzes "this sentence" with a <see cref="ShingleAnalyzerWrapper"/> (max shingle size 2)
/// over a <see cref="WhitespaceAnalyzer"/>, adds every emitted token to a
/// <see cref="PhraseQuery"/> at its accumulated position, and checks the search
/// hits against the expected ranks via <c>CompareRanks</c>.
/// </summary>
public void TestShingleAnalyzerWrapperPhraseQuery()
{
    Analyzer shingleAnalyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);
    Searcher = SetUpSearcher(shingleAnalyzer);

    var phraseQuery = new PhraseQuery();
    var stream = shingleAnalyzer.TokenStream("content", new StringReader("this sentence"));

    // Start one below zero so that a first position increment of 1 yields position 0.
    var position = -1;
    var positionIncrements = stream.AddAttribute<IPositionIncrementAttribute>();
    var terms = stream.AddAttribute<ITermAttribute>();

    while (stream.IncrementToken())
    {
        position += positionIncrements.PositionIncrement;
        phraseQuery.Add(new Term("content", terms.Term), position);
    }

    var scoreDocs = Searcher.Search(phraseQuery, null, 1000).ScoreDocs;
    int[] expectedRanks = { 0 };
    CompareRanks(scoreDocs, expectedRanks);
}
/// <summary>
/// Runs the same input three times through a <see cref="ShingleAnalyzerWrapper"/> that
/// wraps a <c>NonreusableAnalyzer</c>. The first and third passes expect the trailing
/// period to be dropped from the last token; the second pass expects it to be kept.
/// </summary>
/// <remarks>
/// NOTE(review): the alternating expectations presumably reflect NonreusableAnalyzer
/// producing a different tokenization on successive calls - confirm against its definition.
/// </remarks>
public void TestWrappedAnalyzerDoesNotReuse()
{
    const string input = "please divide into shingles.";
    Analyzer a = new ShingleAnalyzerWrapper(new NonreusableAnalyzer());

    // Pass 1: trailing period is not part of the last token.
    string[] firstTerms = { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" };
    int[] firstStarts = { 0, 0, 7, 7, 14, 14, 19 };
    int[] firstEnds = { 6, 13, 13, 18, 18, 27, 27 };
    int[] firstIncrements = { 1, 0, 1, 0, 1, 0, 1 };
    AssertAnalyzesToReuse(a, input, firstTerms, firstStarts, firstEnds, firstIncrements);

    // Pass 2: trailing period is kept, so the final end offsets grow by one.
    string[] secondTerms = { "please", "please divide", "divide", "divide into", "into", "into shingles.", "shingles." };
    int[] secondStarts = { 0, 0, 7, 7, 14, 14, 19 };
    int[] secondEnds = { 6, 13, 13, 18, 18, 28, 28 };
    int[] secondIncrements = { 1, 0, 1, 0, 1, 0, 1 };
    AssertAnalyzesToReuse(a, input, secondTerms, secondStarts, secondEnds, secondIncrements);

    // Pass 3: same expectations as pass 1.
    string[] thirdTerms = { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" };
    int[] thirdStarts = { 0, 0, 7, 7, 14, 14, 19 };
    int[] thirdEnds = { 6, 13, 13, 18, 18, 27, 27 };
    int[] thirdIncrements = { 1, 0, 1, 0, 1, 0, 1 };
    AssertAnalyzesToReuse(a, input, thirdTerms, thirdStarts, thirdEnds, thirdIncrements);
}
/// <summary>
/// Feeds two different inputs through one <see cref="ShingleAnalyzerWrapper"/>
/// (max shingle size 2, wrapping a <see cref="WhitespaceAnalyzer"/>) using
/// <c>AssertAnalyzesToReuse</c>, checking terms, start/end offsets and position
/// increments for each input.
/// </summary>
public void TestReusableTokenStream()
{
    Analyzer a = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);

    // First input.
    string[] firstTerms = { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" };
    int[] firstStarts = { 0, 0, 7, 7, 14, 14, 19 };
    int[] firstEnds = { 6, 13, 13, 18, 18, 27, 27 };
    int[] firstIncrements = { 1, 0, 1, 0, 1, 0, 1 };
    AssertAnalyzesToReuse(a, "please divide into shingles", firstTerms, firstStarts, firstEnds, firstIncrements);

    // Second input, analyzed with the same analyzer instance.
    string[] secondTerms = { "divide", "divide me", "me", "me up", "up", "up again", "again" };
    int[] secondStarts = { 0, 0, 7, 7, 10, 10, 13 };
    int[] secondEnds = { 6, 9, 9, 12, 12, 18, 18 };
    int[] secondIncrements = { 1, 0, 1, 0, 1, 0, 1 };
    AssertAnalyzesToReuse(a, "divide me up again", secondTerms, secondStarts, secondEnds, secondIncrements);
}
/// <summary>
/// Analyzes "test sentence" with a <see cref="ShingleAnalyzerWrapper"/> (max shingle size 2)
/// over a <see cref="WhitespaceAnalyzer"/>, ORs every emitted token into a
/// <see cref="BooleanQuery"/> as a SHOULD clause, and checks the search hits against
/// the expected ranks via <c>CompareRanks</c>.
/// </summary>
public void TestShingleAnalyzerWrapperBooleanQuery()
{
    Analyzer shingleAnalyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);
    Searcher = SetUpSearcher(shingleAnalyzer);

    var booleanQuery = new BooleanQuery();
    var stream = shingleAnalyzer.TokenStream("content", new StringReader("test sentence"));
    var terms = stream.AddAttribute<ITermAttribute>();

    while (stream.IncrementToken())
    {
        var clause = new TermQuery(new Term("content", terms.Term));
        booleanQuery.Add(clause, Occur.SHOULD);
    }

    var scoreDocs = Searcher.Search(booleanQuery, null, 1000).ScoreDocs;
    int[] expectedRanks = { 1, 2, 0 };
    CompareRanks(scoreDocs, expectedRanks);
}
/// <summary>
/// Feeds two different inputs through one <see cref="ShingleAnalyzerWrapper"/>
/// (max shingle size 2) built on a whitespace <c>MockAnalyzer</c>, checking terms,
/// start/end offsets and position increments for each input.
/// </summary>
/// <remarks>
/// Converted from the Java method <c>testReusableTokenStream() throws Exception</c>;
/// .NET has no equivalent of the <c>throws</c> clause.
/// </remarks>
public virtual void testReusableTokenStream()
{
    var whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    Analyzer a = new ShingleAnalyzerWrapper(whitespaceAnalyzer, 2);

    // First input.
    var firstTerms = new[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" };
    var firstStarts = new[] { 0, 0, 7, 7, 14, 14, 19 };
    var firstEnds = new[] { 6, 13, 13, 18, 18, 27, 27 };
    var firstIncrements = new[] { 1, 0, 1, 0, 1, 0, 1 };
    assertAnalyzesTo(a, "please divide into shingles", firstTerms, firstStarts, firstEnds, firstIncrements);

    // Second input, analyzed with the same analyzer instance.
    var secondTerms = new[] { "divide", "divide me", "me", "me up", "up", "up again", "again" };
    var secondStarts = new[] { 0, 0, 7, 7, 10, 10, 13 };
    var secondEnds = new[] { 6, 9, 9, 12, 12, 18, 18 };
    var secondIncrements = new[] { 1, 0, 1, 0, 1, 0, 1 };
    assertAnalyzesTo(a, "divide me up again", secondTerms, secondStarts, secondEnds, secondIncrements);
}
/// <summary>
/// Checks that a single-token input ("please") comes back as that one token, with
/// offsets 0-6 and position increment 1, from a <see cref="ShingleAnalyzerWrapper"/>
/// configured with default shingle sizes, an empty token separator and the default
/// filler token.
/// </summary>
/// <remarks>
/// Converted from the Java method
/// <c>testOutputUnigramsIfNoShinglesSingleToken() throws Exception</c>;
/// .NET has no equivalent of the <c>throws</c> clause.
/// NOTE(review): the two boolean arguments (false, true) are presumably
/// outputUnigrams and outputUnigramsIfNoShingles - confirm against the constructor.
/// </remarks>
public virtual void testOutputUnigramsIfNoShinglesSingleToken()
{
    var whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    var analyzer = new ShingleAnalyzerWrapper(
        whitespaceAnalyzer,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        "",
        false,
        true,
        ShingleFilter.DEFAULT_FILLER_TOKEN);

    var expectedTerms = new[] { "please" };
    var expectedStarts = new[] { 0 };
    var expectedEnds = new[] { 6 };
    var expectedIncrements = new[] { 1 };
    assertAnalyzesTo(analyzer, "please", expectedTerms, expectedStarts, expectedEnds, expectedIncrements);
}
/// <summary>
/// Checks shingle output when the token separator is <c>null</c> (with unigrams in the
/// expected output) and when it is the empty string (shingles only). In both cases the
/// expected shingles are the adjacent words joined with nothing between them.
/// </summary>
/// <remarks>
/// Converted from the Java method <c>testNullTokenSeparator() throws Exception</c>;
/// .NET has no equivalent of the <c>throws</c> clause.
/// </remarks>
public virtual void testNullTokenSeparator()
{
    const string input = "please divide into shingles";

    // Case 1: null separator.
    var nullSeparatorAnalyzer = new ShingleAnalyzerWrapper(
        new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        null,
        true,
        false,
        ShingleFilter.DEFAULT_FILLER_TOKEN);
    var withUnigramTerms = new[] { "please", "pleasedivide", "divide", "divideinto", "into", "intoshingles", "shingles" };
    var withUnigramStarts = new[] { 0, 0, 7, 7, 14, 14, 19 };
    var withUnigramEnds = new[] { 6, 13, 13, 18, 18, 27, 27 };
    var withUnigramIncrements = new[] { 1, 0, 1, 0, 1, 0, 1 };
    assertAnalyzesTo(nullSeparatorAnalyzer, input, withUnigramTerms, withUnigramStarts, withUnigramEnds, withUnigramIncrements);

    // Case 2: empty-string separator. Built only after the first assertion so the
    // order of random() calls matches the straightforward sequential form.
    var emptySeparatorAnalyzer = new ShingleAnalyzerWrapper(
        new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        "",
        false,
        false,
        ShingleFilter.DEFAULT_FILLER_TOKEN);
    var shingleOnlyTerms = new[] { "pleasedivide", "divideinto", "intoshingles" };
    var shingleOnlyStarts = new[] { 0, 7, 14 };
    var shingleOnlyEnds = new[] { 13, 18, 27 };
    var shingleOnlyIncrements = new[] { 1, 1, 1 };
    assertAnalyzesTo(emptySeparatorAnalyzer, input, shingleOnlyTerms, shingleOnlyStarts, shingleOnlyEnds, shingleOnlyIncrements);
}
/// <summary>
/// Checks shingle output for a <see cref="ShingleAnalyzerWrapper"/> constructed with
/// sizes 3 and 4: first through the three-argument constructor (expected output
/// includes the single words), then through the full constructor with the default
/// separator and filler token (expected output contains only 3- and 4-word shingles).
/// </summary>
/// <remarks>
/// Converted from the Java method <c>testNonDefaultMinShingleSize() throws Exception</c>;
/// .NET has no equivalent of the <c>throws</c> clause.
/// </remarks>
public virtual void testNonDefaultMinShingleSize()
{
    const string input = "please divide this sentence into shingles";

    // Case 1: three-argument constructor.
    var withUnigrams = new ShingleAnalyzerWrapper(
        new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
        3,
        4);
    var withUnigramTerms = new[]
    {
        "please", "please divide this", "please divide this sentence",
        "divide", "divide this sentence", "divide this sentence into",
        "this", "this sentence into", "this sentence into shingles",
        "sentence", "sentence into shingles",
        "into",
        "shingles"
    };
    var withUnigramStarts = new[] { 0, 0, 0, 7, 7, 7, 14, 14, 14, 19, 19, 28, 33 };
    var withUnigramEnds = new[] { 6, 18, 27, 13, 27, 32, 18, 32, 41, 27, 41, 32, 41 };
    var withUnigramIncrements = new[] { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1 };
    assertAnalyzesTo(withUnigrams, input, withUnigramTerms, withUnigramStarts, withUnigramEnds, withUnigramIncrements);

    // Case 2: full constructor. Built only after the first assertion so the order
    // of random() calls matches the straightforward sequential form.
    var shinglesOnly = new ShingleAnalyzerWrapper(
        new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false),
        3,
        4,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        false,
        false,
        ShingleFilter.DEFAULT_FILLER_TOKEN);
    var shingleOnlyTerms = new[]
    {
        "please divide this", "please divide this sentence",
        "divide this sentence", "divide this sentence into",
        "this sentence into", "this sentence into shingles",
        "sentence into shingles"
    };
    var shingleOnlyStarts = new[] { 0, 0, 7, 7, 14, 14, 19 };
    var shingleOnlyEnds = new[] { 18, 27, 27, 32, 32, 41, 41 };
    var shingleOnlyIncrements = new[] { 1, 0, 1, 0, 1, 0, 1 };
    assertAnalyzesTo(shinglesOnly, input, shingleOnlyTerms, shingleOnlyStarts, shingleOnlyEnds, shingleOnlyIncrements);
}
/// <summary>
/// Checks shingle output for three filler-token settings passed as the last constructor
/// argument: "--", <c>null</c>, and the empty string. The <c>null</c> and empty-string
/// cases share the same expected output.
/// </summary>
/// <remarks>
/// Converted from the Java method <c>testAltFillerToken() throws Exception</c>;
/// .NET has no equivalent of the <c>throws</c> clause.
/// NOTE(review): the expected output has no "into" token, so the wrapped
/// AnalyzerAnonymousInnerClassHelper presumably removes it and leaves a position gap
/// for the filler - confirm against that helper's definition.
/// </remarks>
public virtual void testAltFillerToken()
{
    const string input = "please divide into shingles";
    Analyzer wrapped = new AnalyzerAnonymousInnerClassHelper(this);

    // Case 1: filler "--".
    var dashFiller = new ShingleAnalyzerWrapper(
        wrapped,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        true,
        false,
        "--");
    var dashTerms = new[] { "please", "please divide", "divide", "divide --", "-- shingles", "shingles" };
    var dashStarts = new[] { 0, 0, 7, 7, 19, 19 };
    var dashEnds = new[] { 6, 13, 13, 19, 27, 27 };
    var dashIncrements = new[] { 1, 0, 1, 0, 1, 1 };
    assertAnalyzesTo(dashFiller, input, dashTerms, dashStarts, dashEnds, dashIncrements);

    // Case 2: null filler.
    var nullFiller = new ShingleAnalyzerWrapper(
        wrapped,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        false,
        false,
        null);
    var nullFillerTerms = new[] { "please divide", "divide ", " shingles" };
    var nullFillerStarts = new[] { 0, 7, 19 };
    var nullFillerEnds = new[] { 13, 19, 27 };
    var nullFillerIncrements = new[] { 1, 1, 1 };
    assertAnalyzesTo(nullFiller, input, nullFillerTerms, nullFillerStarts, nullFillerEnds, nullFillerIncrements);

    // Case 3: empty-string filler, same expectations as the null filler.
    var emptyFiller = new ShingleAnalyzerWrapper(
        wrapped,
        ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
        ShingleFilter.DEFAULT_TOKEN_SEPARATOR,
        false,
        false,
        "");
    var emptyFillerTerms = new[] { "please divide", "divide ", " shingles" };
    var emptyFillerStarts = new[] { 0, 7, 19 };
    var emptyFillerEnds = new[] { 13, 19, 27 };
    var emptyFillerIncrements = new[] { 1, 1, 1 };
    assertAnalyzesTo(emptyFiller, input, emptyFillerTerms, emptyFillerStarts, emptyFillerEnds, emptyFillerIncrements);
}