//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void test() throws java.io.IOException public virtual void test() { TokenRangeSinkFilter sinkFilter = new TokenRangeSinkFilter(2, 4); string test = "The quick red fox jumped over the lazy brown dogs"; TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter); int count = 0; tee.reset(); while (tee.incrementToken()) { count++; } int sinkCount = 0; rangeToks.reset(); while (rangeToks.incrementToken()) { sinkCount++; } assertTrue(count + " does not equal: " + 10, count == 10); assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testMultipleSources() throws Exception public virtual void testMultipleSources() { //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new java.io.StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.ToString()), MockTokenizer.WHITESPACE, false)); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter); TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter); TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter); tee1.reset(); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream source1 = new CachingTokenFilter(tee1); TokenStream source1 = new CachingTokenFilter(tee1); tee1.addAttribute(typeof(CheckClearAttributesAttribute)); dogDetector.addAttribute(typeof(CheckClearAttributesAttribute)); theDetector.addAttribute(typeof(CheckClearAttributesAttribute)); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new java.io.StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.ToString()), MockTokenizer.WHITESPACE, false)); tee2.addSinkTokenStream(dogDetector); tee2.addSinkTokenStream(theDetector); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream source2 = tee2; TokenStream source2 = tee2; assertTokenStreamContents(source1, tokens1); assertTokenStreamContents(source2, tokens2); assertTokenStreamContents(theDetector, new string[] { "The", "the", "The", "the" }); assertTokenStreamContents(dogDetector, new string[] { "Dogs", "Dogs" }); source1.reset(); TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1); string[] lowerCaseTokens = new string[tokens1.Length]; for (int i = 0; i < tokens1.Length; i++) { lowerCaseTokens[i] = tokens1[i].ToLower(Locale.ROOT); } assertTokenStreamContents(lowerCasing, lowerCaseTokens); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testGeneral() throws java.io.IOException public virtual void testGeneral() { //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new java.io.StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.ToString()), MockTokenizer.WHITESPACE, false)); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream sink1 = source.newSinkTokenStream(); TokenStream sink1 = source.newSinkTokenStream(); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream sink2 = source.newSinkTokenStream(theFilter); TokenStream sink2 = source.newSinkTokenStream(theFilter); source.addAttribute(typeof(CheckClearAttributesAttribute)); sink1.addAttribute(typeof(CheckClearAttributesAttribute)); sink2.addAttribute(typeof(CheckClearAttributesAttribute)); assertTokenStreamContents(source, tokens1); assertTokenStreamContents(sink1, tokens1); assertTokenStreamContents(sink2, new string[] { "The", "the" }); }
// LUCENE-1448 // TODO: instead of testing it this way, we can test // with BaseTokenStreamTestCase now... //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception public virtual void testEndOffsetPositionWithTeeSinkTokenFilter() { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); TokenStream tokenStream = analyzer.tokenStream("field", "abcd "); TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream); TokenStream sink = tee.newSinkTokenStream(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = true; ft.StoreTermVectorPositions = true; Field f1 = new Field("field", tee, ft); Field f2 = new Field("field", sink, ft); doc.add(f1); doc.add(f2); w.addDocument(doc); w.close(); IndexReader r = DirectoryReader.open(dir); Terms vector = r.getTermVectors(0).terms("field"); assertEquals(1, vector.size()); TermsEnum termsEnum = vector.iterator(null); termsEnum.next(); assertEquals(2, termsEnum.totalTermFreq()); DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null); assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(2, positions.freq()); positions.nextPosition(); assertEquals(0, positions.startOffset()); assertEquals(4, positions.endOffset()); positions.nextPosition(); assertEquals(8, positions.startOffset()); assertEquals(12, positions.endOffset()); assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc()); r.close(); dir.close(); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void test() throws java.io.IOException public virtual void test() { TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D"); string test = "The quick red fox jumped over the lazy brown dogs"; TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(this, new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false))); TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter); bool seenDogs = false; CharTermAttribute termAtt = ttf.addAttribute(typeof(CharTermAttribute)); TypeAttribute typeAtt = ttf.addAttribute(typeof(TypeAttribute)); ttf.reset(); while (ttf.incrementToken()) { if (termAtt.ToString().Equals("dogs")) { seenDogs = true; assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().Equals("D") == true); } else { assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().Equals("word")); } } assertTrue(seenDogs + " does not equal: " + true, seenDogs == true); int sinkCount = 0; sink.reset(); while (sink.incrementToken()) { sinkCount++; } assertTrue("sink Size: " + sinkCount + " is not: " + 1, sinkCount == 1); }
/// <summary> /// Not an explicit test, just useful to print out some info on performance /// </summary> //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void performance() throws Exception public virtual void performance() { int[] tokCount = new int[] {100, 500, 1000, 2000, 5000, 10000}; int[] modCounts = new int[] {1, 2, 5, 10, 20, 50, 100, 200, 500}; for (int k = 0; k < tokCount.Length; k++) { StringBuilder buffer = new StringBuilder(); Console.WriteLine("-----Tokens: " + tokCount[k] + "-----"); for (int i = 0; i < tokCount[k]; i++) { buffer.Append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).Append(' '); } //make sure we produce the same tokens TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())))); TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, 100)); teeStream.consumeAllTokens(); TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), 100); CharTermAttribute tfTok = stream.addAttribute(typeof(CharTermAttribute)); CharTermAttribute sinkTok = sink.addAttribute(typeof(CharTermAttribute)); for (int i = 0; stream.incrementToken(); i++) { assertTrue(sink.incrementToken()); assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.Equals(sinkTok) == true); } //simulate two fields, each being analyzed once, for 20 documents for (int j = 0; j < modCounts.Length; j++) { int tfPos = 0; long start = DateTimeHelperClass.CurrentUnixTimeMillis(); for (int i = 0; i < 20; i++) { stream = new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))); PositionIncrementAttribute posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute)); while (stream.incrementToken()) { tfPos += posIncrAtt.PositionIncrement; } stream = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), modCounts[j]); posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute)); while (stream.incrementToken()) { tfPos += posIncrAtt.PositionIncrement; } } long finish = DateTimeHelperClass.CurrentUnixTimeMillis(); Console.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms"); int sinkPos = 0; //simulate one field with one sink start = DateTimeHelperClass.CurrentUnixTimeMillis(); for (int i = 0; i < 20; i++) { teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())))); sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, modCounts[j])); PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(typeof(PositionIncrementAttribute)); while (teeStream.incrementToken()) { sinkPos += posIncrAtt.PositionIncrement; } //System.out.println("Modulo--------"); posIncrAtt = sink.getAttribute(typeof(PositionIncrementAttribute)); while (sink.incrementToken()) { sinkPos += posIncrAtt.PositionIncrement; } } finish = DateTimeHelperClass.CurrentUnixTimeMillis(); Console.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms"); assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos); } Console.WriteLine("- End Tokens: " + tokCount[k] + "-----"); } }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testMultipleSources() throws Exception public virtual void testMultipleSources() { //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new java.io.StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter tee1 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.ToString()), MockTokenizer.WHITESPACE, false)); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter); TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter); TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter); tee1.reset(); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream source1 = new CachingTokenFilter(tee1); TokenStream source1 = new CachingTokenFilter(tee1); tee1.addAttribute(typeof(CheckClearAttributesAttribute)); dogDetector.addAttribute(typeof(CheckClearAttributesAttribute)); theDetector.addAttribute(typeof(CheckClearAttributesAttribute)); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new java.io.StringReader(buffer2.toString()), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer2.ToString()), MockTokenizer.WHITESPACE, false)); tee2.addSinkTokenStream(dogDetector); tee2.addSinkTokenStream(theDetector); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream source2 = tee2; TokenStream source2 = tee2; assertTokenStreamContents(source1, tokens1); assertTokenStreamContents(source2, tokens2); assertTokenStreamContents(theDetector, new string[]{"The", "the", "The", "the"}); assertTokenStreamContents(dogDetector, new string[]{"Dogs", "Dogs"}); source1.reset(); TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1); string[] lowerCaseTokens = new string[tokens1.Length]; for (int i = 0; i < tokens1.Length; i++) { lowerCaseTokens[i] = tokens1[i].ToLower(Locale.ROOT); } assertTokenStreamContents(lowerCasing, lowerCaseTokens); }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void testGeneral() throws java.io.IOException public virtual void testGeneral() { //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new java.io.StringReader(buffer1.toString()), MockTokenizer.WHITESPACE, false)); TeeSinkTokenFilter source = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(buffer1.ToString()), MockTokenizer.WHITESPACE, false)); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream sink1 = source.newSinkTokenStream(); TokenStream sink1 = source.newSinkTokenStream(); //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final': //ORIGINAL LINE: final TokenStream sink2 = source.newSinkTokenStream(theFilter); TokenStream sink2 = source.newSinkTokenStream(theFilter); source.addAttribute(typeof(CheckClearAttributesAttribute)); sink1.addAttribute(typeof(CheckClearAttributesAttribute)); sink2.addAttribute(typeof(CheckClearAttributesAttribute)); assertTokenStreamContents(source, tokens1); assertTokenStreamContents(sink1, tokens1); assertTokenStreamContents(sink2, new string[]{"The", "the"}); }
/// <summary> /// Not an explicit test, just useful to print out some info on performance /// </summary> //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET: //ORIGINAL LINE: public void performance() throws Exception public virtual void performance() { int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 }; int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 }; for (int k = 0; k < tokCount.Length; k++) { StringBuilder buffer = new StringBuilder(); Console.WriteLine("-----Tokens: " + tokCount[k] + "-----"); for (int i = 0; i < tokCount[k]; i++) { buffer.Append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).Append(' '); } //make sure we produce the same tokens TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())))); TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, 100)); teeStream.consumeAllTokens(); TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), 100); CharTermAttribute tfTok = stream.addAttribute(typeof(CharTermAttribute)); CharTermAttribute sinkTok = sink.addAttribute(typeof(CharTermAttribute)); for (int i = 0; stream.incrementToken(); i++) { assertTrue(sink.incrementToken()); assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.Equals(sinkTok) == true); } //simulate two fields, each being analyzed once, for 20 documents for (int j = 0; j < modCounts.Length; j++) { int tfPos = 0; long start = DateTimeHelperClass.CurrentUnixTimeMillis(); for (int i = 0; i < 20; i++) { stream = new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))); PositionIncrementAttribute posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute)); while (stream.incrementToken()) { tfPos += posIncrAtt.PositionIncrement; } stream = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), modCounts[j]); posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute)); while (stream.incrementToken()) { tfPos += posIncrAtt.PositionIncrement; } } long finish = DateTimeHelperClass.CurrentUnixTimeMillis(); Console.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms"); int sinkPos = 0; //simulate one field with one sink start = DateTimeHelperClass.CurrentUnixTimeMillis(); for (int i = 0; i < 20; i++) { teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())))); sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, modCounts[j])); PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(typeof(PositionIncrementAttribute)); while (teeStream.incrementToken()) { sinkPos += posIncrAtt.PositionIncrement; } //System.out.println("Modulo--------"); posIncrAtt = sink.getAttribute(typeof(PositionIncrementAttribute)); while (sink.incrementToken()) { sinkPos += posIncrAtt.PositionIncrement; } } finish = DateTimeHelperClass.CurrentUnixTimeMillis(); Console.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms"); assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos); } Console.WriteLine("- End Tokens: " + tokCount[k] + "-----"); } }