incrementToken() public method

public incrementToken() : bool
Returns: bool
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws java.io.IOException
        /// <summary>
        /// Feeds a ten-word sentence through a <c>TeeSinkTokenFilter</c> that has a
        /// <c>TokenRangeSinkFilter(2, 4)</c> sink attached, then asserts that the tee
        /// itself yielded 10 tokens and the sink yielded 2.
        /// </summary>
        public virtual void test()
        {
            TokenRangeSinkFilter rangeFilter = new TokenRangeSinkFilter(2, 4);
            string sentence = "The quick red fox jumped over the lazy brown dogs";

            MockTokenizer tokenizer = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
            TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenizer);
            TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(rangeFilter);

            // Drain the tee completely before the sink is read.
            int teeTokens = 0;
            for (tee.reset(); tee.incrementToken(); )
            {
                teeTokens++;
            }

            // Now count what reached the sink.
            int sinkTokens = 0;
            for (rangeToks.reset(); rangeToks.incrementToken(); )
            {
                sinkTokens++;
            }

            assertTrue(teeTokens + " does not equal: " + 10, teeTokens == 10);
            assertTrue("rangeToks Size: " + sinkTokens + " is not: " + 2, sinkTokens == 2);
        }
Example #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws java.io.IOException
        /// <summary>
        /// Runs a ten-word sentence through a <c>WordTokenFilter</c> wrapped in a
        /// <c>TeeSinkTokenFilter</c> with a <c>TokenTypeSinkFilter("D")</c> sink.
        /// Asserts that the "dogs" token carries type "D", that every other token
        /// carries type "word", that "dogs" was actually seen, and that the sink
        /// yields exactly one token.
        /// </summary>
        public virtual void test()
        {
            TokenTypeSinkFilter typeFilter = new TokenTypeSinkFilter("D");
            string sentence = "The quick red fox jumped over the lazy brown dogs";

            MockTokenizer tokenizer = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
            TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(this, tokenizer));
            TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(typeFilter);

            CharTermAttribute term = ttf.addAttribute(typeof(CharTermAttribute));
            TypeAttribute tokenType = ttf.addAttribute(typeof(TypeAttribute));

            bool foundDogs = false;
            ttf.reset();
            while (ttf.incrementToken())
            {
                // Every token other than "dogs" must carry the "word" type.
                if (!term.ToString().Equals("dogs"))
                {
                    assertTrue(tokenType.type() + " is not null and it should be", tokenType.type().Equals("word"));
                    continue;
                }

                foundDogs = true;
                assertTrue(tokenType.type() + " is not equal to " + "D", tokenType.type().Equals("D"));
            }
            assertTrue(foundDogs + " does not equal: " + true, foundDogs);

            // Count the tokens that reached the sink.
            int sinkTokens = 0;
            for (sink.reset(); sink.incrementToken(); )
            {
                sinkTokens++;
            }

            assertTrue("sink Size: " + sinkTokens + " is not: " + 1, sinkTokens == 1);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void test() throws java.io.IOException
        /// <summary>
        /// Tokenizes a ten-word sentence through a <c>TeeSinkTokenFilter</c> with a
        /// <c>TokenRangeSinkFilter(2, 4)</c> sink, and asserts that 10 tokens come out
        /// of the tee and 2 tokens come out of the sink.
        /// </summary>
        public virtual void test()
        {
            TokenRangeSinkFilter rangeFilter = new TokenRangeSinkFilter(2, 4);
            string input = "The quick red fox jumped over the lazy brown dogs";
            StringReader reader = new StringReader(input);
            TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
            TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(rangeFilter);

            // Consume the tee first, counting every token it produces.
            int seenByTee = 0;
            tee.reset();
            for (; tee.incrementToken(); seenByTee++)
            {
            }

            // Then consume the sink and count what it produces.
            int seenBySink = 0;
            rangeToks.reset();
            for (; rangeToks.incrementToken(); seenBySink++)
            {
            }

            assertTrue(seenByTee + " does not equal: " + 10, seenByTee == 10);
            assertTrue("rangeToks Size: " + seenBySink + " is not: " + 2, seenBySink == 2);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void test() throws java.io.IOException
        /// <summary>
        /// Tokenizes a ten-word sentence through a <c>WordTokenFilter</c> inside a
        /// <c>TeeSinkTokenFilter</c> that has a <c>TokenTypeSinkFilter("D")</c> sink.
        /// The "dogs" token must have type "D", all other tokens type "word", "dogs"
        /// must have been encountered, and the sink must yield exactly one token.
        /// </summary>
        public virtual void test()
        {
            TokenTypeSinkFilter typeFilter = new TokenTypeSinkFilter("D");
            string input = "The quick red fox jumped over the lazy brown dogs";

            StringReader reader = new StringReader(input);
            TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(this, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)));
            TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(typeFilter);

            bool dogsSeen = false;

            CharTermAttribute termAttribute = ttf.addAttribute(typeof(CharTermAttribute));
            TypeAttribute typeAttribute = ttf.addAttribute(typeof(TypeAttribute));

            for (ttf.reset(); ttf.incrementToken(); )
            {
                bool isDogs = termAttribute.ToString().Equals("dogs");
                if (isDogs)
                {
                    // The "dogs" token is the one expected to carry the "D" type.
                    dogsSeen = true;
                    assertTrue(typeAttribute.type() + " is not equal to " + "D", typeAttribute.type().Equals("D"));
                }
                else
                {
                    assertTrue(typeAttribute.type() + " is not null and it should be", typeAttribute.type().Equals("word"));
                }
            }
            assertTrue(dogsSeen + " does not equal: " + true, dogsSeen);

            // Count the tokens delivered to the sink.
            int seenBySink = 0;
            sink.reset();
            for (; sink.incrementToken(); seenBySink++)
            {
            }

            assertTrue("sink Size: " + seenBySink + " is not: " + 1, seenBySink == 1);
        }
        /// <summary>
        /// Not an explicit test, just useful to print out some info on performance.
        /// For each token count it builds a text of upper-cased English number words,
        /// checks that a tee/sink pair (modulo 100) yields the same terms as a
        /// <c>ModuloTokenFilter</c> chain, and then, for each modulo value, times
        /// 20 rounds of analyzing the text twice against 20 rounds of analyzing it
        /// once through a tee with one sink. Timings are written to the console and
        /// the accumulated position increments of both approaches are asserted equal.
        /// </summary>
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void performance() throws Exception
        public virtual void performance()
        {
            int[] tokCount = new int[] {100, 500, 1000, 2000, 5000, 10000};
            int[] modCounts = new int[] {1, 2, 5, 10, 20, 50, 100, 200, 500};
            for (int k = 0; k < tokCount.Length; k++)
            {
                StringBuilder buffer = new StringBuilder();
                Console.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    // ToUpperInvariant() is the .NET counterpart of Java's toUpperCase(Locale.ROOT):
                    // System.String has no toUpperCase method and Locale is not a .NET type.
                    // Assumes English.intToEnglish returns a System.String - TODO confirm.
                    buffer.Append(English.intToEnglish(i).ToUpperInvariant()).Append(' ');
                }

                //make sure we produce the same tokens
                // NOTE(review): the streams in this method are consumed without a prior reset() call;
                // confirm the TokenStream contract in use permits that.
                TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))));
                TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, 100));
                teeStream.consumeAllTokens();
                TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), 100);
                CharTermAttribute tfTok = stream.addAttribute(typeof(CharTermAttribute));
                CharTermAttribute sinkTok = sink.addAttribute(typeof(CharTermAttribute));
                for (int i = 0; stream.incrementToken(); i++)
                {
                    assertTrue(sink.incrementToken());
                    assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.Equals(sinkTok));
                }

                //simulate two fields, each being analyzed once, for 20 documents
                for (int j = 0; j < modCounts.Length; j++)
                {
                    int tfPos = 0;
                    long start = DateTimeHelperClass.CurrentUnixTimeMillis();
                    for (int i = 0; i < 20; i++)
                    {
                        // First pass: the plain filter chain.
                        stream = new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())));
                        PositionIncrementAttribute posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute));
                        while (stream.incrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }

                        // Second pass: the same chain re-created and wrapped in the modulo filter.
                        stream = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), modCounts[j]);
                        posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute));
                        while (stream.incrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    long finish = DateTimeHelperClass.CurrentUnixTimeMillis();
                    Console.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");

                    int sinkPos = 0;
                    //simulate one field with one sink
                    start = DateTimeHelperClass.CurrentUnixTimeMillis();
                    for (int i = 0; i < 20; i++)
                    {
                        teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))));
                        sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                        PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(typeof(PositionIncrementAttribute));
                        while (teeStream.incrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                        //System.out.println("Modulo--------");
                        posIncrAtt = sink.getAttribute(typeof(PositionIncrementAttribute));
                        while (sink.incrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    finish = DateTimeHelperClass.CurrentUnixTimeMillis();
                    Console.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");

                    // Both approaches must have accumulated the same total position increment.
                    assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
                }
                Console.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }
Example #6
0
        /// <summary>
        /// Not an explicit test, just useful to print out some info on performance.
        /// For every entry in <c>tokCount</c> a text of upper-cased English number words
        /// is generated. A tee/sink pair (modulo 100) is first checked term-by-term against
        /// a <c>ModuloTokenFilter</c> chain. Then, for every entry in <c>modCounts</c>,
        /// 20 rounds of "analyze twice" are timed against 20 rounds of "analyze once with
        /// a tee and one sink"; the elapsed milliseconds are printed and the summed
        /// position increments of the two approaches are asserted to match.
        /// </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void performance() throws Exception
        public virtual void performance()
        {
            int[] tokCount  = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
            int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
            for (int k = 0; k < tokCount.Length; k++)
            {
                StringBuilder buffer = new StringBuilder();
                Console.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    // Java's toUpperCase(Locale.ROOT) does not exist on System.String and Locale is
                    // not a .NET type; ToUpperInvariant() is the culture-independent .NET equivalent.
                    // Assumes English.intToEnglish returns a System.String - TODO confirm.
                    buffer.Append(English.intToEnglish(i).ToUpperInvariant()).Append(' ');
                }

                //make sure we produce the same tokens
                // NOTE(review): none of the streams below is reset() before incrementToken() is called;
                // verify that the TokenStream implementation in use tolerates this.
                TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))));
                TokenStream        sink      = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, 100));
                teeStream.consumeAllTokens();
                TokenStream       stream  = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), 100);
                CharTermAttribute tfTok   = stream.addAttribute(typeof(CharTermAttribute));
                CharTermAttribute sinkTok = sink.addAttribute(typeof(CharTermAttribute));
                for (int i = 0; stream.incrementToken(); i++)
                {
                    assertTrue(sink.incrementToken());
                    assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.Equals(sinkTok));
                }

                //simulate two fields, each being analyzed once, for 20 documents
                for (int j = 0; j < modCounts.Length; j++)
                {
                    int  tfPos = 0;
                    long start = DateTimeHelperClass.CurrentUnixTimeMillis();
                    for (int i = 0; i < 20; i++)
                    {
                        // Pass one: the unfiltered chain.
                        stream = new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())));
                        PositionIncrementAttribute posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute));
                        while (stream.incrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }

                        // Pass two: a freshly built chain wrapped in the modulo filter.
                        stream     = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), modCounts[j]);
                        posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute));
                        while (stream.incrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    long finish = DateTimeHelperClass.CurrentUnixTimeMillis();
                    Console.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");

                    int sinkPos = 0;
                    //simulate one field with one sink
                    start = DateTimeHelperClass.CurrentUnixTimeMillis();
                    for (int i = 0; i < 20; i++)
                    {
                        teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))));
                        sink      = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                        PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(typeof(PositionIncrementAttribute));
                        while (teeStream.incrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                        //System.out.println("Modulo--------");
                        posIncrAtt = sink.getAttribute(typeof(PositionIncrementAttribute));
                        while (sink.incrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    finish = DateTimeHelperClass.CurrentUnixTimeMillis();
                    Console.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");

                    // The tee/sink route and the two-pass route must total the same position increments.
                    assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
                }
                Console.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }