Beispiel #1
0
        /// <summary>
        /// Pops one input token's worth of tokens off the filter and verifies their text.
        /// The folded form is always expected first; the unfolded form is expected as a second
        /// token only when the filter preserves originals and the two expected forms differ.
        /// </summary>
        /// <param name="expectedUnfolded">Term text expected for the preserved original token.</param>
        /// <param name="expectedFolded">Term text expected for the folded token.</param>
        /// <param name="filter">Filter under test; advanced by one or two tokens.</param>
        /// <param name="termAtt">Attribute whose text is read after each advance.</param>
        /// <remarks>Ported from Java, where the method was declared <c>throws Exception</c>.</remarks>
        internal virtual void assertNextTerms(string expectedUnfolded, string expectedFolded, ASCIIFoldingFilter filter, CharTermAttribute termAtt)
        {
            // The folded token always comes out first.
            assertTrue(filter.incrementToken());
            assertEquals(expectedFolded, termAtt.ToString());

            // No second token is expected when originals are dropped or folding changed nothing.
            if (!filter.PreserveOriginal || expectedUnfolded.Equals(expectedFolded))
            {
                return;
            }

            assertTrue(filter.incrementToken());
            assertEquals(expectedUnfolded, termAtt.ToString());
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
            /// <summary>
            /// Advances the wrapped input by one token and sets the position increment to 10
            /// when the term text is "largegap" or "/".
            /// </summary>
            /// <returns><c>true</c> if the input produced a token; <c>false</c> otherwise.</returns>
            public override bool incrementToken()
            {
                if (!input.incrementToken())
                {
                    return false;
                }

                // Only these two marker terms get the enlarged gap; all others pass through untouched.
                string term = termAtt.ToString();
                if (term.Equals("largegap") || term.Equals("/"))
                {
                    posIncAtt.PositionIncrement = 10;
                }
                return true;
            }
Beispiel #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopListPositions() throws java.io.IOException
        /// <summary>
        /// Runs a <c>StopAnalyzer</c> with a custom stop set over a fixed sentence and checks that
        /// no emitted term is in the stop set and that each token carries the expected position increment.
        /// </summary>
        public virtual void testStopListPositions()
        {
            string sentence = "This is a good test of the english stop analyzer with positions";
            int[] expectedIncr = { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
            TokenStream stream = analyzer.tokenStream("test", sentence);

            try
            {
                assertNotNull(stream);
                CharTermAttribute termAtt = stream.getAttribute(typeof(CharTermAttribute));
                PositionIncrementAttribute posIncrAtt = stream.addAttribute(typeof(PositionIncrementAttribute));

                stream.reset();
                // tokenIndex walks expectedIncr in step with the tokens the stream emits.
                for (int tokenIndex = 0; stream.incrementToken(); tokenIndex++)
                {
                    assertFalse(stopWordsSet.contains(termAtt.ToString()));
                    assertEquals(expectedIncr[tokenIndex], posIncrAtt.PositionIncrement);
                }
                stream.end();
            }
            finally
            {
                IOUtils.closeWhileHandlingException(stream);
            }
        }
Beispiel #4
0
        // we only check a few core attributes here.
        // TODO: test other things
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void assertEquals(String s, org.apache.lucene.analysis.TokenStream left, org.apache.lucene.analysis.TokenStream right) throws Exception
        /// <summary>
        /// Consumes two token streams in lock-step and asserts that they emit the same number of
        /// tokens with equal term text, position increment, start offset and end offset, and that
        /// the end offsets still agree after <c>end()</c>. Both streams are closed at the end.
        /// </summary>
        /// <param name="s">Input text, used only to label assertion failure messages.</param>
        /// <param name="left">First stream; drives the comparison loop.</param>
        /// <param name="right">Second stream; must match <paramref name="left"/> token for token.</param>
        public virtual void assertEquals(string s, TokenStream left, TokenStream right)
        {
            left.reset();
            right.reset();

            CharTermAttribute termL = left.addAttribute(typeof(CharTermAttribute));
            CharTermAttribute termR = right.addAttribute(typeof(CharTermAttribute));
            OffsetAttribute offsetL = left.addAttribute(typeof(OffsetAttribute));
            OffsetAttribute offsetR = right.addAttribute(typeof(OffsetAttribute));
            PositionIncrementAttribute posL = left.addAttribute(typeof(PositionIncrementAttribute));
            PositionIncrementAttribute posR = right.addAttribute(typeof(PositionIncrementAttribute));

            while (left.incrementToken())
            {
                // The right stream must still have a token whenever the left one does.
                assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
                assertEquals("wrong term text for input: " + s, termL.ToString(), termR.ToString());
                assertEquals("wrong position for input: " + s, posL.PositionIncrement, posR.PositionIncrement);
                assertEquals("wrong start offset for input: " + s, offsetL.startOffset(), offsetR.startOffset());
                assertEquals("wrong end offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());
            }

            // Once the left stream is drained, the right one must be drained as well.
            assertFalse("wrong number of tokens for input: " + s, right.incrementToken());

            left.end();
            right.end();
            assertEquals("wrong final offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());

            left.close();
            right.close();
        }
Beispiel #5
0
 /// <summary>
 /// Advances the input by one token and replaces its term text with the result of
 /// <c>CyrillicLatinConverter.cir2lat</c> applied to the original text.
 /// </summary>
 /// <returns><c>true</c> if a token was produced; <c>false</c> when the input has none left.</returns>
 public sealed override bool IncrementToken()
 {
     if (!m_input.IncrementToken())
     {
         return false;
     }

     // Snapshot the text before clearing the attribute, then write the converted form back.
     string original = termAttribute.ToString();
     termAttribute.SetEmpty();
     termAttribute.Append(CyrillicLatinConverter.cir2lat(original));
     return true;
 }
Beispiel #6
0
        /// <summary>
        /// Advances the input by one token and replaces its term text with the result of
        /// <c>CyrillicLatinConverter.Cir2lat</c> applied to the original text.
        /// </summary>
        /// <returns><c>true</c> if a token was produced; <c>false</c> when the input has none left.</returns>
        public override bool IncrementToken()
        {
            if (!m_input.IncrementToken())
            {
                return false;
            }

            // Snapshot the text before clearing the attribute, then write the converted form back.
            string original = termAttribute.ToString();
            termAttribute.Clear();
            termAttribute.Append(CyrillicLatinConverter.Cir2lat(original));
            return true;
        }
Beispiel #7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private java.util.List<String> filter(org.apache.lucene.analysis.TokenFilter filter) throws java.io.IOException
        /// <summary>
        /// Resets the given filter, drains it, and collects the term text of every token it emits.
        /// The filter is ended and closed before returning.
        /// </summary>
        /// <param name="filter">Token filter to consume.</param>
        /// <returns>The term texts in emission order.</returns>
        private IList<string> filter(TokenFilter filter)
        {
            CharTermAttribute termAtt = filter.getAttribute(typeof(CharTermAttribute));
            IList<string> terms = new List<string>();

            filter.reset();
            while (filter.incrementToken())
            {
                string term = termAtt.ToString();
                terms.Add(term);
            }
            filter.end();
            filter.close();

            return terms;
        }
Beispiel #8
0
 /// <summary>
 /// Advances the input by one token and overwrites its term buffer with the
 /// <c>IndexableBinaryStringTools</c> encoding of the term's collation key bytes.
 /// </summary>
 /// <returns><c>true</c> if a token was produced; <c>false</c> when the input has none left.</returns>
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return false;
     }

     var keyBytes = collator.GetCollationKey(termAtt.ToString()).toByteArray();

     // Size the term buffer for the encoded form before encoding into it.
     int encodedLength = IndexableBinaryStringTools.getEncodedLength(keyBytes, 0, keyBytes.Length);
     termAtt.resizeBuffer(encodedLength);
     termAtt.Length = encodedLength;

     IndexableBinaryStringTools.encode(keyBytes, 0, keyBytes.Length, termAtt.buffer(), 0, encodedLength);
     return true;
 }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void assertCollatesToSame(org.apache.lucene.analysis.TokenStream stream1, org.apache.lucene.analysis.TokenStream stream2) throws java.io.IOException
        /// <summary>
        /// Asserts that each stream yields exactly one token and that the two tokens have equal
        /// term text. Both streams are then ended and closed.
        /// </summary>
        /// <param name="stream1">First stream to consume.</param>
        /// <param name="stream2">Second stream to consume.</param>
        private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
        {
            stream1.reset();
            stream2.reset();

            CharTermAttribute firstTerm = stream1.addAttribute(typeof(CharTermAttribute));
            CharTermAttribute secondTerm = stream2.addAttribute(typeof(CharTermAttribute));

            // Exactly one token each, with identical text.
            assertTrue(stream1.incrementToken());
            assertTrue(stream2.incrementToken());
            assertEquals(firstTerm.ToString(), secondTerm.ToString());

            // Neither stream may produce a second token.
            assertFalse(stream1.incrementToken());
            assertFalse(stream2.incrementToken());

            stream1.end();
            stream2.end();
            stream1.close();
            stream2.close();
        }
Beispiel #10
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void testPositons(TypeTokenFilter stpf) throws java.io.IOException
        /// <summary>
        /// Drains <paramref name="stpf"/>, logging each token, and checks that every token's position
        /// increment is 3 when the filter reports position increments enabled, and 1 otherwise.
        /// The filter is ended and closed afterwards.
        /// </summary>
        /// <param name="stpf">Type token filter under test.</param>
        private void testPositons(TypeTokenFilter stpf)
        {
            TypeAttribute              typeAtt       = stpf.getAttribute(typeof(TypeAttribute));
            CharTermAttribute          termAttribute = stpf.getAttribute(typeof(CharTermAttribute));
            PositionIncrementAttribute posIncrAtt    = stpf.getAttribute(typeof(PositionIncrementAttribute));

            stpf.reset();
            bool enablePositionIncrements = stpf.EnablePositionIncrements;

            while (stpf.incrementToken())
            {
                log("Token: " + termAttribute.ToString() + ": " + typeAtt.type() + " - " + posIncrAtt.PositionIncrement);
                // Expected value goes first so that a failure report labels the two numbers correctly.
                assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1", enablePositionIncrements ? 3 : 1, posIncrAtt.PositionIncrement);
            }
            stpf.end();
            stpf.close();
        }
Beispiel #11
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testPerField() throws Exception
        /// <summary>
        /// Checks that a <c>PerFieldAnalyzerWrapper</c> uses its default <c>WhitespaceAnalyzer</c>
        /// for an unmapped field (term stays "Qwerty") and the mapped <c>SimpleAnalyzer</c> for the
        /// "special" field (term becomes "qwerty").
        /// </summary>
        public virtual void testPerField()
        {
            string text = "Qwerty";

            IDictionary<string, Analyzer> analyzerPerField = new Dictionary<string, Analyzer>
            {
                { "special", new SimpleAnalyzer(TEST_VERSION_CURRENT) }
            };
            PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);

            // Unmapped field: falls through to the default analyzer.
            TokenStream tokenStream = analyzer.tokenStream("field", text);
            try
            {
                CharTermAttribute defaultTerm = tokenStream.getAttribute(typeof(CharTermAttribute));
                tokenStream.reset();

                assertTrue(tokenStream.incrementToken());
                assertEquals("WhitespaceAnalyzer does not lowercase", "Qwerty", defaultTerm.ToString());
                assertFalse(tokenStream.incrementToken());
                tokenStream.end();
            }
            finally
            {
                IOUtils.closeWhileHandlingException(tokenStream);
            }

            // Mapped field: handled by the analyzer registered under "special".
            tokenStream = analyzer.tokenStream("special", text);
            try
            {
                CharTermAttribute specialTerm = tokenStream.getAttribute(typeof(CharTermAttribute));
                tokenStream.reset();

                assertTrue(tokenStream.incrementToken());
                assertEquals("SimpleAnalyzer lowercases", "qwerty", specialTerm.ToString());
                assertFalse(tokenStream.incrementToken());
                tokenStream.end();
            }
            finally
            {
                IOUtils.closeWhileHandlingException(tokenStream);
            }
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSupplementaryCharacters() throws java.io.IOException
        /// <summary>
        /// Feeds a random Unicode string through a <c>KeywordTokenizer</c> and an
        /// <c>NGramTokenFilter</c> with random gram sizes, and checks that every emitted gram is the
        /// expected code-point-aligned substring with offsets spanning the whole input.
        /// </summary>
        public virtual void testSupplementaryCharacters()
        {
            // The order of the random() draws is kept as-is: input string, then minGram, then maxGram.
            string s = TestUtil.randomUnicodeString(random(), 10);
            int codePointCount = s.codePointCount(0, s.Length);
            int minGram = TestUtil.Next(random(), 1, 3);
            int maxGram = TestUtil.Next(random(), minGram, 10);

            TokenStream source = new KeywordTokenizer(new StringReader(s));
            TokenStream tk = new NGramTokenFilter(TEST_VERSION_CURRENT, source, minGram, maxGram);

            CharTermAttribute termAtt = tk.addAttribute(typeof(CharTermAttribute));
            OffsetAttribute offsetAtt = tk.addAttribute(typeof(OffsetAttribute));

            tk.reset();
            for (int start = 0; start < codePointCount; ++start)
            {
                // Largest gram end (in code points) allowed for this start position.
                int lastEnd = Math.Min(codePointCount, start + maxGram);
                for (int end = start + minGram; end <= lastEnd; ++end)
                {
                    assertTrue(tk.incrementToken());
                    assertEquals(0, offsetAtt.startOffset());
                    assertEquals(s.Length, offsetAtt.endOffset());

                    // Translate code point positions into char indices before slicing the input.
                    int startIndex = char.offsetByCodePoints(s, 0, start);
                    int endIndex = char.offsetByCodePoints(s, 0, end);
                    assertEquals(s.Substring(startIndex, endIndex - startIndex), termAtt.ToString());
                }
            }
            assertFalse(tk.incrementToken());
        }
Beispiel #13
0
        /// <summary>
        /// Accepts the current token when its term text can be parsed by <c>dateFormat</c>.
        /// The term attribute is looked up on first use and cached in <c>termAtt</c>.
        /// </summary>
        /// <param name="source">Attribute source supplying the term attribute on the first call.</param>
        /// <returns><c>true</c> if parsing succeeds; <c>false</c> if it throws <c>ParseException</c>.</returns>
        public override bool accept(AttributeSource source)
        {
            if (termAtt == null)
            {
                termAtt = source.addAttribute(typeof(CharTermAttribute));
            }

            try
            {
                // We don't care about the date, just that we can parse it as a date.
                // DateTime is a value type and can never be null, so a successful parse is the whole test.
                dateFormat.parse(termAtt.ToString());
                return true;
            }
            catch (ParseException)
            {
                // Deliberately ignored: an unparseable term simply means "not a date".
            }

            return false;
        }
Beispiel #14
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFilterTokens() throws Exception
        /// <summary>
        /// Wraps a <c>TestTokenStream</c> in an English <c>SnowballFilter</c>, pulls one token, and checks
        /// the resulting term text ("accent") together with the offset, type, position increment,
        /// flags and payload values.
        /// </summary>
        public virtual void testFilterTokens()
        {
            SnowballFilter             filter     = new SnowballFilter(new TestTokenStream(this), "English");
            CharTermAttribute          termAtt    = filter.getAttribute(typeof(CharTermAttribute));
            OffsetAttribute            offsetAtt  = filter.getAttribute(typeof(OffsetAttribute));
            TypeAttribute              typeAtt    = filter.getAttribute(typeof(TypeAttribute));
            PayloadAttribute           payloadAtt = filter.getAttribute(typeof(PayloadAttribute));
            PositionIncrementAttribute posIncAtt  = filter.getAttribute(typeof(PositionIncrementAttribute));
            FlagsAttribute             flagsAtt   = filter.getAttribute(typeof(FlagsAttribute));

            // Fail here, with a clear cause, if no token is produced, rather than on a
            // misleading attribute mismatch below.
            assertTrue(filter.incrementToken());

            assertEquals("accent", termAtt.ToString());
            assertEquals(2, offsetAtt.startOffset());
            assertEquals(7, offsetAtt.endOffset());
            assertEquals("wrd", typeAtt.type());
            assertEquals(3, posIncAtt.PositionIncrement);
            assertEquals(77, flagsAtt.Flags);
            assertEquals(new BytesRef(new sbyte[] { 0, 1, 2, 3 }), payloadAtt.Payload);
        }
Beispiel #15
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws java.io.IOException
        /// <summary>
        /// Sets the filter's position-increment mode and verifies that it emits the English words
        /// for 0, 3, 6, ... 18 with the matching position increments, and nothing after that.
        /// The filter is ended and closed afterwards.
        /// </summary>
        /// <param name="stpf">Stop filter under test.</param>
        /// <param name="enableIcrements">Value assigned to the filter's <c>EnablePositionIncrements</c>.</param>
        private void doTestStopPositons(StopFilter stpf, bool enableIcrements)
        {
            string mode = enableIcrements ? "enabled" : "disabled";
            log("---> test with enable-increments-" + mode);

            stpf.EnablePositionIncrements = enableIcrements;
            CharTermAttribute termAtt = stpf.getAttribute(typeof(CharTermAttribute));
            PositionIncrementAttribute posIncrAtt = stpf.getAttribute(typeof(PositionIncrementAttribute));

            stpf.reset();
            for (int i = 0; i < 20; i += 3)
            {
                assertTrue(stpf.incrementToken());
                log("Token " + i + ": " + stpf);

                string w = English.intToEnglish(i).trim();
                assertEquals("expecting token " + i + " to be " + w, w, termAtt.ToString());

                // Increment is 3 only when increments are enabled and this is not the first token.
                int expectedIncrement = 1;
                if (enableIcrements && i != 0)
                {
                    expectedIncrement = 3;
                }
                assertEquals("all but first token must have position increment of 3", expectedIncrement, posIncrAtt.PositionIncrement);
            }
            assertFalse(stpf.incrementToken());

            stpf.end();
            stpf.close();
        }
Beispiel #16
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testDefaults() throws java.io.IOException
        /// <summary>
        /// Analyzes a fixed sentence with the <c>stop</c> analyzer and checks that none of the
        /// emitted terms is contained in <c>inValidTokens</c>.
        /// </summary>
        public virtual void testDefaults()
        {
            assertTrue(stop != null);

            TokenStream tokens = stop.tokenStream("test", "This is a test of the english stop analyzer");
            try
            {
                assertTrue(tokens != null);
                CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

                tokens.reset();
                while (tokens.incrementToken())
                {
                    string current = term.ToString();
                    assertFalse(inValidTokens.Contains(current));
                }
                tokens.end();
            }
            finally
            {
                IOUtils.closeWhileHandlingException(tokens);
            }
        }
Beispiel #17
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
            /// <summary>
            /// Passes input tokens through unchanged. After a token with term "the", the next call
            /// emits an extra token restored from the captured state, with term "hte" and position
            /// increment 0.
            /// </summary>
            /// <returns><c>true</c> if a token (buffered or from the input) was produced; otherwise <c>false</c>.</returns>
            public override bool incrementToken()
            {
                // A pending state means the previous token was "the": replay it as "hte" at the same position.
                if (bufferedState != null)
                {
                    restoreState(bufferedState);
                    posIncAtt.PositionIncrement = 0;
                    termAtt.setEmpty().append("hte");
                    bufferedState = null;
                    return true;
                }

                if (!input.incrementToken())
                {
                    return false;
                }

                // Remember the state of "the" so the next call can emit the extra token.
                if (termAtt.ToString().Equals("the"))
                {
                    bufferedState = captureState();
                }
                return true;
            }
Beispiel #18
0
        /// <summary>
        /// Advances the input by one token and, unless the token is flagged as a keyword, replaces
        /// its term text with the stemmer's output when that output is non-null and different.
        /// </summary>
        /// <returns>Returns true for the next token in the stream, or false at EOS.</returns>
        /// <remarks>Ported from Java, where the method was declared <c>throws java.io.IOException</c>.</remarks>
        public override bool incrementToken()
        {
            if (!input.incrementToken())
            {
                return false;
            }

            string term = termAtt.ToString();

            // Check the exclusion table: keyword tokens are passed through unstemmed.
            if (keywordAttr.Keyword)
            {
                return true;
            }

            string stemmed = stemmer.stem(term);
            // If not stemmed, don't waste the time adjusting the token.
            if ((stemmed != null) && !stemmed.Equals(term))
            {
                termAtt.setEmpty().append(stemmed);
            }
            return true;
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private static java.util.List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws java.io.IOException
        /// <summary>
        /// Tokenizes <paramref name="source"/> with a tokenizer obtained from
        /// <paramref name="tokFactory"/> and returns the text of every non-empty token.
        /// </summary>
        /// <param name="source">Text to split.</param>
        /// <param name="tokFactory">Factory passed to <c>loadTokenizer</c> to build the token stream.</param>
        /// <returns>The non-empty term texts in emission order.</returns>
        private static IList<string> splitByTokenizer(string source, TokenizerFactory tokFactory)
        {
            StringReader   reader  = new StringReader(source);
            IList<string>  tokList = new List<string>();

            try
            {
                TokenStream ts = loadTokenizer(tokFactory, reader);
                try
                {
                    CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
                    ts.reset();
                    while (ts.incrementToken())
                    {
                        // Skip tokens with no text.
                        if (termAtt.length() > 0)
                        {
                            tokList.Add(termAtt.ToString());
                        }
                    }
                    // Complete the consumer workflow once the stream is drained.
                    ts.end();
                }
                finally
                {
                    // Release the token stream itself; previously only the reader was closed.
                    ts.close();
                }
            }
            finally
            {
                // Always close the reader, even if creating or consuming the stream failed.
                reader.close();
            }
            return tokList;
        }
        /// <summary>
        /// Resets the stream, consumes every token, and returns the term texts joined by single
        /// spaces. The stream is closed afterwards.
        /// TODO: rewrite tests not to use string comparison.
        /// </summary>
        /// <param name="in">Token stream to consume.</param>
        /// <returns>The space-separated term texts.</returns>
        /// <remarks>Ported from Java, where the method was declared <c>throws java.io.IOException</c>.</remarks>
        private static string tsToString(TokenStream @in)
        {
            CharTermAttribute termAtt = @in.addAttribute(typeof(CharTermAttribute));
            StringBuilder joined = new StringBuilder();

            // Extra safety: enforce that no state is preserved, and assign bogus values
            // so a stream that fails to set its own term is caught.
            @in.clearAttributes();
            termAtt.setEmpty().append("bogusTerm");

            @in.reset();
            while (@in.incrementToken())
            {
                // Separator goes before every term except the first.
                if (joined.Length > 0)
                {
                    joined.Append(' ');
                }
                joined.Append(termAtt.ToString());

                // Poison the attributes again before the next token.
                @in.clearAttributes();
                termAtt.setEmpty().append("bogusTerm");
            }
            @in.close();

            return joined.ToString();
        }
Beispiel #21
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testStopList() throws java.io.IOException
        /// <summary>
        /// Runs a <c>StopAnalyzer</c> with a custom stop set over a fixed sentence and checks that
        /// none of the emitted terms is contained in that stop set.
        /// </summary>
        public virtual void testStopList()
        {
            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
            StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);

            TokenStream tokens = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");
            try
            {
                assertNotNull(tokens);
                CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

                tokens.reset();
                while (tokens.incrementToken())
                {
                    // Every surviving term must be outside the stop set.
                    assertFalse(stopWordsSet.contains(term.ToString()));
                }
                tokens.end();
            }
            finally
            {
                IOUtils.closeWhileHandlingException(tokens);
            }
        }
Beispiel #22
0
            /// <summary>
            /// Accepts the current token when its term text equals "Dogs", ignoring case.
            /// </summary>
            /// <param name="a">Attribute source from which the term attribute is read.</param>
            /// <returns><c>true</c> if the term matches "Dogs" case-insensitively; otherwise <c>false</c>.</returns>
            public override bool accept(AttributeSource a)
            {
                CharTermAttribute termAtt = a.getAttribute(typeof(CharTermAttribute));

                // Ordinal comparison keeps the match independent of the thread's current culture;
                // token text is data, not user-facing linguistic content.
                return string.Equals(termAtt.ToString(), "Dogs", StringComparison.OrdinalIgnoreCase);
            }