Esempio n. 1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFilterPositions() throws Exception
        public virtual void testFilterPositions()
        {
            TokenStream          ts        = new MockTokenizer(new StringReader("abcde vwxyz"), MockTokenizer.WHITESPACE, false);
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 1, 3);

            assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "v", "vw", "vwx" }, new int[] { 0, 0, 0, 6, 6, 6 }, new int[] { 5, 5, 5, 11, 11, 11 }, null, new int[] { 1, 0, 0, 1, 0, 0 }, null, null, false);
        }
Esempio n. 2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSmallTokenInStream() throws Exception
        public virtual void testSmallTokenInStream()
        {
            input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);

            assertTokenStreamContents(tokenizer, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
        }
Esempio n. 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testGraphs() throws java.io.IOException
        public virtual void testGraphs()
        {
            TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));

            tk = new ShingleFilter(tk);
            tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
            assertTokenStreamContents(tk, new string[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" }, new int[] { 6, 11, 11, 14 }, new int[] { 13, 19, 19, 21 }, new int[] { 3, 1, 0, 1 }, new int[] { 2, 2, 2, 2 }, 23);
        }
Esempio n. 4
0
            protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
            {
                Tokenizer   tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                TokenFilter filters   = new ASCIIFoldingFilter(tokenizer);

                filters = new EdgeNGramTokenFilter(Version.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
                return(new TokenStreamComponents(tokenizer, filters));
            }
Esempio n. 5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testReset() throws Exception
        public virtual void testReset()
        {
            WhitespaceTokenizer  tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
            EdgeNGramTokenFilter filter    = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);

            assertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
            tokenizer.Reader = new StringReader("abcde");
            assertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
        }
Esempio n. 6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFirstTokenPositionIncrement() throws Exception
        public virtual void testFirstTokenPositionIncrement()
        {
            TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);

            ts = new PositionFilter(ts);     // All but first token will get 0 position increment
            EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 2, 3);

            // The first token "a" will not be output, since it's smaller than the mingram size of 2.
            // The second token on input to EdgeNGramTokenFilter will have position increment of 0,
            // which should be increased to 1, since this is the first output token in the stream.
            assertTokenStreamContents(filter, new string[] { "ab", "abc" }, new int[] { 2, 2 }, new int[] { 5, 5 }, new int[] { 1, 0 });
        }
Esempio n. 7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSupplementaryCharacters() throws java.io.IOException
        public virtual void testSupplementaryCharacters()
        {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String s = org.apache.lucene.util.TestUtil.randomUnicodeString(random(), 10);
            string s = TestUtil.randomUnicodeString(random(), 10);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int codePointCount = s.codePointCount(0, s.length());
            int codePointCount = s.codePointCount(0, s.Length);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int minGram = org.apache.lucene.util.TestUtil.nextInt(random(), 1, 3);
            int minGram = TestUtil.Next(random(), 1, 3);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int maxGram = org.apache.lucene.util.TestUtil.nextInt(random(), minGram, 10);
            int         maxGram = TestUtil.Next(random(), minGram, 10);
            TokenStream tk      = new KeywordTokenizer(new StringReader(s));

            tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.tokenattributes.CharTermAttribute termAtt = tk.addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
            CharTermAttribute termAtt = tk.addAttribute(typeof(CharTermAttribute));
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsetAtt = tk.addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
            OffsetAttribute offsetAtt = tk.addAttribute(typeof(OffsetAttribute));

            tk.reset();
            for (int i = minGram; i <= Math.Min(codePointCount, maxGram); ++i)
            {
                assertTrue(tk.incrementToken());
                assertEquals(0, offsetAtt.startOffset());
                assertEquals(s.Length, offsetAtt.endOffset());
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int end = Character.offsetByCodePoints(s, 0, i);
                int end = char.offsetByCodePoints(s, 0, i);
                assertEquals(s.Substring(0, end), termAtt.ToString());
            }
            assertFalse(tk.incrementToken());
        }
Esempio n. 8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testBackRangeOfNgrams() throws Exception
        public virtual void testBackRangeOfNgrams()
        {
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(Version.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 3);

            assertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, null, false);
        }
Esempio n. 9
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFrontRangeOfNgrams() throws Exception
        public virtual void testFrontRangeOfNgrams()
        {
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);

            assertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
        }
Esempio n. 10
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOversizedNgrams() throws Exception
        public virtual void testOversizedNgrams()
        {
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);

            assertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0]);
        }
Esempio n. 11
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testBackUnigram() throws Exception
        public virtual void testBackUnigram()
        {
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(Version.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 1);

            assertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 });
        }
Esempio n. 12
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFrontUnigram() throws Exception
        public virtual void testFrontUnigram()
        {
            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);

            assertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 5 });
        }
        public override TokenStream create(TokenStream input)
        {
            var filter = new EdgeNGramTokenFilter(input, _side ?? EdgeNGramTokenFilter.DEFAULT_SIDE, _minGram ?? EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, _maxGram ?? EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);

            return(filter);
        }