Example #1
0
 public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
 {
     this.outerInstance = outerInstance;
 }
 public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
 {
     this.outerInstance = outerInstance;
 }
 public virtual void TestBigrams()
 {
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2);
     AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0 });
 }
 public virtual void TestNgramsNoIncrement()
 {
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
     AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
 }
        public virtual void TestLucene43()
        {
#pragma warning disable 612, 618
            NGramTokenFilter filter = new NGramTokenFilter(LuceneVersion.LUCENE_43, input, 2, 3);
#pragma warning restore 612, 618
            AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de", "abc", "bcd", "cde" }, new int[] { 0, 1, 2, 3, 0, 1, 2 }, new int[] { 2, 3, 4, 5, 3, 4, 5 }, null, new int[] { 1, 1, 1, 1, 1, 1, 1 }, null, null, false);
        }
 public virtual void TestSupplementaryCharacters()
 {
     string s = TestUtil.RandomUnicodeString(Random(), 10);
     int codePointCount = Character.CodePointCount(s, 0, s.Length);
     int minGram = TestUtil.NextInt(Random(), 1, 3);
     int maxGram = TestUtil.NextInt(Random(), minGram, 10);
     TokenStream tk = new KeywordTokenizer(new StringReader(s));
     tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
     ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
     IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
     tk.Reset();
     for (int start = 0; start < codePointCount; ++start)
     {
         for (int end = start + minGram; end <= Math.Min(codePointCount, start + maxGram); ++end)
         {
             assertTrue(tk.IncrementToken());
             assertEquals(0, offsetAtt.StartOffset());
             assertEquals(s.Length, offsetAtt.EndOffset());
             int startIndex = Character.OffsetByCodePoints(s, 0, start);
             int endIndex = Character.OffsetByCodePoints(s, 0, end);
             assertEquals(s.Substring(startIndex, endIndex - startIndex), termAtt.ToString());
         }
     }
     assertFalse(tk.IncrementToken());
 }
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
     filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
     return new TokenStreamComponents(tokenizer, filters);
 }
 public virtual void TestReset()
 {
     WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
     AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
     tokenizer.Reader = new StringReader("abcde");
     AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
 }
 public virtual void TestSmallTokenInStream()
 {
     input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
     AssertTokenStreamContents(filter, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 }, new int[] { 1, 2 });
 }
 public virtual void TestOversizedNgrams()
 {
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7);
     AssertTokenStreamContents(filter, new string[0], new int[0], new int[0]);
 }
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94);
     //TokenStream stream = new SopTokenFilter(tokenizer);
     TokenStream stream = new ShingleFilter(tokenizer, 5);
     //stream = new SopTokenFilter(stream);
     stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83);
     //stream = new SopTokenFilter(stream);
     return new TokenStreamComponents(tokenizer, stream);
 }