/// <summary>
/// Creates the helper and remembers the <see cref="NGramTokenFilter"/> it was created for.
/// </summary>
/// <param name="outerInstance">The filter stored in this helper's <c>outerInstance</c> field.</param>
public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
{
    this.outerInstance = outerInstance;
}
/// <summary>
/// Creates the helper and remembers the <see cref="NGramTokenFilter"/> it was created for.
/// </summary>
/// <param name="outerInstance">The filter stored in this helper's <c>outerInstance</c> field.</param>
public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
{
    this.outerInstance = outerInstance;
}
/// <summary>
/// Runs an <see cref="NGramTokenFilter"/> with minGram = maxGram = 2 over the shared
/// <c>input</c> stream and checks the emitted grams.
/// </summary>
public virtual void TestBigrams()
{
    NGramTokenFilter bigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2);

    // Argument roles are presumably (terms, start offsets, end offsets, position
    // increments) -- confirm against the AssertTokenStreamContents signature.
    string[] expectedTerms = new string[] { "ab", "bc", "cd", "de" };
    int[] startOffsets = new int[] { 0, 0, 0, 0 };
    int[] endOffsets = new int[] { 5, 5, 5, 5 };
    int[] positionIncrements = new int[] { 1, 0, 0, 0 };

    AssertTokenStreamContents(bigrams, expectedTerms, startOffsets, endOffsets, positionIncrements);
}
/// <summary>
/// Runs an <see cref="NGramTokenFilter"/> with minGram = 1 and maxGram = 3 over the shared
/// <c>input</c> stream and checks the twelve emitted grams, where only the first expected
/// increment value is 1 and the rest are 0.
/// </summary>
public virtual void TestNgramsNoIncrement()
{
    NGramTokenFilter grams = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);

    string[] expectedTerms = new string[]
    {
        "a", "ab", "abc",
        "b", "bc", "bcd",
        "c", "cd", "cde",
        "d", "de",
        "e"
    };
    int[] startOffsets = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    int[] endOffsets = new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 };
    // Presumably the position increments -- confirm against the helper's signature.
    int[] positionIncrements = new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    // The null literals are passed directly so the same overload is selected as before.
    AssertTokenStreamContents(
        grams,
        expectedTerms,
        startOffsets,
        endOffsets,
        null,
        positionIncrements,
        null,
        null,
        false);
}
/// <summary>
/// Builds the filter with <c>LuceneVersion.LUCENE_43</c> (minGram = 2, maxGram = 3) over the
/// shared <c>input</c> stream and checks the expected output: all 2-grams first, then all
/// 3-grams, each with its own offsets.
/// </summary>
public virtual void TestLucene43()
{
    // Warnings 612/618 (obsolete member usage) are suppressed only around the constructor call.
#pragma warning disable 612, 618
    NGramTokenFilter legacyFilter = new NGramTokenFilter(LuceneVersion.LUCENE_43, input, 2, 3);
#pragma warning restore 612, 618

    string[] expectedTerms = new string[] { "ab", "bc", "cd", "de", "abc", "bcd", "cde" };
    int[] startOffsets = new int[] { 0, 1, 2, 3, 0, 1, 2 };
    int[] endOffsets = new int[] { 2, 3, 4, 5, 3, 4, 5 };
    // Presumably the position increments -- confirm against the helper's signature.
    int[] positionIncrements = new int[] { 1, 1, 1, 1, 1, 1, 1 };

    // The null literals are passed directly so the same overload is selected as before.
    AssertTokenStreamContents(
        legacyFilter,
        expectedTerms,
        startOffsets,
        endOffsets,
        null,
        positionIncrements,
        null,
        null,
        false);
}
/// <summary>
/// Feeds a random Unicode string through a <see cref="KeywordTokenizer"/> and an
/// <see cref="NGramTokenFilter"/> with random gram sizes, then checks that every gram is
/// cut on code point boundaries and reports offsets 0 and <c>s.Length</c>.
/// </summary>
public virtual void TestSupplementaryCharacters()
{
    // The Random() calls stay in their original order: text, then minGram, then maxGram.
    string text = TestUtil.RandomUnicodeString(Random(), 10);
    int totalCodePoints = Character.CodePointCount(text, 0, text.Length);
    int minGram = TestUtil.NextInt(Random(), 1, 3);
    int maxGram = TestUtil.NextInt(Random(), minGram, 10);

    TokenStream stream = new KeywordTokenizer(new StringReader(text));
    stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, minGram, maxGram);
    ICharTermAttribute term = stream.AddAttribute<ICharTermAttribute>();
    IOffsetAttribute offsets = stream.AddAttribute<IOffsetAttribute>();
    stream.Reset();

    for (int first = 0; first < totalCodePoints; ++first)
    {
        // A gram starting at 'first' can end no later than the end of the text.
        int lastEnd = Math.Min(totalCodePoints, first + maxGram);

        for (int end = first + minGram; end <= lastEnd; ++end)
        {
            assertTrue(stream.IncrementToken());
            assertEquals(0, offsets.StartOffset());
            assertEquals(text.Length, offsets.EndOffset());

            // Convert code point positions into UTF-16 indices before slicing.
            int fromIndex = Character.OffsetByCodePoints(text, 0, first);
            int toIndex = Character.OffsetByCodePoints(text, 0, end);
            string expectedGram = text.Substring(fromIndex, toIndex - fromIndex);
            assertEquals(expectedGram, term.ToString());
        }
    }

    assertFalse(stream.IncrementToken());
}
/// <summary>
/// Builds the analysis chain: a <see cref="MockTokenizer"/> using
/// <c>MockTokenizer.WHITESPACE</c>, wrapped in an <see cref="ASCIIFoldingFilter"/>, wrapped in
/// an <see cref="NGramTokenFilter"/> with minGram = maxGram = 2.
/// </summary>
/// <param name="fieldName">Not used by this implementation.</param>
/// <param name="reader">The text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the final filter of the chain.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    TokenFilter folded = new ASCIIFoldingFilter(source);
    TokenFilter bigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, folded, 2, 2);
    return new TokenStreamComponents(source, bigrams);
}
/// <summary>
/// Checks that the same <see cref="NGramTokenFilter"/> (minGram = maxGram = 1) yields
/// identical output a second time after its tokenizer is given a fresh reader over the
/// same text.
/// </summary>
public virtual void TestReset()
{
    const string text = "abcde";

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    NGramTokenFilter unigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);

    for (int pass = 0; pass < 2; pass++)
    {
        if (pass > 0)
        {
            // Second pass: re-point the tokenizer at new input before re-asserting.
            tokenizer.Reader = new StringReader(text);
        }

        // Fresh expectation arrays are built for every pass, as in the original.
        AssertTokenStreamContents(
            unigrams,
            new string[] { "a", "b", "c", "d", "e" },
            new int[] { 0, 0, 0, 0, 0 },
            new int[] { 5, 5, 5, 5, 5 },
            new int[] { 1, 0, 0, 0, 0 });
    }
}
/// <summary>
/// Tokenizes "abc de fgh" on whitespace and applies an <see cref="NGramTokenFilter"/> with
/// minGram = maxGram = 3. Only "abc" and "fgh" are expected, and the second expected
/// increment is 2.
/// </summary>
public virtual void TestSmallTokenInStream()
{
    // Replaces the shared 'input' field for this test.
    input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
    NGramTokenFilter trigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);

    string[] expectedTerms = new string[] { "abc", "fgh" };
    int[] startOffsets = new int[] { 0, 7 };
    int[] endOffsets = new int[] { 3, 10 };
    // Presumably the position increments -- confirm against the helper's signature.
    int[] positionIncrements = new int[] { 1, 2 };

    AssertTokenStreamContents(trigrams, expectedTerms, startOffsets, endOffsets, positionIncrements);
}
/// <summary>
/// Applies an <see cref="NGramTokenFilter"/> with minGram = 6 and maxGram = 7 to the shared
/// <c>input</c> stream and expects no tokens at all.
/// </summary>
public virtual void TestOversizedNgrams()
{
    NGramTokenFilter oversized = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7);

    // Three separate empty arrays, matching the original call.
    string[] noTerms = new string[0];
    int[] noStartOffsets = new int[0];
    int[] noEndOffsets = new int[0];

    AssertTokenStreamContents(oversized, noTerms, noStartOffsets, noEndOffsets);
}
/// <summary>
/// Builds the analysis chain: an <see cref="EdgeNGramTokenizer"/> (2..94), wrapped in a
/// <see cref="ShingleFilter"/> constructed with 5, wrapped in an
/// <see cref="NGramTokenFilter"/> (55..83).
/// </summary>
/// <param name="fieldName">Not used by this implementation.</param>
/// <param name="reader">The text source handed to the tokenizer.</param>
/// <returns>Components pairing the tokenizer with the final filter of the chain.</returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    // NOTE(review): the original carried commented-out SopTokenFilter wrappers after each
    // stage (tokenizer, shingles, n-grams) -- presumably debugging aids; none were active.
    Tokenizer edgeGrams = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94);
    TokenStream shingles = new ShingleFilter(edgeGrams, 5);
    TokenStream ngrams = new NGramTokenFilter(TEST_VERSION_CURRENT, shingles, 55, 83);
    return new TokenStreamComponents(edgeGrams, ngrams);
}