public virtual void TestReset()
{
    NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
    AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde

    // Reusing the tokenizer via SetReader should yield the same unigrams as the first pass.
    tokenizer.SetReader(new StringReader("abcde"));
    AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
}
public virtual void TestRandomStrings()
{
    // Blast random strings through analyzers built with random gram sizes to catch edge cases.
    for (int i = 0; i < 10; i++)
    {
        int min = TestUtil.NextInt32(Random, 2, 10);
        int max = TestUtil.NextInt32(Random, min, 20);
        Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
            return new TokenStreamComponents(tokenizer, tokenizer);
        });
        CheckRandomData(Random, a, 200 * RandomMultiplier, 20);
        CheckRandomData(Random, a, 10 * RandomMultiplier, 1027);
    }
}
public void TestOversizedNgrams()
{
    // The minimum gram size (6) exceeds the 5-character input, so no tokens are produced.
    NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
    AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
}
public void TestNgrams()
{
    NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
    AssertTokenStreamContents(tokenizer,
        new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
        new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
        new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 },
        5 /* abcde */);
}
public void TestBigrams()
{
    NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
    AssertTokenStreamContents(tokenizer, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5 /* abcde */);
}
public void TestUnigrams()
{
    NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
    AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
}
public void TestReset()
{
    NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
    AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);

    // Reset(reader) points the tokenizer at a new reader; the second pass should match the first.
    tokenizer.Reset(new StringReader("abcde"));
    AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
}
public virtual void TestOversizedNgrams()
{
    NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7);
    AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
}
public virtual void TestNgrams()
{
    NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
    // Grams are emitted in position order ("a", "ab", "abc", then "b", ...), unlike the
    // length-first ordering of the older TestNgrams above.
    AssertTokenStreamContents(tokenizer,
        new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" },
        new int[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4 },
        new int[] { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 },
        null, null, null,
        5, false); // abcde
}
public virtual void TestBigrams()
{
    NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2);
    AssertTokenStreamContents(tokenizer, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5); // abcde
}
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
    return new TokenStreamComponents(tokenizer, tokenizer);
}
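// A minimal sketch (not from the original source) of the analyzer class that the
// CreateComponents override above presumably belongs to. The field names "min" and "max",
// the class name, and nesting it inside the test fixture (so TEST_VERSION_CURRENT stays in
// scope) are assumptions; the Analyzer.NewAnonymous lambda in TestRandomStrings is the
// equivalent inline form.
private sealed class NGramTestAnalyzer : Analyzer
{
    private readonly int min, max;

    internal NGramTestAnalyzer(int min, int max)
    {
        this.min = min;
        this.max = max;
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
        return new TokenStreamComponents(tokenizer, tokenizer);
    }
}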