C# (CSharp) WhitespaceTokenizer.SetReader Examples

Programming Language: C# (CSharp)

Method/Function: SetReader

Examples at hotexamples.com: 5

C# (CSharp) WhitespaceTokenizer.SetReader - 5 examples found. These are the top rated real world C# (CSharp) examples of WhitespaceTokenizer.SetReader extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Tokenize(6)

SetReader(5)

Reset(3)

AddAttribute(1)

Factory(1)

GetAttribute(1)

IncrementToken(1)

Example #1

Show file

File: NGramTokenFilterTest.cs Project: zalintyre/lucenenet

 public virtual void TestReset()
 {
     WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
     AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
     tokenizer.SetReader(new StringReader("abcde"));
     AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
 }

Example #2

Show file

        public virtual void TestReset()
        {
            Tokenizer   wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
            TokenStream filter      = new ShingleFilter(wsTokenizer, 2);

            AssertTokenStreamContents(filter, new string[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" }, new int[] { 0, 0, 7, 7, 14, 14, 19 }, new int[] { 6, 13, 13, 18, 18, 27, 27 }, new string[] { TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE }, new int[] { 1, 0, 1, 0, 1, 0, 1 });
            wsTokenizer.SetReader(new StringReader("please divide this sentence"));
            AssertTokenStreamContents(filter, new string[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" }, new int[] { 0, 0, 7, 7, 14, 14, 19 }, new int[] { 6, 13, 13, 18, 18, 27, 27 }, new string[] { TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE }, new int[] { 1, 0, 1, 0, 1, 0, 1 });
        }

Example #3

Show file

File: EdgeNGramTokenFilterTest.cs Project: yohikofox/lucenenet

        public virtual void TestReset()
        {
            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));

#pragma warning disable 612, 618
            EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
#pragma warning restore 612, 618
            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
            tokenizer.SetReader(new StringReader("abcde"));
            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
        }

Example #4

Show file

File: CommonGramsFilterTest.cs Project: simixsistemas/lucenenet

        public virtual void TestQueryReset()
        {
            const string           input = "How the s a brown s cow d like A B thing?";
            WhitespaceTokenizer    wt    = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
            CommonGramsFilter      cgf   = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
            CommonGramsQueryFilter nsf   = new CommonGramsQueryFilter(cgf);

            ICharTermAttribute term = wt.AddAttribute <ICharTermAttribute>();

            nsf.Reset();
            assertTrue(nsf.IncrementToken());
            assertEquals("How_the", term.ToString());
            assertTrue(nsf.IncrementToken());
            assertEquals("the_s", term.ToString());
            nsf.Dispose();

            wt.SetReader(new StringReader(input));
            nsf.Reset();
            assertTrue(nsf.IncrementToken());
            assertEquals("How_the", term.ToString());
        }

Example #5

Show file

File: TestCompoundWordTokenFilter.cs Project: simixsistemas/lucenenet

        public virtual void TestReset()
        {
            CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");

            Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

            ICharTermAttribute termAtt = tf.GetAttribute <ICharTermAttribute>();

            tf.Reset();
            assertTrue(tf.IncrementToken());
            assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
            assertTrue(tf.IncrementToken());
            assertEquals("Rind", termAtt.ToString());
            tf.End();
            tf.Dispose();
            wsTokenizer.SetReader(new StringReader("Rindfleischüberwachungsgesetz"));
            tf.Reset();
            assertTrue(tf.IncrementToken());
            assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
        }