public virtual void TestReset()
 {
     WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
     NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
     AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
     tokenizer.SetReader(new StringReader("abcde"));
     AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
 }
Example #2
0
        public virtual void TestReset()
        {
            Tokenizer   wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("please divide this sentence"));
            TokenStream filter      = new ShingleFilter(wsTokenizer, 2);

            AssertTokenStreamContents(filter, new string[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" }, new int[] { 0, 0, 7, 7, 14, 14, 19 }, new int[] { 6, 13, 13, 18, 18, 27, 27 }, new string[] { TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE }, new int[] { 1, 0, 1, 0, 1, 0, 1 });
            wsTokenizer.SetReader(new StringReader("please divide this sentence"));
            AssertTokenStreamContents(filter, new string[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence" }, new int[] { 0, 0, 7, 7, 14, 14, 19 }, new int[] { 6, 13, 13, 18, 18, 27, 27 }, new string[] { TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE, "shingle", TypeAttribute.DEFAULT_TYPE }, new int[] { 1, 0, 1, 0, 1, 0, 1 });
        }
        public virtual void TestReset()
        {
            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));

#pragma warning disable 612, 618
            EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
#pragma warning restore 612, 618
            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
            tokenizer.SetReader(new StringReader("abcde"));
            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
        }
        public virtual void TestQueryReset()
        {
            const string           input = "How the s a brown s cow d like A B thing?";
            WhitespaceTokenizer    wt    = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
            CommonGramsFilter      cgf   = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
            CommonGramsQueryFilter nsf   = new CommonGramsQueryFilter(cgf);

            ICharTermAttribute term = wt.AddAttribute <ICharTermAttribute>();

            nsf.Reset();
            assertTrue(nsf.IncrementToken());
            assertEquals("How_the", term.ToString());
            assertTrue(nsf.IncrementToken());
            assertEquals("the_s", term.ToString());
            nsf.Dispose();

            wt.SetReader(new StringReader(input));
            nsf.Reset();
            assertTrue(nsf.IncrementToken());
            assertEquals("How_the", term.ToString());
        }
        public virtual void TestReset()
        {
            CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");

            Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleischüberwachungsgesetz"));
            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

            ICharTermAttribute termAtt = tf.GetAttribute <ICharTermAttribute>();

            tf.Reset();
            assertTrue(tf.IncrementToken());
            assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
            assertTrue(tf.IncrementToken());
            assertEquals("Rind", termAtt.ToString());
            tf.End();
            tf.Dispose();
            wsTokenizer.SetReader(new StringReader("Rindfleischüberwachungsgesetz"));
            tf.Reset();
            assertTrue(tf.IncrementToken());
            assertEquals("Rindfleischüberwachungsgesetz", termAtt.ToString());
        }