Tokenizer for testing.

This tokenizer is a replacement for the WHITESPACE, SIMPLE, and KEYWORD tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test it by wrapping this tokenizer instead, for extra checks. This tokenizer has the following behavior:

  • An internal state machine is used for checking consumer consistency. These checks can be disabled through the EnableChecks property.
  • For convenience, optionally lowercases terms that it outputs.
Inheritance: Tokenizer
 /// <summary>
 /// Feeds "go to" (no trailing whitespace) through a <c>SuggestStopFilter</c> whose
 /// stop set contains "to" and expects both "go" and "to" to be emitted, with the
 /// last boolean flag set only for "to".
 /// </summary>
 public void TestEndNotStopWord()
 {
     CharArraySet stopSet = StopFilter.MakeStopSet(TEST_VERSION_CURRENT, "to");
     TokenStream tokenizer = new MockTokenizer(new StringReader("go to"));
     TokenStream suggestFilter = new SuggestStopFilter(tokenizer, stopSet);

     // Expected per-token values, one array entry per emitted token.
     string[] terms = new string[] { "go", "to" };
     int[] startOffsets = new int[] { 0, 3 };
     int[] endOffsets = new int[] { 2, 5 };
     int[] posIncrements = new int[] { 1, 1 };
     bool[] keywordFlags = new bool[] { false, true };

     AssertTokenStreamContents(suggestFilter, terms, startOffsets, endOffsets, null, posIncrements, null, 5, keywordFlags, true);
 }
        /// <summary>
        /// Feeds "go to school" through a <c>SuggestStopFilter</c> whose stop set contains
        /// "to" and expects only "go" and "school" to be emitted; the second token is
        /// expected with a position increment of 2.
        /// </summary>
        public void TestMidStopWord()
        {
            CharArraySet stopWords = StopFilter.MakeStopSet(TEST_VERSION_CURRENT, "to");
            TokenStream stream = new MockTokenizer(new StringReader("go to school"));

            // Build the filter exactly once; wrapping the same stream a second time
            // only produced a discarded, redundant filter instance.
            TokenStream filter = new SuggestStopFilter(stream, stopWords);

            AssertTokenStreamContents(filter,
                                      new string[] { "go", "school" },
                                      new int[] { 0, 6 },
                                      new int[] { 2, 12 },
                                      null,
                                      new int[] { 1, 2 },
                                      null,
                                      12,
                                      new bool[] { false, false },
                                      true);
        }
Exemplo n.º 3
0
            /// <summary>
            /// Builds components around a single whitespace <c>MockTokenizer</c> with
            /// lowercasing disabled; the tokenizer is passed as both constructor arguments.
            /// </summary>
            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
                return new TokenStreamComponents(source, source);
            }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a <c>MockTokenizer</c> over <paramref name="reader"/> using its
 /// single-argument constructor and wraps it in a <c>MockSynonymFilter</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
 {
     var source = new MockTokenizer(reader);
     var withSynonyms = new MockSynonymFilter(source);
     return new TokenStreamComponents(source, withSynonyms);
 }
 /// <summary>
 /// Chains a lowercasing SIMPLE <c>MockTokenizer</c> into a <c>SynonymFilter</c>
 /// (configured with <c>map</c> and <c>ignoreCase</c>) and then into a
 /// <c>RemoveDuplicatesTokenFilter</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
     TokenStream synonyms = new SynonymFilter(source, map, ignoreCase);
     TokenStream deduplicated = new RemoveDuplicatesTokenFilter(synonyms);
     return new TokenStreamComponents(source, deduplicated);
 }
Exemplo n.º 6
0
        /// <summary>
        /// Wraps a lowercasing whitespace <c>MockTokenizer</c> in a
        /// <c>MockPayloadFilter</c> that is given the field name.
        /// </summary>
        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            TokenStream withPayloads = new MockPayloadFilter(source, fieldName);
            return new TokenStreamComponents(source, withPayloads);
        }
Exemplo n.º 7
0
 /// <summary>
 /// Wraps a lowercasing SIMPLE <c>MockTokenizer</c> in a <c>SimplePayloadFilter</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
     TokenStream withPayloads = new SimplePayloadFilter(source);
     return new TokenStreamComponents(source, withPayloads);
 }
Exemplo n.º 8
0
 /// <summary>
 /// Builds components around a single <c>MockTokenizer</c> configured from the
 /// enclosing type's <c>Dfa</c>, <c>Lowercase</c> and <c>Limit</c> members.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, Dfa, Lowercase, Limit);
     return (new TokenStreamComponents(source, source));
 }
Exemplo n.º 9
0
 /// <summary>
 /// Adds one document with three values for "field": a payload-free
 /// <c>MockTokenizer</c> stream, a <c>CannedTokenStream</c> carrying a token with the
 /// payload "test", and another payload-free <c>MockTokenizer</c> stream. It then
 /// checks that the payload can be read back for the term "withPayload".
 /// </summary>
 public virtual void TestMixupMultiValued()
 {
     Directory dir = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
     Document doc = new Document();

     // First value: plain tokenizer, must not expose a payload attribute.
     Field field = new TextField("field", "", Field.Store.NO);
     TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
     // Query the attribute interface, consistent with the positive check below;
     // querying the concrete PayloadAttribute class made this assertion inconsistent.
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     doc.Add(field);

     // Second value: a single canned token that carries a payload.
     Field field2 = new TextField("field", "", Field.Store.NO);
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.Payload = new BytesRef("test");
     ts = new CannedTokenStream(withPayload);
     Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
     field2.TokenStream = ts;
     doc.Add(field2);

     // Third value: plain tokenizer again, no payload attribute expected.
     Field field3 = new TextField("field", "", Field.Store.NO);
     ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field3.TokenStream = ts;
     doc.Add(field3);

     writer.AddDocument(doc);

     // Read the payload back from the only segment.
     DirectoryReader reader = writer.Reader;
     SegmentReader sr = GetOnlySegmentReader(reader);
     DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
     de.NextDoc();
     de.NextPosition();
     Assert.AreEqual(new BytesRef("test"), de.Payload);

     writer.Dispose();
     reader.Dispose();
     dir.Dispose();
 }
Exemplo n.º 10
0
 /// <summary>
 /// Constructs a <c>MockTokenizer</c> over a <c>StringReader</c> containing "Hello";
 /// the test contains no assertions beyond the construction itself.
 /// </summary>
 public void TestMockTokenizerCtor()
 {
     StringReader input = new StringReader("Hello");
     MockTokenizer tokenizer = new MockTokenizer(input);
 }
Exemplo n.º 11
0
 /// <summary>
 /// Looks up <paramref name="fieldName"/> in <c>FieldToData</c>. When payload data is
 /// found, the whitespace <c>MockTokenizer</c> is wrapped in a <c>PayloadFilter</c>
 /// built from that data; otherwise the bare tokenizer is used as the result stream.
 /// </summary>
 protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     PayloadData fieldPayload;
     FieldToData.TryGetValue(fieldName, out fieldPayload);

     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);

     // No payload registered for this field: emit the tokenizer output unchanged.
     if (fieldPayload == null)
     {
         return new TokenStreamComponents(source, source);
     }

     TokenStream withPayload = new PayloadFilter(source, fieldPayload.Data, fieldPayload.Offset, fieldPayload.Length);
     return new TokenStreamComponents(source, withPayload);
 }
Exemplo n.º 12
0
 /// <summary>
 /// Adds the same document three times, swapping the token stream of "field" between
 /// additions: a payload-free <c>MockTokenizer</c>, a <c>CannedTokenStream</c> whose
 /// token carries the payload "test", and another payload-free <c>MockTokenizer</c>.
 /// It then checks that the payload can be read back for the term "withPayload".
 /// </summary>
 public virtual void TestMixupDocs()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
     iwc.SetMergePolicy(NewLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
     Document doc = new Document();
     Field field = new TextField("field", "", Field.Store.NO);

     // First addition: plain tokenizer, must not expose a payload attribute.
     TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
     // Query the attribute interface, consistent with the positive check below;
     // querying the concrete PayloadAttribute class made this assertion inconsistent.
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     doc.Add(field);
     writer.AddDocument(doc);

     // Second addition: a single canned token that carries a payload.
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.Payload = new BytesRef("test");
     ts = new CannedTokenStream(withPayload);
     Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     writer.AddDocument(doc);

     // Third addition: plain tokenizer again, no payload attribute expected.
     ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     writer.AddDocument(doc);

     // Read the payload back through a composite-reader wrapper.
     DirectoryReader reader = writer.Reader;
     AtomicReader sr = SlowCompositeReaderWrapper.Wrap(reader);
     DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
     de.NextDoc();
     de.NextPosition();
     Assert.AreEqual(new BytesRef("test"), de.Payload);

     writer.Dispose();
     reader.Dispose();
     dir.Dispose();
 }
Exemplo n.º 13
0
 /// <summary>
 /// Smoke test that only constructs a <c>MockTokenizer</c> over the text "Hello".
 /// </summary>
 public void TestMockTokenizerCtor()
 {
     StringReader helloReader = new StringReader("Hello");
     MockTokenizer mockTokenizer = new MockTokenizer(helloReader);
 }
Exemplo n.º 14
0
            /// <summary>
            /// Creates a <c>MockTokenizer</c> from the enclosing type's <c>Dfa</c>,
            /// <c>Lowercase</c> and <c>Limit</c> members and passes it as both
            /// arguments of the returned components.
            /// </summary>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, Dfa, Lowercase, Limit);
                return new TokenStreamComponents(source, source);
            }
Exemplo n.º 15
0
            /// <summary>
            /// Builds components around a non-lowercasing whitespace <c>MockTokenizer</c>
            /// constructed with a fourth argument of 5.
            /// </summary>
            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, 5);
                return new TokenStreamComponents(source, source);
            }
Exemplo n.º 16
0
 /// <summary>
 /// Chains a non-lowercasing whitespace <c>MockTokenizer</c> through
 /// <c>RemoveATokens</c> and then a randomized <c>MockGraphTokenFilter</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenStream filtered = new RemoveATokens(source);
     TokenStream graph = new MockGraphTokenFilter(Random(), filtered);
     return new TokenStreamComponents(source, graph);
 }
Exemplo n.º 17
0
 /// <summary>
 /// Wraps a lowercasing whitespace <c>MockTokenizer</c> in a <c>MockCollationFilter</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
     TokenStream collated = new MockCollationFilter(source);
     return new TokenStreamComponents(source, collated);
 }
Exemplo n.º 18
0
            /// <summary>
            /// Builds the analysis chain: a lowercasing SIMPLE <c>MockTokenizer</c> over
            /// <paramref name="reader"/>, filtered through a <c>QPTestFilter</c>.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
            /// <param name="reader">Source of the text to tokenize.</param>
            /// <returns>Components whose result stream is the <c>QPTestFilter</c>.</returns>
            public sealed override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
            {
                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
                return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
            }
Exemplo n.º 19
0
        /// <summary>
        /// Creates a non-lowercasing KEYWORD <c>MockTokenizer</c> using <c>factory</c> and
        /// the default maximum token length, and returns components built from it alone.
        /// </summary>
        protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer keywordTokenizer = new MockTokenizer(
                factory,
                reader,
                MockTokenizer.KEYWORD,
                false,
                MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
            return new TokenStreamComponents(keywordTokenizer);
        }
Exemplo n.º 20
0
 /// <summary>
 /// Chains a non-lowercasing whitespace <c>MockTokenizer</c> through a randomized
 /// <c>MockHoleInjectingTokenFilter</c> and then a randomized <c>MockGraphTokenFilter</c>.
 /// </summary>
 protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenStream withHoles = new MockHoleInjectingTokenFilter(Random(), source);
     TokenStream graph = new MockGraphTokenFilter(Random(), withHoles);
     return new TokenStreamComponents(source, graph);
 }
        /// <summary>
        /// Feeds "go to a the " (with a trailing space) through a <c>SuggestStopFilter</c>
        /// whose stop set contains "to", "the" and "a", and expects only "go" to be
        /// emitted, with a final offset of 12.
        /// </summary>
        public void TestMultipleStopWordsEnd2()
        {
            CharArraySet stopWords = StopFilter.MakeStopSet(TEST_VERSION_CURRENT, "to", "the", "a");
            TokenStream stream = new MockTokenizer(new StringReader("go to a the "));

            // Build the filter exactly once; wrapping the same stream a second time
            // only produced a discarded, redundant filter instance.
            TokenStream filter = new SuggestStopFilter(stream, stopWords);

            AssertTokenStreamContents(filter,
                                      new string[] { "go" },
                                      new int[] { 0 },
                                      new int[] { 2 },
                                      null,
                                      new int[] { 1 },
                                      null,
                                      12,
                                      new bool[] { false },
                                      true);
        }
Exemplo n.º 22
0
 /// <summary>
 /// Returns components built from one non-lowercasing whitespace <c>MockTokenizer</c>
 /// that is constructed with a fourth argument of 5.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, 5);
     return (new TokenStreamComponents(source, source));
 }
Exemplo n.º 23
0
 /// <summary>
 /// Creates a non-lowercasing KEYWORD <c>MockTokenizer</c> using <c>Factory</c> and the
 /// default maximum token length, and returns components built from it alone.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer keywordTokenizer = new MockTokenizer(
         Factory,
         reader,
         MockTokenizer.KEYWORD,
         false,
         MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
     return new TokenStreamComponents(keywordTokenizer);
 }
Exemplo n.º 24
0
 /// <summary>
 /// Wraps a lowercasing whitespace <c>MockTokenizer</c> in a randomized
 /// <c>MockFixedLengthPayloadFilter</c> configured with <c>Length</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
     TokenStream withPayloads = new MockFixedLengthPayloadFilter(Random(), source, Length);
     return new TokenStreamComponents(source, withPayloads);
 }
Exemplo n.º 25
0
 /// <summary>
 /// Wraps a lowercasing whitespace <c>MockTokenizer</c> in a <c>TestPosIncrementFilter</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
     TokenStream filtered = new TestPosIncrementFilter(source);
     return new TokenStreamComponents(source, filtered);
 }
Exemplo n.º 26
0
 /// <summary>
 /// Wraps a lowercasing whitespace <c>MockTokenizer</c> in a <c>PayloadFilter</c>
 /// constructed with <c>PayloadCount</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
     TokenStream withPayloads = new PayloadFilter(PayloadCount, source);
     return new TokenStreamComponents(source, withPayloads);
 }
Exemplo n.º 27
0
 /// <summary>
 /// Wraps a lowercasing whitespace <c>MockTokenizer</c> in a <c>MockPayloadFilter</c>
 /// that also receives <paramref name="fieldName"/>.
 /// </summary>
 protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
     TokenStream withPayloads = new MockPayloadFilter(source, fieldName);
     return new TokenStreamComponents(source, withPayloads);
 }
Exemplo n.º 28
0
 /// <summary>
 /// Wraps a lowercasing SIMPLE <c>MockTokenizer</c> in a <c>PayloadFilter</c> that also
 /// receives <paramref name="fieldName"/>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
     TokenStream withPayloads = new PayloadFilter(source, fieldName);
     return new TokenStreamComponents(source, withPayloads);
 }
Exemplo n.º 29
0
 /// <summary>
 /// Returns components built from one non-lowercasing whitespace <c>MockTokenizer</c>,
 /// passed as both constructor arguments.
 /// </summary>
 protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     return new TokenStreamComponents(source, source);
 }
Exemplo n.º 30
0
 /// <summary>
 /// Creates a <c>MockTokenizer</c> from <c>RunAutomaton</c>, <c>LowerCase</c> and
 /// <c>MaxTokenLength_Renamed</c>, applies the configured <c>EnableChecks</c> setting,
 /// wraps it in a <c>MockTokenFilter</c> and hands the result to <c>MaybePayload</c>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     MockTokenizer source = new MockTokenizer(reader, RunAutomaton, LowerCase, MaxTokenLength_Renamed)
     {
         EnableChecks = EnableChecks_Renamed
     };
     MockTokenFilter filtered = new MockTokenFilter(source, Filter);
     TokenStream result = MaybePayload(filtered, fieldName);
     return new TokenStreamComponents(source, result);
 }
Exemplo n.º 31
0
 /// <summary>
 /// Wraps a non-lowercasing whitespace <c>MockTokenizer</c> in a
 /// <c>TokenFilterAnonymousInnerClassHelper</c> bound to this instance.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     TokenStream filtered = new TokenFilterAnonymousInnerClassHelper(this, source);
     return new TokenStreamComponents(source, filtered);
 }