Characters before the delimiter are the "token", those after are the payload.

For example, if the delimiter is '|', then for the string "foo|bar", "foo" is the token and "bar" is the payload.

Note, you can also include a {@link org.apache.lucene.analysis.payloads.PayloadEncoder} to convert the payload in an appropriate way (from characters to bytes).

Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.

Inheritance: TokenFilter
 /// <summary>
 /// Runs a whitespace-tokenized sentence through a <c>DelimitedPayloadTokenFilter</c> built
 /// from the input stream alone and checks, token by token, the expected term and payload:
 /// the UTF-8 bytes of the tag following '|', or <c>null</c> for tokens without a tag.
 /// </summary>
 public void TestNext()
 {
     var utf8 = Encoding.UTF8;
     var reader = new StringReader("The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN");
     var filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(reader));

     // Expected terms and their payloads, index-aligned, in stream order.
     string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
     byte[][] expectedPayloads =
     {
         null,
         utf8.GetBytes("JJ"),
         utf8.GetBytes("JJ"),
         utf8.GetBytes("NN"),
         utf8.GetBytes("VB"),
         null,
         null,
         utf8.GetBytes("JJ"),
         utf8.GetBytes("JJ"),
         utf8.GetBytes("NN")
     };

     // AssertTermEquals is defined elsewhere in the test class; presumably it advances the
     // filter by one token and compares term and payload -- confirm against its definition.
     for (int i = 0; i < expectedTerms.Length; i++)
     {
         AssertTermEquals(expectedTerms[i], filter, expectedPayloads[i]);
     }

     // After the ten expected tokens the stream must report exhaustion.
     Assert.False(filter.IncrementToken());
 }
 /// <summary>
 /// Feeds tokens of the form <c>term|number</c> through a <c>DelimitedPayloadTokenFilter</c>
 /// configured with '|' and a <c>FloatEncoder</c>, and checks each term against the payload
 /// produced by <c>PayloadHelper.EncodeFloat</c> (or <c>null</c> when the token has no number).
 /// </summary>
 public void TestFloatEncoding()
 {
     var reader = new StringReader("The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7");
     var filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(reader), '|', new FloatEncoder());
     ITermAttribute termAtt = filter.GetAttribute<ITermAttribute>();
     IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();

     // Index-aligned expectations; a null weight means "no payload expected".
     string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
     float?[] expectedWeights = { null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f };

     for (int i = 0; i < expectedTerms.Length; i++)
     {
         float? weight = expectedWeights[i];
         var expectedPayload = weight.HasValue ? PayloadHelper.EncodeFloat(weight.Value) : null;
         AssertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayload);
     }

     // No further tokens may remain.
     Assert.False(filter.IncrementToken());
 }
示例#3
0
        /// <summary>
        /// Runs a whitespace-tokenized sentence through a <c>DelimitedPayloadTokenFilter</c>
        /// configured with the default delimiter and an <c>IdentityEncoder</c>, and checks each
        /// term against the UTF-8 bytes of its tag (or <c>null</c> when the token has no tag).
        /// The filter is reset before consumption and ended/disposed once exhausted.
        /// </summary>
        public virtual void TestNext()
        {
            string test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
            DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());

            filter.Reset();

            // Expected payload bytes come from Encoding.UTF8.GetBytes: System.String has no
            // Java-style getBytes member, so the BCL encoder is called directly.
            AssertTermEquals("The", filter, null);
            AssertTermEquals("quick", filter, Encoding.UTF8.GetBytes("JJ"));
            AssertTermEquals("red", filter, Encoding.UTF8.GetBytes("JJ"));
            AssertTermEquals("fox", filter, Encoding.UTF8.GetBytes("NN"));
            AssertTermEquals("jumped", filter, Encoding.UTF8.GetBytes("VB"));
            AssertTermEquals("over", filter, null);
            AssertTermEquals("the", filter, null);
            AssertTermEquals("lazy", filter, Encoding.UTF8.GetBytes("JJ"));
            AssertTermEquals("brown", filter, Encoding.UTF8.GetBytes("JJ"));
            AssertTermEquals("dogs", filter, Encoding.UTF8.GetBytes("NN"));

            // The stream must be exhausted after the ten expected tokens.
            assertFalse(filter.IncrementToken());
            filter.End();
            filter.Dispose();
        }
        /// <summary>
        /// Verifies term/payload splitting for a <c>DelimitedPayloadTokenFilter</c> built with
        /// <c>DEFAULT_DELIMITER</c> and an <c>IdentityEncoder</c>: tagged tokens are expected to
        /// carry the UTF-8 bytes of their tag, untagged tokens a <c>null</c> payload.
        /// </summary>
        public virtual void TestNext()
        {
            string test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
            DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
            Encoding utf8 = Encoding.UTF8;

            filter.Reset();

            // Payload expectations use the BCL encoder; the Java-style "x".getBytes(...) call
            // is not a member of System.String.
            AssertTermEquals("The", filter, null);
            AssertTermEquals("quick", filter, utf8.GetBytes("JJ"));
            AssertTermEquals("red", filter, utf8.GetBytes("JJ"));
            AssertTermEquals("fox", filter, utf8.GetBytes("NN"));
            AssertTermEquals("jumped", filter, utf8.GetBytes("VB"));
            AssertTermEquals("over", filter, null);
            AssertTermEquals("the", filter, null);
            AssertTermEquals("lazy", filter, utf8.GetBytes("JJ"));
            AssertTermEquals("brown", filter, utf8.GetBytes("JJ"));
            AssertTermEquals("dogs", filter, utf8.GetBytes("NN"));

            // Nothing may follow the last expected token; then finish and release the stream.
            assertFalse(filter.IncrementToken());
            filter.End();
            filter.Dispose();
        }
 /// <summary>
 /// Checks float payload encoding: tokens of the form <c>term|number</c> pass through a
 /// <c>DelimitedPayloadTokenFilter</c> using '|' and a <c>FloatEncoder</c>, and each term is
 /// compared with the bytes from <c>PayloadHelper.EncodeFloat</c> (<c>null</c> when untagged).
 /// </summary>
 public virtual void TestFloatEncoding()
 {
     var reader = new StringReader("The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7");
     var tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     var filter = new DelimitedPayloadTokenFilter(tokenizer, '|', new FloatEncoder());
     ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
     IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();

     // Index-aligned expectations in stream order; null entries mean "no payload".
     string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
     var expectedPayloads = new[]
     {
         null,
         PayloadHelper.EncodeFloat(1.0f),
         PayloadHelper.EncodeFloat(2.0f),
         PayloadHelper.EncodeFloat(3.5f),
         PayloadHelper.EncodeFloat(0.5f),
         null,
         null,
         PayloadHelper.EncodeFloat(5.0f),
         PayloadHelper.EncodeFloat(99.3f),
         PayloadHelper.EncodeFloat(83.7f)
     };

     filter.Reset();
     for (int i = 0; i < expectedTerms.Length; i++)
     {
         AssertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayloads[i]);
     }

     // The stream must be exhausted, then ended and disposed.
     assertFalse(filter.IncrementToken());
     filter.End();
     filter.Dispose();
 }
示例#6
0
        /// <summary>
        /// Checks integer payload encoding: tokens of the form <c>term|int</c> pass through a
        /// <c>DelimitedPayloadTokenFilter</c> using '|' and an <c>IntegerEncoder</c>, and each
        /// term is compared with the bytes from <c>PayloadHelper.EncodeInt32</c>
        /// (<c>null</c> for tokens without a number).
        /// </summary>
        public virtual void TestIntEncoding()
        {
            var reader = new StringReader("The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83");
            var tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            var filter = new DelimitedPayloadTokenFilter(tokenizer, '|', new IntegerEncoder());
            ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
            IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();

            // Index-aligned expectations; a null value means the token carries no payload.
            string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
            int?[] expectedValues = { null, 1, 2, 3, null, null, null, 5, 99, 83 };

            filter.Reset();
            for (int i = 0; i < expectedTerms.Length; i++)
            {
                int? value = expectedValues[i];
                var expectedPayload = value.HasValue ? PayloadHelper.EncodeInt32(value.Value) : null;
                AssertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayload);
            }

            // Stream must be exhausted; then finish and release it.
            assertFalse(filter.IncrementToken());
            filter.End();
            filter.Dispose();
        }
示例#7
0
        /// <summary>
        /// Checks single-precision payload encoding: tokens of the form <c>term|number</c> pass
        /// through a <c>DelimitedPayloadTokenFilter</c> using '|' and a <c>SingleEncoder</c>, and
        /// each term is compared with the bytes from <c>PayloadHelper.EncodeSingle</c>
        /// (<c>null</c> for tokens without a number).
        /// </summary>
        public virtual void TestFloatEncoding()
        {
            var reader = new StringReader("The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7");
            var tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
            var filter = new DelimitedPayloadTokenFilter(tokenizer, '|', new SingleEncoder());
            ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
            IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();

            // Index-aligned expectations; a null weight means the token carries no payload.
            string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
            float?[] expectedWeights = { null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f };

            filter.Reset();
            for (int i = 0; i < expectedTerms.Length; i++)
            {
                float? weight = expectedWeights[i];
                var expectedPayload = weight.HasValue ? PayloadHelper.EncodeSingle(weight.Value) : null;
                AssertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayload);
            }

            // Stream must be exhausted; then finish and release it.
            assertFalse(filter.IncrementToken());
            filter.End();
            filter.Dispose();
        }
 /// <summary>
 /// Feeds tokens of the form <c>term|int</c> through a <c>DelimitedPayloadTokenFilter</c>
 /// configured with '|' and an <c>IntegerEncoder</c>, and checks each term against the
 /// payload produced by <c>PayloadHelper.EncodeInt</c> (or <c>null</c> when untagged).
 /// </summary>
 public void TestIntEncoding()
 {
     var reader = new StringReader("The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83");
     var filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(reader), '|', new IntegerEncoder());
     ITermAttribute termAtt = filter.GetAttribute<ITermAttribute>();
     IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();

     // Index-aligned expectations in stream order; null entries mean "no payload".
     string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
     var expectedPayloads = new[]
     {
         null,
         PayloadHelper.EncodeInt(1),
         PayloadHelper.EncodeInt(2),
         PayloadHelper.EncodeInt(3),
         null,
         null,
         null,
         PayloadHelper.EncodeInt(5),
         PayloadHelper.EncodeInt(99),
         PayloadHelper.EncodeInt(83)
     };

     for (int i = 0; i < expectedTerms.Length; i++)
     {
         AssertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayloads[i]);
     }

     // No further tokens may remain.
     Assert.False(filter.IncrementToken());
 }
 /// <summary>
 /// Checks integer payload encoding: tokens of the form <c>term|int</c> pass through a
 /// <c>DelimitedPayloadTokenFilter</c> using '|' and an <c>IntegerEncoder</c>, and each term
 /// is compared with the bytes from <c>PayloadHelper.EncodeInt</c> (<c>null</c> when untagged).
 /// </summary>
 public virtual void TestIntEncoding()
 {
     var reader = new StringReader("The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83");
     var tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
     var filter = new DelimitedPayloadTokenFilter(tokenizer, '|', new IntegerEncoder());
     ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
     IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();

     // Index-aligned expectations; a null value means the token carries no payload.
     string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
     int?[] expectedValues = { null, 1, 2, 3, null, null, null, 5, 99, 83 };

     filter.Reset();
     for (int i = 0; i < expectedTerms.Length; i++)
     {
         int? value = expectedValues[i];
         var expectedPayload = value.HasValue ? PayloadHelper.EncodeInt(value.Value) : null;
         AssertTermEquals(expectedTerms[i], filter, termAtt, payAtt, expectedPayload);
     }

     // Stream must be exhausted; then finish and release it.
     assertFalse(filter.IncrementToken());
     filter.End();
     filter.Dispose();
 }