public MockVariableLengthPayloadFilter(Random random, TokenStream @in)
    : base(@in)
{
    // The BytesRef shares the class-level Bytes buffer, so payload length
    // can be varied per token without reallocating.
    this.Random = random;
    this.Payload = new BytesRef(Bytes);
    this.PayloadAtt = AddAttribute<IPayloadAttribute>();
}
public SimplePayloadFilter(TokenStream input)
    : base(input)
{
    // Start the token counter at zero; attributes are taken from the
    // wrapped stream so this filter observes its tokens.
    Pos = 0;
    PayloadAttr = input.AddAttribute<IPayloadAttribute>();
    TermAttr = input.AddAttribute<ICharTermAttribute>();
}
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder)
    : base(input)
{
    // Attributes first, then the configuration controlling how each
    // "term<delimiter>payload" token is split and encoded.
    termAtt = AddAttribute<ITermAttribute>();
    payAtt = AddAttribute<IPayloadAttribute>();
    this.delimiter = delimiter;
    this.encoder = encoder;
}
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch)
    : base(input)
{
    // Reject a null type filter up front (consistent with the newer
    // overload of this filter) instead of failing later with a
    // NullReferenceException during token matching.
    if (typeMatch == null)
    {
        throw new System.ArgumentException("typeMatch cannot be null");
    }
    //Need to encode the payload
    thePayload = new Payload(PayloadHelper.EncodeFloat(payload));
    this.typeMatch = typeMatch;
    payloadAtt = AddAttribute<IPayloadAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}
public MockPayloadFilter(TokenStream input, string fieldName)
    : base(input)
{
    // Counters used to synthesize per-token position/payload data.
    this.FieldName = fieldName;
    Pos = 0;
    i = 0;
    PosIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    PayloadAttr = input.AddAttribute<IPayloadAttribute>();
    TermAttr = input.AddAttribute<ICharTermAttribute>();
}
public IterTokenStream(params Token[] tokens)
    : base()
{
    // Replays the supplied tokens; every standard attribute is registered
    // (in the original order) so each token can be copied in full.
    this.tokens = tokens;
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    this.flagsAtt = AddAttribute<IFlagsAttribute>();
    this.typeAtt = AddAttribute<ITypeAttribute>();
    this.payloadAtt = AddAttribute<IPayloadAttribute>();
}
public MockFixedLengthPayloadFilter(Random random, TokenStream @in, int length)
    : base(@in)
{
    // Validate before allocating the shared payload buffer.
    if (length < 0)
    {
        throw new System.ArgumentException("length must be >= 0");
    }
    this.Random = random;
    this.Bytes = new byte[length];
    this.Payload = new BytesRef(Bytes);
    this.PayloadAtt = AddAttribute<IPayloadAttribute>();
}
public NumericPayloadTokenFilter(TokenStream input, float payload, string typeMatch)
    : base(input)
{
    if (typeMatch == null)
    {
        throw new System.ArgumentException("typeMatch cannot be null");
    }
    // The float payload is encoded once and reused for every token whose
    // type equals typeMatch.
    thePayload = new BytesRef(PayloadHelper.EncodeFloat(payload));
    this.typeMatch = typeMatch;
    this.payloadAtt = AddAttribute<IPayloadAttribute>();
    this.typeAtt = AddAttribute<ITypeAttribute>();
}
public virtual void TestDelim()
{
    TextReader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = TokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").Create(stream);
    stream.Reset();
    // The attribute instance is stable across tokens, so fetch it once
    // instead of on every iteration of the loop.
    IPayloadAttribute payAttr = stream.GetAttribute<IPayloadAttribute>();
    while (stream.IncrementToken())
    {
        assertNotNull(payAttr);
        byte[] payData = payAttr.Payload.Bytes;
        assertNotNull(payData);
        // Every token in the input carries the same "*0.1" payload.
        float payFloat = PayloadHelper.DecodeSingle(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.End();
    stream.Dispose();
}
internal override void Start(IIndexableField f)
{
    // Payloads are optional: only resolve the attribute when the
    // producing TokenStream actually registered it.
    payloadAttribute = fieldState.AttributeSource.HasAttribute<IPayloadAttribute>()
        ? fieldState.AttributeSource.GetAttribute<IPayloadAttribute>()
        : null;
    // Offsets are driven by this consumer's configuration instead.
    offsetAttribute = hasOffsets
        ? fieldState.AttributeSource.AddAttribute<IOffsetAttribute>()
        : null;
}
public virtual void TestFilterTokens()
{
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(this), "English");
    // Grab every attribute the stemmer is expected to pass through.
    ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
    IOffsetAttribute offsetAtt = filter.GetAttribute<IOffsetAttribute>();
    ITypeAttribute typeAtt = filter.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = filter.GetAttribute<IPayloadAttribute>();
    IPositionIncrementAttribute posIncAtt = filter.GetAttribute<IPositionIncrementAttribute>();
    IFlagsAttribute flagsAtt = filter.GetAttribute<IFlagsAttribute>();

    filter.IncrementToken();

    // The term is stemmed to "accent"; every other attribute must
    // survive the filter unchanged.
    assertEquals("accent", termAtt.ToString());
    assertEquals(2, offsetAtt.StartOffset());
    assertEquals(7, offsetAtt.EndOffset());
    assertEquals("wrd", typeAtt.Type);
    assertEquals(3, posIncAtt.PositionIncrement);
    assertEquals(77, flagsAtt.Flags);
    assertEquals(new BytesRef(new byte[] { 0, 1, 2, 3 }), payloadAtt.Payload);
}
public void TestPayloads()
{
    var encoding = Encoding.UTF8;
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
    ITermAttribute termAtt = filter.GetAttribute<ITermAttribute>();
    IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();
    // Encode each expected POS tag once; assertions compare content only.
    byte[] jj = encoding.GetBytes("JJ");
    byte[] nn = encoding.GetBytes("NN");
    byte[] vb = encoding.GetBytes("VB");
    AssertTermEquals("The", filter, termAtt, payAtt, null);
    AssertTermEquals("quick", filter, termAtt, payAtt, jj);
    AssertTermEquals("red", filter, termAtt, payAtt, jj);
    AssertTermEquals("fox", filter, termAtt, payAtt, nn);
    AssertTermEquals("jumped", filter, termAtt, payAtt, vb);
    AssertTermEquals("over", filter, termAtt, payAtt, null);
    AssertTermEquals("the", filter, termAtt, payAtt, null);
    AssertTermEquals("lazy", filter, termAtt, payAtt, jj);
    AssertTermEquals("brown", filter, termAtt, payAtt, jj);
    AssertTermEquals("dogs", filter, termAtt, payAtt, nn);
    Assert.False(filter.IncrementToken());
}
public void TestFilterTokens()
{
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    // Grab every attribute the stemmer is expected to pass through.
    ITermAttribute termAtt = filter.GetAttribute<ITermAttribute>();
    IOffsetAttribute offsetAtt = filter.GetAttribute<IOffsetAttribute>();
    ITypeAttribute typeAtt = filter.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = filter.GetAttribute<IPayloadAttribute>();
    IPositionIncrementAttribute posIncAtt = filter.GetAttribute<IPositionIncrementAttribute>();
    IFlagsAttribute flagsAtt = filter.GetAttribute<IFlagsAttribute>();

    filter.IncrementToken();

    // The term is stemmed to "accent"; every other attribute must
    // survive the filter unchanged.
    Assert.AreEqual("accent", termAtt.Term);
    Assert.AreEqual(2, offsetAtt.StartOffset);
    Assert.AreEqual(7, offsetAtt.EndOffset);
    Assert.AreEqual("wrd", typeAtt.Type);
    Assert.AreEqual(3, posIncAtt.PositionIncrement);
    Assert.AreEqual(77, flagsAtt.Flags);
    Assert.AreEqual(new Payload(new byte[] { 0, 1, 2, 3 }), payloadAtt.Payload);
}
// Joins a prefix stream and a suffix stream into one; tokens are copied
// via the duplicated attribute sets registered here on both this filter
// (backed by the suffix stream) and the prefix stream.
// NOTE(review): prefixExhausted presumably flips once the prefix stream
// is drained — confirm in IncrementToken. Attribute registration order
// is preserved as-is since AttributeSource iteration follows it.
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;
    termAtt = AddAttribute<ICharTermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
    p_termAtt = prefix.AddAttribute<ICharTermAttribute>();
    p_posIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    p_payloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    p_offsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    p_typeAtt = prefix.AddAttribute<ITypeAttribute>();
    p_flagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}
// Joins a prefix stream and a suffix stream into one; tokens are copied
// via the duplicated attribute sets registered here on both this filter
// (backed by the suffix stream) and the prefix stream.
// NOTE(review): prefixExhausted presumably flips once the prefix stream
// is drained — confirm in IncrementToken. Attribute registration order
// is preserved as-is since AttributeSource iteration follows it.
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;
    termAtt = AddAttribute<ICharTermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
    p_termAtt = prefix.AddAttribute<ICharTermAttribute>();
    p_posIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    p_payloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    p_offsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    p_typeAtt = prefix.AddAttribute<ITypeAttribute>();
    p_flagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}
public virtual void TestPayloads()
{
    string test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
    IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();
    // Encode each expected POS tag once; assertions compare content only.
    byte[] jj = "JJ".getBytes(Encoding.UTF8);
    byte[] nn = "NN".getBytes(Encoding.UTF8);
    byte[] vb = "VB".getBytes(Encoding.UTF8);
    filter.Reset();
    AssertTermEquals("The", filter, termAtt, payAtt, null);
    AssertTermEquals("quick", filter, termAtt, payAtt, jj);
    AssertTermEquals("red", filter, termAtt, payAtt, jj);
    AssertTermEquals("fox", filter, termAtt, payAtt, nn);
    AssertTermEquals("jumped", filter, termAtt, payAtt, vb);
    AssertTermEquals("over", filter, termAtt, payAtt, null);
    AssertTermEquals("the", filter, termAtt, payAtt, null);
    AssertTermEquals("lazy", filter, termAtt, payAtt, jj);
    AssertTermEquals("brown", filter, termAtt, payAtt, jj);
    AssertTermEquals("dogs", filter, termAtt, payAtt, nn);
    assertFalse(filter.IncrementToken());
    filter.End();
    filter.Dispose();
}
// Joins a prefix stream and a suffix stream; registers the full attribute
// set both on this filter (backed by the suffix stream) and on the prefix
// stream so token state can be copied across.
// NOTE(review): _prefixExhausted presumably flips once the prefix stream
// is drained — confirm in IncrementToken.
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    Suffix = suffix;
    Prefix = prefix;
    _prefixExhausted = false;
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = AddAttribute<ITermAttribute>();
    _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    _payloadAtt = AddAttribute<IPayloadAttribute>();
    _offsetAtt = AddAttribute<IOffsetAttribute>();
    _typeAtt = AddAttribute<ITypeAttribute>();
    _flagsAtt = AddAttribute<IFlagsAttribute>();
    // ReSharper restore DoNotCallOverridableMethodsInConstructor
    _pTermAtt = prefix.AddAttribute<ITermAttribute>();
    _pPosIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    _pPayloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    _pOffsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    _pTypeAtt = prefix.AddAttribute<ITypeAttribute>();
    _pFlagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}
public virtual void TestFloatEncoding()
{
    string test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new SingleEncoder());
    ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
    IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();
    filter.Reset();
    // Terms without a '|' suffix must yield a null payload; the rest
    // carry the 4-byte float encoding of their suffix.
    AssertTermEquals("The", filter, termAtt, payAtt, null);
    AssertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(1.0f));
    AssertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(2.0f));
    AssertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(3.5f));
    AssertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(0.5f));
    AssertTermEquals("over", filter, termAtt, payAtt, null);
    AssertTermEquals("the", filter, termAtt, payAtt, null);
    AssertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(5.0f));
    AssertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(99.3f));
    AssertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.EncodeSingle(83.7f));
    assertFalse(filter.IncrementToken());
    filter.End();
    filter.Dispose();
}
void AssertTermEquals(String expected, TokenStream stream, byte[] expectPay)
{
    ITermAttribute termAtt = stream.GetAttribute<ITermAttribute>();
    IPayloadAttribute payloadAtt = stream.GetAttribute<IPayloadAttribute>();
    Assert.True(stream.IncrementToken());
    Assert.AreEqual(expected, termAtt.Term);
    Payload payload = payloadAtt.Payload;
    if (payload != null)
    {
        // Fail with a readable assertion instead of a
        // NullReferenceException when a payload shows up where none
        // was expected (expectPay == null).
        Assert.True(expectPay != null, "expectPay is null but a payload was present");
        Assert.True(payload.Length == expectPay.Length, payload.Length + " does not equal: " + expectPay.Length);
        for (int i = 0; i < expectPay.Length; i++)
        {
            Assert.True(expectPay[i] == payload.ByteAt(i), expectPay[i] + " does not equal: " + payload.ByteAt(i));
        }
    }
    else
    {
        Assert.True(expectPay == null, "expectPay is not null and it should be");
    }
}
public virtual void TestIntEncoding()
{
    string test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
    ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
    IPayloadAttribute payAtt = filter.GetAttribute<IPayloadAttribute>();
    filter.Reset();
    // Terms without a '|' suffix must yield a null payload; the rest
    // carry the integer encoding of their suffix.
    AssertTermEquals("The", filter, termAtt, payAtt, null);
    AssertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.EncodeInt32(1));
    AssertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.EncodeInt32(2));
    AssertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.EncodeInt32(3));
    AssertTermEquals("jumped", filter, termAtt, payAtt, null);
    AssertTermEquals("over", filter, termAtt, payAtt, null);
    AssertTermEquals("the", filter, termAtt, payAtt, null);
    AssertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.EncodeInt32(5));
    AssertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.EncodeInt32(99));
    AssertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.EncodeInt32(83));
    assertFalse(filter.IncrementToken());
    filter.End();
    filter.Dispose();
}
public void test()
{
    String test = "The quick red fox jumped over the lazy brown dogs";
    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
    int count = 0;
    ITermAttribute termAtt = nptf.GetAttribute<ITermAttribute>();
    ITypeAttribute typeAtt = nptf.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = nptf.GetAttribute<IPayloadAttribute>();
    while (nptf.IncrementToken())
    {
        // The type is set to the upper-cased first letter of the term,
        // and the payload must round-trip that same string.
        Assert.True(typeAtt.Type.Equals(char.ToUpper(termAtt.TermBuffer()[0]).ToString()), typeAtt.Type + " is not null and it should be");
        Assert.True(payloadAtt.Payload != null, "nextToken.getPayload() is null and it shouldn't be");
        // Fixed: removed a stray duplicated semicolon after this statement.
        String type = Encoding.UTF8.GetString(payloadAtt.Payload.GetData());
        Assert.True(type != null, "type is null and it shouldn't be");
        Assert.True(type.Equals(typeAtt.Type) == true, type + " is not equal to " + typeAtt.Type);
        count++;
    }
    Assert.True(count == 10, count + " does not equal: " + 10);
}
public void Test()
{
    String test = "The quick red fox jumped over the lazy brown dogs";
    TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
    int count = 0;
    IPayloadAttribute payloadAtt = nptf.GetAttribute<IPayloadAttribute>();
    IOffsetAttribute offsetAtt = nptf.GetAttribute<IOffsetAttribute>();
    while (nptf.IncrementToken())
    {
        Payload pay = payloadAtt.Payload;
        Assert.True(pay != null, "pay is null and it shouldn't be");
        // The payload packs the start offset at byte 0 and the end
        // offset at byte 4.
        byte[] data = pay.GetData();
        int start = PayloadHelper.DecodeInt(data, 0);
        Assert.True(start == offsetAtt.StartOffset, start + " does not equal: " + offsetAtt.StartOffset);
        int end = PayloadHelper.DecodeInt(data, 4);
        Assert.True(end == offsetAtt.EndOffset, end + " does not equal: " + offsetAtt.EndOffset);
        count++;
    }
    Assert.True(count == 10, count + " does not equal: " + 10);
}
public virtual void Test()
{
    string test = "The quick red fox jumped over the lazy brown dogs";
    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(this, new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
    int count = 0;
    ICharTermAttribute termAtt = nptf.GetAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = nptf.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = nptf.GetAttribute<IPayloadAttribute>();
    nptf.Reset();
    while (nptf.IncrementToken())
    {
        // The type is set to the upper-cased first letter of the term,
        // and the payload must round-trip that same string.
        assertTrue(typeAtt.Type + " is not null and it should be", typeAtt.Type.Equals(char.ToUpper(termAtt.Buffer()[0]).ToString()));
        assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.Payload != null);
        string type = payloadAtt.Payload.Utf8ToString();
        assertTrue(type + " is not equal to " + typeAtt.Type, type.Equals(typeAtt.Type));
        count++;
    }
    assertTrue(count + " does not equal: " + 10, count == 10);
}
// Joins a prefix stream and a suffix stream; registers the full attribute
// set both on this filter (backed by the suffix stream) and on the prefix
// stream so token state can be copied across.
// NOTE(review): _prefixExhausted presumably flips once the prefix stream
// is drained — confirm in IncrementToken.
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    Suffix = suffix;
    Prefix = prefix;
    _prefixExhausted = false;
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = AddAttribute<ITermAttribute>();
    _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    _payloadAtt = AddAttribute<IPayloadAttribute>();
    _offsetAtt = AddAttribute<IOffsetAttribute>();
    _typeAtt = AddAttribute<ITypeAttribute>();
    _flagsAtt = AddAttribute<IFlagsAttribute>();
    // ReSharper restore DoNotCallOverridableMethodsInConstructor
    _pTermAtt = prefix.AddAttribute<ITermAttribute>();
    _pPosIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    _pPayloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    _pOffsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    _pTypeAtt = prefix.AddAttribute<ITypeAttribute>();
    _pFlagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}
internal virtual void AssertTermEquals(string expected, TokenStream stream, byte[] expectPay)
{
    ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
    IPayloadAttribute payloadAtt = stream.GetAttribute<IPayloadAttribute>();
    assertTrue(stream.IncrementToken());
    assertEquals(expected, termAtt.ToString());
    BytesRef payload = payloadAtt.Payload;
    if (payload != null)
    {
        // Fail with a readable assertion instead of a
        // NullReferenceException when a payload shows up where none
        // was expected (expectPay == null).
        assertTrue("expectPay is null but a payload was present", expectPay != null);
        assertTrue(payload.Length + " does not equal: " + expectPay.Length, payload.Length == expectPay.Length);
        for (int i = 0; i < expectPay.Length; i++)
        {
            assertTrue(expectPay[i] + " does not equal: " + payload.Bytes[i + payload.Offset], expectPay[i] == payload.Bytes[i + payload.Offset]);
        }
    }
    else
    {
        assertTrue("expectPay is not null and it should be", expectPay == null);
    }
}
public virtual void Test()
{
    string test = "The quick red fox jumped over the lazy brown dogs";
    TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
    int count = 0;
    IPayloadAttribute payloadAtt = nptf.GetAttribute<IPayloadAttribute>();
    IOffsetAttribute offsetAtt = nptf.GetAttribute<IOffsetAttribute>();
    nptf.Reset();
    while (nptf.IncrementToken())
    {
        BytesRef pay = payloadAtt.Payload;
        assertTrue("pay is null and it shouldn't be", pay != null);
        // The payload packs the start offset at byte 0 and the end
        // offset at byte 4.
        byte[] data = pay.Bytes;
        int start = PayloadHelper.DecodeInt32(data, 0);
        assertTrue(start + " does not equal: " + offsetAtt.StartOffset, start == offsetAtt.StartOffset);
        int end = PayloadHelper.DecodeInt32(data, 4);
        assertTrue(end + " does not equal: " + offsetAtt.EndOffset, end == offsetAtt.EndOffset);
        count++;
    }
    assertTrue(count + " does not equal: " + 10, count == 10);
}
// Builds a randomized token stream for term-vector format tests:
// terms are drawn from the provided samples; increments, offsets and
// payloads are randomized (offsets are fully random — possibly
// backwards — when offsetsGoBackwards); inverted lookup maps
// (position -> term indices, start offset -> term indices) and term
// frequencies are precomputed for later verification.
protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
{
    this.OuterInstance = outerInstance;
    Terms = new string[len];
    TermBytes = new BytesRef[len];
    PositionsIncrements = new int[len];
    Positions = new int[len];
    StartOffsets = new int[len];
    EndOffsets = new int[len];
    Payloads = new BytesRef[len];
    for (int i = 0; i < len; ++i)
    {
        int o = Random().Next(sampleTerms.Length);
        Terms[i] = sampleTerms[o];
        TermBytes[i] = sampleTermBytes[o];
        // First increment must be >= 1 so the first position is valid.
        PositionsIncrements[i] = TestUtil.NextInt(Random(), i == 0 ? 1 : 0, 10);
        if (offsetsGoBackwards)
        {
            StartOffsets[i] = Random().Next();
            EndOffsets[i] = Random().Next();
        }
        else
        {
            if (i == 0)
            {
                StartOffsets[i] = TestUtil.NextInt(Random(), 0, 1 << 16);
            }
            else
            {
                StartOffsets[i] = StartOffsets[i - 1] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 16 : 20);
            }
            EndOffsets[i] = StartOffsets[i] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 10 : 20);
        }
    }
    // Absolute positions are the running sum of the increments (0-based).
    for (int i = 0; i < len; ++i)
    {
        if (i == 0)
        {
            Positions[i] = PositionsIncrements[i] - 1;
        }
        else
        {
            Positions[i] = Positions[i - 1] + PositionsIncrements[i];
        }
    }
    // Rarely share one payload across all terms, otherwise one per term.
    if (Rarely())
    {
        Arrays.Fill(Payloads, outerInstance.RandomPayload());
    }
    else
    {
        for (int i = 0; i < len; ++i)
        {
            Payloads[i] = outerInstance.RandomPayload();
        }
    }
    PositionToTerms = new Dictionary<int?, ISet<int?>>(len);
    StartOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
    for (int i = 0; i < len; ++i)
    {
        if (!PositionToTerms.ContainsKey(Positions[i]))
        {
            PositionToTerms[Positions[i]] = new HashSet<int?>(); //size1
        }
        PositionToTerms[Positions[i]].Add(i);
        if (!StartOffsetToTerms.ContainsKey(StartOffsets[i]))
        {
            StartOffsetToTerms[StartOffsets[i]] = new HashSet<int?>(); //size1
        }
        StartOffsetToTerms[StartOffsets[i]].Add(i);
    }
    Freqs = new Dictionary<string, int?>();
    foreach (string term in Terms)
    {
        if (Freqs.ContainsKey(term))
        {
            Freqs[term] = Freqs[term] + 1;
        }
        else
        {
            Freqs[term] = 1;
        }
    }
    // Offsets may go backwards, so a permissive offset attribute
    // implementation is installed before the standard attributes.
    AddAttributeImpl(new PermissiveOffsetAttributeImpl());
    TermAtt = AddAttribute<ICharTermAttribute>();
    PiAtt = AddAttribute<IPositionIncrementAttribute>();
    OAtt = AddAttribute<IOffsetAttribute>();
    PAtt = AddAttribute<IPayloadAttribute>();
}
public PayloadFilter(TokenStream input, System.String fieldName)
    : base(input)
{
    // Counters used when fabricating position increments and payloads.
    this.fieldName = fieldName;
    pos = 0;
    i = 0;
    posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
    payloadAttr = input.AddAttribute<IPayloadAttribute>();
    termAttr = input.AddAttribute<ITermAttribute>();
}
internal PoolingPayloadTokenStream(TestPayloads enclosingInstance, ByteArrayPool pool)
{
    InitBlock(enclosingInstance);
    // Borrow a buffer from the pool, fill it with random data, and use a
    // string rendering of those bytes as the single term to emit.
    this.pool = pool;
    payload = pool.Get();
    Enclosing_Instance.GenerateRandomData(payload);
    term = pool.BytesToString(payload);
    first = true;
    payloadAtt = AddAttribute<IPayloadAttribute>();
    termAtt = AddAttribute<ITermAttribute>();
}
public PayloadFilter(TokenStream input, string fieldName)
    : base(input)
{
    // The field name selects which field this filter decorates; only the
    // payload attribute is needed.
    this.FieldName = fieldName;
    PayAtt = AddAttribute<IPayloadAttribute>();
}
// Builds a randomized token stream for term-vector format tests:
// terms are drawn from the provided samples; increments, offsets and
// payloads are randomized (offsets are fully random — possibly
// backwards — when offsetsGoBackwards); inverted lookup maps
// (position -> term indices, start offset -> term indices) and term
// frequencies are precomputed for later verification.
protected internal RandomTokenStream(BaseTermVectorsFormatTestCase outerInstance, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
{
    this.OuterInstance = outerInstance;
    Terms = new string[len];
    TermBytes = new BytesRef[len];
    PositionsIncrements = new int[len];
    Positions = new int[len];
    StartOffsets = new int[len];
    EndOffsets = new int[len];
    Payloads = new BytesRef[len];
    for (int i = 0; i < len; ++i)
    {
        int o = Random().Next(sampleTerms.Length);
        Terms[i] = sampleTerms[o];
        TermBytes[i] = sampleTermBytes[o];
        // First increment must be >= 1 so the first position is valid.
        PositionsIncrements[i] = TestUtil.NextInt(Random(), i == 0 ? 1 : 0, 10);
        if (offsetsGoBackwards)
        {
            StartOffsets[i] = Random().Next();
            EndOffsets[i] = Random().Next();
        }
        else
        {
            if (i == 0)
            {
                StartOffsets[i] = TestUtil.NextInt(Random(), 0, 1 << 16);
            }
            else
            {
                StartOffsets[i] = StartOffsets[i - 1] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 16 : 20);
            }
            EndOffsets[i] = StartOffsets[i] + TestUtil.NextInt(Random(), 0, Rarely() ? 1 << 10 : 20);
        }
    }
    // Absolute positions are the running sum of the increments (0-based).
    for (int i = 0; i < len; ++i)
    {
        if (i == 0)
        {
            Positions[i] = PositionsIncrements[i] - 1;
        }
        else
        {
            Positions[i] = Positions[i - 1] + PositionsIncrements[i];
        }
    }
    // Rarely share one payload across all terms, otherwise one per term.
    if (Rarely())
    {
        Arrays.Fill(Payloads, outerInstance.RandomPayload());
    }
    else
    {
        for (int i = 0; i < len; ++i)
        {
            Payloads[i] = outerInstance.RandomPayload();
        }
    }
    PositionToTerms = new Dictionary<int?, ISet<int?>>(len);
    StartOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
    for (int i = 0; i < len; ++i)
    {
        if (!PositionToTerms.ContainsKey(Positions[i]))
        {
            PositionToTerms[Positions[i]] = new HashSet<int?>(); //size1
        }
        PositionToTerms[Positions[i]].Add(i);
        if (!StartOffsetToTerms.ContainsKey(StartOffsets[i]))
        {
            StartOffsetToTerms[StartOffsets[i]] = new HashSet<int?>(); //size1
        }
        StartOffsetToTerms[StartOffsets[i]].Add(i);
    }
    Freqs = new Dictionary<string, int?>();
    foreach (string term in Terms)
    {
        if (Freqs.ContainsKey(term))
        {
            Freqs[term] = Freqs[term] + 1;
        }
        else
        {
            Freqs[term] = 1;
        }
    }
    // Offsets may go backwards, so a permissive offset attribute
    // implementation is installed before the standard attributes.
    AddAttributeImpl(new PermissiveOffsetAttributeImpl());
    TermAtt = AddAttribute<ICharTermAttribute>();
    PiAtt = AddAttribute<IPositionIncrementAttribute>();
    OAtt = AddAttribute<IOffsetAttribute>();
    PAtt = AddAttribute<IPayloadAttribute>();
}
internal override void Start(IndexableField f)
{
    // Offsets are recorded whenever the field's vectors request them.
    OffsetAttribute = DoVectorOffsets
        ? FieldState.AttributeSource_Renamed.AddAttribute<IOffsetAttribute>()
        : null;
    // Payloads additionally require the source stream to already expose
    // the payload attribute.
    PayloadAttribute = DoVectorPayloads && FieldState.AttributeSource_Renamed.HasAttribute<IPayloadAttribute>()
        ? FieldState.AttributeSource_Renamed.GetAttribute<IPayloadAttribute>()
        : null;
}
internal TestTokenStream()
{
    // Register every attribute this stream fabricates. Registration
    // order is kept as-is since AttributeSource iteration follows it.
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
}
public PayloadFilter(TestPayloadSpans enclosingInstance, TokenStream input, System.String fieldName)
    : base(input)
{
    InitBlock(enclosingInstance);
    this.fieldName = fieldName;
    pos = 0;
    // Terms listed in 'entities' receive entity payloads; terms in
    // 'nopayload' receive none at all.
    CollectionsHelper.AddIfNotContains(entities, "xx");
    CollectionsHelper.AddIfNotContains(entities, "one");
    CollectionsHelper.AddIfNotContains(nopayload, "nopayload");
    CollectionsHelper.AddIfNotContains(nopayload, "np");
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
public override void CopyTo(Attribute target)
{
    // Clone the payload so the target never aliases this instance's data.
    IPayloadAttribute t = (IPayloadAttribute)target;
    if (payload == null)
    {
        t.Payload = null;
    }
    else
    {
        t.Payload = (Payload)payload.Clone();
    }
}
public PayloadSetter(TokenStream input)
    : base(input)
{
    // A one-byte payload backed by the shared 'data' buffer.
    p = new BytesRef(data, 0, 1);
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
public PayloadSetter(TokenStream input)
    : base(input)
{
    // Initialize instance fields, then register the payload attribute.
    InitBlock();
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
public PayloadFilter(TestPayloadNearQuery enclosingInstance, TokenStream input, System.String fieldName)
    : base(input)
{
    // Wire the enclosing-instance state first, then record the field and
    // register the payload attribute.
    InitBlock(enclosingInstance);
    this.fieldName = fieldName;
    payAtt = AddAttribute<IPayloadAttribute>();
}
protected internal PayloadFilter(AtomicInt32 payloadCount, TokenStream input)
    : base(input)
{
    // The shared counter lets the test observe how many payloads were set.
    this.payloadCount = payloadCount;
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
void AssertTermEquals(String expected, TokenStream stream, ITermAttribute termAtt, IPayloadAttribute payAtt, byte[] expectPay)
{
    Assert.True(stream.IncrementToken());
    Assert.AreEqual(expected, termAtt.Term);
    Payload payload = payAtt.Payload;
    if (payload != null)
    {
        // Fail with a readable assertion instead of a
        // NullReferenceException when a payload shows up where none
        // was expected (expectPay == null).
        Assert.True(expectPay != null, "expectPay is null but a payload was present");
        Assert.True(payload.Length == expectPay.Length, payload.Length + " does not equal: " + expectPay.Length);
        for (int i = 0; i < expectPay.Length; i++)
        {
            Assert.True(expectPay[i] == payload.ByteAt(i), expectPay[i] + " does not equal: " + payload.ByteAt(i));
        }
    }
    else
    {
        Assert.True(expectPay == null, "expectPay is not null and it should be");
    }
}
public PositionsTokenStream()
{
    // Term text, payload and offsets are all synthesized by this stream.
    term = AddAttribute<ICharTermAttribute>();
    payload = AddAttribute<IPayloadAttribute>();
    offset = AddAttribute<IOffsetAttribute>();
}
public PayloadSetter(TokenStream input)
    : base(input)
{
    // Guarded one-time field initialization (converter-generated pattern),
    // then register the payload attribute.
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
public PayloadFilter(TestPayloadSpans outerInstance, TokenStream input)
    : base(input)
{
    this.OuterInstance = outerInstance;
    Pos = 0;
    // Terms in Entities receive entity payloads; terms in Nopayload none.
    Entities.Add("xx");
    Entities.Add("one");
    Nopayload.Add("nopayload");
    Nopayload.Add("np");
    TermAtt = AddAttribute<ICharTermAttribute>();
    PosIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    PayloadAtt = AddAttribute<IPayloadAttribute>();
}
public PayloadSetter(TokenStream input)
    : base(input)
{
    // Initialize instance fields, then register the payload attribute.
    InitBlock();
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
public PayloadFilter(TokenStream in_Renamed, byte[] data, int offset, int length)
    : base(in_Renamed)
{
    // The (data, offset, length) triple describes the payload bytes that
    // this filter attaches to tokens.
    this.data = data;
    this.length = length;
    this.offset = offset;
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
internal virtual void AssertTermEquals(string expected, TokenStream stream, ICharTermAttribute termAtt, IPayloadAttribute payAtt, byte[] expectPay)
{
    assertTrue(stream.IncrementToken());
    assertEquals(expected, termAtt.ToString());
    BytesRef payload = payAtt.Payload;
    if (payload != null)
    {
        // Fail with a readable assertion instead of a
        // NullReferenceException when a payload shows up where none
        // was expected (expectPay == null).
        assertTrue("expectPay is null but a payload was present", expectPay != null);
        assertTrue(payload.Length + " does not equal: " + expectPay.Length, payload.Length == expectPay.Length);
        for (int i = 0; i < expectPay.Length; i++)
        {
            assertTrue(expectPay[i] + " does not equal: " + payload.Bytes[i + payload.Offset], expectPay[i] == payload.Bytes[i + payload.Offset]);
        }
    }
    else
    {
        assertTrue("expectPay is not null and it should be", expectPay == null);
    }
}
public TypeAsPayloadTokenFilter(TokenStream input)
    : base(input)
{
    // Needs both attributes: reads the token type, writes the payload.
    payloadAtt = AddAttribute<IPayloadAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}
public PayloadFilter(PayloadHelper outerInstance, TokenStream input, string fieldName)
    : base(input)
{
    // The field name selects which payload scheme the filter applies.
    this.OuterInstance = outerInstance;
    this.FieldName = fieldName;
    PayloadAtt = AddAttribute<IPayloadAttribute>();
}
public TokenOffsetPayloadTokenFilter(TokenStream input)
    : base(input)
{
    // Needs both attributes: reads the token offsets, writes the payload.
    offsetAtt = AddAttribute<IOffsetAttribute>();
    payAtt = AddAttribute<IPayloadAttribute>();
}
protected CompoundWordTokenFilterBase(TokenStream input, ISet<string> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
    : base(input)
{
    this.tokens = new LinkedList<Token>();
    this.minWordSize = minWordSize;
    this.minSubwordSize = minSubwordSize;
    this.maxSubwordSize = maxSubwordSize;
    this.onlyLongestMatch = onlyLongestMatch;

    // Reuse the dictionary directly when it is already a CharArraySet;
    // otherwise copy its entries (lower-cased) into a fresh CharArraySet.
    if (dictionary is CharArraySet)
    {
        this.dictionary = (CharArraySet)dictionary;
    }
    else
    {
        this.dictionary = new CharArraySet(dictionary.Count, false);
        AddAllLowerCase(this.dictionary, dictionary);
    }

    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
// Builds a randomized token stream for term-vector format tests (newer
// test-infrastructure variant: static Random, TryGetValue lookups, J2N
// collections). Terms are drawn from the provided samples; increments,
// offsets and payloads are randomized (offsets are fully random —
// possibly backwards — when offsetsGoBackwards); inverted lookup maps
// (position -> term indices, start offset -> term indices) and term
// frequencies are precomputed for later verification.
protected internal RandomTokenStream(BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase, int len, string[] sampleTerms, BytesRef[] sampleTermBytes, bool offsetsGoBackwards)
{
    terms = new string[len];
    termBytes = new BytesRef[len];
    positionsIncrements = new int[len];
    positions = new int[len];
    startOffsets = new int[len];
    endOffsets = new int[len];
    payloads = new BytesRef[len];
    for (int i = 0; i < len; ++i)
    {
        int o = Random.Next(sampleTerms.Length);
        terms[i] = sampleTerms[o];
        termBytes[i] = sampleTermBytes[o];
        // First increment must be >= 1 so the first position is valid.
        positionsIncrements[i] = TestUtil.NextInt32(Random, i == 0 ? 1 : 0, 10);
        if (offsetsGoBackwards)
        {
            startOffsets[i] = Random.Next();
            endOffsets[i] = Random.Next();
        }
        else
        {
            if (i == 0)
            {
                startOffsets[i] = TestUtil.NextInt32(Random, 0, 1 << 16);
            }
            else
            {
                startOffsets[i] = startOffsets[i - 1] + TestUtil.NextInt32(Random, 0, Rarely() ? 1 << 16 : 20);
            }
            endOffsets[i] = startOffsets[i] + TestUtil.NextInt32(Random, 0, Rarely() ? 1 << 10 : 20);
        }
    }
    // Absolute positions are the running sum of the increments (0-based).
    for (int i = 0; i < len; ++i)
    {
        if (i == 0)
        {
            positions[i] = positionsIncrements[i] - 1;
        }
        else
        {
            positions[i] = positions[i - 1] + positionsIncrements[i];
        }
    }
    // Rarely share one payload across all terms, otherwise one per term.
    if (Rarely())
    {
        Arrays.Fill(payloads, baseTermVectorsFormatTestCase.RandomPayload());
    }
    else
    {
        for (int i = 0; i < len; ++i)
        {
            payloads[i] = baseTermVectorsFormatTestCase.RandomPayload();
        }
    }
    positionToTerms = new Dictionary<int?, ISet<int?>>(len);
    startOffsetToTerms = new Dictionary<int?, ISet<int?>>(len);
    for (int i = 0; i < len; ++i)
    {
        if (!positionToTerms.TryGetValue(positions[i], out ISet<int?> positionTerms))
        {
            positionToTerms[positions[i]] = positionTerms = new JCG.HashSet<int?>(1);
        }
        positionTerms.Add(i);
        if (!startOffsetToTerms.TryGetValue(startOffsets[i], out ISet<int?> startOffsetTerms))
        {
            startOffsetToTerms[startOffsets[i]] = startOffsetTerms = new JCG.HashSet<int?>(1);
        }
        startOffsetTerms.Add(i);
    }
    freqs = new Dictionary<string, int?>();
    foreach (string term in terms)
    {
        if (freqs.TryGetValue(term, out int? freq))
        {
            freqs[term] = freq + 1;
        }
        else
        {
            freqs[term] = 1;
        }
    }
    // Offsets may go backwards, so a permissive offset attribute
    // implementation is installed before the standard attributes.
    AddAttributeImpl(new PermissiveOffsetAttribute());
    termAtt = AddAttribute<ICharTermAttribute>();
    piAtt = AddAttribute<IPositionIncrementAttribute>();
    oAtt = AddAttribute<IOffsetAttribute>();
    pAtt = AddAttribute<IPayloadAttribute>();
}
protected internal PayloadFilter(AtomicInteger payloadCount, TokenStream input) : base(input) { this.PayloadCount = payloadCount; PayloadAtt = AddAttribute<IPayloadAttribute>(); }
public PayloadFilter(TestPayloadNearQuery enclosingInstance, TokenStream input, System.String fieldName) : base(input) { InitBlock(enclosingInstance); this.fieldName = fieldName; payAtt = AddAttribute <IPayloadAttribute>(); }
public TypeAsPayloadTokenFilter(TokenStream input) : base(input) { payloadAtt = AddAttribute<IPayloadAttribute>(); typeAtt = AddAttribute<ITypeAttribute>(); }
public PayloadFilter(TokenStream input, string fieldName) : base(input) { this.FieldName = fieldName; PayAtt = AddAttribute <IPayloadAttribute>(); }
private readonly IPayloadAttribute payloadAttribute; // LUCENENET: marked readonly ///<summary>Constructor</summary> /// <param name="vector"> /// Terms that contains the data for /// creating the <see cref="TokenStream"/>. Must have positions and offsets. /// </param> public TokenStreamFromTermPositionVector(Terms vector) { termAttribute = AddAttribute <ICharTermAttribute>(); positionIncrementAttribute = AddAttribute <IPositionIncrementAttribute>(); offsetAttribute = AddAttribute <IOffsetAttribute>(); payloadAttribute = AddAttribute <IPayloadAttribute>(); bool hasOffsets = vector.HasOffsets; bool hasPayloads = vector.HasPayloads; TermsEnum termsEnum = vector.GetEnumerator(); BytesRef text; DocsAndPositionsEnum dpEnum = null; while (termsEnum.MoveNext()) { text = termsEnum.Term; dpEnum = termsEnum.DocsAndPositions(null, dpEnum); dpEnum.NextDoc(); int freq = dpEnum.Freq; for (int j = 0; j < freq; j++) { int pos = dpEnum.NextPosition(); Token token; if (hasOffsets) { token = new Token(text.Utf8ToString(), dpEnum.StartOffset, dpEnum.EndOffset); } else { token = new Token(); token.SetEmpty().Append(text.Utf8ToString()); } if (hasPayloads) { // Must make a deep copy of the returned payload, // since D&PEnum API is allowed to re-use on every // call: token.Payload = BytesRef.DeepCopyOf(dpEnum.GetPayload()); } // Yes - this is the position, not the increment! This is for // sorting. This value // will be corrected before use. token.PositionIncrement = pos; this.positionedTokens.Add(token); } } CollectionUtil.TimSort(this.positionedTokens, tokenComparer); int lastPosition = -1; foreach (Token token in this.positionedTokens) { int thisPosition = token.PositionIncrement; token.PositionIncrement = thisPosition - lastPosition; lastPosition = thisPosition; } this.tokensAtCurrentPosition = this.positionedTokens.GetEnumerator(); }
public PayloadSetter(TokenStream input) : base(input) { InitializeInstanceFields(); payloadAtt = AddAttribute <IPayloadAttribute>(); }
public void TestPayloads() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED); myFieldType.StoreTermVectors = (true); myFieldType.StoreTermVectorOffsets = (true); myFieldType.StoreTermVectorPositions = (true); myFieldType.StoreTermVectorPayloads = (true); curOffset = 0; Token[] tokens = new Token[] { getToken("foxes"), getToken("can"), getToken("jump"), getToken("high") }; Document doc = new Document(); doc.Add(new Field("field", new CannedTokenStream(tokens), myFieldType)); writer.AddDocument(doc); IndexReader reader = writer.GetReader(); writer.Dispose(); assertEquals(1, reader.NumDocs); for (int i = 0; i < 2; i++) { // Do this twice, once passing true and then passing // false: they are entirely different code paths // under-the-hood: TokenStream ts = TokenSources.GetTokenStream(reader.GetTermVectors(0).GetTerms("field"), i == 0); ICharTermAttribute termAtt = ts.GetAttribute <ICharTermAttribute>(); IPositionIncrementAttribute posIncAtt = ts.GetAttribute <IPositionIncrementAttribute>(); IOffsetAttribute offsetAtt = ts.GetAttribute <IOffsetAttribute>(); IPayloadAttribute payloadAtt = ts.GetAttribute <IPayloadAttribute>(); foreach (Token token in tokens) { assertTrue(ts.IncrementToken()); assertEquals(token.toString(), termAtt.toString()); assertEquals(token.PositionIncrement, posIncAtt.PositionIncrement); assertEquals(token.Payload, payloadAtt.Payload); assertEquals(token.StartOffset, offsetAtt.StartOffset); assertEquals(token.EndOffset, offsetAtt.EndOffset); } assertFalse(ts.IncrementToken()); } reader.Dispose(); dir.Dispose(); }
public TokenListStream(ICollection<Token> tokens) { _tokens = tokens; _termAtt = AddAttribute<ITermAttribute>(); _posIncrAtt = AddAttribute<IPositionIncrementAttribute>(); _payloadAtt = AddAttribute<IPayloadAttribute>(); _offsetAtt = AddAttribute<IOffsetAttribute>(); _typeAtt = AddAttribute<ITypeAttribute>(); _flagsAtt = AddAttribute<IFlagsAttribute>(); }
protected internal PayloadFilter(TokenStream input):base(input) { payloadAtt = AddAttribute<IPayloadAttribute>(); }