/// <summary>
/// Create a new <seealso cref="TypeTokenFilter"/>.
/// </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="input"> the <seealso cref="TokenStream"/> to consume </param>
/// <param name="stopTypes"> the types to filter </param>
/// <param name="useWhiteList"> if true, then tokens whose type is in stopTypes will
///                             be kept, otherwise they will be filtered out </param>
public TypeTokenFilter(LuceneVersion version, TokenStream input, IEnumerable<string> stopTypes, bool useWhiteList)
    : base(version, input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    this.stopTypes = new HashSet<string>(stopTypes);
    this.useWhiteList = useWhiteList;
}
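A usage sketch for the whitelist mode described above (illustrative only: the sample text, the StandardTokenizer choice, and the "<NUM>" token type are assumptions, not taken from this snippet):

// Hypothetical example: keep only the tokens StandardTokenizer types as "<NUM>".
TokenStream stream = new StandardTokenizer(LuceneVersion.LUCENE_48, new StringReader("fee 42 fi 7"));
stream = new TypeTokenFilter(LuceneVersion.LUCENE_48, stream, new[] { "<NUM>" }, useWhiteList: true);
ICharTermAttribute term = stream.GetAttribute<ICharTermAttribute>();
stream.Reset();
while (stream.IncrementToken())
{
    Console.WriteLine(term); // expected: "42", then "7"
}
stream.End();
stream.Dispose();

With useWhiteList set to false, the same chain would instead drop the numeric tokens and keep everything else.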
public AddSuffixFilter(TokenStream input, Dictionary<string, char[]> _suffixByTokenType)
    : base(input)
{
    termAtt = AddAttribute<ITermAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    this.suffixByTokenType = _suffixByTokenType;
}

public TypeTokenFilter(Version version, bool enablePositionIncrements, TokenStream input, HashSet<string> stopTypes, bool useWhiteList)
    : base(version, enablePositionIncrements, input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    this.stopTypes = stopTypes;
    this.useWhiteList = useWhiteList;
}

void Init()
{
    InitPanGuSegment();
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}
/*
 * We skip calling base(input) here, because after that call
 * input's read position would have been moved.
 * by zh
 */
public MMSegTokenizer(Seg seg, TextReader input)
    : base(input)
{
    mmSeg = new MMSeg(input, seg);
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}
public CutLeterDigitFilter(TokenStream input)
    : base(input)
{
    reusableToken = new Token();
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch)
    : base(input)
{
    // Need to encode the payload
    thePayload = new Payload(PayloadHelper.EncodeFloat(payload));
    this.typeMatch = typeMatch;
    payloadAtt = AddAttribute<IPayloadAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public override bool Accept(AttributeSource source)
{
    if (typeAtt == null)
    {
        typeAtt = source.AddAttribute<ITypeAttribute>();
    }
    return typeToMatch.Equals(typeAtt.Type);
}

public override bool Accept(AttributeSource source)
{
    if (typeAtt == null)
    {
        typeAtt = source.AddAttribute<ITypeAttribute>();
    }
    // check to see if this is a Category
    return typeToMatch.Equals(typeAtt.Type);
}

public JiebaTokenizer(JiebaSegmenter seg, string input)
{
    segmenter = seg;
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    var text = input;
    tokens = segmenter.Tokenize(text, TokenizerMode.Search).ToList();
}
/// <summary>
/// Construct a token stream filtering the given input using a Set of common
/// words to create bigrams. Outputs both unigrams with position increment and
/// bigrams with position increment 0 type=gram where one or both of the words
/// in a potential bigram are in the set of common words.
/// </summary>
/// <param name="matchVersion"> the Lucene match version </param>
/// <param name="input"> TokenStream input in filter chain </param>
/// <param name="commonWords"> The set of common words. </param>
public CommonGramsFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet commonWords)
    : base(input)
{
    termAttribute = AddAttribute<ICharTermAttribute>();
    offsetAttribute = AddAttribute<IOffsetAttribute>();
    typeAttribute = AddAttribute<ITypeAttribute>();
    posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
    posLenAttribute = AddAttribute<IPositionLengthAttribute>();
    this.commonWords = commonWords;
}
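A sketch of what this filter emits (the sample text and the WhitespaceTokenizer choice are illustrative assumptions): with "the" registered as a common word, unigrams and bigrams are interleaved.

// Hypothetical example: "the" is a common word, so a bigram is injected.
CharArraySet common = new CharArraySet(LuceneVersion.LUCENE_48, new[] { "the" }, true);
TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("the quick fox"));
ts = new CommonGramsFilter(LuceneVersion.LUCENE_48, ts, common);
// Expected tokens: "the", "the_quick" (posInc 0, type "gram"), "quick", "fox"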
private void Init(System.IO.TextReader _input, HebMorph.DataStructures.DictRadix<int> _prefixesTree)
{
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    //posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    typeAtt = AddAttribute<ITypeAttribute>();
    input = _input;
    hebMorphTokenizer = new HebMorph.Tokenizer(_input);
    prefixesTree = _prefixesTree;
}

public IterTokenStream(params Token[] tokens)
    : base()
{
    this.tokens = tokens;
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    this.flagsAtt = AddAttribute<IFlagsAttribute>();
    this.typeAtt = AddAttribute<ITypeAttribute>();
    this.payloadAtt = AddAttribute<IPayloadAttribute>();
}

public ExpanderFilter(TokenStream input, [NotNull] Func<String, IEnumerable<Expansion>> expander, Boolean emitSource = true)
    : base(input)
{
    if (expander == null)
        throw new ArgumentNullException("expander");
    _expander = expander;
    _emitSource = emitSource;
    _termAttr = AddAttribute<ITermAttribute>();
    _posAttr = AddAttribute<IPositionIncrementAttribute>();
    _typeAttr = AddAttribute<ITypeAttribute>();
}
public NumericPayloadTokenFilter(TokenStream input, float payload, string typeMatch)
    : base(input)
{
    if (typeMatch == null)
    {
        throw new System.ArgumentException("typeMatch cannot be null");
    }
    // Need to encode the payload
    thePayload = new BytesRef(PayloadHelper.EncodeFloat(payload));
    this.typeMatch = typeMatch;
    this.payloadAtt = AddAttribute<IPayloadAttribute>();
    this.typeAtt = AddAttribute<ITypeAttribute>();
}
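On the consuming side the float can be recovered from the payload. A sketch mirroring the EncodeFloat naming used in this snippet (the surrounding 'stream' variable and the DecodeFloat counterpart are assumptions about this codebase, not taken from the snippet):

// Hypothetical example: read the numeric payload back while iterating a stream.
IPayloadAttribute payloadAtt = stream.GetAttribute<IPayloadAttribute>(); // 'stream' is assumed to exist
while (stream.IncrementToken())
{
    BytesRef p = payloadAtt.Payload;
    if (p != null)
    {
        float value = PayloadHelper.DecodeFloat(p.Bytes, p.Offset); // counterpart of EncodeFloat above
    }
}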
private void Init(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer, HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
{
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    //payAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    this.input = input;
    this._streamLemmatizer = _lemmatizer;
    this._streamLemmatizer.SetStream(input);
    this.alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
    this.lemmaFilter = _lemmaFilter;
}

public static IEnumerable<BodyTypeInfo> GetTypes(this ITypeAttribute attribute)
{
    if (attribute == null)
    {
        throw new ArgumentNullException(nameof(attribute));
    }
    var result = new List<BodyTypeInfo>();
    var types = new List<Type> { attribute.Type1, attribute.Type2, attribute.Type3, attribute.Type4 }
        .Where(t => t != null)
        .ToList();
    var typeCount = types.Count;
    var currentBodyType = 0;
    result.Add(new BodyTypeInfo
    {
        Type = types[0],
        CountType = types[0].IsGenericList() ? ObjectCountType.List : ObjectCountType.Single
    });
    for (var i = 1; i < typeCount; ++i)
    {
        var currentType = types[i];
        var previousType = types[i - 1];
        if (IsSingleOrListPair(currentType, previousType))
        {
            result[currentBodyType].CountType = ObjectCountType.SingleOrList;
            continue;
        }
        result.Add(new BodyTypeInfo
        {
            Type = types[i],
            CountType = types[i].IsGenericList() ? ObjectCountType.List : ObjectCountType.Single
        });
        currentBodyType++;
    }
    return result;
}
private void TestPositons(TypeTokenFilter stpf)
{
    ITypeAttribute typeAtt = stpf.GetAttribute<ITypeAttribute>();
    ICharTermAttribute termAttribute = stpf.GetAttribute<ICharTermAttribute>();
    IPositionIncrementAttribute posIncrAtt = stpf.GetAttribute<IPositionIncrementAttribute>();
    stpf.Reset();
    bool enablePositionIncrements = stpf.EnablePositionIncrements;
    while (stpf.IncrementToken())
    {
        log("Token: " + termAttribute.ToString() + ": " + typeAtt.Type + " - " + posIncrAtt.PositionIncrement);
        assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1",
            posIncrAtt.PositionIncrement, enablePositionIncrements ? 3 : 1);
    }
    stpf.End();
    stpf.Dispose();
}

public virtual void TestFilterTokens()
{
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(this), "English");
    ICharTermAttribute termAtt = filter.GetAttribute<ICharTermAttribute>();
    IOffsetAttribute offsetAtt = filter.GetAttribute<IOffsetAttribute>();
    ITypeAttribute typeAtt = filter.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = filter.GetAttribute<IPayloadAttribute>();
    IPositionIncrementAttribute posIncAtt = filter.GetAttribute<IPositionIncrementAttribute>();
    IFlagsAttribute flagsAtt = filter.GetAttribute<IFlagsAttribute>();
    filter.IncrementToken();
    assertEquals("accent", termAtt.ToString());
    assertEquals(2, offsetAtt.StartOffset());
    assertEquals(7, offsetAtt.EndOffset());
    assertEquals("wrd", typeAtt.Type);
    assertEquals(3, posIncAtt.PositionIncrement);
    assertEquals(77, flagsAtt.Flags);
    assertEquals(new BytesRef(new byte[] { 0, 1, 2, 3 }), payloadAtt.Payload);
}

public void TestFilterTokens()
{
    SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
    ITermAttribute termAtt = filter.GetAttribute<ITermAttribute>();
    IOffsetAttribute offsetAtt = filter.GetAttribute<IOffsetAttribute>();
    ITypeAttribute typeAtt = filter.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = filter.GetAttribute<IPayloadAttribute>();
    IPositionIncrementAttribute posIncAtt = filter.GetAttribute<IPositionIncrementAttribute>();
    IFlagsAttribute flagsAtt = filter.GetAttribute<IFlagsAttribute>();
    filter.IncrementToken();
    Assert.AreEqual("accent", termAtt.Term);
    Assert.AreEqual(2, offsetAtt.StartOffset);
    Assert.AreEqual(7, offsetAtt.EndOffset);
    Assert.AreEqual("wrd", typeAtt.Type);
    Assert.AreEqual(3, posIncAtt.PositionIncrement);
    Assert.AreEqual(77, flagsAtt.Flags);
    Assert.AreEqual(new Payload(new byte[] { 0, 1, 2, 3 }), payloadAtt.Payload);
}

public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
    : base(suffix)
{
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;
    termAtt = AddAttribute<ICharTermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
    p_termAtt = prefix.AddAttribute<ICharTermAttribute>();
    p_posIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    p_payloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    p_offsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    p_typeAtt = prefix.AddAttribute<ITypeAttribute>();
    p_flagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}
/// <summary>
/// Creates a new WordDelimiterFilter
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="in"> TokenStream to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
    : base(@in)
{
    if (!InstanceFieldsInitialized)
    {
        InitializeInstanceFields();
        InstanceFieldsInitialized = true;
    }
    if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
    {
        throw new System.ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
    }
    this.flags = configurationFlags;
    this.protWords = protWords;
    this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE));
    this.termAttribute = AddAttribute<ICharTermAttribute>();
    this.offsetAttribute = AddAttribute<IOffsetAttribute>();
    this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
    this.typeAttribute = AddAttribute<ITypeAttribute>();
}
public virtual void Test()
{
    TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
    string test = "The quick red fox jumped over the lazy brown dogs";
    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
    TeeSinkTokenFilter.SinkTokenStream sink = ttf.NewSinkTokenStream(sinkFilter);
    bool seenDogs = false;
    ICharTermAttribute termAtt = ttf.AddAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = ttf.AddAttribute<ITypeAttribute>();
    ttf.Reset();
    while (ttf.IncrementToken())
    {
        if (termAtt.ToString().Equals("dogs", StringComparison.Ordinal))
        {
            seenDogs = true;
            assertTrue(typeAtt.Type + " is not equal to " + "D", typeAtt.Type.Equals("D", StringComparison.Ordinal) == true);
        }
        else
        {
            assertTrue(typeAtt.Type + " is not null and it should be", typeAtt.Type.Equals("word", StringComparison.Ordinal));
        }
    }
    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
    int sinkCount = 0;
    sink.Reset();
    while (sink.IncrementToken())
    {
        sinkCount++;
    }
    assertTrue("sink Size: " + sinkCount + " is not: " + 1, sinkCount == 1);
}

public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
    : base(suffix)
{
    Suffix = suffix;
    Prefix = prefix;
    _prefixExhausted = false;
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = AddAttribute<ITermAttribute>();
    _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    _payloadAtt = AddAttribute<IPayloadAttribute>();
    _offsetAtt = AddAttribute<IOffsetAttribute>();
    _typeAtt = AddAttribute<ITypeAttribute>();
    _flagsAtt = AddAttribute<IFlagsAttribute>();
    // ReSharper restore DoNotCallOverridableMethodsInConstructor
    _pTermAtt = prefix.AddAttribute<ITermAttribute>();
    _pPosIncrAtt = prefix.AddAttribute<IPositionIncrementAttribute>();
    _pPayloadAtt = prefix.AddAttribute<IPayloadAttribute>();
    _pOffsetAtt = prefix.AddAttribute<IOffsetAttribute>();
    _pTypeAtt = prefix.AddAttribute<ITypeAttribute>();
    _pFlagsAtt = prefix.AddAttribute<IFlagsAttribute>();
}

public void Test()
{
    TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
    String test = "The quick red fox jumped over the lazy brown dogs";
    TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
    TeeSinkTokenFilter.SinkTokenStream sink = ttf.NewSinkTokenStream(sinkFilter);
    bool seenDogs = false;
    ITermAttribute termAtt = ttf.AddAttribute<ITermAttribute>();
    ITypeAttribute typeAtt = ttf.AddAttribute<ITypeAttribute>();
    ttf.Reset();
    while (ttf.IncrementToken())
    {
        if (termAtt.Term.Equals("dogs"))
        {
            seenDogs = true;
            Assert.True(typeAtt.Type.Equals("D") == true, typeAtt.Type + " is not equal to " + "D");
        }
        else
        {
            Assert.True(typeAtt.Type.Equals("word"), typeAtt.Type + " is not null and it should be");
        }
    }
    Assert.True(seenDogs == true, seenDogs + " does not equal: " + true);
    int sinkCount = 0;
    sink.Reset();
    while (sink.IncrementToken())
    {
        sinkCount++;
    }
    Assert.True(sinkCount == 1, "sink Size: " + sinkCount + " is not: " + 1);
}
/// <summary>
/// Creates a new WordDelimiterFilter
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="in"> TokenStream to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, WordDelimiterFlags configurationFlags, CharArraySet protWords)
    : base(@in)
{
    this.termAttribute = AddAttribute<ICharTermAttribute>();
    this.offsetAttribute = AddAttribute<IOffsetAttribute>();
    this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
    this.typeAttribute = AddAttribute<ITypeAttribute>();
    concat = new WordDelimiterConcatenation(this);
    concatAll = new WordDelimiterConcatenation(this);
    sorter = new OffsetSorter(this);
    if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
    {
        throw new ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
    }
    this.flags = configurationFlags;
    this.protWords = protWords;
    this.iterator = new WordDelimiterIterator(charTypeTable, Has(WordDelimiterFlags.SPLIT_ON_CASE_CHANGE), Has(WordDelimiterFlags.SPLIT_ON_NUMERICS), Has(WordDelimiterFlags.STEM_ENGLISH_POSSESSIVE));
}
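A hedged usage sketch against the constructor above (the flag combination, the WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE default table, and the sample text are assumptions, not taken from this snippet):

// Hypothetical example: split on case changes and letter/digit boundaries.
TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("Wi-Fi PowerShot500"));
ts = new WordDelimiterFilter(LuceneVersion.LUCENE_48, ts,
    WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, // assumed default character-type table
    WordDelimiterFlags.GENERATE_WORD_PARTS | WordDelimiterFlags.GENERATE_NUMBER_PARTS
        | WordDelimiterFlags.SPLIT_ON_CASE_CHANGE | WordDelimiterFlags.SPLIT_ON_NUMERICS,
    null); // no protected words
// Expected parts: "Wi", "Fi", "Power", "Shot", "500"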
public virtual void Test()
{
    string test = "The quick red fox jumped over the lazy brown dogs";
    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(this, new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)));
    int count = 0;
    ICharTermAttribute termAtt = nptf.GetAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = nptf.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = nptf.GetAttribute<IPayloadAttribute>();
    nptf.Reset();
    while (nptf.IncrementToken())
    {
        assertTrue(typeAtt.Type + " is not null and it should be", typeAtt.Type.Equals(char.ToUpper(termAtt.Buffer()[0]).ToString()));
        assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.Payload != null);
        string type = payloadAtt.Payload.Utf8ToString();
        assertTrue(type + " is not equal to " + typeAtt.Type, type.Equals(typeAtt.Type));
        count++;
    }
    assertTrue(count + " does not equal: " + 10, count == 10);
}

public static Token NextToken(TokenStream input, Token reusableToken)
{
    if (input == null)
    {
        return null;
    }
    if (!input.IncrementToken())
    {
        return null;
    }
    ITermAttribute termAtt = input.GetAttribute<ITermAttribute>();
    IOffsetAttribute offsetAtt = input.GetAttribute<IOffsetAttribute>();
    ITypeAttribute typeAtt = input.GetAttribute<ITypeAttribute>();
    if (reusableToken == null)
    {
        reusableToken = new Token();
    }
    reusableToken.Clear();
    if (termAtt != null)
    {
        reusableToken.SetTermBuffer(termAtt.TermBuffer(), 0, termAtt.TermLength());
    }
    if (offsetAtt != null)
    {
        reusableToken.StartOffset = offsetAtt.StartOffset;
        reusableToken.EndOffset = offsetAtt.EndOffset;
    }
    if (typeAtt != null)
    {
        reusableToken.Type = typeAtt.Type;
    }
    return reusableToken;
}
public void test()
{
    String test = "The quick red fox jumped over the lazy brown dogs";
    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))));
    int count = 0;
    ITermAttribute termAtt = nptf.GetAttribute<ITermAttribute>();
    ITypeAttribute typeAtt = nptf.GetAttribute<ITypeAttribute>();
    IPayloadAttribute payloadAtt = nptf.GetAttribute<IPayloadAttribute>();
    while (nptf.IncrementToken())
    {
        Assert.True(typeAtt.Type.Equals(char.ToUpper(termAtt.TermBuffer()[0]).ToString()), typeAtt.Type + " is not null and it should be");
        Assert.True(payloadAtt.Payload != null, "nextToken.getPayload() is null and it shouldn't be");
        String type = Encoding.UTF8.GetString(payloadAtt.Payload.GetData());
        Assert.True(type != null, "type is null and it shouldn't be");
        Assert.True(type.Equals(typeAtt.Type) == true, type + " is not equal to " + typeAtt.Type);
        count++;
    }
    Assert.True(count == 10, count + " does not equal: " + 10);
}
public virtual void TestCloneAttributes()
{
    AttributeSource src = new AttributeSource();
    IFlagsAttribute flagsAtt = src.AddAttribute<IFlagsAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    flagsAtt.Flags = 1234;
    typeAtt.Type = "TestType";
    AttributeSource clone = src.CloneAttributes();
    IEnumerator<Type> it = clone.GetAttributeClassesEnumerator();
    it.MoveNext();
    Assert.AreEqual(typeof(IFlagsAttribute), it.Current, "FlagsAttribute must be the first attribute");
    it.MoveNext();
    Assert.AreEqual(typeof(ITypeAttribute), it.Current, "TypeAttribute must be the second attribute");
    Assert.IsFalse(it.MoveNext(), "No more attributes");
    IFlagsAttribute flagsAtt2 = clone.GetAttribute<IFlagsAttribute>();
    ITypeAttribute typeAtt2 = clone.GetAttribute<ITypeAttribute>();
    Assert.AreNotSame(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be different instances");
    Assert.AreNotSame(typeAtt2, typeAtt, "TypeAttribute of original and clone must be different instances");
    Assert.AreEqual(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be equal");
    Assert.AreEqual(typeAtt2, typeAtt, "TypeAttribute of original and clone must be equal");
    // test copy back
    flagsAtt2.Flags = 4711;
    typeAtt2.Type = "OtherType";
    clone.CopyTo(src);
    Assert.AreEqual(4711, flagsAtt.Flags, "FlagsAttribute of original must now contain updated term");
    Assert.AreEqual(typeAtt.Type, "OtherType", "TypeAttribute of original must now contain updated type");
    // verify again:
    Assert.AreNotSame(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be different instances");
    Assert.AreNotSame(typeAtt2, typeAtt, "TypeAttribute of original and clone must be different instances");
    Assert.AreEqual(flagsAtt2, flagsAtt, "FlagsAttribute of original and clone must be equal");
    Assert.AreEqual(typeAtt2, typeAtt, "TypeAttribute of original and clone must be equal");
}
public virtual void TestLongStream()
{
    using (NumericTokenStream stream = (new NumericTokenStream()).SetInt64Value(Lvalue))
    {
        // use GetAttribute to test if attributes really exist; if not, an IAE will be thrown
        ITermToBytesRefAttribute bytesAtt = stream.GetAttribute<ITermToBytesRefAttribute>();
        ITypeAttribute typeAtt = stream.GetAttribute<ITypeAttribute>();
        NumericTokenStream.INumericTermAttribute numericAtt = stream.GetAttribute<NumericTokenStream.INumericTermAttribute>();
        BytesRef bytes = bytesAtt.BytesRef;
        stream.Reset();
        Assert.AreEqual(64, numericAtt.ValueSize);
        for (int shift = 0; shift < 64; shift += NumericUtils.PRECISION_STEP_DEFAULT)
        {
            Assert.IsTrue(stream.IncrementToken(), "New token is available");
            Assert.AreEqual(shift, numericAtt.Shift, "Shift value wrong");
            bytesAtt.FillBytesRef();
            Assert.AreEqual(Lvalue & ~((1L << shift) - 1L), NumericUtils.PrefixCodedToInt64(bytes), "Term is incorrectly encoded");
            Assert.AreEqual(Lvalue & ~((1L << shift) - 1L), numericAtt.RawValue, "Term raw value is incorrectly encoded");
            Assert.AreEqual((shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.Type, "Type incorrect");
        }
        Assert.IsFalse(stream.IncrementToken(), "More tokens available");
        stream.End();
    }
}
// Decompiler artifact: the compiler-generated name "<TryGetEncoding>b__3_0" is not valid C# source.
// In source form this is the predicate lambda used inside TryGetEncoding:
// att => string.CompareOrdinal(att.Name, "TcEncoding") == 0
internal WordTokenFilter(TokenStream input)
    : base(input)
{
    termAtt = AddAttribute<ITermAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public EmailFilter(TokenStream @in)
    : base(@in)
{
    this.typeAtt = AddAttribute<ITypeAttribute>();
}

private void Init()
{
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public TestFilter(TestMultiAnalyzer enclosingInstance, TokenStream in_Renamed)
    : base(in_Renamed)
{
    InitBlock(enclosingInstance);
    termAtt = AddAttribute<ITermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public TypeAsPayloadTokenFilter(TokenStream input)
    : base(input)
{
    payloadAtt = AddAttribute<IPayloadAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

internal WordTokenFilter(TokenStream input)
    : base(input)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

private void Init()
{
    _termAtt = this.AddAttribute<ITermAttribute>();
    _offsetAtt = this.AddAttribute<IOffsetAttribute>();
    _typeAtt = this.AddAttribute<ITypeAttribute>();
}

internal static bool TryGetEncoding(ITypeAttribute att, out Encoding encoding)
{
    encoding = null;
    return (att.Name == "TcEncoding") && TryGetEncoding(att.Value, out encoding);
}

/// <summary>
/// Construct filtering <i>in</i>.
/// </summary>
public ClassicFilter(TokenStream @in)
    : base(@in)
{
    typeAtt = AddAttribute<ITypeAttribute>();
    termAtt = AddAttribute<ICharTermAttribute>();
}

protected CompoundWordTokenFilterBase(TokenStream input, ISet<string> dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
    : base(input)
{
    this.tokens = new LinkedList<Token>();
    this.minWordSize = minWordSize;
    this.minSubwordSize = minSubwordSize;
    this.maxSubwordSize = maxSubwordSize;
    this.onlyLongestMatch = onlyLongestMatch;
    if (dictionary is CharArraySet)
    {
        this.dictionary = (CharArraySet)dictionary;
    }
    else
    {
        this.dictionary = new CharArraySet(dictionary.Count, false);
        AddAllLowerCase(this.dictionary, dictionary);
    }
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
}

internal TypeAttribute(ITypeAttribute att)
{
    this._name = att.Name;
    this._value = att.Value;
}
// offsetsAreCorrect also validates:
// - graph offsets are correct (all tokens leaving from
//   pos X have the same startOffset; all tokens
//   arriving to pos Y have the same endOffset)
// - offsets only move forwards (startOffset >= lastStartOffset)
public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect)
{
    Assert.IsNotNull(output);
    var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();
    ICharTermAttribute termAtt = null;
    if (output.Length > 0)
    {
        Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute");
        termAtt = ts.GetAttribute<ICharTermAttribute>();
    }
    IOffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
        offsetAtt = ts.GetAttribute<IOffsetAttribute>();
    }
    ITypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
        typeAtt = ts.GetAttribute<ITypeAttribute>();
    }
    IPositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null || finalPosInc != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
        posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
    }
    IPositionLengthAttribute posLengthAtt = null;
    if (posLengths != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute");
        posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
    }
    IKeywordAttribute keywordAtt = null;
    if (keywordAtts != null)
    {
        Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute");
        keywordAtt = ts.GetAttribute<IKeywordAttribute>();
    }
    // Maps position to the start/end offset:
    IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
    IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();
    ts.Reset();
    int pos = -1;
    int lastStartOffset = 0;
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values
        ts.ClearAttributes();
        termAtt.SetEmpty().Append("bogusTerm");
        if (offsetAtt != null) { offsetAtt.SetOffset(14584724, 24683243); }
        if (typeAtt != null) { typeAtt.Type = "bogusType"; }
        if (posIncrAtt != null) { posIncrAtt.PositionIncrement = 45987657; }
        if (posLengthAtt != null) { posLengthAtt.PositionLength = 45987653; }
        if (keywordAtt != null) { keywordAtt.Keyword = (i & 1) == 0; }
        bool reset = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before
        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
        Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain");
        Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
        if (startOffsets != null) { Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i); }
        if (endOffsets != null) { Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i); }
        if (types != null) { Assert.AreEqual(types[i], typeAtt.Type, "type " + i); }
        if (posIncrements != null) { Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i); }
        if (posLengths != null) { Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i); }
        if (keywordAtts != null) { Assert.AreEqual(keywordAtts[i], keywordAtt.Keyword, "keywordAtt " + i); }
        // we can enforce some basic things about a few attributes even if the caller doesn't check:
        if (offsetAtt != null)
        {
            int startOffset = offsetAtt.StartOffset();
            int endOffset = offsetAtt.EndOffset();
            if (finalOffset != null)
            {
                Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
                Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
            }
            if (offsetsAreCorrect)
            {
                Assert.IsTrue(offsetAtt.StartOffset() >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
                lastStartOffset = offsetAtt.StartOffset();
            }
            if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
            {
                // Validate offset consistency in the graph, ie
                // all tokens leaving from a certain pos have the
                // same startOffset, and all tokens arriving to a
                // certain pos have the same endOffset:
                int posInc = posIncrAtt.PositionIncrement;
                pos += posInc;
                int posLength = posLengthAtt.PositionLength;
                if (!posToStartOffset.ContainsKey(pos))
                {
                    // First time we've seen a token leaving from this position:
                    posToStartOffset[pos] = startOffset;
                    //System.out.println("  + s " + pos + " -> " + startOffset);
                }
                else
                {
                    // We've seen a token leaving from this position
                    // before; verify the startOffset is the same:
                    //System.out.println("  + vs " + pos + " -> " + startOffset);
                    Assert.AreEqual((int)posToStartOffset[pos], startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                }
                int endPos = pos + posLength;
                if (!posToEndOffset.ContainsKey(endPos))
                {
                    // First time we've seen a token arriving to this position:
                    posToEndOffset[endPos] = endOffset;
                    //System.out.println("  + e " + endPos + " -> " + endOffset);
                }
                else
                {
                    // We've seen a token arriving to this position
                    // before; verify the endOffset is the same:
                    //System.out.println("  + ve " + endPos + " -> " + endOffset);
                    Assert.AreEqual((int)posToEndOffset[endPos], endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                }
            }
        }
        if (posIncrAtt != null)
        {
            if (i == 0)
            {
                Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
            }
            else
            {
                Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
            }
        }
        if (posLengthAtt != null) { Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1"); }
    }
    if (ts.IncrementToken())
    {
        Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
    }
    // repeat our extra safety checks for End()
    ts.ClearAttributes();
    if (termAtt != null) { termAtt.SetEmpty().Append("bogusTerm"); }
    if (offsetAtt != null) { offsetAtt.SetOffset(14584724, 24683243); }
    if (typeAtt != null) { typeAtt.Type = "bogusType"; }
    if (posIncrAtt != null) { posIncrAtt.PositionIncrement = 45987657; }
    if (posLengthAtt != null) { posLengthAtt.PositionLength = 45987653; }
    var reset_ = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before
    ts.End();
    Assert.IsTrue(checkClearAtt.AndResetClearCalled, "super.End()/ClearAttributes() was not called correctly in End()");
    if (finalOffset != null) { Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset(), "finalOffset"); }
    if (offsetAtt != null) { Assert.IsTrue(offsetAtt.EndOffset() >= 0, "finalOffset must be >= 0"); }
    if (finalPosInc != null) { Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc"); }
    ts.Dispose();
}
public virtual void TestCaptureState()
{
    // init a first instance
    AttributeSource src = new AttributeSource();
    ICharTermAttribute termAtt = src.AddAttribute<ICharTermAttribute>();
    ITypeAttribute typeAtt = src.AddAttribute<ITypeAttribute>();
    termAtt.Append("TestTerm");
    typeAtt.Type = "TestType";
    int hashCode = src.GetHashCode();
    AttributeSource.State state = src.CaptureState();
    // modify the attributes
    termAtt.SetEmpty().Append("AnotherTestTerm");
    typeAtt.Type = "AnotherTestType";
    Assert.IsTrue(hashCode != src.GetHashCode(), "Hash code should be different");
    src.RestoreState(state);
    Assert.AreEqual(termAtt.ToString(), "TestTerm");
    Assert.AreEqual(typeAtt.Type, "TestType");
    Assert.AreEqual(hashCode, src.GetHashCode(), "Hash code should be equal after restore");
    // restore into an exact configured copy
    AttributeSource copy = new AttributeSource();
    copy.AddAttribute<ICharTermAttribute>();
    copy.AddAttribute<ITypeAttribute>();
    copy.RestoreState(state);
    Assert.AreEqual(src.GetHashCode(), copy.GetHashCode(), "Both AttributeSources should have same hashCode after restore");
    Assert.AreEqual(src, copy, "Both AttributeSources should be equal after restore");
    // init a second instance (with attributes in different order and one additional attribute)
    AttributeSource src2 = new AttributeSource();
    typeAtt = src2.AddAttribute<ITypeAttribute>();
    IFlagsAttribute flagsAtt = src2.AddAttribute<IFlagsAttribute>();
    termAtt = src2.AddAttribute<ICharTermAttribute>();
    flagsAtt.Flags = 12345;
    src2.RestoreState(state);
    Assert.AreEqual(termAtt.ToString(), "TestTerm");
    Assert.AreEqual(typeAtt.Type, "TestType");
    Assert.AreEqual(12345, flagsAtt.Flags, "FlagsAttribute should not be touched");
    // init a third instance missing one Attribute
    AttributeSource src3 = new AttributeSource();
    termAtt = src3.AddAttribute<ICharTermAttribute>();
    try
    {
        src3.RestoreState(state);
        Assert.Fail("The third instance is missing the TypeAttribute, so restoreState() should throw IllegalArgumentException");
    }
#pragma warning disable 168
    catch (System.ArgumentException iae)
#pragma warning restore 168
    {
        // pass
    }
}
/// <summary>
/// Constructs a new CommonGramsQueryFilter based on the provided CommonGramsFilter
/// </summary>
/// <param name="input"> CommonGramsFilter the QueryFilter will use </param>
public CommonGramsQueryFilter(CommonGramsFilter input)
    : base(input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
}
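A query-side sketch building on the CommonGramsFilter example earlier (reusing its hypothetical 'common' set; same illustrative assumptions): where a bigram is emitted, the unigrams it covers are dropped.

// Hypothetical example: at query time "the quick" reduces to a single token.
TokenStream qts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("the quick"));
qts = new CommonGramsQueryFilter(new CommonGramsFilter(LuceneVersion.LUCENE_48, qts, common));
// Expected tokens: "the_quick" only (instead of "the", "the_quick", "quick")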
/*
 * Constructs a ShingleFilter with the specified shingle size from the
 * {@link TokenStream} <c>input</c>
 *
 * @param input input stream
 * @param maxShingleSize maximum shingle size produced by the filter.
 */
public ShingleFilter(TokenStream input, int maxShingleSize)
    : base(input)
{
    SetMaxShingleSize(maxShingleSize);
    this.termAtt = AddAttribute<ITermAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
    this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    this.typeAtt = AddAttribute<ITypeAttribute>();
}
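A brief sketch for this older (ITermAttribute-era) API; the sample text and the WhitespaceTokenizer choice are illustrative assumptions:

// Hypothetical example: bigram shingles plus the original unigrams.
TokenStream ts = new ShingleFilter(new WhitespaceTokenizer(new StringReader("please divide this")), 2);
// Expected tokens: "please", "please divide", "divide", "divide this", "this"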
public SynonymFilter(TokenStream input, ISynonymEngine synonymEngine)
    : base(input)
{
    SynonymEngine = synonymEngine;
    termAtt = AddAttribute<ITermAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public TestFilter(TokenStream @in)
    : base(@in)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}
public static void AssertTokenStreamContents(TokenStream ts, System.String[] output, int[] startOffsets, int[] endOffsets, System.String[] types, int[] posIncrements, int? finalOffset)
{
    Assert.IsNotNull(output);
    ICheckClearAttributesAttribute checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();
    Assert.IsTrue(ts.HasAttribute<ITermAttribute>(), "has no TermAttribute");
    ITermAttribute termAtt = ts.GetAttribute<ITermAttribute>();
    IOffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null)
    {
        Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
        offsetAtt = ts.GetAttribute<IOffsetAttribute>();
    }
    ITypeAttribute typeAtt = null;
    if (types != null)
    {
        Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
        typeAtt = ts.GetAttribute<ITypeAttribute>();
    }
    IPositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null)
    {
        Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
        posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
    }
    ts.Reset();
    for (int i = 0; i < output.Length; i++)
    {
        // extra safety to enforce, that the state is not preserved and also assign bogus values
        ts.ClearAttributes();
        termAtt.SetTermBuffer("bogusTerm");
        if (offsetAtt != null) { offsetAtt.SetOffset(14584724, 24683243); }
        if (typeAtt != null) { typeAtt.Type = "bogusType"; }
        if (posIncrAtt != null) { posIncrAtt.PositionIncrement = 45987657; }
        checkClearAtt.GetAndResetClearCalled(); // reset it, because we called clearAttribute() before
        Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
        Assert.IsTrue(checkClearAtt.GetAndResetClearCalled(), "clearAttributes() was not called correctly in TokenStream chain");
        Assert.AreEqual(output[i], termAtt.Term, "term " + i);
        if (startOffsets != null) { Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset, "startOffset " + i); }
        if (endOffsets != null) { Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset, "endOffset " + i); }
        if (types != null) { Assert.AreEqual(types[i], typeAtt.Type, "type " + i); }
        if (posIncrements != null) { Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i); }
    }
    Assert.IsFalse(ts.IncrementToken(), "end of stream");
    ts.End();
    if (finalOffset.HasValue)
    {
        Assert.AreEqual(finalOffset, offsetAtt.EndOffset, "finalOffset ");
    }
    ts.Close();
}
internal WordTokenFilter(TypeAsPayloadTokenFilterTest outerInstance, TokenStream input)
    : base(input)
{
    this.outerInstance = outerInstance;
    termAtt = AddAttribute<ICharTermAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}
private void Init()
{
    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
}

public FakeStandardTokenizer(TokenStream input)
    : base(input)
{
    typeAtt = AddAttribute<ITypeAttribute>();
}

internal TestTokenStream()
{
    termAtt = AddAttribute<ITermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
    typeAtt = AddAttribute<ITypeAttribute>();
    payloadAtt = AddAttribute<IPayloadAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
}
public TestTokenStream(Token[] testToken)
{
    _testToken = testToken;
    _termAtt = AddAttribute<ITermAttribute>();
    _offsetAtt = AddAttribute<IOffsetAttribute>();
    _posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    _typeAtt = AddAttribute<ITypeAttribute>();
}

public override void CopyTo(Attribute target)
{
    ITypeAttribute t = (ITypeAttribute)target;
    t.Type = type;
}