/// <summary>
/// Creates the filter on top of <paramref name="input"/>, registers the term and
/// type attributes, and remembers the supplied suffix table.
/// </summary>
/// <param name="input">Token stream passed to the base constructor.</param>
/// <param name="_suffixByTokenType">
/// Suffix table stored by reference in the <c>suffixByTokenType</c> field (no copy is made).
/// Presumably keyed by token type name - verify against the code that reads it.
/// </param>
public AddSuffixFilter(TokenStream input, Dictionary<string, char[]> _suffixByTokenType)
    : base(input)
{
    this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));

    this.suffixByTokenType = _suffixByTokenType;
}
/// <summary>
/// Creates a new <see cref="TypeAttribute"/> and copies this instance's <c>type</c> field into it.
/// </summary>
/// <returns>The newly created attribute, returned as <see cref="object"/>.</returns>
public override object Clone()
{
    TypeAttribute copy = new TypeAttribute();
    copy.type = this.type;
    return copy;
}
/// <summary>
/// Creates the filter on top of <paramref name="input"/>, stores the hyphenator and
/// registers the term, type and offset attributes.
/// </summary>
/// <param name="input">Token stream passed to the base constructor.</param>
/// <param name="hyphenator">Hyphenator instance kept in the <c>_hyphenator</c> field.</param>
public HyphenationTokenFilter(TokenStream input, Hyphenator hyphenator)
    : base(input)
{
    this._hyphenator = hyphenator;

    this._termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this._typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    this._ofsAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
}
/// <summary>
/// Compares this attribute with <paramref name="other"/>.
/// </summary>
/// <param name="other">Object to compare against; may be null.</param>
/// <returns>
/// <c>true</c> when <paramref name="other"/> is this very instance, or is a
/// <see cref="TypeAttribute"/> whose <c>type</c> is equal to this one's
/// (two null types count as equal); otherwise <c>false</c>.
/// </returns>
public override bool Equals(object other)
{
    // Same reference: nothing else to check.
    if (ReferenceEquals(other, this))
    {
        return true;
    }

    TypeAttribute that = other as TypeAttribute;
    if (ReferenceEquals(that, null))
    {
        // Either null or not a TypeAttribute at all.
        return false;
    }

    if (this.type == null)
    {
        return that.type == null;
    }

    return this.type.Equals(that.type);
}
/// <summary>
/// Walks a <see cref="TypeAttribute"/> through its default value, the <c>Type</c> setter,
/// <c>ToString</c>, cloning, copying and <c>Clear</c>, asserting the expected type each time.
/// </summary>
public virtual void TestTypeAttribute()
{
    // A fresh attribute reports the default type.
    TypeAttribute source = new TypeAttribute();
    Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, source.Type);

    // Setting the type is reflected in the string form.
    source.Type = "hallo";
    Assert.AreEqual("type=hallo", source.ToString());

    // Clone keeps the type.
    TypeAttribute cloned = (TypeAttribute)AssertCloneIsEqual(source);
    Assert.AreEqual("hallo", cloned.Type);

    // Copy keeps the type.
    TypeAttribute copied = (TypeAttribute)AssertCopyIsEqual(source);
    Assert.AreEqual("hallo", copied.Type);

    // Clear restores the default type.
    source.Clear();
    Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, source.Type);
}
/// <summary>
/// Checks the <see cref="TypeAttribute"/> life cycle: default type, assignment,
/// string representation, clone, copy, and reset through <c>Clear</c>.
/// </summary>
public virtual void TestTypeAttribute()
{
    TypeAttribute original = new TypeAttribute();
    Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, original.Type);

    original.Type = "hallo";
    Assert.AreEqual("type=hallo", original.ToString());

    // The same local is reused for the clone and then for the copy.
    TypeAttribute duplicate;

    duplicate = (TypeAttribute)AssertCloneIsEqual(original);
    Assert.AreEqual("hallo", duplicate.Type);

    duplicate = (TypeAttribute)AssertCopyIsEqual(original);
    Assert.AreEqual("hallo", duplicate.Type);

    original.Clear();
    Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, original.Type);
}
/// <summary>
/// Creates a filter that holds both a prefix stream and a suffix stream.
/// The suffix stream is passed to the base constructor; the same six attributes
/// (term, position increment, payload, offset, type, flags) are registered on this
/// filter and on the prefix stream.
/// </summary>
/// <param name="prefix">Stream stored in <c>Prefix</c>; its attributes are kept in the <c>_p*</c> fields.</param>
/// <param name="suffix">Stream stored in <c>Suffix</c> and used as the base input.</param>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
    : base(suffix)
{
    Suffix = suffix;
    Prefix = prefix;
    _prefixExhausted = false;

    // Attributes registered on this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    _posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    _payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    _offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    _typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    _flagsAtt = (FlagsAttribute)AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // Matching attributes registered on the prefix stream.
    _pTermAtt = (TermAttribute)prefix.AddAttribute(typeof(TermAttribute));
    _pPosIncrAtt = (PositionIncrementAttribute)prefix.AddAttribute(typeof(PositionIncrementAttribute));
    _pPayloadAtt = (PayloadAttribute)prefix.AddAttribute(typeof(PayloadAttribute));
    _pOffsetAtt = (OffsetAttribute)prefix.AddAttribute(typeof(OffsetAttribute));
    _pTypeAtt = (TypeAttribute)prefix.AddAttribute(typeof(TypeAttribute));
    _pFlagsAtt = (FlagsAttribute)prefix.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Creates a shingle filter with ad hoc parameter settings.
/// </summary>
/// <param name="input">stream from which to construct the matrix</param>
/// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
/// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
/// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
/// <param name="ignoringSinglePrefixOrSuffixShingle">
/// if true, shingles that only contain permutations of the first or the last column will not be
/// produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.
/// </param>
/// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
public ShingleMatrixFilter(
    TokenStream input,
    int minimumShingleSize,
    int maximumShingleSize,
    Char? spacerCharacter,
    bool ignoringSinglePrefixOrSuffixShingle,
    TokenSettingsCodec settingsCodec)
{
    _input = input;

    // Shingle settings.
    MinimumShingleSize = minimumShingleSize;
    MaximumShingleSize = maximumShingleSize;
    SpacerCharacter = spacerCharacter;
    IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
    _settingsCodec = settingsCodec;

    // Attributes registered on this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    _posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    _payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    _offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    _typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    _flagsAtt = (FlagsAttribute)AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // Matching attributes registered on the input stream.
    _inTermAtt = (TermAttribute)input.AddAttribute(typeof(TermAttribute));
    _inPosIncrAtt = (PositionIncrementAttribute)input.AddAttribute(typeof(PositionIncrementAttribute));
    _inPayloadAtt = (PayloadAttribute)input.AddAttribute(typeof(PayloadAttribute));
    _inOffsetAtt = (OffsetAttribute)input.AddAttribute(typeof(OffsetAttribute));
    _inTypeAtt = (TypeAttribute)input.AddAttribute(typeof(TypeAttribute));
    _inFlagsAtt = (FlagsAttribute)input.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Creates a shingle filter based on a user defined matrix.
///
/// The filter /will/ delete columns from the input matrix! You will not be able to reset the filter if you used this constructor.
/// todo: don't touch the matrix! use a bool, set the input stream to null or something, and keep track of where in the matrix we are at.
///
/// </summary>
/// <param name="matrix">the input basis for creating shingles. Does not need to contain any information until ShingleMatrixFilter.Next(Token) is called the first time.</param>
/// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
/// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
/// <param name="spacerCharacter">
/// character to use between texts of the token parts in a shingle. null for none.
/// Declared as a nullable <c>Char?</c> so that "none" can actually be passed; a plain
/// <c>char</c> argument still converts implicitly.
/// </param>
/// <param name="ignoringSinglePrefixOrSuffixShingle">
/// if true, shingles that only contain permutations of the first or the last column will not be
/// produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.
/// </param>
/// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
public ShingleMatrixFilter(
    Matrix.Matrix matrix,
    int minimumShingleSize,
    int maximumShingleSize,
    Char? spacerCharacter,
    bool ignoringSinglePrefixOrSuffixShingle,
    TokenSettingsCodec settingsCodec)
{
    Matrix = matrix;
    MinimumShingleSize = minimumShingleSize;
    MaximumShingleSize = maximumShingleSize;
    SpacerCharacter = spacerCharacter;
    IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
    _settingsCodec = settingsCodec;

    // Attributes registered on this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    _posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    _payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    _offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    _typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    _flagsAtt = (FlagsAttribute)AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // set the input to be an empty token stream, we already have the data.
    _input = new EmptyTokenStream();

    // Matching attributes registered on the (empty) input stream.
    _inTermAtt = (TermAttribute)_input.AddAttribute(typeof(TermAttribute));
    _inPosIncrAtt = (PositionIncrementAttribute)_input.AddAttribute(typeof(PositionIncrementAttribute));
    _inPayloadAtt = (PayloadAttribute)_input.AddAttribute(typeof(PayloadAttribute));
    _inOffsetAtt = (OffsetAttribute)_input.AddAttribute(typeof(OffsetAttribute));
    _inTypeAtt = (TypeAttribute)_input.AddAttribute(typeof(TypeAttribute));
    _inFlagsAtt = (FlagsAttribute)_input.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Writes this instance's <c>type</c> field into <paramref name="target"/>.
/// </summary>
/// <param name="target">Destination attribute; it is cast to <see cref="TypeAttribute"/>.</param>
public override void CopyTo(Attribute target)
{
    ((TypeAttribute)target).type = this.type;
}
/// <summary>
/// Shared initialisation: registers the term, offset, position-increment and type
/// attributes, attaches <paramref name="input"/> to the lemmatizer and stores the
/// remaining settings in fields.
/// </summary>
/// <param name="input">Reader handed to the lemmatizer through <c>SetStream</c>.</param>
/// <param name="_lemmatizer">Lemmatizer kept in the <c>_streamLemmatizer</c> field.</param>
/// <param name="_lemmaFilter">Lemma filter kept in the <c>lemmaFilter</c> field.</param>
/// <param name="AlwaysSaveMarkedOriginal">Flag kept in the <c>alwaysSaveMarkedOriginal</c> field.</param>
private void Init(
    System.IO.TextReader input,
    HebMorph.StreamLemmatizer _lemmatizer,
    HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter,
    bool AlwaysSaveMarkedOriginal)
{
    // Attribute registration.
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    //payAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));

    // Hook the reader up to the lemmatizer.
    _streamLemmatizer = _lemmatizer;
    _streamLemmatizer.SetStream(input);

    // Remaining settings.
    alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
    lemmaFilter = _lemmaFilter;
}
/// <summary>
/// Returns a new <see cref="TypeAttribute"/> carrying the same <c>type</c> value as this instance.
/// </summary>
/// <returns>The new attribute instance.</returns>
public override System.Object Clone()
{
    TypeAttribute duplicate = new TypeAttribute();
    duplicate.type = this.type;
    return duplicate;
}
/// <summary>
/// Creates a stream over the supplied token collection and registers the term,
/// position-increment, payload, offset, type and flags attributes.
/// </summary>
/// <param name="tokens">Collection stored by reference in the <c>_tokens</c> field.</param>
public TokenListStream(ICollection<Token> tokens)
{
    this._tokens = tokens;

    this._termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    this._payloadAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));
    this._offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    this._typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    this._flagsAtt = (FlagsAttribute)AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Constructs a ShingleFilter with the specified shingle size from the TokenStream
/// </summary>
/// <param name="input">input token stream</param>
/// <param name="maxShingleSize">maximum shingle size produced by the filter.</param>
public ShingleFilter(TokenStream input, int maxShingleSize)
    : base(input)
{
    // The size is applied first, before any attribute is registered.
    SetMaxShingleSize(maxShingleSize);

    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    this._termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this._offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    this._typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor
}
/// <summary>
/// Creates the filter on top of <paramref name="input"/> and registers the term and
/// type attributes.
/// </summary>
/// <param name="input">Token stream passed to the base constructor.</param>
/// <exception cref="System.InvalidCastException">
/// Thrown if <c>AddAttribute</c> returns an object that is not of the requested attribute type.
/// </exception>
public MetaphoneReplacementFilter(TokenStream input)
    : base(input)
{
    // Direct casts instead of `as`: a type mismatch fails here, at construction time,
    // rather than leaving a null field that only blows up when a token is processed.
    termAttr = (TermAttribute)AddAttribute(typeof(TermAttribute));
    typeAttr = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
}
/// <summary>
/// Pushes this instance's <c>type</c> value into <paramref name="target"/> through <c>SetType</c>.
/// </summary>
/// <param name="target">Destination attribute; it is cast to <see cref="TypeAttribute"/>.</param>
public override void CopyTo(AttributeImpl target)
{
    ((TypeAttribute)target).SetType(this.type);
}