/// <summary>
/// Creates a new <see cref="PositionIncrementAttribute"/> whose position increment
/// equals the one held by this instance.
/// </summary>
/// <returns>The newly created attribute instance.</returns>
public override object Clone()
{
    return new PositionIncrementAttribute { positionIncrement = this.positionIncrement };
}
/// <summary>
/// Creates the tokenizer over <paramref name="input"/> and caches the term, offset
/// and position-increment attributes registered on this stream.
/// </summary>
/// <param name="input">Stream passed to the base constructor and kept in <c>_input</c>.</param>
public SingleCharTokenizer(TokenStream input)
    : base(input)
{
    this._input = input;

    // Attributes are registered in the same order as before: term, offset, position increment.
    this._termAttribute = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this._offsetAttribute = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this._positionIncrementAttribute =
        (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>
/// Creates a synonym filter over <paramref name="input"/> that keeps a reference to
/// <paramref name="engine"/> and starts with an empty synonym stack.
/// </summary>
/// <param name="input">Token stream passed to the base constructor.</param>
/// <param name="engine">Synonym engine stored by this filter.</param>
/// <exception cref="System.InvalidCastException">
/// Thrown if a registered attribute is not of the expected concrete type.
/// </exception>
public SynonymFilter(TokenStream input, ISynonymEngine engine)
    : base(input)
{
    synonymStack = new Stack<string>();
    this.engine = engine;

    // Direct casts rather than `as`: a type mismatch now fails here, at construction,
    // instead of leaving a null field that surfaces later as a NullReferenceException.
    this.termAttr = (TermAttribute)AddAttribute(typeof(TermAttribute));
    this.posIncrAttr = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>
/// Creates a new HunspellStemFilter that stems the tokens of the given TokenStream
/// using the affix rules of the supplied HunspellDictionary.
/// </summary>
/// <param name="input">TokenStream whose tokens will be stemmed.</param>
/// <param name="dictionary">HunspellDictionary holding the affix rules and words used to stem the tokens.</param>
/// <param name="dedup">true if only unique terms should be output.</param>
public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, bool dedup = true)
    : base(input)
{
    // Attribute registration comes first, exactly as before.
    this._posIncAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this._termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));

    this._dedup = dedup;
    this._stemmer = new HunspellStemmer(dictionary);
}
/// <summary>
/// Creates a synonym filter over <paramref name="in_Renamed"/>, storing
/// <paramref name="engine"/> and starting with an empty synonym stack.
/// </summary>
/// <param name="in_Renamed">Token stream passed to the base constructor.</param>
/// <param name="engine">Synonym engine stored by this filter.</param>
public SynonymFilter(TokenStream in_Renamed, ISynonymEngine engine)
    : base(in_Renamed)
{
    this.synonymStack = new Stack<string>();
    this.engine = engine;

    // Cache the term and position-increment attributes registered on this stream.
    this.termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this.posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>
/// Creates the tokenizer over <paramref name="input"/>, storing the analysis object,
/// the filter set and the stemming flag, and caching the term, offset and
/// position-increment attributes.
/// </summary>
/// <param name="analysis">Analysis instance stored in <c>_analysis</c>.</param>
/// <param name="input">Reader passed to the base constructor.</param>
/// <param name="filter">Set of strings stored in <c>_filter</c>.</param>
/// <param name="pstemming">Flag stored in <c>_pstemming</c>.</param>
public AnsjTokenizer(AbstractAnalysis analysis, TextReader input, HashSet<string> filter, bool pstemming)
    : base(input)
{
    this._analysis = analysis;

    // Attribute registration order is unchanged: term, offset, position increment.
    this._termAtt = this.AddAttribute<TermAttribute>();
    this._offsetAtt = this.AddAttribute<OffsetAttribute>();
    this._positionAttr = this.AddAttribute<PositionIncrementAttribute>();

    this._filter = filter;
    this._pstemming = pstemming;
}
/// <summary>
/// Creates a synonym filter over <paramref name="input"/> backed by
/// <paramref name="engine"/>, starting with an empty synonym stack.
/// </summary>
/// <param name="input">Token stream passed to the base constructor.</param>
/// <param name="engine">Synonym engine stored by this filter; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="engine"/> is null.</exception>
public SynonymFilter(TokenStream input, SynonymEngine engine)
    : base(input)
{
    if (engine == null)
    {
        // The reported parameter name must match the real parameter ("engine"),
        // otherwise the exception message points callers at a non-existent argument.
        throw new ArgumentNullException("engine");
    }

    synonymStack = new Stack<string>();
    this.engine = engine;

    this.termAtt = (TermAttribute)AddAttribute<ITermAttribute>();
    this.posIncrAtt = (PositionIncrementAttribute)AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Compares this attribute with <paramref name="other"/>.
/// </summary>
/// <param name="other">Object to compare against.</param>
/// <returns>
/// true when <paramref name="other"/> is this same instance, or is a
/// <see cref="PositionIncrementAttribute"/> with an equal position increment; otherwise false.
/// </returns>
public override bool Equals(object other)
{
    if (object.ReferenceEquals(other, this))
    {
        return true;
    }

    // Null and objects of any other type both end up as null here.
    PositionIncrementAttribute that = other as PositionIncrementAttribute;
    if (object.ReferenceEquals(that, null))
    {
        return false;
    }

    return positionIncrement == that.positionIncrement;
}
/// <summary>
/// Checks the initial position increment (1), the ToString() format, that cloning and
/// copying carry the value over, and that Clear() brings the increment back to 1.
/// </summary>
public virtual void TestPositionIncrementAttribute()
{
    var attribute = new PositionIncrementAttribute();
    Assert.AreEqual(1, attribute.PositionIncrement);

    attribute.PositionIncrement = 1234;
    Assert.AreEqual("positionIncrement=1234", attribute.ToString());

    // A clone must report the same increment as its source.
    var cloned = (PositionIncrementAttribute)AssertCloneIsEqual(attribute);
    Assert.AreEqual(1234, cloned.PositionIncrement);

    // So must a copy.
    var copied = (PositionIncrementAttribute)AssertCopyIsEqual(attribute);
    Assert.AreEqual(1234, copied.PositionIncrement);

    attribute.Clear();
    Assert.AreEqual(1, attribute.PositionIncrement);
}
/// <summary>
/// Exercises PositionIncrementAttribute: initial value of 1, string form after
/// assignment, value retention through clone and copy, and the value of 1 after Clear().
/// </summary>
public virtual void TestPositionIncrementAttribute()
{
    PositionIncrementAttribute source = new PositionIncrementAttribute();
    Assert.AreEqual(1, source.PositionIncrement);

    source.PositionIncrement = 1234;
    Assert.AreEqual("positionIncrement=1234", source.ToString());

    // Clone first, then copy; each result is checked on its own.
    PositionIncrementAttribute viaClone = (PositionIncrementAttribute)AssertCloneIsEqual(source);
    Assert.AreEqual(1234, viaClone.PositionIncrement);

    PositionIncrementAttribute viaCopy = (PositionIncrementAttribute)AssertCopyIsEqual(source);
    Assert.AreEqual(1234, viaCopy.PositionIncrement);

    source.Clear();
    Assert.AreEqual(1, source.PositionIncrement);
}
/// <summary>
/// Creates the filter from a prefix stream and a suffix stream. The suffix stream is
/// passed to the base constructor; attributes are registered both on this filter and
/// on the prefix stream and cached in fields.
/// </summary>
/// <param name="prefix">Stream stored in <c>Prefix</c>; its attributes are cached in the <c>_p*</c> fields.</param>
/// <param name="suffix">Stream stored in <c>Suffix</c> and handed to the base constructor.</param>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
    : base(suffix)
{
    this.Suffix = suffix;
    this.Prefix = prefix;
    this._prefixExhausted = false;

    // Attributes registered on this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    this._termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this._payloadAtt = (PayloadAttribute)this.AddAttribute(typeof(PayloadAttribute));
    this._offsetAtt = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this._typeAtt = (TypeAttribute)this.AddAttribute(typeof(TypeAttribute));
    this._flagsAtt = (FlagsAttribute)this.AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // Matching attributes registered on the prefix stream.
    this._pTermAtt = (TermAttribute)prefix.AddAttribute(typeof(TermAttribute));
    this._pPosIncrAtt = (PositionIncrementAttribute)prefix.AddAttribute(typeof(PositionIncrementAttribute));
    this._pPayloadAtt = (PayloadAttribute)prefix.AddAttribute(typeof(PayloadAttribute));
    this._pOffsetAtt = (OffsetAttribute)prefix.AddAttribute(typeof(OffsetAttribute));
    this._pTypeAtt = (TypeAttribute)prefix.AddAttribute(typeof(TypeAttribute));
    this._pFlagsAtt = (FlagsAttribute)prefix.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Creates a shingle filter with ad hoc parameter settings.
/// </summary>
/// <param name="input">stream from which to construct the matrix</param>
/// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
/// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
/// <param name="spacerCharacter">character placed between the texts of the token parts in a shingle. null for none.</param>
/// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contain a permutation of the first or the last column are not produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
/// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
public ShingleMatrixFilter(
    TokenStream input,
    int minimumShingleSize,
    int maximumShingleSize,
    Char? spacerCharacter,
    bool ignoringSinglePrefixOrSuffixShingle,
    TokenSettingsCodec settingsCodec)
{
    this._input = input;

    this.MinimumShingleSize = minimumShingleSize;
    this.MaximumShingleSize = maximumShingleSize;
    this.SpacerCharacter = spacerCharacter;
    this.IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
    this._settingsCodec = settingsCodec;

    // Attributes registered on this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    this._termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this._payloadAtt = (PayloadAttribute)this.AddAttribute(typeof(PayloadAttribute));
    this._offsetAtt = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this._typeAtt = (TypeAttribute)this.AddAttribute(typeof(TypeAttribute));
    this._flagsAtt = (FlagsAttribute)this.AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // Matching attributes registered on the input stream.
    this._inTermAtt = (TermAttribute)input.AddAttribute(typeof(TermAttribute));
    this._inPosIncrAtt = (PositionIncrementAttribute)input.AddAttribute(typeof(PositionIncrementAttribute));
    this._inPayloadAtt = (PayloadAttribute)input.AddAttribute(typeof(PayloadAttribute));
    this._inOffsetAtt = (OffsetAttribute)input.AddAttribute(typeof(OffsetAttribute));
    this._inTypeAtt = (TypeAttribute)input.AddAttribute(typeof(TypeAttribute));
    this._inFlagsAtt = (FlagsAttribute)input.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Creates a shingle filter based on a user defined matrix.
///
/// The filter /will/ delete columns from the input matrix! You will not be able to reset the filter if you used this constructor.
/// todo: don't touch the matrix! use a bool, set the input stream to null or something, and keep track of where in the matrix we are at.
///
/// </summary>
/// <param name="matrix">the input base for creating shingles. Does not need to contain any information until ShingleMatrixFilter.Next(Token) is called the first time.</param>
/// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
/// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
/// <param name="spacerCharacter">character placed between the texts of the token parts in a shingle. This overload takes a non-nullable Char.</param>
/// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contain a permutation of the first or the last column are not produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
/// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
public ShingleMatrixFilter(
    Matrix.Matrix matrix,
    int minimumShingleSize,
    int maximumShingleSize,
    Char spacerCharacter,
    bool ignoringSinglePrefixOrSuffixShingle,
    TokenSettingsCodec settingsCodec)
{
    this.Matrix = matrix;

    this.MinimumShingleSize = minimumShingleSize;
    this.MaximumShingleSize = maximumShingleSize;
    this.SpacerCharacter = spacerCharacter;
    this.IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
    this._settingsCodec = settingsCodec;

    // Attributes registered on this filter.
    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    this._termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this._payloadAtt = (PayloadAttribute)this.AddAttribute(typeof(PayloadAttribute));
    this._offsetAtt = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this._typeAtt = (TypeAttribute)this.AddAttribute(typeof(TypeAttribute));
    this._flagsAtt = (FlagsAttribute)this.AddAttribute(typeof(FlagsAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor

    // set the input to be an empty token stream, we already have the data.
    this._input = new EmptyTokenStream();

    // Matching attributes registered on that empty input stream.
    this._inTermAtt = (TermAttribute)this._input.AddAttribute(typeof(TermAttribute));
    this._inPosIncrAtt = (PositionIncrementAttribute)this._input.AddAttribute(typeof(PositionIncrementAttribute));
    this._inPayloadAtt = (PayloadAttribute)this._input.AddAttribute(typeof(PayloadAttribute));
    this._inOffsetAtt = (OffsetAttribute)this._input.AddAttribute(typeof(OffsetAttribute));
    this._inTypeAtt = (TypeAttribute)this._input.AddAttribute(typeof(TypeAttribute));
    this._inFlagsAtt = (FlagsAttribute)this._input.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Writes this attribute's position increment into <paramref name="target"/>.
/// </summary>
/// <param name="target">
/// Destination attribute; it is cast to <see cref="PositionIncrementAttribute"/>,
/// so any other type makes the cast fail.
/// </param>
public override void CopyTo(Attribute target)
{
    ((PositionIncrementAttribute)target).PositionIncrement = this.positionIncrement;
}
/// <summary>
/// Shared initialization: registers the term, offset, position-increment and type
/// attributes, stores the lemmatizer and points it at <paramref name="input"/>,
/// then stores the remaining settings.
/// </summary>
/// <param name="input">Reader handed to the lemmatizer through <c>SetStream</c>.</param>
/// <param name="_lemmatizer">Stream lemmatizer stored in <c>_streamLemmatizer</c>.</param>
/// <param name="_lemmaFilter">Lemma filter stored in <c>lemmaFilter</c>.</param>
/// <param name="AlwaysSaveMarkedOriginal">Flag stored in <c>alwaysSaveMarkedOriginal</c>.</param>
private void Init(
    System.IO.TextReader input,
    HebMorph.StreamLemmatizer _lemmatizer,
    HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter,
    bool AlwaysSaveMarkedOriginal)
{
    this.termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this.offsetAtt = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this.posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this.typeAtt = (TypeAttribute)this.AddAttribute(typeof(TypeAttribute));
    // Payload attribute registration is currently disabled:
    //payAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));

    // Store the lemmatizer first, then attach the reader through the stored reference.
    this._streamLemmatizer = _lemmatizer;
    this._streamLemmatizer.SetStream(input);

    this.alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
    this.lemmaFilter = _lemmaFilter;
}
/// <summary>
/// Produces a separate <see cref="PositionIncrementAttribute"/> instance that holds
/// the same position increment as this one.
/// </summary>
/// <returns>The new attribute instance.</returns>
public override System.Object Clone()
{
    var duplicate = new PositionIncrementAttribute();
    duplicate.positionIncrement = this.positionIncrement;
    return duplicate;
}
/// <summary>
/// Creates a stream over the given token collection and caches the term,
/// position-increment, payload, offset, type and flags attributes registered on it.
/// </summary>
/// <param name="tokens">Collection of tokens stored in <c>_tokens</c>.</param>
public TokenListStream(ICollection<Token> tokens)
{
    this._tokens = tokens;

    // Registration order is unchanged.
    this._termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this._payloadAtt = (PayloadAttribute)this.AddAttribute(typeof(PayloadAttribute));
    this._offsetAtt = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this._typeAtt = (TypeAttribute)this.AddAttribute(typeof(TypeAttribute));
    this._flagsAtt = (FlagsAttribute)this.AddAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Constructs a ShingleFilter with the specified shingle size from the TokenStream.
/// </summary>
/// <param name="input">input token stream</param>
/// <param name="maxShingleSize">maximum shingle size produced by the filter.</param>
public ShingleFilter(TokenStream input, int maxShingleSize)
    : base(input)
{
    // The size is applied through the setter method before any attribute is registered.
    this.SetMaxShingleSize(maxShingleSize);

    // ReSharper disable DoNotCallOverridableMethodsInConstructor
    this._termAtt = (TermAttribute)this.AddAttribute(typeof(TermAttribute));
    this._offsetAtt = (OffsetAttribute)this.AddAttribute(typeof(OffsetAttribute));
    this._posIncrAtt = (PositionIncrementAttribute)this.AddAttribute(typeof(PositionIncrementAttribute));
    this._typeAtt = (TypeAttribute)this.AddAttribute(typeof(TypeAttribute));
    // ReSharper restore DoNotCallOverridableMethodsInConstructor
}
/// <summary>
/// Creates NGramTokenFilter with given min and max n-grams.
/// </summary>
/// <param name="version">Lucene version to enable correct position increments.
/// See <a href="#version">above</a> for details.</param>
/// <param name="input"><see cref="TokenStream"/> holding the input to be tokenized</param>
/// <param name="minGram">the smallest n-gram to generate</param>
/// <param name="maxGram">the largest n-gram to generate</param>
/// <exception cref="System.ArgumentException">
/// Thrown when <paramref name="minGram"/> is less than 1 or greater than <paramref name="maxGram"/>.
/// </exception>
public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram)
    : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
{
    this.version = version;

    // Character utilities depend on whether the version is 4.4 or later.
    if (version.OnOrAfter(Version.LUCENE_44))
    {
        this.charUtils = CharacterUtils.GetInstance(version);
    }
    else
    {
        this.charUtils = CharacterUtils.Java4Instance;
    }

    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }

    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }

    this.minGram = minGram;
    this.maxGram = maxGram;

    // Versions before 4.4 get the helper attribute objects instead of registered attributes.
    if (!version.OnOrAfter(Version.LUCENE_44))
    {
        this.posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
        this.posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
    }
    else
    {
        this.posIncAtt = AddAttribute(typeof(PositionIncrementAttribute));
        this.posLenAtt = AddAttribute(typeof(PositionLengthAttribute));
    }
}
/// <summary>
/// Pushes this attribute's position increment into <paramref name="target"/>
/// through its <c>SetPositionIncrement</c> method.
/// </summary>
/// <param name="target">
/// Destination attribute; it is cast to <see cref="PositionIncrementAttribute"/>,
/// so any other type makes the cast fail.
/// </param>
public override void CopyTo(AttributeImpl target)
{
    ((PositionIncrementAttribute)target).SetPositionIncrement(this.positionIncrement);
}
/// <summary>
/// Creates a synonym filter over <paramref name="input"/> and caches the term and
/// position-increment attributes registered on this stream.
/// </summary>
/// <param name="input">Token stream passed to the base constructor.</param>
public SynonymFilter(TokenStream input)
    : base(input)
{
    // Attributes are requested by interface and stored as their concrete types.
    this._termAtt = (TermAttribute)this.AddAttribute<ITermAttribute>();
    this._posIncrAtt = (PositionIncrementAttribute)this.AddAttribute<IPositionIncrementAttribute>();
}