예제 #1
0
 /// <summary>
 /// Creates the filter on top of <paramref name="input"/>, obtains the term and type
 /// attributes through AddAttribute, and remembers the supplied suffix table.
 /// </summary>
 /// <param name="input">token stream passed on to the base constructor.</param>
 /// <param name="_suffixByTokenType">string-keyed table of suffix characters; the reference is stored as-is, no copy is made.</param>
 public AddSuffixFilter(TokenStream input, Dictionary<string, char[]> _suffixByTokenType)
     : base(input)
 {
     // Attribute lookups are kept in their original order.
     this.termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
     this.typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));

     suffixByTokenType = _suffixByTokenType;
 }
예제 #2
0
        /// <summary>
        /// Produces a new TypeAttribute whose <c>type</c> field holds the same value as this instance's.
        /// </summary>
        /// <returns>the freshly created TypeAttribute.</returns>
        public override object Clone()
        {
            TypeAttribute duplicate = new TypeAttribute();
            duplicate.type = this.type;

            return duplicate;
        }
 /// <summary>
 /// Creates the filter on top of <paramref name="input"/>, stores the hyphenator and
 /// obtains the term, type and offset attributes through AddAttribute.
 /// </summary>
 /// <param name="input">token stream passed on to the base constructor.</param>
 /// <param name="hyphenator">hyphenator instance kept in a field; it is not invoked here.</param>
 public HyphenationTokenFilter(TokenStream input, Hyphenator hyphenator)
     : base(input)
 {
     this._hyphenator = hyphenator;

     // Attribute lookups are kept in their original order.
     this._termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
     this._typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
     this._ofsAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
 }
예제 #4
0
        /// <summary>
        /// Compares this attribute with <paramref name="other"/>.
        /// </summary>
        /// <param name="other">object to compare against; may be null.</param>
        /// <returns>
        /// true when <paramref name="other"/> is this very instance, or is a TypeAttribute whose
        /// <c>type</c> is equal to this one's (two null types count as equal); false otherwise.
        /// </returns>
        public override bool Equals(object other)
        {
            // Identity short-cut.
            if (object.ReferenceEquals(other, this))
            {
                return true;
            }

            // Null and foreign types both end up as null here.
            TypeAttribute that = other as TypeAttribute;
            if (object.ReferenceEquals(that, null))
            {
                return false;
            }

            if (this.type == null)
            {
                return that.type == null;
            }

            return this.type.Equals(that.type);
        }
		/// <summary>
		/// Exercises TypeAttribute: default type, string form after setting a type,
		/// the clone and copy helpers, and the state after Clear().
		/// </summary>
		public virtual void TestTypeAttribute()
		{
			TypeAttribute source = new TypeAttribute();

			// A new attribute reports the default type.
			Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, source.Type);

			source.Type = "hallo";
			Assert.AreEqual("type=hallo", source.ToString());

			// The cloned instance carries the type over.
			TypeAttribute cloned = (TypeAttribute) AssertCloneIsEqual(source);
			Assert.AreEqual("hallo", cloned.Type);

			// So does the instance produced by the copy helper.
			TypeAttribute copied = (TypeAttribute) AssertCopyIsEqual(source);
			Assert.AreEqual("hallo", copied.Type);

			// Clear() brings the default type back.
			source.Clear();
			Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, source.Type);
		}
        /// <summary>
        /// Verifies TypeAttribute behaviour: the default type on a new instance, the
        /// ToString() output once a type is set, the clone/copy helpers, and Clear().
        /// </summary>
        public virtual void TestTypeAttribute()
        {
            TypeAttribute original = new TypeAttribute();
            Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, original.Type);

            original.Type = "hallo";
            Assert.AreEqual("type=hallo", original.ToString());

            // Clone path.
            TypeAttribute viaClone = (TypeAttribute) AssertCloneIsEqual(original);
            Assert.AreEqual("hallo", viaClone.Type);

            // Copy path.
            TypeAttribute viaCopy = (TypeAttribute) AssertCopyIsEqual(original);
            Assert.AreEqual("hallo", viaCopy.Type);

            // After Clear() the default type is reported again.
            original.Clear();
            Assert.AreEqual(TypeAttribute.DEFAULT_TYPE, original.Type);
        }
        /// <summary>
        /// Creates the filter with <paramref name="suffix"/> as the base input and
        /// <paramref name="prefix"/> as a second stream. Term, position increment, payload,
        /// offset, type and flags attributes are obtained both from this filter and from the prefix stream.
        /// </summary>
        /// <param name="prefix">stream stored in Prefix; its attributes are obtained via prefix.AddAttribute.</param>
        /// <param name="suffix">stream stored in Suffix and handed to the base constructor.</param>
        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
            : base(suffix)
        {
            this.Suffix = suffix;
            this.Prefix = prefix;
            this._prefixExhausted = false;

            // Attributes of this filter.
            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            this._termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
            this._posIncrAtt = (PositionIncrementAttribute) this.AddAttribute(typeof (PositionIncrementAttribute));
            this._payloadAtt = (PayloadAttribute) this.AddAttribute(typeof (PayloadAttribute));
            this._offsetAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
            this._typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
            this._flagsAtt = (FlagsAttribute) this.AddAttribute(typeof (FlagsAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            // Matching attributes of the prefix stream.
            this._pTermAtt = (TermAttribute) prefix.AddAttribute(typeof (TermAttribute));
            this._pPosIncrAtt = (PositionIncrementAttribute) prefix.AddAttribute(typeof (PositionIncrementAttribute));
            this._pPayloadAtt = (PayloadAttribute) prefix.AddAttribute(typeof (PayloadAttribute));
            this._pOffsetAtt = (OffsetAttribute) prefix.AddAttribute(typeof (OffsetAttribute));
            this._pTypeAtt = (TypeAttribute) prefix.AddAttribute(typeof (TypeAttribute));
            this._pFlagsAtt = (FlagsAttribute) prefix.AddAttribute(typeof (FlagsAttribute));
        }
        /// <summary>
        /// Creates a shingle filter with ad hoc parameter settings, reading from a token stream.
        /// </summary>
        /// <param name="input">stream from which to construct the matrix.</param>
        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
        /// <param name="spacerCharacter">character to use between the texts of the token parts in a shingle; null for none.</param>
        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contain a permutation of the first or the last column are not produced (wording inferred from the original comment - confirm). Useful when adding boundary marker tokens such as '^' and '$'.</param>
        /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
        public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Char? spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
        {
            // Plain configuration.
            this._input = input;
            this.MinimumShingleSize = minimumShingleSize;
            this.MaximumShingleSize = maximumShingleSize;
            this.SpacerCharacter = spacerCharacter;
            this.IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
            this._settingsCodec = settingsCodec;

            // Attributes of this filter.
            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            this._termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
            this._posIncrAtt = (PositionIncrementAttribute) this.AddAttribute(typeof (PositionIncrementAttribute));
            this._payloadAtt = (PayloadAttribute) this.AddAttribute(typeof (PayloadAttribute));
            this._offsetAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
            this._typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
            this._flagsAtt = (FlagsAttribute) this.AddAttribute(typeof (FlagsAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            // Matching attributes of the input stream.
            this._inTermAtt = (TermAttribute) input.AddAttribute(typeof (TermAttribute));
            this._inPosIncrAtt = (PositionIncrementAttribute) input.AddAttribute(typeof (PositionIncrementAttribute));
            this._inPayloadAtt = (PayloadAttribute) input.AddAttribute(typeof (PayloadAttribute));
            this._inOffsetAtt = (OffsetAttribute) input.AddAttribute(typeof (OffsetAttribute));
            this._inTypeAtt = (TypeAttribute) input.AddAttribute(typeof (TypeAttribute));
            this._inFlagsAtt = (FlagsAttribute) input.AddAttribute(typeof (FlagsAttribute));
        }
        /// <summary>
        /// Creates a shingle filter based on a user defined matrix.
        ///
        /// The filter /will/ delete columns from the input matrix! You will not be able to reset the filter if you used this constructor.
        /// todo: don't touch the matrix! use a bool, set the input stream to null or something, and keep track of where in the matrix we are at.
        ///
        /// The input stream is set to a new EmptyTokenStream, since the data is supplied through the matrix.
        /// </summary>
        /// <param name="matrix">the input base for creating shingles. Does not need to contain any information until ShingleMatrixFilter.Next(Token) is called the first time.</param>
        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
        /// <param name="spacerCharacter">character to use between the texts of the token parts in a shingle. This overload takes a non-nullable Char, so a spacer is always supplied.</param>
        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contain a permutation of the first or the last column are not produced (wording inferred from the original comment - confirm). Useful when adding boundary marker tokens such as '^' and '$'.</param>
        /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
        public ShingleMatrixFilter(Matrix.Matrix matrix, int minimumShingleSize, int maximumShingleSize, Char spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
        {
            // Plain configuration.
            this.Matrix = matrix;
            this.MinimumShingleSize = minimumShingleSize;
            this.MaximumShingleSize = maximumShingleSize;
            this.SpacerCharacter = spacerCharacter;
            this.IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
            this._settingsCodec = settingsCodec;

            // Attributes of this filter.
            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            this._termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
            this._posIncrAtt = (PositionIncrementAttribute) this.AddAttribute(typeof (PositionIncrementAttribute));
            this._payloadAtt = (PayloadAttribute) this.AddAttribute(typeof (PayloadAttribute));
            this._offsetAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
            this._typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
            this._flagsAtt = (FlagsAttribute) this.AddAttribute(typeof (FlagsAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            // The matrix already holds the data, so the input is an empty token stream.
            this._input = new EmptyTokenStream();

            // Matching attributes of that (empty) input stream.
            this._inTermAtt = (TermAttribute) this._input.AddAttribute(typeof (TermAttribute));
            this._inPosIncrAtt = (PositionIncrementAttribute) this._input.AddAttribute(typeof (PositionIncrementAttribute));
            this._inPayloadAtt = (PayloadAttribute) this._input.AddAttribute(typeof (PayloadAttribute));
            this._inOffsetAtt = (OffsetAttribute) this._input.AddAttribute(typeof (OffsetAttribute));
            this._inTypeAtt = (TypeAttribute) this._input.AddAttribute(typeof (TypeAttribute));
            this._inFlagsAtt = (FlagsAttribute) this._input.AddAttribute(typeof (FlagsAttribute));
        }
예제 #10
0
        /// <summary>
        /// Writes this attribute's <c>type</c> value into <paramref name="target"/>.
        /// </summary>
        /// <param name="target">destination attribute; it is cast to TypeAttribute, so any other runtime type makes the cast throw.</param>
        public override void CopyTo(Attribute target)
        {
            ((TypeAttribute) target).type = this.type;
        }
예제 #11
0
        /// <summary>
        /// Shared initialisation: obtains the term, offset, position increment and type attributes,
        /// stores the lemmatizer, points it at <paramref name="input"/> and stores the remaining settings.
        /// </summary>
        /// <param name="input">reader handed to the lemmatizer through SetStream.</param>
        /// <param name="_lemmatizer">stream lemmatizer to keep; it is dereferenced here, so null results in a NullReferenceException.</param>
        /// <param name="_lemmaFilter">lemma filter stored in a field; it is not invoked here.</param>
        /// <param name="AlwaysSaveMarkedOriginal">flag stored in the alwaysSaveMarkedOriginal field.</param>
        private void Init(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
            HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
        {
            // Attribute lookups are kept in their original order.
            this.termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
            this.offsetAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
            this.posIncrAtt = (PositionIncrementAttribute) this.AddAttribute(typeof (PositionIncrementAttribute));
            this.typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
            // The payload attribute lookup is currently disabled:
            //payAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));

            _streamLemmatizer = _lemmatizer;
            _streamLemmatizer.SetStream(input);
            alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
            lemmaFilter = _lemmaFilter;
        }
		/// <summary>
		/// Builds a new TypeAttribute and gives it this instance's <c>type</c> value.
		/// </summary>
		/// <returns>the newly created TypeAttribute.</returns>
		public override System.Object Clone()
		{
			TypeAttribute copy = new TypeAttribute();

			copy.type = this.type;

			return copy;
		}
 /// <summary>
 /// Creates a stream around a token collection and obtains the term, position increment,
 /// payload, offset, type and flags attributes through AddAttribute.
 /// </summary>
 /// <param name="tokens">collection kept by reference in a field; it is not enumerated here.</param>
 public TokenListStream(ICollection<Token> tokens)
 {
     this._tokens = tokens;

     // Attribute lookups are kept in their original order.
     this._termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
     this._posIncrAtt = (PositionIncrementAttribute) this.AddAttribute(typeof (PositionIncrementAttribute));
     this._payloadAtt = (PayloadAttribute) this.AddAttribute(typeof (PayloadAttribute));
     this._offsetAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
     this._typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
     this._flagsAtt = (FlagsAttribute) this.AddAttribute(typeof (FlagsAttribute));
 }
예제 #14
0
        /// <summary>
        /// Constructs a ShingleFilter with the specified shingle size from the TokenStream.
        /// The size is applied through SetMaxShingleSize before the attributes are obtained.
        /// </summary>
        /// <param name="input">input token stream, handed to the base constructor.</param>
        /// <param name="maxShingleSize">maximum shingle size produced by the filter.</param>
        public ShingleFilter(TokenStream input, int maxShingleSize)
            : base(input)
        {
            this.SetMaxShingleSize(maxShingleSize);

            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            this._termAtt = (TermAttribute) this.AddAttribute(typeof (TermAttribute));
            this._offsetAtt = (OffsetAttribute) this.AddAttribute(typeof (OffsetAttribute));
            this._posIncrAtt = (PositionIncrementAttribute) this.AddAttribute(typeof (PositionIncrementAttribute));
            this._typeAtt = (TypeAttribute) this.AddAttribute(typeof (TypeAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor
        }
 /// <summary>
 /// Creates the filter on top of <paramref name="input"/> and obtains the term and type
 /// attributes through AddAttribute.
 /// </summary>
 /// <param name="input">token stream passed on to the base constructor.</param>
 public MetaphoneReplacementFilter(TokenStream input)
     : base(input)
 {
     // Direct casts instead of "as": if AddAttribute ever returns an object of an unexpected
     // type, this fails right here with an InvalidCastException rather than leaving a null
     // field that only surfaces later as a NullReferenceException.
     termAttr = (TermAttribute) AddAttribute(typeof (TermAttribute));
     typeAttr = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
 }
예제 #16
0
        /// <summary>
        /// Passes this attribute's <c>type</c> value to <paramref name="target"/> via SetType.
        /// </summary>
        /// <param name="target">destination attribute; it is cast to TypeAttribute, so any other runtime type makes the cast throw.</param>
        public override void CopyTo(AttributeImpl target)
        {
            ((TypeAttribute) target).SetType(this.type);
        }