public override bool Equals(object other)
{
    if (other == this)
    {
        return true;
    }

    if (other is ITermAttribute)
    {
        InitTermBuffer();
        TermAttribute o = (TermAttribute) other;
        o.InitTermBuffer();

        if (termLength != o.termLength)
        {
            return false;
        }
        for (int i = 0; i < termLength; i++)
        {
            if (termBuffer[i] != o.termBuffer[i])
            {
                return false;
            }
        }
        return true;
    }

    return false;
}

public override void CopyTo(AttributeImpl target)
{
    InitTermBuffer();
    TermAttribute t = (TermAttribute) target;

    t.SetTermBuffer(termBuffer, 0, termLength);
}
Example #3
 public AddSuffixFilter(TokenStream input, Dictionary<string, char[]> _suffixByTokenType)
     : base(input)
 {
     termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
     this.suffixByTokenType = _suffixByTokenType;
 }
Example #4
 public SingleCharTokenizer(TokenStream input): base(input)
 {
     _input = input;
     _termAttribute = (TermAttribute)AddAttribute(typeof(TermAttribute));
     _offsetAttribute = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
     _positionIncrementAttribute = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
 }
Example #5
 public SynonymFilter(TokenStream input, ISynonymEngine engine)
     : base(input)
 {
     synonymStack = new Stack<string>();
     this.engine = engine;
     this.termAttr = AddAttribute(typeof(TermAttribute)) as TermAttribute;
     this.posIncrAttr = AddAttribute(typeof(PositionIncrementAttribute)) as PositionIncrementAttribute;
 }
        /// <summary>
        ///   Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using
        ///   affix rules in the provided HunspellDictionary.
        /// </summary>
        /// <param name="input">TokenStream whose tokens will be stemmed.</param>
        /// <param name="dictionary">HunspellDictionary containing the affix rules and words that will be used to stem the tokens.</param>
        /// <param name="dedup">true if only unique terms should be output.</param>
        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
            : base(input) {
            _posIncAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
            _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));

            _dedup = dedup;
            _stemmer = new HunspellStemmer(dictionary);
        }
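A minimal usage sketch for the constructor above. The dictionary file names and the HunspellDictionary(Stream, Stream) constructor are assumptions about the contrib Hunspell API rather than something taken from these snippets; depending on the Lucene.Net version the term is read via the Term property or a Term() method.

// Hedged sketch: file names and HunspellDictionary(Stream affix, Stream dictionary)
// are assumptions about the contrib API, not taken from the snippets above.
using (var affixStream = System.IO.File.OpenRead("en_US.aff"))
using (var dictStream = System.IO.File.OpenRead("en_US.dic"))
{
    var dictionary = new HunspellDictionary(affixStream, dictStream);
    TokenStream stream = new HunspellStemFilter(
        new WhitespaceTokenizer(new System.IO.StringReader("walked walking walks")),
        dictionary);
    var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
    while (stream.IncrementToken())
    {
        System.Console.WriteLine(termAtt.Term); // stemmed terms, e.g. "walk"
    }
}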
 public HyphenationTokenFilter(TokenStream input, Hyphenator hyphenator)
     : base(input)
 {
     _hyphenator = hyphenator;
     _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     _typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
     _ofsAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
 }
    public SynonymFilter(TokenStream in_Renamed, ISynonymEngine engine)
        : base(in_Renamed)
    {
        synonymStack = new Stack<string>();
        this.engine = engine;

        termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
        posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
    }
Example #9
 public AnsjTokenizer(AbstractAnalysis analysis, TextReader input, HashSet<string> filter, bool pstemming)
     : base(input)
 {
     _analysis = analysis;
     _termAtt = AddAttribute<TermAttribute>();
     _offsetAtt = AddAttribute<OffsetAttribute>();
     _positionAttr = AddAttribute<PositionIncrementAttribute>();
     _filter = filter;
     _pstemming = pstemming;
 }
Example #10
        public virtual void  TestToString()
        {
            char[]        b = new char[] { 'a', 'l', 'o', 'h', 'a' };
            TermAttribute t = new TermAttribute();

            t.SetTermBuffer(b, 0, 5);
            Assert.AreEqual("term=aloha", t.ToString());

            t.SetTermBuffer("hi there");
            Assert.AreEqual("term=hi there", t.ToString());
        }
Example #11
        public override object Clone()
        {
            TermAttribute t = (TermAttribute)base.Clone();

            // Do a deep clone
            if (termBuffer != null)
            {
                t.termBuffer = new char[termBuffer.Length];
                termBuffer.CopyTo(t.termBuffer, 0);
            }
            return(t);
        }
 public virtual void  TestResize()
 {
     TermAttribute t = new TermAttribute();
     char[] content = "hello".ToCharArray();
     t.SetTermBuffer(content, 0, content.Length);
     for (int i = 0; i < 2000; i++)
     {
         t.ResizeTermBuffer(i);
         Assert.IsTrue(i <= t.TermBuffer().Length);
         Assert.AreEqual("hello", t.Term);
     }
 }
Example #13
        public SynonymFilter (TokenStream input, SynonymEngine engine) : base(input) {
            if (engine == null)
                throw new ArgumentNullException("synonymEngine");
            synonymStack = new Stack<string>();
            this.engine = engine;

            this.termAtt = (TermAttribute)AddAttribute<ITermAttribute>();
            this.posIncrAtt = (PositionIncrementAttribute)AddAttribute<IPositionIncrementAttribute>();

            //this.termAtt = this.AddAttribute<string>();
            //this.posIncrAtt = this.AddAttribute<string>();
        }
Example #14
        public virtual void  TestClone()
        {
            TermAttribute t = new TermAttribute();

            char[] content = "hello".ToCharArray();
            t.SetTermBuffer(content, 0, 5);
            char[]        buf  = t.TermBuffer();
            TermAttribute copy = (TermAttribute)TestSimpleAttributeImpls.AssertCloneIsEqual(t);

            Assert.AreEqual(t.Term, copy.Term);
            Assert.AreNotSame(buf, copy.TermBuffer());
        }
Example #15
        public virtual void  TestResize()
        {
            TermAttribute t = new TermAttribute();

            char[] content = "hello".ToCharArray();
            t.SetTermBuffer(content, 0, content.Length);
            for (int i = 0; i < 2000; i++)
            {
                t.ResizeTermBuffer(i);
                Assert.IsTrue(i <= t.TermBuffer().Length);
                Assert.AreEqual("hello", t.Term);
            }
        }
        public IntMetaDataTokenStream(string tokenText)
        {
            _tokenText = tokenText;

            // NOTE: Calling the AddAttribute<T> method failed, so 
            // switched to using AddAttributeImpl.
            _termAttribute = new TermAttribute();
            _offsetAttribute = new OffsetAttribute();
            _payloadAtt = new PayloadAttribute();
            base.AddAttributeImpl(_termAttribute);
            base.AddAttributeImpl(_offsetAttribute);
            base.AddAttributeImpl(_payloadAtt);
        }
Example #17
        public virtual void  TestEquals()
        {
            TermAttribute t1a = new TermAttribute();

            char[] content1a = "hello".ToCharArray();
            t1a.SetTermBuffer(content1a, 0, 5);
            TermAttribute t1b = new TermAttribute();

            char[] content1b = "hello".ToCharArray();
            t1b.SetTermBuffer(content1b, 0, 5);
            TermAttribute t2 = new TermAttribute();

            char[] content2 = "hello2".ToCharArray();
            t2.SetTermBuffer(content2, 0, 6);
            Assert.IsTrue(t1a.Equals(t1b));
            Assert.IsFalse(t1a.Equals(t2));
            Assert.IsFalse(t2.Equals(t1b));
        }
Example #18
			/**
			 * Creates NGramTokenFilter with given min and max n-grams.
			 * <param name="input"><see cref="TokenStream"/> holding the input to be tokenized</param>
			 * <param name="minGram">the smallest n-gram to generate</param>
			 * <param name="maxGram">the largest n-gram to generate</param>
			 */
			public NGramTokenFilter(TokenStream input, int minGram, int maxGram)
				: base(input)
			{

				if (minGram < 1)
				{
					throw new System.ArgumentException("minGram must be greater than zero");
				}
				if (minGram > maxGram)
				{
					throw new System.ArgumentException("minGram must not be greater than maxGram");
				}
				this.minGram = minGram;
				this.maxGram = maxGram;

				this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
				this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
			}
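A minimal sketch of how the filter above might be driven, reusing the attribute-casting pattern from these examples; the output comment is illustrative.

// Hedged sketch: emits all 2- and 3-grams of each whitespace-delimited token.
TokenStream stream = new NGramTokenFilter(
    new WhitespaceTokenizer(new System.IO.StringReader("lucene")), 2, 3);
var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
while (stream.IncrementToken())
{
    System.Console.WriteLine(termAtt.Term); // "lu", "uc", ... then "luc", "uce", ...
}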
Example #19
        public virtual void  TestMixedStringArray()
        {
            TermAttribute t = new TermAttribute();

            t.SetTermBuffer("hello");
            Assert.AreEqual(t.TermLength(), 5);
            Assert.AreEqual(t.Term, "hello");
            t.SetTermBuffer("hello2");
            Assert.AreEqual(t.TermLength(), 6);
            Assert.AreEqual(t.Term, "hello2");
            t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
            Assert.AreEqual(t.Term, "hello3");

            // Make sure if we get the buffer and change a character
            // that term() reflects the change
            char[] buffer = t.TermBuffer();
            buffer[1] = 'o';
            Assert.AreEqual(t.Term, "hollo3");
        }
        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
        {
            Suffix = suffix;
            Prefix = prefix;
            _prefixExhausted = false;

            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            _pTermAtt = (TermAttribute) prefix.AddAttribute(typeof (TermAttribute));
            _pPosIncrAtt = (PositionIncrementAttribute) prefix.AddAttribute(typeof (PositionIncrementAttribute));
            _pPayloadAtt = (PayloadAttribute) prefix.AddAttribute(typeof (PayloadAttribute));
            _pOffsetAtt = (OffsetAttribute) prefix.AddAttribute(typeof (OffsetAttribute));
            _pTypeAtt = (TypeAttribute) prefix.AddAttribute(typeof (TypeAttribute));
            _pFlagsAtt = (FlagsAttribute) prefix.AddAttribute(typeof (FlagsAttribute));
        }
        /**
         * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
         *
         * @param input {@link TokenStream} holding the input to be tokenized
         * @param side the {@link Side} from which to chop off an n-gram
         * @param minGram the smallest n-gram to generate
         * @param maxGram the largest n-gram to generate
         */
        public EdgeNGramTokenFilter(TokenStream input, Side side, int minGram, int maxGram)
            : base(input)
        {
            if (side == null)
            {
                throw new System.ArgumentException("sideLabel must be either front or back");
            }

            if (minGram < 1)
            {
                throw new System.ArgumentException("minGram must be greater than zero");
            }

            if (minGram > maxGram)
            {
                throw new System.ArgumentException("minGram must not be greater than maxGram");
            }

            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side = side;
            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
        }
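A minimal sketch for the edge n-gram variant above; Side.FRONT is an assumed name for the front-edge constant in this port and may differ.

// Hedged sketch: front-edge n-grams of length 1..3; Side.FRONT is an assumed member name.
TokenStream stream = new EdgeNGramTokenFilter(
    new WhitespaceTokenizer(new System.IO.StringReader("lucene")),
    Side.FRONT, 1, 3);
var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
while (stream.IncrementToken())
{
    System.Console.WriteLine(termAtt.Term); // "l", "lu", "luc"
}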
 public ArabicStemFilter(TokenStream input) : base(input)
 {
     stemmer = new ArabicStemmer();
     termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }
Example #23
        private string[] GetAnalyzedText(string field, string text)
        {
            //return TextSplitter.SplitText(field, text, _analyzers);
            var reader = new StringReader(text);
            var tokenStream = _masterAnalyzer.TokenStream(field, reader);
            _termAtt = (TermAttribute)tokenStream.AddAttribute(typeof(TermAttribute));

            var tokens = new List<string>();
            var words = new List<string>();
            while (tokenStream.IncrementToken())
            {
                tokens.Add(_termAtt.ToString());
                words.Add(_termAtt.Term());
            }
            return words.ToArray();
        }
 public PersianLemmatizationFilter(TokenStream input)
     : base(input)
 {
     lemmatizer = new PersianLemmatizer();
     _termAtt = (TermAttribute)input.AddAttribute<ITermAttribute>();
 }
        /// <summary>
        /// Constructs a ShingleFilter with the specified shingle size from the TokenStream
        /// </summary>
        /// <param name="input">input token stream</param>
        /// <param name="maxShingleSize">maximum shingle size produced by the filter.</param>
        public ShingleFilter(TokenStream input, int maxShingleSize) : base(input)
        {
            SetMaxShingleSize(maxShingleSize);

            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor
        }
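A minimal sketch of the filter above producing word bigrams alongside the original tokens; the output comment is illustrative.

// Hedged sketch: emits the original tokens plus 2-word shingles.
TokenStream stream = new ShingleFilter(
    new WhitespaceTokenizer(new System.IO.StringReader("please divide this sentence")), 2);
var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
while (stream.IncrementToken())
{
    System.Console.WriteLine(termAtt.Term); // e.g. "please", "please divide", "divide", ...
}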
Example #26
        public virtual void  TestGrow()
        {
            TermAttribute t = new TermAttribute();

            System.Text.StringBuilder buf = new System.Text.StringBuilder("ab");
            for (int i = 0; i < 20; i++)
            {
                char[] content = buf.ToString().ToCharArray();
                t.SetTermBuffer(content, 0, content.Length);
                Assert.AreEqual(buf.Length, t.TermLength());
                Assert.AreEqual(buf.ToString(), t.Term);
                buf.Append(buf.ToString());
            }
            Assert.AreEqual(1048576, t.TermLength());
            Assert.AreEqual(1048576, t.TermBuffer().Length);

            // now as a string, first variant
            t   = new TermAttribute();
            buf = new System.Text.StringBuilder("ab");
            for (int i = 0; i < 20; i++)
            {
                System.String content = buf.ToString();
                t.SetTermBuffer(content, 0, content.Length);
                Assert.AreEqual(content.Length, t.TermLength());
                Assert.AreEqual(content, t.Term);
                buf.Append(content);
            }
            Assert.AreEqual(1048576, t.TermLength());
            Assert.AreEqual(1048576, t.TermBuffer().Length);

            // now as a string, second variant
            t   = new TermAttribute();
            buf = new System.Text.StringBuilder("ab");
            for (int i = 0; i < 20; i++)
            {
                System.String content = buf.ToString();
                t.SetTermBuffer(content);
                Assert.AreEqual(content.Length, t.TermLength());
                Assert.AreEqual(content, t.Term);
                buf.Append(content);
            }
            Assert.AreEqual(1048576, t.TermLength());
            Assert.AreEqual(1048576, t.TermBuffer().Length);

            // Test for slow growth to a long term
            t   = new TermAttribute();
            buf = new System.Text.StringBuilder("a");
            for (int i = 0; i < 20000; i++)
            {
                System.String content = buf.ToString();
                t.SetTermBuffer(content);
                Assert.AreEqual(content.Length, t.TermLength());
                Assert.AreEqual(content, t.Term);
                buf.Append("a");
            }
            Assert.AreEqual(20000, t.TermLength());
            Assert.AreEqual(32768, t.TermBuffer().Length);

            // Test for slow growth to a long term
            t   = new TermAttribute();
            buf = new System.Text.StringBuilder("a");
            for (int i = 0; i < 20000; i++)
            {
                System.String content = buf.ToString();
                t.SetTermBuffer(content);
                Assert.AreEqual(content.Length, t.TermLength());
                Assert.AreEqual(content, t.Term);
                buf.Append("a");
            }
            Assert.AreEqual(20000, t.TermLength());
            Assert.AreEqual(32768, t.TermBuffer().Length);
        }
 public virtual void  TestMixedStringArray()
 {
     TermAttribute t = new TermAttribute();
     t.SetTermBuffer("hello");
     Assert.AreEqual(t.TermLength(), 5);
     Assert.AreEqual(t.Term, "hello");
     t.SetTermBuffer("hello2");
     Assert.AreEqual(t.TermLength(), 6);
     Assert.AreEqual(t.Term, "hello2");
     t.SetTermBuffer("hello3".ToCharArray(), 0, 6);
     Assert.AreEqual(t.Term, "hello3");
     
     // Make sure if we get the buffer and change a character
     // that term() reflects the change
     char[] buffer = t.TermBuffer();
     buffer[1] = 'o';
     Assert.AreEqual(t.Term, "hollo3");
 }
 public virtual void  TestToString()
 {
     char[] b = new char[]{'a', 'l', 'o', 'h', 'a'};
     TermAttribute t = new TermAttribute();
     t.SetTermBuffer(b, 0, 5);
     Assert.AreEqual("term=aloha", t.ToString());
     
     t.SetTermBuffer("hi there");
     Assert.AreEqual("term=hi there", t.ToString());
 }
 public virtual void  TestCopyTo()
 {
     TermAttribute t = new TermAttribute();
     TermAttribute copy = (TermAttribute) TestSimpleAttributeImpls.AssertCopyIsEqual(t);
     Assert.AreEqual("", t.Term);
     Assert.AreEqual("", copy.Term);
     
     t = new TermAttribute();
     char[] content = "hello".ToCharArray();
     t.SetTermBuffer(content, 0, 5);
     char[] buf = t.TermBuffer();
     copy = (TermAttribute) TestSimpleAttributeImpls.AssertCopyIsEqual(t);
     Assert.AreEqual(t.Term, copy.Term);
     Assert.AreNotSame(buf, copy.TermBuffer());
 }
 public virtual void  TestEquals()
 {
     TermAttribute t1a = new TermAttribute();
     char[] content1a = "hello".ToCharArray();
     t1a.SetTermBuffer(content1a, 0, 5);
     TermAttribute t1b = new TermAttribute();
     char[] content1b = "hello".ToCharArray();
     t1b.SetTermBuffer(content1b, 0, 5);
     TermAttribute t2 = new TermAttribute();
     char[] content2 = "hello2".ToCharArray();
     t2.SetTermBuffer(content2, 0, 6);
     Assert.IsTrue(t1a.Equals(t1b));
     Assert.IsFalse(t1a.Equals(t2));
     Assert.IsFalse(t2.Equals(t1b));
 }
 public virtual void  TestGrow()
 {
     TermAttribute t = new TermAttribute();
     System.Text.StringBuilder buf = new System.Text.StringBuilder("ab");
     for (int i = 0; i < 20; i++)
     {
         char[] content = buf.ToString().ToCharArray();
         t.SetTermBuffer(content, 0, content.Length);
         Assert.AreEqual(buf.Length, t.TermLength());
         Assert.AreEqual(buf.ToString(), t.Term);
         buf.Append(buf.ToString());
     }
     Assert.AreEqual(1048576, t.TermLength());
     Assert.AreEqual(1179654, t.TermBuffer().Length);
     
     // now as a string, first variant
     t = new TermAttribute();
     buf = new System.Text.StringBuilder("ab");
     for (int i = 0; i < 20; i++)
     {
         System.String content = buf.ToString();
         t.SetTermBuffer(content, 0, content.Length);
         Assert.AreEqual(content.Length, t.TermLength());
         Assert.AreEqual(content, t.Term);
         buf.Append(content);
     }
     Assert.AreEqual(1048576, t.TermLength());
     Assert.AreEqual(1179654, t.TermBuffer().Length);
     
     // now as a string, second variant
     t = new TermAttribute();
     buf = new System.Text.StringBuilder("ab");
     for (int i = 0; i < 20; i++)
     {
         System.String content = buf.ToString();
         t.SetTermBuffer(content);
         Assert.AreEqual(content.Length, t.TermLength());
         Assert.AreEqual(content, t.Term);
         buf.Append(content);
     }
     Assert.AreEqual(1048576, t.TermLength());
     Assert.AreEqual(1179654, t.TermBuffer().Length);
     
     // Test for slow growth to a long term
     t = new TermAttribute();
     buf = new System.Text.StringBuilder("a");
     for (int i = 0; i < 20000; i++)
     {
         System.String content = buf.ToString();
         t.SetTermBuffer(content);
         Assert.AreEqual(content.Length, t.TermLength());
         Assert.AreEqual(content, t.Term);
         buf.Append("a");
     }
     Assert.AreEqual(20000, t.TermLength());
     Assert.AreEqual(20167, t.TermBuffer().Length);
     
     // Test for slow growth to a long term
     t = new TermAttribute();
     buf = new System.Text.StringBuilder("a");
     for (int i = 0; i < 20000; i++)
     {
         System.String content = buf.ToString();
         t.SetTermBuffer(content);
         Assert.AreEqual(content.Length, t.TermLength());
         Assert.AreEqual(content, t.Term);
         buf.Append("a");
     }
     Assert.AreEqual(20000, t.TermLength());
     Assert.AreEqual(20167, t.TermBuffer().Length);
 }
Example #32
        private void Init(System.IO.TextReader input, HebMorph.StreamLemmatizer _lemmatizer,
            HebMorph.LemmaFilters.LemmaFilterBase _lemmaFilter, bool AlwaysSaveMarkedOriginal)
        {
            termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
            offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
            posIncrAtt = (PositionIncrementAttribute)AddAttribute(typeof(PositionIncrementAttribute));
            typeAtt = (TypeAttribute)AddAttribute(typeof(TypeAttribute));
            //payAtt = (PayloadAttribute)AddAttribute(typeof(PayloadAttribute));

            this._streamLemmatizer = _lemmatizer;
            this._streamLemmatizer.SetStream(input);
            this.alwaysSaveMarkedOriginal = AlwaysSaveMarkedOriginal;
            this.lemmaFilter = _lemmaFilter;
        }
Example #33
 public CollationKeyFilter(TokenStream input, CultureInfo cultureInfo) : base(input)
 {
     this.cultureInfo = cultureInfo;
     termAtt = (TermAttribute) base.AddAttribute(typeof (TermAttribute));
 }
        /// <summary>
        /// Creates a shingle filter based on a user defined matrix.
        /// 
        /// The filter /will/ delete columns from the input matrix! You will not be able to reset the filter if you used this constructor.
        /// todo: don't touch the matrix! use a bool, set the input stream to null or something, and keep track of where in the matrix we are at.
        /// 
        /// </summary>
        /// <param name="matrix">the input based for creating shingles. Does not need to contain any information until ShingleMatrixFilter.Next(Token) is called the first time.</param>
        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
        /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contains permutation of the first of the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
        /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
        public ShingleMatrixFilter(Matrix.Matrix matrix, int minimumShingleSize, int maximumShingleSize, Char spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
        {
            Matrix = matrix;
            MinimumShingleSize = minimumShingleSize;
            MaximumShingleSize = maximumShingleSize;
            SpacerCharacter = spacerCharacter;
            IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
            _settingsCodec = settingsCodec;

            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            // set the input to be an empty token stream, we already have the data.
            _input = new EmptyTokenStream();

            _inTermAtt = (TermAttribute) _input.AddAttribute(typeof (TermAttribute));
            _inPosIncrAtt = (PositionIncrementAttribute) _input.AddAttribute(typeof (PositionIncrementAttribute));
            _inPayloadAtt = (PayloadAttribute) _input.AddAttribute(typeof (PayloadAttribute));
            _inOffsetAtt = (OffsetAttribute) _input.AddAttribute(typeof (OffsetAttribute));
            _inTypeAtt = (TypeAttribute) _input.AddAttribute(typeof (TypeAttribute));
            _inFlagsAtt = (FlagsAttribute) _input.AddAttribute(typeof (FlagsAttribute));
        }
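A minimal sketch for the matrix-based constructor above. BuildMatrix() is a hypothetical helper that populates a Matrix.Matrix beforehand, and the codec class name is an assumption based on the Java contrib package; note the doc comment's warning that the filter consumes the matrix.

// Hedged sketch: BuildMatrix() is hypothetical; SimpleThreeDimensionalTokenSettingsCodec
// is an assumed codec name from the Java contrib package.
Matrix.Matrix matrix = BuildMatrix();
TokenStream stream = new ShingleMatrixFilter(
    matrix, 2, 3, '_', false, new SimpleThreeDimensionalTokenSettingsCodec());
var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
while (stream.IncrementToken())
{
    System.Console.WriteLine(termAtt.Term);
}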
 public TokenListStream(ICollection<Token> tokens)
 {
     _tokens = tokens;
     _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
     _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
     _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
     _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
     _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
     _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
 }
 protected EdgeNGramTokenFilter(TokenStream input)
     : base(input)
 {
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
     this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
 }
Example #37
 public PersianStemFilter(TokenStream input)
     : base(input)
 {
     _stemmer = new PersianStemmer();
     _termAttr = AddAttribute<TermAttribute>();
 }
        /// <summary>
        /// Creates a shingle filter with ad hoc parameter settings.
        /// </summary>
        /// <param name="input">stream from which to construct the matrix</param>
        /// <param name="minimumShingleSize">minimum number of tokens in any shingle.</param>
        /// <param name="maximumShingleSize">maximum number of tokens in any shingle.</param>
        /// <param name="spacerCharacter">character to use between texts of the token parts in a shingle. null for none.</param>
        /// <param name="ignoringSinglePrefixOrSuffixShingle">if true, shingles that only contains permutation of the first of the last column will not be produced as shingles. Useful when adding boundary marker tokens such as '^' and '$'.</param>
        /// <param name="settingsCodec">codec used to read input token weight and matrix positioning.</param>
        public ShingleMatrixFilter(TokenStream input, int minimumShingleSize, int maximumShingleSize, Char? spacerCharacter, bool ignoringSinglePrefixOrSuffixShingle, TokenSettingsCodec settingsCodec)
        {
            _input = input;
            MinimumShingleSize = minimumShingleSize;
            MaximumShingleSize = maximumShingleSize;
            SpacerCharacter = spacerCharacter;
            IsIgnoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
            _settingsCodec = settingsCodec;

            // ReSharper disable DoNotCallOverridableMethodsInConstructor
            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
            // ReSharper restore DoNotCallOverridableMethodsInConstructor

            _inTermAtt = (TermAttribute) input.AddAttribute(typeof (TermAttribute));
            _inPosIncrAtt = (PositionIncrementAttribute) input.AddAttribute(typeof (PositionIncrementAttribute));
            _inPayloadAtt = (PayloadAttribute) input.AddAttribute(typeof (PayloadAttribute));
            _inOffsetAtt = (OffsetAttribute) input.AddAttribute(typeof (OffsetAttribute));
            _inTypeAtt = (TypeAttribute) input.AddAttribute(typeof (TypeAttribute));
            _inFlagsAtt = (FlagsAttribute) input.AddAttribute(typeof (FlagsAttribute));
        }
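A minimal sketch for the stream-based constructor above; the OneDimensionalNonWeightedTokenSettingsCodec name is an assumption based on the Java contrib package and may differ in this port.

// Hedged sketch: the codec class name is assumed from the Java contrib package.
TokenStream stream = new ShingleMatrixFilter(
    new WhitespaceTokenizer(new System.IO.StringReader("please divide this sentence")),
    2, 2, '_', false, new OneDimensionalNonWeightedTokenSettingsCodec());
var termAtt = (TermAttribute)stream.AddAttribute(typeof(TermAttribute));
while (stream.IncrementToken())
{
    System.Console.WriteLine(termAtt.Term); // e.g. "please_divide", "divide_this", ...
}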
 public PersianNormalizationFilter(TokenStream input)
     : base(input)
 {
     _normalizer = new PersianNormalizer();
     _termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
 }