/// <summary>
 /// Create a new <see cref="IndonesianStemFilter"/>.
 /// <para>
 /// If <paramref name="stemDerivational"/> is false,
 /// only inflectional suffixes (particles and possessive pronouns) are stemmed.
 /// </para>
 /// </summary>
 public IndonesianStemFilter(TokenStream input, bool stemDerivational)
       : base(input)
 {
     this.stemDerivational = stemDerivational;
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAtt = AddAttribute<IKeywordAttribute>();
 }
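A minimal end-to-end sketch of using this constructor; the version constant, namespaces, and sample word are illustrative assumptions, not taken from the example above:

 using System;
 using System.IO;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Id;
 using Lucene.Net.Analysis.Standard;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Util;

 // Tokenize one word and print its stem; with stemDerivational = true,
 // derivational affixes are removed as well as inflectional suffixes.
 TokenStream ts = new IndonesianStemFilter(
     new StandardTokenizer(LuceneVersion.LUCENE_48, new StringReader("membukukan")),
     stemDerivational: true);
 ICharTermAttribute term = ts.GetAttribute<ICharTermAttribute>();
 ts.Reset();
 while (ts.IncrementToken())
 {
     Console.WriteLine(term.ToString());
 }
 ts.End();
 ts.Dispose();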
Example #2
/// <summary>
 /// Creates a new NorwegianMinimalStemFilter </summary>
 /// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
 ///                     <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
 public NorwegianMinimalStemFilter(TokenStream input, int flags)
     : base(input)
 {
     this.stemmer = new NorwegianMinimalStemmer(flags);
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
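Since the flags form a bit set, both written standards can be requested at once. A hedged sketch, assuming the BOKMAAL/NYNORSK constants live on NorwegianLightStemmer as in the Java original, and that tokens is some upstream TokenStream:

 // Stem both Bokmål and Nynorsk forms in one pass (flags are OR-able).
 var filter = new NorwegianMinimalStemFilter(
     tokens,
     NorwegianLightStemmer.BOKMAAL | NorwegianLightStemmer.NYNORSK);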
Example #3
 /// <summary>
 /// Create a new <see cref="IndonesianStemFilter"/>.
 /// <para>
 /// If <paramref name="stemDerivational"/> is false,
 /// only inflectional suffixes (particles and possessive pronouns) are stemmed.
 /// </para>
 /// </summary>
 public IndonesianStemFilter(TokenStream input, bool stemDerivational)
     : base(input)
 {
     this.stemDerivational = stemDerivational;
      termAtt    = AddAttribute<ICharTermAttribute>();
      keywordAtt = AddAttribute<IKeywordAttribute>();
 }
Example #4
 public SnowballFilter(TokenStream input, SnowballProgram stemmer)
       : base(input)
 {
     this.stemmer = stemmer;
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #5
 public SnowballFilter(TokenStream input, SnowballProgram stemmer)
     : base(input)
 {
      this.stemmer     = stemmer;
      this.termAtt     = AddAttribute<ICharTermAttribute>();
      this.keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #6

 public JapaneseKatakanaStemFilter(TokenStream input, int minimumLength)
     : base(input)
 {
     this.minimumKatakanaLength = minimumLength;
      this.termAttr    = AddAttribute<ICharTermAttribute>();
      this.keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #7
 /// <summary>
 /// Creates a new <see cref="NorwegianLightStemFilter"/> </summary>
 /// <param name="input"> the source <see cref="TokenStream"/> to filter </param>
 /// <param name="flags"> set to <see cref="NorwegianStandard.BOKMAAL"/>,
 ///                     <see cref="NorwegianStandard.NYNORSK"/>, or both. </param>
 public NorwegianLightStemFilter(TokenStream input, NorwegianStandard flags)
     : base(input)
 {
     stemmer     = new NorwegianLightStemmer(flags);
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #8

 /// <summary>
 /// Creates a new NorwegianMinimalStemFilter </summary>
 /// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
 ///                     <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
 public NorwegianMinimalStemFilter(TokenStream input, int flags)
       : base(input)
 {
     this.stemmer = new NorwegianMinimalStemmer(flags);
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #9
 public JapaneseBaseFormFilter(TokenStream input)
     : base(input)
 {
      this.termAtt      = AddAttribute<ICharTermAttribute>();
      this.basicFormAtt = AddAttribute<IBaseFormAttribute>();
      this.keywordAtt   = AddAttribute<IKeywordAttribute>();
 }
Example #10
 /// <summary>
 /// Create a new <see cref="StemmerOverrideFilter"/>, performing dictionary-based stemming
 /// with the provided dictionary (<paramref name="stemmerOverrideMap"/>).
 /// <para>
 /// Any dictionary-stemmed terms will be marked with <see cref="KeywordAttribute"/>
 /// so that they will not be stemmed with stemmers down the chain.
 /// </para>
 /// </summary>
 public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
     : base(input)
 {
     this.stemmerOverrideMap = stemmerOverrideMap;
      fstReader  = stemmerOverrideMap.GetBytesReader();
      termAtt    = AddAttribute<ICharTermAttribute>();
      keywordAtt = AddAttribute<IKeywordAttribute>();
 }
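A sketch of building the map this constructor consumes; the Builder API (Add entries, then Build) is assumed to mirror the Java original, and the sample mappings are hypothetical:

 // Pin irregular forms to fixed stems; matched terms get keyword-marked,
 // so stemmers later in the chain leave them alone.
 var builder = new StemmerOverrideFilter.Builder(true); // true = ignore case
 builder.Add("mice", "mouse");
 builder.Add("geese", "goose");
 StemmerOverrideMap map = builder.Build();

 TokenStream ts = new StemmerOverrideFilter(tokens, map); // tokens: upstream stream (assumed)
 ts = new PorterStemFilter(ts); // the overrides above survive this stemmer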
Example #11
 /// <summary>
 /// Create filter using the supplied stemming table.
 /// </summary>
 /// <param name="in">input token stream</param>
 /// <param name="stemmer">stemmer</param>
 /// <param name="minLength">For performance reasons words shorter than minLength
 /// characters are not processed, but simply returned.</param>
 public StempelFilter(TokenStream @in, StempelStemmer stemmer, int minLength)
     : base(@in)
 {
      this.stemmer    = stemmer;
      this.minLength  = minLength;
      this.termAtt    = AddAttribute<ICharTermAttribute>();
      this.keywordAtt = AddAttribute<IKeywordAttribute>();
 }
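A sketch of wiring the filter up, assuming the table-loading API mirrors the Java original (StempelStemmer.Load returning a Trie); the table path is hypothetical:

 // Load a compiled stemming table; words shorter than 3 chars pass through.
 Trie table = StempelStemmer.Load(File.OpenRead("stemmer_2000.tbl"));
 TokenStream ts = new StempelFilter(tokens, new StempelStemmer(table), 3); // tokens: upstream stream (assumed)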
Example #12

 /// <summary>
 /// Create a new StemmerOverrideFilter, performing dictionary-based stemming
 /// with the provided dictionary (<paramref name="stemmerOverrideMap"/>).
 /// <para>
 /// Any dictionary-stemmed terms will be marked with <see cref="KeywordAttribute"/>
 /// so that they will not be stemmed with stemmers down the chain.
 /// </para>
 /// </summary>
 public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
       : base(input)
 {
     this.stemmerOverrideMap = stemmerOverrideMap;
     fstReader = stemmerOverrideMap.BytesReader;
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAtt = AddAttribute<IKeywordAttribute>();
 }
Example #13
 /// <summary>
 /// Create filter using the supplied stemming table.
 /// </summary>
 /// <param name="in">input token stream</param>
 /// <param name="stemmer">stemmer</param>
 /// <param name="minLength">For performance reasons words shorter than minLength 
 /// characters are not processed, but simply returned.</param>
 public StempelFilter(TokenStream @in, StempelStemmer stemmer, int minLength)
     : base(@in)
 {
     this.stemmer = stemmer;
     this.minLength = minLength;
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.keywordAtt = AddAttribute<IKeywordAttribute>();
 }
Example #14
 /// <summary>
 /// Sole constructor. </summary>
 public SuggestStopFilter(TokenStream input, CharArraySet stopWords)
     : base(input)
 {
     this.stopWords = stopWords;
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.posIncAtt = AddAttribute<IPositionIncrementAttribute>();
     this.keywordAtt = AddAttribute<IKeywordAttribute>();
     this.offsetAtt = AddAttribute<IOffsetAttribute>();
 }
Example #15
        private int lemmaNum = 0;                   // lemma counter

        public OpenNLPLemmatizerFilter(TokenStream input, NLPLemmatizerOp lemmatizerOp)
            : base(input)
        {
            this.lemmatizerOp = lemmatizerOp;
            this.termAtt      = AddAttribute<ICharTermAttribute>();
            this.typeAtt      = AddAttribute<ITypeAttribute>();
            this.keywordAtt   = AddAttribute<IKeywordAttribute>();
            this.flagsAtt     = AddAttribute<IFlagsAttribute>();
        }
Example #16
 /// <summary>
 /// Sole constructor. </summary>
 public SuggestStopFilter(TokenStream input, CharArraySet stopWords)
     : base(input)
 {
      this.stopWords  = stopWords;
      this.termAtt    = AddAttribute<ICharTermAttribute>();
      this.posIncAtt  = AddAttribute<IPositionIncrementAttribute>();
      this.keywordAtt = AddAttribute<IKeywordAttribute>();
      this.offsetAtt  = AddAttribute<IOffsetAttribute>();
 }
Example #17
 public TruncateTokenFilter(TokenStream input, int length) : base(input)
 {
     if (length < 1)
     {
          throw new System.ArgumentOutOfRangeException(nameof(length), "length parameter must be a positive number: " + length);
     }
      this.length        = length;
      this.termAttribute = AddAttribute<ICharTermAttribute>();
      this.keywordAttr   = AddAttribute<IKeywordAttribute>();
 }
Example #18

 public TruncateTokenFilter(TokenStream input, int length) : base(input)
 {
     if (length < 1)
     {
          throw new System.ArgumentOutOfRangeException(nameof(length), "length parameter must be a positive number: " + length);
     }
     this.length = length;
     this.termAttribute = AddAttribute<ICharTermAttribute>();
     this.keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #19
 /// <summary>
 /// Creates a new HunspellStemFilter that will stem tokens from the given <see cref="TokenStream"/> using affix rules in the provided
 /// Dictionary
 /// </summary>
 /// <param name="input"> <see cref="TokenStream"/> whose tokens will be stemmed </param>
 /// <param name="dictionary"> Hunspell <see cref="Dictionary"/> containing the affix rules and words that will be used to stem the tokens </param>
 /// <param name="dedup"> remove duplicates </param>
 /// <param name="longestOnly"> true if only the longest term should be output. </param>
 public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly)
     : base(input)
 {
     this.dedup       = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
     this.stemmer     = new Stemmer(dictionary);
     this.longestOnly = longestOnly;
      termAtt          = AddAttribute<ICharTermAttribute>();
      posIncAtt        = AddAttribute<IPositionIncrementAttribute>();
      keywordAtt       = AddAttribute<IKeywordAttribute>();
 }
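A sketch of constructing the dictionary this filter needs from Hunspell .aff/.dic streams; the file names are hypothetical and the Dictionary(Stream, Stream) constructor is assumed to match the Java original:

 using (Stream affix = File.OpenRead("en_US.aff"))
 using (Stream words = File.OpenRead("en_US.dic"))
 {
     var dictionary = new Dictionary(affix, words); // Lucene.Net.Analysis.Hunspell.Dictionary
     // Emit every stem a token maps to, removing duplicates:
     TokenStream ts = new HunspellStemFilter(tokens, dictionary, dedup: true, longestOnly: false);
 }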
Example #20
 public TruncateTokenFilter(TokenStream input, int length)
     : base(input)
 {
     if (length < 1)
     {
         throw new ArgumentOutOfRangeException(nameof(length), "length parameter must be a positive number: " + length); // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
     }
      this.length        = length;
      this.termAttribute = AddAttribute<ICharTermAttribute>();
      this.keywordAttr   = AddAttribute<IKeywordAttribute>();
 }
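A usage sketch: tokens longer than the limit are clipped, while keyword-marked tokens pass through untouched. The tokenizer choice and version constant are assumptions:

 // "internationalization" comes out as "inter".
 TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
     new StringReader("internationalization"));
 ts = new TruncateTokenFilter(ts, 5);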
Example #21
        /// <summary>
        /// Creates a filter with a given dictionary.
        /// </summary>
        /// <param name="input">Input token stream.</param>
        /// <param name="dict"><see cref="Dictionary"/> to use for stemming.</param>
        public MorfologikFilter(TokenStream input, Dictionary dict)
            : base(input)
        {
            this.termAtt     = AddAttribute<ICharTermAttribute>();
            this.tagsAtt     = AddAttribute<IMorphosyntacticTagsAttribute>();
            this.posIncrAtt  = AddAttribute<IPositionIncrementAttribute>();
            this.keywordAttr = AddAttribute<IKeywordAttribute>();

            this.input     = input;
            this.stemmer   = new DictionaryLookup(dict);
            this.lemmaList = new List<WordData>();
        }
Example #22
        /// <summary>
        /// Construct the named stemming filter.
        ///
        /// Available stemmers are listed in Lucene.Net.Tartarus.Snowball.Ext.
        /// The name of a stemmer is the part of the class name before "Stemmer",
        /// e.g., the stemmer in <see cref="Tartarus.Snowball.Ext.EnglishStemmer"/> is named "English".
        /// </summary>
        /// <param name="in"> the input tokens to stem </param>
        /// <param name="name"> the name of a stemmer </param>
        public SnowballFilter(TokenStream @in, string name)
            : base(@in)
        {
            try
            {
                string className = typeof(SnowballProgram).Namespace + ".Ext." +
                                   name + "Stemmer, " + this.GetType().GetTypeInfo().Assembly.GetName().Name;
                Type stemClass = Type.GetType(className);

                stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
            }
            catch (Exception e)
            {
                throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
            }

            this.termAtt     = AddAttribute<ICharTermAttribute>();
            this.keywordAttr = AddAttribute<IKeywordAttribute>();
        }
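Given the naming convention described in the summary, a stemmer is picked by language name alone; a brief sketch (upstream tokens stream assumed):

 // Reflectively resolves ...Snowball.Ext.EnglishStemmer.
 TokenStream ts = new SnowballFilter(tokens, "English");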
Example #23
        public void TestCustomAttribute()
        {
            TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));

            stream = new PatternKeywordMarkerFilter(stream, new Regex(".*"));
            stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
            IKeywordAttribute keyAtt = stream.AddAttribute<IKeywordAttribute>();

            stream.Reset();
            int i = 0;

            while (stream.IncrementToken())
            {
                assertTrue(keyAtt.IsKeyword);
                i++;
            }
            assertEquals(12, i);
            stream.End();
            stream.Dispose();
        }
Example #24
        /// <summary>
        /// Construct the named stemming filter.
        ///
        /// Available stemmers are listed in Lucene.Net.Tartarus.Snowball.Ext.
        /// The name of a stemmer is the part of the class name before "Stemmer",
        /// e.g., the stemmer in <see cref="Tartarus.Snowball.Ext.EnglishStemmer"/> is named "English".
        /// </summary>
        /// <param name="in"> the input tokens to stem </param>
        /// <param name="name"> the name of a stemmer </param>
        public SnowballFilter(TokenStream @in, string name)
            : base(@in)
        {
            try
            {
                // LUCENENET TODO: There should probably be a way to make this an extensibility point so
                // custom extensions can be loaded.
                string className = typeof(SnowballProgram).Namespace + ".Ext." +
                                   name + "Stemmer, " + this.GetType().Assembly.GetName().Name;
                Type stemClass = Type.GetType(className);

                stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
            }
            catch (Exception e)
            {
                throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
            }

            this.termAtt     = AddAttribute<ICharTermAttribute>();
            this.keywordAttr = AddAttribute<IKeywordAttribute>();
        }
Example #25
 public DutchStemFilter(TokenStream _in)
     : base(_in)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #26
        /// <summary>
        /// Construct the named stemming filter.
        /// 
        /// Available stemmers are listed in Lucene.Net.Tartarus.Snowball.Ext.
        /// The name of a stemmer is the part of the class name before "Stemmer",
        /// e.g., the stemmer in <see cref="Tartarus.Snowball.Ext.EnglishStemmer"/> is named "English".
        /// </summary>
        /// <param name="in"> the input tokens to stem </param>
        /// <param name="name"> the name of a stemmer </param>
        public SnowballFilter(TokenStream @in, string name)
              : base(@in)
        {
            try
            {
                // LUCENENET TODO: There should probably be a way to make this an extensibility point so
                // custom extensions can be loaded.
                string className = typeof(SnowballProgram).Namespace + ".Ext." +
                    name + "Stemmer, " + this.GetType().Assembly.GetName().Name;
                Type stemClass = Type.GetType(className);

                stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
            }
            catch (Exception e)
            {
                throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
            }

            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.keywordAttr = AddAttribute<IKeywordAttribute>();
        }
Example #27
 /// <summary>
 /// Construct a token stream filtering the given input.
 /// </summary>
 public KeywordRepeatFilter(TokenStream input)
     : base(input)
 {
     keywordAttribute = AddAttribute<IKeywordAttribute>();
     posIncAttr = AddAttribute<IPositionIncrementAttribute>();
 }
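KeywordRepeatFilter emits each token twice, once marked as a keyword, so a downstream stemmer leaves one copy intact; pairing it with RemoveDuplicatesTokenFilter then drops the second copy whenever stemming changed nothing. A sketch of that common chain (upstream tokens stream assumed):

 // Index both the original and the stemmed form at the same position:
 TokenStream ts = new KeywordRepeatFilter(tokens);
 ts = new PorterStemFilter(ts);            // skips the keyword-marked copy
 ts = new RemoveDuplicatesTokenFilter(ts); // collapses pairs where stemming was a no-op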
Example #28

 public GalicianMinimalStemFilter(TokenStream input)
       : base(input)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #29
 public HindiNormalizationFilter(TokenStream input)
     : base(input)
 {
      termAtt    = AddAttribute<ICharTermAttribute>();
      keywordAtt = AddAttribute<IKeywordAttribute>();
 }
Example #30
 /// <summary>
 /// Creates a new <see cref="KeywordMarkerFilter"/> </summary>
 /// <param name="in"> the input stream </param>
 protected internal KeywordMarkerFilter(TokenStream @in)
     : base(@in)
 {
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
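Concrete subclasses such as SetKeywordMarkerFilter use this attribute to shield terms from stemming; a sketch, assuming the Lucene.Net 4.8 CharArraySet constructor and an upstream tokens stream:

 // Protect exact product names from the stemmer downstream.
 var protectedWords = new CharArraySet(LuceneVersion.LUCENE_48,
     new[] { "lucene", "hunspell" }, true); // true = ignore case
 TokenStream ts = new SetKeywordMarkerFilter(tokens, protectedWords);
 ts = new PorterStemFilter(ts); // keyword-marked terms come through unstemmed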
Example #31

 public HungarianLightStemFilter(TokenStream input)
       : base(input)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #32

 /// <summary>
 /// Creates a new BrazilianStemFilter
 /// </summary>
 /// <param name="in"> the source <see cref="TokenStream"/> </param>
 public BrazilianStemFilter(TokenStream @in)
       : base(@in)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #33
        // offsetsAreCorrect also validates:
        //   - graph offsets are correct (all tokens leaving from
        //     pos X have the same startOffset; all tokens
        //     arriving to pos Y have the same endOffset)
        //   - offsets only move forwards (startOffset >=
        //     lastStartOffset)
        public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect)
        {
            Assert.IsNotNull(output);
            var checkClearAtt = ts.AddAttribute<ICheckClearAttributesAttribute>();

            ICharTermAttribute termAtt = null;

            if (output.Length > 0)
            {
                Assert.IsTrue(ts.HasAttribute<ICharTermAttribute>(), "has no CharTermAttribute");
                termAtt = ts.GetAttribute<ICharTermAttribute>();
            }

            IOffsetAttribute offsetAtt = null;

            if (startOffsets != null || endOffsets != null || finalOffset != null)
            {
                Assert.IsTrue(ts.HasAttribute<IOffsetAttribute>(), "has no OffsetAttribute");
                offsetAtt = ts.GetAttribute<IOffsetAttribute>();
            }

            ITypeAttribute typeAtt = null;

            if (types != null)
            {
                Assert.IsTrue(ts.HasAttribute<ITypeAttribute>(), "has no TypeAttribute");
                typeAtt = ts.GetAttribute<ITypeAttribute>();
            }

            IPositionIncrementAttribute posIncrAtt = null;

            if (posIncrements != null || finalPosInc != null)
            {
                Assert.IsTrue(ts.HasAttribute<IPositionIncrementAttribute>(), "has no PositionIncrementAttribute");
                posIncrAtt = ts.GetAttribute<IPositionIncrementAttribute>();
            }

            IPositionLengthAttribute posLengthAtt = null;

            if (posLengths != null)
            {
                Assert.IsTrue(ts.HasAttribute<IPositionLengthAttribute>(), "has no PositionLengthAttribute");
                posLengthAtt = ts.GetAttribute<IPositionLengthAttribute>();
            }

            IKeywordAttribute keywordAtt = null;

            if (keywordAtts != null)
            {
                Assert.IsTrue(ts.HasAttribute<IKeywordAttribute>(), "has no KeywordAttribute");
                keywordAtt = ts.GetAttribute<IKeywordAttribute>();
            }

            // Maps position to the start/end offset:
            IDictionary<int?, int?> posToStartOffset = new Dictionary<int?, int?>();
            IDictionary<int?, int?> posToEndOffset = new Dictionary<int?, int?>();

            ts.Reset();
            int pos             = -1;
            int lastStartOffset = 0;

            for (int i = 0; i < output.Length; i++)
            {
                // extra safety to enforce that the state is not preserved, and also assign bogus values
                ts.ClearAttributes();
                termAtt.SetEmpty().Append("bogusTerm");
                if (offsetAtt != null)
                {
                    offsetAtt.SetOffset(14584724, 24683243);
                }
                if (typeAtt != null)
                {
                    typeAtt.Type = "bogusType";
                }
                if (posIncrAtt != null)
                {
                    posIncrAtt.PositionIncrement = 45987657;
                }
                if (posLengthAtt != null)
                {
                    posLengthAtt.PositionLength = 45987653;
                }
                if (keywordAtt != null)
                {
                    keywordAtt.Keyword = (i & 1) == 0;
                }

                bool reset = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before
                Assert.IsTrue(ts.IncrementToken(), "token " + i + " does not exist");
                Assert.IsTrue(reset, "ClearAttributes() was not called correctly in TokenStream chain");

                Assert.AreEqual(output[i], termAtt.ToString(), "term " + i + ", output[i] = " + output[i] + ", termAtt = " + termAtt.ToString());
                if (startOffsets != null)
                {
                    Assert.AreEqual(startOffsets[i], offsetAtt.StartOffset(), "startOffset " + i);
                }
                if (endOffsets != null)
                {
                    Assert.AreEqual(endOffsets[i], offsetAtt.EndOffset(), "endOffset " + i);
                }
                if (types != null)
                {
                    Assert.AreEqual(types[i], typeAtt.Type, "type " + i);
                }
                if (posIncrements != null)
                {
                    Assert.AreEqual(posIncrements[i], posIncrAtt.PositionIncrement, "posIncrement " + i);
                }
                if (posLengths != null)
                {
                    Assert.AreEqual(posLengths[i], posLengthAtt.PositionLength, "posLength " + i);
                }
                if (keywordAtts != null)
                {
                    Assert.AreEqual(keywordAtts[i], keywordAtt.Keyword, "keywordAtt " + i);
                }

                // we can enforce some basic things about a few attributes even if the caller doesn't check:
                if (offsetAtt != null)
                {
                    int startOffset = offsetAtt.StartOffset();
                    int endOffset   = offsetAtt.EndOffset();
                    if (finalOffset != null)
                    {
                        Assert.IsTrue(startOffset <= (int)finalOffset, "startOffset must be <= finalOffset");
                        Assert.IsTrue(endOffset <= (int)finalOffset, "endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + (int)finalOffset);
                    }

                    if (offsetsAreCorrect)
                    {
                        Assert.IsTrue(offsetAtt.StartOffset() >= lastStartOffset, "offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
                        lastStartOffset = offsetAtt.StartOffset();
                    }

                    if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null)
                    {
                        // Validate offset consistency in the graph, ie
                        // all tokens leaving from a certain pos have the
                        // same startOffset, and all tokens arriving to a
                        // certain pos have the same endOffset:
                        int posInc = posIncrAtt.PositionIncrement;
                        pos += posInc;

                        int posLength = posLengthAtt.PositionLength;

                        if (!posToStartOffset.ContainsKey(pos))
                        {
                            // First time we've seen a token leaving from this position:
                            posToStartOffset[pos] = startOffset;
                            //System.out.println("  + s " + pos + " -> " + startOffset);
                        }
                        else
                        {
                            // We've seen a token leaving from this position
                            // before; verify the startOffset is the same:
                            //System.out.println("  + vs " + pos + " -> " + startOffset);
                            Assert.AreEqual((int)posToStartOffset[pos], startOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                        }

                        int endPos = pos + posLength;

                        if (!posToEndOffset.ContainsKey(endPos))
                        {
                            // First time we've seen a token arriving to this position:
                            posToEndOffset[endPos] = endOffset;
                            //System.out.println("  + e " + endPos + " -> " + endOffset);
                        }
                        else
                        {
                            // We've seen a token arriving to this position
                            // before; verify the endOffset is the same:
                            //System.out.println("  + ve " + endPos + " -> " + endOffset);
                            Assert.AreEqual((int)posToEndOffset[endPos], endOffset, "pos=" + pos + " posLen=" + posLength + " token=" + termAtt);
                        }
                    }
                }
                if (posIncrAtt != null)
                {
                    if (i == 0)
                    {
                        Assert.IsTrue(posIncrAtt.PositionIncrement >= 1, "first posIncrement must be >= 1");
                    }
                    else
                    {
                        Assert.IsTrue(posIncrAtt.PositionIncrement >= 0, "posIncrement must be >= 0");
                    }
                }
                if (posLengthAtt != null)
                {
                    Assert.IsTrue(posLengthAtt.PositionLength >= 1, "posLength must be >= 1");
                }
            }

            if (ts.IncrementToken())
            {
                Assert.Fail("TokenStream has more tokens than expected (expected count=" + output.Length + "); extra token=" + termAtt);
            }

            // repeat our extra safety checks for End()
            ts.ClearAttributes();
            if (termAtt != null)
            {
                termAtt.SetEmpty().Append("bogusTerm");
            }
            if (offsetAtt != null)
            {
                offsetAtt.SetOffset(14584724, 24683243);
            }
            if (typeAtt != null)
            {
                typeAtt.Type = "bogusType";
            }
            if (posIncrAtt != null)
            {
                posIncrAtt.PositionIncrement = 45987657;
            }
            if (posLengthAtt != null)
            {
                posLengthAtt.PositionLength = 45987653;
            }

            var reset_ = checkClearAtt.AndResetClearCalled; // reset it, because we called clearAttribute() before

            ts.End();
            Assert.IsTrue(checkClearAtt.AndResetClearCalled, "super.End()/ClearAttributes() was not called correctly in End()");

            if (finalOffset != null)
            {
                Assert.AreEqual((int)finalOffset, offsetAtt.EndOffset(), "finalOffset");
            }
            if (offsetAtt != null)
            {
                Assert.IsTrue(offsetAtt.EndOffset() >= 0, "finalOffset must be >= 0");
            }
            if (finalPosInc != null)
            {
                Assert.AreEqual((int)finalPosInc, posIncrAtt.PositionIncrement, "finalPosInc");
            }

            ts.Dispose();
        }
Example #34
 /// <summary>
 /// Construct a token stream filtering the given input.
 /// </summary>
 public KeywordRepeatFilter(TokenStream input)
     : base(input)
 {
      keywordAttribute = AddAttribute<IKeywordAttribute>();
      posIncAttr       = AddAttribute<IPositionIncrementAttribute>();
 }
Example #35
 public LowerCaseFilterMock(TokenStream @in) : base(@in)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #36
 /// <summary>
 /// Creates a new <see cref="KeywordMarkerFilter"/> </summary>
 /// <param name="in"> the input stream </param>
 protected KeywordMarkerFilter(TokenStream @in)
     : base(@in)
 {
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #37
 public CzechStemFilter(TokenStream input)
     : base(input)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #38
 public PortugueseMinimalStemFilter(TokenStream input)
     : base(input)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #39
 public DutchStemFilter(TokenStream _in)
       : base(_in)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #40

 public AdvancedBulgarianStemFilter(TokenStream input)
     : base(input)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #41
 public HungarianLightStemFilter(TokenStream input)
     : base(input)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #42

 public HindiNormalizationFilter(TokenStream input)
       : base(input)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAtt = AddAttribute<IKeywordAttribute>();
 }
Example #43
 /// <summary>
 /// Creates a new <see cref="BrazilianStemFilter"/>
 /// </summary>
 /// <param name="in"> the source <see cref="TokenStream"/>  </param>
 public BrazilianStemFilter(TokenStream @in)
     : base(@in)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #44
 public GreekStemFilter(TokenStream input)
     : base(input)
 {
      termAtt     = AddAttribute<ICharTermAttribute>();
      keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #45

 public PortugueseLightStemFilter(TokenStream input)
     : base(input)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Example #46

 public LowerCaseFilterMock(TokenStream @in) : base(@in)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }