Esempio n. 1
0
 /// <summary>
 /// Builds the analysis chain for a field: a <see cref="StandardTokenizer"/> wrapped by a
 /// <see cref="ShingleMatrixFilter"/>, a <see cref="LowerCaseFilter"/> and finally a
 /// <see cref="StopFilter"/> using <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The outermost filter of the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream chain = new StandardTokenizer(Version.LUCENE_30, reader);

     // Shingle sizes 2..8, joined with a single space.
     chain = new ShingleMatrixFilter(chain, 2, 8, ' ');
     chain = new LowerCaseFilter(chain);
     chain = new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     return chain;
 }
Esempio n. 2
0
        /// <summary>
        /// Builds the analysis chain for a field: standard tokenization, shingle construction,
        /// lower-casing and English stop-word removal, in that order.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // StandardTokenizer is intended as a good tokenizer for most European-language documents:
            //  - splits words at punctuation characters, removing the punctuation;
            //  - splits words at hyphens, unless there is a number in the token;
            //  - recognizes email addresses and internet hostnames as one token.
            TokenStream chain = new StandardTokenizer(Version.LUCENE_30, reader);

            // ShingleMatrixFilter constructs shingles from the token stream, e.g.
            // "2010 Audi RS5 Quattro Coupe" => "2010 Audi", "Audi RS5", "RS5 Quattro", "Quattro Coupe"
            // Arguments: input stream, minimum tokens per shingle (2), maximum tokens per
            // shingle (8), and the character placed between the token parts of a shingle (' ').
            chain = new ShingleMatrixFilter(chain, 2, 8, ' ');

            // Normalize token text to lower case.
            chain = new LowerCaseFilter(chain);

            // Remove stop words from the token stream.
            chain = new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            return chain;
        }
        /*
        public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
        {
            MMSegTokenizer mmsegTokenizer = (MMSegTokenizer)base.PreviousTokenStream;
            if (mmsegTokenizer == null)
            {
                mmsegTokenizer = new MMSegTokenizer(NewSeg, reader);
                base.PreviousTokenStream = mmsegTokenizer;
            }
            else
            {
                mmsegTokenizer.Reset(reader);
            }
            return mmsegTokenizer;
        }
        */

        /// <summary>
        /// Creates an <c>MMSegTokenizer</c> over <paramref name="reader"/>, resets it, and wraps it
        /// in a <see cref="LowerCaseFilter"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The lower-casing filter wrapping the tokenizer.</returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // Held as a TokenStream so that Reset() is dispatched through the base-class reference.
            Lucene.Net.Analysis.TokenStream segmented = new MMSegTokenizer(NewSeg, reader);
            segmented.Reset();

            return new LowerCaseFilter(segmented);
        }
 /// <summary>
 /// Tokenizes with <c>whitespaceAnalyzer</c>, lower-cases the tokens, then removes the
 /// entries of <c>stopWords</c>.
 /// </summary>
 /// <param name="fieldName">Field name, forwarded to <c>whitespaceAnalyzer</c>.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream lowered = new LowerCaseFilter(whitespaceAnalyzer.TokenStream(fieldName, reader));
     return new StopFilter(true, lowered, stopWords);
 }
Esempio n. 5
0
 /// <summary>
 /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
 /// <see cref="LowerCaseFilter"/> → <c>RuSnowballFilter</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>RuSnowballFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream lowered = new LowerCaseFilter(
         new StandardFilter(
             new StandardTokenizer(reader)));

     return new RuSnowballFilter(lowered);
 }
        /// <summary>
        /// Tokenizes with a <see cref="StandardTokenizer"/> (configured with <c>_version</c>) and
        /// lower-cases the resulting tokens.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <see cref="LowerCaseFilter"/> wrapping the tokenizer.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream tokens = new StandardTokenizer(_version, reader);
            tokens = new LowerCaseFilter(tokens);
            return tokens;
        }
Esempio n. 7
0
 /// <summary>
 /// Constructs a <see cref="StandardTokenizer"/> filtered by a <see cref="StandardFilter"/>,
 /// a <see cref="LowerCaseFilter"/> and a <see cref="StopFilter"/> built from <c>stopSet</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream normalized = new LowerCaseFilter(
         new StandardFilter(
             new StandardTokenizer(reader)));

     return new StopFilter(normalized, stopSet);
 }
Esempio n. 8
0
 /// <summary>
 /// Builds the chain <c>PersianTokenizer</c> → <see cref="LowerCaseFilter"/> →
 /// <c>PersianNormalizationFilter</c> → <see cref="StopFilter"/> (using <c>_stoptable</c>) →
 /// <c>PersianLemmatizationFilter</c>.
 /// </summary>
 /// <param name="fieldname">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>PersianLemmatizationFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldname, TextReader reader)
 {
     TokenStream lowered = new LowerCaseFilter(new PersianTokenizer(reader));
     TokenStream normalized = new PersianNormalizationFilter(lowered);

     // The position-increment setting follows the default for the configured Lucene version.
     TokenStream withoutStopWords = new StopFilter(
         StopFilter.GetEnablePositionIncrementsVersionDefault(_version),
         normalized,
         _stoptable);

     return new PersianLemmatizationFilter(withoutStopWords);
 }
Esempio n. 9
0
 /// <summary>
 /// Tokenizes with a <c>JiebaTokenizer</c> backed by a new <c>JiebaSegmenter</c>, lower-cases
 /// the tokens, then removes the entries of <c>StopWords</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream segmented = new JiebaTokenizer(new JiebaSegmenter(), reader);

     // Lower-casing is required because the parser converts the queries to lower case.
     TokenStream lowered = new LowerCaseFilter(segmented);

     return new StopFilter(true, lowered, StopWords);
 }
Esempio n. 10
0
 /// <summary>
 /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
 /// <see cref="LowerCaseFilter"/> → <c>CroatianStemFilter</c>. Stop-word removal is
 /// currently disabled (see the commented-out line below).
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>CroatianStemFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
     TokenStream lowered = new LowerCaseFilter(new StandardFilter(tokenizer));

     // Disabled stop-word step, kept for reference:
     //lowered = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), lowered, stopSet);

     return new CroatianStemFilter(lowered, stemmer);
 }
Esempio n. 11
0
 /// <summary>
 /// Reads the whole input, splits it on <c>PackageIndexEntity.IdSeparators</c> (dropping
 /// empty pieces), re-joins the pieces with single spaces, and runs the result through
 /// <c>whitespaceAnalyzer</c> followed by a <see cref="LowerCaseFilter"/>.
 /// </summary>
 /// <param name="fieldName">Field name, forwarded to <c>whitespaceAnalyzer</c>.</param>
 /// <param name="reader">Source of the text; it is read to the end by this method.</param>
 /// <returns>The <see cref="LowerCaseFilter"/> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     // Pre-split on the ID separators so the whitespace analyzer sees each piece as a word.
     string[] pieces = reader.ReadToEnd().Split(PackageIndexEntity.IdSeparators, StringSplitOptions.RemoveEmptyEntries);
     var spaceSeparated = new StringReader(String.Join(" ", pieces));

     return new LowerCaseFilter(whitespaceAnalyzer.TokenStream(fieldName, spaceSeparated));
 }
Esempio n. 12
0
 /// <summary>
 /// Builds the chain <see cref="StandardTokenizer"/> (max token length 255) →
 /// <see cref="StandardFilter"/> → <see cref="LowerCaseFilter"/> → <see cref="StopFilter"/>
 /// (using <c>StandardAnalyzer.STOP_WORDS_SET</c>) → <c>NGramTokenFilter</c> with gram
 /// sizes 2 to 6.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>NGramTokenFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     var source = new StandardTokenizer(Version.LUCENE_29, reader) { MaxTokenLength = 255 };

     TokenStream chain = new LowerCaseFilter(new StandardFilter(source));
     chain = new StopFilter(false, chain, StandardAnalyzer.STOP_WORDS_SET);
     chain = new NGramTokenFilter(chain, 2, 6);
     return chain;
 }
Esempio n. 13
0
        /// <summary>
        /// Tokenizes with a <c>JiebaTokenizer</c> backed by a new <c>JiebaSegmenter</c>,
        /// lower-cases the tokens, then removes the entries of <c>StopWords</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            TokenStream segmented = new JiebaTokenizer(new JiebaSegmenter(), reader);
            TokenStream lowered = new LowerCaseFilter(segmented);

            return new StopFilter(true, lowered, StopWords);
        }
Esempio n. 14
0
 /// <summary>
 /// Constructs a <see cref="StandardTokenizer"/> filtered by a <see cref="StandardFilter"/>,
 /// a <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/> (using <c>stopTable</c>)
 /// and a <c>SpanishStemFilter</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>SpanishStemFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream lowered = new LowerCaseFilter(
         new StandardFilter(
             new StandardTokenizer(Version.LUCENE_24, reader)));

     TokenStream withoutStopWords = new StopFilter(true, lowered, stopTable);
     return new SpanishStemFilter(withoutStopWords);
 }
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
        /// <see cref="LowerCaseFilter"/> → <see cref="StopFilter"/> (using <c>stoptable</c> and
        /// <c>enableStopPositionIncrements</c>) → <c>BulgarianStemFilter</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>BulgarianStemFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream lowered = new LowerCaseFilter(
                new StandardFilter(
                    new StandardTokenizer(matchVersion, reader)));

            TokenStream withoutStopWords = new StopFilter(this.enableStopPositionIncrements, lowered, stoptable);
            return new BulgarianStemFilter(withoutStopWords);
        }
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
        /// <see cref="LowerCaseFilter"/> → <c>ShingleFilter</c> constructed with the argument 4.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>ShingleFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // TODO (carried over from the original): need to account for the | breaks in relatedcontent.
            TokenStream lowered = new StandardTokenizer(_version, reader);
            lowered = new StandardFilter(lowered);
            lowered = new LowerCaseFilter(lowered);

            // Unigram output is left at the filter's default; the call below was disabled:
            //shingles.SetOutputUnigrams(false);
            var shingles = new ShingleFilter(lowered, 4);
            return shingles;
        }
Esempio n. 17
0
        /// <summary>
        /// Builds the chain <c>LetterOrDigitTokenizer</c> → <see cref="LowerCaseFilter"/> →
        /// <see cref="ASCIIFoldingFilter"/> → <c>SingleCharTokenizer</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>SingleCharTokenizer</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream folded = new ASCIIFoldingFilter(
                new LowerCaseFilter(
                    new LetterOrDigitTokenizer(reader)));

            return new SingleCharTokenizer(folded);
        }
Esempio n. 18
0
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> (max token length 255) →
        /// <see cref="StandardFilter"/> → <see cref="LowerCaseFilter"/> → <c>NGramTokenFilter</c>
        /// with gram sizes 2 to 255.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>NGramTokenFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            var source = new StandardTokenizer(Version.LUCENE_30, reader) { MaxTokenLength = 255 };

            TokenStream lowered = new LowerCaseFilter(new StandardFilter(source));
            return new NGramTokenFilter(lowered, 2, 255);
        }
Esempio n. 19
0
 /// <summary>
 /// Constructs a <see cref="StandardTokenizer"/> filtered by a <see cref="StandardFilter"/>,
 /// a <see cref="LowerCaseFilter"/>, optionally a <see cref="StopFilter"/> (only when
 /// <c>stopSet</c> is not null), and a <c>SnowballFilter</c> created with <c>name</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>SnowballFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream chain = new LowerCaseFilter(
         new StandardFilter(
             new StandardTokenizer(matchVersion, reader)));

     if (stopSet != null)
     {
         // The position-increment setting follows the default for the configured Lucene version.
         chain = new StopFilter(
             StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
             chain,
             stopSet);
     }

     return new SnowballFilter(chain, name);
 }
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
        /// <see cref="LowerCaseFilter"/> → <see cref="ASCIIFoldingFilter"/> →
        /// <see cref="StopFilter"/> (stop set made from <c>kEnglishStopWords</c>) →
        /// <c>EdgeNGramTokenFilter</c> on the <c>FRONT</c> side with gram sizes 1 to 20.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>EdgeNGramTokenFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            TokenStream folded = new ASCIIFoldingFilter(
                new LowerCaseFilter(
                    new StandardFilter(
                        new StandardTokenizer(kLuceneVersion, reader))));

            TokenStream withoutStopWords = new StopFilter(false, folded, StopFilter.MakeStopSet(kEnglishStopWords));

            return new EdgeNGramTokenFilter(
                withoutStopWords,
                Lucene.Net.Analysis.NGram.EdgeNGramTokenFilter.Side.FRONT,
                1,
                20);
        }
 /// <summary>
 /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
 /// <see cref="StopFilter"/> (from <c>FRENCH_STOP_WORDS</c>) → <c>FrenchStemFilter</c> →
 /// <see cref="LowerCaseFilter"/> → <see cref="ASCIIFoldingFilter"/>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <see cref="ASCIIFoldingFilter"/> that terminates the chain.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader) {
     TokenStream stream = new StandardFilter(new StandardTokenizer(this._luceneVersion, reader));

     // The position-increment setting follows the default for the configured Lucene version.
     bool enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(this._luceneVersion);
     var frenchStopWords = CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)FRENCH_STOP_WORDS, false));
     stream = new StopFilter(enablePositionIncrements, stream, frenchStopWords);

     stream = new FrenchStemFilter(stream, CharArraySet.EMPTY_SET);

     // Lower-casing is deliberately applied after stemming.
     return new ASCIIFoldingFilter(new LowerCaseFilter(stream));
 }
Esempio n. 22
0
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="LowerCaseFilter"/> →
        /// optional <see cref="StopFilter"/> (only when <c>STOP_WORDS</c> is not null) →
        /// <see cref="ASCIIFoldingFilter"/> → <c>SnowballFilter</c> named "English".
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>SnowballFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // StandardFilter is intentionally not part of this chain (it was disabled in the original).
            TokenStream chain = new LowerCaseFilter(
                new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_29, reader));

            if (STOP_WORDS != null)
            {
                chain = new StopFilter(false, chain, STOP_WORDS);
            }

            chain = new ASCIIFoldingFilter(chain);
            return new SnowballFilter(chain, "English");
        }
Esempio n. 23
0
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="StandardFilter"/> →
        /// <see cref="LowerCaseFilter"/> → <see cref="StopFilter"/> (default English stop words
        /// from <c>StopAnalyzer</c>) → <c>SynonymFilter</c> driven by <c>SynonymEngine</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>SynonymFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // Tokenize, normalize the tokenizer output, and force everything to lower case.
            TokenStream lowered = new LowerCaseFilter(
                new StandardFilter(
                    new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_30, reader)));

            // Drop the default stop words provided by the StopAnalyzer class.
            TokenStream withoutStopWords = new StopFilter(true, lowered, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Inject the synonyms last.
            return new SynonymFilter(withoutStopWords, SynonymEngine);
        }
Esempio n. 24
0
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <see cref="LowerCaseFilter"/> →
        /// optional <see cref="StopFilter"/> (only when <c>STOP_WORDS</c> is not null) →
        /// <see cref="ASCIIFoldingFilter"/> → <c>SnowballFilter</c> named "Spanish".
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <c>SnowballFilter</c> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // StandardFilter is intentionally not part of this chain (it was disabled in the original).
            TokenStream chain = new LowerCaseFilter(
                new StandardTokenizer(Lucene.Net.Util.Version.LUCENE_29, reader));

            if (STOP_WORDS != null)
            {
                chain = new StopFilter(false, chain, STOP_WORDS);
            }

            chain = new ASCIIFoldingFilter(chain);

            // NOTE(review): the original comment said a distinct Spanish stemmer called "Spanish2"
            // is used (expected under Snowball\SF\Snowball\Ext\, with a copy kept in this project),
            // but the name actually passed here is "Spanish" - confirm which one is intended.
            return new SnowballFilter(chain, "Spanish");
        }
 /// <summary>
 /// Expects consuming a chain that contains <c>AnonymousClassTokenFilter</c> to throw
 /// <see cref="System.NotSupportedException"/> with the message checked below.
 /// </summary>
 public virtual void  TestOverridesAny()
 {
     try
     {
         TokenStream chain = new WhitespaceTokenizer(new System.IO.StringReader(doc));
         chain = new AnonymousClassTokenFilter(this, chain);
         chain = new StopFilter(new LowerCaseFilter(chain), stopwords);

         // Drain the stream; the exception is expected while doing so.
         while (chain.IncrementToken())
         {
         }

         Assert.Fail("One TokenFilter does not override any of the required methods, so it should fail.");
     }
     catch (System.NotSupportedException expected)
     {
         Assert.IsTrue(expected.Message.EndsWith("does not implement any of incrementToken(), next(Token), next()."));
     }
 }
Esempio n. 26
0
        /// <summary>
        /// Builds the chain <see cref="StandardTokenizer"/> → <c>SnowballFilter</c> (with an
        /// <c>EnglishStemmer</c>) → <see cref="LowerCaseFilter"/> → <see cref="ASCIIFoldingFilter"/> →
        /// <see cref="StopFilter"/>, where the stop set is a custom word list merged with
        /// <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // Custom stop words; Lucene's default English stop words are merged in below.
            HashSet<string> stopWords = new HashSet<string>()
            {
                "said", "have", "the", "more", "from", "who", "he", "than", "it", "were", "use", "has", "also",
                "been", "we", "which", "had", "you", "us", "them", "so", "in", "i", "our", "his", "to", "of", "a",
                "st", "ad", "co", "re", "ve", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "f", "g"
            };
            stopWords.UnionWith(StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            // Only one tokenizer is created over the reader. The previous version also built an
            // unused StandardTokenizer/StandardFilter pair on the same reader.
            TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader);

            // NOTE(review): stemming runs before lower-casing and stop-word removal here; that
            // order is preserved from the original - confirm it is intended.
            result = new SnowballFilter(result, new EnglishStemmer());
            result = new LowerCaseFilter(result);
            result = new ASCIIFoldingFilter(result);
            result = new StopFilter(true, result, stopWords);
            return result;
        }
        /// <summary>
        /// Builds a chain that tags, lower-cases, stop-filters, caches and then annotates tokens,
        /// and consumes it several times through the API combination selected by
        /// <paramref name="api"/>.
        /// </summary>
        /// <param name="api">
        /// 0 = new API twice; 1 = old API twice; 2 = very old API twice; 3 = a mix of all three.
        /// Any other value only builds the chain.
        /// </param>
        private void  TestCachingCustomToken(int api)
        {
            TokenStream chain = new WhitespaceTokenizer(new System.IO.StringReader(doc));
            chain = new PartOfSpeechTaggingFilter(chain);
            chain = new LowerCaseFilter(chain);
            chain = new StopFilter(chain, stopwords);
            // Note the order: the caching is done before the annotating!
            chain = new CachingTokenFilter(chain);
            chain = new PartOfSpeechAnnotatingFilter(chain);

            if (api == 0)
            {
                ConsumeStreamNewAPI(chain);
                ConsumeStreamNewAPI(chain);
            }
            else if (api == 1)
            {
                ConsumeStreamOldAPI(chain);
                ConsumeStreamOldAPI(chain);
            }
            else if (api == 2)
            {
                ConsumeStreamVeryOldAPI(chain);
                ConsumeStreamVeryOldAPI(chain);
            }
            else if (api == 3)
            {
                ConsumeStreamNewAPI(chain);
                ConsumeStreamOldAPI(chain);
                ConsumeStreamVeryOldAPI(chain);
                ConsumeStreamNewAPI(chain);
                ConsumeStreamVeryOldAPI(chain);
            }
        }
        /// <summary>
        /// Exercises token streams with <c>TokenStream.SetOnlyUseNewAPI(true)</c>: a chain
        /// containing <c>PartOfSpeechTaggingFilter</c> is expected to throw, a chain of
        /// WhitespaceTokenizer/LowerCaseFilter/StopFilter is expected to work, and the old
        /// <c>Next</c> methods are expected to throw. It then switches the flag back to false and
        /// checks which classes implement the attributes. The flag is always reset to false in
        /// the <c>finally</c> block.
        /// </summary>
        public virtual void  TestOnlyNewAPI()
        {
            TokenStream.SetOnlyUseNewAPI(true);
            try
            {
                // this should fail with UOE
                try
                {
                    TokenStream stream = new WhitespaceTokenizer(new System.IO.StringReader(doc));
                    stream = new PartOfSpeechTaggingFilter(stream);                     // <-- this one is evil!
                    stream = new LowerCaseFilter(stream);
                    stream = new StopFilter(stream, stopwords);
                    // Drain the stream; the exception is expected while doing so.
                    while (stream.IncrementToken())
                    {
                        ;
                    }
                    Assert.Fail("If only the new API is allowed, this should fail with an UOE");
                }
                catch (System.NotSupportedException uoe)
                {
                    // The message must name the offending filter type exactly.
                    Assert.IsTrue((typeof(PartOfSpeechTaggingFilter).FullName + " does not implement incrementToken() which is needed for onlyUseNewAPI.").Equals(uoe.Message));
                }

                // this should pass, as all core token streams support the new API
                TokenStream stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
                stream2 = new LowerCaseFilter(stream2);
                stream2 = new StopFilter(stream2, stopwords);
                while (stream2.IncrementToken())
                {
                    ;
                }

                // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper
                Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TermAttributeImpl, "TermAttribute is implemented by TermAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is OffsetAttributeImpl, "OffsetAttribute is implemented by OffsetAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is FlagsAttributeImpl, "FlagsAttribute is implemented by FlagsAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is PayloadAttributeImpl, "PayloadAttribute is implemented by PayloadAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is PositionIncrementAttributeImpl, "PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TypeAttributeImpl, "TypeAttribute is implemented by TypeAttributeImpl");
                Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");

                // try to call old API, this should fail
                // First the Next(Token) overload...
                try
                {
                    stream2.Reset();
                    Token reusableToken = new Token();
                    while ((reusableToken = stream2.Next(reusableToken)) != null)
                    {
                        ;
                    }
                    Assert.Fail("If only the new API is allowed, this should fail with an UOE");
                }
                catch (System.NotSupportedException uoe)
                {
                    Assert.IsTrue("This TokenStream only supports the new Attributes API.".Equals(uoe.Message));
                }
                // ...then the parameterless Next() overload.
                try
                {
                    stream2.Reset();
                    while (stream2.Next() != null)
                    {
                        ;
                    }
                    Assert.Fail("If only the new API is allowed, this should fail with an UOE");
                }
                catch (System.NotSupportedException uoe)
                {
                    Assert.IsTrue("This TokenStream only supports the new Attributes API.".Equals(uoe.Message));
                }

                // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper
                // as attribute instance.
                // TokenWrapper encapsulates a Token instance that can be exchanged
                // by another Token instance without changing the AttributeImpl instance
                // itself.
                TokenStream.SetOnlyUseNewAPI(false);
                stream2 = new WhitespaceTokenizer(new System.IO.StringReader(doc));
                Assert.IsTrue(stream2.AddAttribute(typeof(TermAttribute)) is TokenWrapper, "TermAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(OffsetAttribute)) is TokenWrapper, "OffsetAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(Lucene.Net.Analysis.Tokenattributes.FlagsAttribute)) is TokenWrapper, "FlagsAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(PayloadAttribute)) is TokenWrapper, "PayloadAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(PositionIncrementAttribute)) is TokenWrapper, "PositionIncrementAttribute is implemented by TokenWrapper");
                Assert.IsTrue(stream2.AddAttribute(typeof(TypeAttribute)) is TokenWrapper, "TypeAttribute is implemented by TokenWrapper");
                // This one is not implemented by TokenWrapper:
                Assert.IsTrue(stream2.AddAttribute(typeof(SenselessAttribute)) is SenselessAttributeImpl, "SenselessAttribute is not implemented by SenselessAttributeImpl");
            }
            finally
            {
                // Always restore the static flag so later code is not affected.
                TokenStream.SetOnlyUseNewAPI(false);
            }
        }
Esempio n. 29
0
 /// <summary>
 /// Tokenizes with a <c>PanGuTokenizer</c> and lower-cases the resulting tokens.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <see cref="LowerCaseFilter"/> wrapping the tokenizer.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader) {
     TokenStream segmented = new PanGuTokenizer(reader);
     return new LowerCaseFilter(segmented);
 }
Esempio n. 30
0
		/// <summary>
		/// Feeds two whitespace-tokenized sources through the same pair of sink tokenizers and
		/// checks the tokens seen by each source and each sink, then re-reads the first (cached)
		/// source through a <see cref="LowerCaseFilter"/>.
		/// </summary>
		public virtual void  TestMultipleSources()
		{
			SinkTokenizer theDetector = new AnonymousClassSinkTokenizer1(this, null);
			SinkTokenizer dogDetector = new AnonymousClassSinkTokenizer2(this, null);

			// Both sources tee into the same two sinks; only the first source is cached.
			TokenStream firstSource = new CachingTokenFilter(new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new System.IO.StringReader(buffer1.ToString())), theDetector), dogDetector));
			TokenStream secondSource = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new System.IO.StringReader(buffer2.ToString())), theDetector), dogDetector);

			Token reusableToken = new Token();
			Token current;

			// First source: every token must match tokens1, and the sinks hold 2 and 1 tokens.
			int i = 0;
			while ((current = firstSource.Next(reusableToken)) != null)
			{
				Assert.IsTrue(current.Term().Equals(tokens1[i]), current.Term() + " is not equal to " + tokens1[i]);
				i++;
			}
			Assert.IsTrue(i == tokens1.Length, i + " does not equal: " + tokens1.Length);
			Assert.IsTrue(theDetector.GetTokens().Count == 2, "theDetector Size: " + theDetector.GetTokens().Count + " is not: " + 2);
			Assert.IsTrue(dogDetector.GetTokens().Count == 1, "dogDetector Size: " + dogDetector.GetTokens().Count + " is not: " + 1);

			// Second source: every token must match tokens2, and the sinks now hold 4 and 2 tokens.
			i = 0;
			while ((current = secondSource.Next(reusableToken)) != null)
			{
				Assert.IsTrue(current.Term().Equals(tokens2[i]), current.Term() + " is not equal to " + tokens2[i]);
				i++;
			}
			Assert.IsTrue(i == tokens2.Length, i + " does not equal: " + tokens2.Length);
			Assert.IsTrue(theDetector.GetTokens().Count == 4, "theDetector Size: " + theDetector.GetTokens().Count + " is not: " + 4);
			Assert.IsTrue(dogDetector.GetTokens().Count == 2, "dogDetector Size: " + dogDetector.GetTokens().Count + " is not: " + 2);

			// The first sink must contain only "The" (compared case-insensitively via ToUpper).
			i = 0;
			while ((current = theDetector.Next(reusableToken)) != null)
			{
				Assert.IsTrue(current.Term().ToUpper().Equals("The".ToUpper()), current.Term() + " is not equal to " + "The");
				i++;
			}
			Assert.IsTrue(i == theDetector.GetTokens().Count, i + " does not equal: " + theDetector.GetTokens().Count);

			// The second sink must contain only "Dogs" (compared case-insensitively via ToUpper).
			i = 0;
			while ((current = dogDetector.Next(reusableToken)) != null)
			{
				Assert.IsTrue(current.Term().ToUpper().Equals("Dogs".ToUpper()), current.Term() + " is not equal to " + "Dogs");
				i++;
			}
			Assert.IsTrue(i == dogDetector.GetTokens().Count, i + " does not equal: " + dogDetector.GetTokens().Count);

			// Reset the first source and read it again through a lower-casing filter.
			firstSource.Reset();
			TokenStream lowerCasing = new LowerCaseFilter(firstSource);
			i = 0;
			while ((current = lowerCasing.Next(reusableToken)) != null)
			{
				Assert.IsTrue(current.Term().Equals(tokens1[i].ToLower()), current.Term() + " is not equal to " + tokens1[i].ToLower());
				i++;
			}
			Assert.IsTrue(i == tokens1.Length, i + " does not equal: " + tokens1.Length);
		}
Esempio n. 31
0
 /// <summary>
 /// Creates a <see cref="TokenStream"/> which tokenizes all the text in the provided reader.
 /// The stream is built from a <c>RussianLetterTokenizer</c> filtered with a
 /// <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/> (using <c>stopSet</c>) and a
 /// <c>RussianStemFilter</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The <c>RussianStemFilter</c> that terminates the chain.</returns>
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream lowered = new LowerCaseFilter(new RussianLetterTokenizer(reader));

     // The position-increment setting follows the default for the configured Lucene version.
     TokenStream withoutStopWords = new StopFilter(
         StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
         lowered,
         stopSet);

     return new RussianStemFilter(withoutStopWords);
 }
Esempio n. 32
0
	/// <summary>
	/// Creates a <see cref="TokenStream"/> which tokenizes all the text in the provided reader.
	/// The stream is built from a <see cref="StandardTokenizer"/> filtered with a
	/// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/> and a
	/// <see cref="StopFilter"/> (using <c>stoptable</c>).
	/// </summary>
	/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
	/// <param name="reader">Source of the text to tokenize.</param>
	/// <returns>The <see cref="StopFilter"/> that terminates the chain.</returns>
	public override sealed TokenStream TokenStream( String fieldName, TextReader reader ) {
		TokenStream lowered = new LowerCaseFilter(
			new StandardFilter(
				new StandardTokenizer( matchVersion, reader ) ) );

		// The position-increment setting follows the default for the configured Lucene version.
		return new StopFilter(
			StopFilter.GetEnablePositionIncrementsVersionDefault( matchVersion ),
			lowered,
			stoptable );
	}
Esempio n. 33
0
		/// <summary>
		/// Expects consuming a chain that contains <c>AnonymousClassTokenFilter</c> to throw
		/// <see cref="System.NotSupportedException"/> with the message checked below.
		/// </summary>
		public virtual void  TestOverridesAny()
		{
			try
			{
				TokenStream chain = new WhitespaceTokenizer(new System.IO.StringReader(doc));
				chain = new AnonymousClassTokenFilter(this, chain);
				chain = new StopFilter(new LowerCaseFilter(chain), stopwords);

				// Drain the stream; the exception is expected while doing so.
				while (chain.IncrementToken())
				{
				}

				Assert.Fail("One TokenFilter does not override any of the required methods, so it should fail.");
			}
			catch (System.NotSupportedException expected)
			{
				Assert.IsTrue(expected.Message.EndsWith("does not implement any of incrementToken(), next(Token), next()."));
			}
		}