/// <summary>
/// Creates a filter over <paramref name="input"/> that keeps a reference to the supplied
/// <paramref name="stemmer"/> and registers the term and keyword attributes it works with.
/// </summary>
/// <param name="input"> the token stream passed to the base class </param>
/// <param name="stemmer"> the Snowball program stored in this filter </param>
public SnowballFilter(TokenStream input, SnowballProgram stemmer) : base(input)
 {
     this.stemmer = stemmer;

     // Attribute registration order is preserved: term text first, keyword flag second.
     var termAttribute = AddAttribute<ICharTermAttribute>();
     termAtt = termAttribute;

     var keywordAttribute = AddAttribute<IKeywordAttribute>();
     keywordAttr = keywordAttribute;
 }
Beispiel #2
0
 /// <summary>
 /// Wraps <paramref name="input"/> and remembers the Snowball program to use; also registers
 /// the <see cref="ICharTermAttribute"/> and <see cref="IKeywordAttribute"/> on this stream.
 /// </summary>
 /// <param name="input"> the upstream token stream handed to the base constructor </param>
 /// <param name="stemmer"> the stemmer instance assigned to this filter </param>
 public SnowballFilter(TokenStream input, SnowballProgram stemmer) : base(input)
 {
     // The parameter shadows the field, so the explicit "this." is required here.
     this.stemmer = stemmer;

     // Same registration order as before: ICharTermAttribute, then IKeywordAttribute.
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
 }
Beispiel #3
0
 /// <summary>
 /// Construct the named stemming filter.
 ///
 /// The stemmer type is looked up by the type name
 /// <c>"org.tartarus.snowball.ext." + name + "Stemmer"</c>, so the name of a stemmer is the part
 /// of the class name before "Stemmer", e.g., a class called <c>EnglishStemmer</c> is named "English".
 /// </summary>
 /// <param name="in"> the input tokens to stem </param>
 /// <param name="name"> the name of a stemmer </param>
 /// <exception cref="System.ArgumentException"> if the stemmer type cannot be loaded, does not derive
 /// from <see cref="SnowballProgram"/>, or cannot be instantiated; the cause is the inner exception </exception>
 public SnowballFilter(TokenStream @in, string name) : base(@in)
 {
     // Note carried over from the Java original: loading by name is frowned upon in place of a
     // resource loader, but the factory uses the other constructor, so the program is already loaded there.
     try
     {
         // NOTE(review): the prefix is the Java package name; confirm it matches the .NET namespace
         // that actually declares the stemmer classes in this project.
         // throwOnError: true makes a missing type fail here with a descriptive exception
         // instead of returning null.
         Type stemClass = Type.GetType("org.tartarus.snowball.ext." + name + "Stemmer", true);

         // .NET equivalent of Java's Class.asSubclass: reject unrelated types before instantiating them.
         if (!typeof(SnowballProgram).IsAssignableFrom(stemClass))
         {
             throw new InvalidCastException(stemClass.FullName + " does not derive from " + typeof(SnowballProgram).FullName);
         }

         // .NET equivalent of Java's Class.newInstance: invoke the public parameterless constructor.
         stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
     }
     catch (Exception e)
     {
         throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Construct the named stemming filter.
        ///
        /// Available stemmers are listed in Lucene.Net.Tartarus.Snowball.Ext.
        /// The name of a stemmer is the part of the class name before "Stemmer",
        /// e.g., the stemmer in <see cref="Tartarus.Snowball.Ext.EnglishStemmer"/> is named "English".
        /// </summary>
        /// <param name="in"> the input tokens to stem </param>
        /// <param name="name"> the name of a stemmer </param>
        /// <exception cref="System.ArgumentException"> if no stemmer type with that name can be loaded
        /// or instantiated; the underlying failure is available as the inner exception </exception>
        public SnowballFilter(TokenStream @in, string name)
            : base(@in)
        {
            try
            {
                // Anchor the assembly on typeof(SnowballFilter) rather than this.GetType():
                // inside a constructor this.GetType() is the most-derived runtime type, so a subclass
                // declared in another assembly would make the lookup search that assembly instead.
                string className = typeof(SnowballProgram).Namespace + ".Ext." +
                                   name + "Stemmer, " + typeof(SnowballFilter).GetTypeInfo().Assembly.GetName().Name;

                // throwOnError: true so an unknown stemmer fails here with a descriptive exception
                // instead of passing null on to Activator.CreateInstance.
                Type stemClass = Type.GetType(className, true);

                stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
            }
            catch (Exception e)
            {
                throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
            }

            this.termAtt     = AddAttribute <ICharTermAttribute>();
            this.keywordAttr = AddAttribute <IKeywordAttribute>();
        }
Beispiel #5
0
        /// <summary>
        /// Construct the named stemming filter.
        ///
        /// Stemmer types are looked up in the <c>Ext</c> sub-namespace of the namespace that declares
        /// <see cref="SnowballProgram"/>.
        /// The name of a stemmer is the part of the class name before "Stemmer",
        /// e.g., the stemmer class <c>EnglishStemmer</c> is named "English".
        /// </summary>
        /// <param name="in"> the input tokens to stem </param>
        /// <param name="name"> the name of a stemmer </param>
        /// <exception cref="System.ArgumentException"> if no stemmer type with that name can be loaded
        /// or instantiated; the underlying failure is available as the inner exception </exception>
        public SnowballFilter(TokenStream @in, string name)
            : base(@in)
        {
            try
            {
                // LUCENENET TODO: There should probably be a way to make this an extensibility point so
                // custom extensions can be loaded.

                // typeof(SnowballFilter) instead of this.GetType(): in a constructor this.GetType() is the
                // most-derived runtime type, so a subclass from another assembly would redirect the lookup.
                string className = typeof(SnowballProgram).Namespace + ".Ext." +
                                   name + "Stemmer, " + typeof(SnowballFilter).Assembly.GetName().Name;

                // throwOnError: true reports an unknown stemmer directly instead of handing
                // a null Type to Activator.CreateInstance.
                Type stemClass = Type.GetType(className, true);

                stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
            }
            catch (Exception e)
            {
                throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
            }

            this.termAtt     = AddAttribute <ICharTermAttribute>();
            this.keywordAttr = AddAttribute <IKeywordAttribute>();
        }
Beispiel #6
0
        /// <summary>
        /// Builds the token stream for a field: the wrapped analyzer's stream, then a
        /// <c>StopFilter</c>, then a <c>SnowballFilter</c> when a stemmer exists for the configured
        /// language, then a <c>SynonymsFilter</c> when index-time synonyms are enabled.
        /// </summary>
        /// <param name="fieldName"> the field name, forwarded to the wrapped analyzer </param>
        /// <param name="reader"> the text source; replaced by <c>InitReader(reader)</c> unless the language is "JA" </param>
        /// <returns> the outermost filter of the chain </returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // The language code is an identifier, not user-facing text, so compare it ordinally
            // rather than with the current culture's linguistic rules.
            if (!this._Language.Equals("JA", StringComparison.OrdinalIgnoreCase))
            {
                // Languages other than Japanese need the reader to be rebuilt. (wangyunpeng)
                reader = this.InitReader(reader);
            }

            TokenStream result = this._Analyzer.TokenStream(fieldName, reader);
            result = new StopFilter(this._EnableStopPositionIncrements, result, this._StopCharArraySet, true);

            // Stemmer lookup. (wangyunpeng, 2015-08-17: switched to the thread-safe way of obtaining it.)
            SnowballProgram snowballProgram = SnowballDict.GetSnowball(this._Language);
            if (snowballProgram != null)
            {
                result = new SnowballFilter(result, snowballProgram);
            }

            if (_UseIndexSynonyms)
            {
                // At indexing time, also store synonyms, near-synonyms and related words in the index.
                result = new SynonymsFilter(this._Language, result);
            }

            return result;
        }
        /// <summary>
        /// Construct the named stemming filter.
        ///
        /// Stemmer types are resolved from the <c>Ext</c> sub-namespace of the namespace that declares
        /// <see cref="SnowballProgram"/>.
        /// The name of a stemmer is the part of the class name before "Stemmer",
        /// e.g., the stemmer class <c>EnglishStemmer</c> is named "English".
        /// </summary>
        /// <param name="in"> the input tokens to stem </param>
        /// <param name="name"> the name of a stemmer </param>
        /// <exception cref="System.ArgumentException"> if no stemmer type with that name can be loaded
        /// or instantiated; the underlying failure is available as the inner exception </exception>
        public SnowballFilter(TokenStream @in, string name)
              : base(@in)
        {
            try
            {
                // LUCENENET TODO: There should probably be a way to make this an extensibility point so
                // custom extensions can be loaded.

                // Use the declaring type, not this.GetType(): the runtime type seen from a constructor may be
                // a subclass that lives in a different assembly, which would send the lookup to the wrong place.
                string className = typeof(SnowballProgram).Namespace + ".Ext." +
                    name + "Stemmer, " + typeof(SnowballFilter).Assembly.GetName().Name;

                // Ask GetType to throw on failure so the inner exception names the missing type,
                // rather than Activator complaining about a null argument.
                Type stemClass = Type.GetType(className, true);

                stemmer = (SnowballProgram)Activator.CreateInstance(stemClass);
            }
            catch (Exception e)
            {
                throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
            }

            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.keywordAttr = AddAttribute<IKeywordAttribute>();
        }
Beispiel #8
0
 /// <summary>
 /// Creates an entry holding a search string, two integer values and an optional method that is
 /// resolved by name on <paramref name="methodobject"/>.
 /// </summary>
 /// <param name="s"> the search string; stored as a character array together with its length </param>
 /// <param name="substring_i"> stored unchanged in <c>substring_i</c> </param>
 /// <param name="result"> stored unchanged in <c>result</c> </param>
 /// <param name="methodname"> name of the method to resolve with the <c>EMPTY_PARAMS</c> signature,
 /// or the empty string when the entry has no method </param>
 /// <param name="methodobject"> object whose runtime type is searched for <paramref name="methodname"/> </param>
 /// <exception cref="MissingMethodException"> if <paramref name="methodname"/> is non-empty and no
 /// matching method is found on the runtime type of <paramref name="methodobject"/> </exception>
 public Among(string s, int substring_i, int result,
        string methodname, SnowballProgram methodobject)
 {
     this.s_size = s.Length;
     this.s = s.ToCharArray();
     this.substring_i = substring_i;
     this.result = result;
     this.methodobject = methodobject;

     if (methodname.Length == 0)
     {
         this.method = null;
         return;
     }

     // Type.GetMethod signals "not found" by returning null; it never throws
     // MissingMethodException. Check explicitly so a misspelled or inaccessible method name
     // fails here instead of silently behaving like an entry without a method.
     Type ownerType = methodobject.GetType();
     this.method = ownerType.GetMethod(methodname, EMPTY_PARAMS);
     if (this.method == null)
     {
         throw new MissingMethodException(ownerType.FullName, methodname);
     }
 }
Beispiel #9
0
 /// <summary>
 /// Creates a filter over <paramref name="input"/> that holds on to the given Snowball program.
 /// </summary>
 /// <param name="input"> the token stream passed to the base constructor </param>
 /// <param name="stemmer"> the stemmer instance stored in this filter </param>
 public SnowballFilter(TokenStream input, SnowballProgram stemmer)
     : base(input)
 {
     // "this." is required: the parameter has the same name as the field.
     this.stemmer = stemmer;
 }
Beispiel #10
0
        // Previous implementation, kept for reference: gets the stemmer object from a shared
        // dictionary (marked by its author as NOT thread-safe).
        //public static SnowballProgram GetSnowball(string language)
        //{
        //    if (_dictSnowball.ContainsKey(language))
        //        return _dictSnowball[language];
        //    return null;
        //}
        /// <summary>
        /// Gets a stemmer object for the given language key. Every call constructs a new stemmer
        /// instance (the original author marks this as the thread-safe variant).
        /// </summary>
        /// <param name="language"> the language key; matched exactly against the keys listed in the switch </param>
        /// <returns> a new stemmer for the language, or <c>null</c> when the key is not recognised </returns>
        public static SnowballProgram GetSnowball(string language)
        {
            switch (language)
            {
            case "DA":    // Danish
                return new DanishStemmer();

            case "NL":    // Dutch
                return new DutchStemmer();

            case "EN":    // English
                return new EnglishStemmer();

            case "FI":    // Finnish
                return new FinnishStemmer();

            case "FR":    // French
                return new FrenchStemmer();

            case "DE2":   // German (variant 2)
                return new German2Stemmer();

            case "DE":    // German
                return new GermanStemmer();

            case "HU":
                return new HungarianStemmer();

            case "IT":
                return new ItalianStemmer();

            case "文斯语":
                return new LovinsStemmer();

            case "NO":
                return new NorwegianStemmer();

            case "波特语":  // for English
                return new PorterStemmer();

            case "PT":    // Portuguese
                return new PortugueseStemmer();

            case "RO":
                return new RomanianStemmer();

            case "RU":    // Russian
                return new RussianStemmer();

            case "ES":    // Spanish
                return new SpanishStemmer();

            case "SV":
                return new SwedishStemmer();

            case "TR":    // Turkish
                return new TurkishStemmer();

            default:
                // Unknown key (including null): no stemmer available.
                return null;
            }
        }