Esempio n. 1
        /// <summary>
        /// Static initializer: builds the default English stop word set and stores it,
        /// passed through <c>CharArraySet.UnmodifiableSet</c>, in <c>ENGLISH_STOP_WORDS_SET</c>.
        /// </summary>
        static StopAnalyzer()
            // The default word list; every entry is written in lower case.
            IList <string> stopWords = Arrays.AsList("a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with");
            // NOTE(review): the trailing 'false' is presumably the ignoreCase flag - confirm against the CharArraySet constructor.
            var            stopSet   = new CharArraySet(LuceneVersion.LUCENE_CURRENT, stopWords, false);

            // Only the unmodifiable form is stored in the static field.
            ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
Esempio n. 2
 /// <summary>
 /// Loads the default stop word set from the manifest resource named
 /// "Lucene.Net.Analysis.AR." + DEFAULT_STOPWORD_FILE in the assembly that contains <c>ArabicAnalyzer</c>.
 /// </summary>
 /// <returns>The words read by <c>WordlistLoader.GetWordSet</c>, copied and passed through <c>CharArraySet.UnmodifiableSet</c>.</returns>
 internal static ISet <string> LoadDefaultStopWordSet()
     // NOTE(review): GetManifestResourceStream returns null when the resource is not found, which would make
     // the StreamReader constructor throw - confirm the resource name matches the build.
     using (StreamReader reader = new StreamReader(System.Reflection.Assembly.GetAssembly(typeof(ArabicAnalyzer)).GetManifestResourceStream("Lucene.Net.Analysis.AR." + DEFAULT_STOPWORD_FILE)))
         // STOPWORDS_COMMENT is handed to the loader; presumably it is the comment-line marker of the word list - confirm.
         return(CharArraySet.UnmodifiableSet(CharArraySet.Copy(WordlistLoader.GetWordSet(reader, STOPWORDS_COMMENT))));
Esempio n. 3
        /**
         * Builds an analyzer with the given stop words and stemming exclusion words
         * @param matchVersion
         *          lucene compatibility version
         * @param stopwords
         *          a stopword set
         * @param stemExclusionSet
         *          a set of words excluded from stemming
         */

        // Chains to the (matchVersion, stopwords) constructor, then assigns the stem exclusion table.
        public BrazilianAnalyzer(Version matchVersion, ISet <string> stopwords,
                                 ISet <string> stemExclusionSet)
            : this(matchVersion, stopwords)
            // NOTE(review): the statement below is cut off in the visible source; presumably it copies
            // stemExclusionSet before wrapping it - confirm against the full file.
            excltable = CharArraySet.UnmodifiableSet(CharArraySet
Esempio n. 4
        /// <summary>
        /// Builds an analyzer from a stop word set, a stem exclusion set and a stem override dictionary.
        /// </summary>
        /// <param name="matchVersion">compatibility version; also decides whether the override dictionary is compiled</param>
        /// <param name="stopwords">stop word set; a copy is stored</param>
        /// <param name="stemExclusionTable">stem exclusion set; a copy is stored</param>
        /// <param name="stemOverrideDict">stem overrides; dereferenced without a null check</param>
        // NOTE(review): brace lines, and presumably an 'else' and a 'try', are missing from this excerpt,
        // so the branch structure described below is inferred - confirm against the full file.
        public DutchAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap <string> stemOverrideDict)
            this.matchVersion = matchVersion;
            this.stoptable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stopwords));
            this.excltable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionTable));
#pragma warning disable 612, 618
            // Condition: the override dictionary is empty, or matchVersion is before LUCENE_31.
            if (stemOverrideDict.Count == 0 || !matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
                // Presumably the 'if' branch: no compiled dictionary, only a copy of the original map is kept.
                this.stemdict     = null;
                this.origStemdict = CharArrayMap.UnmodifiableMap(CharArrayMap.Copy(matchVersion, stemOverrideDict));
                // Presumably the start of the 'else' branch: the original map is dropped and a compiled dictionary is built.
                this.origStemdict = null;
                // we don't need to ignore case here since we lowercase in this analyzer anyway
                StemmerOverrideFilter.Builder        builder = new StemmerOverrideFilter.Builder(false);
                CharArrayMap <string> .EntryIterator iter    = (CharArrayMap <string> .EntryIterator)stemOverrideDict.EntrySet().GetEnumerator();
                CharsRef spare = new CharsRef();
                while (iter.HasNext)
                    char[] nextKey = iter.NextKey();
                    spare.CopyChars(nextKey, 0, nextKey.Length);
                    // NOTE(review): new string(spare.Chars) uses the whole backing array, not just the
                    // nextKey.Length characters copied above. If CopyChars can reuse a longer array from
                    // a previous key, stale trailing characters would end up in the added key - confirm
                    // CharsRef.CopyChars semantics.
                    builder.Add(new string(spare.Chars), iter.CurrentValue);
                    this.stemdict = builder.Build();
                // An IOException (presumably from Build) is rethrown as a plain Exception, keeping the original as inner exception.
                catch (IOException ex)
                    throw new Exception("can not build stem dict", ex);
Esempio n. 5
 /// <summary>
 /// Builds an analyzer with the given stop words and stem exclusion words.
 /// </summary>
 /// <param name="matchVersion">compatibility version, stored as given</param>
 /// <param name="stopwords">stop word set; stored as CharArraySet.UnmodifiableSet(CharArraySet.Copy(...))</param>
 /// <param name="stemExclusionTable">stem exclusion set; stored the same way</param>
 public DutchAnalyzer(Version matchVersion, ISet <string> stopwords, ISet <string> stemExclusionTable)
     stoptable         = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
     excltable         = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionTable));
     this.matchVersion = matchVersion;
     // NOTE(review): helper defined outside this excerpt; presumably records whether a subclass
     // overrides TokenStream - confirm.
     SetOverridesTokenStreamMethod <DutchAnalyzer>();
Esempio n. 6
 /// <summary>
 /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
 /// stemming.
 /// </summary>
 /// <param name="matchVersion">lucene compatibility version</param>
 /// <param name="stopwords">a stopword set; forwarded to the base constructor</param>
 /// <param name="stemExclusionSet">a set of terms not to be stemmed; a copy is stored</param>
 public PolishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
     : base(matchVersion, stopwords)
     // This constructor always uses the default table; it takes no parameter for another one.
     this.stemTable        = DefaultsHolder.DEFAULT_TABLE;
     // The caller's set is copied first, then passed through UnmodifiableSet.
     this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(
                                                              matchVersion, stemExclusionSet));
Esempio n. 7
 /// <summary>
 /// Builds an analyzer with the given stop words, stem exclusion set and DIN2 normalization setting.
 /// </summary>
 /// <param name="matchVersion">lucene compatibility version</param>
 /// <param name="stopwords">a stopword set; a copy is stored</param>
 /// <param name="stemExclusionSet">a stemming exclusion set; a copy is stored</param>
 /// <param name="normalizeDin2">Specifies if the DIN-2007-2 style stemmer should be used in addition to DIN1.  This
 /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
 /// respectively, before the DIN1 stemmer is invoked.</param>
 public GermanAnalyzer(Version matchVersion, ISet <string> stopwords, ISet <string> stemExclusionSet, bool normalizeDin2)
     stopSet           = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
     exclusionSet      = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
     this.matchVersion = matchVersion;
     _normalizeDin2    = normalizeDin2;
     // NOTE(review): helper defined outside this excerpt; presumably records whether a subclass
     // overrides TokenStream - confirm.
     SetOverridesTokenStreamMethod <GermanAnalyzer>();
Esempio n. 8
 /// <summary>
 /// Static initializer: fills a CharArraySet from <c>EnglishStopWords</c> and stores its
 /// unmodifiable form in <c>EnglishStopWordsSet</c>.
 /// </summary>
 static StopWordList()
         // The set is created with EnglishStopWords.Length as its first argument.
         // NOTE(review): 'false' is presumably the ignoreCase flag - confirm.
         var englishStopSet = new CharArraySet(EnglishStopWords.Length, false);
         // The words are wrapped in a non-generic ArrayList before being handed to AddAll.
         englishStopSet.AddAll(new System.Collections.ArrayList(EnglishStopWords));
         EnglishStopWordsSet = CharArraySet.UnmodifiableSet(englishStopSet);
Esempio n. 9
            /// <summary>
            /// Initializes the default stop words from a <c>PersianAnalyzer</c> created for LUCENE_48
            /// and sets up the default articles set.
            /// </summary>
            public CustomAnalyzer()
                // A throwaway PersianAnalyzer is created only to read its StopwordSet.
                _defaultStopwords = new PersianAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48).StopwordSet;

                // NOTE(review): the argument list of the inner CharArraySet is missing from this excerpt, so
                // the contents of the articles set cannot be seen here. The trailing 'true' is presumably
                // the ignoreCase flag - confirm.
                _defaultArticles = CharArraySet.UnmodifiableSet(
                    new CharArraySet(Lucene.Net.Util.LuceneVersion.LUCENE_48,
                                         ), true));
            /// <summary>
            /// Loads the default stop word set. The visible lines read DEFAULT_STOPWORD_FILE as UTF-8
            /// through a GetDecodingReader call keyed on <c>SmartChineseAnalyzer</c>.
            /// </summary>
            // NOTE(review): most of this method body is missing from the excerpt (the start of the
            // expression, the call inside the pragma region and the return) - consult the full file.
            internal static CharArraySet LoadDefaultStopWordSet()
                // make sure it is unmodifiable as we expose it in the outer class
                                                                              .GetDecodingReader(typeof(SmartChineseAnalyzer), DEFAULT_STOPWORD_FILE,
                                                                                                 Encoding.UTF8), STOPWORD_FILE_COMMENT,
#pragma warning disable 612, 618

#pragma warning restore 612, 618
Esempio n. 11
        /// <summary>
        /// Builds a CharArraySet holding the default English stop words.
        /// </summary>
        // NOTE(review): the end of this method (presumably the return of the set) is not part of the excerpt.
        private static CharArraySet LoadEnglishStopWordsSet() // LUCENENET: Avoid static constructors (see
            // The default word list; every entry is written in lower case.
            IList <string> stopWords = new string[] { "a", "an", "and", "are", "as", "at", "be",
                                                      "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on",
                                                      "or", "such", "that", "the", "their", "then", "there", "these", "they", "this",
                                                      "to", "was", "will", "with" };

            // Warnings 612/618 (use of obsolete members) are suppressed around the LUCENE_CURRENT reference.
#pragma warning disable 612, 618
            // NOTE(review): 'false' is presumably the ignoreCase flag - confirm.
            var stopSet = new CharArraySet(LuceneVersion.LUCENE_CURRENT, stopWords, false);
#pragma warning restore 612, 618
Esempio n. 12
            /// <summary>
            /// Loads the default stop word set from the manifest resource named
            /// "Lucene.Net.Analyzers.Fa." + DEFAULT_STOPWORD_FILE in the assembly that contains <c>PersianAnalyzer</c>.
            /// </summary>
            /// <returns>The loaded words in a new CharArraySet, passed through <c>CharArraySet.UnmodifiableSet</c>.</returns>
            static ISet <String> LoadDefaultStopWordSet()
                // NOTE(review): GetManifestResourceStream returns null when the resource is not found - confirm the name.
                var stream = System.Reflection.Assembly.GetAssembly(typeof(PersianAnalyzer)).GetManifestResourceStream("Lucene.Net.Analyzers.Fa." + DEFAULT_STOPWORD_FILE);

                    // NOTE(review): the extra indentation suggests an enclosing try/using line was dropped from
                    // this excerpt; no disposal of 'stream' or 'reader' is visible here - confirm.
                    StreamReader reader = new StreamReader(stream, System.Text.Encoding.UTF8);
                    // make sure it is unmodifiable as we expose it in the outer class
                    // NOTE(review): the 'true' passed to the CharArraySet is presumably the ignoreCase flag - confirm.
                    return(CharArraySet.UnmodifiableSet(new CharArraySet(WordlistLoader.GetWordSet(reader, STOPWORDS_COMMENT), true)));
Esempio n. 13
        /// <summary>
        /// Determines the stop word set for the current search context: starts from
        /// <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c> and switches to a custom set when
        /// <c>SearchHelper</c> returns non-empty stop words for the portal and culture.
        /// </summary>
        // NOTE(review): brace lines, presumably an 'else', and the final return are missing from this
        // excerpt, so the branch structure described below is inferred - confirm against the full file.
        private static ISet <string> GetStopWords()
            int    portalId;
            string cultureCode;

            // The search document is read from the named thread data slot Constants.TlsSearchInfo.
            var searchDoc = Thread.GetData(Thread.GetNamedDataSlot(Constants.TlsSearchInfo)) as SearchDocument;

            if (searchDoc == null)
                // No document in the slot: use portal 0 and the current thread's culture.
                portalId    = 0; // default
                cultureCode = Thread.CurrentThread.CurrentCulture.Name;
                // Presumably the 'else' branch: take both values from the search document.
                portalId    = searchDoc.PortalId;
                cultureCode = searchDoc.CultureCode;
                if (string.IsNullOrEmpty(cultureCode))
                    // The document carries no culture: fall back to the portal's default language when the portal exists.
                    var portalInfo = PortalController.Instance.GetPortal(portalId);
                    if (portalInfo != null)
                        cultureCode = portalInfo.DefaultLanguage;

            var stops           = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
            var searchStopWords = SearchHelper.Instance.GetSearchStopWords(portalId, cultureCode);

            if (searchStopWords != null && !string.IsNullOrEmpty(searchStopWords.StopWords))
                //TODO Use cache from InternalSearchController
                // cultureCode can still be null here, hence the "en-US" fallback.
                var cultureInfo = new CultureInfo(cultureCode ?? "en-US");
                // Stop words are stored as one comma-separated string; each entry is lower-cased with the culture above.
                var strArray    = searchStopWords.StopWords.Split(',').Select(s => s.ToLower(cultureInfo)).ToArray();
                // NOTE(review): in the visible lines strArray is never added to 'set', so 'stops' would become
                // an empty set. A line such as set.AddAll(strArray) may have been dropped from this excerpt - confirm.
                var set         = new CharArraySet(strArray.Length, false);
                stops = CharArraySet.UnmodifiableSet(set);

Esempio n. 14
        /// <summary>
        /// Static initializer: reads "Stopword.txt" from <c>LuceneNetConfig.LuceneDictDirectory</c>, if the file
        /// exists, and stores the unmodifiable form of the resulting set in <c>_StopWordList</c>.
        /// </summary>
        static StopWord()
            // NOTE(review): 'true' is presumably the ignoreCase flag - confirm.
            CharArraySet charArraySet    = new CharArraySet(0, true);
            string       applicationPath = Path.Combine(LuceneNetConfig.LuceneDictDirectory, "Stopword.txt");

            // When the file is missing, the set stays empty and is still assigned below.
            if (File.Exists(applicationPath))
                // The file's encoding is obtained from EncodingType.GetType before opening the reader.
                Encoding encoding = EncodingType.GetType(applicationPath);
                using (StreamReader sr = new StreamReader(applicationPath, encoding))
                    while (!sr.EndOfStream)
                        string line = sr.ReadLine();
                        // NOTE(review): the body of this 'if' is missing from the excerpt; presumably it adds
                        // the line to charArraySet - confirm against the full file.
                        if (line != null)
            _StopWordList = CharArraySet.UnmodifiableSet(charArraySet);
Esempio n. 15
 /// <summary>
 /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
 /// stemming.
 /// </summary>
 /// <param name="matchVersion"> lucene compatibility version </param>
 /// <param name="stopwords"> a stopword set; forwarded to the base constructor </param>
 /// <param name="stemExclusionSet"> a set of terms not to be stemmed; a copy is stored </param>
 public FinnishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
     : base(matchVersion, stopwords)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
Esempio n. 16
 /// <summary>
 /// Builds an analyzer with the given stop words and stemming exclusion words
 /// </summary>
 /// <param name="matchVersion">
 ///          lucene compatibility version </param>
 /// <param name="stopwords">
 ///          a stopword set; forwarded to the two-argument constructor </param>
 /// <param name="stemExclusionSet"> a set of terms not to be stemmed; a copy is stored </param>
 public BrazilianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
     : this(matchVersion, stopwords)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
Esempio n. 17
 /// <summary>
 /// Builds the named analyzer with the given stop words. </summary>
 /// <param name="matchVersion"> lucene compatibility version; forwarded to the two-argument constructor </param>
 /// <param name="name"> the analyzer name; forwarded to the two-argument constructor </param>
 /// <param name="stopWords"> a stopword set; a copy is stored </param>
 public SnowballAnalyzer(LuceneVersion matchVersion, string name, CharArraySet stopWords) : this(matchVersion, name)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stopWords));
Esempio n. 18
 /**
  * Builds an analyzer with the given stop words
  * @param matchVersion
  *          lucene compatibility version
  * @param stopwords
  *          a stopword set
  * @param stemExclutionSet
  *          a stemming exclusion set
  */
 // Stores matchVersion as given; both sets are copied with CharArraySet.Copy and passed through
 // CharArraySet.UnmodifiableSet before being stored.
 public FrenchAnalyzer(Version matchVersion, ISet <string> stopwords, ISet <string> stemExclutionSet)
     this.matchVersion = matchVersion;
     this.stoptable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
     this.excltable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclutionSet));
Esempio n. 19
 /// <summary>
 /// Builds an analyzer with the given stop words and stemming exclusion set
 /// </summary>
 /// <param name="matchVersion">
 ///          lucene compatibility version </param>
 /// <param name="stopwords">
 ///          a stopword set; forwarded to the base constructor </param>
 /// <param name="stemExclutionSet">
 ///          a stemming exclusion set; a copy is stored </param>
 public FrenchAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclutionSet)
     : base(matchVersion, stopwords)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     this.excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclutionSet));
Esempio n. 20
 /// <summary>
 /// Builds an analyzer with the given stop words and stem exclusion words.
 /// </summary>
 /// <param name="matchVersion">compatibility version, stored in MatchVersion</param>
 /// <param name="stopwords">stop word set; a copy is stored in StopTable</param>
 /// <param name="stemExclusionTable">stem exclusion set; a copy is stored in ExclusionTable</param>
 public DanishAnalyzer(Version matchVersion, ISet <string> stopwords, ISet <string> stemExclusionTable)
     // Both sets are copied first, then passed through UnmodifiableSet.
     StopTable      = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
     ExclusionTable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionTable));
     MatchVersion   = matchVersion;
Esempio n. 21
 /// <summary>
 /// Builds an analyzer with the given stop words.
 /// </summary>
 /// <param name="matchVersion">compatibility version, stored as given</param>
 /// <param name="stopWords">stop word set; a copy is stored</param>
 public CJKAnalyzer(Version matchVersion, ISet <string> stopWords)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     stopTable         = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopWords));
     this.matchVersion = matchVersion;
Esempio n. 22
        /**
         * Builds an analyzer with the given stop words and stemming exclusion words
         * @param matchVersion
         *          lucene compatibility version
         * @param stopwords
         *          a stopword set
         * @param stemExclusionSet
         *          a set of words excluded from stemming
         */

        // Chains to the (matchVersion, stopwords) constructor, then stores a copy of the stem
        // exclusion set passed through CharArraySet.UnmodifiableSet.
        public BrazilianAnalyzerCustom(Lucene.Net.Util.Version matchVersion, ISet <string> stopwords,
                                       ISet <string> stemExclusionSet)
            : this(matchVersion, stopwords)
            this.excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
Esempio n. 23
        /// <summary>
        /// Static initializer: fills a CharArraySet from the <c>PORTUGUESE</c> word array and stores its
        /// unmodifiable form in <c>PORTUGUESE_SET</c>.
        /// </summary>
        static Stopwords()
            // NOTE(review): the array initializer is cut off in this excerpt; the word list itself is not visible.
            PORTUGUESE = new string[] {
            // NOTE(review): 'false' is presumably the ignoreCase flag - confirm.
            var stopSet = new CharArraySet(PORTUGUESE.Length, false);

            // The words are wrapped in a non-generic ArrayList before being handed to AddAll.
            stopSet.AddAll(new System.Collections.ArrayList(PORTUGUESE));
            PORTUGUESE_SET = CharArraySet.UnmodifiableSet(stopSet);
Esempio n. 24
        /**
         * Builds an analyzer with the given stop words
         * @param matchVersion
         *          lucene compatibility version
         * @param stopwords
         *          a stopword set
         */

        // Stores a copy of stopwords, passed through CharArraySet.UnmodifiableSet, and keeps matchVersion as given.
        public BrazilianAnalyzer(Version matchVersion, ISet <string> stopwords)
            stoptable         = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
            this.matchVersion = matchVersion;
Esempio n. 25
        /**
         * Builds an analyzer with the given stop words
         * @param matchVersion
         *          lucene compatibility version
         * @param stopwords
         *          a stopword set
         */

        // Stores a copy of stopwords, passed through CharArraySet.UnmodifiableSet, and keeps matchVersion as given.
        public BrazilianAnalyzerCustom(Lucene.Net.Util.Version matchVersion, ISet <string> stopwords)
            this.stoptable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
            this.matchVersion = matchVersion;
Esempio n. 26
 /// <summary>
 /// Creates a new instance initialized with the given stopword set
 /// </summary>
 /// <param name="version">
 ///          the Lucene version for cross version compatibility </param>
 /// <param name="stopwords">
 ///          the analyzer's stopword set; may be null, in which case <c>CharArraySet.EMPTY_SET</c> is used </param>
 protected StopwordAnalyzerBase(LuceneVersion version, CharArraySet stopwords)
     m_matchVersion = version;
     // analyzers should use char array set for stopwords!
     // A non-null set is copied first, then passed through UnmodifiableSet.
     this.m_stopwords = stopwords is null ? CharArraySet.EMPTY_SET : CharArraySet.UnmodifiableSet(CharArraySet.Copy(version, stopwords));
 /// <summary>
 /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
 /// stemming.
 /// </summary>
 /// <param name="matchVersion"><see cref="LuceneVersion"/> to match.</param>
 /// <param name="stopwords">A stopword set; forwarded to the base constructor.</param>
 /// <param name="stemExclusionSet">A set of terms not to be stemmed; a copy is stored.</param>
 public UkrainianMorfologikAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
     : base(matchVersion, stopwords)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
Esempio n. 28
 /**
  * Builds an analyzer with the given stop words
  * @param matchVersion
  *          lucene compatibility version
  * @param stopwords
  *          a stopword set
  */
 // Stores matchVersion as given and a copy of stopwords passed through CharArraySet.UnmodifiableSet.
 // This overload takes no stem exclusion set.
 public CzechAnalyzer(Version matchVersion, ISet <string> stopwords)
     this.matchVersion = matchVersion;
     this.stoptable    = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
Esempio n. 29
 /// <summary>
 /// Builds an analyzer with the given stop words and a set of words to be
 /// excluded from the <see cref="CzechStemFilter"/>.
 /// </summary>
 /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param>
 /// <param name="stopwords"> a stopword set; forwarded to the base constructor </param>
 /// <param name="stemExclusionTable"> a stemming exclusion set; a copy is stored </param>
 public CzechAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable)
     : base(matchVersion, stopwords)
     // The caller's set is copied first, then passed through UnmodifiableSet.
     this.stemExclusionTable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionTable));
Esempio n. 30
 /**
  * Builds an analyzer with the given stop words
  * @param matchVersion
  *          lucene compatibility version
  * @param stopwords
  *          a stopword set
  */
 // Stores a copy of stopwords, passed through CharArraySet.UnmodifiableSet, and keeps matchVersion as given.
 public GreekAnalyzer(Version matchVersion, ISet <string> stopwords)
     stopSet           = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
     this.matchVersion = matchVersion;