コード例 #1
0
 /// <summary>
 /// Creates a rule whose exception words are stored in a <c>CharArraySet</c>.
 /// </summary>
 /// <param name="suffix">Suffix forwarded to the base rule; every entry of
 /// <paramref name="exceptions"/> must end with it.</param>
 /// <param name="min">Forwarded unchanged to the base rule constructor.</param>
 /// <param name="replacement">Forwarded unchanged to the base rule constructor.</param>
 /// <param name="exceptions">Words placed in the exception set.</param>
 /// <exception cref="ArgumentException">Thrown when an entry of
 /// <paramref name="exceptions"/> does not end with <paramref name="suffix"/>
 /// (ordinal comparison).</exception>
 public RuleWithSetExceptions(string suffix, int min, string replacement, string[] exceptions)
     : base(suffix, min, replacement)
 {
     foreach (string word in exceptions)
     {
         if (!word.EndsWith(suffix, StringComparison.Ordinal))
         {
             // A bad argument is reported as ArgumentException rather than the bare
             // Exception type; callers catching Exception are unaffected.
             throw new ArgumentException("useless exception '" + word + "' does not end with '" + suffix + "'");
         }
     }

     this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(exceptions), false);
 }
コード例 #2
0
ファイル: DutchAnalyzer.cs プロジェクト: Cefa68000/lucenenet
 /// <summary>
 /// Creates an analyzer from explicit stop words, a stem exclusion set and a
 /// stem override dictionary.
 /// </summary>
 /// <param name="matchVersion">Version used for the defensive copies and to decide
 /// how the override dictionary is stored.</param>
 /// <param name="stopwords">Stop words; copied and wrapped as unmodifiable.</param>
 /// <param name="stemExclusionTable">Stem exclusion words; copied and wrapped as unmodifiable.</param>
 /// <param name="stemOverrideDict">Stem overrides. When it is empty, or when
 /// <paramref name="matchVersion"/> is before LUCENE_31, an unmodifiable copy is kept in
 /// <c>origStemdict</c>; otherwise its entries are fed to a
 /// <c>StemmerOverrideFilter.Builder</c> and the built result is kept in <c>stemdict</c>.</param>
 public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
 {
     this.matchVersion = matchVersion;
     this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));

     bool buildOverrideMap = !stemOverrideDict.Empty && matchVersion.onOrAfter(Version.LUCENE_31);
     if (!buildOverrideMap)
     {
         // Keep a read-only copy of the dictionary as supplied; no built map in this mode.
         this.stemdict = null;
         this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
         return;
     }

     this.origStemdict = null;

     // No need to ignore case here: this analyzer lowercases anyway.
     StemmerOverrideFilter.Builder overrideBuilder = new StemmerOverrideFilter.Builder(false);
     CharArrayMap<string>.EntryIterator entries = stemOverrideDict.entrySet().GetEnumerator();
     CharsRef keyBuffer = new CharsRef();
     while (entries.hasNext())
     {
         char[] key = entries.nextKey();
         keyBuffer.copyChars(key, 0, key.Length);
         overrideBuilder.add(keyBuffer, entries.currentValue());
     }

     try
     {
         this.stemdict = overrideBuilder.build();
     }
     catch (IOException ex)
     {
         throw new Exception("can not build stem dict", ex);
     }
 }
コード例 #3
0
ファイル: DutchAnalyzer.cs プロジェクト: Cefa68000/lucenenet
            /// <summary>
            /// Static initializer: loads <c>DEFAULT_STOP_SET</c> from the default stop word
            /// file and fills <c>DEFAULT_STEM_DICT</c> with four fixed stem overrides.
            /// </summary>
            /// <exception cref="Exception">Thrown when reading the stop word file fails; the
            /// underlying <see cref="IOException"/> is attached as the inner exception.</exception>
            static DefaultSetHolder()
            {
                try
                {
                    DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
                }
                catch (IOException ex)
                {
                    // The default set should always be present as it is part of the
                    // distribution; keep the original I/O failure as the inner exception
                    // so the root cause is not lost.
                    throw new Exception("Unable to load default stopword set", ex);
                }

                // Explicit type argument: the Java diamond form "<>" is not valid C#.
                DEFAULT_STEM_DICT = new CharArrayMap<string>(Version.LUCENE_CURRENT, 4, false);
                DEFAULT_STEM_DICT.put("fiets", "fiets"); // otherwise fiet
                DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); // otherwise bromfiet
                DEFAULT_STEM_DICT.put("ei", "eier");
                DEFAULT_STEM_DICT.put("kind", "kinder");
            }
コード例 #4
0
ファイル: DutchAnalyzer.cs プロジェクト: Cefa68000/lucenenet
 /// <summary>
 /// Creates an analyzer from the given stop words and stem exclusion set, delegating
 /// to the four-argument constructor.
 /// </summary>
 /// <param name="matchVersion">Version forwarded to the four-argument constructor; also
 /// selects the stem override dictionary: <c>DefaultSetHolder.DEFAULT_STEM_DICT</c> when
 /// it is on or after LUCENE_36, otherwise an empty map.</param>
 /// <param name="stopwords">Forwarded unchanged to the four-argument constructor.</param>
 /// <param name="stemExclusionTable">Forwarded unchanged to the four-argument constructor.</param>
 public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable)
     : this(matchVersion, stopwords, stemExclusionTable, matchVersion.onOrAfter(Version.LUCENE_36) ? DefaultSetHolder.DEFAULT_STEM_DICT : CharArrayMap.emptyMap<string>())
 {
     // Historically, this constructor never populated the stem dict,
     // so it is populated only for versions >= 3.6.
     // NOTE(review): the original comment was missing its verb; "populated" is inferred
     // from the second line of the comment - confirm.
 }