/// <summary>
/// Creates a rule whose exception words are kept in a <c>CharArraySet</c>.
/// </summary>
/// <param name="suffix">Suffix handed to the base rule; every entry of
/// <paramref name="exceptions"/> must end with it.</param>
/// <param name="min">Forwarded unchanged to the base rule.</param>
/// <param name="replacement">Forwarded unchanged to the base rule.</param>
/// <param name="exceptions">Words stored in the exception set.</param>
/// <exception cref="ArgumentException">Thrown when an entry of
/// <paramref name="exceptions"/> does not end with <paramref name="suffix"/>
/// (ordinal comparison).</exception>
public RuleWithSetExceptions(string suffix, int min, string replacement, string[] exceptions) : base(suffix, min, replacement)
{
    foreach (string exception in exceptions)
    {
        // An entry that does not end with the suffix is rejected up front.
        if (!exception.EndsWith(suffix, StringComparison.Ordinal))
        {
            // ArgumentException derives from Exception, so existing catch sites keep working.
            throw new ArgumentException("useless exception '" + exception + "' does not end with '" + suffix + "'");
        }
    }

    this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(exceptions), false);
}
/// <summary>
/// Builds an analyzer from explicit stop words, a stem exclusion set and a
/// stem override dictionary.
/// </summary>
/// <remarks>
/// The stop word and exclusion sets are copied and wrapped as unmodifiable
/// sets. When the override dictionary is non-empty and
/// <paramref name="matchVersion"/> is 3.1 or later, the entries are fed into a
/// <c>StemmerOverrideFilter.Builder</c> and the built result is stored in
/// <c>stemdict</c>. Otherwise an unmodifiable copy of the dictionary is kept
/// in <c>origStemdict</c> and <c>stemdict</c> is left null.
/// </remarks>
/// <param name="matchVersion">Version used for the copies and for the 3.1 check.</param>
/// <param name="stopwords">Stop words to copy.</param>
/// <param name="stemExclusionTable">Stem exclusion entries to copy.</param>
/// <param name="stemOverrideDict">Stem override entries.</param>
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
{
    this.matchVersion = matchVersion;
    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
    this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));

    bool buildOverrides = !stemOverrideDict.Empty && matchVersion.onOrAfter(Version.LUCENE_31);

    if (buildOverrides)
    {
        this.origStemdict = null;

        // we don't need to ignore case here since we lowercase in this analyzer anyway
        StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(false);
        CharArrayMap<string>.EntryIterator entries = stemOverrideDict.entrySet().GetEnumerator();
        CharsRef scratch = new CharsRef();

        // One CharsRef is reused for every key handed to the builder.
        while (entries.hasNext())
        {
            char[] key = entries.nextKey();
            scratch.copyChars(key, 0, key.Length);
            overrides.add(scratch, entries.currentValue());
        }

        try
        {
            this.stemdict = overrides.build();
        }
        catch (IOException ex)
        {
            throw new Exception("can not build stem dict", ex);
        }
    }
    else
    {
        this.stemdict = null;
        this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
    }
}
/// <summary>
/// Loads the default stop word set from <c>DEFAULT_STOPWORD_FILE</c> (decoded
/// as UTF-8) and fills the default stem override dictionary with four entries.
/// </summary>
/// <exception cref="Exception">Thrown when the stop word resource cannot be
/// read; the underlying <see cref="IOException"/> is attached as the inner
/// exception.</exception>
static DefaultSetHolder()
{
    try
    {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
    }
    catch (IOException ex)
    {
        // default set should always be present as it is part of the distribution;
        // keep the I/O failure as the inner exception so the root cause is not lost
        throw new Exception("Unable to load default stopword set", ex);
    }

    DEFAULT_STEM_DICT = new CharArrayMap<string>(Version.LUCENE_CURRENT, 4, false);
    DEFAULT_STEM_DICT.put("fiets", "fiets"); // otherwise fiet
    DEFAULT_STEM_DICT.put("bromfiets", "bromfiets"); // otherwise bromfiet
    DEFAULT_STEM_DICT.put("ei", "eier");
    DEFAULT_STEM_DICT.put("kind", "kinder");
}
/// <summary>
/// Builds an analyzer from explicit stop words and a stem exclusion set,
/// choosing the stem override dictionary from <paramref name="matchVersion"/>.
/// </summary>
/// <remarks>
/// For versions 3.6 and later the default stem dictionary from
/// <c>DefaultSetHolder</c> is passed on; earlier versions get an empty map.
/// The original note on this constructor says the stem dictionary was
/// historically not populated here, which is why it is limited to 3.6 and later.
/// </remarks>
/// <param name="matchVersion">Version that decides which dictionary is used.</param>
/// <param name="stopwords">Stop words forwarded to the four-argument constructor.</param>
/// <param name="stemExclusionTable">Stem exclusion entries forwarded to the four-argument constructor.</param>
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable)
    : this(
        matchVersion,
        stopwords,
        stemExclusionTable,
        matchVersion.onOrAfter(Version.LUCENE_36)
            ? DefaultSetHolder.DEFAULT_STEM_DICT
            : CharArrayMap.emptyMap<string>())
{
    // All work happens in the four-argument constructor.
}