/// <summary>
/// Builds the stemmer override dictionary from the files named in <c>dictionaryFiles</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>dictionaryFiles</c> is <c>null</c> or when it names no files.
/// Otherwise every line returned by <c>GetLines</c> for each (trimmed) file name is split
/// at its first tab character into an input/output pair, the pair is added to a
/// <see cref="StemmerOverrideFilter.Builder"/>, and the built map is stored in <c>dictionary</c>.
/// </remarks>
/// <param name="loader">Resource loader handed to <c>GetLines</c> to read each dictionary file.</param>
public virtual void Inform(IResourceLoader loader)
{
    if (dictionaryFiles == null)
    {
        return;
    }

    AssureMatchVersion();
    IList<string> files = SplitFileNames(dictionaryFiles);
    if (files.Count == 0)
    {
        return;
    }

    // Hoisted out of the loops: the separator never changes, so there is no need to
    // construct a splitter (previously a Regex instance) for every single line.
    char[] tabSeparator = { '\t' };

    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
    foreach (string file in files)
    {
        IList<string> lines = GetLines(loader, file.Trim());
        foreach (string line in lines)
        {
            // Split into at most two parts so that tabs inside the output are preserved.
            // A line without any tab yields a single element, so mapping[1] throws
            // IndexOutOfRangeException for such malformed input.
            string[] mapping = line.Split(tabSeparator, 2);
            builder.Add(mapping[0], mapping[1]);
        }
    }

    dictionary = builder.Build();
}
/// <summary>
/// Builds the stemmer override dictionary from the files named in <c>dictionaryFiles</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>dictionaryFiles</c> is <c>null</c> or when it names no files.
/// Otherwise every line returned by <c>GetLines</c> for each (trimmed) file name is split
/// at its first tab character into an input/output pair, the pair is added to a
/// <see cref="StemmerOverrideFilter.Builder"/>, and the built map is stored in <c>dictionary</c>.
/// </remarks>
/// <param name="loader">Resource loader handed to <c>GetLines</c> to read each dictionary file.</param>
public virtual void Inform(IResourceLoader loader)
{
    if (dictionaryFiles == null)
    {
        return;
    }

    AssureMatchVersion();
    IEnumerable<string> files = SplitFileNames(dictionaryFiles);

    // Any() stops at the first element; Count() would walk the whole sequence
    // only to learn whether it is empty.
    if (!files.Any())
    {
        return;
    }

    // Hoisted out of the loops: the separator never changes, so there is no need to
    // construct a splitter (previously a Regex instance) for every single line.
    char[] tabSeparator = { '\t' };

    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
    foreach (string file in files)
    {
        IEnumerable<string> lines = GetLines(loader, file.Trim());
        foreach (string line in lines)
        {
            // Split into at most two parts so that tabs inside the output are preserved.
            // A line without any tab yields a single element, so mapping[1] throws
            // IndexOutOfRangeException for such malformed input.
            string[] mapping = line.Split(tabSeparator, 2);
            builder.Add(mapping[0], mapping[1]);
        }
    }

    dictionary = builder.Build();
}
/// <summary>
/// Fills an override map with random realistic-unicode keys mapped to random simple
/// strings, then checks for a random subset of entries that tokenizing the key as a
/// single keyword and running it through <see cref="StemmerOverrideFilter"/> followed by
/// <see cref="PorterStemFilter"/> emits exactly the mapped value.
/// </summary>
public virtual void TestRandomRealisticKeyword()
{
    IDictionary<string, string> overrides = new Dictionary<string, string>();
    int termCount = AtLeast(50);
    for (int n = 0; n < termCount; n++)
    {
        string key = TestUtil.RandomRealisticUnicodeString(Random);
        if (key.Length > 0)
        {
            // An empty replacement is swapped for "a" so every entry has a non-empty value.
            string replacement = TestUtil.RandomSimpleString(Random);
            if (replacement.Length == 0)
            {
                overrides[key] = "a";
            }
            else
            {
                overrides[key] = replacement;
            }
        }
    }

    // Guarantee at least one mapping even if every generated key was empty.
    if (overrides.Count == 0)
    {
        overrides["booked"] = "books";
    }

    StemmerOverrideFilter.Builder mapBuilder = new StemmerOverrideFilter.Builder(Random.nextBoolean());
    foreach (KeyValuePair<string, string> pair in overrides)
    {
        mapBuilder.Add(pair.Key, pair.Value);
    }
    StemmerOverrideFilter.StemmerOverrideMap overrideMap = mapBuilder.Build();

    foreach (KeyValuePair<string, string> pair in overrides)
    {
        // Only a random subset of the entries is verified.
        if (!Random.nextBoolean())
        {
            continue;
        }

        Tokenizer keywordTokenizer = new KeywordTokenizer(new StringReader(pair.Key));
        StemmerOverrideFilter overrideFilter = new StemmerOverrideFilter(keywordTokenizer, overrideMap);
        TokenStream stemmed = new PorterStemFilter(overrideFilter);
        AssertTokenStreamContents(stemmed, new string[] { pair.Value });
    }
}
/// <summary>
/// Creates a <c>DutchAnalyzer</c> with the given stop words, stem exclusion set and
/// stem override dictionary.
/// </summary>
/// <remarks>
/// The stop word and exclusion sets are stored as unmodifiable copies. When
/// <paramref name="stemOverrideDict"/> is empty, or <paramref name="matchVersion"/> is
/// before <c>LUCENE_31</c>, the dictionary is kept as an unmodifiable copy in
/// <c>origStemdict</c> and <c>stemdict</c> is <c>null</c>. Otherwise its entries are
/// compiled into a <see cref="StemmerOverrideFilter"/> map stored in <c>stemdict</c> and
/// <c>origStemdict</c> is <c>null</c>.
/// </remarks>
/// <param name="matchVersion">Version used for the copies and to choose the dictionary representation.</param>
/// <param name="stopwords">Stop words; copied, not referenced.</param>
/// <param name="stemExclusionTable">Terms excluded from stemming; copied, not referenced.</param>
/// <param name="stemOverrideDict">Mapping from a term to the stem that overrides the stemmer's output.</param>
/// <exception cref="Exception">Building the override map failed with an <see cref="IOException"/>.</exception>
public DutchAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
{
    this.matchVersion = matchVersion;
    this.stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stopwords));
    this.excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionTable));

    // LUCENE_31 is referenced on purpose; suppress the obsolete-member warnings for this check only.
#pragma warning disable 612, 618
    if (stemOverrideDict.Count == 0 || !matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
    {
        this.stemdict = null;
        this.origStemdict = CharArrayMap.UnmodifiableMap(CharArrayMap.Copy(matchVersion, stemOverrideDict));
    }
    else
    {
        this.origStemdict = null;
        // we don't need to ignore case here since we lowercase in this analyzer anyway
        StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
        CharArrayMap<string>.EntryIterator iter = (CharArrayMap<string>.EntryIterator)stemOverrideDict.EntrySet().GetEnumerator();
        while (iter.HasNext)
        {
            char[] nextKey = iter.NextKey();
            // Build the key straight from the key array. Going through the full backing
            // array of a reused scratch buffer could append leftover characters from a
            // longer, earlier key whenever the buffer is not reallocated to the exact size.
            builder.Add(new string(nextKey), iter.CurrentValue);
        }

        try
        {
            this.stemdict = builder.Build();
        }
        catch (IOException ex)
        {
            throw new Exception("can not build stem dict", ex);
        }
    }
}