A read-only, 4-byte FST-backed map that allows fast case-insensitive key-value lookups for StemmerOverrideFilter.
コード例 #1
0
 /// <summary>
 /// Loads the stemmer override dictionary from the configured dictionary files.
 /// </summary>
 /// <remarks>
 /// Does nothing when <c>dictionaryFiles</c> is null, and leaves <c>dictionary</c> untouched when
 /// the file list is empty. Otherwise every line of every file is split at its first tab into a
 /// key and a value, which are added to a <see cref="StemmerOverrideFilter.Builder"/>; the built
 /// result is stored in <c>dictionary</c>.
 /// </remarks>
 /// <param name="loader">Resource loader handed to <c>GetLines</c> to read each dictionary file.</param>
 public virtual void Inform(IResourceLoader loader)
 {
     if (dictionaryFiles == null)
     {
         return;
     }

     AssureMatchVersion();
     IList<string> files = SplitFileNames(dictionaryFiles);
     if (files.Count == 0)
     {
         return;
     }

     // Split on a plain tab character; allocated once instead of building a Regex per line.
     char[] tab = { '\t' };
     StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
     foreach (string file in files)
     {
         foreach (string line in GetLines(loader, file.Trim()))
         {
             // At most two parts: everything after the first tab is the value, further tabs included.
             string[] mapping = line.Split(tab, 2);
             // NOTE(review): a line without a tab yields a single element, so mapping[1] throws
             // IndexOutOfRangeException, exactly as before this change.
             builder.Add(mapping[0], mapping[1]);
         }
     }
     dictionary = builder.Build();
 }
コード例 #2
0
 /// <summary>
 /// Loads the stemmer override dictionary from the configured dictionary files.
 /// </summary>
 /// <remarks>
 /// Does nothing when <c>dictionaryFiles</c> is null, and leaves <c>dictionary</c> untouched when
 /// the file sequence is empty. Otherwise every line of every file is split at its first tab into
 /// a key and a value, which are added to a <see cref="StemmerOverrideFilter.Builder"/>; the built
 /// result is stored in <c>dictionary</c>.
 /// </remarks>
 /// <param name="loader">Resource loader handed to <c>GetLines</c> to read each dictionary file.</param>
 public virtual void Inform(IResourceLoader loader)
 {
     if (dictionaryFiles == null)
     {
         return;
     }

     AssureMatchVersion();
     IEnumerable<string> files = SplitFileNames(dictionaryFiles);
     // Any() stops at the first element; Count() would walk the whole sequence just to test emptiness.
     if (!files.Any())
     {
         return;
     }

     // Split on a plain tab character; allocated once instead of building a Regex per line.
     char[] tab = { '\t' };
     StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
     foreach (string file in files)
     {
         IEnumerable<string> lines = GetLines(loader, file.Trim());
         foreach (string line in lines)
         {
             // At most two parts: everything after the first tab is the value, further tabs included.
             string[] mapping = line.Split(tab, 2);
             // NOTE(review): a line without a tab yields a single element, so mapping[1] throws
             // IndexOutOfRangeException, exactly as before this change.
             builder.Add(mapping[0], mapping[1]);
         }
     }
     dictionary = builder.Build();
 }
コード例 #3
0
        /// <summary>
        /// Builds an override map from random realistic Unicode keys and, for a random subset of the
        /// entries, checks that running the key through a keyword tokenizer, the override filter and
        /// a Porter stem filter emits exactly the mapped value.
        /// </summary>
        public virtual void TestRandomRealisticKeyword()
        {
            IDictionary<string, string> overrides = new Dictionary<string, string>();
            int termCount = AtLeast(50);

            for (int n = 0; n < termCount; n++)
            {
                string key = TestUtil.RandomRealisticUnicodeString(Random);
                if (key.Length == 0)
                {
                    // Empty keys are skipped without drawing a value.
                    continue;
                }

                string stem = TestUtil.RandomSimpleString(Random);
                if (stem.Length == 0)
                {
                    // An empty random value is replaced by a fixed one-letter value.
                    stem = "a";
                }
                overrides[key] = stem;
            }

            // Make sure there is at least one mapping to exercise.
            if (overrides.Count == 0)
            {
                overrides["booked"] = "books";
            }

            bool ignoreCase = Random.nextBoolean();
            StemmerOverrideFilter.Builder mapBuilder = new StemmerOverrideFilter.Builder(ignoreCase);
            foreach (KeyValuePair<string, string> pair in overrides)
            {
                mapBuilder.Add(pair.Key, pair.Value);
            }
            StemmerOverrideFilter.StemmerOverrideMap overrideMap = mapBuilder.Build();

            foreach (KeyValuePair<string, string> pair in overrides)
            {
                // Only a random subset of the entries is verified.
                if (!Random.nextBoolean())
                {
                    continue;
                }

                Tokenizer source = new KeywordTokenizer(new StringReader(pair.Key));
                TokenStream overridden = new StemmerOverrideFilter(source, overrideMap);
                TokenStream stemmed = new PorterStemFilter(overridden);
                AssertTokenStreamContents(stemmed, new string[] { pair.Value });
            }
        }
コード例 #4
0
 /// <summary>
 /// Creates a Dutch analyzer from the given stop words, stem exclusion set and stem override dictionary.
 /// </summary>
 /// <remarks>
 /// The stop word and exclusion sets are stored as unmodifiable copies. The override dictionary is
 /// kept in one of two forms: as an unmodifiable copy in <c>origStemdict</c> when it is empty or
 /// <paramref name="matchVersion"/> is before <c>LUCENE_31</c>, otherwise as a map built by
 /// <see cref="StemmerOverrideFilter.Builder"/> in <c>stemdict</c>. The field that is not used is
 /// set to null.
 /// </remarks>
 /// <param name="matchVersion">Version stored on the analyzer and passed to the copy helpers.</param>
 /// <param name="stopwords">Stop word set to copy.</param>
 /// <param name="stemExclusionTable">Set of terms excluded from stemming, to copy.</param>
 /// <param name="stemOverrideDict">Term-to-stem overrides; must not be null, since its <c>Count</c> is read.</param>
 /// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while building the override map.</exception>
 public DutchAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<string> stemOverrideDict)
 {
     this.matchVersion = matchVersion;
     this.stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stopwords));
     this.excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionTable));
     #pragma warning disable 612, 618
     if (stemOverrideDict.Count == 0 || !matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
     #pragma warning restore 612, 618
     {
         this.stemdict = null;
         this.origStemdict = CharArrayMap.UnmodifiableMap(CharArrayMap.Copy(matchVersion, stemOverrideDict));
     }
     else
     {
         this.origStemdict = null;
         // we don't need to ignore case here since we lowercase in this analyzer anyway
         StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
         CharArrayMap<string>.EntryIterator iter = (CharArrayMap<string>.EntryIterator)stemOverrideDict.EntrySet().GetEnumerator();
         while (iter.HasNext)
         {
             char[] nextKey = iter.NextKey();
             // Build the key from exactly the characters of this entry. Going through a reused
             // CharsRef and stringifying its whole backing array could append leftover characters
             // whenever that buffer is longer than the current key.
             builder.Add(new string(nextKey), iter.CurrentValue);
         }
         try
         {
             this.stemdict = builder.Build();
         }
         catch (IOException ex)
         {
             throw new Exception("can not build stem dict", ex);
         }
     }
 }