コード例 #1
0
ファイル: DutchAnalyzer.cs プロジェクト: zfxsss/lucenenet
 /// <summary>
 /// Creates the analyzer from a stop word set, a stem exclusion set and a stem
 /// override dictionary.
 /// <para>
 /// The stop word set and the exclusion set are stored as unmodifiable copies.
 /// When <paramref name="stemOverrideDict"/> is empty, or <paramref name="matchVersion"/>
 /// is not on or after <c>Version.LUCENE_31</c>, the dictionary itself is kept as an
 /// unmodifiable copy and no override map is built. Otherwise every entry of the
 /// dictionary is fed into a <c>StemmerOverrideFilter.Builder</c> and the built
 /// result is stored instead.
 /// </para>
 /// </summary>
 /// <param name="matchVersion">Version passed to the copy helpers and used to select how the override dictionary is stored.</param>
 /// <param name="stopwords">Set copied into the stop table.</param>
 /// <param name="stemExclusionTable">Set copied into the exclusion table.</param>
 /// <param name="stemOverrideDict">Dictionary of stem overrides (key characters mapped to a replacement string).</param>
 /// <exception cref="InvalidOperationException">
 /// Building the override map raised an <see cref="IOException"/>; the original
 /// exception is available as the inner exception.
 /// </exception>
 public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap <string> stemOverrideDict)
 {
     this.matchVersion = matchVersion;
     this.stoptable    = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
     this.excltable    = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
     if (stemOverrideDict.Empty || !matchVersion.onOrAfter(Version.LUCENE_31))
     {
         // Nothing to build (or pre-3.1 behaviour requested): keep only the raw dictionary copy.
         this.stemdict     = null;
         this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
     }
     else
     {
         this.origStemdict = null;
         // we don't need to ignore case here since we lowercase in this analyzer anyway
         StemmerOverrideFilter.Builder        builder = new StemmerOverrideFilter.Builder(false);
         CharArrayMap <string> .EntryIterator iter    = stemOverrideDict.entrySet().GetEnumerator();
         // A single scratch CharsRef is refilled for every key instead of allocating one per entry.
         CharsRef spare = new CharsRef();
         while (iter.hasNext())
         {
             char[] nextKey = iter.nextKey();
             spare.copyChars(nextKey, 0, nextKey.Length);
             builder.add(spare, iter.currentValue());
         }
         try
         {
             this.stemdict = builder.build();
         }
         catch (IOException ex)
         {
             // Throw a specific exception type rather than the reserved base System.Exception;
             // callers that catch Exception keep working and the cause is preserved.
             throw new InvalidOperationException("can not build stem dict", ex);
         }
     }
 }
コード例 #2
0
 /// <summary>
 /// Initializes a <see cref="StemmerOverrideFilter"/> that performs dictionary-based
 /// stemming driven by <paramref name="stemmerOverrideMap"/>.
 /// <para>
 /// Terms stemmed through the dictionary are marked with <see cref="KeywordAttribute"/>
 /// so that stemmers further down the chain leave them alone.
 /// </para>
 /// </summary>
 /// <param name="input">Token stream forwarded to the base constructor.</param>
 /// <param name="stemmerOverrideMap">Override map to use; its bytes reader is fetched here.</param>
 public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
     : base(input)
 {
     this.stemmerOverrideMap = stemmerOverrideMap;
     this.fstReader = stemmerOverrideMap.GetBytesReader();

     // Register the attributes this filter works with.
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.keywordAtt = AddAttribute<IKeywordAttribute>();
 }
コード例 #3
0
 /// <summary>
 /// Constructs a <see cref="StemmerOverrideFilter"/> that stems terms by looking them
 /// up in the supplied <paramref name="stemmerOverrideMap"/>.
 /// <para>
 /// Every term stemmed from the dictionary is marked with <see cref="KeywordAttribute"/>,
 /// which prevents later stemmers in the chain from stemming it again.
 /// </para>
 /// </summary>
 /// <param name="input">Token stream forwarded to the base constructor.</param>
 /// <param name="stemmerOverrideMap">Override map whose <c>BytesReader</c> is read during construction.</param>
 public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
     : base(input)
 {
     this.stemmerOverrideMap = stemmerOverrideMap;
     this.fstReader = stemmerOverrideMap.BytesReader;

     // Term text first, keyword flag second.
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.keywordAtt = AddAttribute<IKeywordAttribute>();
 }
コード例 #4
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRandomRealisticKeyword() throws java.io.IOException
        /// <summary>
        /// Fills an override map with random realistic-unicode keys mapped to random
        /// simple strings, then, for a random subset of the entries, runs the key through
        /// <c>KeywordTokenizer</c> -> <c>StemmerOverrideFilter</c> -> <c>PorterStemFilter</c>
        /// and asserts that the single emitted token equals the mapped value.
        /// </summary>
        public virtual void testRandomRealisticKeyword()
        {
            IDictionary <string, string> overrides = new Dictionary <string, string>();
            int termCount = atLeast(50);

            for (int n = 0; n < termCount; n++)
            {
                string key = TestUtil.randomRealisticUnicodeString(random());
                if (key.Length == 0)
                {
                    // Empty keys are skipped without drawing a value.
                    continue;
                }

                string stem = TestUtil.randomSimpleString(random());
                if (stem.Length == 0)
                {
                    stem = "a";
                }
                overrides[key] = stem;
            }

            // Guarantee at least one mapping to verify.
            if (overrides.Count == 0)
            {
                overrides["booked"] = "books";
            }

            bool ignoreCase = random().nextBoolean();
            StemmerOverrideFilter.Builder mapBuilder = new StemmerOverrideFilter.Builder(ignoreCase);
            ISet <KeyValuePair <string, string> > pairs = overrides.SetOfKeyValuePairs();

            foreach (KeyValuePair <string, string> pair in pairs)
            {
                mapBuilder.add(pair.Key, pair.Value);
            }
            StemmerOverrideMap overrideMap = mapBuilder.build();

            foreach (KeyValuePair <string, string> pair in pairs)
            {
                // Only a random subset of the entries is checked.
                if (!random().nextBoolean())
                {
                    continue;
                }

                Tokenizer source = new KeywordTokenizer(new StringReader(pair.Key));
                TokenStream chain = new PorterStemFilter(new StemmerOverrideFilter(source, overrideMap));
                assertTokenStreamContents(chain, new string[] { pair.Value });
            }
        }
コード例 #5
0
 /// <summary>
 /// Sets up a <see cref="StemmerOverrideFilter"/> that stems terms according to the
 /// given <paramref name="stemmerOverrideMap"/>.
 /// <para>
 /// Terms stemmed from the dictionary are marked with <see cref="KeywordAttribute"/>
 /// so that stemmers later in the chain do not stem them.
 /// </para>
 /// </summary>
 /// <param name="input">Token stream forwarded to the base constructor.</param>
 /// <param name="stemmerOverrideMap">Override map; its <c>BytesReader</c> is stored alongside it.</param>
 public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
     : base(input)
 {
     this.stemmerOverrideMap = stemmerOverrideMap;
     this.fstReader = stemmerOverrideMap.BytesReader;
 }
コード例 #6
0
	  /// <summary>
	  /// Builds a <see cref="StemmerOverrideFilter"/> whose dictionary-based stemming is
	  /// backed by <paramref name="stemmerOverrideMap"/>.
	  /// <para>
	  /// Dictionary-stemmed terms are marked with <see cref="KeywordAttribute"/>, so
	  /// stemmers down the chain will not stem them.
	  /// </para>
	  /// </summary>
	  /// <param name="input">Token stream forwarded to the base constructor.</param>
	  /// <param name="stemmerOverrideMap">Override map; its <c>BytesReader</c> is captured at construction.</param>
	  public StemmerOverrideFilter(TokenStream input, StemmerOverrideMap stemmerOverrideMap)
		: base(input)
	  {
		this.stemmerOverrideMap = stemmerOverrideMap;
		this.fstReader = stemmerOverrideMap.BytesReader;
	  }