コード例 #1
0
        /**
         * Builds a two-stage filter chain over the given stream: a
         * CommonGramsFilter configured with this object's commonWords and
         * ignoreCase members, wrapped in a CommonGramsQueryFilter.
         *
         * @param input TokenStream to be filtered.
         * @return the CommonGramsQueryFilter that wraps the newly created
         *         CommonGramsFilter.
         */
        public override TokenStream create(TokenStream input)
        {
            return new CommonGramsQueryFilter(
                new CommonGramsFilter(input, commonWords, ignoreCase));
        }
コード例 #2
0
 /**
  * Creates a filter over <code>input</code> that uses a Set of common words
  * to create bigrams.
  * <p/>
  * How the set is stored depends on its runtime type:
  * <ul>
  * <li>If <code>commonWords</code> already is a {@link CharArraySet}, that
  * instance is used as-is and <code>ignoreCase</code> is not consulted; the
  * set's own configuration decides case sensitivity.</li>
  * <li>Otherwise a new CharArraySet is allocated with
  * <code>commonWords.size()</code> as its initial size and
  * <code>ignoreCase</code> as its case-sensitivity flag, and every entry of
  * <code>commonWords</code> is added to it.</li>
  * </ul>
  * <code>init()</code> is called once the set has been stored.
  *
  * @param input TokenStream input in filter chain.
  * @param commonWords The set of common words.
  * @param ignoreCase Whether a newly built set ignores case; has no effect
  *        when <code>commonWords</code> is already a CharArraySet.
  */
 public CommonGramsFilter(TokenStream input, Set commonWords, bool ignoreCase)
     : base(input)
 {
     if (!(commonWords is CharArraySet))
     {
         // Arbitrary Set: copy it into a CharArraySet with the requested case handling.
         CharArraySet copy = new CharArraySet(commonWords.size(), ignoreCase);
         copy.addAll(commonWords);
         this.commonWords = copy;
     }
     else
     {
         // Already the right type: keep the caller's instance untouched.
         this.commonWords = (CharArraySet)commonWords;
     }

     init();
 }
コード例 #3
0
 /**
  * Convenience overload for a Set of common words: forwards to the
  * (input, commonWords, ignoreCase) constructor with <code>ignoreCase</code>
  * set to <code>false</code>.
  * <p/>
  * The resulting filter emits unigrams with their position increment, plus
  * bigrams (position increment 0, type=gram) wherever one or both words of
  * a potential bigram are in the set of common words.
  *
  * @param input TokenStream input in filter chain.
  * @param commonWords The set of common words.
  */
 public CommonGramsFilter(TokenStream input, Set commonWords)
     : this(input, commonWords, false)
 {
     // All work is done by the three-argument constructor.
 }
コード例 #4
0
 /**
  * Creates a filter over <code>input</code> from an array of common words
  * used to create bigrams. The array is converted with
  * <code>makeCommonSet(commonWords, ignoreCase)</code>, the result is stored
  * as this filter's common-word set, and <code>init()</code> is then called.
  *
  * @param input TokenStream input in filter chain.
  * @param commonWords Words to be used in constructing bigrams.
  * @param ignoreCase Passed through to <code>makeCommonSet</code>; when
  *        false the filter is case-sensitive.
  */
 public CommonGramsFilter(TokenStream input, string[] commonWords, bool ignoreCase)
     : base(input)
 {
     var wordSet = makeCommonSet(commonWords, ignoreCase);
     this.commonWords = wordSet;

     init();
 }
コード例 #5
0
 /**
  * Convenience overload for an array of common words: forwards to the
  * (input, commonWords, ignoreCase) constructor with <code>ignoreCase</code>
  * set to <code>false</code>.
  *
  * @param input TokenStream input in filter chain.
  * @param commonWords Words to be used in constructing bigrams.
  */
 public CommonGramsFilter(TokenStream input, string[] commonWords) : this(input, commonWords, false)
 {
     // Intentionally empty: the three-argument constructor this one chains to
     // already calls init(), so calling it again here would run the
     // initialization twice.
 }