/**
 * Builds the two-stage filter chain for the given stream: the input is wrapped
 * in a CommonGramsFilter (configured with the current commonWords and
 * ignoreCase values), and that filter is then wrapped in a
 * CommonGramsQueryFilter.
 *
 * @param input TokenStream to be wrapped.
 * @return the CommonGramsQueryFilter that wraps the newly created
 *         CommonGramsFilter.
 */
public override TokenStream create(TokenStream input)
{
    return new CommonGramsQueryFilter(
        new CommonGramsFilter(input, commonWords, ignoreCase));
}
/**
 * Creates a filter over <code>input</code> that uses a Set of common words to
 * build bigrams.
 * <p/>
 * How the set is stored depends on its runtime type:
 * <ul>
 * <li>If <code>commonWords</code> is not a {@link CharArraySet}, a new
 * CharArraySet is created (sized from <code>commonWords.size()</code>, with
 * case sensitivity taken from <code>ignoreCase</code>) and every element of
 * <code>commonWords</code> is added to it.</li>
 * <li>If <code>commonWords</code> already is a {@link CharArraySet}, it is
 * used directly and <code>ignoreCase</code> is not consulted; the set itself
 * controls case sensitivity.</li>
 * </ul>
 *
 * @param input TokenStream input in filter chain.
 * @param commonWords The set of common words.
 * @param ignoreCase Ignore case when constructing bigrams for common words;
 *        only used when a new CharArraySet has to be built.
 */
public CommonGramsFilter(TokenStream input, Set commonWords, bool ignoreCase)
    : base(input)
{
    if (!(commonWords is CharArraySet))
    {
        // Arbitrary Set: copy its contents into a CharArraySet of our own.
        this.commonWords = new CharArraySet(commonWords.size(), ignoreCase);
        this.commonWords.addAll(commonWords);
    }
    else
    {
        // Already the right type: adopt the caller's set as-is.
        this.commonWords = (CharArraySet)commonWords;
    }

    init();
}
/**
 * Creates a filter over <code>input</code> that uses a Set of common words to
 * build bigrams. The filter outputs both unigrams with position increment and
 * bigrams with position increment 0 and type=gram, wherever one or both words
 * of a potential bigram are in the set of common words.
 * <p/>
 * Equivalent to calling the three-argument Set constructor with
 * <code>ignoreCase</code> set to <code>false</code>.
 *
 * @param input TokenStream input in filter chain.
 * @param commonWords The set of common words.
 */
public CommonGramsFilter(TokenStream input, Set commonWords)
    : this(input, commonWords, false)
{
}
/**
 * Creates a filter over <code>input</code> that uses an array of common words
 * to build bigrams. The array is turned into the filter's common-word set via
 * <code>makeCommonSet(commonWords, ignoreCase)</code>; matching is
 * case-sensitive when <code>ignoreCase</code> is false.
 *
 * @param input TokenStream in filter chain.
 * @param commonWords Words to be used in constructing bigrams.
 * @param ignoreCase Ignore case when constructing bigrams for common words.
 */
public CommonGramsFilter(TokenStream input, string[] commonWords, bool ignoreCase)
    : base(input)
{
    this.commonWords = makeCommonSet(commonWords, ignoreCase);

    init();
}
/**
 * Construct a token stream filtering the given input using an Array of common
 * words to create bigrams. Delegates to the three-argument array constructor
 * with <code>ignoreCase</code> set to <code>false</code>, so matching is
 * case-sensitive.
 *
 * @param input Tokenstream in filter chain
 * @param commonWords words to be used in constructing bigrams
 */
public CommonGramsFilter(TokenStream input, string[] commonWords)
    : this(input, commonWords, false)
{
    // Intentionally empty: the chained three-argument constructor already
    // calls init() as its last step, so calling it again here would run the
    // initialisation twice.
}