/**
 * Creates a phonetic filter layered over an existing token stream.
 *
 * The constructor only captures its arguments and registers the two
 * attributes (term text and position increment) on this stream.
 *
 * @param input   the upstream token stream, passed to the base constructor
 * @param encoder the encoder kept on this filter
 * @param name    the name kept on this filter
 * @param inject  flag kept on this filter
 *                (NOTE(review): presumably decides whether encoded terms are
 *                emitted in addition to the originals - confirm against the
 *                token-producing method, which is not visible here)
 */
public PhoneticFilter(TokenStream input, Encoder encoder, string name, bool inject)
    : base(input)
{
    // Caller-supplied configuration.
    this.inject = inject;
    this.name = name;
    this.encoder = encoder;

    // Attribute registration: term first, then position increment.
    termAtt = (TermAttribute)addAttribute(typeof(TermAttribute));
    posAtt = (PositionIncrementAttribute)addAttribute(typeof(PositionIncrementAttribute));
}
/**
 * Breaks the original query string into a collection of Lucene Tokens.
 *
 * Every match of QUERY_REGEX in the input is run through the analyzer,
 * except the literal words "AND" and "OR", which are skipped. Each token the
 * analyzer emits is copied into a new Token whose start/end offsets are the
 * bounds of the regex match within the original string.
 *
 * An IOException raised while analyzing a word is ignored; tokens already
 * collected are kept and processing moves on to the next match.
 *
 * @param original the original query string; may be null
 * @return a Collection of Lucene Tokens; an empty list when original is null
 */
public override Collection /*<Token>*/ convert(string original)
{
    // A null query can happen with q.alt = and no query.
    if (original == null)
    {
        return(Collections.emptyList());
    }

    Collection /*<Token>*/ tokens = new ArrayList/*<Token>*/ ();
    //TODO: Extract the words using a simple regex, but not query stuff, and then analyze them to produce the token stream
    Matcher wordMatcher = QUERY_REGEX.matcher(original);

    while (wordMatcher.find())
    {
        string candidate = wordMatcher.group(0);

        // Boolean operators are query syntax, not words to analyze.
        if (candidate.Equals("AND") || candidate.Equals("OR"))
        {
            continue;
        }

        try
        {
            TokenStream analyzed = analyzer.reusableTokenStream("", new StringReader(candidate));

            // TODO: support custom attributes
            TermAttribute term = (TermAttribute)analyzed.addAttribute(typeof(TermAttribute));
            FlagsAttribute flags = (FlagsAttribute)analyzed.addAttribute(typeof(FlagsAttribute));
            TypeAttribute type = (TypeAttribute)analyzed.addAttribute(typeof(TypeAttribute));
            PayloadAttribute payload = (PayloadAttribute)analyzed.addAttribute(typeof(PayloadAttribute));
            PositionIncrementAttribute posInc = (PositionIncrementAttribute)analyzed.addAttribute(typeof(PositionIncrementAttribute));

            analyzed.reset();
            while (analyzed.incrementToken())
            {
                // Snapshot the current attribute values into a standalone Token.
                Token copy = new Token();
                copy.setTermBuffer(term.termBuffer(), 0, term.termLength());
                // Offsets come from the regex match, not from the analyzer.
                copy.setStartOffset(wordMatcher.start());
                copy.setEndOffset(wordMatcher.end());
                copy.setFlags(flags.getFlags());
                copy.setType(type.type());
                copy.setPayload(payload.getPayload());
                copy.setPositionIncrement(posInc.getPositionIncrement());
                tokens.add(copy);
            }
        }
        catch (IOException)
        {
            // Intentionally ignored, as in the original: move on to the next match.
        }
    }

    return(tokens);
}
/**
 * Creates a Snowball stemming filter layered over an existing token stream.
 *
 * The constructor only captures its arguments and registers the term
 * attribute on this stream.
 *
 * @param source    the upstream token stream, passed to the base constructor
 * @param stemmer   the Snowball program kept on this filter
 * @param protWords the word set kept on this filter
 *                  (NOTE(review): presumably words protected from stemming -
 *                  confirm against the token-producing method, which is not
 *                  visible here)
 */
public SnowballPorterFilter(TokenStream source, SnowballProgram stemmer, CharArraySet protWords)
    : base(source)
{
    // Caller-supplied collaborators.
    this.stemmer = stemmer;
    this.protWords = protWords;

    // Term text is the only attribute registered here.
    termAtt = (TermAttribute)addAttribute(typeof(TermAttribute));
}