/// <summary>
/// Emits the next token, optionally replacing it with — or injecting alongside it —
/// its phonetic encoding. If a previous call stashed the original token (inject
/// mode), that saved state is replayed before any new input is consumed.
/// </summary>
/// <returns>true while tokens remain; false once the input stream is exhausted</returns>
public override bool incrementToken() {
    // A captured original token is pending from the last call: replay it now.
    if (save != null) {
        // clearAttributes(); // not currently necessary
        restoreState(save);
        save = null;
        return true;
    }

    if (!input.incrementToken()) {
        return false;
    }

    // Pass zero-length terms through untouched.
    if (termAtt.termLength() == 0) {
        return true;
    }

    string term = termAtt.term();
    string encoded = null;
    try {
        string candidate = encoder.encode(term).ToString();
        // Only keep the encoding when it is non-empty and actually
        // differs from the original surface form.
        if (candidate.Length > 0 && !term.Equals(candidate)) {
            encoded = candidate;
        }
    }
#pragma warning disable 168
    catch (java.lang.Exception ignored) {
    } // encoder failed: just use the direct text
#pragma warning restore 168

    if (encoded == null) {
        return true;
    }

    if (!inject) {
        // Replace-only mode: overwrite the current token in place.
        termAtt.setTermBuffer(encoded);
        return true;
    }

    // Inject mode: we must return both the phonetic and the original token.
    // Capture the original with position increment 0 so that, when replayed
    // on the next call, it stacks at the same position as the phonetic form,
    // which goes out first carrying the original increment.
    int priorIncrement = posAtt.getPositionIncrement();
    posAtt.setPositionIncrement(0);
    save = captureState();
    posAtt.setPositionIncrement(priorIncrement);
    termAtt.setTermBuffer(encoded);
    return true;
}
/**
 * Converts the original query string to a collection of Lucene Tokens.
 *
 * Words are pulled out of the query with a simple regex; the boolean
 * operators AND/OR are skipped, and every remaining word is re-analyzed
 * so its attributes can be copied onto freshly built Tokens.
 *
 * @param original the original query string (may be null, e.g. q.alt with no query)
 * @return a Collection of Lucene Tokens
 */
public override Collection /*<Token>*/ convert(string original) {
    // q.alt with no query can hand us null — treat it as "no tokens".
    if (original == null) {
        return Collections.emptyList();
    }

    Collection /*<Token>*/ tokens = new ArrayList /*<Token>*/ ();
    //TODO: Extract the words using a simple regex, but not query stuff, and then analyze them to produce the token stream
    Matcher wordMatcher = QUERY_REGEX.matcher(original);
    TokenStream tokenStream;

    while (wordMatcher.find()) {
        string candidate = wordMatcher.group(0);

        // Boolean operators are query syntax, not spell-checkable words.
        if ("AND".Equals(candidate) || "OR".Equals(candidate)) {
            continue;
        }

        try {
            tokenStream = analyzer.reusableTokenStream("", new StringReader(candidate));

            // TODO: support custom attributes
            TermAttribute termAtt = (TermAttribute)tokenStream.addAttribute(typeof(TermAttribute));
            FlagsAttribute flagsAtt = (FlagsAttribute)tokenStream.addAttribute(typeof(FlagsAttribute));
            TypeAttribute typeAtt = (TypeAttribute)tokenStream.addAttribute(typeof(TypeAttribute));
            PayloadAttribute payloadAtt = (PayloadAttribute)tokenStream.addAttribute(typeof(PayloadAttribute));
            PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute)tokenStream.addAttribute(typeof(PositionIncrementAttribute));

            tokenStream.reset();

            // Copy each analyzed term into a Token, carrying over the
            // offsets of the matched word plus all standard attributes.
            while (tokenStream.incrementToken()) {
                Token converted = new Token();
                converted.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
                converted.setStartOffset(wordMatcher.start());
                converted.setEndOffset(wordMatcher.end());
                converted.setFlags(flagsAtt.getFlags());
                converted.setType(typeAtt.type());
                converted.setPayload(payloadAtt.getPayload());
                converted.setPositionIncrement(posIncAtt.getPositionIncrement());
                tokens.add(converted);
            }
        }
#pragma warning disable 168
        catch (IOException e) {
        } // best-effort: a word that fails analysis simply contributes no tokens
#pragma warning restore 168
    }

    return tokens;
}