/*
 * Shows how to construct a phrase query containing shingles.
 * (Converted from Java; the original 'throws Exception' clause has no .NET equivalent.)
 */
public virtual void testShingleAnalyzerWrapperPhraseQuery()
{
    PhraseQuery q = new PhraseQuery();

    TokenStream ts = analyzer.tokenStream("content", "this sentence");
    try
    {
        // Attributes are registered up front; they are views onto the stream's state.
        PositionIncrementAttribute posIncrAtt = ts.addAttribute(typeof(PositionIncrementAttribute));
        CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));

        int position = -1; // running token position, advanced by each increment
        ts.reset();
        while (ts.incrementToken())
        {
            position += posIncrAtt.PositionIncrement;
            q.add(new Term("content", termAtt.ToString()), position);
        }
        ts.end();
    }
    finally
    {
        // Close even if consumption threw, suppressing secondary exceptions.
        IOUtils.closeWhileHandlingException(ts);
    }

    ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
    compareRanks(hits, new int[] { 0 });
}
/// <summary>
/// Creates a new <c>PhoneticFilter</c> over the given stream.
/// </summary>
/// <param name="input"> the upstream token stream to wrap </param>
/// <param name="encoder"> phonetic encoder applied to terms (used during token production, not shown here) </param>
/// <param name="name"> name stored for this filter instance </param>
/// <param name="inject"> injection flag stored for later use — semantics defined by the filter's incrementToken </param>
public PhoneticFilter(TokenStream input, Encoder encoder, string name, bool inject) : base(input)
{
    this.encoder = encoder;
    this.name = name;
    this.inject = inject;

    // Register the attributes this filter reads and writes on the shared attribute source.
    this.termAtt = (TermAttribute)addAttribute(typeof(TermAttribute));
    this.posAtt = (PositionIncrementAttribute)addAttribute(typeof(PositionIncrementAttribute));
}
/**
 * Converts the original query string to a collection of Lucene Tokens.
 * @param original the original query string
 * @return a Collection of Lucene Tokens
 */
public override Collection /*<Token>*/ convert(string original)
{
    // This can happen with q.alt = and no query.
    if (original == null)
    {
        return Collections.emptyList();
    }

    Collection /*<Token>*/ tokens = new ArrayList /*<Token>*/ ();

    // TODO: Extract the words using a simple regex, but not query stuff,
    // and then analyze them to produce the token stream.
    Matcher matcher = QUERY_REGEX.matcher(original);
    while (matcher.find())
    {
        string word = matcher.group(0);
        // Skip the boolean query operators; analyze everything else.
        if (!word.Equals("AND") && !word.Equals("OR"))
        {
            try
            {
                TokenStream stream = analyzer.reusableTokenStream("", new StringReader(word));
                // TODO: support custom attributes
                TermAttribute termAtt = (TermAttribute)stream.addAttribute(typeof(TermAttribute));
                FlagsAttribute flagsAtt = (FlagsAttribute)stream.addAttribute(typeof(FlagsAttribute));
                TypeAttribute typeAtt = (TypeAttribute)stream.addAttribute(typeof(TypeAttribute));
                PayloadAttribute payloadAtt = (PayloadAttribute)stream.addAttribute(typeof(PayloadAttribute));
                PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute)stream.addAttribute(typeof(PositionIncrementAttribute));

                stream.reset();
                while (stream.incrementToken())
                {
                    // Copy the analyzed attributes into a standalone Token; offsets come
                    // from the regex match position within the original query string.
                    Token token = new Token();
                    token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
                    token.setStartOffset(matcher.start());
                    token.setEndOffset(matcher.end());
                    token.setFlags(flagsAtt.getFlags());
                    token.setType(typeAtt.type());
                    token.setPayload(payloadAtt.getPayload());
                    token.setPositionIncrement(posIncAtt.getPositionIncrement());
                    tokens.add(token);
                }
            }
#pragma warning disable 168
            catch (IOException e)
#pragma warning restore 168
            {
                // Deliberate best-effort: a word that fails analysis is silently skipped.
            }
        }
    }
    return tokens;
}
/// <summary>
/// Builds a filter over a prefix and a suffix stream. The <c>prefixExhausted</c>
/// flag presumably gates switching from prefix to suffix tokens — behavior is
/// defined in incrementToken (not shown). The full set of standard token
/// attributes is registered both on this filter and, mirrored, on the prefix stream.
/// </summary>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;

    // Attributes exposed by this filter (shared with the suffix stream via base(suffix)).
    termAtt = addAttribute(typeof(CharTermAttribute));
    posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
    payloadAtt = addAttribute(typeof(PayloadAttribute));
    offsetAtt = addAttribute(typeof(OffsetAttribute));
    typeAtt = addAttribute(typeof(TypeAttribute));
    flagsAtt = addAttribute(typeof(FlagsAttribute));

    // Mirrored attributes registered on the prefix stream itself.
    p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
    p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
    p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
    p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
    p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
    p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Constructs the filter from a prefix and a suffix <c>TokenStream</c>.
/// NOTE(review): this is byte-identical to another PrefixAwareTokenFilter
/// constructor in this SOURCE — likely two converted files concatenated; verify.
/// </summary>
/// <param name="prefix"> stream whose tokens come first </param>
/// <param name="suffix"> stream this filter wraps as its input </param>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
{
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;

    // Local attribute registrations (this filter / the wrapped suffix stream):
    termAtt = addAttribute(typeof(CharTermAttribute));
    posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
    payloadAtt = addAttribute(typeof(PayloadAttribute));
    offsetAtt = addAttribute(typeof(OffsetAttribute));
    typeAtt = addAttribute(typeof(TypeAttribute));
    flagsAtt = addAttribute(typeof(FlagsAttribute));

    // Corresponding registrations on the prefix stream (p_* fields):
    p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
    p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
    p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
    p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
    p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
    p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Test helper: a token stream backed by a pre-built token enumerator,
/// used by the remove-duplicates filter tests.
/// </summary>
/// <param name="outerInstance"> owning test fixture </param>
/// <param name="toks"> enumerator supplying the tokens to emit </param>
public TokenStreamAnonymousInnerClassHelper(TestRemoveDuplicatesTokenFilter outerInstance, IEnumerator<Token> toks)
{
    this.outerInstance = outerInstance;
    this.toks = toks;

    // Attributes this stream populates per token.
    termAtt = addAttribute(typeof(CharTermAttribute));
    offsetAtt = addAttribute(typeof(OffsetAttribute));
    posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>
/// Exhaustively checks NGramTokenizer output against a reference enumeration of
/// all [minGram, maxGram] grams of <paramref name="s"/>, working in Unicode code
/// points so surrogate pairs count as a single character.
/// Converted from Java; 'throws IOException' / 'final' have no .NET equivalents.
/// </summary>
/// <param name="minGram"> smallest gram length to expect </param>
/// <param name="maxGram"> largest gram length to expect </param>
/// <param name="s"> the input text to tokenize </param>
/// <param name="nonTokenChars"> characters treated as token breaks (see isTokenChar) </param>
/// <param name="edgesOnly"> when true, only grams starting at a token edge are expected </param>
internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
{
    // Convert the string to code points.
    int[] codePoints = toCodePoints(s);

    // offsets[i] = UTF-16 offset of code point i; offsets[codePoints.Length] == s.Length.
    int[] offsets = new int[codePoints.Length + 1];
    for (int i = 0; i < codePoints.Length; ++i)
    {
        // BUGFIX: the converter left Java's Character.charCount() as 'char.charCount',
        // which does not exist in .NET. char.ConvertFromUtf32(cp).Length yields the same
        // value: 1 for BMP code points, 2 for supplementary (surrogate-pair) code points.
        offsets[i + 1] = offsets[i] + char.ConvertFromUtf32(codePoints[i]).Length;
    }

    TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
    CharTermAttribute termAtt = grams.addAttribute(typeof(CharTermAttribute));
    PositionIncrementAttribute posIncAtt = grams.addAttribute(typeof(PositionIncrementAttribute));
    PositionLengthAttribute posLenAtt = grams.addAttribute(typeof(PositionLengthAttribute));
    OffsetAttribute offsetAtt = grams.addAttribute(typeof(OffsetAttribute));
    grams.reset();

    for (int start = 0; start < codePoints.Length; ++start)
    {
        for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
        {
            if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
            {
                // Not on an edge — this gram must not be produced.
                goto nextGramContinue;
            }
            for (int j = start; j < end; ++j)
            {
                if (!isTokenChar(nonTokenChars, codePoints[j]))
                {
                    // Gram spans a non-token character — skip it.
                    goto nextGramContinue;
                }
            }
            // The tokenizer must produce exactly this gram next.
            assertTrue(grams.incrementToken());
            assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
            assertEquals(1, posIncAtt.PositionIncrement);
            assertEquals(1, posLenAtt.PositionLength);
            assertEquals(offsets[start], offsetAtt.startOffset());
            assertEquals(offsets[end], offsetAtt.endOffset());
        nextGramContinue:;
        }
        // (removed unused converter-artifact label 'nextGramBreak' — nothing jumps to it)
    }
    assertFalse(grams.incrementToken());
    grams.end();
    // After end(), both offsets must point at the end of the input.
    assertEquals(s.Length, offsetAtt.startOffset());
    assertEquals(s.Length, offsetAtt.endOffset());
}