/// <summary>
/// Shows how to construct a phrase query containing shingles: every token the
/// analyzer produces for "this sentence" is added to a <c>PhraseQuery</c> at
/// the position obtained by summing the position increments seen so far.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testShingleAnalyzerWrapperPhraseQuery()
{
    PhraseQuery phraseQuery = new PhraseQuery();
    TokenStream stream = analyzer.tokenStream("content", "this sentence");
    try
    {
        // Running position; starts at -1 so that adding the first increment
        // yields the first term's position.
        int position = -1;
        PositionIncrementAttribute incrementAttr = stream.addAttribute(typeof(PositionIncrementAttribute));
        CharTermAttribute textAttr = stream.addAttribute(typeof(CharTermAttribute));

        stream.reset();
        while (stream.incrementToken())
        {
            position += incrementAttr.PositionIncrement;
            phraseQuery.add(new Term("content", textAttr.ToString()), position);
        }
        stream.end();
    }
    finally
    {
        // Close the stream even when consuming it failed.
        IOUtils.closeWhileHandlingException(stream);
    }

    ScoreDoc[] hits = searcher.search(phraseQuery, null, 1000).scoreDocs;
    int[] expectedRanks = new int[] { 0 };
    compareRanks(hits, expectedRanks);
}
/// <summary>
/// Runs a fixed sentence through a <c>NumericPayloadTokenFilter</c> configured
/// with payload 3 and type "D", and checks that the "dogs" token carries that
/// payload while every other token has type "word".
/// </summary>
/// <remarks>The Java original was declared <c>throws java.io.IOException</c>.</remarks>
public virtual void test()
{
    string sentence = "The quick red fox jumped over the lazy brown dogs";

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
    NumericPayloadTokenFilter payloadFilter = new NumericPayloadTokenFilter(new WordTokenFilter(this, tokenizer), 3, "D");

    bool sawDogs = false;
    CharTermAttribute termAttr = payloadFilter.getAttribute(typeof(CharTermAttribute));
    TypeAttribute typeAttr = payloadFilter.getAttribute(typeof(TypeAttribute));
    PayloadAttribute payloadAttr = payloadFilter.getAttribute(typeof(PayloadAttribute));

    payloadFilter.reset();
    while (payloadFilter.incrementToken())
    {
        if (!termAttr.ToString().Equals("dogs"))
        {
            // Every token other than "dogs" must have the "word" type.
            assertTrue(typeAttr.type() + " is not null and it should be", typeAttr.type().Equals("word"));
            continue;
        }

        sawDogs = true;
        assertTrue(typeAttr.type() + " is not equal to " + "D", typeAttr.type().Equals("D"));
        assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAttr.Payload != null);

        // Safe here to just use the bytes; otherwise we should use offset and length.
        sbyte[] rawBytes = payloadAttr.Payload.bytes;
        assertTrue(rawBytes.Length + " does not equal: " + payloadAttr.Payload.length, rawBytes.Length == payloadAttr.Payload.length);
        assertTrue(payloadAttr.Payload.offset + " does not equal: " + 0, payloadAttr.Payload.offset == 0);

        float decoded = PayloadHelper.decodeFloat(rawBytes);
        assertTrue(decoded + " does not equal: " + 3, decoded == 3);
    }

    assertTrue(sawDogs + " does not equal: " + true, sawDogs);
}
/// <summary>
/// Shows how to construct a boolean query with shingles. A query like this
/// implicitly scores higher those documents that contain the query words in
/// the right order and adjacent to each other.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testShingleAnalyzerWrapperBooleanQuery()
{
    BooleanQuery booleanQuery = new BooleanQuery();
    TokenStream stream = analyzer.tokenStream("content", "test sentence");
    try
    {
        CharTermAttribute textAttr = stream.addAttribute(typeof(CharTermAttribute));

        stream.reset();
        while (stream.incrementToken())
        {
            // Each emitted token becomes an optional (SHOULD) term clause.
            Term term = new Term("content", textAttr.ToString());
            booleanQuery.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
        }
        stream.end();
    }
    finally
    {
        // Close the stream even when consuming it failed.
        IOUtils.closeWhileHandlingException(stream);
    }

    ScoreDoc[] hits = searcher.search(booleanQuery, null, 1000).scoreDocs;
    int[] expectedRanks = new int[] { 1, 2, 0 };
    compareRanks(hits, expectedRanks);
}
/// <summary>
/// Advances <paramref name="stream"/> by one token and asserts that its term
/// text and payload match the expected values.
/// </summary>
/// <param name="expected">Expected term text of the next token.</param>
/// <param name="stream">Stream to advance; it must yield another token.</param>
/// <param name="termAtt">Term attribute read after the stream is advanced.</param>
/// <param name="payAtt">Payload attribute read after the stream is advanced.</param>
/// <param name="expectPay">Expected payload bytes, or <c>null</c> when the token must carry no payload.</param>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
internal virtual void assertTermEquals(string expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, sbyte[] expectPay)
{
    assertTrue(stream.incrementToken());
    assertEquals(expected, termAtt.ToString());

    BytesRef payload = payAtt.Payload;
    if (payload == null)
    {
        assertTrue("expectPay is not null and it should be", expectPay == null);
        return;
    }

    // A payload is present: fail with a readable assertion if none was
    // expected, rather than dereferencing a null expectPay below.
    assertTrue("payload is not null but expectPay is null", expectPay != null);

    assertTrue(payload.length + " does not equal: " + expectPay.Length, payload.length == expectPay.Length);
    for (int i = 0; i < expectPay.Length; i++)
    {
        // The payload's bytes start at payload.offset within the backing array.
        assertTrue(expectPay[i] + " does not equal: " + payload.bytes[i + payload.offset], expectPay[i] == payload.bytes[i + payload.offset]);
    }
}
/// <summary>
/// Creates a filter over a prefix stream and a suffix stream. The suffix is
/// handed to the base class; the same six attribute types are registered on
/// this filter and on the prefix stream.
/// </summary>
/// <param name="prefix">Stream stored as the prefix; its attributes are kept in the <c>p_*</c> fields.</param>
/// <param name="suffix">Stream stored as the suffix and passed to the base constructor.</param>
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix)
    : base(suffix)
{
    this.suffix = suffix;
    this.prefix = prefix;
    this.prefixExhausted = false;

    // Attributes registered on this filter.
    this.termAtt = addAttribute(typeof(CharTermAttribute));
    this.posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
    this.payloadAtt = addAttribute(typeof(PayloadAttribute));
    this.offsetAtt = addAttribute(typeof(OffsetAttribute));
    this.typeAtt = addAttribute(typeof(TypeAttribute));
    this.flagsAtt = addAttribute(typeof(FlagsAttribute));

    // The same attribute types, registered on the prefix stream.
    this.p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
    this.p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
    this.p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
    this.p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
    this.p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
    this.p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
}
/// <summary>
/// Checks that a <c>CommonGramsQueryFilter</c> chain yields the same first
/// token again after it is closed, given a fresh reader, and reset.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testQueryReset()
{
    const string input = "How the s a brown s cow d like A B thing?";

    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CommonGramsFilter gramsFilter = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
    CommonGramsQueryFilter queryFilter = new CommonGramsQueryFilter(gramsFilter);

    CharTermAttribute termAttr = tokenizer.addAttribute(typeof(CharTermAttribute));

    // First pass: consume the first two tokens.
    queryFilter.reset();
    assertTrue(queryFilter.incrementToken());
    assertEquals("How_the", termAttr.ToString());
    assertTrue(queryFilter.incrementToken());
    assertEquals("the_s", termAttr.ToString());
    queryFilter.close();

    // Second pass: new reader on the tokenizer, then reset the whole chain.
    tokenizer.Reader = new StringReader(input);
    queryFilter.reset();
    assertTrue(queryFilter.incrementToken());
    assertEquals("How_the", termAttr.ToString());
}
/// <summary>
/// Checks that a <c>DictionaryCompoundWordTokenFilter</c> starts again from
/// the original compound word after end/close, a fresh reader on the
/// tokenizer, and reset.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testReset()
{
    const string compound = "Rindfleischüberwachungsgesetz";

    CharArraySet dictionary = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "Überwachung");
    Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(compound));
    DictionaryCompoundWordTokenFilter compoundFilter = new DictionaryCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        tokenizer,
        dictionary,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
        false);

    CharTermAttribute termAttr = compoundFilter.getAttribute(typeof(CharTermAttribute));

    // First pass: the full compound comes first, followed by the first subword.
    compoundFilter.reset();
    assertTrue(compoundFilter.incrementToken());
    assertEquals(compound, termAttr.ToString());
    assertTrue(compoundFilter.incrementToken());
    assertEquals("Rind", termAttr.ToString());
    compoundFilter.end();
    compoundFilter.close();

    // Second pass: new reader, reset, and the full compound must come first again.
    tokenizer.Reader = new StringReader(compound);
    compoundFilter.reset();
    assertTrue(compoundFilter.incrementToken());
    assertEquals(compound, termAttr.ToString());
}
/// <summary>
/// Runs a fixed sentence through a <c>TypeAsPayloadTokenFilter</c> and checks,
/// for each token, that its type equals the upper-cased first character of the
/// term and that the payload decodes to that same type; ten tokens are expected.
/// </summary>
/// <remarks>The Java original was declared <c>throws java.io.IOException</c>.</remarks>
public virtual void test()
{
    string sentence = "The quick red fox jumped over the lazy brown dogs";

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
    TypeAsPayloadTokenFilter payloadFilter = new TypeAsPayloadTokenFilter(new WordTokenFilter(this, tokenizer));

    int tokenCount = 0;
    CharTermAttribute termAttr = payloadFilter.getAttribute(typeof(CharTermAttribute));
    TypeAttribute typeAttr = payloadFilter.getAttribute(typeof(TypeAttribute));
    PayloadAttribute payloadAttr = payloadFilter.getAttribute(typeof(PayloadAttribute));

    payloadFilter.reset();
    for (; payloadFilter.incrementToken(); tokenCount++)
    {
        // The token type is expected to be the upper-cased first character of the term.
        assertTrue(
            typeAttr.type() + " is not null and it should be",
            typeAttr.type().Equals(char.ToUpper(termAttr.buffer()[0]).ToString()));
        assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAttr.Payload != null);

        string payloadText = payloadAttr.Payload.utf8ToString();
        assertTrue(payloadText + " is not equal to " + typeAttr.type(), payloadText.Equals(typeAttr.type()));
    }

    assertTrue(tokenCount + " does not equal: " + 10, tokenCount == 10);
}
/// <summary>
/// Checks that a <c>DelimitedPayloadTokenFilter</c> with a '|' delimiter and a
/// <c>FloatEncoder</c> gives each token the float payload written after the
/// delimiter, and no payload to tokens without one.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testFloatEncoding()
{
    string test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(tokenizer, '|', new FloatEncoder());
    CharTermAttribute termAtt = filter.getAttribute(typeof(CharTermAttribute));
    PayloadAttribute payAtt = filter.getAttribute(typeof(PayloadAttribute));
    filter.reset();

    // Expected terms in stream order, paired with the float each payload
    // should encode (null = no payload expected).
    string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
    float?[] expectedValues = { null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f };

    for (int i = 0; i < expectedTerms.Length; i++)
    {
        assertTermEquals(
            expectedTerms[i],
            filter,
            termAtt,
            payAtt,
            expectedValues[i].HasValue ? PayloadHelper.encodeFloat(expectedValues[i].Value) : null);
    }

    assertFalse(filter.incrementToken());
    filter.end();
    filter.close();
}
/// <summary>
/// Checks that a <c>DelimitedPayloadTokenFilter</c> with the default delimiter
/// and an <c>IdentityEncoder</c> gives each token the UTF-8 bytes of the tag
/// written after the delimiter, and no payload to untagged tokens.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testPayloads()
{
    string test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(tokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    CharTermAttribute termAtt = filter.getAttribute(typeof(CharTermAttribute));
    PayloadAttribute payAtt = filter.getAttribute(typeof(PayloadAttribute));
    filter.reset();

    // Expected terms in stream order, paired with the tag each payload should
    // contain (null = no payload expected).
    string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
    string[] expectedTags = { null, "JJ", "JJ", "NN", "VB", null, null, "JJ", "JJ", "NN" };

    for (int i = 0; i < expectedTerms.Length; i++)
    {
        assertTermEquals(
            expectedTerms[i],
            filter,
            termAtt,
            payAtt,
            expectedTags[i] == null ? null : expectedTags[i].GetBytes(StandardCharsets.UTF_8));
    }

    assertFalse(filter.incrementToken());
    filter.end();
    filter.close();
}
/// <summary>
/// Checks that a <c>DelimitedPayloadTokenFilter</c> with a '|' delimiter and an
/// <c>IntegerEncoder</c> gives each token the integer payload written after
/// the delimiter, and no payload to tokens without one.
/// </summary>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
public virtual void testIntEncoding()
{
    string test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false);
    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(tokenizer, '|', new IntegerEncoder());
    CharTermAttribute termAtt = filter.getAttribute(typeof(CharTermAttribute));
    PayloadAttribute payAtt = filter.getAttribute(typeof(PayloadAttribute));
    filter.reset();

    // Expected terms in stream order, paired with the integer each payload
    // should encode (null = no payload expected).
    string[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
    int?[] expectedValues = { null, 1, 2, 3, null, null, null, 5, 99, 83 };

    for (int i = 0; i < expectedTerms.Length; i++)
    {
        assertTermEquals(
            expectedTerms[i],
            filter,
            termAtt,
            payAtt,
            expectedValues[i].HasValue ? PayloadHelper.encodeInt(expectedValues[i].Value) : null);
    }

    assertFalse(filter.incrementToken());
    filter.end();
    filter.close();
}
/// <summary>
/// Feeds a fixed sentence through a <c>TeeSinkTokenFilter</c> whose sink uses a
/// <c>TokenTypeSinkFilter</c> for type "D". Checks that "dogs" has type "D",
/// every other token has type "word", and the sink yields exactly one token.
/// </summary>
/// <remarks>The Java original was declared <c>throws java.io.IOException</c>.</remarks>
public virtual void test()
{
    TokenTypeSinkFilter typeFilter = new TokenTypeSinkFilter("D");
    string sentence = "The quick red fox jumped over the lazy brown dogs";

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new WordTokenFilter(this, tokenizer));
    TeeSinkTokenFilter.SinkTokenStream sinkStream = tee.newSinkTokenStream(typeFilter);

    bool sawDogs = false;
    CharTermAttribute termAttr = tee.addAttribute(typeof(CharTermAttribute));
    TypeAttribute typeAttr = tee.addAttribute(typeof(TypeAttribute));

    tee.reset();
    while (tee.incrementToken())
    {
        if (!termAttr.ToString().Equals("dogs"))
        {
            // Every token other than "dogs" must have the "word" type.
            assertTrue(typeAttr.type() + " is not null and it should be", typeAttr.type().Equals("word"));
            continue;
        }

        sawDogs = true;
        assertTrue(typeAttr.type() + " is not equal to " + "D", typeAttr.type().Equals("D"));
    }
    assertTrue(sawDogs + " does not equal: " + true, sawDogs);

    // Count what ended up in the sink.
    int sinkCount = 0;
    for (sinkStream.reset(); sinkStream.incrementToken(); )
    {
        sinkCount++;
    }
    assertTrue("sink Size: " + sinkCount + " is not: " + 1, sinkCount == 1);
}
/// <summary>
/// Enumerates every n-gram the tokenizer is expected to emit for
/// <paramref name="s"/> and asserts that the tokenizer yields exactly those
/// grams, in that order, with matching code points, offsets, position
/// increment 1 and position length 1.
/// </summary>
/// <param name="minGram">Smallest gram size, in code points.</param>
/// <param name="maxGram">Largest gram size, in code points.</param>
/// <param name="s">Input text to tokenize.</param>
/// <param name="nonTokenChars">Passed to <c>isTokenChar</c> and to the tokenizer helper to decide which code points may appear in a gram.</param>
/// <param name="edgesOnly">When true, only grams that start at index 0 or right after a non-token character are expected.</param>
/// <remarks>The Java original was declared <c>throws java.io.IOException</c>.</remarks>
internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
{
    // Convert the string to code points and build a table mapping a
    // code-point index to its char offset in the string.
    int[] codePoints = toCodePoints(s);
    int[] offsets = new int[codePoints.Length + 1];
    for (int i = 0; i < codePoints.Length; ++i)
    {
        offsets[i + 1] = offsets[i] + char.charCount(codePoints[i]);
    }

    TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
    CharTermAttribute termAtt = grams.addAttribute(typeof(CharTermAttribute));
    PositionIncrementAttribute posIncAtt = grams.addAttribute(typeof(PositionIncrementAttribute));
    PositionLengthAttribute posLenAtt = grams.addAttribute(typeof(PositionLengthAttribute));
    OffsetAttribute offsetAtt = grams.addAttribute(typeof(OffsetAttribute));
    grams.reset();

    for (int start = 0; start < codePoints.Length; ++start)
    {
        for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
        {
            if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
            {
                // Not on an edge: no gram is expected for this (start, end).
                continue;
            }

            // A gram is only expected when every code point in [start, end)
            // is a token character.
            bool allTokenChars = true;
            for (int j = start; j < end; ++j)
            {
                if (!isTokenChar(nonTokenChars, codePoints[j]))
                {
                    allTokenChars = false;
                    break;
                }
            }
            if (!allTokenChars)
            {
                continue;
            }

            assertTrue(grams.incrementToken());
            assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
            assertEquals(1, posIncAtt.PositionIncrement);
            assertEquals(1, posLenAtt.PositionLength);
            assertEquals(offsets[start], offsetAtt.startOffset());
            assertEquals(offsets[end], offsetAtt.endOffset());
        }
    }

    // The stream must be exhausted, and after end() both offsets must equal
    // the input length.
    assertFalse(grams.incrementToken());
    grams.end();
    assertEquals(s.Length, offsetAtt.startOffset());
    assertEquals(s.Length, offsetAtt.endOffset());
}
/// <summary>
/// Pops one input token's worth of tokens off the filter and verifies that
/// they are as expected: the folded form first and, when the filter preserves
/// originals and folding changed the text, the unfolded form next.
/// </summary>
/// <param name="expectedUnfolded">Expected original (unfolded) term text.</param>
/// <param name="expectedFolded">Expected folded term text.</param>
/// <param name="filter">Filter to pull tokens from.</param>
/// <param name="termAtt">Term attribute read after each advance.</param>
/// <remarks>The Java original was declared <c>throws Exception</c>.</remarks>
internal virtual void assertNextTerms(string expectedUnfolded, string expectedFolded, ASCIIFoldingFilter filter, CharTermAttribute termAtt)
{
    assertTrue(filter.incrementToken());
    assertEquals(expectedFolded, termAtt.ToString());

    // No second token is expected unless originals are preserved and the
    // folded text differs from the original.
    if (!filter.PreserveOriginal || expectedUnfolded.Equals(expectedFolded))
    {
        return;
    }

    assertTrue(filter.incrementToken());
    assertEquals(expectedUnfolded, termAtt.ToString());
}
/// <summary>
/// Creates the helper stream, storing the enclosing test instance and the
/// token enumerator, and registering the term, offset and position-increment
/// attributes.
/// </summary>
/// <param name="outerInstance">Enclosing test instance (stands in for the Java anonymous class's outer reference).</param>
/// <param name="toks">Enumerator over tokens, stored for later use by this stream.</param>
public TokenStreamAnonymousInnerClassHelper(TestRemoveDuplicatesTokenFilter outerInstance, IEnumerator<Token> toks)
{
    this.outerInstance = outerInstance;
    this.toks = toks;

    // Attributes registered on this stream.
    this.termAtt = addAttribute(typeof(CharTermAttribute));
    this.offsetAtt = addAttribute(typeof(OffsetAttribute));
    this.posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
}
/// <summary>
/// Advances to the next token. A token whose first character is in the Thai
/// Unicode block is split at the boundaries reported by <c>breaker</c>, and
/// each segment is emitted as its own token; any other token (or an empty
/// one) passes through unchanged.
/// </summary>
/// <returns>
/// <c>true</c> when a token is available; <c>false</c> when the input is
/// exhausted, or when the break iterator reports no boundary for a freshly
/// read Thai token.
/// </returns>
/// <remarks>The Java original was declared <c>throws java.io.IOException</c>.</remarks>
public override bool incrementToken()
{
    // Still emitting segments of a previously read token?
    if (hasMoreTokensInClone)
    {
        int segmentStart = breaker.current();
        int segmentEnd = breaker.next();
        if (segmentEnd != BreakIterator.DONE)
        {
            clonedToken.copyTo(this);
            termAtt.copyBuffer(clonedTermAtt.buffer(), segmentStart, segmentEnd - segmentStart);
            if (hasIllegalOffsets)
            {
                // Offsets don't line up with the term text: keep the original offsets.
                offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
            }
            else
            {
                offsetAtt.setOffset(clonedOffsetAtt.startOffset() + segmentStart, clonedOffsetAtt.startOffset() + segmentEnd);
            }
            if (handlePosIncr)
            {
                posAtt.PositionIncrement = 1;
            }
            return true;
        }
        hasMoreTokensInClone = false;
    }

    if (!input.incrementToken())
    {
        return false;
    }

    // Empty tokens and tokens not starting with a Thai character pass through untouched.
    if (termAtt.length() == 0 || char.UnicodeBlock.of(termAtt.charAt(0)) != char.UnicodeBlock.THAI)
    {
        return true;
    }

    hasMoreTokensInClone = true;

    // If length by start + end offsets doesn't match the term text then assume
    // this is a synonym and don't adjust the offsets.
    hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();

    // We lazy init the cloned token, as in the ctor not all attributes may be added.
    if (clonedToken == null)
    {
        clonedToken = cloneAttributes();
        clonedTermAtt = clonedToken.getAttribute(typeof(CharTermAttribute));
        clonedOffsetAtt = clonedToken.getAttribute(typeof(OffsetAttribute));
    }
    else
    {
        this.copyTo(clonedToken);
    }

    // Reinit the CharacterIterator over the cloned term text.
    charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
    breaker.Text = charIterator;

    // Named differently from the segment locals above: C# does not allow a
    // local to reuse a name already declared in a nested block of the same
    // method body (compiler error CS0136).
    int firstBoundary = breaker.next();
    if (firstBoundary != BreakIterator.DONE)
    {
        termAtt.Length = firstBoundary;
        if (hasIllegalOffsets)
        {
            offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
        }
        else
        {
            offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + firstBoundary);
        }
        // Position increment keeps as it is for the first token.
        return true;
    }
    return false;
}
/// <summary>
/// Resets the base stream and discards all per-token splitting state, so the
/// cloned token and its attributes are lazily re-created on next use.
/// </summary>
/// <remarks>The Java original was declared <c>throws java.io.IOException</c>.</remarks>
public override void reset()
{
    base.reset();

    // Drop the cached clone and the attribute references taken from it.
    this.clonedOffsetAtt = null;
    this.clonedTermAtt = null;
    this.clonedToken = null;
    this.hasMoreTokensInClone = false;
}