/// <summary>
/// Pulls the tokens produced for one input token off the filter and verifies them:
/// the folded term is expected first, and the unfolded term is expected next only when
/// the filter preserves originals and the two expected forms differ.
/// </summary>
/// <param name="expectedUnfolded"> the term text expected before folding </param>
/// <param name="expectedFolded"> the term text expected after folding </param>
/// <param name="filter"> the filter to pull tokens from </param>
/// <param name="termAtt"> the term attribute read after each call to incrementToken </param>
internal virtual void assertNextTerms(string expectedUnfolded, string expectedFolded, ASCIIFoldingFilter filter, CharTermAttribute termAtt)
{
    assertTrue(filter.incrementToken());
    assertEquals(expectedFolded, termAtt.ToString());

    // No second token is expected unless originals are preserved and folding changed the term.
    if (!filter.PreserveOriginal || expectedUnfolded.Equals(expectedFolded))
    {
        return;
    }

    assertTrue(filter.incrementToken());
    assertEquals(expectedUnfolded, termAtt.ToString());
}
/// <summary>
/// Advances the wrapped input and sets the position increment to 10 when the current
/// term is "largegap" or "/".
/// </summary>
/// <returns> true if the input produced a token, false otherwise </returns>
public override bool incrementToken()
{
    if (!input.incrementToken())
    {
        return false;
    }

    string term = termAtt.ToString();
    bool isGapMarker = term.Equals("largegap") || term.Equals("/");
    if (isGapMarker)
    {
        posIncAtt.PositionIncrement = 10;
    }

    return true;
}
/// <summary>
/// Runs a StopAnalyzer built from a custom stop set over a sentence and checks that no
/// emitted term is in the stop set and that each emitted token carries the expected
/// position increment.
/// </summary>
public virtual void testStopListPositions()
{
    CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWords);
    string sentence = "This is a good test of the english stop analyzer with positions";
    int[] expectedIncrements = new int[] { 1, 1, 1, 3, 1, 1, 1, 2, 1 };

    TokenStream tokens = analyzer.tokenStream("test", sentence);
    try
    {
        assertNotNull(tokens);
        CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));
        PositionIncrementAttribute increment = tokens.addAttribute(typeof(PositionIncrementAttribute));

        tokens.reset();
        for (int tokenIndex = 0; tokens.incrementToken(); tokenIndex++)
        {
            string emitted = term.ToString();
            assertFalse(stopWords.contains(emitted));
            assertEquals(expectedIncrements[tokenIndex], increment.PositionIncrement);
        }
        tokens.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(tokens);
    }
}
// we only check a few core attributes here.
// TODO: test other things
/// <summary>
/// Walks two token streams in lock step and asserts that they produce the same number
/// of tokens with matching term text, position increment, start offset and end offset,
/// and that their end offsets match after both streams have been ended.
/// </summary>
/// <param name="s"> the input text, used only to build the failure messages </param>
/// <param name="left"> the stream that drives the iteration </param>
/// <param name="right"> the stream compared against <paramref name="left"/> </param>
public virtual void assertEquals(string s, TokenStream left, TokenStream right)
{
    left.reset();
    right.reset();

    CharTermAttribute termL = left.addAttribute(typeof(CharTermAttribute));
    CharTermAttribute termR = right.addAttribute(typeof(CharTermAttribute));
    OffsetAttribute offsetL = left.addAttribute(typeof(OffsetAttribute));
    OffsetAttribute offsetR = right.addAttribute(typeof(OffsetAttribute));
    PositionIncrementAttribute posL = left.addAttribute(typeof(PositionIncrementAttribute));
    PositionIncrementAttribute posR = right.addAttribute(typeof(PositionIncrementAttribute));

    while (left.incrementToken())
    {
        // Every token on the left must have a counterpart on the right.
        assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
        assertEquals("wrong term text for input: " + s, termL.ToString(), termR.ToString());
        assertEquals("wrong position for input: " + s, posL.PositionIncrement, posR.PositionIncrement);
        assertEquals("wrong start offset for input: " + s, offsetL.startOffset(), offsetR.startOffset());
        assertEquals("wrong end offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());
    }

    // The right stream must be exhausted once the left one is.
    assertFalse("wrong number of tokens for input: " + s, right.incrementToken());

    left.end();
    right.end();
    assertEquals("wrong final offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());

    left.close();
    right.close();
}
/// <summary>
/// Advances the wrapped input and replaces the current term with the result of
/// <c>CyrillicLatinConverter.cir2lat</c> applied to its text.
/// </summary>
/// <returns> true if the input produced a token, false otherwise </returns>
public sealed override bool IncrementToken()
{
    if (!m_input.IncrementToken())
    {
        return false;
    }

    // Capture the text before emptying the attribute, then write the converted form back.
    string original = termAttribute.ToString();
    termAttribute.SetEmpty();
    termAttribute.Append(CyrillicLatinConverter.cir2lat(original));
    return true;
}
/// <summary>
/// Advances the wrapped input and replaces the current term with the result of
/// <c>CyrillicLatinConverter.Cir2lat</c> applied to its text.
/// </summary>
/// <returns> true if the input produced a token, false otherwise </returns>
public override bool IncrementToken()
{
    if (!m_input.IncrementToken())
    {
        return false;
    }

    // Capture the text before clearing the attribute, then write the converted form back.
    string original = termAttribute.ToString();
    termAttribute.Clear();
    termAttribute.Append(CyrillicLatinConverter.Cir2lat(original));
    return true;
}
/// <summary>
/// Drains the given filter and collects the text of every token it emits, in order.
/// The filter is reset before iteration and ended and closed afterwards.
/// </summary>
/// <param name="filter"> the token filter to consume </param>
/// <returns> the term text of each emitted token </returns>
private IList<string> filter(TokenFilter filter)
{
    IList<string> terms = new List<string>();
    CharTermAttribute term = filter.getAttribute(typeof(CharTermAttribute));

    filter.reset();
    while (filter.incrementToken())
    {
        terms.Add(term.ToString());
    }
    filter.end();
    filter.close();

    return terms;
}
/// <summary>
/// Advances the wrapped input and overwrites the term buffer with the
/// IndexableBinaryStringTools encoding of the term's collation key bytes.
/// </summary>
/// <returns> true if the input produced a token, false otherwise </returns>
public override bool IncrementToken()
{
    if (!input.IncrementToken())
    {
        return false;
    }

    var keyBytes = collator.GetCollationKey(termAtt.ToString()).toByteArray();
    int encodedSize = IndexableBinaryStringTools.getEncodedLength(keyBytes, 0, keyBytes.Length);

    // Make room for the encoded form before writing it into the term buffer.
    termAtt.resizeBuffer(encodedSize);
    termAtt.Length = encodedSize;
    IndexableBinaryStringTools.encode(keyBytes, 0, keyBytes.Length, termAtt.buffer(), 0, encodedSize);

    return true;
}
/// <summary>
/// Asserts that each of the two streams yields exactly one token and that the two
/// tokens have the same term text. Both streams are ended and closed afterwards.
/// </summary>
/// <param name="stream1"> the first stream to compare </param>
/// <param name="stream2"> the second stream to compare </param>
private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
{
    stream1.reset();
    stream2.reset();

    CharTermAttribute firstTerm = stream1.addAttribute(typeof(CharTermAttribute));
    CharTermAttribute secondTerm = stream2.addAttribute(typeof(CharTermAttribute));

    // Exactly one token per stream, and both must carry the same text.
    assertTrue(stream1.incrementToken());
    assertTrue(stream2.incrementToken());
    assertEquals(firstTerm.ToString(), secondTerm.ToString());
    assertFalse(stream1.incrementToken());
    assertFalse(stream2.incrementToken());

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
/// <summary>
/// Drains the given filter, logging every token, and asserts that each token's position
/// increment is 3 when the filter has position increments enabled and 1 otherwise.
/// The filter is ended and closed once it is exhausted.
/// </summary>
/// <param name="stpf"> the type token filter under test </param>
private void testPositons(TypeTokenFilter stpf)
{
    TypeAttribute typeAtt = stpf.getAttribute(typeof(TypeAttribute));
    CharTermAttribute termAttribute = stpf.getAttribute(typeof(CharTermAttribute));
    PositionIncrementAttribute posIncrAtt = stpf.getAttribute(typeof(PositionIncrementAttribute));

    stpf.reset();
    bool enablePositionIncrements = stpf.EnablePositionIncrements;
    int expectedIncrement = enablePositionIncrements ? 3 : 1;

    while (stpf.incrementToken())
    {
        log("Token: " + termAttribute.ToString() + ": " + typeAtt.type() + " - " + posIncrAtt.PositionIncrement);

        // Expected value goes first so a failure reports expected/actual the right way round.
        assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1", expectedIncrement, posIncrAtt.PositionIncrement);
    }

    stpf.end();
    stpf.close();
}
/// <summary>
/// Builds a PerFieldAnalyzerWrapper whose default is a WhitespaceAnalyzer and whose
/// "special" field uses a SimpleAnalyzer, then checks that "Qwerty" comes out unchanged
/// for the field "field" and lowercased for the field "special".
/// </summary>
public virtual void testPerField()
{
    string text = "Qwerty";

    IDictionary<string, Analyzer> perFieldAnalyzers = new Dictionary<string, Analyzer>();
    perFieldAnalyzers["special"] = new SimpleAnalyzer(TEST_VERSION_CURRENT);

    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), perFieldAnalyzers);

    // Field without a dedicated analyzer: expect the text unchanged.
    TokenStream defaultStream = wrapper.tokenStream("field", text);
    try
    {
        CharTermAttribute defaultTerm = defaultStream.getAttribute(typeof(CharTermAttribute));
        defaultStream.reset();

        assertTrue(defaultStream.incrementToken());
        assertEquals("WhitespaceAnalyzer does not lowercase", "Qwerty", defaultTerm.ToString());
        assertFalse(defaultStream.incrementToken());

        defaultStream.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(defaultStream);
    }

    // Field mapped to SimpleAnalyzer: expect the lowercased text.
    TokenStream specialStream = wrapper.tokenStream("special", text);
    try
    {
        CharTermAttribute specialTerm = specialStream.getAttribute(typeof(CharTermAttribute));
        specialStream.reset();

        assertTrue(specialStream.incrementToken());
        assertEquals("SimpleAnalyzer lowercases", "qwerty", specialTerm.ToString());
        assertFalse(specialStream.incrementToken());

        specialStream.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(specialStream);
    }
}
/// <summary>
/// Feeds a random Unicode string through a KeywordTokenizer wrapped in an
/// NGramTokenFilter with random gram sizes and checks, for every start position and
/// gram length counted in code points, that the next token spans offsets 0..s.Length
/// and that its text equals the matching substring of the input.
/// </summary>
public virtual void testSupplementaryCharacters()
{
    // The order of the random() calls is kept as-is so the random sequence is unchanged.
    string input = TestUtil.randomUnicodeString(random(), 10);
    int totalCodePoints = input.codePointCount(0, input.Length);
    int smallestGram = TestUtil.Next(random(), 1, 3);
    int largestGram = TestUtil.Next(random(), smallestGram, 10);

    TokenStream source = new KeywordTokenizer(new StringReader(input));
    TokenStream grams = new NGramTokenFilter(TEST_VERSION_CURRENT, source, smallestGram, largestGram);

    CharTermAttribute term = grams.addAttribute(typeof(CharTermAttribute));
    OffsetAttribute offsets = grams.addAttribute(typeof(OffsetAttribute));
    grams.reset();

    for (int start = 0; start < totalCodePoints; ++start)
    {
        // Upper bound for the gram end, in code points, for this start position.
        int lastEnd = Math.Min(totalCodePoints, start + largestGram);
        for (int end = start + smallestGram; end <= lastEnd; ++end)
        {
            assertTrue(grams.incrementToken());
            assertEquals(0, offsets.startOffset());
            assertEquals(input.Length, offsets.endOffset());

            // Translate code point positions into char indexes before slicing.
            int fromIndex = char.offsetByCodePoints(input, 0, start);
            int toIndex = char.offsetByCodePoints(input, 0, end);
            assertEquals(input.Substring(fromIndex, toIndex - fromIndex), term.ToString());
        }
    }

    assertFalse(grams.incrementToken());
}
/// <summary>
/// Accepts the current token when its term text can be parsed by <c>dateFormat</c>.
/// The term attribute is looked up on the first call and cached in <c>termAtt</c>.
/// </summary>
/// <param name="source"> the attribute source holding the current token </param>
/// <returns> true if parsing the term succeeds, false if it raises a ParseException </returns>
public override bool accept(AttributeSource source)
{
    if (termAtt == null)
    {
        termAtt = source.addAttribute(typeof(CharTermAttribute));
    }

    try
    {
        // Only whether parsing succeeds matters; the parsed value itself is not used.
        // The former "date != null" test was always true for a DateTime value.
        dateFormat.parse(termAtt.ToString());
        return true;
    }
    catch (ParseException)
    {
        // Intentionally ignored: a term that does not parse is simply not accepted.
        return false;
    }
}
/// <summary>
/// Wraps a TestTokenStream in an English SnowballFilter, advances it once and checks
/// the term text, offsets, type, position increment, flags and payload of the token.
/// </summary>
public virtual void testFilterTokens()
{
    SnowballFilter snowball = new SnowballFilter(new TestTokenStream(this), "English");

    CharTermAttribute term = snowball.getAttribute(typeof(CharTermAttribute));
    OffsetAttribute offsets = snowball.getAttribute(typeof(OffsetAttribute));
    TypeAttribute type = snowball.getAttribute(typeof(TypeAttribute));
    PayloadAttribute payload = snowball.getAttribute(typeof(PayloadAttribute));
    PositionIncrementAttribute increment = snowball.getAttribute(typeof(PositionIncrementAttribute));
    FlagsAttribute flags = snowball.getAttribute(typeof(FlagsAttribute));

    snowball.incrementToken();

    assertEquals("accent", term.ToString());
    assertEquals(2, offsets.startOffset());
    assertEquals(7, offsets.endOffset());
    assertEquals("wrd", type.type());
    assertEquals(3, increment.PositionIncrement);
    assertEquals(77, flags.Flags);
    assertEquals(new BytesRef(new sbyte[] { 0, 1, 2, 3 }), payload.Payload);
}
/// <summary>
/// Sets the stop filter's position-increment flag, then checks that it emits the English
/// words for 0, 3, 6, ... 18 in order. With increments enabled every token but the first
/// must have a position increment of 3; otherwise every token must have 1.
/// </summary>
/// <param name="stpf"> the stop filter under test </param>
/// <param name="enableIcrements"> value assigned to the filter's EnablePositionIncrements </param>
private void doTestStopPositons(StopFilter stpf, bool enableIcrements)
{
    log("---> test with enable-increments-" + (enableIcrements ? "enabled" : "disabled"));
    stpf.EnablePositionIncrements = enableIcrements;

    CharTermAttribute term = stpf.getAttribute(typeof(CharTermAttribute));
    PositionIncrementAttribute increment = stpf.getAttribute(typeof(PositionIncrementAttribute));
    stpf.reset();

    for (int number = 0; number < 20; number += 3)
    {
        assertTrue(stpf.incrementToken());
        log("Token " + number + ": " + stpf);

        string expectedWord = English.intToEnglish(number).trim();
        assertEquals("expecting token " + number + " to be " + expectedWord, expectedWord, term.ToString());

        // Only tokens after the first get the larger increment, and only when enabled.
        int expectedIncrement = 1;
        if (enableIcrements && number != 0)
        {
            expectedIncrement = 3;
        }
        assertEquals("all but first token must have position increment of 3", expectedIncrement, increment.PositionIncrement);
    }

    assertFalse(stpf.incrementToken());
    stpf.end();
    stpf.close();
}
/// <summary>
/// Runs the <c>stop</c> analyzer over a sample sentence and asserts that none of the
/// emitted terms is contained in <c>inValidTokens</c>.
/// </summary>
public virtual void testDefaults()
{
    assertTrue(stop != null);

    TokenStream tokens = stop.tokenStream("test", "This is a test of the english stop analyzer");
    try
    {
        assertTrue(tokens != null);
        CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

        tokens.reset();
        while (tokens.incrementToken())
        {
            string emitted = term.ToString();
            assertFalse(inValidTokens.Contains(emitted));
        }
        tokens.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(tokens);
    }
}
/// <summary>
/// Passes input tokens through. After a token with the term "the" it emits one extra
/// token: the captured state is restored, the position increment is set to 0 and the
/// term is replaced with "hte".
/// </summary>
/// <returns> true if a token was produced, false once the input is exhausted </returns>
public override bool incrementToken()
{
    // A state captured on the previous call is replayed first as the extra token.
    if (bufferedState != null)
    {
        restoreState(bufferedState);
        posIncAtt.PositionIncrement = 0;
        termAtt.setEmpty().append("hte");
        bufferedState = null;
        return true;
    }

    if (!input.incrementToken())
    {
        return false;
    }

    if (termAtt.ToString().Equals("the"))
    {
        bufferedState = captureState();
    }
    return true;
}
/// <summary>
/// Advances the wrapped input and, unless the token is flagged as a keyword, replaces
/// the term with the stemmer's output when that output is non-null and differs from
/// the original term.
/// </summary>
/// <returns> Returns true for the next token in the stream, or false at EOS </returns>
public override bool incrementToken()
{
    if (!input.incrementToken())
    {
        return false;
    }

    string term = termAtt.ToString();

    // Check the exclusion table: keyword tokens are passed through untouched.
    if (keywordAttr.Keyword)
    {
        return true;
    }

    string stemmed = stemmer.stem(term);

    // If not stemmed, don't waste the time adjusting the token.
    bool changed = stemmed != null && !stemmed.Equals(term);
    if (changed)
    {
        termAtt.setEmpty().append(stemmed);
    }
    return true;
}
/// <summary>
/// Tokenizes <paramref name="source"/> with a tokenizer loaded from the given factory
/// and returns the text of every token whose term is non-empty. The reader is closed
/// when iteration finishes or fails.
/// </summary>
/// <param name="source"> the text to split </param>
/// <param name="tokFactory"> the factory passed to loadTokenizer </param>
/// <returns> the non-empty term texts, in emission order </returns>
private static IList<string> splitByTokenizer(string source, TokenizerFactory tokFactory)
{
    StringReader input = new StringReader(source);
    TokenStream tokens = loadTokenizer(tokFactory, input);
    IList<string> pieces = new List<string>();

    try
    {
        CharTermAttribute term = tokens.addAttribute(typeof(CharTermAttribute));
        tokens.reset();
        while (tokens.incrementToken())
        {
            // Skip tokens with an empty term.
            if (term.length() <= 0)
            {
                continue;
            }
            pieces.Add(term.ToString());
        }
    }
    finally
    {
        input.close();
    }

    return pieces;
}
/// <summary>
/// Drains the stream and joins the term texts with single spaces. Before the first
/// token and after each consumed token the attributes are cleared and the term is set
/// to "bogusTerm". The stream is closed afterwards.
/// TODO: rewrite tests not to use string comparison.
/// </summary>
/// <param name="in"> the token stream to consume </param>
/// <returns> the space-separated term texts </returns>
private static string tsToString(TokenStream @in)
{
    StringBuilder joined = new StringBuilder();
    CharTermAttribute term = @in.addAttribute(typeof(CharTermAttribute));

    // extra safety to enforce, that the state is not preserved and also
    // assign bogus values
    @in.clearAttributes();
    term.setEmpty().append("bogusTerm");
    @in.reset();

    while (@in.incrementToken())
    {
        if (joined.Length > 0)
        {
            joined.Append(' ');
        }
        joined.Append(term.ToString());

        // Poison the state again before asking for the next token.
        @in.clearAttributes();
        term.setEmpty().append("bogusTerm");
    }

    @in.close();
    return joined.ToString();
}
/// <summary>
/// Runs a StopAnalyzer built from a custom stop set over a sample sentence and asserts
/// that no emitted term is contained in that stop set.
/// </summary>
public virtual void testStopList()
{
    CharArraySet stopWords = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT, stopWords);

    TokenStream tokens = analyzer.tokenStream("test", "This is a good test of the english stop analyzer");
    try
    {
        assertNotNull(tokens);
        CharTermAttribute term = tokens.getAttribute(typeof(CharTermAttribute));

        tokens.reset();
        while (tokens.incrementToken())
        {
            string emitted = term.ToString();
            assertFalse(stopWords.contains(emitted));
        }
        tokens.end();
    }
    finally
    {
        IOUtils.closeWhileHandlingException(tokens);
    }
}
/// <summary>
/// Accepts the current token when its term text equals "Dogs", ignoring case.
/// </summary>
/// <param name="a"> the attribute source holding the current token </param>
/// <returns> true if the term matches "Dogs" case-insensitively, false otherwise </returns>
public override bool accept(AttributeSource a)
{
    CharTermAttribute termAtt = a.getAttribute(typeof(CharTermAttribute));

    // Ordinal comparison keeps the match independent of the thread's current culture.
    return termAtt.ToString().Equals("Dogs", StringComparison.OrdinalIgnoreCase);
}