/// <summary>
/// Walks every candidate gram of <paramref name="s"/> by brute force and asserts that the
/// tokenizer under test emits exactly those grams, in the same order.
/// </summary>
/// <param name="minGram">Smallest gram size, counted in code points.</param>
/// <param name="maxGram">Largest gram size, counted in code points.</param>
/// <param name="s">Input text handed to the tokenizer.</param>
/// <param name="nonTokenChars">Forwarded to <c>isTokenChar</c> and to the tokenizer helper.</param>
/// <param name="edgesOnly">
/// When true, a gram is only expected if it starts at code point 0 or right after a code point
/// that <c>isTokenChar</c> rejects.
/// </param>
internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
{
    int[] codePoints = toCodePoints(s);

    // charOffsets[k] is the sum of Character.CharCount over the first k code points.
    int[] charOffsets = new int[codePoints.Length + 1];
    int running = 0;
    for (int k = 0; k < codePoints.Length; ++k)
    {
        running += Character.CharCount(codePoints[k]);
        charOffsets[k + 1] = running;
    }

    TokenStream stream = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
    ICharTermAttribute term = stream.AddAttribute<ICharTermAttribute>();
    IPositionIncrementAttribute posInc = stream.AddAttribute<IPositionIncrementAttribute>();
    IPositionLengthAttribute posLen = stream.AddAttribute<IPositionLengthAttribute>();
    IOffsetAttribute offset = stream.AddAttribute<IOffsetAttribute>();
    stream.Reset();

    for (int first = 0; first < codePoints.Length; ++first)
    {
        for (int limit = first + minGram; limit <= first + maxGram && limit <= codePoints.Length; ++limit)
        {
            // Edge-only mode: nothing is expected when the preceding code point is a token char.
            bool skip = edgesOnly && first > 0 && isTokenChar(nonTokenChars, codePoints[first - 1]);

            // Nothing is expected either when any code point in [first, limit) is not a token char.
            for (int k = first; !skip && k < limit; ++k)
            {
                skip = !isTokenChar(nonTokenChars, codePoints[k]);
            }

            if (skip)
            {
                continue;
            }

            // The next emitted token must be exactly this gram.
            assertTrue(stream.IncrementToken());
            assertArrayEquals(Arrays.CopyOfRange(codePoints, first, limit), toCodePoints(term.ToString()));
            assertEquals(1, posInc.PositionIncrement);
            assertEquals(1, posLen.PositionLength);
            assertEquals(charOffsets[first], offset.StartOffset);
            assertEquals(charOffsets[limit], offset.EndOffset);
        }
    }

    // No further tokens; after End() both offsets must equal the input length.
    assertFalse(stream.IncrementToken());
    stream.End();
    assertEquals(s.Length, offset.StartOffset);
    assertEquals(s.Length, offset.EndOffset);
}
/// <summary>
/// Verifies the tokenizer under test against a brute-force enumeration of the grams expected
/// for <paramref name="s"/>. The Java original declared <c>throws java.io.IOException</c>.
/// </summary>
/// <param name="minGram">Smallest gram size, counted in code points.</param>
/// <param name="maxGram">Largest gram size, counted in code points.</param>
/// <param name="s">Input text handed to the tokenizer.</param>
/// <param name="nonTokenChars">Forwarded to <c>isTokenChar</c> and to the tokenizer helper.</param>
/// <param name="edgesOnly">
/// When true, a gram is only expected if it starts at code point 0 or right after a code point
/// that <c>isTokenChar</c> rejects.
/// </param>
internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
{
    // convert the string to code points
    int[] codePoints = toCodePoints(s);

    // offsets[k] is the sum of Character.CharCount over the first k code points.
    int[] offsets = new int[codePoints.Length + 1];
    for (int i = 0; i < codePoints.Length; ++i)
    {
        offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
    }

    TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
    ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
    IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
    IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
    IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
    grams.Reset();

    for (int start = 0; start < codePoints.Length; ++start)
    {
        for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
        {
            if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
            {
                // not on an edge
                goto nextGramContinue;
            }
            for (int j = start; j < end; ++j)
            {
                if (!isTokenChar(nonTokenChars, codePoints[j]))
                {
                    goto nextGramContinue;
                }
            }

            assertTrue(grams.IncrementToken());
            assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
            assertEquals(1, posIncAtt.PositionIncrement);
            assertEquals(1, posLenAtt.PositionLength);
            // NOTE(review): offsets are read as properties, as in the ported TestNGrams variant
            // in this file; confirm against the IOffsetAttribute in use.
            assertEquals(offsets[start], offsetAtt.StartOffset);
            assertEquals(offsets[end], offsetAtt.EndOffset);

            // Stands in for Java's labelled "continue nextGram"; the empty statement keeps the label legal.
            nextGramContinue:;
        }
    }

    // No further tokens; after End() both offsets must equal the input length.
    assertFalse(grams.IncrementToken());
    grams.End();
    assertEquals(s.Length, offsetAtt.StartOffset);
    assertEquals(s.Length, offsetAtt.EndOffset);
}
/// <summary>
/// Enumerates every gram expected for <paramref name="s"/> and asserts that the tokenizer under
/// test produces the same sequence. The Java original declared <c>throws java.io.IOException</c>.
/// </summary>
/// <param name="minGram">Smallest gram size, counted in code points.</param>
/// <param name="maxGram">Largest gram size, counted in code points.</param>
/// <param name="s">Input text handed to the tokenizer.</param>
/// <param name="nonTokenChars">Forwarded to <c>isTokenChar</c> and to the tokenizer helper.</param>
/// <param name="edgesOnly">
/// When true, a gram is only expected if it starts at code point 0 or right after a code point
/// that <c>isTokenChar</c> rejects.
/// </param>
internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
{
    // convert the string to code points
    int[] codePoints = toCodePoints(s);

    // offsets[k] is the sum of Character.CharCount over the first k code points.
    int[] offsets = new int[codePoints.Length + 1];
    for (int i = 0; i < codePoints.Length; ++i)
    {
        offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
    }

    TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
    ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
    IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
    IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
    IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
    grams.Reset();

    for (int start = 0; start < codePoints.Length; ++start)
    {
        for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
        {
            // Java used a labelled "continue nextGram"; a flag gives the same skip without labels.
            // not on an edge => no gram expected
            bool skipGram = edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]);

            // A gram containing any non-token code point is not expected either.
            for (int j = start; !skipGram && j < end; ++j)
            {
                skipGram = !isTokenChar(nonTokenChars, codePoints[j]);
            }

            if (skipGram)
            {
                continue;
            }

            assertTrue(grams.IncrementToken());
            assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
            assertEquals(1, posIncAtt.PositionIncrement);
            assertEquals(1, posLenAtt.PositionLength);
            // NOTE(review): offsets are read as properties, as in the ported TestNGrams variant
            // in this file; confirm against the IOffsetAttribute in use.
            assertEquals(offsets[start], offsetAtt.StartOffset);
            assertEquals(offsets[end], offsetAtt.EndOffset);
        }
    }

    // No further tokens; after End() both offsets must equal the input length.
    assertFalse(grams.IncrementToken());
    grams.End();
    assertEquals(s.Length, offsetAtt.StartOffset);
    assertEquals(s.Length, offsetAtt.EndOffset);
}
/// <summary>
/// Compares the token stream produced for <paramref name="s"/> with the grams obtained by
/// enumerating every (start, end) window over its code points.
/// </summary>
/// <param name="minGram">Smallest window size, counted in code points.</param>
/// <param name="maxGram">Largest window size, counted in code points.</param>
/// <param name="s">Input text handed to the tokenizer.</param>
/// <param name="nonTokenChars">Forwarded to <c>isTokenChar</c> and to the tokenizer helper.</param>
/// <param name="edgesOnly">
/// When true, a window only counts if it starts at code point 0 or right after a code point
/// that <c>isTokenChar</c> rejects.
/// </param>
internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
{
    int[] codePoints = toCodePoints(s);
    int total = codePoints.Length;

    // utf16Offsets[k] is the sum of Character.CharCount over the first k code points.
    int[] utf16Offsets = new int[total + 1];
    for (int idx = 1; idx <= total; ++idx)
    {
        utf16Offsets[idx] = utf16Offsets[idx - 1] + Character.CharCount(codePoints[idx - 1]);
    }

    TokenStream tokens = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
    ICharTermAttribute termAttr = tokens.AddAttribute<ICharTermAttribute>();
    IPositionIncrementAttribute incAttr = tokens.AddAttribute<IPositionIncrementAttribute>();
    IPositionLengthAttribute lenAttr = tokens.AddAttribute<IPositionLengthAttribute>();
    IOffsetAttribute offAttr = tokens.AddAttribute<IOffsetAttribute>();
    tokens.Reset();

    // NOTE(review): StartOffset()/EndOffset() are invoked as methods below, while another
    // TestNGrams variant in this file reads them as properties; confirm which form the
    // IOffsetAttribute in use exposes.
    for (int start = 0; start < total; ++start)
    {
        for (int end = start + minGram; end <= start + maxGram && end <= total; ++end)
        {
            // In edge-only mode a window preceded by a token char is not on an edge.
            bool expectGram = !(edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]));

            // Every code point inside the window must be a token char.
            int probe = start;
            while (expectGram && probe < end)
            {
                expectGram = isTokenChar(nonTokenChars, codePoints[probe]);
                ++probe;
            }

            if (expectGram)
            {
                assertTrue(tokens.IncrementToken());
                assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAttr.ToString()));
                assertEquals(1, incAttr.PositionIncrement);
                assertEquals(1, lenAttr.PositionLength);
                assertEquals(utf16Offsets[start], offAttr.StartOffset());
                assertEquals(utf16Offsets[end], offAttr.EndOffset());
            }
        }
    }

    // The stream must now be exhausted; after End() both offsets must equal the input length.
    assertFalse(tokens.IncrementToken());
    tokens.End();
    assertEquals(s.Length, offAttr.StartOffset());
    assertEquals(s.Length, offAttr.EndOffset());
}