Beispiel #1
0
        /// <summary>
        /// Verifies that the n-gram tokenizer emits exactly the expected grams for
        /// <paramref name="s"/>: for every window [start, end) of token chars (and,
        /// when <paramref name="edgesOnly"/> is set, only windows anchored at a
        /// token edge) it checks the term text, position increment/length and
        /// char offsets, then asserts the stream is exhausted.
        /// </summary>
        internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
        {
            // Decode the input into code points and precompute, for each code point
            // index, the corresponding UTF-16 char offset into the original string.
            int[] cp = toCodePoints(s);
            int[] charOffsets = new int[cp.Length + 1];
            for (int i = 0; i < cp.Length; ++i)
            {
                charOffsets[i + 1] = charOffsets[i] + Character.CharCount(cp[i]);
            }

            TokenStream                 grams     = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
            ICharTermAttribute          termAtt   = grams.AddAttribute<ICharTermAttribute>();
            IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
            IPositionLengthAttribute    posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
            IOffsetAttribute            offsetAtt = grams.AddAttribute<IOffsetAttribute>();

            grams.Reset();
            for (int start = 0; start < cp.Length; ++start)
            {
                for (int end = start + minGram; end <= start + maxGram && end <= cp.Length; ++end)
                {
                    if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, cp[start - 1]))
                    {
                        continue; // not on an edge
                    }
                    bool allTokenChars = true;
                    for (int j = start; j < end; ++j)
                    {
                        if (!isTokenChar(nonTokenChars, cp[j]))
                        {
                            allTokenChars = false;
                            break;
                        }
                    }
                    if (!allTokenChars)
                    {
                        continue; // grams never span non-token chars
                    }
                    assertTrue(grams.IncrementToken());

                    assertArrayEquals(Arrays.CopyOfRange(cp, start, end), toCodePoints(termAtt.ToString()));
                    assertEquals(1, posIncAtt.PositionIncrement);
                    assertEquals(1, posLenAtt.PositionLength);
                    assertEquals(charOffsets[start], offsetAtt.StartOffset);
                    assertEquals(charOffsets[end], offsetAtt.EndOffset);
                }
            }
            // After all expected grams the stream must be exhausted and End() must
            // leave the final offsets at the original string length.
            assertFalse(grams.IncrementToken());
            grams.End();
            assertEquals(s.Length, offsetAtt.StartOffset);
            assertEquals(s.Length, offsetAtt.EndOffset);
        }
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws java.io.IOException
 //JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
 /// <summary>
 /// Verifies that the n-gram tokenizer emits exactly the expected grams for
 /// <paramref name="s"/> given the min/max gram sizes, checking term text,
 /// position increment/length and char offsets. <paramref name="nonTokenChars"/>
 /// lists characters treated as non-token; <paramref name="edgesOnly"/>
 /// restricts grams to those starting at a token edge.
 /// </summary>
 internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
 {
     // convert the string to code points
     int[] codePoints = toCodePoints(s);
     // offsets[i] = UTF-16 char offset of code point i; length + 1 so that
     // offsets[end] is valid for the exclusive end index.
     int[] offsets = new int[codePoints.Length + 1];
     for (int i = 0; i < codePoints.Length; ++i)
     {
         // Character.CharCount: the converter's char.charCount does not exist in .NET.
         offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
     }
     TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
     // Lucene.NET exposes the generic AddAttribute<T>() with attribute
     // interfaces instead of Java's addAttribute(Class) overload.
     ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
     IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
     IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
     IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
     grams.Reset();
     for (int start = 0; start < codePoints.Length; ++start)
     {
         for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
         {
             if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
             {
                 // not on an edge
                 goto nextGramContinue;
             }
             for (int j = start; j < end; ++j)
             {
                 if (!isTokenChar(nonTokenChars, codePoints[j]))
                 {
                     goto nextGramContinue;
                 }
             }
             assertTrue(grams.IncrementToken());
             // Compare code points of the term text; the attribute must be
             // converted to its string form first (not passed as an object).
             assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
             assertEquals(1, posIncAtt.PositionIncrement);
             assertEquals(1, posLenAtt.PositionLength);
             assertEquals(offsets[start], offsetAtt.StartOffset);
             assertEquals(offsets[end], offsetAtt.EndOffset);
             nextGramContinue:;
         }
     }
     // Stream must be exhausted; End() leaves final offsets at the input length.
     assertFalse(grams.IncrementToken());
     grams.End();
     assertEquals(s.Length, offsetAtt.StartOffset);
     assertEquals(s.Length, offsetAtt.EndOffset);
 }
Beispiel #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
        /// <summary>
        /// Verifies that the n-gram tokenizer emits exactly the expected grams for
        /// <paramref name="s"/> given the min/max gram sizes, checking term text,
        /// position increment/length and char offsets. <paramref name="nonTokenChars"/>
        /// lists characters treated as non-token; <paramref name="edgesOnly"/>
        /// restricts grams to those starting at a token edge.
        /// </summary>
        internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
        {
            // convert the string to code points
            int[] codePoints = toCodePoints(s);
            // offsets[i] = UTF-16 char offset of code point i; length + 1 so that
            // offsets[end] is valid for the exclusive end index.
            int[] offsets = new int[codePoints.Length + 1];
            for (int i = 0; i < codePoints.Length; ++i)
            {
                // Character.CharCount: the converter's char.charCount does not exist in .NET.
                offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
            }
            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
            // Lucene.NET exposes the generic AddAttribute<T>() with attribute
            // interfaces instead of Java's addAttribute(Class) overload.
            ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
            IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
            IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
            IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();

            grams.Reset();
            for (int start = 0; start < codePoints.Length; ++start)
            {
                for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
                {
                    if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
                    {
                        // not on an edge
                        goto nextGramContinue;
                    }
                    for (int j = start; j < end; ++j)
                    {
                        if (!isTokenChar(nonTokenChars, codePoints[j]))
                        {
                            goto nextGramContinue;
                        }
                    }
                    assertTrue(grams.IncrementToken());
                    // Compare code points of the term text; the attribute must be
                    // converted to its string form first (not passed as an object).
                    assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
                    assertEquals(1, posIncAtt.PositionIncrement);
                    assertEquals(1, posLenAtt.PositionLength);
                    assertEquals(offsets[start], offsetAtt.StartOffset);
                    assertEquals(offsets[end], offsetAtt.EndOffset);
                    nextGramContinue:;
                }
            }
            // Stream must be exhausted; End() leaves final offsets at the input length.
            assertFalse(grams.IncrementToken());
            grams.End();
            assertEquals(s.Length, offsetAtt.StartOffset);
            assertEquals(s.Length, offsetAtt.EndOffset);
        }
 /// <summary>
 /// Checks the n-gram tokenizer against a brute-force enumeration of all
 /// expected grams of <paramref name="s"/>: every window [start, end) made
 /// solely of token chars (and, when <paramref name="edgesOnly"/> is set,
 /// starting at a token edge) must be emitted with the right term text,
 /// position increment/length and char offsets.
 /// </summary>
 internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
 {
     // Code-point view of the input plus a cumulative table mapping each
     // code-point index to its UTF-16 char offset in the original string.
     int[] points = toCodePoints(s);
     int[] charOffsets = new int[points.Length + 1];
     for (int k = 0; k < points.Length; ++k)
     {
         charOffsets[k + 1] = charOffsets[k] + Character.CharCount(points[k]);
     }
     TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
     ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
     IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
     IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
     IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
     grams.Reset();
     for (int start = 0; start < points.Length; ++start)
     {
         // In edge-only mode a gram may only start at position 0 or right
         // after a non-token char; this does not depend on the inner loop.
         bool onEdge = !edgesOnly || start == 0 || !isTokenChar(nonTokenChars, points[start - 1]);
         for (int end = start + minGram; end <= start + maxGram && end <= points.Length; ++end)
         {
             bool expected = onEdge;
             for (int j = start; expected && j < end; ++j)
             {
                 expected = isTokenChar(nonTokenChars, points[j]);
             }
             if (!expected)
             {
                 continue; // this gram must not be emitted
             }
             assertTrue(grams.IncrementToken());
             assertArrayEquals(Arrays.CopyOfRange(points, start, end), toCodePoints(termAtt.ToString()));
             assertEquals(1, posIncAtt.PositionIncrement);
             assertEquals(1, posLenAtt.PositionLength);
             assertEquals(charOffsets[start], offsetAtt.StartOffset());
             assertEquals(charOffsets[end], offsetAtt.EndOffset());
         }
     }
     // Nothing may remain; End() leaves final offsets at the input length.
     assertFalse(grams.IncrementToken());
     grams.End();
     assertEquals(s.Length, offsetAtt.StartOffset());
     assertEquals(s.Length, offsetAtt.EndOffset());
 }