Exemplo n.º 1
        // LUCENE-1441
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOffsets() throws Exception
        /// <summary>
        /// Builds a token stream from a new KeywordAnalyzer over the text "abcd"
        /// (field name "field") and asserts that the stream's OffsetAttribute
        /// reports start offset 0 and end offset 4.
        /// </summary>
        // NOTE(review): this excerpt shows no braces and no call that advances the
        // stream before the assertions; lines may have been dropped - confirm
        // against the complete source file.
        public virtual void testOffsets()
            TokenStream stream = (new KeywordAnalyzer()).tokenStream("field", new StringReader("abcd"));

                // Register (or fetch) the offset attribute on the stream, then check both ends.
                OffsetAttribute offsetAtt = stream.addAttribute(typeof(OffsetAttribute));
                assertEquals(0, offsetAtt.startOffset());
                assertEquals(4, offsetAtt.endOffset());
Exemplo n.º 2
        /// <summary>
        /// Creates the filter from two token streams. The suffix stream is handed
        /// to the base constructor; both streams are also stored in fields, and
        /// prefixExhausted starts out false.
        /// </summary>
        /// <param name="prefix">Stream stored in this.prefix; its attributes are captured in the p_* fields.</param>
        /// <param name="suffix">Stream passed to the base constructor and stored in this.suffix.</param>
        // NOTE(review): the braces of this constructor are not present in this excerpt.
        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
            this.suffix     = suffix;
            this.prefix     = prefix;
            prefixExhausted = false;

            // Attributes registered on this filter itself.
            termAtt    = addAttribute(typeof(CharTermAttribute));
            posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
            payloadAtt = addAttribute(typeof(PayloadAttribute));
            offsetAtt  = addAttribute(typeof(OffsetAttribute));
            typeAtt    = addAttribute(typeof(TypeAttribute));
            flagsAtt   = addAttribute(typeof(FlagsAttribute));

            // The same six attribute types, registered on the prefix stream (p_ fields).
            p_termAtt    = prefix.addAttribute(typeof(CharTermAttribute));
            p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
            p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
            p_offsetAtt  = prefix.addAttribute(typeof(OffsetAttribute));
            p_typeAtt    = prefix.addAttribute(typeof(TypeAttribute));
            p_flagsAtt   = prefix.addAttribute(typeof(FlagsAttribute));
	  /// <summary>
	  /// Creates the filter from two token streams. The suffix stream is handed
	  /// to the base constructor; both streams are also stored in fields, and
	  /// prefixExhausted starts out false.
	  /// </summary>
	  /// <param name="prefix">Stream stored in this.prefix; its attributes are captured in the p_* fields.</param>
	  /// <param name="suffix">Stream passed to the base constructor and stored in this.suffix.</param>
	  // NOTE(review): the braces of this constructor are not present in this excerpt.
	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
		this.suffix = suffix;
		this.prefix = prefix;
		prefixExhausted = false;

		// Attributes registered on this filter itself.
		termAtt = addAttribute(typeof(CharTermAttribute));
		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
		payloadAtt = addAttribute(typeof(PayloadAttribute));
		offsetAtt = addAttribute(typeof(OffsetAttribute));
		typeAtt = addAttribute(typeof(TypeAttribute));
		flagsAtt = addAttribute(typeof(FlagsAttribute));

		// The same six attribute types, registered on the prefix stream (p_ fields).
		p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
		p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
		p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
		p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
		p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
		p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws java.io.IOException
        /// <summary>
        /// Runs a TokenOffsetPayloadTokenFilter over a MockTokenizer reading a fixed
        /// sentence and, for each token, asserts that the payload is non-null and
        /// that the ints decoded from payload bytes at positions 0 and 4 equal the
        /// token's start and end offsets. Finally asserts that count equals 10.
        /// </summary>
        // NOTE(review): this excerpt shows no braces and no statement that increments
        // count, so as shown the final assertion would compare 0 with 10. Lines appear
        // to have been dropped - confirm against the complete source file.
        public virtual void test()
            string test = "The quick red fox jumped over the lazy brown dogs";

            TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
            int count = 0;
            PayloadAttribute payloadAtt = nptf.getAttribute(typeof(PayloadAttribute));
            OffsetAttribute  offsetAtt  = nptf.getAttribute(typeof(OffsetAttribute));

            while (nptf.incrementToken())
                BytesRef pay = payloadAtt.Payload;
                assertTrue("pay is null and it shouldn't be", pay != null);
                sbyte[] data  = pay.bytes;
                // The int decoded at byte position 0 is compared with the start offset.
                int     start = PayloadHelper.decodeInt(data, 0);
                assertTrue(start + " does not equal: " + offsetAtt.startOffset(), start == offsetAtt.startOffset());
                // The int decoded at byte position 4 is compared with the end offset.
                int end = PayloadHelper.decodeInt(data, 4);
                assertTrue(end + " does not equal: " + offsetAtt.endOffset(), end == offsetAtt.endOffset());
            assertTrue(count + " does not equal: " + 10, count == 10);
Exemplo n.º 5
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Produces the next token. While hasMoreTokensInClone is set, the next
        /// segment reported by breaker is copied out of the cloned term buffer.
        /// Otherwise a token is pulled from input: it is returned as-is when it is
        /// empty or its first char is not in the THAI Unicode block; if not, the
        /// token state is cloned (lazily, on first use) and breaker is re-initialised
        /// over the cloned term text to emit its first segment.
        /// </summary>
        /// <returns>
        /// false when input.incrementToken() returns false, or when the first
        /// breaker.next() after re-initialisation yields BreakIterator.DONE;
        /// true on the paths that emit a token.
        /// </returns>
        // NOTE(review): braces and, apparently, 'else' lines are absent from this
        // excerpt, so the nesting described in comments below is inferred from
        // indentation only - confirm against the complete source file.
        public override bool incrementToken()
            if (hasMoreTokensInClone)
              // Segment boundaries inside the cloned term: [start, end).
              int start = breaker.current();
              int end = breaker.next();
              if (end != BreakIterator.DONE)
            termAtt.copyBuffer(clonedTermAtt.buffer(), start, end - start);
            if (hasIllegalOffsets)
              // NOTE(review): the two setOffset calls below look like the two arms of an
              // if/else whose 'else' line is missing here - presumably only one runs; confirm.
              offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
              offsetAtt.setOffset(clonedOffsetAtt.startOffset() + start, clonedOffsetAtt.startOffset() + end);
            if (handlePosIncr)
                posAtt.PositionIncrement = 1;
            return true;
              // NOTE(review): presumably reached when breaker.next() returned DONE - confirm.
              hasMoreTokensInClone = false;

            if (!input.incrementToken())
              return false;

            // Empty terms and terms whose first char is outside the THAI block pass through unchanged.
            if (termAtt.length() == 0 || char.UnicodeBlock.of(termAtt.charAt(0)) != char.UnicodeBlock.THAI)
              return true;

            hasMoreTokensInClone = true;

            // if length by start + end offsets doesn't match the term text then assume
            // this is a synonym and don't adjust the offsets.
            hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();

            // we lazy init the cloned token, as in ctor not all attributes may be added
            if (clonedToken == null)
              clonedToken = cloneAttributes();
              clonedTermAtt = clonedToken.getAttribute(typeof(CharTermAttribute));
              clonedOffsetAtt = clonedToken.getAttribute(typeof(OffsetAttribute));

            // reinit CharacterIterator
            charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
            breaker.Text = charIterator;
            int end = breaker.next();
            if (end != BreakIterator.DONE)
              // Shorten the current term to the first segment.
              termAtt.Length = end;
              if (hasIllegalOffsets)
            // NOTE(review): as above, these two setOffset calls presumably belong to an
            // if/else pair whose 'else' line is missing from this excerpt - confirm.
            offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
            offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + end);
              // position increment keeps as it is for first token
              return true;
            return false;
Exemplo n.º 6
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: @Override public void reset() throws java.io.IOException
 /// <summary>
 /// Clears the clone-related state: hasMoreTokensInClone becomes false and the
 /// cloned token and its two cached attributes are set to null.
 /// </summary>
 // NOTE(review): no call to the base class reset is visible in this excerpt, and the
 // braces are absent; a line may have been dropped - confirm against the full source.
 public override void reset()
     hasMoreTokensInClone = false;
     clonedToken = null;
     clonedTermAtt = null;
     clonedOffsetAtt = null;
 /// <summary>
 /// Stores the enclosing test instance and the token enumerator, then registers
 /// the CharTerm, Offset and PositionIncrement attributes on this stream.
 /// </summary>
 /// <param name="outerInstance">Enclosing TestRemoveDuplicatesTokenFilter, kept in this.outerInstance.</param>
 /// <param name="toks">Enumerator of Token objects, kept in this.toks.</param>
 // NOTE(review): the braces of this constructor are not present in this excerpt.
 public TokenStreamAnonymousInnerClassHelper(TestRemoveDuplicatesTokenFilter outerInstance, IEnumerator<Token> toks)
     this.outerInstance = outerInstance;
       this.toks = toks;
       termAtt = addAttribute(typeof(CharTermAttribute));
       offsetAtt = addAttribute(typeof(OffsetAttribute));
       posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
Exemplo n.º 8
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
        /// <summary>
        /// Enumerates every expected gram of s, as spans of code points, and asserts
        /// the term text, position increment (1), position length (1) and start/end
        /// offsets of the tokenizer under test. After the loops it asserts that both
        /// offsets equal s.Length.
        /// </summary>
        /// <param name="minGram">Smallest span length, in code points, that is enumerated.</param>
        /// <param name="maxGram">Largest span length, in code points, that is enumerated.</param>
        /// <param name="s">Input text; read by the tokenizer and converted to code points.</param>
        /// <param name="nonTokenChars">Passed to isTokenChar and to the tokenizer helper's constructor.</param>
        /// <param name="edgesOnly">When true, spans whose preceding code point is a token char are skipped.</param>
        // NOTE(review): braces are absent from this excerpt and no call that advances
        // grams (e.g. incrementToken) is visible before the assertions; lines appear
        // to have been dropped - confirm against the complete source file.
        internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
            // convert the string to code points
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] codePoints = toCodePoints(s);
            int[] codePoints = toCodePoints(s);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int[] offsets = new int[codePoints.length + 1];
            int[] offsets = new int[codePoints.Length + 1];
            // Running sum of char.charCount per code point, so offsets[k] is the offset
            // before code point k (presumably counted in UTF-16 chars - confirm).
            for (int i = 0; i < codePoints.Length; ++i)
                offsets[i + 1] = offsets[i] + char.charCount(codePoints[i]);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.TokenStream grams = new NGramTokenizer(TEST_VERSION_CURRENT, new java.io.StringReader(s), minGram, maxGram, edgesOnly)
            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.tokenattributes.CharTermAttribute termAtt = grams.addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
            CharTermAttribute termAtt = grams.addAttribute(typeof(CharTermAttribute));
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute posIncAtt = grams.addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
            PositionIncrementAttribute posIncAtt = grams.addAttribute(typeof(PositionIncrementAttribute));
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute posLenAtt = grams.addAttribute(org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute.class);
            PositionLengthAttribute posLenAtt = grams.addAttribute(typeof(PositionLengthAttribute));
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsetAtt = grams.addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
            OffsetAttribute offsetAtt = grams.addAttribute(typeof(OffsetAttribute));

            // Candidate spans are [start, end) over codePoints, with length between
            // minGram and maxGram and end never past the last code point.
            for (int start = 0; start < codePoints.Length; ++start)
                for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
                    if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
                        // not on an edge
                        goto nextGramContinue;
                    // A span containing any non-token code point is skipped as well.
                    for (int j = start; j < end; ++j)
                        if (!isTokenChar(nonTokenChars, codePoints[j]))
                            goto nextGramContinue;
                    assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
                    assertEquals(1, posIncAtt.PositionIncrement);
                    assertEquals(1, posLenAtt.PositionLength);
                    assertEquals(offsets[start], offsetAtt.startOffset());
                    assertEquals(offsets[end], offsetAtt.endOffset());
                    nextGramContinue :;
                nextGramBreak :;
            // Final check: both offsets equal the length of the input string.
            assertEquals(s.Length, offsetAtt.startOffset());
            assertEquals(s.Length, offsetAtt.endOffset());