Exemplo n.º 1
0
        // LUCENE-1441
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOffsets() throws Exception
        /// <summary>
        /// Tokenizes the text "abcd" through a <c>KeywordAnalyzer</c> and asserts that exactly one
        /// token is produced, with start offset 0 and end offset 4.
        /// </summary>
        public virtual void testOffsets()
        {
            KeywordAnalyzer analyzer = new KeywordAnalyzer();
            TokenStream ts = analyzer.tokenStream("field", new StringReader("abcd"));

            try
            {
                OffsetAttribute offsets = ts.addAttribute(typeof(OffsetAttribute));
                ts.reset();

                // One token spanning offsets 0..4, then nothing more.
                assertTrue(ts.incrementToken());
                assertEquals(0, offsets.startOffset());
                assertEquals(4, offsets.endOffset());
                assertFalse(ts.incrementToken());

                ts.end();
            }
            finally
            {
                // Runs whether or not the assertions above passed.
                IOUtils.closeWhileHandlingException(ts);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a filter from a prefix stream and a suffix stream. The suffix stream is handed to the
        /// base constructor, and the same six attribute types are registered both on this filter and on
        /// the prefix stream.
        /// </summary>
        /// <param name="prefix">Stream stored in <c>this.prefix</c>; its attributes are kept in the <c>p_*</c> members.</param>
        /// <param name="suffix">Stream stored in <c>this.suffix</c> and passed to the base constructor.</param>
        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
        {
            this.suffix = suffix;
            this.prefix = prefix;
            this.prefixExhausted = false;

            // Attributes registered on this filter itself.
            this.termAtt = this.addAttribute(typeof(CharTermAttribute));
            this.posIncrAtt = this.addAttribute(typeof(PositionIncrementAttribute));
            this.payloadAtt = this.addAttribute(typeof(PayloadAttribute));
            this.offsetAtt = this.addAttribute(typeof(OffsetAttribute));
            this.typeAtt = this.addAttribute(typeof(TypeAttribute));
            this.flagsAtt = this.addAttribute(typeof(FlagsAttribute));

            // The same attribute types, registered on the prefix stream.
            this.p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
            this.p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
            this.p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
            this.p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
            this.p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
            this.p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
        }
	  /// <summary>
	  /// Wires up a prefix stream and a suffix stream. The suffix stream goes to the base constructor;
	  /// six attribute types are then registered on this filter and, separately, on the prefix stream.
	  /// </summary>
	  /// <param name="prefix">Stream kept in <c>this.prefix</c>; its attributes land in the <c>p_*</c> members.</param>
	  /// <param name="suffix">Stream kept in <c>this.suffix</c> and forwarded to the base constructor.</param>
	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
	  {
		this.suffix = suffix;
		this.prefix = prefix;
		this.prefixExhausted = false;

		// Own attributes.
		this.termAtt = this.addAttribute(typeof(CharTermAttribute));
		this.posIncrAtt = this.addAttribute(typeof(PositionIncrementAttribute));
		this.payloadAtt = this.addAttribute(typeof(PayloadAttribute));
		this.offsetAtt = this.addAttribute(typeof(OffsetAttribute));
		this.typeAtt = this.addAttribute(typeof(TypeAttribute));
		this.flagsAtt = this.addAttribute(typeof(FlagsAttribute));

		// Attributes of the prefix stream.
		this.p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
		this.p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
		this.p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
		this.p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
		this.p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
		this.p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
	  }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws java.io.IOException
        /// <summary>
        /// Runs a <c>TokenOffsetPayloadTokenFilter</c> over a <c>MockTokenizer</c> (configured with
        /// <c>MockTokenizer.WHITESPACE</c>) and checks, for every token, that the two ints decoded from
        /// its payload at byte positions 0 and 4 equal the token's start and end offsets.
        /// Ten tokens are expected in total.
        /// </summary>
        public virtual void test()
        {
            string sentence = "The quick red fox jumped over the lazy brown dogs";

            MockTokenizer tokenizer = new MockTokenizer(new StringReader(sentence), MockTokenizer.WHITESPACE, false);
            TokenOffsetPayloadTokenFilter filter = new TokenOffsetPayloadTokenFilter(tokenizer);
            int seen = 0;
            PayloadAttribute payloadAtt = filter.getAttribute(typeof(PayloadAttribute));
            OffsetAttribute offsetAtt = filter.getAttribute(typeof(OffsetAttribute));

            filter.reset();
            // The counter only advances once all assertions for the current token have passed.
            for (; filter.incrementToken(); seen++)
            {
                BytesRef payload = payloadAtt.Payload;
                assertTrue("pay is null and it shouldn't be", payload != null);

                sbyte[] raw = payload.bytes;

                int decodedStart = PayloadHelper.decodeInt(raw, 0);
                assertTrue(decodedStart + " does not equal: " + offsetAtt.startOffset(), decodedStart == offsetAtt.startOffset());

                int decodedEnd = PayloadHelper.decodeInt(raw, 4);
                assertTrue(decodedEnd + " does not equal: " + offsetAtt.endOffset(), decodedEnd == offsetAtt.endOffset());
            }

            assertTrue(seen + " does not equal: " + 10, seen == 10);
        }
Exemplo n.º 5
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances this filter to its next token.
        /// <para>
        /// While a previously read token is still being segmented (<c>hasMoreTokensInClone</c>), the next
        /// segment reported by <c>breaker</c> is emitted. Otherwise the next token is pulled from
        /// <c>input</c>: a token that is empty, or whose first character is not in the THAI Unicode block,
        /// is passed through unchanged; any other token is copied into <c>clonedToken</c> and its first
        /// segment is emitted.
        /// </para>
        /// </summary>
        /// <returns>
        /// <c>true</c> if a token is available; <c>false</c> if <c>input</c> has no more tokens or
        /// <c>breaker</c> reports <c>BreakIterator.DONE</c> for a freshly read token.
        /// </returns>
        public override bool incrementToken()
        {
            if (hasMoreTokensInClone)
            {
                // These locals must not share a name with firstEnd below: C# rejects a nested local that
                // reuses the name of a local declared anywhere in the enclosing block (CS0136).
                int segmentStart = breaker.current();
                int segmentEnd = breaker.next();
                if (segmentEnd != BreakIterator.DONE)
                {
                    clonedToken.copyTo(this);
                    termAtt.copyBuffer(clonedTermAtt.buffer(), segmentStart, segmentEnd - segmentStart);
                    if (hasIllegalOffsets)
                    {
                        // Offsets do not line up with the term text: keep the whole-token offsets.
                        offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
                    }
                    else
                    {
                        offsetAtt.setOffset(clonedOffsetAtt.startOffset() + segmentStart, clonedOffsetAtt.startOffset() + segmentEnd);
                    }
                    if (handlePosIncr)
                    {
                        posAtt.PositionIncrement = 1;
                    }
                    return true;
                }
                hasMoreTokensInClone = false;
            }

            if (!input.incrementToken())
            {
                return false;
            }

            // NOTE(review): char.UnicodeBlock is a Java API left behind by the converter; System.Char has no
            // such member, so this check needs a port-specific replacement - TODO confirm what the project uses.
            if (termAtt.length() == 0 || char.UnicodeBlock.of(termAtt.charAt(0)) != char.UnicodeBlock.THAI)
            {
                return true;
            }

            hasMoreTokensInClone = true;

            // if length by start + end offsets doesn't match the term text then assume
            // this is a synonym and don't adjust the offsets.
            hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();

            // we lazy init the cloned token, as in ctor not all attributes may be added
            if (clonedToken == null)
            {
                clonedToken = cloneAttributes();
                clonedTermAtt = clonedToken.getAttribute(typeof(CharTermAttribute));
                clonedOffsetAtt = clonedToken.getAttribute(typeof(OffsetAttribute));
            }
            else
            {
                this.copyTo(clonedToken);
            }

            // reinit CharacterIterator
            charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
            breaker.Text = charIterator;
            int firstEnd = breaker.next();
            if (firstEnd != BreakIterator.DONE)
            {
                termAtt.Length = firstEnd;
                if (hasIllegalOffsets)
                {
                    offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
                }
                else
                {
                    offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + firstEnd);
                }
                // position increment keeps as it is for first token
                return true;
            }
            return false;
        }
Exemplo n.º 6
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: @Override public void reset() throws java.io.IOException
 /// <summary>
 /// Resets the base class, then nulls the three cloned-token references and clears
 /// <c>hasMoreTokensInClone</c>.
 /// </summary>
 public override void reset()
 {
     base.reset();

     // Discard everything that was derived from the previously cloned token.
     this.clonedOffsetAtt = null;
     this.clonedTermAtt = null;
     this.clonedToken = null;
     this.hasMoreTokensInClone = false;
 }
 /// <summary>
 /// Stores the supplied <c>TestRemoveDuplicatesTokenFilter</c> instance and token enumerator, and
 /// registers the term, offset and position-increment attributes on this stream.
 /// </summary>
 /// <param name="outerInstance">Instance kept in <c>this.outerInstance</c>.</param>
 /// <param name="toks">Enumerator kept in <c>this.toks</c>.</param>
 public TokenStreamAnonymousInnerClassHelper(TestRemoveDuplicatesTokenFilter outerInstance, IEnumerator<Token> toks)
 {
     this.toks = toks;
     this.outerInstance = outerInstance;

     this.termAtt = this.addAttribute(typeof(CharTermAttribute));
     this.offsetAtt = this.addAttribute(typeof(OffsetAttribute));
     this.posIncAtt = this.addAttribute(typeof(PositionIncrementAttribute));
 }
Exemplo n.º 8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
        /// <summary>
        /// Checks the token stream produced by <c>NGramTokenizerAnonymousInnerClassHelper</c> for
        /// <paramref name="s"/> against the grams this method enumerates itself: for every start position
        /// and every length from <paramref name="minGram"/> to <paramref name="maxGram"/>, a gram is
        /// expected only if all of its code points satisfy <c>isTokenChar</c> and, when
        /// <paramref name="edgesOnly"/> is set, the code point before the start does not.
        /// </summary>
        /// <param name="minGram">Smallest gram length, in code points.</param>
        /// <param name="maxGram">Largest gram length, in code points.</param>
        /// <param name="s">Text to tokenize.</param>
        /// <param name="nonTokenChars">Passed to <c>isTokenChar</c> and to the tokenizer helper.</param>
        /// <param name="edgesOnly">When true, grams whose start is preceded by a token char are skipped.</param>
        internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
        {
            // convert the string to code points
            int[] codePoints = toCodePoints(s);

            // offsets[i] is the running sum of char.charCount over the first i code points;
            // it is what the start/end offsets of each gram are compared against below.
            int[] offsets = new int[codePoints.Length + 1];
            for (int i = 0; i < codePoints.Length; ++i)
            {
                offsets[i + 1] = offsets[i] + char.charCount(codePoints[i]);
            }

            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
            CharTermAttribute termAtt = grams.addAttribute(typeof(CharTermAttribute));
            PositionIncrementAttribute posIncAtt = grams.addAttribute(typeof(PositionIncrementAttribute));
            PositionLengthAttribute posLenAtt = grams.addAttribute(typeof(PositionLengthAttribute));
            OffsetAttribute offsetAtt = grams.addAttribute(typeof(OffsetAttribute));

            grams.reset();
            for (int start = 0; start < codePoints.Length; ++start)
            {
                for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
                {
                    if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
                    {
                        // not on an edge
                        continue;
                    }

                    // Skip this candidate as soon as one code point in [start, end) is not a token char.
                    bool allTokenChars = true;
                    for (int j = start; j < end; ++j)
                    {
                        if (!isTokenChar(nonTokenChars, codePoints[j]))
                        {
                            allTokenChars = false;
                            break;
                        }
                    }
                    if (!allTokenChars)
                    {
                        continue;
                    }

                    assertTrue(grams.incrementToken());
                    assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
                    assertEquals(1, posIncAtt.PositionIncrement);
                    assertEquals(1, posLenAtt.PositionLength);
                    assertEquals(offsets[start], offsetAtt.startOffset());
                    assertEquals(offsets[end], offsetAtt.endOffset());
                }
            }

            // After the last expected gram the stream must be exhausted, and end() must leave both
            // offsets at the length of the input.
            assertFalse(grams.incrementToken());
            grams.end();
            assertEquals(s.Length, offsetAtt.startOffset());
            assertEquals(s.Length, offsetAtt.endOffset());
        }