Example #1
0
        /// <summary>
        /// Advances to the next token the scanner produces whose length does not
        /// exceed <c>maxTokenLength</c>, and fills in the position-increment, term,
        /// offset and type attributes for it.
        /// </summary>
        /// <remarks>
        /// Ported from Java; see org.apache.lucene.analysis.TokenStream#next().
        /// </remarks>
        /// <returns>
        /// <c>true</c> when a token was emitted; <c>false</c> once the scanner
        /// reports <c>YYEOF</c>.
        /// </returns>
        public override bool IncrementToken()
        {
            ClearAttributes();
            skippedPositions = 0;

            for (;;)
            {
                int kind = scanner.NextToken;
                if (kind == StandardTokenizerInterface_Fields.YYEOF)
                {
                    return false;
                }

                if (scanner.yylength() > maxTokenLength)
                {
                    // A too-long term is dropped, but it still counts towards
                    // the position increment of the next emitted token.
                    skippedPositions++;
                    continue;
                }

                posIncrAtt.PositionIncrement = skippedPositions + 1;
                scanner.getText(termAtt);

                // Offsets are taken from the term as scanned, i.e. before any
                // trailing '.' is trimmed below.
                int begin = scanner.yychar();
                int correctedBegin = correctOffset(begin);
                int correctedEnd = correctOffset(begin + termAtt.length());
                offsetAtt.SetOffset(correctedBegin, correctedEnd);

                // The ACRONYM_DEP special case should be removed in the next
                // release. For now, invalid acronyms are reported as HOST; once
                // removed, only the plain TOKEN_TYPES[kind] lookup should remain.
                bool deprecatedAcronym = kind == StandardTokenizer.ACRONYM_DEP;
                typeAtt.Type = StandardTokenizer.TOKEN_TYPES[deprecatedAcronym ? StandardTokenizer.HOST : kind];
                if (deprecatedAcronym)
                {
                    termAtt.Length = termAtt.length() - 1; // remove extra '.'
                }

                return true;
            }
        }
Example #2
0
        /// <summary>
        /// Pulls tokens from the scanner until one no longer than
        /// <c>maxTokenLength</c> is found, then populates the position-increment,
        /// term, offset and type attributes from it.
        /// </summary>
        /// <remarks>
        /// Ported from Java; see org.apache.lucene.analysis.TokenStream#next().
        /// </remarks>
        /// <returns>
        /// <c>true</c> when a token was emitted; <c>false</c> once the scanner
        /// reports <c>YYEOF</c>.
        /// </returns>
        public override bool IncrementToken()
        {
            ClearAttributes();
            skippedPositions = 0;

            for (;;)
            {
                int kind = scanner.NextToken;
                if (kind == StandardTokenizerInterface_Fields.YYEOF)
                {
                    return false;
                }

                if (scanner.yylength() > maxTokenLength)
                {
                    // When we skip a too-long term, we still bump the position
                    // increment that the next emitted token will carry.
                    skippedPositions++;
                    continue;
                }

                posIncrAtt.PositionIncrement = skippedPositions + 1;
                scanner.getText(termAtt);

                // Offsets are taken from the term as scanned, i.e. before any
                // trailing '.' is trimmed below.
                int begin = scanner.yychar();
                int correctedBegin = CorrectOffset(begin);
                int correctedEnd = CorrectOffset(begin + termAtt.Length);
                offsetAtt.SetOffset(correctedBegin, correctedEnd);

                // ACRONYM_DEP tokens are reported with the HOST type and lose
                // their last character; every other token keeps its own type.
                bool deprecatedAcronym = kind == ClassicTokenizer.ACRONYM_DEP;
                typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[deprecatedAcronym ? ClassicTokenizer.HOST : kind];
                if (deprecatedAcronym)
                {
                    termAtt.Length = termAtt.Length - 1; // remove extra '.'
                }

                return true;
            }
        }