Ejemplo n.º 1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the wrapped stream by one token and lowercases that token's term
        /// buffer in place. If the term is longer than one character, starts with
        /// 'n' or 't', and its second character is accepted by <c>isUpperVowel</c>,
        /// a '-' is inserted after the first character and lowercasing starts after
        /// the hyphen.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the wrapped stream produced a token; <c>false</c> otherwise.
        /// </returns>
        public override bool incrementToken()
        {
            if (!input.incrementToken())
            {
                return false;
            }

            char[] chArray = termAtt.buffer();
            int    chLen   = termAtt.length();
            int    idx     = 0;

            if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1]))
            {
                // Grow by one slot and shift everything after the prefix letter to
                // the right, opening position 1 for the hyphen.
                chArray = termAtt.resizeBuffer(chLen + 1);
                for (int i = chLen; i > 1; i--)
                {
                    chArray[i] = chArray[i - 1];
                }
                chArray[1]     = '-';
                termAtt.Length = chLen + 1;
                // Positions 0 and 1 are a lowercase prefix letter and the hyphen,
                // so lowercasing can start at position 2.
                idx            = 2;
                chLen          = chLen + 1;
            }

            // Lowercase one UTF-16 code unit at a time. The invariant mapping is used
            // so the result does not depend on the current thread culture (with
            // char.ToLower, a Turkish culture would map 'I' to dotless 'ı').
            for (int i = idx; i < chLen; i++)
            {
                chArray[i] = char.ToLowerInvariant(chArray[i]);
            }
            return true;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Flushes a bigram token to output from our buffer
        /// This is the normal case, e.g. ABC -> AB BC
        /// </summary>
        private void flushBigram()
        {
            clearAttributes();
            // Room for two values of up to two UTF-16 code units each (2 supplementaries).
            char[] termBuffer = termAtt.resizeBuffer(4);
            // NOTE(review): buffer is assumed to hold Unicode code points, as the
            // 4-code-unit sizing above implies - confirm against its declaration.
            int firstLen = writeCodePoint(buffer[index], termBuffer, 0);
            int totalLen = firstLen + writeCodePoint(buffer[index + 1], termBuffer, firstLen);

            termAtt.Length = totalLen;
            // The bigram spans from the start of its first element to the end of its second.
            offsetAtt.setOffset(startOffset[index], endOffset[index + 1]);
            typeAtt.Type = DOUBLE_TYPE;
            // when outputting unigrams, all bigrams are synonyms that span two unigrams
            if (outputUnigrams)
            {
                posIncAtt.PositionIncrement = 0;
                posLengthAtt.PositionLength = 2;
            }
            // Advance by one element only, so the next bigram overlaps this one (AB, then BC).
            index++;
        }

        /// <summary>
        /// Writes the UTF-16 encoding of <paramref name="codePoint"/> into
        /// <paramref name="dest"/> starting at <paramref name="offset"/>.
        /// Replaces the Java-only <c>Character.toChars(int, char[], int)</c> call,
        /// which has no counterpart on <c>System.Char</c>.
        /// </summary>
        /// <param name="codePoint">Value in the range 0..0x10FFFF.</param>
        /// <param name="dest">Destination array; needs up to two free slots at <paramref name="offset"/>.</param>
        /// <param name="offset">Index of the first slot to write.</param>
        /// <returns>Number of code units written: 1 below 0x10000, otherwise 2.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="codePoint"/> is negative or greater than 0x10FFFF.
        /// </exception>
        private static int writeCodePoint(int codePoint, char[] dest, int offset)
        {
            if (codePoint < 0 || codePoint > 0x10FFFF)
            {
                throw new System.ArgumentOutOfRangeException("codePoint");
            }
            if (codePoint < 0x10000)
            {
                // Single code unit; surrogate values are passed through unchanged.
                dest[offset] = (char)codePoint;
                return 1;
            }
            // Supplementary plane: split the 20-bit remainder into a high/low surrogate pair.
            int remainder = codePoint - 0x10000;
            dest[offset]     = (char)(0xD800 + (remainder >> 10));
            dest[offset + 1] = (char)(0xDC00 + (remainder & 0x3FF));
            return 2;
        }
Ejemplo n.º 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        public override bool incrementToken()
        {
            // No further token from the wrapped stream.
            if (!input.incrementToken())
            {
                return false;
            }

            // Tokens flagged as keywords are passed through without stemming.
            if (keywordAttr.Keyword)
            {
                return true;
            }

            // The stemmer can lengthen a word by one character (worst case
            // '*çom' -> '*ción'), so reserve one extra slot before stemming in place.
            int termLength = termAtt.length();
            var termChars  = termAtt.resizeBuffer(termLength + 1);
            termAtt.Length = stemmer.stem(termChars, termLength);
            return true;
        }