예제 #1
0
 /// <summary>
 /// Advances the wrapped input by one token and replaces the term text with the
 /// encoded form of the collation key computed from that text.
 /// </summary>
 /// <returns>
 /// <c>true</c> when the input supplied a token (its term has been rewritten);
 /// <c>false</c> when the input reported no further tokens.
 /// </returns>
 public override bool IncrementToken()
 {
     if (!input.IncrementToken())
     {
         return false;
     }

     // Raw collation key bytes for the current term text.
     var keyBytes = collator.GetCollationKey(termAtt.ToString()).toByteArray();

     // Make room for the encoded key first, then encode directly into the term buffer.
     int encodedSize = IndexableBinaryStringTools.getEncodedLength(keyBytes, 0, keyBytes.Length);
     termAtt.resizeBuffer(encodedSize);
     termAtt.Length = encodedSize;
     IndexableBinaryStringTools.encode(keyBytes, 0, keyBytes.Length, termAtt.buffer(), 0, encodedSize);

     return true;
 }
예제 #2
0
        // ================================================= Helper Methods ================================================

        /// <summary>
        /// Restores the previously saved token state and overwrites its term with the
        /// text accumulated in <c>hyphenated</c>. The end offset is set to
        /// <c>lastEndOffset</c>, and both the saved state and <c>hyphenated</c> are
        /// cleared afterwards.
        /// </summary>
        private void unhyphenate()
        {
            restoreState(savedState);
            savedState = null;

            int joinedLength = hyphenated.Length;
            char[] target = termAttribute.buffer();

            // Ask for a larger buffer only when the joined text is longer than the restored term.
            if (termAttribute.length() < joinedLength)
            {
                target = termAttribute.resizeBuffer(joinedLength);
            }

            // Copy the joined text into the term buffer and publish its length.
            hyphenated.getChars(0, joinedLength, target, 0);
            termAttribute.Length = joinedLength;

            // Keep the restored start offset; only the end offset changes.
            int start = offsetAttribute.startOffset();
            offsetAttribute.setOffset(start, lastEndOffset);

            // Empty the accumulator.
            hyphenated.Length = 0;
        }
예제 #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the input by one token and reverses its term in place. When
        /// <c>marker</c> differs from <c>NOMARKER</c>, the marker character is first
        /// appended to the term so that it is part of the reversed range.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the input supplied a token; <c>false</c> when it did not.
        /// </returns>
        public override bool incrementToken()
        {
            if (!input.incrementToken())
            {
                return false;
            }

            int termLength = termAtt.length();

            bool hasMarker = marker != NOMARKER;
            if (hasMarker)
            {
                // Grow the term by one slot and place the marker in the new last position.
                termLength += 1;
                termAtt.resizeBuffer(termLength);
                termAtt.buffer()[termLength - 1] = marker;
            }

            reverse(matchVersion, termAtt.buffer(), 0, termLength);
            termAtt.Length = termLength;

            return true;
        }
예제 #4
0
 /// <summary>
 /// Validates the arguments and initializes the tokenizer state: the character
 /// utilities, the gram size limits, the edge-only flag, and the working buffers.
 /// </summary>
 /// <param name="version">Compatibility version; must be on or after <c>Version.LUCENE_44</c>.</param>
 /// <param name="minGram">Smallest gram size; must be at least 1.</param>
 /// <param name="maxGram">Largest gram size; must not be smaller than <paramref name="minGram"/>.</param>
 /// <param name="edgesOnly">Stored as-is in the <c>edgesOnly</c> field.</param>
 /// <exception cref="System.ArgumentException">
 /// Thrown when <paramref name="version"/> is older than <c>Version.LUCENE_44</c>, when
 /// <paramref name="minGram"/> is less than 1, or when <paramref name="minGram"/> exceeds
 /// <paramref name="maxGram"/>.
 /// </exception>
 private void init(Version version, int minGram, int maxGram, bool edgesOnly)
 {
     if (!version.onOrAfter(Version.LUCENE_44))
     {
         throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
     }

     // The guard above has already rejected every version older than 4.4, so the
     // version-specific instance is the only one that can ever be selected here.
     charUtils = CharacterUtils.getInstance(version);

     if (minGram < 1)
     {
         throw new System.ArgumentException("minGram must be greater than zero");
     }
     if (minGram > maxGram)
     {
         throw new System.ArgumentException("minGram must not be greater than maxGram");
     }

     this.minGram   = minGram;
     this.maxGram   = maxGram;
     this.edgesOnly = edgesOnly;

     // 2 * maxGram in case all code points require 2 chars, and + 1024 for buffering
     // so the Reader is not polled constantly.
     charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024);
     buffer     = new int[charBuffer.Buffer.Length];

     // Make the term attribute large enough up front.
     termAtt.resizeBuffer(2 * maxGram);
 }
예제 #5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        /// <summary>
        /// Advances the input by one token and rewrites its term in place, one character
        /// at a time, while tracking a small state (<c>N</c>, <c>U</c> or <c>V</c>):
        /// 'ä', 'ö' and 'ü' are replaced by 'a', 'o' and 'u'; 'ß' is expanded to "ss"
        /// (growing the term by one character); and an 'e' encountered while the state
        /// is <c>U</c> is removed via <c>StemmerUtil.delete</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the input supplied a token; <c>false</c> when it did not.
        /// </returns>
        public override bool incrementToken()
        {
            if (!input.incrementToken())
            {
                return false;
            }

            char[] chars = termAtt.buffer();
            int    len   = termAtt.length();
            int    state = N;

            for (int pos = 0; pos < len; pos++)
            {
                switch (chars[pos])
                {
                    case 'a':
                    case 'o':
                        state = U;
                        break;

                    case 'u':
                        if (state == N)
                        {
                            state = U;
                        }
                        else
                        {
                            state = V;
                        }
                        break;

                    case 'e':
                        if (state == U)
                        {
                            // Remove this character, then step back one position so the
                            // loop's increment lands on the same index again.
                            len = StemmerUtil.delete(chars, pos, len);
                            pos--;
                        }
                        state = V;
                        break;

                    case 'i':
                    case 'q':
                    case 'y':
                        state = V;
                        break;

                    case 'ä':
                        chars[pos] = 'a';
                        state = V;
                        break;

                    case 'ö':
                        chars[pos] = 'o';
                        state = V;
                        break;

                    case 'ü':
                        chars[pos] = 'u';
                        state = V;
                        break;

                    case 'ß':
                        // First 's' overwrites the 'ß'; then move to the slot for the second 's'.
                        chars[pos] = 's';
                        pos++;

                        // Room for one extra character; keep using whatever array comes back.
                        chars = termAtt.resizeBuffer(len + 1);
                        if (pos < len)
                        {
                            // Shift the remainder of the term right by one to open a gap.
                            Array.Copy(chars, pos, chars, pos + 1, len - pos);
                        }
                        chars[pos] = 's';
                        len++;
                        state = N;
                        break;

                    default:
                        state = N;
                        break;
                }
            }

            termAtt.Length = len;
            return true;
        }