示例#1
0
            /// <summary>
            /// Registers an input string together with its stemmer override output in this builder.
            /// When <c>ignoreCase</c> is set, the input is lowercased code point by code point
            /// before it is encoded and added to the hash.
            /// </summary>
            /// <param name="input"> the input char sequence </param>
            /// <param name="output"> the stemmer override output char sequence </param>
            /// <returns> <c>false</c> iff the input has already been added to this builder, otherwise <c>true</c>. </returns>
            public virtual bool add(ICharSequence input, ICharSequence output)
            {
                int inputLength = input.length();

                if (!ignoreCase)
                {
                    // Case-sensitive: encode the input as-is into the shared scratch bytes.
                    UnicodeUtil.UTF16toUTF8(input, 0, inputLength, spare);
                }
                else
                {
                    // Case-insensitive: lowercase on the fly into the reusable char buffer,
                    // advancing by however many chars each code point occupies.
                    charsSpare.grow(inputLength);
                    char[] lowered = charsSpare.chars;
                    int pos = 0;
                    while (pos < inputLength)
                    {
                        pos += char.toChars(char.ToLower(char.codePointAt(input, pos)), lowered, pos);
                    }
                    UnicodeUtil.UTF16toUTF8(lowered, 0, inputLength, spare);
                }

                // A non-negative result from the hash means the key was not present before;
                // only then is the output recorded.
                bool isNewEntry = hash.add(spare) >= 0;
                if (isNewEntry)
                {
                    outputValues.Add(output);
                }
                return isNewEntry;
            }
            /// <summary>
            /// Registers an input string together with its stemmer override output in this builder.
            /// When <c>ignoreCase</c> is set, the input is lowercased code point by code point
            /// before it is encoded and added to the hash.
            /// </summary>
            /// <param name="input"> the input char sequence </param>
            /// <param name="output"> the stemmer override output char sequence </param>
            /// <returns> <c>false</c> iff the input has already been added to this builder, otherwise <c>true</c>. </returns>
            public virtual bool add(CharSequence input, CharSequence output)
            {
                int inputLength = input.length();

                if (!ignoreCase)
                {
                    // Case-sensitive: encode the input as-is into the shared scratch bytes.
                    UnicodeUtil.UTF16toUTF8(input, 0, inputLength, spare);
                }
                else
                {
                    // Case-insensitive: lowercase on the fly into the reusable char buffer,
                    // advancing by however many chars each code point occupies.
                    charsSpare.grow(inputLength);
                    char[] lowered = charsSpare.chars;
                    int pos = 0;
                    while (pos < inputLength)
                    {
                        pos += char.toChars(char.ToLower(char.codePointAt(input, pos)), lowered, pos);
                    }
                    UnicodeUtil.UTF16toUTF8(lowered, 0, inputLength, spare);
                }

                // A non-negative result from the hash means the key was not present before;
                // only then is the output recorded.
                bool isNewEntry = hash.add(spare) >= 0;
                if (isNewEntry)
                {
                    outputValues.Add(output);
                }
                return isNewEntry;
            }
示例#3
0
            // NOTE: while it's tempting to make this public, since
            // caller's parser likely knows the
            // numInput/numOutputWords, sneaky exceptions, much later
            // on, will result if these values are wrong; so we always
            // recompute ourselves to be safe:
            /// <summary>
            /// Records a mapping from <paramref name="input"/> to <paramref name="output"/>.
            /// The output is encoded as UTF-8 and added to (or looked up in) the <c>words</c> hash
            /// to obtain its ordinal; that ordinal is appended to the map entry kept for
            /// <paramref name="input"/> in <c>workingSet</c>, creating the entry if none exists yet.
            /// <c>maxHorizontalContext</c> is raised to the larger of the two word counts if needed.
            /// </summary>
            /// <param name="input"> the input chars; must be non-empty </param>
            /// <param name="numInputWords"> number of words in <paramref name="input"/>; must be &gt; 0 </param>
            /// <param name="output"> the output chars; must be non-empty </param>
            /// <param name="numOutputWords"> number of words in <paramref name="output"/>; must be &gt; 0 </param>
            /// <param name="includeOrig"> OR-ed into the entry's <c>includeOrig</c> flag </param>
            /// <exception cref="System.ArgumentException"> if any word count or length is not positive </exception>
            internal virtual void add(CharsRef input, int numInputWords, CharsRef output, int numOutputWords, bool includeOrig)
            {
                if (numInputWords <= 0)
                {
                    throw new System.ArgumentException("numInputWords must be > 0 (got " + numInputWords + ")");
                }
                if (input.length <= 0)
                {
                    throw new System.ArgumentException("input.length must be > 0 (got " + input.length + ")");
                }
                if (numOutputWords <= 0)
                {
                    throw new System.ArgumentException("numOutputWords must be > 0 (got " + numOutputWords + ")");
                }
                if (output.length <= 0)
                {
                    throw new System.ArgumentException("output.length must be > 0 (got " + output.length + ")");
                }

                Debug.Assert(!hasHoles(input), "input has holes: " + input);
                Debug.Assert(!hasHoles(output), "output has holes: " + output);

                // first convert the output to UTF-8, then look it up in the hash
                UnicodeUtil.UTF16toUTF8(output.chars, output.offset, output.length, utf8Scratch);
                int ord = words.add(utf8Scratch);

                if (ord < 0)
                {
                    // already exists in our hash: a negative result encodes the
                    // existing ordinal as -(ord + 1)
                    ord = (-ord) - 1;
                }

                // Use TryGetValue rather than the indexer: on a .NET dictionary the
                // indexer throws KeyNotFoundException for a missing key instead of
                // returning null, so the "create a new entry" path would be unreachable.
                // NOTE(review): assumes workingSet is dictionary-like (exposes TryGetValue) - confirm.
                MapEntry e;
                if (!workingSet.TryGetValue(input, out e) || e == null)
                {
                    e = new MapEntry();
                    workingSet[CharsRef.deepCopyOf(input)] = e;     // make a copy, since we will keep around in our map
                }

                e.ords.Add(ord);
                e.includeOrig |= includeOrig;
                maxHorizontalContext = Math.Max(maxHorizontalContext, Math.Max(numInputWords, numOutputWords));
            }