C# (CSharp) ICU4N.Text UnicodeSet.Contains Examples

Programming Language: C# (CSharp)

Namespace/Package Name: ICU4N.Text

Class/Type: UnicodeSet

Method/Function: Contains

Examples at hotexamples.com: 10

C# (CSharp) ICU4N.Text UnicodeSet.Contains - 10 examples found. These are the top rated real world C# (CSharp) examples of ICU4N.Text.UnicodeSet.Contains extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

AddAll(19)

Contains(10)

Add(10)

Freeze(6)

RetainAll(6)

ApplyPattern(5)

Compact(5)

ContainsSome(5)

RemoveAll(4)

ContainsAll(4)

Remove(3)

GetRangeStart(3)

GetRangeEnd(3)

Clear(3)

Complement(2)

ComplementAll(2)

ResemblesPattern(2)

Retain(2)

GetHashCode(1)

Equals(1)

IsProperSubsetOf(1)

IsProperSupersetOf(1)

IsSubsetOf(1)

IsSupersetOf(1)

Overlaps(1)

ApplyInt32PropertyValue(1)

SetEquals(1)

SymmetricExceptWith(1)

Example #1

Show file

        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet myFilter = GetFilterAsUnicodeSet(inputFilter);

            if (!myFilter.ContainsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.Contains(CLOSE_DELIM))
            {
                return; // we have to contain both prefix and suffix
            }
            UnicodeSet items = new UnicodeSet()
                               .AddAll('0', '9')
                               .AddAll('A', 'F')
                               .AddAll('a', 'z')  // for controls
                               .Add('<').Add('>') // for controls
                               .Add('(').Add(')') // for controls
                               .Add('-')
                               .Add(' ')
                               .AddAll(UnicodeNameTransliterator.OPEN_DELIM)
                               .Add(CLOSE_DELIM);

            items.RetainAll(myFilter);
            if (items.Count > 0)
            {
                sourceSet.AddAll(items);
                // could produce any character
                targetSet.AddAll(0, 0x10FFFF);
            }
        }

Example #2

Show file

 private bool IsSubsetOfInternal(UnicodeSet other)
 {
     foreach (var item in this)
     {
         if (!other.Contains(item))
         {
             return(false);
         }
     }
     return(true);
 }

Example #3

Show file

 /// <summary>
 /// Update the set of unhandled characters for the specified breakType to include
 /// all that have the same script as <paramref name="c"/>.
 /// May be called concurrently with <see cref="Handles(int, int)"/> or <see cref="FindBreaks(CharacterIterator, int, int, int, DictionaryBreakEngine.DequeI)"/>.
 /// Must not be called concurrently with itself.
 /// </summary>
 public void HandleChar(int c, int breakType)
 {
     if (breakType >= 0 && breakType < fHandled.Length && c != CharacterIteration.Done32)
     {
         UnicodeSet originalSet = fHandled[breakType];
         if (!originalSet.Contains(c))
         {
             int        script = UChar.GetIntPropertyValue(c, UProperty.Script);
             UnicodeSet newSet = new UnicodeSet();
             newSet.ApplyInt32PropertyValue(UProperty.Script, script);
             newSet.AddAll(originalSet);
             fHandled[breakType] = newSet;
         }
     }
 }

Example #4

Show file

 public int FindBreaks(CharacterIterator text, int startPos, int endPos,
                       int breakType, DictionaryBreakEngine.DequeI foundBreaks)
 {
     if (breakType >= 0 && breakType < fHandled.Length)
     {
         UnicodeSet uniset = fHandled[breakType];
         int        c      = CharacterIteration.Current32(text);
         while (text.Index < endPos && uniset.Contains(c))
         {
             CharacterIteration.Next32(text);
             c = CharacterIteration.Current32(text);
         }
     }
     return(0);
 }

Example #5

Show file

        /// <summary>
        /// Find the source and target sets, subject to the input filter.
        /// There is a known issue with filters containing multiple characters.
        /// </summary>
        // TODO: Problem: the rule is [{ab}]c > x
        // The filter is [a{bc}].
        // If the input is abc, then the rule will work.
        // However, following code applying the filter won't catch that case.
        internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
        {
            int        limit      = anteContextLength + keyLength;
            UnicodeSet tempSource = new UnicodeSet();
            UnicodeSet temp       = new UnicodeSet();

            // We need to walk through the pattern.
            // Iff some of the characters at ALL of the the positions are matched by the filter, then we add temp to toUnionTo
            for (int i = anteContextLength; i < limit;)
            {
                int ch = UTF16.CharAt(pattern, i);
                i += UTF16.GetCharCount(ch);
                IUnicodeMatcher matcher = data.LookupMatcher(ch);
                if (matcher == null)
                {
                    if (!filter.Contains(ch))
                    {
                        return;
                    }
                    tempSource.Add(ch);
                }
                else
                {
                    try
                    {
                        if (!filter.ContainsSome((UnicodeSet)matcher))
                        {
                            return;
                        }
                        matcher.AddMatchSetTo(tempSource);
                    }
                    catch (InvalidCastException)
                    { // if the matcher is not a UnicodeSet
                        temp.Clear();
                        matcher.AddMatchSetTo(temp);
                        if (!filter.ContainsSome(temp))
                        {
                            return;
                        }
                        tempSource.AddAll(temp);
                    }
                }
            }
            // if we made our way through the gauntlet, add to source/target
            sourceSet.AddAll(tempSource);
            output.AddReplacementSetTo(targetSet);
        }

Example #6

Show file

        //------------------------------------------------------------------------
        //
        //           build          Build the list of non-overlapping character ranges
        //                          from the Unicode Sets.
        //
        //------------------------------------------------------------------------
        internal virtual void Build()
        {
            RangeDescriptor rlRange;

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("usets", StringComparison.Ordinal) >= 0)
            {
                PrintSets();
            }

            //  Initialize the process by creating a single range encompassing all characters
            //  that is in no sets.
            //
            fRangeList            = new RangeDescriptor();
            fRangeList.fStartChar = 0;
            fRangeList.fEndChar   = 0x10ffff;

            //
            //  Find the set of non-overlapping ranges of characters
            //
            foreach (RBBINode usetNode in fRB.fUSetNodes)
            {
                UnicodeSet inputSet           = usetNode.fInputSet;
                int        inputSetRangeCount = inputSet.RangeCount;
                int        inputSetRangeIndex = 0;
                rlRange = fRangeList;

                for (; ;)
                {
                    if (inputSetRangeIndex >= inputSetRangeCount)
                    {
                        break;
                    }
                    int inputSetRangeBegin = inputSet.GetRangeStart(inputSetRangeIndex);
                    int inputSetRangeEnd   = inputSet.GetRangeEnd(inputSetRangeIndex);

                    // skip over ranges from the range list that are completely
                    //   below the current range from the input unicode set.
                    while (rlRange.fEndChar < inputSetRangeBegin)
                    {
                        rlRange = rlRange.fNext;
                    }

                    // If the start of the range from the range list is before with
                    //   the start of the range from the unicode set, split the range list range
                    //   in two, with one part being before (wholly outside of) the unicode set
                    //   and the other containing the rest.
                    //   Then continue the loop; the post-split current range will then be skipped
                    //     over
                    if (rlRange.fStartChar < inputSetRangeBegin)
                    {
                        rlRange.Split(inputSetRangeBegin);
                        continue;
                    }

                    // Same thing at the end of the ranges...
                    // If the end of the range from the range list doesn't coincide with
                    //   the end of the range from the unicode set, split the range list
                    //   range in two.  The first part of the split range will be
                    //   wholly inside the Unicode set.
                    if (rlRange.fEndChar > inputSetRangeEnd)
                    {
                        rlRange.Split(inputSetRangeEnd + 1);
                    }

                    // The current rlRange is now entirely within the UnicodeSet range.
                    // Add this unicode set to the list of sets for this rlRange
                    if (rlRange.fIncludesSets.IndexOf(usetNode) == -1)
                    {
                        rlRange.fIncludesSets.Add(usetNode);
                    }

                    // Advance over ranges that we are finished with.
                    if (inputSetRangeEnd == rlRange.fEndChar)
                    {
                        inputSetRangeIndex++;
                    }
                    rlRange = rlRange.fNext;
                }
            }

            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("range", StringComparison.Ordinal) >= 0)
            {
                PrintRanges();
            }

            //
            //  Group the above ranges, with each group consisting of one or more
            //    ranges that are in exactly the same set of original UnicodeSets.
            //    The groups are numbered, and these group numbers are the set of
            //    input symbols recognized by the run-time state machine.
            //
            //    Numbering: # 0  (state table column 0) is unused.
            //               # 1  is reserved - table column 1 is for end-of-input
            //               # 2  is reserved - table column 2 is for beginning-in-input
            //               # 3  is the first range list.
            //
            RangeDescriptor rlSearchRange;

            for (rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext)
            {
                for (rlSearchRange = fRangeList; rlSearchRange != rlRange; rlSearchRange = rlSearchRange.fNext)
                {
                    if (ListEqualityComparer <RBBINode> .Default.Equals(rlRange.fIncludesSets, rlSearchRange.fIncludesSets))
                    {
                        rlRange.fNum = rlSearchRange.fNum;
                        break;
                    }
                }
                if (rlRange.fNum == 0)
                {
                    fGroupCount++;
                    rlRange.fNum = fGroupCount + 2;
                    rlRange.SetDictionaryFlag();
                    AddValToSets(rlRange.fIncludesSets, fGroupCount + 2);
                }
            }

            // Handle input sets that contain the special string {eof}.
            //   Column 1 of the state table is reserved for EOF on input.
            //   Column 2 is reserved for before-the-start-input.
            //            (This column can be optimized away later if there are no rule
            //             references to {bof}.)
            //   Add this column value (1 or 2) to the equivalent expression
            //     subtree for each UnicodeSet that contains the string {eof}
            //   Because {bof} and {eof} are not a characters in the normal sense,
            //   they doesn't affect the computation of ranges or TRIE.

            string eofString = "eof";
            string bofString = "bof";

            foreach (RBBINode usetNode in fRB.fUSetNodes)
            {
                UnicodeSet inputSet = usetNode.fInputSet;
                if (inputSet.Contains(eofString))
                {
                    AddValToSet(usetNode, 1);
                }
                if (inputSet.Contains(bofString))
                {
                    AddValToSet(usetNode, 2);
                    fSawBOF = true;
                }
            }


            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("rgroup", StringComparison.Ordinal) >= 0)
            {
                PrintRangeGroups();
            }
            if (fRB.fDebugEnv != null && fRB.fDebugEnv.IndexOf("esets", StringComparison.Ordinal) >= 0)
            {
                PrintSets();
            }

            fTrie = new Trie2Writable(0,       //   Initial value for all code points.
                                      0);      //   Error value for out-of-range input.

            for (rlRange = fRangeList; rlRange != null; rlRange = rlRange.fNext)
            {
                fTrie.SetRange(
                    rlRange.fStartChar,         // Range start
                    rlRange.fEndChar,           // Range end (inclusive)
                    rlRange.fNum,               // value for range
                    true                        // Overwrite previously written values
                    );
            }
        }

Example #7

Show file

        public override int DivideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
                                                    DequeI foundBreaks)
        {
            if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD)
            {
                return(0);  // Not enough characters for word
            }
            int wordsFound = 0;
            int wordLength;
            int current;

            PossibleWord[] words = new PossibleWord[BURMESE_LOOKAHEAD];
            for (int i = 0; i < BURMESE_LOOKAHEAD; i++)
            {
                words[i] = new PossibleWord();
            }
            int uc;

            fIter.SetIndex(rangeStart);
            while ((current = fIter.Index) < rangeEnd)
            {
                wordLength = 0;

                //Look for candidate words at the current position
                int candidates = words[wordsFound % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd);

                // If we found exactly one, use that
                if (candidates == 1)
                {
                    wordLength  = words[wordsFound % BURMESE_LOOKAHEAD].AcceptMarked(fIter);
                    wordsFound += 1;
                }

                // If there was more than one, see which one can take us forward the most words
                else if (candidates > 1)
                {
                    bool foundBest = false;
                    // If we're already at the end of the range, we're done
                    if (fIter.Index < rangeEnd)
                    {
                        do
                        {
                            int wordsMatched = 1;
                            if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd) > 0)
                            {
                                if (wordsMatched < 2)
                                {
                                    // Followed by another dictionary word; mark first word as a good candidate
                                    words[wordsFound % BURMESE_LOOKAHEAD].MarkCurrent();
                                    wordsMatched = 2;
                                }

                                // If we're already at the end of the range, we're done
                                if (fIter.Index >= rangeEnd)
                                {
                                    break;
                                }

                                // See if any of the possible second words is followed by a third word
                                do
                                {
                                    // If we find a third word, stop right away
                                    if (words[(wordsFound + 2) % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd) > 0)
                                    {
                                        words[wordsFound % BURMESE_LOOKAHEAD].MarkCurrent();
                                        foundBest = true;
                                        break;
                                    }
                                } while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].BackUp(fIter));
                            }
                        } while (words[wordsFound % BURMESE_LOOKAHEAD].BackUp(fIter) && !foundBest);
                    }
                    wordLength  = words[wordsFound % BURMESE_LOOKAHEAD].AcceptMarked(fIter);
                    wordsFound += 1;
                }

                // We come here after having either found a word or not. We look ahead to the
                // next word. If it's not a dictionary word, we will combine it with the word we
                // just found (if there is one), but only if the preceding word does not exceed
                // the threshold.
                // The text iterator should now be positioned at the end of the word we found.
                if (fIter.Index < rangeEnd && wordLength < BURMESE_ROOT_COMBINE_THRESHOLD)
                {
                    // If it is a dictionary word, do nothing. If it isn't, then if there is
                    // no preceding word, or the non-word shares less than the minimum threshold
                    // of characters with a dictionary word, then scan to resynchronize
                    if (words[wordsFound % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd) <= 0 &&
                        (wordLength == 0 ||
                         words[wordsFound % BURMESE_LOOKAHEAD].LongestPrefix < BURMESE_PREFIX_COMBINE_THRESHOLD))
                    {
                        // Look for a plausible word boundary
                        int remaining = rangeEnd - (current + wordLength);
                        int pc        = fIter.Current;
                        int chars     = 0;
                        for (; ;)
                        {
                            fIter.Next();
                            uc     = fIter.Current;
                            chars += 1;
                            if (--remaining <= 0)
                            {
                                break;
                            }
                            if (fEndWordSet.Contains(pc) && fBeginWordSet.Contains(uc))
                            {
                                // Maybe. See if it's in the dictionary.
                                int candidate = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].Candidates(fIter, fDictionary, rangeEnd);
                                fIter.SetIndex(current + wordLength + chars);
                                if (candidate > 0)
                                {
                                    break;
                                }
                            }
                            pc = uc;
                        }

                        // Bump the word count if there wasn't already one
                        if (wordLength <= 0)
                        {
                            wordsFound += 1;
                        }

                        // Update the length with the passed-over characters
                        wordLength += chars;
                    }
                    else
                    {
                        // Backup to where we were for next iteration
                        fIter.SetIndex(current + wordLength);
                    }
                }

                // Never stop before a combining mark.
                int currPos;
                while ((currPos = fIter.Index) < rangeEnd && fMarkSet.Contains(fIter.Current))
                {
                    fIter.Next();
                    wordLength += fIter.Index - currPos;
                }

                // Look ahead for possible suffixes if a dictionary word does not follow.
                // We do this in code rather than using a rule so that the heuristic
                // resynch continues to function. For example, one of the suffix characters
                // could be a typo in the middle of a word.
                // NOT CURRENTLY APPLICABLE TO BURMESE

                // Did we find a word on this iteration? If so, push it on the break stack
                if (wordLength > 0)
                {
                    foundBreaks.Push(current + wordLength);
                }
            }

            // Don't return a break for the end of the dictionary range if there is one there
            if (foundBreaks.Peek() >= rangeEnd)
            {
                foundBreaks.Pop();
                wordsFound -= 1;
            }

            return(wordsFound);
        }

Example #8

Show file

 public virtual bool Handles(int c, int breakType)
 {
     return(fTypes.SafeGet(breakType) && // this type can use us
            fSet.Contains(c));           // we recognize the character
 }

Example #9

Show file

        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
        /// </summary>
        protected override void HandleTransliterate(IReplaceable text,
                                                    Position offsets, bool isIncremental)
        {
            int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

            StringBuffer name = new StringBuffer(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            UCharacterName.Instance.GetCharNameCharacters(legal);

            int cursor = offsets.Start;
            int limit  = offsets.Limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1; // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:   // looking for open delimiter
                    if (c == OPEN_DELIM)
                    {     // quick check first
                        openPos = cursor;
                        int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                            // Look for legal chars.  If \s+ is found, convert it
                            // to a single space.  If closeDelimiter is found, exit
                            // the loop.  If any other character is found, exit the
                            // loop.  If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE.  This assumes there are no
                    // runs of >1 space characters in names.
                    if (PatternProps.IsWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort.  maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 &&
                            name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        c = UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            string str = UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string.  Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                cursor += UTF16.GetCharCount(c);
            }

            offsets.ContextLimit += limit - offsets.Limit;
            offsets.Limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }

Example #10

Show file

        public override int DivideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos,
                                                    DequeI foundBreaks)
        {
            if (startPos >= endPos)
            {
                return(0);
            }

            inText.SetIndex(startPos);

            int inputLength = endPos - startPos;

            int[]        charPositions = new int[inputLength + 1];
            StringBuffer s             = new StringBuffer("");

            inText.SetIndex(startPos);
            while (inText.Index < endPos)
            {
                s.Append(inText.Current);
                inText.Next();
            }
            string prenormstr = s.ToString();

#pragma warning disable 612, 618
            bool isNormalized = Normalizer.QuickCheck(prenormstr, NormalizerMode.NFKC) == QuickCheckResult.Yes ||
                                Normalizer.IsNormalized(prenormstr, NormalizerMode.NFKC, 0);
#pragma warning restore 612, 618
            CharacterIterator text;
            int numChars = 0;
            if (isNormalized)
            {
                text = new StringCharacterIterator(prenormstr);
                int index = 0;
                charPositions[0] = 0;
                while (index < prenormstr.Length)
                {
                    int codepoint = prenormstr.CodePointAt(index);
                    index += Character.CharCount(codepoint);
                    numChars++;
                    charPositions[numChars] = index;
                }
            }
            else
            {
#pragma warning disable 612, 618
                string normStr = Normalizer.Normalize(prenormstr, NormalizerMode.NFKC);
                text          = new StringCharacterIterator(normStr);
                charPositions = new int[normStr.Length + 1];
                Normalizer normalizer = new Normalizer(prenormstr, NormalizerMode.NFKC, 0);
                int        index      = 0;
                charPositions[0] = 0;
                while (index < normalizer.EndIndex)
                {
                    normalizer.Next();
                    numChars++;
                    index = normalizer.Index;
                    charPositions[numChars] = index;
                }
#pragma warning restore 612, 618
            }

            // From here on out, do the algorithm. Note that our indices
            // refer to indices within the normalized string.
            int[] bestSnlp = new int[numChars + 1];
            bestSnlp[0] = 0;
            for (int i = 1; i <= numChars; i++)
            {
                bestSnlp[i] = kint32max;
            }

            int[] prev = new int[numChars + 1];
            for (int i = 0; i <= numChars; i++)
            {
                prev[i] = -1;
            }

            int   maxWordSize = 20;
            int[] values      = new int[numChars];
            int[] lengths     = new int[numChars];
            // dynamic programming to find the best segmentation
            bool is_prev_katakana = false;
            for (int i = 0; i < numChars; i++)
            {
                text.SetIndex(i);
                if (bestSnlp[i] == kint32max)
                {
                    continue;
                }

                int   maxSearchLength = (i + maxWordSize < numChars) ? maxWordSize : (numChars - i);
                int[] count_          = new int[1];
                fDictionary.Matches(text, maxSearchLength, lengths, count_, maxSearchLength, values);
                int count = count_[0];

                // if there are no single character matches found in the dictionary
                // starting with this character, treat character as a 1-character word
                // with the highest value possible (i.e. the least likely to occur).
                // Exclude Korean characters from this treatment, as they should be
                // left together by default.
                text.SetIndex(i);  // fDictionary.matches() advances the text position; undo that.
                if ((count == 0 || lengths[0] != 1) && CharacterIteration.Current32(text) != CharacterIteration.Done32 && !fHangulWordSet.Contains(CharacterIteration.Current32(text)))
                {
                    values[count]  = maxSnlp;
                    lengths[count] = 1;
                    count++;
                }

                for (int j = 0; j < count; j++)
                {
                    int newSnlp = bestSnlp[i] + values[j];
                    if (newSnlp < bestSnlp[lengths[j] + i])
                    {
                        bestSnlp[lengths[j] + i] = newSnlp;
                        prev[lengths[j] + i]     = i;
                    }
                }

                // In Japanese, single-character Katakana words are pretty rare.
                // So we apply the following heuristic to Katakana: any continuous
                // run of Katakana characters is considered a candidate word with
                // a default cost specified in the katakanaCost table according
                // to its length.
                bool is_katakana = IsKatakana(CharacterIteration.Current32(text));
                if (!is_prev_katakana && is_katakana)
                {
                    int j = i + 1;
                    CharacterIteration.Next32(text);
                    while (j < numChars && (j - i) < kMaxKatakanaGroupLength && IsKatakana(CharacterIteration.Current32(text)))
                    {
                        CharacterIteration.Next32(text);
                        ++j;
                    }

                    if ((j - i) < kMaxKatakanaGroupLength)
                    {
                        int newSnlp = bestSnlp[i] + GetKatakanaCost(j - i);
                        if (newSnlp < bestSnlp[j])
                        {
                            bestSnlp[j] = newSnlp;
                            prev[j]     = i;
                        }
                    }
                }
                is_prev_katakana = is_katakana;
            }

            int[] t_boundary = new int[numChars + 1];
            int   numBreaks  = 0;
            if (bestSnlp[numChars] == kint32max)
            {
                t_boundary[numBreaks] = numChars;
                numBreaks++;
            }
            else
            {
                for (int i = numChars; i > 0; i = prev[i])
                {
                    t_boundary[numBreaks] = i;
                    numBreaks++;
                }
                Assert.Assrt(prev[t_boundary[numBreaks - 1]] == 0);
            }

            if (foundBreaks.Count == 0 || foundBreaks.Peek() < startPos)
            {
                t_boundary[numBreaks++] = 0;
            }

            int correctedNumBreaks = 0;
            for (int i = numBreaks - 1; i >= 0; i--)
            {
                int pos = charPositions[t_boundary[i]] + startPos;
                if (!(foundBreaks.Contains(pos) || pos == startPos))
                {
                    foundBreaks.Push(charPositions[t_boundary[i]] + startPos);
                    correctedNumBreaks++;
                }
            }

            if (!foundBreaks.IsEmpty && foundBreaks.Peek() == endPos)
            {
                foundBreaks.Pop();
                correctedNumBreaks--;
            }
            if (!foundBreaks.IsEmpty)
            {
                inText.SetIndex(foundBreaks.Peek());
            }
            return(correctedNumBreaks);
        }