/// <summary>
/// Fill the candidate list from the dictionary if the iterator has moved,
/// advance the iterator past the longest candidate, and return how many
/// candidate words start at the current position.
/// </summary>
public virtual int Candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd)
{
    int begin = fIter.Index;

    // Only re-query the dictionary when the iterator has moved since the
    // previous call; otherwise the cached results in lengths/count are reused.
    if (begin != offset)
    {
        offset = begin;
        prefix = dict.Matches(fIter, rangeEnd - begin, lengths, count, lengths.Length);

        // The dictionary leaves the iterator after the longest *prefix*, which
        // need not be an actual word. Rewind when nothing matched.
        if (count[0] <= 0)
        {
            fIter.SetIndex(begin);
        }
    }

    int found = count[0];
    if (found > 0)
    {
        // Position the iterator just past the longest candidate word.
        fIter.SetIndex(begin + lengths[found - 1]);
    }

    // Select the longest candidate and remember it as the marked fallback.
    current = found - 1;
    mark = current;
    return found;
}
/// <summary>
/// Divide up a range of text handled by this break engine, placing the
/// discovered word boundaries into <paramref name="foundBreaks"/>.
/// Works on an NFKC-normalized copy of the input and uses dynamic
/// programming over dictionary matches (minimizing the sum of negative
/// log probabilities) to pick the best segmentation, with a special
/// heuristic for runs of Katakana.
/// </summary>
/// <param name="inText">Source text; its index is repositioned to the last break found (if any).</param>
/// <param name="startPos">Start of the range to divide, inclusive.</param>
/// <param name="endPos">End of the range to divide, exclusive.</param>
/// <param name="foundBreaks">Output stack of break positions, in original-text coordinates.</param>
/// <returns>The number of breaks added to <paramref name="foundBreaks"/>.</returns>
public override int DivideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos, DequeI foundBreaks)
{
    // Empty or inverted range: nothing to do.
    if (startPos >= endPos)
    {
        return (0);
    }
    inText.SetIndex(startPos);
    int inputLength = endPos - startPos;
    // charPositions[k] maps the k-th normalized character back to a UTF-16
    // offset (reallocated below if normalization changes the length).
    int[] charPositions = new int[inputLength + 1];
    StringBuffer s = new StringBuffer("");
    inText.SetIndex(startPos);
    // Copy the requested range out of the iterator.
    while (inText.Index < endPos)
    {
        s.Append(inText.Current);
        inText.Next();
    }
    string prenormstr = s.ToString();
#pragma warning disable 612, 618
    // Uses the deprecated Normalizer API (hence the pragma): quick-check
    // first, falling back to the full IsNormalized test on "maybe".
    bool isNormalized = Normalizer.QuickCheck(prenormstr, NormalizerMode.NFKC) == QuickCheckResult.Yes || Normalizer.IsNormalized(prenormstr, NormalizerMode.NFKC, 0);
#pragma warning restore 612, 618
    CharacterIterator text;
    int numChars = 0;
    if (isNormalized)
    {
        // Already NFKC: iterate code points directly to build the
        // normalized-index -> UTF-16-offset map.
        text = new StringCharacterIterator(prenormstr);
        int index = 0;
        charPositions[0] = 0;
        while (index < prenormstr.Length)
        {
            int codepoint = prenormstr.CodePointAt(index);
            index += Character.CharCount(codepoint);
            numChars++;
            charPositions[numChars] = index;
        }
    }
    else
    {
#pragma warning disable 612, 618
        // Not normalized: normalize a copy, then walk the incremental
        // Normalizer over the ORIGINAL string so charPositions maps each
        // normalized character back to a position in the original text.
        string normStr = Normalizer.Normalize(prenormstr, NormalizerMode.NFKC);
        text = new StringCharacterIterator(normStr);
        charPositions = new int[normStr.Length + 1];
        Normalizer normalizer = new Normalizer(prenormstr, NormalizerMode.NFKC, 0);
        int index = 0;
        charPositions[0] = 0;
        while (index < normalizer.EndIndex)
        {
            normalizer.Next();
            numChars++;
            index = normalizer.Index;
            charPositions[numChars] = index;
        }
#pragma warning restore 612, 618
    }

    // From here on out, do the algorithm. Note that our indices
    // refer to indices within the normalized string.
    // bestSnlp[i] = best total cost (sum of negative log probabilities) of a
    // segmentation of the first i normalized characters; kint32max = unreachable.
    int[] bestSnlp = new int[numChars + 1];
    bestSnlp[0] = 0;
    for (int i = 1; i <= numChars; i++)
    {
        bestSnlp[i] = kint32max;
    }
    // prev[i] = start index of the last word in the best segmentation ending at i.
    int[] prev = new int[numChars + 1];
    for (int i = 0; i <= numChars; i++)
    {
        prev[i] = -1;
    }
    // Longest dictionary word considered, in characters.
    int maxWordSize = 20;
    int[] values = new int[numChars];
    int[] lengths = new int[numChars];
    // dynamic programming to find the best segmentation
    bool is_prev_katakana = false;
    for (int i = 0; i < numChars; i++)
    {
        text.SetIndex(i);
        // Skip positions not reachable by any segmentation so far.
        if (bestSnlp[i] == kint32max)
        {
            continue;
        }
        int maxSearchLength = (i + maxWordSize < numChars) ? maxWordSize : (numChars - i);
        int[] count_ = new int[1];
        // Fills lengths[]/values[] with all dictionary words starting at i.
        fDictionary.Matches(text, maxSearchLength, lengths, count_, maxSearchLength, values);
        int count = count_[0];

        // if there are no single character matches found in the dictionary
        // starting with this character, treat character as a 1-character word
        // with the highest value possible (i.e. the least likely to occur).
        // Exclude Korean characters from this treatment, as they should be
        // left together by default.
        text.SetIndex(i); // fDictionary.matches() advances the text position; undo that.
        if ((count == 0 || lengths[0] != 1) && CharacterIteration.Current32(text) != CharacterIteration.Done32 && !fHangulWordSet.Contains(CharacterIteration.Current32(text)))
        {
            values[count] = maxSnlp;
            lengths[count] = 1;
            count++;
        }

        // Relax: extend the best path ending at i by each candidate word.
        for (int j = 0; j < count; j++)
        {
            int newSnlp = bestSnlp[i] + values[j];
            if (newSnlp < bestSnlp[lengths[j] + i])
            {
                bestSnlp[lengths[j] + i] = newSnlp;
                prev[lengths[j] + i] = i;
            }
        }

        // In Japanese, single-character Katakana words are pretty rare.
        // So we apply the following heuristic to Katakana: any continuous
        // run of Katakana characters is considered a candidate word with
        // a default cost specified in the katakanaCost table according
        // to its length.
        bool is_katakana = IsKatakana(CharacterIteration.Current32(text));
        if (!is_prev_katakana && is_katakana)
        {
            // Only at the START of a Katakana run: measure its length.
            int j = i + 1;
            CharacterIteration.Next32(text);
            while (j < numChars && (j - i) < kMaxKatakanaGroupLength && IsKatakana(CharacterIteration.Current32(text)))
            {
                CharacterIteration.Next32(text);
                ++j;
            }
            // Runs at/over the max group length get no special treatment.
            if ((j - i) < kMaxKatakanaGroupLength)
            {
                int newSnlp = bestSnlp[i] + GetKatakanaCost(j - i);
                if (newSnlp < bestSnlp[j])
                {
                    bestSnlp[j] = newSnlp;
                    prev[j] = i;
                }
            }
        }
        is_prev_katakana = is_katakana;
    }

    // Back-trace the best path into t_boundary (collected in reverse order).
    int[] t_boundary = new int[numChars + 1];
    int numBreaks = 0;
    if (bestSnlp[numChars] == kint32max)
    {
        // No segmentation reached the end: fall back to one break at the end.
        t_boundary[numBreaks] = numChars;
        numBreaks++;
    }
    else
    {
        for (int i = numChars; i > 0; i = prev[i])
        {
            t_boundary[numBreaks] = i;
            numBreaks++;
        }
        Assert.Assrt(prev[t_boundary[numBreaks - 1]] == 0);
    }
    // Add a break for the start of the range unless a prior engine already
    // emitted one at or after startPos.
    if (foundBreaks.Count == 0 || foundBreaks.Peek() < startPos)
    {
        t_boundary[numBreaks++] = 0;
    }
    // Emit the breaks in forward order, mapped back to original-text offsets,
    // skipping duplicates and the range start itself.
    int correctedNumBreaks = 0;
    for (int i = numBreaks - 1; i >= 0; i--)
    {
        int pos = charPositions[t_boundary[i]] + startPos;
        if (!(foundBreaks.Contains(pos) || pos == startPos))
        {
            foundBreaks.Push(charPositions[t_boundary[i]] + startPos);
            correctedNumBreaks++;
        }
    }
    // The caller supplies the break at endPos; drop ours to avoid a duplicate.
    if (!foundBreaks.IsEmpty && foundBreaks.Peek() == endPos)
    {
        foundBreaks.Pop();
        correctedNumBreaks--;
    }
    // Leave the input iterator positioned on the last break found.
    if (!foundBreaks.IsEmpty)
    {
        inText.SetIndex(foundBreaks.Peek());
    }
    return (correctedNumBreaks);
}