Exemplo n.º 1
0
        /// <summary>
        /// Replaces the line-break iterator with one created for <paramref name="locale"/>
        /// and, when text is already attached, hands that text to the new iterator.
        /// </summary>
        /// <param name="locale">Locale passed to the ICU line-break iterator factory.</param>
        public void setLocale(icu.Locale locale)
        {
            UErrorCode errorCode = U_ZERO_ERROR;

            // Build the locale-specific line iterator first, then install it.
            var lineIterator = icu.BreakIterator.createLineInstance(locale, errorCode);
            mBreakIterator.reset(lineIterator);
            // TODO: handle failure status

            // A freshly created iterator has no text; re-attach the current text if there is one.
            bool hasText = mText != null;
            if (hasText)
            {
                mBreakIterator.setText(mUText, errorCode);
            }

            // Record that the iterator was replaced.
            mIteratorWasReset = true;
        }
Exemplo n.º 2
0
//C++ TO C# CONVERTER WARNING: The original C++ declaration of the following method implementation was not found:
        /// <summary>
        /// Fills <paramref name="result"/> with one hyphenation type per position of the word.
        /// Pattern-based hyphenation is tried first when pattern data is loaded and the word
        /// length is within the supported bounds; otherwise (or when the alphabet lookup
        /// reports DONT_BREAK) the pattern-free rules are applied.
        /// </summary>
        /// <param name="result">Output container; cleared and resized to <paramref name="len"/>.</param>
        /// <param name="word">The word to hyphenate.</param>
        /// <param name="len">Number of code units in the word.</param>
        /// <param name="locale">Locale forwarded to the pattern-free rules.</param>
        public void hyphenate(vector <HyphenationType> result, UInt16 word, int len, icu.Locale locale)
        {
            result.clear();
            result.resize(len);

            // The start and stop codes each add one slot to the word length.
            int lengthWithSentinels = len + 2;

            bool patternsApply = patternData != null
                                 && len >= minPrefix + minSuffix
                                 && lengthWithSentinels <= MAX_HYPHENATED_SIZE;

            bool handledByPatterns = false;
            if (patternsApply)
            {
                UInt16[]        codes        = Arrays.InitializeWithDefaultInstances <UInt16>(MAX_HYPHENATED_SIZE);
                HyphenationType lookupResult = alphabetLookup(codes, word, len);
                if (lookupResult != HyphenationType.DONT_BREAK)
                {
                    hyphenateFromCodes(result.data(), codes, lengthWithSentinels, lookupResult);
                    handledByPatterns = true;
                }
                // TODO: try NFC normalization
                // TODO: handle non-BMP Unicode (requires remapping of offsets)
            }

            if (!handledByPatterns)
            {
                // A word containing a hyphen or a soft hyphen always ends up here: the
                // alphabet is not expected to contain either character, so alphabetLookup
                // would return DONT_BREAK for it.
                hyphenateWithNoPatterns(result.data(), word, len, locale);
            }
        }
Exemplo n.º 3
0
        // Shift constant related to tab handling (value 29).
        // NOTE(review): presumably the bit offset at which a tab flag is packed into a
        // flags word shared with the Java side - confirm against TAB_MASK in StaticLayout.java.
        public const int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java

        /// <summary>
        /// Sets the locale used by the word breaker and stores the locale and the
        /// hyphenator for later use.
        /// </summary>
        /// <remarks>
        /// The locale persists across multiple invocations (it is not cleaned up by
        /// finish()), explicitly to avoid the cost of creating ICU BreakIterator objects.
        /// It should always be set on the first invocation, but callers are encouraged
        /// not to call this again unless the locale has actually changed. That check
        /// could live here, but it is better for performance to do it upstream because
        /// of the cost of constructing and comparing the ICU Locale object.
        /// The caller is responsible for managing the lifetime of the hyphenator.
        /// </remarks>
        /// <param name="locale">Locale to apply to the word breaker and remember.</param>
        /// <param name="hyphenator">Hyphenator to remember; not owned by this object.</param>
        public void setLocale(icu.Locale locale, Hyphenator hyphenator)
        {
            // Update the word breaker first, then record the new settings.
            this.mWordBreaker.setLocale(locale);

            this.mLocale = locale;
            this.mHyphenator = hyphenator;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Applies various hyphenation rules, including hard and soft hyphens, ignoring
        /// patterns.
        /// </summary>
        /// <remarks>
        /// Uses various recommendations of UAX #14 (Unicode Line Breaking Algorithm) for
        /// hyphenating words that did not match patterns, especially words that contain
        /// hyphens or soft hyphens (see sections 5.3, Use of Hyphen, and 5.4, Use of
        /// Soft Hyphen).
        /// For every index i in [0, len) the method writes result[i]; result[0] is
        /// always DONT_BREAK. An empty word (len &lt;= 0) leaves result untouched.
        /// </remarks>
        /// <param name="result">Output array receiving one hyphenation type per position.</param>
        /// <param name="word">Code units of the word.</param>
        /// <param name="len">Number of code units of <paramref name="word"/> to process.</param>
        /// <param name="locale">Locale whose language selects the Polish and Catalan rules.</param>
        private void hyphenateWithNoPatterns(HyphenationType[] result, UInt16[] word, int len, icu.Locale locale)
        {
            // Nothing to classify for an empty word; writing result[0] below would be
            // out of range when the caller sized the result to len.
            if (len <= 0)
            {
                return;
            }

            result[0] = HyphenationType.DONT_BREAK;
            for (int i = 1; i < len; i++)
            {
                UInt16 prevChar = word[i - 1];
                // NOTE(review): the "new UInt16(...)" wrappers below look like C++-to-C#
                // converter artifacts; kept as-is because the helper signatures are not
                // visible from here.
                if (i > 1 && isLineBreakingHyphen(new UInt16(prevChar)))
                {
                    // Break after hyphens, but only if they don't start the word.

                    if ((prevChar == GlobalMembers.CHAR_HYPHEN_MINUS || prevChar == GlobalMembers.CHAR_HYPHEN) && string.Compare(locale.getLanguage(), "pl") == 0 && minikin.GlobalMembers.getScript(word[i]) == USCRIPT_LATIN)
                    {
                        // In Polish, hyphens get repeated at the next line. To be safe,
                        // we will do this only if the next character is Latin.
                        result[i] = HyphenationType.BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE;
                    }
                    else
                    {
                        result[i] = HyphenationType.BREAK_AND_DONT_INSERT_HYPHEN;
                    }
                }
                else if (i > 1 && prevChar == GlobalMembers.CHAR_SOFT_HYPHEN)
                {
                    // Break after soft hyphens, but only if they don't start the word (a soft
                    // hyphen starting the word doesn't give any useful break opportunities).
                    // The type of the break is based on the script of the character we break
                    // on.
                    if (minikin.GlobalMembers.getScript(word[i]) == USCRIPT_ARABIC)
                    {
                        // For Arabic, we need to look and see if the characters around the soft
                        // hyphen actually join. If they don't, we'll just insert a normal
                        // hyphen.
                        result[i] = minikin.GlobalMembers.getHyphTypeForArabic(new UInt16(word), len, i);
                    }
                    else
                    {
                        result[i] = minikin.GlobalMembers.hyphenationTypeBasedOnScript(word[i]);
                    }
                }
                // The explicit i > 1 check keeps word[i - 2] in range even when minPrefix
                // is 0; for minPrefix >= 1 it is already implied by minPrefix < i.
                else if (i > 1 && prevChar == GlobalMembers.CHAR_MIDDLE_DOT && minPrefix < i && i <= len - minSuffix && ((word[i - 2] == 'l' && word[i] == 'l') || (word[i - 2] == 'L' && word[i] == 'L')) && string.Compare(locale.getLanguage(), "ca") == 0)
                {
                    // In Catalan, "l·l" should break as "l-" on the first line
                    // and "l" on the next line.
                    result[i] = HyphenationType.BREAK_AND_REPLACE_WITH_HYPHEN;
                }
                else
                {
                    result[i] = HyphenationType.DONT_BREAK;
                }
            }
        }