// Rebuilds the line-break iterator for the given locale. When text has already
// been supplied (mText is non-null), it is re-attached to the new iterator.
// The iterator is always flagged as having been reset afterwards.
public void setLocale(icu.Locale locale)
{
    UErrorCode status = U_ZERO_ERROR;
    var lineIterator = icu.BreakIterator.createLineInstance(locale, status);
    mBreakIterator.reset(lineIterator);
    // TODO: handle failure status

    if (mText != null)
    {
        // The same status variable is reused for the setText call.
        mBreakIterator.setText(mUText, status);
    }

    mIteratorWasReset = true;
}
//C++ TO C# CONVERTER WARNING: The original C++ declaration of the following method implementation was not found:
// Computes one HyphenationType per position of the word and stores them in
// result, which is cleared and resized to len first.
//
// Pattern-based hyphenation is attempted only when pattern data is loaded, the
// word is at least minPrefix + minSuffix long, and the padded word fits in
// MAX_HYPHENATED_SIZE. In every other case, or when alphabetLookup reports
// DONT_BREAK, the pattern-free rules are applied instead.
public void hyphenate(vector <HyphenationType> result, UInt16 word, int len, icu.Locale locale)
{
    result.clear();
    result.resize(len);

    // The start and stop codes each count for 1.
    int paddedLen = len + 2;

    bool patternsApplicable = patternData != null
        && len >= minPrefix + minSuffix
        && paddedLen <= MAX_HYPHENATED_SIZE;

    if (patternsApplicable)
    {
        UInt16[] alphaCodes = Arrays.InitializeWithDefaultInstances <UInt16>(MAX_HYPHENATED_SIZE);
        HyphenationType lookupResult = alphabetLookup(alphaCodes, word, len);
        if (lookupResult != HyphenationType.DONT_BREAK)
        {
            hyphenateFromCodes(result.data(), alphaCodes, paddedLen, lookupResult);
            return;
        }
        // TODO: try NFC normalization
        // TODO: handle non-BMP Unicode (requires remapping of offsets)
    }

    // A word containing a hyphen or a soft hyphen always ends up here: the
    // alphabet is not expected to contain either character, so alphabetLookup
    // would return DONT_BREAK for it.
    hyphenateWithNoPatterns(result.data(), word, len, locale);
}
// Keep synchronized with TAB_MASK in StaticLayout.java.
public const int kTab_Shift = 29;

// Sets the locale used for word breaking and the hyphenator used for
// hyphenation.
//
// The locale persists across multiple invocations (finish() does not clean it
// up), explicitly to avoid the cost of creating ICU BreakIterator objects. It
// should always be set on the first invocation, but callers are encouraged not
// to call again unless the locale has actually changed. That check could live
// here, but it is better for performance to keep it upstream because of the
// cost of constructing and comparing ICU Locale objects.
//
// The caller is responsible for managing the lifetime of the hyphenator.
public void setLocale(icu.Locale locale, Hyphenator hyphenator)
{
    // The word breaker is updated first, then the values are recorded locally.
    this.mWordBreaker.setLocale(locale);
    this.mLocale = locale;
    this.mHyphenator = hyphenator;
}
// Applies the hyphenation rules that do not depend on pattern data, including
// hard and soft hyphens.
//
// Follows various recommendations of UAX #14 (Unicode Line Breaking Algorithm)
// for words that did not match patterns, especially words containing hyphens
// or soft hyphens (see sections 5.3, Use of Hyphen, and 5.4, Use of Soft
// Hyphen).
//
// result: receives one HyphenationType for each index in [0, len); index 0 is
//         always DONT_BREAK.
// word:   the code units of the word.
// len:    number of code units of word to examine; nothing is written when it
//         is zero or negative.
// locale: its language code selects the Polish ("pl") and Catalan ("ca")
//         special cases.
private void hyphenateWithNoPatterns(HyphenationType[] result, UInt16[] word, int len, icu.Locale locale)
{
    // An empty word has no positions to classify; writing result[0] would be
    // out of range.
    if (len <= 0)
    {
        return;
    }

    result[0] = HyphenationType.DONT_BREAK;
    for (int i = 1; i < len; i++)
    {
        UInt16 prevChar = word[i - 1];
        if (i > 1 && isLineBreakingHyphen(prevChar))
        {
            // Break after hyphens, but only if they don't start the word.
            // Language codes are compared ordinally, as identifiers, so the
            // result does not depend on the current culture.
            if ((prevChar == GlobalMembers.CHAR_HYPHEN_MINUS || prevChar == GlobalMembers.CHAR_HYPHEN)
                && string.Equals(locale.getLanguage(), "pl", StringComparison.Ordinal)
                && minikin.GlobalMembers.getScript(word[i]) == USCRIPT_LATIN)
            {
                // In Polish, hyphens get repeated at the next line. To be safe,
                // we will do this only if the next character is Latin.
                result[i] = HyphenationType.BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE;
            }
            else
            {
                result[i] = HyphenationType.BREAK_AND_DONT_INSERT_HYPHEN;
            }
        }
        else if (i > 1 && prevChar == GlobalMembers.CHAR_SOFT_HYPHEN)
        {
            // Break after soft hyphens, but only if they don't start the word
            // (a soft hyphen starting the word doesn't give any useful break
            // opportunities). The type of the break is based on the script of
            // the character we break on.
            if (minikin.GlobalMembers.getScript(word[i]) == USCRIPT_ARABIC)
            {
                // For Arabic, we need to look and see if the characters around
                // the soft hyphen actually join. If they don't, we'll just
                // insert a normal hyphen.
                // NOTE(review): the `new UInt16(word)` wrapper is kept from the
                // converted source because the callee's signature is not
                // visible here - confirm it should receive the array itself.
                result[i] = minikin.GlobalMembers.getHyphTypeForArabic(new UInt16(word), len, i);
            }
            else
            {
                result[i] = minikin.GlobalMembers.hyphenationTypeBasedOnScript(word[i]);
            }
        }
        else if (prevChar == GlobalMembers.CHAR_MIDDLE_DOT
            && i > 1 // word[i - 2] is read below; do not rely on minPrefix >= 1
            && minPrefix < i && i <= len - minSuffix
            && ((word[i - 2] == 'l' && word[i] == 'l') || (word[i - 2] == 'L' && word[i] == 'L'))
            && string.Equals(locale.getLanguage(), "ca", StringComparison.Ordinal))
        {
            // In Catalan, "l·l" should break as "l-" on the first line
            // and "l" on the next line.
            result[i] = HyphenationType.BREAK_AND_REPLACE_WITH_HYPHEN;
        }
        else
        {
            result[i] = HyphenationType.DONT_BREAK;
        }
    }
}