// Selects the hyphen edit applied to the line that ends at a break of the
// given hyphenation type. DONT_BREAK maps to NO_EDIT; any type without a
// dedicated case maps to BREAK_AT_END.
public static uint editForThisLine(HyphenationType type)
{
    uint edit;
    switch (type)
    {
        case HyphenationType.DONT_BREAK:
            edit = NO_EDIT;
            break;
        case HyphenationType.BREAK_AND_INSERT_HYPHEN:
            edit = INSERT_HYPHEN_AT_END;
            break;
        case HyphenationType.BREAK_AND_INSERT_ARMENIAN_HYPHEN:
            edit = INSERT_ARMENIAN_HYPHEN_AT_END;
            break;
        case HyphenationType.BREAK_AND_INSERT_MAQAF:
            edit = INSERT_MAQAF_AT_END;
            break;
        case HyphenationType.BREAK_AND_INSERT_UCAS_HYPHEN:
            edit = INSERT_UCAS_HYPHEN_AT_END;
            break;
        case HyphenationType.BREAK_AND_REPLACE_WITH_HYPHEN:
            edit = REPLACE_WITH_HYPHEN_AT_END;
            break;
        case HyphenationType.BREAK_AND_INSERT_HYPHEN_AND_ZWJ:
            edit = INSERT_ZWJ_AND_HYPHEN_AT_END;
            break;
        default:
            edit = BREAK_AT_END;
            break;
    }
    return edit;
}
// Selects the hyphen edit applied to the line that starts after a break of
// the given hyphenation type. DONT_BREAK maps to NO_EDIT; any type without a
// dedicated case maps to BREAK_AT_START.
public static uint editForNextLine(HyphenationType type)
{
    uint edit;
    switch (type)
    {
        case HyphenationType.DONT_BREAK:
            edit = NO_EDIT;
            break;
        case HyphenationType.BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE:
            edit = INSERT_HYPHEN_AT_START;
            break;
        case HyphenationType.BREAK_AND_INSERT_HYPHEN_AND_ZWJ:
            edit = INSERT_ZWJ_AT_START;
            break;
        default:
            edit = BREAK_AT_START;
            break;
    }
    return edit;
}
//C++ TO C# CONVERTER WARNING: The original C++ declaration of the following method implementation was not found:
//
// Fills `result` with one HyphenationType per code unit of the word.
// `result` is cleared and resized to `len` first. When pattern data is
// present and the word length is within [minPrefix + minSuffix,
// MAX_HYPHENATED_SIZE - 2], the word is mapped through the alphabet table and
// hyphenated from patterns; otherwise (or when the alphabet lookup yields
// DONT_BREAK) the pattern-less fallback is used.
//
// NOTE(review): `word` is declared as a single UInt16 here but is forwarded
// to alphabetLookup, which is declared with UInt16[] - presumably the
// converter dropped a pointer; confirm the intended parameter type.
public void hyphenate(vector <HyphenationType> result, UInt16 word, int len, icu.Locale locale)
{
    result.clear();
    result.resize(len);

    // The start and stop codes each add one entry to the code sequence.
    int paddedLen = len + 2;

    bool patternsUsable = patternData != null
        && len >= minPrefix + minSuffix
        && paddedLen <= MAX_HYPHENATED_SIZE;

    if (patternsUsable)
    {
        UInt16[] alpha_codes = Arrays.InitializeWithDefaultInstances <UInt16>(MAX_HYPHENATED_SIZE);
        HyphenationType hyphenValue = alphabetLookup(alpha_codes, word, len);
        bool everyCodeUnitMapped = hyphenValue != HyphenationType.DONT_BREAK;
        if (everyCodeUnitMapped)
        {
            hyphenateFromCodes(result.data(), alpha_codes, paddedLen, hyphenValue);
            return;
        }
        // TODO: try NFC normalization
        // TODO: handle non-BMP Unicode (requires remapping of offsets)
    }

    // Words containing a hyphen or a soft hyphen always end up here: the
    // alphabet is not expected to contain either character, so
    // alphabetLookup would have returned DONT_BREAK for them.
    hyphenateWithNoPatterns(result.data(), word, len, locale);
}
// Registers a break candidate at `offset`.
//
// If the text between the previous candidate's preBreak position and this
// break's postBreak position is wider than the current line width, one
// "desperate" candidate is first added before each non-zero-width character
// between the previous candidate's offset and `offset`, so an over-long word
// can still be broken.
//
//   offset          index of the break in the text buffer
//   preBreak        width stored as the candidate's preBreak
//   postBreak       width stored as the candidate's postBreak
//   preSpaceCount   space count stored as the candidate's preSpaceCount
//   postSpaceCount  space count stored as the candidate's postSpaceCount
//                   (also used for both counts of desperate candidates)
//   penalty         penalty stored on the regular candidate
//   hyph            hyphenation type stored on the regular candidate
//
// NOTE(review): the same `cand` instance is handed to every addCandidate
// call. That matches the by-value C++ original only if Candidate is a struct
// or addCandidate copies its argument - confirm.
public void addWordBreak(int offset, ParaWidth preBreak, ParaWidth postBreak, int preSpaceCount, int postSpaceCount, float penalty, HyphenationType hyph)
{
    Candidate cand = new Candidate();
    ParaWidth width = mCandidates.back().preBreak;

    // The converter emitted "... > currentLineWidth() != null", which compares
    // a bool with null and is therefore always true; the intended test is the
    // plain width comparison.
    if (postBreak - width > currentLineWidth())
    {
        // Add desperate breaks.
        // Note: these breaks are based on the shaping of the (non-broken) original
        // text; they are imprecise especially in the presence of kerning,
        // ligatures, and Arabic shaping.
        int i = mCandidates.back().offset;
        width += mCharWidths[i++];
        for (; i < offset; i++)
        {
            float w = mCharWidths[i];
            if (w > 0F)
            {
                cand.offset = i;
                cand.preBreak = width;
                cand.postBreak = width;
                // postSpaceCount doesn't include trailing spaces
                cand.preSpaceCount = postSpaceCount;
                cand.postSpaceCount = postSpaceCount;
                cand.penalty = SCORE_DESPERATE;
                cand.hyphenType = HyphenationType.BREAK_AND_DONT_INSERT_HYPHEN;
#if VERBOSE_DEBUG
                ALOGD("desperate cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
#endif
                addCandidate(cand);
                width += w;
            }
        }
    }

    cand.offset = offset;
    cand.preBreak = preBreak;
    cand.postBreak = postBreak;
    cand.penalty = penalty;
    cand.preSpaceCount = preSpaceCount;
    cand.postSpaceCount = postSpaceCount;
    cand.hyphenType = hyph;
#if VERBOSE_DEBUG
    ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
#endif
    addCandidate(cand);
}
// TODO: this class is actually fairly close to being general and not tied to
// using Minikin to do the shaping of the strings. The main thing that would
// need to be changed is having some kind of callback (or virtual class, or
// maybe even template), which could easily be instantiated with Minikin's
// Layout. Future work for when needed.
//
// Processes the style run [start, end) of the text buffer: measures it (when
// `paint` is non-null), advances the running width and space counters
// character by character, and adds break candidates at every word-breaker
// boundary, plus hyphenation candidates inside eligible words.
//
//   paint     paint used for measuring; when null nothing is measured and
//             the hyphenation penalty stays 0
//   typeface  font collection forwarded to Layout.measureText
//   style     font style forwarded to Layout.measureText
//   start     index of the first character of the run
//   end       index one past the last character of the run
//   isRtl     selects kBidi_Force_RTL instead of kBidi_Force_LTR
//
// Returns the width reported by Layout.measureText for the whole run, or 0
// when `paint` is null.
public float addStyleRun(MinikinPaint paint, FontCollection typeface, FontStyle style, int start, int end, bool isRtl)
{
    float width = 0.0f;
    int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;

    float hyphenPenalty = 0.0F;
    if (paint != null)
    {
        // NOTE(review): "mCharWidths.data() + start" is pointer arithmetic
        // carried over from C++; it needs an offset-taking overload or a span
        // on the C# side - confirm against Layout.measureText.
        width = Layout.measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style, paint, typeface, mCharWidths.data() + start);

        // a heuristic that seems to perform well
        // (evaluated in double and narrowed explicitly; C# has no implicit
        // double -> float conversion)
        hyphenPenalty = (float)(0.5 * paint.size * paint.scaleX * mLineWidths.getLineWidth(0));
        if (mHyphenationFrequency == kHyphenationFrequency_Normal)
        {
            hyphenPenalty *= 4.0f; // TODO: Replace with a better value after some testing
        }

        if (mJustified)
        {
            // Make hyphenation more aggressive for fully justified text (so that
            // "normal" in justified mode is the same as "full" in ragged-right).
            hyphenPenalty *= 0.25f;
        }
        else
        {
            // Line penalty is zero for justified text.
            mLinePenalty = Math.Max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
        }
    }

    int current = (int)mWordBreaker.current();
    int afterWord = start;
    int lastBreak = start;
    ParaWidth lastBreakWidth = mWidth;
    ParaWidth postBreak = mWidth;
    int postSpaceCount = mSpaceCount;

    for (int i = start; i < end; i++)
    {
        UInt16 c = mTextBuf[i];
        if (c == CHAR_TAB)
        {
            mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
            if (mFirstTabIndex == INT_MAX)
            {
                mFirstTabIndex = (int)i;
            }
            // fall back to greedy; other modes don't know how to deal with tabs
            mStrategy = kBreakStrategy_Greedy;
        }
        else
        {
            if (isWordSpace(c))
            {
                mSpaceCount += 1;
            }
            mWidth += mCharWidths[i];
            if (!isLineEndSpace(c))
            {
                postBreak = mWidth;
                postSpaceCount = mSpaceCount;
                // Plain value assignment; the converter's CopyFrom call does
                // not exist on int.
                afterWord = i + 1;
            }
        }

        // Only act when this character ends at the word breaker's current
        // boundary. (The converted "== current != null" was always true and
        // ran this block for every character.)
        if (i + 1 == current)
        {
            int wordStart = mWordBreaker.wordStart();
            int wordEnd = mWordBreaker.wordEnd();
            if (paint != null && mHyphenator != null
                && mHyphenationFrequency != kHyphenationFrequency_None
                && wordStart >= start && wordEnd > wordStart
                && wordEnd - wordStart <= LONGEST_HYPHENATED_WORD)
            {
                // NOTE(review): the C++ original passed the address of
                // mTextBuf[wordStart]; the converted hyphenate signature takes
                // a single UInt16 - confirm how the word is meant to be passed.
                mHyphenator.hyphenate(mHyphBuf, mTextBuf[wordStart], wordEnd - wordStart, mLocale);
#if VERBOSE_DEBUG
                string hyphenatedString;
                for (int j = wordStart; j < wordEnd; j++)
                {
                    if (mHyphBuf[j - wordStart] == HyphenationType.BREAK_AND_INSERT_HYPHEN)
                    {
                        hyphenatedString.push_back('-');
                    }
                    // Note: only works with ASCII, should do UTF-8 conversion here
                    hyphenatedString.push_back(buffer()[j]);
                }
                ALOGD("hyphenated string: %s", hyphenatedString);
#endif

                // measure hyphenated substrings
                for (int j = wordStart; j < wordEnd; j++)
                {
                    HyphenationType hyph = mHyphBuf[j - wordStart];
                    if (hyph != HyphenationType.DONT_BREAK)
                    {
                        // Width of the text from the last break up to the
                        // hyphenation point, with the end-of-line edit applied.
                        paint.hyphenEdit = HyphenEdit.editForThisLine(hyph);
                        float firstPartWidth = Layout.measureText(mTextBuf.data(), lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style, paint, typeface, null);
                        ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;

                        // Width of the remainder of the word, with the
                        // start-of-line edit applied.
                        paint.hyphenEdit = HyphenEdit.editForNextLine(hyph);
                        float secondPartWidth = Layout.measureText(mTextBuf.data(), j, afterWord - j, mTextBuf.size(), bidiFlags, style, paint, typeface, null);
                        ParaWidth hyphPreBreak = postBreak - secondPartWidth;

                        addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount, hyphenPenalty, hyph);

                        paint.hyphenEdit = HyphenEdit.NO_EDIT;
                    }
                }
            }

            // Skip break for zero-width characters inside replacement span
            if (paint != null || current == end || mCharWidths[current] > 0)
            {
                float penalty = hyphenPenalty * mWordBreaker.breakBadness();
                addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty, HyphenationType.DONT_BREAK);
            }

            lastBreak = current;
            lastBreakWidth = mWidth;
            current = (int)mWordBreaker.next();
        }
    }

    return width;
}
// calculate hyphenation from patterns, assuming alphabet lookup has already
// been done
/**
 * Internal implementation, after conversion to codes. All case folding and
 * normalization has been done by now, and all characters have been found in the
 * alphabet. Note: len here is the padded length including 0 codes at start and
 * end.
 *
 * For every start position the trie is walked over the codes; each pattern
 * found on the way is merged (point-wise max) into a scratch buffer of
 * hyphenation numbers. Positions in [minPrefix, len - minSuffix - 1) whose
 * number is odd are set to hyphenValue in result, the others to DONT_BREAK.
 * Entries of result outside that range are not written.
 **/
private void hyphenateFromCodes(HyphenationType[] result, UInt16[] codes, int len, HyphenationType hyphenValue)
{
    Header header = getHeader();
    Trie trie = header.trieTable();
    Pattern pattern = header.patternTable();
    uint char_mask = trie.char_mask;
    uint link_mask = trie.link_mask;
    // C# shift operators require an int shift count.
    int link_shift = (int)trie.link_shift;
    int pattern_shift = (int)trie.pattern_shift;
    int maxOffset = len - minSuffix - 1;

    // The C++ original reinterpreted `result` as a byte buffer and accumulated
    // the hyphenation numbers in place. C# has no reinterpret_cast, so a
    // separate scratch buffer is used and seeded with the current contents of
    // `result` for the only index range that is ever read or written.
    byte[] buffer = new byte[len];
    for (int i = minPrefix; i < maxOffset; i++)
    {
        buffer[i] = (byte)result[i];
    }

    for (int i = 0; i < len - 1; i++)
    {
        uint node = 0; // index into Trie table
        for (int j = i; j < len; j++)
        {
            UInt16 c = codes[j];
            uint entry = trie.data[node + c];
            if ((entry & char_mask) != c)
            {
                // No transition for this code from the current node.
                break;
            }
            node = (entry & link_mask) >> link_shift;

            uint pat_ix = trie.data[node] >> pattern_shift;
            // pat_ix contains a 3-tuple of length, shift (number of trailing zeros),
            // and an offset into the buf pool. This is the pattern for the substring
            // (i..j) we just matched, which we combine (via point-wise max) into the
            // buffer vector.
            if (pat_ix != 0)
            {
                uint pat_entry = pattern.data[pat_ix];
                int pat_len = Pattern.len(pat_entry);
                int pat_shift = Pattern.shift(pat_entry);
                byte[] pat_buf = pattern.buf(pat_entry);
                // offset is the index within buffer that lines up with the start of
                // pat_buf
                int offset = j + 1 - (pat_len + pat_shift);
                int start = Math.Max((int)minPrefix - offset, 0);
                int end = Math.Min(pat_len, (int)maxOffset - offset);
                for (int k = start; k < end; k++)
                {
                    buffer[offset + k] = Math.Max(buffer[offset + k], pat_buf[k]);
                }
            }
        }
    }

    // Since the above calculation does not modify values outside
    // [minPrefix, len - minSuffix], they are left as 0 = DONT_BREAK.
    for (int i = minPrefix; i < maxOffset; i++)
    {
        // Hyphenation opportunities happen when the hyphenation numbers are odd.
        // (The converted "!= null" test was always true and marked every
        // position as a break; the parity must be compared against 0.)
        result[i] = ((buffer[i] & 1u) != 0) ? hyphenValue : HyphenationType.DONT_BREAK;
    }
}
// Try looking up word in alphabet table, return DONT_BREAK if any code units
// fail to map. Otherwise, returns BREAK_AND_INSERT_HYPHEN,
// BREAK_AND_INSERT_ARMENIAN_HYPHEN, or BREAK_AND_DONT_INSERT_HYPHEN based on
// the script of the characters seen. Note that this method writes len+2
// entries into alpha_codes (including start and stop)
//
//   alpha_codes  output; [0] and [len + 1] are set to 0, [i + 1] receives the
//                code for word[i]
//   word         code units of the word
//   len          number of code units to read from word
//
// Also returns DONT_BREAK when the header reports an alphabet version other
// than 0 or 1.
private HyphenationType alphabetLookup(UInt16[] alpha_codes, UInt16[] word, int len)
{
    Header header = getHeader();
    HyphenationType result = HyphenationType.BREAK_AND_INSERT_HYPHEN;
    // TODO: check header magic
    uint alphabetVersion = header.alphabetVersion();

    if (alphabetVersion == 0)
    {
        // Version 0: direct table indexed by (code unit - min_codepoint).
        AlphabetTable0 alphabet = header.alphabetTable0();
        uint min_codepoint = alphabet.min_codepoint;
        uint max_codepoint = alphabet.max_codepoint;
        alpha_codes[0] = 0; // word start
        for (int i = 0; i < len; i++)
        {
            UInt16 c = word[i];
            if (c < min_codepoint || c >= max_codepoint)
            {
                return HyphenationType.DONT_BREAK;
            }
            byte code = alphabet.data[c - min_codepoint];
            if (code == 0)
            {
                return HyphenationType.DONT_BREAK;
            }
            // Only the first answer that differs from the default is kept.
            if (result == HyphenationType.BREAK_AND_INSERT_HYPHEN)
            {
                result = minikin.GlobalMembers.hyphenationTypeBasedOnScript(c);
            }
            alpha_codes[i + 1] = code;
        }
        alpha_codes[len + 1] = 0; // word termination
        return result;
    }
    else if (alphabetVersion == 1)
    {
        // Version 1: entries are searched with the key (code unit << 11).
        // NOTE(review): assumes alphabet.data is a uint[] sorted ascending,
        // as the binary search in the C++ original required - confirm.
        AlphabetTable1 alphabet = header.alphabetTable1();
        int n_entries = alphabet.n_entries;
        uint[] entries = alphabet.data;
        alpha_codes[0] = 0; // word start
        for (int i = 0; i < len; i++)
        {
            UInt16 c = word[i];
            // Index-based replacement for std::lower_bound over the first
            // n_entries elements; n_entries plays the role of the end iterator.
            int pos = alphabetLowerBound(entries, n_entries, (uint)c << 11);
            if (pos == n_entries)
            {
                return HyphenationType.DONT_BREAK;
            }
            uint entry = entries[pos];
            if (AlphabetTable1.codepoint(entry) != c)
            {
                return HyphenationType.DONT_BREAK;
            }
            if (result == HyphenationType.BREAK_AND_INSERT_HYPHEN)
            {
                result = minikin.GlobalMembers.hyphenationTypeBasedOnScript(c);
            }
            alpha_codes[i + 1] = (UInt16)AlphabetTable1.value(entry);
        }
        alpha_codes[len + 1] = 0; // word termination
        return result;
    }

    return HyphenationType.DONT_BREAK;
}

// Returns the index of the first element in sorted[0 .. count) that is not
// less than key, or count when every element is smaller.
private static int alphabetLowerBound(uint[] sorted, int count, uint key)
{
    int lo = 0;
    int hi = count;
    while (lo < hi)
    {
        int mid = lo + (hi - lo) / 2;
        if (sorted[mid] < key)
        {
            lo = mid + 1;
        }
        else
        {
            hi = mid;
        }
    }
    return lo;
}