Beispiel #1
0
        // Maps a hyphenation type to the edit code that applies to the END of the
        // line being broken. DONT_BREAK yields NO_EDIT; every hyphenation type
        // without a dedicated end-of-line edit falls through to BREAK_AT_END.
        public static uint editForThisLine(HyphenationType type)
        {
            uint edit;

            switch (type)
            {
            case HyphenationType.DONT_BREAK:
                edit = NO_EDIT;
                break;

            case HyphenationType.BREAK_AND_INSERT_HYPHEN:
                edit = INSERT_HYPHEN_AT_END;
                break;

            case HyphenationType.BREAK_AND_INSERT_ARMENIAN_HYPHEN:
                edit = INSERT_ARMENIAN_HYPHEN_AT_END;
                break;

            case HyphenationType.BREAK_AND_INSERT_MAQAF:
                edit = INSERT_MAQAF_AT_END;
                break;

            case HyphenationType.BREAK_AND_INSERT_UCAS_HYPHEN:
                edit = INSERT_UCAS_HYPHEN_AT_END;
                break;

            case HyphenationType.BREAK_AND_REPLACE_WITH_HYPHEN:
                edit = REPLACE_WITH_HYPHEN_AT_END;
                break;

            case HyphenationType.BREAK_AND_INSERT_HYPHEN_AND_ZWJ:
                edit = INSERT_ZWJ_AND_HYPHEN_AT_END;
                break;

            default:
                edit = BREAK_AT_END;
                break;
            }

            return edit;
        }
Beispiel #2
0
        // Maps a hyphenation type to the edit code that applies to the START of the
        // line following the break. DONT_BREAK yields NO_EDIT; any type without a
        // dedicated start-of-line edit yields BREAK_AT_START.
        public static uint editForNextLine(HyphenationType type)
        {
            if (type == HyphenationType.DONT_BREAK)
            {
                return NO_EDIT;
            }

            if (type == HyphenationType.BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE)
            {
                return INSERT_HYPHEN_AT_START;
            }

            if (type == HyphenationType.BREAK_AND_INSERT_HYPHEN_AND_ZWJ)
            {
                return INSERT_ZWJ_AT_START;
            }

            return BREAK_AT_START;
        }
Beispiel #3
0
//C++ TO C# CONVERTER WARNING: The original C++ declaration of the following method implementation was not found:
        // Computes one HyphenationType per code unit of `word` into `result`.
        //
        // `result` is cleared and resized to `len` first. When pattern data is
        // loaded and the word is within the supported size limits, the word is
        // mapped through the alphabet table and hyphenated from patterns; in every
        // other case the pattern-free fallback is used.
        //
        // NOTE(review): `vector <HyphenationType>` and the scalar `UInt16 word`
        // look like leftovers of the C++ signature (alphabetLookup is declared
        // with a UInt16[] word) - confirm the intended C# types with the callers.
        public void hyphenate(vector <HyphenationType> result, UInt16 word, int len, icu.Locale locale)
        {
            result.clear();
            result.resize(len);

            // The start and stop codes each add one entry to the coded word.
            int  paddedLen      = len + 2;
            bool patternsUsable = patternData != null
                                  && len >= minPrefix + minSuffix
                                  && paddedLen <= MAX_HYPHENATED_SIZE;

            if (patternsUsable)
            {
                UInt16[]        alpha_codes = Arrays.InitializeWithDefaultInstances <UInt16>(MAX_HYPHENATED_SIZE);
                HyphenationType hyphenValue = alphabetLookup(alpha_codes, word, len);
                bool            allMapped   = hyphenValue != HyphenationType.DONT_BREAK;
                if (allMapped)
                {
                    hyphenateFromCodes(result.data(), alpha_codes, paddedLen, hyphenValue);
                    return;
                }
                // TODO: try NFC normalization
                // TODO: handle non-BMP Unicode (requires remapping of offsets)
            }

            // A word containing a hyphen or a soft hyphen always ends up here: the
            // alphabet is not expected to contain those characters, so
            // alphabetLookup reports DONT_BREAK for it.
            hyphenateWithNoPatterns(result.data(), word, len, locale);
        }
        // Registers a break candidate at `offset`, preceded by "desperate"
        // per-character candidates when the text since the previous candidate is
        // wider than the current line.
        //
        //   offset          index of the break in the text buffer
        //   preBreak        width value stored as the candidate's preBreak
        //   postBreak       width value stored as the candidate's postBreak
        //   preSpaceCount   space count stored as the candidate's preSpaceCount
        //   postSpaceCount  space count stored as the candidate's postSpaceCount
        //   penalty         penalty stored on the candidate
        //   hyph            hyphenation type stored on the candidate
        public void addWordBreak(int offset, ParaWidth preBreak, ParaWidth postBreak, int preSpaceCount, int postSpaceCount, float penalty, HyphenationType hyph)
        {
            ParaWidth width = mCandidates.back().preBreak;

            // The converted code compared the boolean result against null
            // ("a > b != null"), which is always true and therefore emitted
            // desperate breaks for every word. Only do so when the run really
            // does not fit on the current line.
            if (postBreak - width > currentLineWidth())
            {
                // Add desperate breaks.
                // Note: these breaks are based on the shaping of the (non-broken) original
                // text; they are imprecise especially in the presence of kerning,
                // ligatures, and Arabic shaping.
                int i = mCandidates.back().offset;
                width += mCharWidths[i++];
                for (; i < offset; i++)
                {
                    float w = mCharWidths[i];
                    if (w > 0F)
                    {
                        // A fresh instance per candidate: the C++ original passed
                        // the candidate by value, so reusing one object here would
                        // alias every stored candidate if Candidate is a class.
                        Candidate desperate = new Candidate();
                        desperate.offset    = i;
                        desperate.preBreak  = width;
                        desperate.postBreak = width;
                        // postSpaceCount doesn't include trailing spaces
                        desperate.preSpaceCount  = postSpaceCount;
                        desperate.postSpaceCount = postSpaceCount;
                        desperate.penalty        = SCORE_DESPERATE;
                        desperate.hyphenType     = HyphenationType.BREAK_AND_DONT_INSERT_HYPHEN;
        #if VERBOSE_DEBUG
                        ALOGD("desperate cand: %zd %g:%g", mCandidates.size(), desperate.postBreak, desperate.preBreak);
        #endif
                        addCandidate(desperate);
                        width += w;
                    }
                }
            }

            // The regular candidate requested by the caller.
            Candidate cand = new Candidate();
            cand.offset         = offset;
            cand.preBreak       = preBreak;
            cand.postBreak      = postBreak;
            cand.penalty        = penalty;
            cand.preSpaceCount  = preSpaceCount;
            cand.postSpaceCount = postSpaceCount;
            cand.hyphenType     = hyph;
        #if VERBOSE_DEBUG
            ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
        #endif
            addCandidate(cand);
        }
        // TODO: this class is actually fairly close to being general and not tied to
        // using Minikin to do the shaping of the strings. The main thing that would
        // need to be changed is having some kind of callback (or virtual class, or
        // maybe even template), which could easily be instantiated with Minikin's
        // Layout. Future work for when needed.
        // Processes the style run [start, end) of the text buffer: measures it
        // (when a paint is supplied), advances the running width / space counters
        // character by character, and registers word-break and hyphenation
        // candidates at every boundary reported by the word breaker.
        //
        //   paint     paint used for measuring; when null nothing is measured and
        //             no hyphenation candidates are generated
        //   typeface  font collection forwarded to Layout.measureText
        //   style     font style forwarded to Layout.measureText
        //   start     first index of the run
        //   end       index one past the last character of the run
        //   isRtl     selects kBidi_Force_RTL instead of kBidi_Force_LTR
        //
        // Returns the width reported by Layout.measureText for the whole run, or
        // 0 when paint is null.
        public float addStyleRun(MinikinPaint paint, FontCollection typeface, FontStyle style, int start, int end, bool isRtl)
        {
            float width     = 0.0f;
            int   bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;

            float hyphenPenalty = 0.0F;

            if (paint != null)
            {
                // NOTE(review): "mCharWidths.data() + start" is C++ pointer
                // arithmetic carried over by the converter - confirm how
                // Layout.measureText expects the advances sub-range in this port.
                width = Layout.measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, style, paint, typeface, mCharWidths.data() + start);

                // a heuristic that seems to perform well
                // (computed in double like the C++ original, then narrowed explicitly)
                hyphenPenalty = (float)(0.5 * paint.size * paint.scaleX * mLineWidths.getLineWidth(0));
                if (mHyphenationFrequency == kHyphenationFrequency_Normal)
                {
                    hyphenPenalty *= 4.0f; // TODO: Replace with a better value after some testing
                }

                if (mJustified)
                {
                    // Make hyphenation more aggressive for fully justified text (so that
                    // "normal" in justified mode is the same as "full" in ragged-right).
                    hyphenPenalty *= 0.25f;
                }
                else
                {
                    // Line penalty is zero for justified text.
                    mLinePenalty = Math.Max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
                }
            }

            int       current        = (int)mWordBreaker.current();
            int       afterWord      = start;
            int       lastBreak      = start;
            ParaWidth lastBreakWidth = mWidth;
            ParaWidth postBreak      = mWidth;
            int       postSpaceCount = mSpaceCount;

            for (int i = start; i < end; i++)
            {
                UInt16 c = mTextBuf[i];
                if (c == CHAR_TAB)
                {
                    mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
                    if (mFirstTabIndex == INT_MAX)
                    {
                        mFirstTabIndex = (int)i;
                    }
                    // fall back to greedy; other modes don't know how to deal with tabs
                    mStrategy = kBreakStrategy_Greedy;
                }
                else
                {
                    if (isWordSpace(c))
                    {
                        mSpaceCount += 1;
                    }
                    mWidth += mCharWidths[i];
                    if (!isLineEndSpace(c))
                    {
                        // Track the extent of the text up to the last character
                        // that is not a line-end space.
                        postBreak      = mWidth;
                        postSpaceCount = mSpaceCount;
                        afterWord      = i + 1;
                    }
                }

                // Only act when this character ends at the word breaker's current
                // boundary. (The converted code compared the boolean against
                // null, which made this branch run for every character.)
                if (i + 1 == current)
                {
                    int wordStart = mWordBreaker.wordStart();
                    int wordEnd   = mWordBreaker.wordEnd();
                    if (paint != null && mHyphenator != null && mHyphenationFrequency != kHyphenationFrequency_None && wordStart >= start && wordEnd > wordStart && wordEnd - wordStart <= LONGEST_HYPHENATED_WORD)
                    {
                        // NOTE(review): the C++ original presumably passed the
                        // address of mTextBuf[wordStart]; here a single code unit
                        // is passed to match hyphenate's current signature -
                        // confirm.
                        mHyphenator.hyphenate(mHyphBuf, mTextBuf[wordStart], wordEnd - wordStart, mLocale);
        #if VERBOSE_DEBUG
                        string hyphenatedString;
                        for (int j = wordStart; j < wordEnd; j++)
                        {
                            if (mHyphBuf[j - wordStart] == HyphenationType.BREAK_AND_INSERT_HYPHEN)
                            {
                                hyphenatedString.push_back('-');
                            }
                            // Note: only works with ASCII, should do UTF-8 conversion here
                            hyphenatedString.push_back(buffer()[j]);
                        }
                        ALOGD("hyphenated string: %s", hyphenatedString);
        #endif

                        // measure hyphenated substrings
                        for (int j = wordStart; j < wordEnd; j++)
                        {
                            HyphenationType hyph = mHyphBuf[j - wordStart];
                            if (hyph != HyphenationType.DONT_BREAK)
                            {
                                // Width of the text from the last break up to j,
                                // measured with the end-of-line edit applied.
                                paint.hyphenEdit = HyphenEdit.editForThisLine(hyph);
                                float     firstPartWidth = Layout.measureText(mTextBuf.data(), lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style, paint, typeface, null);
                                ParaWidth hyphPostBreak  = lastBreakWidth + firstPartWidth;

                                // Width of the remainder of the word, measured
                                // with the start-of-line edit applied.
                                paint.hyphenEdit = HyphenEdit.editForNextLine(hyph);
                                float     secondPartWidth = Layout.measureText(mTextBuf.data(), j, afterWord - j, mTextBuf.size(), bidiFlags, style, paint, typeface, null);
                                ParaWidth hyphPreBreak    = postBreak - secondPartWidth;

                                addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount, hyphenPenalty, hyph);

                                paint.hyphenEdit = HyphenEdit.NO_EDIT;
                            }
                        }
                    }

                    // Skip break for zero-width characters inside replacement span
                    if (paint != null || current == end || mCharWidths[current] > 0)
                    {
                        float penalty = hyphenPenalty * mWordBreaker.breakBadness();
                        addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty, HyphenationType.DONT_BREAK);
                    }

                    // Plain value assignments; the converter had emitted
                    // CopyFrom() calls on int locals here.
                    lastBreak      = current;
                    lastBreakWidth = mWidth;
                    current        = (int)mWordBreaker.next();
                }
            }

            return(width);
        }
Beispiel #6
0
        // calculate hyphenation from patterns, assuming alphabet lookup has already
        // been done

        /**
         * Internal implementation, after conversion to codes. All case folding and
         * normalization has been done by now, and all characters have been found in the
         * alphabet. Note: len here is the padded length including 0 codes at start and
         * end.
         *
         * result      receives one HyphenationType per position; it is also used
         *             as scratch space for the intermediate hyphenation numbers,
         *             so it is expected to hold DONT_BREAK (0) on entry
         * codes       alphabet codes of the word, framed by a 0 start and stop code
         * len         padded length of codes
         * hyphenValue value stored at every position where a break is allowed
         **/
        private void hyphenateFromCodes(HyphenationType[] result, UInt16[] codes, int len, HyphenationType hyphenValue)
        {
            // The C++ original reinterpret_cast the result array to bytes
            // (HyphenationType must be byte-sized). C# has no such cast, so the
            // intermediate hyphenation numbers are stored in place through
            // explicit enum <-> byte conversions.

            Header  header        = getHeader();
            Trie    trie          = header.trieTable();
            Pattern pattern       = header.patternTable();
            uint    char_mask     = trie.char_mask;
            uint    link_shift    = trie.link_shift;
            uint    link_mask     = trie.link_mask;
            uint    pattern_shift = trie.pattern_shift;
            int     maxOffset     = len - minSuffix - 1;

            for (int i = 0; i < len - 1; i++)
            {
                uint node = 0; // index into Trie table
                for (int j = i; j < len; j++)
                {
                    UInt16 c     = codes[j];
                    uint   entry = trie.data[node + c];
                    if ((entry & char_mask) != c)
                    {
                        // No transition for this code: no longer pattern starts at i.
                        break;
                    }
                    node = (entry & link_mask) >> link_shift;

                    uint pat_ix = trie.data[node] >> pattern_shift;
                    // pat_ix contains a 3-tuple of length, shift (number of trailing zeros),
                    // and an offset into the buf pool. This is the pattern for the substring
                    // (i..j) we just matched, which we combine (via point-wise max) into the
                    // buffer vector.
                    if (pat_ix != 0)
                    {
                        uint   pat_entry = pattern.data[pat_ix];
                        int    pat_len   = Pattern.len(pat_entry);
                        int    pat_shift = Pattern.shift(pat_entry);
                        byte[] pat_buf   = pattern.buf(pat_entry);
                        int    offset    = j + 1 - (pat_len + pat_shift);
                        // offset is the index within result that lines up with the start of
                        // pat_buf; clamp so only [minPrefix, maxOffset) is touched
                        int start = Math.Max((int)minPrefix - offset, 0);
                        int end   = Math.Min(pat_len, (int)maxOffset - offset);
                        for (int k = start; k < end; k++)
                        {
                            byte current = (byte)result[offset + k];
                            result[offset + k] = (HyphenationType)Math.Max(current, pat_buf[k]);
                        }
                    }
                }
            }
            // Since the above calculation does not modify values outside
            // [minPrefix, len - minSuffix], they are left as 0 = DONT_BREAK.
            for (int i = minPrefix; i < maxOffset; i++)
            {
                // Hyphenation opportunities happen when the hyphenation numbers are odd.
                // (The converted code tested "(x & 1u) != null", which is always
                // true and marked every position as a break.)
                bool isOdd = (((byte)result[i]) & 1) != 0;
                result[i] = isOdd ? hyphenValue : HyphenationType.DONT_BREAK;
            }
        }
Beispiel #7
0
        // Try looking up word in alphabet table, return DONT_BREAK if any code units
        // fail to map. Otherwise, returns BREAK_AND_INSERT_HYPHEN,
        // BREAK_AND_INSERT_ARMENIAN_HYPHEN, or BREAK_AND_DONT_INSERT_HYPHEN based on
        // the script of the characters seen. Note that this method writes len+2
        // entries into alpha_codes (including start and stop)
        //
        //   alpha_codes  output: 0, the code of each of the len code units, then 0
        //   word         code units to look up
        //   len          number of code units of word to process
        //
        // Alphabet versions other than 0 and 1 are not understood and yield
        // DONT_BREAK.
        private HyphenationType alphabetLookup(UInt16[] alpha_codes, UInt16[] word, int len)
        {
            Header          header = getHeader();
            HyphenationType result = HyphenationType.BREAK_AND_INSERT_HYPHEN;
            // TODO: check header magic
            uint alphabetVersion = header.alphabetVersion();

            if (alphabetVersion == 0)
            {
                // Version 0: dense table indexed by (code unit - min_codepoint).
                AlphabetTable0 alphabet      = header.alphabetTable0();
                uint           min_codepoint = alphabet.min_codepoint;
                uint           max_codepoint = alphabet.max_codepoint;
                alpha_codes[0] = 0; // word start
                for (int i = 0; i < len; i++)
                {
                    UInt16 c = word[i];
                    if (c < min_codepoint || c >= max_codepoint)
                    {
                        return(HyphenationType.DONT_BREAK);
                    }
                    byte code = alphabet.data[c - min_codepoint];
                    if (code == 0)
                    {
                        return(HyphenationType.DONT_BREAK);
                    }
                    // Keep asking until some character picks a type other than
                    // the default BREAK_AND_INSERT_HYPHEN.
                    if (result == HyphenationType.BREAK_AND_INSERT_HYPHEN)
                    {
                        result = minikin.GlobalMembers.hyphenationTypeBasedOnScript(c);
                    }
                    alpha_codes[i + 1] = code;
                }
                alpha_codes[len + 1] = 0; // word termination
                return(result);
            }
            else if (alphabetVersion == 1)
            {
                // Version 1: table of packed entries searched by key (c << 11).
                // NOTE(review): the binary search relies on alphabet.data being
                // sorted ascending, as the std::lower_bound call in the C++
                // original did - confirm against the data format.
                AlphabetTable1 alphabet  = header.alphabetTable1();
                int            n_entries = alphabet.n_entries;
                alpha_codes[0] = 0; // word start
                for (int i = 0; i < len; i++)
                {
                    UInt16 c   = word[i];
                    uint   key = (uint)c << 11;

                    // Lower bound: first index whose entry is not less than key.
                    // Replaces the untranslated std::lower_bound / pointer code.
                    int lo = 0;
                    int hi = n_entries;
                    while (lo < hi)
                    {
                        int mid = lo + (hi - lo) / 2;
                        if (alphabet.data[mid] < key)
                        {
                            lo = mid + 1;
                        }
                        else
                        {
                            hi = mid;
                        }
                    }
                    if (lo == n_entries)
                    {
                        return(HyphenationType.DONT_BREAK);
                    }
                    uint entry = alphabet.data[lo];
                    if (AlphabetTable1.codepoint(entry) != c)
                    {
                        return(HyphenationType.DONT_BREAK);
                    }
                    if (result == HyphenationType.BREAK_AND_INSERT_HYPHEN)
                    {
                        result = minikin.GlobalMembers.hyphenationTypeBasedOnScript(c);
                    }
                    alpha_codes[i + 1] = (UInt16)AlphabetTable1.value(entry);
                }
                alpha_codes[len + 1] = 0; // word termination
                return(result);
            }
            return(HyphenationType.DONT_BREAK);
        }