예제 #1
0
        /**
         * Recursively visits the node p and its children. For every node whose
         * sc entry equals 0xFFFF, the string that lo[p] points to inside
         * kv.Arr is looked up in map; if it is not there yet it is copied into
         * kx and registered in map. lo[p] is then redirected to the position
         * of that string inside kx.
         * @param kx vector receiving the copied strings
         * @param map tree used to find strings that were already copied
         * @param p index of the node to process; 0 ends the recursion
         */
        private void Compact(CharVector kx, TernaryTree map, char p)
        {
            if (p == 0)
            {
                return;
            }

            if (sc[p] != 0xFFFF)
            {
                // Regular node: descend into lo, then eq (skipped when the
                // sc entry is 0), then hi.
                Compact(kx, map, lo[p]);
                if (sc[p] != 0)
                {
                    Compact(kx, map, eq[p]);
                }
                Compact(kx, map, hi[p]);
                return;
            }

            // Node marked with 0xFFFF: lo[p] is an index into kv.Arr.
            int target = map.Find(kv.Arr, lo[p]);
            if (target < 0)
            {
                // Not copied yet: reserve room for the string plus one extra
                // char, copy it, and remember where it now lives.
                int needed = Strlen(kv.Arr, lo[p]) + 1;
                target = kx.Alloc(needed);
                // kx.Arr is read only after Alloc, as in the original order.
                Strcpy(kx.Arr, target, kv.Arr, lo[p]);
                map.Insert(kx.Arr, target, (char)target);
            }
            lo[p] = (char)target;
        }
예제 #2
0
        /**
         * Registers a hyphenation pattern in the tree. Intended mainly as the
         * callback through which the
         * {@link SimplePatternParser SimplePatternParser} class adds patterns.
         * The interletter values are looked up in ivalues first; when the
         * lookup yields no positive index they are packed with PackValues and
         * stored in ivalues before the pattern itself is inserted.
         * @param pattern the hyphenation pattern
         * @param ivalue interletter weight values indicating the
         * desirability and priority of hyphenating at a given point
         * within the pattern. It should contain only digit characters
         * (i.e. '0' to '9').
         */
        public void AddPattern(String pattern, String ivalue)
        {
            int valueIndex = ivalues.Find(ivalue);
            bool alreadyStored = valueIndex > 0;

            if (!alreadyStored)
            {
                // Unknown value string: pack it and remember its index.
                valueIndex = PackValues(ivalue);
                ivalues.Insert(ivalue, (char)valueIndex);
            }

            char packedIndex = (char)valueIndex;
            Insert(pattern, packedIndex);
        }
예제 #3
0
        /**
         * w = "****nnllllllnnn*****",
         * where n is a non-letter, l is a letter,
         * all n may be absent, the first n is at offset,
         * the first l is at offset + iIgnoreAtBeginning;
         * word = ".llllll.'\0'***",
         * where all l in w are copied into word.
         * In the first part of the routine len = w.length,
         * in the second part of the routine len = word.length.
         * Three indices are used:
         * Index(w), the index in w,
         * Index(word), the index in word,
         * Letterindex(word), the index in the letter part of word.
         * The following relations exist:
         * Index(w) = offset + i - 1
         * Index(word) = i - iIgnoreAtBeginning
         * Letterindex(word) = Index(word) - 1
         * (see first loop).
         * It follows that:
         * Index(w) - Index(word) = offset - 1 + iIgnoreAtBeginning
         * Index(w) = Letterindex(word) + offset + iIgnoreAtBeginning
         */

        /**
         * Hyphenate word and return an array of hyphenation points.
         * The input span may start and end with non-letter characters
         * (characters that classmap does not know); these are skipped. A
         * letter that follows a non-letter which itself follows a letter
         * makes the span unhyphenatable and null is returned.
         * The hyphenation points of the result are counted from w[offset],
         * i.e. they include any skipped leading non-letters, and the text of
         * the result runs from w[offset] through the last letter.
         * @param w char array that contains the word
         * @param offset Offset to first character in word
         * @param len Length of word
         * @param remainCharCount Minimum number of characters allowed
         * before the hyphenation point.
         * @param pushCharCount Minimum number of characters allowed after
         * the hyphenation point.
         * @return a {@link Hyphenation Hyphenation} object representing
         * the hyphenated word or null if word is not hyphenated.
         */
        public Hyphenation Hyphenate(char[] w, int offset, int len,
                                     int remainCharCount, int pushCharCount)
        {
            int i;

            // word[0] and word[len + 1] hold the '.' markers, word[len + 2]
            // the terminator; the letters go to word[1..len].
            char[] word = new char[len + 3];

            // normalize word
            char[] c = new char[2];             // c[1] stays 0 and terminates the lookup key
            int    iIgnoreAtBeginning = 0;      // number of leading non-letters
            int    iLength            = len;    // becomes the number of letters
            bool   bEndOfLetters      = false;  // set once a trailing non-letter was seen

            for (i = 1; i <= len; i++)
            {
                c[0] = w[offset + i - 1];
                int nc = classmap.Find(c, 0);
                if (nc < 0)      // found a non-letter character ...
                {
                    if (i == (1 + iIgnoreAtBeginning))
                    {
                        // ... before any letter character
                        iIgnoreAtBeginning++;
                    }
                    else
                    {
                        // ... after a letter character
                        bEndOfLetters = true;
                    }
                    iLength--;
                }
                else
                {
                    if (bEndOfLetters)
                    {
                        // letters on both sides of a non-letter: give up
                        return(null);
                    }
                    word[i - iIgnoreAtBeginning] = (char)nc;
                }
            }

            // from here on len is the number of letters copied into word
            len = iLength;
            if (len < (remainCharCount + pushCharCount))
            {
                // word is too short to be hyphenated
                return(null);
            }
            int[] result = new int[len + 1];
            int   k      = 0;

            // check exception list first
            String sw = new String(word, 1, len);

            if (stoplist.ContainsKey(sw))
            {
                // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null)
                ArrayList hw = (ArrayList)stoplist[sw];
                int       j  = 0;
                for (i = 0; i < hw.Count; i++)
                {
                    Object o = hw[i];
                    // Only the String entries advance j, the running letter
                    // count; a candidate point lies after each of them.
                    // result[k] = the same position counted from w[offset]
                    // NOTE(review): this branch tests j < (len - pushCharCount)
                    // while the pattern branch below tests
                    // i <= (len - pushCharCount) - confirm the difference is intended.
                    if (o is String)
                    {
                        j += ((String)o).Length;
                        if (j >= remainCharCount && j < (len - pushCharCount))
                        {
                            result[k++] = j + iIgnoreAtBeginning;
                        }
                    }
                }
            }
            else
            {
                // use algorithm to get hyphenation points
                word[0]       = '.';              // word start marker
                word[len + 1] = '.';              // word end marker
                word[len + 2] = (char)0;          // null terminated
                byte[] il = new byte[len + 3];    // initialized to zero
                for (i = 0; i < len + 1; i++)
                {
                    SearchPatterns(word, i, il);
                }

                // hyphenation points are located where interletter value is odd
                // i is the index in the letter part of word,
                // i + 1 is the index in word,
                // result[k] = the same position counted from w[offset]
                for (i = 0; i < len; i++)
                {
                    if (((il[i + 1] & 1) == 1) && i >= remainCharCount &&
                        i <= (len - pushCharCount))
                    {
                        result[k++] = i + iIgnoreAtBeginning;
                    }
                }
            }

            if (k == 0)
            {
                // no admissible hyphenation point was found
                return(null);
            }

            // trim result array
            int[] res = new int[k];
            Array.Copy(result, 0, res, 0, k);

            // The points in res are shifted by iIgnoreAtBeginning, so the
            // returned text has to contain the skipped leading non-letters in
            // addition to the len letters. Using len alone would cut letters
            // off the end of the text (and could leave points beyond its end)
            // whenever the word starts with non-letter characters.
            return(new Hyphenation(new String(w, offset, iIgnoreAtBeginning + len), res));
        }