예제 #1
0
        /// <summary>
        /// Recursively visits the subtree rooted at <paramref name="p"/>. For every
        /// node whose <c>sc</c> entry is <c>0xFFFF</c>, the node's <c>lo</c> entry is
        /// redirected to an offset inside <paramref name="kx"/>: an offset already
        /// known to <paramref name="map"/> is reused, otherwise space is allocated in
        /// <paramref name="kx"/>, the data at <c>kv.Array[lo[p]]</c> is copied there
        /// and the new offset is recorded in <paramref name="map"/>.
        /// </summary>
        /// <param name="kx"> vector receiving the copied key data </param>
        /// <param name="map"> lookup of data already copied into <paramref name="kx"/> </param>
        /// <param name="p"> index of the node to process; 0 ends the recursion </param>
        private void compact(CharVector kx, TernaryTree map, char p)
        {
            if (p == 0)
            {
                return;
            }

            if (sc[p] != 0xFFFF)
            {
                // Ordinary node: descend into lo, then eq (only when sc[p] is
                // non-zero), then hi.
                compact(kx, map, lo[p]);
                if (sc[p] != 0)
                {
                    compact(kx, map, eq[p]);
                }
                compact(kx, map, hi[p]);
                return;
            }

            // sc[p] == 0xFFFF: lo[p] is an offset into kv.Array.
            int target = map.find(kv.Array, lo[p]);
            if (target < 0)
            {
                // Not copied yet: reserve room (length + 1), copy, and remember it.
                // kx.Array is read only after alloc() has returned.
                target = kx.alloc(strlen(kv.Array, lo[p]) + 1);
                strcpy(kx.Array, target, kv.Array, lo[p]);
                map.insert(kx.Array, target, (char)target);
            }
            lo[p] = (char)target;
        }
예제 #2
0
        /// <summary>
        /// Registers a hyphenation pattern in the tree. Mainly intended to be called
        /// by <see cref="PatternParser"/> as the callback that feeds parsed patterns
        /// into the tree.
        /// </summary>
        /// <param name="pattern"> the hyphenation pattern </param>
        /// <param name="ivalue"> interletter weight values indicating the desirability and
        ///        priority of hyphenating at a given point within the pattern. It
        ///        should contain only digit characters (i.e. '0' to '9'). </param>
        public virtual void addPattern(string pattern, string ivalue)
        {
            // Reuse the stored index for this value string when the lookup yields a
            // positive one; otherwise pack the values and record the new index.
            int valueIndex = ivalues.find(ivalue);
            bool alreadyStored = valueIndex > 0;

            if (!alreadyStored)
            {
                valueIndex = packValues(ivalue);
                ivalues.insert(ivalue, (char)valueIndex);
            }

            insert(pattern, (char)valueIndex);
        }
예제 #3
0
        /// <summary>
        /// Hyphenate word and return an array of hyphenation points.
        /// </summary>
        /// <remarks>
        /// <para>
        /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, and all
        /// n may be absent. The first n is at <c>offset</c>; the first l is at
        /// <c>offset + leadingSkipped</c>. word = ".llllll.'\0'***", where all l in w
        /// are copied into word. The letter count used after normalization is the
        /// incoming <c>len</c> minus the number of non-letters seen.
        /// </para>
        /// <para>
        /// Three indices are used: index(w), the index in w; index(word), the index
        /// in word; and letterindex(word), the index in the letter part of word.
        /// With <c>pos</c> being the 1-based loop counter of the normalization loop:
        /// index(w) = offset + pos - 1; index(word) = pos - leadingSkipped;
        /// letterindex(word) = index(word) - 1. It follows that
        /// index(w) - index(word) = offset - 1 + leadingSkipped and
        /// index(w) = letterindex(word) + offset + leadingSkipped.
        /// </para>
        /// </remarks>
        /// <param name="w"> char array that contains the word </param>
        /// <param name="offset"> Offset to first character in word </param>
        /// <param name="len"> Length of word </param>
        /// <param name="remainCharCount"> Minimum number of characters allowed before the
        ///        hyphenation point. </param>
        /// <param name="pushCharCount"> Minimum number of characters allowed after the
        ///        hyphenation point. </param>
        /// <returns> a <see cref="Hyphenation"/> object representing the hyphenated word,
        ///         or null if the word is not hyphenated (a letter follows a trailing
        ///         non-letter, the word is too short, or no hyphenation point was
        ///         found). </returns>
        public virtual Hyphenation hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
        {
            // Room for: start marker + up to len letters + end marker + terminator.
            char[] word = new char[len + 3];

            // --- normalize word ---
            char[] probe = new char[2];      // probe[1] stays 0 and terminates the lookup key
            int    leadingSkipped = 0;       // non-letters seen before the first letter
            int    letterCount    = len;     // shrinks by one for every non-letter
            bool   lettersEnded   = false;   // set once a non-letter follows a letter

            for (int pos = 1; pos <= len; pos++)
            {
                probe[0] = w[offset + pos - 1];
                int mapped = classmap.find(probe, 0);

                if (mapped >= 0)
                {
                    // A letter after the letter run has already ended: give up.
                    if (lettersEnded)
                    {
                        return null;
                    }
                    word[pos - leadingSkipped] = (char)mapped;
                    continue;
                }

                // Non-letter: either still in the leading run, or it ends the letters.
                if (pos == leadingSkipped + 1)
                {
                    leadingSkipped++;
                }
                else
                {
                    lettersEnded = true;
                }
                letterCount--;
            }

            if (letterCount < remainCharCount + pushCharCount)
            {
                // word is too short to be hyphenated
                return null;
            }

            int[] points = new int[letterCount + 1];
            int   found  = 0;

            // The exception list is consulted before the pattern algorithm.
            string letters = new string(word, 1, letterCount);

            if (!stoplist.ContainsKey(letters))
            {
                // Use the pattern algorithm to get hyphenation points.
                word[0]               = '.';       // word start marker
                word[letterCount + 1] = '.';       // word end marker
                word[letterCount + 2] = (char)0;   // null terminated
                sbyte[] il = new sbyte[letterCount + 3]; // interletter values, initially zero

                for (int start = 0; start <= letterCount; start++)
                {
                    searchPatterns(word, start, il);
                }

                // Hyphenation points are where the interletter value is odd.
                // letter is letterindex(word), letter + 1 is index(word).
                for (int letter = 0; letter < letterCount; letter++)
                {
                    bool oddValue    = (il[letter + 1] & 1) == 1;
                    bool withinRange = letter >= remainCharCount && letter <= (letterCount - pushCharCount);
                    if (oddValue && withinRange)
                    {
                        points[found++] = letter + leadingSkipped;
                    }
                }
            }
            else
            {
                // Assume only simple hyphens (Hyphen.pre = "-", Hyphen.post = Hyphen.no = null):
                // only the string entries are used, and their lengths are accumulated.
                // NOTE(review): the upper bound here is strict (<) while the pattern
                // branch above uses <= -- confirm whether the difference is intended.
                List<object> parts = stoplist[letters];
                int consumed = 0;
                foreach (object part in parts)
                {
                    string text = part as string;
                    if (text == null)
                    {
                        continue;
                    }
                    consumed += text.Length;
                    if (consumed >= remainCharCount && consumed < (letterCount - pushCharCount))
                    {
                        points[found++] = consumed + leadingSkipped;
                    }
                }
            }

            if (found <= 0)
            {
                return null;
            }

            // Trim the result and add the synthetic hyphenation points at the
            // beginning (0) and end (letter count) of the word.
            int[] res = new int[found + 2];
            Array.Copy(points, 0, res, 1, found);
            res[0]         = 0;
            res[found + 1] = letterCount;
            return new Hyphenation(res);
        }