This class represents a hyphenated word. This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
コード例 #1
0
        protected override void Decompose()
        {
            // get the hyphenation points
            Hyphenation.Hyphenation hyphens = hyphenator.Hyphenate(m_termAtt.Buffer, 0, m_termAtt.Length, 1, 1);
            // No hyphen points found -> exit
            if (hyphens is null)
            {
                return;
            }

            int[] hyp = hyphens.HyphenationPoints;

            for (int i = 0; i < hyp.Length; ++i)
            {
                int           remaining         = hyp.Length - i;
                int           start             = hyp[i];
                CompoundToken longestMatchToken = null;
                for (int j = 1; j < remaining; j++)
                {
                    int partLength = hyp[i + j] - start;

                    // if the part is longer than maxSubwordSize we
                    // are done with this round
                    if (partLength > this.m_maxSubwordSize)
                    {
                        break;
                    }

                    // we only put subwords to the token stream
                    // that are longer than minPartSize
                    if (partLength < this.m_minSubwordSize)
                    {
                        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
                        // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
                        continue;
                    }

                    // check the dictionary
                    if (m_dictionary is null || m_dictionary.Contains(m_termAtt.Buffer, start, partLength))
                    {
                        if (this.m_onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.Text.Length < partLength)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength);
                            }
                        }
                        else
                        {
                            m_tokens.Enqueue(new CompoundToken(this, start, partLength));
                        }
                    }
コード例 #2
0
        protected override void Decompose()
        {
            // get the hyphenation points
            Hyphenation.Hyphenation hyphens = hyphenator.Hyphenate(termAtt.Buffer(), 0, termAtt.Length, 1, 1);
            // No hyphen points found -> exit
            if (hyphens == null)
            {
                return;
            }

            int[] hyp = hyphens.HyphenationPoints;

            for (int i = 0; i < hyp.Length; ++i)
            {
                int           remaining         = hyp.Length - i;
                int           start             = hyp[i];
                CompoundToken longestMatchToken = null;
                for (int j = 1; j < remaining; j++)
                {
                    int partLength = hyp[i + j] - start;

                    // if the part is longer than maxSubwordSize we
                    // are done with this round
                    if (partLength > this.maxSubwordSize)
                    {
                        break;
                    }

                    // we only put subwords to the token stream
                    // that are longer than minPartSize
                    if (partLength < this.minSubwordSize)
                    {
                        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
                        // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
                        continue;
                    }

                    // check the dictionary
                    if (dictionary == null || dictionary.Contains(termAtt.Buffer(), start, partLength))
                    {
                        if (this.onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.txt.Length < partLength)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength);
                            }
                        }
                        else
                        {
                            tokens.AddLast(new CompoundToken(this, start, partLength));
                        }
                    }
                    else if (dictionary.Contains(termAtt.Buffer(), start, partLength - 1))
                    {
                        // check the dictionary again with a word that is one character
                        // shorter
                        // to avoid problems with genitive 's characters and other binding
                        // characters
                        if (this.onlyLongestMatch)
                        {
                            if (longestMatchToken != null)
                            {
                                if (longestMatchToken.txt.Length < partLength - 1)
                                {
                                    longestMatchToken = new CompoundToken(this, start, partLength - 1);
                                }
                            }
                            else
                            {
                                longestMatchToken = new CompoundToken(this, start, partLength - 1);
                            }
                        }
                        else
                        {
                            tokens.AddLast(new CompoundToken(this, start, partLength - 1));
                        }
                    }
                }
                if (this.onlyLongestMatch && longestMatchToken != null)
                {
                    tokens.AddLast(longestMatchToken);
                }
            }
        }