Exemplo n.º 1
0
        /**
         * Determines if a space character should be inserted between a previous chunk and the current chunk.
         * This method is exposed as a callback so subclasses can fine tune the algorithm for determining whether a space should be inserted or not.
         * By default, this method will insert a space if there is a gap of more than half the font space character width between the end of the
         * previous chunk and the beginning of the current chunk.  It will also indicate that a space is needed if the starting point of the new chunk
         * appears *before* the end of the previous chunk (i.e. overlapping text).
         * @param chunk the new chunk being evaluated
         * @param previousChunk the chunk that appeared immediately before the current chunk
         * @return true if the two chunks represent different words (i.e. should have a space between them).  False otherwise.
         */
        virtual protected bool IsChunkAtWordBoundary(TextChunk chunk, TextChunk previousChunk)
        {
            // Distance from the end of the previous chunk to the start of this one,
            // as reported by the chunk itself.
            float gap = chunk.DistanceFromEndOf(previousChunk);

            // A gap below minus one space width is treated as overlapping text.
            bool overlapsPrevious = gap < -chunk.CharSpaceWidth;

            // Otherwise a boundary is reported only when the gap exceeds half a space width.
            return overlapsPrevious || gap > chunk.CharSpaceWidth / 2.0f;
        }
Exemplo n.º 2
0
        /**
         * Determines if a space character should be inserted between a previous chunk and the current chunk.
         * This method is exposed as a callback so subclasses can fine tune the algorithm for determining whether a space should be inserted or not.
         * By default, this method will insert a space if there is a gap of more than half the font space character width between the end of the
         * previous chunk and the beginning of the current chunk.  It will also indicate that a space is needed if the starting point of the new chunk
         * appears *before* the end of the previous chunk (i.e. overlapping text).
         * @param chunk the new chunk being evaluated
         * @param previousChunk the chunk that appeared immediately before the current chunk
         * @return true if the two chunks represent different words (i.e. should have a space between them).  False otherwise.
         */
        virtual protected bool IsChunkAtWordBoundary(TextChunk chunk, TextChunk previousChunk)
        {
            // Special case: PDF content such as
            //   -.232 Tc [( P)-226.2(r)-231.8(e)-230.8(f)-238(a)-238.9(c)-228.9(e)]TJ
            // uses a font whose charSpace width (0.232) is cancelled out by a charSpacing
            // of 0.232, so the resulting TextChunk.charSpaceWidth arrives as 0. Without this
            // guard every chunk would be reported as a word boundary and a space would be
            // added between single characters. A space width equal (or close) to zero is
            // therefore treated as "no space".
            if (chunk.CharSpaceWidth < 0.1f)
            {
                return false;
            }

            // Distance from the end of the previous chunk to the start of this one.
            float gap = chunk.DistanceFromEndOf(previousChunk);

            // Boundary when the gap is below minus one space width (overlapping text)
            // or above half a space width.
            bool isBoundary = gap < -chunk.CharSpaceWidth || gap > chunk.CharSpaceWidth / 2.0f;
            return isBoundary;
        }
        /**
         * Returns the result so far.
         * @return  a String with the resulting text.
         * @param lineposition selects the line-by-line mark processing mode:
         * "first" - identifies first line in line-by-line mark processing;
         * "last" - identifies last line in line-by-line mark processing;
         * "" - middle lines in line-by-line mark processing.
         */
        public virtual String GetResultantText(string lineposition)
        {
            // Builds the extracted text from the collected chunks, in sorted order.
            //
            // lineposition selects the clean-up applied for line-by-line mark processing:
            //   "first" - everything up to and including the first space of the first chunk is dropped;
            //   "last"  - everything from the first space of the final chunk onwards is dropped
            //             (only when that chunk is on the same line as the chunk before it);
            //   other   - nothing is trimmed.
            // When lineposition is not "last" and the final chunk shares a line with its
            // predecessor, a trailing space is appended unless the chunk already ends with one.
            //
            // Trimmed text is kept in locals. The stored chunks are never modified, so calling
            // this method repeatedly returns the same string each time.
            if (DUMP_STATE)
            {
                DumpState();
            }

            locationalResult.Sort();

            StringBuilder sb        = new StringBuilder();
            TextChunk     lastChunk = null;
            string        lastText  = null; // text actually emitted for lastChunk (after any trimming)
            int           lastIndex = locationalResult.Count - 1;

            for (int i = 0; i < locationalResult.Count; i++)
            {
                TextChunk chunk = locationalResult[i];
                string    text  = chunk.text;

                if (lastChunk == null)
                {
                    // If the first chunk of the first line contains alien symbols before a space, remove them.
                    if (lineposition == "first")
                    {
                        // Ordinal char search: a single scan that cannot disagree with itself,
                        // unlike Contains(string) followed by the culture-sensitive IndexOf(string).
                        int firstSpace = text.IndexOf(' ');
                        if (firstSpace >= 0)
                        {
                            text = text.Substring(firstSpace + 1);
                        }
                    }
                    sb.Append(text);
                }
                else if (chunk.SameLine(lastChunk))
                {
                    float dist = chunk.DistanceFromEndOf(lastChunk);

                    if (dist < -chunk.charSpaceWidth)
                    {
                        sb.Append(' ');
                    }
                    // Only insert a blank if the previous text did not end with a space
                    // and the current text does not start with one.
                    else if (dist > chunk.charSpaceWidth / 2.0f && !StartsWithSpace(text) && !EndsWithSpace(lastText))
                    {
                        sb.Append(' ');
                    }

                    bool isFinalChunk = i == lastIndex;

                    // If the last chunk of the last line contains alien symbols after a space, remove them.
                    if (isFinalChunk && lineposition == "last")
                    {
                        int cutAt = text.IndexOf(' ');
                        if (cutAt >= 0)
                        {
                            text = text.Remove(cutAt);
                        }
                    }

                    sb.Append(text);

                    // Add a space after the last chunk of a non-last line unless it already ends with one.
                    if (isFinalChunk && lineposition != "last" && !EndsWithSpace(text))
                    {
                        sb.Append(' ');
                    }
                }
                else
                {
                    // Chunk is on a different line from the previous one: start a new line.
                    sb.Append('\n');
                    sb.Append(text);
                }

                lastChunk = chunk;
                lastText  = text;
            }

            return sb.ToString();
        }