Beispiel #1
0
        /// <summary>
        /// Checks whether <paramref name="block"/> ends before <paramref name="line"/> starts,
        /// i.e. the right edge of the block (X + width) is smaller than the X of the line.
        /// </summary>
        /// <param name="block">Block being tested against the line.</param>
        /// <param name="line">Line currently being built; must not be null.</param>
        /// <returns>
        /// <c>true</c> when the right edge of the block is strictly to the left of the
        /// line's X position; otherwise <c>false</c>.
        /// </returns>
        bool IsEndOfLine(IBlock block, BlockLine line)
        {
            // Same arithmetic as before, returned directly; the previously computed
            // startOfBlock / endOfBlock / endOfLine locals were never read.
            return (block.GetX() + block.GetWidth() - line.GetX()) < 0;
        }
Beispiel #2
0
        /// <summary>
        /// Walks the blocks of <paramref name="page"/> in order and merges consecutive
        /// blocks into <see cref="BlockLine"/> items. A new item is started whenever
        /// <c>IsEndOfWord</c> or <c>IsEndOfLine</c> reports a break at the current block.
        /// </summary>
        /// <param name="page">Page whose <c>AllBlocks</c> are enumerated.</param>
        /// <returns>
        /// A new <see cref="BlockPage"/> containing every merged line, including the
        /// one still being built when the input runs out.
        /// </returns>
        public BlockPage Process(BlockPage page)
        {
            GroupFontLineHelper groupFont = null;
            BlockLine           line      = null;
            IBlock last   = null;
            var    result = new BlockPage();

            foreach (var block in page.AllBlocks)
            {
                if (last != null)
                {
                    if (IsEndOfWord(block) || IsEndOfLine(block, line))
                    {
                        // Break detected: emit the finished line. Clearing 'last'
                        // makes the section below start a new line from this block.
                        last = null;
                        result.Add(line);
                    }
                    else
                    {
                        // Same line: append the text and stretch the width up to
                        // the right edge of this block.
                        line.Text += block.GetText();
                        line.Width = block.GetX() + block.GetWidth() - line.GetX();

                        groupFont.MergeFont((Block)block);
                    }
                }

                if (last == null)
                {
                    var    b    = (Block)block;
                    string text = block.GetText();

                    line = new BlockLine()
                    {
                        Text   = text,
                        X      = block.GetX(),
                        H      = block.GetH(),
                        Width  = block.GetWidth(),
                        Height = block.GetHeight(),

                        HasBackColor = b.HasBackColor,

                        HasLargeSpace = false,

                        // might be inaccurate
                        FontFullName = b.FontFullName,
                        FontName     = b.FontName,
                        FontSize     = b.FontSize, // BE CAREFUL!
                        FontStyle    = b.FontStyle
                                                   // now the settings are done in GroupFontLineHelper
                    };

                    // TODO: validate the input twice

                    // Close the font helper of the previous line before starting a new one.
                    if (groupFont != null)
                    {
                        groupFont.Done();
                    }

                    groupFont = new GroupFontLineHelper(line, b);

                    if (line.Width <= 0 || line.Height <= 0)
                    {
                        PdfReaderException.AlwaysThrow("line.Width <= 0 || line.Height <= 0");
                    }
                }
                last = block;
            }

            // Flush the line that was still open when the blocks ran out. Inside the
            // loop a line is only added once a *following* block reports a break, so
            // without this the last line of every page would be silently dropped.
            if (line != null)
            {
                result.Add(line);
            }

            if (groupFont != null)
            {
                groupFont.Done();
            }

            return(result);
        }
Beispiel #3
0
        /// <summary>
        /// Groups the blocks of <paramref name="page"/> into <see cref="BlockLine"/> items.
        /// For every block, in order:
        /// a <c>BlockHidden</c> only contributes its hidden text, which is prepended to the
        /// next newly created line; a block accepted by <c>CheckSubfonts</c> is appended to
        /// the current line without a separator; a block for which <c>CompareLine</c> against
        /// the previous block is non-zero (or the first block) starts a new line; any other
        /// block is appended to the current line, optionally preceded by a space.
        /// </summary>
        /// <param name="page">Page whose <c>AllBlocks</c> are enumerated.</param>
        /// <returns>A new <see cref="BlockPage"/> holding the created lines.</returns>
        public BlockPage Process(BlockPage page)
        {
            GroupFontLineHelper groupFont = null;
            BlockLine           line      = null;
            IBlock last        = null;
            string last_hidden = null;
            var    result      = new BlockPage();

            foreach (var block in page.AllBlocks)
            {
                // Hidden blocks are not emitted: remember their text and move on.
                if (block is BlockHidden)
                {
                    var blockHidden = (BlockHidden)block;

                    // A previous hidden text that was never consumed is about to be replaced.
                    if (last_hidden != null)
                    {
                        PdfReaderException.Warning("last_hidden != null: hidden text will be overwritten");
                    }

                    last_hidden = blockHidden.GetHiddenText();
                    continue;
                }

                if (last != null)
                {
                    if (CheckSubfonts(line, (Block)block))
                    {
                        bool isBackspace = CheckBackspace(line, block);

                        float endofblock = block.GetX() + block.GetWidth();
                        float endofline  = line.GetX() + line.GetWidth();

                        // Only widen the line; a block ending inside the line leaves the width untouched.
                        if (endofblock > endofline)
                        {
                            line.Width = block.GetX() + block.GetWidth() - line.GetX();
                        }

                        if (line.Width <= 0)
                        {
                            PdfReaderException.AlwaysThrow("line.Width <= 0");
                        }

                        // considered the same line: update text and Width
                        // we don't add a space character (should we?)
                        line.Text += block.GetText();

                        // gather statistics
                        statBackspace += (isBackspace) ? 1 : 0;
                        statSubfonts++;

                        // does not update 'last' variable!!
                        continue;
                    }
                }

                // First block, or CompareLine reports a difference from the previous block: start a new line.
                if ((last == null) || (CompareLine(block, last) != 0))
                {
                    var    b    = (Block)block;
                    string text = block.GetText();

                    // Pending hidden text is prepended to the new line's text and then cleared.
                    if (last_hidden != null)
                    {
                        text        = last_hidden + text;
                        last_hidden = null;
                    }

                    line = new BlockLine()
                    {
                        Text   = text,
                        X      = block.GetX(),
                        H      = block.GetH(),
                        Width  = block.GetWidth(),
                        Height = block.GetHeight(),

                        HasBackColor = b.HasBackColor,

                        HasLargeSpace = false,

                        // might be inaccurate
                        FontFullName = b.FontFullName,
                        FontName     = b.FontName,
                        FontSize     = b.FontSize, // BE CAREFUL!
                        FontStyle    = b.FontStyle
                                                   // now the settings are done in GroupFontLineHelper
                    };

                    // TODO: validate the input twice

                    // Close the font helper of the previous line before creating one for the new line.
                    if (groupFont != null)
                    {
                        groupFont.Done();
                    }

                    groupFont = new GroupFontLineHelper(line, b);

                    if (line.Width <= 0 || line.Height <= 0)
                    {
                        PdfReaderException.AlwaysThrow("line.Width <= 0 || line.Height <= 0");
                    }

                    // The line is added right away; later blocks keep mutating this same instance.
                    result.Add(line);
                }
                else
                {
                    string separator = (ShouldAddSpace(last, block)) ? " " : "";

                    // same line: update text and Width
                    // NOTE: endOfLine is captured BEFORE line.Width is overwritten below,
                    // so the check further down compares against the previous line end.
                    float startOfBlock = block.GetX();
                    float endOfBlock   = block.GetX() + block.GetWidth();
                    float endOfLine    = line.GetX() + line.GetWidth();

                    line.Text += separator + block.GetText();
                    line.Width = block.GetX() + block.GetWidth() - line.GetX();

                    if (line.Width <= 0)
                    {
                        PdfReaderException.AlwaysThrow("line.Width <= 0");
                    }

                    bool couldBeTable = ShouldAddLargeSpace(last, block);

                    if (couldBeTable)
                    {
                        line.HasLargeSpace = true;
                    }

                    // walking backwards
                    // very strict check (disabled): sometimes the start overlaps with the ending
                    //if (startOfBlock < endOfLine)
                    //    throw new InvalidOperationException();
                    // soft check: the end of the block should never be that low unless it is an overlap
                    if (endOfBlock < endOfLine)
                    {
                        PdfReaderException.AlwaysThrow("endOfBlock < endOfLine", new IBlock[] { last, block });
                    }

                    groupFont.MergeFont((Block)block);
                }

                last = block;
            }

            // Close the font helper of the last line, if any line was created.
            if (groupFont != null)
            {
                groupFont.Done();
            }

            return(result);
        }