/// <summary>
/// Reports whether the right edge of <paramref name="block"/> lies strictly to the left of the
/// starting X position of <paramref name="line"/>.
/// </summary>
/// <param name="block">Block whose right edge (<c>GetX() + GetWidth()</c>) is tested.</param>
/// <param name="line">Line whose starting X position (<c>GetX()</c>) is the reference.</param>
/// <returns>
/// <c>true</c> when <c>block.GetX() + block.GetWidth() - line.GetX()</c> is negative;
/// otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// NOTE(review): the name suggests this detects a block that wrapped to a new line; that intent
/// is an assumption based on the name only - confirm against the callers.
/// </remarks>
bool IsEndOfLine(IBlock block, BlockLine line)
{
    // The subtraction is kept in its original form (instead of comparing two operands)
    // so the floating-point evaluation stays exactly as before.
    return (block.GetX() + block.GetWidth() - line.GetX()) < 0;
}
/// <summary>
/// Merges the blocks of <paramref name="page"/> into <see cref="BlockLine"/> objects and returns
/// a new <see cref="BlockPage"/> that contains those lines.
/// </summary>
/// <remarks>
/// For every block in <c>page.AllBlocks</c>, in order:
/// <list type="bullet">
/// <item><description>
/// A <see cref="BlockHidden"/> block only stores its hidden text; that text is prepended to the
/// text of the next line that gets created. A warning is emitted when a pending hidden text is
/// replaced before being used.
/// </description></item>
/// <item><description>
/// When a previous block exists and <c>CheckSubfonts</c> accepts the block, its text is appended
/// to the current line without a separator, the line is widened if the block ends beyond it, and
/// the statistics counters are updated. The "previous block" reference is intentionally left
/// untouched in this case.
/// </description></item>
/// <item><description>
/// When there is no previous block, or <c>CompareLine</c> returns a non-zero value, a new line is
/// started from the block and added to the result.
/// </description></item>
/// <item><description>
/// Otherwise the block is appended to the current line (with a space when <c>ShouldAddSpace</c>
/// says so), the width is recomputed, <c>HasLargeSpace</c> is set when <c>ShouldAddLargeSpace</c>
/// returns true, and the block font is merged through the font helper.
/// </description></item>
/// </list>
/// Inconsistent geometry (non-positive width/height, or a block ending before the previous end of
/// the line) is reported through <c>PdfReaderException.AlwaysThrow</c>.
/// </remarks>
/// <param name="page">Page whose <c>AllBlocks</c> sequence is grouped into lines.</param>
/// <returns>A new page holding one <see cref="BlockLine"/> per detected line.</returns>
public BlockPage Process(BlockPage page)
{
    GroupFontLineHelper groupFont = null;
    BlockLine line = null;
    IBlock last = null;
    string last_hidden = null;

    var result = new BlockPage();

    foreach (var block in page.AllBlocks)
    {
        // Hidden blocks carry no geometry here: remember their text for the next new line.
        var blockHidden = block as BlockHidden;
        if (blockHidden != null)
        {
            if (last_hidden != null)
            {
                PdfReaderException.Warning("last_hidden != null: hidden text will be overwritten");
            }

            last_hidden = blockHidden.GetHiddenText();
            continue;
        }

        if (last != null && CheckSubfonts(line, (Block)block))
        {
            bool isBackspace = CheckBackspace(line, block);

            float subfontBlockEnd = block.GetX() + block.GetWidth();
            float subfontLineEnd = line.GetX() + line.GetWidth();

            // Only grow the line; a block that ends inside the line keeps the width as is.
            if (subfontBlockEnd > subfontLineEnd)
            {
                line.Width = subfontBlockEnd - line.GetX();
            }

            if (line.Width <= 0)
            {
                PdfReaderException.AlwaysThrow("line.Width <= 0");
            }

            // Considered the same line: update text and width.
            // No space character is added here (should we?).
            line.Text += block.GetText();

            // Gather statistics.
            statBackspace += isBackspace ? 1 : 0;
            statSubfonts++;

            // Deliberately does NOT update the 'last' variable.
            continue;
        }

        if ((last == null) || (CompareLine(block, last) != 0))
        {
            // Start a new line from this block.
            var firstBlock = (Block)block;
            string text = block.GetText();

            // Consume the pending hidden text, if any, as a prefix of the new line.
            if (last_hidden != null)
            {
                text = last_hidden + text;
                last_hidden = null;
            }

            line = new BlockLine()
            {
                Text = text,
                X = block.GetX(),
                H = block.GetH(),
                Width = block.GetWidth(),
                Height = block.GetHeight(),
                HasBackColor = firstBlock.HasBackColor,
                HasLargeSpace = false,
                // might be inaccurate (original note; the font settings are now done in GroupFontLineHelper)
                FontFullName = firstBlock.FontFullName,
                FontName = firstBlock.FontName,
                FontSize = firstBlock.FontSize, // BE CAREFUL!
                FontStyle = firstBlock.FontStyle
            };

            // TODO: validate the input twice
            if (groupFont != null)
            {
                // Finish the helper of the line that has just been superseded.
                groupFont.Done();
            }

            groupFont = new GroupFontLineHelper(line, firstBlock);

            if (line.Width <= 0 || line.Height <= 0)
            {
                PdfReaderException.AlwaysThrow("line.Width <= 0 || line.Height <= 0");
            }

            result.Add(line);
        }
        else
        {
            string separator = ShouldAddSpace(last, block) ? " " : "";

            // Same line: update text and width.
            // The end of the line is captured BEFORE the width is overwritten below.
            float endOfBlock = block.GetX() + block.GetWidth();
            float endOfLine = line.GetX() + line.GetWidth();

            line.Text += separator + block.GetText();
            line.Width = endOfBlock - line.GetX();

            if (line.Width <= 0)
            {
                PdfReaderException.AlwaysThrow("line.Width <= 0");
            }

            bool couldBeTable = ShouldAddLargeSpace(last, block);
            if (couldBeTable)
            {
                line.HasLargeSpace = true;
            }

            // Walking backwards.
            // A stricter check (block start < end of line) is not enforced because the start
            // sometimes overlaps with the ending.
            // Soft check: the end of the block should never be that low unless it is an overlap.
            if (endOfBlock < endOfLine)
            {
                PdfReaderException.AlwaysThrow("endOfBlock < endOfLine", new IBlock[] { last, block });
            }

            groupFont.MergeFont((Block)block);
        }

        last = block;
    }

    // Finish the helper of the last line, if any line was created.
    if (groupFont != null)
    {
        groupFont.Done();
    }

    return result;
}