/// <summary>
/// Builds a new page where every <c>BlockSet&lt;IBlock&gt;</c> takes the horizontal extent
/// of the column returned by <c>FindColumn</c>; any other block is copied unchanged.
/// </summary>
/// <param name="page">Page whose <c>AllBlocks</c> are processed.</param>
/// <returns>A new <c>BlockPage</c> with the rebuilt block sets and the untouched remaining blocks.</returns>
public BlockPage Process(BlockPage page)
{
    var output = new BlockPage();

    foreach (var current in page.AllBlocks)
    {
        var owner = FindColumn(current);

        // Every block, block set or not, is expected to have a column.
        if (owner == null)
        {
            PdfReaderException.AlwaysThrow("Invalid blockset column assigned -- review stage 2");
        }

        var blockSet = current as BlockSet<IBlock>;

        if (blockSet == null)
        {
            // Not a block set (image or text?): keep it as it is.
            output.Add(current);
            continue;
        }

        // X range comes from the column, H range from the block set itself.
        var xFrom = owner.GetX();
        var xTo = owner.GetX() + owner.GetWidth();
        var hFrom = blockSet.GetH();
        var hTo = blockSet.GetH() + blockSet.GetHeight();

        output.Add(new BlockSet2<IBlock>(blockSet, xFrom, hFrom, xTo, hTo));
    }

    return output;
}
/// <summary>
/// Combines two block sets into a single <c>BlockSet2&lt;IBlock&gt;</c> whose X and H ranges
/// enclose both inputs.
/// </summary>
/// <remarks>
/// The blocks of <paramref name="b1"/> come first only when its X is strictly smaller than
/// the X of <paramref name="b2"/>; otherwise the blocks of <paramref name="b2"/> come first.
/// </remarks>
/// <param name="b1">First block set.</param>
/// <param name="b2">Second block set.</param>
/// <returns>The merged block set.</returns>
IBlockSet<IBlock> Merge(IBlockSet<IBlock> b1, IBlockSet<IBlock> b2)
{
    // Enclosing rectangle of the two sets.
    float xFrom = Math.Min(b1.GetX(), b2.GetX());
    float hFrom = Math.Min(b1.GetH(), b2.GetH());
    float xTo = Math.Max(b1.GetX() + b1.GetWidth(), b2.GetX() + b2.GetWidth());
    float hTo = Math.Max(b1.GetH() + b1.GetHeight(), b2.GetH() + b2.GetHeight());

    // Keep the content ordered by starting X.
    bool b1Leads = b1.GetX() < b2.GetX();
    var content = b1Leads ? b1.Concat(b2) : b2.Concat(b1);

    return new BlockSet2<IBlock>(content, xFrom, hFrom, xTo, hTo);
}
/// <summary>
/// Stretches blocks that touch the left and/or right edge of the page grid so that they
/// reach <c>_MinX</c> and/or <c>_MaxX</c>. Tables and images are copied unchanged.
/// </summary>
/// <remarks>
/// Each block's X range is rounded onto a 0..6 grid measured from <c>_MinX</c> over
/// <c>_PageWidth</c>. A block whose grid start is 0 gets its left edge moved to <c>_MinX</c>,
/// and a block whose grid end is 6 gets its right edge moved to <c>_MaxX</c>, whenever the
/// current edge differs from the page limit by more than the tolerance. A block that
/// touches both edges is extended on both sides.
/// </remarks>
/// <param name="page">Page to process; it is passed to <c>SetupPage</c> first.</param>
/// <returns>
/// The same <paramref name="page"/> when it has no blocks; otherwise a new <c>BlockPage</c>
/// with the blocks in their original order.
/// </returns>
public BlockPage Process(BlockPage page)
{
    // Edge differences up to this value are ignored.
    const float error_othercolumn = 2f;

    var result = new BlockPage();

    // NOTE(review): presumably initializes _MinX/_MaxX/_PageWidth used below -- confirm.
    SetupPage(page);

    if (!page.AllBlocks.Any())
    {
        return page;
    }

    float x1 = _MinX;
    float dx = _PageWidth;

    float h1 = page.AllBlocks.GetH();
    float dh = page.AllBlocks.GetHeight() + 2;

    // Quantize every block onto the grid (X/W on a 0..6 scale, Y on a 0..1000 scale).
    int id = 0;
    var values = page.AllBlocks.Select(b => new Data
    {
        ID = id++,
        X = (int)(6.0 * ((b.GetX() - x1) / dx) + 0.5),
        X2 = (int)(6.0 * ((b.GetX() + b.GetWidth() - x1) / dx) + 0.5),
        Y = (int)(1000 * (b.GetH() - h1) / (dh)),
        Y1 = (int)(1000 * (b.GetH() + b.GetHeight() - h1) / (dh)),
        W = (int)(6.0 * (b.GetWidth() / dx) + 0.5),
        RW = b.GetWidth(),
        B = b
    })
    .ToList();

    for (int i = 0; i < values.Count; i++)
    {
        var blsearch = values[i];
        var bl = blsearch.B;

        // Tables and images are never resized.
        if (bl is TableSet || bl is ImageBlock)
        {
            result.Add(bl);
            continue;
        }

        // Clamp grid coordinates that fell outside the page.
        if ((blsearch.X < 0) || (blsearch.X2 > 6))
        {
            PdfReaderException.Warning("page calculation error");
            blsearch.X = Math.Max(blsearch.X, 0);
            blsearch.X2 = Math.Min(blsearch.X2, 6);
        }

        // Both edges are tracked independently so that a block touching both sides of
        // the page keeps its left extension when the right one is applied as well.
        float left = bl.GetX();
        float right = bl.GetX() + bl.GetWidth();
        bool resized = false;

        if (blsearch.X == 0)
        {
            float diff = bl.GetX() - _MinX;

            if (diff < -MAX_PAGE_WIDTH_DIFFERENCE)
            {
                PdfReaderException.Warning("invalid difference");
            }

            if (Math.Abs(diff) > error_othercolumn)
            {
                left = _MinX;
                resized = true;
            }
        }

        if (blsearch.X2 == 6)
        {
            float diff = _MaxX - bl.GetX() - bl.GetWidth();

            if (diff < -MAX_PAGE_WIDTH_DIFFERENCE)
            {
                PdfReaderException.Warning("invalid difference");
            }

            if (Math.Abs(diff) > error_othercolumn)
            {
                right = _MaxX;
                resized = true;
            }
        }

        if (!resized)
        {
            result.Add(bl);
            continue;
        }

        // The new rectangle is expected to be wider than the original one.
        float growth = (right - left) - bl.GetWidth();
        if (growth < 0 && _OriginalPageWidth == _PageWidth)
        {
            PdfReaderException.Warning("invalid difference: still same page width?");
        }

        // The block is treated as a collection of blocks (may receive multiples);
        // the cast throws InvalidCastException if it is not enumerable.
        var original = (IEnumerable<IBlock>)bl;

        var replace = new BlockSet2<IBlock>(original, left, bl.GetH(), right, bl.GetH() + bl.GetHeight());
        result.Add(replace);
    }

    return result;
}
/// <summary>
/// On a page with exactly three blocks, widens each narrow column that has a right-hand
/// neighbour so that it ends <c>COLUMN_DISTANCE</c> before that neighbour starts.
/// </summary>
/// <remarks>
/// Columns are ordered by X. A column is "narrow" when its width rounds to 1 on a 0..6
/// scale relative to the total width of <c>page.AllBlocks</c> (plus 2). Tables, images,
/// the last column and non-narrow columns are returned unchanged.
/// </remarks>
/// <param name="page">Page to process.</param>
/// <returns>
/// The same <paramref name="page"/> when it does not contain exactly three blocks;
/// otherwise a new <c>BlockPage</c> holding the columns ordered by X.
/// </returns>
public BlockPage Process(BlockPage page)
{
    // implemented ONLY for 3 columns (an empty page is returned as-is, too)
    if (page.AllBlocks.Count() != 3)
    {
        return page;
    }

    var columns = page.AllBlocks.OrderBy(b => b.GetX()).ToArray();

    float dx = page.AllBlocks.GetWidth() + 2;

    var resizedColumns = columns.Select((column, index) =>
    {
        // Tables and images keep their geometry. Checked before any cast so that a
        // non-enumerable block is passed through instead of failing the cast.
        if ((column is TableSet) || (column is ImageBlock))
        {
            return column;
        }

        // Only a column with a right-hand neighbour can be stretched towards it.
        // (The previous guard let the last column through and indexed past the array.)
        int nextIndex = index + 1;
        if (nextIndex >= columns.Length)
        {
            return column;
        }

        // Width on the 0..6 scale; only small columns (1) are resized.
        int gridWidth = (int)(6.0 * (column.GetWidth() / dx) + 0.5);
        if (gridWidth != 1)
        {
            return column;
        }

        float new_x2 = columns[nextIndex].GetX() - COLUMN_DISTANCE;
        float old_x2 = column.GetX() + column.GetWidth();

        if (new_x2 - old_x2 < 0)
        {
            PdfReaderException.Warning("decreasing the column size");
        }

        // The column is treated as a collection of blocks (may receive multiples).
        var original = (IEnumerable<IBlock>)column;

        var replace = new BlockSet2<IBlock>(original, column.GetX(), column.GetH(), new_x2, column.GetH() + column.GetHeight());
        return replace;
    }).ToArray();

    var newpage = new BlockPage();
    newpage.AddRange(resizedColumns);

    return newpage;
}
/// <summary>
/// Widens each block to the X range of a wider block that covers it on the page grid,
/// provided <c>CheckBoundary</c> accepts the widened rectangle against the other blocks.
/// </summary>
/// <remarks>
/// Blocks are quantized onto a 0..6 horizontal grid and visited from the widest grid
/// width to the narrowest. For every block, each candidate whose grid range covers it and
/// whose real width is larger by more than the tolerance is tried; the widest accepted
/// replacement wins. Tables and images are never resized. The returned page lists the
/// blocks in that widest-first order, not in the original order.
/// </remarks>
/// <param name="page">Page to process.</param>
/// <returns>
/// The same <paramref name="page"/> when it has no blocks; otherwise a new <c>BlockPage</c>.
/// </returns>
public BlockPage Process(BlockPage page)
{
    // Width differences up to this value are treated as the same column width.
    const float error_othercolumn = 2f;

    if (!page.AllBlocks.Any())
    {
        return page;
    }

    float x1 = page.AllBlocks.GetX();
    float dx = page.AllBlocks.GetWidth() + 2;

    float h1 = page.AllBlocks.GetH();
    float dh = page.AllBlocks.GetHeight() + 2;

    // Quantize every block (X/W on a 0..6 scale, Y on a 0..1000 scale), widest first.
    int id = 0;
    var values = page.AllBlocks.Select(b => new Data
    {
        ID = id++,
        X = (int)(6.0 * ((b.GetX() - x1) / dx) + 0.5),
        X2 = (int)(6.0 * ((b.GetX() + b.GetWidth() - x1) / dx) + 0.5),
        Y = (int)(1000 * (b.GetH() - h1) / (dh)),
        Y1 = (int)(1000 * (b.GetH() + b.GetHeight() - h1) / (dh)),
        W = (int)(6.0 * (b.GetWidth() / dx) + 0.5),
        RW = b.GetWidth(),
        B = b
    })
    .OrderByDescending(p => p.W)
    .ToList();

    foreach (var blsearch in values)
    {
        // Tables and images are never resized.
        if (blsearch.B is TableSet || blsearch.B is ImageBlock)
        {
            continue;
        }

        // Candidates: other entries covering this one on the grid and really wider.
        var over = values
            .Where(v => v != blsearch && v.X <= blsearch.X && v.X2 >= blsearch.X2)
            .Where(v => v.RW > blsearch.RW)
            .Where(v => Math.Abs(v.RW - blsearch.RW) > error_othercolumn)
            .Select(v => v.B)
            .ToList();

        // Snapshot taken per block because earlier iterations may have replaced some B.
        var curblocks = values.Select(v => v.B).ToList();

        var repls = new List<IBlock>();

        foreach (var bl in over)
        {
            var compareBlocks = curblocks.Except(new IBlock[] { bl, blsearch.B });

            // Candidate rectangle: X range of the wider block, H range of the current block.
            var block = new Block()
            {
                X = bl.GetX(),
                Width = bl.GetWidth(),
                H = blsearch.B.GetH(),
                Height = blsearch.B.GetHeight()
            };

            // ensure it will increase
            float diff = block.GetWidth() - blsearch.B.GetWidth();
            if (diff < 0)
            {
                PdfReaderException.AlwaysThrow("should never decrease the block size");
            }

            if (!CheckBoundary(compareBlocks, block))
            {
                continue;
            }

            // The block is treated as a collection of blocks (may receive multiples).
            var original = (IEnumerable<IBlock>)blsearch.B;

            var replace = new BlockSet2<IBlock>(original, block.GetX(), block.GetH(), block.GetX() + block.GetWidth(), block.GetH() + block.GetHeight());

            // TODO: review this issue
            // A replacement that neither contains nor overlaps the original block would
            // move it somewhere else, so it is discarded.
            if (!Block.Contains(replace, blsearch.B) && !Block.HasOverlap(replace, blsearch.B))
            {
                PdfReaderException.Warning("Block was moved to another place -- ignore");
                continue;
            }

            repls.Add(replace);
        }

        if (repls.Count > 0)
        {
            // repls.Count can be > 1: keep the widest (first one on ties, the sort is stable).
            var largest_replace = repls.OrderByDescending(t => t.GetWidth()).First();

            blsearch.B = largest_replace;
        }
    }

    var result = new BlockPage();
    result.AddRange(values.Select(p => (IBlock)p.B));

    return result;
}