// Walks page.AllBlocks in order and regroups them into block sets, which are
// added to a newly created BlockPage that is returned.
//
// A new block set is opened for the first block, and afterwards whenever the
// GetH() value of the current block differs from the previous block's value
// by more than the statDownInTheBottom / statGoingUp thresholds, unless one
// of the two exceptions below cancels the break.
public BlockPage Process(BlockPage page)
{
    var output = new BlockPage();
    BlockSet<IBlock> group = null;
    IBlock previousBlock = null;

    foreach (var current in page.AllBlocks)
    {
        bool startNewGroup = false;

        if (previousBlock != null)
        {
            // Expectation: previous is roughly >= next.
            float previous = previousBlock.GetH();
            float next = current.GetH();

            // Break when previous is much larger than next, or when previous is smaller than next.
            startNewGroup = (previous > next + statDownInTheBottom)
                         || (previous < next - statGoingUp);
        }

        // Exception 1: the pair is reported as a superscript font change.
        if (startNewGroup && Block.IsSuperscriptFont((Block)previousBlock, (Block)current))
        {
            startNewGroup = false;
        }

        // Exception 2: the current block is on the same line as the first of the
        // last two blocks already collected in the current set.
        if (startNewGroup && group.Count() > 1)
        {
            var tableLine = group.TakeLast(2).First();
            startNewGroup = !Block.AreSameLine(tableLine, current);
        }

        if (group == null || startNewGroup)
        {
            group = new BlockSet<IBlock>();
            output.Add(group);
        }

        group.Add(current);
        previousBlock = current;
    }

    return output;
}
// Builds two new block sets from 'blocks': the first holds blocks.Take(middle),
// the second holds the last (blocks.Count() - middle) items. The input set is
// only read. Returns both sets as a two-element array, head first.
IBlock[] CreateNewBlocks(BlockSet<IBlock> blocks, int middle)
{
    int tailLength = blocks.Count() - middle;

    var head = new BlockSet<IBlock>();
    var tail = new BlockSet<IBlock>();

    head.AddRange(blocks.Take(middle));
    tail.AddRange(blocks.TakeLast(tailLength));

    return new IBlock[] { head, tail };
}
// Copies into a new block set every item of 'block1' for which
// HasOverlapY(item, overlap) is false.
// Returns the new set, or null when no item was kept.
BlockSet<IBlock> GetCleanBlockSet(Block overlap, BlockSet<IBlock> block1)
{
    var kept = new BlockSet<IBlock>();
    kept.AddRange(block1.Where(candidate => !HasOverlapY(candidate, overlap)));

    // Callers receive null (not an empty set) when everything was filtered out.
    return kept.Count() == 0 ? null : kept;
}
// Reports whether the first item of 'b' and the first of the last two items of 'a'
// are on the same line according to Block.AreSameLine.
// Calls PdfReaderException.AlwaysThrow when 'a' has fewer than two items.
bool HasLineOverlap(BlockSet<IBlock> a, BlockSet<IBlock> b)
{
    // Not expected to happen; reported through the project's exception helper.
    if (a.Count() < 2)
    {
        PdfReaderException.AlwaysThrow("a.Count() < 2");
    }

    var headOfB = b.Take(1).First();
    var tailLineOfA = a.TakeLast(2).First();

    return Block.AreSameLine(tailLineOfA, headOfB);
}
// Splits 'blockset' into one, two or three block sets.
//
// ScanBlock is called twice with the same threshold (GetX() + half of GetWidth()):
// once indexing the items from the front ('start') and once indexing them from
// the back ('end'). The code below uses 'start' as the number of leading items
// and 'end' as the number of trailing items to cut off.
// NOTE(review): the exact meaning of ScanBlock's return value is not visible here - confirm.
//
// Returns:
//   - { blockset }            when start and end are both 0;
//   - { first, second }       when a single cut position ('middle' > 0) is found;
//   - { top, core, bottom }   otherwise.
// Calls PdfReaderException.AlwaysThrow if the pieces do not add up to the item count.
BlockSet<IBlock>[] SplitBlock(BlockSet<IBlock> blockset)
{
    var items = blockset.ToList();
    int lastIndex = items.Count - 1;

    float halfWidth = blockset.GetWidth() / 2;
    int start = ScanBlock(i => items[i], blockset.GetX() + halfWidth);
    int end = ScanBlock(i => items[lastIndex - i], blockset.GetX() + halfWidth);

    // Nothing to split.
    if (start == 0 && end == 0)
    {
        return new BlockSet<IBlock>[] { blockset };
    }

    // Pick the cut position for a two-way split; -1 means "no single clear division".
    int middle;
    if (end == 0)
    {
        middle = start;
    }
    else if (start == 0)
    {
        middle = lastIndex - end + 1;
    }
    else if (start + end > lastIndex)
    {
        middle = start;
    }
    else
    {
        middle = -1;
    }

    // Two pieces: everything before 'middle' and everything from 'middle' on.
    if (middle > 0)
    {
        var first = new BlockSet<IBlock>();
        var second = new BlockSet<IBlock>();

        first.AddRange(items.Take(middle));
        second.AddRange(items.TakeLast(lastIndex - middle + 1));

        int pairTotal = first.Count() + second.Count();
        if (pairTotal != items.Count)
        {
            PdfReaderException.AlwaysThrow("count2 != blocks.Count");
        }

        return new BlockSet<IBlock>[] { first, second };
    }

    // Three pieces: leading 'start' items, the middle run, trailing 'end' items.
    var top = new BlockSet<IBlock>();
    var core = new BlockSet<IBlock>();
    var bottom = new BlockSet<IBlock>();

    top.AddRange(items.Take(start));

    int lastCoreIndex = lastIndex - end;
    for (int index = start; index <= lastCoreIndex; index++)
    {
        core.Add(items[index]);
    }

    bottom.AddRange(items.TakeLast(end));

    int tripleTotal = top.Count() + core.Count() + bottom.Count();
    if (tripleTotal != items.Count)
    {
        PdfReaderException.AlwaysThrow("count3 != blocks.Count");
    }

    return new BlockSet<IBlock>[] { top, core, bottom };
}