/// <summary>
/// Converts every cell box that yields at least one cell into an <see cref="ImageTable"/>.
/// </summary>
/// <param name="cellBoxes">Boxes whose horizontal and vertical lines are examined.</param>
/// <param name="pageWidth">Page width; its reciprocal is forwarded to <c>DetectRawCells</c> as <c>widthRatio</c>.</param>
/// <param name="pageHeight">Page height; its reciprocal is forwarded to <c>DetectRawCells</c> as <c>heightRatio</c>.</param>
/// <param name="lineDistance">Distance value forwarded to <c>DetectRawCrossPoints</c>.</param>
/// <param name="textSize">Text size; multiplied by <c>RATIO_TEXT_HEIGHT</c> and truncated to get <c>spacingMin</c>.</param>
/// <returns>
/// One table per box that produced cells. Table indexes are assigned consecutively from 0
/// and each table's rectangle is the bounding rectangle of its cells.
/// </returns>
static ImageTableCollection DetectTables(CellBoxes cellBoxes, int pageWidth, int pageHeight, int lineDistance, int textSize)
{
    ImageTableCollection result = new ImageTableCollection();
    int nextTableIndex = 0;

    // Values shared by every box.
    int spacingMin = (int)(textSize * RATIO_TEXT_HEIGHT);
    double widthRatio = 1d / pageWidth;
    double heightRatio = 1d / pageHeight;

    foreach (CellBox box in cellBoxes)
    {
        // Original intent: a box only counts as a table when it has more than 5 lines;
        // top, bottom, left and right already exist, so at least one further line is needed.
        // NOTE(review): the check below only requires one line in total - presumably the
        // four border lines are not stored in Horizontals/Verticals; confirm.
        if (box.Horizontals.Count + box.Verticals.Count <= 0)
        {
            continue;
        }

        // Compute the cross points of the box's lines.
        int[,] crossPointIndexes;
        CrossPoints crossPoints = DetectRawCrossPoints(
            horizontals: box.Horizontals,
            verticals: box.Verticals,
            lineDistance: lineDistance,
            crossPointIndexes: out crossPointIndexes);

        // Derive the cells from the cross points.
        ImageCellCollection cells = DetectRawCells(
            crossPoints: crossPoints,
            crossPointIndexes: crossPointIndexes,
            spacingMin: spacingMin,
            widthRatio: widthRatio,
            heightRatio: heightRatio);

        if (cells.Count <= 0)
        {
            continue;
        }

        // Bounding rectangle of all cells in this box.
        int left = int.MaxValue;
        int top = int.MaxValue;
        int right = int.MinValue;
        int bottom = int.MinValue;

        foreach (ImageCell cell in cells)
        {
            left = Math.Min(left, cell.LeftX);
            right = Math.Max(right, cell.RightX);
            top = Math.Min(top, cell.TopY);
            bottom = Math.Max(bottom, cell.BottomY);
        }

        ImageTable table = new ImageTable(
            index: nextTableIndex++,
            x: left,
            y: top,
            width: right - left,
            height: bottom - top,
            cells: cells);

        result.Add(table);
    }

    return result;
}
/// <summary>
/// Runs the table detection pipeline for one image: <c>DetectLines</c>,
/// <c>GetLineDistanceByModeThickness</c>, <c>DetectCellBoxes</c> and <c>DetectTables</c>, in that order.
/// </summary>
/// <param name="imagePath">Path of the image; passed to <c>DetectLines</c> and to the optional draw helpers.</param>
/// <param name="textSize">Text size forwarded to <c>DetectLines</c> and <c>DetectTables</c>.</param>
/// <param name="drawTable">When true, the detected tables are passed to <c>DrawTables</c>.</param>
/// <param name="drawLine">When true, the line collections are passed to <c>DrawLines</c>.</param>
/// <returns>The tables produced by <c>DetectTables</c>.</returns>
public static ImageTableCollection GetTables(string imagePath, int textSize, bool drawTable = false, bool drawLine = false)
{
    CellLines horizontalLines;
    CellLines verticalLines;
    int width;
    int height;

    DetectLines(
        imagePath: imagePath,
        textSize: textSize,
        horizontals: out horizontalLines,
        verticals: out verticalLines,
        pageWidth: out width,
        pageHeight: out height);

    // Both helpers below take the line collections by ref, so the collections
    // drawn further down are whatever those helpers left in the variables.
    int distance = GetLineDistanceByModeThickness(horizontals: ref horizontalLines, verticals: ref verticalLines);
    CellBoxes boxes = DetectCellBoxes(lineDistance: distance, horizontals: ref horizontalLines, verticals: ref verticalLines);

    ImageTableCollection detected = DetectTables(
        cellBoxes: boxes,
        pageWidth: width,
        pageHeight: height,
        lineDistance: distance,
        textSize: textSize);

    // Optional output; lines are drawn before tables.
    if (drawLine)
    {
        DrawLines(horizontals: horizontalLines, verticals: verticalLines, imagePath: imagePath);
    }

    if (drawTable)
    {
        DrawTables(tables: detected, imagePath: imagePath);
    }

    return detected;
}
/// <summary>
/// Looks for boxes made of a top horizontal line, a left and a right vertical line and a
/// bottom horizontal line, and collects a <see cref="CellBox"/> for each one found.
/// </summary>
/// <param name="lineDistance">Tolerance used when matching line end points; also forwarded to the helpers.</param>
/// <param name="horizontals">Horizontal lines to search. Overwritten with the result of <c>GetHorizonsAllInBox</c> each time a box is found.</param>
/// <param name="verticals">Vertical lines to search. Overwritten with the result of <c>GetVerticalAllInBox</c> each time a box is found.</param>
/// <returns>The detected boxes, indexed consecutively from 0, with horizontals ordered by StartY and verticals by StartX.</returns>
static CellBoxes DetectCellBoxes(int lineDistance, ref CellLines horizontals, ref CellLines verticals)
{
    CellBoxes boxes = new CellBoxes();
    int nextBoxIndex = 0;

    // Indexes of lines that already belong to a box (key and value are both the line index).
    Dictionary<int, int> usedLineIndexes = new Dictionary<int, int>();

    // Most recent left/right candidates; only read once the matching flag has been set.
    CellLine leftEdge = new CellLine();
    CellLine rightEdge = new CellLine();

    // Search from the top downwards.
    // NOTE(review): presumably the horizontals arrive ordered by Y - confirm against the caller.
    foreach (CellLine horizontal in horizontals)
    {
        if (usedLineIndexes.ContainsKey(horizontal.Index))
        {
            continue;
        }

        bool hasLeft = false;
        bool hasRight = false;

        foreach (CellLine vertical in verticals)
        {
            if (usedLineIndexes.ContainsKey(vertical.Index))
            {
                continue;
            }

            // Only verticals whose bounding ranges overlap the horizontal's are candidates.
            bool overlaps = horizontal.MinX <= vertical.MaxX
                && horizontal.MaxX >= vertical.MinX
                && horizontal.MinY <= vertical.MaxY
                && horizontal.MaxY >= vertical.MinY;

            if (overlaps)
            {
                if (Math.Abs(horizontal.StartX - vertical.StartX) <= lineDistance)
                {
                    // Vertical line starting at the same position as the left end of the current line.
                    hasLeft = true;
                    leftEdge = vertical;
                }
                else if (Math.Abs(horizontal.EndX - vertical.StartX) <= lineDistance)
                {
                    // Vertical line starting at the same position as the right end of the current line.
                    hasRight = true;
                    rightEdge = vertical;
                }
            }

            // Both side lines are required before looking for the bottom line.
            if (!hasLeft || !hasRight)
            {
                continue;
            }

            // Use the two verticals to find the horizontal line that joins their lower ends.
            CellLine bottom;
            bool bottomFound = ExistBottomLine(
                topIndex: horizontal.Index,
                lineDistance: lineDistance,
                verticalLeft: leftEdge,
                verticalRight: rightEdge,
                horizontals: horizontals,
                duplicateDic: ref usedLineIndexes,
                bottom: out bottom);

            if (!bottomFound)
            {
                continue;
            }

            CellLine top = horizontal;

            // Mark the four border lines as used.
            // NOTE(review): Dictionary.Add throws if a key is already present, so this relies on
            // the four indexes being distinct and not yet registered - confirm.
            usedLineIndexes.Add(top.Index, top.Index);
            usedLineIndexes.Add(bottom.Index, bottom.Index);
            usedLineIndexes.Add(leftEdge.Index, leftEdge.Index);
            usedLineIndexes.Add(rightEdge.Index, rightEdge.Index);

            // NOTE(review): the ref parameters are overwritten with the helper results here. The
            // outer foreach keeps enumerating the collection it started with, but later inner
            // loops, later ExistBottomLine calls and the caller all see the replaced
            // collections - confirm this is intended.
            horizontals = GetHorizonsAllInBox(
                lineDistance: lineDistance,
                top: top,
                bottom: bottom,
                left: leftEdge,
                right: rightEdge,
                horizontals: horizontals,
                duplicateDic: ref usedLineIndexes);

            verticals = GetVerticalAllInBox(
                lineDistance: lineDistance,
                top: top,
                bottom: bottom,
                left: leftEdge,
                right: rightEdge,
                verticals: verticals,
                duplicateDic: ref usedLineIndexes);

            CellLines horizontalsFinal;
            CellLines verticalsFinal;
            RemoveNoiseLine(
                lineDistance: lineDistance,
                top: top,
                bottom: bottom,
                left: leftEdge,
                right: rightEdge,
                innerHorizontals: horizontals,
                innerVerticals: verticals,
                duplicateDic: ref usedLineIndexes,
                horizontalsFinal: out horizontalsFinal,
                verticalsFinal: out verticalsFinal);

            CellLines orderedHorizontals = new CellLines(horizontalsFinal.OrderBy(line => line.StartY));
            CellLines orderedVerticals = new CellLines(verticalsFinal.OrderBy(line => line.StartX));

            boxes.Add(new CellBox(index: nextBoxIndex++, horizontals: orderedHorizontals, verticals: orderedVerticals));

            // This horizontal has produced its box; continue with the next horizontal.
            break;
        }
    }

    return boxes;
}