/// <summary>
/// Tests whether the closed [Top, Bottom] range of this cell and the one of
/// <paramref name="cell"/> touch or overlap.
/// </summary>
/// <param name="cell">The cell to compare this cell against.</param>
/// <returns>
/// <c>true</c> when the Top or Bottom edge of either cell lies inside the closed
/// [Top, Bottom] range of the other cell; otherwise <c>false</c>.
/// </returns>
public bool DoesWordCrossWord(CustomCell cell)
{
    // This cell's top edge lies inside the other cell's range.
    if (this.Top >= cell.Top && this.Top <= cell.Bottom)
    {
        return true;
    }

    // The other cell's top edge lies inside this cell's range.
    if (cell.Top >= this.Top && cell.Top <= this.Bottom)
    {
        return true;
    }

    // This cell's bottom edge lies inside the other cell's range.
    if (this.Bottom <= cell.Bottom && this.Bottom >= cell.Top)
    {
        return true;
    }

    // The other cell's bottom edge lies inside this cell's range.
    return cell.Bottom <= this.Bottom && cell.Bottom >= this.Top;
}
/// <summary>
/// Converts XML row elements into <see cref="CustomRow"/> objects. For every "cell"
/// element of a row, all of its "line" elements are merged into one
/// <see cref="CustomCell"/> whose bounds enclose them and whose text is the line
/// texts joined by single spaces, ordered by ascending Top.
/// </summary>
/// <param name="rows">The XML row elements to read.</param>
/// <param name="pageIndex">Not read by this method.</param>
/// <returns>
/// One row (flagged with IsTableRow) per input element that produced at least one
/// merged cell; elements without any "line" content are skipped.
/// </returns>
private List<CustomRow> GetLinesFromWordsInRows(List<XmlElement> rows, int pageIndex)
{
    List<CustomRow> result = new List<CustomRow>();

    foreach (XmlElement rowElement in rows)
    {
        // -1 is the "no vertical extent recorded yet" marker for Top and Bottom.
        CustomRow tableRow = new CustomRow() { IsTableRow = true, Top = -1, Bottom = -1 };

        foreach (XmlElement cellElement in rowElement.GetElementsByTagName("cell"))
        {
            // One fragment per "line" element, built from its t/b/r/l/baseline attributes.
            List<CustomCell> fragments = new List<CustomCell>();
            foreach (XmlElement lineElement in cellElement.GetElementsByTagName("line").Cast<XmlElement>())
            {
                fragments.Add(new CustomCell
                {
                    Top = int.Parse(lineElement.Attributes["t"].Value),
                    Bottom = int.Parse(lineElement.Attributes["b"].Value),
                    Right = int.Parse(lineElement.Attributes["r"].Value),
                    Left = int.Parse(lineElement.Attributes["l"].Value),
                    BaseLine = int.Parse(lineElement.Attributes["baseline"].Value),
                    Text = lineElement.InnerText,
                });
            }

            if (!fragments.Any())
            {
                continue;
            }

            // Stable ordering by Top decides the order of the joined text.
            List<CustomCell> ordered = fragments.OrderBy(fragment => fragment.Top).ToList();
            int mergedTop = ordered.Min(fragment => fragment.Top);
            int mergedBottom = ordered.Max(fragment => fragment.Bottom);

            // Grow the row's vertical extent so that it covers this cell.
            if (tableRow.Top != -1)
            {
                tableRow.Top = Math.Min(tableRow.Top, mergedTop);
            }
            else
            {
                tableRow.Top = mergedTop;
            }

            if (tableRow.Bottom != -1)
            {
                tableRow.Bottom = Math.Max(tableRow.Bottom, mergedBottom);
            }
            else
            {
                tableRow.Bottom = mergedBottom;
            }

            CustomCell mergedCell = new CustomCell()
            {
                Top = mergedTop,
                Bottom = mergedBottom,
                Left = ordered.Min(fragment => fragment.Left),
                Right = ordered.Max(fragment => fragment.Right),
                Text = string.Join(" ", ordered.Select(fragment => fragment.Text)),
            };

            tableRow.Cells.Add(mergedCell);
        }

        if (tableRow.Cells.Any())
        {
            result.Add(tableRow);
        }
    }

    return result;
}
/// <summary>
/// Tries to add <paramref name="cell"/> to this group and widens the group's
/// [Left, Right] range so that it covers the added cell.
/// </summary>
/// <remarks>
/// A group whose Left and Right are both 0 is treated as empty and accepts any cell,
/// taking over that cell's Left and Right. Otherwise the cell is accepted only when
/// its [Left, Right] range overlaps the group's range. That includes a cell that is
/// wider than the group and spans it completely, a case the edge-only checks miss.
/// </remarks>
/// <param name="cell">The candidate cell.</param>
/// <returns>
/// <c>true</c> when the cell was added; <c>false</c> when it does not overlap the
/// group, in which case the group is left unchanged.
/// </returns>
public bool AddWord(CustomCell cell)
{
    if (Left == 0 && Right == 0)
    {
        Cells.Add(cell);
        Left = cell.Left;
        Right = cell.Right;
        return true;
    }

    bool leftEdgeInsideGroup = cell.Left >= Left && cell.Left <= Right;
    bool rightEdgeInsideGroup = cell.Right <= Right && cell.Right >= Left;

    // A cell wider than the group has both of its edges outside the group's range,
    // yet it still overlaps the group.
    bool cellSpansGroup = cell.Left <= Left && cell.Right >= Right;

    if (!(leftEdgeInsideGroup || rightEdgeInsideGroup || cellSpansGroup))
    {
        return false;
    }

    Cells.Add(cell);
    Left = Math.Min(Left, cell.Left);
    Right = Math.Max(Right, cell.Right);
    return true;
}
/// <summary>
/// Partitions free-standing cells into rows and, inside each row, into word groups.
/// </summary>
/// <remarks>
/// Rows are built greedily. The first remaining cell is the seed; the tallest
/// non-empty rectangle returned by <c>GetUnionBoundaries</c> between the seed and any
/// remaining cell becomes the row boundaries; every remaining cell for which
/// <c>IsInsideVerticalBoundaries</c> returns true joins the row. The seed is always
/// placed in its own row, so each iteration consumes at least one cell and the loop
/// terminates even when no cell passes the boundary test.
/// Within a row, a group starts from the first ungrouped cell and keeps absorbing
/// cells accepted by <c>WordsGroup.AddWord</c> until a full pass adds nothing; cells
/// that are rejected stay available for later groups.
/// </remarks>
/// <param name="cellsNotInRows">Cells to arrange; the caller's list is not modified.</param>
/// <returns>The rows that were built, each with its cell groups filled and <c>SortCells</c> applied.</returns>
private List<CustomRow> GetLinesFromWords(List<CustomCell> cellsNotInRows)
{
    // Work on a copy so that removals do not affect the caller's list.
    List<CustomCell> remaining = cellsNotInRows.ToList();
    List<CustomRow> lines = new List<CustomRow>();

    while (remaining.Any())
    {
        CustomCell currentCell = remaining.First();
        System.Drawing.Rectangle currentLineBoundaries = System.Drawing.Rectangle.Empty;

        // Find the tallest union rectangle between the seed cell and any remaining cell.
        foreach (CustomCell cell in remaining)
        {
            System.Drawing.Rectangle boundaries = currentCell.GetUnionBoundaries(cell);
            if (boundaries != System.Drawing.Rectangle.Empty
                && boundaries.Bottom - boundaries.Top > currentLineBoundaries.Bottom - currentLineBoundaries.Top)
            {
                currentLineBoundaries = boundaries;
            }
        }

        // Create the new line from the boundaries found above.
        CustomRow line = new CustomRow()
        {
            Top = currentLineBoundaries.Top,
            Bottom = currentLineBoundaries.Bottom
        };
        lines.Add(line);

        List<CustomCell> lineCells = remaining
            .Where(cell => cell.IsInsideVerticalBoundaries(currentLineBoundaries))
            .ToList();

        // Guarantee progress: if the seed failed the boundary test (for example when
        // the boundaries stayed empty), nothing would be removed below and the outer
        // loop would never end.
        if (!lineCells.Contains(currentCell))
        {
            lineCells.Insert(0, currentCell);
        }

        // Move the cells of this line out of the pool of remaining cells.
        foreach (CustomCell cell in lineCells)
        {
            line.Cells.Add(cell);
            remaining.Remove(cell);
        }
    }

    // Group the words of every line, then let the line sort its cells.
    foreach (CustomRow line in lines)
    {
        List<CustomCell> ungrouped = line.Cells.ToList();

        while (ungrouped.Any())
        {
            WordsGroup wordsGroup = new WordsGroup();
            CustomCell seed = ungrouped.First();
            wordsGroup.AddWord(seed);
            ungrouped.Remove(seed);

            // Every accepted cell may change what the group accepts next, so rescan
            // from the start until a full pass adds nothing.
            bool addedWord;
            do
            {
                addedWord = false;
                for (int i = 0; i < ungrouped.Count; i++)
                {
                    if (wordsGroup.AddWord(ungrouped[i]))
                    {
                        // Only cells that were actually accepted leave the pool.
                        ungrouped.RemoveAt(i);
                        addedWord = true;
                        break;
                    }
                }
            }
            while (addedWord);

            line.CellsGroups.Add(wordsGroup);
        }

        line.SortCells();
    }

    return lines;
}
/// <summary>
/// Returns the smallest rectangle enclosing both this cell and <paramref name="cell"/>,
/// provided <c>DoesWordCrossWord</c> reports that the two cells cross.
/// </summary>
/// <param name="cell">The other cell.</param>
/// <returns>
/// The rectangle spanning the minimum Left/Top and maximum Right/Bottom of the two
/// cells, or <c>System.Drawing.Rectangle.Empty</c> when the cells do not cross.
/// </returns>
public System.Drawing.Rectangle GetUnionBoundaries(CustomCell cell)
{
    if (!DoesWordCrossWord(cell))
    {
        return System.Drawing.Rectangle.Empty;
    }

    int unionLeft = Math.Min(cell.Left, Left);
    int unionTop = Math.Min(cell.Top, Top);
    int unionRight = Math.Max(cell.Right, Right);
    int unionBottom = Math.Max(cell.Bottom, Bottom);

    return System.Drawing.Rectangle.FromLTRB(unionLeft, unionTop, unionRight, unionBottom);
}