Exemplo n.º 1
0
        /// <summary>
        /// Builds one <see cref="CustomRow"/> per table-row element. Within each row, the
        /// "line" elements found under every "cell" element are merged into a single
        /// <see cref="CustomCell"/> whose rectangle encloses all of them and whose text is
        /// the lines' texts joined by single spaces, ordered by their top coordinate.
        /// </summary>
        /// <param name="rows">
        /// Row elements to convert. Every "line" descendant must carry the integer
        /// attributes "t", "b", "l", "r" and "baseline".
        /// </param>
        /// <param name="pageIndex">Not used by this method; kept for interface compatibility.</param>
        /// <returns>
        /// The rows that yielded at least one merged cell, in input order, each marked
        /// with <c>IsTableRow = true</c>.
        /// </returns>
        private List<CustomRow> GetLinesFromWordsInRows(List<XmlElement> rows, int pageIndex)
        {
            // The coordinates are machine-written data, so parse them independently of
            // the current thread culture.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            List<CustomRow> lines = new List<CustomRow>();

            foreach (XmlElement row in rows)
            {
                // -1 marks "no cell seen yet" for the row's vertical extent.
                CustomRow line = new CustomRow()
                {
                    IsTableRow = true,
                    Top        = -1,
                    Bottom     = -1
                };

                foreach (XmlElement xmlCell in row.GetElementsByTagName("cell"))
                {
                    // OrderBy is stable, so lines sharing a Top keep document order.
                    List<CustomCell> cells = xmlCell.GetElementsByTagName("line")
                        .Cast<XmlElement>()
                        .Select(x => new CustomCell
                        {
                            Top      = int.Parse(x.Attributes["t"].Value, invariant),
                            Bottom   = int.Parse(x.Attributes["b"].Value, invariant),
                            Right    = int.Parse(x.Attributes["r"].Value, invariant),
                            Left     = int.Parse(x.Attributes["l"].Value, invariant),
                            BaseLine = int.Parse(x.Attributes["baseline"].Value, invariant),
                            Text     = x.InnerText,
                        })
                        .OrderBy(x => x.Top)
                        .ToList();

                    if (!cells.Any())
                    {
                        continue;
                    }

                    int cellTop    = cells.Min(x => x.Top);
                    int cellBottom = cells.Max(x => x.Bottom);

                    // Grow the row's vertical extent to include this cell.
                    line.Top    = (line.Top != -1) ? Math.Min(line.Top, cellTop) : cellTop;
                    line.Bottom = (line.Bottom != -1) ? Math.Max(line.Bottom, cellBottom) : cellBottom;

                    line.Cells.Add(new CustomCell()
                    {
                        Top    = cellTop,
                        Bottom = cellBottom,
                        Left   = cells.Min(x => x.Left),
                        Right  = cells.Max(x => x.Right),
                        Text   = string.Join(" ", cells.Select(x => x.Text))
                    });
                }

                // Rows without any text lines are dropped.
                if (line.Cells.Any())
                {
                    lines.Add(line);
                }
            }

            return lines;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Partitions free-standing cells into rows. Each pass takes the first remaining
        /// cell as a seed, picks the tallest non-empty rectangle returned by
        /// <c>GetUnionBoundaries</c> between the seed and any remaining cell, and moves
        /// every cell that reports <c>IsInsideVerticalBoundaries</c> for that rectangle
        /// into a new row. The cells of each row are then clustered into
        /// <see cref="WordsGroup"/>s and the row is sorted via <c>SortCells</c>.
        /// </summary>
        /// <param name="cellsNotInRows">Cells to arrange; the caller's list is not modified.</param>
        /// <returns>The rows, in the order they were formed.</returns>
        private List<CustomRow> GetLinesFromWords(List<CustomCell> cellsNotInRows)
        {
            // Work on a private copy so the caller's list is not emptied.
            List<CustomCell> remaining = new List<CustomCell>(cellsNotInRows);
            List<CustomRow> lines = new List<CustomRow>();

            while (remaining.Any())
            {
                CustomCell currentCell = remaining.First();
                System.Drawing.Rectangle currentLineBoundaries = System.Drawing.Rectangle.Empty;

                // Find the tallest union rectangle between the seed and any remaining cell.
                foreach (CustomCell cell in remaining)
                {
                    System.Drawing.Rectangle boundaries = currentCell.GetUnionBoundaries(cell);
                    if (boundaries != System.Drawing.Rectangle.Empty &&
                        boundaries.Bottom - boundaries.Top > currentLineBoundaries.Bottom - currentLineBoundaries.Top)
                    {
                        currentLineBoundaries = boundaries;
                    }
                }

                // Create the new row and collect the cells that fall inside it.
                CustomRow line = new CustomRow()
                {
                    Top    = currentLineBoundaries.Top,
                    Bottom = currentLineBoundaries.Bottom
                };
                lines.Add(line);
                foreach (CustomCell cell in remaining)
                {
                    if (cell.IsInsideVerticalBoundaries(currentLineBoundaries))
                    {
                        line.Cells.Add(cell);
                    }
                }

                // Guard against an endless loop: if no cell matched, nothing would be
                // removed and the next pass would be identical. Put the seed in its own
                // row so every pass makes progress.
                if (!line.Cells.Any())
                {
                    line.Cells.Add(currentCell);
                }

                // Remove the placed cells from the pool.
                foreach (CustomCell cell in line.Cells)
                {
                    remaining.Remove(cell);
                }
            }

            // Cluster the cells of each row into groups, then sort the row.
            foreach (CustomRow line in lines)
            {
                List<CustomCell> cellsTmp = line.Cells.ToList();
                while (cellsTmp.Any())
                {
                    WordsGroup wordsGroup = new WordsGroup();
                    CustomCell cell       = cellsTmp.First();
                    wordsGroup.AddWord(cell);
                    cellsTmp.Remove(cell);

                    // Keep absorbing cells until a full scan adds nothing. The original
                    // loop was guarded by a flag initialised to false and never ran.
                    // NOTE(review): assumes AddWord returns true only when the group
                    // accepted the word - confirm against WordsGroup.
                    bool addedWord;
                    do
                    {
                        addedWord = false;
                        foreach (CustomCell tmpCell in cellsTmp)
                        {
                            if (wordsGroup.AddWord(tmpCell))
                            {
                                // Remove only accepted cells, and stop enumerating
                                // immediately because the list has been modified.
                                cellsTmp.Remove(tmpCell);
                                addedWord = true;
                                break;
                            }
                        }
                    }
                    while (addedWord);

                    line.CellsGroups.Add(wordsGroup);
                }
                line.SortCells();
            }

            return lines;
        }