Example #1
0
        /// <summary>
        /// Reads the rows of a Word table into <c>TableRows</c> and afterwards removes the rows
        /// added by this call again when the table does not pass the checks at the end of the method.
        /// </summary>
        /// <param name="docHolder">Document holder forwarded to the cell constructor and to the width initialisation.</param>
        /// <param name="table">The Word table whose <c>TableRow</c> descendants are processed.</param>
        /// <param name="maxRowsToProcess">
        /// Stop as soon as <c>TableRows.Count</c> reaches this value; <c>-1</c> disables the limit.
        /// Note that the limit is compared against the total number of collected rows, not only the rows of this table.
        /// </param>
        void ProcessWordTable(WordDocHolder docHolder, Table table, int maxRowsToProcess)
        {
            var sourceRows = table.Descendants<TableRow>().ToList();
            TableWidthInfo widths = InitializeTableWidthInfo(docHolder, table);
            int firstNewRowIndex = TableRows.Count;
            int widestRowCellCount = 0;
            TableBorders borders = GetTableBorders(table);

            int rowIndex = -1;
            foreach (var sourceRow in sourceRows)
            {
                ++rowIndex;

                OpenXmlTableRow collected = new OpenXmlTableRow();
                int spanSoFar = 0;
                int gridBefore = GetRowGridBefore(sourceRow);
                bool allCellsEmpty = true;
                var sourceCells = sourceRow.Elements<TableCell>().ToArray();

                for (int cellIndex = 0; cellIndex < sourceCells.Length; ++cellIndex)
                {
                    var cell = new OpenXmlWordCell(docHolder, sourceCells, cellIndex, widths, TableRows.Count, spanSoFar, borders);

                    if (collected.RowCells.Count == 0)
                    {
                        // The first cell of the row additionally absorbs the row's grid-before value.
                        cell.MergedColsCount += gridBefore;
                        collected.RowCells.Add(cell);
                    }
                    else
                    {
                        var previous = collected.RowCells.Last();
                        if (previous.HasRightBorder)
                        {
                            collected.RowCells.Add(cell);
                        }
                        else
                        {
                            // The previous cell has no right border: fold this cell into it.
                            previous.Text            += cell.Text;
                            previous.CellWidth       += cell.CellWidth;
                            previous.MergedColsCount += cell.MergedColsCount;
                            previous.HasRightBorder   = cell.HasRightBorder;
                        }
                    }

                    spanSoFar += cell.MergedColsCount;
                    if (allCellsEmpty && !cell.IsEmpty)
                    {
                        allCellsEmpty = false;
                    }
                }

                if (allCellsEmpty)
                {
                    // Rows in which every cell reports IsEmpty are skipped entirely.
                    continue;
                }

                widestRowCellCount = Math.Max(collected.RowCells.Count, widestRowCellCount);

                // Only the table's very first row is a candidate for merging into the last row collected so far.
                bool mergeIntoPreviousRow =
                    rowIndex == 0 &&
                    TableRows.Count > 0 &&
                    BigramsHolder.CheckMergeRow(
                        TableRows.Last().RowCells.ConvertAll(x => x.Text),
                        collected.RowCells.ConvertAll(x => x.Text));

                if (mergeIntoPreviousRow)
                {
                    MergeRow(TableRows.Last().RowCells, collected.RowCells);
                }
                else
                {
                    TableRows.Add(collected);
                }

                if (maxRowsToProcess != -1 && TableRows.Count >= maxRowsToProcess)
                {
                    break;
                }
            }

            if (TableRows.Count == 0)
            {
                return;
            }

            if (TableHeaderRecognizer.IsNamePositionAndIncomeTable(GetDataCells(0)))
            {
                return;
            }

            if (widestRowCellCount <= 4 || CheckNameColumnIsEmpty(firstNewRowIndex))
            {
                // Remove this suspicious table: drop every row appended by this call.
                TableRows.RemoveRange(firstNewRowIndex, TableRows.Count - firstNewRowIndex);
            }
        }
        /// <summary>
        /// Reads the rows of an HTML table into <c>TableRows</c>, expanding column spans into
        /// placeholder cells, and afterwards removes the rows added by this call again when the
        /// table looks suspicious (no row with more than 4 cells, or <c>CheckNameColumnIsEmpty</c> reports true).
        /// </summary>
        /// <param name="docHolder">Document holder forwarded to the cell constructor.</param>
        /// <param name="table">The HTML table element whose rows are obtained via <c>GetHtmlTableRows</c>.</param>
        /// <param name="maxRowsToProcess">
        /// Stop as soon as <c>TableRows.Count</c> reaches this value; <c>-1</c> disables the limit.
        /// Note that the limit is compared against the total number of collected rows, not only the rows of this table.
        /// </param>
        void ProcessHtmlTable(HtmlDocHolder docHolder, IElement table, int maxRowsToProcess)
        {
            var rows          = GetHtmlTableRows(table);
            int rowCount      = rows.Count();
            int saveRowsCount = TableRows.Count;
            int maxCellsCount = 0;
            int maxSumSpan    = 0;

            for (int r = 0; r < rowCount; ++r)
            {
                List <HtmlAdapterCell> newRow = new List <HtmlAdapterCell>();
                int  sumspan = 0;
                var  row     = rows[r];
                bool isEmpty = true;
                foreach (var rowCell in GetHtmlTableCells(row))
                {
                    var c = new HtmlAdapterCell(docHolder, rowCell, TableRows.Count, sumspan);
                    newRow.Add(c);
                    // A cell spanning several columns is followed by one placeholder cell per extra column;
                    // here k is an offset relative to the spanning cell's column.
                    for (int k = 1; k < c.MergedColsCount; ++k)
                    {
                        newRow.Add(new HtmlAdapterCell(TableRows.Count, sumspan + k));
                    }
                    sumspan += c.MergedColsCount;
                    isEmpty  = isEmpty && c.IsEmpty;
                }
                if (isEmpty)
                {
                    // Rows in which every cell reports IsEmpty are skipped entirely.
                    continue;
                }
                // Counted before the padding below, so padding cells do not make a narrow table look wide.
                maxCellsCount = Math.Max(newRow.Count, maxCellsCount);
                maxSumSpan    = Math.Max(sumspan, maxSumSpan);

                // see 7007_8.html in tests
                // Pad a row that is narrower than the widest row seen so far.
                // k already is the absolute column index (it starts at sumspan), so it is passed as-is;
                // adding sumspan again would shift the padding cells to the wrong columns.
                for (int k = sumspan; k < maxSumSpan; ++k)
                {
                    newRow.Add(new HtmlAdapterCell(TableRows.Count, k));
                }

                // Only the table's very first row is a candidate for merging into the last row collected so far.
                if (r == 0 && TableRows.Count > 0 &&
                    BigramsHolder.CheckMergeRow(
                        TableRows.Last().ConvertAll(x => x.Text),
                        newRow.ConvertAll(x => x.Text)))
                {
                    MergeRow(TableRows.Last(), newRow);
                }
                else
                {
                    TableRows.Add(newRow);
                }

                if ((maxRowsToProcess != -1) && (TableRows.Count >= maxRowsToProcess))
                {
                    break;
                }
            }
            if (saveRowsCount < TableRows.Count)
            {
                if (maxCellsCount <= 4)
                {
                    //remove this suspicious table
                    TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
                }
                else
                {
                    InsertRowSpanCells(saveRowsCount, TableRows.Count);
                    if (CheckNameColumnIsEmpty(saveRowsCount))
                    {
                        // Same removal as above, decided after the row-span cells were inserted.
                        TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
                    }
                }
            }
        }