Пример #1
0
        /// <summary>
        /// Converts the rows of a Word table into lists of <c>OpenXmlWordCell</c> and appends them
        /// to <c>TableRows</c>. Rows in which every cell reports <c>IsEmpty</c> are skipped.
        /// If the widest kept row has four cells or fewer, or <c>CheckNameColumnIsEmpty</c> flags the
        /// appended rows, everything appended by this call is removed again.
        /// </summary>
        /// <param name="docHolder">Document holder handed to every created cell.</param>
        /// <param name="table">Table whose descendant <c>TableRow</c> elements are processed.</param>
        /// <param name="maxRowsToProcess">
        /// Processing stops once <c>TableRows.Count</c> reaches this value; -1 disables the limit.
        /// </param>
        void ProcessWordTable(WordDocHolder docHolder, Table table, int maxRowsToProcess)
        {
            List<TableRow> rows          = table.Descendants<TableRow>().ToList();
            TableWidthInfo widthInfo     = InitializeTableWidthInfo(docHolder, table);
            int            saveRowsCount = TableRows.Count; // rows that existed before this table
            int            maxCellsCount = 0;

            // rows is a List, so use the Count property rather than LINQ Count().
            for (int r = 0; r < rows.Count; ++r)
            {
                var newRow        = new List<OpenXmlWordCell>();
                int sumspan       = 0; // running column offset inside the current row
                var row           = rows[r];
                int rowGridBefore = GetRowGridBefore(row);
                bool isEmpty      = true;
                foreach (var rowCell in row.Elements<TableCell>())
                {
                    var c = new OpenXmlWordCell(docHolder, widthInfo, rowCell, TableRows.Count, sumspan);
                    if (newRow.Count == 0)
                    {
                        // The row's grid-before value is added to the span of its first cell.
                        c.MergedColsCount += rowGridBefore;
                    }
                    newRow.Add(c);
                    sumspan += c.MergedColsCount;
                    isEmpty  = isEmpty && c.IsEmpty;
                }
                if (isEmpty)
                {
                    continue;
                }
                maxCellsCount = Math.Max(newRow.Count, maxCellsCount);

                // Only the first row of this table may be glued onto the last row collected so far
                // (BigramsHolder.CheckMergeRow decides, based on the cell texts of both rows).
                if (r == 0 && TableRows.Count > 0 &&
                    BigramsHolder.CheckMergeRow(
                        TableRows.Last().ConvertAll(x => x.Text),
                        newRow.ConvertAll(x => x.Text)))
                {
                    MergeRow(TableRows.Last(), newRow);
                }
                else
                {
                    TableRows.Add(newRow);
                }

                if ((maxRowsToProcess != -1) && (TableRows.Count >= maxRowsToProcess))
                {
                    break;
                }
            }

            // Nothing was appended (all rows were empty or merged into an earlier row):
            // there is no range to validate or remove, same guard as in ProcessHtmlTable.
            if (saveRowsCount >= TableRows.Count)
            {
                return;
            }

            if (maxCellsCount <= 4 || CheckNameColumnIsEmpty(TableRows, saveRowsCount))
            {
                //remove this suspicious table
                TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
            }
        }
        /// <summary>
        /// Converts the rows of an HTML table into lists of <c>HtmlAdapterCell</c> and appends them
        /// to <c>TableRows</c>. Rows in which every real cell reports <c>IsEmpty</c> are skipped.
        /// Each cell spanning several columns is followed by placeholder cells, and rows narrower
        /// than the widest row seen so far are padded with placeholder cells.
        /// If the widest kept row has four cells or fewer, or <c>CheckNameColumnIsEmpty</c> flags the
        /// appended rows, everything appended by this call is removed again.
        /// </summary>
        /// <param name="docHolder">Document holder handed to every created cell.</param>
        /// <param name="table">HTML table element whose rows are processed.</param>
        /// <param name="maxRowsToProcess">
        /// Processing stops once <c>TableRows.Count</c> reaches this value; -1 disables the limit.
        /// </param>
        void ProcessHtmlTable(HtmlDocHolder docHolder, IElement table, int maxRowsToProcess)
        {
            var rows          = GetHtmlTableRows(table);
            int saveRowsCount = TableRows.Count; // rows that existed before this table
            int maxCellsCount = 0;
            int maxSumSpan    = 0;               // widest column span seen among the kept rows

            for (int r = 0; r < rows.Count(); ++r)
            {
                var  newRow  = new List<HtmlAdapterCell>();
                int  sumspan = 0; // running column offset inside the current row
                var  row     = rows[r];
                bool isEmpty = true;
                foreach (var rowCell in GetHtmlTableCells(row))
                {
                    var c = new HtmlAdapterCell(docHolder, rowCell, TableRows.Count, sumspan);
                    newRow.Add(c);
                    // One placeholder cell for every extra column covered by the cell's span.
                    for (int k = 1; k < c.MergedColsCount; ++k)
                    {
                        newRow.Add(new HtmlAdapterCell(TableRows.Count, sumspan + k));
                    }
                    sumspan += c.MergedColsCount;
                    isEmpty  = isEmpty && c.IsEmpty;
                }
                if (isEmpty)
                {
                    continue;
                }
                maxCellsCount = Math.Max(newRow.Count, maxCellsCount);
                maxSumSpan    = Math.Max(sumspan, maxSumSpan);

                // see 7007_8.html in tests
                // Pad a short row up to the widest span seen so far. k already is the column
                // offset here (it starts at sumspan), so it must not be added to sumspan again.
                for (int k = sumspan; k < maxSumSpan; ++k)
                {
                    newRow.Add(new HtmlAdapterCell(TableRows.Count, k));
                }

                // Only the first row of this table may be glued onto the last row collected so far
                // (BigramsHolder.CheckMergeRow decides, based on the cell texts of both rows).
                if (r == 0 && TableRows.Count > 0 &&
                    BigramsHolder.CheckMergeRow(
                        TableRows.Last().ConvertAll(x => x.Text),
                        newRow.ConvertAll(x => x.Text)))
                {
                    MergeRow(TableRows.Last(), newRow);
                }
                else
                {
                    TableRows.Add(newRow);
                }

                if ((maxRowsToProcess != -1) && (TableRows.Count >= maxRowsToProcess))
                {
                    break;
                }
            }

            // Validate only when this call actually appended rows.
            if (saveRowsCount < TableRows.Count)
            {
                if (maxCellsCount <= 4)
                {
                    //remove this suspicious table
                    TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
                }
                else
                {
                    // Row-span cells are inserted before the name-column check runs.
                    InsertRowSpanCells(saveRowsCount, TableRows.Count);
                    if (CheckNameColumnIsEmpty(TableRows, saveRowsCount))
                    {
                        TableRows.RemoveRange(saveRowsCount, TableRows.Count - saveRowsCount);
                    }
                }
            }
        }