Пример #1
0
        /// <summary>
        /// Scans the table from row 0 looking for section/title rows and for the first row that
        /// passes <c>WeakHeaderCheck</c>, filling <paramref name="columnOrdering"/> along the way.
        /// </summary>
        /// <remarks>
        /// For every section row (as reported by <c>IAdapter.IsSectionRow</c>): a text longer than
        /// 20 characters is passed to <c>GetValuesFromTitle</c>; a shorter text is stored in
        /// <c>columnOrdering.Section</c>. If no title was extracted from the table rows, the text
        /// returned by <c>adapter.GetTitleOutsideTheTable()</c> is tried as well.
        /// </remarks>
        /// <param name="adapter">Table adapter that supplies rows and the outside-the-table title.</param>
        /// <param name="columnOrdering">Receives Section and, when a title was parsed, Title, Year and MinistryName.</param>
        /// <returns>
        /// Index of the first non-section row accepted by <c>WeakHeaderCheck</c>, or 0 when no such
        /// row exists (including an empty table).
        /// </returns>
        static int ProcessTitle(IAdapter adapter, ColumnOrdering columnOrdering)
        {
            string title    = null;
            string ministry = null;
            int?   year     = null;

            bool findTitle        = false;
            bool prevRowIsSection = false;
            bool headerFound      = false;
            int  row              = 0;

            // The bound is checked before a row is read, so an empty table is never accessed.
            for (; row < adapter.GetRowsCount(); row++)
            {
                var    currRow = adapter.GetCells(row);
                string section_text;
                bool   isSection = IAdapter.IsSectionRow(currRow, adapter.GetColsCount(), prevRowIsSection, out section_text);

                if (isSection)
                {
                    if (section_text.Length > 20)
                    {
                        // Long section text is treated as a candidate document title.
                        if (GetValuesFromTitle(section_text, ref title, ref year, ref ministry))
                        {
                            findTitle = true;
                        }
                    }
                    else
                    {
                        columnOrdering.Section = section_text;
                    }
                }
                else if (WeakHeaderCheck(currRow))
                {
                    headerFound = true;
                    break;
                }

                prevRowIsSection = isSection;
            }

            if (!headerFound)
            {
                // No header row was found: fall back to row 0 (no exception is raised).
                row = 0;
            }

            if (!findTitle)
            {
                // Nothing usable inside the table; try the title located outside of it.
                if (GetValuesFromTitle(adapter.GetTitleOutsideTheTable(), ref title, ref year, ref ministry))
                {
                    findTitle = true;
                }
            }

            if (findTitle)
            {
                columnOrdering.Title        = title;
                columnOrdering.Year         = year;
                columnOrdering.MinistryName = ministry;
            }
            return row;
        }
Пример #2
0
        /// <summary>
        /// Collects the non-empty cells of the row directly below <paramref name="cell"/> whose
        /// column falls inside the column span of <paramref name="cell"/>.
        /// </summary>
        /// <param name="adapter">Table adapter used to read the row count and the row below the cell.</param>
        /// <param name="cell">Cell whose sub-cells are looked up.</param>
        /// <returns>
        /// The matching cells in the order they appear in the row; an empty list when there is no
        /// row below the cell, or when the cell has zero width and blank text.
        /// </returns>
        static List <Cell> FindSubcellsUnder(IAdapter adapter, Cell cell)
        {
            var found    = new List <Cell>();
            int rowBelow = cell.Row + cell.MergedRowsCount;

            // Nothing to inspect: the cell reaches the bottom of the table, or it is a
            // zero-width cell without any text.
            if (rowBelow >= adapter.GetRowsCount() ||
                (cell.CellWidth == 0 && cell.GetText(true).Trim() == ""))
            {
                return found;
            }

            var leftEdge  = cell.Col;
            var rightEdge = cell.Col + cell.MergedColsCount;

            foreach (var candidate in adapter.GetCells(rowBelow))
            {
                if (candidate.Col >= leftEdge)
                {
                    // The first cell at or beyond the right edge ends the scan.
                    if (candidate.Col >= rightEdge)
                    {
                        break;
                    }
                    if (!candidate.IsEmpty)
                    {
                        found.Add(candidate);
                    }
                }
            }
            return found;
        }
Пример #3
0
        /// <summary>
        /// Predicts the field type of a column from the texts found in the rows below
        /// <paramref name="headerCell"/>.
        /// </summary>
        /// <remarks>
        /// At most 10 loop steps are made, starting at the row right after the header cell. Section
        /// rows (per <c>IAdapter.IsSectionRow</c>) are skipped; for other rows the text of the cell
        /// in the header's column is collected and the row index advances by that cell's
        /// <c>MergedRowsCount</c>. The collected texts are passed to <c>PredictByStrings</c>.
        /// When the header cell has a <c>TextAbove</c> and the prediction contains any
        /// <c>DeclarationField.AllOwnTypes</c> bits, those bits are cleared and replaced by
        /// Mixed, State or Owned according to <c>HeaderHelpers</c>; if none of the three checks
        /// matches, the bits stay cleared.
        /// </remarks>
        /// <param name="adapter">Table adapter used to read rows and cells.</param>
        /// <param name="headerCell">Header cell of the column whose type is predicted.</param>
        /// <returns>The predicted field, as returned by <c>PredictByStrings</c> and adjusted as described above.</returns>
        public static DeclarationField PredictEmptyColumnTitle(IAdapter adapter, Cell headerCell)
        {
            List <string> texts           = new List <string>();
            int           rowIndex        = headerCell.Row + headerCell.MergedRowsCount;
            const int     maxRowToCollect = 10;

            // The row bound is part of the loop condition so that a header cell ending on the
            // last table row does not trigger a read past the end of the table.
            for (int i = 0; i < maxRowToCollect && rowIndex < adapter.GetRowsCount(); i++)
            {
                var    cells = adapter.GetCells(rowIndex, IAdapter.MaxColumnsCount);
                string dummy;
                if (IAdapter.IsSectionRow(cells, adapter.GetColsCount(), false, out dummy))
                {
                    // Section rows carry no column data.
                    rowIndex += 1;
                }
                else
                {
                    var c = adapter.GetCell(rowIndex, headerCell.Col);
                    if (c != null)
                    {
                        texts.Add(c.GetText(true));
                        // Jump over the rows covered by this (possibly merged) cell.
                        rowIndex += c.MergedRowsCount;
                    }
                    else
                    {
                        rowIndex += 1;
                    }
                }
            }
            var field = PredictByStrings(texts);

            if (headerCell.TextAbove != null && ((field & DeclarationField.AllOwnTypes) > 0))
            {
                string h = headerCell.TextAbove;
                // The ownership-type bits are re-derived from the text above the header cell.
                field &= ~DeclarationField.AllOwnTypes;
                if (HeaderHelpers.IsMixedColumn(h))
                {
                    field |= DeclarationField.Mixed;
                }
                else if (HeaderHelpers.IsStateColumn(h))
                {
                    field |= DeclarationField.State;
                }
                else if (HeaderHelpers.IsOwnedColumn(h))
                {
                    field |= DeclarationField.Owned;
                }
            }
            Logger.Debug(string.Format("predict by {0}  -> {1}",
                                       String.Join("\\n", texts), field));
            return field;
        }
Пример #4
0
        /// <summary>
        /// Builds the list of column header cells for a header that starts at
        /// <paramref name="headerStartRow"/> and may occupy one or two levels.
        /// </summary>
        /// <remarks>
        /// Cells of the first header row are visited in order. A cell whose <c>MergedRowsCount</c>
        /// equals the table row count, or that has zero width and blank text, is skipped. A cell
        /// with more than one sub-cell (see <c>FindSubcellsUnder</c>) contributes its sub-cells,
        /// each receiving the parent's normalized text in <c>TextAbove</c>; otherwise the cell
        /// itself (or, for an empty top cell, its single sub-cell) is used.
        /// </remarks>
        /// <param name="adapter">Table adapter used to read rows.</param>
        /// <param name="headerStartRow">Index of the first header row.</param>
        /// <param name="headerEndRow">
        /// Starts at <paramref name="headerStartRow"/> + 1 and is raised to
        /// <c>Row + MergedRowsCount</c> of the header cells that were used.
        /// </param>
        /// <returns>The header cells, one per detected column, in row order.</returns>
        static public List <Cell> GetColumnCells(IAdapter adapter, int headerStartRow, out int headerEndRow)
        {
            headerEndRow = headerStartRow + 1;

            var  topRow             = adapter.GetCells(headerStartRow);
            var  result             = new List <Cell>();
            var  loggedTitles       = new List <string>();
            bool secondLevelAllowed = true;
            int  tallestSpan        = 1;

            foreach (var top in topRow)
            {
                string topText = top.GetText(true);

                // Skip cells spanning the whole table and zero-width blank cells.
                if (adapter.GetRowsCount() == top.MergedRowsCount ||
                    (top.CellWidth == 0 && topText.Trim() == ""))
                {
                    continue;
                }

                if (top.MergedRowsCount > tallestSpan)
                {
                    tallestSpan = top.MergedRowsCount;
                }

                var below = FindSubcellsUnder(adapter, top);

                if (secondLevelAllowed && below.Count > 1)
                {
                    // The current cell spans several columns, so the header probably occupies two
                    // rows instead of just one, with the second row reserved for subheaders.
                    foreach (var sub in below)
                    {
                        sub.TextAbove = top.Text.NormSpaces();
                        result.Add(sub);
                        loggedTitles.Add(sub.TextAbove + "^" + sub.Text.NormSpaces());
                    }
                    headerEndRow = Math.Max(headerEndRow, below[0].Row + below[0].MergedRowsCount);
                    continue;
                }

                headerEndRow = Math.Max(headerEndRow, top.Row + top.MergedRowsCount);

                // Sometimes in a two-tier header the top cell is empty while the cell below it
                // holds the title (TwoRowHeaderEmptyTopCellTest).
                bool titleIsBelow = topText.Trim() == "" &&
                                    top.MergedRowsCount < tallestSpan &&
                                    below.Count == 1;
                if (titleIsBelow)
                {
                    result.Add(below[0]);
                }
                else
                {
                    result.Add(top);
                }

                loggedTitles.Add(top.Text.NormSpaces());

                // Workaround for an error in the DepEnergo2010 document: the first column decides
                // whether a second header level is still allowed.
                if (result.Count == 1 && top.MergedRowsCount == 1 && below.Count == 1)
                {
                    string nameBelow = below[0].GetText(true);
                    secondLevelAllowed = nameBelow.Length < 5;
                }
            }

            Logger.Debug("column titles: " + String.Join("|", loggedTitles));
            return result;
        }