Esempio n. 1
0
 /// <summary>
 /// Creates a row bound to <paramref name="adapter"/> and loads its cells.
 /// </summary>
 /// <param name="adapter">Adapter the cells are read from; also queried with <c>IsExcel()</c>.</param>
 /// <param name="columnOrdering">Table header; its <c>GetMaxColumnEndIndex()</c> is passed to the adapter when fetching cells.</param>
 /// <param name="row">Row index passed to <c>GetDataCells</c>.</param>
 public DataRow(IAdapter adapter, TableHeader columnOrdering, int row)
 {
     this.row            = row;
     this.adapter        = adapter;
     this.ColumnOrdering = columnOrdering;

     int maxColumnEnd = columnOrdering.GetMaxColumnEndIndex();
     Cells = adapter.GetDataCells(row, maxColumnEnd);

     // MapCells() is skipped when the adapter reports IsExcel().
     if (this.adapter.IsExcel())
     {
         return;
     }
     MapCells();
 }
Esempio n. 2
0
        /// <summary>
        /// Builds a JSON-ready portion of the table: the header rows, the nearest section row
        /// found when scanning upwards from <paramref name="body_start"/>, and a run of
        /// non-empty data rows starting at <paramref name="body_start"/>.
        /// </summary>
        /// <param name="columnOrdering">Header that supplies the header row range and the maximal column end index.</param>
        /// <param name="body_start">Index of the first row considered as data; stored in <c>DataStart</c>.</param>
        /// <param name="body_end">Together with <paramref name="body_start"/> defines the maximal number
        /// of non-empty rows to emit (<c>body_end - body_start</c>).</param>
        /// <returns>
        /// The filled portion. <c>DataEnd</c> is the index just past the last row that was examined
        /// (empty rows are examined and skipped, so it may exceed <paramref name="body_end"/>).
        /// </returns>
        public TJsonTablePortion TablePortionToJson(TableHeader columnOrdering, int body_start, int body_end)
        {
            var table = new TJsonTablePortion();

            table.DataStart = body_start;
            int headerEnd = columnOrdering.GetPossibleHeaderEnd();

            // Reuse the cached header end instead of re-querying it on every iteration.
            for (int i = columnOrdering.GetPossibleHeaderBegin(); i < headerEnd; i++)
            {
                var row = GetJsonByRow(GetDataCells(i));
                table.Header.Add(row);
            }

            // find section before data: scan upwards from body_start down to headerEnd (inclusive)
            // and keep only the first section row encountered
            for (int i = body_start; i >= headerEnd; i--)
            {
                string ignoredSectionName;
                // cannot use prevRowIsSection
                var row = GetDataCells(i);
                if (IsSectionRow(i, row, columnOrdering.GetMaxColumnEndIndex(), false, out ignoredSectionName))
                {
                    table.Section.Add(GetJsonByRow(row));
                    break;
                }
            }

            int maxRowsCount = body_end - body_start;

            table.DataEnd = body_start;
            int addedRows = 0;

            // Empty rows advance DataEnd but do not count towards maxRowsCount.
            while (table.DataEnd < GetRowsCount() && addedRows < maxRowsCount)
            {
                if (!IsEmptyRow(table.DataEnd))
                {
                    table.Data.Add(GetJsonByRow(GetDataCells(table.DataEnd)));
                    addedRows++;
                }
                table.DataEnd++;
            }
            return(table);
        }
Esempio n. 3
0
        /// <summary>
        /// Walks the table rows starting at <c>columnOrdering.FirstDataRow</c>, creating sections,
        /// declarants and relatives as they are recognised, and attaching each remaining row to the
        /// current person. Also fills <c>_Declaration.Properties.Year</c> from the first row whose
        /// income-year cell parses as an integer, if the year is not set yet.
        /// </summary>
        /// <param name="columnOrdering">Initial table header; replaced whenever a new header row is found inside the table.</param>
        /// <param name="updateTrigrams">When true, every data row is fed to <c>ColumnByDataPredictor.UpdateByRow</c>
        /// and <c>ColumnByDataPredictor.WriteData()</c> is called at the end.</param>
        public void FindBordersAndPersonNames(TableHeader columnOrdering, bool updateTrigrams)
        {
            int rowOffset = columnOrdering.FirstDataRow;

            if (columnOrdering.Section != null)
            {
                CreateNewSection(rowOffset, columnOrdering.Section);
            }

            // While set, rows are neither turned into relatives nor attached to a person;
            // it is cleared once a declarant row yields a non-null CurrentPerson.
            bool   skipEmptyPerson = false;
            string prevPersonName  = "";

            for (int row = rowOffset; row < Adapter.GetRowsCount(); row++)
            {
                DataRow currRow = Adapter.GetRow(columnOrdering, row);
                if (currRow == null || currRow.IsEmpty())
                {
                    continue;
                }
                if (IAdapter.IsNumbersRow(currRow.Cells))
                {
                    continue;
                }
                Logger.Debug(String.Format("currRow {0}, col_count={1}: {2}", row, currRow.Cells.Count, currRow.DebugString()));

                string sectionName;
                if (Adapter.IsSectionRow(row, currRow.Cells, columnOrdering.GetMaxColumnEndIndex(), false, out sectionName))
                {
                    CreateNewSection(row, sectionName);
                    continue;
                }
                {
                    TableHeader newColumnOrdering;
                    if (IsHeaderRow(currRow, out newColumnOrdering))
                    {
                        // A header in the middle of the table: switch to it and jump past its rows.
                        columnOrdering = newColumnOrdering;
                        Logger.Debug(String.Format("found a new table header {0}", currRow.DebugString()));
                        row = newColumnOrdering.GetPossibleHeaderEnd() - 1; // row++ in "for" cycle
                        continue;
                    }
                }

                if (updateTrigrams)
                {
                    ColumnByDataPredictor.UpdateByRow(columnOrdering, currRow);
                }

                if (!currRow.InitPersonData(prevPersonName))
                {
                    // be robust, ignore errors see 8562.pdf.docx in tests
                    continue;
                }

                if (currRow.PersonName != String.Empty)
                {
                    prevPersonName = currRow.PersonName;
                    CreateNewDeclarant(Adapter, currRow);
                    if (CurrentPerson != null)
                    {
                        skipEmptyPerson = false;
                    }
                }
                else if (currRow.RelativeType != String.Empty)
                {
                    if (!skipEmptyPerson)
                    {
                        try
                        {
                            CreateNewRelative(currRow);
                        }
                        catch (SmartParserRelativeWithoutPersonException e)
                        {
                            skipEmptyPerson = true;
                            Logger.Error(e.Message);
                            continue;
                        }
                    }
                }
                else
                {
                    if (CurrentPerson == null && FailOnRelativeOrphan)
                    {
                        skipEmptyPerson = true;
                        Logger.Error(String.Format("No person to attach info on row={0}", row));
                        continue;
                    }
                }
                if (!skipEmptyPerson)
                {
                    AddInputRowToCurrentPerson(columnOrdering, currRow);
                    if (_Declaration.Properties.Year == null && columnOrdering.ContainsField(DeclarationField.IncomeYear))
                    {
                        var incomeYear = currRow.GetDeclarationField(DeclarationField.IncomeYear);
                        if (incomeYear != null)
                        {
                            // A malformed year cell must not abort the whole parse: log it and
                            // leave Year unset so that a later row can still provide it.
                            int parsedYear;
                            if (int.TryParse(incomeYear.Text, out parsedYear))
                            {
                                _Declaration.Properties.Year = parsedYear;
                            }
                            else
                            {
                                Logger.Error(String.Format("Cannot parse income year \"{0}\" on row={1}", incomeYear.Text, row));
                            }
                        }
                    }
                }
            }
            if (updateTrigrams)
            {
                ColumnByDataPredictor.WriteData();
            }

            Logger.Info("Parsed {0} declarants", _Declaration.PublicServants.Count());
        }