Esempio n. 1
0
 // Re-registers the combined NameAndOccupationOrRelativeType column as NameOrRelativeType
 // when the header also contains a separate Occupation column. Does nothing otherwise.
 static void FixBadColumnName02(TableHeader c)
 {
     bool hasCombinedColumn = c.ContainsField(DeclarationField.NameAndOccupationOrRelativeType);
     if (!hasCombinedColumn || !c.ContainsField(DeclarationField.Occupation))
     {
         return;
     }

     // Take the existing column description, retarget it to the new field,
     // add it under that field and only then drop the old entry.
     TColumnInfo combinedColumn = c.ColumnOrder[DeclarationField.NameAndOccupationOrRelativeType];
     combinedColumn.Field = DeclarationField.NameOrRelativeType;
     c.Add(combinedColumn);
     c.Delete(DeclarationField.NameAndOccupationOrRelativeType);
 }
Esempio n. 2
0
 // Re-registers the column stored under naturalText as the field given by type,
 // but only when the header contains naturalText together with both the country
 // and the square columns. In every other case the header is left untouched.
 static void FixBadColumnName01_Template(TableHeader c, DeclarationField naturalText, DeclarationField country, DeclarationField square, DeclarationField type)
 {
     bool shouldRename = c.ContainsField(naturalText)
                         && c.ContainsField(country)
                         && c.ContainsField(square);
     if (!shouldRename)
     {
         return;
     }

     // Retarget the existing column description, add it under the new field,
     // then remove the entry kept under the old field.
     TColumnInfo renamedColumn = c.ColumnOrder[naturalText];
     renamedColumn.Field = type;
     c.Add(renamedColumn);
     c.Delete(naturalText);
 }
Esempio n. 3
0
        // Replaces a single Vehicle column by two synthetic columns, VehicleType and
        // VehicleModel, when the Vehicle header cell is merged over exactly two columns
        // and its own text mentions vehicles, the make and the kind. Each new column
        // covers one table column and gets half of the header cell width.
        static void FixMissingSubheadersForVehicle(IAdapter adapter, TableHeader columnOrdering)
        {
            if (!columnOrdering.ContainsField(DeclarationField.Vehicle))
            {
                return;
            }

            var vehicleHeader = adapter.GetDeclarationFieldWeak(columnOrdering, columnOrdering.HeaderBegin.Value, DeclarationField.Vehicle, out TColumnInfo _);
            if (vehicleHeader.MergedColsCount != 2)
            {
                return;
            }

            // Nothing is changed when exactly one sub-cell is found under the header.
            var cellsBelow = FindSubcellsUnder(adapter, vehicleHeader);
            if (cellsBelow.Count == 1)
            {
                return;
            }

            // Compare against the lower-cased header text with all spaces removed.
            string normalizedTitle = vehicleHeader.Text.ToLower().Replace(" ", "");
            if (!normalizedTitle.Contains("транспортныесредства")
                || !normalizedTitle.Contains("марка")
                || !normalizedTitle.Contains("вид"))
            {
                return;
            }

            var halfWidth = vehicleHeader.CellWidth / 2;

            // First table column of the merged header: vehicle type.
            columnOrdering.Add(new TColumnInfo
            {
                BeginColumn      = vehicleHeader.Col,
                EndColumn        = vehicleHeader.Col + 1,
                ColumnPixelWidth = halfWidth,
                Field            = DeclarationField.VehicleType
            });

            // Second table column of the merged header: vehicle model.
            columnOrdering.Add(new TColumnInfo
            {
                BeginColumn      = vehicleHeader.Col + 1,
                EndColumn        = vehicleHeader.Col + 2,
                ColumnPixelWidth = halfWidth,
                Field            = DeclarationField.VehicleModel
            });

            columnOrdering.Delete(DeclarationField.Vehicle);
        }
Esempio n. 4
0
        // Replaces the column registered under mergedField by one column per entry of
        // subColumns, each built from the matching sub-cell found under the merged header
        // cell. The header is left untouched when mergedField is absent, when the number
        // of sub-cells differs from the number of subColumns, or when CheckSquareColumn
        // rejects the layout.
        static void FixMissingSubheadersForMergedColumns(IAdapter adapter, TableHeader columnOrdering,
                                                         DeclarationField mergedField, DeclarationField[] subColumns)
        {
            if (!columnOrdering.ContainsField(mergedField))
            {
                return;
            }

            var mergedHeader = adapter.GetDeclarationFieldWeak(columnOrdering, columnOrdering.HeaderBegin.Value, mergedField, out TColumnInfo _);
            var cellsBelow   = FindSubcellsUnder(adapter, mergedHeader);

            // Only the sub-cell at index 1 (the second column) is validated here;
            // TODO: also check the first and the third one.
            // NOTE(review): the meaning of the literal 5 is defined by CheckSquareColumn — confirm there.
            bool layoutMatches = cellsBelow.Count == subColumns.Count()
                                 && CheckSquareColumn(adapter, columnOrdering.FirstDataRow, 5, cellsBelow, 1);
            if (!layoutMatches)
            {
                return;
            }

            // Pair every requested field with the sub-cell at the same position.
            int position = 0;
            foreach (DeclarationField subField in subColumns)
            {
                AddColumn(columnOrdering, subField, cellsBelow[position]);
                position++;
            }

            columnOrdering.Delete(mergedField);
        }
Esempio n. 5
0
        // Walks the table rows from columnOrdering.FirstDataRow to the last row reported by
        // Adapter and assigns every data row to a section, a declarant or a relative.
        //
        // For each row, in this order:
        //   * null, empty and "numbers" rows are skipped;
        //   * a section row starts a new section;
        //   * a header row replaces columnOrdering and moves the row cursor to the header end;
        //   * otherwise the row starts a new declarant (non-empty PersonName), a new relative
        //     (non-empty RelativeType) or is a continuation row, and, unless rows are currently
        //     being skipped, is passed to AddInputRowToCurrentPerson.
        //
        // columnOrdering: the header in effect for the first data row; it is rebound locally
        //                 whenever a new header row is found.
        // updateTrigrams: when true, every row that reaches that point is passed to
        //                 ColumnByDataPredictor.UpdateByRow and ColumnByDataPredictor.WriteData()
        //                 is called once after the loop.
        public void FindBordersAndPersonNames(TableHeader columnOrdering, bool updateTrigrams)
        {
            int rowOffset = columnOrdering.FirstDataRow;

            // A header that already carries a section opens that section at the first data row.
            if (columnOrdering.Section != null)
            {
                CreateNewSection(rowOffset, columnOrdering.Section);
            }

            // skipEmptyPerson: while true, rows are not attached to any person. It is set when a
            // relative or continuation row has no person to belong to, and cleared again when a
            // new declarant is created and CurrentPerson is not null.
            bool   skipEmptyPerson = false;
            // prevPersonName: the last non-empty person name seen; passed to InitPersonData.
            string prevPersonName  = "";

            for (int row = rowOffset; row < Adapter.GetRowsCount(); row++)
            {
                DataRow currRow = Adapter.GetRow(columnOrdering, row);
                if (currRow == null || currRow.IsEmpty())
                {
                    continue;
                }
                if (IAdapter.IsNumbersRow(currRow.Cells))
                {
                    continue;
                }
                Logger.Debug(String.Format("currRow {0}, col_count={1}: {2}", row, currRow.Cells.Count, currRow.DebugString()));

                string sectionName;
                if (Adapter.IsSectionRow(row, currRow.Cells, columnOrdering.GetMaxColumnEndIndex(), false, out sectionName))
                {
                    CreateNewSection(row, sectionName);
                    continue;
                }
                {
                    // Scoped block: newColumnOrdering is only needed for the header check.
                    TableHeader newColumnOrdering;
                    if (IsHeaderRow(currRow, out newColumnOrdering))
                    {
                        // All following rows are read with the new header.
                        columnOrdering = newColumnOrdering;
                        Logger.Debug(String.Format("found a new table header {0}", currRow.DebugString()));
                        row = newColumnOrdering.GetPossibleHeaderEnd() - 1; // row++ in "for" cycle
                        continue;
                    }
                }

                if (updateTrigrams)
                {
                    ColumnByDataPredictor.UpdateByRow(columnOrdering, currRow);
                }

                if (!currRow.InitPersonData(prevPersonName))
                {
                    // be robust, ignore errors see 8562.pdf.docx in tests
                    continue;
                }

                if (currRow.PersonName != String.Empty)
                {
                    // The row names a person: it starts a new declarant.
                    prevPersonName = currRow.PersonName;
                    CreateNewDeclarant(Adapter, currRow);
                    if (CurrentPerson != null)
                    {
                        skipEmptyPerson = false;
                    }
                }
                else if (currRow.RelativeType != String.Empty)
                {
                    // The row names a relation type: it starts a new relative, unless rows are
                    // currently being skipped.
                    if (!skipEmptyPerson)
                    {
                        try
                        {
                            CreateNewRelative(currRow);
                        }
                        catch (SmartParserRelativeWithoutPersonException e)
                        {
                            // Log the problem and skip rows until the next declarant.
                            skipEmptyPerson = true;
                            Logger.Error(e.Message);
                            continue;
                        }
                    }
                }
                else
                {
                    // Neither a name nor a relation type: a continuation row for the current person.
                    if (CurrentPerson == null && FailOnRelativeOrphan)
                    {
                        skipEmptyPerson = true;
                        Logger.Error(String.Format("No person to attach info on row={0}", row));
                        continue;
                    }
                }
                if (!skipEmptyPerson)
                {
                    AddInputRowToCurrentPerson(columnOrdering, currRow);
                    // Take the declaration year from the first row that has an IncomeYear cell,
                    // but only while the year is still unset.
                    if (_Declaration.Properties.Year == null && columnOrdering.ContainsField(DeclarationField.IncomeYear))
                    {
                        var incomeYear = currRow.GetDeclarationField(DeclarationField.IncomeYear);
                        if (incomeYear != null)
                        {
                            // NOTE(review): int.Parse throws if the cell text is not a plain
                            // integer; confirm that callers expect this exception.
                            _Declaration.Properties.Year = int.Parse(incomeYear.Text);
                        }
                    }
                }
            }
            if (updateTrigrams)
            {
                ColumnByDataPredictor.WriteData();
            }

            Logger.Info("Parsed {0} declarants", _Declaration.PublicServants.Count());
        }
Esempio n. 6
0
        // Tries to split one table row whose NameOrRelativeType cell holds several lines
        // (the declarant followed by lines that parse as relation types) into several rows,
        // one per detected part. See 8562.pdf.docx in tests.
        // NOTE(review): the original comment says string widths are calculated with
        // graphics.MeasureString; that presumably happens inside the helpers called here — confirm.
        //
        // columnOrdering: the header used to check that a NameOrRelativeType column exists.
        // row:            the row to divide.
        // Returns true when the row was divided and every part was accepted by
        // DividedLinesToDataRows; false when the row does not qualify or a part was rejected.
        bool DivideDeclarantAndRelativesBySoftEolns(TableHeader columnOrdering, DataRow row)
        {
            // Only applies while the current declarant has no relatives yet.
            if (CurrentDeclarant.Relatives.Any())
            {
                return false;
            }
            if (!columnOrdering.ContainsField(DeclarationField.NameOrRelativeType))
            {
                return false;
            }

            Cell nameCell = row.GetDeclarationField(DeclarationField.NameOrRelativeType);

            // The null guard comes first: the type tests below are already false for null,
            // so a null check placed after them could never be reached.
            if (nameCell is null)
            {
                return false;
            }
            // Only Word (OpenXml) and HTML cells are handled.
            if (!(nameCell is OpenXmlWordCell) && !(nameCell is HtmlAdapterCell))
            {
                return false;
            }
            if (nameCell.IsEmpty)
            {
                return false;
            }
            if (row.adapter.IsExcel())
            {
                return false; // no font info
            }

            List<string> lines = GetLinesWithSoftBreaks(nameCell);
            if (lines.Count < 2)
            {
                return false;
            }

            // borders holds the index of the first line of every part. Part 0 always starts
            // at line 0; a new part starts at each later line that parses as a relation type.
            List<int> borders = new List<int>() { 0 };
            for (int i = 1; i < lines.Count; ++i)
            {
                if (DataHelper.ParseRelationType(lines[i], false) != RelationType.Error)
                {
                    borders.Add(i);
                }
            }
            if (borders.Count == 1)
            {
                // No relation line found: nothing to divide.
                return false;
            }

            // One deep copy of the source row per part; DivideCell then processes every
            // cell of the source row against these copies.
            List<DataRow> dividedLines = new List<DataRow>();
            for (int i = 0; i < borders.Count; ++i)
            {
                dividedLines.Add(row.DeepClone());
            }
            for (int i = 0; i < row.Cells.Count; ++i)
            {
                DivideCell(row, i, borders, dividedLines);
            }

            // NOTE(review): if a later part is rejected, whatever DividedLinesToDataRows did
            // for the earlier parts is not undone here — confirm this is acceptable.
            for (int k = 0; k < borders.Count; ++k)
            {
                if (!DividedLinesToDataRows(row, dividedLines, k))
                {
                    return false;
                }
            }
            Logger.Debug(String.Format("Divide line to {0} parts", borders.Count));
            return true;
        }