Exemplo n.º 1
0
        /// <summary>
        /// Verifies that the header of "fsin_2016_extract.xlsx", read through the Aspose adapter,
        /// is recognized as 12 columns and that each declaration field starts at the expected column.
        /// </summary>
        public void ColumnDetectorTest1TIAdapter()
        {
            string xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "fsin_2016_extract.xlsx");

            //IAdapter adapter = NpoiExcelAdapter.CreateAdapter(xlsxFile);
            // NOTE(review): the original remark is ambiguous; it appears to say that column widths
            // could not be read from this file with the NPOI adapter (possibly an NPOI bug),
            // so the Aspose adapter is used instead. Revisit in the future.

            IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsxFile);

            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(12, ordering.ColumnOrder.Count);
            Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
            Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
            Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
            Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
            Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
            Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
            Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
            Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
            Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
            Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
            Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
            Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Verifies that the header of "glav_44_2010.doc" is recognized as exactly 9 columns.
        /// </summary>
        public void RealEstateColumnDetector()
        {
            string   docxFile = Path.Combine(TestUtil.GetTestDataPath(), "glav_44_2010.doc");
            IAdapter adapter  = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(9, ordering.ColumnOrder.Count);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Verifies that in "82442.doc" the declared yearly income field is detected at column 1.
        /// </summary>
        public void SpendingsWrongColumnTest()
        {
            string   docxFile = Path.Combine(TestUtil.GetTestDataPath(), "82442.doc");
            IAdapter adapter  = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Verifies that the header of "customs-tworow-header.xls" is recognized as 14 columns
        /// with the occupation field at column 2.
        /// </summary>
        public void TwoRowHeaderEmptyTopCellTest2()
        {
            string   xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "customs-tworow-header.xls");
            IAdapter adapter  = AsposeExcelAdapter.CreateAdapter(xlsxFile);

            // The data-based column predictor is initialized before header recognition runs.
            ColumnByDataPredictor.InitializeIfNotAlready();
            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(14, ordering.ColumnOrder.Count);
            Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Verifies that the header of "57715.doc" is recognized as 13 columns, with the vehicle
        /// field at column 10 and the declared yearly income field at column 11.
        /// </summary>
        public void TwoRowHeaderEmptyTopCellTest()
        {
            string   docxFile = Path.Combine(TestUtil.GetTestDataPath(), "57715.doc");
            IAdapter adapter  = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(13, ordering.ColumnOrder.Count);
            Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
            Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Verifies that the header of "18664.docx" is recognized as 13 columns, with the acquired
        /// property field at column 11 and the money sources field at column 12.
        /// </summary>
        public void RedundantColumnDetector()
        {
            string   docxFile = Path.Combine(TestUtil.GetTestDataPath(), "18664.docx");
            IAdapter adapter  = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(13, ordering.ColumnOrder.Count);
            Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.AcquiredProperty].BeginColumn);
            Assert.AreEqual(12, ordering.ColumnOrder[DeclarationField.MoneySources].BeginColumn);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Smoke test: recognizes the header of "c_sample.xlsx", parses the document and serializes
        /// the resulting declaration. There are no assertions; the test passes if nothing throws.
        /// </summary>
        public void XlsxTypeCTest()
        {
            IAdapter adapter = AsposeExcelAdapter.CreateAdapter(
                Path.Combine(TestUtil.GetTestDataPath(), "c_sample.xlsx"));
            var header = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            SmartParser.Lib.Parser documentParser = new SmartParser.Lib.Parser(adapter);
            Declaration parsed = documentParser.Parse(header, false, null);

            // The serialized text is not inspected; only successful completion matters here.
            string serializerComments = "";
            string serialized = DeclarationSerializer.Serialize(parsed, ref serializerComments);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Verifies that the header of "17497.xls" yields 15 columns and exposes separate
        /// vehicle type and vehicle model fields rather than a single combined vehicle field.
        /// </summary>
        public void FixVehicleColumns()
        {
            IAdapter adapter = AsposeExcelAdapter.CreateAdapter(
                Path.Combine(TestUtil.GetTestDataPath(), "17497.xls"), -1);

            // The data-based column predictor is initialized before header recognition runs.
            ColumnByDataPredictor.InitializeIfNotAlready();

            TableHeader header = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            Assert.AreEqual(15, header.ColumnOrder.Count);

            bool hasVehicleType = header.ContainsField(DeclarationField.VehicleType);
            Assert.IsTrue(hasVehicleType);

            bool hasVehicleModel = header.ContainsField(DeclarationField.VehicleModel);
            Assert.IsTrue(hasVehicleModel);

            bool hasCombinedVehicle = header.ContainsField(DeclarationField.Vehicle);
            Assert.IsFalse(hasCombinedVehicle);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Verifies that the header of "fsin_2016_extract.xlsx" is recognized as 12 columns
        /// and that each declaration field starts at the expected column.
        /// </summary>
        public void ColumnDetectorTest1()
        {
            string   xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "fsin_2016_extract.xlsx");
            IAdapter adapter  = AsposeExcelAdapter.CreateAdapter(xlsxFile);

            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            Assert.AreEqual(12, ordering.ColumnOrder.Count);
            Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
            Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
            Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
            Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
            Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
            Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
            Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
            Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
            Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
            Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
            Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
            Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Verifies the column positions detected in the header of
        /// "rabotniki_podved_organizacii_2013.xlsx": a declarant index at column 0 followed by
        /// the remaining declaration fields at columns 1 through 12.
        /// </summary>
        public void EmptyRealStateTypeColumnDetectorTest1()
        {
            string   xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "rabotniki_podved_organizacii_2013.xlsx");
            IAdapter adapter  = AsposeExcelAdapter.CreateAdapter(xlsxFile);

            // The data-based column predictor is initialized before header recognition runs.
            ColumnByDataPredictor.InitializeIfNotAlready();
            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // AreEqual (instead of IsTrue on a comparison) makes a failure report the actual column.
            Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.DeclarantIndex].BeginColumn);
            Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
            Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
            Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
            Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
            Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
            Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
            Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
            Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
            Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
            Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
            Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
            Assert.AreEqual(12, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Builds a <see cref="Declaration"/> from the document behind <paramref name="adapter"/>.
        /// When the adapter has no predefined scheme, the table header is recognized first (falling
        /// back to the last successfully recognized header if recognition fails), the header is
        /// logged and validated, and the document is then parsed. Otherwise parsing is delegated
        /// to the adapter's current scheme.
        /// </summary>
        /// <param name="adapter">Adapter giving access to the input document.</param>
        /// <param name="inputFile">
        /// Path of the input file; its file name is used to extract the declaration year when the
        /// recognized header has none, and the path is passed on to the Toloka sampling step.
        /// </param>
        /// <returns>
        /// The parsed declaration, or <c>null</c> when <c>ColumnsOnly</c> is set or a column dump
        /// was requested via <c>ColumnToDump</c>.
        /// </returns>
        /// <exception cref="SmartParserException">
        /// Thrown when the header has no declarant name column, or has none of the income fields
        /// while <c>TableHeader.SearchForFioColumnOnly</c> is false.
        /// </exception>
        static Declaration BuildDeclarations(IAdapter adapter, string inputFile)
        {
            Declaration declaration;
            string      inputFileName = Path.GetFileName(inputFile);

            SmartParser.Lib.Parser parser = new SmartParser.Lib.Parser(adapter, !SkipRelativeOrphan);

            if (adapter.CurrentScheme == default)
            {
                SmartParser.Lib.TableHeader? columnOrdering = null;
                try
                {
                    columnOrdering   = TableHeaderRecognizer.ExamineTableBeginning(adapter);
                    LastGoodOrdering = columnOrdering;
                }
                catch (Exception ex)
                {
                    Logger.Info(ex.Message);
                    if (LastGoodOrdering != null)
                    {
                        Logger.Info("use the last known table header scheme");
                        // columnOrdering now refers to the same object as LastGoodOrdering, so
                        // resetting FirstDataRow also changes the remembered header.
                        columnOrdering = LastGoodOrdering;
                        columnOrdering.FirstDataRow = 0;
                    }
                    else
                    {
                        // Rethrow with "throw;" to keep the original stack trace
                        // ("throw ex;" would reset it to this line).
                        throw;
                    }
                }

                // Try to extract declaration year from file name if we weren't able to get it from document title
                if (!columnOrdering.Year.HasValue)
                {
                    columnOrdering.Year = TextHelpers.ExtractYear(inputFileName);
                }

                Logger.Info("Column ordering: ");
                foreach (var ordering in columnOrdering.ColumnOrder)
                {
                    Logger.Info(ordering.ToString());
                }

                Logger.Info(String.Format("OwnershipTypeInSeparateField: {0}",
                                          columnOrdering.OwnershipTypeInSeparateField));

                // Header-only mode: the header has been logged above, nothing is parsed.
                if (ColumnsOnly)
                {
                    return(null);
                }

                // Column dump mode: dump the requested column and stop without parsing.
                if (ColumnToDump != DeclarationField.None)
                {
                    DumpColumn(adapter, columnOrdering, ColumnToDump);
                    return(null);
                }

                if (columnOrdering.Title != null)
                {
                    Logger.Info("Declaration Title: {0} ", columnOrdering.Title);
                }

                if (columnOrdering.Year != null)
                {
                    Logger.Info("Declaration Year: {0} ", columnOrdering.Year.Value);
                }

                if (columnOrdering.MinistryName != null)
                {
                    Logger.Info("Declaration Ministry: {0} ", columnOrdering.MinistryName);
                }

                if (!columnOrdering.HasNameColumn())
                {
                    // TODO: first look for the first section_row and check it, because the
                    // declarant's full name may be located there.
                    // https://declarator.org/admin/declarations/jsonfile/186842/change/
                    throw new SmartParserException("Insufficient fields: No any of Declarant Name fields found.");
                }

                bool hasIncomeField =
                    columnOrdering.ContainsField(DeclarationField.DeclarantIncome) ||
                    columnOrdering.ContainsField(DeclarationField.DeclarantIncomeInThousands) ||
                    columnOrdering.ContainsField(DeclarationField.DeclaredYearlyIncome) ||
                    columnOrdering.ContainsField(DeclarationField.DeclaredYearlyIncomeThousands);

                // A missing income column is tolerated only when SearchForFioColumnOnly is set.
                if (!hasIncomeField && !SmartParser.Lib.TableHeader.SearchForFioColumnOnly)
                {
                    throw new SmartParserException("Insufficient fields: No any of Declarant Income fields found.");
                }

                declaration = parser.Parse(columnOrdering, BuildTrigrams, UserDocumentFileId);
                SaveRandomPortionToToloka(adapter, columnOrdering, declaration, inputFile);
            }
            else
            {
                // The adapter already carries a scheme; let it drive the parsing.
                declaration = adapter.CurrentScheme.Parse(parser, UserDocumentFileId);
            }
            return(declaration);
        }