/// <summary>
/// Runs table header recognition on <c>fsin_2016_extract.xlsx</c> through the Aspose
/// Excel adapter and checks that 12 columns are found at the expected positions.
/// </summary>
public void ColumnDetectorTest1TIAdapter()
{
    string xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "fsin_2016_extract.xlsx");

    // The Npoi variant (NpoiExcelAdapter.CreateAdapter(xlsxFile)) is disabled for this file:
    // column widths could not be read from it, so Aspose is used instead. To be fixed in the
    // future (is it a bug in the Npoi library?).
    // NOTE(review): the original comment named Aspose as the failing library as well;
    // presumably Npoi was meant - confirm.
    IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsxFile);

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(12, ordering.ColumnOrder.Count);
    Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
    Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
    Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
    Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
    Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
    Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
    Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
    Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
    Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
    Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
    Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
    Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
}
/// <summary>
/// Runs table header recognition on <c>glav_44_2010.doc</c> and checks that
/// exactly 9 columns are recognized.
/// </summary>
public void RealEstateColumnDetector()
{
    string docxFile = Path.Combine(TestUtil.GetTestDataPath(), "glav_44_2010.doc");
    IAdapter adapter = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(9, ordering.ColumnOrder.Count);
}
/// <summary>
/// Runs table header recognition on <c>82442.doc</c> and checks that the
/// <c>DeclaredYearlyIncome</c> field is placed at column 1.
/// </summary>
public void SpendingsWrongColumnTest()
{
    string docxFile = Path.Combine(TestUtil.GetTestDataPath(), "82442.doc");
    IAdapter adapter = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
}
/// <summary>
/// Runs table header recognition on <c>customs-tworow-header.xls</c> and checks that
/// 14 columns are recognized with <c>Occupation</c> at column 2.
/// </summary>
public void TwoRowHeaderEmptyTopCellTest2()
{
    string xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "customs-tworow-header.xls");
    IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsxFile);

    // The data-based column predictor is initialized before the header is examined.
    ColumnByDataPredictor.InitializeIfNotAlready();
    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(14, ordering.ColumnOrder.Count);
    Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
}
/// <summary>
/// Runs table header recognition on <c>57715.doc</c> and checks that 13 columns are
/// recognized, with <c>Vehicle</c> at column 10 and <c>DeclaredYearlyIncome</c> at column 11.
/// </summary>
public void TwoRowHeaderEmptyTopCellTest()
{
    string docxFile = Path.Combine(TestUtil.GetTestDataPath(), "57715.doc");
    IAdapter adapter = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(13, ordering.ColumnOrder.Count);
    Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
    Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
}
/// <summary>
/// Runs table header recognition on <c>18664.docx</c> and checks that 13 columns are
/// recognized, with <c>AcquiredProperty</c> at column 11 and <c>MoneySources</c> at column 12.
/// </summary>
public void RedundantColumnDetector()
{
    string docxFile = Path.Combine(TestUtil.GetTestDataPath(), "18664.docx");
    IAdapter adapter = OpenXmlWordAdapter.CreateAdapter(docxFile, -1);

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(13, ordering.ColumnOrder.Count);
    Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.AcquiredProperty].BeginColumn);
    Assert.AreEqual(12, ordering.ColumnOrder[DeclarationField.MoneySources].BeginColumn);
}
/// <summary>
/// Smoke test for <c>c_sample.xlsx</c>: recognizes the table header, parses the document
/// and serializes the resulting declaration. It contains no assertions, so it passes
/// whenever none of these steps throws.
/// </summary>
public void XlsxTypeCTest()
{
    string testDataDirectory = TestUtil.GetTestDataPath();
    string samplePath = Path.Combine(testDataDirectory, "c_sample.xlsx");
    IAdapter excelAdapter = AsposeExcelAdapter.CreateAdapter(samplePath);

    var recognizedHeader = TableHeaderRecognizer.ExamineTableBeginning(excelAdapter);

    var declarationParser = new SmartParser.Lib.Parser(excelAdapter);
    Declaration parsedDeclaration = declarationParser.Parse(recognizedHeader, false, null);

    // The serializer reports its remarks through the ref argument; the result is not inspected.
    string serializerComments = string.Empty;
    string serialized = DeclarationSerializer.Serialize(parsedDeclaration, ref serializerComments);
}
/// <summary>
/// Runs table header recognition on <c>17497.xls</c> and checks that 15 columns are
/// recognized, that the header contains the separate <c>VehicleType</c> and
/// <c>VehicleModel</c> fields, and that it does not contain the <c>Vehicle</c> field.
/// </summary>
public void FixVehicleColumns()
{
    string testDataDirectory = TestUtil.GetTestDataPath();
    string workbookPath = Path.Combine(testDataDirectory, "17497.xls");
    IAdapter excelAdapter = AsposeExcelAdapter.CreateAdapter(workbookPath, -1);

    // The data-based column predictor is initialized before the header is examined.
    ColumnByDataPredictor.InitializeIfNotAlready();
    TableHeader header = TableHeaderRecognizer.ExamineTableBeginning(excelAdapter);

    int recognizedColumns = header.ColumnOrder.Count;
    Assert.AreEqual(15, recognizedColumns);

    Assert.IsTrue(header.ContainsField(DeclarationField.VehicleType));
    Assert.IsTrue(header.ContainsField(DeclarationField.VehicleModel));
    Assert.IsFalse(header.ContainsField(DeclarationField.Vehicle));
}
/// <summary>
/// Runs table header recognition on <c>fsin_2016_extract.xlsx</c> and checks that
/// 12 columns are found at the expected positions.
/// </summary>
public void ColumnDetectorTest1()
{
    string xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "fsin_2016_extract.xlsx");
    IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsxFile);

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Expected value goes first so that a failure reports "expected X, actual Y" correctly.
    Assert.AreEqual(12, ordering.ColumnOrder.Count);
    Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
    Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
    Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
    Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
    Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
    Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
    Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
    Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
    Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
    Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
    Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
    Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
}
/// <summary>
/// Runs table header recognition on <c>rabotniki_podved_organizacii_2013.xlsx</c> and
/// checks the column position assigned to each of the 13 expected declaration fields.
/// </summary>
public void EmptyRealStateTypeColumnDetectorTest1()
{
    string xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "rabotniki_podved_organizacii_2013.xlsx");
    IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsxFile);

    // The data-based column predictor is initialized before the header is examined.
    ColumnByDataPredictor.InitializeIfNotAlready();
    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // AreEqual (expected first) is used instead of IsTrue(a == b) so that a failure
    // shows which column was actually detected.
    Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.DeclarantIndex].BeginColumn);
    Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
    Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
    Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
    Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
    Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
    Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
    Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
    Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
    Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
    Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
    Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
    Assert.AreEqual(12, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
}
/// <summary>
/// Builds a <see cref="Declaration"/> from the document behind <paramref name="adapter"/>.
/// When the adapter has a current scheme, parsing is delegated to that scheme; otherwise
/// the table header is recognized, validated and passed to the parser.
/// </summary>
/// <param name="adapter">Adapter giving access to the input document.</param>
/// <param name="inputFile">
/// Path of the input file; its file name is used as a fallback source for the declaration
/// year, and the path is passed on to <c>SaveRandomPortionToToloka</c>.
/// </param>
/// <returns>
/// The parsed declaration, or <c>null</c> when <c>ColumnsOnly</c> is set or a column dump
/// was requested through <c>ColumnToDump</c>.
/// </returns>
/// <exception cref="SmartParserException">
/// Thrown when the header has no name column, or when it has none of the income columns
/// and <c>TableHeader.SearchForFioColumnOnly</c> is not set.
/// </exception>
static Declaration BuildDeclarations(IAdapter adapter, string inputFile)
{
    Declaration declaration;
    string inputFileName = Path.GetFileName(inputFile);
    SmartParser.Lib.Parser parser = new SmartParser.Lib.Parser(adapter, !SkipRelativeOrphan);

    if (adapter.CurrentScheme == default)
    {
        SmartParser.Lib.TableHeader? columnOrdering = null;
        try
        {
            columnOrdering = TableHeaderRecognizer.ExamineTableBeginning(adapter);
            LastGoodOrdering = columnOrdering;
        }
        catch (Exception ex)
        {
            Logger.Info(ex.Message);
            if (LastGoodOrdering == null)
            {
                // Nothing to fall back to. A bare "throw" keeps the original stack trace,
                // which "throw ex" would have reset.
                throw;
            }

            Logger.Info("use the last known table header scheme");
            // This is the same object as LastGoodOrdering, so the reset below is applied to it as well.
            columnOrdering = LastGoodOrdering;
            columnOrdering.FirstDataRow = 0;
        }

        // Try to extract declaration year from file name if we weren't able to get it from document title
        if (!columnOrdering.Year.HasValue)
        {
            columnOrdering.Year = TextHelpers.ExtractYear(inputFileName);
        }

        Logger.Info("Column ordering: ");
        foreach (var ordering in columnOrdering.ColumnOrder)
        {
            Logger.Info(ordering.ToString());
        }
        Logger.Info(String.Format("OwnershipTypeInSeparateField: {0}", columnOrdering.OwnershipTypeInSeparateField));

        // Header-only mode: the column report above is the whole output.
        if (ColumnsOnly)
        {
            return null;
        }

        // Column dump mode: dump the requested column and stop.
        if (ColumnToDump != DeclarationField.None)
        {
            DumpColumn(adapter, columnOrdering, ColumnToDump);
            return null;
        }

        if (columnOrdering.Title != null)
        {
            Logger.Info("Declaration Title: {0} ", columnOrdering.Title);
        }
        if (columnOrdering.Year != null)
        {
            Logger.Info("Declaration Year: {0} ", columnOrdering.Year.Value);
        }
        if (columnOrdering.MinistryName != null)
        {
            Logger.Info("Declaration Ministry: {0} ", columnOrdering.MinistryName);
        }

        if (!columnOrdering.HasNameColumn())
        {
            // TODO: first look for the first section_row and check it; the declarant's full name may be located there
            // https://declarator.org/admin/declarations/jsonfile/186842/change/
            throw new SmartParserException("Insufficient fields: No any of Declarant Name fields found.");
        }

        bool hasIncomeColumn =
            columnOrdering.ContainsField(DeclarationField.DeclarantIncome) ||
            columnOrdering.ContainsField(DeclarationField.DeclarantIncomeInThousands) ||
            columnOrdering.ContainsField(DeclarationField.DeclaredYearlyIncome) ||
            columnOrdering.ContainsField(DeclarationField.DeclaredYearlyIncomeThousands);

        // A missing income column is tolerated only when SearchForFioColumnOnly is set.
        if (!hasIncomeColumn && !SmartParser.Lib.TableHeader.SearchForFioColumnOnly)
        {
            throw new SmartParserException("Insufficient fields: No any of Declarant Income fields found.");
        }

        declaration = parser.Parse(columnOrdering, BuildTrigrams, UserDocumentFileId);
        SaveRandomPortionToToloka(adapter, columnOrdering, declaration, inputFile);
    }
    else
    {
        // The adapter already carries a scheme: let the scheme drive the parser.
        declaration = adapter.CurrentScheme.Parse(parser, UserDocumentFileId);
    }

    return declaration;
}