/// <summary>
/// Verifies that <c>DataRow.CheckPersonName</c> accepts a three-part name
/// that is followed by a trailing comma.
/// </summary>
public void TestCanBePersonName()
{
    const string candidate = "Десятов Владимир Вячеславович,";

    ColumnByDataPredictor.InitializeIfNotAlready();
    var accepted = DataRow.CheckPersonName(candidate);

    Assert.IsTrue(accepted);
}
/// <summary>
/// Runs header recognition on "customs-tworow-header.xls" and checks that
/// 14 columns are detected and that the Occupation column begins at index 2.
/// </summary>
public void TwoRowHeaderEmptyTopCellTest2()
{
    string xlsFile = Path.Combine(TestUtil.GetTestDataPath(), "customs-tworow-header.xls");
    IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsFile);
    ColumnByDataPredictor.InitializeIfNotAlready();

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // Assert.AreEqual takes (expected, actual); keeping that order makes the
    // failure message report the right value as "expected".
    Assert.AreEqual(14, ordering.ColumnOrder.Count);
    Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
}
/// <summary>
/// Runs header recognition on "17497.xls" and checks that 15 columns are
/// detected, that VehicleType and VehicleModel are present, and that the
/// Vehicle field is absent.
/// </summary>
public void FixVehicleColumns()
{
    string testDataDir = TestUtil.GetTestDataPath();
    string workbookPath = Path.Combine(testDataDir, "17497.xls");
    IAdapter sheetAdapter = AsposeExcelAdapter.CreateAdapter(workbookPath, -1);
    ColumnByDataPredictor.InitializeIfNotAlready();

    TableHeader header = TableHeaderRecognizer.ExamineTableBeginning(sheetAdapter);

    Assert.AreEqual(15, header.ColumnOrder.Count);
    Assert.IsTrue(header.ContainsField(DeclarationField.VehicleType));
    Assert.IsTrue(header.ContainsField(DeclarationField.VehicleModel));
    Assert.IsFalse(header.ContainsField(DeclarationField.Vehicle));
}
/// <summary>
/// Runs header recognition on "rabotniki_podved_organizacii_2013.xlsx" and
/// checks the starting column index of each of the thirteen expected fields.
/// </summary>
public void EmptyRealStateTypeColumnDetectorTest1()
{
    string xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "rabotniki_podved_organizacii_2013.xlsx");
    IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsxFile);
    ColumnByDataPredictor.InitializeIfNotAlready();

    TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

    // AreEqual(expected, actual) is used instead of IsTrue(a == b) so that a
    // failure reports the column index that was actually detected.
    Assert.AreEqual(0, ordering.ColumnOrder[DeclarationField.DeclarantIndex].BeginColumn);
    Assert.AreEqual(1, ordering.ColumnOrder[DeclarationField.NameOrRelativeType].BeginColumn);
    Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
    Assert.AreEqual(3, ordering.ColumnOrder[DeclarationField.OwnedRealEstateType].BeginColumn);
    Assert.AreEqual(4, ordering.ColumnOrder[DeclarationField.OwnedRealEstateOwnershipType].BeginColumn);
    Assert.AreEqual(5, ordering.ColumnOrder[DeclarationField.OwnedRealEstateSquare].BeginColumn);
    Assert.AreEqual(6, ordering.ColumnOrder[DeclarationField.OwnedRealEstateCountry].BeginColumn);
    Assert.AreEqual(7, ordering.ColumnOrder[DeclarationField.StatePropertyType].BeginColumn);
    Assert.AreEqual(8, ordering.ColumnOrder[DeclarationField.StatePropertySquare].BeginColumn);
    Assert.AreEqual(9, ordering.ColumnOrder[DeclarationField.StatePropertyCountry].BeginColumn);
    Assert.AreEqual(10, ordering.ColumnOrder[DeclarationField.Vehicle].BeginColumn);
    Assert.AreEqual(11, ordering.ColumnOrder[DeclarationField.DeclaredYearlyIncome].BeginColumn);
    Assert.AreEqual(12, ordering.ColumnOrder[DeclarationField.DataSources].BeginColumn);
}
/// <summary>
/// Command-line entry point. Dispatches to directory, file-mask, or
/// single-file parsing depending on the input argument.
/// </summary>
/// <param name="args">Raw command-line arguments; passed to <c>ParseArgs</c>.</param>
/// <returns>
/// 1 when no input was given; the result of <c>ParseDirectory</c> or
/// <c>ParseByFileMask</c> for those modes; otherwise 0. In single-file mode
/// exceptions are logged rather than propagated, so 0 is returned even then.
/// </returns>
public static int Main(string[] args)
{
    string inputFile = ParseArgs(args);
    Logger.Info("Command line: " + String.Join(" ", args));

    if (String.IsNullOrEmpty(inputFile))
    {
        Console.WriteLine("no input file or directory");
        return 1;
    }

    if (IsDirectory(inputFile))
    {
        return ParseDirectory(inputFile);
    }

    // Wildcards or a leading '@' route the input to mask-based parsing.
    bool isFileMask = inputFile.Contains("*") || inputFile.Contains("?") || inputFile.StartsWith("@");
    if (isFileMask)
    {
        return ParseByFileMask(inputFile);
    }

    try
    {
        Logger.SetOutSecond();
        if (OutFile == "")
        {
            OutFile = BuildOutFileNameByInput(inputFile);
        }
        ParseFile(inputFile, OutFile);
    }
    catch (SmartParserException parserError)
    {
        Logger.Error("Parsing Exception " + parserError.ToString());
    }
    catch (Exception unexpectedError)
    {
        Logger.Error("Unknown Parsing Exception " + unexpectedError.ToString());
        Logger.Info("Stack: " + unexpectedError.StackTrace);
    }
    finally
    {
        // Switch logging back regardless of how parsing ended.
        Logger.SetOutMain();
    }

    if (ColumnByDataPredictor.CalcPrecision)
    {
        Logger.Info(ColumnByDataPredictor.GetPrecisionStr());
    }

    if (Logger.Errors.Any())
    {
        Logger.Info("*** Errors ({0}):", Logger.Errors.Count());
        foreach (string message in Logger.Errors)
        {
            Logger.Info(message);
        }
    }

    return 0;
}
/// <summary>
/// Parses a single input document and writes the resulting declarations as JSON.
/// </summary>
/// <remarks>
/// For a document with more than one worksheet the behaviour depends on
/// <c>DeclarationSerializer.SmartParserJsonFormat</c>: in <c>Disclosures</c>
/// format all sheets are merged and written to <paramref name="outFile"/>;
/// otherwise each sheet is written to its own file, named by replacing
/// ".json" in <paramref name="outFile"/> with "_&lt;sheetIndex&gt;.json".
/// Sheets that raise <c>ColumnDetectorException</c> are logged and skipped.
/// </remarks>
/// <param name="inputFile">Path of the document to parse.</param>
/// <param name="outFile">Path of the JSON file to produce.</param>
/// <returns>
/// Always 0, including when the output already exists (with <c>CheckJson</c>
/// set) or the input file is missing; both cases are only logged.
/// </returns>
/// <exception cref="SmartParserException">
/// The adapter reports zero tables and the input is not a ".toloka_json" file.
/// </exception>
public static int ParseFile(string inputFile, string outFile)
{
    if (CheckJson && File.Exists(outFile))
    {
        Logger.Info("JSON file {0} already exist", outFile);
        return 0;
    }

    if (!File.Exists(inputFile))
    {
        Logger.Info("ERROR: {0} file NOT exists", inputFile);
        return 0;
    }

    ColumnByDataPredictor.InitializeIfNotAlready(Program.ColumnTrigramsFileName);

    // The per-file log is placed next to the input as "<input file name>.log".
    string logFile = Path.Combine(Path.GetDirectoryName(inputFile), Path.GetFileName(inputFile) + ".log");
    Logger.SetSecondLogFileName(Path.GetFullPath(logFile));

    Logger.Info(String.Format("Parsing {0}", inputFile));
    IAdapter adapter = GetAdapter(inputFile);

    Logger.Info(String.Format("TablesCount = {0}", adapter.GetTablesCount()));
    Logger.Info(String.Format("RowsCount = {0}", adapter.GetRowsCount()));

    if (adapter.GetTablesCount() == 0 && !inputFile.EndsWith(".toloka_json"))
    {
        throw new SmartParserException("No tables found in document");
    }

    if (HtmlFileName != "")
    {
        adapter.WriteHtmlFile(HtmlFileName);
    }

    if (adapter.GetWorkSheetCount() > 1)
    {
        Logger.Info(String.Format("File has multiple ({0}) worksheets", adapter.GetWorkSheetCount()));
        Declaration allDeclarations = null;

        for (int sheetIndex = 0; sheetIndex < adapter.GetWorkSheetCount(); sheetIndex++)
        {
            adapter.SetCurrentWorksheet(sheetIndex);
            try
            {
                if (DeclarationSerializer.SmartParserJsonFormat == SmartParserJsonFormatEnum.Disclosures)
                {
                    // Disclosures format: accumulate every sheet into one result.
                    var sheetDeclarations = BuildDeclarations(adapter, inputFile);
                    if (allDeclarations == null)
                    {
                        allDeclarations = sheetDeclarations;
                    }
                    else
                    {
                        allDeclarations.AddDeclarations(sheetDeclarations);
                    }
                }
                else
                {
                    // Other formats: one output file per worksheet.
                    string curOutFile = outFile.Replace(".json", "_" + sheetIndex.ToString() + ".json");
                    Logger.Info(String.Format("Parsing worksheet {0} into file {1}", sheetIndex, curOutFile));
                    WriteOutputJson(inputFile, BuildDeclarations(adapter, inputFile), curOutFile);
                }
            }
            catch (ColumnDetectorException)
            {
                Logger.Info(String.Format("Skipping empty sheet {0} (No headers found exception thrown)", sheetIndex));
            }
        }

        // Write the merged result once, after all sheets have been processed.
        // Doing this inside the loop re-serialized the growing result for every
        // sheet and could leave a partial file behind if a later sheet failed.
        if (allDeclarations != null)
        {
            WriteOutputJson(inputFile, allDeclarations, outFile);
        }
    }
    else
    {
        WriteOutputJson(inputFile, BuildDeclarations(adapter, inputFile), outFile);
    }

    return 0;
}
/// <summary>
/// Parses each file in turn, logs a per-file outcome, and finally logs a
/// summary of outcomes grouped by category.
/// </summary>
/// <remarks>
/// Exceptions thrown while parsing a file are logged and recorded under
/// "exception"; they do not stop processing of the remaining files. If
/// <c>Logger.UnknownRealEstate</c> is non-empty, its entries are written to
/// "UnknownRealEstate.txt" in <paramref name="outputDir"/>.
/// </remarks>
/// <param name="files">Paths of the files to parse.</param>
/// <param name="outputDir">Directory that receives "UnknownRealEstate.txt".</param>
/// <returns>Always 0.</returns>
public static int ParseMultipleFiles(IEnumerable<string> files, string outputDir)
{
    // Outcome category -> files that ended in that category.
    var filesByOutcome = new Dictionary<string, List<string>>();
    filesByOutcome.Add("ok", new List<string>());
    filesByOutcome.Add("error", new List<string>());
    filesByOutcome.Add("too_many_errors", new List<string>());
    filesByOutcome.Add("exception", new List<string>());

    foreach (string file in files)
    {
        Logger.Info("Parsing file " + file);
        bool failedWithException = false;

        try
        {
            Logger.SetOutSecond();
            ParseFile(file, BuildOutFileNameByInput(file));
        }
        catch (SmartParserException parserError)
        {
            failedWithException = true;
            Logger.Error("Parsing Exception " + parserError.ToString());
            filesByOutcome["exception"].Add(file);
        }
        catch (Exception otherError)
        {
            failedWithException = true;
            Logger.Error("Parsing Exception " + otherError.ToString());
            Logger.Debug("Stack: " + otherError.StackTrace);
            filesByOutcome["exception"].Add(file);
        }
        finally
        {
            Logger.SetOutMain();
        }

        if (failedWithException)
        {
            Logger.Info("Result: Exception");
        }
        else if (Logger.Errors.Any())
        {
            Logger.Info("Result: error");
            filesByOutcome["error"].Add(file);
        }
        else
        {
            Logger.Info("Result: OK");
            filesByOutcome["ok"].Add(file);
        }

        if (Logger.Errors.Any())
        {
            Logger.Info(" Parsing errors ({0})", Logger.Errors.Count());
            foreach (string message in Logger.Errors)
            {
                Logger.Info(message);
            }
        }
    }

    Logger.Info("Parsing Results:");
    foreach (var outcome in filesByOutcome)
    {
        List<string> outcomeFiles = outcome.Value;
        Logger.Info("Result: {0} ({1})", outcome.Key, outcomeFiles.Count());
        foreach (string outcomeFile in outcomeFiles)
        {
            Logger.Info(outcomeFile);
        }
    }

    if (Logger.UnknownRealEstate.Count() > 0)
    {
        Logger.Info("UnknownRealEstate.Count: {0}", Logger.UnknownRealEstate.Count());
        string dictfile = Path.Combine(outputDir, "UnknownRealEstate.txt");
        File.WriteAllText(dictfile, string.Join("\n", Logger.UnknownRealEstate));
        Logger.Info("Output UnknownRealEstate to file {0}", dictfile);
    }

    if (ColumnByDataPredictor.CalcPrecision)
    {
        Logger.Info(ColumnByDataPredictor.GetPrecisionStr());
    }

    return 0;
}