コード例 #1
0
        // Checks that DataRow.CheckPersonName accepts a three-part name that
        // contains a doubled inner space and ends with a trailing comma.
        public void TestCanBePersonName()
        {
            const string candidate = "Десятов Владимир  Вячеславович,";

            // The predictor is initialized before the check, as in the original test.
            ColumnByDataPredictor.InitializeIfNotAlready();

            Assert.IsTrue(DataRow.CheckPersonName(candidate));
        }
コード例 #2
0
        // Runs header recognition on the "customs-tworow-header.xls" fixture and
        // checks the number of recognized columns and where the Occupation
        // column begins.
        public void TwoRowHeaderEmptyTopCellTest2()
        {
            // The fixture is a legacy .xls workbook, hence the local's name.
            string   xlsFile = Path.Combine(TestUtil.GetTestDataPath(), "customs-tworow-header.xls");
            IAdapter adapter = AsposeExcelAdapter.CreateAdapter(xlsFile);

            ColumnByDataPredictor.InitializeIfNotAlready();
            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // Assert.AreEqual takes (expected, actual); with the arguments in this
            // order a failure message reports the two values the right way round.
            Assert.AreEqual(14, ordering.ColumnOrder.Count);
            Assert.AreEqual(2, ordering.ColumnOrder[DeclarationField.Occupation].BeginColumn);
        }
コード例 #3
0
        // Runs header recognition on the "17497.xls" fixture and checks that the
        // header has 15 columns, with VehicleType and VehicleModel present and
        // the combined Vehicle field absent.
        public void FixVehicleColumns()
        {
            string   fixturePath = Path.Combine(TestUtil.GetTestDataPath(), "17497.xls");
            IAdapter source      = AsposeExcelAdapter.CreateAdapter(fixturePath, -1);

            ColumnByDataPredictor.InitializeIfNotAlready();

            TableHeader header = TableHeaderRecognizer.ExamineTableBeginning(source);

            Assert.AreEqual(15, header.ColumnOrder.Count);

            // Both split vehicle fields must have been recognized...
            foreach (var splitField in new[] { DeclarationField.VehicleType, DeclarationField.VehicleModel })
            {
                Assert.IsTrue(header.ContainsField(splitField));
            }

            // ...while the combined field must not be.
            Assert.IsFalse(header.ContainsField(DeclarationField.Vehicle));
        }
コード例 #4
0
        // Runs header recognition on the "rabotniki_podved_organizacii_2013.xlsx"
        // fixture and checks that each expected field begins at its expected
        // column, 0 through 12.
        public void EmptyRealStateTypeColumnDetectorTest1()
        {
            string   xlsxFile = Path.Combine(TestUtil.GetTestDataPath(), "rabotniki_podved_organizacii_2013.xlsx");
            IAdapter adapter  = AsposeExcelAdapter.CreateAdapter(xlsxFile);

            ColumnByDataPredictor.InitializeIfNotAlready();
            TableHeader ordering = TableHeaderRecognizer.ExamineTableBeginning(adapter);

            // The position in this array is the column where the field must begin.
            var expectedLayout = new[]
            {
                DeclarationField.DeclarantIndex,
                DeclarationField.NameOrRelativeType,
                DeclarationField.Occupation,
                DeclarationField.OwnedRealEstateType,
                DeclarationField.OwnedRealEstateOwnershipType,
                DeclarationField.OwnedRealEstateSquare,
                DeclarationField.OwnedRealEstateCountry,
                DeclarationField.StatePropertyType,
                DeclarationField.StatePropertySquare,
                DeclarationField.StatePropertyCountry,
                DeclarationField.Vehicle,
                DeclarationField.DeclaredYearlyIncome,
                DeclarationField.DataSources,
            };

            for (int expectedColumn = 0; expectedColumn < expectedLayout.Length; expectedColumn++)
            {
                var field = expectedLayout[expectedColumn];

                // AreEqual (rather than IsTrue(a == b)) reports both values on failure;
                // the field name in the message tells which column was misplaced.
                Assert.AreEqual(expectedColumn, ordering.ColumnOrder[field].BeginColumn, field.ToString());
            }
        }
コード例 #5
0
        /// <summary>
        /// Command-line entry point. Resolves the input from the arguments and
        /// dispatches to directory parsing, file-mask parsing, or single-file parsing.
        /// </summary>
        /// <param name="args">Raw command-line arguments; they are also echoed to the log.</param>
        /// <returns>
        /// 1 when no input file or directory was supplied; the result of
        /// <c>ParseDirectory</c> or <c>ParseByFileMask</c> for those modes;
        /// otherwise 0. Exceptions raised while parsing a single file are logged
        /// and do not change the return value.
        /// </returns>
        public static int Main(string[] args)
        {
            string inputFile = ParseArgs(args);

            Logger.Info("Command line: " + String.Join(" ", args));
            if (String.IsNullOrEmpty(inputFile))
            {
                Console.WriteLine("no input file or directory");
                return 1;
            }

            if (IsDirectory(inputFile))
            {
                return ParseDirectory(inputFile);
            }

            // Wildcards or a leading '@' select mask-based processing. The prefix
            // check is ordinal: this is a syntactic marker, not linguistic text,
            // so it must not depend on the current culture.
            bool isMaskOrList = inputFile.Contains("*")
                                || inputFile.Contains("?")
                                || inputFile.StartsWith("@", StringComparison.Ordinal);
            if (isMaskOrList)
            {
                return ParseByFileMask(inputFile);
            }

            try
            {
                Logger.SetOutSecond();

                // Treat a null OutFile like an empty one and derive the name from the input.
                if (String.IsNullOrEmpty(OutFile))
                {
                    OutFile = BuildOutFileNameByInput(inputFile);
                }

                ParseFile(inputFile, OutFile);
            }
            catch (SmartParserException e)
            {
                Logger.Error("Parsing Exception " + e.ToString());
            }
            catch (Exception e)
            {
                Logger.Error("Unknown Parsing Exception " + e.ToString());
                Logger.Info("Stack: " + e.StackTrace);
            }
            finally
            {
                // Always switch logging back, whether or not parsing succeeded.
                Logger.SetOutMain();
            }

            if (ColumnByDataPredictor.CalcPrecision)
            {
                Logger.Info(ColumnByDataPredictor.GetPrecisionStr());
            }

            // Any() matches the check used in ParseMultipleFiles and avoids counting
            // the whole collection just to test for emptiness.
            if (Logger.Errors.Any())
            {
                Logger.Info("*** Errors ({0}):", Logger.Errors.Count());

                foreach (string e in Logger.Errors)
                {
                    Logger.Info(e);
                }
            }

            // NOTE(review): the exit code stays 0 even when errors were logged;
            // kept as-is because callers may rely on it — confirm whether a
            // non-zero code is wanted here.
            return 0;
        }
コード例 #6
0
        /// <summary>
        /// Parses one input document and writes the resulting declarations as JSON.
        /// </summary>
        /// <remarks>
        /// For a document with several worksheets the behaviour depends on
        /// <c>DeclarationSerializer.SmartParserJsonFormat</c>: in the
        /// <c>Disclosures</c> format the declarations of all sheets are merged and
        /// written once to <paramref name="outFile"/>; otherwise each sheet is
        /// written to its own file, with "_&lt;sheetIndex&gt;" inserted before ".json".
        /// Sheets that raise <c>ColumnDetectorException</c> are skipped.
        /// </remarks>
        /// <param name="inputFile">Path of the document to parse.</param>
        /// <param name="outFile">Path of the JSON file to produce.</param>
        /// <returns>Always 0, including when the input is missing or the output already exists.</returns>
        /// <exception cref="SmartParserException">
        /// The document contains no tables and its name does not end with ".toloka_json".
        /// </exception>
        public static int ParseFile(string inputFile, string outFile)
        {
            if (CheckJson && File.Exists(outFile))
            {
                Logger.Info("JSON file {0} already exist", outFile);
                return 0;
            }

            if (!File.Exists(inputFile))
            {
                Logger.Info("ERROR: {0} file NOT exists", inputFile);
                return 0;
            }

            ColumnByDataPredictor.InitializeIfNotAlready(Program.ColumnTrigramsFileName);

            // The per-file log is placed next to the input as "<input name>.log".
            string logFile = Path.Combine(Path.GetDirectoryName(inputFile),
                                          Path.GetFileName(inputFile) + ".log");

            Logger.SetSecondLogFileName(Path.GetFullPath(logFile));

            Logger.Info(String.Format("Parsing {0}", inputFile));
            IAdapter adapter = GetAdapter(inputFile);

            Logger.Info(String.Format("TablesCount = {0}", adapter.GetTablesCount()));
            Logger.Info(String.Format("RowsCount = {0}", adapter.GetRowsCount()));

            // Ordinal comparison: a file extension is not culture-dependent text.
            if (adapter.GetTablesCount() == 0 && !inputFile.EndsWith(".toloka_json", StringComparison.Ordinal))
            {
                throw new SmartParserException("No tables found in document");
            }

            if (HtmlFileName != "")
            {
                adapter.WriteHtmlFile(HtmlFileName);
            }

            int sheetCount = adapter.GetWorkSheetCount();
            if (sheetCount <= 1)
            {
                WriteOutputJson(inputFile, BuildDeclarations(adapter, inputFile), outFile);
                return 0;
            }

            Logger.Info(String.Format("File has multiple ({0}) worksheets", sheetCount));
            Declaration allDeclarations = null;
            for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++)
            {
                adapter.SetCurrentWorksheet(sheetIndex);
                try
                {
                    if (DeclarationSerializer.SmartParserJsonFormat == SmartParserJsonFormatEnum.Disclosures)
                    {
                        // Merge every sheet into one Declaration; the first parsed sheet seeds it.
                        var sheetDeclarations = BuildDeclarations(adapter, inputFile);
                        if (allDeclarations == null)
                        {
                            allDeclarations = sheetDeclarations;
                        }
                        else
                        {
                            allDeclarations.AddDeclarations(sheetDeclarations);
                        }
                    }
                    else
                    {
                        string curOutFile = outFile.Replace(".json", "_" + sheetIndex.ToString() + ".json");
                        Logger.Info(String.Format("Parsing worksheet {0} into file {1}", sheetIndex, curOutFile));
                        WriteOutputJson(inputFile, BuildDeclarations(adapter, inputFile), curOutFile);
                    }
                }
                catch (ColumnDetectorException)
                {
                    Logger.Info(String.Format("Skipping empty sheet {0} (No headers found exception thrown)",
                                              sheetIndex));
                }
            }

            // Write the merged result once, after all sheets have been processed.
            // Previously this ran on every loop iteration, rewriting the same file
            // once per sheet and leaving a partial file behind if a later sheet
            // failed with an exception other than ColumnDetectorException.
            if (allDeclarations != null)
            {
                WriteOutputJson(inputFile, allDeclarations, outFile);
            }

            return 0;
        }
コード例 #7
0
        /// <summary>
        /// Parses each file in <paramref name="files"/> via <c>ParseFile</c>, sorts the
        /// files into result categories, and logs a summary at the end.
        /// </summary>
        /// <param name="files">Paths of the files to parse.</param>
        /// <param name="outputDir">
        /// Directory that receives "UnknownRealEstate.txt" when
        /// <c>Logger.UnknownRealEstate</c> is not empty.
        /// </param>
        /// <returns>Always 0; per-file failures are logged, not propagated.</returns>
        public static int ParseMultipleFiles(IEnumerable <string> files, string outputDir)
        {
            // Result category -> files that ended up in it.
            var outcomeBuckets = new Dictionary<string, List<string>>();
            outcomeBuckets.Add("ok", new List<string>());
            outcomeBuckets.Add("error", new List<string>());
            outcomeBuckets.Add("too_many_errors", new List<string>());
            outcomeBuckets.Add("exception", new List<string>());

            foreach (string path in files)
            {
                Logger.Info("Parsing file " + path);

                // Becomes true only if ParseFile returns without throwing.
                bool completed = false;
                try
                {
                    Logger.SetOutSecond();
                    ParseFile(path, BuildOutFileNameByInput(path));
                    completed = true;
                }
                catch (SmartParserException ex)
                {
                    Logger.Error("Parsing Exception " + ex.ToString());
                    outcomeBuckets["exception"].Add(path);
                }
                catch (Exception ex)
                {
                    Logger.Error("Parsing Exception " + ex.ToString());
                    Logger.Debug("Stack: " + ex.StackTrace);
                    outcomeBuckets["exception"].Add(path);
                }
                finally
                {
                    Logger.SetOutMain();
                }

                // Exactly one outcome line is logged per file.
                if (!completed)
                {
                    Logger.Info("Result: Exception");
                }
                else if (Logger.Errors.Any())
                {
                    Logger.Info("Result: error");
                    outcomeBuckets["error"].Add(path);
                }
                else
                {
                    Logger.Info("Result: OK");
                    outcomeBuckets["ok"].Add(path);
                }

                // Dump the logged errors whatever the outcome was.
                if (Logger.Errors.Any())
                {
                    Logger.Info(" Parsing errors ({0})", Logger.Errors.Count());

                    foreach (string message in Logger.Errors)
                    {
                        Logger.Info(message);
                    }
                }
            }

            Logger.Info("Parsing Results:");

            foreach (KeyValuePair<string, List<string>> bucket in outcomeBuckets)
            {
                Logger.Info("Result: {0} ({1})", bucket.Key, bucket.Value.Count);
                foreach (string path in bucket.Value)
                {
                    Logger.Info(path);
                }
            }

            var unknownCount = Logger.UnknownRealEstate.Count();
            if (unknownCount > 0)
            {
                Logger.Info("UnknownRealEstate.Count: {0}", unknownCount);

                // One entry per line, written into the output directory.
                string dictfile = Path.Combine(outputDir, "UnknownRealEstate.txt");
                File.WriteAllText(dictfile, string.Join("\n", Logger.UnknownRealEstate));
                Logger.Info("Output UnknownRealEstate to file {0}", dictfile);
            }

            if (ColumnByDataPredictor.CalcPrecision)
            {
                Logger.Info(ColumnByDataPredictor.GetPrecisionStr());
            }

            return 0;
        }