Beispiel #1
0
        /// <summary>
        /// Parses <paramref name="inputFile"/> and writes the resulting declarations as JSON
        /// to <paramref name="outFile"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Nothing is parsed when <c>CheckJson</c> is set and <paramref name="outFile"/> already
        /// exists, or when <paramref name="inputFile"/> does not exist; both cases are only logged.
        /// </para>
        /// <para>
        /// A second log file named after the input file with a <c>.log</c> suffix is registered
        /// next to the input file, and the adapter is asked to write an HTML file when
        /// <c>HtmlFileName</c> is set.
        /// </para>
        /// <para>
        /// When the adapter reports more than one worksheet, each worksheet is processed in turn.
        /// In the <c>Disclosures</c> JSON format the declarations of all worksheets are merged and
        /// written once to <paramref name="outFile"/>; in any other format every worksheet is
        /// written to its own file whose name carries a <c>_&lt;sheetIndex&gt;</c> suffix.
        /// Worksheets that raise <c>ColumnDetectorException</c> are logged and skipped.
        /// </para>
        /// </remarks>
        /// <param name="inputFile">Path of the document to parse.</param>
        /// <param name="outFile">Path of the JSON file to write.</param>
        /// <returns>Always 0, including the cases where parsing is skipped.</returns>
        /// <exception cref="SmartParserException">
        /// The adapter found no tables and <paramref name="inputFile"/> does not end with
        /// <c>.toloka_json</c>.
        /// </exception>
        public static int ParseFile(string inputFile, string outFile)
        {
            if (CheckJson && File.Exists(outFile))
            {
                Logger.Info("JSON file {0} already exist", outFile);
                return 0;
            }

            if (!File.Exists(inputFile))
            {
                Logger.Info("ERROR: {0} file NOT exists", inputFile);
                return 0;
            }

            ColumnPredictor.InitializeIfNotAlready();

            // The per-document log lives next to the input file: "<input file name>.log".
            string logFile = Path.Combine(Path.GetDirectoryName(inputFile),
                                          Path.GetFileName(inputFile) + ".log");

            Logger.SetSecondLogFileName(Path.GetFullPath(logFile));

            Logger.Info(String.Format("Parsing {0}", inputFile));
            IAdapter adapter = GetAdapter(inputFile);

            Logger.Info(String.Format("TablesCount = {0}", adapter.GetTablesCount()));
            Logger.Info(String.Format("RowsCount = {0}", adapter.GetRowsCount()));

            // Ordinal comparison: this is a file-extension check, not linguistic text.
            if (adapter.GetTablesCount() == 0 && !inputFile.EndsWith(".toloka_json", StringComparison.Ordinal))
            {
                throw new SmartParserException("No tables found in document");
            }

            // IsNullOrEmpty also guards against a null HtmlFileName being passed to the adapter.
            if (!string.IsNullOrEmpty(HtmlFileName))
            {
                adapter.WriteHtmlFile(HtmlFileName);
            }

            if (adapter.GetWorkSheetCount() > 1)
            {
                Logger.Info(String.Format("File has multiple ({0}) worksheets", adapter.GetWorkSheetCount()));
                Declaration allDeclarations = null;
                for (int sheetIndex = 0; sheetIndex < adapter.GetWorkSheetCount(); sheetIndex++)
                {
                    adapter.SetCurrentWorksheet(sheetIndex);
                    try
                    {
                        if (DeclarationSerializer.SmartParserJsonFormat == SmartParserJsonFormatEnum.Disclosures)
                        {
                            // Disclosures format: accumulate all worksheets into one declaration set.
                            var sheetDeclarations = BuildDeclarations(adapter, inputFile);
                            if (allDeclarations == null)
                            {
                                allDeclarations = sheetDeclarations;
                            }
                            else
                            {
                                allDeclarations.AddDeclarations(sheetDeclarations);
                            }
                        }
                        else
                        {
                            // Other formats: one output file per worksheet.
                            string curOutFile = BuildWorksheetOutFileName(outFile, sheetIndex);
                            Logger.Info(String.Format("Parsing worksheet {0} into file {1}", sheetIndex, curOutFile));
                            WriteOutputJson(inputFile, BuildDeclarations(adapter, inputFile), curOutFile);
                        }
                    }
                    catch (ColumnDetectorException)
                    {
                        Logger.Info(String.Format("Skipping empty sheet {0} (No headers found exception thrown)",
                                                  sheetIndex));
                    }
                }

                // Write the merged result once, after every worksheet has been processed.
                // Writing inside the loop rewrote the same file per sheet and could leave a
                // partial file behind if a later sheet failed.
                if (allDeclarations != null)
                {
                    WriteOutputJson(inputFile, allDeclarations, outFile);
                }
            }
            else
            {
                WriteOutputJson(inputFile, BuildDeclarations(adapter, inputFile), outFile);
            }

            return 0;
        }

        /// <summary>
        /// Builds the per-worksheet output path by inserting <c>_&lt;sheetIndex&gt;</c> before the
        /// extension of <paramref name="outFile"/> (or appending it when there is no extension).
        /// </summary>
        /// <remarks>
        /// Only the final extension of the file name is considered, so directory names that
        /// happen to contain ".json" are left intact and paths without a ".json" extension
        /// still get a distinct name per worksheet.
        /// </remarks>
        private static string BuildWorksheetOutFileName(string outFile, int sheetIndex)
        {
            string extension = Path.GetExtension(outFile);
            string stem = outFile.Substring(0, outFile.Length - extension.Length);
            return stem + "_" + sheetIndex.ToString() + extension;
        }