コード例 #1
0
        public ExtractText(string fileName, bool formatted)
        {
            //ExStart:ExtractText
            int linesPerPage         = Console.WindowHeight;
            ExtractorFactory factory = new ExtractorFactory();

            TextExtractor extractor = formatted
                ? factory.CreateFormattedTextExtractor(fileName)
                : factory.CreateTextExtractor(fileName);

            if (extractor == null)
            {
                Console.WriteLine("The document's format is not supported");
                return;
            }

            try
            {
                string line = null;
                do
                {
                    Console.Clear();
                    Console.WriteLine("{0}", fileName);

                    int lineNumber = 0;
                    do
                    {
                        line = extractor.ExtractLine();
                        lineNumber++;
                        if (line != null)
                        {
                            Console.WriteLine(line);
                        }
                    }while (line != null && lineNumber < linesPerPage);

                    Console.WriteLine();
                    Console.WriteLine("Press Esc to exit or any other key to move to the next page");
                }while (line != null && Console.ReadKey().Key != ConsoleKey.Escape);
            }
            finally
            {
                extractor.Dispose();
            }
            //ExEnd:ExtractText
        }
        private async Task <Response> ParseFileText(string fileName, string folderName)
        {
            string logMsg = "ControllerName: GroupDocsParserController FileName: " + fileName + " FolderName: " + folderName;

            try
            {
                return(await ProcessTask(fileName, folderName, ".txt", false, "", delegate(string inFilePath, string outPath, string zipOutFolder)
                {
                    EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

                    if (!Directory.Exists(zipOutFolder))
                    {
                        Directory.CreateDirectory(zipOutFolder);
                    }

                    using (Stream stream = new FileStream(inFilePath, FileMode.Open)) {
                        System.IO.File.WriteAllText(outPath, "Encoding: " + detector.Detect(stream, true) + Environment.NewLine);
                    }

                    ExtractorFactory factory = new ExtractorFactory();
                    MetadataExtractor metadataExtractor = factory.CreateMetadataExtractor(inFilePath);
                    if (metadataExtractor != null)
                    {
                        MetadataCollection metadataCollection = metadataExtractor.ExtractMetadata(inFilePath);

                        System.IO.File.AppendAllText(outPath, Environment.NewLine + "Metadata:" + Environment.NewLine);
                        foreach (string key in metadataCollection.Keys)
                        {
                            System.IO.File.AppendAllText(outPath, string.Format("{0} = {1}", key, metadataCollection[key]) + Environment.NewLine);
                        }
                    }

                    System.IO.File.AppendAllText(outPath, Environment.NewLine + "Parsed content:" + Environment.NewLine);

                    string fileExt = Path.GetExtension(fileName).Substring(1).ToLower();
                    if (GetFormatType(fileExt) == FormatType.Excel)
                    {
                        CellsTextExtractor extractor = new CellsTextExtractor(inFilePath);
                        extractor.ExtractMode = ExtractMode.Standard;
                        for (int sheetIndex = 0; sheetIndex < extractor.SheetCount; sheetIndex++)
                        {
                            System.IO.File.AppendAllText(outPath, Environment.NewLine + "Sheet # " + extractor.SheetCount + Environment.NewLine);
                            System.IO.File.AppendAllText(outPath, extractor.ExtractSheet(sheetIndex));
                        }
                    }
                    else
                    {
                        TextExtractor textExtractor = factory.CreateFormattedTextExtractor(inFilePath);
                        if (textExtractor == null)
                        {
                            textExtractor = factory.CreateTextExtractor(inFilePath);
                        }
                        System.IO.File.AppendAllText(outPath, textExtractor.ExtractAll());
                    }
                }));
            }
            catch (Exception exc)
            {
                return(new Response {
                    FileName = fileName, FolderName = folderName, OutputType = "txt", Status = exc.Message, StatusCode = 500, Text = exc.ToString()
                });
            }
        }