/// <summary>
/// Prints the text of a document to the console one page at a time,
/// pausing for a key press after each page.
/// </summary>
/// <param name="fileName">File passed to the extractor factory; also printed as the first line of every page.</param>
/// <param name="formatted">
/// <c>true</c> to request a formatted text extractor from the factory,
/// <c>false</c> to request a plain text extractor.
/// </param>
public ExtractText(string fileName, bool formatted)
{
    //ExStart:ExtractText
    // The console window height caps how many extraction attempts are made per page.
    int pageSize = Console.WindowHeight;

    ExtractorFactory extractorFactory = new ExtractorFactory();
    TextExtractor reader;
    if (formatted)
    {
        reader = extractorFactory.CreateFormattedTextExtractor(fileName);
    }
    else
    {
        reader = extractorFactory.CreateTextExtractor(fileName);
    }

    // The factory returns null when it cannot provide an extractor for the file.
    if (reader == null)
    {
        Console.WriteLine("The document's format is not supported");
        return;
    }

    try
    {
        bool keepPaging = true;
        while (keepPaging)
        {
            Console.Clear();
            Console.WriteLine("{0}", fileName);

            // Pull lines until the page is full or the extractor runs out (null line).
            string text = null;
            int attempts = 0;
            while (true)
            {
                text = reader.ExtractLine();
                attempts++;
                if (text == null)
                {
                    break;
                }

                Console.WriteLine(text);
                if (attempts >= pageSize)
                {
                    break;
                }
            }

            Console.WriteLine();
            Console.WriteLine("Press Esc to exit or any other key to move to the next page");

            // Only wait for a key when there may be more text; Esc stops the paging.
            keepPaging = text != null && Console.ReadKey().Key != ConsoleKey.Escape;
        }
    }
    finally
    {
        reader.Dispose();
    }
    //ExEnd:ExtractText
}
/// <summary>
/// Produces a ".txt" report for a document through <c>ProcessTask</c>. The report contains the
/// detected encoding, the document metadata (when a metadata extractor is available) and the
/// extracted text (sheet by sheet for Excel formats, whole document otherwise).
/// </summary>
/// <param name="fileName">Name of the file to parse; its extension selects the Excel or generic extraction path.</param>
/// <param name="folderName">Folder name forwarded to <c>ProcessTask</c> and echoed in the error response.</param>
/// <returns>
/// The response produced by <c>ProcessTask</c>, or a response with <c>StatusCode</c> 500 carrying the
/// exception message and details when an exception reaches this method.
/// </returns>
private async Task<Response> ParseFileText(string fileName, string folderName)
{
    try
    {
        return await ProcessTask(fileName, folderName, ".txt", false, "", delegate(string inFilePath, string outPath, string zipOutFolder)
        {
            EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));
            if (!Directory.Exists(zipOutFolder))
            {
                Directory.CreateDirectory(zipOutFolder);
            }

            // The input is only read, so request read access explicitly; the default
            // (read/write) would fail on read-only files.
            using (Stream stream = new FileStream(inFilePath, FileMode.Open, FileAccess.Read))
            {
                System.IO.File.WriteAllText(outPath, "Encoding: " + detector.Detect(stream, true) + Environment.NewLine);
            }

            ExtractorFactory factory = new ExtractorFactory();
            MetadataExtractor metadataExtractor = factory.CreateMetadataExtractor(inFilePath);
            if (metadataExtractor != null)
            {
                MetadataCollection metadataCollection = metadataExtractor.ExtractMetadata(inFilePath);

                // Build the whole metadata section in memory and append it once
                // instead of reopening the output file for every key.
                System.Text.StringBuilder metadataText = new System.Text.StringBuilder();
                metadataText.Append(Environment.NewLine).Append("Metadata:").Append(Environment.NewLine);
                foreach (string key in metadataCollection.Keys)
                {
                    metadataText.AppendFormat("{0} = {1}", key, metadataCollection[key]).Append(Environment.NewLine);
                }

                System.IO.File.AppendAllText(outPath, metadataText.ToString());
            }

            System.IO.File.AppendAllText(outPath, Environment.NewLine + "Parsed content:" + Environment.NewLine);

            // TrimStart tolerates file names without an extension (Substring(1) would throw on "");
            // the invariant lower-casing keeps the lookup independent of the current culture.
            string fileExt = Path.GetExtension(fileName).TrimStart('.').ToLowerInvariant();
            if (GetFormatType(fileExt) == FormatType.Excel)
            {
                CellsTextExtractor extractor = new CellsTextExtractor(inFilePath);
                try
                {
                    extractor.ExtractMode = ExtractMode.Standard;
                    for (int sheetIndex = 0; sheetIndex < extractor.SheetCount; sheetIndex++)
                    {
                        // Label each section with the index of the sheet being written
                        // (zero-based, matching the index passed to ExtractSheet).
                        System.IO.File.AppendAllText(outPath, Environment.NewLine + "Sheet # " + sheetIndex + Environment.NewLine);
                        System.IO.File.AppendAllText(outPath, extractor.ExtractSheet(sheetIndex));
                    }
                }
                finally
                {
                    extractor.Dispose();
                }
            }
            else
            {
                // Prefer the formatted extractor and fall back to the plain one.
                TextExtractor textExtractor = factory.CreateFormattedTextExtractor(inFilePath);
                if (textExtractor == null)
                {
                    textExtractor = factory.CreateTextExtractor(inFilePath);
                }

                // Neither extractor is available: fail with a descriptive error
                // instead of a NullReferenceException on the call below.
                if (textExtractor == null)
                {
                    throw new NotSupportedException("The document's format is not supported");
                }

                try
                {
                    System.IO.File.AppendAllText(outPath, textExtractor.ExtractAll());
                }
                finally
                {
                    textExtractor.Dispose();
                }
            }
        });
    }
    catch (Exception exc)
    {
        // NOTE(review): Text carries the full exception dump (exc.ToString()); confirm that
        // exposing it to the caller is intended.
        return new Response
        {
            FileName = fileName,
            FolderName = folderName,
            OutputType = "txt",
            Status = exc.Message,
            StatusCode = 500,
            Text = exc.ToString()
        };
    }
}