/// <summary>
/// Walks through every entity of an OST container, printing its description
/// followed by its extracted text
/// </summary>
/// <param name="fileName">Name of the OST file; turned into a path via <c>Common.getFilePath</c></param>
public static void ExtractFromOstContainer(string fileName)
{
    //ExStart:ExtractFromOstContainer
    ExtractorFactory extractorFactory = new ExtractorFactory();

    // Resolve the location of the OST file
    string ostPath = Common.getFilePath(fileName);

    using (var storage = new PersonalStorageContainer(ostPath))
    {
        for (int index = 0; index < storage.Entities.Count; index++)
        {
            var entity = storage.Entities[index];

            // General description of the entity
            Console.WriteLine(entity.Name);
            Console.WriteLine(entity.Path.ToString());
            Console.WriteLine(entity.MediaType);

            // Values stored under the EmailSubject / EmailSender / EmailReceiver keys
            Console.WriteLine(entity[PersonalStorageContainer.EmailSubject]);
            Console.WriteLine(entity[PersonalStorageContainer.EmailSender]);
            Console.WriteLine(entity[PersonalStorageContainer.EmailReceiver]);

            // A null extractor is tolerated by the using statement and reported below
            using (TextExtractor textExtractor = extractorFactory.CreateTextExtractor(entity.OpenStream()))
            {
                Console.WriteLine("Content:");
                if (textExtractor == null)
                {
                    Console.WriteLine("The document format is not supported");
                }
                else
                {
                    Console.WriteLine(textExtractor.ExtractAll());
                }
            }
        }
    }
    //ExEnd:ExtractFromOstContainer
}
/// <summary>
/// Reads concrete files from a ZIP folder and prints the text extracted from each of them
/// </summary>
/// <param name="folderName">Name of the zipped folder</param>
public static void ReadConcreteFile(string folderName)
{
    //ExStart:ReadConcreteFile
    //get ZIP folder's path
    string folderPath = Common.getFilePath(folderName);
    ExtractorFactory extractorFactory = new ExtractorFactory();

    //initialize ZIP container
    using (var container = new ZipContainer(folderPath))
    {
        //loop through all the entities in the folder
        for (int i = 0; i < container.Entities.Count; i++)
        {
            //create a text extractor for the entity through the factory's CreateTextExtractor function
            using (TextExtractor extractor = extractorFactory.CreateTextExtractor(container.Entities[i].OpenStream()))
            {
                // The factory can hand back null when it has no extractor for the entity;
                // report that instead of failing with a NullReferenceException so the
                // remaining entities are still processed.
                Console.WriteLine(extractor != null ? extractor.ExtractAll() : "The document format is not supported");
            }
        }
    }
    //ExEnd:ReadConcreteFile
}
/// <summary>
/// Produces a plain-text report for a file: the detected encoding, the metadata
/// (when a metadata extractor is available) and the parsed text content.
/// The work is handed to <c>ProcessTask</c> as a callback.
/// </summary>
/// <param name="fileName">Name of the file to parse; its extension decides whether the spreadsheet path is used</param>
/// <param name="folderName">Folder name forwarded to <c>ProcessTask</c></param>
/// <returns>
/// The response returned by <c>ProcessTask</c>, or a response with status code 500
/// carrying the exception details when an exception reaches this method
/// </returns>
private async Task <Response> ParseFileText(string fileName, string folderName)
{
    try
    {
        return await ProcessTask(fileName, folderName, ".txt", false, "", delegate(string inFilePath, string outPath, string zipOutFolder)
        {
            EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

            if (!Directory.Exists(zipOutFolder))
            {
                Directory.CreateDirectory(zipOutFolder);
            }

            // Detection only reads the input, so open it read-only; the two-argument
            // FileStream constructor asks for read/write access and fails on read-only files.
            using (Stream stream = new FileStream(inFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                System.IO.File.WriteAllText(outPath, "Encoding: " + detector.Detect(stream, true) + Environment.NewLine);
            }

            ExtractorFactory factory = new ExtractorFactory();
            MetadataExtractor metadataExtractor = factory.CreateMetadataExtractor(inFilePath);
            if (metadataExtractor != null)
            {
                MetadataCollection metadataCollection = metadataExtractor.ExtractMetadata(inFilePath);

                // Collect the whole metadata section first so the output file is opened once
                // rather than once per key.
                System.Text.StringBuilder metadataText = new System.Text.StringBuilder();
                metadataText.Append(Environment.NewLine + "Metadata:" + Environment.NewLine);
                foreach (string key in metadataCollection.Keys)
                {
                    metadataText.Append(string.Format("{0} = {1}", key, metadataCollection[key]) + Environment.NewLine);
                }

                System.IO.File.AppendAllText(outPath, metadataText.ToString());
            }

            System.IO.File.AppendAllText(outPath, Environment.NewLine + "Parsed content:" + Environment.NewLine);

            // TrimStart copes with a file name that has no extension (Substring(1) would throw
            // on the empty string); the invariant culture keeps the result stable across locales.
            string fileExt = Path.GetExtension(fileName).TrimStart('.').ToLowerInvariant();

            if (GetFormatType(fileExt) == FormatType.Excel)
            {
                // Dispose the extractor so the spreadsheet is released once it has been read
                using (CellsTextExtractor extractor = new CellsTextExtractor(inFilePath))
                {
                    extractor.ExtractMode = ExtractMode.Standard;
                    for (int sheetIndex = 0; sheetIndex < extractor.SheetCount; sheetIndex++)
                    {
                        // Label each sheet with its own (zero-based) index, not the total sheet count
                        System.IO.File.AppendAllText(outPath, Environment.NewLine + "Sheet # " + sheetIndex + Environment.NewLine);
                        System.IO.File.AppendAllText(outPath, extractor.ExtractSheet(sheetIndex));
                    }
                }
            }
            else
            {
                // Prefer the formatted extractor and fall back to the plain one
                TextExtractor textExtractor = factory.CreateFormattedTextExtractor(inFilePath);
                if (textExtractor == null)
                {
                    textExtractor = factory.CreateTextExtractor(inFilePath);
                }

                // Neither extractor is available: fail with a meaningful message instead of a
                // NullReferenceException (the catch below turns it into the 500 response).
                if (textExtractor == null)
                {
                    throw new NotSupportedException("The document format is not supported");
                }

                using (textExtractor)
                {
                    System.IO.File.AppendAllText(outPath, textExtractor.ExtractAll());
                }
            }
        });
    }
    catch (Exception exc)
    {
        return new Response
        {
            FileName = fileName,
            FolderName = folderName,
            OutputType = "txt",
            Status = exc.Message,
            StatusCode = 500,
            Text = exc.ToString()
        };
    }
}
/// <summary>
/// Looks up one entity inside a ZIP container and prints the text extracted from it
/// </summary>
/// <param name="folderName">Name of the zipped folder</param>
public static void RetrieveEntity(string folderName)
{
    //ExStart:RetrieveEntity_17.12
    // Resolve the location of the ZIP folder
    string zipPath = Common.GetFilePath(folderName);
    ExtractorFactory factory = new ExtractorFactory();

    // Open the ZIP container
    using (var zip = new ZipContainer(zipPath))
    {
        Container.Entity entry = zip.GetEntity("META-INF\\container.xml");

        // Nothing to extract when the entity is missing
        if (entry == null)
        {
            throw new GroupDocsTextException("File not found");
        }

        // The using statement releases the extractor afterwards and accepts a null one
        using (TextExtractor textExtractor = factory.CreateTextExtractor(entry.OpenStream()))
        {
            if (textExtractor == null)
            {
                Console.WriteLine("Document type isn't supported");
            }
            else
            {
                // Extract the text (the document type is supported)
                Console.WriteLine(textExtractor.ExtractAll());
            }
        }
    }
    //ExEnd:RetrieveEntity_17.12
}