/// <summary>
        /// Walks through every entity of an OST container, printing its descriptive
        /// fields and then the text extracted from it.
        /// </summary>
        /// <param name="fileName">Name of the OST file; resolved to a path via <c>Common.getFilePath</c>.</param>
        public static void ExtractFromOstContainer(string fileName)
        {
            //ExStart:ExtractFromOstContainer
            ExtractorFactory extractorFactory = new ExtractorFactory();
            // Resolve the location of the OST file
            string ostPath = Common.getFilePath(fileName);

            using (var storage = new PersonalStorageContainer(ostPath))
            {
                for (int index = 0; index < storage.Entities.Count; index++)
                {
                    var entity = storage.Entities[index];

                    // General information about the entity
                    Console.WriteLine(entity.Name);
                    Console.WriteLine(entity.Path.ToString());
                    Console.WriteLine(entity.MediaType);

                    // E-mail specific fields stored with the entity
                    Console.WriteLine(entity[PersonalStorageContainer.EmailSubject]);
                    Console.WriteLine(entity[PersonalStorageContainer.EmailSender]);
                    Console.WriteLine(entity[PersonalStorageContainer.EmailReceiver]);

                    using (TextExtractor textExtractor = extractorFactory.CreateTextExtractor(entity.OpenStream()))
                    {
                        Console.WriteLine("Content:");

                        // The factory result is null-checked: no extractor means no text to print
                        if (textExtractor != null)
                        {
                            Console.WriteLine(textExtractor.ExtractAll());
                        }
                        else
                        {
                            Console.WriteLine("The document format is not supported");
                        }
                    }
                }
            }
            //ExEnd:ExtractFromOstContainer
        }
        /// <summary>
        /// Reads every file stored in a ZIP folder and prints its extracted text to the console.
        /// Entities for which no text extractor can be created are reported instead of
        /// aborting the whole run.
        /// </summary>
        /// <param name="folderName">Name of the zipped folder</param>
        public static void ReadConcreteFile(string folderName)
        {
            //ExStart:ReadConcreteFile
            //get ZIP folder's path
            string           folderPath       = Common.getFilePath(folderName);
            ExtractorFactory extractorFactory = new ExtractorFactory();

            //initialize ZIP container
            using (var container = new ZipContainer(folderPath))
            {
                //loop through all the entities in the folder
                for (int i = 0; i < container.Entities.Count; i++)
                {
                    //create a text extractor for the entity's stream via the factory
                    using (TextExtractor extractor = extractorFactory.CreateTextExtractor(container.Entities[i].OpenStream()))
                    {
                        //the factory result can be null (the other samples check for it), so
                        //guard the call rather than failing with a NullReferenceException
                        Console.WriteLine(extractor != null ? extractor.ExtractAll() : "The document format is not supported");
                    }
                }
            }
            //ExEnd:ReadConcreteFile
        }
        /// <summary>
        /// Parses a document and writes the detected encoding, the metadata (when a metadata
        /// extractor is available) and the extracted text to a text file.
        /// </summary>
        /// <remarks>
        /// The actual work is passed to <c>ProcessTask</c> as a callback that receives the input
        /// file path, the output file path and the output folder.
        /// </remarks>
        /// <param name="fileName">Name of the source file; its extension decides whether the Excel path is used.</param>
        /// <param name="folderName">Folder name forwarded to <c>ProcessTask</c> and echoed in the error response.</param>
        /// <returns>
        /// The response returned by <c>ProcessTask</c>, or a response with status code 500
        /// carrying the exception details when an exception reaches this method.
        /// </returns>
        private async Task <Response> ParseFileText(string fileName, string folderName)
        {
            try
            {
                return(await ProcessTask(fileName, folderName, ".txt", false, "", delegate(string inFilePath, string outPath, string zipOutFolder)
                {
                    // NOTE(review): code page 1251 is presumably the detector's default/fallback encoding - confirm
                    EncodingDetector detector = new EncodingDetector(Encoding.GetEncoding(1251));

                    // CreateDirectory does nothing when the folder already exists
                    Directory.CreateDirectory(zipOutFolder);

                    // The input is only read, so do not request write access (fails on read-only files)
                    using (Stream stream = new FileStream(inFilePath, FileMode.Open, FileAccess.Read))
                    {
                        System.IO.File.WriteAllText(outPath, "Encoding: " + detector.Detect(stream, true) + Environment.NewLine);
                    }

                    ExtractorFactory factory = new ExtractorFactory();
                    MetadataExtractor metadataExtractor = factory.CreateMetadataExtractor(inFilePath);
                    if (metadataExtractor != null)
                    {
                        MetadataCollection metadataCollection = metadataExtractor.ExtractMetadata(inFilePath);

                        System.IO.File.AppendAllText(outPath, Environment.NewLine + "Metadata:" + Environment.NewLine);
                        foreach (string key in metadataCollection.Keys)
                        {
                            System.IO.File.AppendAllText(outPath, string.Format("{0} = {1}", key, metadataCollection[key]) + Environment.NewLine);
                        }
                    }

                    System.IO.File.AppendAllText(outPath, Environment.NewLine + "Parsed content:" + Environment.NewLine);

                    // TrimStart copes with names that have no extension (Substring(1) would throw on "");
                    // the invariant culture keeps the lower-casing independent of the server locale
                    string fileExt = Path.GetExtension(fileName).TrimStart('.').ToLowerInvariant();
                    if (GetFormatType(fileExt) == FormatType.Excel)
                    {
                        using (CellsTextExtractor extractor = new CellsTextExtractor(inFilePath))
                        {
                            extractor.ExtractMode = ExtractMode.Standard;
                            for (int sheetIndex = 0; sheetIndex < extractor.SheetCount; sheetIndex++)
                            {
                                // Label each sheet with its own (zero-based) index, not with the total sheet count
                                System.IO.File.AppendAllText(outPath, Environment.NewLine + "Sheet # " + sheetIndex + Environment.NewLine);
                                System.IO.File.AppendAllText(outPath, extractor.ExtractSheet(sheetIndex));
                            }
                        }
                    }
                    else
                    {
                        // Prefer the formatted extractor and fall back to the plain one
                        TextExtractor textExtractor = factory.CreateFormattedTextExtractor(inFilePath);
                        if (textExtractor == null)
                        {
                            textExtractor = factory.CreateTextExtractor(inFilePath);
                        }

                        // Neither extractor is available: fail with a clear message instead of a NullReferenceException
                        if (textExtractor == null)
                        {
                            throw new NotSupportedException("The document format is not supported");
                        }

                        using (textExtractor)
                        {
                            System.IO.File.AppendAllText(outPath, textExtractor.ExtractAll());
                        }
                    }
                }));
            }
            catch (Exception exc)
            {
                return(new Response {
                    FileName = fileName, FolderName = folderName, OutputType = "txt", Status = exc.Message, StatusCode = 500, Text = exc.ToString()
                });
            }
        }
        // Example no. 4 (separator that appears to be left over from the listing these snippets were copied from)
        /// <summary>
        /// Looks up the <c>META-INF\container.xml</c> entity of a ZIP container and
        /// prints the text extracted from it.
        /// </summary>
        /// <param name="folderName">Name of the zipped folder</param>
        /// <exception cref="GroupDocsTextException">The entity is not present in the container.</exception>
        public static void RetrieveEntity(string folderName)
        {
            //ExStart:RetrieveEntity_17.12
            // Resolve the location of the ZIP folder
            string zipPath = Common.GetFilePath(folderName);

            ExtractorFactory factory = new ExtractorFactory();

            // Open the ZIP container
            using (var zip = new ZipContainer(zipPath))
            {
                Container.Entity entry = zip.GetEntity("META-INF\\container.xml");

                // Nothing to extract when the entity is missing
                if (entry == null)
                {
                    throw new GroupDocsTextException("File not found");
                }

                // The using statement tolerates a null extractor and disposes a non-null one
                using (TextExtractor textExtractor = factory.CreateTextExtractor(entry.OpenStream()))
                {
                    if (textExtractor == null)
                    {
                        Console.WriteLine("Document type isn't supported");
                    }
                    else
                    {
                        // Print the extracted text
                        Console.WriteLine(textExtractor.ExtractAll());
                    }
                }
            }

            //ExEnd:RetrieveEntity_17.12
        }