예제 #1
0
        /// <summary>
        /// Enumerates every file below <paramref name="currentFolder"/> (all sub-directories included),
        /// adds a <c>FileObject</c> for each one to <c>fileList</c>, and dispatches the file to the
        /// parser that matches its extension (.zip, .doc/.docx, .pdf, .xls/.xlsx, .ppt/.pptx).
        /// </summary>
        /// <remarks>
        /// A failure while parsing a single file is written to that file's <c>comment</c> and the scan
        /// continues with the next file. After each file, <c>progress</c> is set to a value between
        /// 0 and 80 proportional to the number of files handled and pushed to the main window.
        /// </remarks>
        /// <param name="currentFolder">Root folder whose files are collected.</param>
        private void RunFileCollector(string currentFolder)
        {
            // Highest progress value this method reports (reached once the last file is handled).
            const double progressShare = 80.0;

            string[] filesInCurrentFolder = Directory.GetFiles(currentFolder, "*.*", SearchOption.AllDirectories);
            int      countOfFiles         = filesInCurrentFolder.Length;
            int      fileCount            = 0;

            foreach (string file in filesInCurrentFolder)
            {
                FileObject fo = new FileObject(file);
                fileList.Add(fo);

                try
                {
                    // Invariant lower-casing: the culture-sensitive ToLower() turns 'I' into a dotless
                    // 'ı' under Turkish/Azeri cultures, so ".ZIP" would never match ".zip".
                    switch (Path.GetExtension(file).ToLowerInvariant())
                    {
                    case ".zip":
                        // Slice of the progress range that this single archive may consume.
                        int increment = Convert.ToInt32(progressShare / countOfFiles);
                        ParseZip(fo.fullPath, fo.fullPath, increment);
                        break;

                    case ".doc":
                    case ".docx":
                        ParseDoc(ref fo, fo.fullPath);
                        break;

                    case ".pdf":
                        ParsePDF(ref fo, fo.fullPath);
                        break;

                    case ".xls":
                    case ".xlsx":
                        ParseXls(ref fo, fo.fullPath);
                        break;

                    case ".ppt":
                    case ".pptx":
                        ParsePpt(ref fo, fo.fullPath);
                        break;
                    }
                }
                catch (Exception ex)
                {
                    // Deliberately best-effort: one unreadable file must not abort the whole scan.
                    fo.comment = "File parsing error: " + ex.Message;
                }

                fileCount++;
                progress = Convert.ToInt32(progressShare * fileCount / countOfFiles);
                Program.mainWindow.updateProgress(progress);
            }
        }
예제 #2
0
        /// <summary>
        /// Walks the entries of the zip archive at <paramref name="filePath"/>, adds a
        /// <c>FileObject</c> for every non-directory entry to <c>fileList</c>, and parses supported
        /// entries (.zip recursively, .doc/.docx, .pdf, .xls/.xlsx, .ppt/.pptx) after extracting them
        /// to a temporary folder below <c>tmpFolder</c>.
        /// </summary>
        /// <remarks>
        /// A failure on a single entry is written to that entry's <c>comment</c> and processing
        /// continues. Extracted files and the temporary folder are removed even when a parser throws.
        /// </remarks>
        /// <param name="baseFolder">Path prefix used to build the displayed path of each entry.</param>
        /// <param name="filePath">Location on disk of the archive to open.</param>
        /// <param name="increment">Amount of progress this archive may add while its entries are processed.</param>
        private void ParseZip(string baseFolder, string filePath, int increment)
        {
            // Progress already reached when this archive starts. Entry progress is added on top of it
            // so the reported value does not fall back towards zero for every archive.
            var progressAtStart = progress;

            using (ZipArchive zipFile = ZipFile.OpenRead(filePath))
            {
                IReadOnlyCollection<ZipArchiveEntry> zippedFiles = zipFile.Entries;
                string tmpUnzipFolder = tmpFolder + "\\" + Guid.NewGuid();
                Directory.CreateDirectory(tmpUnzipFolder);
                int noOfFiles        = zippedFiles.Count;
                int processedEntries = 0;

                try
                {
                    foreach (ZipArchiveEntry zippedFile in zippedFiles)
                    {
                        string zippedPath = zippedFile.FullName;

                        // Entries whose name ends with '/' are directories; nothing to parse.
                        // EndsWith (unlike Substring(Length - 1)) is also safe for an empty name.
                        if (zippedPath.EndsWith("/", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        FileObject fo = new FileObject(baseFolder + "\\" + zippedPath.Replace('/', '\\'));
                        fileList.Add(fo);
                        string tmpFile = Path.Combine(tmpUnzipFolder, zippedFile.Name);

                        try
                        {
                            try
                            {
                                // Invariant lower-casing keeps ".ZIP" matching ".zip" under every culture.
                                switch (Path.GetExtension(zippedPath).ToLowerInvariant())
                                {
                                case ".zip":
                                    zippedFile.ExtractToFile(tmpFile, true);
                                    int zipincrement = Convert.ToInt32(Convert.ToDouble(increment) / Convert.ToDouble(noOfFiles));
                                    ParseZip(fo.fullPath, tmpFile, zipincrement);
                                    break;

                                case ".doc":
                                case ".docx":
                                    zippedFile.ExtractToFile(tmpFile, true);
                                    ParseDoc(ref fo, tmpFile);
                                    break;

                                case ".pdf":
                                    zippedFile.ExtractToFile(tmpFile, true);
                                    ParsePDF(ref fo, tmpFile);
                                    break;

                                case ".xls":
                                case ".xlsx":
                                    zippedFile.ExtractToFile(tmpFile, true);
                                    ParseXls(ref fo, tmpFile);
                                    break;

                                case ".ppt":
                                case ".pptx":
                                    zippedFile.ExtractToFile(tmpFile, true);
                                    ParsePpt(ref fo, tmpFile);
                                    break;
                                }
                            }
                            finally
                            {
                                // Remove the extracted copy even when the parser threw; otherwise it
                                // would stay behind in the temporary folder.
                                if (File.Exists(tmpFile))
                                {
                                    File.Delete(tmpFile);
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            // Deliberately best-effort: a broken entry must not abort the archive.
                            fo.comment = "File parsing error: " + ex.Message;
                        }

                        processedEntries++;
                        progress = progressAtStart + Convert.ToInt32(increment * Convert.ToDouble(processedEntries) / Convert.ToDouble(noOfFiles));
                        Program.mainWindow.updateProgress(progress);
                    }
                }
                finally
                {
                    // Recursive delete so that leftovers cannot make the cleanup itself fail.
                    Directory.Delete(tmpUnzipFolder, true);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Loads the workbook at <paramref name="filePath"/> and fills the statistics of
        /// <paramref name="fo"/>: worksheet count (as <c>pageCount</c>), number of embedded documents
        /// and images (derived from the OLE objects of every sheet), protection flag, and the word
        /// and character counts of a text export of all sheets.
        /// </summary>
        /// <remarks>
        /// The text export is written to a temporary file in a fresh folder below <c>tmpFolder</c>;
        /// the folder is removed again even when exporting or counting fails.
        /// </remarks>
        /// <param name="fo">File record that receives the collected statistics.</param>
        /// <param name="filePath">Location on disk of the workbook to analyse.</param>
        private void ParseXls(ref FileObject fo, string filePath)
        {
            Workbook wb = new Workbook(filePath);
            WorksheetCollection wsc = wb.Worksheets;

            fo.pageCount = wsc.Count;

            int  NoOfImages       = 0;
            int  NoOfEmbeddedDocs = 0;
            bool isProtected      = false;

            foreach (Worksheet ws in wsc)
            {
                // One protected sheet is enough to flag the whole workbook.
                if (ws.IsProtected)
                {
                    isProtected = true;
                }

                foreach (OleObject ole in ws.OleObjects)
                {
                    switch (ole.FileFormatType)
                    {
                    case FileFormatType.Doc:
                    case FileFormatType.Xlsm:
                    case FileFormatType.Docx:
                    case FileFormatType.Xlsx:
                    case FileFormatType.Ppt:
                    case FileFormatType.Pdf:
                    case FileFormatType.CSV:
                    case FileFormatType.VSD:
                    case FileFormatType.VSDX:
                    case FileFormatType.Html:
                    case FileFormatType.XML:
                        NoOfEmbeddedDocs++;
                        break;

                    // Everything that is not a known document format is counted as an image.
                    case FileFormatType.BMP:
                    case FileFormatType.TIFF:
                    default:
                        NoOfImages++;
                        break;
                    }
                }
            }

            fo.embeddedDocsCount = NoOfEmbeddedDocs;
            fo.imageCount        = NoOfImages;
            fo.hasPassword       = isProtected;

            string tmpFolderToExtract = tmpFolder + "\\" + Guid.NewGuid();

            Directory.CreateDirectory(tmpFolderToExtract);
            string tmpTextFile = tmpFolderToExtract + "\\" + "tmpTextexport.txt";

            try
            {
                TxtSaveOptions opts = new TxtSaveOptions();
                opts.Separator = ' ';

                // Append the text export of every sheet directly to the temporary file instead of
                // re-copying one ever-growing byte array per sheet.
                using (FileStream output = File.Create(tmpTextFile))
                {
                    for (int idx = 0; idx < wb.Worksheets.Count; idx++)
                    {
                        using (MemoryStream ms = new MemoryStream())
                        {
                            // The sheet is made active before each save - presumably because the
                            // text export only covers the active sheet (confirm against Aspose docs).
                            wb.Worksheets.ActiveSheetIndex = idx;
                            wb.Save(ms, opts);

                            // ToArray() also works should Save have closed the stream.
                            byte[] sheetData = ms.ToArray();
                            output.Write(sheetData, 0, sheetData.Length);
                        }
                    }
                }

                fo.wordCount      = GetWordCount(tmpTextFile);
                fo.characterCount = GetCharCount(tmpTextFile);
            }
            finally
            {
                // Recursive delete removes the export file together with its folder, also when
                // exporting or counting threw.
                if (Directory.Exists(tmpFolderToExtract))
                {
                    Directory.Delete(tmpFolderToExtract, true);
                }
            }
        }