/// <summary>
/// Enumerates every file below <paramref name="currentFolder"/> (sub-folders included), adds a
/// <c>FileObject</c> for each one to <c>fileList</c> and runs the parser that matches the file extension.
/// </summary>
/// <param name="currentFolder">Root folder that is searched with <c>SearchOption.AllDirectories</c>.</param>
/// <remarks>
/// A parser exception is caught per file and stored in that file's <c>comment</c>, so one broken
/// document does not stop the scan. After each file the <c>progress</c> value is set to
/// <c>80 * processedFiles / totalFiles</c> and pushed to the main window.
/// </remarks>
private void RunFileCollector(string currentFolder)
{
    string[] filesInCurrentFolder = Directory.GetFiles(currentFolder, "*.*", SearchOption.AllDirectories);
    int countOfFiles = filesInCurrentFolder.Length;
    if (countOfFiles == 0)
    {
        // Nothing to scan; also keeps the divisions below away from a zero divisor.
        return;
    }

    // Share of the 0-80 progress range that a single top-level file represents.
    // It is loop-invariant, so it is computed once and handed to ParseZip for archives.
    int increment = Convert.ToInt32(80.0 / Convert.ToDouble(countOfFiles));
    int fileCount = 0;

    foreach (string file in filesInCurrentFolder)
    {
        FileObject fo = new FileObject(file);
        fileList.Add(fo);

        try
        {
            // ToLowerInvariant: file extensions are identifiers, not linguistic text. The
            // culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish cultures,
            // which would make ".ZIP" miss the ".zip" case.
            switch (Path.GetExtension(file).ToLowerInvariant())
            {
                case ".zip":
                    ParseZip(fo.fullPath, fo.fullPath, increment);
                    break;
                case ".doc":
                case ".docx":
                    ParseDoc(ref fo, fo.fullPath);
                    break;
                case ".pdf":
                    ParsePDF(ref fo, fo.fullPath);
                    break;
                case ".xls":
                case ".xlsx":
                    ParseXls(ref fo, fo.fullPath);
                    break;
                case ".ppt":
                case ".pptx":
                    ParsePpt(ref fo, fo.fullPath);
                    break;
            }
        }
        catch (Exception ex)
        {
            // Best effort: record the failure on the file entry and continue with the next file.
            fo.comment = "File parsing error: " + ex.Message;
        }

        fileCount++;
        progress = Convert.ToInt32(80.0 * Convert.ToDouble(fileCount) / Convert.ToDouble(countOfFiles));
        Program.mainWindow.updateProgress(progress);
    }
}
/// <summary>
/// Walks the entries of a zip archive, adds a <c>FileObject</c> for every non-directory entry to
/// <c>fileList</c> and parses supported document types by extracting them to a temporary folder.
/// Nested archives are handled recursively.
/// </summary>
/// <param name="baseFolder">Path prefix used to build the logical path of each entry (the archive's own path as seen by the user).</param>
/// <param name="filePath">Physical path of the archive to open.</param>
/// <param name="increment">Amount of the <c>progress</c> value that this whole archive accounts for.</param>
/// <remarks>
/// Parser exceptions are caught per entry and stored in the entry's <c>comment</c>. Extracted files
/// and the temporary folder are removed even when a parser throws.
/// </remarks>
private void ParseZip(string baseFolder, string filePath, int increment)
{
    using (ZipArchive zipFile = ZipFile.OpenRead(filePath))
    {
        IReadOnlyCollection<ZipArchiveEntry> zippedFiles = zipFile.Entries;
        string tmpUnzipFolder = tmpFolder + "\\" + Guid.NewGuid();
        Directory.CreateDirectory(tmpUnzipFolder);

        try
        {
            int noOfFiles = zippedFiles.Count;
            int FileCount = 0;

            // Progress reached before this archive was opened. The archive's own share is added
            // on top of it, so the reported value keeps growing instead of dropping back to a
            // value between 0 and increment.
            var progressBase = progress;

            foreach (ZipArchiveEntry zippedFile in zippedFiles)
            {
                string zippedPath = zippedFile.FullName;

                // Entries whose name ends with '/' are directories. EndsWith also copes with an
                // empty name, where Substring(Length - 1) would throw.
                if (zippedPath.EndsWith("/", StringComparison.Ordinal))
                {
                    continue;
                }

                FileObject fo = new FileObject(baseFolder + "\\" + zippedPath.Replace('/', '\\'));
                fileList.Add(fo);
                string tmpFile = Path.Combine(tmpUnzipFolder, zippedFile.Name);

                try
                {
                    // Invariant lower-casing: extensions are identifiers, not linguistic text.
                    switch (Path.GetExtension(zippedPath).ToLowerInvariant())
                    {
                        case ".zip":
                            zippedFile.ExtractToFile(tmpFile, true);
                            int zipincrement = Convert.ToInt32(Convert.ToDouble(increment) / Convert.ToDouble(noOfFiles));
                            ParseZip(fo.fullPath, tmpFile, zipincrement);
                            break;
                        case ".doc":
                        case ".docx":
                            zippedFile.ExtractToFile(tmpFile, true);
                            ParseDoc(ref fo, tmpFile);
                            break;
                        case ".pdf":
                            zippedFile.ExtractToFile(tmpFile, true);
                            ParsePDF(ref fo, tmpFile);
                            break;
                        case ".xls":
                        case ".xlsx":
                            zippedFile.ExtractToFile(tmpFile, true);
                            ParseXls(ref fo, tmpFile);
                            break;
                        case ".ppt":
                        case ".pptx":
                            zippedFile.ExtractToFile(tmpFile, true);
                            ParsePpt(ref fo, tmpFile);
                            break;
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: record the failure on the entry and continue with the next one.
                    fo.comment = "File parsing error: " + ex.Message;
                }
                finally
                {
                    // Remove the extracted copy even when the parser threw; otherwise the file
                    // stays behind and the folder cleanup below has to deal with it.
                    // File.Delete is a no-op when nothing was extracted.
                    try
                    {
                        File.Delete(tmpFile);
                    }
                    catch (IOException)
                    {
                        // Still in use; the recursive folder delete below retries the removal.
                    }
                    catch (UnauthorizedAccessException)
                    {
                        // Same as above: left to the recursive folder delete.
                    }
                }

                FileCount++;
                progress = Convert.ToInt32(progressBase + increment * Convert.ToDouble(FileCount) / Convert.ToDouble(noOfFiles));
                Program.mainWindow.updateProgress(progress);
            }
        }
        finally
        {
            // Recursive so that a leftover extracted file cannot make the cleanup fail with
            // "directory is not empty".
            if (Directory.Exists(tmpUnzipFolder))
            {
                Directory.Delete(tmpUnzipFolder, true);
            }
        }
    }
}
/// <summary>
/// Reads a spreadsheet with Aspose.Cells and fills the statistics of <paramref name="fo"/>:
/// worksheet count, embedded document count, image count, protection flag, word count and
/// character count.
/// </summary>
/// <param name="fo">File entry that receives the collected values.</param>
/// <param name="filePath">Physical path of the workbook to open.</param>
/// <remarks>
/// Word and character counts are obtained by exporting every worksheet as text into one temporary
/// file and passing that file to <c>GetWordCount</c> and <c>GetCharCount</c>. The temporary folder
/// is removed afterwards, also when the export or the counting throws.
/// </remarks>
private void ParseXls(ref FileObject fo, string filePath)
{
    Workbook wb = new Workbook(filePath);
    WorksheetCollection wsc = wb.Worksheets;
    fo.pageCount = wsc.Count;

    int NoOfImages = 0;
    int NoOfEmbeddedDocs = 0;
    bool isProtected = false;

    foreach (Worksheet ws in wsc)
    {
        // One protected sheet is enough to flag the whole workbook.
        if (ws.IsProtected)
        {
            isProtected = true;
        }

        foreach (OleObject ole in ws.OleObjects)
        {
            switch (ole.FileFormatType)
            {
                case FileFormatType.Doc:
                case FileFormatType.Xlsm:
                case FileFormatType.Docx:
                case FileFormatType.Xlsx:
                case FileFormatType.Ppt:
                case FileFormatType.Pdf:
                case FileFormatType.CSV:
                case FileFormatType.VSD:
                case FileFormatType.VSDX:
                case FileFormatType.Html:
                case FileFormatType.XML:
                    NoOfEmbeddedDocs++;
                    break;
                default:
                    // Every other OLE format (BMP and TIFF included) is counted as an image.
                    NoOfImages++;
                    break;
            }
        }
    }

    fo.embeddedDocsCount = NoOfEmbeddedDocs;
    fo.imageCount = NoOfImages;
    fo.hasPassword = isProtected;

    string tmpFolderToExtract = tmpFolder + "\\" + Guid.NewGuid();
    Directory.CreateDirectory(tmpFolderToExtract);

    try
    {
        string tmpTextFile = Path.Combine(tmpFolderToExtract, "tmpTextexport.txt");

        TxtSaveOptions opts = new TxtSaveOptions();
        opts.Separator = ' ';

        // Each sheet is saved separately and appended to the same output file.
        // NOTE(review): presumably the text export only covers the active sheet, which is why
        // the active sheet index is advanced before every save - confirm against Aspose docs.
        using (FileStream output = File.Create(tmpTextFile))
        {
            for (int idx = 0; idx < wb.Worksheets.Count; idx++)
            {
                using (MemoryStream ms = new MemoryStream())
                {
                    wb.Worksheets.ActiveSheetIndex = idx;
                    wb.Save(ms, opts);

                    // WriteTo copies the whole buffer regardless of the stream position and
                    // avoids re-copying all previously exported sheets for every new sheet.
                    ms.WriteTo(output);
                }
            }
        }

        fo.wordCount = GetWordCount(tmpTextFile);
        fo.characterCount = GetCharCount(tmpTextFile);
    }
    finally
    {
        // Recursive delete removes the export file together with its folder.
        if (Directory.Exists(tmpFolderToExtract))
        {
            Directory.Delete(tmpFolderToExtract, true);
        }
    }
}