/// <summary>
/// Reads the work item's converted file with the reader selected by
/// <c>workItem.Extension</c>, stores the resulting text in
/// <c>workItem.DiscoveredText</c> and adds the converted file's length to
/// <c>workItem.FileSize</c>.
/// </summary>
/// <param name="workItem">
/// The item being processed. Its <c>Extension</c> must be "doc", "ppt" or "xls".
/// </param>
/// <exception cref="Exception">
/// Thrown for an unexpected extension; any exception raised while reading is
/// rethrown after the work item has been marked as failed.
/// </exception>
public override void Execute(WorkItem workItem)
{
    try
    {
        // NOTE(review): the legacy extensions are read with the OOXML readers,
        // presumably because ConvertedFileName is already in the newer format - confirm.
        switch (workItem.Extension)
        {
            case "doc":
                using (DocxDocumentReader reader = new DocxDocumentReader(workItem.ConvertedFileName))
                {
                    // The timer is started only after the reader has been constructed.
                    workItem.discoveryTimer.Start();
                    DocumentText results = reader.Read();
                    workItem.DiscoveredText = results;
                    workItem.FileSize += new FileInfo(workItem.ConvertedFileName).Length;
                }
                break;

            case "ppt":
                using (PptxDocumentReader reader = new PptxDocumentReader(workItem.ConvertedFileName))
                {
                    workItem.discoveryTimer.Start();
                    DocumentText results = reader.Read();
                    workItem.DiscoveredText = results;
                    workItem.FileSize += new FileInfo(workItem.ConvertedFileName).Length;
                }
                break;

            case "xls":
                using (XlsxDocumentReader reader = new XlsxDocumentReader(workItem.ConvertedFileName))
                {
                    workItem.discoveryTimer.Start();
                    DocumentText results = reader.Read();
                    workItem.DiscoveredText = results;
                    workItem.FileSize += new FileInfo(workItem.ConvertedFileName).Length;
                }
                break;

            default:
                throw new Exception("File extension not expected : " + workItem.Extension);
        }
    }
    catch (Exception)
    {
        // Record the failure on the work item, then rethrow with a bare "throw"
        // so the original stack trace is preserved for the caller.
        workItem.FailureMode = FailureMode.DiscoveryFailed;
        workItem.ShouldAbort = true;
        throw;
    }
    finally
    {
        // Stop the timer on both the success and the failure path.
        workItem.discoveryTimer.Stop();
    }
}
/// <summary>
/// Checks that the contents of <paramref name="tempFile"/> can be read by a
/// <c>DocxDocumentReader</c>.
/// </summary>
/// <param name="tempFile">Temp file whose memory stream is handed to the reader.</param>
/// <returns>
/// <c>true</c> when the read completes without an exception; <c>false</c> when
/// any exception is raised (the failure is logged, not rethrown).
/// </returns>
public override bool VerifyFile(TempFileForActions tempFile)
{
    try
    {
        using (BinaryData content = new BinaryData(tempFile.GetMemoryStream()))
        using (DocxDocumentReader docxReader = new DocxDocumentReader(content))
        {
            // Only the ability to read matters here; the returned text is discarded.
            docxReader.Read();
            return true;
        }
    }
    catch (System.Exception failure)
    {
        Logger.LogError("WordX Verification failed");
        Logger.LogError(failure);
        return false;
    }
}
/// <summary>
/// Reads the text of the given file using the reader that corresponds to the
/// file type held in <c>m_ft</c>.
/// </summary>
/// <param name="sFilename">Path of the file passed to the selected reader.</param>
/// <returns>
/// The text returned by the reader, or <c>null</c> when <c>m_ft</c> is not one of
/// the handled file types.
/// </returns>
DocumentText DiscoverDocument(string sFilename)
{
    switch (m_ft)
    {
        // Binary-format readers take an additional "false" argument.
        case Workshare.Policy.FileType.WordDocument:
            using (WordDocumentReader wordReader = new WordDocumentReader(sFilename, false))
            {
                return wordReader.Read();
            }

        case Workshare.Policy.FileType.ExcelSheet:
            using (ExcelDocumentReader excelReader = new ExcelDocumentReader(sFilename, false))
            {
                return excelReader.Read();
            }

        case Workshare.Policy.FileType.PowerPoint:
            using (Workshare.FCS.Lite.PptDocumentReader pptReader = new PptDocumentReader(sFilename, false))
            {
                return pptReader.Read();
            }

        // All Word "X" variants share the same reader.
        case Workshare.Policy.FileType.WordDocumentX:
        case Workshare.Policy.FileType.WordDocumentMacroX:
        case Workshare.Policy.FileType.WordDocumentTemplateX:
        case Workshare.Policy.FileType.WordDocumentMacroTemplateX:
            using (DocxDocumentReader docxReader = new DocxDocumentReader(sFilename))
            {
                return docxReader.Read();
            }

        // All Excel "X" variants share the same reader.
        case Workshare.Policy.FileType.ExcelSheetX:
        case Workshare.Policy.FileType.ExcelSheetMacroX:
        case Workshare.Policy.FileType.ExcelSheetTemplateX:
        case Workshare.Policy.FileType.ExcelSheetMacroTemplateX:
            using (XlsxDocumentReader xlsxReader = new XlsxDocumentReader(sFilename))
            {
                return xlsxReader.Read();
            }

        // All PowerPoint "X" variants share the same reader.
        case Workshare.Policy.FileType.PowerPointX:
        case Workshare.Policy.FileType.PowerPointMacroX:
        case Workshare.Policy.FileType.PowerPointTemplateX:
        case Workshare.Policy.FileType.PowerPointMacroTemplateX:
        case Workshare.Policy.FileType.PowerPointShowX:
        case Workshare.Policy.FileType.PowerPointMacroShowX:
            using (PptxDocumentReader pptxReader = new PptxDocumentReader(sFilename))
            {
                return pptxReader.Read();
            }

        // Any other file type yields no text.
        default:
            return null;
    }
}
/// <summary>
/// Re-reads the work item's cleaned file and fails if any text type that still
/// has children is of a content type other than those tolerated for the format.
/// </summary>
/// <param name="workItem">
/// The item being processed. Its <c>Extension</c> must be "doc", "ppt" or "xls".
/// On an unexpected content type, <c>Info</c> and <c>ContentType</c> are set to
/// describe the offending text type before the exception is thrown.
/// </param>
/// <exception cref="Exception">
/// Thrown for an unexpected extension or unexpected remaining content; any
/// exception raised while reading is rethrown after <c>FailureMode</c> has been
/// set to <c>FailureMode.ThingsNotCleaned</c>.
/// </exception>
public override void Execute(WorkItem workItem)
{
    try
    {
        switch (workItem.Extension)
        {
            case "doc":
                using (DocxDocumentReader reader = new DocxDocumentReader(workItem.CleanedFileName))
                {
                    DocumentText results = reader.Read();
                    foreach (IAbstractTextType tt in results.GetTextTypes())
                    {
                        // We explicitly never clean these
                        if (tt.GetContentType() == ContentType.WorkshareProperty)
                        {
                            continue;
                        }

                        // Paragraphs may keep children; anything else with children is a failure.
                        if (tt.GetChildCount() > 0 && (tt.GetContentType() != ContentType.Paragraph))
                        {
                            workItem.Info = DumpTextType(tt);
                            workItem.ContentType = tt.GetContentType();
                            throw new Exception("Unexpected content type found in cleaned doc");
                        }
                    }
                }
                break;

            case "ppt":
                using (PptxDocumentReader reader = new PptxDocumentReader(workItem.CleanedFileName))
                {
                    DocumentText results = reader.Read();
                    foreach (IAbstractTextType tt in results.GetTextTypes())
                    {
                        // Text boxes and paragraphs are tolerated in a cleaned presentation.
                        if (tt.GetContentType() == ContentType.TextBox)
                        {
                            continue;
                        }

                        if (tt.GetContentType() == ContentType.Paragraph)
                        {
                            continue;
                        }

                        if (tt.GetChildCount() > 0)
                        {
                            workItem.Info = DumpTextType(tt);
                            workItem.ContentType = tt.GetContentType();
                            throw new Exception("Unexpected content type found in cleaned ppt");
                        }
                    }
                }
                break;

            case "xls":
                using (XlsxDocumentReader reader = new XlsxDocumentReader(workItem.CleanedFileName))
                {
                    DocumentText results = reader.Read();
                    foreach (IAbstractTextType tt in results.GetTextTypes())
                    {
                        // Routing slips are skipped outright.
                        if (tt.GetContentType() == ContentType.RoutingSlip)
                        {
                            continue;
                        }

                        // Cell text may keep children; anything else with children is a failure.
                        if (tt.GetChildCount() > 0 && (tt.GetContentType() != ContentType.CellText))
                        {
                            workItem.Info = DumpTextType(tt);
                            workItem.ContentType = tt.GetContentType();
                            throw new Exception("Unexpected content type found in cleaned xls");
                        }
                    }
                }
                break;

            default:
                throw new Exception("File extension not expected : " + workItem.Extension);
        }
    }
    catch (Exception)
    {
        // Record the failure, then rethrow with a bare "throw" so the original
        // stack trace is preserved for the caller.
        workItem.FailureMode = FailureMode.ThingsNotCleaned;
        throw;
    }
}