/// <summary>
/// Copies a test document, cleans one content type from the copy, then re-reads the copy
/// and asserts that no nodes of that type (and no hidden-text nodes) remain.
/// </summary>
/// <param name="fileName">Document name; it is appended to TESTFILE_DIR to form the source path.</param>
/// <param name="contentType">The content type to clean and then verify.</param>
/// <param name="textThatShouldntBeCleaned">Passed to CheckHaventCleanedTooMuch together with the paragraph text type.</param>
protected void TestCleanTextTypeFromDocument(string fileName, ContentType contentType, string[] textThatShouldntBeCleaned)
{
    string sourcePath = TESTFILE_DIR + fileName;
    string cleanedPath = sourcePath + "_cleaned.doc";

    // Work on a copy so the source test file is left as it was.
    File.Copy(sourcePath, cleanedPath, true);

    using (WordDocumentReader cleaningReader = new WordDocumentReader(cleanedPath, true))
    {
        cleaningReader.Clean(BuildContentTypeList(contentType));
    }

    using (WordDocumentReader verifyingReader = new WordDocumentReader(cleanedPath, false))
    {
        DocumentText documentText = verifyingReader.Read();
        IAbstractTextType cleanedType = documentText.GetUniqueTextType(contentType);
        IAbstractTextType hiddenType = documentText.GetUniqueTextType(ContentType.HiddenText);

        bool nothingReported = cleanedType == null && hiddenType == null;
        if (nothingReported)
        {
            // NOTE(review): the "cleaned too much" check only runs on this path, i.e. when
            // neither text type is reported at all - confirm that this is intentional.
            CheckHaventCleanedTooMuch(documentText.GetUniqueTextType(ContentType.Paragraph), textThatShouldntBeCleaned);
        }
        else
        {
            if (cleanedType != null)
            {
                Assert.AreEqual(0, cleanedType.Nodes.Count, "Expected all the items of type " + contentType.ToString() + " to have been cleaned");
            }

            if (hiddenType != null)
            {
                Assert.AreEqual(0, hiddenType.Nodes.Count, "Expected no significant hidden text to show up");
            }
        }
    }
}
/// <summary>
/// Copies a document, cleans track changes from the copy, then re-reads the copy and
/// asserts that no track-change nodes remain.
/// </summary>
/// <param name="fileName">Path of the document to copy; it is used as given.</param>
/// <param name="checkHiddenText">When true, also asserts that no hidden-text nodes are reported.</param>
private void TestCleanTCsFromDocument(string fileName, bool checkHiddenText)
{
    string cleanedPath = fileName + "_cleaned.doc";

    // Work on a copy so the source document is left as it was.
    File.Copy(fileName, cleanedPath, true);

    using (WordDocumentReader cleaningReader = new WordDocumentReader(cleanedPath, true))
    {
        cleaningReader.Clean(BuildContentTypeList(ContentType.TrackChange));
    }

    using (WordDocumentReader verifyingReader = new WordDocumentReader(cleanedPath, false))
    {
        DocumentText documentText = verifyingReader.Read();
        IAbstractTextType trackChanges = documentText.GetUniqueTextType(ContentType.TrackChange);
        IAbstractTextType hiddenText = documentText.GetUniqueTextType(ContentType.HiddenText);

        // A null text type means there is nothing of that kind to verify.
        if (trackChanges != null)
        {
            Assert.AreEqual(0, trackChanges.Nodes.Count, "Expected all the track changes to have been cleaned");
        }

        if (checkHiddenText && hiddenText != null)
        {
            Assert.AreEqual(0, hiddenText.Nodes.Count, "Expected no significant hidden text to show up");
        }
    }
}
/// <summary>
/// Opens <paramref name="fileName"/> with a WordDocumentReader and cleans it, passing both
/// lists straight through to the reader. Any failure is logged and then rethrown.
/// </summary>
/// <param name="fileName">Path of the Word document to clean.</param>
/// <param name="listContentTypes">Content types handed to the reader's Clean call.</param>
/// <param name="listExclusion">Exclusions handed to the reader's Clean call.</param>
private void CleanFile(string fileName, List<ContentType> listContentTypes, List<Exclusion> listExclusion)
{
    try
    {
        using (WordDocumentReader reader = new WordDocumentReader(fileName, true))
        {
            reader.Clean(listContentTypes, listExclusion);
        }
    }
    catch (System.Exception failure)
    {
        Logger.LogError("Word Binary cleaning failed");
        Logger.LogError(failure);

        // Bare rethrow so the caller sees the original stack trace.
        throw;
    }
}
/// <summary>
/// Original incarnation of lightspeed clean method. Left for comparison. There is at least one document that gets corrupted by the
/// lightspeed clean user action as opposed to the method below. See Rally DE8912 in the Professional &amp; Deltaview project.
/// </summary>
/// <remarks>
/// Dispatches on m_ft. The Word, Excel and PowerPoint binary formats are cleaned through their
/// readers' Clean method against m_sFileForBinClean. The OOXML formats are cleaned into a
/// temporary file which is then copied over m_sFileForBinClean. Any other file type is left
/// untouched. When the clean completes, the elapsed time in seconds is stored in m_binCleanTime.
/// </remarks>
/// <param name="listContentTypes">The content types passed to the reader's Clean / CleanTo call.</param>
private void DoLightSpeedClean(List<ContentType> listContentTypes)
{
    System.Diagnostics.Stopwatch watch = System.Diagnostics.Stopwatch.StartNew();

    switch (m_ft)
    {
        case Workshare.Policy.FileType.WordDocument:
            using (WordDocumentReader Word2003Reader1 = new WordDocumentReader(m_sFileForBinClean, true))
            {
                Word2003Reader1.Clean(listContentTypes);
            }
            break;

        case Workshare.Policy.FileType.ExcelSheet:
            using (ExcelDocumentReader Excel2003Reader1 = new ExcelDocumentReader(m_sFileForBinClean, true))
            {
                Excel2003Reader1.Clean(listContentTypes);
            }
            break;

        case Workshare.Policy.FileType.PowerPoint:
            using (Workshare.FCS.Lite.PptDocumentReader Ppt2003Reader1 = new PptDocumentReader(m_sFileForBinClean, true))
            {
                Ppt2003Reader1.Clean(listContentTypes);
            }
            break;

        case Workshare.Policy.FileType.WordDocumentX:
        case Workshare.Policy.FileType.WordDocumentMacroX:
        case Workshare.Policy.FileType.WordDocumentTemplateX:
        case Workshare.Policy.FileType.WordDocumentMacroTemplateX:
            {
                // GetTempFileName creates the file on disk, so it must be removed even when
                // cleaning or copying fails; otherwise every failure leaks a temp file.
                string outFileName = System.IO.Path.GetTempFileName();
                try
                {
                    using (DocxDocumentReader Word2007Reader1 = new DocxDocumentReader(m_sFileForBinClean))
                    {
                        using (Stream outStr = File.Open(outFileName, FileMode.Create))
                        {
                            Word2007Reader1.CleanTo(outStr, listContentTypes);
                        }
                    }

                    // The reader and the output stream are both closed before the original is overwritten.
                    File.Copy(outFileName, m_sFileForBinClean, true);
                }
                finally
                {
                    File.Delete(outFileName);
                }
                break;
            }

        case Workshare.Policy.FileType.ExcelSheetX:
        case Workshare.Policy.FileType.ExcelSheetMacroX:
        case Workshare.Policy.FileType.ExcelSheetTemplateX:
        case Workshare.Policy.FileType.ExcelSheetMacroTemplateX:
            {
                // Same temp-file handling as the Word OOXML branch.
                string outFileName = System.IO.Path.GetTempFileName();
                try
                {
                    using (XlsxDocumentReader Excel2007Reader1 = new XlsxDocumentReader(m_sFileForBinClean))
                    {
                        using (Stream outStr = File.Open(outFileName, FileMode.Create))
                        {
                            Excel2007Reader1.CleanTo(outStr, listContentTypes);
                        }
                    }

                    File.Copy(outFileName, m_sFileForBinClean, true);
                }
                finally
                {
                    File.Delete(outFileName);
                }
                break;
            }

        case Workshare.Policy.FileType.PowerPointX:
        case Workshare.Policy.FileType.PowerPointMacroX:
        case Workshare.Policy.FileType.PowerPointTemplateX:
        case Workshare.Policy.FileType.PowerPointMacroTemplateX:
        case Workshare.Policy.FileType.PowerPointShowX:
        case Workshare.Policy.FileType.PowerPointMacroShowX:
            {
                // Same temp-file handling as the Word OOXML branch.
                string outFileName = System.IO.Path.GetTempFileName();
                try
                {
                    using (PptxDocumentReader Ppt2007Reader1 = new PptxDocumentReader(m_sFileForBinClean))
                    {
                        using (Stream outStr = File.Open(outFileName, FileMode.Create))
                        {
                            Ppt2007Reader1.CleanTo(outStr, listContentTypes);
                        }
                    }

                    File.Copy(outFileName, m_sFileForBinClean, true);
                }
                finally
                {
                    File.Delete(outFileName);
                }
                break;
            }

        default:
            // Unsupported file types are left untouched.
            break;
    }

    watch.Stop();
    m_binCleanTime = watch.Elapsed.TotalSeconds;
}
/// <summary>
/// Cleans the file at m_filenameIn using the reader that matches m_ft. Only the Word,
/// Excel and PowerPoint binary file types are handled; any other type is a no-op.
/// </summary>
/// <param name="listContentTypes">The content types passed to the reader's Clean call.</param>
public void DoBinaryClean(List<ContentType> listContentTypes)
{
    switch (m_ft)
    {
        case Workshare.Policy.FileType.WordDocument:
            {
                using (WordDocumentReader wordReader = new WordDocumentReader(m_filenameIn, true))
                {
                    wordReader.Clean(listContentTypes);
                }
                break;
            }

        case Workshare.Policy.FileType.ExcelSheet:
            {
                using (ExcelDocumentReader excelReader = new ExcelDocumentReader(m_filenameIn, true))
                {
                    excelReader.Clean(listContentTypes);
                }
                break;
            }

        case Workshare.Policy.FileType.PowerPoint:
            {
                using (PptDocumentReader powerPointReader = new PptDocumentReader(m_filenameIn, true))
                {
                    powerPointReader.Clean(listContentTypes);
                }
                break;
            }

        default:
            // Nothing to do for file types without a binary reader here.
            break;
    }
}