/// <summary>
/// Runs a Discover pass over test002.docx with the content types of interest limited to
/// hidden text and fields, and checks that exactly those two text types are reported.
/// </summary>
public void TestDiscoverDocxWithSpecificContentTypes()
{
    ContentType[] wantedTypes = new ContentType[] { ContentType.HiddenText, ContentType.Field };

    using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(TESTFILE_DIR + "test002.docx", FileMode.Open)))
    {
        processor.ContentTypesOfInterest = wantedTypes;
        processor.Process(DocumentProcessingActions.Discover);

        Assert.IsNotNull(processor.DocumentText, "expected the document text object to be valid");
        Assert.AreEqual(2, processor.DocumentText.GetTextTypes().Count, "expected to discover only the 2 specified text types");

        DocumentText discovered = processor.DocumentText;

        // The two requested types must be present...
        Assert.Greater(discovered.GetTextTypes(ContentType.Field).Count, 0);
        Assert.Greater(discovered.GetTextTypes(ContentType.HiddenText).Count, 0);

        // ...and the types that were not requested must not be reported.
        Assert.AreEqual(0, discovered.GetTextTypes(ContentType.SmartTag).Count);
        Assert.AreEqual(0, discovered.GetTextTypes(ContentType.SmallText).Count);
        Assert.AreEqual(0, discovered.GetTextTypes(ContentType.TrackChange).Count);
    }
}
/// <summary>
/// Runs a Discover pass over StyleExerciser.docx and checks the "Content" value of each of
/// the nine children of the first hidden-text text type.
/// </summary>
public void TestDiscoverDocxWithStyles()
{
    // Expected "Content" values, in child order.
    string[] expectedHiddenContent = new string[]
    {
        "This is some text in the document in a hidden paragraph.",
        "This para contains ",
        " text.",
        "This hidden para contains ",
        " text.",
        "This text in hidden style with ",
        "embedded.",
        "This text in hidden style with ",
        " embedded."
    };

    using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(TESTFILE_DIR + "StyleExerciser.docx", FileMode.Open)))
    {
        processor.Process(DocumentProcessingActions.Discover);

        TextType hiddenText = processor.DocumentText.GetTextTypes(ContentType.HiddenText)[0] as TextType;
        Assert.IsNotNull(hiddenText);
        Assert.AreEqual(9, hiddenText.GetChildCount());

        for (int childIndex = 0; childIndex < expectedHiddenContent.Length; childIndex++)
        {
            Assert.AreEqual(expectedHiddenContent[childIndex], hiddenText.GetChild(childIndex).GetInfo("Content")[0].value);
        }
    }
}
/// <summary>
/// Runs a Clean pass with SmallText as the only type to clean over
/// "Test Multiple Smart Art Small.docx", then rediscovers the output and checks that the
/// first paragraph text type reports 14 children.
/// </summary>
public void TestCleanDiagram_MultipleDiagramsSmallText()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Multiple Smart Art Small.docx";

    // Make sure a stale output from a previous run cannot break FileMode.CreateNew.
    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.SmallText });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);

            TextType paragraphText = reader.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            Assert.IsNotNull(paragraphText, "Clean documents should have empty lists");
            Assert.AreEqual(14, paragraphText.GetChildCount(), "Cleaned document should still have some reported paragraph text");
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with WhiteText as the only type to clean over
/// "Test Smart Art Partial White Text.docx", then rediscovers the output and checks that
/// the first paragraph text type reports three children.
/// </summary>
public void TestCleanDiagram_PartialWhiteText2()
{
    string TEST_DOC = TESTFILE_DIR + "Test Smart Art Partial White Text.docx";
    string OUTPUT_DOC = TESTFILE_DIR + "SelectivelyCleaned.docx";

    // Remove any stale output so FileMode.CreateNew below cannot fail.
    if (File.Exists(OUTPUT_DOC))
    {
        File.Delete(OUTPUT_DOC);
    }

    try
    {
        List<ContentType> typesToClean = new List<ContentType>();
        typesToClean.Add(ContentType.WhiteText);

        using (DocxDocumentProcessor ddpclean = new DocxDocumentProcessor(File.Open(TEST_DOC, FileMode.Open), typesToClean))
        {
            using (ddpclean.Output = File.Open(OUTPUT_DOC, FileMode.CreateNew))
            {
                ddpclean.Process(DocumentProcessingActions.Clean);
            }
        }

        using (DocxDocumentProcessor ddpRead = new DocxDocumentProcessor(File.Open(OUTPUT_DOC, FileMode.Open)))
        {
            ddpRead.Process(DocumentProcessingActions.Discover);

            TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            Assert.IsNotNull(ttDiagramText, "there should be some paragraph text here");

            // The failure message previously said "two" while the assertion expects 3.
            Assert.AreEqual(3, ttDiagramText.GetChildCount(), "should have got three paragraph items back");
        }
    }
    finally
    {
        if (File.Exists(OUTPUT_DOC))
        {
            File.Delete(OUTPUT_DOC);
        }
    }
}
/// <summary>
/// Runs a Clean pass with WhiteText as the only type to clean over
/// "Test Multiple Smart Art Full and Partial White Text.docx", writing to
/// SelectivelyCleaned.docx, then runs Discover on the output. The first paragraph text type
/// is expected to report 8 children: two nodes checked by "Content" only, followed by six
/// nodes whose "Content", "Type" ("Diagram_Text") and "ModelId" values are all checked.
/// The output file is deleted before the run and again in the finally block.
/// </summary>
// NOTE(review): the first expected "Content" string literal is split across a physical line
// break in this copy of the file - confirm the intended whitespace between "text." and "Nothing".
public void TestCleanDiagram_MultiplePartialAndFullWhiteText() { string TEST_DOC = TESTFILE_DIR + "Test Multiple Smart Art Full and Partial White Text.docx"; string OUTPUT_DOC = TESTFILE_DIR + "SelectivelyCleaned.docx"; if (File.Exists(OUTPUT_DOC)) File.Delete(OUTPUT_DOC); try { List<ContentType> typesToClean = new List<ContentType>(); typesToClean.Add(ContentType.WhiteText); using (DocxDocumentProcessor ddpclean = new DocxDocumentProcessor(File.Open(TEST_DOC, FileMode.Open), typesToClean)) { using (ddpclean.Output = File.Open(OUTPUT_DOC, FileMode.CreateNew)) { ddpclean.Process(DocumentProcessingActions.Clean); } } using (DocxDocumentProcessor ddpRead = new DocxDocumentProcessor(File.Open(OUTPUT_DOC, FileMode.Open))) { ddpRead.Process(DocumentProcessingActions.Discover); TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType; Assert.IsNotNull(ttDiagramText, "Clean documents should have empty lists"); Assert.AreEqual(8, ttDiagramText.GetChildCount(), "Cleaned document should still have some reported paragraph text"); int index = 0; IAbstractTextNode node = ttDiagramText.GetChild(index++); Assert.AreEqual("Here is some normal text. 
Nothing interesting just some random rubbish to put before anything else.", node.GetInfo("Content")[0].value, "wrong text reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("Here is yet more text with no good reason for its existence other than to put some words down here and that is all.", node.GetInfo("Content")[0].value, "wrong text reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("Something", node.GetInfo("Content")[0].value, "wrong text reported"); Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported"); Assert.AreEqual("{0A02ADD5-6F8B-4427-80A3-BC5EBA44FAE1}", node.GetInfo("ModelId")[0].value, "wrong model Id reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("Dog", node.GetInfo("Content")[0].value, "wrong text reported"); Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported"); Assert.AreEqual("{4EEFAF20-A6EB-495C-A943-006BA844B082}", node.GetInfo("ModelId")[0].value, "wrong model Id reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("More", node.GetInfo("Content")[0].value, "wrong text reported"); Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported"); Assert.AreEqual("{3FF29E88-F62F-479E-9AE6-191BA81828E4}", node.GetInfo("ModelId")[0].value, "wrong model Id reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("Hare", node.GetInfo("Content")[0].value, "wrong text reported"); Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported"); Assert.AreEqual("{400EDE91-7194-4C6D-AD2E-B9F9B770F146}", node.GetInfo("ModelId")[0].value, "wrong model Id reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("Oe", node.GetInfo("Content")[0].value, "wrong text reported"); Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported"); Assert.AreEqual("{6F1AD44A-05E7-492F-B5A2-F81EEF4F8CCA}", 
node.GetInfo("ModelId")[0].value, "wrong model Id reported"); node = ttDiagramText.GetChild(index++); Assert.AreEqual("Three", node.GetInfo("Content")[0].value, "wrong text reported"); Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported"); Assert.AreEqual("{6EE7408C-9700-4220-B9EA-6CBA1316EC66}", node.GetInfo("ModelId")[0].value, "wrong model Id reported"); } } finally { if (File.Exists(OUTPUT_DOC)) File.Delete(OUTPUT_DOC); } }
/// <summary>
/// Runs a Clean pass with WhiteText as the only type to clean over
/// "Test Single Smart Art Text Partial White Text.docx" and then a Discover pass over the
/// output. The result checks are currently disabled, so the test only verifies that both
/// passes complete.
/// </summary>
public void TestCleanDiagram_PartialWhiteText()
{
    // Assertions disabled: we cannot fix this without breaking lots of other things at the moment.
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Single Smart Art Text Partial White Text.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.WhiteText });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);
        }

        // Disabled checks, kept for when the underlying behaviour can be fixed:
        // TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
        // Assert.IsNotNull(ttDiagramText, "Clean documents should have empty lists");
        // Assert.AreEqual(1, ttDiagramText.GetChildCount(), "Cleaned document should still have some reported paragraph text");
        // IAbstractTextNode node = ttDiagramText.GetChild(0);
        // Assert.AreEqual("This is the of this smart art", node.GetInfo("Content")[0].value, "wrong text reported");
        // Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported");
        // Assert.AreEqual("{003D71BC-AAAA-4A07-900B-0B97444F09C3}", node.GetInfo("ModelId")[0].value, "wrong model Id reported");
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with WhiteText as the only type to clean over
/// "Test Single Smart Art Text White Text On No Fill.docx" and then a Discover pass over the
/// output. The result checks are currently disabled, so the test only verifies that both
/// passes complete.
/// </summary>
public void TestCleanDiagram_WhiteText()
{
    // Assertions disabled: we cannot fix this without breaking lots of other things at the moment.
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Single Smart Art Text White Text On No Fill.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.WhiteText });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);

            // Disabled checks, kept for when the underlying behaviour can be fixed:
            // TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            // Assert.IsNotNull(ttDiagramText, "Clean documents should have empty lists");
            // Assert.AreEqual(0, ttDiagramText.GetChildCount(), "Clean documents should have no entries in lists");
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Checks that the seven Workshare properties in TestLotsOfProperties.docx are reported by a
/// Discover pass (and that nothing is reported as a custom property), and that the same
/// holds for the output of a default Clean pass over that document.
/// </summary>
public void TestWorksharePropertiesAreDiscoverdButNotCleaned()
{
    string TEST_DOC = TESTFILE_DIR + "TestLotsOfProperties.docx";
    string OUTPUT_DOC = TESTFILE_DIR + "Cleaned.docx";

    using (DocxDocumentProcessor ddp1 = new DocxDocumentProcessor(File.Open(TEST_DOC, FileMode.Open)))
    {
        ddp1.Process(DocumentProcessingActions.Discover);
        AssertOnlyWorkshareProperties(ddp1.DocumentText, "before cleaning");
    }

    // Remove any stale output so FileMode.CreateNew below cannot fail.
    if (File.Exists(OUTPUT_DOC))
    {
        File.Delete(OUTPUT_DOC);
    }

    try
    {
        using (DocxDocumentProcessor ddp2 = new DocxDocumentProcessor(File.Open(TEST_DOC, FileMode.Open)))
        {
            using (ddp2.Output = File.Open(OUTPUT_DOC, FileMode.CreateNew))
            {
                ddp2.Process(DocumentProcessingActions.Clean);
            }
        }

        using (DocxDocumentProcessor ddp3 = new DocxDocumentProcessor(File.Open(OUTPUT_DOC, FileMode.Open)))
        {
            ddp3.Process(DocumentProcessingActions.Discover);
            AssertOnlyWorkshareProperties(ddp3.DocumentText, "after cleaning");
        }
    }
    finally
    {
        if (File.Exists(OUTPUT_DOC))
        {
            File.Delete(OUTPUT_DOC);
        }
    }
}

/// <summary>
/// Asserts that <paramref name="documentText"/> reports no custom properties and exactly the
/// seven expected Workshare property names, in order, on its first Workshare text type.
/// </summary>
/// <param name="documentText">Discovery result to inspect.</param>
/// <param name="stage">Short label appended to failure messages to identify the calling phase.</param>
private static void AssertOnlyWorkshareProperties(DocumentText documentText, string stage)
{
    string[] expectedNames = new string[]
    {
        "WSClassification",
        "WS_SEND_FOR_REVIEW",
        "WS_RTS_TAG",
        "EVOLVING_DOC_ID",
        "SFR_COMPUTER_NAME",
        "WSRestrictionLevel",
        "WSRestrictionPassword"
    };

    Assert.IsNotNull(documentText, "expected the document text object to be valid (" + stage + ")");

    List<IAbstractTextType> customTypes = documentText.GetTextTypes(ContentType.CustomProperty);
    if (customTypes != null)
    {
        Assert.AreEqual(0, customTypes.Count, "Something was interpretted as a custom property (" + stage + ")");
    }

    // Fail with a readable message instead of a NullReferenceException or an
    // index-out-of-range error when no Workshare text type is reported.
    List<IAbstractTextType> workshareTypes = documentText.GetTextTypes(ContentType.WorkshareProperty);
    Assert.IsNotNull(workshareTypes, "expected a list of Workshare property text types (" + stage + ")");
    Assert.Greater(workshareTypes.Count, 0, "expected at least one Workshare property text type (" + stage + ")");

    TextType workshare = workshareTypes[0] as TextType;
    Assert.IsNotNull(workshare, "expected the Workshare property text type to be a TextType (" + stage + ")");

    for (int i = 0; i < expectedNames.Length; i++)
    {
        Assert.AreEqual(expectedNames[i], workshare.GetChild(i).GetInfo("Name")[0].value, "unexpected Workshare property name at index " + i + " (" + stage + ")");
    }

    Assert.AreEqual(expectedNames.Length, workshare.GetChildCount(), "Found an extra property we did not expect (" + stage + ")");
}
/// <summary>
/// Runs a Discover pass over <paramref name="outputFile"/> with the exclude list set to null,
/// asserts that a unique text type exists for <paramref name="contentType"/>, and hands it to
/// <c>CommonTestUtilities.CheckFieldAgainstExclusions</c> together with <paramref name="test"/>.
/// </summary>
/// <param name="outputFile">Path of the document to inspect.</param>
/// <param name="contentType">Content type whose unique text type is validated.</param>
/// <param name="test">Value forwarded unchanged to the exclusion check.</param>
private void ValidateFieldsWithNonStringExclusions(string outputFile, ContentType contentType, string test)
{
    using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
    {
        reader.ExcludeList = null;
        reader.Process(DocumentProcessingActions.Discover);

        Assert.IsNotNull(reader.DocumentText, "expected the document text object to be valid");

        IAbstractTextType uniqueType = reader.DocumentText.GetUniqueTextType(contentType);
        Assert.IsNotNull(uniqueType, "expected the content type '" + contentType + "' to be valid");

        CommonTestUtilities.CheckFieldAgainstExclusions(uniqueType, test);
    }
}
/// <summary>
/// Validates TestHandleFldCharProperly.docx against the schema, runs a Clean pass over it
/// with every <c>ContentType</c> value selected and an exclude list holding one
/// default-constructed <c>Exclusion</c>, and validates the output against the schema too.
/// </summary>
public void TestHandleFldCharProperly()
{
    string cleanedPath = TESTFILE_DIR + "TestHandleFldCharProperlyCleaned.docx";
    string sourcePath = TESTFILE_DIR + "TestHandleFldCharProperly.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    ValidateMatchesSchema(sourcePath);

    try
    {
        // Select every declared content type for cleaning.
        List<ContentType> cleanTypes = new List<ContentType>((ContentType[])Enum.GetValues(typeof(ContentType)));

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        {
            List<Exclusion> exclusions = new List<Exclusion>();
            exclusions.Add(new Exclusion());
            cleaner.ExcludeList = exclusions;

            using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
            {
                cleaner.Process(DocumentProcessingActions.Clean);
            }
        }

        ValidateMatchesSchema(cleanedPath);
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with TrackChange as the only type to clean over
/// DeletedTrackChangeInTableRow.docx and checks that the output has the same
/// <c>CountPNodes</c> result as the source and still validates against the schema.
/// </summary>
public void TestCleanDeletedTrackChangeInTableRow()
{
    string cleanedPath = TESTFILE_DIR + "DeletedTrackChangeInTableRow_cleaned.docx";
    string sourcePath = TESTFILE_DIR + "DeletedTrackChangeInTableRow.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    // Baseline taken from the untouched source document.
    int expectedParagraphNodes = CountPNodes(sourcePath);
    ValidateMatchesSchema(sourcePath);

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.TrackChange });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        Assert.AreEqual(expectedParagraphNodes, CountPNodes(cleanedPath), "expected the same number of para nodes");
        ValidateMatchesSchema(cleanedPath);
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with InkAnnotation as the only type to clean over
/// "Getting Started Guide.docx" and checks that the output validates against the schema and
/// has the same <c>CountPNodes</c> result as the source document.
/// </summary>
public void TestCleanProblemComplexDoc()
{
    string TEST_DOC = TESTFILE_DIR + "Getting Started Guide.docx";
    string OUTPUT_DOC = TESTFILE_DIR + "GSGCleaned.docx";

    // Remove any stale output so FileMode.CreateNew below cannot fail.
    if (File.Exists(OUTPUT_DOC))
    {
        File.Delete(OUTPUT_DOC);
    }

    // Baseline taken from the untouched source document.
    int iExpectedCount = CountPNodes(TEST_DOC);
    ValidateMatchesSchema(TEST_DOC);

    try
    {
        List<ContentType> typesToClean = new List<ContentType>();
        // Cleaning every content type is currently disabled; only ink annotations are cleaned.
        //foreach (ContentType ct in Enum.GetValues(typeof(ContentType)))
        //    typesToClean.Add(ct);
        typesToClean.Add(ContentType.InkAnnotation);

        using (DocxDocumentProcessor ddpclean = new DocxDocumentProcessor(File.Open(TEST_DOC, FileMode.Open), typesToClean))
        {
            using (ddpclean.Output = File.Open(OUTPUT_DOC, FileMode.CreateNew))
            {
                ddpclean.Process(DocumentProcessingActions.Clean);
            }
        }

        ValidateMatchesSchema(OUTPUT_DOC);

        // The expected value is computed from the source document, so the message must not
        // quote a fixed number (it previously claimed "9 para nodes").
        Assert.AreEqual(iExpectedCount, CountPNodes(OUTPUT_DOC), "expected the same number of para nodes as the source document if we haven't mucked up");
    }
    finally
    {
        if (File.Exists(OUTPUT_DOC))
        {
            File.Delete(OUTPUT_DOC);
        }
    }
}
/// <summary>
/// Runs a Clean pass with InkAnnotation as the only type to clean over
/// "Test Ink Annotations 5.docx" and checks that a Discover pass over the output reports no
/// unique ink-annotation text type.
/// </summary>
public void TestCleanInkAnnotations_4()
{
    string cleanedPath = TESTFILE_DIR + "InkCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Ink Annotations 5.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.InkAnnotation });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);
            Assert.IsNull(reader.DocumentText.GetUniqueTextType(ContentType.InkAnnotation));
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with an empty list of types to clean over
/// "Test Ink Annotations 1.docx" and checks that a Discover pass over the output still
/// reports ink annotations.
/// </summary>
public void TestNoCleanInkAnnotations_1()
{
    string cleanedPath = TESTFILE_DIR + "InkCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Ink Annotations 1.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        // Deliberately empty: nothing is selected for cleaning.
        List<ContentType> nothingToClean = new List<ContentType>();

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), nothingToClean))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);
            Assert.Greater(reader.DocumentText.GetTextTypes(ContentType.InkAnnotation).Count, 0, "expected the ink annotations to be left behind");
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with ContentRule as the only type to clean over
/// HasAllMetadataTypesOfInterest.docm.docx and checks that the output's document part still
/// has one footnote reference and one endnote reference, and that its settings part still
/// has one <c>w:footnotePr</c> and one <c>w:endnotePr</c> element.
/// </summary>
public void TestFootAndEndNoteTriggersAreNotEnabledByContentRuleType()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docm.docx";
    string sourcePath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.ContentRule });

    try
    {
        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        // Main document part: the note references must survive.
        XmlDocument documentPart = DocxTestUtilities.GetDocumentPartXml(cleanedPath);
        Assert.IsNotNull(documentPart);

        XmlNamespaceManager namespaces = new XmlNamespaceManager(documentPart.NameTable);
        namespaces.AddNamespace("w", "http://purl.oclc.org/ooxml/wordprocessingml/main");

        XmlNodeList matches = documentPart.SelectNodes("//w:footnoteReference", namespaces);
        Assert.AreEqual(1, matches.Count);
        matches = documentPart.SelectNodes("//w:endnoteReference", namespaces);
        Assert.AreEqual(1, matches.Count);

        // Settings part: the note properties must survive. The same namespace manager is reused.
        XmlDocument settingsPart = DocxTestUtilities.GetSettingsPartXml(cleanedPath);
        Assert.IsNotNull(settingsPart);

        matches = settingsPart.SelectNodes("//w:footnotePr", namespaces);
        Assert.AreEqual(1, matches.Count);
        matches = settingsPart.SelectNodes("//w:endnotePr", namespaces);
        Assert.AreEqual(1, matches.Count);
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with RedactedText as the only type to clean over
/// HasAllMetadataTypesOfInterest.docm.docx and checks that the output still reports a
/// white-text text type with exactly one child.
/// </summary>
public void TestWhiteIsNotRemovedAsRedactedText()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docm.docx";
    string sourcePath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.RedactedText });

    try
    {
        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        TextType whiteText = DiscoverTextType(cleanedPath, ContentType.WhiteText);
        Assert.IsNotNull(whiteText, "we expect the white text to stay");
        Assert.AreEqual(1, whiteText.GetChildCount(), "missing the 1 item of white text");
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass over <paramref name="sFileUnderTest"/> selecting only
/// <paramref name="typeToClean"/>, then runs Discover on the output and passes the result to
/// <c>TestHasAllMetadatTypesOtherThan</c>. When <paramref name="typeToClean"/> is
/// <c>ContentType.ContentRule</c> the list of types to clean is left empty.
/// </summary>
/// <param name="sFileUnderTest">Path of the source document.</param>
/// <param name="outputFile">Path the cleaned document is written to; deleted before and after the run.</param>
/// <param name="typeToClean">The single content type to clean.</param>
private static void TestCleanOneTypeOfMetadataOnly(string sFileUnderTest, string outputFile, ContentType typeToClean)
{
    if (File.Exists(outputFile))
    {
        File.Delete(outputFile);
    }

    List<ContentType> cleanTypes = new List<ContentType>();
    if (typeToClean != ContentType.ContentRule)
    {
        cleanTypes.Add(typeToClean);
    }

    try
    {
        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sFileUnderTest, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(outputFile, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);
            TestHasAllMetadatTypesOtherThan(reader.DocumentText, typeToClean);
        }
    }
    finally
    {
        if (File.Exists(outputFile))
        {
            File.Delete(outputFile);
        }
    }
}
/// <summary>
/// Checks the baseline discovery result of HasAllMetadataTypesOfInterest.docm.docx via
/// <c>TestHasAllMetadatTypesOtherThan</c>, then runs <c>TestCleanOneTypeOfMetadataOnly</c>
/// once for each content type in the list below, in order.
/// </summary>
public void TestSelectiveCleaningOfMetadataTypes()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docm";
    string sourcePath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm.docx";

    // Types exercised one at a time. Not currently exercised: Field (TODO: field cleaning
    // needs a bit of clarification), Hyperlink, Macro, Version, AutoVersion, RoutingSlip.
    ContentType[] typesUnderTest = new ContentType[]
    {
        ContentType.ContentRule,
        ContentType.Comment,
        ContentType.TrackChange,
        ContentType.HiddenText,
        ContentType.SmallText,
        ContentType.WhiteText,
        ContentType.AttachedTemplate,
        ContentType.SmartTag,
        ContentType.CustomProperty,
        ContentType.RedactedText,
        ContentType.BuiltInProperty
    };

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        using (DocxDocumentProcessor baseline = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open)))
        {
            baseline.Process(DocumentProcessingActions.Discover);
            TestHasAllMetadatTypesOtherThan(baseline.DocumentText, ContentType.ContentRule);
        }

        foreach (ContentType typeUnderTest in typesUnderTest)
        {
            TestCleanOneTypeOfMetadataOnly(sourcePath, cleanedPath, typeUnderTest);
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass over <paramref name="inputFile"/>, writing the result to
/// <paramref name="outputFile"/> (any existing file at that path is deleted first). Asserts
/// that the processor's document text is null after cleaning and that the output file exists.
/// </summary>
/// <param name="outputFile">Path the cleaned document is written to.</param>
/// <param name="inputFile">Path of the source document.</param>
/// <param name="contentTypesofInterest">Content types of interest to set on the processor; ignored when null.</param>
private static void CleanDocument(string outputFile, string inputFile, ContentType[] contentTypesofInterest)
{
    using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(inputFile, FileMode.Open)))
    {
        if (contentTypesofInterest != null)
        {
            processor.ContentTypesOfInterest = contentTypesofInterest;
        }

        if (File.Exists(outputFile))
        {
            File.Delete(outputFile);
        }

        using (Stream destination = File.Open(outputFile, FileMode.CreateNew))
        {
            processor.Output = destination;
            processor.Process(DocumentProcessingActions.Clean);
            Assert.IsNull(processor.DocumentText, "expected the document text object to be null");
        }

        Assert.IsTrue(File.Exists(outputFile), "expected the cleaned file to be created");
    }
}
/// <summary>
/// Runs a Discover pass over <paramref name="outputFile"/> and asserts that every reported
/// text type other than Paragraph, Footer and Header has no children.
/// </summary>
/// <param name="outputFile">Path of the document to inspect.</param>
private static void ValidateNoMetaData(string outputFile)
{
    using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
    {
        reader.Process(DocumentProcessingActions.Discover);
        Assert.IsNotNull(reader.DocumentText, "expected the document text object to be valid");

        foreach (IAbstractTextType textType in reader.DocumentText.GetTextTypes())
        {
            ContentType kind = textType.GetContentType();

            // Body, footer and header text are exempt from the "no children" check.
            bool isExempt = kind == ContentType.Paragraph
                            || kind == ContentType.Footer
                            || kind == ContentType.Header;

            if (!isExempt)
            {
                Assert.AreEqual(0, textType.GetChildCount(), "we were expecting no metadata of type " + kind);
            }
        }
    }
}
/// <summary>
/// Runs a Discover pass over <paramref name="outputFile"/> and returns the <c>Count</c> of
/// the first reported text type whose content type equals <paramref name="contentType"/>.
/// </summary>
/// <param name="outputFile">Path of the document to inspect.</param>
/// <param name="contentType">Content type to look for.</param>
/// <returns>The matching text type's <c>Count</c>, or 0 when no text type matches.</returns>
private int MetadataCount(string outputFile, ContentType contentType)
{
    using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
    {
        reader.Process(DocumentProcessingActions.Discover);
        Assert.IsNotNull(reader.DocumentText, "expected the document text object to be valid");

        foreach (IAbstractTextType candidate in reader.DocumentText.GetTextTypes())
        {
            if (candidate.GetContentType() == contentType)
            {
                return candidate.Count;
            }
        }

        return 0;
    }
}
/// <summary>
/// Runs a Clean pass with WhiteText as the only type to clean over
/// "Test Multiple Smart Art Text Colours.docx", then rediscovers the output and checks that
/// the first paragraph text type has seven children, each with the expected "Content",
/// a "Type" of "Diagram_Text" and the expected "ModelId".
/// </summary>
public void TestCleanDiagram_MultipleWhiteText()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Multiple Smart Art Text Colours.docx";

    // Expected values per child, in child order: { Content, ModelId }.
    string[][] expectedNodes = new string[][]
    {
        new string[] { "Red", "{90858386-890F-46AF-A294-08A70FB465E4}" },
        new string[] { "White", "{6C2EA5F2-FB0E-4C70-A5A6-A3BD8A582B11}" },
        new string[] { "Heading", "{727A527E-6DDC-4851-9CE9-75BBD36389C4}" },
        new string[] { "See here", "{7C068228-7C26-4B6A-9C23-C09E235294F9}" },
        new string[] { "Purple", "{E461D11B-F02E-4040-999D-F627133CAF92}" },
        new string[] { "Next", "{10C763CC-CC0F-4620-8AE9-D292BA412BC6}" },
        new string[] { "And Something", "{90DA456E-8BB3-4E70-A51F-D10E57005F92}" }
    };

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.WhiteText });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);

            TextType paragraphText = reader.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            Assert.IsNotNull(paragraphText, "Clean documents should have empty lists");
            Assert.AreEqual(7, paragraphText.GetChildCount(), "Cleaned document should still have some reported paragraph text");

            for (int childIndex = 0; childIndex < expectedNodes.Length; childIndex++)
            {
                IAbstractTextNode child = paragraphText.GetChild(childIndex);
                Assert.AreEqual(expectedNodes[childIndex][0], child.GetInfo("Content")[0].value, "wrong text reported");
                Assert.AreEqual("Diagram_Text", child.GetInfo("Type")[0].value, "wrong text type reported");
                Assert.AreEqual(expectedNodes[childIndex][1], child.GetInfo("ModelId")[0].value, "wrong model Id reported");
            }
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Discover pass over <paramref name="sInputDoc"/> and returns the first text type
/// reported for <paramref name="typeToGet"/>.
/// </summary>
/// <param name="sInputDoc">Path of the document to inspect.</param>
/// <param name="typeToGet">Content type to look up.</param>
/// <returns>
/// The first text type of the requested kind as a <c>TextType</c>, or null when none is
/// reported. The assertion fails if an entry exists but is not a <c>TextType</c>.
/// </returns>
private TextType DiscoverTextType(string sInputDoc, ContentType typeToGet)
{
    using (DocxDocumentProcessor ddp = new DocxDocumentProcessor(File.Open(sInputDoc, FileMode.Open)))
    {
        ddp.Process(DocumentProcessingActions.Discover);

        // Look the list up once. Other tests in this file guard against GetTextTypes(type)
        // returning null, so treat null like "nothing found" rather than throwing here.
        List<IAbstractTextType> found = ddp.DocumentText.GetTextTypes(typeToGet);
        if (found == null || found.Count == 0)
        {
            return null;
        }

        TextType ttResult = found[0] as TextType;
        Assert.IsNotNull(ttResult, "expected the first text type of kind '" + typeToGet + "' to be a TextType");
        return ttResult;
    }
}
/// <summary>
/// Runs a PassThrough pass over test002.docx with the content types of interest limited to
/// hidden text and fields, and checks via <c>CommonTestUtilities.AreZipFilesEqual</c> that
/// the copy equals the source.
/// </summary>
public void TestPassThroughDocxWithSpecificContentTypes()
{
    string sourcePath = TESTFILE_DIR + "test002.docx";
    string copyPath = TESTFILE_DIR + "copy.docx";

    if (File.Exists(copyPath))
    {
        File.Delete(copyPath);
    }

    try
    {
        using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open)))
        using (processor.Output = File.Open(copyPath, FileMode.CreateNew))
        {
            processor.ContentTypesOfInterest = new ContentType[] { ContentType.HiddenText, ContentType.Field };
            processor.Process(DocumentProcessingActions.PassThrough);
        }

        Assert.IsTrue(CommonTestUtilities.AreZipFilesEqual(sourcePath, copyPath));
    }
    finally
    {
        File.Delete(copyPath);
    }
}
/// <summary>
/// Runs an unrestricted Discover pass over test002.docx and checks that field, hidden-text,
/// smart-tag, small-text and track-change text types are all reported.
/// </summary>
public void TestDiscoverDocx()
{
    using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(TESTFILE_DIR + "test002.docx", FileMode.Open)))
    {
        processor.Process(DocumentProcessingActions.Discover);

        Assert.IsNotNull(processor.DocumentText, "expected the document text object to be valid");
        Assert.Greater(processor.DocumentText.GetTextTypes().Count, 0, "expected some document text types to have been added");

        DocumentText discovered = processor.DocumentText;

        // Each of these must be reported at least once, checked in this order.
        ContentType[] expectedTypes = new ContentType[]
        {
            ContentType.Field,
            ContentType.HiddenText,
            ContentType.SmartTag,
            ContentType.SmallText,
            ContentType.TrackChange
        };

        foreach (ContentType expectedType in expectedTypes)
        {
            Assert.Greater(discovered.GetTextTypes(expectedType).Count, 0);
        }
    }
}
/// <summary>
/// Runs a Clean pass with WhiteText as the only type to clean over
/// "Test Multiple Smart Art.docx", then rediscovers the output and checks that the first
/// paragraph text type reports 27 children.
/// </summary>
public void TestCleanDiagram_MultipleDiagramsWhiteText()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
    string sourcePath = TESTFILE_DIR + "Test Multiple Smart Art.docx";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    try
    {
        List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.WhiteText });

        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(cleanedPath, FileMode.Open)))
        {
            reader.Process(DocumentProcessingActions.Discover);

            TextType paragraphText = reader.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            Assert.IsNotNull(paragraphText, "Clean documents should have empty lists");

            // If this starts failing, check that we haven't actually improved matters:
            // some of the reported items might really have been cleaned in an ideal world.
            Assert.AreEqual(27, paragraphText.GetChildCount(), "Cleaned document should still have some reported paragraph text");
        }
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}
/// <summary>
/// Runs a Clean pass with ContentRule as the only type to clean over
/// HasAllMetadataTypesOfInterest.docm and checks that the output's document part still
/// contains six <c>w:tcPrChange</c> elements.
/// </summary>
public void TestTrackChangesTriggersAreNotEnabledByContentRuleType()
{
    string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docm";
    string sourcePath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm";

    if (File.Exists(cleanedPath))
    {
        File.Delete(cleanedPath);
    }

    List<ContentType> cleanTypes = new List<ContentType>(new ContentType[] { ContentType.ContentRule });

    try
    {
        using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), cleanTypes))
        using (cleaner.Output = File.Open(cleanedPath, FileMode.CreateNew))
        {
            cleaner.Process(DocumentProcessingActions.Clean);
        }

        XmlDocument documentPart = DocxTestUtilities.GetDocumentPartXml(cleanedPath);
        Assert.IsNotNull(documentPart);

        XmlNamespaceManager namespaces = new XmlNamespaceManager(documentPart.NameTable);
        namespaces.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");

        XmlNodeList trackedCellChanges = documentPart.SelectNodes("//w:tcPrChange", namespaces);
        Assert.AreEqual(6, trackedCellChanges.Count);
    }
    finally
    {
        if (File.Exists(cleanedPath))
        {
            File.Delete(cleanedPath);
        }
    }
}