// Runs a Discover pass over test002.docx with ContentTypesOfInterest restricted to
        // HiddenText and Field, then checks that exactly two text types are reported, that
        // both requested types are present, and that SmartTag, SmallText and TrackChange
        // come back empty.
        public void TestDiscoverDocxWithSpecificContentTypes()
        {
            ContentType[] requestedTypes = new ContentType[] { ContentType.HiddenText, ContentType.Field };
            ContentType[] unrequestedTypes = new ContentType[] { ContentType.SmartTag, ContentType.SmallText, ContentType.TrackChange };

            using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(TESTFILE_DIR + "test002.docx", FileMode.Open)))
            {
                processor.ContentTypesOfInterest = requestedTypes;
                processor.Process(DocumentProcessingActions.Discover);

                DocumentText discovered = processor.DocumentText;
                Assert.IsNotNull(discovered, "expected the document text object to be valid");
                Assert.AreEqual(2, discovered.GetTextTypes().Count, "expected to discover only the 2 specified text types");

                // The two requested types must each report at least one entry...
                Assert.Greater(discovered.GetTextTypes(ContentType.Field).Count, 0);
                Assert.Greater(discovered.GetTextTypes(ContentType.HiddenText).Count, 0);

                // ...while the types that were not asked for must report nothing.
                foreach (ContentType unrequested in unrequestedTypes)
                {
                    Assert.AreEqual(0, discovered.GetTextTypes(unrequested).Count);
                }
            }
        }
        // Discovers StyleExerciser.docx and checks the "Content" value reported for each
        // of the nine children of the first HiddenText text type.
        public void TestDiscoverDocxWithStyles()
        {
            // Expected content of the hidden-text children, in the order they are reported.
            string[] expectedHiddenContent = new string[]
            {
                "This is some text in the document in a hidden paragraph.",
                "This para contains ",
                " text.",
                "This hidden para contains ",
                " text.",
                "This text in hidden style with ",
                "embedded.",
                "This text in hidden style with ",
                " embedded."
            };

            using (DocxDocumentProcessor processor = new DocxDocumentProcessor(File.Open(TESTFILE_DIR + "StyleExerciser.docx", FileMode.Open)))
            {
                processor.Process(DocumentProcessingActions.Discover);

                TextType hiddenText = processor.DocumentText.GetTextTypes(ContentType.HiddenText)[0] as TextType;
                Assert.IsNotNull(hiddenText);
                Assert.AreEqual(9, hiddenText.GetChildCount());

                for (int childIndex = 0; childIndex < expectedHiddenContent.Length; childIndex++)
                {
                    Assert.AreEqual(expectedHiddenContent[childIndex], hiddenText.GetChild(childIndex).GetInfo("Content")[0].value);
                }
            }
        }
        // Cleans only SmallText from "Test Multiple Smart Art Small.docx" into a temporary
        // output file, runs a Discover pass over that output and checks that the first
        // Paragraph text type still reports 14 children. The output file is removed both
        // before the run and in the finally block.
        public void TestCleanDiagram_MultipleDiagramsSmallText()
        {
            string inputPath = TESTFILE_DIR + "Test Multiple Smart Art Small.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.SmallText);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);

                    TextType paragraphText = reader.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
                    // The old message ("Clean documents should have empty lists") contradicted this
                    // assertion, which requires the paragraph text type to be present.
                    Assert.IsNotNull(paragraphText, "there should be some paragraph text here");
                    Assert.AreEqual(14, paragraphText.GetChildCount(), "Cleaned document should still have some reported paragraph text");
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only WhiteText from "Test Smart Art Partial White Text.docx" into a temporary
        // output file, runs a Discover pass over that output and checks that the first
        // Paragraph text type reports 3 children. The output file is removed both before the
        // run and in the finally block.
        public void TestCleanDiagram_PartialWhiteText2()
        {
            string inputPath = TESTFILE_DIR + "Test Smart Art Partial White Text.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.WhiteText);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);

                    TextType paragraphText = reader.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
                    Assert.IsNotNull(paragraphText, "there should be some paragraph text here");
                    // The failure message used to say "two" although the expected count is 3.
                    Assert.AreEqual(3, paragraphText.GetChildCount(), "should have got three paragraph items back");
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only WhiteText from "Test Multiple Smart Art Full and Partial White Text.docx"
        // into a temporary output file, runs a Discover pass over that output and checks the
        // eight children of the first Paragraph text type: two body-text entries (content only)
        // followed by six diagram entries (content, type and model id). The output file is
        // removed both before the run and in the finally block.
        public void TestCleanDiagram_MultiplePartialAndFullWhiteText()
        {
            string inputPath = TESTFILE_DIR + "Test Multiple Smart Art Full and Partial White Text.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";

            // Children 0-1: only the "Content" value is checked.
            string[] expectedBodyText = new string[]
            {
                "Here is some normal text.  Nothing interesting just some random rubbish to put before anything else.",
                "Here is yet more text with no good reason for its existence other than to put some words down here and that is all."
            };

            // Children 2-7: { Content, ModelId }; every one must also report Type "Diagram_Text".
            string[][] expectedDiagramNodes = new string[][]
            {
                new string[] { "Something", "{0A02ADD5-6F8B-4427-80A3-BC5EBA44FAE1}" },
                new string[] { "Dog", "{4EEFAF20-A6EB-495C-A943-006BA844B082}" },
                new string[] { "More", "{3FF29E88-F62F-479E-9AE6-191BA81828E4}" },
                new string[] { "Hare", "{400EDE91-7194-4C6D-AD2E-B9F9B770F146}" },
                new string[] { "Oe", "{6F1AD44A-05E7-492F-B5A2-F81EEF4F8CCA}" },
                new string[] { "Three", "{6EE7408C-9700-4220-B9EA-6CBA1316EC66}" }
            };

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.WhiteText);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);

                    TextType paragraphText = reader.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
                    // The old message ("Clean documents should have empty lists") contradicted this
                    // assertion, which requires the paragraph text type to be present.
                    Assert.IsNotNull(paragraphText, "there should be some paragraph text here");
                    Assert.AreEqual(8, paragraphText.GetChildCount(), "Cleaned document should still have some reported paragraph text");

                    int index = 0;
                    foreach (string bodyText in expectedBodyText)
                    {
                        IAbstractTextNode node = paragraphText.GetChild(index++);
                        Assert.AreEqual(bodyText, node.GetInfo("Content")[0].value, "wrong text reported");
                    }

                    foreach (string[] expected in expectedDiagramNodes)
                    {
                        IAbstractTextNode node = paragraphText.GetChild(index++);
                        Assert.AreEqual(expected[0], node.GetInfo("Content")[0].value, "wrong text reported");
                        Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported");
                        Assert.AreEqual(expected[1], node.GetInfo("ModelId")[0].value, "wrong model Id reported");
                    }
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only WhiteText from "Test Single Smart Art Text Partial White Text.docx" into
        // a temporary output file and runs a Discover pass over that output. The assertions on
        // the discovered content are disabled (kept below as comments): per the original note
        // they cannot be fixed without breaking lots of other things at the moment.
        public void TestCleanDiagram_PartialWhiteText()
        {
            string inputPath = TESTFILE_DIR + "Test Single Smart Art Text Partial White Text.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.WhiteText);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);
                }

            // Disabled assertions:
            //        TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            //        Assert.IsNotNull(ttDiagramText, "Clean documents should have empty lists");
            //        Assert.AreEqual(1, ttDiagramText.GetChildCount(), "Cleaned document should still have some reported paragraph text");

            //        IAbstractTextNode node = ttDiagramText.GetChild(0);
            //        Assert.AreEqual("This is the of this smart art", node.GetInfo("Content")[0].value, "wrong text reported");
            //        Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported");
            //        Assert.AreEqual("{003D71BC-AAAA-4A07-900B-0B97444F09C3}", node.GetInfo("ModelId")[0].value, "wrong model Id reported");
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only WhiteText from "Test Single Smart Art Text White Text On No Fill.docx"
        // into a temporary output file and runs a Discover pass over that output. The
        // assertions on the discovered content are disabled (kept below as comments): per the
        // original note they cannot be fixed without breaking lots of other things at the moment.
        public void TestCleanDiagram_WhiteText()
        {
            string inputPath = TESTFILE_DIR + "Test Single Smart Art Text White Text On No Fill.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.WhiteText);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);

            // Disabled assertions:
            //        TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
            //        Assert.IsNotNull(ttDiagramText, "Clean documents should have empty lists");
            //        Assert.AreEqual(0, ttDiagramText.GetChildCount(), "Clean documents should have no entries in lists");
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Discovers TestLotsOfProperties.docx and checks that no custom properties are reported
        // and that the first WorkshareProperty text type lists the seven expected property
        // names. It then cleans the document (no explicit content-type list) into a temporary
        // output file, discovers that output and repeats the same checks. The output file is
        // removed before the clean and in the finally block.
        public void TestWorksharePropertiesAreDiscoverdButNotCleaned()
        {
            string inputPath = TESTFILE_DIR + "TestLotsOfProperties.docx";
            string outputPath = TESTFILE_DIR + "Cleaned.docx";

            // Property names expected, in order, both before and after cleaning.
            string[] expectedNames = new string[]
            {
                "WSClassification",
                "WS_SEND_FOR_REVIEW",
                "WS_RTS_TAG",
                "EVOLVING_DOC_ID",
                "SFR_COMPUTER_NAME",
                "WSRestrictionLevel",
                "WSRestrictionPassword"
            };

            using (DocxDocumentProcessor original = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open)))
            {
                original.Process(DocumentProcessingActions.Discover);
                DocumentText originalText = original.DocumentText;

                List<IAbstractTextType> customBefore = originalText.GetTextTypes(ContentType.CustomProperty);
                if (customBefore != null)
                {
                    Assert.AreEqual(0, customBefore.Count, "Something was interpretted as a custom property");
                }

                TextType workshareBefore = originalText.GetTextTypes(ContentType.WorkshareProperty)[0] as TextType;
                // Fail with an assertion rather than a NullReferenceException if the 'as' cast yields null.
                Assert.IsNotNull(workshareBefore, "expected the Workshare properties to be reported as a TextType");
                for (int i = 0; i < expectedNames.Length; i++)
                {
                    Assert.AreEqual(expectedNames[i], workshareBefore.GetChild(i).GetInfo("Name")[0].value);
                }

                Assert.AreEqual(expectedNames.Length, workshareBefore.GetChildCount(), "Found an extra property we did not expect");
            }

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open)))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor cleaned = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    cleaned.Process(DocumentProcessingActions.Discover);
                    DocumentText cleanedText = cleaned.DocumentText;

                    List<IAbstractTextType> customAfter = cleanedText.GetTextTypes(ContentType.CustomProperty);
                    if (customAfter != null)
                    {
                        Assert.AreEqual(0, customAfter.Count);
                    }

                    TextType workshareAfter = cleanedText.GetTextTypes(ContentType.WorkshareProperty)[0] as TextType;
                    // Same guard as above for the cleaned document.
                    Assert.IsNotNull(workshareAfter, "expected the Workshare properties to be reported as a TextType after cleaning");
                    for (int i = 0; i < expectedNames.Length; i++)
                    {
                        Assert.AreEqual(expectedNames[i], workshareAfter.GetChild(i).GetInfo("Name")[0].value);
                    }

                    Assert.AreEqual(expectedNames.Length, workshareAfter.GetChildCount());
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Opens outputFile, clears the processor's exclude list and runs a Discover pass, then
        // passes the unique text type for contentType, together with the test string, to
        // CommonTestUtilities.CheckFieldAgainstExclusions. Asserts first that both the document
        // text and the text type are non-null.
        private void ValidateFieldsWithNonStringExclusions(string outputFile, ContentType contentType, string test)
        {
            using (DocxDocumentProcessor verifier = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
            {
                verifier.ExcludeList = null;
                verifier.Process(DocumentProcessingActions.Discover);

                DocumentText discovered = verifier.DocumentText;
                Assert.IsNotNull(discovered, "expected the document text object to be valid");

                IAbstractTextType discoveredType = discovered.GetUniqueTextType(contentType);
                string missingTypeMessage = "expected the content type '" + contentType + "' to be valid";
                Assert.IsNotNull(discoveredType, missingTypeMessage);

                CommonTestUtilities.CheckFieldAgainstExclusions(discoveredType, test);
            }
        }
        // Validates TestHandleFldCharProperly.docx with ValidateMatchesSchema, cleans it with
        // every ContentType value selected and an exclude list holding one default Exclusion,
        // then validates the cleaned output the same way. The output file is removed both
        // before the run and in the finally block.
        public void TestHandleFldCharProperly()
        {
            string inputPath = TESTFILE_DIR + "TestHandleFldCharProperly.docx";
            string outputPath = TESTFILE_DIR + "TestHandleFldCharProperlyCleaned.docx";

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            ValidateMatchesSchema(inputPath);

            try
            {
                // Select every member of the ContentType enum for cleaning.
                List<ContentType> typesToClean = new List<ContentType>((ContentType[])Enum.GetValues(typeof(ContentType)));

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                {
                    List<Exclusion> exclusions = new List<Exclusion>();
                    exclusions.Add(new Exclusion());
                    cleaner.ExcludeList = exclusions;

                    using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                    {
                        cleaner.Process(DocumentProcessingActions.Clean);
                    }
                }

                ValidateMatchesSchema(outputPath);
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only TrackChange from DeletedTrackChangeInTableRow.docx and checks that the
        // value returned by CountPNodes is the same for the input and the cleaned output, and
        // that both files pass ValidateMatchesSchema. The output file is removed both before
        // the run and in the finally block.
        public void TestCleanDeletedTrackChangeInTableRow()
        {
            string inputPath = TESTFILE_DIR + "DeletedTrackChangeInTableRow.docx";
            string outputPath = TESTFILE_DIR + "DeletedTrackChangeInTableRow_cleaned.docx";

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            int paragraphsBefore = CountPNodes(inputPath);
            ValidateMatchesSchema(inputPath);

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.TrackChange);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                int paragraphsAfter = CountPNodes(outputPath);
                Assert.AreEqual(paragraphsBefore, paragraphsAfter, "expected the same number of para nodes");
                ValidateMatchesSchema(outputPath);
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only InkAnnotation from "Getting Started Guide.docx" and checks that the
        // cleaned output passes ValidateMatchesSchema and that CountPNodes returns the same
        // value for the input and the output. The output file is removed both before the run
        // and in the finally block.
        public void TestCleanProblemComplexDoc()
        {
            string inputPath = TESTFILE_DIR + "Getting Started Guide.docx";
            string outputPath = TESTFILE_DIR + "GSGCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            int expectedParagraphCount = CountPNodes(inputPath);
            ValidateMatchesSchema(inputPath);

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                // Disabled alternative: clean every content type instead of just ink annotations.
                //foreach (ContentType ct in Enum.GetValues(typeof(ContentType)))
                //    typesToClean.Add(ct);

                typesToClean.Add(ContentType.InkAnnotation);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                ValidateMatchesSchema(outputPath);

                // The expected count is taken from the input document, so the message must not
                // name a fixed number (it previously claimed "9 para nodes").
                Assert.AreEqual(expectedParagraphCount, CountPNodes(outputPath), "expected the same number of para nodes");
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only InkAnnotation from "Test Ink Annotations 5.docx" into a temporary output
        // file, then discovers that output and checks that no unique InkAnnotation text type is
        // reported. The output file is removed both before the run and in the finally block.
        // NOTE(review): the method is suffixed _4 but reads the "5" fixture - confirm this is intended.
        public void TestCleanInkAnnotations_4()
        {
            string inputPath = TESTFILE_DIR + "Test Ink Annotations 5.docx";
            string outputPath = TESTFILE_DIR + "InkCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.InkAnnotation);

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);
                    Assert.IsNull(reader.DocumentText.GetUniqueTextType(ContentType.InkAnnotation));
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Runs a Clean pass over "Test Ink Annotations 1.docx" with an empty list of types to
        // clean, then discovers the output and checks that at least one InkAnnotation text type
        // is still reported. The output file is removed both before the run and in the finally
        // block.
        public void TestNoCleanInkAnnotations_1()
        {
            string inputPath = TESTFILE_DIR + "Test Ink Annotations 1.docx";
            string outputPath = TESTFILE_DIR + "InkCleaned.docx";
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                // Deliberately empty: nothing is selected for cleaning.
                List<ContentType> nothingToClean = new List<ContentType>();

                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), nothingToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);

                    Assert.Greater(reader.DocumentText.GetTextTypes(ContentType.InkAnnotation).Count, 0, "expected the ink annotations to be left behind");
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only ContentRule from HasAllMetadataTypesOfInterest.docm.docx and then checks,
        // via XPath on the output's document part and settings part, that exactly one
        // footnoteReference, endnoteReference, footnotePr and endnotePr element remains.
        // The output file is removed both before the run and in the finally block.
        public void TestFootAndEndNoteTriggersAreNotEnabledByContentRuleType()
        {
            string inputPath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docm.docx";

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            List<ContentType> typesToClean = new List<ContentType>();
            typesToClean.Add(ContentType.ContentRule);

            try
            {
                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                // Main document part: the note references must still be there.
                XmlDocument documentPart = DocxTestUtilities.GetDocumentPartXml(outputPath);
                Assert.IsNotNull(documentPart);
                XmlNamespaceManager namespaces = new XmlNamespaceManager(documentPart.NameTable);
                namespaces.AddNamespace("w", "http://purl.oclc.org/ooxml/wordprocessingml/main");
                XmlNodeList matches = documentPart.SelectNodes("//w:footnoteReference", namespaces);
                Assert.AreEqual(1, matches.Count);
                matches = documentPart.SelectNodes("//w:endnoteReference", namespaces);
                Assert.AreEqual(1, matches.Count);

                // Settings part: queried with the same namespace manager as the document part.
                XmlDocument settingsPart = DocxTestUtilities.GetSettingsPartXml(outputPath);
                Assert.IsNotNull(settingsPart);
                matches = settingsPart.SelectNodes("//w:footnotePr", namespaces);
                Assert.AreEqual(1, matches.Count);
                matches = settingsPart.SelectNodes("//w:endnotePr", namespaces);
                Assert.AreEqual(1, matches.Count);
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans only RedactedText from HasAllMetadataTypesOfInterest.docm.docx and checks, via
        // DiscoverTextType on the output, that the WhiteText text type is still present with
        // exactly one child. The output file is removed both before the run and in the finally
        // block.
        public void TestWhiteIsNotRemovedAsRedactedText()
        {
            string inputPath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docm.docx";

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            List<ContentType> typesToClean = new List<ContentType>();
            typesToClean.Add(ContentType.RedactedText);

            try
            {
                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (cleaner.Output = File.Open(outputPath, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                TextType whiteText = DiscoverTextType(outputPath, ContentType.WhiteText);
                Assert.IsNotNull(whiteText, "we expect the white text to stay");
                Assert.AreEqual(1, whiteText.GetChildCount(), "missing the 1 item of white text");
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Cleans sFileUnderTest into outputFile with only typeToClean selected (or with an
        // empty selection when typeToClean is ContentRule), discovers the output and passes the
        // result to TestHasAllMetadatTypesOtherThan along with typeToClean. outputFile is
        // removed both before the run and in the finally block.
        private static void TestCleanOneTypeOfMetadataOnly(string sFileUnderTest, string outputFile, ContentType typeToClean)
        {
            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            // ContentRule is never added to the list of types to clean.
            List<ContentType> selectedTypes = new List<ContentType>();
            if (typeToClean != ContentType.ContentRule)
            {
                selectedTypes.Add(typeToClean);
            }

            try
            {
                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sFileUnderTest, FileMode.Open), selectedTypes))
                using (cleaner.Output = File.Open(outputFile, FileMode.CreateNew))
                {
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
                {
                    reader.Process(DocumentProcessingActions.Discover);
                    TestHasAllMetadatTypesOtherThan(reader.DocumentText, typeToClean);
                }
            }
            finally
            {
                if (File.Exists(outputFile))
                {
                    File.Delete(outputFile);
                }
            }
        }
        // Discovers HasAllMetadataTypesOfInterest.docm.docx and checks it with
        // TestHasAllMetadatTypesOtherThan(..., ContentRule), then runs
        // TestCleanOneTypeOfMetadataOnly once per content type in the list below, in order.
        // The output file is removed both before the run and in the finally block.
        public void TestSelectiveCleaningOfMetadataTypes()
        {
            string inputPath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docm";

            // Content types cleaned one at a time; commented-out entries are currently disabled.
            ContentType[] typesUnderTest = new ContentType[]
            {
                ContentType.ContentRule,
                ContentType.Comment,
                ContentType.TrackChange,
                ContentType.HiddenText,
                ContentType.SmallText,
                ContentType.WhiteText,
                ContentType.AttachedTemplate,
                ContentType.SmartTag,
                // TODO Field cleaning needs a bit of clarification
                // ContentType.Field,
                // ContentType.Hyperlink,
                ContentType.CustomProperty,
                // ContentType.Macro,
                ContentType.RedactedText,
                ContentType.BuiltInProperty
                // ContentType.Version,
                // ContentType.AutoVersion,
                // ContentType.RoutingSlip
            };

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                using (DocxDocumentProcessor baseline = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open)))
                {
                    baseline.Process(DocumentProcessingActions.Discover);
                    TestHasAllMetadatTypesOtherThan(baseline.DocumentText, ContentType.ContentRule);
                }

                foreach (ContentType typeUnderTest in typesUnderTest)
                {
                    TestCleanOneTypeOfMetadataOnly(inputPath, outputPath, typeUnderTest);
                }
            }
            finally
            {
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        /// <summary>
        /// Runs the Clean action on <paramref name="inputFile"/> and writes the result to
        /// <paramref name="outputFile"/>, replacing any file already at that path.
        /// Asserts that no document text is exposed after cleaning and that the output file exists.
        /// </summary>
        /// <param name="outputFile">Path the cleaned document is written to.</param>
        /// <param name="inputFile">Path of the document to clean.</param>
        /// <param name="contentTypesofInterest">
        /// Content types assigned to the processor's <c>ContentTypesOfInterest</c>;
        /// when null the processor's own setting is left untouched.
        /// </param>
        private static void CleanDocument(string outputFile, string inputFile, ContentType[] contentTypesofInterest)
        {
            using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(inputFile, FileMode.Open)))
            {
                if (contentTypesofInterest != null)
                {
                    cleaner.ContentTypesOfInterest = contentTypesofInterest;
                }

                // FileMode.CreateNew fails on an existing file, so clear the target first.
                if (File.Exists(outputFile))
                {
                    File.Delete(outputFile);
                }

                using (Stream destination = File.Open(outputFile, FileMode.CreateNew))
                {
                    cleaner.Output = destination;
                    cleaner.Process(DocumentProcessingActions.Clean);

                    Assert.IsNull(cleaner.DocumentText, "expected the document text object to be null");
                }

                Assert.IsTrue(File.Exists(outputFile), "expected the cleaned file to be created");
            }
        }
        /// <summary>
        /// Runs discovery on <paramref name="outputFile"/> and asserts that every reported text type,
        /// apart from Paragraph, Footer and Header, has no children.
        /// </summary>
        /// <param name="outputFile">Path of the document to inspect.</param>
        private static void ValidateNoMetaData(string outputFile)
        {
            using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
            {
                reader.Process(DocumentProcessingActions.Discover);

                Assert.IsNotNull(reader.DocumentText, "expected the document text object to be valid");
                foreach (IAbstractTextType textType in reader.DocumentText.GetTextTypes())
                {
                    ContentType kind = textType.GetContentType();

                    // Paragraph, footer and header entries are exempt from the zero-children check.
                    if (kind != ContentType.Paragraph &&
                        kind != ContentType.Footer &&
                        kind != ContentType.Header)
                    {
                        Assert.AreEqual(0, textType.GetChildCount(), "we were expecting no metadata of type " + kind);
                    }
                }
            }
        }
        /// <summary>
        /// Runs discovery on <paramref name="outputFile"/> and returns the <c>Count</c> of the first
        /// text type whose content type equals <paramref name="contentType"/>.
        /// </summary>
        /// <param name="outputFile">Path of the document to inspect.</param>
        /// <param name="contentType">Content type to look for.</param>
        /// <returns>The matching text type's <c>Count</c>, or 0 when no text type matches.</returns>
        private int MetadataCount(string outputFile, ContentType contentType)
        {
            using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(outputFile, FileMode.Open)))
            {
                reader.Process(DocumentProcessingActions.Discover);

                Assert.IsNotNull(reader.DocumentText, "expected the document text object to be valid");
                foreach (IAbstractTextType candidate in reader.DocumentText.GetTextTypes())
                {
                    // Only the first match is considered.
                    if (candidate.GetContentType() == contentType)
                    {
                        return candidate.Count;
                    }
                }

                return 0;
            }
        }
        /// <summary>
        /// Cleans <c>ContentType.WhiteText</c> from "Test Multiple Smart Art Text Colours.docx",
        /// re-discovers the cleaned output and checks the Content, Type and ModelId reported for
        /// each child of the first Paragraph text type.
        /// The output file is removed before and after the run.
        /// </summary>
        public void TestCleanDiagram_MultipleWhiteText()
        {
            string inputPath = TESTFILE_DIR + "Test Multiple Smart Art Text Colours.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";

            // Expected { Content, ModelId } of each paragraph child, in child order.
            // Every child is also expected to report the type "Diagram_Text".
            string[][] expectedNodes = new string[][]
            {
                new string[] { "Red", "{90858386-890F-46AF-A294-08A70FB465E4}" },
                new string[] { "White", "{6C2EA5F2-FB0E-4C70-A5A6-A3BD8A582B11}" },
                new string[] { "Heading", "{727A527E-6DDC-4851-9CE9-75BBD36389C4}" },
                new string[] { "See here", "{7C068228-7C26-4B6A-9C23-C09E235294F9}" },
                new string[] { "Purple", "{E461D11B-F02E-4040-999D-F627133CAF92}" },
                new string[] { "Next", "{10C763CC-CC0F-4620-8AE9-D292BA412BC6}" },
                new string[] { "And Something", "{90DA456E-8BB3-4E70-A51F-D10E57005F92}" }
            };

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.WhiteText);

                using (DocxDocumentProcessor ddpClean = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (Stream cleanedOutput = File.Open(outputPath, FileMode.CreateNew))
                {
                    ddpClean.Output = cleanedOutput;
                    ddpClean.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor ddpRead = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    ddpRead.Process(DocumentProcessingActions.Discover);

                    // Fail with a readable assertion, not an index exception, when discovery
                    // reports no paragraph text type at all.
                    Assert.Greater(ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph).Count, 0, "expected the cleaned document to report paragraph text");

                    TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
                    Assert.IsNotNull(ttDiagramText, "expected the first paragraph text type to be a TextType");
                    Assert.AreEqual(expectedNodes.Length, ttDiagramText.GetChildCount(), "Cleaned document should still have some reported paragraph text");

                    for (int index = 0; index < expectedNodes.Length; index++)
                    {
                        IAbstractTextNode node = ttDiagramText.GetChild(index);
                        Assert.AreEqual(expectedNodes[index][0], node.GetInfo("Content")[0].value, "wrong text reported");
                        Assert.AreEqual("Diagram_Text", node.GetInfo("Type")[0].value, "wrong text type reported");
                        Assert.AreEqual(expectedNodes[index][1], node.GetInfo("ModelId")[0].value, "wrong model Id reported");
                    }
                }
            }
            finally
            {
                // Leave no output behind, whether the checks passed or not.
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        /// <summary>
        /// Runs discovery on <paramref name="sInputDoc"/> and returns the first text type reported
        /// for <paramref name="typeToGet"/>.
        /// </summary>
        /// <param name="sInputDoc">Path of the document to inspect.</param>
        /// <param name="typeToGet">Content type whose first text type is wanted.</param>
        /// <returns>
        /// The first entry cast to <c>TextType</c> (asserted to be non-null), or null when
        /// discovery reports no entries for the requested content type.
        /// </returns>
        private TextType DiscoverTextType(string sInputDoc, ContentType typeToGet)
        {
            using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(sInputDoc, FileMode.Open)))
            {
                reader.Process(DocumentProcessingActions.Discover);

                if (reader.DocumentText.GetTextTypes(typeToGet).Count != 0)
                {
                    TextType firstMatch = reader.DocumentText.GetTextTypes(typeToGet)[0] as TextType;
                    Assert.IsNotNull(firstMatch);
                    return firstMatch;
                }

                // Nothing of the requested type was discovered.
                return null;
            }
        }
        /// <summary>
        /// Runs the PassThrough action on test002.docx with HiddenText and Field set as the content
        /// types of interest, then asserts via <c>CommonTestUtilities.AreZipFilesEqual</c> that the
        /// copy matches the original. The copy is deleted afterwards.
        /// </summary>
        public void TestPassThroughDocxWithSpecificContentTypes()
        {
            string originalPath = TESTFILE_DIR + "test002.docx";
            string copyPath = TESTFILE_DIR + "copy.docx";

            if (File.Exists(copyPath))
            {
                File.Delete(copyPath);
            }

            try
            {
                using (DocxDocumentProcessor passThrough = new DocxDocumentProcessor(File.Open(originalPath, FileMode.Open)))
                using (Stream copyStream = File.Open(copyPath, FileMode.CreateNew))
                {
                    passThrough.Output = copyStream;
                    passThrough.ContentTypesOfInterest = new ContentType[] { ContentType.HiddenText, ContentType.Field };
                    passThrough.Process(DocumentProcessingActions.PassThrough);
                }

                Assert.IsTrue(CommonTestUtilities.AreZipFilesEqual(originalPath, copyPath));
            }
            finally
            {
                File.Delete(copyPath);
            }
        }
        /// <summary>
        /// Runs discovery on test002.docx and asserts that document text is produced and that at
        /// least one text type is reported for each of Field, HiddenText, SmartTag, SmallText and
        /// TrackChange.
        /// </summary>
        public void TestDiscoverDocx()
        {
            // Content types that must each yield at least one text type.
            ContentType[] expectedTypes = new ContentType[]
            {
                ContentType.Field,
                ContentType.HiddenText,
                ContentType.SmartTag,
                ContentType.SmallText,
                ContentType.TrackChange
            };

            using (DocxDocumentProcessor reader = new DocxDocumentProcessor(File.Open(TESTFILE_DIR + "test002.docx", FileMode.Open)))
            {
                reader.Process(DocumentProcessingActions.Discover);

                Assert.IsNotNull(reader.DocumentText, "expected the document text object to be valid");
                Assert.Greater(reader.DocumentText.GetTextTypes().Count, 0, "expected some document text types to have been added");

                DocumentText discovered = reader.DocumentText;
                foreach (ContentType expectedType in expectedTypes)
                {
                    Assert.Greater(discovered.GetTextTypes(expectedType).Count, 0);
                }
            }
        }
        /// <summary>
        /// Cleans <c>ContentType.WhiteText</c> from "Test Multiple Smart Art.docx", re-discovers the
        /// cleaned output and checks how many children the first Paragraph text type reports.
        /// The output file is removed before and after the run.
        /// </summary>
        public void TestCleanDiagram_MultipleDiagramsWhiteText()
        {
            string inputPath = TESTFILE_DIR + "Test Multiple Smart Art.docx";
            string outputPath = TESTFILE_DIR + "SelectivelyCleaned.docx";

            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            try
            {
                List<ContentType> typesToClean = new List<ContentType>();
                typesToClean.Add(ContentType.WhiteText);

                using (DocxDocumentProcessor ddpClean = new DocxDocumentProcessor(File.Open(inputPath, FileMode.Open), typesToClean))
                using (Stream cleanedOutput = File.Open(outputPath, FileMode.CreateNew))
                {
                    ddpClean.Output = cleanedOutput;
                    ddpClean.Process(DocumentProcessingActions.Clean);
                }

                using (DocxDocumentProcessor ddpRead = new DocxDocumentProcessor(File.Open(outputPath, FileMode.Open)))
                {
                    ddpRead.Process(DocumentProcessingActions.Discover);

                    // Fail with a readable assertion, not an index exception, when discovery
                    // reports no paragraph text type at all.
                    Assert.Greater(ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph).Count, 0, "expected the cleaned document to report paragraph text");

                    TextType ttDiagramText = ddpRead.DocumentText.GetTextTypes(ContentType.Paragraph)[0] as TextType;
                    Assert.IsNotNull(ttDiagramText, "expected the first paragraph text type to be a TextType");

                    // If this starts failing, check that we haven't actually improved matters:
                    // some of the reported items might really have been cleaned in an ideal world.
                    Assert.AreEqual(27, ttDiagramText.GetChildCount(), "Cleaned document should still have some reported paragraph text");
                }
            }
            finally
            {
                // Leave no output behind, whether the checks passed or not.
                if (File.Exists(outputPath))
                {
                    File.Delete(outputPath);
                }
            }
        }
        // Example no. 27
        /// <summary>
        /// Cleans only <c>ContentType.ContentRule</c> from HasAllMetadataTypesOfInterest.docm and
        /// asserts that the document part of the cleaned output contains six
        /// <c>w:tcPrChange</c> elements. The output file is removed before and after the run.
        /// </summary>
        public void TestTrackChangesTriggersAreNotEnabledByContentRuleType()
        {
            string sourcePath = TESTFILE_DIR + "HasAllMetadataTypesOfInterest.docm";
            string cleanedPath = TESTFILE_DIR + "SelectivelyCleaned.docm";

            if (File.Exists(cleanedPath))
            {
                File.Delete(cleanedPath);
            }

            List<ContentType> contentRuleOnly = new List<ContentType>(new ContentType[] { ContentType.ContentRule });

            try
            {
                using (DocxDocumentProcessor cleaner = new DocxDocumentProcessor(File.Open(sourcePath, FileMode.Open), contentRuleOnly))
                using (Stream cleanedStream = File.Open(cleanedPath, FileMode.CreateNew))
                {
                    cleaner.Output = cleanedStream;
                    cleaner.Process(DocumentProcessingActions.Clean);
                }

                XmlDocument documentPart = DocxTestUtilities.GetDocumentPartXml(cleanedPath);
                Assert.IsNotNull(documentPart);

                // The XPath query needs the WordprocessingML namespace bound to the "w" prefix.
                XmlNamespaceManager namespaces = new XmlNamespaceManager(documentPart.NameTable);
                namespaces.AddNamespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");

                XmlNodeList tcPrChangeNodes = documentPart.SelectNodes("//w:tcPrChange", namespaces);
                Assert.AreEqual(6, tcPrChangeNodes.Count);
            }
            finally
            {
                if (File.Exists(cleanedPath))
                {
                    File.Delete(cleanedPath);
                }
            }
        }