/// <summary>
/// Runs an <c>XmlPartFilter</c> configured with the hidden-text and smart-tag
/// triggers over "document1.xml" using <c>DiscoverAndClean</c>, and checks that
/// an output stream is produced and that exactly one text type of each kind
/// was discovered.
/// </summary>
public void TestXMLPartFilter()
        {
            XmlPartFilter xpf = new XmlPartFilter(new CommonNamespaces(OpenXmlFormat.Transitional));

            xpf.Triggers.AddRange(DocxMetadataDefinitions.HiddenDocumentText);
            xpf.Triggers.AddRange(DocxMetadataDefinitions.SmartTags);

            using (Stream sIn = File.Open(TESTFILE_DIR + "document1.xml", FileMode.Open))
            {
                DocumentText docText = new DocumentText();
                Stream sOut = xpf.ProcessPart(sIn, docText, null, DocumentProcessingActions.DiscoverAndClean);

                Assert.IsNotNull(sOut, "output stream null unexpectedly");
                Assert.AreEqual(1, docText.GetTextTypes(ContentType.HiddenText).Count, "expected a hidden text type");
                // The smart-tag assertion previously re-checked HiddenText, so smart tag
                // discovery was never actually verified.
                Assert.AreEqual(1, docText.GetTextTypes(ContentType.SmartTag).Count, "expected a smart tags type");
            }
        }
        /// <summary>
        /// Runs an <c>XmlPartFilter</c> configured with the custom-property and
        /// workshare-property triggers over "xlsx_customonly.xml" and checks that one
        /// custom property text type, and no workshare property text type, is discovered.
        /// </summary>
        public void TestDiscoverCustomPropertiesXlsx()
        {
            XmlPartFilter filter = new XmlPartFilter(new CommonNamespaces());
            filter.Triggers.AddRange(GenericMetadataDefinitions.CustomProperties);
            filter.Triggers.AddRange(GenericMetadataDefinitions.WorkshareProperties);

            using (Stream input = File.Open(TESTFILE_DIR + "xlsx_customonly.xml", FileMode.Open))
            {
                DocumentText discovered = new DocumentText();
                Stream output = filter.ProcessPart(input, discovered, null, DocumentProcessingActions.DiscoverAndClean);
                Assert.IsNotNull(output, "output stream null unexpectedly");

                int worksharePropertyTypes = discovered.GetTextTypes(ContentType.WorkshareProperty).Count;
                Assert.AreEqual(0, worksharePropertyTypes, "expected no workshareproperty text type");

                int customPropertyTypes = discovered.GetTextTypes(ContentType.CustomProperty).Count;
                Assert.AreEqual(1, customPropertyTypes, "expected a customproperty type");
            }
        }
        /// <summary>
        /// Processes one part according to <paramref name="action"/>.
        /// </summary>
        /// <param name="partData">Stream holding the part's content.</param>
        /// <param name="discoveryText">Receives a macro entry when this filter is interested in macros.</param>
        /// <param name="relPartProvider">Not used by this override.</param>
        /// <param name="action">The processing action being carried out.</param>
        /// <returns>
        /// <c>null</c> when the action includes cleaning, <paramref name="partData"/> itself for
        /// <c>PassThrough</c>, otherwise the filter's output stream.
        /// </returns>
        public override Stream ProcessPart(Stream partData, DocumentText discoveryText, RelatedPartProvider relPartProvider, DocumentProcessingActions action)
        {
            // Used for discovery on open documents: the copy loses all macro information
            // except the items that specify macro content. A single vbaproject.bin file
            // contains all macros; so far this appears to be valid.

            if (DocumentProcessor.ActionIncludesCleaning(action))
            {
                return null;
            }

            if (action == DocumentProcessingActions.PassThrough)
            {
                return partData;
            }

            if (m_bInterestedInMacros)
            {
                // Reuse the first existing Macro text type, or register a new one.
                List<IAbstractTextType> existingMacroTypes = discoveryText.GetTextTypes(ContentType.Macro);
                TextType macroType;
                if (existingMacroTypes.Count == 0)
                {
                    macroType = new TextType(ContentType.Macro);
                    discoveryText.AddTextType(macroType);
                }
                else
                {
                    macroType = (TextType)existingMacroTypes[0];
                }

                // Describe this macro by its id, target and type.
                TextNode macroEntry = new TextNode();
                macroEntry.AddInfo(new NodeInfo { name = "Id", value = m_id, type = DataType.String });
                macroEntry.AddInfo(new NodeInfo { name = "Target", value = m_target, type = DataType.String });
                macroEntry.AddInfo(new NodeInfo { name = "Type", value = m_type, type = DataType.String });

                macroType.AddChild(macroEntry);
            }

            Initialize();
            ConstructFilter(partData, discoveryText, action);
            ExecuteFilter();

            return m_outStream;
        }
        /// <summary>
        /// For every non-whitespace text type in <paramref name="dtCheck"/>, verifies via
        /// <c>CheckTypeWasFound</c> that the same content type is present in
        /// <paramref name="discoveredText"/>, skipping the content types that are not compared.
        /// </summary>
        private void CompareDiscoveryForDoc(string filename, DocumentText discoveredText, DocumentText dtCheck)
        {
            foreach (IAbstractTextType expected in dtCheck.GetTextTypes())
            {
                if (IsAllWhiteSpace(expected))
                {
                    continue;
                }

                bool mustBeDiscovered;
                switch (expected.GetContentType())
                {
                    // Never compared.
                    case ContentType.Version:
                    case ContentType.RoutingSlip:
                    case ContentType.AutoVersion:
                    // Reviewer is not compared either: a doc that *had* track changes has
                    // this type, a docx that *had* track changes doesn't. (A check limited
                    // to documents without a Version type was tried and is disabled.)
                    case ContentType.Reviewer:
                        mustBeDiscovered = false;
                        break;

                    case ContentType.Macro:
                        mustBeDiscovered = !HadJustThisDocumentMacro(expected);
                        break;

                    case ContentType.TextBox:
                        mustBeDiscovered = !AreAllTextBoxesPictures(expected);
                        break;

                    default:
                        mustBeDiscovered = true;
                        break;
                }

                if (mustBeDiscovered)
                {
                    CheckTypeWasFound(filename, discoveredText, expected);
                }
            }
        }
// Example #5
// 0
        /// <summary>
        /// Returns the first text type in <paramref name="dt"/> whose content type equals this
        /// object's <c>ContentType</c> member; otherwise creates a new one. A newly created
        /// text type is added to <paramref name="dt"/> unless its content type is
        /// <c>ContentRule</c>, in which case it is returned without being added.
        /// </summary>
        private TextType FindTextType(DocumentText dt)
        {
            foreach (TextType candidate in dt.GetTextTypes())
            {
                if (candidate.GetContentType() != ContentType)
                {
                    continue;
                }

                return candidate;
            }

            // Nothing matched: build a fresh text type for this content type.
            TextType created = new TextType(ContentType);
            if (ContentType != ContentType.ContentRule)
            {
                dt.AddTextType(created);
            }

            return created;
        }
        /// <summary>
        /// Asserts the state of one metadata content type in <paramref name="dt"/>.
        /// When <paramref name="type"/> is the type that was cleaned (<paramref name="notExpected"/>),
        /// the list must be empty or hold a single text type with no children; for any other
        /// type the first text type must have at least one child.
        /// </summary>
        /// <param name="notExpected">Content type that should have been cleaned.</param>
        /// <param name="dt">Discovery result to inspect.</param>
        /// <param name="type">Content type being checked.</param>
        /// <param name="sText">Human-readable name of <paramref name="type"/> used in failure messages.</param>
        private static void CheckOneTypeOfMetadata(ContentType notExpected, DocumentText dt, ContentType type, string sText)
        {
            // Word can mark redacted text with "vanish" which may or may not be appropriate.
            if (notExpected == ContentType.HiddenText && type == ContentType.RedactedText)
                return;

            List<IAbstractTextType> listType = dt.GetTextTypes(type);

            if (type == notExpected)
            {
                Assert.IsNotNull(listType, "Failed to clean " + sText);
                if (listType.Count > 0)
                {
                    Assert.AreEqual(1, listType.Count, "Will take an empty TextType in the list as OK");
                    TextType cleaned = listType[0] as TextType;
                    Assert.IsNotNull(cleaned, "Failed to clean " + sText);
                    Assert.AreEqual(0, cleaned.GetChildCount(), "Failed to clean " + sText);
                }
                return;
            }

            // Guard the lookup so a missing type fails with the descriptive message rather
            // than an ArgumentOutOfRangeException / NullReferenceException.
            Assert.IsNotNull(listType, "Test document has no " + sText);
            Assert.Greater(listType.Count, 0, "Test document has no " + sText);
            TextType present = listType[0] as TextType;
            Assert.IsNotNull(present, "Test document has no " + sText);
            Assert.Greater(present.GetChildCount(), 0, "Test document has no " + sText);
        }
        /// <summary>
        /// Asserts that <paramref name="dt"/> is valid and non-empty, then runs
        /// <c>CheckOneTypeOfMetadata</c> for each content type in the table below, in order.
        /// </summary>
        private static void TestHasAllMetadatTypesOtherThan(DocumentText dt, ContentType notExpected)
        {
            Assert.IsNotNull(dt, "expected the document text object to be valid");
            Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

            // Content types checked, with the label used in failure messages.
            // Currently not checked: Macro, Variable, DocumentStatistic, Reviewer,
            // Version, AutoVersion, RoutingSlip.
            KeyValuePair<ContentType, string>[] checkedTypes =
            {
                new KeyValuePair<ContentType, string>(ContentType.Comment, "comments"),
                new KeyValuePair<ContentType, string>(ContentType.TrackChange, "track changes"),
                new KeyValuePair<ContentType, string>(ContentType.HiddenText, "hidden text"),
                new KeyValuePair<ContentType, string>(ContentType.SmallText, "small text"),
                new KeyValuePair<ContentType, string>(ContentType.WhiteText, "white text"),
                new KeyValuePair<ContentType, string>(ContentType.AttachedTemplate, "attached template"),
                new KeyValuePair<ContentType, string>(ContentType.SmartTag, "smart tags"),
                new KeyValuePair<ContentType, string>(ContentType.Field, "fields"),
                new KeyValuePair<ContentType, string>(ContentType.Hyperlink, "hyperlinks"),
                new KeyValuePair<ContentType, string>(ContentType.CustomProperty, "custom properties"),
                new KeyValuePair<ContentType, string>(ContentType.RedactedText, "redacted text"),
                new KeyValuePair<ContentType, string>(ContentType.BuiltInProperty, "built-in properties"),
            };

            foreach (KeyValuePair<ContentType, string> entry in checkedTypes)
            {
                CheckOneTypeOfMetadata(notExpected, dt, entry.Key, entry.Value);
            }
        }
 /// <summary>
 /// Asserts the state of one metadata content type in <paramref name="dt"/>.
 /// When <paramref name="type"/> equals <paramref name="notExpected"/>, the list must be
 /// empty or hold a single text type with no children; otherwise the first text type
 /// must have at least one child.
 /// </summary>
 /// <param name="notExpected">Content type that should have been cleaned.</param>
 /// <param name="dt">Discovery result to inspect.</param>
 /// <param name="type">Content type being checked.</param>
 /// <param name="sText">Human-readable name of <paramref name="type"/> used in failure messages.</param>
 private static void CheckOneTypeOfMetadata(ContentType notExpected, DocumentText dt, ContentType type, string sText)
 {
     List<IAbstractTextType> listType = dt.GetTextTypes(type);

     if (type == notExpected)
     {
         Assert.IsNotNull(listType, "Failed to clean " + sText);
         if (listType.Count > 0)
         {
             Assert.AreEqual(1, listType.Count, "Will take an empty TextType in the list as OK");
             TextType cleaned = listType[0] as TextType;
             Assert.IsNotNull(cleaned, "Failed to clean " + sText);
             Assert.AreEqual(0, cleaned.GetChildCount(), "Failed to clean " + sText);
         }
     }
     else
     {
         // Fail with the descriptive message when the type is missing altogether,
         // instead of throwing from the [0] indexer or a null dereference.
         Assert.IsNotNull(listType, "Test document has no " + sText);
         Assert.Greater(listType.Count, 0, "Test document has no " + sText);
         TextType present = listType[0] as TextType;
         Assert.IsNotNull(present, "Test document has no " + sText);
         Assert.Greater(present.GetChildCount(), 0, "Test document has no " + sText);
     }
 }
        /// <summary>
        /// Asserts that <paramref name="dt"/> is valid and non-empty, then runs
        /// <c>CheckOneTypeOfMetadata</c> for each content type in the table below, in order.
        /// </summary>
        private static void TestHasAllMetadataTypesOtherThan(DocumentText dt, ContentType notExpected)
        {
            Assert.IsNotNull(dt, "expected the document text object to be valid");
            Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

            // Content types checked, with the label used in failure messages.
            // Currently not checked: SmartTag, CustomProperty, HiddenText, BuiltInProperty.
            KeyValuePair<ContentType, string>[] checkedTypes =
            {
                new KeyValuePair<ContentType, string>(ContentType.Header, "headers"),
                new KeyValuePair<ContentType, string>(ContentType.Footer, "footers"),
                new KeyValuePair<ContentType, string>(ContentType.Comment, "comments"),
                new KeyValuePair<ContentType, string>(ContentType.TrackChange, "track changes"),
                new KeyValuePair<ContentType, string>(ContentType.SmallText, "small text"),
                new KeyValuePair<ContentType, string>(ContentType.WhiteText, "white text"),
                new KeyValuePair<ContentType, string>(ContentType.Hyperlink, "hyperlinks"),
                new KeyValuePair<ContentType, string>(ContentType.Links, "links"),
                new KeyValuePair<ContentType, string>(ContentType.HiddenSheet, "hidden sheet"),
                new KeyValuePair<ContentType, string>(ContentType.HiddenRow, "hidden row"),
                new KeyValuePair<ContentType, string>(ContentType.HiddenColumn, "hidden column"),
                new KeyValuePair<ContentType, string>(ContentType.Macro, "macro"),
                new KeyValuePair<ContentType, string>(ContentType.RedactedText, "redacted text"),
            };

            foreach (KeyValuePair<ContentType, string> entry in checkedTypes)
            {
                CheckOneTypeOfMetadata(notExpected, dt, entry.Key, entry.Value);
            }
        }
 /// <summary>
 /// Asserts that the list of text types of <paramref name="type"/> in <paramref name="dt"/>
 /// is non-null and contains exactly one entry.
 /// </summary>
 private static void CheckThatBuiltInTypeOfMetadataWasCleaned(DocumentText dt, ContentType type)
 {
     // NOTE(review): the message speaks of empty lists while the check requires exactly
     // one entry - confirm which is intended.
     const string failureMessage = "Clean documents should have empty lists";

     List<IAbstractTextType> builtInTypes = dt.GetTextTypes(type);
     Assert.IsNotNull(builtInTypes, failureMessage);
     Assert.AreEqual(1, builtInTypes.Count, failureMessage);
 }
        /// <summary>
        /// Asserts that <paramref name="dt"/> contains discovered items for every checked
        /// content type except <paramref name="notExpected"/>, which must have been cleaned.
        /// Checks run in this order: Header, Footer, Comment, Field, HiddenSlide, SpeakerNote,
        /// CustomProperty, Macro, BuiltInProperty.
        /// </summary>
        /// <param name="dt">Discovery result to inspect.</param>
        /// <param name="notExpected">Content type that should have been cleaned.</param>
        private static void TestHasAllMetadatTypesOtherThan(DocumentText dt, ContentType notExpected)
        {
            Assert.IsNotNull(dt, "expected the document text object to be valid");
            Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

            // Headers and footers may keep an (empty) text type after cleaning.
            if (ContentType.Header == notExpected)
                AssertHeaderFooterTextTypeIsEmpty(dt, ContentType.Header);
            else
                AssertTextTypeHasChildren(dt, ContentType.Header, "Test document has no headers");

            if (ContentType.Footer == notExpected)
                AssertHeaderFooterTextTypeIsEmpty(dt, ContentType.Footer);
            else
                AssertTextTypeHasChildren(dt, ContentType.Footer, "Test document has no footers");

            // Types whose cleaned state is verified by CheckThatOneTypeOfMetadataWasCleaned.
            KeyValuePair<ContentType, string>[] ordinaryTypes =
            {
                new KeyValuePair<ContentType, string>(ContentType.Comment, "Test document has no comments"),
                new KeyValuePair<ContentType, string>(ContentType.Field, "Test document has no 'fields'"),
                new KeyValuePair<ContentType, string>(ContentType.HiddenSlide, "Test document has no hidden slides"),
                new KeyValuePair<ContentType, string>(ContentType.SpeakerNote, "Test document has no speaker notes"),
                new KeyValuePair<ContentType, string>(ContentType.CustomProperty, "Test document has no custom properties"),
                new KeyValuePair<ContentType, string>(ContentType.Macro, "Test document has no macros"),
            };

            foreach (KeyValuePair<ContentType, string> entry in ordinaryTypes)
            {
                if (entry.Key == notExpected)
                    CheckThatOneTypeOfMetadataWasCleaned(dt, entry.Key);
                else
                    AssertTextTypeHasChildren(dt, entry.Key, entry.Value);
            }

            // Built-in properties have their own cleaned-state check.
            if (ContentType.BuiltInProperty == notExpected)
                CheckThatBuiltInTypeOfMetadataWasCleaned(dt, ContentType.BuiltInProperty);
            else
                AssertTextTypeHasChildren(dt, ContentType.BuiltInProperty, "Test document has no built in properties");
        }

        /// <summary>
        /// Asserts that the header/footer list for <paramref name="type"/> is either empty or
        /// holds exactly one text type with no children.
        /// </summary>
        private static void AssertHeaderFooterTextTypeIsEmpty(DocumentText dt, ContentType type)
        {
            List<IAbstractTextType> found = dt.GetTextTypes(type);
            Assert.IsNotNull(found, "HdrFtrs can have lists but no children in those lists");

            if (found.Count > 0)
            {
                Assert.AreEqual(1, found.Count, "HdrFtrs can have lists but no children in those lists");
                TextType only = found[0] as TextType;
                Assert.IsNotNull(only);
                Assert.AreEqual(0, only.GetChildCount(), "Clean documents should have nothing discovered");
            }
        }

        /// <summary>
        /// Asserts that the first text type of <paramref name="type"/> exists and has at least
        /// one child, failing with <paramref name="message"/> otherwise.
        /// </summary>
        private static void AssertTextTypeHasChildren(DocumentText dt, ContentType type, string message)
        {
            List<IAbstractTextType> found = dt.GetTextTypes(type);

            // Guard the lookup so a missing type reports the descriptive message instead of
            // throwing from the [0] indexer or a null dereference.
            Assert.IsNotNull(found, message);
            Assert.Greater(found.Count, 0, message);
            TextType first = found[0] as TextType;
            Assert.IsNotNull(first, message);
            Assert.Greater(first.GetChildCount(), 0, message);
        }
// Example #12
// 0
        /// <summary>
        /// Builds one <c>DiscoveryDifference</c> per content type (Paragraph excluded) holding
        /// the child counts found in this object's document text, in <paramref name="dtAPI"/>
        /// and in <paramref name="dtBin"/>, plus whether the <paramref name="dtBin"/> entry is
        /// expected from redaction.
        /// </summary>
        /// <returns>The differences, in the order of <c>GetListOfAllContentTypes()</c>.</returns>
        public List<DiscoveryDifference> DifferenceTo(DocumentText dtAPI, DocumentText dtBin)
        {
            List<DiscoveryDifference> differences = new List<DiscoveryDifference>();

            foreach (ContentType kind in SingleFileCleaningTester.GetListOfAllContentTypes())
            {
                if (kind == ContentType.Paragraph)
                {
                    continue;
                }

                IAbstractTextType original = FindFirstTextTypeOfKind(m_dtMe, kind);
                int originalCount = original != null ? original.GetChildCount() : 0;

                // Hopefully safe to assume we did not find more in the cleaned document
                IAbstractTextType fromApi = FindFirstTextTypeOfKind(dtAPI, kind);
                int apiCount = fromApi != null ? fromApi.GetChildCount() : 0;

                IAbstractTextType fromBin = FindFirstTextTypeOfKind(dtBin, kind);
                int binCount = fromBin != null ? fromBin.GetChildCount() : 0;

                bool expectedFromRedaction = IsExpectedFromRedaction(fromBin);

                differences.Add(new DiscoveryDifference(kind, originalCount, apiCount, binCount, expectedFromRedaction));
            }

            return differences;
        }

        /// <summary>
        /// Returns the first text type in <paramref name="source"/> with the given content
        /// type, or <c>null</c> when there is none.
        /// </summary>
        private static IAbstractTextType FindFirstTextTypeOfKind(DocumentText source, ContentType kind)
        {
            foreach (IAbstractTextType candidate in source.GetTextTypes())
            {
                if (candidate.GetContentType() == kind)
                {
                    return candidate;
                }
            }

            return null;
        }
        /// <summary>
        /// Verifies that the custom properties in <paramref name="discoveredText"/> match those
        /// in <paramref name="dtCheck"/>. Raw child counts are compared first; if they differ,
        /// the counts are compared again ignoring properties whose "Name" starts with "_".
        /// </summary>
        /// <param name="fileName">Not used by this method.</param>
        /// <param name="discoveredText">Discovery result under test.</param>
        /// <param name="dtCheck">Reference discovery result.</param>
        /// <exception cref="Exception">The counts still differ after ignoring "_" properties.</exception>
        private void CheckCustomPropertiesWereFound(string fileName, DocumentText discoveredText, DocumentText dtCheck)
        {
            // Take the first reference text type only if there is one; indexing [0] on an
            // empty list would throw before the null check could handle the missing case.
            List<IAbstractTextType> checkTypes = dtCheck.GetTextTypes(ContentType.CustomProperty);
            IAbstractTextType ttCheck = null;
            if (checkTypes != null && checkTypes.Count > 0)
                ttCheck = checkTypes[0];
            int iCheck = ttCheck != null ? ttCheck.GetChildCount() : 0;

            List<IAbstractTextType> discoveredTypes = discoveredText.GetTextTypes(ContentType.CustomProperty);
            if (iCheck == 0 && discoveredTypes == null)
                return;

            TextType ttDiscover = null;
            if (discoveredTypes != null && discoveredTypes.Count > 0)
                ttDiscover = discoveredTypes[0] as TextType;
            int iDisc = ttDiscover != null ? ttDiscover.GetChildCount() : 0;

            if (iCheck == iDisc)
                return;

            // Raw counts differ: recount both sides, skipping names that start with "_".
            int iRecountCheck = 0;
            for (int i = iCheck - 1; i >= 0; --i)
            {
                string nameCheck = ttCheck.GetChild(i).GetInfo("Name")[0].value;
                if (!nameCheck.StartsWith("_", StringComparison.Ordinal))
                    ++iRecountCheck;
            }

            int iRecountDisc = 0;
            for (int i = iDisc - 1; i >= 0; --i)
            {
                string nameDisc = ttDiscover.GetChild(i).GetInfo("Name")[0].value;
                if (!nameDisc.StartsWith("_", StringComparison.Ordinal))
                    ++iRecountDisc;
            }

            if (iRecountCheck != iRecountDisc)
                throw new Exception("Failed to discover custom proprties correctly");
        }
        /// <summary>
        /// Ensures <paramref name="discoveredText"/> contains at least one text type with the
        /// same content type as <paramref name="ttOrig"/>. If not, dumps the original text,
        /// records it and its content type on the work item, and throws.
        /// </summary>
        /// <exception cref="Exception">No text type of that content type was discovered.</exception>
        private void CheckTypeWasFound(string fileName, DocumentText discoveredText, IAbstractTextType ttOrig)
        {
            List<IAbstractTextType> matches = discoveredText.GetTextTypes(ttOrig.GetContentType());
            if (matches.Count != 0)
            {
                return;
            }

            // Nothing discovered for this type: record diagnostics before failing.
            DumpFoundText(fileName, ttOrig);
            m_workItem.Info = DumpTextType(ttOrig);
            m_workItem.ContentType = ttOrig.GetContentType();

            throw new Exception("Failed to discover metadata found in base document");
        }
        /// <summary>
        /// For every non-whitespace text type in <paramref name="dtCheck"/>, verifies via
        /// <c>CheckTypeWasFound</c> that the same content type is present in
        /// <paramref name="discoveredText"/>. BuiltInProperty and RoutingSlip are not compared.
        /// </summary>
        private void CompareDiscoveryForXls(string filename, DocumentText discoveredText, DocumentText dtCheck)
        {
            foreach (IAbstractTextType expected in dtCheck.GetTextTypes())
            {
                if (IsAllWhiteSpace(expected))
                {
                    continue;
                }

                ContentType kind = expected.GetContentType();
                bool notCompared = kind == ContentType.BuiltInProperty
                                   || kind == ContentType.RoutingSlip;

                if (!notCompared)
                {
                    CheckTypeWasFound(filename, discoveredText, expected);
                }
            }
        }