/// <summary>
/// Initializes the filter: stores the sheet part lookup and normalizes the
/// "sheet" and "definedName" strings against the shared name table.
/// </summary>
/// <param name="docText">Forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Forwarded to the base constructor; its NameTable is used for the normalized strings.</param>
/// <param name="contentTypesToDetect">Not read by this constructor; retained so existing callers keep compiling.</param>
/// <param name="piDictSheets">Stored in m_piDictSheets.</param>
public XlsxPreprocessWorkbookXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesToDetect, Dictionary<string, PartInfo> piDictSheets)
     : base(docText, commonNamespaces)
 {
     // A local list used to be built from contentTypesToDetect here but was never read
     // (and its m_ prefix made it look like a field), so it has been dropped.
     m_piDictSheets = piDictSheets;
     m_sheet = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "sheet");
     m_definedName = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "definedName");
 }
示例#2
0
        /// <summary>
        /// Initializes a new instance; both arguments are forwarded unchanged to the base constructor.
        /// </summary>
        /// <param name="docText">Forwarded to the base constructor.</param>
        /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
        public PptxComment(DocumentText docText, CommonNamespaces commonNamespaces)
            : base(docText, commonNamespaces)
        { 
#region XMLFilterBase
        // The handler overrides listed in this region are commented out.
        //override protected void HandleStartElement(XmlNodeInformation nodeInfo);
        //override protected void HandleContent(XmlNodeInformation nodeInfo){}
        //override protected void HandleEndElement(XmlNodeInformation nodeInfo){}
#endregion
        }
 /// <summary>
 /// Initializes the filter, recording which of the requested content types it
 /// should look for and keeping the supplied processing dictionaries.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 /// <param name="contentTypesOfInterest">Content types the caller wants detected.</param>
 /// <param name="processingDictionaries">Stored in m_processingDictionaries.</param>
 public XlsxSharedStringsXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesOfInterest, ref XlsxProcessingDictionaries processingDictionaries)
     : base(docText, commonNamespaces)
 {
     m_processingDictionaries = processingDictionaries;

     List<ContentType> requested = new List<ContentType>(contentTypesOfInterest);
     m_interestedInWhiteText = requested.Contains(ContentType.WhiteText);
     m_interestedInSmallText = requested.Contains(ContentType.SmallText);
     m_interestedInRedactedText = requested.Contains(ContentType.RedactedText);

     // Hidden text is currently keyed off HiddenRow only; the original author
     // expected further types to be added to this check over time.
     m_interestedInHiddenText = requested.Contains(ContentType.HiddenRow);
 }
        /// <summary>
        /// Processes the part. Returns null when the action includes cleaning, returns
        /// the input stream untouched for PassThrough, and otherwise records a macro
        /// node (when macros are of interest) before running the filter.
        /// </summary>
        /// <param name="partData">Stream holding the part content.</param>
        /// <param name="discoveryText">Receives the Macro text type and its node.</param>
        /// <param name="relPartProvider">Not used by this override.</param>
        /// <param name="action">The processing action being performed.</param>
        /// <returns>null, <paramref name="partData"/>, or m_outStream as described above.</returns>
        public override Stream ProcessPart(Stream partData, DocumentText discoveryText, RelatedPartProvider relPartProvider, DocumentProcessingActions action)
        {
            // Discovery on open documents: the copy keeps no macro information other than
            // the items recorded here. All macros live in a single vbaproject.bin file, and
            // so far that assumption has held.

            if (DocumentProcessor.ActionIncludesCleaning(action))
            {
                return null;
            }

            if (action == DocumentProcessingActions.PassThrough)
            {
                return partData;
            }

            if (m_bInterestedInMacros)
            {
                // Reuse the first Macro text type if one is already registered, otherwise add one.
                List<IAbstractTextType> existing = discoveryText.GetTextTypes(ContentType.Macro);
                TextType macroType;
                if (existing.Count == 0)
                {
                    macroType = new TextType(ContentType.Macro);
                    discoveryText.AddTextType(macroType);
                }
                else
                {
                    macroType = (TextType)existing[0];
                }

                // One node carrying the Id, Target and Type values as string infos.
                TextNode macroNode = new TextNode();
                macroNode.AddInfo(new NodeInfo { name = "Id", value = m_id, type = DataType.String });
                macroNode.AddInfo(new NodeInfo { name = "Target", value = m_target, type = DataType.String });
                macroNode.AddInfo(new NodeInfo { name = "Type", value = m_type, type = DataType.String });

                macroType.AddChild(macroNode);
            }

            Initialize();
            ConstructFilter(partData, discoveryText, action);
            ExecuteFilter();
            return m_outStream;
        }
 /// <summary>
 /// Builds one SingleTypeStateMachine and one block-type slot (initially Unknown)
 /// for every member of the ContentType enumeration.
 /// </summary>
 /// <param name="docText">Stored in m_DocumentText and handed to each state machine.</param>
 public ContentTypesStateMachine(DocumentText docText)
 {
     m_DocumentText = docText;

     int typeCount = Enum.GetValues(typeof(ContentType)).Length;
     m_StateMachines = new List<SingleTypeStateMachine>(typeCount);
     m_BlockTypes = new List<Effect.BlockType>(typeCount);

     // NOTE(review): the index is cast straight to ContentType, which presumably relies on
     // the enum values running contiguously from zero - confirm against the enum declaration.
     int index = 0;
     while (index < typeCount)
     {
         ContentType contentType = (ContentType)index;
         m_StateMachines.Add(new SingleTypeStateMachine(contentType, m_DocumentText));
         m_BlockTypes.Add(Effect.BlockType.Unknown);
         index++;
     }
 }
 /// <summary>
 /// Initializes the filter, recording the table's relationship id and its parent
 /// sheet path with everything up to and including the first "xl/" removed.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 /// <param name="rId">Stored in m_tableData.rId.</param>
 /// <param name="target">Not read by this constructor.</param>
 /// <param name="parentSheet">Path of the parent sheet; stored as-is when it contains no "xl/".</param>
 /// <param name="processingDictionaries">Stored in m_processingDictionaries.</param>
 public XlsxPreprocessTableXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, string rId, string target, string parentSheet, ref XlsxProcessingDictionaries processingDictionaries)
     : base(docText, commonNamespaces)
 {
     m_processingDictionaries = processingDictionaries;
     m_tableData.rId = rId;

     const string workbookFolder = "xl/";

     // Ordinal search: part names are identifiers, not linguistic text.
     int index = parentSheet.IndexOf(workbookFolder, System.StringComparison.Ordinal);

     // Cut relative to where the marker was actually found. The previous fixed
     // Substring(3) was only correct when "xl/" sat at the very start of the path.
     m_tableData.ParentSheet = index != -1
         ? parentSheet.Substring(index + workbookFolder.Length)
         : parentSheet;
 }
示例#7
0
        /// <summary>
        /// Runs an XmlPartFilter with the hidden-text and smart-tag triggers over
        /// document1.xml and checks that one text type of each kind is discovered.
        /// </summary>
        public void TestXMLPartFilter()
        {
            XmlPartFilter xpf = new XmlPartFilter(new CommonNamespaces(OpenXmlFormat.Transitional));

            xpf.Triggers.AddRange(DocxMetadataDefinitions.HiddenDocumentText);
            xpf.Triggers.AddRange(DocxMetadataDefinitions.SmartTags);

            using (Stream sIn = File.Open(TESTFILE_DIR + "document1.xml", FileMode.Open))
            {
                DocumentText docText = new DocumentText();
                Stream sOut = xpf.ProcessPart(sIn, docText, null, DocumentProcessingActions.DiscoverAndClean);

                Assert.IsNotNull(sOut, "output stream null unexpectedly");
                Assert.AreEqual(1, docText.GetTextTypes(ContentType.HiddenText).Count, "expected a hidden text type");
                // This assertion previously re-checked HiddenText, so the smart-tag trigger was never verified.
                Assert.AreEqual(1, docText.GetTextTypes(ContentType.SmartTag).Count, "expected a smart tags type");
            }
        }
 /// <summary>
 /// Runs the filter over the part for the Discover action only.
 /// </summary>
 /// <param name="partData">Stream holding the part content.</param>
 /// <param name="discoveryText">Passed to ConstructFilter.</param>
 /// <param name="relPartProvider">Not used by this override.</param>
 /// <param name="action">The processing action being performed.</param>
 /// <returns>m_outStream for Discover; null for every other action.</returns>
 public override Stream ProcessPart(Stream partData, DocumentText discoveryText, RelatedPartProvider relPartProvider, DocumentProcessingActions action)
 {
     // Anything other than plain discovery yields no output for this part.
     if (action != DocumentProcessingActions.Discover)
     {
         return null;
     }

     Initialize();
     ConstructFilter(partData, discoveryText, action);
     ExecuteFilter();
     return m_outStream;
 }
        /// <summary>
        /// Runs an XmlPartFilter with the custom-property and Workshare-property triggers
        /// over xlsx_customonly.xml and expects one CustomProperty text type and no
        /// WorkshareProperty text type.
        /// </summary>
        public void TestDiscoverCustomPropertiesXlsx()
        {
            XmlPartFilter filter = new XmlPartFilter(new CommonNamespaces());
            filter.Triggers.AddRange(GenericMetadataDefinitions.CustomProperties);
            filter.Triggers.AddRange(GenericMetadataDefinitions.WorkshareProperties);

            using (Stream input = File.Open(TESTFILE_DIR + "xlsx_customonly.xml", FileMode.Open))
            {
                DocumentText discovered = new DocumentText();
                Stream output = filter.ProcessPart(input, discovered, null, DocumentProcessingActions.DiscoverAndClean);
                Assert.IsNotNull(output, "output stream null unexpectedly");

                int worksharePropertyTypes = discovered.GetTextTypes(ContentType.WorkshareProperty).Count;
                Assert.AreEqual(0, worksharePropertyTypes, "expected no workshareproperty text type");

                int customPropertyTypes = discovered.GetTextTypes(ContentType.CustomProperty).Count;
                Assert.AreEqual(1, customPropertyTypes, "expected a customproperty type");
            }
        }
        /// <summary>
        /// For each text type in <paramref name="dtCheck"/>, verifies via CheckTypeWasFound
        /// that it is also present in <paramref name="discoveredText"/>, skipping the
        /// types that are deliberately excluded from the comparison.
        /// </summary>
        /// <param name="filename">Passed to CheckTypeWasFound.</param>
        /// <param name="discoveredText">The discovery result under test.</param>
        /// <param name="dtCheck">The discovery result whose text types drive the comparison.</param>
        private void CompareDiscoveryForDoc(string filename, DocumentText discoveredText, DocumentText dtCheck)
        {
            foreach (IAbstractTextType expected in dtCheck.GetTextTypes())
            {
                if (IsAllWhiteSpace(expected))
                {
                    continue;
                }

                ContentType kind = expected.GetContentType();

                // These types are never compared.
                if (kind == ContentType.Version ||
                    kind == ContentType.RoutingSlip ||
                    kind == ContentType.AutoVersion)
                {
                    continue;
                }

                // Reviewer is not checked either: a doc that *had* track changes carries
                // this type, whereas a docx that *had* track changes does not.
                if (kind == ContentType.Reviewer)
                {
                    continue;
                }

                if (kind == ContentType.Macro && HadJustThisDocumentMacro(expected))
                {
                    continue;
                }

                if (kind == ContentType.TextBox && AreAllTextBoxesPictures(expected))
                {
                    continue;
                }

                CheckTypeWasFound(filename, discoveredText, expected);
            }
        }
示例#11
0
        /// <summary>
        /// Reads the properties and paragraphs of the PDF identified by _file/_password
        /// into a fresh DocumentText. A failure in either phase is recorded through
        /// AddError with the exception message rather than propagated.
        /// </summary>
        /// <returns>The DocumentText that was populated.</returns>
        public DocumentText Execute()
        {
            // Start from a clean document holding a single Paragraph text type.
            _docText = new DocumentText();
            _paraType = new TextType(ContentType.Paragraph);
            _docText.AddTextType(_paraType);

            _builder = new StringBuilder();
            _iPos = 0;

            // Phase 1: document properties.
            try
            {
                foreach (KeyValuePair<string, string> property in Workshare.Pdf.Reader.GetProperties(_file, _password))
                {
                    AddProperty(property.Key, property.Value);
                }
            }
            catch (Exception propertyError)
            {
                AddError(propertyError.Message);
            }

            // Phase 2: text content, one paragraph at a time.
            try
            {
                foreach (string text in Workshare.Pdf.Reader.GetParagraphs(_file, _password))
                {
                    AddText(text);
                    NewParagraph();
                }
            }
            catch (Exception contentError)
            {
                AddError(contentError.Message);
            }

            return _docText;
        }
示例#12
0
        /// <summary>
        /// Pre-processes and then processes every related part of the input package,
        /// disposes the output package (if one exists) and finally calls PostProcess.
        /// </summary>
        /// <param name="documentProcessingActions">
        /// The action to perform. An output package is set up for anything other than
        /// Discover; a new DocumentText is created when the action includes discovery.
        /// </param>
        public void Process(DocumentProcessingActions documentProcessingActions)
        {
            if (documentProcessingActions != DocumentProcessingActions.Discover)
                m_outputPackage = SetupOutput(m_inputPackage);

            try
            {
                if (ActionIncludesDiscovery(documentProcessingActions))
                    m_docText = new DocumentText();

                List<PartInfo> relObjects = m_inputPackage.GetRelatedObjects();

                // Two passes: every part is pre-processed before any part is processed.
                foreach (PartInfo rel in relObjects)
                {
                    PreprocessPart(rel, documentProcessingActions);
                }
                foreach (PartInfo rel in relObjects)
                {
                    ProcessPart(rel, m_inputPackage, m_outputPackage, documentProcessingActions);
                }
            }
            finally
            {
                // Dispose even when a part throws, so the output package is not left open.
                if (m_outputPackage != null)
                    m_outputPackage.Dispose();
            }

            // As before, post-processing only runs when every part was handled successfully.
            PostProcess();
        }
示例#13
0
 /// <summary>
 /// Initializes the filter: records which content types the caller is interested in,
 /// stores the shared lookups, and normalizes the element/attribute strings it uses.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor; its NameTable is used for the normalized strings.</param>
 /// <param name="contentTypesToDetect">Content types the caller wants detected.</param>
 /// <param name="FoundDefinedNames">Stored in m_foundDefinedNames.</param>
 /// <param name="WorksheetLookup">Stored in m_worksheets.</param>
 public XlsxWorkbookXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesToDetect, Dictionary<DefinedName, byte> FoundDefinedNames, Dictionary<string, WorkSheet> WorksheetLookup)
     : base(docText, commonNamespaces)
 {
     List<ContentType> requested = new List<ContentType>(contentTypesToDetect);

     m_interestedInHiddenSheets = requested.Contains(ContentType.HiddenSheet);
     m_interestedInExternalLinks = requested.Contains(ContentType.Links);
     m_interestedInMacros = requested.Contains(ContentType.Macro);

     // Any of the hidden text/column/row types switches hidden-data handling on;
     // this may need to become more specific later.
     bool wantsHiddenData = requested.Contains(ContentType.HiddenText)
         || requested.Contains(ContentType.HiddenColumn)
         || requested.Contains(ContentType.HiddenRow);
     if (wantsHiddenData)
     {
         m_interestedInHiddenData = true;
     }

     m_foundDefinedNames = FoundDefinedNames;
     m_worksheets = WorksheetLookup;

     m_sheet = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "sheet");
     m_externalRef = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "externalReferences");
     m_definedNames = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "definedNames");
     m_definedName = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "definedName");
     m_hidden = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "hidden");
     m_veryHidden = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "veryHidden");
 }
示例#14
0
 /// <summary>
 /// Initializes a new instance; both arguments are forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 public PptxPreprocessSlide(DocumentText docText, CommonNamespaces commonNamespaces)
     : base(docText, commonNamespaces)
 {
 }
 /// <summary>
 /// Initializes the filter by forwarding the lookups and processing dictionaries to Init.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor and to Init.</param>
 /// <param name="StringContentLookup">Forwarded to Init.</param>
 /// <param name="WorksheetLookup">Forwarded to Init.</param>
 /// <param name="CellFormats">Forwarded to Init.</param>
 /// <param name="processingDictionaries">Forwarded to Init by reference.</param>
 public XlsxBaseWorksheetXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, Dictionary<string, string> StringContentLookup, Dictionary<string, WorkSheet> WorksheetLookup, Dictionary<string, CellFormatData> CellFormats, XlsxProcessingDictionaries processingDictionaries)
     : base(docText, commonNamespaces)
 {
     // Init takes the dictionaries by ref; the by-value parameter of this constructor is what gets passed.
     Init(
         commonNamespaces,
         StringContentLookup,
         WorksheetLookup,
         CellFormats,
         ref processingDictionaries);
 }
示例#16
0
        /// <summary>
        /// Returns the cached TextType for <paramref name="dt"/>, resolving it through
        /// FindTextType on first use. The first document passed in is remembered.
        /// </summary>
        /// <param name="dt">The document whose text type is requested.</param>
        /// <returns>The cached or newly resolved TextType.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// A different document than the remembered one was supplied.
        /// </exception>
        internal TextType GetTextType(DocumentText dt)
        {
            if (m_DocText != null)
            {
                // Already bound to a document: only that same document is acceptable.
                if (m_DocText != dt)
                {
                    throw new System.InvalidOperationException();
                }
            }
            else
            {
                m_DocText = dt;
            }

            if (m_TextType != null)
            {
                return m_TextType;
            }

            m_TextType = FindTextType(dt);
            return m_TextType;
        }
 /// <summary>
 /// Initializes a new instance; both arguments are forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 public PptxPreprocessPresentation(DocumentText docText, CommonNamespaces commonNamespaces)
     : base(docText, commonNamespaces)
 {
 }
示例#18
0
 /// <summary>
 /// Initializes the filter with a DiagramStateTracker built from the requested
 /// content types and keeps the supplied helper.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 /// <param name="contentTypesOfInterest">Copied into a list that is given to the state tracker.</param>
 /// <param name="DiagramHelper">Stored in m_DiagramHelper.</param>
 public DiagramXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesOfInterest, ref PredefinedObjectsProcessingHelper DiagramHelper)
     : base(docText, commonNamespaces)
 {
     List<ContentType> requested = new List<ContentType>(contentTypesOfInterest);
     m_stateTracker = new DiagramStateTracker(requested);

     m_DiagramHelper = DiagramHelper;
 }
示例#19
0
		/// <summary>
		/// Creates a result backed by a new, empty DocumentText and then passes
		/// <paramref name="entries"/> to Convert.
		/// </summary>
		/// <param name="entries">The entries handed to Convert.</param>
		internal DiscoveryResult(List<DictionaryEntry> entries)
		{
			// The document must exist before Convert runs.
			this.m_docText = new DocumentText();
			this.Convert(entries);
		}
示例#20
0
 /// <summary>
 /// Initializes the filter and keeps the supplied helper.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 /// <param name="DiagramHelper">Stored in m_DiagramHelper.</param>
 public ThemeXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ref PredefinedObjectsProcessingHelper DiagramHelper)
     : base(docText, commonNamespaces)
 {
     this.m_DiagramHelper = DiagramHelper;
 }
        /// <summary>
        /// Asserts that <paramref name="dt"/> is non-null and has text types, then runs
        /// CheckOneTypeOfMetadata for each content type in the table below.
        /// </summary>
        /// <param name="dt">The discovery result to inspect.</param>
        /// <param name="notExpected">The content type that is passed to every check as the one not expected.</param>
        private static void TestHasAllMetadatTypesOtherThan(DocumentText dt, ContentType notExpected)
        {
            Assert.IsNotNull(dt, "expected the document text object to be valid");
            Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

            // Checks for Macro, Variable, DocumentStatistic, Reviewer, Version, AutoVersion
            // and RoutingSlip are currently disabled and therefore absent from the table.
            ContentType[] checkedTypes =
            {
                ContentType.Comment,
                ContentType.TrackChange,
                ContentType.HiddenText,
                ContentType.SmallText,
                ContentType.WhiteText,
                ContentType.AttachedTemplate,
                ContentType.SmartTag,
                ContentType.Field,
                ContentType.Hyperlink,
                ContentType.CustomProperty,
                ContentType.RedactedText,
                ContentType.BuiltInProperty
            };
            string[] descriptions =
            {
                "comments",
                "track changes",
                "hidden text",
                "small text",
                "white text",
                "attached template",
                "smart tags",
                "fields",
                "hyperlinks",
                "custom properties",
                "redacted text",
                "built-in properties"
            };

            for (int i = 0; i < checkedTypes.Length; i++)
            {
                CheckOneTypeOfMetadata(notExpected, dt, checkedTypes[i], descriptions[i]);
            }
        }
 /// <summary>
 /// Verifies one content type in a discovery result. When <paramref name="type"/> equals
 /// <paramref name="notExpected"/> there must be no text type for it, or a single one with
 /// no children; otherwise the first text type for it must have at least one child.
 /// </summary>
 /// <param name="notExpected">The content type expected to have been cleaned.</param>
 /// <param name="dt">The discovery result to inspect.</param>
 /// <param name="type">The content type being checked.</param>
 /// <param name="sText">Human-readable name of the type, appended to failure messages.</param>
 private static void CheckOneTypeOfMetadata(ContentType notExpected, DocumentText dt, ContentType type, string sText)
 {
     List<IAbstractTextType> listType = dt.GetTextTypes(type);

     if (type == notExpected)
     {
         Assert.IsNotNull(listType, "Failed to clean " + sText);
         if (listType.Count > 0)
         {
             Assert.AreEqual(1, listType.Count, "Will take an empty TextType in the list as OK");
             TextType tt = listType[0] as TextType;
             Assert.IsNotNull(tt, "Failed to clean " + sText);
             Assert.AreEqual(0, tt.GetChildCount(), "Failed to clean " + sText);
         }
     }
     else
     {
         // Assert on the list and the cast first, so a missing type fails with the intended
         // message rather than an ArgumentOutOfRangeException or NullReferenceException.
         Assert.IsNotNull(listType, "Test document has no " + sText);
         Assert.Greater(listType.Count, 0, "Test document has no " + sText);
         TextType tt = listType[0] as TextType;
         Assert.IsNotNull(tt, "Test document has no " + sText);
         Assert.Greater(tt.GetChildCount(), 0, "Test document has no " + sText);
     }
 }
        /// <summary>
        /// Asserts that <paramref name="dt"/> is non-null and has text types, then runs
        /// CheckOneTypeOfMetadata for each content type in the table below.
        /// </summary>
        /// <param name="dt">The discovery result to inspect.</param>
        /// <param name="notExpected">The content type that is passed to every check as the one not expected.</param>
        private static void TestHasAllMetadataTypesOtherThan(DocumentText dt, ContentType notExpected)
        {
            Assert.IsNotNull(dt, "expected the document text object to be valid");
            Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

            // Checks for SmartTag, CustomProperty, HiddenText and BuiltInProperty are
            // currently disabled and therefore absent from the table.
            ContentType[] checkedTypes =
            {
                ContentType.Header,
                ContentType.Footer,
                ContentType.Comment,
                ContentType.TrackChange,
                ContentType.SmallText,
                ContentType.WhiteText,
                ContentType.Hyperlink,
                ContentType.Links,
                ContentType.HiddenSheet,
                ContentType.HiddenRow,
                ContentType.HiddenColumn,
                ContentType.Macro,
                ContentType.RedactedText
            };
            string[] descriptions =
            {
                "headers",
                "footers",
                "comments",
                "track changes",
                "small text",
                "white text",
                "hyperlinks",
                "links",
                "hidden sheet",
                "hidden row",
                "hidden column",
                "macro",
                "redacted text"
            };

            for (int i = 0; i < checkedTypes.Length; i++)
            {
                CheckOneTypeOfMetadata(notExpected, dt, checkedTypes[i], descriptions[i]);
            }
        }
示例#24
0
 /// <summary>
 /// Initializes a new instance; both arguments are forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 public PptxCommentAuthors(DocumentText docText, CommonNamespaces commonNamespaces)
     : base(docText, commonNamespaces)
 { 
 }
示例#25
0
 /// <summary>
 /// Creates a result that wraps an existing DocumentText.
 /// </summary>
 /// <param name="docText">Stored in m_docText without copying.</param>
 internal DiscoveryResult(DocumentText docText)
 {
     this.m_docText = docText;
 }
示例#26
0
 /// <summary>
 /// Initializes the filter with the worksheet lookup and resets the current
 /// revision type to none.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 /// <param name="WorksheetLookup">Assigned to WorksheetDataLookup.</param>
 public XlsxRevisionXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, Dictionary<string, WorkSheet> WorksheetLookup)
     : base(docText, commonNamespaces)
 {
     WorksheetDataLookup = WorksheetLookup;

     // No row/column revision is in progress at construction time.
     m_currentRevisionType = RowColRevisionType.none;
 }
		/// <summary>
		/// Runs the full sequence: launches the host application, discovers the original
		/// document, then performs the binary clean, API clean and validation steps that
		/// are enabled in m_advancedOptions. The application cache is always shut down
		/// at the end, whether or not the run succeeded.
		/// </summary>
		/// <returns>
		/// true when the sequence completes; false when launching the host application
		/// fails with an exception whose message contains "controller".
		/// </returns>
		public bool Execute()
		{
			m_bAbortFlag = false;
			try
			{
				CallbackSteppingMessage("Initialising");
				KillOfficeApps();
				m_ft = Workshare.ApplicationControllers.OfficeApplicationCache.GetFileTypeBasedOnFilename(m_sInputFile, false);
				m_cache = Workshare.ApplicationControllers.OfficeApplicationCache.Instance;

				appLaunchTime.Reset();
				appLaunchTime.Start();
				try
				{
					m_cache.GetHostApplication(m_ft); // otherwise the Shutdown will not release the office app instance
				}
				catch (Exception e)
				{
					// A message mentioning "controller" is reported as a failed run; anything else propagates.
					if (e.Message.Contains("controller"))
						return false;

					throw;

				}
				finally
				{
					// Stop the launch timer on every path; previously it kept running
					// whenever GetHostApplication threw.
					appLaunchTime.Stop();
				}
				TakeWorkingCopiesOfDataDataFile();

				CallbackSteppingMessage("Discover Original");
				m_dtReadOnlyDiscover = DiscoverDocument(m_sInputFile);
				m_dtDiscoverAfterApiClean = m_dtReadOnlyDiscover; // if the Api clean fails, we treat it as nothing cleaned

				if (m_advancedOptions.DoBinaryClean)
				{
					CallbackSteppingMessage("LightSpeed Redacting");
					List<ContentType> listContentTypes = GetListOfAllContentTypes();
					DoLightSpeedCleanEx(listContentTypes);
					m_LightSpeedCleaningWorked = true;
					CallbackSteppingMessage("Discover LightSpeed Redacted Version");
					m_dtDiscoverAfterBinClean = DiscoverDocument(m_sFileForBinClean);
				}

				if (m_advancedOptions.DoDomClean)
				{
					CallbackSteppingMessage("API Cleaning");
					DoAPIClean();
					CallbackSteppingMessage("Discover API Cleaned Version");
					m_dtDiscoverAfterApiClean = DiscoverDocument(m_sFileForApiClean);
				}

				if (m_advancedOptions.DoValidation)
				{
					CallbackSteppingMessage("Checking LightSpeed Redacted Doc for Corruption");
					ValidateNotCorrupt(m_sFileForBinClean);
				}

				CallbackSteppingMessage("Completed");
				return true;
			}
			finally
			{
				// Time the shutdown; the timer is reset even when there is no cache to shut down.
				appCloseTime.Reset();
				if (m_cache != null)
				{
					appCloseTime.Start();
					m_cache.ShutDown();
					appCloseTime.Stop();
				}
			}
		}
示例#28
0
        /// <summary>
        /// Returns the text type in <paramref name="dt"/> whose content type matches this
        /// instance's ContentType, creating a new TextType when none is found. The new
        /// instance is added to <paramref name="dt"/> unless the content type is ContentRule.
        /// </summary>
        /// <param name="dt">The document to search and, where applicable, add to.</param>
        /// <returns>The existing or newly created TextType.</returns>
        private TextType FindTextType(DocumentText dt)
        {
            // Iterate using the interface type: a foreach typed as TextType casts every
            // element, so an unrelated entry that is not a TextType would throw before the
            // matching entry was reached. Only the matching entry is cast now.
            foreach (IAbstractTextType candidate in dt.GetTextTypes())
            {
                if (candidate.GetContentType() == ContentType)
                    return (TextType)candidate;
            }

            TextType created = new TextType(ContentType);

            // ContentRule text types are handed back without being registered on the document.
            if (ContentType == ContentType.ContentRule)
                return created;

            dt.AddTextType(created);
            return created;
        }
        /// <summary>
        /// Verifies one content type in a discovery result. When <paramref name="type"/> equals
        /// <paramref name="notExpected"/> there must be no text type for it, or a single one with
        /// no children; otherwise the first text type for it must have at least one child.
        /// The RedactedText check is skipped entirely when HiddenText is the type not expected.
        /// </summary>
        /// <param name="notExpected">The content type expected to have been cleaned.</param>
        /// <param name="dt">The discovery result to inspect.</param>
        /// <param name="type">The content type being checked.</param>
        /// <param name="sText">Human-readable name of the type, appended to failure messages.</param>
        private static void CheckOneTypeOfMetadata(ContentType notExpected, DocumentText dt, ContentType type, string sText)
        {
            // Word can mark redacted text with "vanish" which may or may not be appropriate.
            if (notExpected == ContentType.HiddenText && type == ContentType.RedactedText)
                return;

            List<IAbstractTextType> listType = dt.GetTextTypes(type);

            if (type == notExpected)
            {
                Assert.IsNotNull(listType, "Failed to clean " + sText);
                if (listType.Count > 0)
                {
                    Assert.AreEqual(1, listType.Count, "Will take an empty TextType in the list as OK");
                    TextType tt = listType[0] as TextType;
                    Assert.IsNotNull(tt, "Failed to clean " + sText);
                    Assert.AreEqual(0, tt.GetChildCount(), "Failed to clean " + sText);
                }
            }
            else
            {
                // Assert on the list and the cast first, so a missing type fails with the intended
                // message rather than an ArgumentOutOfRangeException or NullReferenceException.
                Assert.IsNotNull(listType, "Test document has no " + sText);
                Assert.Greater(listType.Count, 0, "Test document has no " + sText);
                TextType tt = listType[0] as TextType;
                Assert.IsNotNull(tt, "Test document has no " + sText);
                Assert.Greater(tt.GetChildCount(), 0, "Test document has no " + sText);
            }
        }
 /// <summary>
 /// Initializes the filter, keeping the worksheet lookup and the processing dictionaries.
 /// </summary>
 /// <param name="docText">Forwarded to the base constructor.</param>
 /// <param name="commonNamespaces">Forwarded to the base constructor.</param>
 /// <param name="WorksheetLookup">Stored in m_worksheetLookup.</param>
 /// <param name="processingDictionaries">Stored in m_processingDictionaries.</param>
 public XlsxCalcChainXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, Dictionary<string, WorkSheet> WorksheetLookup, ref XlsxProcessingDictionaries processingDictionaries)
     : base(docText, commonNamespaces)
 {
     this.m_worksheetLookup = WorksheetLookup;
     this.m_processingDictionaries = processingDictionaries;
 }