/// <summary>
/// Creates the workbook pre-processing filter: stores the sheet part lookup and
/// normalizes the "sheet" and "definedName" element names against the name table
/// of <paramref name="commonNamespaces"/>.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor; its NameTable is used for name normalization.</param>
/// <param name="contentTypesToDetect">Requested content types. Must not be null; the values are not otherwise used by this constructor.</param>
/// <param name="piDictSheets">Part information keyed by string, stored in m_piDictSheets.</param>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="contentTypesToDetect"/> is null.</exception>
public XlsxPreprocessWorkbookXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesToDetect, Dictionary<string, PartInfo> piDictSheets)
    : base(docText, commonNamespaces)
{
    // The previous code copied contentTypesToDetect into a local list that was never read.
    // The only observable effect of that copy was an ArgumentNullException for a null array,
    // so the check is kept explicitly and the throw-away allocation is removed.
    // NOTE(review): the local was named like a field (m_contentTypesOfInterest); if the class
    // has such a field that was meant to be assigned here, assign it instead - confirm.
    if (contentTypesToDetect == null)
    {
        throw new System.ArgumentNullException("contentTypesToDetect");
    }

    m_piDictSheets = piDictSheets;
    m_sheet = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "sheet");
    m_definedName = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "definedName");
}
/// <summary>
/// Creates the instance; both arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
public PptxComment(DocumentText docText, CommonNamespaces commonNamespaces)
    : base(docText, commonNamespaces)
{
    #region XMLFilterBase
    // None of the handlers below are overridden; they are kept only as commented-out stubs.
    //override protected void HandleStartElement(XmlNodeInformation nodeInfo);
    //override protected void HandleContent(XmlNodeInformation nodeInfo){}
    //override protected void HandleEndElement(XmlNodeInformation nodeInfo){}
    #endregion
}
/// <summary>
/// Creates the shared-strings filter and records which text categories the caller asked for.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
/// <param name="contentTypesOfInterest">Content types requested by the caller; copied into a list for lookups.</param>
/// <param name="processingDictionaries">Processing dictionaries stored in m_processingDictionaries (passed by ref, only read here).</param>
public XlsxSharedStringsXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesOfInterest, ref XlsxProcessingDictionaries processingDictionaries)
    : base(docText, commonNamespaces)
{
    List<ContentType> requested = new List<ContentType>(contentTypesOfInterest);

    m_interestedInWhiteText = requested.Contains(ContentType.WhiteText);
    m_interestedInSmallText = requested.Contains(ContentType.SmallText);
    m_interestedInRedactedText = requested.Contains(ContentType.RedactedText);

    // Hidden text is currently driven by the HiddenRow request only; the original author
    // expected further content types to be added to this check over time.
    m_interestedInHiddenText = requested.Contains(ContentType.HiddenRow);

    m_processingDictionaries = processingDictionaries;
}
/// <summary>
/// Processes the part. Cleaning actions yield null, PassThrough returns the input stream,
/// and any other action optionally records the macro relationship before running the filter.
/// </summary>
/// <param name="partData">Stream holding the part content.</param>
/// <param name="discoveryText">Document text that receives the macro text type and node.</param>
/// <param name="relPartProvider">Not used by this method.</param>
/// <param name="action">Requested processing action.</param>
/// <returns>null for cleaning actions, <paramref name="partData"/> for PassThrough, otherwise m_outStream.</returns>
public override Stream ProcessPart(Stream partData, DocumentText discoveryText, RelatedPartProvider relPartProvider, DocumentProcessingActions action)
{
    // Used for discovery on open documents: the copy loses all other macro information
    // except the items that specify macro content. All macros live in a single
    // vbaproject.bin file; so far this approach appears to be valid.
    if (DocumentProcessor.ActionIncludesCleaning(action))
    {
        return null;
    }

    if (action == DocumentProcessingActions.PassThrough)
    {
        return partData;
    }

    if (m_bInterestedInMacros)
    {
        // Reuse the first existing Macro text type, or register a new one.
        List<IAbstractTextType> existingMacroTypes = discoveryText.GetTextTypes(ContentType.Macro);
        TextType macroType;
        if (existingMacroTypes.Count == 0)
        {
            macroType = new TextType(ContentType.Macro);
            discoveryText.AddTextType(macroType);
        }
        else
        {
            macroType = (TextType)existingMacroTypes[0];
        }

        // Describe the relationship through its Id, Target and Type values.
        TextNode relationshipNode = new TextNode();

        NodeInfo idInfo = new NodeInfo();
        idInfo.name = "Id";
        idInfo.value = m_id;
        idInfo.type = DataType.String;
        relationshipNode.AddInfo(idInfo);

        NodeInfo targetInfo = new NodeInfo();
        targetInfo.name = "Target";
        targetInfo.value = m_target;
        targetInfo.type = DataType.String;
        relationshipNode.AddInfo(targetInfo);

        NodeInfo typeInfo = new NodeInfo();
        typeInfo.name = "Type";
        typeInfo.value = m_type;
        typeInfo.type = DataType.String;
        relationshipNode.AddInfo(typeInfo);

        macroType.AddChild(relationshipNode);
    }

    Initialize();
    ConstructFilter(partData, discoveryText, action);
    ExecuteFilter();
    return m_outStream;
}
/// <summary>
/// Creates one <see cref="SingleTypeStateMachine"/> and one Unknown block type entry
/// per value of the ContentType enumeration.
/// </summary>
/// <param name="docText">Document text stored in m_DocumentText and handed to every state machine.</param>
public ContentTypesStateMachine(DocumentText docText)
{
    m_DocumentText = docText;

    int contentTypeCount = Enum.GetValues(typeof(ContentType)).Length;
    m_StateMachines = new List<SingleTypeStateMachine>(contentTypeCount);
    m_BlockTypes = new List<Effect.BlockType>(contentTypeCount);

    // NOTE(review): the loop index is cast straight to ContentType, which presumably relies on
    // the enum values being contiguous and starting at 0 - confirm against the enum declaration.
    for (int ordinal = 0; ordinal < contentTypeCount; ordinal++)
    {
        ContentType contentType = (ContentType)ordinal;
        m_StateMachines.Add(new SingleTypeStateMachine(contentType, m_DocumentText));
        m_BlockTypes.Add(Effect.BlockType.Unknown);
    }
}
/// <summary>
/// Creates the table pre-processing filter and records the relationship id and the
/// parent sheet path (with everything up to and including the first "xl/" removed).
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
/// <param name="rId">Relationship id stored in m_tableData.rId.</param>
/// <param name="target">Not used by this constructor.</param>
/// <param name="parentSheet">Path of the parent sheet; must not be null.</param>
/// <param name="processingDictionaries">Processing dictionaries stored in m_processingDictionaries (passed by ref, only read here).</param>
public XlsxPreprocessTableXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, string rId, string target, string parentSheet, ref XlsxProcessingDictionaries processingDictionaries)
    : base(docText, commonNamespaces)
{
    const string packagePrefix = "xl/";

    m_processingDictionaries = processingDictionaries;
    m_tableData.rId = rId;

    // The previous code looked for "xl/" anywhere in the path but then always removed the
    // first three characters, which mangles paths where the prefix is not at index 0
    // (for example a leading '/'). Strip relative to where the prefix was actually found.
    // An ordinal search is used because this is a part path, not linguistic text.
    int prefixAt = parentSheet.IndexOf(packagePrefix, System.StringComparison.Ordinal);
    if (prefixAt != -1)
    {
        m_tableData.ParentSheet = parentSheet.Substring(prefixAt + packagePrefix.Length);
    }
    else
    {
        m_tableData.ParentSheet = parentSheet;
    }
}
/// <summary>
/// Runs an XmlPartFilter with the hidden-text and smart-tag triggers over document1.xml
/// and checks that exactly one text type of each kind is discovered.
/// </summary>
public void TestXMLPartFilter()
{
    XmlPartFilter xpf = new XmlPartFilter(new CommonNamespaces(OpenXmlFormat.Transitional));
    xpf.Triggers.AddRange(DocxMetadataDefinitions.HiddenDocumentText);
    xpf.Triggers.AddRange(DocxMetadataDefinitions.SmartTags);

    using (Stream sIn = File.Open(TESTFILE_DIR + "document1.xml", FileMode.Open))
    {
        DocumentText docText = new DocumentText();
        Stream sOut = xpf.ProcessPart(sIn, docText, null, DocumentProcessingActions.DiscoverAndClean);

        Assert.IsNotNull(sOut, "output stream null unexpectedly");
        Assert.AreEqual(1, docText.GetTextTypes(ContentType.HiddenText).Count, "expected a hidden text type");

        // This assertion used to repeat the HiddenText check, so the smart tag trigger was
        // never verified even though the failure message referred to smart tags.
        Assert.AreEqual(1, docText.GetTextTypes(ContentType.SmartTag).Count, "expected a smart tags type");
    }
}
/// <summary>
/// Runs the filter for the Discover action only; every other action yields null.
/// </summary>
/// <param name="partData">Stream holding the part content.</param>
/// <param name="discoveryText">Document text handed to the filter.</param>
/// <param name="relPartProvider">Not used by this method.</param>
/// <param name="action">Requested processing action.</param>
/// <returns>m_outStream after the filter has run for Discover; otherwise null.</returns>
public override Stream ProcessPart(Stream partData, DocumentText discoveryText, RelatedPartProvider relPartProvider, DocumentProcessingActions action)
{
    if (action != DocumentProcessingActions.Discover)
    {
        return null;
    }

    Initialize();
    ConstructFilter(partData, discoveryText, action);
    ExecuteFilter();
    return m_outStream;
}
/// <summary>
/// Runs an XmlPartFilter with the custom-property and Workshare-property triggers over
/// xlsx_customonly.xml and expects one custom property type and no Workshare property type.
/// </summary>
public void TestDiscoverCustomPropertiesXlsx()
{
    XmlPartFilter filter = new XmlPartFilter(new CommonNamespaces());
    filter.Triggers.AddRange(GenericMetadataDefinitions.CustomProperties);
    filter.Triggers.AddRange(GenericMetadataDefinitions.WorkshareProperties);

    using (Stream input = File.Open(TESTFILE_DIR + "xlsx_customonly.xml", FileMode.Open))
    {
        DocumentText discovered = new DocumentText();
        Stream output = filter.ProcessPart(input, discovered, null, DocumentProcessingActions.DiscoverAndClean);
        Assert.IsNotNull(output, "output stream null unexpectedly");

        int worksharePropertyTypes = discovered.GetTextTypes(ContentType.WorkshareProperty).Count;
        Assert.AreEqual(0, worksharePropertyTypes, "expected no workshareproperty text type");

        int customPropertyTypes = discovered.GetTextTypes(ContentType.CustomProperty).Count;
        Assert.AreEqual(1, customPropertyTypes, "expected a customproperty type");
    }
}
/// <summary>
/// Checks that every relevant text type in <paramref name="dtCheck"/> was also found in
/// <paramref name="discoveredText"/>, skipping types that are not compared.
/// </summary>
/// <param name="filename">File name passed on to CheckTypeWasFound.</param>
/// <param name="discoveredText">Discovery result under test.</param>
/// <param name="dtCheck">Reference discovery result whose text types are iterated.</param>
private void CompareDiscoveryForDoc(string filename, DocumentText discoveredText, DocumentText dtCheck)
{
    foreach (IAbstractTextType expected in dtCheck.GetTextTypes())
    {
        if (IsAllWhiteSpace(expected))
        {
            continue;
        }

        bool mustBeFound;
        switch (expected.GetContentType())
        {
            case ContentType.Version:
            case ContentType.RoutingSlip:
            case ContentType.AutoVersion:
                mustBeFound = false;
                break;

            case ContentType.Reviewer:
                // Deliberately not checked: a doc that *had* track changes has this type,
                // whereas a docx that *had* track changes does not.
                mustBeFound = false;
                break;

            case ContentType.Macro:
                mustBeFound = !HadJustThisDocumentMacro(expected);
                break;

            case ContentType.TextBox:
                mustBeFound = !AreAllTextBoxesPictures(expected);
                break;

            default:
                mustBeFound = true;
                break;
        }

        if (mustBeFound)
        {
            CheckTypeWasFound(filename, discoveredText, expected);
        }
    }
}
/// <summary>
/// Builds a fresh DocumentText from the PDF identified by _file / _password: first its
/// properties, then its paragraphs. A failure in either stage is recorded through AddError
/// and does not prevent the other stage or the return of the result.
/// </summary>
/// <returns>The newly populated document text.</returns>
public DocumentText Execute()
{
    // Reset the per-run state.
    _builder = new StringBuilder();
    _iPos = 0;
    _docText = new DocumentText();
    _paraType = new TextType(ContentType.Paragraph);
    _docText.AddTextType(_paraType);

    // Stage 1: properties.
    try
    {
        foreach (KeyValuePair<string, string> property in Workshare.Pdf.Reader.GetProperties(_file, _password))
        {
            AddProperty(property.Key, property.Value);
        }
    }
    catch (Exception propertyFailure)
    {
        AddError(propertyFailure.Message);
    }

    // Stage 2: content, one paragraph at a time.
    try
    {
        foreach (string paragraphText in Workshare.Pdf.Reader.GetParagraphs(_file, _password))
        {
            AddText(paragraphText);
            NewParagraph();
        }
    }
    catch (Exception contentFailure)
    {
        AddError(contentFailure.Message);
    }

    return _docText;
}
/// <summary>
/// Processes the input package: sets up the output package (unless only discovering),
/// pre-processes every related part, then processes every related part, disposes the
/// output package and finally runs PostProcess.
/// </summary>
/// <param name="documentProcessingActions">Requested processing actions.</param>
public void Process(DocumentProcessingActions documentProcessingActions)
{
    bool discoverOnly = documentProcessingActions == DocumentProcessingActions.Discover;
    if (!discoverOnly)
    {
        m_outputPackage = SetupOutput(m_inputPackage);
    }

    if (ActionIncludesDiscovery(documentProcessingActions))
    {
        m_docText = new DocumentText();
    }

    List<PartInfo> relatedParts = m_inputPackage.GetRelatedObjects();

    // Two separate passes: all parts are pre-processed before any part is processed.
    foreach (PartInfo part in relatedParts)
    {
        PreprocessPart(part, documentProcessingActions);
    }

    foreach (PartInfo part in relatedParts)
    {
        ProcessPart(part, m_inputPackage, m_outputPackage, documentProcessingActions);
    }

    // NOTE(review): the output package is only disposed when no exception was thrown above;
    // confirm whether disposing it on failure is wanted before moving this into a finally.
    if (m_outputPackage != null)
    {
        m_outputPackage.Dispose();
    }

    PostProcess();
}
/// <summary>
/// Creates the workbook filter: records which content types the caller asked for, stores
/// the shared lookups and normalizes the element/attribute names used while filtering.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor; its NameTable is used for name normalization.</param>
/// <param name="contentTypesToDetect">Content types requested by the caller; copied into a list for lookups.</param>
/// <param name="FoundDefinedNames">Dictionary stored in m_foundDefinedNames.</param>
/// <param name="WorksheetLookup">Dictionary stored in m_worksheets.</param>
public XlsxWorkbookXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesToDetect, Dictionary<DefinedName, byte> FoundDefinedNames, Dictionary<string, WorkSheet> WorksheetLookup)
    : base(docText, commonNamespaces)
{
    List<ContentType> requested = new List<ContentType>(contentTypesToDetect);

    m_interestedInHiddenSheets = requested.Contains(ContentType.HiddenSheet);
    m_interestedInExternalLinks = requested.Contains(ContentType.Links);
    m_interestedInMacros = requested.Contains(ContentType.Macro);

    // May have to get more specific later: any of the three hidden kinds enables hidden data.
    // The flag is only ever switched on here; it is left untouched otherwise.
    bool wantsHiddenData = requested.Contains(ContentType.HiddenText)
        || requested.Contains(ContentType.HiddenColumn)
        || requested.Contains(ContentType.HiddenRow);
    if (wantsHiddenData)
    {
        m_interestedInHiddenData = true;
    }

    m_foundDefinedNames = FoundDefinedNames;
    m_worksheets = WorksheetLookup;

    m_sheet = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "sheet");
    m_externalRef = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "externalReferences");
    m_definedNames = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "definedNames");
    m_definedName = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "definedName");
    m_hidden = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "hidden");
    m_veryHidden = NameTableUtils.NormalizeString(commonNamespaces.NameTable, "veryHidden");
}
/// <summary>
/// Creates the instance; both arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
public PptxPreprocessSlide(DocumentText docText, CommonNamespaces commonNamespaces)
    : base(docText, commonNamespaces)
{
    // No additional state to initialize.
}
/// <summary>
/// Creates the worksheet filter and delegates all field setup to Init.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor and to Init.</param>
/// <param name="StringContentLookup">String lookup handed to Init.</param>
/// <param name="WorksheetLookup">Worksheet lookup handed to Init.</param>
/// <param name="CellFormats">Cell format lookup handed to Init.</param>
/// <param name="processingDictionaries">Processing dictionaries handed to Init by ref.</param>
public XlsxBaseWorksheetXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, Dictionary<string, string> StringContentLookup, Dictionary<string, WorkSheet> WorksheetLookup, Dictionary<string, CellFormatData> CellFormats, XlsxProcessingDictionaries processingDictionaries)
    : base(docText, commonNamespaces)
{
    // Init takes the dictionaries by ref, but this constructor receives them by value, so any
    // reassignment made inside Init is not visible to the caller of this constructor.
    Init(
        commonNamespaces,
        StringContentLookup,
        WorksheetLookup,
        CellFormats,
        ref processingDictionaries);
}
/// <summary>
/// Returns the text type associated with <paramref name="dt"/>, resolving it through
/// FindTextType on first use and caching it afterwards.
/// </summary>
/// <param name="dt">Document text; must be the same instance on every call once one has been remembered.</param>
/// <returns>The cached text type.</returns>
/// <exception cref="System.InvalidOperationException">
/// Thrown when a different document text was remembered by an earlier call.
/// </exception>
internal TextType GetTextType(DocumentText dt)
{
    // Remember the first document text seen...
    if (m_DocText == null)
    {
        m_DocText = dt;
    }

    // ...and reject any later call made with a different one.
    if (m_DocText != dt)
    {
        throw new System.InvalidOperationException();
    }

    if (m_TextType != null)
    {
        return m_TextType;
    }

    m_TextType = FindTextType(dt);
    return m_TextType;
}
/// <summary>
/// Creates the instance; both arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
public PptxPreprocessPresentation(DocumentText docText, CommonNamespaces commonNamespaces)
    : base(docText, commonNamespaces)
{
    // No additional state to initialize.
}
/// <summary>
/// Creates the diagram filter with a state tracker for the requested content types and
/// stores the supplied processing helper.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
/// <param name="contentTypesOfInterest">Content types copied into the list given to the state tracker.</param>
/// <param name="DiagramHelper">Helper stored in m_DiagramHelper (passed by ref, only read here).</param>
public DiagramXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ContentType[] contentTypesOfInterest, ref PredefinedObjectsProcessingHelper DiagramHelper)
    : base(docText, commonNamespaces)
{
    List<ContentType> requestedTypes = new List<ContentType>(contentTypesOfInterest);
    m_stateTracker = new DiagramStateTracker(requestedTypes);

    m_DiagramHelper = DiagramHelper;
}
/// <summary>
/// Creates a result backed by a new, empty DocumentText and then passes
/// <paramref name="entries"/> to Convert.
/// </summary>
/// <param name="entries">Dictionary entries handed to Convert.</param>
internal DiscoveryResult(List<DictionaryEntry> entries)
{
    // m_docText is assigned before Convert runs.
    m_docText = new DocumentText();

    Convert(entries);
}
/// <summary>
/// Creates the theme filter and stores the supplied processing helper.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
/// <param name="DiagramHelper">Helper stored in m_DiagramHelper (passed by ref, only read here).</param>
public ThemeXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, ref PredefinedObjectsProcessingHelper DiagramHelper)
    : base(docText, commonNamespaces)
{
    m_DiagramHelper = DiagramHelper;
}
/// <summary>
/// Asserts that <paramref name="dt"/> holds text types and then runs
/// CheckOneTypeOfMetadata for each content type in the table below.
/// </summary>
/// <param name="dt">Discovery result to inspect.</param>
/// <param name="notExpected">Content type passed to every check as the one that is not expected.</param>
private static void TestHasAllMetadatTypesOtherThan(DocumentText dt, ContentType notExpected)
{
    Assert.IsNotNull(dt, "expected the document text object to be valid");
    Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

    // Checked types and their descriptions, index for index.
    // Currently disabled: Macro, Variable, DocumentStatistic, Reviewer, Version, AutoVersion, RoutingSlip.
    ContentType[] checkedTypes = new ContentType[]
    {
        ContentType.Comment,
        ContentType.TrackChange,
        ContentType.HiddenText,
        ContentType.SmallText,
        ContentType.WhiteText,
        ContentType.AttachedTemplate,
        ContentType.SmartTag,
        ContentType.Field,
        ContentType.Hyperlink,
        ContentType.CustomProperty,
        ContentType.RedactedText,
        ContentType.BuiltInProperty
    };
    string[] descriptions = new string[]
    {
        "comments",
        "track changes",
        "hidden text",
        "small text",
        "white text",
        "attached template",
        "smart tags",
        "fields",
        "hyperlinks",
        "custom properties",
        "redacted text",
        "built-in properties"
    };

    for (int index = 0; index < checkedTypes.Length; index++)
    {
        CheckOneTypeOfMetadata(notExpected, dt, checkedTypes[index], descriptions[index]);
    }
}
/// <summary>
/// Verifies one content type in a discovery result. When <paramref name="type"/> is the
/// type that is not expected, it must be absent or present as a single text type with no
/// children; otherwise a text type with at least one child must exist.
/// </summary>
/// <param name="notExpected">Content type that is not expected to have content.</param>
/// <param name="dt">Discovery result to inspect.</param>
/// <param name="type">Content type being checked.</param>
/// <param name="sText">Human-readable name of the content type, used in failure messages.</param>
private static void CheckOneTypeOfMetadata(ContentType notExpected, DocumentText dt, ContentType type, string sText)
{
    List<IAbstractTextType> listType = dt.GetTextTypes(type);

    if (type == notExpected)
    {
        Assert.IsNotNull(listType, "Failed to clean " + sText);
        if (listType.Count > 0)
        {
            Assert.AreEqual(1, listType.Count, "Will take an empty TextType in the list as OK");
            TextType tt = listType[0] as TextType;
            Assert.IsNotNull(tt, "Failed to clean " + sText);
            Assert.AreEqual(0, tt.GetChildCount(), "Failed to clean " + sText);
        }
    }
    else
    {
        // Previously the first element was indexed and dereferenced without any checks, so a
        // missing type surfaced as ArgumentOutOfRangeException / NullReferenceException rather
        // than as an assertion failure naming the content type.
        Assert.IsNotNull(listType, "Test document has no " + sText);
        Assert.Greater(listType.Count, 0, "Test document has no " + sText);
        TextType tt = listType[0] as TextType;
        Assert.IsNotNull(tt, "Test document has no " + sText);
        Assert.Greater(tt.GetChildCount(), 0, "Test document has no " + sText);
    }
}
/// <summary>
/// Asserts that <paramref name="dt"/> holds text types and then runs
/// CheckOneTypeOfMetadata for each content type in the table below.
/// </summary>
/// <param name="dt">Discovery result to inspect.</param>
/// <param name="notExpected">Content type passed to every check as the one that is not expected.</param>
private static void TestHasAllMetadataTypesOtherThan(DocumentText dt, ContentType notExpected)
{
    Assert.IsNotNull(dt, "expected the document text object to be valid");
    Assert.Greater(dt.GetTextTypes().Count, 0, "expected some document text types to have been added");

    // Checked types and their descriptions, index for index.
    // Currently disabled: SmartTag, CustomProperty, HiddenText, BuiltInProperty.
    ContentType[] checkedTypes = new ContentType[]
    {
        ContentType.Header,
        ContentType.Footer,
        ContentType.Comment,
        ContentType.TrackChange,
        ContentType.SmallText,
        ContentType.WhiteText,
        ContentType.Hyperlink,
        ContentType.Links,
        ContentType.HiddenSheet,
        ContentType.HiddenRow,
        ContentType.HiddenColumn,
        ContentType.Macro,
        ContentType.RedactedText
    };
    string[] descriptions = new string[]
    {
        "headers",
        "footers",
        "comments",
        "track changes",
        "small text",
        "white text",
        "hyperlinks",
        "links",
        "hidden sheet",
        "hidden row",
        "hidden column",
        "macro",
        "redacted text"
    };

    for (int index = 0; index < checkedTypes.Length; index++)
    {
        CheckOneTypeOfMetadata(notExpected, dt, checkedTypes[index], descriptions[index]);
    }
}
/// <summary>
/// Creates the instance; both arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
public PptxCommentAuthors(DocumentText docText, CommonNamespaces commonNamespaces)
    : base(docText, commonNamespaces)
{
    // No additional state to initialize.
}
/// <summary>
/// Creates a result that wraps the supplied document text as-is (no copy is made).
/// </summary>
/// <param name="docText">Document text stored in m_docText.</param>
internal DiscoveryResult(DocumentText docText)
{
    m_docText = docText;
}
/// <summary>
/// Creates the revision filter, stores the worksheet lookup and starts with no
/// current row/column revision type.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
/// <param name="WorksheetLookup">Worksheet lookup assigned to WorksheetDataLookup.</param>
public XlsxRevisionXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, Dictionary<string, WorkSheet> WorksheetLookup)
    : base(docText, commonNamespaces)
{
    WorksheetDataLookup = WorksheetLookup;

    m_currentRevisionType = RowColRevisionType.none;
}
/// <summary>
/// Runs the whole sequence for m_sInputFile: start the Office host application, discover the
/// original, then - depending on m_advancedOptions - binary ("LightSpeed") clean, API clean
/// and corruption validation, re-discovering after each clean. The Office application cache
/// is shut down in the finally block whether or not the run succeeded.
/// </summary>
/// <returns>
/// true when every enabled step completed; false when obtaining the host application failed
/// with an exception whose message contains "controller". Any other exception propagates.
/// </returns>
public bool Execute()
{
    m_bAbortFlag = false;
    try
    {
        CallbackSteppingMessage("Initialising");
        KillOfficeApps();
        m_ft = Workshare.ApplicationControllers.OfficeApplicationCache.GetFileTypeBasedOnFilename(m_sInputFile, false);
        m_cache = Workshare.ApplicationControllers.OfficeApplicationCache.Instance;

        // Time how long it takes to obtain the host application.
        // Note: the stopwatch is not stopped when GetHostApplication throws.
        appLaunchTime.Reset();
        appLaunchTime.Start();
        try
        {
            m_cache.GetHostApplication(m_ft); // otherwise the Shutdown will not release the office app instance
        }
        catch (Exception e)
        {
            // A failure mentioning "controller" is reported as an unsuccessful run rather than an error.
            if (e.Message.Contains("controller"))
                return false;
            throw;
        }
        appLaunchTime.Stop();

        TakeWorkingCopiesOfDataDataFile();

        CallbackSteppingMessage("Discover Original");
        m_dtReadOnlyDiscover = DiscoverDocument(m_sInputFile);
        m_dtDiscoverAfterApiClean = m_dtReadOnlyDiscover; // if the Api clean fails, we treat it as nothing cleaned

        if (m_advancedOptions.DoBinaryClean)
        {
            CallbackSteppingMessage("LightSpeed Redacting");
            List<ContentType> listContentTypes = GetListOfAllContentTypes();
            DoLightSpeedCleanEx(listContentTypes);
            // Only reached when DoLightSpeedCleanEx did not throw.
            m_LightSpeedCleaningWorked = true;
            CallbackSteppingMessage("Discover LightSpeed Redacted Version");
            m_dtDiscoverAfterBinClean = DiscoverDocument(m_sFileForBinClean);
        }

        if (m_advancedOptions.DoDomClean)
        {
            CallbackSteppingMessage("API Cleaning");
            DoAPIClean();
            CallbackSteppingMessage("Discover API Cleaned Version");
            // Replaces the fallback assigned after the original discovery.
            m_dtDiscoverAfterApiClean = DiscoverDocument(m_sFileForApiClean);
        }

        if (m_advancedOptions.DoValidation)
        {
            // NOTE(review): this validates the binary-clean file even when DoBinaryClean is off - confirm intended.
            CallbackSteppingMessage("Checking LightSpeed Redacted Doc for Corruption");
            ValidateNotCorrupt(m_sFileForBinClean);
        }

        CallbackSteppingMessage("Completed");
        return true;
    }
    finally
    {
        // Always shut the cache down (and time it), including on the early return and on exceptions.
        appCloseTime.Reset();
        if (m_cache != null)
        {
            appCloseTime.Start();
            m_cache.ShutDown();
            appCloseTime.Stop();
        }
    }
}
/// <summary>
/// Returns the first text type in <paramref name="dt"/> whose content type equals this
/// instance's ContentType. When none exists a new one is created; it is added to
/// <paramref name="dt"/> unless the content type is ContentRule.
/// </summary>
/// <param name="dt">Document text to search and, where applicable, extend.</param>
/// <returns>The existing or newly created text type.</returns>
private TextType FindTextType(DocumentText dt)
{
    // NOTE(review): the foreach casts every element to TextType; if GetTextTypes() can return
    // other implementations this throws InvalidCastException - confirm.
    foreach (TextType existing in dt.GetTextTypes())
    {
        if (existing.GetContentType() != ContentType)
        {
            continue;
        }

        return existing;
    }

    TextType created = new TextType(ContentType);

    // ContentRule text types are returned without being registered on the document text.
    if (ContentType != ContentType.ContentRule)
    {
        dt.AddTextType(created);
    }

    return created;
}
/// <summary>
/// Verifies one content type in a discovery result. When <paramref name="type"/> is the
/// type that is not expected, it must be absent or present as a single text type with no
/// children; otherwise a text type with at least one child must exist. Redacted text is
/// not checked at all when hidden text is the type that is not expected.
/// </summary>
/// <param name="notExpected">Content type that is not expected to have content.</param>
/// <param name="dt">Discovery result to inspect.</param>
/// <param name="type">Content type being checked.</param>
/// <param name="sText">Human-readable name of the content type, used in failure messages.</param>
private static void CheckOneTypeOfMetadata(ContentType notExpected, DocumentText dt, ContentType type, string sText)
{
    // Word can mark redacted text with "vanish" which may or may not be appropriate.
    if (notExpected == ContentType.HiddenText && type == ContentType.RedactedText)
    {
        return;
    }

    List<IAbstractTextType> listType = dt.GetTextTypes(type);

    if (type == notExpected)
    {
        Assert.IsNotNull(listType, "Failed to clean " + sText);
        if (listType.Count > 0)
        {
            Assert.AreEqual(1, listType.Count, "Will take an empty TextType in the list as OK");
            TextType tt = listType[0] as TextType;
            Assert.IsNotNull(tt, "Failed to clean " + sText);
            Assert.AreEqual(0, tt.GetChildCount(), "Failed to clean " + sText);
        }
    }
    else
    {
        // Previously the first element was indexed and dereferenced without any checks, so a
        // missing type surfaced as ArgumentOutOfRangeException / NullReferenceException rather
        // than as an assertion failure naming the content type.
        Assert.IsNotNull(listType, "Test document has no " + sText);
        Assert.Greater(listType.Count, 0, "Test document has no " + sText);
        TextType tt = listType[0] as TextType;
        Assert.IsNotNull(tt, "Test document has no " + sText);
        Assert.Greater(tt.GetChildCount(), 0, "Test document has no " + sText);
    }
}
/// <summary>
/// Creates the calc-chain filter and stores the worksheet lookup and processing dictionaries.
/// </summary>
/// <param name="docText">Document text forwarded to the base constructor.</param>
/// <param name="commonNamespaces">Namespaces forwarded to the base constructor.</param>
/// <param name="WorksheetLookup">Worksheet lookup stored in m_worksheetLookup.</param>
/// <param name="processingDictionaries">Processing dictionaries stored in m_processingDictionaries (passed by ref, only read here).</param>
public XlsxCalcChainXmlFilter(DocumentText docText, CommonNamespaces commonNamespaces, Dictionary<string, WorkSheet> WorksheetLookup, ref XlsxProcessingDictionaries processingDictionaries)
    : base(docText, commonNamespaces)
{
    m_worksheetLookup = WorksheetLookup;

    m_processingDictionaries = processingDictionaries;
}