/// <summary>
/// Adds <paramref name="parent"/> to <c>m_parents</c>. A null argument is ignored.
/// </summary>
/// <param name="parent">The parent text type to register; may be null.</param>
public virtual void AddParent(IAbstractTextType parent)
{
    if (parent != null)
    {
        m_parents.Add(parent);
    }
}
/// <summary>
/// Builds one key/value pair per child node of <paramref name="textType"/>.
/// For each child, the pair's key is looked up via <c>_nodeValueFromName</c> using
/// <paramref name="key"/> and the pair's value using <paramref name="value"/>.
/// </summary>
/// <param name="key">Info name used to look up the key half of each pair.</param>
/// <param name="value">Info name used to look up the value half of each pair.</param>
/// <param name="textType">The text type whose children are walked.</param>
/// <returns>
/// The pairs collected so far. Never null; if an exception occurs it is logged
/// and the partially filled list is returned.
/// </returns>
static private List<KeyValuePair<string, string>> _typeList2Pair(
    string key,
    string value,
    IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> collected = new List<KeyValuePair<string, string>>();

    try
    {
        int childIndex = 0;
        while (childIndex < textType.GetChildCount())
        {
            IAbstractTextNode child = textType.GetChild(childIndex);
            string pairKey = _nodeValueFromName(child, key);
            string pairValue = _nodeValueFromName(child, value);
            collected.Add(new KeyValuePair<string, string>(pairKey, pairValue));
            ++childIndex;
        }
    }
    catch (System.Exception ex)
    {
        // Best effort: log the failure and hand back whatever was gathered.
        Logger.LogError(ex);
    }

    return collected;
}
/// <summary>
/// Concatenates the result of <c>DumpNode</c> for every child of
/// <paramref name="ttOrig"/>, in child order.
/// </summary>
/// <param name="ttOrig">The text type whose children are dumped.</param>
/// <returns>The concatenated dump; empty when there are no children.</returns>
public static string DumpTextType(IAbstractTextType ttOrig)
{
    StringBuilder dump = new StringBuilder();

    int childIndex = 0;
    while (childIndex < ttOrig.GetChildCount())
    {
        dump.Append(DumpNode(ttOrig.GetChild(childIndex)));
        childIndex++;
    }

    return dump.ToString();
}
/// <summary>
/// Joins, without separators, the value of the first "Content" info entry of
/// every node enumerated from <paramref name="iAbstractTextType"/>.
/// </summary>
/// <param name="iAbstractTextType">The text type whose nodes are enumerated.</param>
/// <returns>The concatenated content values.</returns>
protected string GatherAllContent(IAbstractTextType iAbstractTextType)
{
    StringBuilder allContent = new StringBuilder();

    foreach (IAbstractTextNode textNode in iAbstractTextType)
    {
        // Only the first "Content" entry of each node contributes.
        string nodeContent = textNode.GetInfo("Content")[0].value;
        allContent.Append(nodeContent);
    }

    return allContent.ToString();
}
/// <summary>
/// Asserts that the first "Content" value of the i-th node in
/// <paramref name="tt"/> equals the i-th string in <paramref name="p"/>,
/// for every index of <paramref name="p"/>.
/// </summary>
/// <param name="tt">The text type whose <c>Nodes</c> are inspected.</param>
/// <param name="p">Expected content strings, in node order.</param>
protected void CheckTextNodeContents(IAbstractTextType tt, string[] p)
{
    int position = 0;
    foreach (string expected in p)
    {
        string found = tt.Nodes[position].GetInfo("Content")[0].value;
        Assert.AreEqual(expected, found, "Wrong text found for " + position.ToString() + "th item");
        position++;
    }
}
/// <summary>
/// Fails the test if any string in <paramref name="textThatShouldntBeCleaned"/>
/// cannot be found in the content gathered by <c>GatherAllContent</c>.
/// </summary>
/// <param name="iAbstractTextType">The text type whose content is gathered.</param>
/// <param name="textThatShouldntBeCleaned">Strings that must still be present.</param>
private void CheckHaventCleanedTooMuch(IAbstractTextType iAbstractTextType, string[] textThatShouldntBeCleaned)
{
    string gathered = GatherAllContent(iAbstractTextType);

    for (int i = 0; i < textThatShouldntBeCleaned.Length; i++)
    {
        string required = textThatShouldntBeCleaned[i];
        bool stillPresent = gathered.IndexOf(required) != -1;
        if (stillPresent)
        {
            continue;
        }

        Assert.Fail("The text '" + required + "' was cleaned from the document and it shouldn't have been");
    }
}
/// <summary>
/// Decides whether finding <paramref name="tt"/> after redaction is expected.
/// </summary>
/// <param name="tt">The text type to judge; null is treated as expected.</param>
/// <returns>
/// True when the type is null, belongs to one of the content types listed as
/// never cleaned, passes its type-specific check, or has no children.
/// False otherwise.
/// </returns>
private bool IsExpectedFromRedaction(IAbstractTextType tt)
{
    if (tt == null)
    {
        return true;
    }

    switch (tt.GetContentType())
    {
        case ContentType.Header:
        case ContentType.Footer:
        case ContentType.Endnote:
        case ContentType.Footnote:
        case ContentType.Paragraph:
        case ContentType.WorkshareProperty:
        case ContentType.CellText:
        case ContentType.HiddenColumn:
        case ContentType.HiddenRow:
            return true; // we don't clean any of these

        case ContentType.TextBox:
            if (IsPowerPoint() || IsExcel())
            {
                return true;
            }
            // Otherwise fall out of the switch to the child-count rule below.
            break;

        case ContentType.AttachedTemplate:
            return !IsThereASignificantAttachedTemplate(tt);

        case ContentType.Field:
            return !AreThereAnyNonQuoteFields(tt);

        case ContentType.HiddenText:
            return !TypeHasNonWhitespaceContent(tt);
    }

    // Any remaining type is only expected when it has no children left.
    return tt.GetChildCount() <= 0;
}
/// <summary>
/// Searches the children of <paramref name="ttype"/> for the first node whose
/// first "Name" value equals <paramref name="expectedName"/> and asserts that
/// its first "Value" value equals <paramref name="expectedValue"/>.
/// Fails the test when no such node exists.
/// </summary>
/// <param name="ttype">The text type whose children are searched.</param>
/// <param name="expectedName">The property name to look for.</param>
/// <param name="expectedValue">The value the matching property must have.</param>
public static void CheckNode(IAbstractTextType ttype, string expectedName, string expectedValue)
{
    for (int index = 0; index < ttype.GetChildCount(); index++)
    {
        IAbstractTextNode candidate = ttype.GetChild(index);

        // Every child must exist and carry a name, whether or not it matches.
        Assert.IsNotNull(candidate);
        Assert.Greater(candidate.GetInfo("Name").Count, 0, "missing info for the property name");

        if (expectedName != candidate.GetInfo("Name")[0].value)
        {
            continue;
        }

        Assert.Greater(candidate.GetInfo("Value").Count, 0, "missing info for the property value");
        Assert.AreEqual(expectedValue, candidate.GetInfo("Value")[0].value, "Built in property name incorrect");
        return;
    }

    Assert.Fail("Didn't find the property type expected - " + expectedName);
}
/// <summary>
/// Sends every string returned by <c>_typeList2List</c> for
/// <paramref name="textType"/> to the reader as content data, unwrapped.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Type label passed through to the reader.</param>
/// <param name="textType">The text type to process.</param>
private static void HiddenSheet(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<string> sheetValues = _typeList2List(textType);

    for (int index = 0; index < sheetValues.Count; ++index)
    {
        reader.OnContentData(typeText, sheetValues[index], ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Reports each child of <paramref name="textType"/> to the reader as an element
/// named <paramref name="typeText"/>. The "Type" lookup becomes the <c>Type</c>
/// attribute and the "Path" lookup, passed through <c>_putCDataSection</c>,
/// becomes the element body.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void Links(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> links = _typeList2Pair("Type", "Path", textType);

    foreach (KeyValuePair<string, string> link in links)
    {
        string openTag = "<" + typeText + " Type=\"" + link.Key + "\">";
        string closeTag = "</" + typeText + ">";
        string element = openTag + _putCDataSection(link.Value) + closeTag;

        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Sends the single string produced by <c>_typeList2String</c> for
/// <paramref name="textType"/> to the reader as content data.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Type label passed through to the reader.</param>
/// <param name="textType">The text type to process.</param>
private static void AutoVersion(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    reader.OnContentData(typeText, _typeList2String(textType), ref MetadataProcessor._cancel);
}
/// <summary>
/// Sends every string returned by <c>_typeList2List</c> for
/// <paramref name="textType"/> to the reader as content data. Exceptions raised
/// while reporting are logged and swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Type label passed through to the reader.</param>
/// <param name="textType">The text type to process.</param>
private static void Macros(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<string> macroValues = _typeList2List(textType);

    try
    {
        for (int index = 0; index < macroValues.Count; ++index)
        {
            reader.OnContentData(typeText, macroValues[index], ref MetadataProcessor._cancel);
        }
    }
    catch (System.Exception ex)
    {
        Logger.LogError("Exception within MetadataProcessor::Macros for " + typeText + ", " + textType);
        Logger.LogError(ex);
    }
}
/// <summary>
/// Reports each child of <paramref name="textType"/> to the reader as an element
/// named <paramref name="typeText"/>. The <c>Type</c> attribute is the name of
/// the node's first info entry. The body is the value of the second info entry
/// when there is more than one, otherwise the value of the first, passed
/// through <c>_putCDataSection</c>.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void HyperLinks(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    // Sample node layouts recorded by the original author:
    //   DOC  GetInfo(0): name "Content",     value "HYPERLINK"
    //   XLS  GetInfo(0): name "Description", value "www.bbc.co.uk/news"
    //        GetInfo(1): name "Path",        value "http://www.bbc.co.uk/news"
    //   PPT  GetInfo(0): name "Content",     value "HL 1"
    //        GetInfo(1): name "Path",        value "http://www.btyahoo.com/welcome2"
    // (A call to _typeList2Pair(textType) used to sit here and is disabled.)

    int childIndex = 0;
    while (childIndex < textType.GetChildCount())
    {
        IAbstractTextNode linkNode = textType.GetChild(childIndex);
        string keyStr = linkNode.GetInfo(0).name;

        string valStr;
        if (linkNode.GetInfoCount() > 1)
        {
            valStr = linkNode.GetInfo(1).value;
        }
        else
        {
            valStr = linkNode.GetInfo(0).value;
        }

        string element = "<" + typeText + " Type=\"" + keyStr + "\">"
                       + _putCDataSection(valStr)
                       + "</" + typeText + ">";
        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);

        ++childIndex;
    }
}
/// <summary>
/// Returns the value of the first info entry of the first child of
/// <paramref name="textType"/>, passed through <c>EscapeXML</c>. An empty string
/// is used when there is no child, no info entry, or the entry is null.
/// </summary>
/// <param name="textType">The text type to read from.</param>
/// <returns>The result of <c>EscapeXML</c> on the selected value.</returns>
static private string _typeList2String(IAbstractTextType textType)
{
    string firstValue = "";

    if (textType.GetChildCount() > 0)
    {
        IAbstractTextNode firstChild = textType.GetChild(0);
        if (firstChild.GetInfoCount() > 0)
        {
            NodeInfo firstInfo = firstChild.GetInfo(0);
            if (firstInfo != null)
            {
                firstValue = firstInfo.value;
            }
        }
    }

    return EscapeXML(firstValue);
}
/// <summary>
/// Reports each hidden slide in <paramref name="textType"/> to the reader as an
/// element named <paramref name="typeText"/> with a <c>SlideId</c> attribute and
/// a body passed through <c>_putCDataSection</c>.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void HiddenSlide(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    // ppt we get "SlideId" and "Title"
    List<KeyValuePair<string, string>> slides = _typeList2Pair("SlideId", "Title", textType);

    // pptx we get "SlideNumber" and "Content"
    if (slides.Count != 0)
    {
        CheckForPPTx(ref slides, "SlideNumber", "Content", textType);
    }

    foreach (KeyValuePair<string, string> slide in slides)
    {
        string element = "<" + typeText + " SlideId=\"" + slide.Key + "\">"
                       + _putCDataSection(slide.Value)
                       + "</" + typeText + ">";
        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
    }
}
// -----------------------------------------------------------------------------
/// <summary>
/// Reports each "Name"/"Value" pair of <paramref name="textType"/> to the reader
/// as an element named <paramref name="typeText"/> with a <c>Name</c> attribute.
/// Pairs whose value is flagged by <c>_isCommentOrParagraphMarker</c> are
/// skipped. Exceptions are logged and swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void BuildInProperties(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    try
    {
        List<KeyValuePair<string, string>> properties = _typeList2Pair("Name", "Value", textType);

        foreach (KeyValuePair<string, string> property in properties)
        {
            if (_isCommentOrParagraphMarker(property.Value))
            {
                continue;
            }

            string element = "<" + typeText + " Name=\"" + property.Key + "\">"
                           + _putCDataSection(property.Value)
                           + "</" + typeText + ">";
            reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
        }
    }
    catch (System.Exception ex)
    {
        Logger.LogError("Exception within MetadataProcessor::BuildInProperties for " + typeText + ", " + textType);
        Logger.LogError(ex);
    }
}
// -----------------------------------------------------------------------------
/// <summary>
/// Dispatches <paramref name="textType"/> to the handler for its content type.
/// Each handler receives a fixed element name chosen here. Any exception is
/// logged and swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data produced by the handlers.</param>
/// <param name="typeText">Not used by this method; handlers get a literal name instead.</param>
/// <param name="textType">The text type to process.</param>
public static void ProcessType(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    // Captured up front so the catch handler never has to touch textType again.
    // If textType is null or GetContentType() itself throws, re-calling it while
    // logging would raise a second exception that escapes this method.
    string contentTypeName = "";

    try
    {
        ContentType contentType = textType.GetContentType();
        contentTypeName = contentType.ToString();

        switch (contentType)
        {
            case ContentType.Paragraph:
                MetadataProcessor.Paragraphs(reader, "Paragraph", textType);
                break;
            case ContentType.Header:
                MetadataProcessor.Headers(reader, "Header", textType);
                break;
            case ContentType.Footer:
                MetadataProcessor.Footers(reader, "Footer", textType);
                break;
            case ContentType.Comment:
                MetadataProcessor.Comments(reader, "Comment", textType);
                break;
            case ContentType.TrackChange:
                MetadataProcessor.TrackChanges(reader, "TrackChange", textType);
                break;
            case ContentType.Reviewer:
                MetadataProcessor.Reviewers(reader, "Reviewer", textType);
                break;
            case ContentType.HiddenText:
                MetadataProcessor.HiddenTexts(reader, "HiddenText", textType);
                break;
            case ContentType.SmallText:
                MetadataProcessor.SmallTexts(reader, "SmallText", textType);
                break;
            case ContentType.WhiteText:
                MetadataProcessor.WhiteTexts(reader, "WhiteText", textType);
                break;
            case ContentType.AttachedTemplate:
                MetadataProcessor.AttachedTemplate(reader, "AttachedTemplate", textType);
                break;
            case ContentType.Version:
                MetadataProcessor.Versions(reader, "Version", textType);
                break;
            case ContentType.AutoVersion:
                MetadataProcessor.AutoVersion(reader, "AutoVersion", textType);
                break;
            case ContentType.Field:
                MetadataProcessor.Fields(reader, "Field", textType);
                break;
            case ContentType.Hyperlink:
                MetadataProcessor.HyperLinks(reader, "Hyperlink", textType);
                break;
            case ContentType.RoutingSlip:
                MetadataProcessor.RoutingSlip(reader, "RoutingSlip", textType);
                break;
            case ContentType.Variable:
                MetadataProcessor.Variables(reader, "Variable", textType);
                break;
            case ContentType.HiddenSlide:
                MetadataProcessor.HiddenSlide(reader, "HiddenSlide", textType);
                break;
            case ContentType.SpeakerNote:
                MetadataProcessor.SpeakerNote(reader, "SpeakerNote", textType);
                break;
            case ContentType.Links:
                MetadataProcessor.Links(reader, "Link", textType);
                break;
            case ContentType.HiddenSheet:
                MetadataProcessor.HiddenSheet(reader, "HiddenSheet", textType);
                break;
            case ContentType.HiddenRow:
                MetadataProcessor.HiddenRow(reader, "HiddenRow", textType);
                break;
            case ContentType.HiddenColumn:
                MetadataProcessor.HiddenColumn(reader, "HiddenColumn", textType);
                break;
            case ContentType.RedactedText:
                MetadataProcessor.RedactedText(reader, "RedactedText", textType);
                break;
            case ContentType.CustomProperty:
                MetadataProcessor.CustomProperties(reader, "CustomProperty", textType);
                break;
            case ContentType.Macro:
                MetadataProcessor.Macros(reader, "Macro", textType);
                break;
            case ContentType.BuiltInProperty:
                MetadataProcessor.BuildInProperties(reader, "BuiltInProperty", textType);
                break;
            case ContentType.DocumentStatistic:
                MetadataProcessor.DocumentStatistics(reader, "DocumentStatistic", textType);
                break;
            case ContentType.Footnote:
            case ContentType.Endnote:
                // Endnotes are reported under the same "Footnote" element name.
                MetadataProcessor.Footnotes(reader, "Footnote", textType);
                break;
            case ContentType.TextBox:
                // Not reported (a ProcessTextType(reader, "TextBox", ...) call is disabled).
                break;
            case ContentType.CellText:
                // Not reported (a ProcessTextType(reader, "CellText", ...) call is disabled).
                break;
            case ContentType.WorkshareProperty:
                // Not reported (a ProcessTextType(reader, "WorkshareProperty", ...) call is disabled).
                break;
            case ContentType.SmartTag:
                // Not reported (a ProcessTextType(reader, "SmartTag", ...) call is disabled).
                break;
            case ContentType.WorkshareStyle:
                MetadataProcessor.WhiteTexts(reader, "WorkshareStyle", textType);
                break;
            default:
                // Unknown content types are ignored. A disabled ApplicationException
                // ("Invalid Content Type in DocumentText! Enums out of date.") used to sit here.
                break;
        }
    }
    catch (System.Exception e)
    {
        Logger.LogError("Exception within MetadataProcessor::ProcessType for " + contentTypeName);
        Logger.LogError(e);
    }
}
/// <summary>
/// Reports each string from <c>_typeList2List</c> to the reader as an element
/// named <paramref name="typeText"/>, with the string passed through
/// <c>_putCDataSection</c>. Strings flagged by <c>_isCommentOrParagraphMarker</c>
/// or <c>_isCommentMarker</c> are skipped. Exceptions raised while reporting are
/// logged and swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
public static void WhiteTexts(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<string> texts = _typeList2List(textType);

    try
    {
        for (int index = 0; index < texts.Count; ++index)
        {
            string text = texts[index];
            if (_isCommentOrParagraphMarker(text) || _isCommentMarker(text))
            {
                continue;
            }

            string element = "<" + typeText + ">" + _putCDataSection(text) + "</" + typeText + ">";
            reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
        }
    }
    catch (System.Exception ex)
    {
        Logger.LogError("Exception within MetadataProcessor::WhiteTexts for " + typeText + ", " + textType);
        Logger.LogError(ex);
    }
}
/// <summary>
/// Reports children of <paramref name="textType"/> to the reader as elements
/// named <paramref name="typeText"/> with <c>Author</c> and <c>Type</c>
/// attributes. At most <c>MetadataProcessor._TrackChangeLimit</c> children are
/// visited, and children whose "Content" is flagged by
/// <c>_isCommentOrParagraphMarker</c> are skipped.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
public static void TrackChanges(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    int limit = MetadataProcessor._TrackChangeLimit;

    for (int index = 0; index < textType.GetChildCount() && index < limit; ++index)
    {
        IAbstractTextNode changeNode = textType.GetChild(index);
        string content = _nodeValueFromName(changeNode, "Content");
        string author = _nodeValueFromName(changeNode, "Author");
        string type = _nodeValueFromName(changeNode, "Type");

        if (_isCommentOrParagraphMarker(content))
        {
            continue;
        }

        string openTag = "<" + typeText + " Author=\"" + author + "\"" + " Type=\"" + type + "\">";
        string element = openTag + _putCDataSection(content) + "</" + typeText + ">";
        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Reports each child of <paramref name="textType"/> to the reader as an element
/// named <paramref name="typeText"/>. Every info entry other than "Content"
/// becomes an attribute, and the "Content" lookup, passed through
/// <c>_putCDataSection</c>, becomes the body. Children whose content is flagged
/// by <c>_isCommentOrParagraphMarker</c> are skipped. Exceptions are logged and
/// swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
public static void HiddenTexts(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    try
    {
        for (int loopi = 0; loopi < textType.GetChildCount(); ++loopi)
        {
            IAbstractTextNode textNode = textType.GetChild(loopi);
            string content = _nodeValueFromName(textNode, "Content");
            if (_isCommentOrParagraphMarker(content))
            {
                continue;
            }

            System.Text.StringBuilder builtString = new System.Text.StringBuilder();
            builtString.Append("<").Append(typeText);

            for (int loopj = 0; loopj < textNode.GetInfoCount(); ++loopj)
            {
                NodeInfo nodeInfo = textNode.GetInfo(loopj);
                string nodeKey = (nodeInfo != null) ? nodeInfo.name : "";

                // An attribute needs a name: a null entry or an unnamed one would
                // otherwise produce a malformed ' =""' fragment.
                if (string.IsNullOrEmpty(nodeKey) || nodeKey.Equals("Content"))
                {
                    continue;
                }

                // Raw info values are escaped before being embedded in markup, the
                // same way _typeList2List and _typeList2String treat them.
                // NOTE(review): assumes EscapeXML output is safe inside a
                // double-quoted attribute - confirm it also escapes '"'.
                builtString.Append(" ");
                builtString.Append(nodeKey);
                builtString.Append("=\"");
                builtString.Append(EscapeXML(nodeInfo.value));
                builtString.Append("\"");
            }

            builtString.Append(">");
            builtString.Append(_putCDataSection(content));
            builtString.Append("</").Append(typeText).Append(">");

            reader.OnContentData(typeText, builtString.ToString(), ref MetadataProcessor._cancel);
        }
    }
    catch (System.Exception e)
    {
        Logger.LogError("Exception within MetadataProcessor::HiddenTexts for " + typeText + ", " + textType);
        Logger.LogError(e);
    }
}
/// <summary>
/// Reports the first child of <paramref name="textType"/> to the reader as an
/// empty element named <paramref name="typeText"/> with <c>Name</c> and
/// <c>Path</c> attributes. Does nothing when there are no children.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void AttachedTemplate(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    if (textType.GetChildCount() <= 0)
    {
        return;
    }

    // Only the first child is reported.
    IAbstractTextNode templateNode = textType.GetChild(0);
    string path = _nodeValueFromName(templateNode, "Path");
    string name = _nodeValueFromName(templateNode, "Name");

    string element = "<" + typeText
                   + " Name=\"" + name + "\""
                   + " Path=\"" + path + "\""
                   + " />";
    reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
}
/// <summary>
/// Reports each "Instruction"/"Content" pair of <paramref name="textType"/> to
/// the reader as an element named <paramref name="typeText"/> with an
/// <c>Instruction</c> attribute and the content as its body. Pairs whose content
/// is flagged by <c>_isHyperlink</c> are skipped. Exceptions raised while
/// reporting are logged and swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void Fields(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> fields = _typeList2Pair("Instruction", "Content", textType);

    try
    {
        foreach (KeyValuePair<string, string> field in fields)
        {
            // hyperlinks added separately
            // (an earlier _isCommentOrParagraphMarker check on the value is disabled)
            if (_isHyperlink(field.Value))
            {
                continue;
            }

            string element = "<" + typeText + " Instruction=\"" + field.Key + "\">"
                           + field.Value
                           + "</" + typeText + ">";
            reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
        }
    }
    catch (System.Exception ex)
    {
        Logger.LogError("Exception within MetadataProcessor::Fields for " + typeText + ", " + textType);
        Logger.LogError(ex);
    }
}
/// <summary>
/// Flattens every info entry of every child of <paramref name="textType"/> into
/// a list of pairs: the entry's name as key and its value, passed through
/// <c>EscapeXML</c>, as value.
/// </summary>
/// <param name="textType">The text type whose children are walked.</param>
/// <returns>
/// The pairs collected so far. Never null; if an exception occurs it is logged
/// and the partially filled list is returned.
/// </returns>
static private List<KeyValuePair<string, string>> _typeList2Pair(IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> pairList = new List<KeyValuePair<string, string>>();
    try
    {
        for (int i = 0; i < textType.GetChildCount(); ++i)
        {
            IAbstractTextNode textNode = textType.GetChild(i);
            for (int j = 0; j < textNode.GetInfoCount(); ++j)
            {
                NodeInfo nodeInfo = textNode.GetInfo(j);

                // A null entry is treated as empty name/value, matching
                // _typeList2List, so one missing entry no longer aborts the
                // remaining children via the catch below.
                string nodeName = (nodeInfo != null) ? nodeInfo.name : "";
                string nodeValue = (nodeInfo != null) ? nodeInfo.value : "";

                pairList.Add(new KeyValuePair<string, string>(nodeName, EscapeXML(nodeValue)));
            }
        }
    }
    catch (System.Exception e)
    {
        Logger.LogError(e);
    }
    return pairList;
}
/// <summary>
/// Reports each child of <paramref name="textType"/> to the reader as an element
/// named <paramref name="typeText"/> with <c>Message</c> and <c>Subject</c>
/// attributes. The element wraps a <c>Recipients</c> element holding one
/// <c>Recipient</c> element per "Recipient" info entry.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void RoutingSlip(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    for (int i = 0; i < textType.GetChildCount(); ++i)
    {
        IAbstractTextNode textNode = textType.GetChild(i);
        string message = _nodeValueFromName(textNode, "Message");
        string subject = _nodeValueFromName(textNode, "Subject");

        System.Text.StringBuilder builtString = new System.Text.StringBuilder();
        builtString.Append("<").Append(typeText);
        builtString.Append(" Message=\"").Append(message).Append("\"");
        builtString.Append(" Subject=\"").Append(subject).Append("\">");

        builtString.Append("<Recipients>");
        List<NodeInfo> nodeRecipients = textNode.GetInfo("Recipient");
        foreach (NodeInfo nodeRecipi in nodeRecipients)
        {
            // Raw info values are escaped before being written as element text,
            // as _typeList2List does; an unescaped '<' or '&' in a recipient
            // would otherwise break the markup.
            builtString.Append("<Recipient>");
            builtString.Append(EscapeXML(nodeRecipi.value));
            builtString.Append("</Recipient>");
        }
        builtString.Append("</Recipients>");

        builtString.Append("</").Append(typeText).Append(">");
        reader.OnContentData(typeText, builtString.ToString(), ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Reports each "SheetName"/"Column" pair of <paramref name="textType"/> to the
/// reader as an element named <paramref name="typeText"/>. The column goes in
/// the <c>Column</c> attribute and the sheet name is the body.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void HiddenColumn(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> columns = _typeList2Pair("SheetName", "Column", textType);

    foreach (KeyValuePair<string, string> column in columns)
    {
        // Key is the sheet name, Value is the column.
        string element = "<" + typeText + " Column=\"" + column.Value + "\">"
                       + column.Key
                       + "</" + typeText + ">";
        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Reports each string from <c>_typeList2List</c> to the reader as an element
/// named <paramref name="typeText"/>, with the string passed through
/// <c>_putCDataSection</c>. Strings flagged by <c>_isCommentOrParagraphMarker</c>
/// or <c>_isCommentMarker</c> are skipped.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void Footnotes(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<string> notes = _typeList2List(textType);

    for (int index = 0; index < notes.Count; ++index)
    {
        string note = notes[index];
        if (_isCommentOrParagraphMarker(note) || _isCommentMarker(note))
        {
            continue;
        }

        string element = "<" + typeText + ">" + _putCDataSection(note) + "</" + typeText + ">";
        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Flattens the values of every info entry of every child of
/// <paramref name="textType"/> into a list, each passed through
/// <c>EscapeXML</c>. A null info entry contributes an empty string.
/// </summary>
/// <param name="textType">The text type whose children are walked.</param>
/// <returns>
/// The values collected so far. Never null; if an exception occurs it is logged
/// and the partially filled list is returned.
/// </returns>
static private List<string> _typeList2List(IAbstractTextType textType)
{
    List<string> values = new List<string>();

    try
    {
        int childIndex = 0;
        while (childIndex < textType.GetChildCount())
        {
            IAbstractTextNode child = textType.GetChild(childIndex);

            int infoIndex = 0;
            while (infoIndex < child.GetInfoCount())
            {
                NodeInfo info = child.GetInfo(infoIndex);

                string rawValue = "";
                if (info != null)
                {
                    rawValue = info.value;
                }

                values.Add(EscapeXML(rawValue));
                ++infoIndex;
            }

            ++childIndex;
        }
    }
    catch (System.Exception ex)
    {
        // Best effort: log the failure and hand back whatever was gathered.
        Logger.LogError(ex);
    }

    return values;
}
/// <summary>
/// Reports each "Saved By"/"Comment" pair of <paramref name="textType"/> to the
/// reader as an empty element named <paramref name="typeText"/> with
/// <c>SavedBy</c> and <c>Comment</c> attributes, then hands the full pair list
/// to <c>MetadataProcessor.Authors</c>.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void Versions(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> versions = _typeList2Pair("Saved By", "Comment", textType);

    foreach (KeyValuePair<string, string> version in versions)
    {
        string element = "<" + typeText
                       + " SavedBy=\"" + version.Key + "\""
                       + " Comment=\"" + version.Value + "\""
                       + " />";
        reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
    }

    MetadataProcessor.Authors(versions);
}
/// <summary>
/// Sends each string from <c>_typeList2List</c>, passed through
/// <c>_putCDataSection</c>, to the reader as content data. Strings flagged by
/// <c>_isCommentOrParagraphMarker</c> are skipped.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Type label passed through to the reader.</param>
/// <param name="textType">The text type to process.</param>
public static void Paragraphs(IDocumentReader reader, string typeText, IAbstractTextType textType)
{
    List<string> paragraphs = _typeList2List(textType);

    for (int index = 0; index < paragraphs.Count; ++index)
    {
        string paragraph = paragraphs[index];
        if (_isCommentOrParagraphMarker(paragraph))
        {
            continue;
        }

        reader.OnContentData(typeText, _putCDataSection(paragraph), ref MetadataProcessor._cancel);
    }
}
/// <summary>
/// Reports each "Name"/"Value" pair of <paramref name="textType"/> to the reader
/// as an empty element named <paramref name="typeText"/> with <c>Name</c> and
/// <c>Value</c> attributes. Exceptions raised while reporting are logged and
/// swallowed.
/// </summary>
/// <param name="reader">Receiver of the content data.</param>
/// <param name="typeText">Element name and type label.</param>
/// <param name="textType">The text type to process.</param>
private static void DocumentStatistics(
    IDocumentReader reader,
    string typeText,
    IAbstractTextType textType)
{
    List<KeyValuePair<string, string>> statistics = _typeList2Pair("Name", "Value", textType);

    try
    {
        foreach (KeyValuePair<string, string> statistic in statistics)
        {
            string element = "<" + typeText
                           + " Name=\"" + statistic.Key + "\""
                           + " Value=\"" + statistic.Value + "\""
                           + " />";
            reader.OnContentData(typeText, element, ref MetadataProcessor._cancel);
        }
    }
    catch (System.Exception ex)
    {
        Logger.LogError("Exception within MetadataProcessor::DocumentStatistics for " + typeText + ", " + textType);
        Logger.LogError(ex);
    }
}