/// <summary>
/// Sample entry point: loads Source1.docx, accepts all tracked revisions and
/// saves the resulting document as Out1.docx.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var source = new WmlDocument("../../Source1.docx");

    // AcceptRevisions returns a new document; the source object is not saved back.
    var accepted = RevisionAccepter.AcceptRevisions(source);
    accepted.SaveAs("Out1.docx");
}
/// <summary>
/// Accepts all tracked revisions in the RevisionAccepter sample document and
/// saves the result as Out1.docx under <c>TempDir</c>.
/// </summary>
public void Sample()
{
    const string sourcePath = "../../../Word/Samples/RevisionAccepter/Source1.docx";

    WmlDocument accepted = RevisionAccepter.AcceptRevisions(new WmlDocument(sourcePath));

    string outputPath = Path.Combine(TempDir, "Out1.docx");
    accepted.SaveAs(outputPath);
}
/// <summary>
/// Runs the revision accepter over the named source document and saves the
/// result into the test temp directory with a
/// "-processed-by-RevisionAccepter" suffix in the file name.
/// </summary>
/// <param name="name">File name of the document inside <c>TestUtil.SourceDir</c>.</param>
public void RA001(string name)
{
    string sourcePath = Path.Combine(TestUtil.SourceDir.FullName, name);
    var sourceDocx = new FileInfo(sourcePath);

    var notAccepted = new WmlDocument(sourceDocx.FullName);
    var afterAccepting = RevisionAccepter.AcceptRevisions(notAccepted);

    string processedName = sourceDocx.Name.Replace(".docx", "-processed-by-RevisionAccepter.docx");
    string processedPath = Path.Combine(TestUtil.TempDir.FullName, processedName);
    var processedDestDocx = new FileInfo(processedPath);

    afterAccepting.SaveAs(processedDestDocx.FullName);
}
/// <summary>
/// Runs the revision accepter over the named document from the relative
/// TestFiles directory and saves the result under <c>TempDir</c> with a
/// "-processed-by-RevisionAccepter" suffix in the file name.
/// </summary>
/// <param name="name">File name of the document inside the TestFiles directory.</param>
public void RA001(string name)
{
    DirectoryInfo sourceDir = new DirectoryInfo("../../../../TestFiles/");
    FileInfo sourceDocx = new FileInfo(Path.Combine(sourceDir.FullName, name));

    WmlDocument notAccepted = new WmlDocument(sourceDocx.FullName);
    WmlDocument afterAccepting = RevisionAccepter.AcceptRevisions(notAccepted);

    string processedName = sourceDocx.Name.Replace(".docx", "-processed-by-RevisionAccepter.docx");
    FileInfo processedDestDocx = new FileInfo(Path.Combine(TempDir, processedName));

    afterAccepting.SaveAs(processedDestDocx.FullName);
}
/// <summary>
/// Extracts template fields from every content part of the document into
/// <paramref name="fieldAccumulator"/> and, optionally, removes document
/// variables and custom document properties so they do not carry over to
/// assembled documents.
/// </summary>
/// <param name="wordDoc">The template document; it is modified in place when properties are removed.</param>
/// <param name="fieldAccumulator">Receives the fields found by <c>ExtractFieldsFromPart</c>.</param>
/// <param name="removeCustomProperties">
/// When true, document variables and custom document properties are removed
/// unless their name is listed in <paramref name="keepPropertyNames"/>.
/// </param>
/// <param name="keepPropertyNames">
/// Names of variables/properties to keep; null means nothing is kept.
/// </param>
/// <exception cref="FieldParseException">The document contains tracked revisions.</exception>
private static void ExtractAllTemplateFields(
    WordprocessingDocument wordDoc,
    FieldAccumulator fieldAccumulator,
    bool removeCustomProperties = true,
    IEnumerable<string> keepPropertyNames = null)
{
    if (RevisionAccepter.HasTrackedRevisions(wordDoc))
    {
        throw new FieldParseException("Invalid template - contains tracked revisions");
    }

    // Extract fields from each part of the document.
    foreach (var part in wordDoc.ContentParts())
    {
        ExtractFieldsFromPart(part, fieldAccumulator);

        if (!removeCustomProperties)
        {
            continue;
        }

        // Document variables live in the settings part of the main document part only.
        MainDocumentPart main = part as MainDocumentPart;
        if (main == null)
        {
            continue;
        }

        // A document is not required to have a settings part (or a root element in it);
        // in that case there are no document variables to remove.
        var settingsPart = main.DocumentSettingsPart;
        if (settingsPart == null || settingsPart.Settings == null)
        {
            continue;
        }

        // Remove document variables (in case they hold sensitive information that
        // should not carry over to assembled documents).
        // Snapshots (ToList) are taken because elements are removed while iterating.
        foreach (DocumentVariables docVars in settingsPart.Settings.Descendants<DocumentVariables>().ToList())
        {
            // Elements<T>() skips any unexpected child element instead of failing on a cast.
            foreach (DocumentVariable docVar in docVars.Elements<DocumentVariable>().ToList())
            {
                if (keepPropertyNames == null || !Enumerable.Contains<string>(keepPropertyNames, docVar.Name))
                {
                    docVar.Remove();
                }
            }
        }
    }

    if (removeCustomProperties)
    {
        // Custom properties are the new/non-legacy version of document variables.
        var custom = wordDoc.CustomFilePropertiesPart;
        if (custom != null && custom.Properties != null)
        {
            foreach (CustomDocumentProperty prop in custom.Properties.Elements<CustomDocumentProperty>().ToList())
            {
                if (keepPropertyNames == null || !Enumerable.Contains<string>(keepPropertyNames, prop.Name))
                {
                    prop.Remove();
                }
            }
        }
    }
}
/// <summary>
/// Sample entry point: creates a timestamped "ExampleOutput-..." directory,
/// accepts all tracked revisions in Source1.docx and saves the result as
/// Out1.docx inside that directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    DateTime now = DateTime.Now;

    // Directory name is built from the two-digit year, month, day and time of day.
    string outputDirName = string.Format(
        "ExampleOutput-{0:00}-{1:00}-{2:00}-{3:00}{4:00}{5:00}",
        now.Year - 2000,
        now.Month,
        now.Day,
        now.Hour,
        now.Minute,
        now.Second);
    DirectoryInfo outputDir = new DirectoryInfo(outputDirName);
    outputDir.Create();

    // Accept all revisions, save result as a new document.
    var source = new WmlDocument("../../Source1.docx");
    var accepted = RevisionAccepter.AcceptRevisions(source);
    accepted.SaveAs(Path.Combine(outputDir.FullName, "Out1.docx"));
}
/// <summary>
/// Applies <c>TransformPartToSingleCharacterRuns</c> to every content part of
/// the document.
/// </summary>
/// <param name="doc">The document to transform; must not contain tracked revisions.</param>
/// <exception cref="OpenXmlPowerToolsException">The document contains tracked revisions.</exception>
public static void TransformToSingleCharacterRuns(WordprocessingDocument doc)
{
    bool hasRevisions = RevisionAccepter.HasTrackedRevisions(doc);
    if (hasRevisions)
    {
        throw new OpenXmlPowerToolsException(
            "Transforming a document to single character runs is not supported for " +
            "a document with tracked revisions.");
    }

    foreach (var contentPart in doc.ContentParts())
    {
        TransformPartToSingleCharacterRuns(contentPart);
    }
}
/// <summary>
/// Prepares a template: rejects documents with tracked revisions, simplifies
/// the template markup, then prepares each content part.
/// </summary>
/// <param name="wordDoc">The template document to prepare.</param>
/// <exception cref="FieldParseException">The document contains tracked revisions.</exception>
private static void PrepareTemplate(WordprocessingDocument wordDoc)
{
    bool hasRevisions = RevisionAccepter.HasTrackedRevisions(wordDoc);
    if (hasRevisions)
    {
        throw new FieldParseException("Invalid template - contains tracked revisions");
    }

    SimplifyTemplateMarkup(wordDoc);

    foreach (var contentPart in wordDoc.ContentParts())
    {
        PrepareTemplatePart(contentPart);
    }
}
/// <summary>
/// Reads <paramref name="source"/> into memory, accepts its revisions,
/// simplifies its markup, assembles its formatting, and writes the resulting
/// bytes to <paramref name="dest"/>. The source file itself is not modified.
/// </summary>
/// <param name="source">The .docx file to read.</param>
/// <param name="dest">The file that receives the processed document.</param>
public static void CopyFormattingAssembledDocx(FileInfo source, FileInfo dest)
{
    byte[] sourceBytes = File.ReadAllBytes(source.FullName);

    using (var stream = new MemoryStream())
    {
        // Copy into an expandable stream so the package can be edited in memory.
        stream.Write(sourceBytes, 0, sourceBytes.Length);

        using (var wordDoc = WordprocessingDocument.Open(stream, true))
        {
            RevisionAccepter.AcceptRevisions(wordDoc);

            var simplifyMarkupSettings = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                RemoveGoBackBookmark = true,
                ReplaceTabsWithSpaces = false,
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, simplifyMarkupSettings);

            var formattingAssemblerSettings = new FormattingAssemblerSettings
            {
                RemoveStyleNamesFromParagraphAndRunProperties = false,
                ClearStyles = false,
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                CreateHtmlConverterAnnotationAttributes = true,
                OrderElementsPerStandard = false,
                ListItemRetrieverSettings = new ListItemRetrieverSettings()
                {
                    ListItemTextImplementations = ListItemRetrieverSettings.DefaultListItemTextImplementations,
                },
            };
            FormattingAssembler.AssembleFormatting(wordDoc, formattingAssemblerSettings);
        }

        // The document has been disposed at this point; write out the stream's bytes.
        byte[] processedBytes = stream.ToArray();
        File.WriteAllBytes(dest.FullName, processedBytes);
    }
}
/// <summary>
/// Prepares a template: rejects documents with tracked revisions, simplifies
/// the template markup, then prepares each content part, passing one shared
/// error list to every part.
/// </summary>
/// <param name="wordDoc">The template document to prepare.</param>
/// <param name="xm">Field transform index passed through to <c>PrepareTemplatePart</c>.</param>
/// <returns>The error list that was passed to every <c>PrepareTemplatePart</c> call.</returns>
/// <exception cref="FieldParseException">The document contains tracked revisions.</exception>
private static TemplateErrorList PrepareTemplate(WordprocessingDocument wordDoc, FieldTransformIndex xm)
{
    bool hasRevisions = RevisionAccepter.HasTrackedRevisions(wordDoc);
    if (hasRevisions)
    {
        throw new FieldParseException("Invalid template - contains tracked revisions");
    }

    SimplifyTemplateMarkup(wordDoc);

    TemplateErrorList errors = new TemplateErrorList();
    foreach (var contentPart in wordDoc.ContentParts())
    {
        PrepareTemplatePart(contentPart, xm, errors);
    }

    return errors;
}
/// <summary>
/// Simplifies the markup of every content part of the document, and of the
/// style definitions and styles-with-effects parts when they exist.
/// </summary>
/// <param name="doc">The document to simplify in place.</param>
/// <param name="settings">
/// Simplification options; when <c>AcceptRevisions</c> is set, revisions are
/// accepted before any part is processed.
/// </param>
public static void SimplifyMarkup(WordprocessingDocument doc, SimplifyMarkupSettings settings)
{
    if (settings.AcceptRevisions)
    {
        RevisionAccepter.AcceptRevisions(doc);
    }

    foreach (var contentPart in doc.ContentParts())
    {
        SimplifyMarkupForPart(contentPart, settings);
    }

    // The style parts are processed after the content parts, and only if present.
    var styleDefinitions = doc.MainDocumentPart.StyleDefinitionsPart;
    if (styleDefinitions != null)
    {
        SimplifyMarkupForPart(styleDefinitions, settings);
    }

    var stylesWithEffects = doc.MainDocumentPart.StylesWithEffectsPart;
    if (stylesWithEffects != null)
    {
        SimplifyMarkupForPart(stylesWithEffects, settings);
    }
}
/// <summary>
/// Converts the main document part of a word-processing document to an XHTML
/// element. Before converting, revisions are accepted and the markup is
/// simplified on <paramref name="wordDoc"/> itself.
/// </summary>
/// <param name="wordDoc">The document to convert.</param>
/// <param name="htmlConverterSettings">
/// Conversion settings; <c>ConvertFormatting</c> must be false.
/// </param>
/// <param name="imageHandler">Callback passed through to the transform for images.</param>
/// <returns>The XHTML element produced by <c>ConvertToHtmlTransform</c>.</returns>
/// <exception cref="InvalidSettingsException">
/// <c>htmlConverterSettings.ConvertFormatting</c> is true.
/// </exception>
public XElement ConvertToHtml(WordprocessingDocument wordDoc, HtmlConverterSettings htmlConverterSettings, Func<ImageInfo, XElement> imageHandler)
{
    InitEntityMap();

    if (htmlConverterSettings.ConvertFormatting)
    {
        throw new InvalidSettingsException("Conversion with formatting is not supported");
    }

    RevisionAccepter.AcceptRevisions(wordDoc);

    var simplifySettings = new SimplifyMarkupSettings
    {
        RemoveComments = true,
        RemoveContentControls = true,
        RemoveEndAndFootNotes = true,
        RemoveFieldCodes = false,
        RemoveLastRenderedPageBreak = true,
        RemovePermissions = true,
        RemoveProof = true,
        RemoveRsidInfo = true,
        RemoveSmartTags = true,
        RemoveSoftHyphens = true,
        ReplaceTabsWithSpaces = true,
    };
    MarkupSimplifier.SimplifyMarkup(wordDoc, simplifySettings);

    XElement documentRoot = wordDoc.MainDocumentPart.GetXDocument().Root;
    AnnotateHyperlinkContent(documentRoot);

    // Note: the tree returned by ConvertToHtmlTransform contains objects of type
    // XEntity, which is defined in PtOpenXmlUtil.cs. See
    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
    // for a detailed explanation.
    //
    // If you further transform the returned XML tree, you must do it correctly,
    // or entities will not be serialized properly.
    object transformed = ConvertToHtmlTransform(wordDoc, htmlConverterSettings, documentRoot, imageHandler);
    return (XElement)transformed;
}
// Future option
//[Parameter(Mandatory = false)]
//public SwitchParameter Reject;

#region Cmdlet Overrides

/// <summary>
/// Processes every document supplied to the cmdlet: when <c>Accept</c> is
/// set, outputs a copy with all revisions accepted. A failure on one document
/// is written as an error record and processing continues with the next.
/// </summary>
protected override void ProcessRecord()
{
    foreach (var document in AllDocuments("Edit-OpenXmlChange"))
    {
        try
        {
            // Only word-processing documents can carry tracked revisions.
            var wordDocument = document as WmlDocument;
            if (wordDocument == null)
            {
                throw new PowerToolsDocumentException("Not a wordprocessing document.");
            }

            if (Accept)
            {
                var accepted = RevisionAccepter.AcceptRevisions(wordDocument);
                OutputDocument(accepted);
            }
        }
        catch (Exception e)
        {
            WriteError(PowerToolsExceptionHandling.GetExceptionErrorRecord(e, document));
        }
    }
}
/// <summary>
/// Assembles a document from a template and XML data by processing every
/// content part of an in-memory copy of the template.
/// </summary>
/// <param name="templateDoc">The template; its byte array is copied, not modified.</param>
/// <param name="data">The XML data passed to <c>ProcessTemplatePart</c>.</param>
/// <param name="templateError">Set to the <c>HasError</c> value of the template error tracker after processing.</param>
/// <returns>A new document named "TempFileName.docx" containing the processed bytes.</returns>
/// <exception cref="OpenXmlPowerToolsException">The template contains tracked revisions.</exception>
public static WmlDocument AssembleDocument(WmlDocument templateDoc, XElement data, out bool templateError)
{
    byte[] templateBytes = templateDoc.DocumentByteArray;

    using (var stream = new MemoryStream())
    {
        stream.Write(templateBytes, 0, templateBytes.Length);

        using (var wordDoc = WordprocessingDocument.Open(stream, true))
        {
            bool hasRevisions = RevisionAccepter.HasTrackedRevisions(wordDoc);
            if (hasRevisions)
            {
                throw new OpenXmlPowerToolsException("Invalid DocumentAssembler template - contains tracked revisions");
            }

            var errorTracker = new TemplateError();
            foreach (var contentPart in wordDoc.ContentParts())
            {
                ProcessTemplatePart(data, errorTracker, contentPart);
            }

            templateError = errorTracker.HasError;
        }

        // The document has been disposed at this point; wrap the stream's bytes.
        return new WmlDocument("TempFileName.docx", stream.ToArray());
    }
}
/// <summary>
/// Opens the document at <paramref name="docLocation"/> for editing, accepts
/// its revisions, simplifies its markup with MarkupSimplifier (removing, among
/// other things, proofing errors and RSID information), transforms the main
/// document part to a simpler XML form, prints that XML to the console, and
/// saves the document. Optionally rewrites the document from the simplified XML.
/// </summary>
/// <param name="docLocation">Location of the .docx file; the file is modified in place.</param>
/// <param name="z">Namespace passed to <c>TransformToSimpleXml</c> for the generated XML tags.</param>
/// <param name="formatDocument">
/// When true, <c>ReWriteDocument</c> is called with the simplified XML after the document is saved.
/// </param>
/// <exception cref="Exception">
/// Any failure is wrapped; the original exception is available through
/// <see cref="Exception.InnerException"/>.
/// </exception>
public static void SimplifyMarkup(string docLocation, string z, bool formatDocument)
{
    try
    {
        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(docLocation, true))
        {
            RevisionAccepter.AcceptRevisions(wordDoc);

            // Components to strip from the XML. See SimplifyMarkupSettings for all options.
            SimplifyMarkupSettings settings = new SimplifyMarkupSettings
            {
                RemoveComments = true,
                RemoveContentControls = true,
                RemoveEndAndFootNotes = true,
                RemoveFieldCodes = false,
                RemoveLastRenderedPageBreak = true,
                RemovePermissions = true,
                RemoveProof = true,
                RemoveRsidInfo = true,
                RemoveSmartTags = true,
                RemoveSoftHyphens = true,
                ReplaceTabsWithSpaces = true,
                NormalizeXml = false,
                RemoveWebHidden = true,
                RemoveMarkupForDocumentComparison = true,
            };
            MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

            // Look up the default paragraph style of the document. A document without a
            // style definitions part is treated like one without a default paragraph
            // style: the id stays null, which the query below can also produce.
            string defaultParagraphStyleId = null;
            var stylesPart = wordDoc.MainDocumentPart.StyleDefinitionsPart;
            if (stylesPart != null)
            {
                defaultParagraphStyleId = stylesPart
                    .GetXDocument().Root.Elements(W.style)
                    .Where(e => (string)e.Attribute(W.type) == "paragraph" &&
                                (string)e.Attribute(W._default) == "1")
                    .Select(s => (string)s.Attribute(W.styleId))
                    .FirstOrDefault();
            }

            // Transform the main document part into the simplified XML form.
            XElement simplerXml = (XElement)TransformToSimpleXml(
                wordDoc.MainDocumentPart.GetXDocument().Root,
                defaultParagraphStyleId,
                z);
            Console.WriteLine(simplerXml);

            // Save and close before the optional rewrite, which is given the same file location.
            wordDoc.Save();
            wordDoc.Close();

            // If formatDocument is true, ReWriteDocument() is called.
            if (formatDocument)
            {
                Console.WriteLine("Reescrevendo o documento sem estilos");
                try
                {
                    ReWriteDocument(docLocation, simplerXml);
                    Console.WriteLine("Sucesso ao Reformatar o documento!");
                }
                catch (Exception e)
                {
                    // Keep the original exception as InnerException so its type and stack trace survive.
                    throw new Exception(string.Format("Erro ao Reformatar o Arquivo: {0}", e.ToString()), e);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception(string.Format("Não foi Possível simplificar o Arquivo. Erro: {0}", e.ToString()), e);
    }
}
/// <summary>
/// Replaces the content between the start and end markers of the named
/// bookmark in the main document part with <paramref name="replacementText"/>,
/// then writes the modified XML back to the part.
/// </summary>
/// <param name="doc">The document to modify.</param>
/// <param name="bookmarkName">Value of the <c>w:name</c> attribute of the bookmark to replace.</param>
/// <param name="replacementText">Text passed to <c>ReplaceInsertElement</c> as the new bookmark content.</param>
/// <exception cref="BookmarkReplacerException">
/// The bookmark does not exist, has no matching end marker, sits in a math
/// formula, hyperlink, simple field or smart tag, has start and end markers
/// under different parents, or the document contains tracked revisions or
/// content controls.
/// </exception>
public static void ReplaceBookmarkText(WordprocessingDocument doc, string bookmarkName, string replacementText)
{
    XDocument xDoc = doc.MainDocumentPart.GetXDocument();
    XElement bookmark = xDoc.Descendants(W.bookmarkStart)
        .FirstOrDefault(d => (string)d.Attribute(W.name) == bookmarkName);
    if (bookmark == null)
    {
        throw new BookmarkReplacerException(
            "Document doesn't contain bookmark.");
    }
    if (bookmark.Parent.Name.Namespace == M.m)
    {
        throw new BookmarkReplacerException(
            "Replacing text in math formulas is not supported.");
    }
    if (RevisionAccepter.HasTrackedRevisions(doc))
    {
        throw new BookmarkReplacerException(
            "Replacing bookmark text in documents that have tracked revisions is not supported.");
    }
    if (xDoc.Descendants(W.sdt).Any())
    {
        throw new BookmarkReplacerException(
            "Replacing bookmark text in documents that have content controls is not supported.");
    }

    // Work on the tree returned by FlattenParagraphsTransform; the bookmark markers
    // are looked up again in that tree.
    XElement newRoot = (XElement)FlattenParagraphsTransform(xDoc.Root);
    XElement startBookmarkElement = newRoot.Descendants(W.bookmarkStart)
        .FirstOrDefault(d => (string)d.Attribute(W.name) == bookmarkName);
    int bookmarkId = (int)startBookmarkElement.Attribute(W.id);

    // (int?) tolerates bookmarkEnd elements without a w:id attribute instead of throwing
    // from inside the predicate.
    XElement endBookmarkElement = newRoot.Descendants(W.bookmarkEnd)
        .FirstOrDefault(d => (int?)d.Attribute(W.id) == bookmarkId);
    if (endBookmarkElement == null)
    {
        // A malformed document may have a bookmark start without a matching end; report
        // that explicitly rather than failing with a NullReferenceException below.
        throw new BookmarkReplacerException(
            "Bookmark has no matching end. Can't replace text.");
    }

    if (startBookmarkElement.Ancestors(W.hyperlink).Any() ||
        endBookmarkElement.Ancestors(W.hyperlink).Any())
    {
        throw new BookmarkReplacerException(
            "Bookmark is within a hyperlink. Can't replace text.");
    }
    if (startBookmarkElement.Ancestors(W.fldSimple).Any() ||
        endBookmarkElement.Ancestors(W.fldSimple).Any())
    {
        throw new BookmarkReplacerException(
            "Bookmark is within a simple field. Can't replace text.");
    }
    if (startBookmarkElement.Ancestors(W.smartTag).Any() ||
        endBookmarkElement.Ancestors(W.smartTag).Any())
    {
        throw new BookmarkReplacerException(
            "Bookmark is within a smart tag. Can't replace text.");
    }
    if (startBookmarkElement.Parent != endBookmarkElement.Parent)
    {
        throw new BookmarkReplacerException(
            "Bookmark start and end not at same levels. Can't replace text.");
    }

    XElement parentElement = startBookmarkElement.Parent;

    // Siblings strictly between the start and end markers (deferred query).
    var elementsBetweenBookmarks = startBookmarkElement
        .ElementsAfterSelf()
        .TakeWhile(e => e != endBookmarkElement);

    // New child list for the parent:
    //   everything before the start marker,
    //   the start marker,
    //   an "Insert" placeholder carrying the rPr of the first run between the markers (if any),
    //   the in-between elements other than paragraphs, runs and tables,
    //   the end marker,
    //   everything after the end marker.
    var newElements = parentElement
        .Elements()
        .TakeWhile(e => e != startBookmarkElement)
        .Concat(new[]
        {
            startBookmarkElement,
            new XElement(BookmarkReplacerCustomNamespace + "Insert",
                elementsBetweenBookmarks
                    .Where(e => e.Name == W.r)
                    .Take(1)
                    .Elements(W.rPr)
                    .FirstOrDefault()),
        })
        .Concat(elementsBetweenBookmarks.Where(e => e.Name != W.p && e.Name != W.r && e.Name != W.tbl))
        .Concat(new[] { endBookmarkElement })
        .Concat(endBookmarkElement.ElementsAfterSelf());
    parentElement.ReplaceNodes(newElements);

    // Run the remaining transforms over the tree, then swap the new root into the
    // part's XDocument and write it back.
    newRoot = (XElement)UnflattenParagraphsTransform(newRoot);
    newRoot = (XElement)ReplaceInsertElement(newRoot, replacementText);
    newRoot = (XElement)DemoteRunChildrenOfBodyTransform(newRoot);
    xDoc.Elements().First().ReplaceWith(newRoot);
    doc.MainDocumentPart.PutXDocument();
}