#pragma warning restore CS1998
/// <summary>
/// Extracts the fields from a DOCX template. Two files are written next to the template:
/// a preprocessed copy of the template (<paramref name="templateFileName"/> + "obj.docx") and
/// a JSON serialization of the accumulated fields (<paramref name="templateFileName"/> + "obj.json").
/// </summary>
/// <param name="templateFileName">Path of the template file to read.</param>
/// <param name="removeCustomProperties">Passed through unchanged to ExtractAllTemplateFields.</param>
/// <param name="keepPropertyNames">Passed through unchanged to ExtractAllTemplateFields.</param>
/// <returns>A FieldExtractResult built from the preprocessed template path and the JSON path.</returns>
public static FieldExtractResult ExtractFields(string templateFileName, bool removeCustomProperties = true,
    IEnumerable<string> keepPropertyNames = null)
{
    string extractedDocxPath = templateFileName + "obj.docx";
    string fieldsJsonPath = templateFileName + "obj.json";

    // Pull the template's bytes into memory; all editing below happens on an in-memory copy.
    byte[] templateBytes = new WmlDocument(templateFileName).DocumentByteArray;
    var accumulator = new FieldAccumulator();
    WmlDocument extractedTemplate;

    using (var buffer = new MemoryStream())
    {
        // An empty (expandable) stream is filled by hand so the package can grow while being edited.
        buffer.Write(templateBytes, 0, templateBytes.Length);

        using (var package = WordprocessingDocument.Open(buffer, true))
        {
            // Strip any task pane / web extension parts before looking at the content.
            package.DeleteParts<WebExTaskpanesPart>(package.GetPartsOfType<WebExTaskpanesPart>());

            // Collect every field (and thus the template's logic) from the content parts.
            ExtractAllTemplateFields(package, accumulator, removeCustomProperties, keepPropertyNames);
        }

        // The package is disposed at this point, so the buffer holds the edited document.
        extractedTemplate = new WmlDocument(extractedDocxPath, buffer.ToArray());
    }

    // Always write the outputs, even on error: error messages are carried inside the files.
    extractedTemplate.Save();
    using (StreamWriter writer = File.CreateText(fieldsJsonPath))
    {
        accumulator.JsonSerialize(writer);
    }

    return new FieldExtractResult(extractedDocxPath, fieldsJsonPath);
}
#pragma warning restore CS1998
/// <summary>
/// Runs PrepareTemplate over an in-memory copy of the preprocessed template and saves the result
/// as <paramref name="originalTemplateFile"/> + "ncc.docx".
/// </summary>
/// <param name="originalTemplateFile">Path of the original template; used only to derive the output file name.</param>
/// <param name="preProcessedTemplateFile">Path of the preprocessed template whose bytes are transformed.</param>
/// <returns>A CompileResult built from the saved document's file name and a null second argument.</returns>
private static CompileResult TransformTemplate(string originalTemplateFile, string preProcessedTemplateFile)
{
    string outputPath = originalTemplateFile + "ncc.docx";
    byte[] sourceBytes = File.ReadAllBytes(preProcessedTemplateFile);
    WmlDocument result;

    using (var buffer = new MemoryStream())
    {
        // Fill an expandable stream by hand so the package can grow while it is edited.
        buffer.Write(sourceBytes, 0, sourceBytes.Length);

        using (var package = WordprocessingDocument.Open(buffer, true))
        {
            PrepareTemplate(package);
        }

        // The package is disposed at this point, so the buffer holds the transformed document.
        result = new WmlDocument(outputPath, buffer.ToArray());
    }

    // Save() is expected to overwrite, but any stale output is removed explicitly to be sure.
    if (File.Exists(outputPath))
    {
        File.Delete(outputPath);
    }

    // Always write the output, even on error: error messages are carried inside the file.
    result.Save();
    return new CompileResult(result.FileName, null);
}
public void RemoveSmartTags()
{
    // Exercises MarkupSimplifier's smart-tag removal against a document containing an invalid
    // smartTag element (apparently inserted by 3rd party software).
    string fileName = "SmartTags.docx";
    var templatesDir = new DirectoryInfo("../../../../test/templates/");
    var resultsDir = new DirectoryInfo("../../../../test/history/dot-net-results");
    var sourceFile = new FileInfo(Path.Combine(templatesDir.FullName, fileName));
    var workingFile = new FileInfo(Path.Combine(resultsDir.FullName, fileName));
    string workingPath = workingFile.FullName;
    string simplifiedPath = Path.Combine(resultsDir.FullName, "SmartTags-Removed.docx");

    // Work on a copy in the results folder (overwriting any earlier copy).
    sourceFile.CopyTo(workingPath, true);

    byte[] originalBytes = new WmlDocument(workingPath).DocumentByteArray;
    WmlDocument simplifiedDoc;
    using (var buffer = new MemoryStream())
    {
        buffer.Write(originalBytes, 0, originalBytes.Length);
        using (var package = WordprocessingDocument.Open(buffer, true))
        {
            // We try to remove smart tags, but the (apparently) invalid one is not removed correctly.
            var simplifySettings = new SimplifyMarkupSettings
            {
                RemoveSmartTags = true
            };
            MarkupSimplifier.SimplifyMarkup(package, simplifySettings);
        }

        simplifiedDoc = new WmlDocument(simplifiedPath, buffer.ToArray());

        // No smartTag elements may remain in the main document part.
        Assert.False(simplifiedDoc.MainDocumentPart.Descendants(W.smartTag).Any());
        simplifiedDoc.Save();
    }

    // The saved document still has leftover bits of the invalid smart tag and should therefore
    // be invalid; MS Word also complains about the validity of this document.
    // (Consider whether it would be appropriate to patch SimplifyMarkup to correctly remove this
    // apparently invalid smart tag?)
    var docValidator = new Validator();
    var validation = docValidator.ValidateDocument(simplifiedPath);
    Assert.True(validation.HasErrors);
}