/// <summary>
/// Converts HTML to a Word document by way of Word's altChunk import.
/// Copies <c>Blank-altchunk.docx</c> from <c>TestUtil.SourceDir</c> to
/// <paramref name="newAltChunkBeforeFi"/>, adds <paramref name="html"/> to it as an
/// <c>application/xhtml+xml</c> alternative-format part referenced by a <c>w:altChunk</c>
/// element placed first in the body, asks <c>WordAutomationUtilities.OpenAndSaveAs</c> to
/// write <paramref name="newAltChunkAfterFi"/>, and finally simplifies the markup of that
/// output document in place.
/// </summary>
/// <param name="newAltChunkBeforeFi">Where the blank document with the embedded altChunk is written.</param>
/// <param name="newAltChunkAfterFi">Where the re-saved document is written; it is post-processed in place.</param>
/// <param name="html">Root element of the HTML to embed; serialized with <c>ToString()</c>.</param>
/// <exception cref="IOException">
/// The blank document cannot be copied (for example the destination already exists), or the
/// output document still cannot be opened after the retry budget is exhausted.
/// </exception>
public static void DoConversionViaWord(FileInfo newAltChunkBeforeFi, FileInfo newAltChunkAfterFi, XElement html)
{
    // Retry budget for opening the output document: 600 attempts with a 50 ms pause
    // (about 30 seconds of waiting) before the last IOException is propagated.
    const int maxOpenAttempts = 600;
    const int retryDelayMilliseconds = 50;

    // The template is a file, so it is described with FileInfo.
    var blankAltChunkFi = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, "Blank-altchunk.docx"));
    File.Copy(blankAltChunkFi.FullName, newAltChunkBeforeFi.FullName);

    using (WordprocessingDocument myDoc = WordprocessingDocument.Open(newAltChunkBeforeFi.FullName, true))
    {
        string altChunkId = "AltChunkId1";
        MainDocumentPart mainPart = myDoc.MainDocumentPart;
        AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(
            "application/xhtml+xml", altChunkId);

        using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
        {
            using (StreamWriter stringStream = new StreamWriter(chunkStream))
            {
                stringStream.Write(html.ToString());
            }
        }

        // The altChunk element points at the part added above through its relationship id.
        XElement altChunk = new XElement(W.altChunk,
            new XAttribute(R.id, altChunkId)
        );
        XDocument mainDocumentXDoc = mainPart.GetXDocument();
        mainDocumentXDoc.Root
            .Element(W.body)
            .AddFirst(altChunk);
        mainPart.PutXDocument();
    }

    WordAutomationUtilities.OpenAndSaveAs(newAltChunkBeforeFi.FullName, newAltChunkAfterFi.FullName);

    // Opening the output can fail with an IOException (presumably while the file is still
    // locked by Word - confirm), so the open is retried. The loop is bounded so that a
    // permanent failure, such as an output file that was never created, surfaces as an
    // exception instead of spinning forever.
    int attempt = 0;
    while (true)
    {
        attempt++;
        try
        {
            using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newAltChunkAfterFi.FullName, true))
            {
                SimplifyMarkupSettings settings2 = new SimplifyMarkupSettings
                {
                    RemoveMarkupForDocumentComparison = true,
                };
                MarkupSimplifier.SimplifyMarkup(wDoc, settings2);
                XElement newRoot = (XElement)RemoveDivTransform(wDoc.MainDocumentPart.GetXDocument().Root);
                wDoc.MainDocumentPart.GetXDocument().Root.ReplaceWith(newRoot);
                wDoc.MainDocumentPart.PutXDocumentWithFormatting();
            }
            break;
        }
        catch (IOException)
        {
            if (attempt >= maxOpenAttempts)
            {
                // Out of retries: let the caller see the real failure.
                throw;
            }
            System.Threading.Thread.Sleep(retryDelayMilliseconds);
        }
    }
}
/// <summary>
/// Round-trip test for one source document. Copies <paramref name="name"/> from
/// <c>TestUtil.SourceDir</c> into <c>TestUtil.TempDir</c>, calls
/// <c>SaveAsHtmlUsingHtmlConverter</c> on the copy, reads the <c>-2-Source.html</c> file back
/// as an <see cref="XElement"/>, writes the cleaned-up author CSS next to it, converts the
/// HTML to a <c>WmlDocument</c> with <c>HtmlToWmlConverter</c>, asserts the result is not
/// null, and saves/validates it.
/// </summary>
/// <param name="name">
/// File name of the source document inside <c>TestUtil.SourceDir</c>; every <c>.docx</c>
/// occurrence in it is replaced to derive the output file names.
/// </param>
public void HW010(string name)
{
    string sourceDirPath = TestUtil.SourceDir.FullName;
    string tempDirPath = TestUtil.TempDir.FullName;

    var sourceDocxFi = new FileInfo(Path.Combine(sourceDirPath, name));
    var sourceCopiedToDestDocxFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-2-Source.docx")));
    var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-2-Source.html")));
    var destCssFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-3.css")));
    var destDocxFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-4-ConvertedByHtmlToWml.docx")));
    var annotatedHtmlFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-5-Annotated.txt")));

    File.Copy(sourceDocxFi.FullName, sourceCopiedToDestDocxFi.FullName);

    // NOTE(review): the HTML file read below is presumably the one produced by this call - confirm.
    SaveAsHtmlUsingHtmlConverter(sourceCopiedToDestDocxFi.FullName, sourceCopiedToDestDocxFi.DirectoryName);
    XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

    // Element names are identifiers, so they are matched ordinally and case-insensitively
    // rather than through the current culture's lower-casing rules.
    XElement styleElement = html
        .Descendants()
        .FirstOrDefault(d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    var settingsWmlDocument = new WmlDocument(sourceCopiedToDestDocxFi.FullName);
    HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings(settingsWmlDocument);

    // image references in HTML files contain the path to the subdir that contains the images,
    // so base URI is the name of the directory that contains the HTML files
    settings.BaseUriForImages = tempDirPath;

    WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        usedAuthorCss,
        userCss,
        html,
        settings,
        null, // use the default EmptyDocument
        s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

    Assert.NotNull(doc);
    if (doc != null)
    {
        SaveValidateAndFormatMainDocPart(destDocxFi, doc);
    }

#if DO_CONVERSION_VIA_WORD
    var newAltChunkBeforeFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-5-AltChunkBefore.docx")));
    var newAltChunkAfterFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-6-ConvertedViaWord.docx")));
    WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
}
/// <summary>
/// Compares numbering between two HTML renderings of the same document: one saved through
/// <c>WordAutomationUtilities.SaveAsHtmlUsingWord</c> and one produced by
/// <c>ConvertToHtml</c>. Both are passed through <c>SaveHtmlAsXml</c> and the results are
/// handed to <c>CompareNumbering</c>.
/// </summary>
/// <param name="file">File name of the source document inside <c>TestUtil.SourceDir</c>.</param>
public void LIR001(string file)
{
    string sourcePath = Path.Combine(TestUtil.SourceDir.FullName, file);
    FileInfo sourceDocx = new FileInfo(sourcePath);

    // The instance is not used afterwards; the construction is kept as-is
    // (NOTE(review): presumably it loads the file and so fails early if it is unreadable - confirm).
    WmlDocument loadedSource = new WmlDocument(sourceDocx.FullName);

    // Rendering saved through Word automation.
    string wordHtmlName = sourceDocx.Name.Replace(".docx", "-Word.html");
    var htmlFromWord = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, wordHtmlName));
    WordAutomationUtilities.SaveAsHtmlUsingWord(sourceDocx, htmlFromWord);

    // Rendering produced by ConvertToHtml, then turned into XML.
    var htmlFromConverter = ConvertToHtml(sourceDocx.FullName, TestUtil.TempDir.FullName);
    var converterXml = SaveHtmlAsXml(htmlFromConverter);

    // Rewrite the Word-saved HTML as UTF-8 so that the file carries a BOM.
    string wordHtmlText = File.ReadAllText(htmlFromWord.FullName, Encoding.Default);
    File.WriteAllText(htmlFromWord.FullName, wordHtmlText, Encoding.UTF8);
    var wordXml = SaveHtmlAsXml(htmlFromWord);

    CompareNumbering(converterXml, wordXml);
}
/// <summary>
/// Converts one source HTML file to a Word document. Copies <paramref name="name"/> from
/// <c>TestUtil.SourceDir</c> into <c>TestUtil.TempDir</c> (together with its
/// <c>_files</c> image directory when that exists), writes the cleaned-up author CSS,
/// runs <c>HtmlToWmlConverter.ConvertHtmlToWml</c>, asserts the result is not null, and
/// saves/validates it.
/// </summary>
/// <remarks>
/// The two filters are switched on by flipping the <c>#if false</c> blocks. When a filter is
/// active and none of its entries occurs in the HTML text (or in the author CSS), the files
/// already written for this input are deleted and the method returns without converting.
/// </remarks>
/// <param name="name">
/// File name of the source HTML inside <c>TestUtil.SourceDir</c>; <c>.html</c> is replaced
/// to derive the output file names.
/// </param>
public void HW001(string name)
{
#if false
    string[] cssFilter = new[] {
        "text-indent",
        "margin-left",
        "margin-right",
        "padding-left",
        "padding-right",
    };
#else
    string[] cssFilter = null;
#endif

#if false
    string[] htmlFilter = new[] {
        "img",
    };
#else
    string[] htmlFilter = null;
#endif

    string sourceDirPath = TestUtil.SourceDir.FullName;
    string tempDirPath = TestUtil.TempDir.FullName;

    var sourceHtmlFi = new FileInfo(Path.Combine(sourceDirPath, name));
    var sourceImageDi = new DirectoryInfo(Path.Combine(sourceDirPath, sourceHtmlFi.Name.Replace(".html", "_files")));
    var destImageDi = new DirectoryInfo(Path.Combine(tempDirPath, sourceImageDi.Name));
    var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(tempDirPath, sourceHtmlFi.Name.Replace(".html", "-1-Source.html")));
    var destCssFi = new FileInfo(Path.Combine(tempDirPath, sourceHtmlFi.Name.Replace(".html", "-2.css")));
    var destDocxFi = new FileInfo(Path.Combine(tempDirPath, sourceHtmlFi.Name.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
    var annotatedHtmlFi = new FileInfo(Path.Combine(tempDirPath, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

    if (!sourceCopiedToDestHtmlFi.Exists)
    {
        File.Copy(sourceHtmlFi.FullName, sourceCopiedToDestHtmlFi.FullName);
    }
    XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

    if (htmlFilter != null && htmlFilter.Any())
    {
        // Serialize only when a filter is active; the text is not needed otherwise.
        string htmlString = html.ToString();
        if (!htmlFilter.Any(item => htmlString.Contains(item)))
        {
            sourceCopiedToDestHtmlFi.Delete();
            return;
        }
    }

    // Element names are identifiers, so they are matched ordinally and case-insensitively
    // rather than through the current culture's lower-casing rules.
    XElement styleElement = html
        .Descendants()
        .FirstOrDefault(d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    if (cssFilter != null && cssFilter.Any())
    {
        if (!cssFilter.Any(item => usedAuthorCss.Contains(item)))
        {
            sourceCopiedToDestHtmlFi.Delete();
            destCssFi.Delete();
            return;
        }
    }

    if (sourceImageDi.Exists)
    {
        destImageDi.Create();
        foreach (var file in sourceImageDi.GetFiles())
        {
            // Overwrite so that a run into an already-populated temp directory does not
            // throw; the HTML copy above tolerates an existing destination in the same way.
            File.Copy(file.FullName, Path.Combine(destImageDi.FullName, file.Name), true);
        }
    }

    HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();

    // image references in HTML files contain the path to the subdir that contains the images,
    // so base URI is the name of the directory that contains the HTML files
    settings.BaseUriForImages = tempDirPath;

    WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        usedAuthorCss,
        userCss,
        html,
        settings,
        null,
        s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

    Assert.NotNull(doc);
    if (doc != null)
    {
        SaveValidateAndFormatMainDocPart(destDocxFi, doc);
    }

#if DO_CONVERSION_VIA_WORD
    var newAltChunkBeforeFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".html", "-5-AltChunkBefore.docx")));
    var newAltChunkAfterFi = new FileInfo(Path.Combine(tempDirPath, name.Replace(".html", "-6-ConvertedViaWord.docx")));
    WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
}