/// <summary>
/// Negative conversion test: copies the named HTML test file into the temp directory,
/// extracts its author CSS, and asserts that <c>HtmlToWmlConverter.ConvertHtmlToWml</c>
/// throws <c>OpenXmlPowerToolsException</c> for it.
/// </summary>
/// <param name="name">File name of the HTML input inside the TestFiles directory.</param>
public void HW004(string name)
{
    DirectoryInfo sourceDir = new DirectoryInfo("../../../../TestFiles/");
    var sourceHtmlFi = new FileInfo(Path.Combine(sourceDir.FullName, name));

    var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-1-Source.html")));
    var destCssFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-2.css")));
    var annotatedHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

    // Overwrite an existing copy: a file left behind by an earlier run must not make
    // the test fail with an IOException before the conversion is even attempted.
    File.Copy(sourceHtmlFi.FullName, sourceCopiedToDestHtmlFi.FullName, true);
    XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

    // Author CSS is the text of the first <style> element; the tag name is matched
    // with an ordinal, culture-independent comparison.
    XElement styleElement = html.Descendants().FirstOrDefault(
        d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
    settings.BaseUriForImages = Path.Combine(TempDir);

    Assert.Throws<OpenXmlPowerToolsException>(
        () => HtmlToWmlConverter.ConvertHtmlToWml(
            defaultCss,
            usedAuthorCss,
            userCss,
            html,
            settings,
            null,
            s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null));
}
/// <summary>
/// Converts one HTML file to a Word document written into <paramref name="destinationDir"/>.
/// The cleaned author CSS is written next to it, and the path of an annotated-HTML
/// text file is handed to the converter.
/// </summary>
/// <param name="file">Path of the HTML file to convert.</param>
/// <param name="destinationDir">Directory that receives the generated .css, .docx and .txt files.</param>
public static void ConvertToDocx(string file, string destinationDir)
{
    bool produceAnnotatedHtml = true;

    var sourceHtmlFi = new FileInfo(file);
    Console.WriteLine("Converting " + sourceHtmlFi.Name);

    // Build output names from the extension-less base name. Replacing the literal
    // ".html" did nothing for ".htm" or ".HTML" inputs, so all three outputs ended up
    // with the input's own file name (and could overwrite it).
    string baseName = Path.GetFileNameWithoutExtension(sourceHtmlFi.Name);
    var destCssFi = new FileInfo(Path.Combine(destinationDir, baseName + ".css"));
    var destDocxFi = new FileInfo(Path.Combine(destinationDir, baseName + "-ConvertedByHtmlToWml.docx"));
    var annotatedHtmlFi = new FileInfo(Path.Combine(destinationDir, baseName + "-Annotated.txt"));

    XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

    // Author CSS is the text of the first attribute-less <style> element; the tag name
    // is matched with an ordinal, culture-independent comparison.
    XElement styleElement = html.Descendants().FirstOrDefault(
        d => string.Equals(d.Name.LocalName, "style", StringComparison.OrdinalIgnoreCase) && !d.HasAttributes);
    string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    usedAuthorCss = ConvertFontEncode(usedAuthorCss);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
    // Image references in HTML files contain the path to the subdir that contains the
    // images, so the base URI is the directory that contains the HTML file.
    settings.BaseUriForImages = sourceHtmlFi.DirectoryName;

    WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        usedAuthorCss,
        userCss,
        html,
        settings,
        null,
        produceAnnotatedHtml ? annotatedHtmlFi.FullName : null);
    doc.SaveAs(destDocxFi.FullName);
}
/// <summary>
/// Converts the named HTML test file with a 36pt default block-content margin and
/// checks that a document is produced, then saves/validates it via
/// <c>SaveValidateAndFormatMainDocPart</c>. All outputs are prefixed with "HW003_".
/// </summary>
/// <param name="name">File name of the HTML input inside <c>TestUtil.SourceDir</c>.</param>
public void HW003(string name)
{
    const string prefix = "HW003_";

    var inputHtml = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));
    string outDir = TestUtil.TempDir.FullName;
    string prefixedName = prefix + inputHtml.Name;

    // Numbered artifacts written to the temp directory for this test.
    var copiedHtml = new FileInfo(Path.Combine(outDir, prefixedName.Replace(".html", "-1-Source.html")));
    var cssOut = new FileInfo(Path.Combine(outDir, prefixedName.Replace(".html", "-2.css")));
    var docxOut = new FileInfo(Path.Combine(outDir, prefixedName.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
    var annotatedOut = new FileInfo(Path.Combine(outDir, prefixedName.Replace(".html", "-4-Annotated.txt")));

    File.Copy(inputHtml.FullName, copiedHtml.FullName);
    XElement root = HtmlToWmlReadAsXElement.ReadAsXElement(copiedHtml);

    // Author CSS is the text of the first <style> element, if any.
    XElement firstStyle = root.Descendants().FirstOrDefault(e => e.Name.LocalName.ToLower() == "style");
    string authorCss = HtmlToWmlConverter.CleanUpCss((string)firstStyle);
    File.WriteAllText(cssOut.FullName, authorCss);

    HtmlToWmlConverterSettings conversionSettings = HtmlToWmlConverter.GetDefaultSettings();
    conversionSettings.BaseUriForImages = Path.Combine(outDir);
    conversionSettings.DefaultBlockContentMargin = "36pt";

    string annotatedPath = s_ProduceAnnotatedHtml ? annotatedOut.FullName : null;
    WmlDocument converted = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss, authorCss, userCss, root, conversionSettings, null, annotatedPath);

    Assert.NotNull(converted);
    if (converted == null)
    {
        return;
    }

    SaveValidateAndFormatMainDocPart(docxOut, converted);
}
/// <summary>
/// Converts one HTML file to a .docx written into <paramref name="outDir"/>, also writing
/// the cleaned author CSS there and handing the converter a path for annotated HTML.
/// </summary>
/// <param name="htmlFile">Path of the HTML file to convert.</param>
/// <param name="outDir">Directory that receives the generated files.</param>
public static void Convert(string htmlFile, string outDir)
{
    var produceAnnotatedHtml = true;
    var sourceHtmlFi = new FileInfo(htmlFile);

    // Build output names from the extension-less base name. Replacing the literal
    // ".html" did nothing for ".htm" or ".HTML" inputs, so the outputs ended up with
    // the input's own file name (and could overwrite it).
    var baseName = Path.GetFileNameWithoutExtension(sourceHtmlFi.Name);
    var destCssFi = new FileInfo(Path.Combine(outDir, baseName + "-2.css"));
    var destDocxFi = new FileInfo(Path.Combine(outDir, baseName + ".docx"));
    var annotatedHtmlFi = new FileInfo(Path.Combine(outDir, baseName + "-4-Annotated.txt"));

    var html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

    // Author CSS is the text of the first <style> element; the tag name is matched
    // with an ordinal, culture-independent comparison.
    var styleElement = html.Descendants().FirstOrDefault(
        d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    var usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    var settings = HtmlToWmlConverter.GetDefaultSettings();
    // Image references in HTML files contain the path to the subdir that contains the
    // images, so the base URI is the directory that contains the HTML file.
    settings.BaseUriForImages = sourceHtmlFi.DirectoryName;

    var doc = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        usedAuthorCss,
        userCss,
        html,
        settings,
        null,
        produceAnnotatedHtml ? annotatedHtmlFi.FullName : null);
    doc.SaveAs(destDocxFi.FullName);
}
/// <summary>
/// Round-trip test: copies the named .docx to the temp directory, calls
/// <c>SaveAsHtmlUsingHtmlConverter</c> on the copy, reads the "-2-Source.html" file
/// back, converts it to WordprocessingML using settings derived from the source
/// document, and checks that a document is produced.
/// </summary>
/// <param name="name">File name of the .docx input inside <c>TestUtil.SourceDir</c>.</param>
public void HW010(string name)
{
    var inputDocx = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));
    string outDir = TestUtil.TempDir.FullName;

    // Numbered artifacts written to the temp directory for this test.
    var copiedDocx = new FileInfo(Path.Combine(outDir, name.Replace(".docx", "-2-Source.docx")));
    var generatedHtml = new FileInfo(Path.Combine(outDir, name.Replace(".docx", "-2-Source.html")));
    var cssOut = new FileInfo(Path.Combine(outDir, name.Replace(".docx", "-3.css")));
    var docxOut = new FileInfo(Path.Combine(outDir, name.Replace(".docx", "-4-ConvertedByHtmlToWml.docx")));
    var annotatedOut = new FileInfo(Path.Combine(outDir, name.Replace(".docx", "-5-Annotated.txt")));

    File.Copy(inputDocx.FullName, copiedDocx.FullName);
    // Presumably produces the "-2-Source.html" file read below - confirm against the helper.
    SaveAsHtmlUsingHtmlConverter(copiedDocx.FullName, copiedDocx.DirectoryName);

    XElement root = HtmlToWmlReadAsXElement.ReadAsXElement(generatedHtml);

    // Author CSS is the text of the first <style> element, if any.
    XElement firstStyle = root.Descendants().FirstOrDefault(e => e.Name.LocalName.ToLower() == "style");
    string authorCss = HtmlToWmlConverter.CleanUpCss((string)firstStyle);
    File.WriteAllText(cssOut.FullName, authorCss);

    // Default settings are derived from the copied source document.
    var settingsSource = new WmlDocument(copiedDocx.FullName);
    HtmlToWmlConverterSettings conversionSettings = HtmlToWmlConverter.GetDefaultSettings(settingsSource);

    // Image references in HTML files contain the path to the subdir that contains the
    // images, so the base URI is the directory that contains the HTML files.
    conversionSettings.BaseUriForImages = Path.Combine(outDir);

    string annotatedPath = s_ProduceAnnotatedHtml ? annotatedOut.FullName : null;
    WmlDocument converted = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        authorCss,
        userCss,
        root,
        conversionSettings,
        null, // use the default EmptyDocument
        annotatedPath);

    Assert.NotNull(converted);
    if (converted != null)
    {
        SaveValidateAndFormatMainDocPart(docxOut, converted);
    }

#if DO_CONVERSION_VIA_WORD
    var altChunkBefore = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-5-AltChunkBefore.docx")));
    var altChunkAfter = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-6-ConvertedViaWord.docx")));
    WordAutomationUtilities.DoConversionViaWord(altChunkBefore, altChunkAfter, root);
#endif
}
/// <summary>
/// Converts the HTML file at <c>absolutePath</c> to a Word document saved at
/// <paramref name="saveTo"/>. Script elements are removed, CSS is collected from linked
/// stylesheets and from the first &lt;style&gt; element, and both are dumped into the
/// working directory. CSS and conversion failures are logged rather than thrown.
/// </summary>
/// <param name="saveTo">Destination path of the .docx file.</param>
/// <returns><paramref name="saveTo"/>, returned even when the conversion failed and was only logged.</returns>
public string SaveAs(string saveTo)
{
    XElement xhtml = HtmlToWmlReadAsXElement.ReadAsXElement(new FileInfo(this.absolutePath));

    // Ordinal comparison: culture-sensitive ToLower() turns "SCRIPT"/"LINK" into
    // "scrıpt"/"lınk" under Turkish culture and would miss those elements.
    xhtml.Descendants()
        .Where(e => string.Equals(e.Name.LocalName, "script", StringComparison.OrdinalIgnoreCase))
        .Remove();

    // Header CSS is gathered on its own so that a broken <link> cannot discard it.
    string headerCss = "";
    try
    {
        XElement styleElement = xhtml.Descendants().FirstOrDefault(
            d => string.Equals(d.Name.LocalName, "style", StringComparison.OrdinalIgnoreCase));
        headerCss = HtmlToWmlConverter.CleanUpCss((string)styleElement) ?? "";
    }
    catch (Exception ecss)
    {
        Logger.Error("获取CSS出错", ecss);
    }

    string linkCss = "";
    try
    {
        // <link> elements without rel/href are skipped instead of raising a
        // NullReferenceException; a stylesheet that cannot be read is logged and
        // skipped so the remaining ones are still used.
        // NOTE(review): href is passed to File.ReadAllText unchanged, so relative paths
        // resolve against the process working directory and URLs fail - confirm intent.
        string[] linkStyleSheets = xhtml.Descendants()
            .Where(d => string.Equals(d.Name.LocalName, "link", StringComparison.OrdinalIgnoreCase)
                && string.Equals((string)d.Attribute("rel"), "stylesheet", StringComparison.OrdinalIgnoreCase))
            .Select(d => (string)d.Attribute("href"))
            .Where(href => !string.IsNullOrEmpty(href))
            .Select(href =>
            {
                try
                {
                    return File.ReadAllText(href);
                }
                catch (Exception esheet)
                {
                    Logger.Error("获取CSS出错", esheet);
                    return null;
                }
            })
            .Where(css => css != null)
            .ToArray();

        string cleaned = HtmlToWmlConverter.CleanUpCss(string.Join("\r\n", linkStyleSheets)) ?? "";
        linkCss = cleaned.Trim('\r', '\n');
    }
    catch (Exception ecss)
    {
        Logger.Error("获取CSS出错", ecss);
    }

    File.WriteAllText(Path.Combine(this.workingDirectory, "dump-link.css"), linkCss);
    File.WriteAllText(Path.Combine(this.workingDirectory, "dump-header.css"), headerCss);

    HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
    try
    {
        WmlDocument word = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, linkCss, headerCss, xhtml, settings);
        word.SaveAs(saveTo);
    }
    catch (Exception ew)
    {
        Logger.Error("保存Word出错", ew);
    }

#if !DEBUG
    // Release builds remove the working directory; debug builds keep the CSS dumps.
    try
    {
        Directory.Delete(this.workingDirectory, true);
    }
    catch (Exception ex)
    {
        Logger.Error("Delete Directory error. Directory=" + this.workingDirectory, ex);
    }
#endif

    return saveTo;
}
/// <summary>
/// Converts one HTML file to "&lt;base name&gt;-ConvertedByHtmlToWml.docx" inside
/// <paramref name="destinationDir"/>.
/// </summary>
/// <param name="file">Path of the HTML file to convert.</param>
/// <param name="destinationDir">Directory that receives the generated .docx.</param>
private static void ConvertToDocx(string file, string destinationDir)
{
    var sourceHtmlFi = new FileInfo(file);

    // Build the output name from the extension-less base name. Replacing the literal
    // ".html" did nothing for ".htm" or ".HTML" inputs, so the .docx was written under
    // the input's own file name (overwriting it when both directories coincide).
    var baseName = Path.GetFileNameWithoutExtension(sourceHtmlFi.Name);
    var destDocxFi = new FileInfo(Path.Combine(destinationDir, baseName + "-ConvertedByHtmlToWml.docx"));

    var html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

    // Author CSS is the text of the first <style> element; the tag name is matched
    // with an ordinal, culture-independent comparison.
    var styleElement = html.Descendants().FirstOrDefault(
        d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    var usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);

    var settings = HtmlToWmlConverter.GetDefaultSettings();
    // Image references in HTML files contain the path to the subdir that contains the
    // images, so the base URI is the directory that contains the HTML file.
    settings.BaseUriForImages = sourceHtmlFi.DirectoryName;

    var doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings);
    doc.SaveAs(destDocxFi.FullName);
}
/// <summary>
/// Converts the named HTML test file directly from the TestFiles directory, checks that
/// a document is produced, and saves/validates it via <c>SaveValidateAndFormatMainDocPart</c>.
/// </summary>
/// <param name="name">File name of the HTML input inside the TestFiles directory.</param>
public void HW001(string name)
{
    var sourceDir = new DirectoryInfo("../../../../TestFiles/");
    var sourceHtmlFi = new FileInfo(Path.Combine(sourceDir.FullName, name));

    var destCssFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-2.css")));
    var destDocxFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
    var annotatedHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

    var html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

    // Author CSS is the text of the first <style> element; the tag name is matched
    // with an ordinal, culture-independent comparison.
    var styleElement = html.Descendants().FirstOrDefault(
        d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    var usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    var settings = HtmlToWmlConverter.GetDefaultSettings();
    // Image references in HTML files contain the path to the subdir that contains the
    // images, so the base URI is the directory that contains the HTML files.
    settings.BaseUriForImages = sourceDir.FullName;

    var doc = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        usedAuthorCss,
        userCss,
        html,
        settings,
        null,
        s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

    Assert.NotNull(doc);
    SaveValidateAndFormatMainDocPart(destDocxFi, doc);
}
/// <summary>
/// Converts the named HTML test file after copying it (and its "_files" image directory,
/// when present) into the temp directory, checks that a document is produced, and
/// saves/validates it. Two compile-time filter lists (disabled by default) allow the
/// run to be restricted to inputs whose HTML or CSS contains one of the given substrings.
/// </summary>
/// <param name="name">File name of the HTML input inside the TestFiles directory.</param>
public void HW001(string name)
{
    // Debug aid: switch to "#if true" to run only files whose author CSS mentions one of these.
#if false
    string[] cssFilter = new[] {
        "text-indent",
        "margin-left",
        "margin-right",
        "padding-left",
        "padding-right",
    };
#else
    string[] cssFilter = null;
#endif

    // Debug aid: switch to "#if true" to run only files whose HTML mentions one of these.
#if false
    string[] htmlFilter = new[] {
        "img",
    };
#else
    string[] htmlFilter = null;
#endif

    DirectoryInfo sourceDir = new DirectoryInfo("../../../../TestFiles/");
    var sourceHtmlFi = new FileInfo(Path.Combine(sourceDir.FullName, name));
    var sourceImageDi = new DirectoryInfo(Path.Combine(sourceDir.FullName, sourceHtmlFi.Name.Replace(".html", "_files")));

    var destImageDi = new DirectoryInfo(Path.Combine(TempDir, sourceImageDi.Name));
    var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-1-Source.html")));
    var destCssFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-2.css")));
    var destDocxFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
    var annotatedHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

    if (!sourceCopiedToDestHtmlFi.Exists)
    {
        Directory.CreateDirectory(sourceCopiedToDestHtmlFi.DirectoryName);
        File.Copy(sourceHtmlFi.FullName, sourceCopiedToDestHtmlFi.FullName);
    }

    XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

    if (htmlFilter != null && htmlFilter.Any())
    {
        // Serialize only when a filter is active.
        string htmlString = html.ToString();
        if (!htmlFilter.Any(item => htmlString.Contains(item)))
        {
            sourceCopiedToDestHtmlFi.Delete();
            return;
        }
    }

    // Author CSS is the text of the first <style> element; the tag name is matched
    // with an ordinal, culture-independent comparison.
    XElement styleElement = html.Descendants().FirstOrDefault(
        d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));
    string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
    File.WriteAllText(destCssFi.FullName, usedAuthorCss);

    if (cssFilter != null && cssFilter.Any())
    {
        if (!cssFilter.Any(item => usedAuthorCss.Contains(item)))
        {
            sourceCopiedToDestHtmlFi.Delete();
            destCssFi.Delete();
            return;
        }
    }

    if (sourceImageDi.Exists)
    {
        destImageDi.Create();
        foreach (var file in sourceImageDi.GetFiles())
        {
            // Overwrite existing images: the HTML copy above tolerates a populated temp
            // directory, so the image copy must not throw on a rerun either.
            File.Copy(file.FullName, Path.Combine(destImageDi.FullName, file.Name), true);
        }
    }

    HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
    // Image references in HTML files contain the path to the subdir that contains the
    // images, so the base URI is the directory that contains the HTML files.
    settings.BaseUriForImages = Path.Combine(TempDir);

    WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
        defaultCss,
        usedAuthorCss,
        userCss,
        html,
        settings,
        null,
        s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

    Assert.NotNull(doc);
    if (doc != null)
    {
        SaveValidateAndFormatMainDocPart(destDocxFi, doc);
    }

#if DO_CONVERSION_VIA_WORD
    var newAltChunkBeforeFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".html", "-5-AltChunkBefore.docx")));
    var newAltChunkAfterFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".html", "-6-ConvertedViaWord.docx")));
    WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
}
/// <summary>
/// Parses <paramref name="printData"/> with <c>HtmlToWmlReadAsXElement.ReadAsXElement</c>,
/// converts the result to a Word document with <c>HtmlToWmlConverter.ConvertHtmlToWml</c>,
/// and saves it at <paramref name="filepath"/>.
/// </summary>
/// <remarks>
/// The author CSS is the cleaned-up text of the first element whose lower-cased local
/// name is "style". The default stylesheet is the verbatim string declared inside the
/// method and the user stylesheet is empty. Converter settings are the defaults from
/// <c>GetDefaultSettings()</c>. The local <c>s_ProduceAnnotatedHtml</c> is assigned but
/// not read anywhere in this method.
/// NOTE(review): <paramref name="printData"/> is presumably HTML markup rather than a
/// file path - confirm against the <c>ReadAsXElement(string)</c> overload.
/// </remarks>
/// <param name="filepath">Destination path of the .docx file.</param>
/// <param name="printData">Input passed to <c>ReadAsXElement</c>.</param>
public static void HTMLToWord(string filepath, string printData) { bool s_ProduceAnnotatedHtml = true; //var destCssFi = new FileInfo(Path.Combine(destinationDir, sourceHtmlFi.Name.Replace(".html", "-2.css"))); var destDocxFi = new FileInfo(filepath); XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(printData); string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLower() == "style")); //File.WriteAllText(destCssFi.FullName, usedAuthorCss); HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings(); // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory // that contains the HTML files string defaultCss = @"html, address, blockquote, body, dd, div, dl, dt, fieldset, form, frame, frameset, h1, h2, h3, h4, h5, h6, noframes, ol, p, ul, center, dir, hr, menu, pre { display: block; unicode-bidi: embed } li { display: list-item } head { display: none } table { display: table } tr { display: table-row } thead { display: table-header-group } tbody { display: table-row-group } tfoot { display: table-footer-group } col { display: table-column } colgroup { display: table-column-group } td, th { display: table-cell } caption { display: table-caption } th { font-weight: bolder; text-align: center } caption { text-align: center } body { margin: auto; } h1 { font-size: 2em; margin: auto; } h2 { font-size: 1.5em; margin: auto; } h3 { font-size: 1.17em; margin: auto; } h4, p, blockquote, ul, fieldset, form, ol, dl, dir, menu { margin: auto } a { color: blue; } h5 { font-size: .83em; margin: auto } h6 { font-size: .75em; margin: auto } h1, h2, h3, h4, h5, h6, b, strong { font-weight: bolder } blockquote { margin-left: 40px; margin-right: 40px } i, cite, em, var, address { font-style: italic } pre, tt, code, kbd, samp { font-family: monospace } pre { white-space: pre } button, textarea, input, select { display: 
inline-block } big { font-size: 1.17em } small, sub, sup { font-size: .83em } sub { vertical-align: sub } sup { vertical-align: super } table { border-spacing: 2px; } thead, tbody, tfoot { vertical-align: middle } td, th, tr { vertical-align: inherit } s, strike, del { text-decoration: line-through } hr { border: 1px inset } ol, ul, dir, menu, dd { margin-left: 40px } ol { list-style-type: decimal } ol ul, ul ol, ul ul, ol ol { margin-top: 0; margin-bottom: 0 } u, ins { text-decoration: underline } br:before { content: ""\A""; white-space: pre-line } center { text-align: center } :link, :visited { text-decoration: underline } :focus { outline: thin dotted invert } /* Begin bidirectionality settings (do not change) */ BDO[DIR=""ltr""] { direction: ltr; unicode-bidi: bidi-override } BDO[DIR=""rtl""] { direction: rtl; unicode-bidi: bidi-override } *[DIR=""ltr""] { direction: ltr; unicode-bidi: embed } *[DIR=""rtl""] { direction: rtl; unicode-bidi: embed } "; string userCss = @""; WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings); doc.SaveAs(destDocxFi.FullName); }