/// <summary>
/// Negative test: converting the named HTML test file must throw <c>OpenXmlPowerToolsException</c>.
/// </summary>
/// <param name="name">File name of the HTML document, resolved under <c>../../../../TestFiles/</c>.</param>
public void HW004(string name)
        {
            var sourceDir    = new DirectoryInfo("../../../../TestFiles/");
            var sourceHtmlFi = new FileInfo(Path.Combine(sourceDir.FullName, name));

            // Artifacts written to TempDir; the numeric infix keeps them sorted in creation order.
            var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-1-Source.html")));
            var destCssFi       = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-2.css")));
            var annotatedHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

            // Work on a copy so the source test file is never touched.
            File.Copy(sourceHtmlFi.FullName, sourceCopiedToDestHtmlFi.FullName);
            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

            // The author CSS is the content of the first <style> element (tag name matched case-insensitively,
            // independent of the current culture).
            string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style"));

            File.WriteAllText(destCssFi.FullName, usedAuthorCss);

            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();

            settings.BaseUriForImages = TempDir;

            // No document is saved here: the conversion itself is expected to fail.
            Assert.Throws <OpenXmlPowerToolsException>(() => HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings, null, s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null));
        }
Пример #2
0
    /// <summary>
    /// Converts one HTML file to a Word document, writing the extracted CSS, the DOCX and the
    /// annotated-HTML dump into <paramref name="destinationDir"/>.
    /// </summary>
    /// <param name="file">Path of the HTML file to convert.</param>
    /// <param name="destinationDir">Directory that receives the generated files.</param>
    public static void ConvertToDocx(string file, string destinationDir)
    {
        var sourceHtmlFi = new FileInfo(file);

        Console.WriteLine("Converting " + sourceHtmlFi.Name);

        // Build output names from the extension-less base name. Replace(".html", …) leaves names such as
        // "page.htm" or "PAGE.HTML" unchanged, which made all three outputs share one file name and could
        // overwrite the input when destinationDir is the source directory.
        string baseName = Path.GetFileNameWithoutExtension(sourceHtmlFi.Name);

        var destCssFi       = new FileInfo(Path.Combine(destinationDir, baseName + ".css"));
        var destDocxFi      = new FileInfo(Path.Combine(destinationDir, baseName + "-ConvertedByHtmlToWml.docx"));
        var annotatedHtmlFi = new FileInfo(Path.Combine(destinationDir, baseName + "-Annotated.txt"));

        XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

        // Author CSS comes from the first attribute-less <style> element (tag name matched
        // case-insensitively, independent of the current culture).
        string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style" && !d.HasAttributes));

        usedAuthorCss = ConvertFontEncode(usedAuthorCss);
        File.WriteAllText(destCssFi.FullName, usedAuthorCss);

        HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();

        // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
        // that contains the HTML files
        settings.BaseUriForImages = sourceHtmlFi.DirectoryName;

        // The annotated-HTML dump is always produced by this helper.
        WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings, null, annotatedHtmlFi.FullName);

        doc.SaveAs(destDocxFi.FullName);
    }
        /// <summary>
        /// Converts the named HTML test file with a 36pt default block margin and passes the resulting
        /// document to <c>SaveValidateAndFormatMainDocPart</c>.
        /// </summary>
        /// <param name="name">File name of the HTML document under <c>TestUtil.SourceDir</c>.</param>
        public void HW003(string name)
        {
            const string prefix = "HW003_";

            var inputHtml = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));

            // All artifacts share the prefixed source name and differ only in the suffix that replaces ".html".
            string tempDirPath  = TestUtil.TempDir.FullName;
            string prefixedName = prefix + inputHtml.Name;

            var copiedHtml   = new FileInfo(Path.Combine(tempDirPath, prefixedName.Replace(".html", "-1-Source.html")));
            var cssOut       = new FileInfo(Path.Combine(tempDirPath, prefixedName.Replace(".html", "-2.css")));
            var docxOut      = new FileInfo(Path.Combine(tempDirPath, prefixedName.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
            var annotatedOut = new FileInfo(Path.Combine(tempDirPath, prefixedName.Replace(".html", "-4-Annotated.txt")));

            // Parse the copy, not the original test file.
            File.Copy(inputHtml.FullName, copiedHtml.FullName);
            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(copiedHtml);

            // First <style> element (if any) supplies the author CSS.
            XElement styleElement = html.Descendants().FirstOrDefault(e => e.Name.LocalName.ToLower() == "style");
            string usedAuthorCss  = HtmlToWmlConverter.CleanUpCss((string)styleElement);
            File.WriteAllText(cssOut.FullName, usedAuthorCss);

            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
            settings.BaseUriForImages          = Path.Combine(tempDirPath);
            settings.DefaultBlockContentMargin = "36pt";

            string annotatedPath = s_ProduceAnnotatedHtml ? annotatedOut.FullName : null;
            WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings, null, annotatedPath);

            Assert.NotNull(doc);
            if (doc == null)
            {
                return;
            }

            SaveValidateAndFormatMainDocPart(docxOut, doc);
        }
Пример #4
0
        /// <summary>
        /// Converts one HTML file to a Word document, writing the extracted CSS, the DOCX and the
        /// annotated-HTML dump into <paramref name="outDir"/>.
        /// </summary>
        /// <param name="htmlFile">Path of the HTML file to convert.</param>
        /// <param name="outDir">Directory that receives the generated files.</param>
        public static void Convert(string htmlFile, string outDir)
        {
            var sourceHtmlFi = new FileInfo(htmlFile);

            // Build output names from the extension-less base name. Replace(".html", …) leaves names such as
            // "page.htm" or "PAGE.HTML" unchanged, which made all three outputs share one file name and could
            // overwrite the input when outDir is the source directory.
            var baseName = Path.GetFileNameWithoutExtension(sourceHtmlFi.Name);

            var destCssFi       = new FileInfo(Path.Combine(outDir, baseName + "-2.css"));
            var destDocxFi      = new FileInfo(Path.Combine(outDir, baseName + ".docx"));
            var annotatedHtmlFi = new FileInfo(Path.Combine(outDir, baseName + "-4-Annotated.txt"));

            var html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

            // Author CSS comes from the first <style> element (tag name matched case-insensitively,
            // independent of the current culture).
            var usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style"));

            File.WriteAllText(destCssFi.FullName, usedAuthorCss);

            var settings = HtmlToWmlConverter.GetDefaultSettings();

            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
            // that contains the HTML files
            settings.BaseUriForImages = sourceHtmlFi.DirectoryName;

            // The annotated-HTML dump is always produced by this helper.
            var doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings, null, annotatedHtmlFi.FullName);

            doc.SaveAs(destDocxFi.FullName);
        }
        /// <summary>
        /// Round-trip test: copies the named DOCX into the temp directory, calls
        /// <c>SaveAsHtmlUsingHtmlConverter</c> on the copy, converts the resulting HTML back with
        /// <c>HtmlToWmlConverter</c> and passes the document to <c>SaveValidateAndFormatMainDocPart</c>.
        /// </summary>
        /// <param name="name">File name of the DOCX under <c>TestUtil.SourceDir</c>.</param>
        public void HW010(string name)
        {
            var originalDocx = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));

            // Artifact names replace ".docx" in the source name with a numbered suffix.
            string tempDirPath = TestUtil.TempDir.FullName;
            var copiedDocx   = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-2-Source.docx")));
            var renderedHtml = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-2-Source.html")));
            var cssOut       = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-3.css")));
            var docxOut      = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-4-ConvertedByHtmlToWml.docx")));
            var annotatedOut = new FileInfo(Path.Combine(tempDirPath, name.Replace(".docx", "-5-Annotated.txt")));

            File.Copy(originalDocx.FullName, copiedDocx.FullName);

            // NOTE(review): presumably this writes "<name>-2-Source.html" next to the copied DOCX, which is
            // the file read on the next line — confirm against SaveAsHtmlUsingHtmlConverter.
            SaveAsHtmlUsingHtmlConverter(copiedDocx.FullName, copiedDocx.DirectoryName);
            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(renderedHtml);

            // First <style> element (if any) supplies the author CSS.
            XElement styleElement = html.Descendants().FirstOrDefault(e => e.Name.LocalName.ToLower() == "style");
            string usedAuthorCss  = HtmlToWmlConverter.CleanUpCss((string)styleElement);
            File.WriteAllText(cssOut.FullName, usedAuthorCss);

            // Default settings are derived from the copied source document.
            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings(new WmlDocument(copiedDocx.FullName));

            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
            // that contains the HTML files
            settings.BaseUriForImages = Path.Combine(tempDirPath);

            string annotatedPath = s_ProduceAnnotatedHtml ? annotatedOut.FullName : null;
            WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
                defaultCss,
                usedAuthorCss,
                userCss,
                html,
                settings,
                null,  // use the default EmptyDocument
                annotatedPath);

            Assert.NotNull(doc);

            if (doc != null)
            {
                SaveValidateAndFormatMainDocPart(docxOut, doc);
            }

#if DO_CONVERSION_VIA_WORD
            var newAltChunkBeforeFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-5-AltChunkBefore.docx")));
            var newAltChunkAfterFi  = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-6-ConvertedViaWord.docx")));
            WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
        }
Пример #6
0
        /// <summary>
        /// Converts the HTML file at <c>absolutePath</c> to a Word document and saves it to
        /// <paramref name="saveTo"/>.
        /// </summary>
        /// <remarks>
        /// Best effort: failures while collecting CSS or while converting/saving are logged through
        /// <c>Logger</c> and not rethrown, so the returned path does not guarantee that a document was written.
        /// The collected CSS is also dumped to <c>dump-link.css</c> and <c>dump-header.css</c> in
        /// <c>workingDirectory</c>; in non-DEBUG builds that directory is deleted afterwards.
        /// </remarks>
        /// <param name="saveTo">Destination path of the generated document.</param>
        /// <returns><paramref name="saveTo"/>, unchanged.</returns>
        public string SaveAs(string saveTo)
        {
            XElement xhtml = HtmlToWmlReadAsXElement.ReadAsXElement(new FileInfo(this.absolutePath));

            // Tag names are compared ordinally: a culture-sensitive ToLower() maps "SCRIPT"/"LINK" to
            // "scrıpt"/"lınk" under tr-TR, so those elements would not be recognised.
            xhtml.Descendants().Where(i => string.Equals(i.Name.LocalName, "script", StringComparison.OrdinalIgnoreCase)).Remove();
            string linkCss   = "";
            string headerCss = "";

            try
            {
                // Inline <style> first: it needs no file access, so it survives a failure to read a linked stylesheet.
                headerCss = HtmlToWmlConverter.CleanUpCss((string)xhtml.Descendants().FirstOrDefault(d => string.Equals(d.Name.LocalName, "style", StringComparison.OrdinalIgnoreCase)));

                // (string)attribute is null for a missing attribute, so <link> elements without rel or href are
                // skipped instead of throwing NullReferenceException and discarding all CSS.
                // NOTE(review): a relative href is resolved against the process working directory, not the
                // HTML file's directory — confirm that is intended.
                var linkStyleSheets = xhtml.Descendants()
                                           .Where(d => string.Equals(d.Name.LocalName, "link", StringComparison.OrdinalIgnoreCase) &&
                                                       string.Equals((string)d.Attribute("rel"), "stylesheet", StringComparison.OrdinalIgnoreCase))
                                           .Select(d => (string)d.Attribute("href"))
                                           .Where(href => !string.IsNullOrEmpty(href))
                                           .Select(href => File.ReadAllText(href));
                linkCss = HtmlToWmlConverter.CleanUpCss(string.Join("\r\n", linkStyleSheets));
                linkCss = linkCss.Trim('\r', '\n');
            }
            catch (Exception ecss)
            {
                Logger.Error("获取CSS出错", ecss);
            }
            File.WriteAllText(Path.Combine(this.workingDirectory, "dump-link.css"), linkCss);
            File.WriteAllText(Path.Combine(this.workingDirectory, "dump-header.css"), headerCss);
            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();

            try
            {
                // Linked stylesheets are passed as the author CSS, the inline <style> content as the user CSS.
                WmlDocument word = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, linkCss, headerCss, xhtml, settings);
                word.SaveAs(saveTo);
            }
            catch (Exception ew)
            {
                Logger.Error("保存Word出错", ew);
            }
#if !DEBUG
            try
            {
                Directory.Delete(this.workingDirectory, true);
            }
            catch (Exception ex)
            {
                Logger.Error("Delete Directory error. Directory=" + this.workingDirectory, ex);
            }
#endif
            return saveTo;
        }
        /// <summary>
        /// Converts one HTML file to a Word document saved as
        /// <c>&lt;base name&gt;-ConvertedByHtmlToWml.docx</c> in <paramref name="destinationDir"/>.
        /// </summary>
        /// <param name="file">Path of the HTML file to convert.</param>
        /// <param name="destinationDir">Directory that receives the generated document.</param>
        private static void ConvertToDocx(string file, string destinationDir)
        {
            var sourceHtmlFi = new FileInfo(file);

            // Use the extension-less base name. Replace(".html", …) leaves names such as "page.htm" or
            // "PAGE.HTML" unchanged, so the DOCX could be written over the source file when
            // destinationDir is the source directory.
            var baseName   = Path.GetFileNameWithoutExtension(sourceHtmlFi.Name);
            var destDocxFi = new FileInfo(Path.Combine(destinationDir, baseName + "-ConvertedByHtmlToWml.docx"));

            var html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

            // Author CSS comes from the first <style> element (tag name matched case-insensitively,
            // independent of the current culture).
            var usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style"));

            var settings = HtmlToWmlConverter.GetDefaultSettings();

            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
            // that contains the HTML files
            settings.BaseUriForImages = sourceHtmlFi.DirectoryName;

            var doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings);

            doc.SaveAs(destDocxFi.FullName);
        }
        /// <summary>
        /// Converts the named HTML test file to WordprocessingML and passes the resulting document to
        /// <c>SaveValidateAndFormatMainDocPart</c>.
        /// </summary>
        /// <param name="name">File name of the HTML document, resolved under <c>../../../../TestFiles/</c>.</param>
        public void HW001(string name)
        {
            var sourceDir       = new DirectoryInfo("../../../../TestFiles/");
            var sourceHtmlFi    = new FileInfo(Path.Combine(sourceDir.FullName, name));
            var destCssFi       = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-2.css")));
            var destDocxFi      = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
            var annotatedHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

            // The source file is read in place; only generated artifacts go to the temp directory.
            var html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceHtmlFi);

            // Author CSS comes from the first <style> element (tag name matched case-insensitively,
            // independent of the current culture).
            var usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style"));

            File.WriteAllText(destCssFi.FullName, usedAuthorCss);

            var settings = HtmlToWmlConverter.GetDefaultSettings();

            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory that contains the HTML files
            settings.BaseUriForImages = sourceDir.FullName;

            var doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings, null, s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

            Assert.NotNull(doc);
            SaveValidateAndFormatMainDocPart(destDocxFi, doc);
        }
        /// <summary>
        /// Copies the named HTML test file (and its <c>_files</c> image directory, when present) into
        /// <c>TempDir</c>, converts it to WordprocessingML and passes the resulting document to
        /// <c>SaveValidateAndFormatMainDocPart</c>.
        /// </summary>
        /// <remarks>
        /// The two <c>#if false</c> sections are developer switches: when enabled, the test returns early
        /// (removing the files it copied or wrote) unless the HTML / CSS contains one of the listed substrings.
        /// </remarks>
        /// <param name="name">File name of the HTML document, resolved under <c>../../../../TestFiles/</c>.</param>
        public void HW001(string name)
        {
#if false
            string[] cssFilter = new[] {
                "text-indent",
                "margin-left",
                "margin-right",
                "padding-left",
                "padding-right",
            };
#else
            string[] cssFilter = null;
#endif

#if false
            string[] htmlFilter = new[] {
                "img",
            };
#else
            string[] htmlFilter = null;
#endif

            DirectoryInfo sourceDir     = new DirectoryInfo("../../../../TestFiles/");
            var           sourceHtmlFi  = new FileInfo(Path.Combine(sourceDir.FullName, name));
            var           sourceImageDi = new DirectoryInfo(Path.Combine(sourceDir.FullName, sourceHtmlFi.Name.Replace(".html", "_files")));

            var destImageDi = new DirectoryInfo(Path.Combine(TempDir, sourceImageDi.Name));
            var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-1-Source.html")));
            var destCssFi       = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-2.css")));
            var destDocxFi      = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
            var annotatedHtmlFi = new FileInfo(Path.Combine(TempDir, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

            // The copy may already exist from an earlier run against the same TempDir.
            if (!sourceCopiedToDestHtmlFi.Exists)
            {
                Directory.CreateDirectory(sourceCopiedToDestHtmlFi.DirectoryName);
                File.Copy(sourceHtmlFi.FullName, sourceCopiedToDestHtmlFi.FullName);
            }
            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

            if (htmlFilter != null && htmlFilter.Any())
            {
                // Serialize only when a filter is active.
                string htmlString = html.ToString();
                if (!htmlFilter.Any(item => htmlString.Contains(item)))
                {
                    sourceCopiedToDestHtmlFi.Delete();
                    return;
                }
            }

            // Author CSS comes from the first <style> element (tag name matched case-insensitively,
            // independent of the current culture).
            string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style"));
            File.WriteAllText(destCssFi.FullName, usedAuthorCss);

            if (cssFilter != null && cssFilter.Any())
            {
                if (!cssFilter.Any(item => usedAuthorCss.Contains(item)))
                {
                    sourceCopiedToDestHtmlFi.Delete();
                    destCssFi.Delete();
                    return;
                }
            }

            if (sourceImageDi.Exists)
            {
                destImageDi.Create();
                foreach (var file in sourceImageDi.GetFiles())
                {
                    // Overwrite: like the HTML copy above, the images may be left over from an earlier run;
                    // without the flag File.Copy throws IOException on the first existing file.
                    File.Copy(file.FullName, Path.Combine(destImageDi.FullName, file.Name), true);
                }
            }

            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
            // that contains the HTML files
            settings.BaseUriForImages = TempDir;

            WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings, null, s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);
            Assert.NotNull(doc);
            if (doc != null)
            {
                SaveValidateAndFormatMainDocPart(destDocxFi, doc);
            }

#if DO_CONVERSION_VIA_WORD
            var newAltChunkBeforeFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".html", "-5-AltChunkBefore.docx")));
            var newAltChunkAfterFi  = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".html", "-6-ConvertedViaWord.docx")));
            WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
        }
Пример #10
0
        /// <summary>
        /// Converts HTML to a Word document and saves it to <paramref name="filepath"/>, using a built-in
        /// default style sheet and an empty user style sheet.
        /// </summary>
        /// <param name="filepath">Destination path of the generated document.</param>
        /// <param name="printData">HTML input, passed as-is to <c>HtmlToWmlReadAsXElement.ReadAsXElement</c>.</param>
        public static void HTMLToWord(string filepath, string printData)
        {
            var destDocxFi = new FileInfo(filepath);

            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(printData);

            // Author CSS comes from the first <style> element (tag name matched case-insensitively,
            // independent of the current culture).
            string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)html.Descendants().FirstOrDefault(d => d.Name.LocalName.ToLowerInvariant() == "style"));

            // Default settings only; no image base URI is configured here.
            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();

            // Style sheet supplied to the converter as the default (lowest-priority) CSS.
            string defaultCss = @"html, address,
blockquote,
body, dd, div,
dl, dt, fieldset, form,
frame, frameset,
h1, h2, h3, h4,
h5, h6, noframes,
ol, p, ul, center,
dir, hr, menu, pre { display: block; unicode-bidi: embed }
li { display: list-item }
head { display: none }
table { display: table }
tr { display: table-row }
thead { display: table-header-group }
tbody { display: table-row-group }
tfoot { display: table-footer-group }
col { display: table-column }
colgroup { display: table-column-group }
td, th { display: table-cell }
caption { display: table-caption }
th { font-weight: bolder; text-align: center }
caption { text-align: center }
body { margin: auto; }
h1 { font-size: 2em; margin: auto; }
h2 { font-size: 1.5em; margin: auto; }
h3 { font-size: 1.17em; margin: auto; }
h4, p,
blockquote, ul,
fieldset, form,
ol, dl, dir,
menu { margin: auto }
a { color: blue; }
h5 { font-size: .83em; margin: auto }
h6 { font-size: .75em; margin: auto }
h1, h2, h3, h4,
h5, h6, b,
strong { font-weight: bolder }
blockquote { margin-left: 40px; margin-right: 40px }
i, cite, em,
var, address { font-style: italic }
pre, tt, code,
kbd, samp { font-family: monospace }
pre { white-space: pre }
button, textarea,
input, select { display: inline-block }
big { font-size: 1.17em }
small, sub, sup { font-size: .83em }
sub { vertical-align: sub }
sup { vertical-align: super }
table { border-spacing: 2px; }
thead, tbody,
tfoot { vertical-align: middle }
td, th, tr { vertical-align: inherit }
s, strike, del { text-decoration: line-through }
hr { border: 1px inset }
ol, ul, dir,
menu, dd { margin-left: 40px }
ol { list-style-type: decimal }
ol ul, ul ol,
ul ul, ol ol { margin-top: 0; margin-bottom: 0 }
u, ins { text-decoration: underline }
br:before { content: ""\A""; white-space: pre-line }
center { text-align: center }
:link, :visited { text-decoration: underline }
:focus { outline: thin dotted invert }
/* Begin bidirectionality settings (do not change) */
BDO[DIR=""ltr""] { direction: ltr; unicode-bidi: bidi-override }
BDO[DIR=""rtl""] { direction: rtl; unicode-bidi: bidi-override }
*[DIR=""ltr""] { direction: ltr; unicode-bidi: embed }
*[DIR=""rtl""] { direction: rtl; unicode-bidi: embed }
";

            string userCss = @"";

            WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(defaultCss, usedAuthorCss, userCss, html, settings);

            doc.SaveAs(destDocxFi.FullName);
        }