コード例 #1
ファイル: WordManager.cs プロジェクト: apphost/sscms
        /// <summary>
        /// Converts a Word (.docx) file to an HTML fragment and reports the title and the first image found.
        /// </summary>
        /// <param name="docxFilePath">Path of the .docx file to read. The file itself is never modified.</param>
        /// <param name="settings">
        /// Conversion options. This method reads <c>IsClearImages</c>, <c>ImageDirectoryPath</c>,
        /// <c>ImageDirectoryUrl</c>, <c>IsSaveHtml</c>, <c>HtmlDirectoryPath</c>, <c>IsFirstLineTitle</c>,
        /// <c>IsClearFormat</c>, <c>IsFirstLineIndent</c>, <c>IsClearFontSize</c> and <c>IsClearFontFamily</c>.
        /// </param>
        /// <returns>
        /// <c>title</c>: the document's <c>dc:title</c> core property, or the text of the first paragraph when
        /// <c>IsFirstLineTitle</c> is set and that text is non-empty; otherwise an empty string.
        /// <c>imageUrl</c>: URL of the first image written to the image directory, or an empty string.
        /// <c>body</c>: the generated <c>&lt;style&gt;</c> element, a newline, and the inner HTML of
        /// <c>&lt;body&gt;</c>, after the optional clean-up passes selected in <paramref name="settings"/>.
        /// </returns>
        /// <remarks>
        /// NOTE(review): this method was recovered from a listing that had lost its brace-only lines and a few
        /// short statements. The statements marked "NOTE(review)" below were restored from what the surrounding
        /// code requires; confirm them against the upstream file.
        /// </remarks>
        public static (string title, string imageUrl, string body) ConvertToHtml(string docxFilePath, ConverterSettings settings)
        {
            var title = string.Empty;
            var imageUrl = string.Empty;
            string content;

            var byteArray = File.ReadAllBytes(docxFilePath);

            // The package is opened editable (second argument 'true'), so it needs a writable stream that can
            // grow. A MemoryStream constructed directly over the byte array is fixed-size, hence the copy.
            using (var memoryStream = new MemoryStream())
            {
                memoryStream.Write(byteArray, 0, byteArray.Length);

                using (var wDoc = WordprocessingDocument.Open(memoryStream, true))
                {
                    var part = wDoc.CoreFilePropertiesPart;
                    if (part != null)
                    {
                        // Casting a missing element to string yields null; keep the empty-string default instead.
                        title = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? string.Empty;
                    }

                    var htmlSettings = new HtmlConverterSettings
                    {
                        // AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                        PageTitle                           = title,
                        FabricateCssClasses                 = true,
                        CssClassPrefix                      = "pt-",
                        RestrictToSupportedLanguages        = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler                        = imageInfo =>
                        {
                            // NOTE(review): restored early exit - images are dropped when the caller asked for
                            // them to be cleared or supplied no directory to save them into.
                            if (settings.IsClearImages || string.IsNullOrEmpty(settings.ImageDirectoryPath))
                            {
                                return null;
                            }

                            // Content type is expected to look like "image/png"; anything without a subtype
                            // cannot be mapped to a format, so the image is skipped rather than throwing.
                            var contentTypeParts = (imageInfo.ContentType ?? string.Empty).Split('/');
                            if (contentTypeParts.Length < 2)
                            {
                                return null;
                            }

                            var extension = StringUtils.ToLower(contentTypeParts[1]);
                            ImageFormat imageFormat;
                            switch (extension)
                            {
                                case "png":
                                    imageFormat = ImageFormat.Png;
                                    break;
                                case "gif":
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "bmp":
                                    imageFormat = ImageFormat.Bmp;
                                    break;
                                case "jpeg":
                                    imageFormat = ImageFormat.Jpeg;
                                    break;
                                case "tiff":
                                    // Convert tiff to gif.
                                    extension   = "gif";
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "x-wmf":
                                    extension   = "wmf";
                                    imageFormat = ImageFormat.Wmf;
                                    break;
                                default:
                                    // If the image format isn't one that we expect, ignore it,
                                    // and don't return markup for the link.
                                    return null;
                            }

                            var imageFileName = StringUtils.GetShortGuid(false) + "." + extension;
                            var imageFilePath = PathUtils.Combine(settings.ImageDirectoryPath, imageFileName);

                            try
                            {
                                imageInfo.Bitmap.Save(imageFilePath, imageFormat);
                            }
                            catch (System.Runtime.InteropServices.ExternalException)
                            {
                                // NOTE(review): restored handler body - an image that cannot be written is
                                // skipped (no <img> emitted) instead of aborting the whole conversion.
                                return null;
                            }

                            var imageSource = PageUtils.Combine(settings.ImageDirectoryUrl, imageFileName);

                            // Remember only the first successfully saved image.
                            if (string.IsNullOrEmpty(imageUrl))
                            {
                                imageUrl = imageSource;
                            }

                            var img = new XElement(Xhtml.img,
                                                   new XAttribute(NoNamespace.src, imageSource),
                                                   imageInfo.AltText != null ?
                                                   new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);

                            // NOTE(review): restored - the handler has to hand the <img> element back to the converter.
                            return img;
                        }
                    };

                    var htmlElement = HtmlConverter.ConvertToHtml(wDoc, htmlSettings);

                    // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
                    // we are using HTML5.
                    // NOTE(review): 'htmlElement' as the document root is restored; it is the only value
                    // available here that can complete the XDocument constructor call.
                    var html = new XDocument(
                        new XDocumentType("html", null, null, null),
                        htmlElement);

                    // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
                    // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
                    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
                    // for detailed explanation.
                    // If you further transform the XML tree returned by ConvertToHtmlTransform, you
                    // must do it correctly, or entities will not be serialized properly.

                    var htmlString = html.ToString(SaveOptions.DisableFormatting);

                    // NOTE(review): restored - the parser must be given the markup before it can be queried.
                    var htmlDoc = new HtmlDocument();
                    htmlDoc.LoadHtml(htmlString);

                    // A missing <style> or <body> node contributes an empty string instead of a null dereference.
                    var style = htmlDoc.DocumentNode.SelectSingleNode("//style")?.OuterHtml ?? string.Empty;
                    var body  = htmlDoc.DocumentNode.SelectSingleNode("//body")?.InnerHtml ?? string.Empty;

                    content = $"{style}{Environment.NewLine}{body}";

                    // Optionally keep the full converted page next to the other output; skipped when the
                    // target directory is not configured or does not exist.
                    if (settings.IsSaveHtml && !string.IsNullOrEmpty(settings.HtmlDirectoryPath) && DirectoryUtils.IsDirectoryExists(settings.HtmlDirectoryPath))
                    {
                        var htmlFilePath = PathUtils.Combine(settings.HtmlDirectoryPath, PathUtils.GetFileNameWithoutExtension(docxFilePath) + ".html");
                        File.WriteAllText(htmlFilePath, htmlString, Encoding.UTF8);
                    }
                }
            }

            if (settings.IsFirstLineTitle)
            {
                // Use the text of the first <p> as the title when it is non-empty.
                var contentTitle = RegexUtils.GetInnerContent("p", content);
                contentTitle = StringUtils.StripTags(contentTitle);
                if (!string.IsNullOrEmpty(contentTitle))
                {
                    // Trim() strips every Unicode white-space character, so one call is sufficient.
                    contentTitle = contentTitle.Trim();
                    contentTitle = StringUtils.StripEntities(contentTitle);
                }

                if (!string.IsNullOrEmpty(contentTitle))
                {
                    title = contentTitle;
                }
            }

            if (settings.IsClearFormat)
            {
                content = HtmlUtils.ClearFormat(content);
            }

            if (settings.IsFirstLineIndent)
            {
                content = HtmlUtils.FirstLineIndent(content);
            }

            if (settings.IsClearFontSize)
            {
                content = HtmlUtils.ClearFontSize(content);
            }

            if (settings.IsClearFontFamily)
            {
                content = HtmlUtils.ClearFontFamily(content);
            }

            return (title, imageUrl, content);
        }