Exemple #1
0
        /// <summary>
        /// Converts <paramref name="sourceDocx"/> to HTML, writes the markup to
        /// <paramref name="destFileName"/> as UTF-8 and, on Windows only, hands the written file
        /// and the expectation image path to the rendering assertion.
        /// </summary>
        /// <param name="sourceDocx">Word document to convert.</param>
        /// <param name="destFileName">File that receives the generated HTML.</param>
        /// <param name="settings">Converter settings, passed through unchanged.</param>
        /// <param name="expectedPixeNoise">Value forwarded to the rendering assertion.</param>
        private static async Task ConvertToHtml(FileInfo sourceDocx, FileInfo destFileName, WmlToHtmlConverterSettings settings, int expectedPixeNoise)
        {
            var docxBytes = File.ReadAllBytes(sourceDocx.FullName);

            // The expectation image lives next to the source and is named "<source file name>Expectation.png".
            var expectationImagePath = Path.Combine(sourceDocx.Directory.FullName, sourceDocx.Name + "Expectation.png");

            // The document is opened for editing on an in-memory copy of the file's bytes.
            using var docxBuffer = new MemoryStream();
            docxBuffer.Write(docxBytes, 0, docxBytes.Length);
            using var document = WordprocessingDocument.Open(docxBuffer, true);

            var htmlPath = new FileInfo(Path.Combine(destFileName.Directory.FullName, destFileName.Name)).FullName;

            // Note: the XHTML returned by the converter contains XEntity objects (see PtOpenXmlUtil.cs and
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx).
            // Any further transformation of the tree must preserve them or entities will not serialize properly.
            var markup = WmlToHtmlConverter.ConvertToHtml(document, settings).ToString(SaveOptions.DisableFormatting);

            File.WriteAllText(htmlPath, markup, Encoding.UTF8);

            // The rendering comparison is only performed on Windows.
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                await AssertRenderedHtmlIsEqual(htmlPath, expectationImagePath, expectedPixeNoise);
            }
        }
    /// <summary>
    /// Converts a Word document to an HTML5 file that carries the source file's name with an
    /// ".html" extension.
    /// </summary>
    /// <param name="file">Path of the document to convert.</param>
    /// <param name="outputDirectory">
    /// Existing directory that receives the HTML file. When null or empty, the bare file name is
    /// used, which resolves against the current working directory.
    /// </param>
    /// <exception cref="OpenXmlPowerToolsException">
    /// <paramref name="outputDirectory"/> is specified but does not exist.
    /// </exception>
    public static void ConvertToHtml(string file, string outputDirectory)
    {
        var fi = new FileInfo(file);

        Console.WriteLine(fi.Name);
        var byteArray = File.ReadAllBytes(fi.FullName);

        // The document is opened for editing on an in-memory copy of the file's bytes.
        using var memoryStream = new MemoryStream();
        memoryStream.Write(byteArray, 0, byteArray.Length);
        using var wDoc = WordprocessingDocument.Open(memoryStream, true);

        // Path.ChangeExtension swaps only the real extension, whatever its casing. A textual
        // Replace(".docx", ".html") left names such as "REPORT.DOCX" untouched, so the HTML could
        // be written over the source document when both live in the same directory.
        var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));

        if (!string.IsNullOrEmpty(outputDirectory))
        {
            var di = new DirectoryInfo(outputDirectory);
            if (!di.Exists)
            {
                throw new OpenXmlPowerToolsException("Output directory does not exist");
            }
            destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
        }

        // Prefer the document's own title; fall back to the source path when the package has no
        // core-properties part or no title element.
        var pageTitle = fi.FullName;
        var part      = wDoc.CoreFilePropertiesPart;

        if (part != null)
        {
            pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? fi.FullName;
        }

        // TODO: Determine max-width from size of content area.
        var settings = new WmlToHtmlConverterSettings(pageTitle);

        var htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

        // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
        // we are using HTML5.
        var html = new XDocument(
            new XDocumentType("html", null, null, null),
            htmlElement);

        // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
        // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
        // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
        // for detailed explanation.
        //
        // If you further transform the XML tree returned by ConvertToHtmlTransform, you
        // must do it correctly, or entities will not be serialized properly.

        var htmlString = html.ToString(SaveOptions.DisableFormatting);

        File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
    }
Exemple #3
0
        /// <summary>
        /// Converts a Word document to an HTML5 string, delegating image markup to the supplied handler.
        /// </summary>
        /// <param name="fileInfo">Document to convert.</param>
        /// <param name="imageHandler">Callback assigned to the converter's <c>ImageHandler</c> setting.</param>
        /// <returns>The serialized HTML document, including the <c>&lt;!DOCTYPE html&gt;</c> declaration.</returns>
        /// <exception cref="Exception">
        /// The file could not be read; the original failure is available as <c>InnerException</c>.
        /// </exception>
        private string ParseDOCX(FileInfo fileInfo, Func <ImageInfo, XElement> imageHandler)
        {
            byte[] byteArray;

            try
            {
                byteArray = File.ReadAllBytes(fileInfo.FullName);
            }
            catch (Exception ex)
            {
                // The message is Russian for "The file is unavailable. It may be open in another program."
                // The triggering exception is attached so the real cause is not lost.
                throw new Exception("Файл недоступен. Возможно, он открыт в другой программе.", ex);
            }

            // The document is opened for editing on an in-memory copy of the file's bytes.
            using (MemoryStream memoryStream = new MemoryStream())
            {
                memoryStream.Write(byteArray, 0, byteArray.Length);

                using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
                {
                    // Only CSS-class fabrication and the image handler are customized;
                    // every other setting keeps the converter's default.
                    WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                    {
                        FabricateCssClasses = false,
                        ImageHandler        = imageHandler
                    };

                    XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

                    // Prefix the markup with <!DOCTYPE html> so browsers treat it as HTML5.
                    var html = new XDocument(new XDocumentType("html", null, null, null), htmlElement);
                    return html.ToString(SaveOptions.DisableFormatting);
                }
            }
        }
        /// <summary>
        /// Copies the supplied document into memory, converts it to XHTML 1.1 and returns the
        /// serialized result as a new stream positioned at its start.
        /// </summary>
        /// <param name="sourceOpenXml">Stream holding the Word document; it is read but never written to.</param>
        /// <param name="fallbackPageTitle">Title used when the document has no non-empty title of its own.</param>
        /// <param name="imageHandler">Image handler forwarded to the converter settings.</param>
        /// <param name="useWebSafeFonts">Selects <c>WebSafeFontsHandler</c> instead of <c>FontHandler</c>.</param>
        /// <returns>A <see cref="MemoryStream"/> containing the markup; the caller owns it.</returns>
        private static async Task <Stream> ConvertToHtmlInternalAsync(Stream sourceOpenXml, string fallbackPageTitle, IImageHandler imageHandler, bool useWebSafeFonts)
        {
            // The document is opened for editing on an in-memory copy rather than on the caller's stream.
            using var editableCopy = new MemoryStream();
            await sourceOpenXml.CopyToAsync(editableCopy).ConfigureAwait(false);

            using var document = WordprocessingDocument.Open(editableCopy, true);

            // Use the document's own title when it has a non-empty one.
            var titleElement = document.CoreFilePropertiesPart?.GetXDocument().Descendants(DC.title).FirstOrDefault();
            var pageTitle    = fallbackPageTitle;
            if (titleElement != null && titleElement.Value.Length > 0)
            {
                pageTitle = titleElement.Value;
            }

            var settings = CreateHtmlConverterSettings(pageTitle, imageHandler, useWebSafeFonts ? new WebSafeFontsHandler() : new FontHandler());
            var body     = WmlToHtmlConverter.ConvertToHtml(document, settings);
            var doctype  = new XDocumentType("html", "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", null);

            // Not wrapped in "using": the stream is the return value.
            var output = new MemoryStream();
            new XDocument(doctype, body).Save(output, SaveOptions.DisableFormatting);
            output.Position = 0;
            return output;
        }
        /// <summary>
        /// Simplifies the markup of a Word document supplied as a stream and returns it as HTML text.
        /// </summary>
        /// <param name="inputDoc">Stream holding the Word document; it is copied, not modified.</param>
        /// <returns>The string form of the element produced by the converter with default settings.</returns>
        private string ConvertDocxToHtml(Stream inputDoc)
        {
            // Every simplification option is switched on except hyperlink removal.
            var cleanup = new SimplifyMarkupSettings
            {
                AcceptRevisions                   = true,
                NormalizeXml                      = true,
                RemoveBookmarks                   = true,
                RemoveComments                    = true,
                RemoveContentControls             = true,
                RemoveEndAndFootNotes             = true,
                RemoveFieldCodes                  = true,
                RemoveGoBackBookmark              = true,
                RemoveHyperlinks                  = false,
                RemoveLastRenderedPageBreak       = true,
                RemoveMarkupForDocumentComparison = true,
                RemovePermissions                 = true,
                RemoveProof                       = true,
                RemoveRsidInfo                    = true,
                RemoveSmartTags                   = true,
                RemoveSoftHyphens                 = true,
                RemoveWebHidden                   = true,
                ReplaceTabsWithSpaces             = true
            };

            // The document is opened for editing on an in-memory copy of the input.
            using (var editableCopy = new MemoryStream())
            {
                inputDoc.CopyTo(editableCopy);

                using (var package = WordprocessingDocument.Open(editableCopy, true))
                {
                    // Strip the unneeded markup first, then export the cleaned document.
                    MarkupSimplifier.SimplifyMarkup(package, cleanup);

                    var htmlRoot = WmlToHtmlConverter.ConvertToHtml(package, new WmlToHtmlConverterSettings());
                    return htmlRoot.ToString();
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Converts a Word document to an HTML file that carries the source file's name with an
        /// ".html" extension.
        /// </summary>
        /// <param name="file">Path of the document to convert.</param>
        /// <param name="outputDirectory">
        /// Existing directory that receives the HTML file. When null or empty, the bare file name is
        /// used, which resolves against the current working directory.
        /// </param>
        /// <exception cref="OpenXmlPowerToolsException">
        /// <paramref name="outputDirectory"/> is specified but does not exist.
        /// </exception>
        public static void ConvertToHtml(string file, string outputDirectory)
        {
            var fi        = new FileInfo(file);
            var byteArray = File.ReadAllBytes(fi.FullName);

            // The document is opened for editing on an in-memory copy of the file's bytes.
            using var memoryStream = new MemoryStream();
            memoryStream.Write(byteArray, 0, byteArray.Length);
            using var wDoc = WordprocessingDocument.Open(memoryStream, true);

            // Path.ChangeExtension swaps only the real extension, whatever its casing. A textual
            // Replace(".docx", ".html") left names such as "REPORT.DOCX" untouched, so the HTML could
            // be written over the source document when both live in the same directory.
            var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));

            if (!string.IsNullOrEmpty(outputDirectory))
            {
                var di = new DirectoryInfo(outputDirectory);
                if (!di.Exists)
                {
                    throw new OpenXmlPowerToolsException("Output directory does not exist");
                }
                destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
            }

            // The core-properties part is checked for null before use; when it or the title
            // element is missing, the source path is used as the title.
            var pageTitle = fi.FullName;
            var part      = wDoc.CoreFilePropertiesPart;

            if (part != null)
            {
                pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? fi.FullName;
            }

            var settings = new WmlToHtmlConverterSettings(pageTitle);

            var html = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx for detailed explanation.
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you must do it correctly, or entities will not be serialized properly.

            var htmlString = html.ToString(SaveOptions.DisableFormatting);

            File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
        }
Exemple #7
0
        /// <summary>
        /// Converts a Word document to a self-contained HTML5 string in which every supported image
        /// is embedded as a base64 <c>data:</c> URI.
        /// </summary>
        /// <param name="fileInfo">Document to convert.</param>
        /// <returns>
        /// The serialized HTML document, or the literal text "File contains corrupt data" when any
        /// step of reading or converting the file throws.
        /// </returns>
        private static string ParseDOCX(FileInfo fileInfo)
        {
            try
            {
                byte[] byteArray = File.ReadAllBytes(fileInfo.FullName);

                // The document is opened for editing on an in-memory copy of the file's bytes.
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    memoryStream.Write(byteArray, 0, byteArray.Length);
                    using (WordprocessingDocument wDoc =
                               WordprocessingDocument.Open(memoryStream, true))
                    {
                        // Prefer the document's own title; fall back to the file path.
                        var pageTitle = fileInfo.FullName;
                        var part      = wDoc.CoreFilePropertiesPart;
                        if (part != null)
                        {
                            pageTitle = (string)part.GetXDocument()
                                        .Descendants(DC.title)
                                        .FirstOrDefault() ?? fileInfo.FullName;
                        }

                        WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                        {
                            AdditionalCss                       = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                            PageTitle                           = pageTitle,
                            FabricateCssClasses                 = true,
                            CssClassPrefix                      = "pt-",
                            RestrictToSupportedLanguages        = false,
                            RestrictToSupportedNumberingFormats = false,
                            ImageHandler                        = imageInfo =>
                            {
                                // The subtype of the content type ("image/png" -> "png") selects the target format.
                                // Invariant casing keeps the comparison independent of the current culture.
                                string      subtype = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();
                                ImageFormat imageFormat;
                                switch (subtype)
                                {
                                    case "png":
                                        imageFormat = ImageFormat.Png;
                                        break;
                                    case "gif":
                                        imageFormat = ImageFormat.Gif;
                                        break;
                                    case "bmp":
                                        imageFormat = ImageFormat.Bmp;
                                        break;
                                    case "jpeg":
                                        imageFormat = ImageFormat.Jpeg;
                                        break;
                                    case "tiff":
                                        // TIFF images are re-encoded as GIF.
                                        imageFormat = ImageFormat.Gif;
                                        break;
                                    case "x-wmf":
                                        imageFormat = ImageFormat.Wmf;
                                        break;
                                    default:
                                        // Unknown image types produce no markup.
                                        return(null);
                                }

                                // Pick the encoder that will actually write the bytes and take the MIME type from
                                // it, so the data URI never declares a format other than the one it contains
                                // (previously the MIME type came from the bitmap's raw format, which differs
                                // whenever the image is re-encoded). When no encoder is registered for the
                                // requested format, PNG is used instead.
                                ImageCodecInfo[] encoders = ImageCodecInfo.GetImageEncoders();
                                ImageCodecInfo   encoder  =
                                    encoders.FirstOrDefault(c => c.FormatID == imageFormat.Guid) ??
                                    encoders.FirstOrDefault(c => c.FormatID == ImageFormat.Png.Guid);
                                if (encoder == null)
                                {
                                    return(null);
                                }

                                string base64;
                                try
                                {
                                    using (MemoryStream ms = new MemoryStream())
                                    {
                                        imageInfo.Bitmap.Save(ms, encoder, null);
                                        base64 = System.Convert.ToBase64String(ms.ToArray());
                                    }
                                }
                                catch (System.Runtime.InteropServices.ExternalException)
                                {
                                    // An image that cannot be encoded is skipped rather than failing the document.
                                    return(null);
                                }

                                string imageSource =
                                    string.Format("data:{0};base64,{1}", encoder.MimeType, base64);

                                XElement img = new XElement(Xhtml.img,
                                                            new XAttribute(NoNamespace.src, imageSource),
                                                            imageInfo.ImgStyleAttribute,
                                                            imageInfo.AltText != null ?
                                                            new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                                return(img);
                            }
                        };

                        XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

                        // Prefix the markup with <!DOCTYPE html> so browsers treat it as HTML5.
                        var      html        = new XDocument(new XDocumentType("html", null, null, null),
                                                             htmlElement);
                        var htmlString = html.ToString(SaveOptions.DisableFormatting);
                        return(htmlString);
                    }
                }
            }
            catch
            {
                // Best effort by design: any failure is reported to the caller as text instead of an exception.
                return("File contains corrupt data");
            }
        }
        /// <summary>
        /// Converts a Word document to an HTML5 file and saves its images into a sibling
        /// "&lt;name&gt;_files" directory referenced by relative URLs.
        /// </summary>
        /// <param name="file">Path of the document to convert.</param>
        /// <param name="outputDirectory">
        /// Existing directory that receives the output. When null or empty, the bare file name is
        /// used, which resolves against the current working directory.
        /// </param>
        /// <exception cref="OpenXmlPowerToolsException">
        /// <paramref name="outputDirectory"/> is specified but does not exist.
        /// </exception>
        public static void WordToHtml(string file, string outputDirectory)
        {
            var fi = new FileInfo(file);

            Console.WriteLine(fi.Name);
            byte[] byteArray = File.ReadAllBytes(fi.FullName);

            // The document is opened for editing on an in-memory copy of the file's bytes.
            using (MemoryStream memoryStream = new MemoryStream())
            {
                memoryStream.Write(byteArray, 0, byteArray.Length);
                using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
                {
                    // Path.ChangeExtension swaps only the real extension, whatever its casing. A textual
                    // Replace(".docx", ".html") left names such as "REPORT.DOCX" untouched, so the HTML
                    // could be written over the source document when both live in the same directory.
                    var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));
                    if (!string.IsNullOrEmpty(outputDirectory))
                    {
                        DirectoryInfo di = new DirectoryInfo(outputDirectory);
                        if (!di.Exists)
                        {
                            throw new OpenXmlPowerToolsException("Output directory does not exist");
                        }
                        destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
                    }

                    // "<dest without extension>_files"; derived from the path itself instead of
                    // assuming the extension is exactly five characters long.
                    var imageDirectory = new DirectoryInfo(Path.ChangeExtension(destFileName.FullName, null) + "_files");
                    int imageCounter   = 0;

                    // Prefer the document's own title; fall back to the source path.
                    var pageTitle = fi.FullName;
                    var part      = wDoc.CoreFilePropertiesPart;
                    if (part != null)
                    {
                        pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? fi.FullName;
                    }

                    // TODO: Determine max-width from size of content area.
                    WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                    {
                        AdditionalCss                       = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                        PageTitle                           = pageTitle,
                        FabricateCssClasses                 = true,
                        CssClassPrefix                      = "pt-",
                        RestrictToSupportedLanguages        = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler                        = imageInfo =>
                        {
                            // Created on first use so documents without images leave no empty folder;
                            // Create() does nothing when the directory already exists.
                            imageDirectory.Create();

                            // The counter advances for every image, including ones skipped below.
                            ++imageCounter;

                            // Invariant casing keeps the comparison independent of the current culture.
                            string      extension = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();
                            ImageFormat imageFormat;
                            switch (extension)
                            {
                                case "png":
                                    imageFormat = ImageFormat.Png;
                                    break;
                                case "gif":
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "bmp":
                                    imageFormat = ImageFormat.Bmp;
                                    break;
                                case "jpeg":
                                    imageFormat = ImageFormat.Jpeg;
                                    break;
                                case "tiff":
                                    // Convert tiff to gif.
                                    extension   = "gif";
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "x-wmf":
                                    extension   = "wmf";
                                    imageFormat = ImageFormat.Wmf;
                                    break;
                                default:
                                    // If the image format isn't one that we expect, ignore it,
                                    // and don't return markup for the link.
                                    return(null);
                            }

                            string imageName     = "image" + imageCounter.ToString() + "." + extension;
                            string imageFileName = Path.Combine(imageDirectory.FullName, imageName);
                            try
                            {
                                imageInfo.Bitmap.Save(imageFileName, imageFormat);
                            }
                            catch (System.Runtime.InteropServices.ExternalException)
                            {
                                // An image that cannot be saved is skipped rather than failing the document.
                                return(null);
                            }

                            // Relative URL, so the HTML file and its image folder can be moved together.
                            string imageSource = imageDirectory.Name + "/" + imageName;

                            XElement img = new XElement(Xhtml.img,
                                                        new XAttribute(NoNamespace.src, imageSource),
                                                        imageInfo.ImgStyleAttribute,
                                                        imageInfo.AltText != null ?
                                                        new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                            return(img);
                        }
                    };
                    XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

                    // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
                    // we are using HTML5.
                    var html = new XDocument(
                        new XDocumentType("html", null, null, null),
                        htmlElement);

                    // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
                    // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
                    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
                    // for detailed explanation.
                    //
                    // If you further transform the XML tree returned by ConvertToHtmlTransform, you
                    // must do it correctly, or entities will not be serialized properly.

                    var htmlString = html.ToString(SaveOptions.DisableFormatting);
                    File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
                }
            }
        }
        /// <summary>
        /// Converts a Word document to HTML without fabricated CSS classes and saves its images into a
        /// sibling "&lt;name&gt;_files" directory.
        /// </summary>
        /// <param name="sourceDocx">Document to convert.</param>
        /// <param name="destFileName">File that receives the generated HTML (UTF-8).</param>
        public static void ConvertToHtmlNoCssClasses(FileInfo sourceDocx, FileInfo destFileName)
        {
            byte[] byteArray = File.ReadAllBytes(sourceDocx.FullName);

            // The document is opened for editing on an in-memory copy of the file's bytes.
            using (MemoryStream memoryStream = new MemoryStream())
            {
                memoryStream.Write(byteArray, 0, byteArray.Length);
                using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
                {
                    var outputDirectory = destFileName.Directory;
                    destFileName = new FileInfo(Path.Combine(outputDirectory.FullName, destFileName.Name));

                    // "<dest without extension>_files"; derived from the path itself instead of
                    // assuming the caller's extension is exactly five characters long.
                    var imageDirectoryName = Path.ChangeExtension(destFileName.FullName, null) + "_files";
                    int imageCounter       = 0;

                    // The core-properties part is checked for null before use; when it or the title
                    // element is missing, the source path is used as the title.
                    var pageTitle = sourceDocx.FullName;
                    var part      = wDoc.CoreFilePropertiesPart;
                    if (part != null)
                    {
                        pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? sourceDocx.FullName;
                    }

                    WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                    {
                        PageTitle                           = pageTitle,
                        FabricateCssClasses                 = false,
                        RestrictToSupportedLanguages        = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler                        = imageInfo =>
                        {
                            DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryName);
                            if (!localDirInfo.Exists)
                            {
                                localDirInfo.Create();
                            }

                            // The counter advances for every image, including ones skipped below.
                            ++imageCounter;

                            // Invariant casing keeps the comparison independent of the current culture.
                            string      extension = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();
                            ImageFormat imageFormat;
                            switch (extension)
                            {
                                case "png":
                                    // Convert png to gif.
                                    extension   = "gif";
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "gif":
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "bmp":
                                    imageFormat = ImageFormat.Bmp;
                                    break;
                                case "jpeg":
                                    imageFormat = ImageFormat.Jpeg;
                                    break;
                                case "tiff":
                                    // Convert tiff to gif.
                                    extension   = "gif";
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "x-wmf":
                                    extension   = "wmf";
                                    imageFormat = ImageFormat.Wmf;
                                    break;
                                default:
                                    // If the image format isn't one that we expect, ignore it,
                                    // and don't return markup for the link.
                                    return(null);
                            }

                            string imageFileName = imageDirectoryName + "/image" +
                                                   imageCounter.ToString() + "." + extension;
                            try
                            {
                                imageInfo.Bitmap.Save(imageFileName, imageFormat);
                            }
                            catch (System.Runtime.InteropServices.ExternalException)
                            {
                                // An image that cannot be saved is skipped rather than failing the document.
                                return(null);
                            }

                            // NOTE(review): src is the full file-system path of the saved image, not a
                            // path relative to the HTML file - confirm this is what consumers expect.
                            XElement img = new XElement(Xhtml.img,
                                                        new XAttribute(NoNamespace.src, imageFileName),
                                                        imageInfo.ImgStyleAttribute,
                                                        imageInfo.AltText != null ?
                                                        new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                            return(img);
                        }
                    };
                    XElement html = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

                    // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
                    // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
                    // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
                    // for detailed explanation.
                    //
                    // If you further transform the XML tree returned by ConvertToHtmlTransform, you
                    // must do it correctly, or entities will not be serialized properly.

                    var htmlString = html.ToString(SaveOptions.DisableFormatting);
                    File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
                }
            }
        }
Exemple #10
0
        // https://github.com/OfficeDev/Open-Xml-PowerTools/blob/2f9134bd5abe0547fcf3d803b40b1401d6e58020/OpenXmlPowerToolsExamples/HtmlConverter01/HtmlConverter01.cs
        private static HtmlConversion ConvertToHtml(WordprocessingDocument wDoc, string pageTitle, string documentName = null)
        {
            var htmlConversion = new HtmlConversion();
            int imageCounter   = 0;
            var part           = wDoc.CoreFilePropertiesPart;

            if (part != null)
            {
                pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? pageTitle;
            }
            // TODO: Determine max-width from size of content area.
            var settings = new WmlToHtmlConverterSettings()
            {
                AdditionalCss                       = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                PageTitle                           = pageTitle,
                FabricateCssClasses                 = true,
                CssClassPrefix                      = "pt-",
                RestrictToSupportedLanguages        = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler                        = imageInfo =>
                {
                    ++imageCounter;
                    string      extension   = imageInfo.ContentType.Split('/')[1].ToLower();
                    ImageFormat imageFormat = null;
                    if (extension == "png")
                    {
                        imageFormat = ImageFormat.Png;
                    }
                    else if (extension == "gif")
                    {
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "bmp")
                    {
                        imageFormat = ImageFormat.Bmp;
                    }
                    else if (extension == "jpeg")
                    {
                        imageFormat = ImageFormat.Jpeg;
                    }
                    else if (extension == "tiff")
                    {
                        // Convert tiff to gif.
                        extension   = "gif";
                        imageFormat = ImageFormat.Gif;
                    }
                    else if (extension == "x-wmf")
                    {
                        extension   = "wmf";
                        imageFormat = ImageFormat.Wmf;
                    }

                    // If the image format isn't one that we expect, ignore it,
                    // and don't return markup for the link.
                    if (imageFormat == null)
                    {
                        return(null);
                    }

                    // Return image buffers only when template is converted
                    string imageFilename = $"image{imageCounter}.{extension}";
                    string imageUrl      = $"./{pageTitle}/{imageFilename}";
                    if (documentName == null)
                    {
                        try
                        {
                            using var ms = new MemoryStream();
                            imageInfo.Bitmap.Save(ms, imageFormat);
                            htmlConversion.Images.Add(imageFilename, ms.ToArray());
                        }
                        catch (System.Runtime.InteropServices.ExternalException)
                        {
                            return(null);
                        }
                    }

                    XElement img = new XElement(Xhtml.img,
                                                new XAttribute(NoNamespace.src, imageUrl),
                                                imageInfo.ImgStyleAttribute,
                                                imageInfo.AltText != null ?
                                                new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    return(img);
                }
            };

            XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

            // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
            // we are using HTML5.
            var html = new XDocument(
                new XDocumentType("html", null, null, null),
                htmlElement);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
            // XEntity.  PtOpenXmlUtil.cs define the XEntity class.  See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for detailed explanation.
            //
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you
            // must do it correctly, or entities will not be serialized properly.

            var htmlString = html.ToString(SaveOptions.DisableFormatting);

            htmlConversion.Html = htmlString;
            return(htmlConversion);
        }
    }
Exemple #11
0
        /// <summary>
        /// Converts a Word document to an HTML fragment and writes it to a file. The fragment
        /// consists of a scoped <c>style</c> element, the article <c>div</c> (with a
        /// "Continue Reading" break inserted before the first matching paragraph), an <c>hr</c>,
        /// and the last sibling element that follows the article (treated as the footnotes).
        /// </summary>
        /// <param name="args">Exactly two values: the input filename and the output filename.</param>
        static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                Console.Error.WriteLine("Usage: WordToHtmlConverter.exe <input filename> <output filename>");
                Environment.Exit(1);
            }

            WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings();
            XElement e = WmlToHtmlConverter.ConvertToHtml(new WmlDocument(args[0]), settings);

            // Locate the pieces of the converted document before the output file is created,
            // so a lookup failure does not leave an empty file behind.
            XElement style     = e.Element(XhtmlNoNamespace.body).Element(XhtmlNoNamespace.style);
            XElement article   = e.Element(XhtmlNoNamespace.body).Element(XhtmlNoNamespace.div);
            XElement footnotes = article.ElementsAfterSelf().Last();

            XmlWriterSettings s = new XmlWriterSettings();

            // Fragment conformance allows several top-level nodes and raw markup.
            s.ConformanceLevel   = ConformanceLevel.Fragment;
            s.OmitXmlDeclaration = true;
            s.Encoding           = new System.Text.UTF8Encoding(false);

            // The using block guarantees the writer is flushed and closed even if writing throws.
            using (XmlWriter writer = XmlWriter.Create(args[1], s))
            {
                // The scoped attribute is a "boolean attribute," so it's not supposed to have a value.
                // We have to write it manually to accomplish that.
                writer.WriteRaw("<style scoped>");
                writer.WriteValue(style.Value);
                writer.WriteRaw("</style>");

                bool wroteBreak = false;

                writer.WriteStartElement("div");
                foreach (XElement n in article.Elements())
                {
                    if (!wroteBreak)
                    {
                        // The explicit string conversion yields null when the attribute is absent,
                        // so paragraphs without a class attribute no longer cause a NullReferenceException.
                        string cssClass = (string)n.Attribute("class");
                        if (n.Name == XhtmlNoNamespace.p && (cssClass == "pt-SubHead1" || cssClass == "pt-Document"))
                        {
                            // Centered link that jumps to the "begin" anchor.
                            writer.WriteStartElement("p");
                            writer.WriteAttributeString("style", "text-align:center;");
                            writer.WriteStartElement("a");
                            writer.WriteAttributeString("href", "#begin");
                            writer.WriteRaw("&#9660; Continue Reading &#9660;");
                            writer.WriteFullEndElement();
                            writer.WriteFullEndElement();

                            // Half-viewport-high spacer between the link and the anchor.
                            writer.WriteStartElement("div");
                            writer.WriteAttributeString("style", "height:50vh");
                            writer.WriteFullEndElement();

                            // Empty paragraph that serves as the "begin" anchor target.
                            writer.WriteStartElement("p");
                            writer.WriteAttributeString("id", "begin");
                            writer.WriteFullEndElement();

                            wroteBreak = true;
                        }
                    }
                    n.WriteTo(writer);
                }
                writer.WriteFullEndElement();

                writer.WriteRaw("<hr>");
                footnotes.WriteTo(writer);
            }
        }
Exemple #12
0
        /// <summary>
        /// Converts an in-memory Word document to HTML, embedding each supported image
        /// in the markup as a base64 data URI.
        /// </summary>
        /// <param name="bytes">Raw bytes of the Word document.</param>
        /// <returns>The element produced by <c>WmlToHtmlConverter.ConvertToHtml</c>.</returns>
        public XElement ConvertToHTML(byte[] bytes)
        {
            WmlDocument wordDoc = new WmlDocument("wmldoc", bytes);

            // These settings borrowed from https://raw.githubusercontent.com/EricWhiteDev/Open-Xml-PowerTools/vNext/OpenXmlPowerToolsExamples/WmlToHtmlConverter02/WmlToHtmlConverter02.cs
            WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
            {
                // Tweaked the CSS to fix some things, specifically the padding-top and overflow-wrap settings
                AdditionalCss                       = "body { margin: 1cm auto; padding: 0; padding-top: 70px; overflow-wrap: anywhere}",
                PageTitle                           = "Display Report",
                FabricateCssClasses                 = true,
                CssClassPrefix                      = "pt-",
                RestrictToSupportedLanguages        = false,
                RestrictToSupportedNumberingFormats = false,
                // This whole block below is to handle images in the report
                ImageHandler = imageInfo =>
                {
                    // Invariant lower-casing: culture-sensitive ToLower() would turn "TIFF"/"GIF"
                    // into strings that match none of the cases under e.g. the Turkish culture.
                    string subtype = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();

                    ImageFormat imageFormat = null;
                    switch (subtype)
                    {
                        case "png":
                            imageFormat = ImageFormat.Png;
                            break;
                        case "gif":
                            imageFormat = ImageFormat.Gif;
                            break;
                        case "bmp":
                            imageFormat = ImageFormat.Bmp;
                            break;
                        case "jpeg":
                            imageFormat = ImageFormat.Jpeg;
                            break;
                        case "tiff":
                            // Convert tiff to gif.
                            imageFormat = ImageFormat.Gif;
                            break;
                        case "x-wmf":
                            imageFormat = ImageFormat.Wmf;
                            break;
                    }

                    // If the image format isn't one that we expect, ignore it,
                    // and don't return markup for the link.
                    if (imageFormat == null)
                    {
                        return null;
                    }

                    string base64;
                    try
                    {
                        using (MemoryStream ms = new MemoryStream())
                        {
                            imageInfo.Bitmap.Save(ms, imageFormat);
                            base64 = System.Convert.ToBase64String(ms.ToArray());
                        }
                    }
                    catch (System.Runtime.InteropServices.ExternalException)
                    {
                        // The image could not be encoded; omit it from the output.
                        return null;
                    }

                    // Take the MIME type from the format the bytes were encoded with, not from
                    // Bitmap.RawFormat: RawFormat is the source format, so a TIFF re-encoded
                    // as GIF would otherwise be labelled "image/tiff".
                    ImageCodecInfo codec = ImageCodecInfo.GetImageDecoders()
                                                         .FirstOrDefault(c => c.FormatID == imageFormat.Guid);
                    if (codec == null)
                    {
                        // No codec describes this format, so no MIME type can be stated; skip the image.
                        return null;
                    }

                    string imageSource = string.Format("data:{0};base64,{1}", codec.MimeType, base64);

                    XElement img = new XElement(Xhtml.img,
                                                new XAttribute(NoNamespace.src, imageSource),
                                                imageInfo.ImgStyleAttribute,
                                                imageInfo.AltText != null ?
                                                new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    return img;
                }
            };

            return WmlToHtmlConverter.ConvertToHtml(wordDoc, settings);
        }
        /// <summary>
        /// Converts a DOCX file to XHTML with images embedded as base64 data URIs, saves the
        /// result, and prints the inner XML of the first SplitPageBreakAndParagraphMark element
        /// found in the document body, if there is one.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the path of the DOCX file to read; <c>args[1]</c> is the path
        /// the XHTML is written to. Additional arguments are ignored.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Usage: <input filename> <output filename>");
                Environment.ExitCode = 1;
                return;
            }

            string resourceFilepath = args[0];
            string outputFilepath   = args[1];

            byte[] byteArray = File.ReadAllBytes(resourceFilepath);

            using (MemoryStream ms = new MemoryStream())
            {
                // Work on an in-memory copy: the document is opened as editable.
                ms.Write(byteArray, 0, byteArray.Length);

                using (WordprocessingDocument wpd = WordprocessingDocument.Open(ms, true))
                {
                    WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                    {
                        AdditionalCss                       = "body { width: 210mm!important;height: 100%;max-width: 210mm; padding: 0; background-color: beige; padding: 1cm;}",
                        FabricateCssClasses                 = true,
                        CssClassPrefix                      = "pt-",
                        RestrictToSupportedLanguages        = false,
                        RestrictToSupportedNumberingFormats = false,
                        ImageHandler = imageInfo =>
                        {
                            // Invariant lower-casing keeps the comparison independent of the current culture.
                            string subtype = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();

                            ImageFormat imageFormat = null;
                            switch (subtype)
                            {
                                case "png":
                                    imageFormat = ImageFormat.Png;
                                    break;
                                case "gif":
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "bmp":
                                    imageFormat = ImageFormat.Bmp;
                                    break;
                                case "jpeg":
                                    imageFormat = ImageFormat.Jpeg;
                                    break;
                                case "tiff":
                                    // TIFF images are re-encoded as GIF.
                                    imageFormat = ImageFormat.Gif;
                                    break;
                                case "x-wmf":
                                    imageFormat = ImageFormat.Wmf;
                                    break;
                            }

                            // Unrecognized content type: emit no markup for this image.
                            if (imageFormat == null)
                            {
                                return null;
                            }

                            string base64;
                            try
                            {
                                // Named differently from the outer document stream to avoid shadowing it.
                                using (MemoryStream imageStream = new MemoryStream())
                                {
                                    imageInfo.Bitmap.Save(imageStream, imageFormat);
                                    base64 = System.Convert.ToBase64String(imageStream.ToArray());
                                }
                            }
                            catch (System.Runtime.InteropServices.ExternalException)
                            {
                                // The image could not be encoded; omit it from the output.
                                return null;
                            }

                            // Use the format the bytes were encoded with; Bitmap.RawFormat is the
                            // source format and is wrong whenever the image was re-encoded (TIFF -> GIF).
                            ImageCodecInfo codec = ImageCodecInfo.GetImageDecoders()
                                                   .FirstOrDefault(c => c.FormatID == imageFormat.Guid);
                            if (codec == null)
                            {
                                // No codec describes this format, so no MIME type can be stated; skip the image.
                                return null;
                            }

                            string imageSource =
                                string.Format("data:{0};base64,{1}", codec.MimeType, base64);

                            XElement img = new XElement(Xhtml.img,
                                                        new XAttribute(NoNamespace.src, imageSource),
                                                        imageInfo.ImgStyleAttribute,
                                                        imageInfo.AltText != null ?
                                                        new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                            return img;
                        }
                    };

                    XElement html = WmlToHtmlConverter.ConvertToHtml(wpd, settings);
                    html.Save(outputFilepath);

                    Console.WriteLine("Done converting DOCX to XHTML.....!");

                    DocumentFormat.OpenXml.Wordprocessing.SplitPageBreakAndParagraphMark pgBr = wpd.MainDocumentPart.Document.Body.Descendants <DocumentFormat.OpenXml.Wordprocessing.SplitPageBreakAndParagraphMark>().FirstOrDefault();

                    // FirstOrDefault returns null when the body contains no such element.
                    if (pgBr != null)
                    {
                        Console.WriteLine(pgBr.InnerXml);
                    }
                }
            }
        }
    /// <summary>
    /// Converts a Word document to an HTML5 file. Each supported image is saved into an
    /// "img" sub-directory of <paramref name="outputDirectory"/> and referenced from the
    /// markup under the fixed URL prefix "/Resource/emw/DocTemp/img/".
    /// </summary>
    /// <param name="file">Path of the Word document to convert.</param>
    /// <param name="outputDirectory">
    /// Directory that receives the HTML file and the "img" sub-directory; it is created when
    /// missing. When null or empty, paths are relative to the current directory.
    /// </param>
    /// <returns>Full path of the HTML file that was written.</returns>
    public static string ConvertToHtml(string file, string outputDirectory)
    {
        var fi = new FileInfo(file);

        Console.WriteLine(fi.Name);
        byte[] byteArray = File.ReadAllBytes(fi.FullName);
        using (MemoryStream memoryStream = new MemoryStream())
        {
            // Work on an in-memory copy: the document is opened as editable.
            memoryStream.Write(byteArray, 0, byteArray.Length);
            using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
            {
                // ChangeExtension swaps only the real extension, whatever its casing. A textual
                // Replace(".docx", ".html") left names such as "REPORT.DOCX" unchanged, so the
                // HTML was written under the source document's own file name.
                var destFileName = new FileInfo(Path.ChangeExtension(fi.Name, ".html"));
                if (!string.IsNullOrEmpty(outputDirectory))
                {
                    // CreateDirectory is a no-op when the directory already exists.
                    DirectoryInfo di = Directory.CreateDirectory(outputDirectory);
                    destFileName = new FileInfo(Path.Combine(di.FullName, destFileName.Name));
                }

                // Path.Combine inserts the separator when outputDirectory has no trailing one
                // and avoids a hard-coded backslash.
                string imageDirectory = Path.Combine(outputDirectory ?? string.Empty, "img");
                int    imageCounter   = 0;

                var pageTitle = fi.FullName;
                var part      = wDoc.CoreFilePropertiesPart;
                if (part != null)
                {
                    // Use the document's dc:title when present; otherwise keep the file path.
                    pageTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault() ?? fi.FullName;
                }

                // TODO: Determine max-width from size of content area.
                WmlToHtmlConverterSettings settings = new WmlToHtmlConverterSettings()
                {
                    AdditionalCss                       = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                    PageTitle                           = pageTitle,
                    FabricateCssClasses                 = true,
                    CssClassPrefix                      = "pt-",
                    RestrictToSupportedLanguages        = false,
                    RestrictToSupportedNumberingFormats = false,
                    ImageHandler                        = imageInfo =>
                    {
                        ++imageCounter;

                        // Invariant lower-casing keeps the comparison independent of the current culture.
                        string subtype = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();

                        ImageFormat imageFormat = null;
                        switch (subtype)
                        {
                            case "png":
                                imageFormat = ImageFormat.Png;
                                break;
                            case "gif":
                                imageFormat = ImageFormat.Gif;
                                break;
                            case "bmp":
                                imageFormat = ImageFormat.Bmp;
                                break;
                            case "jpeg":
                                imageFormat = ImageFormat.Jpeg;
                                break;
                            case "tiff":
                                // Convert tiff to gif.
                                imageFormat = ImageFormat.Gif;
                                break;
                            case "x-wmf":
                                imageFormat = ImageFormat.Wmf;
                                break;
                        }

                        // If the image format isn't one that we expect, ignore it,
                        // and don't return markup for the link.
                        if (imageFormat == null)
                        {
                            return null;
                        }

                        // The file is named after the alt text. Fall back to a counter-based name
                        // when there is no alt text, and replace characters that are not valid
                        // in a file name so the save below cannot fail on them.
                        string baseName = imageInfo.AltText;
                        if (string.IsNullOrWhiteSpace(baseName))
                        {
                            baseName = "image" + imageCounter;
                        }
                        foreach (char invalid in Path.GetInvalidFileNameChars())
                        {
                            baseName = baseName.Replace(invalid, '_');
                        }
                        string imageFileName = baseName + "." + imageFormat;

                        try
                        {
                            Directory.CreateDirectory(imageDirectory);

                            // Save the image to disk. Encoding straight to the file replaces the
                            // earlier in-memory base64 encode whose result was never used.
                            imageInfo.Bitmap.Save(Path.Combine(imageDirectory, imageFileName), imageFormat);
                        }
                        catch (System.Runtime.InteropServices.ExternalException)
                        {
                            // The image could not be encoded or written; omit it from the output.
                            return null;
                        }

                        string url = "/Resource/emw/DocTemp/img/" + imageFileName;

                        XElement img = new XElement(Xhtml.img,
                                                    new XAttribute(NoNamespace.src, url),
                                                    imageInfo.ImgStyleAttribute,
                                                    imageInfo.AltText != null ?
                                                    new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                        return img;
                    }
                };
                XElement htmlElement = WmlToHtmlConverter.ConvertToHtml(wDoc, settings);

                // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
                // we are using HTML5.
                var html = new XDocument(
                    new XDocumentType("html", null, null, null),
                    htmlElement);

                var htmlString = html.ToString(SaveOptions.DisableFormatting);
                File.WriteAllText(destFileName.FullName, htmlString, Encoding.UTF8);
                return destFileName.FullName;
            }
        }
    }