/// <summary>
/// Converts the Word document at <paramref name="filePath"/> to an HTML fragment and
/// applies the requested clean-up steps to it.
/// </summary>
/// <remarks>
/// The document is rendered by <c>WordDntb.buildWord</c> into a temporary ".html" file, and the
/// inner content of its <c>body</c> element is post-processed. On success the source document
/// at <paramref name="filePath"/> is deleted. The temporary HTML file is removed whether or not
/// the conversion succeeds. Any exception raised during conversion is logged through
/// <c>LogUtils.AddErrorLog</c> and an empty string is returned instead.
/// </remarks>
/// <param name="siteId">Id of the site whose upload directory receives the images referenced by the document.</param>
/// <param name="filePath">Path of the Word document to convert.</param>
/// <param name="isClearFormat">When true, runs <c>HtmlClearUtils.ClearFormat</c> on the content.</param>
/// <param name="isFirstLineIndent">When true, runs <c>HtmlClearUtils.FirstLineIndent</c> on the content.</param>
/// <param name="isClearFontSize">When true, runs <c>HtmlClearUtils.ClearFontSize</c> on the content.</param>
/// <param name="isClearFontFamily">When true, runs <c>HtmlClearUtils.ClearFontFamily</c> on the content.</param>
/// <param name="isClearImages">
/// When true, <c>img</c> tags are stripped; otherwise the referenced temporary image files are moved
/// into the site's upload directory and their references in the content are rewritten.
/// </param>
/// <returns>
/// The trimmed HTML fragment, or an empty string when <paramref name="filePath"/> is null/empty
/// or the conversion fails.
/// </returns>
public static string Parse(int siteId, string filePath, bool isClearFormat, bool isFirstLineIndent, bool isClearFontSize, bool isClearFontFamily, bool isClearImages)
{
    if (string.IsNullOrEmpty(filePath))
    {
        return string.Empty;
    }

    var filename = PathUtils.GetFileNameWithoutExtension(filePath);

    // Location where the converted HTML document is saved. Declared outside the try block
    // so that the finally block can remove the file even when the conversion fails part-way.
    string saveFilePath = null;

    try
    {
        saveFilePath = PathUtils.GetTemporaryFilesPath(filename + ".html");
        FileUtils.DeleteFileIfExists(saveFilePath);
        WordDntb.buildWord(filePath, saveFilePath);

        var parsedContent = FileUtils.ReadText(saveFilePath, System.Text.Encoding.Default);
        parsedContent = RegexUtils.GetInnerContent("body", parsedContent);

        if (isClearFormat)
        {
            parsedContent = HtmlClearUtils.ClearFormat(parsedContent);
        }
        if (isFirstLineIndent)
        {
            parsedContent = HtmlClearUtils.FirstLineIndent(parsedContent);
        }
        if (isClearFontSize)
        {
            parsedContent = HtmlClearUtils.ClearFontSize(parsedContent);
        }
        if (isClearFontFamily)
        {
            parsedContent = HtmlClearUtils.ClearFontFamily(parsedContent);
        }

        if (isClearImages)
        {
            parsedContent = StringUtils.StripTags(parsedContent, "img");
        }
        else
        {
            var siteInfo = SiteManager.GetSiteInfo(siteId);
            var imageFileNameArrayList = RegexUtils.GetOriginalImageSrcs(parsedContent);
            if (imageFileNameArrayList != null && imageFileNameArrayList.Count > 0)
            {
                foreach (var imageFileName in imageFileNameArrayList)
                {
                    var imageFilePath = PathUtils.GetTemporaryFilesPath(imageFileName);

                    // Skip references whose file is missing before doing any upload-path work.
                    if (!FileUtils.IsFileExists(imageFilePath))
                    {
                        continue;
                    }

                    var fileExtension = PathUtils.GetExtension(imageFilePath);
                    var uploadDirectoryPath = PathUtility.GetUploadDirectoryPath(siteInfo, fileExtension);
                    var uploadDirectoryUrl = PageUtility.GetSiteUrlByPhysicalPath(siteInfo, uploadDirectoryPath, true);

                    var uploadFileName = PathUtility.GetUploadFileName(siteInfo, imageFilePath);
                    var destFilePath = PathUtils.Combine(uploadDirectoryPath, uploadFileName);

                    FileUtils.MoveFile(imageFilePath, destFilePath, false);
                    parsedContent = parsedContent.Replace(imageFileName, PageUtils.Combine(uploadDirectoryUrl, uploadFileName));

                    FileUtils.DeleteFileIfExists(imageFilePath);
                }
            }
        }

        // The source document is only removed once the conversion has succeeded.
        FileUtils.DeleteFileIfExists(filePath);

        return parsedContent.Trim();
    }
    catch (Exception ex)
    {
        LogUtils.AddErrorLog(ex);
        return string.Empty;
    }
    finally
    {
        // Always remove the intermediate HTML file so failed conversions do not leave
        // temporary files behind. A failure here is logged but must not replace the result.
        if (saveFilePath != null)
        {
            try
            {
                FileUtils.DeleteFileIfExists(saveFilePath);
            }
            catch (Exception cleanupEx)
            {
                LogUtils.AddErrorLog(cleanupEx);
            }
        }
    }
}
/// <summary>
/// Converts a .docx file to HTML and returns a title together with the content, which is the
/// generated <c>style</c> element followed by the inner HTML of the <c>body</c> element.
/// </summary>
/// <remarks>
/// The title starts as the document's <c>dc:title</c> core property when it is present and
/// non-empty, otherwise as the file name without its extension. When
/// <c>settings.IsFirstLineTitle</c> is set, the text of the first <c>p</c> element found by
/// <c>RegexUtils.GetInnerContent</c> replaces it (if non-empty).
/// </remarks>
/// <param name="docxFilePath">Path of the Word document to convert.</param>
/// <param name="settings">
/// Conversion options. This method reads <c>IsClearImages</c>, <c>ImageDirectoryPath</c>,
/// <c>ImageDirectoryUrl</c>, <c>IsSaveHtml</c>, <c>HtmlDirectoryPath</c>, <c>IsFirstLineTitle</c>,
/// <c>IsFirstLineRemove</c>, <c>IsClearFormat</c>, <c>IsFirstLineIndent</c>, <c>IsClearFontSize</c>
/// and <c>IsClearFontFamily</c>.
/// </param>
/// <returns>A tuple of the resolved title and the HTML content.</returns>
public static (string title, string content) ConvertToHtml(string docxFilePath, ConverterSettings settings)
{
    string title;
    string content;

    var fi = new FileInfo(docxFilePath);
    var byteArray = File.ReadAllBytes(fi.FullName);

    // The document is opened (editable) on an in-memory copy of the file's bytes.
    using (var memoryStream = new MemoryStream())
    {
        memoryStream.Write(byteArray, 0, byteArray.Length);
        using (var wDoc = WordprocessingDocument.Open(memoryStream, true))
        {
            // Default title: the file name without its extension.
            title = PathUtils.GetFileNameWithoutExtension(fi.FullName);

            // Prefer the title stored in the document's core properties. It is used verbatim:
            // it is free text, not a path, so it must not go through file-name parsing
            // (which would cut a title such as "1.Overview" at its last '.').
            var part = wDoc.CoreFilePropertiesPart;
            if (part != null)
            {
                var documentTitle = (string)part.GetXDocument().Descendants(DC.title).FirstOrDefault();
                if (!string.IsNullOrEmpty(documentTitle))
                {
                    title = documentTitle;
                }
            }

            // TODO: Determine max-width from size of content area.
            var htmlSettings = new HtmlConverterSettings
            {
                // AdditionalCss = "body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
                PageTitle = title,
                FabricateCssClasses = true,
                CssClassPrefix = "pt-",
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler = imageInfo =>
                {
                    // Returning null means no markup is produced for the image.
                    if (settings.IsClearImages || string.IsNullOrEmpty(settings.ImageDirectoryPath))
                    {
                        return null;
                    }

                    DirectoryUtils.CreateDirectoryIfNotExists(settings.ImageDirectoryPath);

                    // The content type is expected to look like "image/png"; ignore the image
                    // when it is missing or has no subtype instead of failing the conversion.
                    var contentTypeParts = imageInfo.ContentType?.Split('/');
                    if (contentTypeParts == null || contentTypeParts.Length < 2)
                    {
                        return null;
                    }

                    var extension = contentTypeParts[1].ToLowerInvariant();
                    ImageFormat imageFormat;
                    switch (extension)
                    {
                        case "png":
                            imageFormat = ImageFormat.Png;
                            break;
                        case "gif":
                            imageFormat = ImageFormat.Gif;
                            break;
                        case "bmp":
                            imageFormat = ImageFormat.Bmp;
                            break;
                        case "jpeg":
                            imageFormat = ImageFormat.Jpeg;
                            break;
                        case "tiff":
                            // Convert tiff to gif.
                            extension = "gif";
                            imageFormat = ImageFormat.Gif;
                            break;
                        case "x-wmf":
                            extension = "wmf";
                            imageFormat = ImageFormat.Wmf;
                            break;
                        default:
                            // If the image format isn't one that we expect, ignore it,
                            // and don't return markup for the link.
                            return null;
                    }

                    var imageFileName = StringUtils.GetShortGuid(false) + "." + extension;
                    var imageFilePath = PathUtils.Combine(settings.ImageDirectoryPath, imageFileName);
                    try
                    {
                        imageInfo.Bitmap.Save(imageFilePath, imageFormat);
                    }
                    catch (System.Runtime.InteropServices.ExternalException)
                    {
                        // The image could not be written; leave it out of the output.
                        return null;
                    }

                    var imageSource = PageUtils.Combine(settings.ImageDirectoryUrl, imageFileName);

                    var img = new XElement(Xhtml.img,
                        new XAttribute(NoNamespace.src, imageSource),
                        imageInfo.ImgStyleAttribute,
                        imageInfo.AltText != null ? new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
                    return img;
                }
            };

            var htmlElement = HtmlConverter.ConvertToHtml(wDoc, htmlSettings);

            // Produce HTML document with <!DOCTYPE html > declaration to tell the browser
            // we are using HTML5.
            var html = new XDocument(
                new XDocumentType("html", null, null, null),
                htmlElement);

            // Note: the xhtml returned by ConvertToHtmlTransform contains objects of type
            // XEntity. PtOpenXmlUtil.cs define the XEntity class. See
            // http://blogs.msdn.com/ericwhite/archive/2010/01/21/writing-entity-references-using-linq-to-xml.aspx
            // for detailed explanation.
            //
            // If you further transform the XML tree returned by ConvertToHtmlTransform, you
            // must do it correctly, or entities will not be serialized properly.
            var htmlString = html.ToString(SaveOptions.DisableFormatting);

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(htmlString);

            // SelectSingleNode yields null when nothing matches, so fall back to an empty
            // string rather than dereferencing a missing <style> or <body> node.
            var styleNode = htmlDoc.DocumentNode.SelectSingleNode("//style");
            var bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
            var style = styleNode?.OuterHtml ?? string.Empty;
            var body = bodyNode?.InnerHtml ?? string.Empty;

            content = $"{style}{Environment.NewLine}{body}";

            // Optionally keep the complete generated HTML document next to other saved files;
            // only done when the target directory already exists.
            if (settings.IsSaveHtml && !string.IsNullOrEmpty(settings.HtmlDirectoryPath) && DirectoryUtils.IsDirectoryExists(settings.HtmlDirectoryPath))
            {
                var htmlFilePath = PathUtils.Combine(settings.HtmlDirectoryPath, PathUtils.GetFileNameWithoutExtension(docxFilePath) + ".html");
                File.WriteAllText(htmlFilePath, htmlString, Encoding.UTF8);
            }
        }
    }

    if (settings.IsFirstLineTitle)
    {
        var contentTitle = RegexUtils.GetInnerContent("p", content);
        contentTitle = StringUtils.StripTags(contentTitle);

        // Remove the untrimmed text from the content first, so the match is made against
        // exactly what appears in the markup.
        if (!string.IsNullOrEmpty(contentTitle) && settings.IsFirstLineRemove)
        {
            content = StringUtils.ReplaceFirst(contentTitle, content, string.Empty);
        }

        if (!string.IsNullOrEmpty(contentTitle))
        {
            // string.Trim() removes every Unicode white-space character, including the
            // ideographic space (U+3000), so no additional character-specific trim is needed.
            contentTitle = contentTitle.Trim();
            contentTitle = StringUtils.StripEntities(contentTitle);
        }

        if (!string.IsNullOrEmpty(contentTitle))
        {
            title = contentTitle;
        }
    }

    if (settings.IsClearFormat)
    {
        content = HtmlClearUtils.ClearFormat(content);
    }
    if (settings.IsFirstLineIndent)
    {
        content = HtmlClearUtils.FirstLineIndent(content);
    }
    if (settings.IsClearFontSize)
    {
        content = HtmlClearUtils.ClearFontSize(content);
    }
    if (settings.IsClearFontFamily)
    {
        content = HtmlClearUtils.ClearFontFamily(content);
    }

    // NOTE(review): when IsFirstLineTitle is also set, the first-line text has already been
    // removed above, so this removes the next occurrence of the title — confirm that is intended.
    if (settings.IsFirstLineRemove && !string.IsNullOrEmpty(title))
    {
        content = StringUtils.ReplaceFirst(title, content, string.Empty);
    }

    // Last-resort fallback so that callers always receive a non-empty title when possible.
    if (string.IsNullOrEmpty(title))
    {
        title = PathUtils.GetFileNameWithoutExtension(docxFilePath);
    }

    return (title, content);
}