public override DocumentEntity ConvertToHtml(Common.Entities.DocumentEntity _docEntity) { if (!_docEntity.isConvert && String.IsNullOrEmpty(_docEntity.ConvertError)) { Aspose.Cells.Workbook xls = new Aspose.Cells.Workbook(_docEntity.FilePath); _docEntity.HtmlData = new HtmlParseContext(); _docEntity.HtmlData.PageNumber = xls.Worksheets.Count; foreach (Aspose.Cells.Worksheet item in xls.Worksheets) { Aspose.Cells.HtmlSaveOptions htmlsaveoption = new Aspose.Cells.HtmlSaveOptions(Aspose.Cells.SaveFormat.Html); htmlsaveoption.AttachedFilesDirectory = Path.Combine(_docEntity.ResourcesPath, "image"); htmlsaveoption.HtmlCrossStringType = Aspose.Cells.HtmlCrossType.Cross; using (MemoryStream htmlStream = new MemoryStream()) { try { xls.Worksheets.ActiveSheetIndex = item.Index; xls.Save(htmlStream, htmlsaveoption); _docEntity.HtmlData.HtmlContent.Add(Encoding.UTF8.GetString(htmlStream.ToArray())); _docEntity.isConvert = true; } catch (Exception e) { _docEntity.isConvert = false; _docEntity.ConvertError = e.Message; } } } } return(_docEntity); }
public override DocumentEntity ConvertToHtml(Common.Entities.DocumentEntity _docEntity) { if (!_docEntity.isConvert && String.IsNullOrEmpty(_docEntity.ConvertError)) { //Aspose.Cells.Workbook xls = new Aspose.Cells.Workbook(_docEntity.FilePath); Microsoft.Office.Interop.Excel.Application excel = new Microsoft.Office.Interop.Excel.Application(); Workbook xls = excel.Workbooks.Open(_docEntity.FilePath); _docEntity.HtmlData = new HtmlParseContext(); _docEntity.HtmlData.PageNumber = xls.Worksheets.Count; var outputpath = Path.Combine(_docEntity.ResourcesPath, "ConvertFolder"); _docEntity.ImageFolder = "ConvertFolder.files"; //if (!Directory.Exists(outputpath)) // Directory.CreateDirectory(outputpath); try { xls.SaveAs(outputpath, XlFileFormat.xlHtml); //_docEntity.HtmlData.HtmlContent.Add(Encoding.UTF8.GetString(htmlStream.ToArray())); _docEntity.isConvert = true; } catch (Exception e) { _docEntity.isConvert = false; _docEntity.ConvertError = e.Message; } xls.Close(); excel.Quit(); xls = null; excel = null; } return(_docEntity); }
public override JsonDocEntity ParseHtmlToEntity(Common.Entities.DocumentEntity _docEntity) { ///清空当前对象存储HTML解析格式的属性 ///如果已经解析过的文档就不需要重复处理了 ///此动作后续需要配合存储一起重构 //_docEntity.HtmlData.ParseContentList = new List<HtmlParseData>(); if (!_docEntity.isParse) { foreach (var htmldata in _docEntity.HtmlData.HtmlContent) { ///获取当前工作表的表名 var worksheetname = TITLE_REGEX.Match(htmldata).Groups["name"].Value; MatchCollection tablematches = TABLE_REGEX.Matches(htmldata); if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl)) { var style = STYLE_REGEX.Match(htmldata).Groups["style"].Value; FileUtils.WriteStyleFile(style, Path.Combine(_docEntity.ResourcesPath, "etStyle.css")); _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/" + "etStyle.css"; } var count = 1; foreach (Match tablematcher in tablematches) { HtmlParseData divcontent = new HtmlParseData(); divcontent.pagecount = count; divcontent.title = worksheetname; var table = tablematcher.Groups["table"].Value; var imagematchers = DIV_IMAGE_REGEX.Matches(table); HashSet <String> hs = new HashSet <string>(); foreach (Match iamgematcher in imagematchers) { var src = iamgematcher.Groups["src"].Value; hs.Add(src); } foreach (var item in hs) { table = table.Replace(item, _docEntity.VirtualResourcesPath + "/" + _docEntity.ImageFolder + "/" + Path.GetFileName(item)); } divcontent.content = table; _docEntity.HtmlData.ParseContentList.Add(divcontent); count++; } } _docEntity.isParse = true; _docEntity.ConvertCompleteTime = DateTime.Now; } return(JsonDocEntity.Convert(_docEntity)); }
public override JsonDocEntity ParseHtmlToEntity(Common.Entities.DocumentEntity _docEntity) { if (!_docEntity.isParse) { //Excel的文件读取因为涉及到frame里的表关系,所以暂时放置在Parse的逻辑中完成: var outputpath = Path.Combine(_docEntity.ResourcesPath, "ConvertFolder.files"); string[] Files = Directory.GetFiles(outputpath); Dictionary <string, string> ParseData = new Dictionary <string, string>(); string tabstrip = ""; foreach (var filename in Files) { if (filename.EndsWith("tabstrip.htm")) { tabstrip = FileUtils.ReadFile(filename); } if (filename.EndsWith("stylesheet.css")) { FileUtils.WriteStyleFile(FileUtils.ReadFile(filename), Path.Combine(_docEntity.ResourcesPath, "etStyle.css")); } } MatchCollection mapping = Structure_Mapping.Matches(tabstrip); foreach (Match match in mapping) { var path = Path.Combine(outputpath, match.Groups["src"].Value); var content = Util.FileUtils.ReadFile(path); var name = match.Groups["Name"].Value; ParseData.Add(name, content); _docEntity.HtmlData.HtmlContent.Add(content); } foreach (var parsedataitem in ParseData) { ///获取当前工作表的表名 var worksheetname = parsedataitem.Key; //TITLE_REGEX.Match(parsedataitem.Value).Groups["name"].Value; MatchCollection tablematches = TABLE_REGEX.Matches(parsedataitem.Value); if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl)) { _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/" + "etStyle.css"; } var count = 1; foreach (Match tablematcher in tablematches) { HtmlParseData divcontent = new HtmlParseData(); divcontent.pagecount = count; divcontent.title = worksheetname; var table = tablematcher.Groups["table"].Value; var imagematchers = DIV_IMAGE_REGEX.Matches(table); HashSet <String> hs = new HashSet <string>(); foreach (Match iamgematcher in imagematchers) { var src = iamgematcher.Groups["href"].Value; hs.Add(src); } foreach (var item in hs) { table = table.Replace(item, _docEntity.VirtualResourcesPath + "/" + _docEntity.ImageFolder + "/" + Path.GetFileName(item)); } divcontent.content = table; _docEntity.HtmlData.ParseContentList.Add(divcontent); count++; } } _docEntity.isParse = true; _docEntity.ConvertCompleteTime = DateTime.Now; } return(JsonDocEntity.Convert(_docEntity)); }