/// <summary>
/// Loads the document identified by <paramref name="id"/>, processes it with the
/// conversion component named by the "Component" app setting, and returns the
/// processed result as JSON.
/// </summary>
/// <param name="id">Identifier handed to the repository lookup.</param>
/// <returns>
/// A JSON result (GET requests allowed) wrapping the processed entity, or
/// <c>null</c> when <c>ProcessDocument</c> produces no entity.
/// </returns>
public ActionResult read(string id)
{
    DocumentEntity doc = _repository.Single<DocumentEntity>(id);

    // The setting must name a ConvertComponentType member exactly (case-sensitive parse);
    // Enum.Parse throws when the setting is missing or does not match a member.
    var componentName = ConfigurationManager.AppSettings["Component"];
    ConvertComponentType selectenum = (ConvertComponentType)Enum.Parse(typeof(ConvertComponentType), componentName, false);
    ConvertDocument convertdoc = new ConvertDocument(doc, selectenum);

    // Historical note: whether an already-parsed document needs to be parsed again was
    // originally left open, to be handled together with the data-storage logic.
    // Update 2014-12-11: done; that logic is implemented inside ProcessDocument.
    JsonDocEntity parseEntity = convertdoc.ProcessDocument();

    // If the document could not be processed, return immediately without a result.
    if (parseEntity == null)
    {
        return null;
    }

    // If the document has already been stored, skip the storage step.
    if (!convertdoc._docEntity.isStore)
    {
        convertdoc._docEntity.isStore = true;
        _repository.Update<DocumentEntity>(convertdoc._docEntity);
    }

    return Json(parseEntity, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Splits the first HTML content entry of a converted document into one page per
/// matched div, rewrites image references to the document's virtual image folder,
/// and returns the JSON view of the entity.
/// </summary>
/// <param name="_docEntity">Document entity to parse; it is updated in place.</param>
/// <returns>The result of <c>JsonDocEntity.Convert</c> for <paramref name="_docEntity"/>.</returns>
public override JsonDocEntity ParseHtmlToEntity(DocumentEntity _docEntity)
{
    // Only documents that are converted and not yet parsed are processed;
    // anything else is passed straight to JsonDocEntity.Convert.
    if (_docEntity.isConvert && !_docEntity.isParse)
    {
        var htmldata = _docEntity.HtmlData.HtmlContent[0];

        // Write the extracted style block to a css file only when no style URL is set yet.
        if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl))
        {
            var style = STYLE_REGEX.Match(htmldata).Groups["style"].Value;
            FileUtils.WriteStyleFile(style, Path.Combine(_docEntity.ResourcesPath, "wpsStyle.css"));
            _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/" + "wpsStyle.css";
        }

        // Clear the property that stores the parsed HTML on the current object.
        // Documents that were already parsed do not need to be processed again.
        // This step should later be refactored together with the storage logic.
        _docEntity.HtmlData.ParseContentList = new List<HtmlParseData>();

        var imageBase = _docEntity.VirtualResourcesPath + "/" + _docEntity.ImageFolder + "/";
        MatchCollection divmatches = DIV_REGEX.Matches(htmldata);
        var count = 1;
        foreach (Match divmatcher in divmatches)
        {
            HtmlParseData divcontent = new HtmlParseData();
            divcontent.pagecount = count;
            var div = divmatcher.Groups["div"].Value;

            // Collect distinct image sources so each one is replaced only once.
            HashSet<string> sources = new HashSet<string>();
            foreach (Match imagematcher in DIV_IMAGE_REGEX.Matches(div))
            {
                var src = imagematcher.Groups["src"].Value;

                // An empty capture would make String.Replace throw ArgumentException
                // (oldValue must not be empty), so it is skipped.
                if (src.Length > 0)
                {
                    sources.Add(src);
                }
            }

            // Point every occurrence of each source at the virtual image folder, keeping only the file name.
            foreach (var item in sources)
            {
                div = div.Replace(item, imageBase + Path.GetFileName(item));
            }

            divcontent.content = div;
            _docEntity.HtmlData.ParseContentList.Add(divcontent);
            count++;
        }

        _docEntity.ConvertCompleteTime = DateTime.Now;
        _docEntity.isParse = true;
    }

    return JsonDocEntity.Convert(_docEntity);
}
/// <summary>
/// Parses every HTML content entry of the document (one per worksheet) into table
/// pages, rewrites image references to the document's virtual image folder, and
/// returns the JSON view of the entity.
/// </summary>
/// <param name="_docEntity">Document entity to parse; it is updated in place.</param>
/// <returns>The result of <c>JsonDocEntity.Convert</c> for <paramref name="_docEntity"/>.</returns>
public override JsonDocEntity ParseHtmlToEntity(Common.Entities.DocumentEntity _docEntity)
{
    // Documents that were already parsed do not need to be processed again.
    // NOTE(review): ParseContentList is not reset here; pages are appended to the
    // existing list, which is assumed to be initialized elsewhere. Confirm.
    // This step should later be refactored together with the storage logic.
    if (!_docEntity.isParse)
    {
        var imageBase = _docEntity.VirtualResourcesPath + "/" + _docEntity.ImageFolder + "/";

        foreach (var htmldata in _docEntity.HtmlData.HtmlContent)
        {
            // Get the name of the current worksheet.
            var worksheetname = TITLE_REGEX.Match(htmldata).Groups["name"].Value;
            MatchCollection tablematches = TABLE_REGEX.Matches(htmldata);

            // The style file is written only while no style URL is set, so only the
            // first processed entry contributes its style block.
            if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl))
            {
                var style = STYLE_REGEX.Match(htmldata).Groups["style"].Value;
                FileUtils.WriteStyleFile(style, Path.Combine(_docEntity.ResourcesPath, "etStyle.css"));
                _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/" + "etStyle.css";
            }

            // Page numbering restarts at 1 for every worksheet.
            var count = 1;
            foreach (Match tablematcher in tablematches)
            {
                HtmlParseData divcontent = new HtmlParseData();
                divcontent.pagecount = count;
                divcontent.title = worksheetname;
                var table = tablematcher.Groups["table"].Value;

                // Collect distinct image sources so each one is replaced only once.
                HashSet<string> sources = new HashSet<string>();
                foreach (Match imagematcher in DIV_IMAGE_REGEX.Matches(table))
                {
                    var src = imagematcher.Groups["src"].Value;

                    // An empty capture would make String.Replace throw ArgumentException
                    // (oldValue must not be empty), so it is skipped.
                    if (src.Length > 0)
                    {
                        sources.Add(src);
                    }
                }

                // Point every occurrence of each source at the virtual image folder, keeping only the file name.
                foreach (var item in sources)
                {
                    table = table.Replace(item, imageBase + Path.GetFileName(item));
                }

                divcontent.content = table;
                _docEntity.HtmlData.ParseContentList.Add(divcontent);
                count++;
            }
        }

        _docEntity.isParse = true;
        _docEntity.ConvertCompleteTime = DateTime.Now;
    }

    return JsonDocEntity.Convert(_docEntity);
}
/// <summary>
/// Builds one page entry per page number, each pointing at its small and big
/// thumbnail image, and returns the JSON view of the entity.
/// </summary>
/// <param name="_docEntity">Document entity to fill; it is updated in place.</param>
/// <returns>The result of <c>JsonDocEntity.Convert</c> for <paramref name="_docEntity"/>.</returns>
public override JsonDocEntity ParseHtmlToEntity(DocumentEntity _docEntity)
{
    // Already parsed: nothing to rebuild, just convert.
    if (_docEntity.isParse)
    {
        return JsonDocEntity.Convert(_docEntity);
    }

    var resourceRoot = _docEntity.VirtualResourcesPath;

    // Pages are numbered from 1 up to and including PageNumber.
    for (int page = 1; page < _docEntity.HtmlData.PageNumber + 1; page++)
    {
        // Small and big variants share the same file name and differ only by folder.
        var thumbnailFile = String.Format("Thumbnail{0}.jpg", page);

        HtmlParseData pageEntry = new HtmlParseData
        {
            pagecount = page,
            thumbUrl = resourceRoot + "/Small/" + thumbnailFile,
            url = resourceRoot + "/Big/" + thumbnailFile
        };

        _docEntity.HtmlData.ParseContentList.Add(pageEntry);
    }

    _docEntity.ConvertCompleteTime = DateTime.Now;
    _docEntity.isParse = true;

    return JsonDocEntity.Convert(_docEntity);
}
/// <summary>
/// Reads the converted Excel output files from the document's "ConvertFolder.files"
/// directory, splits each worksheet file into table pages, rewrites image references
/// to the document's virtual image folder, and returns the JSON view of the entity.
/// </summary>
/// <param name="_docEntity">Document entity to parse; it is updated in place.</param>
/// <returns>The result of <c>JsonDocEntity.Convert</c> for <paramref name="_docEntity"/>.</returns>
public override JsonDocEntity ParseHtmlToEntity(Common.Entities.DocumentEntity _docEntity)
{
    if (!_docEntity.isParse)
    {
        // Reading the Excel files involves the sheet relationships inside the frame,
        // so for now it is done as part of the parse logic.
        var outputpath = Path.Combine(_docEntity.ResourcesPath, "ConvertFolder.files");
        string[] files = Directory.GetFiles(outputpath);
        Dictionary<string, string> parseData = new Dictionary<string, string>();
        string tabstrip = "";

        foreach (var filename in files)
        {
            // File names are identifiers, not linguistic text, so compare ordinally
            // instead of with the current culture.
            if (filename.EndsWith("tabstrip.htm", StringComparison.Ordinal))
            {
                tabstrip = FileUtils.ReadFile(filename);
            }

            if (filename.EndsWith("stylesheet.css", StringComparison.Ordinal))
            {
                FileUtils.WriteStyleFile(FileUtils.ReadFile(filename), Path.Combine(_docEntity.ResourcesPath, "etStyle.css"));
            }
        }

        // Each match in the tab strip supplies a worksheet file ("src") and its name ("Name").
        // Dictionary.Add throws if two matches carry the same name.
        MatchCollection mapping = Structure_Mapping.Matches(tabstrip);
        foreach (Match match in mapping)
        {
            var path = Path.Combine(outputpath, match.Groups["src"].Value);
            var content = Util.FileUtils.ReadFile(path);
            var name = match.Groups["Name"].Value;
            parseData.Add(name, content);
            _docEntity.HtmlData.HtmlContent.Add(content);
        }

        var imageBase = _docEntity.VirtualResourcesPath + "/" + _docEntity.ImageFolder + "/";

        foreach (var parsedataitem in parseData)
        {
            // The worksheet name comes from the tab strip mapping (the dictionary key).
            var worksheetname = parsedataitem.Key;
            MatchCollection tablematches = TABLE_REGEX.Matches(parsedataitem.Value);

            if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl))
            {
                _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/" + "etStyle.css";
            }

            // Page numbering restarts at 1 for every worksheet.
            var count = 1;
            foreach (Match tablematcher in tablematches)
            {
                HtmlParseData divcontent = new HtmlParseData();
                divcontent.pagecount = count;
                divcontent.title = worksheetname;
                var table = tablematcher.Groups["table"].Value;

                // Collect distinct image references so each one is replaced only once.
                // NOTE(review): assumes DIV_IMAGE_REGEX defines a group named "href". Confirm.
                HashSet<string> sources = new HashSet<string>();
                foreach (Match imagematcher in DIV_IMAGE_REGEX.Matches(table))
                {
                    var src = imagematcher.Groups["href"].Value;

                    // An empty capture would make String.Replace throw ArgumentException
                    // (oldValue must not be empty), so it is skipped.
                    if (src.Length > 0)
                    {
                        sources.Add(src);
                    }
                }

                // Point every occurrence of each reference at the virtual image folder, keeping only the file name.
                foreach (var item in sources)
                {
                    table = table.Replace(item, imageBase + Path.GetFileName(item));
                }

                divcontent.content = table;
                _docEntity.HtmlData.ParseContentList.Add(divcontent);
                count++;
            }
        }

        _docEntity.isParse = true;
        _docEntity.ConvertCompleteTime = DateTime.Now;
    }

    return JsonDocEntity.Convert(_docEntity);
}