Пример #1
0
        /// <summary>
        /// Loads the document identified by <paramref name="id"/>, processes it with the converter
        /// named by the "Component" application setting, and returns the parsed result as JSON.
        /// </summary>
        /// <param name="id">Key handed to the repository's <c>Single</c> lookup.</param>
        /// <returns>
        /// A JSON result (GET requests allowed) wrapping the parsed entity, or <c>null</c> when
        /// <c>ProcessDocument</c> yields <c>null</c>.
        /// </returns>
        public ActionResult read(string id)
        {
            DocumentEntity doc = _repository.Single<DocumentEntity>(id);

            // The "Component" app setting names the ConvertComponentType member to use.
            // The parse is case-sensitive (ignoreCase: false); Enum.Parse throws if the
            // setting is missing or cannot be parsed.
            var componentName = ConfigurationManager.AppSettings["Component"];
            ConvertComponentType selectenum =
                (ConvertComponentType)Enum.Parse(typeof(ConvertComponentType), componentName, false);

            ConvertDocument convertdoc = new ConvertDocument(doc, selectenum);

            // Original author's note: whether an already-parsed document must be parsed again
            // was an open question; as of 2014-12-11 that logic lives inside ProcessDocument.
            JsonDocEntity parseEntity = convertdoc.ProcessDocument();

            // Parsing produced nothing: return immediately.
            if (parseEntity == null)
            {
                return null;
            }

            // Skip the storage step when the document has already been marked as stored.
            if (!convertdoc._docEntity.isStore)
            {
                convertdoc._docEntity.isStore = true;
                _repository.Update<DocumentEntity>(convertdoc._docEntity);
            }

            return Json(parseEntity, JsonRequestBehavior.AllowGet);
        }
Пример #2
0
        /// <summary>
        /// Splits the first HTML fragment of a converted document into one page entry per
        /// <c>DIV_REGEX</c> match and rewrites image sources to the virtual image folder.
        /// </summary>
        /// <param name="_docEntity">Document whose <c>HtmlData</c> is read and updated in place.</param>
        /// <returns>The result of <c>JsonDocEntity.Convert</c> for the document.</returns>
        public override JsonDocEntity ParseHtmlToEntity(DocumentEntity _docEntity)
        {
            // Only documents that are converted but not yet parsed are processed;
            // anything else is handed straight to the JSON conversion.
            if (!_docEntity.isConvert || _docEntity.isParse)
            {
                return JsonDocEntity.Convert(_docEntity);
            }

            var firstFragment = _docEntity.HtmlData.HtmlContent[0];

            // Extract the style section to a CSS file only while no style URL is recorded.
            // (Original author's note: this should later be refactored together with storage.)
            if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl))
            {
                var css = STYLE_REGEX.Match(firstFragment).Groups["style"].Value;
                FileUtils.WriteStyleFile(css, Path.Combine(_docEntity.ResourcesPath, "wpsStyle.css"));
                _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/wpsStyle.css";
            }

            // Start from an empty list of parsed pages.
            _docEntity.HtmlData.ParseContentList = new List<HtmlParseData>();

            int pageNumber = 0;
            foreach (Match pageMatch in DIV_REGEX.Matches(firstFragment))
            {
                pageNumber++;
                var markup = pageMatch.Groups["div"].Value;

                // Gather each distinct image source once so it is replaced a single time.
                var distinctSources = new HashSet<string>();
                foreach (Match imageMatch in DIV_IMAGE_REGEX.Matches(markup))
                {
                    distinctSources.Add(imageMatch.Groups["src"].Value);
                }

                // Every occurrence of the source text in the fragment is replaced, not only
                // the captured attribute.
                // NOTE(review): String.Replace throws ArgumentException for an empty search
                // string - confirm the regex can never capture an empty src.
                foreach (var source in distinctSources)
                {
                    var relocated = String.Format("{0}/{1}/{2}",
                                                  _docEntity.VirtualResourcesPath,
                                                  _docEntity.ImageFolder,
                                                  Path.GetFileName(source));
                    markup = markup.Replace(source, relocated);
                }

                var page = new HtmlParseData
                {
                    pagecount = pageNumber,
                    content   = markup,
                };
                _docEntity.HtmlData.ParseContentList.Add(page);
            }

            _docEntity.ConvertCompleteTime = DateTime.Now;
            _docEntity.isParse             = true;

            return JsonDocEntity.Convert(_docEntity);
        }
Пример #3
0
        /// <summary>
        /// Turns every <c>TABLE_REGEX</c> match in each worksheet HTML fragment into a page entry
        /// titled with the worksheet name, rewriting image sources to the virtual image folder.
        /// </summary>
        /// <param name="_docEntity">Document whose <c>HtmlData</c> is read and updated in place.</param>
        /// <returns>The result of <c>JsonDocEntity.Convert</c> for the document.</returns>
        public override JsonDocEntity ParseHtmlToEntity(Common.Entities.DocumentEntity _docEntity)
        {
            // Documents that were already parsed are not processed again.
            if (_docEntity.isParse)
            {
                return JsonDocEntity.Convert(_docEntity);
            }

            // Entries are appended to the existing ParseContentList; this method neither
            // creates nor clears it.
            // NOTE(review): presumably the list is initialised elsewhere - confirm.
            foreach (var sheetHtml in _docEntity.HtmlData.HtmlContent)
            {
                // Name of the worksheet this fragment belongs to.
                var sheetTitle = TITLE_REGEX.Match(sheetHtml).Groups["name"].Value;

                // The style section is written out only while no style URL is recorded,
                // i.e. from the first fragment reached in that state.
                if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl))
                {
                    var css = STYLE_REGEX.Match(sheetHtml).Groups["style"].Value;
                    FileUtils.WriteStyleFile(css, Path.Combine(_docEntity.ResourcesPath, "etStyle.css"));
                    _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/etStyle.css";
                }

                // Table numbering restarts at 1 for every worksheet.
                int tableNumber = 0;
                foreach (Match tableMatch in TABLE_REGEX.Matches(sheetHtml))
                {
                    tableNumber++;
                    var markup = tableMatch.Groups["table"].Value;

                    // Gather each distinct image source once so it is replaced a single time.
                    var distinctSources = new HashSet<string>();
                    foreach (Match imageMatch in DIV_IMAGE_REGEX.Matches(markup))
                    {
                        distinctSources.Add(imageMatch.Groups["src"].Value);
                    }

                    // NOTE(review): String.Replace throws ArgumentException for an empty
                    // search string - confirm the regex can never capture an empty src.
                    foreach (var source in distinctSources)
                    {
                        var relocated = String.Format("{0}/{1}/{2}",
                                                      _docEntity.VirtualResourcesPath,
                                                      _docEntity.ImageFolder,
                                                      Path.GetFileName(source));
                        markup = markup.Replace(source, relocated);
                    }

                    var page = new HtmlParseData
                    {
                        pagecount = tableNumber,
                        title     = sheetTitle,
                        content   = markup,
                    };
                    _docEntity.HtmlData.ParseContentList.Add(page);
                }
            }

            _docEntity.isParse             = true;
            _docEntity.ConvertCompleteTime = DateTime.Now;

            return JsonDocEntity.Convert(_docEntity);
        }
Пример #4
0
 /// <summary>
 /// Adds one page entry per page number (1 through <c>HtmlData.PageNumber</c>), each pointing
 /// at the "Small" and "Big" thumbnail images under the document's virtual resources path.
 /// </summary>
 /// <param name="_docEntity">Document whose <c>HtmlData.ParseContentList</c> is appended to.</param>
 /// <returns>The result of <c>JsonDocEntity.Convert</c> for the document.</returns>
 public override JsonDocEntity ParseHtmlToEntity(DocumentEntity _docEntity)
 {
     // Already parsed: convert the document as it stands.
     if (_docEntity.isParse)
     {
         return JsonDocEntity.Convert(_docEntity);
     }

     var resourceRoot = _docEntity.VirtualResourcesPath;

     int page = 1;
     while (page < _docEntity.HtmlData.PageNumber + 1)
     {
         // The small and big variants use the same file name in different folders.
         var fileName = String.Format("Thumbnail{0}.jpg", page);

         var entry = new HtmlParseData
         {
             pagecount = page,
             thumbUrl  = resourceRoot + "/Small/" + fileName,
             url       = resourceRoot + "/Big/" + fileName,
         };
         _docEntity.HtmlData.ParseContentList.Add(entry);

         page++;
     }

     _docEntity.ConvertCompleteTime = DateTime.Now;
     _docEntity.isParse             = true;

     return JsonDocEntity.Convert(_docEntity);
 }
Пример #5
0
        /// <summary>
        /// Reads the worksheet HTML files referenced by <c>tabstrip.htm</c> in the
        /// "ConvertFolder.files" output folder, then turns every <c>TABLE_REGEX</c> match in each
        /// worksheet into a page entry titled with the worksheet name.
        /// </summary>
        /// <remarks>
        /// Original author's note: the Excel files are read here, in the parse step, because the
        /// sheet relationships live in the frame page; this placement was described as temporary.
        /// Worksheets are processed in the order their entries appear in <c>tabstrip.htm</c>.
        /// </remarks>
        /// <param name="_docEntity">Document whose <c>HtmlData</c> is read and updated in place.</param>
        /// <returns>The result of <c>JsonDocEntity.Convert</c> for the document.</returns>
        /// <exception cref="ArgumentException">
        /// Two tabstrip entries carry the same worksheet name.
        /// </exception>
        public override JsonDocEntity ParseHtmlToEntity(Common.Entities.DocumentEntity _docEntity)
        {
            // Documents that were already parsed are not processed again.
            if (_docEntity.isParse)
            {
                return JsonDocEntity.Convert(_docEntity);
            }

            var outputpath = Path.Combine(_docEntity.ResourcesPath, "ConvertFolder.files");

            // Locate the tab strip (worksheet index) and copy the stylesheet next to the
            // document's resources. File names are matched ordinally: they are identifiers,
            // not linguistic text, so the current culture must not influence the match.
            string tabstrip = "";
            foreach (var filename in Directory.GetFiles(outputpath))
            {
                if (filename.EndsWith("tabstrip.htm", StringComparison.Ordinal))
                {
                    tabstrip = FileUtils.ReadFile(filename);
                }
                if (filename.EndsWith("stylesheet.css", StringComparison.Ordinal))
                {
                    FileUtils.WriteStyleFile(FileUtils.ReadFile(filename), Path.Combine(_docEntity.ResourcesPath, "etStyle.css"));
                }
            }

            // Worksheet name -> worksheet HTML. Dictionary enumeration order is undefined,
            // so the tabstrip order is tracked separately in sheetOrder.
            var contentByName = new Dictionary<string, string>();
            var sheetOrder    = new List<string>();
            foreach (Match match in Structure_Mapping.Matches(tabstrip))
            {
                var path    = Path.Combine(outputpath, match.Groups["src"].Value);
                var content = Util.FileUtils.ReadFile(path);
                var name    = match.Groups["Name"].Value;

                // Add (not the indexer) so a duplicate worksheet name still throws.
                contentByName.Add(name, content);
                sheetOrder.Add(name);
                _docEntity.HtmlData.HtmlContent.Add(content);
            }

            foreach (var worksheetname in sheetOrder)
            {
                var sheetHtml = contentByName[worksheetname];

                // The style URL is recorded only when at least one worksheet exists and
                // no URL has been set yet.
                if (String.IsNullOrEmpty(_docEntity.HtmlData.StyleUrl))
                {
                    _docEntity.HtmlData.StyleUrl = _docEntity.VirtualResourcesPath + "/" + "etStyle.css";
                }

                // Table numbering restarts at 1 for every worksheet.
                var count = 1;
                foreach (Match tablematcher in TABLE_REGEX.Matches(sheetHtml))
                {
                    HtmlParseData divcontent = new HtmlParseData();
                    divcontent.pagecount = count;
                    divcontent.title     = worksheetname;

                    var table = tablematcher.Groups["table"].Value;

                    // Gather each distinct image reference once so it is replaced a single time.
                    // NOTE(review): this reads the "href" capture group - confirm that this
                    // class's DIV_IMAGE_REGEX defines it. String.Replace throws
                    // ArgumentException if the captured value is empty.
                    HashSet<string> imageSources = new HashSet<string>();
                    foreach (Match imagematcher in DIV_IMAGE_REGEX.Matches(table))
                    {
                        imageSources.Add(imagematcher.Groups["href"].Value);
                    }

                    foreach (var item in imageSources)
                    {
                        table = table.Replace(item, _docEntity.VirtualResourcesPath + "/" +
                                              _docEntity.ImageFolder + "/" + Path.GetFileName(item));
                    }

                    divcontent.content = table;
                    _docEntity.HtmlData.ParseContentList.Add(divcontent);
                    count++;
                }
            }

            _docEntity.isParse             = true;
            _docEntity.ConvertCompleteTime = DateTime.Now;

            return JsonDocEntity.Convert(_docEntity);
        }