示例#1
0
        /// <summary>
        /// Builds a <see cref="PreTable"/> header (title plus leading text) from the first row of an HTML table.
        /// </summary>
        /// <remarks>
        /// The inner text of the first row is split on '\n' and every piece for which IsEmpty() is true is dropped.
        /// If exactly one line remains, it becomes the title and the content is the empty string.
        /// If several lines remain, the last one is the title and the preceding ones are concatenated,
        /// in order and without any separator, into the content.
        /// </remarks>
        /// <param name="htmlTable">Table node; its rows are looked up with "./tr" first and "tbody/tr" as fallback.</param>
        /// <returns>
        /// The populated header, or null when the table has no rows or the first row yields no non-empty line.
        /// </returns>
        PreTable JudgeTitleOrText(HtmlNode htmlTable)
        {
            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so a null result means "no rows found with this XPath".
            var rows = htmlTable.SelectNodes("./tr") ?? htmlTable.SelectNodes("tbody/tr");
            if (rows == null)
            {
                return null;
            }

            string[] lines = rows[0].InnerText
                             .Split('\n')
                             .Where(t => !t.IsEmpty())
                             .ToArray();
            if (lines.Length < 1)
            {
                return null;
            }

            PreTable header    = new PreTable();
            int      lastIndex = lines.Length - 1;

            if (lastIndex == 0)
            {
                // Single line: it is the title. Trim() also strips a '\r' left over from splitting on '\n'.
                header.Title   = lines[0].Replace("\r\n", "").Replace("\t", "").Trim();
                header.Content = "";
                return header;
            }

            // Several lines: the last one is the title, everything before it is body text.
            // NOTE(review): unlike the single-line case, neither the title nor the content is trimmed here,
            // so they may still carry '\r' or tab characters - confirm whether that is intended.
            header.Title    = lines[lastIndex];
            header.Content += string.Join("", lines, 0, lastIndex);
            return header;
        }
示例#2
0
        /// <summary>
        /// Reads an HTML file, pairs each data table with the header table that precedes it,
        /// and writes the result to a .docx file next to the input.
        /// </summary>
        /// <remarks>
        /// Every table node that passes the HasTable filter is classified with IsTitle:
        /// header tables are parsed by JudgeTitleOrText (consecutive headers are merged into one),
        /// all other tables are wrapped in a Table and stored in pre_table under the most recent header.
        /// A data table that is not directly preceded by a header is stored under a placeholder header
        /// whose title starts with "没有表头"; no paragraph or heading is written for such placeholders.
        /// All entries of pre_table are then written to the document.
        /// </remarks>
        /// <param name="path">Path of the HTML file to read (loaded as UTF-8). The output uses the same path with a ".docx" extension.</param>
        public void LoadHtml(string path)
        {
            // ChangeExtension only touches the final extension, so directory names containing ".html"
            // are left alone and a non-".html" input (".htm", ".HTML") can no longer map onto itself
            // and be overwritten by the generated document.
            string target = System.IO.Path.ChangeExtension(path, ".docx");
            var    doc    = new HtmlDocument();

            doc.Load(path, Encoding.UTF8);

            List <HtmlNode> tables = new List <HtmlNode>();
            var             nodes  = doc.DocumentNode.SelectNodes("//table");

            // SelectNodes returns null when the document has no <table> at all.
            if (nodes != null)
            {
                foreach (var node in nodes)
                {
                    // Presumably HasTable detects tables that contain nested tables - verify against its definition.
                    if (!HasTable(node))
                    {
                        tables.Add(node);
                    }
                }
            }

            List <PreTable> headers         = new List <PreTable>();
            bool            lastIsHeader    = false; // true while the most recently seen table was a header
            int             lastHeaderIndex = -1;    // index in headers of the most recent header
            int             noHeaderKey     = 0;     // running number for placeholder headers

            foreach (var table in tables)
            {
                if (IsTitle(table))
                {
                    // Header table.
                    var header = JudgeTitleOrText(table);
                    if (header == null)
                    {
                        continue;
                    }

                    if (lastIsHeader && lastHeaderIndex > -1)
                    {
                        // The previous table was a header too, so merge: the previous title is demoted
                        // into the content, followed by this header's content, and this header's title
                        // becomes the title of the merged header.
                        var lastHeader = headers[lastHeaderIndex];
                        lastHeader.Content += lastHeader.Title + header.Content;
                        lastHeader.Title    = header.Title;
                    }
                    else
                    {
                        headers.Add(header);
                        lastIsHeader    = true;
                        lastHeaderIndex = headers.Count - 1;
                    }
                }
                else
                {
                    // Data table.
                    Table localTable = new Table(table);
                    if (localTable.Matrix == null)
                    {
                        continue;
                    }

                    if (!lastIsHeader)
                    {
                        // No header directly precedes this table: store it under a numbered placeholder.
                        PreTable noHeader = new PreTable();
                        noHeader.Title   = "没有表头" + noHeaderKey.ToString();
                        noHeader.Content = "";
                        noHeaderKey++; // keep placeholder titles distinct from one another
                        pre_table.Add(noHeader, localTable);
                    }
                    else
                    {
                        pre_table.Add(headers[lastHeaderIndex], localTable);
                    }
                    lastIsHeader = false;
                }
            }

            using (DocX document = DocX.Create(target))
            {
                foreach (var key in pre_table.Keys)
                {
                    var matrix      = pre_table[key].Matrix;
                    int rowCount    = matrix.GetLength(0);
                    int columnCount = matrix.GetLength(1);
                    var docxTable   = document.AddTable(rowCount, columnCount);
                    docxTable.AutoFit = AutoFit.Contents;

                    // Copy the cell matrix into the document table one cell at a time.
                    for (int i = 0; i < rowCount; i++)
                    {
                        for (int j = 0; j < columnCount; j++)
                        {
                            docxTable.Rows[i].Cells[j].Paragraphs[0].Append(matrix[i, j]);
                        }
                    }

                    // Placeholder headers produce no text; real headers emit their content
                    // paragraph first and then the title as a level-1 heading, ahead of the table.
                    if (!key.Title.Contains("没有表头"))
                    {
                        document.InsertParagraph(key.Content);
                        if (!key.Title.IsEmpty())
                        {
                            document.InsertParagraph().Append(key.Title).Heading(HeadingType.Heading1);
                        }
                    }

                    MergeCell(docxTable);
                    document.InsertTable(docxTable);
                }
                document.Save();
            }
        }