/// <summary>
/// Reads the first row of a caption-style table and splits its text into a
/// title (the last non-empty line) and the free text that precedes it.
/// </summary>
/// <param name="htmlTable">The table node whose first row is inspected.</param>
/// <returns>
/// A <c>PreTable</c> with <c>Title</c> set to the cleaned last line and
/// <c>Content</c> set to the concatenation of the earlier lines (empty when the
/// row holds a single line); <c>null</c> when the table has no rows or the first
/// row contains no non-empty line.
/// </returns>
PreTable JudgeTitleOrText(HtmlNode htmlTable)
{
    // Rows may hang directly off the table or be wrapped in a tbody.
    var rows = htmlTable.SelectNodes("./tr");
    if (rows == null)
    {
        rows = htmlTable.SelectNodes("tbody/tr");
    }
    if (rows == null)
    {
        return null;
    }

    // Only the first row is considered. IsEmpty() is a project extension;
    // NOTE(review): whether it also rejects whitespace-only lines is not
    // visible here - confirm.
    string[] lines = rows[0].InnerText
        .Split('\n')
        .Where(line => !line.IsEmpty())
        .ToArray();
    if (lines.Length < 1)
    {
        return null;
    }

    PreTable header = new PreTable();

    // The last line is the title. Splitting on '\n' leaves a trailing '\r' on
    // CRLF input, so the title is cleaned the same way whether the row has one
    // line or many (previously only the single-line case was cleaned).
    header.Title = lines[lines.Length - 1].Replace("\t", "").Trim();

    if (lines.Length == 1)
    {
        header.Content = "";
    }
    else
    {
        // Everything before the title becomes the content, joined without a separator.
        header.Content += string.Concat(lines.Take(lines.Length - 1));
    }

    return header;
}
/// <summary>
/// Loads an HTML file, pairs each collected table with the caption table(s)
/// that precede it, records the pairs in <c>pre_table</c>, and writes every
/// entry of <c>pre_table</c> to a .docx file whose path is <paramref name="path"/>
/// with its extension changed to ".docx".
/// </summary>
/// <param name="path">Path of the HTML file to convert; it is read as UTF-8.</param>
/// <remarks>
/// <c>pre_table</c> is not cleared here, so entries added by earlier calls are
/// written again. A document without any table produces an empty .docx.
/// </remarks>
public void LoadHtml(string path)
{
    // Path.ChangeExtension only touches the final extension, so directory names
    // containing ".html" are left alone and ".htm"/".HTML" inputs no longer map
    // the output onto the source file itself.
    string target = System.IO.Path.ChangeExtension(path, ".docx");

    var doc = new HtmlDocument();
    doc.Load(path, Encoding.UTF8);
    var root = doc.DocumentNode;

    // Collect the tables for which HasTable is false
    // (presumably: tables that do not contain a nested table - confirm).
    List<HtmlNode> tables = new List<HtmlNode>();
    var nodes = root.SelectNodes("//table");
    if (nodes != null) // SelectNodes yields null, not an empty list, when nothing matches
    {
        foreach (var node in nodes)
        {
            if (HasTable(node))
            {
                continue;
            }
            tables.Add(node);
        }
    }

    List<PreTable> headers = new List<PreTable>();
    bool lastIsHeader = false;   // true while a header is waiting for its table
    int lastHeaderIndex = -1;    // index in headers of the most recently added header
    int noHeaderKey = 0;         // running number for tables that have no header

    foreach (var table in tables)
    {
        if (IsTitle(table))
        {
            // Caption table.
            var header = JudgeTitleOrText(table);
            if (header == null)
            {
                continue;
            }

            if (lastIsHeader && lastHeaderIndex > -1)
            {
                // The previous table was a header too: merge them. The old title is
                // demoted into the content, followed by the new header's content,
                // and the new header's title takes over.
                var lastHeader = headers[lastHeaderIndex];
                lastHeader.Content += lastHeader.Title + header.Content;
                lastHeader.Title = header.Title;
            }
            else
            {
                headers.Add(header);
                lastIsHeader = true;
                lastHeaderIndex++;
            }
        }
        else
        {
            // Data table.
            Table localTable = new Table(table);
            if (localTable.Matrix == null)
            {
                continue;
            }

            if (!lastIsHeader)
            {
                // No header precedes this table: create a placeholder key. The
                // literal means "no table header"; the counter gives each
                // placeholder its own title.
                PreTable noHeader = new PreTable();
                noHeader.Title = "没有表头" + noHeaderKey.ToString();
                noHeader.Content = "";
                noHeaderKey++;
                pre_table.Add(noHeader, localTable);
            }
            else
            {
                pre_table.Add(headers[lastHeaderIndex], localTable);
            }
            lastIsHeader = false;
        }
    }

    using (DocX document = DocX.Create(target))
    {
        foreach (var key in pre_table.Keys)
        {
            var table = pre_table[key].Matrix;
            int rowCount = table.GetLength(0);
            int columnCount = table.GetLength(1);

            // Build the table first; it is inserted after its content and heading.
            var docxTable = document.AddTable(rowCount, columnCount);
            docxTable.AutoFit = AutoFit.Contents;
            for (int i = 0; i < rowCount; i++)
            {
                for (int j = 0; j < columnCount; j++)
                {
                    docxTable.Rows[i].Cells[j].Paragraphs[0].Append(table[i, j]);
                }
            }

            // Placeholder headers are not written to the document.
            if (!key.Title.Contains("没有表头"))
            {
                document.InsertParagraph(key.Content);
                if (!key.Title.IsEmpty())
                {
                    document.InsertParagraph().Append(key.Title).Heading(HeadingType.Heading1);
                }
            }

            MergeCell(docxTable);
            document.InsertTable(docxTable);
        }

        document.Save();
    }
}