/// <summary>
/// Merges a table into the table that precedes it.
/// Every record of table <paramref name="NextTableId"/> is appended to table
/// <c>NextTableId - 1</c> with its row number shifted past the existing rows,
/// the source table is emptied, and every node that pointed at the source table
/// is detached from it (its <c>TableId</c> becomes -1).
/// </summary>
/// <param name="doc">Document whose <c>root.TableList</c> and node tree are modified in place.</param>
/// <param name="NextTableId">Id of the table that is appended to its predecessor.</param>
public static void MergeTable(AnnouceDocument doc, int NextTableId)
{
    int previousTableId = NextTableId - 1;
    var tables = doc.root.TableList;

    // Rows of the appended table continue after the last row of the previous table.
    var rowShift = new HTMLTable(tables[previousTableId]).RowCount;

    // Records are rebuilt as "tableId,row,<third field>|value"; only the table id and
    // the row number change, the third position field and the value are copied as-is.
    foreach (var record in tables[NextTableId])
    {
        var fields = record.Split("|");
        var coordinates = fields[0].Split(",");
        var cellText = fields[1];
        var shiftedRow = rowShift + int.Parse(coordinates[1]);
        tables[previousTableId].Add($"{previousTableId},{shiftedRow},{coordinates[2]}|{cellText}");
    }
    tables[NextTableId].Clear();

    // Nodes that referenced the merged table no longer own a table.
    foreach (var parent in doc.root.Children)
    {
        foreach (var node in parent.Children)
        {
            if (node.TableId == NextTableId)
            {
                node.TableId = -1;
            }
        }
    }
}
/// <summary>
/// Repairs tables that were split into several pieces by pagination: a continuation
/// table is appended to the table before it and then emptied.
/// </summary>
/// <remarks>
/// Two passes are made.
/// Pass 1 walks the table ids from 2 upwards. A table is merged into its predecessor
/// when the nodes of both tables are at most 200 positions apart, the table has a
/// null-valued cell in row 1, and both tables have the same column count.
/// Pass 2 walks every node that owns a table and whose next sibling also owns a table
/// with the same column count. The sibling's table is merged when its first row
/// contains a null value, when a cell of each table concatenates to a known company
/// full name, or (for <c>StockChange</c> documents) when its first row contains a
/// trading-method keyword.
/// </remarks>
/// <param name="root">Root node whose children are scanned and whose <c>TableList</c> is rewritten in place.</param>
/// <param name="doc">Document supplying the table list walked in pass 1, the known company names and the document type.</param>
public static void FixSpiltTable(MyRootHtmlNode root, AnnouceDocument doc)
{
    // Tables whose nodes are further apart than this are not treated as one split table.
    const int MaxPositionGap = 200;

    // NOTE(review): pass 1 reads the tables through doc.root but writes through root;
    // presumably both refer to the same node - confirm against the callers.
    for (int NextTableId = 2; NextTableId <= doc.root.TableList.Count; NextTableId++)
    {
        // The node positions do not depend on the individual records, so they are
        // looked up once per table instead of once per record.
        var FirstTablePos = -1;
        var SecondTablePos = -1;
        foreach (var p in root.Children)
        {
            foreach (var s in p.Children)
            {
                if (s.TableId == NextTableId - 1)
                {
                    FirstTablePos = s.PositionId;
                }
                if (s.TableId == NextTableId)
                {
                    SecondTablePos = s.PositionId;
                }
            }
        }
        if (SecondTablePos - FirstTablePos > MaxPositionGap)
        {
            continue;
        }

        foreach (var item in doc.root.TableList[NextTableId])
        {
            var tablerec = item.Split("|");
            var pos = tablerec[0].Split(",");
            var value = tablerec[1];
            var row = int.Parse(pos[1]);

            // Only a null-valued cell in the first row of the second table triggers a merge.
            if (row != 1 || value != strNullValue)
            {
                continue;
            }

            var table = new HTMLTable(doc.root.TableList[NextTableId - 1]);
            var nexttable = new HTMLTable(doc.root.TableList[NextTableId]);
            if (table.ColumnCount == nexttable.ColumnCount)
            {
                AppendSplitTableRecords(root, NextTableId - 1, NextTableId, table.RowCount);
                foreach (var parent in root.Children)
                {
                    foreach (var node in parent.Children)
                    {
                        if (node.TableId == NextTableId)
                        {
                            node.TableId = -1;
                        }
                    }
                }
            }

            // Either the table was merged (its record list was just cleared, so the
            // enumeration must stop here) or the column counts differ, which no other
            // record of the same table can change.
            break;
        }
    }

    // Trading-method keywords that mark a header-less continuation table in
    // share increase/decrease (StockChange) announcements.
    var BuyMethod = new string[] { "集中竞价交易", "竞价交易", "大宗交易", "约定式购回" }.ToList();
    bool isStockChange = doc.GetType() == typeof(StockChange);

    // Pass 2: consecutive tables (a table node whose next sibling is also a table node).
    foreach (var parent in root.Children)
    {
        foreach (var node in parent.Children)
        {
            if (node.TableId == -1)
            {
                continue;
            }
            var nextnode = node.NextBrother;
            if (nextnode == null || nextnode.TableId == -1)
            {
                continue;
            }

            var table = new HTMLTable(root.TableList[node.TableId]);
            var nexttable = new HTMLTable(root.TableList[nextnode.TableId]);
            if (table.ColumnCount != nexttable.ColumnCount)
            {
                continue;
            }

            // The second of two split tables usually has a null value in its first row.
            bool hasnull = false;
            for (int nullcell = 1; nullcell <= table.ColumnCount; nullcell++)
            {
                if (nexttable.CellValue(1, nullcell) == HTMLTable.strNullValue)
                {
                    hasnull = true;
                    break;
                }
            }

            // A company name may have been cut in two, one half in each table.
            var ComboCompanyName = FindSplitCompanyName(table, nexttable, doc);
            bool hasSplitCompanyName = ComboCompanyName != "";
            if (hasSplitCompanyName)
            {
                // Not every cell may be completed: in the first table only cells holding
                // the beginning of the name, in the second only cells holding its end.
                CompleteSplitCompanyName(root, node.TableId, ComboCompanyName, true);
                CompleteSplitCompanyName(root, nextnode.TableId, ComboCompanyName, false);
            }

            // Special business rule: StockChange tables without a header row.
            bool specaillogic = false;
            if (isStockChange)
            {
                for (int spCell = 1; spCell <= table.ColumnCount; spCell++)
                {
                    if (BuyMethod.Contains(nexttable.CellValue(1, spCell)))
                    {
                        specaillogic = true;
                        break;
                    }
                }
            }

            if (hasnull || hasSplitCompanyName || specaillogic)
            {
                AppendSplitTableRecords(root, node.TableId, nextnode.TableId, table.RowCount);
                nextnode.TableId = -1;
            }
        }
    }
}

/// <summary>
/// Appends every record of the source table to the target table, rewriting the table id
/// and shifting the row number by <paramref name="rowOffset"/>, then empties the source table.
/// </summary>
private static void AppendSplitTableRecords(MyRootHtmlNode root, int targetTableId, int sourceTableId, int rowOffset)
{
    var target = root.TableList[targetTableId];
    var source = root.TableList[sourceTableId];
    foreach (var record in source)
    {
        var tablerec = record.Split("|");
        var pos = tablerec[0].Split(",");
        var value = tablerec[1];
        target.Add(targetTableId + "," + (rowOffset + int.Parse(pos[1])) + "," + pos[2] + "|" + value);
    }
    source.Clear();
}

/// <summary>
/// Looks for a cell of <paramref name="table"/> and a cell of <paramref name="nexttable"/>
/// in the same column whose space-stripped values concatenate to a known company full name.
/// Returns that name, or an empty string when there is none.
/// </summary>
private static string FindSplitCompanyName(HTMLTable table, HTMLTable nexttable, AnnouceDocument doc)
{
    var companyFullNames = doc.companynamelist.Select(x => x.secFullName).ToHashSet();
    int maxColumn = table.ColumnCount;
    int tableAMaxRow = table.RowCount;
    int tableBMaxRow = nexttable.RowCount;

    for (int col = 1; col <= maxColumn; col++)
    {
        // Rows are 1-based (row 1 is read as the first row and merged rows are renumbered
        // as RowCount + row), so the last row is RowCount and must be included.
        for (int rowA = 1; rowA <= tableAMaxRow; rowA++)
        {
            var valueA = table.CellValue(rowA, col).Replace(" ", "");
            if (valueA == "")
            {
                continue;
            }
            for (int rowB = 1; rowB <= tableBMaxRow; rowB++)
            {
                var valueB = nexttable.CellValue(rowB, col).Replace(" ", "");
                if (valueB == "")
                {
                    continue;
                }
                var combined = valueA + valueB;
                if (companyFullNames.Contains(combined))
                {
                    return combined;
                }
            }
        }
    }
    return "";
}

/// <summary>
/// Replaces the value of every record of the given table that holds a fragment of
/// <paramref name="companyName"/> with the full name. A fragment is a non-empty,
/// space-stripped value that the name starts with (<paramref name="matchPrefix"/> is true)
/// or ends with (<paramref name="matchPrefix"/> is false).
/// </summary>
private static void CompleteSplitCompanyName(MyRootHtmlNode root, int tableId, string companyName, bool matchPrefix)
{
    var records = root.TableList[tableId];
    for (int k = 0; k < records.Count; k++)
    {
        var tablerec = records[k].Split("|");
        var value = tablerec[1].Replace(" ", "");

        // Every string starts and ends with the empty string; without this guard all
        // empty cells would be overwritten with the company name.
        if (value == "")
        {
            continue;
        }

        bool isFragment = matchPrefix
            ? companyName.StartsWith(value, System.StringComparison.Ordinal)
            : companyName.EndsWith(value, System.StringComparison.Ordinal);
        if (isFragment)
        {
            records[k] = tablerec[0] + "|" + companyName;
        }
    }
}