// Converts a list of table cells into key/value dictionary entries.
//
// Every cell whose Type is not MyTable.TYPE_CONTENT is treated as a title cell;
// its value is looked up according to the title type:
//   TYPE_TITLE_UP / DOWN / LEFT / RIGHT - the neighbouring cell in that direction
//   TYPE_MIX                            - the cell text is split by GetDictionarysFromString
//   TYPE_JOB_SET_UP                     - the cell below plus continuation cells on following pages
//   TYPE_TITLE_SMALL                    - a small embedded table (header row + numbered rows)
//
// listTable: the cells to convert; a null list yields an empty result.
// Returns:   the dictionary entries, in the order the title cells were encountered.
private List<MyDictionary> ListTableToDictionary(List<MyTable> listTable)
{
    List<MyDictionary> listDict = new List<MyDictionary>();
    if (listTable == null)
    {
        return listDict;
    }

    for (int i = 0; i < listTable.Count; i++)
    {
        MyTable cell = listTable[i];
        string key = cell.Text;
        int type = cell.Type;

        // Content cells are only ever consumed through their title cell.
        if (type == MyTable.TYPE_CONTENT)
        {
            continue;
        }

        MyTable tableContent = null;
        if (type == MyTable.TYPE_TITLE_UP)
        {
            tableContent = cell.Up();
        }
        else if (type == MyTable.TYPE_TITLE_DOWN)
        {
            tableContent = cell.Down();
        }
        else if (type == MyTable.TYPE_TITLE_LEFT)
        {
            tableContent = cell.Left();
        }
        else if (type == MyTable.TYPE_TITLE_RIGHT)
        {
            tableContent = cell.Right();
        }
        else if (type == MyTable.TYPE_MIX)
        {
            // The cell text itself holds several entries.
            listDict.AddRange(GetDictionarysFromString(key));
            continue;
        }
        else if (type == MyTable.TYPE_JOB_SET_UP)
        {
            // Job set-up: the value starts in the cell below and continues on
            // the following pages until the "procedure" heading (or no cell) is found.
            StringBuilder value = new StringBuilder();
            tableContent = cell.Down();
            if (tableContent != null)
            {
                value.Append(tableContent.Text);
            }

            for (int j = 1; ; j++)
            {
                // NOTE(review): the literal 2 is passed straight to Find; its meaning
                // (presumably a row index) is not visible from here - confirm.
                tableContent = cell.Find(cell.page + j, 2, cell.col);
                if (tableContent == null)
                {
                    break;
                }
                if (tableContent.Text.Replace(" ", "") == "工作步骤PROCEDURE")
                {
                    break;
                }
                value.Append(tableContent.Text);
            }

            listDict.Add(new MyDictionary(key, value.ToString()));
            continue;
        }
        else if (type == MyTable.TYPE_TITLE_SMALL)
        {
            List<MyTable> headerRow = cell.FindRow();
            if (headerRow == null || headerRow.Count == 0)
            {
                // No header cells: nothing to build a key from.
                continue;
            }

            // Key: header texts joined with '|'.
            string smallKey = JoinCellTexts(headerRow);

            // Value: each data row joined with '|', rows joined with '$'.
            // Data rows are the cells below the first header cell for as long as
            // their text passes IsNumber.
            List<string> rows = new List<string>();
            MyTable rowHead = headerRow[0].Down();
            while (rowHead != null && IsNumber(rowHead.Text))
            {
                rows.Add(JoinCellTexts(rowHead.FindRow()));
                rowHead = rowHead.Down();
            }

            // string.Join yields "" for zero rows, where Substring(1) on an
            // empty string would throw ArgumentOutOfRangeException.
            string smallValue = string.Join("$", rows.ToArray());
            listDict.Add(new MyDictionary(smallKey, smallValue));
            continue;
        }

        if (tableContent != null)
        {
            string value = tableContent.Text;
            // Append the image reference only when there is one, so a missing
            // image never leaves a dangling separator.
            if (!string.IsNullOrEmpty(tableContent.Image))
            {
                value += "|" + tableContent.Image;
            }
            listDict.Add(new MyDictionary(key, value));
        }
    }

    return listDict;
}

// Joins the Text of every cell in a row with '|'; a null or empty row gives "".
private static string JoinCellTexts(IEnumerable<MyTable> row)
{
    List<string> texts = new List<string>();
    if (row != null)
    {
        foreach (MyTable t in row)
        {
            texts.Add(t.Text);
        }
    }
    return string.Join("|", texts.ToArray());
}
// Extracts the table cells of one page from its parsed HTML/SVG document.
//
// Black <path> elements are read as straight line segments, split into
// horizontal and vertical lines, and every horizontal line whose start point
// coincides with the start point of a vertical line becomes one cell.
//
// htmlDoc: the parsed document of the page.
// page:    index of the page; used to look up the page height in m_fPageHeights
//          and stored on every resulting cell.
// Returns: the detected cells (empty when the page contains no black paths).
private List<MyTable> GetTable(HtmlDocument htmlDoc, int page)
{
    List<MyTable> listTable = new List<MyTable>();
    // Horizontal line segments.
    List<MyTable> listHorizontal = new List<MyTable>();
    // Vertical line segments.
    List<MyTable> listVertical = new List<MyTable>();
    float fPageHeight = m_fPageHeights[page];

    // Lines can be marked in two ways: a black stroke or a black fill.
    HtmlNodeCollection strokePaths = htmlDoc.DocumentNode.SelectNodes("//path[@stroke='#000000']");
    HtmlNodeCollection fillPaths = htmlDoc.DocumentNode.SelectNodes("//path[@fill='#000000']");

    // Use whichever marker matched; when both matched, take the larger set
    // (the stroke set wins a tie).
    HtmlNodeCollection collection;
    if (strokePaths == null)
    {
        collection = fillPaths;
    }
    else if (fillPaths == null)
    {
        collection = strokePaths;
    }
    else
    {
        collection = strokePaths.Count >= fillPaths.Count ? strokePaths : fillPaths;
    }

    if (collection == null)
    {
        return listTable;
    }

    foreach (HtmlNode node in collection)
    {
        // "d" holds the path data of the line. The attribute indexer returns
        // null for a missing attribute, so skip such paths instead of crashing.
        HtmlAttribute pathData = node.Attributes["d"];
        if (pathData == null)
        {
            continue;
        }

        // Parse the path data: M is the start point, L is the end point.
        float[] fPoint = GetStartEndPoint(pathData.Value);
        if (fPoint == null || fPoint.Length < 4)
        {
            // Defensive: a path that did not yield both points cannot be a line.
            continue;
        }
        float nMX = fPoint[0];
        float nMY = fPoint[1];
        float nLX = fPoint[2];
        float nLY = fPoint[3];

        if (IsOneLine(nMY, nLY))
        {
            // Same Y at both ends: horizontal line.
            float width = Math.Abs(nLX - nMX);
            if (IsTooShort(nLX, nMX))
            {
                // Drop lines that are too short.
                continue;
            }
            if (IsAlreadyInTable(listHorizontal, nMX, nMY, width))
            {
                // Drop lines that are already recorded.
                continue;
            }

            MyTable horizontal = new MyTable();
            horizontal.X = nMX;
            horizontal.Y = nMY;
            horizontal.Width = width;
            horizontal.Height = 0;
            listHorizontal.Add(horizontal);
        }
        else if (IsOneLine(nMX, nLX))
        {
            // Same X at both ends: vertical line.
            float height = Math.Abs(nMY - nLY);
            if (IsTooShort(nMY, nLY))
            {
                // Drop lines that are too short.
                continue;
            }
            if (IsAlreadyInTable(listVertical, nMX, nMY, height))
            {
                // Drop lines that are already recorded.
                continue;
            }

            MyTable vertical = new MyTable();
            vertical.X = nMX;
            vertical.Y = nMY;
            vertical.Width = 0;
            vertical.Height = height;
            listVertical.Add(vertical);
        }
    }

    // A horizontal and a vertical line that share a start point form one cell.
    // Only the first matching vertical line is used for each horizontal line.
    for (int i = 0; i < listHorizontal.Count; i++)
    {
        MyTable h = listHorizontal[i];
        for (int j = 0; j < listVertical.Count; j++)
        {
            MyTable v = listVertical[j];
            if (IsOnePoint(h.X, h.Y, v.X, v.Y))
            {
                MyTable table = new MyTable();
                table.X = h.X;
                // The cell's Y is the page height minus the vertical line's Y.
                table.Y = fPageHeight - v.Y;
                table.Width = h.Width;
                table.Height = v.Height;
                table.page = page;
                listTable.Add(table);
                break;
            }
        }
    }

    return listTable;
}