// Checks whether the comment edit box is already open inside the given comment block.
// Scans each top-level frame for the "mod-comments" div that is the same object as
// commentTag and returns true when one of its descendants has class "comment-box-wrap".
private bool hascommentOpened(mshtml.IHTMLElement commentTag)
{
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (commentTag != div || div.className != "mod-comments")
            {
                continue;
            }

            // This is the requested comment block: the answer is decided here.
            foreach (mshtml.IHTMLElement descendant in (mshtml.IHTMLElementCollection)div.all)
            {
                if (descendant.className == "comment-box-wrap")
                {
                    return true;
                }
            }
            return false;
        }
    }
    return false;
}
// Clicks the first "item qz_like_btn_v3" link in each top-level frame whose text does not
// contain "取消赞", recording curQQ in praisedMap for every click. Any exception is ignored.
private void PraiseFriend()
{
    try
    {
        mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
        {
            object frameKey = frameNo;
            mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
            IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
            mshtml.IHTMLElementCollection anchors = (mshtml.IHTMLElementCollection)frameDoc.all.tags("a");
            foreach (mshtml.IHTMLElement anchor in anchors)
            {
                if (anchor.className != "item qz_like_btn_v3")
                {
                    continue;
                }

                string caption = anchor.innerText;
                if (caption == null || caption.Contains("取消赞"))
                {
                    continue;
                }

                anchor.click();
                praisedMap.Add(curQQ);
                // Only stops scanning this frame; the remaining frames are still visited.
                break;
            }
        }
    }
    catch (Exception)
    {
        // Best effort: failures while walking the page are deliberately ignored.
    }
}
/// <summary>
/// Finds the window object of a named frame/iframe hosted in a WebBrowser control.
/// </summary>
/// <param name="wb">WebBrowser control whose document frames are searched.</param>
/// <param name="frameName">Name of the frame to look for; compared case-insensitively.</param>
/// <returns>
/// The window of the first frame whose name matches, or null when no frame matches,
/// the arguments are unusable, or an exception occurs (the exception is logged).
/// </returns>
public static IHTMLWindow2 GetFrameWindowObject(WebBrowser wb, string frameName)
{
    int framesCount = GetFramesCount(wb);

    // NOTE(review): a page with exactly one frame is skipped by this guard (kept from the
    // original "framesCount > 1" condition) — confirm whether "> 0" was intended.
    if (framesCount <= 1 || string.IsNullOrEmpty(frameName))
    {
        return null;
    }

    try
    {
        mshtml.HTMLDocumentClass htmlDoc = wb.Document.DomDocument as mshtml.HTMLDocumentClass;
        for (int i = 0; i < framesCount; i++)
        {
            object index = i;
            // GetDocumentFromWindow is used so that cross-domain frames can be reached.
            mshtml.IHTMLWindow2 frameWindow = htmlDoc.frames.item(ref index) as mshtml.IHTMLWindow2;
            var frameDoc = GetDocumentFromWindow(frameWindow);
            if (frameDoc == null)
            {
                continue;
            }

            IHTMLWindow2 candidate = frameDoc.parentWindow;

            // string.Equals tolerates a null window name, so an unnamed frame no longer
            // throws and aborts the search; ordinal comparison avoids culture-specific casing.
            if (candidate != null && string.Equals(candidate.name, frameName, StringComparison.OrdinalIgnoreCase))
            {
                return candidate;
            }
        }
    }
    catch (Exception ex)
    {
        Loger.Log4Net.Error("调用GetFrameWindowObject(WebBrowser wb,string frameName)方法异常,参数frameName为:" + frameName, ex);
    }

    return null;
}
/// <summary>
/// Loads the named anchors of an HTML file into the cmbAnchor combo box.
/// </summary>
/// <param name="fileName">Address of the HTML file whose anchors are to be retrieved.</param>
private void LoadAnchors(string fileName)
{
    byte[] data;

    // WebClient is IDisposable; release it as soon as the download is finished.
    using (WebClient client = new WebClient())
    {
        data = client.DownloadData(fileName);
    }

    // NOTE(review): ASCII decoding replaces every non-ASCII byte with '?', so anchor names
    // containing such characters will be altered — confirm this is acceptable.
    string strHTML = Encoding.ASCII.GetString(data);

    // Parse the markup with an off-screen MSHTML document.
    mshtml.HTMLDocumentClass ms = new mshtml.HTMLDocumentClass();
    mshtml.IHTMLDocument2 mydoc = (mshtml.IHTMLDocument2)ms;
    mydoc.write(strHTML);

    mshtml.IHTMLElementCollection ec = (mshtml.IHTMLElementCollection)mydoc.all.tags("a");
    if (ec == null)
    {
        return;
    }

    // Visit every anchor; the previous bound (length - 1) skipped the last one.
    for (int i = 0; i < ec.length; i++)
    {
        mshtml.HTMLAnchorElementClass anchor = (mshtml.HTMLAnchorElementClass)ec.item(i, 0);
        if (!string.IsNullOrEmpty(anchor.name))
        {
            cmbAnchor.Items.Add(anchor.name);
        }
    }
}
// Returns the inner HTML of the body element of the document shown in mainWebBrowser.
private string readWebPageSource()
{
    var dom = (mshtml.HTMLDocumentClass)mainWebBrowser.Document;
    var body = dom.body;
    return body.innerHTML;
}
// Resets the form for a new campaign: clears the current document and editor body,
// restarts the log with a timestamped line, re-enables sending and reloads the accounts.
private void button4_Click(object sender, EventArgs e)
{
    doc = null;
    tableMain.Visible = true;

    string startedAt = DateTime.Now.ToString();
    txtLog.Text = string.Concat("[", startedAt, "] ....// Starting new campaign.\r\n");

    progressBar1.Visible = false;
    htmlEditorControl.BodyHtml = "";
    btnSend.Enabled = true;

    loadAccounts();
}
// Friends shown in the space feed: collects the "http:" links found directly under
// "user-list" / "ui-avatar" divs in every second-level frame and shows them in a message box.
private void button9_Click(object sender, EventArgs e)
{
    string info = "";
    mshtml.HTMLDocumentClass htmlDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int i = 0; i < htmlDoc.frames.length; ++i)
    {
        object index = i;
        mshtml.IHTMLWindow2 frameWindow = htmlDoc.frames.item(ref index) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frame = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(frameWindow);
        if (frame == null)
        {
            // The frame's document could not be obtained; nothing to scan in it.
            continue;
        }

        for (int j = 0; j < frame.frames.length; ++j)
        {
            try
            {
                object rj = j;
                mshtml.IHTMLWindow2 frameWindow2 = frame.frames.item(ref rj) as mshtml.IHTMLWindow2;
                IHTMLDocument2 frame2 = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(frameWindow2);
                mshtml.IHTMLElementCollection userlist = (mshtml.IHTMLElementCollection)frame2.all.tags("div");
                foreach (mshtml.IHTMLElement element in userlist)
                {
                    if (element.className != "user-list" && element.className != "ui-avatar")
                    {
                        continue;
                    }

                    foreach (mshtml.IHTMLElement celement in (mshtml.IHTMLElementCollection)element.children)
                    {
                        // Children without a string href used to throw here, which made the
                        // catch below drop every remaining element of this frame.
                        string href = celement.getAttribute("href", 0) as string;
                        if (href != null && href.Contains("http:"))
                        {
                            info += href + "\n";
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: a frame that cannot be read is skipped.
            }
        }
    }
    MessageBox.Show(info);
}
// Step 3 (submit): clicks the "btn-post gb_bt evt_click" element inside the given comment
// block. Returns true when the button was found and clicked, false otherwise.
private bool commentStep3ex(mshtml.IHTMLElement commentTag)
{
    if (commentTag == null)
    {
        return false;
    }

    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (commentTag != div || div.className != "mod-comments")
            {
                continue;
            }

            foreach (mshtml.IHTMLElement descendant in (mshtml.IHTMLElementCollection)div.all)
            {
                if (descendant.className == "btn-post gb_bt evt_click")
                {
                    descendant.click();
                    return true;
                }
            }
        }
    }
    return false;
}
// Checks whether the current account has already commented.
// Returns the first "mod-comments" div that contains no "comments-item bor3" entry whose
// data-uin attribute equals getMyQQ(), or null when every block already has one.
private mshtml.IHTMLElement gerCommented()
{
    String myqq = getMyQQ();
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (div.className != "mod-comments")
            {
                continue;
            }

            bool alreadyCommented = false;
            foreach (mshtml.IHTMLElement descendant in (mshtml.IHTMLElementCollection)div.all)
            {
                Console.WriteLine(descendant.className);
                if (descendant.className == "comments-item bor3"
                    && (String)descendant.getAttribute("data-uin", 0) == myqq)
                {
                    alreadyCommented = true;
                    break;
                }
            }

            // No comment of our own in this block: it is the one to open for editing.
            if (!alreadyCommented)
            {
                return div;
            }
        }
    }
    return null;
}
// Switches to account/password input by clicking the "switcher_plogin" link in the
// first frame. Any exception message is written to the console.
private void button4_Click(object sender, EventArgs e)
{
    try
    {
        mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        object firstFrame = 0;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref firstFrame) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection anchors = (mshtml.IHTMLElementCollection)frameDoc.all.tags("a");
        mshtml.IHTMLElement switcher = (mshtml.IHTMLElement)anchors.item("switcher_plogin", 0);
        switcher.click();
    }
    catch (System.Exception _e)
    {
        Console.Write(_e.Message);
    }
}
// Gets the last active time: shows the text of the first span in any second-level frame
// whose text contains "月", "日" and ":". Shows a null message when nothing matches.
private void button12_Click(object sender, EventArgs e)
{
    string found = null;
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;

    // Both loops stop as soon as a matching span has been found.
    for (int outer = 0; found == null && outer < rootDoc.frames.length; outer++)
    {
        object outerKey = outer;
        mshtml.IHTMLWindow2 outerWindow = rootDoc.frames.item(ref outerKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 outerDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(outerWindow);
        for (int inner = 0; found == null && inner < outerDoc.frames.length; inner++)
        {
            object innerKey = inner;
            mshtml.IHTMLWindow2 innerWindow = outerDoc.frames.item(ref innerKey) as mshtml.IHTMLWindow2;
            IHTMLDocument2 innerDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(innerWindow);
            mshtml.IHTMLElementCollection spans = (mshtml.IHTMLElementCollection)innerDoc.all.tags("span");
            foreach (mshtml.IHTMLElement span in spans)
            {
                string text = span.innerText;
                if (text == null)
                {
                    continue;
                }

                if (text.Contains("月") && text.Contains("日") && text.Contains(":"))
                {
                    found = text;
                    break;
                }
            }
        }
    }
    MessageBox.Show(found);
}
/// <summary>
/// Returns the number of frames in the page currently loaded in a WebBrowser control.
/// </summary>
/// <param name="wb">WebBrowser control to inspect.</param>
/// <returns>
/// The frame count, or 0 when the control or its document is missing or an exception
/// occurs (the exception is logged).
/// </returns>
private static int GetFramesCount(WebBrowser wb)
{
    try
    {
        if (wb == null || wb.Document == null || wb.Document.DomDocument == null)
        {
            return 0;
        }

        mshtml.HTMLDocumentClass htmlDoc = wb.Document.DomDocument as mshtml.HTMLDocumentClass;
        return htmlDoc.frames.length;
    }
    catch (Exception ex)
    {
        Loger.Log4Net.Error("调用GetFramesCount方法异常", ex);
        return 0;
    }
}
// Checks whether the comment was posted successfully: looks inside the commentTag block
// for a "comments-item bor3" entry whose inner HTML contains the value of Func.getMyQQ.
private bool isCommentedSucc()
{
    if (commentTag == null)
    {
        return false;
    }

    String myqq = Func.getMyQQ(webBrowser1);
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (commentTag != div || div.className != "mod-comments")
            {
                continue;
            }

            foreach (mshtml.IHTMLElement descendant in (mshtml.IHTMLElementCollection)div.all)
            {
                if (descendant.className == "comments-item bor3" && descendant.innerHTML.Contains(myqq))
                {
                    return true;
                }
            }
        }
    }
    return false;
}
// Step 2 (edit): writes a test comment ("哈哈:" plus the tick count) into the
// "textinput textarea c_tx2" element of the given comment block.
// Returns the comment block on success, null otherwise. Emits debug output to the console.
private mshtml.IHTMLElement commentStep2(mshtml.IHTMLElement commentTag)
{
    if (commentTag == null)
    {
        return null;
    }

    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (commentTag != div || div.className != "mod-comments")
            {
                continue;
            }

            mshtml.IHTMLElementCollection descendants = (mshtml.IHTMLElementCollection)div.all;
            Console.WriteLine("|||||||||||||||||||||||||||||||||||||||||||||");
            foreach (mshtml.IHTMLElement descendant in descendants)
            {
                Console.WriteLine(descendant.className);
                if (descendant.className == "textinput textarea c_tx2")
                {
                    descendant.innerText = "哈哈:" + Environment.TickCount;
                    return div;
                }
            }
            Console.WriteLine("$$$$$$$$$$$$$$$$$$$$$$");
        }
    }
    return null;
}
// Gets friend info from the feed: collects the "http:" hrefs of anchors that are direct
// children of "ui_avatar" / "feed_like" divs in every top-level frame and shows them.
private void button7_Click(object sender, EventArgs e)
{
    string info = null;
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (div.className != "ui_avatar" && div.className != "feed_like")
            {
                continue;
            }

            foreach (mshtml.IHTMLElement child in (mshtml.IHTMLElementCollection)div.children)
            {
                string tag = child.tagName;
                if (tag != "a" && tag != "A")
                {
                    continue;
                }

                string href = (string)child.getAttribute("href", 0);
                if (href != null && href.Contains("http:"))
                {
                    info += href + "\n";
                }
            }
        }
    }
    MessageBox.Show(info);
}
// Clicks the first "qz_like_btn c_tx mr8" link whose text contains "取消赞" and whose inline
// style is not "display: none;". On a click, curQQ is added to success and a 【成功】 line is
// logged; in every other case (including exceptions) curQQ is added to failed with a 【失败】 line.
private void PraiseFriendFix()
{
    try
    {
        mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
        {
            object frameKey = frameNo;
            mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
            IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
            mshtml.IHTMLElementCollection anchors = (mshtml.IHTMLElementCollection)frameDoc.all.tags("A");
            foreach (mshtml.IHTMLElement anchor in anchors)
            {
                if (anchor.className != "qz_like_btn c_tx mr8")
                {
                    continue;
                }

                string inlineStyle = (string)anchor.style.cssText;
                string caption = anchor.innerText;
                if (caption == null || !caption.Contains("取消赞") || inlineStyle == "display: none;")
                {
                    continue;
                }

                anchor.click();
                success.Add(curQQ);
                richTextBox1.Text += "【成功】" + curQQ + "\n";
                return;
            }
        }
    }
    catch (Exception)
    {
        // Fall through to the failure bookkeeping below.
    }

    failed.Add(curQQ);
    richTextBox1.Text += "【失败】" + curQQ + "\n";
}
// Gets personal info: reads the outer text of the items named "sex" and "age" from the
// top-level frames (the last frame that has them wins) and shows both in a message box.
private void button8_Click(object sender, EventArgs e)
{
    string sexText = "";
    string ageText = "";
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);

        mshtml.IHTMLElement sexElement = (mshtml.IHTMLElement)frameDoc.all.item("sex", 0);
        mshtml.IHTMLElement ageElement = (mshtml.IHTMLElement)frameDoc.all.item("age", 0);

        sexText = sexElement != null ? sexElement.outerText : sexText;
        ageText = ageElement != null ? ageElement.outerText : ageText;
    }

    MessageBox.Show(string.Format("年龄:{0}\n性别:{1}", ageText, sexText));
}
// Step 3 (submit): clicks the "btn-post gb_bt evt_click" link whose sixth ancestor is the
// given comment block, records curQQ in CommentMap and returns the block.
// Returns null when no such link exists. Per-link exceptions are printed and skipped.
private mshtml.IHTMLElement commentStep3(mshtml.IHTMLElement commentTag)
{
    const string banner = "*********************************************************";

    if (commentTag == null)
    {
        return null;
    }

    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection anchors = (mshtml.IHTMLElementCollection)frameDoc.all.tags("a");
        foreach (object rawAnchor in anchors)
        {
            try
            {
                mshtml.IHTMLElement anchor = (mshtml.IHTMLElement)rawAnchor;
                if (anchor.className != "btn-post gb_bt evt_click")
                {
                    continue;
                }

                // Walk six levels up from the button to reach its owning block.
                mshtml.IHTMLElement ancestor = anchor;
                for (int level = 0; level < 6; level++)
                {
                    ancestor = ancestor.parentElement;
                }

                if (ancestor == commentTag)
                {
                    anchor.click();
                    CommentMap.Add(curQQ);
                    return commentTag;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(banner);
                Console.WriteLine(ex.ToString());
                Console.WriteLine(banner);
            }
        }
    }
    return null;
}
// Step 2 (edit): writes getReplyStr() into the comment text area (focused or not) of the
// given comment block. Returns true when the text area was found, false otherwise.
private bool commentStep2(mshtml.IHTMLElement commentTag)
{
    if (commentTag == null)
    {
        return false;
    }

    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (commentTag != div || div.className != "mod-comments")
            {
                continue;
            }

            foreach (mshtml.IHTMLElement descendant in (mshtml.IHTMLElementCollection)div.all)
            {
                bool isTextArea = descendant.className == "textinput textarea c_tx2"
                    || descendant.className == "textinput textarea c_tx2 input_focus textinput_focus";
                if (isTextArea)
                {
                    descendant.innerText = getReplyStr();
                    return true;
                }
            }
        }
    }
    return false;
}
// Gets the last active time: takes the first span in any second-level frame whose text
// contains "月", "日" and ":", replaces those three markers with "-", removes spaces,
// prefixes the current year, appends "-00" and stores the result in spider.UIDB.logintime.
private void getLasTime()
{
    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int outer = 0; outer < rootDoc.frames.length; outer++)
    {
        object outerKey = outer;
        mshtml.IHTMLWindow2 outerWindow = rootDoc.frames.item(ref outerKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 outerDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(outerWindow);
        for (int inner = 0; inner < outerDoc.frames.length; inner++)
        {
            object innerKey = inner;
            mshtml.IHTMLWindow2 innerWindow = outerDoc.frames.item(ref innerKey) as mshtml.IHTMLWindow2;
            IHTMLDocument2 innerDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(innerWindow);
            mshtml.IHTMLElementCollection spans = (mshtml.IHTMLElementCollection)innerDoc.all.tags("span");
            foreach (mshtml.IHTMLElement span in spans)
            {
                string text = span.innerText;
                if (text == null || !text.Contains("月") || !text.Contains("日") || !text.Contains(":"))
                {
                    continue;
                }

                DateTime now = DateTime.Now;
                string stamp = text.Replace("月", "-")
                                   .Replace("日", "-")
                                   .Replace(" ", "")
                                   .Replace(":", "-");
                spider.UIDB.logintime = String.Format("{0}-{1}-00", now.Year, stamp);
                return;
            }
        }
    }
}
// Enters account and password: fills the inputs named "u" and "p" in the first frame.
// Any exception message is written to the console.
// NOTE(review): the credentials are hard-coded in source; they should be moved to secure
// configuration and the exposed password rotated.
private void button5_Click(object sender, EventArgs e)
{
    const string account = "305723370";
    const string password = "cdtanji51520";

    try
    {
        mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        object firstFrame = 0;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref firstFrame) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection inputs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("input");

        mshtml.IHTMLInputElement accountBox = (mshtml.IHTMLInputElement)inputs.item("u", 0);
        accountBox.value = account;

        mshtml.IHTMLInputElement passwordBox = (mshtml.IHTMLInputElement)inputs.item("p", 0);
        passwordBox.value = password;
    }
    catch (System.Exception _e)
    {
        Console.Write(_e.Message);
    }
}
// Step 1 (open the edit box): clicks the
// "mod-commnets-poster feedClickCmd comment_default_inputentry" element inside the given
// comment block. Returns the block when the element was clicked, null otherwise.
private mshtml.IHTMLElement commentStep1(mshtml.IHTMLElement commentTag)
{
    // The returned value is not used here; the call itself is kept from the original.
    getMyQQ();

    mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
    {
        object frameKey = frameNo;
        mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
        IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
        mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
        foreach (mshtml.IHTMLElement div in divs)
        {
            if (commentTag != div || div.className != "mod-comments")
            {
                continue;
            }

            foreach (mshtml.IHTMLElement descendant in (mshtml.IHTMLElementCollection)div.all)
            {
                if (descendant.className == "mod-commnets-poster feedClickCmd comment_default_inputentry")
                {
                    descendant.click();
                    return div;
                }
            }
        }
    }
    return null;
}
// Clicks the first "qz_like_btn c_tx mr8" link whose text does not contain "取消赞" and sets
// pro to "点赞完成". When no link is clicked (or an exception occurs) it calls Zhan() to
// move straight on to the next one.
private void PraiseFriend()
{
    try
    {
        mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
        {
            object frameKey = frameNo;
            mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
            IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
            mshtml.IHTMLElementCollection anchors = (mshtml.IHTMLElementCollection)frameDoc.all.tags("A");
            foreach (mshtml.IHTMLElement anchor in anchors)
            {
                if (anchor.className != "qz_like_btn c_tx mr8")
                {
                    continue;
                }

                string caption = anchor.innerText;
                if (caption == null || caption.Contains("取消赞"))
                {
                    continue;
                }

                anchor.click();
                pro = "点赞完成";
                return;
            }
        }
    }
    catch (Exception)
    {
        // Treated the same as "nothing clicked".
    }

    // On failure, go directly to the next one.
    Zhan();
}
// When Enable is set: collects the "http:" hrefs of anchors that are direct children of
// "ui_avatar" / "feed_like" divs in every top-level frame, passes each to spider.AddQQ,
// then calls spider.UserGetOver(), logs the collected links and switches to "NewUser".
// On any exception the message is logged and the state is still switched to "NewUser".
public override void Tick()
{
    if (!Enable)
    {
        return;
    }

    try
    {
        string info = null;
        mshtml.HTMLDocumentClass rootDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        for (int frameNo = 0; frameNo < rootDoc.frames.length; frameNo++)
        {
            object frameKey = frameNo;
            mshtml.IHTMLWindow2 window = rootDoc.frames.item(ref frameKey) as mshtml.IHTMLWindow2;
            IHTMLDocument2 frameDoc = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);
            mshtml.IHTMLElementCollection divs = (mshtml.IHTMLElementCollection)frameDoc.all.tags("div");
            foreach (mshtml.IHTMLElement div in divs)
            {
                if (div.className != "ui_avatar" && div.className != "feed_like")
                {
                    continue;
                }

                foreach (mshtml.IHTMLElement child in (mshtml.IHTMLElementCollection)div.children)
                {
                    string tag = child.tagName;
                    if (tag != "a" && tag != "A")
                    {
                        continue;
                    }

                    string href = (string)child.getAttribute("href", 0);
                    if (href != null && href.Contains("http:"))
                    {
                        info += href + "\n";
                        spider.AddQQ(href);
                    }
                }
            }
        }

        spider.UserGetOver();
        Log(info);
        ChangeCommon("NewUser");
    }
    catch (Exception e)
    {
        Log(e.Message);
        ChangeCommon("NewUser");
    }
}
// Parses the given HTML with an off-screen MSHTML document and builds one `element`
// record per DOM element that has outer HTML.
//
// Pass 1 fills the per-element statistics (word counts, link counts, tag counts) and
// appends each record to _list, mapping the element's sourceIndex to its list position
// in _ht. Pass 2 calls ExtractionofSubLayouts on each element and fills the "_AE"
// (after extraction) statistics for a fixed set of tag names.
//
// htmlContent2: raw HTML text to analyse.
// Returns: _list, the ArrayList of element records (also kept in the _list field).
// Side effects: resets _ht, _list and _xmllist; may assign domhtmlContent, all_words,
// savehtmlContent and resulthmtlContent.
public ArrayList prepareDOM(string htmlContent2)
{
    string htmlContent = htmlContent2;
    htmlContent = HTML.trim_commenttags(htmlContent);
    htmlContent = HTML.trimOptions(htmlContent);
    htmlContent = HTML.trimScript(htmlContent);
    htmlContent = HTML.trim_HREF_SCR(htmlContent);
    htmlContent = HTML.trim_some_cases(htmlContent);
    // The trimming above is for fast processing; otherwise images, links and javascript would be loaded.
    IHTMLDocument2 htmlDocument = new mshtml.HTMLDocumentClass();
    htmlDocument.write(htmlContent);
    IHTMLElementCollection allElements = htmlDocument.all;
    _ht = new Hashtable();
    _list = new ArrayList();
    _xmllist = new ArrayList();
    string _tempinner_text = "";
    if (htmlDocument.body != null)
    {
        if (htmlDocument.body.innerText != null)
        {
            _tempinner_text = htmlDocument.body.innerText.Replace("\r\n", "");
            domhtmlContent = htmlDocument.body.outerHTML.Replace("\r\n", "");
        }
    }
    // NOTE(review): body is dereferenced here without the null guard used just above.
    element _firstelement = AnalyzeGivenHTML(htmlDocument.body.innerHTML, _tempinner_text);
    all_words = _firstelement.BagofWords;
    int i = 0;
    // Pass 1: create one record per element and compute its raw statistics.
    foreach (IHTMLElement htmlelement in allElements)
    {
        if (htmlelement.outerHTML != null)
        {
            element _element = new element();
            _element.id = i;
            _element.outerHTML = htmlelement.outerHTML;
            _element.outerHTML = _element.outerHTML.Replace("\r\n", "");
            if (htmlelement.innerHTML != null)
            {
                _element.innerHTML = htmlelement.innerHTML;
                _element.innerHTML = _element.innerHTML.Replace("\r\n", "");
            }
            else
            {
                _element.innerHTML = "";
            }
            if (_element.id == 0)
            {
                _element.elementlinked_id = -1;//root
                savehtmlContent = _element.outerHTML;
                resulthmtlContent = _element.outerHTML;
            }
            else
            {
                _element.elementlinked_id = 0;
            }
            if (htmlelement.tagName == "HTML")
            {//the HTML element sometimes arrives late (i.e. not as the first element)
                savehtmlContent = _element.outerHTML;
                resulthmtlContent = _element.outerHTML;
            }
            // elementName is the opening tag text: from the first '<' to the first '>' inclusive.
            string _str = _element.outerHTML;
            int _start = _str.IndexOf('<');
            int _end = _str.IndexOf('>');
            _element.elementName = _str.Substring(_start, _end - _start + 1);
            _element.tagName = htmlelement.tagName;
            _element.tag_id_Name = "";
            _element.tag_class_Name = "";
            if (htmlelement.id != null)
            {
                _element.tag_id = 1;
                _element.tag_id_Name = htmlelement.id;
            }
            if (htmlelement.className != null)
            {
                _element.tag_class = 1;
                _element.tag_class_Name = htmlelement.className;
            }
            // NOTE(review): as written the flag is set unless BOTH tag_id and tag_class are 1;
            // the field name suggests "has id OR class" — confirm the intended condition.
            if (_element.tag_id != 1 || _element.tag_class != 1)
            {
                _element.tag_idORclass = 1;
            }
            string tempinner_text = htmlelement.innerText;
            if (tempinner_text != null)
            {
                tempinner_text = tempinner_text.Replace("\r\n", " ");
                tempinner_text = tempinner_text.Trim();
            }
            else
            {
                tempinner_text = "";
            }
            element _tempelement = AnalyzeGivenHTML(htmlelement.outerHTML, tempinner_text);
            _element.BagofWords = _tempelement.BagofWords;
            _element.wordCount = _tempelement.wordCount;
            // Word count of this element relative to the word count of the whole body.
            // NOTE(review): no guard for _firstelement.wordCount being 0 — confirm it cannot be.
            _element.DensityinHTML = (double)_element.wordCount / _firstelement.wordCount;
            _element.LinkCount = _tempelement.LinkCount;
            _element.wordCountinLink = _tempelement.wordCountinLink;
            _element.meanofWordinLinks = _tempelement.meanofWordinLinks;
            _element.meanofWordinLinksAllWords = _tempelement.meanofWordinLinksAllWords;
            // Upper-cased with the en-US culture so that HTML tag names match the literals below.
            string temp_innerhtml_ = _element.innerHTML.ToUpper(new CultureInfo("en-US", false));
            _element.dot_count = webfilter.CountStringOccurrences(temp_innerhtml_, ".");
            _element.h1_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<H1");
            _element.h2_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<H2");
            _element.h3_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<H3");
            _element.h4_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<H4");
            _element.h5_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<H5");
            _element.h6_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<H6");
            _element.img_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<IMG");
            _element.p_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<P");
            _element.br_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<BR");
            _element.span_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<SPAN");
            _element.object_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<OBJECT");
            _element.ul_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<UL");
            _element.li_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<LI");
            // input_count is the sum of INPUT, BUTTON and LABEL occurrences.
            _element.input_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<INPUT") + webfilter.CountStringOccurrences(temp_innerhtml_, "<BUTTON") + webfilter.CountStringOccurrences(temp_innerhtml_, "<LABEL");
            _element.div_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<DIV");
            _element.td_count = webfilter.CountStringOccurrences(temp_innerhtml_, "<TD");
            _element.parent_elementName = "";
            //sim control
            //-1 : not available for sim control
            //0 : similar
            //0..1: similarity degree
            //1 : not similar
            _element.sim_bagofword = -1;
            _element.sim_bagofword_AE = -1;
            _element.sim_innerHTML = -1;
            _element.sim_innerHTML_AE = -1;
            _list.Add(_element);
            int key = (int)htmlelement.sourceIndex;//for fast searching
            _ht.Add(key, i);
            i++;
        }
    }
    // Pass 2: fill the after-extraction ("_AE") statistics.
    foreach (IHTMLElement htmlelement in allElements)
    {
        if (htmlelement.outerHTML != null)
        {
            // Result layout as used below: [0] list index (as text), [1] inner text,
            // [2] outer HTML, [3] inner HTML.
            string[] _sonuclar = ExtractionofSubLayouts(htmlelement);
            string tempinner_text = _sonuclar[1];
            string tempOuterHTML = _sonuclar[2];
            string tempinnerHTML = _sonuclar[3];
            string str_i = _sonuclar[0];
            i = Convert.ToInt32(str_i);
            //After Extraction
            element _element = (element)_list[i];
            element _tempelement = AnalyzeGivenHTML_AE(tempOuterHTML, tempinner_text);
            if (_element.elementlinked_id > 0)
            {
                element _p_element = (element)_list[_element.elementlinked_id];
                _element.parent_elementName = _p_element.elementName;
            }
            if (_element.tagName == "DIV" || _element.tagName == "TD" || _element.tagName == "UL" || _element.tagName == "H1" || _element.tagName == "H2" || _element.tagName == "H3" || _element.tagName == "H4" || _element.tagName == "H5" || _element.tagName == "H6" || _element.tagName == "SPAN" || _element.tagName == "B" || _element.tagName == "STRONG" || _element.tagName == "P")
            {
                _element.outerHTML_AE = tempOuterHTML;
                _element.innerHTML_AE = tempinnerHTML;
                _element.BagofWords_AE = _tempelement.BagofWords_AE;
                _element.wordCount_AE = _tempelement.wordCount_AE;
                _element.DensityinHTML_AE = (double)_element.wordCount_AE / _firstelement.wordCount;
                _element.LinkCount_AE = _tempelement.LinkCount_AE;
                _element.wordCountinLink_AE = _tempelement.wordCountinLink_AE;
                _element.meanofWordinLinks_AE = _tempelement.meanofWordinLinks_AE;
                _element.meanofWordinLinksAllWords_AE = _tempelement.meanofWordinLinksAllWords_AE;
                // Upper-cased with the en-US culture so that HTML tag names match the literals below.
                string temp_innerhtml_AE = _element.innerHTML_AE.ToUpper(new CultureInfo("en-US", false));
                _element.dot_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, ".");
                _element.h1_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<H1");
                _element.h2_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<H2");
                _element.h3_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<H3");
                _element.h4_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<H4");
                _element.h5_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<H5");
                _element.h6_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<H6");
                _element.img_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<IMG");
                _element.p_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<P");
                _element.br_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<BR");
                _element.span_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<SPAN");
                _element.object_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<OBJECT");
                _element.ul_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<UL");
                _element.li_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<LI");
                _element.input_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<INPUT") + webfilter.CountStringOccurrences(temp_innerhtml_AE, "<BUTTON") + webfilter.CountStringOccurrences(temp_innerhtml_AE, "<LABEL");
                _element.div_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<DIV");
                _element.td_count_AE = webfilter.CountStringOccurrences(temp_innerhtml_AE, "<TD");
                if (_element.wordCount_AE > _element.wordCount)
                {
                    _element.wordCount_AE = _element.wordCount;
                    // Exceptional and rare case, caused by scripts: clamp to the raw word count.
                }
                // Repeat count of the tag: number of records sharing the same opening-tag text.
                //_element.repeat_tag_count = webfilter.CountStringOccurrences(htmlDocument.body.innerHTML, _element.elementName);
                int benzertagsayisi = 0;
                for (int k = 0; k < _list.Count; k++)
                {
                    element _e1 = (element)_list[k];
                    if (_element.elementName == _e1.elementName)
                    {
                        benzertagsayisi++;
                    }
                }
                _element.repeat_tag_count = benzertagsayisi;
            }
            // Store the updated record back into the list.
            _list[i] = _element;
        } // if not null
    } //for each
    return(_list);
}
/// <summary>
/// Timer handler that works in two phases against the Internet Explorer tabs that are currently open.
/// </summary>
/// <remarks>
/// Phase 1 (while <c>isFirstExam</c> is true): for each tab whose title contains "答题后", stores the
/// inner HTML of the page body in <c>htmlStr</c> and clears <c>isFirstExam</c>.
/// Phase 2: for each tab whose title contains "答题前", extracts the answer block from <c>htmlStr</c>,
/// splits it into one fragment per question and, for single-choice questions, shows the text of the
/// first &lt;li&gt; element whose markup contains the question text. Multiple-choice and true/false
/// fragments are parsed, but nothing is done with them yet.
/// Any exception raised while talking to the browser is traced and the current tick is abandoned,
/// in both phases.
/// </remarks>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void watchExam_Tick(object sender, EventArgs e)
{
    try
    {
        // Microsoft Internet Controls: ShellWindows enumerates the open shell windows;
        // each IE tab is exposed as one SHDocVw.InternetExplorer COM object.
        SHDocVw.ShellWindows sws = new SHDocVw.ShellWindows();

        if (isFirstExam)
        {
            CaptureExamResultHtml(sws);
        }
        else
        {
            ProcessCapturedAnswers(sws);
        }
    }
    catch (Exception ex)
    {
        // Best effort: a tab may close or navigate while it is being inspected.
        // Trace the failure instead of swallowing it silently.
        System.Diagnostics.Trace.TraceError("watchExam_Tick failed: " + ex);
    }
}

/// <summary>
/// Phase 1: caches the body HTML of every tab whose title contains "答题后" (the last match wins).
/// </summary>
private void CaptureExamResultHtml(SHDocVw.ShellWindows sws)
{
    foreach (SHDocVw.InternetExplorer iw in sws)
    {
        if (!iw.LocationName.Contains("答题后"))
        {
            continue;
        }

        // The tab's Document gives access to the page DOM.
        mshtml.HTMLDocumentClass doc = (mshtml.HTMLDocumentClass)iw.Document;
        mshtml.HTMLBody body = (mshtml.HTMLBody)doc.body;
        if (body == null || body.innerHTML == null)
        {
            // Page not rendered yet; leave isFirstExam set so a later tick tries again.
            continue;
        }

        htmlStr = body.innerHTML.ToString();
        isFirstExam = false;
    }
}

/// <summary>
/// Phase 2: for every tab whose title contains "答题前", walks the answer fragments parsed from
/// <c>htmlStr</c> and shows the matching list item for each single-choice question.
/// </summary>
private void ProcessCapturedAnswers(SHDocVw.ShellWindows sws)
{
    const string realAnswerKey = "\"realAnswer\":\"";
    const string realAnswerEnd = "\",\"realScore\"";
    const string contentKey = "\"content\":\"";
    const string contentEnd = "\",\"filePath\"";

    foreach (SHDocVw.InternetExplorer iw in sws)
    {
        if (!iw.LocationName.Contains("答题前"))
        {
            continue;
        }

        foreach (string fragment in SplitAnswerFragments(htmlStr))
        {
            if (fragment.Contains("\"type\":\"single\""))
            {
                // Single choice, e.g. "realAnswer":"D". The option is extracted for the
                // auto-answer step, which is not implemented yet.
                string xuanxiang = SubstringAfter(fragment, realAnswerKey, 1);
                string content = SubstringBetween(fragment, contentKey, contentEnd);
                if (content == null)
                {
                    // Fragment does not have the expected shape; skip it instead of throwing.
                    continue;
                }

                HTMLDocument doc = (HTMLDocument)iw.Document;
                mshtml.IHTMLElementCollection inputs = (mshtml.IHTMLElementCollection)doc.all.tags("li");
                IHTMLElement he1 = inputs.Cast<IHTMLElement>()
                                         .FirstOrDefault(p => p.innerHTML != null && p.innerHTML.Contains(content));

                // FirstOrDefault yields null when no list item contains the question text.
                if (he1 != null)
                {
                    MessageBox.Show(he1.innerText);
                }
            }
            else if (fragment.Contains("\"type\":\"multiple\""))
            {
                // Multiple choice. TODO: auto-answering is not implemented; values are parsed only.
                string xuanxiang = SubstringBetween(fragment, realAnswerKey, realAnswerEnd);
                string content = SubstringBetween(fragment, contentKey, contentEnd);
            }
            else if (fragment.Contains("\"type\":\"judge\""))
            {
                // True/false. TODO: auto-answering is not implemented; values are parsed only.
                string xuanxiang = SubstringAfter(fragment, realAnswerKey, 1);
                string content = SubstringBetween(fragment, contentKey, contentEnd);
            }
        }
    }
}

/// <summary>
/// Extracts the answer block from <paramref name="html"/> and splits it into one fragment per
/// question, with all curly braces removed. Returns a single empty fragment when no block is found.
/// </summary>
private static string[] SplitAnswerFragments(string html)
{
    Regex regex = new Regex(@"{""answer"".*}]'\s*/>", RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);

    string daan = "";
    Match match = regex.Match(html ?? "");
    if (match.Success)
    {
        daan = match.Value.Replace("/>", "").Trim().Replace("]'", "");
    }

    // Fragments are separated by "},{"; the first one keeps a leading '{' and the last one a
    // trailing '}', so braces are stripped from every fragment.
    string[] lstDaAn = Regex.Split(daan, "},{", RegexOptions.IgnoreCase);
    for (int i = 0; i < lstDaAn.Length; i++)
    {
        lstDaAn[i] = lstDaAn[i].Replace("{", "").Replace("}", "");
    }

    return lstDaAn;
}

/// <summary>
/// Returns the <paramref name="length"/> characters that follow the first occurrence of
/// <paramref name="marker"/>, or null when the marker is absent or too close to the end of the text.
/// </summary>
private static string SubstringAfter(string text, string marker, int length)
{
    int markerAt = text.IndexOf(marker, StringComparison.Ordinal);
    if (markerAt < 0)
    {
        return null;
    }

    int start = markerAt + marker.Length;
    return start + length <= text.Length ? text.Substring(start, length) : null;
}

/// <summary>
/// Returns the text between the first <paramref name="startMarker"/> and the next
/// <paramref name="endMarker"/> after it, or null when either marker is missing.
/// </summary>
private static string SubstringBetween(string text, string startMarker, string endMarker)
{
    int markerAt = text.IndexOf(startMarker, StringComparison.Ordinal);
    if (markerAt < 0)
    {
        return null;
    }

    int start = markerAt + startMarker.Length;
    int end = text.IndexOf(endMarker, start, StringComparison.Ordinal);
    return end < 0 ? null : text.Substring(start, end - start);
}
/// <summary>
/// Get chat content (unfinished). Walks the frames nested two levels below the page hosted in
/// <c>webBrowser1</c>, locates the element named "host_home_feeds" and iterates its
/// "f-single f-s-s" children and their "f-wrap" children. Nothing is collected yet, so the
/// message box shown at the end is always given a null text.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button13_Click(object sender, EventArgs e)
{
    string info = null;

    // WebBrowser.Document is null until a page has loaded, and the "as" cast yields null when
    // the DOM object is not an HTMLDocumentClass; in both cases there is nothing to walk.
    mshtml.HTMLDocumentClass htmlDoc = null;
    if (webBrowser1.Document != null)
    {
        htmlDoc = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
    }

    if (htmlDoc != null)
    {
        for (int i = 0; i < htmlDoc.frames.length; ++i)
        {
            object index = i;
            mshtml.IHTMLWindow2 frameWindow = htmlDoc.frames.item(ref index) as mshtml.IHTMLWindow2;
            if (frameWindow == null)
            {
                continue;
            }

            // NOTE(review): presumably returns null when the frame document cannot be reached
            // (e.g. cross-domain) - confirm against CrossFrameIE.
            IHTMLDocument2 frame = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(frameWindow);
            if (frame == null)
            {
                continue;
            }

            for (int j = 0; j < frame.frames.length; ++j)
            {
                object rj = j;
                mshtml.IHTMLWindow2 frameWindow2 = frame.frames.item(ref rj) as mshtml.IHTMLWindow2;
                if (frameWindow2 == null)
                {
                    continue;
                }

                IHTMLDocument2 frame2 = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(frameWindow2);
                if (frame2 == null)
                {
                    continue;
                }

                mshtml.IHTMLElement host_home_feeds = (mshtml.IHTMLElement)frame2.all.item("host_home_feeds", 0);
                if (host_home_feeds == null)
                {
                    continue;
                }

                // Children of the feed container (the <li> entries).
                foreach (mshtml.IHTMLElement element in (mshtml.IHTMLElementCollection)host_home_feeds.children)
                {
                    if (element.className != "f-single f-s-s")
                    {
                        continue;
                    }

                    foreach (mshtml.IHTMLElement element2 in (mshtml.IHTMLElementCollection)element.children)
                    {
                        if (element2.className == "f-wrap")
                        {
                            // TODO: unfinished - the original author stopped here ("too long").
                            // An earlier draft instead scanned every <div> of frame2 and appended
                            // the innerText of the first one with class "comments-content" to
                            // info, followed by "|".
                        }
                    }
                }
            }
        }
    }

    MessageBox.Show(info);
}
/// <summary>
/// Loads <paramref name="htmlContent"/> into a fresh MSHTML document and returns the inner text of
/// every DIV whose class and id both equal the values taken from <paramref name="tagName"/>.
/// </summary>
/// <param name="htmlContent">HTML markup to parse.</param>
/// <param name="tagName">
/// Tag descriptor. When it contains <c>id=</c> / <c>class=</c>, the value is obtained through
/// <c>findElementName</c> with the patterns <c>id="(.*?)"</c> / <c>class="(.*?)"</c>; otherwise the
/// empty string is used, so only DIVs without that attribute value match.
/// </param>
/// <param name="filename">Not used by this method; kept so existing callers keep compiling.</param>
/// <returns>
/// An <see cref="ArrayList"/> holding the <c>innerText</c> of each matching DIV, in document order.
/// </returns>
public ArrayList fingTAGibHTML(String htmlContent, String tagName, string filename)
{
    // presumably findElementName returns the first capture group of the pattern - verify against its definition
    string id_name = tagName.Contains("id=") ? findElementName(tagName, "id=\"(.*?)\"") : "";
    string class_name = tagName.Contains("class=") ? findElementName(tagName, "class=\"(.*?)\"") : "";

    // Build the document from the markup.
    // NOTE(review): the document is written but never closed - confirm whether close() is needed here.
    IHTMLDocument2 htmlDocument = new mshtml.HTMLDocumentClass();
    htmlDocument.write(htmlContent);

    ArrayList sonuc = new ArrayList();
    foreach (IHTMLElement element in htmlDocument.all)
    {
        if (element.tagName != "DIV")
        {
            continue;
        }

        // A missing class/id is compared as the empty string.
        string cn = element.className ?? "";
        string id = element.id ?? "";
        if (cn == class_name && id == id_name)
        {
            sonuc.Add(element.innerText);
        }
    }

    return sonuc;
}
/// <summary>
/// Parses <paramref name="htmlContent"/> with MSHTML and fills <c>_list</c> with one
/// <c>element</c> record per DOM element, <c>_ht</c> with a sourceIndex-to-list-index map and
/// <c>_xmllist</c> with the de-duplicated content of the elements marked relevant.
/// </summary>
/// <remarks>
/// The work is done in three passes:
/// 1. every element with a non-null outerHTML gets a record holding its markup, opening tag,
///    optional className marker and the statistics returned by <c>AnalyzeGivenHTML</c>;
/// 2. every element is run through <c>ExtractionofSubLayouts</c> and <c>AnalyzeGivenHTML_AE</c>
///    to fill the "_AE" fields, and DIV/TD elements are passed to
///    <c>determineIrrevelantLayout</c> to decide relevance;
/// 3. every relevant record receives a <c>predicted_className</c> and, when its
///    <c>BagofWords_AE</c> is non-empty after cleaning, is added to or merged into <c>_xmllist</c>.
/// </remarks>
/// <param name="htmlContent">
/// Raw HTML; it is passed through <c>HTML.trimOptions</c> and <c>HTML.trimScript</c> before parsing.
/// </param>
/// <returns>The <c>_list</c> field that was rebuilt by this call.</returns>
public ArrayList prepareDOM(string htmlContent)
{
    htmlContent = HTML.trimOptions(htmlContent);
    htmlContent = HTML.trimScript(htmlContent);
    IHTMLDocument2 htmlDocument = new mshtml.HTMLDocumentClass();
    htmlDocument.write(htmlContent);
    IHTMLElementCollection allElements = htmlDocument.all;

    // Reset the per-document state shared with the other methods of this class.
    _ht = new Hashtable();
    _list = new ArrayList();
    _xmllist = new ArrayList();

    // Whole-document statistics; _firstelement.wordCount is the denominator of every density below.
    string _tempinner_text = "";
    if (htmlDocument.body != null)
    {
        if (htmlDocument.body.innerText != null)
        {
            _tempinner_text = htmlDocument.body.innerText.Replace("\r\n", "");
        }
    }
    element _firstelement = AnalyzeGivenHTML(htmlContent, _tempinner_text);

    // ---- Pass 1: one record per DOM element ----
    int i = 0;
    foreach (IHTMLElement htmlelement in allElements)
    {
        if (htmlelement.outerHTML != null)
        {
            element _element = new element();
            _element.id = i;
            _element.outerHTML = htmlelement.outerHTML;
            _element.outerHTML = _element.outerHTML.Replace("\r\n", "");
            if (htmlelement.innerHTML != null)
            {
                _element.innerHTML = htmlelement.innerHTML;
                _element.innerHTML = _element.innerHTML.Replace("\r\n", "");
            }
            else
            {
                _element.innerHTML = "";
            }
            if (_element.id == 0)
            {
                _element.elementlinked_id = -1; // root
                savehtmlContent = _element.outerHTML;
                resulthmtlContent = _element.outerHTML;
            }
            else
            {
                _element.elementlinked_id = 0;
            }
            if (htmlelement.tagName == "HTML")
            {
                // The HTML element sometimes arrives late (it is not always the first record).
                savehtmlContent = _element.outerHTML;
                resulthmtlContent = _element.outerHTML;
            }

            // elementName is the text from the first '<' up to and including the first '>'.
            string _str = _element.outerHTML;
            int _start = _str.IndexOf('<');
            int _end = _str.IndexOf('>');
            _element.elementName = _str.Substring(_start, _end - _start + 1);

            // Marker of the form <!--className::(.*?)-->. It is only accepted when it starts
            // right after the opening tag, so that a marker belonging to another element is not
            // picked up. 15 is the length of "<!--className::".
            _start = _str.IndexOf("<!--className::");
            _end = _str.IndexOf("-->");
            if (_start >= 0)
            {
                if (_start == _element.elementName.Length)
                {
                    _start = 15 + _element.elementName.Length;
                    if (_end - _start > 0)
                    {
                        _element.className = _str.Substring(_start, _end - _start);
                    }
                }
            }
            _element.tagName = htmlelement.tagName;
            if (htmlelement.id != null)
            {
                _element.tag_id = 1;
            }
            if (htmlelement.className != null)
            {
                _element.tag_class = 1;
            }
            // NOTE(review): this adds tag_idORclass to itself rather than tag_class. If
            // tag_idORclass starts at 0 the result is just tag_id and the "== 2" clamp below can
            // never fire; presumably tag_id + tag_class was intended. Confirm before changing,
            // because the decision classes called later receive this record.
            _element.tag_idORclass = _element.tag_id + _element.tag_idORclass;
            if (_element.tag_idORclass == 2)
            {
                _element.tag_idORclass = 1;
            }
            string tempinner_text = htmlelement.innerText;
            if (tempinner_text != null)
            {
                tempinner_text = tempinner_text.Replace("\r\n", " ");
                tempinner_text = tempinner_text.Trim();
            }
            else
            {
                tempinner_text = "";
            }
            element _tempelement = AnalyzeGivenHTML(htmlelement.outerHTML, tempinner_text);
            _element.BagofWords = _tempelement.BagofWords;
            _element.wordCount = _tempelement.wordCount;
            // NOTE(review): there is no zero check on _firstelement.wordCount before this division.
            _element.DensityinHTML = (double)_element.wordCount / _firstelement.wordCount;
            _element.LinkCount = _tempelement.LinkCount;
            _element.wordCountinLink = _tempelement.wordCountinLink;
            _element.meanofWordinLinks = _tempelement.meanofWordinLinks;
            _element.meanofWordinLinksAllWords = _tempelement.meanofWordinLinksAllWords;
            _element.similarity_with_other_web_page = 1;
            _element.relevant = false;
            _element.parent_elementName = "";
            _list.Add(_element);
            int key = (int)htmlelement.sourceIndex; // for fast searching: sourceIndex -> index in _list
            _ht.Add(key, i);
            i++;
        }
    }

    // ---- Pass 2: "after extraction" (_AE) features and relevance of DIV/TD layouts ----
    foreach (IHTMLElement htmlelement in allElements)
    {
        if (htmlelement.outerHTML != null)
        {
            // As consumed here: [0] index into _list (as text), [1] inner text,
            // [2] outer HTML, [3] inner HTML.
            string[] _sonuclar = ExtractionofSubLayouts(htmlelement);
            string tempinner_text = _sonuclar[1];
            string tempOuterHTML = _sonuclar[2];
            string tempinnerHTML = _sonuclar[3];
            string str_i = _sonuclar[0];
            i = Convert.ToInt32(str_i);

            // After extraction
            element _element = (element)_list[i];
            element _tempelement = AnalyzeGivenHTML_AE(tempOuterHTML, tempinner_text);
            _element.outerHTML_AE = tempOuterHTML;
            _element.innerHTML_AE = tempinnerHTML;
            _element.BagofWords_AE = _tempelement.BagofWords_AE;
            _element.wordCount_AE = _tempelement.wordCount_AE;
            _element.DensityinHTML_AE = (double)_element.wordCount_AE / _firstelement.wordCount;
            _element.LinkCount_AE = _tempelement.LinkCount_AE;
            _element.wordCountinLink_AE = _tempelement.wordCountinLink_AE;
            _element.meanofWordinLinks_AE = _tempelement.meanofWordinLinks_AE;
            _element.meanofWordinLinksAllWords_AE = _tempelement.meanofWordinLinksAllWords_AE;

            // Does the element's text end with a dot? The last character is tested untrimmed.
            if (htmlelement.innerText != null)
            {
                if (htmlelement.innerText.Trim() != "")
                {
                    if (htmlelement.innerText[htmlelement.innerText.Length - 1] == '.')
                    {
                        _element.dot_endofstence = 1;
                    }
                    else
                    {
                        _element.dot_endofstence = 0;
                    }
                }
            }
            if (_element.wordCount_AE > _element.wordCount)
            {
                // Exceptional, rare case caused by scripts: clamp to the pre-extraction count.
                _element.wordCount_AE = _element.wordCount;
            }
            _list[i] = _element;
            if (htmlelement.tagName == "DIV" || htmlelement.tagName == "TD")
            {
                element _e = (element)_list[i];
                if (_e.elementName.Contains("vAlign=bot"))
                {
                    _e.relevant = true;
                }
                bool _decision = HTMLMarkerClass.desicionClass.determineIrrevelantLayout(_element);
                if (_decision == false)
                {
                    _e.relevant = true;
                    _list[i] = _e;
                    // Update child elements: records linked to this layout become relevant too.
                    for (int m = 0; m < _list.Count; m++)
                    {
                        element _et = (element)_list[m];
                        if (_et.elementlinked_id == _e.id)
                        {
                            if (_et.tagName != "DIV") // no decision is made here for DIVs
                            {
                                if (_et.tagName != "TD")
                                {
                                    _et.relevant = true;
                                    _et.parent_elementName = _e.elementName;
                                    _list[m] = _et;
                                }
                            }
                        }
                    } // for m
                } // _decision == false
            } // if DIV or TD
        } // if not null
    } // for each

    // ---- Pass 3: label relevant records and collect their content ----
    for (int m = 0; m < _list.Count; m++)
    {
        element _element = (element)_list[m];
        if (_element.relevant == true)
        {
            if (_element.tagName == "DIV" || _element.tagName == "TD")
            {
                bool _mainlayout = HTMLMarkerClass.desicionClass.determineLayout(_element);
                if (_mainlayout)
                {
                    _element.predicted_className = "MAIN";
                }
                else
                {
                    _element.predicted_className = HTMLMarkerClass.desicionClass.determineHEADLINE_INFORMATION(_element);
                }
            }
            else
            {
                // Only DIV/TD may be labelled "MAIN"; other tags are downgraded to "IRRELEVANT".
                _element.predicted_className = HTMLMarkerClass.desicionClass.determineHEADLINE_INFORMATION(_element);
                if (_element.predicted_className == "MAIN")
                {
                    _element.predicted_className = "IRRELEVANT";
                }
            }
            // (Disabled in the original: relabelling "IRRELEVANT" as "INFORMATIONABOUTARTICLE".)
            if (clear_illegal_characters_for_XML(_element.BagofWords_AE.Trim()).Trim() != "")
            {
                // The labelled record is written back only when it has content left after cleaning.
                _list[m] = _element;
                xml_elemet _xml = new xml_elemet();
                _xml.elementName = _element.elementName;
                _xml.content = _element.BagofWords_AE;
                _xml.predicted_className = _element.predicted_className;
                _xml.parent_elementName = _element.parent_elementName;

                // Entries with equal content are merged: the element name is appended to the
                // existing entry instead of adding a duplicate.
                bool find = false;
                for (int v = 0; v < _xmllist.Count; v++)
                {
                    xml_elemet item = (xml_elemet)_xmllist[v];
                    if (item.content == _xml.content)
                    {
                        item.elementName = item.elementName + ", " + _element.elementName;
                        _xmllist[v] = item;
                        find = true;
                    }
                }
                if (!find)
                {
                    _xmllist.Add(_xml);
                }
            }
        }
    }
    return(_list);
}
/// <summary>
/// When <c>Enable</c> is set, copies profile data from the frames of the page loaded in
/// <c>webBrowser1</c> into <c>spider.UIDB</c> (sex, age, birthday, then blog/photo/mood counters),
/// increments <c>spider.okcount</c>, calls <c>getFriend</c>, <c>getNickName</c> and
/// <c>getLasTime</c>, and switches to "ShuoShuo". Any exception along the way is logged through
/// <c>Log</c> and the handler switches to "NewUser" instead.
/// </summary>
public override void Tick()
{
    if (!Enable)
    {
        return;
    }

    try
    {
        // Sex, age and birthday, read through the MSHTML frame documents.
        mshtml.HTMLDocumentClass rootDocument = webBrowser1.Document.DomDocument as mshtml.HTMLDocumentClass;
        int frameNumber = 0;
        while (frameNumber < rootDocument.frames.length)
        {
            object boxedIndex = frameNumber;
            mshtml.IHTMLWindow2 window = rootDocument.frames.item(ref boxedIndex) as mshtml.IHTMLWindow2;
            IHTMLDocument2 frameDocument = CodecentrixSample.CrossFrameIE.GetDocumentFromWindow(window);

            // All three lookups happen before any value is stored.
            mshtml.IHTMLElement sexElement = (mshtml.IHTMLElement)frameDocument.all.item("sex", 0);
            mshtml.IHTMLElement ageElement = (mshtml.IHTMLElement)frameDocument.all.item("age", 0);
            mshtml.IHTMLElement birthdayElement = (mshtml.IHTMLElement)frameDocument.all.item("birthday", 0);

            if (sexElement != null)
            {
                spider.UIDB.sex = sexElement.outerText;
            }
            if (ageElement != null)
            {
                spider.UIDB.age = Convert.ToInt32(ageElement.outerText);
            }
            if (birthdayElement != null)
            {
                spider.UIDB.birthday = birthdayElement.outerText;
            }

            ++frameNumber;
        }

        // Mood ("shuoshuo"), blog and photo counters, read through the WinForms frame wrappers.
        int wrapperNumber = 0;
        while (wrapperNumber < webBrowser1.Document.Window.Frames.Count)
        {
            HtmlDocument wrapperDocument = webBrowser1.Document.Window.Frames[wrapperNumber].Document;

            HtmlElement blogElement = wrapperDocument.GetElementById("profile-blog-cnt");
            HtmlElement photoElement = wrapperDocument.GetElementById("profile-photo-cnt");
            HtmlElement moodElement = wrapperDocument.GetElementById("profile-mood-cnt");

            if (blogElement != null)
            {
                spider.UIDB.blog = Convert.ToInt32(blogElement.OuterText);
            }
            if (photoElement != null)
            {
                spider.UIDB.photo = Convert.ToInt32(photoElement.OuterText);
            }
            if (moodElement != null)
            {
                spider.UIDB.mood = Convert.ToInt32(moodElement.OuterText);
            }

            ++wrapperNumber;
        }

        ++spider.okcount;

        // Collect the dynamic QQ friend information.
        getFriend();
        getNickName();
        getLasTime();

        spider.UIDB.zone_authority = 1;
        spider.UIDB.used = 1;
        ChangeCommon("ShuoShuo");
    }
    catch (Exception ex)
    {
        Log(ex.Message);
        ChangeCommon("NewUser");
    }
}