/// <summary>
/// Serialises the crawled content and its author into a feature string of
/// ten <c>index:value</c> pairs (indices 1 through 10), each followed by a
/// single space. The returned string therefore ends with a trailing space.
/// </summary>
/// <param name="contentInfo">Crawl result supplying sentiment, image/URL flags and comment evaluation.</param>
/// <param name="userInfo">Author profile supplying fan/post counts, credit, location, level and verification.</param>
/// <returns>The space-separated feature string.</returns>
public static string GetFeatureVector(ContentCrawlResult contentInfo, UserInfo userInfo)
{
    // Values are read in feature-index order; boolean features are encoded as "1" / "0".
    var sentiment = contentInfo.Sentiment;
    string imageFlag = contentInfo.HasImg ? "1" : "0";
    string urlFlag = contentInfo.HasUrl ? "1" : "0";
    var fanCount = userInfo.FanNum;
    var weiboCount = userInfo.WeiboNum;
    var credit = userInfo.Credit;
    var provinceId = GetProvinceId(userInfo.Location);
    var level = userInfo.Level;
    string verifiedFlag = userInfo.IsVerified ? "1" : "0";
    var commentEval = contentInfo.CommentEval;

    StringBuilder vector = new StringBuilder();
    vector
        .Append("1:").Append(sentiment).Append(" ")      // sentiment
        .Append("2:").Append(imageFlag).Append(" ")      // has image
        .Append("3:").Append(urlFlag).Append(" ")        // has URL
        .Append("4:").Append(fanCount).Append(" ")       // fans
        .Append("5:").Append(weiboCount).Append(" ")     // weibo count
        .Append("6:").Append(credit).Append(" ")         // credit
        .Append("7:").Append(provinceId).Append(" ")     // user location (province id)
        .Append("8:").Append(level).Append(" ")          // level
        .Append("9:").Append(verifiedFlag).Append(" ")   // verified
        .Append("10:").Append(commentEval).Append(" ");  // comment evaluation

    return vector.ToString();
}
/// <summary>
/// Fetches the crawled content and the author profile for a post URL by
/// calling the content-crawl service and the user-info service in parallel.
/// </summary>
/// <param name="url">
/// Post URL. It is split on '/', and segments 3 and 4 (zero-based) are used:
/// segment 3 is passed to the user-info service, and "segment3|segment4" is
/// passed to the content-crawl service.
/// </param>
/// <param name="contentInfo">Receives the content-crawl service result.</param>
/// <param name="userInfo">Receives the user-info service result.</param>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="url"/> has fewer than five '/'-separated segments.</exception>
/// <exception cref="AggregateException">One or both service calls failed.</exception>
public static void GetServiceInfo(string url, out ContentCrawlResult contentInfo, out UserInfo userInfo)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    string[] segments = url.Split('/');
    if (segments.Length < 5)
    {
        // Fail fast here rather than with an IndexOutOfRangeException buried
        // inside an AggregateException raised from a worker task.
        throw new ArgumentException("The URL must contain at least five '/'-separated segments.", "url");
    }

    string userKey = segments[3];
    string contentKey = userKey + '|' + segments[4];

    // NOTE(review): neither client is closed here; if these are WCF proxies
    // they probably should be closed after use - confirm against their definitions.
    ContentCrawlServiceClient contentClient = new ContentCrawlServiceClient();
    UserInfoServiceClient infoClient = new UserInfoServiceClient();

    var contentTask = Task.Factory.StartNew<ContentCrawlResult>(
        () => contentClient.GetContentCrawlResult(contentKey));
    var userTask = Task.Factory.StartNew<UserInfo>(
        () => infoClient.GetUserInfo(userKey));

    // Wait for both tasks so a failure in the first never leaves the second
    // task's exception unobserved.
    Task.WaitAll(contentTask, userTask);

    contentInfo = contentTask.Result;
    userInfo = userTask.Result;
}
/// <summary>
/// Handles a classification request: reads the <c>url</c> request value,
/// fetches the post and author data, runs the SVM prediction and writes a
/// single '|'-separated text line containing the user fields, the content
/// fields and the verdict ("Rumor!" when the prediction equals 1, otherwise "Truth!").
/// </summary>
/// <param name="context">The current HTTP context.</param>
public void ProcessRequest(HttpContext context)
{
    context.Response.ContentType = "text/plain";

    var url = context.Request["url"];
    if (string.IsNullOrEmpty(url))
    {
        // Without a URL there is nothing to classify; answer with a client
        // error instead of failing later inside the service helper.
        context.Response.StatusCode = 400;
        context.Response.Write("Missing required 'url' parameter.");
        return;
    }

    // Both values are produced by the helper through its out parameters, so
    // no placeholder instances are allocated here.
    UserInfo uinfo;
    ContentCrawlResult cinfo;
    ServiceHelper.GetServiceInfo(url, out cinfo, out uinfo);

    var vec = ServiceHelper.GetFeatureVector(cinfo, uinfo);
    SVM.InputData(vec);
    var result = SVM.Predict();

    // NOTE(review): Uid is emitted twice (fields 0 and 1). Kept as-is because
    // consumers may rely on the field positions - confirm whether intended.
    var data = String.Format("{0}|{1}|{2}|{3}|{4}|{5}|{6}|{7}|{8}|{9}|{10}|{11}|{12}|{13}|{14}|{15}|{16}|{17}",
        uinfo.Uid,
        uinfo.Uid,
        uinfo.Sex,
        uinfo.Level,
        uinfo.IsVerified,
        uinfo.Credit,
        uinfo.FollowNum,
        uinfo.FanNum,
        uinfo.WeiboNum,
        uinfo.NickName,
        uinfo.Location,
        uinfo.Intro,
        cinfo.Content,
        cinfo.CommentEval,
        cinfo.HasImg,
        cinfo.Sentiment,
        cinfo.HasUrl,
        result == 1 ? "Rumor!" : "Truth!");

    context.Response.Write(data);
}
/// <summary>
/// Scrapes the profile page currently loaded in <c>browser</c> and copies the
/// extracted values into <paramref name="userInfo"/>, then appends
/// <c>userInfo.ToString()</c> to <c>tbLog</c>.
/// </summary>
/// <remarks>
/// Two page layouts are handled. A page whose HTML references
/// <c>busiBtn.png</c> is read with the first set of selectors, and credit,
/// verification, gender and location are filled with fixed values
/// (0, true, true, "北京"). Any other page is read with the second set of
/// selectors, where nickname value, location, gender and introduction are
/// optional and credit is the sum of the negative entries of the credit table.
/// Numeric texts are converted with <see cref="Convert.ToInt32(string)"/>;
/// malformed numbers still raise an exception.
/// </remarks>
/// <param name="userInfo">Object whose properties receive the scraped values.</param>
/// <returns>
/// <c>true</c> when all required elements were found and <paramref name="userInfo"/>
/// was filled; <c>false</c> when no document/body is available, the page shows
/// one of the two known error messages, or a required element is missing
/// (for example because the page has not finished loading).
/// </returns>
bool getUserInfo(ref UserInfo userInfo)
{
    var doc = browser.Document;
    if (doc == null || doc.Body == null)
    {
        return false;
    }

    string html = doc.Body.InnerHtml;
    if (html == null
        || html.Contains("你访问的页面地址有误,或者该页面不存在")   // "page address is wrong or page does not exist"
        || html.Contains("您当前访问的帐号异常"))                  // "the account you are visiting is abnormal"
    {
        return false;
    }

    // Elements looked up identically for both page layouts.
    HtmlElement levelHolder = firstChildOrNull(firstElementOrNull(
        doc.GetElementsByTagName("span"),
        el => el.GetAttribute("className").Contains("W_level_ico")));
    HtmlElement gradeLink = firstElementOrNull(
        doc.GetElementsByTagName("a"),
        el => el.GetAttribute("suda-data") == "key=tblog_grade_float&value=grade_icon_click");

    HtmlElement followNode, fansNode, weiboNode;
    string userNickName, userLoc, userIntro;
    bool verified, userSex;
    int credit;

    if (html.Contains("http://img.t.sinajs.cn/t4/appstyle/e_media/images/index/busiBtn.png"))
    {
        HtmlElement nameHolder = firstChildOrNull(firstChildOrNull(firstElementOrNull(
            doc.GetElementsByTagName("div"),
            el => el.GetAttribute("className") == "username")));
        HtmlElement introHolder = firstChildOrNull(firstChildOrNull(firstElementOrNull(
            doc.GetElementsByTagName("div"),
            el => el.GetAttribute("className") == "moreinfo")));
        if (nameHolder == null || introHolder == null)
        {
            return false;
        }

        // Counters are <strong> elements followed by a caption element:
        // "关注" (following), "粉丝" (fans), "微博" (posts).
        followNode = findStrongByCaption(doc.GetElementsByTagName("strong"), "关注");
        fansNode = findStrongByCaption(doc.GetElementsByTagName("strong"), "粉丝");
        weiboNode = findStrongByCaption(doc.GetElementsByTagName("strong"), "微博");

        userNickName = nameHolder.InnerText;
        userIntro = introHolder.InnerText;

        // These values are not scraped for this layout; fixed values are used.
        credit = 0;
        verified = true;
        userSex = true;
        userLoc = "北京";
    }
    else
    {
        // The "昵称" (nickname) label must be present for the page to count as loaded.
        HtmlElement nickLabel = findProfileLabel(doc.GetElementsByTagName("div"), "昵称");
        if (nickLabel == null)
        {
            return false;
        }

        followNode = firstElementOrNull(
            doc.GetElementsByTagName("strong"),
            el => el.GetAttribute("node-type") == "follow");
        fansNode = firstElementOrNull(
            doc.GetElementsByTagName("strong"),
            el => el.GetAttribute("node-type") == "fans");
        weiboNode = firstElementOrNull(
            doc.GetElementsByTagName("strong"),
            el => el.GetAttribute("node-type") == "weibo");

        credit = sumNegativeCredit(doc.GetElementsByTagName("table"));

        // Verified only when exactly one "icon_bed" div links to the verification page.
        verified = doc.GetElementsByTagName("div").Cast<HtmlElement>().Count(el =>
        {
            if (el.GetAttribute("className") != "icon_bed")
            {
                return false;
            }
            HtmlElement badgeLink = firstChildOrNull(el);
            return badgeLink != null
                && badgeLink.GetAttribute("href") == "http://verified.weibo.com/verify";
        }) == 1;

        // Optional fields: the value is the element right after its label.
        // Labels: "所在地" (location), "性别" (gender), "简介" (introduction).
        userNickName = innerTextOrNull(nextSiblingOrNull(nickLabel));
        userLoc = innerTextOrNull(nextSiblingOrNull(
            findProfileLabel(doc.GetElementsByTagName("div"), "所在地")));
        userIntro = innerTextOrNull(nextSiblingOrNull(
            findProfileLabel(doc.GetElementsByTagName("div"), "简介")));

        // Gender stays true when the field is absent; otherwise true only for "男" (male).
        HtmlElement sexValue = nextSiblingOrNull(
            findProfileLabel(doc.GetElementsByTagName("div"), "性别"));
        userSex = sexValue == null || sexValue.InnerText == "男";
    }

    // A missing required node means the page is not usable (yet): report
    // failure so the caller can retry instead of crashing on a lookup.
    if (levelHolder == null || gradeLink == null
        || followNode == null || fansNode == null || weiboNode == null)
    {
        return false;
    }

    // The title reads "当前等级:<n>" ("current level: <n>"); strip the prefix.
    int level = Convert.ToInt32(
        levelHolder.GetAttribute("title").Substring("当前等级:".Length));
    // The uid is taken as the 10 characters following the id= prefix of the link.
    string uid = gradeLink.GetAttribute("href")
        .Substring("http://level.account.weibo.com/u/?id=".Length, 10);
    int follow = Convert.ToInt32(followNode.InnerText);
    int fans = Convert.ToInt32(fansNode.InnerText);
    int weibo = Convert.ToInt32(weiboNode.InnerText);

    userInfo.Uid = uid;
    userInfo.Level = level;
    userInfo.Intro = userIntro;
    userInfo.IsVerified = verified;
    userInfo.Location = userLoc;
    userInfo.NickName = userNickName;
    userInfo.Sex = userSex;
    userInfo.WeiboNum = weibo;
    userInfo.FanNum = fans;
    userInfo.FollowNum = follow;
    userInfo.Credit = credit;

    string log = userInfo.ToString();
    tbLog.Invoke((Action)(() => { tbLog.Text += log + "\n"; }));
    return true;
}

// Returns the first element satisfying the predicate, or null when none does.
private static HtmlElement firstElementOrNull(System.Collections.IEnumerable elements, Func<HtmlElement, bool> predicate)
{
    return elements.Cast<HtmlElement>().FirstOrDefault(predicate);
}

// Returns the first child of the element, or null when the element is null or childless.
private static HtmlElement firstChildOrNull(HtmlElement parent)
{
    if (parent == null || parent.Children.Count == 0)
    {
        return null;
    }
    return parent.Children[0];
}

// Returns the element's next sibling, or null when the element itself is null.
private static HtmlElement nextSiblingOrNull(HtmlElement element)
{
    return element == null ? null : element.NextSibling;
}

// Returns the element's inner text, or null when the element is null.
private static string innerTextOrNull(HtmlElement element)
{
    return element == null ? null : element.InnerText;
}

// Finds the <strong> element whose next sibling's text equals the caption.
private static HtmlElement findStrongByCaption(System.Collections.IEnumerable strongs, string caption)
{
    return firstElementOrNull(
        strongs,
        el => el.NextSibling != null && el.NextSibling.InnerText == caption);
}

// Finds the profile label div (class "label S_txt2") whose text equals the caption.
private static HtmlElement findProfileLabel(System.Collections.IEnumerable divs, string caption)
{
    return firstElementOrNull(
        divs,
        el => el.GetAttribute("className") == "label S_txt2" && el.InnerText == caption);
}

// Sums the negative entries of the single table marked node-type="credit"; yields 0 when
// there is not exactly one such table, and stops at the first row that cannot be parsed.
private static int sumNegativeCredit(System.Collections.IEnumerable tables)
{
    HtmlElement[] creditTables = tables.Cast<HtmlElement>()
        .Where(t => t.GetAttribute("node-type") == "credit")
        .ToArray();
    if (creditTables.Length != 1)
    {
        return 0;
    }

    int total = 0;
    try
    {
        foreach (HtmlElement row in creditTables[0].GetElementsByTagName("tr"))
        {
            // The first cell must parse as a date; the value itself is not used.
            Convert.ToDateTime(row.Children[0].InnerText);

            // The third cell holds the change followed by one trailing character.
            string changeText = row.Children[2].InnerText;
            int change = Convert.ToInt32(changeText.Substring(0, changeText.Length - 1));
            if (change < 0)
            {
                total += change;
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: credit is optional, so a row that does not
        // match the expected shape ends the scan and keeps the sum so far.
    }
    return total;
}
/// <summary>
/// Loads <c>http://weibo.com/{name}/info</c> in <c>browser</c> and polls
/// <c>getUserInfo</c> until it succeeds, then calls <c>cleanMemory</c> and
/// returns the scraped profile.
/// </summary>
/// <remarks>
/// This method blocks the calling thread: it sleeps 6 seconds after the
/// initial navigation and 2 seconds after every failed attempt, and it
/// re-navigates after every 6 consecutive failures. There is no upper bound
/// on the number of attempts, so it does not return while scraping keeps failing.
/// All browser access is marshalled through <c>browser.Invoke</c>.
/// </remarks>
/// <param name="name">Path segment identifying the user in the profile URL.</param>
/// <returns>The populated <c>UserInfo</c>.</returns>
public UserInfo GetUserInfo(string name)
{
    const int InitialLoadDelayMs = 6000;
    const int RetryDelayMs = 2000;
    const int FailuresBeforeReload = 6;

    string profileUrl = "http://weibo.com/" + name + "/info";
    Action navigate = () => { browser.Navigate(profileUrl); };

    // Navigate through Invoke, exactly like the retry path below, so the
    // browser control is only ever touched via its own thread.
    browser.Invoke(navigate);
    Thread.Sleep(InitialLoadDelayMs);

    UserInfo userInfo = new UserInfo();
    Func<bool> scrape = () => { return getUserInfo(ref userInfo); };

    int failCount = 0;
    while (!(bool)browser.Invoke(scrape))
    {
        tbLog.Invoke((Action)(() => { tbLog.Text += "Try again" + "\r\n"; }));

        failCount++;
        if (failCount == FailuresBeforeReload)
        {
            // Too many consecutive failures: reload the page and start counting again.
            browser.Invoke(navigate);
            failCount = 0;
        }

        Thread.Sleep(RetryDelayMs);
    }

    cleanMemory();
    return userInfo;
}