/// <summary>
/// Downloads a blog page, turns it into a mail message via <c>CreateMail</c>, and stores that
/// message as an <c>.mht</c> file in the temp directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var id = Guid.NewGuid();

    // A dedicated pickup directory per run, so the single file written by SmtpClient
    // can be located with Single() below.
    var path = Path.Combine(tempDirectory, id.ToString());
    Directory.CreateDirectory(path);
    var directory = new DirectoryInfo(path);

    try
    {
        var parser = new JumonyParser();
        var document = parser.LoadDocument("http://blog.sina.com.cn/s/blog_4701280b010183ny.html");

        // SmtpClient and MailMessage are IDisposable; release them as soon as the
        // message has been written to the pickup directory.
        using (SmtpClient smtp = new SmtpClient())
        using (MailMessage message = CreateMail(document))
        {
            smtp.EnableSsl = false;
            smtp.DeliveryMethod = SmtpDeliveryMethod.SpecifiedPickupDirectory;
            smtp.PickupDirectoryLocation = path;
            smtp.Send(message);
        }

        // Move the one generated file next to the pickup directory under the .mht name.
        var file = directory.GetFiles().Single();
        file.MoveTo(Path.Combine(tempDirectory, id.ToString() + ".mht"));
    }
    finally
    {
        // Remove the per-run pickup directory even when loading or sending failed.
        directory.Delete(true);
    }
}
/// <summary>
/// Loads <c>Test1.html</c> from the current directory, compiles the document, rebuilds it through
/// the compiled delegate and asserts that both documents expose the same descendant node sequence.
/// </summary>
public void CompileTest()
{
    var htmlParser = new JumonyParser();
    var sourcePath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var original = htmlParser.LoadDocument(sourcePath);

    // Compile() yields a factory that recreates the document from a DOM provider.
    var factory = original.Compile();
    var rebuilt = factory(htmlParser.DomProvider);

    var expectedNodes = original.DescendantNodes();
    var actualNodes = rebuilt.DescendantNodes();
    var identical = expectedNodes.SequenceEqual(actualNodes, new DomNodeComparer());

    Assert.IsTrue(identical, "编译还原测试失败");
}
/// <summary>
/// Scrapes a Suning product list page.
/// </summary>
/// <param name="url">Absolute URL of the list page to download.</param>
/// <returns>
/// The products found on the page together with the total page count read from the pager;
/// an empty list and 0 when the page contains no product entries.
/// </returns>
/// <exception cref="FormatException">The pager text is not of the form "current/total".</exception>
private Tuple<List<SNProduct>, int> SN_AnalyticsHtml(string url)
{
    List<SNProduct> snlist = new List<SNProduct>();

    // Direct casts instead of "as": a non-HTTP URL now fails with a clear cast error
    // rather than a null dereference on the next line.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "GET";
    // Present a desktop browser user agent.
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";

    // The response and its stream hold the underlying connection until disposed.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (var body = response.GetResponseStream())
    {
        var parser = new JumonyParser();
        var doc = parser.LoadDocument(body, Encoding.UTF8, new Uri(url));

        // Materialize once so the selector is not evaluated again by the loop below.
        var list = doc.Find("#filter-results > ul > .product").ToList();
        if (list.Count == 0)
        {
            return Tuple.Create(snlist, 0);
        }

        foreach (var li in list)
        {
            var name = li.FindFirst(".sell-point > a");
            var commit = li.FindFirst(".com-cnt > .num");
            var shop = li.FindFirst(".res-info > .seller");

            SNProduct pro = new SNProduct();
            pro.link = name.Attribute("href").Value();
            // The price is not taken from the markup; it is always stored as 0.
            pro.price = 0;
            pro.name = name.InnerText();
            pro.commit = commit.InnerText();
            pro.shop = shop.InnerText();
            snlist.Add(pro);
        }

        // Total page count: the pager text is split on '/' and the second part is the total.
        var totalPage = doc.FindFirst(".second-box > .little-page");
        string[] parts = totalPage.InnerText().Trim().Split('/');
        if (parts.Length < 2)
        {
            throw new FormatException("Pager text is not of the form \"current/total\".");
        }

        int pageCount = Convert.ToInt32(parts[1], System.Globalization.CultureInfo.InvariantCulture);
        return Tuple.Create(snlist, pageCount);
    }
}
/// <summary>
/// Loads the cnblogs home page and prints title, link, vote count and author of every post entry,
/// waiting for a key press after each entry.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var homePage = new JumonyParser().LoadDocument("http://www.cnblogs.com/");

    foreach (var post in homePage.Find("#post_list > .post_item"))
    {
        // The heading link carries both the title text and the post URL.
        var headingLink = post.FindFirst("h3 > a");

        string title = headingLink.InnerText();
        Console.WriteLine("title: {0}", title);

        string link = headingLink.Attribute("href").Value();
        Console.WriteLine("link: {0}", link);

        string votes = post.FindFirst(".diggnum").InnerText();
        Console.WriteLine("votes: {0}", votes);

        string author = post.FindFirst(".post_item_foot a.lightblue").InnerText();
        Console.WriteLine("author: {0}", author);

        Console.WriteLine(new string('-', 30));

        // Pause until a key is pressed before showing the next entry.
        Console.ReadKey();
    }
}
/// <summary>
/// Scrapes a JD product list page.
/// </summary>
/// <param name="url">Absolute URL of the list page to download.</param>
/// <returns>
/// The products found on the page together with the total page count read from the pager;
/// an empty list and 0 when the page contains no product entries.
/// </returns>
private Tuple<List<JDProduct>, int> JD_AnalyticsHtml(string url)
{
    List<JDProduct> jdlist = new List<JDProduct>();

    // Direct casts instead of "as": a non-HTTP URL now fails with a clear cast error
    // rather than a null dereference on the next line.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "GET";
    // Present a desktop browser user agent.
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";

    // The response and its stream hold the underlying connection until disposed.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (var body = response.GetResponseStream())
    {
        var parser = new JumonyParser();
        var doc = parser.LoadDocument(body, Encoding.UTF8, new Uri(url));

        // Materialize once so the selector is not evaluated again by the loop below.
        var list = doc.Find("#J_goodsList > ul > .gl-item > .gl-i-wrap").ToList();
        if (list.Count == 0)
        {
            // The query produced no results.
            return Tuple.Create(jdlist, 0);
        }

        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (var li in list)
        {
            var price = li.FindFirst(".p-price > strong > i");
            var name = li.FindFirst(".p-name > a > em");
            var commit = li.FindFirst(".p-commit");
            var shop = li.FindFirst(".p-shop");

            JDProduct pro = new JDProduct();
            pro.link = li.FindFirst(".p-img > a").Attribute("href").Value();
            // Parse with the invariant culture so the result does not depend on the
            // decimal separator of the machine's current culture.
            pro.price = Convert.ToDouble(price.InnerText(), invariant);
            pro.name = name.InnerText();
            pro.commit = commit.InnerText();
            pro.shop = shop.InnerText();
            // Static Equals tolerates a null attribute value instead of throwing.
            pro.IsSelf = string.Equals(shop.Attribute("data-selfware").Value(), "1");
            jdlist.Add(pro);
        }

        // Total page count shown in the top pager.
        var totalPage = doc.FindFirst("#J_topPage > .fp-text > i");
        int totalPage_num = Convert.ToInt32(totalPage.InnerText(), invariant);
        return Tuple.Create(jdlist, totalPage_num);
    }
}
/// <summary>
/// Reposts an external blog article into the requesting user's blog and writes the outcome
/// to the response as a JSONP call.
/// </summary>
/// <remarks>
/// Input comes from the query string: <c>mydata</c> is a JSON object with the keys
/// <c>username</c> and <c>password</c>, plus <c>tag</c> (comma separated), <c>type</c> and
/// <c>url</c>, which are read once the user has been found; <c>callback</c> is the JSONP function name.
/// The reply has the form <c>callback('{json}')</c>, where the JSON carries <c>s</c> ("ok" or "no"),
/// <c>m</c> (a message) and <c>u</c> (a URL).
/// </remarks>
public void Forward()
{
    Response.ContentType = "application/json";
    var data = Request.QueryString["mydata"];
    // NOTE(review): callback is echoed into the response without validation; consider
    // restricting it to identifier characters.
    string callback = Request.QueryString["callback"];

    JavaScriptSerializer jss = new JavaScriptSerializer();
    Dictionary<string, string> dic = jss.Deserialize<Dictionary<string, string>>(data);
    var name = dic["username"].Trim();
    var pass = dic["password"].Trim();
    var userinfo = CacheData.GetAllUserInfo().Where(t => t.UserName == name && t.UserPass == pass.MD5().MD5()).FirstOrDefault();

    // Unknown user or wrong password: report failure and stop.
    if (userinfo == null)
    {
        var denied = new { s = "no", m = "发布失败", u = GetSiteUrl() + "/" };
        Response.Write(callback + "('" + denied.ToJson() + "')");
        return;
    }

    var tag = dic["tag"].Trim();
    var type = dic["type"].Trim();
    var url = dic["url"].Trim();

    // int.TryParse stores 0 in its out argument on failure, so the "no type" sentinel
    // has to be assigned explicitly.
    int typeint;
    if (!int.TryParse(type, out typeint))
    {
        typeint = -1;
    }

    var tags = tag.Split(',');
    object obj = null;

    var jp = new JumonyParser();
    var html = jp.LoadDocument(url);
    var titleNode = html.Find(".postTitle a").FirstOrDefault();
    var bodyNode = html.Find("#cnblogs_post_body").FirstOrDefault();
    if (titleNode == null || bodyNode == null)
    {
        // The page does not contain the expected title/body elements: answer with the
        // regular failure reply instead of failing on a null element.
        obj = new { s = "no", m = "发布失败", u = GetSiteUrl() + "/" };
        Response.Write(callback + "('" + obj.ToJson() + "')");
        return;
    }

    // The title gets a repost marker; the body gets a footer linking to the source article.
    var titlehtml = "【转】" + titleNode.InnerHtml();
    var bodyhtml = bodyNode.InnerHtml();
    bodyhtml += "</br><div class='div_zf'>==================================<a href='" + url + "' target='_blank'>原文链接</a>==================================</div>";

    // NOTE(review): the result of this query is never used; it is kept only because side
    // effects of GetAllTag() cannot be ruled out from here — confirm and remove.
    var mtag = BLL.Common.GetDataHelper.GetAllTag().Where(t => tags.Contains(t.TagName)).ToList();

    var blogtagid = new List<int>();
    foreach (var tagName in tags)
    {
        blogtagid.Add(this.GetTagId(tagName, userinfo.Id));
    }

    // In theory no additional filter on the user id (t.UsersId == userinfo.Id) is needed here.
    var myBlogTags = new BlogTagsBLL().GetList(t => blogtagid.Contains(t.Id), isAsNoTracking: false).ToList();
    var myBlogTypes = new BLL.BlogTypesBLL().GetList(t => t.Id == typeint, isAsNoTracking: false).ToList();

    BLL.BlogsBLL blogbll = new BLL.BlogsBLL();

    // Compare against the user's most recent post only; a user without any posts has none.
    var lastBlog = blogbll.GetList(t => t.UsersId == userinfo.Id).OrderByDescending(t => t.Id).FirstOrDefault();
    if (lastBlog != null && lastBlog.BlogTitle == titlehtml)
    {
        obj = new { s = "no", m = "已存在相同标题博客文章~", u = GetSiteUrl() };
        Response.Write(callback + "('" + obj.ToJson() + "')");
        return;
    }

    var blogmode = new Blogs.ModelDB.Blogs()
    {
        UsersId = userinfo.Id,
        BlogTitle = titlehtml,
        BlogTypes = myBlogTypes,
        BlogTags = myBlogTags,
        BlogContent = bodyhtml,
        CreateTime = DateTime.Now,
        BlogCreateTime = DateTime.Now,
        BlogUpTime = DateTime.Now,
        IsShowMyHome = true
    };
    blogbll.Add(blogmode);

    if (blogbll.save() > 0)
    {
        obj = new { s = "ok", m = "发布成功", u = GetSiteUrl() + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
    }
    else
    {
        obj = new { s = "no", m = "发布失败", u = GetSiteUrl() + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
    }

    Response.Write(callback + "('" + obj.ToJson() + "')");
}
/// <summary>
/// Migrates the comments of a cnblogs post into a local blog post.
/// </summary>
/// <remarks>
/// All migrated comments are attributed to one local placeholder user, which is created when
/// the lookup below does not find it.
/// </remarks>
/// <param name="BlogsId">Id of the local blog post that receives the comments.</param>
/// <param name="postId">cnblogs post id.</param>
/// <param name="blogApp">cnblogs blog (user) name.</param>
/// <returns>"ok" when the comments were saved; "error" when saving them failed.</returns>
public string testJumonyParser(int BlogsId = 1, string postId = "4368417", string blogApp = "zhaopei")
{
    int BlogUsersId;
    BLL.BlogUsersSetBLL userbll = new BlogUsersSetBLL();

    // NOTE(review): the lookup name (" ") differs from the name the placeholder is created
    // with ("******"); confirm these literals are intended, otherwise a new placeholder user
    // is created on every call.
    var usertemp = GetDataHelper.GetAllUser().Where(t => t.UserName == " ").FirstOrDefault();
    if (null == usertemp)
    {
        var user = new Blogs.ModelDB.BlogUsersSet()
        {
            UserName = "******",
            UserPass = "******",
            IsDel = false,
            IsLock = false,
            UserMail = "无效",
            CreateTime = DateTime.Now,
            UserInfo = new ModelDB.UserInfo()
        };
        userbll.Add(user);
        userbll.save(false);
        BlogUsersId = user.Id;
    }
    else
    {
        BlogUsersId = usertemp.Id;
    }

    BlogCommentSetBLL blogcommenbll = new BlogCommentSetBLL();
    JavaScriptSerializer _jsSerializer = new JavaScriptSerializer();

    bool isNext = true;
    int i = 0;
    while (isNext)
    {
        i++;
        var url = "http://www.cnblogs.com/mvc/blog/GetComments.aspx?postId=" + postId + "&blogApp=" + blogApp + "&pageIndex=" + i;
        var jumony = new JumonyParser();

        // The endpoint answers with JSON whose commentsHtml member holds the rendered comments.
        var htmlSource = jumony.LoadDocument(url).InnerHtml();
        CnBlogComments comm = _jsSerializer.Deserialize<CnBlogComments>(htmlSource);
        var commentsHtml = jumony.Parse(comm.commentsHtml);

        // Continue only while the last element inside the pager reads "Next >".
        var pager = commentsHtml.Find("div.pager").FirstOrDefault();
        var lastPagerItem = pager == null ? null : pager.Find("*").LastOrDefault();
        isNext = lastPagerItem != null && lastPagerItem.InnerText() == "Next >";

        foreach (var item in commentsHtml.Find("div.feedbackItem").ToList())
        {
            var commentDataNode = item.Find("div.feedbackListSubtitle span.comment_date").FirstOrDefault();
            var commentUserNode = item.Find("div.feedbackListSubtitle a[target='_blank']").FirstOrDefault();
            var contentNode = item.Find("div.blog_comment_body").FirstOrDefault();
            if (commentDataNode == null || commentUserNode == null || contentNode == null)
            {
                // Entry without date, author or body: skip it instead of failing the whole run.
                continue;
            }

            var commentData = DateTime.Parse(commentDataNode.InnerText());
            var commentUser = commentUserNode.InnerText();
            var Content = contentNode.InnerText();

            blogcommenbll.Add(
                new BlogCommentSet()
                {
                    BlogsId = BlogsId,
                    CommentID = -1,
                    IsDel = false,
                    Content = Content,
                    CreateTime = commentData,
                    ReplyUserName = commentUser,
                    BlogUsersId = BlogUsersId,
                    IsInitial = true
                }
            );
        }
    }

    try
    {
        blogcommenbll.save(false);
    }
    catch (Exception ex)
    {
        // Do not report success for a failed save: record the failure and return a distinct result.
        System.Diagnostics.Trace.TraceError("Saving migrated cnblogs comments failed: {0}", ex);
        return "error";
    }

    return "ok";
}
/// <summary>
/// Loads Bing search results restricted to this site and renders them as a partial view.
/// </summary>
/// <remarks>
/// Query-string input: <c>key</c> is the search text and <c>p</c> the 1-based page number
/// (page 1 when missing or invalid). A key of the form <c>blog:UserName keyword</c> restricts
/// the search to that user's blog path.
/// </remarks>
/// <returns>
/// <c>null</c> when <c>key</c> is missing, or when a page after the first has no pager entries;
/// otherwise a partial view whose model is the list of result elements as strings, or <c>null</c>
/// when the requested page lies beyond the last page number shown in the pager.
/// </returns>
public ActionResult ShowBingResult()
{
    if (!Request.QueryString.AllKeys.Contains("key"))
    {
        return null;
    }

    string key = Request.QueryString["key"]; // search keyword

    // int.TryParse leaves 0 in the out argument on failure, so the default page has to be
    // applied explicitly; page numbers below 1 are treated as page 1 as well.
    int PageIndex;
    if (!int.TryParse(Request.QueryString["p"], out PageIndex) || PageIndex < 1)
    {
        PageIndex = 1;
    }
    PageIndex--; // zero-based from here on

    // A key such as "blog:JeffreyZhao keyword" limits the search to one blog:
    // the first space-separated token carries the user name.
    var blogName = string.Empty;
    var zhankey = key.Split(' ');
    if (zhankey.Length >= 2)
    {
        var str = zhankey[0].Trim();
        if (str.Length > 6 && str.StartsWith("blog:", System.StringComparison.Ordinal))
        {
            // Escaped because it originates from user input and becomes part of the URL.
            blogName = "/" + System.Uri.EscapeDataString(str.Substring(5));
        }
    }

    if (!string.IsNullOrEmpty(blogName))
    {
        // Drop the "blog:..." token; the remainder is the actual search text.
        key = key.Substring(key.IndexOf(' '));
    }

    // Example of the resulting URL:
    // http://cn.bing.com/search?q=AJAX+site%3ablog.haojima.net&first=11&FORM=PERE
    // The key is escaped so that characters such as '&' or '#' cannot alter the query string.
    var url = "http://cn.bing.com/search?q=" + System.Uri.EscapeDataString(key.Trim()) + "+site:" + GetSiteUrl() + blogName + "&first=" + PageIndex + "1&FORM=PERE";

    JumonyParser jumony = new JumonyParser();
    var document = jumony.LoadDocument(url);
    var list = document.Find("#b_results .b_algo").Select(t => t.ToString()).ToList();

    // Pager entries, materialized once because they are inspected several times.
    var listli = document.Find("li.b_pag nav ul li").ToList();
    if (PageIndex > 0 && listli.Count == 0)
    {
        return null;
    }

    if (listli.Count > 1)
    {
        // The highest page number is the last pager entry, or the one before it when the
        // last entry is the "next page" link. npage is 0 when the text is not a number.
        var text = listli[listli.Count - 1].InnerText();
        int npage;
        if (text == "下一页")
        {
            int.TryParse(listli[listli.Count - 2].InnerText(), out npage);
        }
        else
        {
            int.TryParse(text, out npage);
        }

        if (npage <= PageIndex)
        {
            // The requested page is past the last available page.
            list = null;
        }
    }

    return PartialView(list);
}
/// <summary>
/// Reposts an external blog article into the given user's blog and returns the outcome as JSON.
/// </summary>
/// <param name="userinfo">User who owns the new post; <c>null</c> yields a failure reply.</param>
/// <param name="tag">Comma-separated tag names.</param>
/// <param name="type">Blog type id as text; a non-numeric value matches no type.</param>
/// <param name="url">URL of the article to repost.</param>
/// <param name="siteUrl">Base URL of this site, used to build the URL in the reply.</param>
/// <param name="isshowhome">Stored in the new post's <c>IsShowHome</c>.</param>
/// <param name="isshowmyhome">Stored in the new post's <c>IsShowMyHome</c>.</param>
/// <param name="isBJ">
/// When <c>true</c> the title is wrapped in 《》; otherwise it is prefixed with the repost marker.
/// </param>
/// <returns>
/// JSON with <c>s</c> ("ok" or "no"), <c>m</c> (a message) and <c>u</c> (a URL).
/// </returns>
public static string ForwardRealization(BlogUser userinfo, string tag, string type, string url, string siteUrl, bool isshowhome, bool isshowmyhome, bool isBJ = false)
{
    if (userinfo == null)
    {
        var denied = new { s = "no", m = "发布失败", u = siteUrl + "/" };
        return denied.ToJson();
    }

    // int.TryParse stores 0 in its out argument on failure, so the "no type" sentinel
    // has to be assigned explicitly.
    int typeint;
    if (!int.TryParse(type, out typeint))
    {
        typeint = -1;
    }

    var tags = tag.Split(',');
    object obj = null;

    var jp = new JumonyParser();
    var html = jp.LoadDocument(url);
    var titleNode = html.Find(".postTitle a").FirstOrDefault();
    var bodyNode = html.Find("#cnblogs_post_body").FirstOrDefault();
    if (titleNode == null || bodyNode == null)
    {
        // The page does not contain the expected title/body elements: answer with the
        // regular failure reply instead of failing on a null element.
        obj = new { s = "no", m = "发布失败", u = siteUrl + "/" };
        return obj.ToJson();
    }

    var titlehtml = titleNode.InnerHtml();
    if (!isBJ)
    {
        titlehtml = "【转】" + titlehtml;
    }
    else
    {
        titlehtml = "《" + titlehtml + "》";
    }

    // Append a footer that links back to the source article.
    var bodyhtml = bodyNode.InnerHtml();
    bodyhtml += "</br><div class='div_zf'>==================================<a href='" + url + "' target='_blank'>原文链接</a>==================================</div>";

    // NOTE(review): the result of this query is never used; it is kept only because side
    // effects of GetAllTag() cannot be ruled out from here — confirm and remove.
    var mtag = BLL.Common.GetDataHelper.GetAllTag().Where(t => tags.Contains(t.TagName)).ToList();

    var blogtagid = new List<int>();
    foreach (var tagName in tags)
    {
        blogtagid.Add(GetTagId(tagName, userinfo.Id));
    }

    // In theory no additional filter on the user id (t.UsersId == userinfo.Id) is needed here.
    var myBlogTags = new BLL.BaseBLL<BlogTag>().GetList(t => blogtagid.Contains(t.Id), isAsNoTracking: false).ToList();
    var myBlogTypes = new BLL.BaseBLL<BlogType>().GetList(t => t.Id == typeint, isAsNoTracking: false).ToList();

    BLL.BaseBLL<BlogInfo> blogbll = new BaseBLL<BlogInfo>();

    // Only the user's most recent post is compared against the new title.
    var blogtemp = blogbll.GetList(t => t.User.Id == userinfo.Id).OrderByDescending(t => t.Id).FirstOrDefault();
    if (blogtemp != null && blogtemp.Title == titlehtml)
    {
        obj = new { s = "no", m = "已存在相同标题博客文章~", u = siteUrl };
        return obj.ToJson();
    }

    var blogmode = new BlogInfo()
    {
        User = userinfo,
        Title = titlehtml,
        Types = myBlogTypes,
        Tags = myBlogTags,
        Content = bodyhtml,
        CreationTime = DateTime.Now,
        BlogCreateTime = DateTime.Now,
        BlogUpTime = DateTime.Now,
        IsShowMyHome = isshowmyhome,
        IsShowHome = isshowhome
    };
    blogbll.Insert(blogmode);

    if (blogbll.save() > 0)
    {
        obj = new { s = "ok", m = "发布成功", u = siteUrl + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
        return obj.ToJson();
    }

    obj = new { s = "no", m = "发布失败", u = siteUrl + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
    return obj.ToJson();
}
/// <summary>
/// Crawls the administrative divisions below one province (cities, counties, towns, villages)
/// and writes every entry to the console and to a tab-indented text file.
/// </summary>
/// <remarks>
/// The province is looked up in <c>allprovince.txt</c>, whose lines have the form
/// <c>name#code#url</c>. Nothing happens when no line carries the requested code.
/// </remarks>
/// <param name="code">Province code to look up.</param>
static void Sheng(string code)
{
    char[] fieldSeparator = new char[] { '#' };
    char[] dot = new char[] { '.' };

    String[] lines = System.IO.File.ReadAllLines(tmp + "\\allprovince.txt");
    List<Sheng> shengs = new List<Sheng>();
    foreach (var line in lines)
    {
        // Split once per line; blank or incomplete lines are skipped instead of failing
        // with an index error.
        string[] fields = line.Split(fieldSeparator, StringSplitOptions.RemoveEmptyEntries);
        if (fields.Length < 3)
        {
            continue;
        }

        Sheng t = new Sheng();
        t.name = fields[0];
        t.code = fields[1];
        t.url = fields[2];
        shengs.Add(t);
    }

    Sheng sheng = shengs.FirstOrDefault(s => s.code == code);
    if (sheng == null)
    {
        return;
    }

    // NOTE(review): the province page is loaded through parser.LoadDocument while all deeper
    // levels go through load(...) — confirm whether that difference is intended.
    IEnumerable<IHtmlElement> citys = parser.LoadDocument(prefix + sheng.url).Find("tr.citytr");

    // NOTE(review): unlike the allprovince.txt path above, no separator is inserted between
    // tmp and the file name here — confirm which form matches the value of tmp.
    filepath = tmp + sheng.name + ".txt";

    string shengCode = sheng.code.PadRight(12, '0');
    Console.WriteLine(sheng.name + "---" + shengCode + "---" + sheng.url);
    System.IO.File.AppendAllLines(filepath, new string[] { sheng.name + "#" + shengCode + "#" + sheng.url });

    foreach (var city in citys)
    {
        // Iterate over all cities; the second link of a row carries name and URL.
        List<IHtmlElement> cityLinks = city.Find("td a").ToList();
        IHtmlElement cityLink = cityLinks[1];

        Shi shi = new Shi();
        shi.url = cityLink.Attribute("href").Value().Trim();
        // The code is the file name part of the URL, padded with zeros to 12 characters.
        shi.code = shi.url.Replace(sheng.code + "/", "").Split(dot, StringSplitOptions.RemoveEmptyEntries)[0].PadRight(12, '0');
        shi.name = cityLink.InnerText().Trim();
        sheng.shis.Add(shi);
        Console.WriteLine(shi.name + "---" + shi.code + "---" + shi.url);
        System.IO.File.AppendAllLines(filepath, new string[] { "\t" + shi.name + "#" + shi.code + "#" + shi.url });

        IEnumerable<IHtmlElement> li = load(prefix + shi.url).Find("tr.countytr");
        foreach (var tr in li)
        {
            // Iterate over all counties.
            IEnumerable<IHtmlElement> td = tr.Find("td");
            List<IHtmlElement> countyCells = td.ToList();
            Xian xian = new Xian();

            if (!td.Find("a").Any())
            {
                // County row without links: it has no sub-page, so only code and name are recorded.
                xian.code = countyCells[0].InnerText().Trim();
                xian.name = countyCells[1].InnerText().Trim();
                shi.xians.Add(xian);
                Console.WriteLine("\t" + xian.name + "---" + xian.code + "---" + xian.url);
                System.IO.File.AppendAllLines(filepath, new string[] { "\t\t" + xian.name + "#" + xian.code + "#" + xian.url });
                continue;
            }

            IHtmlElement countyCodeLink = countyCells[0].Find("a").First();
            xian.url = countyCodeLink.Attribute("href").Value().Trim();
            xian.code = countyCodeLink.InnerText().Trim();
            xian.name = countyCells[1].Find("a").First().InnerText().Trim();
            shi.xians.Add(xian);
            Console.WriteLine("\t" + xian.name + "---" + xian.code + "---" + xian.url);
            System.IO.File.AppendAllLines(filepath, new string[] { "\t\t" + xian.name + "#" + xian.code + "#" + xian.url });

            string countyUrl = prefix + sheng.code + "/" + xian.url;
            // Town URLs are relative to the directory of the county page.
            string countyBase = countyUrl.Substring(0, countyUrl.LastIndexOf('/'));

            IEnumerable<IHtmlElement> xiangs = load(countyUrl).Find("tr.towntr");
            foreach (var j in xiangs)
            {
                // Iterate over all towns; first link = code (and URL), second link = name.
                List<IHtmlElement> townLinks = j.Find("a").ToList();

                Xiang xiang = new Xiang();
                xiang.code = townLinks[0].InnerText().Trim();
                xiang.name = townLinks[1].InnerText().Trim();
                xiang.url = townLinks[0].Attribute("href").Value().Trim();
                xian.xiangs.Add(xiang);
                Console.WriteLine("\t\t" + xiang.name + "---" + xiang.code + "---" + xiang.url);
                System.IO.File.AppendAllLines(filepath, new string[] { "\t\t\t" + xiang.name + "#" + xiang.code + "#" + xiang.url });

                string url = countyBase + "/" + xiang.url;
                IEnumerable<IHtmlElement> cuns = load(url).Find("tr.villagetr");
                foreach (var ko in cuns)
                {
                    // Iterate over all villages; first cell = code, third cell = name.
                    List<IHtmlElement> villageCells = ko.Find("td").ToList();

                    Cun cun = new Cun();
                    cun.code = villageCells[0].InnerText().Trim();
                    cun.name = villageCells[2].InnerText().Trim();
                    xiang.cuns.Add(cun);
                    Console.WriteLine("\t\t\t\t" + cun.name + "---" + cun.code);
                    System.IO.File.AppendAllLines(filepath, new string[] { "\t\t\t\t" + cun.name + "#" + cun.code });
                }

                // Pause after each town.
                System.Threading.Thread.Sleep(1000);
            }
        }

        // Pause after each city.
        System.Threading.Thread.Sleep(3000);
    }
}