Esempio n. 1
0
        /// <summary>
        /// Downloads a blog page, turns it into a mail message via <c>CreateMail</c> and stores that
        /// message as an <c>.mht</c> file by "sending" it through an SMTP client configured to deliver
        /// into a pickup directory instead of a server.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var id = Guid.NewGuid();

            // Per-run scratch directory that the SMTP client drops the message file into.
            var path = Path.Combine(tempDirectory, id.ToString());

            Directory.CreateDirectory(path);

            try
            {
                var parser   = new JumonyParser();
                var document = parser.LoadDocument("http://blog.sina.com.cn/s/blog_4701280b010183ny.html");

                // Both SmtpClient and MailMessage are IDisposable; release them as soon as the
                // message has been written to the pickup directory.
                using (var smtp = new SmtpClient())
                using (MailMessage message = CreateMail(document))
                {
                    smtp.EnableSsl               = false;
                    smtp.DeliveryMethod          = SmtpDeliveryMethod.SpecifiedPickupDirectory;
                    smtp.PickupDirectoryLocation = path;

                    smtp.Send(message);
                }

                // Exactly one file is expected in the scratch directory: the message just written.
                var file = new DirectoryInfo(path).GetFiles().Single();

                file.MoveTo(Path.Combine(tempDirectory, id.ToString() + ".mht"));
            }
            finally
            {
                // Remove the scratch directory even when downloading, sending or moving failed.
                Directory.Delete(path, true);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Round-trip check: loads <c>Test1.html</c> from the current directory, compiles the document,
        /// invokes the compiled method with the parser's DOM provider and asserts that the resulting
        /// document has the same descendant node sequence as the original.
        /// </summary>
        public void CompileTest()
        {
            var jumony   = new JumonyParser();
            var htmlPath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
            var original = jumony.LoadDocument(htmlPath);

            // Compile the document and run the compiled method to rebuild it.
            var rebuild  = original.Compile();
            var restored = rebuild(jumony.DomProvider);

            var expectedNodes = original.DescendantNodes();
            var actualNodes   = restored.DescendantNodes();
            bool identical    = expectedNodes.SequenceEqual(actualNodes, new DomNodeComparer());

            Assert.IsTrue(identical, "编译还原测试失败");
        }
Esempio n. 3
0
        /// <summary>
        /// Scrapes one Suning search-result page into a list of products.
        /// </summary>
        /// <param name="url">Address of the result page to download (an HTTP/HTTPS URL).</param>
        /// <returns>
        /// The products found on the page together with the total page count read from the pager
        /// element; an empty list and 0 when the page contains no product entries.
        /// </returns>
        private Tuple <List <SNProduct>, int> SN_AnalyticsHtml(string url)
        {
            List <SNProduct> snlist = new List <SNProduct>();

            // Direct cast: a non-HTTP URL fails here with a clear InvalidCastException instead of a
            // NullReferenceException on the next line.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            request.Method = "GET";
            // Pose as a desktop browser.
            request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";

            // The response and its stream hold the underlying connection; dispose both so repeated
            // calls do not exhaust the connection pool. All DOM work stays inside the using block so
            // it does not matter whether the parser reads the stream eagerly or lazily.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (var body = response.GetResponseStream())
            {
                var parser = new JumonyParser();
                var doc    = parser.LoadDocument(body, Encoding.UTF8, new Uri(url));

                // Materialize once: the query is needed for the emptiness check and for the loop.
                var items = doc.Find("#filter-results > ul > .product").ToList();

                if (items.Count == 0)
                {
                    return(Tuple.Create(snlist, 0));
                }

                foreach (var li in items)
                {
                    var name   = li.FindFirst(".sell-point > a");
                    var commit = li.FindFirst(".com-cnt > .num");
                    var shop   = li.FindFirst(".res-info > .seller");

                    SNProduct pro = new SNProduct();

                    pro.link   = name.Attribute("href").Value();
                    // NOTE(review): the price is not scraped; it has always been stored as 0 here.
                    pro.price  = 0;
                    pro.name   = name.InnerText();
                    pro.commit = commit.InnerText();
                    pro.shop   = shop.InnerText();

                    snlist.Add(pro);
                }

                // Total page count: the text after the '/' in the pager element
                // (presumably formatted as "current/total" - confirm against the live page).
                var      totalPage = doc.FindFirst(".second-box > .little-page");
                string[] parts     = totalPage.InnerText().Trim().Split('/');

                return(Tuple.Create(snlist, Convert.ToInt32(parts[1])));
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Loads the cnblogs home page and prints title, link, vote count and author of every post
        /// item to the console, waiting for a key press after each item.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var homePage = new JumonyParser().LoadDocument("http://www.cnblogs.com/");
            var separator = new string('-', 30);

            foreach (var post in homePage.Find("#post_list > .post_item"))
            {
                // The heading link carries both the title text and the post URL.
                var headingLink = post.FindFirst("h3 > a");

                Console.WriteLine("title: {0}", headingLink.InnerText());
                Console.WriteLine("link: {0}", headingLink.Attribute("href").Value());
                Console.WriteLine("votes: {0}", post.FindFirst(".diggnum").InnerText());
                Console.WriteLine("author: {0}", post.FindFirst(".post_item_foot a.lightblue").InnerText());
                Console.WriteLine(separator);

                // Pause until a key is pressed before showing the next post.
                Console.ReadKey();
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Scrapes one JD search-result page into a list of products.
        /// </summary>
        /// <param name="url">Address of the result page to download (an HTTP/HTTPS URL).</param>
        /// <returns>
        /// The products found on the page together with the total page count read from the top
        /// pager; an empty list and 0 when the page contains no product entries.
        /// </returns>
        private Tuple <List <JDProduct>, int> JD_AnalyticsHtml(string url)
        {
            List <JDProduct> jdlist = new List <JDProduct>();

            // Direct cast: a non-HTTP URL fails here with a clear InvalidCastException instead of a
            // NullReferenceException on the next line.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            request.Method = "GET";
            // Pose as a desktop browser.
            request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";

            // Page numbers and prices are machine-formatted text, so parse them independently of the
            // culture the process happens to run under.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // The response and its stream hold the underlying connection; dispose both so repeated
            // calls do not exhaust the connection pool. All DOM work stays inside the using block so
            // it does not matter whether the parser reads the stream eagerly or lazily.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (var body = response.GetResponseStream())
            {
                var parser = new JumonyParser();
                var doc    = parser.LoadDocument(body, Encoding.UTF8, new Uri(url));

                // Materialize once: the query is needed for the emptiness check and for the loop.
                var items = doc.Find("#J_goodsList > ul > .gl-item > .gl-i-wrap").ToList();

                if (items.Count == 0)
                {
                    // The search produced no results.
                    return(Tuple.Create(jdlist, 0));
                }

                foreach (var li in items)
                {
                    var price  = li.FindFirst(".p-price > strong > i");
                    var name   = li.FindFirst(".p-name > a > em");
                    var commit = li.FindFirst(".p-commit");
                    var shop   = li.FindFirst(".p-shop");

                    JDProduct pro = new JDProduct();

                    pro.link   = li.FindFirst(".p-img > a").Attribute("href").Value();
                    pro.price  = Convert.ToDouble(price.InnerText(), invariant);
                    pro.name   = name.InnerText();
                    pro.commit = commit.InnerText();
                    pro.shop   = shop.InnerText();
                    // data-selfware == "1" marks the item as self-operated; the static Equals keeps
                    // a missing attribute value from throwing.
                    pro.IsSelf = string.Equals(shop.Attribute("data-selfware").Value(), "1");

                    jdlist.Add(pro);
                }

                // Total page count as shown in the top pager.
                var totalPage     = doc.FindFirst("#J_topPage > .fp-text > i");
                int totalPage_num = Convert.ToInt32(totalPage.InnerText(), invariant);

                return(Tuple.Create(jdlist, totalPage_num));
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Reposts ("forwards") a cnblogs article into the blog of the user identified by the request
        /// and answers with a JSONP payload.
        /// </summary>
        /// <remarks>
        /// Reads two query-string values: <c>mydata</c>, a JSON object with the keys
        /// <c>username</c>, <c>password</c>, <c>tag</c> (comma separated), <c>type</c> and <c>url</c>,
        /// and <c>callback</c>, the JSONP function name. The response is
        /// <c>callback('{json}')</c> where the JSON carries <c>s</c> ("ok"/"no"), a message <c>m</c>
        /// and a URL <c>u</c>.
        /// NOTE(review): <c>callback</c> is echoed into the response without validation and the JSON
        /// is embedded in a single-quoted script string; both should be hardened once the calling
        /// client's expectations are confirmed.
        /// </remarks>
        public void Forward()
        {
            Response.ContentType = "application/json";
            var    data     = Request.QueryString["mydata"];
            string callback = Request.QueryString["callback"];

            // Single place that wraps a JSON payload into the JSONP call and writes it out.
            Action <string> reply = json => Response.Write(callback + "('" + json + "')");

            JavaScriptSerializer        jss = new JavaScriptSerializer();
            Dictionary <string, string> dic = jss.Deserialize <Dictionary <string, string> >(data);
            var name       = dic["username"].Trim();
            var pass       = dic["password"].Trim();
            // Hash once instead of once per cached user inside the predicate.
            var hashedPass = pass.MD5().MD5();
            var userinfo   = CacheData.GetAllUserInfo().Where(t => t.UserName == name && t.UserPass == hashedPass).FirstOrDefault();

            if (userinfo == null)
            {
                // Unknown user or wrong password.
                var denied = new { s = "no", m = "发布失败", u = GetSiteUrl() + "/" };
                reply(denied.ToJson());
                return;
            }

            var tag  = dic["tag"].Trim();
            var type = dic["type"].Trim();
            var url  = dic["url"].Trim();

            // int.TryParse writes 0 into the out argument on failure, so restore the intended
            // "no such type" sentinel explicitly.
            int typeint;
            if (!int.TryParse(type, out typeint))
            {
                typeint = -1;
            }
            var tags = tag.Split(',');

            var jp        = new JumonyParser();
            var html      = jp.LoadDocument(url);
            var titleNode = html.Find(".postTitle a").FirstOrDefault();
            var bodyNode  = html.Find("#cnblogs_post_body").FirstOrDefault();

            if (titleNode == null || bodyNode == null)
            {
                // The page does not look like a cnblogs post; report a failure instead of
                // dereferencing null.
                var unreadable = new { s = "no", m = "发布失败", u = GetSiteUrl() + "/" };
                reply(unreadable.ToJson());
                return;
            }

            var titlehtml = "【转】" + titleNode.InnerHtml();
            var bodyhtml  = bodyNode.InnerHtml();
            bodyhtml += "</br><div class='div_zf'>==================================<a  href='" + url + "' target='_blank'>原文链接</a>==================================</div>";

            // NOTE(review): the result is unused; the call is kept only in case GetAllTag() has a
            // side effect (e.g. warming a cache) that the code below relies on - confirm and remove.
            var mtag = BLL.Common.GetDataHelper.GetAllTag().Where(t => tags.Contains(t.TagName)).ToList();

            var blogtagid = new List <int>();
            for (int i = 0; i < tags.Length; i++)
            {
                blogtagid.Add(this.GetTagId(tags[i], userinfo.Id));
            }
            // In theory there is no need to filter by user id as well (&& t.UsersId == userinfo.Id).
            var myBlogTags  = new BlogTagsBLL().GetList(t => blogtagid.Contains(t.Id), isAsNoTracking: false).ToList();
            var myBlogTypes = new BLL.BlogTypesBLL().GetList(t => t.Id == typeint, isAsNoTracking: false).ToList();

            object       obj     = null;
            BLL.BlogsBLL blogbll = new BLL.BlogsBLL();

            // Duplicate protection: compare against the user's most recent post. A user without any
            // posts yields null here, which must not be dereferenced.
            var latestBlog = blogbll.GetList(t => t.UsersId == userinfo.Id).OrderByDescending(t => t.Id).FirstOrDefault();
            if (latestBlog != null && latestBlog.BlogTitle == titlehtml)
            {
                obj = new { s = "no", m = "已存在相同标题博客文章~", u = GetSiteUrl() };
                reply(obj.ToJson());
                return;
            }

            var blogmode = new Blogs.ModelDB.Blogs()
            {
                UsersId        = userinfo.Id,
                BlogTitle      = titlehtml,
                BlogTypes      = myBlogTypes,
                BlogTags       = myBlogTags,
                BlogContent    = bodyhtml,
                CreateTime     = DateTime.Now,
                BlogCreateTime = DateTime.Now,
                BlogUpTime     = DateTime.Now,
                IsShowMyHome   = true
            };

            blogbll.Add(blogmode);

            if (blogbll.save() > 0)
            {
                obj = new { s = "ok", m = "发布成功", u = GetSiteUrl() + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
                reply(obj.ToJson());
                return;
            }

            obj = new { s = "no", m = "发布失败", u = GetSiteUrl() + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
            reply(obj.ToJson());
        }
        /// <summary>
        /// Migrates the comments of a cnblogs post into the local blog.
        /// </summary>
        /// <remarks>
        /// Imported commenters have no local account, so every comment is attributed to one
        /// placeholder user that is looked up first and created when it does not exist yet.
        /// The comment pages are fetched one after another until the pager no longer offers a
        /// "Next &gt;" entry; all collected comments are saved in one call at the end.
        /// </remarks>
        /// <param name="BlogsId">Id of the local blog post the comments are attached to.</param>
        /// <param name="postId">Id of the post on cnblogs.</param>
        /// <param name="blogApp">User name of the blog on cnblogs.</param>
        /// <returns>"ok" when the comments were saved, "error" when saving failed.</returns>
        public string testJumonyParser(int BlogsId = 1, string postId = "4368417", string blogApp = "zhaopei")
        {
            var BlogUsersId = 1;

            BLL.BlogUsersSetBLL userbll = new BlogUsersSetBLL();
            // NOTE(review): the lookup name (" ") differs from the name the placeholder user is
            // created with below, so as written the lookup cannot find a previously created
            // placeholder - verify the intended literals.
            var usertemp = GetDataHelper.GetAllUser().Where(t => t.UserName == " ").FirstOrDefault();

            if (null == usertemp)
            {
                var user = new Blogs.ModelDB.BlogUsersSet()
                {
                    UserName   = "******",
                    UserPass   = "******",
                    IsDel      = false,
                    IsLock     = false,
                    UserMail   = "无效",
                    CreateTime = DateTime.Now,
                    UserInfo   = new ModelDB.UserInfo()
                };
                userbll.Add(user);
                userbll.save(false);
                BlogUsersId = user.Id;
            }
            else
            {
                BlogUsersId = usertemp.Id;
            }

            BlogCommentSetBLL    blogcommenbll = new BlogCommentSetBLL();
            JavaScriptSerializer _jsSerializer = new JavaScriptSerializer();

            // Escape the caller-supplied values so they cannot break or extend the query string.
            var baseUrl = "http://www.cnblogs.com/mvc/blog/GetComments.aspx?postId=" + Uri.EscapeDataString(postId ?? string.Empty)
                          + "&blogApp=" + Uri.EscapeDataString(blogApp ?? string.Empty) + "&pageIndex=";

            bool isNext    = true;
            int  pageIndex = 0;

            while (isNext)
            {
                pageIndex++;
                var jumony = new JumonyParser();
                // The endpoint answers with JSON; the document's inner HTML is used as the raw text.
                var htmlSource = jumony.LoadDocument(baseUrl + pageIndex).InnerHtml();

                CnBlogComments comm = _jsSerializer.Deserialize <CnBlogComments>(htmlSource);
                if (comm == null || comm.commentsHtml == null)
                {
                    // Nothing usable came back for this page; stop paging.
                    break;
                }

                var commentsHtml = jumony.Parse(comm.commentsHtml);

                // Continue only while the last entry of the pager is the "Next >" link.
                var pager         = commentsHtml.Find("div.pager").FirstOrDefault();
                var lastPagerItem = pager == null ? null : pager.Find("*").LastOrDefault();
                isNext = lastPagerItem != null && lastPagerItem.InnerText() == "Next >";

                foreach (var item in commentsHtml.Find("div.feedbackItem").ToList())
                {
                    var commentDataNode = item.Find("div.feedbackListSubtitle span.comment_date").FirstOrDefault();
                    var commentUserNode = item.Find("div.feedbackListSubtitle a[target='_blank']").FirstOrDefault();
                    var contentNode     = item.Find("div.blog_comment_body").FirstOrDefault();

                    DateTime commentData;
                    if (commentDataNode == null || commentUserNode == null || contentNode == null
                        || !DateTime.TryParse(commentDataNode.InnerText(), out commentData))
                    {
                        // Skip entries that do not have the expected structure instead of aborting
                        // the whole migration, but leave a trace so the gap is visible.
                        System.Diagnostics.Trace.TraceWarning("testJumonyParser: skipped a malformed comment on page " + pageIndex);
                        continue;
                    }

                    blogcommenbll.Add(
                        new BlogCommentSet()
                    {
                        BlogsId       = BlogsId,
                        CommentID     = -1,
                        IsDel         = false,
                        Content       = contentNode.InnerText(),
                        CreateTime    = commentData,
                        ReplyUserName = commentUserNode.InnerText(),
                        BlogUsersId   = BlogUsersId,
                        IsInitial     = true
                    }
                        );
                }
            }

            try
            {
                blogcommenbll.save(false);
            }
            catch (Exception ex)
            {
                // A failed save used to be swallowed and reported as "ok"; record it and tell the
                // caller the truth.
                System.Diagnostics.Trace.TraceError("testJumonyParser: saving migrated comments failed: " + ex);
                return("error");
            }
            return("ok");
        }
        /// <summary>
        /// Runs a site-restricted Bing search and renders the result entries as a partial view.
        /// </summary>
        /// <remarks>
        /// Query-string inputs: <c>key</c> (search text, required) and <c>p</c> (1-based page
        /// number, defaults to 1). A search text of the form <c>blog:NAME words...</c> restricts the
        /// search to that user's blog below the site URL.
        /// </remarks>
        /// <returns>
        /// A partial view whose model is the list of result entries as HTML strings (or null when
        /// the requested page lies beyond the last page Bing offers); <c>null</c> when <c>key</c> is
        /// missing or a page after the first has no pager.
        /// </returns>
        public ActionResult ShowBingResult()
        {
            if (!Request.QueryString.AllKeys.Contains("key"))
            {
                return(null);
            }
            string       key    = Request.QueryString["key"]; // search keywords
            JumonyParser jumony = new JumonyParser();
            // Example: http://cn.bing.com/search?q=AJAX+site%3ablog.haojima.net&first=11&FORM=PERE

            // int.TryParse writes 0 on failure, so fall back to page 1 explicitly; also reject
            // zero/negative pages, which would otherwise produce a negative "first" offset.
            int PageIndex;
            if (!int.TryParse(Request.QueryString["p"], out PageIndex) || PageIndex < 1)
            {
                PageIndex = 1;
            }
            PageIndex--; // zero-based from here on

            // Example: "blog:JeffreyZhao some words"
            var zhankey  = key.Split(' '); // split on spaces first
            var blogName = string.Empty;

            if (zhankey.Length >= 2)
            {
                var str = zhankey[0].Trim();
                // NOTE(review): "> 6" requires a blog name of at least two characters - confirm
                // that single-character names are meant to be ignored.
                if (str.Length > 6 && str.StartsWith("blog:", StringComparison.Ordinal))
                {
                    // Everything after the prefix is the blog user name.
                    blogName = "/" + Uri.EscapeDataString(str.Substring(5));
                }
            }
            if (!string.IsNullOrEmpty(blogName))
            {
                // Drop the "blog:NAME" token from the search text.
                key = key.Substring(key.IndexOf(' '));
            }

            // The search text comes straight from the request; escape it so characters such as
            // '&', '#' or '+' cannot alter the query string. "first" is the 1-based result offset
            // (page index followed by the digit 1: 01, 11, 21, ...).
            var url      = "http://cn.bing.com/search?q=" + Uri.EscapeDataString(key.Trim()) + "+site:" + GetSiteUrl() + blogName + "&first=" + PageIndex + "1&FORM=PERE";
            var document = jumony.LoadDocument(url);
            var list     = document.Find("#b_results .b_algo").ToList().Select(t => t.ToString()).ToList();

            // Materialize the pager entries once instead of re-running the query for every check.
            var pagerItems = document.Find("li.b_pag nav ul li").ToList();

            if (PageIndex > 0 && pagerItems.Count == 0)
            {
                return(null);
            }

            if (pagerItems.Count > 1)
            {
                // The highest page number is the last pager entry, or the one before it when the
                // last entry is the "next page" link.
                var text = pagerItems[pagerItems.Count - 1].InnerText();
                int npage;
                if (text == "下一页")
                {
                    int.TryParse(pagerItems[pagerItems.Count - 2].InnerText(), out npage);
                }
                else
                {
                    int.TryParse(text, out npage);
                }
                if (npage <= PageIndex)
                {
                    // Requested page is past the last available page.
                    list = null;
                }
            }

            return(PartialView(list));
        }
        /// <summary>
        /// Reposts a cnblogs article into the given user's blog and returns the outcome as JSON.
        /// </summary>
        /// <param name="userinfo">User the post is created for; <c>null</c> yields a failure result.</param>
        /// <param name="tag">Comma-separated tag names to attach to the post.</param>
        /// <param name="type">Id of the blog type as text.</param>
        /// <param name="url">Address of the cnblogs article to copy.</param>
        /// <param name="siteUrl">Base URL used to build the link in the result.</param>
        /// <param name="isshowhome">Stored in the post's <c>IsShowHome</c> flag.</param>
        /// <param name="isshowmyhome">Stored in the post's <c>IsShowMyHome</c> flag.</param>
        /// <param name="isBJ">
        /// When <c>false</c> the title is prefixed with "【转】"; when <c>true</c> it is wrapped in "《 》".
        /// </param>
        /// <returns>
        /// JSON with <c>s</c> ("ok"/"no"), a message <c>m</c> and a URL <c>u</c>.
        /// </returns>
        public static string ForwardRealization(BlogUser userinfo, string tag, string type, string url, string siteUrl, bool isshowhome, bool isshowmyhome, bool isBJ = false)
        {
            if (userinfo == null)
            {
                var denied = new { s = "no", m = "发布失败", u = siteUrl + "/" };
                return(denied.ToJson());
            }

            // int.TryParse writes 0 into the out argument on failure, so restore the intended
            // "no such type" sentinel explicitly.
            int typeint;
            if (!int.TryParse(type, out typeint))
            {
                typeint = -1;
            }
            var tags = tag.Split(',');

            var jp        = new JumonyParser();
            var html      = jp.LoadDocument(url);
            var titleNode = html.Find(".postTitle a").FirstOrDefault();
            var bodyNode  = html.Find("#cnblogs_post_body").FirstOrDefault();

            if (titleNode == null || bodyNode == null)
            {
                // The page does not look like a cnblogs post; report a failure instead of
                // dereferencing null.
                var unreadable = new { s = "no", m = "发布失败", u = siteUrl + "/" };
                return(unreadable.ToJson());
            }

            var titlehtml = titleNode.InnerHtml();
            titlehtml = isBJ ? "《" + titlehtml + "》" : "【转】" + titlehtml;

            var bodyhtml = bodyNode.InnerHtml();
            bodyhtml += "</br><div class='div_zf'>==================================<a  href='" + url + "' target='_blank'>原文链接</a>==================================</div>";

            // NOTE(review): the result is unused; the call is kept only in case GetAllTag() has a
            // side effect (e.g. warming a cache) that the code below relies on - confirm and remove.
            var mtag = BLL.Common.GetDataHelper.GetAllTag().Where(t => tags.Contains(t.TagName)).ToList();

            var blogtagid = new List <int>();
            for (int i = 0; i < tags.Length; i++)
            {
                blogtagid.Add(GetTagId(tags[i], userinfo.Id));
            }
            // In theory there is no need to filter by user id as well (&& t.UsersId == userinfo.Id).
            var myBlogTags  = new BLL.BaseBLL <BlogTag>().GetList(t => blogtagid.Contains(t.Id), isAsNoTracking: false).ToList();
            var myBlogTypes = new BLL.BaseBLL <BlogType>().GetList(t => t.Id == typeint, isAsNoTracking: false).ToList();

            object obj = null;
            BLL.BaseBLL <BlogInfo> blogbll = new BaseBLL <BlogInfo>();

            // Duplicate protection: compare against the user's most recent post.
            var blogtemp = blogbll.GetList(t => t.User.Id == userinfo.Id).OrderByDescending(t => t.Id).FirstOrDefault();
            if (blogtemp != null && blogtemp.Title == titlehtml)
            {
                obj = new { s = "no", m = "已存在相同标题博客文章~", u = siteUrl };
                return(obj.ToJson());
            }

            var blogmode = new BlogInfo()
            {
                User           = userinfo,
                Title          = titlehtml,
                Types          = myBlogTypes,
                Tags           = myBlogTags,
                Content        = bodyhtml,
                CreationTime   = DateTime.Now,
                BlogCreateTime = DateTime.Now,
                BlogUpTime     = DateTime.Now,
                IsShowMyHome   = isshowmyhome,
                IsShowHome     = isshowhome
            };

            blogbll.Insert(blogmode);

            if (blogbll.save() > 0)
            {
                obj = new { s = "ok", m = "发布成功", u = siteUrl + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
                return(obj.ToJson());
            }

            obj = new { s = "no", m = "发布失败", u = siteUrl + "/" + userinfo.UserName + "/" + blogmode.Id + ".html" };
            return(obj.ToJson());
        }
Esempio n. 10
0
        /// <summary>
        /// Crawls one province and everything below it (cities, counties, towns, villages), printing
        /// each entry to the console and appending it to a per-province text file.
        /// </summary>
        /// <remarks>
        /// The province is looked up by <paramref name="code"/> in <c>allprovince.txt</c>, whose lines
        /// have the form <c>name#code#url</c>. Nothing happens when no line matches. Uses the members
        /// <c>tmp</c>, <c>prefix</c>, <c>parser</c>, <c>load</c> and <c>filepath</c>, which are
        /// defined outside this method. The crawl pauses 1 s after each town and 3 s after each city.
        /// </remarks>
        /// <param name="code">Province code as stored in the second field of the index file.</param>
        static void Sheng(string code)
        {
            char[] fieldSeparator = { '#' };
            Sheng  sheng          = null;

            foreach (var line in System.IO.File.ReadAllLines(tmp + "\\allprovince.txt"))
            {
                // Split once per line; blank or incomplete lines are skipped instead of crashing.
                string[] fields = line.Split(fieldSeparator, StringSplitOptions.RemoveEmptyEntries);
                if (fields.Length < 3 || fields[1] != code)
                {
                    continue;
                }

                sheng      = new Sheng();
                sheng.name = fields[0];
                sheng.code = fields[1];
                sheng.url  = fields[2];
                break;
            }
            if (sheng == null)
            {
                return;
            }

            IEnumerable <IHtmlElement> citys = parser.LoadDocument(prefix + sheng.url).Find("tr.citytr");

            // NOTE(review): unlike the index file path above, no directory separator is inserted
            // between tmp and the file name - confirm this is intended.
            filepath = tmp + sheng.name + ".txt";

            string shengCode = sheng.code.PadRight(12, '0');
            Console.WriteLine(sheng.name + "---" + shengCode + "---" + sheng.url);
            System.IO.File.AppendAllLines(filepath, new string[] { sheng.name + "#" + shengCode + "#" + sheng.url });

            foreach (var city in citys)
            {
                // Walk all cities. The second link of the row carries the city name.
                List <IHtmlElement> cityLinks = city.Find("td a").ToList();
                IHtmlElement        cityLink  = cityLinks[1];

                Shi shi = new Shi();
                shi.url  = cityLink.Attribute("href").Value().Trim();
                // The code is the file name of the link (province prefix and extension removed),
                // right-padded with zeros to 12 characters.
                shi.code = shi.url.Replace(sheng.code + "/", "").Split(new char[] { '.' }, StringSplitOptions.RemoveEmptyEntries)[0].PadRight(12, '0');
                shi.name = cityLink.InnerText().Trim();
                sheng.shis.Add(shi);
                Console.WriteLine(shi.name + "---" + shi.code + "---" + shi.url);
                System.IO.File.AppendAllLines(filepath, new string[] { "\t" + shi.name + "#" + shi.code + "#" + shi.url });

                IEnumerable <IHtmlElement> countyRows = load(prefix + shi.url).Find("tr.countytr");
                foreach (var tr in countyRows)
                {
                    // Walk all counties.
                    IEnumerable <IHtmlElement> td    = tr.Find("td");
                    List <IHtmlElement>        cells = td.ToList();

                    Xian xian = new Xian();
                    if (td.Find("a").Any())
                    {
                        // Linked county: first cell holds the code link, second the name link.
                        IHtmlElement codeLink = cells[0].Find("a").First();
                        xian.url  = codeLink.Attribute("href").Value().Trim();
                        xian.code = codeLink.InnerText().Trim();
                        xian.name = cells[1].Find("a").First().InnerText().Trim();
                        shi.xians.Add(xian);
                        Console.WriteLine("\t" + xian.name + "---" + xian.code + "---" + xian.url);
                        System.IO.File.AppendAllLines(filepath, new string[] { "\t\t" + xian.name + "#" + xian.code + "#" + xian.url });

                        string xianUrl = prefix + sheng.code + "/" + xian.url;
                        // Town pages are addressed relative to the directory of the county page.
                        string xianDirectory = xianUrl.Substring(0, xianUrl.LastIndexOf("/"));

                        IEnumerable <IHtmlElement> xiangs = load(xianUrl).Find("tr.towntr");
                        foreach (var j in xiangs)
                        {
                            // Walk all towns.
                            List <IHtmlElement> townLinks = j.Find("a").ToList();

                            Xiang xiang = new Xiang();
                            xiang.code = townLinks[0].InnerText().Trim();
                            xiang.name = townLinks[1].InnerText().Trim();
                            xiang.url  = townLinks[0].Attribute("href").Value().Trim();
                            xian.xiangs.Add(xiang);
                            Console.WriteLine("\t\t" + xiang.name + "---" + xiang.code + "---" + xiang.url);
                            System.IO.File.AppendAllLines(filepath, new string[] { "\t\t\t" + xiang.name + "#" + xiang.code + "#" + xiang.url });

                            IEnumerable <IHtmlElement> cuns = load(xianDirectory + "/" + xiang.url).Find("tr.villagetr");
                            foreach (var ko in cuns)
                            {
                                // Walk all villages: first cell is the code, third cell the name.
                                List <IHtmlElement> villageCells = ko.Find("td").ToList();

                                Cun cun = new Cun();
                                cun.code = villageCells[0].InnerText().Trim();
                                cun.name = villageCells[2].InnerText().Trim();
                                xiang.cuns.Add(cun);
                                Console.WriteLine("\t\t\t\t" + cun.name + "---" + cun.code);
                                System.IO.File.AppendAllLines(filepath, new string[] { "\t\t\t\t" + cun.name + "#" + cun.code });
                            }
                            // Throttle requests between towns.
                            System.Threading.Thread.Sleep(1000);
                        }
                    }
                    else
                    {
                        // County without links: plain text cells, nothing below it to crawl.
                        xian.code = cells[0].InnerText().Trim();
                        xian.name = cells[1].InnerText().Trim();
                        shi.xians.Add(xian);
                        Console.WriteLine("\t" + xian.name + "---" + xian.code + "---" + xian.url);
                        System.IO.File.AppendAllLines(filepath, new string[] { "\t\t" + xian.name + "#" + xian.code + "#" + xian.url });
                    }
                }
                // Throttle requests between cities.
                System.Threading.Thread.Sleep(3000);
            }
        }