/// <summary>
 /// Loads the job page at <paramref name="url"/> and extracts its description fields.
 /// </summary>
 /// <param name="url">Address of the job detail page to load.</param>
 /// <returns>
 /// A dictionary with the keys "desc", "zz" and "number", plus one "price_N" entry
 /// (1-based) per number found in the budget text, or empty "price_min"/"price_max"
 /// entries when the budget text contains no number.
 /// </returns>
 private Dictionary<string, string> getjobdesc(string url)
 {
     try
     {
         IHtmlDocument page = new JumonyParser().LoadDocument(url);

         // Each lookup takes the first match of its selector; a missing element
         // surfaces as an exception that is logged and rethrown below.
         var descriptionHtml = page.Find(".main-detail .desc").FirstOrDefault().InnerHtml();
         var ownerName       = page.Find(".name a").FirstOrDefault().InnerText();
         var jobNumber       = page.Find(".main-top .number").FirstOrDefault().InnerText();
         var budgetText      = page.Find(".main-top .detail-row .budgets .budget span").FirstOrDefault().InnerText();

         // Every run of digits in the budget text counts as one price.
         MatchCollection amounts = new Regex("(?<jg>\\d+)").Matches(budgetText);

         var fields = new Dictionary<string, string>
         {
             { "desc", descriptionHtml },
             { "zz", ownerName },
             { "number", jobNumber }
         };

         if (amounts.Count == 0)
         {
             fields.Add("price_min", "");
             fields.Add("price_max", "");
             return fields;
         }

         int ordinal = 0;
         foreach (Match amount in amounts)
         {
             ordinal++;
             fields.Add("price_" + ordinal, amount.Groups["jg"].Value);
         }

         return fields;
     }
     catch (Exception e)
     {
         log.Error(e.Message);
         throw;
     }
 }
Exemple #2
0
        /// <summary>
        /// Downloads the listing page at <paramref name="cityUri"/>, passes every
        /// third-column cell of the result table to <c>VertifyAsync</c>, and reports
        /// whether the page was full.
        /// </summary>
        /// <param name="cityUri">Address of the listing page to fetch.</param>
        /// <returns>
        /// <c>true</c> when the page held at least <c>PageSize</c> rows; <c>false</c> when it
        /// held fewer, when the site reported no data, or when the response was not recognized.
        /// </returns>
        private async Task<bool> GetAgentsAsync(Uri cityUri)
        {
            LogHelper.Info(cityUri.ToString());
            var pageSource = await HttpClient.GetStringAsync(cityUri);

            while (!pageSource.Contains("查企业"))
            {
                if (pageSource.StartsWith("<script>window.location", StringComparison.Ordinal))
                {
                    // The response is a redirect script; its first quoted string is
                    // handed to VertifyCode before the page is requested again.
                    VertifyCode(new Uri(pageSource.Split("'")[1]));
                    pageSource = await HttpClient.GetStringAsync(cityUri);
                }
                else if (pageSource.Contains("小查还没找到数据"))
                {
                    return false;
                }
                else
                {
                    // Nothing in this branch changes pageSource, so looping again
                    // would spin forever on the same text. Give up on this page instead.
                    LogHelper.Info("Unrecognized response, giving up: " + cityUri);
                    return false;
                }
            }

            // Materialize once so the selector is not re-evaluated for the count below.
            var rows = JumonyParser.Parse(pageSource).Find(".m_srchList tbody tr td:nth-child(3)").ToList();

            foreach (var item in rows)
            {
                await VertifyAsync(item.InnerHtml());
            }

            return rows.Count >= PageSize;
        }
Exemple #3
0
        /// <summary>
        /// Reads the result list from materials.ulprospector.com, appends one <c>Item</c>
        /// (url + id) per result row to <c>Items</c>, and shows a spinner with a percentage
        /// on the console while doing so.
        /// </summary>
        static void get_items()
        {
            var msg_prefix = "-分析列表:";
            // Byte count of the prefix in the default encoding is used as the console
            // column at which the percentage is printed.
            var y          = Encoding.Default.GetByteCount(msg_prefix);

            Console.Write(msg_prefix);
            var charArr = @"-\|/".ToArray();

            var          resp     = browser.Get("https://materials.ulprospector.com/zh/results?pageSize=100");
            JumonyParser parser   = new JumonyParser();
            var          document = parser.Parse(resp);
            // The last space-separated token of the pager text is taken as the total.
            var          total    = document.FindFirst(".pagination.pull-right").InnerText().Split(' ').Last().ToInt();

            for (int i = 1; i < 2; i++)
            {
                // Query parameters must be separated by '&'; without it the server
                // receives a single malformed pageNum value.
                resp     = browser.Get("https://materials.ulprospector.com/zh/results?pageNum=" + i + "&pageSize=100");
                document = parser.Parse(resp);
                // NOTE(review): the selector ends in "tr. " (a class selector with no
                // class name); it looks truncated - confirm the intended row class.
                var eles = document.Find("table.results tbody tr. ");
                foreach (var v in eles)
                {
                    var item = new Item();
                    item.Url = v.FindFirst(".entry a").Attribute("href").Value();
                    item.Id  = v.FindFirst("input").Attribute("value").Value().ToInt();
                    Items.Add(item);
                }
                Console.CursorLeft = 0;
                Console.Write(charArr[i % charArr.Length]);
                Console.CursorLeft = y;
                // Guard against a zero total, which would otherwise throw DivideByZeroException.
                var percent = total > 0 ? (int)(i * 100 / total) : 0;
                Console.Write(percent + "%");
            }
            Console.WriteLine();
        }
Exemple #4
0
        /// <summary>
        /// Parses an HTML response and builds one <c>ExamItem</c> per table row that
        /// contains the "【" marker.
        /// </summary>
        /// <param name="strResponse">Raw HTML; null or empty yields an empty list.</param>
        /// <param name="moduleId">Module id forwarded to <c>BuildEntity</c>.</param>
        /// <returns>The items that <c>BuildEntity</c> could build; rows it returns null for are skipped.</returns>
        public override IList<ExamItem> Process(string strResponse, int moduleId)
        {
            var result = new List<ExamItem>();

            if (string.IsNullOrEmpty(strResponse))
            {
                return result;
            }

            var document = new JumonyParser().Parse(strResponse);

            // All questions live in the second table matched by the selector.
            var dataTable = document.Descendants("body>div>table").ElementAt(1);

            // Materialize once: the rows are iterated and then counted for the log.
            var htmlExamItems = dataTable.Elements("tr")
                                         .Where(x => x.InnerHtml().Contains("【"))
                                         .ToList();

            foreach (var item in htmlExamItems)
            {
                var model = BuildEntity(moduleId, item);
                if (model == null)
                {
                    continue;
                }

                result.Add(model);
            }

            if (htmlExamItems.Count > result.Count)
            {
                // First placeholder is the number of rows found in the HTML, second the
                // number successfully parsed (the arguments used to be reversed).
                string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
                WriteLog(strResponse, msg);
            }

            return result;
        }
        /// <summary>
        /// Downloads one page of the liaoyang.58.com second-hand housing list and returns
        /// the entries extracted from it.
        /// </summary>
        /// <param name="page">Page number; page 1 uses the base address, other pages append "/pnN/".</param>
        /// <returns>An array holding the first <c>GetCount</c> entries filled in by <c>GetUrlText_1</c>.</returns>
        public Class1[] GetHref_(int page)
        {
            // Pause one second before every request.
            System.Threading.Thread.Sleep(1 * 1000);

            string URL = "http://liaoyang.58.com/ershoufang/0";
            URL = page == 1 ? URL : URL + "/pn" + page.ToString() + "/";

            // Fetch the listing page; the client is disposed as soon as the text is read.
            string html;
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                html = client.DownloadString(URL);
            }

            IHtmlDocument document = new JumonyParser().Parse(html);

            // Fixed-size scratch buffer that GetUrlText_1 fills from the document.
            Class1[] RTc = new Class1[160];
            GetUrlText_1(RTc, document);

            // Copy only the counted entries into a right-sized result array.
            int b = GetCount(RTc, 0);
            Class1[] rtcc = new Class1[b];
            System.Array.Copy(RTc, rtcc, b);
            return rtcc;
        }
Exemple #6
0
    /// <summary>
    /// Downloads the cnblogs.com home page, collects every anchor that has an href,
    /// and binds the sorted link list (absolute url, external flag, target) to the page.
    /// </summary>
    protected void Page_Load( object sender, EventArgs e )
    {
        string html;
        using ( var client = new WebClient() )
        {
            html = client.DownloadString( "http://www.cnblogs.com/" );
        }

        var parser = new JumonyParser();
        var document = parser.Parse( html );

        var links = document.Find( "a[href]" );

        var baseUrl = new Uri( "http://www.cnblogs.com" );

        // A link is internal only when its host is cnblogs.com itself or a sub-domain
        // of it; a plain suffix test would also accept hosts such as "evilcnblogs.com".
        var data = from hyperLink in links
               let url = new Uri( baseUrl, hyperLink.Attribute( "href" ).Value() )
               let host = url.Host
               orderby url.AbsoluteUri
               select new
               {
                 Url = url.AbsoluteUri,
                 IsLinkingOut = !( string.Equals( host, "cnblogs.com", StringComparison.OrdinalIgnoreCase )
                                   || host.EndsWith( ".cnblogs.com", StringComparison.OrdinalIgnoreCase ) ),
                 Target = hyperLink.Attribute( "target" ).Value() ?? "_self"
               };

        DataList.DataSource = data;
        DataBind();
    }
Exemple #7
0
    /// <summary>
    /// Renders the view "~/ActionUrlTest/Test1.html" and checks that its first anchor
    /// points at the expected action url.
    /// </summary>
    public void Test1()
    {
        var context = new ControllerContext(HttpContext.Request.RequestContext, new TestController());

        var result = ViewEngines.Engines.FindView(context, "~/ActionUrlTest/Test1.html", null);

        Assert.NotNull(result.View, "找不到视图");

        IHtmlDocument document;

        // Render the view into a string and parse the output back into a document.
        using (var writer = new StringWriter())
        {
            result.View.Render(new ViewContext(context, result.View, new ViewDataDictionary(), new TempDataDictionary(), writer), writer);

            document = new JumonyParser().Parse(writer.ToString());
        }

        var link = document.FindFirst("a");

        Assert.NotNull(link);

        // Expected value first, actual second, so a failure message is labelled correctly.
        Assert.AreEqual("/TestController/TestAction?arg=args", link.Attribute("href").Value());
    }
Exemple #8
0
    /// <summary>
    /// Downloads the cnblogs.com home page, collects every anchor that has an href,
    /// and binds the sorted link list (absolute url, external flag, target) to the page.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        string html;
        using (var client = new WebClient())
        {
            html = client.DownloadString("http://www.cnblogs.com/");
        }

        var parser   = new JumonyParser();
        var document = parser.Parse(html);

        var links = document.Find("a[href]");

        var baseUrl = new Uri("http://www.cnblogs.com");

        // A link is internal only when its host is cnblogs.com itself or a sub-domain
        // of it; a plain suffix test would also accept hosts such as "evilcnblogs.com".
        var data = from hyperLink in links
                   let url = new Uri(baseUrl, hyperLink.Attribute("href").Value())
                   let host = url.Host
                   orderby url.AbsoluteUri
                   select new
                   {
                       Url          = url.AbsoluteUri,
                       IsLinkingOut = !(string.Equals(host, "cnblogs.com", StringComparison.OrdinalIgnoreCase)
                                        || host.EndsWith(".cnblogs.com", StringComparison.OrdinalIgnoreCase)),
                       Target       = hyperLink.Attribute("target").Value() ?? "_self"
                   };

        DataList.DataSource = data;
        DataBind();
    }
Exemple #9
0
        /// <summary>
        /// Loads a blog page, turns it into a mail message with <c>CreateMail</c>, lets
        /// <c>SmtpClient</c> write that message into a temporary pickup directory, and
        /// moves the single produced file to "&lt;guid&gt;.mht" under <c>tempDirectory</c>.
        /// </summary>
        static void Main(string[] args)
        {
            var id = Guid.NewGuid();

            var path = Path.Combine(tempDirectory, id.ToString());

            Directory.CreateDirectory(path);

            // The SMTP client never talks to a server here: with a pickup directory
            // it only serializes the message to a file in that directory.
            using (SmtpClient smtp = new SmtpClient())
            {
                smtp.EnableSsl               = false;
                smtp.DeliveryMethod          = SmtpDeliveryMethod.SpecifiedPickupDirectory;
                smtp.PickupDirectoryLocation = path;

                var parser   = new JumonyParser();
                var document = parser.LoadDocument("http://blog.sina.com.cn/s/blog_4701280b010183ny.html");

                // Dispose the message (and any attachments it owns) once it is written.
                using (MailMessage message = CreateMail(document))
                {
                    smtp.Send(message);
                }
            }

            var directory = new DirectoryInfo(path);
            var file      = directory.GetFiles().Single();

            file.MoveTo(Path.Combine(tempDirectory, id.ToString() + ".mht"));

            directory.Delete(true);
        }
Exemple #10
0
 /// <summary>
 /// Loads the job page at <paramref name="url"/> (UTF-8) and maps its summary block
 /// to a <c>sys_job</c>.
 /// </summary>
 /// <param name="url">Address of the job detail page.</param>
 /// <returns>
 /// The populated job, or an empty <c>sys_job</c> when the page has no
 /// "product-info-summary" block or any step throws (the failure is logged).
 /// </returns>
 public sys_job GetJobInfo(string url)
 {
     try
     {
         IHtmlDocument page = new JumonyParser().LoadDocument(url, Encoding.UTF8);

         // Pages without the summary block carry no job data.
         if (page.InnerHtml().IndexOf("product-info-summary") < 0)
         {
             return new sys_job();
         }

         string titleText  = page.Find(".product-info-summary .row h4").FirstOrDefault().InnerText();
         string authorText = page.Find(".product-info-summary .row small").FirstOrDefault().InnerText();
         authorText = authorText.Replace("发布者:", "");
         string budgetText = page.Find(".product-info-summary .row .p-desc").FirstOrDefault().InnerText();
         budgetText = budgetText.Replace(" 预算: ", "");
         string dateText   = page.Find("#p-other ul li:first-child").FirstOrDefault().InnerText();
         string tagText    = page.Find("#p-other ul li:nth-child(3)").SingleOrDefault().InnerText();
         string bodyText   = page.Find("#wrap").SingleOrDefault().InnerHtml();

         // Strip script elements first, then every remaining tag, then tabs and line breaks.
         string scriptPattern = @"<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>";
         bodyText = Regex.Replace(bodyText, scriptPattern, "");
         bodyText = Regex.Replace(bodyText, "<.*?>", "");
         bodyText = bodyText.Replace("\t", "");
         bodyText = bodyText.Replace("\r", "");
         bodyText = bodyText.Replace("\n", "");

         var job = new sys_job();
         job.title     = titleText;
         job.author    = authorText;
         job.desc      = bodyText;
         job.rq        = dateText;
         job.tag       = tagText;
         job.price_min = budgetText;
         return job;
     }
     catch (Exception e)
     {
         log.Error(url + "----" + e.Message);
         return new sys_job();
     }
 }
Exemple #11
0
        /// <summary>
        /// Core load step: parses the search result table and fills <paramref name="result"/>.
        /// </summary>
        /// <param name="context">HTTP context, forwarded to the base implementation.</param>
        /// <param name="url">Request url, forwarded to the base implementation.</param>
        /// <param name="htmlContent">HTML content of the result page.</param>
        /// <param name="result">Target result that receives the parsed items and paging flags.</param>
        /// <returns>The value returned by the base implementation.</returns>
        protected override bool LoadCore(HttpContext<string> context, string url, string htmlContent, ResourceSearchInfo result)
        {
            var parser = new JumonyParser();
            var doc    = parser.Parse(htmlContent, new Uri(ReferUrlPage));

            // The first row of the table is skipped.
            var rows = doc.Find("#archiveResult tr").Skip(1);

            foreach (var row in rows)
            {
                // A row without a first action link has nothing to extract a hash from;
                // skip it instead of failing the whole page with a null dereference.
                var href = row.FindFirstOrDefault("td.action a:nth-child(1)")?.Attribute("href")?.AttributeValue;
                if (href == null)
                {
                    continue;
                }

                var title = row.FindFirstOrDefault("td.name")?.InnerText();
                var date  = row.FindFirstOrDefault("td.date")?.InnerText()?.ToDateTimeNullable();
                // The resource hash is the 40-character alphanumeric path segment of the link.
                var has   = Regex.Match(href, @"/([a-z\d]{40})", RegexOptions.IgnoreCase).GetGroupValue(1);

                var item = CreateResourceInfo(has, title);
                item.UpdateTime = date;

                result.Add(item);
            }

            var pager = doc.FindFirstOrDefault("div.pagination");

            // Paging is available in a direction when the pager's first/last child is a link.
            result.HasPrevious = pager?.FindFirstOrDefault("*:first-child")?.Name == "a";
            result.HasMore     = pager?.FindFirstOrDefault("*:last-child")?.Name == "a";

            return base.LoadCore(context, url, htmlContent, result);
        }
Exemple #12
0
 /// <summary>
 /// Builds the list of page urls, from the current page up to the last page, using the
 /// paging values stored in the hidden inputs of the root page.
 /// </summary>
 /// <param name="url">Overwritten with <c>rooturl</c> before use.</param>
 /// <returns>One url per page; an empty list when anything fails (the failure is logged).</returns>
 public List<string> GetPageUrl(string url)
 {
     try
     {
         url = rooturl;
         var pageUrls = new List<string>();
         IHtmlDocument html = new JumonyParser().LoadDocument(url, Encoding.UTF8);

         // Reads the "value" attribute of the single element matching the selector.
         Func<string, string> readValue =
             selector => html.Find(selector).SingleOrDefault().Attribute("value").Value();

         string entityCount      = readValue("#entityCount");
         string maxEntityPerPage = readValue("#maxEntityPerPage");
         string maxPagePerRow    = readValue("#maxPagePerRow");
         string pageCount        = readValue("#pageCount");
         string currentPage      = readValue("#currentPage");
         string currentPageRow   = readValue("#currentPageRow");
         string pageRowCount     = readValue("#pageRowCount");

         Int32 lastPage = Convert.ToInt32(pageCount);
         Int32 pageNo   = Convert.ToInt32(currentPage);

         // Only the page number varies between urls; the rest of the query is fixed.
         string queryHead = "entityCount=" + entityCount + "&maxEntityPerPage=" + maxEntityPerPage +
                            "&maxPagePerRow=" + maxPagePerRow + "&pageCount=" + pageCount +
                            "&currentPage=";
         string queryTail = "&currentPageRow=" + currentPageRow +
                            "&pageRowCount=" + pageRowCount +
                            "&cBudget=0-1000000000&budgetTo=&statusBy=&categoryBy=&typeBy=&typeName=&orderByClause=a.c_postDate+desc";

         while (pageNo <= lastPage)
         {
             pageUrls.Add(url + "?" + queryHead + pageNo + queryTail);
             pageNo++;
         }

         return pageUrls;
     }
     catch (Exception e)
     {
         log.Error(e.Message);
         return new List<string>();
     }
 }
Exemple #13
0
 /// <summary>
 /// Loads the page at <paramref name="url"/>, reads its pager links, and adds their
 /// full urls to the "filmpageurl" sorted set, scored with the running <c>index</c>.
 /// </summary>
 /// <param name="url">Address of a list page that contains the pager.</param>
 public void GetPageUrlToRedis(string url)
 {
     try
     {
         IHtmlDocument html = new JumonyParser().LoadDocument(url);
         var pagerLinks     = html.Find(".co_content8 .x a").ToList();
         int linkCount      = pagerLinks.Count;

         var last_index = linkCount - 3;
         var end_index  = linkCount - 1;
         var last_url   = pagerLinks[last_index].Attribute("href").Value();
         var end_txt    = pagerLinks[end_index].InnerText();

         // The final pager link reads "末页" on every page except the last one.
         this.isendpage = end_txt.IndexOf("末页") < 0;
         looplast_index = isendpage ? linkCount : linkCount - 1;
         var last_full_url = page_baseurl + last_url;

         int i = isfirstpage ? 0 : 2;
         while (i < looplast_index)
         {
             IHtmlElement link        = pagerLinks[i];
             string       pageurl     = link.Attribute("href").Value();
             string       pagefullurl = page_baseurl + pageurl;
             db.SortedSetAdd("filmpageurl", pagefullurl, (double)index++);

             // On the third-from-last link, continue from that page unless this is the last page.
             if (i == last_index && !isendpage)
             {
                 isfirstpage = false;
                 GetPageUrl(pagefullurl);
             }

             i++;
         }
     }
     catch (Exception e)
     {
         log.Error(e.Message);
         throw;
     }
 }
Exemple #14
0
        /// <summary>
        /// Parses an HTML response and builds one <c>ExamItem</c> per "div.st" element.
        /// </summary>
        /// <param name="strResponse">Raw HTML; null or empty yields an empty list.</param>
        /// <param name="moduleId">Module id forwarded to <c>BuildEntity</c>.</param>
        /// <returns>The items that <c>BuildEntity</c> could build; elements it returns null for are skipped.</returns>
        public override IList<ExamItem> Process(string strResponse, int moduleId)
        {
            var result = new List<ExamItem>();

            if (string.IsNullOrEmpty(strResponse))
            {
                return result;
            }

            var document = new JumonyParser().Parse(strResponse);

            // All questions. Materialized once because they are iterated and then counted.
            var htmlExamItems = document.Descendants(@"div.st").ToList();

            foreach (var item in htmlExamItems)
            {
                var model = BuildEntity(moduleId, item);
                if (model == null)
                {
                    continue;
                }

                result.Add(model);
            }

            if (htmlExamItems.Count > result.Count)
            {
                // First placeholder is the number of elements found in the HTML, second
                // the number successfully parsed (the arguments used to be reversed).
                string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
                WriteLog(strResponse, msg);
            }

            return result;
        }
Exemple #15
0
        /// <summary>
        /// Turns every "tbody&gt;tr" row of the response into a <c>SubjectModule</c> whose
        /// data source points at the row's last link.
        /// </summary>
        /// <param name="strResponse">Raw HTML of the homework overview page.</param>
        /// <param name="moduleId">Module id used together with the row title to derive the module id.</param>
        /// <returns>One module per table row.</returns>
        public override List<SubjectModule> Process(string strResponse, int moduleId)
        {
            var modules = new List<SubjectModule>();

            var rows = new JumonyParser().Parse(strResponse).Descendants("tbody>tr");

            foreach (IHtmlElement row in rows)
            {
                // Title comes from the first cell, the link from the last anchor inside a cell.
                string title = row.FindFirst("td").InnerText();
                string href  = row.FindLast("td a").Attribute("href").Value();

                var module = new SubjectModule();
                module.Id      = GetId(moduleId, title);
                module.Handler = "SweetFly.Job.Handler.NormalHandler,SweetFly.Job";

                var dataSource = new HtmlDataSource();
                dataSource.Encoding = "GB2312";
                dataSource.Uri      = @"http://learning.cmr.com.cn/student/acourse/HomeworkCenter/" + href;
                module.HtmlDataSource = dataSource;

                modules.Add(module);
            }

            // Row count next to the number of modules built.
            Console.WriteLine("{0} - {1}", rows.Count(), modules.Count);

            return modules;
        }
Exemple #16
0
        /// <summary>
        /// Exercises setting, replacing, removing and clearing inline style values on an
        /// element, including reading longhand values after a shorthand was set.
        /// </summary>
        public void SetStyleTest()
        {
            var element = new JumonyParser().Parse("<div></div>").Elements().First();

            // All assertions pass the expected value first and the actual value second,
            // so that failure messages label the two values correctly.
            element.Style("display", "none");
            Assert.AreEqual("display:none", element.Attribute("style").Value(), ".Style( name, value ) 测试不通过");

            element.Style().SetValue("color", "red");
            Assert.AreEqual("display:none;color:red", element.Attribute("style").Value(), ".Style().SetValue( name, value ) 测试不通过");

            element.Style().SetValue("display", "block");
            Assert.AreEqual("display:block;color:red", element.Attribute("style").Value(), ".Style().SetValue( name, value ) 测试不通过");

            // Setting a value to null removes the property.
            element.Style().SetValue("display", null);
            Assert.AreEqual("color:red", element.Attribute("style").Value(), ".Style().SetValue( name, null ) 测试不通过");

            element.Style().Clear();
            Assert.AreEqual("", element.Attribute("style").Value(), ".Style().Clear() 测试不通过");

            // Shorthand properties must be readable through their longhand names.
            element.Style().SetValue("padding", "10px");
            Assert.AreEqual("10px", element.Style().GetValue("padding-left"), "shorthand 展开测试不通过");

            element.Style().SetValue("padding-left", "0px");
            Assert.AreEqual("0px", element.Style().GetValue("padding-left"), "shorthand 展开测试不通过");
            Assert.AreEqual("10px", element.Style().GetValue("padding-top"), "shorthand 展开测试不通过");

            element.Style().SetValue("margin", "5px");
            Assert.AreEqual("5px", element.Style().GetValue("margin-left"), "margin shorthand 展开测试不通过");
        }
Exemple #17
0
        /// <summary>
        /// Walks list pages 1 to 50 of cn.coovee.com, opens the detail page of every
        /// company entry, and inserts one <c>COMPANY</c> row per entry.
        /// Failures are reported on the console and do not stop the run.
        /// </summary>
        static void GetDataByJumony()
        {
            Console.WriteLine("开始跑数据");

            var db   = DB.GetInstance();
            var utf8 = System.Text.Encoding.GetEncoding("utf-8");

            for (var i = 1; i < 51; i++)
            {
                try
                {
                    var path = "http://cn.coovee.com/company/s1.html?p=" + i.ToString();

                    IHtmlDocument listPage = new JumonyParser().LoadDocument(path, utf8);

                    // Materialize once: the entries are counted and then iterated.
                    var companies = listPage.Find(".company-l-item").ToList();
                    Console.WriteLine(companies.Count);

                    foreach (var item in companies)
                    {
                        try
                        {
                            var companyName = item.Find(".dt h4 a").FirstOrDefault().InnerText();
                            Console.WriteLine("公司名=" + companyName);

                            var linkMan = item.Find(".dt p span").FirstOrDefault().InnerText();
                            Console.WriteLine("联系人=" + linkMan);

                            var saleProduct = item.Find(".dd p").FirstOrDefault().InnerText().Replace("主营:", "");
                            Console.WriteLine("主营=" + saleProduct);

                            // The link inside the last list item leads to the detail page.
                            var url = item.Find(".dd ul li").Last().Find("a").FirstOrDefault().Attribute("href").Value();
                            Console.WriteLine(url);
                            System.Threading.Thread.Sleep(1000);

                            // Separate variable so the list page document is not overwritten.
                            IHtmlDocument detailPage = new JumonyParser().LoadDocument(url, utf8);
                            var contactLines = detailPage.Find(".company-contact-info p").ToList<IHtmlElement>();

                            // Second paragraph carries the phone, fourth the address.
                            var tel = contactLines[1].InnerText().Replace("联系电话:", "");
                            Console.WriteLine("电话=" + tel);
                            var adress = contactLines[3].InnerText().Replace("公司地址:", "").Replace("  ", "");
                            Console.WriteLine("地址=" + adress);

                            COMPANY comp = new COMPANY()
                            {
                                ID           = System.Guid.NewGuid().ToString(),
                                COMPANY_NAME = companyName,
                                ADDRESS      = adress,
                                LINK_MAN     = linkMan,
                                SALE_PRODUCT = saleProduct,
                                TEL          = tel
                            };
                            db.Insertable<COMPANY>(comp).ExecuteCommand();

                            System.Threading.Thread.Sleep(2000);
                        }
                        catch (Exception itemError)
                        {
                            // Best effort: report the entry that failed and move on to the next one.
                            Console.WriteLine("Skipping company entry on page " + i + ": " + itemError.Message);
                        }
                    }
                    System.Threading.Thread.Sleep(2000);
                }
                catch (Exception pageError)
                {
                    // Best effort: report the page that failed and move on to the next one.
                    Console.WriteLine("Skipping list page " + i + ": " + pageError.Message);
                }
            }
        }
        /// <summary>
        /// Reads the liaoyang.baixing.com listing, collects the link and text of every
        /// ".media-body-title" entry, and loads each entry's detail page.
        /// The collected list is local to this method and is not returned or stored.
        /// </summary>
        private void BaiXingNewHTMLhreper()
        {
            string url = "http://liaoyang.baixing.com/qiufang/";
            string ThisHtml = BXGetHTMLstr(url);
            using (var ctx = new oaEntities())
            {
                IHtmlDocument document = new JumonyParser().Parse(ThisHtml);
                IEnumerable<IHtmlElement> result = document.Find(".media-body-title");
                List<Class1> Ncss = new List<Class1>();
                foreach (var item in result)
                {
                    Class1 entry = new Class1();
                    // Only look the anchor up when one exists. The unconditional
                    // FindFirst("a") call that used to precede this line ran before the
                    // Exists guard - presumably throwing when no anchor is present; confirm.
                    entry.href = item.Exists("a") ? item.FindFirst("a").Attribute("href").Value() : string.Empty;
                    entry.TextName = MainWindow.GetN_value(item, "a");

                    // Read the linked detail page; entries without a link have nothing to fetch.
                    if (!string.IsNullOrEmpty(entry.href))
                    {
                        string ThisZ = BXGetHTMLstr(entry.href);
                        IHtmlDocument document_1 = new JumonyParser().Parse(ThisZ);
                        IEnumerable<IHtmlElement> restime = document_1.Find("div>.viewad-topMeta");
                        foreach (var tm in restime)
                        {
                            // TODO: nothing is extracted from the meta block yet.
                        }
                    }

                    Ncss.Add(entry);
                }
            }
        }
Exemple #19
0
 // LoadCompleted handler for Webbrowser2: grabs the loaded document's body HTML and
 // parses it with Jumony. The parsed document is not used further in this method.
 void webbrowser2_LoadCompleted(object sender, NavigationEventArgs e)
 {
     // Take the document reference before navigating away.
     mshtml.HTMLDocument mhtml = (mshtml.HTMLDocument)Webbrowser2.Document;
     // NOTE(review): the browser is sent to "http://#" before the body is read below;
     // presumably this resets the control - confirm the ordering is intentional.
     Webbrowser2.Navigate("http://#");
     string        html       = mhtml.body.innerHTML;
     IHtmlDocument document_1 = new JumonyParser().Parse(html);
 }
Exemple #20
0
        /// <summary>
        /// Gets the ids contained in one list request.
        /// </summary>
        /// <param name="address">Address passed to <c>GetUrlString</c>.</param>
        /// <returns>
        /// A table with "more" (taken from the response) and "ids" (one digit string per
        /// "li&gt;a" link found in the response HTML).
        /// </returns>
        public Hashtable GetList(string address = "")
        {
            var listAddress = new List<string>();

            // Fetch and deserialize the response.
            var result = JsonConvert.DeserializeObject<DuoWan.DwResult>(GetUrlString(address));

            var document = new JumonyParser().Parse(result.html);

            var cells = document.Find("li>a");

            foreach (var li in cells)
            {
                var detailUrl = li.Attribute("href").Value();

                // The id is made of every ASCII digit in the url. '0' must be kept as
                // well, otherwise an id such as 105 would collapse to 15.
                listAddress.Add((from each in detailUrl where each >= '0' && each <= '9' select each).Join(""));
            }

            var o = new Hashtable()
            {
                { "more", result.more },
                { "ids", listAddress }
            };

            return o;
        }
Exemple #21
0
        /// <summary>
        /// Requests the detail page for <paramref name="id"/> and stores the HTML of its
        /// ".zwnr" element in <c>t_spider_zwgk.content</c>. The work runs in task
        /// continuations; the method returns before it completes. Failures are written
        /// to the debug output.
        /// </summary>
        /// <param name="id">Record id; used in the request url and in the SQL where clause.</param>
        public static void getContent(string id)
        {
            HttpClient httpClient = new HttpClient();

            try
            {
                httpClient
                .PostAsync("http://hd.huachuan.gov.cn/aspx/gkml_view.aspx?id=" + id, null)
                .ContinueWith((postTask) =>
                {
                    try
                    {
                        // Reading Result rethrows a failed request; the catch below reports it
                        // instead of leaving the exception unobserved on the task.
                        HttpResponseMessage response = postTask.Result;
                        response.Content.ReadAsStringAsync().ContinueWith((readTask) =>
                        {
                            try
                            {
                                IHtmlDocument source = new JumonyParser().Parse(readTask.Result);
                                string content       = source.FindSingle(".zwnr").InnerHtml();

                                // The scraped HTML goes into a quoted SQL literal, so backslashes
                                // and single quotes are escaped to keep the statement intact.
                                // NOTE(review): prefer a parameterized call if DbHelperMySQL offers
                                // one; id is still inserted unquoted.
                                string escaped = (content ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");
                                string sql     = string.Format("update t_spider_zwgk t set t.content='{0}' where t.id={1}", escaped, id);
                                DbHelperMySQL.ExecuteSql(sql);
                            }
                            catch (Exception readError)
                            {
                                Debug.WriteLine("----->【" + id + "】内容存储异常<-----:" + readError);
                            }
                            finally
                            {
                                response.Dispose();
                                httpClient.Dispose();
                            }
                        });
                    }
                    catch (Exception postError)
                    {
                        Debug.WriteLine("----->【" + id + "】内容存储异常<-----:" + postError);
                        httpClient.Dispose();
                    }
                });
            }
            catch (Exception e)
            {
                Debug.WriteLine("----->【" + id + "】内容存储异常<-----:" + e);
                httpClient.Dispose();
            }
        }
Exemple #22
0
        /// <summary>
        /// Reads one list page from www.huachuan.gov.cn and inserts every entry that is
        /// not yet in <c>t_spider_bslc</c>, then calls <c>getContent</c> for each new row.
        /// </summary>
        /// <param name="page">0 for the index page, otherwise the page number (zero-padded to two digits in the url).</param>
        public static void getByPage(int page)
        {
            string url = "";

            if (page == 0)
            {
                url = "http://www.huachuan.gov.cn/zwgk/xxgksyzl/fgfgg/index.html";
            }
            else
            {
                string p = page.ToString();
                if (page < 10)
                {
                    p = "0" + p;
                }
                url = "http://www.huachuan.gov.cn/system/more/zwgk/xxgksyzl/fgfgg/index/page_" + p + ".html";
            }
            string        pageStr = Util.getHtmlStr(url, Encoding.Default);
            IHtmlDocument source  = new JumonyParser().Parse(pageStr);
            var           items   = source.Find(".listmain ul li");

            // Scraped text goes into quoted SQL literals; escape backslashes and single
            // quotes so such characters cannot break or alter the statement.
            // NOTE(review): prefer parameterized calls if DbHelperMySQL offers them.
            Func<string, string> sqlText = s => (s ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            // One generator for the whole page: a new Random per item can repeat values
            // when several are created within the same clock tick.
            Random idGenerator = new Random();

            foreach (var item in items)
            {
                var    firstDiv = item.FindFirst("div");
                var    link     = firstDiv.FindSingle("a");
                string path     = link.Attribute("href").Value();
                string title    = link.InnerText();
                string time     = firstDiv.NextElement().InnerText();

                string id;
                string existsSql;
                if (path.StartsWith("http", StringComparison.Ordinal))
                {
                    // Absolute links carry no id: use a random one and de-duplicate by title.
                    id        = idGenerator.Next(1000000, 9999999).ToString();
                    existsSql = string.Format("select count(*) from t_spider_bslc t where t.title='{0}'", sqlText(title));
                }
                else
                {
                    // Relative links: the id is the file name (without extension) of the
                    // fourth path segment; de-duplicate by id.
                    id        = path.Split('/')[3].Split('.')[0];
                    existsSql = string.Format("select count(*) from t_spider_bslc t where t.id={0}", id);
                }

                if (Convert.ToInt32(DbHelperMySQL.GetSingle(existsSql)) != 0)
                {
                    continue;
                }

                string insertSql = string.Format(
                    "insert into t_spider_bslc(id,title,time,path) values({0},'{1}','{2}','{3}')",
                    id, sqlText(title), sqlText(time), sqlText(path));

                if (DbHelperMySQL.ExecuteSql(insertSql) == 1)
                {
                    getContent(path);
                }
            }
        }
Exemple #23
0
        /// <summary>
        /// Data-binds "StyleTest1.html" and checks that no ".invisible" element is left.
        /// </summary>
        public void VisibleTest()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "StyleTest1.html"));

            document.DataBind(null);

            // Expected value first, actual second, so a failure message is labelled correctly.
            Assert.AreEqual(0, document.Find(".invisible").Count());
        }
Exemple #24
0
 // LoadCompleted handler for Webbrowser2: parses the loaded body HTML with Jumony and
 // sets up three element queries. None of the queries is consumed in this method.
 void Webbrowser2_LoadCompleted(object sender, NavigationEventArgs e)
 {
     var browserDocument = (mshtml.HTMLDocument)Webbrowser2.Document;
     string bodyHtml = browserDocument.body.innerHTML;
     IHtmlDocument parsed = new JumonyParser().Parse(bodyHtml);

     // Lists whose identity is "houselist-mod-new".
     IEnumerable<IHtmlElement> houseLists =
         from ul in parsed.Find("ul")
         where ul.Identity() == "houselist-mod-new"
         select ul;

     // List items inside those lists.
     IEnumerable<IHtmlElement> houseListItems = houseLists.Find("li");

     // Direct ".list-item" children of any list item in the document.
     IEnumerable<IHtmlElement> listItemBlocks = parsed.Find("li>.list-item");
 }
Exemple #25
0
        /// <summary>
        /// Data-binds "Test1.html" and checks the text substitution inside the title element.
        /// </summary>
        public void Test1()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "Test1.html"));

            HtmlBinding.Create(document, null).DataBind();

            // Expected value first, actual second, so a failure message is labelled correctly.
            Assert.AreEqual("Test Title abc text", document.FindFirst("title").InnerHtml(), "对 title 元素内容的文本替换测试失败");
        }
Exemple #26
0
        /// <summary>
        /// Posts the pager form for one list page of hd.huachuan.gov.cn, inserts every
        /// entry that is not yet in <c>t_spider_zwgk</c>, and calls <c>getContent</c> for
        /// each new row. Processing stops at the first entry that already exists.
        /// The work runs in task continuations; the method returns before it completes.
        /// </summary>
        /// <param name="cookie">Not used by this method.</param>
        /// <param name="viewstate">Value sent as the "__VIEWSTATE" form field.</param>
        /// <param name="page">Page number sent as the "__EVENTARGUMENT" form field.</param>
        public static void getByPage(string cookie, string viewstate, int page)
        {
            HttpClient  httpClient  = new HttpClient();
            HttpContent postContent = new FormUrlEncodedContent(new Dictionary<string, string>()
            {
                { "__VIEWSTATE", viewstate },
                { "__VIEWSTATEGENERATOR", "7BE8FDE8" },
                { "__EVENTTARGET", "AspNetPager1" },
                { "__EVENTARGUMENT", page.ToString() },
                { "_keywords", "" },
                { "AspNetPager1_input", "1" },
            });

            // Scraped text goes into quoted SQL literals; escape backslashes and single
            // quotes so such characters cannot break or alter the statement.
            // NOTE(review): prefer parameterized calls if DbHelperMySQL offers them;
            // id is still inserted unquoted.
            Func<string, string> sqlText = s => (s ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            httpClient
            .PostAsync("http://hd.huachuan.gov.cn/aspx/gkml_list.aspx", postContent)
            .ContinueWith((postTask) =>
            {
                try
                {
                    // Reading Result rethrows a failed request; the catch below reports it
                    // instead of leaving the exception unobserved on the task.
                    HttpResponseMessage response = postTask.Result;
                    response.Content.ReadAsStringAsync().ContinueWith((readTask) =>
                    {
                        try
                        {
                            IHtmlDocument source = new JumonyParser().Parse(readTask.Result);
                            var itemCount        = source.Find(".listbox").Count();
                            for (int i = 1; i <= itemCount; i++)
                            {
                                try
                                {
                                    // The id is the part after '=' in the link's href.
                                    string id     = source.FindSingle("#four" + i).Attribute("href").Value().Split('=')[1];
                                    var    detail = source.Find("#con_four_" + i);
                                    string author = detail.Find(".li1").Last().InnerText().Replace("发布机构:", "");
                                    string time   = detail.Find(".li2").Last().InnerText().Replace("发文日期:", "");
                                    string title  = detail.Find(".infoname").First().InnerText().Replace("名称:", "");

                                    // An entry that already exists means the list has not changed
                                    // since the last run, so the rest of the page is skipped.
                                    string sql = string.Format("select count(*) from t_spider_zwgk t where t.id={0}", id);
                                    int count  = Convert.ToInt32(DbHelperMySQL.GetSingle(sql));
                                    if (count > 0)
                                    {
                                        return;
                                    }

                                    // Not present yet: insert it. Titles keep the existing
                                    // convention of turning single quotes into double quotes.
                                    sql   = string.Format(
                                        "insert into t_spider_zwgk(id,title,time,author) values({0},'{1}','{2}','{3}')",
                                        id, sqlText(title.Replace('\'', '"')), sqlText(time), sqlText(author));
                                    count = DbHelperMySQL.ExecuteSql(sql);
                                    if (count == 1)
                                    {
                                        getContent(id);
                                    }
                                }
                                catch (Exception itemError)
                                {
                                    Debug.WriteLine("----->【" + page + "." + i + "】新闻创建异常<-----:" + itemError);
                                }
                            }
                        }
                        catch (Exception parseError)
                        {
                            Debug.WriteLine("----->【" + page + "】列表解析异常<-----:" + parseError);
                        }
                        finally
                        {
                            response.Dispose();
                            httpClient.Dispose();
                        }
                    });
                }
                catch (Exception postError)
                {
                    Debug.WriteLine("----->【" + page + "】列表请求异常<-----:" + postError);
                    httpClient.Dispose();
                }
            });
        }
Exemple #27
0
        /// <summary>
        /// Downloads a novel chapter by chapter, starting at a fixed page and following each
        /// page's "next" link, and appends every chapter's title and text to D:\c.txt.
        /// The loop ends when a page cannot be loaded or has no usable next link.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string outputPath = "D:\\c.txt";

            var baseUrl = "http://www.42xs.com/read/0/404/";
            var nextUrl = "171271.html";
            var url     = "";

            // FileMode.Append opens an existing file at its end and creates the file when it is
            // missing, so no separate existence check is needed. The using blocks flush and close
            // the file even if an exception escapes the loop.
            using (var fs = new FileStream(outputPath, FileMode.Append))
            using (var sw = new StreamWriter(fs, Encoding.UTF8))
            {
                while (nextUrl != "")
                {
                    // Reset per chapter so a failed download never re-writes the previous chapter.
                    var title = "";
                    var txt   = "";

                    try
                    {
                        url = baseUrl + nextUrl;
                        var doc      = new JumonyParser().LoadDocument(url);
                        var titleDom = doc.FindFirst("#center > div.title > h1");
                        title = titleDom.InnerText();

                        var dom = doc.FindFirst("#content");
                        txt = dom.InnerText();

                        var domNext = doc.FindFirst("#container > div:nth-child(3) > div > div.jump > a:nth-child(6)");
                        nextUrl = domNext.Attribute("href").Value();
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine("{0}没有成功", url);
                        Console.WriteLine(e.Message);
                        nextUrl = "";
                    }

                    // A failure after the text was read (for example a missing next link) still
                    // leaves a chapter worth saving; a failure before that leaves nothing to write.
                    if (string.IsNullOrEmpty(title) && string.IsNullOrEmpty(txt))
                    {
                        continue;
                    }

                    Console.WriteLine(title);
                    sw.WriteLine("");
                    sw.WriteLine(title);
                    sw.WriteLine("");
                    sw.WriteLine(txt);
                }
            }

            // The file is already flushed and closed when the prompt appears.
            Console.Write("The End. Press any key to exit...");
            Console.ReadKey();
        }
Exemple #28
0
        /// <summary>
        /// Parses a single div whose class name contains a hyphen and checks that the class
        /// selector for that name matches exactly one element.
        /// </summary>
        public void css_class_has_hyphen()
        {
            var parser   = new JumonyParser();
            var document = parser.Parse("<div class=\"css-class\"></div>");

            var matches    = document.Find(".css-class");
            var matchCount = matches.Count();

            Assert.AreEqual(1, matchCount);
        }
Exemple #29
0
        /// <summary>
        /// Loads SpecificationTest7.html from the current directory and checks that the first
        /// anchor element is found and that its href and title attribute values are read as written.
        /// </summary>
        public void SpecificationTest7()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "SpecificationTest7.html"));

            var link = document.FindFirstOrDefault("a");

            Assert.IsNotNull(link, "属性或内容包含特殊字符的标签解析失败");
            // Expected value first, actual second, so a failure message labels the two values correctly.
            Assert.AreEqual("#", link.Attribute("href").Value(), "属性内容包含 > 时解析失败。");
            Assert.AreEqual("this is a <a> tag", link.Attribute("title").Value(), "属性内容包含 > 时解析失败。");
            // NOTE: the original test also carried a disabled assertion that the link has zero child elements.
        }
Exemple #30
0
        /// <summary>
        /// Loads SpecificationTest5.html from the current directory and checks that exactly four
        /// <c>IHtmlSpecial</c> nodes are found among the document's descendant nodes.
        /// </summary>
        public void SpecificationTest5()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "SpecificationTest5.html"));

            // NOTE: the original test also carried a disabled assertion on document.DocumentDeclaration.

            var specials = document.DescendantNodes().OfType <IHtmlSpecial>().ToArray();

            // Expected value first; Length reads the array size directly instead of going through LINQ.
            Assert.AreEqual(4, specials.Length, "特殊标签解析数量不对");
        }
Exemple #31
0
        /// <summary>
        /// Compiles the document loaded from Test1.html, rebuilds a document from the compiled
        /// method, and checks that both documents have equal descendant node sequences.
        /// </summary>
        public void CompileTest()
        {
            var jumony   = new JumonyParser();
            var htmlPath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
            var original = jumony.LoadDocument(htmlPath);

            var rebuild = original.Compile();
            var rebuilt = rebuild(jumony.DomProvider);

            var nodesMatch = original.DescendantNodes().SequenceEqual(rebuilt.DescendantNodes(), new DomNodeComparer());
            Assert.IsTrue(nodesMatch, "编译还原测试失败");
        }