Exemplo n.º 1
0
        /// <summary>
        /// Walks listing pages 1 through 50 of cn.coovee.com, prints the details of every
        /// company found and inserts one COMPANY row per company.
        /// </summary>
        /// <remarks>
        /// Best-effort crawl: a failure on one company or on one listing page is reported on
        /// the console and the crawl moves on to the next one.
        /// </remarks>
        static void GetDataByJumony()
        {
            Console.WriteLine("开始跑数据");

            var db   = DB.GetInstance();
            var utf8 = System.Text.Encoding.GetEncoding("utf-8");

            for (var i = 1; i < 51; i++)
            {
                try
                {
                    var path = "http://cn.coovee.com/company/s1.html?p=" + i.ToString();

                    IHtmlDocument listing = new JumonyParser().LoadDocument(path, utf8);

                    // Materialize once so that counting and iterating do not run the selector twice.
                    var companies = listing.Find(".company-l-item").ToList <IHtmlElement>();
                    Console.WriteLine(companies.Count);
                    foreach (var item in companies)
                    {
                        try
                        {
                            var name = item.Find(".dt h4 a").FirstOrDefault().InnerText();
                            Console.WriteLine("公司名=" + name);
                            var linkMan = item.Find(".dt p span").FirstOrDefault().InnerText();
                            Console.WriteLine("联系人=" + linkMan);
                            var saleProduct = item.Find(".dd p").FirstOrDefault().InnerText().Replace("主营:", "");
                            Console.WriteLine("主营=" + saleProduct);

                            var url = item.Find(".dd ul li").Last().Find("a").FirstOrDefault().Attribute("href").Value();
                            Console.WriteLine(url);
                            // Pause before requesting the company's own page.
                            System.Threading.Thread.Sleep(1000);

                            // The detail page gets its own variable; the listing document is left untouched.
                            IHtmlDocument detail = new JumonyParser().LoadDocument(url, utf8);
                            var contactLines = detail.Find(".company-contact-info p").ToList <IHtmlElement>();
                            var tel = contactLines[1].InnerText().Replace("联系电话:", "");
                            Console.WriteLine("电话=" + tel);
                            var address = contactLines[3].InnerText().Replace("公司地址:", "").Replace("  ", "");
                            Console.WriteLine("地址=" + address);

                            COMPANY comp = new COMPANY()
                            {
                                ID           = System.Guid.NewGuid().ToString(),
                                COMPANY_NAME = name,
                                ADDRESS      = address,
                                LINK_MAN     = linkMan,
                                SALE_PRODUCT = saleProduct,
                                TEL          = tel
                            };
                            db.Insertable <COMPANY>(comp).ExecuteCommand();

                            System.Threading.Thread.Sleep(2000);
                        }
                        catch (Exception ex)
                        {
                            // Skip this company but leave a trace instead of failing silently.
                            Console.WriteLine("公司抓取失败: " + ex.Message);
                        }
                    }
                    System.Threading.Thread.Sleep(2000);
                }
                catch (Exception ex)
                {
                    // Skip this listing page but leave a trace instead of failing silently.
                    Console.WriteLine("第" + i + "页抓取失败: " + ex.Message);
                }
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Loads a job detail page and extracts its description, name, number and budget figures.
 /// </summary>
 /// <param name="url">Address of the job detail page.</param>
 /// <returns>
 /// A dictionary with the keys "desc" (inner HTML of ".main-detail .desc"), "zz" (text of
 /// ".name a") and "number", followed by "price_1", "price_2", ... for every run of digits
 /// found in the budget text. When the budget text has no digits, "price_min" and
 /// "price_max" are added with empty values instead.
 /// </returns>
 private Dictionary <string, string> getjobdesc(string url)
 {
     try
     {
         IHtmlDocument page      = new JumonyParser().LoadDocument(url);
         Regex         digitRuns = new Regex("(?<jg>\\d+)");

         var descriptionHtml = page.Find(".main-detail .desc").FirstOrDefault().InnerHtml();
         var nameText        = page.Find(".name a").FirstOrDefault().InnerText();
         var numberText      = page.Find(".main-top .number").FirstOrDefault().InnerText();
         var budgetText      = page.Find(".main-top .detail-row .budgets .budget span").FirstOrDefault().InnerText();
         var budgetMatches   = digitRuns.Matches(budgetText);

         var fields = new Dictionary <string, string>
         {
             { "desc", descriptionHtml },
             { "zz", nameText },
             { "number", numberText }
         };

         // No figure in the budget text: publish the two empty placeholders and stop.
         if (budgetMatches.Count == 0)
         {
             fields.Add("price_min", "");
             fields.Add("price_max", "");
             return fields;
         }

         // One entry per figure, numbered from 1 in order of appearance.
         int ordinal = 1;
         foreach (Match figure in budgetMatches)
         {
             fields.Add("price_" + ordinal, figure.Groups["jg"].Value);
             ordinal++;
         }
         return fields;
     }
     catch (Exception e)
     {
         // Log, then let the caller see the original exception.
         log.Error(e.Message);
         throw;
     }
 }
Exemplo n.º 3
0
 /// <summary>
 /// LoadCompleted handler for Webbrowser2: parses the body of the loaded page with Jumony
 /// and builds element queries over it.
 /// </summary>
 /// <remarks>The queries are only assigned to locals; nothing is done with them afterwards.</remarks>
 void Webbrowser2_LoadCompleted(object sender, NavigationEventArgs e)
 {
     var           browserDocument = (mshtml.HTMLDocument)Webbrowser2.Document;
     string        bodyMarkup      = browserDocument.body.innerHTML;
     IHtmlDocument parsed          = new JumonyParser().Parse(bodyMarkup);

     // <ul> elements whose Identity() is "houselist-mod-new", and the <li> elements below them.
     IEnumerable <IHtmlElement> houseLists     = parsed.Find("ul").Where(ul => ul.Identity() == "houselist-mod-new");
     IEnumerable <IHtmlElement> houseListItems = houseLists.Find("li");

     // Elements with class "list-item" that are direct children of an <li>.
     IEnumerable <IHtmlElement> listItemBlocks = parsed.Find("li>.list-item");
 }
Exemplo n.º 4
0
        /// <summary>
        /// Posts the pager form of gkml_list.aspx for the given page and, once the response
        /// has arrived, stores every listed item that is not yet in t_spider_zwgk.
        /// </summary>
        /// <param name="cookie">Not used by this method.</param>
        /// <param name="viewstate">Value sent as the __VIEWSTATE form field.</param>
        /// <param name="page">Page number sent as the __EVENTARGUMENT form field.</param>
        /// <remarks>
        /// The work is asynchronous: the method returns as soon as the POST has been issued and
        /// parsing plus database access happen in task continuations. Processing of a page stops
        /// at the first item whose id is already present in the table.
        /// </remarks>
        public static void getByPage(string cookie, string viewstate, int page)
        {
            HttpClient  httpClient  = new HttpClient();
            HttpContent postContent = new FormUrlEncodedContent(new Dictionary <string, string>()
            {
                { "__VIEWSTATE", viewstate },
                { "__VIEWSTATEGENERATOR", "7BE8FDE8" },
                { "__EVENTTARGET", "AspNetPager1" },
                { "__EVENTARGUMENT", page.ToString() },
                { "_keywords", "" },
                { "AspNetPager1_input", "1" },
            });

            httpClient
            .PostAsync("http://hd.huachuan.gov.cn/aspx/gkml_list.aspx", postContent)
            .ContinueWith((postTask) =>
            {
                // A failed request would otherwise only surface as an unobserved task exception.
                if (postTask.IsFaulted || postTask.IsCanceled)
                {
                    Debug.WriteLine("----->【" + page + "】页面请求异常<-----:" + postTask.Exception);
                    postContent.Dispose();
                    httpClient.Dispose();
                    return;
                }

                HttpResponseMessage response = postTask.Result;
                response.Content.ReadAsStringAsync().ContinueWith((readTask) =>
                {
                    try
                    {
                        IHtmlDocument source = new JumonyParser().Parse(readTask.Result);
                        var itemCount        = source.Find(".listbox").Count();
                        for (int i = 1; i <= itemCount; i++)
                        {
                            try
                            {
                                string id = source.FindSingle("#four" + i).Attribute("href").Value().Split('=')[1];
                                var detail = source.Find("#con_four_" + i);
                                // Scraped text ends up inside a quoted SQL literal below, so single
                                // quotes are neutralised in every text field, not only the title.
                                // NOTE(review): the statements are still built by concatenation and
                                // id is inserted unquoted; switch to parameterized commands if
                                // DbHelperMySQL offers them.
                                string author = detail.Find(".li1").Last().InnerText().Replace("发布机构:", "").Replace('\'', '"');
                                string time   = detail.Find(".li2").Last().InnerText().Replace("发文日期:", "").Replace('\'', '"');
                                string title  = detail.Find(".infoname").First().InnerText().Replace("名称:", "").Replace('\'', '"');

                                // An id that is already stored means the list has not changed from
                                // here on, so the rest of the page is skipped.
                                string sql = string.Format("select count(*) from t_spider_zwgk t where t.id={0}", id);
                                int count  = Convert.ToInt32(DbHelperMySQL.GetSingle(sql));
                                if (count > 0)
                                {
                                    return;
                                }

                                // Not stored yet: insert the row, then fetch its content.
                                sql   = string.Format("insert into t_spider_zwgk(id,title,time,author) values({0},'{1}','{2}','{3}')", id, title, time, author);
                                count = DbHelperMySQL.ExecuteSql(sql);
                                if (count == 1)
                                {
                                    getContent(id);
                                }
                            }
                            catch (Exception e)
                            {
                                Debug.WriteLine("----->【" + page + "." + i + "】新闻创建异常<-----:" + e);
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        // Reading or parsing the response failed before any item was handled.
                        Debug.WriteLine("----->【" + page + "】页面解析异常<-----:" + e);
                    }
                    finally
                    {
                        // Everything owned by this request is released once the page is done.
                        response.Dispose();
                        postContent.Dispose();
                        httpClient.Dispose();
                    }
                });
            });
        }
Exemplo n.º 5
0
        /// <summary>
        /// Loads a film detail page and returns one sys_film entry for every magnet:/ftp: link
        /// inside #Zoom that db.SetAdd("filmlink", ...) accepts.
        /// </summary>
        /// <param name="url">Address of the film detail page.</param>
        /// <returns>
        /// The collected entries. The list is empty when the page markup does not contain
        /// "Zoom", and also when any exception occurs; in that case the error is logged and the
        /// url is pushed onto the "error_infourl" list.
        /// </returns>
        public List <sys_film> Get_FilmInfo(string url)
        {
            try
            {
                var           films = new List <sys_film>();
                IHtmlDocument page  = new JumonyParser().LoadDocument(url);

                bool hasZoom = page.InnerHtml().IndexOf("Zoom") >= 0;
                if (!hasZoom)
                {
                    return films;
                }

                var anchors = page.Find("#Zoom a");
                var heading = page.Find(".title_all h1 font").FirstOrDefault().InnerText();
                var body    = page.Find("#Zoom span").FirstOrDefault().InnerHtml();

                // Ratings: take the text between the label and the following "<br />".
                Match  imdbMatch   = new Regex("(?<imdb>IMDb评分.*?<br />)").Match(body);
                Match  doubanMatch = new Regex("(?<douban>豆瓣评分.*?<br />)").Match(body);
                string imdbScore   = imdbMatch.Groups["imdb"].Value.Replace("IMDb评分", "").Replace("<br />", "").Trim();
                string doubanScore = doubanMatch.Groups["douban"].Value.Replace("豆瓣评分", "").Replace("<br />", "").Trim();

                foreach (var anchor in anchors)
                {
                    string target = anchor.Attribute("href").Value();
                    if (target == null)
                    {
                        continue;
                    }

                    bool isDownloadLink = target.Contains("magnet:") || target.Contains("ftp:");
                    if (!isDownloadLink)
                    {
                        continue;
                    }

                    // Only links that SetAdd reports as added are returned.
                    if (!db.SetAdd("filmlink", target))
                    {
                        continue;
                    }

                    films.Add(new sys_film()
                    {
                        link    = target,
                        title   = heading,
                        txt     = body,
                        fromurl = url,
                        imdb    = imdbScore,
                        douban  = doubanScore
                    });
                }

                return films;
            }
            catch (Exception e)
            {
                log.Error(url + "----" + e.Message);
                this.db.ListLeftPush("error_infourl", url);
                return new List <sys_film>();
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Downloads the page at Href, extracts the broker name, the ".tel" link, the house info
        /// text and the picture addresses from it, and pushes them into the window controls,
        /// class1 and liImg before showing the first picture page.
        /// </summary>
        private void Loadeds1()
        {
            string ThisHtml;

            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                ThisHtml        = client.DownloadString(Href);
            }

            IHtmlDocument document_1      = new JumonyParser().Parse(ThisHtml);
            IEnumerable <IHtmlElement> rl = document_1.Find("div");

            IEnumerable <IHtmlElement> personname = rl.Find(".broker");
            IEnumerable <IHtmlElement> photo      = rl.Find(".tel");
            IEnumerable <IHtmlElement> text_aaa   = rl.Find(".house-mian-info");

            // Every non-empty, space-separated token of the info text is appended as "|token".
            StringBuilder summary = new StringBuilder();
            foreach (var aaa in text_aaa)
            {
                foreach (var token in aaa.InnerText().Split(' '))
                {
                    if (token != "")
                    {
                        summary.Append('|').Append(token);
                    }
                }
            }

            // When several elements match, the last one wins.
            foreach (var p in photo)
            {
                PersonnamePhoto.Text = p.Attribute("href").Value();
            }
            foreach (var p in personname)
            {
                Personnametext.Text = p.FindFirst("span").InnerText();
            }

            IEnumerable <IHtmlElement> ul_il = document_1.Find(".show-pic");
            IEnumerable <IHtmlElement> li    = ul_il.Find("li");

            foreach (var img in li)
            {
                liImg.Add(img.FindFirst("img").Attribute("data-src").Value());
            }

            Bak.Text = summary.ToString();

            class1.photo      = PersonnamePhoto.Text;
            class1.PersonName = Personnametext.Text;
            text_.Text        = class1.TextName;
            this.Title        = class1.TextName;
            MaxPage           = liImg.Count;
            GoPage(0);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Fetches one listing response and collects the id of every "li>a" link in it.
        /// </summary>
        /// <param name="address">Address handed to GetUrlString.</param>
        /// <returns>
        /// A Hashtable with two entries: "more" (the response's more field) and "ids"
        /// (a list holding, for each link, the digits of its href joined together).
        /// </returns>
        public Hashtable GetList(string address = "")
        {
            // Fetch and deserialize the response.
            var result   = JsonConvert.DeserializeObject <DuoWan.DwResult>(GetUrlString(address));
            var document = new JumonyParser().Parse(result.html);

            var ids = new List <string>();

            foreach (var link in document.Find("li>a"))
            {
                var detailUrl = link.Attribute("href").Value();

                // The id is made of the digit characters of the url. The comparison has to be
                // ">= 0": with "> 0" the digit '0' was filtered out, so an id such as 105
                // came back as 15.
                // NOTE(review): assumes ToInt(-1) yields -1 for a non-numeric character — confirm.
                ids.Add((from each in detailUrl where each.ToString().ToInt(-1) >= 0 select each).Join(""));
            }

            return new Hashtable()
            {
                { "more", result.more },
                { "ids", ids }
            };
        }
Exemplo n.º 8
0
 /// <summary>
 /// Loads a list page, adds the address of each pager link to the "filmpageurl" sorted set
 /// and, unless the final pager link reads "末页", continues via GetPageUrl with the link
 /// three positions from the end.
 /// </summary>
 /// <param name="url">Address of the list page to read the pager from.</param>
 /// <remarks>
 /// Updates the isendpage, looplast_index, isfirstpage and index members. Any exception is
 /// logged and rethrown.
 /// </remarks>
 public void GetPageUrlToRedis(string url)
 {
     try
     {
         IHtmlDocument html = new JumonyParser().LoadDocument(url);

         // Run the selector once and index into the result; calling ToList()/Count() on the
         // query for every access re-scanned the document each time.
         var pagelist   = html.Find(".co_content8 .x a").ToList();
         var last_index = pagelist.Count - 3;
         var end_index  = pagelist.Count - 1;

         // Kept on purpose: with fewer than three pager links this indexing throws, which is
         // how an unexpected pager has always been reported (logged and rethrown below).
         var last_link = pagelist[last_index];
         var end_txt   = pagelist[end_index].InnerText();

         // isendpage is true when the final link is NOT the "末页" (last page) link.
         this.isendpage = !end_txt.Contains("末页");
         looplast_index = isendpage ? pagelist.Count : pagelist.Count - 1;

         // After the first page, the two leading pager links are skipped.
         for (int i = isfirstpage ? 0 : 2; i < looplast_index; i++)
         {
             string pageurl     = pagelist[i].Attribute("href").Value();
             string pagefullurl = page_baseurl + pageurl;
             db.SortedSetAdd("filmpageurl", pagefullurl, (double)index++);
             if (i == last_index && !isendpage)
             {
                 isfirstpage = false;
                 GetPageUrl(pagefullurl);
             }
         }
     }
     catch (Exception e)
     {
         log.Error(e.Message);
         throw;
     }
 }
Exemplo n.º 9
0
        /// <summary>
        /// Downloads the liaoyang.baixing.com "qiufang" listing, builds a Class1 entry (link and
        /// title) for every ".media-body-title" element and fetches each entry's detail page.
        /// </summary>
        /// <remarks>
        /// Unfinished: neither the collected entries nor the detail-page query are used yet.
        /// </remarks>
        private void BaiXingNewHTMLhreper()
        {
            string url = "http://liaoyang.baixing.com/qiufang/";
            string ThisHtml = BXGetHTMLstr(url);
            using (var ctx = new oaEntities())
            {
                IHtmlDocument document = new JumonyParser().Parse(ThisHtml);
                IEnumerable<IHtmlElement> result = document.Find(".media-body-title");
                List<Class1> Ncss = new List<Class1>();
                foreach (var item in result)
                {
                    Class1 entry = new Class1();
                    // Exists() is the only guard needed. A bare FindFirst("a") statement used to
                    // precede it; that call is gone because it ran before the guard and
                    // presumably throws when the element has no link.
                    entry.href = item.Exists("a") ? item.FindFirst("a").Attribute("href").Value() : string.Empty;
                    entry.TextName = MainWindow.GetN_value(item, "a");

                    #region Read the detail page
                    // Without a link there is nothing to download.
                    if (!string.IsNullOrEmpty(entry.href))
                    {
                        string ThisZ = BXGetHTMLstr(entry.href);
                        IHtmlDocument document_1 = new JumonyParser().Parse(ThisZ);
                        IEnumerable<IHtmlElement> restime = document_1.Find("div>.viewad-topMeta");
                        foreach (var tm in restime)
                        {
                            // TODO: nothing is read from the detail page yet.
                        }
                    }
                    #endregion
                    Ncss.Add(entry);
                }
            }
        }
Exemplo n.º 10
0
        /// <summary>
        /// Reads one page of the huachuan.gov.cn "fgfgg" list and stores every entry that is
        /// not yet in t_spider_bslc, fetching the content of each newly stored entry.
        /// </summary>
        /// <param name="page">
        /// 0 for the index page; any other value selects "page_NN.html", zero-padded below 10.
        /// </param>
        /// <remarks>
        /// Entries whose link starts with "http" get a random seven-digit id and are
        /// de-duplicated by title; all other entries take their id from the fourth path
        /// segment of the link and are de-duplicated by id.
        /// </remarks>
        public static void getByPage(int page)
        {
            string url;

            if (page == 0)
            {
                url = "http://www.huachuan.gov.cn/zwgk/xxgksyzl/fgfgg/index.html";
            }
            else
            {
                string p = page.ToString();
                if (page < 10)
                {
                    p = "0" + p;
                }
                url = "http://www.huachuan.gov.cn/system/more/zwgk/xxgksyzl/fgfgg/index/page_" + p + ".html";
            }
            string        pageStr = Util.getHtmlStr(url, Encoding.Default);
            IHtmlDocument source  = new JumonyParser().Parse(pageStr);
            var           items   = source.Find(".listmain ul li");

            // One generator for the whole page: a new Random per item can be seeded with the
            // same clock value and then hands out the same "random" id again.
            Random idGenerator = new Random();

            foreach (var item in items)
            {
                var    cell = item.FindFirst("div");
                var    link = cell.FindSingle("a");
                string path = link.Attribute("href").Value();

                // Scraped text is placed inside quoted SQL literals below; a single quote in it
                // would break the statement, so it is replaced with a double quote.
                // NOTE(review): the statements are still built by concatenation — switch to
                // parameterized commands if DbHelperMySQL offers them.
                string title = link.InnerText().Replace('\'', '"');
                string time  = cell.NextElement().InnerText().Replace('\'', '"');

                string id;
                string existsSql;
                if (path.StartsWith("http"))
                {
                    // External link: no id in the path, so use a random one and match by title.
                    id        = idGenerator.Next(1000000, 9999999).ToString();
                    existsSql = string.Format("select count(*) from t_spider_bslc t where t.title='{0}'", title);
                }
                else
                {
                    // Site-relative link: the file name of the fourth path segment is the id.
                    id        = path.Split('/')[3].Split('.')[0];
                    existsSql = string.Format("select count(*) from t_spider_bslc t where t.id={0}", id);
                }

                if (Convert.ToInt32(DbHelperMySQL.GetSingle(existsSql)) != 0)
                {
                    continue;
                }

                string insertSql = string.Format("insert into t_spider_bslc(id,title,time,path) values({0},'{1}','{2}','{3}')", id, title, time, path.Replace('\'', '"'));
                if (DbHelperMySQL.ExecuteSql(insertSql) == 1)
                {
                    getContent(path);
                }
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Checks that, after data binding StyleTest1.html, no element matches ".invisible".
        /// </summary>
        public void VisibleTest()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "StyleTest1.html"));

            document.DataBind(null);

            // Expected value first, actual second, so a failure report labels the numbers correctly.
            Assert.AreEqual(0, document.Find(".invisible").Count());
        }
Exemplo n.º 12
0
 /// <summary>
 /// Loads rooturl and builds one query-string address per page, from the page reported by
 /// #currentPage up to the one reported by #pageCount.
 /// </summary>
 /// <param name="url">Ignored: the value is replaced with rooturl before use.</param>
 /// <returns>
 /// The page addresses in ascending page order; an empty list when any exception occurs
 /// (the exception message is logged).
 /// </returns>
 public List <string> GetPageUrl(string url)
 {
     try
     {
         url = rooturl;
         IHtmlDocument html = new JumonyParser().LoadDocument(url, Encoding.UTF8);

         // Reads the "value" attribute of the single element matching the selector.
         Func <string, string> valueOf = selector => html.Find(selector).SingleOrDefault().Attribute("value").Value();

         string entityCount      = valueOf("#entityCount");
         string maxEntityPerPage = valueOf("#maxEntityPerPage");
         string maxPagePerRow    = valueOf("#maxPagePerRow");
         string pageCount        = valueOf("#pageCount");
         string currentPage      = valueOf("#currentPage");
         string currentPageRow   = valueOf("#currentPageRow");
         string pageRowCount     = valueOf("#pageRowCount");

         Int32 lastPage  = Convert.ToInt32(pageCount);
         Int32 firstPage = Convert.ToInt32(currentPage);

         // Only the currentPage value changes from one address to the next, so the text on
         // either side of it is assembled once.
         string beforePage = "entityCount=" + entityCount + "&maxEntityPerPage=" + maxEntityPerPage +
                             "&maxPagePerRow=" + maxPagePerRow + "&pageCount=" + pageCount +
                             "&currentPage=";
         string afterPage = "&currentPageRow=" + currentPageRow +
                            "&pageRowCount=" + pageRowCount +
                            "&cBudget=0-1000000000&budgetTo=&statusBy=&categoryBy=&typeBy=&typeName=&orderByClause=a.c_postDate+desc";

         var   pages  = new List <string>();
         Int32 pageNo = firstPage;
         while (pageNo <= lastPage)
         {
             pages.Add(url + "?" + beforePage + pageNo + afterPage);
             pageNo++;
         }
         return pages;
     }
     catch (Exception e)
     {
         log.Error(e.Message);
         return new List <string>();
     }
 }
Exemplo n.º 13
0
        /// <summary>
        /// LoadCompleted handler for Webbrowser1: parses the loaded page, extracts the
        /// description text, picture addresses and phone number, and pushes them into the
        /// window controls, class1 and liImg before showing the first picture page.
        /// </summary>
        void webbrowser1_LoadCompleted(object sender, NavigationEventArgs e)
        {
            var loaded = (mshtml.HTMLDocument)Webbrowser1.Document;
            Webbrowser1 = Webbrowser2;
            IHtmlDocument parsed = new JumonyParser().Parse(loaded.body.innerHTML);

            // Description: text of a <div>'s first <p> when that <p> is "smallPicDescShow".
            // When several <div>s qualify, the last one wins.
            string description = "";
            foreach (var div in parsed.Find("div"))
            {
                if (!div.Exists("p"))
                {
                    continue;
                }
                var firstParagraph = div.FindFirst("p");
                if (firstParagraph.Identity() == "smallPicDescShow")
                {
                    description = firstParagraph.InnerText();
                }
            }

            // Pictures: the src of the first <img> in every <li> below the "leftImg" list.
            IEnumerable <IHtmlElement> imageLists = parsed.Find("ul").Where(ul => ul.Identity() == "leftImg");
            foreach (var entry in imageLists.Find("li"))
            {
                liImg.Add(entry.FindFirst("img").Attribute("src").Value());
            }

            // Phone: text of the last ".phone-num" element below "houseChatEntry", if any.
            IEnumerable <IHtmlElement> chatEntries = parsed.Find("div").Where(d => d.Identity() == "houseChatEntry");
            string phoneNumber = "";
            foreach (var number in chatEntries.Find(".phone-num"))
            {
                phoneNumber = number.InnerText();
            }

            PersonnamePhoto.Text = phoneNumber;
            Bak.Text             = description;
            Personnametext.Text  = class1.PersonName;
            text_.Text           = class1.TextName;
            this.Title           = class1.TextName;
            class1.photo         = phoneNumber;
            class1.bak           = description;
            MaxPage              = liImg.Count;
            GoPage(0);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Parses SpecificationTest8.html and checks that the div ends up with exactly one
        /// attribute and that its two links carry the texts "Test1" and " \"Test2".
        /// </summary>
        public void SpecificationTest8()
        {
            var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "SpecificationTest8.html"));

            // Expected value first, actual second, so a failure report labels the values correctly.
            Assert.AreEqual(1, document.FindSingle("div").Attributes().Count(), "错误的解析了非法的属性");
            var links = document.Find("div a").ToArray();

            Assert.AreEqual(2, links.Length, "错误的解析了不属于属性值的引用内容");
            Assert.AreEqual("Test1", links[0].InnerText(), "错误的解析了不属于属性值的引用内容");
            Assert.AreEqual(" \"Test2", links[1].InnerText(), "错误的解析了不属于属性值的引用内容");
        }
Exemplo n.º 15
0
 /// <summary>
 /// Loads a job page and reads its title, author, budget text, the first and third
 /// "#p-other" list items and the tag-free text of "#wrap".
 /// </summary>
 /// <param name="url">Address of the job page.</param>
 /// <returns>
 /// The populated sys_job; an empty sys_job when the page markup does not contain
 /// "product-info-summary" or when any exception occurs (the exception is logged).
 /// </returns>
 public sys_job GetJobInfo(string url)
 {
     try
     {
         IHtmlDocument page = new JumonyParser().LoadDocument(url, Encoding.UTF8);

         bool hasSummary = page.InnerHtml().IndexOf("product-info-summary") >= 0;
         if (!hasSummary)
         {
             return new sys_job();
         }

         string heading    = page.Find(".product-info-summary .row h4").FirstOrDefault().InnerText();
         string publisher  = page.Find(".product-info-summary .row small").FirstOrDefault().InnerText().Replace("发布者:", "");
         string budget     = page.Find(".product-info-summary .row .p-desc").FirstOrDefault().InnerText().Replace(" 预算: ", "");
         string firstItem  = page.Find("#p-other ul li:first-child").FirstOrDefault().InnerText();
         string thirdItem  = page.Find("#p-other ul li:nth-child(3)").SingleOrDefault().InnerText();
         string plainText  = page.Find("#wrap").SingleOrDefault().InnerHtml();

         // Strip <script> blocks first, then every remaining tag, then tabs and line breaks.
         plainText = Regex.Replace(plainText, @"<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>", "");
         plainText = Regex.Replace(plainText, "<.*?>", "");
         plainText = plainText.Replace("\t", "").Replace("\r", "").Replace("\n", "");

         var job = new sys_job
         {
             title     = heading,
             author    = publisher,
             desc      = plainText,
             rq        = firstItem,
             tag       = thirdItem,
             price_min = budget
         };
         return job;
     }
     catch (Exception e)
     {
         log.Error(url + "----" + e.Message);
         return new sys_job();
     }
 }
Exemplo n.º 16
0
        //
        //<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="01CreateScreen/">01CreateScreen/</a></td><td align="right">2016-01-11 10:23  </td><td align="right">  - </td><td>&nbsp;</td></tr>
        //<tr><td valign="top"><img src="/icons/text.gif" alt="[TXT]"></td><td><a href="Test_money.py">Test_money.py</a></td><td align="right">2016-01-08 15:53  </td><td align="right">1.1K</td><td>&nbsp;</td></tr>
        //
        /// <summary>
        /// Downloads a directory listing page and returns its folders followed by its text files.
        /// </summary>
        /// <param name="url">
        /// Address of the listing; each entry's Url is this value with the entry name appended.
        /// </param>
        /// <returns>
        /// Folder entries (rows with the folder icon) first, then file entries (rows with the
        /// text icon). Only file entries get LastModified filled in.
        /// </returns>
        public static List <Resource> GetDirectoryContents(string url)
        {
            List <Resource> Rlist = new List <Resource>();
            string          pageHtml;

            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient MyWebClient = new WebClient())
            {
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;
                // The page is decoded as UTF-8.
                pageHtml = Encoding.UTF8.GetString(MyWebClient.DownloadData(url));
            }
            var htmlSource = new JumonyParser().Parse(pageHtml);

            foreach (var icon in htmlSource.Find("img[src=/icons/folder.gif]"))
            {
                // Two levels up from the icon is the element that also holds the entry's link.
                var      row    = icon.Parent().Parent();
                Resource folder = new Resource();
                folder.Name     = row.Find("a").ElementAt(0).InnerText();
                folder.Url      = url + folder.Name;
                folder.IsFolder = true;
                Rlist.Add(folder);
            }

            foreach (var icon in htmlSource.Find("img[src=/icons/text.gif]"))
            {
                var      row  = icon.Parent().Parent();
                Resource file = new Resource();
                file.Name     = row.Find("a").ElementAt(0).InnerText();
                file.Url      = url + file.Name;
                file.IsFolder = false;
                // The first right-aligned cell of the row holds the modification time.
                string modified = row.Find("td[align=right]").ElementAt(0).InnerText();
                file.LastModified = DateTime.Parse(modified);
                Rlist.Add(file);
            }
            return Rlist;
        }
Exemplo n.º 17
0
        /// <summary>
        /// Loads the weather.com.cn one-day page for a city and condenses it into a single line.
        /// </summary>
        /// <param name="cityCode">City code placed into the page address.</param>
        /// <returns>
        /// "city - X - Y", where X and Y are the third and fourth non-blank, space-separated
        /// parts of the hidden_title input's value; null when the page cannot be loaded (a
        /// message box is shown in that case).
        /// </returns>
        public string RequestWeatherWebAnalysisData(string cityCode)
        {
            // Fetch the weather page; a load failure is reported to the user, not thrown.
            IHtmlDocument page;
            try
            {
                page = new JumonyParser().LoadDocument($"http://www.weather.com.cn/weather1d/{cityCode}.shtml", Encoding.GetEncoding("utf-8"));
            }
            catch (Exception)
            {
                MessageBox.Show($"获取天气信息失败!,请稍后右键任务栏小图标点击刷新", "提示");
                return null;
            }

            // The <input id="hidden_title"> carries the weather text; the city name sits in
            // the <span> of the weather box heading.
            var hiddenTitle = page.Find("input[id=hidden_title]").First();
            var cityLabel   = page.Find("div[class=xyn-weather-box]").Find("h2").Find("span").First();

            string cityName    = cityLabel.InnerText().Trim();
            string weatherText = hiddenTitle.Attribute("value").Value();

            // Split on spaces and drop the blank parts.
            List <string> parts = weatherText.Split(' ')
                                  .Where(part => !string.IsNullOrWhiteSpace(part))
                                  .ToList();

            return $"{cityName} - {parts[2]} - {parts[3]}";
        }
Exemplo n.º 18
0
 /// <summary>
 /// Loads a list page, adds the address of each pager link to the "yjspageurl" sorted set
 /// and recurses into the second-to-last pager link until a disabled "下一页" (next page)
 /// item shows that the end has been reached.
 /// </summary>
 /// <param name="url">Address of the list page to read the pager from.</param>
 /// <remarks>
 /// Updates the yjs_endflag, yjsfirstpage and index members. Any exception is logged and
 /// rethrown.
 /// </remarks>
 public void YjsPageUrlList(string url)
 {
     try
     {
         IHtmlDocument html = new JumonyParser().LoadDocument(url);

         // Run the selector once and index into the result; calling ToList() on the query
         // inside the loop re-scanned the document on every iteration.
         var pagelist = html.Find("nav.page_nav_div ul.pagination_webpage li a").ToList();
         var pagenext = html.Find("nav.page_nav_div ul.pagination_webpage li.disabled");
         foreach (var item in pagenext)
         {
             // Compare on the tag-free text of the disabled item.
             string pagetxt = Regex.Replace(item.InnerHtml(), "<.*?>", "");
             if (pagetxt.Contains("下一页"))
             {
                 yjs_endflag = true;
             }
         }

         int size = pagelist.Count;

         // After the first page, the two leading pager links are skipped; the final link is
         // never stored.
         for (int i = yjsfirstpage ? 0 : 2; i < size - 1; i++)
         {
             string pageurl = pagelist[i].Attribute("href").Value();
             this.db.SortedSetAdd("yjspageurl", pageurl, (double)index++);
             if (i == size - 2 && !yjs_endflag)
             {
                 yjsfirstpage = false;
                 YjsPageUrlList(pageurl);
             }
         }
     }
     catch (Exception e)
     {
         log.Error(e.Message);
         throw;
     }
 }
Exemplo n.º 19
0
        /// <summary>
        /// Reads the directory listing at http://192.168.1.40/iwu_android/ and reports the
        /// last folder entry found there.
        /// </summary>
        /// <param name="url">
        /// Receives the address of "apk/app-release.apk" inside that folder.
        /// </param>
        /// <returns>A confirmation prompt containing the folder name and its listed time.</returns>
        public static string CheckAPK(ref string url)
        {
            string downloadurl = "http://192.168.1.40/iwu_android/";
            string pageHtml;

            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient MyWebClient = new WebClient())
            {
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;
                // The page is decoded as UTF-8.
                pageHtml = Encoding.UTF8.GetString(MyWebClient.DownloadData(downloadurl));
            }

            var htmlSource = new JumonyParser().Parse(pageHtml);

            // Two levels up from the last folder icon is the element that also holds the
            // entry's link and its right-aligned time cell.
            var    row        = htmlSource.Find("img[src=/icons/folder.gif]").Last().Parent().Parent();
            string releaseUrl = row.Find("a[href]").First().InnerText();
            string time       = row.Find("td[align=right]").ElementAt(0).InnerText();

            url = downloadurl + releaseUrl + "apk/app-release.apk";
            return "最新版本号:" + releaseUrl + "\n 版本时间:" + time + "\n是否确定下载?";
        }
Exemplo n.º 20
0
 /// <summary>
 /// Reads the jobs listed on one result page, registers each job id in the "jobid" set
 /// (redis, according to the original note) and returns the jobs that SetAdd accepted.
 /// </summary>
 /// <param name="url">Address of the result page.</param>
 /// <returns>
 /// One sys_job per accepted job, combining the fields shown on the result page with those
 /// returned by getjobdesc for the job's detail page.
 /// </returns>
 public List <sys_job> Get_Jobs(string url)
 {
     try
     {
         IHtmlDocument listing   = new JumonyParser().LoadDocument(url);
         var           collected = new List <sys_job>();

         foreach (var card in listing.Find(".search-result a.job-item"))
         {
             string href      = card.Attribute("href").Value();
             string detailUrl = domain + card.Attribute("href").Value();

             // The job id is the part of the link between its last "/" and its last ".".
             int    slashAt = href.LastIndexOf("/");
             int    dotAt   = href.LastIndexOf(".");
             string jobId   = href.Substring(slashAt + 1, dotAt - (slashAt + 1));

             // Jobs that SetAdd rejects are skipped.
             if (!db.SetAdd("jobid", jobId))
             {
                 continue;
             }

             var detail = getjobdesc(detailUrl);

             // Budget bounds are optional in the detail dictionary; default to empty text.
             string lowest;
             if (!detail.TryGetValue("price_1", out lowest))
             {
                 lowest = "";
             }
             string highest;
             if (!detail.TryGetValue("price_2", out highest))
             {
                 highest = "";
             }

             // Text of the first element inside the card that matches the selector.
             Func <string, string> textOf = selector => card.Find(selector).FirstOrDefault().InnerText();

             collected.Add(new sys_job
             {
                 addtime   = DateTime.Now,
                 amount    = textOf(".money").Replace("预算:¥", ""),
                 author    = detail["zz"],
                 desc      = Regex.Replace(detail["desc"], "<.*?>", ""),
                 gq        = textOf(".period").Replace("工期:", ""),
                 jobid     = jobId,
                 joburl    = detailUrl,
                 number    = detail["number"],
                 rq        = textOf(".publish_at").Replace("发布时间:", ""),
                 status    = textOf(".work_status"),
                 tag       = textOf(".pattern"),
                 title     = textOf(".job-title div"),
                 price_min = lowest,
                 price_max = highest
             });
         }
         return collected;
     }
     catch (Exception e)
     {
         // Log, then let the caller see the original exception.
         log.Error(e.Message);
         throw;
     }
 }
Exemplo n.º 21
0
        /// <summary>
        /// Parses a listing page and appends one <c>Class1</c> entry to <paramref name="L_Class"/>
        /// for every "li" found under ".house-list-wrap".
        /// </summary>
        /// <param name="html">HTML markup of the listing page.</param>
        /// <param name="fristLoads">Not used at the moment: the first-load region lookup is disabled.</param>
        /// <param name="L_Class">List that receives the parsed entries.</param>
        /// <param name="r">Counter incremented once per parsed entry.</param>
        public void NewMethod(string html, ref bool fristLoads, ref List<Class1> L_Class, ref int r)
        {
            IHtmlDocument document = new JumonyParser().Parse(html);

            // The region lookup that used to run on the first load (GetSelectQuYu) is disabled,
            // so fristLoads is neither consulted nor reset here.

            IEnumerable<IHtmlElement> listings = document.Find(".house-list-wrap").Find("li");

            foreach (var item in listings)
            {
                Class1 _class = new Class1();

                _class.TextName   = GetN_value(item, ".title>a");
                _class.href       = item.Exists(".title > a") ? item.FindFirst(".title>a").Attribute("href").Value() : string.Empty;
                _class.Quyu       = "同城";
                _class.PersonName = GetN_value(item, ".jjrname-outer");
                _class.Laiyuan    = "58";

                // The address is the concatenated text of every link inside ".baseinfo".
                string adess = "";
                foreach (var addressLink in item.Find(".baseinfo").Find("a"))
                {
                    adess += addressLink.InnerText();
                }
                _class.Address = adess;

                _class.SumMoney  = GetN_value(item, ".sum");
                _class.PingMoney = GetN_value(item, ".unit");
                _class.Allpm     = GetN_value(item, ".baseinfo");

                _class.datetime    = GetN_value(item, ".time");
                _class.Image_Count = GetInt_value(item, ".picNum");
                _class.Image_str   = _class.Image_Count > 0 ? "有" : string.Empty;

                L_Class.Add(_class);
                r++;
            }
        }
Exemplo n.º 22
0
 /// <summary>
 /// Scrapes one listing page and returns a <c>sys_job</c> for every entry whose id is
 /// accepted by <c>db.SetAdd("rrkf_jobid", ...)</c> and whose detail page yields a title.
 /// </summary>
 /// <param name="url">Address of the listing page to load.</param>
 /// <returns>
 /// The jobs collected from the page. If an exception interrupts processing, the error is
 /// logged and the jobs collected up to that point are returned.
 /// </returns>
 public List<sys_job> GetItemList(string url)
 {
     // Declared outside the try block on purpose: ids are registered with SetAdd before the
     // job is returned, so throwing away already-collected jobs on a later failure would
     // make them be skipped on every subsequent run.
     List<sys_job> retlist = new List<sys_job>();
     try
     {
         IHtmlDocument html = new JumonyParser().LoadDocument(url, Encoding.UTF8);
         var           list = html.Find("#r-list-wrapper .row .r-list");
         foreach (var item in list)
         {
             var joblink = item.Find(".r-info a").FirstOrDefault().Attribute("href").Value();

             // The id is everything after the last "id=" in the link.
             int    pos    = joblink.LastIndexOf("id=");
             var    jobid  = joblink.Substring(pos + 3);
             string joburl = domain + joblink;

             // SetAdd returning false means "skip this entry" (presumably the id is already known).
             bool isok = db.SetAdd("rrkf_jobid", jobid);
             if (!isok)
             {
                 continue;
             }

             var     price1 = item.Find(".r-price").SingleOrDefault().InnerText();
             var     number = item.Find("div:nth-child(3)").SingleOrDefault().InnerText();
             var     status = item.Find("div:last-child span").SingleOrDefault().InnerText();
             sys_job job    = GetJobInfo(joburl);

             // A job without a title is treated as a failed detail fetch and dropped.
             if (job.title != null)
             {
                 job.joburl  = joburl;
                 job.jobid   = jobid;
                 job.number  = number;
                 job.status  = status;
                 job.amount  = price1;
                 job.addtime = DateTime.Now;
                 retlist.Add(job);
             }
         }
         return(retlist);
     }
     catch (Exception e)
     {
         log.Error(url + "----" + e.Message);
         return(retlist);
     }
 }
Exemplo n.º 23
0
        /// <summary>
        /// Requests the Tieba "like" page with the BDUSS cookie and returns the
        /// <c>data-fn</c> attribute value of every <c>li[data-fn]</c> element in the response.
        /// </summary>
        /// <returns>The collected <c>data-fn</c> values, in document order.</returns>
        static List<string> GetTbNameList()
        {
            List<string>   oo  = new List<string>();
            HttpWebRequest req = CreateReq("https://tieba.baidu.com/?page=like");

            req.UserAgent       = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1";
            req.CookieContainer = new CookieContainer();
            req.CookieContainer.Add(new Cookie("BDUSS", BDUSS, "/", "baidu.com"));

            // Dispose the response, its stream and the reader so the underlying
            // connection is released even when reading fails.
            string str;
            using (WebResponse response = req.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body))
            {
                str = reader.ReadToEnd();
            }

            IHtmlDocument source = new JumonyParser().Parse(str);

            foreach (var li in source.Find("li[data-fn]"))
            {
                oo.Add(li.Attribute("data-fn").Value());
            }

            return(oo);
        }
Exemplo n.º 24
0
 /// <summary>
 /// Loads a listing page, follows the "a.ulink" link of every table under
 /// ".co_content8 ul" and gathers the films returned by <c>Get_FilmInfo</c>.
 /// </summary>
 /// <param name="url">Address of the listing page to load.</param>
 /// <returns>
 /// All films gathered from the page, or an empty list when any step throws; in that
 /// case the error is logged and the page URL is pushed onto the "error_pageurl" list.
 /// </returns>
 public List<sys_film> GetItemList(string url)
 {
     try
     {
         IHtmlDocument page = new JumonyParser().LoadDocument(url, Encoding.UTF8);
         var collected = new List<sys_film>();

         foreach (var table in page.Find(".co_content8 ul table"))
         {
             var    detailLink  = table.Find("a.ulink").FirstOrDefault();
             string relativeUrl = detailLink.Attribute("href").Value();

             collected.AddRange(Get_FilmInfo(domainurl + relativeUrl));
         }

         return collected;
     }
     catch (Exception failure)
     {
         log.Error(url + "-----" + failure.Message);
         this.db.ListLeftPush("error_pageurl", url);
         return new List<sys_film>();
     }
 }
Exemplo n.º 25
0
        /// <summary>
        /// Parses the HTML text and fills a <c>TargetModel</c>: every ".f-lan tr" row whose
        /// first cell holds one of the known section titles is handed to the matching
        /// <c>ModularNN</c> routine together with the row index.
        /// </summary>
        /// <param name="SourceHtml">HTML markup to analyse.</param>
        /// <returns>The model populated by the section routines.</returns>
        private TargetModel HtmlAnalytical(string SourceHtml)
        {
            TargetModel model = new TargetModel(); // container for the extracted data
            var sorceIhtml = new JumonyParser().Parse(SourceHtml);

            // Materialize once so the rows can be addressed by index.
            IHtmlElement[] elements = sorceIhtml.Find(".f-lan tr").ToArray();

            for (int i = 0; i < elements.Length; i++)
            {
                var firstCell = elements[i].Find("td").FirstOrDefault();
                if (firstCell == null)
                {
                    // A row without any <td> cannot carry a section title; skip it
                    // instead of failing with a NullReferenceException.
                    continue;
                }

                var text = firstCell.InnerText();
                Console.WriteLine(text);

                switch (text.Trim())
                {
                case "工商登记注册基本信息":
                    Modular01(sorceIhtml, model, i);
                    break;

                case "资本相关信息":
                    Modular02(sorceIhtml, model, i);
                    break;

                case "组织机构代码信息":
                    Modular03(sorceIhtml, model, i);
                    break;

                case "税务登记信息":
                    Modular04(sorceIhtml, model, i);
                    break;

                default:
                    break;
                }
            }
            return(model);
        }
Exemplo n.º 26
0
 /// <summary>
 /// Refreshes textBox3: fetches the reservation index page, writes the text of every
 /// ".list-group-item-heading" element on its own line and finishes with the refresh time.
 /// </summary>
 /// <remarks>
 /// Failures are still non-fatal, but they are now written to the trace output instead
 /// of being silently discarded.
 /// </remarks>
 public void Index()
 {
     try
     {
         string now = Convert.ToDateTime(GetNetDateTime()).ToString("HH:mm:ss");
         textBox3.Clear();

         // The HTML must be decoded as UTF-8, otherwise it comes out garbled.
         IHtmlDocument source = new JumonyParser().Parse(Get("http://wechat.laixuanzuo.com/index.php/reserve/index.html?f=wechat", "Hm_lpvt_7838cef374eb966ae9ff502c68d6f098=" + GetTimeStamp(true) + "; Hm_lvt_7838cef374eb966ae9ff502c68d6f098=" + GetTimeStamp(true) + ";FROM_TYPE=weixin;wechatSESS_ID=" + textBox1.Text));

         foreach (var heading in source.Find(".list-group-item-heading"))
         {
             // Flatten multi-line headings onto one line.
             string line = heading.InnerText().Replace("\n", "   ");
             textBox3.AppendText(line + System.Environment.NewLine);
         }
         textBox3.AppendText("刷新时间" + now + System.Environment.NewLine);
     }
     catch (Exception ex)
     {
         // Best effort: a failed refresh must not take the UI down, but leave a trace
         // so the failure can be diagnosed.
         System.Diagnostics.Trace.TraceError("Index refresh failed: " + ex);
     }
 }
Exemplo n.º 27
0
        /// <summary>
        /// Collects the pagination links of a page and stores them in the "pageurl" sorted
        /// set (Redis, according to the original comment), recursing into the second-to-last
        /// link until that link stops changing.
        /// </summary>
        /// <param name="url">Address of the page whose pagination links are read.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The page contains fewer than two ".page-div nav ul li a" links.
        /// </exception>
        public void PageUrlList(string url)
        {
            IHtmlDocument html = new JumonyParser().LoadDocument(url);

            // Materialize once: the selector query is lazy and would otherwise be re-run
            // for every indexed access below.
            var pagelist = html.Find(".page-div nav ul li a").ToList();
            int size     = pagelist.Count;

            // NOTE(review): the second-to-last link is presumably the highest visible page
            // number and the last link the "end" page - confirm against the site markup.
            IHtmlElement lastpage_el      = pagelist[size - 2];
            string       lastpageurl      = lastpage_el.Attribute("href").Value();
            string       lastpage_fullurl = domain + lastpageurl;
            string       end_page_url     = domain + pagelist[size - 1].Attribute("href").Value();

            for (int i = firstpage; i < size - 1; i++)
            {
                string page_fullurl = domain + pagelist[i].Attribute("href").Value();
                this.db.SortedSetAdd("pageurl", page_fullurl, (double)index++);
            }

            // Keep following the last visible page link until it no longer changes.
            if (lasturl != lastpage_fullurl)
            {
                lasturl   = lastpage_fullurl;
                firstpage = 1;
                PageUrlList(lastpage_fullurl);
            }
            this.db.SortedSetAdd("pageurl", end_page_url, (double)index++);
        }
Exemplo n.º 28
0
        /// <summary>
        /// Navigation handler for a detail page: parses the browser document and copies the
        /// description, image URLs, phone number and overview fields into <c>L_Class[i]</c>,
        /// then clears the <c>loading</c> flag.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Navigation event data (not used).</param>
        private void web2_Navigated(object sender, NavigationEventArgs e)
        {
            // NOTE(review): the handler is named web2_Navigated but reads web1.Document - confirm this is intended.
            mshtml.HTMLDocument mhtml = (mshtml.HTMLDocument)web1.Document;
            string        html        = mhtml.body.innerHTML;
            IHtmlDocument document_1  = new JumonyParser().Parse(html);

            IEnumerable<IHtmlElement> rl = document_1.Find("div");

            // Description: <p> elements identified as "smallPicDescShow"; the last match wins.
            foreach (var rs in rl.Find("p").Where(x => x.Identity() == "smallPicDescShow"))
            {
                L_Class[i].bak = rs.InnerText();
            }

            // Images: append the src of the first <img> of every <li> under the "leftImg" list,
            // separated by "---".
            IEnumerable<IHtmlElement> li = document_1.Find("ul").Where(p => p.Identity() == "leftImg").Find("li");
            foreach (var img in li)
            {
                string src = img.FindFirst("img").Attribute("src").Value();
                L_Class[i].Image_str = L_Class[i].Image_str.Length > 0 ? L_Class[i].Image_str + "---" + src : src;
            }

            // Phone: text of the last ".phone-num" element inside the "houseChatEntry" div.
            IEnumerable<IHtmlElement> Phon_p = document_1.Find("div").Where(d => d.Identity() == "houseChatEntry").Find(".phone-num");
            string phone = "";
            foreach (var p in Phon_p)
            {
                phone = p.InnerText();
            }

            // Overview information. Materialized once: the query is lazy, and the layout
            // decision below depends on the total number of entries.
            var GKelement = rl.Where(x => x.Identity() == "generalSituation").Find(".c_000").ToList();
            int total     = GKelement.Count;

            for (int ElementI = 0; ElementI < total; ElementI++)
            {
                string text = GKelement[ElementI].InnerText();

                // The first three entries mean the same in every layout.
                if (ElementI == 0)
                {
                    L_Class[i].FwSumMoney = text;
                    continue;
                }
                if (ElementI == 1)
                {
                    L_Class[i].FwHuXing = text;
                    continue;
                }
                if (ElementI == 2)
                {
                    L_Class[i].FwMianji = text;
                    continue;
                }

                if (total > 6)
                {
                    // Long layout: orientation, floor, decoration, age.
                    switch (ElementI)
                    {
                    case 3:
                        L_Class[i].FwChaoxiang = text;
                        break;

                    case 4:
                        // NOTE(review): every other layout writes FwLoucheng; this one writes Loucheng - confirm.
                        L_Class[i].Loucheng = text;
                        break;

                    case 5:
                        L_Class[i].FwZhuangxiu = text;
                        break;

                    case 6:
                        L_Class[i].FwNianxian = text;
                        break;
                    }
                }
                else if (total == 4)
                {
                    // Short layout: the fourth entry is the floor when it contains "层"
                    // (at any position but the very first), otherwise the decoration.
                    if (text.IndexOf("层") > 0)
                    {
                        L_Class[i].FwLoucheng = text;
                    }
                    else
                    {
                        L_Class[i].FwZhuangxiu = text;
                    }
                }
                else
                {
                    // Default layout: floor, decoration, age.
                    switch (ElementI)
                    {
                    case 3:
                        L_Class[i].FwLoucheng = text;
                        break;

                    case 4:
                        L_Class[i].FwZhuangxiu = text;
                        break;

                    case 5:
                        L_Class[i].FwNianxian = text;
                        break;
                    }
                }
            }
            L_Class[i].photo = phone;

            loading = false; // loading has finished: clear the flag so the next loop iteration can start
        }
Exemplo n.º 29
0
        /// <summary>
        /// Navigation handler for a listing page: parses every "li" under ".house-list-wrap"
        /// into a <c>newWORD.Class1</c>, derives its publication time from the ".time" text and
        /// adds it to <c>L_Class</c> until entries not newer than <c>MaxTime</c> are reached.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Navigation event data (not used).</param>
        void webbrowserUpload(object sender, NavigationEventArgs e)
        {
            mshtml.HTMLDocument mhtml = (mshtml.HTMLDocument)web1.Document;
            string        html        = mhtml.body.innerHTML;
            IHtmlDocument document    = new JumonyParser().Parse(html);

            IEnumerable<IHtmlElement> result = document.Find(".house-list-wrap");
            IEnumerable<IHtmlElement> t      = result.Find("li");
            bool ToNotDown = true;
            int  ret       = 0; // number of entries seen so far that are not newer than MaxTime

            foreach (var item in t)
            {
                newWORD.Class1 _class = new newWORD.Class1();

                _class.TextName   = MainWindow.GetN_value(item, ".title>a");
                _class.href       = item.Exists(".title > a") ? item.FindFirst(".title>a").Attribute("href").Value() : string.Empty;
                _class.Quyu       = "同城";
                _class.PersonName = MainWindow.GetN_value(item, ".jjrname-outer");
                _class.Laiyuan    = "58";

                // The address is the concatenated text of every link inside ".baseinfo".
                string adess = "";
                foreach (var addsaa in item.Find(".baseinfo").Find("a"))
                {
                    adess += addsaa.InnerText();
                }
                _class.Address = adess;

                string   Timestr = MainWindow.GetN_value(item, ".time");
                DateTime Dte     = DateTime.Now;

                // Default: today at midnight. Built from the components directly so the result
                // does not depend on how the current culture parses a "y-M-d" string.
                _class.FbTime = new DateTime(Dte.Year, Dte.Month, Dte.Day);
                if (Timestr != "今天")
                {
                    if (Timestr.IndexOf("分钟") > -1)
                    {
                        // "<n>分钟": published n minutes ago.
                        _class.FbTime = Dte.AddMinutes(-(Convert.ToInt32(Timestr.Replace("分钟", string.Empty))));
                    }
                    else if (Timestr.IndexOf("小时") > -1)
                    {
                        // "<n>小时": published n hours ago.
                        _class.FbTime = Dte.AddHours(-(Convert.ToInt32(Timestr.Replace("小时", string.Empty))));
                    }
                    else
                    {
                        // Otherwise the text is treated as "month-day" within the current year.
                        string[] strTime = Timestr.Split('-');
                        _class.FbTime = Convert.ToDateTime(_class.FbTime.Year + "-" + strTime[0] + "-" + strTime[1]);
                    }

                    if (_class.FbTime <= MaxTime)
                    {
                        // Tolerate two entries that are not newer than MaxTime, stop at the third.
                        if (ret >= 2)
                        {
                            ToNotDown = false;
                            break;
                        }
                        ret++;
                    }
                }
                else
                {
                    // Entries dated "今天" are skipped when one with the same name and address
                    // is already present in iqdata.
                    if (iqdata.FirstOrDefault(x => x.HLName == _class.TextName && x.Address == _class.Address) != null)
                    {
                        continue;
                    }
                }

                _class.datetime    = "";
                _class.Image_Count = MainWindow.GetInt_value(item, ".picNum");
                _class.Image_str   = _class.Image_Count > 0 ? "有" : string.Empty;

                // Entries are only collected while no old entry has been encountered.
                if (ret == 0)
                {
                    L_Class.Add(_class);
                }
                r++;
            }

            int nextI = 0;
            if (ToNotDown)
            {
                // TODO: jumping to the next page is disabled. The former code walked all <a>
                // elements of the document, clicked the one whose text is "下一页" (unless
                // ZantingI >= 1) and incremented ZantingI and nextI.
            }

            // nextI stays 0 while the next-page jump is disabled, so this is currently always false.
            loadingS = nextI > 0;
        }
Exemplo n.º 30
0
        /// <summary>
        /// Scrapes one listing page and returns a <c>sys_job</c> for every
        /// "#db_adapt_id .weui_panel" entry whose id is accepted by
        /// <c>db.SetAdd("yjsjobid", ...)</c> and whose markup contains "zhushi_span".
        /// </summary>
        /// <param name="url">Address of the listing page to load.</param>
        /// <returns>The jobs built from the page.</returns>
        /// <remarks>Any exception is logged (message only) and then rethrown unchanged.</remarks>
        public List<sys_job> YjsJobs(string url)
        {
            try
            {
                string        regtxt  = "<.*?>"; // matches anything that looks like an HTML tag
                IHtmlDocument html    = new JumonyParser().LoadDocument(url);
                var           jobs    = html.Find("#db_adapt_id .weui_panel");
                List<sys_job> listjob = new List<sys_job>();
                foreach (var item in jobs)
                {
                    string joburl = item.Find("a").FirstOrDefault().Attribute("href").Value();

                    // The id is everything after the last "/" of the link.
                    int    pos1  = joburl.LastIndexOf("/");
                    string jobid = joburl.Substring(pos1 + 1);

                    // SetAdd returning false means "skip this entry" (presumably the id is already known).
                    bool isok = this.db.SetAdd("yjsjobid", jobid);
                    if (!isok)
                    {
                        continue;
                    }

                    // Find() yields a sequence, so "is it missing" has to be checked on the
                    // element returned by FirstOrDefault(), not on the sequence itself.
                    var    titleElement = item.Find("a .topic_title").FirstOrDefault();
                    string jobtitle     = titleElement != null ? titleElement.InnerText() : "";

                    string jobdesc  = string.Empty;
                    string jobgs    = string.Empty;
                    string jobprice = string.Empty;
                    var    subitems = item.Find(".job_list_item_div .media_desc_adapt");

                    var    authorElement = item.Find("h4.weui_media_title ").FirstOrDefault();
                    string author        = authorElement != null ? authorElement.InnerText() : "";

                    // Entries whose markup lacks "zhushi_span" are treated as finished and skipped.
                    bool isover = item.ToString().IndexOf("zhushi_span") <= 0;
                    if (isover)
                    {
                        continue;
                    }

                    var    numberElement = item.Find("span.zhushi_span").FirstOrDefault();
                    string numberhtml    = numberElement != null ? numberElement.InnerHtml() : "";
                    string number        = Regex.Replace(numberhtml, regtxt, "");

                    // Each detail line is recognised by the icon class it contains.
                    foreach (var subitem in subitems)
                    {
                        string subitemhtml = subitem.InnerHtml();
                        if (subitemhtml.IndexOf("glyphicon-th-large") >= 0)
                        {
                            jobdesc = Regex.Replace(subitemhtml, regtxt, "").Replace("描述:", "");
                        }
                        if (subitemhtml.IndexOf("glyphicon-hourglass") >= 0)
                        {
                            jobgs = Regex.Replace(subitemhtml, regtxt, "").Replace("工时:", "");
                        }
                        if (subitemhtml.IndexOf("glyphicon-yen") >= 0)
                        {
                            jobprice = Regex.Replace(subitemhtml, regtxt, "").Replace("总价:", "").Replace("元", "");
                        }
                    }

                    sys_job jobentry = new sys_job
                    {
                        jobid   = jobid,
                        title   = jobtitle,
                        desc    = jobdesc,
                        number  = number,
                        joburl  = joburl,
                        addtime = DateTime.Now,
                        amount  = jobprice,
                        author  = author,
                        gq      = jobgs
                    };
                    listjob.Add(jobentry);
                }
                return(listjob);
            }
            catch (Exception e)
            {
                log.Error(e.Message);
                throw;
            }
        }