/// <summary>
/// Crawls listing pages 1 to 50 of cn.coovee.com, opens each company's detail page and
/// inserts one COMPANY row (name, contact, main products, phone, address) per company.
/// </summary>
/// <remarks>
/// The crawl is best-effort: a failure on one company or one listing page is reported on
/// the console and the crawl moves on, instead of being silently discarded.
/// </remarks>
static void GetDataByJumony()
{
    Console.WriteLine("开始跑数据");
    var db = DB.GetInstance();
    // Resolve the encoding once instead of on every request.
    var utf8 = System.Text.Encoding.GetEncoding("utf-8");

    for (var page = 1; page < 51; page++)
    {
        try
        {
            var listUrl = "http://cn.coovee.com/company/s1.html?p=" + page.ToString();
            IHtmlDocument listDoc = new JumonyParser().LoadDocument(listUrl, utf8);
            var companies = listDoc.Find(".company-l-item").ToList();
            Console.WriteLine(companies.Count);

            foreach (var item in companies)
            {
                try
                {
                    var nameLink = item.Find(".dt h4 a").FirstOrDefault();
                    Console.WriteLine("公司名=" + nameLink.InnerText());
                    var contact = item.Find(".dt p span").FirstOrDefault();
                    Console.WriteLine("联系人=" + contact.InnerText());
                    var mainProducts = item.Find(".dd p").FirstOrDefault();
                    Console.WriteLine("主营=" + mainProducts.InnerText().Replace("主营:", ""));
                    var detailUrl = item.Find(".dd ul li").Last().Find("a").FirstOrDefault().Attribute("href").Value();
                    Console.WriteLine(detailUrl);

                    // Throttle before hitting the detail page.
                    System.Threading.Thread.Sleep(1000);
                    IHtmlDocument detailDoc = new JumonyParser().LoadDocument(detailUrl, utf8);

                    // Query the contact paragraphs once; index 1 is the phone row, index 3 the address row.
                    var contactRows = detailDoc.Find(".company-contact-info p").ToList<IHtmlElement>();
                    var tel = contactRows[1].InnerText().Replace("联系电话:", "");
                    Console.WriteLine("电话=" + tel);
                    var address = contactRows[3].InnerText().Replace("公司地址:", "").Replace(" ", "");
                    Console.WriteLine("地址=" + address);

                    COMPANY comp = new COMPANY()
                    {
                        ID = System.Guid.NewGuid().ToString(),
                        COMPANY_NAME = nameLink.InnerText(),
                        ADDRESS = address,
                        LINK_MAN = contact.InnerText(),
                        SALE_PRODUCT = mainProducts.InnerText().Replace("主营:", ""),
                        TEL = tel
                    };
                    db.Insertable<COMPANY>(comp).ExecuteCommand();
                    System.Threading.Thread.Sleep(2000);
                }
                catch (Exception ex)
                {
                    // Skip this company but leave a trace of why it was skipped.
                    Console.WriteLine("公司数据抓取失败:" + ex.Message);
                }
            }

            System.Threading.Thread.Sleep(2000);
        }
        catch (Exception ex)
        {
            // Skip this listing page but leave a trace of why it was skipped.
            Console.WriteLine("列表页抓取失败(p=" + page + "):" + ex.Message);
        }
    }
}
/// <summary>
/// Loads a job detail page and extracts its description, author, number and budget figures.
/// </summary>
/// <param name="url">URL of the job detail page.</param>
/// <returns>
/// A dictionary with the keys "desc", "zz" and "number", plus either "price_1".."price_N"
/// (one entry per run of digits found in the budget text) or empty "price_min"/"price_max"
/// entries when the budget text contains no digits.
/// </returns>
private Dictionary<string, string> getjobdesc(string url)
{
    try
    {
        IHtmlDocument page = new JumonyParser().LoadDocument(url);
        Regex digitRuns = new Regex("(?<jg>\\d+)");

        var descriptionHtml = page.Find(".main-detail .desc").FirstOrDefault().InnerHtml();
        var author = page.Find(".name a").FirstOrDefault().InnerText();
        var jobNumber = page.Find(".main-top .number").FirstOrDefault().InnerText();
        var budgetText = page.Find(".main-top .detail-row .budgets .budget span").FirstOrDefault().InnerText();
        var budgetMatches = digitRuns.Matches(budgetText);

        var fields = new Dictionary<string, string>
        {
            { "desc", descriptionHtml },
            { "zz", author },
            { "number", jobNumber }
        };

        if (budgetMatches.Count == 0)
        {
            fields.Add("price_min", "");
            fields.Add("price_max", "");
            return fields;
        }

        // Keys are 1-based: price_1, price_2, ...
        int ordinal = 1;
        foreach (Match match in budgetMatches)
        {
            fields.Add("price_" + ordinal, match.Groups["jg"].Value);
            ordinal++;
        }

        return fields;
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        throw;
    }
}
/// <summary>
/// LoadCompleted handler for Webbrowser2: parses the rendered page body with Jumony and
/// builds the house-list queries.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Navigation event data (not used).</param>
void Webbrowser2_LoadCompleted(object sender, NavigationEventArgs e)
{
    var browserDocument = (mshtml.HTMLDocument)Webbrowser2.Document;
    string bodyHtml = browserDocument.body.innerHTML;
    IHtmlDocument parsed = new JumonyParser().Parse(bodyHtml);

    // NOTE(review): none of the three query results below is consumed in this method.
    IEnumerable<IHtmlElement> houseLists = parsed.Find("ul").Where(ul => ul.Identity() == "houselist-mod-new");
    IEnumerable<IHtmlElement> houseListItems = houseLists.Find("li");
    IEnumerable<IHtmlElement> listItemBodies = parsed.Find("li>.list-item");
}
/// <summary>
/// Posts an ASP.NET pager postback for the given page of the public-information list and,
/// in a background continuation, stores every list entry that is not yet in t_spider_zwgk.
/// </summary>
/// <param name="cookie">Not used by this method.</param>
/// <param name="viewstate">Value sent as the __VIEWSTATE form field.</param>
/// <param name="page">Page number sent as the pager's __EVENTARGUMENT.</param>
/// <remarks>
/// The method returns immediately; parsing and database work run in task continuations.
/// Processing stops at the first entry that already exists in the table.
/// NOTE(review): the SQL is still assembled as text. The id is checked to be numeric and the
/// text values have their single quotes neutralised, but parameterized commands should be
/// used if DbHelperMySQL offers them — confirm.
/// </remarks>
public static void getByPage(string cookie, string viewstate, int page)
{
    HttpClient httpClient = new HttpClient();
    HttpContent postContent = new FormUrlEncodedContent(new Dictionary<string, string>()
    {
        { "__VIEWSTATE", viewstate },
        { "__VIEWSTATEGENERATOR", "7BE8FDE8" },
        { "__EVENTTARGET", "AspNetPager1" },
        { "__EVENTARGUMENT", page.ToString() },
        { "_keywords", "" },
        { "AspNetPager1_input", "1" },
    });

    httpClient
        .PostAsync("http://hd.huachuan.gov.cn/aspx/gkml_list.aspx", postContent)
        .ContinueWith((postTask) =>
        {
            if (postTask.IsFaulted || postTask.IsCanceled)
            {
                // Observe the failure instead of leaving an unobserved task exception behind.
                Debug.WriteLine("----->【" + page + "】列表请求失败<-----:" + postTask.Exception);
                httpClient.Dispose();
                return;
            }

            HttpResponseMessage response = postTask.Result;
            response.Content.ReadAsStringAsync().ContinueWith((readTask) =>
            {
                try
                {
                    IHtmlDocument source = new JumonyParser().Parse(readTask.Result);
                    var itemCount = source.Find(".listbox").Count();
                    for (int i = 1; i <= itemCount; i++)
                    {
                        try
                        {
                            string id = source.FindSingle("#four" + i).Attribute("href").Value().Split('=')[1];
                            // The id is spliced into SQL unquoted, so accept plain digits only.
                            if (id.Length == 0 || !id.All(c => c >= '0' && c <= '9'))
                            {
                                throw new FormatException("Unexpected non-numeric id: " + id);
                            }

                            // Single quotes are replaced in every text value, not only in the title.
                            string author = source.Find("#con_four_" + i).Find(".li1").Last().InnerText().Replace("发布机构:", "").Replace('\'', '"');
                            string time = source.Find("#con_four_" + i).Find(".li2").Last().InnerText().Replace("发文日期:", "").Replace('\'', '"');
                            string title = source.Find("#con_four_" + i).Find(".infoname").First().InnerText().Replace("名称:", "").Replace('\'', '"');

                            // If this entry already exists the list has not changed since the last
                            // run, so stop processing this page altogether.
                            string sql = string.Format("select count(*) from t_spider_zwgk t where t.id={0}", id);
                            int count = Convert.ToInt32(DbHelperMySQL.GetSingle(sql));
                            if (count > 0)
                            {
                                return;
                            }

                            // Not stored yet: insert it and fetch its content.
                            sql = string.Format("insert into t_spider_zwgk(id,title,time,author) values({0},'{1}','{2}','{3}')", id, title, time, author);
                            count = DbHelperMySQL.ExecuteSql(sql);
                            if (count == 1)
                            {
                                getContent(id);
                            }
                        }
                        catch (Exception e)
                        {
                            Debug.WriteLine("----->【" + page + "." + i + "】新闻创建异常<-----:" + e);
                        }
                    }
                }
                finally
                {
                    // Release the HTTP resources once the page has been processed.
                    response.Dispose();
                    httpClient.Dispose();
                }
            });
        });
}
/// <summary>
/// Loads a film detail page and returns one sys_film per magnet:/ftp: link that was newly
/// added to the "filmlink" set.
/// </summary>
/// <param name="url">URL of the film detail page.</param>
/// <returns>
/// The newly seen download links with the page title, description and ratings attached;
/// an empty list when the page contains no "Zoom" marker or when any error occurs.
/// </returns>
public List<sys_film> Get_FilmInfo(string url)
{
    try
    {
        var found = new List<sys_film>();
        IHtmlDocument page = new JumonyParser().LoadDocument(url);
        if (page.InnerHtml().IndexOf("Zoom") < 0)
        {
            return found;
        }

        var anchors = page.Find("#Zoom a");
        var heading = page.Find(".title_all h1 font").FirstOrDefault().InnerText();
        var description = page.Find("#Zoom span").FirstOrDefault().InnerHtml();

        // Ratings are lifted out of the description HTML, up to the following <br />.
        var imdbMatch = new Regex("(?<imdb>IMDb评分.*?<br />)").Match(description);
        var doubanMatch = new Regex("(?<douban>豆瓣评分.*?<br />)").Match(description);
        string imdbScore = imdbMatch.Groups["imdb"].Value.Replace("IMDb评分", "").Replace("<br />", "").Trim();
        string doubanScore = doubanMatch.Groups["douban"].Value.Replace("豆瓣评分", "").Replace("<br />", "").Trim();

        foreach (var anchor in anchors)
        {
            string href = anchor.Attribute("href").Value();
            if (href == null)
            {
                continue;
            }

            bool isDownloadLink = href.Contains("magnet:") || href.Contains("ftp:");
            if (!isDownloadLink)
            {
                continue;
            }

            // Only links whose SetAdd call reports success are returned.
            if (!db.SetAdd("filmlink", href))
            {
                continue;
            }

            found.Add(new sys_film()
            {
                link = href,
                title = heading,
                txt = description,
                fromurl = url,
                imdb = imdbScore,
                douban = doubanScore
            });
        }

        return found;
    }
    catch (Exception e)
    {
        log.Error(url + "----" + e.Message);
        this.db.ListLeftPush("error_infourl", url);
        return new List<sys_film>();
    }
}
/// <summary>
/// Downloads the page at Href, extracts the broker name, the phone link, the house info
/// text and the picture URLs, pushes them into the window's controls and class1, then
/// shows the first picture page.
/// </summary>
private void Loadeds1()
{
    // WebClient is disposable; release it as soon as the download is finished.
    string pageHtml;
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.UTF8;
        pageHtml = client.DownloadString(Href);
    }

    IHtmlDocument document_1 = new JumonyParser().Parse(pageHtml);
    IEnumerable<IHtmlElement> divs = document_1.Find("div");
    IEnumerable<IHtmlElement> brokers = divs.Find(".broker");
    IEnumerable<IHtmlElement> phoneLinks = divs.Find(".tel");
    IEnumerable<IHtmlElement> infoBlocks = divs.Find(".house-mian-info");

    // Join every non-empty space-separated token as "|token|token...".
    StringBuilder remark = new StringBuilder();
    foreach (var block in infoBlocks)
    {
        foreach (var token in block.InnerText().Split(' '))
        {
            if (token != "")
            {
                remark.Append("|").Append(token);
            }
        }
    }

    // When several elements match, the last one wins.
    foreach (var phoneLink in phoneLinks)
    {
        PersonnamePhoto.Text = phoneLink.Attribute("href").Value();
    }
    foreach (var broker in brokers)
    {
        Personnametext.Text = broker.FindFirst("span").InnerText();
    }

    IEnumerable<IHtmlElement> pictureItems = document_1.Find(".show-pic").Find("li");
    foreach (var pictureItem in pictureItems)
    {
        liImg.Add(pictureItem.FindFirst("img").Attribute("data-src").Value());
    }

    Bak.Text = remark.ToString();
    class1.photo = PersonnamePhoto.Text;
    class1.PersonName = Personnametext.Text;
    text_.Text = class1.TextName;
    this.Title = class1.TextName;
    MaxPage = liImg.Count;
    GoPage(0);
}
/// <summary>
/// Fetches one list response and collects the ids found in its links.
/// </summary>
/// <param name="address">Address passed to GetUrlString to fetch the JSON response.</param>
/// <returns>
/// A Hashtable with "more" (taken from the response) and "ids" (a list with one string per
/// "li&gt;a" link, built from the digit characters of its href).
/// </returns>
public Hashtable GetList(string address = "")
{
    var listAddress = new List<string>();

    // Fetch and deserialize the response, then parse the HTML fragment it carries.
    var result = JsonConvert.DeserializeObject<DuoWan.DwResult>(GetUrlString(address));
    var document = new JumonyParser().Parse(result.html);
    var cells = document.Find("li>a");

    foreach (var li in cells)
    {
        var detailUrl = li.Attribute("href").Value();
        if (detailUrl == null)
        {
            // A link without an href has no id to extract.
            continue;
        }

        // Keep every digit of the URL. The previous "> 0" test dropped each '0', which
        // mangles ids such as 1203 into 123.
        // NOTE(review): assumes ToInt(-1) yields -1 for non-numeric text — confirm.
        listAddress.Add(detailUrl.Where(each => each.ToString().ToInt(-1) >= 0).Join(""));
    }

    var o = new Hashtable()
    {
        { "more", result.more },
        { "ids", listAddress }
    };
    return o;
}
/// <summary>
/// Reads the pager links of a list page and adds each page URL to the "filmpageurl"
/// sorted set, following the pager through GetPageUrl when the page is not the last one.
/// </summary>
/// <param name="url">URL of the list page whose pager is read.</param>
/// <remarks>Any exception is logged and rethrown.</remarks>
public void GetPageUrlToRedis(string url)
{
    try
    {
        IHtmlDocument html = new JumonyParser().LoadDocument(url);
        // Materialize the pager links once; they were previously re-enumerated on every access.
        var pages = html.Find(".co_content8 .x a").ToList();
        var last_index = pages.Count - 3;
        var end_index = pages.Count - 1;

        // Kept although the value is unused: reading it fails fast (logged and rethrown)
        // when the pager has fewer than three links, as before.
        var last_url = pages[last_index].Attribute("href").Value();
        var end_txt = pages[end_index].InnerText();

        // The page counts as the end page when its final pager link is not "末页".
        this.isendpage = end_txt.IndexOf("末页") < 0;
        looplast_index = isendpage ? pages.Count : pages.Count - 1;

        // After the first page, the two leading pager links are skipped.
        int i = isfirstpage ? 0 : 2;
        for (; i < looplast_index; i++)
        {
            IHtmlElement item = pages[i];
            string pageurl = item.Attribute("href").Value();
            string pagefullurl = page_baseurl + pageurl;
            db.SortedSetAdd("filmpageurl", pagefullurl, (double)index++);
            if (i == last_index && !isendpage)
            {
                isfirstpage = false;
                GetPageUrl(pagefullurl);
            }
        }
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        throw;
    }
}
/// <summary>
/// Downloads the baixing.com listing page, builds a Class1 entry (link and text) for every
/// ".media-body-title" element and downloads each entry's detail page.
/// </summary>
/// <remarks>
/// NOTE(review): the collected entries, the database context and the computed date are not
/// used after being built, and the detail-page loop has an empty body.
/// </remarks>
private void BaiXingNewHTMLhreper()
{
    string listingHtml = BXGetHTMLstr("http://liaoyang.baixing.com/qiufang/");

    using (var ctx = new oaEntities())
    {
        DateTime today = Convert.ToDateTime(DateTime.Now.Year.ToString() + "-" + DateTime.Now.Month.ToString() + "-" + DateTime.Now.Day.ToString());
        IHtmlDocument listing = new JumonyParser().Parse(listingHtml);
        IEnumerable<IHtmlElement> titles = listing.Find(".media-body-title");
        List<Class1> entries = new List<Class1>();

        foreach (var title in titles)
        {
            Class1 entry = new Class1();
            title.FindFirst("a");
            if (title.Exists("a"))
            {
                entry.href = title.FindFirst("a").Attribute("href").Value();
            }
            else
            {
                entry.href = string.Empty;
            }
            entry.TextName = MainWindow.GetN_value(title, "a");

            // Read the linked detail page.
            string detailHtml = BXGetHTMLstr(entry.href);
            IHtmlDocument detail = new JumonyParser().Parse(detailHtml);
            IEnumerable<IHtmlElement> metaBlocks = detail.Find("div>.viewad-topMeta");
            foreach (var metaBlock in metaBlocks)
            {
            }

            entries.Add(entry);
        }
    }
}
/// <summary>
/// Downloads one page of the huachuan.gov.cn list and stores every entry that is not yet
/// in t_spider_bslc, fetching the content of each newly inserted entry.
/// </summary>
/// <param name="page">0 for the index page; otherwise the page number (zero-padded below 10).</param>
/// <remarks>
/// Entries with an absolute ("http...") link get a random 7-digit id and are de-duplicated by
/// title; other entries take their id from the fourth path segment and are de-duplicated by id.
/// NOTE(review): the SQL is still assembled as text, now with escaped literals; parameterized
/// commands should be used if DbHelperMySQL offers them — confirm.
/// </remarks>
public static void getByPage(int page)
{
    string url;
    if (page == 0)
    {
        url = "http://www.huachuan.gov.cn/zwgk/xxgksyzl/fgfgg/index.html";
    }
    else
    {
        string p = page.ToString();
        if (page < 10)
        {
            p = "0" + p;
        }
        url = "http://www.huachuan.gov.cn/system/more/zwgk/xxgksyzl/fgfgg/index/page_" + p + ".html";
    }

    string pageStr = Util.getHtmlStr(url, Encoding.Default);
    IHtmlDocument source = new JumonyParser().Parse(pageStr);

    // One generator for the whole page: a new Random per item can repeat values when
    // instances are created in quick succession.
    Random idGenerator = new Random();

    foreach (var item in source.Find(".listmain ul li"))
    {
        var cell = item.FindFirst("div");
        var link = cell.FindSingle("a");
        string path = link.Attribute("href").Value();
        string title = link.InnerText();
        string time = cell.NextElement().InnerText();

        string id;
        string existsSql;
        if (path.StartsWith("http"))
        {
            id = idGenerator.Next(1000000, 9999999).ToString();
            existsSql = string.Format("select count(*) from t_spider_bslc t where t.title='{0}'", escapeSqlText(title));
        }
        else
        {
            id = path.Split('/')[3].Split('.')[0];
            // The id is spliced into SQL unquoted, so accept plain digits only.
            if (id.Length == 0 || !id.All(c => c >= '0' && c <= '9'))
            {
                System.Diagnostics.Debug.WriteLine("Skipped entry with non-numeric id: " + path);
                continue;
            }
            existsSql = string.Format("select count(*) from t_spider_bslc t where t.id={0}", id);
        }

        if (Convert.ToInt32(DbHelperMySQL.GetSingle(existsSql)) != 0)
        {
            continue;
        }

        string insertSql = string.Format(
            "insert into t_spider_bslc(id,title,time,path) values({0},'{1}','{2}','{3}')",
            id, escapeSqlText(title), escapeSqlText(time), escapeSqlText(path));
        if (DbHelperMySQL.ExecuteSql(insertSql) == 1)
        {
            getContent(path);
        }
    }
}

// Escapes a value for use inside a single-quoted MySQL string literal.
private static string escapeSqlText(string value)
{
    return (value ?? "").Replace("\\", "\\\\").Replace("'", "''");
}
/// <summary>
/// Loads StyleTest1.html, data-binds it with a null context and checks that no element
/// matching ".invisible" remains.
/// </summary>
public void VisibleTest()
{
    var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "StyleTest1.html"));
    document.DataBind(null);

    // Expected value first, so a failure message reports expected/actual the right way round.
    Assert.AreEqual(0, document.Find(".invisible").Count());
}
/// <summary>
/// Reads the paging fields of the root list page and builds one URL per page, from the
/// current page up to the last page.
/// </summary>
/// <param name="url">Overwritten with rooturl before use; the passed value is ignored.</param>
/// <returns>The page URLs in ascending page order, or an empty list when anything fails.</returns>
public List<string> GetPageUrl(string url)
{
    try
    {
        // NOTE(review): the argument is discarded in favour of rooturl — confirm this is intended.
        url = rooturl;
        List<string> list = new List<string>();
        IHtmlDocument html = new JumonyParser().LoadDocument(url, Encoding.UTF8);

        // Each paging field is an element addressed by id whose "value" attribute is read.
        Func<string, string> field = id => html.Find("#" + id).SingleOrDefault().Attribute("value").Value();
        string entityCount = field("entityCount");
        string maxEntityPerPage = field("maxEntityPerPage");
        string maxPagePerRow = field("maxPagePerRow");
        string pageCount = field("pageCount");
        string currentPage = field("currentPage");
        string currentPageRow = field("currentPageRow");
        string pageRowCount = field("pageRowCount");

        Int32 count = Convert.ToInt32(pageCount);
        Int32 current = Convert.ToInt32(currentPage);

        // The two parameter names below had been corrupted to "¤tPage" / "¤tPageRow"
        // ("&curren" decoded as an HTML entity), so they were never sent as
        // currentPage / currentPageRow.
        string head = "entityCount=" + entityCount
            + "&maxEntityPerPage=" + maxEntityPerPage
            + "&maxPagePerRow=" + maxPagePerRow
            + "&pageCount=" + pageCount
            + "&currentPage=";
        string tail = "&currentPageRow=" + currentPageRow
            + "&pageRowCount=" + pageRowCount
            + "&cBudget=0-1000000000&budgetTo=&statusBy=&categoryBy=&typeBy=&typeName=&orderByClause=a.c_postDate+desc";

        for (Int32 i = current; i <= count; i++)
        {
            list.Add(url + "?" + head + i + tail);
        }

        return list;
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        return new List<string>();
    }
}
/// <summary>
/// LoadCompleted handler for Webbrowser1: extracts the picture description, the picture
/// URLs and the phone number from the rendered page, pushes them into the window's
/// controls and class1, then shows the first picture page.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Navigation event data (not used).</param>
void webbrowser1_LoadCompleted(object sender, NavigationEventArgs e)
{
    var browserDocument = (mshtml.HTMLDocument)Webbrowser1.Document;
    Webbrowser1 = Webbrowser2;
    IHtmlDocument page = new JumonyParser().Parse(browserDocument.body.innerHTML);

    // Description: text of a div's first <p> when that <p> is #smallPicDescShow (last match wins).
    string description = "";
    foreach (var div in page.Find("div"))
    {
        if (!div.Exists("p"))
        {
            continue;
        }

        var firstParagraph = div.FindFirst("p");
        if (firstParagraph.Identity() == "smallPicDescShow")
        {
            description = firstParagraph.InnerText();
        }
    }

    // Picture URLs from the items of ul#leftImg.
    var pictureItems = page.Find("ul").Where(ul => ul.Identity() == "leftImg").Find("li");
    foreach (var pictureItem in pictureItems)
    {
        liImg.Add(pictureItem.FindFirst("img").Attribute("src").Value());
    }

    // Phone number: the last ".phone-num" inside div#houseChatEntry, or empty when absent.
    var lastPhoneNode = page.Find("div").Where(div => div.Identity() == "houseChatEntry").Find(".phone-num").LastOrDefault();
    string phone = lastPhoneNode == null ? "" : lastPhoneNode.InnerText();

    PersonnamePhoto.Text = phone;
    Bak.Text = description;
    Personnametext.Text = class1.PersonName;
    text_.Text = class1.TextName;
    this.Title = class1.TextName;
    class1.photo = phone;
    class1.bak = description;
    MaxPage = liImg.Count;
    GoPage(0);
}
/// <summary>
/// Loads SpecificationTest8.html and checks how the parser treats malformed attributes and
/// quotation marks that do not belong to an attribute value.
/// </summary>
public void SpecificationTest8()
{
    var document = new JumonyParser().LoadDocument(Path.Combine(Environment.CurrentDirectory, "SpecificationTest8.html"));

    // Expected values come first so failure messages report expected/actual the right way round.
    Assert.AreEqual(1, document.FindSingle("div").Attributes().Count(), "错误的解析了非法的属性");

    var links = document.Find("div a").ToArray();
    Assert.AreEqual(2, links.Length, "错误的解析了不属于属性值的引用内容");
    Assert.AreEqual("Test1", links[0].InnerText(), "错误的解析了不属于属性值的引用内容");
    Assert.AreEqual(" \"Test2", links[1].InnerText(), "错误的解析了不属于属性值的引用内容");
}
/// <summary>
/// Loads a job detail page and extracts its title, author, budget text, date, tag and a
/// plain-text description.
/// </summary>
/// <param name="url">URL of the job detail page.</param>
/// <returns>
/// The populated sys_job; an empty sys_job when the page has no "product-info-summary"
/// marker or when any error occurs (the error is logged).
/// </returns>
public sys_job GetJobInfo(string url)
{
    try
    {
        IHtmlDocument page = new JumonyParser().LoadDocument(url, Encoding.UTF8);
        if (page.InnerHtml().IndexOf("product-info-summary") < 0)
        {
            return new sys_job();
        }

        string heading = page.Find(".product-info-summary .row h4").FirstOrDefault().InnerText();
        string publisher = page.Find(".product-info-summary .row small").FirstOrDefault().InnerText().Replace("发布者:", "");
        string budget = page.Find(".product-info-summary .row .p-desc").FirstOrDefault().InnerText().Replace(" 预算: ", "");
        string date = page.Find("#p-other ul li:first-child").FirstOrDefault().InnerText();
        string tag = page.Find("#p-other ul li:nth-child(3)").SingleOrDefault().InnerText();
        string body = page.Find("#wrap").SingleOrDefault().InnerHtml();

        // Strip <script> blocks first, then every remaining tag, then tabs and line breaks.
        string scriptPattern = @"<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>";
        body = Regex.Replace(body, scriptPattern, "");
        body = Regex.Replace(body, "<.*?>", "");
        body = body.Replace("\t", "").Replace("\r", "").Replace("\n", "");

        sys_job job = new sys_job();
        job.title = heading;
        job.author = publisher;
        job.desc = body;
        job.rq = date;
        job.tag = tag;
        job.price_min = budget;
        return job;
    }
    catch (Exception e)
    {
        log.Error(url + "----" + e.Message);
        return new sys_job();
    }
}
// Sample rows of the directory index page this method parses:
//<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="01CreateScreen/">01CreateScreen/</a></td><td align="right">2016-01-11 10:23 </td><td align="right"> - </td><td> </td></tr>
//<tr><td valign="top"><img src="/icons/text.gif" alt="[TXT]"></td><td><a href="Test_money.py">Test_money.py</a></td><td align="right">2016-01-08 15:53 </td><td align="right">1.1K</td><td> </td></tr>
/// <summary>
/// Downloads a directory index page and lists its folders and text files.
/// </summary>
/// <param name="url">URL of the directory index; entry names are appended to it as-is.</param>
/// <returns>
/// All folder entries followed by all text-file entries. Only file entries get LastModified,
/// parsed from the first right-aligned cell of their row.
/// </returns>
public static List<Resource> GetDirectoryContents(string url)
{
    List<Resource> Rlist = new List<Resource>();

    // WebClient is disposable; release it as soon as the download is finished.
    Byte[] pageData;
    using (WebClient MyWebClient = new WebClient())
    {
        MyWebClient.Credentials = CredentialCache.DefaultCredentials;
        pageData = MyWebClient.DownloadData(url);
    }

    // The page is decoded as UTF-8.
    string pageHtml = Encoding.UTF8.GetString(pageData);
    var htmlSource = new JumonyParser().Parse(pageHtml);

    // Folder rows: the icon's grandparent is the row; its first link text is the name.
    foreach (var one in htmlSource.Find("img[src=/icons/folder.gif]"))
    {
        var row = one.Parent().Parent();
        Resource a = new Resource();
        a.Name = row.Find("a").ElementAt(0).InnerText();
        a.Url = url + a.Name;
        a.IsFolder = true;
        Rlist.Add(a);
    }

    // File rows: same layout, plus the modification time.
    foreach (var one in htmlSource.Find("img[src=/icons/text.gif]"))
    {
        var row = one.Parent().Parent();
        Resource a = new Resource();
        a.Name = row.Find("a").ElementAt(0).InnerText();
        a.Url = url + a.Name;
        a.IsFolder = false;
        string t1 = row.Find("td[align=right]").ElementAt(0).InnerText();
        a.LastModified = DateTime.Parse(t1);
        Rlist.Add(a);
    }

    return Rlist;
}
/// <summary>
/// Loads the weather.com.cn one-day page for a city and builds a short summary from it.
/// </summary>
/// <param name="cityCode">City code inserted into the page URL.</param>
/// <returns>
/// "{city name} - {third token} - {fourth token}", where the tokens are the non-blank
/// space-separated parts of the hidden_title input's value; null when the page cannot be
/// loaded (a message box is shown in that case).
/// </returns>
public string RequestWeatherWebAnalysisData(string cityCode)
{
    // Fetch the HTML page of the weather site.
    IHtmlDocument page;
    try
    {
        page = new JumonyParser().LoadDocument($"http://www.weather.com.cn/weather1d/{cityCode}.shtml", Encoding.GetEncoding("utf-8"));
    }
    catch (Exception)
    {
        MessageBox.Show($"获取天气信息失败!,请稍后右键任务栏小图标点击刷新", "提示");
        return null;
    }

    // The input whose id is hidden_title carries the weather text.
    var titleInput = page.Find("input[id=hidden_title]").First();
    // The city name sits in the h2 > span of the weather box div.
    var cityBoxes = page.Find("div[class=xyn-weather-box]");
    var citySpan = cityBoxes.Find("h2").Find("span").First();
    string cityName = citySpan.InnerText().Trim();
    string weatherInfo = titleInput.Attribute("value").Value();

    // Split the weather text on spaces and drop blank pieces.
    List<string> parts = weatherInfo
        .Split(' ')
        .Where(piece => !string.IsNullOrWhiteSpace(piece))
        .ToList();

    return $"{cityName} - {parts[2]} - {parts[3]}";
}
/// <summary>
/// Reads the pager of a list page, adds each page URL to the "yjspageurl" sorted set and
/// recurses into the last listed page until a disabled "下一页" entry is seen.
/// </summary>
/// <param name="url">URL of the list page whose pager is read.</param>
/// <remarks>Any exception is logged and rethrown.</remarks>
public void YjsPageUrlList(string url)
{
    try
    {
        IHtmlDocument html = new JumonyParser().LoadDocument(url);
        // Materialize the pager links once; the list was previously rebuilt on every iteration.
        var pages = html.Find("nav.page_nav_div ul.pagination_webpage li a").ToList();
        var disabledItems = html.Find("nav.page_nav_div ul.pagination_webpage li.disabled");

        // A disabled pager item containing "下一页" marks the end of the listing.
        foreach (var item in disabledItems)
        {
            string pagetxt = Regex.Replace(item.InnerHtml(), "<.*?>", "");
            if (pagetxt.IndexOf("下一页") >= 0)
            {
                yjs_endflag = true;
            }
        }

        var size = pages.Count;
        // After the first page, the two leading pager links are skipped; the final link is never added.
        int i = yjsfirstpage ? 0 : 2;
        for (; i < size - 1; i++)
        {
            string pageurl = pages[i].Attribute("href").Value();
            this.db.SortedSetAdd("yjspageurl", pageurl, (double)index++);
            if (i == size - 2 && !yjs_endflag)
            {
                yjsfirstpage = false;
                YjsPageUrlList(pageurl);
            }
        }
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        throw;
    }
}
/// <summary>
/// Reads the release directory index, takes the last listed folder and builds the download
/// URL of that release's APK.
/// </summary>
/// <param name="url">Receives the download URL of app-release.apk for that folder.</param>
/// <returns>A confirmation prompt containing the folder name and its listed time.</returns>
public static string CheckAPK(ref string url)
{
    string downloadurl = "http://192.168.1.40/iwu_android/";

    // WebClient is disposable; release it as soon as the download is finished.
    Byte[] pageData;
    using (WebClient MyWebClient = new WebClient())
    {
        MyWebClient.Credentials = CredentialCache.DefaultCredentials;
        pageData = MyWebClient.DownloadData(downloadurl);
    }

    // The page is decoded as UTF-8.
    string pageHtml = Encoding.UTF8.GetString(pageData);
    var htmlSource = new JumonyParser().Parse(pageHtml);

    // The last folder icon's grandparent is the row describing that folder.
    var row = htmlSource.Find("img[src=/icons/folder.gif]").Last().Parent().Parent();
    string releaseUrl = row.Find("a[href]").First().InnerText();
    string time = row.Find("td[align=right]").ElementAt(0).InnerText();

    url = downloadurl + releaseUrl + "apk/app-release.apk";
    return "最新版本号:" + releaseUrl + "\n 版本时间:" + time + "\n是否确定下载?";
}
/// <summary>
/// Reads the jobs listed on one result page, records each job id in the "jobid" set and
/// returns the jobs whose id was newly added.
/// </summary>
/// <param name="url">URL of the result page.</param>
/// <returns>One sys_job per newly recorded job, combining list-row and detail-page fields.</returns>
/// <remarks>Any exception is logged and rethrown.</remarks>
public List<sys_job> Get_Jobs(string url)
{
    try
    {
        IHtmlDocument page = new JumonyParser().LoadDocument(url);
        var rows = page.Find(".search-result a.job-item");
        var collected = new List<sys_job>();

        foreach (var row in rows)
        {
            string relativeUrl = row.Attribute("href").Value();
            string absoluteUrl = domain + row.Attribute("href").Value();

            // The job id is the text between the last '/' and the last '.' of the link.
            int idStart = relativeUrl.LastIndexOf("/") + 1;
            int idEnd = relativeUrl.LastIndexOf(".");
            string jobKey = relativeUrl.Substring(idStart, idEnd - idStart);

            if (!db.SetAdd("jobid", jobKey))
            {
                continue;
            }

            var detail = getjobdesc(absoluteUrl);
            // Text of the first element matching a selector inside this row.
            Func<string, string> rowText = selector => row.Find(selector).FirstOrDefault().InnerText();

            string lowPrice;
            string highPrice;
            var job = new sys_job
            {
                addtime = DateTime.Now,
                amount = rowText(".money").Replace("预算:¥", ""),
                author = detail["zz"],
                desc = Regex.Replace(detail["desc"], "<.*?>", ""),
                gq = rowText(".period").Replace("工期:", ""),
                jobid = jobKey,
                joburl = absoluteUrl,
                number = detail["number"],
                rq = rowText(".publish_at").Replace("发布时间:", ""),
                status = rowText(".work_status"),
                tag = rowText(".pattern"),
                title = rowText(".job-title div"),
                price_min = detail.TryGetValue("price_1", out lowPrice) ? lowPrice : "",
                price_max = detail.TryGetValue("price_2", out highPrice) ? highPrice : ""
            };
            collected.Add(job);
        }

        return collected;
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        throw;
    }
}
/// <summary>
/// Parses a listing page and appends one Class1 entry per item of ".house-list-wrap".
/// </summary>
/// <param name="html">HTML of the listing page.</param>
/// <param name="fristLoads">Not acted upon here; the region-loading code it guarded is disabled.</param>
/// <param name="L_Class">List that receives the parsed entries.</param>
/// <param name="r">Counter incremented once per parsed entry.</param>
public void NewMethod(string html, ref bool fristLoads, ref List<Class1> L_Class, ref int r)
{
    IHtmlDocument page = new JumonyParser().Parse(html);
    IEnumerable<IHtmlElement> rows = page.Find(".house-list-wrap").Find("li");

    foreach (var row in rows)
    {
        Class1 entry = new Class1();
        entry.TextName = GetN_value(row, ".title>a");
        if (row.Exists(".title > a"))
        {
            entry.href = row.FindFirst(".title>a").Attribute("href").Value();
        }
        else
        {
            entry.href = string.Empty;
        }
        entry.Quyu = "同城";
        entry.PersonName = GetN_value(row, ".jjrname-outer");
        entry.Laiyuan = "58";

        // The address is the concatenated text of every link inside ".baseinfo".
        IEnumerable<IHtmlElement> baseInfo = row.Find(".baseinfo");
        string address = "";
        foreach (var addressLink in baseInfo.Find("a"))
        {
            address += addressLink.InnerText();
        }
        entry.Address = address;
        baseInfo.Find("a");

        // NOTE(review): the split result is not used afterwards.
        string[] rightColumnParts = GetN_value(row, ".qj-listright").Split(' ');

        entry.SumMoney = GetN_value(row, ".sum");
        entry.PingMoney = GetN_value(row, ".unit");
        entry.Allpm = GetN_value(row, ".baseinfo");
        entry.datetime = GetN_value(row, ".time");
        entry.Image_Count = GetInt_value(row, ".picNum");
        entry.Image_str = entry.Image_Count > 0 ? "有" : string.Empty;

        L_Class.Add(entry);
        r++;
    }
}
/// <summary>
/// Reads the jobs listed on one page, records each job id in the "rrkf_jobid" set and
/// returns the newly recorded jobs enriched with their detail-page data.
/// </summary>
/// <param name="url">URL of the list page.</param>
/// <returns>
/// The newly recorded jobs whose detail page yielded a title; an empty list when any error
/// occurs (the error is logged).
/// </returns>
public List<sys_job> GetItemList(string url)
{
    try
    {
        var collected = new List<sys_job>();
        IHtmlDocument page = new JumonyParser().LoadDocument(url, Encoding.UTF8);

        foreach (var row in page.Find("#r-list-wrapper .row .r-list"))
        {
            var relativeLink = row.Find(".r-info a").FirstOrDefault().Attribute("href").Value();

            // The job id is everything after the last "id=" in the link.
            var jobKey = relativeLink.Substring(relativeLink.LastIndexOf("id=") + 3);
            string absoluteLink = domain + relativeLink;

            if (!db.SetAdd("rrkf_jobid", jobKey))
            {
                continue;
            }

            var priceText = row.Find(".r-price").SingleOrDefault().InnerText();
            var numberText = row.Find("div:nth-child(3)").SingleOrDefault().InnerText();
            var statusText = row.Find("div:last-child span").SingleOrDefault().InnerText();

            sys_job job = GetJobInfo(absoluteLink);
            if (job.title == null)
            {
                // The detail lookup produced no title, so the job is left out.
                continue;
            }

            job.joburl = absoluteLink;
            job.jobid = jobKey;
            job.number = numberText;
            job.status = statusText;
            job.amount = priceText;
            job.addtime = DateTime.Now;
            collected.Add(job);
        }

        return collected;
    }
    catch (Exception e)
    {
        log.Error(url + "----" + e.Message);
        return new List<sys_job>();
    }
}
/// <summary>
/// Requests the Tieba page with the BDUSS cookie and a mobile user agent, and returns the
/// "data-fn" attribute of every matching list item.
/// </summary>
/// <returns>The data-fn values in document order.</returns>
static List<string> GetTbNameList()
{
    List<string> oo = new List<string>();
    HttpWebRequest req = CreateReq("https://tieba.baidu.com/?page=like");
    req.UserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1";
    req.CookieContainer = new CookieContainer();
    req.CookieContainer.Add(new Cookie("BDUSS", BDUSS, "/", "baidu.com"));

    // Dispose the response and the reader so the connection is released after reading.
    string str;
    using (var response = (HttpWebResponse)req.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream()))
    {
        str = reader.ReadToEnd();
    }

    IHtmlDocument source = new JumonyParser().Parse(str);
    foreach (var li in source.Find("li[data-fn]").ToList())
    {
        oo.Add(li.Attribute("data-fn").Value());
    }

    return oo;
}
/// <summary>
/// Reads the films listed on one list page and gathers the download entries of each film's
/// detail page.
/// </summary>
/// <param name="url">URL of the list page.</param>
/// <returns>
/// The entries returned by Get_FilmInfo for every listed film; an empty list when any error
/// occurs (the error is logged and the URL is pushed onto "error_pageurl").
/// </returns>
public List<sys_film> GetItemList(string url)
{
    try
    {
        var collected = new List<sys_film>();
        IHtmlDocument page = new JumonyParser().LoadDocument(url, Encoding.UTF8);

        foreach (var table in page.Find(".co_content8 ul table"))
        {
            string relativeLink = table.Find("a.ulink").FirstOrDefault().Attribute("href").Value();
            collected.AddRange(Get_FilmInfo(domainurl + relativeLink));
        }

        return collected;
    }
    catch (Exception e)
    {
        log.Error(url + "-----" + e.Message);
        this.db.ListLeftPush("error_pageurl", url);
        return new List<sys_film>();
    }
}
/// <summary>
/// Parses the HTML text: walks the rows of ".f-lan" and hands each recognised section
/// heading row to the matching Modular0x method.
/// </summary>
/// <param name="SourceHtml">HTML text to parse.</param>
/// <returns>The model the Modular0x methods were given to work on.</returns>
private TargetModel HtmlAnalytical(string SourceHtml)
{
    // Container handed to every section handler.
    TargetModel model = new TargetModel();
    var parsed = new JumonyParser().Parse(SourceHtml);
    var rowQuery = parsed.Find(".f-lan tr");
    var rows = rowQuery as IHtmlElement[] ?? rowQuery.ToArray();

    for (int i = 0; i < rows.Length; i++)
    {
        // The text of the row's first cell decides which section starts here.
        var text = rows[i].Find("td").FirstOrDefault().InnerText();
        Console.WriteLine(text);

        string heading = text.Trim();
        if (heading == "工商登记注册基本信息")
        {
            Modular01(parsed, model, i);
        }
        else if (heading == "资本相关信息")
        {
            Modular02(parsed, model, i);
        }
        else if (heading == "组织机构代码信息")
        {
            Modular03(parsed, model, i);
        }
        else if (heading == "税务登记信息")
        {
            Modular04(parsed, model, i);
        }
    }

    return model;
}
/// <summary>
/// Refreshes textBox3 with the headings of the reservation index page, followed by the
/// refresh time.
/// </summary>
/// <remarks>
/// The refresh stays best-effort: a failure does not propagate to the caller, but it is now
/// written to the trace output instead of being discarded.
/// </remarks>
public void Index()
{
    try
    {
        string now = Convert.ToDateTime(GetNetDateTime()).ToString("HH:mm:ss");
        textBox3.Clear();

        // UTF-8 encoding is required, otherwise the HTML comes out garbled.
        IHtmlDocument source = new JumonyParser().Parse(Get("http://wechat.laixuanzuo.com/index.php/reserve/index.html?f=wechat", "Hm_lpvt_7838cef374eb966ae9ff502c68d6f098=" + GetTimeStamp(true) + "; Hm_lvt_7838cef374eb966ae9ff502c68d6f098=" + GetTimeStamp(true) + ";FROM_TYPE=weixin;wechatSESS_ID=" + textBox1.Text));

        foreach (var heading in source.Find(".list-group-item-heading"))
        {
            // Flatten line breaks so each heading occupies one line of the text box.
            string line = heading.InnerText().ToString().Replace("\n", " ");
            textBox3.AppendText(line + System.Environment.NewLine);
        }

        textBox3.AppendText("刷新时间" + now + System.Environment.NewLine);
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.WriteLine("Index refresh failed: " + ex);
    }
}
/// <summary>
/// Collects the pager links of a list page into the "pageurl" sorted set, recursing into
/// the second-to-last pager link until it stops changing.
/// </summary>
/// <param name="url">URL of the list page whose pager is read.</param>
public void PageUrlList(string url)
{
    IHtmlDocument html = new JumonyParser().LoadDocument(url);
    // Materialize the pager links once; the list was previously rebuilt on every access.
    var pages = html.Find(".page-div nav ul li a").ToList();
    var size = pages.Count;

    string lastpage_fullurl = domain + pages[size - 2].Attribute("href").Value();
    string end_page_url = domain + pages[size - 1].Attribute("href").Value();

    // Every link from firstpage up to, but not including, the final link.
    for (int i = firstpage; i < size - 1; i++)
    {
        string page_fullurl = domain + pages[i].Attribute("href").Value();
        this.db.SortedSetAdd("pageurl", page_fullurl, (double)index++);
    }

    // Follow the second-to-last link while it differs from the one followed before.
    if (lasturl != lastpage_fullurl)
    {
        lasturl = lastpage_fullurl;
        firstpage = 1;
        PageUrlList(lastpage_fullurl);
    }

    this.db.SortedSetAdd("pageurl", end_page_url, (double)index++);
}
/// <summary>
/// Navigated handler: reads the detail page rendered in web1 and fills the current entry
/// (L_Class[i]) with its description, picture URLs, overview fields and phone number, then
/// clears the loading flag.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Navigation event data (not used).</param>
private void web2_Navigated(object sender, NavigationEventArgs e)
{
    var browserDocument = (mshtml.HTMLDocument)web1.Document;
    IHtmlDocument page = new JumonyParser().Parse(browserDocument.body.innerHTML);
    IEnumerable<IHtmlElement> divs = page.Find("div");
    var house = L_Class[i];

    // Description: the p#smallPicDescShow elements inside divs (last match wins).
    foreach (var paragraph in divs.Find("p").Where(p => p.Identity() == "smallPicDescShow"))
    {
        house.bak = paragraph.InnerText();
    }

    // Picture URLs from ul#leftImg, joined with "---".
    foreach (var pictureItem in page.Find("ul").Where(ul => ul.Identity() == "leftImg").Find("li"))
    {
        string source = pictureItem.FindFirst("img").Attribute("src").Value();
        if (house.Image_str.Length > 0)
        {
            house.Image_str = house.Image_str + "---" + source;
        }
        else
        {
            house.Image_str = source;
        }
    }

    // Phone number: the last ".phone-num" inside div#houseChatEntry, or empty when absent.
    string phone = "";
    foreach (var phoneNode in page.Find("div").Where(div => div.Identity() == "houseChatEntry").Find(".phone-num"))
    {
        phone = phoneNode.InnerText();
    }

    // Overview fields: the meaning of each position depends on how many values there are.
    var overview = divs.Where(div => div.Identity() == "generalSituation").Find(".c_000").ToList();
    bool longLayout = overview.Count > 6;
    bool fourItemLayout = overview.Count == 4;

    for (int position = 0; position < overview.Count; position++)
    {
        string text = overview[position].InnerText();

        // The first three positions mean the same in every layout.
        if (position == 0)
        {
            house.FwSumMoney = text;
        }
        else if (position == 1)
        {
            house.FwHuXing = text;
        }
        else if (position == 2)
        {
            house.FwMianji = text;
        }
        else if (longLayout)
        {
            switch (position)
            {
                case 3: house.FwChaoxiang = text; break;
                case 4: house.Loucheng = text; break;
                case 5: house.FwZhuangxiu = text; break;
                case 6: house.FwNianxian = text; break;
            }
        }
        else if (fourItemLayout)
        {
            // Only position 3 reaches this branch: it holds the floor when the text
            // contains "层" past its first character, otherwise the decoration.
            if (text.IndexOf("层") > 0)
            {
                house.FwLoucheng = text;
            }
            else
            {
                house.FwZhuangxiu = text;
            }
        }
        else
        {
            switch (position)
            {
                case 3: house.FwLoucheng = text; break;
                case 4: house.FwZhuangxiu = text; break;
                case 5: house.FwNianxian = text; break;
            }
        }
    }

    house.photo = phone;
    // Cleared once loading has finished, so that the next loop iteration can start.
    loading = false;
}
/// <summary>
/// Reads the listing rendered in web1 and appends the entries it accepts to L_Class,
/// stopping once enough entries at or before MaxTime have been seen.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Navigation event data (not used).</param>
void webbrowserUpload(object sender, NavigationEventArgs e)
{
    var browserDocument = (mshtml.HTMLDocument)web1.Document;
    IHtmlDocument page = new JumonyParser().Parse(browserDocument.body.innerHTML);
    IEnumerable<IHtmlElement> rows = page.Find(".house-list-wrap").Find("li");

    bool ToNotDown = true;
    int oldEntriesSeen = 0;

    foreach (var row in rows)
    {
        newWORD.Class1 entry = new newWORD.Class1();
        entry.TextName = MainWindow.GetN_value(row, ".title>a");
        if (row.Exists(".title > a"))
        {
            entry.href = row.FindFirst(".title>a").Attribute("href").Value();
        }
        else
        {
            entry.href = string.Empty;
        }
        entry.Quyu = "同城";
        entry.PersonName = MainWindow.GetN_value(row, ".jjrname-outer");
        entry.Laiyuan = "58";

        // The address is the concatenated text of every link inside ".baseinfo".
        string address = "";
        foreach (var addressLink in row.Find(".baseinfo").Find("a"))
        {
            address += addressLink.InnerText();
        }
        entry.Address = address;

        // The publish time starts as today's date and is refined from the ".time" text.
        string postedText = MainWindow.GetN_value(row, ".time");
        DateTime now = DateTime.Now;
        entry.FbTime = Convert.ToDateTime(now.Year.ToString() + "-" + now.Month.ToString() + "-" + now.Day.ToString());

        if (postedText == "今天")
        {
            // Entries posted today are skipped when one with the same name and address exists.
            var datalist = iqdata.ToList();
            if (iqdata.FirstOrDefault(x => x.HLName == entry.TextName && x.Address == entry.Address) != null)
            {
                continue;
            }
        }
        else
        {
            if (postedText.IndexOf("分钟") > -1)
            {
                entry.FbTime = now.AddMinutes(-(Convert.ToInt32(postedText.Replace("分钟", string.Empty))));
            }
            else if (postedText.IndexOf("小时") > -1)
            {
                entry.FbTime = now.AddHours(-(Convert.ToInt32(postedText.Replace("小时", string.Empty))));
            }
            else
            {
                // Otherwise the text is read as "month-day" of the current year.
                string[] monthDay = postedText.Split('-');
                entry.FbTime = Convert.ToDateTime(entry.FbTime.Year + "-" + monthDay[0] + "-" + monthDay[1]);
            }

            if (entry.FbTime <= MaxTime)
            {
                // The third entry at or before MaxTime ends the scan.
                if (oldEntriesSeen >= 2)
                {
                    ToNotDown = false;
                    break;
                }
                oldEntriesSeen++;
            }
        }

        entry.datetime = "";
        entry.Image_Count = MainWindow.GetInt_value(row, ".picNum");
        entry.Image_str = entry.Image_Count > 0 ? "有" : string.Empty;

        // Entries are kept only while no entry at or before MaxTime has been counted.
        if (oldEntriesSeen == 0)
        {
            L_Class.Add(entry);
        }
        r++;
    }

    int nextI = 0;
    if (ToNotDown)
    {
        // Jump to the next page: the click on the "下一页" link is currently disabled, so
        // nextI stays at 0.
    }

    loadingS = nextI > 0;
}
/// <summary>
/// Reads the jobs listed on one page, records each job id in the "yjsjobid" set and
/// returns the newly recorded jobs that carry a "zhushi_span" marker.
/// </summary>
/// <param name="url">URL of the list page.</param>
/// <returns>One sys_job per newly recorded job with the marker.</returns>
/// <remarks>
/// Optional nodes (title, author, number) now default to an empty string when missing.
/// The earlier null checks tested the query result rather than the element, so a missing
/// node still threw. Any other exception is logged and rethrown.
/// </remarks>
public List<sys_job> YjsJobs(string url)
{
    try
    {
        string regtxt = "<.*?>";
        IHtmlDocument html = new JumonyParser().LoadDocument(url);
        var jobs = html.Find("#db_adapt_id .weui_panel");
        List<sys_job> listjob = new List<sys_job>();

        foreach (var item in jobs)
        {
            string joburl = item.Find("a").FirstOrDefault()?.Attribute("href").Value();
            if (joburl == null)
            {
                // A panel without a link has no job id to record.
                continue;
            }

            // The job id is everything after the last '/' of the link.
            string jobid = joburl.Substring(joburl.LastIndexOf("/") + 1);
            bool isok = this.db.SetAdd("yjsjobid", jobid);
            if (!isok)
            {
                continue;
            }

            string jobtitle = item.Find("a .topic_title").FirstOrDefault()?.InnerText() ?? "";
            string jobdesc = string.Empty;
            string jobgs = string.Empty;
            string jobprice = string.Empty;
            var subitems = item.Find(".job_list_item_div .media_desc_adapt");
            string author = item.Find("h4.weui_media_title ").FirstOrDefault()?.InnerText() ?? "";

            // Panels whose text has no "zhushi_span" marker past the first character are skipped.
            bool isover = !(item.ToString().IndexOf("zhushi_span") > 0);
            if (isover)
            {
                continue;
            }

            string numberhtml = item.Find("span.zhushi_span").FirstOrDefault()?.InnerHtml() ?? "";
            string number = Regex.Replace(numberhtml, regtxt, "");

            // Each sub item is identified by the icon class it contains.
            foreach (var subitem in subitems)
            {
                string subitemhtml = subitem.InnerHtml();
                if (subitemhtml.IndexOf("glyphicon-th-large") >= 0)
                {
                    jobdesc = Regex.Replace(subitemhtml, regtxt, "").Replace("描述:", "");
                }
                if (subitemhtml.IndexOf("glyphicon-hourglass") >= 0)
                {
                    jobgs = Regex.Replace(subitemhtml, regtxt, "").Replace("工时:", "");
                }
                if (subitemhtml.IndexOf("glyphicon-yen") >= 0)
                {
                    jobprice = Regex.Replace(subitemhtml, regtxt, "").Replace("总价:", "").Replace("元", "");
                }
            }

            sys_job jobentry = new sys_job
            {
                jobid = jobid,
                title = jobtitle,
                desc = jobdesc,
                number = number,
                joburl = joburl,
                addtime = DateTime.Now,
                amount = jobprice,
                author = author,
                gq = jobgs
            };
            listjob.Add(jobentry);
        }

        return listjob;
    }
    catch (Exception e)
    {
        log.Error(e.Message);
        throw;
    }
}