/// <summary>
/// Parses the response HTML and builds one exam item for every <c>div.st</c> element it contains.
/// </summary>
/// <param name="strResponse">Raw HTML of the response; a null or empty value yields an empty list.</param>
/// <param name="moduleId">Module identifier that is passed through to <c>BuildEntity</c>.</param>
/// <returns>The items for which <c>BuildEntity</c> returned a non-null model.</returns>
public override IList<ExamItem> Process(string strResponse, int moduleId)
{
    var result = new List<ExamItem>();
    if (string.IsNullOrEmpty(strResponse))
    {
        return result;
    }

    var document = new JumonyParser().Parse(strResponse);

    // All question nodes. Materialized once so the selector is not evaluated again
    // by the loop and by the count comparison below.
    var htmlExamItems = document.Descendants(@"div.st").ToList();

    foreach (var item in htmlExamItems)
    {
        var model = BuildEntity(moduleId, item);
        if (model != null)
        {
            result.Add(model);
        }
    }

    if (htmlExamItems.Count > result.Count)
    {
        // {0} = number of nodes found in the HTML, {1} = number that could be parsed.
        string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
        WriteLog(strResponse, msg);
    }

    return result;
}
/// <summary>
/// Parses the response HTML and builds one exam item for every row of the second
/// <c>body&gt;div&gt;table</c> table whose inner HTML contains the "【" marker.
/// </summary>
/// <param name="strResponse">Raw HTML of the response; a null or empty value yields an empty list.</param>
/// <param name="moduleId">Module identifier that is passed through to <c>BuildEntity</c>.</param>
/// <returns>The items for which <c>BuildEntity</c> returned a non-null model.</returns>
public override IList<ExamItem> Process(string strResponse, int moduleId)
{
    var result = new List<ExamItem>();
    if (string.IsNullOrEmpty(strResponse))
    {
        return result;
    }

    var document = new JumonyParser().Parse(strResponse);

    // The questions live in the second matching table (index 1).
    var dataTable = document.Descendants("body>div>table").ElementAt(1);
    var allRows = dataTable.Elements("tr");

    // Materialized once so the filter is not evaluated again by the loop
    // and by the count comparison below.
    var htmlExamItems = allRows.Where(x => x.InnerHtml().Contains("【")).ToList();

    foreach (var item in htmlExamItems)
    {
        var model = BuildEntity(moduleId, item);
        if (model != null)
        {
            result.Add(model);
        }
    }

    if (htmlExamItems.Count > result.Count)
    {
        // {0} = number of rows found in the HTML, {1} = number that could be parsed.
        string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
        WriteLog(strResponse, msg);
    }

    return result;
}
/// <summary>
/// Loads a blog page, builds a mail message from it with <c>CreateMail</c> and stores that
/// message as an <c>.mht</c> file by "sending" it through an SMTP client that writes into a
/// pickup directory.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var id = Guid.NewGuid();

    // Private pickup directory, so the single file written by Send can be identified afterwards.
    var path = Path.Combine(tempDirectory, id.ToString());
    Directory.CreateDirectory(path);

    var parser = new JumonyParser();
    var document = parser.LoadDocument("http://blog.sina.com.cn/s/blog_4701280b010183ny.html");

    // Both the client and the message are disposable; release them before touching the output file.
    using (var smtp = new SmtpClient())
    using (MailMessage message = CreateMail(document))
    {
        smtp.EnableSsl = false;
        smtp.DeliveryMethod = SmtpDeliveryMethod.SpecifiedPickupDirectory;
        smtp.PickupDirectoryLocation = path;
        smtp.Send(message);
    }

    var directory = new DirectoryInfo(path);
    var file = directory.GetFiles().Single();
    file.MoveTo(Path.Combine(tempDirectory, id.ToString() + ".mht"));
    directory.Delete(true);
}
/// <summary>
/// Exercises the style helpers of an element: setting, replacing, removing and clearing
/// properties, and reading values back after setting the padding and margin shorthands.
/// </summary>
public void SetStyleTest()
{
    var element = new JumonyParser().Parse("<div></div>").Elements().First();

    // Reads the current text of the style attribute.
    Func<string> styleText = () => element.Attribute("style").Value();

    // Assertions use the (expected, actual) order so failure messages read correctly.
    element.Style("display", "none");
    Assert.AreEqual("display:none", styleText(), ".Style( name, value ) 测试不通过");

    element.Style().SetValue("color", "red");
    Assert.AreEqual("display:none;color:red", styleText(), ".Style().SetValue( name, value ) 测试不通过");

    element.Style().SetValue("display", "block");
    Assert.AreEqual("display:block;color:red", styleText(), ".Style().SetValue( name, value ) 测试不通过");

    element.Style().SetValue("display", null);
    Assert.AreEqual("color:red", styleText(), ".Style().SetValue( name, null ) 测试不通过");

    element.Style().Clear();
    Assert.AreEqual("", styleText(), ".Style().Clear() 测试不通过");

    element.Style().SetValue("padding", "10px");
    Assert.AreEqual("10px", element.Style().GetValue("padding-left"), "shorthand 展开测试不通过");

    element.Style().SetValue("padding-left", "0px");
    Assert.AreEqual("0px", element.Style().GetValue("padding-left"), "shorthand 展开测试不通过");
    Assert.AreEqual("10px", element.Style().GetValue("padding-top"), "shorthand 展开测试不通过");

    element.Style().SetValue("margin", "5px");
    Assert.AreEqual("5px", element.Style().GetValue("margin-left"), "margin shorthand 展开测试不通过");
}
/// <summary>
/// Parses the response HTML and creates one <c>SubjectModule</c> for every table row that
/// carries an <c>onmouseout</c> attribute.
/// </summary>
/// <param name="strResponse">Raw HTML of the response; a null or empty value yields an empty list.</param>
/// <param name="moduleId">Module identifier that is passed to <c>GetId</c> together with the row title.</param>
/// <returns>The modules built from the matching rows.</returns>
public override List<SubjectModule> Process(string strResponse, int moduleId)
{
    var list = new List<SubjectModule>();
    if (string.IsNullOrEmpty(strResponse))
    {
        // Nothing to parse; return the empty list instead of handing null/empty text to the parser.
        return list;
    }

    var document = new JumonyParser().Parse(strResponse);

    // Materialized once: the rows are both iterated and counted.
    var rows = document.Descendants("tr[onmouseout]").ToList();

    foreach (IHtmlElement row in rows)
    {
        // The title is the text of the first cell; the link is the last anchor inside a cell.
        string title = row.FindFirst("td").InnerText();
        string href = row.FindLast("td a").Attribute("href").Value();

        list.Add(new SubjectModule
        {
            Id = GetId(moduleId, title),
            Handler = "SweetFly.Job.Handler.OldHandler,SweetFly.Job",
            HtmlDataSource = new HtmlDataSource
            {
                Encoding = "GB2312",
                Uri = @"http://learning.cmr.com.cn/subject/stupage/" + href
            }
        });
    }

    Console.WriteLine("{0} - {1}", rows.Count, list.Count);
    return list;
}
/// <summary>
/// Downloads the cnblogs home page, extracts every hyperlink that has an <c>href</c> attribute
/// and binds the resulting list (absolute URL, external-link flag, target) to the data list.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    string html;
    using (var client = new WebClient())
    {
        html = client.DownloadString("http://www.cnblogs.com/");
    }

    var document = new JumonyParser().Parse(html);
    var links = document.Find("a[href]");
    var baseUrl = new Uri("http://www.cnblogs.com");

    var data = from hyperLink in links
               let url = new Uri(baseUrl, hyperLink.Attribute("href").Value())
               let host = url.Host
               orderby url.AbsoluteUri
               select new
               {
                   Url = url.AbsoluteUri,
                   // Internal only when the host is cnblogs.com itself or one of its sub-domains;
                   // a bare suffix test would also accept hosts such as "notcnblogs.com".
                   IsLinkingOut = !(host.Equals("cnblogs.com", StringComparison.OrdinalIgnoreCase)
                                    || host.EndsWith(".cnblogs.com", StringComparison.OrdinalIgnoreCase)),
                   Target = hyperLink.Attribute("target").Value() ?? "_self"
               };

    DataList.DataSource = data;
    DataBind();
}
/// <summary>
/// Runs the crawl job: for every selector definition with <c>State == 0</c> (ordered by ID) the
/// configured page is loaded, title/link/date elements are matched by index, and a
/// <c>Crawl_Data_Item</c> is stored for every title that is not yet present in the database.
/// The job sleeps for one minute after each selector definition.
/// </summary>
public static void RunCrawlJob()
{
    using (CrawlDBContext db = new CrawlDBContext())
    {
        List<Crawl_Data_Item_Selector> listItemSelector = db.DBSet_Crawl_Data_Item_Selector
            .Where(x => x.State == 0)
            .OrderBy(x => x.ID)
            .ToList();

        foreach (var item in listItemSelector)
        {
            string sourceUrl = item.Url; // original (listing page) URL
            Uri baseUri = new Uri(sourceUrl);

            // Download the page exactly once, using the configured encoding when there is one.
            IHtmlDocument doc;
            if (string.IsNullOrEmpty(item.Encoding))
            {
                doc = new JumonyParser().LoadDocument(sourceUrl);
            }
            else
            {
                doc = new JumonyParser().LoadDocument(sourceUrl, Encoding.GetEncoding(item.Encoding));
            }

            // Evaluate each selector once instead of once per loop iteration.
            var titles = doc.Find(item.TitleSelector).ToList();
            var links = doc.Find(item.GOUrlSelector).ToList();
            var dates = doc.Find(item.PublicDateSelector).ToList();

            for (int i = 0; i < titles.Count; i++)
            {
                var title = titles[i].InnerText(); // title text
                if (db.DBSet_Crawl_Data_Item.Any(x => x.Title == title))
                {
                    continue; // already stored
                }

                string link = "" + links[i].Attribute("href").Value(); // link target
                string publicDate = dates[i].InnerText();              // publication date text

                // Resolve relative links against the listing page. Only a leading scheme marks an
                // absolute link; "http://" somewhere inside a relative link does not.
                bool isAbsolute = link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                  || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (!isAbsolute)
                {
                    link = new Uri(baseUri, link).ToString();
                }

                var model = new Crawl_Data_Item();
                model.AddTime = DateTime.Now;
                model.Crawl_Data_Item_Selector_Id = item.ID;
                model.Url = link;
                model.Title = title;
                model.SourceUrl = sourceUrl;
                model.Source = item.Source;

                if (item.PublicDateFormat == "{yyyy-}MM-dd")
                {
                    // The page shows only month and day; prepend the current year.
                    model.PublicDate = DateTime.Parse(DateTime.Now.Year + "-" + publicDate);
                }
                else
                {
                    model.PublicDate = DateTime.Parse(publicDate);
                }

                db.DBSet_Crawl_Data_Item.Add(model);
            }

            // Persist what was collected for this selector; without saving, the added items
            // are discarded when the context is disposed.
            db.SaveChanges();

            Thread.Sleep(1000 * 60);
        }
    }
}
/// <summary>
/// Downloads the page referenced by the row's "url" column, extracts a fax number from every
/// <c>.tel</c> element and writes the row back through the table adapter.
/// </summary>
/// <param name="obj">The <see cref="DataRow"/> to process; it is cast without a type check.</param>
private void CallBack(object obj)
{
    DataRow row = (DataRow)obj;

    string html;
    using (var client = new WebClient())
    {
        html = client.DownloadString(row["url"].ToString());
    }

    IHtmlDocument document = new JumonyParser().Parse(html);

    // Removed from the fax text, in this order ("+1" has to go before the bare "+").
    string[] noise = { "+1", "+", "(", ")", " ", ".", "-" };

    foreach (IHtmlElement telElement in document.Find(".tel"))
    {
        string text = telElement.InnerText();
        string faxNumber = "无";

        int markerIndex = text.IndexOf("fax", StringComparison.Ordinal);
        if (markerIndex > -1)
        {
            // Everything after the "fax" marker, stripped of formatting characters.
            string digits = text.Substring(markerIndex + 3);
            foreach (string token in noise)
            {
                digits = digits.Replace(token, "");
            }

            row["fax"] = digits;
            faxNumber = digits;
        }

        row["status"] = 1;
        new faxDataSetTableAdapters.kellysearch_faxTableAdapter().Update(row);
        Console.WriteLine(faxNumber);
    }
}
/// <summary>
/// Renders the view <c>~/ActionUrlTest/Test1.html</c> and checks that the first link in the
/// output points to <c>/TestController/TestAction?arg=args</c>.
/// </summary>
public void Test1()
{
    var context = new ControllerContext(HttpContext.Request.RequestContext, new TestController());
    var result = ViewEngines.Engines.FindView(context, "~/ActionUrlTest/Test1.html", null);
    Assert.NotNull(result.View, "找不到视图");

    IHtmlDocument document;
    using (var writer = new StringWriter())
    {
        var viewContext = new ViewContext(context, result.View, new ViewDataDictionary(), new TempDataDictionary(), writer);
        result.View.Render(viewContext, writer);
        document = new JumonyParser().Parse(writer.ToString());
    }

    var link = document.FindFirst("a");
    Assert.NotNull(link);

    // (expected, actual) order so a failure message reads correctly.
    Assert.AreEqual("/TestController/TestAction?arg=args", link.Attribute("href").Value());
}
/// <summary>
/// Loads <c>StyleTest1.html</c>, data-binds it and checks that no element matching
/// <c>.invisible</c> is left in the document.
/// </summary>
public void VisibleTest()
{
    var path = Path.Combine(Environment.CurrentDirectory, "StyleTest1.html");
    var document = new JumonyParser().LoadDocument(path);

    document.DataBind(null);

    // (expected, actual) order so a failure message reads correctly.
    Assert.AreEqual(0, document.Find(".invisible").Count());
}
/// <summary>
/// Checks that a class selector whose name contains a hyphen matches the element carrying that class.
/// </summary>
public void css_class_has_hyphen()
{
    var doc = new JumonyParser().Parse("<div class=\"css-class\"></div>");

    var matches = doc.Find(".css-class");

    Assert.AreEqual(1, matches.Count());
}
/// <summary>
/// Data-binds <c>Test1.html</c> without a data context and checks the resulting content of the
/// <c>title</c> element.
/// </summary>
public void Test1()
{
    var path = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var document = new JumonyParser().LoadDocument(path);

    HtmlBinding.Create(document, null).DataBind();

    // (expected, actual) order so a failure message reads correctly.
    Assert.AreEqual("Test Title abc text", document.FindFirst("title").InnerHtml(), "对 title 元素内容的文本替换测试失败");
}
/// <summary>
/// Loads <c>SpecificationTest5.html</c> and checks that exactly four special nodes
/// (<c>IHtmlSpecial</c>) are found among the document's descendant nodes.
/// </summary>
public void SpecificationTest5()
{
    var path = Path.Combine(Environment.CurrentDirectory, "SpecificationTest5.html");
    var document = new JumonyParser().LoadDocument(path);

    // The check of document.DocumentDeclaration against "<!DOCTYPE html>" was disabled by the
    // original author and stays disabled here.

    var specials = document.DescendantNodes().OfType<IHtmlSpecial>().ToArray();

    // (expected, actual) order so a failure message reads correctly.
    Assert.AreEqual(4, specials.Length, "特殊标签解析数量不对");
}
/// <summary>
/// Loads Bing search results restricted to this site and renders them as a partial view.
/// </summary>
/// <remarks>
/// Query string parameters: <c>key</c> holds the search keywords (required) and <c>p</c> the
/// 1-based page number (optional). A key of the form <c>blog:UserName keywords</c> restricts the
/// search to that user's blog path.
/// </remarks>
/// <returns>
/// The partial view with the result fragments; <c>null</c> when no key was supplied or when a
/// page after the first has no pager. The view receives a <c>null</c> model when the requested
/// page lies beyond the last page shown by the pager.
/// </returns>
public ActionResult ShowBingResult()
{
    if (!Request.QueryString.AllKeys.Contains("key"))
    {
        return null;
    }

    string key = Request.QueryString["key"]; // search keywords
    JumonyParser jumony = new JumonyParser();

    // Example of the target URL:
    // http://cn.bing.com/search?q=AJAX+site%3ablog.haojima.net&first=11&FORM=PERE
    string pageText = Request.QueryString.AllKeys.Contains("p") ? Request.QueryString["p"] : "";

    // int.TryParse writes 0 on failure, so fall back to page 1 explicitly; otherwise a missing
    // or invalid "p" would end up as "first=-11" in the URL.
    int pageIndex;
    if (!int.TryParse(pageText, out pageIndex) || pageIndex < 1)
    {
        pageIndex = 1;
    }
    pageIndex--; // zero-based from here on

    // Example: "blog:JeffreyZhao keywords" -> split on spaces first.
    var keyParts = key.Split(' ');
    var blogName = string.Empty;
    if (keyParts.Length >= 2)
    {
        var prefix = keyParts[0].Trim();
        if (prefix.Length > 6 && prefix.Substring(0, 5) == "blog:")
        {
            // The user name follows the "blog:" prefix.
            blogName = "/" + Uri.EscapeDataString(prefix.Substring(5));
        }
    }

    if (!string.IsNullOrEmpty(blogName))
    {
        // Drop the "blog:..." prefix from the keywords.
        key = key.Substring(key.IndexOf(' '));
    }

    // The keywords come from the request, so they are escaped before being placed in the query.
    var url = "http://cn.bing.com/search?q=" + Uri.EscapeDataString(key.Trim())
              + "+site:" + GetSiteUrl() + blogName
              + "&first=" + pageIndex + "1&FORM=PERE";

    var document = jumony.LoadDocument(url);
    var list = document.Find("#b_results .b_algo").Select(t => t.ToString()).ToList();

    // Pager entries, queried and materialized once.
    var pagerItems = document.Find("li.b_pag nav ul li").ToList();
    if (pageIndex > 0 && pagerItems.Count == 0)
    {
        return null;
    }

    if (pagerItems.Count > 1)
    {
        var lastText = pagerItems[pagerItems.Count - 1].InnerText();

        // When the last entry is the "next page" link, the highest page number is the entry before it.
        var highestPageText = lastText == "下一页"
            ? pagerItems[pagerItems.Count - 2].InnerText()
            : lastText;

        int highestPage;
        int.TryParse(highestPageText, out highestPage);

        if (highestPage <= pageIndex)
        {
            list = null;
        }
    }

    return PartialView(list);
}
/// <summary>
/// Checks that a stray '&lt;' character in <c>SpecificationTest1.html</c> is parsed as text and
/// does not disturb the element that follows it.
/// </summary>
public void SpecificationTest1()
{
    var path = Path.Combine(Environment.CurrentDirectory, "SpecificationTest1.html");
    var document = new JumonyParser().LoadDocument(path);

    // Exactly one <a> element is expected.
    var element = document.FindSingle("a");

    // Assertions use the (expected, actual) order so failure messages read correctly.
    Assert.AreEqual("abc", element.InnerHtml());                      // its content is "abc"
    Assert.AreEqual(1, element.Attributes().Count());                 // it has exactly one attribute
    Assert.AreEqual("abc", element.Attribute("abc").AttributeValue);  // whose value is "abc"

    var textNode = document.Nodes().ElementAt(0) as IHtmlTextNode;
    Assert.IsNotNull(textNode);

    // The first text node contains the stray '<'.
    Assert.IsTrue(textNode.HtmlText.Contains('<'));
}
/// <summary>
/// Data-binds <c>Test1.html</c> with a dictionary as data context and checks that the first
/// <c>script</c> element contains the bound value.
/// </summary>
public void Test1()
{
    var path = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var document = new JumonyParser().LoadDocument(path);

    var data = new Dictionary<string, object>
    {
        { "StyleClass", null },
        { "ThisTime", null },
        { "ScriptValue1", "TestValue" }
    };

    HtmlBinding.Create(document, data).DataBind();

    var script = document.FindFirst("script").InnerHtml();
    StringAssert.Contains(script, "var value1 =\"TestValue\";");
}
/// <summary>
/// Exercises the class helpers of an element: plain names, the "+", "-" and "~" prefixes,
/// <c>Toggle</c>, and several expressions passed either as separate arguments or as one
/// space-separated string.
/// </summary>
public void SetClassTest()
{
    var element = new JumonyParser().Parse("<div></div>").Elements().First();

    // Current text of the class attribute.
    Func<string> classText = () => element.Attribute("class").Value();

    // Whether the element matches the given selector.
    Func<string, bool> matches = selector => CssParser.Create(element.Document, selector).IsEligible(element);

    // Assertions use the (expected, actual) order so failure messages read correctly.
    element.Class("test");
    Assert.AreEqual("test", classText(), ".Class( name ) 测试不通过");

    element.Class("-test");
    Assert.AreEqual("", classText() ?? "", ".Class( -name ) 测试不通过");

    element.Class("~test");
    Assert.AreEqual("test", classText(), ".Class( ~name ) 测试不通过");

    element.Class("~test");
    Assert.AreEqual("", classText() ?? "", ".Class( ~name ) 测试不通过");

    element.Class("~test");
    Assert.AreEqual("test", classText(), ".Class( ~name ) 测试不通过");

    element.Class().Toggle("test");
    Assert.AreEqual("", classText() ?? "", ".Class().Toggle( name ) 测试不通过");

    element.Class().Toggle("test");
    Assert.AreEqual("test", classText(), ".Class().Toggle( name ) 测试不通过");

    element.Class().Toggle("test");
    Assert.AreEqual("", classText() ?? "", ".Class().Toggle( name ) 测试不通过");

    element.Class("+deleted", "+completed"); // class="deleted completed"
    Assert.IsTrue(matches(".deleted.completed"), ".Class( +name, +name )");

    element.Class("+deleted", "~completed"); // class="deleted"
    Assert.IsFalse(matches(".deleted.completed"), ".Class( +name, ~name )");
    Assert.IsTrue(matches(".deleted"), ".Class( +name, ~name )");

    element.Class("~deleted", "~completed"); // class="completed"
    Assert.IsFalse(matches(".deleted.completed"), ".Class( ~name, ~name )");
    Assert.IsTrue(matches(".completed"), ".Class( ~name, ~name )");

    element.Class("~deleted ~completed"); // class="deleted"
    Assert.IsFalse(matches(".deleted.completed"), ".Class( ~name ~name )");
    Assert.IsTrue(matches(".deleted"), ".Class( ~name ~name )");

    element.Class("deleted completed"); // class="deleted completed"
    Assert.IsTrue(matches(".deleted.completed"), ".Class( name name )");

    element.Class("+deleted ~completed"); // class="deleted"
    Assert.IsFalse(matches(".deleted.completed"), ".Class( +name, ~name )");
    Assert.IsTrue(matches(".deleted"), ".Class( +name, ~name )");
}
/// <summary>
/// Reads the directory listing at the fixed download location, takes the last folder entry as the
/// latest release and builds the download URL of its APK.
/// </summary>
/// <param name="url">Receives the URL of <c>apk/app-release.apk</c> inside the latest release folder.</param>
/// <returns>A confirmation text that contains the release name and its timestamp.</returns>
public static string CheckAPK(ref string url)
{
    string downloadurl = "http://192.168.1.40/iwu_android/";

    string pageHtml;
    using (var client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;

        // The page is decoded as UTF-8.
        pageHtml = Encoding.UTF8.GetString(client.DownloadData(downloadurl));
    }

    var htmlSource = new JumonyParser().Parse(pageHtml);

    // The last folder icon in the listing marks the release that is used.
    var folderIcon = htmlSource.Find("img[src=/icons/folder.gif]").Last();

    // Two levels up from the icon is the element that also holds the link and the date cell.
    var row = folderIcon.Parent().Parent();
    string releaseUrl = row.Find("a[href]").First().InnerText();
    string time = row.Find("td[align=right]").ElementAt(0).InnerText();

    url = downloadurl + releaseUrl + "apk/app-release.apk";
    return "最新版本号:" + releaseUrl + "\n 版本时间:" + time +"\n是否确定下载?";
}
/// <summary>
/// Loads the HTML document stored at <paramref name="filepath"/> and creates a translate task
/// from it together with the terms data returned by <c>EnsureTermsData</c>.
/// </summary>
/// <param name="filepath">Path of the file to load; relative paths are resolved to a full path.</param>
/// <returns>The translate task created for the document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="filepath"/> is null.</exception>
/// <exception cref="InvalidOperationException">The file does not exist.</exception>
public static TranslateTask LoadTranslateTask(string filepath)
{
    if (filepath == null)
    {
        throw new ArgumentNullException("filepath");
    }

    if (!File.Exists(filepath))
    {
        throw new InvalidOperationException("文件不存在");
    }

    // A file URI has to be built from an absolute path; a relative path would make Uri throw.
    var documentUri = new Uri(Path.GetFullPath(filepath));

    // Dispose the reader so the file handle is released deterministically.
    using (var reader = File.OpenText(filepath))
    {
        var document = new JumonyParser().LoadDocument(reader, documentUri);
        var terms = EnsureTermsData(document);
        return new TranslateTask(document, terms);
    }
}
/// <summary>
/// Data-binds <c>Test1.html</c> with a dictionary and checks the bound <c>class</c> and
/// <c>test</c> attributes of the <c>body</c> element.
/// </summary>
public void AttributeTest1()
{
    var path = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var document = new JumonyParser().LoadDocument(path);

    var dataValues = new Dictionary<string, object>
    {
        { "StyleClass", "Test" },
        { "ThisTime", new DateTime(2000, 1, 2) },
        { "ScriptValue1", null }
    };

    HtmlBinding.Create(document, dataValues).DataBind();

    // Assertions use the (expected, actual) order so failure messages read correctly.
    Assert.AreEqual("Test", document.FindFirst("body").Attribute("class").Value(), "针对属性的表达式绑定不成功");
    Assert.AreEqual("this time is 2000-01-02 #", document.FindFirst("body").Attribute("test").Value(), "格式表达式测试失败");
}
/// <summary>
/// Checks that the attribute forms used in <c>SpecificationTest2.html</c> (double-quoted,
/// single-quoted, unquoted, without value, empty) are parsed as expected.
/// </summary>
public void SpecificationTest2()
{
    var path = Path.Combine(Environment.CurrentDirectory, "SpecificationTest2.html");
    var document = new JumonyParser().LoadDocument(path);

    // Assertions use the (expected, actual) order so failure messages read correctly.
    var element = document.FindSingle("A");
    Assert.AreEqual("abc", element.Attribute("a").AttributeValue); // double-quoted value
    Assert.AreEqual("123", element.Attribute("b").AttributeValue); // single-quoted value
    Assert.AreEqual("d=x", element.Attribute("c").AttributeValue);

    // NOTE(review): in the collapsed source this assertion follows an empty "//" comment and is
    // read here as live code rather than commented-out code - confirm against the original file.
    Assert.IsNull(element.Attribute("d"));                         // whitespace before the attribute value
    Assert.IsNull(element.Attribute("e").AttributeValue);          // no equals sign
    Assert.AreEqual("", element.Attribute("f").AttributeValue);    // attribute at the end of the tag

    element = document.FindSingle("B");
    Assert.AreEqual("abc", element.Attribute("a").AttributeValue); // space before the equals sign
    Assert.AreEqual("", element.Attribute("b").AttributeValue);    // empty value
    Assert.IsNull(element.Attribute("c").AttributeValue);          // value-less attribute at the end of the tag
}
/// <summary>
/// Downloads a profile page through the local proxy, reads name, description and avatar URL
/// from it, and saves the avatar image as <c>imgPath + name + ".jpg"</c>.
/// </summary>
/// <param name="blogUrl">URL of the profile page.</param>
/// <param name="imgPath">Prefix (directory including trailing separator) of the image file to write.</param>
/// <returns>A <c>Blog</c> whose <c>Name</c> and <c>Description</c> are filled from the page.</returns>
/// <remarks>
/// Exceptions from the web requests, the parser or the file system propagate to the caller with
/// their original stack trace. The method also sets the process-wide
/// <see cref="ServicePointManager.DefaultConnectionLimit"/> to 200.
/// </remarks>
public Blog LoadStar(string blogUrl, string imgPath)
{
    Blog blog = new Blog();
    ServicePointManager.DefaultConnectionLimit = 200;

    HttpWebRequest request = HttpWebRequestFactory.CreateSimpleRequest(blogUrl);
    request.Proxy = new WebProxy("127.0.0.1", 1080);

    // Read the page; response, stream and reader are released as soon as the text is available.
    string result;
    using (WebResponse response = request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("utf-8")))
    {
        result = reader.ReadToEnd();
    }

    var document = new JumonyParser().Parse(result);
    blog.Name = document.FindFirst(".ProfileHeaderCard-nameLink").InnerHtml();
    blog.Description = document.FindFirst(".ProfileHeaderCard-bio").InnerHtml();
    string imgUrl = document.FindFirst(".ProfileAvatar-image").Attribute("src").Value();

    // NOTE(review): as in the original, the image request is sent without the proxy - confirm
    // that this is intended.
    HttpWebRequest imageRequest = HttpWebRequestFactory.CreateSimpleRequest(imgUrl);
    string path = imgPath + blog.Name.ToString() + ".jpg";

    // FileMode.Create truncates an existing file, so a smaller new image cannot leave stale
    // bytes from the previous one behind. Disposing the file stream flushes and closes the file.
    using (HttpWebResponse imageResponse = (HttpWebResponse)imageRequest.GetResponse())
    using (Stream imageStream = imageResponse.GetResponseStream())
    using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        imageStream.CopyTo(fs);
    }

    return blog;
}
// Sample rows of the directory listing that is parsed here:
//<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="01CreateScreen/">01CreateScreen/</a></td><td align="right">2016-01-11 10:23 </td><td align="right"> - </td><td> </td></tr>
//<tr><td valign="top"><img src="/icons/text.gif" alt="[TXT]"></td><td><a href="Test_money.py">Test_money.py</a></td><td align="right">2016-01-08 15:53 </td><td align="right">1.1K</td><td> </td></tr>
/// <summary>
/// Downloads the directory listing at <paramref name="url"/> and returns its folder entries
/// (folder icon) followed by its text file entries (text icon).
/// </summary>
/// <param name="url">URL of the listing; entry names are appended to it to form each entry's URL.</param>
/// <returns>
/// The resources found. <c>LastModified</c> is set for file entries only.
/// </returns>
public static List<Resource> GetDirectoryContents(string url)
{
    List<Resource> Rlist = new List<Resource>();

    string pageHtml;
    using (var client = new WebClient())
    {
        client.Credentials = CredentialCache.DefaultCredentials;

        // The page is decoded as UTF-8.
        pageHtml = Encoding.UTF8.GetString(client.DownloadData(url));
    }

    var htmlSource = new JumonyParser().Parse(pageHtml);

    // Folders: the name is the text of the first link in the icon's row.
    foreach (var icon in htmlSource.Find("img[src=/icons/folder.gif]"))
    {
        var row = icon.Parent().Parent();

        Resource folder = new Resource();
        folder.Name = row.Find("a").ElementAt(0).InnerText();
        folder.Url = url + folder.Name;
        folder.IsFolder = true;
        Rlist.Add(folder);
    }

    // Text files: same as folders, plus the timestamp from the first right-aligned cell.
    foreach (var icon in htmlSource.Find("img[src=/icons/text.gif]"))
    {
        var row = icon.Parent().Parent();

        Resource file = new Resource();
        file.Name = row.Find("a").ElementAt(0).InnerText();
        file.Url = url + file.Name;
        file.IsFolder = false;

        // The listing prints timestamps as "yyyy-MM-dd HH:mm"; parse them independently of the
        // culture of the current thread.
        string modifiedText = row.Find("td[align=right]").ElementAt(0).InnerText();
        file.LastModified = DateTime.Parse(modifiedText, System.Globalization.CultureInfo.InvariantCulture);
        Rlist.Add(file);
    }

    return Rlist;
}
/// <summary>
/// Loads the HTML document stored at <paramref name="filepath"/>, creates a translate task for
/// it and initializes that task.
/// </summary>
/// <param name="filepath">Path of the file to load; relative paths are resolved to a full path.</param>
/// <returns>The initialized translate task.</returns>
/// <exception cref="ArgumentNullException"><paramref name="filepath"/> is null.</exception>
/// <exception cref="InvalidOperationException">The file does not exist.</exception>
public static TranslateTask LoadTranslateTask(string filepath)
{
    if (filepath == null)
    {
        throw new ArgumentNullException("filepath");
    }

    if (!File.Exists(filepath))
    {
        throw new InvalidOperationException("文件不存在");
    }

    // A file URI has to be built from an absolute path; a relative path would make Uri throw.
    var documentUri = new Uri(Path.GetFullPath(filepath));

    // Dispose the reader so the file handle is released before the task is initialized.
    TranslateTask task;
    using (var reader = File.OpenText(filepath))
    {
        var document = new JumonyParser().LoadDocument(reader, documentUri);
        task = new TranslateTask(document);
    }

    task.Initialize();
    return task;
}
/// <summary>
/// Benchmark: loads the sina.com.cn home page once and times three ways of evaluating the same
/// ten selectors, 200 rounds each: filtering the lazy descendant sequence, filtering a
/// materialized descendant array, and calling <c>Find</c> on the document.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var document = new JumonyParser().LoadDocument("http://www.sina.com.cn/", Encoding.GetEncoding("GB2312"));

    string[] selectors =
    {
        "body p a",
        "p > a",
        "p[class] a",
        "p a[href]",
        "p + a",
        "div a",
        "p div a",
        "a img[src]",
        "div img",
        "body img[src]"
    };

    Stopwatch watch = new Stopwatch();

    // Round 1: filter the lazily evaluated descendant sequence.
    watch.Restart();
    for (int round = 0; round < 200; round++)
    {
        // Every round starts by materializing all descendants once; the result is not used.
        document.Descendants().ToArray();
        foreach (var selector in selectors)
        {
            document.Descendants().FilterBy(selector).FirstOrDefault();
        }
    }
    watch.Stop();
    Console.WriteLine(watch.Elapsed);

    // Round 2: filter a materialized array of descendants.
    watch.Restart();
    for (int round = 0; round < 200; round++)
    {
        document.Descendants().ToArray();
        foreach (var selector in selectors)
        {
            document.Descendants().ToArray().FilterBy(selector).FirstOrDefault();
        }
    }
    watch.Stop();
    Console.WriteLine(watch.Elapsed);

    // Round 3: let the document evaluate the selector itself.
    watch.Restart();
    for (int round = 0; round < 200; round++)
    {
        document.Descendants().ToArray();
        foreach (var selector in selectors)
        {
            document.Find(selector).FirstOrDefault();
        }
    }
    watch.Stop();
    Console.WriteLine(watch.Elapsed);

    Console.ReadKey();
}
/// <summary>
/// Gets the developer information (AppId, AppSecret, URL, token, AES key and AES type) by
/// requesting the developer page and reading the <c>.frm_vertical_pt</c> entries inside
/// <c>.developer_info_wrp</c> by position.
/// </summary>
/// <returns>
/// The developer information; <c>null</c> when the request does not return HTTP 200, when the
/// page has no developer section, or when the request or the parsing fails.
/// </returns>
/// <remarks>
/// The method is best effort: failures are written to the trace output and are not thrown.
/// </remarks>
public WechatDevInfo GetWechatDevInfo()
{
    WechatDevInfo devInfo = null;
    HttpResponseMessage response = null;
    try
    {
        _httpClient = new HttpClient(handler);
        SetHeader();
        response = _httpClient.GetAsync(WeChatUrl.DEV_URL + token).Result;
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return devInfo;
        }

        // Connected; read and parse the page.
        string result = response.Content.ReadAsStringAsync().Result;
        var htmlDoc = new JumonyParser().Parse(result);
        var htmlEles = htmlDoc.Find(".developer_info_wrp");
        if (htmlEles == null || !htmlEles.Any())
        {
            return devInfo;
        }

        var vertical = htmlEles.Find(".frm_vertical_pt").ToList();
        devInfo = new WechatDevInfo();

        // The meaning of an entry is given by its position on the page.
        for (int i = 0; i < vertical.Count; i++)
        {
            try
            {
                var infoText = vertical[i].InnerText().Trim();
                if (string.IsNullOrWhiteSpace(infoText))
                {
                    continue;
                }

                switch (i)
                {
                    case 0: devInfo.AppId = infoText; break;
                    case 1: devInfo.AppSecret = infoText; break;
                    case 2: devInfo.URL = infoText; break;
                    case 3: devInfo.Token = infoText; break;
                    case 4: devInfo.EncodingAESKey = infoText; break;
                    case 5: SetEncodingAESType(devInfo.EncodingAESType, infoText); break;
                    default: break;
                }
            }
            catch (Exception ex)
            {
                // One unreadable entry must not discard the others; record it and go on.
                System.Diagnostics.Trace.TraceError("GetWechatDevInfo: entry {0} could not be read: {1}", i, ex);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and return whatever has been collected so far.
        System.Diagnostics.Trace.TraceError("GetWechatDevInfo failed: {0}", ex);
    }
    finally
    {
        if (response != null)
        {
            response.Dispose();
        }
    }

    return devInfo;
}
/// <summary>
/// Gets the account information of the WeChat official-account platform by requesting the
/// account page and reading its <c>.meta_content</c> entries by position.
/// </summary>
/// <returns>
/// The account information; <c>null</c> when the request does not return HTTP 200, when the
/// page has no matching entries, or when the request or the parsing fails.
/// </returns>
/// <remarks>
/// The method is best effort: failures are written to the trace output and are not thrown.
/// </remarks>
public WechatAccountInfo GetAccount()
{
    WechatAccountInfo account = null;
    HttpResponseMessage response = null;
    try
    {
        _httpClient = new HttpClient(handler);
        SetHeader();
        response = _httpClient.GetAsync(WeChatUrl.ACCOUNT_INFO_URL + token).Result;
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return account;
        }

        // Connected; read and parse the page.
        string result = response.Content.ReadAsStringAsync().Result;
        var htmlDoc = new JumonyParser().Parse(result);
        var htmlEles = htmlDoc.Find(".account_setting_area .account_setting_item .meta_content");
        if (htmlEles == null || !htmlEles.Any())
        {
            return account;
        }

        var setting = htmlEles.ToList();
        account = new WechatAccountInfo();

        // The meaning of an entry is given by its position on the page.
        for (int i = 0; i < setting.Count; i++)
        {
            try
            {
                var infoText = setting[i].InnerText().Trim();

                // Entries 0 and 1 hold images, so an empty text is expected for them.
                if (i > 1 && string.IsNullOrWhiteSpace(infoText))
                {
                    continue;
                }

                switch (i)
                {
                    case 0:
                    case 1:
                    {
                        // Head image (0) and QR code (1): take the src of the first image, if any.
                        var image = setting[i].Find("img").FirstOrDefault();
                        var source = image != null ? image.Attribute("src") : null;
                        if (source != null)
                        {
                            if (i == 0)
                            {
                                account.HeadImage = source.AttributeValue;
                            }
                            else
                            {
                                account.QRCode = source.AttributeValue;
                            }
                        }
                        break;
                    }
                    case 2: account.AccountName = infoText; break;
                    case 3: account.WechatNumber = infoText; break;
                    case 4: SetWechatType(account.WechatType, infoText); break;
                    case 5: account.Introduces = infoText; break;
                    case 6: SetAuthenticate(account.Authenticate, infoText); break;
                    case 7: account.PlaceAddress = infoText; break;
                    case 8: account.SubjectInfo = infoText; break;
                    case 9: account.LoginEmail = infoText; break;
                    case 10: account.AccountId = infoText; break;
                    default: break;
                }
            }
            catch (Exception ex)
            {
                // One unreadable entry must not discard the others; record it and go on.
                System.Diagnostics.Trace.TraceError("GetAccount: entry {0} could not be read: {1}", i, ex);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and return whatever has been collected so far.
        System.Diagnostics.Trace.TraceError("GetAccount failed: {0}", ex);
    }
    finally
    {
        if (response != null)
        {
            response.Dispose();
        }
    }

    return account;
}
/// <summary>
/// Gets the company addresses for a search URL: reads the page count from the <c>maxPage</c>
/// input of the first result page and passes it on to <c>GetUrl</c>.
/// </summary>
/// <param name="url">The search URL to query.</param>
private void GetPage(string url)
{
    string html;
    using (var client = new WebClient())
    {
        html = client.DownloadString(url);
    }

    IHtmlDocument document = new JumonyParser().Parse(html);

    // Defaults to a single page when the page carries no usable maxPage input.
    int page = 1;
    foreach (IHtmlElement input in document.Find(".pagediv input"))
    {
        if (input.Attribute("name").Value() != "maxPage")
        {
            continue;
        }

        // The value comes from the scraped page; keep the current value when it is not a number.
        int parsed;
        if (int.TryParse(input.Attribute("value").Value(), out parsed))
        {
            page = parsed;
        }
    }

    GetUrl(url, page);
}
/// <summary>
/// Walks through result pages 1 to <paramref name="maxPage"/> of a search URL and stores name
/// and detail URL of every company link found, with status 0.
/// </summary>
/// <param name="url">The search URL; <c>&amp;page=N</c> is appended for every page.</param>
/// <param name="maxPage">Number of the last page to read.</param>
private void GetUrl(string url, int maxPage)
{
    // One client serves all sequential downloads and is disposed at the end.
    using (var client = new WebClient())
    {
        for (int i = 1; i <= maxPage; i++)
        {
            string pageUrl = url + "&page=" + i;
            string html = client.DownloadString(pageUrl);
            IHtmlDocument document = new JumonyParser().Parse(html);

            foreach (IHtmlElement anchor in document.Find(".searchresult_zonee .heading_address a"))
            {
                try
                {
                    string businessUrl = "http://www.kellysearch.com/" + anchor.Attribute("href").Value();
                    string name = anchor.InnerText();

                    // Every company is written through its own single-row table.
                    faxDataSet.kellysearch_faxDataTable dt = new faxDataSet.kellysearch_faxDataTable();
                    DataRow row = dt.NewRow();
                    row["name"] = name;
                    row["status"] = 0;
                    row["url"] = businessUrl;
                    dt.Rows.Add(row);

                    faxDataSetTableAdapters.kellysearch_faxTableAdapter apt = new faxDataSetTableAdapters.kellysearch_faxTableAdapter();
                    apt.Update(dt);

                    Console.WriteLine(name + businessUrl);
                }
                catch (Exception ex)
                {
                    // A failure on one entry is reported and does not stop the remaining ones.
                    Console.WriteLine(ex.Message);
                }
            }
        }
    }
}
/// <summary>
/// Loads an HTML document from a file located in the current working directory.
/// </summary>
/// <param name="filename">File name, relative to <see cref="Environment.CurrentDirectory"/>.</param>
/// <returns>The parsed document.</returns>
private static IHtmlDocument LoadDocument(string filename)
{
    string fullPath = Path.Combine(Environment.CurrentDirectory, filename);
    return new JumonyParser().LoadDocument(fullPath);
}