Пример #1
0
        /// <summary>
        /// Parses the response HTML and builds one <c>ExamItem</c> for every
        /// <c>div.st</c> element that <c>BuildEntity</c> is able to convert.
        /// </summary>
        /// <param name="strResponse">Raw HTML of the page; null or empty yields an empty list.</param>
        /// <param name="moduleId">Module identifier that is passed through to <c>BuildEntity</c>.</param>
        /// <returns>The items that were built; elements for which <c>BuildEntity</c> returns null are skipped.</returns>
        public override IList<ExamItem> Process(string strResponse, int moduleId)
        {
            var result = new List<ExamItem>();
            if (string.IsNullOrEmpty(strResponse)) { return result; }

            var document = new JumonyParser().Parse(strResponse);

            // All question elements. Materialized once so the selector is not
            // re-evaluated by the loop and by the count check below.
            var htmlExamItems = document.Descendants(@"div.st").ToList();

            foreach (var item in htmlExamItems)
            {
                var model = BuildEntity(moduleId, item);
                if (model == null) { continue; }

                result.Add(model);
            }

            // Log when some elements could not be converted: {0} is the number of
            // elements found in the HTML, {1} the number that were parsed.
            if (htmlExamItems.Count > result.Count)
            {
                string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
                WriteLog(strResponse, msg);
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Parses the response HTML and builds one <c>ExamItem</c> for every row of the
        /// second <c>body&gt;div&gt;table</c> table whose inner HTML contains "【".
        /// </summary>
        /// <param name="strResponse">Raw HTML of the page; null or empty yields an empty list.</param>
        /// <param name="moduleId">Module identifier that is passed through to <c>BuildEntity</c>.</param>
        /// <returns>The items that were built; rows for which <c>BuildEntity</c> returns null are skipped.</returns>
        public override IList<ExamItem> Process(string strResponse, int moduleId)
        {
            var result = new List<ExamItem>();
            if (string.IsNullOrEmpty(strResponse)) { return result; }

            var document = new JumonyParser().Parse(strResponse);

            // All question rows. ElementAt(1) picks the second matching table and
            // throws when the page contains fewer than two of them.
            var dataTable = document.Descendants("body>div>table").ElementAt(1);
            var AllTrs = dataTable.Elements("tr");

            // Materialized once so the filter is not re-run by the loop and the count check.
            var htmlExamItems = AllTrs.Where(x => x.InnerHtml().Contains("【")).ToList();

            foreach (var item in htmlExamItems)
            {
                var model = BuildEntity(moduleId, item);
                if (model == null) { continue; }

                result.Add(model);
            }

            // Log when some rows could not be converted: {0} is the number of rows
            // found in the HTML, {1} the number that were parsed.
            if (htmlExamItems.Count > result.Count)
            {
                string msg = string.Format("Html:[{0}]个,解析:[{1}]个。", htmlExamItems.Count, result.Count);
                WriteLog(strResponse, msg);
            }

            return result;
        }
Пример #3
0
    /// <summary>
    /// Loads a blog page, converts it to a mail message with <c>CreateMail</c>, lets
    /// <c>SmtpClient</c> write that message into a temporary pickup directory and
    /// finally moves the single produced file to <c>tempDirectory</c> with an .mht extension.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main( string[] args )
    {

      var id = Guid.NewGuid();

      // A private pickup directory guarantees the only file in it is our message.
      var path = Path.Combine( tempDirectory, id.ToString() );
      Directory.CreateDirectory( path );

      // Both SmtpClient and MailMessage are disposable; release them as soon as the message is written.
      using ( SmtpClient smtp = new SmtpClient() )
      {
        smtp.EnableSsl = false;
        smtp.DeliveryMethod = SmtpDeliveryMethod.SpecifiedPickupDirectory;
        smtp.PickupDirectoryLocation = path;

        var parser = new JumonyParser();
        var document = parser.LoadDocument( "http://blog.sina.com.cn/s/blog_4701280b010183ny.html" );

        using ( MailMessage message = CreateMail( document ) )
        {
          smtp.Send( message );
        }
      }

      var directory = new DirectoryInfo( path );
      var file = directory.GetFiles().Single();
      file.MoveTo( Path.Combine( tempDirectory, id.ToString() + ".mht" ) );

      directory.Delete( true );

    }
Пример #4
0
    /// <summary>
    /// Exercises setting, replacing, removing and clearing inline style values on an
    /// element, including expansion of the <c>padding</c> and <c>margin</c> shorthands.
    /// </summary>
    public void SetStyleTest()
    {

      var element = new JumonyParser().Parse( "<div></div>" ).Elements().First();

      // Assert.AreEqual takes ( expected, actual, message ); the expected literal goes first.
      element.Style( "display", "none" );
      Assert.AreEqual( "display:none", element.Attribute( "style" ).Value(), ".Style( name, value ) 测试不通过" );

      element.Style().SetValue( "color", "red" );
      Assert.AreEqual( "display:none;color:red", element.Attribute( "style" ).Value(), ".Style().SetValue( name, value ) 测试不通过" );

      element.Style().SetValue( "display", "block" );
      Assert.AreEqual( "display:block;color:red", element.Attribute( "style" ).Value(), ".Style().SetValue( name, value ) 测试不通过" );

      // Setting a value to null removes the property.
      element.Style().SetValue( "display", null );
      Assert.AreEqual( "color:red", element.Attribute( "style" ).Value(), ".Style().SetValue( name, null ) 测试不通过" );

      element.Style().Clear();
      Assert.AreEqual( "", element.Attribute( "style" ).Value(), ".Style().Clear() 测试不通过" );


      // Shorthand properties must be readable through their longhand names.
      element.Style().SetValue( "padding", "10px" );
      Assert.AreEqual( "10px", element.Style().GetValue( "padding-left" ), "shorthand 展开测试不通过" );

      element.Style().SetValue( "padding-left", "0px" );
      Assert.AreEqual( "0px", element.Style().GetValue( "padding-left" ), "shorthand 展开测试不通过" );
      Assert.AreEqual( "10px", element.Style().GetValue( "padding-top" ), "shorthand 展开测试不通过" );

      element.Style().SetValue( "margin", "5px" );
      Assert.AreEqual( "5px", element.Style().GetValue( "margin-left" ), "margin shorthand 展开测试不通过" );

    }
Пример #5
0
        /// <summary>
        /// Parses the response HTML and creates one <c>SubjectModule</c> for every
        /// <c>tr[onmouseout]</c> row, using the text of the first cell as title and the
        /// href of the last <c>td a</c> link as the relative data source address.
        /// </summary>
        /// <param name="strResponse">Raw HTML of the page; null or empty yields an empty list.</param>
        /// <param name="moduleId">Parent module identifier used by <c>GetId</c>.</param>
        /// <returns>The modules found on the page.</returns>
        public override List<SubjectModule> Process(string strResponse, int moduleId)
        {
            var list = new List<SubjectModule>();

            // Same guard as the other Process implementations: nothing to parse, nothing to return.
            if (string.IsNullOrEmpty(strResponse)) { return list; }

            var document = new JumonyParser().Parse(strResponse);

            // Materialized once so the selector is not evaluated again for the summary line.
            var trs = document.Descendants("tr[onmouseout]").ToList();
            foreach (IHtmlElement tr in trs)
            {
                string title = tr.FindFirst("td").InnerText();
                string href = tr.FindLast("td a").Attribute("href").Value();
                list.Add(new SubjectModule()
                {
                    Id = GetId(moduleId, title),
                    Handler = "SweetFly.Job.Handler.OldHandler,SweetFly.Job",
                    HtmlDataSource = new HtmlDataSource()
                    {
                        Encoding = "GB2312",
                        Uri = @"http://learning.cmr.com.cn/subject/stupage/" + href
                    }
                });
            }
            Console.WriteLine("{0} - {1}", trs.Count, list.Count);

            return list;
        }
Пример #6
0
  /// <summary>
  /// Downloads the cnblogs home page, collects every <c>a[href]</c> link and binds the
  /// links (absolute URL, whether it leaves the site, and target) to <c>DataList</c>.
  /// </summary>
  /// <param name="sender">Event source.</param>
  /// <param name="e">Event data.</param>
  protected void Page_Load( object sender, EventArgs e )
  {

    // WebClient is disposable; release it as soon as the page text is available.
    string html;
    using ( var client = new WebClient() )
    {
      html = client.DownloadString( "http://www.cnblogs.com/" );
    }

    var parser = new JumonyParser();
    var document = parser.Parse( html );

    var links = document.Find( "a[href]" );

    // Relative hrefs are resolved against the site root.
    var baseUrl = new Uri( "http://www.cnblogs.com" );

    var data = from hyperLink in links
               let url = new Uri( baseUrl, hyperLink.Attribute( "href" ).Value() )
               orderby url.AbsoluteUri
               select new
               {
                 Url = url.AbsoluteUri,
                 // Host names are not linguistic text, so compare them ordinally.
                 IsLinkingOut = !url.Host.EndsWith( "cnblogs.com", StringComparison.OrdinalIgnoreCase ),
                 Target = hyperLink.Attribute( "target" ).Value() ?? "_self"
               };

    DataList.DataSource = data;
    DataBind();

  }
Пример #7
0
        /// <summary>
        /// Runs one crawl pass: for every active selector definition (State == 0) the
        /// configured page is downloaded, title / link / date elements are matched by
        /// position, and entries whose title is not stored yet are added to the database.
        /// </summary>
        /// <remarks>
        /// The method waits one minute after each processed page. Date parsing uses
        /// <c>DateTime.Parse</c> and throws on text it cannot interpret.
        /// </remarks>
        public static void RunCrawlJob()
        {
            using (CrawlDBContext db = new CrawlDBContext())
            {
                List<Crawl_Data_Item_Selector> listItemSelector = db.DBSet_Crawl_Data_Item_Selector.Where(x => x.State == 0).OrderBy(x => x.ID).ToList();
                foreach (var item in listItemSelector)
                {
                    string Url = item.Url; // source page URL
                    Uri uri = new Uri(Url);

                    // Download the page exactly once, with the configured encoding when there is one.
                    IHtmlDocument doc = string.IsNullOrEmpty(item.Encoding)
                        ? new JumonyParser().LoadDocument(Url)
                        : new JumonyParser().LoadDocument(Url, Encoding.GetEncoding(item.Encoding));

                    // Evaluate each selector once instead of on every loop iteration.
                    var titles = doc.Find(item.TitleSelector).ToList();
                    var links = doc.Find(item.GOUrlSelector).ToList();
                    var dates = doc.Find(item.PublicDateSelector).ToList();

                    // The three lists are paired by index; only indexes present in all of them are usable.
                    int count = Math.Min(titles.Count, Math.Min(links.Count, dates.Count));

                    for (int i = 0; i < count; i++)
                    {
                        var title = titles[i].InnerText(); // entry title

                        // Skip entries that are already stored.
                        if (db.DBSet_Crawl_Data_Item.Any(x => x.Title == title))
                        {
                            continue;
                        }

                        string link = links[i].Attribute("href").Value() ?? string.Empty; // entry link
                        string publicDate = dates[i].InnerText(); // publication date text

                        // Resolve relative links against the source page. Only the scheme prefix
                        // decides whether a link is absolute, not an "http://" somewhere inside it.
                        bool isAbsolute = link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                       || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                        if (!isAbsolute)
                        {
                            link = new Uri(uri, link).ToString();
                        }

                        var modelTmp = new Crawl_Data_Item();
                        modelTmp.AddTime = DateTime.Now;
                        modelTmp.Crawl_Data_Item_Selector_Id = item.ID;
                        modelTmp.Url = link;
                        modelTmp.Title = title;
                        modelTmp.SourceUrl = Url;
                        modelTmp.Source = item.Source;
                        if (item.PublicDateFormat == "{yyyy-}MM-dd")
                        {
                            // The page omits the year; assume the current one.
                            modelTmp.PublicDate = DateTime.Parse(DateTime.Now.Year + "-" + publicDate);
                        }
                        else
                        {
                            modelTmp.PublicDate = DateTime.Parse(publicDate);
                        }

                        // The synchronous Add is the right call here; the previous un-awaited
                        // AddAsync only returned a task that nobody observed.
                        db.Add(modelTmp);
                    }

                    // NOTE(review): nothing in this method persisted the tracked entities before;
                    // saving per page also lets the duplicate check see them on the next page.
                    db.SaveChanges();

                    Thread.Sleep(1000 * 60);
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Downloads the page referenced by the row's "url" column, extracts the digits
        /// that follow the text "fax" in every <c>.tel</c> element, stores them in the
        /// row's "fax" column, marks the row as processed and writes it back.
        /// </summary>
        /// <param name="obj">The <c>DataRow</c> to process; must contain "url", "fax" and "status" columns.</param>
        private void CallBack(object obj)
        {
            DataRow row = (DataRow)obj;

            // WebClient is disposable; release it as soon as the page text is available.
            string html;
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString(row["url"].ToString());
            }

            JumonyParser jp = new JumonyParser();
            IHtmlDocument document = jp.Parse(html);
            IEnumerable<IHtmlElement> htmlRows = document.Find(".tel");
            foreach (IHtmlElement abc in htmlRows)
            {
                string fax = abc.InnerText();

                // The marker is plain ASCII, so search ordinally instead of culture-sensitively.
                int i = fax.IndexOf("fax", StringComparison.Ordinal);
                string faxnum = "无";
                if (i > -1)
                {
                    // Keep everything after the marker and strip the formatting characters.
                    string sub = fax.Substring(i + 3)
                        .Replace("+1", "")
                        .Replace("+", "")
                        .Replace("(", "")
                        .Replace(")", "")
                        .Replace(" ", "")
                        .Replace(".", "")
                        .Replace("-", "");
                    row["fax"] = sub;
                    faxnum = sub;
                }
                row["status"] = 1;
                new faxDataSetTableAdapters.kellysearch_faxTableAdapter().Update(row);
                Console.WriteLine(faxnum);
            }
        }
Пример #9
0
  /// <summary>
  /// Renders the view "~/ActionUrlTest/Test1.html" and verifies that the first link
  /// in the output points at the expected action URL.
  /// </summary>
  public void Test1()
  {


    var context = new ControllerContext( HttpContext.Request.RequestContext, new TestController() );

    var result = ViewEngines.Engines.FindView( context, "~/ActionUrlTest/Test1.html", null );

    Assert.NotNull( result.View, "找不到视图" );


    IHtmlDocument document;

    // Render the view into a string and parse that output again for inspection.
    using ( var writer = new StringWriter() )
    {
      result.View.Render( new ViewContext( context, result.View, new ViewDataDictionary(), new TempDataDictionary(), writer ), writer );

      document = new JumonyParser().Parse( writer.ToString() );
    }


    var link = document.FindFirst( "a" );

    Assert.NotNull( link );

    // Assert.AreEqual takes ( expected, actual ).
    Assert.AreEqual( "/TestController/TestAction?arg=args", link.Attribute( "href" ).Value() );


  }
Пример #10
0
    /// <summary>
    /// Loads StyleTest1.html, data-binds it and verifies that no element matching
    /// <c>.invisible</c> is left in the document.
    /// </summary>
    public void VisibleTest()
    {
      var document = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "StyleTest1.html" ) );
      document.DataBind( null );

      // Assert.AreEqual takes ( expected, actual ).
      Assert.AreEqual( 0, document.Find( ".invisible" ).Count() );


    }
Пример #11
0
    /// <summary>
    /// Verifies that a class selector containing a hyphen matches the element carrying that class.
    /// </summary>
    public void css_class_has_hyphen()
    {
      const string markup = "<div class=\"css-class\"></div>";
      var document = new JumonyParser().Parse( markup );

      var matchCount = document.Find( ".css-class" ).Count();

      Assert.AreEqual( 1, matchCount );
    }
Пример #12
0
    /// <summary>
    /// Loads Test1.html, data-binds it without a data context and verifies the text
    /// that ends up inside the <c>title</c> element.
    /// </summary>
    public void Test1()
    {

      var document = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "Test1.html" ) );
      HtmlBinding.Create( document, null ).DataBind();


      // Assert.AreEqual takes ( expected, actual, message ).
      Assert.AreEqual( "Test Title abc text", document.FindFirst( "title" ).InnerHtml(), "对 title 元素内容的文本替换测试失败" );

    }
Пример #13
0
        /// <summary>
        /// Loads SpecificationTest5.html and verifies that exactly four special nodes
        /// (<c>IHtmlSpecial</c>) are found among the document's descendant nodes.
        /// </summary>
        public void SpecificationTest5()
        {
            var document = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "SpecificationTest5.html" ) );

            // Disabled check of the document declaration:
            //Assert.AreEqual( "<!DOCTYPE html>", document.DocumentDeclaration );

            var specials = document.DescendantNodes().OfType<IHtmlSpecial>().ToArray();

            // Assert.AreEqual takes ( expected, actual, message ); an array exposes Length directly.
            Assert.AreEqual( 4, specials.Length, "特殊标签解析数量不对" );
        }
Пример #14
0
        /// <summary>
        /// Loads Bing search results restricted to this site and renders them as a partial view.
        /// </summary>
        /// <remarks>
        /// Query string parameters: <c>key</c> holds the search keywords and may start with
        /// <c>blog:NAME</c> followed by a space to restrict the search to one blog;
        /// <c>p</c> is the 1-based page number (missing or invalid values mean page 1).
        /// </remarks>
        /// <returns>
        /// The partial view with the result snippets (the model is null when the requested page
        /// lies beyond the last page Bing offers), or null when no <c>key</c> was supplied or a
        /// later page has no pager.
        /// </returns>
        public ActionResult ShowBingResult()
        {
            if (!Request.QueryString.AllKeys.Contains("key"))
                return null;
            string key = Request.QueryString["key"]; // search keywords
            JumonyParser jumony = new JumonyParser();
            // e.g. http://cn.bing.com/search?q=AJAX+site%3ablog.haojima.net&first=11&FORM=PERE

            // TryParse writes 0 on failure, so fall back to page 1 explicitly; otherwise a
            // missing or invalid "p" would produce the offset "first=-11".
            int PageIndex;
            if (!int.TryParse(Request.QueryString["p"], out PageIndex) || PageIndex < 1)
                PageIndex = 1;
            PageIndex--;

            // e.g. "blog:JeffreyZhao keyword"
            var zhankey = key.Split(' '); // split on spaces first
            var blogName = string.Empty;
            if (zhankey.Length >= 2)
            {
                var str = zhankey[0].Trim();
                if (str.Length > 6 && str.Substring(0, 5) == "blog:")
                    blogName = "/" + System.Uri.EscapeDataString(str.Substring(5)); // the blog (user) name
            }
            if (!string.IsNullOrEmpty(blogName))
                key = key.Substring(key.IndexOf(' '));

            // The keywords come straight from the request, so escape them before building the URL.
            var url = "http://cn.bing.com/search?q=" + System.Uri.EscapeDataString(key.Trim()) + "+site:" + GetSiteUrl() + blogName + "&first=" + PageIndex + "1&FORM=PERE";
            var document = jumony.LoadDocument(url);
            var list = document.Find("#b_results .b_algo").Select(t => t.ToString()).ToList();

            // Pager items, evaluated once.
            var listli = document.Find("li.b_pag nav ul li").ToList();
            if (PageIndex > 0 && listli.Count == 0)
                return null;

            if (listli.Count > 1)
            {
                var text = listli[listli.Count - 1].InnerText();
                int npage = -1;
                if (text == "下一页")
                {
                    // The last item is the "next page" link; the page number is the item before it.
                    var num = listli[listli.Count - 2].InnerText();
                    int.TryParse(num, out npage);
                }
                else
                    int.TryParse(text, out npage);

                // Requested page is past the last available page: show nothing.
                if (npage <= PageIndex)
                    list = null;
            }

            return PartialView(list);
        }
Пример #15
0
 /// <summary>
 /// Verifies that a stray '&lt;' character in SpecificationTest1.html is parsed as text
 /// and does not disturb the parsing of the following <c>a</c> element.
 /// </summary>
 public void SpecificationTest1()
 {
     // Assert.AreEqual takes ( expected, actual ).
     var document = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "SpecificationTest1.html" ) );
     var element = document.FindSingle( "a" ); // exactly one <a> element must be found
     Assert.AreEqual( "abc", element.InnerHtml() ); // and its content is "abc"
     Assert.AreEqual( 1, element.Attributes().Count() ); // it has exactly one attribute
     Assert.AreEqual( "abc", element.Attribute( "abc" ).AttributeValue ); // whose value is "abc"
     var textNode = document.Nodes().ElementAt( 0 ) as IHtmlTextNode;
     Assert.IsNotNull( textNode );
     Assert.IsTrue( textNode.HtmlText.Contains( '<' ) ); // the first text node holds the stray '<'
 }
Пример #16
0
    /// <summary>
    /// Binds Test1.html against a small data dictionary and checks that the value of
    /// "ScriptValue1" shows up inside the first <c>script</c> element.
    /// </summary>
    public void Test1()
    {
      var page = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "Test1.html" ) );

      // Only ScriptValue1 carries a value; the other two keys are bound to null.
      var bindingValues = new Dictionary<string, object>
      {
        { "StyleClass", null },
        { "ThisTime", null },
        { "ScriptValue1", "TestValue" }
      };

      var binding = HtmlBinding.Create( page, bindingValues );
      binding.DataBind();

      var scriptContent = page.FindFirst( "script" ).InnerHtml();
      StringAssert.Contains( scriptContent, "var value1 =\"TestValue\";" );
    }
Пример #17
0
    /// <summary>
    /// Exercises the class helpers on an element: plain names, the "+" (add),
    /// "-" (remove) and "~" (toggle) prefixes, <c>Class().Toggle</c>, and several
    /// names passed either as separate arguments or as one space separated string.
    /// </summary>
    public void SetClassTest()
    {
      var element = new JumonyParser().Parse( "<div></div>" ).Elements().First();

      // Assert.AreEqual takes ( expected, actual, message ); the expected literal goes first.
      element.Class( "test" );
      Assert.AreEqual( "test", element.Attribute( "class" ).Value(), ".Class( name ) 测试不通过" );

      element.Class( "-test" );
      Assert.AreEqual( "", element.Attribute( "class" ).Value() ?? "", ".Class( -name ) 测试不通过" );

      element.Class( "~test" );
      Assert.AreEqual( "test", element.Attribute( "class" ).Value(), ".Class( ~name ) 测试不通过" );

      element.Class( "~test" );
      Assert.AreEqual( "", element.Attribute( "class" ).Value() ?? "", ".Class( ~name ) 测试不通过" );

      element.Class( "~test" );
      Assert.AreEqual( "test", element.Attribute( "class" ).Value(), ".Class( ~name ) 测试不通过" );

      element.Class().Toggle( "test" );
      Assert.AreEqual( "", element.Attribute( "class" ).Value() ?? "", ".Class().Toggle( name ) 测试不通过" );

      element.Class().Toggle( "test" );
      Assert.AreEqual( "test", element.Attribute( "class" ).Value(), ".Class().Toggle( name ) 测试不通过" );

      element.Class().Toggle( "test" );
      Assert.AreEqual( "", element.Attribute( "class" ).Value() ?? "", ".Class().Toggle( name ) 测试不通过" );

      element.Class( "+deleted", "+completed" );//class="deleted completed"
      Assert.IsTrue( CssParser.Create( element.Document, ".deleted.completed" ).IsEligible( element ), ".Class( +name, +name )" );

      element.Class( "+deleted", "~completed" );//class="deleted"
      Assert.IsFalse( CssParser.Create( element.Document, ".deleted.completed" ).IsEligible( element ), ".Class( +name, ~name )" );
      Assert.IsTrue( CssParser.Create( element.Document, ".deleted" ).IsEligible( element ), ".Class( +name, ~name )" );

      element.Class( "~deleted", "~completed" );//class="completed"
      Assert.IsFalse( CssParser.Create( element.Document, ".deleted.completed" ).IsEligible( element ), ".Class( ~name, ~name )" );
      Assert.IsTrue( CssParser.Create( element.Document, ".completed" ).IsEligible( element ), ".Class( ~name, ~name )" );

      element.Class( "~deleted ~completed" );//class="deleted"
      Assert.IsFalse( CssParser.Create( element.Document, ".deleted.completed" ).IsEligible( element ), ".Class( ~name ~name )" );
      Assert.IsTrue( CssParser.Create( element.Document, ".deleted" ).IsEligible( element ), ".Class( ~name ~name )" );

      element.Class( "deleted completed" );//class="deleted completed"
      Assert.IsTrue( CssParser.Create( element.Document, ".deleted.completed" ).IsEligible( element ), ".Class( name name )" );

      element.Class( "+deleted ~completed" );//class="deleted"
      Assert.IsFalse( CssParser.Create( element.Document, ".deleted.completed" ).IsEligible( element ), ".Class( +name, ~name )" );
      Assert.IsTrue( CssParser.Create( element.Document, ".deleted" ).IsEligible( element ), ".Class( +name, ~name )" );

    }
Пример #18
0
        /// <summary>
        /// Reads the directory listing of the download server, takes the last folder row
        /// as the newest release and builds the download address of its APK.
        /// </summary>
        /// <param name="url">Receives the full download address of the release APK.</param>
        /// <returns>A confirmation prompt containing the release name and its timestamp text.</returns>
        public static string CheckAPK(ref string url)
        {
            string downloadurl = "http://192.168.1.40/iwu_android/";

            // WebClient is disposable; release it as soon as the listing is downloaded.
            string pageHtml;
            using (WebClient MyWebClient = new WebClient())
            {
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;
                Byte[] pageData = MyWebClient.DownloadData(downloadurl);
                pageHtml = Encoding.UTF8.GetString(pageData); // the listing is decoded as UTF-8
            }

            var htmlSource = new JumonyParser().Parse(pageHtml);

            // Last folder icon in the listing; its grandparent is the table row of that entry.
            var one = htmlSource.Find("img[src=/icons/folder.gif]").Last();
            var row = one.Parent().Parent();
            string releaseUrl = row.Find("a[href]").First().InnerText();
            string time = row.Find("td[align=right]").ElementAt(0).InnerText();

            url = downloadurl + releaseUrl + "apk/app-release.apk";
            return "最新版本号:" + releaseUrl + "\n 版本时间:" + time +"\n是否确定下载?";
        }
Пример #19
0
    /// <summary>
    /// Loads the HTML document stored at <paramref name="filepath"/> and wraps it,
    /// together with its terms data, in a <c>TranslateTask</c>.
    /// </summary>
    /// <param name="filepath">Path of the document file.</param>
    /// <returns>The translate task created for the document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filepath"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The file does not exist.</exception>
    public static TranslateTask LoadTranslateTask( string filepath )
    {

      if ( filepath == null )
        throw new ArgumentNullException( "filepath" );

      if ( !File.Exists( filepath ) )
        throw new InvalidOperationException( "文件不存在" );


      // File.OpenText returns a StreamReader that holds the file open until it is disposed.
      using ( var reader = File.OpenText( filepath ) )
      {
        var document = new JumonyParser().LoadDocument( reader, new Uri( filepath ) );

        var terms = EnsureTermsData( document );

        return new TranslateTask( document, terms );
      }

    }
Пример #20
0
    /// <summary>
    /// Binds Test1.html against a data dictionary and verifies that binding expressions
    /// inside attributes of the <c>body</c> element are evaluated, including a formatted date.
    /// </summary>
    public void AttributeTest1()
    {
      var document = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "Test1.html" ) );

      var dataValues = new Dictionary<string, object>() { 
        { "StyleClass", "Test" },
        { "ThisTime", new DateTime( 2000,1,2 ) },
        { "ScriptValue1", null }
      };

      HtmlBinding.Create( document, dataValues ).DataBind();

      // Assert.AreEqual takes ( expected, actual, message ).
      Assert.AreEqual( "Test", document.FindFirst( "body" ).Attribute( "class" ).Value(), "针对属性的表达式绑定不成功" );
      Assert.AreEqual( "this time is 2000-01-02 #", document.FindFirst( "body" ).Attribute( "test" ).Value(), "格式表达式测试失败" );


    }
Пример #21
0
        /// <summary>
        /// Verifies that the different ways of writing attributes in SpecificationTest2.html
        /// (quoted, unquoted, without value, at the end of the tag) are parsed as expected.
        /// </summary>
        public void SpecificationTest2()
        {
            // Assert.AreEqual takes ( expected, actual ); null expectations use Assert.IsNull.
            var document = new JumonyParser().LoadDocument( Path.Combine( Environment.CurrentDirectory, "SpecificationTest2.html" ) );
            var element = document.FindSingle( "A" );

            Assert.AreEqual( "abc", element.Attribute( "a" ).AttributeValue ); // double-quoted value
            Assert.AreEqual( "123", element.Attribute( "b" ).AttributeValue ); // single-quoted value
            Assert.AreEqual( "d=x", element.Attribute( "c" ).AttributeValue );
            Assert.IsNull( element.Attribute( "d" ) ); // whitespace before the attribute value
            Assert.IsNull( element.Attribute( "e" ).AttributeValue ); // attribute without an equals sign
            Assert.AreEqual( "", element.Attribute( "f" ).AttributeValue ); // attribute at the end of the tag

            element = document.FindSingle( "B" );
            Assert.AreEqual( "abc", element.Attribute( "a" ).AttributeValue ); // space before the equals sign
            Assert.AreEqual( "", element.Attribute( "b" ).AttributeValue ); // empty attribute value
            Assert.IsNull( element.Attribute( "c" ).AttributeValue ); // valueless attribute at the end of the tag
        }
Пример #22
0
        /// <summary>
        /// Downloads a profile page through the local proxy, reads name, description and
        /// avatar address from it and saves the avatar image as
        /// <paramref name="imgPath"/> + name + ".jpg".
        /// </summary>
        /// <param name="blogUrl">Address of the profile page.</param>
        /// <param name="imgPath">Directory prefix (including the trailing separator) for the saved avatar.</param>
        /// <returns>A <c>Blog</c> with <c>Name</c> and <c>Description</c> filled in.</returns>
        /// <remarks>
        /// Exceptions from the web requests, the parser or the file system propagate
        /// unchanged to the caller.
        /// </remarks>
        public Blog LoadStar(string blogUrl,string imgPath)
        {
            Blog blog = new Blog();
            ServicePointManager.DefaultConnectionLimit = 200;
            HttpWebRequest request = HttpWebRequestFactory.CreateSimpleRequest(blogUrl);
            WebProxy proxy = new WebProxy("127.0.0.1", 1080);
            request.Proxy = proxy;

            // Response, stream and reader are all disposable; release them once the page text is read.
            string result;
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("utf-8")))
            {
                result = reader.ReadToEnd();
            }

            var document = new JumonyParser().Parse(result);
            blog.Name = document.FindFirst(".ProfileHeaderCard-nameLink").InnerHtml();
            blog.Description = document.FindFirst(".ProfileHeaderCard-bio").InnerHtml();
            string imgUrl = document.FindFirst(".ProfileAvatar-image").Attribute("src").Value();

            request = HttpWebRequestFactory.CreateSimpleRequest(imgUrl);
            using (HttpWebResponse imageResponse = (HttpWebResponse)request.GetResponse())
            using (Stream srr = imageResponse.GetResponseStream())
            {
                string path = imgPath + blog.Name.ToString() + ".jpg";

                // FileMode.Create truncates an existing file, so a smaller new image never keeps
                // stale trailing bytes; disposing the stream flushes and closes the file.
                using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write))
                {
                    srr.CopyTo(fs);
                }
            }

            return blog;
        }
Пример #23
0
        // Sample rows of the directory listing this method parses:
        //<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="01CreateScreen/">01CreateScreen/</a></td><td align="right">2016-01-11 10:23  </td><td align="right">  - </td><td>&nbsp;</td></tr>
        //<tr><td valign="top"><img src="/icons/text.gif" alt="[TXT]"></td><td><a href="Test_money.py">Test_money.py</a></td><td align="right">2016-01-08 15:53  </td><td align="right">1.1K</td><td>&nbsp;</td></tr>

        /// <summary>
        /// Downloads the directory listing at <paramref name="url"/> and returns its
        /// entries: folders first (rows with the folder icon), then text files (rows with
        /// the text icon, including their last-modified time).
        /// </summary>
        /// <param name="url">Address of the listing; entry names are appended to it as-is.</param>
        /// <returns>The folders and text files found in the listing.</returns>
        public static List<Resource> GetDirectoryContents(string url)
        {
            List<Resource> Rlist = new List<Resource>();

            // WebClient is disposable; release it as soon as the listing is downloaded.
            string pageHtml;
            using (WebClient MyWebClient = new WebClient())
            {
                MyWebClient.Credentials = CredentialCache.DefaultCredentials;
                Byte[] pageData = MyWebClient.DownloadData(url);
                pageHtml = Encoding.UTF8.GetString(pageData); // the listing is decoded as UTF-8
            }
            var htmlSource = new JumonyParser().Parse(pageHtml);

            // Folder rows: the icon's grandparent is the table row; its first link holds the name.
            var list = htmlSource.Find("img[src=/icons/folder.gif]");
            foreach(var one in list)
            {
                Resource a = new Resource();
                a.Name = one.Parent().Parent().Find("a").ElementAt(0).InnerText();
                a.Url = url  + a.Name;
                a.IsFolder = true;
                Rlist.Add(a);
            }

            // Text file rows: same layout, plus the first right-aligned cell with the timestamp.
            list = htmlSource.Find("img[src=/icons/text.gif]");
            foreach (var one in list)
            {
                Resource a = new Resource();
                var row = one.Parent().Parent();
                a.Name = row.Find("a").ElementAt(0).InnerText();
                a.Url = url  + a.Name;
                a.IsFolder = false;
                string t1 = row.Find("td[align=right]").ElementAt(0).InnerText();
                a.LastModified = DateTime.Parse(t1);
                Rlist.Add(a);
            }
            return Rlist;
        }
Пример #24
0
    /// <summary>
    /// Loads the HTML document stored at <paramref name="filepath"/>, wraps it in a
    /// <c>TranslateTask</c> and initializes that task.
    /// </summary>
    /// <param name="filepath">Path of the document file.</param>
    /// <returns>The initialized translate task.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="filepath"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The file does not exist.</exception>
    public static TranslateTask LoadTranslateTask( string filepath )
    {

      if ( filepath == null )
        throw new ArgumentNullException( "filepath" );

      if ( !File.Exists( filepath ) )
        throw new InvalidOperationException( "文件不存在" );


      // File.OpenText returns a StreamReader that holds the file open until it is disposed.
      using ( var reader = File.OpenText( filepath ) )
      {
        var document = new JumonyParser().LoadDocument( reader, new Uri( filepath ) );

        var task = new TranslateTask( document );

        task.Initialize();

        return task;
      }

    }
Пример #25
0
    /// <summary>
    /// Small benchmark: loads the sina.com.cn home page once and then times three ways
    /// of running the same ten selectors 200 times each: filtering the lazy descendant
    /// sequence, filtering a descendant array, and calling <c>Find</c> on the document.
    /// The elapsed time of each variant is written to the console.
    /// </summary>
    /// <param name="args">Command line arguments (not used).</param>
    static void Main( string[] args )
    {
      const int rounds = 200;

      // The selectors are executed in exactly this order in every round.
      string[] selectors =
      {
        "body p a",
        "p > a",
        "p[class] a",
        "p a[href]",
        "p + a",
        "div a",
        "p div a",
        "a img[src]",
        "div img",
        "body img[src]"
      };

      var document = new JumonyParser().LoadDocument( "http://www.sina.com.cn/", Encoding.GetEncoding( "GB2312" ) );

      Stopwatch watch = new Stopwatch();

      // Variant 1: filter the lazily enumerated descendants.
      watch.Restart();
      for ( int round = 0; round < rounds; round++ )
      {
        var snapshot = document.Descendants().ToArray();

        foreach ( var selector in selectors )
        {
          document.Descendants().FilterBy( selector ).FirstOrDefault();
        }
      }
      watch.Stop();
      Console.WriteLine( watch.Elapsed );

      // Variant 2: copy the descendants into an array before filtering.
      watch.Restart();
      for ( int round = 0; round < rounds; round++ )
      {
        var snapshot = document.Descendants().ToArray();

        foreach ( var selector in selectors )
        {
          document.Descendants().ToArray().FilterBy( selector ).FirstOrDefault();
        }
      }
      watch.Stop();
      Console.WriteLine( watch.Elapsed );

      // Variant 3: let the document run the selector itself.
      watch.Restart();
      for ( int round = 0; round < rounds; round++ )
      {
        var snapshot = document.Descendants().ToArray();

        foreach ( var selector in selectors )
        {
          document.Find( selector ).FirstOrDefault();
        }
      }
      watch.Stop();
      Console.WriteLine( watch.Elapsed );

      Console.ReadKey();
    }
Пример #26
0
        /// <summary>
        /// Requests the developer settings page and reads AppId, AppSecret, URL, Token,
        /// EncodingAESKey and the AES type from the <c>.frm_vertical_pt</c> elements
        /// inside <c>.developer_info_wrp</c>, by position.
        /// </summary>
        /// <returns>
        /// The developer info, or null when the response status is not OK, the page has no
        /// developer info section, or an exception occurs before the fields are read.
        /// </returns>
        public WechatDevInfo GetWechatDevInfo()
        {
            // TODO: obtain AppId and AppSecret
            WechatDevInfo devInfo = null;
            HttpResponseMessage response = null;
            try
            {
                _httpClient = new HttpClient(handler);
                SetHeader();

                response = _httpClient.GetAsync(WeChatUrl.DEV_URL + token).Result;
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    return devInfo;
                }

                // Connected; read and parse the page body.
                string pageHtml = response.Content.ReadAsStringAsync().Result;
                var wrappers = new JumonyParser().Parse(pageHtml).Find(".developer_info_wrp");
                if (wrappers == null || !wrappers.Any())
                {
                    return devInfo;
                }

                var fields = wrappers.Find(".frm_vertical_pt").ToList();
                devInfo = new WechatDevInfo();

                // Each field is identified by its position; blank fields are skipped and a
                // failure on one field does not stop the others from being read.
                for (int index = 0; index < fields.Count; index++)
                {
                    try
                    {
                        var text = fields[index].InnerText().Trim();
                        if (string.IsNullOrWhiteSpace(text))
                        {
                            continue;
                        }

                        if (index == 0)
                        {
                            devInfo.AppId = text;
                        }
                        else if (index == 1)
                        {
                            devInfo.AppSecret = text;
                        }
                        else if (index == 2)
                        {
                            devInfo.URL = text;
                        }
                        else if (index == 3)
                        {
                            devInfo.Token = text;
                        }
                        else if (index == 4)
                        {
                            devInfo.EncodingAESKey = text;
                        }
                        else if (index == 5)
                        {
                            SetEncodingAESType(devInfo.EncodingAESType, text);
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: ignore a field that cannot be read.
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: any failure simply yields whatever was collected so far.
            }
            finally
            {
                if (response != null)
                {
                    response.Dispose();
                }
            }

            return devInfo;
        }
Пример #27
0
        /// <summary>
        /// Requests the WeChat official-account settings page and scrapes the account
        /// profile out of the returned HTML.
        /// </summary>
        /// <returns>
        /// A <see cref="WechatAccountInfo"/> filled with whatever fields could be read, or
        /// <c>null</c> when the request throws, the status is not 200 OK, or the page
        /// contains no matching setting elements.
        /// </returns>
        public WechatAccountInfo GetAccount()
        {
            WechatAccountInfo account = null;
            HttpResponseMessage response = null;
            try
            {
                _httpClient = new HttpClient(handler);
                SetHeader();

                // The method is synchronous, so the async calls are blocked on with .Result.
                response = _httpClient.GetAsync(WeChatUrl.ACCOUNT_INFO_URL + token).Result;
                if (response.StatusCode == HttpStatusCode.OK)
                {
                    // Connected; read the response body.
                    string result = response.Content.ReadAsStringAsync().Result;

                    var parser = new JumonyParser();
                    var htmlDoc = parser.Parse(result);
                    var htmlEles = htmlDoc.Find(".account_setting_area .account_setting_item .meta_content");

                    // Materialize the query once instead of enumerating it for Count() and again for ToList().
                    var setting = htmlEles == null ? null : htmlEles.ToList();
                    if (setting != null && setting.Count > 0)
                    {
                        account = new WechatAccountInfo();

                        #region Parse the HTML and extract the text fields
                        // Fields are identified purely by their position in the settings list.
                        for (int i = 0; i < setting.Count; i++)
                        {
                            try
                            {
                                var infoText = setting[i].InnerText().Trim();

                                // Items 0 and 1 carry images rather than text, so only later items are skipped when blank.
                                if (i > 1 && string.IsNullOrWhiteSpace(infoText))
                                {
                                    continue;
                                }

                                switch (i)
                                {
                                    case 0:
                                    case 1:
                                        {
                                            // Check for a missing <img> or src attribute explicitly rather than
                                            // relying on a swallowed NullReferenceException.
                                            var image = setting[i].Find("img").FirstOrDefault();
                                            var source = image == null ? null : image.Attribute("src");
                                            if (source != null)
                                            {
                                                if (i == 0)
                                                {
                                                    account.HeadImage = source.AttributeValue;
                                                }
                                                else
                                                {
                                                    account.QRCode = source.AttributeValue;
                                                }
                                            }
                                            break;
                                        }
                                    case 2:
                                        account.AccountName = infoText;
                                        break;
                                    case 3:
                                        account.WechatNumber = infoText;
                                        break;
                                    case 4:
                                        // NOTE(review): the current property value is handed to the helper; whether the
                                        // helper can update the account through it is not visible here - confirm.
                                        SetWechatType(account.WechatType, infoText);
                                        break;
                                    case 5:
                                        account.Introduces = infoText;
                                        break;
                                    case 6:
                                        // NOTE(review): same calling pattern as SetWechatType above - confirm.
                                        SetAuthenticate(account.Authenticate, infoText);
                                        break;
                                    case 7:
                                        account.PlaceAddress = infoText;
                                        break;
                                    case 8:
                                        account.SubjectInfo = infoText;
                                        break;
                                    case 9:
                                        account.LoginEmail = infoText;
                                        break;
                                    case 10:
                                        account.AccountId = infoText;
                                        break;
                                    default:
                                        break;
                                }
                            }
                            catch (Exception)
                            {
                                // Best effort: a failure on one field must not discard the fields already read.
                            }
                        }
                        #endregion
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: any request or parsing failure is reported to the caller
                // through the return value (null or a partially filled account).
            }
            finally
            {
                if (response != null)
                {
                    response.Dispose();
                }
            }
            return account;
        }
Пример #28
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, reads the page count from the
        /// <c>maxPage</c> input inside <c>.pagediv</c>, and hands both to <c>GetUrl</c>.
        /// </summary>
        /// <param name="url">Address of the page to query.</param>
        private void GetPage(string url)
        {
            string html;

            // WebClient is disposable; release it as soon as the download has finished.
            using (WebClient client = new WebClient())
            {
                html = client.DownloadString(url);
            }

            IHtmlDocument document = new JumonyParser().Parse(html);

            // Default to a single page when the page does not advertise a usable maxPage value.
            int page = 1;
            foreach (IHtmlElement input in document.Find(".pagediv input"))
            {
                if (input.Attribute("name").Value() != "maxPage")
                {
                    continue;
                }

                // The value comes from a remote page, so a malformed or missing number
                // keeps the current page count instead of throwing.
                int parsed;
                if (int.TryParse(input.Attribute("value").Value(), out parsed))
                {
                    page = parsed;
                }
            }

            GetUrl(url, page);
        }
Пример #29
0
        /// <summary>
        /// Downloads result pages 1 through <paramref name="maxPage"/> of <paramref name="url"/>
        /// and, for every business link found on a page, writes a row (name, url, status 0)
        /// through the kellysearch_fax table adapter.
        /// </summary>
        /// <param name="url">Base search address; <c>&amp;page=N</c> is appended for each page.</param>
        /// <param name="maxPage">Number of the last page to download (inclusive).</param>
        private void GetUrl(string url, int maxPage)
        {
            // One client serves every page and is disposed when the loop ends.
            using (WebClient client = new WebClient())
            {
                for (int i = 1; i <= maxPage; i++)
                {
                    string pageUrl = url + "&page=" + i;
                    string html = client.DownloadString(pageUrl);
                    IHtmlDocument document = new JumonyParser().Parse(html);
                    IEnumerable<IHtmlElement> links = document.Find(".searchresult_zonee .heading_address a");

                    foreach (IHtmlElement link in links)
                    {
                        try
                        {
                            string businessUrl = "http://www.kellysearch.com/" + link.Attribute("href").Value();
                            string name = link.InnerText();

                            // The table and the adapter are both disposable; release them per row.
                            using (faxDataSet.kellysearch_faxDataTable dt = new faxDataSet.kellysearch_faxDataTable())
                            using (faxDataSetTableAdapters.kellysearch_faxTableAdapter apt = new faxDataSetTableAdapters.kellysearch_faxTableAdapter())
                            {
                                DataRow row = dt.NewRow();
                                row["name"] = name;
                                row["status"] = 0;
                                row["url"] = businessUrl;
                                dt.Rows.Add(row);
                                apt.Update(dt);
                            }

                            Console.WriteLine(name + businessUrl);
                        }
                        catch (Exception ex)
                        {
                            // A failure on one link is reported and the remaining links are still processed.
                            Console.WriteLine(ex.Message);
                        }
                    }
                }
            }
        }
Пример #30
0
 /// <summary>
 /// Loads an HTML document from a file located under the current working directory.
 /// </summary>
 /// <param name="filename">File name or relative path, combined with <c>Environment.CurrentDirectory</c>.</param>
 /// <returns>The document produced by <c>JumonyParser.LoadDocument</c>.</returns>
 private static IHtmlDocument LoadDocument( string filename )
 {
   string fullPath = Path.Combine( Environment.CurrentDirectory, filename );
   JumonyParser parser = new JumonyParser();
   return parser.LoadDocument( fullPath );
 }