/// <summary>
/// Walks every <c>tr</c> found under the <c>div</c> elements whose identity is "main" and stores
/// one <c>Class1</c> record per row into <paramref name="RTc"/>, starting at index 0.
/// </summary>
/// <param name="RTc">
/// Destination array. It is filled sequentially and is not bounds-checked here, so it must have
/// room for every row that is found.
/// </param>
/// <param name="document">Parsed HTML document to read from.</param>
private void GetUrlText_1(Class1[] RTc, IHtmlDocument document)
        {
            IEnumerable <IHtmlElement> mainDivs = document.Find("div").Where(d => d.Identity() == "main");
            IEnumerable <IHtmlElement> rows     = mainDivs.Find("tr");
            int r = 0;

            foreach (var item in rows)
            {
                Class1       _class = new Class1();
                IHtmlElement item_a = item.FindFirst(".bthead>a");

                _class.TextName   = item_a.InnerText().Trim();
                _class.href       = item_a.Attribute("href").Value().Trim();
                _class.Quyu       = GetN_value(item, ".a_xq1");
                _class.PersonName = GetN_value(item, ".qj-lijjrname").Replace(":", "").Trim();
                _class.Laiyuan    = GetN_value(item, "label");

                // The address is taken as the third entry of whatever LIST_G returns for the
                // space-separated tokens (NOTE(review): LIST_G is defined elsewhere — presumably
                // it drops empty tokens; confirm).
                string[]      leftTokens = GetN_value(item, ".qj-listleft").Split(' ');
                List <string> leftList   = LIST_G(leftTokens);
                _class.Address = leftList[2];

                // The indices below are written for a 10-token split; j shifts them when the
                // split produced a different number of tokens (j is 0 for exactly 10 tokens).
                string[] ssp = GetN_value(item, ".qj-listright").Split(' ');
                int      j   = 10 - ssp.Length;
                _class.SumMoney  = ssp[1 - j] + ssp[2 - j];
                _class.PingMoney = ssp[4 - j];
                _class.Allpm     = ssp[7 - j] + ssp[9 - j];

                // Only a three-token value carries a date; anything else yields an empty string.
                string[] datetime = GetN_value(item, ".qj-listjjr").Split(' ');
                _class.datetime = datetime.Length == 3 ? datetime[2] : "";

                RTc[r] = _class;
                r++;
            }
        }
Example #2
0
 /// <summary>
 /// Collects the inner text of every element matched by a CSS selector.
 /// </summary>
 /// <param name="cssKey">CSS selector expression to look up in <c>doc</c>.</param>
 /// <returns>
 /// The inner text of each matching element, or <c>null</c> when nothing matches, when the
 /// lookup yields <c>null</c>, or when any exception is raised while searching.
 /// </returns>
 public List <string> GetClassTxt(string cssKey)
 {
     try
     {
         if (!doc.Exists(cssKey))
         {
             return null;
         }

         var matches = doc.Find(cssKey);
         if (matches == null)
         {
             return null;
         }

         List <string> texts = new List <string>();
         foreach (var element in matches)
         {
             texts.Add(element.InnerText());
         }

         return texts;
     }
     catch
     {
         // Best-effort lookup: any failure is reported to the caller as null.
         return null;
     }
 }
Example #3
0
        /// <summary>
        /// Removes all stylesheet and script file references from the document.
        /// </summary>
        /// <param name="document">Document whose resource references are removed; a null document is ignored.</param>
        /// <param name="headScopeOnly">Whether to remove only the references located inside the &lt;head&gt; element.</param>
        public void ClearAllReference(IHtmlDocument document, bool headScopeOnly = true)
        {
            if (document == null)
            {
                return;
            }

            // Same selector pair in both cases; the head-only variant just prefixes each part with "head ".
            string selector = headScopeOnly
                ? "head link[rel=stylesheet][href$=.css], head script[src$=.js]"
                : "link[rel=stylesheet][href$=.css], script[src$=.js]";

            document.Find(selector).Remove();
        }
Example #4
0
        /// <summary>
        /// Lazily yields the absolute URI, with its fragment cleared, of every <c>a[href]</c>
        /// element in the document.
        /// </summary>
        /// <param name="document">Document to scan; its <c>DocumentUri</c> is the base for resolving each href.</param>
        /// <returns>One absolute URI string per anchor, in document search order.</returns>
        protected virtual IEnumerable <string> FindLinks(IHtmlDocument document)
        {
            // NOTE(review): presumably rewrites relative URIs in the document to absolute ones — confirm.
            document.ResolveUriToAbsoluate();

            foreach (var anchor in document.Find("a[href]"))
            {
                var href    = anchor.Attribute("href").Value();
                var builder = new UriBuilder(new Uri(document.DocumentUri, href)) { Fragment = null };

                yield return builder.Uri.AbsoluteUri;
            }
        }
Example #5
0
        /// <summary>
        /// Fills <c>model.capitalInfo</c> (capital-related information) from the <c>tr td</c> cells
        /// of the <paramref name="i"/>-th <c>.f-lbiao</c> element.
        /// </summary>
        /// <param name="sorceIhtml">Source document.</param>
        /// <param name="model">Model to update; the same instance is returned.</param>
        /// <param name="i">Zero-based index of the <c>.f-lbiao</c> element to read.</param>
        /// <returns>The <paramref name="model"/> instance that was passed in.</returns>
        public static TargetModel Modular02(IHtmlDocument sorceIhtml, TargetModel model, int i)
        {
            var section = sorceIhtml.Find(".f-lbiao").ElementAt(i);
            var cells   = section.Find("tr td").ToList();

            model.capitalInfo = FillModel <CapitalInfo>(cells);

            return model;
        }
Example #6
0
        /// <summary>
        /// Fills <c>model.orgCodeInfo</c> (organization code information) from the <c>tr td</c>
        /// cells of the <paramref name="i"/>-th <c>.f-lbiao</c> element.
        /// </summary>
        /// <param name="sorceIhtml">Source document.</param>
        /// <param name="model">Model to update; the same instance is returned.</param>
        /// <param name="i">Zero-based index of the <c>.f-lbiao</c> element to read.</param>
        /// <returns>The <paramref name="model"/> instance that was passed in.</returns>
        public static TargetModel Modular03(IHtmlDocument sorceIhtml, TargetModel model, int i)
        {
            var section = sorceIhtml.Find(".f-lbiao").ElementAt(i);
            var cells   = section.Find("tr td").ToList();

            model.orgCodeInfo = FillModel <OrgCodeInfo>(cells);

            return model;
        }
Example #7
0
        /// <summary>
        /// Determines the scope in which a partial view is rendered.
        /// </summary>
        /// <param name="document">The loaded document.</param>
        /// <returns>The document's single <c>body</c> element, or the document itself when there is none.</returns>
        protected virtual IHtmlContainer GetPartialScope(IHtmlDocument document)
        {
            IHtmlContainer scope = document.Find("body").SingleOrDefault();

            if (scope != null)
            {
                return scope;
            }

            return document;
        }
Example #8
0
        //mshtml.HTMLDocument mhtml = (mshtml.HTMLDocument)Webbrowser2.Document;
        //string html = mhtml.body.innerHTML;
        //IHtmlDocument document = new JumonyParser().Parse(html);
        //IEnumerable<IHtmlElement> result = document.Find("div").Where(i => i.Identity() == "all-list");
        //result = result.Find("li");
        //    foreach (var li in result)
        //    {
        //        Class1 _class = new Class1();

        //_class.TextName = GetN_value(li, ".media-body-title>a");
        //_class.href = li.FindFirst(".media-body-title>a").Attribute("href").Value();
        //_class.Quyu = "百姓";
        //        _class.Laiyuan = "百姓";
        //        _class.Address = GetN_value(li, ".typo-small");

        //string this_spk = GetN_value(li, ".typo-small");
        //_class.SumMoney = GetN_value(li, ".highlight");
        //string[] thip = this_spk.Split('/');
        //_class.PingMoney = thip.Length>1? thip[1]:"";
        //        _class.Allpm = "";

        //        _class.datetime = GetN_value(li, ".pull-right");
        //        if (li.Exists("img"))
        //        {
        //            if (li.FindFirst("img").Attribute("src").Value().IndexOf("http") < 0)
        //            {
        //                _class.Image_str = string.Empty;
        //            }
        //            else
        //            {
        //                _class.Image_str = "有";
        //            }
        //        }
        //        else if (li.Exists(".img"))
        //        {
        //            if (li.FindFirst(".img").Attribute("src").Value().IndexOf("http") < 0)
        //            {
        //                _class.Image_str = string.Empty;
        //            }
        //            else
        //            {
        //                _class.Image_str = "有";
        //            }
        //        }
        //        else
        //        {
        //            _class.Image_str = string.Empty;
        //        }


        //        L_Class.Add(_class);
        //    }

        #endregion

        #endregion

        #region 读取网站区域
        /// <summary>
        /// Reads every link inside the <c>div</c> whose identity is "qySelectFirst" and appends one
        /// <c>Quyu</c> entry per link to <c>Quyulist_58</c>.
        /// </summary>
        /// <param name="document">Parsed HTML document to read from.</param>
        private void GetSelectQuYu(IHtmlDocument document)
        {
            IEnumerable <IHtmlElement> links = document.Find("div").Where(i => i.Identity() == "qySelectFirst").Find("a");

            foreach (var link in links)
            {
                // Every entry carries the same Id value, which is declared outside this method.
                Quyu area = new Quyu
                {
                    ID   = Id,
                    Name = link.InnerText(),
                    Href = link.Attribute("href").Value()
                };

                Quyulist_58.Add(area);
            }
        }
Example #9
0
        /// <summary>
        /// Parses book entries out of the document using CSS selectors and appends each book that
        /// is not already present to <c>bookList</c>.
        /// </summary>
        /// <param name="doc">Document to search for book entries.</param>
        /// <param name="count">Incremented once for every book that is added to <c>bookList</c>.</param>
        private static void ParseDataWithCss(IHtmlDocument doc, ref int count)
        {
            IEnumerable <IHtmlElement> books = doc.Find("body > div.sokk-body > div > div > div.col-sm-9.col-md-10.col-lg-10.col-xs-12 > div > div.books > div > div");

            foreach (IHtmlElement bookInfo in books)
            {
                var book = new Book();
                // Title and URL come from the first matching title link only (the loop breaks at once).
                foreach (IHtmlElement info in bookInfo.Find("div > div > div > div.title >a"))
                {
                    book.Title = info.InnerText();
                    book.Url   = new Uri("http://www.yousuu.com" + info.Attribute("href").Value());
                    break;
                }

                // No break here: if several spans match, the last one determines RateNumber.
                foreach (IHtmlElement info in bookInfo.Find("div > div > div > div.rating > span:nth-child(3)"))
                {
                    book.RateNumber = info.InnerText();
                }

                foreach (IHtmlElement info in bookInfo.Find("div > div > div > div.abstract"))
                {
                    // NOTE(review): info.ToString() presumably yields the element's raw markup — confirm.
                    string text = info.ToString();
                    var    li   = text.Split(':', ':', '>', '<');
                    // The fields are read by position, so any split that does not produce exactly
                    // 19 pieces is treated as unparseable and the whole book is discarded.
                    if (li.Length != 19)
                    {
                        book = null;
                        break;
                    }

                    book.Author     = li[3];
                    book.WordNumber = li[6];
                    book.UpdateTime = li[9];
                    book.Score      = li[14];
                }

                // The duplicate check and the insert happen under one lock on bookList.
                lock (bookList)
                {
                    if (book != null && !bookList.Contains(book))
                    {
                        count++;
                        bookList.Add(book);// add the book to the generic list
                    }
                }
                //Console.WriteLine(book?.ToString());// print the book information to the console
            }
        }
Example #10
0
        /// <summary>
        /// Adds a <![CDATA[<meta name="generator" content="Jumony" />]]> element to the document's head.
        /// Nothing happens when the document has no DOM modifier or no <c>html head</c> element.
        /// </summary>
        /// <param name="document">Document to annotate.</param>
        private void AddGeneratorMetaData(IHtmlDocument document)
        {
            var domModifier = document.DomModifier;
            if (domModifier == null)
            {
                return;
            }

            var head = document.Find("html head").FirstOrDefault();
            if (head == null)
            {
                return;
            }

            var meta = domModifier.AddElement(head, "meta");
            meta.SetAttribute("name", "generator");
            meta.SetAttribute("content", "Jumony");
        }
Example #11
0
        /// <summary>
        /// Fills <c>model.taxInfo</c> (tax registration information) from the <c>th</c> and
        /// <c>td</c> cells found in the rows of the <paramref name="x"/>-th <c>.f-lbiao</c> element.
        /// </summary>
        /// <param name="sorceIhtml">Source document.</param>
        /// <param name="model">Model to update; the same instance is returned.</param>
        /// <param name="x">Zero-based index of the <c>.f-lbiao</c> element to read.</param>
        /// <returns>The <paramref name="model"/> instance that was passed in.</returns>
        public static TargetModel Modular04(IHtmlDocument sorceIhtml, TargetModel model, int x)
        {
            var trHtml_tax = sorceIhtml.Find(".f-lbiao").ElementAt(x).Find("tr").ToList();

            // Header cells and value cells are collected across all rows and handed over as two lists.
            var headerCells = trHtml_tax.Find("th").ToList();
            var valueCells  = trHtml_tax.Find("td").ToList();

            model.taxInfo = FillModel <TaxInfo>(headerCells, valueCells);
            return model;
        }
Example #12
0
        /// <summary>
        /// Builds one <c>Class1</c> record for every <c>.list-items</c> element in the document and
        /// appends it to <paramref name="L_Class"/>.
        /// </summary>
        /// <param name="document">Parsed HTML document to read from.</param>
        /// <param name="L_Class">List that receives the extracted records.</param>
        private void GetUrlText_2(IHtmlDocument document, List <Class1> L_Class)
        {
            foreach (var item in document.Find(".list-items"))
            {
                Class1       _class = new Class1();
                IHtmlElement item_a = item.FindFirst("a");

                // An item counts as having an image only when it has a non-empty src that is not
                // the "default.jpg" placeholder, wherever that name appears in the value.
                string img_str = item.Exists("img") ? item.FindFirst("img").Attribute("src").Value() : "";
                _class.Image_Count = !string.IsNullOrEmpty(img_str) && !img_str.Contains("default.jpg") ? 1 : 0;

                // The fields below are picked by position from the inner text of the item's div elements.
                List <string> ls = new List <string>();
                foreach (var d in item.Find("div"))
                {
                    ls.Add(d.InnerText());
                }
                _class.TextName = ls[2];
                _class.SumMoney = ls[4];
                _class.Quyu     = "赶集";
                _class.Allpm    = ls[1];
                _class.Address  = ls[0];
                _class.href     = item_a.Attribute("href").Value().Trim();

                // The date is the text of the first tag that follows the first HTML comment opener
                // in the link's markup: isolate the span between "<!--" and "-->", then take what
                // lies between the next '>' and the following '<'.
                string item_aa = item_a.ToString().Replace("<!--", "stu1").Replace("-->", "stp2");
                item_aa         = item_aa.Substring(item_aa.IndexOf("stu1") + 4);
                item_aa         = item_aa.Substring(0, item_aa.IndexOf("stp2"));
                item_aa         = item_aa.Substring(item_aa.IndexOf(">") + 1);
                item_aa         = item_aa.Substring(0, item_aa.IndexOf("<"));
                _class.datetime = item_aa;

                // The area is read from token 6 of Allpm, falling back to token 5 when token 6 is
                // blank once the '㎡' sign is stripped.
                string[] pm   = _class.Allpm.Split(' ');
                string   area = pm[6].Replace('㎡', ' ').Trim();
                if (area.Length <= 0)
                {
                    area = pm[5].Replace('㎡', ' ').Trim();
                }

                // Unit price = total (with "万元" stripped, scaled by 10000) divided by the area.
                double pm_int = Convert.ToDouble(area);
                double ss     = Convert.ToDouble(_class.SumMoney.Replace("万元", "").Trim()) / pm_int;
                _class.PingMoney = "≈" + Convert.ToInt32(ss * 10000).ToString();
                _class.Image_str = _class.Image_Count > 0 ? "有" : string.Empty;

                L_Class.Add(_class);
                // r is declared outside this method; it is advanced once per record added.
                r++;
            }
        }
Example #13
0
        /// <summary>
        /// Lazily yields a loaded resource for every element in the document that carries a
        /// <c>src</c> attribute with a usable value.
        /// </summary>
        /// <param name="document">Document to scan; its <c>DocumentUri</c> is the base for resolving each value.</param>
        /// <returns>The result of <c>LoadResource</c> for each <c>src</c> value that resolves to a URI.</returns>
        public static IEnumerable <LinkedResource> GetResources(IHtmlDocument document)
        {
            foreach (var node in document.Find("[src]"))
            {
                var source = node.Attribute("src").Value();

                // Blank values and values that cannot be combined with the document URI are skipped.
                Uri target;
                if (!string.IsNullOrWhiteSpace(source) && Uri.TryCreate(document.DocumentUri, source, out target))
                {
                    yield return LoadResource(target);
                }
            }
        }
Example #14
0
 /// <summary>
 /// Writes to the test context how many elements the given selector matches in the document.
 /// </summary>
 private void TestSelector( IHtmlDocument document, string selector )
 {
   var matched = document.Find( selector ).Count();

   TestContext.WriteLine( "Selector \"{0}\" seleted {1} elements", selector, matched );
 }
Example #15
0
    /// <summary>
    /// Determines the scope in which a partial view is rendered.
    /// </summary>
    /// <param name="document">The loaded document.</param>
    /// <returns>The document's single <c>body</c> element, or the document itself when there is none.</returns>
    protected virtual IHtmlContainer GetPartialScope( IHtmlDocument document )
    {
      IHtmlContainer scope = document.Find( "body" ).SingleOrDefault();

      if ( scope != null )
        return scope;

      return document;
    }
Example #16
0
 /// <summary>
 /// Writes to the test context how many elements the given selector matches in the document.
 /// </summary>
 private void TestSelector(IHtmlDocument document, string selector)
 {
     var matched = document.Find(selector).Count();

     TestContext.WriteLine("Selector \"{0}\" seleted {1} elements", selector, matched);
 }
Example #17
0
 /// <summary>
 /// Finds the elements within the document that match a selector.
 /// </summary>
 /// <param name="selector">CSS selector expression.</param>
 /// <returns>The elements that satisfy the selector.</returns>
 protected IEnumerable <IHtmlElement> Find(string selector)
 {
     IEnumerable <IHtmlElement> matches = Document.Find(selector);

     return matches;
 }
Example #18
0
    /// <summary>
    /// Adds a <![CDATA[<meta name="generator" content="Jumony" />]]> element to the document's head.
    /// Nothing happens when the document has no DOM modifier or no <c>html head</c> element.
    /// </summary>
    /// <param name="document">Document to annotate.</param>
    private void AddGeneratorMetaData( IHtmlDocument document )
    {
      var domModifier = document.DomModifier;
      if ( domModifier == null )
        return;

      var head = document.Find( "html head" ).FirstOrDefault();
      if ( head == null )
        return;

      var meta = domModifier.AddElement( head, "meta" );
      meta.SetAttribute( "name", "generator" );
      meta.SetAttribute( "content", "Jumony" );
    }
Example #19
0
    /// <summary>
    /// Lazily yields a loaded resource for every element in the document that carries a
    /// <c>src</c> attribute with a usable value.
    /// </summary>
    /// <param name="document">Document to scan; its <c>DocumentUri</c> is the base for resolving each value.</param>
    /// <returns>The result of <c>LoadResource</c> for each <c>src</c> value that resolves to a URI.</returns>
    public static IEnumerable<LinkedResource> GetResources( IHtmlDocument document )
    {
      foreach ( var node in document.Find( "[src]" ) )
      {
        var source = node.Attribute( "src" ).Value();

        // Blank values and values that cannot be combined with the document URI are skipped.
        Uri target;
        if ( !string.IsNullOrWhiteSpace( source ) && Uri.TryCreate( document.DocumentUri, source, out target ) )
          yield return LoadResource( target );
      }
    }
Example #20
0
    /// <summary>
    /// Removes all stylesheet and script file references from the document.
    /// </summary>
    /// <param name="document">Document whose resource references are removed; a null document is ignored.</param>
    /// <param name="headScopeOnly">Whether to remove only the references located inside the &lt;head&gt; element.</param>
    public void ClearAllReference( IHtmlDocument document, bool headScopeOnly = true )
    {
      if ( document == null )
        return;

      // Same selector pair in both cases; the head-only variant just prefixes each part with "head ".
      string selector = headScopeOnly
        ? "head link[rel=stylesheet][href$=.css], head script[src$=.js]"
        : "link[rel=stylesheet][href$=.css], script[src$=.js]";

      document.Find( selector ).Remove();
    }