示例#1
0
        //新浪
        /// <summary>
        /// Scans a list of div nodes for the Sina article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the first div whose lower-cased id equals "artibody" or whose
        /// lower-cased class contains "articalcontent"; an empty string if none matches.
        /// </returns>
        public string GetSinaBody(NodeList nodelist)
        {
            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                // id "artibody": regular articles.
                // class "articalcontent": history-channel articles and blog content.
                bool isArticleBody = idValue.Equals("artibody") || classValue.Contains("articalcontent");
                if (isArticleBody)
                {
                    return candidate.ToHtml();
                }

                // Gallery pages (id "si_cont") are not handled here: per the original note their
                // content is only injected after the page has finished loading, so the HTML has
                // to be obtained some other way (e.g. an iframe read after its onload).
            }

            return "";
        }
示例#2
0
        //------------------
        //获取新浪的内容页,id=artibody
        //------------------
        /// <summary>
        /// Builds the article HTML for a Sina content page.
        /// </summary>
        /// <param name="html">Raw HTML of the page.</param>
        /// <returns>
        /// A centered <c>h1</c> holding the page title followed by the HTML of the first div
        /// whose lower-cased id equals "artibody" or whose lower-cased class contains
        /// "articalcontent". When no such div exists the result of <c>GetBodyHtml(html)</c>
        /// is used instead. The value is passed through <c>ReplaceChinaChar</c> before returning.
        /// </returns>
        public string GetSinaArticle(string html)
        {
            HtmlPage page    = GetPage(html);
            NodeList divs    = GetTagList(html, "div");
            string   article = "";

            for (int index = 0; index < divs.Size(); index++)
            {
                Div    candidate  = (Div)divs[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                // id "artibody": regular articles.
                // class "articalcontent": history-channel articles and blog content.
                if (idValue.Equals("artibody") || classValue.Contains("articalcontent"))
                {
                    string heading = string.Format("<h1 style='text-align: center;'>{0}</h1>", page.Title);
                    article = heading + candidate.ToHtml();
                    break;
                }

                // Gallery pages (id "si_cont") are not handled: per the original note their content
                // is only loaded after the page finishes loading and must be fetched another way.
            }

            // Nothing recognised: fall back to the helper that extracts the page body.
            if (string.IsNullOrEmpty(article))
            {
                article = GetBodyHtml(html);
            }

            return ReplaceChinaChar(article);
        }
示例#3
0
        //腾讯
        /// <summary>
        /// Scans a list of div nodes for the Tencent (QQ) article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the first div matching one of the known id / class markers
        /// (compared after lower-casing); an empty string if none matches.
        /// </returns>
        public string GetQQBody(NodeList nodelist)
        {
            // Lower-cased markers, computed once instead of on every iteration.
            string topNewsId       = "Cnt-Main-Article-QQ".ToLower(); // top news
            string todaysTopicId   = "articleContent".ToLower();      // "Today's Topic"
            string singleImageId   = "slide_bigimage_temp".ToLower(); // single-image view
            string articleModClass = "article_mod".ToLower();

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                bool idMatches = idValue.Equals(topNewsId)
                                 || idValue.Equals(todaysTopicId)
                                 || idValue.Equals(singleImageId);
                bool classMatches = classValue.Contains(articleModClass)
                                    || classValue.Contains("box-left");

                if (idMatches || classMatches)
                {
                    return candidate.ToHtml();
                }
            }

            return "";
        }
示例#4
0
        //cctv
        /// <summary>
        /// Scans a list of div nodes for the CCTV article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the LAST div that matches one of the known markers (the scan does not
        /// stop at the first hit); an empty string if none matches.
        /// </returns>
        public string GetCctvBody(NodeList nodelist)
        {
            string lastMatch = "";

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                bool isContent =
                    idValue.Equals("content_body")        // children's channel
                    || idValue.Equals("content")          // film, lottery
                    || classValue.Contains("col_w660")    // news, domestic, international, economy, military, commentary,
                                                          // entertainment, children, calligraphy & painting, travel, autos,
                                                          // fashion, history, agriculture, health, variety, opera, music
                    || classValue.Contains("col_650")     // commentary
                    || classValue.Contains("textcontent"); // games

                if (isContent)
                {
                    lastMatch = candidate.ToHtml();
                }
            }

            return lastMatch;
        }
示例#5
0
        //网易
        /// <summary>
        /// Scans a list of div nodes for the NetEase (163) article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the first div whose lower-cased id equals "endtext" or whose lower-cased
        /// class contains one of the known fragments; an empty string if none matches.
        /// </returns>
        public string Get163Body(NodeList nodelist)
        {
            // Lower-cased markers, computed once instead of on every iteration.
            string   regularContentId = "endText".ToLower(); // regular content
            string[] classFragments   =
            {
                "con_1".ToLower(),
                "vdb-plr8".ToLower(),
                "m-introbox".ToLower(),
                "txtcont".ToLower()
            };

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                bool matched = idValue.Equals(regularContentId);
                for (int f = 0; !matched && f < classFragments.Length; f++)
                {
                    matched = classValue.Contains(classFragments[f]);
                }

                if (matched)
                {
                    return candidate.ToHtml();
                }
            }

            return "";
        }
示例#6
0
        //新华网
        /// <summary>
        /// Scans a list of div nodes for the Xinhuanet article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the LAST div that matches one of the known markers (the scan does not
        /// stop at the first hit); an empty string if none matches.
        /// </returns>
        public string getXinhuanetBody(NodeList nodelist)
        {
            string lastMatch = "";

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                bool isContent;
                switch (idValue)
                {
                    case "article":        // current affairs, rule of law, international, finance, autos, entertainment,
                                           // fashion, informatization, personnel, theory, Hong Kong/Macau, Japan, education,
                                           // sci-tech, energy, food, travel, health, public welfare, public opinion, talent, more
                    case "content":        // international, overseas Chinese, military, real estate, sports, reference data,
                                           // leadership, Hong Kong/Macau, Taiwan, Singapore
                    case "matrix":         // blog
                    case "contentblock":   // reference data
                    case "news_content":   // "Xuan Kongjian" channel
                    case "detail-content": // Malaysia
                    case "message_1":      // history reading
                        isContent = true;
                        break;

                    default:
                        isContent = classValue.Contains("main pagewidth")       // local news
                                    || classValue.Contains("main_content_wrap")
                                    || classValue.Contains("wrap");             // "Micro China"
                        break;
                }

                // A class exactly equal to "content" (informatization channel) was considered in the
                // original source but left disabled; it is not matched here either.
                if (isContent)
                {
                    lastMatch = candidate.ToHtml();
                }
            }

            return lastMatch;
        }
示例#7
0
        //人民网
        /// <summary>
        /// Scans a list of div nodes for the People's Daily Online (people.com.cn) article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the LAST div that matches one of the known markers (the scan does not
        /// stop at the first hit); an empty string if none matches.
        /// </returns>
        public string getPeopleBody(NodeList nodelist)
        {
            string lastMatch = "";

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                // "p_content" covers most channels per the original notes: news (politics, society, rule of law),
                // local (leaders, travel, people), finance (wealth management, stocks, energy), central SOEs
                // (environment, public welfare, lottery), education (sci-tech, culture & history, collecting,
                // entertainment), opinion (media, public opinion), international (Taiwan, Hong Kong/Macau, military),
                // autos (IT, telecom, home appliances), real estate (food, health, fashion), culture (reading,
                // sports, games, rosewood, chess & cards).
                bool isContent = idValue.Equals("p_content")
                                 || idValue.Equals("ft_contwrap")       // interviews
                                 || classValue.Contains("text_show");   // theory

                if (isContent)
                {
                    lastMatch = candidate.ToHtml();
                }
            }

            return lastMatch;
        }
示例#8
0
        //搜狐
        /// <summary>
        /// Scans a list of div nodes for the Sohu article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the LAST div whose lower-cased id equals "entry" or whose lower-cased class
        /// contains "text clear" (the scan does not stop at the first hit); an empty string if
        /// none matches.
        /// </returns>
        public string GetSohuBody(NodeList nodelist)
        {
            string lastMatch = "";

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                bool isBlogEntry   = idValue.Equals("entry");            // blog
                bool isArticleText = classValue.Contains("text clear");  // news, military, culture, history, sports,
                                                                         // social commentary, reading, finance, stocks,
                                                                         // tech, autos, fashion, health, education,
                                                                         // mother & baby, travel, food, horoscope
                if (isBlogEntry || isArticleText)
                {
                    lastMatch = candidate.ToHtml();
                }

                // Real-estate pages (class "new-detail-left") were recognised by the original code,
                // but the extraction itself was commented out, so such divs contribute nothing.
            }

            return lastMatch;
        }
示例#9
0
        //凤凰网
        /// <summary>
        /// Scans a list of div nodes for the ifeng.com (Phoenix) article container.
        /// </summary>
        /// <remarks>
        /// Some markers stop the scan as soon as they are found; for the others the scan
        /// continues, so a later matching div overrides an earlier one.
        /// The id and class values are lower-cased with the invariant culture before comparison,
        /// so every marker literal below must itself be lower-case.
        /// </remarks>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>The HTML of the selected div, or an empty string if none matches.</returns>
        public string GetFengBody(NodeList nodelist)
        {
            string result = "";

            for (int i = 0; i < nodelist.Size(); i++)
            {
                Div    div = (Div)nodelist[i];
                string id  = div.GetAttribute("id");
                string css = div.GetAttribute("class");
                // Invariant lower-casing: the markers are ASCII identifiers, and culture-sensitive
                // casing (e.g. Turkish dotless i) would break the comparisons.
                id  = string.IsNullOrEmpty(id) ? "" : id.ToLowerInvariant();
                css = string.IsNullOrEmpty(css) ? "" : css.ToLowerInvariant();

                if (id.Equals("artical_real"))             // news, finance, entertainment, sports, fashion, tech, reading,
                {                                          // education, history, military, Buddhism, travel, games, digital,
                    result = div.ToHtml();                 // health, parenting, home, horoscope
                    break;
                }
                else if (id.Equals("artical"))             // culture
                {
                    result = div.ToHtml();
                }
                else if (id.Equals("blog_article_content")) // blog
                {
                    result = div.ToHtml();
                }
                else if (css.Contains("gc-art-body"))      // autos: car introductions
                {
                    result = div.ToHtml();
                    break;
                }
                else if (css.Contains("ce-main mt12"))     // autos: review guidelines
                {
                    result = div.ToHtml();
                    break;
                }
                else if (css.Contains("arl-mian fl"))      // autos: articles
                {
                    result = div.ToHtml();
                }
                else if (css.Contains("article") && id != "") // real-estate articles
                {
                    result = div.ToHtml();
                }
                else if (css.Contains("e_i_deta_lx"))      // lottery
                {
                    // Fixed: the marker used to be "e_i_deta_LX"; since css is lower-cased
                    // above, the upper-case literal could never match.
                    result = div.ToHtml();
                }
            }

            return result;
        }
示例#10
0
        /// <summary>
        /// Looks for the "tddetails" frame in the browser document and, if it contains any of the
        /// account sections, parses them, clicks every "logout" anchor and switches the scraper
        /// to <c>ScraperState.WaitingForLogout</c>.
        /// </summary>
        /// <remarks>
        /// Only the first frame named "tddetails" (case-insensitive via lower-casing) is examined.
        /// If that frame holds none of the account divs, nothing is changed.
        /// </remarks>
        private void TryToScrapeAccountInformation()
        {
            var frameElements = WebBrowser.Document.GetElementsByTagName("frame");

            foreach (HtmlElement frameElement in frameElements)
            {
                // Skip every frame except the one we are waiting for.
                if (frameElement.Name.ToLower() != "tddetails")
                {
                    continue;
                }

                bool accountsParsed = false;

                HtmlWindow detailsWindow = frameElement.Document.Window.Frames["tddetails"];
                var        sections      = detailsWindow.Document.GetElementsByTagName("div");
                foreach (HtmlElement section in sections)
                {
                    string className = section.GetAttribute("className");

                    if (className == "td-target-banking" || className == "td-target-investing")
                    {
                        // Banking and investment accounts: balance is passed through as-is.
                        ParseAccounts(section, false);
                        accountsParsed = true;
                    }
                    else if (className == "td-target-creditcards")
                    {
                        // Credit accounts: ask the parser to flip the sign on the balance.
                        ParseAccounts(section, true);
                        accountsParsed = true;
                    }
                }

                if (accountsParsed)
                {
                    // Account details are parsed, so log out by clicking every "logout" link.
                    var links = detailsWindow.Document.GetElementsByTagName("a");
                    foreach (HtmlElement link in links)
                    {
                        bool hasCaption = !string.IsNullOrWhiteSpace(link.InnerText);
                        if (hasCaption && (link.InnerText.Trim().ToLower() == "logout"))
                        {
                            link.InvokeMember("click");
                        }
                    }

                    // The state changes even when no logout link was found; the user is then
                    // expected to click Logout manually.
                    _ScraperState = ScraperState.WaitingForLogout;
                }

                // The target frame has been handled; no need to look at the remaining frames.
                break;
            }
        }
示例#11
0
        //百度百家
        /// <summary>
        /// Scans a list of div nodes for the Baidu Baijia article container.
        /// </summary>
        /// <param name="nodelist">Nodes to scan; every entry is cast to <c>Div</c>.</param>
        /// <returns>
        /// The HTML of the LAST div whose lower-cased id equals "page" (the scan does not stop
        /// at the first hit); an empty string if none matches.
        /// </returns>
        public string GetBaijiaBody(NodeList nodelist)
        {
            string lastMatch = "";

            for (int index = 0; index < nodelist.Size(); index++)
            {
                Div    candidate  = (Div)nodelist[index];
                string idValue    = (candidate.GetAttribute("id") ?? "").ToLower();
                // The class attribute is read and normalised as in the sibling extractors,
                // although no rule in this method uses it.
                string classValue = (candidate.GetAttribute("class") ?? "").ToLower();

                if (!idValue.Equals("page"))
                {
                    continue;
                }

                lastMatch = candidate.ToHtml();
            }

            return lastMatch;
        }
示例#12
0
文件: Program.cs 项目: TabNoc/PiWeb
            /// <summary>
            /// Appends <c>_list</c> plus one clickable entry per known path to <c>appElement</c>.
            /// </summary>
            /// <remarks>
            /// Each entry is a <c>Div</c> with class "list-group-item" whose "href" attribute is the
            /// current window location concatenated with the path; clicking the entry logs the
            /// current location to the console and navigates the window to that "href".
            /// </remarks>
            /// <returns>The populated <c>appElement</c>.</returns>
            private Element CreateAppElement()
            {
                appElement.AppendChild(_list);

                string[] paths =
                {
                    "display-alert",
                    "entry-listview",
                    "shared-button",
                    "button",
                    "todo",
                    "draw",
                    "files",
                    "dotmatrixclock",
                    "editor",
                    "monkeys",
                    "refreshlistview",
                    "searchbar",
                    "slider",
                    "switch-listview",
                    "timepicker",
                    "tipcalc",
                    "weatherapp",
                    "xuzzle",
                    "webview",
                    "picker"
                };

                foreach (string path in paths)
                {
                    Div entry = new Div();
                    entry.ClassName        = "list-group-item";
                    entry.Style.Cursor     = "pointer";
                    entry.Style.FontWeight = "bold";
                    entry.Text             = path;

                    // The navigation target is stored on the element itself and read back on click.
                    entry.SetAttribute("href", entry.Document.Window.Location + path);
                    entry.Click += (source, e) =>
                    {
                        Console.WriteLine(entry.Document.Window.Location);
                        entry.Document.Window.Location = entry.GetAttribute("href", "");
                    };

                    appElement.AppendChild(entry);
                }

                return appElement;
            }
示例#13
0
        /// <summary>
        /// Searches WeChat articles through Sogou and collects the hits into a table.
        /// </summary>
        /// <remarks>
        /// Result pages are requested one after another (page size assumed to be 10 by the paging
        /// arithmetic) until <paramref name="count"/> is covered, a page yields no result nodes,
        /// or the most recently read article is older than <paramref name="time"/>.
        /// NOTE(review): the date cutoff assumes results arrive newest-first — confirm against the
        /// actual Sogou ordering.
        /// </remarks>
        /// <param name="key">Search keyword; when null or empty the method returns null.</param>
        /// <param name="count">Approximate number of results wanted; drives how many pages are fetched.</param>
        /// <param name="time">Cutoff: paging stops once an article older than this has been read.</param>
        /// <returns>
        /// The table created by <c>GetStruct(key)</c> with one row per article
        /// (Title, Link, Author, Cdate, Day, Source), or null when <paramref name="key"/> is empty.
        /// </returns>
        public DataTable GetWXBySogou(string key, int count, DateTime time)
        {
            string baseurl = "http://weixin.sogou.com/weixin?type=2&query={0}&fr=sgsearch&ie=utf8&_ast=1433216256&_asf=null&w=01059900&cid=null&page={1}";

            if (string.IsNullOrEmpty(key))
            {
                return(null);
            }

            DataTable dt = GetStruct(key);

            // Date of the most recently processed article. Starts at "now" so that the cutoff
            // check below cannot fire before any article has been read (unless time is in the future).
            DateTime cdate = DateTime.Now;

            for (int p = 0; p * 10 < count; p++)
            {
                string   url  = string.Format(baseurl, HttpUtility.UrlEncode(key), p + 1);
                string   html = ieHelp.GetHtmlFromSite(url);
                HtmlPage page = htmlHelp.GetPage(html);

                Winista.Text.HtmlParser.Util.NodeList nodes = page.Body.ExtractAllNodesThatMatch(new HasAttributeFilter("class", "wx-rb wx-rb3"), true);
                if (nodes.Count <= 0)
                {
                    // No result blocks on this page: nothing more to read.
                    break;
                }

                // Map every result block onto a row of the table.
                for (int i = 0; i < nodes.Count; i++)
                {
                    Winista.Text.HtmlParser.Util.NodeList cnodes = nodes[i].Children;
                    DataRow dr = dt.NewRow();

                    // Title link: the <a> directly under an <h4>.
                    NodeFilter titleFilter = new AndFilter(new HasParentFilter(new TagNameFilter("h4")), new TagNameFilter("a"));
                    ATag       titleLink   = (ATag)cnodes.ExtractAllNodesThatMatch(titleFilter, true)[0];
                    dr["Title"] = titleLink.StringText;
                    dr["Link"]  = titleLink.Link;

                    // Author: the title attribute of <a id="weixin_account">.
                    NodeFilter authorFilter = new AndFilter(new HasAttributeFilter("id", "weixin_account"), new TagNameFilter("a"));
                    ATag       authorLink   = (ATag)cnodes.ExtractAllNodesThatMatch(authorFilter, true)[0];
                    dr["Author"] = authorLink.GetAttribute("title");

                    // Timestamp: attribute "t" on the element with class "s-p", converted once by GetDateTime.
                    NodeFilter timeFilter = new HasAttributeFilter("class", "s-p");
                    Div        timeDiv    = (Div)cnodes.ExtractAllNodesThatMatch(timeFilter, true)[0];
                    string     unixtime   = timeDiv.GetAttribute("t");
                    DateTime   published  = GetDateTime(unixtime);

                    dr["Cdate"]  = published;
                    dr["Day"]    = published.Day;
                    dr["Source"] = "微信";
                    dt.Rows.Add(dr);

                    // Fixed: cdate was never updated, so the cutoff below compared the start time
                    // of the call with `time` instead of the article date.
                    cdate = published;
                }

                if (cdate < time)
                {
                    break;
                }
            }

            return(dt);
        }
示例#14
0
 /// <summary>
 /// Step: asserts that the named attribute of the current component passes <c>Assert.NotEmpty</c>.
 /// </summary>
 /// <param name="name">Attribute name captured from the step text.</param>
 [Then(@"Check no null attribute (.*)")]
 public void CheckAttributeNotNull(string name)
 {
     var attributeValue = CurrentComponent.GetAttribute(name);

     Assert.NotEmpty(attributeValue);
 }
示例#15
0
文件: MeetGz.cs 项目: SHNXJMG/Small
        /// <summary>
        /// Crawl entry point for this site. In the code visible here it issues two HTTP requests,
        /// parses the second response down to the <c>div class="allcd"</c> elements inside the
        /// grid table of tab "myTab0_Content0", and returns <c>list</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): nothing in this method ever adds to <c>list</c>, so the returned list is
        /// always empty; the innermost loop only reads the div id into an unused local. The method
        /// looks unfinished.
        /// </remarks>
        /// <param name="crawlAll">Not read anywhere in this method.</param>
        /// <returns>The (empty) list created at the top of the method.</returns>
        protected override IList ExecuteCrawl(bool crawlAll)
        {
            IList list = new List<MeetInfo>();
            string html = string.Empty;
            string cookiestr = string.Empty;
            // viewState and eventValidation are declared but never used below.
            string viewState = string.Empty; 
            string eventValidation = string.Empty;
            // URL-encoded JSON array describing one venue query (fixed dates 2014-12-10 .. 2014-12-30).
            string arguments = "%5B%7B%22jykssj%22%3A%222014-12-10+00%3A00%3A00%22%2C%22jyjssj%22%3A%222014-12-30+23%3A59%3A59%22%2C%22bmkey%22%3A%22%22%2C%22cdzj%22%3A%22CD0036%22%2C%22cdmc%22%3A%22%E7%AC%AC01%E8%AF%84%E6%A0%87%E5%AE%A4%22%2C%22rnrs%22%3A%229%22%2C%22zt%22%3A%2202%22%2C%22cdmj%22%3A%22105%22%2C%22tyy%22%3A%2201%22%2C%22dzbb%22%3A%2201%22%2C%22mkf%22%3A%2201%22%2C%22spcqsb%22%3A%2202%22%2C%22dlkt%22%3A%2202%22%2C%22sfsydzkpb%22%3A%2201%22%2C%22dn%22%3A%2210%22%2C%22ssbm%22%3A%22%E4%B8%AD%E5%BF%83%E6%9C%AC%E9%83%A8%22%2C%22cdlx%22%3A%22%E8%AF%84%E6%A0%87%E5%AE%A4%22%2C%22gm%22%3A%22%E5%A4%A7%22%2C%22dd%22%3A%22%E5%9B%9B%E6%A5%BC%22%2C%22zw%22%3A%22null%22%2C%22zjzy%22%3A%22null%22%2C%22zjdn%22%3A%22null%22%2C%22dldn%22%3A%22null%22%2C%22jhyzy%22%3A%22null%22%2C%22dyj%22%3A%2202%22%2C%22xsq%22%3A%2202%22%2C%22ipdjj%22%3A%2202%22%2C%22znjhp%22%3A%2202%22%2C%22yzj%22%3A%2202%22%2C%22sfxsmx%22%3A%2202%22%7D%5D";
            // Unused: the same literal is passed directly when the request values are built below.
            string method = "findPlaceByCQSCD";

            try
            {
                // Despite its name, cookiestr holds the URL-decoded form of `arguments`.
                cookiestr = System.Web.HttpUtility.UrlDecode(arguments);
            }
            // NOTE(review): the exception is swallowed; on failure cookiestr stays empty.
            catch (Exception ex){ }
            // Request fields: service / arguments / method.
            NameValueCollection nvc = this.ToolWebSite.GetNameValueCollection(new string[]{
            "service","arguments","method"
            },
                new string[]{
                "PlaceLentManagerBS",
                cookiestr,
                "findPlaceByCQSCD"
                });
            // NOTE(review): this call is outside any try block, and its result is overwritten by
            // the next assignment to html before being read.
            html = this.ToolWebSite.GetHtmlByUrl("http://oa.gzzb.gd.cn/gcpbcOA/json/", nvc, Encoding.UTF8);
            try
            {
                html = this.ToolWebSite.GetHtmlByUrl(this.SiteUrl, Encoding.Default);
            }
            catch
            {
                // Any failure fetching the site page ends the crawl with the empty list.
                return list;
            }
            Parser parser = new Parser(new Lexer(html));
            // Tables with class "gridtable" that have a div id="myTab0_Content0" as parent
            // (the HasParentFilter is constructed with `true` as its second argument).
            NodeList pageNode = parser.ExtractAllNodesThatMatch(new AndFilter(new HasParentFilter(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("id", "myTab0_Content0")), true), new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "gridtable"))));
            if (pageNode != null && pageNode.Count > 0)
            {
                // NOTE(review): `as` cast is not null-checked before table.Rows is used.
                TableTag table = pageNode[0] as TableTag;
                foreach (TableRow row in table.Rows)
                {
                    // Re-parse the row's own HTML to find a nested table and an <h5> heading.
                    parser = new Parser(new Lexer(row.ToHtml()));
                    NodeList tableNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("table"));
                    // Reset so the same parser can be scanned a second time for the <h5>.
                    parser.Reset();
                    NodeList hNode = parser.ExtractAllNodesThatMatch(new TagNameFilter("h5"));
                    if (hNode != null && hNode.Count > 0 && tableNode != null && tableNode.Count > 0)
                    {
                        // Plain text of the first <h5>; not used further in this method.
                        string address = hNode[0].ToNodePlainString();
                        TableTag cTable = tableNode[0] as TableTag;
                        foreach (TableRow cRow in cTable.Rows)
                        {
                            foreach (TableColumn col in cRow.Columns)
                            {
                                // Each cell is re-parsed to look for a div with class "allcd".
                                parser = new Parser(new Lexer(col.ToHtml()));
                                NodeList divNode = parser.ExtractAllNodesThatMatch(new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "allcd")));
                                if (divNode != null && divNode.Count > 0)
                                {
                                    Div div = divNode[0] as Div;
                                    // The div id is read into `url` and then discarded.
                                    string url = div.GetAttribute("id");

                                }
                            }
                        }
                    }
                }
            }
            return list;
        }