Exemplo n.º 1
0
        /// <summary>
        /// Creates the root node for a site and passes it to <c>Visit</c> together with the start URL.
        /// </summary>
        /// <param name="siteUrl">
        /// Address to start from. When null or empty, "https://www.dotnetperls.com/" is used;
        /// a trailing "/" is appended when it is missing.
        /// </param>
        /// <returns>The root node created by this call, returned after <c>Visit</c> has run.</returns>
        public override SPCoder.Utils.Nodes.BaseNode GetSPStructure(string siteUrl)
        {
            // Normalise the start address: default site when none is given, always slash-terminated.
            string startUrl = string.IsNullOrEmpty(siteUrl) ? "https://www.dotnetperls.com/" : siteUrl;
            if (!startUrl.EndsWith("/"))
            {
                startUrl = startUrl + "/";
            }
            base.Endpoint = startUrl;

            // Advertise gzip/deflate, let the request decompress the response itself,
            // and give every request its own cookie container.
            HtmlAgilityPack.HtmlWeb.PreRequestHandler acceptCompressed = request =>
            {
                request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";
                request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                request.CookieContainer        = new System.Net.CookieContainer();
                return true;
            };

            // NOTE(review): `page` and `links` are declared outside this method; if `page`
            // outlives a single call, every call adds one more handler to it - confirm.
            page.PreRequest += acceptCompressed;

            BaseNode rootNode = new PageNode();

            rootNode.NodeConnector = this;
            rootNode.IconPath      = "dnp.png";
            rootNode.Title         = RootNodeTitle;
            rootNode.LoadedData    = true;

            // The root node is passed twice, matching the last two arguments of the original call.
            Visit(page, links, startUrl, "", rootNode, rootNode);
            return rootNode;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads the page at <paramref name="siteUrl"/>, stores it in <c>Document</c> and returns a
        /// root node that <c>doPageNodes</c> has been run against.
        /// </summary>
        /// <param name="siteUrl">Address of the page to load.</param>
        /// <returns>The root node, titled with <c>RootNodeTitle</c> followed by the response host.</returns>
        public override SPCoder.Utils.Nodes.BaseNode GetSPStructure(string siteUrl)
        {
            BaseNode rootNode = new PageNode();

            var web = new HtmlWeb();

            // Advertise gzip/deflate, let the request decompress the response itself,
            // and give every request its own cookie container.
            HtmlAgilityPack.HtmlWeb.PreRequestHandler acceptCompressed = request =>
            {
                request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";
                request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                request.CookieContainer        = new System.Net.CookieContainer();
                return true;
            };
            web.PreRequest += acceptCompressed;

            Document = web.Load(siteUrl);
            // NOTE(review): second request for the same address, with "/" as the second
            // argument; presumably only needed for ResponseUri below - confirm it is required.
            web.Get(siteUrl, "/");

            rootNode.Title    = RootNodeTitle + web.ResponseUri.Host;
            rootNode.IconPath = "html.png";

            var htmlRoot = Document.DocumentNode;

            // NOTE(review): this node is configured but not referenced again in this method;
            // it may register itself through the ParentNode setter - confirm before removing.
            BaseNode documentEntry = new PageNode(htmlRoot);

            documentEntry.RootNode   = rootNode;
            documentEntry.ParentNode = rootNode;
            documentEntry.Title      = htmlRoot.Name;
            documentEntry.SPObject   = Document;

            doPageNodes(htmlRoot, rootNode, rootNode);
            return rootNode;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Downloads a job-listing page and extracts the postings found in it.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <param name="sourceType">
        /// Site the page belongs to. Only <c>SourceType.ZLZP</c> is handled; for any other
        /// value nothing is downloaded and an empty list is returned.
        /// </param>
        /// <returns>The parsed jobs; empty when no rows matched. Never null.</returns>
        public static List <Job> GetJobs(string url, SourceType sourceType)
        {
            var jobs = new List <Job>();

            switch (sourceType)
            {
            case SourceType.ZLZP:
            {
                var htmlWeb = new HtmlWeb
                {
                    OverrideEncoding = Encoding.UTF8
                };

                // Advertise gzip/deflate, let the request decompress the response itself,
                // and give the request its own cookie container.
                HtmlWeb.PreRequestHandler acceptCompressed = request =>
                {
                    request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";
                    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                    request.CookieContainer        = new System.Net.CookieContainer();
                    return true;
                };
                htmlWeb.PreRequest += acceptCompressed;

                HtmlDocument response = htmlWeb.Load(url);
                var          tables   = response.DocumentNode.SelectNodes("//*[@id='newlist_list_content_table']/table[@class='newlist']");
                if (tables == null)
                {
                    // No result tables matched.
                    break;
                }

                // Cells of the first row, by class: zwmc = title (and link), gzdd = city,
                // gxsj = date, gsmc = company, zwyx = wages.
                foreach (var table in tables)
                {
                    // Look the title anchor up once; it supplies both the name and the link.
                    var titleLink = table.SelectSingleNode("tr[1]/td[@class='zwmc']/div/a");
                    var name      = titleLink?.InnerText;
                    if (name == null)
                    {
                        // Tables without a title anchor are skipped.
                        continue;
                    }

                    jobs.Add(new Job
                        {
                            Name    = name,
                            Link    = titleLink.Attributes["href"]?.Value,
                            City    = table.SelectSingleNode("tr[1]/td[@class='gzdd']")?.InnerText,
                            Date    = table.SelectSingleNode("tr[1]/td[@class='gxsj']")?.InnerText,
                            Company = table.SelectSingleNode("tr[1]/td[@class='gsmc']/a[1]")?.InnerText,
                            Wages   = table.SelectSingleNode("tr[1]/td[@class='zwyx']")?.InnerText,
                            Source  = "智联招聘"
                        });
                }
                break;
            }
            }
            return jobs;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Requests the given URL and returns the job detail text found on the page.
        /// </summary>
        /// <param name="url">Address of the job detail page.</param>
        /// <param name="type">Site the page belongs to; selects the encoding and the XPath queries used.</param>
        /// <returns>
        /// The collected sections (each an <c>&lt;h3&gt;</c> heading followed by the node text) passed
        /// through <c>ToJson()</c>. A section whose node is not found on the page is left out; for an
        /// unhandled <paramref name="type"/> nothing is requested and the text is empty.
        /// </returns>
        public string GetUrlInfo(string url, DataType type)
        {
            var html = new System.Text.StringBuilder();
            HtmlAgilityPack.HtmlDocument response;

            switch (type)
            {
            case DataType.智联招聘:
                // Workaround for the "'gzip' is not a supported encoding name" error,
                // see http://www.cnblogs.com/soundcode/p/3785152.html
                HtmlAgilityPack.HtmlWeb.PreRequestHandler handler = delegate(HttpWebRequest request)
                {
                    request.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";
                    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
                    request.CookieContainer        = new System.Net.CookieContainer();
                    return(true);
                };
                htmlWeb.PreRequest += handler;
                try
                {
                    response = LoadWithEncoding(url, "UTF-8");
                }
                finally
                {
                    // htmlWeb is declared outside this method; detach the handler again so
                    // repeated calls do not keep adding handlers to it.
                    htmlWeb.PreRequest -= handler;
                }

                // On this site a section is also left out when its text is blank.
                AppendSection(html, "<h3>福利诱惑:</h3>", FirstInnerTextOrNull(response, "/html/body/div[3]/div[1]/div[1]/div"), true);
                AppendSection(html, "<h3>基本信息:</h3>", FirstInnerTextOrNull(response, "/html/body/div[4]/div[1]/ul"), true);
                AppendSection(html, "<h3>职位描述:</h3>", FirstInnerTextOrNull(response, "/html/body/div[4]/div[1]/div[1]/div/div[1]"), true);
                break;

            case DataType.猎聘网:
                response = LoadWithEncoding(url, "UTF-8");
                // Each section is tried under both page containers, in the order listed.
                // Basic information
                AppendSection(html, "<h3>基本信息:</h3>",
                              FirstInnerTextOrNull(response,
                                                   "//*[@id='job-view-enterprise']/div[1]/div[1]/div[1]/div[3]/div",
                                                   "//*[@id='job-hunter']/div[1]/div[1]/div[1]/div[3]/div"),
                              false);
                // Job description
                AppendSection(html, "<h3>职位描述:</h3>",
                              FirstInnerTextOrNull(response,
                                                   "//*[@id='job-hunter']/div[1]/div[1]/div[1]/div[4]",
                                                   "//*[@id='job-view-enterprise']/div[1]/div[1]/div[1]/div[4]"),
                              false);
                // Job requirements
                AppendSection(html, "<h3>岗位要求:</h3>",
                              FirstInnerTextOrNull(response,
                                                   "//*[@id='job-hunter']/div[1]/div[1]/div[1]/div[5]/div",
                                                   "//*[@id='job-view-enterprise']/div[1]/div[1]/div[1]/div[5]/div"),
                              false);
                break;

            case DataType.前程无忧:
                // This site is read as GBK rather than UTF-8.
                response = LoadWithEncoding(url, "GBK");
                AppendSection(html, "<h3>基本信息:</h3>", FirstInnerTextOrNull(response, "/html/body/div[3]/div/div[2]/table[1]/tr[3]/td[1]"), false);
                AppendSection(html, "<h3>职位描述:</h3>", FirstInnerTextOrNull(response, "/html/body/div[3]/div/div[2]/div[1]/div[2]/div/table"), false);
                break;

            case DataType.拉勾网:
                response = LoadWithEncoding(url, "UTF-8");
                AppendSection(html, "<h3>基本信息:</h3>", FirstInnerTextOrNull(response, "//*[@id='container']/div[1]/div[1]/dl/dd[1]"), false);
                AppendSection(html, "<h3>职位描述:</h3>", FirstInnerTextOrNull(response, "//*[@id='container']/div[1]/div[1]/dl/dd[2]"), false);
                break;
            }

            return(html.ToString().ToJson());
        }

        /// <summary>
        /// Sets the override encoding on <c>htmlWeb</c> and loads the page at <paramref name="url"/>.
        /// </summary>
        private HtmlAgilityPack.HtmlDocument LoadWithEncoding(string url, string encodingName)
        {
            htmlWeb.OverrideEncoding = Encoding.GetEncoding(encodingName);
            return(htmlWeb.Load(url));
        }

        /// <summary>
        /// Returns the inner text of the first node matched by the first XPath that matches anything,
        /// or null when none of the queries match.
        /// </summary>
        private static string FirstInnerTextOrNull(HtmlAgilityPack.HtmlDocument document, params string[] xpaths)
        {
            foreach (var xpath in xpaths)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                var nodes = document.DocumentNode.SelectNodes(xpath);
                if (nodes != null && nodes.Count > 0)
                {
                    return(nodes[0].InnerText);
                }
            }
            return(null);
        }

        /// <summary>
        /// Appends heading and text to the buffer; does nothing when the text is null, or blank
        /// while <paramref name="skipBlank"/> is set.
        /// </summary>
        private static void AppendSection(System.Text.StringBuilder buffer, string heading, string text, bool skipBlank)
        {
            if (text == null)
            {
                return;
            }
            if (skipBlank && string.IsNullOrEmpty(text.Trim()))
            {
                return;
            }
            buffer.Append(heading).Append(text);
        }