Ejemplo n.º 1
0
        /// <summary>
        /// Checks whether a proxy is usable by requesting an IP-echo page through it.
        /// </summary>
        /// <param name="p">Proxy whose <c>ip</c> and <c>port</c> are handed to the HTTP helper.</param>
        /// <returns>
        /// <c>true</c> when the request returns status 200 and the body contains the proxy's IP
        /// and does not contain the text "无效"; otherwise <c>false</c>, including when the
        /// request throws.
        /// </returns>
        private bool checkVaild(Proxy p)
        {
            var flag = false;

            try
            {
                _logger.Debug($"正在检查IP:{p.ip}:{p.port}");
                // Random query suffix so every probe hits a distinct URL
                // (presumably to defeat intermediate caching - confirm).
                var rand = new Random();
                var n    = rand.Next(100000);
                var html = NetHttpHelper.HttpGetRequest("http://2018.ip138.com/ic.asp?" + n, out int status, 2000, p.ip, p.port);
                if (status == 200 && !string.IsNullOrEmpty(html))
                {
                    // The echo page must show our proxy IP and must not report it as invalid.
                    if (html.Contains(p.ip) && !html.Contains("无效"))
                    {
                        flag = true;
                        _logger.Debug($"IP:{p.ip}:{p.port} 有效");
                    }
                }
            }
            catch (Exception ex)
            {
                // Best-effort probe: any failure simply means "not valid", but record the
                // reason instead of swallowing it. Null-safe access because a null proxy is
                // one of the failures that lands here.
                _logger.Debug($"IP:{p?.ip}:{p?.port} 检查失败:{ex.Message}");
            }
            return(flag);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Walks the review pages of a project and collects the comments found on them.
        /// </summary>
        /// <param name="city_url">Base URL of the city site; a trailing '/' is appended when missing.</param>
        /// <param name="pId">Project id inserted into the review page URL.</param>
        /// <returns>
        /// The comments gathered from every page fetched before paging stopped. Paging stops on
        /// the first non-200 response, on a page without comment nodes, or when no next-page
        /// link is present.
        /// </returns>
        /// <exception cref="FormatException">A comment's date text cannot be parsed by <see cref="DateTime.Parse(string)"/>.</exception>
        public static List <CommentInfo> GetProjectCommenInfo(string city_url, string pId)
        {
            if (city_url[city_url.Length - 1] != '/')
            {
                city_url += "/";
            }
            var cinfo = new List <CommentInfo>();
            int page  = 1;

            while (true)
            {
                var url    = $"{city_url}loupan/dianping-{pId}.htmls/?from=commview_dp_moretop&p={page}";
                int status = 0;
                var html   = NetHttpHelper.HttpGetRequest(url, out status);
                if (status != 200)
                {
                    // A failed request previously fell through to page++ and the loop never
                    // terminated while the server kept failing; stop and return what we have.
                    break;
                }

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                // Author, content and date nodes are matched up by position.
                var author  = doc.DocumentNode.SelectNodes(".//span[@class='author']");
                var content = doc.DocumentNode.SelectNodes(".//h4[@class='rev-subtit all-text']");
                var date    = doc.DocumentNode.SelectNodes(".//span[@class='date']");
                if (author == null || author.Count == 0 || content == null || date == null)
                {
                    break;
                }

                // Only use indices that exist in all three lists so a malformed page
                // cannot cause an out-of-range access.
                int rows = Math.Min(author.Count, Math.Min(content.Count, date.Count));
                for (int i = 0; i < rows; i++)
                {
                    var raw = content[i].InnerText;
                    // The last two characters are dropped (presumably a trailing suffix in the
                    // markup - confirm); text shorter than that is kept unchanged.
                    var cstr = raw.Length >= 2 ? raw.Substring(0, raw.Length - 2) : raw;
                    // No line breaks are produced here; <br/> becomes a single space.
                    cstr = cstr.Replace("&hellip;", "…").Replace("<br/>", " ");
                    cinfo.Add(new CommentInfo()
                    {
                        author  = author[i].InnerText,
                        content = cstr,
                        date    = DateTime.Parse(date[i].InnerText)
                    });
                }

                // No next-page link means this was the last page.
                var next_page = doc.DocumentNode.SelectNodes(".//a[@class='next-page next-link']");
                if (next_page == null || next_page.Count == 0)
                {
                    break;
                }

                page++;
                Thread.Sleep(20);
            }

            return(cinfo);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Scrapes proxy listing pages, keeps the entries whose listed speed is at most 2 and
        /// that pass <c>checkVaild</c>, and appends them to <c>_proxyList</c> as "ip,port".
        /// </summary>
        /// <param name="pp">Paging window: pages <c>pageIndex</c> up to (excluding) <c>pageIndex + pageSize</c> are fetched.</param>
        private void GetNewIpList(PageParam pp)
        {
            var index = pp.pageIndex;
            var size  = pp.pageSize;
            // Loop-invariant pattern that captures the value of a div's title attribute.
            var regexStr = string.Format(" <{0}[^>]*?{1}=(['\"\"]?)(?<text>[^'\"\"\\s>]+)\\1[^>]*>", "div", "title");

            for (int i = index; i < index + size; i++)
            {
                // High-anonymity proxy listing
                var url = "https://www.xicidaili.com/nn/" + i;
                try
                {
                    var html = NetHttpHelper.HttpGetRequest(url, out int status, 5000);
                    if (status != 200)
                    {
                        continue;
                    }
                    HtmlDocument doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    var table_tr = doc.DocumentNode.SelectNodes(".//table[@id='ip_list']/tr");
                    if (table_tr == null)
                    {
                        continue;
                    }
                    // Row 0 is the header, so start at the second row.
                    for (int j = 1; j < table_tr.Count; j++)
                    {
                        var tr_child = table_tr[j].ChildNodes;
                        // Cells are read at child indices 3, 5 and 13; skip rows that are too
                        // short instead of throwing and losing the rest of the page.
                        if (tr_child.Count <= 13)
                        {
                            continue;
                        }
                        var ip_str     = tr_child[3].InnerText;
                        var port_str   = tr_child[5].InnerText;
                        var speed_html = tr_child[13].InnerHtml;
                        // Extract the speed from the title attribute and drop the unit suffix.
                        Match  TitleMatch = Regex.Match(speed_html, regexStr, RegexOptions.IgnoreCase);
                        string speed_text = TitleMatch.Groups["text"].Value.Replace("秒", "");
                        // A failed parse used to leave port/speed at 0, which made an
                        // unreadable speed pass the "fast enough" filter; skip such rows.
                        if (!int.TryParse(port_str, out int port))
                        {
                            continue;
                        }
                        if (!float.TryParse(speed_text, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out float speed))
                        {
                            continue;
                        }
                        // Keep only proxies with a listed speed of at most 2 seconds.
                        if (speed <= 2)
                        {
                            Proxy px = new Proxy(ip_str, port);
                            if (checkVaild(px))
                            {
                                // Collected here; the original comment says the list is written to a file later.
                                _proxyList.Add(ip_str + "," + port_str);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Fetching or parsing this listing page failed; log and move on to the next page.
                    _logger.Info("获取代理ip:" + url + " 出错");
                    _logger.Error(ex);
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs a property search against the city site's brand endpoint and deserializes the JSON reply.
        /// </summary>
        /// <param name="city_url">Base URL of the city site; a trailing '/' is appended when missing.</param>
        /// <param name="text">Search keyword; inserted into the <c>kw</c> query parameter as-is (no URL encoding is applied here).</param>
        /// <returns>
        /// The deserialized result when the request returns status 200 with a usable body;
        /// otherwise a new, empty <c>SearchResult</c>. Never null.
        /// </returns>
        public static SearchResult SearchHouse(string city_url, string text)
        {
            if (city_url[city_url.Length - 1] != '/')
            {
                city_url += "/";
            }
            var result = new SearchResult();
            var url    = $"{city_url}a/brand/?kw={text}";
            int status = 0;
            var html   = NetHttpHelper.HttpGetRequest(url, out status);

            if (status == 200 && !string.IsNullOrWhiteSpace(html))
            {
                // Deserialization can yield null (e.g. a literal "null" body); keep the
                // empty result in that case so callers never receive null.
                var parsed = JsonConvert.DeserializeObject <SearchResult>(html);
                if (parsed != null)
                {
                    result = parsed;
                }
            }
            return(result);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Probes a proxy by fetching https://www.baidu.com through it.
        /// </summary>
        /// <param name="p">Proxy whose <c>ip</c> and <c>port</c> are handed to the HTTP helper.</param>
        /// <returns>
        /// <c>true</c> when the request returns status 200 and the body does not contain the
        /// text "无效"; <c>false</c> otherwise, including when anything throws.
        /// </returns>
        private bool checkIP(Proxy p)
        {
            try
            {
                var body = NetHttpHelper.HttpGetRequest("https://www.baidu.com", out int code, 2000, p.ip, p.port);
                return(code == 200 && !body.Contains("无效"));
            }
            catch (Exception)
            {
                // Best-effort probe: every failure counts as "proxy not usable".
                return(false);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Fetches a page with up to 20 attempts, switching to a new proxy whenever the request
        /// throws or the captcha page is returned.
        /// </summary>
        /// <param name="url">URL to request.</param>
        /// <param name="usingProxy">When <c>true</c>, a proxy is obtained from the proxy service before the first attempt.</param>
        /// <returns>
        /// The body of the first attempt that returned status 200 with non-blank, non-captcha
        /// content; an empty string when all attempts failed.
        /// </returns>
        private string GetHtml(string url, bool usingProxy = false)
        {
            if (usingProxy)
            {
                _proxy = _proxyService.GetProxy();
            }
            // Maximum number of attempts (and therefore proxy switches) per request.
            for (int i = 0; i < 20; i++)
            {
                try
                {
                    var html = NetHttpHelper.HttpGetRequest(url, out int status, 5000, _proxy.ip, _proxy.port);
                    if (status != 200 || string.IsNullOrWhiteSpace(html))
                    {
                        // Retried with the same proxy, as before.
                        continue;
                    }
                    if (!html.Contains("<title>验证码</title>"))
                    {
                        // Success: return immediately so a failed body can never leak out.
                        return(html);
                    }
                    // Captcha page: the site is throttling this IP, so switch proxy.
                    _logger.Debug("ank_需要滑动验证");
                    _proxy = _proxyService.GetProxy();
                }
                catch (Exception ex)
                {
                    // Request failed; record why, then switch proxy and retry.
                    _logger.Debug($"请求 {url} 失败,更换代理:{ex.Message}");
                    _proxy = _proxyService.GetProxy();
                }
            }
            // Every attempt failed. Previously the body of the last failed attempt
            // (captcha page or error body) was returned as if it were a valid result.
            return("");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Searches the city site for projects matching a name and parses the result list.
        /// </summary>
        /// <param name="city_url">Base URL of the city site; a trailing '/' is appended when missing.</param>
        /// <param name="name">Search keyword; URL-encoded as GB2312 before being appended to the search path.</param>
        /// <returns>
        /// One <c>ProjectInfo</c> per result entry found; empty when the request does not return
        /// status 200 or no entries are present. Fields whose markup is missing are left empty.
        /// </returns>
        public static List <ProjectInfo> GetProjectInfo(string city_url, string name)
        {
            if (city_url[city_url.Length - 1] != '/')
            {
                city_url += "/";
            }
            List <ProjectInfo> list = new List <ProjectInfo>();
            var url    = city_url + "house/s/a9" + System.Web.HttpUtility.UrlEncode(name, System.Text.Encoding.GetEncoding("GB2312"));
            int status = 0;
            var html   = NetHttpHelper.HttpGetRequest(url, out status);

            if (status == 200)
            {
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                // Result entries
                var nodes_p_list = doc.DocumentNode.SelectNodes(".//div[@class='nlc_details']");
                if (nodes_p_list != null)
                {
                    foreach (var item in nodes_p_list)
                    {
                        // Project name and link. Both nodes are checked before use: the name
                        // link used to be dereferenced ahead of its own null check.
                        var pName     = "";
                        var purl      = "";
                        var name_div  = item.SelectSingleNode(".//div[@class='nlcd_name']");
                        var html_name = name_div != null ? name_div.SelectSingleNode(".//a") : null;
                        if (html_name != null)
                        {
                            pName = html_name.InnerText.Trim().Replace("·", "");
                            purl  = html_name.GetAttributeValue("href", "");
                        }
                        // Drop the query string and make sure the remaining path ends with '/'.
                        int query_pos = purl.IndexOf('?');
                        if (query_pos >= 0)
                        {
                            purl = purl.Substring(0, query_pos);
                            if (purl.Length == 0 || purl[purl.Length - 1] != '/')
                            {
                                purl += "/";
                            }
                        }
                        // Project address
                        var pAddress     = "";
                        var html_address = item.SelectSingleNode(".//div[@class='address']");
                        if (html_address != null)
                        {
                            pAddress = html_address.InnerText.Trim().Replace("\n", "").Replace("\t", "");
                        }
                        // Project id: second piece of the onclick value when split on single quotes.
                        var pId     = "";
                        var html_id = item.SelectSingleNode(".//div[@class='duibi']");
                        if (html_id != null)
                        {
                            var temp = html_id.GetAttributeValue("onclick", "").Split('\'');
                            if (temp.Length >= 2)
                            {
                                pId = temp[1];
                            }
                        }
                        // Total number of comments: keep only the digits of the displayed text.
                        var html_count = item.SelectSingleNode(".//span[@class='value_num']");
                        var count      = 0;
                        if (html_count != null)
                        {
                            var number = System.Text.RegularExpressions.Regex.Replace(html_count.InnerText, @"[^0-9]+", "");
                            int.TryParse(number, out count);
                        }
                        // Price text with line breaks and tabs removed
                        var price_text = "";
                        var price_html = item.SelectSingleNode(".//div[@class='nhouse_price']");
                        if (price_html != null)
                        {
                            price_text = price_html.InnerText.Replace("\n", "").Replace("\t", "");
                        }
                        list.Add(new ProjectInfo()
                        {
                            name         = pName,
                            address      = pAddress,
                            url          = purl,
                            id           = pId,
                            commentCount = count,
                            price        = price_text
                        });
                    }
                }
            }

            return(list);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Searches the city site for projects matching a keyword and parses the result list.
        /// </summary>
        /// <param name="city_url">Base URL of the city site; a trailing '/' is appended when missing.</param>
        /// <param name="name">Search keyword appended as-is to the <c>kw</c> query parameter.</param>
        /// <returns>
        /// One <c>ProjectInfo</c> per result entry; empty when the request does not return
        /// status 200 or no entries are found.
        /// </returns>
        public static List <ProjectInfo> GetProjectInfo(string city_url, string name)
        {
            if (city_url[city_url.Length - 1] != '/')
            {
                city_url += "/";
            }
            var results = new List <ProjectInfo>();
            int status  = 0;
            var html    = NetHttpHelper.HttpGetRequest(city_url + "loupan/s?kw=" + name, out status);

            if (status != 200)
            {
                return(results);
            }

            var doc = new HtmlDocument();
            doc.LoadHtml(html);
            // The class value on the source page carries a trailing space after "item-mod".
            var entries = doc.DocumentNode.SelectNodes(".//div[@class='item-mod ']");
            if (entries == null)
            {
                return(results);
            }

            foreach (var entry in entries)
            {
                // Project name
                var    nameNode    = entry.SelectSingleNode(".//span[@class='items-name']");
                string projectName = nameNode != null ? nameNode.InnerText : "";

                // The address link supplies both the address text and the project URL.
                var    linkNode    = entry.SelectSingleNode(".//a[@class='address']");
                string addressText = "";
                string href        = "";
                if (linkNode != null)
                {
                    addressText = linkNode.InnerText.Replace("&nbsp;", " ").Trim();
                    foreach (var attribute in linkNode.Attributes)
                    {
                        if (attribute.Name != "href")
                        {
                            continue;
                        }
                        href = attribute.Value;
                        break;
                    }
                }

                // Review count: the markup minus the "条点评" suffix, 0 when it is not a number.
                var reviewNode  = entry.SelectSingleNode(".//span[@class='list-dp']");
                int reviewCount = 0;
                if (reviewNode != null)
                {
                    int.TryParse(reviewNode.InnerHtml.Replace("条点评", ""), out reviewCount);
                }

                // Price text
                var    priceNode = entry.SelectSingleNode(".//p[@class='price']");
                string priceText = priceNode != null ? priceNode.InnerText : "";

                results.Add(new ProjectInfo()
                {
                    name         = projectName,
                    commentCount = reviewCount,
                    // The id is every digit found in the project URL.
                    id           = System.Text.RegularExpressions.Regex.Replace(href, @"[^0-9]+", ""),
                    url          = href,
                    address      = addressText,
                    price        = priceText
                });
            }

            return(results);
        }