Пример #1
0
        /// <summary>
        /// Re-tests every proxy currently stored in Redis, one batch at a time.
        /// </summary>
        /// <remarks>
        /// The total number of stored proxies is read once up front and then walked in
        /// windows of <c>Proxy.BATCH_TEST_SIZE</c>. Each window is fetched with
        /// <c>_redis.ZBatchGet</c> and handed to <c>TestOneAsync</c>, followed by a one-second
        /// pause before the next window. Any exception is logged and swallowed, so the
        /// returned task never faults.
        /// </remarks>
        /// <returns>A task that completes when all batches have been processed.</returns>
        public async Task RunAsync()
        {
            try
            {
                // Captured once so every progress line of this run carries the same timestamp.
                var tstr = $"{DateTime.Now:yyyy-MM-dd HH:mm:ss}";
                LoggerHelper.Write($"检测代理数据 {tstr}");

                long count = await _redis.ZCountAsync();

                for (long start = 0; start < count; start += Proxy.BATCH_TEST_SIZE)
                {
                    // Clamp the last window to the number of stored proxies.
                    // NOTE(review): the progress message treats `stop` as an exclusive 0-based
                    // bound; confirm ZBatchGet does too, otherwise adjacent batches overlap by one.
                    long stop = Math.Min(start + Proxy.BATCH_TEST_SIZE, count);
                    LoggerHelper.Write($"{tstr} 正在测试第 {start + 1}-{stop} 个代理,共 {count} 个代理");

                    var batch = await _redis.ZBatchGet(start, stop);
                    await TestOneAsync(batch);

                    // Pause between batches without blocking a thread-pool thread.
                    await Task.Delay(1000);
                }

                LoggerHelper.Write($"检测代理数据 本次检测{count}条数据,时间:{DateTime.Now:yyyy-MM-dd HH:mm:ss}.");
            }
            catch (Exception ex)
            {
                LoggerHelper.Write(ex, $"检测代理数据 异常: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
            }
        }
Пример #2
0
        /// <summary>
        /// Scrapes several free-proxy listing sites and stores every newly found proxy in Redis.
        /// </summary>
        /// <remarks>
        /// <para>
        /// When the pool already holds <c>Proxy.POOL_UPPER_THRESHOLD</c> entries or more, the
        /// method only logs and returns. Otherwise pages 1-4 of three paged sites plus one
        /// 66ip API URL are downloaded concurrently and parsed.
        /// </para>
        /// <para>
        /// A source that fails to download or whose table cannot be parsed is logged and
        /// skipped; the remaining sources are still processed. Candidates are validated with
        /// <c>IsProxyIp</c>, checked against Redis, de-duplicated, and then written to Redis.
        /// Finally <c>EmailAsync</c> is started (not awaited) with the number of new proxies.
        /// Any other exception is logged and swallowed, so the returned task never faults.
        /// </para>
        /// </remarks>
        /// <returns>A task that completes when the scrape run has finished.</returns>
        public async Task RunAsync()
        {
            try
            {
                LoggerHelper.Write($"代理数据抓取 {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
                var count = await _redis.ZCountAsync();

                if (count >= Proxy.POOL_UPPER_THRESHOLD)
                {
                    LoggerHelper.Write($"代理数据抓取 代理数已达到代理池数量界限{Proxy.POOL_UPPER_THRESHOLD} {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
                    return;
                }

                // (URL prefix, source id). The source id selects the download and parsing strategy.
                var pageUrls = new List<(string, int)>
                {
                    ("http://www.ip3366.net/free/?stype=1&page=", 1),
                    ("http://www.kuaidaili.com/free/inha/", 2),
                    ("https://www.xicidaili.com/nn/", 3)
                };

                // Pages 1-4 of every paged source.
                var proxyUrls = new List<(string, int)>();
                for (int page = 1; page < 5; page++)
                {
                    foreach (var source in pageUrls)
                    {
                        proxyUrls.Add(($"{source.Item1}{page}", source.Item2));
                    }
                }

                proxyUrls.Add(("http://www.66ip.cn/nmtq.php?getnum=100&isp=0&anonymoustype=2&start=&ports=&export=&ipaddress=&area=0&proxytype=0&api=66ip", 4));

                // Start every download, then wait for all of them without blocking a thread.
                var web = new HtmlWeb();
                var list_task = proxyUrls
                    .Select(source => FetchAsync(web, source.Item1, source.Item2))
                    .ToList();
                var pages = await Task.WhenAll(list_task);

                var proxys = new List<string>();
                foreach (var item in pages)
                {
                    // A null document means the download failed (already logged by FetchAsync).
                    if (item.Item1 == null || string.IsNullOrWhiteSpace(item.Item1.Text))
                    {
                        continue;
                    }

                    if (item.Item2 == 4)
                    {
                        await Get66IpsAsync(item, proxys);
                        continue;
                    }

                    var table = item.Item2 == 3 ? "table" : "tbody";
                    try
                    {
                        // SelectNodes returns null (not an empty collection) when nothing matches.
                        var rows = item.Item1.DocumentNode.SelectNodes($"//{table}/tr");
                        if (rows == null)
                        {
                            LoggerHelper.Write($"代理数据抓取 {item.Item2} 未找到表格行 {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
                            continue;
                        }

                        // Source 3 has a leading extra column, so IP/port sit in cells 1 and 2;
                        // sources 1 and 2 keep them in cells 0 and 1.
                        var ipIndex = item.Item2 == 3 ? 1 : 0;

                        foreach (var row in rows)
                        {
                            var tds = row.SelectNodes("td");
                            if (tds == null || tds.Count <= ipIndex + 1)
                            {
                                // Header rows (th only) or malformed rows.
                                continue;
                            }

                            var proxyStr = $"{tds[ipIndex].InnerText.Trim()}:{tds[ipIndex + 1].InnerText.Trim()}";

                            // Validate locally first so malformed rows cost no Redis round trip.
                            if (!proxyStr.IsProxyIp())
                            {
                                continue;
                            }

                            if (await _redis.ZExistsAsync(proxyStr))
                            {
                                continue;
                            }

                            proxys.Add(proxyStr);
                        }
                    }
                    catch (Exception ex)
                    {
                        LoggerHelper.Write(ex, $"代理数据抓取 {item.Item2} item={item.Item1.Text} 异常: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
                    }
                }

                // Different pages/sites can list the same proxy; store and count each one once.
                var fresh = proxys.Distinct().ToList();
                foreach (var proxy in fresh)
                {
                    await _redis.ZAddAsync(proxy);
                }

                // Notification is intentionally fire-and-forget.
                _ = EmailAsync(fresh.Count);

                LoggerHelper.Write($"代理数据抓取 本次抓取到{fresh.Count}条数据,时间:{DateTime.Now:yyyy-MM-dd HH:mm:ss}.");
            }
            catch (Exception ex)
            {
                LoggerHelper.Write(ex, $"代理数据抓取 异常: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
            }

            // Downloads one listing page and pairs it with its source id.
            // Returns a null document when the download fails, so one bad site
            // cannot abort the whole run.
            async Task<(HtmlDocument, int)> FetchAsync(HtmlWeb loader, string url, int sourceId)
            {
                try
                {
                    if (sourceId != 3)
                    {
                        return (await loader.LoadFromWebAsync(url), sourceId);
                    }

                    // Source 3 is fetched with browser-like headers instead of HtmlWeb.
                    // NOTE(review): the session cookie below is hard-coded and has presumably expired — confirm it is still needed.
                    using var client = _httpClient.CreateClient();

                    client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
                    client.DefaultRequestHeaders.Add("Cookie", "_free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJWRjYzc5MmM1MTBiMDMzYTUzNTZjNzA4NjBhNWRjZjliBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMUp6S2tXT3g5a0FCT01ndzlmWWZqRVJNek1WanRuUDBCbTJUN21GMTBKd3M9BjsARg%3D%3D--2a69429cb2115c6a0cc9a86e0ebe2800c0d471b3");
                    client.DefaultRequestHeaders.Add("Host", "www.xicidaili.com");
                    client.DefaultRequestHeaders.Add("Referer", "http://www.xicidaili.com/nn/3");
                    client.DefaultRequestHeaders.Add("Upgrade-Insecure-Requests", "1");
                    client.DefaultRequestHeaders.Add("User-Agent", ProxyHelper.GetUserAgent());

                    using var httpResponse = await client.GetAsync(url);
                    var html = await httpResponse.Content.ReadAsStringAsync();

                    var doc = new HtmlDocument();
                    doc.LoadHtml(html);
                    return (doc, sourceId);
                }
                catch (Exception ex)
                {
                    LoggerHelper.Write(ex, $"代理数据抓取 {sourceId} url={url} 异常: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
                    return (null, sourceId);
                }
            }
        }