/// <summary>
/// Uploads the file at <c>arg.FullFilePath</c> to prntscr.com, then fetches the page
/// address returned by the upload endpoint and extracts the image URL from that
/// page's <c>og:image</c> meta tag.
/// </summary>
/// <param name="arg">
/// Upload arguments. Reads <c>FullFilePath</c> and <c>ExtraArgs["uploadFileFormName"]</c>;
/// the latter must be present, otherwise the lookup throws.
/// </param>
/// <returns>
/// A result whose <c>success</c> is true and whose <c>data.ImgUrl</c> holds the extracted
/// image URL; otherwise <c>success</c> is false and <c>message</c> describes the failure.
/// </returns>
public InvokeResult<ImageRepoUploadResult> Upload(ImageRepoUploadArg arg)
{
    // Pessimistic default: every early return below reports a failure.
    var invokeResult = new InvokeResult<ImageRepoUploadResult>()
    {
        success = false,
        message = "请求无返回"
    };
    var apiResult = new ImageRepoUploadResult()
    {
        Api = GetApiCode(),
        ApiChannel = GetApiCode()
    };

    // Perform the upload.
    var response = RequestUtil.UploadFile(
        "https://prntscr.com/upload.php",
        arg.FullFilePath,
        "image",
        arg.ExtraArgs["uploadFileFormName"].ToString(),
        null);
    if (string.IsNullOrEmpty(response))
    {
        return invokeResult;
    }

    // Parse the reply; a payload that is not a JSON object is treated like an empty reply.
    var resultModel = JsonConvert.DeserializeObject(response) as JObject;
    if (resultModel == null)
    {
        return invokeResult;
    }

    var status = Convert.ToString(resultModel["status"]);
    if (!status.Equals("success"))
    {
        invokeResult.message = Convert.ToString(resultModel["msg"]);
        return invokeResult;
    }

    // "data" carries the address of the page that hosts the uploaded image.
    // Convert.ToString tolerates a missing field instead of throwing.
    var imgAddress = Convert.ToString(resultModel["data"]);
    if (string.IsNullOrEmpty(imgAddress))
    {
        invokeResult.message = "数据抓取失败";
        return invokeResult;
    }

    var crawlResult = RequestUtil.CrawlContentFromWebsit(imgAddress, null);
    if (string.IsNullOrEmpty(crawlResult.data))
    {
        // Regex.Match would throw on a null body; report it as a scrape failure instead.
        invokeResult.message = "数据抓取失败";
        return invokeResult;
    }

    // Locate the og:image meta tag. Optional whitespace after the colon is accepted so
    // both "og:image" and the previously required "og: image" spelling match.
    Match mstr = Regex.Match(crawlResult.data, "(?m)<meta property=\"og:\\s*image\" content=\"(.*?)\"/>");
    if (!mstr.Success)
    {
        invokeResult.message = "数据抓取失败";
        return invokeResult;
    }

    // Group 1 is the captured content attribute (the URL). Group 0 is the whole tag
    // and must not be part of the result.
    apiResult.ImgUrl = mstr.Groups[1].Value;

    invokeResult.success = true;
    invokeResult.data = apiResult;
    return invokeResult;
}
/// <summary>
/// Checks whether a proxy address is usable by requesting <c>PROXY_TEST_URL</c>
/// through it and comparing the body with <c>PROXY_TEST_RESPONSE</c>.
/// </summary>
/// <param name="proxyAddress">Proxy address to probe; null or empty is reported as invalid.</param>
/// <returns>True when the fetched body equals <c>PROXY_TEST_RESPONSE</c>; otherwise false.</returns>
private bool IsProxyValid(string proxyAddress)
{
    if (!string.IsNullOrEmpty(proxyAddress))
    {
        // Third argument is presumably a timeout in milliseconds; confirm against RequestUtil.
        var probe = RequestUtil.CrawlContentFromWebsit(PROXY_TEST_URL, proxyAddress, 5000);
        return PROXY_TEST_RESPONSE.Equals(probe.data);
    }

    return false;
}
/// <summary>
/// Starts a crawl run: for every rule from <c>GetCrawlRules()</c> a background task
/// walks pages 1..<c>MaxPage</c>, extracts "host:port" candidates with the rule's
/// pattern, and saves those that pass <c>IsProxyValid</c>.
/// </summary>
/// <remarks>
/// Each call increments <c>mTaskId</c>. Workers from an earlier run stop as soon as
/// they observe that <c>mTaskId</c> no longer equals the id of their own run.
/// </remarks>
/// <param name="state">Not used by this method (presumably required by a timer-style callback signature).</param>
private static void StartCrawlIP(object state)
{
    mTaskId++;
    // Snapshot this run's id once, before any worker is queued. Reading mTaskId inside
    // the worker would let a late-starting worker pick up a newer run's id and never
    // notice that it has been superseded.
    int runTaskId = mTaskId;

    var taskName = "\n[ 抓取IP任务 ]";
    var crawRules = GetCrawlRules();

    // Walk every rule; each rule gets its own background task.
    crawRules.ForEach(item =>
    {
        Task.Run(() =>
        {
            // The log prefix depends only on the rule, so build it once per task.
            var curTaskDesc = string.Format("{0} 规则 -> {1} ", taskName, item.Name);

            // Pages are numbered from 1 up to the rule's MaxPage.
            for (var currentPage = 1; currentPage <= item.MaxPage; currentPage++)
            {
                if (runTaskId != mTaskId)
                {
                    // A newer run has started; abandon this one.
                    return;
                }

                // Fetch the page HTML for the current page of this rule.
                var crawlResult = RequestUtil.CrawlContentFromWebsit(string.Format(item.Url, currentPage), null);
                if (!crawlResult.success || crawlResult.data == null)
                {
                    // A null body would make Regex.Match throw and silently kill this task.
                    Console.WriteLine(curTaskDesc + "页面抓取失败");
                    continue;
                }

                // Apply the rule's pattern: group 1 is used as the host, group 2 as the port.
                Match mstr = Regex.Match(crawlResult.data, item.Partten);
                Console.WriteLine(curTaskDesc + (mstr.Success ? "匹配成功" : "匹配失败"));

                // Visit every match on the page.
                while (mstr.Success)
                {
                    if (runTaskId != mTaskId)
                    {
                        return;
                    }

                    var proxyAddress = mstr.Groups[1].Value + ":" + mstr.Groups[2].Value;
                    mstr = mstr.NextMatch();

                    if (IsProxyValid(proxyAddress))
                    {
                        SaveProxyToDB(proxyAddress);
                    }
                }
            }
        });
    });
}
/// <summary>
/// Checks whether a proxy address is usable by requesting <c>PROXY_TEST_URL</c>
/// through it and comparing the body with <c>PROXY_TEST_RESPONSE</c>.
/// </summary>
/// <param name="proxyAddress">Proxy address to probe; null or empty is reported as invalid.</param>
/// <returns>True when the fetched body equals <c>PROXY_TEST_RESPONSE</c>; otherwise false.</returns>
public static bool IsProxyValid(string proxyAddress)
{
    // Without an address there is no proxy to test. Probing anyway would presumably
    // fetch the test URL without any proxy and could report a false positive.
    if (string.IsNullOrEmpty(proxyAddress))
    {
        return false;
    }

    // Third argument is presumably a timeout in milliseconds; confirm against RequestUtil.
    var crawlResult = RequestUtil.CrawlContentFromWebsit(PROXY_TEST_URL, proxyAddress, 5000);
    return PROXY_TEST_RESPONSE.Equals(crawlResult.data);
}