/// <summary>
/// Drives one complete run: prepares the output directory, resolves the total page count,
/// processes every page in order, then acquires the semaphore <c>_threadCount</c> times
/// before raising the finished flags.
/// </summary>
/// <returns>A task that completes when the run has finished or has been aborted.</returns>
internal async Task RunAsync()
{
    // Prepare the environment (creates the base directory).
    Init();

    // Resolve the total number of pages; without it there is nothing to iterate over.
    bool pageCountKnown = await ProcessTotalPageCountAsync();
    if (!pageCountKnown)
    {
        LogHelp.Log("=================================操作失败。。。", true);
        return;
    }

    // Walk the pages one after another.
    for (int pageIndex = 0; pageIndex < _totalPage; pageIndex++)
    {
        LogHelp.Log("正在处理第{0}/{1}页。。。", pageIndex + 1, _totalPage, true);

        // Handle the data of the current page.
        await ProcessPageAsync(pageIndex);
    }

    // Take every semaphore slot. Presumably this waits for outstanding detail jobs,
    // each of which releases its slot when it ends -- confirm against ProcessPageAsync.
    for (int slot = 0; slot < _threadCount; slot++)
    {
        _semaphore.WaitOne();
    }

    LogHelp.Log("处理完毕!。。。。", true);
    Program.isFinish = true;
    _isFinish = true;
}
/// <summary>
/// Extracts the link area between <c>_startPart</c> and <c>_endPart</c> from a listing page and,
/// for every link that passes the filter, queues <c>ProcessDetailAsync</c> on the thread pool.
/// </summary>
/// <param name="pageStr">
/// HTML of the listing page. Null, empty, or a page containing the "no permission" notice is ignored.
/// </param>
private void ProcessAsync(string pageStr)
{
    if (string.IsNullOrEmpty(pageStr) || pageStr.Contains("您无权进行当前操作,这可能因以下原因之一造成"))
    {
        return;
    }

    // A missing start marker used to yield an offset of (_startPart.Length - 1) and parse the
    // wrong region; a missing end marker used to make Substring throw. Skip the page instead.
    var markerPos = pageStr.LastIndexOf(_startPart);
    if (markerPos < 0)
    {
        LogHelp.Log("=================================未找到数据区域标记,跳过本页。。。", true);
        return;
    }

    var startPos = markerPos + _startPart.Length;
    var endPos = pageStr.IndexOf(_endPart, startPos);
    if (endPos < 0)
    {
        LogHelp.Log("=================================未找到数据区域标记,跳过本页。。。", true);
        return;
    }

    var dataArea = pageStr.Substring(startPos, endPos - startPos);
    var ms = _regA.Matches(dataArea);
    LogHelp.Log("本页一共{0}个链接需要判断。。。", ms.Count, true);

    for (int i = 0; i < ms.Count; i++)
    {
        var item = ms[i];
        var u = item.Groups[1].Value;
        var name = item.Groups[2].Value;

        // Skip links whose text is a plain number, whose target contains ".php",
        // or whose text still contains markup.
        int ignoredNumber;
        if (int.TryParse(name, out ignoredNumber) || u.Contains(".php") || name.Contains("<"))
        {
            continue;
        }

        LogHelp.Log("进度:{0}/{1},符合要求:{2}", i, ms.Count, u, true);

        // Take a semaphore slot before queueing; ProcessDetailAsync releases it when it ends.
        _semaphore.WaitOne();

        // Pause for one of the four delays in waitopt, picked at random, before queueing.
        Thread.Sleep(waitopt[_random.Next(1000) % 4]);
        ThreadPool.QueueUserWorkItem(ProcessDetailAsync, item);
    }
}
/// <summary>
/// Determines <c>_totalPage</c>, either from the preset <c>_frontCount</c> or by downloading
/// <c>_url</c> and applying <c>_regTotalPage</c>, then logs it and calls <c>GeneratePageList</c>.
/// </summary>
/// <returns>
/// <c>true</c> when the page count was established; <c>false</c> when the first page could not
/// be fetched or the pattern did not yield exactly one capture group.
/// </returns>
private async Task<bool> ProcessTotalPageCountAsync()
{
    if (_frontCount == null)
    {
        // No preset count: read it from the first page.
        _firstPageStr = await HttpHelp.GetPageStringAsync(_url);
        if (string.IsNullOrEmpty(_firstPageStr))
        {
            return false;
        }

        var totalMatch = _regTotalPage.Match(_firstPageStr);
        if (totalMatch.Groups.Count != 2)
        {
            return false;
        }

        _totalPage = int.Parse(totalMatch.Groups[1].Value);
    }
    else
    {
        // A preset count takes precedence; the first page is not downloaded in this case.
        _totalPage = _frontCount.Value;
    }

    LogHelp.Log("总分页数为:" + _totalPage);
    GeneratePageList();
    return true;
}
/// <summary>
/// Makes sure the base output directory <c>_baseDir</c> exists, creating it when necessary.
/// </summary>
private void Init()
{
    // Directory.Exists is the correct probe here: File.Exists returns false for a directory,
    // which made the "created" message appear on every run.
    if (!Directory.Exists(_baseDir))
    {
        Directory.CreateDirectory(_baseDir);
        LogHelp.Log("目录创建完成:" + _baseDir);
    }

    // Logged in both cases; the directory exists at this point either way.
    LogHelp.Log("目录存在:" + _baseDir);
}
/// <summary>
/// Downloads a page synchronously with a GET request and decodes the body as GBK.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The decoded page text; <c>null</c> when the last attempt threw, or an empty string when the
/// last attempt returned an empty body. In both cases an error record of type 3 is stored.
/// </returns>
/// <remarks>
/// Up to three attempts are made; the failure message is logged when the second attempt throws.
/// </remarks>
public static string GetPageString(string url)
{
    string content = null;
    int attempt = 0;

    while (true)
    {
        attempt++;
        try
        {
            var request = WebRequest.Create(url) as HttpWebRequest;
            request.Method = "GET";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36";

            using (var response = request.GetResponse())
            using (var body = response.GetResponseStream())
            using (var reader = new StreamReader(body, Encoding.GetEncoding("GBK")))
            {
                content = reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            if (attempt == 2)
            {
                LogHelp.Log("=================================页面获取失败:" + url, true);
            }

            content = null;
        }

        // Stop as soon as there is text, or once the attempt counter has passed 2.
        if (!string.IsNullOrEmpty(content) || attempt > 2)
        {
            break;
        }
    }

    if (string.IsNullOrEmpty(content))
    {
        MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess { CreateTime = DateTime.Now, Type = 3, Url = url });
    }

    return content;
}
/// <summary>
/// Downloads a page asynchronously through the shared <c>client</c> and decodes the body as GBK.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The decoded page text; <c>null</c> when the last attempt threw, or an empty string when the
/// last attempt returned an empty body. In both cases an error record of type 3 is stored.
/// </returns>
/// <remarks>
/// Up to six attempts are made; the failure details are logged when the fifth attempt throws.
/// </remarks>
public static async Task<string> GetPageStringAsync(string url)
{
    string res = null;
    int t = 0;

    do
    {
        t++;
        try
        {
            // The response message is disposable and was previously never released.
            using (var respMsg = await client.GetAsync(url))
            using (var resStream = await respMsg.Content.ReadAsStreamAsync())
            using (var sr = new StreamReader(resStream, Encoding.GetEncoding("GBK")))
            {
                res = await sr.ReadToEndAsync();
            }
        }
        catch (Exception ex)
        {
            if (t == 5)
            {
                LogHelp.Log("=================================页面获取失败:" + url, true);
                LogHelp.Log("=================================" + ex.Message);
                LogHelp.Log("=================================", true);
            }

            res = null;
        }
    }
    while (string.IsNullOrEmpty(res) && t <= 5);

    if (string.IsNullOrEmpty(res))
    {
        MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess { CreateTime = DateTime.Now, Type = 3, Url = url });
    }

    return res;
}
/// <summary>
/// Thread-pool callback that handles one detail page: resolves or creates the
/// <c>MoviePage</c> record, makes sure its folder exists, parses the download count, the
/// screenshot links and the torrent link, stores them as resources and downloads the files.
/// </summary>
/// <param name="stat">
/// The <see cref="Match"/> for the listing link; group 1 is the relative URL, group 2 the title.
/// </param>
/// <remarks>
/// The semaphore slot is released in every case. Failures are logged instead of escaping,
/// because the caller cannot observe an exception thrown by an <c>async void</c> method.
/// </remarks>
private async void ProcessDetailAsync(object stat)
{
    string movieName = null;
    try
    {
        var item = stat as Match;
        if (item == null)
        {
            return;
        }

        movieName = ValidFileName(item.Groups[2].Value);
        var siteRoot = _uri.Scheme + "://" + _uri.Authority + "/bbs/";
        var movieUrl = siteRoot + item.Groups[1].Value;

        // Reuse the stored record when this URL is already known, otherwise start a new one.
        MoviePage pageModel;
        if (MyDbCOntextHelp.ExistMovie(movieUrl))
        {
            pageModel = MyDbCOntextHelp.QueryMovie(movie => movie.Url == movieUrl);
        }
        else
        {
            pageModel = new MoviePage
            {
                CreateTime = DateTime.Now,
                UpdateTime = DateTime.Now,
                Name = movieName,
                Url = movieUrl,
                IsHandler = false,
                Type = _typeName,
            };
            LogHelp.Log("影片:" + pageModel.Name);
        }

        var moviedir = Path.Combine(_baseDir, pageModel.Name);
        if (!Directory.Exists(moviedir))
        {
            Directory.CreateDirectory(moviedir);
        }

        // Continue when the record already has an id, or when adding the new record succeeds.
        if (pageModel.Id != 0 || MyDbCOntextHelp.AddPicturePage(pageModel))
        {
            var detailPageString = await HttpHelp.GetPageStringAsync(pageModel.Url);
            if (string.IsNullOrEmpty(detailPageString)
                || detailPageString.Contains("您无权进行当前操作,这可能因以下原因之一造成"))
            {
                return;
            }

            // Download count: a captured value that is not an integer is ignored.
            var countMatch = _regDownloadCount.Match(detailPageString);
            int downloadCount;
            if (countMatch.Groups.Count == 2 && int.TryParse(countMatch.Groups[1].Value, out downloadCount))
            {
                pageModel.DownloadCount = downloadCount;
            }

            LogHelp.Log("影片下载次数:" + pageModel.DownloadCount);

            // Screenshots: one resource of type 1 per image found inside the image area.
            var imgAreaStr = _regImgArea.Match(detailPageString).Value;
            var mimgs = _regImg.Matches(imgAreaStr);
            var resList = new List<Resource>(mimgs.Count + 1);
            for (int j = 0; j < mimgs.Count; j++)
            {
                var r = new Resource
                {
                    CreateTime = DateTime.Now,
                    UpdateTime = DateTime.Now,
                    PicturePageId = pageModel.Id,
                    IsHandler = false,
                    Type = 1,
                    Url = mimgs[j].Groups[1].Value
                };

                // Relative image links are resolved against the site root.
                if (!r.Url.StartsWith("http"))
                {
                    r.Url = siteRoot + r.Url;
                }

                resList.Add(r);
            }

            LogHelp.Log("截图{0}张.....bt文件一个", mimgs.Count);

            // Torrent: the first anchor that follows the "check duplicate" link.
            var p1 = detailPageString.IndexOf("检查重复</a>");
            if (p1 == -1)
            {
                return;
            }

            var startbt = detailPageString.IndexOf("<a href=\"", p1);
            var endbt = startbt == -1 ? -1 : detailPageString.IndexOf("</a>", startbt);
            if (startbt == -1 || endbt == -1)
            {
                // No complete anchor after the marker; handled like a missing marker.
                return;
            }

            // The extra 4 characters keep the closing "</a>" in the extracted anchor.
            var mbt = _regBt.Match(detailPageString.Substring(startbt, endbt - startbt + 4));
            if (!mbt.Success)
            {
                // Without a match there is neither a file name nor a URL to download.
                return;
            }

            var btRes = new Resource
            {
                CreateTime = DateTime.Now,
                UpdateTime = DateTime.Now,
                PicturePageId = pageModel.Id,
                Type = 2,
                IsHandler = false,
                Name = mbt.Groups[2].Value,
                Url = siteRoot + mbt.Groups[1].Value
            };
            resList.Add(btRes);

            MyDbCOntextHelp.AddResourceList(resList);

            // Every entry except the last one (the torrent) is a screenshot.
            for (int i = 0; i < resList.Count - 1; i++)
            {
                await HttpHelp.DownloadImgAsync(resList[i].Url, moviedir);
            }

            await HttpHelp.DownloadFileAsync(btRes.Url, Path.Combine(moviedir, btRes.Name));
        }

        LogHelp.Log("处理完毕:" + movieName);
    }
    catch (Exception ex)
    {
        LogHelp.Log("=================================详情页处理失败:" + movieName, true);
        LogHelp.Log("=================================" + ex.Message);
    }
    finally
    {
        // Hand back the slot that was taken before this job was queued.
        _semaphore.Release();
    }
}
/// <summary>
/// Downloads a file asynchronously through the shared <c>client</c> into <paramref name="path"/>,
/// naming it after the last segment of <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the file to download.</param>
/// <param name="path">Directory that receives the file.</param>
/// <returns>
/// <c>true</c> when the file was written or was already present; otherwise <c>false</c>, in
/// which case an error record of type 1 is stored.
/// </returns>
/// <remarks>
/// Up to three attempts are made; the failure message is logged when the second attempt throws.
/// </remarks>
public static async Task<bool> DownloadImgAsync(string url, string path)
{
    bool res = false;
    int t = 0;

    do
    {
        t++;
        try
        {
            var fileName = Path.Combine(path, Path.GetFileName(url));

            // A non-empty file with this name counts as already downloaded.
            if (File.Exists(fileName) && new FileInfo(fileName).Length != 0)
            {
                return true;
            }

            // The response message is disposable and was previously never released.
            using (var respMsg = await client.GetAsync(url))
            using (var resStream = await respMsg.Content.ReadAsStreamAsync())
            {
                // Nothing to write when the local size already equals the announced length.
                if (File.Exists(fileName) && new FileInfo(fileName).Length == respMsg.Content.Headers.ContentLength)
                {
                    return true;
                }

                using (var output = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.ReadWrite, 4096, true))
                {
                    await resStream.CopyToAsync(output);
                }

                res = true;
            }
        }
        catch (Exception)
        {
            if (t == 2)
            {
                LogHelp.Log("=================================图片获取失败:" + url, true);
            }

            res = false;
        }
    }
    while (!res && t <= 2);

    if (!res)
    {
        MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess { CreateTime = DateTime.Now, Type = 1, Url = url });
    }

    return res;
}
/// <summary>
/// Downloads a file synchronously with a GET request into <paramref name="path"/>,
/// naming it after the last segment of <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the file to download.</param>
/// <param name="path">Directory that receives the file.</param>
/// <returns>
/// <c>true</c> when the file was written or was already present; otherwise <c>false</c>, in
/// which case an error record of type 1 is stored.
/// </returns>
/// <remarks>
/// Up to three attempts are made; the failure message is logged when the second attempt throws.
/// </remarks>
public static bool DownloadImg(string url, string path)
{
    bool succeeded = false;
    int attempt = 0;

    while (true)
    {
        attempt++;
        try
        {
            var target = Path.Combine(path, Path.GetFileName(url));

            // A non-empty file with this name counts as already downloaded.
            if (File.Exists(target) && new FileInfo(target).Length != 0)
            {
                return true;
            }

            var request = WebRequest.Create(url) as HttpWebRequest;
            request.Method = "GET";
            request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36";

            using (var response = request.GetResponse())
            using (var body = response.GetResponseStream())
            {
                // Nothing to write when the local size already equals the announced length.
                if (File.Exists(target) && new FileInfo(target).Length == response.ContentLength)
                {
                    return true;
                }

                using (var output = new FileStream(target, FileMode.Create, FileAccess.Write))
                {
                    var chunk = new byte[8192];
                    int read;
                    while ((read = body.Read(chunk, 0, chunk.Length)) != 0)
                    {
                        output.Write(chunk, 0, read);
                    }
                }

                succeeded = true;
            }
        }
        catch (Exception)
        {
            if (attempt == 2)
            {
                LogHelp.Log("=================================图片获取失败:" + url, true);
            }

            succeeded = false;
        }

        // Stop on success, or once the attempt counter has passed 2.
        if (succeeded || attempt > 2)
        {
            break;
        }
    }

    if (!succeeded)
    {
        MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess { CreateTime = DateTime.Now, Type = 1, Url = url });
    }

    return succeeded;
}