/// <summary>
/// Walks the list pages from <c>startPage</c> to <c>endPage</c>, extracts every comic
/// entry found on each page, resolves the comic's page list and stores the result both in
/// <c>Comics</c> and in a <c>.wnacgdb</c> index file under the <c>data</c> directory of the
/// application base directory.
/// </summary>
/// <remarks>
/// A comic whose index file already exists is skipped. A comic is also skipped when the first
/// photo id cannot be found on its detail page or when fewer than 8 pages are parsed.
/// Any exception raised while processing a list page is reported through <c>OutLog</c> and the
/// loop moves on to the next list page.
/// </remarks>
public void Collect()
{
    string logpath = AppDomain.CurrentDomain.BaseDirectory;
    string dirPath = logpath + "data\\";
    if (!Directory.Exists(dirPath))
    {
        Directory.CreateDirectory(dirPath);
    }

    // The patterns never change, so build them once instead of once per page / per comic.
    // List entry: link to the comic (mgid), its title and its cover image.
    Regex listRegex = new Regex(@"<a href=""/photos-index-aid-(?<mgid>\d+).html""\s*title=""(?<title>.*?)""><img alt="".*?""\s*src=""(?<img>.*?)""");
    // Detail page: id of the first photo of the comic.
    Regex homePhotoRegex = new Regex(@"<div class=""pic_box tb""><a href=""/photos-view-id-(\d*).html"">");
    // Photo page: one <option> per page, group 1 = photo id, group 2 = page number.
    Regex pageOptionRegex = new Regex(@"<option\s+value=""(\d+)"".*?>第(\d+)頁</option>");

    Comics = new List<Comic>();
    for (int curPage = startPage; curPage <= endPage; curPage++)
    {
        _syncContext.Post(OutLog, "分析页面 page:" + curPage);
        try
        {
            int bzIndex = 0;
            string listUrl = _basePath + String.Format(collectorPath, curPage, bzType);
            string listResult = Http.GetHtml(listUrl);

            foreach (Match mch in listRegex.Matches(listResult))
            {
                Comic comic = new Comic();
                // Counted for every match, including the ones skipped below.
                bzIndex++;
                string mgid = mch.Groups["mgid"].Value;
                string title = mch.Groups["title"].Value;
                string img = mch.Groups["img"].Value;
                comic.Title = Utils.getFolderName(title);

                string fileStr = dirPath + "\\" + comic.Title + ".wnacgdb";
                if (File.Exists(fileStr))
                {
                    _syncContext.Post(OutLog, "已解析.跳过 \r" + title);
                    continue;
                }

                comic.Id = mgid;
                comic.Cover = img;

                string detailPage = Http.GetHtml(_basePath + String.Format(detailPath, mgid));
                string homePhotoId = homePhotoRegex.Match(detailPage).Groups[1].Value;
                if (String.IsNullOrEmpty(homePhotoId))
                {
                    _syncContext.Post(OutLog, "解析失败!!!!!!无法获取图片ID.跳过 \r" + title);
                    continue;
                }

                string photoDetailPage = Http.GetHtml(_basePath + String.Format(photoPath, homePhotoId));
                foreach (Match m in pageOptionRegex.Matches(photoDetailPage))
                {
                    // NOTE(review): if Contents is a dictionary, Add throws when the same page
                    // number appears twice in the markup - confirm that cannot happen.
                    comic.Contents.Add(int.Parse(m.Groups[2].Value), m.Groups[1].Value.Trim());
                }
                if (comic.Contents.Count < 8)
                {
                    _syncContext.Post(OutLog, "解析失败!!!!!!!!图片列表解析失败.跳过 \r" + title);
                    continue;
                }

                _syncContext.Post(OutLog, "提取 \r" + title);
                Comics.Add(comic);
                WriteComicIndexFile(comic, fileStr, title);

                // Short pause between comics before issuing the next requests.
                Thread.Sleep(100);
            }

            if (bzIndex != 12)
            {
                ExeLog.WriteLog("当前页面本子数量缺少:" + bzIndex + "/12\r\n页面:" + listUrl + "内容为:\r\n" + listResult);
            }
        }
        catch (Exception e)
        {
            _syncContext.Post(OutLog, "解析 page:" + curPage + "失败 \r" + e.Message);
        }
    }

    _syncContext.Post(OutLog, "解析完成");
}

/// <summary>
/// Writes the index file of one comic: the id, the cover, then one <c>page|photoId</c> line per
/// entry of <c>comic.Contents</c>.
/// </summary>
/// <param name="comic">Comic whose data is written.</param>
/// <param name="fileStr">Full path of the index file to create.</param>
/// <param name="title">Title used in log messages.</param>
/// <remarks>
/// Writing is best effort: a failure is logged instead of propagated. The partially written
/// file is removed, because an existing index file makes later runs skip the comic.
/// </remarks>
private void WriteComicIndexFile(Comic comic, string fileStr, string title)
{
    try
    {
        using (FileStream fs = new FileStream(fileStr, FileMode.Create, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs))
        {
            sw.WriteLine(comic.Id);
            sw.WriteLine(comic.Cover);
            foreach (int k in comic.Contents.Keys)
            {
                sw.WriteLine(k + "|" + comic.Contents[k]);
            }
        }
    }
    catch (Exception e)
    {
        _syncContext.Post(OutLog, "索引文件写入失败 \r" + title + " e:" + e.Message);
        try
        {
            if (File.Exists(fileStr))
            {
                File.Delete(fileStr);
            }
        }
        catch (Exception deleteError)
        {
            ExeLog.WriteLog("[" + title + "]索引文件删除失败\r\n" + "(" + fileStr + ") e:" + deleteError.Message + "\r\n");
        }
    }
}
/// <summary>
/// Worker loop: takes comics from the <c>comics</c> queue until it is empty, downloads the
/// cover and every page into <c>download\progress\&lt;title&gt;\</c>, zips the folder into
/// <c>download\ok\</c> and records the title as an empty marker file in <c>download\history\</c>.
/// </summary>
/// <param name="obj">Not used by this method.</param>
/// <remarks>
/// A comic that already has a history marker is skipped. When any download step fails the
/// failure is reported and logged, and the loop moves on to the next comic; files already
/// downloaded for that comic are left in place.
/// NOTE(review): the <c>Count</c> check and <c>Dequeue</c> are not atomic - confirm whether
/// several threads run this method over the same queue.
/// </remarks>
private void DownloadFun(object obj)
{
    string logpath = AppDomain.CurrentDomain.BaseDirectory;
    string dirPath = logpath + "download\\";
    string downloading = dirPath + "progress\\";
    string downloadok = dirPath + "ok\\";
    string historyPath = dirPath + "history\\";
    if (!Directory.Exists(downloadok))
    {
        Directory.CreateDirectory(downloadok);
    }

    while (comics.Count > 0)
    {
        Comic c = comics.Dequeue();

        if (!Directory.Exists(historyPath))
        {
            Directory.CreateDirectory(historyPath);
        }
        if (File.Exists(historyPath + c.Title))
        {
            _syncContext.Post(OutLog, "曾经下载过:" + c.Title + " 跳过\r\n");
            continue;
        }

        string comicPath = downloading + c.Title + "\\";
        _syncContext.Post(DlTaskStart, c.Id + "|" + c.Title);

        if (!DownloadComicFiles(c, comicPath))
        {
            // Failure has already been reported; go on with the next comic.
            continue;
        }

        _syncContext.Post(DlTaskSchedule, c.Id + "|压缩中...");
        if (ZipHelper.Zip(comicPath, downloadok + c.Title + ".zip"))
        {
            Directory.Delete(comicPath, true);
            // Empty marker file: its existence is what the history check above looks for.
            File.Create(historyPath + c.Title).Close();
            _syncContext.Post(DlTaskSchedule, c.Id + "|完成");
        }
        else
        {
            _syncContext.Post(DlTaskSchedule, c.Id + "|zip压缩失败");
            ExeLog.WriteLog("[" + c.Title + "]zip压缩失败\r\n");
        }
    }

    _syncContext.Post(OutLog, "线程退出");
}

/// <summary>
/// Downloads the cover and all pages of one comic into <paramref name="comicPath"/>.
/// </summary>
/// <param name="c">Comic to download.</param>
/// <param name="comicPath">Target directory for the downloaded files.</param>
/// <returns>
/// <c>true</c> when the cover and every page were downloaded; <c>false</c> as soon as one step
/// fails (the failure is reported through <c>DlTaskSchedule</c> and written to the log).
/// </returns>
private bool DownloadComicFiles(Comic c, string comicPath)
{
    // Cover is stored under index 0.
    if (!HttpDownloadFile(qz + c.Cover, comicPath, Utils.parseNumName(0, 4)))
    {
        _syncContext.Post(DlTaskSchedule, c.Id + "|封面下载失败");
        ExeLog.WriteLog("[" + c.Title + "]封面下载失败\r\n" + "(" + (qz + c.Cover) + ")\r\n");
        return false;
    }

    // Built once per comic instead of once per page.
    Regex photoRegex = new Regex(@"<img id=""picarea"" class=""photo"" alt="".*?"" src=""(.*?)"" />");

    int x = 1;
    foreach (int k in c.Contents.Keys)
    {
        _syncContext.Post(DlTaskSchedule, c.Id + "|" + x + "/" + c.Contents.Count);
        string pid = c.Contents[k];

        string photoPage;
        try
        {
            photoPage = Http.GetHtml(_basePath + String.Format(photoPath, pid));
        }
        catch (Exception e)
        {
            _syncContext.Post(DlTaskSchedule, c.Id + "|第" + x + "页读取失败 e:" + e.Message);
            ExeLog.WriteLog("[" + c.Title + "]第" + x + "页读取失败\r\n" + "(" + _basePath + String.Format(photoPath, pid) + ")\r\n");
            return false;
        }

        // An empty source means the page did not contain the expected image tag; downloading
        // the bare prefix would be pointless, so treat it as a failed page.
        string photoSrc = String.IsNullOrEmpty(photoPage)
            ? String.Empty
            : photoRegex.Match(photoPage).Groups[1].Value.Trim();
        string photoUrl = qz + photoSrc;
        string fileName = Utils.parseNumName(k, 4);

        if (photoSrc.Length == 0 || !HttpDownloadFile(photoUrl, comicPath, fileName))
        {
            _syncContext.Post(DlTaskSchedule, c.Id + "|第" + x + "页下载失败");
            ExeLog.WriteLog("[" + c.Title + "]第" + x + "页下载失败\r\n" + "(" + photoUrl + ")\r\n");
            return false;
        }

        // A missing or tiny file (100 bytes or less) is treated as a failed download.
        FileInfo fileInfo = new FileInfo(comicPath + fileName + Utils.getPhotoExt(photoUrl));
        if (!fileInfo.Exists || fileInfo.Length <= 100)
        {
            _syncContext.Post(DlTaskSchedule, c.Id + "|第" + x + "页下载失败");
            ExeLog.WriteLog("[" + c.Title + "]第" + x + "页下载失败\r\n" + "(" + photoUrl + ")\r\n");
            return false;
        }

        _syncContext.Post(DlTaskSchedule, c.Id + "|" + x + "/" + c.Contents.Count);
        x++;
    }

    return true;
}