Пример #1
0
        /// <summary>
        /// Scans the list pages from <c>startPage</c> to <c>endPage</c>, extracts every gallery entry
        /// found on them, resolves the page-number to photo-id table of each gallery and stores the
        /// result in <c>Comics</c> as well as in a <c>.wnacgdb</c> file under the <c>data</c> folder
        /// located in the application's base directory.
        /// </summary>
        /// <remarks>
        /// Progress and errors are reported through <c>_syncContext.Post(OutLog, ...)</c>.
        /// A gallery whose <c>.wnacgdb</c> file already exists is skipped without any further request.
        /// An exception raised while processing a list page is logged and the loop moves on to the
        /// next page.
        /// </remarks>
        public void Collect()
        {
            // Number of galleries a complete list page is expected to contain.
            const int expectedPerPage = 12;

            string dirPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "data");

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(dirPath);

            // The patterns never change, so build the Regex objects once instead of once per gallery.
            Regex listRegex      = new Regex(@"<li class=""li gallary_item"">\s*?<div class=""pic_box"">\s*?<a href=""/photos-index-aid-(?<mgid>\d+).html""\s*title=""(?<title>.*?)""><img alt="".*?"" src=""(?<img>.*?)""");
            Regex homePhotoRegex = new Regex(@"<div class=""pic_box""><a href=""/photos-view-id-(\d*).html"">");
            Regex optionRegex    = new Regex(@"<option\s+value=""(\d+)"".*?>第(\d+)頁</option>");

            Comics = new List<Comic>();
            for (int curPage = startPage; curPage <= endPage; curPage++)
            {
                _syncContext.Post(OutLog, "分析页面 page:" + curPage);
                try
                {
                    int    bzIndex    = 0;
                    string listUrl    = _basePath + String.Format(collectorPath, curPage, bzType);
                    string listResult = Http.GetHtml(listUrl);

                    foreach (Match mch in listRegex.Matches(listResult))
                    {
                        bzIndex++;
                        string mgid  = mch.Groups["mgid"].Value;
                        string title = mch.Groups["title"].Value;
                        string img   = mch.Groups["img"].Value;

                        Comic comic = new Comic();
                        comic.Title = Utils.getFolderName(title);

                        // The cache file doubles as the "already parsed" marker.
                        string fileStr = Path.Combine(dirPath, comic.Title + ".wnacgdb");
                        if (File.Exists(fileStr))
                        {
                            _syncContext.Post(OutLog, "已解析.跳过 \r" + title + "");
                            continue;
                        }

                        comic.Id    = mgid;
                        comic.Cover = img;

                        string detailPage  = Http.GetHtml(_basePath + String.Format(detailPath, mgid));
                        string homePhotoId = homePhotoRegex.Match(detailPage).Groups[1].Value;
                        if (homePhotoId.Length == 0)
                        {
                            // Without the first photo id the next request would target a bogus URL,
                            // so report the gallery and move on.
                            _syncContext.Post(OutLog, "解析失败.跳过 \r" + title + "");
                            continue;
                        }

                        string photoDetailPage = Http.GetHtml(_basePath + String.Format(photoPath, homePhotoId));

                        // Each <option> carries the photo id (group 1) and its page number (group 2).
                        foreach (Match m in optionRegex.Matches(photoDetailPage))
                        {
                            comic.Contents.Add(int.Parse(m.Groups[2].Value), m.Groups[1].Value.Trim());
                        }

                        _syncContext.Post(OutLog, "提取 \r" + title + "");

                        Comics.Add(comic);

                        try
                        {
                            // File layout: id, cover, then one "page|photoId" line per entry.
                            using (FileStream fs = new FileStream(fileStr, FileMode.Create, FileAccess.Write))
                            using (StreamWriter sw = new StreamWriter(fs))
                            {
                                sw.WriteLine(comic.Id);
                                sw.WriteLine(comic.Cover);
                                foreach (int k in comic.Contents.Keys)
                                {
                                    sw.WriteLine(k + "|" + comic.Contents[k]);
                                }
                            }
                        }
                        catch (Exception writeError)
                        {
                            // Saving stays best-effort (the gallery is already in Comics), but the
                            // failure is reported instead of being swallowed.
                            _syncContext.Post(OutLog, "保存失败 \r" + title + " " + writeError.Message);

                            // A truncated file would make every later run skip this gallery as
                            // "already parsed", so remove whatever was written.
                            try
                            {
                                File.Delete(fileStr);
                            }
                            catch (Exception deleteError)
                            {
                                _syncContext.Post(OutLog, "清理失败 \r" + fileStr + " " + deleteError.Message);
                            }
                        }

                        // Short pause between galleries.
                        Thread.Sleep(100);
                    }

                    if (bzIndex != expectedPerPage)
                    {
                        ExeLog.WriteLog("当前页面本子数量缺少:" + bzIndex + "/" + expectedPerPage + "\r\n页面:" + listUrl + "内容为:\r\n" + listResult);
                    }
                }
                catch (Exception e)
                {
                    _syncContext.Post(OutLog, "解析 page:" + curPage + "失败 \r" + e.Message + "");
                }
            }
            _syncContext.Post(OutLog, "解析完成");
        }
Пример #2
0
        /// <summary>
        /// Worker routine: takes comics from the <c>comics</c> queue until it is empty, downloads the
        /// cover and every page of each comic into <c>download\progress\&lt;title&gt;\</c>, zips the folder
        /// into <c>download\ok\&lt;title&gt;.zip</c> and records the title in <c>download\history\</c>.
        /// </summary>
        /// <param name="obj">Not used by this method.</param>
        /// <remarks>
        /// Progress is reported through <c>_syncContext.Post</c> with the <c>OutLog</c>,
        /// <c>DlTaskStart</c> and <c>DlTaskSchedule</c> callbacks. A comic whose download fails is
        /// abandoned and the next one in the queue is processed.
        /// NOTE(review): <c>comics.Count</c> followed by <c>Dequeue()</c> is not atomic; if several
        /// workers share the queue this needs synchronization — confirm against the caller.
        /// </remarks>
        private void DownloadFun(object obj)
        {
            // Paths keep their trailing separator because they are concatenated with file names
            // below and handed to HttpDownloadFile / ZipHelper in that form.
            string dirPath     = AppDomain.CurrentDomain.BaseDirectory + "download\\";
            string downloading = dirPath + "progress\\";
            string downloadok  = dirPath + "ok\\";
            string historyPath = dirPath + "history\\";

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(downloadok);
            Directory.CreateDirectory(historyPath);

            while (comics.Count > 0)
            {
                Comic c = comics.Dequeue();

                // An empty marker file named after the title means the comic was downloaded before.
                if (File.Exists(historyPath + c.Title))
                {
                    _syncContext.Post(OutLog, "曾经下载过:" + c.Title + " 跳过\r\n");
                    continue;
                }

                string comicPath = downloading + c.Title + "\\";
                _syncContext.Post(DlTaskStart, c.Id + "|" + c.Title);

                if (!DownloadComicFiles(c, comicPath))
                {
                    // The failure has already been reported; go on with the next comic.
                    continue;
                }

                _syncContext.Post(DlTaskSchedule, c.Id + "|压缩中...");

                if (ZipHelper.Zip(comicPath, downloadok + c.Title + ".zip"))
                {
                    Directory.Delete(comicPath, true);
                    File.Create(historyPath + c.Title).Close();
                    _syncContext.Post(DlTaskSchedule, c.Id + "|完成");
                }
                else
                {
                    _syncContext.Post(DlTaskSchedule, c.Id + "|zip压缩失败");
                }
            }

            _syncContext.Post(OutLog, "线程退出");
        }

        /// <summary>
        /// Downloads the cover and all pages of <paramref name="c"/> into
        /// <paramref name="comicPath"/>, reporting progress via <c>DlTaskSchedule</c>.
        /// </summary>
        /// <param name="c">Comic whose cover and contents are downloaded.</param>
        /// <param name="comicPath">Target folder path, including the trailing separator.</param>
        /// <returns><c>true</c> when every file was downloaded; <c>false</c> after the first failure.</returns>
        private bool DownloadComicFiles(Comic c, string comicPath)
        {
            // Cover image, stored under index 0.
            if (!HttpDownloadFile(qz + c.Cover, comicPath, Utils.parseNumName(0, 4)))
            {
                _syncContext.Post(DlTaskSchedule, c.Id + "|封面下载失败");
                return false;
            }

            // Built once per comic instead of once per page.
            Regex picRegex = new Regex(@"<img id=""picarea"" class=""photo"" alt="".*?"" src=""(.*?)"" />");

            int x = 1;
            foreach (int k in c.Contents.Keys)
            {
                _syncContext.Post(DlTaskSchedule, c.Id + "|" + x + "/" + c.Contents.Count);
                string pid = c.Contents[k];
                string photoPage;
                try
                {
                    photoPage = Http.GetHtml(_basePath + String.Format(photoPath, pid));
                }
                catch (Exception e)
                {
                    _syncContext.Post(DlTaskSchedule, c.Id + "|第" + x + "页读取失败 e:" + e.Message);
                    return false;
                }

                // No page content or no image tag: there is nothing to download. Without this check
                // the bare prefix "qz" would be fetched and stored as if it were the page image.
                string src = string.IsNullOrEmpty(photoPage)
                    ? string.Empty
                    : picRegex.Match(photoPage).Groups[1].Value.Trim();
                if (src.Length == 0)
                {
                    _syncContext.Post(DlTaskSchedule, c.Id + "|第" + x + "页解析失败");
                    return false;
                }

                if (!HttpDownloadFile(qz + src, comicPath, Utils.parseNumName(k, 4)))
                {
                    _syncContext.Post(DlTaskSchedule, c.Id + "|第" + x + "页下载失败");
                    return false;
                }

                _syncContext.Post(DlTaskSchedule, c.Id + "|" + x + "/" + c.Contents.Count);
                x++;
            }

            return true;
        }