示例#1
0
        /// <summary>
        /// Drives one complete run: prepares the base directory, resolves the number of
        /// pages, processes the pages in order and finally flags the run as finished.
        /// </summary>
        /// <returns>A task that completes when the run has ended (or was aborted early).</returns>
        internal async Task RunAsync()
        {
            // Prepare the environment (creates the base directory).
            Init();

            // Resolve the total page count; without it nothing can be processed.
            bool gotPageCount = await ProcessTotalPageCountAsync();
            if (!gotPageCount)
            {
                LogHelp.Log("=================================操作失败。。。", true);
                return;
            }

            // Walk through the pages one after another.
            int pageIndex = 0;
            while (pageIndex < _totalPage)
            {
                LogHelp.Log("正在处理第{0}/{1}页。。。", pageIndex + 1, _totalPage, true);
                await ProcessPageAsync(pageIndex);
                pageIndex++;
            }

            // Acquire the semaphore _threadCount times. Detail jobs release a slot when
            // they end, so this presumably blocks until all queued jobs are done
            // (assumes the semaphore capacity equals _threadCount - TODO confirm).
            int acquired = 0;
            while (acquired < _threadCount)
            {
                _semaphore.WaitOne();
                acquired++;
            }

            LogHelp.Log("处理完毕!。。。。", true);
            Program.isFinish = true;
            _isFinish        = true;
        }
示例#2
0
        /// <summary>
        /// Extracts the link area from one list page and queues a detail job on the
        /// thread pool for every link that passes the filter.
        /// </summary>
        /// <param name="pageStr">
        /// HTML of the list page. Null, empty and "no permission" pages are ignored,
        /// as are pages on which the start or end marker cannot be found.
        /// </param>
        private void ProcessAsync(string pageStr)
        {
            if (string.IsNullOrEmpty(pageStr) || pageStr.Contains("您无权进行当前操作,这可能因以下原因之一造成"))
            {
                return;
            }

            // The data area lies between the last start marker and the first end marker
            // after it. Both lookups can fail (changed layout, error page); an unchecked -1
            // would either shift the area to the wrong place or make Substring throw.
            var markerPos = pageStr.LastIndexOf(_startPart);
            if (markerPos < 0)
            {
                LogHelp.Log("=================================未找到数据区域起始标记,跳过本页。。。", true);
                return;
            }
            var startPos = markerPos + _startPart.Length;
            var endPos   = pageStr.IndexOf(_endPart, startPos);
            if (endPos < 0)
            {
                LogHelp.Log("=================================未找到数据区域结束标记,跳过本页。。。", true);
                return;
            }
            var dataArea = pageStr.Substring(startPos, endPos - startPos);

            var ms = _regA.Matches(dataArea);

            LogHelp.Log("本页一共{0}个链接需要判断。。。", ms.Count, true);
            for (int i = 0; i < ms.Count; i++)
            {
                var item = ms[i];
                var u    = item.Groups[1].Value;
                var name = item.Groups[2].Value;

                // Skip links whose text is a plain number, that point to a .php page,
                // or whose text still contains markup.
                int  num;
                bool isnum = int.TryParse(name, out num);
                if (isnum || u.Contains(".php") || name.Contains("<"))
                {
                    continue;
                }
                LogHelp.Log("进度:{0}/{1},符合要求:{2}", i, ms.Count, item.Groups[1].Value, true);

                // Take a semaphore slot before queuing; ProcessDetailAsync releases it
                // when the detail job ends.
                _semaphore.WaitOne();
                // Pause for one of the four configured wait intervals, chosen at random.
                Thread.Sleep(waitopt[_random.Next(1000) % 4]);
                ThreadPool.QueueUserWorkItem(ProcessDetailAsync, item);
            }
        }
示例#3
0
        /// <summary>
        /// Determines the total number of pages, stores it in <c>_totalPage</c> and
        /// generates the page list.
        /// </summary>
        /// <remarks>
        /// When <c>_frontCount</c> is set it is used directly; otherwise the first page is
        /// downloaded and the count is read from it with <c>_regTotalPage</c>.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when the page count was determined; <c>false</c> when the first page
        /// could not be fetched or no valid number could be extracted from it.
        /// </returns>
        private async Task <bool> ProcessTotalPageCountAsync()
        {
            if (_frontCount != null)
            {
                _totalPage = _frontCount.Value;
            }
            else
            {
                _firstPageStr = await HttpHelp.GetPageStringAsync(_url);
                if (string.IsNullOrEmpty(_firstPageStr))
                {
                    return(false);
                }

                var m = _regTotalPage.Match(_firstPageStr);

                // Check the match itself, and parse defensively: a capture that is not a
                // valid Int32 must yield "false" like every other failure path here,
                // not an exception.
                int parsedTotal;
                if (!m.Success || !int.TryParse(m.Groups[1].Value, out parsedTotal))
                {
                    return(false);
                }
                _totalPage = parsedTotal;
            }

            LogHelp.Log("总分页数为:" + _totalPage);
            GeneratePageList();
            return(true);
        }
示例#4
0
 /// <summary>
 /// Makes sure the base output directory exists, creating it when necessary.
 /// </summary>
 private void Init()
 {
     // _baseDir is a directory path, so it has to be probed with Directory.Exists:
     // File.Exists returns false for directories, which made the "created" branch
     // run (and log) on every start.
     if (!Directory.Exists(_baseDir))
     {
         Directory.CreateDirectory(_baseDir);
         LogHelp.Log("目录创建完成:" + _baseDir);
     }
     LogHelp.Log("目录存在:" + _baseDir);
 }
示例#5
0
        /// <summary>
        /// Downloads a page synchronously and decodes it as GBK, retrying on failure.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The page text, or <c>null</c>/empty when every attempt failed; in that case an
        /// error record of type 3 is stored for the URL.
        /// </returns>
        public static string GetPageString(string url)
        {
            // Total number of attempts (unchanged from the previous "t <= 2" loop bound).
            const int maxAttempts = 3;

            string res = null;
            int    t   = 0;

            do
            {
                t++;
                try
                {
                    var request = (HttpWebRequest)WebRequest.Create(url);
                    request.Method    = "GET";
                    request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36";
                    using (var response = request.GetResponse())
                    using (var resStream = response.GetResponseStream())
                    using (var sr = new StreamReader(resStream, Encoding.GetEncoding("GBK")))
                    {
                        res = sr.ReadToEnd();
                    }
                }
                catch (Exception ex)
                {
                    // Report only once the last attempt has failed, so the log is neither
                    // emitted before a successful retry nor skipped for the final failure.
                    if (t == maxAttempts)
                    {
                        LogHelp.Log("=================================页面获取失败:" + url, true);
                        LogHelp.Log("=================================" + ex.Message);
                    }
                    res = null;
                }
            } while (string.IsNullOrEmpty(res) && t < maxAttempts);

            if (string.IsNullOrEmpty(res))
            {
                MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess
                {
                    CreateTime = DateTime.Now,
                    Type       = 3,
                    Url        = url
                });
            }
            return(res);
        }
示例#6
0
        /// <summary>
        /// Downloads a page asynchronously through the shared <c>client</c> and decodes it
        /// as GBK, retrying on failure.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The page text, or <c>null</c>/empty when every attempt failed; in that case an
        /// error record of type 3 is stored for the URL.
        /// </returns>
        public static async Task <string> GetPageStringAsync(string url)
        {
            // Total number of attempts (unchanged from the previous "t <= 5" loop bound).
            const int maxAttempts = 6;

            string res = null;
            int    t   = 0;

            do
            {
                t++;
                try
                {
                    // The response message is disposable; release it together with its
                    // content stream once the text has been read.
                    using (var respMsg = await client.GetAsync(url))
                    using (var resStream = await respMsg.Content.ReadAsStreamAsync())
                    using (var sr = new StreamReader(resStream, Encoding.GetEncoding("GBK")))
                    {
                        res = await sr.ReadToEndAsync();
                    }
                }
                catch (Exception ex)
                {
                    // Report only once the last attempt has failed, so the log is neither
                    // emitted before a successful retry nor skipped for the final failure.
                    if (t == maxAttempts)
                    {
                        LogHelp.Log("=================================页面获取失败:" + url, true);
                        LogHelp.Log("=================================" + ex.Message);
                        LogHelp.Log("=================================", true);
                    }
                    res = null;
                }
            } while (string.IsNullOrEmpty(res) && t < maxAttempts);

            if (string.IsNullOrEmpty(res))
            {
                MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess
                {
                    CreateTime = DateTime.Now,
                    Type       = 3,
                    Url        = url
                });
            }
            return(res);
        }
示例#7
0
        /// <summary>
        /// Thread-pool work item that processes one detail page: registers the movie,
        /// collects its screenshot and bt resources, stores them and downloads the files.
        /// </summary>
        /// <remarks>
        /// The method is queued via <c>ThreadPool.QueueUserWorkItem</c>, which is why it is
        /// <c>async void</c> with an <c>object</c> parameter. No caller can observe an
        /// exception from it, so failures are caught and logged here. The semaphore slot
        /// taken by the producer is always released on exit.
        /// </remarks>
        /// <param name="stat">
        /// The <see cref="Match"/> of the list-page link; group 1 is the relative URL and
        /// group 2 the link text. Any other value is ignored.
        /// </param>
        private async void ProcessDetailAsync(object stat)
        {
            try
            {
                var item = stat as Match;
                if (item == null)
                {
                    return;
                }

                MoviePage pageModel = null;
                string    movieName = ValidFileName(item.Groups[2].Value);
                var       movieUrl  = _uri.Scheme + "://" + _uri.Authority + "/bbs/" + item.Groups[1].Value;

                if (MyDbCOntextHelp.ExistMovie(movieUrl))
                {
                    pageModel = MyDbCOntextHelp.QueryMovie(m => m.Url == movieUrl);
                }
                else
                {
                    pageModel = new MoviePage
                    {
                        CreateTime = DateTime.Now,
                        UpdateTime = DateTime.Now,
                        Name       = movieName,
                        Url        = movieUrl,
                        IsHandler  = false,
                        Type       = _typeName,
                    };

                    LogHelp.Log("影片:" + pageModel.Name);
                }

                string moviedir = Path.Combine(_baseDir, pageModel.Name);
                if (!Directory.Exists(moviedir))
                {
                    Directory.CreateDirectory(moviedir);
                }

                // Continue when the movie already has an id or was stored successfully just now.
                if (pageModel.Id != 0 || MyDbCOntextHelp.AddPicturePage(pageModel))
                {
                    var detailPageString = await HttpHelp.GetPageStringAsync(pageModel.Url);

                    if (string.IsNullOrEmpty(detailPageString) || detailPageString.Contains("您无权进行当前操作,这可能因以下原因之一造成"))
                    {
                        return;
                    }

                    // Download count: only taken over when the pattern matched and the
                    // capture is a valid number.
                    var countMatch = _regDownloadCount.Match(detailPageString);
                    int downloadCount;
                    if (countMatch.Success && int.TryParse(countMatch.Groups[1].Value, out downloadCount))
                    {
                        pageModel.DownloadCount = downloadCount;
                    }
                    LogHelp.Log("影片下载次数:" + pageModel.DownloadCount);

                    // Screenshots (resource type 1).
                    var imgAreaStr = _regImgArea.Match(detailPageString).Value;

                    var mimgs   = _regImg.Matches(imgAreaStr);
                    var resList = new List <Resource>(mimgs.Count + 1);
                    for (int j = 0; j < mimgs.Count; j++)
                    {
                        var imgitem = mimgs[j];
                        var r       = new Resource
                        {
                            CreateTime    = DateTime.Now,
                            UpdateTime    = DateTime.Now,
                            PicturePageId = pageModel.Id,
                            IsHandler     = false,
                            Type          = 1,
                            Url           = imgitem.Groups[1].Value
                        };
                        // Relative image addresses are resolved against the site's /bbs/ root.
                        if (!r.Url.StartsWith("http"))
                        {
                            r.Url = _uri.Scheme + "://" + _uri.Authority + "/bbs/" + r.Url;
                        }
                        resList.Add(r);
                    }
                    LogHelp.Log("截图{0}张.....bt文件一个", mimgs.Count);

                    // bt file (resource type 2): the first anchor after the "检查重复" link.
                    var btRes = new Resource
                    {
                        CreateTime    = DateTime.Now,
                        UpdateTime    = DateTime.Now,
                        PicturePageId = pageModel.Id,
                        Type          = 2,
                        IsHandler     = false,
                    };
                    var p1 = detailPageString.IndexOf("检查重复</a>");
                    if (p1 == -1)
                    {
                        return;
                    }
                    // Each lookup can fail; stop instead of passing -1 on to IndexOf/Substring.
                    var startbt = detailPageString.IndexOf("<a href=\"", p1);
                    if (startbt == -1)
                    {
                        return;
                    }
                    var endbt = detailPageString.IndexOf("</a>", startbt);
                    if (endbt == -1)
                    {
                        return;
                    }
                    var bta = detailPageString.Substring(startbt, endbt - startbt + 4);
                    var mbt = _regBt.Match(bta);
                    btRes.Name = mbt.Groups[2].Value;
                    btRes.Url  = _uri.Scheme + "://" + _uri.Authority + "/bbs/" + mbt.Groups[1].Value;
                    resList.Add(btRes);

                    MyDbCOntextHelp.AddResourceList(resList);

                    // Every entry except the last one is a screenshot; the last is the bt file.
                    for (int i = 0; i < resList.Count - 1; i++)
                    {
                        await HttpHelp.DownloadImgAsync(resList[i].Url, moviedir);
                    }
                    await HttpHelp.DownloadFileAsync(btRes.Url, Path.Combine(moviedir, btRes.Name));
                }
                LogHelp.Log("处理完毕:" + movieName);
            }
            catch (Exception ex)
            {
                // An exception leaving an async void method cannot be handled by anyone
                // else, so it is logged here and the job ends.
                LogHelp.Log("=================================详情页处理失败:" + ex.Message, true);
            }
            finally
            {
                _semaphore.Release();
            }
        }
示例#8
0
        /// <summary>
        /// Downloads an image asynchronously into <paramref name="path"/>, using the last
        /// segment of the URL as file name and retrying on failure.
        /// </summary>
        /// <param name="url">Address of the image.</param>
        /// <param name="path">Directory that receives the file.</param>
        /// <returns>
        /// <c>true</c> when the file is present afterwards (already there or downloaded);
        /// <c>false</c> when every attempt failed, in which case an error record of type 1
        /// is stored for the URL.
        /// </returns>
        public static async Task <bool> DownloadImgAsync(string url, string path)
        {
            // Total number of attempts (unchanged from the previous "t <= 2" loop bound).
            const int maxAttempts = 3;

            bool res = false;
            int  t   = 0;

            do
            {
                t++;
                // Set while this attempt is writing the target file, so that a failed
                // transfer can remove the truncated file it leaves behind.
                string partialFile = null;
                try
                {
                    var fileName = Path.Combine(path, Path.GetFileName(url));
                    // A non-empty file is treated as already downloaded.
                    if (File.Exists(fileName) && new FileInfo(fileName).Length != 0)
                    {
                        return(true);
                    }

                    using (var respMsg = await client.GetAsync(url))
                    using (var resStream = await respMsg.Content.ReadAsStreamAsync())
                    {
                        // Same size as announced by the server: nothing to fetch.
                        if (File.Exists(fileName) && new FileInfo(fileName).Length == respMsg.Content.Headers.ContentLength)
                        {
                            return(true);
                        }

                        partialFile = fileName;
                        using (var output = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.ReadWrite, 4096, true))
                        {
                            byte[] buf  = new byte[1024 * 1024];
                            int    blen = 0;
                            do
                            {
                                blen = await resStream.ReadAsync(buf, 0, buf.Length);

                                if (blen != 0)
                                {
                                    await output.WriteAsync(buf, 0, blen);
                                }
                            } while (blen != 0);
                        }
                        partialFile = null;
                        res         = true;
                    }
                }
                catch (Exception ex)
                {
                    // Without this cleanup the next attempt would find a non-empty file
                    // and report success for an incomplete download.
                    if (partialFile != null)
                    {
                        try
                        {
                            File.Delete(partialFile);
                        }
                        catch (IOException)
                        {
                            // Best effort: the file is in use and cannot be removed now.
                        }
                        catch (UnauthorizedAccessException)
                        {
                            // Best effort: no permission to remove the file.
                        }
                    }

                    // Report only once the last attempt has failed.
                    if (t == maxAttempts)
                    {
                        LogHelp.Log("=================================图片获取失败:" + url, true);
                        LogHelp.Log("=================================" + ex.Message);
                    }
                    res = false;
                }
            } while (res == false && t < maxAttempts);

            if (!res)
            {
                MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess
                {
                    CreateTime = DateTime.Now,
                    Type       = 1,
                    Url        = url
                });
            }
            return(res);
        }
示例#9
0
        /// <summary>
        /// Downloads an image synchronously into <paramref name="path"/>, using the last
        /// segment of the URL as file name and retrying on failure.
        /// </summary>
        /// <param name="url">Address of the image.</param>
        /// <param name="path">Directory that receives the file.</param>
        /// <returns>
        /// <c>true</c> when the file is present afterwards (already there or downloaded);
        /// <c>false</c> when every attempt failed, in which case an error record of type 1
        /// is stored for the URL.
        /// </returns>
        public static bool DownloadImg(string url, string path)
        {
            // Total number of attempts (unchanged from the previous "t <= 2" loop bound).
            const int maxAttempts = 3;

            bool res = false;
            int  t   = 0;

            do
            {
                t++;
                // Set while this attempt is writing the target file, so that a failed
                // transfer can remove the truncated file it leaves behind.
                string partialFile = null;
                try
                {
                    var fileName = Path.Combine(path, Path.GetFileName(url));
                    // A non-empty file is treated as already downloaded.
                    if (File.Exists(fileName) && new FileInfo(fileName).Length != 0)
                    {
                        return(true);
                    }

                    var request = (HttpWebRequest)WebRequest.Create(url);
                    request.Method    = "GET";
                    request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36";
                    using (var response = request.GetResponse())
                    using (var resStream = response.GetResponseStream())
                    {
                        // Same size as announced by the server: nothing to fetch.
                        if (File.Exists(fileName) && new FileInfo(fileName).Length == response.ContentLength)
                        {
                            return(true);
                        }

                        partialFile = fileName;
                        using (var output = new FileStream(fileName, FileMode.Create, FileAccess.Write))
                        {
                            byte[] buf  = new byte[8192];
                            int    blen = 0;
                            do
                            {
                                blen = resStream.Read(buf, 0, buf.Length);
                                if (blen != 0)
                                {
                                    output.Write(buf, 0, blen);
                                }
                            } while (blen != 0);
                        }
                        partialFile = null;
                        res         = true;
                    }
                }
                catch (Exception ex)
                {
                    // Without this cleanup the next attempt would find a non-empty file
                    // and report success for an incomplete download.
                    if (partialFile != null)
                    {
                        try
                        {
                            File.Delete(partialFile);
                        }
                        catch (IOException)
                        {
                            // Best effort: the file is in use and cannot be removed now.
                        }
                        catch (UnauthorizedAccessException)
                        {
                            // Best effort: no permission to remove the file.
                        }
                    }

                    // Report only once the last attempt has failed.
                    if (t == maxAttempts)
                    {
                        LogHelp.Log("=================================图片获取失败:" + url, true);
                        LogHelp.Log("=================================" + ex.Message);
                    }
                    res = false;
                }
            } while (res == false && t < maxAttempts);

            if (!res)
            {
                MyDbCOntextHelp.AddErroeProcess(new Data.ErroeProcess
                {
                    CreateTime = DateTime.Now,
                    Type       = 1,
                    Url        = url
                });
            }
            return(res);
        }