Beispiel #1
0
        /// <summary>
        /// Worker loop of one download thread: while <c>isDownloadingUris</c> is set, it polls
        /// <c>uriQueue</c>, downloads each dequeued resource and acts on the returned
        /// <see cref="DownloadInfo"/> (delete the partial file, or refresh the strategy's progress).
        /// </summary>
        /// <param name="_hunter">
        /// The <see cref="HunterDownloadThread"/> this worker runs for, passed as <c>object</c> and cast back here.
        /// </param>
        private void threadDownloadUris(object _hunter)
        {
            HunterDownloadThread hunterDownloadThread = (HunterDownloadThread)_hunter;

            while (isDownloadingUris)
            {
                Thread.Sleep(100);  // Sleep between polls to save CPU.
                while (uriQueue.Count > 0)
                {
                    hunterSwitch.WaitOne();

                    // Use a local downloader for the actual call: hDownload is a shared field, so another
                    // worker could overwrite it between the assignment and the DownloadFile call, making
                    // two threads operate on the same HunterDownload instance. The field is still updated
                    // so that any code reading it keeps seeing the most recent downloader.
                    HunterDownload downloader = new HunterDownload(this);
                    hDownload = downloader;

                    UriResource u;
                    // NOTE(review): lock(this) is kept because other code (not visible here) presumably
                    // synchronizes queue access on the same monitor - confirm before switching to a private lock object.
                    lock (this)
                    {
                        // Re-check under the lock: another worker may have emptied the queue
                        // after the unsynchronized Count test above.
                        if (uriQueue.Count > 0)
                        {
                            u = uriQueue.Dequeue();
                        }
                        else
                        {
                            break;
                        }
                    }

                    DownloadInfo downloadFile = downloader.DownloadFile(u, this, hunterDownloadThread);

                    if (downloadFile == null)
                    {
                        // Nothing was downloaded for this resource; nothing to do.
                    }
                    else if (downloadFile.Remove)
                    {
                        // The file was not recorded successfully: delete the (possibly partial) download.
                        try
                        {
                            File.Delete(downloadFile.Filepath);
                        }
                        catch (Exception e)
                        {
                            mHunterConsole.WriteException(e);
                        }
                    }
                    else
                    {
                        // The file was recorded successfully: update the progress.
                        projectInfo.strategy.RefreshProgress(downloadFile.Index, downloadFile.Keyword);
                    }
                }
            }

            // If this is the only worker thread still alive, report completion.
            lock (this)
            {
                if (AliveHunterThreadsCount == 1)
                {
                    mHunterConsole.Done();
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the file referenced by <paramref name="uriRes"/>, skipping links that are already
        /// known, then de-duplicates the downloaded file by MD5 and records it when it is new.
        /// In network mode the remote database is consulted as well and the new file (plus its XML
        /// descriptor) is moved to the shared cache folder.
        /// </summary>
        /// <param name="uriRes">The resource to download (URL, keyword and page index).</param>
        /// <param name="h">The owning hunter; supplies the project info (mode, database helper) and the cancellation flag.</param>
        /// <param name="thisThread">The download thread issuing the call; not used by this method.</param>
        /// <returns>
        /// <c>null</c> when the link is already known or an unexpected exception was caught;
        /// a <see cref="DownloadInfo"/> built with flag <c>false</c> and status "已下载" when the file is new and was recorded;
        /// a <see cref="DownloadInfo"/> built with flag <c>true</c> when the MD5 is a duplicate or the download
        /// timed out / was cancelled (the original comments mark these results as "delete the file").
        /// </returns>
        public DownloadInfo DownloadFile(UriResource uriRes, Hunter h, HunterDownloadThread thisThread)
        {
            Database db = null;

            if (h.projectInfo.DatabaseHelper != null)
            {
                db = h.projectInfo.DatabaseHelper.GetDatabaseInstance();
            }

            HunterWebClient wc          = null;
            bool            flowStarted = false;    // true only while this call has the flow timer running

            flowCalculator.Interval = 1000;
            flowCalculator.Elapsed += new ElapsedEventHandler(flowCalculator_Elapsed);
            try
            {
                wc = new HunterWebClient();

                #region Link duplicate check
                // Check the local store first.
                bool isExist = database.LinkExists(uriRes.Url);

                // Network mode additionally compares against the remote database.
                if (!isExist && (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network))
                {
                    if (db == null)
                    {
                        hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + uriRes.Url));
                    }
                    else
                    {
                        bool OpenFailed = false;
                        try
                        {
                            db.DbOpen();
                        }
                        catch (Exception ex)
                        {
                            // Remember the failure so the query below is skipped instead of
                            // running against a connection that never opened.
                            OpenFailed = true;
                            hunterConsole.WriteException(ex);
                        }
                        bool KRESULT = !OpenFailed &&
                                       db.IsRecordExists("tb_file_infos", out isExist, new FieldValue("file_link", uriRes.Url.Replace("'", "\\'").Replace("\"", "\\\"")));
                        if (!KRESULT || OpenFailed)
                        {
                            // Remote check unavailable: fall back to the local result (not a duplicate).
                            isExist = false;
                        }
                        try
                        {
                            db.DbClose();
                        }
                        catch (Exception ex)
                        {
                            hunterConsole.WriteException(ex);
                        }
                    }
                }
                #endregion

                if (isExist)
                {
                    // Duplicate link: report it and skip the download.
                    hunterConsole.ReportAbandonURI(uriRes, "链接重复");
                    return null;
                }
                else
                {
                    #region Download
                    try
                    {
                        hunterConsole.WriteDownload("正在下载文件:" + uriRes.Url);
                        hunterConsole.WriteDownload(
                            "线程ID:" + Thread.CurrentThread.ManagedThreadId + Environment.NewLine +
                            "正在下载的文件:" + Environment.NewLine +
                            "下载地址:" + uriRes.Url + Environment.NewLine +
                            "下载的关键字:" + strategy.GetKeyword(uriRes.Keyword) + Environment.NewLine +
                            "下载的页面页码:" + uriRes.index);
                    }
                    catch (Exception ex)
                    {
                        hunterConsole.WriteException(ex);
                    }

                    wc.DownloadFileCompleted   += new System.ComponentModel.AsyncCompletedEventHandler(wc_DownloadFileCompleted); // download-finished event
                    wc.DownloadProgressChanged += new DownloadProgressChangedEventHandler(wc_DownloadProgressChanged);            // progress event

                    // Pick a suitable local file name for the download.
                    string filepath = HunterUtilities.GetFilenameFromUrl(pInfo, strategy, uriRes);

                    int timeout = 0;    // seconds waited so far

                    flowCalculator.Start(); // start measuring throughput
                    flowStarted = true;
                    receive1    = 0;        // no data received yet

                    wc.DownloadKeyword     = strategy.GetKeyword(uriRes.Keyword);
                    wc.DownloadSource      = uriRes.Url;
                    wc.DownloadDestination = filepath;
                    if (!Directory.Exists(Path.GetDirectoryName(filepath)))
                    {
                        Directory.CreateDirectory(Path.GetDirectoryName(filepath));
                    }

                    wc.DownloadFileAsync(new Uri(uriRes.Url), filepath);    // start the download

                    bool downloadProblem = false;
                    while (wc.IsBusy)
                    {
                        Thread.Sleep(1000);                                  // poll once per second
                        timeout++;
                        if (timeout >= pInfo.timeout || h.DownloadCancelled) // timed out or cancelled
                        {
                            wc.CancelAsync();
                            wc.Dispose();

                            // Report the abandonment only once, even though the loop may spin
                            // several more times until the client stops being busy.
                            if (!downloadProblem)
                            {
                                hunterConsole.WriteDownload(
                                    "线程ID:" + Thread.CurrentThread.ManagedThreadId + Environment.NewLine +
                                    "下载超时,取消下载。");
                                try
                                {
                                    hunterConsole.ReportAbandonDownloadInfo(new DownloadInfo("", uriRes.Keyword, uriRes.index, uriRes.Url, "", true, "超时", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword))
                                                                            , "超时");
                                }
                                catch (Exception ex)
                                {
                                    // Reporting is best-effort, but do not hide the failure completely.
                                    hunterConsole.WriteException(ex);
                                }
                            }
                            downloadProblem = true;
                        }
                    }
                    flowCalculator.Stop();
                    flowStarted = false;
                    receive1    = 0;
                    receive2    = 0;                                // reset the flow counters
                    hunterConsole.outputSpeedInfo(DateTime.Now, 0); // reset the displayed speed

                    wc.Dispose();
                    #endregion

                    if (!downloadProblem)
                    {
                        // The download finished without timeout/cancellation.
                        string MD5 = string.Empty;
                        try
                        {
                            MD5 = HunterUtilities.GetMD5Hash(filepath);
                        }
                        catch (Exception e)
                        {
                            hunterConsole.WriteException(new Exception("无法获取MD5。"));
                            hunterConsole.WriteException(e);
                        }

                        #region MD5 duplicate check
                        // Compare against the local store first.
                        bool isDuplicate = database.isDuplicate(uriRes.Url, MD5);

                        // Network mode additionally compares against the remote database.
                        if (!isDuplicate && (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network))
                        {
                            if (db == null)
                            {
                                hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + MD5));
                            }
                            else
                            {
                                bool OpenFailed = false;
                                try
                                {
                                    db.DbOpen();
                                }
                                catch (Exception ex)
                                {
                                    // Remember the failure so the query below is skipped.
                                    OpenFailed = true;
                                    hunterConsole.WriteException(ex);
                                }
                                bool KRESULT = !OpenFailed && db.IsFileExists("tb_file_infos", out isDuplicate, MD5);
                                if (!KRESULT || OpenFailed)
                                {
                                    hunterConsole.WriteException(new Exception("数据库连接失败,使用本地模式判重:" + MD5));
                                    isDuplicate = false;
                                }
                                try
                                {
                                    db.DbClose();
                                }
                                catch (Exception ex)
                                {
                                    hunterConsole.WriteException(ex);
                                }
                            }
                        }
                        #endregion

                        #region Actions for new / duplicate files
                        if (!isDuplicate)
                        {
                            // New file: write its XML descriptor and record it.
                            wc.XMLFile = Path.Combine(hunterProject.projectInfo.filefolder, "$__" + Path.GetFileName(wc.DownloadDestination)) + ".xml";
                            try
                            {
                                HunterUtilities.WriteDownloadFileXML(wc.DownloadSource, wc.DownloadKeyword, Path.GetFileName(wc.DownloadDestination), (hunterProject.projectInfo.search_language == ProjectInfo.Language.none ? null : hunterProject.projectInfo.search_language.ToString()),
                                                                     wc.XMLFile);
                            }
                            catch (Exception ex)
                            {
                                hunterConsole.WriteException(ex);
                            }
                            database.addNewRecord(uriRes.Url, wc.DownloadKeyword, filepath, MD5);
                            DownloadInfo d = new DownloadInfo(filepath, uriRes.Keyword, uriRes.index, uriRes.Url, MD5, false, "已下载", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));

                            // Network mode: move the sample into the shared cache folder.
                            if (h.projectInfo.CurrentMode == ProjectInfo.HunterMode.network)
                            {
                                String t_md5            = HunterUtilities.GetMD5Hash(filepath); // MD5 used as file-name prefix
                                String filename         = t_md5 + "_" + Path.GetFileName(filepath);
                                String combinedPath     = Path.Combine(pInfo.share_remote_path, "cache", ProjectInfo.IP_ADDRESS + " (" + pInfo.name + ")");
                                String combinedFullPath = Path.Combine(combinedPath, filename);
                                if (!Directory.Exists(combinedPath))
                                {
                                    Directory.CreateDirectory(combinedPath);
                                }

                                bool      fileMoveSuccess = false;
                                const int maxMoveCount    = 5;
                                int       moveCount       = 0;

                                // Move the XML descriptor, replacing any stale copy at the destination.
                                try
                                {
                                    if (File.Exists(wc.XMLFile))
                                    {
                                        File.Delete(Path.Combine(combinedPath, Path.GetFileName(wc.XMLFile)));
                                        File.Move(wc.XMLFile, Path.Combine(combinedPath, Path.GetFileName(wc.XMLFile)));
                                    }
                                }
                                catch (Exception e)
                                {
                                    hunterConsole.WriteException(e);
                                }

                                // Move the downloaded file, retrying on failure until moveCount exceeds maxMoveCount.
                                while (!fileMoveSuccess)
                                {
                                    try
                                    {
                                        if (moveCount > maxMoveCount)
                                        {
                                            break;
                                        }
                                        File.Move(filepath, combinedFullPath);
                                        fileMoveSuccess = true;
                                    }
                                    catch (Exception e)
                                    {
                                        moveCount++;
                                        hunterConsole.WriteException(e);
                                    }
                                }
                            }

                            count++;
                            hunterConsole.outputDownloadedFileNum(DateTime.Now, count);
                            hunterConsole.ReportDownloadInfo(d);
                            return d;
                        }
                        else
                        {
                            // Duplicate content: report it and hand back a result flagged for deletion.
                            DownloadInfo d = new DownloadInfo(filepath, uriRes.Keyword,
                                                              uriRes.index, uriRes.Url, MD5, true, "MD5重复", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));
                            hunterConsole.ReportAbandonDownloadInfo(d, "MD5重复");
                            return d;
                        }
                        #endregion
                    }

                    // Reached only after a timeout or cancellation; the result is flagged for deletion.
                    // NOTE(review): the status text says "重复" (duplicate) although this path is the
                    // timeout/cancel case - confirm whether that is intended before changing it.
                    return new DownloadInfo(filepath, uriRes.Keyword, uriRes.index,
                                            uriRes.Url, null, true, "重复", hunterProject.projectInfo.strategy.GetKeyword(uriRes.Keyword));
                }
            }
            catch (Exception e)
            {
                // Placeholder for dedicated error handling: log and report "nothing downloaded".
                hunterConsole.WriteException(e);
                return null;
            }
            finally
            {
                // An exception after Start() (e.g. a malformed URL) would otherwise leave the timer running.
                if (flowStarted)
                {
                    flowCalculator.Stop();
                }
                // Undo the subscription made above so repeated calls do not stack handlers.
                flowCalculator.Elapsed -= new ElapsedEventHandler(flowCalculator_Elapsed);

                // Release the client on every path, including the duplicate-link early return.
                if (wc != null)
                {
                    wc.Dispose();
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates a hunter for a project: reloads the project definition, optionally reads the proxy
        /// list from <c>proxy.hip</c>, opens the XML database and creates (without starting) the
        /// download threads, the proxy-check threads and the URI-hunting thread.
        /// On any configuration problem or exception the constructor logs it, sets <c>Error</c> and returns early.
        /// </summary>
        /// <param name="oh">Console used for all messages and exception reports.</param>
        /// <param name="config">Hunter configuration (proxy usage and proxy filter keywords are read here).</param>
        /// <param name="_pj">Project descriptor; its paths are used to reload the project via <c>ProjectInfo.LoadProject</c>.</param>
        /// <param name="main">The main form that owns this hunter.</param>
        public Hunter(HunterConsole oh, HunterConfig config, ProjectInfo _pj, HunterForm main)
        {
            try
            {
                MainForm       = main;
                Error          = false;
                mHunterConsole = oh;
                projectInfo    = _pj;
                mHunterConfig  = config;
                ProxyFetcher   = new HunterProxyFetcher(AvailableProxies);
                projectInfo    = ProjectInfo.LoadProject(_pj.mHunterConsole, _pj.projectPath, _pj.strategyPath, true);

                downloadThreadNum = int.Parse(projectInfo.threadnum);

                // A negative count is rejected further down; clamp the array size here so the
                // allocation does not throw before that validation gets a chance to report it.
                int slotCount = Math.Max(downloadThreadNum, 0);
                hunterThreads = new HunterDownloadThread[slotCount];

                // Threads used to check proxies.
                ProxyGetThreads = new Thread[slotCount];

                if (mHunterConfig.UseProxy)
                {
                    // 'using' guarantees the file handle is released even if reading fails.
                    using (FileStream fs = new FileStream("proxy.hip", FileMode.Open, FileAccess.Read))
                    using (StreamReader sr = new StreamReader(fs))
                    {
                        ProxyText = sr.ReadToEnd();
                    }
                    AllProxies = HunterProxy.GetProxy(ProxyText, mHunterConfig.ProxyFilterKeywords);
                }

                mHunterConsole.WriteMessage(projectInfo.ConfigInformation());
                mHunterConsole.WriteMessage("");
                mHunterConsole.WriteMessage(projectInfo.strategy.GetStrategyInformation());

                xmlDatabase = new XMLDatabase(projectInfo.database, mHunterConsole);
                xmlDatabase.openDatabase();

                if (downloadThreadNum <= 0)
                {
                    mHunterConsole.WriteMessage("配置错误:下载线程数不能小于0。");
                    // Flag the failure like the missing-keywords case below: the hunting thread
                    // and URI hunter are never created on this path.
                    Error = true;
                    return;
                }

                for (int i = 0; i < hunterThreads.Length; i++)
                {
                    hunterThreads[i] = new HunterDownloadThread();
                    hunterThreads[i].downloadThread = new Thread(threadDownloadUris);
                }

                if (mHunterConfig.UseProxy)
                {
                    for (int i = 0; i < ProxyGetThreads.Length; i++)
                    {
                        ProxyGetThreads[i] = new Thread(GetAvaliableProxies);
                    }
                }

                if (projectInfo.strategy.Keywords.Count <= 0)
                {
                    projectInfo.mHunterConsole.WriteMessage("没有找到关键字,任务取消。");
                    Error = true;
                    return;
                }

                thHuntUris = new Thread(threadHuntUris);
                thHuntUris.SetApartmentState(ApartmentState.STA);

                hUri = new HunterUri(this);
                projectInfo.strategy.RecordFirstWord();

                mHunterConsole.WriteMessage("下载线程总数:" + hunterThreads.Length);
                mHunterConsole.WriteMessage("读取配置完毕。");
                mHunterConsole.WriteMessage("正在运行任务...");
            }
            catch (Exception e)
            {
                // Construction did not complete; mark the instance as unusable before logging.
                Error = true;
                mHunterConsole.WriteException(e);
            }
        }